RubyGems - omnivore - Versions diffs - 0.0.1 - Mend

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

data/.gitignore +4 -0
data/.rvmrc +7 -0
data/Gemfile +4 -0
data/README.md +1 -0
data/Rakefile +1 -0
data/lib/omnivore/http_client.rb +21 -0
data/lib/omnivore/version.rb +3 -0
data/lib/omnivore/xpath_extractor.rb +12 -0
data/lib/omnivore.rb +7 -0
data/omnivore.gemspec +24 -0
data/spec/http_client_spec.rb +11 -0
data/spec/xpath_extractor_spec.rb +30 -0
metadata +78 -0

data/.gitignore ADDED Viewed

@@ -0,0 +1,4 @@
+*.gem
+.bundle
+Gemfile.lock
+pkg/*

data/.rvmrc ADDED Viewed

@@ -0,0 +1,7 @@
+# Source the ruby-1.9.2-p0@omnivore RVM environment file when the
+# environments directory exists and that file is non-empty; otherwise
+# invoke `rvm --create` for the same ruby/gemset name.
+if [[ -d "${rvm_path:-$HOME/.rvm}/environments" \
+  && -s "${rvm_path:-$HOME/.rvm}/environments/ruby-1.9.2-p0@omnivore" ]] ; then
+  \. "${rvm_path:-$HOME/.rvm}/environments/ruby-1.9.2-p0@omnivore"
+else
+  rvm --create  "ruby-1.9.2-p0@omnivore"
+fi

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+# Use the HTTPS endpoint of rubygems.org rather than plain HTTP.
+source "https://rubygems.org"
+# Specify your gem's dependencies in omnivore.gemspec
+gemspec

data/README.md ADDED Viewed

@@ -0,0 +1 @@
+Nothing to see here, move along.

data/Rakefile ADDED Viewed

@@ -0,0 +1 @@
+require "bundler/gem_tasks"

data/lib/omnivore/http_client.rb ADDED Viewed

@@ -0,0 +1,21 @@
+require 'net/http'
+require 'uri'
+module Omnivore
+  # Minimal HTTP GET helper built on Net::HTTP.
+  class HttpClient
+    # Fetches `url` with an HTTP GET and returns the response body,
+    # following redirects along the way.
+    #
+    # url      - absolute URL string to fetch.
+    # attempts - number of requests that may still be issued; every
+    #            redirect hop consumes one (default 3).
+    #
+    # Returns the body of the first successful (2xx) response.
+    # Raises ArgumentError once the attempt budget is used up, and whatever
+    # Net::HTTPResponse#error! raises for any other kind of response.
+    def HttpClient.get(url, attempts=3)
+      # <= rather than == so a negative budget cannot recurse without bound.
+      raise ArgumentError, 'HTTP redirect too deep' if attempts <= 0
+      response = Net::HTTP.get_response(URI.parse(url))
+      case response
+      when Net::HTTPSuccess then response.body
+      when Net::HTTPRedirection
+        location = response['location']
+        # A redirect-class response without a Location header (e.g. 304)
+        # gives us nowhere to go, so surface it as an error.
+        response.error! if location.nil?
+        # Location may be relative; resolve it against the URL just requested.
+        HttpClient.get(URI.join(url, location).to_s, attempts - 1)
+      else
+        response.error!
+      end
+    end
+  end
+end

data/lib/omnivore/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Omnivore
+  # Gem version string. Frozen so the shared constant cannot be mutated
+  # in place.
+  VERSION = "0.0.1".freeze
+end

data/lib/omnivore/xpath_extractor.rb ADDED Viewed

@@ -0,0 +1,12 @@
+require "rexml/document"
+module Omnivore
+  # Evaluates XPath expressions against markup using REXML.
+  class XPathExtractor
+    # Parses `html` into a REXML::Document and returns the result of
+    # REXML::XPath.match for `xpath` against that document.
+    def self.match(html, xpath)
+      REXML::XPath.match(REXML::Document.new(html), xpath)
+    end
+  end
+end

data/lib/omnivore.rb ADDED Viewed

@@ -0,0 +1,7 @@
+require "omnivore/version"
+require "omnivore/http_client"
+require "omnivore/xpath_extractor"
+# Top-level namespace for the gem. This file only pulls in the version
+# constant, HttpClient and XPathExtractor through the requires above;
+# the module body itself defines nothing.
+module Omnivore
+  # Intentionally empty.
+end

data/omnivore.gemspec ADDED Viewed

@@ -0,0 +1,24 @@
+# -*- encoding: utf-8 -*-
+$:.push File.expand_path("../lib", __FILE__)
+require "omnivore/version"
+# Gem specification for omnivore: identity, packaged files and dependencies.
+Gem::Specification.new do |s|
+  s.name        = "omnivore"
+  # Version comes from the Omnivore::VERSION constant required above.
+  s.version     = Omnivore::VERSION
+  s.authors     = ["Matthias Eder"]
+  s.email       = ["matthias@izume.com"]
+  # NOTE(review): homepage is an empty string.
+  s.homepage    = ""
+  s.summary     = %q{Content extraction and analysis}
+  s.description = %q{A library for extracting content from HTML documents.}
+  s.rubyforge_project = "omnivore"
+  # The file lists below are built by shelling out to `git ls-files` and
+  # splitting its output on newlines.
+  s.files         = `git ls-files`.split("\n")
+  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+  # Executables are registered by base name only.
+  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+  s.require_paths = ["lib"]
+  # specify any dependencies here; for example:
+  s.add_development_dependency "rspec"
+  # s.add_runtime_dependency "rest-client"
+end

data/spec/http_client_spec.rb ADDED Viewed

@@ -0,0 +1,11 @@
+require 'omnivore/http_client'
+# NOTE(review): this example calls HttpClient.get with a real external URL,
+# so it issues a live HTTP request and only passes with network access and
+# while that page keeps responding.
+describe Omnivore::HttpClient do
+  it "should fetch the content of a url" do
+    html = Omnivore::HttpClient.get("http://blog.steveklabnik.com/posts/2011-09-28-real-modern-ruby-development")
+    # Only checks that some non-empty body came back, not its content.
+    html.should_not be_nil
+    html.should_not be_empty
+  end
+end

data/spec/xpath_extractor_spec.rb ADDED Viewed

@@ -0,0 +1,30 @@
+require "omnivore/xpath_extractor"
+# Fixture markup: a page whose body holds three divs with the classes
+# "banner", "topnav" and "content".
+CONTENT = %{
+  <html>
+    <head></head>
+    <body>
+      <div class="banner">
+        This is a banner
+      </div>
+      <div class="topnav">
+        <ul>
+          <li>Home</li>
+          <li>About</li>
+        </ul>
+      </div>
+      <div class="content">
+        This is where the real stuff is.
+      </div>
+    </body>
+  </html>
+}
+describe Omnivore::XPathExtractor do
+  # Expects at least one match for the div carrying class="content".
+  it "should match the correct xpath" do
+    matches = Omnivore::XPathExtractor.match(CONTENT, "//div[@class=\"content\"]")
+    matches.size.should be > 0
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,78 @@
+--- !ruby/object:Gem::Specification
+name: omnivore
+version: !ruby/object:Gem::Version
+  prerelease:
+  version: 0.0.1
+platform: ruby
+authors:
+- Matthias Eder
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2012-01-05 00:00:00 -07:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rspec
+  prerelease: false
+  requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: "0"
+  type: :development
+  version_requirements: *id001
+description: A library for extracting content from HTML documents.
+email:
+- matthias@izume.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- .rvmrc
+- Gemfile
+- README.md
+- Rakefile
+- lib/omnivore.rb
+- lib/omnivore/http_client.rb
+- lib/omnivore/version.rb
+- lib/omnivore/xpath_extractor.rb
+- omnivore.gemspec
+- spec/http_client_spec.rb
+- spec/xpath_extractor_spec.rb
+has_rdoc: true
+homepage: ""
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+requirements: []
+rubyforge_project: omnivore
+rubygems_version: 1.5.0
+signing_key:
+specification_version: 3
+summary: Content extraction and analysis
+test_files: []

omnivore 0.0.1