scrape 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +22 -0
- data/LICENSE +22 -0
- data/README.md +44 -0
- data/Rakefile +10 -0
- data/bin/scrape +8 -0
- data/examples/github.scrape +7 -0
- data/examples/google.scrape +5 -0
- data/lib/scrape/application.rb +57 -0
- data/lib/scrape/cli.rb +39 -0
- data/lib/scrape/default_loader.rb +19 -0
- data/lib/scrape/dsl.rb +13 -0
- data/lib/scrape/match.rb +23 -0
- data/lib/scrape/site.rb +31 -0
- data/lib/scrape/uri.rb +58 -0
- data/lib/scrape/version.rb +3 -0
- data/lib/scrape.rb +36 -0
- data/scrape.gemspec +22 -0
- data/test/support/test1.scrape +5 -0
- data/test/support/test2.scrape +1 -0
- data/test/support/test3.scrape +3 -0
- data/test/test_helper.rb +19 -0
- data/test/unit/application_test.rb +72 -0
- data/test/unit/default_loader_test.rb +25 -0
- data/test/unit/match_test.rb +54 -0
- data/test/unit/site_test.rb +75 -0
- data/test/unit/uri_test.rb +52 -0
- metadata +90 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,22 @@
+PATH
+  remote: .
+  specs:
+    scrape (0.1)
+
+GEM
+  remote: http://rubygems.org/
+  specs:
+    addressable (2.2.8)
+    crack (0.3.1)
+    nokogiri (1.5.5)
+    webmock (1.8.7)
+      addressable (>= 2.2.7)
+      crack (>= 0.1.7)
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  nokogiri (~> 1.5.5)
+  scrape!
+  webmock (~> 1.8.7)
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
+Copyright (c) 2012 Marty Zalega
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,44 @@
+# Scrape
+
+A really simple web scraper.
+
+```ruby
+site "https://github.com/explore" # The site to scrape. Will be used as the base address.
+
+match /evilmarty/ do |doc| # A regexp/string/proc to match against the current url.
+
+  doc.search('a[href]') # The nokogiri document of the contents of the current url.
+
+end
+
+site "http://www.tumblr.com" # Can define multiple sites
+
+match "/tagged" do |doc|
+  # Do whatever we want with the document.
+end
+```
+
+## Usage
+
+After creating a `Scrapefile` simply run:
+
+```
+scrape -f [FILE]
+```
+
+If no scrapefile is specified then `Scrapefile` is used by default.
+
+## Installation
+
+Simply install the gem:
+
+```
+gem install scrape
+```
+
+## TODO
+
+* Fix bugs
+* Add support for robots.txt
+* Depth limiting
+* Better docs
data/Rakefile
ADDED
data/bin/scrape
ADDED
data/lib/scrape/application.rb
ADDED
@@ -0,0 +1,57 @@
+class Scrape::Application
+  attr_reader :scrapefile, :loader, :sites, :history
+
+  def initialize scrapefile, loader = Scrape::DefaultLoader.new
+    @scrapefile = File.expand_path scrapefile
+    @loader = loader
+    @sites = {}
+    @queue = []
+    @history = []
+  end
+
+  def run
+    load_scrapefile
+
+    while url = @queue.shift
+      Scrape.logger.info "Loading: #{url}..."
+      @history << url
+      if site = self[url]
+        if urls = site.parse(url)
+          enqueue *urls
+          Scrape.logger.info "Found #{urls.length} urls."
+        else
+          Scrape.logger.info "Done."
+        end
+      else
+        Scrape.logger.info "Not defined."
+      end
+    end
+  end
+
+  def reset
+    @history = []
+    @queue = sites.values.map{|site| site.url.to_s }
+  end
+
+  def queue
+    @queue.dup
+  end
+
+  def enqueue *urls
+    urls.flatten.each do |url|
+      @queue << url unless @history.include?(url) || @queue.include?(url)
+    end
+  end
+
+  def [] url
+    @sites.values.detect{|site| site.url < url }
+  end
+
+  def load_scrapefile
+    return if @scrapefile_loaded
+    result = loader.load(scrapefile)
+    @sites.update result if result.is_a? Hash
+    reset
+    @scrapefile_loaded = true
+  end
+end
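For context, here is a minimal sketch of driving the crawl loop above without the CLI, assuming a `Scrapefile` exists in the working directory (the path and seed url are illustrative):

```ruby
require "scrape"

app = Scrape::Application.new "Scrapefile"
app.load_scrapefile                     # evaluates the file via the loader and seeds the queue
app.enqueue "http://example.com/extra"  # manually queued urls survive because the file is already loaded
app.run                                 # shifts urls off the queue; #history prevents re-visits
```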
data/lib/scrape/cli.rb
ADDED
@@ -0,0 +1,39 @@
+require "optparse"
+
+class Scrape::CLI
+  attr_reader :command, :pwd
+
+  def initialize command, pwd = Dir.pwd
+    @command, @pwd = command, pwd
+  end
+
+  def run argv
+    options = {:file => File.join(pwd, 'Scrapefile')}
+    opts = OptionParser.new do |opts|
+      opts.banner = "Scrape #{Scrape::VERSION} - Usage: #{command} [options]"
+      opts.separator ""
+      opts.separator "Specific options:"
+
+      opts.on "-f", "--scrapefile [FILE]", "Use FILE as scrapefile" do |file|
+        options[:file] = File.expand_path file
+      end
+      opts.on_tail "-h", "--help", "Show this message" do
+        puts opts
+        exit
+      end
+      opts.on_tail "-v", "--version", "Show version" do
+        puts Scrape::VERSION
+        exit
+      end
+    end
+    opts.parse argv
+
+    if File.exists? options[:file]
+      Scrape::Application.new(options[:file]).run
+    else
+      puts "#{command} aborted!"
+      puts "No Scrapefile found"
+      exit -1
+    end
+  end
+end
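The CLI can also be exercised from Ruby; the following sketch is equivalent to running `scrape -f my.scrape` in a shell (`my.scrape` is a hypothetical file name):

```ruby
require "scrape"
require "scrape/cli"  # note: CLI is not in the autoload list in lib/scrape.rb

Scrape::CLI.new("scrape").run ["-f", "my.scrape"]
```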
data/lib/scrape/default_loader.rb
ADDED
@@ -0,0 +1,19 @@
+class Scrape::DefaultLoader
+  def load path
+    path = File.expand_path path
+    sites = {}
+
+    sandbox = Sandbox.new sites
+    sandbox.instance_eval File.read(path), path
+
+    sites
+  end
+
+  class Sandbox
+    include Scrape::DSL
+
+    def initialize sites
+      @sites = sites
+    end
+  end
+end
data/lib/scrape/dsl.rb
ADDED
@@ -0,0 +1,13 @@
+module Scrape::DSL
+  def site *urls
+    @_sites ||= {}
+    @sites ||= {}
+    @current_sites = urls.flatten.map{|url| @_sites[url] ||= Scrape::Site.new(url) }
+  end
+
+  def match matcher, &proc
+    raise ArgumentError.new("site must be set") unless defined? @current_sites
+    matches = @current_sites.map{|site| @sites[site.url.to_s] = site; site.add_match matcher, &proc }
+    matches.size == 1 ? matches.first : matches
+  end
+end
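Put together, a Scrapefile evaluated against this DSL might look like the following sketch (urls illustrative); note that `site` accepts several urls, so one `match` block can be registered on all of them at once:

```ruby
site "http://example.com", "http://example.org"

match /articles/ do |doc|
  puts doc.css("h1").map(&:text)  # doc is the Nokogiri document for the matched page
end
```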
data/lib/scrape/match.rb
ADDED
@@ -0,0 +1,23 @@
+class Scrape::Match
+  attr_reader :matcher
+
+  def initialize matcher, &proc
+    @matcher, @proc = matcher, proc
+    raise ArgumentError.new("Match block expects one argument") if proc.arity != 1
+  end
+
+  def invoke doc
+    @proc.call doc
+  end
+
+  def =~ url
+    case @matcher
+    when String
+      url.to_s.include? @matcher
+    when Regexp
+      url.to_s =~ @matcher
+    when Proc
+      @matcher.call url
+    end
+  end
+end
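A quick sketch of the three matcher kinds `#=~` accepts; the results follow `String#include?`, the Regexp match offset, and the proc's own return value respectively (urls illustrative):

```ruby
Scrape::Match.new("/tagged") {|doc| } =~ "http://example.com/tagged"  # => true (substring)
Scrape::Match.new(/tagged/)  {|doc| } =~ "http://example.com/tagged"  # => 19 (truthy match offset)
Scrape::Match.new(->(url) { url.end_with? "ed" }) {|doc| } =~ "http://example.com/tagged"  # => true
```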
data/lib/scrape/site.rb
ADDED
@@ -0,0 +1,31 @@
+require 'nokogiri'
+
+class Scrape::Site
+  attr_reader :url, :matches
+
+  def initialize url
+    @url = Scrape::URI.new url
+    @url.query = nil
+    @url.fragment = nil
+    @matches = []
+  end
+
+  def add_match matcher, &proc
+    match = Scrape::Match.new(matcher, &proc)
+    @matches << match
+    match
+  end
+
+  def parse url
+    url = self.url + url
+    doc = Nokogiri::HTML url.open
+
+    @matches.each{|match| match.invoke doc if match =~ url }
+
+    urls = doc.css("a[href]").map do |node|
+      href = self.url + node['href']
+      self.url < href ? href : nil
+    end
+    urls.compact
+  end
+end
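In other words, `#parse` fetches a page, fires any matching blocks, and returns only the links that stay under the site's base url. A sketch (urls illustrative; the call performs a real HTTP request):

```ruby
site = Scrape::Site.new "http://example.com/blog"
site.add_match(/post/) {|doc| puts doc.title }
next_urls = site.parse "/blog/post-1.html"  # => only urls under http://example.com/blog
```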
data/lib/scrape/uri.rb
ADDED
@@ -0,0 +1,58 @@
+require 'uri'
+require 'open-uri'
+
+class Scrape::URI
+  def initialize uri = nil
+    @uri = case uri
+      when URI then uri.clone
+      when NilClass then URI.new
+      else URI.parse uri.to_s
+    end
+  end
+
+  %w[fragment host hostname password path port query scheme user to_s relative? absolute?].each do |method_name|
+    class_eval <<-EOT, __FILE__, __LINE__ + 1
+      def #{method_name}
+        @uri.#{method_name}
+      end
+    EOT
+  end
+
+  %w[fragment host hostname password path port query scheme user].each do |method_name|
+    class_eval <<-EOT, __FILE__, __LINE__ + 1
+      def #{method_name}= value
+        @uri.#{method_name} = value
+      end
+    EOT
+  end
+
+  def + url
+    return clone if self == url
+    relative = (url.to_s =~ /^(?!.+:\/\/|\/)/)
+    uri = self.class.new @uri.merge(url)
+    uri.path = "#{@uri.path}#{uri.path}" if relative
+    uri
+  end
+
+  def < url
+    url[0, length] == to_s
+  end
+
+  def [] *args
+    to_s[*args]
+  end
+
+  def == url
+    to_s == url.to_s
+  end
+
+  def length
+    to_s.length
+  end
+  alias_method :size, :length
+
+  def open headers = {}, &block
+    headers = {"User-Agent" => Scrape.user_agent}.merge(headers)
+    super(to_s, headers, &block).read
+  end
+end
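The `+` and `<` operators carry the crawling semantics: absolute paths replace, bare relative paths append, and `<` tests whether a url lives under the receiver. A few worked cases, with values that follow from the merge rules above (urls illustrative):

```ruby
base = Scrape::URI.new "http://example.com/blog"
(base + "/about").to_s     # => "http://example.com/about"           (leading slash replaces the path)
(base + "post.html").to_s  # => "http://example.com/blog/post.html"  (relative path is appended)
base < "http://example.com/blog/post.html"  # => true: the url starts with the base
```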
data/lib/scrape.rb
ADDED
@@ -0,0 +1,36 @@
+require "rubygems"
+require "logger"
+require "bundler/setup"
+
+module Scrape
+  require 'scrape/version'
+
+  autoload 'Application', 'scrape/application'
+  autoload 'Site', 'scrape/site'
+  autoload 'Match', 'scrape/match'
+  autoload 'DefaultLoader', 'scrape/default_loader'
+  autoload 'DSL', 'scrape/dsl'
+  autoload 'URI', 'scrape/uri'
+
+  class ScrapeFileNotFound < Exception; end
+
+  class << self
+    attr_writer :user_agent
+
+    def user_agent
+      @user_agent || "Scrape/#{Scrape::VERSION}"
+    end
+
+    def logger
+      @logger ||= Logger.new STDOUT
+    end
+
+    def logger= log
+      @logger = log
+    end
+
+    def load_scrapefile path
+      Application.new path
+    end
+  end
+end
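Module-level configuration is just writers on `Scrape` itself; a sketch with illustrative values:

```ruby
require "scrape"

Scrape.user_agent = "MyCrawler/1.0"         # overrides the default "Scrape/0.1" header
Scrape.logger     = Logger.new "crawl.log"  # redirect the progress messages
Scrape.load_scrapefile("Scrapefile").run    # returns an Application; #run starts the crawl
```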
data/scrape.gemspec
ADDED
@@ -0,0 +1,22 @@
+# -*- encoding: utf-8 -*-
+require File.expand_path("../lib/scrape/version", __FILE__)
+
+Gem::Specification.new do |s|
+  s.name        = "scrape"
+  s.version     = Scrape::VERSION
+  s.platform    = Gem::Platform::RUBY
+  s.authors     = ["Marty Zalega"]
+  s.email       = ["evilmarty@gmail.com"]
+  s.homepage    = "http://github.com/evilmarty/scrape"
+  s.summary     = %q{A really simple web scraper}
+  s.description = %q{An easy to use utility to scrape websites using a DSL similar to rake.}
+
+  s.rubyforge_project = "scrape"
+
+  s.files         = `git ls-files`.split("\n")
+  s.test_files    = `git ls-files -- {test}/*`.split("\n")
+  s.executables   = `git ls-files -- bin/*`.split("\n").map{|f| File.basename(f) }
+  s.require_paths = ["lib"]
+
+  s.add_development_dependency "nokogiri", "~> 1.5.5"
+end
data/test/support/test2.scrape
ADDED
@@ -0,0 +1 @@
+site "http://example.com"
data/test/test_helper.rb
ADDED
@@ -0,0 +1,19 @@
+$: << File.expand_path('../../lib', __FILE__)
+
+require "minitest/autorun"
+require "webmock/minitest"
+
+require "bundler/setup"
+Bundler.setup(:default, :test)
+
+require "scrape"
+
+class Scrape::TestCase < MiniTest::Unit::TestCase
+  class << self
+    def test name, &block
+      method_name = name.gsub /[^a-z0-9_]+/i, '_'
+      define_method "test_#{method_name}", &block
+    end
+    private :test
+  end
+end
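The class-level `test` macro above turns a free-form description into a regular MiniTest method by replacing each run of non-alphanumeric characters with an underscore; inside a `Scrape::TestCase` subclass one would write, e.g.:

```ruby
class ExampleTest < Scrape::TestCase
  test "#parse handles weird input" do  # defines test__parse_handles_weird_input
    assert true
  end
end
```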
data/test/unit/application_test.rb
ADDED
@@ -0,0 +1,72 @@
+require "test_helper"
+
+class ApplicationTest < Scrape::TestCase
+  SUPPORT_FILES = File.expand_path File.join(File.dirname(__FILE__), '..', 'support')
+
+  test "#load_scrapefile should parse the specified file" do
+    filepath = File.join(SUPPORT_FILES, 'test1.scrape')
+    app = Scrape::Application.new(filepath)
+    assert app.load_scrapefile, "scrape file failed to load"
+    assert_equal ["http://example.com"], app.sites.keys
+  end
+
+  test "#load_scrapefile should return nil when already loaded" do
+    filepath = File.join(SUPPORT_FILES, 'test1.scrape')
+    app = Scrape::Application.new(filepath)
+    assert app.load_scrapefile, "scrape file failed to load"
+    refute app.load_scrapefile, "scrape file should not have loaded again"
+  end
+
+  test "#[] should return the site that matches the given url" do
+    site1 = Scrape::Site.new "http://example.com"
+    site2 = Scrape::Site.new "http://example.org"
+    app = Scrape::Application.new(".")
+    app.sites.update site1.to_s => site1, site2.to_s => site2
+    assert_equal site1, app["http://example.com"]
+  end
+
+  test "#[] should return the site that is relative to the given url" do
+    site1 = Scrape::Site.new "http://example.com"
+    site2 = Scrape::Site.new "http://example.org"
+    app = Scrape::Application.new(".")
+    app.sites.update site1.to_s => site1, site2.to_s => site2
+    assert_equal site1, app["http://example.com/test"]
+  end
+
+  test "#[] should return nil when no site matches the given url" do
+    site1 = Scrape::Site.new "http://example.com"
+    site2 = Scrape::Site.new "http://example.org"
+    app = Scrape::Application.new(".")
+    app.sites.update site1.to_s => site1, site2.to_s => site2
+    assert_nil app["http://example.net"]
+  end
+
+  test "#reset should enqueue the sites that have been defined" do
+    site1 = Scrape::Site.new "http://example.com"
+    site2 = Scrape::Site.new "http://example.org"
+    app = Scrape::Application.new(".")
+    app.sites.update site1.to_s => site1, site2.to_s => site2
+    app.reset
+    assert_equal ["http://example.com", "http://example.org"], app.queue
+  end
+
+  test "#run should load the specified file" do
+    filepath = File.join(SUPPORT_FILES, 'test1.scrape')
+    test_loader = MiniTest::Mock.new
+    test_loader.expect :load, nil, [filepath]
+    Scrape::Application.new(filepath, test_loader).run
+    assert test_loader.verify, "loader did not receive file"
+  end
+
+  test "#enqueue should add the given url to the queue" do
+    app = Scrape::Application.new(".")
+    app.enqueue "http://example.com"
+    assert_equal ["http://example.com"], app.queue
+  end
+
+  test "#enqueue should not add the given to the queue when it already is added" do
+    app = Scrape::Application.new(".")
+    3.times{ app.enqueue "http://example.com" }
+    assert_equal ["http://example.com"], app.queue
+  end
+end
data/test/unit/default_loader_test.rb
ADDED
@@ -0,0 +1,25 @@
+require "test_helper"
+
+class DefaultLoaderTest < Scrape::TestCase
+  SUPPORT_FILES = File.expand_path File.join(File.dirname(__FILE__), '..', 'support')
+
+  test "#load should return sites parsed from the specified file" do
+    loader = Scrape::DefaultLoader.new
+    sites = loader.load File.join(SUPPORT_FILES, "test1.scrape")
+    assert_equal ["http://example.com"], sites.keys
+    assert_instance_of Scrape::Site, sites.values[0]
+  end
+
+  test "#load should return an empty hash when no matches have been defined" do
+    loader = Scrape::DefaultLoader.new
+    sites = loader.load File.join(SUPPORT_FILES, "test2.scrape")
+    assert_equal Hash.new, sites
+  end
+
+  test "#load should raise an error when no site is defined" do
+    loader = Scrape::DefaultLoader.new
+    assert_raises ArgumentError do
+      loader.load File.join(SUPPORT_FILES, "test3.scrape")
+    end
+  end
+end
data/test/unit/match_test.rb
ADDED
@@ -0,0 +1,54 @@
+require "test_helper"
+
+class MatchTest < Scrape::TestCase
+  test "#initialize should raise error when proc's arity isn't one" do
+    assert_raises ArgumentError do
+      Scrape::Match.new("test"){ "no arguments" }
+    end
+  end
+
+  test "#invoke should call the proc" do
+    ok = false
+    match = Scrape::Match.new("test"){|doc| ok = true }
+    match.invoke nil
+    assert ok, "Proc was not called"
+  end
+
+  test "#invoke should pass the document to the proc" do
+    doc = "yay"
+    ok = false
+    match = Scrape::Match.new("test"){|d| ok = (doc == d) }
+    match.invoke doc
+    assert ok, "Document was not passed into the proc"
+  end
+
+  test "#=~ should return true when contains string" do
+    match = Scrape::Match.new("bar"){|doc|}
+    assert match =~ "foobar", "Expected true"
+  end
+
+  test "#=~ should return false when doesn't contains string" do
+    match = Scrape::Match.new("bar"){|doc|}
+    refute match =~ "ponies", "Expected false"
+  end
+
+  test "#=~ should return true when matches regexp" do
+    match = Scrape::Match.new(/bar/){|doc|}
+    assert match =~ "foobar", "Expected true"
+  end
+
+  test "#=~ should return false when doesn't match regexp" do
+    match = Scrape::Match.new(/bar/){|doc|}
+    refute match =~ "ponies", "Expected false"
+  end
+
+  test "#=~ should return true when proc is truthy" do
+    match = Scrape::Match.new(lambda{|url| true }){|doc|}
+    assert match =~ "ponies", "Expected true"
+  end
+
+  test "#=~ should return false when proc is falsy" do
+    match = Scrape::Match.new(lambda{|url| false }){|doc|}
+    refute match =~ "ponies", "Expected false"
+  end
+end
data/test/unit/site_test.rb
ADDED
@@ -0,0 +1,75 @@
+require "test_helper"
+
+class SiteTest < Scrape::TestCase
+  test "#add_match should create a Match object and be added to the collection" do
+    site = Scrape::Site.new "http://www.example.com"
+    match = site.add_match("/test") { |doc| }
+    assert_instance_of Scrape::Match, match
+  end
+
+  test "#parse should return absolute urls that match the site's url" do
+    stub_request(:get, "http://www.example.com/test").
+      with(:headers => {"User-Agent" => Scrape.user_agent}).
+      to_return(:status => 200, :body => <<-HTML)
+        <html>
+        <body>
+          <a href="http://www.example.com/link1.html">link 1</a>
+          <a href="http://example.com/link2.html">link 2</a>
+          <a href="http://example.org/link3.html">link 3</a>
+        </body>
+        </html>
+      HTML
+
+    site = Scrape::Site.new "http://www.example.com"
+    assert_equal [Scrape::URI.new("http://www.example.com/link1.html")], site.parse("/test")
+  end
+
+  test "#parse should return relative urls to the site" do
+    stub_request(:get, "http://www.example.com/test").
+      with(:headers => {"User-Agent" => Scrape.user_agent}).
+      to_return(:status => 200, :body => <<-HTML)
+        <html>
+        <body>
+          <a href="link1.html">link 1</a>
+        </body>
+        </html>
+      HTML
+
+    site = Scrape::Site.new "http://www.example.com"
+    assert_equal [Scrape::URI.new("http://www.example.com/link1.html")], site.parse("/test")
+  end
+
+  test "#parse should return no urls" do
+    stub_request(:get, "http://www.example.com/test").
+      with(:headers => {"User-Agent" => Scrape.user_agent}).
+      to_return(:status => 200, :body => <<-HTML)
+        <html>
+        <body>
+          <a href="/link1.html">link 1</a>
+        </body>
+        </html>
+      HTML
+
+    site = Scrape::Site.new "http://www.example.com/test"
+    assert_equal [], site.parse("/test")
+  end
+
+  test "#parse should invoke Match when hit" do
+    stub_request(:get, "http://www.example.com/test").
+      with(:headers => {"User-Agent" => Scrape.user_agent}).
+      to_return(:status => 200, :body => <<-HTML)
+        <html>
+        <body>
+          <a href="link1.html">link 1</a>
+        </body>
+        </html>
+      HTML
+
+    ok = false
+    site = Scrape::Site.new "http://www.example.com"
+    site.add_match(/test/){|doc| ok = true }
+    site.parse "/test"
+
+    assert ok, "Match was not invoked"
+  end
+end
data/test/unit/uri_test.rb
ADDED
@@ -0,0 +1,52 @@
+require "test_helper"
+
+class URITest < Scrape::TestCase
+  {fragment: "blah", host: "www.example.com", password: "secret", path: "/dot", query: "foo=bar", scheme: "http", user: "chuck", relative?: false, absolute?: true}.each do |method_name, value|
+    test "##{method_name} should return value" do
+      uri = Scrape::URI.new "http://chuck:secret@www.example.com/dot?foo=bar#blah"
+      assert_equal value, uri.send(method_name)
+    end
+  end
+
+  test "#open should return the contents at the url" do
+    stub_request(:get, "http://www.example.com/").with(headers: {"User-Agent" => Scrape.user_agent}).to_return(status: 200, body: "Howdie")
+
+    uri = Scrape::URI.new "http://www.example.com"
+    assert_equal "Howdie", uri.open
+  end
+
+  test "#+ should return a URI with the specified path" do
+    uri1 = Scrape::URI.new "http://www.example.com"
+    uri2 = uri1 + "/bar"
+    assert_equal "http://www.example.com/bar", uri2.to_s
+  end
+
+  test "#+ should return a URI overwriting with the specified path" do
+    uri1 = Scrape::URI.new "http://www.example.com/foo"
+    uri2 = uri1 + "/bar"
+    assert_equal "http://www.example.com/bar", uri2.to_s
+  end
+
+  test "#+ should return a URI with the specified path appended" do
+    uri1 = Scrape::URI.new "http://www.example.com/foo"
+    uri2 = uri1 + "bar"
+    assert_equal "http://www.example.com/foo/bar", uri2.to_s
+  end
+
+  test "#+ should return a URI from the absolute url" do
+    uri1 = Scrape::URI.new "http://www.example.com/foo"
+    uri2 = uri1 + "http://www.example.com/bar"
+    assert_equal "http://www.example.com/bar", uri2.to_s
+  end
+
+  test "#+ should return a URI appended from the absolute url" do
+    uri1 = Scrape::URI.new "http://www.example.com/foo"
+    uri2 = uri1 + "http://www.example.com/foo/bar"
+    assert_equal "http://www.example.com/foo/bar", uri2.to_s
+  end
+
+  test "#< should return true when specified url is greater" do
+    uri1 = Scrape::URI.new "http://www.example.com/foo"
+    assert uri1 < "http://www.example.com/foo/bar"
+  end
+end
metadata
ADDED
@@ -0,0 +1,90 @@
+--- !ruby/object:Gem::Specification
+name: scrape
+version: !ruby/object:Gem::Version
+  version: '0.1'
+prerelease:
+platform: ruby
+authors:
+- Marty Zalega
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2012-07-10 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 1.5.5
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 1.5.5
+description: An easy to use utility to scrape websites using a DSL similar to rake.
+email:
+- evilmarty@gmail.com
+executables:
+- scrape
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- Gemfile.lock
+- LICENSE
+- README.md
+- Rakefile
+- bin/scrape
+- examples/github.scrape
+- examples/google.scrape
+- lib/scrape.rb
+- lib/scrape/application.rb
+- lib/scrape/cli.rb
+- lib/scrape/default_loader.rb
+- lib/scrape/dsl.rb
+- lib/scrape/match.rb
+- lib/scrape/site.rb
+- lib/scrape/uri.rb
+- lib/scrape/version.rb
+- scrape.gemspec
+- test/support/test1.scrape
+- test/support/test2.scrape
+- test/support/test3.scrape
+- test/test_helper.rb
+- test/unit/application_test.rb
+- test/unit/default_loader_test.rb
+- test/unit/match_test.rb
+- test/unit/site_test.rb
+- test/unit/uri_test.rb
+homepage: http://github.com/evilmarty/scrape
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project: scrape
+rubygems_version: 1.8.23
+signing_key:
+specification_version: 3
+summary: A really simple web scraper
+test_files: []