wombat 1.0.0 → 2.0.0
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in the public registries.
- data/README.md +13 -30
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/fixtures/vcr_cassettes/follow_links.yml +2143 -0
- data/lib/wombat/crawler.rb +7 -17
- data/lib/wombat/dsl/follower.rb +19 -0
- data/lib/wombat/dsl/iterator.rb +19 -0
- data/lib/wombat/dsl/metadata.rb +27 -0
- data/lib/wombat/dsl/property.rb +27 -0
- data/lib/wombat/dsl/property_group.rb +48 -0
- data/lib/wombat/processing/node_selector.rb +12 -0
- data/lib/wombat/processing/parser.rb +48 -0
- data/lib/wombat/property/locators/base.rb +33 -0
- data/lib/wombat/property/locators/factory.rb +39 -0
- data/lib/wombat/property/locators/follow.rb +25 -0
- data/lib/wombat/property/locators/html.rb +14 -0
- data/lib/wombat/property/locators/iterator.rb +23 -0
- data/lib/wombat/property/locators/list.rb +17 -0
- data/lib/wombat/property/locators/property_group.rb +20 -0
- data/lib/wombat/property/locators/text.rb +22 -0
- data/lib/wombat.rb +8 -4
- data/spec/crawler_spec.rb +38 -48
- data/spec/dsl/property_spec.rb +12 -0
- data/spec/helpers/sample_crawler.rb +2 -15
- data/spec/integration/integration_spec.rb +61 -33
- data/spec/processing/parser_spec.rb +32 -0
- data/spec/property/locators/factory_spec.rb +18 -0
- data/spec/property/locators/follow_spec.rb +4 -0
- data/spec/property/locators/html_spec.rb +15 -0
- data/spec/property/locators/iterator_spec.rb +4 -0
- data/spec/property/locators/list_spec.rb +13 -0
- data/spec/property/locators/text_spec.rb +49 -0
- data/spec/sample_crawler_spec.rb +7 -11
- data/spec/wombat_spec.rb +13 -1
- data/wombat.gemspec +27 -16
- metadata +27 -16
- data/lib/wombat/iterator.rb +0 -38
- data/lib/wombat/metadata.rb +0 -24
- data/lib/wombat/node_selector.rb +0 -10
- data/lib/wombat/parser.rb +0 -59
- data/lib/wombat/property.rb +0 -21
- data/lib/wombat/property_container.rb +0 -70
- data/lib/wombat/property_locator.rb +0 -20
- data/spec/iterator_spec.rb +0 -52
- data/spec/metadata_spec.rb +0 -20
- data/spec/parser_spec.rb +0 -125
- data/spec/property_container_spec.rb +0 -62
- data/spec/property_locator_spec.rb +0 -75
- data/spec/property_spec.rb +0 -16
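The user-visible changes in 2.0.0 are the reorganized source tree (lib/wombat/dsl, lib/wombat/processing and lib/wombat/property/locators) and the reworked DSL: iteration and link following are now expressed as property formats (:iterator, :follow) rather than dedicated keywords such as for_each, and a path property appears alongside base_url. As a minimal sketch pieced together from the updated specs below (the selectors, URLs and result shapes are the spec's own fixtures, not a recommended example), a 2.0.0 crawl looks roughly like this:

require 'wombat'

results = Wombat.crawl do
  base_url "http://www.terra.com.br"
  path "/portal"

  search "css=.btn-search"

  social do
    twitter "css=.ctn-bar li.last"
  end

  links "css=.ctn-links", :iterator do
    menu "css=a"
  end
end

# Per the integration spec, results is a plain hash: results["links"] is an array of
# {"menu" => ...} hashes and results["social"]["twitter"] is a single string.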

data/spec/dsl/property_spec.rb
ADDED
@@ -0,0 +1,12 @@
+require 'spec_helper'
+
+describe Wombat::DSL::Property do
+  it 'should store property data' do
+    property = Wombat::DSL::Property.new("title", *["/some/selector", :html]) { false }
+
+    property.wombat_property_name.should == "title"
+    property.selector.should == "/some/selector"
+    property.format.should == :html
+    property.callback.should == lambda { false }
+  end
+end

data/spec/helpers/sample_crawler.rb
CHANGED
@@ -5,9 +5,9 @@ class SampleCrawler
  include Wombat::Crawler

  base_url "http://www.obaoba.com.br"
-
+  path "/porto-alegre/agenda"

-
+  event_group "css=div.title-agenda", :iterator do
    event do |e|
      e.title("xpath=.") { |t| t.split(" | ")[1].strip }
      e.date "xpath=//div[@class='scrollable-items']/div[@class='s-item active']//a" do |d|
@@ -19,18 +19,5 @@ class SampleCrawler
    venue do |v|
      v.name("xpath=.") { |n| name.split(" | ")[2].strip }
    end
-
-    # follow_links "xpath=.//a[1]/@href" do
-    #   event { |e| e.description "css=#main-node-content", :html }
-    #   venue do |v|
-    #     v.phone "css=span.tel .value"
-    #     v.image "xpath=//div[@id='article-image']/div/img/@src"
-    #   end
-
-    #   location do |l|
-    #     l.city "css=span.locality"
-    #     l.street("css=span.street-address") { |s| s.gsub(/\n/, '').gsub(/ /, '') }
-    #   end
-    # end
  end
end

data/spec/integration/integration_spec.rb
CHANGED
@@ -8,17 +8,15 @@ describe 'basic crawler setup' do
    crawler.send(:include, Wombat::Crawler)

    crawler.base_url "http://www.terra.com.br"
-    crawler.
+    crawler.path '/portal'

    crawler.search "css=.btn-search"
-    crawler.social do
-
+    crawler.social do
+      twitter "css=.ctn-bar li.last"
    end
-
-    crawler.for_each "css=.ctn-links" do
+    crawler.links "css=.ctn-links", :iterator do
      menu "css=a"
    end
-
    crawler.subheader "css=h2.ttl-dynamic" do |h|
      h.gsub("London", "Londres")
    end
@@ -28,7 +26,7 @@
    results = crawler_instance.crawl

    results["search"].should == "Buscar"
-    results["
+    results["links"].should == [{"menu"=>"Agenda"}, {"menu"=>"Brasileiro"}, {"menu"=>"Brasil"}, {"menu"=>"Bolsas"}, {"menu"=>"Cinema"}, {"menu"=>"Galerias de Fotos"}, {"menu"=>"Beleza"}, {"menu"=>"Esportes"}, {"menu"=>"Assine o RSS"}]
    results["subheader"].should == "Londres 2012"
    results["social"]["twitter"].should == "Verão"
  end
@@ -39,9 +37,9 @@
    crawler.send(:include, Wombat::Crawler)

    crawler.base_url "http://www.terra.com.br"
-    crawler.
+    crawler.path '/portal'

-    crawler.
+    crawler.links "css=.ctn-links", :iterator do
      menu "css=a"
    end

@@ -53,13 +51,13 @@
      results = crawler_instance.crawl
    end

-    results["
+    results["links"].should == result_hash

    VCR.use_cassette('basic_crawler_page') do
      results = crawler_instance.crawl
    end

-    results["
+    results["links"].should == result_hash
  end

  it 'should crawl page through block to class instance crawl method' do
@@ -69,15 +67,15 @@
    crawler_instance = crawler.new
    results = crawler_instance.crawl do
      base_url "http://www.terra.com.br"
-
+      path '/portal'

      search "css=.btn-search"

-      social do
-
+      social do
+        twitter "css=.ctn-bar li.last"
      end

-
+      links "css=.ctn-links", :iterator do
        menu "css=a"
      end

@@ -87,7 +85,7 @@
    end

    results["search"].should == "Buscar"
-    results["
+    results["links"].should == [{"menu"=>"Agenda"}, {"menu"=>"Brasileiro"}, {"menu"=>"Brasil"}, {"menu"=>"Bolsas"}, {"menu"=>"Cinema"}, {"menu"=>"Galerias de Fotos"}, {"menu"=>"Beleza"}, {"menu"=>"Esportes"}, {"menu"=>"Assine o RSS"}]
    results["subheader"].should == "Londres 2012"
    results["social"]["twitter"].should == "Verão"
  end
@@ -97,15 +95,15 @@
    VCR.use_cassette('basic_crawler_page') do
      results = Wombat.crawl do
        base_url "http://www.terra.com.br"
-
+        path '/portal'

        search "css=.btn-search"

-        social do
-
+        social do
+          twitter "css=.ctn-bar li.last"
        end

-
+        links "css=.ctn-links", :iterator do
          menu "css=a"
        end

@@ -115,7 +113,7 @@
      end

      results["search"].should == "Buscar"
-      results["
+      results["links"].should == [{"menu"=>"Agenda"}, {"menu"=>"Brasileiro"}, {"menu"=>"Brasil"}, {"menu"=>"Bolsas"}, {"menu"=>"Cinema"}, {"menu"=>"Galerias de Fotos"}, {"menu"=>"Beleza"}, {"menu"=>"Esportes"}, {"menu"=>"Assine o RSS"}]
      results["subheader"].should == "Londres 2012"
      results["social"]["twitter"].should == "Verão"
    end
@@ -127,24 +125,24 @@
      crawler.send(:include, Wombat::Crawler)

      crawler.base_url "https://www.github.com"
-      crawler.
+      crawler.path "/explore"

-      crawler.
-      project do
-
-
+      crawler.repos "css=ol.ranked-repositories>li", :iterator do
+        project do
+          repo 'css=h3'
+          description('css=p.description') { |d| d ? d.gsub(/for/, '') : nil }
        end
      end

-
-      results = crawler_instance.crawl
+      results = crawler.new.crawl

-      results.should == { "
+      results.should == { "repos" => [
        { "project" => { "repo" => "jairajs89 / Touchy.js", "description" => "A simple light-weight JavaScript library dealing with touch events" } },
        { "project" => { "repo" => "mcavage / node-restify", "description" => "node.js REST framework specifically meant web service APIs" } },
        { "project" => { "repo" => "notlion / streetview-stereographic", "description" => "Shader Toy + Google Map + Panoramic Explorer" } },
        { "project" => { "repo" => "twitter / bootstrap", "description" => "HTML, CSS, and JS toolkit from Twitter" } },
-      { "project" => { "repo" => "stolksdorf / Parallaxjs", "description" => "a Library Javascript that allows easy page parallaxing" } }
+        { "project" => { "repo" => "stolksdorf / Parallaxjs", "description" => "a Library Javascript that allows easy page parallaxing" } },
+        { "project" => { "repo" => nil, "description" => nil}}
      ]}
    end
  end
@@ -156,18 +154,18 @@

      crawler.document_format :xml
      crawler.base_url "http://ws.audioscrobbler.com"
-      crawler.
+      crawler.path "/2.0/?method=geo.getevents&location=#{URI.escape('San Francisco')}&api_key=060decb474b73437d5bbec37f527ae7b"

      crawler.artist "xpath=//title", :list

-      crawler.
+      crawler.location 'xpath=//event', :iterator do
        latitude "xpath=./venue/location/geo:point/geo:lat", :text, { 'geo' => 'http://www.w3.org/2003/01/geo/wgs84_pos#' }
        longitude "xpath=./venue/location/geo:point/geo:long", :text, { 'geo' => 'http://www.w3.org/2003/01/geo/wgs84_pos#' }
      end

      crawler_instance = crawler.new
      results = crawler_instance.crawl
-      iterator = results['
+      iterator = results['location']

      iterator.should == [
        {"latitude"=>"37.807775", "longitude"=>"-122.272736"},
@@ -185,4 +183,34 @@
      results["artist"].should =~ ["Davka", "Digitalism (DJ Set)", "Gary Clark Jr.", "Lenny Kravitz", "Little Muddy", "Michael Schenker Group", "The Asteroids Galaxy Tour", "When Indie Attacks", "When Indie Attacks", "YOB"]
    end
  end
+
+  it 'should follow links' do
+    VCR.use_cassette('follow_links') do
+      crawler = Class.new
+      crawler.send(:include, Wombat::Crawler)
+
+      crawler.document_format :html
+      crawler.base_url "https://www.github.com"
+      crawler.path "/"
+
+      crawler.github 'xpath=//ul[@class="footer_nav"][1]//a', :follow do
+        heading 'css=h1'
+      end
+
+      crawler_instance = crawler.new
+      results = crawler_instance.crawl
+
+      results.should == {
+        "github" => [
+          { "heading"=>"GitHub helps people build software together."},
+          { "heading"=>nil},
+          { "heading"=>"Features"},
+          { "heading"=>"Contact GitHub"},
+          { "heading"=>"GitHub Training — Git Training from the Experts"},
+          { "heading"=>"GitHub on Your Servers"},
+          { "heading"=>"Loading..."}
+        ]
+      }
+    end
+  end
end

data/spec/processing/parser_spec.rb
ADDED
@@ -0,0 +1,32 @@
+require 'spec_helper'
+
+describe Wombat::Processing::Parser do
+  before(:each) do
+    crawler = Class.new
+    crawler.send(:include, Wombat::Processing::Parser)
+    @parser = crawler.new
+    @metadata = Wombat::DSL::Metadata.new
+  end
+
+  it 'should request page document with correct url' do
+    @metadata.base_url "http://www.google.com"
+    @metadata.path "/search"
+    fake_document = double :document
+    fake_parser = double :parser
+    fake_document.should_receive(:parser).and_return(fake_parser)
+    @parser.mechanize.should_receive(:get).with("http://www.google.com/search").and_return fake_document
+
+    @parser.parse @metadata
+  end
+
+  it 'should correctly parse xml documents' do
+    fake_document = double :xml
+    fake_parser = double :parser
+    @metadata.document_format :xml
+    @parser.mechanize.should_not_receive(:get)
+    RestClient.should_receive(:get).and_return fake_document
+    Nokogiri.should_receive(:XML).with(fake_document).and_return fake_parser
+
+    @parser.parse @metadata
+  end
+end

data/spec/property/locators/factory_spec.rb
ADDED
@@ -0,0 +1,18 @@
+require 'spec_helper'
+
+describe Wombat::Property::Locators::Factory do
+  it 'should instantiate correct locator according to property type' do
+    Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :text)).should be_a(Wombat::Property::Locators::Text)
+    Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :html)).should be_a(Wombat::Property::Locators::Html)
+    Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :list)).should be_a(Wombat::Property::Locators::List)
+    Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :follow)).should be_a(Wombat::Property::Locators::Follow)
+    Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :iterator)).should be_a(Wombat::Property::Locators::Iterator)
+    Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :container)).should be_a(Wombat::Property::Locators::PropertyGroup)
+  end
+
+  it 'should raise correct exception if provided property is of unknown type' do
+    lambda {
+      Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :weird))
+    }.should raise_error(Wombat::Property::Locators::UnknownTypeException, "Unknown property format weird.")
+  end
+end

data/spec/property/locators/html_spec.rb
ADDED
@@ -0,0 +1,15 @@
+require 'spec_helper'
+
+describe Wombat::Property::Locators::Html do
+  it 'should locate html property' do
+    fake_elem = double :element
+    context = double :context
+    fake_elem.stub inner_html: "Something cool "
+    context.stub(:xpath).with("/abc", nil).and_return [fake_elem]
+    property = Wombat::DSL::Property.new('data1', 'xpath=/abc', :html)
+
+    locator = Wombat::Property::Locators::Html.new(property)
+
+    locator.locate(context).should == { "data1" => "Something cool" }
+  end
+end

data/spec/property/locators/list_spec.rb
ADDED
@@ -0,0 +1,13 @@
+require 'spec_helper'
+
+describe Wombat::Property::Locators::List do
+  it 'should locate a list of nodes' do
+    context = double :context
+    context.stub(:css).with(".selector").and_return %w(1 2 3 4 5)
+    property = Wombat::DSL::Property.new('data1', 'css=.selector', :list)
+
+    locator = Wombat::Property::Locators::List.new(property)
+
+    locator.locate(context).should == { "data1" => %w(1 2 3 4 5) }
+  end
+end

data/spec/property/locators/text_spec.rb
ADDED
@@ -0,0 +1,49 @@
+require 'spec_helper'
+
+describe Wombat::Property::Locators::Text do
+  it 'should locate text property with xpath selector and namespaces' do
+    fake_elem = double :element
+    context = double :context
+    fake_elem.stub inner_text: "Something cool "
+    context.stub(:xpath).with("/abc", 'boom').and_return [fake_elem]
+    property = Wombat::DSL::Property.new('data1', 'xpath=/abc', :text, 'boom')
+
+    locator = Wombat::Property::Locators::Text.new(property)
+
+    locator.locate(context).should == { "data1" => "Something cool" }
+  end
+
+  it 'should locate text property with css selector' do
+    fake_elem = double :element
+    context = double :context
+    fake_elem.stub inner_text: "My name"
+    context.stub(:css).with("/def").and_return [fake_elem]
+    property = Wombat::DSL::Property.new('data1', 'css=/def', :text)
+
+    locator = Wombat::Property::Locators::Text.new(property)
+
+    locator.locate(context).should == { "data1" => "My name" }
+  end
+
+  it 'should return plain symbols as strings' do
+    fake_elem = double :element
+    context = double :context
+    property = Wombat::DSL::Property.new('data_2', :hardcoded_value, :text)
+
+    locator = Wombat::Property::Locators::Text.new(property)
+
+    locator.locate(context).should == { "data_2" => "hardcoded_value" }
+  end
+
+  it 'should invoke property callback' do
+    fake_elem = double :element
+    context = double :context
+    fake_elem.stub inner_text: "My name"
+    context.stub(:css).with("/def").and_return [fake_elem]
+    property = Wombat::DSL::Property.new('data1', 'css=/def', :text) { |s| s.gsub(/name/, 'ass') }
+
+    locator = Wombat::Property::Locators::Text.new(property)
+
+    locator.locate(context).should == { "data1" => "My ass" }
+  end
+end
data/spec/sample_crawler_spec.rb
CHANGED
@@ -8,19 +8,15 @@ describe SampleCrawler

  it 'should correctly assign event metadata' do
    @sample_crawler.should_receive(:parse) do |args|
-
-
-
-
-      it
-      it.
-      it["event"]["title"].selector.should == "xpath=."
-      it["event"]["date"].selector.should == "xpath=//div[@class='scrollable-items']/div[@class='s-item active']//a"
-      it["event"]["type"].selector.should == "xpath=.type"
-      it["venue"]["name"].selector.should == "xpath=."
+      args['event_group'].wombat_property_selector.should == "css=div.title-agenda"
+      it = args['event_group']
+      it["event"]["title"].wombat_property_selector.should == "xpath=."
+      it["event"]["date"].wombat_property_selector.should == "xpath=//div[@class='scrollable-items']/div[@class='s-item active']//a"
+      it["event"]["type"].wombat_property_selector.should == "xpath=.type"
+      it["venue"]["name"].wombat_property_selector.should == "xpath=."

      args[:base_url].should == 'http://www.obaoba.com.br'
-      args[:
+      args[:path].should == '/porto-alegre/agenda'
    end

    @sample_crawler.crawl
data/spec/wombat_spec.rb
CHANGED
@@ -5,12 +5,24 @@ describe Wombat
    Wombat.should respond_to(:crawl)
  end

+  it 'should provide syntactic sugar method Wombat.scrape' do
+    Wombat.should respond_to(:scrape)
+  end
+
+  it 'should redirect .scrape to .crawl' do
+    fake_class = double :fake
+    fake_class.stub :include
+    fake_class.should_receive(:new).and_return(stub(crawl: nil))
+    Class.stub :new => fake_class
+    Wombat.scrape
+  end
+
  it 'should accept regular properties (non-selectors)' do
    VCR.use_cassette('broken_selector') do
      lambda {
        Wombat.crawl do
          base_url "http://www.github.com"
-
+          path "/"

          source :obaoba
          description 'Oba Oba'
data/wombat.gemspec
CHANGED
@@ -5,11 +5,11 @@

Gem::Specification.new do |s|
  s.name = "wombat"
-  s.version = "
+  s.version = "2.0.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Felipe Lima"]
-  s.date = "2012-
+  s.date = "2012-07-31"
  s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
  s.email = "felipe.lima@gmail.com"
  s.extra_rdoc_files = [
@@ -30,26 +30,37 @@ Gem::Specification.new do |s|
    "fixtures/vcr_cassettes/basic_crawler_page.yml",
    "fixtures/vcr_cassettes/broken_selector.yml",
    "fixtures/vcr_cassettes/error_page.yml",
+    "fixtures/vcr_cassettes/follow_links.yml",
    "fixtures/vcr_cassettes/for_each_page.yml",
    "fixtures/vcr_cassettes/xml_with_namespace.yml",
    "lib/wombat.rb",
    "lib/wombat/crawler.rb",
-    "lib/wombat/
-    "lib/wombat/
-    "lib/wombat/
-    "lib/wombat/
-    "lib/wombat/
-    "lib/wombat/
-    "lib/wombat/
+    "lib/wombat/dsl/follower.rb",
+    "lib/wombat/dsl/iterator.rb",
+    "lib/wombat/dsl/metadata.rb",
+    "lib/wombat/dsl/property.rb",
+    "lib/wombat/dsl/property_group.rb",
+    "lib/wombat/processing/node_selector.rb",
+    "lib/wombat/processing/parser.rb",
+    "lib/wombat/property/locators/base.rb",
+    "lib/wombat/property/locators/factory.rb",
+    "lib/wombat/property/locators/follow.rb",
+    "lib/wombat/property/locators/html.rb",
+    "lib/wombat/property/locators/iterator.rb",
+    "lib/wombat/property/locators/list.rb",
+    "lib/wombat/property/locators/property_group.rb",
+    "lib/wombat/property/locators/text.rb",
    "spec/crawler_spec.rb",
+    "spec/dsl/property_spec.rb",
    "spec/helpers/sample_crawler.rb",
    "spec/integration/integration_spec.rb",
-    "spec/
-    "spec/
-    "spec/
-    "spec/
-    "spec/
-    "spec/
+    "spec/processing/parser_spec.rb",
+    "spec/property/locators/factory_spec.rb",
+    "spec/property/locators/follow_spec.rb",
+    "spec/property/locators/html_spec.rb",
+    "spec/property/locators/iterator_spec.rb",
+    "spec/property/locators/list_spec.rb",
+    "spec/property/locators/text_spec.rb",
    "spec/sample_crawler_spec.rb",
    "spec/spec_helper.rb",
    "spec/wombat_spec.rb",
@@ -60,7 +71,7 @@
  s.require_paths = ["lib"]
  s.required_ruby_version = Gem::Requirement.new(">= 1.9")
  s.rubygems_version = "1.8.24"
-  s.summary = "Ruby DSL to
+  s.summary = "Ruby DSL to scrape web pages"

  if s.respond_to? :specification_version then
    s.specification_version = 3
metadata
CHANGED
@@ -1,7 +1,7 @@
--- !ruby/object:Gem::Specification
name: wombat
version: !ruby/object:Gem::Version
-  version:
+  version: 2.0.0
prerelease:
platform: ruby
authors:
@@ -9,7 +9,7 @@ authors:
autorequire:
bindir: bin
cert_chain: []
- date: 2012-
+ date: 2012-07-31 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: mechanize
@@ -192,26 +192,37 @@ files:
- fixtures/vcr_cassettes/basic_crawler_page.yml
- fixtures/vcr_cassettes/broken_selector.yml
- fixtures/vcr_cassettes/error_page.yml
+- fixtures/vcr_cassettes/follow_links.yml
- fixtures/vcr_cassettes/for_each_page.yml
- fixtures/vcr_cassettes/xml_with_namespace.yml
- lib/wombat.rb
- lib/wombat/crawler.rb
-- lib/wombat/
-- lib/wombat/
-- lib/wombat/
-- lib/wombat/
-- lib/wombat/
-- lib/wombat/
-- lib/wombat/
+- lib/wombat/dsl/follower.rb
+- lib/wombat/dsl/iterator.rb
+- lib/wombat/dsl/metadata.rb
+- lib/wombat/dsl/property.rb
+- lib/wombat/dsl/property_group.rb
+- lib/wombat/processing/node_selector.rb
+- lib/wombat/processing/parser.rb
+- lib/wombat/property/locators/base.rb
+- lib/wombat/property/locators/factory.rb
+- lib/wombat/property/locators/follow.rb
+- lib/wombat/property/locators/html.rb
+- lib/wombat/property/locators/iterator.rb
+- lib/wombat/property/locators/list.rb
+- lib/wombat/property/locators/property_group.rb
+- lib/wombat/property/locators/text.rb
- spec/crawler_spec.rb
+- spec/dsl/property_spec.rb
- spec/helpers/sample_crawler.rb
- spec/integration/integration_spec.rb
-- spec/
-- spec/
-- spec/
-- spec/
-- spec/
-- spec/
+- spec/processing/parser_spec.rb
+- spec/property/locators/factory_spec.rb
+- spec/property/locators/follow_spec.rb
+- spec/property/locators/html_spec.rb
+- spec/property/locators/iterator_spec.rb
+- spec/property/locators/list_spec.rb
+- spec/property/locators/text_spec.rb
- spec/sample_crawler_spec.rb
- spec/spec_helper.rb
- spec/wombat_spec.rb
@@ -240,5 +251,5 @@ rubyforge_project:
rubygems_version: 1.8.24
signing_key:
specification_version: 3
- summary: Ruby DSL to
+ summary: Ruby DSL to scrape web pages
test_files: []
data/lib/wombat/iterator.rb
DELETED
@@ -1,38 +0,0 @@
-module Wombat
-  class Iterator < PropertyContainer
-    attr_accessor :selector
-
-    def initialize(selector)
-      @selector = selector
-      super()
-    end
-
-    def parse
-      raise ArgumentError.new('Must provide a block to locate property values') unless block_given?
-
-      all_properties.each do |p|
-        p.result ||= []
-        result = yield p
-        if result
-          result = p.callback ? p.callback.call(result) : result
-          p.result << result
-        end
-      end
-    end
-
-    def reset
-      all_properties.each { |p| p.reset }
-    end
-
-    def flatten(depth = nil)
-      # determine the iterator length by the biggest property array that we have
-      length = all_properties.map(&:result).sort { |a| a.length }.last.size
-
-      Array.new.tap do |a|
-        length.times do |i|
-          a << super(i)
-        end
-      end
-    end
-  end
-end