RubyGems - scrapify - Versions diffs - 0.0.5 → 0.0.6 - Mend

scrapify 0.0.5 → 0.0.6

Files changed (5)

data/lib/scrapify/base.rb CHANGED Viewed

@@ -25,7 +25,7 @@ module Scrapify
         define_finders
       end
-      def attribute(name, options={})
+      def attribute(name, options={}, &block)
         add_attribute(name)
         parser = options[:xpath] ? :xpath : :css
         selector = options[parser]
@@ -34,12 +34,16 @@ module Scrapify
         meta_define "#{name}_values" do
           self.doc ||= parse_html
           self.doc.send(parser, selector).map do |element|
-            content = element.content
-            if matcher
-              match_data = content.scan(matcher).map &:first
-              options[:array] ? match_data : match_data.first
+            if block
+              yield element
             else
-              content.strip
+              content = element.content
+              if matcher
+                match_data = content.scan(matcher).map &:first
+                options[:array] ? match_data : match_data.first
+              else
+                content.strip
+              end
             end
           end
         end

data/lib/scrapify/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Scrapify
-  VERSION = "0.0.5"
+  VERSION = "0.0.6"
 end

data/spec/pizza.rb CHANGED Viewed

@@ -6,6 +6,11 @@ class Pizza
   attribute :image_url, xpath: "//li//input//@value"
   attribute :price, css: ".price", regex: /([\d\.]+)/
   attribute :ingredients, css: ".ingredients", regex: /contains (\w+)/, array: true
+  attribute :ingredient_urls, css: '.references ol li' do |element|
+    element.children.map do |child|
+        child.attributes['href'].value if child.attributes['href']
+    end.compact
+  end
   key :name
-end
+end

data/spec/scrapify_spec.rb CHANGED Viewed

@@ -20,6 +20,7 @@ describe Scrapify do
               <li>contains tomato</li>
             <ol>
           </span>
+          <span class='references'><ol><li></li></ol></span
         </li>
         <li>
           <a>veg supreme</a><input value="veg.jpg">
@@ -30,18 +31,33 @@ describe Scrapify do
               <li>contains jalapeno</li>
             <ol>
           </span>
+          <span class='references'><ol><li></li></ol></span
         </li>
         <li>
           <a>pepperoni</a><input value="pepperoni.jpg">
           <span class='price'>(3.45)</span>
           <span class='ingredients'></span>
+          <span class='references'><ol><li></li></ol></span
+        </li>
+        <li>
+          <a>chicken golden delight</a><input value="golden.jpg">
+          <span class='price'>(4.56)</span>
+          <span class='ingredients'/>
+          <span class='references'>
+            <ol>
+              <li>
+                <div href='chicken.html'>chicken</div>
+                <div href='delight.html'>delight</div>
+              </li>
+            </ol>
+          </span>
         </li>
       </ul>
     HTML
   end
   it "should return attribute names" do
-    ::Pizza.attribute_names.should == [:name, :image_url, :price, :ingredients]
+    ::Pizza.attribute_names.should == [:name, :image_url, :price, :ingredients, :ingredient_urls]
   end
   describe "html" do
@@ -50,19 +66,23 @@ describe Scrapify do
     end
     it "should parse html and fetch attributes using css" do
-      ::Pizza.name_values.should == ['chicken supreme', 'veg supreme', 'pepperoni']
+      ::Pizza.name_values.should == ['chicken supreme', 'veg supreme', 'pepperoni', 'chicken golden delight']
     end
     it "should parse html and fetch attributes using xpath" do
-      ::Pizza.image_url_values.should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg']
+      ::Pizza.image_url_values.should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg', 'golden.jpg']
     end
     it "should parse html and extract attributes using regex" do
-      ::Pizza.price_values.should == ['1.23', '2.34', '3.45']
+      ::Pizza.price_values.should == ['1.23', '2.34', '3.45', '4.56']
     end
     it "should parse html and extract multiple attributes using regex" do
-      ::Pizza.ingredients_values.should == [['corn','tomato'], ['mushroom','jalapeno'], []]
+      ::Pizza.ingredients_values.should == [['corn','tomato'], ['mushroom','jalapeno'], [], []]
+    end
+    it 'should accept block to yield attribute values' do
+      ::Pizza.ingredient_urls_values.should == [[], [], [], ['chicken.html', 'delight.html']]
     end
     it "should strip content" do
@@ -104,23 +124,23 @@ describe Scrapify do
   describe "last" do
     it "should fetch last matching element" do
       last_pizza = ::Pizza.last
-      last_pizza.name.should == 'pepperoni'
-      last_pizza.image_url.should == 'pepperoni.jpg'
+      last_pizza.name.should == 'chicken golden delight'
+      last_pizza.image_url.should == 'golden.jpg'
     end
   end
   describe "all" do
     it "should fetch all objects" do
       pizzas = ::Pizza.all
-      pizzas.size.should == 3
-      pizzas.map(&:name).should == ['chicken supreme', 'veg supreme', 'pepperoni']
-      pizzas.map(&:image_url).should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg']
+      pizzas.size.should == 4
+      pizzas.map(&:name).should == ['chicken supreme', 'veg supreme', 'pepperoni', 'chicken golden delight']
+      pizzas.map(&:image_url).should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg', 'golden.jpg']
     end
   end
   describe "count" do
     it "should return number of matching elements" do
-      ::Pizza.count.should == 3
+      ::Pizza.count.should == 4
     end
   end
@@ -131,7 +151,8 @@ describe Scrapify do
         name: "chicken supreme",
         image_url: "chicken.jpg",
         price: '1.23',
-        ingredients: ['corn', 'tomato']
+        ingredients: ['corn', 'tomato'],
+        ingredient_urls: []
       }
     end
   end
@@ -143,16 +164,18 @@ describe Scrapify do
         name: "chicken supreme",
         image_url: "chicken.jpg",
         price: '1.23',
-        ingredients: ['corn', 'tomato']
+        ingredients: ['corn', 'tomato'],
+        ingredient_urls: []
       }.to_json
     end
     it "should convert array to json" do
       pizzas = ::Pizza.all
       pizzas.to_json.should == [
-        {name: "chicken supreme", image_url: "chicken.jpg", price: '1.23', ingredients: ['corn', 'tomato']},
-        {name: "veg supreme", image_url: "veg.jpg", price: '2.34', ingredients: ['mushroom', 'jalapeno']},
-        {name: "pepperoni", image_url: "pepperoni.jpg", price: '3.45', ingredients: []},
+        {name: "chicken supreme", image_url: "chicken.jpg", price: '1.23', ingredients: ['corn', 'tomato'], :ingredient_urls => []},
+        {name: "veg supreme", image_url: "veg.jpg", price: '2.34', ingredients: ['mushroom', 'jalapeno'], :ingredient_urls => []},
+        {name: "pepperoni", image_url: "pepperoni.jpg", price: '3.45', ingredients: [], :ingredient_urls => []},
+        {name: "chicken golden delight", image_url: "golden.jpg", price: '4.56', ingredients: [], :ingredient_urls => ['chicken.html', 'delight.html']},
       ].to_json
     end
   end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: scrapify
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.6
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-06-14 00:00:00.000000000Z
+date: 2012-06-19 00:00:00.000000000Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
-  requirement: &70352195064240 !ruby/object:Gem::Requirement
+  requirement: &70142157555980 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *70352195064240
+  version_requirements: *70142157555980
 - !ruby/object:Gem::Dependency
   name: mocha
-  requirement: &70352195063700 !ruby/object:Gem::Requirement
+  requirement: &70142157554660 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *70352195063700
+  version_requirements: *70142157554660
 - !ruby/object:Gem::Dependency
   name: fakeweb
-  requirement: &70352195062820 !ruby/object:Gem::Requirement
+  requirement: &70142157538940 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *70352195062820
+  version_requirements: *70142157538940
 - !ruby/object:Gem::Dependency
   name: nokogiri
-  requirement: &70352195062120 !ruby/object:Gem::Requirement
+  requirement: &70142157538080 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70352195062120
+  version_requirements: *70142157538080
 - !ruby/object:Gem::Dependency
   name: activesupport
-  requirement: &70352195061500 !ruby/object:Gem::Requirement
+  requirement: &70142157537240 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70352195061500
+  version_requirements: *70142157537240
 - !ruby/object:Gem::Dependency
   name: json
-  requirement: &70352195060200 !ruby/object:Gem::Requirement
+  requirement: &70142157536340 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70352195060200
+  version_requirements: *70142157536340
 description: ScrApify scraps static html sites to RESTlike APIs
 email:
 - sathish316@gmail.com