scrapify 0.0.5 → 0.0.6

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/lib/scrapify/base.rb CHANGED
@@ -25,7 +25,7 @@ module Scrapify
25
25
  define_finders
26
26
  end
27
27
 
28
- def attribute(name, options={})
28
+ def attribute(name, options={}, &block)
29
29
  add_attribute(name)
30
30
  parser = options[:xpath] ? :xpath : :css
31
31
  selector = options[parser]
@@ -34,12 +34,16 @@ module Scrapify
34
34
  meta_define "#{name}_values" do
35
35
  self.doc ||= parse_html
36
36
  self.doc.send(parser, selector).map do |element|
37
- content = element.content
38
- if matcher
39
- match_data = content.scan(matcher).map &:first
40
- options[:array] ? match_data : match_data.first
37
+ if block
38
+ yield element
41
39
  else
42
- content.strip
40
+ content = element.content
41
+ if matcher
42
+ match_data = content.scan(matcher).map &:first
43
+ options[:array] ? match_data : match_data.first
44
+ else
45
+ content.strip
46
+ end
43
47
  end
44
48
  end
45
49
  end
@@ -1,3 +1,3 @@
1
1
  module Scrapify
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
data/spec/pizza.rb CHANGED
@@ -6,6 +6,11 @@ class Pizza
6
6
  attribute :image_url, xpath: "//li//input//@value"
7
7
  attribute :price, css: ".price", regex: /([\d\.]+)/
8
8
  attribute :ingredients, css: ".ingredients", regex: /contains (\w+)/, array: true
9
+ attribute :ingredient_urls, css: '.references ol li' do |element|
10
+ element.children.map do |child|
11
+ child.attributes['href'].value if child.attributes['href']
12
+ end.compact
13
+ end
9
14
 
10
15
  key :name
11
- end
16
+ end
@@ -20,6 +20,7 @@ describe Scrapify do
20
20
  <li>contains tomato</li>
21
21
  <ol>
22
22
  </span>
23
+ <span class='references'><ol><li></li></ol></span
23
24
  </li>
24
25
  <li>
25
26
  <a>veg supreme</a><input value="veg.jpg">
@@ -30,18 +31,33 @@ describe Scrapify do
30
31
  <li>contains jalapeno</li>
31
32
  <ol>
32
33
  </span>
34
+ <span class='references'><ol><li></li></ol></span
33
35
  </li>
34
36
  <li>
35
37
  <a>pepperoni</a><input value="pepperoni.jpg">
36
38
  <span class='price'>(3.45)</span>
37
39
  <span class='ingredients'></span>
40
+ <span class='references'><ol><li></li></ol></span
41
+ </li>
42
+ <li>
43
+ <a>chicken golden delight</a><input value="golden.jpg">
44
+ <span class='price'>(4.56)</span>
45
+ <span class='ingredients'/>
46
+ <span class='references'>
47
+ <ol>
48
+ <li>
49
+ <div href='chicken.html'>chicken</div>
50
+ <div href='delight.html'>delight</div>
51
+ </li>
52
+ </ol>
53
+ </span>
38
54
  </li>
39
55
  </ul>
40
56
  HTML
41
57
  end
42
58
 
43
59
  it "should return attribute names" do
44
- ::Pizza.attribute_names.should == [:name, :image_url, :price, :ingredients]
60
+ ::Pizza.attribute_names.should == [:name, :image_url, :price, :ingredients, :ingredient_urls]
45
61
  end
46
62
 
47
63
  describe "html" do
@@ -50,19 +66,23 @@ describe Scrapify do
50
66
  end
51
67
 
52
68
  it "should parse html and fetch attributes using css" do
53
- ::Pizza.name_values.should == ['chicken supreme', 'veg supreme', 'pepperoni']
69
+ ::Pizza.name_values.should == ['chicken supreme', 'veg supreme', 'pepperoni', 'chicken golden delight']
54
70
  end
55
71
 
56
72
  it "should parse html and fetch attributes using xpath" do
57
- ::Pizza.image_url_values.should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg']
73
+ ::Pizza.image_url_values.should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg', 'golden.jpg']
58
74
  end
59
75
 
60
76
  it "should parse html and extract attributes using regex" do
61
- ::Pizza.price_values.should == ['1.23', '2.34', '3.45']
77
+ ::Pizza.price_values.should == ['1.23', '2.34', '3.45', '4.56']
62
78
  end
63
79
 
64
80
  it "should parse html and extract multiple attributes using regex" do
65
- ::Pizza.ingredients_values.should == [['corn','tomato'], ['mushroom','jalapeno'], []]
81
+ ::Pizza.ingredients_values.should == [['corn','tomato'], ['mushroom','jalapeno'], [], []]
82
+ end
83
+
84
+ it 'should accept block to yield attribute values' do
85
+ ::Pizza.ingredient_urls_values.should == [[], [], [], ['chicken.html', 'delight.html']]
66
86
  end
67
87
 
68
88
  it "should strip content" do
@@ -104,23 +124,23 @@ describe Scrapify do
104
124
  describe "last" do
105
125
  it "should fetch last matching element" do
106
126
  last_pizza = ::Pizza.last
107
- last_pizza.name.should == 'pepperoni'
108
- last_pizza.image_url.should == 'pepperoni.jpg'
127
+ last_pizza.name.should == 'chicken golden delight'
128
+ last_pizza.image_url.should == 'golden.jpg'
109
129
  end
110
130
  end
111
131
 
112
132
  describe "all" do
113
133
  it "should fetch all objects" do
114
134
  pizzas = ::Pizza.all
115
- pizzas.size.should == 3
116
- pizzas.map(&:name).should == ['chicken supreme', 'veg supreme', 'pepperoni']
117
- pizzas.map(&:image_url).should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg']
135
+ pizzas.size.should == 4
136
+ pizzas.map(&:name).should == ['chicken supreme', 'veg supreme', 'pepperoni', 'chicken golden delight']
137
+ pizzas.map(&:image_url).should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg', 'golden.jpg']
118
138
  end
119
139
  end
120
140
 
121
141
  describe "count" do
122
142
  it "should return number of matching elements" do
123
- ::Pizza.count.should == 3
143
+ ::Pizza.count.should == 4
124
144
  end
125
145
  end
126
146
 
@@ -131,7 +151,8 @@ describe Scrapify do
131
151
  name: "chicken supreme",
132
152
  image_url: "chicken.jpg",
133
153
  price: '1.23',
134
- ingredients: ['corn', 'tomato']
154
+ ingredients: ['corn', 'tomato'],
155
+ ingredient_urls: []
135
156
  }
136
157
  end
137
158
  end
@@ -143,16 +164,18 @@ describe Scrapify do
143
164
  name: "chicken supreme",
144
165
  image_url: "chicken.jpg",
145
166
  price: '1.23',
146
- ingredients: ['corn', 'tomato']
167
+ ingredients: ['corn', 'tomato'],
168
+ ingredient_urls: []
147
169
  }.to_json
148
170
  end
149
171
 
150
172
  it "should convert array to json" do
151
173
  pizzas = ::Pizza.all
152
174
  pizzas.to_json.should == [
153
- {name: "chicken supreme", image_url: "chicken.jpg", price: '1.23', ingredients: ['corn', 'tomato']},
154
- {name: "veg supreme", image_url: "veg.jpg", price: '2.34', ingredients: ['mushroom', 'jalapeno']},
155
- {name: "pepperoni", image_url: "pepperoni.jpg", price: '3.45', ingredients: []},
175
+ {name: "chicken supreme", image_url: "chicken.jpg", price: '1.23', ingredients: ['corn', 'tomato'], :ingredient_urls => []},
176
+ {name: "veg supreme", image_url: "veg.jpg", price: '2.34', ingredients: ['mushroom', 'jalapeno'], :ingredient_urls => []},
177
+ {name: "pepperoni", image_url: "pepperoni.jpg", price: '3.45', ingredients: [], :ingredient_urls => []},
178
+ {name: "chicken golden delight", image_url: "golden.jpg", price: '4.56', ingredients: [], :ingredient_urls => ['chicken.html', 'delight.html']},
156
179
  ].to_json
157
180
  end
158
181
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrapify
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-14 00:00:00.000000000Z
12
+ date: 2012-06-19 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &70352195064240 !ruby/object:Gem::Requirement
16
+ requirement: &70142157555980 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70352195064240
24
+ version_requirements: *70142157555980
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: mocha
27
- requirement: &70352195063700 !ruby/object:Gem::Requirement
27
+ requirement: &70142157554660 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70352195063700
35
+ version_requirements: *70142157554660
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: fakeweb
38
- requirement: &70352195062820 !ruby/object:Gem::Requirement
38
+ requirement: &70142157538940 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70352195062820
46
+ version_requirements: *70142157538940
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: nokogiri
49
- requirement: &70352195062120 !ruby/object:Gem::Requirement
49
+ requirement: &70142157538080 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70352195062120
57
+ version_requirements: *70142157538080
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: activesupport
60
- requirement: &70352195061500 !ruby/object:Gem::Requirement
60
+ requirement: &70142157537240 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70352195061500
68
+ version_requirements: *70142157537240
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: json
71
- requirement: &70352195060200 !ruby/object:Gem::Requirement
71
+ requirement: &70142157536340 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70352195060200
79
+ version_requirements: *70142157536340
80
80
  description: ScrApify scraps static html sites to RESTlike APIs
81
81
  email:
82
82
  - sathish316@gmail.com