scrapify 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/scrapify/base.rb +12 -1
- data/lib/scrapify/exceptions.rb +3 -0
- data/lib/scrapify/version.rb +1 -1
- data/lib/scrapify.rb +1 -0
- data/spec/shared/finder.rb +10 -0
- data/spec/shared/scrapify.rb +25 -5
- metadata +17 -14
data/lib/scrapify/base.rb
CHANGED
@@ -69,7 +69,9 @@ module Scrapify
|
|
69
69
|
end
|
70
70
|
|
71
71
|
def parse_html
|
72
|
-
Nokogiri::HTML(html_content)
|
72
|
+
doc = Nokogiri::HTML(html_content)
|
73
|
+
doc.css('br').each {|br| br.replace("\n")}
|
74
|
+
doc
|
73
75
|
end
|
74
76
|
|
75
77
|
def html_content
|
@@ -106,6 +108,15 @@ module Scrapify
|
|
106
108
|
attributes = Hash[attribute_names.map {|attribute| [attribute, send("#{attribute}_values")[index]]}]
|
107
109
|
self.new(attributes)
|
108
110
|
end
|
111
|
+
|
112
|
+
define_singleton_method :where do |conditions = {}|
|
113
|
+
raise Scrapify::AttributeDoesNotExist.new(conditions.keys - attribute_names) unless conditions.keys.all?{|key| attribute_names.include?(key) }
|
114
|
+
indices = conditions.collect do |attribute, value|
|
115
|
+
send("#{attribute}_values").each_with_index.find_all{|attr_val, index| attr_val == value}.collect(&:last)
|
116
|
+
end
|
117
|
+
common_indices = indices.reduce {|a, b| a & b}
|
118
|
+
common_indices.collect{|index| find_by_index(index)}
|
119
|
+
end
|
109
120
|
end
|
110
121
|
|
111
122
|
def define_count(key_attribute)
|
data/lib/scrapify/version.rb
CHANGED
data/lib/scrapify.rb
CHANGED
data/spec/shared/finder.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
shared_examples_for '#finder' do |klass_or_object, conditions|
|
2
|
+
it 'should fetch objects based on conditions' do
|
3
|
+
pizza = klass_or_object.where(conditions).first
|
4
|
+
pizza.name.should == 'chicken golden delight'
|
5
|
+
pizza.image_url.should == 'golden.jpg'
|
6
|
+
pizza.price.should == '4.56'
|
7
|
+
pizza.ingredients.should be_empty
|
8
|
+
pizza.ingredient_urls.should == ['chicken.html', 'delight.html']
|
9
|
+
end
|
10
|
+
end
|
data/spec/shared/scrapify.rb
CHANGED
@@ -19,7 +19,7 @@ shared_examples_for "Scrapify" do |klass_or_object|
|
|
19
19
|
<span class='references'><ol><li></li></ol></span
|
20
20
|
</li>
|
21
21
|
<li>
|
22
|
-
<a>veg
|
22
|
+
<a>veg<br/>supreme</a><input value="veg.jpg">
|
23
23
|
<span class='price'>(2.34)</span>
|
24
24
|
<span class='ingredients'>
|
25
25
|
<ol>
|
@@ -62,7 +62,7 @@ shared_examples_for "Scrapify" do |klass_or_object|
|
|
62
62
|
end
|
63
63
|
|
64
64
|
it "should parse html and fetch attributes using css" do
|
65
|
-
klass_or_object.name_values.should == ['chicken supreme',
|
65
|
+
klass_or_object.name_values.should == ['chicken supreme', "veg\nsupreme", 'pepperoni', 'chicken golden delight']
|
66
66
|
end
|
67
67
|
|
68
68
|
it "should parse html and fetch attributes using xpath" do
|
@@ -85,6 +85,10 @@ shared_examples_for "Scrapify" do |klass_or_object|
|
|
85
85
|
klass_or_object.first.name.should == 'chicken supreme'
|
86
86
|
end
|
87
87
|
|
88
|
+
it "should replace br with newline" do
|
89
|
+
klass_or_object.all[1].name.should == "veg\nsupreme"
|
90
|
+
end
|
91
|
+
|
88
92
|
describe "cache headers" do
|
89
93
|
it "should return the http headers" do
|
90
94
|
klass_or_object.http_cache_header.should == {
|
@@ -120,7 +124,7 @@ shared_examples_for "Scrapify" do |klass_or_object|
|
|
120
124
|
describe "last" do
|
121
125
|
it "should fetch last matching element" do
|
122
126
|
last_pizza = klass_or_object.last
|
123
|
-
last_pizza.name.should ==
|
127
|
+
last_pizza.name.should == "chicken golden delight"
|
124
128
|
last_pizza.image_url.should == 'golden.jpg'
|
125
129
|
end
|
126
130
|
end
|
@@ -129,7 +133,7 @@ shared_examples_for "Scrapify" do |klass_or_object|
|
|
129
133
|
it "should fetch all objects" do
|
130
134
|
pizzas = klass_or_object.all
|
131
135
|
pizzas.size.should == 4
|
132
|
-
pizzas.map(&:name).should == ['chicken supreme',
|
136
|
+
pizzas.map(&:name).should == ['chicken supreme', "veg\nsupreme", 'pepperoni', 'chicken golden delight']
|
133
137
|
pizzas.map(&:image_url).should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg', 'golden.jpg']
|
134
138
|
end
|
135
139
|
end
|
@@ -169,10 +173,26 @@ shared_examples_for "Scrapify" do |klass_or_object|
|
|
169
173
|
pizzas = klass_or_object.all
|
170
174
|
pizzas.to_json.should == [
|
171
175
|
{name: "chicken supreme", image_url: "chicken.jpg", price: '1.23', ingredients: ['corn', 'tomato'], :ingredient_urls => []},
|
172
|
-
{name: "veg
|
176
|
+
{name: "veg\nsupreme", image_url: "veg.jpg", price: '2.34', ingredients: ['mushroom', 'jalapeno'], :ingredient_urls => []},
|
173
177
|
{name: "pepperoni", image_url: "pepperoni.jpg", price: '3.45', ingredients: [], :ingredient_urls => []},
|
174
178
|
{name: "chicken golden delight", image_url: "golden.jpg", price: '4.56', ingredients: [], :ingredient_urls => ['chicken.html', 'delight.html']},
|
175
179
|
].to_json
|
176
180
|
end
|
177
181
|
end
|
182
|
+
|
183
|
+
it_behaves_like '#finder', klass_or_object, :name => 'chicken golden delight'
|
184
|
+
it_behaves_like '#finder', klass_or_object, :image_url => 'golden.jpg'
|
185
|
+
it_behaves_like '#finder', klass_or_object, :price => '4.56'
|
186
|
+
it_behaves_like '#finder', klass_or_object, :ingredient_urls => ['chicken.html', 'delight.html']
|
187
|
+
it_behaves_like '#finder', klass_or_object, :name => 'chicken golden delight', :image_url => 'golden.jpg', :price => '4.56', :ingredient_urls => ['chicken.html', 'delight.html']
|
188
|
+
|
189
|
+
it 'should return empty array if object is not found' do
|
190
|
+
klass_or_object.where(:name => 'does not exist').should be_empty
|
191
|
+
end
|
192
|
+
|
193
|
+
it 'should throw exception if attribute is not defined' do
|
194
|
+
lambda {
|
195
|
+
klass_or_object.where(:some_attribute => 'chicken golden delight')
|
196
|
+
}.should raise_error(Scrapify::AttributeDoesNotExist)
|
197
|
+
end
|
178
198
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapify
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-11 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
16
|
-
requirement: &
|
16
|
+
requirement: &70278707139780 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70278707139780
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: mocha
|
27
|
-
requirement: &
|
27
|
+
requirement: &70278707139300 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70278707139300
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: fakeweb
|
38
|
-
requirement: &
|
38
|
+
requirement: &70278707138800 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70278707138800
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: nokogiri
|
49
|
-
requirement: &
|
49
|
+
requirement: &70278707138260 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ! '>='
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70278707138260
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: activesupport
|
60
|
-
requirement: &
|
60
|
+
requirement: &70278707137740 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ! '>='
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: '0'
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70278707137740
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: json
|
71
|
-
requirement: &
|
71
|
+
requirement: &70278707137260 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70278707137260
|
80
80
|
description: ScrApify scraps static html sites to RESTlike APIs
|
81
81
|
email:
|
82
82
|
- sathish316@gmail.com
|
@@ -92,6 +92,7 @@ files:
|
|
92
92
|
- lib/jsonify.rb
|
93
93
|
- lib/scrapify.rb
|
94
94
|
- lib/scrapify/base.rb
|
95
|
+
- lib/scrapify/exceptions.rb
|
95
96
|
- lib/scrapify/scraper.rb
|
96
97
|
- lib/scrapify/version.rb
|
97
98
|
- scrapify.gemspec
|
@@ -99,6 +100,7 @@ files:
|
|
99
100
|
- spec/pizza.rb
|
100
101
|
- spec/scraper_spec.rb
|
101
102
|
- spec/scrapify_spec.rb
|
103
|
+
- spec/shared/finder.rb
|
102
104
|
- spec/shared/scrapify.rb
|
103
105
|
- spec/spec_helper.rb
|
104
106
|
- spec/test_models.rb
|
@@ -131,6 +133,7 @@ test_files:
|
|
131
133
|
- spec/pizza.rb
|
132
134
|
- spec/scraper_spec.rb
|
133
135
|
- spec/scrapify_spec.rb
|
136
|
+
- spec/shared/finder.rb
|
134
137
|
- spec/shared/scrapify.rb
|
135
138
|
- spec/spec_helper.rb
|
136
139
|
- spec/test_models.rb
|