scrapify 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
data/lib/scrapify/base.rb CHANGED
@@ -69,7 +69,9 @@ module Scrapify
69
69
  end
70
70
 
71
71
  def parse_html
72
- Nokogiri::HTML(html_content)
72
+ doc = Nokogiri::HTML(html_content)
73
+ doc.css('br').each {|br| br.replace("\n")}
74
+ doc
73
75
  end
74
76
 
75
77
  def html_content
@@ -106,6 +108,15 @@ module Scrapify
106
108
  attributes = Hash[attribute_names.map {|attribute| [attribute, send("#{attribute}_values")[index]]}]
107
109
  self.new(attributes)
108
110
  end
111
+
112
+ define_singleton_method :where do |conditions = {}|
113
+ raise Scrapify::AttributeDoesNotExist.new(conditions.keys - attribute_names) unless conditions.keys.all?{|key| attribute_names.include?(key) }
114
+ indices = conditions.collect do |attribute, value|
115
+ send("#{attribute}_values").each_with_index.find_all{|attr_val, index| attr_val == value}.collect(&:last)
116
+ end
117
+ common_indices = indices.reduce {|a, b| a & b}
118
+ common_indices.collect{|index| find_by_index(index)}
119
+ end
109
120
  end
110
121
 
111
122
  def define_count(key_attribute)
@@ -0,0 +1,3 @@
1
+ module Scrapify
2
+ class AttributeDoesNotExist < StandardError; end
3
+ end
@@ -1,3 +1,3 @@
1
1
  module Scrapify
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
3
  end
data/lib/scrapify.rb CHANGED
@@ -6,5 +6,6 @@ require 'uri'
6
6
  require 'net/http'
7
7
  require 'scrapify/base'
8
8
  require 'scrapify/scraper'
9
+ require 'scrapify/exceptions'
9
10
  require 'json'
10
11
  require 'jsonify'
@@ -0,0 +1,10 @@
1
+ shared_examples_for '#finder' do |klass_or_object, conditions|
2
+ it 'should fetch objects based on conditions' do
3
+ pizza = klass_or_object.where(conditions).first
4
+ pizza.name.should == 'chicken golden delight'
5
+ pizza.image_url.should == 'golden.jpg'
6
+ pizza.price.should == '4.56'
7
+ pizza.ingredients.should be_empty
8
+ pizza.ingredient_urls.should == ['chicken.html', 'delight.html']
9
+ end
10
+ end
@@ -19,7 +19,7 @@ shared_examples_for "Scrapify" do |klass_or_object|
19
19
  <span class='references'><ol><li></li></ol></span
20
20
  </li>
21
21
  <li>
22
- <a>veg supreme</a><input value="veg.jpg">
22
+ <a>veg<br/>supreme</a><input value="veg.jpg">
23
23
  <span class='price'>(2.34)</span>
24
24
  <span class='ingredients'>
25
25
  <ol>
@@ -62,7 +62,7 @@ shared_examples_for "Scrapify" do |klass_or_object|
62
62
  end
63
63
 
64
64
  it "should parse html and fetch attributes using css" do
65
- klass_or_object.name_values.should == ['chicken supreme', 'veg supreme', 'pepperoni', 'chicken golden delight']
65
+ klass_or_object.name_values.should == ['chicken supreme', "veg\nsupreme", 'pepperoni', 'chicken golden delight']
66
66
  end
67
67
 
68
68
  it "should parse html and fetch attributes using xpath" do
@@ -85,6 +85,10 @@ shared_examples_for "Scrapify" do |klass_or_object|
85
85
  klass_or_object.first.name.should == 'chicken supreme'
86
86
  end
87
87
 
88
+ it "should replace br with newline" do
89
+ klass_or_object.all[1].name.should == "veg\nsupreme"
90
+ end
91
+
88
92
  describe "cache headers" do
89
93
  it "should return the http headers" do
90
94
  klass_or_object.http_cache_header.should == {
@@ -120,7 +124,7 @@ shared_examples_for "Scrapify" do |klass_or_object|
120
124
  describe "last" do
121
125
  it "should fetch last matching element" do
122
126
  last_pizza = klass_or_object.last
123
- last_pizza.name.should == 'chicken golden delight'
127
+ last_pizza.name.should == "chicken golden delight"
124
128
  last_pizza.image_url.should == 'golden.jpg'
125
129
  end
126
130
  end
@@ -129,7 +133,7 @@ shared_examples_for "Scrapify" do |klass_or_object|
129
133
  it "should fetch all objects" do
130
134
  pizzas = klass_or_object.all
131
135
  pizzas.size.should == 4
132
- pizzas.map(&:name).should == ['chicken supreme', 'veg supreme', 'pepperoni', 'chicken golden delight']
136
+ pizzas.map(&:name).should == ['chicken supreme', "veg\nsupreme", 'pepperoni', 'chicken golden delight']
133
137
  pizzas.map(&:image_url).should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg', 'golden.jpg']
134
138
  end
135
139
  end
@@ -169,10 +173,26 @@ shared_examples_for "Scrapify" do |klass_or_object|
169
173
  pizzas = klass_or_object.all
170
174
  pizzas.to_json.should == [
171
175
  {name: "chicken supreme", image_url: "chicken.jpg", price: '1.23', ingredients: ['corn', 'tomato'], :ingredient_urls => []},
172
- {name: "veg supreme", image_url: "veg.jpg", price: '2.34', ingredients: ['mushroom', 'jalapeno'], :ingredient_urls => []},
176
+ {name: "veg\nsupreme", image_url: "veg.jpg", price: '2.34', ingredients: ['mushroom', 'jalapeno'], :ingredient_urls => []},
173
177
  {name: "pepperoni", image_url: "pepperoni.jpg", price: '3.45', ingredients: [], :ingredient_urls => []},
174
178
  {name: "chicken golden delight", image_url: "golden.jpg", price: '4.56', ingredients: [], :ingredient_urls => ['chicken.html', 'delight.html']},
175
179
  ].to_json
176
180
  end
177
181
  end
182
+
183
+ it_behaves_like '#finder', klass_or_object, :name => 'chicken golden delight'
184
+ it_behaves_like '#finder', klass_or_object, :image_url => 'golden.jpg'
185
+ it_behaves_like '#finder', klass_or_object, :price => '4.56'
186
+ it_behaves_like '#finder', klass_or_object, :ingredient_urls => ['chicken.html', 'delight.html']
187
+ it_behaves_like '#finder', klass_or_object, :name => 'chicken golden delight', :image_url => 'golden.jpg', :price => '4.56', :ingredient_urls => ['chicken.html', 'delight.html']
188
+
189
+ it 'should return empty array if object is not found' do
190
+ klass_or_object.where(:name => 'does not exist').should be_empty
191
+ end
192
+
193
+ it 'should throw exception if attribute is not defined' do
194
+ lambda {
195
+ klass_or_object.where(:some_attribute => 'chicken golden delight')
196
+ }.should raise_error(Scrapify::AttributeDoesNotExist)
197
+ end
178
198
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrapify
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-28 00:00:00.000000000Z
12
+ date: 2012-07-11 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &70233941360460 !ruby/object:Gem::Requirement
16
+ requirement: &70278707139780 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70233941360460
24
+ version_requirements: *70278707139780
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: mocha
27
- requirement: &70233941359980 !ruby/object:Gem::Requirement
27
+ requirement: &70278707139300 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70233941359980
35
+ version_requirements: *70278707139300
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: fakeweb
38
- requirement: &70233941359460 !ruby/object:Gem::Requirement
38
+ requirement: &70278707138800 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70233941359460
46
+ version_requirements: *70278707138800
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: nokogiri
49
- requirement: &70233941358800 !ruby/object:Gem::Requirement
49
+ requirement: &70278707138260 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70233941358800
57
+ version_requirements: *70278707138260
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: activesupport
60
- requirement: &70233941358220 !ruby/object:Gem::Requirement
60
+ requirement: &70278707137740 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70233941358220
68
+ version_requirements: *70278707137740
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: json
71
- requirement: &70233941351380 !ruby/object:Gem::Requirement
71
+ requirement: &70278707137260 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70233941351380
79
+ version_requirements: *70278707137260
80
80
  description: ScrApify scraps static html sites to RESTlike APIs
81
81
  email:
82
82
  - sathish316@gmail.com
@@ -92,6 +92,7 @@ files:
92
92
  - lib/jsonify.rb
93
93
  - lib/scrapify.rb
94
94
  - lib/scrapify/base.rb
95
+ - lib/scrapify/exceptions.rb
95
96
  - lib/scrapify/scraper.rb
96
97
  - lib/scrapify/version.rb
97
98
  - scrapify.gemspec
@@ -99,6 +100,7 @@ files:
99
100
  - spec/pizza.rb
100
101
  - spec/scraper_spec.rb
101
102
  - spec/scrapify_spec.rb
103
+ - spec/shared/finder.rb
102
104
  - spec/shared/scrapify.rb
103
105
  - spec/spec_helper.rb
104
106
  - spec/test_models.rb
@@ -131,6 +133,7 @@ test_files:
131
133
  - spec/pizza.rb
132
134
  - spec/scraper_spec.rb
133
135
  - spec/scrapify_spec.rb
136
+ - spec/shared/finder.rb
134
137
  - spec/shared/scrapify.rb
135
138
  - spec/spec_helper.rb
136
139
  - spec/test_models.rb