scrapify 0.0.7 → 0.0.8

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/lib/scrapify/base.rb CHANGED
@@ -69,7 +69,9 @@ module Scrapify
69
69
  end
70
70
 
71
71
  def parse_html
72
- Nokogiri::HTML(html_content)
72
+ doc = Nokogiri::HTML(html_content)
73
+ doc.css('br').each {|br| br.replace("\n")}
74
+ doc
73
75
  end
74
76
 
75
77
  def html_content
@@ -106,6 +108,15 @@ module Scrapify
106
108
  attributes = Hash[attribute_names.map {|attribute| [attribute, send("#{attribute}_values")[index]]}]
107
109
  self.new(attributes)
108
110
  end
111
+
112
+ define_singleton_method :where do |conditions = {}|
113
+ raise Scrapify::AttributeDoesNotExist.new(conditions.keys - attribute_names) unless conditions.keys.all?{|key| attribute_names.include?(key) }
114
+ indices = conditions.collect do |attribute, value|
115
+ send("#{attribute}_values").each_with_index.find_all{|attr_val, index| attr_val == value}.collect(&:last)
116
+ end
117
+ common_indices = indices.reduce {|a, b| a & b}
118
+ common_indices.collect{|index| find_by_index(index)}
119
+ end
109
120
  end
110
121
 
111
122
  def define_count(key_attribute)
@@ -0,0 +1,3 @@
1
+ module Scrapify
2
+ class AttributeDoesNotExist < StandardError; end
3
+ end
@@ -1,3 +1,3 @@
1
1
  module Scrapify
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
3
  end
data/lib/scrapify.rb CHANGED
@@ -6,5 +6,6 @@ require 'uri'
6
6
  require 'net/http'
7
7
  require 'scrapify/base'
8
8
  require 'scrapify/scraper'
9
+ require 'scrapify/exceptions'
9
10
  require 'json'
10
11
  require 'jsonify'
@@ -0,0 +1,10 @@
1
+ shared_examples_for '#finder' do |klass_or_object, conditions|
2
+ it 'should fetch objects based on conditions' do
3
+ pizza = klass_or_object.where(conditions).first
4
+ pizza.name.should == 'chicken golden delight'
5
+ pizza.image_url.should == 'golden.jpg'
6
+ pizza.price.should == '4.56'
7
+ pizza.ingredients.should be_empty
8
+ pizza.ingredient_urls.should == ['chicken.html', 'delight.html']
9
+ end
10
+ end
@@ -19,7 +19,7 @@ shared_examples_for "Scrapify" do |klass_or_object|
19
19
  <span class='references'><ol><li></li></ol></span
20
20
  </li>
21
21
  <li>
22
- <a>veg supreme</a><input value="veg.jpg">
22
+ <a>veg<br/>supreme</a><input value="veg.jpg">
23
23
  <span class='price'>(2.34)</span>
24
24
  <span class='ingredients'>
25
25
  <ol>
@@ -62,7 +62,7 @@ shared_examples_for "Scrapify" do |klass_or_object|
62
62
  end
63
63
 
64
64
  it "should parse html and fetch attributes using css" do
65
- klass_or_object.name_values.should == ['chicken supreme', 'veg supreme', 'pepperoni', 'chicken golden delight']
65
+ klass_or_object.name_values.should == ['chicken supreme', "veg\nsupreme", 'pepperoni', 'chicken golden delight']
66
66
  end
67
67
 
68
68
  it "should parse html and fetch attributes using xpath" do
@@ -85,6 +85,10 @@ shared_examples_for "Scrapify" do |klass_or_object|
85
85
  klass_or_object.first.name.should == 'chicken supreme'
86
86
  end
87
87
 
88
+ it "should replace br with newline" do
89
+ klass_or_object.all[1].name.should == "veg\nsupreme"
90
+ end
91
+
88
92
  describe "cache headers" do
89
93
  it "should return the http headers" do
90
94
  klass_or_object.http_cache_header.should == {
@@ -120,7 +124,7 @@ shared_examples_for "Scrapify" do |klass_or_object|
120
124
  describe "last" do
121
125
  it "should fetch last matching element" do
122
126
  last_pizza = klass_or_object.last
123
- last_pizza.name.should == 'chicken golden delight'
127
+ last_pizza.name.should == "chicken golden delight"
124
128
  last_pizza.image_url.should == 'golden.jpg'
125
129
  end
126
130
  end
@@ -129,7 +133,7 @@ shared_examples_for "Scrapify" do |klass_or_object|
129
133
  it "should fetch all objects" do
130
134
  pizzas = klass_or_object.all
131
135
  pizzas.size.should == 4
132
- pizzas.map(&:name).should == ['chicken supreme', 'veg supreme', 'pepperoni', 'chicken golden delight']
136
+ pizzas.map(&:name).should == ['chicken supreme', "veg\nsupreme", 'pepperoni', 'chicken golden delight']
133
137
  pizzas.map(&:image_url).should == ['chicken.jpg', 'veg.jpg', 'pepperoni.jpg', 'golden.jpg']
134
138
  end
135
139
  end
@@ -169,10 +173,26 @@ shared_examples_for "Scrapify" do |klass_or_object|
169
173
  pizzas = klass_or_object.all
170
174
  pizzas.to_json.should == [
171
175
  {name: "chicken supreme", image_url: "chicken.jpg", price: '1.23', ingredients: ['corn', 'tomato'], :ingredient_urls => []},
172
- {name: "veg supreme", image_url: "veg.jpg", price: '2.34', ingredients: ['mushroom', 'jalapeno'], :ingredient_urls => []},
176
+ {name: "veg\nsupreme", image_url: "veg.jpg", price: '2.34', ingredients: ['mushroom', 'jalapeno'], :ingredient_urls => []},
173
177
  {name: "pepperoni", image_url: "pepperoni.jpg", price: '3.45', ingredients: [], :ingredient_urls => []},
174
178
  {name: "chicken golden delight", image_url: "golden.jpg", price: '4.56', ingredients: [], :ingredient_urls => ['chicken.html', 'delight.html']},
175
179
  ].to_json
176
180
  end
177
181
  end
182
+
183
+ it_behaves_like '#finder', klass_or_object, :name => 'chicken golden delight'
184
+ it_behaves_like '#finder', klass_or_object, :image_url => 'golden.jpg'
185
+ it_behaves_like '#finder', klass_or_object, :price => '4.56'
186
+ it_behaves_like '#finder', klass_or_object, :ingredient_urls => ['chicken.html', 'delight.html']
187
+ it_behaves_like '#finder', klass_or_object, :name => 'chicken golden delight', :image_url => 'golden.jpg', :price => '4.56', :ingredient_urls => ['chicken.html', 'delight.html']
188
+
189
+ it 'should return empty array if object is not found' do
190
+ klass_or_object.where(:name => 'does not exist').should be_empty
191
+ end
192
+
193
+ it 'should throw exception if attribute is not defined' do
194
+ lambda {
195
+ klass_or_object.where(:some_attribute => 'chicken golden delight')
196
+ }.should raise_error(Scrapify::AttributeDoesNotExist)
197
+ end
178
198
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrapify
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-28 00:00:00.000000000Z
12
+ date: 2012-07-11 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &70233941360460 !ruby/object:Gem::Requirement
16
+ requirement: &70278707139780 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70233941360460
24
+ version_requirements: *70278707139780
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: mocha
27
- requirement: &70233941359980 !ruby/object:Gem::Requirement
27
+ requirement: &70278707139300 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *70233941359980
35
+ version_requirements: *70278707139300
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: fakeweb
38
- requirement: &70233941359460 !ruby/object:Gem::Requirement
38
+ requirement: &70278707138800 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70233941359460
46
+ version_requirements: *70278707138800
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: nokogiri
49
- requirement: &70233941358800 !ruby/object:Gem::Requirement
49
+ requirement: &70278707138260 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70233941358800
57
+ version_requirements: *70278707138260
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: activesupport
60
- requirement: &70233941358220 !ruby/object:Gem::Requirement
60
+ requirement: &70278707137740 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70233941358220
68
+ version_requirements: *70278707137740
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: json
71
- requirement: &70233941351380 !ruby/object:Gem::Requirement
71
+ requirement: &70278707137260 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70233941351380
79
+ version_requirements: *70278707137260
80
80
  description: ScrApify scraps static html sites to RESTlike APIs
81
81
  email:
82
82
  - sathish316@gmail.com
@@ -92,6 +92,7 @@ files:
92
92
  - lib/jsonify.rb
93
93
  - lib/scrapify.rb
94
94
  - lib/scrapify/base.rb
95
+ - lib/scrapify/exceptions.rb
95
96
  - lib/scrapify/scraper.rb
96
97
  - lib/scrapify/version.rb
97
98
  - scrapify.gemspec
@@ -99,6 +100,7 @@ files:
99
100
  - spec/pizza.rb
100
101
  - spec/scraper_spec.rb
101
102
  - spec/scrapify_spec.rb
103
+ - spec/shared/finder.rb
102
104
  - spec/shared/scrapify.rb
103
105
  - spec/spec_helper.rb
104
106
  - spec/test_models.rb
@@ -131,6 +133,7 @@ test_files:
131
133
  - spec/pizza.rb
132
134
  - spec/scraper_spec.rb
133
135
  - spec/scrapify_spec.rb
136
+ - spec/shared/finder.rb
134
137
  - spec/shared/scrapify.rb
135
138
  - spec/spec_helper.rb
136
139
  - spec/test_models.rb