raspar 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/README.md +5 -4
- data/example/Gemfile +3 -2
- data/example/best_buy.rb +30 -0
- data/example/{ccode.rb → country_code.rb} +3 -3
- data/example/leguide.rb +9 -10
- data/example/{dynamic.rb → leguide_dynamic.rb} +5 -8
- data/example/sample_parser.rb +116 -0
- data/lib/raspar.rb +2 -3
- data/lib/raspar/parser.rb +1 -1
- data/lib/raspar/version.rb +1 -1
- data/raspar.gemspec +1 -1
- data/spec/add_parser_spec.rb +10 -13
- data/spec/dynamic_parser_spec.rb +11 -11
- data/spec/parser_spec.rb +21 -20
- data/spec/raspar_spec.rb +5 -5
- data/spec/sample_parser.rb +12 -2
- data/spec/spec_helper.rb +1 -1
- metadata +15 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 144d5399e4105ea847e8d5441d80e15f022d2a8d
|
4
|
+
data.tar.gz: f73462c4d5be73a4f189cc430a539b2d7cd37577
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a9b42d385112c7da047ba39a4bb2202ad4ded4767a6c510b526ab61bbf30420cf3e8ce9f9af8e00e5bbb81066605456ea805d4385863936cfae29508792cd499
|
7
|
+
data.tar.gz: 9588cf43acef8472bb0a9493c1dc364a99721401c0c5e55a13ee9fe2035a76d2eb01e536a29879a29a3f0e85c112ea50b8b409bcb34456fd69ef1bde4774b18f
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -23,16 +23,17 @@ And then execute:
|
|
23
23
|
result = Raspar.parse(url, html) #This will return parsed result object array.
|
24
24
|
|
25
25
|
#Result
|
26
|
-
[
|
26
|
+
{ :products => [
|
27
27
|
#<Raspar::Result:0x007ffc91e4d640
|
28
28
|
@attrs={:name=>"Test1", :price=>"10", :image=>"1", :desc=>"Description"},
|
29
29
|
@domain="example.com",
|
30
|
-
@name=:
|
30
|
+
@name=:products>,
|
31
31
|
#<Raspar::Result:0x007ffc91e57be0
|
32
32
|
@attrs={:name=>"Test2", :price=>"20", :image=>"2", :desc=>"Description"},
|
33
33
|
@domain="example.com",
|
34
|
-
@name=:
|
35
|
-
|
34
|
+
@name=:products>
|
35
|
+
]
|
36
|
+
}
|
36
37
|
|
37
38
|
```
|
38
39
|
|
data/example/Gemfile
CHANGED
data/example/best_buy.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rest_client'
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'raspar'
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
class BestBuy
|
8
|
+
include Raspar
|
9
|
+
|
10
|
+
domain 'www.bestbuy.com'
|
11
|
+
|
12
|
+
collection :products, '.hproduct' do
|
13
|
+
attr :image, '.image-col img', prop: 'src'
|
14
|
+
attr :name, '.info-main .name'
|
15
|
+
attr :price, 'span[itemprop="price"]'
|
16
|
+
attr :sku, '.sku'
|
17
|
+
attr :description, '.product-short-description li', as: :array
|
18
|
+
attr :rating, 'span[itemprop="ratingValue"]', eval: ->(text, ele){ text.to_f }
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
url = ARGV[0] || "http://www.bestbuy.com/site/promo/htc-one-offer-118429"
|
24
|
+
p url
|
25
|
+
page = RestClient.get(url).to_str
|
26
|
+
|
27
|
+
Raspar.parse(url, page).each do |product|
|
28
|
+
pp product
|
29
|
+
p "*"*40
|
30
|
+
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
require 'rubygems'
|
2
|
+
require 'rest_client'
|
2
3
|
require 'bundler/setup'
|
3
|
-
require 'open-uri'
|
4
4
|
require 'raspar'
|
5
5
|
require 'pp'
|
6
6
|
|
7
|
-
class
|
7
|
+
class CountryCode
|
8
8
|
include Raspar
|
9
9
|
|
10
10
|
domain 'http://www.exchange-rate.com'
|
@@ -17,6 +17,6 @@ class CCode
|
|
17
17
|
end
|
18
18
|
|
19
19
|
url = 'http://www.exchange-rate.com/currency-list.html'
|
20
|
-
page =
|
20
|
+
page = RestClient.get(url).to_str
|
21
21
|
|
22
22
|
Raspar.parse(url, page).each {|i| pp i }
|
data/example/leguide.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'rubygems'
|
2
|
+
require 'rest_client'
|
2
3
|
require 'bundler/setup'
|
3
|
-
require 'open-uri'
|
4
4
|
require 'raspar'
|
5
5
|
require 'pp'
|
6
6
|
|
@@ -16,13 +16,12 @@ class Leguide
|
|
16
16
|
attr :name, '.block_bpu_feature .p b'
|
17
17
|
attr :specifications, '#page2', :eval => :build_specification
|
18
18
|
|
19
|
-
collection :product, '.
|
20
|
-
attr :alt_name, '.gopt.
|
21
|
-
attr :image, '.lg_photo img', :prop => 'src'
|
22
|
-
attr :price, '.
|
23
|
-
attr :orignal_price, '.
|
19
|
+
collection :product, '.offers_item' do
|
20
|
+
attr :alt_name, '.gopt.l'
|
21
|
+
attr :image, '.lg_photo img, .img_container img', :prop => 'src'
|
22
|
+
attr :price, '.gopt .prices'
|
23
|
+
attr :orignal_price, '.gopt.old_price'
|
24
24
|
attr :desc, '.gopt.description,.info .description'
|
25
|
-
attr :vendor, '.name a'
|
26
25
|
attr :availability, '.av', :prop => 'data-value', :eval => DATA_PROC
|
27
26
|
attr :delivery_time, '.dv', :prop => 'data-value', :eval => DATA_PROC
|
28
27
|
attr :shipping_price, '.delivery.gopt', :eval => SHIPPING_PROC
|
@@ -44,11 +43,11 @@ class Leguide
|
|
44
43
|
|
45
44
|
end
|
46
45
|
|
47
|
-
url = 'http://www.leguide.com/sb/bp/5010500/hotpoint_ariston/ECO9F_149_FRS/55743410.htm'
|
48
46
|
url = 'http://www.leguide.com/electromenager.htm'
|
47
|
+
url = ARGV[0] || url
|
49
48
|
p ARGV[0] || url
|
50
|
-
|
51
|
-
page =
|
49
|
+
|
50
|
+
page = RestClient.get(url).to_str
|
52
51
|
|
53
52
|
Raspar.parse(url, page).each do |o|
|
54
53
|
pp o
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'rubygems'
|
2
|
+
require 'rest_client'
|
2
3
|
require 'bundler/setup'
|
3
|
-
require 'open-uri'
|
4
4
|
require 'raspar'
|
5
5
|
require 'pp'
|
6
6
|
|
@@ -38,13 +38,9 @@ selector_map = {
|
|
38
38
|
:select => '.offers_list li',
|
39
39
|
:attrs => {
|
40
40
|
:image => { :select => 'img', :prop => 'src'},
|
41
|
-
:price => { :select => '.
|
42
|
-
:orignal_price => { :select => '.price .barre', :eval => :parse_price},
|
41
|
+
:price => { :select => '.gopt .prices', :eval => :parse_price},
|
43
42
|
:desc => { :select => '.gopt.description,.info .description'},
|
44
43
|
:vendor => { :select => '.name a' },
|
45
|
-
:availability => { :select => '.av', :prop => 'data-value', :eval => :data_attr_parse},
|
46
|
-
:delivery_time => { :select => '.dv', :prop => 'data-value', :eval => :data_attr_parse},
|
47
|
-
:shipping_price => { :select => '.delivery.gopt'}
|
48
44
|
}
|
49
45
|
}
|
50
46
|
}
|
@@ -52,8 +48,9 @@ selector_map = {
|
|
52
48
|
|
53
49
|
Raspar.add(domain, selector_map, ParserHelper)
|
54
50
|
|
55
|
-
url = 'http://www.leguide.com/
|
56
|
-
|
51
|
+
url = 'http://www.leguide.com/electromenager.htm'
|
52
|
+
p url
|
53
|
+
page = RestClient.get(url).to_str
|
57
54
|
|
58
55
|
Raspar.parse(url, page).each do |i|
|
59
56
|
pp i
|
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'raspar'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
FAKE_PAGE = %q{
|
7
|
+
<!DOCTYPE html>
|
8
|
+
<html>
|
9
|
+
<body>
|
10
|
+
|
11
|
+
<span class="desc">Description</span>
|
12
|
+
<ul class="specs">
|
13
|
+
<li>Spec 1</li>
|
14
|
+
<li>Spec 2</li>
|
15
|
+
<li>Spec 3</li>
|
16
|
+
</ul>
|
17
|
+
|
18
|
+
<div>
|
19
|
+
<img src="1">
|
20
|
+
<span>Test1</span>
|
21
|
+
<span class="price">10</span>
|
22
|
+
</div>
|
23
|
+
|
24
|
+
<div>
|
25
|
+
<img src="2">
|
26
|
+
<span>Test2</span>
|
27
|
+
<span class="price">20</span>
|
28
|
+
</div>
|
29
|
+
|
30
|
+
<span class="second">
|
31
|
+
<img src="3">
|
32
|
+
<span>Test3</span>
|
33
|
+
<span class="price">30</span>
|
34
|
+
</span>
|
35
|
+
|
36
|
+
<div class="offer">
|
37
|
+
<span class="name">First Offer</span>
|
38
|
+
<span class="percentage">10% off</span>
|
39
|
+
</div>
|
40
|
+
|
41
|
+
</body>
|
42
|
+
</html>
|
43
|
+
}
|
44
|
+
|
45
|
+
class SampleParser
|
46
|
+
include Raspar
|
47
|
+
|
48
|
+
domain 'http://sample.com'
|
49
|
+
|
50
|
+
attr :desc, '.desc', :eval => :full_desc
|
51
|
+
attr :specs, '.specs li', :as => :array, :eval => :format_specs
|
52
|
+
|
53
|
+
collection :product, 'div,span.second' do
|
54
|
+
attr :image, 'img', :attr => 'src'
|
55
|
+
attr :image_url, 'img', :attr => 'src', :eval => :make_image_url
|
56
|
+
attr :name, 'span:first, .name', :eval => :full_name
|
57
|
+
attr :price, '.price', :eval => Proc.new{|i| i.to_i}
|
58
|
+
attr :all_text
|
59
|
+
attr :price_map do |text, ele|
|
60
|
+
val = ele.search('span').collect{|s| s.content.strip}
|
61
|
+
{val[0] => val[1].to_f}
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
collection :offer, '.offer' do
|
66
|
+
attr :name, '.name'
|
67
|
+
attr :percentage, '.percentage'
|
68
|
+
end
|
69
|
+
|
70
|
+
def full_name(val, ele)
|
71
|
+
"Full Name: #{val}"
|
72
|
+
end
|
73
|
+
|
74
|
+
def name_price(val, ele)
|
75
|
+
val = ele.search('span').collect{|s| s.content.strip}
|
76
|
+
{val[0] => val[1].to_f}
|
77
|
+
end
|
78
|
+
|
79
|
+
def make_image_url(path, ele)
|
80
|
+
self.class.absolute_url(path)
|
81
|
+
end
|
82
|
+
|
83
|
+
def full_desc(text, ele)
|
84
|
+
"#{text} full desc"
|
85
|
+
end
|
86
|
+
|
87
|
+
def format_specs(text, ele)
|
88
|
+
text.downcase
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
92
|
+
|
93
|
+
##pp SampleParser.attrs
|
94
|
+
pp Raspar.parse('http://sample.com', FAKE_PAGE)
|
95
|
+
|
96
|
+
|
97
|
+
selector_map = {
|
98
|
+
:common_attrs => {
|
99
|
+
:desc => {:select => '.desc'}
|
100
|
+
},
|
101
|
+
:collections =>{
|
102
|
+
:product => {
|
103
|
+
:select => 'div, span.second',
|
104
|
+
:attrs => {
|
105
|
+
:name => { :select => 'span:first'},
|
106
|
+
:price => { :select => 'span.price'},
|
107
|
+
:image => { :select => 'img', :attr => 'src'}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
113
|
+
@site = 'http://dynmaicparser.com'
|
114
|
+
@dynmaic_parser = Raspar.add(@site, selector_map)
|
115
|
+
pp @dynmaic_parser.attrs
|
116
|
+
pp Raspar.parse(@site, FAKE_PAGE)
|
data/lib/raspar.rb
CHANGED
@@ -55,7 +55,7 @@ module Raspar
|
|
55
55
|
def parse(url, html)
|
56
56
|
host = URI(url).host
|
57
57
|
if @parsers[host]
|
58
|
-
@parsers[host].parse(html)
|
58
|
+
@parsers[host].parse(html).group_by(&:name)
|
59
59
|
else
|
60
60
|
puts "No parser define for #{host}"
|
61
61
|
nil
|
@@ -83,8 +83,7 @@ module Raspar
|
|
83
83
|
klass.domain(url)
|
84
84
|
klass.class_exec(&block) if block_given?
|
85
85
|
|
86
|
-
|
87
|
-
Object.const_set(klass_name, klass)
|
86
|
+
Raspar.const_set(klass_name, klass)
|
88
87
|
end
|
89
88
|
|
90
89
|
end
|
data/lib/raspar/parser.rb
CHANGED
data/lib/raspar/version.rb
CHANGED
data/raspar.gemspec
CHANGED
data/spec/add_parser_spec.rb
CHANGED
@@ -8,7 +8,7 @@ describe 'Add Parser' do
|
|
8
8
|
Raspar.add(@site) do
|
9
9
|
attr :desc, '.desc', :common => true
|
10
10
|
|
11
|
-
collection :
|
11
|
+
collection :products, '.item,span.second' do
|
12
12
|
attr :name, 'span:first, .name', :eval => :full_name
|
13
13
|
attr :price, '.price', :eval => Proc.new{|i| i.to_i}
|
14
14
|
end
|
@@ -27,29 +27,26 @@ describe 'Add Parser' do
|
|
27
27
|
end
|
28
28
|
|
29
29
|
it 'should register parser and parse data' do
|
30
|
-
Raspar.parsers.
|
30
|
+
expect(Raspar.parsers).to include({@domain => @parser_class})
|
31
31
|
end
|
32
32
|
|
33
33
|
it "should have info" do
|
34
|
-
@parser_class.info.
|
34
|
+
expect(@parser_class.info).to eq({
|
35
35
|
:domain => @domain,
|
36
|
-
:collections => [:
|
36
|
+
:collections => [:products],
|
37
37
|
:common_attrs => [:desc]
|
38
|
-
}
|
38
|
+
})
|
39
39
|
end
|
40
40
|
|
41
41
|
it "should parse html and create object" do
|
42
42
|
parsed_objs = Raspar.parse(@site, FAKE_PAGE)
|
43
43
|
|
44
|
-
parsed_objs.length.
|
45
|
-
|
46
|
-
parsed_objs.count{|o| o.name == :product}.should == 4
|
47
|
-
|
44
|
+
expect(parsed_objs[:products].length).to eq(4)
|
48
45
|
count = 1
|
49
|
-
parsed_objs
|
50
|
-
o[:name].
|
51
|
-
o[:price].
|
52
|
-
o[:desc].
|
46
|
+
parsed_objs[:products].each do |o|
|
47
|
+
expect(o[:name]).to eq("Full Name: Test#{count}")
|
48
|
+
expect(o[:price]).to eq(count * 10)
|
49
|
+
expect(o[:desc]).to eq("Description")
|
53
50
|
|
54
51
|
count = count + 1
|
55
52
|
end
|
data/spec/dynamic_parser_spec.rb
CHANGED
@@ -15,7 +15,7 @@ module Raspar
|
|
15
15
|
:desc => {:select => '.desc'}
|
16
16
|
},
|
17
17
|
:collections =>{
|
18
|
-
:
|
18
|
+
:products => {
|
19
19
|
:select => 'div.item, span.second',
|
20
20
|
:attrs => {
|
21
21
|
:name => { :select => 'span:first'},
|
@@ -33,9 +33,9 @@ module Raspar
|
|
33
33
|
describe '#onload' do
|
34
34
|
|
35
35
|
it "should register DynamicParser to Raspar parser list" do
|
36
|
-
Raspar.parsers[@domain].class.
|
36
|
+
expect(Raspar.parsers[@domain].class).to eq(Raspar::DynamicParser)
|
37
37
|
|
38
|
-
@dynmaic_parser.domain.
|
38
|
+
expect(@dynmaic_parser.domain).to eq(@domain)
|
39
39
|
end
|
40
40
|
|
41
41
|
end
|
@@ -44,28 +44,28 @@ module Raspar
|
|
44
44
|
|
45
45
|
it "should parse html and create object" do
|
46
46
|
parsed_objs = Raspar.parse(@site, FAKE_PAGE)
|
47
|
-
parsed_objs
|
47
|
+
parsed_objs[:products] == 4
|
48
48
|
|
49
49
|
count = 1
|
50
|
-
parsed_objs.each do |o|
|
51
|
-
o.class.
|
50
|
+
parsed_objs[:products].each do |o|
|
51
|
+
expect(o.class).to eq(Raspar::Result)
|
52
52
|
|
53
|
-
o[:name].
|
54
|
-
o[:image].
|
53
|
+
expect(o[:name]).to eq("Test#{count}")
|
54
|
+
expect(o[:image]).to eq(count.to_s)
|
55
55
|
|
56
56
|
#Price should eval using proc given in option which convert string value
|
57
57
|
#to integer
|
58
|
-
o[:price].
|
58
|
+
expect(o[:price]).to eq((count * 10).to_s)
|
59
59
|
|
60
60
|
#External Field check
|
61
|
-
o[:desc].
|
61
|
+
expect(o[:desc]).to eq("Description")
|
62
62
|
count = count + 1
|
63
63
|
end
|
64
64
|
|
65
65
|
end
|
66
66
|
|
67
67
|
it 'should return absoulte url' do
|
68
|
-
@dynmaic_parser.absolute_url('/test').
|
68
|
+
expect(@dynmaic_parser.absolute_url('/test')).to eq(@site + '/test')
|
69
69
|
end
|
70
70
|
|
71
71
|
end
|
data/spec/parser_spec.rb
CHANGED
@@ -17,21 +17,21 @@ module Raspar
|
|
17
17
|
describe 'onload' do
|
18
18
|
|
19
19
|
it "should register SampleParser to Raspar parser list" do
|
20
|
-
Raspar.parsers.
|
20
|
+
expect(Raspar.parsers).to include({@domain => SampleParser})
|
21
21
|
|
22
|
-
SampleParser.domain.
|
22
|
+
expect(SampleParser.domain).to eq(@domain)
|
23
23
|
end
|
24
24
|
|
25
25
|
it 'should return absoulte url' do
|
26
|
-
SampleParser.absolute_url('/test').
|
26
|
+
expect(SampleParser.absolute_url('/test')).to eq(@site + '/test')
|
27
27
|
end
|
28
28
|
|
29
29
|
it "should have info" do
|
30
|
-
SampleParser.info.
|
30
|
+
expect(SampleParser.info).to eq({
|
31
31
|
:domain => @domain,
|
32
|
-
:collections => [:
|
32
|
+
:collections => [:products, :offers, :related_products],
|
33
33
|
:common_attrs => [:desc, :specs]
|
34
|
-
}
|
34
|
+
})
|
35
35
|
end
|
36
36
|
|
37
37
|
it "should not define accessor if options not contail :selector" do
|
@@ -46,36 +46,37 @@ module Raspar
|
|
46
46
|
parsed_objs = Raspar.parse(@site, FAKE_PAGE)
|
47
47
|
|
48
48
|
#Total parse objects
|
49
|
-
parsed_objs.length.
|
49
|
+
expect(parsed_objs.keys.length).to eq(3)
|
50
50
|
|
51
|
-
parsed_objs.
|
52
|
-
parsed_objs.
|
51
|
+
expect(parsed_objs[:products].length).to eq(4)
|
52
|
+
expect(parsed_objs[:offers].length).to eq(1)
|
53
|
+
expect(parsed_objs[:related_products].length).to eq(1)
|
53
54
|
|
54
55
|
count = 1
|
55
|
-
parsed_objs
|
56
|
-
o[:name].
|
57
|
-
o[:image].
|
56
|
+
parsed_objs[:products].each do |o|
|
57
|
+
expect(o[:name]).to eq("Full Name: Test#{count}")
|
58
|
+
expect(o[:image]).to eq(count.to_s)
|
58
59
|
|
59
60
|
#Price should eval using proc given in option which convert string value
|
60
61
|
#to integer
|
61
|
-
o[:price].
|
62
|
+
expect(o[:price]).to eq(count * 10)
|
62
63
|
|
63
64
|
#External Field check
|
64
|
-
o[:desc].
|
65
|
+
expect(o[:desc]).to eq("Description is full desc")
|
65
66
|
|
66
67
|
#self selector
|
67
|
-
o[:all_text].
|
68
|
+
expect(o[:all_text]).to eq("Test#{count}\n #{count*10}")
|
68
69
|
|
69
|
-
o[:price_map].
|
70
|
+
expect(o[:price_map]).to eq({"Test#{count}" => (count*10).to_f})
|
70
71
|
|
71
|
-
o[:specs].
|
72
|
+
expect(o[:specs]).to eq(['spec 1', 'spec 2', 'spec 3'])
|
72
73
|
|
73
74
|
count = count + 1
|
74
75
|
end
|
75
76
|
|
76
|
-
parsed_objs
|
77
|
-
o[:name].
|
78
|
-
o[:percentage].
|
77
|
+
parsed_objs[:offers].each do |o|
|
78
|
+
expect(o[:name]).to eq('First Offer')
|
79
|
+
expect(o[:percentage]).to eq('10% off')
|
79
80
|
end
|
80
81
|
|
81
82
|
end
|
data/spec/raspar_spec.rb
CHANGED
@@ -9,16 +9,16 @@ describe Raspar do
|
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should add domain to register parser list" do
|
12
|
-
Raspar.register(@site, TestParser).
|
13
|
-
Raspar.parsers.
|
14
|
-
Raspar.parsers.size.
|
12
|
+
expect(Raspar.register(@site, TestParser)).to eq(@host)
|
13
|
+
expect(Raspar.parsers).to include({@host => TestParser})
|
14
|
+
expect(Raspar.parsers.size).to be > 0
|
15
15
|
end
|
16
16
|
|
17
17
|
it "should clear registered domains" do
|
18
18
|
Raspar.register(@site, TestParser)
|
19
19
|
Raspar.clear_parser_list
|
20
20
|
|
21
|
-
Raspar.parsers.size.
|
21
|
+
expect(Raspar.parsers.size).to eq(0)
|
22
22
|
end
|
23
23
|
|
24
24
|
it "should able to remove parser from the registered list" do
|
@@ -27,7 +27,7 @@ describe Raspar do
|
|
27
27
|
|
28
28
|
Raspar.remove(@site)
|
29
29
|
|
30
|
-
Raspar.parsers.
|
30
|
+
expect(Raspar.parsers).not_to include(@host)
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
data/spec/sample_parser.rb
CHANGED
@@ -6,7 +6,7 @@ class SampleParser
|
|
6
6
|
attr :desc, '.desc', :common => true, :eval => :full_desc
|
7
7
|
attr :specs, '.specs li', :common => true, :as => :array, :eval => :format_specs
|
8
8
|
|
9
|
-
collection :
|
9
|
+
collection :products, '.item,span.second' do
|
10
10
|
attr :image, 'img', :prop => 'src'
|
11
11
|
attr :image_url, 'img', :prop => 'src', :eval => :make_image_url
|
12
12
|
attr :name, 'span:first, .name', :eval => :full_name
|
@@ -18,11 +18,15 @@ class SampleParser
|
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
-
collection :
|
21
|
+
collection :offers, '.offer' do
|
22
22
|
attr :name, '.name'
|
23
23
|
attr :percentage, '.percentage'
|
24
24
|
end
|
25
25
|
|
26
|
+
collection :related_products, 'ol.related_products' do
|
27
|
+
attr :name, 'li', as: :array
|
28
|
+
end
|
29
|
+
|
26
30
|
def full_name(val, ele)
|
27
31
|
"Full Name: #{val}"
|
28
32
|
end
|
@@ -87,6 +91,12 @@ FAKE_PAGE = %q{
|
|
87
91
|
<span class="percentage">10% off</span>
|
88
92
|
</div>
|
89
93
|
|
94
|
+
<ol class="related_products">
|
95
|
+
<li> Product 1 </li>
|
96
|
+
<li> Product 2 </li>
|
97
|
+
<li> Product 3 </li>
|
98
|
+
</ol>
|
99
|
+
|
90
100
|
</body>
|
91
101
|
</html>
|
92
102
|
}
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,41 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: raspar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jiren Patel
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 1.5.5
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.5.5
|
41
41
|
description: Raspar collects data from the html page and creates object from it.
|
@@ -45,17 +45,19 @@ executables: []
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
-
- .gitignore
|
49
|
-
- .travis.yml
|
48
|
+
- ".gitignore"
|
49
|
+
- ".travis.yml"
|
50
50
|
- Gemfile
|
51
51
|
- LICENSE
|
52
52
|
- README.md
|
53
53
|
- Rakefile
|
54
54
|
- example/Gemfile
|
55
|
-
- example/
|
55
|
+
- example/best_buy.rb
|
56
|
+
- example/country_code.rb
|
56
57
|
- example/debug.rb
|
57
|
-
- example/dynamic.rb
|
58
58
|
- example/leguide.rb
|
59
|
+
- example/leguide_dynamic.rb
|
60
|
+
- example/sample_parser.rb
|
59
61
|
- lib/raspar.rb
|
60
62
|
- lib/raspar/dynamic_parser.rb
|
61
63
|
- lib/raspar/parser.rb
|
@@ -77,17 +79,17 @@ require_paths:
|
|
77
79
|
- lib
|
78
80
|
required_ruby_version: !ruby/object:Gem::Requirement
|
79
81
|
requirements:
|
80
|
-
- -
|
82
|
+
- - ">="
|
81
83
|
- !ruby/object:Gem::Version
|
82
84
|
version: '0'
|
83
85
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
86
|
requirements:
|
85
|
-
- -
|
87
|
+
- - ">="
|
86
88
|
- !ruby/object:Gem::Version
|
87
89
|
version: '0'
|
88
90
|
requirements: []
|
89
91
|
rubyforge_project: raspar
|
90
|
-
rubygems_version: 2.
|
92
|
+
rubygems_version: 2.2.2
|
91
93
|
signing_key:
|
92
94
|
specification_version: 4
|
93
95
|
summary: A generic html/xml parser
|