raspar 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/README.md +5 -4
- data/example/Gemfile +3 -2
- data/example/best_buy.rb +30 -0
- data/example/{ccode.rb → country_code.rb} +3 -3
- data/example/leguide.rb +9 -10
- data/example/{dynamic.rb → leguide_dynamic.rb} +5 -8
- data/example/sample_parser.rb +116 -0
- data/lib/raspar.rb +2 -3
- data/lib/raspar/parser.rb +1 -1
- data/lib/raspar/version.rb +1 -1
- data/raspar.gemspec +1 -1
- data/spec/add_parser_spec.rb +10 -13
- data/spec/dynamic_parser_spec.rb +11 -11
- data/spec/parser_spec.rb +21 -20
- data/spec/raspar_spec.rb +5 -5
- data/spec/sample_parser.rb +12 -2
- data/spec/spec_helper.rb +1 -1
- metadata +15 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 144d5399e4105ea847e8d5441d80e15f022d2a8d
|
4
|
+
data.tar.gz: f73462c4d5be73a4f189cc430a539b2d7cd37577
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a9b42d385112c7da047ba39a4bb2202ad4ded4767a6c510b526ab61bbf30420cf3e8ce9f9af8e00e5bbb81066605456ea805d4385863936cfae29508792cd499
|
7
|
+
data.tar.gz: 9588cf43acef8472bb0a9493c1dc364a99721401c0c5e55a13ee9fe2035a76d2eb01e536a29879a29a3f0e85c112ea50b8b409bcb34456fd69ef1bde4774b18f
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -23,16 +23,17 @@ And then execute:
|
|
23
23
|
result = Rapsar.parse(url, html) #This will return parsed result object array.
|
24
24
|
|
25
25
|
#Result
|
26
|
-
[
|
26
|
+
{ :products => [
|
27
27
|
#<Raspar::Result:0x007ffc91e4d640
|
28
28
|
@attrs={:name=>"Test1", :price=>"10", :image=>"1", :desc=>"Description"},
|
29
29
|
@domain="example.com",
|
30
|
-
@name=:
|
30
|
+
@name=:products>,
|
31
31
|
#<Raspar::Result:0x007ffc91e57be0
|
32
32
|
@attrs={:name=>"Test2", :price=>"20", :image=>"2", :desc=>"Description"},
|
33
33
|
@domain="example.com",
|
34
|
-
@name=:
|
35
|
-
|
34
|
+
@name=:products>
|
35
|
+
]
|
36
|
+
}
|
36
37
|
|
37
38
|
```
|
38
39
|
|
data/example/Gemfile
CHANGED
data/example/best_buy.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rest_client'
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'raspar'
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
class BestBuy
|
8
|
+
include Raspar
|
9
|
+
|
10
|
+
domain 'www.bestbuy.com'
|
11
|
+
|
12
|
+
collection :products, '.hproduct' do
|
13
|
+
attr :image, '.image-col img', prop: 'src'
|
14
|
+
attr :name, '.info-main .name'
|
15
|
+
attr :price, 'span[itemprop="price"]'
|
16
|
+
attr :sku, '.sku'
|
17
|
+
attr :description, '.product-short-description li', as: :array
|
18
|
+
attr :rating, 'span[itemprop="ratingValue"]', eval: ->(text, ele){ text.to_f }
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
url = ARGV[0] || "http://www.bestbuy.com/site/promo/htc-one-offer-118429"
|
24
|
+
p url
|
25
|
+
page = RestClient.get(url).to_str
|
26
|
+
|
27
|
+
Raspar.parse(url, page).each do |product|
|
28
|
+
pp product
|
29
|
+
p "*"*40
|
30
|
+
end
|
@@ -1,10 +1,10 @@
|
|
1
1
|
require 'rubygems'
|
2
|
+
require 'rest_client'
|
2
3
|
require 'bundler/setup'
|
3
|
-
require 'open-uri'
|
4
4
|
require 'raspar'
|
5
5
|
require 'pp'
|
6
6
|
|
7
|
-
class
|
7
|
+
class CountryCode
|
8
8
|
include Raspar
|
9
9
|
|
10
10
|
domain 'http://www.exchange-rate.com'
|
@@ -17,6 +17,6 @@ class CCode
|
|
17
17
|
end
|
18
18
|
|
19
19
|
url = 'http://www.exchange-rate.com/currency-list.html'
|
20
|
-
page =
|
20
|
+
page = RestClient.get(url).to_str
|
21
21
|
|
22
22
|
Raspar.parse(url, page).each {|i| pp i }
|
data/example/leguide.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'rubygems'
|
2
|
+
require 'rest_client'
|
2
3
|
require 'bundler/setup'
|
3
|
-
require 'open-uri'
|
4
4
|
require 'raspar'
|
5
5
|
require 'pp'
|
6
6
|
|
@@ -16,13 +16,12 @@ class Leguide
|
|
16
16
|
attr :name, '.block_bpu_feature .p b'
|
17
17
|
attr :specifications, '#page2', :eval => :build_specification
|
18
18
|
|
19
|
-
collection :product, '.
|
20
|
-
attr :alt_name, '.gopt.
|
21
|
-
attr :image, '.lg_photo img', :prop => 'src'
|
22
|
-
attr :price, '.
|
23
|
-
attr :orignal_price, '.
|
19
|
+
collection :product, '.offers_item' do
|
20
|
+
attr :alt_name, '.gopt.l'
|
21
|
+
attr :image, '.lg_photo img, .img_container img', :prop => 'src'
|
22
|
+
attr :price, '.gopt .prices'
|
23
|
+
attr :orignal_price, '.gopt.old_price'
|
24
24
|
attr :desc, '.gopt.description,.info .description'
|
25
|
-
attr :vendor, '.name a'
|
26
25
|
attr :availability, '.av', :prop => 'data-value', :eval => DATA_PROC
|
27
26
|
attr :delivery_time, '.dv', :prop => 'data-value', :eval => DATA_PROC
|
28
27
|
attr :shipping_price, '.delivery.gopt', :eval => SHIPPING_PROC
|
@@ -44,11 +43,11 @@ class Leguide
|
|
44
43
|
|
45
44
|
end
|
46
45
|
|
47
|
-
url = 'http://www.leguide.com/sb/bp/5010500/hotpoint_ariston/ECO9F_149_FRS/55743410.htm'
|
48
46
|
url = 'http://www.leguide.com/electromenager.htm'
|
47
|
+
url = ARGV[0] || url
|
49
48
|
p ARGV[0] || url
|
50
|
-
|
51
|
-
page =
|
49
|
+
|
50
|
+
page = RestClient.get(url).to_str
|
52
51
|
|
53
52
|
Raspar.parse(url, page).each do |o|
|
54
53
|
pp o
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'rubygems'
|
2
|
+
require 'rest_client'
|
2
3
|
require 'bundler/setup'
|
3
|
-
require 'open-uri'
|
4
4
|
require 'raspar'
|
5
5
|
require 'pp'
|
6
6
|
|
@@ -38,13 +38,9 @@ selector_map = {
|
|
38
38
|
:select => '.offers_list li',
|
39
39
|
:attrs => {
|
40
40
|
:image => { :select => 'img', :prop => 'src'},
|
41
|
-
:price => { :select => '.
|
42
|
-
:orignal_price => { :select => '.price .barre', :eval => :parse_price},
|
41
|
+
:price => { :select => '.gopt .prices', :eval => :parse_price},
|
43
42
|
:desc => { :select => '.gopt.description,.info .description'},
|
44
43
|
:vendor => { :select => '.name a' },
|
45
|
-
:availability => { :select => '.av', :prop => 'data-value', :eval => :data_attr_parse},
|
46
|
-
:delivery_time => { :select => '.dv', :prop => 'data-value', :eval => :data_attr_parse},
|
47
|
-
:shipping_price => { :select => '.delivery.gopt'}
|
48
44
|
}
|
49
45
|
}
|
50
46
|
}
|
@@ -52,8 +48,9 @@ selector_map = {
|
|
52
48
|
|
53
49
|
Raspar.add(domain, selector_map, ParserHelper)
|
54
50
|
|
55
|
-
url = 'http://www.leguide.com/
|
56
|
-
|
51
|
+
url = 'http://www.leguide.com/electromenager.htm'
|
52
|
+
p url
|
53
|
+
page = RestClient.get(url).to_str
|
57
54
|
|
58
55
|
Raspar.parse(url, page).each do |i|
|
59
56
|
pp i
|
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'raspar'
|
4
|
+
require 'pp'
|
5
|
+
|
6
|
+
FAKE_PAGE = %q{
|
7
|
+
<!DOCTYPE html>
|
8
|
+
<html>
|
9
|
+
<body>
|
10
|
+
|
11
|
+
<span class="desc">Description</span>
|
12
|
+
<ul class="specs">
|
13
|
+
<li>Spec 1</li>
|
14
|
+
<li>Spec 2</li>
|
15
|
+
<li>Spec 3</li>
|
16
|
+
</ul>
|
17
|
+
|
18
|
+
<div>
|
19
|
+
<img src="1">
|
20
|
+
<span>Test1</span>
|
21
|
+
<span class="price">10</span>
|
22
|
+
</div>
|
23
|
+
|
24
|
+
<div>
|
25
|
+
<img src="2">
|
26
|
+
<span>Test2</span>
|
27
|
+
<span class="price">20</span>
|
28
|
+
</div>
|
29
|
+
|
30
|
+
<span class="second">
|
31
|
+
<img src="3">
|
32
|
+
<span>Test3</span>
|
33
|
+
<span class="price">30</span>
|
34
|
+
</span>
|
35
|
+
|
36
|
+
<div class="offer">
|
37
|
+
<span class="name">First Offer</span>
|
38
|
+
<span class="percentage">10% off</span>
|
39
|
+
</div>
|
40
|
+
|
41
|
+
</body>
|
42
|
+
</html>
|
43
|
+
}
|
44
|
+
|
45
|
+
class SampleParser
|
46
|
+
include Raspar
|
47
|
+
|
48
|
+
domain 'http://sample.com'
|
49
|
+
|
50
|
+
attr :desc, '.desc', :eval => :full_desc
|
51
|
+
attr :specs, '.specs li', :as => :array, :eval => :format_specs
|
52
|
+
|
53
|
+
collection :product, 'div,span.second' do
|
54
|
+
attr :image, 'img', :attr => 'src'
|
55
|
+
attr :image_url, 'img', :attr => 'src', :eval => :make_image_url
|
56
|
+
attr :name, 'span:first, .name', :eval => :full_name
|
57
|
+
attr :price, '.price', :eval => Proc.new{|i| i.to_i}
|
58
|
+
attr :all_text
|
59
|
+
attr :price_map do |text, ele|
|
60
|
+
val = ele.search('span').collect{|s| s.content.strip}
|
61
|
+
{val[0] => val[1].to_f}
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
collection :offer, '.offer' do
|
66
|
+
attr :name, '.name'
|
67
|
+
attr :percentage, '.percentage'
|
68
|
+
end
|
69
|
+
|
70
|
+
def full_name(val, ele)
|
71
|
+
"Full Name: #{val}"
|
72
|
+
end
|
73
|
+
|
74
|
+
def name_price(val, ele)
|
75
|
+
val = ele.search('span').collect{|s| s.content.strip}
|
76
|
+
{val[0] => val[1].to_f}
|
77
|
+
end
|
78
|
+
|
79
|
+
def make_image_url(path, ele)
|
80
|
+
self.class.absolute_url(path)
|
81
|
+
end
|
82
|
+
|
83
|
+
def full_desc(text, ele)
|
84
|
+
"#{text} full desc"
|
85
|
+
end
|
86
|
+
|
87
|
+
def format_specs(text, ele)
|
88
|
+
text.downcase
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
92
|
+
|
93
|
+
##pp SampleParser.attrs
|
94
|
+
pp Raspar.parse('http://sample.com', FAKE_PAGE)
|
95
|
+
|
96
|
+
|
97
|
+
selector_map = {
|
98
|
+
:common_attrs => {
|
99
|
+
:desc => {:select => '.desc'}
|
100
|
+
},
|
101
|
+
:collections =>{
|
102
|
+
:product => {
|
103
|
+
:select => 'div, span.second',
|
104
|
+
:attrs => {
|
105
|
+
:name => { :select => 'span:first'},
|
106
|
+
:price => { :select => 'span.price'},
|
107
|
+
:image => { :select => 'img', :attr => 'src'}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
113
|
+
@site = 'http://dynmaicparser.com'
|
114
|
+
@dynmaic_parser = Raspar.add(@site, selector_map)
|
115
|
+
pp @dynmaic_parser.attrs
|
116
|
+
pp Raspar.parse(@site, FAKE_PAGE)
|
data/lib/raspar.rb
CHANGED
@@ -55,7 +55,7 @@ module Raspar
|
|
55
55
|
def parse(url, html)
|
56
56
|
host = URI(url).host
|
57
57
|
if @parsers[host]
|
58
|
-
@parsers[host].parse(html)
|
58
|
+
@parsers[host].parse(html).group_by(&:name)
|
59
59
|
else
|
60
60
|
puts "No parser define for #{host}"
|
61
61
|
nil
|
@@ -83,8 +83,7 @@ module Raspar
|
|
83
83
|
klass.domain(url)
|
84
84
|
klass.class_exec(&block) if block_given?
|
85
85
|
|
86
|
-
|
87
|
-
Object.const_set(klass_name, klass)
|
86
|
+
Raspar.const_set(klass_name, klass)
|
88
87
|
end
|
89
88
|
|
90
89
|
end
|
data/lib/raspar/parser.rb
CHANGED
data/lib/raspar/version.rb
CHANGED
data/raspar.gemspec
CHANGED
data/spec/add_parser_spec.rb
CHANGED
@@ -8,7 +8,7 @@ describe 'Add Parser' do
|
|
8
8
|
Raspar.add(@site) do
|
9
9
|
attr :desc, '.desc', :common => true
|
10
10
|
|
11
|
-
collection :
|
11
|
+
collection :products, '.item,span.second' do
|
12
12
|
attr :name, 'span:first, .name', :eval => :full_name
|
13
13
|
attr :price, '.price', :eval => Proc.new{|i| i.to_i}
|
14
14
|
end
|
@@ -27,29 +27,26 @@ describe 'Add Parser' do
|
|
27
27
|
end
|
28
28
|
|
29
29
|
it 'should register parser and parse data' do
|
30
|
-
Raspar.parsers.
|
30
|
+
expect(Raspar.parsers).to include({@domain => @parser_class})
|
31
31
|
end
|
32
32
|
|
33
33
|
it "should have info" do
|
34
|
-
@parser_class.info.
|
34
|
+
expect(@parser_class.info).to eq({
|
35
35
|
:domain => @domain,
|
36
|
-
:collections => [:
|
36
|
+
:collections => [:products],
|
37
37
|
:common_attrs => [:desc]
|
38
|
-
}
|
38
|
+
})
|
39
39
|
end
|
40
40
|
|
41
41
|
it "should parse html and create object" do
|
42
42
|
parsed_objs = Raspar.parse(@site, FAKE_PAGE)
|
43
43
|
|
44
|
-
parsed_objs.length.
|
45
|
-
|
46
|
-
parsed_objs.count{|o| o.name == :product}.should == 4
|
47
|
-
|
44
|
+
expect(parsed_objs[:products].length).to eq(4)
|
48
45
|
count = 1
|
49
|
-
parsed_objs
|
50
|
-
o[:name].
|
51
|
-
o[:price].
|
52
|
-
o[:desc].
|
46
|
+
parsed_objs[:products].each do |o|
|
47
|
+
expect(o[:name]).to eq("Full Name: Test#{count}")
|
48
|
+
expect(o[:price]).to eq(count * 10)
|
49
|
+
expect(o[:desc]).to eq("Description")
|
53
50
|
|
54
51
|
count = count + 1
|
55
52
|
end
|
data/spec/dynamic_parser_spec.rb
CHANGED
@@ -15,7 +15,7 @@ module Raspar
|
|
15
15
|
:desc => {:select => '.desc'}
|
16
16
|
},
|
17
17
|
:collections =>{
|
18
|
-
:
|
18
|
+
:products => {
|
19
19
|
:select => 'div.item, span.second',
|
20
20
|
:attrs => {
|
21
21
|
:name => { :select => 'span:first'},
|
@@ -33,9 +33,9 @@ module Raspar
|
|
33
33
|
describe '#onload' do
|
34
34
|
|
35
35
|
it "should register DynamicParser to Raspar parser list" do
|
36
|
-
Raspar.parsers[@domain].class.
|
36
|
+
expect(Raspar.parsers[@domain].class).to eq(Raspar::DynamicParser)
|
37
37
|
|
38
|
-
@dynmaic_parser.domain.
|
38
|
+
expect(@dynmaic_parser.domain).to eq(@domain)
|
39
39
|
end
|
40
40
|
|
41
41
|
end
|
@@ -44,28 +44,28 @@ module Raspar
|
|
44
44
|
|
45
45
|
it "should parse html and create object" do
|
46
46
|
parsed_objs = Raspar.parse(@site, FAKE_PAGE)
|
47
|
-
parsed_objs
|
47
|
+
parsed_objs[:products] == 4
|
48
48
|
|
49
49
|
count = 1
|
50
|
-
parsed_objs.each do |o|
|
51
|
-
o.class.
|
50
|
+
parsed_objs[:products].each do |o|
|
51
|
+
expect(o.class).to eq(Raspar::Result)
|
52
52
|
|
53
|
-
o[:name].
|
54
|
-
o[:image].
|
53
|
+
expect(o[:name]).to eq("Test#{count}")
|
54
|
+
expect(o[:image]).to eq(count.to_s)
|
55
55
|
|
56
56
|
#Price should eval using proc given in option which convert string value
|
57
57
|
#to integer
|
58
|
-
o[:price].
|
58
|
+
expect(o[:price]).to eq((count * 10).to_s)
|
59
59
|
|
60
60
|
#External Field check
|
61
|
-
o[:desc].
|
61
|
+
expect(o[:desc]).to eq("Description")
|
62
62
|
count = count + 1
|
63
63
|
end
|
64
64
|
|
65
65
|
end
|
66
66
|
|
67
67
|
it 'should return absoulte url' do
|
68
|
-
@dynmaic_parser.absolute_url('/test').
|
68
|
+
expect(@dynmaic_parser.absolute_url('/test')).to eq(@site + '/test')
|
69
69
|
end
|
70
70
|
|
71
71
|
end
|
data/spec/parser_spec.rb
CHANGED
@@ -17,21 +17,21 @@ module Raspar
|
|
17
17
|
describe 'onload' do
|
18
18
|
|
19
19
|
it "should register SampleParser to Raspar parser list" do
|
20
|
-
Raspar.parsers.
|
20
|
+
expect(Raspar.parsers).to include({@domain => SampleParser})
|
21
21
|
|
22
|
-
SampleParser.domain.
|
22
|
+
expect(SampleParser.domain).to eq(@domain)
|
23
23
|
end
|
24
24
|
|
25
25
|
it 'should return absoulte url' do
|
26
|
-
SampleParser.absolute_url('/test').
|
26
|
+
expect(SampleParser.absolute_url('/test')).to eq(@site + '/test')
|
27
27
|
end
|
28
28
|
|
29
29
|
it "should have info" do
|
30
|
-
SampleParser.info.
|
30
|
+
expect(SampleParser.info).to eq({
|
31
31
|
:domain => @domain,
|
32
|
-
:collections => [:
|
32
|
+
:collections => [:products, :offers, :related_products],
|
33
33
|
:common_attrs => [:desc, :specs]
|
34
|
-
}
|
34
|
+
})
|
35
35
|
end
|
36
36
|
|
37
37
|
it "should not define accessor if options not contail :selector" do
|
@@ -46,36 +46,37 @@ module Raspar
|
|
46
46
|
parsed_objs = Raspar.parse(@site, FAKE_PAGE)
|
47
47
|
|
48
48
|
#Total parse objects
|
49
|
-
parsed_objs.length.
|
49
|
+
expect(parsed_objs.keys.length).to eq(3)
|
50
50
|
|
51
|
-
parsed_objs.
|
52
|
-
parsed_objs.
|
51
|
+
expect(parsed_objs[:products].length).to eq(4)
|
52
|
+
expect(parsed_objs[:offers].length).to eq(1)
|
53
|
+
expect(parsed_objs[:related_products].length).to eq(1)
|
53
54
|
|
54
55
|
count = 1
|
55
|
-
parsed_objs
|
56
|
-
o[:name].
|
57
|
-
o[:image].
|
56
|
+
parsed_objs[:products].each do |o|
|
57
|
+
expect(o[:name]).to eq("Full Name: Test#{count}")
|
58
|
+
expect(o[:image]).to eq(count.to_s)
|
58
59
|
|
59
60
|
#Price should eval using proc given in option which convert string value
|
60
61
|
#to integer
|
61
|
-
o[:price].
|
62
|
+
expect(o[:price]).to eq(count * 10)
|
62
63
|
|
63
64
|
#External Field check
|
64
|
-
o[:desc].
|
65
|
+
expect(o[:desc]).to eq("Description is full desc")
|
65
66
|
|
66
67
|
#self selector
|
67
|
-
o[:all_text].
|
68
|
+
expect(o[:all_text]).to eq("Test#{count}\n #{count*10}")
|
68
69
|
|
69
|
-
o[:price_map].
|
70
|
+
expect(o[:price_map]).to eq({"Test#{count}" => (count*10).to_f})
|
70
71
|
|
71
|
-
o[:specs].
|
72
|
+
expect(o[:specs]).to eq(['spec 1', 'spec 2', 'spec 3'])
|
72
73
|
|
73
74
|
count = count + 1
|
74
75
|
end
|
75
76
|
|
76
|
-
parsed_objs
|
77
|
-
o[:name].
|
78
|
-
o[:percentage].
|
77
|
+
parsed_objs[:offers].each do |o|
|
78
|
+
expect(o[:name]).to eq('First Offer')
|
79
|
+
expect(o[:percentage]).to eq('10% off')
|
79
80
|
end
|
80
81
|
|
81
82
|
end
|
data/spec/raspar_spec.rb
CHANGED
@@ -9,16 +9,16 @@ describe Raspar do
|
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should add domain to register parser list" do
|
12
|
-
Raspar.register(@site, TestParser).
|
13
|
-
Raspar.parsers.
|
14
|
-
Raspar.parsers.size.
|
12
|
+
expect(Raspar.register(@site, TestParser)).to eq(@host)
|
13
|
+
expect(Raspar.parsers).to include({@host => TestParser})
|
14
|
+
expect(Raspar.parsers.size).to be > 0
|
15
15
|
end
|
16
16
|
|
17
17
|
it "should clear registered domains" do
|
18
18
|
Raspar.register(@site, TestParser)
|
19
19
|
Raspar.clear_parser_list
|
20
20
|
|
21
|
-
Raspar.parsers.size.
|
21
|
+
expect(Raspar.parsers.size).to eq(0)
|
22
22
|
end
|
23
23
|
|
24
24
|
it "should able to remove parser from the registered list" do
|
@@ -27,7 +27,7 @@ describe Raspar do
|
|
27
27
|
|
28
28
|
Raspar.remove(@site)
|
29
29
|
|
30
|
-
Raspar.parsers.
|
30
|
+
expect(Raspar.parsers).not_to include(@host)
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
data/spec/sample_parser.rb
CHANGED
@@ -6,7 +6,7 @@ class SampleParser
|
|
6
6
|
attr :desc, '.desc', :common => true, :eval => :full_desc
|
7
7
|
attr :specs, '.specs li', :common => true, :as => :array, :eval => :format_specs
|
8
8
|
|
9
|
-
collection :
|
9
|
+
collection :products, '.item,span.second' do
|
10
10
|
attr :image, 'img', :prop => 'src'
|
11
11
|
attr :image_url, 'img', :prop => 'src', :eval => :make_image_url
|
12
12
|
attr :name, 'span:first, .name', :eval => :full_name
|
@@ -18,11 +18,15 @@ class SampleParser
|
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
21
|
-
collection :
|
21
|
+
collection :offers, '.offer' do
|
22
22
|
attr :name, '.name'
|
23
23
|
attr :percentage, '.percentage'
|
24
24
|
end
|
25
25
|
|
26
|
+
collection :related_products, 'ol.related_products' do
|
27
|
+
attr :name, 'li', as: :array
|
28
|
+
end
|
29
|
+
|
26
30
|
def full_name(val, ele)
|
27
31
|
"Full Name: #{val}"
|
28
32
|
end
|
@@ -87,6 +91,12 @@ FAKE_PAGE = %q{
|
|
87
91
|
<span class="percentage">10% off</span>
|
88
92
|
</div>
|
89
93
|
|
94
|
+
<ol class="related_products">
|
95
|
+
<li> Product 1 </li>
|
96
|
+
<li> Product 2 </li>
|
97
|
+
<li> Product 3 </li>
|
98
|
+
</ol>
|
99
|
+
|
90
100
|
</body>
|
91
101
|
</html>
|
92
102
|
}
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,41 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: raspar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jiren Patel
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 1.5.5
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 1.5.5
|
41
41
|
description: Raspar collects data from the html page and creates object from it.
|
@@ -45,17 +45,19 @@ executables: []
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
-
- .gitignore
|
49
|
-
- .travis.yml
|
48
|
+
- ".gitignore"
|
49
|
+
- ".travis.yml"
|
50
50
|
- Gemfile
|
51
51
|
- LICENSE
|
52
52
|
- README.md
|
53
53
|
- Rakefile
|
54
54
|
- example/Gemfile
|
55
|
-
- example/
|
55
|
+
- example/best_buy.rb
|
56
|
+
- example/country_code.rb
|
56
57
|
- example/debug.rb
|
57
|
-
- example/dynamic.rb
|
58
58
|
- example/leguide.rb
|
59
|
+
- example/leguide_dynamic.rb
|
60
|
+
- example/sample_parser.rb
|
59
61
|
- lib/raspar.rb
|
60
62
|
- lib/raspar/dynamic_parser.rb
|
61
63
|
- lib/raspar/parser.rb
|
@@ -77,17 +79,17 @@ require_paths:
|
|
77
79
|
- lib
|
78
80
|
required_ruby_version: !ruby/object:Gem::Requirement
|
79
81
|
requirements:
|
80
|
-
- -
|
82
|
+
- - ">="
|
81
83
|
- !ruby/object:Gem::Version
|
82
84
|
version: '0'
|
83
85
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
86
|
requirements:
|
85
|
-
- -
|
87
|
+
- - ">="
|
86
88
|
- !ruby/object:Gem::Version
|
87
89
|
version: '0'
|
88
90
|
requirements: []
|
89
91
|
rubyforge_project: raspar
|
90
|
-
rubygems_version: 2.
|
92
|
+
rubygems_version: 2.2.2
|
91
93
|
signing_key:
|
92
94
|
specification_version: 4
|
93
95
|
summary: A generic html/xml parser
|