raspar 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.travis.yml +10 -0
- data/Gemfile +6 -0
- data/LICENSE +21 -0
- data/README.md +197 -0
- data/Rakefile +1 -0
- data/example/Gemfile +5 -0
- data/example/ccode.rb +22 -0
- data/example/debug.rb +4 -0
- data/example/dynamic.rb +63 -0
- data/example/leguide.rb +56 -0
- data/lib/raspar.rb +94 -0
- data/lib/raspar/dynamic_parser.rb +44 -0
- data/lib/raspar/parser.rb +152 -0
- data/lib/raspar/result.rb +20 -0
- data/lib/raspar/version.rb +3 -0
- data/raspar.gemspec +23 -0
- data/spec/add_parser_spec.rb +58 -0
- data/spec/dynamic_parser_spec.rb +74 -0
- data/spec/parser_spec.rb +86 -0
- data/spec/raspar_spec.rb +33 -0
- data/spec/sample_parser.rb +94 -0
- data/spec/spec_helper.rb +29 -0
- metadata +100 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: df5341823f5ad794338001c6a0bbf53de1ac4089
|
4
|
+
data.tar.gz: 540306d13e95a15312ba517fbc470fa136270bf0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d3966a090724b5435ba847c738b59f73d5fd23f59555db20b0317297759d0084032071bdf46cfba0171d84d7716080da09fbd58e8a56003bb4124ee3fa603cbc
|
7
|
+
data.tar.gz: 9e169aff0a96dc1f6b7ccdf8e9e92fd8c8d4ae4d2543fbae983b5fa148a91c448577b2117cde2164758432abc5ddabd4e987ffc974936024b411faf686fb8cb6
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2013 Jiren Patel[jirenpatel@gmail.com]
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,197 @@
|
|
1
|
+
## Raspar - scraping library
|
2
|
+
|
3
|
+
Raspar is an HTML scraping library that maps HTML elements to Ruby objects using 'css' or 'xpath' selectors. With this library you can define a separate parser for each website, and Raspar selects the parser according to the URL of the input HTML page.
|
4
|
+
|
5
|
+
[![Build Status](https://travis-ci.org/jiren/raspar.png?branch=master)](https://travis-ci.org/jiren/raspar)
|
6
|
+
[![Coverage Status](https://coveralls.io/repos/jiren/raspar/badge.png?branch=master)](https://coveralls.io/r/jiren/raspar?branch=master)
|
7
|
+
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Add this line to your application's Gemfile:
|
12
|
+
|
13
|
+
gem 'raspar', :git => 'git://github.com/jiren/raspar.git'
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
|
23
|
+
result = Raspar.parse(url, html) # Returns an array of parsed result objects.
|
24
|
+
|
25
|
+
#Result
|
26
|
+
[
|
27
|
+
#<Raspar::Result:0x007ffc91e4d640
|
28
|
+
@attrs={:name=>"Test1", :price=>"10", :image=>"1", :desc=>"Description"},
|
29
|
+
@domain="example.com",
|
30
|
+
@name=:product>,
|
31
|
+
#<Raspar::Result:0x007ffc91e57be0
|
32
|
+
@attrs={:name=>"Test2", :price=>"20", :image=>"2", :desc=>"Description"},
|
33
|
+
@domain="example.com",
|
34
|
+
@name=:product>
|
35
|
+
]
|
36
|
+
|
37
|
+
```
|
38
|
+
|
39
|
+
## Example
|
40
|
+
|
41
|
+
### Sample HTML
|
42
|
+
|
43
|
+
```html
|
44
|
+
<!DOCTYPE html>
|
45
|
+
<html>
|
46
|
+
<body>
|
47
|
+
<span class="desc">Description</span>
|
48
|
+
<div class="item">
|
49
|
+
<img src="1">
|
50
|
+
<span>Test1</span>
|
51
|
+
<span class="price">10</span>
|
52
|
+
</div>
|
53
|
+
|
54
|
+
<div class="item">
|
55
|
+
<img src="2">
|
56
|
+
<span>Test2</span>
|
57
|
+
<span class="price">20</span>
|
58
|
+
</div>
|
59
|
+
|
60
|
+
<span class="second">
|
61
|
+
<img src="2">
|
62
|
+
<span>Test2</span>
|
63
|
+
<span class="price">20</span>
|
64
|
+
</span>
|
65
|
+
|
66
|
+
<div class="offer">
|
67
|
+
<span class="name">First Offer</span>
|
68
|
+
<span class="percentage">10% off</span>
|
69
|
+
</div>
|
70
|
+
|
71
|
+
</body>
|
72
|
+
</html>
|
73
|
+
```
|
74
|
+
|
75
|
+
|
76
|
+
#### Parser for above HTML
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
class SampleParser
|
80
|
+
include Raspar
|
81
|
+
|
82
|
+
domain 'http://sample.com'
|
83
|
+
|
84
|
+
attr :desc, '.desc', :eval => :format_desc
|
85
|
+
|
86
|
+
collection :product, '.item,span.second' do
|
87
|
+
attr :image_url, 'img', :prop => 'src', :eval => :make_image_url
|
88
|
+
attr :name, 'span:first'
|
89
|
+
attr :price, 'span.price', :eval => Proc.new{|price, ele| price.to_i}
|
90
|
+
attr :price_map do |text, ele|
|
91
|
+
val = ele.search('span').collect{|s| s.content.strip}
|
92
|
+
{val[0] => val[1].to_f}
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
collection :offer, '.offer' do
|
97
|
+
attr :name, '.name'
|
98
|
+
attr :discount, '.percentage' do |text, ele|
|
99
|
+
text.split('%').first.to_f
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def name_price(val, ele)
|
104
|
+
val = ele.search('span').collect{|s| s.content.strip}
|
105
|
+
{val[0] => val[1].to_f}
|
106
|
+
end
|
107
|
+
|
108
|
+
def make_image_url(path, ele)
|
109
|
+
self.class.absolute_url(path)
|
110
|
+
end
|
111
|
+
|
112
|
+
def format_desc(text, ele)
|
113
|
+
"Description: #{text}"
|
114
|
+
end
|
115
|
+
|
116
|
+
end
|
117
|
+
```
|
118
|
+
|
119
|
+
- The 'domain' method registers the parser for the given domain so that Raspar can pick the right parser at runtime.
|
120
|
+
- Use 'attr' to define each value that is going to be parsed. The first argument is the attribute name, the second is a 'css' or 'xpath' selector, and the third contains options.
|
121
|
+
- Valid options are :prop, :eval and :as.
|
122
|
+
- :prop selects a particular property/attribute of the html element. In the example, the image url is selected using :prop => 'src'.
|
123
|
+
- :eval is used to post-process the attr value. It can be a proc, a method name or a block. Each method, proc or block used for eval takes two arguments: the first is the html element's text and the second is the html element itself as a Nokogiri node.
|
124
|
+
- If :eval is not defined, the parser returns the text of the selected html element.
|
125
|
+
- If your page has multiple types of objects or collections, define each one using a 'collection' block. In the above example the '.item' and 'span.second' elements are products, while the '.offer' element contains the offer details.
|
126
|
+
- Some attributes in an html page are common: they do not reside under any particular collection. The values of these attributes are added to every parsed object.
|
127
|
+
|
128
|
+
### Add Parser in different way
|
129
|
+
|
130
|
+
`Raspar.add` takes the domain url as its only argument, plus a block that defines the parser.
|
131
|
+
|
132
|
+
```ruby
|
133
|
+
|
134
|
+
Raspar.add('http://example.com') do
|
135
|
+
attr :desc, '.desc', :eval => :format_desc
|
136
|
+
|
137
|
+
collection :product, '.item,span.second' do
|
138
|
+
attr :image_url, 'img', :prop => 'src'
|
139
|
+
attr :name, 'span:first'
|
140
|
+
attr :price, 'span.price', :eval => Proc.new{|price, ele| price.to_i}
|
141
|
+
attr :price_map do |text, ele|
|
142
|
+
val = ele.search('span').collect{|s| s.content.strip}
|
143
|
+
{val[0] => val[1].to_f}
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
def format_desc(text, ele)
|
148
|
+
"Desc: #{text.downcase}"
|
149
|
+
end
|
150
|
+
|
151
|
+
end
|
152
|
+
|
153
|
+
|
154
|
+
```
|
155
|
+
|
156
|
+
|
157
|
+
### Dynamically add Parser
|
158
|
+
|
159
|
+
```ruby
|
160
|
+
|
161
|
+
domain = 'http://www.sample.com'
|
162
|
+
selector_map = {
|
163
|
+
:common_attrs => {
|
164
|
+
:desc => {:select => '.desc'}
|
165
|
+
},
|
166
|
+
:collections =>{
|
167
|
+
:item => {
|
168
|
+
:select => 'div, span.second',
|
169
|
+
:attrs => {
|
170
|
+
:name => { :select => 'span:first'},
|
171
|
+
:price => { :select => 'span.price', :eval => :parse_price},
|
172
|
+
:image => { :select => 'img', :prop => 'src'}
|
173
|
+
}
|
174
|
+
}
|
175
|
+
}
|
176
|
+
}
|
177
|
+
|
178
|
+
module ParserHelper
|
179
|
+
def parse_price(val, ele)
|
180
|
+
val.gsub(/[ ,]/, ' ' => '', ',' => '.').to_f
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
184
|
+
Raspar.add(domain, selector_map, ParserHelper) # Add parser
|
185
|
+
|
186
|
+
```
|
187
|
+
|
188
|
+
For post processing user can add parser helper, but it is not mandatory.
|
189
|
+
|
190
|
+
|
191
|
+
## Contributing
|
192
|
+
|
193
|
+
Please send me a pull request so that this can be improved.
|
194
|
+
|
195
|
+
## License
|
196
|
+
|
197
|
+
This is released under the MIT license.
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/example/Gemfile
ADDED
data/example/ccode.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'raspar'
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
class CCode
|
8
|
+
include Raspar
|
9
|
+
|
10
|
+
domain 'http://www.exchange-rate.com'
|
11
|
+
|
12
|
+
collection :currency_code, 'table[cellpadding="2"] tr:gt(1)' do
|
13
|
+
attr :country, 'td:nth-child(1)'
|
14
|
+
attr :currency, 'td:nth-child(2)'
|
15
|
+
attr :code, 'td:nth-child(3)'
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
url = 'http://www.exchange-rate.com/currency-list.html'
|
20
|
+
page = open(url).read
|
21
|
+
|
22
|
+
Raspar.parse(url, page).each {|i| pp i }
|
data/example/debug.rb
ADDED
data/example/dynamic.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'raspar'
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
module ParserHelper
|
8
|
+
def build_specification(val, ele)
|
9
|
+
attrs = {}
|
10
|
+
ele.search('li').each do |li|
|
11
|
+
attrs[li.search('.title').first.content] = li.search('.value').first.content
|
12
|
+
end
|
13
|
+
attrs
|
14
|
+
end
|
15
|
+
|
16
|
+
#For normal attr use instance method
|
17
|
+
def parse_price(val, ele)
|
18
|
+
val.gsub(/[ ,]/, ' ' => '', ',' => '.')
|
19
|
+
end
|
20
|
+
|
21
|
+
def parse_shipping_price(text, ele)
|
22
|
+
text.split(':').last.strip
|
23
|
+
end
|
24
|
+
|
25
|
+
def data_attr_parse(text, ele)
|
26
|
+
Nokogiri::HTML.parse(text).text.split(':').last.strip
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
domain = 'http://www.leguide.com'
|
31
|
+
selector_map = {
|
32
|
+
:common_attrs => {
|
33
|
+
:name => {:select => '.block_bpu_feature .p b'},
|
34
|
+
:specifications => {:select => '#page2', :eval => :build_specification}
|
35
|
+
},
|
36
|
+
:collections => {
|
37
|
+
:product =>{
|
38
|
+
:select => '.offers_list li',
|
39
|
+
:attrs => {
|
40
|
+
:image => { :select => 'img', :prop => 'src'},
|
41
|
+
:price => { :select => '.price .euro.gopt', :eval => :parse_price},
|
42
|
+
:orignal_price => { :select => '.price .barre', :eval => :parse_price},
|
43
|
+
:desc => { :select => '.gopt.description,.info .description'},
|
44
|
+
:vendor => { :select => '.name a' },
|
45
|
+
:availability => { :select => '.av', :prop => 'data-value', :eval => :data_attr_parse},
|
46
|
+
:delivery_time => { :select => '.dv', :prop => 'data-value', :eval => :data_attr_parse},
|
47
|
+
:shipping_price => { :select => '.delivery.gopt'}
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
53
|
+
Raspar.add(domain, selector_map, ParserHelper)
|
54
|
+
|
55
|
+
url = 'http://www.leguide.com/sb/bp/5010500/hotpoint_ariston/ECO9F_149_FRS/55743410.htm'
|
56
|
+
page = open(url).read()
|
57
|
+
|
58
|
+
Raspar.parse(url, page).each do |i|
|
59
|
+
pp i
|
60
|
+
p "*"*40
|
61
|
+
end
|
62
|
+
|
63
|
+
|
data/example/leguide.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler/setup'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'raspar'
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
class Leguide
|
8
|
+
include Raspar
|
9
|
+
|
10
|
+
SHIPPING_PROC = Proc.new{|text, ele| text.split(':').last.strip}
|
11
|
+
DATA_PROC = Proc.new{|text, ele| Nokogiri::HTML.parse(text).text.split(':').last.strip}
|
12
|
+
|
13
|
+
domain 'http://www.leguide.com'
|
14
|
+
|
15
|
+
#External attrs
|
16
|
+
attr :name, '.block_bpu_feature .p b'
|
17
|
+
attr :specifications, '#page2', :eval => :build_specification
|
18
|
+
|
19
|
+
collection :product, '.offers_list li' do
|
20
|
+
attr :alt_name, '.gopt.offer.t'
|
21
|
+
attr :image, '.lg_photo img', :prop => 'src'
|
22
|
+
attr :price, '.price .euro.gopt'
|
23
|
+
attr :orignal_price, '.price .barre'
|
24
|
+
attr :desc, '.gopt.description,.info .description'
|
25
|
+
attr :vendor, '.name a'
|
26
|
+
attr :availability, '.av', :prop => 'data-value', :eval => DATA_PROC
|
27
|
+
attr :delivery_time, '.dv', :prop => 'data-value', :eval => DATA_PROC
|
28
|
+
attr :shipping_price, '.delivery.gopt', :eval => SHIPPING_PROC
|
29
|
+
end
|
30
|
+
|
31
|
+
#For external attr define class method because it is evaluated only once for all objects in a single html doc.
|
32
|
+
def build_specification(val, ele)
|
33
|
+
attrs = {}
|
34
|
+
ele.search('li').each do |li|
|
35
|
+
attrs[li.search('.title').first.content] = li.search('.value').first.content
|
36
|
+
end
|
37
|
+
attrs
|
38
|
+
end
|
39
|
+
|
40
|
+
#For normal attr use instance method
|
41
|
+
def parse_price(val, ele)
|
42
|
+
val.gsub(/[ ,]/, ' ' => '', ',' => '.')
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
url = 'http://www.leguide.com/sb/bp/5010500/hotpoint_ariston/ECO9F_149_FRS/55743410.htm'
|
48
|
+
url = 'http://www.leguide.com/electromenager.htm'
|
49
|
+
p ARGV[0] || url
|
50
|
+
#page = open(ARGV[0] || url).read().gsub(/[[:cntrl:]@]/, '')
|
51
|
+
page = open(ARGV[0] || url).read()
|
52
|
+
|
53
|
+
Raspar.parse(url, page).each do |o|
|
54
|
+
pp o
|
55
|
+
p "*"*40
|
56
|
+
end
|
data/lib/raspar.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'net/https'
|
3
|
+
require 'webrick/cookie'
|
4
|
+
require 'delegate'
|
5
|
+
require 'nokogiri'
|
6
|
+
|
7
|
+
require 'raspar/version'
|
8
|
+
require 'raspar/result'
|
9
|
+
require 'raspar/parser'
|
10
|
+
require 'raspar/dynamic_parser'
|
11
|
+
|
12
|
+
module Raspar
|
13
|
+
|
14
|
+
def self.included(base)
|
15
|
+
base.extend Parser::ClassMethods
|
16
|
+
base.send :include, Parser::InstanceMethods
|
17
|
+
base._init_parser_
|
18
|
+
end
|
19
|
+
|
20
|
+
# Error raised by Raspar itself, e.g. when a parser is added for a url that
# already has one. It inherits from StandardError (not Exception) so that a
# plain `rescue` / `rescue => e` in calling code catches it.
class RasparException < StandardError; end
|
21
|
+
|
22
|
+
class << self
|
23
|
+
|
24
|
+
def _init
|
25
|
+
@parsers = {}
|
26
|
+
end
|
27
|
+
|
28
|
+
# Registers a parser for a domain and returns the key it was stored under.
#
# The key is the host part of +domain+ when it parses to a URL with a host,
# otherwise +domain+ itself.
#
# === Example
#   Raspar.register('http://test.com', TestParser) #=> "test.com"
#
def register(domain, klass)
  @parsers ||= {}
  key = URI(domain).host || domain
  @parsers[key] = klass
  key
end
|
37
|
+
|
38
|
+
# clear parser list
|
39
|
+
def clear_parser_list
|
40
|
+
@parsers = {}
|
41
|
+
end
|
42
|
+
|
43
|
+
# Removes the parser registered for +domain+.
#
# The lookup key is the host of +domain+ when it parses to a URL with a
# host, otherwise +domain+ itself. This is the same key #register stores
# under, so a parser registered with a bare domain can be removed too.
#
# Returns the removed parser, or nil when nothing was registered for the
# key or the parser list has not been initialised.
def remove(domain)
  return unless @parsers

  @parsers.delete(URI(domain).host || domain)
end
|
46
|
+
|
47
|
+
def parsers
|
48
|
+
@parsers
|
49
|
+
end
|
50
|
+
|
51
|
+
# Tells whether a parser is registered for +url+.
#
# The lookup key is the host of +url+ when it has one, otherwise +url+
# itself (the same key #register stores under). Returns false when the
# parser list has not been initialised.
def exist?(url)
  return false unless @parsers

  @parsers.include?(URI(url).host || url)
end
|
54
|
+
|
55
|
+
# Parses +html+ with the parser registered for +url+.
#
# The parser is looked up by the host of +url+, falling back to +url+
# itself when it has no host (the same key #register stores under).
#
# Returns whatever the parser's +parse+ returns. When no parser is
# registered, prints a notice to stdout and returns nil.
def parse(url, html)
  host = URI(url).host || url
  parser = @parsers[host]

  unless parser
    puts "No parser define for #{host}"
    return nil
  end

  parser.parse(html)
end
|
64
|
+
|
65
|
+
# Adds a parser for +url+ at runtime.
#
# * With +selector_map+ (and an optional +helper_module+) the call is
#   delegated to DynamicParser.register and its result is returned.
# * Otherwise an anonymous class is created, Raspar is included into it,
#   it is registered for +url+, +block+ (if given) is evaluated in the
#   class body, and the class is assigned to a top-level constant whose
#   name is derived from the host: "www" is dropped, every label is
#   capitalised and non-word characters are removed.
#
# Returns the parser (DynamicParser instance or the new class).
# Raises RasparException when a parser already exists for +url+.
def add(url, selector_map = nil, helper_module = nil, &block)
  if exist?(url)
    raise RasparException.new("Parser already exist for '#{url}'")
  end

  if selector_map
    return DynamicParser.register(url, selector_map, helper_module)
  end

  # Fall back to the raw url when it has no host, mirroring #register,
  # instead of failing with NoMethodError on nil.
  klass_name = (URI(url).host || url)
               .split('.')
               .reject { |w| w.empty? || w == 'www' }
               .collect { |w| w[0].upcase + w[1..-1] }
               .join
               .gsub(/\W/, '')

  klass = Class.new
  klass.send :include, Raspar
  klass.domain(url)
  klass.class_exec(&block) if block_given?

  # A constant name must start with an uppercase letter; hosts such as
  # "123.example.com" would otherwise make const_defined?/const_set raise
  # NameError. The format check comes first so const_defined? is only
  # asked about valid names.
  if klass_name !~ /\A[A-Z]/ || Object.const_defined?(klass_name)
    klass_name = 'R' + klass_name
  end
  Object.const_set(klass_name, klass)
end
|
89
|
+
|
90
|
+
end
|
91
|
+
|
92
|
+
#Init Raspar parser list
|
93
|
+
self._init
|
94
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Raspar
|
2
|
+
|
3
|
+
class DynamicParser
|
4
|
+
include Parser::InstanceMethods
|
5
|
+
include Parser::ClassMethods
|
6
|
+
|
7
|
+
attr_accessor :domain, :domain_url,
|
8
|
+
:common_attrs, :collections, :_current_container_
|
9
|
+
|
10
|
+
def initialize
|
11
|
+
@common_attrs = {}
|
12
|
+
@collections = {}
|
13
|
+
end
|
14
|
+
|
15
|
+
def parse(html)
|
16
|
+
self.process(html, self)
|
17
|
+
end
|
18
|
+
|
19
|
+
# Builds a DynamicParser from +selector_map+ and registers it for +url+.
#
# selector_map  - Hash with optional keys :common_attrs (attr name =>
#                 options) and :collections (collection name =>
#                 {:select => selector, :attrs => {attr name => options}}).
# helper_module - optional module; the parser instance is extended with it
#                 so that Symbol :eval options can resolve to its methods.
#
# Returns the new parser instance.
def self.register(url, selector_map, helper_module = nil)
  dp = self.new

  (selector_map[:common_attrs] || {}).each { |attr, opts| dp.attr(attr, opts) }

  (selector_map[:collections] || {}).each do |name, collection_opts|
    # Store and look up the collection under the same (Symbol) key;
    # otherwise a String collection name is stored under the String but
    # attr looks it up under the Symbol and fails on nil.
    key = name.to_sym
    dp.collections[key] = { :select => collection_opts[:select], :attrs => {} }

    dp._current_container_ = key
    (collection_opts[:attrs] || {}).each { |attr, opts| dp.attr(attr, opts) }
    dp._current_container_ = nil
  end

  #TODO: Create constant from string and extend object.
  dp.extend(helper_module) if helper_module
  dp.domain_url = url
  dp.domain = Raspar.register(url, dp)
  dp
end
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
module Raspar
|
2
|
+
module Parser
|
3
|
+
|
4
|
+
module ClassMethods
|
5
|
+
attr_reader :domain, :common_attrs, :collections
|
6
|
+
|
7
|
+
def _init_parser_
|
8
|
+
@common_attrs = {}
|
9
|
+
@collections = {}
|
10
|
+
end
|
11
|
+
|
12
|
+
# Declares an attribute to extract and stores its normalised options.
#
# name   - attribute name (stored as a Symbol).
# select - selector(s) as a String ("a, b"), an Array, or nil. An options
#          Hash may be passed in this position instead; its :select entry
#          is then used.
# opts   - options Hash. Keys read by this module: :select, :prop, :eval
#          and :as.
# block  - optional; when given it replaces opts[:eval].
#
# The hashes passed in are copied, never modified.
#
# Normalisation examples:
#   attr :name                               # :select => nil
#   attr :name, '.name'                      # :select => ['.name']
#   attr :name, '.name, .title'              # :select => ['.name', '.title']
#   attr :name, ['.name', '.title']          # :select => ['.name', '.title']
#   attr :name, '.name', :eval => 'parse'    # :eval => :parse
#   attr :name, :eval => :parse              # :select => nil
#   attr :specs, '.specs li', :as => :array  # :select => '.specs li'
#
# Returns the stored options Hash. Inside a collection block the attribute
# is added to that collection, otherwise to the common attributes.
def attr(name, select = nil, opts = {}, &block)
  opts = if select.is_a?(Hash)
           select.dup
         else
           opts.merge(:select => select)
         end

  opts[:select] = case opts[:select]
                  when Array
                    opts[:select].flatten
                  when String
                    opts[:select].split(',').collect(&:strip)
                  else
                    opts[:select]
                  end

  # :as => :array searches with one combined selector string; without a
  # selector there is nothing to join.
  if opts[:as] == :array && opts[:select].respond_to?(:join)
    opts[:select] = opts[:select].join(',')
  end
  opts[:eval] = opts[:eval].to_sym if opts[:eval].is_a?(String)
  opts[:eval] = block if block_given?

  if @_current_container_
    @collections[@_current_container_][:attrs][name.to_sym] = opts
  else
    @common_attrs[name.to_sym] = opts
  end
end
|
47
|
+
|
48
|
+
# Declares a collection: a group of attributes extracted once per element
# matched by +select+.
#
# collection_name - collection name (stored as a Symbol).
# select          - selector for the elements that make up the collection.
# block           - required; `attr` calls made inside it are attached to
#                   this collection.
#
# The current-collection marker is reset in an `ensure`, so an error
# raised inside the block cannot leave later `attr` calls attached to
# this collection. Returns nil.
def collection(collection_name, select, &block)
  collection_name = collection_name.to_sym
  @collections[collection_name] = { :select => select, :attrs => {} }

  @_current_container_ = collection_name
  begin
    yield
  ensure
    @_current_container_ = nil
  end
  nil
end
|
56
|
+
|
57
|
+
# Reader/writer for the parser's domain.
#
# Without an argument returns the registered domain. With +url+ it
# remembers the url, registers this parser with Raspar for it, and returns
# the value Raspar.register returned.
def domain(url = nil)
  return @domain unless url

  @domain_url = url
  @domain = Raspar.register(url, self)
end
|
64
|
+
|
65
|
+
def absolute_url(path)
|
66
|
+
URI(@domain_url).merge(path).to_s
|
67
|
+
end
|
68
|
+
|
69
|
+
def parse(html)
|
70
|
+
self.new.process(html)
|
71
|
+
end
|
72
|
+
|
73
|
+
def attrs
|
74
|
+
{:collections => @collections, :common_attrs => @common_attrs}
|
75
|
+
end
|
76
|
+
|
77
|
+
def info
|
78
|
+
{:domain => @domain, :collections => @collections.keys, :common_attrs => @common_attrs.keys}
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
module InstanceMethods
|
83
|
+
attr_reader :attributes
|
84
|
+
|
85
|
+
# Extracts every attribute described in +attr_map+ from +doc+.
#
# For each attribute:
# * :as => :array with a selector - every element matched by the selector
#   is processed and the attribute becomes an Array of the results.
# * a list of selectors - they are tried in order and the first element
#   found is processed; the attribute is skipped when none matches.
# * no selector - +doc+ itself is processed.
#
# Returns a Hash of attribute name => processed value.
def attr_parser(doc, attr_map)
  attr_map.each_with_object({}) do |(attr_name, opts), attrs|
    selectors = opts[:select]

    if selectors && opts[:as] == :array
      attrs[attr_name] = doc.search(selectors).collect { |node| process_ele(node, opts) }
      next
    end

    ele = doc
    if selectors
      selectors.each do |selector|
        ele = doc.search(selector).first
        break if ele
      end
    end

    attrs[attr_name] = process_ele(ele, opts) if ele
  end
end
|
113
|
+
|
114
|
+
# Parses +html+ and builds the list of Result objects.
#
# html  - the page source handed to Nokogiri::HTML.
# klass - object supplying common_attrs, collections and domain; defaults
#         to the receiver's class.
#
# The common attributes are parsed once from the whole document and merged
# into the attributes of every collection element (overwriting same-named
# keys). When no collection element is found, a single :default Result
# holding only the common attributes is returned.
def process(html, klass = nil)
  @results = []
  doc = Nokogiri::HTML(html)
  klass ||= self.class

  shared = attr_parser(doc, klass.common_attrs)

  klass.collections.each do |name, collection|
    doc.search(collection[:select]).each do |node|
      item_attrs = attr_parser(node, collection[:attrs])
      item_attrs.merge!(shared)
      @results << Result.new(name, item_attrs, klass.domain)
    end
  end

  if @results.none?
    @results << Result.new(:default, shared, klass.domain)
  end
  @results
end
|
131
|
+
|
132
|
+
private
|
133
|
+
|
134
|
+
# Turns one selected html element into an attribute value.
#
# The raw value is the element's opts[:prop] attribute when :prop is given,
# otherwise the element's text content; it is stripped in place when
# present. When opts[:eval] is set, the raw value and the element are
# passed to it (a Symbol is sent to self as a method name, anything else is
# invoked with #call) and its result is returned; otherwise the raw value
# is returned.
def process_ele(ele, opts)
  val = if opts[:prop]
          ele[opts[:prop]]
        else
          ele.content
        end
  val.strip! if val

  handler = opts[:eval]
  return val unless handler

  if handler.is_a?(Symbol)
    self.send(handler, val, ele)
  else
    handler.call(val, ele)
  end
end
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Raspar
  # One parsed item: the collection name it came from, its attribute Hash
  # and the domain of the parser that produced it.
  #
  # Attributes can be read with result[:key] or as methods (result.key).
  # Note that #name, #attrs and #domain are real readers, so an attribute
  # stored under one of those keys is only reachable through #[].
  class Result
    attr_reader :name, :attrs, :domain

    # name   - collection name.
    # attrs  - Hash of attribute name => parsed value.
    # domain - optional domain; left unset when not given.
    def initialize(name, attrs, domain = nil)
      @name = name
      @attrs = attrs
      @domain = domain if domain
    end

    # Returns the parsed value stored under +f+, or nil.
    def [](f)
      @attrs[f]
    end

    # Any unknown method is treated as an attribute lookup; arguments and
    # block are ignored and nil is returned for attributes that were not
    # parsed.
    def method_missing(name, *args, &block)
      @attrs[name]
    end

    # Keeps respond_to? / method in step with method_missing for the
    # attributes that are actually present.
    def respond_to_missing?(name, include_private = false)
      (@attrs.respond_to?(:key?) && @attrs.key?(name)) || super
    end

  end
end
|
data/raspar.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "raspar/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "raspar"
|
7
|
+
s.version = Raspar::VERSION
|
8
|
+
s.authors = ["Jiren Patel"]
|
9
|
+
s.email = ["jiren@joshsoftware.com"]
|
10
|
+
s.homepage = ""
|
11
|
+
s.summary = %q{A generic html/xml parser}
|
12
|
+
s.description = %q{Raspar collects data from the html page and creates object from it.}
|
13
|
+
|
14
|
+
s.rubyforge_project = "raspar"
|
15
|
+
|
16
|
+
s.files = `git ls-files`.split("\n")
|
17
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
|
+
s.require_paths = ["lib"]
|
20
|
+
|
21
|
+
s.add_development_dependency "rspec"
|
22
|
+
s.add_dependency "nokogiri", "~> 1.5.5"
|
23
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__))
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'sample_parser'
|
4
|
+
|
5
|
+
describe 'Add Parser' do
|
6
|
+
|
7
|
+
def add_parser
|
8
|
+
Raspar.add(@site) do
|
9
|
+
attr :desc, '.desc', :common => true
|
10
|
+
|
11
|
+
collection :product, '.item,span.second' do
|
12
|
+
attr :name, 'span:first, .name', :eval => :full_name
|
13
|
+
attr :price, '.price', :eval => Proc.new{|i| i.to_i}
|
14
|
+
end
|
15
|
+
|
16
|
+
def full_name(val, ele)
|
17
|
+
"Full Name: #{val}"
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
before(:all) do
|
24
|
+
@site = 'http://addparser.com'
|
25
|
+
@domain = URI(@site).host
|
26
|
+
@parser_class = add_parser
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'should register parser and parse data' do
|
30
|
+
Raspar.parsers.should include({@domain => @parser_class})
|
31
|
+
end
|
32
|
+
|
33
|
+
it "should have info" do
|
34
|
+
@parser_class.info.should == {
|
35
|
+
:domain => @domain,
|
36
|
+
:collections => [:product],
|
37
|
+
:common_attrs => [:desc]
|
38
|
+
}
|
39
|
+
end
|
40
|
+
|
41
|
+
it "should parse html and create object" do
|
42
|
+
parsed_objs = Raspar.parse(@site, FAKE_PAGE)
|
43
|
+
|
44
|
+
parsed_objs.length.should == 4
|
45
|
+
|
46
|
+
parsed_objs.count{|o| o.name == :product}.should == 4
|
47
|
+
|
48
|
+
count = 1
|
49
|
+
parsed_objs.select{|o| o.name == :product}.each do |o|
|
50
|
+
o[:name].should == "Full Name: Test#{count}"
|
51
|
+
o[:price].should == (count * 10)
|
52
|
+
o[:desc].should == "Description"
|
53
|
+
|
54
|
+
count = count + 1
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__))
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'sample_parser'
|
4
|
+
|
5
|
+
module Raspar
|
6
|
+
|
7
|
+
describe DynamicParser do
|
8
|
+
|
9
|
+
before do
|
10
|
+
@site = 'http://dynmaicparser.com'
|
11
|
+
@domain = URI(@site).host
|
12
|
+
|
13
|
+
selector_map = {
|
14
|
+
:common_attrs => {
|
15
|
+
:desc => {:select => '.desc'}
|
16
|
+
},
|
17
|
+
:collections =>{
|
18
|
+
:product => {
|
19
|
+
:select => 'div.item, span.second',
|
20
|
+
:attrs => {
|
21
|
+
:name => { :select => 'span:first'},
|
22
|
+
:price => { :select => 'span.price'},
|
23
|
+
:image => { :select => 'img', :prop => 'src'}
|
24
|
+
}
|
25
|
+
}
|
26
|
+
}
|
27
|
+
}
|
28
|
+
|
29
|
+
Raspar.clear_parser_list
|
30
|
+
@dynmaic_parser = Raspar.add(@site, selector_map)
|
31
|
+
end
|
32
|
+
|
33
|
+
describe '#onload' do
|
34
|
+
|
35
|
+
it "should register DynamicParser to Raspar parser list" do
|
36
|
+
Raspar.parsers[@domain].class.should == Raspar::DynamicParser
|
37
|
+
|
38
|
+
@dynmaic_parser.domain.should == @domain
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
describe '#parse' do
|
44
|
+
|
45
|
+
it "should parse html and create object" do
|
46
|
+
parsed_objs = Raspar.parse(@site, FAKE_PAGE)
|
47
|
+
parsed_objs.length.should == 4
|
48
|
+
|
49
|
+
count = 1
|
50
|
+
parsed_objs.each do |o|
|
51
|
+
o.class.should == Raspar::Result
|
52
|
+
|
53
|
+
o[:name].should == "Test#{count}"
|
54
|
+
o[:image].should == count.to_s
|
55
|
+
|
56
|
+
#Price should eval using proc given in option which convert string value
|
57
|
+
#to integer
|
58
|
+
o[:price].should == (count * 10).to_s
|
59
|
+
|
60
|
+
#External Field check
|
61
|
+
o[:desc].should == "Description"
|
62
|
+
count = count + 1
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'should return absoulte url' do
|
68
|
+
@dynmaic_parser.absolute_url('/test').should == @site + '/test'
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
72
|
+
|
73
|
+
end
|
74
|
+
end
|
data/spec/parser_spec.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__))
|
2
|
+
require 'spec_helper'
|
3
|
+
require 'sample_parser'
|
4
|
+
|
5
|
+
module Raspar
|
6
|
+
|
7
|
+
describe Parser do
|
8
|
+
|
9
|
+
before do
|
10
|
+
@site = 'http://sample.com'
|
11
|
+
@domain = URI(@site).host
|
12
|
+
|
13
|
+
Raspar.register(@site, SampleParser) unless Raspar.parsers[@domain]
|
14
|
+
end
|
15
|
+
|
16
|
+
#On load SampleParser class
|
17
|
+
describe 'onload' do
|
18
|
+
|
19
|
+
it "should register SampleParser to Raspar parser list" do
|
20
|
+
Raspar.parsers.should include({@domain => SampleParser})
|
21
|
+
|
22
|
+
SampleParser.domain.should == @domain
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'should return absoulte url' do
|
26
|
+
SampleParser.absolute_url('/test').should == @site + '/test'
|
27
|
+
end
|
28
|
+
|
29
|
+
it "should have info" do
|
30
|
+
SampleParser.info.should == {
|
31
|
+
:domain => @domain,
|
32
|
+
:collections => [:product, :offer],
|
33
|
+
:common_attrs => [:desc, :specs]
|
34
|
+
}
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should not define accessor if options not contail :selector" do
  # Without `.should` the comparison result is discarded and the example
  # could never fail.
  SampleParser.instance_methods.include?(:extra).should == false
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
describe 'parse' do
|
44
|
+
|
45
|
+
it "should parse html and create object" do
|
46
|
+
parsed_objs = Raspar.parse(@site, FAKE_PAGE)
|
47
|
+
|
48
|
+
#Total parse objects
|
49
|
+
parsed_objs.length.should == 5
|
50
|
+
|
51
|
+
parsed_objs.count{|o| o.name == :product}.should == 4
|
52
|
+
parsed_objs.count{|o| o.name == :offer}.should == 1
|
53
|
+
|
54
|
+
count = 1
|
55
|
+
parsed_objs.select{|o| o.name == :product}.each do |o|
|
56
|
+
o[:name].should == "Full Name: Test#{count}"
|
57
|
+
o[:image].should == count.to_s
|
58
|
+
|
59
|
+
#Price should eval using proc given in option which convert string value
|
60
|
+
#to integer
|
61
|
+
o[:price].should == (count * 10)
|
62
|
+
|
63
|
+
#External Field check
|
64
|
+
o[:desc].should == "Description is full desc"
|
65
|
+
|
66
|
+
#self selector
|
67
|
+
o[:all_text].should == "Test#{count}\n #{count*10}"
|
68
|
+
|
69
|
+
o[:price_map].should == {"Test#{count}" => (count*10).to_f}
|
70
|
+
|
71
|
+
o[:specs].should == ['spec 1', 'spec 2', 'spec 3']
|
72
|
+
|
73
|
+
count = count + 1
|
74
|
+
end
|
75
|
+
|
76
|
+
parsed_objs.select{|o| o.name == :offer}.each do |o|
|
77
|
+
o[:name].should == 'First Offer'
|
78
|
+
o[:percentage].should == '10% off'
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
82
|
+
|
83
|
+
end
|
84
|
+
|
85
|
+
end
|
86
|
+
end
|
data/spec/raspar_spec.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__))
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe Raspar do
|
5
|
+
|
6
|
+
before(:each) do
|
7
|
+
@site = 'http://test.com'
|
8
|
+
@host = URI(@site).host
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should add domain to register parser list" do
|
12
|
+
Raspar.register(@site, TestParser).should == @host
|
13
|
+
Raspar.parsers.should include({@host => TestParser})
|
14
|
+
Raspar.parsers.size.should > 0
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should clear registered domains" do
|
18
|
+
Raspar.register(@site, TestParser)
|
19
|
+
Raspar.clear_parser_list
|
20
|
+
|
21
|
+
Raspar.parsers.size.should == 0
|
22
|
+
end
|
23
|
+
|
24
|
+
it "should able to remove parser from the registered list" do
|
25
|
+
Raspar.clear_parser_list
|
26
|
+
Raspar.register(@site, TestParser)
|
27
|
+
|
28
|
+
Raspar.remove(@site)
|
29
|
+
|
30
|
+
Raspar.parsers.should_not include(@host)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# Fixture parser for the specs. It declares two page-level attributes, a
# :product and an :offer collection, and the evaluator methods that the
# attribute declarations refer to by name.
class SampleParser
  include Raspar

  domain 'http://sample.com'

  # Attributes declared outside any collection and flagged :common.
  attr :desc,  '.desc',     common: true, eval: :full_desc
  attr :specs, '.specs li', common: true, as: :array, eval: :format_specs

  collection :product, '.item,span.second' do
    attr :image,     'img', prop: 'src'
    attr :image_url, 'img', prop: 'src', eval: :make_image_url
    attr :name,      'span:first, .name', eval: :full_name
    # Evaluator given inline as a proc: converts the extracted text to an integer.
    attr :price,     '.price', eval: Proc.new { |raw| raw.to_i }
    # No selector given for this attribute.
    attr :all_text
    # Evaluator given as a block: maps the first <span> text to the second
    # one converted to a float.
    attr :price_map do |_text, node|
      label, amount = node.search('span').map { |span| span.content.strip }
      { label => amount.to_f }
    end
  end

  collection :offer, '.offer' do
    attr :name,       '.name'
    attr :percentage, '.percentage'
  end

  # Evaluators referenced via :eval above. Each takes two arguments,
  # presumably the extracted value and the matched element (confirm
  # against Raspar's parser implementation).

  # Prefixes the given value with "Full Name: ".
  def full_name(val, ele)
    "Full Name: #{val}"
  end

  # Builds a one-entry hash from the stripped text of the element's first
  # two <span> nodes: first text => second text as a float.
  def name_price(val, ele)
    label, amount = ele.search('span').map { |span| span.content.strip }
    { label => amount.to_f }
  end

  # Delegates to the class-level absolute_url helper with the given path.
  def make_image_url(path, ele)
    self.class.absolute_url(path)
  end

  # Appends " is full desc" to the given text.
  def full_desc(text, ele)
    "#{text} is full desc"
  end

  # Lower-cases the given text.
  def format_specs(text, ele)
    text.downcase
  end

end
|
48
|
+
|
49
|
+
FAKE_PAGE = %q{
|
50
|
+
<!DOCTYPE html>
|
51
|
+
<html>
|
52
|
+
<body>
|
53
|
+
|
54
|
+
<span class="desc">Description</span>
|
55
|
+
<ul class="specs">
|
56
|
+
<li>Spec 1</li>
|
57
|
+
<li>Spec 2</li>
|
58
|
+
<li>Spec 3</li>
|
59
|
+
</ul>
|
60
|
+
|
61
|
+
<div class="item">
|
62
|
+
<img src="1">
|
63
|
+
<span>Test1</span>
|
64
|
+
<span class="price">10</span>
|
65
|
+
</div>
|
66
|
+
|
67
|
+
<div class="item">
|
68
|
+
<img src="2">
|
69
|
+
<span>Test2</span>
|
70
|
+
<span class="price">20</span>
|
71
|
+
</div>
|
72
|
+
|
73
|
+
<div class="item">
|
74
|
+
<img src="3">
|
75
|
+
<span>Test3</span>
|
76
|
+
<span class="price">30</span>
|
77
|
+
</div>
|
78
|
+
|
79
|
+
<span class="second">
|
80
|
+
<img src="4">
|
81
|
+
<span>Test4</span>
|
82
|
+
<span class="price">40</span>
|
83
|
+
</span>
|
84
|
+
|
85
|
+
<div class="offer">
|
86
|
+
<span class="name">First Offer</span>
|
87
|
+
<span class="percentage">10% off</span>
|
88
|
+
</div>
|
89
|
+
|
90
|
+
</body>
|
91
|
+
</html>
|
92
|
+
}
|
93
|
+
|
94
|
+
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Shared bootstrap for the spec suite: sets up coverage reporting,
# configures RSpec, puts lib/ on the load path and loads the gem.
require 'rubygems'
require 'bundler/setup'
require 'open-uri'
require 'simplecov'

# Coverage is started before 'raspar' is required further down; spec and
# example files are filtered out of the report and lib/ is grouped as 'gem'.
SimpleCov.start do
  add_filter '/spec/'
  add_filter '/example/'
  add_group 'gem', 'lib'
end

require 'coveralls'

Coveralls.wear!

RSpec.configure do |config|
  # `color_enabled=` is an RSpec 2-only alias that RSpec 3 removed, and the
  # gemspec does not pin an rspec version. `color=` is accepted by both.
  config.color = true
  #config.tty = true
  #config.formatter = :documentation
end

$:.unshift(File.dirname(__FILE__) + '/../lib/')

require 'raspar'

#REAL_PAGE = open('spec/html/test.htm')

# Empty placeholder class for specs that need a parser class to register.
class TestParser
end
|
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: raspar
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jiren Patel
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-08-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rspec
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.5.5
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.5.5
|
41
|
+
description: Raspar collects data from the html page and creates object from it.
|
42
|
+
email:
|
43
|
+
- jiren@joshsoftware.com
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- .gitignore
|
49
|
+
- .travis.yml
|
50
|
+
- Gemfile
|
51
|
+
- LICENSE
|
52
|
+
- README.md
|
53
|
+
- Rakefile
|
54
|
+
- example/Gemfile
|
55
|
+
- example/ccode.rb
|
56
|
+
- example/debug.rb
|
57
|
+
- example/dynamic.rb
|
58
|
+
- example/leguide.rb
|
59
|
+
- lib/raspar.rb
|
60
|
+
- lib/raspar/dynamic_parser.rb
|
61
|
+
- lib/raspar/parser.rb
|
62
|
+
- lib/raspar/result.rb
|
63
|
+
- lib/raspar/version.rb
|
64
|
+
- raspar.gemspec
|
65
|
+
- spec/add_parser_spec.rb
|
66
|
+
- spec/dynamic_parser_spec.rb
|
67
|
+
- spec/parser_spec.rb
|
68
|
+
- spec/raspar_spec.rb
|
69
|
+
- spec/sample_parser.rb
|
70
|
+
- spec/spec_helper.rb
|
71
|
+
homepage: ''
|
72
|
+
licenses: []
|
73
|
+
metadata: {}
|
74
|
+
post_install_message:
|
75
|
+
rdoc_options: []
|
76
|
+
require_paths:
|
77
|
+
- lib
|
78
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - '>='
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
requirements: []
|
89
|
+
rubyforge_project: raspar
|
90
|
+
rubygems_version: 2.0.3
|
91
|
+
signing_key:
|
92
|
+
specification_version: 4
|
93
|
+
summary: A generic html/xml parser
|
94
|
+
test_files:
|
95
|
+
- spec/add_parser_spec.rb
|
96
|
+
- spec/dynamic_parser_spec.rb
|
97
|
+
- spec/parser_spec.rb
|
98
|
+
- spec/raspar_spec.rb
|
99
|
+
- spec/sample_parser.rb
|
100
|
+
- spec/spec_helper.rb
|