wombat 2.1.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +7 -7
- data/README.md +24 -21
- data/VERSION +1 -1
- data/examples/no_class.rb +21 -21
- data/lib/wombat/dsl/property_group.rb +16 -12
- data/lib/wombat/property/locators/base.rb +12 -3
- data/lib/wombat/property/locators/follow.rb +1 -6
- data/lib/wombat/property/locators/iterator.rb +3 -8
- data/lib/wombat/property/locators/property_group.rb +1 -6
- data/wombat.gemspec +2 -2
- metadata +2 -2
data/Gemfile.lock
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
-
activesupport (3.2.
|
4
|
+
activesupport (3.2.11)
|
5
5
|
i18n (~> 0.6)
|
6
6
|
multi_json (~> 1.0)
|
7
7
|
diff-lcs (1.1.3)
|
8
|
-
domain_name (0.5.
|
8
|
+
domain_name (0.5.7)
|
9
9
|
unf (~> 0.0.3)
|
10
10
|
fakeweb (1.3.0)
|
11
11
|
git (1.2.5)
|
@@ -15,7 +15,7 @@ GEM
|
|
15
15
|
git (>= 1.2.5)
|
16
16
|
rake
|
17
17
|
rdoc
|
18
|
-
json (1.7.
|
18
|
+
json (1.7.6)
|
19
19
|
mechanize (2.5.1)
|
20
20
|
domain_name (~> 0.5, >= 0.5.1)
|
21
21
|
mime-types (~> 1.17, >= 1.17.2)
|
@@ -28,7 +28,7 @@ GEM
|
|
28
28
|
multi_json (1.5.0)
|
29
29
|
net-http-digest_auth (1.2.1)
|
30
30
|
net-http-persistent (2.8)
|
31
|
-
nokogiri (1.5.
|
31
|
+
nokogiri (1.5.6)
|
32
32
|
ntlm-http (0.1.1)
|
33
33
|
rake (10.0.3)
|
34
34
|
rdoc (3.12)
|
@@ -40,13 +40,13 @@ GEM
|
|
40
40
|
rspec-expectations (~> 2.12.0)
|
41
41
|
rspec-mocks (~> 2.12.0)
|
42
42
|
rspec-core (2.12.2)
|
43
|
-
rspec-expectations (2.12.
|
43
|
+
rspec-expectations (2.12.1)
|
44
44
|
diff-lcs (~> 1.1.3)
|
45
|
-
rspec-mocks (2.12.
|
45
|
+
rspec-mocks (2.12.2)
|
46
46
|
unf (0.0.5)
|
47
47
|
unf_ext
|
48
48
|
unf_ext (0.0.5)
|
49
|
-
vcr (2.
|
49
|
+
vcr (2.4.0)
|
50
50
|
webrobots (0.0.13)
|
51
51
|
yard (0.8.3)
|
52
52
|
|
data/README.md
CHANGED
@@ -26,35 +26,38 @@ Wombat.crawl do
|
|
26
26
|
path "/"
|
27
27
|
|
28
28
|
headline "xpath=//h1"
|
29
|
-
|
30
|
-
repositories "css=a.repo", :list
|
29
|
+
subheading "css=p.subheading"
|
31
30
|
|
32
|
-
|
33
|
-
|
34
|
-
|
31
|
+
what_is "css=.teaser h3", :list
|
32
|
+
|
33
|
+
links do
|
34
|
+
explore 'xpath=//*[@id="wrapper"]/div[1]/div/ul/li[1]/a' do |e|
|
35
|
+
e.gsub(/Explore/, "Love")
|
36
|
+
end
|
35
37
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
third_benefit "css=.column.rightmid h3"
|
40
|
-
fourth_benefit "css=.column.rightmost h3"
|
38
|
+
search 'css=.search'
|
39
|
+
features 'css=.features'
|
40
|
+
blog 'css=.blog'
|
41
41
|
end
|
42
42
|
end
|
43
43
|
```
|
44
44
|
|
45
|
-
###### The code above is gonna return the following hash:
|
45
|
+
###### The code above is gonna return the following hash:
|
46
46
|
|
47
47
|
```ruby
|
48
48
|
{
|
49
|
-
"headline"
|
50
|
-
"
|
51
|
-
"
|
52
|
-
|
53
|
-
|
54
|
-
"
|
55
|
-
|
56
|
-
|
57
|
-
"
|
49
|
+
"headline"=>"Build software better, together.",
|
50
|
+
"subheading"=> "Powerful collaboration, review, and code management for open source and private development projects.",
|
51
|
+
"what_is"=> [
|
52
|
+
"Great collaboration starts with communication.",
|
53
|
+
"Manage and contribute from all your devices.",
|
54
|
+
"The world’s largest open source community."
|
55
|
+
],
|
56
|
+
"links"=> {
|
57
|
+
"explore"=>"Love GitHub",
|
58
|
+
"search"=>"Search",
|
59
|
+
"features"=>"Features",
|
60
|
+
"blog"=>"Blog"
|
58
61
|
}
|
59
62
|
}
|
60
63
|
```
|
@@ -65,7 +68,7 @@ end
|
|
65
68
|
|
66
69
|
|
67
70
|
## Contributing to Wombat
|
68
|
-
|
71
|
+
|
69
72
|
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
70
73
|
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
|
71
74
|
* Fork the project
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.1.0
|
1
|
+
2.1.1
|
data/examples/no_class.rb
CHANGED
@@ -6,36 +6,36 @@ data = Wombat.crawl do
|
|
6
6
|
path "/"
|
7
7
|
|
8
8
|
headline "xpath=//h1"
|
9
|
-
|
9
|
+
subheading "css=p.subheading"
|
10
10
|
|
11
|
-
|
12
|
-
e.gsub(/Explore/, "LOVE")
|
13
|
-
end
|
14
|
-
|
15
|
-
benefits do
|
16
|
-
team_mgmt "css=.column.leftmost h3"
|
17
|
-
code_review "css=.column.leftmid h3"
|
18
|
-
hosting "css=.column.rightmid h3"
|
19
|
-
collaboration "css=.column.rightmost h3"
|
11
|
+
what_is "css=.teaser h3", :list
|
20
12
|
|
21
|
-
|
22
|
-
|
13
|
+
links do
|
14
|
+
explore 'xpath=//*[@id="wrapper"]/div[1]/div/ul/li[1]/a' do |e|
|
15
|
+
e.gsub(/Explore/, "Love")
|
23
16
|
end
|
17
|
+
|
18
|
+
search 'css=.search'
|
19
|
+
features 'css=.features'
|
20
|
+
blog 'css=.blog'
|
24
21
|
end
|
25
22
|
end
|
26
23
|
|
27
24
|
=begin
|
28
25
|
pp data
|
29
26
|
{
|
30
|
-
|
31
|
-
|
32
|
-
"
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
27
|
+
"headline"=>"Build software better, together.",
|
28
|
+
"subheading"=>
|
29
|
+
"Powerful collaboration, review, and code management for open source and private development projects.",
|
30
|
+
"what_is"=>
|
31
|
+
["Great collaboration starts with communication.",
|
32
|
+
"Manage and contribute from all your devices.",
|
33
|
+
"The world’s largest open source community."],
|
34
|
+
"links"=>
|
35
|
+
{"explore"=>"Love GitHub",
|
36
|
+
"search"=>"Search",
|
37
|
+
"features"=>"Features",
|
38
|
+
"blog"=>"Blog"
|
39
39
|
}
|
40
40
|
}
|
41
41
|
=end
|
@@ -19,18 +19,10 @@ module Wombat
|
|
19
19
|
self[property_name] = property_group
|
20
20
|
property_group.instance_eval(&block)
|
21
21
|
else
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
elsif args[1] == :follow
|
27
|
-
it = Follower.new(property_name, args.first)
|
28
|
-
self[property_name] = it
|
29
|
-
it.instance_eval(&block) if block
|
30
|
-
else
|
31
|
-
self[property_name] = Property.new(property_name, *args, &block)
|
32
|
-
end
|
33
|
-
end
|
22
|
+
it = build_property(property_name, *args, &block)
|
23
|
+
self[property_name] = it
|
24
|
+
it.instance_eval(&block) if block_given? && !it.instance_of?(Property)
|
25
|
+
end
|
34
26
|
end
|
35
27
|
|
36
28
|
def to_ary
|
@@ -43,6 +35,18 @@ module Wombat
|
|
43
35
|
def wombat_property_namespaces
|
44
36
|
nil
|
45
37
|
end
|
38
|
+
|
39
|
+
protected
|
40
|
+
|
41
|
+
def build_property(name, *args, &block)
|
42
|
+
if args[1] == :iterator
|
43
|
+
Iterator.new(name, args.first)
|
44
|
+
elsif args[1] == :follow
|
45
|
+
Follower.new(name, args.first)
|
46
|
+
else
|
47
|
+
Property.new(name, *args, &block)
|
48
|
+
end
|
49
|
+
end
|
46
50
|
end
|
47
51
|
end
|
48
52
|
end
|
@@ -7,16 +7,16 @@ module Wombat
|
|
7
7
|
# Abstract base class
|
8
8
|
class Base
|
9
9
|
include Wombat::Processing::NodeSelector
|
10
|
-
|
10
|
+
|
11
11
|
def initialize(property)
|
12
12
|
@property = property
|
13
13
|
end
|
14
14
|
|
15
15
|
def locate(context, page = nil)
|
16
16
|
@context = context
|
17
|
-
|
17
|
+
|
18
18
|
raw_data = yield if block_given?
|
19
|
-
data = @property.respond_to?(:callback) && @property.callback ? @property.callback.call(raw_data) : raw_data
|
19
|
+
data = @property.respond_to?(:callback) && @property.callback ? @property.callback.call(raw_data) : raw_data
|
20
20
|
|
21
21
|
@property.wombat_property_name ? { @property.wombat_property_name => data } : data
|
22
22
|
end
|
@@ -27,6 +27,15 @@ module Wombat
|
|
27
27
|
|
28
28
|
select_nodes @property.wombat_property_selector, @property.wombat_property_namespaces
|
29
29
|
end
|
30
|
+
|
31
|
+
def filter_properties(context, page)
|
32
|
+
Hash.new.tap do |h|
|
33
|
+
@property.values
|
34
|
+
.select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
|
35
|
+
.map { |p| Factory.locator_for(p).locate(context, page) }
|
36
|
+
.map { |p| h.merge! p }
|
37
|
+
end
|
38
|
+
end
|
30
39
|
end
|
31
40
|
end
|
32
41
|
end
|
@@ -10,12 +10,7 @@ module Wombat
|
|
10
10
|
target_page = page.click node
|
11
11
|
context = target_page.parser
|
12
12
|
|
13
|
-
|
14
|
-
@property.values
|
15
|
-
.select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
|
16
|
-
.map { |p| Factory.locator_for(p).locate(context, page) }
|
17
|
-
.map { |p| h.merge! p }
|
18
|
-
end
|
13
|
+
filter_properties(context, page)
|
19
14
|
end
|
20
15
|
end
|
21
16
|
end
|
@@ -5,15 +5,10 @@ module Wombat
|
|
5
5
|
module Property
|
6
6
|
module Locators
|
7
7
|
class Iterator < Base
|
8
|
-
def locate(
|
8
|
+
def locate(context, page = nil)
|
9
9
|
super do
|
10
|
-
locate_nodes(
|
11
|
-
|
12
|
-
@property.values
|
13
|
-
.select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
|
14
|
-
.map { |p| Factory.locator_for(p).locate(node, page) }
|
15
|
-
.map { |p| h.merge! p }
|
16
|
-
end
|
10
|
+
locate_nodes(context).flat_map do |node|
|
11
|
+
filter_properties(node, page)
|
17
12
|
end
|
18
13
|
end
|
19
14
|
end
|
@@ -6,12 +6,7 @@ module Wombat
|
|
6
6
|
class PropertyGroup < Base
|
7
7
|
def locate(context, page = nil)
|
8
8
|
super do
|
9
|
-
|
10
|
-
@property.values
|
11
|
-
.select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
|
12
|
-
.map { |p| Factory.locator_for(p).locate(context, page) }
|
13
|
-
.map { |p| h.merge! p }
|
14
|
-
end
|
9
|
+
filter_properties(context, page)
|
15
10
|
end
|
16
11
|
end
|
17
12
|
end
|
data/wombat.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "wombat"
|
8
|
-
s.version = "2.1.0"
|
8
|
+
s.version = "2.1.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Felipe Lima"]
|
12
|
-
s.date = "
|
12
|
+
s.date = "2013-02-03"
|
13
13
|
s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
|
14
14
|
s.email = "felipe.lima@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wombat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.0
|
4
|
+
version: 2.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-02-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|