wombat 2.1.0 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +7 -7
- data/README.md +24 -21
- data/VERSION +1 -1
- data/examples/no_class.rb +21 -21
- data/lib/wombat/dsl/property_group.rb +16 -12
- data/lib/wombat/property/locators/base.rb +12 -3
- data/lib/wombat/property/locators/follow.rb +1 -6
- data/lib/wombat/property/locators/iterator.rb +3 -8
- data/lib/wombat/property/locators/property_group.rb +1 -6
- data/wombat.gemspec +2 -2
- metadata +2 -2
data/Gemfile.lock
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
-
activesupport (3.2.
|
4
|
+
activesupport (3.2.11)
|
5
5
|
i18n (~> 0.6)
|
6
6
|
multi_json (~> 1.0)
|
7
7
|
diff-lcs (1.1.3)
|
8
|
-
domain_name (0.5.
|
8
|
+
domain_name (0.5.7)
|
9
9
|
unf (~> 0.0.3)
|
10
10
|
fakeweb (1.3.0)
|
11
11
|
git (1.2.5)
|
@@ -15,7 +15,7 @@ GEM
|
|
15
15
|
git (>= 1.2.5)
|
16
16
|
rake
|
17
17
|
rdoc
|
18
|
-
json (1.7.
|
18
|
+
json (1.7.6)
|
19
19
|
mechanize (2.5.1)
|
20
20
|
domain_name (~> 0.5, >= 0.5.1)
|
21
21
|
mime-types (~> 1.17, >= 1.17.2)
|
@@ -28,7 +28,7 @@ GEM
|
|
28
28
|
multi_json (1.5.0)
|
29
29
|
net-http-digest_auth (1.2.1)
|
30
30
|
net-http-persistent (2.8)
|
31
|
-
nokogiri (1.5.
|
31
|
+
nokogiri (1.5.6)
|
32
32
|
ntlm-http (0.1.1)
|
33
33
|
rake (10.0.3)
|
34
34
|
rdoc (3.12)
|
@@ -40,13 +40,13 @@ GEM
|
|
40
40
|
rspec-expectations (~> 2.12.0)
|
41
41
|
rspec-mocks (~> 2.12.0)
|
42
42
|
rspec-core (2.12.2)
|
43
|
-
rspec-expectations (2.12.
|
43
|
+
rspec-expectations (2.12.1)
|
44
44
|
diff-lcs (~> 1.1.3)
|
45
|
-
rspec-mocks (2.12.
|
45
|
+
rspec-mocks (2.12.2)
|
46
46
|
unf (0.0.5)
|
47
47
|
unf_ext
|
48
48
|
unf_ext (0.0.5)
|
49
|
-
vcr (2.
|
49
|
+
vcr (2.4.0)
|
50
50
|
webrobots (0.0.13)
|
51
51
|
yard (0.8.3)
|
52
52
|
|
data/README.md
CHANGED
@@ -26,35 +26,38 @@ Wombat.crawl do
|
|
26
26
|
path "/"
|
27
27
|
|
28
28
|
headline "xpath=//h1"
|
29
|
-
|
30
|
-
repositories "css=a.repo", :list
|
29
|
+
subheading "css=p.subheading"
|
31
30
|
|
32
|
-
|
33
|
-
|
34
|
-
|
31
|
+
what_is "css=.teaser h3", :list
|
32
|
+
|
33
|
+
links do
|
34
|
+
explore 'xpath=//*[@id="wrapper"]/div[1]/div/ul/li[1]/a' do |e|
|
35
|
+
e.gsub(/Explore/, "Love")
|
36
|
+
end
|
35
37
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
third_benefit "css=.column.rightmid h3"
|
40
|
-
fourth_benefit "css=.column.rightmost h3"
|
38
|
+
search 'css=.search'
|
39
|
+
features 'css=.features'
|
40
|
+
blog 'css=.blog'
|
41
41
|
end
|
42
42
|
end
|
43
43
|
```
|
44
44
|
|
45
|
-
###### The code above is gonna return the following hash:
|
45
|
+
###### The code above is gonna return the following hash:
|
46
46
|
|
47
47
|
```ruby
|
48
48
|
{
|
49
|
-
"headline"
|
50
|
-
"
|
51
|
-
"
|
52
|
-
|
53
|
-
|
54
|
-
"
|
55
|
-
|
56
|
-
|
57
|
-
"
|
49
|
+
"headline"=>"Build software better, together.",
|
50
|
+
"subheading"=> "Powerful collaboration, review, and code management for open source and private development projects.",
|
51
|
+
"what_is"=> [
|
52
|
+
"Great collaboration starts with communication.",
|
53
|
+
"Manage and contribute from all your devices.",
|
54
|
+
"The world’s largest open source community."
|
55
|
+
],
|
56
|
+
"links"=> {
|
57
|
+
"explore"=>"Love GitHub",
|
58
|
+
"search"=>"Search",
|
59
|
+
"features"=>"Features",
|
60
|
+
"blog"=>"Blog"
|
58
61
|
}
|
59
62
|
}
|
60
63
|
```
|
@@ -65,7 +68,7 @@ end
|
|
65
68
|
|
66
69
|
|
67
70
|
## Contributing to Wombat
|
68
|
-
|
71
|
+
|
69
72
|
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
|
70
73
|
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
|
71
74
|
* Fork the project
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.1.0
|
1
|
+
2.1.1
|
data/examples/no_class.rb
CHANGED
@@ -6,36 +6,36 @@ data = Wombat.crawl do
|
|
6
6
|
path "/"
|
7
7
|
|
8
8
|
headline "xpath=//h1"
|
9
|
-
|
9
|
+
subheading "css=p.subheading"
|
10
10
|
|
11
|
-
|
12
|
-
e.gsub(/Explore/, "LOVE")
|
13
|
-
end
|
14
|
-
|
15
|
-
benefits do
|
16
|
-
team_mgmt "css=.column.leftmost h3"
|
17
|
-
code_review "css=.column.leftmid h3"
|
18
|
-
hosting "css=.column.rightmid h3"
|
19
|
-
collaboration "css=.column.rightmost h3"
|
11
|
+
what_is "css=.teaser h3", :list
|
20
12
|
|
21
|
-
|
22
|
-
|
13
|
+
links do
|
14
|
+
explore 'xpath=//*[@id="wrapper"]/div[1]/div/ul/li[1]/a' do |e|
|
15
|
+
e.gsub(/Explore/, "Love")
|
23
16
|
end
|
17
|
+
|
18
|
+
search 'css=.search'
|
19
|
+
features 'css=.features'
|
20
|
+
blog 'css=.blog'
|
24
21
|
end
|
25
22
|
end
|
26
23
|
|
27
24
|
=begin
|
28
25
|
pp data
|
29
26
|
{
|
30
|
-
|
31
|
-
|
32
|
-
"
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
27
|
+
"headline"=>"Build software better, together.",
|
28
|
+
"subheading"=>
|
29
|
+
"Powerful collaboration, review, and code management for open source and private development projects.",
|
30
|
+
"what_is"=>
|
31
|
+
["Great collaboration starts with communication.",
|
32
|
+
"Manage and contribute from all your devices.",
|
33
|
+
"The world’s largest open source community."],
|
34
|
+
"links"=>
|
35
|
+
{"explore"=>"Love GitHub",
|
36
|
+
"search"=>"Search",
|
37
|
+
"features"=>"Features",
|
38
|
+
"blog"=>"Blog"
|
39
39
|
}
|
40
40
|
}
|
41
41
|
=end
|
@@ -19,18 +19,10 @@ module Wombat
|
|
19
19
|
self[property_name] = property_group
|
20
20
|
property_group.instance_eval(&block)
|
21
21
|
else
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
elsif args[1] == :follow
|
27
|
-
it = Follower.new(property_name, args.first)
|
28
|
-
self[property_name] = it
|
29
|
-
it.instance_eval(&block) if block
|
30
|
-
else
|
31
|
-
self[property_name] = Property.new(property_name, *args, &block)
|
32
|
-
end
|
33
|
-
end
|
22
|
+
it = build_property(property_name, *args, &block)
|
23
|
+
self[property_name] = it
|
24
|
+
it.instance_eval(&block) if block_given? && !it.instance_of?(Property)
|
25
|
+
end
|
34
26
|
end
|
35
27
|
|
36
28
|
def to_ary
|
@@ -43,6 +35,18 @@ module Wombat
|
|
43
35
|
def wombat_property_namespaces
|
44
36
|
nil
|
45
37
|
end
|
38
|
+
|
39
|
+
protected
|
40
|
+
|
41
|
+
def build_property(name, *args, &block)
|
42
|
+
if args[1] == :iterator
|
43
|
+
Iterator.new(name, args.first)
|
44
|
+
elsif args[1] == :follow
|
45
|
+
Follower.new(name, args.first)
|
46
|
+
else
|
47
|
+
Property.new(name, *args, &block)
|
48
|
+
end
|
49
|
+
end
|
46
50
|
end
|
47
51
|
end
|
48
52
|
end
|
@@ -7,16 +7,16 @@ module Wombat
|
|
7
7
|
# Abstract base class
|
8
8
|
class Base
|
9
9
|
include Wombat::Processing::NodeSelector
|
10
|
-
|
10
|
+
|
11
11
|
def initialize(property)
|
12
12
|
@property = property
|
13
13
|
end
|
14
14
|
|
15
15
|
def locate(context, page = nil)
|
16
16
|
@context = context
|
17
|
-
|
17
|
+
|
18
18
|
raw_data = yield if block_given?
|
19
|
-
data = @property.respond_to?(:callback) && @property.callback ? @property.callback.call(raw_data) : raw_data
|
19
|
+
data = @property.respond_to?(:callback) && @property.callback ? @property.callback.call(raw_data) : raw_data
|
20
20
|
|
21
21
|
@property.wombat_property_name ? { @property.wombat_property_name => data } : data
|
22
22
|
end
|
@@ -27,6 +27,15 @@ module Wombat
|
|
27
27
|
|
28
28
|
select_nodes @property.wombat_property_selector, @property.wombat_property_namespaces
|
29
29
|
end
|
30
|
+
|
31
|
+
def filter_properties(context, page)
|
32
|
+
Hash.new.tap do |h|
|
33
|
+
@property.values
|
34
|
+
.select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
|
35
|
+
.map { |p| Factory.locator_for(p).locate(context, page) }
|
36
|
+
.map { |p| h.merge! p }
|
37
|
+
end
|
38
|
+
end
|
30
39
|
end
|
31
40
|
end
|
32
41
|
end
|
@@ -10,12 +10,7 @@ module Wombat
|
|
10
10
|
target_page = page.click node
|
11
11
|
context = target_page.parser
|
12
12
|
|
13
|
-
|
14
|
-
@property.values
|
15
|
-
.select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
|
16
|
-
.map { |p| Factory.locator_for(p).locate(context, page) }
|
17
|
-
.map { |p| h.merge! p }
|
18
|
-
end
|
13
|
+
filter_properties(context, page)
|
19
14
|
end
|
20
15
|
end
|
21
16
|
end
|
@@ -5,15 +5,10 @@ module Wombat
|
|
5
5
|
module Property
|
6
6
|
module Locators
|
7
7
|
class Iterator < Base
|
8
|
-
def locate(
|
8
|
+
def locate(context, page = nil)
|
9
9
|
super do
|
10
|
-
locate_nodes(
|
11
|
-
|
12
|
-
@property.values
|
13
|
-
.select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
|
14
|
-
.map { |p| Factory.locator_for(p).locate(node, page) }
|
15
|
-
.map { |p| h.merge! p }
|
16
|
-
end
|
10
|
+
locate_nodes(context).flat_map do |node|
|
11
|
+
filter_properties(node, page)
|
17
12
|
end
|
18
13
|
end
|
19
14
|
end
|
@@ -6,12 +6,7 @@ module Wombat
|
|
6
6
|
class PropertyGroup < Base
|
7
7
|
def locate(context, page = nil)
|
8
8
|
super do
|
9
|
-
|
10
|
-
@property.values
|
11
|
-
.select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
|
12
|
-
.map { |p| Factory.locator_for(p).locate(context, page) }
|
13
|
-
.map { |p| h.merge! p }
|
14
|
-
end
|
9
|
+
filter_properties(context, page)
|
15
10
|
end
|
16
11
|
end
|
17
12
|
end
|
data/wombat.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "wombat"
|
8
|
-
s.version = "2.1.0"
|
8
|
+
s.version = "2.1.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Felipe Lima"]
|
12
|
-
s.date = "
|
12
|
+
s.date = "2013-02-03"
|
13
13
|
s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
|
14
14
|
s.email = "felipe.lima@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wombat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.0
|
4
|
+
version: 2.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-02-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|