wombat 2.1.0 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,11 +1,11 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- activesupport (3.2.9)
4
+ activesupport (3.2.11)
5
5
  i18n (~> 0.6)
6
6
  multi_json (~> 1.0)
7
7
  diff-lcs (1.1.3)
8
- domain_name (0.5.6)
8
+ domain_name (0.5.7)
9
9
  unf (~> 0.0.3)
10
10
  fakeweb (1.3.0)
11
11
  git (1.2.5)
@@ -15,7 +15,7 @@ GEM
15
15
  git (>= 1.2.5)
16
16
  rake
17
17
  rdoc
18
- json (1.7.5)
18
+ json (1.7.6)
19
19
  mechanize (2.5.1)
20
20
  domain_name (~> 0.5, >= 0.5.1)
21
21
  mime-types (~> 1.17, >= 1.17.2)
@@ -28,7 +28,7 @@ GEM
28
28
  multi_json (1.5.0)
29
29
  net-http-digest_auth (1.2.1)
30
30
  net-http-persistent (2.8)
31
- nokogiri (1.5.5)
31
+ nokogiri (1.5.6)
32
32
  ntlm-http (0.1.1)
33
33
  rake (10.0.3)
34
34
  rdoc (3.12)
@@ -40,13 +40,13 @@ GEM
40
40
  rspec-expectations (~> 2.12.0)
41
41
  rspec-mocks (~> 2.12.0)
42
42
  rspec-core (2.12.2)
43
- rspec-expectations (2.12.0)
43
+ rspec-expectations (2.12.1)
44
44
  diff-lcs (~> 1.1.3)
45
- rspec-mocks (2.12.0)
45
+ rspec-mocks (2.12.2)
46
46
  unf (0.0.5)
47
47
  unf_ext
48
48
  unf_ext (0.0.5)
49
- vcr (2.3.0)
49
+ vcr (2.4.0)
50
50
  webrobots (0.0.13)
51
51
  yard (0.8.3)
52
52
 
data/README.md CHANGED
@@ -26,35 +26,38 @@ Wombat.crawl do
26
26
  path "/"
27
27
 
28
28
  headline "xpath=//h1"
29
- what_is "css=.column.secondary p", :html
30
- repositories "css=a.repo", :list
29
+ subheading "css=p.subheading"
31
30
 
32
- explore "xpath=//ul/li[2]/a" do |e|
33
- e.gsub(/Explore/, "LOVE")
34
- end
31
+ what_is "css=.teaser h3", :list
32
+
33
+ links do
34
+ explore 'xpath=//*[@id="wrapper"]/div[1]/div/ul/li[1]/a' do |e|
35
+ e.gsub(/Explore/, "Love")
36
+ end
35
37
 
36
- benefits do
37
- first_benefit "css=.column.leftmost h3"
38
- second_benefit "css=.column.leftmid h3"
39
- third_benefit "css=.column.rightmid h3"
40
- fourth_benefit "css=.column.rightmost h3"
38
+ search 'css=.search'
39
+ features 'css=.features'
40
+ blog 'css=.blog'
41
41
  end
42
42
  end
43
43
  ```
44
44
 
45
- ###### The code above is gonna return the following hash:
45
+ ###### The code above is gonna return the following hash:
46
46
 
47
47
  ```ruby
48
48
  {
49
- "headline" => "1,316,633 people hosting over 3,951,378 git repositories",
50
- "what_is" => "GitHub is the best way to collaborate with others. Fork, send pull requests and manage all your <strong>public</strong> and <strong>private</strong> git repositories.",
51
- "explore" => "LOVE GitHub",
52
- "repositories" => ["jQuery", "reddit", "Sparkle", "curl", "Ruby on Rails", "node.js", "ClickToFlash", "Erlang/OTP", "CakePHP", "Redis"]
53
- "benefits" => {
54
- "first_benefit" => "Team management",
55
- "second_benefit" => "Code review",
56
- "third_benefit" => "Reliable code hosting",
57
- "fourth_benefit" => "Open source collaboration"
49
+ "headline"=>"Build software better, together.",
50
+ "subheading"=> "Powerful collaboration, review, and code management for open source and private development projects.",
51
+ "what_is"=> [
52
+ "Great collaboration starts with communication.",
53
+ "Manage and contribute from all your devices.",
54
+ "The world’s largest open source community."
55
+ ],
56
+ "links"=> {
57
+ "explore"=>"Love GitHub",
58
+ "search"=>"Search",
59
+ "features"=>"Features",
60
+ "blog"=>"Blog"
58
61
  }
59
62
  }
60
63
  ```
@@ -65,7 +68,7 @@ end
65
68
 
66
69
 
67
70
  ## Contributing to Wombat
68
-
71
+
69
72
  * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
70
73
  * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
71
74
  * Fork the project
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.1.0
1
+ 2.1.1
@@ -6,36 +6,36 @@ data = Wombat.crawl do
6
6
  path "/"
7
7
 
8
8
  headline "xpath=//h1"
9
- what_is "css=.column.secondary p", :html
9
+ subheading "css=p.subheading"
10
10
 
11
- explore "xpath=//ul/li[2]/a" do |e|
12
- e.gsub(/Explore/, "LOVE")
13
- end
14
-
15
- benefits do
16
- team_mgmt "css=.column.leftmost h3"
17
- code_review "css=.column.leftmid h3"
18
- hosting "css=.column.rightmid h3"
19
- collaboration "css=.column.rightmost h3"
11
+ what_is "css=.teaser h3", :list
20
12
 
21
- links do
22
- team_mgmt "xpath=//div[@class='column leftmost']//a/@href"
13
+ links do
14
+ explore 'xpath=//*[@id="wrapper"]/div[1]/div/ul/li[1]/a' do |e|
15
+ e.gsub(/Explore/, "Love")
23
16
  end
17
+
18
+ search 'css=.search'
19
+ features 'css=.features'
20
+ blog 'css=.blog'
24
21
  end
25
22
  end
26
23
 
27
24
  =begin
28
25
  pp data
29
26
  {
30
- "headline"=>"1,900,094\n people hosting over\n 3,371,168\n repositories",
31
- "what_is"=>"GitHub is the best way to collaborate with others. Fork, send pull requests and manage all your <strong>public</strong> and <strong>private</strong> git repositories.",
32
- "explore"=>"LOVE GitHub",
33
- "benefits"=> {
34
- "team_mgmt"=>"Team management",
35
- "code_review"=>"Code review",
36
- "hosting"=>"Reliable code hosting",
37
- "collaboration"=>"Open source collaboration",
38
- "links"=>{"team_mgmt"=>"/features/projects/collaboration"}
27
+ "headline"=>"Build software better, together.",
28
+ "subheading"=>
29
+ "Powerful collaboration, review, and code management for open source and private development projects.",
30
+ "what_is"=>
31
+ ["Great collaboration starts with communication.",
32
+ "Manage and contribute from all your devices.",
33
+ "The world’s largest open source community."],
34
+ "links"=>
35
+ {"explore"=>"Love GitHub",
36
+ "search"=>"Search",
37
+ "features"=>"Features",
38
+ "blog"=>"Blog"
39
39
  }
40
40
  }
41
41
  =end
@@ -19,18 +19,10 @@ module Wombat
19
19
  self[property_name] = property_group
20
20
  property_group.instance_eval(&block)
21
21
  else
22
- if args[1] == :iterator
23
- it = Iterator.new(property_name, args.first)
24
- self[property_name] = it
25
- it.instance_eval(&block) if block
26
- elsif args[1] == :follow
27
- it = Follower.new(property_name, args.first)
28
- self[property_name] = it
29
- it.instance_eval(&block) if block
30
- else
31
- self[property_name] = Property.new(property_name, *args, &block)
32
- end
33
- end
22
+ it = build_property(property_name, *args, &block)
23
+ self[property_name] = it
24
+ it.instance_eval(&block) if block_given? && !it.instance_of?(Property)
25
+ end
34
26
  end
35
27
 
36
28
  def to_ary
@@ -43,6 +35,18 @@ module Wombat
43
35
  def wombat_property_namespaces
44
36
  nil
45
37
  end
38
+
39
+ protected
40
+
41
+ def build_property(name, *args, &block)
42
+ if args[1] == :iterator
43
+ Iterator.new(name, args.first)
44
+ elsif args[1] == :follow
45
+ Follower.new(name, args.first)
46
+ else
47
+ Property.new(name, *args, &block)
48
+ end
49
+ end
46
50
  end
47
51
  end
48
52
  end
@@ -7,16 +7,16 @@ module Wombat
7
7
  # Abstract base class
8
8
  class Base
9
9
  include Wombat::Processing::NodeSelector
10
-
10
+
11
11
  def initialize(property)
12
12
  @property = property
13
13
  end
14
14
 
15
15
  def locate(context, page = nil)
16
16
  @context = context
17
-
17
+
18
18
  raw_data = yield if block_given?
19
- data = @property.respond_to?(:callback) && @property.callback ? @property.callback.call(raw_data) : raw_data
19
+ data = @property.respond_to?(:callback) && @property.callback ? @property.callback.call(raw_data) : raw_data
20
20
 
21
21
  @property.wombat_property_name ? { @property.wombat_property_name => data } : data
22
22
  end
@@ -27,6 +27,15 @@ module Wombat
27
27
 
28
28
  select_nodes @property.wombat_property_selector, @property.wombat_property_namespaces
29
29
  end
30
+
31
+ def filter_properties(context, page)
32
+ Hash.new.tap do |h|
33
+ @property.values
34
+ .select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
35
+ .map { |p| Factory.locator_for(p).locate(context, page) }
36
+ .map { |p| h.merge! p }
37
+ end
38
+ end
30
39
  end
31
40
  end
32
41
  end
@@ -10,12 +10,7 @@ module Wombat
10
10
  target_page = page.click node
11
11
  context = target_page.parser
12
12
 
13
- Hash.new.tap do |h|
14
- @property.values
15
- .select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
16
- .map { |p| Factory.locator_for(p).locate(context, page) }
17
- .map { |p| h.merge! p }
18
- end
13
+ filter_properties(context, page)
19
14
  end
20
15
  end
21
16
  end
@@ -5,15 +5,10 @@ module Wombat
5
5
  module Property
6
6
  module Locators
7
7
  class Iterator < Base
8
- def locate(contex, page = nil)
8
+ def locate(context, page = nil)
9
9
  super do
10
- locate_nodes(contex).flat_map do |node|
11
- Hash.new.tap do |h|
12
- @property.values
13
- .select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
14
- .map { |p| Factory.locator_for(p).locate(node, page) }
15
- .map { |p| h.merge! p }
16
- end
10
+ locate_nodes(context).flat_map do |node|
11
+ filter_properties(node, page)
17
12
  end
18
13
  end
19
14
  end
@@ -6,12 +6,7 @@ module Wombat
6
6
  class PropertyGroup < Base
7
7
  def locate(context, page = nil)
8
8
  super do
9
- Hash.new.tap do |h|
10
- @property.values
11
- .select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
12
- .map { |p| Factory.locator_for(p).locate(context, page) }
13
- .map { |p| h.merge! p }
14
- end
9
+ filter_properties(context, page)
15
10
  end
16
11
  end
17
12
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "wombat"
8
- s.version = "2.1.0"
8
+ s.version = "2.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Felipe Lima"]
12
- s.date = "2012-12-15"
12
+ s.date = "2013-02-03"
13
13
  s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
14
14
  s.email = "felipe.lima@gmail.com"
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wombat
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 2.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-15 00:00:00.000000000 Z
12
+ date: 2013-02-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize