wombat 2.1.0 → 2.1.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each version as they appear in the respective public registry.
@@ -1,11 +1,11 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- activesupport (3.2.9)
4
+ activesupport (3.2.11)
5
5
  i18n (~> 0.6)
6
6
  multi_json (~> 1.0)
7
7
  diff-lcs (1.1.3)
8
- domain_name (0.5.6)
8
+ domain_name (0.5.7)
9
9
  unf (~> 0.0.3)
10
10
  fakeweb (1.3.0)
11
11
  git (1.2.5)
@@ -15,7 +15,7 @@ GEM
15
15
  git (>= 1.2.5)
16
16
  rake
17
17
  rdoc
18
- json (1.7.5)
18
+ json (1.7.6)
19
19
  mechanize (2.5.1)
20
20
  domain_name (~> 0.5, >= 0.5.1)
21
21
  mime-types (~> 1.17, >= 1.17.2)
@@ -28,7 +28,7 @@ GEM
28
28
  multi_json (1.5.0)
29
29
  net-http-digest_auth (1.2.1)
30
30
  net-http-persistent (2.8)
31
- nokogiri (1.5.5)
31
+ nokogiri (1.5.6)
32
32
  ntlm-http (0.1.1)
33
33
  rake (10.0.3)
34
34
  rdoc (3.12)
@@ -40,13 +40,13 @@ GEM
40
40
  rspec-expectations (~> 2.12.0)
41
41
  rspec-mocks (~> 2.12.0)
42
42
  rspec-core (2.12.2)
43
- rspec-expectations (2.12.0)
43
+ rspec-expectations (2.12.1)
44
44
  diff-lcs (~> 1.1.3)
45
- rspec-mocks (2.12.0)
45
+ rspec-mocks (2.12.2)
46
46
  unf (0.0.5)
47
47
  unf_ext
48
48
  unf_ext (0.0.5)
49
- vcr (2.3.0)
49
+ vcr (2.4.0)
50
50
  webrobots (0.0.13)
51
51
  yard (0.8.3)
52
52
 
data/README.md CHANGED
@@ -26,35 +26,38 @@ Wombat.crawl do
26
26
  path "/"
27
27
 
28
28
  headline "xpath=//h1"
29
- what_is "css=.column.secondary p", :html
30
- repositories "css=a.repo", :list
29
+ subheading "css=p.subheading"
31
30
 
32
- explore "xpath=//ul/li[2]/a" do |e|
33
- e.gsub(/Explore/, "LOVE")
34
- end
31
+ what_is "css=.teaser h3", :list
32
+
33
+ links do
34
+ explore 'xpath=//*[@id="wrapper"]/div[1]/div/ul/li[1]/a' do |e|
35
+ e.gsub(/Explore/, "Love")
36
+ end
35
37
 
36
- benefits do
37
- first_benefit "css=.column.leftmost h3"
38
- second_benefit "css=.column.leftmid h3"
39
- third_benefit "css=.column.rightmid h3"
40
- fourth_benefit "css=.column.rightmost h3"
38
+ search 'css=.search'
39
+ features 'css=.features'
40
+ blog 'css=.blog'
41
41
  end
42
42
  end
43
43
  ```
44
44
 
45
- ###### The code above is gonna return the following hash:
45
+ ###### The code above is gonna return the following hash:
46
46
 
47
47
  ```ruby
48
48
  {
49
- "headline" => "1,316,633 people hosting over 3,951,378 git repositories",
50
- "what_is" => "GitHub is the best way to collaborate with others. Fork, send pull requests and manage all your <strong>public</strong> and <strong>private</strong> git repositories.",
51
- "explore" => "LOVE GitHub",
52
- "repositories" => ["jQuery", "reddit", "Sparkle", "curl", "Ruby on Rails", "node.js", "ClickToFlash", "Erlang/OTP", "CakePHP", "Redis"]
53
- "benefits" => {
54
- "first_benefit" => "Team management",
55
- "second_benefit" => "Code review",
56
- "third_benefit" => "Reliable code hosting",
57
- "fourth_benefit" => "Open source collaboration"
49
+ "headline"=>"Build software better, together.",
50
+ "subheading"=> "Powerful collaboration, review, and code management for open source and private development projects.",
51
+ "what_is"=> [
52
+ "Great collaboration starts with communication.",
53
+ "Manage and contribute from all your devices.",
54
+ "The world’s largest open source community."
55
+ ],
56
+ "links"=> {
57
+ "explore"=>"Love GitHub",
58
+ "search"=>"Search",
59
+ "features"=>"Features",
60
+ "blog"=>"Blog"
58
61
  }
59
62
  }
60
63
  ```
@@ -65,7 +68,7 @@ end
65
68
 
66
69
 
67
70
  ## Contributing to Wombat
68
-
71
+
69
72
  * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
70
73
  * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it
71
74
  * Fork the project
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.1.0
1
+ 2.1.1
@@ -6,36 +6,36 @@ data = Wombat.crawl do
6
6
  path "/"
7
7
 
8
8
  headline "xpath=//h1"
9
- what_is "css=.column.secondary p", :html
9
+ subheading "css=p.subheading"
10
10
 
11
- explore "xpath=//ul/li[2]/a" do |e|
12
- e.gsub(/Explore/, "LOVE")
13
- end
14
-
15
- benefits do
16
- team_mgmt "css=.column.leftmost h3"
17
- code_review "css=.column.leftmid h3"
18
- hosting "css=.column.rightmid h3"
19
- collaboration "css=.column.rightmost h3"
11
+ what_is "css=.teaser h3", :list
20
12
 
21
- links do
22
- team_mgmt "xpath=//div[@class='column leftmost']//a/@href"
13
+ links do
14
+ explore 'xpath=//*[@id="wrapper"]/div[1]/div/ul/li[1]/a' do |e|
15
+ e.gsub(/Explore/, "Love")
23
16
  end
17
+
18
+ search 'css=.search'
19
+ features 'css=.features'
20
+ blog 'css=.blog'
24
21
  end
25
22
  end
26
23
 
27
24
  =begin
28
25
  pp data
29
26
  {
30
- "headline"=>"1,900,094\n people hosting over\n 3,371,168\n repositories",
31
- "what_is"=>"GitHub is the best way to collaborate with others. Fork, send pull requests and manage all your <strong>public</strong> and <strong>private</strong> git repositories.",
32
- "explore"=>"LOVE GitHub",
33
- "benefits"=> {
34
- "team_mgmt"=>"Team management",
35
- "code_review"=>"Code review",
36
- "hosting"=>"Reliable code hosting",
37
- "collaboration"=>"Open source collaboration",
38
- "links"=>{"team_mgmt"=>"/features/projects/collaboration"}
27
+ "headline"=>"Build software better, together.",
28
+ "subheading"=>
29
+ "Powerful collaboration, review, and code management for open source and private development projects.",
30
+ "what_is"=>
31
+ ["Great collaboration starts with communication.",
32
+ "Manage and contribute from all your devices.",
33
+ "The world’s largest open source community."],
34
+ "links"=>
35
+ {"explore"=>"Love GitHub",
36
+ "search"=>"Search",
37
+ "features"=>"Features",
38
+ "blog"=>"Blog"
39
39
  }
40
40
  }
41
41
  =end
@@ -19,18 +19,10 @@ module Wombat
19
19
  self[property_name] = property_group
20
20
  property_group.instance_eval(&block)
21
21
  else
22
- if args[1] == :iterator
23
- it = Iterator.new(property_name, args.first)
24
- self[property_name] = it
25
- it.instance_eval(&block) if block
26
- elsif args[1] == :follow
27
- it = Follower.new(property_name, args.first)
28
- self[property_name] = it
29
- it.instance_eval(&block) if block
30
- else
31
- self[property_name] = Property.new(property_name, *args, &block)
32
- end
33
- end
22
+ it = build_property(property_name, *args, &block)
23
+ self[property_name] = it
24
+ it.instance_eval(&block) if block_given? && !it.instance_of?(Property)
25
+ end
34
26
  end
35
27
 
36
28
  def to_ary
@@ -43,6 +35,18 @@ module Wombat
43
35
  def wombat_property_namespaces
44
36
  nil
45
37
  end
38
+
39
+ protected
40
+
41
+ def build_property(name, *args, &block)
42
+ if args[1] == :iterator
43
+ Iterator.new(name, args.first)
44
+ elsif args[1] == :follow
45
+ Follower.new(name, args.first)
46
+ else
47
+ Property.new(name, *args, &block)
48
+ end
49
+ end
46
50
  end
47
51
  end
48
52
  end
@@ -7,16 +7,16 @@ module Wombat
7
7
  # Abstract base class
8
8
  class Base
9
9
  include Wombat::Processing::NodeSelector
10
-
10
+
11
11
  def initialize(property)
12
12
  @property = property
13
13
  end
14
14
 
15
15
  def locate(context, page = nil)
16
16
  @context = context
17
-
17
+
18
18
  raw_data = yield if block_given?
19
- data = @property.respond_to?(:callback) && @property.callback ? @property.callback.call(raw_data) : raw_data
19
+ data = @property.respond_to?(:callback) && @property.callback ? @property.callback.call(raw_data) : raw_data
20
20
 
21
21
  @property.wombat_property_name ? { @property.wombat_property_name => data } : data
22
22
  end
@@ -27,6 +27,15 @@ module Wombat
27
27
 
28
28
  select_nodes @property.wombat_property_selector, @property.wombat_property_namespaces
29
29
  end
30
+
31
+ def filter_properties(context, page)
32
+ Hash.new.tap do |h|
33
+ @property.values
34
+ .select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
35
+ .map { |p| Factory.locator_for(p).locate(context, page) }
36
+ .map { |p| h.merge! p }
37
+ end
38
+ end
30
39
  end
31
40
  end
32
41
  end
@@ -10,12 +10,7 @@ module Wombat
10
10
  target_page = page.click node
11
11
  context = target_page.parser
12
12
 
13
- Hash.new.tap do |h|
14
- @property.values
15
- .select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
16
- .map { |p| Factory.locator_for(p).locate(context, page) }
17
- .map { |p| h.merge! p }
18
- end
13
+ filter_properties(context, page)
19
14
  end
20
15
  end
21
16
  end
@@ -5,15 +5,10 @@ module Wombat
5
5
  module Property
6
6
  module Locators
7
7
  class Iterator < Base
8
- def locate(contex, page = nil)
8
+ def locate(context, page = nil)
9
9
  super do
10
- locate_nodes(contex).flat_map do |node|
11
- Hash.new.tap do |h|
12
- @property.values
13
- .select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
14
- .map { |p| Factory.locator_for(p).locate(node, page) }
15
- .map { |p| h.merge! p }
16
- end
10
+ locate_nodes(context).flat_map do |node|
11
+ filter_properties(node, page)
17
12
  end
18
13
  end
19
14
  end
@@ -6,12 +6,7 @@ module Wombat
6
6
  class PropertyGroup < Base
7
7
  def locate(context, page = nil)
8
8
  super do
9
- Hash.new.tap do |h|
10
- @property.values
11
- .select { |v| v.is_a?(Wombat::DSL::Property) || v.is_a?(Wombat::DSL::PropertyGroup) }
12
- .map { |p| Factory.locator_for(p).locate(context, page) }
13
- .map { |p| h.merge! p }
14
- end
9
+ filter_properties(context, page)
15
10
  end
16
11
  end
17
12
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "wombat"
8
- s.version = "2.1.0"
8
+ s.version = "2.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Felipe Lima"]
12
- s.date = "2012-12-15"
12
+ s.date = "2013-02-03"
13
13
  s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
14
14
  s.email = "felipe.lima@gmail.com"
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wombat
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 2.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-15 00:00:00.000000000 Z
12
+ date: 2013-02-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize