wombat 0.3.0 → 0.3.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Wombat
2
2
 
3
- [![CI Build Status](https://secure.travis-ci.org/felipecsl/wombat.png?branch=master)](travis) [![Dependency Status](https://gemnasium.com/felipecsl/wombat.png?travis)](gemnasium)
3
+ [![CI Build Status](https://secure.travis-ci.org/felipecsl/wombat.png?branch=master)][travis] [![Dependency Status](https://gemnasium.com/felipecsl/wombat.png?travis)][gemnasium]
4
4
 
5
5
  [travis]: http://travis-ci.org/felipecsl/wombat
6
6
  [gemnasium]: https://gemnasium.com/felipecsl/wombat
@@ -85,6 +85,13 @@ my_crawler.crawl
85
85
 
86
86
  * Felipe Lima ([@felipecsl](https://github.com/felipecsl))
87
87
  * [@sigi](https://github.com/sigi)
88
+ * Daniel Naves de Carvalho ([@danielnc](https://github.com/danielnc))
89
+
90
+ ## Changelog
91
+
92
+ ### version 0.3.1
93
+
94
+ * Added the ability to provide a block to Crawler#crawl and override the default crawler properties for a one-off run (thanks to @danielnc)
88
95
 
89
96
  ## Copyright
90
97
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.0
1
+ 0.3.1
@@ -10,8 +10,19 @@ module Wombat
10
10
  include Parser
11
11
  extend ActiveSupport::Concern
12
12
 
13
- def crawl
14
- parse self.class.send(:metadata)
13
+ def crawl(&block)
14
+ if block
15
+ @metadata_dup = self.class.send(:metadata).clone
16
+ instance_eval do
17
+ def method_missing method, *args, &block
18
+ @metadata_dup.send method, *args, &block
19
+ end
20
+ end
21
+ self.instance_eval &block
22
+ parse @metadata_dup
23
+ else
24
+ parse self.class.send(:metadata)
25
+ end
15
26
  end
16
27
 
17
28
  module ClassMethods
@@ -4,7 +4,7 @@ module Wombat
4
4
  return [selector.to_s] if selector.is_a? Symbol
5
5
  return context.xpath selector[6..-1], namespaces if selector.start_with? "xpath="
6
6
  return context.css selector[4..-1] if selector.start_with? "css="
7
- nil
7
+ selector
8
8
  end
9
9
  end
10
10
  end
data/spec/crawler_spec.rb CHANGED
@@ -9,16 +9,16 @@ describe Wombat::Crawler do
9
9
 
10
10
  it 'should call the provided block' do
11
11
  event_called = false
12
-
12
+
13
13
  @crawler.event { event_called = true }
14
-
14
+
15
15
  event_called.should be_true
16
16
  end
17
17
 
18
18
  it 'should provide metadata to yielded block' do
19
19
  @crawler.event do |e|
20
20
  e.should_not be_nil
21
- end
21
+ end
22
22
  end
23
23
 
24
24
  it 'should store assigned metadata information' do
@@ -38,7 +38,7 @@ describe Wombat::Crawler do
38
38
  arg["venue"]["name"].selector.should == "Scooba"
39
39
  arg["location"]["latitude"].selector.should == -50.2323
40
40
  end
41
-
41
+
42
42
  @crawler_instance.crawl
43
43
  end
44
44
 
@@ -57,8 +57,8 @@ describe Wombat::Crawler do
57
57
  end
58
58
 
59
59
  it 'should be able to assign arbitrary plain text metadata' do
60
- @crawler.some_data("/event/list", :html, "geo") {|p| true }
61
-
60
+ @crawler.some_data("/event/list", :html, "geo") { |p| true }
61
+
62
62
  @crawler_instance.should_receive(:parse) do |arg|
63
63
  prop = arg['some_data']
64
64
  prop.name.should == "some_data"
@@ -67,7 +67,7 @@ describe Wombat::Crawler do
67
67
  prop.namespaces.should == "geo"
68
68
  prop.callback.should_not be_nil
69
69
  end
70
-
70
+
71
71
  @crawler_instance.crawl
72
72
  end
73
73
 
@@ -107,16 +107,39 @@ describe Wombat::Crawler do
107
107
  it["title"].selector.should == "css=.title"
108
108
  it["body"].selector.should == "css=.body"
109
109
  it["event"]["all"].selector.should == "yeah"
110
- end
110
+ end
111
111
 
112
112
  @crawler_instance.crawl
113
113
  end
114
114
 
115
- it 'should assign metadata forma' do
115
+ it 'should assign metadata format' do
116
116
  @crawler_instance.should_receive(:parse) do |arg|
117
117
  arg[:format].should == :xml
118
118
  end
119
119
  @crawler.format :xml
120
120
  @crawler_instance.crawl
121
121
  end
122
+
123
+ it 'should crawl with block' do
124
+ @crawler.base_url "danielnc.com"
125
+ @crawler.list_page "/itens"
126
+
127
+ @crawler_instance.should_receive(:parse) do |arg|
128
+ arg[:base_url].should == "danielnc.com"
129
+ arg[:list_page].should == "/itens/1"
130
+ end
131
+
132
+ @crawler_instance.crawl do
133
+ list_page "/itens/1"
134
+ end
135
+
136
+ another_instance = @crawler.new
137
+
138
+ another_instance.should_receive(:parse) do |arg|
139
+ arg[:base_url].should == "danielnc.com"
140
+ arg[:list_page].should == "/itens"
141
+ end
142
+
143
+ another_instance.crawl
144
+ end
122
145
  end
data/wombat.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "wombat"
8
- s.version = "0.3.0"
8
+ s.version = "0.3.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Felipe Lima"]
12
- s.date = "2012-03-25"
12
+ s.date = "2012-04-12"
13
13
  s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
14
14
  s.email = "felipe.lima@gmail.com"
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wombat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-25 00:00:00.000000000 Z
12
+ date: 2012-04-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize