wombat 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Wombat
2
2
 
3
- [![CI Build Status](https://secure.travis-ci.org/felipecsl/wombat.png?branch=master)](travis) [![Dependency Status](https://gemnasium.com/felipecsl/wombat.png?travis)](gemnasium)
3
+ [![CI Build Status](https://secure.travis-ci.org/felipecsl/wombat.png?branch=master)][travis] [![Dependency Status](https://gemnasium.com/felipecsl/wombat.png?travis)][gemnasium]
4
4
 
5
5
  [travis]: http://travis-ci.org/felipecsl/wombat
6
6
  [gemnasium]: https://gemnasium.com/felipecsl/wombat
@@ -85,6 +85,13 @@ my_crawler.crawl
85
85
 
86
86
  * Felipe Lima ([@felipecsl](https://github.com/felipecsl))
87
87
  * [@sigi](https://github.com/sigi)
88
+ * Daniel Naves de Carvalho ([@danielnc](https://github.com/danielnc))
89
+
90
+ ## Changelog
91
+
92
+ ### version 0.3.1
93
+
94
+ * Added the ability to provide a block to Crawler#crawl to override the default crawler properties for a one-off run (thanks to @danielnc)
88
95
 
89
96
  ## Copyright
90
97
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.0
1
+ 0.3.1
@@ -10,8 +10,19 @@ module Wombat
10
10
  include Parser
11
11
  extend ActiveSupport::Concern
12
12
 
13
- def crawl
14
- parse self.class.send(:metadata)
13
+ def crawl(&block)
14
+ if block
15
+ @metadata_dup = self.class.send(:metadata).clone
16
+ instance_eval do
17
+ def method_missing method, *args, &block
18
+ @metadata_dup.send method, *args, &block
19
+ end
20
+ end
21
+ self.instance_eval &block
22
+ parse @metadata_dup
23
+ else
24
+ parse self.class.send(:metadata)
25
+ end
15
26
  end
16
27
 
17
28
  module ClassMethods
@@ -4,7 +4,7 @@ module Wombat
4
4
  return [selector.to_s] if selector.is_a? Symbol
5
5
  return context.xpath selector[6..-1], namespaces if selector.start_with? "xpath="
6
6
  return context.css selector[4..-1] if selector.start_with? "css="
7
- nil
7
+ selector
8
8
  end
9
9
  end
10
10
  end
data/spec/crawler_spec.rb CHANGED
@@ -9,16 +9,16 @@ describe Wombat::Crawler do
9
9
 
10
10
  it 'should call the provided block' do
11
11
  event_called = false
12
-
12
+
13
13
  @crawler.event { event_called = true }
14
-
14
+
15
15
  event_called.should be_true
16
16
  end
17
17
 
18
18
  it 'should provide metadata to yielded block' do
19
19
  @crawler.event do |e|
20
20
  e.should_not be_nil
21
- end
21
+ end
22
22
  end
23
23
 
24
24
  it 'should store assigned metadata information' do
@@ -38,7 +38,7 @@ describe Wombat::Crawler do
38
38
  arg["venue"]["name"].selector.should == "Scooba"
39
39
  arg["location"]["latitude"].selector.should == -50.2323
40
40
  end
41
-
41
+
42
42
  @crawler_instance.crawl
43
43
  end
44
44
 
@@ -57,8 +57,8 @@ describe Wombat::Crawler do
57
57
  end
58
58
 
59
59
  it 'should be able to assign arbitrary plain text metadata' do
60
- @crawler.some_data("/event/list", :html, "geo") {|p| true }
61
-
60
+ @crawler.some_data("/event/list", :html, "geo") { |p| true }
61
+
62
62
  @crawler_instance.should_receive(:parse) do |arg|
63
63
  prop = arg['some_data']
64
64
  prop.name.should == "some_data"
@@ -67,7 +67,7 @@ describe Wombat::Crawler do
67
67
  prop.namespaces.should == "geo"
68
68
  prop.callback.should_not be_nil
69
69
  end
70
-
70
+
71
71
  @crawler_instance.crawl
72
72
  end
73
73
 
@@ -107,16 +107,39 @@ describe Wombat::Crawler do
107
107
  it["title"].selector.should == "css=.title"
108
108
  it["body"].selector.should == "css=.body"
109
109
  it["event"]["all"].selector.should == "yeah"
110
- end
110
+ end
111
111
 
112
112
  @crawler_instance.crawl
113
113
  end
114
114
 
115
- it 'should assign metadata forma' do
115
+ it 'should assign metadata format' do
116
116
  @crawler_instance.should_receive(:parse) do |arg|
117
117
  arg[:format].should == :xml
118
118
  end
119
119
  @crawler.format :xml
120
120
  @crawler_instance.crawl
121
121
  end
122
+
123
+ it 'should crawl with block' do
124
+ @crawler.base_url "danielnc.com"
125
+ @crawler.list_page "/itens"
126
+
127
+ @crawler_instance.should_receive(:parse) do |arg|
128
+ arg[:base_url].should == "danielnc.com"
129
+ arg[:list_page].should == "/itens/1"
130
+ end
131
+
132
+ @crawler_instance.crawl do
133
+ list_page "/itens/1"
134
+ end
135
+
136
+ another_instance = @crawler.new
137
+
138
+ another_instance.should_receive(:parse) do |arg|
139
+ arg[:base_url].should == "danielnc.com"
140
+ arg[:list_page].should == "/itens"
141
+ end
142
+
143
+ another_instance.crawl
144
+ end
122
145
  end
data/wombat.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "wombat"
8
- s.version = "0.3.0"
8
+ s.version = "0.3.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Felipe Lima"]
12
- s.date = "2012-03-25"
12
+ s.date = "2012-04-12"
13
13
  s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
14
14
  s.email = "felipe.lima@gmail.com"
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wombat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-25 00:00:00.000000000 Z
12
+ date: 2012-04-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize