wombat 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -18,11 +18,14 @@ GEM
18
18
  nokogiri (~> 1.4)
19
19
  ntlm-http (~> 0.1, >= 0.1.1)
20
20
  webrobots (~> 0.0, >= 0.0.9)
21
+ mime-types (1.17.2)
21
22
  net-http-digest_auth (1.2)
22
23
  net-http-persistent (2.3.3)
23
24
  nokogiri (1.5.0)
24
25
  ntlm-http (0.1.1)
25
26
  rake (0.9.2.2)
27
+ rest-client (1.6.7)
28
+ mime-types (>= 1.16)
26
29
  rspec (2.7.0)
27
30
  rspec-core (~> 2.7.0)
28
31
  rspec-expectations (~> 2.7.0)
@@ -49,6 +52,7 @@ DEPENDENCIES
49
52
  jeweler
50
53
  mechanize
51
54
  rake
55
+ rest-client
52
56
  rspec
53
57
  vcr (= 2.0.0.rc1)
54
58
  yard
data/README.md CHANGED
@@ -63,7 +63,7 @@ my_crawler.crawl
63
63
  }
64
64
  ```
65
65
 
66
- ### For additional documentation, please check the project [Wiki](http://github.com/felipecsl/wombat/wiki).
66
+ ### For the documentation, please check the project [Wiki](http://github.com/felipecsl/wombat/wiki).
67
67
 
68
68
 
69
69
  ## Contributing to Wombat
data/Rakefile CHANGED
@@ -16,6 +16,7 @@ Jeweler::Tasks.new do |gem|
16
16
  gem.description = %Q{Generic Web crawler with a DSL that parses structured data from web pages}
17
17
  gem.email = "felipe.lima@gmail.com"
18
18
  gem.authors = ["Felipe Lima"]
19
+ gem.required_ruby_version = ">= 1.9"
19
20
  # dependencies defined in Gemfile
20
21
  end
21
22
 
@@ -26,4 +27,4 @@ RSpec::Core::RakeTask.new(:spec)
26
27
  task :test => :spec
27
28
  task :default => :spec
28
29
 
29
- YARD::Rake::YardocTask.new
30
+ YARD::Rake::YardocTask.new
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.4
1
+ 0.2.5
@@ -10,13 +10,8 @@ module Wombat
10
10
  include Parser
11
11
  extend ActiveSupport::Concern
12
12
 
13
- module InstanceMethods
14
- def crawl
15
- parse self.class.send(:metadata)
16
- end
17
-
18
- def supports_city?
19
- end
13
+ def crawl
14
+ parse self.class.send(:metadata)
20
15
  end
21
16
 
22
17
  module ClassMethods
@@ -28,17 +23,10 @@ module Wombat
28
23
  metadata.for_each(selector).instance_eval(&block) if block
29
24
  end
30
25
 
31
- def format type
32
- metadata.document_format = type
33
- end
34
-
35
26
  def follow_links selector
36
27
 
37
28
  end
38
29
 
39
- def supported_cities
40
- end
41
-
42
30
  def to_ary
43
31
  end
44
32
 
@@ -4,10 +4,8 @@ require 'wombat/iterator'
4
4
 
5
5
  module Wombat
6
6
  class Metadata < PropertyContainer
7
- attr_accessor :document_format
8
-
9
7
  def initialize
10
- @document_format = :html
8
+ self[:format] = :html
11
9
  super
12
10
  end
13
11
 
@@ -18,5 +16,9 @@ module Wombat
18
16
  def list_page url
19
17
  self[:list_page] = url
20
18
  end
19
+
20
+ def format format
21
+ self[:format] = format
22
+ end
21
23
  end
22
24
  end
data/lib/wombat/parser.rb CHANGED
@@ -22,7 +22,10 @@ module Wombat
22
22
  it.all_properties.each do |p|
23
23
  p.result ||= []
24
24
  result = locate(p)
25
- p.result << result if result
25
+ if result
26
+ result = p.callback ? p.callback.call(result) : result
27
+ p.result << result
28
+ end
26
29
  end
27
30
  end
28
31
  end
@@ -41,7 +44,7 @@ module Wombat
41
44
  def get_parser metadata
42
45
  url = "#{metadata[:base_url]}#{metadata[:list_page]}"
43
46
 
44
- if metadata.document_format == :html
47
+ if metadata[:format] == :html
45
48
  @mechanize.get(url).parser
46
49
  else
47
50
  Nokogiri::XML RestClient.get(url)
data/spec/crawler_spec.rb CHANGED
@@ -114,7 +114,7 @@ describe Wombat::Crawler do
114
114
 
115
115
  it 'should assign metadata forma' do
116
116
  @crawler_instance.should_receive(:parse) do |arg|
117
- arg.document_format.should == :xml
117
+ arg[:format].should == :xml
118
118
  end
119
119
  @crawler.format :xml
120
120
  @crawler_instance.crawl
@@ -44,7 +44,7 @@ describe 'basic crawler setup' do
44
44
 
45
45
  crawler.for_each "css=ol.ranked-repositories li" do
46
46
  repo 'css=h3'
47
- description 'css=p.description'
47
+ description('css=p.description') { |d| d.gsub(/for/, '') }
48
48
  end
49
49
 
50
50
  crawler_instance = crawler.new
@@ -52,11 +52,11 @@ describe 'basic crawler setup' do
52
52
 
53
53
  results["repo"].should =~ ["jairajs89 / Touchy.js", "mcavage / node-restify", "notlion / streetview-stereographic", "twitter / bootstrap", "stolksdorf / Parallaxjs"]
54
54
  results["description"].should =~ [
55
- "node.js REST framework specifically meant for web service APIs",
56
- "A simple light-weight JavaScript library for dealing with touch events",
55
+ "node.js REST framework specifically meant web service APIs",
56
+ "A simple light-weight JavaScript library dealing with touch events",
57
57
  "Shader Toy + Google Map + Panoramic Explorer",
58
58
  "HTML, CSS, and JS toolkit from Twitter",
59
- "a Library for Javascript that allows easy page parallaxing"
59
+ "a Library Javascript that allows easy page parallaxing"
60
60
  ]
61
61
  end
62
62
  end
data/spec/parser_spec.rb CHANGED
@@ -72,6 +72,10 @@ describe Wombat::Parser do
72
72
  block_called.should be_true
73
73
  end
74
74
 
75
+ it 'should invoke callback inside for_each block' do
76
+
77
+ end
78
+
75
79
  it 'should return hash with requested properties' do
76
80
  hash = double :results
77
81
  fake_parser = double :parser
@@ -140,7 +144,7 @@ describe Wombat::Parser do
140
144
  it 'should correctly parse xml documents' do
141
145
  fake_document = double :xml
142
146
  fake_parser = double :parser
143
- @metadata.document_format = :xml
147
+ @metadata.format :xml
144
148
  @parser.mechanize.should_not_receive(:get)
145
149
  RestClient.should_receive(:get).and_return fake_document
146
150
  Nokogiri.should_receive(:XML).with(fake_document).and_return fake_parser
data/wombat.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "wombat"
8
- s.version = "0.2.4"
8
+ s.version = "0.2.5"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Felipe Lima"]
12
- s.date = "2012-02-15"
12
+ s.date = "2012-03-21"
13
13
  s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
14
14
  s.email = "felipe.lima@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -54,7 +54,8 @@ Gem::Specification.new do |s|
54
54
  s.homepage = "http://github.com/felipecsl/wombat"
55
55
  s.licenses = ["MIT"]
56
56
  s.require_paths = ["lib"]
57
- s.rubygems_version = "1.8.11"
57
+ s.required_ruby_version = Gem::Requirement.new(">= 1.9")
58
+ s.rubygems_version = "1.8.18"
58
59
  s.summary = "Ruby DSL to crawl web pages"
59
60
 
60
61
  if s.respond_to? :specification_version then
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wombat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-15 00:00:00.000000000 Z
12
+ date: 2012-03-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
16
- requirement: &70159522946820 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,15 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70159522946820
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
25
30
  - !ruby/object:Gem::Dependency
26
31
  name: activesupport
27
- requirement: &70159522962680 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
28
33
  none: false
29
34
  requirements:
30
35
  - - ! '>='
@@ -32,10 +37,15 @@ dependencies:
32
37
  version: '0'
33
38
  type: :runtime
34
39
  prerelease: false
35
- version_requirements: *70159522962680
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
36
46
  - !ruby/object:Gem::Dependency
37
47
  name: rest-client
38
- requirement: &70159522962060 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
39
49
  none: false
40
50
  requirements:
41
51
  - - ! '>='
@@ -43,10 +53,15 @@ dependencies:
43
53
  version: '0'
44
54
  type: :runtime
45
55
  prerelease: false
46
- version_requirements: *70159522962060
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
47
62
  - !ruby/object:Gem::Dependency
48
63
  name: bundler
49
- requirement: &70159522961340 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
50
65
  none: false
51
66
  requirements:
52
67
  - - ! '>='
@@ -54,10 +69,15 @@ dependencies:
54
69
  version: '0'
55
70
  type: :development
56
71
  prerelease: false
57
- version_requirements: *70159522961340
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
58
78
  - !ruby/object:Gem::Dependency
59
79
  name: rake
60
- requirement: &70159522960620 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
61
81
  none: false
62
82
  requirements:
63
83
  - - ! '>='
@@ -65,10 +85,15 @@ dependencies:
65
85
  version: '0'
66
86
  type: :development
67
87
  prerelease: false
68
- version_requirements: *70159522960620
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
69
94
  - !ruby/object:Gem::Dependency
70
95
  name: yard
71
- requirement: &70159522960000 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
72
97
  none: false
73
98
  requirements:
74
99
  - - ! '>='
@@ -76,10 +101,15 @@ dependencies:
76
101
  version: '0'
77
102
  type: :development
78
103
  prerelease: false
79
- version_requirements: *70159522960000
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
80
110
  - !ruby/object:Gem::Dependency
81
111
  name: jeweler
82
- requirement: &70159522959520 !ruby/object:Gem::Requirement
112
+ requirement: !ruby/object:Gem::Requirement
83
113
  none: false
84
114
  requirements:
85
115
  - - ! '>='
@@ -87,10 +117,15 @@ dependencies:
87
117
  version: '0'
88
118
  type: :development
89
119
  prerelease: false
90
- version_requirements: *70159522959520
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
91
126
  - !ruby/object:Gem::Dependency
92
127
  name: rspec
93
- requirement: &70159522959040 !ruby/object:Gem::Requirement
128
+ requirement: !ruby/object:Gem::Requirement
94
129
  none: false
95
130
  requirements:
96
131
  - - ! '>='
@@ -98,21 +133,31 @@ dependencies:
98
133
  version: '0'
99
134
  type: :development
100
135
  prerelease: false
101
- version_requirements: *70159522959040
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
102
142
  - !ruby/object:Gem::Dependency
103
143
  name: vcr
104
- requirement: &70159522958540 !ruby/object:Gem::Requirement
144
+ requirement: !ruby/object:Gem::Requirement
105
145
  none: false
106
146
  requirements:
107
- - - =
147
+ - - '='
108
148
  - !ruby/object:Gem::Version
109
149
  version: 2.0.0.rc1
110
150
  type: :development
111
151
  prerelease: false
112
- version_requirements: *70159522958540
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - '='
156
+ - !ruby/object:Gem::Version
157
+ version: 2.0.0.rc1
113
158
  - !ruby/object:Gem::Dependency
114
159
  name: fakeweb
115
- requirement: &70159522958060 !ruby/object:Gem::Requirement
160
+ requirement: !ruby/object:Gem::Requirement
116
161
  none: false
117
162
  requirements:
118
163
  - - ! '>='
@@ -120,7 +165,12 @@ dependencies:
120
165
  version: '0'
121
166
  type: :development
122
167
  prerelease: false
123
- version_requirements: *70159522958060
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ none: false
170
+ requirements:
171
+ - - ! '>='
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
124
174
  description: Generic Web crawler with a DSL that parses structured data from web pages
125
175
  email: felipe.lima@gmail.com
126
176
  executables: []
@@ -174,7 +224,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
174
224
  requirements:
175
225
  - - ! '>='
176
226
  - !ruby/object:Gem::Version
177
- version: '0'
227
+ version: '1.9'
178
228
  required_rubygems_version: !ruby/object:Gem::Requirement
179
229
  none: false
180
230
  requirements:
@@ -183,7 +233,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
183
233
  version: '0'
184
234
  requirements: []
185
235
  rubyforge_project:
186
- rubygems_version: 1.8.11
236
+ rubygems_version: 1.8.18
187
237
  signing_key:
188
238
  specification_version: 3
189
239
  summary: Ruby DSL to crawl web pages