wombat 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -18,11 +18,14 @@ GEM
18
18
  nokogiri (~> 1.4)
19
19
  ntlm-http (~> 0.1, >= 0.1.1)
20
20
  webrobots (~> 0.0, >= 0.0.9)
21
+ mime-types (1.17.2)
21
22
  net-http-digest_auth (1.2)
22
23
  net-http-persistent (2.3.3)
23
24
  nokogiri (1.5.0)
24
25
  ntlm-http (0.1.1)
25
26
  rake (0.9.2.2)
27
+ rest-client (1.6.7)
28
+ mime-types (>= 1.16)
26
29
  rspec (2.7.0)
27
30
  rspec-core (~> 2.7.0)
28
31
  rspec-expectations (~> 2.7.0)
@@ -49,6 +52,7 @@ DEPENDENCIES
49
52
  jeweler
50
53
  mechanize
51
54
  rake
55
+ rest-client
52
56
  rspec
53
57
  vcr (= 2.0.0.rc1)
54
58
  yard
data/README.md CHANGED
@@ -63,7 +63,7 @@ my_crawler.crawl
63
63
  }
64
64
  ```
65
65
 
66
- ### For additional documentation, please check the project [Wiki](http://github.com/felipecsl/wombat/wiki).
66
+ ### For the documentation, please check the project [Wiki](http://github.com/felipecsl/wombat/wiki).
67
67
 
68
68
 
69
69
  ## Contributing to Wombat
data/Rakefile CHANGED
@@ -16,6 +16,7 @@ Jeweler::Tasks.new do |gem|
16
16
  gem.description = %Q{Generic Web crawler with a DSL that parses structured data from web pages}
17
17
  gem.email = "felipe.lima@gmail.com"
18
18
  gem.authors = ["Felipe Lima"]
19
+ gem.required_ruby_version = ">= 1.9"
19
20
  # dependencies defined in Gemfile
20
21
  end
21
22
 
@@ -26,4 +27,4 @@ RSpec::Core::RakeTask.new(:spec)
26
27
  task :test => :spec
27
28
  task :default => :spec
28
29
 
29
- YARD::Rake::YardocTask.new
30
+ YARD::Rake::YardocTask.new
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.4
1
+ 0.2.5
@@ -10,13 +10,8 @@ module Wombat
10
10
  include Parser
11
11
  extend ActiveSupport::Concern
12
12
 
13
- module InstanceMethods
14
- def crawl
15
- parse self.class.send(:metadata)
16
- end
17
-
18
- def supports_city?
19
- end
13
+ def crawl
14
+ parse self.class.send(:metadata)
20
15
  end
21
16
 
22
17
  module ClassMethods
@@ -28,17 +23,10 @@ module Wombat
28
23
  metadata.for_each(selector).instance_eval(&block) if block
29
24
  end
30
25
 
31
- def format type
32
- metadata.document_format = type
33
- end
34
-
35
26
  def follow_links selector
36
27
 
37
28
  end
38
29
 
39
- def supported_cities
40
- end
41
-
42
30
  def to_ary
43
31
  end
44
32
 
@@ -4,10 +4,8 @@ require 'wombat/iterator'
4
4
 
5
5
  module Wombat
6
6
  class Metadata < PropertyContainer
7
- attr_accessor :document_format
8
-
9
7
  def initialize
10
- @document_format = :html
8
+ self[:format] = :html
11
9
  super
12
10
  end
13
11
 
@@ -18,5 +16,9 @@ module Wombat
18
16
  def list_page url
19
17
  self[:list_page] = url
20
18
  end
19
+
20
+ def format format
21
+ self[:format] = format
22
+ end
21
23
  end
22
24
  end
data/lib/wombat/parser.rb CHANGED
@@ -22,7 +22,10 @@ module Wombat
22
22
  it.all_properties.each do |p|
23
23
  p.result ||= []
24
24
  result = locate(p)
25
- p.result << result if result
25
+ if result
26
+ result = p.callback ? p.callback.call(result) : result
27
+ p.result << result
28
+ end
26
29
  end
27
30
  end
28
31
  end
@@ -41,7 +44,7 @@ module Wombat
41
44
  def get_parser metadata
42
45
  url = "#{metadata[:base_url]}#{metadata[:list_page]}"
43
46
 
44
- if metadata.document_format == :html
47
+ if metadata[:format] == :html
45
48
  @mechanize.get(url).parser
46
49
  else
47
50
  Nokogiri::XML RestClient.get(url)
data/spec/crawler_spec.rb CHANGED
@@ -114,7 +114,7 @@ describe Wombat::Crawler do
114
114
 
115
115
  it 'should assign metadata forma' do
116
116
  @crawler_instance.should_receive(:parse) do |arg|
117
- arg.document_format.should == :xml
117
+ arg[:format].should == :xml
118
118
  end
119
119
  @crawler.format :xml
120
120
  @crawler_instance.crawl
@@ -44,7 +44,7 @@ describe 'basic crawler setup' do
44
44
 
45
45
  crawler.for_each "css=ol.ranked-repositories li" do
46
46
  repo 'css=h3'
47
- description 'css=p.description'
47
+ description('css=p.description') { |d| d.gsub(/for/, '') }
48
48
  end
49
49
 
50
50
  crawler_instance = crawler.new
@@ -52,11 +52,11 @@ describe 'basic crawler setup' do
52
52
 
53
53
  results["repo"].should =~ ["jairajs89 / Touchy.js", "mcavage / node-restify", "notlion / streetview-stereographic", "twitter / bootstrap", "stolksdorf / Parallaxjs"]
54
54
  results["description"].should =~ [
55
- "node.js REST framework specifically meant for web service APIs",
56
- "A simple light-weight JavaScript library for dealing with touch events",
55
+ "node.js REST framework specifically meant web service APIs",
56
+ "A simple light-weight JavaScript library dealing with touch events",
57
57
  "Shader Toy + Google Map + Panoramic Explorer",
58
58
  "HTML, CSS, and JS toolkit from Twitter",
59
- "a Library for Javascript that allows easy page parallaxing"
59
+ "a Library Javascript that allows easy page parallaxing"
60
60
  ]
61
61
  end
62
62
  end
data/spec/parser_spec.rb CHANGED
@@ -72,6 +72,10 @@ describe Wombat::Parser do
72
72
  block_called.should be_true
73
73
  end
74
74
 
75
+ it 'should invoke callback inside for_each block' do
76
+
77
+ end
78
+
75
79
  it 'should return hash with requested properties' do
76
80
  hash = double :results
77
81
  fake_parser = double :parser
@@ -140,7 +144,7 @@ describe Wombat::Parser do
140
144
  it 'should correctly parse xml documents' do
141
145
  fake_document = double :xml
142
146
  fake_parser = double :parser
143
- @metadata.document_format = :xml
147
+ @metadata.format :xml
144
148
  @parser.mechanize.should_not_receive(:get)
145
149
  RestClient.should_receive(:get).and_return fake_document
146
150
  Nokogiri.should_receive(:XML).with(fake_document).and_return fake_parser
data/wombat.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "wombat"
8
- s.version = "0.2.4"
8
+ s.version = "0.2.5"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Felipe Lima"]
12
- s.date = "2012-02-15"
12
+ s.date = "2012-03-21"
13
13
  s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
14
14
  s.email = "felipe.lima@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -54,7 +54,8 @@ Gem::Specification.new do |s|
54
54
  s.homepage = "http://github.com/felipecsl/wombat"
55
55
  s.licenses = ["MIT"]
56
56
  s.require_paths = ["lib"]
57
- s.rubygems_version = "1.8.11"
57
+ s.required_ruby_version = Gem::Requirement.new(">= 1.9")
58
+ s.rubygems_version = "1.8.18"
58
59
  s.summary = "Ruby DSL to crawl web pages"
59
60
 
60
61
  if s.respond_to? :specification_version then
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wombat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-15 00:00:00.000000000 Z
12
+ date: 2012-03-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
16
- requirement: &70159522946820 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,15 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70159522946820
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
25
30
  - !ruby/object:Gem::Dependency
26
31
  name: activesupport
27
- requirement: &70159522962680 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
28
33
  none: false
29
34
  requirements:
30
35
  - - ! '>='
@@ -32,10 +37,15 @@ dependencies:
32
37
  version: '0'
33
38
  type: :runtime
34
39
  prerelease: false
35
- version_requirements: *70159522962680
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
36
46
  - !ruby/object:Gem::Dependency
37
47
  name: rest-client
38
- requirement: &70159522962060 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
39
49
  none: false
40
50
  requirements:
41
51
  - - ! '>='
@@ -43,10 +53,15 @@ dependencies:
43
53
  version: '0'
44
54
  type: :runtime
45
55
  prerelease: false
46
- version_requirements: *70159522962060
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
47
62
  - !ruby/object:Gem::Dependency
48
63
  name: bundler
49
- requirement: &70159522961340 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
50
65
  none: false
51
66
  requirements:
52
67
  - - ! '>='
@@ -54,10 +69,15 @@ dependencies:
54
69
  version: '0'
55
70
  type: :development
56
71
  prerelease: false
57
- version_requirements: *70159522961340
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
58
78
  - !ruby/object:Gem::Dependency
59
79
  name: rake
60
- requirement: &70159522960620 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
61
81
  none: false
62
82
  requirements:
63
83
  - - ! '>='
@@ -65,10 +85,15 @@ dependencies:
65
85
  version: '0'
66
86
  type: :development
67
87
  prerelease: false
68
- version_requirements: *70159522960620
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
69
94
  - !ruby/object:Gem::Dependency
70
95
  name: yard
71
- requirement: &70159522960000 !ruby/object:Gem::Requirement
96
+ requirement: !ruby/object:Gem::Requirement
72
97
  none: false
73
98
  requirements:
74
99
  - - ! '>='
@@ -76,10 +101,15 @@ dependencies:
76
101
  version: '0'
77
102
  type: :development
78
103
  prerelease: false
79
- version_requirements: *70159522960000
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
80
110
  - !ruby/object:Gem::Dependency
81
111
  name: jeweler
82
- requirement: &70159522959520 !ruby/object:Gem::Requirement
112
+ requirement: !ruby/object:Gem::Requirement
83
113
  none: false
84
114
  requirements:
85
115
  - - ! '>='
@@ -87,10 +117,15 @@ dependencies:
87
117
  version: '0'
88
118
  type: :development
89
119
  prerelease: false
90
- version_requirements: *70159522959520
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
91
126
  - !ruby/object:Gem::Dependency
92
127
  name: rspec
93
- requirement: &70159522959040 !ruby/object:Gem::Requirement
128
+ requirement: !ruby/object:Gem::Requirement
94
129
  none: false
95
130
  requirements:
96
131
  - - ! '>='
@@ -98,21 +133,31 @@ dependencies:
98
133
  version: '0'
99
134
  type: :development
100
135
  prerelease: false
101
- version_requirements: *70159522959040
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
102
142
  - !ruby/object:Gem::Dependency
103
143
  name: vcr
104
- requirement: &70159522958540 !ruby/object:Gem::Requirement
144
+ requirement: !ruby/object:Gem::Requirement
105
145
  none: false
106
146
  requirements:
107
- - - =
147
+ - - '='
108
148
  - !ruby/object:Gem::Version
109
149
  version: 2.0.0.rc1
110
150
  type: :development
111
151
  prerelease: false
112
- version_requirements: *70159522958540
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - '='
156
+ - !ruby/object:Gem::Version
157
+ version: 2.0.0.rc1
113
158
  - !ruby/object:Gem::Dependency
114
159
  name: fakeweb
115
- requirement: &70159522958060 !ruby/object:Gem::Requirement
160
+ requirement: !ruby/object:Gem::Requirement
116
161
  none: false
117
162
  requirements:
118
163
  - - ! '>='
@@ -120,7 +165,12 @@ dependencies:
120
165
  version: '0'
121
166
  type: :development
122
167
  prerelease: false
123
- version_requirements: *70159522958060
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ none: false
170
+ requirements:
171
+ - - ! '>='
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
124
174
  description: Generic Web crawler with a DSL that parses structured data from web pages
125
175
  email: felipe.lima@gmail.com
126
176
  executables: []
@@ -174,7 +224,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
174
224
  requirements:
175
225
  - - ! '>='
176
226
  - !ruby/object:Gem::Version
177
- version: '0'
227
+ version: '1.9'
178
228
  required_rubygems_version: !ruby/object:Gem::Requirement
179
229
  none: false
180
230
  requirements:
@@ -183,7 +233,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
183
233
  version: '0'
184
234
  requirements: []
185
235
  rubyforge_project:
186
- rubygems_version: 1.8.11
236
+ rubygems_version: 1.8.18
187
237
  signing_key:
188
238
  specification_version: 3
189
239
  summary: Ruby DSL to crawl web pages