spidr 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog.md +14 -0
- data/README.md +1 -1
- data/Rakefile +6 -4
- data/lib/spidr/agent.rb +3 -2
- data/lib/spidr/filters.rb +110 -9
- data/lib/spidr/page.rb +49 -30
- data/lib/spidr/version.rb +1 -1
- data/spidr.gemspec +77 -76
- metadata +21 -21
data/ChangeLog.md
CHANGED
@@ -1,3 +1,17 @@
|
|
1
|
+
### 0.2.4 / 2010-05-05
|
2
|
+
|
3
|
+
* Added {Spidr::Filters#visit_urls}.
|
4
|
+
* Added {Spidr::Filters#visit_urls_like}.
|
5
|
+
* Added {Spidr::Filters#ignore_urls}.
|
6
|
+
* Added {Spidr::Filters#ignore_urls_like}.
|
7
|
+
* Added {Spidr::Page#is_content_type?}.
|
8
|
+
* Default {Spidr::Page#body} to an empty String.
|
9
|
+
* Default {Spidr::Page#content_type} to an empty String.
|
10
|
+
* Default {Spidr::Page#content_types} to an empty Array.
|
11
|
+
* Improved reliability of {Spidr::Page#is_redirect?}.
|
12
|
+
* Improved content type detection in {Spidr::Page} to handle `Content-Type`
|
13
|
+
headers containing charsets (thanks Josh Lindsey).
|
14
|
+
|
1
15
|
### 0.2.3 / 2010-02-27
|
2
16
|
|
3
17
|
* Migrated to Jeweler for packaging and releasing RubyGems.
|
data/README.md
CHANGED
data/Rakefile
CHANGED
@@ -7,17 +7,19 @@ begin
|
|
7
7
|
Jeweler::Tasks.new do |gem|
|
8
8
|
gem.name = 'spidr'
|
9
9
|
gem.version = Spidr::VERSION
|
10
|
+
gem.license = 'MIT'
|
10
11
|
gem.summary = %Q{A versatile Ruby web spidering library}
|
11
12
|
gem.description = %Q{Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.}
|
12
13
|
gem.email = 'postmodern.mod3@gmail.com'
|
13
14
|
gem.homepage = 'http://github.com/postmodern/spidr'
|
14
15
|
gem.authors = ['Postmodern']
|
15
|
-
gem.add_dependency 'nokogiri', '>= 1.
|
16
|
-
gem.add_development_dependency 'rspec', '
|
17
|
-
gem.add_development_dependency 'yard', '
|
18
|
-
gem.add_development_dependency 'wsoc', '
|
16
|
+
gem.add_dependency 'nokogiri', '>= 1.3.0'
|
17
|
+
gem.add_development_dependency 'rspec', '~> 1.3.0'
|
18
|
+
gem.add_development_dependency 'yard', '~> 0.5.3'
|
19
|
+
gem.add_development_dependency 'wsoc', '~> 0.1.1'
|
19
20
|
gem.has_rdoc = 'yard'
|
20
21
|
end
|
22
|
+
Jeweler::GemcutterTasks.new
|
21
23
|
rescue LoadError
|
22
24
|
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
23
25
|
end
|
data/lib/spidr/agent.rb
CHANGED
@@ -702,12 +702,13 @@ module Spidr
|
|
702
702
|
# Specifies whether the given URL should be visited.
|
703
703
|
#
|
704
704
|
def visit?(url)
|
705
|
-
|
705
|
+
!(visited?(url)) &&
|
706
706
|
visit_scheme?(url.scheme) &&
|
707
707
|
visit_host?(url.host) &&
|
708
708
|
visit_port?(url.port) &&
|
709
709
|
visit_link?(url.to_s) &&
|
710
|
-
|
710
|
+
visit_url?(url) &&
|
711
|
+
visit_ext?(url.path)
|
711
712
|
end
|
712
713
|
|
713
714
|
#
|
data/lib/spidr/filters.rb
CHANGED
@@ -44,6 +44,12 @@ module Spidr
|
|
44
44
|
# @option options [Array<String, Regexp, Proc>] :ignore_links
|
45
45
|
# The patterns which match the links to not visit.
|
46
46
|
#
|
47
|
+
# @option options [Array<String, Regexp, Proc>] :urls
|
48
|
+
# The patterns which match the URLs to visit.
|
49
|
+
#
|
50
|
+
# @option options [Array<String, Regexp, Proc>] :ignore_urls
|
51
|
+
# The patterns which match the URLs to not visit.
|
52
|
+
#
|
47
53
|
# @option options [Array<String, Regexp, Proc>] :exts
|
48
54
|
# The patterns which match the URI path extensions to visit.
|
49
55
|
#
|
@@ -83,6 +89,10 @@ module Spidr
|
|
83
89
|
:accept => options[:links],
|
84
90
|
:reject => options[:ignore_links]
|
85
91
|
)
|
92
|
+
@url_rules = Rules.new(
|
93
|
+
:accept => options[:urls],
|
94
|
+
:reject => options[:ignore_urls]
|
95
|
+
)
|
86
96
|
@ext_rules = Rules.new(
|
87
97
|
:accept => options[:exts],
|
88
98
|
:reject => options[:ignore_exts]
|
@@ -125,7 +135,7 @@ module Spidr
|
|
125
135
|
end
|
126
136
|
|
127
137
|
#
|
128
|
-
# Adds a given pattern to the visit_hosts.
|
138
|
+
# Adds a given pattern to the {#visit_hosts}.
|
129
139
|
#
|
130
140
|
# @param [String, Regexp] pattern
|
131
141
|
# The pattern to match host-names with.
|
@@ -157,7 +167,7 @@ module Spidr
|
|
157
167
|
end
|
158
168
|
|
159
169
|
#
|
160
|
-
# Adds a given pattern to the ignore_hosts.
|
170
|
+
# Adds a given pattern to the {#ignore_hosts}.
|
161
171
|
#
|
162
172
|
# @param [String, Regexp] pattern
|
163
173
|
# The pattern to match host-names with.
|
@@ -189,7 +199,7 @@ module Spidr
|
|
189
199
|
end
|
190
200
|
|
191
201
|
#
|
192
|
-
# Adds a given pattern to the visit_ports.
|
202
|
+
# Adds a given pattern to the {#visit_ports}.
|
193
203
|
#
|
194
204
|
# @param [Integer, Regexp] pattern
|
195
205
|
# The pattern to match ports with.
|
@@ -221,7 +231,7 @@ module Spidr
|
|
221
231
|
end
|
222
232
|
|
223
233
|
#
|
224
|
-
# Adds a given pattern to the ignore_ports.
|
234
|
+
# Adds a given pattern to the {#ignore_ports}.
|
225
235
|
#
|
226
236
|
# @param [Integer, Regexp] pattern
|
227
237
|
# The pattern to match ports with.
|
@@ -248,21 +258,25 @@ module Spidr
|
|
248
258
|
# @return [Array<String, Regexp, Proc>]
|
249
259
|
# The link patterns to visit.
|
250
260
|
#
|
261
|
+
# @since 0.2.4
|
262
|
+
#
|
251
263
|
def visit_links
|
252
264
|
@link_rules.accept
|
253
265
|
end
|
254
266
|
|
255
267
|
#
|
256
|
-
# Adds a given pattern to the visit_links
|
268
|
+
# Adds a given pattern to the {#visit_links}.
|
257
269
|
#
|
258
270
|
# @param [String, Regexp] pattern
|
259
|
-
# The pattern to match
|
271
|
+
# The pattern to match links with.
|
260
272
|
#
|
261
273
|
# @yield [link]
|
262
274
|
# If a block is given, it will be used to filter links.
|
263
275
|
#
|
264
276
|
# @yieldparam [String] link
|
265
277
|
# A link to accept or reject.
|
278
|
+
#
|
279
|
+
# @since 0.2.4
|
266
280
|
#
|
267
281
|
def visit_links_like(pattern=nil,&block)
|
268
282
|
if pattern
|
@@ -285,7 +299,7 @@ module Spidr
|
|
285
299
|
end
|
286
300
|
|
287
301
|
#
|
288
|
-
# Adds a given pattern to the ignore_links.
|
302
|
+
# Adds a given pattern to the {#ignore_links}.
|
289
303
|
#
|
290
304
|
# @param [String, Regexp] pattern
|
291
305
|
# The pattern to match links with.
|
@@ -306,6 +320,78 @@ module Spidr
|
|
306
320
|
return self
|
307
321
|
end
|
308
322
|
|
323
|
+
#
|
324
|
+
# Specifies the patterns that match the URLs to visit.
|
325
|
+
#
|
326
|
+
# @return [Array<String, Regexp, Proc>]
|
327
|
+
# The URL patterns to visit.
|
328
|
+
#
|
329
|
+
# @since 0.2.4
|
330
|
+
#
|
331
|
+
def visit_urls
|
332
|
+
@url_rules.accept
|
333
|
+
end
|
334
|
+
|
335
|
+
#
|
336
|
+
# Adds a given pattern to the {#visit_urls}.
|
337
|
+
#
|
338
|
+
# @param [String, Regexp] pattern
|
339
|
+
# The pattern to match URLs with.
|
340
|
+
#
|
341
|
+
# @yield [url]
|
342
|
+
# If a block is given, it will be used to filter URLs.
|
343
|
+
#
|
344
|
+
# @yieldparam [URI::HTTP, URI::HTTPS] url
|
345
|
+
# A URL to accept or reject.
|
346
|
+
#
|
347
|
+
# @since 0.2.4
|
348
|
+
#
|
349
|
+
def visit_urls_like(pattern=nil,&block)
|
350
|
+
if pattern
|
351
|
+
visit_urls << pattern
|
352
|
+
elsif block
|
353
|
+
visit_urls << block
|
354
|
+
end
|
355
|
+
|
356
|
+
return self
|
357
|
+
end
|
358
|
+
|
359
|
+
#
|
360
|
+
# Specifies the patterns that match URLs to not visit.
|
361
|
+
#
|
362
|
+
# @return [Array<String, Regexp, Proc>]
|
363
|
+
# The URL patterns to not visit.
|
364
|
+
#
|
365
|
+
# @since 0.2.4
|
366
|
+
#
|
367
|
+
def ignore_urls
|
368
|
+
@url_rules.reject
|
369
|
+
end
|
370
|
+
|
371
|
+
#
|
372
|
+
# Adds a given pattern to the {#ignore_urls}.
|
373
|
+
#
|
374
|
+
# @param [String, Regexp] pattern
|
375
|
+
# The pattern to match URLs with.
|
376
|
+
#
|
377
|
+
# @yield [url]
|
378
|
+
# If a block is given, it will be used to filter URLs.
|
379
|
+
#
|
380
|
+
# @yieldparam [URI::HTTP, URI::HTTPS] url
|
381
|
+
# A URL to reject or accept.
|
382
|
+
#
|
383
|
+
# @since 0.2.4
|
384
|
+
#
|
385
|
+
def ignore_urls_like(pattern=nil,&block)
|
386
|
+
if pattern
|
387
|
+
ignore_urls << pattern
|
388
|
+
elsif block
|
389
|
+
ignore_urls << block
|
390
|
+
end
|
391
|
+
|
392
|
+
return self
|
393
|
+
end
|
394
|
+
|
309
395
|
#
|
310
396
|
# Specifies the patterns that match the URI path extensions to visit.
|
311
397
|
#
|
@@ -317,7 +403,7 @@ module Spidr
|
|
317
403
|
end
|
318
404
|
|
319
405
|
#
|
320
|
-
# Adds a given pattern to the visit_exts.
|
406
|
+
# Adds a given pattern to the {#visit_exts}.
|
321
407
|
#
|
322
408
|
# @param [String, Regexp] pattern
|
323
409
|
# The pattern to match URI path extensions with.
|
@@ -349,7 +435,7 @@ module Spidr
|
|
349
435
|
end
|
350
436
|
|
351
437
|
#
|
352
|
-
# Adds a given pattern to the ignore_exts.
|
438
|
+
# Adds a given pattern to the {#ignore_exts}.
|
353
439
|
#
|
354
440
|
# @param [String, Regexp] pattern
|
355
441
|
# The pattern to match URI path extensions with.
|
@@ -428,6 +514,21 @@ module Spidr
|
|
428
514
|
@link_rules.accept?(link)
|
429
515
|
end
|
430
516
|
|
517
|
+
#
|
518
|
+
# Determines if a given URL should be visited.
|
519
|
+
#
|
520
|
+
# @param [URI::HTTP, URI::HTTPS] url
|
521
|
+
# The URL.
|
522
|
+
#
|
523
|
+
# @return [Boolean]
|
524
|
+
# Specifies whether the given URL should be visited.
|
525
|
+
#
|
526
|
+
# @since 0.2.4
|
527
|
+
#
|
528
|
+
def visit_url?(link)
|
529
|
+
@url_rules.accept?(link)
|
530
|
+
end
|
531
|
+
|
431
532
|
#
|
432
533
|
# Determines if a given URI path extension should be visited.
|
433
534
|
#
|
data/lib/spidr/page.rb
CHANGED
@@ -61,13 +61,19 @@ module Spidr
|
|
61
61
|
alias ok? is_ok?
|
62
62
|
|
63
63
|
#
|
64
|
-
# Determines if the response code is `301`
|
64
|
+
# Determines if the response code is `300`, `301`, `302`, `303`
|
65
|
+
# or `307`.
|
65
66
|
#
|
66
67
|
# @return [Boolean]
|
67
|
-
# Specifies whether the response code is
|
68
|
+
# Specifies whether the response code is an HTTP redirect code.
|
68
69
|
#
|
69
70
|
def is_redirect?
|
70
|
-
|
71
|
+
case code
|
72
|
+
when 300..303, 307
|
73
|
+
true
|
74
|
+
else
|
75
|
+
false
|
76
|
+
end
|
71
77
|
end
|
72
78
|
|
73
79
|
alias redirect? is_redirect?
|
@@ -145,7 +151,7 @@ module Spidr
|
|
145
151
|
# The Content-Type of the page.
|
146
152
|
#
|
147
153
|
def content_type
|
148
|
-
@response['Content-Type']
|
154
|
+
(@response['Content-Type'] || '')
|
149
155
|
end
|
150
156
|
|
151
157
|
#
|
@@ -157,7 +163,7 @@ module Spidr
|
|
157
163
|
# @since 0.2.2
|
158
164
|
#
|
159
165
|
def content_types
|
160
|
-
@headers['content-type']
|
166
|
+
(@headers['content-type'] || [])
|
161
167
|
end
|
162
168
|
|
163
169
|
#
|
@@ -167,7 +173,7 @@ module Spidr
|
|
167
173
|
# Specifies whether the page is plain-text.
|
168
174
|
#
|
169
175
|
def plain_text?
|
170
|
-
|
176
|
+
is_content_type?('text/plain')
|
171
177
|
end
|
172
178
|
|
173
179
|
alias txt? plain_text?
|
@@ -179,7 +185,7 @@ module Spidr
|
|
179
185
|
# Specifies whether the page is an HTML document.
|
180
186
|
#
|
181
187
|
def html?
|
182
|
-
|
188
|
+
is_content_type?('text/html')
|
183
189
|
end
|
184
190
|
|
185
191
|
#
|
@@ -189,7 +195,7 @@ module Spidr
|
|
189
195
|
# Specifies whether the page is an XML document.
|
190
196
|
#
|
191
197
|
def xml?
|
192
|
-
|
198
|
+
is_content_type?('text/xml')
|
193
199
|
end
|
194
200
|
|
195
201
|
#
|
@@ -199,7 +205,7 @@ module Spidr
|
|
199
205
|
# Specifies whether the page is an XML Stylesheet (XSL).
|
200
206
|
#
|
201
207
|
def xsl?
|
202
|
-
|
208
|
+
is_content_type?('text/xsl')
|
203
209
|
end
|
204
210
|
|
205
211
|
#
|
@@ -209,8 +215,8 @@ module Spidr
|
|
209
215
|
# Specifies whether the page is JavaScript.
|
210
216
|
#
|
211
217
|
def javascript?
|
212
|
-
|
213
|
-
|
218
|
+
is_content_type?('text/javascript') || \
|
219
|
+
is_content_type?('application/javascript')
|
214
220
|
end
|
215
221
|
|
216
222
|
#
|
@@ -220,7 +226,7 @@ module Spidr
|
|
220
226
|
# Specifies whether the page is a CSS stylesheet.
|
221
227
|
#
|
222
228
|
def css?
|
223
|
-
|
229
|
+
is_content_type?('text/css')
|
224
230
|
end
|
225
231
|
|
226
232
|
#
|
@@ -230,8 +236,8 @@ module Spidr
|
|
230
236
|
# Specifies whether the page is an RSS feed.
|
231
237
|
#
|
232
238
|
def rss?
|
233
|
-
|
234
|
-
|
239
|
+
is_content_type?('application/rss+xml') || \
|
240
|
+
is_content_type?('application/rdf+xml')
|
235
241
|
end
|
236
242
|
|
237
243
|
#
|
@@ -241,7 +247,7 @@ module Spidr
|
|
241
247
|
# Specifies whether the page is an Atom feed.
|
242
248
|
#
|
243
249
|
def atom?
|
244
|
-
|
250
|
+
is_content_type?('application/atom+xml')
|
245
251
|
end
|
246
252
|
|
247
253
|
#
|
@@ -251,7 +257,7 @@ module Spidr
|
|
251
257
|
# Specifies whether the page is a MS Word document.
|
252
258
|
#
|
253
259
|
def ms_word?
|
254
|
-
|
260
|
+
is_content_type?('application/msword')
|
255
261
|
end
|
256
262
|
|
257
263
|
#
|
@@ -261,7 +267,7 @@ module Spidr
|
|
261
267
|
# Specifies whether the page is a PDF document.
|
262
268
|
#
|
263
269
|
def pdf?
|
264
|
-
|
270
|
+
is_content_type?('application/pdf')
|
265
271
|
end
|
266
272
|
|
267
273
|
#
|
@@ -271,7 +277,7 @@ module Spidr
|
|
271
277
|
# Specifies whether the page is a ZIP archive.
|
272
278
|
#
|
273
279
|
def zip?
|
274
|
-
|
280
|
+
is_content_type?('application/zip')
|
275
281
|
end
|
276
282
|
|
277
283
|
#
|
@@ -329,7 +335,7 @@ module Spidr
|
|
329
335
|
# The body of the response.
|
330
336
|
#
|
331
337
|
def body
|
332
|
-
@response.body
|
338
|
+
(@response.body || '')
|
333
339
|
end
|
334
340
|
|
335
341
|
#
|
@@ -344,7 +350,7 @@ module Spidr
|
|
344
350
|
# @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/HTML/Document.html
|
345
351
|
#
|
346
352
|
def doc
|
347
|
-
return nil if
|
353
|
+
return nil if body.empty?
|
348
354
|
|
349
355
|
begin
|
350
356
|
if html?
|
@@ -375,10 +381,10 @@ module Spidr
|
|
375
381
|
#
|
376
382
|
def search(*paths)
|
377
383
|
if doc
|
378
|
-
|
384
|
+
doc.search(*paths)
|
385
|
+
else
|
386
|
+
[]
|
379
387
|
end
|
380
|
-
|
381
|
-
return []
|
382
388
|
end
|
383
389
|
|
384
390
|
#
|
@@ -395,10 +401,8 @@ module Spidr
|
|
395
401
|
#
|
396
402
|
def at(*arguments)
|
397
403
|
if doc
|
398
|
-
|
404
|
+
doc.at(*arguments)
|
399
405
|
end
|
400
|
-
|
401
|
-
return nil
|
402
406
|
end
|
403
407
|
|
404
408
|
alias / search
|
@@ -412,7 +416,7 @@ module Spidr
|
|
412
416
|
#
|
413
417
|
def title
|
414
418
|
if (node = at('//title'))
|
415
|
-
|
419
|
+
node.inner_text
|
416
420
|
end
|
417
421
|
end
|
418
422
|
|
@@ -430,8 +434,7 @@ module Spidr
|
|
430
434
|
urls << url unless (url.nil? || url.empty?)
|
431
435
|
}
|
432
436
|
|
433
|
-
|
434
|
-
when 300..303, 307
|
437
|
+
if self.is_redirect?
|
435
438
|
location = @headers['location']
|
436
439
|
|
437
440
|
if location.kind_of?(Array)
|
@@ -506,6 +509,22 @@ module Spidr
|
|
506
509
|
|
507
510
|
protected
|
508
511
|
|
512
|
+
#
|
513
|
+
# Determines if any of the content-types of the page include a given
|
514
|
+
# type.
|
515
|
+
#
|
516
|
+
# @param [String] type
|
517
|
+
# The content-type to test for.
|
518
|
+
#
|
519
|
+
# @return [Boolean]
|
520
|
+
# Specifies whether the page includes the given content-type.
|
521
|
+
#
|
522
|
+
# @since 0.2.4
|
523
|
+
#
|
524
|
+
def is_content_type?(type)
|
525
|
+
content_types.any? { |content| content.include?(type) }
|
526
|
+
end
|
527
|
+
|
509
528
|
#
|
510
529
|
# Provides transparent access to the values in `headers`.
|
511
530
|
#
|
@@ -518,6 +537,6 @@ module Spidr
|
|
518
537
|
|
519
538
|
return super(sym,*args,&block)
|
520
539
|
end
|
521
|
-
|
540
|
+
|
522
541
|
end
|
523
542
|
end
|
data/lib/spidr/version.rb
CHANGED
data/spidr.gemspec
CHANGED
@@ -5,89 +5,90 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{spidr}
|
8
|
-
s.version = "0.2.
|
8
|
+
s.version = "0.2.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Postmodern"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-05-05}
|
13
13
|
s.description = %q{Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.}
|
14
14
|
s.email = %q{postmodern.mod3@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"ChangeLog.md",
|
17
|
-
|
18
|
-
|
17
|
+
"LICENSE.txt",
|
18
|
+
"README.md"
|
19
19
|
]
|
20
20
|
s.files = [
|
21
21
|
".gitignore",
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
22
|
+
".specopts",
|
23
|
+
".yardopts",
|
24
|
+
"ChangeLog.md",
|
25
|
+
"LICENSE.txt",
|
26
|
+
"README.md",
|
27
|
+
"Rakefile",
|
28
|
+
"lib/spidr.rb",
|
29
|
+
"lib/spidr/actions.rb",
|
30
|
+
"lib/spidr/actions/actions.rb",
|
31
|
+
"lib/spidr/actions/exceptions.rb",
|
32
|
+
"lib/spidr/actions/exceptions/action.rb",
|
33
|
+
"lib/spidr/actions/exceptions/paused.rb",
|
34
|
+
"lib/spidr/actions/exceptions/skip_link.rb",
|
35
|
+
"lib/spidr/actions/exceptions/skip_page.rb",
|
36
|
+
"lib/spidr/agent.rb",
|
37
|
+
"lib/spidr/auth_credential.rb",
|
38
|
+
"lib/spidr/auth_store.rb",
|
39
|
+
"lib/spidr/cookie_jar.rb",
|
40
|
+
"lib/spidr/events.rb",
|
41
|
+
"lib/spidr/extensions.rb",
|
42
|
+
"lib/spidr/extensions/uri.rb",
|
43
|
+
"lib/spidr/filters.rb",
|
44
|
+
"lib/spidr/page.rb",
|
45
|
+
"lib/spidr/rules.rb",
|
46
|
+
"lib/spidr/sanitizers.rb",
|
47
|
+
"lib/spidr/session_cache.rb",
|
48
|
+
"lib/spidr/spidr.rb",
|
49
|
+
"lib/spidr/version.rb",
|
50
|
+
"spec/actions_spec.rb",
|
51
|
+
"spec/agent_spec.rb",
|
52
|
+
"spec/auth_store_spec.rb",
|
53
|
+
"spec/cookie_jar_spec.rb",
|
54
|
+
"spec/extensions/uri_spec.rb",
|
55
|
+
"spec/filters_spec.rb",
|
56
|
+
"spec/helpers/history.rb",
|
57
|
+
"spec/helpers/page.rb",
|
58
|
+
"spec/helpers/wsoc.rb",
|
59
|
+
"spec/page_examples.rb",
|
60
|
+
"spec/page_spec.rb",
|
61
|
+
"spec/rules_spec.rb",
|
62
|
+
"spec/sanitizers_spec.rb",
|
63
|
+
"spec/session_cache.rb",
|
64
|
+
"spec/spec_helper.rb",
|
65
|
+
"spec/spidr_spec.rb",
|
66
|
+
"spidr.gemspec"
|
67
67
|
]
|
68
68
|
s.has_rdoc = %q{yard}
|
69
69
|
s.homepage = %q{http://github.com/postmodern/spidr}
|
70
|
+
s.licenses = ["MIT"]
|
70
71
|
s.rdoc_options = ["--charset=UTF-8"]
|
71
72
|
s.require_paths = ["lib"]
|
72
73
|
s.rubygems_version = %q{1.3.6}
|
73
74
|
s.summary = %q{A versatile Ruby web spidering library}
|
74
75
|
s.test_files = [
|
75
|
-
"spec/agent_spec.rb",
|
76
|
-
"spec/helpers/history.rb",
|
77
|
-
"spec/helpers/wsoc.rb",
|
78
|
-
"spec/helpers/page.rb",
|
79
|
-
"spec/spec_helper.rb",
|
80
|
-
"spec/extensions/uri_spec.rb",
|
81
|
-
"spec/page_spec.rb",
|
82
|
-
"spec/spidr_spec.rb",
|
83
|
-
"spec/sanitizers_spec.rb",
|
84
|
-
"spec/page_examples.rb",
|
85
|
-
"spec/filters_spec.rb",
|
86
|
-
"spec/actions_spec.rb",
|
87
|
-
"spec/rules_spec.rb",
|
88
76
|
"spec/auth_store_spec.rb",
|
89
|
-
|
90
|
-
|
77
|
+
"spec/rules_spec.rb",
|
78
|
+
"spec/session_cache.rb",
|
79
|
+
"spec/spec_helper.rb",
|
80
|
+
"spec/sanitizers_spec.rb",
|
81
|
+
"spec/filters_spec.rb",
|
82
|
+
"spec/page_spec.rb",
|
83
|
+
"spec/spidr_spec.rb",
|
84
|
+
"spec/agent_spec.rb",
|
85
|
+
"spec/cookie_jar_spec.rb",
|
86
|
+
"spec/extensions/uri_spec.rb",
|
87
|
+
"spec/helpers/history.rb",
|
88
|
+
"spec/helpers/page.rb",
|
89
|
+
"spec/helpers/wsoc.rb",
|
90
|
+
"spec/page_examples.rb",
|
91
|
+
"spec/actions_spec.rb"
|
91
92
|
]
|
92
93
|
|
93
94
|
if s.respond_to? :specification_version then
|
@@ -95,21 +96,21 @@ Gem::Specification.new do |s|
|
|
95
96
|
s.specification_version = 3
|
96
97
|
|
97
98
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
98
|
-
s.add_runtime_dependency(%q<nokogiri>, [">= 1.
|
99
|
-
s.add_development_dependency(%q<rspec>, ["
|
100
|
-
s.add_development_dependency(%q<yard>, ["
|
101
|
-
s.add_development_dependency(%q<wsoc>, ["
|
99
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 1.3.0"])
|
100
|
+
s.add_development_dependency(%q<rspec>, ["~> 1.3.0"])
|
101
|
+
s.add_development_dependency(%q<yard>, ["~> 0.5.3"])
|
102
|
+
s.add_development_dependency(%q<wsoc>, ["~> 0.1.1"])
|
102
103
|
else
|
103
|
-
s.add_dependency(%q<nokogiri>, [">= 1.
|
104
|
-
s.add_dependency(%q<rspec>, ["
|
105
|
-
s.add_dependency(%q<yard>, ["
|
106
|
-
s.add_dependency(%q<wsoc>, ["
|
104
|
+
s.add_dependency(%q<nokogiri>, [">= 1.3.0"])
|
105
|
+
s.add_dependency(%q<rspec>, ["~> 1.3.0"])
|
106
|
+
s.add_dependency(%q<yard>, ["~> 0.5.3"])
|
107
|
+
s.add_dependency(%q<wsoc>, ["~> 0.1.1"])
|
107
108
|
end
|
108
109
|
else
|
109
|
-
s.add_dependency(%q<nokogiri>, [">= 1.
|
110
|
-
s.add_dependency(%q<rspec>, ["
|
111
|
-
s.add_dependency(%q<yard>, ["
|
112
|
-
s.add_dependency(%q<wsoc>, ["
|
110
|
+
s.add_dependency(%q<nokogiri>, [">= 1.3.0"])
|
111
|
+
s.add_dependency(%q<rspec>, ["~> 1.3.0"])
|
112
|
+
s.add_dependency(%q<yard>, ["~> 0.5.3"])
|
113
|
+
s.add_dependency(%q<wsoc>, ["~> 0.1.1"])
|
113
114
|
end
|
114
115
|
end
|
115
116
|
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 2
|
8
|
-
-
|
9
|
-
version: 0.2.
|
8
|
+
- 4
|
9
|
+
version: 0.2.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Postmodern
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-05-05 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -26,9 +26,9 @@ dependencies:
|
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
segments:
|
28
28
|
- 1
|
29
|
-
-
|
29
|
+
- 3
|
30
30
|
- 0
|
31
|
-
version: 1.
|
31
|
+
version: 1.3.0
|
32
32
|
type: :runtime
|
33
33
|
version_requirements: *id001
|
34
34
|
- !ruby/object:Gem::Dependency
|
@@ -36,7 +36,7 @@ dependencies:
|
|
36
36
|
prerelease: false
|
37
37
|
requirement: &id002 !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
|
-
- -
|
39
|
+
- - ~>
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
segments:
|
42
42
|
- 1
|
@@ -50,7 +50,7 @@ dependencies:
|
|
50
50
|
prerelease: false
|
51
51
|
requirement: &id003 !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
|
-
- -
|
53
|
+
- - ~>
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
segments:
|
56
56
|
- 0
|
@@ -64,7 +64,7 @@ dependencies:
|
|
64
64
|
prerelease: false
|
65
65
|
requirement: &id004 !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
|
-
- -
|
67
|
+
- - ~>
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
segments:
|
70
70
|
- 0
|
@@ -132,8 +132,8 @@ files:
|
|
132
132
|
- spidr.gemspec
|
133
133
|
has_rdoc: yard
|
134
134
|
homepage: http://github.com/postmodern/spidr
|
135
|
-
licenses:
|
136
|
-
|
135
|
+
licenses:
|
136
|
+
- MIT
|
137
137
|
post_install_message:
|
138
138
|
rdoc_options:
|
139
139
|
- --charset=UTF-8
|
@@ -161,19 +161,19 @@ signing_key:
|
|
161
161
|
specification_version: 3
|
162
162
|
summary: A versatile Ruby web spidering library
|
163
163
|
test_files:
|
164
|
-
- spec/
|
165
|
-
- spec/
|
166
|
-
- spec/
|
167
|
-
- spec/helpers/page.rb
|
164
|
+
- spec/auth_store_spec.rb
|
165
|
+
- spec/rules_spec.rb
|
166
|
+
- spec/session_cache.rb
|
168
167
|
- spec/spec_helper.rb
|
169
|
-
- spec/
|
168
|
+
- spec/sanitizers_spec.rb
|
169
|
+
- spec/filters_spec.rb
|
170
170
|
- spec/page_spec.rb
|
171
171
|
- spec/spidr_spec.rb
|
172
|
-
- spec/
|
172
|
+
- spec/agent_spec.rb
|
173
|
+
- spec/cookie_jar_spec.rb
|
174
|
+
- spec/extensions/uri_spec.rb
|
175
|
+
- spec/helpers/history.rb
|
176
|
+
- spec/helpers/page.rb
|
177
|
+
- spec/helpers/wsoc.rb
|
173
178
|
- spec/page_examples.rb
|
174
|
-
- spec/filters_spec.rb
|
175
179
|
- spec/actions_spec.rb
|
176
|
-
- spec/rules_spec.rb
|
177
|
-
- spec/auth_store_spec.rb
|
178
|
-
- spec/cookie_jar_spec.rb
|
179
|
-
- spec/session_cache.rb
|