spidr 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog.md +14 -0
- data/README.md +1 -1
- data/Rakefile +6 -4
- data/lib/spidr/agent.rb +3 -2
- data/lib/spidr/filters.rb +110 -9
- data/lib/spidr/page.rb +49 -30
- data/lib/spidr/version.rb +1 -1
- data/spidr.gemspec +77 -76
- metadata +21 -21
data/ChangeLog.md
CHANGED
@@ -1,3 +1,17 @@
|
|
1
|
+
### 0.2.4 / 2010-05-05
|
2
|
+
|
3
|
+
* Added {Spidr::Filters#visit_urls}.
|
4
|
+
* Added {Spidr::Filters#visit_urls_like}.
|
5
|
+
* Added {Spidr::Filters#ignore_urls}.
|
6
|
+
* Added {Spidr::Filters#ignore_urls_like}.
|
7
|
+
* Added {Spidr::Page#is_content_type?}.
|
8
|
+
* Default {Spidr::Page#body} to an empty String.
|
9
|
+
* Default {Spidr::Page#content_type} to an empty String.
|
10
|
+
* Default {Spidr::Page#content_types} to an empty Array.
|
11
|
+
* Improved reliability of {Spidr::Page#is_redirect?}.
|
12
|
+
* Improved content type detection in {Spidr::Page} to handle `Content-Type`
|
13
|
+
headers containing charsets (thanks Josh Lindsey).
|
14
|
+
|
1
15
|
### 0.2.3 / 2010-02-27
|
2
16
|
|
3
17
|
* Migrated to Jeweler, for the packaging and releasing RubyGems.
|
data/README.md
CHANGED
data/Rakefile
CHANGED
@@ -7,17 +7,19 @@ begin
|
|
7
7
|
Jeweler::Tasks.new do |gem|
|
8
8
|
gem.name = 'spidr'
|
9
9
|
gem.version = Spidr::VERSION
|
10
|
+
gem.license = 'MIT'
|
10
11
|
gem.summary = %Q{A versatile Ruby web spidering library}
|
11
12
|
gem.description = %Q{Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.}
|
12
13
|
gem.email = 'postmodern.mod3@gmail.com'
|
13
14
|
gem.homepage = 'http://github.com/postmodern/spidr'
|
14
15
|
gem.authors = ['Postmodern']
|
15
|
-
gem.add_dependency 'nokogiri', '>= 1.
|
16
|
-
gem.add_development_dependency 'rspec', '
|
17
|
-
gem.add_development_dependency 'yard', '
|
18
|
-
gem.add_development_dependency 'wsoc', '
|
16
|
+
gem.add_dependency 'nokogiri', '>= 1.3.0'
|
17
|
+
gem.add_development_dependency 'rspec', '~> 1.3.0'
|
18
|
+
gem.add_development_dependency 'yard', '~> 0.5.3'
|
19
|
+
gem.add_development_dependency 'wsoc', '~> 0.1.1'
|
19
20
|
gem.has_rdoc = 'yard'
|
20
21
|
end
|
22
|
+
Jeweler::GemcutterTasks.new
|
21
23
|
rescue LoadError
|
22
24
|
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
23
25
|
end
|
data/lib/spidr/agent.rb
CHANGED
@@ -702,12 +702,13 @@ module Spidr
|
|
702
702
|
# Specifies whether the given URL should be visited.
|
703
703
|
#
|
704
704
|
def visit?(url)
|
705
|
-
|
705
|
+
!(visited?(url)) &&
|
706
706
|
visit_scheme?(url.scheme) &&
|
707
707
|
visit_host?(url.host) &&
|
708
708
|
visit_port?(url.port) &&
|
709
709
|
visit_link?(url.to_s) &&
|
710
|
-
|
710
|
+
visit_url?(url) &&
|
711
|
+
visit_ext?(url.path)
|
711
712
|
end
|
712
713
|
|
713
714
|
#
|
data/lib/spidr/filters.rb
CHANGED
@@ -44,6 +44,12 @@ module Spidr
|
|
44
44
|
# @option options [Array<String, Regexp, Proc>] :ignore_links
|
45
45
|
# The patterns which match the links to not visit.
|
46
46
|
#
|
47
|
+
# @option options [Array<String, Regexp, Proc>] :urls
|
48
|
+
# The patterns which match the URLs to visit.
|
49
|
+
#
|
50
|
+
# @option options [Array<String, Regexp, Proc>] :ignore_urls
|
51
|
+
# The patterns which match the URLs to not visit.
|
52
|
+
#
|
47
53
|
# @option options [Array<String, Regexp, Proc>] :exts
|
48
54
|
# The patterns which match the URI path extensions to visit.
|
49
55
|
#
|
@@ -83,6 +89,10 @@ module Spidr
|
|
83
89
|
:accept => options[:links],
|
84
90
|
:reject => options[:ignore_links]
|
85
91
|
)
|
92
|
+
@url_rules = Rules.new(
|
93
|
+
:accept => options[:urls],
|
94
|
+
:reject => options[:ignore_urls]
|
95
|
+
)
|
86
96
|
@ext_rules = Rules.new(
|
87
97
|
:accept => options[:exts],
|
88
98
|
:reject => options[:ignore_exts]
|
@@ -125,7 +135,7 @@ module Spidr
|
|
125
135
|
end
|
126
136
|
|
127
137
|
#
|
128
|
-
# Adds a given pattern to the visit_hosts.
|
138
|
+
# Adds a given pattern to the {#visit_hosts}.
|
129
139
|
#
|
130
140
|
# @param [String, Regexp] pattern
|
131
141
|
# The pattern to match host-names with.
|
@@ -157,7 +167,7 @@ module Spidr
|
|
157
167
|
end
|
158
168
|
|
159
169
|
#
|
160
|
-
# Adds a given pattern to the ignore_hosts.
|
170
|
+
# Adds a given pattern to the {#ignore_hosts}.
|
161
171
|
#
|
162
172
|
# @param [String, Regexp] pattern
|
163
173
|
# The pattern to match host-names with.
|
@@ -189,7 +199,7 @@ module Spidr
|
|
189
199
|
end
|
190
200
|
|
191
201
|
#
|
192
|
-
# Adds a given pattern to the visit_ports.
|
202
|
+
# Adds a given pattern to the {#visit_ports}.
|
193
203
|
#
|
194
204
|
# @param [Integer, Regexp] pattern
|
195
205
|
# The pattern to match ports with.
|
@@ -221,7 +231,7 @@ module Spidr
|
|
221
231
|
end
|
222
232
|
|
223
233
|
#
|
224
|
-
# Adds a given pattern to the ignore_ports.
|
234
|
+
# Adds a given pattern to the {#ignore_ports}.
|
225
235
|
#
|
226
236
|
# @param [Integer, Regexp] pattern
|
227
237
|
# The pattern to match ports with.
|
@@ -248,21 +258,25 @@ module Spidr
|
|
248
258
|
# @return [Array<String, Regexp, Proc>]
|
249
259
|
# The link patterns to visit.
|
250
260
|
#
|
261
|
+
# @since 0.2.4
|
262
|
+
#
|
251
263
|
def visit_links
|
252
264
|
@link_rules.accept
|
253
265
|
end
|
254
266
|
|
255
267
|
#
|
256
|
-
# Adds a given pattern to the visit_links
|
268
|
+
# Adds a given pattern to the {#visit_links}.
|
257
269
|
#
|
258
270
|
# @param [String, Regexp] pattern
|
259
|
-
# The pattern to match
|
271
|
+
# The pattern to match links with.
|
260
272
|
#
|
261
273
|
# @yield [link]
|
262
274
|
# If a block is given, it will be used to filter links.
|
263
275
|
#
|
264
276
|
# @yieldparam [String] link
|
265
277
|
# A link to accept or reject.
|
278
|
+
#
|
279
|
+
# @since 0.2.4
|
266
280
|
#
|
267
281
|
def visit_links_like(pattern=nil,&block)
|
268
282
|
if pattern
|
@@ -285,7 +299,7 @@ module Spidr
|
|
285
299
|
end
|
286
300
|
|
287
301
|
#
|
288
|
-
# Adds a given pattern to the ignore_links.
|
302
|
+
# Adds a given pattern to the {#ignore_links}.
|
289
303
|
#
|
290
304
|
# @param [String, Regexp] pattern
|
291
305
|
# The pattern to match links with.
|
@@ -306,6 +320,78 @@ module Spidr
|
|
306
320
|
return self
|
307
321
|
end
|
308
322
|
|
323
|
+
#
|
324
|
+
# Specifies the patterns that match the URLs to visit.
|
325
|
+
#
|
326
|
+
# @return [Array<String, Regexp, Proc>]
|
327
|
+
# The URL patterns to visit.
|
328
|
+
#
|
329
|
+
# @since 0.2.4
|
330
|
+
#
|
331
|
+
def visit_urls
|
332
|
+
@url_rules.accept
|
333
|
+
end
|
334
|
+
|
335
|
+
#
|
336
|
+
# Adds a given pattern to the {#visit_urls}.
|
337
|
+
#
|
338
|
+
# @param [String, Regexp] pattern
|
339
|
+
# The pattern to match URLs with.
|
340
|
+
#
|
341
|
+
# @yield [url]
|
342
|
+
# If a block is given, it will be used to filter URLs.
|
343
|
+
#
|
344
|
+
# @yieldparam [URI::HTTP, URI::HTTPS] url
|
345
|
+
# A URL to accept or reject.
|
346
|
+
#
|
347
|
+
# @since 0.2.4
|
348
|
+
#
|
349
|
+
def visit_urls_like(pattern=nil,&block)
|
350
|
+
if pattern
|
351
|
+
visit_urls << pattern
|
352
|
+
elsif block
|
353
|
+
visit_urls << block
|
354
|
+
end
|
355
|
+
|
356
|
+
return self
|
357
|
+
end
|
358
|
+
|
359
|
+
#
|
360
|
+
# Specifies the patterns that match URLs to not visit.
|
361
|
+
#
|
362
|
+
# @return [Array<String, Regexp, Proc>]
|
363
|
+
# The URL patterns to not visit.
|
364
|
+
#
|
365
|
+
# @since 0.2.4
|
366
|
+
#
|
367
|
+
def ignore_urls
|
368
|
+
@url_rules.reject
|
369
|
+
end
|
370
|
+
|
371
|
+
#
|
372
|
+
# Adds a given pattern to the {#ignore_urls}.
|
373
|
+
#
|
374
|
+
# @param [String, Regexp] pattern
|
375
|
+
# The pattern to match URLs with.
|
376
|
+
#
|
377
|
+
# @yield [url]
|
378
|
+
# If a block is given, it will be used to filter URLs.
|
379
|
+
#
|
380
|
+
# @yieldparam [URI::HTTP, URI::HTTPS] url
|
381
|
+
# A URL to reject or accept.
|
382
|
+
#
|
383
|
+
# @since 0.2.4
|
384
|
+
#
|
385
|
+
def ignore_urls_like(pattern=nil,&block)
|
386
|
+
if pattern
|
387
|
+
ignore_urls << pattern
|
388
|
+
elsif block
|
389
|
+
ignore_urls << block
|
390
|
+
end
|
391
|
+
|
392
|
+
return self
|
393
|
+
end
|
394
|
+
|
309
395
|
#
|
310
396
|
# Specifies the patterns that match the URI path extensions to visit.
|
311
397
|
#
|
@@ -317,7 +403,7 @@ module Spidr
|
|
317
403
|
end
|
318
404
|
|
319
405
|
#
|
320
|
-
# Adds a given pattern to the visit_exts.
|
406
|
+
# Adds a given pattern to the {#visit_exts}.
|
321
407
|
#
|
322
408
|
# @param [String, Regexp] pattern
|
323
409
|
# The pattern to match URI path extensions with.
|
@@ -349,7 +435,7 @@ module Spidr
|
|
349
435
|
end
|
350
436
|
|
351
437
|
#
|
352
|
-
# Adds a given pattern to the ignore_exts.
|
438
|
+
# Adds a given pattern to the {#ignore_exts}.
|
353
439
|
#
|
354
440
|
# @param [String, Regexp] pattern
|
355
441
|
# The pattern to match URI path extensions with.
|
@@ -428,6 +514,21 @@ module Spidr
|
|
428
514
|
@link_rules.accept?(link)
|
429
515
|
end
|
430
516
|
|
517
|
+
#
|
518
|
+
# Determines if a given URL should be visited.
|
519
|
+
#
|
520
|
+
# @param [URI::HTTP, URI::HTTPS] url
|
521
|
+
# The URL.
|
522
|
+
#
|
523
|
+
# @return [Boolean]
|
524
|
+
# Specifies whether the given URL should be visited.
|
525
|
+
#
|
526
|
+
# @since 0.2.4
|
527
|
+
#
|
528
|
+
def visit_url?(link)
|
529
|
+
@url_rules.accept?(link)
|
530
|
+
end
|
531
|
+
|
431
532
|
#
|
432
533
|
# Determines if a given URI path extension should be visited.
|
433
534
|
#
|
data/lib/spidr/page.rb
CHANGED
@@ -61,13 +61,19 @@ module Spidr
|
|
61
61
|
alias ok? is_ok?
|
62
62
|
|
63
63
|
#
|
64
|
-
# Determines if the response code is `301`
|
64
|
+
# Determines if the response code is `300`, `301`, `302`, `303`
|
65
|
+
# or `307`.
|
65
66
|
#
|
66
67
|
# @return [Boolean]
|
67
|
-
# Specifies whether the response code is
|
68
|
+
# Specifies whether the response code is an HTTP Redirect code.
|
68
69
|
#
|
69
70
|
def is_redirect?
|
70
|
-
|
71
|
+
case code
|
72
|
+
when 300..303, 307
|
73
|
+
true
|
74
|
+
else
|
75
|
+
false
|
76
|
+
end
|
71
77
|
end
|
72
78
|
|
73
79
|
alias redirect? is_redirect?
|
@@ -145,7 +151,7 @@ module Spidr
|
|
145
151
|
# The Content-Type of the page.
|
146
152
|
#
|
147
153
|
def content_type
|
148
|
-
@response['Content-Type']
|
154
|
+
(@response['Content-Type'] || '')
|
149
155
|
end
|
150
156
|
|
151
157
|
#
|
@@ -157,7 +163,7 @@ module Spidr
|
|
157
163
|
# @since 0.2.2
|
158
164
|
#
|
159
165
|
def content_types
|
160
|
-
@headers['content-type']
|
166
|
+
(@headers['content-type'] || [])
|
161
167
|
end
|
162
168
|
|
163
169
|
#
|
@@ -167,7 +173,7 @@ module Spidr
|
|
167
173
|
# Specifies whether the page is plain-text.
|
168
174
|
#
|
169
175
|
def plain_text?
|
170
|
-
|
176
|
+
is_content_type?('text/plain')
|
171
177
|
end
|
172
178
|
|
173
179
|
alias txt? plain_text?
|
@@ -179,7 +185,7 @@ module Spidr
|
|
179
185
|
# Specifies whether the page is HTML document.
|
180
186
|
#
|
181
187
|
def html?
|
182
|
-
|
188
|
+
is_content_type?('text/html')
|
183
189
|
end
|
184
190
|
|
185
191
|
#
|
@@ -189,7 +195,7 @@ module Spidr
|
|
189
195
|
# Specifies whether the page is XML document.
|
190
196
|
#
|
191
197
|
def xml?
|
192
|
-
|
198
|
+
is_content_type?('text/xml')
|
193
199
|
end
|
194
200
|
|
195
201
|
#
|
@@ -199,7 +205,7 @@ module Spidr
|
|
199
205
|
# Specifies whether the page is XML Stylesheet (XSL).
|
200
206
|
#
|
201
207
|
def xsl?
|
202
|
-
|
208
|
+
is_content_type?('text/xsl')
|
203
209
|
end
|
204
210
|
|
205
211
|
#
|
@@ -209,8 +215,8 @@ module Spidr
|
|
209
215
|
# Specifies whether the page is JavaScript.
|
210
216
|
#
|
211
217
|
def javascript?
|
212
|
-
|
213
|
-
|
218
|
+
is_content_type?('text/javascript') || \
|
219
|
+
is_content_type?('application/javascript')
|
214
220
|
end
|
215
221
|
|
216
222
|
#
|
@@ -220,7 +226,7 @@ module Spidr
|
|
220
226
|
# Specifies whether the page is a CSS stylesheet.
|
221
227
|
#
|
222
228
|
def css?
|
223
|
-
|
229
|
+
is_content_type?('text/css')
|
224
230
|
end
|
225
231
|
|
226
232
|
#
|
@@ -230,8 +236,8 @@ module Spidr
|
|
230
236
|
# Specifies whether the page is a RSS feed.
|
231
237
|
#
|
232
238
|
def rss?
|
233
|
-
|
234
|
-
|
239
|
+
is_content_type?('application/rss+xml') || \
|
240
|
+
is_content_type?('application/rdf+xml')
|
235
241
|
end
|
236
242
|
|
237
243
|
#
|
@@ -241,7 +247,7 @@ module Spidr
|
|
241
247
|
# Specifies whether the page is an Atom feed.
|
242
248
|
#
|
243
249
|
def atom?
|
244
|
-
|
250
|
+
is_content_type?('application/atom+xml')
|
245
251
|
end
|
246
252
|
|
247
253
|
#
|
@@ -251,7 +257,7 @@ module Spidr
|
|
251
257
|
# Specifies whether the page is a MS Word document.
|
252
258
|
#
|
253
259
|
def ms_word?
|
254
|
-
|
260
|
+
is_content_type?('application/msword')
|
255
261
|
end
|
256
262
|
|
257
263
|
#
|
@@ -261,7 +267,7 @@ module Spidr
|
|
261
267
|
# Specifies whether the page is a PDF document.
|
262
268
|
#
|
263
269
|
def pdf?
|
264
|
-
|
270
|
+
is_content_type?('application/pdf')
|
265
271
|
end
|
266
272
|
|
267
273
|
#
|
@@ -271,7 +277,7 @@ module Spidr
|
|
271
277
|
# Specifies whether the page is a ZIP archive.
|
272
278
|
#
|
273
279
|
def zip?
|
274
|
-
|
280
|
+
is_content_type?('application/zip')
|
275
281
|
end
|
276
282
|
|
277
283
|
#
|
@@ -329,7 +335,7 @@ module Spidr
|
|
329
335
|
# The body of the response.
|
330
336
|
#
|
331
337
|
def body
|
332
|
-
@response.body
|
338
|
+
(@response.body || '')
|
333
339
|
end
|
334
340
|
|
335
341
|
#
|
@@ -344,7 +350,7 @@ module Spidr
|
|
344
350
|
# @see http://nokogiri.rubyforge.org/nokogiri/Nokogiri/HTML/Document.html
|
345
351
|
#
|
346
352
|
def doc
|
347
|
-
return nil if
|
353
|
+
return nil if body.empty?
|
348
354
|
|
349
355
|
begin
|
350
356
|
if html?
|
@@ -375,10 +381,10 @@ module Spidr
|
|
375
381
|
#
|
376
382
|
def search(*paths)
|
377
383
|
if doc
|
378
|
-
|
384
|
+
doc.search(*paths)
|
385
|
+
else
|
386
|
+
[]
|
379
387
|
end
|
380
|
-
|
381
|
-
return []
|
382
388
|
end
|
383
389
|
|
384
390
|
#
|
@@ -395,10 +401,8 @@ module Spidr
|
|
395
401
|
#
|
396
402
|
def at(*arguments)
|
397
403
|
if doc
|
398
|
-
|
404
|
+
doc.at(*arguments)
|
399
405
|
end
|
400
|
-
|
401
|
-
return nil
|
402
406
|
end
|
403
407
|
|
404
408
|
alias / search
|
@@ -412,7 +416,7 @@ module Spidr
|
|
412
416
|
#
|
413
417
|
def title
|
414
418
|
if (node = at('//title'))
|
415
|
-
|
419
|
+
node.inner_text
|
416
420
|
end
|
417
421
|
end
|
418
422
|
|
@@ -430,8 +434,7 @@ module Spidr
|
|
430
434
|
urls << url unless (url.nil? || url.empty?)
|
431
435
|
}
|
432
436
|
|
433
|
-
|
434
|
-
when 300..303, 307
|
437
|
+
if self.is_redirect?
|
435
438
|
location = @headers['location']
|
436
439
|
|
437
440
|
if location.kind_of?(Array)
|
@@ -506,6 +509,22 @@ module Spidr
|
|
506
509
|
|
507
510
|
protected
|
508
511
|
|
512
|
+
#
|
513
|
+
# Determines if any of the content-types of the page include a given
|
514
|
+
# type.
|
515
|
+
#
|
516
|
+
# @param [String] type
|
517
|
+
# The content-type to test for.
|
518
|
+
#
|
519
|
+
# @return [Boolean]
|
520
|
+
# Specifies whether the page includes the given content-type.
|
521
|
+
#
|
522
|
+
# @since 0.2.4
|
523
|
+
#
|
524
|
+
def is_content_type?(type)
|
525
|
+
content_types.any? { |content| content.include?(type) }
|
526
|
+
end
|
527
|
+
|
509
528
|
#
|
510
529
|
# Provides transparent access to the values in `headers`.
|
511
530
|
#
|
@@ -518,6 +537,6 @@ module Spidr
|
|
518
537
|
|
519
538
|
return super(sym,*args,&block)
|
520
539
|
end
|
521
|
-
|
540
|
+
|
522
541
|
end
|
523
542
|
end
|
data/lib/spidr/version.rb
CHANGED
data/spidr.gemspec
CHANGED
@@ -5,89 +5,90 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{spidr}
|
8
|
-
s.version = "0.2.
|
8
|
+
s.version = "0.2.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Postmodern"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-05-05}
|
13
13
|
s.description = %q{Spidr is a versatile Ruby web spidering library that can spider a site, multiple domains, certain links or infinitely. Spidr is designed to be fast and easy to use.}
|
14
14
|
s.email = %q{postmodern.mod3@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"ChangeLog.md",
|
17
|
-
|
18
|
-
|
17
|
+
"LICENSE.txt",
|
18
|
+
"README.md"
|
19
19
|
]
|
20
20
|
s.files = [
|
21
21
|
".gitignore",
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
22
|
+
".specopts",
|
23
|
+
".yardopts",
|
24
|
+
"ChangeLog.md",
|
25
|
+
"LICENSE.txt",
|
26
|
+
"README.md",
|
27
|
+
"Rakefile",
|
28
|
+
"lib/spidr.rb",
|
29
|
+
"lib/spidr/actions.rb",
|
30
|
+
"lib/spidr/actions/actions.rb",
|
31
|
+
"lib/spidr/actions/exceptions.rb",
|
32
|
+
"lib/spidr/actions/exceptions/action.rb",
|
33
|
+
"lib/spidr/actions/exceptions/paused.rb",
|
34
|
+
"lib/spidr/actions/exceptions/skip_link.rb",
|
35
|
+
"lib/spidr/actions/exceptions/skip_page.rb",
|
36
|
+
"lib/spidr/agent.rb",
|
37
|
+
"lib/spidr/auth_credential.rb",
|
38
|
+
"lib/spidr/auth_store.rb",
|
39
|
+
"lib/spidr/cookie_jar.rb",
|
40
|
+
"lib/spidr/events.rb",
|
41
|
+
"lib/spidr/extensions.rb",
|
42
|
+
"lib/spidr/extensions/uri.rb",
|
43
|
+
"lib/spidr/filters.rb",
|
44
|
+
"lib/spidr/page.rb",
|
45
|
+
"lib/spidr/rules.rb",
|
46
|
+
"lib/spidr/sanitizers.rb",
|
47
|
+
"lib/spidr/session_cache.rb",
|
48
|
+
"lib/spidr/spidr.rb",
|
49
|
+
"lib/spidr/version.rb",
|
50
|
+
"spec/actions_spec.rb",
|
51
|
+
"spec/agent_spec.rb",
|
52
|
+
"spec/auth_store_spec.rb",
|
53
|
+
"spec/cookie_jar_spec.rb",
|
54
|
+
"spec/extensions/uri_spec.rb",
|
55
|
+
"spec/filters_spec.rb",
|
56
|
+
"spec/helpers/history.rb",
|
57
|
+
"spec/helpers/page.rb",
|
58
|
+
"spec/helpers/wsoc.rb",
|
59
|
+
"spec/page_examples.rb",
|
60
|
+
"spec/page_spec.rb",
|
61
|
+
"spec/rules_spec.rb",
|
62
|
+
"spec/sanitizers_spec.rb",
|
63
|
+
"spec/session_cache.rb",
|
64
|
+
"spec/spec_helper.rb",
|
65
|
+
"spec/spidr_spec.rb",
|
66
|
+
"spidr.gemspec"
|
67
67
|
]
|
68
68
|
s.has_rdoc = %q{yard}
|
69
69
|
s.homepage = %q{http://github.com/postmodern/spidr}
|
70
|
+
s.licenses = ["MIT"]
|
70
71
|
s.rdoc_options = ["--charset=UTF-8"]
|
71
72
|
s.require_paths = ["lib"]
|
72
73
|
s.rubygems_version = %q{1.3.6}
|
73
74
|
s.summary = %q{A versatile Ruby web spidering library}
|
74
75
|
s.test_files = [
|
75
|
-
"spec/agent_spec.rb",
|
76
|
-
"spec/helpers/history.rb",
|
77
|
-
"spec/helpers/wsoc.rb",
|
78
|
-
"spec/helpers/page.rb",
|
79
|
-
"spec/spec_helper.rb",
|
80
|
-
"spec/extensions/uri_spec.rb",
|
81
|
-
"spec/page_spec.rb",
|
82
|
-
"spec/spidr_spec.rb",
|
83
|
-
"spec/sanitizers_spec.rb",
|
84
|
-
"spec/page_examples.rb",
|
85
|
-
"spec/filters_spec.rb",
|
86
|
-
"spec/actions_spec.rb",
|
87
|
-
"spec/rules_spec.rb",
|
88
76
|
"spec/auth_store_spec.rb",
|
89
|
-
|
90
|
-
|
77
|
+
"spec/rules_spec.rb",
|
78
|
+
"spec/session_cache.rb",
|
79
|
+
"spec/spec_helper.rb",
|
80
|
+
"spec/sanitizers_spec.rb",
|
81
|
+
"spec/filters_spec.rb",
|
82
|
+
"spec/page_spec.rb",
|
83
|
+
"spec/spidr_spec.rb",
|
84
|
+
"spec/agent_spec.rb",
|
85
|
+
"spec/cookie_jar_spec.rb",
|
86
|
+
"spec/extensions/uri_spec.rb",
|
87
|
+
"spec/helpers/history.rb",
|
88
|
+
"spec/helpers/page.rb",
|
89
|
+
"spec/helpers/wsoc.rb",
|
90
|
+
"spec/page_examples.rb",
|
91
|
+
"spec/actions_spec.rb"
|
91
92
|
]
|
92
93
|
|
93
94
|
if s.respond_to? :specification_version then
|
@@ -95,21 +96,21 @@ Gem::Specification.new do |s|
|
|
95
96
|
s.specification_version = 3
|
96
97
|
|
97
98
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
98
|
-
s.add_runtime_dependency(%q<nokogiri>, [">= 1.
|
99
|
-
s.add_development_dependency(%q<rspec>, ["
|
100
|
-
s.add_development_dependency(%q<yard>, ["
|
101
|
-
s.add_development_dependency(%q<wsoc>, ["
|
99
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 1.3.0"])
|
100
|
+
s.add_development_dependency(%q<rspec>, ["~> 1.3.0"])
|
101
|
+
s.add_development_dependency(%q<yard>, ["~> 0.5.3"])
|
102
|
+
s.add_development_dependency(%q<wsoc>, ["~> 0.1.1"])
|
102
103
|
else
|
103
|
-
s.add_dependency(%q<nokogiri>, [">= 1.
|
104
|
-
s.add_dependency(%q<rspec>, ["
|
105
|
-
s.add_dependency(%q<yard>, ["
|
106
|
-
s.add_dependency(%q<wsoc>, ["
|
104
|
+
s.add_dependency(%q<nokogiri>, [">= 1.3.0"])
|
105
|
+
s.add_dependency(%q<rspec>, ["~> 1.3.0"])
|
106
|
+
s.add_dependency(%q<yard>, ["~> 0.5.3"])
|
107
|
+
s.add_dependency(%q<wsoc>, ["~> 0.1.1"])
|
107
108
|
end
|
108
109
|
else
|
109
|
-
s.add_dependency(%q<nokogiri>, [">= 1.
|
110
|
-
s.add_dependency(%q<rspec>, ["
|
111
|
-
s.add_dependency(%q<yard>, ["
|
112
|
-
s.add_dependency(%q<wsoc>, ["
|
110
|
+
s.add_dependency(%q<nokogiri>, [">= 1.3.0"])
|
111
|
+
s.add_dependency(%q<rspec>, ["~> 1.3.0"])
|
112
|
+
s.add_dependency(%q<yard>, ["~> 0.5.3"])
|
113
|
+
s.add_dependency(%q<wsoc>, ["~> 0.1.1"])
|
113
114
|
end
|
114
115
|
end
|
115
116
|
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 2
|
8
|
-
-
|
9
|
-
version: 0.2.
|
8
|
+
- 4
|
9
|
+
version: 0.2.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Postmodern
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-05-05 00:00:00 -07:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -26,9 +26,9 @@ dependencies:
|
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
segments:
|
28
28
|
- 1
|
29
|
-
-
|
29
|
+
- 3
|
30
30
|
- 0
|
31
|
-
version: 1.
|
31
|
+
version: 1.3.0
|
32
32
|
type: :runtime
|
33
33
|
version_requirements: *id001
|
34
34
|
- !ruby/object:Gem::Dependency
|
@@ -36,7 +36,7 @@ dependencies:
|
|
36
36
|
prerelease: false
|
37
37
|
requirement: &id002 !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
|
-
- -
|
39
|
+
- - ~>
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
segments:
|
42
42
|
- 1
|
@@ -50,7 +50,7 @@ dependencies:
|
|
50
50
|
prerelease: false
|
51
51
|
requirement: &id003 !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
|
-
- -
|
53
|
+
- - ~>
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
segments:
|
56
56
|
- 0
|
@@ -64,7 +64,7 @@ dependencies:
|
|
64
64
|
prerelease: false
|
65
65
|
requirement: &id004 !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
|
-
- -
|
67
|
+
- - ~>
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
segments:
|
70
70
|
- 0
|
@@ -132,8 +132,8 @@ files:
|
|
132
132
|
- spidr.gemspec
|
133
133
|
has_rdoc: yard
|
134
134
|
homepage: http://github.com/postmodern/spidr
|
135
|
-
licenses:
|
136
|
-
|
135
|
+
licenses:
|
136
|
+
- MIT
|
137
137
|
post_install_message:
|
138
138
|
rdoc_options:
|
139
139
|
- --charset=UTF-8
|
@@ -161,19 +161,19 @@ signing_key:
|
|
161
161
|
specification_version: 3
|
162
162
|
summary: A versatile Ruby web spidering library
|
163
163
|
test_files:
|
164
|
-
- spec/
|
165
|
-
- spec/
|
166
|
-
- spec/
|
167
|
-
- spec/helpers/page.rb
|
164
|
+
- spec/auth_store_spec.rb
|
165
|
+
- spec/rules_spec.rb
|
166
|
+
- spec/session_cache.rb
|
168
167
|
- spec/spec_helper.rb
|
169
|
-
- spec/
|
168
|
+
- spec/sanitizers_spec.rb
|
169
|
+
- spec/filters_spec.rb
|
170
170
|
- spec/page_spec.rb
|
171
171
|
- spec/spidr_spec.rb
|
172
|
-
- spec/
|
172
|
+
- spec/agent_spec.rb
|
173
|
+
- spec/cookie_jar_spec.rb
|
174
|
+
- spec/extensions/uri_spec.rb
|
175
|
+
- spec/helpers/history.rb
|
176
|
+
- spec/helpers/page.rb
|
177
|
+
- spec/helpers/wsoc.rb
|
173
178
|
- spec/page_examples.rb
|
174
|
-
- spec/filters_spec.rb
|
175
179
|
- spec/actions_spec.rb
|
176
|
-
- spec/rules_spec.rb
|
177
|
-
- spec/auth_store_spec.rb
|
178
|
-
- spec/cookie_jar_spec.rb
|
179
|
-
- spec/session_cache.rb
|