webrobots 0.0.13 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,11 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+
6
+ coverage
7
+ rdoc
8
+ doc
9
+ .yardoc
10
+
11
+ /lib/webrobots/robotstxt.output
@@ -0,0 +1,15 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.8.7
4
+ - 1.9.2
5
+ - 1.9.3
6
+ - 2.0.0
7
+ - ree
8
+ - jruby-18mode
9
+ - jruby-19mode
10
+ - rbx-18mode
11
+ - rbx-19mode
12
+ matrix:
13
+ allow_failures:
14
+ - rvm: rbx-18mode
15
+ - rvm: rbx-19mode
data/Gemfile CHANGED
@@ -1,17 +1,4 @@
1
1
  source "http://rubygems.org"
2
- # Add dependencies required to use your gem here.
3
- # Example:
4
- # gem "activesupport", ">= 2.3.5"
5
2
 
6
- # Add dependencies to develop your gem here.
7
- # Include everything needed to run rake, tests, features, etc.
8
- group :development do
9
- gem "racc", ">= 0"
10
- gem "shoulda", ">= 0"
11
- gem "bundler", ">= 1.0.0"
12
- gem "jeweler", "~> 1.6.4"
13
- gem "rcov", "~> 0.9.11"
14
-
15
- # To test the webrobots/nokogiri module.
16
- gem "nokogiri", ">= 1.4.4"
17
- end
3
+ # Specify your gem's dependencies in webrobots.gemspec
4
+ gemspec
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2011, 2012 Akinori MUSHA
1
+ Copyright (c) 2010, 2011, 2012, 2013 Akinori MUSHA
2
2
 
3
3
  All rights reserved.
4
4
 
@@ -34,5 +34,5 @@ This is a library to help write robots.txt compliant web robots.
34
34
 
35
35
  == Copyright
36
36
 
37
- Copyright (c) 2010, 2011, 2012 Akinori MUSHA. See LICENSE.txt for
37
+ Copyright (c) 2010, 2011, 2012, 2013 Akinori MUSHA. See LICENSE.txt for
38
38
  further details.
data/Rakefile CHANGED
@@ -1,45 +1,22 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'rubygems'
4
- require 'bundler'
5
- begin
6
- Bundler.setup(:default, :development)
7
- rescue Bundler::BundlerError => e
8
- $stderr.puts e.message
9
- $stderr.puts "Run `bundle install` to install missing gems"
10
- exit e.status_code
11
- end
12
- require 'rake'
3
+ require 'bundler/gem_tasks'
13
4
 
14
- require 'jeweler'
15
- Jeweler::Tasks.new do |gem|
16
- # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
- gem.name = "webrobots"
18
- gem.homepage = "https://github.com/knu/webrobots"
19
- gem.license = "2-clause BSDL"
20
- gem.summary = %Q{A Ruby library to help write robots.txt compliant web robots}
21
- gem.description = <<-'EOS'
22
- This library helps write robots.txt compliant web robots in Ruby.
23
- EOS
24
- gem.email = "knu@idaemons.org"
25
- gem.authors = ["Akinori MUSHA"]
26
- # dependencies defined in Gemfile
27
- end
28
- Jeweler::RubygemsDotOrgTasks.new
5
+ gemspec = Bundler::GemHelper.gemspec
29
6
 
30
7
  require 'rake/testtask'
31
8
  Rake::TestTask.new(:test) do |test|
32
- test.libs << 'lib' << 'test'
33
- test.pattern = 'test/**/test_*.rb'
9
+ test.libs << 'test'
10
+ test.test_files = gemspec.test_files
34
11
  test.verbose = true
35
12
  end
36
13
 
37
- require 'rcov/rcovtask'
38
- Rcov::RcovTask.new do |test|
39
- test.libs << 'test'
40
- test.pattern = 'test/**/test_*.rb'
41
- test.verbose = true
42
- test.rcov_opts << '--exclude "gems/*"'
14
+ require 'rdoc/task'
15
+ Rake::RDocTask.new do |rdoc|
16
+ rdoc.rdoc_dir = 'rdoc'
17
+ rdoc.title = "#{gemspec.name} #{gemspec.version}"
18
+ rdoc.rdoc_files.include(gemspec.extra_rdoc_files)
19
+ rdoc.rdoc_files.include('lib/**/*.rb')
43
20
  end
44
21
 
45
22
  task :default => :test
@@ -49,13 +26,3 @@ task :test => 'lib/webrobots/robotstxt.rb'
49
26
  file 'lib/webrobots/robotstxt.rb' => 'lib/webrobots/robotstxt.ry' do
50
27
  sh 'racc', '-o', 'lib/webrobots/robotstxt.rb', 'lib/webrobots/robotstxt.ry'
51
28
  end
52
-
53
- require 'rake/rdoctask'
54
- Rake::RDocTask.new do |rdoc|
55
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
56
-
57
- rdoc.rdoc_dir = 'rdoc'
58
- rdoc.title = "webrobots #{version}"
59
- rdoc.rdoc_files.include('README*')
60
- rdoc.rdoc_files.include('lib/**/*.rb')
61
- end
@@ -1,3 +1,4 @@
1
+ require 'webrobots/version'
1
2
  require 'webrobots/robotstxt'
2
3
  require 'uri'
3
4
  require 'net/https'
@@ -17,13 +18,34 @@ class WebRobots
17
18
  # the response body if successful, return an empty string if the
18
19
  # resource is not found, and return nil or raise any error on
19
20
  # failure. Redirects should be handled within this proc.
21
+ #
22
+ # * :crawl_delay => determines how to react to Crawl-delay
23
+ # directives. If +:sleep+ is given, WebRobots sleeps as demanded
24
+ # when allowed?(url)/disallowed?(url) is called. This is the
25
+ # default behavior. If +:ignore+ is given, WebRobots does
26
+ # nothing. If a custom method, proc, or anything that responds to
27
+ # .call(delay, last_checked_at) is given, it is called.
20
28
  def initialize(user_agent, options = nil)
21
29
  @user_agent = user_agent
22
- @parser = RobotsTxt::Parser.new(user_agent)
23
- @parser_mutex = Mutex.new
24
30
 
25
31
  options ||= {}
26
32
  @http_get = options[:http_get] || method(:http_get)
33
+ crawl_delay_handler =
34
+ case value = options[:crawl_delay] || :sleep
35
+ when :ignore
36
+ nil
37
+ when :sleep
38
+ method(:crawl_delay_handler)
39
+ else
40
+ if value.respond_to?(:call)
41
+ value
42
+ else
43
+ raise ArgumentError, "invalid Crawl-delay handler: #{value.inspect}"
44
+ end
45
+ end
46
+
47
+ @parser = RobotsTxt::Parser.new(user_agent, crawl_delay_handler)
48
+ @parser_mutex = Mutex.new
27
49
 
28
50
  @robotstxt = create_cache()
29
51
  end
@@ -57,6 +79,13 @@ class WebRobots
57
79
  !allowed?(url)
58
80
  end
59
81
 
82
+ # Returns the number of seconds that the configured agent should wait
83
+ # between successive requests to the site identified by +url+ according
84
+ # to the site's robots.txt +Crawl-delay+ directive.
85
+ def crawl_delay(url)
86
+ robots_txt_for(url).crawl_delay()
87
+ end
88
+
60
89
  # Returns extended option values for a resource at +url+ in a hash
61
90
  # with each field name lower-cased. See allowed?() for a list of
62
91
  # errors that may be raised.
@@ -169,4 +198,11 @@ class WebRobots
169
198
  }
170
199
  raise 'too many HTTP redirects'
171
200
  end
201
+
202
+ def crawl_delay_handler(delay, last_checked_at)
203
+ if last_checked_at
204
+ delay -= Time.now - last_checked_at
205
+ sleep delay if delay > 0
206
+ end
207
+ end
172
208
  end
@@ -1,6 +1,6 @@
1
1
  #
2
2
  # DO NOT MODIFY!!!!
3
- # This file is automatically generated by Racc 1.4.7
3
+ # This file is automatically generated by Racc 1.4.9
4
4
  # from Racc grammer file "".
5
5
  #
6
6
 
@@ -30,17 +30,21 @@ class WebRobots
30
30
  class RobotsTxt
31
31
  class Parser < Racc::Parser
32
32
 
33
- module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 169)
33
+ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 171)
34
34
 
35
- def initialize(target = nil)
35
+ def initialize(target, crawl_delay_handler = nil)
36
36
  super()
37
37
  @target = target
38
+ @crawl_delay_handler = crawl_delay_handler
38
39
  end
39
40
 
40
41
  def parse!(input, site)
41
42
  parse(input, site)
42
43
  rescue Error => e
43
- RobotsTxt.new(site, nil, :error => e, :target => @target)
44
+ RobotsTxt.new(site, nil,
45
+ :error => e,
46
+ :target => @target,
47
+ :crawl_delay_handler => @crawl_delay_handler)
44
48
  end
45
49
 
46
50
  KNOWN_TOKENS = %w[User-agent Allow Disallow Crawl-delay Sitemap]
@@ -334,7 +338,9 @@ module_eval(<<'.,.,', 'robotstxt.ry', 11)
334
338
  def _reduce_2(val, _values, result)
335
339
  body = val[2]
336
340
  result = RobotsTxt.new(@site, body,
337
- :target => @target, :sitemaps => @sitemaps)
341
+ :target => @target,
342
+ :sitemaps => @sitemaps,
343
+ :crawl_delay_handler => @crawl_delay_handler)
338
344
 
339
345
  result
340
346
  end
@@ -368,7 +374,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 11)
368
374
 
369
375
  # reduce 16 omitted
370
376
 
371
- module_eval(<<'.,.,', 'robotstxt.ry', 42)
377
+ module_eval(<<'.,.,', 'robotstxt.ry', 44)
372
378
  def _reduce_17(val, _values, result)
373
379
  @sitemaps << val[3]
374
380
 
@@ -376,7 +382,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 42)
376
382
  end
377
383
  .,.,
378
384
 
379
- module_eval(<<'.,.,', 'robotstxt.ry', 47)
385
+ module_eval(<<'.,.,', 'robotstxt.ry', 49)
380
386
  def _reduce_18(val, _values, result)
381
387
  result = []
382
388
  result << val[0]
@@ -385,7 +391,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 47)
385
391
  end
386
392
  .,.,
387
393
 
388
- module_eval(<<'.,.,', 'robotstxt.ry', 52)
394
+ module_eval(<<'.,.,', 'robotstxt.ry', 54)
389
395
  def _reduce_19(val, _values, result)
390
396
  result = []
391
397
 
@@ -393,7 +399,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 52)
393
399
  end
394
400
  .,.,
395
401
 
396
- module_eval(<<'.,.,', 'robotstxt.ry', 58)
402
+ module_eval(<<'.,.,', 'robotstxt.ry', 60)
397
403
  def _reduce_20(val, _values, result)
398
404
  result << val[2]
399
405
 
@@ -401,7 +407,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 58)
401
407
  end
402
408
  .,.,
403
409
 
404
- module_eval(<<'.,.,', 'robotstxt.ry', 64)
410
+ module_eval(<<'.,.,', 'robotstxt.ry', 66)
405
411
  def _reduce_21(val, _values, result)
406
412
  val[2].each_with_index { |line, i|
407
413
  warn "%s line %d: %s: orphan rule line" %
@@ -416,7 +422,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 64)
416
422
 
417
423
  # reduce 23 omitted
418
424
 
419
- module_eval(<<'.,.,', 'robotstxt.ry', 79)
425
+ module_eval(<<'.,.,', 'robotstxt.ry', 81)
420
426
  def _reduce_24(val, _values, result)
421
427
  result = Record.new(val[1], val[2])
422
428
 
@@ -424,7 +430,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 79)
424
430
  end
425
431
  .,.,
426
432
 
427
- module_eval(<<'.,.,', 'robotstxt.ry', 84)
433
+ module_eval(<<'.,.,', 'robotstxt.ry', 86)
428
434
  def _reduce_25(val, _values, result)
429
435
  result = [val[0]]
430
436
 
@@ -432,7 +438,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 84)
432
438
  end
433
439
  .,.,
434
440
 
435
- module_eval(<<'.,.,', 'robotstxt.ry', 89)
441
+ module_eval(<<'.,.,', 'robotstxt.ry', 91)
436
442
  def _reduce_26(val, _values, result)
437
443
  result << val[1]
438
444
 
@@ -442,7 +448,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 89)
442
448
 
443
449
  # reduce 27 omitted
444
450
 
445
- module_eval(<<'.,.,', 'robotstxt.ry', 96)
451
+ module_eval(<<'.,.,', 'robotstxt.ry', 98)
446
452
  def _reduce_28(val, _values, result)
447
453
  result = AgentLine.new(val[0], val[3])
448
454
 
@@ -454,7 +460,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 96)
454
460
 
455
461
  # reduce 30 omitted
456
462
 
457
- module_eval(<<'.,.,', 'robotstxt.ry', 104)
463
+ module_eval(<<'.,.,', 'robotstxt.ry', 106)
458
464
  def _reduce_31(val, _values, result)
459
465
  result = [result]
460
466
  @rulelinenos = []
@@ -463,7 +469,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 104)
463
469
  end
464
470
  .,.,
465
471
 
466
- module_eval(<<'.,.,', 'robotstxt.ry', 110)
472
+ module_eval(<<'.,.,', 'robotstxt.ry', 112)
467
473
  def _reduce_32(val, _values, result)
468
474
  result << val[1]
469
475
  @rulelinenos << @lineno
@@ -482,7 +488,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 110)
482
488
 
483
489
  # reduce 37 omitted
484
490
 
485
- module_eval(<<'.,.,', 'robotstxt.ry', 123)
491
+ module_eval(<<'.,.,', 'robotstxt.ry', 125)
486
492
  def _reduce_38(val, _values, result)
487
493
  result = AllowLine.new(val[0], val[3])
488
494
 
@@ -490,7 +496,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 123)
490
496
  end
491
497
  .,.,
492
498
 
493
- module_eval(<<'.,.,', 'robotstxt.ry', 128)
499
+ module_eval(<<'.,.,', 'robotstxt.ry', 130)
494
500
  def _reduce_39(val, _values, result)
495
501
  result = DisallowLine.new(val[0], val[3])
496
502
 
@@ -498,7 +504,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 128)
498
504
  end
499
505
  .,.,
500
506
 
501
- module_eval(<<'.,.,', 'robotstxt.ry', 133)
507
+ module_eval(<<'.,.,', 'robotstxt.ry', 135)
502
508
  def _reduce_40(val, _values, result)
503
509
  result = CrawlDelayLine.new(val[0], val[3])
504
510
 
@@ -506,7 +512,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 133)
506
512
  end
507
513
  .,.,
508
514
 
509
- module_eval(<<'.,.,', 'robotstxt.ry', 138)
515
+ module_eval(<<'.,.,', 'robotstxt.ry', 140)
510
516
  def _reduce_41(val, _values, result)
511
517
  result = ExtentionLine.new(val[0], val[3])
512
518
 
@@ -528,11 +534,12 @@ end # class Parser
528
534
  @timestamp = Time.now
529
535
  @site = site
530
536
  @options = options || {}
531
- @last_checked = nil
537
+ @last_checked_at = nil
532
538
 
533
539
  @error = @options[:error]
534
540
  @target = @options[:target]
535
541
  @sitemaps = @options[:sitemaps] || []
542
+ @crawl_delay_handler = @options[:crawl_delay_handler]
536
543
 
537
544
  if records && !records.empty?
538
545
  @records, defaults = [], []
@@ -578,14 +585,18 @@ end # class Parser
578
585
  def allow?(request_uri, user_agent = nil)
579
586
  record = find_record(user_agent) or return true
580
587
  allow = record.allow?(request_uri)
581
- if @last_checked and delay = record.delay
582
- delay -= Time.now - @last_checked
583
- sleep delay if delay > 0
588
+ if delay = record.delay and @crawl_delay_handler
589
+ @crawl_delay_handler.call(delay, @last_checked_at)
584
590
  end
585
- @last_checked = Time.now
591
+ @last_checked_at = Time.now
586
592
  return allow
587
593
  end
588
594
 
595
+ def crawl_delay(user_agent = nil)
596
+ record = find_record(user_agent) or return 0
597
+ record.delay or return 0
598
+ end
599
+
589
600
  def options(user_agent = nil)
590
601
  record = find_record(user_agent) or return {}
591
602
  record.options
@@ -11,7 +11,9 @@ rule
11
11
  {
12
12
  body = val[2]
13
13
  result = RobotsTxt.new(@site, body,
14
- :target => @target, :sitemaps => @sitemaps)
14
+ :target => @target,
15
+ :sitemaps => @sitemaps,
16
+ :crawl_delay_handler => @crawl_delay_handler)
15
17
  }
16
18
 
17
19
  body :
@@ -167,15 +169,19 @@ class WebRobots
167
169
  class RobotsTxt
168
170
  ---- inner
169
171
 
170
- def initialize(target = nil)
172
+ def initialize(target, crawl_delay_handler = nil)
171
173
  super()
172
174
  @target = target
175
+ @crawl_delay_handler = crawl_delay_handler
173
176
  end
174
177
 
175
178
  def parse!(input, site)
176
179
  parse(input, site)
177
180
  rescue Error => e
178
- RobotsTxt.new(site, nil, :error => e, :target => @target)
181
+ RobotsTxt.new(site, nil,
182
+ :error => e,
183
+ :target => @target,
184
+ :crawl_delay_handler => @crawl_delay_handler)
179
185
  end
180
186
 
181
187
  KNOWN_TOKENS = %w[User-agent Allow Disallow Crawl-delay Sitemap]
@@ -263,11 +269,12 @@ class WebRobots
263
269
  @timestamp = Time.now
264
270
  @site = site
265
271
  @options = options || {}
266
- @last_checked = nil
272
+ @last_checked_at = nil
267
273
 
268
274
  @error = @options[:error]
269
275
  @target = @options[:target]
270
276
  @sitemaps = @options[:sitemaps] || []
277
+ @crawl_delay_handler = @options[:crawl_delay_handler]
271
278
 
272
279
  if records && !records.empty?
273
280
  @records, defaults = [], []
@@ -313,14 +320,18 @@ class WebRobots
313
320
  def allow?(request_uri, user_agent = nil)
314
321
  record = find_record(user_agent) or return true
315
322
  allow = record.allow?(request_uri)
316
- if @last_checked and delay = record.delay
317
- delay -= Time.now - @last_checked
318
- sleep delay if delay > 0
323
+ if delay = record.delay and @crawl_delay_handler
324
+ @crawl_delay_handler.call(delay, @last_checked_at)
319
325
  end
320
- @last_checked = Time.now
326
+ @last_checked_at = Time.now
321
327
  return allow
322
328
  end
323
329
 
330
+ def crawl_delay(user_agent = nil)
331
+ record = find_record(user_agent) or return 0
332
+ record.delay or return 0
333
+ end
334
+
324
335
  def options(user_agent = nil)
325
336
  record = find_record(user_agent) or return {}
326
337
  record.options
@@ -0,0 +1,3 @@
1
+ module Webrobots
2
+ VERSION = "0.1.0"
3
+ end
@@ -384,6 +384,12 @@ Option1: Foo
384
384
  Option2: Hello
385
385
  Crawl-Delay: 1.5
386
386
 
387
+ User-Agent: HerBot
388
+ Disallow: /2heavy/
389
+ Allow: /2heavy/*.html
390
+ Option1: Baz
391
+ Option2: Qux
392
+
387
393
  User-Agent: *
388
394
  Disallow: /2heavy/
389
395
  Allow: /2heavy/*.html
@@ -400,6 +406,9 @@ Option3: Hi
400
406
  }
401
407
 
402
408
  @robots_mybot = WebRobots.new('MyBot', :http_get => http_get)
409
+ @robots_mybot_ignore = WebRobots.new('MyBot', :http_get => http_get, :crawl_delay => :ignore)
410
+ @robots_mybot_custom = WebRobots.new('MyBot', :http_get => http_get, :crawl_delay => proc { |*args| @delay_args = args })
411
+ @robots_herbot = WebRobots.new('HerBot', :http_get => http_get)
403
412
  @robots_hisbot = WebRobots.new('HisBot', :http_get => http_get)
404
413
  end
405
414
 
@@ -411,6 +420,27 @@ Option3: Hi
411
420
  assert_equal 'Hello', @robots_mybot.option('http://www.example.org/', 'Option2')
412
421
  assert_equal 'Hello', options['option2']
413
422
 
423
+ options = @robots_mybot_ignore.options('http://www.example.org/')
424
+ assert_equal 2, options.size
425
+ assert_equal 'Foo', @robots_mybot_ignore.option('http://www.example.org/', 'Option1')
426
+ assert_equal 'Foo', options['option1']
427
+ assert_equal 'Hello', @robots_mybot_ignore.option('http://www.example.org/', 'Option2')
428
+ assert_equal 'Hello', options['option2']
429
+
430
+ options = @robots_mybot_custom.options('http://www.example.org/')
431
+ assert_equal 2, options.size
432
+ assert_equal 'Foo', @robots_mybot_custom.option('http://www.example.org/', 'Option1')
433
+ assert_equal 'Foo', options['option1']
434
+ assert_equal 'Hello', @robots_mybot_custom.option('http://www.example.org/', 'Option2')
435
+ assert_equal 'Hello', options['option2']
436
+
437
+ options = @robots_herbot.options('http://www.example.org/')
438
+ assert_equal 2, options.size
439
+ assert_equal 'Baz', @robots_herbot.option('http://www.example.org/', 'Option1')
440
+ assert_equal 'Baz', options['option1']
441
+ assert_equal 'Qux', @robots_herbot.option('http://www.example.org/', 'Option2')
442
+ assert_equal 'Qux', options['option2']
443
+
414
444
  options = @robots_hisbot.options('http://www.example.org/')
415
445
  assert_equal 2, options.size
416
446
  assert_equal 'Bar', @robots_hisbot.option('http://www.example.org/', 'Option1')
@@ -422,11 +452,25 @@ Option3: Hi
422
452
  http://www.example.org/sitemap-host1.xml
423
453
  http://www.example.org/sitemap-host2.xml
424
454
  ], @robots_mybot.sitemaps('http://www.example.org/')
455
+ assert_equal %w[
456
+ http://www.example.org/sitemap-host1.xml
457
+ http://www.example.org/sitemap-host2.xml
458
+ ], @robots_mybot_ignore.sitemaps('http://www.example.org/')
459
+ assert_equal %w[
460
+ http://www.example.org/sitemap-host1.xml
461
+ http://www.example.org/sitemap-host2.xml
462
+ ], @robots_herbot.sitemaps('http://www.example.org/')
425
463
  assert_equal %w[
426
464
  http://www.example.org/sitemap-host1.xml
427
465
  http://www.example.org/sitemap-host2.xml
428
466
  ], @robots_hisbot.sitemaps('http://www.example.org/')
429
467
 
468
+ assert_equal 1.5, @robots_mybot.crawl_delay('http://www.example.org/')
469
+ assert_equal 1.5, @robots_mybot_ignore.crawl_delay('http://www.example.org/')
470
+ assert_equal 1.5, @robots_mybot_custom.crawl_delay('http://www.example.org/')
471
+ assert_equal 0, @robots_herbot.crawl_delay('http://www.example.org/')
472
+ assert_equal 0, @robots_hisbot.crawl_delay('http://www.example.org/')
473
+
430
474
  t1 = Time.now
431
475
  @robots_mybot.allowed?('http://www.example.org/')
432
476
  @robots_mybot.allowed?('http://www.example.org/article1.html')
@@ -435,6 +479,25 @@ Option3: Hi
435
479
  @robots_mybot.allowed?('http://www.example.org/article2.html')
436
480
  t3 = Time.now
437
481
  assert_in_delta 1.5, t3 - t2, 0.1
482
+
483
+ t1 = Time.now
484
+ @robots_mybot_ignore.allowed?('http://www.example.org/')
485
+ @robots_mybot_ignore.allowed?('http://www.example.org/article1.html')
486
+ t2 = Time.now
487
+ assert_in_delta 0, t2 - t1, 0.1
488
+ @robots_mybot_ignore.allowed?('http://www.example.org/article2.html')
489
+ t3 = Time.now
490
+ assert_in_delta 0, t3 - t2, 0.1
491
+
492
+ t1 = Time.now
493
+ @robots_mybot_custom.allowed?('http://www.example.org/')
494
+ @robots_mybot_custom.allowed?('http://www.example.org/article1.html')
495
+ t2 = Time.now
496
+ assert_in_delta 0, t2 - t1, 0.1
497
+ assert_instance_of Array, @delay_args
498
+ assert_equal 2, @delay_args.size
499
+ assert_equal 1.5, @delay_args[0]
500
+ assert_instance_of Time, @delay_args[1]
438
501
  end
439
502
  end
440
503
 
@@ -1,68 +1,33 @@
1
- # Generated by jeweler
2
- # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
1
  # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "webrobots/version"
5
4
 
6
5
  Gem::Specification.new do |s|
7
- s.name = "webrobots"
8
- s.version = "0.0.13"
6
+ s.name = "webrobots"
7
+ s.version = Webrobots::VERSION
8
+ s.authors = ["Akinori MUSHA"]
9
+ s.email = ["knu@idaemons.org"]
10
+ s.homepage = %q{https://github.com/knu/webrobots}
11
+ s.licenses = [%q{2-clause BSDL}]
12
+ s.summary = %q{A Ruby library to help write robots.txt compliant web robots}
13
+ s.description = <<-'EOS'
14
+ This library helps write robots.txt compliant web robots in Ruby.
15
+ EOS
16
+
17
+ s.files = `git ls-files`.split("\n")
18
+ s.test_files = s.files.grep(%r{/test_[^/]+\.rb$})
19
+ s.executables = s.files.grep(%r{^bin/[^.]}).map{ |f| File.basename(f) }
20
+ s.require_paths = ["lib"]
9
21
 
10
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Akinori MUSHA"]
12
- s.date = "2012-01-24"
13
- s.description = "This library helps write robots.txt compliant web robots in Ruby.\n"
14
- s.email = "knu@idaemons.org"
15
22
  s.extra_rdoc_files = [
16
23
  "LICENSE.txt",
17
24
  "README.rdoc"
18
25
  ]
19
- s.files = [
20
- ".document",
21
- "Gemfile",
22
- "Gemfile.lock",
23
- "LICENSE.txt",
24
- "README.rdoc",
25
- "Rakefile",
26
- "VERSION",
27
- "lib/webrobots.rb",
28
- "lib/webrobots/nokogiri.rb",
29
- "lib/webrobots/robotstxt.rb",
30
- "lib/webrobots/robotstxt.ry",
31
- "test/helper.rb",
32
- "test/test_webrobots.rb",
33
- "webrobots.gemspec"
34
- ]
35
- s.homepage = "https://github.com/knu/webrobots"
36
- s.licenses = ["2-clause BSDL"]
37
- s.require_paths = ["lib"]
38
- s.rubygems_version = "1.8.15"
39
- s.summary = "A Ruby library to help write robots.txt compliant web robots"
40
-
41
- if s.respond_to? :specification_version then
42
- s.specification_version = 3
43
26
 
44
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
- s.add_development_dependency(%q<racc>, [">= 0"])
46
- s.add_development_dependency(%q<shoulda>, [">= 0"])
47
- s.add_development_dependency(%q<bundler>, [">= 1.0.0"])
48
- s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
49
- s.add_development_dependency(%q<rcov>, ["~> 0.9.11"])
50
- s.add_development_dependency(%q<nokogiri>, [">= 1.4.4"])
51
- else
52
- s.add_dependency(%q<racc>, [">= 0"])
53
- s.add_dependency(%q<shoulda>, [">= 0"])
54
- s.add_dependency(%q<bundler>, [">= 1.0.0"])
55
- s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
56
- s.add_dependency(%q<rcov>, ["~> 0.9.11"])
57
- s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
58
- end
59
- else
60
- s.add_dependency(%q<racc>, [">= 0"])
61
- s.add_dependency(%q<shoulda>, [">= 0"])
62
- s.add_dependency(%q<bundler>, [">= 1.0.0"])
63
- s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
64
- s.add_dependency(%q<rcov>, ["~> 0.9.11"])
65
- s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
66
- end
27
+ s.add_development_dependency("rake", [">= 0.9.2.2"])
28
+ s.add_development_dependency("racc", [">= 0"]) unless RUBY_PLATFORM == "java"
29
+ s.add_development_dependency("shoulda", [">= 0"])
30
+ s.add_development_dependency("rdoc", ["> 2.4.2"])
31
+ s.add_development_dependency("bundler", [">= 1.2"])
32
+ s.add_development_dependency("nokogiri", [">= 1.4.4"])
67
33
  end
68
-
metadata CHANGED
@@ -1,172 +1,162 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: webrobots
3
- version: !ruby/object:Gem::Version
4
- hash: 5
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 0
9
- - 13
10
- version: 0.0.13
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Akinori MUSHA
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-01-24 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- requirement: &id001 !ruby/object:Gem::Requirement
12
+ date: 2013-02-15 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
22
17
  none: false
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- hash: 3
27
- segments:
28
- - 0
29
- version: "0"
30
- version_requirements: *id001
31
- name: racc
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.9.2.2
22
+ type: :development
32
23
  prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 0.9.2.2
30
+ - !ruby/object:Gem::Dependency
31
+ name: racc
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
33
38
  type: :development
34
- - !ruby/object:Gem::Dependency
35
- requirement: &id002 !ruby/object:Gem::Requirement
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
36
41
  none: false
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- hash: 3
41
- segments:
42
- - 0
43
- version: "0"
44
- version_requirements: *id002
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
45
47
  name: shoulda
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
46
55
  prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rdoc
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>'
68
+ - !ruby/object:Gem::Version
69
+ version: 2.4.2
47
70
  type: :development
48
- - !ruby/object:Gem::Dependency
49
- requirement: &id003 !ruby/object:Gem::Requirement
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
50
73
  none: false
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- hash: 23
55
- segments:
56
- - 1
57
- - 0
58
- - 0
59
- version: 1.0.0
60
- version_requirements: *id003
74
+ requirements:
75
+ - - ! '>'
76
+ - !ruby/object:Gem::Version
77
+ version: 2.4.2
78
+ - !ruby/object:Gem::Dependency
61
79
  name: bundler
62
- prerelease: false
63
- type: :development
64
- - !ruby/object:Gem::Dependency
65
- requirement: &id004 !ruby/object:Gem::Requirement
80
+ requirement: !ruby/object:Gem::Requirement
66
81
  none: false
67
- requirements:
68
- - - ~>
69
- - !ruby/object:Gem::Version
70
- hash: 7
71
- segments:
72
- - 1
73
- - 6
74
- - 4
75
- version: 1.6.4
76
- version_requirements: *id004
77
- name: jeweler
78
- prerelease: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '1.2'
79
86
  type: :development
80
- - !ruby/object:Gem::Dependency
81
- requirement: &id005 !ruby/object:Gem::Requirement
82
- none: false
83
- requirements:
84
- - - ~>
85
- - !ruby/object:Gem::Version
86
- hash: 45
87
- segments:
88
- - 0
89
- - 9
90
- - 11
91
- version: 0.9.11
92
- version_requirements: *id005
93
- name: rcov
94
87
  prerelease: false
95
- type: :development
96
- - !ruby/object:Gem::Dependency
97
- requirement: &id006 !ruby/object:Gem::Requirement
88
+ version_requirements: !ruby/object:Gem::Requirement
98
89
  none: false
99
- requirements:
100
- - - ">="
101
- - !ruby/object:Gem::Version
102
- hash: 15
103
- segments:
104
- - 1
105
- - 4
106
- - 4
107
- version: 1.4.4
108
- version_requirements: *id006
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '1.2'
94
+ - !ruby/object:Gem::Dependency
109
95
  name: nokogiri
110
- prerelease: false
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: 1.4.4
111
102
  type: :development
112
- description: |
113
- This library helps write robots.txt compliant web robots in Ruby.
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: 1.4.4
110
+ description: ! 'This library helps write robots.txt compliant web robots in Ruby.
114
111
 
115
- email: knu@idaemons.org
112
+ '
113
+ email:
114
+ - knu@idaemons.org
116
115
  executables: []
117
-
118
116
  extensions: []
119
-
120
- extra_rdoc_files:
117
+ extra_rdoc_files:
121
118
  - LICENSE.txt
122
119
  - README.rdoc
123
- files:
120
+ files:
124
121
  - .document
122
+ - .gitignore
123
+ - .travis.yml
125
124
  - Gemfile
126
- - Gemfile.lock
127
125
  - LICENSE.txt
128
126
  - README.rdoc
129
127
  - Rakefile
130
- - VERSION
131
128
  - lib/webrobots.rb
132
129
  - lib/webrobots/nokogiri.rb
133
130
  - lib/webrobots/robotstxt.rb
134
131
  - lib/webrobots/robotstxt.ry
132
+ - lib/webrobots/version.rb
135
133
  - test/helper.rb
136
134
  - test/test_webrobots.rb
137
135
  - webrobots.gemspec
138
136
  homepage: https://github.com/knu/webrobots
139
- licenses:
137
+ licenses:
140
138
  - 2-clause BSDL
141
139
  post_install_message:
142
140
  rdoc_options: []
143
-
144
- require_paths:
141
+ require_paths:
145
142
  - lib
146
- required_ruby_version: !ruby/object:Gem::Requirement
143
+ required_ruby_version: !ruby/object:Gem::Requirement
147
144
  none: false
148
- requirements:
149
- - - ">="
150
- - !ruby/object:Gem::Version
151
- hash: 3
152
- segments:
153
- - 0
154
- version: "0"
155
- required_rubygems_version: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - ! '>='
147
+ - !ruby/object:Gem::Version
148
+ version: '0'
149
+ required_rubygems_version: !ruby/object:Gem::Requirement
156
150
  none: false
157
- requirements:
158
- - - ">="
159
- - !ruby/object:Gem::Version
160
- hash: 3
161
- segments:
162
- - 0
163
- version: "0"
151
+ requirements:
152
+ - - ! '>='
153
+ - !ruby/object:Gem::Version
154
+ version: '0'
164
155
  requirements: []
165
-
166
156
  rubyforge_project:
167
- rubygems_version: 1.8.15
157
+ rubygems_version: 1.8.24
168
158
  signing_key:
169
159
  specification_version: 3
170
160
  summary: A Ruby library to help write robots.txt compliant web robots
171
- test_files: []
172
-
161
+ test_files:
162
+ - test/test_webrobots.rb
@@ -1,24 +0,0 @@
1
- GEM
2
- remote: http://rubygems.org/
3
- specs:
4
- git (1.2.5)
5
- jeweler (1.6.4)
6
- bundler (~> 1.0)
7
- git (>= 1.2.5)
8
- rake
9
- nokogiri (1.5.0)
10
- racc (1.4.7)
11
- rake (0.9.2.2)
12
- rcov (0.9.11)
13
- shoulda (2.11.3)
14
-
15
- PLATFORMS
16
- ruby
17
-
18
- DEPENDENCIES
19
- bundler (>= 1.0.0)
20
- jeweler (~> 1.6.4)
21
- nokogiri (>= 1.4.4)
22
- racc
23
- rcov (~> 0.9.11)
24
- shoulda
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.0.13