webrobots 0.0.13 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,11 @@
+ *.gem
+ .bundle
+ Gemfile.lock
+ pkg/*
+
+ coverage
+ rdoc
+ doc
+ .yardoc
+
+ /lib/webrobots/robotstxt.output
data/.travis.yml ADDED
@@ -0,0 +1,15 @@
+ language: ruby
+ rvm:
+ - 1.8.7
+ - 1.9.2
+ - 1.9.3
+ - 2.0.0
+ - ree
+ - jruby-18mode
+ - jruby-19mode
+ - rbx-18mode
+ - rbx-19mode
+ matrix:
+ allow_failures:
+ - rvm: rbx-18mode
+ - rvm: rbx-19mode
data/Gemfile CHANGED
@@ -1,17 +1,4 @@
  source "http://rubygems.org"
- # Add dependencies required to use your gem here.
- # Example:
- # gem "activesupport", ">= 2.3.5"

- # Add dependencies to develop your gem here.
- # Include everything needed to run rake, tests, features, etc.
- group :development do
- gem "racc", ">= 0"
- gem "shoulda", ">= 0"
- gem "bundler", ">= 1.0.0"
- gem "jeweler", "~> 1.6.4"
- gem "rcov", "~> 0.9.11"
-
- # To test the webrobots/nokogiri module.
- gem "nokogiri", ">= 1.4.4"
- end
+ # Specify your gem's dependencies in webrobots.gemspec
+ gemspec
data/LICENSE.txt CHANGED
@@ -1,4 +1,4 @@
- Copyright (c) 2010, 2011, 2012 Akinori MUSHA
+ Copyright (c) 2010, 2011, 2012, 2013 Akinori MUSHA

  All rights reserved.

data/README.rdoc CHANGED
@@ -34,5 +34,5 @@ This is a library to help write robots.txt compliant web robots.

  == Copyright

- Copyright (c) 2010, 2011, 2012 Akinori MUSHA. See LICENSE.txt for
+ Copyright (c) 2010, 2011, 2012, 2013 Akinori MUSHA. See LICENSE.txt for
  further details.
data/Rakefile CHANGED
@@ -1,45 +1,22 @@
  # encoding: utf-8

- require 'rubygems'
- require 'bundler'
- begin
- Bundler.setup(:default, :development)
- rescue Bundler::BundlerError => e
- $stderr.puts e.message
- $stderr.puts "Run `bundle install` to install missing gems"
- exit e.status_code
- end
- require 'rake'
+ require 'bundler/gem_tasks'

- require 'jeweler'
- Jeweler::Tasks.new do |gem|
- # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
- gem.name = "webrobots"
- gem.homepage = "https://github.com/knu/webrobots"
- gem.license = "2-clause BSDL"
- gem.summary = %Q{A Ruby library to help write robots.txt compliant web robots}
- gem.description = <<-'EOS'
- This library helps write robots.txt compliant web robots in Ruby.
- EOS
- gem.email = "knu@idaemons.org"
- gem.authors = ["Akinori MUSHA"]
- # dependencies defined in Gemfile
- end
- Jeweler::RubygemsDotOrgTasks.new
+ gemspec = Bundler::GemHelper.gemspec

  require 'rake/testtask'
  Rake::TestTask.new(:test) do |test|
- test.libs << 'lib' << 'test'
- test.pattern = 'test/**/test_*.rb'
+ test.libs << 'test'
+ test.test_files = gemspec.test_files
  test.verbose = true
  end

- require 'rcov/rcovtask'
- Rcov::RcovTask.new do |test|
- test.libs << 'test'
- test.pattern = 'test/**/test_*.rb'
- test.verbose = true
- test.rcov_opts << '--exclude "gems/*"'
+ require 'rdoc/task'
+ Rake::RDocTask.new do |rdoc|
+ rdoc.rdoc_dir = 'rdoc'
+ rdoc.title = "#{gemspec.name} #{gemspec.version}"
+ rdoc.rdoc_files.include(gemspec.extra_rdoc_files)
+ rdoc.rdoc_files.include('lib/**/*.rb')
  end

  task :default => :test
@@ -49,13 +26,3 @@ task :test => 'lib/webrobots/robotstxt.rb'
  file 'lib/webrobots/robotstxt.rb' => 'lib/webrobots/robotstxt.ry' do
  sh 'racc', '-o', 'lib/webrobots/robotstxt.rb', 'lib/webrobots/robotstxt.ry'
  end
-
- require 'rake/rdoctask'
- Rake::RDocTask.new do |rdoc|
- version = File.exist?('VERSION') ? File.read('VERSION') : ""
-
- rdoc.rdoc_dir = 'rdoc'
- rdoc.title = "webrobots #{version}"
- rdoc.rdoc_files.include('README*')
- rdoc.rdoc_files.include('lib/**/*.rb')
- end
data/lib/webrobots.rb CHANGED
@@ -1,3 +1,4 @@
+ require 'webrobots/version'
  require 'webrobots/robotstxt'
  require 'uri'
  require 'net/https'
@@ -17,13 +18,34 @@ class WebRobots
  # the response body if successful, return an empty string if the
  # resource is not found, and return nil or raise any error on
  # failure. Redirects should be handled within this proc.
+ #
+ # * :crawl_delay => determines how to react to Crawl-delay
+ # directives. If +:sleep+ is given, WebRobots sleeps as demanded
+ # when allowed?(url)/disallowed?(url) is called. This is the
+ # default behavior. If +:ignore+ is given, WebRobots does
+ # nothing. If a custom method, proc, or anything that responds to
+ # .call(delay, last_checked_at), it is called.
  def initialize(user_agent, options = nil)
  @user_agent = user_agent
- @parser = RobotsTxt::Parser.new(user_agent)
- @parser_mutex = Mutex.new

  options ||= {}
  @http_get = options[:http_get] || method(:http_get)
+ crawl_delay_handler =
+ case value = options[:crawl_delay] || :sleep
+ when :ignore
+ nil
+ when :sleep
+ method(:crawl_delay_handler)
+ else
+ if value.respond_to?(:call)
+ value
+ else
+ raise ArgumentError, "invalid Crawl-delay handler: #{value.inspect}"
+ end
+ end
+
+ @parser = RobotsTxt::Parser.new(user_agent, crawl_delay_handler)
+ @parser_mutex = Mutex.new

  @robotstxt = create_cache()
  end
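Editor's note: the :crawl_delay option documented in the hunk above is passed straight to WebRobots.new. A minimal usage sketch follows; the agent string and URL are illustrative only, not part of the gem.

    require 'webrobots'

    # Default behavior: honor Crawl-delay by sleeping inside allowed?/disallowed?.
    robots = WebRobots.new('ExampleBot/1.0')

    # Skip the built-in sleeping altogether.
    robots_fast = WebRobots.new('ExampleBot/1.0', :crawl_delay => :ignore)

    # Or pass anything that responds to call(delay, last_checked_at), e.g. to
    # hand the delay to your own scheduler instead of blocking the caller.
    robots_custom = WebRobots.new('ExampleBot/1.0',
                                  :crawl_delay => proc { |delay, last_checked_at|
                                    warn "robots.txt asks for a #{delay}s delay"
                                  })

    robots.allowed?('http://www.example.org/') # may sleep as demanded by Crawl-delay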
@@ -57,6 +79,13 @@ class WebRobots
  !allowed?(url)
  end

+ # Returns the number of seconds that the configured agent should wait
+ # between successive requests to the site identified by +url+ according
+ # to the site's robots.txt +Crawl-delay+ directive.
+ def crawl_delay(url)
+ robots_txt_for(url).crawl_delay()
+ end
+
  # Returns extended option values for a resource at +url+ in a hash
  # with each field name lower-cased. See allowed?() for a list of
  # errors that may be raised.
@@ -169,4 +198,11 @@ class WebRobots
  }
  raise 'too many HTTP redirects'
  end
+
+ def crawl_delay_handler(delay, last_checked_at)
+ if last_checked_at
+ delay -= Time.now - last_checked_at
+ sleep delay if delay > 0
+ end
+ end
  end
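Editor's note: the private crawl_delay_handler added above is what the default :sleep mode calls. A custom handler receives the same two arguments (the delay in seconds and the Time of the previous check, or nil on the first request). A rough sketch of a non-blocking handler together with the new crawl_delay query; the variable and agent names are made up.

    require 'webrobots'

    # Record the requested delay instead of sleeping; the callable only needs
    # to respond to call(delay, last_checked_at) as described above.
    requested_delays = []
    handler = lambda do |delay, last_checked_at|
      waited = last_checked_at ? Time.now - last_checked_at : nil
      requested_delays << [delay, waited]
    end

    robots = WebRobots.new('ExampleBot/1.0', :crawl_delay => handler)
    robots.allowed?('http://www.example.org/')          # invokes the handler if Crawl-delay is set
    puts robots.crawl_delay('http://www.example.org/')  # 0 when the site sets no Crawl-delay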
data/lib/webrobots/robotstxt.rb CHANGED
@@ -1,6 +1,6 @@
  #
  # DO NOT MODIFY!!!!
- # This file is automatically generated by Racc 1.4.7
+ # This file is automatically generated by Racc 1.4.9
  # from Racc grammer file "".
  #

@@ -30,17 +30,21 @@ class WebRobots
  class RobotsTxt
  class Parser < Racc::Parser

- module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 169)
+ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 171)

- def initialize(target = nil)
+ def initialize(target, crawl_delay_handler = nil)
  super()
  @target = target
+ @crawl_delay_handler = crawl_delay_handler
  end

  def parse!(input, site)
  parse(input, site)
  rescue Error => e
- RobotsTxt.new(site, nil, :error => e, :target => @target)
+ RobotsTxt.new(site, nil,
+ :error => e,
+ :target => @target,
+ :crawl_delay_handler => @crawl_delay_handler)
  end

  KNOWN_TOKENS = %w[User-agent Allow Disallow Crawl-delay Sitemap]
@@ -334,7 +338,9 @@ module_eval(<<'.,.,', 'robotstxt.ry', 11)
  def _reduce_2(val, _values, result)
  body = val[2]
  result = RobotsTxt.new(@site, body,
- :target => @target, :sitemaps => @sitemaps)
+ :target => @target,
+ :sitemaps => @sitemaps,
+ :crawl_delay_handler => @crawl_delay_handler)

  result
  end
@@ -368,7 +374,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 11)

  # reduce 16 omitted

- module_eval(<<'.,.,', 'robotstxt.ry', 42)
+ module_eval(<<'.,.,', 'robotstxt.ry', 44)
  def _reduce_17(val, _values, result)
  @sitemaps << val[3]

@@ -376,7 +382,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 42)
  end
  .,.,

- module_eval(<<'.,.,', 'robotstxt.ry', 47)
+ module_eval(<<'.,.,', 'robotstxt.ry', 49)
  def _reduce_18(val, _values, result)
  result = []
  result << val[0]
@@ -385,7 +391,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 47)
  end
  .,.,

- module_eval(<<'.,.,', 'robotstxt.ry', 52)
+ module_eval(<<'.,.,', 'robotstxt.ry', 54)
  def _reduce_19(val, _values, result)
  result = []

@@ -393,7 +399,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 52)
  end
  .,.,

- module_eval(<<'.,.,', 'robotstxt.ry', 58)
+ module_eval(<<'.,.,', 'robotstxt.ry', 60)
  def _reduce_20(val, _values, result)
  result << val[2]

@@ -401,7 +407,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 58)
  end
  .,.,

- module_eval(<<'.,.,', 'robotstxt.ry', 64)
+ module_eval(<<'.,.,', 'robotstxt.ry', 66)
  def _reduce_21(val, _values, result)
  val[2].each_with_index { |line, i|
  warn "%s line %d: %s: orphan rule line" %
@@ -416,7 +422,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 64)

  # reduce 23 omitted

- module_eval(<<'.,.,', 'robotstxt.ry', 79)
+ module_eval(<<'.,.,', 'robotstxt.ry', 81)
  def _reduce_24(val, _values, result)
  result = Record.new(val[1], val[2])

@@ -424,7 +430,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 79)
  end
  .,.,

- module_eval(<<'.,.,', 'robotstxt.ry', 84)
+ module_eval(<<'.,.,', 'robotstxt.ry', 86)
  def _reduce_25(val, _values, result)
  result = [val[0]]

@@ -432,7 +438,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 84)
  end
  .,.,

- module_eval(<<'.,.,', 'robotstxt.ry', 89)
+ module_eval(<<'.,.,', 'robotstxt.ry', 91)
  def _reduce_26(val, _values, result)
  result << val[1]

@@ -442,7 +448,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 89)

  # reduce 27 omitted

- module_eval(<<'.,.,', 'robotstxt.ry', 96)
+ module_eval(<<'.,.,', 'robotstxt.ry', 98)
  def _reduce_28(val, _values, result)
  result = AgentLine.new(val[0], val[3])

@@ -454,7 +460,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 96)

  # reduce 30 omitted

- module_eval(<<'.,.,', 'robotstxt.ry', 104)
+ module_eval(<<'.,.,', 'robotstxt.ry', 106)
  def _reduce_31(val, _values, result)
  result = [result]
  @rulelinenos = []
@@ -463,7 +469,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 104)
  end
  .,.,

- module_eval(<<'.,.,', 'robotstxt.ry', 110)
+ module_eval(<<'.,.,', 'robotstxt.ry', 112)
  def _reduce_32(val, _values, result)
  result << val[1]
  @rulelinenos << @lineno
@@ -482,7 +488,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 110)

  # reduce 37 omitted

- module_eval(<<'.,.,', 'robotstxt.ry', 123)
+ module_eval(<<'.,.,', 'robotstxt.ry', 125)
  def _reduce_38(val, _values, result)
  result = AllowLine.new(val[0], val[3])

@@ -490,7 +496,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 123)
  end
  .,.,

- module_eval(<<'.,.,', 'robotstxt.ry', 128)
+ module_eval(<<'.,.,', 'robotstxt.ry', 130)
  def _reduce_39(val, _values, result)
  result = DisallowLine.new(val[0], val[3])

@@ -498,7 +504,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 128)
  end
  .,.,

- module_eval(<<'.,.,', 'robotstxt.ry', 133)
+ module_eval(<<'.,.,', 'robotstxt.ry', 135)
  def _reduce_40(val, _values, result)
  result = CrawlDelayLine.new(val[0], val[3])

@@ -506,7 +512,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 133)
  end
  .,.,

- module_eval(<<'.,.,', 'robotstxt.ry', 138)
+ module_eval(<<'.,.,', 'robotstxt.ry', 140)
  def _reduce_41(val, _values, result)
  result = ExtentionLine.new(val[0], val[3])

@@ -528,11 +534,12 @@ end # class Parser
  @timestamp = Time.now
  @site = site
  @options = options || {}
- @last_checked = nil
+ @last_checked_at = nil

  @error = @options[:error]
  @target = @options[:target]
  @sitemaps = @options[:sitemaps] || []
+ @crawl_delay_handler = @options[:crawl_delay_handler]

  if records && !records.empty?
  @records, defaults = [], []
@@ -578,14 +585,18 @@ end # class Parser
  def allow?(request_uri, user_agent = nil)
  record = find_record(user_agent) or return true
  allow = record.allow?(request_uri)
- if @last_checked and delay = record.delay
- delay -= Time.now - @last_checked
- sleep delay if delay > 0
+ if delay = record.delay and @crawl_delay_handler
+ @crawl_delay_handler.call(delay, @last_checked_at)
  end
- @last_checked = Time.now
+ @last_checked_at = Time.now
  return allow
  end

+ def crawl_delay(user_agent = nil)
+ record = find_record(user_agent) or return 0
+ record.delay or return 0
+ end
+
  def options(user_agent = nil)
  record = find_record(user_agent) or return {}
  record.options
data/lib/webrobots/robotstxt.ry CHANGED
@@ -11,7 +11,9 @@ rule
  {
  body = val[2]
  result = RobotsTxt.new(@site, body,
- :target => @target, :sitemaps => @sitemaps)
+ :target => @target,
+ :sitemaps => @sitemaps,
+ :crawl_delay_handler => @crawl_delay_handler)
  }

  body :
@@ -167,15 +169,19 @@ class WebRobots
  class RobotsTxt
  ---- inner

- def initialize(target = nil)
+ def initialize(target, crawl_delay_handler = nil)
  super()
  @target = target
+ @crawl_delay_handler = crawl_delay_handler
  end

  def parse!(input, site)
  parse(input, site)
  rescue Error => e
- RobotsTxt.new(site, nil, :error => e, :target => @target)
+ RobotsTxt.new(site, nil,
+ :error => e,
+ :target => @target,
+ :crawl_delay_handler => @crawl_delay_handler)
  end

  KNOWN_TOKENS = %w[User-agent Allow Disallow Crawl-delay Sitemap]
@@ -263,11 +269,12 @@ class WebRobots
  @timestamp = Time.now
  @site = site
  @options = options || {}
- @last_checked = nil
+ @last_checked_at = nil

  @error = @options[:error]
  @target = @options[:target]
  @sitemaps = @options[:sitemaps] || []
+ @crawl_delay_handler = @options[:crawl_delay_handler]

  if records && !records.empty?
  @records, defaults = [], []
@@ -313,14 +320,18 @@ class WebRobots
  def allow?(request_uri, user_agent = nil)
  record = find_record(user_agent) or return true
  allow = record.allow?(request_uri)
- if @last_checked and delay = record.delay
- delay -= Time.now - @last_checked
- sleep delay if delay > 0
+ if delay = record.delay and @crawl_delay_handler
+ @crawl_delay_handler.call(delay, @last_checked_at)
  end
- @last_checked = Time.now
+ @last_checked_at = Time.now
  return allow
  end

+ def crawl_delay(user_agent = nil)
+ record = find_record(user_agent) or return 0
+ record.delay or return 0
+ end
+
  def options(user_agent = nil)
  record = find_record(user_agent) or return {}
  record.options
data/lib/webrobots/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Webrobots
+ VERSION = "0.1.0"
+ end
data/test/test_webrobots.rb CHANGED
@@ -384,6 +384,12 @@ Option1: Foo
  Option2: Hello
  Crawl-Delay: 1.5

+ User-Agent: HerBot
+ Disallow: /2heavy/
+ Allow: /2heavy/*.html
+ Option1: Baz
+ Option2: Qux
+
  User-Agent: *
  Disallow: /2heavy/
  Allow: /2heavy/*.html
@@ -400,6 +406,9 @@ Option3: Hi
  }

  @robots_mybot = WebRobots.new('MyBot', :http_get => http_get)
+ @robots_mybot_ignore = WebRobots.new('MyBot', :http_get => http_get, :crawl_delay => :ignore)
+ @robots_mybot_custom = WebRobots.new('MyBot', :http_get => http_get, :crawl_delay => proc { |*args| @delay_args = args })
+ @robots_herbot = WebRobots.new('HerBot', :http_get => http_get)
  @robots_hisbot = WebRobots.new('HisBot', :http_get => http_get)
  end

@@ -411,6 +420,27 @@ Option3: Hi
  assert_equal 'Hello', @robots_mybot.option('http://www.example.org/', 'Option2')
  assert_equal 'Hello', options['option2']

+ options = @robots_mybot_ignore.options('http://www.example.org/')
+ assert_equal 2, options.size
+ assert_equal 'Foo', @robots_mybot_ignore.option('http://www.example.org/', 'Option1')
+ assert_equal 'Foo', options['option1']
+ assert_equal 'Hello', @robots_mybot_ignore.option('http://www.example.org/', 'Option2')
+ assert_equal 'Hello', options['option2']
+
+ options = @robots_mybot_custom.options('http://www.example.org/')
+ assert_equal 2, options.size
+ assert_equal 'Foo', @robots_mybot_custom.option('http://www.example.org/', 'Option1')
+ assert_equal 'Foo', options['option1']
+ assert_equal 'Hello', @robots_mybot_custom.option('http://www.example.org/', 'Option2')
+ assert_equal 'Hello', options['option2']
+
+ options = @robots_herbot.options('http://www.example.org/')
+ assert_equal 2, options.size
+ assert_equal 'Baz', @robots_herbot.option('http://www.example.org/', 'Option1')
+ assert_equal 'Baz', options['option1']
+ assert_equal 'Qux', @robots_herbot.option('http://www.example.org/', 'Option2')
+ assert_equal 'Qux', options['option2']
+
  options = @robots_hisbot.options('http://www.example.org/')
  assert_equal 2, options.size
  assert_equal 'Bar', @robots_hisbot.option('http://www.example.org/', 'Option1')
@@ -422,11 +452,25 @@ Option3: Hi
  http://www.example.org/sitemap-host1.xml
  http://www.example.org/sitemap-host2.xml
  ], @robots_mybot.sitemaps('http://www.example.org/')
+ assert_equal %w[
+ http://www.example.org/sitemap-host1.xml
+ http://www.example.org/sitemap-host2.xml
+ ], @robots_mybot_ignore.sitemaps('http://www.example.org/')
+ assert_equal %w[
+ http://www.example.org/sitemap-host1.xml
+ http://www.example.org/sitemap-host2.xml
+ ], @robots_herbot.sitemaps('http://www.example.org/')
  assert_equal %w[
  http://www.example.org/sitemap-host1.xml
  http://www.example.org/sitemap-host2.xml
  ], @robots_hisbot.sitemaps('http://www.example.org/')

+ assert_equal 1.5, @robots_mybot.crawl_delay('http://www.example.org/')
+ assert_equal 1.5, @robots_mybot_ignore.crawl_delay('http://www.example.org/')
+ assert_equal 1.5, @robots_mybot_custom.crawl_delay('http://www.example.org/')
+ assert_equal 0, @robots_herbot.crawl_delay('http://www.example.org/')
+ assert_equal 0, @robots_hisbot.crawl_delay('http://www.example.org/')
+
  t1 = Time.now
  @robots_mybot.allowed?('http://www.example.org/')
  @robots_mybot.allowed?('http://www.example.org/article1.html')
@@ -435,6 +479,25 @@ Option3: Hi
  @robots_mybot.allowed?('http://www.example.org/article2.html')
  t3 = Time.now
  assert_in_delta 1.5, t3 - t2, 0.1
+
+ t1 = Time.now
+ @robots_mybot_ignore.allowed?('http://www.example.org/')
+ @robots_mybot_ignore.allowed?('http://www.example.org/article1.html')
+ t2 = Time.now
+ assert_in_delta 0, t2 - t1, 0.1
+ @robots_mybot_ignore.allowed?('http://www.example.org/article2.html')
+ t3 = Time.now
+ assert_in_delta 0, t3 - t2, 0.1
+
+ t1 = Time.now
+ @robots_mybot_custom.allowed?('http://www.example.org/')
+ @robots_mybot_custom.allowed?('http://www.example.org/article1.html')
+ t2 = Time.now
+ assert_in_delta 0, t2 - t1, 0.1
+ assert_instance_of Array, @delay_args
+ assert_equal 2, @delay_args.size
+ assert_equal 1.5, @delay_args[0]
+ assert_instance_of Time, @delay_args[1]
  end
  end

data/webrobots.gemspec CHANGED
@@ -1,68 +1,33 @@
- # Generated by jeweler
- # DO NOT EDIT THIS FILE DIRECTLY
- # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
  # -*- encoding: utf-8 -*-
+ $:.push File.expand_path("../lib", __FILE__)
+ require "webrobots/version"

  Gem::Specification.new do |s|
- s.name = "webrobots"
- s.version = "0.0.13"
+ s.name = "webrobots"
+ s.version = Webrobots::VERSION
+ s.authors = ["Akinori MUSHA"]
+ s.email = ["knu@idaemons.org"]
+ s.homepage = %q{https://github.com/knu/webrobots}
+ s.licenses = [%q{2-clause BSDL}]
+ s.summary = %q{A Ruby library to help write robots.txt compliant web robots}
+ s.description = <<-'EOS'
+ This library helps write robots.txt compliant web robots in Ruby.
+ EOS
+
+ s.files = `git ls-files`.split("\n")
+ s.test_files = s.files.grep(%r{/test_[^/]+\.rb$})
+ s.executables = s.files.grep(%r{^bin/[^.]}).map{ |f| File.basename(f) }
+ s.require_paths = ["lib"]

- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
- s.authors = ["Akinori MUSHA"]
- s.date = "2012-01-24"
- s.description = "This library helps write robots.txt compliant web robots in Ruby.\n"
- s.email = "knu@idaemons.org"
  s.extra_rdoc_files = [
  "LICENSE.txt",
  "README.rdoc"
  ]
- s.files = [
- ".document",
- "Gemfile",
- "Gemfile.lock",
- "LICENSE.txt",
- "README.rdoc",
- "Rakefile",
- "VERSION",
- "lib/webrobots.rb",
- "lib/webrobots/nokogiri.rb",
- "lib/webrobots/robotstxt.rb",
- "lib/webrobots/robotstxt.ry",
- "test/helper.rb",
- "test/test_webrobots.rb",
- "webrobots.gemspec"
- ]
- s.homepage = "https://github.com/knu/webrobots"
- s.licenses = ["2-clause BSDL"]
- s.require_paths = ["lib"]
- s.rubygems_version = "1.8.15"
- s.summary = "A Ruby library to help write robots.txt compliant web robots"
-
- if s.respond_to? :specification_version then
- s.specification_version = 3

- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
- s.add_development_dependency(%q<racc>, [">= 0"])
- s.add_development_dependency(%q<shoulda>, [">= 0"])
- s.add_development_dependency(%q<bundler>, [">= 1.0.0"])
- s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
- s.add_development_dependency(%q<rcov>, ["~> 0.9.11"])
- s.add_development_dependency(%q<nokogiri>, [">= 1.4.4"])
- else
- s.add_dependency(%q<racc>, [">= 0"])
- s.add_dependency(%q<shoulda>, [">= 0"])
- s.add_dependency(%q<bundler>, [">= 1.0.0"])
- s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
- s.add_dependency(%q<rcov>, ["~> 0.9.11"])
- s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
- end
- else
- s.add_dependency(%q<racc>, [">= 0"])
- s.add_dependency(%q<shoulda>, [">= 0"])
- s.add_dependency(%q<bundler>, [">= 1.0.0"])
- s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
- s.add_dependency(%q<rcov>, ["~> 0.9.11"])
- s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
- end
+ s.add_development_dependency("rake", [">= 0.9.2.2"])
+ s.add_development_dependency("racc", [">= 0"]) unless RUBY_PLATFORM == "java"
+ s.add_development_dependency("shoulda", [">= 0"])
+ s.add_development_dependency("rdoc", ["> 2.4.2"])
+ s.add_development_dependency("bundler", [">= 1.2"])
+ s.add_development_dependency("nokogiri", [">= 1.4.4"])
  end
-
metadata CHANGED
@@ -1,172 +1,162 @@
- --- !ruby/object:Gem::Specification
+ --- !ruby/object:Gem::Specification
  name: webrobots
- version: !ruby/object:Gem::Version
- hash: 5
+ version: !ruby/object:Gem::Version
+ version: 0.1.0
  prerelease:
- segments:
- - 0
- - 0
- - 13
- version: 0.0.13
  platform: ruby
- authors:
+ authors:
  - Akinori MUSHA
  autorequire:
  bindir: bin
  cert_chain: []
-
- date: 2012-01-24 00:00:00 Z
- dependencies:
- - !ruby/object:Gem::Dependency
- requirement: &id001 !ruby/object:Gem::Requirement
+ date: 2013-02-15 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+ name: rake
+ requirement: !ruby/object:Gem::Requirement
  none: false
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- hash: 3
- segments:
- - 0
- version: "0"
- version_requirements: *id001
- name: racc
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: 0.9.2.2
+ type: :development
  prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: 0.9.2.2
+ - !ruby/object:Gem::Dependency
+ name: racc
+ requirement: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
  type: :development
- - !ruby/object:Gem::Dependency
- requirement: &id002 !ruby/object:Gem::Requirement
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
  none: false
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- hash: 3
- segments:
- - 0
- version: "0"
- version_requirements: *id002
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ - !ruby/object:Gem::Dependency
  name: shoulda
+ requirement: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ type: :development
  prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ - !ruby/object:Gem::Dependency
+ name: rdoc
+ requirement: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ! '>'
+ - !ruby/object:Gem::Version
+ version: 2.4.2
  type: :development
- - !ruby/object:Gem::Dependency
- requirement: &id003 !ruby/object:Gem::Requirement
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
  none: false
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- hash: 23
- segments:
- - 1
- - 0
- - 0
- version: 1.0.0
- version_requirements: *id003
+ requirements:
+ - - ! '>'
+ - !ruby/object:Gem::Version
+ version: 2.4.2
+ - !ruby/object:Gem::Dependency
  name: bundler
- prerelease: false
- type: :development
- - !ruby/object:Gem::Dependency
- requirement: &id004 !ruby/object:Gem::Requirement
+ requirement: !ruby/object:Gem::Requirement
  none: false
- requirements:
- - - ~>
- - !ruby/object:Gem::Version
- hash: 7
- segments:
- - 1
- - 6
- - 4
- version: 1.6.4
- version_requirements: *id004
- name: jeweler
- prerelease: false
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '1.2'
  type: :development
- - !ruby/object:Gem::Dependency
- requirement: &id005 !ruby/object:Gem::Requirement
- none: false
- requirements:
- - - ~>
- - !ruby/object:Gem::Version
- hash: 45
- segments:
- - 0
- - 9
- - 11
- version: 0.9.11
- version_requirements: *id005
- name: rcov
  prerelease: false
- type: :development
- - !ruby/object:Gem::Dependency
- requirement: &id006 !ruby/object:Gem::Requirement
+ version_requirements: !ruby/object:Gem::Requirement
  none: false
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- hash: 15
- segments:
- - 1
- - 4
- - 4
- version: 1.4.4
- version_requirements: *id006
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '1.2'
+ - !ruby/object:Gem::Dependency
  name: nokogiri
- prerelease: false
+ requirement: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: 1.4.4
  type: :development
- description: |
- This library helps write robots.txt compliant web robots in Ruby.
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ none: false
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: 1.4.4
+ description: ! 'This library helps write robots.txt compliant web robots in Ruby.

- email: knu@idaemons.org
+ '
+ email:
+ - knu@idaemons.org
  executables: []
-
  extensions: []
-
- extra_rdoc_files:
+ extra_rdoc_files:
  - LICENSE.txt
  - README.rdoc
- files:
+ files:
  - .document
+ - .gitignore
+ - .travis.yml
  - Gemfile
- - Gemfile.lock
  - LICENSE.txt
  - README.rdoc
  - Rakefile
- - VERSION
  - lib/webrobots.rb
  - lib/webrobots/nokogiri.rb
  - lib/webrobots/robotstxt.rb
  - lib/webrobots/robotstxt.ry
+ - lib/webrobots/version.rb
  - test/helper.rb
  - test/test_webrobots.rb
  - webrobots.gemspec
  homepage: https://github.com/knu/webrobots
- licenses:
+ licenses:
  - 2-clause BSDL
  post_install_message:
  rdoc_options: []
-
- require_paths:
+ require_paths:
  - lib
- required_ruby_version: !ruby/object:Gem::Requirement
+ required_ruby_version: !ruby/object:Gem::Requirement
  none: false
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- hash: 3
- segments:
- - 0
- version: "0"
- required_rubygems_version: !ruby/object:Gem::Requirement
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
- requirements:
- - - ">="
- - !ruby/object:Gem::Version
- hash: 3
- segments:
- - 0
- version: "0"
+ requirements:
+ - - ! '>='
+ - !ruby/object:Gem::Version
+ version: '0'
  requirements: []
-
  rubyforge_project:
- rubygems_version: 1.8.15
+ rubygems_version: 1.8.24
  signing_key:
  specification_version: 3
  summary: A Ruby library to help write robots.txt compliant web robots
- test_files: []
-
+ test_files:
+ - test/test_webrobots.rb
data/Gemfile.lock DELETED
@@ -1,24 +0,0 @@
- GEM
- remote: http://rubygems.org/
- specs:
- git (1.2.5)
- jeweler (1.6.4)
- bundler (~> 1.0)
- git (>= 1.2.5)
- rake
- nokogiri (1.5.0)
- racc (1.4.7)
- rake (0.9.2.2)
- rcov (0.9.11)
- shoulda (2.11.3)
-
- PLATFORMS
- ruby
-
- DEPENDENCIES
- bundler (>= 1.0.0)
- jeweler (~> 1.6.4)
- nokogiri (>= 1.4.4)
- racc
- rcov (~> 0.9.11)
- shoulda
data/VERSION DELETED
@@ -1 +0,0 @@
- 0.0.13