webrobots 0.0.10 → 0.0.11

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -10,6 +10,6 @@ group :development do
10
10
  gem "racc", ">= 0"
11
11
  gem "shoulda", ">= 0"
12
12
  gem "bundler", "~> 1.0.0"
13
- gem "jeweler", "~> 1.6.2"
13
+ gem "jeweler", "~> 1.6.4"
14
14
  gem "rcov", ">= 0"
15
15
  end
data/Gemfile.lock CHANGED
@@ -2,14 +2,14 @@ GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
4
  git (1.2.5)
5
- jeweler (1.6.2)
5
+ jeweler (1.6.4)
6
6
  bundler (~> 1.0)
7
7
  git (>= 1.2.5)
8
8
  rake
9
- nokogiri (1.4.6)
9
+ nokogiri (1.5.0)
10
10
  racc (1.4.6)
11
11
  rake (0.9.2)
12
- rcov (0.9.9)
12
+ rcov (0.9.10)
13
13
  shoulda (2.11.3)
14
14
 
15
15
  PLATFORMS
@@ -17,7 +17,7 @@ PLATFORMS
17
17
 
18
18
  DEPENDENCIES
19
19
  bundler (~> 1.0.0)
20
- jeweler (~> 1.6.2)
20
+ jeweler (~> 1.6.4)
21
21
  nokogiri (>= 1.4.4)
22
22
  racc
23
23
  rcov
data/Rakefile CHANGED
@@ -15,7 +15,7 @@ require 'jeweler'
15
15
  Jeweler::Tasks.new do |gem|
16
16
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
17
  gem.name = "webrobots"
18
- # gem.homepage = "http://github.com/knu/webrobots"
18
+ gem.homepage = "https://github.com/knu/webrobots"
19
19
  gem.license = "2-clause BSDL"
20
20
  gem.summary = %Q{A Ruby library to help write robots.txt compliant web robots}
21
21
  gem.description = <<-'EOS'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.10
1
+ 0.0.11
data/lib/webrobots.rb CHANGED
@@ -30,7 +30,12 @@ class WebRobots
30
30
 
31
31
  # :nodoc:
32
32
  def create_cache
33
- Hash.new # Must respond to [], []=, and delete.
33
+ Hash.new # Must respond to [], []=, delete and clear.
34
+ end
35
+
36
+ # Flushes robots.txt cache.
37
+ def flush_cache
38
+ @robotstxt.clear
34
39
  end
35
40
 
36
41
  # Returns the robot name initially given.
@@ -19,7 +19,7 @@ class WebRobots
19
19
  class RobotsTxt
20
20
  class Parser < Racc::Parser
21
21
 
22
- module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
22
+ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 158)
23
23
 
24
24
  def initialize(target = nil)
25
25
  super()
@@ -38,7 +38,7 @@ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
38
38
  def parse(input, site)
39
39
  @q ||= []
40
40
  @errors = []
41
- @lineno = 1
41
+ @lineno = 0
42
42
  @site = site
43
43
 
44
44
  string = input.respond_to?(:read) ? input.read : input
@@ -46,6 +46,7 @@ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
46
46
  value_expected = false
47
47
 
48
48
  until s.eos?
49
+ @lineno += 1 if s.bol?
49
50
  if t = s.scan(/[ \t]*(?:\r?\n|\z)/)
50
51
  if value_expected
51
52
  @q << [:VALUE, '']
@@ -115,70 +116,70 @@ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
115
116
  ##### State transition tables begin ###
116
117
 
117
118
  racc_action_table = [
118
- 6, 13, -11, 17, 53, 6, -13, 37, 38, 39,
119
- 40, 13, -11, 17, 47, 28, 28, 37, 38, 39,
120
- 40, 13, -11, 17, 50, 51, 52, 37, 38, 39,
121
- 40, 13, -11, 17, 13, 54, 25, 37, 38, 39,
122
- 40, 13, -11, 17, 13, 13, -13, 13, -11, 17,
123
- 6, 13, -14, 17, 6, 13, 13, 17, 6, 13,
124
- 13, 17, 6, 13, 13, 17, 6, 13, 24, 17,
125
- 6, 13, 63, 17, 64, 65, 66, 67, 6, 10,
126
- 6, 7, 6 ]
119
+ 5, 12, -10, 16, 52, 40, -12, 36, 37, 38,
120
+ 39, 12, -10, 16, 46, 27, 27, 36, 37, 38,
121
+ 39, 12, -10, 16, 49, 50, 51, 36, 37, 38,
122
+ 39, 12, -10, 16, 12, 53, 24, 36, 37, 38,
123
+ 39, 12, -10, 16, 12, 12, -12, 12, -10, 16,
124
+ 60, 12, -13, 16, 60, 12, 12, 16, 60, 12,
125
+ 12, 16, 60, 12, 12, 16, 60, 12, 23, 16,
126
+ 60, 12, 62, 16, 63, 64, 65, 66, 5, 9,
127
+ 5, 6, 5 ]
127
128
 
128
129
  racc_action_check = [
129
- 22, 22, 22, 22, 40, 24, 22, 22, 22, 22,
130
- 22, 26, 26, 26, 28, 20, 26, 26, 26, 26,
131
- 26, 46, 46, 46, 37, 38, 39, 46, 46, 46,
132
- 46, 30, 30, 30, 25, 42, 17, 30, 30, 30,
133
- 30, 8, 8, 8, 47, 50, 8, 14, 14, 14,
134
- 63, 63, 14, 63, 54, 54, 51, 54, 64, 64,
135
- 52, 64, 65, 65, 53, 65, 66, 66, 16, 66,
136
- 67, 67, 55, 67, 56, 57, 58, 59, 12, 7,
130
+ 21, 21, 21, 21, 39, 23, 21, 21, 21, 21,
131
+ 21, 25, 25, 25, 27, 19, 25, 25, 25, 25,
132
+ 25, 45, 45, 45, 36, 37, 38, 45, 45, 45,
133
+ 45, 29, 29, 29, 24, 41, 16, 29, 29, 29,
134
+ 29, 7, 7, 7, 46, 49, 7, 13, 13, 13,
135
+ 62, 62, 13, 62, 53, 53, 50, 53, 63, 63,
136
+ 51, 63, 64, 64, 52, 64, 65, 65, 15, 65,
137
+ 66, 66, 54, 66, 55, 56, 57, 58, 11, 6,
137
138
  3, 1, 0 ]
138
139
 
139
140
  racc_action_pointer = [
140
- 80, 81, nil, 78, nil, nil, nil, 79, 38, nil,
141
- nil, nil, 76, nil, 44, nil, 64, 30, nil, nil,
142
- 7, nil, -2, nil, 3, 31, 8, nil, 8, nil,
143
- 28, nil, nil, nil, nil, nil, nil, 18, 19, 20,
144
- -2, nil, 28, nil, nil, nil, 18, 41, nil, nil,
145
- 42, 53, 57, 61, 52, 65, 67, 68, 69, 70,
146
- nil, nil, nil, 48, 56, 60, 64, 68, nil, nil,
147
- nil, nil, nil ]
141
+ 80, 81, nil, 78, nil, nil, 79, 38, nil, nil,
142
+ nil, 76, nil, 44, nil, 64, 30, nil, nil, 7,
143
+ nil, -2, nil, 3, 31, 8, nil, 8, nil, 28,
144
+ nil, nil, nil, nil, nil, nil, 18, 19, 20, -2,
145
+ nil, 28, nil, nil, nil, 18, 41, nil, nil, 42,
146
+ 53, 57, 61, 52, 65, 67, 68, 69, 70, nil,
147
+ nil, nil, 48, 56, 60, 64, 68, nil, nil, nil,
148
+ nil, nil ]
148
149
 
149
150
  racc_action_default = [
150
- -5, -45, -1, -6, -7, -9, -10, -45, -3, -8,
151
- 73, -2, -5, -12, -24, -15, -45, -45, -19, -20,
152
- -45, -4, -6, -16, -45, -11, -30, -26, -45, -21,
153
- -22, -23, -32, -35, -36, -37, -38, -45, -45, -45,
154
- -45, -17, -45, -25, -27, -28, -31, -11, -33, -34,
155
- -11, -11, -11, -11, -11, -45, -45, -45, -45, -45,
156
- -18, -43, -44, -11, -11, -11, -11, -11, -29, -39,
157
- -40, -41, -42 ]
151
+ -5, -44, -1, -6, -7, -9, -44, -3, -8, 72,
152
+ -2, -5, -11, -23, -14, -44, -44, -18, -19, -44,
153
+ -4, -6, -15, -44, -10, -29, -25, -44, -20, -21,
154
+ -22, -31, -34, -35, -36, -37, -44, -44, -44, -44,
155
+ -16, -44, -24, -26, -27, -30, -10, -32, -33, -10,
156
+ -10, -10, -10, -10, -44, -44, -44, -44, -44, -17,
157
+ -42, -43, -10, -10, -10, -10, -10, -28, -38, -39,
158
+ -40, -41 ]
158
159
 
159
160
  racc_goto_table = [
160
- 15, 42, 9, 48, 3, 12, 23, 11, 5, 27,
161
- 18, 5, 26, 2, 15, 44, 22, 19, 45, 48,
162
- 5, 9, 49, 55, 29, 21, 56, 57, 58, 59,
163
- 5, 31, 41, 60, 43, 30, 8, 1, 49, 46,
164
- nil, nil, 68, 69, 70, 71, 72 ]
161
+ 14, 41, 8, 47, 3, 2, 22, 17, 29, 11,
162
+ 18, 26, 45, 10, 14, 21, 20, 43, 44, 47,
163
+ 8, 28, 48, 54, 30, 25, 55, 56, 57, 58,
164
+ 59, 42, 7, 1, nil, nil, nil, nil, 48, 67,
165
+ 68, 69, 70, 71 ]
165
166
 
166
167
  racc_goto_check = [
167
- 12, 9, 7, 20, 6, 5, 12, 3, 8, 19,
168
- 14, 8, 17, 2, 12, 19, 6, 15, 12, 20,
169
- 8, 7, 12, 9, 14, 2, 9, 9, 9, 9,
170
- 8, 15, 8, 13, 18, 16, 4, 1, 12, 16,
171
- nil, nil, 13, 13, 13, 13, 13 ]
168
+ 11, 8, 7, 19, 6, 2, 11, 13, 15, 5,
169
+ 14, 18, 15, 3, 11, 6, 2, 18, 11, 19,
170
+ 7, 13, 11, 8, 14, 16, 8, 8, 8, 8,
171
+ 12, 17, 4, 1, nil, nil, nil, nil, 11, 12,
172
+ 12, 12, 12, 12 ]
172
173
 
173
174
  racc_goto_pointer = [
174
- nil, 37, 13, -1, 34, -3, 4, -1, 8, -24,
175
- nil, nil, -8, -21, 2, 9, 13, -8, 8, -11,
176
- -27, nil, nil, nil, nil ]
175
+ nil, 33, 5, 6, 30, 2, 4, -1, -23, nil,
176
+ nil, -7, -23, 0, 3, -13, 6, 6, -8, -26,
177
+ nil, nil, nil, nil ]
177
178
 
178
179
  racc_goto_default = [
179
- nil, nil, nil, nil, nil, nil, nil, 4, 61, 16,
180
- 20, 14, 62, nil, nil, nil, nil, nil, nil, nil,
181
- 32, 33, 34, 35, 36 ]
180
+ nil, nil, nil, nil, nil, nil, nil, 4, 15, 19,
181
+ 13, 61, nil, nil, nil, nil, nil, nil, nil, 31,
182
+ 32, 33, 34, 35 ]
182
183
 
183
184
  racc_reduce_table = [
184
185
  0, 0, :racc_error,
@@ -191,45 +192,44 @@ racc_reduce_table = [
191
192
  1, 19, :_reduce_none,
192
193
  2, 19, :_reduce_none,
193
194
  1, 20, :_reduce_none,
194
- 1, 21, :_reduce_10,
195
+ 0, 21, :_reduce_none,
196
+ 1, 21, :_reduce_none,
195
197
  0, 22, :_reduce_none,
196
198
  1, 22, :_reduce_none,
197
- 0, 23, :_reduce_none,
198
199
  1, 23, :_reduce_none,
199
- 1, 24, :_reduce_none,
200
- 2, 24, :_reduce_none,
201
- 3, 25, :_reduce_none,
202
- 5, 25, :_reduce_18,
200
+ 2, 23, :_reduce_none,
201
+ 3, 24, :_reduce_none,
202
+ 5, 24, :_reduce_17,
203
+ 1, 18, :_reduce_18,
203
204
  1, 18, :_reduce_19,
204
- 1, 18, :_reduce_20,
205
+ 3, 18, :_reduce_20,
205
206
  3, 18, :_reduce_21,
206
- 3, 18, :_reduce_22,
207
207
  3, 18, :_reduce_none,
208
- 1, 28, :_reduce_none,
209
- 3, 27, :_reduce_25,
210
- 1, 30, :_reduce_26,
211
- 2, 30, :_reduce_27,
212
- 2, 30, :_reduce_none,
213
- 5, 32, :_reduce_29,
214
- 0, 31, :_reduce_none,
215
- 1, 31, :_reduce_none,
216
- 1, 29, :_reduce_32,
217
- 2, 29, :_reduce_33,
208
+ 1, 27, :_reduce_none,
209
+ 3, 26, :_reduce_24,
210
+ 1, 29, :_reduce_25,
211
+ 2, 29, :_reduce_26,
218
212
  2, 29, :_reduce_none,
219
- 1, 33, :_reduce_none,
220
- 1, 33, :_reduce_none,
221
- 1, 33, :_reduce_none,
222
- 1, 33, :_reduce_none,
213
+ 5, 31, :_reduce_28,
214
+ 0, 30, :_reduce_none,
215
+ 1, 30, :_reduce_none,
216
+ 1, 28, :_reduce_31,
217
+ 2, 28, :_reduce_32,
218
+ 2, 28, :_reduce_none,
219
+ 1, 32, :_reduce_none,
220
+ 1, 32, :_reduce_none,
221
+ 1, 32, :_reduce_none,
222
+ 1, 32, :_reduce_none,
223
+ 5, 33, :_reduce_38,
223
224
  5, 34, :_reduce_39,
224
225
  5, 35, :_reduce_40,
225
226
  5, 36, :_reduce_41,
226
- 5, 37, :_reduce_42,
227
- 1, 26, :_reduce_none,
228
- 1, 26, :_reduce_none ]
227
+ 1, 25, :_reduce_none,
228
+ 1, 25, :_reduce_none ]
229
229
 
230
- racc_reduce_n = 45
230
+ racc_reduce_n = 44
231
231
 
232
- racc_shift_n = 73
232
+ racc_shift_n = 72
233
233
 
234
234
  racc_token_table = {
235
235
  false => 0,
@@ -288,7 +288,6 @@ Racc_token_to_s_table = [
288
288
  "records",
289
289
  "blanklines",
290
290
  "blankline",
291
- "eol",
292
291
  "opt_space",
293
292
  "opt_commentlines",
294
293
  "commentlines",
@@ -344,13 +343,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 11)
344
343
 
345
344
  # reduce 9 omitted
346
345
 
347
- module_eval(<<'.,.,', 'robotstxt.ry', 31)
348
- def _reduce_10(val, _values, result)
349
- @lineno += 1
350
-
351
- result
352
- end
353
- .,.,
346
+ # reduce 10 omitted
354
347
 
355
348
  # reduce 11 omitted
356
349
 
@@ -364,18 +357,16 @@ module_eval(<<'.,.,', 'robotstxt.ry', 31)
364
357
 
365
358
  # reduce 16 omitted
366
359
 
367
- # reduce 17 omitted
368
-
369
- module_eval(<<'.,.,', 'robotstxt.ry', 47)
370
- def _reduce_18(val, _values, result)
360
+ module_eval(<<'.,.,', 'robotstxt.ry', 42)
361
+ def _reduce_17(val, _values, result)
371
362
  @sitemaps << val[3]
372
363
 
373
364
  result
374
365
  end
375
366
  .,.,
376
367
 
377
- module_eval(<<'.,.,', 'robotstxt.ry', 52)
378
- def _reduce_19(val, _values, result)
368
+ module_eval(<<'.,.,', 'robotstxt.ry', 47)
369
+ def _reduce_18(val, _values, result)
379
370
  result = []
380
371
  result << val[0]
381
372
 
@@ -383,24 +374,24 @@ module_eval(<<'.,.,', 'robotstxt.ry', 52)
383
374
  end
384
375
  .,.,
385
376
 
386
- module_eval(<<'.,.,', 'robotstxt.ry', 57)
387
- def _reduce_20(val, _values, result)
377
+ module_eval(<<'.,.,', 'robotstxt.ry', 52)
378
+ def _reduce_19(val, _values, result)
388
379
  result = []
389
380
 
390
381
  result
391
382
  end
392
383
  .,.,
393
384
 
394
- module_eval(<<'.,.,', 'robotstxt.ry', 63)
395
- def _reduce_21(val, _values, result)
385
+ module_eval(<<'.,.,', 'robotstxt.ry', 58)
386
+ def _reduce_20(val, _values, result)
396
387
  result << val[2]
397
388
 
398
389
  result
399
390
  end
400
391
  .,.,
401
392
 
402
- module_eval(<<'.,.,', 'robotstxt.ry', 69)
403
- def _reduce_22(val, _values, result)
393
+ module_eval(<<'.,.,', 'robotstxt.ry', 64)
394
+ def _reduce_21(val, _values, result)
404
395
  val[2].each_with_index { |line, i|
405
396
  warn "%s line %d: %s: orphan rule line" %
406
397
  [@site.to_s, @rulelinenos[i], line.token] if $VERBOSE
@@ -410,50 +401,50 @@ module_eval(<<'.,.,', 'robotstxt.ry', 69)
410
401
  end
411
402
  .,.,
412
403
 
413
- # reduce 23 omitted
404
+ # reduce 22 omitted
414
405
 
415
- # reduce 24 omitted
406
+ # reduce 23 omitted
416
407
 
417
- module_eval(<<'.,.,', 'robotstxt.ry', 84)
418
- def _reduce_25(val, _values, result)
408
+ module_eval(<<'.,.,', 'robotstxt.ry', 79)
409
+ def _reduce_24(val, _values, result)
419
410
  result = Record.new(val[1], val[2])
420
411
 
421
412
  result
422
413
  end
423
414
  .,.,
424
415
 
425
- module_eval(<<'.,.,', 'robotstxt.ry', 89)
426
- def _reduce_26(val, _values, result)
416
+ module_eval(<<'.,.,', 'robotstxt.ry', 84)
417
+ def _reduce_25(val, _values, result)
427
418
  result = [val[0]]
428
419
 
429
420
  result
430
421
  end
431
422
  .,.,
432
423
 
433
- module_eval(<<'.,.,', 'robotstxt.ry', 94)
434
- def _reduce_27(val, _values, result)
424
+ module_eval(<<'.,.,', 'robotstxt.ry', 89)
425
+ def _reduce_26(val, _values, result)
435
426
  result << val[1]
436
427
 
437
428
  result
438
429
  end
439
430
  .,.,
440
431
 
441
- # reduce 28 omitted
432
+ # reduce 27 omitted
442
433
 
443
- module_eval(<<'.,.,', 'robotstxt.ry', 101)
444
- def _reduce_29(val, _values, result)
434
+ module_eval(<<'.,.,', 'robotstxt.ry', 96)
435
+ def _reduce_28(val, _values, result)
445
436
  result = AgentLine.new(val[0], val[3])
446
437
 
447
438
  result
448
439
  end
449
440
  .,.,
450
441
 
451
- # reduce 30 omitted
442
+ # reduce 29 omitted
452
443
 
453
- # reduce 31 omitted
444
+ # reduce 30 omitted
454
445
 
455
- module_eval(<<'.,.,', 'robotstxt.ry', 109)
456
- def _reduce_32(val, _values, result)
446
+ module_eval(<<'.,.,', 'robotstxt.ry', 104)
447
+ def _reduce_31(val, _values, result)
457
448
  result = [result]
458
449
  @rulelinenos = []
459
450
 
@@ -461,8 +452,8 @@ module_eval(<<'.,.,', 'robotstxt.ry', 109)
461
452
  end
462
453
  .,.,
463
454
 
464
- module_eval(<<'.,.,', 'robotstxt.ry', 115)
465
- def _reduce_33(val, _values, result)
455
+ module_eval(<<'.,.,', 'robotstxt.ry', 110)
456
+ def _reduce_32(val, _values, result)
466
457
  result << val[1]
467
458
  @rulelinenos << @lineno
468
459
 
@@ -470,6 +461,8 @@ module_eval(<<'.,.,', 'robotstxt.ry', 115)
470
461
  end
471
462
  .,.,
472
463
 
464
+ # reduce 33 omitted
465
+
473
466
  # reduce 34 omitted
474
467
 
475
468
  # reduce 35 omitted
@@ -478,43 +471,41 @@ module_eval(<<'.,.,', 'robotstxt.ry', 115)
478
471
 
479
472
  # reduce 37 omitted
480
473
 
481
- # reduce 38 omitted
482
-
483
- module_eval(<<'.,.,', 'robotstxt.ry', 128)
484
- def _reduce_39(val, _values, result)
474
+ module_eval(<<'.,.,', 'robotstxt.ry', 123)
475
+ def _reduce_38(val, _values, result)
485
476
  result = AllowLine.new(val[0], val[3])
486
477
 
487
478
  result
488
479
  end
489
480
  .,.,
490
481
 
491
- module_eval(<<'.,.,', 'robotstxt.ry', 133)
492
- def _reduce_40(val, _values, result)
482
+ module_eval(<<'.,.,', 'robotstxt.ry', 128)
483
+ def _reduce_39(val, _values, result)
493
484
  result = DisallowLine.new(val[0], val[3])
494
485
 
495
486
  result
496
487
  end
497
488
  .,.,
498
489
 
499
- module_eval(<<'.,.,', 'robotstxt.ry', 138)
500
- def _reduce_41(val, _values, result)
490
+ module_eval(<<'.,.,', 'robotstxt.ry', 133)
491
+ def _reduce_40(val, _values, result)
501
492
  result = CrawlDelayLine.new(val[0], val[3])
502
493
 
503
494
  result
504
495
  end
505
496
  .,.,
506
497
 
507
- module_eval(<<'.,.,', 'robotstxt.ry', 143)
508
- def _reduce_42(val, _values, result)
498
+ module_eval(<<'.,.,', 'robotstxt.ry', 138)
499
+ def _reduce_41(val, _values, result)
509
500
  result = ExtentionLine.new(val[0], val[3])
510
501
 
511
502
  result
512
503
  end
513
504
  .,.,
514
505
 
515
- # reduce 43 omitted
506
+ # reduce 42 omitted
516
507
 
517
- # reduce 44 omitted
508
+ # reduce 43 omitted
518
509
 
519
510
  def _reduce_none(val, _values, result)
520
511
  val[0]
@@ -25,12 +25,7 @@ rule
25
25
  | blanklines
26
26
  blankline
27
27
 
28
- blankline : eol
29
-
30
- eol : EOL
31
- {
32
- @lineno += 1
33
- }
28
+ blankline : EOL
34
29
 
35
30
  opt_space :
36
31
  | SPACE
@@ -42,7 +37,7 @@ rule
42
37
  | commentlines
43
38
  comment
44
39
 
45
- comment : opt_space COMMENT eol
40
+ comment : opt_space COMMENT EOL
46
41
  | 'sitemap' ':' opt_space VALUE eol_opt_comment
47
42
  {
48
43
  @sitemaps << val[3]
@@ -144,7 +139,7 @@ rule
144
139
  result = ExtentionLine.new(val[0], val[3])
145
140
  }
146
141
 
147
- eol_opt_comment : eol
142
+ eol_opt_comment : EOL
148
143
  | comment
149
144
 
150
145
  ---- header
@@ -178,7 +173,7 @@ class WebRobots
178
173
  def parse(input, site)
179
174
  @q ||= []
180
175
  @errors = []
181
- @lineno = 1
176
+ @lineno = 0
182
177
  @site = site
183
178
 
184
179
  string = input.respond_to?(:read) ? input.read : input
@@ -186,6 +181,7 @@ class WebRobots
186
181
  value_expected = false
187
182
 
188
183
  until s.eos?
184
+ @lineno += 1 if s.bol?
189
185
  if t = s.scan(/[ \t]*(?:\r?\n|\z)/)
190
186
  if value_expected
191
187
  @q << [:VALUE, '']
@@ -1,3 +1,4 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'helper'
2
3
 
3
4
  class TestWebRobots < Test::Unit::TestCase
@@ -582,4 +583,76 @@ TXT
582
583
  end
583
584
  end
584
585
 
586
+ context "robots.txt cache" do
587
+ setup do
588
+ @fetched = false
589
+ @robots = WebRobots.new('RandomBot', :http_get => lambda { |uri|
590
+ case uri.to_s
591
+ when 'http://site1.example.org/robots.txt'
592
+ @fetched = true
593
+ <<-'TXT'
594
+ User-Agent: *
595
+ Disallow: /foo
596
+ TXT
597
+ when 'http://site2.example.org/robots.txt'
598
+ @fetched = true
599
+ nil
600
+ end
601
+ })
602
+ end
603
+
604
+ should "persist unless cache is cleared" do
605
+ assert !@fetched
606
+ assert !@robots.allowed?('http://site1.example.org/foo')
607
+ assert @fetched
608
+
609
+ @fetched = false
610
+ assert @robots.allowed?('http://site1.example.org/bar')
611
+ assert !@fetched
612
+ assert @robots.allowed?('http://site1.example.org/baz')
613
+ assert !@fetched
614
+ assert !@robots.allowed?('http://site1.example.org/foo')
615
+ assert !@fetched
616
+
617
+ @robots.flush_cache
618
+ assert !@fetched
619
+ assert !@robots.allowed?('http://site1.example.org/foo')
620
+ assert @fetched
621
+
622
+ @fetched = false
623
+ assert @robots.allowed?('http://site1.example.org/bar')
624
+ assert !@fetched
625
+ assert @robots.allowed?('http://site1.example.org/baz')
626
+ assert !@fetched
627
+ assert !@robots.allowed?('http://site1.example.org/foo')
628
+ assert !@fetched
629
+ end
630
+
631
+ should "persist for non-existent robots.txt unless cache is cleared" do
632
+ assert !@fetched
633
+ assert !@robots.allowed?('http://site2.example.org/foo')
634
+ assert @fetched
635
+
636
+ @fetched = false
637
+ assert !@robots.allowed?('http://site2.example.org/bar')
638
+ assert !@fetched
639
+ assert !@robots.allowed?('http://site2.example.org/baz')
640
+ assert !@fetched
641
+ assert !@robots.allowed?('http://site2.example.org/foo')
642
+ assert !@fetched
643
+
644
+ @robots.flush_cache
645
+ assert !@fetched
646
+ assert !@robots.allowed?('http://site2.example.org/foo')
647
+ assert @fetched
648
+
649
+ @fetched = false
650
+ assert !@robots.allowed?('http://site2.example.org/bar')
651
+ assert !@fetched
652
+ assert !@robots.allowed?('http://site2.example.org/baz')
653
+ assert !@fetched
654
+ assert !@robots.allowed?('http://site2.example.org/foo')
655
+ assert !@fetched
656
+ end
657
+ end
585
658
  end
data/webrobots.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{webrobots}
8
- s.version = "0.0.10"
8
+ s.version = "0.0.11"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = [%q{Akinori MUSHA}]
12
- s.date = %q{2011-07-01}
12
+ s.date = %q{2011-08-10}
13
13
  s.description = %q{This library helps write robots.txt compliant web robots in Ruby.
14
14
  }
15
15
  s.email = %q{knu@idaemons.org}
@@ -33,9 +33,10 @@ Gem::Specification.new do |s|
33
33
  "test/test_webrobots.rb",
34
34
  "webrobots.gemspec"
35
35
  ]
36
+ s.homepage = %q{https://github.com/knu/webrobots}
36
37
  s.licenses = [%q{2-clause BSDL}]
37
38
  s.require_paths = [%q{lib}]
38
- s.rubygems_version = %q{1.8.5}
39
+ s.rubygems_version = %q{1.8.7}
39
40
  s.summary = %q{A Ruby library to help write robots.txt compliant web robots}
40
41
 
41
42
  if s.respond_to? :specification_version then
@@ -46,14 +47,14 @@ Gem::Specification.new do |s|
46
47
  s.add_development_dependency(%q<racc>, [">= 0"])
47
48
  s.add_development_dependency(%q<shoulda>, [">= 0"])
48
49
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
49
- s.add_development_dependency(%q<jeweler>, ["~> 1.6.2"])
50
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
50
51
  s.add_development_dependency(%q<rcov>, [">= 0"])
51
52
  else
52
53
  s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
53
54
  s.add_dependency(%q<racc>, [">= 0"])
54
55
  s.add_dependency(%q<shoulda>, [">= 0"])
55
56
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
56
- s.add_dependency(%q<jeweler>, ["~> 1.6.2"])
57
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
57
58
  s.add_dependency(%q<rcov>, [">= 0"])
58
59
  end
59
60
  else
@@ -61,7 +62,7 @@ Gem::Specification.new do |s|
61
62
  s.add_dependency(%q<racc>, [">= 0"])
62
63
  s.add_dependency(%q<shoulda>, [">= 0"])
63
64
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
64
- s.add_dependency(%q<jeweler>, ["~> 1.6.2"])
65
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
65
66
  s.add_dependency(%q<rcov>, [">= 0"])
66
67
  end
67
68
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webrobots
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.0.11
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-07-01 00:00:00.000000000Z
12
+ date: 2011-08-10 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &17196464340 !ruby/object:Gem::Requirement
16
+ requirement: &70285160147560 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.4.4
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *17196464340
24
+ version_requirements: *70285160147560
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: racc
27
- requirement: &17196463180 !ruby/object:Gem::Requirement
27
+ requirement: &70285160147080 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *17196463180
35
+ version_requirements: *70285160147080
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: shoulda
38
- requirement: &17196462340 !ruby/object:Gem::Requirement
38
+ requirement: &70285160146600 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *17196462340
46
+ version_requirements: *70285160146600
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: bundler
49
- requirement: &17196461320 !ruby/object:Gem::Requirement
49
+ requirement: &70285160146120 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,21 +54,21 @@ dependencies:
54
54
  version: 1.0.0
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *17196461320
57
+ version_requirements: *70285160146120
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: jeweler
60
- requirement: &17196460320 !ruby/object:Gem::Requirement
60
+ requirement: &70285160145640 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
64
64
  - !ruby/object:Gem::Version
65
- version: 1.6.2
65
+ version: 1.6.4
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *17196460320
68
+ version_requirements: *70285160145640
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rcov
71
- requirement: &17196459400 !ruby/object:Gem::Requirement
71
+ requirement: &70285160145160 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *17196459400
79
+ version_requirements: *70285160145160
80
80
  description: ! 'This library helps write robots.txt compliant web robots in Ruby.
81
81
 
82
82
  '
@@ -101,7 +101,7 @@ files:
101
101
  - test/helper.rb
102
102
  - test/test_webrobots.rb
103
103
  - webrobots.gemspec
104
- homepage:
104
+ homepage: https://github.com/knu/webrobots
105
105
  licenses:
106
106
  - 2-clause BSDL
107
107
  post_install_message:
@@ -116,7 +116,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
116
116
  version: '0'
117
117
  segments:
118
118
  - 0
119
- hash: 1141598142888730213
119
+ hash: 3895009630851215598
120
120
  required_rubygems_version: !ruby/object:Gem::Requirement
121
121
  none: false
122
122
  requirements:
@@ -125,7 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
125
125
  version: '0'
126
126
  requirements: []
127
127
  rubyforge_project:
128
- rubygems_version: 1.8.5
128
+ rubygems_version: 1.8.7
129
129
  signing_key:
130
130
  specification_version: 3
131
131
  summary: A Ruby library to help write robots.txt compliant web robots