webrobots 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -10,6 +10,6 @@ group :development do
10
10
  gem "racc", ">= 0"
11
11
  gem "shoulda", ">= 0"
12
12
  gem "bundler", "~> 1.0.0"
13
- gem "jeweler", "~> 1.6.2"
13
+ gem "jeweler", "~> 1.6.4"
14
14
  gem "rcov", ">= 0"
15
15
  end
data/Gemfile.lock CHANGED
@@ -2,14 +2,14 @@ GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
4
  git (1.2.5)
5
- jeweler (1.6.2)
5
+ jeweler (1.6.4)
6
6
  bundler (~> 1.0)
7
7
  git (>= 1.2.5)
8
8
  rake
9
- nokogiri (1.4.6)
9
+ nokogiri (1.5.0)
10
10
  racc (1.4.6)
11
11
  rake (0.9.2)
12
- rcov (0.9.9)
12
+ rcov (0.9.10)
13
13
  shoulda (2.11.3)
14
14
 
15
15
  PLATFORMS
@@ -17,7 +17,7 @@ PLATFORMS
17
17
 
18
18
  DEPENDENCIES
19
19
  bundler (~> 1.0.0)
20
- jeweler (~> 1.6.2)
20
+ jeweler (~> 1.6.4)
21
21
  nokogiri (>= 1.4.4)
22
22
  racc
23
23
  rcov
data/Rakefile CHANGED
@@ -15,7 +15,7 @@ require 'jeweler'
15
15
  Jeweler::Tasks.new do |gem|
16
16
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
17
  gem.name = "webrobots"
18
- # gem.homepage = "http://github.com/knu/webrobots"
18
+ gem.homepage = "https://github.com/knu/webrobots"
19
19
  gem.license = "2-clause BSDL"
20
20
  gem.summary = %Q{A Ruby library to help write robots.txt compliant web robots}
21
21
  gem.description = <<-'EOS'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.10
1
+ 0.0.11
data/lib/webrobots.rb CHANGED
@@ -30,7 +30,12 @@ class WebRobots
30
30
 
31
31
  # :nodoc:
32
32
  def create_cache
33
- Hash.new # Must respond to [], []=, and delete.
33
+ Hash.new # Must respond to [], []=, delete and clear.
34
+ end
35
+
36
+ # Flushes robots.txt cache.
37
+ def flush_cache
38
+ @robotstxt.clear
34
39
  end
35
40
 
36
41
  # Returns the robot name initially given.
@@ -19,7 +19,7 @@ class WebRobots
19
19
  class RobotsTxt
20
20
  class Parser < Racc::Parser
21
21
 
22
- module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
22
+ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 158)
23
23
 
24
24
  def initialize(target = nil)
25
25
  super()
@@ -38,7 +38,7 @@ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
38
38
  def parse(input, site)
39
39
  @q ||= []
40
40
  @errors = []
41
- @lineno = 1
41
+ @lineno = 0
42
42
  @site = site
43
43
 
44
44
  string = input.respond_to?(:read) ? input.read : input
@@ -46,6 +46,7 @@ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
46
46
  value_expected = false
47
47
 
48
48
  until s.eos?
49
+ @lineno += 1 if s.bol?
49
50
  if t = s.scan(/[ \t]*(?:\r?\n|\z)/)
50
51
  if value_expected
51
52
  @q << [:VALUE, '']
@@ -115,70 +116,70 @@ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
115
116
  ##### State transition tables begin ###
116
117
 
117
118
  racc_action_table = [
118
- 6, 13, -11, 17, 53, 6, -13, 37, 38, 39,
119
- 40, 13, -11, 17, 47, 28, 28, 37, 38, 39,
120
- 40, 13, -11, 17, 50, 51, 52, 37, 38, 39,
121
- 40, 13, -11, 17, 13, 54, 25, 37, 38, 39,
122
- 40, 13, -11, 17, 13, 13, -13, 13, -11, 17,
123
- 6, 13, -14, 17, 6, 13, 13, 17, 6, 13,
124
- 13, 17, 6, 13, 13, 17, 6, 13, 24, 17,
125
- 6, 13, 63, 17, 64, 65, 66, 67, 6, 10,
126
- 6, 7, 6 ]
119
+ 5, 12, -10, 16, 52, 40, -12, 36, 37, 38,
120
+ 39, 12, -10, 16, 46, 27, 27, 36, 37, 38,
121
+ 39, 12, -10, 16, 49, 50, 51, 36, 37, 38,
122
+ 39, 12, -10, 16, 12, 53, 24, 36, 37, 38,
123
+ 39, 12, -10, 16, 12, 12, -12, 12, -10, 16,
124
+ 60, 12, -13, 16, 60, 12, 12, 16, 60, 12,
125
+ 12, 16, 60, 12, 12, 16, 60, 12, 23, 16,
126
+ 60, 12, 62, 16, 63, 64, 65, 66, 5, 9,
127
+ 5, 6, 5 ]
127
128
 
128
129
  racc_action_check = [
129
- 22, 22, 22, 22, 40, 24, 22, 22, 22, 22,
130
- 22, 26, 26, 26, 28, 20, 26, 26, 26, 26,
131
- 26, 46, 46, 46, 37, 38, 39, 46, 46, 46,
132
- 46, 30, 30, 30, 25, 42, 17, 30, 30, 30,
133
- 30, 8, 8, 8, 47, 50, 8, 14, 14, 14,
134
- 63, 63, 14, 63, 54, 54, 51, 54, 64, 64,
135
- 52, 64, 65, 65, 53, 65, 66, 66, 16, 66,
136
- 67, 67, 55, 67, 56, 57, 58, 59, 12, 7,
130
+ 21, 21, 21, 21, 39, 23, 21, 21, 21, 21,
131
+ 21, 25, 25, 25, 27, 19, 25, 25, 25, 25,
132
+ 25, 45, 45, 45, 36, 37, 38, 45, 45, 45,
133
+ 45, 29, 29, 29, 24, 41, 16, 29, 29, 29,
134
+ 29, 7, 7, 7, 46, 49, 7, 13, 13, 13,
135
+ 62, 62, 13, 62, 53, 53, 50, 53, 63, 63,
136
+ 51, 63, 64, 64, 52, 64, 65, 65, 15, 65,
137
+ 66, 66, 54, 66, 55, 56, 57, 58, 11, 6,
137
138
  3, 1, 0 ]
138
139
 
139
140
  racc_action_pointer = [
140
- 80, 81, nil, 78, nil, nil, nil, 79, 38, nil,
141
- nil, nil, 76, nil, 44, nil, 64, 30, nil, nil,
142
- 7, nil, -2, nil, 3, 31, 8, nil, 8, nil,
143
- 28, nil, nil, nil, nil, nil, nil, 18, 19, 20,
144
- -2, nil, 28, nil, nil, nil, 18, 41, nil, nil,
145
- 42, 53, 57, 61, 52, 65, 67, 68, 69, 70,
146
- nil, nil, nil, 48, 56, 60, 64, 68, nil, nil,
147
- nil, nil, nil ]
141
+ 80, 81, nil, 78, nil, nil, 79, 38, nil, nil,
142
+ nil, 76, nil, 44, nil, 64, 30, nil, nil, 7,
143
+ nil, -2, nil, 3, 31, 8, nil, 8, nil, 28,
144
+ nil, nil, nil, nil, nil, nil, 18, 19, 20, -2,
145
+ nil, 28, nil, nil, nil, 18, 41, nil, nil, 42,
146
+ 53, 57, 61, 52, 65, 67, 68, 69, 70, nil,
147
+ nil, nil, 48, 56, 60, 64, 68, nil, nil, nil,
148
+ nil, nil ]
148
149
 
149
150
  racc_action_default = [
150
- -5, -45, -1, -6, -7, -9, -10, -45, -3, -8,
151
- 73, -2, -5, -12, -24, -15, -45, -45, -19, -20,
152
- -45, -4, -6, -16, -45, -11, -30, -26, -45, -21,
153
- -22, -23, -32, -35, -36, -37, -38, -45, -45, -45,
154
- -45, -17, -45, -25, -27, -28, -31, -11, -33, -34,
155
- -11, -11, -11, -11, -11, -45, -45, -45, -45, -45,
156
- -18, -43, -44, -11, -11, -11, -11, -11, -29, -39,
157
- -40, -41, -42 ]
151
+ -5, -44, -1, -6, -7, -9, -44, -3, -8, 72,
152
+ -2, -5, -11, -23, -14, -44, -44, -18, -19, -44,
153
+ -4, -6, -15, -44, -10, -29, -25, -44, -20, -21,
154
+ -22, -31, -34, -35, -36, -37, -44, -44, -44, -44,
155
+ -16, -44, -24, -26, -27, -30, -10, -32, -33, -10,
156
+ -10, -10, -10, -10, -44, -44, -44, -44, -44, -17,
157
+ -42, -43, -10, -10, -10, -10, -10, -28, -38, -39,
158
+ -40, -41 ]
158
159
 
159
160
  racc_goto_table = [
160
- 15, 42, 9, 48, 3, 12, 23, 11, 5, 27,
161
- 18, 5, 26, 2, 15, 44, 22, 19, 45, 48,
162
- 5, 9, 49, 55, 29, 21, 56, 57, 58, 59,
163
- 5, 31, 41, 60, 43, 30, 8, 1, 49, 46,
164
- nil, nil, 68, 69, 70, 71, 72 ]
161
+ 14, 41, 8, 47, 3, 2, 22, 17, 29, 11,
162
+ 18, 26, 45, 10, 14, 21, 20, 43, 44, 47,
163
+ 8, 28, 48, 54, 30, 25, 55, 56, 57, 58,
164
+ 59, 42, 7, 1, nil, nil, nil, nil, 48, 67,
165
+ 68, 69, 70, 71 ]
165
166
 
166
167
  racc_goto_check = [
167
- 12, 9, 7, 20, 6, 5, 12, 3, 8, 19,
168
- 14, 8, 17, 2, 12, 19, 6, 15, 12, 20,
169
- 8, 7, 12, 9, 14, 2, 9, 9, 9, 9,
170
- 8, 15, 8, 13, 18, 16, 4, 1, 12, 16,
171
- nil, nil, 13, 13, 13, 13, 13 ]
168
+ 11, 8, 7, 19, 6, 2, 11, 13, 15, 5,
169
+ 14, 18, 15, 3, 11, 6, 2, 18, 11, 19,
170
+ 7, 13, 11, 8, 14, 16, 8, 8, 8, 8,
171
+ 12, 17, 4, 1, nil, nil, nil, nil, 11, 12,
172
+ 12, 12, 12, 12 ]
172
173
 
173
174
  racc_goto_pointer = [
174
- nil, 37, 13, -1, 34, -3, 4, -1, 8, -24,
175
- nil, nil, -8, -21, 2, 9, 13, -8, 8, -11,
176
- -27, nil, nil, nil, nil ]
175
+ nil, 33, 5, 6, 30, 2, 4, -1, -23, nil,
176
+ nil, -7, -23, 0, 3, -13, 6, 6, -8, -26,
177
+ nil, nil, nil, nil ]
177
178
 
178
179
  racc_goto_default = [
179
- nil, nil, nil, nil, nil, nil, nil, 4, 61, 16,
180
- 20, 14, 62, nil, nil, nil, nil, nil, nil, nil,
181
- 32, 33, 34, 35, 36 ]
180
+ nil, nil, nil, nil, nil, nil, nil, 4, 15, 19,
181
+ 13, 61, nil, nil, nil, nil, nil, nil, nil, 31,
182
+ 32, 33, 34, 35 ]
182
183
 
183
184
  racc_reduce_table = [
184
185
  0, 0, :racc_error,
@@ -191,45 +192,44 @@ racc_reduce_table = [
191
192
  1, 19, :_reduce_none,
192
193
  2, 19, :_reduce_none,
193
194
  1, 20, :_reduce_none,
194
- 1, 21, :_reduce_10,
195
+ 0, 21, :_reduce_none,
196
+ 1, 21, :_reduce_none,
195
197
  0, 22, :_reduce_none,
196
198
  1, 22, :_reduce_none,
197
- 0, 23, :_reduce_none,
198
199
  1, 23, :_reduce_none,
199
- 1, 24, :_reduce_none,
200
- 2, 24, :_reduce_none,
201
- 3, 25, :_reduce_none,
202
- 5, 25, :_reduce_18,
200
+ 2, 23, :_reduce_none,
201
+ 3, 24, :_reduce_none,
202
+ 5, 24, :_reduce_17,
203
+ 1, 18, :_reduce_18,
203
204
  1, 18, :_reduce_19,
204
- 1, 18, :_reduce_20,
205
+ 3, 18, :_reduce_20,
205
206
  3, 18, :_reduce_21,
206
- 3, 18, :_reduce_22,
207
207
  3, 18, :_reduce_none,
208
- 1, 28, :_reduce_none,
209
- 3, 27, :_reduce_25,
210
- 1, 30, :_reduce_26,
211
- 2, 30, :_reduce_27,
212
- 2, 30, :_reduce_none,
213
- 5, 32, :_reduce_29,
214
- 0, 31, :_reduce_none,
215
- 1, 31, :_reduce_none,
216
- 1, 29, :_reduce_32,
217
- 2, 29, :_reduce_33,
208
+ 1, 27, :_reduce_none,
209
+ 3, 26, :_reduce_24,
210
+ 1, 29, :_reduce_25,
211
+ 2, 29, :_reduce_26,
218
212
  2, 29, :_reduce_none,
219
- 1, 33, :_reduce_none,
220
- 1, 33, :_reduce_none,
221
- 1, 33, :_reduce_none,
222
- 1, 33, :_reduce_none,
213
+ 5, 31, :_reduce_28,
214
+ 0, 30, :_reduce_none,
215
+ 1, 30, :_reduce_none,
216
+ 1, 28, :_reduce_31,
217
+ 2, 28, :_reduce_32,
218
+ 2, 28, :_reduce_none,
219
+ 1, 32, :_reduce_none,
220
+ 1, 32, :_reduce_none,
221
+ 1, 32, :_reduce_none,
222
+ 1, 32, :_reduce_none,
223
+ 5, 33, :_reduce_38,
223
224
  5, 34, :_reduce_39,
224
225
  5, 35, :_reduce_40,
225
226
  5, 36, :_reduce_41,
226
- 5, 37, :_reduce_42,
227
- 1, 26, :_reduce_none,
228
- 1, 26, :_reduce_none ]
227
+ 1, 25, :_reduce_none,
228
+ 1, 25, :_reduce_none ]
229
229
 
230
- racc_reduce_n = 45
230
+ racc_reduce_n = 44
231
231
 
232
- racc_shift_n = 73
232
+ racc_shift_n = 72
233
233
 
234
234
  racc_token_table = {
235
235
  false => 0,
@@ -288,7 +288,6 @@ Racc_token_to_s_table = [
288
288
  "records",
289
289
  "blanklines",
290
290
  "blankline",
291
- "eol",
292
291
  "opt_space",
293
292
  "opt_commentlines",
294
293
  "commentlines",
@@ -344,13 +343,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 11)
344
343
 
345
344
  # reduce 9 omitted
346
345
 
347
- module_eval(<<'.,.,', 'robotstxt.ry', 31)
348
- def _reduce_10(val, _values, result)
349
- @lineno += 1
350
-
351
- result
352
- end
353
- .,.,
346
+ # reduce 10 omitted
354
347
 
355
348
  # reduce 11 omitted
356
349
 
@@ -364,18 +357,16 @@ module_eval(<<'.,.,', 'robotstxt.ry', 31)
364
357
 
365
358
  # reduce 16 omitted
366
359
 
367
- # reduce 17 omitted
368
-
369
- module_eval(<<'.,.,', 'robotstxt.ry', 47)
370
- def _reduce_18(val, _values, result)
360
+ module_eval(<<'.,.,', 'robotstxt.ry', 42)
361
+ def _reduce_17(val, _values, result)
371
362
  @sitemaps << val[3]
372
363
 
373
364
  result
374
365
  end
375
366
  .,.,
376
367
 
377
- module_eval(<<'.,.,', 'robotstxt.ry', 52)
378
- def _reduce_19(val, _values, result)
368
+ module_eval(<<'.,.,', 'robotstxt.ry', 47)
369
+ def _reduce_18(val, _values, result)
379
370
  result = []
380
371
  result << val[0]
381
372
 
@@ -383,24 +374,24 @@ module_eval(<<'.,.,', 'robotstxt.ry', 52)
383
374
  end
384
375
  .,.,
385
376
 
386
- module_eval(<<'.,.,', 'robotstxt.ry', 57)
387
- def _reduce_20(val, _values, result)
377
+ module_eval(<<'.,.,', 'robotstxt.ry', 52)
378
+ def _reduce_19(val, _values, result)
388
379
  result = []
389
380
 
390
381
  result
391
382
  end
392
383
  .,.,
393
384
 
394
- module_eval(<<'.,.,', 'robotstxt.ry', 63)
395
- def _reduce_21(val, _values, result)
385
+ module_eval(<<'.,.,', 'robotstxt.ry', 58)
386
+ def _reduce_20(val, _values, result)
396
387
  result << val[2]
397
388
 
398
389
  result
399
390
  end
400
391
  .,.,
401
392
 
402
- module_eval(<<'.,.,', 'robotstxt.ry', 69)
403
- def _reduce_22(val, _values, result)
393
+ module_eval(<<'.,.,', 'robotstxt.ry', 64)
394
+ def _reduce_21(val, _values, result)
404
395
  val[2].each_with_index { |line, i|
405
396
  warn "%s line %d: %s: orphan rule line" %
406
397
  [@site.to_s, @rulelinenos[i], line.token] if $VERBOSE
@@ -410,50 +401,50 @@ module_eval(<<'.,.,', 'robotstxt.ry', 69)
410
401
  end
411
402
  .,.,
412
403
 
413
- # reduce 23 omitted
404
+ # reduce 22 omitted
414
405
 
415
- # reduce 24 omitted
406
+ # reduce 23 omitted
416
407
 
417
- module_eval(<<'.,.,', 'robotstxt.ry', 84)
418
- def _reduce_25(val, _values, result)
408
+ module_eval(<<'.,.,', 'robotstxt.ry', 79)
409
+ def _reduce_24(val, _values, result)
419
410
  result = Record.new(val[1], val[2])
420
411
 
421
412
  result
422
413
  end
423
414
  .,.,
424
415
 
425
- module_eval(<<'.,.,', 'robotstxt.ry', 89)
426
- def _reduce_26(val, _values, result)
416
+ module_eval(<<'.,.,', 'robotstxt.ry', 84)
417
+ def _reduce_25(val, _values, result)
427
418
  result = [val[0]]
428
419
 
429
420
  result
430
421
  end
431
422
  .,.,
432
423
 
433
- module_eval(<<'.,.,', 'robotstxt.ry', 94)
434
- def _reduce_27(val, _values, result)
424
+ module_eval(<<'.,.,', 'robotstxt.ry', 89)
425
+ def _reduce_26(val, _values, result)
435
426
  result << val[1]
436
427
 
437
428
  result
438
429
  end
439
430
  .,.,
440
431
 
441
- # reduce 28 omitted
432
+ # reduce 27 omitted
442
433
 
443
- module_eval(<<'.,.,', 'robotstxt.ry', 101)
444
- def _reduce_29(val, _values, result)
434
+ module_eval(<<'.,.,', 'robotstxt.ry', 96)
435
+ def _reduce_28(val, _values, result)
445
436
  result = AgentLine.new(val[0], val[3])
446
437
 
447
438
  result
448
439
  end
449
440
  .,.,
450
441
 
451
- # reduce 30 omitted
442
+ # reduce 29 omitted
452
443
 
453
- # reduce 31 omitted
444
+ # reduce 30 omitted
454
445
 
455
- module_eval(<<'.,.,', 'robotstxt.ry', 109)
456
- def _reduce_32(val, _values, result)
446
+ module_eval(<<'.,.,', 'robotstxt.ry', 104)
447
+ def _reduce_31(val, _values, result)
457
448
  result = [result]
458
449
  @rulelinenos = []
459
450
 
@@ -461,8 +452,8 @@ module_eval(<<'.,.,', 'robotstxt.ry', 109)
461
452
  end
462
453
  .,.,
463
454
 
464
- module_eval(<<'.,.,', 'robotstxt.ry', 115)
465
- def _reduce_33(val, _values, result)
455
+ module_eval(<<'.,.,', 'robotstxt.ry', 110)
456
+ def _reduce_32(val, _values, result)
466
457
  result << val[1]
467
458
  @rulelinenos << @lineno
468
459
 
@@ -470,6 +461,8 @@ module_eval(<<'.,.,', 'robotstxt.ry', 115)
470
461
  end
471
462
  .,.,
472
463
 
464
+ # reduce 33 omitted
465
+
473
466
  # reduce 34 omitted
474
467
 
475
468
  # reduce 35 omitted
@@ -478,43 +471,41 @@ module_eval(<<'.,.,', 'robotstxt.ry', 115)
478
471
 
479
472
  # reduce 37 omitted
480
473
 
481
- # reduce 38 omitted
482
-
483
- module_eval(<<'.,.,', 'robotstxt.ry', 128)
484
- def _reduce_39(val, _values, result)
474
+ module_eval(<<'.,.,', 'robotstxt.ry', 123)
475
+ def _reduce_38(val, _values, result)
485
476
  result = AllowLine.new(val[0], val[3])
486
477
 
487
478
  result
488
479
  end
489
480
  .,.,
490
481
 
491
- module_eval(<<'.,.,', 'robotstxt.ry', 133)
492
- def _reduce_40(val, _values, result)
482
+ module_eval(<<'.,.,', 'robotstxt.ry', 128)
483
+ def _reduce_39(val, _values, result)
493
484
  result = DisallowLine.new(val[0], val[3])
494
485
 
495
486
  result
496
487
  end
497
488
  .,.,
498
489
 
499
- module_eval(<<'.,.,', 'robotstxt.ry', 138)
500
- def _reduce_41(val, _values, result)
490
+ module_eval(<<'.,.,', 'robotstxt.ry', 133)
491
+ def _reduce_40(val, _values, result)
501
492
  result = CrawlDelayLine.new(val[0], val[3])
502
493
 
503
494
  result
504
495
  end
505
496
  .,.,
506
497
 
507
- module_eval(<<'.,.,', 'robotstxt.ry', 143)
508
- def _reduce_42(val, _values, result)
498
+ module_eval(<<'.,.,', 'robotstxt.ry', 138)
499
+ def _reduce_41(val, _values, result)
509
500
  result = ExtentionLine.new(val[0], val[3])
510
501
 
511
502
  result
512
503
  end
513
504
  .,.,
514
505
 
515
- # reduce 43 omitted
506
+ # reduce 42 omitted
516
507
 
517
- # reduce 44 omitted
508
+ # reduce 43 omitted
518
509
 
519
510
  def _reduce_none(val, _values, result)
520
511
  val[0]
@@ -25,12 +25,7 @@ rule
25
25
  | blanklines
26
26
  blankline
27
27
 
28
- blankline : eol
29
-
30
- eol : EOL
31
- {
32
- @lineno += 1
33
- }
28
+ blankline : EOL
34
29
 
35
30
  opt_space :
36
31
  | SPACE
@@ -42,7 +37,7 @@ rule
42
37
  | commentlines
43
38
  comment
44
39
 
45
- comment : opt_space COMMENT eol
40
+ comment : opt_space COMMENT EOL
46
41
  | 'sitemap' ':' opt_space VALUE eol_opt_comment
47
42
  {
48
43
  @sitemaps << val[3]
@@ -144,7 +139,7 @@ rule
144
139
  result = ExtentionLine.new(val[0], val[3])
145
140
  }
146
141
 
147
- eol_opt_comment : eol
142
+ eol_opt_comment : EOL
148
143
  | comment
149
144
 
150
145
  ---- header
@@ -178,7 +173,7 @@ class WebRobots
178
173
  def parse(input, site)
179
174
  @q ||= []
180
175
  @errors = []
181
- @lineno = 1
176
+ @lineno = 0
182
177
  @site = site
183
178
 
184
179
  string = input.respond_to?(:read) ? input.read : input
@@ -186,6 +181,7 @@ class WebRobots
186
181
  value_expected = false
187
182
 
188
183
  until s.eos?
184
+ @lineno += 1 if s.bol?
189
185
  if t = s.scan(/[ \t]*(?:\r?\n|\z)/)
190
186
  if value_expected
191
187
  @q << [:VALUE, '']
@@ -1,3 +1,4 @@
1
+ # -*- coding: utf-8 -*-
1
2
  require 'helper'
2
3
 
3
4
  class TestWebRobots < Test::Unit::TestCase
@@ -582,4 +583,76 @@ TXT
582
583
  end
583
584
  end
584
585
 
586
+ context "robots.txt cache" do
587
+ setup do
588
+ @fetched = false
589
+ @robots = WebRobots.new('RandomBot', :http_get => lambda { |uri|
590
+ case uri.to_s
591
+ when 'http://site1.example.org/robots.txt'
592
+ @fetched = true
593
+ <<-'TXT'
594
+ User-Agent: *
595
+ Disallow: /foo
596
+ TXT
597
+ when 'http://site2.example.org/robots.txt'
598
+ @fetched = true
599
+ nil
600
+ end
601
+ })
602
+ end
603
+
604
+ should "persist unless cache is cleared" do
605
+ assert !@fetched
606
+ assert !@robots.allowed?('http://site1.example.org/foo')
607
+ assert @fetched
608
+
609
+ @fetched = false
610
+ assert @robots.allowed?('http://site1.example.org/bar')
611
+ assert !@fetched
612
+ assert @robots.allowed?('http://site1.example.org/baz')
613
+ assert !@fetched
614
+ assert !@robots.allowed?('http://site1.example.org/foo')
615
+ assert !@fetched
616
+
617
+ @robots.flush_cache
618
+ assert !@fetched
619
+ assert !@robots.allowed?('http://site1.example.org/foo')
620
+ assert @fetched
621
+
622
+ @fetched = false
623
+ assert @robots.allowed?('http://site1.example.org/bar')
624
+ assert !@fetched
625
+ assert @robots.allowed?('http://site1.example.org/baz')
626
+ assert !@fetched
627
+ assert !@robots.allowed?('http://site1.example.org/foo')
628
+ assert !@fetched
629
+ end
630
+
631
+ should "persist for non-existent robots.txt unless cache is cleared" do
632
+ assert !@fetched
633
+ assert !@robots.allowed?('http://site2.example.org/foo')
634
+ assert @fetched
635
+
636
+ @fetched = false
637
+ assert !@robots.allowed?('http://site2.example.org/bar')
638
+ assert !@fetched
639
+ assert !@robots.allowed?('http://site2.example.org/baz')
640
+ assert !@fetched
641
+ assert !@robots.allowed?('http://site2.example.org/foo')
642
+ assert !@fetched
643
+
644
+ @robots.flush_cache
645
+ assert !@fetched
646
+ assert !@robots.allowed?('http://site2.example.org/foo')
647
+ assert @fetched
648
+
649
+ @fetched = false
650
+ assert !@robots.allowed?('http://site2.example.org/bar')
651
+ assert !@fetched
652
+ assert !@robots.allowed?('http://site2.example.org/baz')
653
+ assert !@fetched
654
+ assert !@robots.allowed?('http://site2.example.org/foo')
655
+ assert !@fetched
656
+ end
657
+ end
585
658
  end
data/webrobots.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{webrobots}
8
- s.version = "0.0.10"
8
+ s.version = "0.0.11"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = [%q{Akinori MUSHA}]
12
- s.date = %q{2011-07-01}
12
+ s.date = %q{2011-08-10}
13
13
  s.description = %q{This library helps write robots.txt compliant web robots in Ruby.
14
14
  }
15
15
  s.email = %q{knu@idaemons.org}
@@ -33,9 +33,10 @@ Gem::Specification.new do |s|
33
33
  "test/test_webrobots.rb",
34
34
  "webrobots.gemspec"
35
35
  ]
36
+ s.homepage = %q{https://github.com/knu/webrobots}
36
37
  s.licenses = [%q{2-clause BSDL}]
37
38
  s.require_paths = [%q{lib}]
38
- s.rubygems_version = %q{1.8.5}
39
+ s.rubygems_version = %q{1.8.7}
39
40
  s.summary = %q{A Ruby library to help write robots.txt compliant web robots}
40
41
 
41
42
  if s.respond_to? :specification_version then
@@ -46,14 +47,14 @@ Gem::Specification.new do |s|
46
47
  s.add_development_dependency(%q<racc>, [">= 0"])
47
48
  s.add_development_dependency(%q<shoulda>, [">= 0"])
48
49
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
49
- s.add_development_dependency(%q<jeweler>, ["~> 1.6.2"])
50
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
50
51
  s.add_development_dependency(%q<rcov>, [">= 0"])
51
52
  else
52
53
  s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
53
54
  s.add_dependency(%q<racc>, [">= 0"])
54
55
  s.add_dependency(%q<shoulda>, [">= 0"])
55
56
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
56
- s.add_dependency(%q<jeweler>, ["~> 1.6.2"])
57
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
57
58
  s.add_dependency(%q<rcov>, [">= 0"])
58
59
  end
59
60
  else
@@ -61,7 +62,7 @@ Gem::Specification.new do |s|
61
62
  s.add_dependency(%q<racc>, [">= 0"])
62
63
  s.add_dependency(%q<shoulda>, [">= 0"])
63
64
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
64
- s.add_dependency(%q<jeweler>, ["~> 1.6.2"])
65
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
65
66
  s.add_dependency(%q<rcov>, [">= 0"])
66
67
  end
67
68
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webrobots
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.0.11
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-07-01 00:00:00.000000000Z
12
+ date: 2011-08-10 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &17196464340 !ruby/object:Gem::Requirement
16
+ requirement: &70285160147560 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.4.4
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *17196464340
24
+ version_requirements: *70285160147560
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: racc
27
- requirement: &17196463180 !ruby/object:Gem::Requirement
27
+ requirement: &70285160147080 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *17196463180
35
+ version_requirements: *70285160147080
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: shoulda
38
- requirement: &17196462340 !ruby/object:Gem::Requirement
38
+ requirement: &70285160146600 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *17196462340
46
+ version_requirements: *70285160146600
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: bundler
49
- requirement: &17196461320 !ruby/object:Gem::Requirement
49
+ requirement: &70285160146120 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,21 +54,21 @@ dependencies:
54
54
  version: 1.0.0
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *17196461320
57
+ version_requirements: *70285160146120
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: jeweler
60
- requirement: &17196460320 !ruby/object:Gem::Requirement
60
+ requirement: &70285160145640 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
64
64
  - !ruby/object:Gem::Version
65
- version: 1.6.2
65
+ version: 1.6.4
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *17196460320
68
+ version_requirements: *70285160145640
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rcov
71
- requirement: &17196459400 !ruby/object:Gem::Requirement
71
+ requirement: &70285160145160 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,7 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *17196459400
79
+ version_requirements: *70285160145160
80
80
  description: ! 'This library helps write robots.txt compliant web robots in Ruby.
81
81
 
82
82
  '
@@ -101,7 +101,7 @@ files:
101
101
  - test/helper.rb
102
102
  - test/test_webrobots.rb
103
103
  - webrobots.gemspec
104
- homepage:
104
+ homepage: https://github.com/knu/webrobots
105
105
  licenses:
106
106
  - 2-clause BSDL
107
107
  post_install_message:
@@ -116,7 +116,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
116
116
  version: '0'
117
117
  segments:
118
118
  - 0
119
- hash: 1141598142888730213
119
+ hash: 3895009630851215598
120
120
  required_rubygems_version: !ruby/object:Gem::Requirement
121
121
  none: false
122
122
  requirements:
@@ -125,7 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
125
125
  version: '0'
126
126
  requirements: []
127
127
  rubyforge_project:
128
- rubygems_version: 1.8.5
128
+ rubygems_version: 1.8.7
129
129
  signing_key:
130
130
  specification_version: 3
131
131
  summary: A Ruby library to help write robots.txt compliant web robots