webrobots 0.0.10 → 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -1
- data/Gemfile.lock +4 -4
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/webrobots.rb +6 -1
- data/lib/webrobots/robotstxt.rb +118 -127
- data/lib/webrobots/robotstxt.ry +5 -9
- data/test/test_webrobots.rb +73 -0
- data/webrobots.gemspec +7 -6
- metadata +18 -18
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -2,14 +2,14 @@ GEM
|
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
4
|
git (1.2.5)
|
5
|
-
jeweler (1.6.
|
5
|
+
jeweler (1.6.4)
|
6
6
|
bundler (~> 1.0)
|
7
7
|
git (>= 1.2.5)
|
8
8
|
rake
|
9
|
-
nokogiri (1.
|
9
|
+
nokogiri (1.5.0)
|
10
10
|
racc (1.4.6)
|
11
11
|
rake (0.9.2)
|
12
|
-
rcov (0.9.
|
12
|
+
rcov (0.9.10)
|
13
13
|
shoulda (2.11.3)
|
14
14
|
|
15
15
|
PLATFORMS
|
@@ -17,7 +17,7 @@ PLATFORMS
|
|
17
17
|
|
18
18
|
DEPENDENCIES
|
19
19
|
bundler (~> 1.0.0)
|
20
|
-
jeweler (~> 1.6.
|
20
|
+
jeweler (~> 1.6.4)
|
21
21
|
nokogiri (>= 1.4.4)
|
22
22
|
racc
|
23
23
|
rcov
|
data/Rakefile
CHANGED
@@ -15,7 +15,7 @@ require 'jeweler'
|
|
15
15
|
Jeweler::Tasks.new do |gem|
|
16
16
|
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
17
|
gem.name = "webrobots"
|
18
|
-
|
18
|
+
gem.homepage = "https://github.com/knu/webrobots"
|
19
19
|
gem.license = "2-clause BSDL"
|
20
20
|
gem.summary = %Q{A Ruby library to help write robots.txt compliant web robots}
|
21
21
|
gem.description = <<-'EOS'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.10
|
1
|
+
0.0.11
|
data/lib/webrobots.rb
CHANGED
@@ -30,7 +30,12 @@ class WebRobots
|
|
30
30
|
|
31
31
|
# :nodoc:
|
32
32
|
def create_cache
|
33
|
-
Hash.new # Must respond to [], []=, and
|
33
|
+
Hash.new # Must respond to [], []=, delete and clear.
|
34
|
+
end
|
35
|
+
|
36
|
+
# Flushes robots.txt cache.
|
37
|
+
def flush_cache
|
38
|
+
@robotstxt.clear
|
34
39
|
end
|
35
40
|
|
36
41
|
# Returns the robot name initially given.
|
data/lib/webrobots/robotstxt.rb
CHANGED
@@ -19,7 +19,7 @@ class WebRobots
|
|
19
19
|
class RobotsTxt
|
20
20
|
class Parser < Racc::Parser
|
21
21
|
|
22
|
-
module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
|
22
|
+
module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 158)
|
23
23
|
|
24
24
|
def initialize(target = nil)
|
25
25
|
super()
|
@@ -38,7 +38,7 @@ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
|
|
38
38
|
def parse(input, site)
|
39
39
|
@q ||= []
|
40
40
|
@errors = []
|
41
|
-
@lineno =
|
41
|
+
@lineno = 0
|
42
42
|
@site = site
|
43
43
|
|
44
44
|
string = input.respond_to?(:read) ? input.read : input
|
@@ -46,6 +46,7 @@ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
|
|
46
46
|
value_expected = false
|
47
47
|
|
48
48
|
until s.eos?
|
49
|
+
@lineno += 1 if s.bol?
|
49
50
|
if t = s.scan(/[ \t]*(?:\r?\n|\z)/)
|
50
51
|
if value_expected
|
51
52
|
@q << [:VALUE, '']
|
@@ -115,70 +116,70 @@ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
|
|
115
116
|
##### State transition tables begin ###
|
116
117
|
|
117
118
|
racc_action_table = [
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
119
|
+
5, 12, -10, 16, 52, 40, -12, 36, 37, 38,
|
120
|
+
39, 12, -10, 16, 46, 27, 27, 36, 37, 38,
|
121
|
+
39, 12, -10, 16, 49, 50, 51, 36, 37, 38,
|
122
|
+
39, 12, -10, 16, 12, 53, 24, 36, 37, 38,
|
123
|
+
39, 12, -10, 16, 12, 12, -12, 12, -10, 16,
|
124
|
+
60, 12, -13, 16, 60, 12, 12, 16, 60, 12,
|
125
|
+
12, 16, 60, 12, 12, 16, 60, 12, 23, 16,
|
126
|
+
60, 12, 62, 16, 63, 64, 65, 66, 5, 9,
|
127
|
+
5, 6, 5 ]
|
127
128
|
|
128
129
|
racc_action_check = [
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
130
|
+
21, 21, 21, 21, 39, 23, 21, 21, 21, 21,
|
131
|
+
21, 25, 25, 25, 27, 19, 25, 25, 25, 25,
|
132
|
+
25, 45, 45, 45, 36, 37, 38, 45, 45, 45,
|
133
|
+
45, 29, 29, 29, 24, 41, 16, 29, 29, 29,
|
134
|
+
29, 7, 7, 7, 46, 49, 7, 13, 13, 13,
|
135
|
+
62, 62, 13, 62, 53, 53, 50, 53, 63, 63,
|
136
|
+
51, 63, 64, 64, 52, 64, 65, 65, 15, 65,
|
137
|
+
66, 66, 54, 66, 55, 56, 57, 58, 11, 6,
|
137
138
|
3, 1, 0 ]
|
138
139
|
|
139
140
|
racc_action_pointer = [
|
140
|
-
80, 81, nil, 78, nil, nil,
|
141
|
-
nil,
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
nil, nil,
|
147
|
-
nil, nil
|
141
|
+
80, 81, nil, 78, nil, nil, 79, 38, nil, nil,
|
142
|
+
nil, 76, nil, 44, nil, 64, 30, nil, nil, 7,
|
143
|
+
nil, -2, nil, 3, 31, 8, nil, 8, nil, 28,
|
144
|
+
nil, nil, nil, nil, nil, nil, 18, 19, 20, -2,
|
145
|
+
nil, 28, nil, nil, nil, 18, 41, nil, nil, 42,
|
146
|
+
53, 57, 61, 52, 65, 67, 68, 69, 70, nil,
|
147
|
+
nil, nil, 48, 56, 60, 64, 68, nil, nil, nil,
|
148
|
+
nil, nil ]
|
148
149
|
|
149
150
|
racc_action_default = [
|
150
|
-
-5, -
|
151
|
-
|
152
|
-
|
153
|
-
-22, -
|
154
|
-
-
|
155
|
-
-
|
156
|
-
-
|
157
|
-
-40, -41
|
151
|
+
-5, -44, -1, -6, -7, -9, -44, -3, -8, 72,
|
152
|
+
-2, -5, -11, -23, -14, -44, -44, -18, -19, -44,
|
153
|
+
-4, -6, -15, -44, -10, -29, -25, -44, -20, -21,
|
154
|
+
-22, -31, -34, -35, -36, -37, -44, -44, -44, -44,
|
155
|
+
-16, -44, -24, -26, -27, -30, -10, -32, -33, -10,
|
156
|
+
-10, -10, -10, -10, -44, -44, -44, -44, -44, -17,
|
157
|
+
-42, -43, -10, -10, -10, -10, -10, -28, -38, -39,
|
158
|
+
-40, -41 ]
|
158
159
|
|
159
160
|
racc_goto_table = [
|
160
|
-
|
161
|
-
18,
|
162
|
-
|
163
|
-
|
164
|
-
|
161
|
+
14, 41, 8, 47, 3, 2, 22, 17, 29, 11,
|
162
|
+
18, 26, 45, 10, 14, 21, 20, 43, 44, 47,
|
163
|
+
8, 28, 48, 54, 30, 25, 55, 56, 57, 58,
|
164
|
+
59, 42, 7, 1, nil, nil, nil, nil, 48, 67,
|
165
|
+
68, 69, 70, 71 ]
|
165
166
|
|
166
167
|
racc_goto_check = [
|
167
|
-
|
168
|
-
14,
|
169
|
-
|
170
|
-
|
171
|
-
|
168
|
+
11, 8, 7, 19, 6, 2, 11, 13, 15, 5,
|
169
|
+
14, 18, 15, 3, 11, 6, 2, 18, 11, 19,
|
170
|
+
7, 13, 11, 8, 14, 16, 8, 8, 8, 8,
|
171
|
+
12, 17, 4, 1, nil, nil, nil, nil, 11, 12,
|
172
|
+
12, 12, 12, 12 ]
|
172
173
|
|
173
174
|
racc_goto_pointer = [
|
174
|
-
nil,
|
175
|
-
nil,
|
176
|
-
|
175
|
+
nil, 33, 5, 6, 30, 2, 4, -1, -23, nil,
|
176
|
+
nil, -7, -23, 0, 3, -13, 6, 6, -8, -26,
|
177
|
+
nil, nil, nil, nil ]
|
177
178
|
|
178
179
|
racc_goto_default = [
|
179
|
-
nil, nil, nil, nil, nil, nil, nil, 4,
|
180
|
-
|
181
|
-
32, 33, 34, 35
|
180
|
+
nil, nil, nil, nil, nil, nil, nil, 4, 15, 19,
|
181
|
+
13, 61, nil, nil, nil, nil, nil, nil, nil, 31,
|
182
|
+
32, 33, 34, 35 ]
|
182
183
|
|
183
184
|
racc_reduce_table = [
|
184
185
|
0, 0, :racc_error,
|
@@ -191,45 +192,44 @@ racc_reduce_table = [
|
|
191
192
|
1, 19, :_reduce_none,
|
192
193
|
2, 19, :_reduce_none,
|
193
194
|
1, 20, :_reduce_none,
|
194
|
-
|
195
|
+
0, 21, :_reduce_none,
|
196
|
+
1, 21, :_reduce_none,
|
195
197
|
0, 22, :_reduce_none,
|
196
198
|
1, 22, :_reduce_none,
|
197
|
-
0, 23, :_reduce_none,
|
198
199
|
1, 23, :_reduce_none,
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
200
|
+
2, 23, :_reduce_none,
|
201
|
+
3, 24, :_reduce_none,
|
202
|
+
5, 24, :_reduce_17,
|
203
|
+
1, 18, :_reduce_18,
|
203
204
|
1, 18, :_reduce_19,
|
204
|
-
|
205
|
+
3, 18, :_reduce_20,
|
205
206
|
3, 18, :_reduce_21,
|
206
|
-
3, 18, :_reduce_22,
|
207
207
|
3, 18, :_reduce_none,
|
208
|
-
1,
|
209
|
-
3,
|
210
|
-
1,
|
211
|
-
2,
|
212
|
-
2, 30, :_reduce_none,
|
213
|
-
5, 32, :_reduce_29,
|
214
|
-
0, 31, :_reduce_none,
|
215
|
-
1, 31, :_reduce_none,
|
216
|
-
1, 29, :_reduce_32,
|
217
|
-
2, 29, :_reduce_33,
|
208
|
+
1, 27, :_reduce_none,
|
209
|
+
3, 26, :_reduce_24,
|
210
|
+
1, 29, :_reduce_25,
|
211
|
+
2, 29, :_reduce_26,
|
218
212
|
2, 29, :_reduce_none,
|
219
|
-
|
220
|
-
|
221
|
-
1,
|
222
|
-
1,
|
213
|
+
5, 31, :_reduce_28,
|
214
|
+
0, 30, :_reduce_none,
|
215
|
+
1, 30, :_reduce_none,
|
216
|
+
1, 28, :_reduce_31,
|
217
|
+
2, 28, :_reduce_32,
|
218
|
+
2, 28, :_reduce_none,
|
219
|
+
1, 32, :_reduce_none,
|
220
|
+
1, 32, :_reduce_none,
|
221
|
+
1, 32, :_reduce_none,
|
222
|
+
1, 32, :_reduce_none,
|
223
|
+
5, 33, :_reduce_38,
|
223
224
|
5, 34, :_reduce_39,
|
224
225
|
5, 35, :_reduce_40,
|
225
226
|
5, 36, :_reduce_41,
|
226
|
-
|
227
|
-
1,
|
228
|
-
1, 26, :_reduce_none ]
|
227
|
+
1, 25, :_reduce_none,
|
228
|
+
1, 25, :_reduce_none ]
|
229
229
|
|
230
|
-
racc_reduce_n =
|
230
|
+
racc_reduce_n = 44
|
231
231
|
|
232
|
-
racc_shift_n =
|
232
|
+
racc_shift_n = 72
|
233
233
|
|
234
234
|
racc_token_table = {
|
235
235
|
false => 0,
|
@@ -288,7 +288,6 @@ Racc_token_to_s_table = [
|
|
288
288
|
"records",
|
289
289
|
"blanklines",
|
290
290
|
"blankline",
|
291
|
-
"eol",
|
292
291
|
"opt_space",
|
293
292
|
"opt_commentlines",
|
294
293
|
"commentlines",
|
@@ -344,13 +343,7 @@ module_eval(<<'.,.,', 'robotstxt.ry', 11)
|
|
344
343
|
|
345
344
|
# reduce 9 omitted
|
346
345
|
|
347
|
-
|
348
|
-
def _reduce_10(val, _values, result)
|
349
|
-
@lineno += 1
|
350
|
-
|
351
|
-
result
|
352
|
-
end
|
353
|
-
.,.,
|
346
|
+
# reduce 10 omitted
|
354
347
|
|
355
348
|
# reduce 11 omitted
|
356
349
|
|
@@ -364,18 +357,16 @@ module_eval(<<'.,.,', 'robotstxt.ry', 31)
|
|
364
357
|
|
365
358
|
# reduce 16 omitted
|
366
359
|
|
367
|
-
|
368
|
-
|
369
|
-
module_eval(<<'.,.,', 'robotstxt.ry', 47)
|
370
|
-
def _reduce_18(val, _values, result)
|
360
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 42)
|
361
|
+
def _reduce_17(val, _values, result)
|
371
362
|
@sitemaps << val[3]
|
372
363
|
|
373
364
|
result
|
374
365
|
end
|
375
366
|
.,.,
|
376
367
|
|
377
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
378
|
-
def
|
368
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 47)
|
369
|
+
def _reduce_18(val, _values, result)
|
379
370
|
result = []
|
380
371
|
result << val[0]
|
381
372
|
|
@@ -383,24 +374,24 @@ module_eval(<<'.,.,', 'robotstxt.ry', 52)
|
|
383
374
|
end
|
384
375
|
.,.,
|
385
376
|
|
386
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
387
|
-
def
|
377
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 52)
|
378
|
+
def _reduce_19(val, _values, result)
|
388
379
|
result = []
|
389
380
|
|
390
381
|
result
|
391
382
|
end
|
392
383
|
.,.,
|
393
384
|
|
394
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
395
|
-
def
|
385
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 58)
|
386
|
+
def _reduce_20(val, _values, result)
|
396
387
|
result << val[2]
|
397
388
|
|
398
389
|
result
|
399
390
|
end
|
400
391
|
.,.,
|
401
392
|
|
402
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
403
|
-
def
|
393
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 64)
|
394
|
+
def _reduce_21(val, _values, result)
|
404
395
|
val[2].each_with_index { |line, i|
|
405
396
|
warn "%s line %d: %s: orphan rule line" %
|
406
397
|
[@site.to_s, @rulelinenos[i], line.token] if $VERBOSE
|
@@ -410,50 +401,50 @@ module_eval(<<'.,.,', 'robotstxt.ry', 69)
|
|
410
401
|
end
|
411
402
|
.,.,
|
412
403
|
|
413
|
-
# reduce
|
404
|
+
# reduce 22 omitted
|
414
405
|
|
415
|
-
# reduce
|
406
|
+
# reduce 23 omitted
|
416
407
|
|
417
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
418
|
-
def
|
408
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 79)
|
409
|
+
def _reduce_24(val, _values, result)
|
419
410
|
result = Record.new(val[1], val[2])
|
420
411
|
|
421
412
|
result
|
422
413
|
end
|
423
414
|
.,.,
|
424
415
|
|
425
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
426
|
-
def
|
416
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 84)
|
417
|
+
def _reduce_25(val, _values, result)
|
427
418
|
result = [val[0]]
|
428
419
|
|
429
420
|
result
|
430
421
|
end
|
431
422
|
.,.,
|
432
423
|
|
433
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
434
|
-
def
|
424
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 89)
|
425
|
+
def _reduce_26(val, _values, result)
|
435
426
|
result << val[1]
|
436
427
|
|
437
428
|
result
|
438
429
|
end
|
439
430
|
.,.,
|
440
431
|
|
441
|
-
# reduce
|
432
|
+
# reduce 27 omitted
|
442
433
|
|
443
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
444
|
-
def
|
434
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 96)
|
435
|
+
def _reduce_28(val, _values, result)
|
445
436
|
result = AgentLine.new(val[0], val[3])
|
446
437
|
|
447
438
|
result
|
448
439
|
end
|
449
440
|
.,.,
|
450
441
|
|
451
|
-
# reduce
|
442
|
+
# reduce 29 omitted
|
452
443
|
|
453
|
-
# reduce
|
444
|
+
# reduce 30 omitted
|
454
445
|
|
455
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
456
|
-
def
|
446
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 104)
|
447
|
+
def _reduce_31(val, _values, result)
|
457
448
|
result = [result]
|
458
449
|
@rulelinenos = []
|
459
450
|
|
@@ -461,8 +452,8 @@ module_eval(<<'.,.,', 'robotstxt.ry', 109)
|
|
461
452
|
end
|
462
453
|
.,.,
|
463
454
|
|
464
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
465
|
-
def
|
455
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 110)
|
456
|
+
def _reduce_32(val, _values, result)
|
466
457
|
result << val[1]
|
467
458
|
@rulelinenos << @lineno
|
468
459
|
|
@@ -470,6 +461,8 @@ module_eval(<<'.,.,', 'robotstxt.ry', 115)
|
|
470
461
|
end
|
471
462
|
.,.,
|
472
463
|
|
464
|
+
# reduce 33 omitted
|
465
|
+
|
473
466
|
# reduce 34 omitted
|
474
467
|
|
475
468
|
# reduce 35 omitted
|
@@ -478,43 +471,41 @@ module_eval(<<'.,.,', 'robotstxt.ry', 115)
|
|
478
471
|
|
479
472
|
# reduce 37 omitted
|
480
473
|
|
481
|
-
|
482
|
-
|
483
|
-
module_eval(<<'.,.,', 'robotstxt.ry', 128)
|
484
|
-
def _reduce_39(val, _values, result)
|
474
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 123)
|
475
|
+
def _reduce_38(val, _values, result)
|
485
476
|
result = AllowLine.new(val[0], val[3])
|
486
477
|
|
487
478
|
result
|
488
479
|
end
|
489
480
|
.,.,
|
490
481
|
|
491
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
492
|
-
def
|
482
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 128)
|
483
|
+
def _reduce_39(val, _values, result)
|
493
484
|
result = DisallowLine.new(val[0], val[3])
|
494
485
|
|
495
486
|
result
|
496
487
|
end
|
497
488
|
.,.,
|
498
489
|
|
499
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
500
|
-
def
|
490
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 133)
|
491
|
+
def _reduce_40(val, _values, result)
|
501
492
|
result = CrawlDelayLine.new(val[0], val[3])
|
502
493
|
|
503
494
|
result
|
504
495
|
end
|
505
496
|
.,.,
|
506
497
|
|
507
|
-
module_eval(<<'.,.,', 'robotstxt.ry',
|
508
|
-
def
|
498
|
+
module_eval(<<'.,.,', 'robotstxt.ry', 138)
|
499
|
+
def _reduce_41(val, _values, result)
|
509
500
|
result = ExtentionLine.new(val[0], val[3])
|
510
501
|
|
511
502
|
result
|
512
503
|
end
|
513
504
|
.,.,
|
514
505
|
|
515
|
-
# reduce
|
506
|
+
# reduce 42 omitted
|
516
507
|
|
517
|
-
# reduce
|
508
|
+
# reduce 43 omitted
|
518
509
|
|
519
510
|
def _reduce_none(val, _values, result)
|
520
511
|
val[0]
|
data/lib/webrobots/robotstxt.ry
CHANGED
@@ -25,12 +25,7 @@ rule
|
|
25
25
|
| blanklines
|
26
26
|
blankline
|
27
27
|
|
28
|
-
blankline :
|
29
|
-
|
30
|
-
eol : EOL
|
31
|
-
{
|
32
|
-
@lineno += 1
|
33
|
-
}
|
28
|
+
blankline : EOL
|
34
29
|
|
35
30
|
opt_space :
|
36
31
|
| SPACE
|
@@ -42,7 +37,7 @@ rule
|
|
42
37
|
| commentlines
|
43
38
|
comment
|
44
39
|
|
45
|
-
comment : opt_space COMMENT
|
40
|
+
comment : opt_space COMMENT EOL
|
46
41
|
| 'sitemap' ':' opt_space VALUE eol_opt_comment
|
47
42
|
{
|
48
43
|
@sitemaps << val[3]
|
@@ -144,7 +139,7 @@ rule
|
|
144
139
|
result = ExtentionLine.new(val[0], val[3])
|
145
140
|
}
|
146
141
|
|
147
|
-
eol_opt_comment :
|
142
|
+
eol_opt_comment : EOL
|
148
143
|
| comment
|
149
144
|
|
150
145
|
---- header
|
@@ -178,7 +173,7 @@ class WebRobots
|
|
178
173
|
def parse(input, site)
|
179
174
|
@q ||= []
|
180
175
|
@errors = []
|
181
|
-
@lineno =
|
176
|
+
@lineno = 0
|
182
177
|
@site = site
|
183
178
|
|
184
179
|
string = input.respond_to?(:read) ? input.read : input
|
@@ -186,6 +181,7 @@ class WebRobots
|
|
186
181
|
value_expected = false
|
187
182
|
|
188
183
|
until s.eos?
|
184
|
+
@lineno += 1 if s.bol?
|
189
185
|
if t = s.scan(/[ \t]*(?:\r?\n|\z)/)
|
190
186
|
if value_expected
|
191
187
|
@q << [:VALUE, '']
|
data/test/test_webrobots.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
1
2
|
require 'helper'
|
2
3
|
|
3
4
|
class TestWebRobots < Test::Unit::TestCase
|
@@ -582,4 +583,76 @@ TXT
|
|
582
583
|
end
|
583
584
|
end
|
584
585
|
|
586
|
+
context "robots.txt cache" do
|
587
|
+
setup do
|
588
|
+
@fetched = false
|
589
|
+
@robots = WebRobots.new('RandomBot', :http_get => lambda { |uri|
|
590
|
+
case uri.to_s
|
591
|
+
when 'http://site1.example.org/robots.txt'
|
592
|
+
@fetched = true
|
593
|
+
<<-'TXT'
|
594
|
+
User-Agent: *
|
595
|
+
Disallow: /foo
|
596
|
+
TXT
|
597
|
+
when 'http://site2.example.org/robots.txt'
|
598
|
+
@fetched = true
|
599
|
+
nil
|
600
|
+
end
|
601
|
+
})
|
602
|
+
end
|
603
|
+
|
604
|
+
should "persist unless cache is cleared" do
|
605
|
+
assert !@fetched
|
606
|
+
assert !@robots.allowed?('http://site1.example.org/foo')
|
607
|
+
assert @fetched
|
608
|
+
|
609
|
+
@fetched = false
|
610
|
+
assert @robots.allowed?('http://site1.example.org/bar')
|
611
|
+
assert !@fetched
|
612
|
+
assert @robots.allowed?('http://site1.example.org/baz')
|
613
|
+
assert !@fetched
|
614
|
+
assert !@robots.allowed?('http://site1.example.org/foo')
|
615
|
+
assert !@fetched
|
616
|
+
|
617
|
+
@robots.flush_cache
|
618
|
+
assert !@fetched
|
619
|
+
assert !@robots.allowed?('http://site1.example.org/foo')
|
620
|
+
assert @fetched
|
621
|
+
|
622
|
+
@fetched = false
|
623
|
+
assert @robots.allowed?('http://site1.example.org/bar')
|
624
|
+
assert !@fetched
|
625
|
+
assert @robots.allowed?('http://site1.example.org/baz')
|
626
|
+
assert !@fetched
|
627
|
+
assert !@robots.allowed?('http://site1.example.org/foo')
|
628
|
+
assert !@fetched
|
629
|
+
end
|
630
|
+
|
631
|
+
should "persist for non-existent robots.txt unless cache is cleared" do
|
632
|
+
assert !@fetched
|
633
|
+
assert !@robots.allowed?('http://site2.example.org/foo')
|
634
|
+
assert @fetched
|
635
|
+
|
636
|
+
@fetched = false
|
637
|
+
assert !@robots.allowed?('http://site2.example.org/bar')
|
638
|
+
assert !@fetched
|
639
|
+
assert !@robots.allowed?('http://site2.example.org/baz')
|
640
|
+
assert !@fetched
|
641
|
+
assert !@robots.allowed?('http://site2.example.org/foo')
|
642
|
+
assert !@fetched
|
643
|
+
|
644
|
+
@robots.flush_cache
|
645
|
+
assert !@fetched
|
646
|
+
assert !@robots.allowed?('http://site2.example.org/foo')
|
647
|
+
assert @fetched
|
648
|
+
|
649
|
+
@fetched = false
|
650
|
+
assert !@robots.allowed?('http://site2.example.org/bar')
|
651
|
+
assert !@fetched
|
652
|
+
assert !@robots.allowed?('http://site2.example.org/baz')
|
653
|
+
assert !@fetched
|
654
|
+
assert !@robots.allowed?('http://site2.example.org/foo')
|
655
|
+
assert !@fetched
|
656
|
+
end
|
657
|
+
end
|
585
658
|
end
|
data/webrobots.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{webrobots}
|
8
|
-
s.version = "0.0.10"
|
8
|
+
s.version = "0.0.11"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = [%q{Akinori MUSHA}]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-08-10}
|
13
13
|
s.description = %q{This library helps write robots.txt compliant web robots in Ruby.
|
14
14
|
}
|
15
15
|
s.email = %q{knu@idaemons.org}
|
@@ -33,9 +33,10 @@ Gem::Specification.new do |s|
|
|
33
33
|
"test/test_webrobots.rb",
|
34
34
|
"webrobots.gemspec"
|
35
35
|
]
|
36
|
+
s.homepage = %q{https://github.com/knu/webrobots}
|
36
37
|
s.licenses = [%q{2-clause BSDL}]
|
37
38
|
s.require_paths = [%q{lib}]
|
38
|
-
s.rubygems_version = %q{1.8.
|
39
|
+
s.rubygems_version = %q{1.8.7}
|
39
40
|
s.summary = %q{A Ruby library to help write robots.txt compliant web robots}
|
40
41
|
|
41
42
|
if s.respond_to? :specification_version then
|
@@ -46,14 +47,14 @@ Gem::Specification.new do |s|
|
|
46
47
|
s.add_development_dependency(%q<racc>, [">= 0"])
|
47
48
|
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
48
49
|
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
49
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.6.
|
50
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
50
51
|
s.add_development_dependency(%q<rcov>, [">= 0"])
|
51
52
|
else
|
52
53
|
s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
|
53
54
|
s.add_dependency(%q<racc>, [">= 0"])
|
54
55
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
55
56
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
56
|
-
s.add_dependency(%q<jeweler>, ["~> 1.6.
|
57
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
57
58
|
s.add_dependency(%q<rcov>, [">= 0"])
|
58
59
|
end
|
59
60
|
else
|
@@ -61,7 +62,7 @@ Gem::Specification.new do |s|
|
|
61
62
|
s.add_dependency(%q<racc>, [">= 0"])
|
62
63
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
63
64
|
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
64
|
-
s.add_dependency(%q<jeweler>, ["~> 1.6.
|
65
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
65
66
|
s.add_dependency(%q<rcov>, [">= 0"])
|
66
67
|
end
|
67
68
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webrobots
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.10
|
4
|
+
version: 0.0.11
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-08-10 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &70285160147560 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 1.4.4
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70285160147560
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: racc
|
27
|
-
requirement: &
|
27
|
+
requirement: &70285160147080 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70285160147080
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: shoulda
|
38
|
-
requirement: &
|
38
|
+
requirement: &70285160146600 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70285160146600
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: bundler
|
49
|
-
requirement: &
|
49
|
+
requirement: &70285160146120 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,21 +54,21 @@ dependencies:
|
|
54
54
|
version: 1.0.0
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70285160146120
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: jeweler
|
60
|
-
requirement: &
|
60
|
+
requirement: &70285160145640 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
64
64
|
- !ruby/object:Gem::Version
|
65
|
-
version: 1.6.
|
65
|
+
version: 1.6.4
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70285160145640
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rcov
|
71
|
-
requirement: &
|
71
|
+
requirement: &70285160145160 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ! '>='
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: '0'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70285160145160
|
80
80
|
description: ! 'This library helps write robots.txt compliant web robots in Ruby.
|
81
81
|
|
82
82
|
'
|
@@ -101,7 +101,7 @@ files:
|
|
101
101
|
- test/helper.rb
|
102
102
|
- test/test_webrobots.rb
|
103
103
|
- webrobots.gemspec
|
104
|
-
homepage:
|
104
|
+
homepage: https://github.com/knu/webrobots
|
105
105
|
licenses:
|
106
106
|
- 2-clause BSDL
|
107
107
|
post_install_message:
|
@@ -116,7 +116,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
116
116
|
version: '0'
|
117
117
|
segments:
|
118
118
|
- 0
|
119
|
-
hash:
|
119
|
+
hash: 3895009630851215598
|
120
120
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
121
121
|
none: false
|
122
122
|
requirements:
|
@@ -125,7 +125,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
125
125
|
version: '0'
|
126
126
|
requirements: []
|
127
127
|
rubyforge_project:
|
128
|
-
rubygems_version: 1.8.
|
128
|
+
rubygems_version: 1.8.7
|
129
129
|
signing_key:
|
130
130
|
specification_version: 3
|
131
131
|
summary: A Ruby library to help write robots.txt compliant web robots
|