legitbot 1.11.13 → 1.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/autocorrect.yml +1 -1
- data/.github/workflows/pr.yml +1 -1
- data/.ruby-version +1 -1
- data/README.md +4 -3
- data/legitbot.gemspec +1 -1
- data/lib/legitbot/amazon.rb +10 -5
- data/lib/legitbot/duckduckgo.rb +254 -3
- data/lib/legitbot/openai.rb +36 -0
- data/lib/legitbot/version.rb +1 -1
- data/test/amazon_ad_bot_test.rb +60 -0
- data/test/{amazon_test.rb → amazon_bot_test.rb} +11 -28
- data/test/legitbot/validators/domains_test.rb +2 -0
- data/test/legitbot/validators/ip_ranges_test.rb +5 -0
- data/test/lib/dns_server_mock.rb +3 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d56f4dd32414017581a905af1e021634e30d22c4e76c4146c7e3a50923277a02
|
4
|
+
data.tar.gz: 47fe62aa1baf95dee8a3e3cee3e815ec28aa4dc13ec077318b18b67214a6dc5d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2fd237294ef2d18e7c23dcd91c11e0b397d0db982fce65b4e3d2429a0da2dc26e8707b9d4c2aa8f6eac4ffb2c508c70364ea39007551ee6386292e20d32a72f2
|
7
|
+
data.tar.gz: 4bc856536c0187d2bcebef7959f59becb69398685aa1917fb978bd2888649707e4072dc039a7aca8f88561e75f79686f8307538c733913ba1ca9d0157294728b
|
data/.github/workflows/pr.yml
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.
|
1
|
+
3.2.7
|
data/README.md
CHANGED
@@ -50,14 +50,15 @@ end
|
|
50
50
|
## Supported
|
51
51
|
|
52
52
|
- [Ahrefs](https://ahrefs.com/robot)
|
53
|
-
- [
|
54
|
-
- [
|
53
|
+
- [AmazonAdBot](https://adbot.amazon.com/)
|
54
|
+
- [AmazonBot](https://developer.amazon.com/amazonbot)
|
55
55
|
- [Applebot](https://support.apple.com/en-us/119829)
|
56
56
|
- [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
|
57
57
|
- [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
|
58
58
|
- [BLEXBot (WebMeUp)](http://webmeup-crawler.com/)
|
59
59
|
- [DataForSEO](https://dataforseo.com/dataforseo-bot)
|
60
|
-
- [
|
60
|
+
- [DuckAssistBot](https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot)
|
61
|
+
- [DuckDuckBot](https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot)
|
61
62
|
- [Google crawlers](https://support.google.com/webmasters/answer/1061943)
|
62
63
|
- [IAS](https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/)
|
63
64
|
- [OpenAI GPTBot](https://platform.openai.com/docs/gptbot)
|
data/legitbot.gemspec
CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
|
|
19
19
|
'rubygems_mfa_required' => 'true'
|
20
20
|
}
|
21
21
|
|
22
|
-
spec.required_ruby_version = '>= 3.
|
22
|
+
spec.required_ruby_version = '>= 3.1.0'
|
23
23
|
spec.add_dependency 'fast_interval_tree', '~> 0.2', '>= 0.2.2'
|
24
24
|
spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
|
25
25
|
|
data/lib/legitbot/amazon.rb
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Legitbot # :nodoc:
|
4
|
-
# https://adbot.amazon.com
|
5
|
-
|
6
|
-
|
7
|
-
domains 'amazon.', 'amazonadbot.com.'
|
4
|
+
# https://adbot.amazon.com
|
5
|
+
class AmazonAdBot < BotMatch
|
6
|
+
domains 'amazonadbot.com.'
|
8
7
|
end
|
9
8
|
|
10
|
-
|
9
|
+
# https://developer.amazon.com/en/amazonbot
|
10
|
+
class AmazonBot < BotMatch
|
11
|
+
domains 'crawl.amazonbot.amazon.'
|
12
|
+
end
|
13
|
+
|
14
|
+
rule Legitbot::AmazonBot, %w[Amazonbot]
|
15
|
+
rule Legitbot::AmazonAdBot, %w[AmazonAdBot]
|
11
16
|
end
|
data/lib/legitbot/duckduckgo.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Legitbot # :nodoc:
|
4
|
-
# https://duckduckgo.com/duckduckbot
|
4
|
+
# https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
|
5
5
|
# rubocop:disable Metrics/ClassLength
|
6
|
-
class
|
6
|
+
class DuckDuckBot < BotMatch
|
7
7
|
# @fetch:url https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
|
8
8
|
# @fetch:selector main > article ul > li > div
|
9
9
|
ip_ranges %w[
|
@@ -251,5 +251,256 @@ module Legitbot # :nodoc:
|
|
251
251
|
end
|
252
252
|
# rubocop:enable Metrics/ClassLength
|
253
253
|
|
254
|
-
|
254
|
+
# https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot
|
255
|
+
# rubocop:disable Metrics/ClassLength
|
256
|
+
class DuckAssistBot < BotMatch
|
257
|
+
# @fetch:url https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot
|
258
|
+
# @fetch:selector main > article ul > li > div
|
259
|
+
ip_ranges %w[
|
260
|
+
4.156.136.79
|
261
|
+
4.182.131.108
|
262
|
+
4.195.133.120
|
263
|
+
4.209.224.56
|
264
|
+
4.213.46.14
|
265
|
+
4.228.76.163
|
266
|
+
13.89.106.77
|
267
|
+
20.3.1.178
|
268
|
+
20.12.141.99
|
269
|
+
20.40.133.240
|
270
|
+
20.43.150.85
|
271
|
+
20.43.150.93
|
272
|
+
20.43.172.120
|
273
|
+
20.44.222.1
|
274
|
+
20.49.136.28
|
275
|
+
20.50.48.159
|
276
|
+
20.50.48.192
|
277
|
+
20.50.49.0
|
278
|
+
20.50.49.25
|
279
|
+
20.50.49.40
|
280
|
+
20.50.49.55
|
281
|
+
20.50.49.237
|
282
|
+
20.50.50.46
|
283
|
+
20.50.50.118
|
284
|
+
20.50.50.121
|
285
|
+
20.50.50.123
|
286
|
+
20.50.50.130
|
287
|
+
20.50.50.134
|
288
|
+
20.50.50.145
|
289
|
+
20.50.50.146
|
290
|
+
20.50.50.163
|
291
|
+
20.50.168.91
|
292
|
+
20.53.78.106
|
293
|
+
20.53.78.123
|
294
|
+
20.53.78.138
|
295
|
+
20.53.78.144
|
296
|
+
20.53.78.236
|
297
|
+
20.53.91.2
|
298
|
+
20.53.92.211
|
299
|
+
20.53.134.160
|
300
|
+
20.56.197.58
|
301
|
+
20.56.197.63
|
302
|
+
20.61.34.40
|
303
|
+
20.62.224.44
|
304
|
+
20.71.12.143
|
305
|
+
20.72.242.93
|
306
|
+
20.73.132.240
|
307
|
+
20.73.202.147
|
308
|
+
20.75.144.152
|
309
|
+
20.79.226.26
|
310
|
+
20.79.238.198
|
311
|
+
20.79.239.66
|
312
|
+
20.80.129.80
|
313
|
+
20.93.28.24
|
314
|
+
20.99.255.235
|
315
|
+
20.113.3.121
|
316
|
+
20.113.14.159
|
317
|
+
20.185.79.15
|
318
|
+
20.185.79.47
|
319
|
+
20.191.44.16
|
320
|
+
20.191.44.22
|
321
|
+
20.191.44.119
|
322
|
+
20.191.44.234
|
323
|
+
20.191.45.212
|
324
|
+
20.193.12.126
|
325
|
+
20.193.24.10
|
326
|
+
20.193.24.251
|
327
|
+
20.193.25.197
|
328
|
+
20.193.27.215
|
329
|
+
20.193.45.113
|
330
|
+
20.195.108.47
|
331
|
+
20.197.209.11
|
332
|
+
20.197.209.27
|
333
|
+
20.201.15.208
|
334
|
+
20.204.240.172
|
335
|
+
20.204.241.148
|
336
|
+
20.204.242.19
|
337
|
+
20.204.242.101
|
338
|
+
20.204.243.55
|
339
|
+
20.204.246.81
|
340
|
+
20.204.246.254
|
341
|
+
20.207.72.11
|
342
|
+
20.207.72.21
|
343
|
+
20.207.72.110
|
344
|
+
20.207.72.113
|
345
|
+
20.207.97.190
|
346
|
+
20.207.99.197
|
347
|
+
20.207.107.181
|
348
|
+
20.219.43.246
|
349
|
+
20.219.45.67
|
350
|
+
20.219.45.190
|
351
|
+
20.226.133.105
|
352
|
+
20.232.184.230
|
353
|
+
40.64.105.247
|
354
|
+
40.64.106.11
|
355
|
+
40.76.162.191
|
356
|
+
40.76.162.208
|
357
|
+
40.76.162.247
|
358
|
+
40.76.163.7
|
359
|
+
40.76.163.23
|
360
|
+
40.76.173.151
|
361
|
+
40.80.242.63
|
362
|
+
40.81.250.205
|
363
|
+
40.88.21.235
|
364
|
+
40.89.243.175
|
365
|
+
40.114.182.45
|
366
|
+
40.114.182.153
|
367
|
+
40.114.182.172
|
368
|
+
40.114.183.88
|
369
|
+
40.114.183.196
|
370
|
+
40.114.183.251
|
371
|
+
40.119.232.50
|
372
|
+
40.119.232.146
|
373
|
+
40.119.232.215
|
374
|
+
40.119.232.218
|
375
|
+
40.119.232.251
|
376
|
+
51.8.71.117
|
377
|
+
51.8.253.152
|
378
|
+
51.104.146.225
|
379
|
+
51.104.146.235
|
380
|
+
51.104.160.167
|
381
|
+
51.104.160.177
|
382
|
+
51.104.161.32
|
383
|
+
51.104.162.149
|
384
|
+
51.104.163.250
|
385
|
+
51.104.164.109
|
386
|
+
51.104.164.147
|
387
|
+
51.104.164.189
|
388
|
+
51.104.164.215
|
389
|
+
51.104.166.111
|
390
|
+
51.104.167.19
|
391
|
+
51.104.167.52
|
392
|
+
51.104.167.54
|
393
|
+
51.104.167.61
|
394
|
+
51.104.167.71
|
395
|
+
51.104.167.87
|
396
|
+
51.104.167.88
|
397
|
+
51.104.167.95
|
398
|
+
51.104.167.96
|
399
|
+
51.104.167.104
|
400
|
+
51.104.167.110
|
401
|
+
51.104.180.26
|
402
|
+
51.104.180.47
|
403
|
+
51.104.180.53
|
404
|
+
51.107.40.209
|
405
|
+
51.116.131.221
|
406
|
+
51.120.48.122
|
407
|
+
51.138.90.161
|
408
|
+
51.138.90.206
|
409
|
+
51.138.90.233
|
410
|
+
52.142.24.149
|
411
|
+
52.142.26.175
|
412
|
+
52.143.95.162
|
413
|
+
52.143.95.204
|
414
|
+
52.143.241.111
|
415
|
+
52.143.242.6
|
416
|
+
52.143.243.117
|
417
|
+
52.143.244.81
|
418
|
+
52.143.247.235
|
419
|
+
52.146.58.236
|
420
|
+
52.146.59.12
|
421
|
+
52.146.59.154
|
422
|
+
52.146.59.156
|
423
|
+
52.146.63.80
|
424
|
+
52.148.161.87
|
425
|
+
52.148.165.38
|
426
|
+
52.149.25.43
|
427
|
+
52.149.28.18
|
428
|
+
52.149.28.83
|
429
|
+
52.149.30.45
|
430
|
+
52.149.56.151
|
431
|
+
52.149.58.27
|
432
|
+
52.149.58.69
|
433
|
+
52.149.58.139
|
434
|
+
52.149.58.173
|
435
|
+
52.149.60.38
|
436
|
+
52.149.61.51
|
437
|
+
52.154.60.82
|
438
|
+
52.154.169.50
|
439
|
+
52.154.169.200
|
440
|
+
52.154.170.26
|
441
|
+
52.154.170.28
|
442
|
+
52.154.170.88
|
443
|
+
52.154.170.96
|
444
|
+
52.154.170.113
|
445
|
+
52.154.170.117
|
446
|
+
52.154.170.122
|
447
|
+
52.154.170.209
|
448
|
+
52.154.170.229
|
449
|
+
52.154.170.243
|
450
|
+
52.154.171.0
|
451
|
+
52.154.171.44
|
452
|
+
52.154.171.70
|
453
|
+
52.154.171.87
|
454
|
+
52.154.171.150
|
455
|
+
52.154.171.196
|
456
|
+
52.154.171.205
|
457
|
+
52.154.171.235
|
458
|
+
52.154.171.250
|
459
|
+
52.154.172.2
|
460
|
+
52.190.37.160
|
461
|
+
52.224.16.221
|
462
|
+
52.224.16.229
|
463
|
+
52.224.19.152
|
464
|
+
52.224.20.174
|
465
|
+
52.224.20.181
|
466
|
+
52.224.20.186
|
467
|
+
52.224.20.190
|
468
|
+
52.224.20.193
|
469
|
+
52.224.20.203
|
470
|
+
52.224.20.204
|
471
|
+
52.224.20.223
|
472
|
+
52.224.20.227
|
473
|
+
52.224.20.249
|
474
|
+
52.224.21.4
|
475
|
+
52.224.21.19
|
476
|
+
52.224.21.20
|
477
|
+
52.224.21.23
|
478
|
+
52.224.21.27
|
479
|
+
52.224.21.49
|
480
|
+
52.224.21.51
|
481
|
+
52.224.21.53
|
482
|
+
52.224.21.55
|
483
|
+
52.224.21.61
|
484
|
+
52.242.224.168
|
485
|
+
57.152.72.128
|
486
|
+
104.43.54.127
|
487
|
+
104.43.55.116
|
488
|
+
104.43.55.117
|
489
|
+
104.43.55.166
|
490
|
+
104.43.55.167
|
491
|
+
108.141.83.74
|
492
|
+
172.169.17.165
|
493
|
+
191.233.3.197
|
494
|
+
191.233.3.202
|
495
|
+
191.234.216.4
|
496
|
+
191.234.216.178
|
497
|
+
191.235.201.214
|
498
|
+
191.235.202.38
|
499
|
+
191.235.202.48
|
500
|
+
]
|
501
|
+
end
|
502
|
+
# rubocop:enable Metrics/ClassLength
|
503
|
+
|
504
|
+
rule Legitbot::DuckDuckBot, %w[DuckDuckBot]
|
505
|
+
rule Legitbot::DuckAssistBot, %w[DuckAssistBot]
|
255
506
|
end
|
data/lib/legitbot/openai.rb
CHANGED
@@ -27,46 +27,70 @@ module Legitbot # :nodoc:
|
|
27
27
|
4.151.119.48/28
|
28
28
|
4.151.241.240/28
|
29
29
|
4.196.118.112/28
|
30
|
+
4.196.198.80/28
|
31
|
+
4.197.19.176/28
|
30
32
|
4.197.22.112/28
|
31
33
|
4.197.115.112/28
|
32
34
|
4.205.128.176/28
|
35
|
+
4.234.83.96/28
|
33
36
|
13.65.138.96/28
|
34
37
|
13.65.138.112/28
|
38
|
+
13.70.107.160/28
|
39
|
+
13.76.32.208/28
|
35
40
|
13.76.116.80/28
|
36
41
|
13.83.167.128/28
|
37
42
|
13.83.237.176/28
|
43
|
+
20.0.53.96/28
|
38
44
|
20.55.229.144/28
|
39
45
|
20.63.221.64/28
|
46
|
+
20.77.178.240/28
|
40
47
|
20.90.7.144/28
|
41
48
|
20.97.189.96/28
|
42
49
|
20.102.212.144/28
|
43
50
|
20.117.22.224/28
|
44
51
|
20.161.75.208/28
|
45
52
|
20.193.50.32/28
|
53
|
+
20.194.0.208/28
|
54
|
+
20.194.1.0/28
|
55
|
+
20.194.157.176/28
|
56
|
+
20.210.154.128/28
|
46
57
|
20.215.188.192/28
|
47
58
|
20.215.214.16/28
|
48
59
|
20.228.106.176/28
|
60
|
+
20.235.75.208/28
|
49
61
|
20.235.87.224/28
|
50
62
|
20.249.63.208/28
|
63
|
+
23.97.109.224/28
|
64
|
+
23.98.142.176/28
|
51
65
|
23.98.179.16/28
|
52
66
|
23.98.186.64/28
|
53
67
|
23.98.186.96/28
|
54
68
|
23.98.186.176/28
|
55
69
|
23.98.186.192/28
|
70
|
+
23.102.140.144/28
|
71
|
+
23.102.141.32/28
|
72
|
+
40.75.14.224/28
|
56
73
|
40.84.181.32/28
|
57
74
|
40.84.221.208/28
|
58
75
|
40.84.221.224/28
|
59
76
|
40.116.73.208/28
|
60
77
|
51.8.155.48/28
|
61
78
|
51.8.155.64/28
|
79
|
+
51.8.155.80/28
|
62
80
|
51.8.155.112/28
|
63
81
|
52.148.129.32/28
|
82
|
+
52.154.22.48/28
|
64
83
|
52.156.77.144/28
|
65
84
|
52.159.227.32/28
|
66
85
|
52.159.249.96/28
|
86
|
+
52.173.123.0/28
|
87
|
+
52.173.219.96/28
|
88
|
+
52.173.219.112/28
|
67
89
|
52.173.234.16/28
|
68
90
|
52.173.234.80/28
|
91
|
+
52.173.235.80/28
|
69
92
|
52.176.139.176/28
|
93
|
+
52.187.246.128/28
|
70
94
|
52.190.137.16/28
|
71
95
|
52.190.137.144/28
|
72
96
|
52.190.139.48/28
|
@@ -78,6 +102,7 @@ module Legitbot # :nodoc:
|
|
78
102
|
52.236.94.144/28
|
79
103
|
52.242.132.224/28
|
80
104
|
52.242.132.240/28
|
105
|
+
52.242.245.208/28
|
81
106
|
52.252.113.240/28
|
82
107
|
52.255.109.80/28
|
83
108
|
52.255.109.96/28
|
@@ -94,6 +119,7 @@ module Legitbot # :nodoc:
|
|
94
119
|
57.154.175.0/28
|
95
120
|
57.154.187.32/28
|
96
121
|
68.154.28.96/28
|
122
|
+
68.218.30.112/28
|
97
123
|
68.220.57.64/28
|
98
124
|
68.221.67.160/28
|
99
125
|
68.221.67.192/28
|
@@ -115,17 +141,27 @@ module Legitbot # :nodoc:
|
|
115
141
|
135.237.133.48/28
|
116
142
|
135.237.133.112/28
|
117
143
|
137.135.191.176/28
|
144
|
+
138.91.30.48/28
|
145
|
+
138.91.46.96/28
|
118
146
|
172.178.140.144/28
|
119
147
|
172.178.141.112/28
|
120
148
|
172.178.141.128/28
|
121
149
|
172.183.143.224/28
|
122
150
|
172.183.222.128/28
|
151
|
+
172.204.16.64/28
|
123
152
|
172.212.159.64/28
|
124
153
|
172.213.11.144/28
|
125
154
|
172.213.12.112/28
|
126
155
|
172.213.21.16/28
|
127
156
|
172.213.21.112/28
|
128
157
|
172.213.21.144/28
|
158
|
+
191.233.194.32/28
|
159
|
+
191.233.196.112/28
|
160
|
+
191.234.167.128/28
|
161
|
+
191.235.66.16/28
|
162
|
+
191.235.98.144/28
|
163
|
+
191.235.99.80/28
|
164
|
+
191.239.245.16/28
|
129
165
|
]
|
130
166
|
end
|
131
167
|
# rubocop:enable Metrics/ClassLength
|
data/lib/legitbot/version.rb
CHANGED
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
|
5
|
+
class AmazonAdBotTest < Minitest::Test
|
6
|
+
include Minitest::Hooks
|
7
|
+
include DnsServerMock
|
8
|
+
|
9
|
+
def test_malicious_ip
|
10
|
+
ip = '149.210.164.47'
|
11
|
+
match = Legitbot::AmazonAdBot.new ip
|
12
|
+
|
13
|
+
refute_predicate match, :valid?
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_valid_ip
|
17
|
+
ip = '54.166.7.90'
|
18
|
+
match = Legitbot::AmazonAdBot.new ip
|
19
|
+
|
20
|
+
assert_predicate match, :valid?
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_malicious_ua
|
24
|
+
bot = Legitbot.bot(
|
25
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
26
|
+
'149.210.164.47'
|
27
|
+
)
|
28
|
+
|
29
|
+
assert bot
|
30
|
+
refute_predicate bot, :valid?
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_user_agent
|
34
|
+
bot = Legitbot.bot(
|
35
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
36
|
+
'54.166.7.90'
|
37
|
+
)
|
38
|
+
|
39
|
+
assert bot
|
40
|
+
assert_predicate bot, :valid?
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_valid_name
|
44
|
+
bot = Legitbot.bot(
|
45
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
46
|
+
'54.166.7.90'
|
47
|
+
)
|
48
|
+
|
49
|
+
assert_equal :amazonadbot, bot.detected_as
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_fake_name
|
53
|
+
bot = Legitbot.bot(
|
54
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
55
|
+
'81.1.172.108'
|
56
|
+
)
|
57
|
+
|
58
|
+
assert_equal :amazonadbot, bot.detected_as
|
59
|
+
end
|
60
|
+
end
|
@@ -2,20 +2,20 @@
|
|
2
2
|
|
3
3
|
require_relative 'test_helper'
|
4
4
|
|
5
|
-
class
|
5
|
+
class AmazonBotTest < Minitest::Test
|
6
6
|
include Minitest::Hooks
|
7
7
|
include DnsServerMock
|
8
8
|
|
9
9
|
def test_malicious_ip
|
10
10
|
ip = '149.210.164.47'
|
11
|
-
match = Legitbot::
|
11
|
+
match = Legitbot::AmazonBot.new ip
|
12
12
|
|
13
13
|
refute_predicate match, :valid?
|
14
14
|
end
|
15
15
|
|
16
16
|
def test_valid_ip
|
17
|
-
ip = '
|
18
|
-
match = Legitbot::
|
17
|
+
ip = '52.70.240.171'
|
18
|
+
match = Legitbot::AmazonBot.new ip
|
19
19
|
|
20
20
|
assert_predicate match, :valid?
|
21
21
|
end
|
@@ -30,18 +30,8 @@ class AmazonTest < Minitest::Test
|
|
30
30
|
refute_predicate bot, :valid?
|
31
31
|
end
|
32
32
|
|
33
|
-
def test_user_agent1
|
34
|
-
bot = Legitbot.bot(
|
35
|
-
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
36
|
-
'54.166.7.90'
|
37
|
-
)
|
38
|
-
|
39
|
-
assert bot
|
40
|
-
assert_predicate bot, :valid?
|
41
|
-
end
|
42
|
-
|
43
33
|
# rubocop:disable Layout/LineLength
|
44
|
-
def
|
34
|
+
def test_user_agent
|
45
35
|
bot = Legitbot.bot(
|
46
36
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
|
47
37
|
'52.70.240.171'
|
@@ -52,32 +42,25 @@ class AmazonTest < Minitest::Test
|
|
52
42
|
end
|
53
43
|
# rubocop:enable Layout/LineLength
|
54
44
|
|
55
|
-
def test_valid_name1
|
56
|
-
bot = Legitbot.bot(
|
57
|
-
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
58
|
-
'54.166.7.90'
|
59
|
-
)
|
60
|
-
|
61
|
-
assert_equal :amazon, bot.detected_as
|
62
|
-
end
|
63
|
-
|
64
45
|
# rubocop:disable Layout/LineLength
|
65
|
-
def
|
46
|
+
def test_valid_name
|
66
47
|
bot = Legitbot.bot(
|
67
48
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
|
68
49
|
'52.70.240.171'
|
69
50
|
)
|
70
51
|
|
71
|
-
assert_equal :
|
52
|
+
assert_equal :amazonbot, bot.detected_as
|
72
53
|
end
|
73
54
|
# rubocop:enable Layout/LineLength
|
74
55
|
|
56
|
+
# rubocop:disable Layout/LineLength
|
75
57
|
def test_fake_name
|
76
58
|
bot = Legitbot.bot(
|
77
|
-
'Mozilla/5.0 (
|
59
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
|
78
60
|
'81.1.172.108'
|
79
61
|
)
|
80
62
|
|
81
|
-
assert_equal :
|
63
|
+
assert_equal :amazonbot, bot.detected_as
|
82
64
|
end
|
65
|
+
# rubocop:enable Layout/LineLength
|
83
66
|
end
|
@@ -11,6 +11,7 @@ module Legitbot
|
|
11
11
|
|
12
12
|
class DomainMatch
|
13
13
|
include Domains
|
14
|
+
|
14
15
|
domains 'search.msn.com', reverse: false
|
15
16
|
|
16
17
|
@resolver = Minitest::Mock.new
|
@@ -27,6 +28,7 @@ module Legitbot
|
|
27
28
|
|
28
29
|
class ReverseMatch
|
29
30
|
include Domains
|
31
|
+
|
30
32
|
domains 'search.msn.com'
|
31
33
|
end
|
32
34
|
|
@@ -12,16 +12,19 @@ module Legitbot
|
|
12
12
|
|
13
13
|
class ArrayRanges
|
14
14
|
include IpRanges
|
15
|
+
|
15
16
|
ip_ranges '66.220.144.0/21', '2a03:2880:f234::/48'
|
16
17
|
end
|
17
18
|
|
18
19
|
class FlattenRanges
|
19
20
|
include IpRanges
|
21
|
+
|
20
22
|
ip_ranges %w[66.220.144.0/21 2a03:2880:f234::/48]
|
21
23
|
end
|
22
24
|
|
23
25
|
class EmptyRanges
|
24
26
|
include IpRanges
|
27
|
+
|
25
28
|
ip_ranges
|
26
29
|
|
27
30
|
def initialize(ip)
|
@@ -49,11 +52,13 @@ module Legitbot
|
|
49
52
|
|
50
53
|
class NilRanges
|
51
54
|
include IpRanges
|
55
|
+
|
52
56
|
ip_ranges { nil }
|
53
57
|
end
|
54
58
|
|
55
59
|
class Ipv4Ranges
|
56
60
|
include IpRanges
|
61
|
+
|
57
62
|
ip_ranges { ['66.220.144.0/21'] }
|
58
63
|
end
|
59
64
|
|
data/test/lib/dns_server_mock.rb
CHANGED
@@ -22,13 +22,15 @@ TEST_DNS_RECORDS = {
|
|
22
22
|
ptr: %w[crawl-52-86-176-3.alexa.com]
|
23
23
|
},
|
24
24
|
|
25
|
-
#
|
25
|
+
# AmazonAdBot
|
26
26
|
'crawler-54-166-7-90.amazonadbot.com' => {
|
27
27
|
a: %w[54.166.7.90]
|
28
28
|
},
|
29
29
|
'54.166.7.90' => {
|
30
30
|
ptr: %w[crawler-54-166-7-90.amazonadbot.com]
|
31
31
|
},
|
32
|
+
|
33
|
+
# AmazonBot
|
32
34
|
'52-70-240-171.crawl.amazonbot.amazon' => {
|
33
35
|
a: %w[52.70.240.171]
|
34
36
|
},
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.12.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-08-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fast_interval_tree
|
@@ -103,7 +103,8 @@ files:
|
|
103
103
|
- rakelib/console.rake
|
104
104
|
- rakelib/test.rake
|
105
105
|
- test/ahrefs_test.rb
|
106
|
-
- test/
|
106
|
+
- test/amazon_ad_bot_test.rb
|
107
|
+
- test/amazon_bot_test.rb
|
107
108
|
- test/apple_test.rb
|
108
109
|
- test/blexbot_test.rb
|
109
110
|
- test/botmatch_test.rb
|
@@ -136,14 +137,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
136
137
|
requirements:
|
137
138
|
- - ">="
|
138
139
|
- !ruby/object:Gem::Version
|
139
|
-
version: 3.
|
140
|
+
version: 3.1.0
|
140
141
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
141
142
|
requirements:
|
142
143
|
- - ">="
|
143
144
|
- !ruby/object:Gem::Version
|
144
145
|
version: '0'
|
145
146
|
requirements: []
|
146
|
-
rubygems_version: 3.
|
147
|
+
rubygems_version: 3.4.19
|
147
148
|
signing_key:
|
148
149
|
specification_version: 4
|
149
150
|
summary: Web crawler request validation
|