legitbot 1.11.13 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e4e488aedddc99f554b6c77583c9c83a002e619d67abd39be7a5c6e7ffa91648
4
- data.tar.gz: 42d72ad19facb323cef74b12557e7ff11ef296670c3792e966f1696232266214
3
+ metadata.gz: 8d61139628b7848376d114a6e077089f9c18f67e78187bdadd48333a06e6f866
4
+ data.tar.gz: 81025808fc543da79cbf0847b47b212cd2522ef4cfd3af648b7eb3c4bcd8e9a6
5
5
  SHA512:
6
- metadata.gz: 39f5610ea3341cc4f8ac6e8e9c1a89b22393880f89f841b7ede8e1a2091fede2bd80a0bf8ca944887a3ebc20b6283ee497f46f2cfa8a28fe50ed111d391a35ea
7
- data.tar.gz: 07ecd3ffaab28b65e8d6d2a424cd534b197d6c42595342afc4a5268cd946ca221989f45cec4c16984e9b0b8de52554d56a5e4f5c71478fdda4bb5802d2b0318c
6
+ metadata.gz: 8530f29add357c66339fd4d85aa96b338603347bc50d689dd3fa14e7e93913e76ff96460b6f21bf5757bb13f0a4e50c353e703518b83ffbf77cb514ee6f9b0a6
7
+ data.tar.gz: 63ec993dba0680788bc14a301cd8eaf9a07ba354d128bc8fc3c875312602fd441bafcd109621583f3d9e329c811851787c45d9dc46b828012f3001c26547492b
@@ -12,7 +12,7 @@ jobs:
12
12
 
13
13
  strategy:
14
14
  matrix:
15
- ruby: [3.1]
15
+ ruby: [3.2]
16
16
 
17
17
  steps:
18
18
  - uses: actions/checkout@v3
@@ -10,7 +10,7 @@ jobs:
10
10
  strategy:
11
11
  fail-fast: false
12
12
  matrix:
13
- ruby: [jruby, 3.0.6, 3.1, 3.2]
13
+ ruby: [jruby, 3.1, 3.2, 3.3, 3.4]
14
14
 
15
15
  steps:
16
16
  - uses: actions/checkout@v3
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.1.6
1
+ 3.2.7
data/README.md CHANGED
@@ -50,14 +50,15 @@ end
50
50
  ## Supported
51
51
 
52
52
  - [Ahrefs](https://ahrefs.com/robot)
53
- - [Amazonbot](https://developer.amazon.com/amazonbot)
54
- - [Amazon AdBot](https://adbot.amazon.com/index.html)
53
+ - [AmazonAdBot](https://adbot.amazon.com/)
54
+ - [AmazonBot](https://developer.amazon.com/amazonbot)
55
55
  - [Applebot](https://support.apple.com/en-us/119829)
56
56
  - [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
57
57
  - [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
58
58
  - [BLEXBot (WebMeUp)](http://webmeup-crawler.com/)
59
59
  - [DataForSEO](https://dataforseo.com/dataforseo-bot)
60
- - [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
60
+ - [DuckAssistBot](https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot)
61
+ - [DuckDuckBot](https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot)
61
62
  - [Google crawlers](https://support.google.com/webmasters/answer/1061943)
62
63
  - [IAS](https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/)
63
64
  - [OpenAI GPTBot](https://platform.openai.com/docs/gptbot)
data/legitbot.gemspec CHANGED
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
19
19
  'rubygems_mfa_required' => 'true'
20
20
  }
21
21
 
22
- spec.required_ruby_version = '>= 3.0.0'
22
+ spec.required_ruby_version = '>= 3.1.0'
23
23
  spec.add_dependency 'fast_interval_tree', '~> 0.2', '>= 0.2.2'
24
24
  spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
25
25
 
@@ -1,11 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot # :nodoc:
4
- # https://adbot.amazon.com/index.html
5
- # https://developer.amazon.com/amazonbot
6
- class Amazon < BotMatch
7
- domains 'amazon.', 'amazonadbot.com.'
4
+ # https://adbot.amazon.com
5
+ class AmazonAdBot < BotMatch
6
+ domains 'amazonadbot.com.'
8
7
  end
9
8
 
10
- rule Legitbot::Amazon, %w[Amazonbot AmazonAdBot]
9
+ # https://developer.amazon.com/en/amazonbot
10
+ class AmazonBot < BotMatch
11
+ domains 'crawl.amazonbot.amazon.'
12
+ end
13
+
14
+ rule Legitbot::AmazonBot, %w[Amazonbot]
15
+ rule Legitbot::AmazonAdBot, %w[AmazonAdBot]
11
16
  end
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot # :nodoc:
4
- # https://duckduckgo.com/duckduckbot
4
+ # https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
5
5
  # rubocop:disable Metrics/ClassLength
6
- class DuckDuckGo < BotMatch
6
+ class DuckDuckBot < BotMatch
7
7
  # @fetch:url https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
8
8
  # @fetch:selector main > article ul > li > div
9
9
  ip_ranges %w[
@@ -251,5 +251,256 @@ module Legitbot # :nodoc:
251
251
  end
252
252
  # rubocop:enable Metrics/ClassLength
253
253
 
254
- rule Legitbot::DuckDuckGo, %w[DuckDuckBot]
254
+ # https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot
255
+ # rubocop:disable Metrics/ClassLength
256
+ class DuckAssistBot < BotMatch
257
+ # @fetch:url https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot
258
+ # @fetch:selector main > article ul > li > div
259
+ ip_ranges %w[
260
+ 4.156.136.79
261
+ 4.182.131.108
262
+ 4.195.133.120
263
+ 4.209.224.56
264
+ 4.213.46.14
265
+ 4.228.76.163
266
+ 13.89.106.77
267
+ 20.3.1.178
268
+ 20.12.141.99
269
+ 20.40.133.240
270
+ 20.43.150.85
271
+ 20.43.150.93
272
+ 20.43.172.120
273
+ 20.44.222.1
274
+ 20.49.136.28
275
+ 20.50.48.159
276
+ 20.50.48.192
277
+ 20.50.49.0
278
+ 20.50.49.25
279
+ 20.50.49.40
280
+ 20.50.49.55
281
+ 20.50.49.237
282
+ 20.50.50.46
283
+ 20.50.50.118
284
+ 20.50.50.121
285
+ 20.50.50.123
286
+ 20.50.50.130
287
+ 20.50.50.134
288
+ 20.50.50.145
289
+ 20.50.50.146
290
+ 20.50.50.163
291
+ 20.50.168.91
292
+ 20.53.78.106
293
+ 20.53.78.123
294
+ 20.53.78.138
295
+ 20.53.78.144
296
+ 20.53.78.236
297
+ 20.53.91.2
298
+ 20.53.92.211
299
+ 20.53.134.160
300
+ 20.56.197.58
301
+ 20.56.197.63
302
+ 20.61.34.40
303
+ 20.62.224.44
304
+ 20.71.12.143
305
+ 20.72.242.93
306
+ 20.73.132.240
307
+ 20.73.202.147
308
+ 20.75.144.152
309
+ 20.79.226.26
310
+ 20.79.238.198
311
+ 20.79.239.66
312
+ 20.80.129.80
313
+ 20.93.28.24
314
+ 20.99.255.235
315
+ 20.113.3.121
316
+ 20.113.14.159
317
+ 20.185.79.15
318
+ 20.185.79.47
319
+ 20.191.44.16
320
+ 20.191.44.22
321
+ 20.191.44.119
322
+ 20.191.44.234
323
+ 20.191.45.212
324
+ 20.193.12.126
325
+ 20.193.24.10
326
+ 20.193.24.251
327
+ 20.193.25.197
328
+ 20.193.27.215
329
+ 20.193.45.113
330
+ 20.195.108.47
331
+ 20.197.209.11
332
+ 20.197.209.27
333
+ 20.201.15.208
334
+ 20.204.240.172
335
+ 20.204.241.148
336
+ 20.204.242.19
337
+ 20.204.242.101
338
+ 20.204.243.55
339
+ 20.204.246.81
340
+ 20.204.246.254
341
+ 20.207.72.11
342
+ 20.207.72.21
343
+ 20.207.72.110
344
+ 20.207.72.113
345
+ 20.207.97.190
346
+ 20.207.99.197
347
+ 20.207.107.181
348
+ 20.219.43.246
349
+ 20.219.45.67
350
+ 20.219.45.190
351
+ 20.226.133.105
352
+ 20.232.184.230
353
+ 40.64.105.247
354
+ 40.64.106.11
355
+ 40.76.162.191
356
+ 40.76.162.208
357
+ 40.76.162.247
358
+ 40.76.163.7
359
+ 40.76.163.23
360
+ 40.76.173.151
361
+ 40.80.242.63
362
+ 40.81.250.205
363
+ 40.88.21.235
364
+ 40.89.243.175
365
+ 40.114.182.45
366
+ 40.114.182.153
367
+ 40.114.182.172
368
+ 40.114.183.88
369
+ 40.114.183.196
370
+ 40.114.183.251
371
+ 40.119.232.50
372
+ 40.119.232.146
373
+ 40.119.232.215
374
+ 40.119.232.218
375
+ 40.119.232.251
376
+ 51.8.71.117
377
+ 51.8.253.152
378
+ 51.104.146.225
379
+ 51.104.146.235
380
+ 51.104.160.167
381
+ 51.104.160.177
382
+ 51.104.161.32
383
+ 51.104.162.149
384
+ 51.104.163.250
385
+ 51.104.164.109
386
+ 51.104.164.147
387
+ 51.104.164.189
388
+ 51.104.164.215
389
+ 51.104.166.111
390
+ 51.104.167.19
391
+ 51.104.167.52
392
+ 51.104.167.54
393
+ 51.104.167.61
394
+ 51.104.167.71
395
+ 51.104.167.87
396
+ 51.104.167.88
397
+ 51.104.167.95
398
+ 51.104.167.96
399
+ 51.104.167.104
400
+ 51.104.167.110
401
+ 51.104.180.26
402
+ 51.104.180.47
403
+ 51.104.180.53
404
+ 51.107.40.209
405
+ 51.116.131.221
406
+ 51.120.48.122
407
+ 51.138.90.161
408
+ 51.138.90.206
409
+ 51.138.90.233
410
+ 52.142.24.149
411
+ 52.142.26.175
412
+ 52.143.95.162
413
+ 52.143.95.204
414
+ 52.143.241.111
415
+ 52.143.242.6
416
+ 52.143.243.117
417
+ 52.143.244.81
418
+ 52.143.247.235
419
+ 52.146.58.236
420
+ 52.146.59.12
421
+ 52.146.59.154
422
+ 52.146.59.156
423
+ 52.146.63.80
424
+ 52.148.161.87
425
+ 52.148.165.38
426
+ 52.149.25.43
427
+ 52.149.28.18
428
+ 52.149.28.83
429
+ 52.149.30.45
430
+ 52.149.56.151
431
+ 52.149.58.27
432
+ 52.149.58.69
433
+ 52.149.58.139
434
+ 52.149.58.173
435
+ 52.149.60.38
436
+ 52.149.61.51
437
+ 52.154.60.82
438
+ 52.154.169.50
439
+ 52.154.169.200
440
+ 52.154.170.26
441
+ 52.154.170.28
442
+ 52.154.170.88
443
+ 52.154.170.96
444
+ 52.154.170.113
445
+ 52.154.170.117
446
+ 52.154.170.122
447
+ 52.154.170.209
448
+ 52.154.170.229
449
+ 52.154.170.243
450
+ 52.154.171.0
451
+ 52.154.171.44
452
+ 52.154.171.70
453
+ 52.154.171.87
454
+ 52.154.171.150
455
+ 52.154.171.196
456
+ 52.154.171.205
457
+ 52.154.171.235
458
+ 52.154.171.250
459
+ 52.154.172.2
460
+ 52.190.37.160
461
+ 52.224.16.221
462
+ 52.224.16.229
463
+ 52.224.19.152
464
+ 52.224.20.174
465
+ 52.224.20.181
466
+ 52.224.20.186
467
+ 52.224.20.190
468
+ 52.224.20.193
469
+ 52.224.20.203
470
+ 52.224.20.204
471
+ 52.224.20.223
472
+ 52.224.20.227
473
+ 52.224.20.249
474
+ 52.224.21.4
475
+ 52.224.21.19
476
+ 52.224.21.20
477
+ 52.224.21.23
478
+ 52.224.21.27
479
+ 52.224.21.49
480
+ 52.224.21.51
481
+ 52.224.21.53
482
+ 52.224.21.55
483
+ 52.224.21.61
484
+ 52.242.224.168
485
+ 57.152.72.128
486
+ 104.43.54.127
487
+ 104.43.55.116
488
+ 104.43.55.117
489
+ 104.43.55.166
490
+ 104.43.55.167
491
+ 108.141.83.74
492
+ 172.169.17.165
493
+ 191.233.3.197
494
+ 191.233.3.202
495
+ 191.234.216.4
496
+ 191.234.216.178
497
+ 191.235.201.214
498
+ 191.235.202.38
499
+ 191.235.202.48
500
+ ]
501
+ end
502
+ # rubocop:enable Metrics/ClassLength
503
+
504
+ rule Legitbot::DuckDuckBot, %w[DuckDuckBot]
505
+ rule Legitbot::DuckAssistBot, %w[DuckAssistBot]
255
506
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.11.13'
4
+ VERSION = '1.12.0'
5
5
  end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'test_helper'
4
+
5
+ class AmazonAdBotTest < Minitest::Test
6
+ include Minitest::Hooks
7
+ include DnsServerMock
8
+
9
+ def test_malicious_ip
10
+ ip = '149.210.164.47'
11
+ match = Legitbot::AmazonAdBot.new ip
12
+
13
+ refute_predicate match, :valid?
14
+ end
15
+
16
+ def test_valid_ip
17
+ ip = '54.166.7.90'
18
+ match = Legitbot::AmazonAdBot.new ip
19
+
20
+ assert_predicate match, :valid?
21
+ end
22
+
23
+ def test_malicious_ua
24
+ bot = Legitbot.bot(
25
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
26
+ '149.210.164.47'
27
+ )
28
+
29
+ assert bot
30
+ refute_predicate bot, :valid?
31
+ end
32
+
33
+ def test_user_agent
34
+ bot = Legitbot.bot(
35
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
36
+ '54.166.7.90'
37
+ )
38
+
39
+ assert bot
40
+ assert_predicate bot, :valid?
41
+ end
42
+
43
+ def test_valid_name
44
+ bot = Legitbot.bot(
45
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
46
+ '54.166.7.90'
47
+ )
48
+
49
+ assert_equal :amazonadbot, bot.detected_as
50
+ end
51
+
52
+ def test_fake_name
53
+ bot = Legitbot.bot(
54
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
55
+ '81.1.172.108'
56
+ )
57
+
58
+ assert_equal :amazonadbot, bot.detected_as
59
+ end
60
+ end
@@ -2,20 +2,20 @@
2
2
 
3
3
  require_relative 'test_helper'
4
4
 
5
- class AmazonTest < Minitest::Test
5
+ class AmazonBotTest < Minitest::Test
6
6
  include Minitest::Hooks
7
7
  include DnsServerMock
8
8
 
9
9
  def test_malicious_ip
10
10
  ip = '149.210.164.47'
11
- match = Legitbot::Amazon.new ip
11
+ match = Legitbot::AmazonBot.new ip
12
12
 
13
13
  refute_predicate match, :valid?
14
14
  end
15
15
 
16
16
  def test_valid_ip
17
- ip = '54.166.7.90'
18
- match = Legitbot::Amazon.new ip
17
+ ip = '52.70.240.171'
18
+ match = Legitbot::AmazonBot.new ip
19
19
 
20
20
  assert_predicate match, :valid?
21
21
  end
@@ -30,18 +30,8 @@ class AmazonTest < Minitest::Test
30
30
  refute_predicate bot, :valid?
31
31
  end
32
32
 
33
- def test_user_agent1
34
- bot = Legitbot.bot(
35
- 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
36
- '54.166.7.90'
37
- )
38
-
39
- assert bot
40
- assert_predicate bot, :valid?
41
- end
42
-
43
33
  # rubocop:disable Layout/LineLength
44
- def test_user_agent2
34
+ def test_user_agent
45
35
  bot = Legitbot.bot(
46
36
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
47
37
  '52.70.240.171'
@@ -52,32 +42,25 @@ class AmazonTest < Minitest::Test
52
42
  end
53
43
  # rubocop:enable Layout/LineLength
54
44
 
55
- def test_valid_name1
56
- bot = Legitbot.bot(
57
- 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
58
- '54.166.7.90'
59
- )
60
-
61
- assert_equal :amazon, bot.detected_as
62
- end
63
-
64
45
  # rubocop:disable Layout/LineLength
65
- def test_valid_name2
46
+ def test_valid_name
66
47
  bot = Legitbot.bot(
67
48
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
68
49
  '52.70.240.171'
69
50
  )
70
51
 
71
- assert_equal :amazon, bot.detected_as
52
+ assert_equal :amazonbot, bot.detected_as
72
53
  end
73
54
  # rubocop:enable Layout/LineLength
74
55
 
56
+ # rubocop:disable Layout/LineLength
75
57
  def test_fake_name
76
58
  bot = Legitbot.bot(
77
- 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
59
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
78
60
  '81.1.172.108'
79
61
  )
80
62
 
81
- assert_equal :amazon, bot.detected_as
63
+ assert_equal :amazonbot, bot.detected_as
82
64
  end
65
+ # rubocop:enable Layout/LineLength
83
66
  end
@@ -22,13 +22,15 @@ TEST_DNS_RECORDS = {
22
22
  ptr: %w[crawl-52-86-176-3.alexa.com]
23
23
  },
24
24
 
25
- # Amazon
25
+ # AmazonAdBot
26
26
  'crawler-54-166-7-90.amazonadbot.com' => {
27
27
  a: %w[54.166.7.90]
28
28
  },
29
29
  '54.166.7.90' => {
30
30
  ptr: %w[crawler-54-166-7-90.amazonadbot.com]
31
31
  },
32
+
33
+ # AmazonBot
32
34
  '52-70-240-171.crawl.amazonbot.amazon' => {
33
35
  a: %w[52.70.240.171]
34
36
  },
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.11.13
4
+ version: 1.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-06-14 00:00:00.000000000 Z
11
+ date: 2025-06-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fast_interval_tree
@@ -103,7 +103,8 @@ files:
103
103
  - rakelib/console.rake
104
104
  - rakelib/test.rake
105
105
  - test/ahrefs_test.rb
106
- - test/amazon_test.rb
106
+ - test/amazon_ad_bot_test.rb
107
+ - test/amazon_bot_test.rb
107
108
  - test/apple_test.rb
108
109
  - test/blexbot_test.rb
109
110
  - test/botmatch_test.rb
@@ -136,14 +137,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
136
137
  requirements:
137
138
  - - ">="
138
139
  - !ruby/object:Gem::Version
139
- version: 3.0.0
140
+ version: 3.1.0
140
141
  required_rubygems_version: !ruby/object:Gem::Requirement
141
142
  requirements:
142
143
  - - ">="
143
144
  - !ruby/object:Gem::Version
144
145
  version: '0'
145
146
  requirements: []
146
- rubygems_version: 3.3.27
147
+ rubygems_version: 3.4.19
147
148
  signing_key:
148
149
  specification_version: 4
149
150
  summary: Web crawler request validation