legitbot 1.12.11 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 91f8fa53bd35c40ab2eb9c98ef5fad1dda96a869fd474076dde8df0842acf380
4
- data.tar.gz: 20b7a9bc47fd9eeaed1391797d22c39b3b52a47aa491ab3a9507b61f3535b5cb
3
+ metadata.gz: 997d2f1450b83a076215f798f9f6d3aad707047ae625e97a42b11f32b0370d53
4
+ data.tar.gz: bcecf8e39cd6040a616539b93813ad59eb63e781e65873a06af8cb09c817aba0
5
5
  SHA512:
6
- metadata.gz: f102c06a48ccfebbbfeb3d3c8d430979639347ac653f8ce71d345ced341b8a76efafa30f9d0931c3756458727b0c66a019a560b10f5064f16a3c1f8eb3563dc4
7
- data.tar.gz: 93626c8c0621b88fdf2e5e3f77f06986036e53ecee44966751268111f28aa56fa4f73d095a7784e5cf4414eba668fae324d8966fc43df2daec60aec4b2cce3a1
6
+ metadata.gz: f8761fac855610b44cc896b04a037b45ebb288205b465699e9759f45b2729a6930f612f29b463f75046a3dcf01823d36bb0e645231837baee5058ba12d8a77b0
7
+ data.tar.gz: 8bd0acc57a5402715ce70e251837db3483777db44f360faced2a0b11b4785d25d660edd381d60d1ae0ad12eea0547adbb99d3636d43913afcf407f5d9e1e430f
@@ -11,7 +11,7 @@ jobs:
11
11
  strategy:
12
12
  fail-fast: false
13
13
  matrix:
14
- ruby: [jruby, 3.1, 3.2, 3.3, 3.4]
14
+ ruby: [jruby, 3.2, 3.3, 3.4, 4.0]
15
15
 
16
16
  steps:
17
17
  - uses: actions/checkout@v5
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.2.7
1
+ 3.3.11
data/README.md CHANGED
@@ -65,6 +65,7 @@ end
65
65
  - [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
66
66
  - [Marginalia](https://www.marginalia.nu/marginalia-search/for-webmasters/)
67
67
  - [Meta / Facebook Web crawlers](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/)
68
+ - [Perplexity](https://docs.perplexity.ai/docs/resources/perplexity-crawlers#ip-address-sources)
68
69
  - [Petal search engine](http://aspiegel.com/petalbot)
69
70
  - [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
70
71
  - [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started),
data/legitbot.gemspec CHANGED
@@ -19,9 +19,10 @@ Gem::Specification.new do |spec|
19
19
  'rubygems_mfa_required' => 'true'
20
20
  }
21
21
 
22
- spec.required_ruby_version = '>= 3.1.0'
22
+ spec.required_ruby_version = '>= 3.2.0'
23
23
  spec.add_dependency 'fast_interval_tree', '~> 0.2', '>= 0.2.2'
24
24
  spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
25
+ spec.add_dependency 'logger', '~> 1.7', '>= 1.7.0'
25
26
 
26
27
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
27
28
  spec.rdoc_options = ['--charset=UTF-8']
@@ -7,6 +7,7 @@ module Legitbot # :nodoc:
7
7
  # @fetch:url https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot
8
8
  # @fetch:selector main > article ul > li > div
9
9
  ip_ranges %w[
10
+ 4.144.182.50
10
11
  4.150.142.218
11
12
  4.156.136.79
12
13
  4.156.154.107
@@ -31,6 +32,7 @@ module Legitbot # :nodoc:
31
32
  20.40.133.240
32
33
  20.43.150.85
33
34
  20.43.150.93
35
+ 20.43.152.28
34
36
  20.43.172.120
35
37
  20.44.222.1
36
38
  20.49.136.28
@@ -65,6 +67,9 @@ module Legitbot # :nodoc:
65
67
  20.61.34.40
66
68
  20.61.142.192
67
69
  20.62.224.44
70
+ 20.64.185.45
71
+ 20.69.129.205
72
+ 20.69.131.45
68
73
  20.71.12.143
69
74
  20.72.242.93
70
75
  20.73.132.240
@@ -74,6 +79,7 @@ module Legitbot # :nodoc:
74
79
  20.79.238.198
75
80
  20.79.239.66
76
81
  20.80.129.80
82
+ 20.80.156.88
77
83
  20.93.28.24
78
84
  20.99.255.235
79
85
  20.112.58.44
@@ -93,6 +99,7 @@ module Legitbot # :nodoc:
93
99
  20.193.25.197
94
100
  20.193.27.215
95
101
  20.193.45.113
102
+ 20.195.58.189
96
103
  20.195.108.47
97
104
  20.197.209.11
98
105
  20.197.209.27
@@ -111,12 +118,13 @@ module Legitbot # :nodoc:
111
118
  20.207.97.190
112
119
  20.207.99.197
113
120
  20.207.107.181
121
+ 20.212.90.107
114
122
  20.219.43.246
115
123
  20.219.45.67
116
124
  20.219.45.190
117
125
  20.226.133.105
118
126
  20.232.51.46
119
- 20.232.184.230
127
+ 20.252.16.193
120
128
  20.253.59.76
121
129
  40.64.105.247
122
130
  40.64.106.11
@@ -126,7 +134,6 @@ module Legitbot # :nodoc:
126
134
  40.76.163.7
127
135
  40.76.163.23
128
136
  40.76.173.151
129
- 40.80.242.63
130
137
  40.81.250.205
131
138
  40.88.21.235
132
139
  40.89.243.175
@@ -142,6 +149,7 @@ module Legitbot # :nodoc:
142
149
  40.119.232.218
143
150
  40.119.232.251
144
151
  40.127.154.196
152
+ 48.204.32.87
145
153
  48.217.23.236
146
154
  48.217.129.210
147
155
  48.217.212.89
@@ -258,6 +266,7 @@ module Legitbot # :nodoc:
258
266
  52.242.224.168
259
267
  52.250.46.221
260
268
  57.152.72.128
269
+ 57.155.18.44
261
270
  64.236.118.43
262
271
  68.219.152.220
263
272
  74.179.232.116
@@ -280,6 +289,7 @@ module Legitbot # :nodoc:
280
289
  172.169.60.134
281
290
  172.169.177.131
282
291
  172.193.245.229
292
+ 172.194.141.49
283
293
  172.199.55.212
284
294
  191.233.3.197
285
295
  191.233.3.202
@@ -298,18 +308,32 @@ module Legitbot # :nodoc:
298
308
  # @fetch:url https://duckduckgo.com/duckduckgo-help-pages/results/duckassistbot
299
309
  # @fetch:selector main > article ul > li > div
300
310
  ip_ranges %w[
311
+ 4.144.182.50
312
+ 4.150.142.218
301
313
  4.156.136.79
314
+ 4.156.154.107
302
315
  4.182.131.108
303
316
  4.195.133.120
317
+ 4.201.125.59
318
+ 4.201.141.71
319
+ 4.201.197.203
320
+ 4.201.206.133
321
+ 4.201.220.8
322
+ 4.207.220.92
304
323
  4.209.224.56
305
324
  4.213.46.14
306
325
  4.228.76.163
326
+ 4.249.216.104
327
+ 4.255.35.121
307
328
  13.89.106.77
308
329
  20.3.1.178
330
+ 20.8.252.26
309
331
  20.12.141.99
332
+ 20.13.44.19
310
333
  20.40.133.240
311
334
  20.43.150.85
312
335
  20.43.150.93
336
+ 20.43.152.28
313
337
  20.43.172.120
314
338
  20.44.222.1
315
339
  20.49.136.28
@@ -338,10 +362,15 @@ module Legitbot # :nodoc:
338
362
  20.53.91.2
339
363
  20.53.92.211
340
364
  20.53.134.160
365
+ 20.54.224.39
341
366
  20.56.197.58
342
367
  20.56.197.63
343
368
  20.61.34.40
369
+ 20.61.142.192
344
370
  20.62.224.44
371
+ 20.64.185.45
372
+ 20.69.129.205
373
+ 20.69.131.45
345
374
  20.71.12.143
346
375
  20.72.242.93
347
376
  20.73.132.240
@@ -351,10 +380,13 @@ module Legitbot # :nodoc:
351
380
  20.79.238.198
352
381
  20.79.239.66
353
382
  20.80.129.80
383
+ 20.80.156.88
354
384
  20.93.28.24
355
385
  20.99.255.235
386
+ 20.112.58.44
356
387
  20.113.3.121
357
388
  20.113.14.159
389
+ 20.166.171.150
358
390
  20.185.79.15
359
391
  20.185.79.47
360
392
  20.191.44.16
@@ -368,6 +400,7 @@ module Legitbot # :nodoc:
368
400
  20.193.25.197
369
401
  20.193.27.215
370
402
  20.193.45.113
403
+ 20.195.58.189
371
404
  20.195.108.47
372
405
  20.197.209.11
373
406
  20.197.209.27
@@ -386,11 +419,14 @@ module Legitbot # :nodoc:
386
419
  20.207.97.190
387
420
  20.207.99.197
388
421
  20.207.107.181
422
+ 20.212.90.107
389
423
  20.219.43.246
390
424
  20.219.45.67
391
425
  20.219.45.190
392
426
  20.226.133.105
393
- 20.232.184.230
427
+ 20.232.51.46
428
+ 20.252.16.193
429
+ 20.253.59.76
394
430
  40.64.105.247
395
431
  40.64.106.11
396
432
  40.76.162.191
@@ -399,7 +435,6 @@ module Legitbot # :nodoc:
399
435
  40.76.163.7
400
436
  40.76.163.23
401
437
  40.76.173.151
402
- 40.80.242.63
403
438
  40.81.250.205
404
439
  40.88.21.235
405
440
  40.89.243.175
@@ -414,6 +449,11 @@ module Legitbot # :nodoc:
414
449
  40.119.232.215
415
450
  40.119.232.218
416
451
  40.119.232.251
452
+ 40.127.154.196
453
+ 48.204.32.87
454
+ 48.217.23.236
455
+ 48.217.129.210
456
+ 48.217.212.89
417
457
  51.8.71.117
418
458
  51.8.253.152
419
459
  51.104.146.225
@@ -498,6 +538,8 @@ module Legitbot # :nodoc:
498
538
  52.154.171.235
499
539
  52.154.171.250
500
540
  52.154.172.2
541
+ 52.186.37.211
542
+ 52.188.89.106
501
543
  52.190.37.160
502
544
  52.224.16.221
503
545
  52.224.16.229
@@ -523,15 +565,33 @@ module Legitbot # :nodoc:
523
565
  52.224.21.55
524
566
  52.224.21.61
525
567
  52.242.224.168
568
+ 52.250.46.221
526
569
  57.152.72.128
570
+ 57.155.18.44
571
+ 64.236.118.43
572
+ 68.219.152.220
573
+ 74.179.232.116
527
574
  104.43.54.127
528
575
  104.43.55.116
529
576
  104.43.55.117
530
577
  104.43.55.166
531
578
  104.43.55.167
532
579
  108.141.83.74
580
+ 128.203.132.152
533
581
  130.107.228.224
582
+ 132.164.209.198
583
+ 135.234.221.112
584
+ 172.168.53.53
585
+ 172.168.115.250
586
+ 172.168.227.120
587
+ 172.168.254.119
534
588
  172.169.17.165
589
+ 172.169.28.184
590
+ 172.169.60.134
591
+ 172.169.177.131
592
+ 172.193.245.229
593
+ 172.194.141.49
594
+ 172.199.55.212
535
595
  191.233.3.197
536
596
  191.233.3.202
537
597
  191.234.216.4
@@ -50,12 +50,13 @@ module Legitbot # :nodoc:
50
50
  4.197.64.48/28
51
51
  4.197.64.64/28
52
52
  4.197.115.112/28
53
+ 4.198.72.16/28
53
54
  4.205.128.176/28
55
+ 4.226.226.32/28
54
56
  9.160.163.224/28
55
57
  9.160.164.128/28
56
58
  13.65.138.96/28
57
59
  13.65.138.112/28
58
- 13.67.46.240/28
59
60
  13.67.72.16/28
60
61
  13.70.107.160/28
61
62
  13.71.2.208/28
@@ -63,19 +64,19 @@ module Legitbot # :nodoc:
63
64
  13.76.115.224/28
64
65
  13.76.115.240/28
65
66
  13.76.116.80/28
66
- 13.76.223.48/28
67
- 13.79.43.0/28
68
67
  13.83.167.128/28
69
68
  13.83.237.176/28
70
69
  20.0.53.96/28
70
+ 20.17.108.96/28
71
71
  20.27.94.128/28
72
+ 20.42.250.32/28
72
73
  20.45.178.144/28
73
74
  20.55.229.144/28
74
75
  20.63.221.64/28
75
- 20.90.7.144/28
76
76
  20.97.189.96/28
77
77
  20.102.212.144/28
78
- 20.117.22.224/28
78
+ 20.113.218.16/28
79
+ 20.113.225.112/28
79
80
  20.125.112.224/28
80
81
  20.125.144.144/28
81
82
  20.161.75.208/28
@@ -107,6 +108,7 @@ module Legitbot # :nodoc:
107
108
  20.194.1.0/28
108
109
  20.194.157.176/28
109
110
  20.198.67.96/28
111
+ 20.199.211.160/28
110
112
  20.203.245.32/28
111
113
  20.204.24.240/28
112
114
  20.206.107.192/28
@@ -159,6 +161,8 @@ module Legitbot # :nodoc:
159
161
  51.8.155.64/28
160
162
  51.8.155.80/28
161
163
  51.107.70.192/28
164
+ 51.116.2.64/28
165
+ 51.116.2.80/28
162
166
  52.148.129.32/28
163
167
  52.154.22.48/28
164
168
  52.156.77.144/28
@@ -229,6 +233,7 @@ module Legitbot # :nodoc:
229
233
  74.7.36.96/28
230
234
  74.226.253.160/28
231
235
  74.249.86.176/28
236
+ 85.211.241.128/28
232
237
  104.210.139.192/28
233
238
  104.210.139.224/28
234
239
  132.196.82.48/28
@@ -238,9 +243,6 @@ module Legitbot # :nodoc:
238
243
  135.220.73.240/28
239
244
  135.237.131.208/28
240
245
  135.237.133.48/28
241
- 137.135.183.96/28
242
- 137.135.190.240/28
243
- 137.135.191.32/28
244
246
  137.135.191.176/28
245
247
  138.91.30.48/28
246
248
  138.91.46.96/28
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://docs.perplexity.ai/docs/resources/perplexity-crawlers
5
+ class PerplexityBot < BotMatch
6
+ # @fetch:url https://www.perplexity.ai/perplexitybot.json
7
+ # @fetch:jsonpath $.prefixes[*].ipv4Prefix
8
+ ip_ranges %w[
9
+ 107.20.236.150/32
10
+ 18.210.92.235/32
11
+ 18.97.1.228/30
12
+ 18.97.9.96/29
13
+ 3.211.124.183/32
14
+ 3.222.232.239/32
15
+ 3.224.62.45/32
16
+ 3.231.139.107/32
17
+ ]
18
+ end
19
+
20
+ # https://docs.perplexity.ai/docs/resources/perplexity-crawlers
21
+ class PerplexityUser < BotMatch
22
+ # @fetch:url https://www.perplexity.ai/perplexity-user.json
23
+ # @fetch:jsonpath $.prefixes[*].ipv4Prefix
24
+ ip_ranges %w[
25
+ 18.97.21.0/30
26
+ 18.97.43.80/29
27
+ 34.193.163.52/32
28
+ 44.208.221.197/32
29
+ ]
30
+ end
31
+
32
+ rule Legitbot::PerplexityBot, %w[PerplexityBot]
33
+ rule Legitbot::PerplexityUser, %w[Perplexity-User]
34
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.12.11'
4
+ VERSION = '1.13.0'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -18,6 +18,7 @@ require_relative 'legitbot/openai'
18
18
  require_relative 'legitbot/oracle'
19
19
  require_relative 'legitbot/marginalia'
20
20
  require_relative 'legitbot/meta'
21
+ require_relative 'legitbot/perplexity'
21
22
  require_relative 'legitbot/petalbot'
22
23
  require_relative 'legitbot/pinterest'
23
24
  require_relative 'legitbot/twitter'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.12.11
4
+ version: 1.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-03-28 00:00:00.000000000 Z
11
+ date: 2026-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fast_interval_tree
@@ -50,6 +50,26 @@ dependencies:
50
50
  - - ">="
51
51
  - !ruby/object:Gem::Version
52
52
  version: 0.2.1
53
+ - !ruby/object:Gem::Dependency
54
+ name: logger
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '1.7'
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 1.7.0
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '1.7'
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 1.7.0
53
73
  description: Is this Web request from a real search engine or from an impersonating
54
74
  agent?
55
75
  email: self@alaz.me
@@ -88,6 +108,7 @@ files:
88
108
  - lib/legitbot/meta.rb
89
109
  - lib/legitbot/openai.rb
90
110
  - lib/legitbot/oracle.rb
111
+ - lib/legitbot/perplexity.rb
91
112
  - lib/legitbot/petalbot.rb
92
113
  - lib/legitbot/pinterest.rb
93
114
  - lib/legitbot/twitter.rb
@@ -136,14 +157,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
136
157
  requirements:
137
158
  - - ">="
138
159
  - !ruby/object:Gem::Version
139
- version: 3.1.0
160
+ version: 3.2.0
140
161
  required_rubygems_version: !ruby/object:Gem::Requirement
141
162
  requirements:
142
163
  - - ">="
143
164
  - !ruby/object:Gem::Version
144
165
  version: '0'
145
166
  requirements: []
146
- rubygems_version: 3.4.19
167
+ rubygems_version: 3.5.22
147
168
  signing_key:
148
169
  specification_version: 4
149
170
  summary: Web crawler request validation