legitbot 1.10.3 → 1.10.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/autocorrect.yml +1 -1
- data/.github/workflows/codecov.yml +1 -1
- data/.github/workflows/pr.yml +2 -2
- data/.ruby-version +1 -1
- data/legitbot.gemspec +3 -3
- data/lib/legitbot/duckduckgo.rb +12 -0
- data/lib/legitbot/marginalia.rb +16 -1
- data/lib/legitbot/version.rb +1 -1
- data/lib/rubocop/cop/custom/ip_ranges.rb +2 -2
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ac43bac7745d6e0ef906342c82102e87c8644a6cd8a8af3077831f13ebbd79e
|
4
|
+
data.tar.gz: 0eeed734eadee6c380d41a604fa21e83aba8d8431cb8444b27fa2b0ec0cabe2a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 36b25cf91d14fd4211e31c9dd92f8ec7439155a2e7862329d848615ceaff52f5fd1800b56fb25f0956b01572ed019f4689a81de9ba1b828c696710f451517344
|
7
|
+
data.tar.gz: 49fbc215737f0d863c1a5ffdf28f5a41f438d31923987cfdbe58a60dd6056f20bc37985ebf1dca2e218dc30ab0a216ba005a670e2027f780280899e689ea7c0c
|
data/.github/workflows/pr.yml
CHANGED
@@ -10,7 +10,7 @@ jobs:
|
|
10
10
|
strategy:
|
11
11
|
fail-fast: false
|
12
12
|
matrix:
|
13
|
-
ruby: [jruby, 3.0, 3.1, 3.2]
|
13
|
+
ruby: [jruby, 3.0.6, 3.1, 3.2]
|
14
14
|
|
15
15
|
steps:
|
16
16
|
- uses: actions/checkout@v3
|
@@ -38,7 +38,7 @@ jobs:
|
|
38
38
|
|
39
39
|
strategy:
|
40
40
|
matrix:
|
41
|
-
ruby: [3.
|
41
|
+
ruby: [3.1]
|
42
42
|
|
43
43
|
steps:
|
44
44
|
- uses: actions/checkout@v3
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.
|
1
|
+
3.1.6
|
data/legitbot.gemspec
CHANGED
@@ -13,13 +13,13 @@ Gem::Specification.new do |spec|
|
|
13
13
|
spec.author = 'Alexander Azarov'
|
14
14
|
spec.email = 'self@alaz.me'
|
15
15
|
spec.homepage = 'https://github.com/alaz/legitbot'
|
16
|
-
spec.summary = '
|
17
|
-
spec.description = '
|
16
|
+
spec.summary = 'Web crawler request validation'
|
17
|
+
spec.description = 'Is this Web request from a real search engine or from an impersonating agent?'
|
18
18
|
spec.metadata = {
|
19
19
|
'rubygems_mfa_required' => 'true'
|
20
20
|
}
|
21
21
|
|
22
|
-
spec.required_ruby_version = '>=
|
22
|
+
spec.required_ruby_version = '>= 3.0.0'
|
23
23
|
spec.add_dependency 'fast_interval_tree', '~> 0.2', '>= 0.2.2'
|
24
24
|
spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
|
25
25
|
|
data/lib/legitbot/duckduckgo.rb
CHANGED
@@ -7,7 +7,13 @@ module Legitbot # :nodoc:
|
|
7
7
|
# @fetch:url https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/
|
8
8
|
# @fetch:selector section.main article.content ul > li
|
9
9
|
ip_ranges %w[
|
10
|
+
4.182.131.108
|
11
|
+
4.195.133.120
|
12
|
+
4.209.224.56
|
13
|
+
4.213.46.14
|
14
|
+
4.228.76.163
|
10
15
|
13.89.106.77
|
16
|
+
20.3.1.178
|
11
17
|
20.12.141.99
|
12
18
|
20.40.133.240
|
13
19
|
20.43.150.85
|
@@ -70,6 +76,7 @@ module Legitbot # :nodoc:
|
|
70
76
|
20.193.25.197
|
71
77
|
20.193.27.215
|
72
78
|
20.193.45.113
|
79
|
+
20.195.108.47
|
73
80
|
20.197.209.11
|
74
81
|
20.197.209.27
|
75
82
|
20.201.15.208
|
@@ -114,6 +121,8 @@ module Legitbot # :nodoc:
|
|
114
121
|
40.119.232.215
|
115
122
|
40.119.232.218
|
116
123
|
40.119.232.251
|
124
|
+
51.8.71.117
|
125
|
+
51.8.253.152
|
117
126
|
51.104.146.225
|
118
127
|
51.104.146.235
|
119
128
|
51.104.160.167
|
@@ -221,11 +230,14 @@ module Legitbot # :nodoc:
|
|
221
230
|
52.224.21.55
|
222
231
|
52.224.21.61
|
223
232
|
52.242.224.168
|
233
|
+
57.152.72.128
|
224
234
|
104.43.54.127
|
225
235
|
104.43.55.116
|
226
236
|
104.43.55.117
|
227
237
|
104.43.55.166
|
228
238
|
104.43.55.167
|
239
|
+
108.141.83.74
|
240
|
+
172.169.17.165
|
229
241
|
191.233.3.197
|
230
242
|
191.233.3.202
|
231
243
|
191.234.216.4
|
data/lib/legitbot/marginalia.rb
CHANGED
@@ -3,8 +3,23 @@
|
|
3
3
|
module Legitbot # :nodoc:
|
4
4
|
# https://www.marginalia.nu/marginalia-search/for-webmasters/
|
5
5
|
class Marginalia < BotMatch
|
6
|
+
# https://x.com/MarginaliaNu/status/1824172354081263991
|
7
|
+
# @fetch:url https://search.marginalia.nu/crawler-ips.txt
|
6
8
|
ip_ranges %w[
|
7
|
-
81.170.128.
|
9
|
+
81.170.128.52
|
10
|
+
193.183.0.162
|
11
|
+
193.183.0.163
|
12
|
+
193.183.0.164
|
13
|
+
193.183.0.165
|
14
|
+
193.183.0.166
|
15
|
+
193.183.0.167
|
16
|
+
193.183.0.168
|
17
|
+
193.183.0.169
|
18
|
+
193.183.0.170
|
19
|
+
193.183.0.171
|
20
|
+
193.183.0.172
|
21
|
+
193.183.0.173
|
22
|
+
193.183.0.174
|
8
23
|
]
|
9
24
|
end
|
10
25
|
|
data/lib/rubocop/cop/custom/ip_ranges.rb
CHANGED
@@ -14,7 +14,7 @@ module RuboCop
|
|
14
14
|
extend AutoCorrector
|
15
15
|
|
16
16
|
MSG = 'Outdated list of IP ranges compared to %<url>s'
|
17
|
-
REGEXP = /^\s*#\s*@fetch:(?<param>[a-z0-9_]+)\s+(?<arg>.*)
|
17
|
+
REGEXP = /^\s*#\s*@fetch:(?<param>[a-z0-9_]+)\s+(?<arg>.*)?/
|
18
18
|
|
19
19
|
def_node_matcher :on_ip_ranges, <<~PATTERN
|
20
20
|
(send nil? :ip_ranges $(array str+))
|
@@ -75,7 +75,7 @@ module RuboCop
|
|
75
75
|
end
|
76
76
|
|
77
77
|
def normalise_list(ips)
|
78
|
-
ips.sort_by(&IPAddr.method(:new))
|
78
|
+
ips.uniq.sort_by(&IPAddr.method(:new))
|
79
79
|
end
|
80
80
|
|
81
81
|
def register_offense(node, new_ips, **params)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.10.3
|
4
|
+
version: 1.10.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-08-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fast_interval_tree
|
@@ -50,7 +50,7 @@ dependencies:
|
|
50
50
|
- - ">="
|
51
51
|
- !ruby/object:Gem::Version
|
52
52
|
version: 0.2.1
|
53
|
-
description:
|
53
|
+
description: Is this Web request from a real search engine or from an impersonating
|
54
54
|
agent?
|
55
55
|
email: self@alaz.me
|
56
56
|
executables: []
|
@@ -131,15 +131,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
131
131
|
requirements:
|
132
132
|
- - ">="
|
133
133
|
- !ruby/object:Gem::Version
|
134
|
-
version:
|
134
|
+
version: 3.0.0
|
135
135
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
136
136
|
requirements:
|
137
137
|
- - ">="
|
138
138
|
- !ruby/object:Gem::Version
|
139
139
|
version: '0'
|
140
140
|
requirements: []
|
141
|
-
rubygems_version: 3.
|
141
|
+
rubygems_version: 3.3.27
|
142
142
|
signing_key:
|
143
143
|
specification_version: 4
|
144
|
-
summary:
|
144
|
+
summary: Web crawler request validation
|
145
145
|
test_files: []
|