legitbot 1.4.3 → 1.5.1
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/README.md +11 -1
- data/legitbot.gemspec +6 -1
- data/lib/legitbot/amazon.rb +10 -0
- data/lib/legitbot/duckduckgo.rb +10 -9
- data/lib/legitbot/facebook.rb +4 -1
- data/lib/legitbot/google.rb +9 -1
- data/lib/legitbot/twitter.rb +1 -0
- data/lib/legitbot/version.rb +1 -1
- data/lib/legitbot.rb +1 -0
- data/test/amazon_test.rb +52 -0
- metadata +11 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90eab3a4d53cc388fac2db75058e11c549ec09b3acb3352a398c10e437c0f233
|
4
|
+
data.tar.gz: 5a252a30d37a44de60bb5b7baf1f264477211c22c0acc1477317f8281b7e9103
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c56a32b2bccb2667f5c42c7580c31b2d79884331dfecf5f3cd01f1242ad2e53a5a4d1192a01f1f814014ef3da38702ad5b6041da3ba89d6b14cfbb5658550c47
|
7
|
+
data.tar.gz: c388aad0dec6a86a8d7a9ae23ae1e3200cd7eb3df421f74934b1a7582d9e63c4da1b66d9f28def1685f821c9ac2813b2d7fa38c4653a0a72301576c1cdddcf9b
|
data/.rubocop.yml
CHANGED
data/README.md
CHANGED
@@ -39,10 +39,18 @@ Rack::Attack.blocklist 'fake search engines' do |request|
|
|
39
39
|
end
|
40
40
|
```
|
41
41
|
|
42
|
+
## Versioning
|
43
|
+
|
44
|
+
[Semantic versioning](https://semver.org/) with the following clarifications:
|
45
|
+
|
46
|
+
* MINOR version is incremented when support for new bots is added.
|
47
|
+
* PATCH version is incremented when validation logic for a bot changes (IP list updated, for example).
|
48
|
+
|
42
49
|
## Supported
|
43
50
|
|
44
51
|
* [Ahrefs](https://ahrefs.com/robot)
|
45
52
|
* [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
|
53
|
+
* [Amazon AdBot](https://adbot.amazon.com/index.html)
|
46
54
|
* [Applebot](https://support.apple.com/en-us/HT204683)
|
47
55
|
* [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
|
48
56
|
* [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
|
@@ -59,7 +67,7 @@ end
|
|
59
67
|
|
60
68
|
Apache 2.0
|
61
69
|
|
62
|
-
##
|
70
|
+
## Other projects
|
63
71
|
|
64
72
|
* Play Framework variant in Scala: [play-legitbot](https://github.com/osinka/play-legitbot)
|
65
73
|
* Article [When (Fake) Googlebots Attack Your Rails App](http://jessewolgamott.com/blog/2015/11/17/when-fake-googlebots-attack-your-rails-app/)
|
@@ -72,3 +80,5 @@ Apache 2.0
|
|
72
80
|
classify IP as a search engine, but also label them as suspicious and
|
73
81
|
reports the number of days since the last activity. My implementation of
|
74
82
|
the protocol in Scala is [here](https://github.com/osinka/httpbl).
|
83
|
+
* [CIDRAM](https://github.com/CIDRAM/CIDRAM) is a PHP routing manager with built-in support
|
84
|
+
to validate bots.
|
data/legitbot.gemspec
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'English'
|
4
|
+
|
3
5
|
$LOAD_PATH.push File.expand_path('lib', __dir__)
|
4
6
|
require 'legitbot/version'
|
5
7
|
|
@@ -13,6 +15,9 @@ Gem::Specification.new do |spec|
|
|
13
15
|
spec.homepage = 'https://github.com/alaz/legitbot'
|
14
16
|
spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
|
15
17
|
spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
|
18
|
+
spec.metadata = {
|
19
|
+
'rubygems_mfa_required' => 'true'
|
20
|
+
}
|
16
21
|
|
17
22
|
spec.required_ruby_version = '>= 2.5.0'
|
18
23
|
spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
|
@@ -20,7 +25,7 @@ Gem::Specification.new do |spec|
|
|
20
25
|
spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
|
21
26
|
spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
|
22
27
|
spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
|
23
|
-
spec.add_development_dependency 'rubocop', '~> 1.
|
28
|
+
spec.add_development_dependency 'rubocop', '~> 1.24.0', '>= 1.24.0'
|
24
29
|
|
25
30
|
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
26
31
|
spec.rdoc_options = ['--charset=UTF-8']
|
data/lib/legitbot/duckduckgo.rb
CHANGED
@@ -5,16 +5,17 @@ module Legitbot # :nodoc:
|
|
5
5
|
class DuckDuckGo < BotMatch
|
6
6
|
ip_ranges %w[
|
7
7
|
20.191.45.212
|
8
|
-
23.21.227.69
|
9
8
|
40.88.21.235
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
52.
|
16
|
-
|
17
|
-
|
9
|
+
40.76.173.151
|
10
|
+
40.76.163.7
|
11
|
+
20.185.79.47
|
12
|
+
52.142.26.175
|
13
|
+
20.185.79.15
|
14
|
+
52.142.24.149
|
15
|
+
40.76.162.208
|
16
|
+
40.76.163.23
|
17
|
+
40.76.162.191
|
18
|
+
40.76.162.247
|
18
19
|
54.208.102.37
|
19
20
|
107.21.1.8
|
20
21
|
]
|
data/lib/legitbot/facebook.rb
CHANGED
data/lib/legitbot/google.rb
CHANGED
@@ -7,5 +7,13 @@ module Legitbot # :nodoc:
|
|
7
7
|
domains 'google.com.', 'googlebot.com.'
|
8
8
|
end
|
9
9
|
|
10
|
-
rule Legitbot::Google, %w[
|
10
|
+
rule Legitbot::Google, %w[
|
11
|
+
APIs-Google
|
12
|
+
AdsBot-Google-Mobile
|
13
|
+
AdsBot-Google
|
14
|
+
Googlebot
|
15
|
+
Mediapartners-Google
|
16
|
+
AdsBot-Google-Mobile-Apps
|
17
|
+
FeedFetcher-Google
|
18
|
+
]
|
11
19
|
end
|
data/lib/legitbot/twitter.rb
CHANGED
data/lib/legitbot/version.rb
CHANGED
data/lib/legitbot.rb
CHANGED
data/test/amazon_test.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'legitbot'
|
5
|
+
|
6
|
+
class AmazonTest < Minitest::Test
|
7
|
+
def test_malicious_ip
|
8
|
+
ip = '149.210.164.47'
|
9
|
+
match = Legitbot::Amazon.new ip
|
10
|
+
assert !match.valid?, msg: "#{ip} is not a real AmazonAdBot IP"
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_valid_ip
|
14
|
+
ip = '54.166.7.90'
|
15
|
+
match = Legitbot::Amazon.new ip
|
16
|
+
assert match.valid?, msg: "#{ip} is a valid AmazonAdBot IP"
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_malicious_ua
|
20
|
+
bot = Legitbot.bot(
|
21
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
22
|
+
'149.210.164.47'
|
23
|
+
)
|
24
|
+
assert bot, msg: 'AmazonAdBot detected from User-Agent'
|
25
|
+
assert !bot.valid?, msg: 'Not a valid AmazonAdBot'
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_valid_ua
|
29
|
+
bot = Legitbot.bot(
|
30
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
31
|
+
'54.166.7.90'
|
32
|
+
)
|
33
|
+
assert bot, msg: 'AmazonAdBot detected from User-Agent'
|
34
|
+
assert bot.valid?, msg: 'Valid AmazonAdBot'
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_valid_name
|
38
|
+
bot = Legitbot.bot(
|
39
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
40
|
+
'54.166.7.90'
|
41
|
+
)
|
42
|
+
assert_equal :amazon, bot.detected_as
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_fake_name
|
46
|
+
bot = Legitbot.bot(
|
47
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
48
|
+
'81.1.172.108'
|
49
|
+
)
|
50
|
+
assert_equal :amazon, bot.detected_as
|
51
|
+
end
|
52
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: augmented_interval_tree
|
@@ -116,20 +116,20 @@ dependencies:
|
|
116
116
|
requirements:
|
117
117
|
- - "~>"
|
118
118
|
- !ruby/object:Gem::Version
|
119
|
-
version: 1.
|
119
|
+
version: 1.24.0
|
120
120
|
- - ">="
|
121
121
|
- !ruby/object:Gem::Version
|
122
|
-
version: 1.
|
122
|
+
version: 1.24.0
|
123
123
|
type: :development
|
124
124
|
prerelease: false
|
125
125
|
version_requirements: !ruby/object:Gem::Requirement
|
126
126
|
requirements:
|
127
127
|
- - "~>"
|
128
128
|
- !ruby/object:Gem::Version
|
129
|
-
version: 1.
|
129
|
+
version: 1.24.0
|
130
130
|
- - ">="
|
131
131
|
- !ruby/object:Gem::Version
|
132
|
-
version: 1.
|
132
|
+
version: 1.24.0
|
133
133
|
description: Does Web request come from a real search engine or from an impersonating
|
134
134
|
agent?
|
135
135
|
email: self@alaz.me
|
@@ -150,6 +150,7 @@ files:
|
|
150
150
|
- lib/legitbot.rb
|
151
151
|
- lib/legitbot/ahrefs.rb
|
152
152
|
- lib/legitbot/alexa.rb
|
153
|
+
- lib/legitbot/amazon.rb
|
153
154
|
- lib/legitbot/apple.rb
|
154
155
|
- lib/legitbot/baidu.rb
|
155
156
|
- lib/legitbot/bing.rb
|
@@ -169,6 +170,7 @@ files:
|
|
169
170
|
- lib/legitbot/yandex.rb
|
170
171
|
- test/ahrefs_test.rb
|
171
172
|
- test/alexa_test.rb
|
173
|
+
- test/amazon_test.rb
|
172
174
|
- test/apple_test.rb
|
173
175
|
- test/botmatch_test.rb
|
174
176
|
- test/facebook_test.rb
|
@@ -183,7 +185,8 @@ files:
|
|
183
185
|
homepage: https://github.com/alaz/legitbot
|
184
186
|
licenses:
|
185
187
|
- Apache-2.0
|
186
|
-
metadata:
|
188
|
+
metadata:
|
189
|
+
rubygems_mfa_required: 'true'
|
187
190
|
post_install_message:
|
188
191
|
rdoc_options:
|
189
192
|
- "--charset=UTF-8"
|
@@ -214,6 +217,7 @@ test_files:
|
|
214
217
|
- test/apple_test.rb
|
215
218
|
- test/oracle_test.rb
|
216
219
|
- test/google_test.rb
|
220
|
+
- test/amazon_test.rb
|
217
221
|
- test/petalbot_test.rb
|
218
222
|
- test/botmatch_test.rb
|
219
223
|
- test/facebook_test.rb
|