legitbot 1.0.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.editorconfig +17 -0
- data/.github/workflows/build.yml +1 -1
- data/.rubocop.yml +1 -0
- data/README.md +3 -1
- data/legitbot.gemspec +2 -2
- data/lib/legitbot.rb +2 -0
- data/lib/legitbot/alexa.rb +17 -0
- data/lib/legitbot/apple.rb +0 -8
- data/lib/legitbot/legitbot.rb +2 -2
- data/lib/legitbot/petalbot.rb +10 -0
- data/lib/legitbot/validators/ip_ranges.rb +1 -1
- data/lib/legitbot/version.rb +1 -1
- data/test/alexa_test.rb +36 -0
- data/test/facebook_test.rb +2 -2
- data/test/legitbot/validators/ip_ranges_test.rb +9 -0
- data/test/petalbot_test.rb +52 -0
- metadata +18 -13
- data/test/apple_as_google_test.rb +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d709839505242be7ed07c5f6413e607bd8eb127a011765b9f45c1a3e0bba2fbb
|
4
|
+
data.tar.gz: 53a19413068473299ce78887756a2ef50f59859e54d652e4314755178a30028c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: daaf7bbde5fae2ab3937b5e4f4cd936a61f54b80d5ec87eaca8db473977f633c993f267f6aeb101b94404119e58835878ee1433683cf613b476c6fc7f27b34b3
|
7
|
+
data.tar.gz: 1e51fe4ee02eb14bb0a95230e2ca3d4716a56cee199a40f3cfcd4d0999158526f151fa7915a9a3f0a21a9054ebc3862353e1a68a89057c2cf8bbcac4d082afb8
|
data/.editorconfig
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
root = true
|
2
|
+
|
3
|
+
[*]
|
4
|
+
end_of_line = lf
|
5
|
+
insert_final_newline = true
|
6
|
+
trim_trailing_whitespace = true
|
7
|
+
charset = utf-8
|
8
|
+
|
9
|
+
indent_style = space
|
10
|
+
indent_size = 2
|
11
|
+
|
12
|
+
[*.md]
|
13
|
+
trim_trailing_whitespace = false
|
14
|
+
|
15
|
+
[*.yml]
|
16
|
+
indent_style = space
|
17
|
+
indent_size = 2
|
data/.github/workflows/build.yml
CHANGED
data/.rubocop.yml
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Legitbot 
|
1
|
+
# Legitbot  
|
2
2
|
|
3
3
|
Ruby gem to check that an IP belongs to a bot, typically a search
|
4
4
|
engine. This can be of help in protecting a web site from fake search
|
@@ -44,6 +44,7 @@ end
|
|
44
44
|
## Supported
|
45
45
|
|
46
46
|
* [Ahrefs](https://ahrefs.com/robot)
|
47
|
+
* [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
|
47
48
|
* [Applebot](https://support.apple.com/en-us/HT204683)
|
48
49
|
* [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
|
49
50
|
* [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
|
@@ -54,6 +55,7 @@ end
|
|
54
55
|
* [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
|
55
56
|
* [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
|
56
57
|
* [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
|
58
|
+
* [Petal robots (Huawei search)](http://aspiegel.com/petalbot)
|
57
59
|
|
58
60
|
## License
|
59
61
|
|
data/legitbot.gemspec
CHANGED
@@ -19,8 +19,8 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
|
20
20
|
spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
|
21
21
|
spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
|
22
|
-
spec.add_development_dependency 'rake', '~>
|
23
|
-
spec.add_development_dependency 'rubocop', '~> 0.
|
22
|
+
spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
|
23
|
+
spec.add_development_dependency 'rubocop', '~> 0.92.0', '>= 0.92.0'
|
24
24
|
|
25
25
|
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
26
26
|
spec.rdoc_options = ['--charset=UTF-8']
|
data/lib/legitbot.rb
CHANGED
@@ -4,6 +4,7 @@ require_relative 'legitbot/legitbot'
|
|
4
4
|
require_relative 'legitbot/botmatch'
|
5
5
|
|
6
6
|
require_relative 'legitbot/ahrefs'
|
7
|
+
require_relative 'legitbot/alexa'
|
7
8
|
require_relative 'legitbot/apple'
|
8
9
|
require_relative 'legitbot/baidu'
|
9
10
|
require_relative 'legitbot/bing'
|
@@ -14,3 +15,4 @@ require_relative 'legitbot/oracle'
|
|
14
15
|
require_relative 'legitbot/pinterest'
|
15
16
|
require_relative 'legitbot/twitter'
|
16
17
|
require_relative 'legitbot/yandex'
|
18
|
+
require_relative 'legitbot/petalbot'
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
4
|
+
# https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-
|
5
|
+
# https://support.alexa.com/hc/en-us/articles/200462340
|
6
|
+
# https://support.alexa.com/hc/en-us/articles/200450194
|
7
|
+
class Alexa < BotMatch
|
8
|
+
ip_ranges %w[
|
9
|
+
52.86.176.3
|
10
|
+
52.4.48.181
|
11
|
+
52.2.182.169
|
12
|
+
52.86.185.29
|
13
|
+
]
|
14
|
+
end
|
15
|
+
|
16
|
+
rule Legitbot::Alexa, %w[Alexabot ia_archiver]
|
17
|
+
end
|
data/lib/legitbot/apple.rb
CHANGED
@@ -8,13 +8,5 @@ module Legitbot # :nodoc:
|
|
8
8
|
ip_ranges '17.0.0.0/8'
|
9
9
|
end
|
10
10
|
|
11
|
-
# https://support.apple.com/en-us/HT204683
|
12
|
-
# rubocop:disable Naming/ClassAndModuleCamelCase
|
13
|
-
class Apple_as_Google < BotMatch
|
14
|
-
ip_ranges '17.0.0.0/8'
|
15
|
-
end
|
16
|
-
# rubocop:enable Naming/ClassAndModuleCamelCase
|
17
|
-
|
18
11
|
rule Legitbot::Apple, %w[Applebot]
|
19
|
-
rule Legitbot::Apple_as_Google, %w[Googlebot]
|
20
12
|
end
|
data/lib/legitbot/legitbot.rb
CHANGED
@@ -18,7 +18,7 @@ module Legitbot
|
|
18
18
|
# otherwise.
|
19
19
|
# :yields: a found bot
|
20
20
|
#
|
21
|
-
# rubocop:disable Metrics/CyclomaticComplexity
|
21
|
+
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
22
22
|
def self.bot(user_agent, ip)
|
23
23
|
bots = @rules
|
24
24
|
.select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
|
@@ -33,7 +33,7 @@ module Legitbot
|
|
33
33
|
selected
|
34
34
|
end
|
35
35
|
end
|
36
|
-
# rubocop:enable Metrics/CyclomaticComplexity
|
36
|
+
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
37
37
|
|
38
38
|
def self.rule(clazz, fragments)
|
39
39
|
@rules << { class: clazz, fragments: fragments }
|
data/lib/legitbot/version.rb
CHANGED
data/test/alexa_test.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'legitbot'
|
5
|
+
|
6
|
+
class AlexaTest < Minitest::Test
|
7
|
+
def test_malicious_ip
|
8
|
+
ip = '149.210.164.47'
|
9
|
+
match = Legitbot::Alexa.new ip
|
10
|
+
assert !match.valid?, msg: "#{ip} is not a real Alexa IP"
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_valid_ip
|
14
|
+
ip = '52.86.176.3'
|
15
|
+
match = Legitbot::Alexa.new ip
|
16
|
+
assert match.valid?, msg: "#{ip} is a valid Alexa IP"
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_malicious_ua
|
20
|
+
bot = Legitbot.bot(
|
21
|
+
'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
|
22
|
+
'149.210.164.47'
|
23
|
+
)
|
24
|
+
assert bot, msg: 'Alexa detected from User-Agent'
|
25
|
+
assert !bot.valid?, msg: 'Not a valid Alexa'
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_valid_ua
|
29
|
+
bot = Legitbot.bot(
|
30
|
+
'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
|
31
|
+
'52.86.176.3'
|
32
|
+
)
|
33
|
+
assert bot, msg: 'Alexa detected from User-Agent'
|
34
|
+
assert bot.valid?, msg: 'Valid Alexa'
|
35
|
+
end
|
36
|
+
end
|
data/test/facebook_test.rb
CHANGED
@@ -33,7 +33,7 @@ class FacebookTest < Minitest::Test
|
|
33
33
|
assert match.fake?, msg: "#{ip} is a fake Facebook IP"
|
34
34
|
end
|
35
35
|
|
36
|
-
# rubocop:disable
|
36
|
+
# rubocop:disable Layout/LineLength, Metrics/MethodLength
|
37
37
|
def test_user_agent
|
38
38
|
Legitbot.bot(
|
39
39
|
'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
|
@@ -59,5 +59,5 @@ class FacebookTest < Minitest::Test
|
|
59
59
|
assert bot.fake?, msg: 'fake Facebook'
|
60
60
|
end
|
61
61
|
end
|
62
|
-
# rubocop:enable
|
62
|
+
# rubocop:enable Layout/LineLength, Metrics/MethodLength
|
63
63
|
end
|
@@ -51,6 +51,11 @@ module Legitbot
|
|
51
51
|
ip_ranges { nil }
|
52
52
|
end
|
53
53
|
|
54
|
+
class Ipv4Ranges
|
55
|
+
include IpRanges
|
56
|
+
ip_ranges { ['66.220.144.0/21'] }
|
57
|
+
end
|
58
|
+
|
54
59
|
class IpRangesTest < Minitest::Test
|
55
60
|
def test_partition_method
|
56
61
|
empty = NoRanges.partition_ips([])
|
@@ -117,6 +122,10 @@ module Legitbot
|
|
117
122
|
def test_nil_ranges
|
118
123
|
assert NilRanges.valid_ip?('127.0.0.1')
|
119
124
|
end
|
125
|
+
|
126
|
+
def test_ipv4_only_ranges
|
127
|
+
refute Ipv4Ranges.valid_ip?('2a03:2880:f234:0:0:0:0:1')
|
128
|
+
end
|
120
129
|
end
|
121
130
|
end
|
122
131
|
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'legitbot'
|
5
|
+
|
6
|
+
class PetalbotTest < Minitest::Test
|
7
|
+
def test_malicious_ip
|
8
|
+
ip = '149.210.164.47'
|
9
|
+
match = Legitbot::Petalbot.new ip
|
10
|
+
assert !match.valid?, msg: "#{ip} is not a real Petalbot IP"
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_valid_ip
|
14
|
+
ip = '114.119.153.50'
|
15
|
+
match = Legitbot::Petalbot.new ip
|
16
|
+
assert match.valid?, msg: "#{ip} is a valid Petalbot IP"
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_malicious_ua
|
20
|
+
bot = Legitbot.bot(
|
21
|
+
'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
|
22
|
+
'149.210.164.47'
|
23
|
+
)
|
24
|
+
assert bot, msg: 'Petalbot detected from User-Agent'
|
25
|
+
assert !bot.valid?, msg: 'Not a valid Petalbot'
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_valid_ua
|
29
|
+
bot = Legitbot.bot(
|
30
|
+
'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
|
31
|
+
'114.119.153.50'
|
32
|
+
)
|
33
|
+
assert bot, msg: 'Petalbot detected from User-Agent'
|
34
|
+
assert bot.valid?, msg: 'Valid Petalbot'
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_valid_name
|
38
|
+
bot = Legitbot.bot(
|
39
|
+
'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
|
40
|
+
'66.249.64.141'
|
41
|
+
)
|
42
|
+
assert_equal :petalbot, bot.detected_as
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_fake_name
|
46
|
+
bot = Legitbot.bot(
|
47
|
+
'Mozilla/5.0 (compatible; PetalBot/2.1; +http://www.google.com/bot.html)',
|
48
|
+
'81.1.172.108'
|
49
|
+
)
|
50
|
+
assert_equal :petalbot, bot.detected_as
|
51
|
+
end
|
52
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-12-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: augmented_interval_tree
|
@@ -96,40 +96,40 @@ dependencies:
|
|
96
96
|
requirements:
|
97
97
|
- - "~>"
|
98
98
|
- !ruby/object:Gem::Version
|
99
|
-
version: '
|
99
|
+
version: '13.0'
|
100
100
|
- - ">="
|
101
101
|
- !ruby/object:Gem::Version
|
102
|
-
version:
|
102
|
+
version: 13.0.0
|
103
103
|
type: :development
|
104
104
|
prerelease: false
|
105
105
|
version_requirements: !ruby/object:Gem::Requirement
|
106
106
|
requirements:
|
107
107
|
- - "~>"
|
108
108
|
- !ruby/object:Gem::Version
|
109
|
-
version: '
|
109
|
+
version: '13.0'
|
110
110
|
- - ">="
|
111
111
|
- !ruby/object:Gem::Version
|
112
|
-
version:
|
112
|
+
version: 13.0.0
|
113
113
|
- !ruby/object:Gem::Dependency
|
114
114
|
name: rubocop
|
115
115
|
requirement: !ruby/object:Gem::Requirement
|
116
116
|
requirements:
|
117
117
|
- - "~>"
|
118
118
|
- !ruby/object:Gem::Version
|
119
|
-
version:
|
119
|
+
version: 0.92.0
|
120
120
|
- - ">="
|
121
121
|
- !ruby/object:Gem::Version
|
122
|
-
version: 0.
|
122
|
+
version: 0.92.0
|
123
123
|
type: :development
|
124
124
|
prerelease: false
|
125
125
|
version_requirements: !ruby/object:Gem::Requirement
|
126
126
|
requirements:
|
127
127
|
- - "~>"
|
128
128
|
- !ruby/object:Gem::Version
|
129
|
-
version:
|
129
|
+
version: 0.92.0
|
130
130
|
- - ">="
|
131
131
|
- !ruby/object:Gem::Version
|
132
|
-
version: 0.
|
132
|
+
version: 0.92.0
|
133
133
|
description: Does Web request come from a real search engine or from an impersonating
|
134
134
|
agent?
|
135
135
|
email: self@alaz.me
|
@@ -137,6 +137,7 @@ executables: []
|
|
137
137
|
extensions: []
|
138
138
|
extra_rdoc_files: []
|
139
139
|
files:
|
140
|
+
- ".editorconfig"
|
140
141
|
- ".github/workflows/build.yml"
|
141
142
|
- ".gitignore"
|
142
143
|
- ".rubocop.yml"
|
@@ -148,6 +149,7 @@ files:
|
|
148
149
|
- legitbot.gemspec
|
149
150
|
- lib/legitbot.rb
|
150
151
|
- lib/legitbot/ahrefs.rb
|
152
|
+
- lib/legitbot/alexa.rb
|
151
153
|
- lib/legitbot/apple.rb
|
152
154
|
- lib/legitbot/baidu.rb
|
153
155
|
- lib/legitbot/bing.rb
|
@@ -158,6 +160,7 @@ files:
|
|
158
160
|
- lib/legitbot/google.rb
|
159
161
|
- lib/legitbot/legitbot.rb
|
160
162
|
- lib/legitbot/oracle.rb
|
163
|
+
- lib/legitbot/petalbot.rb
|
161
164
|
- lib/legitbot/pinterest.rb
|
162
165
|
- lib/legitbot/twitter.rb
|
163
166
|
- lib/legitbot/validators/domains.rb
|
@@ -165,7 +168,7 @@ files:
|
|
165
168
|
- lib/legitbot/version.rb
|
166
169
|
- lib/legitbot/yandex.rb
|
167
170
|
- test/ahrefs_test.rb
|
168
|
-
- test/
|
171
|
+
- test/alexa_test.rb
|
169
172
|
- test/apple_test.rb
|
170
173
|
- test/botmatch_test.rb
|
171
174
|
- test/facebook_test.rb
|
@@ -174,6 +177,7 @@ files:
|
|
174
177
|
- test/legitbot/validators/ip_ranges_test.rb
|
175
178
|
- test/legitbot_test.rb
|
176
179
|
- test/oracle_test.rb
|
180
|
+
- test/petalbot_test.rb
|
177
181
|
- test/pinterest_test.rb
|
178
182
|
- test/twitter_test.rb
|
179
183
|
homepage: https://github.com/alaz/legitbot
|
@@ -196,7 +200,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
196
200
|
- !ruby/object:Gem::Version
|
197
201
|
version: '0'
|
198
202
|
requirements: []
|
199
|
-
rubygems_version: 3.1.
|
203
|
+
rubygems_version: 3.1.4
|
200
204
|
signing_key:
|
201
205
|
specification_version: 4
|
202
206
|
summary: 'Validate requests from Web crawlers: impersonating or not?'
|
@@ -205,11 +209,12 @@ test_files:
|
|
205
209
|
- test/legitbot/validators/domains_test.rb
|
206
210
|
- test/legitbot/validators/ip_ranges_test.rb
|
207
211
|
- test/pinterest_test.rb
|
212
|
+
- test/alexa_test.rb
|
208
213
|
- test/ahrefs_test.rb
|
209
214
|
- test/apple_test.rb
|
210
|
-
- test/apple_as_google_test.rb
|
211
215
|
- test/oracle_test.rb
|
212
216
|
- test/google_test.rb
|
217
|
+
- test/petalbot_test.rb
|
213
218
|
- test/botmatch_test.rb
|
214
219
|
- test/facebook_test.rb
|
215
220
|
- test/twitter_test.rb
|
@@ -1,27 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'minitest/autorun'
|
4
|
-
require 'legitbot'
|
5
|
-
|
6
|
-
class AppleAsGoogleTest < Minitest::Test
|
7
|
-
def test_valid_ip
|
8
|
-
ip = '17.58.98.60'
|
9
|
-
match = Legitbot::Apple_as_Google.new(ip)
|
10
|
-
assert match.valid?, msg: "#{ip} is a valid Applebot IP"
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_invalid_ip
|
14
|
-
ip = '127.0.0.1'
|
15
|
-
match = Legitbot::Apple_as_Google.new(ip)
|
16
|
-
assert match.fake?, msg: "#{ip} is a fake Applebot IP"
|
17
|
-
end
|
18
|
-
|
19
|
-
def test_user_agent
|
20
|
-
bot = Legitbot.bot(
|
21
|
-
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
22
|
-
'17.58.98.60'
|
23
|
-
)
|
24
|
-
assert_equal :apple_as_google, bot.detected_as
|
25
|
-
assert bot.valid?, msg: 'A valid Applebot User-agent and IP'
|
26
|
-
end
|
27
|
-
end
|