legitbot 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +1 -1
- data/.rubocop.yml +1 -0
- data/README.md +1 -0
- data/legitbot.gemspec +1 -1
- data/lib/legitbot.rb +1 -0
- data/lib/legitbot/alexa.rb +17 -0
- data/lib/legitbot/legitbot.rb +2 -2
- data/lib/legitbot/version.rb +1 -1
- data/test/alexa_test.rb +36 -0
- data/test/facebook_test.rb +2 -2
- metadata +11 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0b43dcf4386a8427415729bf9ba5edfe3c0efc24a002bb4301ff98d43c4ee0d
|
4
|
+
data.tar.gz: 9f18f03e613473a97cda23e8663d44da10cfd32b3fd271e1ffd5ee20cfdef48d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 73ad2e000c4513880bc5558c7fe5f7c7c04d01b1a820ef7b12541c8c9d6cc5dddd20c785d67ef489d5b00664632c6b88a805c3bb1e2a060517126c7d7292d661
|
7
|
+
data.tar.gz: a578a30a512495523e906ec1d6a650f4f472a58856caa542714d3e57a0b99f764ee015f11624c4a20cae79a3975eb41eaf8ced106977dd994981d71c288b5653
|
data/.github/workflows/build.yml
CHANGED
data/.rubocop.yml
CHANGED
data/README.md
CHANGED
@@ -44,6 +44,7 @@ end
|
|
44
44
|
## Supported
|
45
45
|
|
46
46
|
* [Ahrefs](https://ahrefs.com/robot)
|
47
|
+
* [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
|
47
48
|
* [Applebot](https://support.apple.com/en-us/HT204683)
|
48
49
|
* [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
|
49
50
|
* [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
|
data/legitbot.gemspec
CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
|
|
20
20
|
spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
|
21
21
|
spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
|
22
22
|
spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.0'
|
23
|
-
spec.add_development_dependency 'rubocop', '~> 0.
|
23
|
+
spec.add_development_dependency 'rubocop', '~> 0.90', '< 0.91'
|
24
24
|
|
25
25
|
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
26
26
|
spec.rdoc_options = ['--charset=UTF-8']
|
data/lib/legitbot.rb
CHANGED
data/lib/legitbot/alexa.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
4
|
+
# https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-
|
5
|
+
# https://support.alexa.com/hc/en-us/articles/200462340
|
6
|
+
# https://support.alexa.com/hc/en-us/articles/200450194
|
7
|
+
class Alexa < BotMatch
|
8
|
+
ip_ranges %w[
|
9
|
+
52.86.176.3
|
10
|
+
52.4.48.181
|
11
|
+
52.2.182.169
|
12
|
+
52.86.185.29
|
13
|
+
]
|
14
|
+
end
|
15
|
+
|
16
|
+
rule Legitbot::Alexa, %w[Alexabot ia_archiver]
|
17
|
+
end
|
data/lib/legitbot/legitbot.rb
CHANGED
@@ -18,7 +18,7 @@ module Legitbot
|
|
18
18
|
# otherwise.
|
19
19
|
# :yields: a found bot
|
20
20
|
#
|
21
|
-
# rubocop:disable Metrics/CyclomaticComplexity
|
21
|
+
# rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
22
22
|
def self.bot(user_agent, ip)
|
23
23
|
bots = @rules
|
24
24
|
.select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
|
@@ -33,7 +33,7 @@ module Legitbot
|
|
33
33
|
selected
|
34
34
|
end
|
35
35
|
end
|
36
|
-
# rubocop:enable Metrics/CyclomaticComplexity
|
36
|
+
# rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
37
37
|
|
38
38
|
def self.rule(clazz, fragments)
|
39
39
|
@rules << { class: clazz, fragments: fragments }
|
data/lib/legitbot/version.rb
CHANGED
data/test/alexa_test.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'legitbot'
|
5
|
+
|
6
|
+
class AlexaTest < Minitest::Test
|
7
|
+
def test_malicious_ip
|
8
|
+
ip = '149.210.164.47'
|
9
|
+
match = Legitbot::Alexa.new ip
|
10
|
+
assert !match.valid?, msg: "#{ip} is not a real Alexa IP"
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_valid_ip
|
14
|
+
ip = '52.86.176.3'
|
15
|
+
match = Legitbot::Alexa.new ip
|
16
|
+
assert match.valid?, msg: "#{ip} is a valid Alexa IP"
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_malicious_ua
|
20
|
+
bot = Legitbot.bot(
|
21
|
+
'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
|
22
|
+
'149.210.164.47'
|
23
|
+
)
|
24
|
+
assert bot, msg: 'Alexa detected from User-Agent'
|
25
|
+
assert !bot.valid?, msg: 'Not a valid Alexa'
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_valid_ua
|
29
|
+
bot = Legitbot.bot(
|
30
|
+
'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
|
31
|
+
'52.86.176.3'
|
32
|
+
)
|
33
|
+
assert bot, msg: 'Alexa detected from User-Agent'
|
34
|
+
assert bot.valid?, msg: 'Valid Alexa'
|
35
|
+
end
|
36
|
+
end
|
data/test/facebook_test.rb
CHANGED
@@ -33,7 +33,7 @@ class FacebookTest < Minitest::Test
|
|
33
33
|
assert match.fake?, msg: "#{ip} is a fake Facebook IP"
|
34
34
|
end
|
35
35
|
|
36
|
-
# rubocop:disable
|
36
|
+
# rubocop:disable Layout/LineLength, Metrics/MethodLength
|
37
37
|
def test_user_agent
|
38
38
|
Legitbot.bot(
|
39
39
|
'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
|
@@ -59,5 +59,5 @@ class FacebookTest < Minitest::Test
|
|
59
59
|
assert bot.fake?, msg: 'fake Facebook'
|
60
60
|
end
|
61
61
|
end
|
62
|
-
# rubocop:enable
|
62
|
+
# rubocop:enable Layout/LineLength, Metrics/MethodLength
|
63
63
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: augmented_interval_tree
|
@@ -116,20 +116,20 @@ dependencies:
|
|
116
116
|
requirements:
|
117
117
|
- - "~>"
|
118
118
|
- !ruby/object:Gem::Version
|
119
|
-
version: '0.
|
120
|
-
- - "
|
119
|
+
version: '0.90'
|
120
|
+
- - "<"
|
121
121
|
- !ruby/object:Gem::Version
|
122
|
-
version: 0.
|
122
|
+
version: '0.91'
|
123
123
|
type: :development
|
124
124
|
prerelease: false
|
125
125
|
version_requirements: !ruby/object:Gem::Requirement
|
126
126
|
requirements:
|
127
127
|
- - "~>"
|
128
128
|
- !ruby/object:Gem::Version
|
129
|
-
version: '0.
|
130
|
-
- - "
|
129
|
+
version: '0.90'
|
130
|
+
- - "<"
|
131
131
|
- !ruby/object:Gem::Version
|
132
|
-
version: 0.
|
132
|
+
version: '0.91'
|
133
133
|
description: Does Web request come from a real search engine or from an impersonating
|
134
134
|
agent?
|
135
135
|
email: self@alaz.me
|
@@ -148,6 +148,7 @@ files:
|
|
148
148
|
- legitbot.gemspec
|
149
149
|
- lib/legitbot.rb
|
150
150
|
- lib/legitbot/ahrefs.rb
|
151
|
+
- lib/legitbot/alexa.rb
|
151
152
|
- lib/legitbot/apple.rb
|
152
153
|
- lib/legitbot/baidu.rb
|
153
154
|
- lib/legitbot/bing.rb
|
@@ -165,6 +166,7 @@ files:
|
|
165
166
|
- lib/legitbot/version.rb
|
166
167
|
- lib/legitbot/yandex.rb
|
167
168
|
- test/ahrefs_test.rb
|
169
|
+
- test/alexa_test.rb
|
168
170
|
- test/apple_as_google_test.rb
|
169
171
|
- test/apple_test.rb
|
170
172
|
- test/botmatch_test.rb
|
@@ -205,6 +207,7 @@ test_files:
|
|
205
207
|
- test/legitbot/validators/domains_test.rb
|
206
208
|
- test/legitbot/validators/ip_ranges_test.rb
|
207
209
|
- test/pinterest_test.rb
|
210
|
+
- test/alexa_test.rb
|
208
211
|
- test/ahrefs_test.rb
|
209
212
|
- test/apple_test.rb
|
210
213
|
- test/apple_as_google_test.rb
|