legitbot 1.4.5 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +2 -2
- data/.rubocop.yml +6 -0
- data/.ruby-version +1 -1
- data/README.md +11 -1
- data/Rakefile +0 -24
- data/legitbot.gemspec +12 -4
- data/lib/legitbot/amazon.rb +10 -0
- data/lib/legitbot/duckduckgo.rb +10 -9
- data/lib/legitbot/validators/domains.rb +2 -0
- data/lib/legitbot/version.rb +1 -1
- data/lib/legitbot.rb +1 -0
- data/rakelib/bump.rake +5 -0
- data/rakelib/bundler.rake +4 -0
- data/rakelib/console.rake +8 -0
- data/rakelib/test.rake +12 -0
- data/test/ahrefs_test.rb +11 -6
- data/test/alexa_test.rb +11 -6
- data/test/amazon_test.rb +57 -0
- data/test/apple_test.rb +8 -3
- data/test/botmatch_test.rb +7 -2
- data/test/facebook_test.rb +14 -10
- data/test/google_test.rb +11 -6
- data/test/legitbot/validators/ip_ranges_test.rb +2 -0
- data/test/legitbot_test.rb +2 -4
- data/test/lib/dns_server_mock.rb +89 -0
- data/test/oracle_test.rb +6 -6
- data/test/petalbot_test.rb +13 -8
- data/test/pinterest_test.rb +11 -6
- data/test/twitter_test.rb +6 -6
- metadata +91 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: df2907ab34b159613adf21c3301607f53fa893e0823f81560e45ad2c8303075b
|
4
|
+
data.tar.gz: fe913e9b72878969839326da227c1169ad8234a8ae335e276f614b53e5777d42
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f8af647da764cd722f27f2936f8f9728f9cdd7b313e295d9c80ebe7144a601a5360f5b615039d5afe75f7ded891e3bfc06292383e9e41e342977d39f1e485dab
|
7
|
+
data.tar.gz: 74e3dd5cb2c4d2e7df717c6bb3a072c74aa51b922f7f71d280cde82387ba458d30970f85c4f79508852ef5c2969196bd640427bf2323104229616960314fb9df
|
data/.github/workflows/build.yml
CHANGED
@@ -13,7 +13,7 @@ jobs:
|
|
13
13
|
strategy:
|
14
14
|
fail-fast: false
|
15
15
|
matrix:
|
16
|
-
ruby: [ jruby, 2.
|
16
|
+
ruby: [ jruby, 2.6, 2.7, 3.0 ]
|
17
17
|
|
18
18
|
steps:
|
19
19
|
- uses: actions/checkout@v2
|
@@ -41,7 +41,7 @@ jobs:
|
|
41
41
|
|
42
42
|
strategy:
|
43
43
|
matrix:
|
44
|
-
ruby: [
|
44
|
+
ruby: [ 3.0 ]
|
45
45
|
|
46
46
|
steps:
|
47
47
|
- uses: actions/checkout@v2
|
data/.rubocop.yml
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.0.3
|
data/README.md
CHANGED
@@ -39,10 +39,18 @@ Rack::Attack.blocklist 'fake search engines' do |request|
|
|
39
39
|
end
|
40
40
|
```
|
41
41
|
|
42
|
+
## Versioning
|
43
|
+
|
44
|
+
[Semantic versioning](https://semver.org/) with the following clarifications:
|
45
|
+
|
46
|
+
* MINOR version is incremented when support for new bots is added.
|
47
|
+
* PATCH version is incremented when validation logic for a bot changes (IP list updated, for example).
|
48
|
+
|
42
49
|
## Supported
|
43
50
|
|
44
51
|
* [Ahrefs](https://ahrefs.com/robot)
|
45
52
|
* [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
|
53
|
+
* [Amazon AdBot](https://adbot.amazon.com/index.html)
|
46
54
|
* [Applebot](https://support.apple.com/en-us/HT204683)
|
47
55
|
* [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
|
48
56
|
* [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
|
@@ -59,7 +67,7 @@ end
|
|
59
67
|
|
60
68
|
Apache 2.0
|
61
69
|
|
62
|
-
##
|
70
|
+
## Other projects
|
63
71
|
|
64
72
|
* Play Framework variant in Scala: [play-legitbot](https://github.com/osinka/play-legitbot)
|
65
73
|
* Article [When (Fake) Googlebots Attack Your Rails App](http://jessewolgamott.com/blog/2015/11/17/when-fake-googlebots-attack-your-rails-app/)
|
@@ -72,3 +80,5 @@ Apache 2.0
|
|
72
80
|
classify IP as a search engine, but also label them as suspicious and
|
73
81
|
reports the number of days since the last activity. My implementation of
|
74
82
|
the protocol in Scala is [here](https://github.com/osinka/httpbl).
|
83
|
+
* [CIDRAM](https://github.com/CIDRAM/CIDRAM) is a PHP routing manager with built-in support
|
84
|
+
to validate bots.
|
data/Rakefile
CHANGED
@@ -1,25 +1 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'bundler'
|
5
|
-
require 'bump/tasks'
|
6
|
-
require 'rake/testtask'
|
7
|
-
Bundler::GemHelper.install_tasks
|
8
|
-
|
9
|
-
Bump.tag_by_default = true
|
10
|
-
|
11
|
-
Rake::TestTask.new do |t|
|
12
|
-
t.libs << 'test'
|
13
|
-
t.test_files = FileList['test/**/*_test.rb']
|
14
|
-
t.warning = true
|
15
|
-
t.verbose = true
|
16
|
-
end
|
17
|
-
|
18
|
-
desc 'Start a console'
|
19
|
-
task :console do
|
20
|
-
require 'irb'
|
21
|
-
ARGV.clear
|
22
|
-
IRB.start
|
23
|
-
end
|
24
|
-
|
25
|
-
task default: %w[test]
|
data/legitbot.gemspec
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'English'
|
4
|
+
|
3
5
|
$LOAD_PATH.push File.expand_path('lib', __dir__)
|
4
6
|
require 'legitbot/version'
|
5
7
|
|
@@ -13,16 +15,22 @@ Gem::Specification.new do |spec|
|
|
13
15
|
spec.homepage = 'https://github.com/alaz/legitbot'
|
14
16
|
spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
|
15
17
|
spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
|
18
|
+
spec.metadata = {
|
19
|
+
'rubygems_mfa_required' => 'true'
|
20
|
+
}
|
16
21
|
|
17
|
-
spec.required_ruby_version = '>= 2.
|
18
|
-
spec.add_dependency '
|
22
|
+
spec.required_ruby_version = '>= 2.6.0'
|
23
|
+
spec.add_dependency 'fast_interval_tree', '~> 0.2', '>= 0.2.2'
|
19
24
|
spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
|
20
25
|
spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
|
26
|
+
spec.add_development_dependency 'dns_mock', '~> 1.5.0', '>= 1.5.0'
|
21
27
|
spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
|
28
|
+
spec.add_development_dependency 'minitest-hooks', '~> 1.5', '>= 1.5.0'
|
22
29
|
spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
|
23
|
-
spec.add_development_dependency 'rubocop', '~> 1.
|
30
|
+
spec.add_development_dependency 'rubocop', '~> 1.24.0', '>= 1.24.0'
|
31
|
+
spec.add_development_dependency 'rubocop-minitest', '~> 0.17.0', '>= 0.17.0'
|
24
32
|
|
25
33
|
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
26
34
|
spec.rdoc_options = ['--charset=UTF-8']
|
27
|
-
spec.test_files = Dir.glob('test/**/*')
|
35
|
+
spec.test_files = Dir.glob('test/**/*').reject { |f| f.start_with? 'test/lib' }
|
28
36
|
end
|
data/lib/legitbot/duckduckgo.rb
CHANGED
@@ -5,16 +5,17 @@ module Legitbot # :nodoc:
|
|
5
5
|
class DuckDuckGo < BotMatch
|
6
6
|
ip_ranges %w[
|
7
7
|
20.191.45.212
|
8
|
-
23.21.227.69
|
9
8
|
40.88.21.235
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
52.
|
16
|
-
|
17
|
-
|
9
|
+
40.76.173.151
|
10
|
+
40.76.163.7
|
11
|
+
20.185.79.47
|
12
|
+
52.142.26.175
|
13
|
+
20.185.79.15
|
14
|
+
52.142.24.149
|
15
|
+
40.76.162.208
|
16
|
+
40.76.163.23
|
17
|
+
40.76.162.191
|
18
|
+
40.76.162.247
|
18
19
|
54.208.102.37
|
19
20
|
107.21.1.8
|
20
21
|
]
|
data/lib/legitbot/version.rb
CHANGED
data/lib/legitbot.rb
CHANGED
data/rakelib/bump.rake
ADDED
data/rakelib/test.rake
ADDED
data/test/ahrefs_test.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class AhrefsTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_malicious_ip
|
8
13
|
ip = '149.210.164.47'
|
9
14
|
match = Legitbot::Ahrefs.new ip
|
10
|
-
|
15
|
+
refute match.valid?
|
11
16
|
end
|
12
17
|
|
13
18
|
def test_valid_ip
|
14
19
|
ip = '54.36.148.0'
|
15
20
|
match = Legitbot::Ahrefs.new ip
|
16
|
-
assert match.valid
|
21
|
+
assert match.valid?
|
17
22
|
end
|
18
23
|
|
19
24
|
def test_malicious_ua
|
@@ -21,8 +26,8 @@ class AhrefsTest < Minitest::Test
|
|
21
26
|
'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)',
|
22
27
|
'149.210.164.47'
|
23
28
|
)
|
24
|
-
assert bot
|
25
|
-
|
29
|
+
assert bot
|
30
|
+
refute bot.valid?
|
26
31
|
end
|
27
32
|
|
28
33
|
def test_valid_ua
|
@@ -30,7 +35,7 @@ class AhrefsTest < Minitest::Test
|
|
30
35
|
'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)',
|
31
36
|
'54.36.148.0'
|
32
37
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
38
|
+
assert bot
|
39
|
+
assert bot.valid?
|
35
40
|
end
|
36
41
|
end
|
data/test/alexa_test.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class AlexaTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_malicious_ip
|
8
13
|
ip = '149.210.164.47'
|
9
14
|
match = Legitbot::Alexa.new ip
|
10
|
-
|
15
|
+
refute match.valid?
|
11
16
|
end
|
12
17
|
|
13
18
|
def test_valid_ip
|
14
19
|
ip = '52.86.176.3'
|
15
20
|
match = Legitbot::Alexa.new ip
|
16
|
-
assert match.valid
|
21
|
+
assert match.valid?
|
17
22
|
end
|
18
23
|
|
19
24
|
def test_malicious_ua
|
@@ -21,8 +26,8 @@ class AlexaTest < Minitest::Test
|
|
21
26
|
'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
|
22
27
|
'149.210.164.47'
|
23
28
|
)
|
24
|
-
assert bot
|
25
|
-
|
29
|
+
assert bot
|
30
|
+
refute bot.valid?
|
26
31
|
end
|
27
32
|
|
28
33
|
def test_valid_ua
|
@@ -30,7 +35,7 @@ class AlexaTest < Minitest::Test
|
|
30
35
|
'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
|
31
36
|
'52.86.176.3'
|
32
37
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
38
|
+
assert bot
|
39
|
+
assert bot.valid?
|
35
40
|
end
|
36
41
|
end
|
data/test/amazon_test.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
6
|
+
require 'legitbot'
|
7
|
+
|
8
|
+
class AmazonTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
12
|
+
def test_malicious_ip
|
13
|
+
ip = '149.210.164.47'
|
14
|
+
match = Legitbot::Amazon.new ip
|
15
|
+
refute match.valid?
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_valid_ip
|
19
|
+
ip = '54.166.7.90'
|
20
|
+
match = Legitbot::Amazon.new ip
|
21
|
+
assert match.valid?
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_malicious_ua
|
25
|
+
bot = Legitbot.bot(
|
26
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
27
|
+
'149.210.164.47'
|
28
|
+
)
|
29
|
+
assert bot
|
30
|
+
refute bot.valid?
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_valid_ua
|
34
|
+
bot = Legitbot.bot(
|
35
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
36
|
+
'54.166.7.90'
|
37
|
+
)
|
38
|
+
assert bot
|
39
|
+
assert bot.valid?
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_valid_name
|
43
|
+
bot = Legitbot.bot(
|
44
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
45
|
+
'54.166.7.90'
|
46
|
+
)
|
47
|
+
assert_equal :amazon, bot.detected_as
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_fake_name
|
51
|
+
bot = Legitbot.bot(
|
52
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
53
|
+
'81.1.172.108'
|
54
|
+
)
|
55
|
+
assert_equal :amazon, bot.detected_as
|
56
|
+
end
|
57
|
+
end
|
data/test/apple_test.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class AppleTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_valid_ip
|
8
13
|
ip = '17.58.98.60'
|
9
14
|
match = Legitbot::Apple.new(ip)
|
10
|
-
assert match.valid
|
15
|
+
assert match.valid?
|
11
16
|
end
|
12
17
|
|
13
18
|
def test_invalid_ip
|
14
19
|
ip = '127.0.0.1'
|
15
20
|
match = Legitbot::Apple.new(ip)
|
16
|
-
assert match.fake
|
21
|
+
assert match.fake?
|
17
22
|
end
|
18
23
|
|
19
24
|
# rubocop:disable Layout/LineLength
|
@@ -23,7 +28,7 @@ class AppleTest < Minitest::Test
|
|
23
28
|
'17.58.98.60'
|
24
29
|
)
|
25
30
|
assert_equal :apple, bot.detected_as
|
26
|
-
assert bot.valid
|
31
|
+
assert bot.valid?
|
27
32
|
end
|
28
33
|
# rubocop:enable Layout/LineLength
|
29
34
|
end
|
data/test/botmatch_test.rb
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class BotMatchTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_valid_class_syntax
|
8
|
-
assert Legitbot::Google.valid?('66.249.64.141')
|
9
|
-
assert Legitbot::Google.fake?('149.210.164.47')
|
13
|
+
assert Legitbot::Google.valid?('66.249.64.141')
|
14
|
+
assert Legitbot::Google.fake?('149.210.164.47')
|
10
15
|
end
|
11
16
|
end
|
data/test/facebook_test.rb
CHANGED
@@ -20,44 +20,48 @@ class FacebookTest < Minitest::Test
|
|
20
20
|
def test_valid_ip
|
21
21
|
ip = '69.63.186.89'
|
22
22
|
match = Legitbot::Facebook.new(ip)
|
23
|
-
assert match.valid
|
23
|
+
assert match.valid?
|
24
24
|
|
25
25
|
ip = '69.171.251.1'
|
26
26
|
match = Legitbot::Facebook.new(ip)
|
27
|
-
assert match.valid
|
27
|
+
assert match.valid?
|
28
28
|
end
|
29
29
|
|
30
30
|
def test_invalid_ip
|
31
31
|
ip = '127.0.0.1'
|
32
32
|
match = Legitbot::Facebook.new(ip)
|
33
|
-
assert match.fake
|
33
|
+
assert match.fake?
|
34
34
|
end
|
35
35
|
|
36
|
-
|
37
|
-
def test_user_agent
|
36
|
+
def test_user_agent1
|
38
37
|
Legitbot.bot(
|
39
38
|
'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
|
40
39
|
'31.13.76.56'
|
41
40
|
) do |bot|
|
42
41
|
assert_equal :facebook, bot.detected_as
|
43
|
-
assert bot.valid
|
42
|
+
assert bot.valid?
|
44
43
|
end
|
44
|
+
end
|
45
45
|
|
46
|
+
def test_user_agent2
|
46
47
|
Legitbot.bot(
|
47
48
|
'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
|
48
49
|
'173.252.87.8'
|
49
50
|
) do |bot|
|
50
51
|
assert_equal :facebook, bot.detected_as
|
51
|
-
assert bot.valid
|
52
|
+
assert bot.valid?
|
52
53
|
end
|
54
|
+
end
|
53
55
|
|
56
|
+
# rubocop:disable Layout/LineLength
|
57
|
+
def test_user_agent3
|
54
58
|
Legitbot.bot(
|
55
59
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 facebookexternalhit/1.1 Facebot Twitterbot/1.0',
|
56
60
|
'92.243.181.7'
|
57
61
|
) do |bot|
|
58
|
-
|
59
|
-
assert bot.fake
|
62
|
+
assert_includes %i[facebook twitter], bot.detected_as
|
63
|
+
assert bot.fake?
|
60
64
|
end
|
61
65
|
end
|
62
|
-
# rubocop:enable Layout/LineLength
|
66
|
+
# rubocop:enable Layout/LineLength
|
63
67
|
end
|
data/test/google_test.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class GoogleTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_malicious_ip
|
8
13
|
ip = '149.210.164.47'
|
9
14
|
match = Legitbot::Google.new ip
|
10
|
-
|
15
|
+
refute match.valid?
|
11
16
|
end
|
12
17
|
|
13
18
|
def test_valid_ip
|
14
19
|
ip = '66.249.64.141'
|
15
20
|
match = Legitbot::Google.new ip
|
16
|
-
assert match.valid
|
21
|
+
assert match.valid?
|
17
22
|
end
|
18
23
|
|
19
24
|
def test_malicious_ua
|
@@ -21,8 +26,8 @@ class GoogleTest < Minitest::Test
|
|
21
26
|
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
22
27
|
'149.210.164.47'
|
23
28
|
)
|
24
|
-
assert bot
|
25
|
-
|
29
|
+
assert bot
|
30
|
+
refute bot.valid?
|
26
31
|
end
|
27
32
|
|
28
33
|
def test_valid_ua
|
@@ -30,8 +35,8 @@ class GoogleTest < Minitest::Test
|
|
30
35
|
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
31
36
|
'66.249.64.141'
|
32
37
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
38
|
+
assert bot
|
39
|
+
assert bot.valid?
|
35
40
|
end
|
36
41
|
|
37
42
|
def test_valid_name
|
@@ -4,6 +4,7 @@ require 'minitest/autorun'
|
|
4
4
|
require 'legitbot'
|
5
5
|
|
6
6
|
module Legitbot
|
7
|
+
# rubocop:disable Minitest/MultipleAssertions
|
7
8
|
module Validators
|
8
9
|
class NoRanges
|
9
10
|
include IpRanges
|
@@ -128,4 +129,5 @@ module Legitbot
|
|
128
129
|
end
|
129
130
|
end
|
130
131
|
end
|
132
|
+
# rubocop:enable Minitest/MultipleAssertions
|
131
133
|
end
|
data/test/legitbot_test.rb
CHANGED
@@ -5,10 +5,8 @@ require 'legitbot'
|
|
5
5
|
|
6
6
|
class LegitbotTest < Minitest::Test
|
7
7
|
def test_rules
|
8
|
-
|
9
|
-
|
10
|
-
assert Legitbot.bot('Googlebot', '5.140.70.64'),
|
11
|
-
msg: 'No reverse resolve, bot'
|
8
|
+
refute Legitbot.bot('Firefox', '127.0.0.1')
|
9
|
+
assert Legitbot.bot('Googlebot', '5.140.70.64')
|
12
10
|
|
13
11
|
Legitbot.bot('Firefox', '127.0.0.1') do |_bot|
|
14
12
|
flunk 'No bot Firefox is possible'
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'dns_mock'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
TEST_DNS_RECORDS = {
|
7
|
+
# Malicious
|
8
|
+
'149.210.164.47' => {
|
9
|
+
ptr: %w[malicious.spam.co]
|
10
|
+
},
|
11
|
+
|
12
|
+
# Ahrefs
|
13
|
+
'ip-54-36-148-0.a.ahrefs.com' => {
|
14
|
+
a: %w[54.36.148.0]
|
15
|
+
},
|
16
|
+
'54.36.148.0' => {
|
17
|
+
ptr: %w[ip-54-36-148-0.a.ahrefs.com]
|
18
|
+
},
|
19
|
+
|
20
|
+
# Alexa
|
21
|
+
'52.86.176.3' => {
|
22
|
+
ptr: %w[crawl-52-86-176-3.alexa.com]
|
23
|
+
},
|
24
|
+
|
25
|
+
# Amazon
|
26
|
+
'crawler-54-166-7-90.amazonadbot.com' => {
|
27
|
+
a: %w[54.166.7.90]
|
28
|
+
},
|
29
|
+
'54.166.7.90' => {
|
30
|
+
ptr: %w[crawler-54-166-7-90.amazonadbot.com]
|
31
|
+
},
|
32
|
+
|
33
|
+
# Apple
|
34
|
+
'17-58-98-60.applebot.apple.com' => {
|
35
|
+
a: %w[17.58.98.60]
|
36
|
+
},
|
37
|
+
'17.58.98.60' => {
|
38
|
+
ptr: %w[17-58-98-60.applebot.apple.com]
|
39
|
+
},
|
40
|
+
|
41
|
+
# Google
|
42
|
+
'crawl-66-249-64-141.googlebot.com' => {
|
43
|
+
a: %w[66.249.64.141]
|
44
|
+
},
|
45
|
+
'66.249.64.141' => {
|
46
|
+
ptr: %w[crawl-66-249-64-141.googlebot.com]
|
47
|
+
},
|
48
|
+
|
49
|
+
# Petalbot
|
50
|
+
'petalbot-114-119-134-10.petalsearch.com' => {
|
51
|
+
a: %w[114.119.134.10]
|
52
|
+
},
|
53
|
+
'114.119.134.10' => {
|
54
|
+
ptr: %w[petalbot-114-119-134-10.petalsearch.com]
|
55
|
+
},
|
56
|
+
|
57
|
+
# Pinterest
|
58
|
+
'crawl-54-236-1-11.pinterest.com' => {
|
59
|
+
a: %w[54.236.1.11]
|
60
|
+
},
|
61
|
+
'54.236.1.11' => {
|
62
|
+
ptr: %w[crawl-54-236-1-11.pinterest.com]
|
63
|
+
}
|
64
|
+
}.freeze
|
65
|
+
|
66
|
+
class DnsServer
|
67
|
+
class << self
|
68
|
+
attr_accessor :mock
|
69
|
+
end
|
70
|
+
|
71
|
+
@mock = DnsMock.start_server records: TEST_DNS_RECORDS
|
72
|
+
end
|
73
|
+
|
74
|
+
module DnsServerMock
|
75
|
+
def before_all
|
76
|
+
super
|
77
|
+
|
78
|
+
Legitbot.resolver_config = {
|
79
|
+
nameserver: 'localhost',
|
80
|
+
nameserver_port: [['localhost', DnsServer.mock.port]]
|
81
|
+
}
|
82
|
+
end
|
83
|
+
|
84
|
+
def after_all
|
85
|
+
Legitbot.resolver_config = nil
|
86
|
+
|
87
|
+
super
|
88
|
+
end
|
89
|
+
end
|
data/test/oracle_test.rb
CHANGED
@@ -7,13 +7,13 @@ class OracleTest < Minitest::Test
|
|
7
7
|
def test_malicious_ip
|
8
8
|
ip = '149.210.164.47'
|
9
9
|
match = Legitbot::Oracle.new ip
|
10
|
-
|
10
|
+
refute match.valid?
|
11
11
|
end
|
12
12
|
|
13
13
|
def test_valid_ip
|
14
14
|
ip = '148.64.56.64'
|
15
15
|
match = Legitbot::Oracle.new ip
|
16
|
-
assert match.valid
|
16
|
+
assert match.valid?
|
17
17
|
end
|
18
18
|
|
19
19
|
def test_malicious_ua
|
@@ -21,8 +21,8 @@ class OracleTest < Minitest::Test
|
|
21
21
|
'Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)',
|
22
22
|
'149.210.164.47'
|
23
23
|
)
|
24
|
-
assert bot
|
25
|
-
|
24
|
+
assert bot
|
25
|
+
refute bot.valid?
|
26
26
|
end
|
27
27
|
|
28
28
|
def test_valid_ua
|
@@ -30,7 +30,7 @@ class OracleTest < Minitest::Test
|
|
30
30
|
'Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)',
|
31
31
|
'148.64.56.64'
|
32
32
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
33
|
+
assert bot
|
34
|
+
assert bot.valid?
|
35
35
|
end
|
36
36
|
end
|
data/test/petalbot_test.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class PetalbotTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_malicious_ip
|
8
13
|
ip = '149.210.164.47'
|
9
14
|
match = Legitbot::Petalbot.new ip
|
10
|
-
|
15
|
+
refute match.valid?
|
11
16
|
end
|
12
17
|
|
13
18
|
def test_valid_ip
|
14
|
-
ip = '114.119.
|
19
|
+
ip = '114.119.134.10'
|
15
20
|
match = Legitbot::Petalbot.new ip
|
16
|
-
assert match.valid
|
21
|
+
assert match.valid?
|
17
22
|
end
|
18
23
|
|
19
24
|
def test_malicious_ua
|
@@ -21,17 +26,17 @@ class PetalbotTest < Minitest::Test
|
|
21
26
|
'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
|
22
27
|
'149.210.164.47'
|
23
28
|
)
|
24
|
-
assert bot
|
25
|
-
|
29
|
+
assert bot
|
30
|
+
refute bot.valid?
|
26
31
|
end
|
27
32
|
|
28
33
|
def test_valid_ua
|
29
34
|
bot = Legitbot.bot(
|
30
35
|
'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
|
31
|
-
'114.119.
|
36
|
+
'114.119.134.10'
|
32
37
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
38
|
+
assert bot
|
39
|
+
assert bot.valid?
|
35
40
|
end
|
36
41
|
|
37
42
|
def test_valid_name
|
data/test/pinterest_test.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class PinterestTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_malicious_ip
|
8
13
|
ip = '149.210.164.47'
|
9
14
|
match = Legitbot::Pinterest.new ip
|
10
|
-
|
15
|
+
refute match.valid?
|
11
16
|
end
|
12
17
|
|
13
18
|
def test_valid_ip
|
14
19
|
ip = '54.236.1.11'
|
15
20
|
match = Legitbot::Pinterest.new ip
|
16
|
-
assert match.valid
|
21
|
+
assert match.valid?
|
17
22
|
end
|
18
23
|
|
19
24
|
def test_malicious_ua
|
@@ -21,8 +26,8 @@ class PinterestTest < Minitest::Test
|
|
21
26
|
'Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)',
|
22
27
|
'149.210.164.47'
|
23
28
|
)
|
24
|
-
assert bot
|
25
|
-
|
29
|
+
assert bot
|
30
|
+
refute bot.valid?
|
26
31
|
end
|
27
32
|
|
28
33
|
def test_valid_ua
|
@@ -30,8 +35,8 @@ class PinterestTest < Minitest::Test
|
|
30
35
|
'Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)',
|
31
36
|
'54.236.1.11'
|
32
37
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
38
|
+
assert bot
|
39
|
+
assert bot.valid?
|
35
40
|
end
|
36
41
|
|
37
42
|
# rubocop:disable Layout/LineLength
|
data/test/twitter_test.rb
CHANGED
@@ -7,13 +7,13 @@ class TwitterTest < Minitest::Test
|
|
7
7
|
def test_malicious_ip
|
8
8
|
ip = '149.210.164.47'
|
9
9
|
match = Legitbot::Twitter.new ip
|
10
|
-
|
10
|
+
refute match.valid?
|
11
11
|
end
|
12
12
|
|
13
13
|
def test_valid_ip
|
14
14
|
ip = '199.16.156.125'
|
15
15
|
match = Legitbot::Twitter.new ip
|
16
|
-
assert match.valid
|
16
|
+
assert match.valid?
|
17
17
|
end
|
18
18
|
|
19
19
|
def test_malicious_ua
|
@@ -21,8 +21,8 @@ class TwitterTest < Minitest::Test
|
|
21
21
|
'Twitterbot/1.0',
|
22
22
|
'149.210.164.47'
|
23
23
|
)
|
24
|
-
assert bot
|
25
|
-
|
24
|
+
assert bot
|
25
|
+
refute bot.valid?
|
26
26
|
end
|
27
27
|
|
28
28
|
def test_valid_ua
|
@@ -30,7 +30,7 @@ class TwitterTest < Minitest::Test
|
|
30
30
|
'Twitterbot/1.0',
|
31
31
|
'199.16.156.125'
|
32
32
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
33
|
+
assert bot
|
34
|
+
assert bot.valid?
|
35
35
|
end
|
36
36
|
end
|
metadata
CHANGED
@@ -1,35 +1,35 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-03-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: fast_interval_tree
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0.
|
19
|
+
version: '0.2'
|
20
20
|
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: 0.
|
22
|
+
version: 0.2.2
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
27
|
- - "~>"
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: '0.
|
29
|
+
version: '0.2'
|
30
30
|
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: 0.
|
32
|
+
version: 0.2.2
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: irrc
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -70,6 +70,26 @@ dependencies:
|
|
70
70
|
- - ">="
|
71
71
|
- !ruby/object:Gem::Version
|
72
72
|
version: 0.8.0
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: dns_mock
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 1.5.0
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.5.0
|
83
|
+
type: :development
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 1.5.0
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 1.5.0
|
73
93
|
- !ruby/object:Gem::Dependency
|
74
94
|
name: minitest
|
75
95
|
requirement: !ruby/object:Gem::Requirement
|
@@ -90,6 +110,26 @@ dependencies:
|
|
90
110
|
- - ">="
|
91
111
|
- !ruby/object:Gem::Version
|
92
112
|
version: 5.1.0
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: minitest-hooks
|
115
|
+
requirement: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - "~>"
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '1.5'
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: 1.5.0
|
123
|
+
type: :development
|
124
|
+
prerelease: false
|
125
|
+
version_requirements: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - "~>"
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '1.5'
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: 1.5.0
|
93
133
|
- !ruby/object:Gem::Dependency
|
94
134
|
name: rake
|
95
135
|
requirement: !ruby/object:Gem::Requirement
|
@@ -116,20 +156,40 @@ dependencies:
|
|
116
156
|
requirements:
|
117
157
|
- - "~>"
|
118
158
|
- !ruby/object:Gem::Version
|
119
|
-
version: 1.
|
159
|
+
version: 1.24.0
|
120
160
|
- - ">="
|
121
161
|
- !ruby/object:Gem::Version
|
122
|
-
version: 1.
|
162
|
+
version: 1.24.0
|
123
163
|
type: :development
|
124
164
|
prerelease: false
|
125
165
|
version_requirements: !ruby/object:Gem::Requirement
|
126
166
|
requirements:
|
127
167
|
- - "~>"
|
128
168
|
- !ruby/object:Gem::Version
|
129
|
-
version: 1.
|
169
|
+
version: 1.24.0
|
130
170
|
- - ">="
|
131
171
|
- !ruby/object:Gem::Version
|
132
|
-
version: 1.
|
172
|
+
version: 1.24.0
|
173
|
+
- !ruby/object:Gem::Dependency
|
174
|
+
name: rubocop-minitest
|
175
|
+
requirement: !ruby/object:Gem::Requirement
|
176
|
+
requirements:
|
177
|
+
- - "~>"
|
178
|
+
- !ruby/object:Gem::Version
|
179
|
+
version: 0.17.0
|
180
|
+
- - ">="
|
181
|
+
- !ruby/object:Gem::Version
|
182
|
+
version: 0.17.0
|
183
|
+
type: :development
|
184
|
+
prerelease: false
|
185
|
+
version_requirements: !ruby/object:Gem::Requirement
|
186
|
+
requirements:
|
187
|
+
- - "~>"
|
188
|
+
- !ruby/object:Gem::Version
|
189
|
+
version: 0.17.0
|
190
|
+
- - ">="
|
191
|
+
- !ruby/object:Gem::Version
|
192
|
+
version: 0.17.0
|
133
193
|
description: Does Web request come from a real search engine or from an impersonating
|
134
194
|
agent?
|
135
195
|
email: self@alaz.me
|
@@ -150,6 +210,7 @@ files:
|
|
150
210
|
- lib/legitbot.rb
|
151
211
|
- lib/legitbot/ahrefs.rb
|
152
212
|
- lib/legitbot/alexa.rb
|
213
|
+
- lib/legitbot/amazon.rb
|
153
214
|
- lib/legitbot/apple.rb
|
154
215
|
- lib/legitbot/baidu.rb
|
155
216
|
- lib/legitbot/bing.rb
|
@@ -167,8 +228,13 @@ files:
|
|
167
228
|
- lib/legitbot/validators/ip_ranges.rb
|
168
229
|
- lib/legitbot/version.rb
|
169
230
|
- lib/legitbot/yandex.rb
|
231
|
+
- rakelib/bump.rake
|
232
|
+
- rakelib/bundler.rake
|
233
|
+
- rakelib/console.rake
|
234
|
+
- rakelib/test.rake
|
170
235
|
- test/ahrefs_test.rb
|
171
236
|
- test/alexa_test.rb
|
237
|
+
- test/amazon_test.rb
|
172
238
|
- test/apple_test.rb
|
173
239
|
- test/botmatch_test.rb
|
174
240
|
- test/facebook_test.rb
|
@@ -176,6 +242,7 @@ files:
|
|
176
242
|
- test/legitbot/validators/domains_test.rb
|
177
243
|
- test/legitbot/validators/ip_ranges_test.rb
|
178
244
|
- test/legitbot_test.rb
|
245
|
+
- test/lib/dns_server_mock.rb
|
179
246
|
- test/oracle_test.rb
|
180
247
|
- test/petalbot_test.rb
|
181
248
|
- test/pinterest_test.rb
|
@@ -183,7 +250,8 @@ files:
|
|
183
250
|
homepage: https://github.com/alaz/legitbot
|
184
251
|
licenses:
|
185
252
|
- Apache-2.0
|
186
|
-
metadata:
|
253
|
+
metadata:
|
254
|
+
rubygems_mfa_required: 'true'
|
187
255
|
post_install_message:
|
188
256
|
rdoc_options:
|
189
257
|
- "--charset=UTF-8"
|
@@ -193,28 +261,29 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
193
261
|
requirements:
|
194
262
|
- - ">="
|
195
263
|
- !ruby/object:Gem::Version
|
196
|
-
version: 2.
|
264
|
+
version: 2.6.0
|
197
265
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
198
266
|
requirements:
|
199
267
|
- - ">="
|
200
268
|
- !ruby/object:Gem::Version
|
201
269
|
version: '0'
|
202
270
|
requirements: []
|
203
|
-
rubygems_version: 3.
|
271
|
+
rubygems_version: 3.2.32
|
204
272
|
signing_key:
|
205
273
|
specification_version: 4
|
206
274
|
summary: 'Validate requests from Web crawlers: impersonating or not?'
|
207
275
|
test_files:
|
208
|
-
- test/legitbot_test.rb
|
209
|
-
- test/legitbot/validators/domains_test.rb
|
210
|
-
- test/legitbot/validators/ip_ranges_test.rb
|
211
|
-
- test/pinterest_test.rb
|
212
|
-
- test/alexa_test.rb
|
213
276
|
- test/ahrefs_test.rb
|
277
|
+
- test/alexa_test.rb
|
278
|
+
- test/amazon_test.rb
|
214
279
|
- test/apple_test.rb
|
215
|
-
- test/oracle_test.rb
|
216
|
-
- test/google_test.rb
|
217
|
-
- test/petalbot_test.rb
|
218
280
|
- test/botmatch_test.rb
|
219
281
|
- test/facebook_test.rb
|
282
|
+
- test/google_test.rb
|
283
|
+
- test/legitbot/validators/domains_test.rb
|
284
|
+
- test/legitbot/validators/ip_ranges_test.rb
|
285
|
+
- test/legitbot_test.rb
|
286
|
+
- test/oracle_test.rb
|
287
|
+
- test/petalbot_test.rb
|
288
|
+
- test/pinterest_test.rb
|
220
289
|
- test/twitter_test.rb
|