legitbot 1.4.5 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +2 -2
- data/.rubocop.yml +6 -0
- data/.ruby-version +1 -1
- data/README.md +11 -1
- data/Rakefile +0 -24
- data/legitbot.gemspec +12 -4
- data/lib/legitbot/amazon.rb +10 -0
- data/lib/legitbot/duckduckgo.rb +10 -9
- data/lib/legitbot/validators/domains.rb +2 -0
- data/lib/legitbot/version.rb +1 -1
- data/lib/legitbot.rb +1 -0
- data/rakelib/bump.rake +5 -0
- data/rakelib/bundler.rake +4 -0
- data/rakelib/console.rake +8 -0
- data/rakelib/test.rake +12 -0
- data/test/ahrefs_test.rb +11 -6
- data/test/alexa_test.rb +11 -6
- data/test/amazon_test.rb +57 -0
- data/test/apple_test.rb +8 -3
- data/test/botmatch_test.rb +7 -2
- data/test/facebook_test.rb +14 -10
- data/test/google_test.rb +11 -6
- data/test/legitbot/validators/ip_ranges_test.rb +2 -0
- data/test/legitbot_test.rb +2 -4
- data/test/lib/dns_server_mock.rb +89 -0
- data/test/oracle_test.rb +6 -6
- data/test/petalbot_test.rb +13 -8
- data/test/pinterest_test.rb +11 -6
- data/test/twitter_test.rb +6 -6
- metadata +91 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: df2907ab34b159613adf21c3301607f53fa893e0823f81560e45ad2c8303075b
|
4
|
+
data.tar.gz: fe913e9b72878969839326da227c1169ad8234a8ae335e276f614b53e5777d42
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f8af647da764cd722f27f2936f8f9728f9cdd7b313e295d9c80ebe7144a601a5360f5b615039d5afe75f7ded891e3bfc06292383e9e41e342977d39f1e485dab
|
7
|
+
data.tar.gz: 74e3dd5cb2c4d2e7df717c6bb3a072c74aa51b922f7f71d280cde82387ba458d30970f85c4f79508852ef5c2969196bd640427bf2323104229616960314fb9df
|
data/.github/workflows/build.yml
CHANGED
@@ -13,7 +13,7 @@ jobs:
|
|
13
13
|
strategy:
|
14
14
|
fail-fast: false
|
15
15
|
matrix:
|
16
|
-
ruby: [ jruby, 2.
|
16
|
+
ruby: [ jruby, 2.6, 2.7, 3.0 ]
|
17
17
|
|
18
18
|
steps:
|
19
19
|
- uses: actions/checkout@v2
|
@@ -41,7 +41,7 @@ jobs:
|
|
41
41
|
|
42
42
|
strategy:
|
43
43
|
matrix:
|
44
|
-
ruby: [
|
44
|
+
ruby: [ 3.0 ]
|
45
45
|
|
46
46
|
steps:
|
47
47
|
- uses: actions/checkout@v2
|
data/.rubocop.yml
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.0.3
|
data/README.md
CHANGED
@@ -39,10 +39,18 @@ Rack::Attack.blocklist 'fake search engines' do |request|
|
|
39
39
|
end
|
40
40
|
```
|
41
41
|
|
42
|
+
## Versioning
|
43
|
+
|
44
|
+
[Semantic versioning](https://semver.org/) with the following clarifications:
|
45
|
+
|
46
|
+
* MINOR version is incremented when support for new bots is added.
|
47
|
+
* PATCH version is incremented when validation logic for a bot changes (IP list updated, for example).
|
48
|
+
|
42
49
|
## Supported
|
43
50
|
|
44
51
|
* [Ahrefs](https://ahrefs.com/robot)
|
45
52
|
* [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
|
53
|
+
* [Amazon AdBot](https://adbot.amazon.com/index.html)
|
46
54
|
* [Applebot](https://support.apple.com/en-us/HT204683)
|
47
55
|
* [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
|
48
56
|
* [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
|
@@ -59,7 +67,7 @@ end
|
|
59
67
|
|
60
68
|
Apache 2.0
|
61
69
|
|
62
|
-
##
|
70
|
+
## Other projects
|
63
71
|
|
64
72
|
* Play Framework variant in Scala: [play-legitbot](https://github.com/osinka/play-legitbot)
|
65
73
|
* Article [When (Fake) Googlebots Attack Your Rails App](http://jessewolgamott.com/blog/2015/11/17/when-fake-googlebots-attack-your-rails-app/)
|
@@ -72,3 +80,5 @@ Apache 2.0
|
|
72
80
|
classify IP as a search engine, but also label them as suspicious and
|
73
81
|
reports the number of days since the last activity. My implementation of
|
74
82
|
the protocol in Scala is [here](https://github.com/osinka/httpbl).
|
83
|
+
* [CIDRAM](https://github.com/CIDRAM/CIDRAM) is a PHP routing manager with built-in support
|
84
|
+
to validate bots.
|
data/Rakefile
CHANGED
@@ -1,25 +1 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'bundler'
|
5
|
-
require 'bump/tasks'
|
6
|
-
require 'rake/testtask'
|
7
|
-
Bundler::GemHelper.install_tasks
|
8
|
-
|
9
|
-
Bump.tag_by_default = true
|
10
|
-
|
11
|
-
Rake::TestTask.new do |t|
|
12
|
-
t.libs << 'test'
|
13
|
-
t.test_files = FileList['test/**/*_test.rb']
|
14
|
-
t.warning = true
|
15
|
-
t.verbose = true
|
16
|
-
end
|
17
|
-
|
18
|
-
desc 'Start a console'
|
19
|
-
task :console do
|
20
|
-
require 'irb'
|
21
|
-
ARGV.clear
|
22
|
-
IRB.start
|
23
|
-
end
|
24
|
-
|
25
|
-
task default: %w[test]
|
data/legitbot.gemspec
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'English'
|
4
|
+
|
3
5
|
$LOAD_PATH.push File.expand_path('lib', __dir__)
|
4
6
|
require 'legitbot/version'
|
5
7
|
|
@@ -13,16 +15,22 @@ Gem::Specification.new do |spec|
|
|
13
15
|
spec.homepage = 'https://github.com/alaz/legitbot'
|
14
16
|
spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
|
15
17
|
spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
|
18
|
+
spec.metadata = {
|
19
|
+
'rubygems_mfa_required' => 'true'
|
20
|
+
}
|
16
21
|
|
17
|
-
spec.required_ruby_version = '>= 2.
|
18
|
-
spec.add_dependency '
|
22
|
+
spec.required_ruby_version = '>= 2.6.0'
|
23
|
+
spec.add_dependency 'fast_interval_tree', '~> 0.2', '>= 0.2.2'
|
19
24
|
spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
|
20
25
|
spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
|
26
|
+
spec.add_development_dependency 'dns_mock', '~> 1.5.0', '>= 1.5.0'
|
21
27
|
spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
|
28
|
+
spec.add_development_dependency 'minitest-hooks', '~> 1.5', '>= 1.5.0'
|
22
29
|
spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
|
23
|
-
spec.add_development_dependency 'rubocop', '~> 1.
|
30
|
+
spec.add_development_dependency 'rubocop', '~> 1.24.0', '>= 1.24.0'
|
31
|
+
spec.add_development_dependency 'rubocop-minitest', '~> 0.17.0', '>= 0.17.0'
|
24
32
|
|
25
33
|
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
26
34
|
spec.rdoc_options = ['--charset=UTF-8']
|
27
|
-
spec.test_files = Dir.glob('test/**/*')
|
35
|
+
spec.test_files = Dir.glob('test/**/*').reject { |f| f.start_with? 'test/lib' }
|
28
36
|
end
|
data/lib/legitbot/duckduckgo.rb
CHANGED
@@ -5,16 +5,17 @@ module Legitbot # :nodoc:
|
|
5
5
|
class DuckDuckGo < BotMatch
|
6
6
|
ip_ranges %w[
|
7
7
|
20.191.45.212
|
8
|
-
23.21.227.69
|
9
8
|
40.88.21.235
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
52.
|
16
|
-
|
17
|
-
|
9
|
+
40.76.173.151
|
10
|
+
40.76.163.7
|
11
|
+
20.185.79.47
|
12
|
+
52.142.26.175
|
13
|
+
20.185.79.15
|
14
|
+
52.142.24.149
|
15
|
+
40.76.162.208
|
16
|
+
40.76.163.23
|
17
|
+
40.76.162.191
|
18
|
+
40.76.162.247
|
18
19
|
54.208.102.37
|
19
20
|
107.21.1.8
|
20
21
|
]
|
data/lib/legitbot/version.rb
CHANGED
data/lib/legitbot.rb
CHANGED
data/rakelib/bump.rake
ADDED
data/rakelib/test.rake
ADDED
data/test/ahrefs_test.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class AhrefsTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_malicious_ip
|
8
13
|
ip = '149.210.164.47'
|
9
14
|
match = Legitbot::Ahrefs.new ip
|
10
|
-
|
15
|
+
refute match.valid?
|
11
16
|
end
|
12
17
|
|
13
18
|
def test_valid_ip
|
14
19
|
ip = '54.36.148.0'
|
15
20
|
match = Legitbot::Ahrefs.new ip
|
16
|
-
assert match.valid
|
21
|
+
assert match.valid?
|
17
22
|
end
|
18
23
|
|
19
24
|
def test_malicious_ua
|
@@ -21,8 +26,8 @@ class AhrefsTest < Minitest::Test
|
|
21
26
|
'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)',
|
22
27
|
'149.210.164.47'
|
23
28
|
)
|
24
|
-
assert bot
|
25
|
-
|
29
|
+
assert bot
|
30
|
+
refute bot.valid?
|
26
31
|
end
|
27
32
|
|
28
33
|
def test_valid_ua
|
@@ -30,7 +35,7 @@ class AhrefsTest < Minitest::Test
|
|
30
35
|
'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)',
|
31
36
|
'54.36.148.0'
|
32
37
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
38
|
+
assert bot
|
39
|
+
assert bot.valid?
|
35
40
|
end
|
36
41
|
end
|
data/test/alexa_test.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class AlexaTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_malicious_ip
|
8
13
|
ip = '149.210.164.47'
|
9
14
|
match = Legitbot::Alexa.new ip
|
10
|
-
|
15
|
+
refute match.valid?
|
11
16
|
end
|
12
17
|
|
13
18
|
def test_valid_ip
|
14
19
|
ip = '52.86.176.3'
|
15
20
|
match = Legitbot::Alexa.new ip
|
16
|
-
assert match.valid
|
21
|
+
assert match.valid?
|
17
22
|
end
|
18
23
|
|
19
24
|
def test_malicious_ua
|
@@ -21,8 +26,8 @@ class AlexaTest < Minitest::Test
|
|
21
26
|
'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
|
22
27
|
'149.210.164.47'
|
23
28
|
)
|
24
|
-
assert bot
|
25
|
-
|
29
|
+
assert bot
|
30
|
+
refute bot.valid?
|
26
31
|
end
|
27
32
|
|
28
33
|
def test_valid_ua
|
@@ -30,7 +35,7 @@ class AlexaTest < Minitest::Test
|
|
30
35
|
'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
|
31
36
|
'52.86.176.3'
|
32
37
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
38
|
+
assert bot
|
39
|
+
assert bot.valid?
|
35
40
|
end
|
36
41
|
end
|
data/test/amazon_test.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
6
|
+
require 'legitbot'
|
7
|
+
|
8
|
+
class AmazonTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
12
|
+
def test_malicious_ip
|
13
|
+
ip = '149.210.164.47'
|
14
|
+
match = Legitbot::Amazon.new ip
|
15
|
+
refute match.valid?
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_valid_ip
|
19
|
+
ip = '54.166.7.90'
|
20
|
+
match = Legitbot::Amazon.new ip
|
21
|
+
assert match.valid?
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_malicious_ua
|
25
|
+
bot = Legitbot.bot(
|
26
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
27
|
+
'149.210.164.47'
|
28
|
+
)
|
29
|
+
assert bot
|
30
|
+
refute bot.valid?
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_valid_ua
|
34
|
+
bot = Legitbot.bot(
|
35
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
36
|
+
'54.166.7.90'
|
37
|
+
)
|
38
|
+
assert bot
|
39
|
+
assert bot.valid?
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_valid_name
|
43
|
+
bot = Legitbot.bot(
|
44
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
45
|
+
'54.166.7.90'
|
46
|
+
)
|
47
|
+
assert_equal :amazon, bot.detected_as
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_fake_name
|
51
|
+
bot = Legitbot.bot(
|
52
|
+
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
53
|
+
'81.1.172.108'
|
54
|
+
)
|
55
|
+
assert_equal :amazon, bot.detected_as
|
56
|
+
end
|
57
|
+
end
|
data/test/apple_test.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class AppleTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_valid_ip
|
8
13
|
ip = '17.58.98.60'
|
9
14
|
match = Legitbot::Apple.new(ip)
|
10
|
-
assert match.valid
|
15
|
+
assert match.valid?
|
11
16
|
end
|
12
17
|
|
13
18
|
def test_invalid_ip
|
14
19
|
ip = '127.0.0.1'
|
15
20
|
match = Legitbot::Apple.new(ip)
|
16
|
-
assert match.fake
|
21
|
+
assert match.fake?
|
17
22
|
end
|
18
23
|
|
19
24
|
# rubocop:disable Layout/LineLength
|
@@ -23,7 +28,7 @@ class AppleTest < Minitest::Test
|
|
23
28
|
'17.58.98.60'
|
24
29
|
)
|
25
30
|
assert_equal :apple, bot.detected_as
|
26
|
-
assert bot.valid
|
31
|
+
assert bot.valid?
|
27
32
|
end
|
28
33
|
# rubocop:enable Layout/LineLength
|
29
34
|
end
|
data/test/botmatch_test.rb
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class BotMatchTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_valid_class_syntax
|
8
|
-
assert Legitbot::Google.valid?('66.249.64.141')
|
9
|
-
assert Legitbot::Google.fake?('149.210.164.47')
|
13
|
+
assert Legitbot::Google.valid?('66.249.64.141')
|
14
|
+
assert Legitbot::Google.fake?('149.210.164.47')
|
10
15
|
end
|
11
16
|
end
|
data/test/facebook_test.rb
CHANGED
@@ -20,44 +20,48 @@ class FacebookTest < Minitest::Test
|
|
20
20
|
def test_valid_ip
|
21
21
|
ip = '69.63.186.89'
|
22
22
|
match = Legitbot::Facebook.new(ip)
|
23
|
-
assert match.valid
|
23
|
+
assert match.valid?
|
24
24
|
|
25
25
|
ip = '69.171.251.1'
|
26
26
|
match = Legitbot::Facebook.new(ip)
|
27
|
-
assert match.valid
|
27
|
+
assert match.valid?
|
28
28
|
end
|
29
29
|
|
30
30
|
def test_invalid_ip
|
31
31
|
ip = '127.0.0.1'
|
32
32
|
match = Legitbot::Facebook.new(ip)
|
33
|
-
assert match.fake
|
33
|
+
assert match.fake?
|
34
34
|
end
|
35
35
|
|
36
|
-
|
37
|
-
def test_user_agent
|
36
|
+
def test_user_agent1
|
38
37
|
Legitbot.bot(
|
39
38
|
'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
|
40
39
|
'31.13.76.56'
|
41
40
|
) do |bot|
|
42
41
|
assert_equal :facebook, bot.detected_as
|
43
|
-
assert bot.valid
|
42
|
+
assert bot.valid?
|
44
43
|
end
|
44
|
+
end
|
45
45
|
|
46
|
+
def test_user_agent2
|
46
47
|
Legitbot.bot(
|
47
48
|
'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
|
48
49
|
'173.252.87.8'
|
49
50
|
) do |bot|
|
50
51
|
assert_equal :facebook, bot.detected_as
|
51
|
-
assert bot.valid
|
52
|
+
assert bot.valid?
|
52
53
|
end
|
54
|
+
end
|
53
55
|
|
56
|
+
# rubocop:disable Layout/LineLength
|
57
|
+
def test_user_agent3
|
54
58
|
Legitbot.bot(
|
55
59
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 facebookexternalhit/1.1 Facebot Twitterbot/1.0',
|
56
60
|
'92.243.181.7'
|
57
61
|
) do |bot|
|
58
|
-
|
59
|
-
assert bot.fake
|
62
|
+
assert_includes %i[facebook twitter], bot.detected_as
|
63
|
+
assert bot.fake?
|
60
64
|
end
|
61
65
|
end
|
62
|
-
# rubocop:enable Layout/LineLength
|
66
|
+
# rubocop:enable Layout/LineLength
|
63
67
|
end
|
data/test/google_test.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class GoogleTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_malicious_ip
|
8
13
|
ip = '149.210.164.47'
|
9
14
|
match = Legitbot::Google.new ip
|
10
|
-
|
15
|
+
refute match.valid?
|
11
16
|
end
|
12
17
|
|
13
18
|
def test_valid_ip
|
14
19
|
ip = '66.249.64.141'
|
15
20
|
match = Legitbot::Google.new ip
|
16
|
-
assert match.valid
|
21
|
+
assert match.valid?
|
17
22
|
end
|
18
23
|
|
19
24
|
def test_malicious_ua
|
@@ -21,8 +26,8 @@ class GoogleTest < Minitest::Test
|
|
21
26
|
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
22
27
|
'149.210.164.47'
|
23
28
|
)
|
24
|
-
assert bot
|
25
|
-
|
29
|
+
assert bot
|
30
|
+
refute bot.valid?
|
26
31
|
end
|
27
32
|
|
28
33
|
def test_valid_ua
|
@@ -30,8 +35,8 @@ class GoogleTest < Minitest::Test
|
|
30
35
|
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
|
31
36
|
'66.249.64.141'
|
32
37
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
38
|
+
assert bot
|
39
|
+
assert bot.valid?
|
35
40
|
end
|
36
41
|
|
37
42
|
def test_valid_name
|
@@ -4,6 +4,7 @@ require 'minitest/autorun'
|
|
4
4
|
require 'legitbot'
|
5
5
|
|
6
6
|
module Legitbot
|
7
|
+
# rubocop:disable Minitest/MultipleAssertions
|
7
8
|
module Validators
|
8
9
|
class NoRanges
|
9
10
|
include IpRanges
|
@@ -128,4 +129,5 @@ module Legitbot
|
|
128
129
|
end
|
129
130
|
end
|
130
131
|
end
|
132
|
+
# rubocop:enable Minitest/MultipleAssertions
|
131
133
|
end
|
data/test/legitbot_test.rb
CHANGED
@@ -5,10 +5,8 @@ require 'legitbot'
|
|
5
5
|
|
6
6
|
class LegitbotTest < Minitest::Test
|
7
7
|
def test_rules
|
8
|
-
|
9
|
-
|
10
|
-
assert Legitbot.bot('Googlebot', '5.140.70.64'),
|
11
|
-
msg: 'No reverse resolve, bot'
|
8
|
+
refute Legitbot.bot('Firefox', '127.0.0.1')
|
9
|
+
assert Legitbot.bot('Googlebot', '5.140.70.64')
|
12
10
|
|
13
11
|
Legitbot.bot('Firefox', '127.0.0.1') do |_bot|
|
14
12
|
flunk 'No bot Firefox is possible'
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'dns_mock'
|
4
|
+
require 'json'
|
5
|
+
|
6
|
+
TEST_DNS_RECORDS = {
|
7
|
+
# Malicious
|
8
|
+
'149.210.164.47' => {
|
9
|
+
ptr: %w[malicious.spam.co]
|
10
|
+
},
|
11
|
+
|
12
|
+
# Ahrefs
|
13
|
+
'ip-54-36-148-0.a.ahrefs.com' => {
|
14
|
+
a: %w[54.36.148.0]
|
15
|
+
},
|
16
|
+
'54.36.148.0' => {
|
17
|
+
ptr: %w[ip-54-36-148-0.a.ahrefs.com]
|
18
|
+
},
|
19
|
+
|
20
|
+
# Alexa
|
21
|
+
'52.86.176.3' => {
|
22
|
+
ptr: %w[crawl-52-86-176-3.alexa.com]
|
23
|
+
},
|
24
|
+
|
25
|
+
# Amazon
|
26
|
+
'crawler-54-166-7-90.amazonadbot.com' => {
|
27
|
+
a: %w[54.166.7.90]
|
28
|
+
},
|
29
|
+
'54.166.7.90' => {
|
30
|
+
ptr: %w[crawler-54-166-7-90.amazonadbot.com]
|
31
|
+
},
|
32
|
+
|
33
|
+
# Apple
|
34
|
+
'17-58-98-60.applebot.apple.com' => {
|
35
|
+
a: %w[17.58.98.60]
|
36
|
+
},
|
37
|
+
'17.58.98.60' => {
|
38
|
+
ptr: %w[17-58-98-60.applebot.apple.com]
|
39
|
+
},
|
40
|
+
|
41
|
+
# Google
|
42
|
+
'crawl-66-249-64-141.googlebot.com' => {
|
43
|
+
a: %w[66.249.64.141]
|
44
|
+
},
|
45
|
+
'66.249.64.141' => {
|
46
|
+
ptr: %w[crawl-66-249-64-141.googlebot.com]
|
47
|
+
},
|
48
|
+
|
49
|
+
# Petalbot
|
50
|
+
'petalbot-114-119-134-10.petalsearch.com' => {
|
51
|
+
a: %w[114.119.134.10]
|
52
|
+
},
|
53
|
+
'114.119.134.10' => {
|
54
|
+
ptr: %w[petalbot-114-119-134-10.petalsearch.com]
|
55
|
+
},
|
56
|
+
|
57
|
+
# Pinterest
|
58
|
+
'crawl-54-236-1-11.pinterest.com' => {
|
59
|
+
a: %w[54.236.1.11]
|
60
|
+
},
|
61
|
+
'54.236.1.11' => {
|
62
|
+
ptr: %w[crawl-54-236-1-11.pinterest.com]
|
63
|
+
}
|
64
|
+
}.freeze
|
65
|
+
|
66
|
+
class DnsServer
|
67
|
+
class << self
|
68
|
+
attr_accessor :mock
|
69
|
+
end
|
70
|
+
|
71
|
+
@mock = DnsMock.start_server records: TEST_DNS_RECORDS
|
72
|
+
end
|
73
|
+
|
74
|
+
module DnsServerMock
|
75
|
+
def before_all
|
76
|
+
super
|
77
|
+
|
78
|
+
Legitbot.resolver_config = {
|
79
|
+
nameserver: 'localhost',
|
80
|
+
nameserver_port: [['localhost', DnsServer.mock.port]]
|
81
|
+
}
|
82
|
+
end
|
83
|
+
|
84
|
+
def after_all
|
85
|
+
Legitbot.resolver_config = nil
|
86
|
+
|
87
|
+
super
|
88
|
+
end
|
89
|
+
end
|
data/test/oracle_test.rb
CHANGED
@@ -7,13 +7,13 @@ class OracleTest < Minitest::Test
|
|
7
7
|
def test_malicious_ip
|
8
8
|
ip = '149.210.164.47'
|
9
9
|
match = Legitbot::Oracle.new ip
|
10
|
-
|
10
|
+
refute match.valid?
|
11
11
|
end
|
12
12
|
|
13
13
|
def test_valid_ip
|
14
14
|
ip = '148.64.56.64'
|
15
15
|
match = Legitbot::Oracle.new ip
|
16
|
-
assert match.valid
|
16
|
+
assert match.valid?
|
17
17
|
end
|
18
18
|
|
19
19
|
def test_malicious_ua
|
@@ -21,8 +21,8 @@ class OracleTest < Minitest::Test
|
|
21
21
|
'Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)',
|
22
22
|
'149.210.164.47'
|
23
23
|
)
|
24
|
-
assert bot
|
25
|
-
|
24
|
+
assert bot
|
25
|
+
refute bot.valid?
|
26
26
|
end
|
27
27
|
|
28
28
|
def test_valid_ua
|
@@ -30,7 +30,7 @@ class OracleTest < Minitest::Test
|
|
30
30
|
'Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)',
|
31
31
|
'148.64.56.64'
|
32
32
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
33
|
+
assert bot
|
34
|
+
assert bot.valid?
|
35
35
|
end
|
36
36
|
end
|
data/test/petalbot_test.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class PetalbotTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_malicious_ip
|
8
13
|
ip = '149.210.164.47'
|
9
14
|
match = Legitbot::Petalbot.new ip
|
10
|
-
|
15
|
+
refute match.valid?
|
11
16
|
end
|
12
17
|
|
13
18
|
def test_valid_ip
|
14
|
-
ip = '114.119.
|
19
|
+
ip = '114.119.134.10'
|
15
20
|
match = Legitbot::Petalbot.new ip
|
16
|
-
assert match.valid
|
21
|
+
assert match.valid?
|
17
22
|
end
|
18
23
|
|
19
24
|
def test_malicious_ua
|
@@ -21,17 +26,17 @@ class PetalbotTest < Minitest::Test
|
|
21
26
|
'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
|
22
27
|
'149.210.164.47'
|
23
28
|
)
|
24
|
-
assert bot
|
25
|
-
|
29
|
+
assert bot
|
30
|
+
refute bot.valid?
|
26
31
|
end
|
27
32
|
|
28
33
|
def test_valid_ua
|
29
34
|
bot = Legitbot.bot(
|
30
35
|
'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
|
31
|
-
'114.119.
|
36
|
+
'114.119.134.10'
|
32
37
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
38
|
+
assert bot
|
39
|
+
assert bot.valid?
|
35
40
|
end
|
36
41
|
|
37
42
|
def test_valid_name
|
data/test/pinterest_test.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'minitest/autorun'
|
4
|
+
require 'minitest/hooks/test'
|
5
|
+
require 'lib/dns_server_mock'
|
4
6
|
require 'legitbot'
|
5
7
|
|
6
8
|
class PinterestTest < Minitest::Test
|
9
|
+
include Minitest::Hooks
|
10
|
+
include DnsServerMock
|
11
|
+
|
7
12
|
def test_malicious_ip
|
8
13
|
ip = '149.210.164.47'
|
9
14
|
match = Legitbot::Pinterest.new ip
|
10
|
-
|
15
|
+
refute match.valid?
|
11
16
|
end
|
12
17
|
|
13
18
|
def test_valid_ip
|
14
19
|
ip = '54.236.1.11'
|
15
20
|
match = Legitbot::Pinterest.new ip
|
16
|
-
assert match.valid
|
21
|
+
assert match.valid?
|
17
22
|
end
|
18
23
|
|
19
24
|
def test_malicious_ua
|
@@ -21,8 +26,8 @@ class PinterestTest < Minitest::Test
|
|
21
26
|
'Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)',
|
22
27
|
'149.210.164.47'
|
23
28
|
)
|
24
|
-
assert bot
|
25
|
-
|
29
|
+
assert bot
|
30
|
+
refute bot.valid?
|
26
31
|
end
|
27
32
|
|
28
33
|
def test_valid_ua
|
@@ -30,8 +35,8 @@ class PinterestTest < Minitest::Test
|
|
30
35
|
'Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)',
|
31
36
|
'54.236.1.11'
|
32
37
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
38
|
+
assert bot
|
39
|
+
assert bot.valid?
|
35
40
|
end
|
36
41
|
|
37
42
|
# rubocop:disable Layout/LineLength
|
data/test/twitter_test.rb
CHANGED
@@ -7,13 +7,13 @@ class TwitterTest < Minitest::Test
|
|
7
7
|
def test_malicious_ip
|
8
8
|
ip = '149.210.164.47'
|
9
9
|
match = Legitbot::Twitter.new ip
|
10
|
-
|
10
|
+
refute match.valid?
|
11
11
|
end
|
12
12
|
|
13
13
|
def test_valid_ip
|
14
14
|
ip = '199.16.156.125'
|
15
15
|
match = Legitbot::Twitter.new ip
|
16
|
-
assert match.valid
|
16
|
+
assert match.valid?
|
17
17
|
end
|
18
18
|
|
19
19
|
def test_malicious_ua
|
@@ -21,8 +21,8 @@ class TwitterTest < Minitest::Test
|
|
21
21
|
'Twitterbot/1.0',
|
22
22
|
'149.210.164.47'
|
23
23
|
)
|
24
|
-
assert bot
|
25
|
-
|
24
|
+
assert bot
|
25
|
+
refute bot.valid?
|
26
26
|
end
|
27
27
|
|
28
28
|
def test_valid_ua
|
@@ -30,7 +30,7 @@ class TwitterTest < Minitest::Test
|
|
30
30
|
'Twitterbot/1.0',
|
31
31
|
'199.16.156.125'
|
32
32
|
)
|
33
|
-
assert bot
|
34
|
-
assert bot.valid
|
33
|
+
assert bot
|
34
|
+
assert bot.valid?
|
35
35
|
end
|
36
36
|
end
|
metadata
CHANGED
@@ -1,35 +1,35 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-03-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: fast_interval_tree
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0.
|
19
|
+
version: '0.2'
|
20
20
|
- - ">="
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: 0.
|
22
|
+
version: 0.2.2
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
27
|
- - "~>"
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: '0.
|
29
|
+
version: '0.2'
|
30
30
|
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: 0.
|
32
|
+
version: 0.2.2
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: irrc
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -70,6 +70,26 @@ dependencies:
|
|
70
70
|
- - ">="
|
71
71
|
- !ruby/object:Gem::Version
|
72
72
|
version: 0.8.0
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: dns_mock
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 1.5.0
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.5.0
|
83
|
+
type: :development
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 1.5.0
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 1.5.0
|
73
93
|
- !ruby/object:Gem::Dependency
|
74
94
|
name: minitest
|
75
95
|
requirement: !ruby/object:Gem::Requirement
|
@@ -90,6 +110,26 @@ dependencies:
|
|
90
110
|
- - ">="
|
91
111
|
- !ruby/object:Gem::Version
|
92
112
|
version: 5.1.0
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: minitest-hooks
|
115
|
+
requirement: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - "~>"
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '1.5'
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: 1.5.0
|
123
|
+
type: :development
|
124
|
+
prerelease: false
|
125
|
+
version_requirements: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - "~>"
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '1.5'
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: 1.5.0
|
93
133
|
- !ruby/object:Gem::Dependency
|
94
134
|
name: rake
|
95
135
|
requirement: !ruby/object:Gem::Requirement
|
@@ -116,20 +156,40 @@ dependencies:
|
|
116
156
|
requirements:
|
117
157
|
- - "~>"
|
118
158
|
- !ruby/object:Gem::Version
|
119
|
-
version: 1.
|
159
|
+
version: 1.24.0
|
120
160
|
- - ">="
|
121
161
|
- !ruby/object:Gem::Version
|
122
|
-
version: 1.
|
162
|
+
version: 1.24.0
|
123
163
|
type: :development
|
124
164
|
prerelease: false
|
125
165
|
version_requirements: !ruby/object:Gem::Requirement
|
126
166
|
requirements:
|
127
167
|
- - "~>"
|
128
168
|
- !ruby/object:Gem::Version
|
129
|
-
version: 1.
|
169
|
+
version: 1.24.0
|
130
170
|
- - ">="
|
131
171
|
- !ruby/object:Gem::Version
|
132
|
-
version: 1.
|
172
|
+
version: 1.24.0
|
173
|
+
- !ruby/object:Gem::Dependency
|
174
|
+
name: rubocop-minitest
|
175
|
+
requirement: !ruby/object:Gem::Requirement
|
176
|
+
requirements:
|
177
|
+
- - "~>"
|
178
|
+
- !ruby/object:Gem::Version
|
179
|
+
version: 0.17.0
|
180
|
+
- - ">="
|
181
|
+
- !ruby/object:Gem::Version
|
182
|
+
version: 0.17.0
|
183
|
+
type: :development
|
184
|
+
prerelease: false
|
185
|
+
version_requirements: !ruby/object:Gem::Requirement
|
186
|
+
requirements:
|
187
|
+
- - "~>"
|
188
|
+
- !ruby/object:Gem::Version
|
189
|
+
version: 0.17.0
|
190
|
+
- - ">="
|
191
|
+
- !ruby/object:Gem::Version
|
192
|
+
version: 0.17.0
|
133
193
|
description: Does Web request come from a real search engine or from an impersonating
|
134
194
|
agent?
|
135
195
|
email: self@alaz.me
|
@@ -150,6 +210,7 @@ files:
|
|
150
210
|
- lib/legitbot.rb
|
151
211
|
- lib/legitbot/ahrefs.rb
|
152
212
|
- lib/legitbot/alexa.rb
|
213
|
+
- lib/legitbot/amazon.rb
|
153
214
|
- lib/legitbot/apple.rb
|
154
215
|
- lib/legitbot/baidu.rb
|
155
216
|
- lib/legitbot/bing.rb
|
@@ -167,8 +228,13 @@ files:
|
|
167
228
|
- lib/legitbot/validators/ip_ranges.rb
|
168
229
|
- lib/legitbot/version.rb
|
169
230
|
- lib/legitbot/yandex.rb
|
231
|
+
- rakelib/bump.rake
|
232
|
+
- rakelib/bundler.rake
|
233
|
+
- rakelib/console.rake
|
234
|
+
- rakelib/test.rake
|
170
235
|
- test/ahrefs_test.rb
|
171
236
|
- test/alexa_test.rb
|
237
|
+
- test/amazon_test.rb
|
172
238
|
- test/apple_test.rb
|
173
239
|
- test/botmatch_test.rb
|
174
240
|
- test/facebook_test.rb
|
@@ -176,6 +242,7 @@ files:
|
|
176
242
|
- test/legitbot/validators/domains_test.rb
|
177
243
|
- test/legitbot/validators/ip_ranges_test.rb
|
178
244
|
- test/legitbot_test.rb
|
245
|
+
- test/lib/dns_server_mock.rb
|
179
246
|
- test/oracle_test.rb
|
180
247
|
- test/petalbot_test.rb
|
181
248
|
- test/pinterest_test.rb
|
@@ -183,7 +250,8 @@ files:
|
|
183
250
|
homepage: https://github.com/alaz/legitbot
|
184
251
|
licenses:
|
185
252
|
- Apache-2.0
|
186
|
-
metadata:
|
253
|
+
metadata:
|
254
|
+
rubygems_mfa_required: 'true'
|
187
255
|
post_install_message:
|
188
256
|
rdoc_options:
|
189
257
|
- "--charset=UTF-8"
|
@@ -193,28 +261,29 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
193
261
|
requirements:
|
194
262
|
- - ">="
|
195
263
|
- !ruby/object:Gem::Version
|
196
|
-
version: 2.
|
264
|
+
version: 2.6.0
|
197
265
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
198
266
|
requirements:
|
199
267
|
- - ">="
|
200
268
|
- !ruby/object:Gem::Version
|
201
269
|
version: '0'
|
202
270
|
requirements: []
|
203
|
-
rubygems_version: 3.
|
271
|
+
rubygems_version: 3.2.32
|
204
272
|
signing_key:
|
205
273
|
specification_version: 4
|
206
274
|
summary: 'Validate requests from Web crawlers: impersonating or not?'
|
207
275
|
test_files:
|
208
|
-
- test/legitbot_test.rb
|
209
|
-
- test/legitbot/validators/domains_test.rb
|
210
|
-
- test/legitbot/validators/ip_ranges_test.rb
|
211
|
-
- test/pinterest_test.rb
|
212
|
-
- test/alexa_test.rb
|
213
276
|
- test/ahrefs_test.rb
|
277
|
+
- test/alexa_test.rb
|
278
|
+
- test/amazon_test.rb
|
214
279
|
- test/apple_test.rb
|
215
|
-
- test/oracle_test.rb
|
216
|
-
- test/google_test.rb
|
217
|
-
- test/petalbot_test.rb
|
218
280
|
- test/botmatch_test.rb
|
219
281
|
- test/facebook_test.rb
|
282
|
+
- test/google_test.rb
|
283
|
+
- test/legitbot/validators/domains_test.rb
|
284
|
+
- test/legitbot/validators/ip_ranges_test.rb
|
285
|
+
- test/legitbot_test.rb
|
286
|
+
- test/oracle_test.rb
|
287
|
+
- test/petalbot_test.rb
|
288
|
+
- test/pinterest_test.rb
|
220
289
|
- test/twitter_test.rb
|