legitbot 1.4.5 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3eb1ad3043aa5d9c87f6417fa287121cc5ec37fe1fcb6b043f9e6ee9a88ec771
4
- data.tar.gz: 01764d6f90fd4f72751d18ea17d4f6921103ba2e050fe8f179d13d889d920347
3
+ metadata.gz: df2907ab34b159613adf21c3301607f53fa893e0823f81560e45ad2c8303075b
4
+ data.tar.gz: fe913e9b72878969839326da227c1169ad8234a8ae335e276f614b53e5777d42
5
5
  SHA512:
6
- metadata.gz: '0269f1dd308855bb4dd15e82601dba739b2f77bfb2777d802f4b813b0a73d31f052e3f75cbc26812589a3816aefe6d49c9b9ab20b59b229a346c4d8da27b46a5'
7
- data.tar.gz: 1e2f8e9e5e7ae7812df3016e6f0c6e45af9f4b60d67512f9678be1437ccbc8dcf55ae1fbeb404274d03b80c32e6bc8e7678b22654c089900b11f2681fc090b73
6
+ metadata.gz: f8af647da764cd722f27f2936f8f9728f9cdd7b313e295d9c80ebe7144a601a5360f5b615039d5afe75f7ded891e3bfc06292383e9e41e342977d39f1e485dab
7
+ data.tar.gz: 74e3dd5cb2c4d2e7df717c6bb3a072c74aa51b922f7f71d280cde82387ba458d30970f85c4f79508852ef5c2969196bd640427bf2323104229616960314fb9df
@@ -13,7 +13,7 @@ jobs:
13
13
  strategy:
14
14
  fail-fast: false
15
15
  matrix:
16
- ruby: [ jruby, 2.5, 2.6, 2.7 ]
16
+ ruby: [ jruby, 2.6, 2.7, 3.0 ]
17
17
 
18
18
  steps:
19
19
  - uses: actions/checkout@v2
@@ -41,7 +41,7 @@ jobs:
41
41
 
42
42
  strategy:
43
43
  matrix:
44
- ruby: [ 2.7 ]
44
+ ruby: [ 3.0 ]
45
45
 
46
46
  steps:
47
47
  - uses: actions/checkout@v2
data/.rubocop.yml CHANGED
@@ -1,3 +1,6 @@
1
+ require:
2
+ - rubocop-minitest
3
+
1
4
  AllCops:
2
5
  CacheRootDirectory: 'vendor'
3
6
  NewCops: enable
@@ -8,3 +11,6 @@ Gemspec/RequiredRubyVersion:
8
11
 
9
12
  Naming/MemoizedInstanceVariableName:
10
13
  Enabled: false
14
+
15
+ Style/MapToHash:
16
+ Enabled: false
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.7.3
1
+ 3.0.3
data/README.md CHANGED
@@ -39,10 +39,18 @@ Rack::Attack.blocklist 'fake search engines' do |request|
39
39
  end
40
40
  ```
41
41
 
42
+ ## Versioning
43
+
44
+ [Semantic versioning](https://semver.org/) with the following clarifications:
45
+
46
+ * MINOR version is incremented when support for new bots is added.
47
+ * PATCH version is incremented when validation logic for a bot changes (IP list updated, for example).
48
+
42
49
  ## Supported
43
50
 
44
51
  * [Ahrefs](https://ahrefs.com/robot)
45
52
  * [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
53
+ * [Amazon AdBot](https://adbot.amazon.com/index.html)
46
54
  * [Applebot](https://support.apple.com/en-us/HT204683)
47
55
  * [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
48
56
  * [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
@@ -59,7 +67,7 @@ end
59
67
 
60
68
  Apache 2.0
61
69
 
62
- ## References
70
+ ## Other projects
63
71
 
64
72
  * Play Framework variant in Scala: [play-legitbot](https://github.com/osinka/play-legitbot)
65
73
  * Article [When (Fake) Googlebots Attack Your Rails App](http://jessewolgamott.com/blog/2015/11/17/when-fake-googlebots-attack-your-rails-app/)
@@ -72,3 +80,5 @@ Apache 2.0
72
80
  classify IP as a search engine, but also label them as suspicious and
73
81
  reports the number of days since the last activity. My implementation of
74
82
  the protocol in Scala is [here](https://github.com/osinka/httpbl).
83
+ * [CIDRAM](https://github.com/CIDRAM/CIDRAM) is a PHP routing manager with built-in support
84
+ to validate bots.
data/Rakefile CHANGED
@@ -1,25 +1 @@
1
1
  # frozen_string_literal: true
2
-
3
- require 'rubygems'
4
- require 'bundler'
5
- require 'bump/tasks'
6
- require 'rake/testtask'
7
- Bundler::GemHelper.install_tasks
8
-
9
- Bump.tag_by_default = true
10
-
11
- Rake::TestTask.new do |t|
12
- t.libs << 'test'
13
- t.test_files = FileList['test/**/*_test.rb']
14
- t.warning = true
15
- t.verbose = true
16
- end
17
-
18
- desc 'Start a console'
19
- task :console do
20
- require 'irb'
21
- ARGV.clear
22
- IRB.start
23
- end
24
-
25
- task default: %w[test]
data/legitbot.gemspec CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'English'
4
+
3
5
  $LOAD_PATH.push File.expand_path('lib', __dir__)
4
6
  require 'legitbot/version'
5
7
 
@@ -13,16 +15,22 @@ Gem::Specification.new do |spec|
13
15
  spec.homepage = 'https://github.com/alaz/legitbot'
14
16
  spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
15
17
  spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
18
+ spec.metadata = {
19
+ 'rubygems_mfa_required' => 'true'
20
+ }
16
21
 
17
- spec.required_ruby_version = '>= 2.5.0'
18
- spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
22
+ spec.required_ruby_version = '>= 2.6.0'
23
+ spec.add_dependency 'fast_interval_tree', '~> 0.2', '>= 0.2.2'
19
24
  spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
20
25
  spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
26
+ spec.add_development_dependency 'dns_mock', '~> 1.5.0', '>= 1.5.0'
21
27
  spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
28
+ spec.add_development_dependency 'minitest-hooks', '~> 1.5', '>= 1.5.0'
22
29
  spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
23
- spec.add_development_dependency 'rubocop', '~> 1.22.0', '>= 1.22.0'
30
+ spec.add_development_dependency 'rubocop', '~> 1.24.0', '>= 1.24.0'
31
+ spec.add_development_dependency 'rubocop-minitest', '~> 0.17.0', '>= 0.17.0'
24
32
 
25
33
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
26
34
  spec.rdoc_options = ['--charset=UTF-8']
27
- spec.test_files = Dir.glob('test/**/*')
35
+ spec.test_files = Dir.glob('test/**/*').reject { |f| f.start_with? 'test/lib' }
28
36
  end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://adbot.amazon.com/index.html
5
+ class Amazon < BotMatch
6
+ domains 'amazonadbot.com.'
7
+ end
8
+
9
+ rule Legitbot::Amazon, %w[AmazonAdBot]
10
+ end
@@ -5,16 +5,17 @@ module Legitbot # :nodoc:
5
5
  class DuckDuckGo < BotMatch
6
6
  ip_ranges %w[
7
7
  20.191.45.212
8
- 23.21.227.69
9
8
  40.88.21.235
10
- 50.16.241.113
11
- 50.16.241.114
12
- 50.16.241.117
13
- 50.16.247.234
14
- 52.5.190.19
15
- 52.204.97.54
16
- 54.197.234.188
17
- 54.208.100.253
9
+ 40.76.173.151
10
+ 40.76.163.7
11
+ 20.185.79.47
12
+ 52.142.26.175
13
+ 20.185.79.15
14
+ 52.142.24.149
15
+ 40.76.162.208
16
+ 40.76.163.23
17
+ 40.76.162.191
18
+ 40.76.162.247
18
19
  54.208.102.37
19
20
  107.21.1.8
20
21
  ]
@@ -64,6 +64,8 @@ module Legitbot
64
64
  return nil if record.nil?
65
65
 
66
66
  resolver.getaddress(record.to_s).to_s
67
+ rescue Resolv::ResolvError
68
+ nil
67
69
  end
68
70
  end
69
71
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.4.5'
4
+ VERSION = '1.6.0'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -5,6 +5,7 @@ require_relative 'legitbot/botmatch'
5
5
 
6
6
  require_relative 'legitbot/ahrefs'
7
7
  require_relative 'legitbot/alexa'
8
+ require_relative 'legitbot/amazon'
8
9
  require_relative 'legitbot/apple'
9
10
  require_relative 'legitbot/baidu'
10
11
  require_relative 'legitbot/bing'
data/rakelib/bump.rake ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bump/tasks'
4
+
5
+ Bump.tag_by_default = true
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler'
4
+ Bundler::GemHelper.install_tasks
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ desc 'Start a console'
4
+ task :console do
5
+ require 'irb'
6
+ ARGV.clear
7
+ IRB.start
8
+ end
data/rakelib/test.rake ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.libs << 'test'
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ t.warning = true
9
+ t.verbose = true
10
+ end
11
+
12
+ task default: %w[test]
data/test/ahrefs_test.rb CHANGED
@@ -1,19 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'minitest/autorun'
4
+ require 'minitest/hooks/test'
5
+ require 'lib/dns_server_mock'
4
6
  require 'legitbot'
5
7
 
6
8
  class AhrefsTest < Minitest::Test
9
+ include Minitest::Hooks
10
+ include DnsServerMock
11
+
7
12
  def test_malicious_ip
8
13
  ip = '149.210.164.47'
9
14
  match = Legitbot::Ahrefs.new ip
10
- assert !match.valid?, msg: "#{ip} is not a real Ahrefs IP"
15
+ refute match.valid?
11
16
  end
12
17
 
13
18
  def test_valid_ip
14
19
  ip = '54.36.148.0'
15
20
  match = Legitbot::Ahrefs.new ip
16
- assert match.valid?, msg: "#{ip} is a valid Ahrefs IP"
21
+ assert match.valid?
17
22
  end
18
23
 
19
24
  def test_malicious_ua
@@ -21,8 +26,8 @@ class AhrefsTest < Minitest::Test
21
26
  'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)',
22
27
  '149.210.164.47'
23
28
  )
24
- assert bot, msg: 'Ahrefs detected from User-Agent'
25
- assert !bot.valid?, msg: 'Not a valid Ahrefs'
29
+ assert bot
30
+ refute bot.valid?
26
31
  end
27
32
 
28
33
  def test_valid_ua
@@ -30,7 +35,7 @@ class AhrefsTest < Minitest::Test
30
35
  'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)',
31
36
  '54.36.148.0'
32
37
  )
33
- assert bot, msg: 'Ahrefs detected from User-Agent'
34
- assert bot.valid?, msg: 'Valid Ahrefs'
38
+ assert bot
39
+ assert bot.valid?
35
40
  end
36
41
  end
data/test/alexa_test.rb CHANGED
@@ -1,19 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'minitest/autorun'
4
+ require 'minitest/hooks/test'
5
+ require 'lib/dns_server_mock'
4
6
  require 'legitbot'
5
7
 
6
8
  class AlexaTest < Minitest::Test
9
+ include Minitest::Hooks
10
+ include DnsServerMock
11
+
7
12
  def test_malicious_ip
8
13
  ip = '149.210.164.47'
9
14
  match = Legitbot::Alexa.new ip
10
- assert !match.valid?, msg: "#{ip} is not a real Alexa IP"
15
+ refute match.valid?
11
16
  end
12
17
 
13
18
  def test_valid_ip
14
19
  ip = '52.86.176.3'
15
20
  match = Legitbot::Alexa.new ip
16
- assert match.valid?, msg: "#{ip} is a valid Alexa IP"
21
+ assert match.valid?
17
22
  end
18
23
 
19
24
  def test_malicious_ua
@@ -21,8 +26,8 @@ class AlexaTest < Minitest::Test
21
26
  'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
22
27
  '149.210.164.47'
23
28
  )
24
- assert bot, msg: 'Alexa detected from User-Agent'
25
- assert !bot.valid?, msg: 'Not a valid Alexa'
29
+ assert bot
30
+ refute bot.valid?
26
31
  end
27
32
 
28
33
  def test_valid_ua
@@ -30,7 +35,7 @@ class AlexaTest < Minitest::Test
30
35
  'Mozilla/5.0 (compatible; Alexabot/1.0; +http://www.alexa.com/help/certifyscan; certifyscan@alexa.com)',
31
36
  '52.86.176.3'
32
37
  )
33
- assert bot, msg: 'Alexa detected from User-Agent'
34
- assert bot.valid?, msg: 'Valid Alexa'
38
+ assert bot
39
+ assert bot.valid?
35
40
  end
36
41
  end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+ require 'minitest/hooks/test'
5
+ require 'lib/dns_server_mock'
6
+ require 'legitbot'
7
+
8
+ class AmazonTest < Minitest::Test
9
+ include Minitest::Hooks
10
+ include DnsServerMock
11
+
12
+ def test_malicious_ip
13
+ ip = '149.210.164.47'
14
+ match = Legitbot::Amazon.new ip
15
+ refute match.valid?
16
+ end
17
+
18
+ def test_valid_ip
19
+ ip = '54.166.7.90'
20
+ match = Legitbot::Amazon.new ip
21
+ assert match.valid?
22
+ end
23
+
24
+ def test_malicious_ua
25
+ bot = Legitbot.bot(
26
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
27
+ '149.210.164.47'
28
+ )
29
+ assert bot
30
+ refute bot.valid?
31
+ end
32
+
33
+ def test_valid_ua
34
+ bot = Legitbot.bot(
35
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
36
+ '54.166.7.90'
37
+ )
38
+ assert bot
39
+ assert bot.valid?
40
+ end
41
+
42
+ def test_valid_name
43
+ bot = Legitbot.bot(
44
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
45
+ '54.166.7.90'
46
+ )
47
+ assert_equal :amazon, bot.detected_as
48
+ end
49
+
50
+ def test_fake_name
51
+ bot = Legitbot.bot(
52
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
53
+ '81.1.172.108'
54
+ )
55
+ assert_equal :amazon, bot.detected_as
56
+ end
57
+ end
data/test/apple_test.rb CHANGED
@@ -1,19 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'minitest/autorun'
4
+ require 'minitest/hooks/test'
5
+ require 'lib/dns_server_mock'
4
6
  require 'legitbot'
5
7
 
6
8
  class AppleTest < Minitest::Test
9
+ include Minitest::Hooks
10
+ include DnsServerMock
11
+
7
12
  def test_valid_ip
8
13
  ip = '17.58.98.60'
9
14
  match = Legitbot::Apple.new(ip)
10
- assert match.valid?, msg: "#{ip} is a valid Applebot IP"
15
+ assert match.valid?
11
16
  end
12
17
 
13
18
  def test_invalid_ip
14
19
  ip = '127.0.0.1'
15
20
  match = Legitbot::Apple.new(ip)
16
- assert match.fake?, msg: "#{ip} is a fake Applebot IP"
21
+ assert match.fake?
17
22
  end
18
23
 
19
24
  # rubocop:disable Layout/LineLength
@@ -23,7 +28,7 @@ class AppleTest < Minitest::Test
23
28
  '17.58.98.60'
24
29
  )
25
30
  assert_equal :apple, bot.detected_as
26
- assert bot.valid?, msg: 'A valid Applebot User-agent and IP'
31
+ assert bot.valid?
27
32
  end
28
33
  # rubocop:enable Layout/LineLength
29
34
  end
@@ -1,11 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'minitest/autorun'
4
+ require 'minitest/hooks/test'
5
+ require 'lib/dns_server_mock'
4
6
  require 'legitbot'
5
7
 
6
8
  class BotMatchTest < Minitest::Test
9
+ include Minitest::Hooks
10
+ include DnsServerMock
11
+
7
12
  def test_valid_class_syntax
8
- assert Legitbot::Google.valid?('66.249.64.141'), msg: 'Valid Googlebot'
9
- assert Legitbot::Google.fake?('149.210.164.47'), msg: 'Fake Googlebot'
13
+ assert Legitbot::Google.valid?('66.249.64.141')
14
+ assert Legitbot::Google.fake?('149.210.164.47')
10
15
  end
11
16
  end
@@ -20,44 +20,48 @@ class FacebookTest < Minitest::Test
20
20
  def test_valid_ip
21
21
  ip = '69.63.186.89'
22
22
  match = Legitbot::Facebook.new(ip)
23
- assert match.valid?, msg: "#{ip} is a valid Facebook IP"
23
+ assert match.valid?
24
24
 
25
25
  ip = '69.171.251.1'
26
26
  match = Legitbot::Facebook.new(ip)
27
- assert match.valid?, msg: "#{ip} is a valid Facebook IP"
27
+ assert match.valid?
28
28
  end
29
29
 
30
30
  def test_invalid_ip
31
31
  ip = '127.0.0.1'
32
32
  match = Legitbot::Facebook.new(ip)
33
- assert match.fake?, msg: "#{ip} is a fake Facebook IP"
33
+ assert match.fake?
34
34
  end
35
35
 
36
- # rubocop:disable Layout/LineLength, Metrics/MethodLength
37
- def test_user_agent
36
+ def test_user_agent1
38
37
  Legitbot.bot(
39
38
  'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
40
39
  '31.13.76.56'
41
40
  ) do |bot|
42
41
  assert_equal :facebook, bot.detected_as
43
- assert bot.valid?, msg: 'true Facebook'
42
+ assert bot.valid?
44
43
  end
44
+ end
45
45
 
46
+ def test_user_agent2
46
47
  Legitbot.bot(
47
48
  'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',
48
49
  '173.252.87.8'
49
50
  ) do |bot|
50
51
  assert_equal :facebook, bot.detected_as
51
- assert bot.valid?, msg: 'true Facebook'
52
+ assert bot.valid?
52
53
  end
54
+ end
53
55
 
56
+ # rubocop:disable Layout/LineLength
57
+ def test_user_agent3
54
58
  Legitbot.bot(
55
59
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 facebookexternalhit/1.1 Facebot Twitterbot/1.0',
56
60
  '92.243.181.7'
57
61
  ) do |bot|
58
- assert %i[facebook twitter].include?(bot.detected_as)
59
- assert bot.fake?, msg: 'fake Facebook'
62
+ assert_includes %i[facebook twitter], bot.detected_as
63
+ assert bot.fake?
60
64
  end
61
65
  end
62
- # rubocop:enable Layout/LineLength, Metrics/MethodLength
66
+ # rubocop:enable Layout/LineLength
63
67
  end
data/test/google_test.rb CHANGED
@@ -1,19 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'minitest/autorun'
4
+ require 'minitest/hooks/test'
5
+ require 'lib/dns_server_mock'
4
6
  require 'legitbot'
5
7
 
6
8
  class GoogleTest < Minitest::Test
9
+ include Minitest::Hooks
10
+ include DnsServerMock
11
+
7
12
  def test_malicious_ip
8
13
  ip = '149.210.164.47'
9
14
  match = Legitbot::Google.new ip
10
- assert !match.valid?, msg: "#{ip} is not a real Googlebot IP"
15
+ refute match.valid?
11
16
  end
12
17
 
13
18
  def test_valid_ip
14
19
  ip = '66.249.64.141'
15
20
  match = Legitbot::Google.new ip
16
- assert match.valid?, msg: "#{ip} is a valid Googlebot IP"
21
+ assert match.valid?
17
22
  end
18
23
 
19
24
  def test_malicious_ua
@@ -21,8 +26,8 @@ class GoogleTest < Minitest::Test
21
26
  'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
22
27
  '149.210.164.47'
23
28
  )
24
- assert bot, msg: 'Googlebot detected from User-Agent'
25
- assert !bot.valid?, msg: 'Not a valid Googlebot'
29
+ assert bot
30
+ refute bot.valid?
26
31
  end
27
32
 
28
33
  def test_valid_ua
@@ -30,8 +35,8 @@ class GoogleTest < Minitest::Test
30
35
  'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
31
36
  '66.249.64.141'
32
37
  )
33
- assert bot, msg: 'Googlebot detected from User-Agent'
34
- assert bot.valid?, msg: 'Valid Googlebot'
38
+ assert bot
39
+ assert bot.valid?
35
40
  end
36
41
 
37
42
  def test_valid_name
@@ -4,6 +4,7 @@ require 'minitest/autorun'
4
4
  require 'legitbot'
5
5
 
6
6
  module Legitbot
7
+ # rubocop:disable Minitest/MultipleAssertions
7
8
  module Validators
8
9
  class NoRanges
9
10
  include IpRanges
@@ -128,4 +129,5 @@ module Legitbot
128
129
  end
129
130
  end
130
131
  end
132
+ # rubocop:enable Minitest/MultipleAssertions
131
133
  end
@@ -5,10 +5,8 @@ require 'legitbot'
5
5
 
6
6
  class LegitbotTest < Minitest::Test
7
7
  def test_rules
8
- assert !Legitbot.bot('Firefox', '127.0.0.1'),
9
- msg: 'Not a bot'
10
- assert Legitbot.bot('Googlebot', '5.140.70.64'),
11
- msg: 'No reverse resolve, bot'
8
+ refute Legitbot.bot('Firefox', '127.0.0.1')
9
+ assert Legitbot.bot('Googlebot', '5.140.70.64')
12
10
 
13
11
  Legitbot.bot('Firefox', '127.0.0.1') do |_bot|
14
12
  flunk 'No bot Firefox is possible'
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dns_mock'
4
+ require 'json'
5
+
6
+ TEST_DNS_RECORDS = {
7
+ # Malicious
8
+ '149.210.164.47' => {
9
+ ptr: %w[malicious.spam.co]
10
+ },
11
+
12
+ # Ahrefs
13
+ 'ip-54-36-148-0.a.ahrefs.com' => {
14
+ a: %w[54.36.148.0]
15
+ },
16
+ '54.36.148.0' => {
17
+ ptr: %w[ip-54-36-148-0.a.ahrefs.com]
18
+ },
19
+
20
+ # Alexa
21
+ '52.86.176.3' => {
22
+ ptr: %w[crawl-52-86-176-3.alexa.com]
23
+ },
24
+
25
+ # Amazon
26
+ 'crawler-54-166-7-90.amazonadbot.com' => {
27
+ a: %w[54.166.7.90]
28
+ },
29
+ '54.166.7.90' => {
30
+ ptr: %w[crawler-54-166-7-90.amazonadbot.com]
31
+ },
32
+
33
+ # Apple
34
+ '17-58-98-60.applebot.apple.com' => {
35
+ a: %w[17.58.98.60]
36
+ },
37
+ '17.58.98.60' => {
38
+ ptr: %w[17-58-98-60.applebot.apple.com]
39
+ },
40
+
41
+ # Google
42
+ 'crawl-66-249-64-141.googlebot.com' => {
43
+ a: %w[66.249.64.141]
44
+ },
45
+ '66.249.64.141' => {
46
+ ptr: %w[crawl-66-249-64-141.googlebot.com]
47
+ },
48
+
49
+ # Petalbot
50
+ 'petalbot-114-119-134-10.petalsearch.com' => {
51
+ a: %w[114.119.134.10]
52
+ },
53
+ '114.119.134.10' => {
54
+ ptr: %w[petalbot-114-119-134-10.petalsearch.com]
55
+ },
56
+
57
+ # Pinterest
58
+ 'crawl-54-236-1-11.pinterest.com' => {
59
+ a: %w[54.236.1.11]
60
+ },
61
+ '54.236.1.11' => {
62
+ ptr: %w[crawl-54-236-1-11.pinterest.com]
63
+ }
64
+ }.freeze
65
+
66
+ class DnsServer
67
+ class << self
68
+ attr_accessor :mock
69
+ end
70
+
71
+ @mock = DnsMock.start_server records: TEST_DNS_RECORDS
72
+ end
73
+
74
+ module DnsServerMock
75
+ def before_all
76
+ super
77
+
78
+ Legitbot.resolver_config = {
79
+ nameserver: 'localhost',
80
+ nameserver_port: [['localhost', DnsServer.mock.port]]
81
+ }
82
+ end
83
+
84
+ def after_all
85
+ Legitbot.resolver_config = nil
86
+
87
+ super
88
+ end
89
+ end
data/test/oracle_test.rb CHANGED
@@ -7,13 +7,13 @@ class OracleTest < Minitest::Test
7
7
  def test_malicious_ip
8
8
  ip = '149.210.164.47'
9
9
  match = Legitbot::Oracle.new ip
10
- assert !match.valid?, msg: "#{ip} is not a real Oracle IP"
10
+ refute match.valid?
11
11
  end
12
12
 
13
13
  def test_valid_ip
14
14
  ip = '148.64.56.64'
15
15
  match = Legitbot::Oracle.new ip
16
- assert match.valid?, msg: "#{ip} is a valid Oracle IP"
16
+ assert match.valid?
17
17
  end
18
18
 
19
19
  def test_malicious_ua
@@ -21,8 +21,8 @@ class OracleTest < Minitest::Test
21
21
  'Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)',
22
22
  '149.210.164.47'
23
23
  )
24
- assert bot, msg: 'Oracle detected from User-Agent'
25
- assert !bot.valid?, msg: 'Not a valid Oracle'
24
+ assert bot
25
+ refute bot.valid?
26
26
  end
27
27
 
28
28
  def test_valid_ua
@@ -30,7 +30,7 @@ class OracleTest < Minitest::Test
30
30
  'Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)',
31
31
  '148.64.56.64'
32
32
  )
33
- assert bot, msg: 'Oracle detected from User-Agent'
34
- assert bot.valid?, msg: 'Valid Oracle'
33
+ assert bot
34
+ assert bot.valid?
35
35
  end
36
36
  end
@@ -1,19 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'minitest/autorun'
4
+ require 'minitest/hooks/test'
5
+ require 'lib/dns_server_mock'
4
6
  require 'legitbot'
5
7
 
6
8
  class PetalbotTest < Minitest::Test
9
+ include Minitest::Hooks
10
+ include DnsServerMock
11
+
7
12
  def test_malicious_ip
8
13
  ip = '149.210.164.47'
9
14
  match = Legitbot::Petalbot.new ip
10
- assert !match.valid?, msg: "#{ip} is not a real Petalbot IP"
15
+ refute match.valid?
11
16
  end
12
17
 
13
18
  def test_valid_ip
14
- ip = '114.119.128.10'
19
+ ip = '114.119.134.10'
15
20
  match = Legitbot::Petalbot.new ip
16
- assert match.valid?, msg: "#{ip} is a valid Petalbot IP"
21
+ assert match.valid?
17
22
  end
18
23
 
19
24
  def test_malicious_ua
@@ -21,17 +26,17 @@ class PetalbotTest < Minitest::Test
21
26
  'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
22
27
  '149.210.164.47'
23
28
  )
24
- assert bot, msg: 'Petalbot detected from User-Agent'
25
- assert !bot.valid?, msg: 'Not a valid Petalbot'
29
+ assert bot
30
+ refute bot.valid?
26
31
  end
27
32
 
28
33
  def test_valid_ua
29
34
  bot = Legitbot.bot(
30
35
  'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
31
- '114.119.128.10'
36
+ '114.119.134.10'
32
37
  )
33
- assert bot, msg: 'Petalbot detected from User-Agent'
34
- assert bot.valid?, msg: 'Valid Petalbot'
38
+ assert bot
39
+ assert bot.valid?
35
40
  end
36
41
 
37
42
  def test_valid_name
@@ -1,19 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'minitest/autorun'
4
+ require 'minitest/hooks/test'
5
+ require 'lib/dns_server_mock'
4
6
  require 'legitbot'
5
7
 
6
8
  class PinterestTest < Minitest::Test
9
+ include Minitest::Hooks
10
+ include DnsServerMock
11
+
7
12
  def test_malicious_ip
8
13
  ip = '149.210.164.47'
9
14
  match = Legitbot::Pinterest.new ip
10
- assert !match.valid?, msg: "#{ip} is not a real Pinterest IP"
15
+ refute match.valid?
11
16
  end
12
17
 
13
18
  def test_valid_ip
14
19
  ip = '54.236.1.11'
15
20
  match = Legitbot::Pinterest.new ip
16
- assert match.valid?, msg: "#{ip} is a valid Pinterest IP"
21
+ assert match.valid?
17
22
  end
18
23
 
19
24
  def test_malicious_ua
@@ -21,8 +26,8 @@ class PinterestTest < Minitest::Test
21
26
  'Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)',
22
27
  '149.210.164.47'
23
28
  )
24
- assert bot, msg: 'Pinterest detected from User-Agent'
25
- assert !bot.valid?, msg: 'Not a valid Pinterest'
29
+ assert bot
30
+ refute bot.valid?
26
31
  end
27
32
 
28
33
  def test_valid_ua
@@ -30,8 +35,8 @@ class PinterestTest < Minitest::Test
30
35
  'Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)',
31
36
  '54.236.1.11'
32
37
  )
33
- assert bot, msg: 'Pinterest detected from User-Agent'
34
- assert bot.valid?, msg: 'Valid Pinterest'
38
+ assert bot
39
+ assert bot.valid?
35
40
  end
36
41
 
37
42
  # rubocop:disable Layout/LineLength
data/test/twitter_test.rb CHANGED
@@ -7,13 +7,13 @@ class TwitterTest < Minitest::Test
7
7
  def test_malicious_ip
8
8
  ip = '149.210.164.47'
9
9
  match = Legitbot::Twitter.new ip
10
- assert !match.valid?, msg: "#{ip} is not a real Twitter IP"
10
+ refute match.valid?
11
11
  end
12
12
 
13
13
  def test_valid_ip
14
14
  ip = '199.16.156.125'
15
15
  match = Legitbot::Twitter.new ip
16
- assert match.valid?, msg: "#{ip} is a valid Twitter IP"
16
+ assert match.valid?
17
17
  end
18
18
 
19
19
  def test_malicious_ua
@@ -21,8 +21,8 @@ class TwitterTest < Minitest::Test
21
21
  'Twitterbot/1.0',
22
22
  '149.210.164.47'
23
23
  )
24
- assert bot, msg: 'Twitter detected from User-Agent'
25
- assert !bot.valid?, msg: 'Not a valid Twitter'
24
+ assert bot
25
+ refute bot.valid?
26
26
  end
27
27
 
28
28
  def test_valid_ua
@@ -30,7 +30,7 @@ class TwitterTest < Minitest::Test
30
30
  'Twitterbot/1.0',
31
31
  '199.16.156.125'
32
32
  )
33
- assert bot, msg: 'Twitter detected from User-Agent'
34
- assert bot.valid?, msg: 'Valid Twitter'
33
+ assert bot
34
+ assert bot.valid?
35
35
  end
36
36
  end
metadata CHANGED
@@ -1,35 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.5
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-01 00:00:00.000000000 Z
11
+ date: 2022-03-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: augmented_interval_tree
14
+ name: fast_interval_tree
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0.1'
19
+ version: '0.2'
20
20
  - - ">="
21
21
  - !ruby/object:Gem::Version
22
- version: 0.1.1
22
+ version: 0.2.2
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
27
  - - "~>"
28
28
  - !ruby/object:Gem::Version
29
- version: '0.1'
29
+ version: '0.2'
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
- version: 0.1.1
32
+ version: 0.2.2
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: irrc
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -70,6 +70,26 @@ dependencies:
70
70
  - - ">="
71
71
  - !ruby/object:Gem::Version
72
72
  version: 0.8.0
73
+ - !ruby/object:Gem::Dependency
74
+ name: dns_mock
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: 1.5.0
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.5.0
83
+ type: :development
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 1.5.0
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 1.5.0
73
93
  - !ruby/object:Gem::Dependency
74
94
  name: minitest
75
95
  requirement: !ruby/object:Gem::Requirement
@@ -90,6 +110,26 @@ dependencies:
90
110
  - - ">="
91
111
  - !ruby/object:Gem::Version
92
112
  version: 5.1.0
113
+ - !ruby/object:Gem::Dependency
114
+ name: minitest-hooks
115
+ requirement: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - "~>"
118
+ - !ruby/object:Gem::Version
119
+ version: '1.5'
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: 1.5.0
123
+ type: :development
124
+ prerelease: false
125
+ version_requirements: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - "~>"
128
+ - !ruby/object:Gem::Version
129
+ version: '1.5'
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: 1.5.0
93
133
  - !ruby/object:Gem::Dependency
94
134
  name: rake
95
135
  requirement: !ruby/object:Gem::Requirement
@@ -116,20 +156,40 @@ dependencies:
116
156
  requirements:
117
157
  - - "~>"
118
158
  - !ruby/object:Gem::Version
119
- version: 1.22.0
159
+ version: 1.24.0
120
160
  - - ">="
121
161
  - !ruby/object:Gem::Version
122
- version: 1.22.0
162
+ version: 1.24.0
123
163
  type: :development
124
164
  prerelease: false
125
165
  version_requirements: !ruby/object:Gem::Requirement
126
166
  requirements:
127
167
  - - "~>"
128
168
  - !ruby/object:Gem::Version
129
- version: 1.22.0
169
+ version: 1.24.0
130
170
  - - ">="
131
171
  - !ruby/object:Gem::Version
132
- version: 1.22.0
172
+ version: 1.24.0
173
+ - !ruby/object:Gem::Dependency
174
+ name: rubocop-minitest
175
+ requirement: !ruby/object:Gem::Requirement
176
+ requirements:
177
+ - - "~>"
178
+ - !ruby/object:Gem::Version
179
+ version: 0.17.0
180
+ - - ">="
181
+ - !ruby/object:Gem::Version
182
+ version: 0.17.0
183
+ type: :development
184
+ prerelease: false
185
+ version_requirements: !ruby/object:Gem::Requirement
186
+ requirements:
187
+ - - "~>"
188
+ - !ruby/object:Gem::Version
189
+ version: 0.17.0
190
+ - - ">="
191
+ - !ruby/object:Gem::Version
192
+ version: 0.17.0
133
193
  description: Does Web request come from a real search engine or from an impersonating
134
194
  agent?
135
195
  email: self@alaz.me
@@ -150,6 +210,7 @@ files:
150
210
  - lib/legitbot.rb
151
211
  - lib/legitbot/ahrefs.rb
152
212
  - lib/legitbot/alexa.rb
213
+ - lib/legitbot/amazon.rb
153
214
  - lib/legitbot/apple.rb
154
215
  - lib/legitbot/baidu.rb
155
216
  - lib/legitbot/bing.rb
@@ -167,8 +228,13 @@ files:
167
228
  - lib/legitbot/validators/ip_ranges.rb
168
229
  - lib/legitbot/version.rb
169
230
  - lib/legitbot/yandex.rb
231
+ - rakelib/bump.rake
232
+ - rakelib/bundler.rake
233
+ - rakelib/console.rake
234
+ - rakelib/test.rake
170
235
  - test/ahrefs_test.rb
171
236
  - test/alexa_test.rb
237
+ - test/amazon_test.rb
172
238
  - test/apple_test.rb
173
239
  - test/botmatch_test.rb
174
240
  - test/facebook_test.rb
@@ -176,6 +242,7 @@ files:
176
242
  - test/legitbot/validators/domains_test.rb
177
243
  - test/legitbot/validators/ip_ranges_test.rb
178
244
  - test/legitbot_test.rb
245
+ - test/lib/dns_server_mock.rb
179
246
  - test/oracle_test.rb
180
247
  - test/petalbot_test.rb
181
248
  - test/pinterest_test.rb
@@ -183,7 +250,8 @@ files:
183
250
  homepage: https://github.com/alaz/legitbot
184
251
  licenses:
185
252
  - Apache-2.0
186
- metadata: {}
253
+ metadata:
254
+ rubygems_mfa_required: 'true'
187
255
  post_install_message:
188
256
  rdoc_options:
189
257
  - "--charset=UTF-8"
@@ -193,28 +261,29 @@ required_ruby_version: !ruby/object:Gem::Requirement
193
261
  requirements:
194
262
  - - ">="
195
263
  - !ruby/object:Gem::Version
196
- version: 2.5.0
264
+ version: 2.6.0
197
265
  required_rubygems_version: !ruby/object:Gem::Requirement
198
266
  requirements:
199
267
  - - ">="
200
268
  - !ruby/object:Gem::Version
201
269
  version: '0'
202
270
  requirements: []
203
- rubygems_version: 3.1.6
271
+ rubygems_version: 3.2.32
204
272
  signing_key:
205
273
  specification_version: 4
206
274
  summary: 'Validate requests from Web crawlers: impersonating or not?'
207
275
  test_files:
208
- - test/legitbot_test.rb
209
- - test/legitbot/validators/domains_test.rb
210
- - test/legitbot/validators/ip_ranges_test.rb
211
- - test/pinterest_test.rb
212
- - test/alexa_test.rb
213
276
  - test/ahrefs_test.rb
277
+ - test/alexa_test.rb
278
+ - test/amazon_test.rb
214
279
  - test/apple_test.rb
215
- - test/oracle_test.rb
216
- - test/google_test.rb
217
- - test/petalbot_test.rb
218
280
  - test/botmatch_test.rb
219
281
  - test/facebook_test.rb
282
+ - test/google_test.rb
283
+ - test/legitbot/validators/domains_test.rb
284
+ - test/legitbot/validators/ip_ranges_test.rb
285
+ - test/legitbot_test.rb
286
+ - test/oracle_test.rb
287
+ - test/petalbot_test.rb
288
+ - test/pinterest_test.rb
220
289
  - test/twitter_test.rb