legitbot 1.4.3 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 395c6363b221d300574d3a0bd0d324f882589ca44afc8db86f9f39e42c61a44c
4
- data.tar.gz: 58f7c5205cdc0e0cd138b00d244bca133c16424f1b16f7a99263c121d02ae5f9
3
+ metadata.gz: 90eab3a4d53cc388fac2db75058e11c549ec09b3acb3352a398c10e437c0f233
4
+ data.tar.gz: 5a252a30d37a44de60bb5b7baf1f264477211c22c0acc1477317f8281b7e9103
5
5
  SHA512:
6
- metadata.gz: df1f576284899afa5386cdea651a1cc811f9ddbf6cb683fdf85af71a829fedeae91995858e2453ed4743bd86105b3796a87cc2269f420e144f59faa5f982b646
7
- data.tar.gz: 7f06269a4c24d08f957e47551a85338986e004f817a2d68ddacbc0354919fdcaef2106478ed2fce32133665343628b0b8ba922d0958485ee20ca98808ca3dbff
6
+ metadata.gz: c56a32b2bccb2667f5c42c7580c31b2d79884331dfecf5f3cd01f1242ad2e53a5a4d1192a01f1f814014ef3da38702ad5b6041da3ba89d6b14cfbb5658550c47
7
+ data.tar.gz: c388aad0dec6a86a8d7a9ae23ae1e3200cd7eb3df421f74934b1a7582d9e63c4da1b66d9f28def1685f821c9ac2813b2d7fa38c4653a0a72301576c1cdddcf9b
data/.rubocop.yml CHANGED
@@ -8,3 +8,6 @@ Gemspec/RequiredRubyVersion:
8
8
 
9
9
  Naming/MemoizedInstanceVariableName:
10
10
  Enabled: false
11
+
12
+ Style/MapToHash:
13
+ Enabled: false
data/README.md CHANGED
@@ -39,10 +39,18 @@ Rack::Attack.blocklist 'fake search engines' do |request|
39
39
  end
40
40
  ```
41
41
 
42
+ ## Versioning
43
+
44
+ [Semantic versioning](https://semver.org/) with the following clarifications:
45
+
46
+ * MINOR version is incremented when support for new bots is added.
47
+ * PATCH version is incremented when validation logic for a bot changes (IP list updated, for example).
48
+
42
49
  ## Supported
43
50
 
44
51
  * [Ahrefs](https://ahrefs.com/robot)
45
52
  * [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
53
+ * [Amazon AdBot](https://adbot.amazon.com/index.html)
46
54
  * [Applebot](https://support.apple.com/en-us/HT204683)
47
55
  * [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
48
56
  * [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
@@ -59,7 +67,7 @@ end
59
67
 
60
68
  Apache 2.0
61
69
 
62
- ## References
70
+ ## Other projects
63
71
 
64
72
  * Play Framework variant in Scala: [play-legitbot](https://github.com/osinka/play-legitbot)
65
73
  * Article [When (Fake) Googlebots Attack Your Rails App](http://jessewolgamott.com/blog/2015/11/17/when-fake-googlebots-attack-your-rails-app/)
@@ -72,3 +80,5 @@ Apache 2.0
72
80
  classify IP as a search engine, but also label them as suspicious and
73
81
  reports the number of days since the last activity. My implementation of
74
82
  the protocol in Scala is [here](https://github.com/osinka/httpbl).
83
+ * [CIDRAM](https://github.com/CIDRAM/CIDRAM) is a PHP routing manager with built-in support
84
+ to validate bots.
data/legitbot.gemspec CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'English'
4
+
3
5
  $LOAD_PATH.push File.expand_path('lib', __dir__)
4
6
  require 'legitbot/version'
5
7
 
@@ -13,6 +15,9 @@ Gem::Specification.new do |spec|
13
15
  spec.homepage = 'https://github.com/alaz/legitbot'
14
16
  spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
15
17
  spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
18
+ spec.metadata = {
19
+ 'rubygems_mfa_required' => 'true'
20
+ }
16
21
 
17
22
  spec.required_ruby_version = '>= 2.5.0'
18
23
  spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
@@ -20,7 +25,7 @@ Gem::Specification.new do |spec|
20
25
  spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
21
26
  spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
22
27
  spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
23
- spec.add_development_dependency 'rubocop', '~> 1.20.0', '>= 1.20.0'
28
+ spec.add_development_dependency 'rubocop', '~> 1.24.0', '>= 1.24.0'
24
29
 
25
30
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
26
31
  spec.rdoc_options = ['--charset=UTF-8']
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://adbot.amazon.com/index.html
5
+ class Amazon < BotMatch
6
+ domains 'amazonadbot.com.'
7
+ end
8
+
9
+ rule Legitbot::Amazon, %w[AmazonAdBot]
10
+ end
@@ -5,16 +5,17 @@ module Legitbot # :nodoc:
5
5
  class DuckDuckGo < BotMatch
6
6
  ip_ranges %w[
7
7
  20.191.45.212
8
- 23.21.227.69
9
8
  40.88.21.235
10
- 50.16.241.113
11
- 50.16.241.114
12
- 50.16.241.117
13
- 50.16.247.234
14
- 52.5.190.19
15
- 52.204.97.54
16
- 54.197.234.188
17
- 54.208.100.253
9
+ 40.76.173.151
10
+ 40.76.163.7
11
+ 20.185.79.47
12
+ 52.142.26.175
13
+ 20.185.79.15
14
+ 52.142.24.149
15
+ 40.76.162.208
16
+ 40.76.163.23
17
+ 40.76.162.191
18
+ 40.76.162.247
18
19
  54.208.102.37
19
20
  107.21.1.8
20
21
  ]
@@ -18,5 +18,8 @@ module Legitbot # :nodoc:
18
18
  end
19
19
  end
20
20
 
21
- rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1]
21
+ rule Legitbot::Facebook, %w[
22
+ facebookexternalhit/1.1
23
+ facebookcatalog/1.0
24
+ ]
22
25
  end
@@ -7,5 +7,13 @@ module Legitbot # :nodoc:
7
7
  domains 'google.com.', 'googlebot.com.'
8
8
  end
9
9
 
10
- rule Legitbot::Google, %w[Googlebot Mediapartners-Google AdsBot-Google]
10
+ rule Legitbot::Google, %w[
11
+ APIs-Google
12
+ AdsBot-Google-Mobile
13
+ AdsBot-Google
14
+ Googlebot
15
+ Mediapartners-Google
16
+ AdsBot-Google-Mobile-Apps
17
+ FeedFetcher-Google
18
+ ]
11
19
  end
@@ -7,6 +7,7 @@ module Legitbot # :nodoc:
7
7
  ip_ranges %w[
8
8
  199.16.156.0/22
9
9
  199.59.148.0/22
10
+ 192.133.76.0/22
10
11
  ]
11
12
  end
12
13
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.4.3'
4
+ VERSION = '1.5.1'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -5,6 +5,7 @@ require_relative 'legitbot/botmatch'
5
5
 
6
6
  require_relative 'legitbot/ahrefs'
7
7
  require_relative 'legitbot/alexa'
8
+ require_relative 'legitbot/amazon'
8
9
  require_relative 'legitbot/apple'
9
10
  require_relative 'legitbot/baidu'
10
11
  require_relative 'legitbot/bing'
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+ require 'legitbot'
5
+
6
+ class AmazonTest < Minitest::Test
7
+ def test_malicious_ip
8
+ ip = '149.210.164.47'
9
+ match = Legitbot::Amazon.new ip
10
+ assert !match.valid?, msg: "#{ip} is not a real AmazonAdBot IP"
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '54.166.7.90'
15
+ match = Legitbot::Amazon.new ip
16
+ assert match.valid?, msg: "#{ip} is a valid AmazonAdBot IP"
17
+ end
18
+
19
+ def test_malicious_ua
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
22
+ '149.210.164.47'
23
+ )
24
+ assert bot, msg: 'AmazonAdBot detected from User-Agent'
25
+ assert !bot.valid?, msg: 'Not a valid AmazonAdBot'
26
+ end
27
+
28
+ def test_valid_ua
29
+ bot = Legitbot.bot(
30
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
31
+ '54.166.7.90'
32
+ )
33
+ assert bot, msg: 'AmazonAdBot detected from User-Agent'
34
+ assert bot.valid?, msg: 'Valid AmazonAdBot'
35
+ end
36
+
37
+ def test_valid_name
38
+ bot = Legitbot.bot(
39
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
40
+ '54.166.7.90'
41
+ )
42
+ assert_equal :amazon, bot.detected_as
43
+ end
44
+
45
+ def test_fake_name
46
+ bot = Legitbot.bot(
47
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
48
+ '81.1.172.108'
49
+ )
50
+ assert_equal :amazon, bot.detected_as
51
+ end
52
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.3
4
+ version: 1.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-09-10 00:00:00.000000000 Z
11
+ date: 2022-01-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: augmented_interval_tree
@@ -116,20 +116,20 @@ dependencies:
116
116
  requirements:
117
117
  - - "~>"
118
118
  - !ruby/object:Gem::Version
119
- version: 1.20.0
119
+ version: 1.24.0
120
120
  - - ">="
121
121
  - !ruby/object:Gem::Version
122
- version: 1.20.0
122
+ version: 1.24.0
123
123
  type: :development
124
124
  prerelease: false
125
125
  version_requirements: !ruby/object:Gem::Requirement
126
126
  requirements:
127
127
  - - "~>"
128
128
  - !ruby/object:Gem::Version
129
- version: 1.20.0
129
+ version: 1.24.0
130
130
  - - ">="
131
131
  - !ruby/object:Gem::Version
132
- version: 1.20.0
132
+ version: 1.24.0
133
133
  description: Does Web request come from a real search engine or from an impersonating
134
134
  agent?
135
135
  email: self@alaz.me
@@ -150,6 +150,7 @@ files:
150
150
  - lib/legitbot.rb
151
151
  - lib/legitbot/ahrefs.rb
152
152
  - lib/legitbot/alexa.rb
153
+ - lib/legitbot/amazon.rb
153
154
  - lib/legitbot/apple.rb
154
155
  - lib/legitbot/baidu.rb
155
156
  - lib/legitbot/bing.rb
@@ -169,6 +170,7 @@ files:
169
170
  - lib/legitbot/yandex.rb
170
171
  - test/ahrefs_test.rb
171
172
  - test/alexa_test.rb
173
+ - test/amazon_test.rb
172
174
  - test/apple_test.rb
173
175
  - test/botmatch_test.rb
174
176
  - test/facebook_test.rb
@@ -183,7 +185,8 @@ files:
183
185
  homepage: https://github.com/alaz/legitbot
184
186
  licenses:
185
187
  - Apache-2.0
186
- metadata: {}
188
+ metadata:
189
+ rubygems_mfa_required: 'true'
187
190
  post_install_message:
188
191
  rdoc_options:
189
192
  - "--charset=UTF-8"
@@ -214,6 +217,7 @@ test_files:
214
217
  - test/apple_test.rb
215
218
  - test/oracle_test.rb
216
219
  - test/google_test.rb
220
+ - test/amazon_test.rb
217
221
  - test/petalbot_test.rb
218
222
  - test/botmatch_test.rb
219
223
  - test/facebook_test.rb