legitbot 1.4.3 → 1.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 395c6363b221d300574d3a0bd0d324f882589ca44afc8db86f9f39e42c61a44c
4
- data.tar.gz: 58f7c5205cdc0e0cd138b00d244bca133c16424f1b16f7a99263c121d02ae5f9
3
+ metadata.gz: 90eab3a4d53cc388fac2db75058e11c549ec09b3acb3352a398c10e437c0f233
4
+ data.tar.gz: 5a252a30d37a44de60bb5b7baf1f264477211c22c0acc1477317f8281b7e9103
5
5
  SHA512:
6
- metadata.gz: df1f576284899afa5386cdea651a1cc811f9ddbf6cb683fdf85af71a829fedeae91995858e2453ed4743bd86105b3796a87cc2269f420e144f59faa5f982b646
7
- data.tar.gz: 7f06269a4c24d08f957e47551a85338986e004f817a2d68ddacbc0354919fdcaef2106478ed2fce32133665343628b0b8ba922d0958485ee20ca98808ca3dbff
6
+ metadata.gz: c56a32b2bccb2667f5c42c7580c31b2d79884331dfecf5f3cd01f1242ad2e53a5a4d1192a01f1f814014ef3da38702ad5b6041da3ba89d6b14cfbb5658550c47
7
+ data.tar.gz: c388aad0dec6a86a8d7a9ae23ae1e3200cd7eb3df421f74934b1a7582d9e63c4da1b66d9f28def1685f821c9ac2813b2d7fa38c4653a0a72301576c1cdddcf9b
data/.rubocop.yml CHANGED
@@ -8,3 +8,6 @@ Gemspec/RequiredRubyVersion:
8
8
 
9
9
  Naming/MemoizedInstanceVariableName:
10
10
  Enabled: false
11
+
12
+ Style/MapToHash:
13
+ Enabled: false
data/README.md CHANGED
@@ -39,10 +39,18 @@ Rack::Attack.blocklist 'fake search engines' do |request|
39
39
  end
40
40
  ```
41
41
 
42
+ ## Versioning
43
+
44
+ [Semantic versioning](https://semver.org/) with the following clarifications:
45
+
46
+ * MINOR version is incremented when support for new bots is added.
47
+ * PATCH version is incremented when validation logic for a bot changes (IP list updated, for example).
48
+
42
49
  ## Supported
43
50
 
44
51
  * [Ahrefs](https://ahrefs.com/robot)
45
52
  * [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
53
+ * [Amazon AdBot](https://adbot.amazon.com/index.html)
46
54
  * [Applebot](https://support.apple.com/en-us/HT204683)
47
55
  * [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
48
56
  * [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
@@ -59,7 +67,7 @@ end
59
67
 
60
68
  Apache 2.0
61
69
 
62
- ## References
70
+ ## Other projects
63
71
 
64
72
  * Play Framework variant in Scala: [play-legitbot](https://github.com/osinka/play-legitbot)
65
73
  * Article [When (Fake) Googlebots Attack Your Rails App](http://jessewolgamott.com/blog/2015/11/17/when-fake-googlebots-attack-your-rails-app/)
@@ -72,3 +80,5 @@ Apache 2.0
72
80
  classify IP as a search engine, but also label them as suspicious and
73
81
  reports the number of days since the last activity. My implementation of
74
82
  the protocol in Scala is [here](https://github.com/osinka/httpbl).
83
+ * [CIDRAM](https://github.com/CIDRAM/CIDRAM) is a PHP routing manager with built-in support
84
+ to validate bots.
data/legitbot.gemspec CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'English'
4
+
3
5
  $LOAD_PATH.push File.expand_path('lib', __dir__)
4
6
  require 'legitbot/version'
5
7
 
@@ -13,6 +15,9 @@ Gem::Specification.new do |spec|
13
15
  spec.homepage = 'https://github.com/alaz/legitbot'
14
16
  spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
15
17
  spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
18
+ spec.metadata = {
19
+ 'rubygems_mfa_required' => 'true'
20
+ }
16
21
 
17
22
  spec.required_ruby_version = '>= 2.5.0'
18
23
  spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
@@ -20,7 +25,7 @@ Gem::Specification.new do |spec|
20
25
  spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
21
26
  spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
22
27
  spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
23
- spec.add_development_dependency 'rubocop', '~> 1.20.0', '>= 1.20.0'
28
+ spec.add_development_dependency 'rubocop', '~> 1.24.0', '>= 1.24.0'
24
29
 
25
30
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
26
31
  spec.rdoc_options = ['--charset=UTF-8']
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://adbot.amazon.com/index.html
5
+ class Amazon < BotMatch
6
+ domains 'amazonadbot.com.'
7
+ end
8
+
9
+ rule Legitbot::Amazon, %w[AmazonAdBot]
10
+ end
@@ -5,16 +5,17 @@ module Legitbot # :nodoc:
5
5
  class DuckDuckGo < BotMatch
6
6
  ip_ranges %w[
7
7
  20.191.45.212
8
- 23.21.227.69
9
8
  40.88.21.235
10
- 50.16.241.113
11
- 50.16.241.114
12
- 50.16.241.117
13
- 50.16.247.234
14
- 52.5.190.19
15
- 52.204.97.54
16
- 54.197.234.188
17
- 54.208.100.253
9
+ 40.76.173.151
10
+ 40.76.163.7
11
+ 20.185.79.47
12
+ 52.142.26.175
13
+ 20.185.79.15
14
+ 52.142.24.149
15
+ 40.76.162.208
16
+ 40.76.163.23
17
+ 40.76.162.191
18
+ 40.76.162.247
18
19
  54.208.102.37
19
20
  107.21.1.8
20
21
  ]
@@ -18,5 +18,8 @@ module Legitbot # :nodoc:
18
18
  end
19
19
  end
20
20
 
21
- rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1]
21
+ rule Legitbot::Facebook, %w[
22
+ facebookexternalhit/1.1
23
+ facebookcatalog/1.0
24
+ ]
22
25
  end
@@ -7,5 +7,13 @@ module Legitbot # :nodoc:
7
7
  domains 'google.com.', 'googlebot.com.'
8
8
  end
9
9
 
10
- rule Legitbot::Google, %w[Googlebot Mediapartners-Google AdsBot-Google]
10
+ rule Legitbot::Google, %w[
11
+ APIs-Google
12
+ AdsBot-Google-Mobile
13
+ AdsBot-Google
14
+ Googlebot
15
+ Mediapartners-Google
16
+ AdsBot-Google-Mobile-Apps
17
+ FeedFetcher-Google
18
+ ]
11
19
  end
@@ -7,6 +7,7 @@ module Legitbot # :nodoc:
7
7
  ip_ranges %w[
8
8
  199.16.156.0/22
9
9
  199.59.148.0/22
10
+ 192.133.76.0/22
10
11
  ]
11
12
  end
12
13
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.4.3'
4
+ VERSION = '1.5.1'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -5,6 +5,7 @@ require_relative 'legitbot/botmatch'
5
5
 
6
6
  require_relative 'legitbot/ahrefs'
7
7
  require_relative 'legitbot/alexa'
8
+ require_relative 'legitbot/amazon'
8
9
  require_relative 'legitbot/apple'
9
10
  require_relative 'legitbot/baidu'
10
11
  require_relative 'legitbot/bing'
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+ require 'legitbot'
5
+
6
+ class AmazonTest < Minitest::Test
7
+ def test_malicious_ip
8
+ ip = '149.210.164.47'
9
+ match = Legitbot::Amazon.new ip
10
+ assert !match.valid?, msg: "#{ip} is not a real AmazonAdBot IP"
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '54.166.7.90'
15
+ match = Legitbot::Amazon.new ip
16
+ assert match.valid?, msg: "#{ip} is a valid AmazonAdBot IP"
17
+ end
18
+
19
+ def test_malicious_ua
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
22
+ '149.210.164.47'
23
+ )
24
+ assert bot, msg: 'AmazonAdBot detected from User-Agent'
25
+ assert !bot.valid?, msg: 'Not a valid AmazonAdBot'
26
+ end
27
+
28
+ def test_valid_ua
29
+ bot = Legitbot.bot(
30
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
31
+ '54.166.7.90'
32
+ )
33
+ assert bot, msg: 'AmazonAdBot detected from User-Agent'
34
+ assert bot.valid?, msg: 'Valid AmazonAdBot'
35
+ end
36
+
37
+ def test_valid_name
38
+ bot = Legitbot.bot(
39
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
40
+ '54.166.7.90'
41
+ )
42
+ assert_equal :amazon, bot.detected_as
43
+ end
44
+
45
+ def test_fake_name
46
+ bot = Legitbot.bot(
47
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
48
+ '81.1.172.108'
49
+ )
50
+ assert_equal :amazon, bot.detected_as
51
+ end
52
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.3
4
+ version: 1.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-09-10 00:00:00.000000000 Z
11
+ date: 2022-01-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: augmented_interval_tree
@@ -116,20 +116,20 @@ dependencies:
116
116
  requirements:
117
117
  - - "~>"
118
118
  - !ruby/object:Gem::Version
119
- version: 1.20.0
119
+ version: 1.24.0
120
120
  - - ">="
121
121
  - !ruby/object:Gem::Version
122
- version: 1.20.0
122
+ version: 1.24.0
123
123
  type: :development
124
124
  prerelease: false
125
125
  version_requirements: !ruby/object:Gem::Requirement
126
126
  requirements:
127
127
  - - "~>"
128
128
  - !ruby/object:Gem::Version
129
- version: 1.20.0
129
+ version: 1.24.0
130
130
  - - ">="
131
131
  - !ruby/object:Gem::Version
132
- version: 1.20.0
132
+ version: 1.24.0
133
133
  description: Does Web request come from a real search engine or from an impersonating
134
134
  agent?
135
135
  email: self@alaz.me
@@ -150,6 +150,7 @@ files:
150
150
  - lib/legitbot.rb
151
151
  - lib/legitbot/ahrefs.rb
152
152
  - lib/legitbot/alexa.rb
153
+ - lib/legitbot/amazon.rb
153
154
  - lib/legitbot/apple.rb
154
155
  - lib/legitbot/baidu.rb
155
156
  - lib/legitbot/bing.rb
@@ -169,6 +170,7 @@ files:
169
170
  - lib/legitbot/yandex.rb
170
171
  - test/ahrefs_test.rb
171
172
  - test/alexa_test.rb
173
+ - test/amazon_test.rb
172
174
  - test/apple_test.rb
173
175
  - test/botmatch_test.rb
174
176
  - test/facebook_test.rb
@@ -183,7 +185,8 @@ files:
183
185
  homepage: https://github.com/alaz/legitbot
184
186
  licenses:
185
187
  - Apache-2.0
186
- metadata: {}
188
+ metadata:
189
+ rubygems_mfa_required: 'true'
187
190
  post_install_message:
188
191
  rdoc_options:
189
192
  - "--charset=UTF-8"
@@ -214,6 +217,7 @@ test_files:
214
217
  - test/apple_test.rb
215
218
  - test/oracle_test.rb
216
219
  - test/google_test.rb
220
+ - test/amazon_test.rb
217
221
  - test/petalbot_test.rb
218
222
  - test/botmatch_test.rb
219
223
  - test/facebook_test.rb