legitbot 1.4.2 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 48fa3cd6d810e32c24c3027b57879438c658b64bdf2dfae3fbbcfd5ac67fb790
4
- data.tar.gz: f8b77b3571978137339b06e633c9725ee8d1f4f6793bcdae8321a16cb9b37dfd
3
+ metadata.gz: 8d80bbf2deb17b90d29f6d19b5405b63e52911e5fadf05a2b6c10a7a8772afa6
4
+ data.tar.gz: fcac15d55a5f59d8076eeab89b67c07466c15f646b9205d8fdc49b29f55084b4
5
5
  SHA512:
6
- metadata.gz: ff62eb5208ee4e565504ada36c3d94129171f2487c5cf61e3cfbc5388a189ab5e5645537d311a01376e16abd486d69c465d148d6a4ebf35f5cf5d4c51a212bd8
7
- data.tar.gz: c3a9d54b1add735629ac5caaf744c8bb75d2d9fa88fe5e793bd351e860cf00ef39cb084eb3bb048fe34b1ab9d95dec66eb08b075b9ee0bd7657e0160873f24bb
6
+ metadata.gz: d0e367f8e02357527a46112cfd9acdc3448fc99320d6c0a117af81b8bd17a58c997a678747f1bc137fe7772b2c2e80fbf1fb4bb39cbdf52d6e53a4f4ce7dc1b7
7
+ data.tar.gz: '019f70211891e34b1290565e006ca8e58b25db9303db94a274ca8697871420f22983642f125e38faab02a4df9984bd097e0e61ee88adf0ce70940334d70232c4'
data/README.md CHANGED
@@ -39,10 +39,18 @@ Rack::Attack.blocklist 'fake search engines' do |request|
39
39
  end
40
40
  ```
41
41
 
42
+ ## Versioning
43
+
44
+ [Semantic versioning](https://semver.org/) with the following clarifications:
45
+
46
+ * MINOR version is incremented when support for new bots is added.
47
+ * PATCH version is incremented when validation logic for a bot changes (IP list updated, for example).
48
+
42
49
  ## Supported
43
50
 
44
51
  * [Ahrefs](https://ahrefs.com/robot)
45
52
  * [Alexa](https://support.alexa.com/hc/en-us/articles/360046707834-What-are-the-IP-addresses-for-Alexa-s-Certify-and-Site-Audit-crawlers-)
53
+ * [Amazon AdBot](https://adbot.amazon.com/index.html)
46
54
  * [Applebot](https://support.apple.com/en-us/HT204683)
47
55
  * [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
48
56
  * [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
@@ -50,10 +58,10 @@ end
50
58
  * [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
51
59
  * [Google crawlers](https://support.google.com/webmasters/answer/1061943)
52
60
  * [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
61
+ * [Petal search engine](http://aspiegel.com/petalbot)
53
62
  * [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
54
63
  * [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
55
64
  * [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
56
- * [Petal search engine](http://aspiegel.com/petalbot)
57
65
 
58
66
  ## License
59
67
 
data/legitbot.gemspec CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
  spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
21
21
  spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
22
22
  spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
23
- spec.add_development_dependency 'rubocop', '~> 1.18.0', '>= 1.18.0'
23
+ spec.add_development_dependency 'rubocop', '~> 1.22.0', '>= 1.22.0'
24
24
 
25
25
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
26
26
  spec.rdoc_options = ['--charset=UTF-8']
data/lib/legitbot/amazon.rb ADDED
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://adbot.amazon.com/index.html
5
+ class Amazon < BotMatch
6
+ domains 'amazonadbot.com.'
7
+ end
8
+
9
+ rule Legitbot::Amazon, %w[AmazonAdBot]
10
+ end
@@ -18,5 +18,8 @@ module Legitbot # :nodoc:
18
18
  end
19
19
  end
20
20
 
21
- rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1]
21
+ rule Legitbot::Facebook, %w[
22
+ facebookexternalhit/1.1
23
+ facebookcatalog/1.0
24
+ ]
22
25
  end
@@ -7,5 +7,13 @@ module Legitbot # :nodoc:
7
7
  domains 'google.com.', 'googlebot.com.'
8
8
  end
9
9
 
10
- rule Legitbot::Google, %w[Googlebot Mediapartners-Google AdsBot-Google]
10
+ rule Legitbot::Google, %w[
11
+ APIs-Google
12
+ AdsBot-Google-Mobile
13
+ AdsBot-Google
14
+ Googlebot
15
+ Mediapartners-Google
16
+ AdsBot-Google-Mobile-Apps
17
+ FeedFetcher-Google
18
+ ]
11
19
  end
@@ -3,7 +3,31 @@
3
3
  module Legitbot # :nodoc:
4
4
  # https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html
5
5
  class Oracle < BotMatch
6
- ip_ranges '148.64.56.0/24'
6
+ ip_ranges %w[
7
+ 132.145.9.5
8
+ 132.145.11.125
9
+ 132.145.14.70
10
+ 132.145.15.209
11
+ 132.145.64.33
12
+ 132.145.66.116
13
+ 132.145.66.156
14
+ 132.145.67.248
15
+ 140.238.81.78
16
+ 140.238.83.181
17
+ 140.238.94.137
18
+ 140.238.95.47
19
+ 140.238.95.199
20
+ 152.67.128.219
21
+ 152.67.137.35
22
+ 152.67.138.180
23
+
24
+ 148.64.56.64/28
25
+ 148.64.56.79
26
+ 148.64.56.80
27
+ 148.64.56.112/28
28
+ 148.64.56.127
29
+ 148.64.56.128
30
+ ]
7
31
  end
8
32
 
9
33
  rule Legitbot::Oracle, %w[GrapeshotCrawler]
@@ -7,6 +7,7 @@ module Legitbot # :nodoc:
7
7
  ip_ranges %w[
8
8
  199.16.156.0/22
9
9
  199.59.148.0/22
10
+ 192.133.76.0/22
10
11
  ]
11
12
  end
12
13
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.4.2'
4
+ VERSION = '1.5.0'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -5,6 +5,7 @@ require_relative 'legitbot/botmatch'
5
5
 
6
6
  require_relative 'legitbot/ahrefs'
7
7
  require_relative 'legitbot/alexa'
8
+ require_relative 'legitbot/amazon'
8
9
  require_relative 'legitbot/apple'
9
10
  require_relative 'legitbot/baidu'
10
11
  require_relative 'legitbot/bing'
data/test/amazon_test.rb ADDED
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+ require 'legitbot'
5
+
6
+ class AmazonTest < Minitest::Test
7
+ def test_malicious_ip
8
+ ip = '149.210.164.47'
9
+ match = Legitbot::Amazon.new ip
10
+ assert !match.valid?, msg: "#{ip} is not a real AmazonAdBot IP"
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '54.166.7.90'
15
+ match = Legitbot::Amazon.new ip
16
+ assert match.valid?, msg: "#{ip} is a valid AmazonAdBot IP"
17
+ end
18
+
19
+ def test_malicious_ua
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
22
+ '149.210.164.47'
23
+ )
24
+ assert bot, msg: 'AmazonAdBot detected from User-Agent'
25
+ assert !bot.valid?, msg: 'Not a valid AmazonAdBot'
26
+ end
27
+
28
+ def test_valid_ua
29
+ bot = Legitbot.bot(
30
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
31
+ '54.166.7.90'
32
+ )
33
+ assert bot, msg: 'AmazonAdBot detected from User-Agent'
34
+ assert bot.valid?, msg: 'Valid AmazonAdBot'
35
+ end
36
+
37
+ def test_valid_name
38
+ bot = Legitbot.bot(
39
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
40
+ '54.166.7.90'
41
+ )
42
+ assert_equal :amazon, bot.detected_as
43
+ end
44
+
45
+ def test_fake_name
46
+ bot = Legitbot.bot(
47
+ 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
48
+ '81.1.172.108'
49
+ )
50
+ assert_equal :amazon, bot.detected_as
51
+ end
52
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.2
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-10 00:00:00.000000000 Z
11
+ date: 2021-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: augmented_interval_tree
@@ -116,20 +116,20 @@ dependencies:
116
116
  requirements:
117
117
  - - "~>"
118
118
  - !ruby/object:Gem::Version
119
- version: 1.18.0
119
+ version: 1.22.0
120
120
  - - ">="
121
121
  - !ruby/object:Gem::Version
122
- version: 1.18.0
122
+ version: 1.22.0
123
123
  type: :development
124
124
  prerelease: false
125
125
  version_requirements: !ruby/object:Gem::Requirement
126
126
  requirements:
127
127
  - - "~>"
128
128
  - !ruby/object:Gem::Version
129
- version: 1.18.0
129
+ version: 1.22.0
130
130
  - - ">="
131
131
  - !ruby/object:Gem::Version
132
- version: 1.18.0
132
+ version: 1.22.0
133
133
  description: Does Web request come from a real search engine or from an impersonating
134
134
  agent?
135
135
  email: self@alaz.me
@@ -150,6 +150,7 @@ files:
150
150
  - lib/legitbot.rb
151
151
  - lib/legitbot/ahrefs.rb
152
152
  - lib/legitbot/alexa.rb
153
+ - lib/legitbot/amazon.rb
153
154
  - lib/legitbot/apple.rb
154
155
  - lib/legitbot/baidu.rb
155
156
  - lib/legitbot/bing.rb
@@ -169,6 +170,7 @@ files:
169
170
  - lib/legitbot/yandex.rb
170
171
  - test/ahrefs_test.rb
171
172
  - test/alexa_test.rb
173
+ - test/amazon_test.rb
172
174
  - test/apple_test.rb
173
175
  - test/botmatch_test.rb
174
176
  - test/facebook_test.rb
@@ -214,6 +216,7 @@ test_files:
214
216
  - test/apple_test.rb
215
217
  - test/oracle_test.rb
216
218
  - test/google_test.rb
219
+ - test/amazon_test.rb
217
220
  - test/petalbot_test.rb
218
221
  - test/botmatch_test.rb
219
222
  - test/facebook_test.rb