legitbot 1.4.0 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 783bc47b1bc8f80e5d4818f150663a2cb6fb90e5dd2378bae456eb2fb1d78823
4
- data.tar.gz: 0e274e1eaa295ffa0e00a8d6e2884194ed53108da9ecb75c3ef2ef4746048c9e
3
+ metadata.gz: fcbd5f9d7bbd29804b84b01d95765a0e950c3c1d7c42c78b8d2ba4e0c98ca679
4
+ data.tar.gz: 3f69f1ebc410d1d6db1c20e0ff489ba552ba34ffad3bbe9b55f7e69377b72f52
5
5
  SHA512:
6
- metadata.gz: ec7c2f2cb78cc8a586a35bea72e39b12835679089f4b4d5cba754aa0caccee0c9e8c1c94461a7fd8fcb1cf4f8195f196346ea4abe7bd96a080c5c831f9d68c7a
7
- data.tar.gz: a92ea8ff049127871988e8dbad7cb76b3c53f13dde97a157509e9013898c1da2cc3e573ae695d9af2e82372bf478348aeca6ec77a673798b52ac1e3ab623e214
6
+ metadata.gz: 998f9f6f722b9eb9ba57774ce3525ca139789321d0cdd5e024f954ef13d002ac839aca8de53917dda8c3fcea08d23add43ff1dd856cf54b8d68d57cefc26bb42
7
+ data.tar.gz: 3a539ba08618ecbfe3d93388b0046c0331654f87bf72eef23fca1594bedf8cdad35aa8eca91121f675f6e39e60b020bec5a1d19dee55c245e4e9b20fafee4ee8
data/README.md CHANGED
@@ -1,13 +1,11 @@
1
1
  # Legitbot ![](https://github.com/alaz/legitbot/workflows/build/badge.svg) ![](https://badge.fury.io/rb/legitbot.svg)
2
2
 
3
- Ruby gem to check that an IP belongs to a bot, typically a search
4
- engine. This can be of help in protecting a web site from fake search
5
- engines.
3
+ Ruby gem to make sure that an IP really belongs to a bot, typically a search
4
+ engine.
6
5
 
7
6
  ## Usage
8
7
 
9
- Suppose you have a Web request and you'd like to make sure it's not from a fake
10
- search engine:
8
+ Suppose you have a Web request and you would like to check it is not disguised:
11
9
 
12
10
  ```ruby
13
11
  bot = Legitbot.bot(userAgent, ip)
@@ -22,7 +20,7 @@ bot.valid? # => true
22
20
  bot.fake? # => false
23
21
  ```
24
22
 
25
- Sometimes you already know what search engine to expect. For example, you might
23
+ Sometimes you already know which search engine to expect. For example, you might
26
24
  be using [rack-attack](https://github.com/kickstarter/rack-attack):
27
25
 
28
26
  ```ruby
@@ -31,8 +29,8 @@ Rack::Attack.blocklist("fake Googlebot") do |req|
31
29
  end
32
30
  ```
33
31
 
34
- Or if you do not like all these nasty crawlers stealing your content or
35
- maybe evaluating it and getting ready to invade your site with spammers,
32
+ Or if you do not like all those ghoulish crawlers stealing your
33
+ content, evaluating it and getting ready to invade your site with spammers,
36
34
  then block them all:
37
35
 
38
36
  ```ruby
@@ -52,6 +50,7 @@ end
52
50
  * [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
53
51
  * [Google crawlers](https://support.google.com/webmasters/answer/1061943)
54
52
  * [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
53
+ * [Petal search engine](http://aspiegel.com/petalbot)
55
54
  * [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
56
55
  * [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
57
56
  * [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
data/legitbot.gemspec CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
  spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
21
21
  spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
22
22
  spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
23
- spec.add_development_dependency 'rubocop', '~> 1.15.0', '>= 1.15.0'
23
+ spec.add_development_dependency 'rubocop', '~> 1.20.0', '>= 1.20.0'
24
24
 
25
25
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
26
26
  spec.rdoc_options = ['--charset=UTF-8']
@@ -4,14 +4,15 @@ module Legitbot # :nodoc:
4
4
  # https://duckduckgo.com/duckduckbot
5
5
  class DuckDuckGo < BotMatch
6
6
  ip_ranges %w[
7
+ 20.191.45.212
7
8
  23.21.227.69
8
9
  40.88.21.235
9
10
  50.16.241.113
10
11
  50.16.241.114
11
12
  50.16.241.117
12
13
  50.16.247.234
13
- 52.204.97.54
14
14
  52.5.190.19
15
+ 52.204.97.54
15
16
  54.197.234.188
16
17
  54.208.100.253
17
18
  54.208.102.37
@@ -18,5 +18,8 @@ module Legitbot # :nodoc:
18
18
  end
19
19
  end
20
20
 
21
- rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1]
21
+ rule Legitbot::Facebook, %w[
22
+ facebookexternalhit/1.1
23
+ facebookcatalog/1.0
24
+ ]
22
25
  end
@@ -3,7 +3,31 @@
3
3
  module Legitbot # :nodoc:
4
4
  # https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html
5
5
  class Oracle < BotMatch
6
- ip_ranges '148.64.56.0/24'
6
+ ip_ranges %w[
7
+ 132.145.9.5
8
+ 132.145.11.125
9
+ 132.145.14.70
10
+ 132.145.15.209
11
+ 132.145.64.33
12
+ 132.145.66.116
13
+ 132.145.66.156
14
+ 132.145.67.248
15
+ 140.238.81.78
16
+ 140.238.83.181
17
+ 140.238.94.137
18
+ 140.238.95.47
19
+ 140.238.95.199
20
+ 152.67.128.219
21
+ 152.67.137.35
22
+ 152.67.138.180
23
+
24
+ 148.64.56.64/28
25
+ 148.64.56.79
26
+ 148.64.56.80
27
+ 148.64.56.112/28
28
+ 148.64.56.127
29
+ 148.64.56.128
30
+ ]
7
31
  end
8
32
 
9
33
  rule Legitbot::Oracle, %w[GrapeshotCrawler]
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # http://aspiegel.com/petalbot
5
+ # https://webmaster.petalsearch.com/site/petalbot
6
+ class Petalbot < BotMatch
7
+ domains 'petalsearch.com.'
8
+ end
9
+
10
+ rule Legitbot::Petalbot, %w[PetalBot]
11
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.4.0'
4
+ VERSION = '1.4.4'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -15,3 +15,4 @@ require_relative 'legitbot/oracle'
15
15
  require_relative 'legitbot/pinterest'
16
16
  require_relative 'legitbot/twitter'
17
17
  require_relative 'legitbot/yandex'
18
+ require_relative 'legitbot/petalbot'
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+ require 'legitbot'
5
+
6
+ class PetalbotTest < Minitest::Test
7
+ def test_malicious_ip
8
+ ip = '149.210.164.47'
9
+ match = Legitbot::Petalbot.new ip
10
+ assert !match.valid?, msg: "#{ip} is not a real Petalbot IP"
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '114.119.128.10'
15
+ match = Legitbot::Petalbot.new ip
16
+ assert match.valid?, msg: "#{ip} is a valid Petalbot IP"
17
+ end
18
+
19
+ def test_malicious_ua
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
22
+ '149.210.164.47'
23
+ )
24
+ assert bot, msg: 'Petalbot detected from User-Agent'
25
+ assert !bot.valid?, msg: 'Not a valid Petalbot'
26
+ end
27
+
28
+ def test_valid_ua
29
+ bot = Legitbot.bot(
30
+ 'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
31
+ '114.119.128.10'
32
+ )
33
+ assert bot, msg: 'Petalbot detected from User-Agent'
34
+ assert bot.valid?, msg: 'Valid Petalbot'
35
+ end
36
+
37
+ def test_valid_name
38
+ bot = Legitbot.bot(
39
+ 'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
40
+ '66.249.64.141'
41
+ )
42
+ assert_equal :petalbot, bot.detected_as
43
+ end
44
+
45
+ def test_fake_name
46
+ bot = Legitbot.bot(
47
+ 'Mozilla/5.0 (compatible; PetalBot/2.1; +http://www.google.com/bot.html)',
48
+ '81.1.172.108'
49
+ )
50
+ assert_equal :petalbot, bot.detected_as
51
+ end
52
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-28 00:00:00.000000000 Z
11
+ date: 2021-09-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: augmented_interval_tree
@@ -116,20 +116,20 @@ dependencies:
116
116
  requirements:
117
117
  - - "~>"
118
118
  - !ruby/object:Gem::Version
119
- version: 1.15.0
119
+ version: 1.20.0
120
120
  - - ">="
121
121
  - !ruby/object:Gem::Version
122
- version: 1.15.0
122
+ version: 1.20.0
123
123
  type: :development
124
124
  prerelease: false
125
125
  version_requirements: !ruby/object:Gem::Requirement
126
126
  requirements:
127
127
  - - "~>"
128
128
  - !ruby/object:Gem::Version
129
- version: 1.15.0
129
+ version: 1.20.0
130
130
  - - ">="
131
131
  - !ruby/object:Gem::Version
132
- version: 1.15.0
132
+ version: 1.20.0
133
133
  description: Does Web request come from a real search engine or from an impersonating
134
134
  agent?
135
135
  email: self@alaz.me
@@ -160,6 +160,7 @@ files:
160
160
  - lib/legitbot/google.rb
161
161
  - lib/legitbot/legitbot.rb
162
162
  - lib/legitbot/oracle.rb
163
+ - lib/legitbot/petalbot.rb
163
164
  - lib/legitbot/pinterest.rb
164
165
  - lib/legitbot/twitter.rb
165
166
  - lib/legitbot/validators/domains.rb
@@ -176,6 +177,7 @@ files:
176
177
  - test/legitbot/validators/ip_ranges_test.rb
177
178
  - test/legitbot_test.rb
178
179
  - test/oracle_test.rb
180
+ - test/petalbot_test.rb
179
181
  - test/pinterest_test.rb
180
182
  - test/twitter_test.rb
181
183
  homepage: https://github.com/alaz/legitbot
@@ -212,6 +214,7 @@ test_files:
212
214
  - test/apple_test.rb
213
215
  - test/oracle_test.rb
214
216
  - test/google_test.rb
217
+ - test/petalbot_test.rb
215
218
  - test/botmatch_test.rb
216
219
  - test/facebook_test.rb
217
220
  - test/twitter_test.rb