legitbot 1.4.0 → 1.4.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 783bc47b1bc8f80e5d4818f150663a2cb6fb90e5dd2378bae456eb2fb1d78823
4
- data.tar.gz: 0e274e1eaa295ffa0e00a8d6e2884194ed53108da9ecb75c3ef2ef4746048c9e
3
+ metadata.gz: fcbd5f9d7bbd29804b84b01d95765a0e950c3c1d7c42c78b8d2ba4e0c98ca679
4
+ data.tar.gz: 3f69f1ebc410d1d6db1c20e0ff489ba552ba34ffad3bbe9b55f7e69377b72f52
5
5
  SHA512:
6
- metadata.gz: ec7c2f2cb78cc8a586a35bea72e39b12835679089f4b4d5cba754aa0caccee0c9e8c1c94461a7fd8fcb1cf4f8195f196346ea4abe7bd96a080c5c831f9d68c7a
7
- data.tar.gz: a92ea8ff049127871988e8dbad7cb76b3c53f13dde97a157509e9013898c1da2cc3e573ae695d9af2e82372bf478348aeca6ec77a673798b52ac1e3ab623e214
6
+ metadata.gz: 998f9f6f722b9eb9ba57774ce3525ca139789321d0cdd5e024f954ef13d002ac839aca8de53917dda8c3fcea08d23add43ff1dd856cf54b8d68d57cefc26bb42
7
+ data.tar.gz: 3a539ba08618ecbfe3d93388b0046c0331654f87bf72eef23fca1594bedf8cdad35aa8eca91121f675f6e39e60b020bec5a1d19dee55c245e4e9b20fafee4ee8
data/README.md CHANGED
@@ -1,13 +1,11 @@
1
1
  # Legitbot ![](https://github.com/alaz/legitbot/workflows/build/badge.svg) ![](https://badge.fury.io/rb/legitbot.svg)
2
2
 
3
- Ruby gem to check that an IP belongs to a bot, typically a search
4
- engine. This can be of help in protecting a web site from fake search
5
- engines.
3
+ Ruby gem to make sure that an IP really belongs to a bot, typically a search
4
+ engine.
6
5
 
7
6
  ## Usage
8
7
 
9
- Suppose you have a Web request and you'd like to make sure it's not from a fake
10
- search engine:
8
+ Suppose you have a Web request and you would like to check that it is not disguised:
11
9
 
12
10
  ```ruby
13
11
  bot = Legitbot.bot(userAgent, ip)
@@ -22,7 +20,7 @@ bot.valid? # => true
22
20
  bot.fake? # => false
23
21
  ```
24
22
 
25
- Sometimes you already know what search engine to expect. For example, you might
23
+ Sometimes you already know which search engine to expect. For example, you might
26
24
  be using [rack-attack](https://github.com/kickstarter/rack-attack):
27
25
 
28
26
  ```ruby
@@ -31,8 +29,8 @@ Rack::Attack.blocklist("fake Googlebot") do |req|
31
29
  end
32
30
  ```
33
31
 
34
- Or if you do not like all these nasty crawlers stealing your content or
35
- maybe evaluating it and getting ready to invade your site with spammers,
32
+ Or if you do not like all those ghoulish crawlers stealing your
33
+ content, evaluating it and getting ready to invade your site with spammers,
36
34
  then block them all:
37
35
 
38
36
  ```ruby
@@ -52,6 +50,7 @@ end
52
50
  * [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
53
51
  * [Google crawlers](https://support.google.com/webmasters/answer/1061943)
54
52
  * [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
53
+ * [Petal search engine](http://aspiegel.com/petalbot)
55
54
  * [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
56
55
  * [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
57
56
  * [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
data/legitbot.gemspec CHANGED
@@ -20,7 +20,7 @@ Gem::Specification.new do |spec|
20
20
  spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
21
21
  spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
22
22
  spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
23
- spec.add_development_dependency 'rubocop', '~> 1.15.0', '>= 1.15.0'
23
+ spec.add_development_dependency 'rubocop', '~> 1.20.0', '>= 1.20.0'
24
24
 
25
25
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
26
26
  spec.rdoc_options = ['--charset=UTF-8']
@@ -4,14 +4,15 @@ module Legitbot # :nodoc:
4
4
  # https://duckduckgo.com/duckduckbot
5
5
  class DuckDuckGo < BotMatch
6
6
  ip_ranges %w[
7
+ 20.191.45.212
7
8
  23.21.227.69
8
9
  40.88.21.235
9
10
  50.16.241.113
10
11
  50.16.241.114
11
12
  50.16.241.117
12
13
  50.16.247.234
13
- 52.204.97.54
14
14
  52.5.190.19
15
+ 52.204.97.54
15
16
  54.197.234.188
16
17
  54.208.100.253
17
18
  54.208.102.37
@@ -18,5 +18,8 @@ module Legitbot # :nodoc:
18
18
  end
19
19
  end
20
20
 
21
- rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1]
21
+ rule Legitbot::Facebook, %w[
22
+ facebookexternalhit/1.1
23
+ facebookcatalog/1.0
24
+ ]
22
25
  end
@@ -3,7 +3,31 @@
3
3
  module Legitbot # :nodoc:
4
4
  # https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html
5
5
  class Oracle < BotMatch
6
- ip_ranges '148.64.56.0/24'
6
+ ip_ranges %w[
7
+ 132.145.9.5
8
+ 132.145.11.125
9
+ 132.145.14.70
10
+ 132.145.15.209
11
+ 132.145.64.33
12
+ 132.145.66.116
13
+ 132.145.66.156
14
+ 132.145.67.248
15
+ 140.238.81.78
16
+ 140.238.83.181
17
+ 140.238.94.137
18
+ 140.238.95.47
19
+ 140.238.95.199
20
+ 152.67.128.219
21
+ 152.67.137.35
22
+ 152.67.138.180
23
+
24
+ 148.64.56.64/28
25
+ 148.64.56.79
26
+ 148.64.56.80
27
+ 148.64.56.112/28
28
+ 148.64.56.127
29
+ 148.64.56.128
30
+ ]
7
31
  end
8
32
 
9
33
  rule Legitbot::Oracle, %w[GrapeshotCrawler]
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # http://aspiegel.com/petalbot
5
+ # https://webmaster.petalsearch.com/site/petalbot
6
+ class Petalbot < BotMatch
7
+ domains 'petalsearch.com.'
8
+ end
9
+
10
+ rule Legitbot::Petalbot, %w[PetalBot]
11
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.4.0'
4
+ VERSION = '1.4.4'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -15,3 +15,4 @@ require_relative 'legitbot/oracle'
15
15
  require_relative 'legitbot/pinterest'
16
16
  require_relative 'legitbot/twitter'
17
17
  require_relative 'legitbot/yandex'
18
+ require_relative 'legitbot/petalbot'
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'minitest/autorun'
4
+ require 'legitbot'
5
+
6
+ class PetalbotTest < Minitest::Test
7
+ def test_malicious_ip
8
+ ip = '149.210.164.47'
9
+ match = Legitbot::Petalbot.new ip
10
+ assert !match.valid?, msg: "#{ip} is not a real Petalbot IP"
11
+ end
12
+
13
+ def test_valid_ip
14
+ ip = '114.119.128.10'
15
+ match = Legitbot::Petalbot.new ip
16
+ assert match.valid?, msg: "#{ip} is a valid Petalbot IP"
17
+ end
18
+
19
+ def test_malicious_ua
20
+ bot = Legitbot.bot(
21
+ 'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
22
+ '149.210.164.47'
23
+ )
24
+ assert bot, msg: 'Petalbot detected from User-Agent'
25
+ assert !bot.valid?, msg: 'Not a valid Petalbot'
26
+ end
27
+
28
+ def test_valid_ua
29
+ bot = Legitbot.bot(
30
+ 'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
31
+ '114.119.128.10'
32
+ )
33
+ assert bot, msg: 'Petalbot detected from User-Agent'
34
+ assert bot.valid?, msg: 'Valid Petalbot'
35
+ end
36
+
37
+ def test_valid_name
38
+ bot = Legitbot.bot(
39
+ 'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
40
+ '66.249.64.141'
41
+ )
42
+ assert_equal :petalbot, bot.detected_as
43
+ end
44
+
45
+ def test_fake_name
46
+ bot = Legitbot.bot(
47
+ 'Mozilla/5.0 (compatible; PetalBot/2.1; +http://www.google.com/bot.html)',
48
+ '81.1.172.108'
49
+ )
50
+ assert_equal :petalbot, bot.detected_as
51
+ end
52
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-28 00:00:00.000000000 Z
11
+ date: 2021-09-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: augmented_interval_tree
@@ -116,20 +116,20 @@ dependencies:
116
116
  requirements:
117
117
  - - "~>"
118
118
  - !ruby/object:Gem::Version
119
- version: 1.15.0
119
+ version: 1.20.0
120
120
  - - ">="
121
121
  - !ruby/object:Gem::Version
122
- version: 1.15.0
122
+ version: 1.20.0
123
123
  type: :development
124
124
  prerelease: false
125
125
  version_requirements: !ruby/object:Gem::Requirement
126
126
  requirements:
127
127
  - - "~>"
128
128
  - !ruby/object:Gem::Version
129
- version: 1.15.0
129
+ version: 1.20.0
130
130
  - - ">="
131
131
  - !ruby/object:Gem::Version
132
- version: 1.15.0
132
+ version: 1.20.0
133
133
  description: Does Web request come from a real search engine or from an impersonating
134
134
  agent?
135
135
  email: self@alaz.me
@@ -160,6 +160,7 @@ files:
160
160
  - lib/legitbot/google.rb
161
161
  - lib/legitbot/legitbot.rb
162
162
  - lib/legitbot/oracle.rb
163
+ - lib/legitbot/petalbot.rb
163
164
  - lib/legitbot/pinterest.rb
164
165
  - lib/legitbot/twitter.rb
165
166
  - lib/legitbot/validators/domains.rb
@@ -176,6 +177,7 @@ files:
176
177
  - test/legitbot/validators/ip_ranges_test.rb
177
178
  - test/legitbot_test.rb
178
179
  - test/oracle_test.rb
180
+ - test/petalbot_test.rb
179
181
  - test/pinterest_test.rb
180
182
  - test/twitter_test.rb
181
183
  homepage: https://github.com/alaz/legitbot
@@ -212,6 +214,7 @@ test_files:
212
214
  - test/apple_test.rb
213
215
  - test/oracle_test.rb
214
216
  - test/google_test.rb
217
+ - test/petalbot_test.rb
215
218
  - test/botmatch_test.rb
216
219
  - test/facebook_test.rb
217
220
  - test/twitter_test.rb