legitbot 1.3.0 → 1.4.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d709839505242be7ed07c5f6413e607bd8eb127a011765b9f45c1a3e0bba2fbb
4
- data.tar.gz: 53a19413068473299ce78887756a2ef50f59859e54d652e4314755178a30028c
3
+ metadata.gz: 395c6363b221d300574d3a0bd0d324f882589ca44afc8db86f9f39e42c61a44c
4
+ data.tar.gz: 58f7c5205cdc0e0cd138b00d244bca133c16424f1b16f7a99263c121d02ae5f9
5
5
  SHA512:
6
- metadata.gz: daaf7bbde5fae2ab3937b5e4f4cd936a61f54b80d5ec87eaca8db473977f633c993f267f6aeb101b94404119e58835878ee1433683cf613b476c6fc7f27b34b3
7
- data.tar.gz: 1e51fe4ee02eb14bb0a95230e2ca3d4716a56cee199a40f3cfcd4d0999158526f151fa7915a9a3f0a21a9054ebc3862353e1a68a89057c2cf8bbcac4d082afb8
6
+ metadata.gz: df1f576284899afa5386cdea651a1cc811f9ddbf6cb683fdf85af71a829fedeae91995858e2453ed4743bd86105b3796a87cc2269f420e144f59faa5f982b646
7
+ data.tar.gz: 7f06269a4c24d08f957e47551a85338986e004f817a2d68ddacbc0354919fdcaef2106478ed2fce32133665343628b0b8ba922d0958485ee20ca98808ca3dbff
@@ -1,6 +1,10 @@
1
1
  name: build
2
2
 
3
- on: [pull_request, push]
3
+ on:
4
+ pull_request:
5
+ push:
6
+ schedule:
7
+ - cron: '29 6 * * 6'
4
8
 
5
9
  jobs:
6
10
  test:
@@ -9,7 +13,7 @@ jobs:
9
13
  strategy:
10
14
  fail-fast: false
11
15
  matrix:
12
- ruby: [ jruby, 2.6 ]
16
+ ruby: [ jruby, 2.5, 2.6, 2.7 ]
13
17
 
14
18
  steps:
15
19
  - uses: actions/checkout@v2
@@ -37,7 +41,7 @@ jobs:
37
41
 
38
42
  strategy:
39
43
  matrix:
40
- ruby: [ 2.6 ]
44
+ ruby: [ 2.7 ]
41
45
 
42
46
  steps:
43
47
  - uses: actions/checkout@v2
data/.rubocop.yml CHANGED
@@ -1,3 +1,10 @@
1
1
  AllCops:
2
2
  CacheRootDirectory: 'vendor'
3
3
  NewCops: enable
4
+ SuggestExtensions: false
5
+
6
+ Gemspec/RequiredRubyVersion:
7
+ Enabled: false
8
+
9
+ Naming/MemoizedInstanceVariableName:
10
+ Enabled: false
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.4
1
+ 2.7.3
data/README.md CHANGED
@@ -1,13 +1,11 @@
1
1
  # Legitbot ![](https://github.com/alaz/legitbot/workflows/build/badge.svg) ![](https://badge.fury.io/rb/legitbot.svg)
2
2
 
3
- Ruby gem to check that an IP belongs to a bot, typically a search
4
- engine. This can be of help in protecting a web site from fake search
5
- engines.
3
+ Ruby gem to make sure that an IP really belongs to a bot, typically a search
4
+ engine.
6
5
 
7
6
  ## Usage
8
7
 
9
- Suppose you have a Web request and you'd like to make sure it's not from a fake
10
- search engine:
8
+ Suppose you have a Web request and you would like to check it is not disguised:
11
9
 
12
10
  ```ruby
13
11
  bot = Legitbot.bot(userAgent, ip)
@@ -22,7 +20,7 @@ bot.valid? # => true
22
20
  bot.fake? # => false
23
21
  ```
24
22
 
25
- Sometimes you already know what search engine to expect. For example, you might
23
+ Sometimes you already know which search engine to expect. For example, you might
26
24
  be using [rack-attack](https://github.com/kickstarter/rack-attack):
27
25
 
28
26
  ```ruby
@@ -31,8 +29,8 @@ Rack::Attack.blocklist("fake Googlebot") do |req|
31
29
  end
32
30
  ```
33
31
 
34
- Or if you do not like all these nasty crawlers stealing your content or
35
- maybe evaluating it and getting ready to invade your site with spammers,
32
+ Or if you do not like all those ghoulish crawlers stealing your
33
+ content, evaluating it and getting ready to invade your site with spammers,
36
34
  then block them all:
37
35
 
38
36
  ```ruby
@@ -52,10 +50,10 @@ end
52
50
  * [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
53
51
  * [Google crawlers](https://support.google.com/webmasters/answer/1061943)
54
52
  * [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
53
+ * [Petal search engine](http://aspiegel.com/petalbot)
55
54
  * [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
56
55
  * [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
57
56
  * [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
58
- * [Petal robots (Huawei search)](http://aspiegel.com/petalbot)
59
57
 
60
58
  ## License
61
59
 
data/legitbot.gemspec CHANGED
@@ -14,13 +14,13 @@ Gem::Specification.new do |spec|
14
14
  spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
15
15
  spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
16
16
 
17
- spec.required_ruby_version = '>= 2.4.0'
17
+ spec.required_ruby_version = '>= 2.5.0'
18
18
  spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
19
19
  spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
20
20
  spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
21
21
  spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
22
22
  spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
23
- spec.add_development_dependency 'rubocop', '~> 0.92.0', '>= 0.92.0'
23
+ spec.add_development_dependency 'rubocop', '~> 1.20.0', '>= 1.20.0'
24
24
 
25
25
  spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
26
26
  spec.rdoc_options = ['--charset=UTF-8']
@@ -4,14 +4,15 @@ module Legitbot # :nodoc:
4
4
  # https://duckduckgo.com/duckduckbot
5
5
  class DuckDuckGo < BotMatch
6
6
  ip_ranges %w[
7
+ 20.191.45.212
7
8
  23.21.227.69
8
9
  40.88.21.235
9
10
  50.16.241.113
10
11
  50.16.241.114
11
12
  50.16.241.117
12
13
  50.16.247.234
13
- 52.204.97.54
14
14
  52.5.190.19
15
+ 52.204.97.54
15
16
  54.197.234.188
16
17
  54.208.100.253
17
18
  54.208.102.37
@@ -3,7 +3,31 @@
3
3
  module Legitbot # :nodoc:
4
4
  # https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html
5
5
  class Oracle < BotMatch
6
- ip_ranges '148.64.56.0/24'
6
+ ip_ranges %w[
7
+ 132.145.9.5
8
+ 132.145.11.125
9
+ 132.145.14.70
10
+ 132.145.15.209
11
+ 132.145.64.33
12
+ 132.145.66.116
13
+ 132.145.66.156
14
+ 132.145.67.248
15
+ 140.238.81.78
16
+ 140.238.83.181
17
+ 140.238.94.137
18
+ 140.238.95.47
19
+ 140.238.95.199
20
+ 152.67.128.219
21
+ 152.67.137.35
22
+ 152.67.138.180
23
+
24
+ 148.64.56.64/28
25
+ 148.64.56.79
26
+ 148.64.56.80
27
+ 148.64.56.112/28
28
+ 148.64.56.127
29
+ 148.64.56.128
30
+ ]
7
31
  end
8
32
 
9
33
  rule Legitbot::Oracle, %w[GrapeshotCrawler]
@@ -2,8 +2,9 @@
2
2
 
3
3
  module Legitbot # :nodoc:
4
4
  # http://aspiegel.com/petalbot
5
+ # https://webmaster.petalsearch.com/site/petalbot
5
6
  class Petalbot < BotMatch
6
- domains 'aspiegel.com.'
7
+ domains 'petalsearch.com.'
7
8
  end
8
9
 
9
10
  rule Legitbot::Petalbot, %w[PetalBot]
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.3.0'
4
+ VERSION = '1.4.3'
5
5
  end
@@ -11,7 +11,7 @@ class PetalbotTest < Minitest::Test
11
11
  end
12
12
 
13
13
  def test_valid_ip
14
- ip = '114.119.153.50'
14
+ ip = '114.119.128.10'
15
15
  match = Legitbot::Petalbot.new ip
16
16
  assert match.valid?, msg: "#{ip} is a valid Petalbot IP"
17
17
  end
@@ -28,7 +28,7 @@ class PetalbotTest < Minitest::Test
28
28
  def test_valid_ua
29
29
  bot = Legitbot.bot(
30
30
  'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
31
- '114.119.153.50'
31
+ '114.119.128.10'
32
32
  )
33
33
  assert bot, msg: 'Petalbot detected from User-Agent'
34
34
  assert bot.valid?, msg: 'Valid Petalbot'
@@ -34,7 +34,7 @@ class PinterestTest < Minitest::Test
34
34
  assert bot.valid?, msg: 'Valid Pinterest'
35
35
  end
36
36
 
37
- # rubocop:disable Metrics/LineLength
37
+ # rubocop:disable Layout/LineLength
38
38
  def test_android_not_bot
39
39
  bot = Legitbot.bot(
40
40
  'Mozilla/5.0 (Linux; Android 8.0.0; SM-G965F Build/R16NW; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/70.0.3538.64 Mobile Safari/537.36 [Pinterest/Android]',
@@ -42,7 +42,7 @@ class PinterestTest < Minitest::Test
42
42
  )
43
43
  assert_nil bot
44
44
  end
45
- # rubocop:enable Metrics/LineLength
45
+ # rubocop:enable Layout/LineLength
46
46
 
47
47
  def test_engine_name
48
48
  bot = Legitbot.bot(
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.0
4
+ version: 1.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-16 00:00:00.000000000 Z
11
+ date: 2021-09-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: augmented_interval_tree
@@ -116,20 +116,20 @@ dependencies:
116
116
  requirements:
117
117
  - - "~>"
118
118
  - !ruby/object:Gem::Version
119
- version: 0.92.0
119
+ version: 1.20.0
120
120
  - - ">="
121
121
  - !ruby/object:Gem::Version
122
- version: 0.92.0
122
+ version: 1.20.0
123
123
  type: :development
124
124
  prerelease: false
125
125
  version_requirements: !ruby/object:Gem::Requirement
126
126
  requirements:
127
127
  - - "~>"
128
128
  - !ruby/object:Gem::Version
129
- version: 0.92.0
129
+ version: 1.20.0
130
130
  - - ">="
131
131
  - !ruby/object:Gem::Version
132
- version: 0.92.0
132
+ version: 1.20.0
133
133
  description: Does Web request come from a real search engine or from an impersonating
134
134
  agent?
135
135
  email: self@alaz.me
@@ -193,14 +193,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
193
193
  requirements:
194
194
  - - ">="
195
195
  - !ruby/object:Gem::Version
196
- version: 2.4.0
196
+ version: 2.5.0
197
197
  required_rubygems_version: !ruby/object:Gem::Requirement
198
198
  requirements:
199
199
  - - ">="
200
200
  - !ruby/object:Gem::Version
201
201
  version: '0'
202
202
  requirements: []
203
- rubygems_version: 3.1.4
203
+ rubygems_version: 3.1.6
204
204
  signing_key:
205
205
  specification_version: 4
206
206
  summary: 'Validate requests from Web crawlers: impersonating or not?'