legitbot 1.3.0 → 1.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +7 -3
- data/.rubocop.yml +7 -0
- data/.ruby-version +1 -1
- data/README.md +7 -9
- data/legitbot.gemspec +2 -2
- data/lib/legitbot/duckduckgo.rb +2 -1
- data/lib/legitbot/oracle.rb +25 -1
- data/lib/legitbot/petalbot.rb +2 -1
- data/lib/legitbot/version.rb +1 -1
- data/test/petalbot_test.rb +2 -2
- data/test/pinterest_test.rb +2 -2
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 395c6363b221d300574d3a0bd0d324f882589ca44afc8db86f9f39e42c61a44c
|
4
|
+
data.tar.gz: 58f7c5205cdc0e0cd138b00d244bca133c16424f1b16f7a99263c121d02ae5f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df1f576284899afa5386cdea651a1cc811f9ddbf6cb683fdf85af71a829fedeae91995858e2453ed4743bd86105b3796a87cc2269f420e144f59faa5f982b646
|
7
|
+
data.tar.gz: 7f06269a4c24d08f957e47551a85338986e004f817a2d68ddacbc0354919fdcaef2106478ed2fce32133665343628b0b8ba922d0958485ee20ca98808ca3dbff
|
data/.github/workflows/build.yml
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
name: build
|
2
2
|
|
3
|
-
on:
|
3
|
+
on:
|
4
|
+
pull_request:
|
5
|
+
push:
|
6
|
+
schedule:
|
7
|
+
- cron: '29 6 * * 6'
|
4
8
|
|
5
9
|
jobs:
|
6
10
|
test:
|
@@ -9,7 +13,7 @@ jobs:
|
|
9
13
|
strategy:
|
10
14
|
fail-fast: false
|
11
15
|
matrix:
|
12
|
-
ruby: [ jruby, 2.6 ]
|
16
|
+
ruby: [ jruby, 2.5, 2.6, 2.7 ]
|
13
17
|
|
14
18
|
steps:
|
15
19
|
- uses: actions/checkout@v2
|
@@ -37,7 +41,7 @@ jobs:
|
|
37
41
|
|
38
42
|
strategy:
|
39
43
|
matrix:
|
40
|
-
ruby: [ 2.
|
44
|
+
ruby: [ 2.7 ]
|
41
45
|
|
42
46
|
steps:
|
43
47
|
- uses: actions/checkout@v2
|
data/.rubocop.yml
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.7.3
|
data/README.md
CHANGED
@@ -1,13 +1,11 @@
|
|
1
1
|
# Legitbot  
|
2
2
|
|
3
|
-
Ruby gem to
|
4
|
-
engine.
|
5
|
-
engines.
|
3
|
+
Ruby gem to make sure that an IP really belongs to a bot, typically a search
|
4
|
+
engine.
|
6
5
|
|
7
6
|
## Usage
|
8
7
|
|
9
|
-
Suppose you have a Web request and you
|
10
|
-
search engine:
|
8
|
+
Suppose you have a Web request and you would like to check it is not disguised:
|
11
9
|
|
12
10
|
```ruby
|
13
11
|
bot = Legitbot.bot(userAgent, ip)
|
@@ -22,7 +20,7 @@ bot.valid? # => true
|
|
22
20
|
bot.fake? # => false
|
23
21
|
```
|
24
22
|
|
25
|
-
Sometimes you already know
|
23
|
+
Sometimes you already know which search engine to expect. For example, you might
|
26
24
|
be using [rack-attack](https://github.com/kickstarter/rack-attack):
|
27
25
|
|
28
26
|
```ruby
|
@@ -31,8 +29,8 @@ Rack::Attack.blocklist("fake Googlebot") do |req|
|
|
31
29
|
end
|
32
30
|
```
|
33
31
|
|
34
|
-
Or if you do not like all
|
35
|
-
|
32
|
+
Or if you do not like all those ghoulish crawlers stealing your
|
33
|
+
content, evaluating it and getting ready to invade your site with spammers,
|
36
34
|
then block them all:
|
37
35
|
|
38
36
|
```ruby
|
@@ -52,10 +50,10 @@ end
|
|
52
50
|
* [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
|
53
51
|
* [Google crawlers](https://support.google.com/webmasters/answer/1061943)
|
54
52
|
* [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
|
53
|
+
* [Petal search engine](http://aspiegel.com/petalbot)
|
55
54
|
* [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
|
56
55
|
* [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
|
57
56
|
* [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
|
58
|
-
* [Petal robots (Huawei search)](http://aspiegel.com/petalbot)
|
59
57
|
|
60
58
|
## License
|
61
59
|
|
data/legitbot.gemspec
CHANGED
@@ -14,13 +14,13 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
|
15
15
|
spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
|
16
16
|
|
17
|
-
spec.required_ruby_version = '>= 2.
|
17
|
+
spec.required_ruby_version = '>= 2.5.0'
|
18
18
|
spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
|
19
19
|
spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
|
20
20
|
spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
|
21
21
|
spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
|
22
22
|
spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
|
23
|
-
spec.add_development_dependency 'rubocop', '~>
|
23
|
+
spec.add_development_dependency 'rubocop', '~> 1.20.0', '>= 1.20.0'
|
24
24
|
|
25
25
|
spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
26
26
|
spec.rdoc_options = ['--charset=UTF-8']
|
data/lib/legitbot/duckduckgo.rb
CHANGED
@@ -4,14 +4,15 @@ module Legitbot # :nodoc:
|
|
4
4
|
# https://duckduckgo.com/duckduckbot
|
5
5
|
class DuckDuckGo < BotMatch
|
6
6
|
ip_ranges %w[
|
7
|
+
20.191.45.212
|
7
8
|
23.21.227.69
|
8
9
|
40.88.21.235
|
9
10
|
50.16.241.113
|
10
11
|
50.16.241.114
|
11
12
|
50.16.241.117
|
12
13
|
50.16.247.234
|
13
|
-
52.204.97.54
|
14
14
|
52.5.190.19
|
15
|
+
52.204.97.54
|
15
16
|
54.197.234.188
|
16
17
|
54.208.100.253
|
17
18
|
54.208.102.37
|
data/lib/legitbot/oracle.rb
CHANGED
@@ -3,7 +3,31 @@
|
|
3
3
|
module Legitbot # :nodoc:
|
4
4
|
# https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html
|
5
5
|
class Oracle < BotMatch
|
6
|
-
ip_ranges
|
6
|
+
ip_ranges %w[
|
7
|
+
132.145.9.5
|
8
|
+
132.145.11.125
|
9
|
+
132.145.14.70
|
10
|
+
132.145.15.209
|
11
|
+
132.145.64.33
|
12
|
+
132.145.66.116
|
13
|
+
132.145.66.156
|
14
|
+
132.145.67.248
|
15
|
+
140.238.81.78
|
16
|
+
140.238.83.181
|
17
|
+
140.238.94.137
|
18
|
+
140.238.95.47
|
19
|
+
140.238.95.199
|
20
|
+
152.67.128.219
|
21
|
+
152.67.137.35
|
22
|
+
152.67.138.180
|
23
|
+
|
24
|
+
148.64.56.64/28
|
25
|
+
148.64.56.79
|
26
|
+
148.64.56.80
|
27
|
+
148.64.56.112/28
|
28
|
+
148.64.56.127
|
29
|
+
148.64.56.128
|
30
|
+
]
|
7
31
|
end
|
8
32
|
|
9
33
|
rule Legitbot::Oracle, %w[GrapeshotCrawler]
|
data/lib/legitbot/petalbot.rb
CHANGED
data/lib/legitbot/version.rb
CHANGED
data/test/petalbot_test.rb
CHANGED
@@ -11,7 +11,7 @@ class PetalbotTest < Minitest::Test
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def test_valid_ip
|
14
|
-
ip = '114.119.
|
14
|
+
ip = '114.119.128.10'
|
15
15
|
match = Legitbot::Petalbot.new ip
|
16
16
|
assert match.valid?, msg: "#{ip} is a valid Petalbot IP"
|
17
17
|
end
|
@@ -28,7 +28,7 @@ class PetalbotTest < Minitest::Test
|
|
28
28
|
def test_valid_ua
|
29
29
|
bot = Legitbot.bot(
|
30
30
|
'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
|
31
|
-
'114.119.
|
31
|
+
'114.119.128.10'
|
32
32
|
)
|
33
33
|
assert bot, msg: 'Petalbot detected from User-Agent'
|
34
34
|
assert bot.valid?, msg: 'Valid Petalbot'
|
data/test/pinterest_test.rb
CHANGED
@@ -34,7 +34,7 @@ class PinterestTest < Minitest::Test
|
|
34
34
|
assert bot.valid?, msg: 'Valid Pinterest'
|
35
35
|
end
|
36
36
|
|
37
|
-
# rubocop:disable
|
37
|
+
# rubocop:disable Layout/LineLength
|
38
38
|
def test_android_not_bot
|
39
39
|
bot = Legitbot.bot(
|
40
40
|
'Mozilla/5.0 (Linux; Android 8.0.0; SM-G965F Build/R16NW; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/70.0.3538.64 Mobile Safari/537.36 [Pinterest/Android]',
|
@@ -42,7 +42,7 @@ class PinterestTest < Minitest::Test
|
|
42
42
|
)
|
43
43
|
assert_nil bot
|
44
44
|
end
|
45
|
-
# rubocop:enable
|
45
|
+
# rubocop:enable Layout/LineLength
|
46
46
|
|
47
47
|
def test_engine_name
|
48
48
|
bot = Legitbot.bot(
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-09-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: augmented_interval_tree
|
@@ -116,20 +116,20 @@ dependencies:
|
|
116
116
|
requirements:
|
117
117
|
- - "~>"
|
118
118
|
- !ruby/object:Gem::Version
|
119
|
-
version:
|
119
|
+
version: 1.20.0
|
120
120
|
- - ">="
|
121
121
|
- !ruby/object:Gem::Version
|
122
|
-
version:
|
122
|
+
version: 1.20.0
|
123
123
|
type: :development
|
124
124
|
prerelease: false
|
125
125
|
version_requirements: !ruby/object:Gem::Requirement
|
126
126
|
requirements:
|
127
127
|
- - "~>"
|
128
128
|
- !ruby/object:Gem::Version
|
129
|
-
version:
|
129
|
+
version: 1.20.0
|
130
130
|
- - ">="
|
131
131
|
- !ruby/object:Gem::Version
|
132
|
-
version:
|
132
|
+
version: 1.20.0
|
133
133
|
description: Does Web request come from a real search engine or from an impersonating
|
134
134
|
agent?
|
135
135
|
email: self@alaz.me
|
@@ -193,14 +193,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
193
193
|
requirements:
|
194
194
|
- - ">="
|
195
195
|
- !ruby/object:Gem::Version
|
196
|
-
version: 2.
|
196
|
+
version: 2.5.0
|
197
197
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
198
198
|
requirements:
|
199
199
|
- - ">="
|
200
200
|
- !ruby/object:Gem::Version
|
201
201
|
version: '0'
|
202
202
|
requirements: []
|
203
|
-
rubygems_version: 3.1.
|
203
|
+
rubygems_version: 3.1.6
|
204
204
|
signing_key:
|
205
205
|
specification_version: 4
|
206
206
|
summary: 'Validate requests from Web crawlers: impersonating or not?'
|