legitbot 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/legitbot.gemspec +1 -2
- data/lib/legitbot.rb +1 -0
- data/lib/legitbot/oracle.rb +10 -0
- data/lib/legitbot/version.rb +1 -1
- data/test/oracle_test.rb +36 -0
- metadata +32 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 585e32c80a367b94dee89599e33e659ed9461404f5b66dc5f2d84a7ba03849a0
|
4
|
+
data.tar.gz: dd60d23e4b7332817b6a6f3a2161ce15c57f5863efea1741902e1efb69fb2798
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a89023768d7dd4b9cce76bd9e50cb1fd76d2cc88dd03518ea6c6ef70c880de91b6a7eb342e50fa89dbcb428cae19ab4c619ab8715b7c84041b7a60b967cebfd
|
7
|
+
data.tar.gz: 745d57d3976bf59af9ffdc0f642cc6ccc85e2a3a261bcbfd40ad8644f34881c8765a884a420c85fdf19731de5ce8f18d4ad8ee3a2153a1a36fca402cf6d49a12
|
data/README.md
CHANGED
@@ -50,6 +50,7 @@ end
|
|
50
50
|
* [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
|
51
51
|
* [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
|
52
52
|
* [Google crawlers](https://support.google.com/webmasters/answer/1061943)
|
53
|
+
* [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
|
53
54
|
* [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
|
54
55
|
* [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
|
55
56
|
* [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
|
data/legitbot.gemspec
CHANGED
@@ -12,8 +12,7 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.email = 'self@alaz.me'
|
13
13
|
spec.homepage = 'https://github.com/alaz/legitbot'
|
14
14
|
spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
|
15
|
-
spec.description = '
|
16
|
-
'made by a real search engine, not a malicious agent'
|
15
|
+
spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
|
17
16
|
|
18
17
|
spec.required_ruby_version = '>= 2.3.0'
|
19
18
|
spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
|
data/lib/legitbot.rb
CHANGED
@@ -10,6 +10,7 @@ require_relative 'legitbot/bing'
|
|
10
10
|
require_relative 'legitbot/duckduckgo'
|
11
11
|
require_relative 'legitbot/facebook'
|
12
12
|
require_relative 'legitbot/google'
|
13
|
+
require_relative 'legitbot/oracle'
|
13
14
|
require_relative 'legitbot/pinterest'
|
14
15
|
require_relative 'legitbot/twitter'
|
15
16
|
require_relative 'legitbot/yandex'
|
data/lib/legitbot/version.rb
CHANGED
data/test/oracle_test.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'minitest/autorun'
|
4
|
+
require 'legitbot'
|
5
|
+
|
6
|
+
class OracleTest < Minitest::Test
|
7
|
+
def test_malicious_ip
|
8
|
+
ip = '149.210.164.47'
|
9
|
+
match = Legitbot::Oracle.new ip
|
10
|
+
assert !match.valid?, msg: "#{ip} is not a real Oracle IP"
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_valid_ip
|
14
|
+
ip = '148.64.56.64'
|
15
|
+
match = Legitbot::Oracle.new ip
|
16
|
+
assert match.valid?, msg: "#{ip} is a valid Oracle IP"
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_malicious_ua
|
20
|
+
bot = Legitbot.bot(
|
21
|
+
'Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)',
|
22
|
+
'149.210.164.47'
|
23
|
+
)
|
24
|
+
assert bot, msg: 'Oracle detected from User-Agent'
|
25
|
+
assert !bot.valid?, msg: 'Not a valid Oracle'
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_valid_ua
|
29
|
+
bot = Legitbot.bot(
|
30
|
+
'Mozilla/5.0 (compatible; GrapeshotCrawler/2.0; +http://www.grapeshot.co.uk/crawler.php)',
|
31
|
+
'148.64.56.64'
|
32
|
+
)
|
33
|
+
assert bot, msg: 'Oracle detected from User-Agent'
|
34
|
+
assert bot.valid?, msg: 'Valid Oracle'
|
35
|
+
end
|
36
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: augmented_interval_tree
|
@@ -54,84 +54,84 @@ dependencies:
|
|
54
54
|
name: bump
|
55
55
|
requirement: !ruby/object:Gem::Requirement
|
56
56
|
requirements:
|
57
|
-
- - "~>"
|
58
|
-
- !ruby/object:Gem::Version
|
59
|
-
version: '0.8'
|
60
57
|
- - ">="
|
61
58
|
- !ruby/object:Gem::Version
|
62
59
|
version: 0.8.0
|
60
|
+
- - "~>"
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0.8'
|
63
63
|
type: :development
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
|
-
- - "~>"
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
version: '0.8'
|
70
67
|
- - ">="
|
71
68
|
- !ruby/object:Gem::Version
|
72
69
|
version: 0.8.0
|
70
|
+
- - "~>"
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '0.8'
|
73
73
|
- !ruby/object:Gem::Dependency
|
74
74
|
name: minitest
|
75
75
|
requirement: !ruby/object:Gem::Requirement
|
76
76
|
requirements:
|
77
|
-
- - "~>"
|
78
|
-
- !ruby/object:Gem::Version
|
79
|
-
version: '5.1'
|
80
77
|
- - ">="
|
81
78
|
- !ruby/object:Gem::Version
|
82
79
|
version: 5.1.0
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '5.1'
|
83
83
|
type: :development
|
84
84
|
prerelease: false
|
85
85
|
version_requirements: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "~>"
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '5.1'
|
90
87
|
- - ">="
|
91
88
|
- !ruby/object:Gem::Version
|
92
89
|
version: 5.1.0
|
90
|
+
- - "~>"
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '5.1'
|
93
93
|
- !ruby/object:Gem::Dependency
|
94
94
|
name: rake
|
95
95
|
requirement: !ruby/object:Gem::Requirement
|
96
96
|
requirements:
|
97
|
-
- - "~>"
|
98
|
-
- !ruby/object:Gem::Version
|
99
|
-
version: '12.3'
|
100
97
|
- - ">="
|
101
98
|
- !ruby/object:Gem::Version
|
102
99
|
version: 12.3.0
|
100
|
+
- - "~>"
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '12.3'
|
103
103
|
type: :development
|
104
104
|
prerelease: false
|
105
105
|
version_requirements: !ruby/object:Gem::Requirement
|
106
106
|
requirements:
|
107
|
-
- - "~>"
|
108
|
-
- !ruby/object:Gem::Version
|
109
|
-
version: '12.3'
|
110
107
|
- - ">="
|
111
108
|
- !ruby/object:Gem::Version
|
112
109
|
version: 12.3.0
|
110
|
+
- - "~>"
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: '12.3'
|
113
113
|
- !ruby/object:Gem::Dependency
|
114
114
|
name: rubocop
|
115
115
|
requirement: !ruby/object:Gem::Requirement
|
116
116
|
requirements:
|
117
|
-
- - "~>"
|
118
|
-
- !ruby/object:Gem::Version
|
119
|
-
version: '0.74'
|
120
117
|
- - ">="
|
121
118
|
- !ruby/object:Gem::Version
|
122
119
|
version: 0.74.0
|
120
|
+
- - "~>"
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0.74'
|
123
123
|
type: :development
|
124
124
|
prerelease: false
|
125
125
|
version_requirements: !ruby/object:Gem::Requirement
|
126
126
|
requirements:
|
127
|
-
- - "~>"
|
128
|
-
- !ruby/object:Gem::Version
|
129
|
-
version: '0.74'
|
130
127
|
- - ">="
|
131
128
|
- !ruby/object:Gem::Version
|
132
129
|
version: 0.74.0
|
133
|
-
|
134
|
-
|
130
|
+
- - "~>"
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: '0.74'
|
133
|
+
description: Does Web request come from a real search engine or from an impersonating
|
134
|
+
agent?
|
135
135
|
email: self@alaz.me
|
136
136
|
executables: []
|
137
137
|
extensions: []
|
@@ -156,6 +156,7 @@ files:
|
|
156
156
|
- lib/legitbot/facebook.rb
|
157
157
|
- lib/legitbot/google.rb
|
158
158
|
- lib/legitbot/legitbot.rb
|
159
|
+
- lib/legitbot/oracle.rb
|
159
160
|
- lib/legitbot/pinterest.rb
|
160
161
|
- lib/legitbot/twitter.rb
|
161
162
|
- lib/legitbot/validators/domains.rb
|
@@ -171,6 +172,7 @@ files:
|
|
171
172
|
- test/legitbot/validators/domains_test.rb
|
172
173
|
- test/legitbot/validators/ip_ranges_test.rb
|
173
174
|
- test/legitbot_test.rb
|
175
|
+
- test/oracle_test.rb
|
174
176
|
- test/pinterest_test.rb
|
175
177
|
- test/twitter_test.rb
|
176
178
|
homepage: https://github.com/alaz/legitbot
|
@@ -193,8 +195,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
193
195
|
- !ruby/object:Gem::Version
|
194
196
|
version: '0'
|
195
197
|
requirements: []
|
196
|
-
|
197
|
-
rubygems_version: 2.7.6.2
|
198
|
+
rubygems_version: 3.0.6
|
198
199
|
signing_key:
|
199
200
|
specification_version: 4
|
200
201
|
summary: 'Validate requests from Web crawlers: impersonating or not?'
|
@@ -206,6 +207,7 @@ test_files:
|
|
206
207
|
- test/ahrefs_test.rb
|
207
208
|
- test/apple_test.rb
|
208
209
|
- test/apple_as_google_test.rb
|
210
|
+
- test/oracle_test.rb
|
209
211
|
- test/google_test.rb
|
210
212
|
- test/botmatch_test.rb
|
211
213
|
- test/facebook_test.rb
|