legitbot 1.10.6 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -0
- data/lib/legitbot/amazon.rb +3 -2
- data/lib/legitbot/blexbot.rb +10 -0
- data/lib/legitbot/dataforseo.rb +10 -0
- data/lib/legitbot/version.rb +1 -1
- data/lib/legitbot.rb +2 -0
- data/test/amazon_test.rb +25 -2
- data/test/blexbot_test.rb +60 -0
- data/test/dataforseo_test.rb +60 -0
- data/test/lib/dns_server_mock.rb +21 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ccf22fe1fab3a7cab2955709eb3b0ada75a66305b255a36ec795eb092d9741c8
|
4
|
+
data.tar.gz: 6b03643eb517f59626c7a1e59b04f6808ce067b87597bf5f7e8486f4c1eb309c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44f09102368337c185aa95c32a76dd551a01bf9fda2e098757383101bb3c57cced58f85fcad00f7c86dda1550df142b0a9549ea2d7c9c54c41cb2fffe7bfbabe
|
7
|
+
data.tar.gz: 6149c2e4eca68be2224ee529184a4df492fd224602aed1c08aa0a80af5b77ac902e23258331099c769ea4cb80f07c4b99df00526626d328fa13852afedb57eb0
|
data/README.md
CHANGED
@@ -50,10 +50,13 @@ end
|
|
50
50
|
## Supported
|
51
51
|
|
52
52
|
- [Ahrefs](https://ahrefs.com/robot)
|
53
|
+
- [Amazonbot](https://developer.amazon.com/amazonbot)
|
53
54
|
- [Amazon AdBot](https://adbot.amazon.com/index.html)
|
54
55
|
- [Applebot](https://support.apple.com/en-us/119829)
|
55
56
|
- [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
|
56
57
|
- [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
|
58
|
+
- [BLEXBot (WebMeUp)](http://webmeup-crawler.com/)
|
59
|
+
- [DataForSEO](https://dataforseo.com/dataforseo-bot)
|
57
60
|
- [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
|
58
61
|
- [Google crawlers](https://support.google.com/webmasters/answer/1061943)
|
59
62
|
- [IAS](https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/)
|
data/lib/legitbot/amazon.rb
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
|
3
3
|
module Legitbot # :nodoc:
|
4
4
|
# https://adbot.amazon.com/index.html
|
5
|
+
# https://developer.amazon.com/amazonbot
|
5
6
|
class Amazon < BotMatch
|
6
|
-
domains 'amazonadbot.com.'
|
7
|
+
domains 'amazon.', 'amazonadbot.com.'
|
7
8
|
end
|
8
9
|
|
9
|
-
rule Legitbot::Amazon, %w[AmazonAdBot]
|
10
|
+
rule Legitbot::Amazon, %w[Amazonbot AmazonAdBot]
|
10
11
|
end
|
data/lib/legitbot/version.rb
CHANGED
data/lib/legitbot.rb
CHANGED
@@ -8,6 +8,8 @@ require_relative 'legitbot/amazon'
|
|
8
8
|
require_relative 'legitbot/apple'
|
9
9
|
require_relative 'legitbot/baidu'
|
10
10
|
require_relative 'legitbot/bing'
|
11
|
+
require_relative 'legitbot/blexbot'
|
12
|
+
require_relative 'legitbot/dataforseo'
|
11
13
|
require_relative 'legitbot/duckduckgo'
|
12
14
|
require_relative 'legitbot/facebook'
|
13
15
|
require_relative 'legitbot/google'
|
data/test/amazon_test.rb
CHANGED
@@ -30,7 +30,7 @@ class AmazonTest < Minitest::Test
|
|
30
30
|
refute_predicate bot, :valid?
|
31
31
|
end
|
32
32
|
|
33
|
-
def test_user_agent
|
33
|
+
def test_user_agent1
|
34
34
|
bot = Legitbot.bot(
|
35
35
|
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
36
36
|
'54.166.7.90'
|
@@ -40,7 +40,19 @@ class AmazonTest < Minitest::Test
|
|
40
40
|
assert_predicate bot, :valid?
|
41
41
|
end
|
42
42
|
|
43
|
-
|
43
|
+
# rubocop:disable Layout/LineLength
|
44
|
+
def test_user_agent2
|
45
|
+
bot = Legitbot.bot(
|
46
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
|
47
|
+
'52.70.240.171'
|
48
|
+
)
|
49
|
+
|
50
|
+
assert bot
|
51
|
+
assert_predicate bot, :valid?
|
52
|
+
end
|
53
|
+
# rubocop:enable Layout/LineLength
|
54
|
+
|
55
|
+
def test_valid_name1
|
44
56
|
bot = Legitbot.bot(
|
45
57
|
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
46
58
|
'54.166.7.90'
|
@@ -49,6 +61,17 @@ class AmazonTest < Minitest::Test
|
|
49
61
|
assert_equal :amazon, bot.detected_as
|
50
62
|
end
|
51
63
|
|
64
|
+
# rubocop:disable Layout/LineLength
|
65
|
+
def test_valid_name2
|
66
|
+
bot = Legitbot.bot(
|
67
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
|
68
|
+
'52.70.240.171'
|
69
|
+
)
|
70
|
+
|
71
|
+
assert_equal :amazon, bot.detected_as
|
72
|
+
end
|
73
|
+
# rubocop:enable Layout/LineLength
|
74
|
+
|
52
75
|
def test_fake_name
|
53
76
|
bot = Legitbot.bot(
|
54
77
|
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
data/test/blexbot_test.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
|
5
|
+
class BLEXBot < Minitest::Test
|
6
|
+
include Minitest::Hooks
|
7
|
+
include DnsServerMock
|
8
|
+
|
9
|
+
def test_malicious_ip
|
10
|
+
ip = '149.210.164.47'
|
11
|
+
match = Legitbot::BLEXBot.new ip
|
12
|
+
|
13
|
+
refute_predicate match, :valid?
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_valid_ip
|
17
|
+
ip = '65.21.113.197'
|
18
|
+
match = Legitbot::BLEXBot.new ip
|
19
|
+
|
20
|
+
assert_predicate match, :valid?
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_malicious_ua
|
24
|
+
bot = Legitbot.bot(
|
25
|
+
'Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)',
|
26
|
+
'149.210.164.47'
|
27
|
+
)
|
28
|
+
|
29
|
+
assert bot
|
30
|
+
refute_predicate bot, :valid?
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_valid_ua
|
34
|
+
bot = Legitbot.bot(
|
35
|
+
'Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)',
|
36
|
+
'65.21.113.197'
|
37
|
+
)
|
38
|
+
|
39
|
+
assert bot
|
40
|
+
assert_predicate bot, :valid?
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_valid_name
|
44
|
+
bot = Legitbot.bot(
|
45
|
+
'Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)',
|
46
|
+
'65.21.113.197'
|
47
|
+
)
|
48
|
+
|
49
|
+
assert_equal :blexbot, bot.detected_as
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_fake_name
|
53
|
+
bot = Legitbot.bot(
|
54
|
+
'Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)',
|
55
|
+
'81.1.172.108'
|
56
|
+
)
|
57
|
+
|
58
|
+
assert_equal :blexbot, bot.detected_as
|
59
|
+
end
|
60
|
+
end
|
data/test/dataforseo_test.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
|
5
|
+
class DataForSEOTest < Minitest::Test
|
6
|
+
include Minitest::Hooks
|
7
|
+
include DnsServerMock
|
8
|
+
|
9
|
+
def test_malicious_ip
|
10
|
+
ip = '149.210.164.47'
|
11
|
+
match = Legitbot::DataForSEO.new ip
|
12
|
+
|
13
|
+
refute_predicate match, :valid?
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_valid_ip
|
17
|
+
ip = '136.243.228.176'
|
18
|
+
match = Legitbot::DataForSEO.new ip
|
19
|
+
|
20
|
+
assert_predicate match, :valid?
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_malicious_ua
|
24
|
+
bot = Legitbot.bot(
|
25
|
+
'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)',
|
26
|
+
'149.210.164.47'
|
27
|
+
)
|
28
|
+
|
29
|
+
assert bot
|
30
|
+
refute_predicate bot, :valid?
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_valid_ua
|
34
|
+
bot = Legitbot.bot(
|
35
|
+
'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)',
|
36
|
+
'136.243.228.176'
|
37
|
+
)
|
38
|
+
|
39
|
+
assert bot
|
40
|
+
assert_predicate bot, :valid?
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_valid_name
|
44
|
+
bot = Legitbot.bot(
|
45
|
+
'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)',
|
46
|
+
'136.243.228.176'
|
47
|
+
)
|
48
|
+
|
49
|
+
assert_equal :dataforseo, bot.detected_as
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_fake_name
|
53
|
+
bot = Legitbot.bot(
|
54
|
+
'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)',
|
55
|
+
'81.1.172.108'
|
56
|
+
)
|
57
|
+
|
58
|
+
assert_equal :dataforseo, bot.detected_as
|
59
|
+
end
|
60
|
+
end
|
data/test/lib/dns_server_mock.rb
CHANGED
@@ -29,6 +29,12 @@ TEST_DNS_RECORDS = {
|
|
29
29
|
'54.166.7.90' => {
|
30
30
|
ptr: %w[crawler-54-166-7-90.amazonadbot.com]
|
31
31
|
},
|
32
|
+
'52-70-240-171.crawl.amazonbot.amazon' => {
|
33
|
+
a: %w[52.70.240.171]
|
34
|
+
},
|
35
|
+
'52.70.240.171' => {
|
36
|
+
ptr: %w[52-70-240-171.crawl.amazonbot.amazon]
|
37
|
+
},
|
32
38
|
|
33
39
|
# Apple
|
34
40
|
'17-58-98-60.applebot.apple.com' => {
|
@@ -38,6 +44,21 @@ TEST_DNS_RECORDS = {
|
|
38
44
|
ptr: %w[17-58-98-60.applebot.apple.com]
|
39
45
|
},
|
40
46
|
|
47
|
+
# BLEXBot (WebMeUp)
|
48
|
+
'pot22.webmeup.com' => {
|
49
|
+
a: %w[65.21.113.197]
|
50
|
+
},
|
51
|
+
'65.21.113.197' => {
|
52
|
+
ptr: %w[pot22.webmeup.com]
|
53
|
+
},
|
54
|
+
# DataForSEO
|
55
|
+
'crawling-gateway-136-243-228-176.dataforseo.com' => {
|
56
|
+
a: %w[136.243.228.176]
|
57
|
+
},
|
58
|
+
'136.243.228.176' => {
|
59
|
+
ptr: %w[crawling-gateway-136-243-228-176.dataforseo.com]
|
60
|
+
},
|
61
|
+
|
41
62
|
# Google
|
42
63
|
'crawl-66-249-64-141.googlebot.com' => {
|
43
64
|
a: %w[66.249.64.141]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.10.6
|
4
|
+
version: 1.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-09-
|
11
|
+
date: 2024-09-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fast_interval_tree
|
@@ -76,8 +76,10 @@ files:
|
|
76
76
|
- lib/legitbot/apple.rb
|
77
77
|
- lib/legitbot/baidu.rb
|
78
78
|
- lib/legitbot/bing.rb
|
79
|
+
- lib/legitbot/blexbot.rb
|
79
80
|
- lib/legitbot/botmatch.rb
|
80
81
|
- lib/legitbot/config/resolver.rb
|
82
|
+
- lib/legitbot/dataforseo.rb
|
81
83
|
- lib/legitbot/duckduckgo.rb
|
82
84
|
- lib/legitbot/facebook.rb
|
83
85
|
- lib/legitbot/google.rb
|
@@ -104,7 +106,9 @@ files:
|
|
104
106
|
- test/ahrefs_test.rb
|
105
107
|
- test/amazon_test.rb
|
106
108
|
- test/apple_test.rb
|
109
|
+
- test/blexbot_test.rb
|
107
110
|
- test/botmatch_test.rb
|
111
|
+
- test/dataforseo_test.rb
|
108
112
|
- test/facebook_test.rb
|
109
113
|
- test/google_test.rb
|
110
114
|
- test/ias_test.rb
|