legitbot 1.10.5 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -2
- data/lib/legitbot/amazon.rb +3 -2
- data/lib/legitbot/apple.rb +6 -3
- data/lib/legitbot/blexbot.rb +10 -0
- data/lib/legitbot/dataforseo.rb +10 -0
- data/lib/legitbot/facebook.rb +3 -9
- data/lib/legitbot/meta.rb +33 -0
- data/lib/legitbot/openai.rb +46 -0
- data/lib/legitbot/version.rb +1 -1
- data/lib/legitbot.rb +4 -1
- data/test/amazon_test.rb +25 -2
- data/test/blexbot_test.rb +60 -0
- data/test/dataforseo_test.rb +60 -0
- data/test/facebook_test.rb +0 -13
- data/test/lib/dns_server_mock.rb +21 -0
- data/test/lib/meta_ip_ranges_mock.rb +13 -0
- data/test/meta_test.rb +79 -0
- data/test/test_helper.rb +1 -0
- metadata +10 -3
- data/lib/legitbot/gptbot.rb +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ccf22fe1fab3a7cab2955709eb3b0ada75a66305b255a36ec795eb092d9741c8
|
4
|
+
data.tar.gz: 6b03643eb517f59626c7a1e59b04f6808ce067b87597bf5f7e8486f4c1eb309c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44f09102368337c185aa95c32a76dd551a01bf9fda2e098757383101bb3c57cced58f85fcad00f7c86dda1550df142b0a9549ea2d7c9c54c41cb2fffe7bfbabe
|
7
|
+
data.tar.gz: 6149c2e4eca68be2224ee529184a4df492fd224602aed1c08aa0a80af5b77ac902e23258331099c769ea4cb80f07c4b99df00526626d328fa13852afedb57eb0
|
data/README.md
CHANGED
@@ -50,17 +50,20 @@ end
|
|
50
50
|
## Supported
|
51
51
|
|
52
52
|
- [Ahrefs](https://ahrefs.com/robot)
|
53
|
+
- [Amazonbot](https://developer.amazon.com/amazonbot)
|
53
54
|
- [Amazon AdBot](https://adbot.amazon.com/index.html)
|
54
|
-
- [Applebot](https://support.apple.com/en-us/
|
55
|
+
- [Applebot](https://support.apple.com/en-us/119829)
|
55
56
|
- [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
|
56
57
|
- [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
|
58
|
+
- [BLEXBot (WebMeUp)](http://webmeup-crawler.com/)
|
59
|
+
- [DataForSEO](https://dataforseo.com/dataforseo-bot)
|
57
60
|
- [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
|
58
|
-
- [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
|
59
61
|
- [Google crawlers](https://support.google.com/webmasters/answer/1061943)
|
60
62
|
- [IAS](https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/)
|
61
63
|
- [OpenAI GPTBot](https://platform.openai.com/docs/gptbot)
|
62
64
|
- [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
|
63
65
|
- [Marginalia](https://www.marginalia.nu/marginalia-search/for-webmasters/)
|
66
|
+
- [Meta / Facebook Web crawlers](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/)
|
64
67
|
- [Petal search engine](http://aspiegel.com/petalbot)
|
65
68
|
- [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
|
66
69
|
- [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started),
|
data/lib/legitbot/amazon.rb
CHANGED
@@ -2,9 +2,10 @@
|
|
2
2
|
|
3
3
|
module Legitbot # :nodoc:
|
4
4
|
# https://adbot.amazon.com/index.html
|
5
|
+
# https://developer.amazon.com/amazonbot
|
5
6
|
class Amazon < BotMatch
|
6
|
-
domains 'amazonadbot.com.'
|
7
|
+
domains 'amazon.', 'amazonadbot.com.'
|
7
8
|
end
|
8
9
|
|
9
|
-
rule Legitbot::Amazon, %w[AmazonAdBot]
|
10
|
+
rule Legitbot::Amazon, %w[Amazonbot AmazonAdBot]
|
10
11
|
end
|
data/lib/legitbot/apple.rb
CHANGED
@@ -3,10 +3,13 @@
|
|
3
3
|
require 'ipaddr'
|
4
4
|
|
5
5
|
module Legitbot # :nodoc:
|
6
|
-
# https://support.apple.com/en-us/
|
6
|
+
# https://support.apple.com/en-us/119829
|
7
7
|
class Apple < BotMatch
|
8
|
-
|
8
|
+
domains 'applebot.apple.com.'
|
9
9
|
end
|
10
10
|
|
11
|
-
rule Legitbot::Apple, %w[
|
11
|
+
rule Legitbot::Apple, %w[
|
12
|
+
Applebot
|
13
|
+
iTMS
|
14
|
+
]
|
12
15
|
end
|
data/lib/legitbot/facebook.rb
CHANGED
@@ -1,20 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
require_relative 'meta'
|
4
4
|
|
5
5
|
module Legitbot # :nodoc:
|
6
6
|
# https://developers.facebook.com/docs/sharing/webmasters/crawler
|
7
7
|
class Facebook < BotMatch
|
8
|
-
|
8
|
+
extend MetaIpRanges
|
9
9
|
|
10
10
|
ip_ranges do
|
11
|
-
|
12
|
-
client.query :radb, AS, source: :radb
|
13
|
-
results = client.perform
|
14
|
-
|
15
|
-
%i[ipv4 ipv6].map do |family|
|
16
|
-
results[AS][family][AS]
|
17
|
-
end.flatten
|
11
|
+
fetch_ip_ranges
|
18
12
|
end
|
19
13
|
end
|
20
14
|
|
data/lib/legitbot/meta.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'irrc'
|
4
|
+
|
5
|
+
module Legitbot # :nodoc:
|
6
|
+
module MetaIpRanges # :nodoc:
|
7
|
+
AS = 'AS32934'
|
8
|
+
|
9
|
+
def fetch_ip_ranges
|
10
|
+
client = Irrc::Client.new
|
11
|
+
client.query :radb, AS, source: :radb
|
12
|
+
results = client.perform
|
13
|
+
|
14
|
+
%i[ipv4 ipv6].map do |family|
|
15
|
+
results[AS][family][AS]
|
16
|
+
end.flatten
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/
|
21
|
+
class Meta < BotMatch
|
22
|
+
extend MetaIpRanges
|
23
|
+
|
24
|
+
ip_ranges do
|
25
|
+
fetch_ip_ranges
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
rule Legitbot::Meta, %w[
|
30
|
+
meta-externalagent
|
31
|
+
meta-externalfetcher
|
32
|
+
]
|
33
|
+
end
|
data/lib/legitbot/openai.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
4
|
+
# https://platform.openai.com/docs/gptbot
|
5
|
+
class GPTBot < BotMatch
|
6
|
+
# NOTE: fetching is disabled, see #131
|
7
|
+
# @ fetch:url https://openai.com/gptbot.json
|
8
|
+
ip_ranges %w[
|
9
|
+
20.171.206.0/24
|
10
|
+
52.230.152.0/24
|
11
|
+
52.233.106.0/24
|
12
|
+
]
|
13
|
+
end
|
14
|
+
|
15
|
+
# https://platform.openai.com/docs/bots
|
16
|
+
class OpenAIChat < BotMatch
|
17
|
+
# NOTE: fetching is disabled, see #131
|
18
|
+
# @ fetch:url https://openai.com/chatgpt-user.json
|
19
|
+
ip_ranges %w[
|
20
|
+
23.98.142.176/28
|
21
|
+
40.84.180.224/28
|
22
|
+
13.65.240.240/28
|
23
|
+
20.97.189.96/28
|
24
|
+
20.161.75.208/28
|
25
|
+
52.225.75.208/28
|
26
|
+
52.156.77.144/28
|
27
|
+
40.84.221.208/28
|
28
|
+
40.84.221.224/28
|
29
|
+
40.84.180.64/28
|
30
|
+
]
|
31
|
+
end
|
32
|
+
|
33
|
+
# https://platform.openai.com/docs/bots
|
34
|
+
class OpenAISearch < BotMatch
|
35
|
+
# NOTE: fetching is disabled, see #131
|
36
|
+
# @ fetch:url https://openai.com/searchbot.json
|
37
|
+
ip_ranges %w[
|
38
|
+
20.42.10.176/28
|
39
|
+
172.203.190.128/28
|
40
|
+
]
|
41
|
+
end
|
42
|
+
|
43
|
+
rule Legitbot::GPTBot, %w[GPTBot]
|
44
|
+
rule Legitbot::OpenAIChat, %w[ChatGPT-User]
|
45
|
+
rule Legitbot::OpenAISearch, %w[OAI-SearchBot]
|
46
|
+
end
|
data/lib/legitbot/version.rb
CHANGED
data/lib/legitbot.rb
CHANGED
@@ -8,13 +8,16 @@ require_relative 'legitbot/amazon'
|
|
8
8
|
require_relative 'legitbot/apple'
|
9
9
|
require_relative 'legitbot/baidu'
|
10
10
|
require_relative 'legitbot/bing'
|
11
|
+
require_relative 'legitbot/blexbot'
|
12
|
+
require_relative 'legitbot/dataforseo'
|
11
13
|
require_relative 'legitbot/duckduckgo'
|
12
14
|
require_relative 'legitbot/facebook'
|
13
15
|
require_relative 'legitbot/google'
|
14
|
-
require_relative 'legitbot/gptbot'
|
15
16
|
require_relative 'legitbot/ias'
|
17
|
+
require_relative 'legitbot/openai'
|
16
18
|
require_relative 'legitbot/oracle'
|
17
19
|
require_relative 'legitbot/marginalia'
|
20
|
+
require_relative 'legitbot/meta'
|
18
21
|
require_relative 'legitbot/petalbot'
|
19
22
|
require_relative 'legitbot/pinterest'
|
20
23
|
require_relative 'legitbot/twitter'
|
data/test/amazon_test.rb
CHANGED
@@ -30,7 +30,7 @@ class AmazonTest < Minitest::Test
|
|
30
30
|
refute_predicate bot, :valid?
|
31
31
|
end
|
32
32
|
|
33
|
-
def
|
33
|
+
def test_user_agent1
|
34
34
|
bot = Legitbot.bot(
|
35
35
|
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
36
36
|
'54.166.7.90'
|
@@ -40,7 +40,19 @@ class AmazonTest < Minitest::Test
|
|
40
40
|
assert_predicate bot, :valid?
|
41
41
|
end
|
42
42
|
|
43
|
-
|
43
|
+
# rubocop:disable Layout/LineLength
|
44
|
+
def test_user_agent2
|
45
|
+
bot = Legitbot.bot(
|
46
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
|
47
|
+
'52.70.240.171'
|
48
|
+
)
|
49
|
+
|
50
|
+
assert bot
|
51
|
+
assert_predicate bot, :valid?
|
52
|
+
end
|
53
|
+
# rubocop:enable Layout/LineLength
|
54
|
+
|
55
|
+
def test_valid_name1
|
44
56
|
bot = Legitbot.bot(
|
45
57
|
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
46
58
|
'54.166.7.90'
|
@@ -49,6 +61,17 @@ class AmazonTest < Minitest::Test
|
|
49
61
|
assert_equal :amazon, bot.detected_as
|
50
62
|
end
|
51
63
|
|
64
|
+
# rubocop:disable Layout/LineLength
|
65
|
+
def test_valid_name2
|
66
|
+
bot = Legitbot.bot(
|
67
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
|
68
|
+
'52.70.240.171'
|
69
|
+
)
|
70
|
+
|
71
|
+
assert_equal :amazon, bot.detected_as
|
72
|
+
end
|
73
|
+
# rubocop:enable Layout/LineLength
|
74
|
+
|
52
75
|
def test_fake_name
|
53
76
|
bot = Legitbot.bot(
|
54
77
|
'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
|
data/test/blexbot_test.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
|
5
|
+
class BLEXBot < Minitest::Test
|
6
|
+
include Minitest::Hooks
|
7
|
+
include DnsServerMock
|
8
|
+
|
9
|
+
def test_malicious_ip
|
10
|
+
ip = '149.210.164.47'
|
11
|
+
match = Legitbot::BLEXBot.new ip
|
12
|
+
|
13
|
+
refute_predicate match, :valid?
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_valid_ip
|
17
|
+
ip = '65.21.113.197'
|
18
|
+
match = Legitbot::BLEXBot.new ip
|
19
|
+
|
20
|
+
assert_predicate match, :valid?
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_malicious_ua
|
24
|
+
bot = Legitbot.bot(
|
25
|
+
'Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)',
|
26
|
+
'149.210.164.47'
|
27
|
+
)
|
28
|
+
|
29
|
+
assert bot
|
30
|
+
refute_predicate bot, :valid?
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_valid_ua
|
34
|
+
bot = Legitbot.bot(
|
35
|
+
'Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)',
|
36
|
+
'65.21.113.197'
|
37
|
+
)
|
38
|
+
|
39
|
+
assert bot
|
40
|
+
assert_predicate bot, :valid?
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_valid_name
|
44
|
+
bot = Legitbot.bot(
|
45
|
+
'Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)',
|
46
|
+
'65.21.113.197'
|
47
|
+
)
|
48
|
+
|
49
|
+
assert_equal :blexbot, bot.detected_as
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_fake_name
|
53
|
+
bot = Legitbot.bot(
|
54
|
+
'Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)',
|
55
|
+
'81.1.172.108'
|
56
|
+
)
|
57
|
+
|
58
|
+
assert_equal :blexbot, bot.detected_as
|
59
|
+
end
|
60
|
+
end
|
data/test/dataforseo_test.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
|
5
|
+
class DataForSEOTest < Minitest::Test
|
6
|
+
include Minitest::Hooks
|
7
|
+
include DnsServerMock
|
8
|
+
|
9
|
+
def test_malicious_ip
|
10
|
+
ip = '149.210.164.47'
|
11
|
+
match = Legitbot::DataForSEO.new ip
|
12
|
+
|
13
|
+
refute_predicate match, :valid?
|
14
|
+
end
|
15
|
+
|
16
|
+
def test_valid_ip
|
17
|
+
ip = '136.243.228.176'
|
18
|
+
match = Legitbot::DataForSEO.new ip
|
19
|
+
|
20
|
+
assert_predicate match, :valid?
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_malicious_ua
|
24
|
+
bot = Legitbot.bot(
|
25
|
+
'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)',
|
26
|
+
'149.210.164.47'
|
27
|
+
)
|
28
|
+
|
29
|
+
assert bot
|
30
|
+
refute_predicate bot, :valid?
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_valid_ua
|
34
|
+
bot = Legitbot.bot(
|
35
|
+
'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)',
|
36
|
+
'136.243.228.176'
|
37
|
+
)
|
38
|
+
|
39
|
+
assert bot
|
40
|
+
assert_predicate bot, :valid?
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_valid_name
|
44
|
+
bot = Legitbot.bot(
|
45
|
+
'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)',
|
46
|
+
'136.243.228.176'
|
47
|
+
)
|
48
|
+
|
49
|
+
assert_equal :dataforseo, bot.detected_as
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_fake_name
|
53
|
+
bot = Legitbot.bot(
|
54
|
+
'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)',
|
55
|
+
'81.1.172.108'
|
56
|
+
)
|
57
|
+
|
58
|
+
assert_equal :dataforseo, bot.detected_as
|
59
|
+
end
|
60
|
+
end
|
data/test/facebook_test.rb
CHANGED
@@ -2,19 +2,6 @@
|
|
2
2
|
|
3
3
|
require_relative 'test_helper'
|
4
4
|
|
5
|
-
module Legitbot
|
6
|
-
class Facebook
|
7
|
-
# rubocop:disable Layout/LineLength
|
8
|
-
def self.whois
|
9
|
-
{
|
10
|
-
ipv4: ['69.63.176.0/20', '66.220.144.0/20', '66.220.144.0/21', '69.63.184.0/21', '69.63.176.0/21', '74.119.76.0/22', '69.171.255.0/24', '173.252.64.0/18', '69.171.224.0/19', '69.171.224.0/20', '103.4.96.0/22', '69.63.176.0/24', '173.252.64.0/19', '173.252.70.0/24', '31.13.64.0/18', '31.13.24.0/21', '66.220.152.0/21', '66.220.159.0/24', '69.171.239.0/24', '69.171.240.0/20', '31.13.64.0/19', '31.13.64.0/24', '31.13.65.0/24', '31.13.67.0/24', '31.13.68.0/24', '31.13.69.0/24', '31.13.70.0/24', '31.13.71.0/24', '31.13.72.0/24', '31.13.73.0/24', '31.13.74.0/24', '31.13.75.0/24', '31.13.76.0/24', '31.13.77.0/24', '31.13.96.0/19', '31.13.66.0/24', '173.252.96.0/19', '69.63.178.0/24', '31.13.78.0/24', '31.13.79.0/24', '31.13.80.0/24', '31.13.82.0/24', '31.13.83.0/24', '31.13.84.0/24', '31.13.85.0/24', '31.13.86.0/24', '31.13.87.0/24', '31.13.88.0/24', '31.13.89.0/24', '31.13.90.0/24', '31.13.91.0/24', '31.13.92.0/24', '31.13.93.0/24', '31.13.94.0/24', '31.13.95.0/24', '69.171.253.0/24', '69.63.186.0/24', '31.13.81.0/24', '179.60.192.0/22', '179.60.192.0/24', '179.60.193.0/24', '179.60.194.0/24', '179.60.195.0/24', '185.60.216.0/22', '45.64.40.0/22', '185.60.216.0/24', '185.60.217.0/24', '185.60.218.0/24', '185.60.219.0/24', '129.134.0.0/16', '157.240.0.0/16', '157.240.8.0/24', '157.240.0.0/24', '157.240.1.0/24', '157.240.2.0/24', '157.240.3.0/24', '157.240.4.0/24', '157.240.5.0/24', '157.240.6.0/24', '157.240.7.0/24', '157.240.9.0/24', '157.240.10.0/24', '157.240.16.0/24', '157.240.19.0/24', '157.240.11.0/24', '157.240.12.0/24', '157.240.13.0/24', '157.240.14.0/24', '157.240.15.0/24', '157.240.17.0/24', '157.240.18.0/24', '157.240.20.0/24', '157.240.21.0/24', '157.240.22.0/24', '157.240.23.0/24', '157.240.0.0/17', '69.171.250.0/24', '157.240.24.0/24', '157.240.25.0/24', '199.201.64.0/24', '199.201.65.0/24', '199.201.64.0/22', '204.15.20.0/22', '157.240.192.0/24', '129.134.0.0/17', '157.240.198.0/24'],
|
11
|
-
ipv6: []
|
12
|
-
}
|
13
|
-
end
|
14
|
-
# rubocop:enable Layout/LineLength
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
5
|
class FacebookTest < Minitest::Test
|
19
6
|
def test_valid_ip
|
20
7
|
ip = '69.63.186.89'
|
data/test/lib/dns_server_mock.rb
CHANGED
@@ -29,6 +29,12 @@ TEST_DNS_RECORDS = {
|
|
29
29
|
'54.166.7.90' => {
|
30
30
|
ptr: %w[crawler-54-166-7-90.amazonadbot.com]
|
31
31
|
},
|
32
|
+
'52-70-240-171.crawl.amazonbot.amazon' => {
|
33
|
+
a: %w[52.70.240.171]
|
34
|
+
},
|
35
|
+
'52.70.240.171' => {
|
36
|
+
ptr: %w[52-70-240-171.crawl.amazonbot.amazon]
|
37
|
+
},
|
32
38
|
|
33
39
|
# Apple
|
34
40
|
'17-58-98-60.applebot.apple.com' => {
|
@@ -38,6 +44,21 @@ TEST_DNS_RECORDS = {
|
|
38
44
|
ptr: %w[17-58-98-60.applebot.apple.com]
|
39
45
|
},
|
40
46
|
|
47
|
+
# BLEXBot (WebMeUp)
|
48
|
+
'pot22.webmeup.com' => {
|
49
|
+
a: %w[65.21.113.197]
|
50
|
+
},
|
51
|
+
'65.21.113.197' => {
|
52
|
+
ptr: %w[pot22.webmeup.com]
|
53
|
+
},
|
54
|
+
# DataForSEO
|
55
|
+
'crawling-gateway-136-243-228-176.dataforseo.com' => {
|
56
|
+
a: %w[136.243.228.176]
|
57
|
+
},
|
58
|
+
'136.243.228.176' => {
|
59
|
+
ptr: %w[crawling-gateway-136-243-228-176.dataforseo.com]
|
60
|
+
},
|
61
|
+
|
41
62
|
# Google
|
42
63
|
'crawl-66-249-64-141.googlebot.com' => {
|
43
64
|
a: %w[66.249.64.141]
|
data/test/lib/meta_ip_ranges_mock.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot
|
4
|
+
module MetaIpRanges
|
5
|
+
alias fetch_ip_ranges_orig fetch_ip_ranges
|
6
|
+
|
7
|
+
# rubocop:disable Layout/LineLength
|
8
|
+
def fetch_ip_ranges
|
9
|
+
['69.63.176.0/20', '66.220.144.0/20', '66.220.144.0/21', '69.63.184.0/21', '69.63.176.0/21', '74.119.76.0/22', '69.171.255.0/24', '173.252.64.0/18', '69.171.224.0/19', '69.171.224.0/20', '103.4.96.0/22', '69.63.176.0/24', '173.252.64.0/19', '173.252.70.0/24', '31.13.64.0/18', '31.13.24.0/21', '66.220.152.0/21', '66.220.159.0/24', '69.171.239.0/24', '69.171.240.0/20', '31.13.64.0/19', '31.13.64.0/24', '31.13.65.0/24', '31.13.67.0/24', '31.13.68.0/24', '31.13.69.0/24', '31.13.70.0/24', '31.13.71.0/24', '31.13.72.0/24', '31.13.73.0/24', '31.13.74.0/24', '31.13.75.0/24', '31.13.76.0/24', '31.13.77.0/24', '31.13.96.0/19', '31.13.66.0/24', '173.252.96.0/19', '69.63.178.0/24', '31.13.78.0/24', '31.13.79.0/24', '31.13.80.0/24', '31.13.82.0/24', '31.13.83.0/24', '31.13.84.0/24', '31.13.85.0/24', '31.13.86.0/24', '31.13.87.0/24', '31.13.88.0/24', '31.13.89.0/24', '31.13.90.0/24', '31.13.91.0/24', '31.13.92.0/24', '31.13.93.0/24', '31.13.94.0/24', '31.13.95.0/24', '69.171.253.0/24', '69.63.186.0/24', '31.13.81.0/24', '179.60.192.0/22', '179.60.192.0/24', '179.60.193.0/24', '179.60.194.0/24', '179.60.195.0/24', '185.60.216.0/22', '45.64.40.0/22', '185.60.216.0/24', '185.60.217.0/24', '185.60.218.0/24', '185.60.219.0/24', '129.134.0.0/16', '157.240.0.0/16', '157.240.8.0/24', '157.240.0.0/24', '157.240.1.0/24', '157.240.2.0/24', '157.240.3.0/24', '157.240.4.0/24', '157.240.5.0/24', '157.240.6.0/24', '157.240.7.0/24', '157.240.9.0/24', '157.240.10.0/24', '157.240.16.0/24', '157.240.19.0/24', '157.240.11.0/24', '157.240.12.0/24', '157.240.13.0/24', '157.240.14.0/24', '157.240.15.0/24', '157.240.17.0/24', '157.240.18.0/24', '157.240.20.0/24', '157.240.21.0/24', '157.240.22.0/24', '157.240.23.0/24', '157.240.0.0/17', '69.171.250.0/24', '157.240.24.0/24', '157.240.25.0/24', '199.201.64.0/24', '199.201.65.0/24', '199.201.64.0/22', '204.15.20.0/22', '157.240.192.0/24', '129.134.0.0/17', '157.240.198.0/24']
|
10
|
+
end
|
11
|
+
# rubocop:enable Layout/LineLength
|
12
|
+
end
|
13
|
+
end
|
data/test/meta_test.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
|
5
|
+
class MetaIpRanges
|
6
|
+
include Legitbot::MetaIpRanges
|
7
|
+
end
|
8
|
+
|
9
|
+
class MetaTest < Minitest::Test
|
10
|
+
def test_fetch_ips
|
11
|
+
# NOTE: network call
|
12
|
+
ip_ranges = MetaIpRanges.new.fetch_ip_ranges_orig
|
13
|
+
|
14
|
+
refute_nil ip_ranges
|
15
|
+
assert_kind_of Array, ip_ranges
|
16
|
+
refute_empty ip_ranges
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_valid_ip
|
20
|
+
ip = '69.63.186.89'
|
21
|
+
match = Legitbot::Meta.new(ip)
|
22
|
+
|
23
|
+
assert_predicate match, :valid?
|
24
|
+
|
25
|
+
ip = '69.171.251.1'
|
26
|
+
match = Legitbot::Meta.new(ip)
|
27
|
+
|
28
|
+
assert_predicate match, :valid?
|
29
|
+
end
|
30
|
+
|
31
|
+
def test_invalid_ip
|
32
|
+
ip = '127.0.0.1'
|
33
|
+
match = Legitbot::Meta.new(ip)
|
34
|
+
|
35
|
+
assert_predicate match, :fake?
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_user_agent1
|
39
|
+
Legitbot.bot(
|
40
|
+
'meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)',
|
41
|
+
'31.13.76.56'
|
42
|
+
) do |bot|
|
43
|
+
assert_equal :meta, bot.detected_as
|
44
|
+
assert_predicate bot, :valid?
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_user_agent2
|
49
|
+
Legitbot.bot(
|
50
|
+
'meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)',
|
51
|
+
'173.252.87.8'
|
52
|
+
) do |bot|
|
53
|
+
assert_equal :meta, bot.detected_as
|
54
|
+
assert_predicate bot, :valid?
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def test_user_agent3
|
59
|
+
Legitbot.bot(
|
60
|
+
'meta-externalfetcher/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)',
|
61
|
+
'173.252.87.8'
|
62
|
+
) do |bot|
|
63
|
+
assert_equal :meta, bot.detected_as
|
64
|
+
assert_predicate bot, :valid?
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# rubocop:disable Layout/LineLength
|
69
|
+
def test_user_agent4
|
70
|
+
Legitbot.bot(
|
71
|
+
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 meta-externalagent/1.1 Twitterbot/1.0',
|
72
|
+
'92.243.181.7'
|
73
|
+
) do |bot|
|
74
|
+
assert_includes %i[meta twitter], bot.detected_as
|
75
|
+
assert_predicate bot, :fake?
|
76
|
+
end
|
77
|
+
end
|
78
|
+
# rubocop:enable Layout/LineLength
|
79
|
+
end
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: legitbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.10.5
|
4
|
+
version: 1.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexander Azarov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-09-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: fast_interval_tree
|
@@ -76,15 +76,18 @@ files:
|
|
76
76
|
- lib/legitbot/apple.rb
|
77
77
|
- lib/legitbot/baidu.rb
|
78
78
|
- lib/legitbot/bing.rb
|
79
|
+
- lib/legitbot/blexbot.rb
|
79
80
|
- lib/legitbot/botmatch.rb
|
80
81
|
- lib/legitbot/config/resolver.rb
|
82
|
+
- lib/legitbot/dataforseo.rb
|
81
83
|
- lib/legitbot/duckduckgo.rb
|
82
84
|
- lib/legitbot/facebook.rb
|
83
85
|
- lib/legitbot/google.rb
|
84
|
-
- lib/legitbot/gptbot.rb
|
85
86
|
- lib/legitbot/ias.rb
|
86
87
|
- lib/legitbot/legitbot.rb
|
87
88
|
- lib/legitbot/marginalia.rb
|
89
|
+
- lib/legitbot/meta.rb
|
90
|
+
- lib/legitbot/openai.rb
|
88
91
|
- lib/legitbot/oracle.rb
|
89
92
|
- lib/legitbot/petalbot.rb
|
90
93
|
- lib/legitbot/pinterest.rb
|
@@ -103,7 +106,9 @@ files:
|
|
103
106
|
- test/ahrefs_test.rb
|
104
107
|
- test/amazon_test.rb
|
105
108
|
- test/apple_test.rb
|
109
|
+
- test/blexbot_test.rb
|
106
110
|
- test/botmatch_test.rb
|
111
|
+
- test/dataforseo_test.rb
|
107
112
|
- test/facebook_test.rb
|
108
113
|
- test/google_test.rb
|
109
114
|
- test/ias_test.rb
|
@@ -111,6 +116,8 @@ files:
|
|
111
116
|
- test/legitbot/validators/ip_ranges_test.rb
|
112
117
|
- test/legitbot_test.rb
|
113
118
|
- test/lib/dns_server_mock.rb
|
119
|
+
- test/lib/meta_ip_ranges_mock.rb
|
120
|
+
- test/meta_test.rb
|
114
121
|
- test/oracle_test.rb
|
115
122
|
- test/petalbot_test.rb
|
116
123
|
- test/pinterest_test.rb
|
data/lib/legitbot/gptbot.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Legitbot # :nodoc:
|
4
|
-
# https://platform.openai.com/docs/gptbot
|
5
|
-
class GPTBot < BotMatch
|
6
|
-
# NOTE: fetching has been disabled, see #131
|
7
|
-
# @ fetch:url https://openai.com/gptbot-ranges.txt
|
8
|
-
ip_ranges %w[
|
9
|
-
52.230.152.0/24
|
10
|
-
52.233.106.0/24
|
11
|
-
]
|
12
|
-
end
|
13
|
-
|
14
|
-
rule Legitbot::GPTBot, %w[GPTBot]
|
15
|
-
end
|