legitbot 1.10.5 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2ac43bac7745d6e0ef906342c82102e87c8644a6cd8a8af3077831f13ebbd79e
4
- data.tar.gz: 0eeed734eadee6c380d41a604fa21e83aba8d8431cb8444b27fa2b0ec0cabe2a
3
+ metadata.gz: ccf22fe1fab3a7cab2955709eb3b0ada75a66305b255a36ec795eb092d9741c8
4
+ data.tar.gz: 6b03643eb517f59626c7a1e59b04f6808ce067b87597bf5f7e8486f4c1eb309c
5
5
  SHA512:
6
- metadata.gz: 36b25cf91d14fd4211e31c9dd92f8ec7439155a2e7862329d848615ceaff52f5fd1800b56fb25f0956b01572ed019f4689a81de9ba1b828c696710f451517344
7
- data.tar.gz: 49fbc215737f0d863c1a5ffdf28f5a41f438d31923987cfdbe58a60dd6056f20bc37985ebf1dca2e218dc30ab0a216ba005a670e2027f780280899e689ea7c0c
6
+ metadata.gz: 44f09102368337c185aa95c32a76dd551a01bf9fda2e098757383101bb3c57cced58f85fcad00f7c86dda1550df142b0a9549ea2d7c9c54c41cb2fffe7bfbabe
7
+ data.tar.gz: 6149c2e4eca68be2224ee529184a4df492fd224602aed1c08aa0a80af5b77ac902e23258331099c769ea4cb80f07c4b99df00526626d328fa13852afedb57eb0
data/README.md CHANGED
@@ -50,17 +50,20 @@ end
50
50
  ## Supported
51
51
 
52
52
  - [Ahrefs](https://ahrefs.com/robot)
53
+ - [Amazonbot](https://developer.amazon.com/amazonbot)
53
54
  - [Amazon AdBot](https://adbot.amazon.com/index.html)
54
- - [Applebot](https://support.apple.com/en-us/HT204683)
55
+ - [Applebot](https://support.apple.com/en-us/119829)
55
56
  - [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
56
57
  - [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
58
+ - [BLEXBot (WebMeUp)](http://webmeup-crawler.com/)
59
+ - [DataForSEO](https://dataforseo.com/dataforseo-bot)
57
60
  - [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
58
- - [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
59
61
  - [Google crawlers](https://support.google.com/webmasters/answer/1061943)
60
62
  - [IAS](https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/)
61
63
  - [OpenAI GPTBot](https://platform.openai.com/docs/gptbot)
62
64
  - [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
63
65
  - [Marginalia](https://www.marginalia.nu/marginalia-search/for-webmasters/)
66
+ - [Meta / Facebook Web crawlers](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/)
64
67
  - [Petal search engine](http://aspiegel.com/petalbot)
65
68
  - [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
66
69
  - [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started),
@@ -2,9 +2,10 @@
2
2
 
3
3
  module Legitbot # :nodoc:
4
4
  # https://adbot.amazon.com/index.html
5
+ # https://developer.amazon.com/amazonbot
5
6
  class Amazon < BotMatch
6
- domains 'amazonadbot.com.'
7
+ domains 'amazon.', 'amazonadbot.com.'
7
8
  end
8
9
 
9
- rule Legitbot::Amazon, %w[AmazonAdBot]
10
+ rule Legitbot::Amazon, %w[Amazonbot AmazonAdBot]
10
11
  end
@@ -3,10 +3,13 @@
3
3
  require 'ipaddr'
4
4
 
5
5
  module Legitbot # :nodoc:
6
- # https://support.apple.com/en-us/HT204683
6
+ # https://support.apple.com/en-us/119829
7
7
  class Apple < BotMatch
8
- ip_ranges '17.0.0.0/8'
8
+ domains 'applebot.apple.com.'
9
9
  end
10
10
 
11
- rule Legitbot::Apple, %w[Applebot]
11
+ rule Legitbot::Apple, %w[
12
+ Applebot
13
+ iTMS
14
+ ]
12
15
  end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # http://webmeup-crawler.com/
5
+ class BLEXBot < BotMatch
6
+ domains 'webmeup.com.'
7
+ end
8
+
9
+ rule Legitbot::BLEXBot, %w[BLEXBot]
10
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://dataforseo.com/dataforseo-bot
5
+ class DataForSEO < BotMatch
6
+ domains 'dataforseo.com.'
7
+ end
8
+
9
+ rule Legitbot::DataForSEO, %w[DataForSeoBot]
10
+ end
@@ -1,20 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'irrc'
3
+ require_relative 'meta'
4
4
 
5
5
  module Legitbot # :nodoc:
6
6
  # https://developers.facebook.com/docs/sharing/webmasters/crawler
7
7
  class Facebook < BotMatch
8
- AS = 'AS32934'
8
+ extend MetaIpRanges
9
9
 
10
10
  ip_ranges do
11
- client = Irrc::Client.new
12
- client.query :radb, AS, source: :radb
13
- results = client.perform
14
-
15
- %i[ipv4 ipv6].map do |family|
16
- results[AS][family][AS]
17
- end.flatten
11
+ fetch_ip_ranges
18
12
  end
19
13
  end
20
14
 
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'irrc'
4
+
5
+ module Legitbot # :nodoc:
6
+ module MetaIpRanges # :nodoc:
7
+ AS = 'AS32934'
8
+
9
+ def fetch_ip_ranges
10
+ client = Irrc::Client.new
11
+ client.query :radb, AS, source: :radb
12
+ results = client.perform
13
+
14
+ %i[ipv4 ipv6].map do |family|
15
+ results[AS][family][AS]
16
+ end.flatten
17
+ end
18
+ end
19
+
20
+ # https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/
21
+ class Meta < BotMatch
22
+ extend MetaIpRanges
23
+
24
+ ip_ranges do
25
+ fetch_ip_ranges
26
+ end
27
+ end
28
+
29
+ rule Legitbot::Meta, %w[
30
+ meta-externalagent
31
+ meta-externalfetcher
32
+ ]
33
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://platform.openai.com/docs/gptbot
5
+ class GPTBot < BotMatch
6
+ # NOTE: fetching is disabled, see #131
7
+ # @ fetch:url https://openai.com/gptbot.json
8
+ ip_ranges %w[
9
+ 20.171.206.0/24
10
+ 52.230.152.0/24
11
+ 52.233.106.0/24
12
+ ]
13
+ end
14
+
15
+ # https://platform.openai.com/docs/bots
16
+ class OpenAIChat < BotMatch
17
+ # NOTE: fetching is disabled, see #131
18
+ # @ fetch:url https://openai.com/chatgpt-user.json
19
+ ip_ranges %w[
20
+ 23.98.142.176/28
21
+ 40.84.180.224/28
22
+ 13.65.240.240/28
23
+ 20.97.189.96/28
24
+ 20.161.75.208/28
25
+ 52.225.75.208/28
26
+ 52.156.77.144/28
27
+ 40.84.221.208/28
28
+ 40.84.221.224/28
29
+ 40.84.180.64/28
30
+ ]
31
+ end
32
+
33
+ # https://platform.openai.com/docs/bots
34
+ class OpenAISearch < BotMatch
35
+ # NOTE: fetching is disabled, see #131
36
+ # @ fetch:url https://openai.com/searchbot.json
37
+ ip_ranges %w[
38
+ 20.42.10.176/28
39
+ 172.203.190.128/28
40
+ ]
41
+ end
42
+
43
+ rule Legitbot::GPTBot, %w[GPTBot]
44
+ rule Legitbot::OpenAIChat, %w[ChatGPT-User]
45
+ rule Legitbot::OpenAISearch, %w[OAI-SearchBot]
46
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.10.5'
4
+ VERSION = '1.11.0'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -8,13 +8,16 @@ require_relative 'legitbot/amazon'
8
8
  require_relative 'legitbot/apple'
9
9
  require_relative 'legitbot/baidu'
10
10
  require_relative 'legitbot/bing'
11
+ require_relative 'legitbot/blexbot'
12
+ require_relative 'legitbot/dataforseo'
11
13
  require_relative 'legitbot/duckduckgo'
12
14
  require_relative 'legitbot/facebook'
13
15
  require_relative 'legitbot/google'
14
- require_relative 'legitbot/gptbot'
15
16
  require_relative 'legitbot/ias'
17
+ require_relative 'legitbot/openai'
16
18
  require_relative 'legitbot/oracle'
17
19
  require_relative 'legitbot/marginalia'
20
+ require_relative 'legitbot/meta'
18
21
  require_relative 'legitbot/petalbot'
19
22
  require_relative 'legitbot/pinterest'
20
23
  require_relative 'legitbot/twitter'
data/test/amazon_test.rb CHANGED
@@ -30,7 +30,7 @@ class AmazonTest < Minitest::Test
30
30
  refute_predicate bot, :valid?
31
31
  end
32
32
 
33
- def test_valid_ua
33
+ def test_user_agent1
34
34
  bot = Legitbot.bot(
35
35
  'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
36
36
  '54.166.7.90'
@@ -40,7 +40,19 @@ class AmazonTest < Minitest::Test
40
40
  assert_predicate bot, :valid?
41
41
  end
42
42
 
43
- def test_valid_name
43
+ # rubocop:disable Layout/LineLength
44
+ def test_user_agent2
45
+ bot = Legitbot.bot(
46
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
47
+ '52.70.240.171'
48
+ )
49
+
50
+ assert bot
51
+ assert_predicate bot, :valid?
52
+ end
53
+ # rubocop:enable Layout/LineLength
54
+
55
+ def test_valid_name1
44
56
  bot = Legitbot.bot(
45
57
  'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
46
58
  '54.166.7.90'
@@ -49,6 +61,17 @@ class AmazonTest < Minitest::Test
49
61
  assert_equal :amazon, bot.detected_as
50
62
  end
51
63
 
64
+ # rubocop:disable Layout/LineLength
65
+ def test_valid_name2
66
+ bot = Legitbot.bot(
67
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)',
68
+ '52.70.240.171'
69
+ )
70
+
71
+ assert_equal :amazon, bot.detected_as
72
+ end
73
+ # rubocop:enable Layout/LineLength
74
+
52
75
  def test_fake_name
53
76
  bot = Legitbot.bot(
54
77
  'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)',
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'test_helper'
4
+
5
+ class BLEXBot < Minitest::Test
6
+ include Minitest::Hooks
7
+ include DnsServerMock
8
+
9
+ def test_malicious_ip
10
+ ip = '149.210.164.47'
11
+ match = Legitbot::BLEXBot.new ip
12
+
13
+ refute_predicate match, :valid?
14
+ end
15
+
16
+ def test_valid_ip
17
+ ip = '65.21.113.197'
18
+ match = Legitbot::BLEXBot.new ip
19
+
20
+ assert_predicate match, :valid?
21
+ end
22
+
23
+ def test_malicious_ua
24
+ bot = Legitbot.bot(
25
+ 'Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)',
26
+ '149.210.164.47'
27
+ )
28
+
29
+ assert bot
30
+ refute_predicate bot, :valid?
31
+ end
32
+
33
+ def test_valid_ua
34
+ bot = Legitbot.bot(
35
+ 'Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)',
36
+ '65.21.113.197'
37
+ )
38
+
39
+ assert bot
40
+ assert_predicate bot, :valid?
41
+ end
42
+
43
+ def test_valid_name
44
+ bot = Legitbot.bot(
45
+ 'Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)',
46
+ '65.21.113.197'
47
+ )
48
+
49
+ assert_equal :blexbot, bot.detected_as
50
+ end
51
+
52
+ def test_fake_name
53
+ bot = Legitbot.bot(
54
+ 'Mozilla/5.0 (compatible; BLEXBot/1.0; +http://webmeup-crawler.com/)',
55
+ '81.1.172.108'
56
+ )
57
+
58
+ assert_equal :blexbot, bot.detected_as
59
+ end
60
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'test_helper'
4
+
5
+ class DataForSEOTest < Minitest::Test
6
+ include Minitest::Hooks
7
+ include DnsServerMock
8
+
9
+ def test_malicious_ip
10
+ ip = '149.210.164.47'
11
+ match = Legitbot::DataForSEO.new ip
12
+
13
+ refute_predicate match, :valid?
14
+ end
15
+
16
+ def test_valid_ip
17
+ ip = '136.243.228.176'
18
+ match = Legitbot::DataForSEO.new ip
19
+
20
+ assert_predicate match, :valid?
21
+ end
22
+
23
+ def test_malicious_ua
24
+ bot = Legitbot.bot(
25
+ 'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)',
26
+ '149.210.164.47'
27
+ )
28
+
29
+ assert bot
30
+ refute_predicate bot, :valid?
31
+ end
32
+
33
+ def test_valid_ua
34
+ bot = Legitbot.bot(
35
+ 'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)',
36
+ '136.243.228.176'
37
+ )
38
+
39
+ assert bot
40
+ assert_predicate bot, :valid?
41
+ end
42
+
43
+ def test_valid_name
44
+ bot = Legitbot.bot(
45
+ 'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)',
46
+ '136.243.228.176'
47
+ )
48
+
49
+ assert_equal :dataforseo, bot.detected_as
50
+ end
51
+
52
+ def test_fake_name
53
+ bot = Legitbot.bot(
54
+ 'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)',
55
+ '81.1.172.108'
56
+ )
57
+
58
+ assert_equal :dataforseo, bot.detected_as
59
+ end
60
+ end
@@ -2,19 +2,6 @@
2
2
 
3
3
  require_relative 'test_helper'
4
4
 
5
- module Legitbot
6
- class Facebook
7
- # rubocop:disable Layout/LineLength
8
- def self.whois
9
- {
10
- ipv4: ['69.63.176.0/20', '66.220.144.0/20', '66.220.144.0/21', '69.63.184.0/21', '69.63.176.0/21', '74.119.76.0/22', '69.171.255.0/24', '173.252.64.0/18', '69.171.224.0/19', '69.171.224.0/20', '103.4.96.0/22', '69.63.176.0/24', '173.252.64.0/19', '173.252.70.0/24', '31.13.64.0/18', '31.13.24.0/21', '66.220.152.0/21', '66.220.159.0/24', '69.171.239.0/24', '69.171.240.0/20', '31.13.64.0/19', '31.13.64.0/24', '31.13.65.0/24', '31.13.67.0/24', '31.13.68.0/24', '31.13.69.0/24', '31.13.70.0/24', '31.13.71.0/24', '31.13.72.0/24', '31.13.73.0/24', '31.13.74.0/24', '31.13.75.0/24', '31.13.76.0/24', '31.13.77.0/24', '31.13.96.0/19', '31.13.66.0/24', '173.252.96.0/19', '69.63.178.0/24', '31.13.78.0/24', '31.13.79.0/24', '31.13.80.0/24', '31.13.82.0/24', '31.13.83.0/24', '31.13.84.0/24', '31.13.85.0/24', '31.13.86.0/24', '31.13.87.0/24', '31.13.88.0/24', '31.13.89.0/24', '31.13.90.0/24', '31.13.91.0/24', '31.13.92.0/24', '31.13.93.0/24', '31.13.94.0/24', '31.13.95.0/24', '69.171.253.0/24', '69.63.186.0/24', '31.13.81.0/24', '179.60.192.0/22', '179.60.192.0/24', '179.60.193.0/24', '179.60.194.0/24', '179.60.195.0/24', '185.60.216.0/22', '45.64.40.0/22', '185.60.216.0/24', '185.60.217.0/24', '185.60.218.0/24', '185.60.219.0/24', '129.134.0.0/16', '157.240.0.0/16', '157.240.8.0/24', '157.240.0.0/24', '157.240.1.0/24', '157.240.2.0/24', '157.240.3.0/24', '157.240.4.0/24', '157.240.5.0/24', '157.240.6.0/24', '157.240.7.0/24', '157.240.9.0/24', '157.240.10.0/24', '157.240.16.0/24', '157.240.19.0/24', '157.240.11.0/24', '157.240.12.0/24', '157.240.13.0/24', '157.240.14.0/24', '157.240.15.0/24', '157.240.17.0/24', '157.240.18.0/24', '157.240.20.0/24', '157.240.21.0/24', '157.240.22.0/24', '157.240.23.0/24', '157.240.0.0/17', '69.171.250.0/24', '157.240.24.0/24', '157.240.25.0/24', '199.201.64.0/24', '199.201.65.0/24', '199.201.64.0/22', '204.15.20.0/22', '157.240.192.0/24', '129.134.0.0/17', '157.240.198.0/24'],
11
- ipv6: []
12
- }
13
- end
14
- # rubocop:enable Layout/LineLength
15
- end
16
- end
17
-
18
5
  class FacebookTest < Minitest::Test
19
6
  def test_valid_ip
20
7
  ip = '69.63.186.89'
@@ -29,6 +29,12 @@ TEST_DNS_RECORDS = {
29
29
  '54.166.7.90' => {
30
30
  ptr: %w[crawler-54-166-7-90.amazonadbot.com]
31
31
  },
32
+ '52-70-240-171.crawl.amazonbot.amazon' => {
33
+ a: %w[52.70.240.171]
34
+ },
35
+ '52.70.240.171' => {
36
+ ptr: %w[52-70-240-171.crawl.amazonbot.amazon]
37
+ },
32
38
 
33
39
  # Apple
34
40
  '17-58-98-60.applebot.apple.com' => {
@@ -38,6 +44,21 @@ TEST_DNS_RECORDS = {
38
44
  ptr: %w[17-58-98-60.applebot.apple.com]
39
45
  },
40
46
 
47
+ # BLEXBot (WebMeUp)
48
+ 'pot22.webmeup.com' => {
49
+ a: %w[65.21.113.197]
50
+ },
51
+ '65.21.113.197' => {
52
+ ptr: %w[pot22.webmeup.com]
53
+ },
54
+ # DataForSEO
55
+ 'crawling-gateway-136-243-228-176.dataforseo.com' => {
56
+ a: %w[136.243.228.176]
57
+ },
58
+ '136.243.228.176' => {
59
+ ptr: %w[crawling-gateway-136-243-228-176.dataforseo.com]
60
+ },
61
+
41
62
  # Google
42
63
  'crawl-66-249-64-141.googlebot.com' => {
43
64
  a: %w[66.249.64.141]
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot
4
+ module MetaIpRanges
5
+ alias fetch_ip_ranges_orig fetch_ip_ranges
6
+
7
+ # rubocop:disable Layout/LineLength
8
+ def fetch_ip_ranges
9
+ ['69.63.176.0/20', '66.220.144.0/20', '66.220.144.0/21', '69.63.184.0/21', '69.63.176.0/21', '74.119.76.0/22', '69.171.255.0/24', '173.252.64.0/18', '69.171.224.0/19', '69.171.224.0/20', '103.4.96.0/22', '69.63.176.0/24', '173.252.64.0/19', '173.252.70.0/24', '31.13.64.0/18', '31.13.24.0/21', '66.220.152.0/21', '66.220.159.0/24', '69.171.239.0/24', '69.171.240.0/20', '31.13.64.0/19', '31.13.64.0/24', '31.13.65.0/24', '31.13.67.0/24', '31.13.68.0/24', '31.13.69.0/24', '31.13.70.0/24', '31.13.71.0/24', '31.13.72.0/24', '31.13.73.0/24', '31.13.74.0/24', '31.13.75.0/24', '31.13.76.0/24', '31.13.77.0/24', '31.13.96.0/19', '31.13.66.0/24', '173.252.96.0/19', '69.63.178.0/24', '31.13.78.0/24', '31.13.79.0/24', '31.13.80.0/24', '31.13.82.0/24', '31.13.83.0/24', '31.13.84.0/24', '31.13.85.0/24', '31.13.86.0/24', '31.13.87.0/24', '31.13.88.0/24', '31.13.89.0/24', '31.13.90.0/24', '31.13.91.0/24', '31.13.92.0/24', '31.13.93.0/24', '31.13.94.0/24', '31.13.95.0/24', '69.171.253.0/24', '69.63.186.0/24', '31.13.81.0/24', '179.60.192.0/22', '179.60.192.0/24', '179.60.193.0/24', '179.60.194.0/24', '179.60.195.0/24', '185.60.216.0/22', '45.64.40.0/22', '185.60.216.0/24', '185.60.217.0/24', '185.60.218.0/24', '185.60.219.0/24', '129.134.0.0/16', '157.240.0.0/16', '157.240.8.0/24', '157.240.0.0/24', '157.240.1.0/24', '157.240.2.0/24', '157.240.3.0/24', '157.240.4.0/24', '157.240.5.0/24', '157.240.6.0/24', '157.240.7.0/24', '157.240.9.0/24', '157.240.10.0/24', '157.240.16.0/24', '157.240.19.0/24', '157.240.11.0/24', '157.240.12.0/24', '157.240.13.0/24', '157.240.14.0/24', '157.240.15.0/24', '157.240.17.0/24', '157.240.18.0/24', '157.240.20.0/24', '157.240.21.0/24', '157.240.22.0/24', '157.240.23.0/24', '157.240.0.0/17', '69.171.250.0/24', '157.240.24.0/24', '157.240.25.0/24', '199.201.64.0/24', '199.201.65.0/24', '199.201.64.0/22', '204.15.20.0/22', '157.240.192.0/24', '129.134.0.0/17', '157.240.198.0/24']
10
+ end
11
+ # rubocop:enable Layout/LineLength
12
+ end
13
+ end
data/test/meta_test.rb ADDED
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'test_helper'
4
+
5
+ class MetaIpRanges
6
+ include Legitbot::MetaIpRanges
7
+ end
8
+
9
+ class MetaTest < Minitest::Test
10
+ def test_fetch_ips
11
+ # NOTE: network call
12
+ ip_ranges = MetaIpRanges.new.fetch_ip_ranges_orig
13
+
14
+ refute_nil ip_ranges
15
+ assert_kind_of Array, ip_ranges
16
+ refute_empty ip_ranges
17
+ end
18
+
19
+ def test_valid_ip
20
+ ip = '69.63.186.89'
21
+ match = Legitbot::Meta.new(ip)
22
+
23
+ assert_predicate match, :valid?
24
+
25
+ ip = '69.171.251.1'
26
+ match = Legitbot::Meta.new(ip)
27
+
28
+ assert_predicate match, :valid?
29
+ end
30
+
31
+ def test_invalid_ip
32
+ ip = '127.0.0.1'
33
+ match = Legitbot::Meta.new(ip)
34
+
35
+ assert_predicate match, :fake?
36
+ end
37
+
38
+ def test_user_agent1
39
+ Legitbot.bot(
40
+ 'meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)',
41
+ '31.13.76.56'
42
+ ) do |bot|
43
+ assert_equal :meta, bot.detected_as
44
+ assert_predicate bot, :valid?
45
+ end
46
+ end
47
+
48
+ def test_user_agent2
49
+ Legitbot.bot(
50
+ 'meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)',
51
+ '173.252.87.8'
52
+ ) do |bot|
53
+ assert_equal :meta, bot.detected_as
54
+ assert_predicate bot, :valid?
55
+ end
56
+ end
57
+
58
+ def test_user_agent3
59
+ Legitbot.bot(
60
+ 'meta-externalfetcher/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)',
61
+ '173.252.87.8'
62
+ ) do |bot|
63
+ assert_equal :meta, bot.detected_as
64
+ assert_predicate bot, :valid?
65
+ end
66
+ end
67
+
68
+ # rubocop:disable Layout/LineLength
69
+ def test_user_agent4
70
+ Legitbot.bot(
71
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 meta-externalagent/1.1 Twitterbot/1.0',
72
+ '92.243.181.7'
73
+ ) do |bot|
74
+ assert_includes %i[meta twitter], bot.detected_as
75
+ assert_predicate bot, :fake?
76
+ end
77
+ end
78
+ # rubocop:enable Layout/LineLength
79
+ end
data/test/test_helper.rb CHANGED
@@ -13,3 +13,4 @@ require 'legitbot'
13
13
  require 'minitest/autorun'
14
14
  require 'minitest/hooks/test'
15
15
  require 'lib/dns_server_mock'
16
+ require 'lib/meta_ip_ranges_mock'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.5
4
+ version: 1.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-17 00:00:00.000000000 Z
11
+ date: 2024-09-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fast_interval_tree
@@ -76,15 +76,18 @@ files:
76
76
  - lib/legitbot/apple.rb
77
77
  - lib/legitbot/baidu.rb
78
78
  - lib/legitbot/bing.rb
79
+ - lib/legitbot/blexbot.rb
79
80
  - lib/legitbot/botmatch.rb
80
81
  - lib/legitbot/config/resolver.rb
82
+ - lib/legitbot/dataforseo.rb
81
83
  - lib/legitbot/duckduckgo.rb
82
84
  - lib/legitbot/facebook.rb
83
85
  - lib/legitbot/google.rb
84
- - lib/legitbot/gptbot.rb
85
86
  - lib/legitbot/ias.rb
86
87
  - lib/legitbot/legitbot.rb
87
88
  - lib/legitbot/marginalia.rb
89
+ - lib/legitbot/meta.rb
90
+ - lib/legitbot/openai.rb
88
91
  - lib/legitbot/oracle.rb
89
92
  - lib/legitbot/petalbot.rb
90
93
  - lib/legitbot/pinterest.rb
@@ -103,7 +106,9 @@ files:
103
106
  - test/ahrefs_test.rb
104
107
  - test/amazon_test.rb
105
108
  - test/apple_test.rb
109
+ - test/blexbot_test.rb
106
110
  - test/botmatch_test.rb
111
+ - test/dataforseo_test.rb
107
112
  - test/facebook_test.rb
108
113
  - test/google_test.rb
109
114
  - test/ias_test.rb
@@ -111,6 +116,8 @@ files:
111
116
  - test/legitbot/validators/ip_ranges_test.rb
112
117
  - test/legitbot_test.rb
113
118
  - test/lib/dns_server_mock.rb
119
+ - test/lib/meta_ip_ranges_mock.rb
120
+ - test/meta_test.rb
114
121
  - test/oracle_test.rb
115
122
  - test/petalbot_test.rb
116
123
  - test/pinterest_test.rb
@@ -1,15 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Legitbot # :nodoc:
4
- # https://platform.openai.com/docs/gptbot
5
- class GPTBot < BotMatch
6
- # NOTE: fetching has been disabled, see #131
7
- # @ fetch:url https://openai.com/gptbot-ranges.txt
8
- ip_ranges %w[
9
- 52.230.152.0/24
10
- 52.233.106.0/24
11
- ]
12
- end
13
-
14
- rule Legitbot::GPTBot, %w[GPTBot]
15
- end