legitbot 1.10.5 → 1.10.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2ac43bac7745d6e0ef906342c82102e87c8644a6cd8a8af3077831f13ebbd79e
4
- data.tar.gz: 0eeed734eadee6c380d41a604fa21e83aba8d8431cb8444b27fa2b0ec0cabe2a
3
+ metadata.gz: 6dbba3fc1b7fd156d5d560786ac18939387e2e059ce7f3385a105adf1c10ee0d
4
+ data.tar.gz: b030a92a6210016021debd9df09db177366a90eb7f841037e8f3b093319e161c
5
5
  SHA512:
6
- metadata.gz: 36b25cf91d14fd4211e31c9dd92f8ec7439155a2e7862329d848615ceaff52f5fd1800b56fb25f0956b01572ed019f4689a81de9ba1b828c696710f451517344
7
- data.tar.gz: 49fbc215737f0d863c1a5ffdf28f5a41f438d31923987cfdbe58a60dd6056f20bc37985ebf1dca2e218dc30ab0a216ba005a670e2027f780280899e689ea7c0c
6
+ metadata.gz: 9451f510ddc37f1cb57be2143a496189c3422fb7754b92ddcd5a2acdf8896873093933ebc4b427948f2c452abeb610027f1dc557dcf0f8687e662cd75f1d4be2
7
+ data.tar.gz: 45e258d414f71c5df1e9642a8549defdce012ab96bbf6d133a34a3003620dbb03d14ca74cc75e5d96dd50759b634535e343446823fbffb0ffc77ce28b458b442
data/README.md CHANGED
@@ -51,16 +51,16 @@ end
51
51
 
52
52
  - [Ahrefs](https://ahrefs.com/robot)
53
53
  - [Amazon AdBot](https://adbot.amazon.com/index.html)
54
- - [Applebot](https://support.apple.com/en-us/HT204683)
54
+ - [Applebot](https://support.apple.com/en-us/119829)
55
55
  - [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
56
56
  - [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
57
57
  - [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
58
- - [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
59
58
  - [Google crawlers](https://support.google.com/webmasters/answer/1061943)
60
59
  - [IAS](https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/)
61
60
  - [OpenAI GPTBot](https://platform.openai.com/docs/gptbot)
62
61
  - [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
63
62
  - [Marginalia](https://www.marginalia.nu/marginalia-search/for-webmasters/)
63
+ - [Meta / Facebook Web crawlers](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/)
64
64
  - [Petal search engine](http://aspiegel.com/petalbot)
65
65
  - [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
66
66
  - [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started),
@@ -3,10 +3,13 @@
3
3
  require 'ipaddr'
4
4
 
5
5
  module Legitbot # :nodoc:
6
- # https://support.apple.com/en-us/HT204683
6
+ # https://support.apple.com/en-us/119829
7
7
  class Apple < BotMatch
8
- ip_ranges '17.0.0.0/8'
8
+ domains 'applebot.apple.com.'
9
9
  end
10
10
 
11
- rule Legitbot::Apple, %w[Applebot]
11
+ rule Legitbot::Apple, %w[
12
+ Applebot
13
+ iTMS
14
+ ]
12
15
  end
@@ -1,20 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'irrc'
3
+ require_relative 'meta'
4
4
 
5
5
  module Legitbot # :nodoc:
6
6
  # https://developers.facebook.com/docs/sharing/webmasters/crawler
7
7
  class Facebook < BotMatch
8
- AS = 'AS32934'
8
+ extend MetaIpRanges
9
9
 
10
10
  ip_ranges do
11
- client = Irrc::Client.new
12
- client.query :radb, AS, source: :radb
13
- results = client.perform
14
-
15
- %i[ipv4 ipv6].map do |family|
16
- results[AS][family][AS]
17
- end.flatten
11
+ fetch_ip_ranges
18
12
  end
19
13
  end
20
14
 
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'irrc'
4
+
5
+ module Legitbot # :nodoc:
6
+ module MetaIpRanges # :nodoc:
7
+ AS = 'AS32934'
8
+
9
+ def fetch_ip_ranges
10
+ client = Irrc::Client.new
11
+ client.query :radb, AS, source: :radb
12
+ results = client.perform
13
+
14
+ %i[ipv4 ipv6].map do |family|
15
+ results[AS][family][AS]
16
+ end.flatten
17
+ end
18
+ end
19
+
20
+ # https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/
21
+ class Meta < BotMatch
22
+ extend MetaIpRanges
23
+
24
+ ip_ranges do
25
+ fetch_ip_ranges
26
+ end
27
+ end
28
+
29
+ rule Legitbot::Meta, %w[
30
+ meta-externalagent
31
+ meta-externalfetcher
32
+ ]
33
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://platform.openai.com/docs/gptbot
5
+ class GPTBot < BotMatch
6
+ # NOTE: fetching is disabled, see #131
7
+ # @ fetch:url https://openai.com/gptbot.json
8
+ ip_ranges %w[
9
+ 20.171.206.0/24
10
+ 52.230.152.0/24
11
+ 52.233.106.0/24
12
+ ]
13
+ end
14
+
15
+ # https://platform.openai.com/docs/bots
16
+ class OpenAIChat < BotMatch
17
+ # NOTE: fetching is disabled, see #131
18
+ # @ fetch:url https://openai.com/chatgpt-user.json
19
+ ip_ranges %w[
20
+ 23.98.142.176/28
21
+ 40.84.180.224/28
22
+ 13.65.240.240/28
23
+ 20.97.189.96/28
24
+ 20.161.75.208/28
25
+ 52.225.75.208/28
26
+ 52.156.77.144/28
27
+ 40.84.221.208/28
28
+ 40.84.221.224/28
29
+ 40.84.180.64/28
30
+ ]
31
+ end
32
+
33
+ # https://platform.openai.com/docs/bots
34
+ class OpenAISearch < BotMatch
35
+ # NOTE: fetching is disabled, see #131
36
+ # @ fetch:url https://openai.com/searchbot.json
37
+ ip_ranges %w[
38
+ 20.42.10.176/28
39
+ 172.203.190.128/28
40
+ ]
41
+ end
42
+
43
+ rule Legitbot::GPTBot, %w[GPTBot]
44
+ rule Legitbot::OpenAIChat, %w[ChatGPT-User]
45
+ rule Legitbot::OpenAISearch, %w[OAI-SearchBot]
46
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.10.5'
4
+ VERSION = '1.10.6'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -11,10 +11,11 @@ require_relative 'legitbot/bing'
11
11
  require_relative 'legitbot/duckduckgo'
12
12
  require_relative 'legitbot/facebook'
13
13
  require_relative 'legitbot/google'
14
- require_relative 'legitbot/gptbot'
15
14
  require_relative 'legitbot/ias'
15
+ require_relative 'legitbot/openai'
16
16
  require_relative 'legitbot/oracle'
17
17
  require_relative 'legitbot/marginalia'
18
+ require_relative 'legitbot/meta'
18
19
  require_relative 'legitbot/petalbot'
19
20
  require_relative 'legitbot/pinterest'
20
21
  require_relative 'legitbot/twitter'
@@ -2,19 +2,6 @@
2
2
 
3
3
  require_relative 'test_helper'
4
4
 
5
- module Legitbot
6
- class Facebook
7
- # rubocop:disable Layout/LineLength
8
- def self.whois
9
- {
10
- ipv4: ['69.63.176.0/20', '66.220.144.0/20', '66.220.144.0/21', '69.63.184.0/21', '69.63.176.0/21', '74.119.76.0/22', '69.171.255.0/24', '173.252.64.0/18', '69.171.224.0/19', '69.171.224.0/20', '103.4.96.0/22', '69.63.176.0/24', '173.252.64.0/19', '173.252.70.0/24', '31.13.64.0/18', '31.13.24.0/21', '66.220.152.0/21', '66.220.159.0/24', '69.171.239.0/24', '69.171.240.0/20', '31.13.64.0/19', '31.13.64.0/24', '31.13.65.0/24', '31.13.67.0/24', '31.13.68.0/24', '31.13.69.0/24', '31.13.70.0/24', '31.13.71.0/24', '31.13.72.0/24', '31.13.73.0/24', '31.13.74.0/24', '31.13.75.0/24', '31.13.76.0/24', '31.13.77.0/24', '31.13.96.0/19', '31.13.66.0/24', '173.252.96.0/19', '69.63.178.0/24', '31.13.78.0/24', '31.13.79.0/24', '31.13.80.0/24', '31.13.82.0/24', '31.13.83.0/24', '31.13.84.0/24', '31.13.85.0/24', '31.13.86.0/24', '31.13.87.0/24', '31.13.88.0/24', '31.13.89.0/24', '31.13.90.0/24', '31.13.91.0/24', '31.13.92.0/24', '31.13.93.0/24', '31.13.94.0/24', '31.13.95.0/24', '69.171.253.0/24', '69.63.186.0/24', '31.13.81.0/24', '179.60.192.0/22', '179.60.192.0/24', '179.60.193.0/24', '179.60.194.0/24', '179.60.195.0/24', '185.60.216.0/22', '45.64.40.0/22', '185.60.216.0/24', '185.60.217.0/24', '185.60.218.0/24', '185.60.219.0/24', '129.134.0.0/16', '157.240.0.0/16', '157.240.8.0/24', '157.240.0.0/24', '157.240.1.0/24', '157.240.2.0/24', '157.240.3.0/24', '157.240.4.0/24', '157.240.5.0/24', '157.240.6.0/24', '157.240.7.0/24', '157.240.9.0/24', '157.240.10.0/24', '157.240.16.0/24', '157.240.19.0/24', '157.240.11.0/24', '157.240.12.0/24', '157.240.13.0/24', '157.240.14.0/24', '157.240.15.0/24', '157.240.17.0/24', '157.240.18.0/24', '157.240.20.0/24', '157.240.21.0/24', '157.240.22.0/24', '157.240.23.0/24', '157.240.0.0/17', '69.171.250.0/24', '157.240.24.0/24', '157.240.25.0/24', '199.201.64.0/24', '199.201.65.0/24', '199.201.64.0/22', '204.15.20.0/22', '157.240.192.0/24', '129.134.0.0/17', '157.240.198.0/24'],
11
- ipv6: []
12
- }
13
- end
14
- # rubocop:enable Layout/LineLength
15
- end
16
- end
17
-
18
5
  class FacebookTest < Minitest::Test
19
6
  def test_valid_ip
20
7
  ip = '69.63.186.89'
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot
4
+ module MetaIpRanges
5
+ alias fetch_ip_ranges_orig fetch_ip_ranges
6
+
7
+ # rubocop:disable Layout/LineLength
8
+ def fetch_ip_ranges
9
+ ['69.63.176.0/20', '66.220.144.0/20', '66.220.144.0/21', '69.63.184.0/21', '69.63.176.0/21', '74.119.76.0/22', '69.171.255.0/24', '173.252.64.0/18', '69.171.224.0/19', '69.171.224.0/20', '103.4.96.0/22', '69.63.176.0/24', '173.252.64.0/19', '173.252.70.0/24', '31.13.64.0/18', '31.13.24.0/21', '66.220.152.0/21', '66.220.159.0/24', '69.171.239.0/24', '69.171.240.0/20', '31.13.64.0/19', '31.13.64.0/24', '31.13.65.0/24', '31.13.67.0/24', '31.13.68.0/24', '31.13.69.0/24', '31.13.70.0/24', '31.13.71.0/24', '31.13.72.0/24', '31.13.73.0/24', '31.13.74.0/24', '31.13.75.0/24', '31.13.76.0/24', '31.13.77.0/24', '31.13.96.0/19', '31.13.66.0/24', '173.252.96.0/19', '69.63.178.0/24', '31.13.78.0/24', '31.13.79.0/24', '31.13.80.0/24', '31.13.82.0/24', '31.13.83.0/24', '31.13.84.0/24', '31.13.85.0/24', '31.13.86.0/24', '31.13.87.0/24', '31.13.88.0/24', '31.13.89.0/24', '31.13.90.0/24', '31.13.91.0/24', '31.13.92.0/24', '31.13.93.0/24', '31.13.94.0/24', '31.13.95.0/24', '69.171.253.0/24', '69.63.186.0/24', '31.13.81.0/24', '179.60.192.0/22', '179.60.192.0/24', '179.60.193.0/24', '179.60.194.0/24', '179.60.195.0/24', '185.60.216.0/22', '45.64.40.0/22', '185.60.216.0/24', '185.60.217.0/24', '185.60.218.0/24', '185.60.219.0/24', '129.134.0.0/16', '157.240.0.0/16', '157.240.8.0/24', '157.240.0.0/24', '157.240.1.0/24', '157.240.2.0/24', '157.240.3.0/24', '157.240.4.0/24', '157.240.5.0/24', '157.240.6.0/24', '157.240.7.0/24', '157.240.9.0/24', '157.240.10.0/24', '157.240.16.0/24', '157.240.19.0/24', '157.240.11.0/24', '157.240.12.0/24', '157.240.13.0/24', '157.240.14.0/24', '157.240.15.0/24', '157.240.17.0/24', '157.240.18.0/24', '157.240.20.0/24', '157.240.21.0/24', '157.240.22.0/24', '157.240.23.0/24', '157.240.0.0/17', '69.171.250.0/24', '157.240.24.0/24', '157.240.25.0/24', '199.201.64.0/24', '199.201.65.0/24', '199.201.64.0/22', '204.15.20.0/22', '157.240.192.0/24', '129.134.0.0/17', '157.240.198.0/24']
10
+ end
11
+ # rubocop:enable Layout/LineLength
12
+ end
13
+ end
data/test/meta_test.rb ADDED
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'test_helper'
4
+
5
+ class MetaIpRanges
6
+ include Legitbot::MetaIpRanges
7
+ end
8
+
9
+ class MetaTest < Minitest::Test
10
+ def test_fetch_ips
11
+ # NOTE: network call
12
+ ip_ranges = MetaIpRanges.new.fetch_ip_ranges_orig
13
+
14
+ refute_nil ip_ranges
15
+ assert_kind_of Array, ip_ranges
16
+ refute_empty ip_ranges
17
+ end
18
+
19
+ def test_valid_ip
20
+ ip = '69.63.186.89'
21
+ match = Legitbot::Meta.new(ip)
22
+
23
+ assert_predicate match, :valid?
24
+
25
+ ip = '69.171.251.1'
26
+ match = Legitbot::Meta.new(ip)
27
+
28
+ assert_predicate match, :valid?
29
+ end
30
+
31
+ def test_invalid_ip
32
+ ip = '127.0.0.1'
33
+ match = Legitbot::Meta.new(ip)
34
+
35
+ assert_predicate match, :fake?
36
+ end
37
+
38
+ def test_user_agent1
39
+ Legitbot.bot(
40
+ 'meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)',
41
+ '31.13.76.56'
42
+ ) do |bot|
43
+ assert_equal :meta, bot.detected_as
44
+ assert_predicate bot, :valid?
45
+ end
46
+ end
47
+
48
+ def test_user_agent2
49
+ Legitbot.bot(
50
+ 'meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)',
51
+ '173.252.87.8'
52
+ ) do |bot|
53
+ assert_equal :meta, bot.detected_as
54
+ assert_predicate bot, :valid?
55
+ end
56
+ end
57
+
58
+ def test_user_agent3
59
+ Legitbot.bot(
60
+ 'meta-externalfetcher/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)',
61
+ '173.252.87.8'
62
+ ) do |bot|
63
+ assert_equal :meta, bot.detected_as
64
+ assert_predicate bot, :valid?
65
+ end
66
+ end
67
+
68
+ # rubocop:disable Layout/LineLength
69
+ def test_user_agent4
70
+ Legitbot.bot(
71
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 meta-externalagent/1.1 Twitterbot/1.0',
72
+ '92.243.181.7'
73
+ ) do |bot|
74
+ assert_includes %i[meta twitter], bot.detected_as
75
+ assert_predicate bot, :fake?
76
+ end
77
+ end
78
+ # rubocop:enable Layout/LineLength
79
+ end
data/test/test_helper.rb CHANGED
@@ -13,3 +13,4 @@ require 'legitbot'
13
13
  require 'minitest/autorun'
14
14
  require 'minitest/hooks/test'
15
15
  require 'lib/dns_server_mock'
16
+ require 'lib/meta_ip_ranges_mock'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.5
4
+ version: 1.10.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-17 00:00:00.000000000 Z
11
+ date: 2024-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fast_interval_tree
@@ -81,10 +81,11 @@ files:
81
81
  - lib/legitbot/duckduckgo.rb
82
82
  - lib/legitbot/facebook.rb
83
83
  - lib/legitbot/google.rb
84
- - lib/legitbot/gptbot.rb
85
84
  - lib/legitbot/ias.rb
86
85
  - lib/legitbot/legitbot.rb
87
86
  - lib/legitbot/marginalia.rb
87
+ - lib/legitbot/meta.rb
88
+ - lib/legitbot/openai.rb
88
89
  - lib/legitbot/oracle.rb
89
90
  - lib/legitbot/petalbot.rb
90
91
  - lib/legitbot/pinterest.rb
@@ -111,6 +112,8 @@ files:
111
112
  - test/legitbot/validators/ip_ranges_test.rb
112
113
  - test/legitbot_test.rb
113
114
  - test/lib/dns_server_mock.rb
115
+ - test/lib/meta_ip_ranges_mock.rb
116
+ - test/meta_test.rb
114
117
  - test/oracle_test.rb
115
118
  - test/petalbot_test.rb
116
119
  - test/pinterest_test.rb
@@ -1,15 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Legitbot # :nodoc:
4
- # https://platform.openai.com/docs/gptbot
5
- class GPTBot < BotMatch
6
- # NOTE: fetching has been disabled, see #131
7
- # @ fetch:url https://openai.com/gptbot-ranges.txt
8
- ip_ranges %w[
9
- 52.230.152.0/24
10
- 52.233.106.0/24
11
- ]
12
- end
13
-
14
- rule Legitbot::GPTBot, %w[GPTBot]
15
- end