legitbot 1.10.4 → 1.10.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a6f6866d656dda624b8222126873aeb7276886344a3bedcb11b6aeb18cd10e64
4
- data.tar.gz: fcc350098a93804f5dfddcf38d945ad1a75c8dced01405cf4a59a70d570f4196
3
+ metadata.gz: 6dbba3fc1b7fd156d5d560786ac18939387e2e059ce7f3385a105adf1c10ee0d
4
+ data.tar.gz: b030a92a6210016021debd9df09db177366a90eb7f841037e8f3b093319e161c
5
5
  SHA512:
6
- metadata.gz: 998bca74d492853877cd0775aff8bdea57d94521dd91f829289689e35717a190c94250a490a9033c1edfade19bf5caaeeb15771575c46a9096e811b08e6473a4
7
- data.tar.gz: e491ef1b1e8aa07783989ef2992d7d484c62685ba3ea5147c4b2256880939a644d489a0defa3c59a7b0a5b8cb7ff0b82d4cc5a786ed294d647c19ff40c35b2e0
6
+ metadata.gz: 9451f510ddc37f1cb57be2143a496189c3422fb7754b92ddcd5a2acdf8896873093933ebc4b427948f2c452abeb610027f1dc557dcf0f8687e662cd75f1d4be2
7
+ data.tar.gz: 45e258d414f71c5df1e9642a8549defdce012ab96bbf6d133a34a3003620dbb03d14ca74cc75e5d96dd50759b634535e343446823fbffb0ffc77ce28b458b442
data/README.md CHANGED
@@ -51,16 +51,16 @@ end
51
51
 
52
52
  - [Ahrefs](https://ahrefs.com/robot)
53
53
  - [Amazon AdBot](https://adbot.amazon.com/index.html)
54
- - [Applebot](https://support.apple.com/en-us/HT204683)
54
+ - [Applebot](https://support.apple.com/en-us/119829)
55
55
  - [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973)
56
56
  - [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/)
57
57
  - [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
58
- - [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
59
58
  - [Google crawlers](https://support.google.com/webmasters/answer/1061943)
60
59
  - [IAS](https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/)
61
60
  - [OpenAI GPTBot](https://platform.openai.com/docs/gptbot)
62
61
  - [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
63
62
  - [Marginalia](https://www.marginalia.nu/marginalia-search/for-webmasters/)
63
+ - [Meta / Facebook Web crawlers](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/)
64
64
  - [Petal search engine](http://aspiegel.com/petalbot)
65
65
  - [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
66
66
  - [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started),
data/lib/legitbot/apple.rb CHANGED
@@ -3,10 +3,13 @@
3
3
  require 'ipaddr'
4
4
 
5
5
  module Legitbot # :nodoc:
6
- # https://support.apple.com/en-us/HT204683
6
+ # https://support.apple.com/en-us/119829
7
7
  class Apple < BotMatch
8
- ip_ranges '17.0.0.0/8'
8
+ domains 'applebot.apple.com.'
9
9
  end
10
10
 
11
- rule Legitbot::Apple, %w[Applebot]
11
+ rule Legitbot::Apple, %w[
12
+ Applebot
13
+ iTMS
14
+ ]
12
15
  end
data/lib/legitbot/duckduckgo.rb CHANGED
@@ -7,7 +7,13 @@ module Legitbot # :nodoc:
7
7
  # @fetch:url https://duckduckgo.com/duckduckgo-help-pages/results/duckduckbot/
8
8
  # @fetch:selector section.main article.content ul > li
9
9
  ip_ranges %w[
10
+ 4.182.131.108
11
+ 4.195.133.120
12
+ 4.209.224.56
13
+ 4.213.46.14
14
+ 4.228.76.163
10
15
  13.89.106.77
16
+ 20.3.1.178
11
17
  20.12.141.99
12
18
  20.40.133.240
13
19
  20.43.150.85
@@ -70,6 +76,7 @@ module Legitbot # :nodoc:
70
76
  20.193.25.197
71
77
  20.193.27.215
72
78
  20.193.45.113
79
+ 20.195.108.47
73
80
  20.197.209.11
74
81
  20.197.209.27
75
82
  20.201.15.208
@@ -114,6 +121,7 @@ module Legitbot # :nodoc:
114
121
  40.119.232.215
115
122
  40.119.232.218
116
123
  40.119.232.251
124
+ 51.8.71.117
117
125
  51.8.253.152
118
126
  51.104.146.225
119
127
  51.104.146.235
@@ -228,6 +236,8 @@ module Legitbot # :nodoc:
228
236
  104.43.55.117
229
237
  104.43.55.166
230
238
  104.43.55.167
239
+ 108.141.83.74
240
+ 172.169.17.165
231
241
  191.233.3.197
232
242
  191.233.3.202
233
243
  191.234.216.4
data/lib/legitbot/facebook.rb CHANGED
@@ -1,20 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'irrc'
3
+ require_relative 'meta'
4
4
 
5
5
  module Legitbot # :nodoc:
6
6
  # https://developers.facebook.com/docs/sharing/webmasters/crawler
7
7
  class Facebook < BotMatch
8
- AS = 'AS32934'
8
+ extend MetaIpRanges
9
9
 
10
10
  ip_ranges do
11
- client = Irrc::Client.new
12
- client.query :radb, AS, source: :radb
13
- results = client.perform
14
-
15
- %i[ipv4 ipv6].map do |family|
16
- results[AS][family][AS]
17
- end.flatten
11
+ fetch_ip_ranges
18
12
  end
19
13
  end
20
14
 
data/lib/legitbot/marginalia.rb CHANGED
@@ -3,8 +3,23 @@
3
3
  module Legitbot # :nodoc:
4
4
  # https://www.marginalia.nu/marginalia-search/for-webmasters/
5
5
  class Marginalia < BotMatch
6
+ # https://x.com/MarginaliaNu/status/1824172354081263991
7
+ # @fetch:url https://search.marginalia.nu/crawler-ips.txt
6
8
  ip_ranges %w[
7
- 81.170.128.21/32
9
+ 81.170.128.52
10
+ 193.183.0.162
11
+ 193.183.0.163
12
+ 193.183.0.164
13
+ 193.183.0.165
14
+ 193.183.0.166
15
+ 193.183.0.167
16
+ 193.183.0.168
17
+ 193.183.0.169
18
+ 193.183.0.170
19
+ 193.183.0.171
20
+ 193.183.0.172
21
+ 193.183.0.173
22
+ 193.183.0.174
8
23
  ]
9
24
  end
10
25
 
data/lib/legitbot/meta.rb ADDED
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'irrc'
4
+
5
+ module Legitbot # :nodoc:
6
+ module MetaIpRanges # :nodoc:
7
+ AS = 'AS32934'
8
+
9
+ def fetch_ip_ranges
10
+ client = Irrc::Client.new
11
+ client.query :radb, AS, source: :radb
12
+ results = client.perform
13
+
14
+ %i[ipv4 ipv6].map do |family|
15
+ results[AS][family][AS]
16
+ end.flatten
17
+ end
18
+ end
19
+
20
+ # https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/
21
+ class Meta < BotMatch
22
+ extend MetaIpRanges
23
+
24
+ ip_ranges do
25
+ fetch_ip_ranges
26
+ end
27
+ end
28
+
29
+ rule Legitbot::Meta, %w[
30
+ meta-externalagent
31
+ meta-externalfetcher
32
+ ]
33
+ end
data/lib/legitbot/openai.rb ADDED
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
4
+ # https://platform.openai.com/docs/gptbot
5
+ class GPTBot < BotMatch
6
+ # NOTE: fetching is disabled, see #131
7
+ # @ fetch:url https://openai.com/gptbot.json
8
+ ip_ranges %w[
9
+ 20.171.206.0/24
10
+ 52.230.152.0/24
11
+ 52.233.106.0/24
12
+ ]
13
+ end
14
+
15
+ # https://platform.openai.com/docs/bots
16
+ class OpenAIChat < BotMatch
17
+ # NOTE: fetching is disabled, see #131
18
+ # @ fetch:url https://openai.com/chatgpt-user.json
19
+ ip_ranges %w[
20
+ 23.98.142.176/28
21
+ 40.84.180.224/28
22
+ 13.65.240.240/28
23
+ 20.97.189.96/28
24
+ 20.161.75.208/28
25
+ 52.225.75.208/28
26
+ 52.156.77.144/28
27
+ 40.84.221.208/28
28
+ 40.84.221.224/28
29
+ 40.84.180.64/28
30
+ ]
31
+ end
32
+
33
+ # https://platform.openai.com/docs/bots
34
+ class OpenAISearch < BotMatch
35
+ # NOTE: fetching is disabled, see #131
36
+ # @ fetch:url https://openai.com/searchbot.json
37
+ ip_ranges %w[
38
+ 20.42.10.176/28
39
+ 172.203.190.128/28
40
+ ]
41
+ end
42
+
43
+ rule Legitbot::GPTBot, %w[GPTBot]
44
+ rule Legitbot::OpenAIChat, %w[ChatGPT-User]
45
+ rule Legitbot::OpenAISearch, %w[OAI-SearchBot]
46
+ end
data/lib/legitbot/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Legitbot
4
- VERSION = '1.10.4'
4
+ VERSION = '1.10.6'
5
5
  end
data/lib/legitbot.rb CHANGED
@@ -11,10 +11,11 @@ require_relative 'legitbot/bing'
11
11
  require_relative 'legitbot/duckduckgo'
12
12
  require_relative 'legitbot/facebook'
13
13
  require_relative 'legitbot/google'
14
- require_relative 'legitbot/gptbot'
15
14
  require_relative 'legitbot/ias'
15
+ require_relative 'legitbot/openai'
16
16
  require_relative 'legitbot/oracle'
17
17
  require_relative 'legitbot/marginalia'
18
+ require_relative 'legitbot/meta'
18
19
  require_relative 'legitbot/petalbot'
19
20
  require_relative 'legitbot/pinterest'
20
21
  require_relative 'legitbot/twitter'
@@ -75,7 +75,7 @@ module RuboCop
75
75
  end
76
76
 
77
77
  def normalise_list(ips)
78
- ips.sort_by(&IPAddr.method(:new))
78
+ ips.uniq.sort_by(&IPAddr.method(:new))
79
79
  end
80
80
 
81
81
  def register_offense(node, new_ips, **params)
data/test/facebook_test.rb CHANGED
@@ -2,19 +2,6 @@
2
2
 
3
3
  require_relative 'test_helper'
4
4
 
5
- module Legitbot
6
- class Facebook
7
- # rubocop:disable Layout/LineLength
8
- def self.whois
9
- {
10
- ipv4: ['69.63.176.0/20', '66.220.144.0/20', '66.220.144.0/21', '69.63.184.0/21', '69.63.176.0/21', '74.119.76.0/22', '69.171.255.0/24', '173.252.64.0/18', '69.171.224.0/19', '69.171.224.0/20', '103.4.96.0/22', '69.63.176.0/24', '173.252.64.0/19', '173.252.70.0/24', '31.13.64.0/18', '31.13.24.0/21', '66.220.152.0/21', '66.220.159.0/24', '69.171.239.0/24', '69.171.240.0/20', '31.13.64.0/19', '31.13.64.0/24', '31.13.65.0/24', '31.13.67.0/24', '31.13.68.0/24', '31.13.69.0/24', '31.13.70.0/24', '31.13.71.0/24', '31.13.72.0/24', '31.13.73.0/24', '31.13.74.0/24', '31.13.75.0/24', '31.13.76.0/24', '31.13.77.0/24', '31.13.96.0/19', '31.13.66.0/24', '173.252.96.0/19', '69.63.178.0/24', '31.13.78.0/24', '31.13.79.0/24', '31.13.80.0/24', '31.13.82.0/24', '31.13.83.0/24', '31.13.84.0/24', '31.13.85.0/24', '31.13.86.0/24', '31.13.87.0/24', '31.13.88.0/24', '31.13.89.0/24', '31.13.90.0/24', '31.13.91.0/24', '31.13.92.0/24', '31.13.93.0/24', '31.13.94.0/24', '31.13.95.0/24', '69.171.253.0/24', '69.63.186.0/24', '31.13.81.0/24', '179.60.192.0/22', '179.60.192.0/24', '179.60.193.0/24', '179.60.194.0/24', '179.60.195.0/24', '185.60.216.0/22', '45.64.40.0/22', '185.60.216.0/24', '185.60.217.0/24', '185.60.218.0/24', '185.60.219.0/24', '129.134.0.0/16', '157.240.0.0/16', '157.240.8.0/24', '157.240.0.0/24', '157.240.1.0/24', '157.240.2.0/24', '157.240.3.0/24', '157.240.4.0/24', '157.240.5.0/24', '157.240.6.0/24', '157.240.7.0/24', '157.240.9.0/24', '157.240.10.0/24', '157.240.16.0/24', '157.240.19.0/24', '157.240.11.0/24', '157.240.12.0/24', '157.240.13.0/24', '157.240.14.0/24', '157.240.15.0/24', '157.240.17.0/24', '157.240.18.0/24', '157.240.20.0/24', '157.240.21.0/24', '157.240.22.0/24', '157.240.23.0/24', '157.240.0.0/17', '69.171.250.0/24', '157.240.24.0/24', '157.240.25.0/24', '199.201.64.0/24', '199.201.65.0/24', '199.201.64.0/22', '204.15.20.0/22', '157.240.192.0/24', '129.134.0.0/17', '157.240.198.0/24'],
11
- ipv6: []
12
- }
13
- end
14
- # rubocop:enable Layout/LineLength
15
- end
16
- end
17
-
18
5
  class FacebookTest < Minitest::Test
19
6
  def test_valid_ip
20
7
  ip = '69.63.186.89'
data/test/lib/meta_ip_ranges_mock.rb ADDED
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot
4
+ module MetaIpRanges
5
+ alias fetch_ip_ranges_orig fetch_ip_ranges
6
+
7
+ # rubocop:disable Layout/LineLength
8
+ def fetch_ip_ranges
9
+ ['69.63.176.0/20', '66.220.144.0/20', '66.220.144.0/21', '69.63.184.0/21', '69.63.176.0/21', '74.119.76.0/22', '69.171.255.0/24', '173.252.64.0/18', '69.171.224.0/19', '69.171.224.0/20', '103.4.96.0/22', '69.63.176.0/24', '173.252.64.0/19', '173.252.70.0/24', '31.13.64.0/18', '31.13.24.0/21', '66.220.152.0/21', '66.220.159.0/24', '69.171.239.0/24', '69.171.240.0/20', '31.13.64.0/19', '31.13.64.0/24', '31.13.65.0/24', '31.13.67.0/24', '31.13.68.0/24', '31.13.69.0/24', '31.13.70.0/24', '31.13.71.0/24', '31.13.72.0/24', '31.13.73.0/24', '31.13.74.0/24', '31.13.75.0/24', '31.13.76.0/24', '31.13.77.0/24', '31.13.96.0/19', '31.13.66.0/24', '173.252.96.0/19', '69.63.178.0/24', '31.13.78.0/24', '31.13.79.0/24', '31.13.80.0/24', '31.13.82.0/24', '31.13.83.0/24', '31.13.84.0/24', '31.13.85.0/24', '31.13.86.0/24', '31.13.87.0/24', '31.13.88.0/24', '31.13.89.0/24', '31.13.90.0/24', '31.13.91.0/24', '31.13.92.0/24', '31.13.93.0/24', '31.13.94.0/24', '31.13.95.0/24', '69.171.253.0/24', '69.63.186.0/24', '31.13.81.0/24', '179.60.192.0/22', '179.60.192.0/24', '179.60.193.0/24', '179.60.194.0/24', '179.60.195.0/24', '185.60.216.0/22', '45.64.40.0/22', '185.60.216.0/24', '185.60.217.0/24', '185.60.218.0/24', '185.60.219.0/24', '129.134.0.0/16', '157.240.0.0/16', '157.240.8.0/24', '157.240.0.0/24', '157.240.1.0/24', '157.240.2.0/24', '157.240.3.0/24', '157.240.4.0/24', '157.240.5.0/24', '157.240.6.0/24', '157.240.7.0/24', '157.240.9.0/24', '157.240.10.0/24', '157.240.16.0/24', '157.240.19.0/24', '157.240.11.0/24', '157.240.12.0/24', '157.240.13.0/24', '157.240.14.0/24', '157.240.15.0/24', '157.240.17.0/24', '157.240.18.0/24', '157.240.20.0/24', '157.240.21.0/24', '157.240.22.0/24', '157.240.23.0/24', '157.240.0.0/17', '69.171.250.0/24', '157.240.24.0/24', '157.240.25.0/24', '199.201.64.0/24', '199.201.65.0/24', '199.201.64.0/22', '204.15.20.0/22', '157.240.192.0/24', '129.134.0.0/17', '157.240.198.0/24']
10
+ end
11
+ # rubocop:enable Layout/LineLength
12
+ end
13
+ end
data/test/meta_test.rb ADDED
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'test_helper'
4
+
5
+ class MetaIpRanges
6
+ include Legitbot::MetaIpRanges
7
+ end
8
+
9
+ class MetaTest < Minitest::Test
10
+ def test_fetch_ips
11
+ # NOTE: network call
12
+ ip_ranges = MetaIpRanges.new.fetch_ip_ranges_orig
13
+
14
+ refute_nil ip_ranges
15
+ assert_kind_of Array, ip_ranges
16
+ refute_empty ip_ranges
17
+ end
18
+
19
+ def test_valid_ip
20
+ ip = '69.63.186.89'
21
+ match = Legitbot::Meta.new(ip)
22
+
23
+ assert_predicate match, :valid?
24
+
25
+ ip = '69.171.251.1'
26
+ match = Legitbot::Meta.new(ip)
27
+
28
+ assert_predicate match, :valid?
29
+ end
30
+
31
+ def test_invalid_ip
32
+ ip = '127.0.0.1'
33
+ match = Legitbot::Meta.new(ip)
34
+
35
+ assert_predicate match, :fake?
36
+ end
37
+
38
+ def test_user_agent1
39
+ Legitbot.bot(
40
+ 'meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)',
41
+ '31.13.76.56'
42
+ ) do |bot|
43
+ assert_equal :meta, bot.detected_as
44
+ assert_predicate bot, :valid?
45
+ end
46
+ end
47
+
48
+ def test_user_agent2
49
+ Legitbot.bot(
50
+ 'meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)',
51
+ '173.252.87.8'
52
+ ) do |bot|
53
+ assert_equal :meta, bot.detected_as
54
+ assert_predicate bot, :valid?
55
+ end
56
+ end
57
+
58
+ def test_user_agent3
59
+ Legitbot.bot(
60
+ 'meta-externalfetcher/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)',
61
+ '173.252.87.8'
62
+ ) do |bot|
63
+ assert_equal :meta, bot.detected_as
64
+ assert_predicate bot, :valid?
65
+ end
66
+ end
67
+
68
+ # rubocop:disable Layout/LineLength
69
+ def test_user_agent4
70
+ Legitbot.bot(
71
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 meta-externalagent/1.1 Twitterbot/1.0',
72
+ '92.243.181.7'
73
+ ) do |bot|
74
+ assert_includes %i[meta twitter], bot.detected_as
75
+ assert_predicate bot, :fake?
76
+ end
77
+ end
78
+ # rubocop:enable Layout/LineLength
79
+ end
data/test/test_helper.rb CHANGED
@@ -13,3 +13,4 @@ require 'legitbot'
13
13
  require 'minitest/autorun'
14
14
  require 'minitest/hooks/test'
15
15
  require 'lib/dns_server_mock'
16
+ require 'lib/meta_ip_ranges_mock'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: legitbot
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.10.4
4
+ version: 1.10.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexander Azarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-13 00:00:00.000000000 Z
11
+ date: 2024-09-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fast_interval_tree
@@ -81,10 +81,11 @@ files:
81
81
  - lib/legitbot/duckduckgo.rb
82
82
  - lib/legitbot/facebook.rb
83
83
  - lib/legitbot/google.rb
84
- - lib/legitbot/gptbot.rb
85
84
  - lib/legitbot/ias.rb
86
85
  - lib/legitbot/legitbot.rb
87
86
  - lib/legitbot/marginalia.rb
87
+ - lib/legitbot/meta.rb
88
+ - lib/legitbot/openai.rb
88
89
  - lib/legitbot/oracle.rb
89
90
  - lib/legitbot/petalbot.rb
90
91
  - lib/legitbot/pinterest.rb
@@ -111,6 +112,8 @@ files:
111
112
  - test/legitbot/validators/ip_ranges_test.rb
112
113
  - test/legitbot_test.rb
113
114
  - test/lib/dns_server_mock.rb
115
+ - test/lib/meta_ip_ranges_mock.rb
116
+ - test/meta_test.rb
114
117
  - test/oracle_test.rb
115
118
  - test/petalbot_test.rb
116
119
  - test/pinterest_test.rb
data/lib/legitbot/gptbot.rb DELETED
@@ -1,15 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Legitbot # :nodoc:
4
- # https://platform.openai.com/docs/gptbot
5
- class GPTBot < BotMatch
6
- # NOTE: fetching has been disabled, see #131
7
- # @ fetch:url https://openai.com/gptbot-ranges.txt
8
- ip_ranges %w[
9
- 52.230.152.0/24
10
- 52.233.106.0/24
11
- ]
12
- end
13
-
14
- rule Legitbot::GPTBot, %w[GPTBot]
15
- end