device_detector 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +19 -17
- data/lib/device_detector/browser.rb +22 -2
- data/lib/device_detector/client_hint.rb +22 -7
- data/lib/device_detector/device.rb +231 -1
- data/lib/device_detector/os.rb +10 -2
- data/lib/device_detector/parser.rb +25 -3
- data/lib/device_detector/vendor_fragment.rb +25 -0
- data/lib/device_detector/version.rb +1 -1
- data/lib/device_detector.rb +37 -5
- data/regexes/bots.yml +308 -17
- data/regexes/client/browsers.yml +82 -25
- data/regexes/client/feed_readers.yml +31 -23
- data/regexes/client/hints/apps.yml +22 -0
- data/regexes/client/hints/browsers.yml +20 -0
- data/regexes/client/libraries.yml +172 -24
- data/regexes/client/mediaplayers.yml +47 -0
- data/regexes/client/mobile_apps.yml +857 -39
- data/regexes/client/pim.yml +4 -0
- data/regexes/device/car_browsers.yml +1 -1
- data/regexes/device/consoles.yml +8 -4
- data/regexes/device/mobiles.yml +4945 -1234
- data/regexes/device/portable_media_player.yml +20 -4
- data/regexes/device/shell_tv.yml +5 -0
- data/regexes/device/televisions.yml +144 -6
- data/regexes/oss.yml +221 -107
- metadata +6 -4
data/lib/device_detector/os.rb
CHANGED
|
@@ -94,6 +94,7 @@ class DeviceDetector
|
|
|
94
94
|
'INF' => 'Inferno',
|
|
95
95
|
'JME' => 'Java ME',
|
|
96
96
|
'KOS' => 'KaiOS',
|
|
97
|
+
'KAL' => 'Kali',
|
|
97
98
|
'KAN' => 'Kanotix',
|
|
98
99
|
'KNO' => 'Knoppix',
|
|
99
100
|
'KTV' => 'KreaTV',
|
|
@@ -104,6 +105,7 @@ class DeviceDetector
|
|
|
104
105
|
'LEN' => 'Lineage OS',
|
|
105
106
|
'LBT' => 'Lubuntu',
|
|
106
107
|
'LOS' => 'Lumin OS',
|
|
108
|
+
'LUN' => 'LuneOS',
|
|
107
109
|
'VLN' => 'VectorLinux',
|
|
108
110
|
'MAC' => 'Mac',
|
|
109
111
|
'MAE' => 'Maemo',
|
|
@@ -126,14 +128,17 @@ class DeviceDetector
|
|
|
126
128
|
'OBS' => 'OpenBSD',
|
|
127
129
|
'OWR' => 'OpenWrt',
|
|
128
130
|
'OTV' => 'Opera TV',
|
|
131
|
+
'ORA' => 'Oracle Linux',
|
|
129
132
|
'ORD' => 'Ordissimo',
|
|
130
133
|
'PAR' => 'Pardus',
|
|
131
134
|
'PCL' => 'PCLinuxOS',
|
|
135
|
+
'PIC' => 'PICO OS',
|
|
132
136
|
'PLA' => 'Plasma Mobile',
|
|
133
137
|
'PSP' => 'PlayStation Portable',
|
|
134
138
|
'PS3' => 'PlayStation',
|
|
135
139
|
'PUR' => 'PureOS',
|
|
136
140
|
'RHT' => 'Red Hat',
|
|
141
|
+
'RED' => 'RedOS',
|
|
137
142
|
'REV' => 'Revenge OS',
|
|
138
143
|
'ROS' => 'RISC OS',
|
|
139
144
|
'ROK' => 'Roku OS',
|
|
@@ -159,9 +164,12 @@ class DeviceDetector
|
|
|
159
164
|
'TEN' => 'TencentOS',
|
|
160
165
|
'TDX' => 'ThreadX',
|
|
161
166
|
'TIZ' => 'Tizen',
|
|
167
|
+
'TIV' => 'TiVo OS',
|
|
162
168
|
'TOS' => 'TmaxOS',
|
|
163
169
|
'UBT' => 'Ubuntu',
|
|
170
|
+
'VID' => 'VIDAA',
|
|
164
171
|
'WAS' => 'watchOS',
|
|
172
|
+
'WER' => 'Wear OS',
|
|
165
173
|
'WTV' => 'WebTV',
|
|
166
174
|
'WHS' => 'Whale OS',
|
|
167
175
|
'WIN' => 'Windows',
|
|
@@ -186,7 +194,7 @@ class DeviceDetector
|
|
|
186
194
|
|
|
187
195
|
OS_FAMILIES = {
|
|
188
196
|
'Android' => %w[ AND CYN FIR REM RZD MLD MCD YNS GRI HAR
|
|
189
|
-
ADR CLR BOS REV LEN SIR RRS],
|
|
197
|
+
ADR CLR BOS REV LEN SIR RRS WER PIC],
|
|
190
198
|
'AmigaOS' => %w[AMG MOR],
|
|
191
199
|
'BlackBerry' => %w[BLB QNX],
|
|
192
200
|
'Brew' => ['BMP'],
|
|
@@ -204,7 +212,7 @@ class DeviceDetector
|
|
|
204
212
|
ORD TOS RSO DEE FRE MAG FEN CAI PCL HAS
|
|
205
213
|
LOS DVK ROK OWR OTV KTV PUR PLA FUC PAR
|
|
206
214
|
FOR MON KAN ZEN LND LNS CHN AMZ TEN CST
|
|
207
|
-
NOV ROU ZOR
|
|
215
|
+
NOV ROU ZOR VID
|
|
208
216
|
],
|
|
209
217
|
'Mac' => ['MAC'],
|
|
210
218
|
'Mobile Gaming Console' => %w[PSP NDS XBX],
|
|
@@ -58,7 +58,29 @@ class DeviceDetector
|
|
|
58
58
|
end
|
|
59
59
|
|
|
60
60
|
def load_regexes(file_paths)
|
|
61
|
-
file_paths.map
|
|
61
|
+
file_paths.map do |path, full_path|
|
|
62
|
+
object = YAML.load_file(full_path)
|
|
63
|
+
object = rewrite_device_object!(object) if is_device_yml_file?(full_path)
|
|
64
|
+
object = rewrite_vendor_object!(object) if is_vendor_yml_file?(full_path)
|
|
65
|
+
|
|
66
|
+
[path, symbolize_keys!(object)]
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def is_device_yml_file?(file_path)
|
|
71
|
+
file_path.include?('/regexes/device/')
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
def is_vendor_yml_file?(file_path)
|
|
75
|
+
file_path.include?('/regexes/vendorfragments')
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def rewrite_vendor_object!(object)
|
|
79
|
+
object.map { |key, values| values.map { |v| { 'regex_name' => key, 'regex' => v } } }.flatten
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def rewrite_device_object!(object)
|
|
83
|
+
object.map { |key, value| [key, { 'regex_name' => key }.merge!(value)] }.to_h
|
|
62
84
|
end
|
|
63
85
|
|
|
64
86
|
def symbolize_keys!(object)
|
|
@@ -88,8 +110,8 @@ class DeviceDetector
|
|
|
88
110
|
Regexp.new('(?:^|[^A-Z0-9\-_]|[^A-Z0-9\-]_|sprd-|MZ-)(?:' + src + ')', Regexp::IGNORECASE)
|
|
89
111
|
end
|
|
90
112
|
|
|
91
|
-
def from_cache(key)
|
|
92
|
-
DeviceDetector.cache.get_or_set(key)
|
|
113
|
+
def from_cache(key, &block)
|
|
114
|
+
DeviceDetector.cache.get_or_set(key, &block)
|
|
93
115
|
end
|
|
94
116
|
end
|
|
95
117
|
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'set'
|
|
4
|
+
|
|
5
|
+
class DeviceDetector
|
|
6
|
+
class VendorFragment < Parser
|
|
7
|
+
def name
|
|
8
|
+
vendor_fragment_info
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
private
|
|
12
|
+
|
|
13
|
+
def vendor_fragment_info
|
|
14
|
+
from_cache(['vendor_fragment', self.class.name, user_agent]) do
|
|
15
|
+
return if regex_meta.nil? || regex_meta.empty?
|
|
16
|
+
|
|
17
|
+
regex_meta[:regex_name]
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
def filenames
|
|
22
|
+
['vendorfragments.yml']
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
data/lib/device_detector.rb
CHANGED
|
@@ -15,13 +15,34 @@ require 'device_detector/device'
|
|
|
15
15
|
require 'device_detector/os'
|
|
16
16
|
require 'device_detector/browser'
|
|
17
17
|
require 'device_detector/client_hint'
|
|
18
|
+
require 'device_detector/vendor_fragment'
|
|
18
19
|
|
|
19
20
|
class DeviceDetector
|
|
20
21
|
attr_reader :client_hint, :user_agent
|
|
21
22
|
|
|
22
23
|
def initialize(user_agent, headers = nil)
|
|
23
24
|
@client_hint = ClientHint.new(headers)
|
|
24
|
-
|
|
25
|
+
utf8_user_agent = encode_user_agent_if_needed(user_agent)
|
|
26
|
+
@user_agent = set_user_agent(utf8_user_agent)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# https://github.com/matomo-org/device-detector/blob/c235832dba13961ab0f71b681616baf1aa48de23/Parser/Device/AbstractDeviceParser.php#L1873
|
|
30
|
+
def set_user_agent(user_agent)
|
|
31
|
+
return user_agent if client_hint.model.nil?
|
|
32
|
+
|
|
33
|
+
regex = build_regex('Android 10[.\d]*; K(?: Build/|[;)])')
|
|
34
|
+
return user_agent unless user_agent =~ regex
|
|
35
|
+
|
|
36
|
+
version = client_hint.os_version || '10'
|
|
37
|
+
|
|
38
|
+
user_agent.gsub(regex, "Android #{version}, #{client_hint.model}")
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
def encode_user_agent_if_needed(user_agent)
|
|
42
|
+
return if user_agent.nil?
|
|
43
|
+
return user_agent if user_agent.encoding.name == 'UTF-8'
|
|
44
|
+
|
|
45
|
+
user_agent.encode('utf-8', 'binary', undef: :replace)
|
|
25
46
|
end
|
|
26
47
|
|
|
27
48
|
def name
|
|
@@ -53,13 +74,18 @@ class DeviceDetector
|
|
|
53
74
|
end
|
|
54
75
|
|
|
55
76
|
def device_name
|
|
77
|
+
return if fake_ua?
|
|
78
|
+
|
|
56
79
|
device.name || client_hint.model || fix_for_x_music
|
|
57
80
|
end
|
|
58
81
|
|
|
59
82
|
def device_brand
|
|
83
|
+
return if fake_ua?
|
|
84
|
+
|
|
60
85
|
# Assume all devices running iOS / Mac OS are from Apple
|
|
61
86
|
brand = device.brand
|
|
62
|
-
brand = 'Apple' if brand.nil? && [
|
|
87
|
+
brand = 'Apple' if brand.nil? && %w[iPadOS tvOS watchOS iOS Mac].include?(os_name)
|
|
88
|
+
|
|
63
89
|
brand
|
|
64
90
|
end
|
|
65
91
|
|
|
@@ -136,7 +162,8 @@ class DeviceDetector
|
|
|
136
162
|
t = 'tv' if t.nil? && tizen_samsung_tv?
|
|
137
163
|
|
|
138
164
|
# Devices running Kylo or Espial TV Browsers are assumed to be a TV
|
|
139
|
-
t = 'tv' if
|
|
165
|
+
t = 'tv' if ['Kylo', 'Espial TV Browser', 'LUJO TV Browser', 'LogicUI TV Browser',
|
|
166
|
+
'Open TV Browser'].include?(name)
|
|
140
167
|
|
|
141
168
|
# All devices containing TV fragment are assumed to be a tv
|
|
142
169
|
t = 'tv' if t.nil? && user_agent =~ build_regex('\(TV;')
|
|
@@ -206,6 +233,11 @@ class DeviceDetector
|
|
|
206
233
|
@os ||= OS.new(user_agent)
|
|
207
234
|
end
|
|
208
235
|
|
|
236
|
+
# https://github.com/matomo-org/device-detector/blob/827a3fab7e38c3274c18d2f5f5bc2a78b7ef4a3a/DeviceDetector.php#L921C5-L921C5
|
|
237
|
+
def fake_ua?
|
|
238
|
+
os_name == 'Android' && device.brand == 'Apple'
|
|
239
|
+
end
|
|
240
|
+
|
|
209
241
|
# https://github.com/matomo-org/device-detector/blob/be1c9ef486c247dc4886668da5ed0b1c49d90ba8/Parser/Client/Browser.php#L772
|
|
210
242
|
# Fix mobile browser names e.g. Chrome => Chrome Mobile
|
|
211
243
|
def mobile_fix?
|
|
@@ -218,7 +250,7 @@ class DeviceDetector
|
|
|
218
250
|
|
|
219
251
|
# Related to issue mentioned in device.rb#1562
|
|
220
252
|
def fix_for_x_music
|
|
221
|
-
user_agent
|
|
253
|
+
user_agent&.include?('X-music Ⅲ') ? 'X-Music III' : nil
|
|
222
254
|
end
|
|
223
255
|
|
|
224
256
|
def skip_os_version?
|
|
@@ -242,7 +274,7 @@ class DeviceDetector
|
|
|
242
274
|
end
|
|
243
275
|
|
|
244
276
|
def opera_tv_store?
|
|
245
|
-
user_agent =~ build_regex('Opera TV Store')
|
|
277
|
+
user_agent =~ build_regex('Opera TV Store|OMI/')
|
|
246
278
|
end
|
|
247
279
|
|
|
248
280
|
def opera_tablet?
|
data/regexes/bots.yml
CHANGED
|
@@ -13,6 +13,14 @@
|
|
|
13
13
|
name: 'Plesk International GmbH'
|
|
14
14
|
url: 'https://www.plesk.com'
|
|
15
15
|
|
|
16
|
+
- regex: 'Cloudflare-Healthchecks'
|
|
17
|
+
name: 'Cloudflare Health Checks'
|
|
18
|
+
category: 'Service Agent'
|
|
19
|
+
url: 'https://developers.cloudflare.com/health-checks/'
|
|
20
|
+
producer:
|
|
21
|
+
name: 'CloudFlare'
|
|
22
|
+
url: 'https://www.cloudflare.com/'
|
|
23
|
+
|
|
16
24
|
- regex: '360Spider'
|
|
17
25
|
name: '360Spider'
|
|
18
26
|
category: 'Search bot'
|
|
@@ -323,21 +331,53 @@
|
|
|
323
331
|
name: 'CloudFlare'
|
|
324
332
|
url: 'http://www.cloudflare.com'
|
|
325
333
|
|
|
326
|
-
- regex: '
|
|
334
|
+
- regex: 'Cloudflare-?Diagnostics'
|
|
327
335
|
name: 'Cloudflare Diagnostics'
|
|
328
336
|
category: 'Site Monitor'
|
|
329
337
|
url: 'https://www.cloudflare.com/'
|
|
330
338
|
producer:
|
|
331
339
|
name: 'Cloudflare'
|
|
332
|
-
url: 'https://www.cloudflare.com'
|
|
340
|
+
url: 'https://www.cloudflare.com/'
|
|
333
341
|
|
|
334
342
|
- regex: 'CloudFlare-AlwaysOnline'
|
|
335
343
|
name: 'CloudFlare Always Online'
|
|
336
344
|
category: 'Site Monitor'
|
|
337
|
-
url: '
|
|
345
|
+
url: 'https://www.cloudflare.com/always-online'
|
|
338
346
|
producer:
|
|
339
347
|
name: 'CloudFlare'
|
|
340
|
-
url: '
|
|
348
|
+
url: 'https://www.cloudflare.com/'
|
|
349
|
+
|
|
350
|
+
- regex: 'Cloudflare-SSLDetector'
|
|
351
|
+
name: 'Cloudflare SSL Detector'
|
|
352
|
+
category: 'Site Monitor'
|
|
353
|
+
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
|
354
|
+
producer:
|
|
355
|
+
name: 'CloudFlare'
|
|
356
|
+
url: 'https://www.cloudflare.com/'
|
|
357
|
+
|
|
358
|
+
- regex: 'Cloudflare Custom Hostname Verification'
|
|
359
|
+
name: 'Cloudflare Custom Hostname Verification'
|
|
360
|
+
category: 'Service Agent'
|
|
361
|
+
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
|
362
|
+
producer:
|
|
363
|
+
name: 'CloudFlare'
|
|
364
|
+
url: 'https://www.cloudflare.com/'
|
|
365
|
+
|
|
366
|
+
- regex: 'Cloudflare-Traffic-Manager'
|
|
367
|
+
name: 'Cloudflare Traffic Manager'
|
|
368
|
+
category: 'Site Monitor'
|
|
369
|
+
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
|
370
|
+
producer:
|
|
371
|
+
name: 'CloudFlare'
|
|
372
|
+
url: 'https://www.cloudflare.com/'
|
|
373
|
+
|
|
374
|
+
- regex: 'https://developers.cloudflare.com/security-center/'
|
|
375
|
+
name: 'Cloudflare Security Insights'
|
|
376
|
+
category: 'Site Monitor'
|
|
377
|
+
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
|
378
|
+
producer:
|
|
379
|
+
name: 'CloudFlare'
|
|
380
|
+
url: 'https://www.cloudflare.com/'
|
|
341
381
|
|
|
342
382
|
- regex: 'coccoc.com'
|
|
343
383
|
name: 'Cốc Cốc Bot'
|
|
@@ -704,7 +744,15 @@
|
|
|
704
744
|
name: 'Visual Meta'
|
|
705
745
|
url: 'https://www.shopalike.cz/'
|
|
706
746
|
|
|
707
|
-
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Producer)|Google.*/\+/web/snippet'
|
|
747
|
+
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|InspectionTool|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Other|Prober|Producer)|Google.*/\+/web/snippet'
|
|
748
|
+
name: 'Googlebot'
|
|
749
|
+
category: 'Search bot'
|
|
750
|
+
url: 'http://www.google.com/bot.html'
|
|
751
|
+
producer:
|
|
752
|
+
name: 'Google Inc.'
|
|
753
|
+
url: 'http://www.google.com'
|
|
754
|
+
|
|
755
|
+
- regex: '^Google$'
|
|
708
756
|
name: 'Googlebot'
|
|
709
757
|
category: 'Search bot'
|
|
710
758
|
url: 'http://www.google.com/bot.html'
|
|
@@ -727,6 +775,11 @@
|
|
|
727
775
|
name: 'HubSpot Inc.'
|
|
728
776
|
url: 'https://www.hubspot.com'
|
|
729
777
|
|
|
778
|
+
- regex: 'vuhuvBot'
|
|
779
|
+
name: 'Vuhuv Bot'
|
|
780
|
+
category: 'Crawler'
|
|
781
|
+
url: 'http://vuhuv.com/bot.html'
|
|
782
|
+
|
|
730
783
|
- regex: 'HTTPMon'
|
|
731
784
|
name: 'HTTPMon'
|
|
732
785
|
category: 'Site Monitor'
|
|
@@ -1020,6 +1073,14 @@
|
|
|
1020
1073
|
- regex: 'Octopus [0-9]'
|
|
1021
1074
|
name: 'Octopus'
|
|
1022
1075
|
|
|
1076
|
+
- regex: 'OnlineOrNot.com_bot'
|
|
1077
|
+
name: 'OnlineOrNot Bot'
|
|
1078
|
+
category: 'Site Monitor'
|
|
1079
|
+
url: 'https://onlineornot.com/website-monitoring'
|
|
1080
|
+
producer:
|
|
1081
|
+
name: 'OnlineOrNot'
|
|
1082
|
+
url: 'https://onlineornot.com'
|
|
1083
|
+
|
|
1023
1084
|
- regex: 'omgili'
|
|
1024
1085
|
name: 'Omgili bot'
|
|
1025
1086
|
category: 'Search bot'
|
|
@@ -1741,7 +1802,15 @@
|
|
|
1741
1802
|
- regex: 'Y!J-BRW'
|
|
1742
1803
|
name: 'Yahoo! Japan BRW'
|
|
1743
1804
|
category: 'Crawler'
|
|
1744
|
-
url: 'https://
|
|
1805
|
+
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
|
1806
|
+
producer:
|
|
1807
|
+
name: 'Yahoo! Japan Corp.'
|
|
1808
|
+
url: 'https://www.yahoo.co.jp/'
|
|
1809
|
+
|
|
1810
|
+
- regex: 'Y!J-WSC'
|
|
1811
|
+
name: 'Yahoo! Japan WSC'
|
|
1812
|
+
category: 'Crawler'
|
|
1813
|
+
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
|
1745
1814
|
producer:
|
|
1746
1815
|
name: 'Yahoo! Japan Corp.'
|
|
1747
1816
|
url: 'https://www.yahoo.co.jp/'
|
|
@@ -1966,7 +2035,7 @@
|
|
|
1966
2035
|
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
|
1967
2036
|
name: 'RSSRadio Bot'
|
|
1968
2037
|
|
|
1969
|
-
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel)'
|
|
2038
|
+
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel|Magellan|CustomUserAgent)'
|
|
1970
2039
|
name: 'Generic Bot'
|
|
1971
2040
|
|
|
1972
2041
|
- regex: '^sentry'
|
|
@@ -2025,7 +2094,15 @@
|
|
|
2025
2094
|
name: 'WooRank sprl'
|
|
2026
2095
|
url: 'https://www.woorank.com/'
|
|
2027
2096
|
|
|
2028
|
-
- regex: '
|
|
2097
|
+
- regex: 'by Siteimprove\.com'
|
|
2098
|
+
name: 'Siteimprove'
|
|
2099
|
+
category: 'Search bot'
|
|
2100
|
+
url: 'https://siteimprove.com/'
|
|
2101
|
+
producer:
|
|
2102
|
+
name: 'Siteimprove GmbH'
|
|
2103
|
+
url: 'https://siteimprove.com/'
|
|
2104
|
+
|
|
2105
|
+
- regex: 'Image size by Siteimprove\.com'
|
|
2029
2106
|
name: 'Siteimprove'
|
|
2030
2107
|
category: 'Search bot'
|
|
2031
2108
|
url: 'https://siteimprove.com/'
|
|
@@ -2153,6 +2230,14 @@
|
|
|
2153
2230
|
name: 'Startpagina B.V.'
|
|
2154
2231
|
url: 'https://www.startpagina.nl/'
|
|
2155
2232
|
|
|
2233
|
+
- regex: 'MoodleBot-Linkchecker'
|
|
2234
|
+
name: 'MoodleBot Linkchecker'
|
|
2235
|
+
category: 'Search bot'
|
|
2236
|
+
url: 'https://docs.moodle.org/en/Usage'
|
|
2237
|
+
producer:
|
|
2238
|
+
name: 'Moodle Pty Ltd'
|
|
2239
|
+
url: 'https://moodle.org/'
|
|
2240
|
+
|
|
2156
2241
|
- regex: 'GTmetrix'
|
|
2157
2242
|
name: 'GTmetrix'
|
|
2158
2243
|
category: 'Crawler'
|
|
@@ -2412,6 +2497,13 @@
|
|
|
2412
2497
|
producer:
|
|
2413
2498
|
name: 'Hatena Co., Ltd.'
|
|
2414
2499
|
url: 'https://www.hatena.ne.jp'
|
|
2500
|
+
- regex: 'Hatena-?Bookmark'
|
|
2501
|
+
name: 'Hatena Bookmark'
|
|
2502
|
+
category: 'Crawler'
|
|
2503
|
+
url: 'https://www.hatena.ne.jp/faq/'
|
|
2504
|
+
producer:
|
|
2505
|
+
name: 'Hatena Co., Ltd.'
|
|
2506
|
+
url: 'https://www.hatena.ne.jp'
|
|
2415
2507
|
|
|
2416
2508
|
- regex: 'RyowlEngine/(\d+)'
|
|
2417
2509
|
name: 'Ryowl'
|
|
@@ -2556,6 +2648,14 @@
|
|
|
2556
2648
|
category: 'Security Checker'
|
|
2557
2649
|
url: 'https://github.com/LeakIX/l9explore'
|
|
2558
2650
|
|
|
2651
|
+
- regex: 'l9scan/|^Lkx-(.*)/([\d+.]+)'
|
|
2652
|
+
name: 'LeakIX'
|
|
2653
|
+
category: 'Security Checker'
|
|
2654
|
+
url: 'https://leakix.net/'
|
|
2655
|
+
producer:
|
|
2656
|
+
name: 'BaDaaS SRL'
|
|
2657
|
+
url: 'https://leakix.net/'
|
|
2658
|
+
|
|
2559
2659
|
- regex: 'MegaIndex.ru/([\d+\.])'
|
|
2560
2660
|
name: 'MegaIndex'
|
|
2561
2661
|
category: 'Crawler'
|
|
@@ -3047,14 +3147,6 @@
|
|
|
3047
3147
|
name: 'New Work SE'
|
|
3048
3148
|
url: 'https://www.xing.com/'
|
|
3049
3149
|
|
|
3050
|
-
- regex: '^Lkx-(.*)/([\d+.]+)'
|
|
3051
|
-
name: 'LeakIX'
|
|
3052
|
-
category: 'Security Checker'
|
|
3053
|
-
url: 'https://leakix.net/'
|
|
3054
|
-
producer:
|
|
3055
|
-
name: 'BaDaaS SRL'
|
|
3056
|
-
url: 'https://leakix.net/'
|
|
3057
|
-
|
|
3058
3150
|
- regex: 'RepoLookoutBot/([\d+.]+)'
|
|
3059
3151
|
name: 'Repo Lookout'
|
|
3060
3152
|
category: 'Security Checker'
|
|
@@ -3217,6 +3309,205 @@
|
|
|
3217
3309
|
category: 'Crawler'
|
|
3218
3310
|
url: 'https://reqbin.com/curl'
|
|
3219
3311
|
|
|
3312
|
+
- regex: 'XoviBot/([\d+.]+)'
|
|
3313
|
+
name: 'XoviBot'
|
|
3314
|
+
category: 'Crawler'
|
|
3315
|
+
url: 'https://www.xovibot.net'
|
|
3316
|
+
producer:
|
|
3317
|
+
name: 'Xovi GmbH'
|
|
3318
|
+
url: 'http://www.xovi.de'
|
|
3319
|
+
|
|
3320
|
+
- regex: 'Overcast/([\d+.]+) Podcast Sync'
|
|
3321
|
+
name: 'Overcast Podcast Sync'
|
|
3322
|
+
category: 'Service Agent'
|
|
3323
|
+
url: 'https://overcast.fm/podcasterinfo'
|
|
3324
|
+
|
|
3325
|
+
- regex: '^Verity/([\d+.]+)'
|
|
3326
|
+
name: 'GumGum Verity'
|
|
3327
|
+
category: 'Service Agent'
|
|
3328
|
+
url: 'https://gumgum.com/verity'
|
|
3329
|
+
|
|
3330
|
+
- regex: 'hackermention'
|
|
3331
|
+
name: 'hackermention'
|
|
3332
|
+
category: 'Feed Reader'
|
|
3333
|
+
url: 'https://github.com/snarfed/hackermention'
|
|
3334
|
+
|
|
3335
|
+
- regex: 'BitSightBot/([\d+.]+)'
|
|
3336
|
+
name: 'BitSight'
|
|
3337
|
+
category: 'Security Checker'
|
|
3338
|
+
url: 'https://www.bitsight.com/'
|
|
3339
|
+
producer:
|
|
3340
|
+
name: 'BitSight Technologies, Inc.'
|
|
3341
|
+
url: 'https://www.bitsight.com/'
|
|
3342
|
+
|
|
3343
|
+
- regex: 'Ezgif/([\d+.]+)'
|
|
3344
|
+
name: 'Ezgif'
|
|
3345
|
+
category: 'Service Agent'
|
|
3346
|
+
url: 'https://ezgif.com/about'
|
|
3347
|
+
|
|
3348
|
+
- regex: 'intelx.io_bot'
|
|
3349
|
+
name: 'Intelligence X'
|
|
3350
|
+
category: 'Crawler'
|
|
3351
|
+
url: 'https://intelx.io/'
|
|
3352
|
+
producer:
|
|
3353
|
+
name: 'Kleissner Investments s.r.o.'
|
|
3354
|
+
url: 'https://intelx.io/'
|
|
3355
|
+
|
|
3356
|
+
- regex: 'FemtosearchBot/([\d+.]+)'
|
|
3357
|
+
name: 'Femtosearch'
|
|
3358
|
+
category: 'Crawler'
|
|
3359
|
+
url: 'http://femtosearch.com/'
|
|
3360
|
+
producer:
|
|
3361
|
+
name: 'Grier Forensics, LLC'
|
|
3362
|
+
url: 'https://www.grierforensics.com/'
|
|
3363
|
+
|
|
3364
|
+
- regex: 'AdsTxtCrawler/([\d+.]+)'
|
|
3365
|
+
name: 'AdsTxtCrawler'
|
|
3366
|
+
category: 'Crawler'
|
|
3367
|
+
url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
|
|
3368
|
+
producer:
|
|
3369
|
+
name: 'IAB Technology Laboratory, Inc.'
|
|
3370
|
+
url: 'https://iabtechlab.com/'
|
|
3371
|
+
|
|
3372
|
+
- regex: 'Morningscore'
|
|
3373
|
+
name: 'Morningscore Bot'
|
|
3374
|
+
category: 'Crawler'
|
|
3375
|
+
url: 'https://morningscore.io/'
|
|
3376
|
+
producer:
|
|
3377
|
+
name: 'Morningscore'
|
|
3378
|
+
url: 'https://morningscore.io/'
|
|
3379
|
+
|
|
3380
|
+
- regex: 'Uptime-Kuma/([\d+.]+)'
|
|
3381
|
+
name: 'Uptime-Kuma'
|
|
3382
|
+
category: 'Site Monitor'
|
|
3383
|
+
url: 'https://github.com/louislam/uptime-kuma'
|
|
3384
|
+
|
|
3385
|
+
- regex: 'ChatGPT-User'
|
|
3386
|
+
name: 'ChatGPT'
|
|
3387
|
+
category: 'Crawler'
|
|
3388
|
+
url: 'https://platform.openai.com/docs/plugins/bot'
|
|
3389
|
+
producer:
|
|
3390
|
+
name: 'OpenAI OpCo, LLC'
|
|
3391
|
+
url: 'https://openai.com/'
|
|
3392
|
+
|
|
3393
|
+
- regex: 'BrightEdge Crawler/([\d+.]+)'
|
|
3394
|
+
name: 'BrightEdge'
|
|
3395
|
+
category: 'Crawler'
|
|
3396
|
+
url: 'https://www.brightedge.com/'
|
|
3397
|
+
producer:
|
|
3398
|
+
name: 'BrightEdge Technologies, Inc'
|
|
3399
|
+
url: 'https://www.brightedge.com/'
|
|
3400
|
+
|
|
3401
|
+
- regex: 'sfFeedReader/([\d+.]+)'
|
|
3402
|
+
name: 'sfFeedReader'
|
|
3403
|
+
url: 'https://github.com/diem-project/sfFeed2Plugin'
|
|
3404
|
+
category: 'Feed Fetcher'
|
|
3405
|
+
|
|
3406
|
+
- regex: 'cyberscan.io'
|
|
3407
|
+
name: 'Cyberscan'
|
|
3408
|
+
category: 'Security Checker'
|
|
3409
|
+
url: 'https://www.cyberscan.io/'
|
|
3410
|
+
producer:
|
|
3411
|
+
name: 'DGC Verwaltungs GmbH'
|
|
3412
|
+
url: 'https://dgc.org/'
|
|
3413
|
+
|
|
3414
|
+
- regex: 'deepcrawl\.com'
|
|
3415
|
+
name: 'Lumar'
|
|
3416
|
+
category: 'Crawler'
|
|
3417
|
+
url: 'https://deepcrawl.com/bot'
|
|
3418
|
+
producer:
|
|
3419
|
+
name: 'Lumar'
|
|
3420
|
+
url: 'https://www.lumar.io/'
|
|
3421
|
+
|
|
3422
|
+
- regex: 'RepoLookoutBot'
|
|
3423
|
+
name: 'Repo Lookout'
|
|
3424
|
+
category: 'Crawler'
|
|
3425
|
+
url: 'https://www.repo-lookout.org/'
|
|
3426
|
+
producer:
|
|
3427
|
+
name: 'Crissy Field GmbH'
|
|
3428
|
+
url: 'https://www.crissyfield.de/'
|
|
3429
|
+
|
|
3430
|
+
- regex: 'researchscan.comsys.rwth-aachen.de'
|
|
3431
|
+
name: 'Research Scan'
|
|
3432
|
+
category: 'Crawler'
|
|
3433
|
+
url: 'http://researchscan.comsys.rwth-aachen.de/'
|
|
3434
|
+
producer:
|
|
3435
|
+
name: 'RWTH Aachen University'
|
|
3436
|
+
url: 'https://www.comsys.rwth-aachen.de/'
|
|
3437
|
+
|
|
3438
|
+
- regex: 'newspaper/([\d+.]+)'
|
|
3439
|
+
name: 'Scraping Robot'
|
|
3440
|
+
category: 'Crawler'
|
|
3441
|
+
url: 'https://scrapingrobot.com/'
|
|
3442
|
+
producer:
|
|
3443
|
+
name: 'Sprious LLC'
|
|
3444
|
+
url: 'https://sprious.com/'
|
|
3445
|
+
|
|
3446
|
+
- regex: 'GPTBot/([\d+.]+)'
|
|
3447
|
+
name: 'GPTBot'
|
|
3448
|
+
category: 'Crawler'
|
|
3449
|
+
url: 'https://platform.openai.com/docs/gptbot'
|
|
3450
|
+
producer:
|
|
3451
|
+
name: 'OpenAI OpCo, LLC'
|
|
3452
|
+
url: 'https://openai.com/'
|
|
3453
|
+
|
|
3454
|
+
- regex: 'Ant.com beta/([\d+.]+)'
|
|
3455
|
+
name: 'Ant'
|
|
3456
|
+
category: 'Crawler'
|
|
3457
|
+
url: 'https://www.ant.com/'
|
|
3458
|
+
producer:
|
|
3459
|
+
name: 'Ant.com Ltd.'
|
|
3460
|
+
url: 'https://www.ant.com/'
|
|
3461
|
+
|
|
3462
|
+
- regex: 'WebwikiBot/([\d+.]+)'
|
|
3463
|
+
name: 'Webwiki'
|
|
3464
|
+
category: 'Crawler'
|
|
3465
|
+
url: 'https://www.webwiki.com/'
|
|
3466
|
+
producer:
|
|
3467
|
+
name: 'webwiki GmbH'
|
|
3468
|
+
url: 'https://www.webwiki.com/'
|
|
3469
|
+
|
|
3470
|
+
- regex: 'phpMyAdmin'
|
|
3471
|
+
name: 'phpMyAdmin'
|
|
3472
|
+
category: 'Service Agent'
|
|
3473
|
+
url: 'https://www.phpmyadmin.net/'
|
|
3474
|
+
|
|
3475
|
+
- regex: 'Matomo/([\d+.]+)'
|
|
3476
|
+
name: 'Matomo'
|
|
3477
|
+
category: 'Service Agent'
|
|
3478
|
+
url: 'https://github.com/matomo-org/matomo'
|
|
3479
|
+
producer:
|
|
3480
|
+
name: 'InnoCraft Ltd'
|
|
3481
|
+
url: 'https://matomo.org/'
|
|
3482
|
+
|
|
3483
|
+
- regex: 'Prometheus/([\d+.]+)'
|
|
3484
|
+
name: 'Prometheus'
|
|
3485
|
+
category: 'Service Agent'
|
|
3486
|
+
url: 'https://github.com/prometheus/prometheus'
|
|
3487
|
+
producer:
|
|
3488
|
+
name: 'The Linux Foundation'
|
|
3489
|
+
url: 'https://www.cncf.io/'
|
|
3490
|
+
|
|
3491
|
+
- regex: 'ArchiveTeam ArchiveBot'
|
|
3492
|
+
name: 'ArchiveBot'
|
|
3493
|
+
category: 'Crawler'
|
|
3494
|
+
url: 'https://wiki.archiveteam.org/index.php?title=ArchiveBot'
|
|
3495
|
+
producer:
|
|
3496
|
+
name: 'ArchiveTeam'
|
|
3497
|
+
url: 'https://wiki.archiveteam.org/'
|
|
3498
|
+
|
|
3499
|
+
- regex: 'MADBbot/([\d+.]+)'
|
|
3500
|
+
name: 'MADBbot'
|
|
3501
|
+
category: 'Crawler'
|
|
3502
|
+
url: 'https://madb.zapto.org/bot.html'
|
|
3503
|
+
|
|
3504
|
+
- regex: 'MeltwaterNews'
|
|
3505
|
+
name: 'MeltwaterNews'
|
|
3506
|
+
category: 'Crawler'
|
|
3507
|
+
producer:
|
|
3508
|
+
name: 'Meltwater Deutschland GmbH'
|
|
3509
|
+
url: 'https://www.meltwater.com/'
|
|
3510
|
+
|
|
3220
3511
|
# Generic detections
|
|
3221
|
-
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
|
|
3512
|
+
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider|^firefox$|^chrome$)([^a-z]|$)'
|
|
3222
3513
|
name: 'Generic Bot'
|