device_detector 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -94,6 +94,7 @@ class DeviceDetector
94
94
  'INF' => 'Inferno',
95
95
  'JME' => 'Java ME',
96
96
  'KOS' => 'KaiOS',
97
+ 'KAL' => 'Kali',
97
98
  'KAN' => 'Kanotix',
98
99
  'KNO' => 'Knoppix',
99
100
  'KTV' => 'KreaTV',
@@ -104,6 +105,7 @@ class DeviceDetector
104
105
  'LEN' => 'Lineage OS',
105
106
  'LBT' => 'Lubuntu',
106
107
  'LOS' => 'Lumin OS',
108
+ 'LUN' => 'LuneOS',
107
109
  'VLN' => 'VectorLinux',
108
110
  'MAC' => 'Mac',
109
111
  'MAE' => 'Maemo',
@@ -126,14 +128,17 @@ class DeviceDetector
126
128
  'OBS' => 'OpenBSD',
127
129
  'OWR' => 'OpenWrt',
128
130
  'OTV' => 'Opera TV',
131
+ 'ORA' => 'Oracle Linux',
129
132
  'ORD' => 'Ordissimo',
130
133
  'PAR' => 'Pardus',
131
134
  'PCL' => 'PCLinuxOS',
135
+ 'PIC' => 'PICO OS',
132
136
  'PLA' => 'Plasma Mobile',
133
137
  'PSP' => 'PlayStation Portable',
134
138
  'PS3' => 'PlayStation',
135
139
  'PUR' => 'PureOS',
136
140
  'RHT' => 'Red Hat',
141
+ 'RED' => 'RedOS',
137
142
  'REV' => 'Revenge OS',
138
143
  'ROS' => 'RISC OS',
139
144
  'ROK' => 'Roku OS',
@@ -159,9 +164,12 @@ class DeviceDetector
159
164
  'TEN' => 'TencentOS',
160
165
  'TDX' => 'ThreadX',
161
166
  'TIZ' => 'Tizen',
167
+ 'TIV' => 'TiVo OS',
162
168
  'TOS' => 'TmaxOS',
163
169
  'UBT' => 'Ubuntu',
170
+ 'VID' => 'VIDAA',
164
171
  'WAS' => 'watchOS',
172
+ 'WER' => 'Wear OS',
165
173
  'WTV' => 'WebTV',
166
174
  'WHS' => 'Whale OS',
167
175
  'WIN' => 'Windows',
@@ -186,7 +194,7 @@ class DeviceDetector
186
194
 
187
195
  OS_FAMILIES = {
188
196
  'Android' => %w[ AND CYN FIR REM RZD MLD MCD YNS GRI HAR
189
- ADR CLR BOS REV LEN SIR RRS],
197
+ ADR CLR BOS REV LEN SIR RRS WER PIC],
190
198
  'AmigaOS' => %w[AMG MOR],
191
199
  'BlackBerry' => %w[BLB QNX],
192
200
  'Brew' => ['BMP'],
@@ -204,7 +212,7 @@ class DeviceDetector
204
212
  ORD TOS RSO DEE FRE MAG FEN CAI PCL HAS
205
213
  LOS DVK ROK OWR OTV KTV PUR PLA FUC PAR
206
214
  FOR MON KAN ZEN LND LNS CHN AMZ TEN CST
207
- NOV ROU ZOR
215
+ NOV ROU ZOR VID
208
216
  ],
209
217
  'Mac' => ['MAC'],
210
218
  'Mobile Gaming Console' => %w[PSP NDS XBX],
@@ -58,7 +58,29 @@ class DeviceDetector
58
58
  end
59
59
 
60
60
  def load_regexes(file_paths)
61
- file_paths.map { |path, full_path| [path, symbolize_keys!(YAML.load_file(full_path))] }
61
+ file_paths.map do |path, full_path|
62
+ object = YAML.load_file(full_path)
63
+ object = rewrite_device_object!(object) if is_device_yml_file?(full_path)
64
+ object = rewrite_vendor_object!(object) if is_vendor_yml_file?(full_path)
65
+
66
+ [path, symbolize_keys!(object)]
67
+ end
68
+ end
69
+
70
+ def is_device_yml_file?(file_path)
71
+ file_path.include?('/regexes/device/')
72
+ end
73
+
74
+ def is_vendor_yml_file?(file_path)
75
+ file_path.include?('/regexes/vendorfragments')
76
+ end
77
+
78
+ def rewrite_vendor_object!(object)
79
+ object.map { |key, values| values.map { |v| { 'regex_name' => key, 'regex' => v } } }.flatten
80
+ end
81
+
82
+ def rewrite_device_object!(object)
83
+ object.map { |key, value| [key, { 'regex_name' => key }.merge!(value)] }.to_h
62
84
  end
63
85
 
64
86
  def symbolize_keys!(object)
@@ -88,8 +110,8 @@ class DeviceDetector
88
110
  Regexp.new('(?:^|[^A-Z0-9\-_]|[^A-Z0-9\-]_|sprd-|MZ-)(?:' + src + ')', Regexp::IGNORECASE)
89
111
  end
90
112
 
91
- def from_cache(key)
92
- DeviceDetector.cache.get_or_set(key) { yield }
113
+ def from_cache(key, &block)
114
+ DeviceDetector.cache.get_or_set(key, &block)
93
115
  end
94
116
  end
95
117
  end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+
5
+ class DeviceDetector
6
+ class VendorFragment < Parser
7
+ def name
8
+ vendor_fragment_info
9
+ end
10
+
11
+ private
12
+
13
+ def vendor_fragment_info
14
+ from_cache(['vendor_fragment', self.class.name, user_agent]) do
15
+ return if regex_meta.nil? || regex_meta.empty?
16
+
17
+ regex_meta[:regex_name]
18
+ end
19
+ end
20
+
21
+ def filenames
22
+ ['vendorfragments.yml']
23
+ end
24
+ end
25
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class DeviceDetector
4
- VERSION = '1.1.0'
4
+ VERSION = '1.1.2'
5
5
  end
@@ -15,13 +15,34 @@ require 'device_detector/device'
15
15
  require 'device_detector/os'
16
16
  require 'device_detector/browser'
17
17
  require 'device_detector/client_hint'
18
+ require 'device_detector/vendor_fragment'
18
19
 
19
20
  class DeviceDetector
20
21
  attr_reader :client_hint, :user_agent
21
22
 
22
23
  def initialize(user_agent, headers = nil)
23
24
  @client_hint = ClientHint.new(headers)
24
- @user_agent = user_agent
25
+ utf8_user_agent = encode_user_agent_if_needed(user_agent)
26
+ @user_agent = set_user_agent(utf8_user_agent)
27
+ end
28
+
29
+ # https://github.com/matomo-org/device-detector/blob/c235832dba13961ab0f71b681616baf1aa48de23/Parser/Device/AbstractDeviceParser.php#L1873
30
+ def set_user_agent(user_agent)
31
+ return user_agent if client_hint.model.nil?
32
+
33
+ regex = build_regex('Android 10[.\d]*; K(?: Build/|[;)])')
34
+ return user_agent unless user_agent =~ regex
35
+
36
+ version = client_hint.os_version || '10'
37
+
38
+ user_agent.gsub(regex, "Android #{version}, #{client_hint.model}")
39
+ end
40
+
41
+ def encode_user_agent_if_needed(user_agent)
42
+ return if user_agent.nil?
43
+ return user_agent if user_agent.encoding.name == 'UTF-8'
44
+
45
+ user_agent.encode('utf-8', 'binary', undef: :replace)
25
46
  end
26
47
 
27
48
  def name
@@ -53,13 +74,18 @@ class DeviceDetector
53
74
  end
54
75
 
55
76
  def device_name
77
+ return if fake_ua?
78
+
56
79
  device.name || client_hint.model || fix_for_x_music
57
80
  end
58
81
 
59
82
  def device_brand
83
+ return if fake_ua?
84
+
60
85
  # Assume all devices running iOS / Mac OS are from Apple
61
86
  brand = device.brand
62
- brand = 'Apple' if brand.nil? && ['Apple TV', 'iOS', 'Mac'].include?(os_name)
87
+ brand = 'Apple' if brand.nil? && %w[iPadOS tvOS watchOS iOS Mac].include?(os_name)
88
+
63
89
  brand
64
90
  end
65
91
 
@@ -136,7 +162,8 @@ class DeviceDetector
136
162
  t = 'tv' if t.nil? && tizen_samsung_tv?
137
163
 
138
164
  # Devices running Kylo or Espial TV Browsers are assumed to be a TV
139
- t = 'tv' if t.nil? && ['Kylo', 'Espial TV Browser'].include?(name)
165
+ t = 'tv' if ['Kylo', 'Espial TV Browser', 'LUJO TV Browser', 'LogicUI TV Browser',
166
+ 'Open TV Browser'].include?(name)
140
167
 
141
168
  # All devices containing TV fragment are assumed to be a tv
142
169
  t = 'tv' if t.nil? && user_agent =~ build_regex('\(TV;')
@@ -206,6 +233,11 @@ class DeviceDetector
206
233
  @os ||= OS.new(user_agent)
207
234
  end
208
235
 
236
+ # https://github.com/matomo-org/device-detector/blob/827a3fab7e38c3274c18d2f5f5bc2a78b7ef4a3a/DeviceDetector.php#L921C5-L921C5
237
+ def fake_ua?
238
+ os_name == 'Android' && device.brand == 'Apple'
239
+ end
240
+
209
241
  # https://github.com/matomo-org/device-detector/blob/be1c9ef486c247dc4886668da5ed0b1c49d90ba8/Parser/Client/Browser.php#L772
210
242
  # Fix mobile browser names e.g. Chrome => Chrome Mobile
211
243
  def mobile_fix?
@@ -218,7 +250,7 @@ class DeviceDetector
218
250
 
219
251
  # Related to issue mentioned in device.rb#1562
220
252
  def fix_for_x_music
221
- user_agent.include?('X-music Ⅲ') ? 'X-Music III' : nil
253
+ user_agent&.include?('X-music Ⅲ') ? 'X-Music III' : nil
222
254
  end
223
255
 
224
256
  def skip_os_version?
@@ -242,7 +274,7 @@ class DeviceDetector
242
274
  end
243
275
 
244
276
  def opera_tv_store?
245
- user_agent =~ build_regex('Opera TV Store')
277
+ user_agent =~ build_regex('Opera TV Store|OMI/')
246
278
  end
247
279
 
248
280
  def opera_tablet?
data/regexes/bots.yml CHANGED
@@ -13,6 +13,14 @@
13
13
  name: 'Plesk International GmbH'
14
14
  url: 'https://www.plesk.com'
15
15
 
16
+ - regex: 'Cloudflare-Healthchecks'
17
+ name: 'Cloudflare Health Checks'
18
+ category: 'Service Agent'
19
+ url: 'https://developers.cloudflare.com/health-checks/'
20
+ producer:
21
+ name: 'CloudFlare'
22
+ url: 'https://www.cloudflare.com/'
23
+
16
24
  - regex: '360Spider'
17
25
  name: '360Spider'
18
26
  category: 'Search bot'
@@ -323,21 +331,53 @@
323
331
  name: 'CloudFlare'
324
332
  url: 'http://www.cloudflare.com'
325
333
 
326
- - regex: 'CloudflareDiagnostics'
334
+ - regex: 'Cloudflare-?Diagnostics'
327
335
  name: 'Cloudflare Diagnostics'
328
336
  category: 'Site Monitor'
329
337
  url: 'https://www.cloudflare.com/'
330
338
  producer:
331
339
  name: 'Cloudflare'
332
- url: 'https://www.cloudflare.com'
340
+ url: 'https://www.cloudflare.com/'
333
341
 
334
342
  - regex: 'CloudFlare-AlwaysOnline'
335
343
  name: 'CloudFlare Always Online'
336
344
  category: 'Site Monitor'
337
- url: 'http://www.cloudflare.com/always-online'
345
+ url: 'https://www.cloudflare.com/always-online'
338
346
  producer:
339
347
  name: 'CloudFlare'
340
- url: 'http://www.cloudflare.com'
348
+ url: 'https://www.cloudflare.com/'
349
+
350
+ - regex: 'Cloudflare-SSLDetector'
351
+ name: 'Cloudflare SSL Detector'
352
+ category: 'Site Monitor'
353
+ url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
354
+ producer:
355
+ name: 'CloudFlare'
356
+ url: 'https://www.cloudflare.com/'
357
+
358
+ - regex: 'Cloudflare Custom Hostname Verification'
359
+ name: 'Cloudflare Custom Hostname Verification'
360
+ category: 'Service Agent'
361
+ url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
362
+ producer:
363
+ name: 'CloudFlare'
364
+ url: 'https://www.cloudflare.com/'
365
+
366
+ - regex: 'Cloudflare-Traffic-Manager'
367
+ name: 'Cloudflare Traffic Manager'
368
+ category: 'Site Monitor'
369
+ url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
370
+ producer:
371
+ name: 'CloudFlare'
372
+ url: 'https://www.cloudflare.com/'
373
+
374
+ - regex: 'https://developers.cloudflare.com/security-center/'
375
+ name: 'Cloudflare Security Insights'
376
+ category: 'Site Monitor'
377
+ url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
378
+ producer:
379
+ name: 'CloudFlare'
380
+ url: 'https://www.cloudflare.com/'
341
381
 
342
382
  - regex: 'coccoc.com'
343
383
  name: 'Cốc Cốc Bot'
@@ -704,7 +744,15 @@
704
744
  name: 'Visual Meta'
705
745
  url: 'https://www.shopalike.cz/'
706
746
 
707
- - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Producer)|Google.*/\+/web/snippet'
747
+ - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|InspectionTool|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Other|Prober|Producer)|Google.*/\+/web/snippet'
748
+ name: 'Googlebot'
749
+ category: 'Search bot'
750
+ url: 'http://www.google.com/bot.html'
751
+ producer:
752
+ name: 'Google Inc.'
753
+ url: 'http://www.google.com'
754
+
755
+ - regex: '^Google$'
708
756
  name: 'Googlebot'
709
757
  category: 'Search bot'
710
758
  url: 'http://www.google.com/bot.html'
@@ -727,6 +775,11 @@
727
775
  name: 'HubSpot Inc.'
728
776
  url: 'https://www.hubspot.com'
729
777
 
778
+ - regex: 'vuhuvBot'
779
+ name: 'Vuhuv Bot'
780
+ category: 'Crawler'
781
+ url: 'http://vuhuv.com/bot.html'
782
+
730
783
  - regex: 'HTTPMon'
731
784
  name: 'HTTPMon'
732
785
  category: 'Site Monitor'
@@ -1020,6 +1073,14 @@
1020
1073
  - regex: 'Octopus [0-9]'
1021
1074
  name: 'Octopus'
1022
1075
 
1076
+ - regex: 'OnlineOrNot.com_bot'
1077
+ name: 'OnlineOrNot Bot'
1078
+ category: 'Site Monitor'
1079
+ url: 'https://onlineornot.com/website-monitoring'
1080
+ producer:
1081
+ name: 'OnlineOrNot'
1082
+ url: 'https://onlineornot.com'
1083
+
1023
1084
  - regex: 'omgili'
1024
1085
  name: 'Omgili bot'
1025
1086
  category: 'Search bot'
@@ -1741,7 +1802,15 @@
1741
1802
  - regex: 'Y!J-BRW'
1742
1803
  name: 'Yahoo! Japan BRW'
1743
1804
  category: 'Crawler'
1744
- url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
1805
+ url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
1806
+ producer:
1807
+ name: 'Yahoo! Japan Corp.'
1808
+ url: 'https://www.yahoo.co.jp/'
1809
+
1810
+ - regex: 'Y!J-WSC'
1811
+ name: 'Yahoo! Japan WSC'
1812
+ category: 'Crawler'
1813
+ url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
1745
1814
  producer:
1746
1815
  name: 'Yahoo! Japan Corp.'
1747
1816
  url: 'https://www.yahoo.co.jp/'
@@ -1966,7 +2035,7 @@
1966
2035
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1967
2036
  name: 'RSSRadio Bot'
1968
2037
 
1969
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel)'
2038
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel|Magellan|CustomUserAgent)'
1970
2039
  name: 'Generic Bot'
1971
2040
 
1972
2041
  - regex: '^sentry'
@@ -2025,7 +2094,15 @@
2025
2094
  name: 'WooRank sprl'
2026
2095
  url: 'https://www.woorank.com/'
2027
2096
 
2028
- - regex: '(Match|LinkCheck) by Siteimprove.com'
2097
+ - regex: 'by Siteimprove\.com'
2098
+ name: 'Siteimprove'
2099
+ category: 'Search bot'
2100
+ url: 'https://siteimprove.com/'
2101
+ producer:
2102
+ name: 'Siteimprove GmbH'
2103
+ url: 'https://siteimprove.com/'
2104
+
2105
+ - regex: 'Image size by Siteimprove\.com'
2029
2106
  name: 'Siteimprove'
2030
2107
  category: 'Search bot'
2031
2108
  url: 'https://siteimprove.com/'
@@ -2153,6 +2230,14 @@
2153
2230
  name: 'Startpagina B.V.'
2154
2231
  url: 'https://www.startpagina.nl/'
2155
2232
 
2233
+ - regex: 'MoodleBot-Linkchecker'
2234
+ name: 'MoodleBot Linkchecker'
2235
+ category: 'Search bot'
2236
+ url: 'https://docs.moodle.org/en/Usage'
2237
+ producer:
2238
+ name: 'Moodle Pty Ltd'
2239
+ url: 'https://moodle.org/'
2240
+
2156
2241
  - regex: 'GTmetrix'
2157
2242
  name: 'GTmetrix'
2158
2243
  category: 'Crawler'
@@ -2412,6 +2497,13 @@
2412
2497
  producer:
2413
2498
  name: 'Hatena Co., Ltd.'
2414
2499
  url: 'https://www.hatena.ne.jp'
2500
+ - regex: 'Hatena-?Bookmark'
2501
+ name: 'Hatena Bookmark'
2502
+ category: 'Crawler'
2503
+ url: 'https://www.hatena.ne.jp/faq/'
2504
+ producer:
2505
+ name: 'Hatena Co., Ltd.'
2506
+ url: 'https://www.hatena.ne.jp'
2415
2507
 
2416
2508
  - regex: 'RyowlEngine/(\d+)'
2417
2509
  name: 'Ryowl'
@@ -2556,6 +2648,14 @@
2556
2648
  category: 'Security Checker'
2557
2649
  url: 'https://github.com/LeakIX/l9explore'
2558
2650
 
2651
+ - regex: 'l9scan/|^Lkx-(.*)/([\d+.]+)'
2652
+ name: 'LeakIX'
2653
+ category: 'Security Checker'
2654
+ url: 'https://leakix.net/'
2655
+ producer:
2656
+ name: 'BaDaaS SRL'
2657
+ url: 'https://leakix.net/'
2658
+
2559
2659
  - regex: 'MegaIndex.ru/([\d+\.])'
2560
2660
  name: 'MegaIndex'
2561
2661
  category: 'Crawler'
@@ -3047,14 +3147,6 @@
3047
3147
  name: 'New Work SE'
3048
3148
  url: 'https://www.xing.com/'
3049
3149
 
3050
- - regex: '^Lkx-(.*)/([\d+.]+)'
3051
- name: 'LeakIX'
3052
- category: 'Security Checker'
3053
- url: 'https://leakix.net/'
3054
- producer:
3055
- name: 'BaDaaS SRL'
3056
- url: 'https://leakix.net/'
3057
-
3058
3150
  - regex: 'RepoLookoutBot/([\d+.]+)'
3059
3151
  name: 'Repo Lookout'
3060
3152
  category: 'Security Checker'
@@ -3217,6 +3309,205 @@
3217
3309
  category: 'Crawler'
3218
3310
  url: 'https://reqbin.com/curl'
3219
3311
 
3312
+ - regex: 'XoviBot/([\d+.]+)'
3313
+ name: 'XoviBot'
3314
+ category: 'Crawler'
3315
+ url: 'https://www.xovibot.net'
3316
+ producer:
3317
+ name: 'Xovi GmbH'
3318
+ url: 'http://www.xovi.de'
3319
+
3320
+ - regex: 'Overcast/([\d+.]+) Podcast Sync'
3321
+ name: 'Overcast Podcast Sync'
3322
+ category: 'Service Agent'
3323
+ url: 'https://overcast.fm/podcasterinfo'
3324
+
3325
+ - regex: '^Verity/([\d+.]+)'
3326
+ name: 'GumGum Verity'
3327
+ category: 'Service Agent'
3328
+ url: 'https://gumgum.com/verity'
3329
+
3330
+ - regex: 'hackermention'
3331
+ name: 'hackermention'
3332
+ category: 'Feed Reader'
3333
+ url: 'https://github.com/snarfed/hackermention'
3334
+
3335
+ - regex: 'BitSightBot/([\d+.]+)'
3336
+ name: 'BitSight'
3337
+ category: 'Security Checker'
3338
+ url: 'https://www.bitsight.com/'
3339
+ producer:
3340
+ name: 'BitSight Technologies, Inc.'
3341
+ url: 'https://www.bitsight.com/'
3342
+
3343
+ - regex: 'Ezgif/([\d+.]+)'
3344
+ name: 'Ezgif'
3345
+ category: 'Service Agent'
3346
+ url: 'https://ezgif.com/about'
3347
+
3348
+ - regex: 'intelx.io_bot'
3349
+ name: 'Intelligence X'
3350
+ category: 'Crawler'
3351
+ url: 'https://intelx.io/'
3352
+ producer:
3353
+ name: 'Kleissner Investments s.r.o.'
3354
+ url: 'https://intelx.io/'
3355
+
3356
+ - regex: 'FemtosearchBot/([\d+.]+)'
3357
+ name: 'Femtosearch'
3358
+ category: 'Crawler'
3359
+ url: 'http://femtosearch.com/'
3360
+ producer:
3361
+ name: 'Grier Forensics, LLC'
3362
+ url: 'https://www.grierforensics.com/'
3363
+
3364
+ - regex: 'AdsTxtCrawler/([\d+.]+)'
3365
+ name: 'AdsTxtCrawler'
3366
+ category: 'Crawler'
3367
+ url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
3368
+ producer:
3369
+ name: 'IAB Technology Laboratory, Inc.'
3370
+ url: 'https://iabtechlab.com/'
3371
+
3372
+ - regex: 'Morningscore'
3373
+ name: 'Morningscore Bot'
3374
+ category: 'Crawler'
3375
+ url: 'https://morningscore.io/'
3376
+ producer:
3377
+ name: 'Morningscore'
3378
+ url: 'https://morningscore.io/'
3379
+
3380
+ - regex: 'Uptime-Kuma/([\d+.]+)'
3381
+ name: 'Uptime-Kuma'
3382
+ category: 'Site Monitor'
3383
+ url: 'https://github.com/louislam/uptime-kuma'
3384
+
3385
+ - regex: 'ChatGPT-User'
3386
+ name: 'ChatGPT'
3387
+ category: 'Crawler'
3388
+ url: 'https://platform.openai.com/docs/plugins/bot'
3389
+ producer:
3390
+ name: 'OpenAI OpCo, LLC'
3391
+ url: 'https://openai.com/'
3392
+
3393
+ - regex: 'BrightEdge Crawler/([\d+.]+)'
3394
+ name: 'BrightEdge'
3395
+ category: 'Crawler'
3396
+ url: 'https://www.brightedge.com/'
3397
+ producer:
3398
+ name: 'BrightEdge Technologies, Inc'
3399
+ url: 'https://www.brightedge.com/'
3400
+
3401
+ - regex: 'sfFeedReader/([\d+.]+)'
3402
+ name: 'sfFeedReader'
3403
+ url: 'https://github.com/diem-project/sfFeed2Plugin'
3404
+ category: 'Feed Fetcher'
3405
+
3406
+ - regex: 'cyberscan.io'
3407
+ name: 'Cyberscan'
3408
+ category: 'Security Checker'
3409
+ url: 'https://www.cyberscan.io/'
3410
+ producer:
3411
+ name: 'DGC Verwaltungs GmbH'
3412
+ url: 'https://dgc.org/'
3413
+
3414
+ - regex: 'deepcrawl\.com'
3415
+ name: 'Lumar'
3416
+ category: 'Crawler'
3417
+ url: 'https://deepcrawl.com/bot'
3418
+ producer:
3419
+ name: 'Lumar'
3420
+ url: 'https://www.lumar.io/'
3421
+
3422
+ - regex: 'RepoLookoutBot'
3423
+ name: 'Repo Lookout'
3424
+ category: 'Crawler'
3425
+ url: 'https://www.repo-lookout.org/'
3426
+ producer:
3427
+ name: 'Crissy Field GmbH'
3428
+ url: 'https://www.crissyfield.de/'
3429
+
3430
+ - regex: 'researchscan.comsys.rwth-aachen.de'
3431
+ name: 'Research Scan'
3432
+ category: 'Crawler'
3433
+ url: 'http://researchscan.comsys.rwth-aachen.de/'
3434
+ producer:
3435
+ name: 'RWTH Aachen University'
3436
+ url: 'https://www.comsys.rwth-aachen.de/'
3437
+
3438
+ - regex: 'newspaper/([\d+.]+)'
3439
+ name: 'Scraping Robot'
3440
+ category: 'Crawler'
3441
+ url: 'https://scrapingrobot.com/'
3442
+ producer:
3443
+ name: 'Sprious LLC'
3444
+ url: 'https://sprious.com/'
3445
+
3446
+ - regex: 'GPTBot/([\d+.]+)'
3447
+ name: 'GPTBot'
3448
+ category: 'Crawler'
3449
+ url: 'https://platform.openai.com/docs/gptbot'
3450
+ producer:
3451
+ name: 'OpenAI OpCo, LLC'
3452
+ url: 'https://openai.com/'
3453
+
3454
+ - regex: 'Ant.com beta/([\d+.]+)'
3455
+ name: 'Ant'
3456
+ category: 'Crawler'
3457
+ url: 'https://www.ant.com/'
3458
+ producer:
3459
+ name: 'Ant.com Ltd.'
3460
+ url: 'https://www.ant.com/'
3461
+
3462
+ - regex: 'WebwikiBot/([\d+.]+)'
3463
+ name: 'Webwiki'
3464
+ category: 'Crawler'
3465
+ url: 'https://www.webwiki.com/'
3466
+ producer:
3467
+ name: 'webwiki GmbH'
3468
+ url: 'https://www.webwiki.com/'
3469
+
3470
+ - regex: 'phpMyAdmin'
3471
+ name: 'phpMyAdmin'
3472
+ category: 'Service Agent'
3473
+ url: 'https://www.phpmyadmin.net/'
3474
+
3475
+ - regex: 'Matomo/([\d+.]+)'
3476
+ name: 'Matomo'
3477
+ category: 'Service Agent'
3478
+ url: 'https://github.com/matomo-org/matomo'
3479
+ producer:
3480
+ name: 'InnoCraft Ltd'
3481
+ url: 'https://matomo.org/'
3482
+
3483
+ - regex: 'Prometheus/([\d+.]+)'
3484
+ name: 'Prometheus'
3485
+ category: 'Service Agent'
3486
+ url: 'https://github.com/prometheus/prometheus'
3487
+ producer:
3488
+ name: 'The Linux Foundation'
3489
+ url: 'https://www.cncf.io/'
3490
+
3491
+ - regex: 'ArchiveTeam ArchiveBot'
3492
+ name: 'ArchiveBot'
3493
+ category: 'Crawler'
3494
+ url: 'https://wiki.archiveteam.org/index.php?title=ArchiveBot'
3495
+ producer:
3496
+ name: 'ArchiveTeam'
3497
+ url: 'https://wiki.archiveteam.org/'
3498
+
3499
+ - regex: 'MADBbot/([\d+.]+)'
3500
+ name: 'MADBbot'
3501
+ category: 'Crawler'
3502
+ url: 'https://madb.zapto.org/bot.html'
3503
+
3504
+ - regex: 'MeltwaterNews'
3505
+ name: 'MeltwaterNews'
3506
+ category: 'Crawler'
3507
+ producer:
3508
+ name: 'Meltwater Deutschland GmbH'
3509
+ url: 'https://www.meltwater.com/'
3510
+
3220
3511
  # Generic detections
3221
- - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
3512
+ - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider|^firefox$|^chrome$)([^a-z]|$)'
3222
3513
  name: 'Generic Bot'