device_detector 1.0.0 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +58 -4
  3. data/README.md +57 -21
  4. data/lib/device_detector/bot.rb +2 -2
  5. data/lib/device_detector/browser.rb +691 -0
  6. data/lib/device_detector/client.rb +11 -2
  7. data/lib/device_detector/client_hint.rb +249 -0
  8. data/lib/device_detector/device.rb +1954 -23
  9. data/lib/device_detector/memory_cache.rb +26 -19
  10. data/lib/device_detector/metadata_extractor.rb +7 -8
  11. data/lib/device_detector/model_extractor.rb +3 -3
  12. data/lib/device_detector/name_extractor.rb +2 -2
  13. data/lib/device_detector/os.rb +289 -112
  14. data/lib/device_detector/parser.rb +49 -13
  15. data/lib/device_detector/vendor_fragment.rb +25 -0
  16. data/lib/device_detector/version.rb +3 -1
  17. data/lib/device_detector/version_extractor.rb +29 -2
  18. data/lib/device_detector.rb +192 -44
  19. data/regexes/bots.yml +3399 -91
  20. data/regexes/client/browser_engine.yml +28 -4
  21. data/regexes/client/browsers.yml +2697 -408
  22. data/regexes/client/feed_readers.yml +60 -22
  23. data/regexes/client/hints/apps.yml +150 -0
  24. data/regexes/client/hints/browsers.yml +292 -0
  25. data/regexes/client/libraries.yml +598 -4
  26. data/regexes/client/mediaplayers.yml +110 -5
  27. data/regexes/client/mobile_apps.yml +2451 -14
  28. data/regexes/client/pim.yml +128 -3
  29. data/regexes/device/cameras.yml +6 -6
  30. data/regexes/device/car_browsers.yml +39 -3
  31. data/regexes/device/consoles.yml +40 -6
  32. data/regexes/device/mobiles.yml +38844 -2907
  33. data/regexes/device/notebooks.yml +127 -0
  34. data/regexes/device/portable_media_player.yml +75 -12
  35. data/regexes/device/shell_tv.yml +145 -0
  36. data/regexes/device/televisions.yml +981 -40
  37. data/regexes/oss.yml +1560 -311
  38. data/regexes/vendorfragments.yml +6 -2
  39. metadata +31 -105
  40. data/.gitignore +0 -14
  41. data/.travis.yml +0 -18
  42. data/Gemfile +0 -8
  43. data/Rakefile +0 -79
  44. data/device_detector.gemspec +0 -26
  45. data/spec/device_detector/bot_fixtures_spec.rb +0 -30
  46. data/spec/device_detector/client_fixtures_spec.rb +0 -31
  47. data/spec/device_detector/concrete_user_agent_spec.rb +0 -136
  48. data/spec/device_detector/detector_fixtures_spec.rb +0 -60
  49. data/spec/device_detector/device_fixtures_spec.rb +0 -36
  50. data/spec/device_detector/device_spec.rb +0 -151
  51. data/spec/device_detector/memory_cache_spec.rb +0 -116
  52. data/spec/device_detector/model_extractor_spec.rb +0 -63
  53. data/spec/device_detector/os_fixtures_spec.rb +0 -26
  54. data/spec/device_detector/version_extractor_spec.rb +0 -80
  55. data/spec/device_detector_spec.rb +0 -198
  56. data/spec/fixtures/client/browser.yml +0 -1313
  57. data/spec/fixtures/client/feed_reader.yml +0 -187
  58. data/spec/fixtures/client/library.yml +0 -84
  59. data/spec/fixtures/client/mediaplayer.yml +0 -168
  60. data/spec/fixtures/client/mobile_app.yml +0 -30
  61. data/spec/fixtures/client/pim.yml +0 -96
  62. data/spec/fixtures/detector/bots.yml +0 -2418
  63. data/spec/fixtures/detector/camera.yml +0 -115
  64. data/spec/fixtures/detector/car_browser.yml +0 -20
  65. data/spec/fixtures/detector/console.yml +0 -267
  66. data/spec/fixtures/detector/desktop.yml +0 -4828
  67. data/spec/fixtures/detector/feature_phone.yml +0 -782
  68. data/spec/fixtures/detector/feed_reader.yml +0 -486
  69. data/spec/fixtures/detector/mediaplayer.yml +0 -179
  70. data/spec/fixtures/detector/mobile_apps.yml +0 -149
  71. data/spec/fixtures/detector/phablet.yml +0 -2140
  72. data/spec/fixtures/detector/portable_media_player.yml +0 -153
  73. data/spec/fixtures/detector/smart_display.yml +0 -58
  74. data/spec/fixtures/detector/smartphone-1.yml +0 -9469
  75. data/spec/fixtures/detector/smartphone-2.yml +0 -9414
  76. data/spec/fixtures/detector/smartphone-3.yml +0 -9396
  77. data/spec/fixtures/detector/smartphone-4.yml +0 -5742
  78. data/spec/fixtures/detector/smartphone.yml +0 -9411
  79. data/spec/fixtures/detector/tablet-1.yml +0 -9495
  80. data/spec/fixtures/detector/tablet-2.yml +0 -248
  81. data/spec/fixtures/detector/tablet.yml +0 -9484
  82. data/spec/fixtures/detector/tv.yml +0 -2582
  83. data/spec/fixtures/detector/unknown.yml +0 -3196
  84. data/spec/fixtures/device/camera.yml +0 -18
  85. data/spec/fixtures/device/car_browser.yml +0 -6
  86. data/spec/fixtures/device/console.yml +0 -78
  87. data/spec/fixtures/parser/oss.yml +0 -800
  88. data/spec/fixtures/parser/vendorfragments.yml +0 -162
  89. data/spec/spec_helper.rb +0 -9
data/regexes/bots.yml CHANGED
@@ -1,14 +1,35 @@
1
1
  ###############
2
2
  # Device Detector - The Universal Device Detection library for parsing User Agents
3
3
  #
4
- # @link http://piwik.org
4
+ # @link https://matomo.org
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
- - regex: '360Spider(-Image|-Video)?'
8
+ - regex: 'WireReaderBot(?:/([\d+.]+))?'
9
+ name: 'WireReaderBot'
10
+ category: 'Feed Fetcher'
11
+ url: 'https://wirereader.app/'
12
+
13
+ - regex: 'monitoring360bot'
14
+ name: '360 Monitoring'
15
+ category: 'Site Monitor'
16
+ url: 'https://www.360monitoring.io'
17
+ producer:
18
+ name: 'Plesk International GmbH'
19
+ url: 'https://www.plesk.com'
20
+
21
+ - regex: 'Cloudflare-Healthchecks'
22
+ name: 'Cloudflare Health Checks'
23
+ category: 'Service Agent'
24
+ url: 'https://developers.cloudflare.com/health-checks/'
25
+ producer:
26
+ name: 'CloudFlare'
27
+ url: 'https://www.cloudflare.com/'
28
+
29
+ - regex: '360Spider'
9
30
  name: '360Spider'
10
31
  category: 'Search bot'
11
- url: 'http://www.so.com/help/help_3_2.html'
32
+ url: 'https://www.so.com/help/help_3_2.html'
12
33
  producer:
13
34
  name: 'Online Media Group, Inc.'
14
35
  url: ''
@@ -40,18 +61,57 @@
40
61
  - regex: 'AhrefsBot'
41
62
  name: 'aHrefs Bot'
42
63
  category: 'Crawler'
43
- url: 'http://ahrefs.com/robot'
64
+ url: 'https://ahrefs.com/robot'
65
+ producer:
66
+ name: 'Ahrefs Pte Ltd'
67
+ url: 'https://ahrefs.com/robot'
68
+
69
+ - regex: 'AhrefsSiteAudit/[\d.]+'
70
+ name: 'AhrefsSiteAudit'
71
+ category: 'Site Monitor'
72
+ url: 'https://ahrefs.com/robot/site-audit'
44
73
  producer:
45
74
  name: 'Ahrefs Pte Ltd'
46
- url: 'http://ahrefs.com/robot'
75
+ url: 'https://ahrefs.com/'
47
76
 
48
77
  - regex: 'ia_archiver|alexabot|verifybot'
49
78
  name: 'Alexa Crawler'
50
79
  category: 'Search bot'
51
- url: 'https://alexa.zendesk.com/hc/en-us/sections/200100794-Crawlers'
80
+ url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
81
+ producer:
82
+ name: 'Alexa Internet'
83
+ url: 'https://www.alexa.com'
84
+
85
+ - regex: 'alexa site audit'
86
+ name: 'Alexa Site Audit'
87
+ category: 'Site Monitor'
88
+ url: 'https://support.alexa.com/hc/en-us/articles/200450194'
52
89
  producer:
53
90
  name: 'Alexa Internet'
54
- url: 'http://www.alexa.com'
91
+ url: 'https://www.alexa.com'
92
+
93
+ - regex: 'Amazonbot/[\d.]+'
94
+ name: 'Amazon Bot'
95
+ category: 'Crawler'
96
+ url: 'https://developer.amazon.com/support/amazonbot'
97
+ producer:
98
+ name: 'Amazon.com, Inc.'
99
+ url: 'https://www.amazon.com/'
100
+
101
+ - regex: 'AmazonAdBot/[\d.]+'
102
+ name: 'Amazon AdBot'
103
+ category: 'Crawler'
104
+ url: 'https://adbot.amazon.com/'
105
+ producer:
106
+ name: 'Amazon.com, Inc.'
107
+ url: 'https://www.amazon.com/'
108
+
109
+ - regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
110
+ name: 'Amazon Route53 Health Check'
111
+ category: 'Service Agent'
112
+ producer:
113
+ name: 'Amazon Web Services'
114
+ url: 'https://aws.amazon.com/'
55
115
 
56
116
  - regex: 'AmorankSpider'
57
117
  name: 'Amorank Spider'
@@ -61,13 +121,61 @@
61
121
  name: 'Amorank'
62
122
  url: 'http://www.amorank.com'
63
123
 
124
+ - regex: 'ApacheBench'
125
+ name: 'ApacheBench'
126
+ category: 'Benchmark'
127
+ url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
128
+ producer:
129
+ name: 'The Apache Software Foundation'
130
+ url: 'https://www.apache.org/foundation/'
131
+
64
132
  - regex: 'Applebot'
65
133
  name: 'Applebot'
66
134
  category: 'Crawler'
67
- url: 'http://www.apple.com/go/applebot'
135
+ url: 'https://support.apple.com/en-us/119829'
136
+ producer:
137
+ name: 'Apple Inc'
138
+ url: 'https://www.apple.com/'
139
+
140
+ - regex: 'iTMS'
141
+ name: 'iTMS'
142
+ category: 'Crawler'
143
+ url: 'https://support.apple.com/en-us/119829'
68
144
  producer:
69
145
  name: 'Apple Inc'
70
- url: 'http://www.apple.com'
146
+ url: 'https://www.apple.com/'
147
+
148
+ - regex: 'AppSignalBot'
149
+ name: 'AppSignalBot'
150
+ category: 'Site Monitor'
151
+ url: 'https://docs.appsignal.com/uptime-monitoring/'
152
+ producer:
153
+ name: 'AppSignal'
154
+ url: 'https://appsignal.com/'
155
+
156
+ - regex: 'Arachni'
157
+ name: 'Arachni'
158
+ category: 'Security Checker'
159
+ url: 'https://www.arachni-scanner.com/'
160
+ producer:
161
+ name: 'Sarosys LLC'
162
+ url: 'https://www.sarosys.com/'
163
+
164
+ - regex: 'AspiegelBot'
165
+ name: 'AspiegelBot'
166
+ category: 'Crawler'
167
+ url: 'https://aspiegel.com/'
168
+ producer:
169
+ name: 'Huawei'
170
+ url: 'https://www.huawei.com/'
171
+
172
+ - regex: 'Castro 2, Episode Duration Lookup'
173
+ name: 'Castro 2'
174
+ category: 'Service Agent'
175
+ url: 'http://supertop.co/castro/'
176
+ producer:
177
+ name: 'Supertop'
178
+ url: 'http://supertop.co'
71
179
 
72
180
  - regex: 'Curious George'
73
181
  name: 'Analytics SEO Crawler'
@@ -80,10 +188,10 @@
80
188
  - regex: 'archive\.org_bot|special_archiver'
81
189
  name: 'archive.org bot'
82
190
  category: 'Crawler'
83
- url: 'http://www.archive.org/details/archive.org_bot'
191
+ url: 'https://archive.org/details/archive.org_bot'
84
192
  producer:
85
193
  name: 'The Internet Archive'
86
- url: 'http://www.archive.org'
194
+ url: 'https://archive.org'
87
195
 
88
196
  - regex: 'Ask Jeeves/Teoma'
89
197
  name: 'Ask Jeeves'
@@ -93,8 +201,8 @@
93
201
  name: 'Ask Jeeves Inc.'
94
202
  url: 'http://www.ask.com'
95
203
 
96
- - regex: 'Backlink-Ceck\.de'
97
- name: 'Backlink-Ceck.de'
204
+ - regex: 'Backlink-Check\.de'
205
+ name: 'Backlink-Check.de'
98
206
  category: 'Crawler'
99
207
  url: 'http://www.backlink-check.de/bot.html'
100
208
  producer:
@@ -109,7 +217,7 @@
109
217
  name: '2.0Promotion GbR'
110
218
  url: 'http://www.backlinktest.com'
111
219
 
112
- - regex: 'baiduspider(-image)?|baidu Transcoder|baidu.*spider'
220
+ - regex: 'Baidu.*spider|baidu Transcoder'
113
221
  name: 'Baidu Spider'
114
222
  category: 'Search bot'
115
223
  url: 'http://www.baidu.com/search/spider.htm'
@@ -125,7 +233,15 @@
125
233
  name: ''
126
234
  url: ''
127
235
 
128
- - regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
236
+ - regex: 'Better Uptime Bot'
237
+ name: 'Better Uptime Bot'
238
+ category: 'Site Monitor'
239
+ url: 'https://betteruptime.com/faq'
240
+ producer:
241
+ name: 'Better Uptime'
242
+ url: 'https://betteruptime.com/'
243
+
244
+ - regex: 'MSNBot|msrbot|bingbot|bingadsbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
129
245
  name: 'BingBot'
130
246
  category: 'Search bot'
131
247
  url: 'http://search.msn.com/msnbot.htmn'
@@ -141,7 +257,7 @@
141
257
  name: 'Blekko'
142
258
  url: 'http://blekko.com'
143
259
 
144
- - regex: 'BLEXBot(Test)?'
260
+ - regex: 'BLEXBot'
145
261
  name: 'BLEXBot Crawler'
146
262
  category: 'Crawler'
147
263
  url: 'http://webmeup-crawler.com'
@@ -165,6 +281,13 @@
165
281
  name: 'Blogtrottr Ltd'
166
282
  url: 'https://blogtrottr.com/'
167
283
 
284
+ - regex: 'BoardReader Blog Indexer'
285
+ name: 'BoardReader Blog Indexer'
286
+ category: 'Crawler'
287
+ producer:
288
+ name: 'BoardReader'
289
+ url: 'https://boardreader.com/'
290
+
168
291
  - regex: 'BountiiBot'
169
292
  name: 'Bountii Bot'
170
293
  category: 'Search bot'
@@ -186,8 +309,8 @@
186
309
  category: 'Crawler'
187
310
  url: 'http://law.di.unimi.it/BUbiNG.html'
188
311
  producer:
189
- name: ''
190
- url: ''
312
+ name: 'The Laboratory for Web Algorithmics (LAW)'
313
+ url: 'http://law.di.unimi.it/software.php#buging'
191
314
 
192
315
  - regex: '(?<!HTC)[ _]Butterfly/'
193
316
  name: 'Butterfly Robot'
@@ -221,21 +344,93 @@
221
344
  name: '10betterpages GmbH'
222
345
  url: 'http://cliqz.com'
223
346
 
347
+ - regex: 'Cloudflare-AMP'
348
+ name: 'CloudFlare AMP Fetcher'
349
+ category: 'Crawler'
350
+ url: 'https://amp.cloudflare.com/doc/fetcher.html'
351
+ producer:
352
+ name: 'CloudFlare'
353
+ url: 'http://www.cloudflare.com'
354
+
355
+ - regex: 'Cloudflare-?Diagnostics'
356
+ name: 'Cloudflare Diagnostics'
357
+ category: 'Site Monitor'
358
+ url: 'https://www.cloudflare.com/'
359
+ producer:
360
+ name: 'Cloudflare'
361
+ url: 'https://www.cloudflare.com/'
362
+
224
363
  - regex: 'CloudFlare-AlwaysOnline'
225
364
  name: 'CloudFlare Always Online'
226
365
  category: 'Site Monitor'
227
- url: 'http://www.cloudflare.com/always-online'
366
+ url: 'https://www.cloudflare.com/always-online'
228
367
  producer:
229
368
  name: 'CloudFlare'
230
- url: 'http://www.cloudflare.com'
369
+ url: 'https://www.cloudflare.com/'
370
+
371
+ - regex: 'Cloudflare-SSLDetector'
372
+ name: 'Cloudflare SSL Detector'
373
+ category: 'Site Monitor'
374
+ url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
375
+ producer:
376
+ name: 'CloudFlare'
377
+ url: 'https://www.cloudflare.com/'
378
+
379
+ - regex: 'Cloudflare Custom Hostname Verification'
380
+ name: 'Cloudflare Custom Hostname Verification'
381
+ category: 'Service Agent'
382
+ url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
383
+ producer:
384
+ name: 'CloudFlare'
385
+ url: 'https://www.cloudflare.com/'
386
+
387
+ - regex: 'Cloudflare-Traffic-Manager'
388
+ name: 'Cloudflare Traffic Manager'
389
+ category: 'Site Monitor'
390
+ url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
391
+ producer:
392
+ name: 'CloudFlare'
393
+ url: 'https://www.cloudflare.com/'
394
+
395
+ - regex: 'Cloudflare-Smart-Transit'
396
+ name: 'Cloudflare Smart Transit'
397
+ category: 'Site Monitor'
398
+ url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
399
+ producer:
400
+ name: 'CloudFlare'
401
+ url: 'https://www.cloudflare.com/'
402
+
403
+ - regex: 'CloudflareObservatory'
404
+ name: 'Cloudflare Observatory'
405
+ category: 'Site Monitor'
406
+ url: 'https://developers.cloudflare.com/speed/speed-test/run-speed-test'
407
+ producer:
408
+ name: 'CloudFlare'
409
+ url: 'https://www.cloudflare.com/'
410
+
411
+ - regex: 'https://developers\.cloudflare\.com/security-center/'
412
+ name: 'Cloudflare Security Insights'
413
+ category: 'Site Monitor'
414
+ url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
415
+ producer:
416
+ name: 'CloudFlare'
417
+ url: 'https://www.cloudflare.com/'
231
418
 
232
- - regex: 'coccoc/'
419
+ - regex: 'coccoc\.com'
233
420
  name: 'Cốc Cốc Bot'
234
- url: 'http://help.coccoc.com/'
421
+ url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
235
422
  category: 'Search bot'
236
423
  producer:
237
424
  name: 'Cốc Cốc'
238
- url: 'http://coccoc.com/'
425
+ url: 'https://coccoc.com/'
426
+
427
+ - regex: 'collectd'
428
+ name: 'Collectd'
429
+ url: 'https://collectd.org/'
430
+ category: 'Site Monitor'
431
+ producer:
432
+ name: 'Collectd'
433
+ url: 'https://collectd.org/'
239
434
 
240
435
  - regex: 'CommaFeed'
241
436
  name: 'CommaFeed'
@@ -245,7 +440,39 @@
245
440
  name: ''
246
441
  url: ''
247
442
 
248
- - regex: 'Daum(oa)?[ /][0-9]'
443
+ - regex: 'CSS Certificate Spider'
444
+ name: 'CSS Certificate Spider'
445
+ category: 'Crawler'
446
+ url: 'http://www.css-security.com/certificatespider/'
447
+ producer:
448
+ name: 'Certified Security Solutions'
449
+ url: 'https://www.css-security.com/company/about-us/'
450
+
451
+ - regex: 'Datadog Agent|Datadog/?Synthetics'
452
+ name: 'Datadog Agent'
453
+ url: 'https://github.com/DataDog/dd-agent'
454
+ category: 'Site Monitor'
455
+ producer:
456
+ name: 'Datadog'
457
+ url: 'https://www.datadoghq.com/'
458
+
459
+ - regex: 'Datanyze'
460
+ name: 'Datanyze'
461
+ url: ''
462
+ category: 'Crawler'
463
+ producer:
464
+ name: 'Datanyze'
465
+ url: 'https://www.datanyze.com'
466
+
467
+ - regex: 'Dataprovider'
468
+ name: 'Dataprovider'
469
+ category: 'Crawler'
470
+ url: ''
471
+ producer:
472
+ name: 'Dataprovider B.V.'
473
+ url: 'https://www.dataprovider.com/'
474
+
475
+ - regex: 'Daum(?!(?:Apps|Device))'
249
476
  name: 'Daum'
250
477
  category: 'Search bot'
251
478
  url: 'http://tab.search.daum.net/aboutWebSearch_en.html'
@@ -261,7 +488,7 @@
261
488
  name: 'DAZOO.FR'
262
489
  url: 'http://dazoo.fr'
263
490
 
264
- - regex: 'discobot(-news)?'
491
+ - regex: 'discobot'
265
492
  name: 'Discobot'
266
493
  category: 'Search bot'
267
494
  url: 'http://discoveryengine.com/discobot.html'
@@ -269,7 +496,7 @@
269
496
  name: 'Discovery Engine'
270
497
  url: 'http://discoveryengine.com'
271
498
 
272
- - regex: 'Domain Re-Animator Bot|support@domainreanimator.com'
499
+ - regex: 'Domain Re-Animator Bot|support@domainreanimator\.com'
273
500
  name: 'Domain Re-Animator Bot'
274
501
  category: 'Crawler'
275
502
  url: ''
@@ -285,6 +512,14 @@
285
512
  name: 'SEOmoz, Inc.'
286
513
  url: 'http://moz.com/'
287
514
 
515
+ - regex: 'DuckDuck(?:Go-Favicons-)?Bot'
516
+ name: 'DuckDuckGo Bot'
517
+ category: 'Search bot'
518
+ url: 'https://duckduckgo.com/duckduckbot'
519
+ producer:
520
+ name: 'DuckDuckGo'
521
+ url: 'https://duckduckgo.com/'
522
+
288
523
  - regex: 'EasouSpider'
289
524
  name: 'Easou Spider'
290
525
  category: 'Search bot'
@@ -293,6 +528,13 @@
293
528
  name: 'easou ICP'
294
529
  url: 'http://www.easou.com'
295
530
 
531
+ - regex: 'eCairn-Grabber'
532
+ name: 'eCairn-Grabber'
533
+ category: 'Crawler'
534
+ producer:
535
+ name: 'eCairn'
536
+ url: 'https://ecairn.com'
537
+
296
538
  - regex: 'EMail Exractor'
297
539
  name: 'EMail Exractor'
298
540
  category: 'Crawler'
@@ -301,7 +543,15 @@
301
543
  name: ''
302
544
  url: ''
303
545
 
304
- - regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
546
+ - regex: 'evc-batch'
547
+ name: 'evc-batch'
548
+ category: 'Crawler'
549
+ url: ''
550
+ producer:
551
+ name: 'eVenture Capital Partners II, LLC'
552
+ url: 'http://www.eventures.vc/'
553
+
554
+ - regex: 'Exabot|ExaleadCloudview'
305
555
  name: 'ExaBot'
306
556
  category: 'Crawler'
307
557
  url: 'http://www.exabot.com/go/robot'
@@ -325,13 +575,21 @@
325
575
  name: 'SEOmoz, Inc.'
326
576
  url: 'http://moz.com/'
327
577
 
328
- - regex: 'facebookexternalhit|facebookplatform'
329
- name: 'Facebook External Hit'
578
+ - regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)'
579
+ name: 'Facebook Crawler'
330
580
  category: 'Social Media Agent'
331
- url: 'https://www.facebook.com/externalhit_uatext.php'
581
+ url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/'
332
582
  producer:
333
- name: 'Facebook'
334
- url: 'http://www.facebook.com'
583
+ name: 'Meta Platforms, Inc.'
584
+ url: 'https://www.meta.com/'
585
+
586
+ - regex: 'FacebookBot/[\d.]+'
587
+ name: 'FacebookBot'
588
+ category: 'Crawler'
589
+ url: 'https://developers.facebook.com/docs/sharing/bot'
590
+ producer:
591
+ name: 'Meta Platforms, Inc.'
592
+ url: 'https://www.meta.com/'
335
593
 
336
594
  - regex: 'Feedbin'
337
595
  name: 'Feedbin'
@@ -357,7 +615,7 @@
357
615
  name: 'David Smith & Developing Perspective, LLC'
358
616
  url: 'https://david-smith.org'
359
617
 
360
- - regex: '(Meta)?Feedly(Bot|App)?'
618
+ - regex: 'Feedly'
361
619
  name: 'Feedly'
362
620
  url: 'http://www.feedly.com'
363
621
  category: 'Feed Fetcher'
@@ -381,6 +639,24 @@
381
639
  name: ''
382
640
  url: ''
383
641
 
642
+ - regex: 'FlipboardProxy|FlipboardRSS'
643
+ name: 'Flipboard'
644
+ url: 'http://flipboard.com/browserproxy'
645
+ category: 'Feed Fetcher'
646
+ producer:
647
+ name: 'Flipboard'
648
+ url: 'http://flipboard.com/'
649
+
650
+ - regex: 'Findxbot'
651
+ name: 'Findxbot'
652
+ category: 'Crawler'
653
+ url: 'http://www.findxbot.com'
654
+
655
+ - regex: 'FreshRSS'
656
+ name: 'FreshRSS'
657
+ category: 'Feed Fetcher'
658
+ url: 'https://freshrss.org/'
659
+
384
660
  - regex: 'Genieo'
385
661
  name: 'Genieo Web filter'
386
662
  category: ''
@@ -389,6 +665,14 @@
389
665
  name: 'Genieo'
390
666
  url: 'http://www.genieo.com'
391
667
 
668
+ - regex: 'GigablastOpenSource'
669
+ name: 'Gigablast'
670
+ category: 'Search bot'
671
+ url: 'https://github.com/gigablast/open-source-search-engine'
672
+ producer:
673
+ name: 'Matt Wells'
674
+ url: 'http://www.gigablast.com/faq.html'
675
+
392
676
  - regex: 'Gluten Free Crawler'
393
677
  name: 'Gluten Free Crawler'
394
678
  category: 'Crawler'
@@ -397,6 +681,10 @@
397
681
  name: ''
398
682
  url: ''
399
683
 
684
+ - regex: 'gobuster'
685
+ name: 'Gobuster'
686
+ url: 'https://github.com/OJ/gobuster'
687
+
400
688
  - regex: 'ichiro/mobile goo'
401
689
  name: 'Goo'
402
690
  category: 'Search bot'
@@ -405,13 +693,29 @@
405
693
  name: 'NTT Resonant'
406
694
  url: 'http://goo.ne.jp'
407
695
 
696
+ - regex: 'Storebot-Google'
697
+ name: 'Google StoreBot'
698
+ category: 'Crawler'
699
+
700
+ - regex: 'Google Favicon'
701
+ name: 'Google Favicon'
702
+ category: 'Crawler'
703
+
704
+ - regex: 'Google Search Console'
705
+ name: 'Google Search Console'
706
+ category: 'Crawler'
707
+ url: 'https://search.google.com/search-console/about'
708
+ producer:
709
+ name: 'Google Inc.'
710
+ url: 'https://www.google.com/'
711
+
408
712
  - regex: 'Google Page Speed Insights'
409
713
  name: 'Google PageSpeed Insights'
410
714
  category: 'Site Monitor'
411
715
  url: 'http://developers.google.com/speed/pagespeed/insights/'
412
716
  producer:
413
717
  name: 'Google Inc.'
414
- url: 'http://www.google.com'
718
+ url: 'https://www.google.com/'
415
719
 
416
720
  - regex: 'google_partner_monitoring'
417
721
  name: 'Google Partner Monitoring'
@@ -419,7 +723,39 @@
419
723
  url: ''
420
724
  producer:
421
725
  name: 'Google Inc.'
422
- url: 'http://www.google.com'
726
+ url: 'https://www.google.com/'
727
+
728
+ - regex: 'Google-Cloud-Scheduler'
729
+ name: 'Google Cloud Scheduler'
730
+ category: 'Crawler'
731
+ url: 'https://cloud.google.com/scheduler'
732
+ producer:
733
+ name: 'Google Inc.'
734
+ url: 'https://www.google.com'
735
+
736
+ - regex: 'Google-Structured-Data-Testing-Tool'
737
+ name: 'Google Structured Data Testing Tool'
738
+ category: 'Validator'
739
+ url: 'https://search.google.com/structured-data/testing-tool'
740
+ producer:
741
+ name: 'Google Inc.'
742
+ url: 'https://www.google.com/'
743
+
744
+ - regex: 'GoogleStackdriverMonitoring'
745
+ name: 'Google Stackdriver Monitoring'
746
+ category: 'Site Monitor'
747
+ url: 'https://cloud.google.com/monitoring'
748
+ producer:
749
+ name: 'Google Inc.'
750
+ url: 'https://www.google.com'
751
+
752
+ - regex: 'Google-Transparency-Report'
753
+ name: 'Google Transparency Report'
754
+ category: 'Site Monitor'
755
+ url: 'https://transparencyreport.google.com/'
756
+ producer:
757
+ name: 'Google Inc.'
758
+ url: 'https://www.google.com/'
423
759
 
424
760
  - regex: 'via ggpht\.com GoogleImageProxy'
425
761
  name: 'Gmail Image Proxy'
@@ -427,15 +763,71 @@
427
763
  url: ''
428
764
  producer:
429
765
  name: 'Google Inc.'
430
- url: 'http://www.google.com'
766
+ url: 'https://www.google.com/'
767
+
768
+ - regex: 'SeznamEmailProxy'
769
+ name: 'Seznam Email Proxy'
770
+ category: 'Crawler'
771
+ url: ''
772
+ producer:
773
+ name: 'Seznam.cz, a.s.'
774
+ url: 'http://www.seznam.cz/'
775
+
776
+ - regex: 'Seznam-Zbozi-robot'
777
+ name: 'Seznam Zbozi.cz'
778
+ category: 'Crawler'
779
+ url: ''
780
+ producer:
781
+ name: 'Seznam.cz, a.s.'
782
+ url: 'https://www.zbozi.cz/'
783
+
784
+ - regex: 'Heurekabot-Feed'
785
+ name: 'Heureka Feed'
786
+ category: 'Crawler'
787
+ url: 'https://sluzby.heureka.cz/napoveda/heurekabot/'
788
+ producer:
789
+ name: 'Heureka.cz, a.s.'
790
+ url: 'https://www.heureka.cz/'
791
+
792
+ - regex: 'ShopAlike'
793
+ name: 'ShopAlike'
794
+ category: 'Crawler'
795
+ url: ''
796
+ producer:
797
+ name: 'Visual Meta'
798
+ url: 'https://www.shopalike.cz/'
799
+
800
+ - regex: 'Googlebot-News'
801
+ name: 'Googlebot News'
802
+ category: 'Search bot'
803
+ url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
804
+ producer:
805
+ name: 'Google Inc.'
806
+ url: 'https://www.google.com/'
807
+
808
+ - regex: 'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Safety|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\+/web/snippet'
809
+ name: 'Googlebot'
810
+ category: 'Search bot'
811
+ url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
812
+ producer:
813
+ name: 'Google Inc.'
814
+ url: 'https://www.google.com/'
431
815
 
432
- - regex: 'Googlebot(-Mobile|-Image|-Video|-News)?|Feedfetcher-Google|Google-Test|Google-Site-Verification|Google Web Preview|AdsBot-Google(-Mobile)?|Mediapartners-Google|Google.*/\+/web/snippet|GoogleProducer|Google[ -]Publisher[ -]Plugin'
816
+ - regex: '^Google$'
433
817
  name: 'Googlebot'
434
818
  category: 'Search bot'
435
- url: 'http://www.google.com/bot.html'
819
+ url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
820
+ producer:
821
+ name: 'Google Inc.'
822
+ url: 'https://www.google.com/'
823
+
824
+ - regex: 'Google-Area120-PrivacyPolicyFetcher'
825
+ name: 'Google Area 120 Privacy Policy Fetcher'
826
+ category: 'Crawler'
827
+ url: 'https://area120.google.com/'
436
828
  producer:
437
829
  name: 'Google Inc.'
438
- url: 'http://www.google.com'
830
+ url: 'https://www.google.com/'
439
831
 
440
832
  - regex: 'heritrix'
441
833
  name: 'Heritrix'
@@ -443,9 +835,21 @@
443
835
  url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
444
836
  producer:
445
837
  name: 'The Internet Archive'
446
- url: 'http://www.archive.org'
838
+ url: 'https://archive.org'
839
+
840
+ - regex: 'HubSpot '
841
+ name: 'HubSpot'
842
+ category: 'Crawler'
843
+ producer:
844
+ name: 'HubSpot Inc.'
845
+ url: 'https://www.hubspot.com'
447
846
 
448
- - regex: 'HTTPMon'
847
+ - regex: 'vuhuvBot'
848
+ name: 'Vuhuv Bot'
849
+ category: 'Crawler'
850
+ url: 'http://vuhuv.com/bot.html'
851
+
852
+ - regex: 'HTTPMon/[\d.]+'
449
853
  name: 'HTTPMon'
450
854
  category: 'Site Monitor'
451
855
  url: 'http://www.httpmon.com'
@@ -461,6 +865,11 @@
461
865
  name: ''
462
866
  url: ''
463
867
 
868
+ - regex: 'inoreader\.com'
869
+ name: 'inoreader'
870
+ category: 'Feed Reader'
871
+ url: 'https://www.inoreader.com'
872
+
464
873
  - regex: 'iisbot'
465
874
  name: 'IIS Site Analysis'
466
875
  category: 'Crawler'
@@ -469,6 +878,25 @@
469
878
  name: 'Microsoft Corporation'
470
879
  url: 'http://www.microsoft.com'
471
880
 
881
+ - regex: 'ips-agent'
882
+ name: 'IPS Agent'
883
+ category: 'Crawler'
884
+ producer:
885
+ name: 'VeriSign, Inc'
886
+ url: 'http://www.verisign.com/'
887
+
888
+ - regex: 'IP-Guide\.com'
889
+ name: 'IP-Guide Crawler'
890
+ category: 'Crawler'
891
+ url: ''
892
+ producer:
893
+ name: ''
894
+ url: 'https://ip-guide.com'
895
+
896
+ - regex: 'k6/[0-9\.]+'
897
+ name: 'K6'
898
+ url: 'https://k6.io/'
899
+
472
900
  - regex: 'kouio'
473
901
  name: 'Kouio'
474
902
  url: 'http://kouio.com/'
@@ -485,7 +913,23 @@
485
913
  name: ''
486
914
  url: ''
487
915
 
488
- - regex: 'linkdexbot(-mobile)?|linkdex\.com'
916
+ - regex: '[A-z0-9]*-Lighthouse'
917
+ name: 'Lighthouse'
918
+ category: 'Site Monitor'
919
+ url: 'https://developers.google.com/web/tools/lighthouse'
920
+ producer:
921
+ name: 'Lighthouse'
922
+ url: 'https://developers.google.com/web/tools/lighthouse'
923
+
924
+ - regex: 'last-modified\.com'
925
+ name: 'LastMod Bot'
926
+ category: 'Site Monitor'
927
+ url: 'https://last-modified.com/en/about'
928
+ producer:
929
+ name: ''
930
+ url: 'https://last-modified.com/en'
931
+
932
+ - regex: 'linkdexbot|linkdex\.com'
489
933
  name: 'Linkdex Bot'
490
934
  category: 'Search bot'
491
935
  url: 'http://www.linkdex.com/bots'
@@ -503,12 +947,13 @@
503
947
 
504
948
  - regex: 'ltx71'
505
949
  name: 'LTX71'
506
- url: 'http://ltx71.com/'
950
+ category: 'Security Checker'
951
+ url: 'https://ltx71.com/'
507
952
  producer:
508
953
  name: ''
509
954
  url: ''
510
955
 
511
- - regex: 'Mail\.RU(_Bot)?'
956
+ - regex: 'Mail\.RU'
512
957
  name: 'Mail.Ru Bot'
513
958
  category: 'Search bot'
514
959
  url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
@@ -532,6 +977,26 @@
532
977
  name: ''
533
978
  url: ''
534
979
 
980
+ - regex: 'masscan-ng/[\d.]+'
981
+ name: 'masscan-ng'
982
+ url: 'https://github.com/bi-zone/masscan-ng'
983
+ category: 'Crawler'
984
+ producer:
985
+ name: 'BIZON, OOO'
986
+ url: 'https://bi.zone/'
987
+
988
+ - regex: '.*masscan'
989
+ name: 'masscan'
990
+ url: 'https://github.com/robertdavidgraham/masscan'
991
+ category: 'Crawler'
992
+ producer:
993
+ name: 'Robert Graham'
994
+ url: 'https://github.com/robertdavidgraham'
995
+
996
+ - regex: 'Mastodon/'
997
+ name: 'Mastodon Bot'
998
+ category: 'Social Media Agent'
999
+
535
1000
  - regex: 'meanpathbot'
536
1001
  name: 'Meanpath Bot'
537
1002
  category: 'Search bot'
@@ -540,6 +1005,19 @@
540
1005
  name: 'Meanpath'
541
1006
  url: 'http://www.meanpath.com'
542
1007
 
1008
+ - regex: 'MetaJobBot'
1009
+ name: 'MetaJobBot'
1010
+ category: 'Crawler'
1011
+ url: 'http://www.metajob.at/the/crawler'
1012
+ producer:
1013
+ name: 'MetaJob'
1014
+ url: 'http://www.metajob.at'
1015
+
1016
+ - regex: 'MetaInspector'
1017
+ name: 'MetaInspector'
1018
+ category: 'Crawler'
1019
+ url: 'https://github.com/jaimeiniesta/metainspector'
1020
+
543
1021
  - regex: 'MixrankBot'
544
1022
  name: 'Mixrank Bot'
545
1023
  category: 'Crawler'
@@ -556,6 +1034,13 @@
556
1034
  name: 'Majestic-12'
557
1035
  url: 'http://majestic12.co.uk'
558
1036
 
1037
+ - regex: 'Mnogosearch'
1038
+ name: 'Mnogosearch'
1039
+ category: 'Search bot'
1040
+ url: 'http://www.mnogosearch.org/'
1041
+ producer:
1042
+ name: 'Lavtech.Com Corp.'
1043
+ url: ''
559
1044
  - regex: 'MojeekBot'
560
1045
  name: 'MojeekBot'
561
1046
  category: 'Search bot'
@@ -564,6 +1049,14 @@
564
1049
  name: 'Mojeek Ltd.'
565
1050
  url: 'http://www.mojeek.com'
566
1051
 
1052
+ - regex: 'munin'
1053
+ name: 'Munin'
1054
+ category: 'Site Monitor'
1055
+ url: 'http://munin-monitoring.org/'
1056
+ producer:
1057
+ name: 'Munin'
1058
+ url: 'http://munin-monitoring.org/'
1059
+
567
1060
  - regex: 'NalezenCzBot'
568
1061
  name: 'NalezenCzBot'
569
1062
  category: 'Crawler'
@@ -572,7 +1065,19 @@
572
1065
  name: 'Jaroslav Kuboš'
573
1066
  url: ''
574
1067
 
575
- - regex: 'Netcraft Web Server Survey'
1068
+ - regex: 'check_http/v'
1069
+ name: 'Nagios check_http'
1070
+ category: 'Site Monitor'
1071
+ url: 'https://nagios.org'
1072
+ producer:
1073
+ name: 'Nagios Plugins Development Team'
1074
+ url: 'https://nagios.org'
1075
+
1076
+ - regex: 'nbertaupete95\(at\)gmail\.com'
1077
+ name: 'nbertaupete95'
1078
+ category: 'Crawler'
1079
+
1080
+ - regex: 'Netcraft(?: Web Server Survey| SSL Server Survey|SurveyAgent)'
576
1081
  name: 'Netcraft Survey Bot'
577
1082
  category: 'Search bot'
578
1083
  url: ''
@@ -580,6 +1085,14 @@
580
1085
  name: 'Netcraft'
581
1086
  url: 'http://www.netcraft.com'
582
1087
 
1088
+ - regex: 'netEstate NE Crawler'
1089
+ name: 'netEstate'
1090
+ category: 'Crawler'
1091
+ url: 'http://www.website-datenbank.de/Impressum'
1092
+ producer:
1093
+ name: 'netEstate GmbH'
1094
+ url: 'https://www.netestate.de/en/'
1095
+
583
1096
  - regex: 'Netvibes'
584
1097
  name: 'Netvibes'
585
1098
  url: 'http://www.netvibes.com/'
@@ -588,7 +1101,7 @@
588
1101
  name: ''
589
1102
  url: ''
590
1103
 
591
- - regex: 'NewsBlur .*(Fetcher|Finder)'
1104
+ - regex: 'NewsBlur .*(?:Fetcher|Finder)'
592
1105
  name: 'NewsBlur'
593
1106
  url: 'http://www.newsblur.com'
594
1107
  category: 'Feed Fetcher'
@@ -612,7 +1125,41 @@
612
1125
  name: 'Northern Light'
613
1126
  url: 'http://northernlight.com'
614
1127
 
615
- - regex: 'omgilibot'
1128
+ - regex: 'Nmap Scripting Engine'
1129
+ name: 'Nmap'
1130
+ category: 'Security Checker'
1131
+ url: 'https://nmap.org/book/nse.html'
1132
+ producer:
1133
+ name: 'Nmap'
1134
+ url: 'https://nmap.org/'
1135
+
1136
+ - regex: 'Nuzzel'
1137
+ name: 'Nuzzel'
1138
+ category: 'Crawler'
1139
+ producer:
1140
+ name: 'Nuzzel'
1141
+ url: 'https://www.nuzzel.com/'
1142
+
1143
+ - regex: 'NodePing'
1144
+ name: 'NodePing'
1145
+ category: 'Site Monitor'
1146
+ url: 'https://nodeping.com'
1147
+ producer:
1148
+ name: 'NodePing'
1149
+ url: 'https://nodeping.com'
1150
+
1151
+ - regex: 'Octopus [0-9]'
1152
+ name: 'Octopus'
1153
+
1154
+ - regex: 'OnlineOrNot\.com_bot'
1155
+ name: 'OnlineOrNot Bot'
1156
+ category: 'Site Monitor'
1157
+ url: 'https://onlineornot.com/website-monitoring'
1158
+ producer:
1159
+ name: 'OnlineOrNot'
1160
+ url: 'https://onlineornot.com'
1161
+
1162
+ - regex: 'omgili'
616
1163
  name: 'Omgili bot'
617
1164
  category: 'Search bot'
618
1165
  url: 'http://www.omgili.com/Crawler.html'
@@ -660,6 +1207,11 @@
660
1207
  name: 'Smallrivers SA'
661
1208
  url: 'http://www.paper.li'
662
1209
 
1210
+ - regex: 'phantomas/'
1211
+ name: 'Phantomas'
1212
+ category: 'Site Monitor'
1213
+ url: 'https://github.com/macbre/phantomas'
1214
+
663
1215
  - regex: 'phpservermon'
664
1216
  name: 'PHP Server Monitor'
665
1217
  category: 'Site Monitor'
@@ -668,7 +1220,31 @@
668
1220
  name: 'PHP Server Monitor'
669
1221
  url: 'http://www.phpservermonitor.org/'
670
1222
 
671
- - regex: 'psbot(-page)?'
1223
+ - regex: 'Pocket(?:ImageCache|Parser)/[\d.]+'
1224
+ name: 'Pocket'
1225
+ category: 'Read-it-later Service'
1226
+ url: 'https://getpocket.com/pocketparser_ua'
1227
+ producer:
1228
+ name: 'Read It Later, Inc.'
1229
+ url: 'https://getpocket.com/'
1230
+
1231
+ - regex: 'PritTorrent'
1232
+ name: 'PritTorrent'
1233
+ category: 'Crawler'
1234
+ url: 'https://github.com/astro/prittorrent'
1235
+ producer:
1236
+ name: 'Bitlove'
1237
+ url: 'http://bitlove.org/'
1238
+
1239
+ - regex: 'PRTG Network Monitor'
1240
+ name: 'PRTG Network Monitor'
1241
+ category: 'Network Monitor'
1242
+ url: 'https://www.paessler.com/prtg'
1243
+ producer:
1244
+ name: 'Paessler AG'
1245
+ url: 'https://www.paessler.com'
1246
+
1247
+ - regex: 'psbot'
672
1248
  name: 'Picsearch bot'
673
1249
  category: 'Search bot'
674
1250
  url: 'http://www.picsearch.com/bot.html'
@@ -676,7 +1252,7 @@
676
1252
  name: 'Picsearch'
677
1253
  url: 'http://www.picsearch.com'
678
1254
 
679
- - regex: 'Pingdom\.com'
1255
+ - regex: 'Pingdom(?:\.com|TMS)'
680
1256
  name: 'Pingdom Bot'
681
1257
  category: 'Site Monitor'
682
1258
  url: ''
@@ -684,14 +1260,51 @@
684
1260
  name: 'Pingdom AB'
685
1261
  url: 'https://www.pingdom.com'
686
1262
 
687
- - regex: 'QuerySeekerSpider'
688
- name: 'QuerySeekerSpider'
1263
+ - regex: 'Quora Link Preview'
1264
+ name: 'Quora Link Preview'
689
1265
  category: 'Crawler'
690
- url: 'http://queryseeker.com/bot.html'
1266
+ url: ''
691
1267
  producer:
692
- name: 'QueryEye Inc.'
1268
+ name: 'Quora'
1269
+ url: 'http://www.quora.com'
1270
+
1271
+ - regex: 'Quora-Bot'
1272
+ name: 'Quora Bot'
1273
+ category: 'Crawler'
1274
+ url: ''
1275
+ producer:
1276
+ name: 'Quora'
1277
+ url: 'https://www.quora.com/'
1278
+
1279
+ - regex: 'RamblerMail'
1280
+ name: 'RamblerMail Image Proxy'
1281
+ category: 'Crawler'
1282
+ url: ''
1283
+ producer:
1284
+ name: 'Rambler&Co'
1285
+ url: 'https://rambler-co.ru/'
1286
+
1287
+ - regex: 'QuerySeekerSpider'
1288
+ name: 'QuerySeekerSpider'
1289
+ category: 'Crawler'
1290
+ url: 'http://queryseeker.com/bot.html'
1291
+ producer:
1292
+ name: 'QueryEye Inc.'
693
1293
  url: 'http://queryeye.com'
694
1294
 
1295
+ - regex: 'Qwantify'
1296
+ name: 'Qwantify'
1297
+ category: 'Crawler'
1298
+ url: 'https://www.qwant.com/'
1299
+ producer:
1300
+ name: 'Qwant Corporation'
1301
+ url: 'https://www.qwant.com/'
1302
+
1303
+ - regex: 'Rainmeter'
1304
+ name: 'Rainmeter'
1305
+ category: 'Crawler'
1306
+ url: 'https://www.rainmeter.net'
1307
+
695
1308
  - regex: 'redditbot'
696
1309
  name: 'Reddit Bot'
697
1310
  category: 'Social Media Agent'
@@ -700,6 +1313,14 @@
700
1313
  name: 'reddit inc.'
701
1314
  url: 'http://www.reddit.com'
702
1315
 
1316
+ - regex: 'Riddler'
1317
+ name: 'Riddler'
1318
+ category: 'Security search bot'
1319
+ url: 'https://riddler.io/about'
1320
+ producer:
1321
+ name: 'F-Secure'
1322
+ url: 'https://www.f-secure.com'
1323
+
703
1324
  - regex: 'rogerbot'
704
1325
  name: 'Rogerbot'
705
1326
  category: 'Crawler'
@@ -716,6 +1337,14 @@
716
1337
  name: 'Roihunter a.s.'
717
1338
  url: 'http://roihunter.com/'
718
1339
 
1340
+ - regex: 'SafeDNSBot'
1341
+ name: 'SafeDNSBot'
1342
+ category: 'Crawler'
1343
+ url: 'https://www.safedns.com/searchbot'
1344
+ producer:
1345
+ name: 'SafeDNS, Inc.'
1346
+ url: 'https://www.safedns.com/'
1347
+
719
1348
  - regex: 'Scrapy'
720
1349
  name: 'Scrapy'
721
1350
  category: 'Crawler'
@@ -738,12 +1367,36 @@
738
1367
  url: ''
739
1368
 
740
1369
  - regex: 'SemrushBot'
741
- name: 'Semrush Bot'
1370
+ name: 'SemrushBot'
1371
+ category: 'Crawler'
1372
+ url: 'https://www.semrush.com/bot/'
1373
+ producer:
1374
+ name: 'Semrush Inc.'
1375
+ url: 'https://www.semrush.com/'
1376
+
1377
+ - regex: 'SerpReputationManagementAgent/[\d.]+'
1378
+ name: 'Semrush Reputation Management'
1379
+ category: 'Service Agent'
1380
+ url: 'https://www.semrush.com/bot/'
1381
+ producer:
1382
+ name: 'Semrush Inc.'
1383
+ url: 'https://www.semrush.com/'
1384
+
1385
+ - regex: 'SplitSignalBot'
1386
+ name: 'SplitSignalBot'
1387
+ category: 'Crawler'
1388
+ url: 'https://www.semrush.com/bot/'
1389
+ producer:
1390
+ name: 'Semrush Inc.'
1391
+ url: 'https://www.semrush.com/'
1392
+
1393
+ - regex: 'SiteAuditBot/[\d.]+'
1394
+ name: 'SiteAuditBot'
742
1395
  category: 'Crawler'
743
- url: 'http://www.semrush.com/bot.html'
1396
+ url: 'https://www.semrush.com/bot/'
744
1397
  producer:
745
- name: 'SEMrush'
746
- url: 'http://www.semrush.com'
1398
+ name: 'Semrush Inc.'
1399
+ url: 'https://www.semrush.com/'
747
1400
 
748
1401
  - regex: 'SensikaBot'
749
1402
  name: 'Sensika Bot'
@@ -753,7 +1406,7 @@
753
1406
  name: 'Sensika'
754
1407
  url: 'http://sensika.com'
755
1408
 
756
- - regex: 'SEOENG(World)?Bot'
1409
+ - regex: 'SEOENG(?:World)?Bot'
757
1410
  name: 'SEOENGBot'
758
1411
  category: 'Crawler'
759
1412
  url: 'http://www.seoengine.com/seoengbot.htm'
@@ -761,6 +1414,19 @@
761
1414
  name: 'SEO Engine'
762
1415
  url: 'http://www.seoengine.com'
763
1416
 
1417
+ - regex: 'SEOkicks-Robot'
1418
+ name: 'SEOkicks-Robot'
1419
+ category: 'Crawler'
1420
+ url: 'http://www.seokicks.de/robot.html'
1421
+ producer:
1422
+ name: 'SEOkicks'
1423
+ url: 'https://www.seokicks.de/'
1424
+
1425
+ - regex: 'seoscanners\.net'
1426
+ name: 'Seoscanners.net'
1427
+ category: 'Crawler'
1428
+ url: ''
1429
+
764
1430
  - regex: 'SkypeUriPreview'
765
1431
  name: 'Skype URI Preview'
766
1432
  category: 'Service Agent'
@@ -777,6 +1443,14 @@
777
1443
  name: 'Seznam.cz, a.s.'
778
1444
  url: 'http://www.seznam.cz/'
779
1445
 
1446
+ - regex: 'shopify-partner-homepage-scraper'
1447
+ name: 'Shopify Partner'
1448
+ category: 'Crawler'
1449
+ url: 'https://www.shopify.com/partners'
1450
+ producer:
1451
+ name: 'Shopify'
1452
+ url: 'https://www.shopify.com/'
1453
+
780
1454
  - regex: 'ShopWiki'
781
1455
  name: 'ShopWiki'
782
1456
  category: 'Search tools'
@@ -809,6 +1483,27 @@
809
1483
  name: 'SISTRIX GmbH'
810
1484
  url: 'http://www.sistrix.de'
811
1485
 
1486
+ - regex: 'compatible; (?:SISTRIX )?Optimizer'
1487
+ name: 'SISTRIX Optimizer'
1488
+ category: 'Crawler'
1489
+ url: 'https://optimizer.sistrix.com'
1490
+ producer:
1491
+ name: 'SISTRIX GmbH'
1492
+ url: 'http://www.sistrix.de'
1493
+
1494
+ - regex: 'SiteSucker'
1495
+ name: 'SiteSucker'
1496
+ category: 'Crawler'
1497
+ url: 'http://ricks-apps.com/osx/sitesucker/'
1498
+
1499
+ - regex: 'sixy\.ch'
1500
+ name: 'Sixy.ch'
1501
+ category: 'Site Monitor'
1502
+ url: 'http://sixy.ch'
1503
+ producer:
1504
+ name: 'Manuel Kasper'
1505
+ url: 'https://neon1.net/'
1506
+
812
1507
  - regex: 'Slackbot|Slack-ImgProxy'
813
1508
  name: 'Slackbot'
814
1509
  category: 'Crawler'
@@ -817,7 +1512,7 @@
817
1512
  name: 'Slack Technologies'
818
1513
  url: 'http://slack.com'
819
1514
 
820
- - regex: '(Sogou (web|inst|Pic) spider)|New-Sogou-Spider'
1515
+ - regex: 'Sogou[ -](?:head|inst|Orion|Pic|Test|web)[ -]spider|New-Sogou-Spider'
821
1516
  name: 'Sogou Spider'
822
1517
  category: 'Search bot'
823
1518
  url: 'http://www.sogou.com/docs/help/webmasters.htm'
@@ -833,6 +1528,38 @@
833
1528
  name: 'Tencent Holdings'
834
1529
  url: 'http://www.soso.com'
835
1530
 
1531
+ - regex: 'Sprinklr'
1532
+ name: 'Sprinklr'
1533
+ category: 'Crawler'
1534
+ url: ''
1535
+ producer:
1536
+ name: 'Sprinklr, Inc.'
1537
+ url: 'https://www.sprinklr.com/'
1538
+
1539
+ - regex: 'sqlmap/'
1540
+ name: 'sqlmap'
1541
+ category: 'Security Checker'
1542
+ url: 'http://sqlmap.org/'
1543
+ producer:
1544
+ name: 'sqlmap'
1545
+ url: 'http://sqlmap.org/'
1546
+
1547
+ - regex: 'SSL Labs'
1548
+ name: 'SSL Labs'
1549
+ category: 'Validator'
1550
+ url: 'https://www.ssllabs.com/about/assessment.html'
1551
+ producer:
1552
+ name: 'SSL Labs'
1553
+ url: 'https://www.ssllabs.com/about/assessment.html'
1554
+
1555
+ - regex: 'StatusCake'
1556
+ name: 'StatusCake'
1557
+ category: 'Site Monitor'
1558
+ url: 'https://www.statuscake.com'
1559
+ producer:
1560
+ name: 'StatusCake'
1561
+ url: 'https://www.statuscake.com'
1562
+
836
1563
  - regex: 'Superfeedr bot'
837
1564
  name: 'Superfeedr Bot'
838
1565
  category: 'Feed Fetcher'
@@ -841,6 +1568,11 @@
841
1568
  name: 'Superfeedr'
842
1569
  url: 'https://superfeedr.com/'
843
1570
 
1571
+ - regex: 'Sparkler/[0-9]'
1572
+ name: 'Sparkler'
1573
+ category: 'Crawler'
1574
+ url: 'https://github.com/USCDataScience/sparkler'
1575
+
844
1576
  - regex: 'Spinn3r'
845
1577
  name: 'Spinn3r'
846
1578
  category: 'Crawler'
@@ -849,13 +1581,20 @@
849
1581
  name: 'Tailrank Inc'
850
1582
  url: 'http://spinn3r.com'
851
1583
 
852
- - regex: 'Sputnik(Image)?Bot'
1584
+ - regex: 'SputnikBot'
853
1585
  name: 'Sputnik Bot'
854
- category: ''
1586
+ category: 'Crawler'
1587
+ url: ''
1588
+
1589
+ - regex: 'SputnikFaviconBot'
1590
+ name: 'Sputnik Favicon Bot'
1591
+ category: 'Crawler'
1592
+ url: ''
1593
+
1594
+ - regex: 'SputnikImageBot'
1595
+ name: 'Sputnik Image Bot'
1596
+ category: 'Crawler'
855
1597
  url: ''
856
- producer:
857
- name: ''
858
- url: ''
859
1598
 
860
1599
  - regex: 'SurveyBot'
861
1600
  name: 'Survey Bot'
@@ -865,10 +1604,23 @@
865
1604
  name: 'Domain Tools'
866
1605
  url: 'http://www.domaintools.com'
867
1606
 
1607
+ - regex: 'TarmotGezgin'
1608
+ name: 'Tarmot Gezgin'
1609
+ url: 'http://www.tarmot.com/gezgin/'
1610
+ category: 'Search bot'
1611
+
868
1612
  - regex: 'TelegramBot'
869
- name: 'TelgramBot'
1613
+ name: 'TelegramBot'
870
1614
  url: 'https://telegram.org/blog/bot-revolution'
871
1615
 
1616
+ - regex: 'TLSProbe'
1617
+ name: 'TLSProbe'
1618
+ url: 'https://scan.trustnet.venafi.com/'
1619
+ category: 'Security search bot'
1620
+ producer:
1621
+ name: 'Venafi TrustNet'
1622
+ url: 'https://www.venafi.com'
1623
+
872
1624
  - regex: 'TinEye-bot'
873
1625
  name: 'TinEye Crawler'
874
1626
  category: 'Search bot'
@@ -885,6 +1637,27 @@
885
1637
  name: ''
886
1638
  url: ''
887
1639
 
1640
+ - regex: 'theoldreader\.com'
1641
+ name: 'theoldreader'
1642
+ category: 'Feed Reader'
1643
+ url: 'https://theoldreader.com'
1644
+
1645
+ - regex: 'Trackable/0\.1'
1646
+ name: 'Chartable'
1647
+ category: 'Site Monitor'
1648
+ url: 'https://help.chartable.com/article/34-what-is-the-trackable-analytics-prefix'
1649
+ producer:
1650
+ name: 'Chartable'
1651
+ url: 'https://chartable.com'
1652
+
1653
+ - regex: 'trendictionbot'
1654
+ name: 'Trendiction Bot'
1655
+ category: 'Crawler'
1656
+ url: 'http://www.trendiction.de/bot'
1657
+ producer:
1658
+ name: 'Talkwalker Inc.'
1659
+ url: 'http://www.talkwalker.com'
1660
+
888
1661
  - regex: 'TurnitinBot'
889
1662
  name: 'TurnitinBot'
890
1663
  category: 'Crawler'
@@ -893,13 +1666,13 @@
893
1666
  name: 'iParadigms, LLC.'
894
1667
  url: 'http://www.turnitin.com'
895
1668
 
896
- - regex: 'TweetedTimes Bot'
1669
+ - regex: 'TweetedTimes'
897
1670
  name: 'TweetedTimes Bot'
898
1671
  category: 'Crawler'
899
- url: 'http://tweetedtimes.com'
1672
+ url: 'https://tweetedtimes.com/'
900
1673
  producer:
901
1674
  name: 'TweetedTimes'
902
- url: 'http://tweetedtimes.com/'
1675
+ url: 'https://tweetedtimes.com/'
903
1676
 
904
1677
  - regex: 'TweetmemeBot'
905
1678
  name: 'Tweetmeme Bot'
@@ -909,6 +1682,13 @@
909
1682
  name: 'Mediasift'
910
1683
  url: ''
911
1684
 
1685
+ - regex: 'Twingly Recon'
1686
+ name: 'Twingly Recon'
1687
+ category: 'Crawler'
1688
+ producer:
1689
+ name: 'Twingly'
1690
+ url: 'https://www.twingly.com'
1691
+
912
1692
  - regex: 'Twitterbot'
913
1693
  name: 'Twitterbot'
914
1694
  category: 'Social Media Agent'
@@ -917,13 +1697,37 @@
917
1697
  name: 'Twitter'
918
1698
  url: 'http://www.twitter.com'
919
1699
 
1700
+ - regex: 'UniversalFeedParser'
1701
+ name: 'UniversalFeedParser'
1702
+ category: 'Feed Fetcher'
1703
+ url: 'https://github.com/kurtmckee/feedparser'
1704
+ producer:
1705
+ name: 'Kurt McKee'
1706
+ url: 'https://github.com/kurtmckee'
1707
+
1708
+ - regex: 'via secureurl\.fwdcdn\.com'
1709
+ name: 'UkrNet Mail Proxy'
1710
+ category: 'Crawler'
1711
+ url: ''
1712
+ producer:
1713
+ name: 'UkrNet Ltd'
1714
+ url: 'https://www.ukr.net/'
1715
+
1716
+ - regex: 'Uptime(?:bot)?/[\d.]+'
1717
+ name: 'Uptimebot'
1718
+ category: 'Site Monitor'
1719
+ url: 'https://uptime.com/uptime-bot'
1720
+ producer:
1721
+ name: 'Uptime'
1722
+ url: 'https://uptime.com/'
1723
+
920
1724
  - regex: 'UptimeRobot'
921
- name: 'Uptime Robot'
1725
+ name: 'UptimeRobot'
922
1726
  category: 'Site Monitor'
923
- url: ''
1727
+ url: 'https://uptimerobot.com/'
924
1728
  producer:
925
1729
  name: 'Uptime Robot'
926
- url: 'http://uptimerobot.com'
1730
+ url: 'https://uptimerobot.com/'
927
1731
 
928
1732
  - regex: 'URLAppendBot'
929
1733
  name: 'URLAppendBot'
@@ -933,6 +1737,30 @@
933
1737
  name: 'Profound Networks'
934
1738
  url: 'http://www.profound.net'
935
1739
 
1740
+ - regex: 'Vagabondo'
1741
+ name: 'Vagabondo'
1742
+ category: 'Crawler'
1743
+ url: ''
1744
+ producer:
1745
+ name: 'WiseGuys'
1746
+ url: 'http://www.wise-guys.nl/'
1747
+
1748
+ - regex: 'vkShare; '
1749
+ name: 'VK Share Button'
1750
+ category: 'Crawler'
1751
+ url: 'https://dev.vk.com/en/widgets/share'
1752
+ producer:
1753
+ name: 'VK'
1754
+ url: 'https://vk.com/'
1755
+
1756
+ - regex: 'VKRobot'
1757
+ name: 'VK Robot'
1758
+ category: 'Crawler'
1759
+ url: 'https://dev.vk.com/en/'
1760
+ producer:
1761
+ name: 'VK'
1762
+ url: 'https://vk.com/'
1763
+
936
1764
  - regex: 'VSMCrawler'
937
1765
  name: 'Visual Site Mapper Crawler'
938
1766
  category: 'Crawler'
@@ -965,7 +1793,7 @@
965
1793
  name: 'W3C'
966
1794
  url: 'http://www.w3.org'
967
1795
 
968
- - regex: 'W3C_Validator'
1796
+ - regex: 'W3C_Validator|Validator\.nu'
969
1797
  name: 'W3C Markup Validation Service'
970
1798
  category: 'Validator'
971
1799
  url: 'http://validator.w3.org/services'
@@ -989,7 +1817,27 @@
989
1817
  name: 'W3C'
990
1818
  url: 'http://www.w3.org'
991
1819
 
992
- - regex: 'WeSEE(:Search)?'
1820
+ - regex: 'P3P Validator'
1821
+ name: 'W3C P3P Validator'
1822
+ category: 'Validator'
1823
+ url: 'https://www.w3.org/P3P/validator.html'
1824
+ producer:
1825
+ name: 'W3C'
1826
+ url: 'https://www.w3.org'
1827
+
1828
+ - regex: 'Wappalyzer'
1829
+ name: 'Wappalyzer'
1830
+ url: 'https://github.com/AliasIO/Wappalyzer'
1831
+ producer:
1832
+ name: 'AliasIO'
1833
+ url: 'https://github.com/AliasIO'
1834
+
1835
+ - regex: 'PTST/'
1836
+ name: 'WebPageTest'
1837
+ category: 'Site Monitor'
1838
+ url: 'https://www.webpagetest.org'
1839
+
1840
+ - regex: 'WeSEE'
993
1841
  name: 'WeSEE:Search'
994
1842
  category: 'Search bot'
995
1843
  url: 'http://www.wesee.com/bot'
@@ -1013,6 +1861,30 @@
1013
1861
  name: 'WebSitePulse'
1014
1862
  url: 'http://www.websitepulse.com/'
1015
1863
 
1864
+ - regex: 'WordPress.+isitwp\.com'
1865
+ name: 'IsItWP'
1866
+ category: 'Crawler'
1867
+ url: 'https://www.isitwp.com/'
1868
+ producer:
1869
+ name: 'WPBeginner, LLC'
1870
+ url: 'https://www.wpbeginner.com/'
1871
+
1872
+ - regex: 'Automattic Analytics Crawler/[\d.]+'
1873
+ name: 'Automattic Analytics'
1874
+ category: 'Crawler'
1875
+ url: 'https://wordpress.com/crawler/'
1876
+ producer:
1877
+ name: 'Wordpress.org'
1878
+ url: 'https://wordpress.org/'
1879
+
1880
+ - regex: 'WordPress'
1881
+ name: 'WordPress'
1882
+ category: 'Service Agent'
1883
+ url: 'https://wordpress.org/'
1884
+ producer:
1885
+ name: 'Wordpress.org'
1886
+ url: 'https://wordpress.org/'
1887
+
1016
1888
  - regex: 'Wotbox'
1017
1889
  name: 'Wotbox'
1018
1890
  category: 'Search bot'
@@ -1021,6 +1893,14 @@
1021
1893
  name: 'Wotbox'
1022
1894
  url: 'http://www.wotbox.com'
1023
1895
 
1896
+ - regex: 'XenForo'
1897
+ name: 'XenForo'
1898
+ category: 'Service Agent'
1899
+ url: 'https://xenforo.com/'
1900
+ producer:
1901
+ name: 'XenForo Ltd.'
1902
+ url: 'https://xenforo.com/'
1903
+
1024
1904
  - regex: 'yacybot'
1025
1905
  name: 'YaCy'
1026
1906
  category: 'Search bot'
@@ -1045,6 +1925,14 @@
1045
1925
  name: 'Yahoo! Inc.'
1046
1926
  url: 'http://www.yahoo.com'
1047
1927
 
1928
+ - regex: 'YahooMailProxy'
1929
+ name: 'Yahoo! Mail Proxy'
1930
+ category: 'Service Agent'
1931
+ url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
1932
+ producer:
1933
+ name: 'Yahoo! Inc.'
1934
+ url: 'http://www.yahoo.com'
1935
+
1048
1936
  - regex: 'YahooCacheSystem'
1049
1937
  name: 'Yahoo! Cache System'
1050
1938
  category: 'Crawler'
@@ -1053,15 +1941,47 @@
1053
1941
  name: 'Yahoo! Inc.'
1054
1942
  url: 'http://www.yahoo.com'
1055
1943
 
1056
- - regex: 'Yandex(Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher'
1944
+ - regex: 'Y!J-BRW'
1945
+ name: 'Yahoo! Japan BRW'
1946
+ category: 'Crawler'
1947
+ url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
1948
+ producer:
1949
+ name: 'Yahoo! Japan Corp.'
1950
+ url: 'https://www.yahoo.co.jp/'
1951
+
1952
+ - regex: 'Y!J-WSC'
1953
+ name: 'Yahoo! Japan WSC'
1954
+ category: 'Crawler'
1955
+ url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
1956
+ producer:
1957
+ name: 'Yahoo! Japan Corp.'
1958
+ url: 'https://www.yahoo.co.jp/'
1959
+
1960
+ - regex: 'Y!J-ASR'
1961
+ name: 'Yahoo! Japan ASR'
1962
+ category: 'Crawler'
1963
+ url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
1964
+ producer:
1965
+ name: 'Yahoo! Japan Corp.'
1966
+ url: 'https://www.yahoo.co.jp/'
1967
+
1968
+ - regex: '^Y!J'
1969
+ name: 'Yahoo! Japan'
1970
+ category: 'Crawler'
1971
+ url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
1972
+ producer:
1973
+ name: 'Yahoo! Japan Corp.'
1974
+ url: 'https://www.yahoo.co.jp/'
1975
+
1976
+ - regex: 'Yandex(?:(?:\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher'
1057
1977
  name: 'Yandex Bot'
1058
1978
  category: 'Search bot'
1059
- url: 'http://www.yandex.com/bots'
1979
+ url: 'https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html'
1060
1980
  producer:
1061
1981
  name: 'Yandex LLC'
1062
- url: 'http://company.yandex.com'
1982
+ url: 'https://yandex.com/company/'
1063
1983
 
1064
- - regex: 'Yeti'
1984
+ - regex: 'Yeti|NaverJapan|AdsBot-Naver'
1065
1985
  name: 'Yeti/Naverbot'
1066
1986
  category: 'Search bot'
1067
1987
  url: 'http://help.naver.com/robots/'
@@ -1090,6 +2010,11 @@
1090
2010
  name: 'YunYun'
1091
2011
  url: 'http://www.yunyun.com'
1092
2012
 
2013
+ - regex: 'zgrab'
2014
+ name: 'zgrab'
2015
+ category: 'Security Checker'
2016
+ url: 'https://github.com/zmap/zgrab'
2017
+
1093
2018
  - regex: 'Zookabot'
1094
2019
  name: 'Zookabot'
1095
2020
  category: 'Crawler'
@@ -1114,7 +2039,7 @@
1114
2039
  name: 'Yottaa'
1115
2040
  url: 'http://www.yottaa.com/'
1116
2041
 
1117
- - regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857.*'
2042
+ - regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857'
1118
2043
  name: 'Yahoo Gemini'
1119
2044
  category: 'Crawler'
1120
2045
  url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
@@ -1133,20 +2058,20 @@
1133
2058
  - regex: 'HubPages.*crawlingpolicy'
1134
2059
  name: 'HubPages'
1135
2060
  category: 'Crawler'
1136
- url: 'http://hubpages.com/help/crawlingpolicy'
2061
+ url: 'https://hubpages.com/help/crawlingpolicy'
1137
2062
  producer:
1138
- name: 'HubPages'
1139
- url: 'http://hubpages.com/'
2063
+ name: 'HubPages, Inc.'
2064
+ url: 'https://discover.hubpages.com/'
1140
2065
 
1141
- - regex: 'Pinterest/\d\.\d.*www\.pinterest\.com.*'
2066
+ - regex: 'Pinterest(?:bot)?/[\d.]+.*www\.pinterest\.com'
1142
2067
  name: 'Pinterest'
1143
- url: ''
2068
+ url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
1144
2069
  category: 'Crawler'
1145
2070
  producer:
1146
2071
  name: 'Pinterest'
1147
- url: 'http://www.pinterest.com/'
2072
+ url: 'https://www.pinterest.com/'
1148
2073
 
1149
- - regex: 'Site24x7'
2074
+ - regex: '.*Site24x7'
1150
2075
  name: 'Site24x7 Website Monitoring'
1151
2076
  category: 'Site Monitor'
1152
2077
  url: 'https://www.site24x7.com/site24x7-faq.html'
@@ -1154,6 +2079,54 @@
1154
2079
  name: 'Site24x7'
1155
2080
  url: 'https://www.site24x7.com'
1156
2081
 
2082
+ - regex: '.* HLB/[\d.]+'
2083
+ name: 'Site24x7 Defacement Monitor'
2084
+ category: 'Site Monitor'
2085
+ url: 'https://support.site24x7.com/portal/en/kb/articles/default-user-agent-used-in-website-defacement-monitor'
2086
+ producer:
2087
+ name: 'Site24x7'
2088
+ url: 'https://www.site24x7.com/'
2089
+
2090
+ - regex: 's~snapchat-proxy'
2091
+ name: 'Snapchat Proxy'
2092
+ category: 'Crawler'
2093
+ url: 'https://www.snapchat.com'
2094
+ producer:
2095
+ name: 'Snapchat Inc.'
2096
+ url: 'https://www.snapchat.com'
2097
+
2098
+ - regex: 'Snap URL Preview Service'
2099
+ name: 'Snap URL Preview Service'
2100
+ category: 'Service Agent'
2101
+ url: 'https://developers.snap.com/robots'
2102
+ producer:
2103
+ name: 'Snapchat Inc.'
2104
+ url: 'https://www.snapchat.com/'
2105
+
2106
+ - regex: 'SnapchatAds/[\d.]+'
2107
+ name: 'Snapchat Ads'
2108
+ category: 'Crawler'
2109
+ url: 'https://businesshelp.snapchat.com/s/article/adsbot-crawler?language=en_US'
2110
+ producer:
2111
+ name: 'Snapchat Inc.'
2112
+ url: 'https://www.snapchat.com/'
2113
+
2114
+ - regex: "Let's Encrypt validation server"
2115
+ name: "Let's Encrypt Validation"
2116
+ category: 'Service Agent'
2117
+ url: 'https://letsencrypt.org/how-it-works/'
2118
+ producer:
2119
+ name: "Let's Encrypt"
2120
+ url: 'https://letsencrypt.org'
2121
+
2122
+ - regex: 'GrapeshotCrawler'
2123
+ name: 'Grapeshot'
2124
+ category: 'Crawler'
2125
+ url: 'https://www.grapeshot.com/crawler'
2126
+ producer:
2127
+ name: 'Grapeshot'
2128
+ url: 'https://www.grapeshot.com'
2129
+
1157
2130
  - regex: 'www\.monitor\.us'
1158
2131
  name: 'Monitor.Us'
1159
2132
  category: 'Site Monitor'
@@ -1162,6 +2135,22 @@
1162
2135
  name: 'Monitor.Us'
1163
2136
  url: 'http://www.monitor.us'
1164
2137
 
2138
+ - regex: 'Catchpoint'
2139
+ name: 'Catchpoint'
2140
+ category: 'Site Monitor'
2141
+ url: 'https://www.catchpoint.com/'
2142
+ producer:
2143
+ name: 'Catchpoint Systems'
2144
+ url: 'https://www.catchpoint.com/'
2145
+
2146
+ - regex: 'bitlybot'
2147
+ name: 'BitlyBot'
2148
+ category: 'Crawler'
2149
+ url: 'https://bitly.com'
2150
+ producer:
2151
+ name: 'Bitly, Inc.'
2152
+ url: 'https://bitly.com'
2153
+
1165
2154
  - regex: 'Zao/'
1166
2155
  name: 'Zao'
1167
2156
  category: 'Crawler'
@@ -1214,13 +2203,211 @@
1214
2203
  - regex: 'AdMantX.*admantx\.com'
1215
2204
  name: 'ADMantX'
1216
2205
 
1217
- - regex: 'Server Density Service Monitoring.*'
2206
+ - regex: 'Server Density Service Monitoring'
1218
2207
  name: 'Server Density'
1219
2208
 
1220
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Catchpoint bot|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
1221
- name: 'Generic Bot'
2209
+ - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
2210
+ name: 'RSSRadio Bot'
1222
2211
 
1223
- # Generic detections
2212
+ - regex: '^sentry'
2213
+ name: 'Sentry Bot'
2214
+ producer:
2215
+ name: 'Sentry'
2216
+ url: 'https://sentry.io'
2217
+
2218
+ - regex: '^Spotify/[\d.]+$'
2219
+ name: 'Spotify'
2220
+ producer:
2221
+ name: 'Spotify'
2222
+ url: 'https://www.spotify.com'
2223
+
2224
+ - regex: 'The Knowledge AI'
2225
+ name: 'The Knowledge AI'
2226
+ category: 'Crawler'
2227
+
2228
+ - regex: 'Embedly'
2229
+ name: 'Embedly'
2230
+ category: 'Crawler'
2231
+ url: 'https://support.embed.ly/hc/en-us'
2232
+ producer:
2233
+ name: 'A Medium, Corp.'
2234
+ url: 'https://medium.com/'
2235
+
2236
+ - regex: 'BrandVerity'
2237
+ name: 'BrandVerity'
2238
+ category: 'Crawler'
2239
+ url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
2240
+ producer:
2241
+ name: 'BrandVerity, Inc.'
2242
+ url: 'https://www.brandverity.com/'
2243
+
2244
+ - regex: 'Kaspersky Lab CFR link resolver'
2245
+ name: 'Kaspersky'
2246
+ category: 'Security Checker'
2247
+ url: 'https://www.kaspersky.com/'
2248
+ producer:
2249
+ name: 'AO Kaspersky Lab'
2250
+ url: 'https://www.kaspersky.com/'
2251
+
2252
+ - regex: 'eZ Publish Link Validator'
2253
+ name: 'eZ Publish Link Validator'
2254
+ category: 'Crawler'
2255
+ url: 'https://ez.no/'
2256
+ producer:
2257
+ name: 'eZ Systems AS'
2258
+ url: 'https://ez.no/'
2259
+
2260
+ - regex: 'woorankreview'
2261
+ name: 'WooRank'
2262
+ category: 'Search bot'
2263
+ url: 'https://www.woorank.com/'
2264
+ producer:
2265
+ name: 'WooRank sprl'
2266
+ url: 'https://www.woorank.com/'
2267
+
2268
+ - regex: 'by Siteimprove\.com'
2269
+ name: 'Siteimprove'
2270
+ category: 'Search bot'
2271
+ url: 'https://siteimprove.com/'
2272
+ producer:
2273
+ name: 'Siteimprove GmbH'
2274
+ url: 'https://siteimprove.com/'
2275
+
2276
+ - regex: 'CATExplorador'
2277
+ name: 'CATExplorador'
2278
+ category: 'Search bot'
2279
+ url: 'https://fundacio.cat/ca/domini/'
2280
+ producer:
2281
+ name: 'Fundació puntCAT'
2282
+ url: 'https://fundacio.cat/ca/domini/'
2283
+
2284
+ - regex: 'Buck'
2285
+ name: 'Buck'
2286
+ category: 'Search bot'
2287
+ url: 'https://hypefactors.com/'
2288
+ producer:
2289
+ name: 'Hypefactors A/S'
2290
+ url: 'https://hypefactors.com/'
2291
+
2292
+ - regex: 'tracemyfile'
2293
+ name: 'TraceMyFile'
2294
+ category: 'Search bot'
2295
+ url: 'https://www.tracemyfile.com/'
2296
+ producer:
2297
+ name: 'Idee Inc.'
2298
+ url: 'http://ideeinc.com/'
2299
+
2300
+ - regex: 'zelist\.ro feed parser'
2301
+ name: 'Ze List'
2302
+ url: 'https://www.zelist.ro/'
2303
+ category: 'Feed Fetcher'
2304
+ producer:
2305
+ name: 'Treeworks SRL'
2306
+ url: 'https://www.tree.ro/'
2307
+
2308
+ - regex: 'weborama-fetcher'
2309
+ name: 'Weborama'
2310
+ category: 'Search bot'
2311
+ url: 'https://weborama.com/'
2312
+ producer:
2313
+ name: 'Weborama SA'
2314
+ url: 'https://weborama.com/'
2315
+
2316
+ - regex: 'BoardReader Favicon Fetcher'
2317
+ name: 'BoardReader'
2318
+ category: 'Search bot'
2319
+ url: 'https://boardreader.com/'
2320
+ producer:
2321
+ name: 'Effyis Inc'
2322
+ url: 'https://boardreader.com/'
2323
+
2324
+ - regex: 'IDG/IT'
2325
+ name: 'IDG/IT'
2326
+ category: 'Search bot'
2327
+ url: 'https://spaziodati.eu/'
2328
+ producer:
2329
+ name: 'SpazioDati S.r.l.'
2330
+ url: 'https://spaziodati.eu/'
2331
+
2332
+ - regex: 'Bytespider'
2333
+ name: 'Bytespider'
2334
+ category: 'Search bot'
2335
+ url: 'https://bytedance.com/'
2336
+ producer:
2337
+ name: 'ByteDance Ltd.'
2338
+ url: 'https://bytedance.com/'
2339
+
2340
+ - regex: 'WikiDo'
2341
+ name: 'WikiDo'
2342
+ category: 'Search bot'
2343
+ url: 'https://www.wikido.com/'
2344
+ producer:
2345
+ name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
2346
+ url: 'https://www.wikido.com/'
2347
+
2348
+ - regex: 'Awario(?:Smart)?Bot'
2349
+ name: 'Awario'
2350
+ category: 'Search bot'
2351
+ url: 'https://awario.com/bots.html'
2352
+ producer:
2353
+ name: 'TechFusion Ltd.'
2354
+ url: 'https://www.techfusion.com.cy/'
2355
+
2356
+ - regex: 'AwarioRssBot'
2357
+ name: 'Awario'
2358
+ category: 'Feed Fetcher'
2359
+ url: 'https://awario.com/bots.html'
2360
+ producer:
2361
+ name: 'TechFusion Ltd.'
2362
+ url: 'https://www.techfusion.com.cy/'
2363
+
2364
+ - regex: 'oBot'
2365
+ name: 'oBot'
2366
+ category: 'Search bot'
2367
+ url: 'https://www.xforce-security.com/crawler/'
2368
+ producer:
2369
+ name: 'IBM Germany Research & Development GmbH'
2370
+ url: 'https://exchange.xforce.ibmcloud.com/'
2371
+
2372
+ - regex: 'SMTBot'
2373
+ name: 'SMTBot'
2374
+ category: 'Search bot'
2375
+ url: 'https://www.similartech.com/smtbot'
2376
+ producer:
2377
+ name: 'SimilarTech Ltd.'
2378
+ url: 'https://www.similartech.com/'
2379
+
2380
+ - regex: 'LCC'
2381
+ name: 'LCC'
2382
+ category: 'Search bot'
2383
+ url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
2384
+ producer:
2385
+ name: 'Universität Leipzig'
2386
+ url: 'https://www.uni-leipzig.de/'
2387
+
2388
+ - regex: 'Startpagina-Linkchecker'
2389
+ name: 'Startpagina Linkchecker'
2390
+ category: 'Search bot'
2391
+ url: 'https://www.startpagina.nl/linkchecker'
2392
+ producer:
2393
+ name: 'Startpagina B.V.'
2394
+ url: 'https://www.startpagina.nl/'
2395
+
2396
+ - regex: 'MoodleBot-Linkchecker'
2397
+ name: 'MoodleBot Linkchecker'
2398
+ category: 'Search bot'
2399
+ url: 'https://docs.moodle.org/en/Usage'
2400
+ producer:
2401
+ name: 'Moodle Pty Ltd'
2402
+ url: 'https://moodle.org/'
2403
+
2404
+ - regex: 'GTmetrix'
2405
+ name: 'GTmetrix'
2406
+ category: 'Crawler'
2407
+ url: 'https://gtmetrix.com/'
2408
+ producer:
2409
+ name: 'Carbon60 Operating Co. Ltd.'
2410
+ url: 'https://www.carbon60.com/'
1224
2411
 
1225
2412
  - regex: 'Nutch'
1226
2413
  name: 'Nutch-based Bot'
@@ -1228,7 +2415,2128 @@
1228
2415
  url: 'https://nutch.apache.org'
1229
2416
  producer:
1230
2417
  name: 'The Apache Software Foundation'
1231
- url: 'http://www.apache.org/foundation/'
2418
+ url: 'https://www.apache.org/foundation/'
2419
+
2420
+ - regex: 'Seobility'
2421
+ name: 'Seobility'
2422
+ category: 'Crawler'
2423
+ url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
2424
+
2425
+ - regex: 'Vercelbot'
2426
+ name: 'Vercel Bot'
2427
+ category: 'Service bot'
2428
+ url: 'https://vercel.com'
2429
+
2430
+ - regex: 'Grammarly'
2431
+ name: 'Grammarly'
2432
+ category: 'Service bot'
2433
+ url: 'https://www.grammarly.com'
1232
2434
 
1233
- - regex: '[a-z0-9\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)([^a-z]|$)'
2435
+ - regex: 'Robozilla'
2436
+ name: 'Robozilla'
2437
+ category: 'Crawler'
2438
+
2439
+ - regex: 'Domains Project'
2440
+ name: 'Domains Project'
2441
+ category: 'Crawler'
2442
+ url: 'https://domainsproject.org'
2443
+
2444
+ - regex: 'PetalBot'
2445
+ name: 'Petal Bot'
2446
+ category: 'Crawler'
2447
+ url: 'https://aspiegel.com/petalbot'
2448
+
2449
+ - regex: 'SerendeputyBot'
2450
+ name: 'Serendeputy Bot'
2451
+ category: 'Crawler'
2452
+ url: 'https://serendeputy.com/about/serendeputy-bot'
2453
+
2454
+ - regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx\.com.*service-fetcher'
2455
+ name: 'ADmantX Service Fetcher'
2456
+ category: 'Service bot'
2457
+ url: 'https://www.admantx.com/service-fetcher.html'
2458
+
2459
+ - regex: 'SemanticScholarBot'
2460
+ name: 'Semantic Scholar Bot'
2461
+ category: 'Crawler'
2462
+ url: 'https://www.semanticscholar.org/crawler'
2463
+
2464
+ - regex: 'VelenPublicWebCrawler'
2465
+ name: 'Velen Public Web Crawler'
2466
+ category: 'Crawler'
2467
+ url: 'https://hunter.io/robot'
2468
+
2469
+ - regex: 'Barkrowler'
2470
+ name: 'Barkrowler'
2471
+ category: 'Crawler'
2472
+ url: 'http://www.exensa.com/crawl'
2473
+
2474
+ - regex: 'BDCbot'
2475
+ name: 'BDCbot'
2476
+ category: 'Crawler'
2477
+ url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
2478
+ producer:
2479
+ name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
2480
+ url: 'https://bigdatacorp.com.br/'
2481
+
2482
+ - regex: 'adbeat'
2483
+ name: 'Adbeat'
2484
+ category: 'Crawler'
2485
+ url: 'https://www.adbeat.com/operation_policy'
2486
+ producer:
2487
+ name: 'PPC Labs LLC'
2488
+ url: 'https://www.adbeat.com/'
2489
+
2490
+ - regex: '(?:BuiltWith|BW)/[\d.]+'
2491
+ name: 'BuiltWith'
2492
+ category: 'Crawler'
2493
+ url: 'https://builtwith.com/biup'
2494
+ producer:
2495
+ name: 'BuiltWith Pty Ltd'
2496
+ url: 'https://builtwith.com/'
2497
+
2498
+ - regex: 'https://whatis\.contentkingapp\.com'
2499
+ name: 'ContentKing'
2500
+ category: 'Site Monitor'
2501
+ url: 'https://whatis.contentkingapp.com/'
2502
+ producer:
2503
+ name: 'ContentKing BV'
2504
+ url: 'https://www.contentkingapp.com/'
2505
+
2506
+ - regex: 'MicroAdBot'
2507
+ name: 'MicroAdBot'
2508
+ category: 'Crawler'
2509
+ url: 'https://www.microad.co.jp/'
2510
+ producer:
2511
+ name: 'MicroAd, Inc.'
2512
+ url: 'https://www.microad.co.jp/'
2513
+
2514
+ - regex: 'PingAdmin\.Ru'
2515
+ name: 'PingAdmin.Ru'
2516
+ category: 'Site Monitor'
2517
+ url: 'https://ping-admin.ru/'
2518
+
2519
+ - regex: 'notifyninja.+monitoring'
2520
+ name: 'Notify Ninja'
2521
+ category: 'Site Monitor'
2522
+ url: 'http://notifyninja.com'
2523
+
2524
+ - regex: 'WebDataStats'
2525
+ name: 'WebDataStats'
2526
+ category: 'Crawler'
2527
+ url: 'https://webdatastats.com/policy.html'
2528
+ producer:
2529
+ name: 'WebTehRazrabotka LLC'
2530
+ url: 'https://webdatastats.com/'
2531
+
2532
+ - regex: 'parse\.ly scraper'
2533
+ name: 'parse.ly'
2534
+ category: 'Crawler'
2535
+ url: 'https://www.parse.ly/help/integration/crawler'
2536
+ producer:
2537
+ name: 'Parsely, Inc.'
2538
+ url: 'https://www.parse.ly/'
2539
+
2540
+ - regex: 'Nimbostratus-Bot'
2541
+ name: 'Nimbostratus Bot'
2542
+ category: 'Site Monitor'
2543
+ url: 'http://cloudsystemnetworks.com'
2544
+
2545
+ - regex: 'HeartRails_Capture/[\d.]+'
2546
+ name: 'Heart Rails Capture'
2547
+ category: 'Service Agent'
2548
+ url: 'http://capture.heartrails.com'
2549
+
2550
+ - regex: 'Project-Resonance'
2551
+ name: 'Project Resonance'
2552
+ category: 'Crawler'
2553
+ url: 'https://project-resonance.com/'
2554
+ producer:
2555
+ name: 'RedHunt Labs Limited'
2556
+ url: 'https://redhuntlabs.com/'
2557
+
2558
+ - regex: 'DataXu/[\d.]+'
2559
+ name: 'DataXu'
2560
+ category: 'Service Agent'
2561
+ url: 'https://advertising.roku.com/dataxu'
2562
+ producer:
2563
+ name: 'Roku, Inc.'
2564
+ url: 'https://roku.com'
2565
+
2566
+ - regex: 'Cocolyzebot'
2567
+ name: 'Cocolyzebot'
2568
+ category: 'Crawler'
2569
+ url: 'https://cocolyze.com/en/cocolyzebot'
2570
+ producer:
2571
+ name: 'VSI INNOVATION SAS'
2572
+ url: 'https://vsi-innovation.com/'
2573
+
2574
+ - regex: 'veryhip'
2575
+ name: 'VeryHip'
2576
+ category: 'Crawler'
2577
+ url: 'https://veryhip.com/'
2578
+ producer:
2579
+ name: 'VeryHip'
2580
+ url: 'https://veryhip.com/'
2581
+
2582
+ - regex: 'LinkpadBot'
2583
+ name: 'LinkpadBot'
2584
+ category: 'Crawler'
2585
+ url: 'https://www.linkpad.org/'
2586
+ producer:
2587
+ name: 'Solomono LLC'
2588
+ url: 'https://www.linkpad.org/'
2589
+
2590
+ - regex: 'MuscatFerret'
2591
+ name: 'MuscatFerret'
2592
+ category: 'Crawler'
2593
+ url: 'http://www.webtop.com/'
2594
+
2595
+ - regex: 'PageThing\.com'
2596
+ name: 'PageThing'
2597
+ category: 'Crawler'
2598
+ url: 'https://www.pagething.com/'
2599
+ producer:
2600
+ name: 'SPECIALNOISE LTD'
2601
+ url: 'https://www.specialnoise.com/'
2602
+
2603
+ - regex: 'ArchiveBox'
2604
+ name: 'ArchiveBox'
2605
+ url: 'https://archivebox.io/'
2606
+ category: 'Crawler'
2607
+ producer:
2608
+ name: ''
2609
+ url: ''
2610
+
2611
+ - regex: 'Choosito'
2612
+ name: 'Choosito'
2613
+ url: 'https://www.choosito.com/'
2614
+ category: 'Crawler'
2615
+ producer:
2616
+ name: 'Choosito! Inc.'
2617
+ url: 'https://www.choosito.com/'
2618
+
2619
+ - regex: 'datagnionbot'
2620
+ name: 'datagnionbot'
2621
+ url: 'https://www.datagnion.com/bot.html'
2622
+ category: 'Crawler'
2623
+ producer:
2624
+ name: 'DATAGNION GMBH'
2625
+ url: 'https://www.datagnion.com/'
2626
+
2627
+ - regex: 'WhatCMS'
2628
+ name: 'WhatCMS'
2629
+ url: 'https://whatcms.org/'
2630
+ category: 'Crawler'
2631
+ producer:
2632
+ name: 'Nineteen Ten LLC'
2633
+ url: 'https://whatcms.org/'
2634
+
2635
+ - regex: 'httpx'
2636
+ name: 'httpx'
2637
+ url: 'https://github.com/projectdiscovery/httpx'
2638
+ category: 'Crawler'
2639
+ producer:
2640
+ name: 'ProjectDiscovery, Inc.'
2641
+ url: 'https://projectdiscovery.io/'
2642
+
2643
+ - regex: '.*\.oast\.'
2644
+ name: 'Interactsh'
2645
+ category: 'Security Checker'
2646
+ url: 'https://github.com/projectdiscovery/interactsh'
2647
+ producer:
2648
+ name: 'ProjectDiscovery, Inc.'
2649
+ url: 'https://projectdiscovery.io/'
2650
+
2651
+ - regex: 'scaninfo@(?:expanseinc|paloaltonetworks)\.com'
2652
+ name: 'Expanse'
2653
+ category: 'Security Checker'
2654
+ url: 'https://expanse.co/'
2655
+ producer:
2656
+ name: 'Expanse Inc.'
2657
+ url: 'https://expanse.co/'
2658
+
2659
+ - regex: 'HuaweiWebCatBot'
2660
+ name: 'HuaweiWebCatBot'
2661
+ category: 'Crawler'
2662
+ url: 'https://isecurity.huawei.com'
2663
+ producer:
2664
+ name: 'Huawei Technologies Co., Ltd.'
2665
+ url: 'https://huawei.com'
2666
+
2667
+ - regex: 'Hatena-Favicon'
2668
+ name: 'Hatena Favicon'
2669
+ category: 'Crawler'
2670
+ url: 'https://www.hatena.ne.jp/faq/'
2671
+ producer:
2672
+ name: 'Hatena Co., Ltd.'
2673
+ url: 'https://www.hatena.ne.jp'
2674
+ - regex: 'Hatena-?Bookmark'
2675
+ name: 'Hatena Bookmark'
2676
+ category: 'Crawler'
2677
+ url: 'https://www.hatena.ne.jp/faq/'
2678
+ producer:
2679
+ name: 'Hatena Co., Ltd.'
2680
+ url: 'https://www.hatena.ne.jp'
2681
+
2682
+ - regex: 'RyowlEngine/[\d.]+'
2683
+ name: 'Ryowl'
2684
+ category: 'Crawler'
2685
+ url: 'https://ryowl.org'
2686
+
2687
+ - regex: 'OdklBot/[\d.]+'
2688
+ name: 'Odnoklassniki Bot'
2689
+ category: 'Crawler'
2690
+ url: 'https://odnoklassniki.ru'
2691
+
2692
+ - regex: 'Mediatoolkitbot'
2693
+ name: 'Mediatoolkit Bot'
2694
+ category: 'Crawler'
2695
+ url: 'https://mediatoolkit.com'
2696
+
2697
+ - regex: 'ZoominfoBot'
2698
+ name: 'ZoominfoBot'
2699
+ category: 'Crawler'
2700
+ url: 'https://www.zoominfo.com'
2701
+
2702
+ - regex: 'WeViKaBot/[\d.]+'
2703
+ name: 'WeViKaBot'
2704
+ category: 'Crawler'
2705
+ url: 'http://www.wevika.de'
2706
+
2707
+ - regex: 'SEOkicks'
2708
+ name: 'SEOkicks'
2709
+ category: 'Crawler'
2710
+ url: 'https://www.seokicks.de/robot.html'
2711
+
2712
+ - regex: 'Plukkie/[\d.]+'
2713
+ name: 'Plukkie'
2714
+ category: 'Crawler'
2715
+ url: 'http://www.botje.com/plukkie.htm'
2716
+
2717
+ - regex: 'proximic;'
2718
+ name: 'Comscore'
2719
+ category: 'Crawler'
2720
+ url: 'https://www.comscore.com/Web-Crawler'
2721
+
2722
+ - regex: 'SurdotlyBot/[\d.]+'
2723
+ name: 'SurdotlyBot'
2724
+ category: 'Crawler'
2725
+ url: 'http://sur.ly/bot.html'
2726
+
2727
+ - regex: 'Gowikibot/[\d.]+'
2728
+ name: 'Gowikibot'
2729
+ category: 'Crawler'
2730
+ url: 'http://www.gowikibot.com'
2731
+
2732
+ - regex: 'SabsimBot/[\d.]+'
2733
+ name: 'SabsimBot'
2734
+ category: 'Crawler'
2735
+ url: 'https://sabsim.com'
2736
+
2737
+ - regex: 'LumtelBot/[\d.]+'
2738
+ name: 'LumtelBot'
2739
+ category: 'Crawler'
2740
+ url: 'https://umtel.com'
2741
+
2742
+ - regex: 'PiplBot'
2743
+ name: 'PiplBot'
2744
+ category: 'Crawler'
2745
+ url: 'http://www.pipl.com/bot'
2746
+
2747
+ - regex: 'woobot/[\d.]+'
2748
+ name: 'WooRank'
2749
+ category: 'Crawler'
2750
+ url: 'https://www.woorank.com/bot'
2751
+
2752
+ - regex: 'Cookiebot/[\d.]+'
2753
+ name: 'Cookiebot'
2754
+ category: 'Crawler'
2755
+ url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
2756
+ producer:
2757
+ name: 'Cybot A/S'
2758
+ url: 'https://www.cybot.com/'
2759
+
2760
+ - regex: 'NetSystemsResearch'
2761
+ name: 'NetSystemsResearch'
2762
+ category: 'Security Checker'
2763
+ url: 'https://www.netsystemsresearch.com/'
2764
+ producer:
2765
+ name: 'NET SYSTEMS RESEARCH LLC'
2766
+ url: 'https://www.netsystemsresearch.com/'
2767
+
2768
+ - regex: 'CensysInspect/[\d.]+'
2769
+ name: 'CensysInspect'
2770
+ category: 'Security Checker'
2771
+ url: 'https://about.censys.io/'
2772
+ producer:
2773
+ name: 'Censys, Inc.'
2774
+ url: 'https://censys.io/'
2775
+
2776
+ - regex: 'gdnplus\.com'
2777
+ name: 'GDNP'
2778
+ category: 'Crawler'
2779
+ url: 'https://gdnplus.com/'
2780
+ producer:
2781
+ name: 'Global Digital Network Plus, LLC'
2782
+ url: 'https://gdnplus.com/'
2783
+
2784
+ - regex: 'WellKnownBot/[\d.]+'
2785
+ name: 'WellKnownBot'
2786
+ category: 'Crawler'
2787
+ url: 'https://well-known.dev'
2788
+
2789
+ - regex: 'Adsbot/[\d.]+'
2790
+ name: 'Adsbot'
2791
+ category: 'Crawler'
2792
+ url: 'https://seostar.co/robot/'
2793
+
2794
+ - regex: 'MTRobot/[\d.]+'
2795
+ name: 'MTRobot'
2796
+ category: 'Crawler'
2797
+ url: 'https://metrics-tools.de/robot.html'
2798
+ producer:
2799
+ name: 'Metrics Tools'
2800
+ url: 'https://metrics-tools.de/'
2801
+
2802
+ - regex: 'serpstatbot/[\d.]+'
2803
+ name: 'serpstatbot'
2804
+ category: 'Crawler'
2805
+ url: 'http://serpstatbot.com/'
2806
+ producer:
2807
+ name: 'Netpeak Ltd'
2808
+ url: 'https://netpeak.net/'
2809
+
2810
+ - regex: 'colly'
2811
+ name: 'colly'
2812
+ category: 'Crawler'
2813
+ url: 'https://github.com/gocolly/colly/'
2814
+
2815
+ - regex: 'l9tcpid/v[\d.]+'
2816
+ name: 'l9tcpid'
2817
+ category: 'Security Checker'
2818
+ url: 'https://github.com/LeakIX/l9tcpid'
2819
+
2820
+ - regex: 'l9explore/[\d.]+'
2821
+ name: 'l9explore'
2822
+ category: 'Security Checker'
2823
+ url: 'https://github.com/LeakIX/l9explore'
2824
+
2825
+ - regex: 'l9scan/|^Lkx-.*/[\d.]+'
2826
+ name: 'LeakIX'
2827
+ category: 'Security Checker'
2828
+ url: 'https://leakix.net/'
2829
+ producer:
2830
+ name: 'BaDaaS SRL'
2831
+ url: 'https://leakix.net/'
2832
+
2833
+ - regex: 'MegaIndex\.ru/[\d.]+'
2834
+ name: 'MegaIndex'
2835
+ category: 'Crawler'
2836
+ url: 'https://megaindex.com/crawler'
2837
+
2838
+ - regex: 'Seekport'
2839
+ name: 'Seekport'
2840
+ category: 'Crawler'
2841
+ url: 'https://bot.seekport.com/'
2842
+ producer:
2843
+ name: 'SISTRIX GmbH'
2844
+ url: 'https://www.sistrix.de/'
2845
+
2846
+ - regex: 'seolyt/[\d.]+'
2847
+ name: 'seolyt'
2848
+ category: 'Crawler'
2849
+ url: 'https://seolyt.com/'
2850
+
2851
+ - regex: 'YaK/[\d.]+'
2852
+ name: 'YaK'
2853
+ category: 'Crawler'
2854
+ url: 'https://www.linkfluence.com/'
2855
+ producer:
2856
+ name: 'Linkfluence SAS'
2857
+ url: 'https://www.linkfluence.com/'
2858
+
2859
+ - regex: 'KomodiaBot/[\d.]+'
2860
+ name: 'KomodiaBot'
2861
+ category: 'Crawler'
2862
+ url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
2863
+ producer:
2864
+ name: 'Komodia Inc.'
2865
+ url: 'https://www.komodia.com/'
2866
+
2867
+ - regex: 'Neevabot/[\d.]+'
2868
+ name: 'Neevabot'
2869
+ category: 'Search bot'
2870
+ url: 'https://neeva.com/neevabot'
2871
+ producer:
2872
+ name: 'Neeva Inc.'
2873
+ url: 'https://neeva.com/'
2874
+
2875
+ - regex: 'LinkPreview/[\d.]+'
2876
+ name: 'LinkPreview'
2877
+ category: 'Service Agent'
2878
+ url: 'https://www.linkpreview.net/'
2879
+
2880
+ - regex: 'JungleKeyThumbnail/[\d.]+'
2881
+ name: 'JungleKeyThumbnail'
2882
+ category: 'Crawler'
2883
+ url: 'https://junglekey.com/'
2884
+
2885
+ - regex: 'rocketmonitor(?: |bot/)[\d.]+'
2886
+ name: 'RocketMonitorBot'
2887
+ category: 'Site Monitor'
2888
+ url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
2889
+ producer:
2890
+ name: 'Radio Mast, Inc.'
2891
+ url: 'https://www.radiomast.io/'
2892
+
2893
+ - regex: 'SitemapParser-VIPnytt/[\d.]+'
2894
+ name: 'SitemapParser-VIPnytt'
2895
+ category: 'Crawler'
2896
+ url: 'https://github.com/VIPnytt/SitemapParser/'
2897
+
2898
+ - regex: '^Turnitin'
2899
+ name: 'Turnitin'
2900
+ category: 'Crawler'
2901
+ url: 'https://turnitin.com/robot/crawlerinfo.html'
2902
+
2903
+ - regex: 'DMBrowser/[\d.]+|DMBrowser-[UB]V'
2904
+ name: 'Dotcom Monitor'
2905
+ category: 'Site Monitor'
2906
+ url: 'https://www.dotcom-monitor.com'
2907
+
2908
+ - regex: 'ThinkChaos/'
2909
+ name: 'ThinkChaos'
2910
+ category: 'Crawler'
2911
+
2912
+ - regex: 'DataForSeoBot'
2913
+ name: 'DataForSeoBot'
2914
+ category: 'Crawler'
2915
+ url: 'https://dataforseo.com/dataforseo-bot'
2916
+
2917
+ - regex: 'Discordbot/[\d.]+'
2918
+ name: 'Discord Bot'
2919
+ category: 'Service Agent'
2920
+ url: 'https://discordapp.com'
2921
+
2922
+ - regex: 'Linespider/[\d.]+'
2923
+ name: 'Linespider'
2924
+ category: 'Crawler'
2925
+ url: 'https://lin.ee/4dwXkTH'
2926
+
2927
+ - regex: 'Cincraw/[\d.]+'
2928
+ name: 'Cincraw'
2929
+ category: 'Crawler'
2930
+ url: 'http://cincrawdata.net/bot/'
2931
+
2932
+ - regex: 'CISPA Web Analyzer'
2933
+ name: 'CISPA Web Analyzer'
2934
+ category: 'Crawler'
2935
+ url: 'https://notify.cispa.de/'
2936
+ producer:
2937
+ name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH'
2938
+ url: 'https://cispa.de/en'
2939
+
2940
+ - regex: 'IonCrawl'
2941
+ name: 'IONOS Crawler'
2942
+ category: 'Crawler'
2943
+ url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/'
2944
+ producer:
2945
+ name: 'IONOS SE'
2946
+ url: 'https://www.ionos.de/'
2947
+
2948
+ - regex: 'Crawldad'
2949
+ name: 'Crawldad'
2950
+ category: 'Crawler'
2951
+ url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
2952
+
2953
+ - regex: 'https://securitytxt-scan\.cs\.hm\.edu/'
2954
+ name: 'security.txt scanserver'
2955
+ category: 'Security Checker'
2956
+ url: 'https://securitytxt-scan.cs.hm.edu/'
2957
+ producer:
2958
+ name: 'Hochschule für angewandte Wissenschaften München'
2959
+ url: 'https://www.hm.edu/'
2960
+
2961
+ - regex: 'TigerBot/[\d.]+'
2962
+ name: 'TigerBot'
2963
+ category: 'Crawler'
2964
+ url: 'https://tiger.ch/'
2965
+
2966
+ - regex: 'TestCrawler/[\d.]+'
2967
+ name: 'TestCrawler'
2968
+ category: 'Crawler'
2969
+ url: 'https://www.comcepta.com/'
2970
+
2971
+ - regex: 'CrowdTanglebot/[\d.]+'
2972
+ name: 'CrowdTangle'
2973
+ category: 'Crawler'
2974
+ url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
2975
+ producer:
2976
+ name: 'CrowdTangle, Inc.'
2977
+ url: 'https://www.crowdtangle.com/'
2978
+
2979
+ - regex: 'Sellers\.Guide Crawler by Primis'
2980
+ name: 'Sellers.Guide'
2981
+ category: 'Crawler'
2982
+ url: 'https://sellers.guide/'
2983
+ producer:
2984
+ name: 'McCann Disciplines, Ltd.'
2985
+ url: 'https://www.primis.tech/'
2986
+
2987
+ - regex: 'OnalyticaBot'
2988
+ name: 'Onalytica'
2989
+ category: 'Crawler'
2990
+ url: 'https://www.airslate.com/bot/explore/onalytica-bot'
2991
+ producer:
2992
+ name: 'airSlate, Inc.'
2993
+ url: 'https://www.airslate.com/'
2994
+
2995
+ - regex: 'deepnoc'
2996
+ name: 'deepnoc'
2997
+ category: 'Crawler'
2998
+ url: 'https://deepnoc.com/bot'
2999
+ producer:
3000
+ name: 'deepnoc, GmbH'
3001
+ url: 'https://deepnoc.com/'
3002
+
3003
+ - regex: 'Newslitbot/[\d.]+'
3004
+ name: 'Newslitbot'
3005
+ category: 'Crawler'
3006
+ url: 'https://www.newslit.co/'
3007
+ producer:
3008
+ name: 'Newslit, LLC.'
3009
+ url: 'https://www.newslit.co/'
3010
+
3011
+ - regex: 'um-LN/[\d.]+'
3012
+ name: 'uMBot'
3013
+ category: 'Crawler'
3014
+ url: 'https://www.ubermetrics-technologies.com/'
3015
+ producer:
3016
+ name: 'Ubermetrics Technologies GmbH'
3017
+ url: 'https://www.ubermetrics-technologies.com/'
3018
+
3019
+ - regex: 'Abonti/[\d.]+'
3020
+ name: 'Abonti'
3021
+ category: 'Crawler'
3022
+ url: 'http://abonti.com/'
3023
+
3024
+ - regex: 'collection@infegy\.com'
3025
+ name: 'Infegy'
3026
+ category: 'Crawler'
3027
+ url: 'https://infegy.com/'
3028
+ producer:
3029
+ name: 'Infegy, Inc.'
3030
+ url: 'https://infegy.com/'
3031
+
3032
+ - regex: 'HTTP Banner Detection \(https://security\.ipip\.net\)'
3033
+ name: 'IPIP'
3034
+ category: 'Security Checker'
3035
+ url: 'https://security.ipip.net/'
3036
+ producer:
3037
+ name: 'Beijing Tiantexin Tech. Co., Ltd.'
3038
+ url: 'https://en.ipip.net/'
3039
+
3040
+ - regex: 'ev-crawler/[\d.]+'
3041
+ name: 'Headline'
3042
+ category: 'Crawler'
3043
+ url: 'https://headline.com/legal/crawler'
3044
+ producer:
3045
+ name: 'e.ventures Managementgesellschaft mbH'
3046
+ url: 'https://headline.com/'
3047
+
3048
+ - regex: 'webprosbot/[\d.]+'
3049
+ name: 'WebPros'
3050
+ category: 'Crawler'
3051
+ url: 'https://webpros.com/'
3052
+ producer:
3053
+ name: 'WebPros Holdco B.V.'
3054
+ url: 'https://webpros.com/'
3055
+
3056
+ - regex: 'ELB-HealthChecker'
3057
+ name: 'Amazon ELB'
3058
+ category: 'Site Monitor'
3059
+ url: 'https://aws.amazon.com/elasticloadbalancing/'
3060
+ producer:
3061
+ name: 'Amazon.com, Inc.'
3062
+ url: 'https://www.amazon.com/'
3063
+
3064
+ - regex: 'Wheregoes\.com Redirect Checker/[\d.]+'
3065
+ name: 'WhereGoes'
3066
+ category: 'Crawler'
3067
+ url: 'https://wheregoes.com/'
3068
+
3069
+ - regex: 'project_patchwatch'
3070
+ name: 'Project Patchwatch'
3071
+ category: 'Crawler'
3072
+ url: 'http://66.240.192.82/'
3073
+
3074
+ - regex: 'InternetMeasurement/[\d.]+'
3075
+ name: 'InternetMeasurement'
3076
+ category: 'Crawler'
3077
+ url: 'https://internet-measurement.com/'
3078
+
3079
+ - regex: 'DomainAppender /[\d.]+'
3080
+ name: 'DomainAppender'
3081
+ category: 'Crawler'
3082
+ url: 'https://www.profound.net/product/domain_append/'
3083
+ producer:
3084
+ name: 'Profound Networks, LLC'
3085
+ url: 'https://www.profound.net/'
3086
+
3087
+ - regex: 'FreeWebMonitoring SiteChecker/[\d.]+'
3088
+ name: 'FreeWebMonitoring'
3089
+ category: 'Site Monitor'
3090
+ url: 'https://www.freewebmonitoring.com/bot.html'
3091
+ producer:
3092
+ name: 'GreenWave Online, Inc.'
3093
+ url: 'http://www.greenwaveonline.com/'
3094
+
3095
+ - regex: 'Page Modified Pinger'
3096
+ name: 'Page Modified Pinger'
3097
+ category: 'Site Monitor'
3098
+ url: 'https://www.pagemodified.com/'
3099
+ producer:
3100
+ name: 'Valley Hosting, LLC'
3101
+ url: 'https://www.pagemodified.com/'
3102
+
3103
+ - regex: 'adstxtlab\.com'
3104
+ name: 'adstxtlab.com'
3105
+ category: 'Crawler'
3106
+ url: 'https://adstxtlab.com/validator.php'
3107
+ producer:
3108
+ name: 'Jaohawi AB'
3109
+ url: 'https://adstxtlab.com/'
3110
+
3111
+ - regex: 'Iframely/[\d.]+'
3112
+ name: 'Iframely'
3113
+ category: 'Crawler'
3114
+ url: 'https://iframely.com/'
3115
+ producer:
3116
+ name: 'Itteco Software, Corp.'
3117
+ url: 'https://iframely.com/'
3118
+
3119
+ - regex: 'DomainStatsBot/[\d.]+'
3120
+ name: 'DomainStatsBot'
3121
+ category: 'Crawler'
3122
+ url: 'https://domainstats.com/pages/our-bot'
3123
+ producer:
3124
+ name: 'Domainstats Ltd'
3125
+ url: 'https://domainstats.com/'
3126
+
3127
+ - regex: 'aiHitBot/[\d.]+'
3128
+ name: 'aiHitBot'
3129
+ category: 'Crawler'
3130
+ url: 'https://www.aihitdata.com/about'
3131
+
3132
+ - regex: 'DomainCrawler/'
3133
+ name: 'DomainCrawler'
3134
+ category: 'Crawler'
3135
+ url: 'https://domaincrawler.com/about-us/'
3136
+
3137
+ - regex: 'DNSResearchBot'
3138
+ name: 'DNSResearchBot'
3139
+ category: 'Crawler'
3140
+
3141
+ - regex: 'GitCrawlerBot'
3142
+ name: 'GitCrawlerBot'
3143
+ category: 'Crawler'
3144
+
3145
+ - regex: 'AdAuth/[\d.]+'
3146
+ name: 'AdAuth'
3147
+ category: 'Crawler'
3148
+ url: 'https://www.adauth.com'
3149
+
3150
+ - regex: 'faveeo\.com'
3151
+ name: 'Faveeo'
3152
+ category: 'Crawler'
3153
+ url: 'http://www.faveeo.com'
3154
+
3155
+ - regex: 'kozmonavt\.'
3156
+ name: 'Kozmonavt'
3157
+ category: 'Crawler'
3158
+ url: 'https://kozmonavt.ml'
3159
+
3160
+ - regex: 'CriteoBot/'
3161
+ name: 'CriteoBot'
3162
+ category: 'Crawler'
3163
+ url: 'https://www.criteo.com/criteo-crawler/'
3164
+
3165
+ - regex: 'PayPal IPN'
3166
+ name: 'PayPal IPN'
3167
+ category: 'Service Agent'
3168
+ url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/'
3169
+ producer:
3170
+ name: 'PayPal, Inc.'
3171
+ url: 'https://www.paypal.com/'
3172
+
3173
+ - regex: 'MaCoCu'
3174
+ name: 'MaCoCu'
3175
+ category: 'Crawler'
3176
+ url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/'
3177
+ producer:
3178
+ name: 'Jožef Stefan Institute'
3179
+ url: 'https://www.ijs.si/ijsw/JSI'
3180
+
3181
+ - regex: 'dnt-policy@eff\.org'
3182
+ name: 'EFF Do Not Track Verifier'
3183
+ category: 'Crawler'
3184
+ url: 'https://www.eff.org/issues/do-not-track'
3185
+ producer:
3186
+ name: 'Electronic Frontier Foundation'
3187
+ url: 'https://www.eff.org/'
3188
+
3189
+ - regex: 'InfoTigerBot'
3190
+ name: 'InfoTigerBot'
3191
+ category: 'Crawler'
3192
+ url: 'https://infotiger.com/bot'
3193
+ producer:
3194
+ name: 'Infotiger UG'
3195
+ url: 'https://infotiger.com/'
3196
+
3197
+ - regex: '(?:Birdcrawlerbot|CrawlaDeBot)'
3198
+ name: 'Birdcrawlerbot'
3199
+ category: 'Crawler'
3200
+ url: 'https://crawla.de/de/index.php'
3201
+ producer:
3202
+ name: 'Swoppen Systems GmbH'
3203
+ url: 'https://www.swoppen.com/de'
3204
+
3205
+ - regex: 'ScamadviserExternalHit/[\d.]+'
3206
+ name: 'Scamadviser External Hit'
3207
+ category: 'Crawler'
3208
+ url: 'https://www.scamadviser.com/'
3209
+ producer:
3210
+ name: 'Ecommerce Operations B.V.'
3211
+ url: 'https://www.scamadviser.com/'
3212
+
3213
+ - regex: 'ZaldamoSearchBot'
3214
+ name: 'Zaldamo'
3215
+ category: 'Crawler'
3216
+ url: 'https://www.zaldamo.com/search.html'
3217
+ producer:
3218
+ name: 'Zaldamo, LLC.'
3219
+ url: 'https://www.zaldamo.com/'
3220
+
3221
+ - regex: 'AFB/[\d.]+'
3222
+ name: 'Allloadin Favicon Bot'
3223
+ category: 'Crawler'
3224
+ url: 'https://allloadin.com/'
3225
+
3226
+ - regex: 'SeolytBot/[\d.]+'
3227
+ name: 'Seolyt Bot'
3228
+ category: 'Crawler'
3229
+ url: 'https://seolyt.com'
3230
+
3231
+ - regex: 'LinkWalker/[\d.]+'
3232
+ name: 'LinkWalker'
3233
+ category: 'Crawler'
3234
+ url: 'https://www.phishlabs.com/'
3235
+ producer:
3236
+ name: 'PhishLabs, Inc.'
3237
+ url: 'https://www.phishlabs.com/'
3238
+
3239
+ - regex: 'RenovateBot/[\d.]+'
3240
+ name: 'RenovateBot'
3241
+ category: 'Security Checker'
3242
+ url: 'https://github.com/renovatebot/renovate'
3243
+ producer:
3244
+ name: 'White Source Ltd.'
3245
+ url: 'https://www.mend.io/free-developer-tools/renovate/'
3246
+
3247
+ - regex: 'INETDEX-BOT/[\d.]+'
3248
+ name: 'Inetdex Bot'
3249
+ category: 'Crawler'
3250
+ url: 'https://www.inetdex.com/'
3251
+
3252
+ - regex: 'NETZZAPPEN'
3253
+ name: 'NETZZAPPEN'
3254
+ category: 'Crawler'
3255
+ url: 'https://www.netzzappen.com/'
3256
+ producer:
3257
+ name: 'Marc Huemer'
3258
+ url: 'https://www.netzzappen.com/'
3259
+
3260
+ - regex: 'panscient\.com'
3261
+ name: 'Panscient'
3262
+ category: 'Crawler'
3263
+ url: 'https://www.panscient.com/faq.htm'
3264
+ producer:
3265
+ name: 'Panscient, Inc.'
3266
+ url: 'https://www.panscient.com/'
3267
+
3268
+ - regex: 'research@pdrlabs\.net'
3269
+ name: 'PDR Labs'
3270
+ category: 'Security Checker'
3271
+ url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3272
+ producer:
3273
+ name: 'PDR Labs'
3274
+ url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3275
+
3276
+ - regex: 'Nicecrawler/[\d.]+'
3277
+ name: 'NiceCrawler'
3278
+ category: 'Crawler'
3279
+ url: 'https://www.nicecrawler.com/'
3280
+ producer:
3281
+ name: 'Intelium Corp.'
3282
+ url: 'https://www.intelium.com/'
3283
+
3284
+ - regex: 't3versionsBot/[\d.]+'
3285
+ name: 't3versions'
3286
+ category: 'Crawler'
3287
+ url: 'https://www.t3versions.com/bot'
3288
+ producer:
3289
+ name: 'Torben Hansen'
3290
+ url: 'https://www.t3versions.com/'
3291
+
3292
+ - regex: 'Crawlson/[\d.]+'
3293
+ name: 'Crawlson'
3294
+ category: 'Crawler'
3295
+ url: 'https://www.crawlson.com/about'
3296
+ producer:
3297
+ name: 'Crawlson'
3298
+ url: 'https://www.crawlson.com/'
3299
+
3300
+ - regex: 'tchelebi/[\d.]+'
3301
+ name: 'tchelebi'
3302
+ category: 'Crawler'
3303
+ url: 'https://tchelebi.io/'
3304
+ producer:
3305
+ name: 'NormShield, Inc.'
3306
+ url: 'https://blackkite.com/'
3307
+
3308
+ - regex: 'JobboerseBot'
3309
+ name: 'JobboerseBot'
3310
+ category: 'Crawler'
3311
+ url: 'https://www.xing.com/jobs'
3312
+ producer:
3313
+ name: 'New Work SE'
3314
+ url: 'https://www.xing.com/'
3315
+
3316
+ - regex: 'RepoLookoutBot/v?[\d.]+'
3317
+ name: 'Repo Lookout'
3318
+ category: 'Security Checker'
3319
+ url: 'https://www.repo-lookout.org/'
3320
+ producer:
3321
+ name: 'Crissy Field GmbH'
3322
+ url: 'https://www.crissyfield.de/'
3323
+
3324
+ - regex: 'PATHspider'
3325
+ name: 'PATHspider'
3326
+ category: 'Security Checker'
3327
+ url: 'https://pathspider.net/'
3328
+ producer:
3329
+ name: 'MAMI Project'
3330
+ url: 'https://mami-project.eu/'
3331
+
3332
+ - regex: 'everyfeed-spider/[\d.]+'
3333
+ name: 'Everyfeed'
3334
+ url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
3335
+ category: 'Feed Fetcher'
3336
+ producer:
3337
+ name: ''
3338
+ url: ''
3339
+
3340
+ - regex: 'Exchange check'
3341
+ name: 'Exchange check'
3342
+ category: 'Security Checker'
3343
+ url: 'https://github.com/GossiTheDog/scanning'
3344
+ producer:
3345
+ name: 'Kevin Beaumont'
3346
+ url: 'https://doublepulsar.com/'
3347
+
3348
+ - regex: 'Sublinq'
3349
+ name: 'Sublinq'
3350
+ category: 'Crawler'
3351
+ url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/'
3352
+ producer:
3353
+ name: ''
3354
+ url: ''
3355
+
3356
+ - regex: 'Gregarius/[\d.]+'
3357
+ name: 'Gregarius'
3358
+ category: 'Feed Fetcher'
3359
+ url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
3360
+ producer:
3361
+ name: ''
3362
+ url: ''
3363
+
3364
+ - regex: 'COMODO DCV'
3365
+ name: 'COMODO DCV'
3366
+ category: 'Service Agent'
3367
+ url: 'https://www.comodo.com/'
3368
+ producer:
3369
+ name: 'Comodo Security Solutions, Inc.'
3370
+ url: 'https://www.comodo.com/'
3371
+
3372
+ - regex: 'Sectigo DCV|acme\.sectigo\.com'
3373
+ name: 'Sectigo DCV'
3374
+ category: 'Service Agent'
3375
+ url: 'https://sectigo.com/'
3376
+ producer:
3377
+ name: 'Sectigo Limited'
3378
+ url: 'https://sectigo.com/'
3379
+
3380
+ - regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/[\d.]+'
3381
+ name: 'KlarnaBot'
3382
+ category: 'Crawler'
3383
+ url: 'https://docs.klarna.com/klarna-bot/'
3384
+ producer:
3385
+ name: 'Klarna Bank AB'
3386
+ url: 'https://www.klarna.com/'
3387
+
3388
+ - regex: 'Taboolabot/[\d.]+'
3389
+ name: 'Taboolabot'
3390
+ category: 'Crawler'
3391
+ url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
3392
+ producer:
3393
+ name: 'Taboola, Inc.'
3394
+ url: 'https://www.taboola.com/'
3395
+
3396
+ - regex: 'Asana/[\d.]+'
3397
+ name: 'Asana'
3398
+ category: 'Crawler'
3399
+ url: 'https://asana.com/'
3400
+ producer:
3401
+ name: 'Asana, Inc.'
3402
+ url: 'https://asana.com/'
3403
+
3404
+ - regex: 'Chrome Privacy Preserving Prefetch Proxy'
3405
+ name: 'Chrome Privacy Preserving Prefetch Proxy'
3406
+ category: 'Service Agent'
3407
+ url: 'https://developer.chrome.com/blog/private-prefetch-proxy/'
3408
+ producer:
3409
+ name: 'Google Inc.'
3410
+ url: 'https://www.google.com/'
3411
+
3412
+ - regex: 'URLinspectorBot/[\d.]+'
3413
+ name: 'URLinspector'
3414
+ category: 'Site Monitor'
3415
+ url: 'https://www.urlinspector.com/bot/'
3416
+ producer:
3417
+ name: 'LinkResearchTools GmbH'
3418
+ url: 'https://www.linkresearchtools.com/'
3419
+
3420
+ - regex: 'EntferBot/[\d.]+'
3421
+ name: 'Entfer'
3422
+ category: 'Crawler'
3423
+ url: 'https://entfer.com/'
3424
+ producer:
3425
+ name: 'Entfer Ltd.'
3426
+ url: 'https://entfer.com/'
3427
+
3428
+ - regex: 'TagInspector/[\d.]+'
3429
+ name: 'Tag Inspector'
3430
+ category: 'Crawler'
3431
+ url: 'https://taginspector.com/'
3432
+ producer:
3433
+ name: 'InfoTrust, LLC'
3434
+ url: 'https://infotrust.com/'
3435
+
3436
+ - regex: 'pageburst'
3437
+ name: 'Pageburst'
3438
+ category: 'Crawler'
3439
+ url: 'https://pageburstls.elsevier.com/'
3440
+ producer:
3441
+ name: 'Elsevier Ltd'
3442
+ url: 'https://www.elsevier.com/'
3443
+
3444
+ - regex: '.+diffbot'
3445
+ name: 'Diffbot'
3446
+ category: 'Crawler'
3447
+ url: 'https://docs.diffbot.com/docs/getting-started-with-crawl'
3448
+ producer:
3449
+ name: 'Diffbot Technologies Corp.'
3450
+ url: 'https://www.diffbot.com/'
3451
+
3452
+ - regex: 'DisqusAdstxtCrawler/[\d.]+'
3453
+ name: 'Disqus'
3454
+ category: 'Crawler'
3455
+ url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
3456
+ producer:
3457
+ name: 'Disqus, Inc.'
3458
+ url: 'https://disqus.com/'
3459
+
3460
+ - regex: 'startmebot/[\d.]+'
3461
+ name: 'start.me'
3462
+ category: 'Crawler'
3463
+ url: 'https://about.start.me/'
3464
+ producer:
3465
+ name: 'start.me BV'
3466
+ url: 'https://about.start.me/'
3467
+
3468
+ - regex: '2ip bot/[\d.]+'
3469
+ name: '2ip'
3470
+ category: 'Crawler'
3471
+ url: 'https://2ip.io/'
3472
+
3473
+ - regex: 'ReqBin Curl Client/[\d.]+'
3474
+ name: 'ReqBin'
3475
+ category: 'Crawler'
3476
+ url: 'https://reqbin.com/curl'
3477
+
3478
+ - regex: 'XoviBot/[\d.]+'
3479
+ name: 'XoviBot'
3480
+ category: 'Crawler'
3481
+ url: 'https://www.xovibot.net'
3482
+ producer:
3483
+ name: 'Xovi GmbH'
3484
+ url: 'http://www.xovi.de'
3485
+
3486
+ - regex: 'Overcast/[\d.]+ Podcast Sync'
3487
+ name: 'Overcast Podcast Sync'
3488
+ category: 'Service Agent'
3489
+ url: 'https://overcast.fm/podcasterinfo'
3490
+
3491
+ - regex: '^Verity/[\d.]+'
3492
+ name: 'GumGum Verity'
3493
+ category: 'Service Agent'
3494
+ url: 'https://gumgum.com/verity'
3495
+
3496
+ - regex: 'hackermention'
3497
+ name: 'hackermention'
3498
+ category: 'Feed Reader'
3499
+ url: 'https://github.com/snarfed/hackermention'
3500
+
3501
+ - regex: 'BitSightBot/[\d.]+'
3502
+ name: 'BitSight'
3503
+ category: 'Security Checker'
3504
+ url: 'https://www.bitsight.com/'
3505
+ producer:
3506
+ name: 'BitSight Technologies, Inc.'
3507
+ url: 'https://www.bitsight.com/'
3508
+
3509
+ - regex: 'Ezgif/[\d.]+'
3510
+ name: 'Ezgif'
3511
+ category: 'Service Agent'
3512
+ url: 'https://ezgif.com/about'
3513
+
3514
+ - regex: 'intelx\.io_bot'
3515
+ name: 'Intelligence X'
3516
+ category: 'Crawler'
3517
+ url: 'https://intelx.io/'
3518
+ producer:
3519
+ name: 'Kleissner Investments s.r.o.'
3520
+ url: 'https://intelx.io/'
3521
+
3522
+ - regex: 'FemtosearchBot/[\d.]+'
3523
+ name: 'Femtosearch'
3524
+ category: 'Crawler'
3525
+ url: 'http://femtosearch.com/'
3526
+ producer:
3527
+ name: 'Grier Forensics, LLC'
3528
+ url: 'https://www.grierforensics.com/'
3529
+
3530
+ - regex: 'AdsTxtCrawler/[\d.]+'
3531
+ name: 'AdsTxtCrawler'
3532
+ category: 'Crawler'
3533
+ url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
3534
+ producer:
3535
+ name: 'IAB Technology Laboratory, Inc.'
3536
+ url: 'https://iabtechlab.com/'
3537
+
3538
+ - regex: 'Morningscore'
3539
+ name: 'Morningscore Bot'
3540
+ category: 'Crawler'
3541
+ url: 'https://morningscore.io/'
3542
+ producer:
3543
+ name: 'Morningscore'
3544
+ url: 'https://morningscore.io/'
3545
+
3546
+ - regex: 'Uptime-Kuma/[\d.]+'
3547
+ name: 'Uptime-Kuma'
3548
+ category: 'Site Monitor'
3549
+ url: 'https://github.com/louislam/uptime-kuma'
3550
+
3551
+ - regex: 'ChatGPT-User'
3552
+ name: 'ChatGPT'
3553
+ category: 'Crawler'
3554
+ url: 'https://platform.openai.com/docs/plugins/bot'
3555
+ producer:
3556
+ name: 'OpenAI OpCo, LLC'
3557
+ url: 'https://openai.com/'
3558
+
3559
+ - regex: 'BrightEdge Crawler/[\d.]+'
3560
+ name: 'BrightEdge'
3561
+ category: 'Crawler'
3562
+ url: 'https://www.brightedge.com/'
3563
+ producer:
3564
+ name: 'BrightEdge Technologies, Inc'
3565
+ url: 'https://www.brightedge.com/'
3566
+
3567
+ - regex: 'sfFeedReader/[\d.]+'
3568
+ name: 'sfFeedReader'
3569
+ url: 'https://github.com/diem-project/sfFeed2Plugin'
3570
+ category: 'Feed Fetcher'
3571
+
3572
+ - regex: 'cyberscan\.io'
3573
+ name: 'Cyberscan'
3574
+ category: 'Security Checker'
3575
+ url: 'https://www.cyberscan.io/'
3576
+ producer:
3577
+ name: 'DGC Verwaltungs GmbH'
3578
+ url: 'https://dgc.org/'
3579
+
3580
+ - regex: 'deepcrawl\.com'
3581
+ name: 'Lumar'
3582
+ category: 'Crawler'
3583
+ url: 'https://deepcrawl.com/bot'
3584
+ producer:
3585
+ name: 'Lumar'
3586
+ url: 'https://www.lumar.io/'
3587
+
3588
+ - regex: 'researchscan\.comsys\.rwth-aachen\.de'
3589
+ name: 'Research Scan'
3590
+ category: 'Crawler'
3591
+ url: 'http://researchscan.comsys.rwth-aachen.de/'
3592
+ producer:
3593
+ name: 'RWTH Aachen University'
3594
+ url: 'https://www.comsys.rwth-aachen.de/'
3595
+
3596
+ - regex: 'newspaper/[\d.]+'
3597
+ name: 'Scraping Robot'
3598
+ category: 'Crawler'
3599
+ url: 'https://scrapingrobot.com/'
3600
+ producer:
3601
+ name: 'Sprious LLC'
3602
+ url: 'https://sprious.com/'
3603
+
3604
+ - regex: 'GPTBot/[\d.]+'
3605
+ name: 'GPTBot'
3606
+ category: 'Crawler'
3607
+ url: 'https://platform.openai.com/docs/gptbot'
3608
+ producer:
3609
+ name: 'OpenAI OpCo, LLC'
3610
+ url: 'https://openai.com/'
3611
+
3612
+ - regex: 'Ant(?:\.com beta|Bot)(?:/([\d+.]+))?'
3613
+ name: 'Ant'
3614
+ category: 'Crawler'
3615
+ url: 'https://www.ant.com/'
3616
+ producer:
3617
+ name: 'Ant.com Ltd.'
3618
+ url: 'https://www.ant.com/'
3619
+
3620
+ - regex: 'WebwikiBot/[\d.]+'
3621
+ name: 'Webwiki'
3622
+ category: 'Crawler'
3623
+ url: 'https://www.webwiki.com/'
3624
+ producer:
3625
+ name: 'webwiki GmbH'
3626
+ url: 'https://www.webwiki.com/'
3627
+
3628
+ - regex: 'phpMyAdmin'
3629
+ name: 'phpMyAdmin'
3630
+ category: 'Service Agent'
3631
+ url: 'https://www.phpmyadmin.net/'
3632
+
3633
+ - regex: 'Matomo/[\d.]+'
3634
+ name: 'Matomo'
3635
+ category: 'Service Agent'
3636
+ url: 'https://github.com/matomo-org/matomo'
3637
+ producer:
3638
+ name: 'InnoCraft Ltd'
3639
+ url: 'https://matomo.org/'
3640
+
3641
+ - regex: 'Prometheus/[\d.]+'
3642
+ name: 'Prometheus'
3643
+ category: 'Service Agent'
3644
+ url: 'https://github.com/prometheus/prometheus'
3645
+ producer:
3646
+ name: 'The Linux Foundation'
3647
+ url: 'https://www.cncf.io/'
3648
+
3649
+ - regex: 'ArchiveTeam ArchiveBot'
3650
+ name: 'ArchiveBot'
3651
+ category: 'Crawler'
3652
+ url: 'https://wiki.archiveteam.org/index.php?title=ArchiveBot'
3653
+ producer:
3654
+ name: 'ArchiveTeam'
3655
+ url: 'https://wiki.archiveteam.org/'
3656
+
3657
+ - regex: 'MADBbot/[\d.]+'
3658
+ name: 'MADBbot'
3659
+ category: 'Crawler'
3660
+ url: 'https://madb.zapto.org/bot.html'
3661
+
3662
+ - regex: 'MeltwaterNews'
3663
+ name: 'MeltwaterNews'
3664
+ category: 'Crawler'
3665
+ producer:
3666
+ name: 'Meltwater Deutschland GmbH'
3667
+ url: 'https://www.meltwater.com/'
3668
+
3669
+ - regex: '(?:Owler@ows\.eu|OWLer)/[\d.]+'
3670
+ name: 'OWLer'
3671
+ category: 'Crawler'
3672
+ url: 'https://openwebsearch.eu/owler/'
3673
+ producer:
3674
+ name: 'Open Search Foundation e.V.'
3675
+ url: 'https://openwebsearch.eu/'
3676
+
3677
+ - regex: 'bbc\.co\.uk/display/men/Page\+Monitor'
3678
+ name: 'BBC Page Monitor'
3679
+ category: 'Site Monitor'
3680
+ url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor'
3681
+ producer:
3682
+ name: 'BBC'
3683
+ url: 'https://www.bbc.com/'
3684
+
3685
+ - regex: 'BBC-Forge-URL-Monitor-Twisted'
3686
+ name: 'BBC Forge URL Monitor'
3687
+ category: 'Site Monitor'
3688
+ url: 'https://www.bbc.com/'
3689
+ producer:
3690
+ name: 'BBC'
3691
+ url: 'https://www.bbc.com/'
3692
+
3693
+ - regex: 'ClaudeBot'
3694
+ name: 'ClaudeBot'
3695
+ category: 'Crawler'
3696
+ url: 'https://github.com/ClaudeBot/ClaudeBot'
3697
+
3698
+ - regex: 'Imagesift'
3699
+ name: 'ImageSift'
3700
+ category: 'Crawler'
3701
+ url: 'https://imagesift.com/'
3702
+ producer:
3703
+ name: 'Castle Global, Inc.'
3704
+ url: 'https://thehive.ai/'
3705
+
3706
+ - regex: 'TactiScout'
3707
+ name: 'TactiScout'
3708
+ category: 'Crawler'
3709
+ url: 'https://find-it.world/TempCrawl/Crawltheque.php'
3710
+ producer:
3711
+ name: 'Tactikast'
3712
+
3713
+ - regex: 'Brightbot ([\d+.]+)'
3714
+ name: 'BrightBot'
3715
+ category: 'Crawler'
3716
+ url: 'https://www.brightbot.app/'
3717
+ producer:
3718
+ name: 'Bright Interactive Ltd'
3719
+ url: 'https://www.builtbybright.com/'
3720
+
3721
+ - regex: 'DaspeedBot/([\d+.]+)'
3722
+ name: 'DaspeedBot'
3723
+ category: 'Crawler'
3724
+ url: 'https://daspeed.io/'
3725
+ producer:
3726
+ name: 'DAWAP SARL'
3727
+ url: 'https://dawap.fr/'
3728
+
3729
+ - regex: 'StractBot(?:/([\d+.]+))?'
3730
+ name: 'Stract'
3731
+ category: 'Crawler'
3732
+ url: 'https://stract.com/webmasters'
3733
+ producer:
3734
+ name: 'Stract'
3735
+ url: 'https://github.com/StractOrg/stract/'
3736
+
3737
+ - regex: 'GeedoBot(?:/([\d+.]+))?'
3738
+ name: 'GeedoBot'
3739
+ category: 'Crawler'
3740
+ url: 'https://geedo.com/bot/'
3741
+
3742
+ - regex: 'GeedoProductSearch'
3743
+ name: 'GeedoProductSearch'
3744
+ category: 'Crawler'
3745
+ url: 'https://geedo.com/product-search/'
3746
+
3747
+ - regex: 'BackupLand(?:/([\d+.]+))?'
3748
+ name: 'BackupLand'
3749
+ category: 'Crawler'
3750
+ url: 'https://go.backupland.com/'
3751
+ producer:
3752
+ name: 'ООО «КВАРТА»'
3753
+ url: 'https://go.backupland.com/'
3754
+
3755
+ - regex: 'Konturbot(?:/([\d+.]+))?'
3756
+ name: 'Konturbot'
3757
+ category: 'Crawler'
3758
+ url: 'https://kontur.ru/'
3759
+ producer:
3760
+ name: 'АО «ПФ «СКБ Контур»'
3761
+ url: 'https://kontur.ru/'
3762
+
3763
+ - regex: 'keys-so-bot'
3764
+ name: 'Keys.so'
3765
+ category: 'Crawler'
3766
+ url: 'https://www.keys.so/'
3767
+ producer:
3768
+ name: 'ООО «МОДЕСКО»'
3769
+ url: 'https://www.modesco.ru/'
3770
+
3771
+ - regex: 'LetsearchBot(?:/([\d+.]+))?'
3772
+ name: 'LetSearch'
3773
+ category: 'Crawler'
3774
+ url: 'https://letsearch.ru/bots'
3775
+
3776
+ - regex: 'Example3(?:/([\d+.]+))?'
3777
+ name: 'Example3'
3778
+ category: 'Crawler'
3779
+ url: 'https://www.example3.com/'
3780
+
3781
+ - regex: 'StatOnlineRuBot(?:/([\d+.]+))?'
3782
+ name: 'StatOnline.ru'
3783
+ category: 'Crawler'
3784
+ url: 'https://statonline.ru/'
3785
+ producer:
3786
+ name: 'ООО «Регистратор доменных имен РЕГ.РУ»'
3787
+ url: 'https://statonline.ru/'
3788
+
3789
+ - regex: 'Spawning-AI'
3790
+ name: 'Spawning AI'
3791
+ category: 'Crawler'
3792
+ url: 'https://spawning.ai/'
3793
+ producer:
3794
+ name: 'Spawning, Inc'
3795
+ url: 'https://spawning.ai/'
3796
+
3797
+ - regex: 'domain research project'
3798
+ name: 'Domain Research Project'
3799
+ category: 'Crawler'
3800
+ url: 'https://trentwil.es/domains.html'
3801
+ producer:
3802
+ name: 'Trent Wiles'
3803
+ url: 'https://trentwil.es/'
3804
+
3805
+ - regex: 'getodin\.com'
3806
+ name: 'Odin'
3807
+ category: 'Security Checker'
3808
+ url: 'https://docs.getodin.com/'
3809
+ producer:
3810
+ name: 'Cyble Inc.'
3811
+ url: 'https://cyble.com/'
3812
+
3813
+ - regex: 'YouBot'
3814
+ name: 'YouBot'
3815
+ category: 'Crawler'
3816
+ url: 'https://about.you.com/youbot/'
3817
+ producer:
3818
+ name: 'SuSea, Inc.'
3819
+ url: 'https://you.com/'
3820
+
3821
+ - regex: 'SiteScoreBot'
3822
+ name: 'SiteScore'
3823
+ category: 'Crawler'
3824
+ url: 'https://sitescore.ai/'
3825
+
3826
+ - regex: 'MBCrawler'
3827
+ name: 'Monitor Backlinks'
3828
+ category: 'Crawler'
3829
+ url: 'https://www.seoptimer.com/monitor-backlinks/'
3830
+ producer:
3831
+ name: 'SEOptimer'
3832
+ url: 'https://www.seoptimer.com/'
3833
+
3834
+ - regex: 'mariadb-mysql-kbs-bot'
3835
+ name: 'MariaDB/MySQL Knowledge Base'
3836
+ category: 'Crawler'
3837
+ url: 'https://github.com/williamdes/mariadb-mysql-kbs'
3838
+ producer:
3839
+ name: 'WDES SAS'
3840
+ url: 'https://wdes.fr/en/'
3841
+
3842
+ - regex: 'GitHubCopilotChat'
3843
+ name: 'GitHubCopilotChat'
3844
+ category: 'Crawler'
3845
+ url: 'https://github.com/aaamoon/copilot-gpt4-service'
3846
+
3847
+ - regex: '^pdrl\.fm'
3848
+ name: 'Podroll Analyzer'
3849
+ category: 'Crawler'
3850
+ url: 'https://podroll.fm'
3851
+
3852
+ - regex: 'PodUptime/'
3853
+ name: 'PodUptime'
3854
+ category: 'Site Monitor'
3855
+ url: 'https://poduptime.com'
3856
+
3857
+ - regex: 'anthropic-ai'
3858
+ name: 'Anthropic AI'
3859
+ category: 'Crawler'
3860
+ url: 'https://www.anthropic.com/'
3861
+ producer:
3862
+ name: 'Anthropic, PBC'
3863
+ url: 'https://www.anthropic.com/'
3864
+
3865
+ - regex: 'NetpeakCheckerBot/[\d.]+'
3866
+ name: 'Netpeak Checker'
3867
+ category: 'Crawler'
3868
+ url: 'https://netpeaksoftware.com/checker'
3869
+ producer:
3870
+ name: 'Netpeak LTD'
3871
+ url: 'https://netpeaksoftware.com/'
3872
+
3873
+ - regex: 'SandobaCrawler/[\d.]+'
3874
+ name: 'Sandoba//Crawler'
3875
+ category: 'Crawler'
3876
+ url: 'https://www.sandoba.com/en/crawler/'
3877
+ producer:
3878
+ name: 'SANDOBA//EBUSINESS SOLUTIONS'
3879
+ url: 'https://www.sandoba.com/'
3880
+
3881
+ - regex: 'SirdataBot'
3882
+ name: 'Sirdata'
3883
+ category: 'Crawler'
3884
+ url: 'https://semantic-api.docs.sirdata.net/contextual-api/contextual-api/introduction'
3885
+ producer:
3886
+ name: 'Sirdata SAS'
3887
+ url: 'https://www.sirdata.com/'
3888
+
3889
+ - regex: 'CheckMarkNetwork/[\d.]+'
3890
+ name: 'CheckMark Network'
3891
+ category: 'Crawler'
3892
+ url: 'https://www.checkmarknetwork.com/spider.html/'
3893
+ producer:
3894
+ name: 'Exipert, Inc.'
3895
+ url: 'https://www.checkmarknetwork.com/'
3896
+
3897
+ - regex: 'cohere-ai'
3898
+ name: 'Cohere AI'
3899
+ category: 'Crawler'
3900
+ url: 'https://cohere.com/'
3901
+ producer:
3902
+ name: 'Cohere, Inc.'
3903
+ url: 'https://cohere.com/'
3904
+
3905
+ - regex: 'PerplexityBot/[\d.]+'
3906
+ name: 'PerplexityBot'
3907
+ category: 'Crawler'
3908
+ url: 'https://docs.perplexity.ai/docs/perplexitybot'
3909
+ producer:
3910
+ name: 'Perplexity AI, Inc.'
3911
+ url: 'https://www.perplexity.ai/'
3912
+
3913
+ - regex: 'TTD-Content'
3914
+ name: 'The Trade Desk Content'
3915
+ category: 'Crawler'
3916
+ url: 'https://www.thetradedesk.com/us/ttd-content'
3917
+ producer:
3918
+ name: 'The Trade Desk, Inc.'
3919
+ url: 'https://www.thetradedesk.com/'
3920
+
3921
+ - regex: 'montastic-monitor'
3922
+ name: 'Montastic Monitor'
3923
+ category: 'Site Monitor'
3924
+ url: 'https://www.montastic.com/'
3925
+ producer:
3926
+ name: 'Metadot, Corp.'
3927
+ url: 'https://www.metadot.com/'
3928
+
3929
+ - regex: 'Ruby, Twurly v[\d.]+'
3930
+ name: 'Twurly'
3931
+ category: 'Crawler'
3932
+ url: 'https://twurly.org/'
3933
+
3934
+ - regex: 'Mixnode(?:(?:Cache)?/[\d.]+)?'
3935
+ name: 'Mixnode'
3936
+ category: 'Crawler'
3937
+ url: 'https://www.mixnode.com/'
3938
+ producer:
3939
+ name: 'Mixnode Technologies, Inc.'
3940
+ url: 'https://www.mixnode.com/'
3941
+
3942
+ - regex: 'CSSCheck/[\d.]+'
3943
+ name: 'CSSCheck'
3944
+ category: 'Validator'
3945
+
3946
+ - regex: 'MicrosoftPreview/[\d.]+'
3947
+ name: 'Microsoft Preview'
3948
+ category: 'Service Agent'
3949
+ url: 'https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0'
3950
+ producer:
3951
+ name: 'Microsoft Corporation'
3952
+ url: 'https://www.microsoft.com/'
3953
+
3954
+ - regex: 's~virustotalcloud'
3955
+ name: 'VirusTotal Cloud'
3956
+ category: 'Crawler'
3957
+ url: 'https://www.virustotal.com/'
3958
+ producer:
3959
+ name: 'Chronicle Security Ireland Limited'
3960
+ url: 'https://chronicle.security/'
3961
+
3962
+ - regex: 'TinEye/[\d.]+'
3963
+ name: 'TinEye'
3964
+ category: 'Crawler'
3965
+ url: 'https://tineye.com/'
3966
+ producer:
3967
+ name: 'Idée, Inc.'
3968
+ url: 'https://tineye.com/'
3969
+
3970
+ - regex: 'e~arsnova-filter-system'
3971
+ name: 'ARSNova Filter System'
3972
+ category: 'Crawler'
3973
+ url: 'https://particify.de/en/'
3974
+ producer:
3975
+ name: 'Particify Gerhardt & Weingarten OHG'
3976
+ url: 'https://particify.de/en/'
3977
+
3978
+ - regex: 'botify'
3979
+ name: 'Botify'
3980
+ category: 'Crawler'
3981
+ url: 'https://www.botify.com/'
3982
+ producer:
3983
+ name: 'BOTIFY SAS'
3984
+ url: 'https://www.botify.com/'
3985
+
3986
+ - regex: 'adscanner'
3987
+ name: 'Adscanner'
3988
+ category: 'Crawler'
3989
+ url: 'https://www.alleyesonscreens.com/'
3990
+ producer:
3991
+ name: 'AdScanner d.o.o'
3992
+ url: 'https://www.alleyesonscreens.com/'
3993
+
3994
+ - regex: 'online-webceo-bot/[\d.]+'
3995
+ name: 'WebCEO'
3996
+ category: 'Crawler'
3997
+ url: 'https://www.webceo.com/'
3998
+ producer:
3999
+ name: 'WebCEO, LLC'
4000
+ url: 'https://www.webceo.com/'
4001
+
4002
+ - regex: 'NetTrack'
4003
+ name: 'NetTrack'
4004
+ category: 'Crawler'
4005
+ url: 'https://web.archive.org/web/20160607151934/https://nettrack.info/'
4006
+
4007
+ - regex: 'htmlyse'
4008
+ name: 'htmlyse'
4009
+ category: 'Crawler'
4010
+ url: 'https://www.htmlyse.com/'
4011
+ producer:
4012
+ name: 'Vistex LTD'
4013
+ url: 'https://www.htmlyse.com/'
4014
+
4015
+ - regex: 'TrendsmapResolver/[\d.]+'
4016
+ name: 'Trendsmap'
4017
+ category: 'Crawler'
4018
+ url: 'https://www.trendsmap.com/'
4019
+ producer:
4020
+ name: 'Trendsmap Pty Ltd'
4021
+ url: 'https://www.trendsmap.com/'
4022
+
4023
+ - regex: 'Shareaholic(?:bot)?/[\d.]+'
4024
+ name: 'Steve Bot'
4025
+ category: 'Crawler'
4026
+ url: 'https://www.shareaholic.com/steve'
4027
+ producer:
4028
+ name: 'Shareaholic, Inc.'
4029
+ url: 'https://www.shareaholic.com/'
4030
+
4031
+ - regex: 'keycdn-tools:'
4032
+ name: 'KeyCDN Tools'
4033
+ category: 'Service Agent'
4034
+ url: 'https://tools.keycdn.com/geo'
4035
+
4036
+ - regex: 'keycdn-tools/'
4037
+ name: 'KeyCDN Tools'
4038
+ category: 'Service Agent'
4039
+ url: 'https://tools.keycdn.com/'
4040
+ producer:
4041
+ name: 'proinity LLC'
4042
+ url: 'https://www.keycdn.com/'
4043
+
4044
+ - regex: 'Arquivo-web-crawler'
4045
+ name: 'Arquivo.pt'
4046
+ category: 'Crawler'
4047
+ url: 'https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/'
4048
+ producer:
4049
+ name: 'FCT|FCCN'
4050
+ url: 'https://www.fct.pt/'
4051
+
4052
+ - regex: 'WhatsMyIP\.org'
4053
+ name: 'WhatsMyIP.org'
4054
+ category: 'Service Agent'
4055
+ url: 'https://www.whatsmyip.org/ua/'
4056
+
4057
+ - regex: 'SenutoBot/[\d.]+'
4058
+ name: 'Senuto'
4059
+ category: 'Crawler'
4060
+ url: 'https://www.senuto.com/'
4061
+ producer:
4062
+ name: 'Senuto Sp. z o.o.'
4063
+ url: 'https://www.senuto.com/'
4064
+
4065
+ - regex: 'spaziodati'
4066
+ name: 'SpazioDati'
4067
+ category: 'Crawler'
4068
+ url: 'https://www.spaziodati.eu/'
4069
+ producer:
4070
+ name: 'SpazioDati s.r.l.'
4071
+ url: 'https://www.spaziodati.eu/'
4072
+
4073
+ - regex: 'GozleBot'
4074
+ name: 'Gozle'
4075
+ category: 'Crawler'
4076
+ url: 'https://gozle.com.tm/en/blog/post/1'
4077
+ producer:
4078
+ name: 'Doly Horjun HJ'
4079
+ url: 'https://gozle.com.tm/'
4080
+
4081
+ - regex: 'Quantcastbot/[\d.]+'
4082
+ name: 'Quantcast'
4083
+ category: 'Crawler'
4084
+ url: 'https://www.quantcast.com/bot/'
4085
+ producer:
4086
+ name: 'Quantcast Corp.'
4087
+ url: 'https://www.quantcast.com/'
4088
+
4089
+ - regex: 'FontRadar'
4090
+ name: 'FontRadar'
4091
+ category: 'Crawler'
4092
+ url: 'https://www.fontradar.com/'
4093
+ producer:
4094
+ name: 'EMDASH SAS'
4095
+ url: 'https://www.fontradar.com/'
4096
+
4097
+ - regex: 'ViberUrlDownloader'
4098
+ name: 'Viber Url Downloader'
4099
+ category: 'Service Agent'
4100
+ url: 'https://www.viber.com/'
4101
+ producer:
4102
+ name: 'Viber Media S.à r.l.'
4103
+ url: 'https://www.viber.com/'
4104
+
4105
+ - regex: '^Zeno$'
4106
+ name: 'Zeno'
4107
+ category: 'Crawler'
4108
+ url: 'https://github.com/internetarchive/Zeno'
4109
+ producer:
4110
+ name: 'The Internet Archive'
4111
+ url: 'https://archive.org/'
4112
+
4113
+ - regex: 'Barracuda Sentinel'
4114
+ name: 'Barracuda Sentinel'
4115
+ category: 'Service Agent'
4116
+ url: 'https://sentinel.barracudanetworks.com/'
4117
+ producer:
4118
+ name: 'Barracuda Networks, Inc.'
4119
+ url: 'https://www.barracudanetworks.com/'
4120
+
4121
+ - regex: 'RuxitSynthetic/[\d.]+'
4122
+ name: 'RuxitSynthetic'
4123
+ category: 'Site Monitor'
4124
+ url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
4125
+ producer:
4126
+ name: 'Dynatrace LLC'
4127
+ url: 'https://www.dynatrace.com/'
4128
+
4129
+ - regex: 'DynatraceSynthetic/[\d.]+'
4130
+ name: 'DynatraceSynthetic'
4131
+ category: 'Site Monitor'
4132
+ url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
4133
+ producer:
4134
+ name: 'Dynatrace LLC'
4135
+ url: 'https://www.dynatrace.com/'
4136
+
4137
+ - regex: 'sitebulb'
4138
+ name: 'Sitebulb'
4139
+ category: 'Crawler'
4140
+ url: 'https://sitebulb.com/'
4141
+ producer:
4142
+ name: 'Sitebulb Limited'
4143
+ url: 'https://sitebulb.com/'
4144
+
4145
+ - regex: 'Monsidobot/[\d.]+'
4146
+ name: 'Monsidobot'
4147
+ category: 'Crawler'
4148
+ url: 'https://monsido.com/bot-html'
4149
+ producer:
4150
+ name: 'Monsido LLC'
4151
+ url: 'https://monsido.com/'
4152
+
4153
+ - regex: 'AccompanyBot'
4154
+ name: 'AccompanyBot'
4155
+ category: 'Crawler'
4156
+ url: 'https://www.accompany.com/'
4157
+ producer:
4158
+ name: 'Accompani, Inc'
4159
+ url: 'https://www.accompany.com/'
4160
+
4161
+ - regex: 'Ghost Inspector'
4162
+ name: 'Ghost Inspector'
4163
+ category: 'Site Monitor'
4164
+ url: 'https://docs.ghostinspector.com/faq/#how-do-i-detect-ghost-inspector-test-runner-traffic-on-my-site'
4165
+ producer:
4166
+ name: 'Ghost Inspector, Inc.'
4167
+ url: 'https://www.ghostinspector.com/'
4168
+
4169
+ - regex: 'Cypress/[\d.]+'
4170
+ name: 'Cypress'
4171
+ category: 'Site Monitor'
4172
+ url: 'https://github.com/cypress-io/cypress'
4173
+ producer:
4174
+ name: 'Cypress.io, Inc.'
4175
+ url: 'https://www.cypress.io/'
4176
+
4177
+ - regex: 'Google-Apps-Script'
4178
+ name: 'Google Apps Script'
4179
+ category: 'Service Agent'
4180
+ url: 'https://www.google.com/script/start/'
4181
+
4182
+ - regex: 'SiteOne-Crawler/[\d.]+'
4183
+ name: 'SiteOne Crawler'
4184
+ category: 'Crawler'
4185
+ url: 'https://crawler.siteone.io/bot/'
4186
+ producer:
4187
+ name: 'SiteOne s.r.o.'
4188
+ url: 'https://www.siteone.io/'
4189
+
4190
+ - regex: 'Detectify'
4191
+ name: 'Detectify'
4192
+ category: 'Security Checker'
4193
+ url: 'https://support.detectify.com/support/solutions/articles/48001049001-how-to-allow-detectify-to-access-your-site'
4194
+ producer:
4195
+ name: 'Detectify AB'
4196
+ url: 'https://detectify.com/'
4197
+
4198
+ - regex: 'DomCopBot'
4199
+ name: 'DomCop Bot'
4200
+ category: 'Crawler'
4201
+ url: 'https://www.domcop.com/bot'
4202
+ producer:
4203
+ name: 'Axeman Technology Solutions LLP'
4204
+ url: 'https://axemantech.com/'
4205
+
4206
+ - regex: 'Paqlebot/[\d.]+'
4207
+ name: 'Paqlebot'
4208
+ category: 'Crawler'
4209
+ url: 'https://www.paqle.dk/about/paqlebot'
4210
+ producer:
4211
+ name: 'Paqle A/S'
4212
+ url: 'https://www.paqle.dk/'
4213
+
4214
+ - regex: 'Wibybot'
4215
+ name: 'Wibybot'
4216
+ category: 'Crawler'
4217
+ url: 'https://www.wiby.me/'
4218
+
4219
+ - regex: 'Synapse'
4220
+ name: 'Synapse'
4221
+ category: 'Crawler'
4222
+ url: 'https://github.com/matrix-org/synapse'
4223
+
4224
+ - regex: 'OSZKbot/[\d.]+'
4225
+ name: 'OSZKbot'
4226
+ category: 'Crawler'
4227
+ url: 'http://mekosztaly.oszk.hu/mia/'
4228
+ producer:
4229
+ name: 'National Szechenyi Library'
4230
+ url: 'https://webarchivum.oszk.hu/'
4231
+
4232
+ - regex: 'ZoomBot'
4233
+ name: 'ZoomBot'
4234
+ category: 'Crawler'
4235
+ url: 'https://suite.seozoom.it/bot.html'
4236
+ producer:
4237
+ name: 'SEO Cube S.r.l.'
4238
+ url: 'https://www.seocube.it/'
4239
+
4240
+ - regex: 'RavenCrawler/[\d.]+'
4241
+ name: 'RavenCrawler'
4242
+ category: 'Crawler'
4243
+ url: 'https://raventools.com/site-auditor/'
4244
+ producer:
4245
+ name: 'TapClicks, Inc.'
4246
+ url: 'https://www.tapclicks.com/'
4247
+
4248
+ - regex: 'KadoBot'
4249
+ name: 'KadoBot'
4250
+ category: 'Crawler'
4251
+ url: 'https://www.kadolijst.nl/bot'
4252
+ producer:
4253
+ name: 'Kadolijst'
4254
+ url: 'https://www.kadolijst.nl/'
4255
+
4256
+ - regex: 'Dubbotbot/[\d.]+'
4257
+ name: 'Dubbotbot'
4258
+ category: 'Crawler'
4259
+ url: 'https://help.dubbot.com/en/articles/6746594-example-custom-user-agent'
4260
+ producer:
4261
+ name: 'DubBot'
4262
+ url: 'https://dubbot.com/'
4263
+
4264
+ - regex: 'Swiftbot/[\d.]+'
4265
+ name: 'Swiftbot'
4266
+ category: 'Crawler'
4267
+ url: 'https://swiftype.com/swiftbot'
4268
+ producer:
4269
+ name: 'Elasticsearch, B.V.'
4270
+ url: 'https://www.elastic.co/'
4271
+
4272
+ - regex: 'EyeMonIT'
4273
+ name: 'EyeMonit'
4274
+ category: 'Site Monitor'
4275
+ url: 'https://eyemonit.com/'
4276
+ producer:
4277
+ name: 'EyeMonit'
4278
+ url: 'https://eyemonit.com/'
4279
+
4280
+ - regex: 'ThousandEyes'
4281
+ name: 'ThousandEyes'
4282
+ category: 'Site Monitor'
4283
+ url: 'https://www.thousandeyes.com/'
4284
+ producer:
4285
+ name: 'Cisco Systems, Inc.'
4286
+ url: 'https://www.cisco.com/'
4287
+
4288
+ - regex: 'OmtrBot/[\d.]+'
4289
+ name: 'OmtrBot'
4290
+ category: 'Site Monitor'
4291
+
4292
+ - regex: 'WebMon/[\d.]+'
4293
+ name: 'WebMon'
4294
+ category: 'Site Monitor'
4295
+
4296
+ - regex: 'AdsTxtCrawlerTP/[\d.]+'
4297
+ name: 'AdsTxtCrawlerTP'
4298
+ category: 'Crawler'
4299
+
4300
+ - regex: 'fragFINN'
4301
+ name: 'fragFINN'
4302
+ category: 'Crawler'
4303
+ url: 'https://www.fragfinn.de/'
4304
+ producer:
4305
+ name: 'fragFINN e.V.'
4306
+ url: 'https://www.fragfinn.de/'
4307
+
4308
+ - regex: 'Clickagy'
4309
+ name: 'Clickagy'
4310
+ category: 'Crawler'
4311
+ url: 'https://www.clickagy.com/'
4312
+ producer:
4313
+ name: 'Clickagy, LLC'
4314
+ url: 'https://www.clickagy.com/'
4315
+
4316
+ - regex: 'kiwitcms-gitops/[\d.]+'
4317
+ name: 'Kiwi TCMS GitOps'
4318
+ category: 'Service Agent'
4319
+ url: 'https://kiwitcms.org'
4320
+ producer:
4321
+ name: 'Open Technologies Bulgaria, Ltd.'
4322
+ url: 'https://kiwitcms.org'
4323
+
4324
+ - regex: 'webtru_crawler'
4325
+ name: 'webtru'
4326
+ category: 'Crawler'
4327
+ url: 'https://webtru.io/'
4328
+ producer:
4329
+ name: 'DataSign Inc.'
4330
+ url: 'https://datasign.jp/'
4331
+
4332
+ - regex: 'URLSuMaBot'
4333
+ name: 'URLSuMaBot'
4334
+ category: 'Crawler'
4335
+ url: 'https://www.urlsuma.de/'
4336
+
4337
+ - regex: '360JK yunjiankong'
4338
+ name: '360JK'
4339
+ category: 'Site Monitor'
4340
+ url: 'http://jk.cloud.360.cn/'
4341
+ producer:
4342
+ name: '360 Security Technology Inc.'
4343
+ url: 'https://www.360.cn/'
4344
+
4345
+ - regex: 'UCSBNetworkMeasurement'
4346
+ name: 'UCSB Network Measurement'
4347
+ category: 'Crawler'
4348
+ url: 'https://www.it.ucsb.edu/'
4349
+ producer:
4350
+ name: 'University of California, Santa Barbara'
4351
+ url: 'https://www.it.ucsb.edu/'
4352
+
4353
+ - regex: 'Plesk screenshot bot'
4354
+ name: 'Plesk Screenshot Service'
4355
+ category: 'Service Agent'
4356
+ url: 'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service'
4357
+ producer:
4358
+ name: 'Plesk International GmbH'
4359
+ url: 'https://www.plesk.com/'
4360
+
4361
+ - regex: 'Who\.is'
4362
+ name: 'Who.is Bot'
4363
+ category: 'Crawler'
4364
+ url: 'https://who.is/'
4365
+
4366
+ - regex: 'Probely'
4367
+ name: 'Probely'
4368
+ category: 'Security Checker'
4369
+ url: 'https://probely.com/sos/'
4370
+ producer:
4371
+ name: 'Probely - Soluções de Cibersegurança, S.A.'
4372
+ url: 'https://probely.com/'
4373
+
4374
+ - regex: 'Uptimia(?:/[\d.]+)?'
4375
+ name: 'Uptimia'
4376
+ category: 'Site Monitor'
4377
+ url: 'https://www.uptimia.com/'
4378
+ producer:
4379
+ name: 'JJ Online GmbH'
4380
+ url: 'https://www.uptimia.com/'
4381
+
4382
+ - regex: '2GDPR/[\d.]+'
4383
+ name: '2GDPR'
4384
+ category: 'Service Agent'
4385
+ url: 'https://2gdpr.com/tos'
4386
+ producer:
4387
+ name: '2GDPR'
4388
+ url: 'https://2gdpr.com/'
4389
+
4390
+ - regex: 'abuse\.xmco\.fr'
4391
+ name: 'Serenety'
4392
+ category: 'Security Checker'
4393
+ url: 'https://abuse.xmco.fr/'
4394
+ producer:
4395
+ name: 'XMCO, SASU'
4396
+ url: 'https://www.xmco.fr/'
4397
+
4398
+ - regex: 'CheckHost'
4399
+ name: 'CheckHost'
4400
+ category: 'Site Monitor'
4401
+ url: 'https://check-host.net/'
4402
+ producer:
4403
+ name: 'CheckHost'
4404
+ url: 'https://check-host.net/'
4405
+
4406
+ - regex: 'LAC_IAHarvester/[\d.]+'
4407
+ name: 'LAC IA Harvester'
4408
+ category: 'Crawler'
4409
+ url: 'https://library-archives.canada.ca/eng/services/government-canada/web-social-media-preservation-program/Pages/web-archive.aspx'
4410
+ producer:
4411
+ name: 'Library and Archives Canada'
4412
+ url: 'https://library-archives.canada.ca/'
4413
+
4414
+ - regex: 'InsytfulBot/[\d.]+'
4415
+ name: 'InsytfulBot'
4416
+ category: 'Crawler'
4417
+ url: 'https://www.insytful.com/'
4418
+ producer:
4419
+ name: 'Zengenti Limited'
4420
+ url: 'https://www.zengenti.com/'
4421
+
4422
+ - regex: 'statista\.com'
4423
+ name: 'Statista'
4424
+ category: 'Crawler'
4425
+ url: 'https://www.statista.com/'
4426
+ producer:
4427
+ name: 'Statista, Inc.'
4428
+ url: 'https://www.statista.com/'
4429
+
4430
+ - regex: 'SubstackContentFetch/[\d.]+'
4431
+ name: 'Substack Content Fetch'
4432
+ category: 'Crawler'
4433
+ url: 'https://substack.com/'
4434
+ producer:
4435
+ name: 'Substack, Inc.'
4436
+ url: 'https://substack.com/'
4437
+
4438
+ - regex: '^ds9'
4439
+ name: 'Deep SEARCH 9'
4440
+ category: 'Crawler'
4441
+ url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/'
4442
+ producer:
4443
+ name: 'Copyright Clearance Center, Inc.'
4444
+ url: 'https://www.copyright.com/'
4445
+
4446
+ - regex: 'LiveJournal\.com'
4447
+ name: 'LiveJournal'
4448
+ url: 'https://www.livejournal.com/'
4449
+ category: 'Feed Fetcher'
4450
+ producer:
4451
+ name: 'ООО "СИМ"'
4452
+ url: 'https://www.livejournal.com/'
4453
+
4454
+ - regex: 'bitdiscovery'
4455
+ name: 'Tenable.asm'
4456
+ category: 'Security Checker'
4457
+ url: 'https://bitdiscovery.com/'
4458
+ producer:
4459
+ name: 'Tenable, Inc.'
4460
+ url: 'https://www.tenable.com/'
4461
+
4462
+ - regex: 'Castopod/[\d.]+'
4463
+ name: 'Castopod'
4464
+ category: 'Crawler'
4465
+ url: 'https://www.castopod.org/'
4466
+
4467
+ - regex: 'Elastic/Synthetics'
4468
+ name: 'Elastic Synthetics'
4469
+ category: 'Site Monitor'
4470
+ url: 'https://github.com/elastic/synthetics'
4471
+ producer:
4472
+ name: 'Elasticsearch B.V.'
4473
+ url: 'https://www.elastic.co/'
4474
+
4475
+ - regex: 'WDG_Validator/[\d.]+'
4476
+ name: 'WDG HTML Validator'
4477
+ category: 'Validator'
4478
+ url: 'http://www.htmlhelp.com/tools/validator/'
4479
+
4480
+ - regex: 'scan@aegis.network'
4481
+ name: 'Aegis'
4482
+ category: 'Crawler'
4483
+ url: 'https://web.archive.org/web/20180910002802/http://www.aegis.network/'
4484
+
4485
+ - regex: 'CrawlyProjectCrawler/[\d.]+'
4486
+ name: 'Crawly Project'
4487
+ category: 'Crawler'
4488
+ url: 'https://web.archive.org/web/20240326141952/https://crawlyproject.digitaldragon.dev/'
4489
+
4490
+ - regex: 'BDFetch'
4491
+ name: 'BDFetch'
4492
+ category: 'Crawler'
4493
+ url: 'https://web.archive.org/web/20130821043949/http://www.branddimensions.com/'
4494
+
4495
+ - regex: 'PunkMap'
4496
+ name: 'Punk Map'
4497
+ category: 'Security Checker'
4498
+ url: 'https://github.com/openeasm/punkmap'
4499
+
4500
+ - regex: 'GenomeCrawlerd/[\d.]+'
4501
+ name: 'Deepfield Genome'
4502
+ category: 'Crawler'
4503
+ url: 'https://www.nokia.com/networks/ip-networks/deepfield/genome/'
4504
+ producer:
4505
+ name: 'Nokia Corporation'
4506
+ url: 'https://www.nokia.com/'
4507
+
4508
+ - regex: 'Gaisbot/[\d.]+'
4509
+ name: 'Gaisbot'
4510
+ category: 'Crawler'
4511
+ url: 'https://web.archive.org/web/20090604121511/https://gais.cs.ccu.edu.tw/robot.php'
4512
+
4513
+ - regex: 'FAST-WebCrawler/[\d.]+'
4514
+ name: 'AlltheWeb'
4515
+ category: 'Crawler'
4516
+ url: 'https://web.archive.org/web/20041020050801/http://www.alltheweb.com/help/webmaster/crawler'
4517
+
4518
+ - regex: 'ducks\.party'
4519
+ name: 'ducks.party'
4520
+ category: 'Security Checker'
4521
+ url: 'https://ducks.party/'
4522
+
4523
+ - regex: 'DepSpid/[\d.]+'
4524
+ name: 'DepSpid'
4525
+ category: 'Crawler'
4526
+ url: 'https://web.archive.org/web/20080321224033/http://about.depspid.net/'
4527
+
4528
+ - regex: 'Website-info\.net'
4529
+ name: 'Website-info'
4530
+ category: 'Crawler'
4531
+ url: 'https://website-info.net/robot'
4532
+ producer:
4533
+ name: 'Meins und Vogel GmbH'
4534
+ url: 'https://muv.com/'
4535
+
4536
+ # Generic bots
4537
+ - regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|url|Zeus|ZmEu)$'
4538
+ name: 'Generic Bot'
4539
+
4540
+ # Generic detections
4541
+ - regex: '[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\d\.[x\d] |electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft |banshee-)project(?!or)|(?<!Google Wap |Blue |SpeedMode; )proxy|research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider(?! 8)|study|transcoder|uptime|user[ _]?agent|validator)(?:[^a-z]|$)'
1234
4542
  name: 'Generic Bot'