device_detector 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/regexes/bots.yml CHANGED
@@ -5,6 +5,11 @@
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
+ - regex: 'WireReaderBot(?:/([\d+.]+))?'
9
+ name: 'WireReaderBot'
10
+ category: 'Feed Fetcher'
11
+ url: 'https://wirereader.app/'
12
+
8
13
  - regex: 'monitoring360bot'
9
14
  name: '360 Monitoring'
10
15
  category: 'Site Monitor'
@@ -61,7 +66,7 @@
61
66
  name: 'Ahrefs Pte Ltd'
62
67
  url: 'https://ahrefs.com/robot'
63
68
 
64
- - regex: 'AhrefsSiteAudit/([\d+.]+)'
69
+ - regex: 'AhrefsSiteAudit/[\d.]+'
65
70
  name: 'AhrefsSiteAudit'
66
71
  category: 'Site Monitor'
67
72
  url: 'https://ahrefs.com/robot/site-audit'
@@ -85,7 +90,7 @@
85
90
  name: 'Alexa Internet'
86
91
  url: 'https://www.alexa.com'
87
92
 
88
- - regex: 'Amazonbot'
93
+ - regex: 'Amazonbot/[\d.]+'
89
94
  name: 'Amazon Bot'
90
95
  category: 'Crawler'
91
96
  url: 'https://developer.amazon.com/support/amazonbot'
@@ -93,6 +98,14 @@
93
98
  name: 'Amazon.com, Inc.'
94
99
  url: 'https://www.amazon.com/'
95
100
 
101
+ - regex: 'AmazonAdBot/[\d.]+'
102
+ name: 'Amazon AdBot'
103
+ category: 'Crawler'
104
+ url: 'https://adbot.amazon.com/'
105
+ producer:
106
+ name: 'Amazon.com, Inc.'
107
+ url: 'https://www.amazon.com/'
108
+
96
109
  - regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
97
110
  name: 'Amazon Route53 Health Check'
98
111
  category: 'Service Agent'
@@ -119,10 +132,18 @@
119
132
  - regex: 'Applebot'
120
133
  name: 'Applebot'
121
134
  category: 'Crawler'
122
- url: 'https://support.apple.com/en-us/HT204683'
135
+ url: 'https://support.apple.com/en-us/119829'
123
136
  producer:
124
137
  name: 'Apple Inc'
125
- url: 'https://www.apple.com'
138
+ url: 'https://www.apple.com/'
139
+
140
+ - regex: 'iTMS'
141
+ name: 'iTMS'
142
+ category: 'Crawler'
143
+ url: 'https://support.apple.com/en-us/119829'
144
+ producer:
145
+ name: 'Apple Inc'
146
+ url: 'https://www.apple.com/'
126
147
 
127
148
  - regex: 'AppSignalBot'
128
149
  name: 'AppSignalBot'
@@ -220,7 +241,7 @@
220
241
  name: 'Better Uptime'
221
242
  url: 'https://betteruptime.com/'
222
243
 
223
- - regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
244
+ - regex: 'MSNBot|msrbot|bingbot|bingadsbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
224
245
  name: 'BingBot'
225
246
  category: 'Search bot'
226
247
  url: 'http://search.msn.com/msnbot.htmn'
@@ -371,7 +392,23 @@
371
392
  name: 'CloudFlare'
372
393
  url: 'https://www.cloudflare.com/'
373
394
 
374
- - regex: 'https://developers.cloudflare.com/security-center/'
395
+ - regex: 'Cloudflare-Smart-Transit'
396
+ name: 'Cloudflare Smart Transit'
397
+ category: 'Site Monitor'
398
+ url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
399
+ producer:
400
+ name: 'CloudFlare'
401
+ url: 'https://www.cloudflare.com/'
402
+
403
+ - regex: 'CloudflareObservatory'
404
+ name: 'Cloudflare Observatory'
405
+ category: 'Site Monitor'
406
+ url: 'https://developers.cloudflare.com/speed/speed-test/run-speed-test'
407
+ producer:
408
+ name: 'CloudFlare'
409
+ url: 'https://www.cloudflare.com/'
410
+
411
+ - regex: 'https://developers\.cloudflare\.com/security-center/'
375
412
  name: 'Cloudflare Security Insights'
376
413
  category: 'Site Monitor'
377
414
  url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
@@ -379,7 +416,7 @@
379
416
  name: 'CloudFlare'
380
417
  url: 'https://www.cloudflare.com/'
381
418
 
382
- - regex: 'coccoc.com'
419
+ - regex: 'coccoc\.com'
383
420
  name: 'Cốc Cốc Bot'
384
421
  url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
385
422
  category: 'Search bot'
@@ -435,7 +472,7 @@
435
472
  name: 'Dataprovider B.V.'
436
473
  url: 'https://www.dataprovider.com/'
437
474
 
438
- - regex: 'Daum(oa)?[ /][0-9]'
475
+ - regex: 'Daum(?!(?:Apps|Device))'
439
476
  name: 'Daum'
440
477
  category: 'Search bot'
441
478
  url: 'http://tab.search.daum.net/aboutWebSearch_en.html'
@@ -459,7 +496,7 @@
459
496
  name: 'Discovery Engine'
460
497
  url: 'http://discoveryengine.com'
461
498
 
462
- - regex: 'Domain Re-Animator Bot|support@domainreanimator.com'
499
+ - regex: 'Domain Re-Animator Bot|support@domainreanimator\.com'
463
500
  name: 'Domain Re-Animator Bot'
464
501
  category: 'Crawler'
465
502
  url: ''
@@ -538,13 +575,21 @@
538
575
  name: 'SEOmoz, Inc.'
539
576
  url: 'http://moz.com/'
540
577
 
541
- - regex: 'facebookexternalhit|facebookplatform|facebookexternalua|facebookcatalog'
542
- name: 'Facebook External Hit'
578
+ - regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)'
579
+ name: 'Facebook Crawler'
543
580
  category: 'Social Media Agent'
544
- url: 'https://www.facebook.com/externalhit_uatext.php'
581
+ url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/'
545
582
  producer:
546
- name: 'Facebook'
547
- url: 'http://www.facebook.com'
583
+ name: 'Meta Platforms, Inc.'
584
+ url: 'https://www.meta.com/'
585
+
586
+ - regex: 'FacebookBot/[\d.]+'
587
+ name: 'FacebookBot'
588
+ category: 'Crawler'
589
+ url: 'https://developers.facebook.com/docs/sharing/bot'
590
+ producer:
591
+ name: 'Meta Platforms, Inc.'
592
+ url: 'https://www.meta.com/'
548
593
 
549
594
  - regex: 'Feedbin'
550
595
  name: 'Feedbin'
@@ -662,7 +707,7 @@
662
707
  url: 'https://search.google.com/search-console/about'
663
708
  producer:
664
709
  name: 'Google Inc.'
665
- url: 'http://www.google.com'
710
+ url: 'https://www.google.com/'
666
711
 
667
712
  - regex: 'Google Page Speed Insights'
668
713
  name: 'Google PageSpeed Insights'
@@ -670,7 +715,7 @@
670
715
  url: 'http://developers.google.com/speed/pagespeed/insights/'
671
716
  producer:
672
717
  name: 'Google Inc.'
673
- url: 'http://www.google.com'
718
+ url: 'https://www.google.com/'
674
719
 
675
720
  - regex: 'google_partner_monitoring'
676
721
  name: 'Google Partner Monitoring'
@@ -678,7 +723,7 @@
678
723
  url: ''
679
724
  producer:
680
725
  name: 'Google Inc.'
681
- url: 'http://www.google.com'
726
+ url: 'https://www.google.com/'
682
727
 
683
728
  - regex: 'Google-Cloud-Scheduler'
684
729
  name: 'Google Cloud Scheduler'
@@ -694,7 +739,7 @@
694
739
  url: 'https://search.google.com/structured-data/testing-tool'
695
740
  producer:
696
741
  name: 'Google Inc.'
697
- url: 'http://www.google.com'
742
+ url: 'https://www.google.com/'
698
743
 
699
744
  - regex: 'GoogleStackdriverMonitoring'
700
745
  name: 'Google Stackdriver Monitoring'
@@ -704,13 +749,21 @@
704
749
  name: 'Google Inc.'
705
750
  url: 'https://www.google.com'
706
751
 
752
+ - regex: 'Google-Transparency-Report'
753
+ name: 'Google Transparency Report'
754
+ category: 'Site Monitor'
755
+ url: 'https://transparencyreport.google.com/'
756
+ producer:
757
+ name: 'Google Inc.'
758
+ url: 'https://www.google.com/'
759
+
707
760
  - regex: 'via ggpht\.com GoogleImageProxy'
708
761
  name: 'Gmail Image Proxy'
709
762
  category: 'Crawler'
710
763
  url: ''
711
764
  producer:
712
765
  name: 'Google Inc.'
713
- url: 'http://www.google.com'
766
+ url: 'https://www.google.com/'
714
767
 
715
768
  - regex: 'SeznamEmailProxy'
716
769
  name: 'Seznam Email Proxy'
@@ -744,21 +797,37 @@
744
797
  name: 'Visual Meta'
745
798
  url: 'https://www.shopalike.cz/'
746
799
 
747
- - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|InspectionTool|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Other|Prober|Producer)|Google.*/\+/web/snippet'
800
+ - regex: 'Googlebot-News'
801
+ name: 'Googlebot News'
802
+ category: 'Search bot'
803
+ url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
804
+ producer:
805
+ name: 'Google Inc.'
806
+ url: 'https://www.google.com/'
807
+
808
+ - regex: 'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Safety|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\+/web/snippet'
748
809
  name: 'Googlebot'
749
810
  category: 'Search bot'
750
- url: 'http://www.google.com/bot.html'
811
+ url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
751
812
  producer:
752
813
  name: 'Google Inc.'
753
- url: 'http://www.google.com'
814
+ url: 'https://www.google.com/'
754
815
 
755
816
  - regex: '^Google$'
756
817
  name: 'Googlebot'
757
818
  category: 'Search bot'
758
- url: 'http://www.google.com/bot.html'
819
+ url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
759
820
  producer:
760
821
  name: 'Google Inc.'
761
- url: 'http://www.google.com'
822
+ url: 'https://www.google.com/'
823
+
824
+ - regex: 'Google-Area120-PrivacyPolicyFetcher'
825
+ name: 'Google Area 120 Privacy Policy Fetcher'
826
+ category: 'Crawler'
827
+ url: 'https://area120.google.com/'
828
+ producer:
829
+ name: 'Google Inc.'
830
+ url: 'https://www.google.com/'
762
831
 
763
832
  - regex: 'heritrix'
764
833
  name: 'Heritrix'
@@ -780,7 +849,7 @@
780
849
  category: 'Crawler'
781
850
  url: 'http://vuhuv.com/bot.html'
782
851
 
783
- - regex: 'HTTPMon'
852
+ - regex: 'HTTPMon/[\d.]+'
784
853
  name: 'HTTPMon'
785
854
  category: 'Site Monitor'
786
855
  url: 'http://www.httpmon.com'
@@ -796,7 +865,7 @@
796
865
  name: ''
797
866
  url: ''
798
867
 
799
- - regex: 'inoreader.com'
868
+ - regex: 'inoreader\.com'
800
869
  name: 'inoreader'
801
870
  category: 'Feed Reader'
802
871
  url: 'https://www.inoreader.com'
@@ -844,7 +913,7 @@
844
913
  name: ''
845
914
  url: ''
846
915
 
847
- - regex: '([A-z0-9]*)-Lighthouse'
916
+ - regex: '[A-z0-9]*-Lighthouse'
848
917
  name: 'Lighthouse'
849
918
  category: 'Site Monitor'
850
919
  url: 'https://developers.google.com/web/tools/lighthouse'
@@ -878,7 +947,8 @@
878
947
 
879
948
  - regex: 'ltx71'
880
949
  name: 'LTX71'
881
- url: 'http://ltx71.com/'
950
+ category: 'Security Checker'
951
+ url: 'https://ltx71.com/'
882
952
  producer:
883
953
  name: ''
884
954
  url: ''
@@ -907,7 +977,7 @@
907
977
  name: ''
908
978
  url: ''
909
979
 
910
- - regex: 'masscan-ng/([\d+.]+)'
980
+ - regex: 'masscan-ng/[\d.]+'
911
981
  name: 'masscan-ng'
912
982
  url: 'https://github.com/bi-zone/masscan-ng'
913
983
  category: 'Crawler'
@@ -915,7 +985,7 @@
915
985
  name: 'BIZON, OOO'
916
986
  url: 'https://bi.zone/'
917
987
 
918
- - regex: 'masscan'
988
+ - regex: '.*masscan'
919
989
  name: 'masscan'
920
990
  url: 'https://github.com/robertdavidgraham/masscan'
921
991
  category: 'Crawler'
@@ -1003,11 +1073,11 @@
1003
1073
  name: 'Nagios Plugins Development Team'
1004
1074
  url: 'https://nagios.org'
1005
1075
 
1006
- - regex: 'nbertaupete95\(at\)gmail.com'
1076
+ - regex: 'nbertaupete95\(at\)gmail\.com'
1007
1077
  name: 'nbertaupete95'
1008
1078
  category: 'Crawler'
1009
1079
 
1010
- - regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
1080
+ - regex: 'Netcraft(?: Web Server Survey| SSL Server Survey|SurveyAgent)'
1011
1081
  name: 'Netcraft Survey Bot'
1012
1082
  category: 'Search bot'
1013
1083
  url: ''
@@ -1031,7 +1101,7 @@
1031
1101
  name: ''
1032
1102
  url: ''
1033
1103
 
1034
- - regex: 'NewsBlur .*(Fetcher|Finder)'
1104
+ - regex: 'NewsBlur .*(?:Fetcher|Finder)'
1035
1105
  name: 'NewsBlur'
1036
1106
  url: 'http://www.newsblur.com'
1037
1107
  category: 'Feed Fetcher'
@@ -1070,10 +1140,18 @@
1070
1140
  name: 'Nuzzel'
1071
1141
  url: 'https://www.nuzzel.com/'
1072
1142
 
1143
+ - regex: 'NodePing'
1144
+ name: 'NodePing'
1145
+ category: 'Site Monitor'
1146
+ url: 'https://nodeping.com'
1147
+ producer:
1148
+ name: 'NodePing'
1149
+ url: 'https://nodeping.com'
1150
+
1073
1151
  - regex: 'Octopus [0-9]'
1074
1152
  name: 'Octopus'
1075
1153
 
1076
- - regex: 'OnlineOrNot.com_bot'
1154
+ - regex: 'OnlineOrNot\.com_bot'
1077
1155
  name: 'OnlineOrNot Bot'
1078
1156
  category: 'Site Monitor'
1079
1157
  url: 'https://onlineornot.com/website-monitoring'
@@ -1142,7 +1220,7 @@
1142
1220
  name: 'PHP Server Monitor'
1143
1221
  url: 'http://www.phpservermonitor.org/'
1144
1222
 
1145
- - regex: 'Pocket(?:ImageCache|Parser)/([\d+.]+)'
1223
+ - regex: 'Pocket(?:ImageCache|Parser)/[\d.]+'
1146
1224
  name: 'Pocket'
1147
1225
  category: 'Read-it-later Service'
1148
1226
  url: 'https://getpocket.com/pocketparser_ua'
@@ -1289,12 +1367,36 @@
1289
1367
  url: ''
1290
1368
 
1291
1369
  - regex: 'SemrushBot'
1292
- name: 'Semrush Bot'
1370
+ name: 'SemrushBot'
1371
+ category: 'Crawler'
1372
+ url: 'https://www.semrush.com/bot/'
1373
+ producer:
1374
+ name: 'Semrush Inc.'
1375
+ url: 'https://www.semrush.com/'
1376
+
1377
+ - regex: 'SerpReputationManagementAgent/[\d.]+'
1378
+ name: 'Semrush Reputation Management'
1379
+ category: 'Service Agent'
1380
+ url: 'https://www.semrush.com/bot/'
1381
+ producer:
1382
+ name: 'Semrush Inc.'
1383
+ url: 'https://www.semrush.com/'
1384
+
1385
+ - regex: 'SplitSignalBot'
1386
+ name: 'SplitSignalBot'
1293
1387
  category: 'Crawler'
1294
- url: 'http://www.semrush.com/bot.html'
1388
+ url: 'https://www.semrush.com/bot/'
1295
1389
  producer:
1296
- name: 'SEMrush'
1297
- url: 'http://www.semrush.com'
1390
+ name: 'Semrush Inc.'
1391
+ url: 'https://www.semrush.com/'
1392
+
1393
+ - regex: 'SiteAuditBot/[\d.]+'
1394
+ name: 'SiteAuditBot'
1395
+ category: 'Crawler'
1396
+ url: 'https://www.semrush.com/bot/'
1397
+ producer:
1398
+ name: 'Semrush Inc.'
1399
+ url: 'https://www.semrush.com/'
1298
1400
 
1299
1401
  - regex: 'SensikaBot'
1300
1402
  name: 'Sensika Bot'
@@ -1304,7 +1406,7 @@
1304
1406
  name: 'Sensika'
1305
1407
  url: 'http://sensika.com'
1306
1408
 
1307
- - regex: 'SEOENG(World)?Bot'
1409
+ - regex: 'SEOENG(?:World)?Bot'
1308
1410
  name: 'SEOENGBot'
1309
1411
  category: 'Crawler'
1310
1412
  url: 'http://www.seoengine.com/seoengbot.htm'
@@ -1394,7 +1496,7 @@
1394
1496
  category: 'Crawler'
1395
1497
  url: 'http://ricks-apps.com/osx/sitesucker/'
1396
1498
 
1397
- - regex: 'sixy.ch'
1499
+ - regex: 'sixy\.ch'
1398
1500
  name: 'Sixy.ch'
1399
1501
  category: 'Site Monitor'
1400
1502
  url: 'http://sixy.ch'
@@ -1410,7 +1512,7 @@
1410
1512
  name: 'Slack Technologies'
1411
1513
  url: 'http://slack.com'
1412
1514
 
1413
- - regex: '(Sogou[ -](head|inst|Orion|Pic|Test|web)[ -]spider)|New-Sogou-Spider'
1515
+ - regex: 'Sogou[ -](?:head|inst|Orion|Pic|Test|web)[ -]spider|New-Sogou-Spider'
1414
1516
  name: 'Sogou Spider'
1415
1517
  category: 'Search bot'
1416
1518
  url: 'http://www.sogou.com/docs/help/webmasters.htm'
@@ -1535,11 +1637,19 @@
1535
1637
  name: ''
1536
1638
  url: ''
1537
1639
 
1538
- - regex: 'theoldreader.com'
1640
+ - regex: 'theoldreader\.com'
1539
1641
  name: 'theoldreader'
1540
1642
  category: 'Feed Reader'
1541
1643
  url: 'https://theoldreader.com'
1542
1644
 
1645
+ - regex: 'Trackable/0\.1'
1646
+ name: 'Chartable'
1647
+ category: 'Site Monitor'
1648
+ url: 'https://help.chartable.com/article/34-what-is-the-trackable-analytics-prefix'
1649
+ producer:
1650
+ name: 'Chartable'
1651
+ url: 'https://chartable.com'
1652
+
1543
1653
  - regex: 'trendictionbot'
1544
1654
  name: 'Trendiction Bot'
1545
1655
  category: 'Crawler'
@@ -1556,13 +1666,13 @@
1556
1666
  name: 'iParadigms, LLC.'
1557
1667
  url: 'http://www.turnitin.com'
1558
1668
 
1559
- - regex: 'TweetedTimes Bot'
1669
+ - regex: 'TweetedTimes'
1560
1670
  name: 'TweetedTimes Bot'
1561
1671
  category: 'Crawler'
1562
- url: 'http://tweetedtimes.com'
1672
+ url: 'https://tweetedtimes.com/'
1563
1673
  producer:
1564
1674
  name: 'TweetedTimes'
1565
- url: 'http://tweetedtimes.com/'
1675
+ url: 'https://tweetedtimes.com/'
1566
1676
 
1567
1677
  - regex: 'TweetmemeBot'
1568
1678
  name: 'Tweetmeme Bot'
@@ -1603,21 +1713,21 @@
1603
1713
  name: 'UkrNet Ltd'
1604
1714
  url: 'https://www.ukr.net/'
1605
1715
 
1606
- - regex: 'Uptimebot'
1716
+ - regex: 'Uptime(?:bot)?/[\d.]+'
1607
1717
  name: 'Uptimebot'
1608
1718
  category: 'Site Monitor'
1609
- url: 'https://uptime.com/uptimebot'
1719
+ url: 'https://uptime.com/uptime-bot'
1610
1720
  producer:
1611
1721
  name: 'Uptime'
1612
- url: 'https://uptime.com'
1722
+ url: 'https://uptime.com/'
1613
1723
 
1614
1724
  - regex: 'UptimeRobot'
1615
- name: 'Uptime Robot'
1725
+ name: 'UptimeRobot'
1616
1726
  category: 'Site Monitor'
1617
- url: ''
1727
+ url: 'https://uptimerobot.com/'
1618
1728
  producer:
1619
1729
  name: 'Uptime Robot'
1620
- url: 'http://uptimerobot.com'
1730
+ url: 'https://uptimerobot.com/'
1621
1731
 
1622
1732
  - regex: 'URLAppendBot'
1623
1733
  name: 'URLAppendBot'
@@ -1638,10 +1748,18 @@
1638
1748
  - regex: 'vkShare; '
1639
1749
  name: 'VK Share Button'
1640
1750
  category: 'Crawler'
1641
- url: 'http://vk.com/dev/Share'
1751
+ url: 'https://dev.vk.com/en/widgets/share'
1752
+ producer:
1753
+ name: 'VK'
1754
+ url: 'https://vk.com/'
1755
+
1756
+ - regex: 'VKRobot'
1757
+ name: 'VK Robot'
1758
+ category: 'Crawler'
1759
+ url: 'https://dev.vk.com/en/'
1642
1760
  producer:
1643
1761
  name: 'VK'
1644
- url: 'http://vk.com/'
1762
+ url: 'https://vk.com/'
1645
1763
 
1646
1764
  - regex: 'VSMCrawler'
1647
1765
  name: 'Visual Site Mapper Crawler'
@@ -1675,7 +1793,7 @@
1675
1793
  name: 'W3C'
1676
1794
  url: 'http://www.w3.org'
1677
1795
 
1678
- - regex: 'W3C_Validator|Validator.nu'
1796
+ - regex: 'W3C_Validator|Validator\.nu'
1679
1797
  name: 'W3C Markup Validation Service'
1680
1798
  category: 'Validator'
1681
1799
  url: 'http://validator.w3.org/services'
@@ -1699,6 +1817,14 @@
1699
1817
  name: 'W3C'
1700
1818
  url: 'http://www.w3.org'
1701
1819
 
1820
+ - regex: 'P3P Validator'
1821
+ name: 'W3C P3P Validator'
1822
+ category: 'Validator'
1823
+ url: 'https://www.w3.org/P3P/validator.html'
1824
+ producer:
1825
+ name: 'W3C'
1826
+ url: 'https://www.w3.org'
1827
+
1702
1828
  - regex: 'Wappalyzer'
1703
1829
  name: 'Wappalyzer'
1704
1830
  url: 'https://github.com/AliasIO/Wappalyzer'
@@ -1735,6 +1861,22 @@
1735
1861
  name: 'WebSitePulse'
1736
1862
  url: 'http://www.websitepulse.com/'
1737
1863
 
1864
+ - regex: 'WordPress.+isitwp\.com'
1865
+ name: 'IsItWP'
1866
+ category: 'Crawler'
1867
+ url: 'https://www.isitwp.com/'
1868
+ producer:
1869
+ name: 'WPBeginner, LLC'
1870
+ url: 'https://www.wpbeginner.com/'
1871
+
1872
+ - regex: 'Automattic Analytics Crawler/[\d.]+'
1873
+ name: 'Automattic Analytics'
1874
+ category: 'Crawler'
1875
+ url: 'https://wordpress.com/crawler/'
1876
+ producer:
1877
+ name: 'Wordpress.org'
1878
+ url: 'https://wordpress.org/'
1879
+
1738
1880
  - regex: 'WordPress'
1739
1881
  name: 'WordPress'
1740
1882
  category: 'Service Agent'
@@ -1815,13 +1957,29 @@
1815
1957
  name: 'Yahoo! Japan Corp.'
1816
1958
  url: 'https://www.yahoo.co.jp/'
1817
1959
 
1818
- - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1960
+ - regex: 'Y!J-ASR'
1961
+ name: 'Yahoo! Japan ASR'
1962
+ category: 'Crawler'
1963
+ url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
1964
+ producer:
1965
+ name: 'Yahoo! Japan Corp.'
1966
+ url: 'https://www.yahoo.co.jp/'
1967
+
1968
+ - regex: '^Y!J'
1969
+ name: 'Yahoo! Japan'
1970
+ category: 'Crawler'
1971
+ url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
1972
+ producer:
1973
+ name: 'Yahoo! Japan Corp.'
1974
+ url: 'https://www.yahoo.co.jp/'
1975
+
1976
+ - regex: 'Yandex(?:(?:\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher'
1819
1977
  name: 'Yandex Bot'
1820
1978
  category: 'Search bot'
1821
- url: 'http://www.yandex.com/bots'
1979
+ url: 'https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html'
1822
1980
  producer:
1823
1981
  name: 'Yandex LLC'
1824
- url: 'http://company.yandex.com'
1982
+ url: 'https://yandex.com/company/'
1825
1983
 
1826
1984
  - regex: 'Yeti|NaverJapan|AdsBot-Naver'
1827
1985
  name: 'Yeti/Naverbot'
@@ -1881,7 +2039,7 @@
1881
2039
  name: 'Yottaa'
1882
2040
  url: 'http://www.yottaa.com/'
1883
2041
 
1884
- - regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857.*'
2042
+ - regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857'
1885
2043
  name: 'Yahoo Gemini'
1886
2044
  category: 'Crawler'
1887
2045
  url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
@@ -1905,7 +2063,7 @@
1905
2063
  name: 'HubPages, Inc.'
1906
2064
  url: 'https://discover.hubpages.com/'
1907
2065
 
1908
- - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
2066
+ - regex: 'Pinterest(?:bot)?/[\d.]+.*www\.pinterest\.com'
1909
2067
  name: 'Pinterest'
1910
2068
  url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
1911
2069
  category: 'Crawler'
@@ -1913,7 +2071,7 @@
1913
2071
  name: 'Pinterest'
1914
2072
  url: 'https://www.pinterest.com/'
1915
2073
 
1916
- - regex: 'Site24x7'
2074
+ - regex: '.*Site24x7'
1917
2075
  name: 'Site24x7 Website Monitoring'
1918
2076
  category: 'Site Monitor'
1919
2077
  url: 'https://www.site24x7.com/site24x7-faq.html'
@@ -1921,6 +2079,14 @@
1921
2079
  name: 'Site24x7'
1922
2080
  url: 'https://www.site24x7.com'
1923
2081
 
2082
+ - regex: '.* HLB/[\d.]+'
2083
+ name: 'Site24x7 Defacement Monitor'
2084
+ category: 'Site Monitor'
2085
+ url: 'https://support.site24x7.com/portal/en/kb/articles/default-user-agent-used-in-website-defacement-monitor'
2086
+ producer:
2087
+ name: 'Site24x7'
2088
+ url: 'https://www.site24x7.com/'
2089
+
1924
2090
  - regex: 's~snapchat-proxy'
1925
2091
  name: 'Snapchat Proxy'
1926
2092
  category: 'Crawler'
@@ -1937,6 +2103,14 @@
1937
2103
  name: 'Snapchat Inc.'
1938
2104
  url: 'https://www.snapchat.com/'
1939
2105
 
2106
+ - regex: 'SnapchatAds/[\d.]+'
2107
+ name: 'Snapchat Ads'
2108
+ category: 'Crawler'
2109
+ url: 'https://businesshelp.snapchat.com/s/article/adsbot-crawler?language=en_US'
2110
+ producer:
2111
+ name: 'Snapchat Inc.'
2112
+ url: 'https://www.snapchat.com/'
2113
+
1940
2114
  - regex: "Let's Encrypt validation server"
1941
2115
  name: "Let's Encrypt Validation"
1942
2116
  category: 'Service Agent'
@@ -2029,22 +2203,19 @@
2029
2203
  - regex: 'AdMantX.*admantx\.com'
2030
2204
  name: 'ADMantX'
2031
2205
 
2032
- - regex: 'Server Density Service Monitoring.*'
2206
+ - regex: 'Server Density Service Monitoring'
2033
2207
  name: 'Server Density'
2034
2208
 
2035
2209
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
2036
2210
  name: 'RSSRadio Bot'
2037
2211
 
2038
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel|Magellan|CustomUserAgent)'
2039
- name: 'Generic Bot'
2040
-
2041
2212
  - regex: '^sentry'
2042
2213
  name: 'Sentry Bot'
2043
2214
  producer:
2044
2215
  name: 'Sentry'
2045
2216
  url: 'https://sentry.io'
2046
2217
 
2047
- - regex: '^Spotify/(\d+[\.\d]+)$'
2218
+ - regex: '^Spotify/[\d.]+$'
2048
2219
  name: 'Spotify'
2049
2220
  producer:
2050
2221
  name: 'Spotify'
@@ -2102,14 +2273,6 @@
2102
2273
  name: 'Siteimprove GmbH'
2103
2274
  url: 'https://siteimprove.com/'
2104
2275
 
2105
- - regex: 'Image size by Siteimprove\.com'
2106
- name: 'Siteimprove'
2107
- category: 'Search bot'
2108
- url: 'https://siteimprove.com/'
2109
- producer:
2110
- name: 'Siteimprove GmbH'
2111
- url: 'https://siteimprove.com/'
2112
-
2113
2276
  - regex: 'CATExplorador'
2114
2277
  name: 'CATExplorador'
2115
2278
  category: 'Search bot'
@@ -2134,7 +2297,7 @@
2134
2297
  name: 'Idee Inc.'
2135
2298
  url: 'http://ideeinc.com/'
2136
2299
 
2137
- - regex: 'zelist.ro feed parser'
2300
+ - regex: 'zelist\.ro feed parser'
2138
2301
  name: 'Ze List'
2139
2302
  url: 'https://www.zelist.ro/'
2140
2303
  category: 'Feed Fetcher'
@@ -2182,21 +2345,21 @@
2182
2345
  name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
2183
2346
  url: 'https://www.wikido.com/'
2184
2347
 
2185
- - regex: 'AwarioSmartBot'
2348
+ - regex: 'Awario(?:Smart)?Bot'
2186
2349
  name: 'Awario'
2187
2350
  category: 'Search bot'
2188
2351
  url: 'https://awario.com/bots.html'
2189
2352
  producer:
2190
- name: 'Awario'
2191
- url: 'https://awario.com/'
2353
+ name: 'TechFusion Ltd.'
2354
+ url: 'https://www.techfusion.com.cy/'
2192
2355
 
2193
2356
  - regex: 'AwarioRssBot'
2194
2357
  name: 'Awario'
2195
2358
  category: 'Feed Fetcher'
2196
2359
  url: 'https://awario.com/bots.html'
2197
2360
  producer:
2198
- name: 'Awario'
2199
- url: 'https://awario.com/'
2361
+ name: 'TechFusion Ltd.'
2362
+ url: 'https://www.techfusion.com.cy/'
2200
2363
 
2201
2364
  - regex: 'oBot'
2202
2365
  name: 'oBot'
@@ -2288,7 +2451,7 @@
2288
2451
  category: 'Crawler'
2289
2452
  url: 'https://serendeputy.com/about/serendeputy-bot'
2290
2453
 
2291
- - regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx.com.*service-fetcher'
2454
+ - regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx\.com.*service-fetcher'
2292
2455
  name: 'ADmantX Service Fetcher'
2293
2456
  category: 'Service bot'
2294
2457
  url: 'https://www.admantx.com/service-fetcher.html'
@@ -2324,7 +2487,7 @@
2324
2487
  name: 'PPC Labs LLC'
2325
2488
  url: 'https://www.adbeat.com/'
2326
2489
 
2327
- - regex: 'BW/(?:(\d+[\.\d]+))'
2490
+ - regex: '(?:BuiltWith|BW)/[\d.]+'
2328
2491
  name: 'BuiltWith'
2329
2492
  category: 'Crawler'
2330
2493
  url: 'https://builtwith.com/biup'
@@ -2332,7 +2495,7 @@
2332
2495
  name: 'BuiltWith Pty Ltd'
2333
2496
  url: 'https://builtwith.com/'
2334
2497
 
2335
- - regex: 'https://whatis.contentkingapp.com'
2498
+ - regex: 'https://whatis\.contentkingapp\.com'
2336
2499
  name: 'ContentKing'
2337
2500
  category: 'Site Monitor'
2338
2501
  url: 'https://whatis.contentkingapp.com/'
@@ -2348,7 +2511,7 @@
2348
2511
  name: 'MicroAd, Inc.'
2349
2512
  url: 'https://www.microad.co.jp/'
2350
2513
 
2351
- - regex: 'PingAdmin.Ru'
2514
+ - regex: 'PingAdmin\.Ru'
2352
2515
  name: 'PingAdmin.Ru'
2353
2516
  category: 'Site Monitor'
2354
2517
  url: 'https://ping-admin.ru/'
@@ -2366,7 +2529,7 @@
2366
2529
  name: 'WebTehRazrabotka LLC'
2367
2530
  url: 'https://webdatastats.com/'
2368
2531
 
2369
- - regex: 'parse.ly scraper'
2532
+ - regex: 'parse\.ly scraper'
2370
2533
  name: 'parse.ly'
2371
2534
  category: 'Crawler'
2372
2535
  url: 'https://www.parse.ly/help/integration/crawler'
@@ -2379,7 +2542,7 @@
2379
2542
  category: 'Site Monitor'
2380
2543
  url: 'http://cloudsystemnetworks.com'
2381
2544
 
2382
- - regex: 'HeartRails_Capture/\d'
2545
+ - regex: 'HeartRails_Capture/[\d.]+'
2383
2546
  name: 'Heart Rails Capture'
2384
2547
  category: 'Service Agent'
2385
2548
  url: 'http://capture.heartrails.com'
@@ -2387,9 +2550,12 @@
2387
2550
  - regex: 'Project-Resonance'
2388
2551
  name: 'Project Resonance'
2389
2552
  category: 'Crawler'
2390
- url: 'http://project-resonance.com'
2553
+ url: 'https://project-resonance.com/'
2554
+ producer:
2555
+ name: 'RedHunt Labs Limited'
2556
+ url: 'https://redhuntlabs.com/'
2391
2557
 
2392
- - regex: 'DataXu/\d'
2558
+ - regex: 'DataXu/[\d.]+'
2393
2559
  name: 'DataXu'
2394
2560
  category: 'Service Agent'
2395
2561
  url: 'https://advertising.roku.com/dataxu'
@@ -2426,7 +2592,7 @@
2426
2592
  category: 'Crawler'
2427
2593
  url: 'http://www.webtop.com/'
2428
2594
 
2429
- - regex: 'PageThing.com'
2595
+ - regex: 'PageThing\.com'
2430
2596
  name: 'PageThing'
2431
2597
  category: 'Crawler'
2432
2598
  url: 'https://www.pagething.com/'
@@ -2471,10 +2637,18 @@
2471
2637
  url: 'https://github.com/projectdiscovery/httpx'
2472
2638
  category: 'Crawler'
2473
2639
  producer:
2474
- name: ''
2475
- url: ''
2640
+ name: 'ProjectDiscovery, Inc.'
2641
+ url: 'https://projectdiscovery.io/'
2642
+
2643
+ - regex: '.*\.oast\.'
2644
+ name: 'Interactsh'
2645
+ category: 'Security Checker'
2646
+ url: 'https://github.com/projectdiscovery/interactsh'
2647
+ producer:
2648
+ name: 'ProjectDiscovery, Inc.'
2649
+ url: 'https://projectdiscovery.io/'
2476
2650
 
2477
- - regex: 'scaninfo@(?:expanseinc|paloaltonetworks).com'
2651
+ - regex: 'scaninfo@(?:expanseinc|paloaltonetworks)\.com'
2478
2652
  name: 'Expanse'
2479
2653
  category: 'Security Checker'
2480
2654
  url: 'https://expanse.co/'
@@ -2505,12 +2679,12 @@
2505
2679
  name: 'Hatena Co., Ltd.'
2506
2680
  url: 'https://www.hatena.ne.jp'
2507
2681
 
2508
- - regex: 'RyowlEngine/(\d+)'
2682
+ - regex: 'RyowlEngine/[\d.]+'
2509
2683
  name: 'Ryowl'
2510
2684
  category: 'Crawler'
2511
2685
  url: 'https://ryowl.org'
2512
2686
 
2513
- - regex: 'OdklBot/(\d+)'
2687
+ - regex: 'OdklBot/[\d.]+'
2514
2688
  name: 'Odnoklassniki Bot'
2515
2689
  category: 'Crawler'
2516
2690
  url: 'https://odnoklassniki.ru'
@@ -2525,7 +2699,7 @@
2525
2699
  category: 'Crawler'
2526
2700
  url: 'https://www.zoominfo.com'
2527
2701
 
2528
- - regex: 'WeViKaBot/([\d+\.])'
2702
+ - regex: 'WeViKaBot/[\d.]+'
2529
2703
  name: 'WeViKaBot'
2530
2704
  category: 'Crawler'
2531
2705
  url: 'http://www.wevika.de'
@@ -2535,7 +2709,7 @@
2535
2709
  category: 'Crawler'
2536
2710
  url: 'https://www.seokicks.de/robot.html'
2537
2711
 
2538
- - regex: 'Plukkie/([\d+\.])'
2712
+ - regex: 'Plukkie/[\d.]+'
2539
2713
  name: 'Plukkie'
2540
2714
  category: 'Crawler'
2541
2715
  url: 'http://www.botje.com/plukkie.htm'
@@ -2545,22 +2719,22 @@
2545
2719
  category: 'Crawler'
2546
2720
  url: 'https://www.comscore.com/Web-Crawler'
2547
2721
 
2548
- - regex: 'SurdotlyBot/([\d+\.])'
2722
+ - regex: 'SurdotlyBot/[\d.]+'
2549
2723
  name: 'SurdotlyBot'
2550
2724
  category: 'Crawler'
2551
2725
  url: 'http://sur.ly/bot.html'
2552
2726
 
2553
- - regex: 'Gowikibot/([\d+\.])'
2727
+ - regex: 'Gowikibot/[\d.]+'
2554
2728
  name: 'Gowikibot'
2555
2729
  category: 'Crawler'
2556
2730
  url: 'http:/www.gowikibot.com'
2557
2731
 
2558
- - regex: 'SabsimBot/([\d+\.])'
2732
+ - regex: 'SabsimBot/[\d.]+'
2559
2733
  name: 'SabsimBot'
2560
2734
  category: 'Crawler'
2561
2735
  url: 'https://sabsim.com'
2562
2736
 
2563
- - regex: 'LumtelBot/([\d+\.])'
2737
+ - regex: 'LumtelBot/[\d.]+'
2564
2738
  name: 'LumtelBot'
2565
2739
  category: 'Crawler'
2566
2740
  url: 'https://umtel.com'
@@ -2570,12 +2744,12 @@
2570
2744
  category: 'Crawler'
2571
2745
  url: 'http://www.pipl.com/bot'
2572
2746
 
2573
- - regex: 'woobot/([\d+\.])'
2747
+ - regex: 'woobot/[\d.]+'
2574
2748
  name: 'WooRank'
2575
2749
  category: 'Crawler'
2576
2750
  url: 'https://www.woorank.com/bot'
2577
2751
 
2578
- - regex: 'Cookiebot/([\d+\.])'
2752
+ - regex: 'Cookiebot/[\d.]+'
2579
2753
  name: 'Cookiebot'
2580
2754
  category: 'Crawler'
2581
2755
  url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
@@ -2591,7 +2765,7 @@
2591
2765
  name: 'NET SYSTEMS RESEARCH LLC'
2592
2766
  url: 'https://www.netsystemsresearch.com/'
2593
2767
 
2594
- - regex: 'CensysInspect/([\d+\.])'
2768
+ - regex: 'CensysInspect/[\d.]+'
2595
2769
  name: 'CensysInspect'
2596
2770
  category: 'Security Checker'
2597
2771
  url: 'https://about.censys.io/'
@@ -2599,7 +2773,7 @@
2599
2773
  name: 'Censys, Inc.'
2600
2774
  url: 'https://censys.io/'
2601
2775
 
2602
- - regex: 'gdnplus.com'
2776
+ - regex: 'gdnplus\.com'
2603
2777
  name: 'GDNP'
2604
2778
  category: 'Crawler'
2605
2779
  url: 'https://gdnplus.com/'
@@ -2607,17 +2781,17 @@
2607
2781
  name: 'Global Digital Network Plus, LLC'
2608
2782
  url: 'https://gdnplus.com/'
2609
2783
 
2610
- - regex: 'WellKnownBot/([\d+\.])'
2784
+ - regex: 'WellKnownBot/[\d.]+'
2611
2785
  name: 'WellKnownBot'
2612
2786
  category: 'Crawler'
2613
2787
  url: 'https://well-known.dev'
2614
2788
 
2615
- - regex: 'Adsbot/([\d+\.])'
2789
+ - regex: 'Adsbot/[\d.]+'
2616
2790
  name: 'Adsbot'
2617
2791
  category: 'Crawler'
2618
2792
  url: 'https://seostar.co/robot/'
2619
2793
 
2620
- - regex: 'MTRobot/([\d+\.])'
2794
+ - regex: 'MTRobot/[\d.]+'
2621
2795
  name: 'MTRobot'
2622
2796
  category: 'Crawler'
2623
2797
  url: 'https://metrics-tools.de/robot.html'
@@ -2625,7 +2799,7 @@
2625
2799
  name: 'Metrics Tools'
2626
2800
  url: 'https://metrics-tools.de/'
2627
2801
 
2628
- - regex: 'serpstatbot/([\d+\.])'
2802
+ - regex: 'serpstatbot/[\d.]+'
2629
2803
  name: 'serpstatbot'
2630
2804
  category: 'Crawler'
2631
2805
  url: 'http://serpstatbot.com/'
@@ -2638,17 +2812,17 @@
2638
2812
  category: 'Crawler'
2639
2813
  url: 'https://github.com/gocolly/colly/'
2640
2814
 
2641
- - regex: 'l9tcpid/v([\d+\.])'
2815
+ - regex: 'l9tcpid/v[\d.]+'
2642
2816
  name: 'l9tcpid'
2643
2817
  category: 'Security Checker'
2644
2818
  url: 'https://github.com/LeakIX/l9tcpid'
2645
2819
 
2646
- - regex: 'l9explore/([\d+\.])'
2820
+ - regex: 'l9explore/[\d.]+'
2647
2821
  name: 'l9explore'
2648
2822
  category: 'Security Checker'
2649
2823
  url: 'https://github.com/LeakIX/l9explore'
2650
2824
 
2651
- - regex: 'l9scan/|^Lkx-(.*)/([\d+.]+)'
2825
+ - regex: 'l9scan/|^Lkx-.*/[\d.]+'
2652
2826
  name: 'LeakIX'
2653
2827
  category: 'Security Checker'
2654
2828
  url: 'https://leakix.net/'
@@ -2656,7 +2830,7 @@
2656
2830
  name: 'BaDaaS SRL'
2657
2831
  url: 'https://leakix.net/'
2658
2832
 
2659
- - regex: 'MegaIndex.ru/([\d+\.])'
2833
+ - regex: 'MegaIndex\.ru/[\d.]+'
2660
2834
  name: 'MegaIndex'
2661
2835
  category: 'Crawler'
2662
2836
  url: 'https://megaindex.com/crawler'
@@ -2664,17 +2838,17 @@
2664
2838
  - regex: 'Seekport'
2665
2839
  name: 'Seekport'
2666
2840
  category: 'Crawler'
2667
- url: 'http://www.seekport.com/'
2841
+ url: 'https://bot.seekport.com/'
2668
2842
  producer:
2669
2843
  name: 'SISTRIX GmbH'
2670
2844
  url: 'https://www.sistrix.de/'
2671
2845
 
2672
- - regex: 'seolyt/([\d+\.])'
2846
+ - regex: 'seolyt/[\d.]+'
2673
2847
  name: 'seolyt'
2674
2848
  category: 'Crawler'
2675
2849
  url: 'https://seolyt.com/'
2676
2850
 
2677
- - regex: 'YaK/([\d+\.])'
2851
+ - regex: 'YaK/[\d.]+'
2678
2852
  name: 'YaK'
2679
2853
  category: 'Crawler'
2680
2854
  url: 'https://www.linkfluence.com/'
@@ -2682,7 +2856,7 @@
2682
2856
  name: 'Linkfluence SAS'
2683
2857
  url: 'https://www.linkfluence.com/'
2684
2858
 
2685
- - regex: 'KomodiaBot/([\d+\.])'
2859
+ - regex: 'KomodiaBot/[\d.]+'
2686
2860
  name: 'KomodiaBot'
2687
2861
  category: 'Crawler'
2688
2862
  url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
@@ -2690,7 +2864,7 @@
2690
2864
  name: 'Komodia Inc.'
2691
2865
  url: 'https://www.komodia.com/'
2692
2866
 
2693
- - regex: 'Neevabot/([\d+\.])'
2867
+ - regex: 'Neevabot/[\d.]+'
2694
2868
  name: 'Neevabot'
2695
2869
  category: 'Search bot'
2696
2870
  url: 'https://neeva.com/neevabot'
@@ -2698,17 +2872,17 @@
2698
2872
  name: 'Neeva Inc.'
2699
2873
  url: 'https://neeva.com/'
2700
2874
 
2701
- - regex: 'LinkPreview/([\d+\.])'
2875
+ - regex: 'LinkPreview/[\d.]+'
2702
2876
  name: 'LinkPreview'
2703
2877
  category: 'Service Agent'
2704
2878
  url: 'https://www.linkpreview.net/'
2705
2879
 
2706
- - regex: 'JungleKeyThumbnail/([\d+\.])'
2880
+ - regex: 'JungleKeyThumbnail/[\d.]+'
2707
2881
  name: 'JungleKeyThumbnail'
2708
2882
  category: 'Crawler'
2709
2883
  url: 'https://junglekey.com/'
2710
2884
 
2711
- - regex: 'rocketmonitor(?: |bot/)([\d+\.])'
2885
+ - regex: 'rocketmonitor(?: |bot/)[\d.]+'
2712
2886
  name: 'RocketMonitorBot'
2713
2887
  category: 'Site Monitor'
2714
2888
  url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
@@ -2716,7 +2890,7 @@
2716
2890
  name: 'Radio Mast, Inc.'
2717
2891
  url: 'https://www.radiomast.io/'
2718
2892
 
2719
- - regex: 'SitemapParser-VIPnytt/([\d+\.])'
2893
+ - regex: 'SitemapParser-VIPnytt/[\d.]+'
2720
2894
  name: 'SitemapParser-VIPnytt'
2721
2895
  category: 'Crawler'
2722
2896
  url: 'https://github.com/VIPnytt/SitemapParser/'
@@ -2726,7 +2900,7 @@
2726
2900
  category: 'Crawler'
2727
2901
  url: 'https://turnitin.com/robot/crawlerinfo.html'
2728
2902
 
2729
- - regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
2903
+ - regex: 'DMBrowser/[\d.]+|DMBrowser-[UB]V'
2730
2904
  name: 'Dotcom Monitor'
2731
2905
  category: 'Site Monitor'
2732
2906
  url: 'https://www.dotcom-monitor.com'
@@ -2740,17 +2914,17 @@
2740
2914
  category: 'Crawler'
2741
2915
  url: 'https://dataforseo.com/dataforseo-bot'
2742
2916
 
2743
- - regex: 'Discordbot/([\d+.]+)'
2917
+ - regex: 'Discordbot/[\d.]+'
2744
2918
  name: 'Discord Bot'
2745
2919
  category: 'Service Agent'
2746
2920
  url: 'https://discordapp.com'
2747
2921
 
2748
- - regex: 'Linespider/([\d+.]+)'
2922
+ - regex: 'Linespider/[\d.]+'
2749
2923
  name: 'Linespider'
2750
2924
  category: 'Crawler'
2751
2925
  url: 'https://lin.ee/4dwXkTH'
2752
2926
 
2753
- - regex: 'Cincraw/([\d+.]+)'
2927
+ - regex: 'Cincraw/[\d.]+'
2754
2928
  name: 'Cincraw'
2755
2929
  category: 'Crawler'
2756
2930
  url: 'http://cincrawdata.net/bot/'
@@ -2776,7 +2950,7 @@
2776
2950
  category: 'Crawler'
2777
2951
  url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
2778
2952
 
2779
- - regex: 'https://securitytxt-scan.cs.hm.edu/'
2953
+ - regex: 'https://securitytxt-scan\.cs\.hm\.edu/'
2780
2954
  name: 'security.txt scanserver'
2781
2955
  category: 'Security Checker'
2782
2956
  url: 'https://securitytxt-scan.cs.hm.edu/'
@@ -2784,17 +2958,17 @@
2784
2958
  name: 'Hochschule für angewandte Wissenschaften München'
2785
2959
  url: 'https://www.hm.edu/'
2786
2960
 
2787
- - regex: 'TigerBot/([\d+.]+)'
2961
+ - regex: 'TigerBot/[\d.]+'
2788
2962
  name: 'TigerBot'
2789
2963
  category: 'Crawler'
2790
2964
  url: 'https://tiger.ch/'
2791
2965
 
2792
- - regex: 'TestCrawler/([\d+.]+)'
2966
+ - regex: 'TestCrawler/[\d.]+'
2793
2967
  name: 'TestCrawler'
2794
2968
  category: 'Crawler'
2795
2969
  url: 'https://www.comcepta.com/'
2796
2970
 
2797
- - regex: 'CrowdTanglebot/([\d+.]+)'
2971
+ - regex: 'CrowdTanglebot/[\d.]+'
2798
2972
  name: 'CrowdTangle'
2799
2973
  category: 'Crawler'
2800
2974
  url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
@@ -2802,7 +2976,7 @@
2802
2976
  name: 'CrowdTangle, Inc.'
2803
2977
  url: 'https://www.crowdtangle.com/'
2804
2978
 
2805
- - regex: 'Sellers.Guide Crawler by Primis'
2979
+ - regex: 'Sellers\.Guide Crawler by Primis'
2806
2980
  name: 'Sellers.Guide'
2807
2981
  category: 'Crawler'
2808
2982
  url: 'https://sellers.guide/'
@@ -2826,7 +3000,7 @@
2826
3000
  name: 'deepnoc, GmbH'
2827
3001
  url: 'https://deepnoc.com/'
2828
3002
 
2829
- - regex: 'Newslitbot/([\d+.]+)'
3003
+ - regex: 'Newslitbot/[\d.]+'
2830
3004
  name: 'Newslitbot'
2831
3005
  category: 'Crawler'
2832
3006
  url: 'https://www.newslit.co/'
@@ -2834,7 +3008,7 @@
2834
3008
  name: 'Newslit, LLC.'
2835
3009
  url: 'https://www.newslit.co/'
2836
3010
 
2837
- - regex: 'um-LN/([\d+.]+)'
3011
+ - regex: 'um-LN/[\d.]+'
2838
3012
  name: 'uMBot'
2839
3013
  category: 'Crawler'
2840
3014
  url: 'https://www.ubermetrics-technologies.com/'
@@ -2842,12 +3016,12 @@
2842
3016
  name: 'Ubermetrics Technologies GmbH'
2843
3017
  url: 'https://www.ubermetrics-technologies.com/'
2844
3018
 
2845
- - regex: 'Abonti/([\d+.]+)'
3019
+ - regex: 'Abonti/[\d.]+'
2846
3020
  name: 'Abonti'
2847
3021
  category: 'Crawler'
2848
3022
  url: 'http://abonti.com/'
2849
3023
 
2850
- - regex: 'collection@infegy.com'
3024
+ - regex: 'collection@infegy\.com'
2851
3025
  name: 'Infegy'
2852
3026
  category: 'Crawler'
2853
3027
  url: 'https://infegy.com/'
@@ -2855,7 +3029,7 @@
2855
3029
  name: 'Infegy, Inc.'
2856
3030
  url: 'https://infegy.com/'
2857
3031
 
2858
- - regex: 'HTTP Banner Detection \(https://security.ipip.net\)'
3032
+ - regex: 'HTTP Banner Detection \(https://security\.ipip\.net\)'
2859
3033
  name: 'IPIP'
2860
3034
  category: 'Security Checker'
2861
3035
  url: 'https://security.ipip.net/'
@@ -2863,7 +3037,7 @@
2863
3037
  name: 'Beijing Tiantexin Tech. Co., Ltd.'
2864
3038
  url: 'https://en.ipip.net/'
2865
3039
 
2866
- - regex: 'ev-crawler/([\d+.]+)'
3040
+ - regex: 'ev-crawler/[\d.]+'
2867
3041
  name: 'Headline'
2868
3042
  category: 'Crawler'
2869
3043
  url: 'https://headline.com/legal/crawler'
@@ -2871,7 +3045,7 @@
2871
3045
  name: 'e.ventures Managementgesellschaft mbH'
2872
3046
  url: 'https://headline.com/'
2873
3047
 
2874
- - regex: 'webprosbot/([\d+.]+)'
3048
+ - regex: 'webprosbot/[\d.]+'
2875
3049
  name: 'WebPros'
2876
3050
  category: 'Crawler'
2877
3051
  url: 'https://webpros.com/'
@@ -2887,7 +3061,7 @@
2887
3061
  name: 'Amazon.com, Inc.'
2888
3062
  url: 'https://www.amazon.com/'
2889
3063
 
2890
- - regex: 'Wheregoes.com Redirect Checker/([\d+.]+)'
3064
+ - regex: 'Wheregoes\.com Redirect Checker/[\d.]+'
2891
3065
  name: 'WhereGoes'
2892
3066
  category: 'Crawler'
2893
3067
  url: 'https://wheregoes.com/'
@@ -2897,12 +3071,12 @@
2897
3071
  category: 'Crawler'
2898
3072
  url: 'http://66.240.192.82/'
2899
3073
 
2900
- - regex: 'InternetMeasurement/([\d+.]+)'
3074
+ - regex: 'InternetMeasurement/[\d.]+'
2901
3075
  name: 'InternetMeasurement'
2902
3076
  category: 'Crawler'
2903
3077
  url: 'https://internet-measurement.com/'
2904
3078
 
2905
- - regex: 'DomainAppender /([\d+.]+)'
3079
+ - regex: 'DomainAppender /[\d.]+'
2906
3080
  name: 'DomainAppender'
2907
3081
  category: 'Crawler'
2908
3082
  url: 'https://www.profound.net/product/domain_append/'
@@ -2910,7 +3084,7 @@
2910
3084
  name: 'Profound Networks, LLC'
2911
3085
  url: 'https://www.profound.net/'
2912
3086
 
2913
- - regex: 'FreeWebMonitoring SiteChecker/([\d+.]+)'
3087
+ - regex: 'FreeWebMonitoring SiteChecker/[\d.]+'
2914
3088
  name: 'FreeWebMonitoring'
2915
3089
  category: 'Site Monitor'
2916
3090
  url: 'https://www.freewebmonitoring.com/bot.html'
@@ -2926,7 +3100,7 @@
2926
3100
  name: 'Valley Hosting, LLC'
2927
3101
  url: 'https://www.pagemodified.com/'
2928
3102
 
2929
- - regex: 'adstxtlab.com'
3103
+ - regex: 'adstxtlab\.com'
2930
3104
  name: 'adstxtlab.com'
2931
3105
  category: 'Crawler'
2932
3106
  url: 'https://adstxtlab.com/validator.php'
@@ -2934,7 +3108,7 @@
2934
3108
  name: 'Jaohawi AB'
2935
3109
  url: 'https://adstxtlab.com/'
2936
3110
 
2937
- - regex: 'Iframely/([\d+.]+)'
3111
+ - regex: 'Iframely/[\d.]+'
2938
3112
  name: 'Iframely'
2939
3113
  category: 'Crawler'
2940
3114
  url: 'https://iframely.com/'
@@ -2942,7 +3116,7 @@
2942
3116
  name: 'Itteco Software, Corp.'
2943
3117
  url: 'https://iframely.com/'
2944
3118
 
2945
- - regex: 'DomainStatsBot/([\d+.]+)'
3119
+ - regex: 'DomainStatsBot/[\d.]+'
2946
3120
  name: 'DomainStatsBot'
2947
3121
  category: 'Crawler'
2948
3122
  url: 'https://domainstats.com/pages/our-bot'
@@ -2950,7 +3124,7 @@
2950
3124
  name: 'Domainstats Ltd'
2951
3125
  url: 'https://domainstats.com/'
2952
3126
 
2953
- - regex: 'aiHitBot/([\d+.]+)'
3127
+ - regex: 'aiHitBot/[\d.]+'
2954
3128
  name: 'aiHitBot'
2955
3129
  category: 'Crawler'
2956
3130
  url: 'https://www.aihitdata.com/about'
@@ -2968,12 +3142,12 @@
2968
3142
  name: 'GitCrawlerBot'
2969
3143
  category: 'Crawler'
2970
3144
 
2971
- - regex: 'AdAuth/([\d+.]+)'
3145
+ - regex: 'AdAuth/[\d.]+'
2972
3146
  name: 'AdAuth'
2973
3147
  category: 'Crawler'
2974
3148
  url: 'https://www.adauth.com'
2975
3149
 
2976
- - regex: 'faveeo.com'
3150
+ - regex: 'faveeo\.com'
2977
3151
  name: 'Faveeo'
2978
3152
  category: 'Crawler'
2979
3153
  url: 'http://www.faveeo.com'
@@ -3004,7 +3178,7 @@
3004
3178
  name: 'Jožef Stefan Institute'
3005
3179
  url: 'https://www.ijs.si/ijsw/JSI'
3006
3180
 
3007
- - regex: 'dnt-policy@eff.org'
3181
+ - regex: 'dnt-policy@eff\.org'
3008
3182
  name: 'EFF Do Not Track Verifier'
3009
3183
  category: 'Crawler'
3010
3184
  url: 'https://www.eff.org/issues/do-not-track'
@@ -3028,7 +3202,7 @@
3028
3202
  name: 'Swoppen Systems GmbH'
3029
3203
  url: 'https://www.swoppen.com/de'
3030
3204
 
3031
- - regex: 'ScamadviserExternalHit/([\d+.]+)'
3205
+ - regex: 'ScamadviserExternalHit/[\d.]+'
3032
3206
  name: 'Scamadviser External Hit'
3033
3207
  category: 'Crawler'
3034
3208
  url: 'https://www.scamadviser.com/'
@@ -3041,20 +3215,20 @@
3041
3215
  category: 'Crawler'
3042
3216
  url: 'https://www.zaldamo.com/search.html'
3043
3217
  producer:
3044
- name: 'Project Orlando, LLC.'
3045
- url: 'https://www.projectorlando.com/'
3218
+ name: 'Zaldamo, LLC.'
3219
+ url: 'https://www.zaldamo.com/'
3046
3220
 
3047
- - regex: 'AFB/([\d+.]+)'
3221
+ - regex: 'AFB/[\d.]+'
3048
3222
  name: 'Allloadin Favicon Bot'
3049
3223
  category: 'Crawler'
3050
3224
  url: 'https://allloadin.com/'
3051
3225
 
3052
- - regex: 'SeolytBot/([\d+.]+)'
3226
+ - regex: 'SeolytBot/[\d.]+'
3053
3227
  name: 'Seolyt Bot'
3054
3228
  category: 'Crawler'
3055
3229
  url: 'https://seolyt.com'
3056
3230
 
3057
- - regex: 'LinkWalker/([\d+.]+)'
3231
+ - regex: 'LinkWalker/[\d.]+'
3058
3232
  name: 'LinkWalker'
3059
3233
  category: 'Crawler'
3060
3234
  url: 'https://www.phishlabs.com/'
@@ -3062,7 +3236,7 @@
3062
3236
  name: 'PhishLabs, Inc.'
3063
3237
  url: 'https://www.phishlabs.com/'
3064
3238
 
3065
- - regex: 'RenovateBot/([\d+.]+)'
3239
+ - regex: 'RenovateBot/[\d.]+'
3066
3240
  name: 'RenovateBot'
3067
3241
  category: 'Security Checker'
3068
3242
  url: 'https://github.com/renovatebot/renovate'
@@ -3070,7 +3244,7 @@
3070
3244
  name: 'White Source Ltd.'
3071
3245
  url: 'https://www.mend.io/free-developer-tools/renovate/'
3072
3246
 
3073
- - regex: 'INETDEX-BOT/([\d+.]+)'
3247
+ - regex: 'INETDEX-BOT/[\d.]+'
3074
3248
  name: 'Inetdex Bot'
3075
3249
  category: 'Crawler'
3076
3250
  url: 'https://www.inetdex.com/'
@@ -3083,15 +3257,7 @@
3083
3257
  name: 'Marc Huemer'
3084
3258
  url: 'https://www.netzzappen.com/'
3085
3259
 
3086
- - regex: 'SerpReputationManagementAgent/([\d+.]+)'
3087
- name: 'SEMrush Reputation Management'
3088
- category: 'Service Agent'
3089
- url: 'https://www.semrush.com/bot/'
3090
- producer:
3091
- name: 'SEMrush'
3092
- url: 'https://www.semrush.com/'
3093
-
3094
- - regex: 'panscient.com'
3260
+ - regex: 'panscient\.com'
3095
3261
  name: 'Panscient'
3096
3262
  category: 'Crawler'
3097
3263
  url: 'https://www.panscient.com/faq.htm'
@@ -3099,7 +3265,7 @@
3099
3265
  name: 'Panscient, Inc.'
3100
3266
  url: 'https://www.panscient.com/'
3101
3267
 
3102
- - regex: 'research@pdrlabs.net'
3268
+ - regex: 'research@pdrlabs\.net'
3103
3269
  name: 'PDR Labs'
3104
3270
  category: 'Security Checker'
3105
3271
  url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
@@ -3107,7 +3273,7 @@
3107
3273
  name: 'PDR Labs'
3108
3274
  url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3109
3275
 
3110
- - regex: 'Nicecrawler/([\d+.]+)'
3276
+ - regex: 'Nicecrawler/[\d.]+'
3111
3277
  name: 'NiceCrawler'
3112
3278
  category: 'Crawler'
3113
3279
  url: 'https://www.nicecrawler.com/'
@@ -3115,7 +3281,7 @@
3115
3281
  name: 'Intelium Corp.'
3116
3282
  url: 'https://www.intelium.com/'
3117
3283
 
3118
- - regex: 't3versionsBot/([\d+.]+)'
3284
+ - regex: 't3versionsBot/[\d.]+'
3119
3285
  name: 't3versions'
3120
3286
  category: 'Crawler'
3121
3287
  url: 'https://www.t3versions.com/bot'
@@ -3123,7 +3289,7 @@
3123
3289
  name: 'Torben Hansen'
3124
3290
  url: 'https://www.t3versions.com/'
3125
3291
 
3126
- - regex: 'Crawlson/([\d+.]+)'
3292
+ - regex: 'Crawlson/[\d.]+'
3127
3293
  name: 'Crawlson'
3128
3294
  category: 'Crawler'
3129
3295
  url: 'https://www.crawlson.com/about'
@@ -3131,7 +3297,7 @@
3131
3297
  name: 'Crawlson'
3132
3298
  url: 'https://www.crawlson.com/'
3133
3299
 
3134
- - regex: 'tchelebi/([\d+.]+)'
3300
+ - regex: 'tchelebi/[\d.]+'
3135
3301
  name: 'tchelebi'
3136
3302
  category: 'Crawler'
3137
3303
  url: 'https://tchelebi.io/'
@@ -3147,7 +3313,7 @@
3147
3313
  name: 'New Work SE'
3148
3314
  url: 'https://www.xing.com/'
3149
3315
 
3150
- - regex: 'RepoLookoutBot/([\d+.]+)'
3316
+ - regex: 'RepoLookoutBot/v?[\d.]+'
3151
3317
  name: 'Repo Lookout'
3152
3318
  category: 'Security Checker'
3153
3319
  url: 'https://www.repo-lookout.org/'
@@ -3163,7 +3329,7 @@
3163
3329
  name: 'MAMI Project'
3164
3330
  url: 'https://mami-project.eu/'
3165
3331
 
3166
- - regex: 'everyfeed-spider/([\d+.]+)'
3332
+ - regex: 'everyfeed-spider/[\d.]+'
3167
3333
  name: 'Everyfeed'
3168
3334
  url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
3169
3335
  category: 'Feed Fetcher'
@@ -3187,7 +3353,7 @@
3187
3353
  name: ''
3188
3354
  url: ''
3189
3355
 
3190
- - regex: 'Gregarius/([\d+.]+)'
3356
+ - regex: 'Gregarius/[\d.]+'
3191
3357
  name: 'Gregarius'
3192
3358
  category: 'Feed Fetcher'
3193
3359
  url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
@@ -3203,7 +3369,7 @@
3203
3369
  name: 'Comodo Security Solutions, Inc.'
3204
3370
  url: 'https://www.comodo.com/'
3205
3371
 
3206
- - regex: 'Sectigo DCV'
3372
+ - regex: 'Sectigo DCV|acme\.sectigo\.com'
3207
3373
  name: 'Sectigo DCV'
3208
3374
  category: 'Service Agent'
3209
3375
  url: 'https://sectigo.com/'
@@ -3211,7 +3377,7 @@
3211
3377
  name: 'Sectigo Limited'
3212
3378
  url: 'https://sectigo.com/'
3213
3379
 
3214
- - regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/([\d+.]+)'
3380
+ - regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/[\d.]+'
3215
3381
  name: 'KlarnaBot'
3216
3382
  category: 'Crawler'
3217
3383
  url: 'https://docs.klarna.com/klarna-bot/'
@@ -3219,7 +3385,7 @@
3219
3385
  name: 'Klarna Bank AB'
3220
3386
  url: 'https://www.klarna.com/'
3221
3387
 
3222
- - regex: 'Taboolabot/([\d+.]+)'
3388
+ - regex: 'Taboolabot/[\d.]+'
3223
3389
  name: 'Taboolabot'
3224
3390
  category: 'Crawler'
3225
3391
  url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
@@ -3227,7 +3393,7 @@
3227
3393
  name: 'Taboola, Inc.'
3228
3394
  url: 'https://www.taboola.com/'
3229
3395
 
3230
- - regex: 'Asana/([\d+.]+)'
3396
+ - regex: 'Asana/[\d.]+'
3231
3397
  name: 'Asana'
3232
3398
  category: 'Crawler'
3233
3399
  url: 'https://asana.com/'
@@ -3243,7 +3409,7 @@
3243
3409
  name: 'Google Inc.'
3244
3410
  url: 'https://www.google.com/'
3245
3411
 
3246
- - regex: 'URLinspectorBot/([\d+.]+)'
3412
+ - regex: 'URLinspectorBot/[\d.]+'
3247
3413
  name: 'URLinspector'
3248
3414
  category: 'Site Monitor'
3249
3415
  url: 'https://www.urlinspector.com/bot/'
@@ -3251,7 +3417,7 @@
3251
3417
  name: 'LinkResearchTools GmbH'
3252
3418
  url: 'https://www.linkresearchtools.com/'
3253
3419
 
3254
- - regex: 'EntferBot/([\d+.]+)'
3420
+ - regex: 'EntferBot/[\d.]+'
3255
3421
  name: 'Entfer'
3256
3422
  category: 'Crawler'
3257
3423
  url: 'https://entfer.com/'
@@ -3259,7 +3425,7 @@
3259
3425
  name: 'Entfer Ltd.'
3260
3426
  url: 'https://entfer.com/'
3261
3427
 
3262
- - regex: 'TagInspector/([\d+.]+)'
3428
+ - regex: 'TagInspector/[\d.]+'
3263
3429
  name: 'Tag Inspector'
3264
3430
  category: 'Crawler'
3265
3431
  url: 'https://taginspector.com/'
@@ -3283,7 +3449,7 @@
3283
3449
  name: 'Diffbot Technologies Corp.'
3284
3450
  url: 'https://www.diffbot.com/'
3285
3451
 
3286
- - regex: 'DisqusAdstxtCrawler/([\d+.]+)'
3452
+ - regex: 'DisqusAdstxtCrawler/[\d.]+'
3287
3453
  name: 'Disqus'
3288
3454
  category: 'Crawler'
3289
3455
  url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
@@ -3291,7 +3457,7 @@
3291
3457
  name: 'Disqus, Inc.'
3292
3458
  url: 'https://disqus.com/'
3293
3459
 
3294
- - regex: 'startmebot/([\d+.]+)'
3460
+ - regex: 'startmebot/[\d.]+'
3295
3461
  name: 'start.me'
3296
3462
  category: 'Crawler'
3297
3463
  url: 'https://about.start.me/'
@@ -3299,17 +3465,17 @@
3299
3465
  name: 'start.me BV'
3300
3466
  url: 'https://about.start.me/'
3301
3467
 
3302
- - regex: '2ip bot/([\d+.]+)'
3468
+ - regex: '2ip bot/[\d.]+'
3303
3469
  name: '2ip'
3304
3470
  category: 'Crawler'
3305
3471
  url: 'https://2ip.io/'
3306
3472
 
3307
- - regex: 'ReqBin Curl Client/([\d+.]+)'
3473
+ - regex: 'ReqBin Curl Client/[\d.]+'
3308
3474
  name: 'ReqBin'
3309
3475
  category: 'Crawler'
3310
3476
  url: 'https://reqbin.com/curl'
3311
3477
 
3312
- - regex: 'XoviBot/([\d+.]+)'
3478
+ - regex: 'XoviBot/[\d.]+'
3313
3479
  name: 'XoviBot'
3314
3480
  category: 'Crawler'
3315
3481
  url: 'https://www.xovibot.net'
@@ -3317,12 +3483,12 @@
3317
3483
  name: 'Xovi GmbH'
3318
3484
  url: 'http://www.xovi.de'
3319
3485
 
3320
- - regex: 'Overcast/([\d+.]+) Podcast Sync'
3486
+ - regex: 'Overcast/[\d.]+ Podcast Sync'
3321
3487
  name: 'Overcast Podcast Sync'
3322
3488
  category: 'Service Agent'
3323
3489
  url: 'https://overcast.fm/podcasterinfo'
3324
3490
 
3325
- - regex: '^Verity/([\d+.]+)'
3491
+ - regex: '^Verity/[\d.]+'
3326
3492
  name: 'GumGum Verity'
3327
3493
  category: 'Service Agent'
3328
3494
  url: 'https://gumgum.com/verity'
@@ -3332,7 +3498,7 @@
3332
3498
  category: 'Feed Reader'
3333
3499
  url: 'https://github.com/snarfed/hackermention'
3334
3500
 
3335
- - regex: 'BitSightBot/([\d+.]+)'
3501
+ - regex: 'BitSightBot/[\d.]+'
3336
3502
  name: 'BitSight'
3337
3503
  category: 'Security Checker'
3338
3504
  url: 'https://www.bitsight.com/'
@@ -3340,12 +3506,12 @@
3340
3506
  name: 'BitSight Technologies, Inc.'
3341
3507
  url: 'https://www.bitsight.com/'
3342
3508
 
3343
- - regex: 'Ezgif/([\d+.]+)'
3509
+ - regex: 'Ezgif/[\d.]+'
3344
3510
  name: 'Ezgif'
3345
3511
  category: 'Service Agent'
3346
3512
  url: 'https://ezgif.com/about'
3347
3513
 
3348
- - regex: 'intelx.io_bot'
3514
+ - regex: 'intelx\.io_bot'
3349
3515
  name: 'Intelligence X'
3350
3516
  category: 'Crawler'
3351
3517
  url: 'https://intelx.io/'
@@ -3353,7 +3519,7 @@
3353
3519
  name: 'Kleissner Investments s.r.o.'
3354
3520
  url: 'https://intelx.io/'
3355
3521
 
3356
- - regex: 'FemtosearchBot/([\d+.]+)'
3522
+ - regex: 'FemtosearchBot/[\d.]+'
3357
3523
  name: 'Femtosearch'
3358
3524
  category: 'Crawler'
3359
3525
  url: 'http://femtosearch.com/'
@@ -3361,7 +3527,7 @@
3361
3527
  name: 'Grier Forensics, LLC'
3362
3528
  url: 'https://www.grierforensics.com/'
3363
3529
 
3364
- - regex: 'AdsTxtCrawler/([\d+.]+)'
3530
+ - regex: 'AdsTxtCrawler/[\d.]+'
3365
3531
  name: 'AdsTxtCrawler'
3366
3532
  category: 'Crawler'
3367
3533
  url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
@@ -3377,7 +3543,7 @@
3377
3543
  name: 'Morningscore'
3378
3544
  url: 'https://morningscore.io/'
3379
3545
 
3380
- - regex: 'Uptime-Kuma/([\d+.]+)'
3546
+ - regex: 'Uptime-Kuma/[\d.]+'
3381
3547
  name: 'Uptime-Kuma'
3382
3548
  category: 'Site Monitor'
3383
3549
  url: 'https://github.com/louislam/uptime-kuma'
@@ -3390,7 +3556,7 @@
3390
3556
  name: 'OpenAI OpCo, LLC'
3391
3557
  url: 'https://openai.com/'
3392
3558
 
3393
- - regex: 'BrightEdge Crawler/([\d+.]+)'
3559
+ - regex: 'BrightEdge Crawler/[\d.]+'
3394
3560
  name: 'BrightEdge'
3395
3561
  category: 'Crawler'
3396
3562
  url: 'https://www.brightedge.com/'
@@ -3398,12 +3564,12 @@
3398
3564
  name: 'BrightEdge Technologies, Inc'
3399
3565
  url: 'https://www.brightedge.com/'
3400
3566
 
3401
- - regex: 'sfFeedReader/([\d+.]+)'
3567
+ - regex: 'sfFeedReader/[\d.]+'
3402
3568
  name: 'sfFeedReader'
3403
3569
  url: 'https://github.com/diem-project/sfFeed2Plugin'
3404
3570
  category: 'Feed Fetcher'
3405
3571
 
3406
- - regex: 'cyberscan.io'
3572
+ - regex: 'cyberscan\.io'
3407
3573
  name: 'Cyberscan'
3408
3574
  category: 'Security Checker'
3409
3575
  url: 'https://www.cyberscan.io/'
@@ -3419,15 +3585,7 @@
3419
3585
  name: 'Lumar'
3420
3586
  url: 'https://www.lumar.io/'
3421
3587
 
3422
- - regex: 'RepoLookoutBot'
3423
- name: 'Repo Lookout'
3424
- category: 'Crawler'
3425
- url: 'https://www.repo-lookout.org/'
3426
- producer:
3427
- name: 'Crissy Field GmbH'
3428
- url: 'https://www.crissyfield.de/'
3429
-
3430
- - regex: 'researchscan.comsys.rwth-aachen.de'
3588
+ - regex: 'researchscan\.comsys\.rwth-aachen\.de'
3431
3589
  name: 'Research Scan'
3432
3590
  category: 'Crawler'
3433
3591
  url: 'http://researchscan.comsys.rwth-aachen.de/'
@@ -3435,7 +3593,7 @@
3435
3593
  name: 'RWTH Aachen University'
3436
3594
  url: 'https://www.comsys.rwth-aachen.de/'
3437
3595
 
3438
- - regex: 'newspaper/([\d+.]+)'
3596
+ - regex: 'newspaper/[\d.]+'
3439
3597
  name: 'Scraping Robot'
3440
3598
  category: 'Crawler'
3441
3599
  url: 'https://scrapingrobot.com/'
@@ -3443,7 +3601,7 @@
3443
3601
  name: 'Sprious LLC'
3444
3602
  url: 'https://sprious.com/'
3445
3603
 
3446
- - regex: 'GPTBot/([\d+.]+)'
3604
+ - regex: 'GPTBot/[\d.]+'
3447
3605
  name: 'GPTBot'
3448
3606
  category: 'Crawler'
3449
3607
  url: 'https://platform.openai.com/docs/gptbot'
@@ -3451,7 +3609,7 @@
3451
3609
  name: 'OpenAI OpCo, LLC'
3452
3610
  url: 'https://openai.com/'
3453
3611
 
3454
- - regex: 'Ant.com beta/([\d+.]+)'
3612
+ - regex: 'Ant(?:\.com beta|Bot)(?:/([\d+.]+))?'
3455
3613
  name: 'Ant'
3456
3614
  category: 'Crawler'
3457
3615
  url: 'https://www.ant.com/'
@@ -3459,7 +3617,7 @@
3459
3617
  name: 'Ant.com Ltd.'
3460
3618
  url: 'https://www.ant.com/'
3461
3619
 
3462
- - regex: 'WebwikiBot/([\d+.]+)'
3620
+ - regex: 'WebwikiBot/[\d.]+'
3463
3621
  name: 'Webwiki'
3464
3622
  category: 'Crawler'
3465
3623
  url: 'https://www.webwiki.com/'
@@ -3472,7 +3630,7 @@
3472
3630
  category: 'Service Agent'
3473
3631
  url: 'https://www.phpmyadmin.net/'
3474
3632
 
3475
- - regex: 'Matomo/([\d+.]+)'
3633
+ - regex: 'Matomo/[\d.]+'
3476
3634
  name: 'Matomo'
3477
3635
  category: 'Service Agent'
3478
3636
  url: 'https://github.com/matomo-org/matomo'
@@ -3480,7 +3638,7 @@
3480
3638
  name: 'InnoCraft Ltd'
3481
3639
  url: 'https://matomo.org/'
3482
3640
 
3483
- - regex: 'Prometheus/([\d+.]+)'
3641
+ - regex: 'Prometheus/[\d.]+'
3484
3642
  name: 'Prometheus'
3485
3643
  category: 'Service Agent'
3486
3644
  url: 'https://github.com/prometheus/prometheus'
@@ -3496,7 +3654,7 @@
3496
3654
  name: 'ArchiveTeam'
3497
3655
  url: 'https://wiki.archiveteam.org/'
3498
3656
 
3499
- - regex: 'MADBbot/([\d+.]+)'
3657
+ - regex: 'MADBbot/[\d.]+'
3500
3658
  name: 'MADBbot'
3501
3659
  category: 'Crawler'
3502
3660
  url: 'https://madb.zapto.org/bot.html'
@@ -3508,6 +3666,877 @@
3508
3666
  name: 'Meltwater Deutschland GmbH'
3509
3667
  url: 'https://www.meltwater.com/'
3510
3668
 
3669
+ - regex: '(?:Owler@ows\.eu|OWLer)/[\d.]+'
3670
+ name: 'OWLer'
3671
+ category: 'Crawler'
3672
+ url: 'https://openwebsearch.eu/owler/'
3673
+ producer:
3674
+ name: 'Open Search Foundation e.V.'
3675
+ url: 'https://openwebsearch.eu/'
3676
+
3677
+ - regex: 'bbc\.co\.uk/display/men/Page\+Monitor'
3678
+ name: 'BBC Page Monitor'
3679
+ category: 'Site Monitor'
3680
+ url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor'
3681
+ producer:
3682
+ name: 'BBC'
3683
+ url: 'https://www.bbc.com/'
3684
+
3685
+ - regex: 'BBC-Forge-URL-Monitor-Twisted'
3686
+ name: 'BBC Forge URL Monitor'
3687
+ category: 'Site Monitor'
3688
+ url: 'https://www.bbc.com/'
3689
+ producer:
3690
+ name: 'BBC'
3691
+ url: 'https://www.bbc.com/'
3692
+
3693
+ - regex: 'ClaudeBot'
3694
+ name: 'ClaudeBot'
3695
+ category: 'Crawler'
3696
+ url: 'https://github.com/ClaudeBot/ClaudeBot'
3697
+
3698
+ - regex: 'Imagesift'
3699
+ name: 'ImageSift'
3700
+ category: 'Crawler'
3701
+ url: 'https://imagesift.com/'
3702
+ producer:
3703
+ name: 'Castle Global, Inc.'
3704
+ url: 'https://thehive.ai/'
3705
+
3706
+ - regex: 'TactiScout'
3707
+ name: 'TactiScout'
3708
+ category: 'Crawler'
3709
+ url: 'https://find-it.world/TempCrawl/Crawltheque.php'
3710
+ producer:
3711
+ name: 'Tactikast'
3712
+
3713
+ - regex: 'Brightbot ([\d+.]+)'
3714
+ name: 'BrightBot'
3715
+ category: 'Crawler'
3716
+ url: 'https://www.brightbot.app/'
3717
+ producer:
3718
+ name: 'Bright Interactive Ltd'
3719
+ url: 'https://www.builtbybright.com/'
3720
+
3721
+ - regex: 'DaspeedBot/([\d+.]+)'
3722
+ name: 'DaspeedBot'
3723
+ category: 'Crawler'
3724
+ url: 'https://daspeed.io/'
3725
+ producer:
3726
+ name: 'DAWAP SARL'
3727
+ url: 'https://dawap.fr/'
3728
+
3729
+ - regex: 'StractBot(?:/([\d+.]+))?'
3730
+ name: 'Stract'
3731
+ category: 'Crawler'
3732
+ url: 'https://stract.com/webmasters'
3733
+ producer:
3734
+ name: 'Stract'
3735
+ url: 'https://github.com/StractOrg/stract/'
3736
+
3737
+ - regex: 'GeedoBot(?:/([\d+.]+))?'
3738
+ name: 'GeedoBot'
3739
+ category: 'Crawler'
3740
+ url: 'https://geedo.com/bot/'
3741
+
3742
+ - regex: 'GeedoProductSearch'
3743
+ name: 'GeedoProductSearch'
3744
+ category: 'Crawler'
3745
+ url: 'https://geedo.com/product-search/'
3746
+
3747
+ - regex: 'BackupLand(?:/([\d+.]+))?'
3748
+ name: 'BackupLand'
3749
+ category: 'Crawler'
3750
+ url: 'https://go.backupland.com/'
3751
+ producer:
3752
+ name: 'ООО «КВАРТА»'
3753
+ url: 'https://go.backupland.com/'
3754
+
3755
+ - regex: 'Konturbot(?:/([\d+.]+))?'
3756
+ name: 'Konturbot'
3757
+ category: 'Crawler'
3758
+ url: 'https://kontur.ru/'
3759
+ producer:
3760
+ name: 'АО «ПФ «СКБ Контур»'
3761
+ url: 'https://kontur.ru/'
3762
+
3763
+ - regex: 'keys-so-bot'
3764
+ name: 'Keys.so'
3765
+ category: 'Crawler'
3766
+ url: 'https://www.keys.so/'
3767
+ producer:
3768
+ name: 'ООО «МОДЕСКО»'
3769
+ url: 'https://www.modesco.ru/'
3770
+
3771
+ - regex: 'LetsearchBot(?:/([\d+.]+))?'
3772
+ name: 'LetSearch'
3773
+ category: 'Crawler'
3774
+ url: 'https://letsearch.ru/bots'
3775
+
3776
+ - regex: 'Example3(?:/([\d+.]+))?'
3777
+ name: 'Example3'
3778
+ category: 'Crawler'
3779
+ url: 'https://www.example3.com/'
3780
+
3781
+ - regex: 'StatOnlineRuBot(?:/([\d+.]+))?'
3782
+ name: 'StatOnline.ru'
3783
+ category: 'Crawler'
3784
+ url: 'https://statonline.ru/'
3785
+ producer:
3786
+ name: 'ООО «Регистратор доменных имен РЕГ.РУ»'
3787
+ url: 'https://statonline.ru/'
3788
+
3789
+ - regex: 'Spawning-AI'
3790
+ name: 'Spawning AI'
3791
+ category: 'Crawler'
3792
+ url: 'https://spawning.ai/'
3793
+ producer:
3794
+ name: 'Spawning, Inc'
3795
+ url: 'https://spawning.ai/'
3796
+
3797
+ - regex: 'domain research project'
3798
+ name: 'Domain Research Project'
3799
+ category: 'Crawler'
3800
+ url: 'https://trentwil.es/domains.html'
3801
+ producer:
3802
+ name: 'Trent Wiles'
3803
+ url: 'https://trentwil.es/'
3804
+
3805
+ - regex: 'getodin\.com'
3806
+ name: 'Odin'
3807
+ category: 'Security Checker'
3808
+ url: 'https://docs.getodin.com/'
3809
+ producer:
3810
+ name: 'Cyble Inc.'
3811
+ url: 'https://cyble.com/'
3812
+
3813
+ - regex: 'YouBot'
3814
+ name: 'YouBot'
3815
+ category: 'Crawler'
3816
+ url: 'https://about.you.com/youbot/'
3817
+ producer:
3818
+ name: 'SuSea, Inc.'
3819
+ url: 'https://you.com/'
3820
+
3821
+ - regex: 'SiteScoreBot'
3822
+ name: 'SiteScore'
3823
+ category: 'Crawler'
3824
+ url: 'https://sitescore.ai/'
3825
+
3826
+ - regex: 'MBCrawler'
3827
+ name: 'Monitor Backlinks'
3828
+ category: 'Crawler'
3829
+ url: 'https://www.seoptimer.com/monitor-backlinks/'
3830
+ producer:
3831
+ name: 'SEOptimer'
3832
+ url: 'https://www.seoptimer.com/'
3833
+
3834
+ - regex: 'mariadb-mysql-kbs-bot'
3835
+ name: 'MariaDB/MySQL Knowledge Base'
3836
+ category: 'Crawler'
3837
+ url: 'https://github.com/williamdes/mariadb-mysql-kbs'
3838
+ producer:
3839
+ name: 'WDES SAS'
3840
+ url: 'https://wdes.fr/en/'
3841
+
3842
+ - regex: 'GitHubCopilotChat'
3843
+ name: 'GitHubCopilotChat'
3844
+ category: 'Crawler'
3845
+ url: 'https://github.com/aaamoon/copilot-gpt4-service'
3846
+
3847
+ - regex: '^pdrl\.fm'
3848
+ name: 'Podroll Analyzer'
3849
+ category: 'Crawler'
3850
+ url: 'https://podroll.fm'
3851
+
3852
+ - regex: 'PodUptime/'
3853
+ name: 'PodUptime'
3854
+ category: 'Site Monitor'
3855
+ url: 'https://poduptime.com'
3856
+
3857
+ - regex: 'anthropic-ai'
3858
+ name: 'Anthropic AI'
3859
+ category: 'Crawler'
3860
+ url: 'https://www.anthropic.com/'
3861
+ producer:
3862
+ name: 'Anthropic, PBC'
3863
+ url: 'https://www.anthropic.com/'
3864
+
3865
+ - regex: 'NetpeakCheckerBot/[\d.]+'
3866
+ name: 'Netpeak Checker'
3867
+ category: 'Crawler'
3868
+ url: 'https://netpeaksoftware.com/checker'
3869
+ producer:
3870
+ name: 'Netpeak LTD'
3871
+ url: 'https://netpeaksoftware.com/'
3872
+
3873
+ - regex: 'SandobaCrawler/[\d.]+'
3874
+ name: 'Sandoba//Crawler'
3875
+ category: 'Crawler'
3876
+ url: 'https://www.sandoba.com/en/crawler/'
3877
+ producer:
3878
+ name: 'SANDOBA//EBUSINESS SOLUTIONS'
3879
+ url: 'https://www.sandoba.com/'
3880
+
3881
+ - regex: 'SirdataBot'
3882
+ name: 'Sirdata'
3883
+ category: 'Crawler'
3884
+ url: 'https://semantic-api.docs.sirdata.net/contextual-api/contextual-api/introduction'
3885
+ producer:
3886
+ name: 'Sirdata SAS'
3887
+ url: 'https://www.sirdata.com/'
3888
+
3889
+ - regex: 'CheckMarkNetwork/[\d.]+'
3890
+ name: 'CheckMark Network'
3891
+ category: 'Crawler'
3892
+ url: 'https://www.checkmarknetwork.com/spider.html/'
3893
+ producer:
3894
+ name: 'Exipert, Inc.'
3895
+ url: 'https://www.checkmarknetwork.com/'
3896
+
3897
+ - regex: 'cohere-ai'
3898
+ name: 'Cohere AI'
3899
+ category: 'Crawler'
3900
+ url: 'https://cohere.com/'
3901
+ producer:
3902
+ name: 'Cohere, Inc.'
3903
+ url: 'https://cohere.com/'
3904
+
3905
+ - regex: 'PerplexityBot/[\d.]+'
3906
+ name: 'PerplexityBot'
3907
+ category: 'Crawler'
3908
+ url: 'https://docs.perplexity.ai/docs/perplexitybot'
3909
+ producer:
3910
+ name: 'Perplexity AI, Inc.'
3911
+ url: 'https://www.perplexity.ai/'
3912
+
3913
+ - regex: 'TTD-Content'
3914
+ name: 'The Trade Desk Content'
3915
+ category: 'Crawler'
3916
+ url: 'https://www.thetradedesk.com/us/ttd-content'
3917
+ producer:
3918
+ name: 'The Trade Desk, Inc.'
3919
+ url: 'https://www.thetradedesk.com/'
3920
+
3921
+ - regex: 'montastic-monitor'
3922
+ name: 'Montastic Monitor'
3923
+ category: 'Site Monitor'
3924
+ url: 'https://www.montastic.com/'
3925
+ producer:
3926
+ name: 'Metadot, Corp.'
3927
+ url: 'https://www.metadot.com/'
3928
+
3929
+ - regex: 'Ruby, Twurly v[\d.]+'
3930
+ name: 'Twurly'
3931
+ category: 'Crawler'
3932
+ url: 'https://twurly.org/'
3933
+
3934
+ - regex: 'Mixnode(?:(?:Cache)?/[\d.]+)?'
3935
+ name: 'Mixnode'
3936
+ category: 'Crawler'
3937
+ url: 'https://www.mixnode.com/'
3938
+ producer:
3939
+ name: 'Mixnode Technologies, Inc.'
3940
+ url: 'https://www.mixnode.com/'
3941
+
3942
+ - regex: 'CSSCheck/[\d.]+'
3943
+ name: 'CSSCheck'
3944
+ category: 'Validator'
3945
+
3946
+ - regex: 'MicrosoftPreview/[\d.]+'
3947
+ name: 'Microsoft Preview'
3948
+ category: 'Service Agent'
3949
+ url: 'https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0'
3950
+ producer:
3951
+ name: 'Microsoft Corporation'
3952
+ url: 'https://www.microsoft.com/'
3953
+
3954
+ - regex: 's~virustotalcloud'
3955
+ name: 'VirusTotal Cloud'
3956
+ category: 'Crawler'
3957
+ url: 'https://www.virustotal.com/'
3958
+ producer:
3959
+ name: 'Chronicle Security Ireland Limited'
3960
+ url: 'https://chronicle.security/'
3961
+
3962
+ - regex: 'TinEye/[\d.]+'
3963
+ name: 'TinEye'
3964
+ category: 'Crawler'
3965
+ url: 'https://tineye.com/'
3966
+ producer:
3967
+ name: 'Idée, Inc.'
3968
+ url: 'https://tineye.com/'
3969
+
3970
+ - regex: 'e~arsnova-filter-system'
3971
+ name: 'ARSNova Filter System'
3972
+ category: 'Crawler'
3973
+ url: 'https://particify.de/en/'
3974
+ producer:
3975
+ name: 'Particify Gerhardt & Weingarten OHG'
3976
+ url: 'https://particify.de/en/'
3977
+
3978
+ - regex: 'botify'
3979
+ name: 'Botify'
3980
+ category: 'Crawler'
3981
+ url: 'https://www.botify.com/'
3982
+ producer:
3983
+ name: 'BOTIFY SAS'
3984
+ url: 'https://www.botify.com/'
3985
+
3986
+ - regex: 'adscanner'
3987
+ name: 'Adscanner'
3988
+ category: 'Crawler'
3989
+ url: 'https://www.alleyesonscreens.com/'
3990
+ producer:
3991
+ name: 'AdScanner d.o.o'
3992
+ url: 'https://www.alleyesonscreens.com/'
3993
+
3994
+ - regex: 'online-webceo-bot/[\d.]+'
3995
+ name: 'WebCEO'
3996
+ category: 'Crawler'
3997
+ url: 'https://www.webceo.com/'
3998
+ producer:
3999
+ name: 'WebCEO, LLC'
4000
+ url: 'https://www.webceo.com/'
4001
+
4002
+ - regex: 'NetTrack'
4003
+ name: 'NetTrack'
4004
+ category: 'Crawler'
4005
+ url: 'https://web.archive.org/web/20160607151934/https://nettrack.info/'
4006
+
4007
+ - regex: 'htmlyse'
4008
+ name: 'htmlyse'
4009
+ category: 'Crawler'
4010
+ url: 'https://www.htmlyse.com/'
4011
+ producer:
4012
+ name: 'Vistex LTD'
4013
+ url: 'https://www.htmlyse.com/'
4014
+
4015
+ - regex: 'TrendsmapResolver/[\d.]+'
4016
+ name: 'Trendsmap'
4017
+ category: 'Crawler'
4018
+ url: 'https://www.trendsmap.com/'
4019
+ producer:
4020
+ name: 'Trendsmap Pty Ltd'
4021
+ url: 'https://www.trendsmap.com/'
4022
+
4023
+ - regex: 'Shareaholic(?:bot)?/[\d.]+'
4024
+ name: 'Steve Bot'
4025
+ category: 'Crawler'
4026
+ url: 'https://www.shareaholic.com/steve'
4027
+ producer:
4028
+ name: 'Shareaholic, Inc.'
4029
+ url: 'https://www.shareaholic.com/'
4030
+
4031
+ - regex: 'keycdn-tools:'
4032
+ name: 'KeyCDN Tools'
4033
+ category: 'Service Agent'
4034
+ url: 'https://tools.keycdn.com/geo'
4035
+
4036
+ - regex: 'keycdn-tools/'
4037
+ name: 'KeyCDN Tools'
4038
+ category: 'Service Agent'
4039
+ url: 'https://tools.keycdn.com/'
4040
+ producer:
4041
+ name: 'proinity LLC'
4042
+ url: 'https://www.keycdn.com/'
4043
+
4044
+ - regex: 'Arquivo-web-crawler'
4045
+ name: 'Arquivo.pt'
4046
+ category: 'Crawler'
4047
+ url: 'https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/'
4048
+ producer:
4049
+ name: 'FCT|FCCN'
4050
+ url: 'https://www.fct.pt/'
4051
+
4052
+ - regex: 'WhatsMyIP\.org'
4053
+ name: 'WhatsMyIP.org'
4054
+ category: 'Service Agent'
4055
+ url: 'https://www.whatsmyip.org/ua/'
4056
+
4057
+ - regex: 'SenutoBot/[\d.]+'
4058
+ name: 'Senuto'
4059
+ category: 'Crawler'
4060
+ url: 'https://www.senuto.com/'
4061
+ producer:
4062
+ name: 'Senuto Sp. z o.o.'
4063
+ url: 'https://www.senuto.com/'
4064
+
4065
+ - regex: 'spaziodati'
4066
+ name: 'SpazioDati'
4067
+ category: 'Crawler'
4068
+ url: 'https://www.spaziodati.eu/'
4069
+ producer:
4070
+ name: 'SpazioDati s.r.l.'
4071
+ url: 'https://www.spaziodati.eu/'
4072
+
4073
+ - regex: 'GozleBot'
4074
+ name: 'Gozle'
4075
+ category: 'Crawler'
4076
+ url: 'https://gozle.com.tm/en/blog/post/1'
4077
+ producer:
4078
+ name: 'Doly Horjun HJ'
4079
+ url: 'https://gozle.com.tm/'
4080
+
4081
+ - regex: 'Quantcastbot/[\d.]+'
4082
+ name: 'Quantcast'
4083
+ category: 'Crawler'
4084
+ url: 'https://www.quantcast.com/bot/'
4085
+ producer:
4086
+ name: 'Quantcast Corp.'
4087
+ url: 'https://www.quantcast.com/'
4088
+
4089
+ - regex: 'FontRadar'
4090
+ name: 'FontRadar'
4091
+ category: 'Crawler'
4092
+ url: 'https://www.fontradar.com/'
4093
+ producer:
4094
+ name: 'EMDASH SAS'
4095
+ url: 'https://www.fontradar.com/'
4096
+
4097
+ - regex: 'ViberUrlDownloader'
4098
+ name: 'Viber Url Downloader'
4099
+ category: 'Service Agent'
4100
+ url: 'https://www.viber.com/'
4101
+ producer:
4102
+ name: 'Viber Media S.à r.l.'
4103
+ url: 'https://www.viber.com/'
4104
+
4105
+ - regex: '^Zeno$'
4106
+ name: 'Zeno'
4107
+ category: 'Crawler'
4108
+ url: 'https://github.com/internetarchive/Zeno'
4109
+ producer:
4110
+ name: 'The Internet Archive'
4111
+ url: 'https://archive.org/'
4112
+
4113
+ - regex: 'Barracuda Sentinel'
4114
+ name: 'Barracuda Sentinel'
4115
+ category: 'Service Agent'
4116
+ url: 'https://sentinel.barracudanetworks.com/'
4117
+ producer:
4118
+ name: 'Barracuda Networks, Inc.'
4119
+ url: 'https://www.barracudanetworks.com/'
4120
+
4121
+ - regex: 'RuxitSynthetic/[\d.]+'
4122
+ name: 'RuxitSynthetic'
4123
+ category: 'Site Monitor'
4124
+ url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
4125
+ producer:
4126
+ name: 'Dynatrace LLC'
4127
+ url: 'https://www.dynatrace.com/'
4128
+
4129
+ - regex: 'DynatraceSynthetic/[\d.]+'
4130
+ name: 'DynatraceSynthetic'
4131
+ category: 'Site Monitor'
4132
+ url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
4133
+ producer:
4134
+ name: 'Dynatrace LLC'
4135
+ url: 'https://www.dynatrace.com/'
4136
+
4137
+ - regex: 'sitebulb'
4138
+ name: 'Sitebulb'
4139
+ category: 'Crawler'
4140
+ url: 'https://sitebulb.com/'
4141
+ producer:
4142
+ name: 'Sitebulb Limited'
4143
+ url: 'https://sitebulb.com/'
4144
+
4145
+ - regex: 'Monsidobot/[\d.]+'
4146
+ name: 'Monsidobot'
4147
+ category: 'Crawler'
4148
+ url: 'https://monsido.com/bot-html'
4149
+ producer:
4150
+ name: 'Monsido LLC'
4151
+ url: 'https://monsido.com/'
4152
+
4153
+ - regex: 'AccompanyBot'
4154
+ name: 'AccompanyBot'
4155
+ category: 'Crawler'
4156
+ url: 'https://www.accompany.com/'
4157
+ producer:
4158
+ name: 'Accompani, Inc'
4159
+ url: 'https://www.accompany.com/'
4160
+
4161
+ - regex: 'Ghost Inspector'
4162
+ name: 'Ghost Inspector'
4163
+ category: 'Site Monitor'
4164
+ url: 'https://docs.ghostinspector.com/faq/#how-do-i-detect-ghost-inspector-test-runner-traffic-on-my-site'
4165
+ producer:
4166
+ name: 'Ghost Inspector, Inc.'
4167
+ url: 'https://www.ghostinspector.com/'
4168
+
4169
+ - regex: 'Cypress/[\d.]+'
4170
+ name: 'Cypress'
4171
+ category: 'Site Monitor'
4172
+ url: 'https://github.com/cypress-io/cypress'
4173
+ producer:
4174
+ name: 'Cypress.io, Inc.'
4175
+ url: 'https://www.cypress.io/'
4176
+
4177
+ - regex: 'Google-Apps-Script'
4178
+ name: 'Google Apps Script'
4179
+ category: 'Service Agent'
4180
+ url: 'https://www.google.com/script/start/'
4181
+
4182
+ - regex: 'SiteOne-Crawler/[\d.]+'
4183
+ name: 'SiteOne Crawler'
4184
+ category: 'Crawler'
4185
+ url: 'https://crawler.siteone.io/bot/'
4186
+ producer:
4187
+ name: 'SiteOne s.r.o.'
4188
+ url: 'https://www.siteone.io/'
4189
+
4190
+ - regex: 'Detectify'
4191
+ name: 'Detectify'
4192
+ category: 'Security Checker'
4193
+ url: 'https://support.detectify.com/support/solutions/articles/48001049001-how-to-allow-detectify-to-access-your-site'
4194
+ producer:
4195
+ name: 'Detectify AB'
4196
+ url: 'https://detectify.com/'
4197
+
4198
+ - regex: 'DomCopBot'
4199
+ name: 'DomCop Bot'
4200
+ category: 'Crawler'
4201
+ url: 'https://www.domcop.com/bot'
4202
+ producer:
4203
+ name: 'Axeman Technology Solutions LLP'
4204
+ url: 'https://axemantech.com/'
4205
+
4206
+ - regex: 'Paqlebot/[\d.]+'
4207
+ name: 'Paqlebot'
4208
+ category: 'Crawler'
4209
+ url: 'https://www.paqle.dk/about/paqlebot'
4210
+ producer:
4211
+ name: 'Paqle A/S'
4212
+ url: 'https://www.paqle.dk/'
4213
+
4214
+ - regex: 'Wibybot'
4215
+ name: 'Wibybot'
4216
+ category: 'Crawler'
4217
+ url: 'https://www.wiby.me/'
4218
+
4219
+ - regex: 'Synapse'
4220
+ name: 'Synapse'
4221
+ category: 'Crawler'
4222
+ url: 'https://github.com/matrix-org/synapse'
4223
+
4224
+ - regex: 'OSZKbot/[\d.]+'
4225
+ name: 'OSZKbot'
4226
+ category: 'Crawler'
4227
+ url: 'http://mekosztaly.oszk.hu/mia/'
4228
+ producer:
4229
+ name: 'National Szechenyi Library'
4230
+ url: 'https://webarchivum.oszk.hu/'
4231
+
4232
+ - regex: 'ZoomBot'
4233
+ name: 'ZoomBot'
4234
+ category: 'Crawler'
4235
+ url: 'https://suite.seozoom.it/bot.html'
4236
+ producer:
4237
+ name: 'SEO Cube S.r.l.'
4238
+ url: 'https://www.seocube.it/'
4239
+
4240
+ - regex: 'RavenCrawler/[\d.]+'
4241
+ name: 'RavenCrawler'
4242
+ category: 'Crawler'
4243
+ url: 'https://raventools.com/site-auditor/'
4244
+ producer:
4245
+ name: 'TapClicks, Inc.'
4246
+ url: 'https://www.tapclicks.com/'
4247
+
4248
+ - regex: 'KadoBot'
4249
+ name: 'KadoBot'
4250
+ category: 'Crawler'
4251
+ url: 'https://www.kadolijst.nl/bot'
4252
+ producer:
4253
+ name: 'Kadolijst'
4254
+ url: 'https://www.kadolijst.nl/'
4255
+
4256
+ - regex: 'Dubbotbot/[\d.]+'
4257
+ name: 'Dubbotbot'
4258
+ category: 'Crawler'
4259
+ url: 'https://help.dubbot.com/en/articles/6746594-example-custom-user-agent'
4260
+ producer:
4261
+ name: 'DubBot'
4262
+ url: 'https://dubbot.com/'
4263
+
4264
+ - regex: 'Swiftbot/[\d.]+'
4265
+ name: 'Swiftbot'
4266
+ category: 'Crawler'
4267
+ url: 'https://swiftype.com/swiftbot'
4268
+ producer:
4269
+ name: 'Elasticsearch, B.V.'
4270
+ url: 'https://www.elastic.co/'
4271
+
4272
+ - regex: 'EyeMonIT'
4273
+ name: 'EyeMonit'
4274
+ category: 'Site Monitor'
4275
+ url: 'https://eyemonit.com/'
4276
+ producer:
4277
+ name: 'EyeMonit'
4278
+ url: 'https://eyemonit.com/'
4279
+
4280
+ - regex: 'ThousandEyes'
4281
+ name: 'ThousandEyes'
4282
+ category: 'Site Monitor'
4283
+ url: 'https://www.thousandeyes.com/'
4284
+ producer:
4285
+ name: 'Cisco Systems, Inc.'
4286
+ url: 'https://www.cisco.com/'
4287
+
4288
+ - regex: 'OmtrBot/[\d.]+'
4289
+ name: 'OmtrBot'
4290
+ category: 'Site Monitor'
4291
+
4292
+ - regex: 'WebMon/[\d.]+'
4293
+ name: 'WebMon'
4294
+ category: 'Site Monitor'
4295
+
4296
+ - regex: 'AdsTxtCrawlerTP/[\d.]+'
4297
+ name: 'AdsTxtCrawlerTP'
4298
+ category: 'Crawler'
4299
+
4300
+ - regex: 'fragFINN'
4301
+ name: 'fragFINN'
4302
+ category: 'Crawler'
4303
+ url: 'https://www.fragfinn.de/'
4304
+ producer:
4305
+ name: 'fragFINN e.V.'
4306
+ url: 'https://www.fragfinn.de/'
4307
+
4308
+ - regex: 'Clickagy'
4309
+ name: 'Clickagy'
4310
+ category: 'Crawler'
4311
+ url: 'https://www.clickagy.com/'
4312
+ producer:
4313
+ name: 'Clickagy, LLC'
4314
+ url: 'https://www.clickagy.com/'
4315
+
4316
+ - regex: 'kiwitcms-gitops/[\d.]+'
4317
+ name: 'Kiwi TCMS GitOps'
4318
+ category: 'Service Agent'
4319
+ url: 'https://kiwitcms.org'
4320
+ producer:
4321
+ name: 'Open Technologies Bulgaria, Ltd.'
4322
+ url: 'https://kiwitcms.org'
4323
+
4324
+ - regex: 'webtru_crawler'
4325
+ name: 'webtru'
4326
+ category: 'Crawler'
4327
+ url: 'https://webtru.io/'
4328
+ producer:
4329
+ name: 'DataSign Inc.'
4330
+ url: 'https://datasign.jp/'
4331
+
4332
+ - regex: 'URLSuMaBot'
4333
+ name: 'URLSuMaBot'
4334
+ category: 'Crawler'
4335
+ url: 'https://www.urlsuma.de/'
4336
+
4337
+ - regex: '360JK yunjiankong'
4338
+ name: '360JK'
4339
+ category: 'Site Monitor'
4340
+ url: 'http://jk.cloud.360.cn/'
4341
+ producer:
4342
+ name: '360 Security Technology Inc.'
4343
+ url: 'https://www.360.cn/'
4344
+
4345
+ - regex: 'UCSBNetworkMeasurement'
4346
+ name: 'UCSB Network Measurement'
4347
+ category: 'Crawler'
4348
+ url: 'https://www.it.ucsb.edu/'
4349
+ producer:
4350
+ name: 'University of California, Santa Barbara'
4351
+ url: 'https://www.it.ucsb.edu/'
4352
+
4353
+ - regex: 'Plesk screenshot bot'
4354
+ name: 'Plesk Screenshot Service'
4355
+ category: 'Service Agent'
4356
+ url: 'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service'
4357
+ producer:
4358
+ name: 'Plesk International GmbH'
4359
+ url: 'https://www.plesk.com/'
4360
+
4361
+ - regex: 'Who\.is'
4362
+ name: 'Who.is Bot'
4363
+ category: 'Crawler'
4364
+ url: 'https://who.is/'
4365
+
4366
+ - regex: 'Probely'
4367
+ name: 'Probely'
4368
+ category: 'Security Checker'
4369
+ url: 'https://probely.com/sos/'
4370
+ producer:
4371
+ name: 'Probely - Soluções de Cibersegurança, S.A.'
4372
+ url: 'https://probely.com/'
4373
+
4374
+ - regex: 'Uptimia(?:/[\d.]+)?'
4375
+ name: 'Uptimia'
4376
+ category: 'Site Monitor'
4377
+ url: 'https://www.uptimia.com/'
4378
+ producer:
4379
+ name: 'JJ Online GmbH'
4380
+ url: 'https://www.uptimia.com/'
4381
+
4382
+ - regex: '2GDPR/[\d.]+'
4383
+ name: '2GDPR'
4384
+ category: 'Service Agent'
4385
+ url: 'https://2gdpr.com/tos'
4386
+ producer:
4387
+ name: '2GDPR'
4388
+ url: 'https://2gdpr.com/'
4389
+
4390
+ - regex: 'abuse\.xmco\.fr'
4391
+ name: 'Serenety'
4392
+ category: 'Security Checker'
4393
+ url: 'https://abuse.xmco.fr/'
4394
+ producer:
4395
+ name: 'XMCO, SASU'
4396
+ url: 'https://www.xmco.fr/'
4397
+
4398
+ - regex: 'CheckHost'
4399
+ name: 'CheckHost'
4400
+ category: 'Site Monitor'
4401
+ url: 'https://check-host.net/'
4402
+ producer:
4403
+ name: 'CheckHost'
4404
+ url: 'https://check-host.net/'
4405
+
4406
+ - regex: 'LAC_IAHarvester/[\d.]+'
4407
+ name: 'LAC IA Harvester'
4408
+ category: 'Crawler'
4409
+ url: 'https://library-archives.canada.ca/eng/services/government-canada/web-social-media-preservation-program/Pages/web-archive.aspx'
4410
+ producer:
4411
+ name: 'Library and Archives Canada'
4412
+ url: 'https://library-archives.canada.ca/'
4413
+
4414
+ - regex: 'InsytfulBot/[\d.]+'
4415
+ name: 'InsytfulBot'
4416
+ category: 'Crawler'
4417
+ url: 'https://www.insytful.com/'
4418
+ producer:
4419
+ name: 'Zengenti Limited'
4420
+ url: 'https://www.zengenti.com/'
4421
+
4422
+ - regex: 'statista\.com'
4423
+ name: 'Statista'
4424
+ category: 'Crawler'
4425
+ url: 'https://www.statista.com/'
4426
+ producer:
4427
+ name: 'Statista, Inc.'
4428
+ url: 'https://www.statista.com/'
4429
+
4430
+ - regex: 'SubstackContentFetch/[\d.]+'
4431
+ name: 'Substack Content Fetch'
4432
+ category: 'Crawler'
4433
+ url: 'https://substack.com/'
4434
+ producer:
4435
+ name: 'Substack, Inc.'
4436
+ url: 'https://substack.com/'
4437
+
4438
+ - regex: '^ds9'
4439
+ name: 'Deep SEARCH 9'
4440
+ category: 'Crawler'
4441
+ url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/'
4442
+ producer:
4443
+ name: 'Copyright Clearance Center, Inc.'
4444
+ url: 'https://www.copyright.com/'
4445
+
4446
+ - regex: 'LiveJournal\.com'
4447
+ name: 'LiveJournal'
4448
+ url: 'https://www.livejournal.com/'
4449
+ category: 'Feed Fetcher'
4450
+ producer:
4451
+ name: 'ООО "СИМ"'
4452
+ url: 'https://www.livejournal.com/'
4453
+
4454
+ - regex: 'bitdiscovery'
4455
+ name: 'Tenable.asm'
4456
+ category: 'Security Checker'
4457
+ url: 'https://bitdiscovery.com/'
4458
+ producer:
4459
+ name: 'Tenable, Inc.'
4460
+ url: 'https://www.tenable.com/'
4461
+
4462
+ - regex: 'Castopod/[\d.]+'
4463
+ name: 'Castopod'
4464
+ category: 'Crawler'
4465
+ url: 'https://www.castopod.org/'
4466
+
4467
+ - regex: 'Elastic/Synthetics'
4468
+ name: 'Elastic Synthetics'
4469
+ category: 'Site Monitor'
4470
+ url: 'https://github.com/elastic/synthetics'
4471
+ producer:
4472
+ name: 'Elasticsearch B.V.'
4473
+ url: 'https://www.elastic.co/'
4474
+
4475
+ - regex: 'WDG_Validator/[\d.]+'
4476
+ name: 'WDG HTML Validator'
4477
+ category: 'Validator'
4478
+ url: 'http://www.htmlhelp.com/tools/validator/'
4479
+
4480
+ - regex: 'scan@aegis.network'
4481
+ name: 'Aegis'
4482
+ category: 'Crawler'
4483
+ url: 'https://web.archive.org/web/20180910002802/http://www.aegis.network/'
4484
+
4485
+ - regex: 'CrawlyProjectCrawler/[\d.]+'
4486
+ name: 'Crawly Project'
4487
+ category: 'Crawler'
4488
+ url: 'https://web.archive.org/web/20240326141952/https://crawlyproject.digitaldragon.dev/'
4489
+
4490
+ - regex: 'BDFetch'
4491
+ name: 'BDFetch'
4492
+ category: 'Crawler'
4493
+ url: 'https://web.archive.org/web/20130821043949/http://www.branddimensions.com/'
4494
+
4495
+ - regex: 'PunkMap'
4496
+ name: 'Punk Map'
4497
+ category: 'Security Checker'
4498
+ url: 'https://github.com/openeasm/punkmap'
4499
+
4500
+ - regex: 'GenomeCrawlerd/[\d.]+'
4501
+ name: 'Deepfield Genome'
4502
+ category: 'Crawler'
4503
+ url: 'https://www.nokia.com/networks/ip-networks/deepfield/genome/'
4504
+ producer:
4505
+ name: 'Nokia Corporation'
4506
+ url: 'https://www.nokia.com/'
4507
+
4508
+ - regex: 'Gaisbot/[\d.]+'
4509
+ name: 'Gaisbot'
4510
+ category: 'Crawler'
4511
+ url: 'https://web.archive.org/web/20090604121511/https://gais.cs.ccu.edu.tw/robot.php'
4512
+
4513
+ - regex: 'FAST-WebCrawler/[\d.]+'
4514
+ name: 'AlltheWeb'
4515
+ category: 'Crawler'
4516
+ url: 'https://web.archive.org/web/20041020050801/http://www.alltheweb.com/help/webmaster/crawler'
4517
+
4518
+ - regex: 'ducks\.party'
4519
+ name: 'ducks.party'
4520
+ category: 'Security Checker'
4521
+ url: 'https://ducks.party/'
4522
+
4523
+ - regex: 'DepSpid/[\d.]+'
4524
+ name: 'DepSpid'
4525
+ category: 'Crawler'
4526
+ url: 'https://web.archive.org/web/20080321224033/http://about.depspid.net/'
4527
+
4528
+ - regex: 'Website-info\.net'
4529
+ name: 'Website-info'
4530
+ category: 'Crawler'
4531
+ url: 'https://website-info.net/robot'
4532
+ producer:
4533
+ name: 'Meins und Vogel GmbH'
4534
+ url: 'https://muv.com/'
4535
+
4536
+ # Generic bots
4537
+ - regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|url|Zeus|ZmEu)$'
4538
+ name: 'Generic Bot'
4539
+
3511
4540
  # Generic detections
3512
- - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider|^firefox$|^chrome$)([^a-z]|$)'
4541
+ - regex: '[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\d\.[x\d] |electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft |banshee-)project(?!or)|(?<!Google Wap |Blue |SpeedMode; )proxy|research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider(?! 8)|study|transcoder|uptime|user[ _]?agent|validator)(?:[^a-z]|$)'
3513
4542
  name: 'Generic Bot'