device_detector 1.1.2 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/regexes/bots.yml CHANGED
@@ -5,6 +5,11 @@
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
+ - regex: 'WireReaderBot(?:/([\d+.]+))?'
9
+ name: 'WireReaderBot'
10
+ category: 'Feed Fetcher'
11
+ url: 'https://wirereader.app/'
12
+
8
13
  - regex: 'monitoring360bot'
9
14
  name: '360 Monitoring'
10
15
  category: 'Site Monitor'
@@ -61,7 +66,7 @@
61
66
  name: 'Ahrefs Pte Ltd'
62
67
  url: 'https://ahrefs.com/robot'
63
68
 
64
- - regex: 'AhrefsSiteAudit/([\d+.]+)'
69
+ - regex: 'AhrefsSiteAudit/[\d.]+'
65
70
  name: 'AhrefsSiteAudit'
66
71
  category: 'Site Monitor'
67
72
  url: 'https://ahrefs.com/robot/site-audit'
@@ -85,7 +90,7 @@
85
90
  name: 'Alexa Internet'
86
91
  url: 'https://www.alexa.com'
87
92
 
88
- - regex: 'Amazonbot'
93
+ - regex: 'Amazonbot/[\d.]+'
89
94
  name: 'Amazon Bot'
90
95
  category: 'Crawler'
91
96
  url: 'https://developer.amazon.com/support/amazonbot'
@@ -93,6 +98,14 @@
93
98
  name: 'Amazon.com, Inc.'
94
99
  url: 'https://www.amazon.com/'
95
100
 
101
+ - regex: 'AmazonAdBot/[\d.]+'
102
+ name: 'Amazon AdBot'
103
+ category: 'Crawler'
104
+ url: 'https://adbot.amazon.com/'
105
+ producer:
106
+ name: 'Amazon.com, Inc.'
107
+ url: 'https://www.amazon.com/'
108
+
96
109
  - regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
97
110
  name: 'Amazon Route53 Health Check'
98
111
  category: 'Service Agent'
@@ -119,10 +132,18 @@
119
132
  - regex: 'Applebot'
120
133
  name: 'Applebot'
121
134
  category: 'Crawler'
122
- url: 'https://support.apple.com/en-us/HT204683'
135
+ url: 'https://support.apple.com/en-us/119829'
123
136
  producer:
124
137
  name: 'Apple Inc'
125
- url: 'https://www.apple.com'
138
+ url: 'https://www.apple.com/'
139
+
140
+ - regex: 'iTMS'
141
+ name: 'iTMS'
142
+ category: 'Crawler'
143
+ url: 'https://support.apple.com/en-us/119829'
144
+ producer:
145
+ name: 'Apple Inc'
146
+ url: 'https://www.apple.com/'
126
147
 
127
148
  - regex: 'AppSignalBot'
128
149
  name: 'AppSignalBot'
@@ -220,7 +241,7 @@
220
241
  name: 'Better Uptime'
221
242
  url: 'https://betteruptime.com/'
222
243
 
223
- - regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
244
+ - regex: 'MSNBot|msrbot|bingbot|bingadsbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
224
245
  name: 'BingBot'
225
246
  category: 'Search bot'
226
247
  url: 'http://search.msn.com/msnbot.htmn'
@@ -371,7 +392,23 @@
371
392
  name: 'CloudFlare'
372
393
  url: 'https://www.cloudflare.com/'
373
394
 
374
- - regex: 'https://developers.cloudflare.com/security-center/'
395
+ - regex: 'Cloudflare-Smart-Transit'
396
+ name: 'Cloudflare Smart Transit'
397
+ category: 'Site Monitor'
398
+ url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
399
+ producer:
400
+ name: 'CloudFlare'
401
+ url: 'https://www.cloudflare.com/'
402
+
403
+ - regex: 'CloudflareObservatory'
404
+ name: 'Cloudflare Observatory'
405
+ category: 'Site Monitor'
406
+ url: 'https://developers.cloudflare.com/speed/speed-test/run-speed-test'
407
+ producer:
408
+ name: 'CloudFlare'
409
+ url: 'https://www.cloudflare.com/'
410
+
411
+ - regex: 'https://developers\.cloudflare\.com/security-center/'
375
412
  name: 'Cloudflare Security Insights'
376
413
  category: 'Site Monitor'
377
414
  url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
@@ -379,7 +416,7 @@
379
416
  name: 'CloudFlare'
380
417
  url: 'https://www.cloudflare.com/'
381
418
 
382
- - regex: 'coccoc.com'
419
+ - regex: 'coccoc\.com'
383
420
  name: 'Cốc Cốc Bot'
384
421
  url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
385
422
  category: 'Search bot'
@@ -435,7 +472,7 @@
435
472
  name: 'Dataprovider B.V.'
436
473
  url: 'https://www.dataprovider.com/'
437
474
 
438
- - regex: 'Daum(oa)?[ /][0-9]'
475
+ - regex: 'Daum(?!(?:Apps|Device))'
439
476
  name: 'Daum'
440
477
  category: 'Search bot'
441
478
  url: 'http://tab.search.daum.net/aboutWebSearch_en.html'
@@ -459,7 +496,7 @@
459
496
  name: 'Discovery Engine'
460
497
  url: 'http://discoveryengine.com'
461
498
 
462
- - regex: 'Domain Re-Animator Bot|support@domainreanimator.com'
499
+ - regex: 'Domain Re-Animator Bot|support@domainreanimator\.com'
463
500
  name: 'Domain Re-Animator Bot'
464
501
  category: 'Crawler'
465
502
  url: ''
@@ -538,13 +575,21 @@
538
575
  name: 'SEOmoz, Inc.'
539
576
  url: 'http://moz.com/'
540
577
 
541
- - regex: 'facebookexternalhit|facebookplatform|facebookexternalua|facebookcatalog'
542
- name: 'Facebook External Hit'
578
+ - regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)'
579
+ name: 'Facebook Crawler'
543
580
  category: 'Social Media Agent'
544
- url: 'https://www.facebook.com/externalhit_uatext.php'
581
+ url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/'
545
582
  producer:
546
- name: 'Facebook'
547
- url: 'http://www.facebook.com'
583
+ name: 'Meta Platforms, Inc.'
584
+ url: 'https://www.meta.com/'
585
+
586
+ - regex: 'FacebookBot/[\d.]+'
587
+ name: 'FacebookBot'
588
+ category: 'Crawler'
589
+ url: 'https://developers.facebook.com/docs/sharing/bot'
590
+ producer:
591
+ name: 'Meta Platforms, Inc.'
592
+ url: 'https://www.meta.com/'
548
593
 
549
594
  - regex: 'Feedbin'
550
595
  name: 'Feedbin'
@@ -662,7 +707,7 @@
662
707
  url: 'https://search.google.com/search-console/about'
663
708
  producer:
664
709
  name: 'Google Inc.'
665
- url: 'http://www.google.com'
710
+ url: 'https://www.google.com/'
666
711
 
667
712
  - regex: 'Google Page Speed Insights'
668
713
  name: 'Google PageSpeed Insights'
@@ -670,7 +715,7 @@
670
715
  url: 'http://developers.google.com/speed/pagespeed/insights/'
671
716
  producer:
672
717
  name: 'Google Inc.'
673
- url: 'http://www.google.com'
718
+ url: 'https://www.google.com/'
674
719
 
675
720
  - regex: 'google_partner_monitoring'
676
721
  name: 'Google Partner Monitoring'
@@ -678,7 +723,7 @@
678
723
  url: ''
679
724
  producer:
680
725
  name: 'Google Inc.'
681
- url: 'http://www.google.com'
726
+ url: 'https://www.google.com/'
682
727
 
683
728
  - regex: 'Google-Cloud-Scheduler'
684
729
  name: 'Google Cloud Scheduler'
@@ -694,7 +739,7 @@
694
739
  url: 'https://search.google.com/structured-data/testing-tool'
695
740
  producer:
696
741
  name: 'Google Inc.'
697
- url: 'http://www.google.com'
742
+ url: 'https://www.google.com/'
698
743
 
699
744
  - regex: 'GoogleStackdriverMonitoring'
700
745
  name: 'Google Stackdriver Monitoring'
@@ -704,13 +749,21 @@
704
749
  name: 'Google Inc.'
705
750
  url: 'https://www.google.com'
706
751
 
752
+ - regex: 'Google-Transparency-Report'
753
+ name: 'Google Transparency Report'
754
+ category: 'Site Monitor'
755
+ url: 'https://transparencyreport.google.com/'
756
+ producer:
757
+ name: 'Google Inc.'
758
+ url: 'https://www.google.com/'
759
+
707
760
  - regex: 'via ggpht\.com GoogleImageProxy'
708
761
  name: 'Gmail Image Proxy'
709
762
  category: 'Crawler'
710
763
  url: ''
711
764
  producer:
712
765
  name: 'Google Inc.'
713
- url: 'http://www.google.com'
766
+ url: 'https://www.google.com/'
714
767
 
715
768
  - regex: 'SeznamEmailProxy'
716
769
  name: 'Seznam Email Proxy'
@@ -744,21 +797,37 @@
744
797
  name: 'Visual Meta'
745
798
  url: 'https://www.shopalike.cz/'
746
799
 
747
- - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|InspectionTool|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|Google(?:AdSenseInfeed|AssociationService|Other|Prober|Producer)|Google.*/\+/web/snippet'
800
+ - regex: 'Googlebot-News'
801
+ name: 'Googlebot News'
802
+ category: 'Search bot'
803
+ url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
804
+ producer:
805
+ name: 'Google Inc.'
806
+ url: 'https://www.google.com/'
807
+
808
+ - regex: 'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Safety|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\+/web/snippet'
748
809
  name: 'Googlebot'
749
810
  category: 'Search bot'
750
- url: 'http://www.google.com/bot.html'
811
+ url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
751
812
  producer:
752
813
  name: 'Google Inc.'
753
- url: 'http://www.google.com'
814
+ url: 'https://www.google.com/'
754
815
 
755
816
  - regex: '^Google$'
756
817
  name: 'Googlebot'
757
818
  category: 'Search bot'
758
- url: 'http://www.google.com/bot.html'
819
+ url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
759
820
  producer:
760
821
  name: 'Google Inc.'
761
- url: 'http://www.google.com'
822
+ url: 'https://www.google.com/'
823
+
824
+ - regex: 'Google-Area120-PrivacyPolicyFetcher'
825
+ name: 'Google Area 120 Privacy Policy Fetcher'
826
+ category: 'Crawler'
827
+ url: 'https://area120.google.com/'
828
+ producer:
829
+ name: 'Google Inc.'
830
+ url: 'https://www.google.com/'
762
831
 
763
832
  - regex: 'heritrix'
764
833
  name: 'Heritrix'
@@ -780,7 +849,7 @@
780
849
  category: 'Crawler'
781
850
  url: 'http://vuhuv.com/bot.html'
782
851
 
783
- - regex: 'HTTPMon'
852
+ - regex: 'HTTPMon/[\d.]+'
784
853
  name: 'HTTPMon'
785
854
  category: 'Site Monitor'
786
855
  url: 'http://www.httpmon.com'
@@ -796,7 +865,7 @@
796
865
  name: ''
797
866
  url: ''
798
867
 
799
- - regex: 'inoreader.com'
868
+ - regex: 'inoreader\.com'
800
869
  name: 'inoreader'
801
870
  category: 'Feed Reader'
802
871
  url: 'https://www.inoreader.com'
@@ -844,7 +913,7 @@
844
913
  name: ''
845
914
  url: ''
846
915
 
847
- - regex: '([A-z0-9]*)-Lighthouse'
916
+ - regex: '[A-z0-9]*-Lighthouse'
848
917
  name: 'Lighthouse'
849
918
  category: 'Site Monitor'
850
919
  url: 'https://developers.google.com/web/tools/lighthouse'
@@ -878,7 +947,8 @@
878
947
 
879
948
  - regex: 'ltx71'
880
949
  name: 'LTX71'
881
- url: 'http://ltx71.com/'
950
+ category: 'Security Checker'
951
+ url: 'https://ltx71.com/'
882
952
  producer:
883
953
  name: ''
884
954
  url: ''
@@ -907,7 +977,7 @@
907
977
  name: ''
908
978
  url: ''
909
979
 
910
- - regex: 'masscan-ng/([\d+.]+)'
980
+ - regex: 'masscan-ng/[\d.]+'
911
981
  name: 'masscan-ng'
912
982
  url: 'https://github.com/bi-zone/masscan-ng'
913
983
  category: 'Crawler'
@@ -915,7 +985,7 @@
915
985
  name: 'BIZON, OOO'
916
986
  url: 'https://bi.zone/'
917
987
 
918
- - regex: 'masscan'
988
+ - regex: '.*masscan'
919
989
  name: 'masscan'
920
990
  url: 'https://github.com/robertdavidgraham/masscan'
921
991
  category: 'Crawler'
@@ -1003,11 +1073,11 @@
1003
1073
  name: 'Nagios Plugins Development Team'
1004
1074
  url: 'https://nagios.org'
1005
1075
 
1006
- - regex: 'nbertaupete95\(at\)gmail.com'
1076
+ - regex: 'nbertaupete95\(at\)gmail\.com'
1007
1077
  name: 'nbertaupete95'
1008
1078
  category: 'Crawler'
1009
1079
 
1010
- - regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
1080
+ - regex: 'Netcraft(?: Web Server Survey| SSL Server Survey|SurveyAgent)'
1011
1081
  name: 'Netcraft Survey Bot'
1012
1082
  category: 'Search bot'
1013
1083
  url: ''
@@ -1031,7 +1101,7 @@
1031
1101
  name: ''
1032
1102
  url: ''
1033
1103
 
1034
- - regex: 'NewsBlur .*(Fetcher|Finder)'
1104
+ - regex: 'NewsBlur .*(?:Fetcher|Finder)'
1035
1105
  name: 'NewsBlur'
1036
1106
  url: 'http://www.newsblur.com'
1037
1107
  category: 'Feed Fetcher'
@@ -1070,10 +1140,18 @@
1070
1140
  name: 'Nuzzel'
1071
1141
  url: 'https://www.nuzzel.com/'
1072
1142
 
1143
+ - regex: 'NodePing'
1144
+ name: 'NodePing'
1145
+ category: 'Site Monitor'
1146
+ url: 'https://nodeping.com'
1147
+ producer:
1148
+ name: 'NodePing'
1149
+ url: 'https://nodeping.com'
1150
+
1073
1151
  - regex: 'Octopus [0-9]'
1074
1152
  name: 'Octopus'
1075
1153
 
1076
- - regex: 'OnlineOrNot.com_bot'
1154
+ - regex: 'OnlineOrNot\.com_bot'
1077
1155
  name: 'OnlineOrNot Bot'
1078
1156
  category: 'Site Monitor'
1079
1157
  url: 'https://onlineornot.com/website-monitoring'
@@ -1142,7 +1220,7 @@
1142
1220
  name: 'PHP Server Monitor'
1143
1221
  url: 'http://www.phpservermonitor.org/'
1144
1222
 
1145
- - regex: 'Pocket(?:ImageCache|Parser)/([\d+.]+)'
1223
+ - regex: 'Pocket(?:ImageCache|Parser)/[\d.]+'
1146
1224
  name: 'Pocket'
1147
1225
  category: 'Read-it-later Service'
1148
1226
  url: 'https://getpocket.com/pocketparser_ua'
@@ -1289,12 +1367,36 @@
1289
1367
  url: ''
1290
1368
 
1291
1369
  - regex: 'SemrushBot'
1292
- name: 'Semrush Bot'
1370
+ name: 'SemrushBot'
1371
+ category: 'Crawler'
1372
+ url: 'https://www.semrush.com/bot/'
1373
+ producer:
1374
+ name: 'Semrush Inc.'
1375
+ url: 'https://www.semrush.com/'
1376
+
1377
+ - regex: 'SerpReputationManagementAgent/[\d.]+'
1378
+ name: 'Semrush Reputation Management'
1379
+ category: 'Service Agent'
1380
+ url: 'https://www.semrush.com/bot/'
1381
+ producer:
1382
+ name: 'Semrush Inc.'
1383
+ url: 'https://www.semrush.com/'
1384
+
1385
+ - regex: 'SplitSignalBot'
1386
+ name: 'SplitSignalBot'
1293
1387
  category: 'Crawler'
1294
- url: 'http://www.semrush.com/bot.html'
1388
+ url: 'https://www.semrush.com/bot/'
1295
1389
  producer:
1296
- name: 'SEMrush'
1297
- url: 'http://www.semrush.com'
1390
+ name: 'Semrush Inc.'
1391
+ url: 'https://www.semrush.com/'
1392
+
1393
+ - regex: 'SiteAuditBot/[\d.]+'
1394
+ name: 'SiteAuditBot'
1395
+ category: 'Crawler'
1396
+ url: 'https://www.semrush.com/bot/'
1397
+ producer:
1398
+ name: 'Semrush Inc.'
1399
+ url: 'https://www.semrush.com/'
1298
1400
 
1299
1401
  - regex: 'SensikaBot'
1300
1402
  name: 'Sensika Bot'
@@ -1304,7 +1406,7 @@
1304
1406
  name: 'Sensika'
1305
1407
  url: 'http://sensika.com'
1306
1408
 
1307
- - regex: 'SEOENG(World)?Bot'
1409
+ - regex: 'SEOENG(?:World)?Bot'
1308
1410
  name: 'SEOENGBot'
1309
1411
  category: 'Crawler'
1310
1412
  url: 'http://www.seoengine.com/seoengbot.htm'
@@ -1394,7 +1496,7 @@
1394
1496
  category: 'Crawler'
1395
1497
  url: 'http://ricks-apps.com/osx/sitesucker/'
1396
1498
 
1397
- - regex: 'sixy.ch'
1499
+ - regex: 'sixy\.ch'
1398
1500
  name: 'Sixy.ch'
1399
1501
  category: 'Site Monitor'
1400
1502
  url: 'http://sixy.ch'
@@ -1410,7 +1512,7 @@
1410
1512
  name: 'Slack Technologies'
1411
1513
  url: 'http://slack.com'
1412
1514
 
1413
- - regex: '(Sogou[ -](head|inst|Orion|Pic|Test|web)[ -]spider)|New-Sogou-Spider'
1515
+ - regex: 'Sogou[ -](?:head|inst|Orion|Pic|Test|web)[ -]spider|New-Sogou-Spider'
1414
1516
  name: 'Sogou Spider'
1415
1517
  category: 'Search bot'
1416
1518
  url: 'http://www.sogou.com/docs/help/webmasters.htm'
@@ -1535,11 +1637,19 @@
1535
1637
  name: ''
1536
1638
  url: ''
1537
1639
 
1538
- - regex: 'theoldreader.com'
1640
+ - regex: 'theoldreader\.com'
1539
1641
  name: 'theoldreader'
1540
1642
  category: 'Feed Reader'
1541
1643
  url: 'https://theoldreader.com'
1542
1644
 
1645
+ - regex: 'Trackable/0\.1'
1646
+ name: 'Chartable'
1647
+ category: 'Site Monitor'
1648
+ url: 'https://help.chartable.com/article/34-what-is-the-trackable-analytics-prefix'
1649
+ producer:
1650
+ name: 'Chartable'
1651
+ url: 'https://chartable.com'
1652
+
1543
1653
  - regex: 'trendictionbot'
1544
1654
  name: 'Trendiction Bot'
1545
1655
  category: 'Crawler'
@@ -1556,13 +1666,13 @@
1556
1666
  name: 'iParadigms, LLC.'
1557
1667
  url: 'http://www.turnitin.com'
1558
1668
 
1559
- - regex: 'TweetedTimes Bot'
1669
+ - regex: 'TweetedTimes'
1560
1670
  name: 'TweetedTimes Bot'
1561
1671
  category: 'Crawler'
1562
- url: 'http://tweetedtimes.com'
1672
+ url: 'https://tweetedtimes.com/'
1563
1673
  producer:
1564
1674
  name: 'TweetedTimes'
1565
- url: 'http://tweetedtimes.com/'
1675
+ url: 'https://tweetedtimes.com/'
1566
1676
 
1567
1677
  - regex: 'TweetmemeBot'
1568
1678
  name: 'Tweetmeme Bot'
@@ -1603,21 +1713,21 @@
1603
1713
  name: 'UkrNet Ltd'
1604
1714
  url: 'https://www.ukr.net/'
1605
1715
 
1606
- - regex: 'Uptimebot'
1716
+ - regex: 'Uptime(?:bot)?/[\d.]+'
1607
1717
  name: 'Uptimebot'
1608
1718
  category: 'Site Monitor'
1609
- url: 'https://uptime.com/uptimebot'
1719
+ url: 'https://uptime.com/uptime-bot'
1610
1720
  producer:
1611
1721
  name: 'Uptime'
1612
- url: 'https://uptime.com'
1722
+ url: 'https://uptime.com/'
1613
1723
 
1614
1724
  - regex: 'UptimeRobot'
1615
- name: 'Uptime Robot'
1725
+ name: 'UptimeRobot'
1616
1726
  category: 'Site Monitor'
1617
- url: ''
1727
+ url: 'https://uptimerobot.com/'
1618
1728
  producer:
1619
1729
  name: 'Uptime Robot'
1620
- url: 'http://uptimerobot.com'
1730
+ url: 'https://uptimerobot.com/'
1621
1731
 
1622
1732
  - regex: 'URLAppendBot'
1623
1733
  name: 'URLAppendBot'
@@ -1638,10 +1748,18 @@
1638
1748
  - regex: 'vkShare; '
1639
1749
  name: 'VK Share Button'
1640
1750
  category: 'Crawler'
1641
- url: 'http://vk.com/dev/Share'
1751
+ url: 'https://dev.vk.com/en/widgets/share'
1752
+ producer:
1753
+ name: 'VK'
1754
+ url: 'https://vk.com/'
1755
+
1756
+ - regex: 'VKRobot'
1757
+ name: 'VK Robot'
1758
+ category: 'Crawler'
1759
+ url: 'https://dev.vk.com/en/'
1642
1760
  producer:
1643
1761
  name: 'VK'
1644
- url: 'http://vk.com/'
1762
+ url: 'https://vk.com/'
1645
1763
 
1646
1764
  - regex: 'VSMCrawler'
1647
1765
  name: 'Visual Site Mapper Crawler'
@@ -1675,7 +1793,7 @@
1675
1793
  name: 'W3C'
1676
1794
  url: 'http://www.w3.org'
1677
1795
 
1678
- - regex: 'W3C_Validator|Validator.nu'
1796
+ - regex: 'W3C_Validator|Validator\.nu'
1679
1797
  name: 'W3C Markup Validation Service'
1680
1798
  category: 'Validator'
1681
1799
  url: 'http://validator.w3.org/services'
@@ -1699,6 +1817,14 @@
1699
1817
  name: 'W3C'
1700
1818
  url: 'http://www.w3.org'
1701
1819
 
1820
+ - regex: 'P3P Validator'
1821
+ name: 'W3C P3P Validator'
1822
+ category: 'Validator'
1823
+ url: 'https://www.w3.org/P3P/validator.html'
1824
+ producer:
1825
+ name: 'W3C'
1826
+ url: 'https://www.w3.org'
1827
+
1702
1828
  - regex: 'Wappalyzer'
1703
1829
  name: 'Wappalyzer'
1704
1830
  url: 'https://github.com/AliasIO/Wappalyzer'
@@ -1735,6 +1861,22 @@
1735
1861
  name: 'WebSitePulse'
1736
1862
  url: 'http://www.websitepulse.com/'
1737
1863
 
1864
+ - regex: 'WordPress.+isitwp\.com'
1865
+ name: 'IsItWP'
1866
+ category: 'Crawler'
1867
+ url: 'https://www.isitwp.com/'
1868
+ producer:
1869
+ name: 'WPBeginner, LLC'
1870
+ url: 'https://www.wpbeginner.com/'
1871
+
1872
+ - regex: 'Automattic Analytics Crawler/[\d.]+'
1873
+ name: 'Automattic Analytics'
1874
+ category: 'Crawler'
1875
+ url: 'https://wordpress.com/crawler/'
1876
+ producer:
1877
+ name: 'Wordpress.org'
1878
+ url: 'https://wordpress.org/'
1879
+
1738
1880
  - regex: 'WordPress'
1739
1881
  name: 'WordPress'
1740
1882
  category: 'Service Agent'
@@ -1815,13 +1957,29 @@
1815
1957
  name: 'Yahoo! Japan Corp.'
1816
1958
  url: 'https://www.yahoo.co.jp/'
1817
1959
 
1818
- - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1960
+ - regex: 'Y!J-ASR'
1961
+ name: 'Yahoo! Japan ASR'
1962
+ category: 'Crawler'
1963
+ url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
1964
+ producer:
1965
+ name: 'Yahoo! Japan Corp.'
1966
+ url: 'https://www.yahoo.co.jp/'
1967
+
1968
+ - regex: '^Y!J'
1969
+ name: 'Yahoo! Japan'
1970
+ category: 'Crawler'
1971
+ url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
1972
+ producer:
1973
+ name: 'Yahoo! Japan Corp.'
1974
+ url: 'https://www.yahoo.co.jp/'
1975
+
1976
+ - regex: 'Yandex(?:(?:\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher'
1819
1977
  name: 'Yandex Bot'
1820
1978
  category: 'Search bot'
1821
- url: 'http://www.yandex.com/bots'
1979
+ url: 'https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html'
1822
1980
  producer:
1823
1981
  name: 'Yandex LLC'
1824
- url: 'http://company.yandex.com'
1982
+ url: 'https://yandex.com/company/'
1825
1983
 
1826
1984
  - regex: 'Yeti|NaverJapan|AdsBot-Naver'
1827
1985
  name: 'Yeti/Naverbot'
@@ -1881,7 +2039,7 @@
1881
2039
  name: 'Yottaa'
1882
2040
  url: 'http://www.yottaa.com/'
1883
2041
 
1884
- - regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857.*'
2042
+ - regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857'
1885
2043
  name: 'Yahoo Gemini'
1886
2044
  category: 'Crawler'
1887
2045
  url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
@@ -1905,7 +2063,7 @@
1905
2063
  name: 'HubPages, Inc.'
1906
2064
  url: 'https://discover.hubpages.com/'
1907
2065
 
1908
- - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
2066
+ - regex: 'Pinterest(?:bot)?/[\d.]+.*www\.pinterest\.com'
1909
2067
  name: 'Pinterest'
1910
2068
  url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
1911
2069
  category: 'Crawler'
@@ -1913,7 +2071,7 @@
1913
2071
  name: 'Pinterest'
1914
2072
  url: 'https://www.pinterest.com/'
1915
2073
 
1916
- - regex: 'Site24x7'
2074
+ - regex: '.*Site24x7'
1917
2075
  name: 'Site24x7 Website Monitoring'
1918
2076
  category: 'Site Monitor'
1919
2077
  url: 'https://www.site24x7.com/site24x7-faq.html'
@@ -1921,6 +2079,14 @@
1921
2079
  name: 'Site24x7'
1922
2080
  url: 'https://www.site24x7.com'
1923
2081
 
2082
+ - regex: '.* HLB/[\d.]+'
2083
+ name: 'Site24x7 Defacement Monitor'
2084
+ category: 'Site Monitor'
2085
+ url: 'https://support.site24x7.com/portal/en/kb/articles/default-user-agent-used-in-website-defacement-monitor'
2086
+ producer:
2087
+ name: 'Site24x7'
2088
+ url: 'https://www.site24x7.com/'
2089
+
1924
2090
  - regex: 's~snapchat-proxy'
1925
2091
  name: 'Snapchat Proxy'
1926
2092
  category: 'Crawler'
@@ -1937,6 +2103,14 @@
1937
2103
  name: 'Snapchat Inc.'
1938
2104
  url: 'https://www.snapchat.com/'
1939
2105
 
2106
+ - regex: 'SnapchatAds/[\d.]+'
2107
+ name: 'Snapchat Ads'
2108
+ category: 'Crawler'
2109
+ url: 'https://businesshelp.snapchat.com/s/article/adsbot-crawler?language=en_US'
2110
+ producer:
2111
+ name: 'Snapchat Inc.'
2112
+ url: 'https://www.snapchat.com/'
2113
+
1940
2114
  - regex: "Let's Encrypt validation server"
1941
2115
  name: "Let's Encrypt Validation"
1942
2116
  category: 'Service Agent'
@@ -2029,22 +2203,19 @@
2029
2203
  - regex: 'AdMantX.*admantx\.com'
2030
2204
  name: 'ADMantX'
2031
2205
 
2032
- - regex: 'Server Density Service Monitoring.*'
2206
+ - regex: 'Server Density Service Monitoring'
2033
2207
  name: 'Server Density'
2034
2208
 
2035
2209
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
2036
2210
  name: 'RSSRadio Bot'
2037
2211
 
2038
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel|Magellan|CustomUserAgent)'
2039
- name: 'Generic Bot'
2040
-
2041
2212
  - regex: '^sentry'
2042
2213
  name: 'Sentry Bot'
2043
2214
  producer:
2044
2215
  name: 'Sentry'
2045
2216
  url: 'https://sentry.io'
2046
2217
 
2047
- - regex: '^Spotify/(\d+[\.\d]+)$'
2218
+ - regex: '^Spotify/[\d.]+$'
2048
2219
  name: 'Spotify'
2049
2220
  producer:
2050
2221
  name: 'Spotify'
@@ -2102,14 +2273,6 @@
2102
2273
  name: 'Siteimprove GmbH'
2103
2274
  url: 'https://siteimprove.com/'
2104
2275
 
2105
- - regex: 'Image size by Siteimprove\.com'
2106
- name: 'Siteimprove'
2107
- category: 'Search bot'
2108
- url: 'https://siteimprove.com/'
2109
- producer:
2110
- name: 'Siteimprove GmbH'
2111
- url: 'https://siteimprove.com/'
2112
-
2113
2276
  - regex: 'CATExplorador'
2114
2277
  name: 'CATExplorador'
2115
2278
  category: 'Search bot'
@@ -2134,7 +2297,7 @@
2134
2297
  name: 'Idee Inc.'
2135
2298
  url: 'http://ideeinc.com/'
2136
2299
 
2137
- - regex: 'zelist.ro feed parser'
2300
+ - regex: 'zelist\.ro feed parser'
2138
2301
  name: 'Ze List'
2139
2302
  url: 'https://www.zelist.ro/'
2140
2303
  category: 'Feed Fetcher'
@@ -2182,21 +2345,21 @@
2182
2345
  name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
2183
2346
  url: 'https://www.wikido.com/'
2184
2347
 
2185
- - regex: 'AwarioSmartBot'
2348
+ - regex: 'Awario(?:Smart)?Bot'
2186
2349
  name: 'Awario'
2187
2350
  category: 'Search bot'
2188
2351
  url: 'https://awario.com/bots.html'
2189
2352
  producer:
2190
- name: 'Awario'
2191
- url: 'https://awario.com/'
2353
+ name: 'TechFusion Ltd.'
2354
+ url: 'https://www.techfusion.com.cy/'
2192
2355
 
2193
2356
  - regex: 'AwarioRssBot'
2194
2357
  name: 'Awario'
2195
2358
  category: 'Feed Fetcher'
2196
2359
  url: 'https://awario.com/bots.html'
2197
2360
  producer:
2198
- name: 'Awario'
2199
- url: 'https://awario.com/'
2361
+ name: 'TechFusion Ltd.'
2362
+ url: 'https://www.techfusion.com.cy/'
2200
2363
 
2201
2364
  - regex: 'oBot'
2202
2365
  name: 'oBot'
@@ -2288,7 +2451,7 @@
2288
2451
  category: 'Crawler'
2289
2452
  url: 'https://serendeputy.com/about/serendeputy-bot'
2290
2453
 
2291
- - regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx.com.*service-fetcher'
2454
+ - regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx\.com.*service-fetcher'
2292
2455
  name: 'ADmantX Service Fetcher'
2293
2456
  category: 'Service bot'
2294
2457
  url: 'https://www.admantx.com/service-fetcher.html'
@@ -2324,7 +2487,7 @@
2324
2487
  name: 'PPC Labs LLC'
2325
2488
  url: 'https://www.adbeat.com/'
2326
2489
 
2327
- - regex: 'BW/(?:(\d+[\.\d]+))'
2490
+ - regex: '(?:BuiltWith|BW)/[\d.]+'
2328
2491
  name: 'BuiltWith'
2329
2492
  category: 'Crawler'
2330
2493
  url: 'https://builtwith.com/biup'
@@ -2332,7 +2495,7 @@
2332
2495
  name: 'BuiltWith Pty Ltd'
2333
2496
  url: 'https://builtwith.com/'
2334
2497
 
2335
- - regex: 'https://whatis.contentkingapp.com'
2498
+ - regex: 'https://whatis\.contentkingapp\.com'
2336
2499
  name: 'ContentKing'
2337
2500
  category: 'Site Monitor'
2338
2501
  url: 'https://whatis.contentkingapp.com/'
@@ -2348,7 +2511,7 @@
2348
2511
  name: 'MicroAd, Inc.'
2349
2512
  url: 'https://www.microad.co.jp/'
2350
2513
 
2351
- - regex: 'PingAdmin.Ru'
2514
+ - regex: 'PingAdmin\.Ru'
2352
2515
  name: 'PingAdmin.Ru'
2353
2516
  category: 'Site Monitor'
2354
2517
  url: 'https://ping-admin.ru/'
@@ -2366,7 +2529,7 @@
2366
2529
  name: 'WebTehRazrabotka LLC'
2367
2530
  url: 'https://webdatastats.com/'
2368
2531
 
2369
- - regex: 'parse.ly scraper'
2532
+ - regex: 'parse\.ly scraper'
2370
2533
  name: 'parse.ly'
2371
2534
  category: 'Crawler'
2372
2535
  url: 'https://www.parse.ly/help/integration/crawler'
@@ -2379,7 +2542,7 @@
2379
2542
  category: 'Site Monitor'
2380
2543
  url: 'http://cloudsystemnetworks.com'
2381
2544
 
2382
- - regex: 'HeartRails_Capture/\d'
2545
+ - regex: 'HeartRails_Capture/[\d.]+'
2383
2546
  name: 'Heart Rails Capture'
2384
2547
  category: 'Service Agent'
2385
2548
  url: 'http://capture.heartrails.com'
@@ -2387,9 +2550,12 @@
2387
2550
  - regex: 'Project-Resonance'
2388
2551
  name: 'Project Resonance'
2389
2552
  category: 'Crawler'
2390
- url: 'http://project-resonance.com'
2553
+ url: 'https://project-resonance.com/'
2554
+ producer:
2555
+ name: 'RedHunt Labs Limited'
2556
+ url: 'https://redhuntlabs.com/'
2391
2557
 
2392
- - regex: 'DataXu/\d'
2558
+ - regex: 'DataXu/[\d.]+'
2393
2559
  name: 'DataXu'
2394
2560
  category: 'Service Agent'
2395
2561
  url: 'https://advertising.roku.com/dataxu'
@@ -2426,7 +2592,7 @@
2426
2592
  category: 'Crawler'
2427
2593
  url: 'http://www.webtop.com/'
2428
2594
 
2429
- - regex: 'PageThing.com'
2595
+ - regex: 'PageThing\.com'
2430
2596
  name: 'PageThing'
2431
2597
  category: 'Crawler'
2432
2598
  url: 'https://www.pagething.com/'
@@ -2471,10 +2637,18 @@
2471
2637
  url: 'https://github.com/projectdiscovery/httpx'
2472
2638
  category: 'Crawler'
2473
2639
  producer:
2474
- name: ''
2475
- url: ''
2640
+ name: 'ProjectDiscovery, Inc.'
2641
+ url: 'https://projectdiscovery.io/'
2642
+
2643
+ - regex: '.*\.oast\.'
2644
+ name: 'Interactsh'
2645
+ category: 'Security Checker'
2646
+ url: 'https://github.com/projectdiscovery/interactsh'
2647
+ producer:
2648
+ name: 'ProjectDiscovery, Inc.'
2649
+ url: 'https://projectdiscovery.io/'
2476
2650
 
2477
- - regex: 'scaninfo@(?:expanseinc|paloaltonetworks).com'
2651
+ - regex: 'scaninfo@(?:expanseinc|paloaltonetworks)\.com'
2478
2652
  name: 'Expanse'
2479
2653
  category: 'Security Checker'
2480
2654
  url: 'https://expanse.co/'
@@ -2505,12 +2679,12 @@
2505
2679
  name: 'Hatena Co., Ltd.'
2506
2680
  url: 'https://www.hatena.ne.jp'
2507
2681
 
2508
- - regex: 'RyowlEngine/(\d+)'
2682
+ - regex: 'RyowlEngine/[\d.]+'
2509
2683
  name: 'Ryowl'
2510
2684
  category: 'Crawler'
2511
2685
  url: 'https://ryowl.org'
2512
2686
 
2513
- - regex: 'OdklBot/(\d+)'
2687
+ - regex: 'OdklBot/[\d.]+'
2514
2688
  name: 'Odnoklassniki Bot'
2515
2689
  category: 'Crawler'
2516
2690
  url: 'https://odnoklassniki.ru'
@@ -2525,7 +2699,7 @@
2525
2699
  category: 'Crawler'
2526
2700
  url: 'https://www.zoominfo.com'
2527
2701
 
2528
- - regex: 'WeViKaBot/([\d+\.])'
2702
+ - regex: 'WeViKaBot/[\d.]+'
2529
2703
  name: 'WeViKaBot'
2530
2704
  category: 'Crawler'
2531
2705
  url: 'http://www.wevika.de'
@@ -2535,7 +2709,7 @@
2535
2709
  category: 'Crawler'
2536
2710
  url: 'https://www.seokicks.de/robot.html'
2537
2711
 
2538
- - regex: 'Plukkie/([\d+\.])'
2712
+ - regex: 'Plukkie/[\d.]+'
2539
2713
  name: 'Plukkie'
2540
2714
  category: 'Crawler'
2541
2715
  url: 'http://www.botje.com/plukkie.htm'
@@ -2545,22 +2719,22 @@
2545
2719
  category: 'Crawler'
2546
2720
  url: 'https://www.comscore.com/Web-Crawler'
2547
2721
 
2548
- - regex: 'SurdotlyBot/([\d+\.])'
2722
+ - regex: 'SurdotlyBot/[\d.]+'
2549
2723
  name: 'SurdotlyBot'
2550
2724
  category: 'Crawler'
2551
2725
  url: 'http://sur.ly/bot.html'
2552
2726
 
2553
- - regex: 'Gowikibot/([\d+\.])'
2727
+ - regex: 'Gowikibot/[\d.]+'
2554
2728
  name: 'Gowikibot'
2555
2729
  category: 'Crawler'
2556
2730
  url: 'http:/www.gowikibot.com'
2557
2731
 
2558
- - regex: 'SabsimBot/([\d+\.])'
2732
+ - regex: 'SabsimBot/[\d.]+'
2559
2733
  name: 'SabsimBot'
2560
2734
  category: 'Crawler'
2561
2735
  url: 'https://sabsim.com'
2562
2736
 
2563
- - regex: 'LumtelBot/([\d+\.])'
2737
+ - regex: 'LumtelBot/[\d.]+'
2564
2738
  name: 'LumtelBot'
2565
2739
  category: 'Crawler'
2566
2740
  url: 'https://umtel.com'
@@ -2570,12 +2744,12 @@
2570
2744
  category: 'Crawler'
2571
2745
  url: 'http://www.pipl.com/bot'
2572
2746
 
2573
- - regex: 'woobot/([\d+\.])'
2747
+ - regex: 'woobot/[\d.]+'
2574
2748
  name: 'WooRank'
2575
2749
  category: 'Crawler'
2576
2750
  url: 'https://www.woorank.com/bot'
2577
2751
 
2578
- - regex: 'Cookiebot/([\d+\.])'
2752
+ - regex: 'Cookiebot/[\d.]+'
2579
2753
  name: 'Cookiebot'
2580
2754
  category: 'Crawler'
2581
2755
  url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
@@ -2591,7 +2765,7 @@
2591
2765
  name: 'NET SYSTEMS RESEARCH LLC'
2592
2766
  url: 'https://www.netsystemsresearch.com/'
2593
2767
 
2594
- - regex: 'CensysInspect/([\d+\.])'
2768
+ - regex: 'CensysInspect/[\d.]+'
2595
2769
  name: 'CensysInspect'
2596
2770
  category: 'Security Checker'
2597
2771
  url: 'https://about.censys.io/'
@@ -2599,7 +2773,7 @@
2599
2773
  name: 'Censys, Inc.'
2600
2774
  url: 'https://censys.io/'
2601
2775
 
2602
- - regex: 'gdnplus.com'
2776
+ - regex: 'gdnplus\.com'
2603
2777
  name: 'GDNP'
2604
2778
  category: 'Crawler'
2605
2779
  url: 'https://gdnplus.com/'
@@ -2607,17 +2781,17 @@
2607
2781
  name: 'Global Digital Network Plus, LLC'
2608
2782
  url: 'https://gdnplus.com/'
2609
2783
 
2610
- - regex: 'WellKnownBot/([\d+\.])'
2784
+ - regex: 'WellKnownBot/[\d.]+'
2611
2785
  name: 'WellKnownBot'
2612
2786
  category: 'Crawler'
2613
2787
  url: 'https://well-known.dev'
2614
2788
 
2615
- - regex: 'Adsbot/([\d+\.])'
2789
+ - regex: 'Adsbot/[\d.]+'
2616
2790
  name: 'Adsbot'
2617
2791
  category: 'Crawler'
2618
2792
  url: 'https://seostar.co/robot/'
2619
2793
 
2620
- - regex: 'MTRobot/([\d+\.])'
2794
+ - regex: 'MTRobot/[\d.]+'
2621
2795
  name: 'MTRobot'
2622
2796
  category: 'Crawler'
2623
2797
  url: 'https://metrics-tools.de/robot.html'
@@ -2625,7 +2799,7 @@
2625
2799
  name: 'Metrics Tools'
2626
2800
  url: 'https://metrics-tools.de/'
2627
2801
 
2628
- - regex: 'serpstatbot/([\d+\.])'
2802
+ - regex: 'serpstatbot/[\d.]+'
2629
2803
  name: 'serpstatbot'
2630
2804
  category: 'Crawler'
2631
2805
  url: 'http://serpstatbot.com/'
@@ -2638,17 +2812,17 @@
2638
2812
  category: 'Crawler'
2639
2813
  url: 'https://github.com/gocolly/colly/'
2640
2814
 
2641
- - regex: 'l9tcpid/v([\d+\.])'
2815
+ - regex: 'l9tcpid/v[\d.]+'
2642
2816
  name: 'l9tcpid'
2643
2817
  category: 'Security Checker'
2644
2818
  url: 'https://github.com/LeakIX/l9tcpid'
2645
2819
 
2646
- - regex: 'l9explore/([\d+\.])'
2820
+ - regex: 'l9explore/[\d.]+'
2647
2821
  name: 'l9explore'
2648
2822
  category: 'Security Checker'
2649
2823
  url: 'https://github.com/LeakIX/l9explore'
2650
2824
 
2651
- - regex: 'l9scan/|^Lkx-(.*)/([\d+.]+)'
2825
+ - regex: 'l9scan/|^Lkx-.*/[\d.]+'
2652
2826
  name: 'LeakIX'
2653
2827
  category: 'Security Checker'
2654
2828
  url: 'https://leakix.net/'
@@ -2656,7 +2830,7 @@
2656
2830
  name: 'BaDaaS SRL'
2657
2831
  url: 'https://leakix.net/'
2658
2832
 
2659
- - regex: 'MegaIndex.ru/([\d+\.])'
2833
+ - regex: 'MegaIndex\.ru/[\d.]+'
2660
2834
  name: 'MegaIndex'
2661
2835
  category: 'Crawler'
2662
2836
  url: 'https://megaindex.com/crawler'
@@ -2664,17 +2838,17 @@
2664
2838
  - regex: 'Seekport'
2665
2839
  name: 'Seekport'
2666
2840
  category: 'Crawler'
2667
- url: 'http://www.seekport.com/'
2841
+ url: 'https://bot.seekport.com/'
2668
2842
  producer:
2669
2843
  name: 'SISTRIX GmbH'
2670
2844
  url: 'https://www.sistrix.de/'
2671
2845
 
2672
- - regex: 'seolyt/([\d+\.])'
2846
+ - regex: 'seolyt/[\d.]+'
2673
2847
  name: 'seolyt'
2674
2848
  category: 'Crawler'
2675
2849
  url: 'https://seolyt.com/'
2676
2850
 
2677
- - regex: 'YaK/([\d+\.])'
2851
+ - regex: 'YaK/[\d.]+'
2678
2852
  name: 'YaK'
2679
2853
  category: 'Crawler'
2680
2854
  url: 'https://www.linkfluence.com/'
@@ -2682,7 +2856,7 @@
2682
2856
  name: 'Linkfluence SAS'
2683
2857
  url: 'https://www.linkfluence.com/'
2684
2858
 
2685
- - regex: 'KomodiaBot/([\d+\.])'
2859
+ - regex: 'KomodiaBot/[\d.]+'
2686
2860
  name: 'KomodiaBot'
2687
2861
  category: 'Crawler'
2688
2862
  url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
@@ -2690,7 +2864,7 @@
2690
2864
  name: 'Komodia Inc.'
2691
2865
  url: 'https://www.komodia.com/'
2692
2866
 
2693
- - regex: 'Neevabot/([\d+\.])'
2867
+ - regex: 'Neevabot/[\d.]+'
2694
2868
  name: 'Neevabot'
2695
2869
  category: 'Search bot'
2696
2870
  url: 'https://neeva.com/neevabot'
@@ -2698,17 +2872,17 @@
2698
2872
  name: 'Neeva Inc.'
2699
2873
  url: 'https://neeva.com/'
2700
2874
 
2701
- - regex: 'LinkPreview/([\d+\.])'
2875
+ - regex: 'LinkPreview/[\d.]+'
2702
2876
  name: 'LinkPreview'
2703
2877
  category: 'Service Agent'
2704
2878
  url: 'https://www.linkpreview.net/'
2705
2879
 
2706
- - regex: 'JungleKeyThumbnail/([\d+\.])'
2880
+ - regex: 'JungleKeyThumbnail/[\d.]+'
2707
2881
  name: 'JungleKeyThumbnail'
2708
2882
  category: 'Crawler'
2709
2883
  url: 'https://junglekey.com/'
2710
2884
 
2711
- - regex: 'rocketmonitor(?: |bot/)([\d+\.])'
2885
+ - regex: 'rocketmonitor(?: |bot/)[\d.]+'
2712
2886
  name: 'RocketMonitorBot'
2713
2887
  category: 'Site Monitor'
2714
2888
  url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
@@ -2716,7 +2890,7 @@
2716
2890
  name: 'Radio Mast, Inc.'
2717
2891
  url: 'https://www.radiomast.io/'
2718
2892
 
2719
- - regex: 'SitemapParser-VIPnytt/([\d+\.])'
2893
+ - regex: 'SitemapParser-VIPnytt/[\d.]+'
2720
2894
  name: 'SitemapParser-VIPnytt'
2721
2895
  category: 'Crawler'
2722
2896
  url: 'https://github.com/VIPnytt/SitemapParser/'
@@ -2726,7 +2900,7 @@
2726
2900
  category: 'Crawler'
2727
2901
  url: 'https://turnitin.com/robot/crawlerinfo.html'
2728
2902
 
2729
- - regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
2903
+ - regex: 'DMBrowser/[\d.]+|DMBrowser-[UB]V'
2730
2904
  name: 'Dotcom Monitor'
2731
2905
  category: 'Site Monitor'
2732
2906
  url: 'https://www.dotcom-monitor.com'
@@ -2740,17 +2914,17 @@
2740
2914
  category: 'Crawler'
2741
2915
  url: 'https://dataforseo.com/dataforseo-bot'
2742
2916
 
2743
- - regex: 'Discordbot/([\d+.]+)'
2917
+ - regex: 'Discordbot/[\d.]+'
2744
2918
  name: 'Discord Bot'
2745
2919
  category: 'Service Agent'
2746
2920
  url: 'https://discordapp.com'
2747
2921
 
2748
- - regex: 'Linespider/([\d+.]+)'
2922
+ - regex: 'Linespider/[\d.]+'
2749
2923
  name: 'Linespider'
2750
2924
  category: 'Crawler'
2751
2925
  url: 'https://lin.ee/4dwXkTH'
2752
2926
 
2753
- - regex: 'Cincraw/([\d+.]+)'
2927
+ - regex: 'Cincraw/[\d.]+'
2754
2928
  name: 'Cincraw'
2755
2929
  category: 'Crawler'
2756
2930
  url: 'http://cincrawdata.net/bot/'
@@ -2776,7 +2950,7 @@
2776
2950
  category: 'Crawler'
2777
2951
  url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
2778
2952
 
2779
- - regex: 'https://securitytxt-scan.cs.hm.edu/'
2953
+ - regex: 'https://securitytxt-scan\.cs\.hm\.edu/'
2780
2954
  name: 'security.txt scanserver'
2781
2955
  category: 'Security Checker'
2782
2956
  url: 'https://securitytxt-scan.cs.hm.edu/'
@@ -2784,17 +2958,17 @@
2784
2958
  name: 'Hochschule für angewandte Wissenschaften München'
2785
2959
  url: 'https://www.hm.edu/'
2786
2960
 
2787
- - regex: 'TigerBot/([\d+.]+)'
2961
+ - regex: 'TigerBot/[\d.]+'
2788
2962
  name: 'TigerBot'
2789
2963
  category: 'Crawler'
2790
2964
  url: 'https://tiger.ch/'
2791
2965
 
2792
- - regex: 'TestCrawler/([\d+.]+)'
2966
+ - regex: 'TestCrawler/[\d.]+'
2793
2967
  name: 'TestCrawler'
2794
2968
  category: 'Crawler'
2795
2969
  url: 'https://www.comcepta.com/'
2796
2970
 
2797
- - regex: 'CrowdTanglebot/([\d+.]+)'
2971
+ - regex: 'CrowdTanglebot/[\d.]+'
2798
2972
  name: 'CrowdTangle'
2799
2973
  category: 'Crawler'
2800
2974
  url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
@@ -2802,7 +2976,7 @@
2802
2976
  name: 'CrowdTangle, Inc.'
2803
2977
  url: 'https://www.crowdtangle.com/'
2804
2978
 
2805
- - regex: 'Sellers.Guide Crawler by Primis'
2979
+ - regex: 'Sellers\.Guide Crawler by Primis'
2806
2980
  name: 'Sellers.Guide'
2807
2981
  category: 'Crawler'
2808
2982
  url: 'https://sellers.guide/'
@@ -2826,7 +3000,7 @@
2826
3000
  name: 'deepnoc, GmbH'
2827
3001
  url: 'https://deepnoc.com/'
2828
3002
 
2829
- - regex: 'Newslitbot/([\d+.]+)'
3003
+ - regex: 'Newslitbot/[\d.]+'
2830
3004
  name: 'Newslitbot'
2831
3005
  category: 'Crawler'
2832
3006
  url: 'https://www.newslit.co/'
@@ -2834,7 +3008,7 @@
2834
3008
  name: 'Newslit, LLC.'
2835
3009
  url: 'https://www.newslit.co/'
2836
3010
 
2837
- - regex: 'um-LN/([\d+.]+)'
3011
+ - regex: 'um-LN/[\d.]+'
2838
3012
  name: 'uMBot'
2839
3013
  category: 'Crawler'
2840
3014
  url: 'https://www.ubermetrics-technologies.com/'
@@ -2842,12 +3016,12 @@
2842
3016
  name: 'Ubermetrics Technologies GmbH'
2843
3017
  url: 'https://www.ubermetrics-technologies.com/'
2844
3018
 
2845
- - regex: 'Abonti/([\d+.]+)'
3019
+ - regex: 'Abonti/[\d.]+'
2846
3020
  name: 'Abonti'
2847
3021
  category: 'Crawler'
2848
3022
  url: 'http://abonti.com/'
2849
3023
 
2850
- - regex: 'collection@infegy.com'
3024
+ - regex: 'collection@infegy\.com'
2851
3025
  name: 'Infegy'
2852
3026
  category: 'Crawler'
2853
3027
  url: 'https://infegy.com/'
@@ -2855,7 +3029,7 @@
2855
3029
  name: 'Infegy, Inc.'
2856
3030
  url: 'https://infegy.com/'
2857
3031
 
2858
- - regex: 'HTTP Banner Detection \(https://security.ipip.net\)'
3032
+ - regex: 'HTTP Banner Detection \(https://security\.ipip\.net\)'
2859
3033
  name: 'IPIP'
2860
3034
  category: 'Security Checker'
2861
3035
  url: 'https://security.ipip.net/'
@@ -2863,7 +3037,7 @@
2863
3037
  name: 'Beijing Tiantexin Tech. Co., Ltd.'
2864
3038
  url: 'https://en.ipip.net/'
2865
3039
 
2866
- - regex: 'ev-crawler/([\d+.]+)'
3040
+ - regex: 'ev-crawler/[\d.]+'
2867
3041
  name: 'Headline'
2868
3042
  category: 'Crawler'
2869
3043
  url: 'https://headline.com/legal/crawler'
@@ -2871,7 +3045,7 @@
2871
3045
  name: 'e.ventures Managementgesellschaft mbH'
2872
3046
  url: 'https://headline.com/'
2873
3047
 
2874
- - regex: 'webprosbot/([\d+.]+)'
3048
+ - regex: 'webprosbot/[\d.]+'
2875
3049
  name: 'WebPros'
2876
3050
  category: 'Crawler'
2877
3051
  url: 'https://webpros.com/'
@@ -2887,7 +3061,7 @@
2887
3061
  name: 'Amazon.com, Inc.'
2888
3062
  url: 'https://www.amazon.com/'
2889
3063
 
2890
- - regex: 'Wheregoes.com Redirect Checker/([\d+.]+)'
3064
+ - regex: 'Wheregoes\.com Redirect Checker/[\d.]+'
2891
3065
  name: 'WhereGoes'
2892
3066
  category: 'Crawler'
2893
3067
  url: 'https://wheregoes.com/'
@@ -2897,12 +3071,12 @@
2897
3071
  category: 'Crawler'
2898
3072
  url: 'http://66.240.192.82/'
2899
3073
 
2900
- - regex: 'InternetMeasurement/([\d+.]+)'
3074
+ - regex: 'InternetMeasurement/[\d.]+'
2901
3075
  name: 'InternetMeasurement'
2902
3076
  category: 'Crawler'
2903
3077
  url: 'https://internet-measurement.com/'
2904
3078
 
2905
- - regex: 'DomainAppender /([\d+.]+)'
3079
+ - regex: 'DomainAppender /[\d.]+'
2906
3080
  name: 'DomainAppender'
2907
3081
  category: 'Crawler'
2908
3082
  url: 'https://www.profound.net/product/domain_append/'
@@ -2910,7 +3084,7 @@
2910
3084
  name: 'Profound Networks, LLC'
2911
3085
  url: 'https://www.profound.net/'
2912
3086
 
2913
- - regex: 'FreeWebMonitoring SiteChecker/([\d+.]+)'
3087
+ - regex: 'FreeWebMonitoring SiteChecker/[\d.]+'
2914
3088
  name: 'FreeWebMonitoring'
2915
3089
  category: 'Site Monitor'
2916
3090
  url: 'https://www.freewebmonitoring.com/bot.html'
@@ -2926,7 +3100,7 @@
2926
3100
  name: 'Valley Hosting, LLC'
2927
3101
  url: 'https://www.pagemodified.com/'
2928
3102
 
2929
- - regex: 'adstxtlab.com'
3103
+ - regex: 'adstxtlab\.com'
2930
3104
  name: 'adstxtlab.com'
2931
3105
  category: 'Crawler'
2932
3106
  url: 'https://adstxtlab.com/validator.php'
@@ -2934,7 +3108,7 @@
2934
3108
  name: 'Jaohawi AB'
2935
3109
  url: 'https://adstxtlab.com/'
2936
3110
 
2937
- - regex: 'Iframely/([\d+.]+)'
3111
+ - regex: 'Iframely/[\d.]+'
2938
3112
  name: 'Iframely'
2939
3113
  category: 'Crawler'
2940
3114
  url: 'https://iframely.com/'
@@ -2942,7 +3116,7 @@
2942
3116
  name: 'Itteco Software, Corp.'
2943
3117
  url: 'https://iframely.com/'
2944
3118
 
2945
- - regex: 'DomainStatsBot/([\d+.]+)'
3119
+ - regex: 'DomainStatsBot/[\d.]+'
2946
3120
  name: 'DomainStatsBot'
2947
3121
  category: 'Crawler'
2948
3122
  url: 'https://domainstats.com/pages/our-bot'
@@ -2950,7 +3124,7 @@
2950
3124
  name: 'Domainstats Ltd'
2951
3125
  url: 'https://domainstats.com/'
2952
3126
 
2953
- - regex: 'aiHitBot/([\d+.]+)'
3127
+ - regex: 'aiHitBot/[\d.]+'
2954
3128
  name: 'aiHitBot'
2955
3129
  category: 'Crawler'
2956
3130
  url: 'https://www.aihitdata.com/about'
@@ -2968,12 +3142,12 @@
2968
3142
  name: 'GitCrawlerBot'
2969
3143
  category: 'Crawler'
2970
3144
 
2971
- - regex: 'AdAuth/([\d+.]+)'
3145
+ - regex: 'AdAuth/[\d.]+'
2972
3146
  name: 'AdAuth'
2973
3147
  category: 'Crawler'
2974
3148
  url: 'https://www.adauth.com'
2975
3149
 
2976
- - regex: 'faveeo.com'
3150
+ - regex: 'faveeo\.com'
2977
3151
  name: 'Faveeo'
2978
3152
  category: 'Crawler'
2979
3153
  url: 'http://www.faveeo.com'
@@ -3004,7 +3178,7 @@
3004
3178
  name: 'Jožef Stefan Institute'
3005
3179
  url: 'https://www.ijs.si/ijsw/JSI'
3006
3180
 
3007
- - regex: 'dnt-policy@eff.org'
3181
+ - regex: 'dnt-policy@eff\.org'
3008
3182
  name: 'EFF Do Not Track Verifier'
3009
3183
  category: 'Crawler'
3010
3184
  url: 'https://www.eff.org/issues/do-not-track'
@@ -3028,7 +3202,7 @@
3028
3202
  name: 'Swoppen Systems GmbH'
3029
3203
  url: 'https://www.swoppen.com/de'
3030
3204
 
3031
- - regex: 'ScamadviserExternalHit/([\d+.]+)'
3205
+ - regex: 'ScamadviserExternalHit/[\d.]+'
3032
3206
  name: 'Scamadviser External Hit'
3033
3207
  category: 'Crawler'
3034
3208
  url: 'https://www.scamadviser.com/'
@@ -3041,20 +3215,20 @@
3041
3215
  category: 'Crawler'
3042
3216
  url: 'https://www.zaldamo.com/search.html'
3043
3217
  producer:
3044
- name: 'Project Orlando, LLC.'
3045
- url: 'https://www.projectorlando.com/'
3218
+ name: 'Zaldamo, LLC.'
3219
+ url: 'https://www.zaldamo.com/'
3046
3220
 
3047
- - regex: 'AFB/([\d+.]+)'
3221
+ - regex: 'AFB/[\d.]+'
3048
3222
  name: 'Allloadin Favicon Bot'
3049
3223
  category: 'Crawler'
3050
3224
  url: 'https://allloadin.com/'
3051
3225
 
3052
- - regex: 'SeolytBot/([\d+.]+)'
3226
+ - regex: 'SeolytBot/[\d.]+'
3053
3227
  name: 'Seolyt Bot'
3054
3228
  category: 'Crawler'
3055
3229
  url: 'https://seolyt.com'
3056
3230
 
3057
- - regex: 'LinkWalker/([\d+.]+)'
3231
+ - regex: 'LinkWalker/[\d.]+'
3058
3232
  name: 'LinkWalker'
3059
3233
  category: 'Crawler'
3060
3234
  url: 'https://www.phishlabs.com/'
@@ -3062,7 +3236,7 @@
3062
3236
  name: 'PhishLabs, Inc.'
3063
3237
  url: 'https://www.phishlabs.com/'
3064
3238
 
3065
- - regex: 'RenovateBot/([\d+.]+)'
3239
+ - regex: 'RenovateBot/[\d.]+'
3066
3240
  name: 'RenovateBot'
3067
3241
  category: 'Security Checker'
3068
3242
  url: 'https://github.com/renovatebot/renovate'
@@ -3070,7 +3244,7 @@
3070
3244
  name: 'White Source Ltd.'
3071
3245
  url: 'https://www.mend.io/free-developer-tools/renovate/'
3072
3246
 
3073
- - regex: 'INETDEX-BOT/([\d+.]+)'
3247
+ - regex: 'INETDEX-BOT/[\d.]+'
3074
3248
  name: 'Inetdex Bot'
3075
3249
  category: 'Crawler'
3076
3250
  url: 'https://www.inetdex.com/'
@@ -3083,15 +3257,7 @@
3083
3257
  name: 'Marc Huemer'
3084
3258
  url: 'https://www.netzzappen.com/'
3085
3259
 
3086
- - regex: 'SerpReputationManagementAgent/([\d+.]+)'
3087
- name: 'SEMrush Reputation Management'
3088
- category: 'Service Agent'
3089
- url: 'https://www.semrush.com/bot/'
3090
- producer:
3091
- name: 'SEMrush'
3092
- url: 'https://www.semrush.com/'
3093
-
3094
- - regex: 'panscient.com'
3260
+ - regex: 'panscient\.com'
3095
3261
  name: 'Panscient'
3096
3262
  category: 'Crawler'
3097
3263
  url: 'https://www.panscient.com/faq.htm'
@@ -3099,7 +3265,7 @@
3099
3265
  name: 'Panscient, Inc.'
3100
3266
  url: 'https://www.panscient.com/'
3101
3267
 
3102
- - regex: 'research@pdrlabs.net'
3268
+ - regex: 'research@pdrlabs\.net'
3103
3269
  name: 'PDR Labs'
3104
3270
  category: 'Security Checker'
3105
3271
  url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
@@ -3107,7 +3273,7 @@
3107
3273
  name: 'PDR Labs'
3108
3274
  url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
3109
3275
 
3110
- - regex: 'Nicecrawler/([\d+.]+)'
3276
+ - regex: 'Nicecrawler/[\d.]+'
3111
3277
  name: 'NiceCrawler'
3112
3278
  category: 'Crawler'
3113
3279
  url: 'https://www.nicecrawler.com/'
@@ -3115,7 +3281,7 @@
3115
3281
  name: 'Intelium Corp.'
3116
3282
  url: 'https://www.intelium.com/'
3117
3283
 
3118
- - regex: 't3versionsBot/([\d+.]+)'
3284
+ - regex: 't3versionsBot/[\d.]+'
3119
3285
  name: 't3versions'
3120
3286
  category: 'Crawler'
3121
3287
  url: 'https://www.t3versions.com/bot'
@@ -3123,7 +3289,7 @@
3123
3289
  name: 'Torben Hansen'
3124
3290
  url: 'https://www.t3versions.com/'
3125
3291
 
3126
- - regex: 'Crawlson/([\d+.]+)'
3292
+ - regex: 'Crawlson/[\d.]+'
3127
3293
  name: 'Crawlson'
3128
3294
  category: 'Crawler'
3129
3295
  url: 'https://www.crawlson.com/about'
@@ -3131,7 +3297,7 @@
3131
3297
  name: 'Crawlson'
3132
3298
  url: 'https://www.crawlson.com/'
3133
3299
 
3134
- - regex: 'tchelebi/([\d+.]+)'
3300
+ - regex: 'tchelebi/[\d.]+'
3135
3301
  name: 'tchelebi'
3136
3302
  category: 'Crawler'
3137
3303
  url: 'https://tchelebi.io/'
@@ -3147,7 +3313,7 @@
3147
3313
  name: 'New Work SE'
3148
3314
  url: 'https://www.xing.com/'
3149
3315
 
3150
- - regex: 'RepoLookoutBot/([\d+.]+)'
3316
+ - regex: 'RepoLookoutBot/v?[\d.]+'
3151
3317
  name: 'Repo Lookout'
3152
3318
  category: 'Security Checker'
3153
3319
  url: 'https://www.repo-lookout.org/'
@@ -3163,7 +3329,7 @@
3163
3329
  name: 'MAMI Project'
3164
3330
  url: 'https://mami-project.eu/'
3165
3331
 
3166
- - regex: 'everyfeed-spider/([\d+.]+)'
3332
+ - regex: 'everyfeed-spider/[\d.]+'
3167
3333
  name: 'Everyfeed'
3168
3334
  url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
3169
3335
  category: 'Feed Fetcher'
@@ -3187,7 +3353,7 @@
3187
3353
  name: ''
3188
3354
  url: ''
3189
3355
 
3190
- - regex: 'Gregarius/([\d+.]+)'
3356
+ - regex: 'Gregarius/[\d.]+'
3191
3357
  name: 'Gregarius'
3192
3358
  category: 'Feed Fetcher'
3193
3359
  url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
@@ -3203,7 +3369,7 @@
3203
3369
  name: 'Comodo Security Solutions, Inc.'
3204
3370
  url: 'https://www.comodo.com/'
3205
3371
 
3206
- - regex: 'Sectigo DCV'
3372
+ - regex: 'Sectigo DCV|acme\.sectigo\.com'
3207
3373
  name: 'Sectigo DCV'
3208
3374
  category: 'Service Agent'
3209
3375
  url: 'https://sectigo.com/'
@@ -3211,7 +3377,7 @@
3211
3377
  name: 'Sectigo Limited'
3212
3378
  url: 'https://sectigo.com/'
3213
3379
 
3214
- - regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/([\d+.]+)'
3380
+ - regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/[\d.]+'
3215
3381
  name: 'KlarnaBot'
3216
3382
  category: 'Crawler'
3217
3383
  url: 'https://docs.klarna.com/klarna-bot/'
@@ -3219,7 +3385,7 @@
3219
3385
  name: 'Klarna Bank AB'
3220
3386
  url: 'https://www.klarna.com/'
3221
3387
 
3222
- - regex: 'Taboolabot/([\d+.]+)'
3388
+ - regex: 'Taboolabot/[\d.]+'
3223
3389
  name: 'Taboolabot'
3224
3390
  category: 'Crawler'
3225
3391
  url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
@@ -3227,7 +3393,7 @@
3227
3393
  name: 'Taboola, Inc.'
3228
3394
  url: 'https://www.taboola.com/'
3229
3395
 
3230
- - regex: 'Asana/([\d+.]+)'
3396
+ - regex: 'Asana/[\d.]+'
3231
3397
  name: 'Asana'
3232
3398
  category: 'Crawler'
3233
3399
  url: 'https://asana.com/'
@@ -3243,7 +3409,7 @@
3243
3409
  name: 'Google Inc.'
3244
3410
  url: 'https://www.google.com/'
3245
3411
 
3246
- - regex: 'URLinspectorBot/([\d+.]+)'
3412
+ - regex: 'URLinspectorBot/[\d.]+'
3247
3413
  name: 'URLinspector'
3248
3414
  category: 'Site Monitor'
3249
3415
  url: 'https://www.urlinspector.com/bot/'
@@ -3251,7 +3417,7 @@
3251
3417
  name: 'LinkResearchTools GmbH'
3252
3418
  url: 'https://www.linkresearchtools.com/'
3253
3419
 
3254
- - regex: 'EntferBot/([\d+.]+)'
3420
+ - regex: 'EntferBot/[\d.]+'
3255
3421
  name: 'Entfer'
3256
3422
  category: 'Crawler'
3257
3423
  url: 'https://entfer.com/'
@@ -3259,7 +3425,7 @@
3259
3425
  name: 'Entfer Ltd.'
3260
3426
  url: 'https://entfer.com/'
3261
3427
 
3262
- - regex: 'TagInspector/([\d+.]+)'
3428
+ - regex: 'TagInspector/[\d.]+'
3263
3429
  name: 'Tag Inspector'
3264
3430
  category: 'Crawler'
3265
3431
  url: 'https://taginspector.com/'
@@ -3283,7 +3449,7 @@
3283
3449
  name: 'Diffbot Technologies Corp.'
3284
3450
  url: 'https://www.diffbot.com/'
3285
3451
 
3286
- - regex: 'DisqusAdstxtCrawler/([\d+.]+)'
3452
+ - regex: 'DisqusAdstxtCrawler/[\d.]+'
3287
3453
  name: 'Disqus'
3288
3454
  category: 'Crawler'
3289
3455
  url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
@@ -3291,7 +3457,7 @@
3291
3457
  name: 'Disqus, Inc.'
3292
3458
  url: 'https://disqus.com/'
3293
3459
 
3294
- - regex: 'startmebot/([\d+.]+)'
3460
+ - regex: 'startmebot/[\d.]+'
3295
3461
  name: 'start.me'
3296
3462
  category: 'Crawler'
3297
3463
  url: 'https://about.start.me/'
@@ -3299,17 +3465,17 @@
3299
3465
  name: 'start.me BV'
3300
3466
  url: 'https://about.start.me/'
3301
3467
 
3302
- - regex: '2ip bot/([\d+.]+)'
3468
+ - regex: '2ip bot/[\d.]+'
3303
3469
  name: '2ip'
3304
3470
  category: 'Crawler'
3305
3471
  url: 'https://2ip.io/'
3306
3472
 
3307
- - regex: 'ReqBin Curl Client/([\d+.]+)'
3473
+ - regex: 'ReqBin Curl Client/[\d.]+'
3308
3474
  name: 'ReqBin'
3309
3475
  category: 'Crawler'
3310
3476
  url: 'https://reqbin.com/curl'
3311
3477
 
3312
- - regex: 'XoviBot/([\d+.]+)'
3478
+ - regex: 'XoviBot/[\d.]+'
3313
3479
  name: 'XoviBot'
3314
3480
  category: 'Crawler'
3315
3481
  url: 'https://www.xovibot.net'
@@ -3317,12 +3483,12 @@
3317
3483
  name: 'Xovi GmbH'
3318
3484
  url: 'http://www.xovi.de'
3319
3485
 
3320
- - regex: 'Overcast/([\d+.]+) Podcast Sync'
3486
+ - regex: 'Overcast/[\d.]+ Podcast Sync'
3321
3487
  name: 'Overcast Podcast Sync'
3322
3488
  category: 'Service Agent'
3323
3489
  url: 'https://overcast.fm/podcasterinfo'
3324
3490
 
3325
- - regex: '^Verity/([\d+.]+)'
3491
+ - regex: '^Verity/[\d.]+'
3326
3492
  name: 'GumGum Verity'
3327
3493
  category: 'Service Agent'
3328
3494
  url: 'https://gumgum.com/verity'
@@ -3332,7 +3498,7 @@
3332
3498
  category: 'Feed Reader'
3333
3499
  url: 'https://github.com/snarfed/hackermention'
3334
3500
 
3335
- - regex: 'BitSightBot/([\d+.]+)'
3501
+ - regex: 'BitSightBot/[\d.]+'
3336
3502
  name: 'BitSight'
3337
3503
  category: 'Security Checker'
3338
3504
  url: 'https://www.bitsight.com/'
@@ -3340,12 +3506,12 @@
3340
3506
  name: 'BitSight Technologies, Inc.'
3341
3507
  url: 'https://www.bitsight.com/'
3342
3508
 
3343
- - regex: 'Ezgif/([\d+.]+)'
3509
+ - regex: 'Ezgif/[\d.]+'
3344
3510
  name: 'Ezgif'
3345
3511
  category: 'Service Agent'
3346
3512
  url: 'https://ezgif.com/about'
3347
3513
 
3348
- - regex: 'intelx.io_bot'
3514
+ - regex: 'intelx\.io_bot'
3349
3515
  name: 'Intelligence X'
3350
3516
  category: 'Crawler'
3351
3517
  url: 'https://intelx.io/'
@@ -3353,7 +3519,7 @@
3353
3519
  name: 'Kleissner Investments s.r.o.'
3354
3520
  url: 'https://intelx.io/'
3355
3521
 
3356
- - regex: 'FemtosearchBot/([\d+.]+)'
3522
+ - regex: 'FemtosearchBot/[\d.]+'
3357
3523
  name: 'Femtosearch'
3358
3524
  category: 'Crawler'
3359
3525
  url: 'http://femtosearch.com/'
@@ -3361,7 +3527,7 @@
3361
3527
  name: 'Grier Forensics, LLC'
3362
3528
  url: 'https://www.grierforensics.com/'
3363
3529
 
3364
- - regex: 'AdsTxtCrawler/([\d+.]+)'
3530
+ - regex: 'AdsTxtCrawler/[\d.]+'
3365
3531
  name: 'AdsTxtCrawler'
3366
3532
  category: 'Crawler'
3367
3533
  url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
@@ -3377,7 +3543,7 @@
3377
3543
  name: 'Morningscore'
3378
3544
  url: 'https://morningscore.io/'
3379
3545
 
3380
- - regex: 'Uptime-Kuma/([\d+.]+)'
3546
+ - regex: 'Uptime-Kuma/[\d.]+'
3381
3547
  name: 'Uptime-Kuma'
3382
3548
  category: 'Site Monitor'
3383
3549
  url: 'https://github.com/louislam/uptime-kuma'
@@ -3390,7 +3556,7 @@
3390
3556
  name: 'OpenAI OpCo, LLC'
3391
3557
  url: 'https://openai.com/'
3392
3558
 
3393
- - regex: 'BrightEdge Crawler/([\d+.]+)'
3559
+ - regex: 'BrightEdge Crawler/[\d.]+'
3394
3560
  name: 'BrightEdge'
3395
3561
  category: 'Crawler'
3396
3562
  url: 'https://www.brightedge.com/'
@@ -3398,12 +3564,12 @@
3398
3564
  name: 'BrightEdge Technologies, Inc'
3399
3565
  url: 'https://www.brightedge.com/'
3400
3566
 
3401
- - regex: 'sfFeedReader/([\d+.]+)'
3567
+ - regex: 'sfFeedReader/[\d.]+'
3402
3568
  name: 'sfFeedReader'
3403
3569
  url: 'https://github.com/diem-project/sfFeed2Plugin'
3404
3570
  category: 'Feed Fetcher'
3405
3571
 
3406
- - regex: 'cyberscan.io'
3572
+ - regex: 'cyberscan\.io'
3407
3573
  name: 'Cyberscan'
3408
3574
  category: 'Security Checker'
3409
3575
  url: 'https://www.cyberscan.io/'
@@ -3419,15 +3585,7 @@
3419
3585
  name: 'Lumar'
3420
3586
  url: 'https://www.lumar.io/'
3421
3587
 
3422
- - regex: 'RepoLookoutBot'
3423
- name: 'Repo Lookout'
3424
- category: 'Crawler'
3425
- url: 'https://www.repo-lookout.org/'
3426
- producer:
3427
- name: 'Crissy Field GmbH'
3428
- url: 'https://www.crissyfield.de/'
3429
-
3430
- - regex: 'researchscan.comsys.rwth-aachen.de'
3588
+ - regex: 'researchscan\.comsys\.rwth-aachen\.de'
3431
3589
  name: 'Research Scan'
3432
3590
  category: 'Crawler'
3433
3591
  url: 'http://researchscan.comsys.rwth-aachen.de/'
@@ -3435,7 +3593,7 @@
3435
3593
  name: 'RWTH Aachen University'
3436
3594
  url: 'https://www.comsys.rwth-aachen.de/'
3437
3595
 
3438
- - regex: 'newspaper/([\d+.]+)'
3596
+ - regex: 'newspaper/[\d.]+'
3439
3597
  name: 'Scraping Robot'
3440
3598
  category: 'Crawler'
3441
3599
  url: 'https://scrapingrobot.com/'
@@ -3443,7 +3601,7 @@
3443
3601
  name: 'Sprious LLC'
3444
3602
  url: 'https://sprious.com/'
3445
3603
 
3446
- - regex: 'GPTBot/([\d+.]+)'
3604
+ - regex: 'GPTBot/[\d.]+'
3447
3605
  name: 'GPTBot'
3448
3606
  category: 'Crawler'
3449
3607
  url: 'https://platform.openai.com/docs/gptbot'
@@ -3451,7 +3609,7 @@
3451
3609
  name: 'OpenAI OpCo, LLC'
3452
3610
  url: 'https://openai.com/'
3453
3611
 
3454
- - regex: 'Ant.com beta/([\d+.]+)'
3612
+ - regex: 'Ant(?:\.com beta|Bot)(?:/([\d+.]+))?'
3455
3613
  name: 'Ant'
3456
3614
  category: 'Crawler'
3457
3615
  url: 'https://www.ant.com/'
@@ -3459,7 +3617,7 @@
3459
3617
  name: 'Ant.com Ltd.'
3460
3618
  url: 'https://www.ant.com/'
3461
3619
 
3462
- - regex: 'WebwikiBot/([\d+.]+)'
3620
+ - regex: 'WebwikiBot/[\d.]+'
3463
3621
  name: 'Webwiki'
3464
3622
  category: 'Crawler'
3465
3623
  url: 'https://www.webwiki.com/'
@@ -3472,7 +3630,7 @@
3472
3630
  category: 'Service Agent'
3473
3631
  url: 'https://www.phpmyadmin.net/'
3474
3632
 
3475
- - regex: 'Matomo/([\d+.]+)'
3633
+ - regex: 'Matomo/[\d.]+'
3476
3634
  name: 'Matomo'
3477
3635
  category: 'Service Agent'
3478
3636
  url: 'https://github.com/matomo-org/matomo'
@@ -3480,7 +3638,7 @@
3480
3638
  name: 'InnoCraft Ltd'
3481
3639
  url: 'https://matomo.org/'
3482
3640
 
3483
- - regex: 'Prometheus/([\d+.]+)'
3641
+ - regex: 'Prometheus/[\d.]+'
3484
3642
  name: 'Prometheus'
3485
3643
  category: 'Service Agent'
3486
3644
  url: 'https://github.com/prometheus/prometheus'
@@ -3496,7 +3654,7 @@
3496
3654
  name: 'ArchiveTeam'
3497
3655
  url: 'https://wiki.archiveteam.org/'
3498
3656
 
3499
- - regex: 'MADBbot/([\d+.]+)'
3657
+ - regex: 'MADBbot/[\d.]+'
3500
3658
  name: 'MADBbot'
3501
3659
  category: 'Crawler'
3502
3660
  url: 'https://madb.zapto.org/bot.html'
@@ -3508,6 +3666,877 @@
3508
3666
  name: 'Meltwater Deutschland GmbH'
3509
3667
  url: 'https://www.meltwater.com/'
3510
3668
 
3669
+ - regex: '(?:Owler@ows\.eu|OWLer)/[\d.]+'
3670
+ name: 'OWLer'
3671
+ category: 'Crawler'
3672
+ url: 'https://openwebsearch.eu/owler/'
3673
+ producer:
3674
+ name: 'Open Search Foundation e.V.'
3675
+ url: 'https://openwebsearch.eu/'
3676
+
3677
+ - regex: 'bbc\.co\.uk/display/men/Page\+Monitor'
3678
+ name: 'BBC Page Monitor'
3679
+ category: 'Site Monitor'
3680
+ url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor'
3681
+ producer:
3682
+ name: 'BBC'
3683
+ url: 'https://www.bbc.com/'
3684
+
3685
+ - regex: 'BBC-Forge-URL-Monitor-Twisted'
3686
+ name: 'BBC Forge URL Monitor'
3687
+ category: 'Site Monitor'
3688
+ url: 'https://www.bbc.com/'
3689
+ producer:
3690
+ name: 'BBC'
3691
+ url: 'https://www.bbc.com/'
3692
+
3693
+ - regex: 'ClaudeBot'
3694
+ name: 'ClaudeBot'
3695
+ category: 'Crawler'
3696
+ url: 'https://github.com/ClaudeBot/ClaudeBot'
3697
+
3698
+ - regex: 'Imagesift'
3699
+ name: 'ImageSift'
3700
+ category: 'Crawler'
3701
+ url: 'https://imagesift.com/'
3702
+ producer:
3703
+ name: 'Castle Global, Inc.'
3704
+ url: 'https://thehive.ai/'
3705
+
3706
+ - regex: 'TactiScout'
3707
+ name: 'TactiScout'
3708
+ category: 'Crawler'
3709
+ url: 'https://find-it.world/TempCrawl/Crawltheque.php'
3710
+ producer:
3711
+ name: 'Tactikast'
3712
+
3713
+ - regex: 'Brightbot ([\d+.]+)'
3714
+ name: 'BrightBot'
3715
+ category: 'Crawler'
3716
+ url: 'https://www.brightbot.app/'
3717
+ producer:
3718
+ name: 'Bright Interactive Ltd'
3719
+ url: 'https://www.builtbybright.com/'
3720
+
3721
+ - regex: 'DaspeedBot/([\d+.]+)'
3722
+ name: 'DaspeedBot'
3723
+ category: 'Crawler'
3724
+ url: 'https://daspeed.io/'
3725
+ producer:
3726
+ name: 'DAWAP SARL'
3727
+ url: 'https://dawap.fr/'
3728
+
3729
+ - regex: 'StractBot(?:/([\d+.]+))?'
3730
+ name: 'Stract'
3731
+ category: 'Crawler'
3732
+ url: 'https://stract.com/webmasters'
3733
+ producer:
3734
+ name: 'Stract'
3735
+ url: 'https://github.com/StractOrg/stract/'
3736
+
3737
+ - regex: 'GeedoBot(?:/([\d+.]+))?'
3738
+ name: 'GeedoBot'
3739
+ category: 'Crawler'
3740
+ url: 'https://geedo.com/bot/'
3741
+
3742
+ - regex: 'GeedoProductSearch'
3743
+ name: 'GeedoProductSearch'
3744
+ category: 'Crawler'
3745
+ url: 'https://geedo.com/product-search/'
3746
+
3747
+ - regex: 'BackupLand(?:/([\d+.]+))?'
3748
+ name: 'BackupLand'
3749
+ category: 'Crawler'
3750
+ url: 'https://go.backupland.com/'
3751
+ producer:
3752
+ name: 'ООО «КВАРТА»'
3753
+ url: 'https://go.backupland.com/'
3754
+
3755
+ - regex: 'Konturbot(?:/([\d+.]+))?'
3756
+ name: 'Konturbot'
3757
+ category: 'Crawler'
3758
+ url: 'https://kontur.ru/'
3759
+ producer:
3760
+ name: 'АО «ПФ «СКБ Контур»'
3761
+ url: 'https://kontur.ru/'
3762
+
3763
+ - regex: 'keys-so-bot'
3764
+ name: 'Keys.so'
3765
+ category: 'Crawler'
3766
+ url: 'https://www.keys.so/'
3767
+ producer:
3768
+ name: 'ООО «МОДЕСКО»'
3769
+ url: 'https://www.modesco.ru/'
3770
+
3771
+ - regex: 'LetsearchBot(?:/([\d+.]+))?'
3772
+ name: 'LetSearch'
3773
+ category: 'Crawler'
3774
+ url: 'https://letsearch.ru/bots'
3775
+
3776
+ - regex: 'Example3(?:/([\d+.]+))?'
3777
+ name: 'Example3'
3778
+ category: 'Crawler'
3779
+ url: 'https://www.example3.com/'
3780
+
3781
+ - regex: 'StatOnlineRuBot(?:/([\d+.]+))?'
3782
+ name: 'StatOnline.ru'
3783
+ category: 'Crawler'
3784
+ url: 'https://statonline.ru/'
3785
+ producer:
3786
+ name: 'ООО «Регистратор доменных имен РЕГ.РУ»'
3787
+ url: 'https://statonline.ru/'
3788
+
3789
+ - regex: 'Spawning-AI'
3790
+ name: 'Spawning AI'
3791
+ category: 'Crawler'
3792
+ url: 'https://spawning.ai/'
3793
+ producer:
3794
+ name: 'Spawning, Inc'
3795
+ url: 'https://spawning.ai/'
3796
+
3797
+ - regex: 'domain research project'
3798
+ name: 'Domain Research Project'
3799
+ category: 'Crawler'
3800
+ url: 'https://trentwil.es/domains.html'
3801
+ producer:
3802
+ name: 'Trent Wiles'
3803
+ url: 'https://trentwil.es/'
3804
+
3805
+ - regex: 'getodin\.com'
3806
+ name: 'Odin'
3807
+ category: 'Security Checker'
3808
+ url: 'https://docs.getodin.com/'
3809
+ producer:
3810
+ name: 'Cyble Inc.'
3811
+ url: 'https://cyble.com/'
3812
+
3813
+ - regex: 'YouBot'
3814
+ name: 'YouBot'
3815
+ category: 'Crawler'
3816
+ url: 'https://about.you.com/youbot/'
3817
+ producer:
3818
+ name: 'SuSea, Inc.'
3819
+ url: 'https://you.com/'
3820
+
3821
+ - regex: 'SiteScoreBot'
3822
+ name: 'SiteScore'
3823
+ category: 'Crawler'
3824
+ url: 'https://sitescore.ai/'
3825
+
3826
+ - regex: 'MBCrawler'
3827
+ name: 'Monitor Backlinks'
3828
+ category: 'Crawler'
3829
+ url: 'https://www.seoptimer.com/monitor-backlinks/'
3830
+ producer:
3831
+ name: 'SEOptimer'
3832
+ url: 'https://www.seoptimer.com/'
3833
+
3834
+ - regex: 'mariadb-mysql-kbs-bot'
3835
+ name: 'MariaDB/MySQL Knowledge Base'
3836
+ category: 'Crawler'
3837
+ url: 'https://github.com/williamdes/mariadb-mysql-kbs'
3838
+ producer:
3839
+ name: 'WDES SAS'
3840
+ url: 'https://wdes.fr/en/'
3841
+
3842
+ - regex: 'GitHubCopilotChat'
3843
+ name: 'GitHubCopilotChat'
3844
+ category: 'Crawler'
3845
+ url: 'https://github.com/aaamoon/copilot-gpt4-service'
3846
+
3847
+ - regex: '^pdrl\.fm'
3848
+ name: 'Podroll Analyzer'
3849
+ category: 'Crawler'
3850
+ url: 'https://podroll.fm'
3851
+
3852
+ - regex: 'PodUptime/'
3853
+ name: 'PodUptime'
3854
+ category: 'Site Monitor'
3855
+ url: 'https://poduptime.com'
3856
+
3857
+ - regex: 'anthropic-ai'
3858
+ name: 'Anthropic AI'
3859
+ category: 'Crawler'
3860
+ url: 'https://www.anthropic.com/'
3861
+ producer:
3862
+ name: 'Anthropic, PBC'
3863
+ url: 'https://www.anthropic.com/'
3864
+
3865
+ - regex: 'NetpeakCheckerBot/[\d.]+'
3866
+ name: 'Netpeak Checker'
3867
+ category: 'Crawler'
3868
+ url: 'https://netpeaksoftware.com/checker'
3869
+ producer:
3870
+ name: 'Netpeak LTD'
3871
+ url: 'https://netpeaksoftware.com/'
3872
+
3873
+ - regex: 'SandobaCrawler/[\d.]+'
3874
+ name: 'Sandoba//Crawler'
3875
+ category: 'Crawler'
3876
+ url: 'https://www.sandoba.com/en/crawler/'
3877
+ producer:
3878
+ name: 'SANDOBA//EBUSINESS SOLUTIONS'
3879
+ url: 'https://www.sandoba.com/'
3880
+
3881
+ - regex: 'SirdataBot'
3882
+ name: 'Sirdata'
3883
+ category: 'Crawler'
3884
+ url: 'https://semantic-api.docs.sirdata.net/contextual-api/contextual-api/introduction'
3885
+ producer:
3886
+ name: 'Sirdata SAS'
3887
+ url: 'https://www.sirdata.com/'
3888
+
3889
+ - regex: 'CheckMarkNetwork/[\d.]+'
3890
+ name: 'CheckMark Network'
3891
+ category: 'Crawler'
3892
+ url: 'https://www.checkmarknetwork.com/spider.html/'
3893
+ producer:
3894
+ name: 'Exipert, Inc.'
3895
+ url: 'https://www.checkmarknetwork.com/'
3896
+
3897
+ - regex: 'cohere-ai'
3898
+ name: 'Cohere AI'
3899
+ category: 'Crawler'
3900
+ url: 'https://cohere.com/'
3901
+ producer:
3902
+ name: 'Cohere, Inc.'
3903
+ url: 'https://cohere.com/'
3904
+
3905
+ - regex: 'PerplexityBot/[\d.]+'
3906
+ name: 'PerplexityBot'
3907
+ category: 'Crawler'
3908
+ url: 'https://docs.perplexity.ai/docs/perplexitybot'
3909
+ producer:
3910
+ name: 'Perplexity AI, Inc.'
3911
+ url: 'https://www.perplexity.ai/'
3912
+
3913
+ - regex: 'TTD-Content'
3914
+ name: 'The Trade Desk Content'
3915
+ category: 'Crawler'
3916
+ url: 'https://www.thetradedesk.com/us/ttd-content'
3917
+ producer:
3918
+ name: 'The Trade Desk, Inc.'
3919
+ url: 'https://www.thetradedesk.com/'
3920
+
3921
+ - regex: 'montastic-monitor'
3922
+ name: 'Montastic Monitor'
3923
+ category: 'Site Monitor'
3924
+ url: 'https://www.montastic.com/'
3925
+ producer:
3926
+ name: 'Metadot, Corp.'
3927
+ url: 'https://www.metadot.com/'
3928
+
3929
+ - regex: 'Ruby, Twurly v[\d.]+'
3930
+ name: 'Twurly'
3931
+ category: 'Crawler'
3932
+ url: 'https://twurly.org/'
3933
+
3934
+ - regex: 'Mixnode(?:(?:Cache)?/[\d.]+)?'
3935
+ name: 'Mixnode'
3936
+ category: 'Crawler'
3937
+ url: 'https://www.mixnode.com/'
3938
+ producer:
3939
+ name: 'Mixnode Technologies, Inc.'
3940
+ url: 'https://www.mixnode.com/'
3941
+
3942
+ - regex: 'CSSCheck/[\d.]+'
3943
+ name: 'CSSCheck'
3944
+ category: 'Validator'
3945
+
3946
+ - regex: 'MicrosoftPreview/[\d.]+'
3947
+ name: 'Microsoft Preview'
3948
+ category: 'Service Agent'
3949
+ url: 'https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0'
3950
+ producer:
3951
+ name: 'Microsoft Corporation'
3952
+ url: 'https://www.microsoft.com/'
3953
+
3954
+ - regex: 's~virustotalcloud'
3955
+ name: 'VirusTotal Cloud'
3956
+ category: 'Crawler'
3957
+ url: 'https://www.virustotal.com/'
3958
+ producer:
3959
+ name: 'Chronicle Security Ireland Limited'
3960
+ url: 'https://chronicle.security/'
3961
+
3962
+ - regex: 'TinEye/[\d.]+'
3963
+ name: 'TinEye'
3964
+ category: 'Crawler'
3965
+ url: 'https://tineye.com/'
3966
+ producer:
3967
+ name: 'Idée, Inc.'
3968
+ url: 'https://tineye.com/'
3969
+
3970
+ - regex: 'e~arsnova-filter-system'
3971
+ name: 'ARSNova Filter System'
3972
+ category: 'Crawler'
3973
+ url: 'https://particify.de/en/'
3974
+ producer:
3975
+ name: 'Particify Gerhardt & Weingarten OHG'
3976
+ url: 'https://particify.de/en/'
3977
+
3978
+ - regex: 'botify'
3979
+ name: 'Botify'
3980
+ category: 'Crawler'
3981
+ url: 'https://www.botify.com/'
3982
+ producer:
3983
+ name: 'BOTIFY SAS'
3984
+ url: 'https://www.botify.com/'
3985
+
3986
+ - regex: 'adscanner'
3987
+ name: 'Adscanner'
3988
+ category: 'Crawler'
3989
+ url: 'https://www.alleyesonscreens.com/'
3990
+ producer:
3991
+ name: 'AdScanner d.o.o'
3992
+ url: 'https://www.alleyesonscreens.com/'
3993
+
3994
+ - regex: 'online-webceo-bot/[\d.]+'
3995
+ name: 'WebCEO'
3996
+ category: 'Crawler'
3997
+ url: 'https://www.webceo.com/'
3998
+ producer:
3999
+ name: 'WebCEO, LLC'
4000
+ url: 'https://www.webceo.com/'
4001
+
4002
+ - regex: 'NetTrack'
4003
+ name: 'NetTrack'
4004
+ category: 'Crawler'
4005
+ url: 'https://web.archive.org/web/20160607151934/https://nettrack.info/'
4006
+
4007
+ - regex: 'htmlyse'
4008
+ name: 'htmlyse'
4009
+ category: 'Crawler'
4010
+ url: 'https://www.htmlyse.com/'
4011
+ producer:
4012
+ name: 'Vistex LTD'
4013
+ url: 'https://www.htmlyse.com/'
4014
+
4015
+ - regex: 'TrendsmapResolver/[\d.]+'
4016
+ name: 'Trendsmap'
4017
+ category: 'Crawler'
4018
+ url: 'https://www.trendsmap.com/'
4019
+ producer:
4020
+ name: 'Trendsmap Pty Ltd'
4021
+ url: 'https://www.trendsmap.com/'
4022
+
4023
+ - regex: 'Shareaholic(?:bot)?/[\d.]+'
4024
+ name: 'Steve Bot'
4025
+ category: 'Crawler'
4026
+ url: 'https://www.shareaholic.com/steve'
4027
+ producer:
4028
+ name: 'Shareaholic, Inc.'
4029
+ url: 'https://www.shareaholic.com/'
4030
+
4031
+ - regex: 'keycdn-tools:'
4032
+ name: 'KeyCDN Tools'
4033
+ category: 'Service Agent'
4034
+ url: 'https://tools.keycdn.com/geo'
4035
+
4036
+ - regex: 'keycdn-tools/'
4037
+ name: 'KeyCDN Tools'
4038
+ category: 'Service Agent'
4039
+ url: 'https://tools.keycdn.com/'
4040
+ producer:
4041
+ name: 'proinity LLC'
4042
+ url: 'https://www.keycdn.com/'
4043
+
4044
+ - regex: 'Arquivo-web-crawler'
4045
+ name: 'Arquivo.pt'
4046
+ category: 'Crawler'
4047
+ url: 'https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/'
4048
+ producer:
4049
+ name: 'FCT|FCCN'
4050
+ url: 'https://www.fct.pt/'
4051
+
4052
+ - regex: 'WhatsMyIP\.org'
4053
+ name: 'WhatsMyIP.org'
4054
+ category: 'Service Agent'
4055
+ url: 'https://www.whatsmyip.org/ua/'
4056
+
4057
+ - regex: 'SenutoBot/[\d.]+'
4058
+ name: 'Senuto'
4059
+ category: 'Crawler'
4060
+ url: 'https://www.senuto.com/'
4061
+ producer:
4062
+ name: 'Senuto Sp. z o.o.'
4063
+ url: 'https://www.senuto.com/'
4064
+
4065
+ - regex: 'spaziodati'
4066
+ name: 'SpazioDati'
4067
+ category: 'Crawler'
4068
+ url: 'https://www.spaziodati.eu/'
4069
+ producer:
4070
+ name: 'SpazioDati s.r.l.'
4071
+ url: 'https://www.spaziodati.eu/'
4072
+
4073
+ - regex: 'GozleBot'
4074
+ name: 'Gozle'
4075
+ category: 'Crawler'
4076
+ url: 'https://gozle.com.tm/en/blog/post/1'
4077
+ producer:
4078
+ name: 'Doly Horjun HJ'
4079
+ url: 'https://gozle.com.tm/'
4080
+
4081
+ - regex: 'Quantcastbot/[\d.]+'
4082
+ name: 'Quantcast'
4083
+ category: 'Crawler'
4084
+ url: 'https://www.quantcast.com/bot/'
4085
+ producer:
4086
+ name: 'Quantcast Corp.'
4087
+ url: 'https://www.quantcast.com/'
4088
+
4089
+ - regex: 'FontRadar'
4090
+ name: 'FontRadar'
4091
+ category: 'Crawler'
4092
+ url: 'https://www.fontradar.com/'
4093
+ producer:
4094
+ name: 'EMDASH SAS'
4095
+ url: 'https://www.fontradar.com/'
4096
+
4097
+ - regex: 'ViberUrlDownloader'
4098
+ name: 'Viber Url Downloader'
4099
+ category: 'Service Agent'
4100
+ url: 'https://www.viber.com/'
4101
+ producer:
4102
+ name: 'Viber Media S.à r.l.'
4103
+ url: 'https://www.viber.com/'
4104
+
4105
+ - regex: '^Zeno$'
4106
+ name: 'Zeno'
4107
+ category: 'Crawler'
4108
+ url: 'https://github.com/internetarchive/Zeno'
4109
+ producer:
4110
+ name: 'The Internet Archive'
4111
+ url: 'https://archive.org/'
4112
+
4113
+ - regex: 'Barracuda Sentinel'
4114
+ name: 'Barracuda Sentinel'
4115
+ category: 'Service Agent'
4116
+ url: 'https://sentinel.barracudanetworks.com/'
4117
+ producer:
4118
+ name: 'Barracuda Networks, Inc.'
4119
+ url: 'https://www.barracudanetworks.com/'
4120
+
4121
+ - regex: 'RuxitSynthetic/[\d.]+'
4122
+ name: 'RuxitSynthetic'
4123
+ category: 'Site Monitor'
4124
+ url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
4125
+ producer:
4126
+ name: 'Dynatrace LLC'
4127
+ url: 'https://www.dynatrace.com/'
4128
+
4129
+ - regex: 'DynatraceSynthetic/[\d.]+'
4130
+ name: 'DynatraceSynthetic'
4131
+ category: 'Site Monitor'
4132
+ url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
4133
+ producer:
4134
+ name: 'Dynatrace LLC'
4135
+ url: 'https://www.dynatrace.com/'
4136
+
4137
+ - regex: 'sitebulb'
4138
+ name: 'Sitebulb'
4139
+ category: 'Crawler'
4140
+ url: 'https://sitebulb.com/'
4141
+ producer:
4142
+ name: 'Sitebulb Limited'
4143
+ url: 'https://sitebulb.com/'
4144
+
4145
+ - regex: 'Monsidobot/[\d.]+'
4146
+ name: 'Monsidobot'
4147
+ category: 'Crawler'
4148
+ url: 'https://monsido.com/bot-html'
4149
+ producer:
4150
+ name: 'Monsido LLC'
4151
+ url: 'https://monsido.com/'
4152
+
4153
+ - regex: 'AccompanyBot'
4154
+ name: 'AccompanyBot'
4155
+ category: 'Crawler'
4156
+ url: 'https://www.accompany.com/'
4157
+ producer:
4158
+ name: 'Accompani, Inc'
4159
+ url: 'https://www.accompany.com/'
4160
+
4161
+ - regex: 'Ghost Inspector'
4162
+ name: 'Ghost Inspector'
4163
+ category: 'Site Monitor'
4164
+ url: 'https://docs.ghostinspector.com/faq/#how-do-i-detect-ghost-inspector-test-runner-traffic-on-my-site'
4165
+ producer:
4166
+ name: 'Ghost Inspector, Inc.'
4167
+ url: 'https://www.ghostinspector.com/'
4168
+
4169
+ - regex: 'Cypress/[\d.]+'
4170
+ name: 'Cypress'
4171
+ category: 'Site Monitor'
4172
+ url: 'https://github.com/cypress-io/cypress'
4173
+ producer:
4174
+ name: 'Cypress.io, Inc.'
4175
+ url: 'https://www.cypress.io/'
4176
+
4177
+ - regex: 'Google-Apps-Script'
4178
+ name: 'Google Apps Script'
4179
+ category: 'Service Agent'
4180
+ url: 'https://www.google.com/script/start/'
4181
+
4182
+ - regex: 'SiteOne-Crawler/[\d.]+'
4183
+ name: 'SiteOne Crawler'
4184
+ category: 'Crawler'
4185
+ url: 'https://crawler.siteone.io/bot/'
4186
+ producer:
4187
+ name: 'SiteOne s.r.o.'
4188
+ url: 'https://www.siteone.io/'
4189
+
4190
+ - regex: 'Detectify'
4191
+ name: 'Detectify'
4192
+ category: 'Security Checker'
4193
+ url: 'https://support.detectify.com/support/solutions/articles/48001049001-how-to-allow-detectify-to-access-your-site'
4194
+ producer:
4195
+ name: 'Detectify AB'
4196
+ url: 'https://detectify.com/'
4197
+
4198
+ - regex: 'DomCopBot'
4199
+ name: 'DomCop Bot'
4200
+ category: 'Crawler'
4201
+ url: 'https://www.domcop.com/bot'
4202
+ producer:
4203
+ name: 'Axeman Technology Solutions LLP'
4204
+ url: 'https://axemantech.com/'
4205
+
4206
+ - regex: 'Paqlebot/[\d.]+'
4207
+ name: 'Paqlebot'
4208
+ category: 'Crawler'
4209
+ url: 'https://www.paqle.dk/about/paqlebot'
4210
+ producer:
4211
+ name: 'Paqle A/S'
4212
+ url: 'https://www.paqle.dk/'
4213
+
4214
+ - regex: 'Wibybot'
4215
+ name: 'Wibybot'
4216
+ category: 'Crawler'
4217
+ url: 'https://www.wiby.me/'
4218
+
4219
+ - regex: 'Synapse'
4220
+ name: 'Synapse'
4221
+ category: 'Crawler'
4222
+ url: 'https://github.com/matrix-org/synapse'
4223
+
4224
+ - regex: 'OSZKbot/[\d.]+'
4225
+ name: 'OSZKbot'
4226
+ category: 'Crawler'
4227
+ url: 'http://mekosztaly.oszk.hu/mia/'
4228
+ producer:
4229
+ name: 'National Szechenyi Library'
4230
+ url: 'https://webarchivum.oszk.hu/'
4231
+
4232
+ - regex: 'ZoomBot'
4233
+ name: 'ZoomBot'
4234
+ category: 'Crawler'
4235
+ url: 'https://suite.seozoom.it/bot.html'
4236
+ producer:
4237
+ name: 'SEO Cube S.r.l.'
4238
+ url: 'https://www.seocube.it/'
4239
+
4240
+ - regex: 'RavenCrawler/[\d.]+'
4241
+ name: 'RavenCrawler'
4242
+ category: 'Crawler'
4243
+ url: 'https://raventools.com/site-auditor/'
4244
+ producer:
4245
+ name: 'TapClicks, Inc.'
4246
+ url: 'https://www.tapclicks.com/'
4247
+
4248
+ - regex: 'KadoBot'
4249
+ name: 'KadoBot'
4250
+ category: 'Crawler'
4251
+ url: 'https://www.kadolijst.nl/bot'
4252
+ producer:
4253
+ name: 'Kadolijst'
4254
+ url: 'https://www.kadolijst.nl/'
4255
+
4256
+ - regex: 'Dubbotbot/[\d.]+'
4257
+ name: 'Dubbotbot'
4258
+ category: 'Crawler'
4259
+ url: 'https://help.dubbot.com/en/articles/6746594-example-custom-user-agent'
4260
+ producer:
4261
+ name: 'DubBot'
4262
+ url: 'https://dubbot.com/'
4263
+
4264
+ - regex: 'Swiftbot/[\d.]+'
4265
+ name: 'Swiftbot'
4266
+ category: 'Crawler'
4267
+ url: 'https://swiftype.com/swiftbot'
4268
+ producer:
4269
+ name: 'Elasticsearch, B.V.'
4270
+ url: 'https://www.elastic.co/'
4271
+
4272
+ - regex: 'EyeMonIT'
4273
+ name: 'EyeMonit'
4274
+ category: 'Site Monitor'
4275
+ url: 'https://eyemonit.com/'
4276
+ producer:
4277
+ name: 'EyeMonit'
4278
+ url: 'https://eyemonit.com/'
4279
+
4280
+ - regex: 'ThousandEyes'
4281
+ name: 'ThousandEyes'
4282
+ category: 'Site Monitor'
4283
+ url: 'https://www.thousandeyes.com/'
4284
+ producer:
4285
+ name: 'Cisco Systems, Inc.'
4286
+ url: 'https://www.cisco.com/'
4287
+
4288
+ - regex: 'OmtrBot/[\d.]+'
4289
+ name: 'OmtrBot'
4290
+ category: 'Site Monitor'
4291
+
4292
+ - regex: 'WebMon/[\d.]+'
4293
+ name: 'WebMon'
4294
+ category: 'Site Monitor'
4295
+
4296
+ - regex: 'AdsTxtCrawlerTP/[\d.]+'
4297
+ name: 'AdsTxtCrawlerTP'
4298
+ category: 'Crawler'
4299
+
4300
+ - regex: 'fragFINN'
4301
+ name: 'fragFINN'
4302
+ category: 'Crawler'
4303
+ url: 'https://www.fragfinn.de/'
4304
+ producer:
4305
+ name: 'fragFINN e.V.'
4306
+ url: 'https://www.fragfinn.de/'
4307
+
4308
+ - regex: 'Clickagy'
4309
+ name: 'Clickagy'
4310
+ category: 'Crawler'
4311
+ url: 'https://www.clickagy.com/'
4312
+ producer:
4313
+ name: 'Clickagy, LLC'
4314
+ url: 'https://www.clickagy.com/'
4315
+
4316
+ - regex: 'kiwitcms-gitops/[\d.]+'
4317
+ name: 'Kiwi TCMS GitOps'
4318
+ category: 'Service Agent'
4319
+ url: 'https://kiwitcms.org'
4320
+ producer:
4321
+ name: 'Open Technologies Bulgaria, Ltd.'
4322
+ url: 'https://kiwitcms.org'
4323
+
4324
+ - regex: 'webtru_crawler'
4325
+ name: 'webtru'
4326
+ category: 'Crawler'
4327
+ url: 'https://webtru.io/'
4328
+ producer:
4329
+ name: 'DataSign Inc.'
4330
+ url: 'https://datasign.jp/'
4331
+
4332
+ - regex: 'URLSuMaBot'
4333
+ name: 'URLSuMaBot'
4334
+ category: 'Crawler'
4335
+ url: 'https://www.urlsuma.de/'
4336
+
4337
+ - regex: '360JK yunjiankong'
4338
+ name: '360JK'
4339
+ category: 'Site Monitor'
4340
+ url: 'http://jk.cloud.360.cn/'
4341
+ producer:
4342
+ name: '360 Security Technology Inc.'
4343
+ url: 'https://www.360.cn/'
4344
+
4345
+ - regex: 'UCSBNetworkMeasurement'
4346
+ name: 'UCSB Network Measurement'
4347
+ category: 'Crawler'
4348
+ url: 'https://www.it.ucsb.edu/'
4349
+ producer:
4350
+ name: 'University of California, Santa Barbara'
4351
+ url: 'https://www.it.ucsb.edu/'
4352
+
4353
+ - regex: 'Plesk screenshot bot'
4354
+ name: 'Plesk Screenshot Service'
4355
+ category: 'Service Agent'
4356
+ url: 'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service'
4357
+ producer:
4358
+ name: 'Plesk International GmbH'
4359
+ url: 'https://www.plesk.com/'
4360
+
4361
+ - regex: 'Who\.is'
4362
+ name: 'Who.is Bot'
4363
+ category: 'Crawler'
4364
+ url: 'https://who.is/'
4365
+
4366
+ - regex: 'Probely'
4367
+ name: 'Probely'
4368
+ category: 'Security Checker'
4369
+ url: 'https://probely.com/sos/'
4370
+ producer:
4371
+ name: 'Probely - Soluções de Cibersegurança, S.A.'
4372
+ url: 'https://probely.com/'
4373
+
4374
+ - regex: 'Uptimia(?:/[\d.]+)?'
4375
+ name: 'Uptimia'
4376
+ category: 'Site Monitor'
4377
+ url: 'https://www.uptimia.com/'
4378
+ producer:
4379
+ name: 'JJ Online GmbH'
4380
+ url: 'https://www.uptimia.com/'
4381
+
4382
+ - regex: '2GDPR/[\d.]+'
4383
+ name: '2GDPR'
4384
+ category: 'Service Agent'
4385
+ url: 'https://2gdpr.com/tos'
4386
+ producer:
4387
+ name: '2GDPR'
4388
+ url: 'https://2gdpr.com/'
4389
+
4390
+ - regex: 'abuse\.xmco\.fr'
4391
+ name: 'Serenety'
4392
+ category: 'Security Checker'
4393
+ url: 'https://abuse.xmco.fr/'
4394
+ producer:
4395
+ name: 'XMCO, SASU'
4396
+ url: 'https://www.xmco.fr/'
4397
+
4398
+ - regex: 'CheckHost'
4399
+ name: 'CheckHost'
4400
+ category: 'Site Monitor'
4401
+ url: 'https://check-host.net/'
4402
+ producer:
4403
+ name: 'CheckHost'
4404
+ url: 'https://check-host.net/'
4405
+
4406
+ - regex: 'LAC_IAHarvester/[\d.]+'
4407
+ name: 'LAC IA Harvester'
4408
+ category: 'Crawler'
4409
+ url: 'https://library-archives.canada.ca/eng/services/government-canada/web-social-media-preservation-program/Pages/web-archive.aspx'
4410
+ producer:
4411
+ name: 'Library and Archives Canada'
4412
+ url: 'https://library-archives.canada.ca/'
4413
+
4414
+ - regex: 'InsytfulBot/[\d.]+'
4415
+ name: 'InsytfulBot'
4416
+ category: 'Crawler'
4417
+ url: 'https://www.insytful.com/'
4418
+ producer:
4419
+ name: 'Zengenti Limited'
4420
+ url: 'https://www.zengenti.com/'
4421
+
4422
+ - regex: 'statista\.com'
4423
+ name: 'Statista'
4424
+ category: 'Crawler'
4425
+ url: 'https://www.statista.com/'
4426
+ producer:
4427
+ name: 'Statista, Inc.'
4428
+ url: 'https://www.statista.com/'
4429
+
4430
+ - regex: 'SubstackContentFetch/[\d.]+'
4431
+ name: 'Substack Content Fetch'
4432
+ category: 'Crawler'
4433
+ url: 'https://substack.com/'
4434
+ producer:
4435
+ name: 'Substack, Inc.'
4436
+ url: 'https://substack.com/'
4437
+
4438
+ - regex: '^ds9'
4439
+ name: 'Deep SEARCH 9'
4440
+ category: 'Crawler'
4441
+ url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/'
4442
+ producer:
4443
+ name: 'Copyright Clearance Center, Inc.'
4444
+ url: 'https://www.copyright.com/'
4445
+
4446
+ - regex: 'LiveJournal\.com'
4447
+ name: 'LiveJournal'
4448
+ url: 'https://www.livejournal.com/'
4449
+ category: 'Feed Fetcher'
4450
+ producer:
4451
+ name: 'ООО "СИМ"'
4452
+ url: 'https://www.livejournal.com/'
4453
+
4454
+ - regex: 'bitdiscovery'
4455
+ name: 'Tenable.asm'
4456
+ category: 'Security Checker'
4457
+ url: 'https://bitdiscovery.com/'
4458
+ producer:
4459
+ name: 'Tenable, Inc.'
4460
+ url: 'https://www.tenable.com/'
4461
+
4462
+ - regex: 'Castopod/[\d.]+'
4463
+ name: 'Castopod'
4464
+ category: 'Crawler'
4465
+ url: 'https://www.castopod.org/'
4466
+
4467
+ - regex: 'Elastic/Synthetics'
4468
+ name: 'Elastic Synthetics'
4469
+ category: 'Site Monitor'
4470
+ url: 'https://github.com/elastic/synthetics'
4471
+ producer:
4472
+ name: 'Elasticsearch B.V.'
4473
+ url: 'https://www.elastic.co/'
4474
+
4475
+ - regex: 'WDG_Validator/[\d.]+'
4476
+ name: 'WDG HTML Validator'
4477
+ category: 'Validator'
4478
+ url: 'http://www.htmlhelp.com/tools/validator/'
4479
+
4480
+ - regex: 'scan@aegis.network'
4481
+ name: 'Aegis'
4482
+ category: 'Crawler'
4483
+ url: 'https://web.archive.org/web/20180910002802/http://www.aegis.network/'
4484
+
4485
+ - regex: 'CrawlyProjectCrawler/[\d.]+'
4486
+ name: 'Crawly Project'
4487
+ category: 'Crawler'
4488
+ url: 'https://web.archive.org/web/20240326141952/https://crawlyproject.digitaldragon.dev/'
4489
+
4490
+ - regex: 'BDFetch'
4491
+ name: 'BDFetch'
4492
+ category: 'Crawler'
4493
+ url: 'https://web.archive.org/web/20130821043949/http://www.branddimensions.com/'
4494
+
4495
+ - regex: 'PunkMap'
4496
+ name: 'Punk Map'
4497
+ category: 'Security Checker'
4498
+ url: 'https://github.com/openeasm/punkmap'
4499
+
4500
+ - regex: 'GenomeCrawlerd/[\d.]+'
4501
+ name: 'Deepfield Genome'
4502
+ category: 'Crawler'
4503
+ url: 'https://www.nokia.com/networks/ip-networks/deepfield/genome/'
4504
+ producer:
4505
+ name: 'Nokia Corporation'
4506
+ url: 'https://www.nokia.com/'
4507
+
4508
+ - regex: 'Gaisbot/[\d.]+'
4509
+ name: 'Gaisbot'
4510
+ category: 'Crawler'
4511
+ url: 'https://web.archive.org/web/20090604121511/https://gais.cs.ccu.edu.tw/robot.php'
4512
+
4513
+ - regex: 'FAST-WebCrawler/[\d.]+'
4514
+ name: 'AlltheWeb'
4515
+ category: 'Crawler'
4516
+ url: 'https://web.archive.org/web/20041020050801/http://www.alltheweb.com/help/webmaster/crawler'
4517
+
4518
+ - regex: 'ducks\.party'
4519
+ name: 'ducks.party'
4520
+ category: 'Security Checker'
4521
+ url: 'https://ducks.party/'
4522
+
4523
+ - regex: 'DepSpid/[\d.]+'
4524
+ name: 'DepSpid'
4525
+ category: 'Crawler'
4526
+ url: 'https://web.archive.org/web/20080321224033/http://about.depspid.net/'
4527
+
4528
+ - regex: 'Website-info\.net'
4529
+ name: 'Website-info'
4530
+ category: 'Crawler'
4531
+ url: 'https://website-info.net/robot'
4532
+ producer:
4533
+ name: 'Meins und Vogel GmbH'
4534
+ url: 'https://muv.com/'
4535
+
4536
+ # Generic bots
4537
+ - regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|url|Zeus|ZmEu)$'
4538
+ name: 'Generic Bot'
4539
+
3511
4540
  # Generic detections
3512
- - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider|^firefox$|^chrome$)([^a-z]|$)'
4541
+ - regex: '[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\d\.[x\d] |electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft |banshee-)project(?!or)|(?<!Google Wap |Blue |SpeedMode; )proxy|research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider(?! 8)|study|transcoder|uptime|user[ _]?agent|validator)(?:[^a-z]|$)'
3513
4542
  name: 'Generic Bot'