device_detector 1.1.2 → 1.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -1
- data/README.md +19 -5
- data/lib/device_detector/browser.rb +141 -7
- data/lib/device_detector/client_hint.rb +100 -32
- data/lib/device_detector/device.rb +188 -1
- data/lib/device_detector/os.rb +109 -7
- data/lib/device_detector/parser.rb +6 -5
- data/lib/device_detector/version.rb +1 -1
- data/lib/device_detector.rb +56 -23
- data/regexes/bots.yml +1242 -213
- data/regexes/client/browser_engine.yml +11 -2
- data/regexes/client/browsers.yml +543 -102
- data/regexes/client/feed_readers.yml +1 -1
- data/regexes/client/hints/apps.yml +29 -3
- data/regexes/client/hints/browsers.yml +87 -5
- data/regexes/client/libraries.yml +107 -1
- data/regexes/client/mediaplayers.yml +15 -1
- data/regexes/client/mobile_apps.yml +413 -117
- data/regexes/client/pim.yml +36 -2
- data/regexes/device/car_browsers.yml +16 -0
- data/regexes/device/consoles.yml +18 -5
- data/regexes/device/mobiles.yml +4180 -1210
- data/regexes/device/notebooks.yml +14 -1
- data/regexes/device/portable_media_player.yml +7 -1
- data/regexes/device/shell_tv.yml +12 -0
- data/regexes/device/televisions.yml +409 -47
- data/regexes/oss.yml +661 -238
- metadata +3 -3
data/regexes/bots.yml
CHANGED
@@ -5,6 +5,11 @@
|
|
5
5
|
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
6
6
|
###############
|
7
7
|
|
8
|
+
- regex: 'WireReaderBot(?:/([\d+.]+))?'
|
9
|
+
name: 'WireReaderBot'
|
10
|
+
category: 'Feed Fetcher'
|
11
|
+
url: 'https://wirereader.app/'
|
12
|
+
|
8
13
|
- regex: 'monitoring360bot'
|
9
14
|
name: '360 Monitoring'
|
10
15
|
category: 'Site Monitor'
|
@@ -61,7 +66,7 @@
|
|
61
66
|
name: 'Ahrefs Pte Ltd'
|
62
67
|
url: 'https://ahrefs.com/robot'
|
63
68
|
|
64
|
-
- regex: 'AhrefsSiteAudit/
|
69
|
+
- regex: 'AhrefsSiteAudit/[\d.]+'
|
65
70
|
name: 'AhrefsSiteAudit'
|
66
71
|
category: 'Site Monitor'
|
67
72
|
url: 'https://ahrefs.com/robot/site-audit'
|
@@ -85,7 +90,7 @@
|
|
85
90
|
name: 'Alexa Internet'
|
86
91
|
url: 'https://www.alexa.com'
|
87
92
|
|
88
|
-
- regex: 'Amazonbot'
|
93
|
+
- regex: 'Amazonbot/[\d.]+'
|
89
94
|
name: 'Amazon Bot'
|
90
95
|
category: 'Crawler'
|
91
96
|
url: 'https://developer.amazon.com/support/amazonbot'
|
@@ -93,6 +98,14 @@
|
|
93
98
|
name: 'Amazon.com, Inc.'
|
94
99
|
url: 'https://www.amazon.com/'
|
95
100
|
|
101
|
+
- regex: 'AmazonAdBot/[\d.]+'
|
102
|
+
name: 'Amazon AdBot'
|
103
|
+
category: 'Crawler'
|
104
|
+
url: 'https://adbot.amazon.com/'
|
105
|
+
producer:
|
106
|
+
name: 'Amazon.com, Inc.'
|
107
|
+
url: 'https://www.amazon.com/'
|
108
|
+
|
96
109
|
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
|
97
110
|
name: 'Amazon Route53 Health Check'
|
98
111
|
category: 'Service Agent'
|
@@ -119,10 +132,18 @@
|
|
119
132
|
- regex: 'Applebot'
|
120
133
|
name: 'Applebot'
|
121
134
|
category: 'Crawler'
|
122
|
-
url: 'https://support.apple.com/en-us/
|
135
|
+
url: 'https://support.apple.com/en-us/119829'
|
123
136
|
producer:
|
124
137
|
name: 'Apple Inc'
|
125
|
-
url: 'https://www.apple.com'
|
138
|
+
url: 'https://www.apple.com/'
|
139
|
+
|
140
|
+
- regex: 'iTMS'
|
141
|
+
name: 'iTMS'
|
142
|
+
category: 'Crawler'
|
143
|
+
url: 'https://support.apple.com/en-us/119829'
|
144
|
+
producer:
|
145
|
+
name: 'Apple Inc'
|
146
|
+
url: 'https://www.apple.com/'
|
126
147
|
|
127
148
|
- regex: 'AppSignalBot'
|
128
149
|
name: 'AppSignalBot'
|
@@ -220,7 +241,7 @@
|
|
220
241
|
name: 'Better Uptime'
|
221
242
|
url: 'https://betteruptime.com/'
|
222
243
|
|
223
|
-
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
|
244
|
+
- regex: 'MSNBot|msrbot|bingbot|bingadsbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
|
224
245
|
name: 'BingBot'
|
225
246
|
category: 'Search bot'
|
226
247
|
url: 'http://search.msn.com/msnbot.htmn'
|
@@ -371,7 +392,23 @@
|
|
371
392
|
name: 'CloudFlare'
|
372
393
|
url: 'https://www.cloudflare.com/'
|
373
394
|
|
374
|
-
- regex: '
|
395
|
+
- regex: 'Cloudflare-Smart-Transit'
|
396
|
+
name: 'Cloudflare Smart Transit'
|
397
|
+
category: 'Site Monitor'
|
398
|
+
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
399
|
+
producer:
|
400
|
+
name: 'CloudFlare'
|
401
|
+
url: 'https://www.cloudflare.com/'
|
402
|
+
|
403
|
+
- regex: 'CloudflareObservatory'
|
404
|
+
name: 'Cloudflare Observatory'
|
405
|
+
category: 'Site Monitor'
|
406
|
+
url: 'https://developers.cloudflare.com/speed/speed-test/run-speed-test'
|
407
|
+
producer:
|
408
|
+
name: 'CloudFlare'
|
409
|
+
url: 'https://www.cloudflare.com/'
|
410
|
+
|
411
|
+
- regex: 'https://developers\.cloudflare\.com/security-center/'
|
375
412
|
name: 'Cloudflare Security Insights'
|
376
413
|
category: 'Site Monitor'
|
377
414
|
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
@@ -379,7 +416,7 @@
|
|
379
416
|
name: 'CloudFlare'
|
380
417
|
url: 'https://www.cloudflare.com/'
|
381
418
|
|
382
|
-
- regex: 'coccoc
|
419
|
+
- regex: 'coccoc\.com'
|
383
420
|
name: 'Cốc Cốc Bot'
|
384
421
|
url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
|
385
422
|
category: 'Search bot'
|
@@ -435,7 +472,7 @@
|
|
435
472
|
name: 'Dataprovider B.V.'
|
436
473
|
url: 'https://www.dataprovider.com/'
|
437
474
|
|
438
|
-
- regex: 'Daum(
|
475
|
+
- regex: 'Daum(?!(?:Apps|Device))'
|
439
476
|
name: 'Daum'
|
440
477
|
category: 'Search bot'
|
441
478
|
url: 'http://tab.search.daum.net/aboutWebSearch_en.html'
|
@@ -459,7 +496,7 @@
|
|
459
496
|
name: 'Discovery Engine'
|
460
497
|
url: 'http://discoveryengine.com'
|
461
498
|
|
462
|
-
- regex: 'Domain Re-Animator Bot|support@domainreanimator
|
499
|
+
- regex: 'Domain Re-Animator Bot|support@domainreanimator\.com'
|
463
500
|
name: 'Domain Re-Animator Bot'
|
464
501
|
category: 'Crawler'
|
465
502
|
url: ''
|
@@ -538,13 +575,21 @@
|
|
538
575
|
name: 'SEOmoz, Inc.'
|
539
576
|
url: 'http://moz.com/'
|
540
577
|
|
541
|
-
- regex: '
|
542
|
-
name: 'Facebook
|
578
|
+
- regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)'
|
579
|
+
name: 'Facebook Crawler'
|
543
580
|
category: 'Social Media Agent'
|
544
|
-
url: 'https://
|
581
|
+
url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/'
|
545
582
|
producer:
|
546
|
-
name: '
|
547
|
-
url: '
|
583
|
+
name: 'Meta Platforms, Inc.'
|
584
|
+
url: 'https://www.meta.com/'
|
585
|
+
|
586
|
+
- regex: 'FacebookBot/[\d.]+'
|
587
|
+
name: 'FacebookBot'
|
588
|
+
category: 'Crawler'
|
589
|
+
url: 'https://developers.facebook.com/docs/sharing/bot'
|
590
|
+
producer:
|
591
|
+
name: 'Meta Platforms, Inc.'
|
592
|
+
url: 'https://www.meta.com/'
|
548
593
|
|
549
594
|
- regex: 'Feedbin'
|
550
595
|
name: 'Feedbin'
|
@@ -662,7 +707,7 @@
|
|
662
707
|
url: 'https://search.google.com/search-console/about'
|
663
708
|
producer:
|
664
709
|
name: 'Google Inc.'
|
665
|
-
url: '
|
710
|
+
url: 'https://www.google.com/'
|
666
711
|
|
667
712
|
- regex: 'Google Page Speed Insights'
|
668
713
|
name: 'Google PageSpeed Insights'
|
@@ -670,7 +715,7 @@
|
|
670
715
|
url: 'http://developers.google.com/speed/pagespeed/insights/'
|
671
716
|
producer:
|
672
717
|
name: 'Google Inc.'
|
673
|
-
url: '
|
718
|
+
url: 'https://www.google.com/'
|
674
719
|
|
675
720
|
- regex: 'google_partner_monitoring'
|
676
721
|
name: 'Google Partner Monitoring'
|
@@ -678,7 +723,7 @@
|
|
678
723
|
url: ''
|
679
724
|
producer:
|
680
725
|
name: 'Google Inc.'
|
681
|
-
url: '
|
726
|
+
url: 'https://www.google.com/'
|
682
727
|
|
683
728
|
- regex: 'Google-Cloud-Scheduler'
|
684
729
|
name: 'Google Cloud Scheduler'
|
@@ -694,7 +739,7 @@
|
|
694
739
|
url: 'https://search.google.com/structured-data/testing-tool'
|
695
740
|
producer:
|
696
741
|
name: 'Google Inc.'
|
697
|
-
url: '
|
742
|
+
url: 'https://www.google.com/'
|
698
743
|
|
699
744
|
- regex: 'GoogleStackdriverMonitoring'
|
700
745
|
name: 'Google Stackdriver Monitoring'
|
@@ -704,13 +749,21 @@
|
|
704
749
|
name: 'Google Inc.'
|
705
750
|
url: 'https://www.google.com'
|
706
751
|
|
752
|
+
- regex: 'Google-Transparency-Report'
|
753
|
+
name: 'Google Transparency Report'
|
754
|
+
category: 'Site Monitor'
|
755
|
+
url: 'https://transparencyreport.google.com/'
|
756
|
+
producer:
|
757
|
+
name: 'Google Inc.'
|
758
|
+
url: 'https://www.google.com/'
|
759
|
+
|
707
760
|
- regex: 'via ggpht\.com GoogleImageProxy'
|
708
761
|
name: 'Gmail Image Proxy'
|
709
762
|
category: 'Crawler'
|
710
763
|
url: ''
|
711
764
|
producer:
|
712
765
|
name: 'Google Inc.'
|
713
|
-
url: '
|
766
|
+
url: 'https://www.google.com/'
|
714
767
|
|
715
768
|
- regex: 'SeznamEmailProxy'
|
716
769
|
name: 'Seznam Email Proxy'
|
@@ -744,21 +797,37 @@
|
|
744
797
|
name: 'Visual Meta'
|
745
798
|
url: 'https://www.shopalike.cz/'
|
746
799
|
|
747
|
-
- regex: '
|
800
|
+
- regex: 'Googlebot-News'
|
801
|
+
name: 'Googlebot News'
|
802
|
+
category: 'Search bot'
|
803
|
+
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
|
804
|
+
producer:
|
805
|
+
name: 'Google Inc.'
|
806
|
+
url: 'https://www.google.com/'
|
807
|
+
|
808
|
+
- regex: 'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Safety|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\+/web/snippet'
|
748
809
|
name: 'Googlebot'
|
749
810
|
category: 'Search bot'
|
750
|
-
url: '
|
811
|
+
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
|
751
812
|
producer:
|
752
813
|
name: 'Google Inc.'
|
753
|
-
url: '
|
814
|
+
url: 'https://www.google.com/'
|
754
815
|
|
755
816
|
- regex: '^Google$'
|
756
817
|
name: 'Googlebot'
|
757
818
|
category: 'Search bot'
|
758
|
-
url: '
|
819
|
+
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
|
759
820
|
producer:
|
760
821
|
name: 'Google Inc.'
|
761
|
-
url: '
|
822
|
+
url: 'https://www.google.com/'
|
823
|
+
|
824
|
+
- regex: 'Google-Area120-PrivacyPolicyFetcher'
|
825
|
+
name: 'Google Area 120 Privacy Policy Fetcher'
|
826
|
+
category: 'Crawler'
|
827
|
+
url: 'https://area120.google.com/'
|
828
|
+
producer:
|
829
|
+
name: 'Google Inc.'
|
830
|
+
url: 'https://www.google.com/'
|
762
831
|
|
763
832
|
- regex: 'heritrix'
|
764
833
|
name: 'Heritrix'
|
@@ -780,7 +849,7 @@
|
|
780
849
|
category: 'Crawler'
|
781
850
|
url: 'http://vuhuv.com/bot.html'
|
782
851
|
|
783
|
-
- regex: 'HTTPMon'
|
852
|
+
- regex: 'HTTPMon/[\d.]+'
|
784
853
|
name: 'HTTPMon'
|
785
854
|
category: 'Site Monitor'
|
786
855
|
url: 'http://www.httpmon.com'
|
@@ -796,7 +865,7 @@
|
|
796
865
|
name: ''
|
797
866
|
url: ''
|
798
867
|
|
799
|
-
- regex: 'inoreader
|
868
|
+
- regex: 'inoreader\.com'
|
800
869
|
name: 'inoreader'
|
801
870
|
category: 'Feed Reader'
|
802
871
|
url: 'https://www.inoreader.com'
|
@@ -844,7 +913,7 @@
|
|
844
913
|
name: ''
|
845
914
|
url: ''
|
846
915
|
|
847
|
-
- regex: '
|
916
|
+
- regex: '[A-z0-9]*-Lighthouse'
|
848
917
|
name: 'Lighthouse'
|
849
918
|
category: 'Site Monitor'
|
850
919
|
url: 'https://developers.google.com/web/tools/lighthouse'
|
@@ -878,7 +947,8 @@
|
|
878
947
|
|
879
948
|
- regex: 'ltx71'
|
880
949
|
name: 'LTX71'
|
881
|
-
|
950
|
+
category: 'Security Checker'
|
951
|
+
url: 'https://ltx71.com/'
|
882
952
|
producer:
|
883
953
|
name: ''
|
884
954
|
url: ''
|
@@ -907,7 +977,7 @@
|
|
907
977
|
name: ''
|
908
978
|
url: ''
|
909
979
|
|
910
|
-
- regex: 'masscan-ng/
|
980
|
+
- regex: 'masscan-ng/[\d.]+'
|
911
981
|
name: 'masscan-ng'
|
912
982
|
url: 'https://github.com/bi-zone/masscan-ng'
|
913
983
|
category: 'Crawler'
|
@@ -915,7 +985,7 @@
|
|
915
985
|
name: 'BIZON, OOO'
|
916
986
|
url: 'https://bi.zone/'
|
917
987
|
|
918
|
-
- regex: 'masscan'
|
988
|
+
- regex: '.*masscan'
|
919
989
|
name: 'masscan'
|
920
990
|
url: 'https://github.com/robertdavidgraham/masscan'
|
921
991
|
category: 'Crawler'
|
@@ -1003,11 +1073,11 @@
|
|
1003
1073
|
name: 'Nagios Plugins Development Team'
|
1004
1074
|
url: 'https://nagios.org'
|
1005
1075
|
|
1006
|
-
- regex: 'nbertaupete95\(at\)gmail
|
1076
|
+
- regex: 'nbertaupete95\(at\)gmail\.com'
|
1007
1077
|
name: 'nbertaupete95'
|
1008
1078
|
category: 'Crawler'
|
1009
1079
|
|
1010
|
-
- regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
|
1080
|
+
- regex: 'Netcraft(?: Web Server Survey| SSL Server Survey|SurveyAgent)'
|
1011
1081
|
name: 'Netcraft Survey Bot'
|
1012
1082
|
category: 'Search bot'
|
1013
1083
|
url: ''
|
@@ -1031,7 +1101,7 @@
|
|
1031
1101
|
name: ''
|
1032
1102
|
url: ''
|
1033
1103
|
|
1034
|
-
- regex: 'NewsBlur .*(Fetcher|Finder)'
|
1104
|
+
- regex: 'NewsBlur .*(?:Fetcher|Finder)'
|
1035
1105
|
name: 'NewsBlur'
|
1036
1106
|
url: 'http://www.newsblur.com'
|
1037
1107
|
category: 'Feed Fetcher'
|
@@ -1070,10 +1140,18 @@
|
|
1070
1140
|
name: 'Nuzzel'
|
1071
1141
|
url: 'https://www.nuzzel.com/'
|
1072
1142
|
|
1143
|
+
- regex: 'NodePing'
|
1144
|
+
name: 'NodePing'
|
1145
|
+
category: 'Site Monitor'
|
1146
|
+
url: 'https://nodeping.com'
|
1147
|
+
producer:
|
1148
|
+
name: 'NodePing'
|
1149
|
+
url: 'https://nodeping.com'
|
1150
|
+
|
1073
1151
|
- regex: 'Octopus [0-9]'
|
1074
1152
|
name: 'Octopus'
|
1075
1153
|
|
1076
|
-
- regex: 'OnlineOrNot
|
1154
|
+
- regex: 'OnlineOrNot\.com_bot'
|
1077
1155
|
name: 'OnlineOrNot Bot'
|
1078
1156
|
category: 'Site Monitor'
|
1079
1157
|
url: 'https://onlineornot.com/website-monitoring'
|
@@ -1142,7 +1220,7 @@
|
|
1142
1220
|
name: 'PHP Server Monitor'
|
1143
1221
|
url: 'http://www.phpservermonitor.org/'
|
1144
1222
|
|
1145
|
-
- regex: 'Pocket(?:ImageCache|Parser)/
|
1223
|
+
- regex: 'Pocket(?:ImageCache|Parser)/[\d.]+'
|
1146
1224
|
name: 'Pocket'
|
1147
1225
|
category: 'Read-it-later Service'
|
1148
1226
|
url: 'https://getpocket.com/pocketparser_ua'
|
@@ -1289,12 +1367,36 @@
|
|
1289
1367
|
url: ''
|
1290
1368
|
|
1291
1369
|
- regex: 'SemrushBot'
|
1292
|
-
name: '
|
1370
|
+
name: 'SemrushBot'
|
1371
|
+
category: 'Crawler'
|
1372
|
+
url: 'https://www.semrush.com/bot/'
|
1373
|
+
producer:
|
1374
|
+
name: 'Semrush Inc.'
|
1375
|
+
url: 'https://www.semrush.com/'
|
1376
|
+
|
1377
|
+
- regex: 'SerpReputationManagementAgent/[\d.]+'
|
1378
|
+
name: 'Semrush Reputation Management'
|
1379
|
+
category: 'Service Agent'
|
1380
|
+
url: 'https://www.semrush.com/bot/'
|
1381
|
+
producer:
|
1382
|
+
name: 'Semrush Inc.'
|
1383
|
+
url: 'https://www.semrush.com/'
|
1384
|
+
|
1385
|
+
- regex: 'SplitSignalBot'
|
1386
|
+
name: 'SplitSignalBot'
|
1293
1387
|
category: 'Crawler'
|
1294
|
-
url: '
|
1388
|
+
url: 'https://www.semrush.com/bot/'
|
1295
1389
|
producer:
|
1296
|
-
name: '
|
1297
|
-
url: '
|
1390
|
+
name: 'Semrush Inc.'
|
1391
|
+
url: 'https://www.semrush.com/'
|
1392
|
+
|
1393
|
+
- regex: 'SiteAuditBot/[\d.]+'
|
1394
|
+
name: 'SiteAuditBot'
|
1395
|
+
category: 'Crawler'
|
1396
|
+
url: 'https://www.semrush.com/bot/'
|
1397
|
+
producer:
|
1398
|
+
name: 'Semrush Inc.'
|
1399
|
+
url: 'https://www.semrush.com/'
|
1298
1400
|
|
1299
1401
|
- regex: 'SensikaBot'
|
1300
1402
|
name: 'Sensika Bot'
|
@@ -1304,7 +1406,7 @@
|
|
1304
1406
|
name: 'Sensika'
|
1305
1407
|
url: 'http://sensika.com'
|
1306
1408
|
|
1307
|
-
- regex: 'SEOENG(World)?Bot'
|
1409
|
+
- regex: 'SEOENG(?:World)?Bot'
|
1308
1410
|
name: 'SEOENGBot'
|
1309
1411
|
category: 'Crawler'
|
1310
1412
|
url: 'http://www.seoengine.com/seoengbot.htm'
|
@@ -1394,7 +1496,7 @@
|
|
1394
1496
|
category: 'Crawler'
|
1395
1497
|
url: 'http://ricks-apps.com/osx/sitesucker/'
|
1396
1498
|
|
1397
|
-
- regex: 'sixy
|
1499
|
+
- regex: 'sixy\.ch'
|
1398
1500
|
name: 'Sixy.ch'
|
1399
1501
|
category: 'Site Monitor'
|
1400
1502
|
url: 'http://sixy.ch'
|
@@ -1410,7 +1512,7 @@
|
|
1410
1512
|
name: 'Slack Technologies'
|
1411
1513
|
url: 'http://slack.com'
|
1412
1514
|
|
1413
|
-
- regex: '
|
1515
|
+
- regex: 'Sogou[ -](?:head|inst|Orion|Pic|Test|web)[ -]spider|New-Sogou-Spider'
|
1414
1516
|
name: 'Sogou Spider'
|
1415
1517
|
category: 'Search bot'
|
1416
1518
|
url: 'http://www.sogou.com/docs/help/webmasters.htm'
|
@@ -1535,11 +1637,19 @@
|
|
1535
1637
|
name: ''
|
1536
1638
|
url: ''
|
1537
1639
|
|
1538
|
-
- regex: 'theoldreader
|
1640
|
+
- regex: 'theoldreader\.com'
|
1539
1641
|
name: 'theoldreader'
|
1540
1642
|
category: 'Feed Reader'
|
1541
1643
|
url: 'https://theoldreader.com'
|
1542
1644
|
|
1645
|
+
- regex: 'Trackable/0\.1'
|
1646
|
+
name: 'Chartable'
|
1647
|
+
category: 'Site Monitor'
|
1648
|
+
url: 'https://help.chartable.com/article/34-what-is-the-trackable-analytics-prefix'
|
1649
|
+
producer:
|
1650
|
+
name: 'Chartable'
|
1651
|
+
url: 'https://chartable.com'
|
1652
|
+
|
1543
1653
|
- regex: 'trendictionbot'
|
1544
1654
|
name: 'Trendiction Bot'
|
1545
1655
|
category: 'Crawler'
|
@@ -1556,13 +1666,13 @@
|
|
1556
1666
|
name: 'iParadigms, LLC.'
|
1557
1667
|
url: 'http://www.turnitin.com'
|
1558
1668
|
|
1559
|
-
- regex: 'TweetedTimes
|
1669
|
+
- regex: 'TweetedTimes'
|
1560
1670
|
name: 'TweetedTimes Bot'
|
1561
1671
|
category: 'Crawler'
|
1562
|
-
url: '
|
1672
|
+
url: 'https://tweetedtimes.com/'
|
1563
1673
|
producer:
|
1564
1674
|
name: 'TweetedTimes'
|
1565
|
-
url: '
|
1675
|
+
url: 'https://tweetedtimes.com/'
|
1566
1676
|
|
1567
1677
|
- regex: 'TweetmemeBot'
|
1568
1678
|
name: 'Tweetmeme Bot'
|
@@ -1603,21 +1713,21 @@
|
|
1603
1713
|
name: 'UkrNet Ltd'
|
1604
1714
|
url: 'https://www.ukr.net/'
|
1605
1715
|
|
1606
|
-
- regex: '
|
1716
|
+
- regex: 'Uptime(?:bot)?/[\d.]+'
|
1607
1717
|
name: 'Uptimebot'
|
1608
1718
|
category: 'Site Monitor'
|
1609
|
-
url: 'https://uptime.com/
|
1719
|
+
url: 'https://uptime.com/uptime-bot'
|
1610
1720
|
producer:
|
1611
1721
|
name: 'Uptime'
|
1612
|
-
url: 'https://uptime.com'
|
1722
|
+
url: 'https://uptime.com/'
|
1613
1723
|
|
1614
1724
|
- regex: 'UptimeRobot'
|
1615
|
-
name: '
|
1725
|
+
name: 'UptimeRobot'
|
1616
1726
|
category: 'Site Monitor'
|
1617
|
-
url: ''
|
1727
|
+
url: 'https://uptimerobot.com/'
|
1618
1728
|
producer:
|
1619
1729
|
name: 'Uptime Robot'
|
1620
|
-
url: '
|
1730
|
+
url: 'https://uptimerobot.com/'
|
1621
1731
|
|
1622
1732
|
- regex: 'URLAppendBot'
|
1623
1733
|
name: 'URLAppendBot'
|
@@ -1638,10 +1748,18 @@
|
|
1638
1748
|
- regex: 'vkShare; '
|
1639
1749
|
name: 'VK Share Button'
|
1640
1750
|
category: 'Crawler'
|
1641
|
-
url: '
|
1751
|
+
url: 'https://dev.vk.com/en/widgets/share'
|
1752
|
+
producer:
|
1753
|
+
name: 'VK'
|
1754
|
+
url: 'https://vk.com/'
|
1755
|
+
|
1756
|
+
- regex: 'VKRobot'
|
1757
|
+
name: 'VK Robot'
|
1758
|
+
category: 'Crawler'
|
1759
|
+
url: 'https://dev.vk.com/en/'
|
1642
1760
|
producer:
|
1643
1761
|
name: 'VK'
|
1644
|
-
url: '
|
1762
|
+
url: 'https://vk.com/'
|
1645
1763
|
|
1646
1764
|
- regex: 'VSMCrawler'
|
1647
1765
|
name: 'Visual Site Mapper Crawler'
|
@@ -1675,7 +1793,7 @@
|
|
1675
1793
|
name: 'W3C'
|
1676
1794
|
url: 'http://www.w3.org'
|
1677
1795
|
|
1678
|
-
- regex: 'W3C_Validator|Validator
|
1796
|
+
- regex: 'W3C_Validator|Validator\.nu'
|
1679
1797
|
name: 'W3C Markup Validation Service'
|
1680
1798
|
category: 'Validator'
|
1681
1799
|
url: 'http://validator.w3.org/services'
|
@@ -1699,6 +1817,14 @@
|
|
1699
1817
|
name: 'W3C'
|
1700
1818
|
url: 'http://www.w3.org'
|
1701
1819
|
|
1820
|
+
- regex: 'P3P Validator'
|
1821
|
+
name: 'W3C P3P Validator'
|
1822
|
+
category: 'Validator'
|
1823
|
+
url: 'https://www.w3.org/P3P/validator.html'
|
1824
|
+
producer:
|
1825
|
+
name: 'W3C'
|
1826
|
+
url: 'https://www.w3.org'
|
1827
|
+
|
1702
1828
|
- regex: 'Wappalyzer'
|
1703
1829
|
name: 'Wappalyzer'
|
1704
1830
|
url: 'https://github.com/AliasIO/Wappalyzer'
|
@@ -1735,6 +1861,22 @@
|
|
1735
1861
|
name: 'WebSitePulse'
|
1736
1862
|
url: 'http://www.websitepulse.com/'
|
1737
1863
|
|
1864
|
+
- regex: 'WordPress.+isitwp\.com'
|
1865
|
+
name: 'IsItWP'
|
1866
|
+
category: 'Crawler'
|
1867
|
+
url: 'https://www.isitwp.com/'
|
1868
|
+
producer:
|
1869
|
+
name: 'WPBeginner, LLC'
|
1870
|
+
url: 'https://www.wpbeginner.com/'
|
1871
|
+
|
1872
|
+
- regex: 'Automattic Analytics Crawler/[\d.]+'
|
1873
|
+
name: 'Automattic Analytics'
|
1874
|
+
category: 'Crawler'
|
1875
|
+
url: 'https://wordpress.com/crawler/'
|
1876
|
+
producer:
|
1877
|
+
name: 'Wordpress.org'
|
1878
|
+
url: 'https://wordpress.org/'
|
1879
|
+
|
1738
1880
|
- regex: 'WordPress'
|
1739
1881
|
name: 'WordPress'
|
1740
1882
|
category: 'Service Agent'
|
@@ -1815,13 +1957,29 @@
|
|
1815
1957
|
name: 'Yahoo! Japan Corp.'
|
1816
1958
|
url: 'https://www.yahoo.co.jp/'
|
1817
1959
|
|
1818
|
-
- regex: '
|
1960
|
+
- regex: 'Y!J-ASR'
|
1961
|
+
name: 'Yahoo! Japan ASR'
|
1962
|
+
category: 'Crawler'
|
1963
|
+
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
1964
|
+
producer:
|
1965
|
+
name: 'Yahoo! Japan Corp.'
|
1966
|
+
url: 'https://www.yahoo.co.jp/'
|
1967
|
+
|
1968
|
+
- regex: '^Y!J'
|
1969
|
+
name: 'Yahoo! Japan'
|
1970
|
+
category: 'Crawler'
|
1971
|
+
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
1972
|
+
producer:
|
1973
|
+
name: 'Yahoo! Japan Corp.'
|
1974
|
+
url: 'https://www.yahoo.co.jp/'
|
1975
|
+
|
1976
|
+
- regex: 'Yandex(?:(?:\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher'
|
1819
1977
|
name: 'Yandex Bot'
|
1820
1978
|
category: 'Search bot'
|
1821
|
-
url: '
|
1979
|
+
url: 'https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html'
|
1822
1980
|
producer:
|
1823
1981
|
name: 'Yandex LLC'
|
1824
|
-
url: '
|
1982
|
+
url: 'https://yandex.com/company/'
|
1825
1983
|
|
1826
1984
|
- regex: 'Yeti|NaverJapan|AdsBot-Naver'
|
1827
1985
|
name: 'Yeti/Naverbot'
|
@@ -1881,7 +2039,7 @@
|
|
1881
2039
|
name: 'Yottaa'
|
1882
2040
|
url: 'http://www.yottaa.com/'
|
1883
2041
|
|
1884
|
-
- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857
|
2042
|
+
- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857'
|
1885
2043
|
name: 'Yahoo Gemini'
|
1886
2044
|
category: 'Crawler'
|
1887
2045
|
url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
|
@@ -1905,7 +2063,7 @@
|
|
1905
2063
|
name: 'HubPages, Inc.'
|
1906
2064
|
url: 'https://discover.hubpages.com/'
|
1907
2065
|
|
1908
|
-
- regex: 'Pinterest(bot)
|
2066
|
+
- regex: 'Pinterest(?:bot)?/[\d.]+.*www\.pinterest\.com'
|
1909
2067
|
name: 'Pinterest'
|
1910
2068
|
url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
|
1911
2069
|
category: 'Crawler'
|
@@ -1913,7 +2071,7 @@
|
|
1913
2071
|
name: 'Pinterest'
|
1914
2072
|
url: 'https://www.pinterest.com/'
|
1915
2073
|
|
1916
|
-
- regex: 'Site24x7'
|
2074
|
+
- regex: '.*Site24x7'
|
1917
2075
|
name: 'Site24x7 Website Monitoring'
|
1918
2076
|
category: 'Site Monitor'
|
1919
2077
|
url: 'https://www.site24x7.com/site24x7-faq.html'
|
@@ -1921,6 +2079,14 @@
|
|
1921
2079
|
name: 'Site24x7'
|
1922
2080
|
url: 'https://www.site24x7.com'
|
1923
2081
|
|
2082
|
+
- regex: '.* HLB/[\d.]+'
|
2083
|
+
name: 'Site24x7 Defacement Monitor'
|
2084
|
+
category: 'Site Monitor'
|
2085
|
+
url: 'https://support.site24x7.com/portal/en/kb/articles/default-user-agent-used-in-website-defacement-monitor'
|
2086
|
+
producer:
|
2087
|
+
name: 'Site24x7'
|
2088
|
+
url: 'https://www.site24x7.com/'
|
2089
|
+
|
1924
2090
|
- regex: 's~snapchat-proxy'
|
1925
2091
|
name: 'Snapchat Proxy'
|
1926
2092
|
category: 'Crawler'
|
@@ -1937,6 +2103,14 @@
|
|
1937
2103
|
name: 'Snapchat Inc.'
|
1938
2104
|
url: 'https://www.snapchat.com/'
|
1939
2105
|
|
2106
|
+
- regex: 'SnapchatAds/[\d.]+'
|
2107
|
+
name: 'Snapchat Ads'
|
2108
|
+
category: 'Crawler'
|
2109
|
+
url: 'https://businesshelp.snapchat.com/s/article/adsbot-crawler?language=en_US'
|
2110
|
+
producer:
|
2111
|
+
name: 'Snapchat Inc.'
|
2112
|
+
url: 'https://www.snapchat.com/'
|
2113
|
+
|
1940
2114
|
- regex: "Let's Encrypt validation server"
|
1941
2115
|
name: "Let's Encrypt Validation"
|
1942
2116
|
category: 'Service Agent'
|
@@ -2029,22 +2203,19 @@
|
|
2029
2203
|
- regex: 'AdMantX.*admantx\.com'
|
2030
2204
|
name: 'ADMantX'
|
2031
2205
|
|
2032
|
-
- regex: 'Server Density Service Monitoring
|
2206
|
+
- regex: 'Server Density Service Monitoring'
|
2033
2207
|
name: 'Server Density'
|
2034
2208
|
|
2035
2209
|
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
2036
2210
|
name: 'RSSRadio Bot'
|
2037
2211
|
|
2038
|
-
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel|Magellan|CustomUserAgent)'
|
2039
|
-
name: 'Generic Bot'
|
2040
|
-
|
2041
2212
|
- regex: '^sentry'
|
2042
2213
|
name: 'Sentry Bot'
|
2043
2214
|
producer:
|
2044
2215
|
name: 'Sentry'
|
2045
2216
|
url: 'https://sentry.io'
|
2046
2217
|
|
2047
|
-
- regex: '^Spotify/
|
2218
|
+
- regex: '^Spotify/[\d.]+$'
|
2048
2219
|
name: 'Spotify'
|
2049
2220
|
producer:
|
2050
2221
|
name: 'Spotify'
|
@@ -2102,14 +2273,6 @@
|
|
2102
2273
|
name: 'Siteimprove GmbH'
|
2103
2274
|
url: 'https://siteimprove.com/'
|
2104
2275
|
|
2105
|
-
- regex: 'Image size by Siteimprove\.com'
|
2106
|
-
name: 'Siteimprove'
|
2107
|
-
category: 'Search bot'
|
2108
|
-
url: 'https://siteimprove.com/'
|
2109
|
-
producer:
|
2110
|
-
name: 'Siteimprove GmbH'
|
2111
|
-
url: 'https://siteimprove.com/'
|
2112
|
-
|
2113
2276
|
- regex: 'CATExplorador'
|
2114
2277
|
name: 'CATExplorador'
|
2115
2278
|
category: 'Search bot'
|
@@ -2134,7 +2297,7 @@
|
|
2134
2297
|
name: 'Idee Inc.'
|
2135
2298
|
url: 'http://ideeinc.com/'
|
2136
2299
|
|
2137
|
-
- regex: 'zelist
|
2300
|
+
- regex: 'zelist\.ro feed parser'
|
2138
2301
|
name: 'Ze List'
|
2139
2302
|
url: 'https://www.zelist.ro/'
|
2140
2303
|
category: 'Feed Fetcher'
|
@@ -2182,21 +2345,21 @@
|
|
2182
2345
|
name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
|
2183
2346
|
url: 'https://www.wikido.com/'
|
2184
2347
|
|
2185
|
-
- regex: '
|
2348
|
+
- regex: 'Awario(?:Smart)?Bot'
|
2186
2349
|
name: 'Awario'
|
2187
2350
|
category: 'Search bot'
|
2188
2351
|
url: 'https://awario.com/bots.html'
|
2189
2352
|
producer:
|
2190
|
-
name: '
|
2191
|
-
url: 'https://
|
2353
|
+
name: 'TechFusion Ltd.'
|
2354
|
+
url: 'https://www.techfusion.com.cy/'
|
2192
2355
|
|
2193
2356
|
- regex: 'AwarioRssBot'
|
2194
2357
|
name: 'Awario'
|
2195
2358
|
category: 'Feed Fetcher'
|
2196
2359
|
url: 'https://awario.com/bots.html'
|
2197
2360
|
producer:
|
2198
|
-
name: '
|
2199
|
-
url: 'https://
|
2361
|
+
name: 'TechFusion Ltd.'
|
2362
|
+
url: 'https://www.techfusion.com.cy/'
|
2200
2363
|
|
2201
2364
|
- regex: 'oBot'
|
2202
2365
|
name: 'oBot'
|
@@ -2288,7 +2451,7 @@
|
|
2288
2451
|
category: 'Crawler'
|
2289
2452
|
url: 'https://serendeputy.com/about/serendeputy-bot'
|
2290
2453
|
|
2291
|
-
- regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx
|
2454
|
+
- regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx\.com.*service-fetcher'
|
2292
2455
|
name: 'ADmantX Service Fetcher'
|
2293
2456
|
category: 'Service bot'
|
2294
2457
|
url: 'https://www.admantx.com/service-fetcher.html'
|
@@ -2324,7 +2487,7 @@
|
|
2324
2487
|
name: 'PPC Labs LLC'
|
2325
2488
|
url: 'https://www.adbeat.com/'
|
2326
2489
|
|
2327
|
-
- regex: 'BW/
|
2490
|
+
- regex: '(?:BuiltWith|BW)/[\d.]+'
|
2328
2491
|
name: 'BuiltWith'
|
2329
2492
|
category: 'Crawler'
|
2330
2493
|
url: 'https://builtwith.com/biup'
|
@@ -2332,7 +2495,7 @@
|
|
2332
2495
|
name: 'BuiltWith Pty Ltd'
|
2333
2496
|
url: 'https://builtwith.com/'
|
2334
2497
|
|
2335
|
-
- regex: 'https://whatis
|
2498
|
+
- regex: 'https://whatis\.contentkingapp\.com'
|
2336
2499
|
name: 'ContentKing'
|
2337
2500
|
category: 'Site Monitor'
|
2338
2501
|
url: 'https://whatis.contentkingapp.com/'
|
@@ -2348,7 +2511,7 @@
|
|
2348
2511
|
name: 'MicroAd, Inc.'
|
2349
2512
|
url: 'https://www.microad.co.jp/'
|
2350
2513
|
|
2351
|
-
- regex: 'PingAdmin
|
2514
|
+
- regex: 'PingAdmin\.Ru'
|
2352
2515
|
name: 'PingAdmin.Ru'
|
2353
2516
|
category: 'Site Monitor'
|
2354
2517
|
url: 'https://ping-admin.ru/'
|
@@ -2366,7 +2529,7 @@
|
|
2366
2529
|
name: 'WebTehRazrabotka LLC'
|
2367
2530
|
url: 'https://webdatastats.com/'
|
2368
2531
|
|
2369
|
-
- regex: 'parse
|
2532
|
+
- regex: 'parse\.ly scraper'
|
2370
2533
|
name: 'parse.ly'
|
2371
2534
|
category: 'Crawler'
|
2372
2535
|
url: 'https://www.parse.ly/help/integration/crawler'
|
@@ -2379,7 +2542,7 @@
|
|
2379
2542
|
category: 'Site Monitor'
|
2380
2543
|
url: 'http://cloudsystemnetworks.com'
|
2381
2544
|
|
2382
|
-
- regex: 'HeartRails_Capture
|
2545
|
+
- regex: 'HeartRails_Capture/[\d.]+'
|
2383
2546
|
name: 'Heart Rails Capture'
|
2384
2547
|
category: 'Service Agent'
|
2385
2548
|
url: 'http://capture.heartrails.com'
|
@@ -2387,9 +2550,12 @@
|
|
2387
2550
|
- regex: 'Project-Resonance'
|
2388
2551
|
name: 'Project Resonance'
|
2389
2552
|
category: 'Crawler'
|
2390
|
-
url: '
|
2553
|
+
url: 'https://project-resonance.com/'
|
2554
|
+
producer:
|
2555
|
+
name: 'RedHunt Labs Limited'
|
2556
|
+
url: 'https://redhuntlabs.com/'
|
2391
2557
|
|
2392
|
-
- regex: 'DataXu
|
2558
|
+
- regex: 'DataXu/[\d.]+'
|
2393
2559
|
name: 'DataXu'
|
2394
2560
|
category: 'Service Agent'
|
2395
2561
|
url: 'https://advertising.roku.com/dataxu'
|
@@ -2426,7 +2592,7 @@
|
|
2426
2592
|
category: 'Crawler'
|
2427
2593
|
url: 'http://www.webtop.com/'
|
2428
2594
|
|
2429
|
-
- regex: 'PageThing
|
2595
|
+
- regex: 'PageThing\.com'
|
2430
2596
|
name: 'PageThing'
|
2431
2597
|
category: 'Crawler'
|
2432
2598
|
url: 'https://www.pagething.com/'
|
@@ -2471,10 +2637,18 @@
|
|
2471
2637
|
url: 'https://github.com/projectdiscovery/httpx'
|
2472
2638
|
category: 'Crawler'
|
2473
2639
|
producer:
|
2474
|
-
name: ''
|
2475
|
-
url: ''
|
2640
|
+
name: 'ProjectDiscovery, Inc.'
|
2641
|
+
url: 'https://projectdiscovery.io/'
|
2642
|
+
|
2643
|
+
- regex: '.*\.oast\.'
|
2644
|
+
name: 'Interactsh'
|
2645
|
+
category: 'Security Checker'
|
2646
|
+
url: 'https://github.com/projectdiscovery/interactsh'
|
2647
|
+
producer:
|
2648
|
+
name: 'ProjectDiscovery, Inc.'
|
2649
|
+
url: 'https://projectdiscovery.io/'
|
2476
2650
|
|
2477
|
-
- regex: 'scaninfo@(?:expanseinc|paloaltonetworks)
|
2651
|
+
- regex: 'scaninfo@(?:expanseinc|paloaltonetworks)\.com'
|
2478
2652
|
name: 'Expanse'
|
2479
2653
|
category: 'Security Checker'
|
2480
2654
|
url: 'https://expanse.co/'
|
@@ -2505,12 +2679,12 @@
|
|
2505
2679
|
name: 'Hatena Co., Ltd.'
|
2506
2680
|
url: 'https://www.hatena.ne.jp'
|
2507
2681
|
|
2508
|
-
- regex: 'RyowlEngine/
|
2682
|
+
- regex: 'RyowlEngine/[\d.]+'
|
2509
2683
|
name: 'Ryowl'
|
2510
2684
|
category: 'Crawler'
|
2511
2685
|
url: 'https://ryowl.org'
|
2512
2686
|
|
2513
|
-
- regex: 'OdklBot/
|
2687
|
+
- regex: 'OdklBot/[\d.]+'
|
2514
2688
|
name: 'Odnoklassniki Bot'
|
2515
2689
|
category: 'Crawler'
|
2516
2690
|
url: 'https://odnoklassniki.ru'
|
@@ -2525,7 +2699,7 @@
|
|
2525
2699
|
category: 'Crawler'
|
2526
2700
|
url: 'https://www.zoominfo.com'
|
2527
2701
|
|
2528
|
-
- regex: 'WeViKaBot/
|
2702
|
+
- regex: 'WeViKaBot/[\d.]+'
|
2529
2703
|
name: 'WeViKaBot'
|
2530
2704
|
category: 'Crawler'
|
2531
2705
|
url: 'http://www.wevika.de'
|
@@ -2535,7 +2709,7 @@
|
|
2535
2709
|
category: 'Crawler'
|
2536
2710
|
url: 'https://www.seokicks.de/robot.html'
|
2537
2711
|
|
2538
|
-
- regex: 'Plukkie/
|
2712
|
+
- regex: 'Plukkie/[\d.]+'
|
2539
2713
|
name: 'Plukkie'
|
2540
2714
|
category: 'Crawler'
|
2541
2715
|
url: 'http://www.botje.com/plukkie.htm'
|
@@ -2545,22 +2719,22 @@
|
|
2545
2719
|
category: 'Crawler'
|
2546
2720
|
url: 'https://www.comscore.com/Web-Crawler'
|
2547
2721
|
|
2548
|
-
- regex: 'SurdotlyBot/
|
2722
|
+
- regex: 'SurdotlyBot/[\d.]+'
|
2549
2723
|
name: 'SurdotlyBot'
|
2550
2724
|
category: 'Crawler'
|
2551
2725
|
url: 'http://sur.ly/bot.html'
|
2552
2726
|
|
2553
|
-
- regex: 'Gowikibot/
|
2727
|
+
- regex: 'Gowikibot/[\d.]+'
|
2554
2728
|
name: 'Gowikibot'
|
2555
2729
|
category: 'Crawler'
|
2556
2730
|
url: 'http:/www.gowikibot.com'
|
2557
2731
|
|
2558
|
-
- regex: 'SabsimBot/
|
2732
|
+
- regex: 'SabsimBot/[\d.]+'
|
2559
2733
|
name: 'SabsimBot'
|
2560
2734
|
category: 'Crawler'
|
2561
2735
|
url: 'https://sabsim.com'
|
2562
2736
|
|
2563
|
-
- regex: 'LumtelBot/
|
2737
|
+
- regex: 'LumtelBot/[\d.]+'
|
2564
2738
|
name: 'LumtelBot'
|
2565
2739
|
category: 'Crawler'
|
2566
2740
|
url: 'https://umtel.com'
|
@@ -2570,12 +2744,12 @@
|
|
2570
2744
|
category: 'Crawler'
|
2571
2745
|
url: 'http://www.pipl.com/bot'
|
2572
2746
|
|
2573
|
-
- regex: 'woobot/
|
2747
|
+
- regex: 'woobot/[\d.]+'
|
2574
2748
|
name: 'WooRank'
|
2575
2749
|
category: 'Crawler'
|
2576
2750
|
url: 'https://www.woorank.com/bot'
|
2577
2751
|
|
2578
|
-
- regex: 'Cookiebot/
|
2752
|
+
- regex: 'Cookiebot/[\d.]+'
|
2579
2753
|
name: 'Cookiebot'
|
2580
2754
|
category: 'Crawler'
|
2581
2755
|
url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
|
@@ -2591,7 +2765,7 @@
|
|
2591
2765
|
name: 'NET SYSTEMS RESEARCH LLC'
|
2592
2766
|
url: 'https://www.netsystemsresearch.com/'
|
2593
2767
|
|
2594
|
-
- regex: 'CensysInspect/
|
2768
|
+
- regex: 'CensysInspect/[\d.]+'
|
2595
2769
|
name: 'CensysInspect'
|
2596
2770
|
category: 'Security Checker'
|
2597
2771
|
url: 'https://about.censys.io/'
|
@@ -2599,7 +2773,7 @@
|
|
2599
2773
|
name: 'Censys, Inc.'
|
2600
2774
|
url: 'https://censys.io/'
|
2601
2775
|
|
2602
|
-
- regex: 'gdnplus
|
2776
|
+
- regex: 'gdnplus\.com'
|
2603
2777
|
name: 'GDNP'
|
2604
2778
|
category: 'Crawler'
|
2605
2779
|
url: 'https://gdnplus.com/'
|
@@ -2607,17 +2781,17 @@
|
|
2607
2781
|
name: 'Global Digital Network Plus, LLC'
|
2608
2782
|
url: 'https://gdnplus.com/'
|
2609
2783
|
|
2610
|
-
- regex: 'WellKnownBot/
|
2784
|
+
- regex: 'WellKnownBot/[\d.]+'
|
2611
2785
|
name: 'WellKnownBot'
|
2612
2786
|
category: 'Crawler'
|
2613
2787
|
url: 'https://well-known.dev'
|
2614
2788
|
|
2615
|
-
- regex: 'Adsbot/
|
2789
|
+
- regex: 'Adsbot/[\d.]+'
|
2616
2790
|
name: 'Adsbot'
|
2617
2791
|
category: 'Crawler'
|
2618
2792
|
url: 'https://seostar.co/robot/'
|
2619
2793
|
|
2620
|
-
- regex: 'MTRobot/
|
2794
|
+
- regex: 'MTRobot/[\d.]+'
|
2621
2795
|
name: 'MTRobot'
|
2622
2796
|
category: 'Crawler'
|
2623
2797
|
url: 'https://metrics-tools.de/robot.html'
|
@@ -2625,7 +2799,7 @@
|
|
2625
2799
|
name: 'Metrics Tools'
|
2626
2800
|
url: 'https://metrics-tools.de/'
|
2627
2801
|
|
2628
|
-
- regex: 'serpstatbot/
|
2802
|
+
- regex: 'serpstatbot/[\d.]+'
|
2629
2803
|
name: 'serpstatbot'
|
2630
2804
|
category: 'Crawler'
|
2631
2805
|
url: 'http://serpstatbot.com/'
|
@@ -2638,17 +2812,17 @@
|
|
2638
2812
|
category: 'Crawler'
|
2639
2813
|
url: 'https://github.com/gocolly/colly/'
|
2640
2814
|
|
2641
|
-
- regex: 'l9tcpid/v
|
2815
|
+
- regex: 'l9tcpid/v[\d.]+'
|
2642
2816
|
name: 'l9tcpid'
|
2643
2817
|
category: 'Security Checker'
|
2644
2818
|
url: 'https://github.com/LeakIX/l9tcpid'
|
2645
2819
|
|
2646
|
-
- regex: 'l9explore/
|
2820
|
+
- regex: 'l9explore/[\d.]+'
|
2647
2821
|
name: 'l9explore'
|
2648
2822
|
category: 'Security Checker'
|
2649
2823
|
url: 'https://github.com/LeakIX/l9explore'
|
2650
2824
|
|
2651
|
-
- regex: 'l9scan/|^Lkx
|
2825
|
+
- regex: 'l9scan/|^Lkx-.*/[\d.]+'
|
2652
2826
|
name: 'LeakIX'
|
2653
2827
|
category: 'Security Checker'
|
2654
2828
|
url: 'https://leakix.net/'
|
@@ -2656,7 +2830,7 @@
|
|
2656
2830
|
name: 'BaDaaS SRL'
|
2657
2831
|
url: 'https://leakix.net/'
|
2658
2832
|
|
2659
|
-
- regex: 'MegaIndex
|
2833
|
+
- regex: 'MegaIndex\.ru/[\d.]+'
|
2660
2834
|
name: 'MegaIndex'
|
2661
2835
|
category: 'Crawler'
|
2662
2836
|
url: 'https://megaindex.com/crawler'
|
@@ -2664,17 +2838,17 @@
|
|
2664
2838
|
- regex: 'Seekport'
|
2665
2839
|
name: 'Seekport'
|
2666
2840
|
category: 'Crawler'
|
2667
|
-
url: '
|
2841
|
+
url: 'https://bot.seekport.com/'
|
2668
2842
|
producer:
|
2669
2843
|
name: 'SISTRIX GmbH'
|
2670
2844
|
url: 'https://www.sistrix.de/'
|
2671
2845
|
|
2672
|
-
- regex: 'seolyt/
|
2846
|
+
- regex: 'seolyt/[\d.]+'
|
2673
2847
|
name: 'seolyt'
|
2674
2848
|
category: 'Crawler'
|
2675
2849
|
url: 'https://seolyt.com/'
|
2676
2850
|
|
2677
|
-
- regex: 'YaK/
|
2851
|
+
- regex: 'YaK/[\d.]+'
|
2678
2852
|
name: 'YaK'
|
2679
2853
|
category: 'Crawler'
|
2680
2854
|
url: 'https://www.linkfluence.com/'
|
@@ -2682,7 +2856,7 @@
|
|
2682
2856
|
name: 'Linkfluence SAS'
|
2683
2857
|
url: 'https://www.linkfluence.com/'
|
2684
2858
|
|
2685
|
-
- regex: 'KomodiaBot/
|
2859
|
+
- regex: 'KomodiaBot/[\d.]+'
|
2686
2860
|
name: 'KomodiaBot'
|
2687
2861
|
category: 'Crawler'
|
2688
2862
|
url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
|
@@ -2690,7 +2864,7 @@
|
|
2690
2864
|
name: 'Komodia Inc.'
|
2691
2865
|
url: 'https://www.komodia.com/'
|
2692
2866
|
|
2693
|
-
- regex: 'Neevabot/
|
2867
|
+
- regex: 'Neevabot/[\d.]+'
|
2694
2868
|
name: 'Neevabot'
|
2695
2869
|
category: 'Search bot'
|
2696
2870
|
url: 'https://neeva.com/neevabot'
|
@@ -2698,17 +2872,17 @@
|
|
2698
2872
|
name: 'Neeva Inc.'
|
2699
2873
|
url: 'https://neeva.com/'
|
2700
2874
|
|
2701
|
-
- regex: 'LinkPreview/
|
2875
|
+
- regex: 'LinkPreview/[\d.]+'
|
2702
2876
|
name: 'LinkPreview'
|
2703
2877
|
category: 'Service Agent'
|
2704
2878
|
url: 'https://www.linkpreview.net/'
|
2705
2879
|
|
2706
|
-
- regex: 'JungleKeyThumbnail/
|
2880
|
+
- regex: 'JungleKeyThumbnail/[\d.]+'
|
2707
2881
|
name: 'JungleKeyThumbnail'
|
2708
2882
|
category: 'Crawler'
|
2709
2883
|
url: 'https://junglekey.com/'
|
2710
2884
|
|
2711
|
-
- regex: 'rocketmonitor(?: |bot/)
|
2885
|
+
- regex: 'rocketmonitor(?: |bot/)[\d.]+'
|
2712
2886
|
name: 'RocketMonitorBot'
|
2713
2887
|
category: 'Site Monitor'
|
2714
2888
|
url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
|
@@ -2716,7 +2890,7 @@
|
|
2716
2890
|
name: 'Radio Mast, Inc.'
|
2717
2891
|
url: 'https://www.radiomast.io/'
|
2718
2892
|
|
2719
|
-
- regex: 'SitemapParser-VIPnytt/
|
2893
|
+
- regex: 'SitemapParser-VIPnytt/[\d.]+'
|
2720
2894
|
name: 'SitemapParser-VIPnytt'
|
2721
2895
|
category: 'Crawler'
|
2722
2896
|
url: 'https://github.com/VIPnytt/SitemapParser/'
|
@@ -2726,7 +2900,7 @@
|
|
2726
2900
|
category: 'Crawler'
|
2727
2901
|
url: 'https://turnitin.com/robot/crawlerinfo.html'
|
2728
2902
|
|
2729
|
-
- regex: 'DMBrowser
|
2903
|
+
- regex: 'DMBrowser/[\d.]+|DMBrowser-[UB]V'
|
2730
2904
|
name: 'Dotcom Monitor'
|
2731
2905
|
category: 'Site Monitor'
|
2732
2906
|
url: 'https://www.dotcom-monitor.com'
|
@@ -2740,17 +2914,17 @@
|
|
2740
2914
|
category: 'Crawler'
|
2741
2915
|
url: 'https://dataforseo.com/dataforseo-bot'
|
2742
2916
|
|
2743
|
-
- regex: 'Discordbot/
|
2917
|
+
- regex: 'Discordbot/[\d.]+'
|
2744
2918
|
name: 'Discord Bot'
|
2745
2919
|
category: 'Service Agent'
|
2746
2920
|
url: 'https://discordapp.com'
|
2747
2921
|
|
2748
|
-
- regex: 'Linespider/
|
2922
|
+
- regex: 'Linespider/[\d.]+'
|
2749
2923
|
name: 'Linespider'
|
2750
2924
|
category: 'Crawler'
|
2751
2925
|
url: 'https://lin.ee/4dwXkTH'
|
2752
2926
|
|
2753
|
-
- regex: 'Cincraw/
|
2927
|
+
- regex: 'Cincraw/[\d.]+'
|
2754
2928
|
name: 'Cincraw'
|
2755
2929
|
category: 'Crawler'
|
2756
2930
|
url: 'http://cincrawdata.net/bot/'
|
@@ -2776,7 +2950,7 @@
|
|
2776
2950
|
category: 'Crawler'
|
2777
2951
|
url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
|
2778
2952
|
|
2779
|
-
- regex: 'https://securitytxt-scan
|
2953
|
+
- regex: 'https://securitytxt-scan\.cs\.hm\.edu/'
|
2780
2954
|
name: 'security.txt scanserver'
|
2781
2955
|
category: 'Security Checker'
|
2782
2956
|
url: 'https://securitytxt-scan.cs.hm.edu/'
|
@@ -2784,17 +2958,17 @@
|
|
2784
2958
|
name: 'Hochschule für angewandte Wissenschaften München'
|
2785
2959
|
url: 'https://www.hm.edu/'
|
2786
2960
|
|
2787
|
-
- regex: 'TigerBot/
|
2961
|
+
- regex: 'TigerBot/[\d.]+'
|
2788
2962
|
name: 'TigerBot'
|
2789
2963
|
category: 'Crawler'
|
2790
2964
|
url: 'https://tiger.ch/'
|
2791
2965
|
|
2792
|
-
- regex: 'TestCrawler/
|
2966
|
+
- regex: 'TestCrawler/[\d.]+'
|
2793
2967
|
name: 'TestCrawler'
|
2794
2968
|
category: 'Crawler'
|
2795
2969
|
url: 'https://www.comcepta.com/'
|
2796
2970
|
|
2797
|
-
- regex: 'CrowdTanglebot/
|
2971
|
+
- regex: 'CrowdTanglebot/[\d.]+'
|
2798
2972
|
name: 'CrowdTangle'
|
2799
2973
|
category: 'Crawler'
|
2800
2974
|
url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
|
@@ -2802,7 +2976,7 @@
|
|
2802
2976
|
name: 'CrowdTangle, Inc.'
|
2803
2977
|
url: 'https://www.crowdtangle.com/'
|
2804
2978
|
|
2805
|
-
- regex: 'Sellers
|
2979
|
+
- regex: 'Sellers\.Guide Crawler by Primis'
|
2806
2980
|
name: 'Sellers.Guide'
|
2807
2981
|
category: 'Crawler'
|
2808
2982
|
url: 'https://sellers.guide/'
|
@@ -2826,7 +3000,7 @@
|
|
2826
3000
|
name: 'deepnoc, GmbH'
|
2827
3001
|
url: 'https://deepnoc.com/'
|
2828
3002
|
|
2829
|
-
- regex: 'Newslitbot/
|
3003
|
+
- regex: 'Newslitbot/[\d.]+'
|
2830
3004
|
name: 'Newslitbot'
|
2831
3005
|
category: 'Crawler'
|
2832
3006
|
url: 'https://www.newslit.co/'
|
@@ -2834,7 +3008,7 @@
|
|
2834
3008
|
name: 'Newslit, LLC.'
|
2835
3009
|
url: 'https://www.newslit.co/'
|
2836
3010
|
|
2837
|
-
- regex: 'um-LN/
|
3011
|
+
- regex: 'um-LN/[\d.]+'
|
2838
3012
|
name: 'uMBot'
|
2839
3013
|
category: 'Crawler'
|
2840
3014
|
url: 'https://www.ubermetrics-technologies.com/'
|
@@ -2842,12 +3016,12 @@
|
|
2842
3016
|
name: 'Ubermetrics Technologies GmbH'
|
2843
3017
|
url: 'https://www.ubermetrics-technologies.com/'
|
2844
3018
|
|
2845
|
-
- regex: 'Abonti/
|
3019
|
+
- regex: 'Abonti/[\d.]+'
|
2846
3020
|
name: 'Abonti'
|
2847
3021
|
category: 'Crawler'
|
2848
3022
|
url: 'http://abonti.com/'
|
2849
3023
|
|
2850
|
-
- regex: 'collection@infegy
|
3024
|
+
- regex: 'collection@infegy\.com'
|
2851
3025
|
name: 'Infegy'
|
2852
3026
|
category: 'Crawler'
|
2853
3027
|
url: 'https://infegy.com/'
|
@@ -2855,7 +3029,7 @@
|
|
2855
3029
|
name: 'Infegy, Inc.'
|
2856
3030
|
url: 'https://infegy.com/'
|
2857
3031
|
|
2858
|
-
- regex: 'HTTP Banner Detection \(https://security
|
3032
|
+
- regex: 'HTTP Banner Detection \(https://security\.ipip\.net\)'
|
2859
3033
|
name: 'IPIP'
|
2860
3034
|
category: 'Security Checker'
|
2861
3035
|
url: 'https://security.ipip.net/'
|
@@ -2863,7 +3037,7 @@
|
|
2863
3037
|
name: 'Beijing Tiantexin Tech. Co., Ltd.'
|
2864
3038
|
url: 'https://en.ipip.net/'
|
2865
3039
|
|
2866
|
-
- regex: 'ev-crawler/
|
3040
|
+
- regex: 'ev-crawler/[\d.]+'
|
2867
3041
|
name: 'Headline'
|
2868
3042
|
category: 'Crawler'
|
2869
3043
|
url: 'https://headline.com/legal/crawler'
|
@@ -2871,7 +3045,7 @@
|
|
2871
3045
|
name: 'e.ventures Managementgesellschaft mbH'
|
2872
3046
|
url: 'https://headline.com/'
|
2873
3047
|
|
2874
|
-
- regex: 'webprosbot/
|
3048
|
+
- regex: 'webprosbot/[\d.]+'
|
2875
3049
|
name: 'WebPros'
|
2876
3050
|
category: 'Crawler'
|
2877
3051
|
url: 'https://webpros.com/'
|
@@ -2887,7 +3061,7 @@
|
|
2887
3061
|
name: 'Amazon.com, Inc.'
|
2888
3062
|
url: 'https://www.amazon.com/'
|
2889
3063
|
|
2890
|
-
- regex: 'Wheregoes
|
3064
|
+
- regex: 'Wheregoes\.com Redirect Checker/[\d.]+'
|
2891
3065
|
name: 'WhereGoes'
|
2892
3066
|
category: 'Crawler'
|
2893
3067
|
url: 'https://wheregoes.com/'
|
@@ -2897,12 +3071,12 @@
|
|
2897
3071
|
category: 'Crawler'
|
2898
3072
|
url: 'http://66.240.192.82/'
|
2899
3073
|
|
2900
|
-
- regex: 'InternetMeasurement/
|
3074
|
+
- regex: 'InternetMeasurement/[\d.]+'
|
2901
3075
|
name: 'InternetMeasurement'
|
2902
3076
|
category: 'Crawler'
|
2903
3077
|
url: 'https://internet-measurement.com/'
|
2904
3078
|
|
2905
|
-
- regex: 'DomainAppender /
|
3079
|
+
- regex: 'DomainAppender /[\d.]+'
|
2906
3080
|
name: 'DomainAppender'
|
2907
3081
|
category: 'Crawler'
|
2908
3082
|
url: 'https://www.profound.net/product/domain_append/'
|
@@ -2910,7 +3084,7 @@
|
|
2910
3084
|
name: 'Profound Networks, LLC'
|
2911
3085
|
url: 'https://www.profound.net/'
|
2912
3086
|
|
2913
|
-
- regex: 'FreeWebMonitoring SiteChecker/
|
3087
|
+
- regex: 'FreeWebMonitoring SiteChecker/[\d.]+'
|
2914
3088
|
name: 'FreeWebMonitoring'
|
2915
3089
|
category: 'Site Monitor'
|
2916
3090
|
url: 'https://www.freewebmonitoring.com/bot.html'
|
@@ -2926,7 +3100,7 @@
|
|
2926
3100
|
name: 'Valley Hosting, LLC'
|
2927
3101
|
url: 'https://www.pagemodified.com/'
|
2928
3102
|
|
2929
|
-
- regex: 'adstxtlab
|
3103
|
+
- regex: 'adstxtlab\.com'
|
2930
3104
|
name: 'adstxtlab.com'
|
2931
3105
|
category: 'Crawler'
|
2932
3106
|
url: 'https://adstxtlab.com/validator.php'
|
@@ -2934,7 +3108,7 @@
|
|
2934
3108
|
name: 'Jaohawi AB'
|
2935
3109
|
url: 'https://adstxtlab.com/'
|
2936
3110
|
|
2937
|
-
- regex: 'Iframely/
|
3111
|
+
- regex: 'Iframely/[\d.]+'
|
2938
3112
|
name: 'Iframely'
|
2939
3113
|
category: 'Crawler'
|
2940
3114
|
url: 'https://iframely.com/'
|
@@ -2942,7 +3116,7 @@
|
|
2942
3116
|
name: 'Itteco Software, Corp.'
|
2943
3117
|
url: 'https://iframely.com/'
|
2944
3118
|
|
2945
|
-
- regex: 'DomainStatsBot/
|
3119
|
+
- regex: 'DomainStatsBot/[\d.]+'
|
2946
3120
|
name: 'DomainStatsBot'
|
2947
3121
|
category: 'Crawler'
|
2948
3122
|
url: 'https://domainstats.com/pages/our-bot'
|
@@ -2950,7 +3124,7 @@
|
|
2950
3124
|
name: 'Domainstats Ltd'
|
2951
3125
|
url: 'https://domainstats.com/'
|
2952
3126
|
|
2953
|
-
- regex: 'aiHitBot/
|
3127
|
+
- regex: 'aiHitBot/[\d.]+'
|
2954
3128
|
name: 'aiHitBot'
|
2955
3129
|
category: 'Crawler'
|
2956
3130
|
url: 'https://www.aihitdata.com/about'
|
@@ -2968,12 +3142,12 @@
|
|
2968
3142
|
name: 'GitCrawlerBot'
|
2969
3143
|
category: 'Crawler'
|
2970
3144
|
|
2971
|
-
- regex: 'AdAuth/
|
3145
|
+
- regex: 'AdAuth/[\d.]+'
|
2972
3146
|
name: 'AdAuth'
|
2973
3147
|
category: 'Crawler'
|
2974
3148
|
url: 'https://www.adauth.com'
|
2975
3149
|
|
2976
|
-
- regex: 'faveeo
|
3150
|
+
- regex: 'faveeo\.com'
|
2977
3151
|
name: 'Faveeo'
|
2978
3152
|
category: 'Crawler'
|
2979
3153
|
url: 'http://www.faveeo.com'
|
@@ -3004,7 +3178,7 @@
|
|
3004
3178
|
name: 'Jožef Stefan Institute'
|
3005
3179
|
url: 'https://www.ijs.si/ijsw/JSI'
|
3006
3180
|
|
3007
|
-
- regex: 'dnt-policy@eff
|
3181
|
+
- regex: 'dnt-policy@eff\.org'
|
3008
3182
|
name: 'EFF Do Not Track Verifier'
|
3009
3183
|
category: 'Crawler'
|
3010
3184
|
url: 'https://www.eff.org/issues/do-not-track'
|
@@ -3028,7 +3202,7 @@
|
|
3028
3202
|
name: 'Swoppen Systems GmbH'
|
3029
3203
|
url: 'https://www.swoppen.com/de'
|
3030
3204
|
|
3031
|
-
- regex: 'ScamadviserExternalHit/
|
3205
|
+
- regex: 'ScamadviserExternalHit/[\d.]+'
|
3032
3206
|
name: 'Scamadviser External Hit'
|
3033
3207
|
category: 'Crawler'
|
3034
3208
|
url: 'https://www.scamadviser.com/'
|
@@ -3041,20 +3215,20 @@
|
|
3041
3215
|
category: 'Crawler'
|
3042
3216
|
url: 'https://www.zaldamo.com/search.html'
|
3043
3217
|
producer:
|
3044
|
-
name: '
|
3045
|
-
url: 'https://www.
|
3218
|
+
name: 'Zaldamo, LLC.'
|
3219
|
+
url: 'https://www.zaldamo.com/'
|
3046
3220
|
|
3047
|
-
- regex: 'AFB/
|
3221
|
+
- regex: 'AFB/[\d.]+'
|
3048
3222
|
name: 'Allloadin Favicon Bot'
|
3049
3223
|
category: 'Crawler'
|
3050
3224
|
url: 'https://allloadin.com/'
|
3051
3225
|
|
3052
|
-
- regex: 'SeolytBot/
|
3226
|
+
- regex: 'SeolytBot/[\d.]+'
|
3053
3227
|
name: 'Seolyt Bot'
|
3054
3228
|
category: 'Crawler'
|
3055
3229
|
url: 'https://seolyt.com'
|
3056
3230
|
|
3057
|
-
- regex: 'LinkWalker/
|
3231
|
+
- regex: 'LinkWalker/[\d.]+'
|
3058
3232
|
name: 'LinkWalker'
|
3059
3233
|
category: 'Crawler'
|
3060
3234
|
url: 'https://www.phishlabs.com/'
|
@@ -3062,7 +3236,7 @@
|
|
3062
3236
|
name: 'PhishLabs, Inc.'
|
3063
3237
|
url: 'https://www.phishlabs.com/'
|
3064
3238
|
|
3065
|
-
- regex: 'RenovateBot/
|
3239
|
+
- regex: 'RenovateBot/[\d.]+'
|
3066
3240
|
name: 'RenovateBot'
|
3067
3241
|
category: 'Security Checker'
|
3068
3242
|
url: 'https://github.com/renovatebot/renovate'
|
@@ -3070,7 +3244,7 @@
|
|
3070
3244
|
name: 'White Source Ltd.'
|
3071
3245
|
url: 'https://www.mend.io/free-developer-tools/renovate/'
|
3072
3246
|
|
3073
|
-
- regex: 'INETDEX-BOT/
|
3247
|
+
- regex: 'INETDEX-BOT/[\d.]+'
|
3074
3248
|
name: 'Inetdex Bot'
|
3075
3249
|
category: 'Crawler'
|
3076
3250
|
url: 'https://www.inetdex.com/'
|
@@ -3083,15 +3257,7 @@
|
|
3083
3257
|
name: 'Marc Huemer'
|
3084
3258
|
url: 'https://www.netzzappen.com/'
|
3085
3259
|
|
3086
|
-
- regex: '
|
3087
|
-
name: 'SEMrush Reputation Management'
|
3088
|
-
category: 'Service Agent'
|
3089
|
-
url: 'https://www.semrush.com/bot/'
|
3090
|
-
producer:
|
3091
|
-
name: 'SEMrush'
|
3092
|
-
url: 'https://www.semrush.com/'
|
3093
|
-
|
3094
|
-
- regex: 'panscient.com'
|
3260
|
+
- regex: 'panscient\.com'
|
3095
3261
|
name: 'Panscient'
|
3096
3262
|
category: 'Crawler'
|
3097
3263
|
url: 'https://www.panscient.com/faq.htm'
|
@@ -3099,7 +3265,7 @@
|
|
3099
3265
|
name: 'Panscient, Inc.'
|
3100
3266
|
url: 'https://www.panscient.com/'
|
3101
3267
|
|
3102
|
-
- regex: 'research@pdrlabs
|
3268
|
+
- regex: 'research@pdrlabs\.net'
|
3103
3269
|
name: 'PDR Labs'
|
3104
3270
|
category: 'Security Checker'
|
3105
3271
|
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
@@ -3107,7 +3273,7 @@
|
|
3107
3273
|
name: 'PDR Labs'
|
3108
3274
|
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
3109
3275
|
|
3110
|
-
- regex: 'Nicecrawler/
|
3276
|
+
- regex: 'Nicecrawler/[\d.]+'
|
3111
3277
|
name: 'NiceCrawler'
|
3112
3278
|
category: 'Crawler'
|
3113
3279
|
url: 'https://www.nicecrawler.com/'
|
@@ -3115,7 +3281,7 @@
|
|
3115
3281
|
name: 'Intelium Corp.'
|
3116
3282
|
url: 'https://www.intelium.com/'
|
3117
3283
|
|
3118
|
-
- regex: 't3versionsBot/
|
3284
|
+
- regex: 't3versionsBot/[\d.]+'
|
3119
3285
|
name: 't3versions'
|
3120
3286
|
category: 'Crawler'
|
3121
3287
|
url: 'https://www.t3versions.com/bot'
|
@@ -3123,7 +3289,7 @@
|
|
3123
3289
|
name: 'Torben Hansen'
|
3124
3290
|
url: 'https://www.t3versions.com/'
|
3125
3291
|
|
3126
|
-
- regex: 'Crawlson/
|
3292
|
+
- regex: 'Crawlson/[\d.]+'
|
3127
3293
|
name: 'Crawlson'
|
3128
3294
|
category: 'Crawler'
|
3129
3295
|
url: 'https://www.crawlson.com/about'
|
@@ -3131,7 +3297,7 @@
|
|
3131
3297
|
name: 'Crawlson'
|
3132
3298
|
url: 'https://www.crawlson.com/'
|
3133
3299
|
|
3134
|
-
- regex: 'tchelebi/
|
3300
|
+
- regex: 'tchelebi/[\d.]+'
|
3135
3301
|
name: 'tchelebi'
|
3136
3302
|
category: 'Crawler'
|
3137
3303
|
url: 'https://tchelebi.io/'
|
@@ -3147,7 +3313,7 @@
|
|
3147
3313
|
name: 'New Work SE'
|
3148
3314
|
url: 'https://www.xing.com/'
|
3149
3315
|
|
3150
|
-
- regex: 'RepoLookoutBot/
|
3316
|
+
- regex: 'RepoLookoutBot/v?[\d.]+'
|
3151
3317
|
name: 'Repo Lookout'
|
3152
3318
|
category: 'Security Checker'
|
3153
3319
|
url: 'https://www.repo-lookout.org/'
|
@@ -3163,7 +3329,7 @@
|
|
3163
3329
|
name: 'MAMI Project'
|
3164
3330
|
url: 'https://mami-project.eu/'
|
3165
3331
|
|
3166
|
-
- regex: 'everyfeed-spider/
|
3332
|
+
- regex: 'everyfeed-spider/[\d.]+'
|
3167
3333
|
name: 'Everyfeed'
|
3168
3334
|
url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
|
3169
3335
|
category: 'Feed Fetcher'
|
@@ -3187,7 +3353,7 @@
|
|
3187
3353
|
name: ''
|
3188
3354
|
url: ''
|
3189
3355
|
|
3190
|
-
- regex: 'Gregarius/
|
3356
|
+
- regex: 'Gregarius/[\d.]+'
|
3191
3357
|
name: 'Gregarius'
|
3192
3358
|
category: 'Feed Fetcher'
|
3193
3359
|
url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
|
@@ -3203,7 +3369,7 @@
|
|
3203
3369
|
name: 'Comodo Security Solutions, Inc.'
|
3204
3370
|
url: 'https://www.comodo.com/'
|
3205
3371
|
|
3206
|
-
- regex: 'Sectigo DCV'
|
3372
|
+
- regex: 'Sectigo DCV|acme\.sectigo\.com'
|
3207
3373
|
name: 'Sectigo DCV'
|
3208
3374
|
category: 'Service Agent'
|
3209
3375
|
url: 'https://sectigo.com/'
|
@@ -3211,7 +3377,7 @@
|
|
3211
3377
|
name: 'Sectigo Limited'
|
3212
3378
|
url: 'https://sectigo.com/'
|
3213
3379
|
|
3214
|
-
- regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/
|
3380
|
+
- regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/[\d.]+'
|
3215
3381
|
name: 'KlarnaBot'
|
3216
3382
|
category: 'Crawler'
|
3217
3383
|
url: 'https://docs.klarna.com/klarna-bot/'
|
@@ -3219,7 +3385,7 @@
|
|
3219
3385
|
name: 'Klarna Bank AB'
|
3220
3386
|
url: 'https://www.klarna.com/'
|
3221
3387
|
|
3222
|
-
- regex: 'Taboolabot/
|
3388
|
+
- regex: 'Taboolabot/[\d.]+'
|
3223
3389
|
name: 'Taboolabot'
|
3224
3390
|
category: 'Crawler'
|
3225
3391
|
url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
|
@@ -3227,7 +3393,7 @@
|
|
3227
3393
|
name: 'Taboola, Inc.'
|
3228
3394
|
url: 'https://www.taboola.com/'
|
3229
3395
|
|
3230
|
-
- regex: 'Asana/
|
3396
|
+
- regex: 'Asana/[\d.]+'
|
3231
3397
|
name: 'Asana'
|
3232
3398
|
category: 'Crawler'
|
3233
3399
|
url: 'https://asana.com/'
|
@@ -3243,7 +3409,7 @@
|
|
3243
3409
|
name: 'Google Inc.'
|
3244
3410
|
url: 'https://www.google.com/'
|
3245
3411
|
|
3246
|
-
- regex: 'URLinspectorBot/
|
3412
|
+
- regex: 'URLinspectorBot/[\d.]+'
|
3247
3413
|
name: 'URLinspector'
|
3248
3414
|
category: 'Site Monitor'
|
3249
3415
|
url: 'https://www.urlinspector.com/bot/'
|
@@ -3251,7 +3417,7 @@
|
|
3251
3417
|
name: 'LinkResearchTools GmbH'
|
3252
3418
|
url: 'https://www.linkresearchtools.com/'
|
3253
3419
|
|
3254
|
-
- regex: 'EntferBot/
|
3420
|
+
- regex: 'EntferBot/[\d.]+'
|
3255
3421
|
name: 'Entfer'
|
3256
3422
|
category: 'Crawler'
|
3257
3423
|
url: 'https://entfer.com/'
|
@@ -3259,7 +3425,7 @@
|
|
3259
3425
|
name: 'Entfer Ltd.'
|
3260
3426
|
url: 'https://entfer.com/'
|
3261
3427
|
|
3262
|
-
- regex: 'TagInspector/
|
3428
|
+
- regex: 'TagInspector/[\d.]+'
|
3263
3429
|
name: 'Tag Inspector'
|
3264
3430
|
category: 'Crawler'
|
3265
3431
|
url: 'https://taginspector.com/'
|
@@ -3283,7 +3449,7 @@
|
|
3283
3449
|
name: 'Diffbot Technologies Corp.'
|
3284
3450
|
url: 'https://www.diffbot.com/'
|
3285
3451
|
|
3286
|
-
- regex: 'DisqusAdstxtCrawler/
|
3452
|
+
- regex: 'DisqusAdstxtCrawler/[\d.]+'
|
3287
3453
|
name: 'Disqus'
|
3288
3454
|
category: 'Crawler'
|
3289
3455
|
url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
|
@@ -3291,7 +3457,7 @@
|
|
3291
3457
|
name: 'Disqus, Inc.'
|
3292
3458
|
url: 'https://disqus.com/'
|
3293
3459
|
|
3294
|
-
- regex: 'startmebot/
|
3460
|
+
- regex: 'startmebot/[\d.]+'
|
3295
3461
|
name: 'start.me'
|
3296
3462
|
category: 'Crawler'
|
3297
3463
|
url: 'https://about.start.me/'
|
@@ -3299,17 +3465,17 @@
|
|
3299
3465
|
name: 'start.me BV'
|
3300
3466
|
url: 'https://about.start.me/'
|
3301
3467
|
|
3302
|
-
- regex: '2ip bot/
|
3468
|
+
- regex: '2ip bot/[\d.]+'
|
3303
3469
|
name: '2ip'
|
3304
3470
|
category: 'Crawler'
|
3305
3471
|
url: 'https://2ip.io/'
|
3306
3472
|
|
3307
|
-
- regex: 'ReqBin Curl Client/
|
3473
|
+
- regex: 'ReqBin Curl Client/[\d.]+'
|
3308
3474
|
name: 'ReqBin'
|
3309
3475
|
category: 'Crawler'
|
3310
3476
|
url: 'https://reqbin.com/curl'
|
3311
3477
|
|
3312
|
-
- regex: 'XoviBot/
|
3478
|
+
- regex: 'XoviBot/[\d.]+'
|
3313
3479
|
name: 'XoviBot'
|
3314
3480
|
category: 'Crawler'
|
3315
3481
|
url: 'https://www.xovibot.net'
|
@@ -3317,12 +3483,12 @@
|
|
3317
3483
|
name: 'Xovi GmbH'
|
3318
3484
|
url: 'http://www.xovi.de'
|
3319
3485
|
|
3320
|
-
- regex: 'Overcast/
|
3486
|
+
- regex: 'Overcast/[\d.]+ Podcast Sync'
|
3321
3487
|
name: 'Overcast Podcast Sync'
|
3322
3488
|
category: 'Service Agent'
|
3323
3489
|
url: 'https://overcast.fm/podcasterinfo'
|
3324
3490
|
|
3325
|
-
- regex: '^Verity/
|
3491
|
+
- regex: '^Verity/[\d.]+'
|
3326
3492
|
name: 'GumGum Verity'
|
3327
3493
|
category: 'Service Agent'
|
3328
3494
|
url: 'https://gumgum.com/verity'
|
@@ -3332,7 +3498,7 @@
|
|
3332
3498
|
category: 'Feed Reader'
|
3333
3499
|
url: 'https://github.com/snarfed/hackermention'
|
3334
3500
|
|
3335
|
-
- regex: 'BitSightBot/
|
3501
|
+
- regex: 'BitSightBot/[\d.]+'
|
3336
3502
|
name: 'BitSight'
|
3337
3503
|
category: 'Security Checker'
|
3338
3504
|
url: 'https://www.bitsight.com/'
|
@@ -3340,12 +3506,12 @@
|
|
3340
3506
|
name: 'BitSight Technologies, Inc.'
|
3341
3507
|
url: 'https://www.bitsight.com/'
|
3342
3508
|
|
3343
|
-
- regex: 'Ezgif/
|
3509
|
+
- regex: 'Ezgif/[\d.]+'
|
3344
3510
|
name: 'Ezgif'
|
3345
3511
|
category: 'Service Agent'
|
3346
3512
|
url: 'https://ezgif.com/about'
|
3347
3513
|
|
3348
|
-
- regex: 'intelx
|
3514
|
+
- regex: 'intelx\.io_bot'
|
3349
3515
|
name: 'Intelligence X'
|
3350
3516
|
category: 'Crawler'
|
3351
3517
|
url: 'https://intelx.io/'
|
@@ -3353,7 +3519,7 @@
|
|
3353
3519
|
name: 'Kleissner Investments s.r.o.'
|
3354
3520
|
url: 'https://intelx.io/'
|
3355
3521
|
|
3356
|
-
- regex: 'FemtosearchBot/
|
3522
|
+
- regex: 'FemtosearchBot/[\d.]+'
|
3357
3523
|
name: 'Femtosearch'
|
3358
3524
|
category: 'Crawler'
|
3359
3525
|
url: 'http://femtosearch.com/'
|
@@ -3361,7 +3527,7 @@
|
|
3361
3527
|
name: 'Grier Forensics, LLC'
|
3362
3528
|
url: 'https://www.grierforensics.com/'
|
3363
3529
|
|
3364
|
-
- regex: 'AdsTxtCrawler/
|
3530
|
+
- regex: 'AdsTxtCrawler/[\d.]+'
|
3365
3531
|
name: 'AdsTxtCrawler'
|
3366
3532
|
category: 'Crawler'
|
3367
3533
|
url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
|
@@ -3377,7 +3543,7 @@
|
|
3377
3543
|
name: 'Morningscore'
|
3378
3544
|
url: 'https://morningscore.io/'
|
3379
3545
|
|
3380
|
-
- regex: 'Uptime-Kuma/
|
3546
|
+
- regex: 'Uptime-Kuma/[\d.]+'
|
3381
3547
|
name: 'Uptime-Kuma'
|
3382
3548
|
category: 'Site Monitor'
|
3383
3549
|
url: 'https://github.com/louislam/uptime-kuma'
|
@@ -3390,7 +3556,7 @@
|
|
3390
3556
|
name: 'OpenAI OpCo, LLC'
|
3391
3557
|
url: 'https://openai.com/'
|
3392
3558
|
|
3393
|
-
- regex: 'BrightEdge Crawler/
|
3559
|
+
- regex: 'BrightEdge Crawler/[\d.]+'
|
3394
3560
|
name: 'BrightEdge'
|
3395
3561
|
category: 'Crawler'
|
3396
3562
|
url: 'https://www.brightedge.com/'
|
@@ -3398,12 +3564,12 @@
|
|
3398
3564
|
name: 'BrightEdge Technologies, Inc'
|
3399
3565
|
url: 'https://www.brightedge.com/'
|
3400
3566
|
|
3401
|
-
- regex: 'sfFeedReader/
|
3567
|
+
- regex: 'sfFeedReader/[\d.]+'
|
3402
3568
|
name: 'sfFeedReader'
|
3403
3569
|
url: 'https://github.com/diem-project/sfFeed2Plugin'
|
3404
3570
|
category: 'Feed Fetcher'
|
3405
3571
|
|
3406
|
-
- regex: 'cyberscan
|
3572
|
+
- regex: 'cyberscan\.io'
|
3407
3573
|
name: 'Cyberscan'
|
3408
3574
|
category: 'Security Checker'
|
3409
3575
|
url: 'https://www.cyberscan.io/'
|
@@ -3419,15 +3585,7 @@
|
|
3419
3585
|
name: 'Lumar'
|
3420
3586
|
url: 'https://www.lumar.io/'
|
3421
3587
|
|
3422
|
-
- regex: '
|
3423
|
-
name: 'Repo Lookout'
|
3424
|
-
category: 'Crawler'
|
3425
|
-
url: 'https://www.repo-lookout.org/'
|
3426
|
-
producer:
|
3427
|
-
name: 'Crissy Field GmbH'
|
3428
|
-
url: 'https://www.crissyfield.de/'
|
3429
|
-
|
3430
|
-
- regex: 'researchscan.comsys.rwth-aachen.de'
|
3588
|
+
- regex: 'researchscan\.comsys\.rwth-aachen\.de'
|
3431
3589
|
name: 'Research Scan'
|
3432
3590
|
category: 'Crawler'
|
3433
3591
|
url: 'http://researchscan.comsys.rwth-aachen.de/'
|
@@ -3435,7 +3593,7 @@
|
|
3435
3593
|
name: 'RWTH Aachen University'
|
3436
3594
|
url: 'https://www.comsys.rwth-aachen.de/'
|
3437
3595
|
|
3438
|
-
- regex: 'newspaper/
|
3596
|
+
- regex: 'newspaper/[\d.]+'
|
3439
3597
|
name: 'Scraping Robot'
|
3440
3598
|
category: 'Crawler'
|
3441
3599
|
url: 'https://scrapingrobot.com/'
|
@@ -3443,7 +3601,7 @@
|
|
3443
3601
|
name: 'Sprious LLC'
|
3444
3602
|
url: 'https://sprious.com/'
|
3445
3603
|
|
3446
|
-
- regex: 'GPTBot/
|
3604
|
+
- regex: 'GPTBot/[\d.]+'
|
3447
3605
|
name: 'GPTBot'
|
3448
3606
|
category: 'Crawler'
|
3449
3607
|
url: 'https://platform.openai.com/docs/gptbot'
|
@@ -3451,7 +3609,7 @@
|
|
3451
3609
|
name: 'OpenAI OpCo, LLC'
|
3452
3610
|
url: 'https://openai.com/'
|
3453
3611
|
|
3454
|
-
- regex: 'Ant
|
3612
|
+
- regex: 'Ant(?:\.com beta|Bot)(?:/([\d+.]+))?'
|
3455
3613
|
name: 'Ant'
|
3456
3614
|
category: 'Crawler'
|
3457
3615
|
url: 'https://www.ant.com/'
|
@@ -3459,7 +3617,7 @@
|
|
3459
3617
|
name: 'Ant.com Ltd.'
|
3460
3618
|
url: 'https://www.ant.com/'
|
3461
3619
|
|
3462
|
-
- regex: 'WebwikiBot/
|
3620
|
+
- regex: 'WebwikiBot/[\d.]+'
|
3463
3621
|
name: 'Webwiki'
|
3464
3622
|
category: 'Crawler'
|
3465
3623
|
url: 'https://www.webwiki.com/'
|
@@ -3472,7 +3630,7 @@
|
|
3472
3630
|
category: 'Service Agent'
|
3473
3631
|
url: 'https://www.phpmyadmin.net/'
|
3474
3632
|
|
3475
|
-
- regex: 'Matomo/
|
3633
|
+
- regex: 'Matomo/[\d.]+'
|
3476
3634
|
name: 'Matomo'
|
3477
3635
|
category: 'Service Agent'
|
3478
3636
|
url: 'https://github.com/matomo-org/matomo'
|
@@ -3480,7 +3638,7 @@
|
|
3480
3638
|
name: 'InnoCraft Ltd'
|
3481
3639
|
url: 'https://matomo.org/'
|
3482
3640
|
|
3483
|
-
- regex: 'Prometheus/
|
3641
|
+
- regex: 'Prometheus/[\d.]+'
|
3484
3642
|
name: 'Prometheus'
|
3485
3643
|
category: 'Service Agent'
|
3486
3644
|
url: 'https://github.com/prometheus/prometheus'
|
@@ -3496,7 +3654,7 @@
|
|
3496
3654
|
name: 'ArchiveTeam'
|
3497
3655
|
url: 'https://wiki.archiveteam.org/'
|
3498
3656
|
|
3499
|
-
- regex: 'MADBbot/
|
3657
|
+
- regex: 'MADBbot/[\d.]+'
|
3500
3658
|
name: 'MADBbot'
|
3501
3659
|
category: 'Crawler'
|
3502
3660
|
url: 'https://madb.zapto.org/bot.html'
|
@@ -3508,6 +3666,877 @@
|
|
3508
3666
|
name: 'Meltwater Deutschland GmbH'
|
3509
3667
|
url: 'https://www.meltwater.com/'
|
3510
3668
|
|
3669
|
+
- regex: '(?:Owler@ows\.eu|OWLer)/[\d.]+'
|
3670
|
+
name: 'OWLer'
|
3671
|
+
category: 'Crawler'
|
3672
|
+
url: 'https://openwebsearch.eu/owler/'
|
3673
|
+
producer:
|
3674
|
+
name: 'Open Search Foundation e.V.'
|
3675
|
+
url: 'https://openwebsearch.eu/'
|
3676
|
+
|
3677
|
+
- regex: 'bbc\.co\.uk/display/men/Page\+Monitor'
|
3678
|
+
name: 'BBC Page Monitor'
|
3679
|
+
category: 'Site Monitor'
|
3680
|
+
url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor'
|
3681
|
+
producer:
|
3682
|
+
name: 'BBC'
|
3683
|
+
url: 'https://www.bbc.com/'
|
3684
|
+
|
3685
|
+
- regex: 'BBC-Forge-URL-Monitor-Twisted'
|
3686
|
+
name: 'BBC Forge URL Monitor'
|
3687
|
+
category: 'Site Monitor'
|
3688
|
+
url: 'https://www.bbc.com/'
|
3689
|
+
producer:
|
3690
|
+
name: 'BBC'
|
3691
|
+
url: 'https://www.bbc.com/'
|
3692
|
+
|
3693
|
+
- regex: 'ClaudeBot'
|
3694
|
+
name: 'ClaudeBot'
|
3695
|
+
category: 'Crawler'
|
3696
|
+
url: 'https://github.com/ClaudeBot/ClaudeBot'
|
3697
|
+
|
3698
|
+
- regex: 'Imagesift'
|
3699
|
+
name: 'ImageSift'
|
3700
|
+
category: 'Crawler'
|
3701
|
+
url: 'https://imagesift.com/'
|
3702
|
+
producer:
|
3703
|
+
name: 'Castle Global, Inc.'
|
3704
|
+
url: 'https://thehive.ai/'
|
3705
|
+
|
3706
|
+
- regex: 'TactiScout'
|
3707
|
+
name: 'TactiScout'
|
3708
|
+
category: 'Crawler'
|
3709
|
+
url: 'https://find-it.world/TempCrawl/Crawltheque.php'
|
3710
|
+
producer:
|
3711
|
+
name: 'Tactikast'
|
3712
|
+
|
3713
|
+
- regex: 'Brightbot ([\d+.]+)'
|
3714
|
+
name: 'BrightBot'
|
3715
|
+
category: 'Crawler'
|
3716
|
+
url: 'https://www.brightbot.app/'
|
3717
|
+
producer:
|
3718
|
+
name: 'Bright Interactive Ltd'
|
3719
|
+
url: 'https://www.builtbybright.com/'
|
3720
|
+
|
3721
|
+
- regex: 'DaspeedBot/([\d+.]+)'
|
3722
|
+
name: 'DaspeedBot'
|
3723
|
+
category: 'Crawler'
|
3724
|
+
url: 'https://daspeed.io/'
|
3725
|
+
producer:
|
3726
|
+
name: 'DAWAP SARL'
|
3727
|
+
url: 'https://dawap.fr/'
|
3728
|
+
|
3729
|
+
- regex: 'StractBot(?:/([\d+.]+))?'
|
3730
|
+
name: 'Stract'
|
3731
|
+
category: 'Crawler'
|
3732
|
+
url: 'https://stract.com/webmasters'
|
3733
|
+
producer:
|
3734
|
+
name: 'Stract'
|
3735
|
+
url: 'https://github.com/StractOrg/stract/'
|
3736
|
+
|
3737
|
+
- regex: 'GeedoBot(?:/([\d+.]+))?'
|
3738
|
+
name: 'GeedoBot'
|
3739
|
+
category: 'Crawler'
|
3740
|
+
url: 'https://geedo.com/bot/'
|
3741
|
+
|
3742
|
+
- regex: 'GeedoProductSearch'
|
3743
|
+
name: 'GeedoProductSearch'
|
3744
|
+
category: 'Crawler'
|
3745
|
+
url: 'https://geedo.com/product-search/'
|
3746
|
+
|
3747
|
+
- regex: 'BackupLand(?:/([\d+.]+))?'
|
3748
|
+
name: 'BackupLand'
|
3749
|
+
category: 'Crawler'
|
3750
|
+
url: 'https://go.backupland.com/'
|
3751
|
+
producer:
|
3752
|
+
name: 'ООО «КВАРТА»'
|
3753
|
+
url: 'https://go.backupland.com/'
|
3754
|
+
|
3755
|
+
- regex: 'Konturbot(?:/([\d+.]+))?'
|
3756
|
+
name: 'Konturbot'
|
3757
|
+
category: 'Crawler'
|
3758
|
+
url: 'https://kontur.ru/'
|
3759
|
+
producer:
|
3760
|
+
name: 'АО «ПФ «СКБ Контур»'
|
3761
|
+
url: 'https://kontur.ru/'
|
3762
|
+
|
3763
|
+
- regex: 'keys-so-bot'
|
3764
|
+
name: 'Keys.so'
|
3765
|
+
category: 'Crawler'
|
3766
|
+
url: 'https://www.keys.so/'
|
3767
|
+
producer:
|
3768
|
+
name: 'ООО «МОДЕСКО»'
|
3769
|
+
url: 'https://www.modesco.ru/'
|
3770
|
+
|
3771
|
+
- regex: 'LetsearchBot(?:/([\d+.]+))?'
|
3772
|
+
name: 'LetSearch'
|
3773
|
+
category: 'Crawler'
|
3774
|
+
url: 'https://letsearch.ru/bots'
|
3775
|
+
|
3776
|
+
- regex: 'Example3(?:/([\d+.]+))?'
|
3777
|
+
name: 'Example3'
|
3778
|
+
category: 'Crawler'
|
3779
|
+
url: 'https://www.example3.com/'
|
3780
|
+
|
3781
|
+
- regex: 'StatOnlineRuBot(?:/([\d+.]+))?'
|
3782
|
+
name: 'StatOnline.ru'
|
3783
|
+
category: 'Crawler'
|
3784
|
+
url: 'https://statonline.ru/'
|
3785
|
+
producer:
|
3786
|
+
name: 'ООО «Регистратор доменных имен РЕГ.РУ»'
|
3787
|
+
url: 'https://statonline.ru/'
|
3788
|
+
|
3789
|
+
- regex: 'Spawning-AI'
|
3790
|
+
name: 'Spawning AI'
|
3791
|
+
category: 'Crawler'
|
3792
|
+
url: 'https://spawning.ai/'
|
3793
|
+
producer:
|
3794
|
+
name: 'Spawning, Inc'
|
3795
|
+
url: 'https://spawning.ai/'
|
3796
|
+
|
3797
|
+
- regex: 'domain research project'
|
3798
|
+
name: 'Domain Research Project'
|
3799
|
+
category: 'Crawler'
|
3800
|
+
url: 'https://trentwil.es/domains.html'
|
3801
|
+
producer:
|
3802
|
+
name: 'Trent Wiles'
|
3803
|
+
url: 'https://trentwil.es/'
|
3804
|
+
|
3805
|
+
- regex: 'getodin\.com'
|
3806
|
+
name: 'Odin'
|
3807
|
+
category: 'Security Checker'
|
3808
|
+
url: 'https://docs.getodin.com/'
|
3809
|
+
producer:
|
3810
|
+
name: 'Cyble Inc.'
|
3811
|
+
url: 'https://cyble.com/'
|
3812
|
+
|
3813
|
+
- regex: 'YouBot'
|
3814
|
+
name: 'YouBot'
|
3815
|
+
category: 'Crawler'
|
3816
|
+
url: 'https://about.you.com/youbot/'
|
3817
|
+
producer:
|
3818
|
+
name: 'SuSea, Inc.'
|
3819
|
+
url: 'https://you.com/'
|
3820
|
+
|
3821
|
+
- regex: 'SiteScoreBot'
|
3822
|
+
name: 'SiteScore'
|
3823
|
+
category: 'Crawler'
|
3824
|
+
url: 'https://sitescore.ai/'
|
3825
|
+
|
3826
|
+
- regex: 'MBCrawler'
|
3827
|
+
name: 'Monitor Backlinks'
|
3828
|
+
category: 'Crawler'
|
3829
|
+
url: 'https://www.seoptimer.com/monitor-backlinks/'
|
3830
|
+
producer:
|
3831
|
+
name: 'SEOptimer'
|
3832
|
+
url: 'https://www.seoptimer.com/'
|
3833
|
+
|
3834
|
+
- regex: 'mariadb-mysql-kbs-bot'
|
3835
|
+
name: 'MariaDB/MySQL Knowledge Base'
|
3836
|
+
category: 'Crawler'
|
3837
|
+
url: 'https://github.com/williamdes/mariadb-mysql-kbs'
|
3838
|
+
producer:
|
3839
|
+
name: 'WDES SAS'
|
3840
|
+
url: 'https://wdes.fr/en/'
|
3841
|
+
|
3842
|
+
- regex: 'GitHubCopilotChat'
|
3843
|
+
name: 'GitHubCopilotChat'
|
3844
|
+
category: 'Crawler'
|
3845
|
+
url: 'https://github.com/aaamoon/copilot-gpt4-service'
|
3846
|
+
|
3847
|
+
- regex: '^pdrl\.fm'
|
3848
|
+
name: 'Podroll Analyzer'
|
3849
|
+
category: 'Crawler'
|
3850
|
+
url: 'https://podroll.fm'
|
3851
|
+
|
3852
|
+
- regex: 'PodUptime/'
|
3853
|
+
name: 'PodUptime'
|
3854
|
+
category: 'Site Monitor'
|
3855
|
+
url: 'https://poduptime.com'
|
3856
|
+
|
3857
|
+
- regex: 'anthropic-ai'
|
3858
|
+
name: 'Anthropic AI'
|
3859
|
+
category: 'Crawler'
|
3860
|
+
url: 'https://www.anthropic.com/'
|
3861
|
+
producer:
|
3862
|
+
name: 'Anthropic, PBC'
|
3863
|
+
url: 'https://www.anthropic.com/'
|
3864
|
+
|
3865
|
+
- regex: 'NetpeakCheckerBot/[\d.]+'
|
3866
|
+
name: 'Netpeak Checker'
|
3867
|
+
category: 'Crawler'
|
3868
|
+
url: 'https://netpeaksoftware.com/checker'
|
3869
|
+
producer:
|
3870
|
+
name: 'Netpeak LTD'
|
3871
|
+
url: 'https://netpeaksoftware.com/'
|
3872
|
+
|
3873
|
+
- regex: 'SandobaCrawler/[\d.]+'
|
3874
|
+
name: 'Sandoba//Crawler'
|
3875
|
+
category: 'Crawler'
|
3876
|
+
url: 'https://www.sandoba.com/en/crawler/'
|
3877
|
+
producer:
|
3878
|
+
name: 'SANDOBA//EBUSINESS SOLUTIONS'
|
3879
|
+
url: 'https://www.sandoba.com/'
|
3880
|
+
|
3881
|
+
- regex: 'SirdataBot'
|
3882
|
+
name: 'Sirdata'
|
3883
|
+
category: 'Crawler'
|
3884
|
+
url: 'https://semantic-api.docs.sirdata.net/contextual-api/contextual-api/introduction'
|
3885
|
+
producer:
|
3886
|
+
name: 'Sirdata SAS'
|
3887
|
+
url: 'https://www.sirdata.com/'
|
3888
|
+
|
3889
|
+
- regex: 'CheckMarkNetwork/[\d.]+'
|
3890
|
+
name: 'CheckMark Network'
|
3891
|
+
category: 'Crawler'
|
3892
|
+
url: 'https://www.checkmarknetwork.com/spider.html/'
|
3893
|
+
producer:
|
3894
|
+
name: 'Exipert, Inc.'
|
3895
|
+
url: 'https://www.checkmarknetwork.com/'
|
3896
|
+
|
3897
|
+
- regex: 'cohere-ai'
|
3898
|
+
name: 'Cohere AI'
|
3899
|
+
category: 'Crawler'
|
3900
|
+
url: 'https://cohere.com/'
|
3901
|
+
producer:
|
3902
|
+
name: 'Cohere, Inc.'
|
3903
|
+
url: 'https://cohere.com/'
|
3904
|
+
|
3905
|
+
- regex: 'PerplexityBot/[\d.]+'
|
3906
|
+
name: 'PerplexityBot'
|
3907
|
+
category: 'Crawler'
|
3908
|
+
url: 'https://docs.perplexity.ai/docs/perplexitybot'
|
3909
|
+
producer:
|
3910
|
+
name: 'Perplexity AI, Inc.'
|
3911
|
+
url: 'https://www.perplexity.ai/'
|
3912
|
+
|
3913
|
+
- regex: 'TTD-Content'
|
3914
|
+
name: 'The Trade Desk Content'
|
3915
|
+
category: 'Crawler'
|
3916
|
+
url: 'https://www.thetradedesk.com/us/ttd-content'
|
3917
|
+
producer:
|
3918
|
+
name: 'The Trade Desk, Inc.'
|
3919
|
+
url: 'https://www.thetradedesk.com/'
|
3920
|
+
|
3921
|
+
- regex: 'montastic-monitor'
|
3922
|
+
name: 'Montastic Monitor'
|
3923
|
+
category: 'Site Monitor'
|
3924
|
+
url: 'https://www.montastic.com/'
|
3925
|
+
producer:
|
3926
|
+
name: 'Metadot, Corp.'
|
3927
|
+
url: 'https://www.metadot.com/'
|
3928
|
+
|
3929
|
+
- regex: 'Ruby, Twurly v[\d.]+'
|
3930
|
+
name: 'Twurly'
|
3931
|
+
category: 'Crawler'
|
3932
|
+
url: 'https://twurly.org/'
|
3933
|
+
|
3934
|
+
- regex: 'Mixnode(?:(?:Cache)?/[\d.]+)?'
|
3935
|
+
name: 'Mixnode'
|
3936
|
+
category: 'Crawler'
|
3937
|
+
url: 'https://www.mixnode.com/'
|
3938
|
+
producer:
|
3939
|
+
name: 'Mixnode Technologies, Inc.'
|
3940
|
+
url: 'https://www.mixnode.com/'
|
3941
|
+
|
3942
|
+
- regex: 'CSSCheck/[\d.]+'
|
3943
|
+
name: 'CSSCheck'
|
3944
|
+
category: 'Validator'
|
3945
|
+
|
3946
|
+
- regex: 'MicrosoftPreview/[\d.]+'
|
3947
|
+
name: 'Microsoft Preview'
|
3948
|
+
category: 'Service Agent'
|
3949
|
+
url: 'https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0'
|
3950
|
+
producer:
|
3951
|
+
name: 'Microsoft Corporation'
|
3952
|
+
url: 'https://www.microsoft.com/'
|
3953
|
+
|
3954
|
+
- regex: 's~virustotalcloud'
|
3955
|
+
name: 'VirusTotal Cloud'
|
3956
|
+
category: 'Crawler'
|
3957
|
+
url: 'https://www.virustotal.com/'
|
3958
|
+
producer:
|
3959
|
+
name: 'Chronicle Security Ireland Limited'
|
3960
|
+
url: 'https://chronicle.security/'
|
3961
|
+
|
3962
|
+
- regex: 'TinEye/[\d.]+'
|
3963
|
+
name: 'TinEye'
|
3964
|
+
category: 'Crawler'
|
3965
|
+
url: 'https://tineye.com/'
|
3966
|
+
producer:
|
3967
|
+
name: 'Idée, Inc.'
|
3968
|
+
url: 'https://tineye.com/'
|
3969
|
+
|
3970
|
+
- regex: 'e~arsnova-filter-system'
|
3971
|
+
name: 'ARSNova Filter System'
|
3972
|
+
category: 'Crawler'
|
3973
|
+
url: 'https://particify.de/en/'
|
3974
|
+
producer:
|
3975
|
+
name: 'Particify Gerhardt & Weingarten OHG'
|
3976
|
+
url: 'https://particify.de/en/'
|
3977
|
+
|
3978
|
+
- regex: 'botify'
|
3979
|
+
name: 'Botify'
|
3980
|
+
category: 'Crawler'
|
3981
|
+
url: 'https://www.botify.com/'
|
3982
|
+
producer:
|
3983
|
+
name: 'BOTIFY SAS'
|
3984
|
+
url: 'https://www.botify.com/'
|
3985
|
+
|
3986
|
+
- regex: 'adscanner'
|
3987
|
+
name: 'Adscanner'
|
3988
|
+
category: 'Crawler'
|
3989
|
+
url: 'https://www.alleyesonscreens.com/'
|
3990
|
+
producer:
|
3991
|
+
name: 'AdScanner d.o.o'
|
3992
|
+
url: 'https://www.alleyesonscreens.com/'
|
3993
|
+
|
3994
|
+
- regex: 'online-webceo-bot/[\d.]+'
|
3995
|
+
name: 'WebCEO'
|
3996
|
+
category: 'Crawler'
|
3997
|
+
url: 'https://www.webceo.com/'
|
3998
|
+
producer:
|
3999
|
+
name: 'WebCEO, LLC'
|
4000
|
+
url: 'https://www.webceo.com/'
|
4001
|
+
|
4002
|
+
- regex: 'NetTrack'
|
4003
|
+
name: 'NetTrack'
|
4004
|
+
category: 'Crawler'
|
4005
|
+
url: 'https://web.archive.org/web/20160607151934/https://nettrack.info/'
|
4006
|
+
|
4007
|
+
- regex: 'htmlyse'
|
4008
|
+
name: 'htmlyse'
|
4009
|
+
category: 'Crawler'
|
4010
|
+
url: 'https://www.htmlyse.com/'
|
4011
|
+
producer:
|
4012
|
+
name: 'Vistex LTD'
|
4013
|
+
url: 'https://www.htmlyse.com/'
|
4014
|
+
|
4015
|
+
- regex: 'TrendsmapResolver/[\d.]+'
|
4016
|
+
name: 'Trendsmap'
|
4017
|
+
category: 'Crawler'
|
4018
|
+
url: 'https://www.trendsmap.com/'
|
4019
|
+
producer:
|
4020
|
+
name: 'Trendsmap Pty Ltd'
|
4021
|
+
url: 'https://www.trendsmap.com/'
|
4022
|
+
|
4023
|
+
- regex: 'Shareaholic(?:bot)?/[\d.]+'
|
4024
|
+
name: 'Steve Bot'
|
4025
|
+
category: 'Crawler'
|
4026
|
+
url: 'https://www.shareaholic.com/steve'
|
4027
|
+
producer:
|
4028
|
+
name: 'Shareaholic, Inc.'
|
4029
|
+
url: 'https://www.shareaholic.com/'
|
4030
|
+
|
4031
|
+
- regex: 'keycdn-tools:'
|
4032
|
+
name: 'KeyCDN Tools'
|
4033
|
+
category: 'Service Agent'
|
4034
|
+
url: 'https://tools.keycdn.com/geo'
|
4035
|
+
|
4036
|
+
- regex: 'keycdn-tools/'
|
4037
|
+
name: 'KeyCDN Tools'
|
4038
|
+
category: 'Service Agent'
|
4039
|
+
url: 'https://tools.keycdn.com/'
|
4040
|
+
producer:
|
4041
|
+
name: 'proinity LLC'
|
4042
|
+
url: 'https://www.keycdn.com/'
|
4043
|
+
|
4044
|
+
- regex: 'Arquivo-web-crawler'
|
4045
|
+
name: 'Arquivo.pt'
|
4046
|
+
category: 'Crawler'
|
4047
|
+
url: 'https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/'
|
4048
|
+
producer:
|
4049
|
+
name: 'FCT|FCCN'
|
4050
|
+
url: 'https://www.fct.pt/'
|
4051
|
+
|
4052
|
+
- regex: 'WhatsMyIP\.org'
|
4053
|
+
name: 'WhatsMyIP.org'
|
4054
|
+
category: 'Service Agent'
|
4055
|
+
url: 'https://www.whatsmyip.org/ua/'
|
4056
|
+
|
4057
|
+
- regex: 'SenutoBot/[\d.]+'
|
4058
|
+
name: 'Senuto'
|
4059
|
+
category: 'Crawler'
|
4060
|
+
url: 'https://www.senuto.com/'
|
4061
|
+
producer:
|
4062
|
+
name: 'Senuto Sp. z o.o.'
|
4063
|
+
url: 'https://www.senuto.com/'
|
4064
|
+
|
4065
|
+
- regex: 'spaziodati'
|
4066
|
+
name: 'SpazioDati'
|
4067
|
+
category: 'Crawler'
|
4068
|
+
url: 'https://www.spaziodati.eu/'
|
4069
|
+
producer:
|
4070
|
+
name: 'SpazioDati s.r.l.'
|
4071
|
+
url: 'https://www.spaziodati.eu/'
|
4072
|
+
|
4073
|
+
- regex: 'GozleBot'
|
4074
|
+
name: 'Gozle'
|
4075
|
+
category: 'Crawler'
|
4076
|
+
url: 'https://gozle.com.tm/en/blog/post/1'
|
4077
|
+
producer:
|
4078
|
+
name: 'Doly Horjun HJ'
|
4079
|
+
url: 'https://gozle.com.tm/'
|
4080
|
+
|
4081
|
+
- regex: 'Quantcastbot/[\d.]+'
|
4082
|
+
name: 'Quantcast'
|
4083
|
+
category: 'Crawler'
|
4084
|
+
url: 'https://www.quantcast.com/bot/'
|
4085
|
+
producer:
|
4086
|
+
name: 'Quantcast Corp.'
|
4087
|
+
url: 'https://www.quantcast.com/'
|
4088
|
+
|
4089
|
+
- regex: 'FontRadar'
|
4090
|
+
name: 'FontRadar'
|
4091
|
+
category: 'Crawler'
|
4092
|
+
url: 'https://www.fontradar.com/'
|
4093
|
+
producer:
|
4094
|
+
name: 'EMDASH SAS'
|
4095
|
+
url: 'https://www.fontradar.com/'
|
4096
|
+
|
4097
|
+
- regex: 'ViberUrlDownloader'
|
4098
|
+
name: 'Viber Url Downloader'
|
4099
|
+
category: 'Service Agent'
|
4100
|
+
url: 'https://www.viber.com/'
|
4101
|
+
producer:
|
4102
|
+
name: 'Viber Media S.à r.l.'
|
4103
|
+
url: 'https://www.viber.com/'
|
4104
|
+
|
4105
|
+
- regex: '^Zeno$'
|
4106
|
+
name: 'Zeno'
|
4107
|
+
category: 'Crawler'
|
4108
|
+
url: 'https://github.com/internetarchive/Zeno'
|
4109
|
+
producer:
|
4110
|
+
name: 'The Internet Archive'
|
4111
|
+
url: 'https://archive.org/'
|
4112
|
+
|
4113
|
+
- regex: 'Barracuda Sentinel'
|
4114
|
+
name: 'Barracuda Sentinel'
|
4115
|
+
category: 'Service Agent'
|
4116
|
+
url: 'https://sentinel.barracudanetworks.com/'
|
4117
|
+
producer:
|
4118
|
+
name: 'Barracuda Networks, Inc.'
|
4119
|
+
url: 'https://www.barracudanetworks.com/'
|
4120
|
+
|
4121
|
+
- regex: 'RuxitSynthetic/[\d.]+'
|
4122
|
+
name: 'RuxitSynthetic'
|
4123
|
+
category: 'Site Monitor'
|
4124
|
+
url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
|
4125
|
+
producer:
|
4126
|
+
name: 'Dynatrace LLC'
|
4127
|
+
url: 'https://www.dynatrace.com/'
|
4128
|
+
|
4129
|
+
- regex: 'DynatraceSynthetic/[\d.]+'
|
4130
|
+
name: 'DynatraceSynthetic'
|
4131
|
+
category: 'Site Monitor'
|
4132
|
+
url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
|
4133
|
+
producer:
|
4134
|
+
name: 'Dynatrace LLC'
|
4135
|
+
url: 'https://www.dynatrace.com/'
|
4136
|
+
|
4137
|
+
- regex: 'sitebulb'
|
4138
|
+
name: 'Sitebulb'
|
4139
|
+
category: 'Crawler'
|
4140
|
+
url: 'https://sitebulb.com/'
|
4141
|
+
producer:
|
4142
|
+
name: 'Sitebulb Limited'
|
4143
|
+
url: 'https://sitebulb.com/'
|
4144
|
+
|
4145
|
+
- regex: 'Monsidobot/[\d.]+'
|
4146
|
+
name: 'Monsidobot'
|
4147
|
+
category: 'Crawler'
|
4148
|
+
url: 'https://monsido.com/bot-html'
|
4149
|
+
producer:
|
4150
|
+
name: 'Monsido LLC'
|
4151
|
+
url: 'https://monsido.com/'
|
4152
|
+
|
4153
|
+
- regex: 'AccompanyBot'
|
4154
|
+
name: 'AccompanyBot'
|
4155
|
+
category: 'Crawler'
|
4156
|
+
url: 'https://www.accompany.com/'
|
4157
|
+
producer:
|
4158
|
+
name: 'Accompani, Inc'
|
4159
|
+
url: 'https://www.accompany.com/'
|
4160
|
+
|
4161
|
+
- regex: 'Ghost Inspector'
|
4162
|
+
name: 'Ghost Inspector'
|
4163
|
+
category: 'Site Monitor'
|
4164
|
+
url: 'https://docs.ghostinspector.com/faq/#how-do-i-detect-ghost-inspector-test-runner-traffic-on-my-site'
|
4165
|
+
producer:
|
4166
|
+
name: 'Ghost Inspector, Inc.'
|
4167
|
+
url: 'https://www.ghostinspector.com/'
|
4168
|
+
|
4169
|
+
- regex: 'Cypress/[\d.]+'
|
4170
|
+
name: 'Cypress'
|
4171
|
+
category: 'Site Monitor'
|
4172
|
+
url: 'https://github.com/cypress-io/cypress'
|
4173
|
+
producer:
|
4174
|
+
name: 'Cypress.io, Inc.'
|
4175
|
+
url: 'https://www.cypress.io/'
|
4176
|
+
|
4177
|
+
- regex: 'Google-Apps-Script'
|
4178
|
+
name: 'Google Apps Script'
|
4179
|
+
category: 'Service Agent'
|
4180
|
+
url: 'https://www.google.com/script/start/'
|
4181
|
+
|
4182
|
+
- regex: 'SiteOne-Crawler/[\d.]+'
|
4183
|
+
name: 'SiteOne Crawler'
|
4184
|
+
category: 'Crawler'
|
4185
|
+
url: 'https://crawler.siteone.io/bot/'
|
4186
|
+
producer:
|
4187
|
+
name: 'SiteOne s.r.o.'
|
4188
|
+
url: 'https://www.siteone.io/'
|
4189
|
+
|
4190
|
+
- regex: 'Detectify'
|
4191
|
+
name: 'Detectify'
|
4192
|
+
category: 'Security Checker'
|
4193
|
+
url: 'https://support.detectify.com/support/solutions/articles/48001049001-how-to-allow-detectify-to-access-your-site'
|
4194
|
+
producer:
|
4195
|
+
name: 'Detectify AB'
|
4196
|
+
url: 'https://detectify.com/'
|
4197
|
+
|
4198
|
+
- regex: 'DomCopBot'
|
4199
|
+
name: 'DomCop Bot'
|
4200
|
+
category: 'Crawler'
|
4201
|
+
url: 'https://www.domcop.com/bot'
|
4202
|
+
producer:
|
4203
|
+
name: 'Axeman Technology Solutions LLP'
|
4204
|
+
url: 'https://axemantech.com/'
|
4205
|
+
|
4206
|
+
- regex: 'Paqlebot/[\d.]+'
|
4207
|
+
name: 'Paqlebot'
|
4208
|
+
category: 'Crawler'
|
4209
|
+
url: 'https://www.paqle.dk/about/paqlebot'
|
4210
|
+
producer:
|
4211
|
+
name: 'Paqle A/S'
|
4212
|
+
url: 'https://www.paqle.dk/'
|
4213
|
+
|
4214
|
+
- regex: 'Wibybot'
|
4215
|
+
name: 'Wibybot'
|
4216
|
+
category: 'Crawler'
|
4217
|
+
url: 'https://www.wiby.me/'
|
4218
|
+
|
4219
|
+
- regex: 'Synapse'
|
4220
|
+
name: 'Synapse'
|
4221
|
+
category: 'Crawler'
|
4222
|
+
url: 'https://github.com/matrix-org/synapse'
|
4223
|
+
|
4224
|
+
- regex: 'OSZKbot/[\d.]+'
|
4225
|
+
name: 'OSZKbot'
|
4226
|
+
category: 'Crawler'
|
4227
|
+
url: 'http://mekosztaly.oszk.hu/mia/'
|
4228
|
+
producer:
|
4229
|
+
name: 'National Szechenyi Library'
|
4230
|
+
url: 'https://webarchivum.oszk.hu/'
|
4231
|
+
|
4232
|
+
- regex: 'ZoomBot'
|
4233
|
+
name: 'ZoomBot'
|
4234
|
+
category: 'Crawler'
|
4235
|
+
url: 'https://suite.seozoom.it/bot.html'
|
4236
|
+
producer:
|
4237
|
+
name: 'SEO Cube S.r.l.'
|
4238
|
+
url: 'https://www.seocube.it/'
|
4239
|
+
|
4240
|
+
- regex: 'RavenCrawler/[\d.]+'
|
4241
|
+
name: 'RavenCrawler'
|
4242
|
+
category: 'Crawler'
|
4243
|
+
url: 'https://raventools.com/site-auditor/'
|
4244
|
+
producer:
|
4245
|
+
name: 'TapClicks, Inc.'
|
4246
|
+
url: 'https://www.tapclicks.com/'
|
4247
|
+
|
4248
|
+
- regex: 'KadoBot'
|
4249
|
+
name: 'KadoBot'
|
4250
|
+
category: 'Crawler'
|
4251
|
+
url: 'https://www.kadolijst.nl/bot'
|
4252
|
+
producer:
|
4253
|
+
name: 'Kadolijst'
|
4254
|
+
url: 'https://www.kadolijst.nl/'
|
4255
|
+
|
4256
|
+
- regex: 'Dubbotbot/[\d.]+'
|
4257
|
+
name: 'Dubbotbot'
|
4258
|
+
category: 'Crawler'
|
4259
|
+
url: 'https://help.dubbot.com/en/articles/6746594-example-custom-user-agent'
|
4260
|
+
producer:
|
4261
|
+
name: 'DubBot'
|
4262
|
+
url: 'https://dubbot.com/'
|
4263
|
+
|
4264
|
+
- regex: 'Swiftbot/[\d.]+'
|
4265
|
+
name: 'Swiftbot'
|
4266
|
+
category: 'Crawler'
|
4267
|
+
url: 'https://swiftype.com/swiftbot'
|
4268
|
+
producer:
|
4269
|
+
name: 'Elasticsearch, B.V.'
|
4270
|
+
url: 'https://www.elastic.co/'
|
4271
|
+
|
4272
|
+
- regex: 'EyeMonIT'
|
4273
|
+
name: 'EyeMonit'
|
4274
|
+
category: 'Site Monitor'
|
4275
|
+
url: 'https://eyemonit.com/'
|
4276
|
+
producer:
|
4277
|
+
name: 'EyeMonit'
|
4278
|
+
url: 'https://eyemonit.com/'
|
4279
|
+
|
4280
|
+
- regex: 'ThousandEyes'
|
4281
|
+
name: 'ThousandEyes'
|
4282
|
+
category: 'Site Monitor'
|
4283
|
+
url: 'https://www.thousandeyes.com/'
|
4284
|
+
producer:
|
4285
|
+
name: 'Cisco Systems, Inc.'
|
4286
|
+
url: 'https://www.cisco.com/'
|
4287
|
+
|
4288
|
+
- regex: 'OmtrBot/[\d.]+'
|
4289
|
+
name: 'OmtrBot'
|
4290
|
+
category: 'Site Monitor'
|
4291
|
+
|
4292
|
+
- regex: 'WebMon/[\d.]+'
|
4293
|
+
name: 'WebMon'
|
4294
|
+
category: 'Site Monitor'
|
4295
|
+
|
4296
|
+
- regex: 'AdsTxtCrawlerTP/[\d.]+'
|
4297
|
+
name: 'AdsTxtCrawlerTP'
|
4298
|
+
category: 'Crawler'
|
4299
|
+
|
4300
|
+
- regex: 'fragFINN'
|
4301
|
+
name: 'fragFINN'
|
4302
|
+
category: 'Crawler'
|
4303
|
+
url: 'https://www.fragfinn.de/'
|
4304
|
+
producer:
|
4305
|
+
name: 'fragFINN e.V.'
|
4306
|
+
url: 'https://www.fragfinn.de/'
|
4307
|
+
|
4308
|
+
- regex: 'Clickagy'
|
4309
|
+
name: 'Clickagy'
|
4310
|
+
category: 'Crawler'
|
4311
|
+
url: 'https://www.clickagy.com/'
|
4312
|
+
producer:
|
4313
|
+
name: 'Clickagy, LLC'
|
4314
|
+
url: 'https://www.clickagy.com/'
|
4315
|
+
|
4316
|
+
- regex: 'kiwitcms-gitops/[\d.]+'
|
4317
|
+
name: 'Kiwi TCMS GitOps'
|
4318
|
+
category: 'Service Agent'
|
4319
|
+
url: 'https://kiwitcms.org'
|
4320
|
+
producer:
|
4321
|
+
name: 'Open Technologies Bulgaria, Ltd.'
|
4322
|
+
url: 'https://kiwitcms.org'
|
4323
|
+
|
4324
|
+
- regex: 'webtru_crawler'
|
4325
|
+
name: 'webtru'
|
4326
|
+
category: 'Crawler'
|
4327
|
+
url: 'https://webtru.io/'
|
4328
|
+
producer:
|
4329
|
+
name: 'DataSign Inc.'
|
4330
|
+
url: 'https://datasign.jp/'
|
4331
|
+
|
4332
|
+
- regex: 'URLSuMaBot'
|
4333
|
+
name: 'URLSuMaBot'
|
4334
|
+
category: 'Crawler'
|
4335
|
+
url: 'https://www.urlsuma.de/'
|
4336
|
+
|
4337
|
+
- regex: '360JK yunjiankong'
|
4338
|
+
name: '360JK'
|
4339
|
+
category: 'Site Monitor'
|
4340
|
+
url: 'http://jk.cloud.360.cn/'
|
4341
|
+
producer:
|
4342
|
+
name: '360 Security Technology Inc.'
|
4343
|
+
url: 'https://www.360.cn/'
|
4344
|
+
|
4345
|
+
- regex: 'UCSBNetworkMeasurement'
|
4346
|
+
name: 'UCSB Network Measurement'
|
4347
|
+
category: 'Crawler'
|
4348
|
+
url: 'https://www.it.ucsb.edu/'
|
4349
|
+
producer:
|
4350
|
+
name: 'University of California, Santa Barbara'
|
4351
|
+
url: 'https://www.it.ucsb.edu/'
|
4352
|
+
|
4353
|
+
- regex: 'Plesk screenshot bot'
|
4354
|
+
name: 'Plesk Screenshot Service'
|
4355
|
+
category: 'Service Agent'
|
4356
|
+
url: 'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service'
|
4357
|
+
producer:
|
4358
|
+
name: 'Plesk International GmbH'
|
4359
|
+
url: 'https://www.plesk.com/'
|
4360
|
+
|
4361
|
+
- regex: 'Who\.is'
|
4362
|
+
name: 'Who.is Bot'
|
4363
|
+
category: 'Crawler'
|
4364
|
+
url: 'https://who.is/'
|
4365
|
+
|
4366
|
+
- regex: 'Probely'
|
4367
|
+
name: 'Probely'
|
4368
|
+
category: 'Security Checker'
|
4369
|
+
url: 'https://probely.com/sos/'
|
4370
|
+
producer:
|
4371
|
+
name: 'Probely - Soluções de Cibersegurança, S.A.'
|
4372
|
+
url: 'https://probely.com/'
|
4373
|
+
|
4374
|
+
- regex: 'Uptimia(?:/[\d.]+)?'
|
4375
|
+
name: 'Uptimia'
|
4376
|
+
category: 'Site Monitor'
|
4377
|
+
url: 'https://www.uptimia.com/'
|
4378
|
+
producer:
|
4379
|
+
name: 'JJ Online GmbH'
|
4380
|
+
url: 'https://www.uptimia.com/'
|
4381
|
+
|
4382
|
+
- regex: '2GDPR/[\d.]+'
|
4383
|
+
name: '2GDPR'
|
4384
|
+
category: 'Service Agent'
|
4385
|
+
url: 'https://2gdpr.com/tos'
|
4386
|
+
producer:
|
4387
|
+
name: '2GDPR'
|
4388
|
+
url: 'https://2gdpr.com/'
|
4389
|
+
|
4390
|
+
- regex: 'abuse\.xmco\.fr'
|
4391
|
+
name: 'Serenety'
|
4392
|
+
category: 'Security Checker'
|
4393
|
+
url: 'https://abuse.xmco.fr/'
|
4394
|
+
producer:
|
4395
|
+
name: 'XMCO, SASU'
|
4396
|
+
url: 'https://www.xmco.fr/'
|
4397
|
+
|
4398
|
+
- regex: 'CheckHost'
|
4399
|
+
name: 'CheckHost'
|
4400
|
+
category: 'Site Monitor'
|
4401
|
+
url: 'https://check-host.net/'
|
4402
|
+
producer:
|
4403
|
+
name: 'CheckHost'
|
4404
|
+
url: 'https://check-host.net/'
|
4405
|
+
|
4406
|
+
- regex: 'LAC_IAHarvester/[\d.]+'
|
4407
|
+
name: 'LAC IA Harvester'
|
4408
|
+
category: 'Crawler'
|
4409
|
+
url: 'https://library-archives.canada.ca/eng/services/government-canada/web-social-media-preservation-program/Pages/web-archive.aspx'
|
4410
|
+
producer:
|
4411
|
+
name: 'Library and Archives Canada'
|
4412
|
+
url: 'https://library-archives.canada.ca/'
|
4413
|
+
|
4414
|
+
- regex: 'InsytfulBot/[\d.]+'
|
4415
|
+
name: 'InsytfulBot'
|
4416
|
+
category: 'Crawler'
|
4417
|
+
url: 'https://www.insytful.com/'
|
4418
|
+
producer:
|
4419
|
+
name: 'Zengenti Limited'
|
4420
|
+
url: 'https://www.zengenti.com/'
|
4421
|
+
|
4422
|
+
- regex: 'statista\.com'
|
4423
|
+
name: 'Statista'
|
4424
|
+
category: 'Crawler'
|
4425
|
+
url: 'https://www.statista.com/'
|
4426
|
+
producer:
|
4427
|
+
name: 'Statista, Inc.'
|
4428
|
+
url: 'https://www.statista.com/'
|
4429
|
+
|
4430
|
+
- regex: 'SubstackContentFetch/[\d.]+'
|
4431
|
+
name: 'Substack Content Fetch'
|
4432
|
+
category: 'Crawler'
|
4433
|
+
url: 'https://substack.com/'
|
4434
|
+
producer:
|
4435
|
+
name: 'Substack, Inc.'
|
4436
|
+
url: 'https://substack.com/'
|
4437
|
+
|
4438
|
+
- regex: '^ds9'
|
4439
|
+
name: 'Deep SEARCH 9'
|
4440
|
+
category: 'Crawler'
|
4441
|
+
url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/'
|
4442
|
+
producer:
|
4443
|
+
name: 'Copyright Clearance Center, Inc.'
|
4444
|
+
url: 'https://www.copyright.com/'
|
4445
|
+
|
4446
|
+
- regex: 'LiveJournal\.com'
|
4447
|
+
name: 'LiveJournal'
|
4448
|
+
url: 'https://www.livejournal.com/'
|
4449
|
+
category: 'Feed Fetcher'
|
4450
|
+
producer:
|
4451
|
+
name: 'ООО "СИМ"'
|
4452
|
+
url: 'https://www.livejournal.com/'
|
4453
|
+
|
4454
|
+
- regex: 'bitdiscovery'
|
4455
|
+
name: 'Tenable.asm'
|
4456
|
+
category: 'Security Checker'
|
4457
|
+
url: 'https://bitdiscovery.com/'
|
4458
|
+
producer:
|
4459
|
+
name: 'Tenable, Inc.'
|
4460
|
+
url: 'https://www.tenable.com/'
|
4461
|
+
|
4462
|
+
- regex: 'Castopod/[\d.]+'
|
4463
|
+
name: 'Castopod'
|
4464
|
+
category: 'Crawler'
|
4465
|
+
url: 'https://www.castopod.org/'
|
4466
|
+
|
4467
|
+
- regex: 'Elastic/Synthetics'
|
4468
|
+
name: 'Elastic Synthetics'
|
4469
|
+
category: 'Site Monitor'
|
4470
|
+
url: 'https://github.com/elastic/synthetics'
|
4471
|
+
producer:
|
4472
|
+
name: 'Elasticsearch B.V.'
|
4473
|
+
url: 'https://www.elastic.co/'
|
4474
|
+
|
4475
|
+
- regex: 'WDG_Validator/[\d.]+'
|
4476
|
+
name: 'WDG HTML Validator'
|
4477
|
+
category: 'Validator'
|
4478
|
+
url: 'http://www.htmlhelp.com/tools/validator/'
|
4479
|
+
|
4480
|
+
- regex: 'scan@aegis.network'
|
4481
|
+
name: 'Aegis'
|
4482
|
+
category: 'Crawler'
|
4483
|
+
url: 'https://web.archive.org/web/20180910002802/http://www.aegis.network/'
|
4484
|
+
|
4485
|
+
- regex: 'CrawlyProjectCrawler/[\d.]+'
|
4486
|
+
name: 'Crawly Project'
|
4487
|
+
category: 'Crawler'
|
4488
|
+
url: 'https://web.archive.org/web/20240326141952/https://crawlyproject.digitaldragon.dev/'
|
4489
|
+
|
4490
|
+
- regex: 'BDFetch'
|
4491
|
+
name: 'BDFetch'
|
4492
|
+
category: 'Crawler'
|
4493
|
+
url: 'https://web.archive.org/web/20130821043949/http://www.branddimensions.com/'
|
4494
|
+
|
4495
|
+
- regex: 'PunkMap'
|
4496
|
+
name: 'Punk Map'
|
4497
|
+
category: 'Security Checker'
|
4498
|
+
url: 'https://github.com/openeasm/punkmap'
|
4499
|
+
|
4500
|
+
- regex: 'GenomeCrawlerd/[\d.]+'
|
4501
|
+
name: 'Deepfield Genome'
|
4502
|
+
category: 'Crawler'
|
4503
|
+
url: 'https://www.nokia.com/networks/ip-networks/deepfield/genome/'
|
4504
|
+
producer:
|
4505
|
+
name: 'Nokia Corporation'
|
4506
|
+
url: 'https://www.nokia.com/'
|
4507
|
+
|
4508
|
+
- regex: 'Gaisbot/[\d.]+'
|
4509
|
+
name: 'Gaisbot'
|
4510
|
+
category: 'Crawler'
|
4511
|
+
url: 'https://web.archive.org/web/20090604121511/https://gais.cs.ccu.edu.tw/robot.php'
|
4512
|
+
|
4513
|
+
- regex: 'FAST-WebCrawler/[\d.]+'
|
4514
|
+
name: 'AlltheWeb'
|
4515
|
+
category: 'Crawler'
|
4516
|
+
url: 'https://web.archive.org/web/20041020050801/http://www.alltheweb.com/help/webmaster/crawler'
|
4517
|
+
|
4518
|
+
- regex: 'ducks\.party'
|
4519
|
+
name: 'ducks.party'
|
4520
|
+
category: 'Security Checker'
|
4521
|
+
url: 'https://ducks.party/'
|
4522
|
+
|
4523
|
+
- regex: 'DepSpid/[\d.]+'
|
4524
|
+
name: 'DepSpid'
|
4525
|
+
category: 'Crawler'
|
4526
|
+
url: 'https://web.archive.org/web/20080321224033/http://about.depspid.net/'
|
4527
|
+
|
4528
|
+
- regex: 'Website-info\.net'
|
4529
|
+
name: 'Website-info'
|
4530
|
+
category: 'Crawler'
|
4531
|
+
url: 'https://website-info.net/robot'
|
4532
|
+
producer:
|
4533
|
+
name: 'Meins und Vogel GmbH'
|
4534
|
+
url: 'https://muv.com/'
|
4535
|
+
|
4536
|
+
# Generic bots
|
4537
|
+
- regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|url|Zeus|ZmEu)$'
|
4538
|
+
name: 'Generic Bot'
|
4539
|
+
|
3511
4540
|
# Generic detections
|
3512
|
-
- regex: '[a-z0-
|
4541
|
+
- regex: '[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\d\.[x\d] |electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft |banshee-)project(?!or)|(?<!Google Wap |Blue |SpeedMode; )proxy|research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider(?! 8)|study|transcoder|uptime|user[ _]?agent|validator)(?:[^a-z]|$)'
|
3513
4542
|
name: 'Generic Bot'
|