device_detector 1.1.2 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -1
- data/README.md +19 -5
- data/lib/device_detector/browser.rb +141 -7
- data/lib/device_detector/client_hint.rb +100 -32
- data/lib/device_detector/device.rb +188 -1
- data/lib/device_detector/os.rb +109 -7
- data/lib/device_detector/parser.rb +6 -5
- data/lib/device_detector/version.rb +1 -1
- data/lib/device_detector.rb +56 -23
- data/regexes/bots.yml +1242 -213
- data/regexes/client/browser_engine.yml +11 -2
- data/regexes/client/browsers.yml +543 -102
- data/regexes/client/feed_readers.yml +1 -1
- data/regexes/client/hints/apps.yml +29 -3
- data/regexes/client/hints/browsers.yml +87 -5
- data/regexes/client/libraries.yml +107 -1
- data/regexes/client/mediaplayers.yml +15 -1
- data/regexes/client/mobile_apps.yml +413 -117
- data/regexes/client/pim.yml +36 -2
- data/regexes/device/car_browsers.yml +16 -0
- data/regexes/device/consoles.yml +18 -5
- data/regexes/device/mobiles.yml +4180 -1210
- data/regexes/device/notebooks.yml +14 -1
- data/regexes/device/portable_media_player.yml +7 -1
- data/regexes/device/shell_tv.yml +12 -0
- data/regexes/device/televisions.yml +409 -47
- data/regexes/oss.yml +661 -238
- metadata +3 -3
data/regexes/bots.yml
CHANGED
@@ -5,6 +5,11 @@
|
|
5
5
|
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
6
6
|
###############
|
7
7
|
|
8
|
+
- regex: 'WireReaderBot(?:/([\d+.]+))?'
|
9
|
+
name: 'WireReaderBot'
|
10
|
+
category: 'Feed Fetcher'
|
11
|
+
url: 'https://wirereader.app/'
|
12
|
+
|
8
13
|
- regex: 'monitoring360bot'
|
9
14
|
name: '360 Monitoring'
|
10
15
|
category: 'Site Monitor'
|
@@ -61,7 +66,7 @@
|
|
61
66
|
name: 'Ahrefs Pte Ltd'
|
62
67
|
url: 'https://ahrefs.com/robot'
|
63
68
|
|
64
|
-
- regex: 'AhrefsSiteAudit/
|
69
|
+
- regex: 'AhrefsSiteAudit/[\d.]+'
|
65
70
|
name: 'AhrefsSiteAudit'
|
66
71
|
category: 'Site Monitor'
|
67
72
|
url: 'https://ahrefs.com/robot/site-audit'
|
@@ -85,7 +90,7 @@
|
|
85
90
|
name: 'Alexa Internet'
|
86
91
|
url: 'https://www.alexa.com'
|
87
92
|
|
88
|
-
- regex: 'Amazonbot'
|
93
|
+
- regex: 'Amazonbot/[\d.]+'
|
89
94
|
name: 'Amazon Bot'
|
90
95
|
category: 'Crawler'
|
91
96
|
url: 'https://developer.amazon.com/support/amazonbot'
|
@@ -93,6 +98,14 @@
|
|
93
98
|
name: 'Amazon.com, Inc.'
|
94
99
|
url: 'https://www.amazon.com/'
|
95
100
|
|
101
|
+
- regex: 'AmazonAdBot/[\d.]+'
|
102
|
+
name: 'Amazon AdBot'
|
103
|
+
category: 'Crawler'
|
104
|
+
url: 'https://adbot.amazon.com/'
|
105
|
+
producer:
|
106
|
+
name: 'Amazon.com, Inc.'
|
107
|
+
url: 'https://www.amazon.com/'
|
108
|
+
|
96
109
|
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
|
97
110
|
name: 'Amazon Route53 Health Check'
|
98
111
|
category: 'Service Agent'
|
@@ -119,10 +132,18 @@
|
|
119
132
|
- regex: 'Applebot'
|
120
133
|
name: 'Applebot'
|
121
134
|
category: 'Crawler'
|
122
|
-
url: 'https://support.apple.com/en-us/
|
135
|
+
url: 'https://support.apple.com/en-us/119829'
|
123
136
|
producer:
|
124
137
|
name: 'Apple Inc'
|
125
|
-
url: 'https://www.apple.com'
|
138
|
+
url: 'https://www.apple.com/'
|
139
|
+
|
140
|
+
- regex: 'iTMS'
|
141
|
+
name: 'iTMS'
|
142
|
+
category: 'Crawler'
|
143
|
+
url: 'https://support.apple.com/en-us/119829'
|
144
|
+
producer:
|
145
|
+
name: 'Apple Inc'
|
146
|
+
url: 'https://www.apple.com/'
|
126
147
|
|
127
148
|
- regex: 'AppSignalBot'
|
128
149
|
name: 'AppSignalBot'
|
@@ -220,7 +241,7 @@
|
|
220
241
|
name: 'Better Uptime'
|
221
242
|
url: 'https://betteruptime.com/'
|
222
243
|
|
223
|
-
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
|
244
|
+
- regex: 'MSNBot|msrbot|bingbot|bingadsbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
|
224
245
|
name: 'BingBot'
|
225
246
|
category: 'Search bot'
|
226
247
|
url: 'http://search.msn.com/msnbot.htmn'
|
@@ -371,7 +392,23 @@
|
|
371
392
|
name: 'CloudFlare'
|
372
393
|
url: 'https://www.cloudflare.com/'
|
373
394
|
|
374
|
-
- regex: '
|
395
|
+
- regex: 'Cloudflare-Smart-Transit'
|
396
|
+
name: 'Cloudflare Smart Transit'
|
397
|
+
category: 'Site Monitor'
|
398
|
+
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
399
|
+
producer:
|
400
|
+
name: 'CloudFlare'
|
401
|
+
url: 'https://www.cloudflare.com/'
|
402
|
+
|
403
|
+
- regex: 'CloudflareObservatory'
|
404
|
+
name: 'Cloudflare Observatory'
|
405
|
+
category: 'Site Monitor'
|
406
|
+
url: 'https://developers.cloudflare.com/speed/speed-test/run-speed-test'
|
407
|
+
producer:
|
408
|
+
name: 'CloudFlare'
|
409
|
+
url: 'https://www.cloudflare.com/'
|
410
|
+
|
411
|
+
- regex: 'https://developers\.cloudflare\.com/security-center/'
|
375
412
|
name: 'Cloudflare Security Insights'
|
376
413
|
category: 'Site Monitor'
|
377
414
|
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
@@ -379,7 +416,7 @@
|
|
379
416
|
name: 'CloudFlare'
|
380
417
|
url: 'https://www.cloudflare.com/'
|
381
418
|
|
382
|
-
- regex: 'coccoc
|
419
|
+
- regex: 'coccoc\.com'
|
383
420
|
name: 'Cốc Cốc Bot'
|
384
421
|
url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
|
385
422
|
category: 'Search bot'
|
@@ -435,7 +472,7 @@
|
|
435
472
|
name: 'Dataprovider B.V.'
|
436
473
|
url: 'https://www.dataprovider.com/'
|
437
474
|
|
438
|
-
- regex: 'Daum(
|
475
|
+
- regex: 'Daum(?!(?:Apps|Device))'
|
439
476
|
name: 'Daum'
|
440
477
|
category: 'Search bot'
|
441
478
|
url: 'http://tab.search.daum.net/aboutWebSearch_en.html'
|
@@ -459,7 +496,7 @@
|
|
459
496
|
name: 'Discovery Engine'
|
460
497
|
url: 'http://discoveryengine.com'
|
461
498
|
|
462
|
-
- regex: 'Domain Re-Animator Bot|support@domainreanimator
|
499
|
+
- regex: 'Domain Re-Animator Bot|support@domainreanimator\.com'
|
463
500
|
name: 'Domain Re-Animator Bot'
|
464
501
|
category: 'Crawler'
|
465
502
|
url: ''
|
@@ -538,13 +575,21 @@
|
|
538
575
|
name: 'SEOmoz, Inc.'
|
539
576
|
url: 'http://moz.com/'
|
540
577
|
|
541
|
-
- regex: '
|
542
|
-
name: 'Facebook
|
578
|
+
- regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)'
|
579
|
+
name: 'Facebook Crawler'
|
543
580
|
category: 'Social Media Agent'
|
544
|
-
url: 'https://
|
581
|
+
url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/'
|
545
582
|
producer:
|
546
|
-
name: '
|
547
|
-
url: '
|
583
|
+
name: 'Meta Platforms, Inc.'
|
584
|
+
url: 'https://www.meta.com/'
|
585
|
+
|
586
|
+
- regex: 'FacebookBot/[\d.]+'
|
587
|
+
name: 'FacebookBot'
|
588
|
+
category: 'Crawler'
|
589
|
+
url: 'https://developers.facebook.com/docs/sharing/bot'
|
590
|
+
producer:
|
591
|
+
name: 'Meta Platforms, Inc.'
|
592
|
+
url: 'https://www.meta.com/'
|
548
593
|
|
549
594
|
- regex: 'Feedbin'
|
550
595
|
name: 'Feedbin'
|
@@ -662,7 +707,7 @@
|
|
662
707
|
url: 'https://search.google.com/search-console/about'
|
663
708
|
producer:
|
664
709
|
name: 'Google Inc.'
|
665
|
-
url: '
|
710
|
+
url: 'https://www.google.com/'
|
666
711
|
|
667
712
|
- regex: 'Google Page Speed Insights'
|
668
713
|
name: 'Google PageSpeed Insights'
|
@@ -670,7 +715,7 @@
|
|
670
715
|
url: 'http://developers.google.com/speed/pagespeed/insights/'
|
671
716
|
producer:
|
672
717
|
name: 'Google Inc.'
|
673
|
-
url: '
|
718
|
+
url: 'https://www.google.com/'
|
674
719
|
|
675
720
|
- regex: 'google_partner_monitoring'
|
676
721
|
name: 'Google Partner Monitoring'
|
@@ -678,7 +723,7 @@
|
|
678
723
|
url: ''
|
679
724
|
producer:
|
680
725
|
name: 'Google Inc.'
|
681
|
-
url: '
|
726
|
+
url: 'https://www.google.com/'
|
682
727
|
|
683
728
|
- regex: 'Google-Cloud-Scheduler'
|
684
729
|
name: 'Google Cloud Scheduler'
|
@@ -694,7 +739,7 @@
|
|
694
739
|
url: 'https://search.google.com/structured-data/testing-tool'
|
695
740
|
producer:
|
696
741
|
name: 'Google Inc.'
|
697
|
-
url: '
|
742
|
+
url: 'https://www.google.com/'
|
698
743
|
|
699
744
|
- regex: 'GoogleStackdriverMonitoring'
|
700
745
|
name: 'Google Stackdriver Monitoring'
|
@@ -704,13 +749,21 @@
|
|
704
749
|
name: 'Google Inc.'
|
705
750
|
url: 'https://www.google.com'
|
706
751
|
|
752
|
+
- regex: 'Google-Transparency-Report'
|
753
|
+
name: 'Google Transparency Report'
|
754
|
+
category: 'Site Monitor'
|
755
|
+
url: 'https://transparencyreport.google.com/'
|
756
|
+
producer:
|
757
|
+
name: 'Google Inc.'
|
758
|
+
url: 'https://www.google.com/'
|
759
|
+
|
707
760
|
- regex: 'via ggpht\.com GoogleImageProxy'
|
708
761
|
name: 'Gmail Image Proxy'
|
709
762
|
category: 'Crawler'
|
710
763
|
url: ''
|
711
764
|
producer:
|
712
765
|
name: 'Google Inc.'
|
713
|
-
url: '
|
766
|
+
url: 'https://www.google.com/'
|
714
767
|
|
715
768
|
- regex: 'SeznamEmailProxy'
|
716
769
|
name: 'Seznam Email Proxy'
|
@@ -744,21 +797,37 @@
|
|
744
797
|
name: 'Visual Meta'
|
745
798
|
url: 'https://www.shopalike.cz/'
|
746
799
|
|
747
|
-
- regex: '
|
800
|
+
- regex: 'Googlebot-News'
|
801
|
+
name: 'Googlebot News'
|
802
|
+
category: 'Search bot'
|
803
|
+
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
|
804
|
+
producer:
|
805
|
+
name: 'Google Inc.'
|
806
|
+
url: 'https://www.google.com/'
|
807
|
+
|
808
|
+
- regex: 'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Safety|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\+/web/snippet'
|
748
809
|
name: 'Googlebot'
|
749
810
|
category: 'Search bot'
|
750
|
-
url: '
|
811
|
+
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
|
751
812
|
producer:
|
752
813
|
name: 'Google Inc.'
|
753
|
-
url: '
|
814
|
+
url: 'https://www.google.com/'
|
754
815
|
|
755
816
|
- regex: '^Google$'
|
756
817
|
name: 'Googlebot'
|
757
818
|
category: 'Search bot'
|
758
|
-
url: '
|
819
|
+
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
|
759
820
|
producer:
|
760
821
|
name: 'Google Inc.'
|
761
|
-
url: '
|
822
|
+
url: 'https://www.google.com/'
|
823
|
+
|
824
|
+
- regex: 'Google-Area120-PrivacyPolicyFetcher'
|
825
|
+
name: 'Google Area 120 Privacy Policy Fetcher'
|
826
|
+
category: 'Crawler'
|
827
|
+
url: 'https://area120.google.com/'
|
828
|
+
producer:
|
829
|
+
name: 'Google Inc.'
|
830
|
+
url: 'https://www.google.com/'
|
762
831
|
|
763
832
|
- regex: 'heritrix'
|
764
833
|
name: 'Heritrix'
|
@@ -780,7 +849,7 @@
|
|
780
849
|
category: 'Crawler'
|
781
850
|
url: 'http://vuhuv.com/bot.html'
|
782
851
|
|
783
|
-
- regex: 'HTTPMon'
|
852
|
+
- regex: 'HTTPMon/[\d.]+'
|
784
853
|
name: 'HTTPMon'
|
785
854
|
category: 'Site Monitor'
|
786
855
|
url: 'http://www.httpmon.com'
|
@@ -796,7 +865,7 @@
|
|
796
865
|
name: ''
|
797
866
|
url: ''
|
798
867
|
|
799
|
-
- regex: 'inoreader
|
868
|
+
- regex: 'inoreader\.com'
|
800
869
|
name: 'inoreader'
|
801
870
|
category: 'Feed Reader'
|
802
871
|
url: 'https://www.inoreader.com'
|
@@ -844,7 +913,7 @@
|
|
844
913
|
name: ''
|
845
914
|
url: ''
|
846
915
|
|
847
|
-
- regex: '
|
916
|
+
- regex: '[A-z0-9]*-Lighthouse'
|
848
917
|
name: 'Lighthouse'
|
849
918
|
category: 'Site Monitor'
|
850
919
|
url: 'https://developers.google.com/web/tools/lighthouse'
|
@@ -878,7 +947,8 @@
|
|
878
947
|
|
879
948
|
- regex: 'ltx71'
|
880
949
|
name: 'LTX71'
|
881
|
-
|
950
|
+
category: 'Security Checker'
|
951
|
+
url: 'https://ltx71.com/'
|
882
952
|
producer:
|
883
953
|
name: ''
|
884
954
|
url: ''
|
@@ -907,7 +977,7 @@
|
|
907
977
|
name: ''
|
908
978
|
url: ''
|
909
979
|
|
910
|
-
- regex: 'masscan-ng/
|
980
|
+
- regex: 'masscan-ng/[\d.]+'
|
911
981
|
name: 'masscan-ng'
|
912
982
|
url: 'https://github.com/bi-zone/masscan-ng'
|
913
983
|
category: 'Crawler'
|
@@ -915,7 +985,7 @@
|
|
915
985
|
name: 'BIZON, OOO'
|
916
986
|
url: 'https://bi.zone/'
|
917
987
|
|
918
|
-
- regex: 'masscan'
|
988
|
+
- regex: '.*masscan'
|
919
989
|
name: 'masscan'
|
920
990
|
url: 'https://github.com/robertdavidgraham/masscan'
|
921
991
|
category: 'Crawler'
|
@@ -1003,11 +1073,11 @@
|
|
1003
1073
|
name: 'Nagios Plugins Development Team'
|
1004
1074
|
url: 'https://nagios.org'
|
1005
1075
|
|
1006
|
-
- regex: 'nbertaupete95\(at\)gmail
|
1076
|
+
- regex: 'nbertaupete95\(at\)gmail\.com'
|
1007
1077
|
name: 'nbertaupete95'
|
1008
1078
|
category: 'Crawler'
|
1009
1079
|
|
1010
|
-
- regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
|
1080
|
+
- regex: 'Netcraft(?: Web Server Survey| SSL Server Survey|SurveyAgent)'
|
1011
1081
|
name: 'Netcraft Survey Bot'
|
1012
1082
|
category: 'Search bot'
|
1013
1083
|
url: ''
|
@@ -1031,7 +1101,7 @@
|
|
1031
1101
|
name: ''
|
1032
1102
|
url: ''
|
1033
1103
|
|
1034
|
-
- regex: 'NewsBlur .*(Fetcher|Finder)'
|
1104
|
+
- regex: 'NewsBlur .*(?:Fetcher|Finder)'
|
1035
1105
|
name: 'NewsBlur'
|
1036
1106
|
url: 'http://www.newsblur.com'
|
1037
1107
|
category: 'Feed Fetcher'
|
@@ -1070,10 +1140,18 @@
|
|
1070
1140
|
name: 'Nuzzel'
|
1071
1141
|
url: 'https://www.nuzzel.com/'
|
1072
1142
|
|
1143
|
+
- regex: 'NodePing'
|
1144
|
+
name: 'NodePing'
|
1145
|
+
category: 'Site Monitor'
|
1146
|
+
url: 'https://nodeping.com'
|
1147
|
+
producer:
|
1148
|
+
name: 'NodePing'
|
1149
|
+
url: 'https://nodeping.com'
|
1150
|
+
|
1073
1151
|
- regex: 'Octopus [0-9]'
|
1074
1152
|
name: 'Octopus'
|
1075
1153
|
|
1076
|
-
- regex: 'OnlineOrNot
|
1154
|
+
- regex: 'OnlineOrNot\.com_bot'
|
1077
1155
|
name: 'OnlineOrNot Bot'
|
1078
1156
|
category: 'Site Monitor'
|
1079
1157
|
url: 'https://onlineornot.com/website-monitoring'
|
@@ -1142,7 +1220,7 @@
|
|
1142
1220
|
name: 'PHP Server Monitor'
|
1143
1221
|
url: 'http://www.phpservermonitor.org/'
|
1144
1222
|
|
1145
|
-
- regex: 'Pocket(?:ImageCache|Parser)/
|
1223
|
+
- regex: 'Pocket(?:ImageCache|Parser)/[\d.]+'
|
1146
1224
|
name: 'Pocket'
|
1147
1225
|
category: 'Read-it-later Service'
|
1148
1226
|
url: 'https://getpocket.com/pocketparser_ua'
|
@@ -1289,12 +1367,36 @@
|
|
1289
1367
|
url: ''
|
1290
1368
|
|
1291
1369
|
- regex: 'SemrushBot'
|
1292
|
-
name: '
|
1370
|
+
name: 'SemrushBot'
|
1371
|
+
category: 'Crawler'
|
1372
|
+
url: 'https://www.semrush.com/bot/'
|
1373
|
+
producer:
|
1374
|
+
name: 'Semrush Inc.'
|
1375
|
+
url: 'https://www.semrush.com/'
|
1376
|
+
|
1377
|
+
- regex: 'SerpReputationManagementAgent/[\d.]+'
|
1378
|
+
name: 'Semrush Reputation Management'
|
1379
|
+
category: 'Service Agent'
|
1380
|
+
url: 'https://www.semrush.com/bot/'
|
1381
|
+
producer:
|
1382
|
+
name: 'Semrush Inc.'
|
1383
|
+
url: 'https://www.semrush.com/'
|
1384
|
+
|
1385
|
+
- regex: 'SplitSignalBot'
|
1386
|
+
name: 'SplitSignalBot'
|
1293
1387
|
category: 'Crawler'
|
1294
|
-
url: '
|
1388
|
+
url: 'https://www.semrush.com/bot/'
|
1295
1389
|
producer:
|
1296
|
-
name: '
|
1297
|
-
url: '
|
1390
|
+
name: 'Semrush Inc.'
|
1391
|
+
url: 'https://www.semrush.com/'
|
1392
|
+
|
1393
|
+
- regex: 'SiteAuditBot/[\d.]+'
|
1394
|
+
name: 'SiteAuditBot'
|
1395
|
+
category: 'Crawler'
|
1396
|
+
url: 'https://www.semrush.com/bot/'
|
1397
|
+
producer:
|
1398
|
+
name: 'Semrush Inc.'
|
1399
|
+
url: 'https://www.semrush.com/'
|
1298
1400
|
|
1299
1401
|
- regex: 'SensikaBot'
|
1300
1402
|
name: 'Sensika Bot'
|
@@ -1304,7 +1406,7 @@
|
|
1304
1406
|
name: 'Sensika'
|
1305
1407
|
url: 'http://sensika.com'
|
1306
1408
|
|
1307
|
-
- regex: 'SEOENG(World)?Bot'
|
1409
|
+
- regex: 'SEOENG(?:World)?Bot'
|
1308
1410
|
name: 'SEOENGBot'
|
1309
1411
|
category: 'Crawler'
|
1310
1412
|
url: 'http://www.seoengine.com/seoengbot.htm'
|
@@ -1394,7 +1496,7 @@
|
|
1394
1496
|
category: 'Crawler'
|
1395
1497
|
url: 'http://ricks-apps.com/osx/sitesucker/'
|
1396
1498
|
|
1397
|
-
- regex: 'sixy
|
1499
|
+
- regex: 'sixy\.ch'
|
1398
1500
|
name: 'Sixy.ch'
|
1399
1501
|
category: 'Site Monitor'
|
1400
1502
|
url: 'http://sixy.ch'
|
@@ -1410,7 +1512,7 @@
|
|
1410
1512
|
name: 'Slack Technologies'
|
1411
1513
|
url: 'http://slack.com'
|
1412
1514
|
|
1413
|
-
- regex: '
|
1515
|
+
- regex: 'Sogou[ -](?:head|inst|Orion|Pic|Test|web)[ -]spider|New-Sogou-Spider'
|
1414
1516
|
name: 'Sogou Spider'
|
1415
1517
|
category: 'Search bot'
|
1416
1518
|
url: 'http://www.sogou.com/docs/help/webmasters.htm'
|
@@ -1535,11 +1637,19 @@
|
|
1535
1637
|
name: ''
|
1536
1638
|
url: ''
|
1537
1639
|
|
1538
|
-
- regex: 'theoldreader
|
1640
|
+
- regex: 'theoldreader\.com'
|
1539
1641
|
name: 'theoldreader'
|
1540
1642
|
category: 'Feed Reader'
|
1541
1643
|
url: 'https://theoldreader.com'
|
1542
1644
|
|
1645
|
+
- regex: 'Trackable/0\.1'
|
1646
|
+
name: 'Chartable'
|
1647
|
+
category: 'Site Monitor'
|
1648
|
+
url: 'https://help.chartable.com/article/34-what-is-the-trackable-analytics-prefix'
|
1649
|
+
producer:
|
1650
|
+
name: 'Chartable'
|
1651
|
+
url: 'https://chartable.com'
|
1652
|
+
|
1543
1653
|
- regex: 'trendictionbot'
|
1544
1654
|
name: 'Trendiction Bot'
|
1545
1655
|
category: 'Crawler'
|
@@ -1556,13 +1666,13 @@
|
|
1556
1666
|
name: 'iParadigms, LLC.'
|
1557
1667
|
url: 'http://www.turnitin.com'
|
1558
1668
|
|
1559
|
-
- regex: 'TweetedTimes
|
1669
|
+
- regex: 'TweetedTimes'
|
1560
1670
|
name: 'TweetedTimes Bot'
|
1561
1671
|
category: 'Crawler'
|
1562
|
-
url: '
|
1672
|
+
url: 'https://tweetedtimes.com/'
|
1563
1673
|
producer:
|
1564
1674
|
name: 'TweetedTimes'
|
1565
|
-
url: '
|
1675
|
+
url: 'https://tweetedtimes.com/'
|
1566
1676
|
|
1567
1677
|
- regex: 'TweetmemeBot'
|
1568
1678
|
name: 'Tweetmeme Bot'
|
@@ -1603,21 +1713,21 @@
|
|
1603
1713
|
name: 'UkrNet Ltd'
|
1604
1714
|
url: 'https://www.ukr.net/'
|
1605
1715
|
|
1606
|
-
- regex: '
|
1716
|
+
- regex: 'Uptime(?:bot)?/[\d.]+'
|
1607
1717
|
name: 'Uptimebot'
|
1608
1718
|
category: 'Site Monitor'
|
1609
|
-
url: 'https://uptime.com/
|
1719
|
+
url: 'https://uptime.com/uptime-bot'
|
1610
1720
|
producer:
|
1611
1721
|
name: 'Uptime'
|
1612
|
-
url: 'https://uptime.com'
|
1722
|
+
url: 'https://uptime.com/'
|
1613
1723
|
|
1614
1724
|
- regex: 'UptimeRobot'
|
1615
|
-
name: '
|
1725
|
+
name: 'UptimeRobot'
|
1616
1726
|
category: 'Site Monitor'
|
1617
|
-
url: ''
|
1727
|
+
url: 'https://uptimerobot.com/'
|
1618
1728
|
producer:
|
1619
1729
|
name: 'Uptime Robot'
|
1620
|
-
url: '
|
1730
|
+
url: 'https://uptimerobot.com/'
|
1621
1731
|
|
1622
1732
|
- regex: 'URLAppendBot'
|
1623
1733
|
name: 'URLAppendBot'
|
@@ -1638,10 +1748,18 @@
|
|
1638
1748
|
- regex: 'vkShare; '
|
1639
1749
|
name: 'VK Share Button'
|
1640
1750
|
category: 'Crawler'
|
1641
|
-
url: '
|
1751
|
+
url: 'https://dev.vk.com/en/widgets/share'
|
1752
|
+
producer:
|
1753
|
+
name: 'VK'
|
1754
|
+
url: 'https://vk.com/'
|
1755
|
+
|
1756
|
+
- regex: 'VKRobot'
|
1757
|
+
name: 'VK Robot'
|
1758
|
+
category: 'Crawler'
|
1759
|
+
url: 'https://dev.vk.com/en/'
|
1642
1760
|
producer:
|
1643
1761
|
name: 'VK'
|
1644
|
-
url: '
|
1762
|
+
url: 'https://vk.com/'
|
1645
1763
|
|
1646
1764
|
- regex: 'VSMCrawler'
|
1647
1765
|
name: 'Visual Site Mapper Crawler'
|
@@ -1675,7 +1793,7 @@
|
|
1675
1793
|
name: 'W3C'
|
1676
1794
|
url: 'http://www.w3.org'
|
1677
1795
|
|
1678
|
-
- regex: 'W3C_Validator|Validator
|
1796
|
+
- regex: 'W3C_Validator|Validator\.nu'
|
1679
1797
|
name: 'W3C Markup Validation Service'
|
1680
1798
|
category: 'Validator'
|
1681
1799
|
url: 'http://validator.w3.org/services'
|
@@ -1699,6 +1817,14 @@
|
|
1699
1817
|
name: 'W3C'
|
1700
1818
|
url: 'http://www.w3.org'
|
1701
1819
|
|
1820
|
+
- regex: 'P3P Validator'
|
1821
|
+
name: 'W3C P3P Validator'
|
1822
|
+
category: 'Validator'
|
1823
|
+
url: 'https://www.w3.org/P3P/validator.html'
|
1824
|
+
producer:
|
1825
|
+
name: 'W3C'
|
1826
|
+
url: 'https://www.w3.org'
|
1827
|
+
|
1702
1828
|
- regex: 'Wappalyzer'
|
1703
1829
|
name: 'Wappalyzer'
|
1704
1830
|
url: 'https://github.com/AliasIO/Wappalyzer'
|
@@ -1735,6 +1861,22 @@
|
|
1735
1861
|
name: 'WebSitePulse'
|
1736
1862
|
url: 'http://www.websitepulse.com/'
|
1737
1863
|
|
1864
|
+
- regex: 'WordPress.+isitwp\.com'
|
1865
|
+
name: 'IsItWP'
|
1866
|
+
category: 'Crawler'
|
1867
|
+
url: 'https://www.isitwp.com/'
|
1868
|
+
producer:
|
1869
|
+
name: 'WPBeginner, LLC'
|
1870
|
+
url: 'https://www.wpbeginner.com/'
|
1871
|
+
|
1872
|
+
- regex: 'Automattic Analytics Crawler/[\d.]+'
|
1873
|
+
name: 'Automattic Analytics'
|
1874
|
+
category: 'Crawler'
|
1875
|
+
url: 'https://wordpress.com/crawler/'
|
1876
|
+
producer:
|
1877
|
+
name: 'Wordpress.org'
|
1878
|
+
url: 'https://wordpress.org/'
|
1879
|
+
|
1738
1880
|
- regex: 'WordPress'
|
1739
1881
|
name: 'WordPress'
|
1740
1882
|
category: 'Service Agent'
|
@@ -1815,13 +1957,29 @@
|
|
1815
1957
|
name: 'Yahoo! Japan Corp.'
|
1816
1958
|
url: 'https://www.yahoo.co.jp/'
|
1817
1959
|
|
1818
|
-
- regex: '
|
1960
|
+
- regex: 'Y!J-ASR'
|
1961
|
+
name: 'Yahoo! Japan ASR'
|
1962
|
+
category: 'Crawler'
|
1963
|
+
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
1964
|
+
producer:
|
1965
|
+
name: 'Yahoo! Japan Corp.'
|
1966
|
+
url: 'https://www.yahoo.co.jp/'
|
1967
|
+
|
1968
|
+
- regex: '^Y!J'
|
1969
|
+
name: 'Yahoo! Japan'
|
1970
|
+
category: 'Crawler'
|
1971
|
+
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
1972
|
+
producer:
|
1973
|
+
name: 'Yahoo! Japan Corp.'
|
1974
|
+
url: 'https://www.yahoo.co.jp/'
|
1975
|
+
|
1976
|
+
- regex: 'Yandex(?:(?:\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher'
|
1819
1977
|
name: 'Yandex Bot'
|
1820
1978
|
category: 'Search bot'
|
1821
|
-
url: '
|
1979
|
+
url: 'https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html'
|
1822
1980
|
producer:
|
1823
1981
|
name: 'Yandex LLC'
|
1824
|
-
url: '
|
1982
|
+
url: 'https://yandex.com/company/'
|
1825
1983
|
|
1826
1984
|
- regex: 'Yeti|NaverJapan|AdsBot-Naver'
|
1827
1985
|
name: 'Yeti/Naverbot'
|
@@ -1881,7 +2039,7 @@
|
|
1881
2039
|
name: 'Yottaa'
|
1882
2040
|
url: 'http://www.yottaa.com/'
|
1883
2041
|
|
1884
|
-
- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857
|
2042
|
+
- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857'
|
1885
2043
|
name: 'Yahoo Gemini'
|
1886
2044
|
category: 'Crawler'
|
1887
2045
|
url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
|
@@ -1905,7 +2063,7 @@
|
|
1905
2063
|
name: 'HubPages, Inc.'
|
1906
2064
|
url: 'https://discover.hubpages.com/'
|
1907
2065
|
|
1908
|
-
- regex: 'Pinterest(bot)
|
2066
|
+
- regex: 'Pinterest(?:bot)?/[\d.]+.*www\.pinterest\.com'
|
1909
2067
|
name: 'Pinterest'
|
1910
2068
|
url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
|
1911
2069
|
category: 'Crawler'
|
@@ -1913,7 +2071,7 @@
|
|
1913
2071
|
name: 'Pinterest'
|
1914
2072
|
url: 'https://www.pinterest.com/'
|
1915
2073
|
|
1916
|
-
- regex: 'Site24x7'
|
2074
|
+
- regex: '.*Site24x7'
|
1917
2075
|
name: 'Site24x7 Website Monitoring'
|
1918
2076
|
category: 'Site Monitor'
|
1919
2077
|
url: 'https://www.site24x7.com/site24x7-faq.html'
|
@@ -1921,6 +2079,14 @@
|
|
1921
2079
|
name: 'Site24x7'
|
1922
2080
|
url: 'https://www.site24x7.com'
|
1923
2081
|
|
2082
|
+
- regex: '.* HLB/[\d.]+'
|
2083
|
+
name: 'Site24x7 Defacement Monitor'
|
2084
|
+
category: 'Site Monitor'
|
2085
|
+
url: 'https://support.site24x7.com/portal/en/kb/articles/default-user-agent-used-in-website-defacement-monitor'
|
2086
|
+
producer:
|
2087
|
+
name: 'Site24x7'
|
2088
|
+
url: 'https://www.site24x7.com/'
|
2089
|
+
|
1924
2090
|
- regex: 's~snapchat-proxy'
|
1925
2091
|
name: 'Snapchat Proxy'
|
1926
2092
|
category: 'Crawler'
|
@@ -1937,6 +2103,14 @@
|
|
1937
2103
|
name: 'Snapchat Inc.'
|
1938
2104
|
url: 'https://www.snapchat.com/'
|
1939
2105
|
|
2106
|
+
- regex: 'SnapchatAds/[\d.]+'
|
2107
|
+
name: 'Snapchat Ads'
|
2108
|
+
category: 'Crawler'
|
2109
|
+
url: 'https://businesshelp.snapchat.com/s/article/adsbot-crawler?language=en_US'
|
2110
|
+
producer:
|
2111
|
+
name: 'Snapchat Inc.'
|
2112
|
+
url: 'https://www.snapchat.com/'
|
2113
|
+
|
1940
2114
|
- regex: "Let's Encrypt validation server"
|
1941
2115
|
name: "Let's Encrypt Validation"
|
1942
2116
|
category: 'Service Agent'
|
@@ -2029,22 +2203,19 @@
|
|
2029
2203
|
- regex: 'AdMantX.*admantx\.com'
|
2030
2204
|
name: 'ADMantX'
|
2031
2205
|
|
2032
|
-
- regex: 'Server Density Service Monitoring
|
2206
|
+
- regex: 'Server Density Service Monitoring'
|
2033
2207
|
name: 'Server Density'
|
2034
2208
|
|
2035
2209
|
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
2036
2210
|
name: 'RSSRadio Bot'
|
2037
2211
|
|
2038
|
-
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?!(?: Build|Plus))|zeal(?!ot)|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex|CF-UC User Agent|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|daumoa,damoa,daum,daumos,duamoa,duam,duamos|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|custom_user_agent|Test Certificate Info|iplabel|Magellan|CustomUserAgent)'
|
2039
|
-
name: 'Generic Bot'
|
2040
|
-
|
2041
2212
|
- regex: '^sentry'
|
2042
2213
|
name: 'Sentry Bot'
|
2043
2214
|
producer:
|
2044
2215
|
name: 'Sentry'
|
2045
2216
|
url: 'https://sentry.io'
|
2046
2217
|
|
2047
|
-
- regex: '^Spotify/
|
2218
|
+
- regex: '^Spotify/[\d.]+$'
|
2048
2219
|
name: 'Spotify'
|
2049
2220
|
producer:
|
2050
2221
|
name: 'Spotify'
|
@@ -2102,14 +2273,6 @@
|
|
2102
2273
|
name: 'Siteimprove GmbH'
|
2103
2274
|
url: 'https://siteimprove.com/'
|
2104
2275
|
|
2105
|
-
- regex: 'Image size by Siteimprove\.com'
|
2106
|
-
name: 'Siteimprove'
|
2107
|
-
category: 'Search bot'
|
2108
|
-
url: 'https://siteimprove.com/'
|
2109
|
-
producer:
|
2110
|
-
name: 'Siteimprove GmbH'
|
2111
|
-
url: 'https://siteimprove.com/'
|
2112
|
-
|
2113
2276
|
- regex: 'CATExplorador'
|
2114
2277
|
name: 'CATExplorador'
|
2115
2278
|
category: 'Search bot'
|
@@ -2134,7 +2297,7 @@
|
|
2134
2297
|
name: 'Idee Inc.'
|
2135
2298
|
url: 'http://ideeinc.com/'
|
2136
2299
|
|
2137
|
-
- regex: 'zelist
|
2300
|
+
- regex: 'zelist\.ro feed parser'
|
2138
2301
|
name: 'Ze List'
|
2139
2302
|
url: 'https://www.zelist.ro/'
|
2140
2303
|
category: 'Feed Fetcher'
|
@@ -2182,21 +2345,21 @@
|
|
2182
2345
|
name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
|
2183
2346
|
url: 'https://www.wikido.com/'
|
2184
2347
|
|
2185
|
-
- regex: '
|
2348
|
+
- regex: 'Awario(?:Smart)?Bot'
|
2186
2349
|
name: 'Awario'
|
2187
2350
|
category: 'Search bot'
|
2188
2351
|
url: 'https://awario.com/bots.html'
|
2189
2352
|
producer:
|
2190
|
-
name: '
|
2191
|
-
url: 'https://
|
2353
|
+
name: 'TechFusion Ltd.'
|
2354
|
+
url: 'https://www.techfusion.com.cy/'
|
2192
2355
|
|
2193
2356
|
- regex: 'AwarioRssBot'
|
2194
2357
|
name: 'Awario'
|
2195
2358
|
category: 'Feed Fetcher'
|
2196
2359
|
url: 'https://awario.com/bots.html'
|
2197
2360
|
producer:
|
2198
|
-
name: '
|
2199
|
-
url: 'https://
|
2361
|
+
name: 'TechFusion Ltd.'
|
2362
|
+
url: 'https://www.techfusion.com.cy/'
|
2200
2363
|
|
2201
2364
|
- regex: 'oBot'
|
2202
2365
|
name: 'oBot'
|
@@ -2288,7 +2451,7 @@
|
|
2288
2451
|
category: 'Crawler'
|
2289
2452
|
url: 'https://serendeputy.com/about/serendeputy-bot'
|
2290
2453
|
|
2291
|
-
- regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx
|
2454
|
+
- regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx\.com.*service-fetcher'
|
2292
2455
|
name: 'ADmantX Service Fetcher'
|
2293
2456
|
category: 'Service bot'
|
2294
2457
|
url: 'https://www.admantx.com/service-fetcher.html'
|
@@ -2324,7 +2487,7 @@
|
|
2324
2487
|
name: 'PPC Labs LLC'
|
2325
2488
|
url: 'https://www.adbeat.com/'
|
2326
2489
|
|
2327
|
-
- regex: 'BW/
|
2490
|
+
- regex: '(?:BuiltWith|BW)/[\d.]+'
|
2328
2491
|
name: 'BuiltWith'
|
2329
2492
|
category: 'Crawler'
|
2330
2493
|
url: 'https://builtwith.com/biup'
|
@@ -2332,7 +2495,7 @@
|
|
2332
2495
|
name: 'BuiltWith Pty Ltd'
|
2333
2496
|
url: 'https://builtwith.com/'
|
2334
2497
|
|
2335
|
-
- regex: 'https://whatis
|
2498
|
+
- regex: 'https://whatis\.contentkingapp\.com'
|
2336
2499
|
name: 'ContentKing'
|
2337
2500
|
category: 'Site Monitor'
|
2338
2501
|
url: 'https://whatis.contentkingapp.com/'
|
@@ -2348,7 +2511,7 @@
|
|
2348
2511
|
name: 'MicroAd, Inc.'
|
2349
2512
|
url: 'https://www.microad.co.jp/'
|
2350
2513
|
|
2351
|
-
- regex: 'PingAdmin
|
2514
|
+
- regex: 'PingAdmin\.Ru'
|
2352
2515
|
name: 'PingAdmin.Ru'
|
2353
2516
|
category: 'Site Monitor'
|
2354
2517
|
url: 'https://ping-admin.ru/'
|
@@ -2366,7 +2529,7 @@
|
|
2366
2529
|
name: 'WebTehRazrabotka LLC'
|
2367
2530
|
url: 'https://webdatastats.com/'
|
2368
2531
|
|
2369
|
-
- regex: 'parse
|
2532
|
+
- regex: 'parse\.ly scraper'
|
2370
2533
|
name: 'parse.ly'
|
2371
2534
|
category: 'Crawler'
|
2372
2535
|
url: 'https://www.parse.ly/help/integration/crawler'
|
@@ -2379,7 +2542,7 @@
|
|
2379
2542
|
category: 'Site Monitor'
|
2380
2543
|
url: 'http://cloudsystemnetworks.com'
|
2381
2544
|
|
2382
|
-
- regex: 'HeartRails_Capture
|
2545
|
+
- regex: 'HeartRails_Capture/[\d.]+'
|
2383
2546
|
name: 'Heart Rails Capture'
|
2384
2547
|
category: 'Service Agent'
|
2385
2548
|
url: 'http://capture.heartrails.com'
|
@@ -2387,9 +2550,12 @@
|
|
2387
2550
|
- regex: 'Project-Resonance'
|
2388
2551
|
name: 'Project Resonance'
|
2389
2552
|
category: 'Crawler'
|
2390
|
-
url: '
|
2553
|
+
url: 'https://project-resonance.com/'
|
2554
|
+
producer:
|
2555
|
+
name: 'RedHunt Labs Limited'
|
2556
|
+
url: 'https://redhuntlabs.com/'
|
2391
2557
|
|
2392
|
-
- regex: 'DataXu
|
2558
|
+
- regex: 'DataXu/[\d.]+'
|
2393
2559
|
name: 'DataXu'
|
2394
2560
|
category: 'Service Agent'
|
2395
2561
|
url: 'https://advertising.roku.com/dataxu'
|
@@ -2426,7 +2592,7 @@
|
|
2426
2592
|
category: 'Crawler'
|
2427
2593
|
url: 'http://www.webtop.com/'
|
2428
2594
|
|
2429
|
-
- regex: 'PageThing
|
2595
|
+
- regex: 'PageThing\.com'
|
2430
2596
|
name: 'PageThing'
|
2431
2597
|
category: 'Crawler'
|
2432
2598
|
url: 'https://www.pagething.com/'
|
@@ -2471,10 +2637,18 @@
|
|
2471
2637
|
url: 'https://github.com/projectdiscovery/httpx'
|
2472
2638
|
category: 'Crawler'
|
2473
2639
|
producer:
|
2474
|
-
name: ''
|
2475
|
-
url: ''
|
2640
|
+
name: 'ProjectDiscovery, Inc.'
|
2641
|
+
url: 'https://projectdiscovery.io/'
|
2642
|
+
|
2643
|
+
- regex: '.*\.oast\.'
|
2644
|
+
name: 'Interactsh'
|
2645
|
+
category: 'Security Checker'
|
2646
|
+
url: 'https://github.com/projectdiscovery/interactsh'
|
2647
|
+
producer:
|
2648
|
+
name: 'ProjectDiscovery, Inc.'
|
2649
|
+
url: 'https://projectdiscovery.io/'
|
2476
2650
|
|
2477
|
-
- regex: 'scaninfo@(?:expanseinc|paloaltonetworks)
|
2651
|
+
- regex: 'scaninfo@(?:expanseinc|paloaltonetworks)\.com'
|
2478
2652
|
name: 'Expanse'
|
2479
2653
|
category: 'Security Checker'
|
2480
2654
|
url: 'https://expanse.co/'
|
@@ -2505,12 +2679,12 @@
|
|
2505
2679
|
name: 'Hatena Co., Ltd.'
|
2506
2680
|
url: 'https://www.hatena.ne.jp'
|
2507
2681
|
|
2508
|
-
- regex: 'RyowlEngine/
|
2682
|
+
- regex: 'RyowlEngine/[\d.]+'
|
2509
2683
|
name: 'Ryowl'
|
2510
2684
|
category: 'Crawler'
|
2511
2685
|
url: 'https://ryowl.org'
|
2512
2686
|
|
2513
|
-
- regex: 'OdklBot/
|
2687
|
+
- regex: 'OdklBot/[\d.]+'
|
2514
2688
|
name: 'Odnoklassniki Bot'
|
2515
2689
|
category: 'Crawler'
|
2516
2690
|
url: 'https://odnoklassniki.ru'
|
@@ -2525,7 +2699,7 @@
|
|
2525
2699
|
category: 'Crawler'
|
2526
2700
|
url: 'https://www.zoominfo.com'
|
2527
2701
|
|
2528
|
-
- regex: 'WeViKaBot/
|
2702
|
+
- regex: 'WeViKaBot/[\d.]+'
|
2529
2703
|
name: 'WeViKaBot'
|
2530
2704
|
category: 'Crawler'
|
2531
2705
|
url: 'http://www.wevika.de'
|
@@ -2535,7 +2709,7 @@
|
|
2535
2709
|
category: 'Crawler'
|
2536
2710
|
url: 'https://www.seokicks.de/robot.html'
|
2537
2711
|
|
2538
|
-
- regex: 'Plukkie/
|
2712
|
+
- regex: 'Plukkie/[\d.]+'
|
2539
2713
|
name: 'Plukkie'
|
2540
2714
|
category: 'Crawler'
|
2541
2715
|
url: 'http://www.botje.com/plukkie.htm'
|
@@ -2545,22 +2719,22 @@
|
|
2545
2719
|
category: 'Crawler'
|
2546
2720
|
url: 'https://www.comscore.com/Web-Crawler'
|
2547
2721
|
|
2548
|
-
- regex: 'SurdotlyBot/
|
2722
|
+
- regex: 'SurdotlyBot/[\d.]+'
|
2549
2723
|
name: 'SurdotlyBot'
|
2550
2724
|
category: 'Crawler'
|
2551
2725
|
url: 'http://sur.ly/bot.html'
|
2552
2726
|
|
2553
|
-
- regex: 'Gowikibot/
|
2727
|
+
- regex: 'Gowikibot/[\d.]+'
|
2554
2728
|
name: 'Gowikibot'
|
2555
2729
|
category: 'Crawler'
|
2556
2730
|
url: 'http:/www.gowikibot.com'
|
2557
2731
|
|
2558
|
-
- regex: 'SabsimBot/
|
2732
|
+
- regex: 'SabsimBot/[\d.]+'
|
2559
2733
|
name: 'SabsimBot'
|
2560
2734
|
category: 'Crawler'
|
2561
2735
|
url: 'https://sabsim.com'
|
2562
2736
|
|
2563
|
-
- regex: 'LumtelBot/
|
2737
|
+
- regex: 'LumtelBot/[\d.]+'
|
2564
2738
|
name: 'LumtelBot'
|
2565
2739
|
category: 'Crawler'
|
2566
2740
|
url: 'https://umtel.com'
|
@@ -2570,12 +2744,12 @@
|
|
2570
2744
|
category: 'Crawler'
|
2571
2745
|
url: 'http://www.pipl.com/bot'
|
2572
2746
|
|
2573
|
-
- regex: 'woobot/
|
2747
|
+
- regex: 'woobot/[\d.]+'
|
2574
2748
|
name: 'WooRank'
|
2575
2749
|
category: 'Crawler'
|
2576
2750
|
url: 'https://www.woorank.com/bot'
|
2577
2751
|
|
2578
|
-
- regex: 'Cookiebot/
|
2752
|
+
- regex: 'Cookiebot/[\d.]+'
|
2579
2753
|
name: 'Cookiebot'
|
2580
2754
|
category: 'Crawler'
|
2581
2755
|
url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
|
@@ -2591,7 +2765,7 @@
|
|
2591
2765
|
name: 'NET SYSTEMS RESEARCH LLC'
|
2592
2766
|
url: 'https://www.netsystemsresearch.com/'
|
2593
2767
|
|
2594
|
-
- regex: 'CensysInspect/
|
2768
|
+
- regex: 'CensysInspect/[\d.]+'
|
2595
2769
|
name: 'CensysInspect'
|
2596
2770
|
category: 'Security Checker'
|
2597
2771
|
url: 'https://about.censys.io/'
|
@@ -2599,7 +2773,7 @@
|
|
2599
2773
|
name: 'Censys, Inc.'
|
2600
2774
|
url: 'https://censys.io/'
|
2601
2775
|
|
2602
|
-
- regex: 'gdnplus
|
2776
|
+
- regex: 'gdnplus\.com'
|
2603
2777
|
name: 'GDNP'
|
2604
2778
|
category: 'Crawler'
|
2605
2779
|
url: 'https://gdnplus.com/'
|
@@ -2607,17 +2781,17 @@
|
|
2607
2781
|
name: 'Global Digital Network Plus, LLC'
|
2608
2782
|
url: 'https://gdnplus.com/'
|
2609
2783
|
|
2610
|
-
- regex: 'WellKnownBot/
|
2784
|
+
- regex: 'WellKnownBot/[\d.]+'
|
2611
2785
|
name: 'WellKnownBot'
|
2612
2786
|
category: 'Crawler'
|
2613
2787
|
url: 'https://well-known.dev'
|
2614
2788
|
|
2615
|
-
- regex: 'Adsbot/
|
2789
|
+
- regex: 'Adsbot/[\d.]+'
|
2616
2790
|
name: 'Adsbot'
|
2617
2791
|
category: 'Crawler'
|
2618
2792
|
url: 'https://seostar.co/robot/'
|
2619
2793
|
|
2620
|
-
- regex: 'MTRobot/
|
2794
|
+
- regex: 'MTRobot/[\d.]+'
|
2621
2795
|
name: 'MTRobot'
|
2622
2796
|
category: 'Crawler'
|
2623
2797
|
url: 'https://metrics-tools.de/robot.html'
|
@@ -2625,7 +2799,7 @@
|
|
2625
2799
|
name: 'Metrics Tools'
|
2626
2800
|
url: 'https://metrics-tools.de/'
|
2627
2801
|
|
2628
|
-
- regex: 'serpstatbot/
|
2802
|
+
- regex: 'serpstatbot/[\d.]+'
|
2629
2803
|
name: 'serpstatbot'
|
2630
2804
|
category: 'Crawler'
|
2631
2805
|
url: 'http://serpstatbot.com/'
|
@@ -2638,17 +2812,17 @@
|
|
2638
2812
|
category: 'Crawler'
|
2639
2813
|
url: 'https://github.com/gocolly/colly/'
|
2640
2814
|
|
2641
|
-
- regex: 'l9tcpid/v
|
2815
|
+
- regex: 'l9tcpid/v[\d.]+'
|
2642
2816
|
name: 'l9tcpid'
|
2643
2817
|
category: 'Security Checker'
|
2644
2818
|
url: 'https://github.com/LeakIX/l9tcpid'
|
2645
2819
|
|
2646
|
-
- regex: 'l9explore/
|
2820
|
+
- regex: 'l9explore/[\d.]+'
|
2647
2821
|
name: 'l9explore'
|
2648
2822
|
category: 'Security Checker'
|
2649
2823
|
url: 'https://github.com/LeakIX/l9explore'
|
2650
2824
|
|
2651
|
-
- regex: 'l9scan/|^Lkx
|
2825
|
+
- regex: 'l9scan/|^Lkx-.*/[\d.]+'
|
2652
2826
|
name: 'LeakIX'
|
2653
2827
|
category: 'Security Checker'
|
2654
2828
|
url: 'https://leakix.net/'
|
@@ -2656,7 +2830,7 @@
|
|
2656
2830
|
name: 'BaDaaS SRL'
|
2657
2831
|
url: 'https://leakix.net/'
|
2658
2832
|
|
2659
|
-
- regex: 'MegaIndex
|
2833
|
+
- regex: 'MegaIndex\.ru/[\d.]+'
|
2660
2834
|
name: 'MegaIndex'
|
2661
2835
|
category: 'Crawler'
|
2662
2836
|
url: 'https://megaindex.com/crawler'
|
@@ -2664,17 +2838,17 @@
|
|
2664
2838
|
- regex: 'Seekport'
|
2665
2839
|
name: 'Seekport'
|
2666
2840
|
category: 'Crawler'
|
2667
|
-
url: '
|
2841
|
+
url: 'https://bot.seekport.com/'
|
2668
2842
|
producer:
|
2669
2843
|
name: 'SISTRIX GmbH'
|
2670
2844
|
url: 'https://www.sistrix.de/'
|
2671
2845
|
|
2672
|
-
- regex: 'seolyt/
|
2846
|
+
- regex: 'seolyt/[\d.]+'
|
2673
2847
|
name: 'seolyt'
|
2674
2848
|
category: 'Crawler'
|
2675
2849
|
url: 'https://seolyt.com/'
|
2676
2850
|
|
2677
|
-
- regex: 'YaK/
|
2851
|
+
- regex: 'YaK/[\d.]+'
|
2678
2852
|
name: 'YaK'
|
2679
2853
|
category: 'Crawler'
|
2680
2854
|
url: 'https://www.linkfluence.com/'
|
@@ -2682,7 +2856,7 @@
|
|
2682
2856
|
name: 'Linkfluence SAS'
|
2683
2857
|
url: 'https://www.linkfluence.com/'
|
2684
2858
|
|
2685
|
-
- regex: 'KomodiaBot/
|
2859
|
+
- regex: 'KomodiaBot/[\d.]+'
|
2686
2860
|
name: 'KomodiaBot'
|
2687
2861
|
category: 'Crawler'
|
2688
2862
|
url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
|
@@ -2690,7 +2864,7 @@
|
|
2690
2864
|
name: 'Komodia Inc.'
|
2691
2865
|
url: 'https://www.komodia.com/'
|
2692
2866
|
|
2693
|
-
- regex: 'Neevabot/
|
2867
|
+
- regex: 'Neevabot/[\d.]+'
|
2694
2868
|
name: 'Neevabot'
|
2695
2869
|
category: 'Search bot'
|
2696
2870
|
url: 'https://neeva.com/neevabot'
|
@@ -2698,17 +2872,17 @@
|
|
2698
2872
|
name: 'Neeva Inc.'
|
2699
2873
|
url: 'https://neeva.com/'
|
2700
2874
|
|
2701
|
-
- regex: 'LinkPreview/
|
2875
|
+
- regex: 'LinkPreview/[\d.]+'
|
2702
2876
|
name: 'LinkPreview'
|
2703
2877
|
category: 'Service Agent'
|
2704
2878
|
url: 'https://www.linkpreview.net/'
|
2705
2879
|
|
2706
|
-
- regex: 'JungleKeyThumbnail/
|
2880
|
+
- regex: 'JungleKeyThumbnail/[\d.]+'
|
2707
2881
|
name: 'JungleKeyThumbnail'
|
2708
2882
|
category: 'Crawler'
|
2709
2883
|
url: 'https://junglekey.com/'
|
2710
2884
|
|
2711
|
-
- regex: 'rocketmonitor(?: |bot/)
|
2885
|
+
- regex: 'rocketmonitor(?: |bot/)[\d.]+'
|
2712
2886
|
name: 'RocketMonitorBot'
|
2713
2887
|
category: 'Site Monitor'
|
2714
2888
|
url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
|
@@ -2716,7 +2890,7 @@
|
|
2716
2890
|
name: 'Radio Mast, Inc.'
|
2717
2891
|
url: 'https://www.radiomast.io/'
|
2718
2892
|
|
2719
|
-
- regex: 'SitemapParser-VIPnytt/
|
2893
|
+
- regex: 'SitemapParser-VIPnytt/[\d.]+'
|
2720
2894
|
name: 'SitemapParser-VIPnytt'
|
2721
2895
|
category: 'Crawler'
|
2722
2896
|
url: 'https://github.com/VIPnytt/SitemapParser/'
|
@@ -2726,7 +2900,7 @@
|
|
2726
2900
|
category: 'Crawler'
|
2727
2901
|
url: 'https://turnitin.com/robot/crawlerinfo.html'
|
2728
2902
|
|
2729
|
-
- regex: 'DMBrowser
|
2903
|
+
- regex: 'DMBrowser/[\d.]+|DMBrowser-[UB]V'
|
2730
2904
|
name: 'Dotcom Monitor'
|
2731
2905
|
category: 'Site Monitor'
|
2732
2906
|
url: 'https://www.dotcom-monitor.com'
|
@@ -2740,17 +2914,17 @@
|
|
2740
2914
|
category: 'Crawler'
|
2741
2915
|
url: 'https://dataforseo.com/dataforseo-bot'
|
2742
2916
|
|
2743
|
-
- regex: 'Discordbot/
|
2917
|
+
- regex: 'Discordbot/[\d.]+'
|
2744
2918
|
name: 'Discord Bot'
|
2745
2919
|
category: 'Service Agent'
|
2746
2920
|
url: 'https://discordapp.com'
|
2747
2921
|
|
2748
|
-
- regex: 'Linespider/
|
2922
|
+
- regex: 'Linespider/[\d.]+'
|
2749
2923
|
name: 'Linespider'
|
2750
2924
|
category: 'Crawler'
|
2751
2925
|
url: 'https://lin.ee/4dwXkTH'
|
2752
2926
|
|
2753
|
-
- regex: 'Cincraw/
|
2927
|
+
- regex: 'Cincraw/[\d.]+'
|
2754
2928
|
name: 'Cincraw'
|
2755
2929
|
category: 'Crawler'
|
2756
2930
|
url: 'http://cincrawdata.net/bot/'
|
@@ -2776,7 +2950,7 @@
|
|
2776
2950
|
category: 'Crawler'
|
2777
2951
|
url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
|
2778
2952
|
|
2779
|
-
- regex: 'https://securitytxt-scan
|
2953
|
+
- regex: 'https://securitytxt-scan\.cs\.hm\.edu/'
|
2780
2954
|
name: 'security.txt scanserver'
|
2781
2955
|
category: 'Security Checker'
|
2782
2956
|
url: 'https://securitytxt-scan.cs.hm.edu/'
|
@@ -2784,17 +2958,17 @@
|
|
2784
2958
|
name: 'Hochschule für angewandte Wissenschaften München'
|
2785
2959
|
url: 'https://www.hm.edu/'
|
2786
2960
|
|
2787
|
-
- regex: 'TigerBot/
|
2961
|
+
- regex: 'TigerBot/[\d.]+'
|
2788
2962
|
name: 'TigerBot'
|
2789
2963
|
category: 'Crawler'
|
2790
2964
|
url: 'https://tiger.ch/'
|
2791
2965
|
|
2792
|
-
- regex: 'TestCrawler/
|
2966
|
+
- regex: 'TestCrawler/[\d.]+'
|
2793
2967
|
name: 'TestCrawler'
|
2794
2968
|
category: 'Crawler'
|
2795
2969
|
url: 'https://www.comcepta.com/'
|
2796
2970
|
|
2797
|
-
- regex: 'CrowdTanglebot/
|
2971
|
+
- regex: 'CrowdTanglebot/[\d.]+'
|
2798
2972
|
name: 'CrowdTangle'
|
2799
2973
|
category: 'Crawler'
|
2800
2974
|
url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
|
@@ -2802,7 +2976,7 @@
|
|
2802
2976
|
name: 'CrowdTangle, Inc.'
|
2803
2977
|
url: 'https://www.crowdtangle.com/'
|
2804
2978
|
|
2805
|
-
- regex: 'Sellers
|
2979
|
+
- regex: 'Sellers\.Guide Crawler by Primis'
|
2806
2980
|
name: 'Sellers.Guide'
|
2807
2981
|
category: 'Crawler'
|
2808
2982
|
url: 'https://sellers.guide/'
|
@@ -2826,7 +3000,7 @@
|
|
2826
3000
|
name: 'deepnoc, GmbH'
|
2827
3001
|
url: 'https://deepnoc.com/'
|
2828
3002
|
|
2829
|
-
- regex: 'Newslitbot/
|
3003
|
+
- regex: 'Newslitbot/[\d.]+'
|
2830
3004
|
name: 'Newslitbot'
|
2831
3005
|
category: 'Crawler'
|
2832
3006
|
url: 'https://www.newslit.co/'
|
@@ -2834,7 +3008,7 @@
|
|
2834
3008
|
name: 'Newslit, LLC.'
|
2835
3009
|
url: 'https://www.newslit.co/'
|
2836
3010
|
|
2837
|
-
- regex: 'um-LN/
|
3011
|
+
- regex: 'um-LN/[\d.]+'
|
2838
3012
|
name: 'uMBot'
|
2839
3013
|
category: 'Crawler'
|
2840
3014
|
url: 'https://www.ubermetrics-technologies.com/'
|
@@ -2842,12 +3016,12 @@
|
|
2842
3016
|
name: 'Ubermetrics Technologies GmbH'
|
2843
3017
|
url: 'https://www.ubermetrics-technologies.com/'
|
2844
3018
|
|
2845
|
-
- regex: 'Abonti/
|
3019
|
+
- regex: 'Abonti/[\d.]+'
|
2846
3020
|
name: 'Abonti'
|
2847
3021
|
category: 'Crawler'
|
2848
3022
|
url: 'http://abonti.com/'
|
2849
3023
|
|
2850
|
-
- regex: 'collection@infegy
|
3024
|
+
- regex: 'collection@infegy\.com'
|
2851
3025
|
name: 'Infegy'
|
2852
3026
|
category: 'Crawler'
|
2853
3027
|
url: 'https://infegy.com/'
|
@@ -2855,7 +3029,7 @@
|
|
2855
3029
|
name: 'Infegy, Inc.'
|
2856
3030
|
url: 'https://infegy.com/'
|
2857
3031
|
|
2858
|
-
- regex: 'HTTP Banner Detection \(https://security
|
3032
|
+
- regex: 'HTTP Banner Detection \(https://security\.ipip\.net\)'
|
2859
3033
|
name: 'IPIP'
|
2860
3034
|
category: 'Security Checker'
|
2861
3035
|
url: 'https://security.ipip.net/'
|
@@ -2863,7 +3037,7 @@
|
|
2863
3037
|
name: 'Beijing Tiantexin Tech. Co., Ltd.'
|
2864
3038
|
url: 'https://en.ipip.net/'
|
2865
3039
|
|
2866
|
-
- regex: 'ev-crawler/
|
3040
|
+
- regex: 'ev-crawler/[\d.]+'
|
2867
3041
|
name: 'Headline'
|
2868
3042
|
category: 'Crawler'
|
2869
3043
|
url: 'https://headline.com/legal/crawler'
|
@@ -2871,7 +3045,7 @@
|
|
2871
3045
|
name: 'e.ventures Managementgesellschaft mbH'
|
2872
3046
|
url: 'https://headline.com/'
|
2873
3047
|
|
2874
|
-
- regex: 'webprosbot/
|
3048
|
+
- regex: 'webprosbot/[\d.]+'
|
2875
3049
|
name: 'WebPros'
|
2876
3050
|
category: 'Crawler'
|
2877
3051
|
url: 'https://webpros.com/'
|
@@ -2887,7 +3061,7 @@
|
|
2887
3061
|
name: 'Amazon.com, Inc.'
|
2888
3062
|
url: 'https://www.amazon.com/'
|
2889
3063
|
|
2890
|
-
- regex: 'Wheregoes
|
3064
|
+
- regex: 'Wheregoes\.com Redirect Checker/[\d.]+'
|
2891
3065
|
name: 'WhereGoes'
|
2892
3066
|
category: 'Crawler'
|
2893
3067
|
url: 'https://wheregoes.com/'
|
@@ -2897,12 +3071,12 @@
|
|
2897
3071
|
category: 'Crawler'
|
2898
3072
|
url: 'http://66.240.192.82/'
|
2899
3073
|
|
2900
|
-
- regex: 'InternetMeasurement/
|
3074
|
+
- regex: 'InternetMeasurement/[\d.]+'
|
2901
3075
|
name: 'InternetMeasurement'
|
2902
3076
|
category: 'Crawler'
|
2903
3077
|
url: 'https://internet-measurement.com/'
|
2904
3078
|
|
2905
|
-
- regex: 'DomainAppender /
|
3079
|
+
- regex: 'DomainAppender /[\d.]+'
|
2906
3080
|
name: 'DomainAppender'
|
2907
3081
|
category: 'Crawler'
|
2908
3082
|
url: 'https://www.profound.net/product/domain_append/'
|
@@ -2910,7 +3084,7 @@
|
|
2910
3084
|
name: 'Profound Networks, LLC'
|
2911
3085
|
url: 'https://www.profound.net/'
|
2912
3086
|
|
2913
|
-
- regex: 'FreeWebMonitoring SiteChecker/
|
3087
|
+
- regex: 'FreeWebMonitoring SiteChecker/[\d.]+'
|
2914
3088
|
name: 'FreeWebMonitoring'
|
2915
3089
|
category: 'Site Monitor'
|
2916
3090
|
url: 'https://www.freewebmonitoring.com/bot.html'
|
@@ -2926,7 +3100,7 @@
|
|
2926
3100
|
name: 'Valley Hosting, LLC'
|
2927
3101
|
url: 'https://www.pagemodified.com/'
|
2928
3102
|
|
2929
|
-
- regex: 'adstxtlab
|
3103
|
+
- regex: 'adstxtlab\.com'
|
2930
3104
|
name: 'adstxtlab.com'
|
2931
3105
|
category: 'Crawler'
|
2932
3106
|
url: 'https://adstxtlab.com/validator.php'
|
@@ -2934,7 +3108,7 @@
|
|
2934
3108
|
name: 'Jaohawi AB'
|
2935
3109
|
url: 'https://adstxtlab.com/'
|
2936
3110
|
|
2937
|
-
- regex: 'Iframely/
|
3111
|
+
- regex: 'Iframely/[\d.]+'
|
2938
3112
|
name: 'Iframely'
|
2939
3113
|
category: 'Crawler'
|
2940
3114
|
url: 'https://iframely.com/'
|
@@ -2942,7 +3116,7 @@
|
|
2942
3116
|
name: 'Itteco Software, Corp.'
|
2943
3117
|
url: 'https://iframely.com/'
|
2944
3118
|
|
2945
|
-
- regex: 'DomainStatsBot/
|
3119
|
+
- regex: 'DomainStatsBot/[\d.]+'
|
2946
3120
|
name: 'DomainStatsBot'
|
2947
3121
|
category: 'Crawler'
|
2948
3122
|
url: 'https://domainstats.com/pages/our-bot'
|
@@ -2950,7 +3124,7 @@
|
|
2950
3124
|
name: 'Domainstats Ltd'
|
2951
3125
|
url: 'https://domainstats.com/'
|
2952
3126
|
|
2953
|
-
- regex: 'aiHitBot/
|
3127
|
+
- regex: 'aiHitBot/[\d.]+'
|
2954
3128
|
name: 'aiHitBot'
|
2955
3129
|
category: 'Crawler'
|
2956
3130
|
url: 'https://www.aihitdata.com/about'
|
@@ -2968,12 +3142,12 @@
|
|
2968
3142
|
name: 'GitCrawlerBot'
|
2969
3143
|
category: 'Crawler'
|
2970
3144
|
|
2971
|
-
- regex: 'AdAuth/
|
3145
|
+
- regex: 'AdAuth/[\d.]+'
|
2972
3146
|
name: 'AdAuth'
|
2973
3147
|
category: 'Crawler'
|
2974
3148
|
url: 'https://www.adauth.com'
|
2975
3149
|
|
2976
|
-
- regex: 'faveeo
|
3150
|
+
- regex: 'faveeo\.com'
|
2977
3151
|
name: 'Faveeo'
|
2978
3152
|
category: 'Crawler'
|
2979
3153
|
url: 'http://www.faveeo.com'
|
@@ -3004,7 +3178,7 @@
|
|
3004
3178
|
name: 'Jožef Stefan Institute'
|
3005
3179
|
url: 'https://www.ijs.si/ijsw/JSI'
|
3006
3180
|
|
3007
|
-
- regex: 'dnt-policy@eff
|
3181
|
+
- regex: 'dnt-policy@eff\.org'
|
3008
3182
|
name: 'EFF Do Not Track Verifier'
|
3009
3183
|
category: 'Crawler'
|
3010
3184
|
url: 'https://www.eff.org/issues/do-not-track'
|
@@ -3028,7 +3202,7 @@
|
|
3028
3202
|
name: 'Swoppen Systems GmbH'
|
3029
3203
|
url: 'https://www.swoppen.com/de'
|
3030
3204
|
|
3031
|
-
- regex: 'ScamadviserExternalHit/
|
3205
|
+
- regex: 'ScamadviserExternalHit/[\d.]+'
|
3032
3206
|
name: 'Scamadviser External Hit'
|
3033
3207
|
category: 'Crawler'
|
3034
3208
|
url: 'https://www.scamadviser.com/'
|
@@ -3041,20 +3215,20 @@
|
|
3041
3215
|
category: 'Crawler'
|
3042
3216
|
url: 'https://www.zaldamo.com/search.html'
|
3043
3217
|
producer:
|
3044
|
-
name: '
|
3045
|
-
url: 'https://www.
|
3218
|
+
name: 'Zaldamo, LLC.'
|
3219
|
+
url: 'https://www.zaldamo.com/'
|
3046
3220
|
|
3047
|
-
- regex: 'AFB/
|
3221
|
+
- regex: 'AFB/[\d.]+'
|
3048
3222
|
name: 'Allloadin Favicon Bot'
|
3049
3223
|
category: 'Crawler'
|
3050
3224
|
url: 'https://allloadin.com/'
|
3051
3225
|
|
3052
|
-
- regex: 'SeolytBot/
|
3226
|
+
- regex: 'SeolytBot/[\d.]+'
|
3053
3227
|
name: 'Seolyt Bot'
|
3054
3228
|
category: 'Crawler'
|
3055
3229
|
url: 'https://seolyt.com'
|
3056
3230
|
|
3057
|
-
- regex: 'LinkWalker/
|
3231
|
+
- regex: 'LinkWalker/[\d.]+'
|
3058
3232
|
name: 'LinkWalker'
|
3059
3233
|
category: 'Crawler'
|
3060
3234
|
url: 'https://www.phishlabs.com/'
|
@@ -3062,7 +3236,7 @@
|
|
3062
3236
|
name: 'PhishLabs, Inc.'
|
3063
3237
|
url: 'https://www.phishlabs.com/'
|
3064
3238
|
|
3065
|
-
- regex: 'RenovateBot/
|
3239
|
+
- regex: 'RenovateBot/[\d.]+'
|
3066
3240
|
name: 'RenovateBot'
|
3067
3241
|
category: 'Security Checker'
|
3068
3242
|
url: 'https://github.com/renovatebot/renovate'
|
@@ -3070,7 +3244,7 @@
|
|
3070
3244
|
name: 'White Source Ltd.'
|
3071
3245
|
url: 'https://www.mend.io/free-developer-tools/renovate/'
|
3072
3246
|
|
3073
|
-
- regex: 'INETDEX-BOT/
|
3247
|
+
- regex: 'INETDEX-BOT/[\d.]+'
|
3074
3248
|
name: 'Inetdex Bot'
|
3075
3249
|
category: 'Crawler'
|
3076
3250
|
url: 'https://www.inetdex.com/'
|
@@ -3083,15 +3257,7 @@
|
|
3083
3257
|
name: 'Marc Huemer'
|
3084
3258
|
url: 'https://www.netzzappen.com/'
|
3085
3259
|
|
3086
|
-
- regex: '
|
3087
|
-
name: 'SEMrush Reputation Management'
|
3088
|
-
category: 'Service Agent'
|
3089
|
-
url: 'https://www.semrush.com/bot/'
|
3090
|
-
producer:
|
3091
|
-
name: 'SEMrush'
|
3092
|
-
url: 'https://www.semrush.com/'
|
3093
|
-
|
3094
|
-
- regex: 'panscient.com'
|
3260
|
+
- regex: 'panscient\.com'
|
3095
3261
|
name: 'Panscient'
|
3096
3262
|
category: 'Crawler'
|
3097
3263
|
url: 'https://www.panscient.com/faq.htm'
|
@@ -3099,7 +3265,7 @@
|
|
3099
3265
|
name: 'Panscient, Inc.'
|
3100
3266
|
url: 'https://www.panscient.com/'
|
3101
3267
|
|
3102
|
-
- regex: 'research@pdrlabs
|
3268
|
+
- regex: 'research@pdrlabs\.net'
|
3103
3269
|
name: 'PDR Labs'
|
3104
3270
|
category: 'Security Checker'
|
3105
3271
|
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
@@ -3107,7 +3273,7 @@
|
|
3107
3273
|
name: 'PDR Labs'
|
3108
3274
|
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
3109
3275
|
|
3110
|
-
- regex: 'Nicecrawler/
|
3276
|
+
- regex: 'Nicecrawler/[\d.]+'
|
3111
3277
|
name: 'NiceCrawler'
|
3112
3278
|
category: 'Crawler'
|
3113
3279
|
url: 'https://www.nicecrawler.com/'
|
@@ -3115,7 +3281,7 @@
|
|
3115
3281
|
name: 'Intelium Corp.'
|
3116
3282
|
url: 'https://www.intelium.com/'
|
3117
3283
|
|
3118
|
-
- regex: 't3versionsBot/
|
3284
|
+
- regex: 't3versionsBot/[\d.]+'
|
3119
3285
|
name: 't3versions'
|
3120
3286
|
category: 'Crawler'
|
3121
3287
|
url: 'https://www.t3versions.com/bot'
|
@@ -3123,7 +3289,7 @@
|
|
3123
3289
|
name: 'Torben Hansen'
|
3124
3290
|
url: 'https://www.t3versions.com/'
|
3125
3291
|
|
3126
|
-
- regex: 'Crawlson/
|
3292
|
+
- regex: 'Crawlson/[\d.]+'
|
3127
3293
|
name: 'Crawlson'
|
3128
3294
|
category: 'Crawler'
|
3129
3295
|
url: 'https://www.crawlson.com/about'
|
@@ -3131,7 +3297,7 @@
|
|
3131
3297
|
name: 'Crawlson'
|
3132
3298
|
url: 'https://www.crawlson.com/'
|
3133
3299
|
|
3134
|
-
- regex: 'tchelebi/
|
3300
|
+
- regex: 'tchelebi/[\d.]+'
|
3135
3301
|
name: 'tchelebi'
|
3136
3302
|
category: 'Crawler'
|
3137
3303
|
url: 'https://tchelebi.io/'
|
@@ -3147,7 +3313,7 @@
|
|
3147
3313
|
name: 'New Work SE'
|
3148
3314
|
url: 'https://www.xing.com/'
|
3149
3315
|
|
3150
|
-
- regex: 'RepoLookoutBot/
|
3316
|
+
- regex: 'RepoLookoutBot/v?[\d.]+'
|
3151
3317
|
name: 'Repo Lookout'
|
3152
3318
|
category: 'Security Checker'
|
3153
3319
|
url: 'https://www.repo-lookout.org/'
|
@@ -3163,7 +3329,7 @@
|
|
3163
3329
|
name: 'MAMI Project'
|
3164
3330
|
url: 'https://mami-project.eu/'
|
3165
3331
|
|
3166
|
-
- regex: 'everyfeed-spider/
|
3332
|
+
- regex: 'everyfeed-spider/[\d.]+'
|
3167
3333
|
name: 'Everyfeed'
|
3168
3334
|
url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
|
3169
3335
|
category: 'Feed Fetcher'
|
@@ -3187,7 +3353,7 @@
|
|
3187
3353
|
name: ''
|
3188
3354
|
url: ''
|
3189
3355
|
|
3190
|
-
- regex: 'Gregarius/
|
3356
|
+
- regex: 'Gregarius/[\d.]+'
|
3191
3357
|
name: 'Gregarius'
|
3192
3358
|
category: 'Feed Fetcher'
|
3193
3359
|
url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
|
@@ -3203,7 +3369,7 @@
|
|
3203
3369
|
name: 'Comodo Security Solutions, Inc.'
|
3204
3370
|
url: 'https://www.comodo.com/'
|
3205
3371
|
|
3206
|
-
- regex: 'Sectigo DCV'
|
3372
|
+
- regex: 'Sectigo DCV|acme\.sectigo\.com'
|
3207
3373
|
name: 'Sectigo DCV'
|
3208
3374
|
category: 'Service Agent'
|
3209
3375
|
url: 'https://sectigo.com/'
|
@@ -3211,7 +3377,7 @@
|
|
3211
3377
|
name: 'Sectigo Limited'
|
3212
3378
|
url: 'https://sectigo.com/'
|
3213
3379
|
|
3214
|
-
- regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/
|
3380
|
+
- regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/[\d.]+'
|
3215
3381
|
name: 'KlarnaBot'
|
3216
3382
|
category: 'Crawler'
|
3217
3383
|
url: 'https://docs.klarna.com/klarna-bot/'
|
@@ -3219,7 +3385,7 @@
|
|
3219
3385
|
name: 'Klarna Bank AB'
|
3220
3386
|
url: 'https://www.klarna.com/'
|
3221
3387
|
|
3222
|
-
- regex: 'Taboolabot/
|
3388
|
+
- regex: 'Taboolabot/[\d.]+'
|
3223
3389
|
name: 'Taboolabot'
|
3224
3390
|
category: 'Crawler'
|
3225
3391
|
url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
|
@@ -3227,7 +3393,7 @@
|
|
3227
3393
|
name: 'Taboola, Inc.'
|
3228
3394
|
url: 'https://www.taboola.com/'
|
3229
3395
|
|
3230
|
-
- regex: 'Asana/
|
3396
|
+
- regex: 'Asana/[\d.]+'
|
3231
3397
|
name: 'Asana'
|
3232
3398
|
category: 'Crawler'
|
3233
3399
|
url: 'https://asana.com/'
|
@@ -3243,7 +3409,7 @@
|
|
3243
3409
|
name: 'Google Inc.'
|
3244
3410
|
url: 'https://www.google.com/'
|
3245
3411
|
|
3246
|
-
- regex: 'URLinspectorBot/
|
3412
|
+
- regex: 'URLinspectorBot/[\d.]+'
|
3247
3413
|
name: 'URLinspector'
|
3248
3414
|
category: 'Site Monitor'
|
3249
3415
|
url: 'https://www.urlinspector.com/bot/'
|
@@ -3251,7 +3417,7 @@
|
|
3251
3417
|
name: 'LinkResearchTools GmbH'
|
3252
3418
|
url: 'https://www.linkresearchtools.com/'
|
3253
3419
|
|
3254
|
-
- regex: 'EntferBot/
|
3420
|
+
- regex: 'EntferBot/[\d.]+'
|
3255
3421
|
name: 'Entfer'
|
3256
3422
|
category: 'Crawler'
|
3257
3423
|
url: 'https://entfer.com/'
|
@@ -3259,7 +3425,7 @@
|
|
3259
3425
|
name: 'Entfer Ltd.'
|
3260
3426
|
url: 'https://entfer.com/'
|
3261
3427
|
|
3262
|
-
- regex: 'TagInspector/
|
3428
|
+
- regex: 'TagInspector/[\d.]+'
|
3263
3429
|
name: 'Tag Inspector'
|
3264
3430
|
category: 'Crawler'
|
3265
3431
|
url: 'https://taginspector.com/'
|
@@ -3283,7 +3449,7 @@
|
|
3283
3449
|
name: 'Diffbot Technologies Corp.'
|
3284
3450
|
url: 'https://www.diffbot.com/'
|
3285
3451
|
|
3286
|
-
- regex: 'DisqusAdstxtCrawler/
|
3452
|
+
- regex: 'DisqusAdstxtCrawler/[\d.]+'
|
3287
3453
|
name: 'Disqus'
|
3288
3454
|
category: 'Crawler'
|
3289
3455
|
url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
|
@@ -3291,7 +3457,7 @@
|
|
3291
3457
|
name: 'Disqus, Inc.'
|
3292
3458
|
url: 'https://disqus.com/'
|
3293
3459
|
|
3294
|
-
- regex: 'startmebot/
|
3460
|
+
- regex: 'startmebot/[\d.]+'
|
3295
3461
|
name: 'start.me'
|
3296
3462
|
category: 'Crawler'
|
3297
3463
|
url: 'https://about.start.me/'
|
@@ -3299,17 +3465,17 @@
|
|
3299
3465
|
name: 'start.me BV'
|
3300
3466
|
url: 'https://about.start.me/'
|
3301
3467
|
|
3302
|
-
- regex: '2ip bot/
|
3468
|
+
- regex: '2ip bot/[\d.]+'
|
3303
3469
|
name: '2ip'
|
3304
3470
|
category: 'Crawler'
|
3305
3471
|
url: 'https://2ip.io/'
|
3306
3472
|
|
3307
|
-
- regex: 'ReqBin Curl Client/
|
3473
|
+
- regex: 'ReqBin Curl Client/[\d.]+'
|
3308
3474
|
name: 'ReqBin'
|
3309
3475
|
category: 'Crawler'
|
3310
3476
|
url: 'https://reqbin.com/curl'
|
3311
3477
|
|
3312
|
-
- regex: 'XoviBot/
|
3478
|
+
- regex: 'XoviBot/[\d.]+'
|
3313
3479
|
name: 'XoviBot'
|
3314
3480
|
category: 'Crawler'
|
3315
3481
|
url: 'https://www.xovibot.net'
|
@@ -3317,12 +3483,12 @@
|
|
3317
3483
|
name: 'Xovi GmbH'
|
3318
3484
|
url: 'http://www.xovi.de'
|
3319
3485
|
|
3320
|
-
- regex: 'Overcast/
|
3486
|
+
- regex: 'Overcast/[\d.]+ Podcast Sync'
|
3321
3487
|
name: 'Overcast Podcast Sync'
|
3322
3488
|
category: 'Service Agent'
|
3323
3489
|
url: 'https://overcast.fm/podcasterinfo'
|
3324
3490
|
|
3325
|
-
- regex: '^Verity/
|
3491
|
+
- regex: '^Verity/[\d.]+'
|
3326
3492
|
name: 'GumGum Verity'
|
3327
3493
|
category: 'Service Agent'
|
3328
3494
|
url: 'https://gumgum.com/verity'
|
@@ -3332,7 +3498,7 @@
|
|
3332
3498
|
category: 'Feed Reader'
|
3333
3499
|
url: 'https://github.com/snarfed/hackermention'
|
3334
3500
|
|
3335
|
-
- regex: 'BitSightBot/
|
3501
|
+
- regex: 'BitSightBot/[\d.]+'
|
3336
3502
|
name: 'BitSight'
|
3337
3503
|
category: 'Security Checker'
|
3338
3504
|
url: 'https://www.bitsight.com/'
|
@@ -3340,12 +3506,12 @@
|
|
3340
3506
|
name: 'BitSight Technologies, Inc.'
|
3341
3507
|
url: 'https://www.bitsight.com/'
|
3342
3508
|
|
3343
|
-
- regex: 'Ezgif/
|
3509
|
+
- regex: 'Ezgif/[\d.]+'
|
3344
3510
|
name: 'Ezgif'
|
3345
3511
|
category: 'Service Agent'
|
3346
3512
|
url: 'https://ezgif.com/about'
|
3347
3513
|
|
3348
|
-
- regex: 'intelx
|
3514
|
+
- regex: 'intelx\.io_bot'
|
3349
3515
|
name: 'Intelligence X'
|
3350
3516
|
category: 'Crawler'
|
3351
3517
|
url: 'https://intelx.io/'
|
@@ -3353,7 +3519,7 @@
|
|
3353
3519
|
name: 'Kleissner Investments s.r.o.'
|
3354
3520
|
url: 'https://intelx.io/'
|
3355
3521
|
|
3356
|
-
- regex: 'FemtosearchBot/
|
3522
|
+
- regex: 'FemtosearchBot/[\d.]+'
|
3357
3523
|
name: 'Femtosearch'
|
3358
3524
|
category: 'Crawler'
|
3359
3525
|
url: 'http://femtosearch.com/'
|
@@ -3361,7 +3527,7 @@
|
|
3361
3527
|
name: 'Grier Forensics, LLC'
|
3362
3528
|
url: 'https://www.grierforensics.com/'
|
3363
3529
|
|
3364
|
-
- regex: 'AdsTxtCrawler/
|
3530
|
+
- regex: 'AdsTxtCrawler/[\d.]+'
|
3365
3531
|
name: 'AdsTxtCrawler'
|
3366
3532
|
category: 'Crawler'
|
3367
3533
|
url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
|
@@ -3377,7 +3543,7 @@
|
|
3377
3543
|
name: 'Morningscore'
|
3378
3544
|
url: 'https://morningscore.io/'
|
3379
3545
|
|
3380
|
-
- regex: 'Uptime-Kuma/
|
3546
|
+
- regex: 'Uptime-Kuma/[\d.]+'
|
3381
3547
|
name: 'Uptime-Kuma'
|
3382
3548
|
category: 'Site Monitor'
|
3383
3549
|
url: 'https://github.com/louislam/uptime-kuma'
|
@@ -3390,7 +3556,7 @@
|
|
3390
3556
|
name: 'OpenAI OpCo, LLC'
|
3391
3557
|
url: 'https://openai.com/'
|
3392
3558
|
|
3393
|
-
- regex: 'BrightEdge Crawler/
|
3559
|
+
- regex: 'BrightEdge Crawler/[\d.]+'
|
3394
3560
|
name: 'BrightEdge'
|
3395
3561
|
category: 'Crawler'
|
3396
3562
|
url: 'https://www.brightedge.com/'
|
@@ -3398,12 +3564,12 @@
|
|
3398
3564
|
name: 'BrightEdge Technologies, Inc'
|
3399
3565
|
url: 'https://www.brightedge.com/'
|
3400
3566
|
|
3401
|
-
- regex: 'sfFeedReader/
|
3567
|
+
- regex: 'sfFeedReader/[\d.]+'
|
3402
3568
|
name: 'sfFeedReader'
|
3403
3569
|
url: 'https://github.com/diem-project/sfFeed2Plugin'
|
3404
3570
|
category: 'Feed Fetcher'
|
3405
3571
|
|
3406
|
-
- regex: 'cyberscan
|
3572
|
+
- regex: 'cyberscan\.io'
|
3407
3573
|
name: 'Cyberscan'
|
3408
3574
|
category: 'Security Checker'
|
3409
3575
|
url: 'https://www.cyberscan.io/'
|
@@ -3419,15 +3585,7 @@
|
|
3419
3585
|
name: 'Lumar'
|
3420
3586
|
url: 'https://www.lumar.io/'
|
3421
3587
|
|
3422
|
-
- regex: '
|
3423
|
-
name: 'Repo Lookout'
|
3424
|
-
category: 'Crawler'
|
3425
|
-
url: 'https://www.repo-lookout.org/'
|
3426
|
-
producer:
|
3427
|
-
name: 'Crissy Field GmbH'
|
3428
|
-
url: 'https://www.crissyfield.de/'
|
3429
|
-
|
3430
|
-
- regex: 'researchscan.comsys.rwth-aachen.de'
|
3588
|
+
- regex: 'researchscan\.comsys\.rwth-aachen\.de'
|
3431
3589
|
name: 'Research Scan'
|
3432
3590
|
category: 'Crawler'
|
3433
3591
|
url: 'http://researchscan.comsys.rwth-aachen.de/'
|
@@ -3435,7 +3593,7 @@
|
|
3435
3593
|
name: 'RWTH Aachen University'
|
3436
3594
|
url: 'https://www.comsys.rwth-aachen.de/'
|
3437
3595
|
|
3438
|
-
- regex: 'newspaper/
|
3596
|
+
- regex: 'newspaper/[\d.]+'
|
3439
3597
|
name: 'Scraping Robot'
|
3440
3598
|
category: 'Crawler'
|
3441
3599
|
url: 'https://scrapingrobot.com/'
|
@@ -3443,7 +3601,7 @@
|
|
3443
3601
|
name: 'Sprious LLC'
|
3444
3602
|
url: 'https://sprious.com/'
|
3445
3603
|
|
3446
|
-
- regex: 'GPTBot/
|
3604
|
+
- regex: 'GPTBot/[\d.]+'
|
3447
3605
|
name: 'GPTBot'
|
3448
3606
|
category: 'Crawler'
|
3449
3607
|
url: 'https://platform.openai.com/docs/gptbot'
|
@@ -3451,7 +3609,7 @@
|
|
3451
3609
|
name: 'OpenAI OpCo, LLC'
|
3452
3610
|
url: 'https://openai.com/'
|
3453
3611
|
|
3454
|
-
- regex: 'Ant
|
3612
|
+
- regex: 'Ant(?:\.com beta|Bot)(?:/([\d+.]+))?'
|
3455
3613
|
name: 'Ant'
|
3456
3614
|
category: 'Crawler'
|
3457
3615
|
url: 'https://www.ant.com/'
|
@@ -3459,7 +3617,7 @@
|
|
3459
3617
|
name: 'Ant.com Ltd.'
|
3460
3618
|
url: 'https://www.ant.com/'
|
3461
3619
|
|
3462
|
-
- regex: 'WebwikiBot/
|
3620
|
+
- regex: 'WebwikiBot/[\d.]+'
|
3463
3621
|
name: 'Webwiki'
|
3464
3622
|
category: 'Crawler'
|
3465
3623
|
url: 'https://www.webwiki.com/'
|
@@ -3472,7 +3630,7 @@
|
|
3472
3630
|
category: 'Service Agent'
|
3473
3631
|
url: 'https://www.phpmyadmin.net/'
|
3474
3632
|
|
3475
|
-
- regex: 'Matomo/
|
3633
|
+
- regex: 'Matomo/[\d.]+'
|
3476
3634
|
name: 'Matomo'
|
3477
3635
|
category: 'Service Agent'
|
3478
3636
|
url: 'https://github.com/matomo-org/matomo'
|
@@ -3480,7 +3638,7 @@
|
|
3480
3638
|
name: 'InnoCraft Ltd'
|
3481
3639
|
url: 'https://matomo.org/'
|
3482
3640
|
|
3483
|
-
- regex: 'Prometheus/
|
3641
|
+
- regex: 'Prometheus/[\d.]+'
|
3484
3642
|
name: 'Prometheus'
|
3485
3643
|
category: 'Service Agent'
|
3486
3644
|
url: 'https://github.com/prometheus/prometheus'
|
@@ -3496,7 +3654,7 @@
|
|
3496
3654
|
name: 'ArchiveTeam'
|
3497
3655
|
url: 'https://wiki.archiveteam.org/'
|
3498
3656
|
|
3499
|
-
- regex: 'MADBbot/
|
3657
|
+
- regex: 'MADBbot/[\d.]+'
|
3500
3658
|
name: 'MADBbot'
|
3501
3659
|
category: 'Crawler'
|
3502
3660
|
url: 'https://madb.zapto.org/bot.html'
|
@@ -3508,6 +3666,877 @@
|
|
3508
3666
|
name: 'Meltwater Deutschland GmbH'
|
3509
3667
|
url: 'https://www.meltwater.com/'
|
3510
3668
|
|
3669
|
+
- regex: '(?:Owler@ows\.eu|OWLer)/[\d.]+'
|
3670
|
+
name: 'OWLer'
|
3671
|
+
category: 'Crawler'
|
3672
|
+
url: 'https://openwebsearch.eu/owler/'
|
3673
|
+
producer:
|
3674
|
+
name: 'Open Search Foundation e.V.'
|
3675
|
+
url: 'https://openwebsearch.eu/'
|
3676
|
+
|
3677
|
+
- regex: 'bbc\.co\.uk/display/men/Page\+Monitor'
|
3678
|
+
name: 'BBC Page Monitor'
|
3679
|
+
category: 'Site Monitor'
|
3680
|
+
url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor'
|
3681
|
+
producer:
|
3682
|
+
name: 'BBC'
|
3683
|
+
url: 'https://www.bbc.com/'
|
3684
|
+
|
3685
|
+
- regex: 'BBC-Forge-URL-Monitor-Twisted'
|
3686
|
+
name: 'BBC Forge URL Monitor'
|
3687
|
+
category: 'Site Monitor'
|
3688
|
+
url: 'https://www.bbc.com/'
|
3689
|
+
producer:
|
3690
|
+
name: 'BBC'
|
3691
|
+
url: 'https://www.bbc.com/'
|
3692
|
+
|
3693
|
+
- regex: 'ClaudeBot'
|
3694
|
+
name: 'ClaudeBot'
|
3695
|
+
category: 'Crawler'
|
3696
|
+
url: 'https://github.com/ClaudeBot/ClaudeBot'
|
3697
|
+
|
3698
|
+
- regex: 'Imagesift'
|
3699
|
+
name: 'ImageSift'
|
3700
|
+
category: 'Crawler'
|
3701
|
+
url: 'https://imagesift.com/'
|
3702
|
+
producer:
|
3703
|
+
name: 'Castle Global, Inc.'
|
3704
|
+
url: 'https://thehive.ai/'
|
3705
|
+
|
3706
|
+
- regex: 'TactiScout'
|
3707
|
+
name: 'TactiScout'
|
3708
|
+
category: 'Crawler'
|
3709
|
+
url: 'https://find-it.world/TempCrawl/Crawltheque.php'
|
3710
|
+
producer:
|
3711
|
+
name: 'Tactikast'
|
3712
|
+
|
3713
|
+
- regex: 'Brightbot ([\d+.]+)'
|
3714
|
+
name: 'BrightBot'
|
3715
|
+
category: 'Crawler'
|
3716
|
+
url: 'https://www.brightbot.app/'
|
3717
|
+
producer:
|
3718
|
+
name: 'Bright Interactive Ltd'
|
3719
|
+
url: 'https://www.builtbybright.com/'
|
3720
|
+
|
3721
|
+
- regex: 'DaspeedBot/([\d+.]+)'
|
3722
|
+
name: 'DaspeedBot'
|
3723
|
+
category: 'Crawler'
|
3724
|
+
url: 'https://daspeed.io/'
|
3725
|
+
producer:
|
3726
|
+
name: 'DAWAP SARL'
|
3727
|
+
url: 'https://dawap.fr/'
|
3728
|
+
|
3729
|
+
- regex: 'StractBot(?:/([\d+.]+))?'
|
3730
|
+
name: 'Stract'
|
3731
|
+
category: 'Crawler'
|
3732
|
+
url: 'https://stract.com/webmasters'
|
3733
|
+
producer:
|
3734
|
+
name: 'Stract'
|
3735
|
+
url: 'https://github.com/StractOrg/stract/'
|
3736
|
+
|
3737
|
+
- regex: 'GeedoBot(?:/([\d+.]+))?'
|
3738
|
+
name: 'GeedoBot'
|
3739
|
+
category: 'Crawler'
|
3740
|
+
url: 'https://geedo.com/bot/'
|
3741
|
+
|
3742
|
+
- regex: 'GeedoProductSearch'
|
3743
|
+
name: 'GeedoProductSearch'
|
3744
|
+
category: 'Crawler'
|
3745
|
+
url: 'https://geedo.com/product-search/'
|
3746
|
+
|
3747
|
+
- regex: 'BackupLand(?:/([\d+.]+))?'
|
3748
|
+
name: 'BackupLand'
|
3749
|
+
category: 'Crawler'
|
3750
|
+
url: 'https://go.backupland.com/'
|
3751
|
+
producer:
|
3752
|
+
name: 'ООО «КВАРТА»'
|
3753
|
+
url: 'https://go.backupland.com/'
|
3754
|
+
|
3755
|
+
- regex: 'Konturbot(?:/([\d+.]+))?'
|
3756
|
+
name: 'Konturbot'
|
3757
|
+
category: 'Crawler'
|
3758
|
+
url: 'https://kontur.ru/'
|
3759
|
+
producer:
|
3760
|
+
name: 'АО «ПФ «СКБ Контур»'
|
3761
|
+
url: 'https://kontur.ru/'
|
3762
|
+
|
3763
|
+
- regex: 'keys-so-bot'
|
3764
|
+
name: 'Keys.so'
|
3765
|
+
category: 'Crawler'
|
3766
|
+
url: 'https://www.keys.so/'
|
3767
|
+
producer:
|
3768
|
+
name: 'ООО «МОДЕСКО»'
|
3769
|
+
url: 'https://www.modesco.ru/'
|
3770
|
+
|
3771
|
+
- regex: 'LetsearchBot(?:/([\d+.]+))?'
|
3772
|
+
name: 'LetSearch'
|
3773
|
+
category: 'Crawler'
|
3774
|
+
url: 'https://letsearch.ru/bots'
|
3775
|
+
|
3776
|
+
- regex: 'Example3(?:/([\d+.]+))?'
|
3777
|
+
name: 'Example3'
|
3778
|
+
category: 'Crawler'
|
3779
|
+
url: 'https://www.example3.com/'
|
3780
|
+
|
3781
|
+
- regex: 'StatOnlineRuBot(?:/([\d+.]+))?'
|
3782
|
+
name: 'StatOnline.ru'
|
3783
|
+
category: 'Crawler'
|
3784
|
+
url: 'https://statonline.ru/'
|
3785
|
+
producer:
|
3786
|
+
name: 'ООО «Регистратор доменных имен РЕГ.РУ»'
|
3787
|
+
url: 'https://statonline.ru/'
|
3788
|
+
|
3789
|
+
- regex: 'Spawning-AI'
|
3790
|
+
name: 'Spawning AI'
|
3791
|
+
category: 'Crawler'
|
3792
|
+
url: 'https://spawning.ai/'
|
3793
|
+
producer:
|
3794
|
+
name: 'Spawning, Inc'
|
3795
|
+
url: 'https://spawning.ai/'
|
3796
|
+
|
3797
|
+
- regex: 'domain research project'
|
3798
|
+
name: 'Domain Research Project'
|
3799
|
+
category: 'Crawler'
|
3800
|
+
url: 'https://trentwil.es/domains.html'
|
3801
|
+
producer:
|
3802
|
+
name: 'Trent Wiles'
|
3803
|
+
url: 'https://trentwil.es/'
|
3804
|
+
|
3805
|
+
- regex: 'getodin\.com'
|
3806
|
+
name: 'Odin'
|
3807
|
+
category: 'Security Checker'
|
3808
|
+
url: 'https://docs.getodin.com/'
|
3809
|
+
producer:
|
3810
|
+
name: 'Cyble Inc.'
|
3811
|
+
url: 'https://cyble.com/'
|
3812
|
+
|
3813
|
+
- regex: 'YouBot'
|
3814
|
+
name: 'YouBot'
|
3815
|
+
category: 'Crawler'
|
3816
|
+
url: 'https://about.you.com/youbot/'
|
3817
|
+
producer:
|
3818
|
+
name: 'SuSea, Inc.'
|
3819
|
+
url: 'https://you.com/'
|
3820
|
+
|
3821
|
+
- regex: 'SiteScoreBot'
|
3822
|
+
name: 'SiteScore'
|
3823
|
+
category: 'Crawler'
|
3824
|
+
url: 'https://sitescore.ai/'
|
3825
|
+
|
3826
|
+
- regex: 'MBCrawler'
|
3827
|
+
name: 'Monitor Backlinks'
|
3828
|
+
category: 'Crawler'
|
3829
|
+
url: 'https://www.seoptimer.com/monitor-backlinks/'
|
3830
|
+
producer:
|
3831
|
+
name: 'SEOptimer'
|
3832
|
+
url: 'https://www.seoptimer.com/'
|
3833
|
+
|
3834
|
+
- regex: 'mariadb-mysql-kbs-bot'
|
3835
|
+
name: 'MariaDB/MySQL Knowledge Base'
|
3836
|
+
category: 'Crawler'
|
3837
|
+
url: 'https://github.com/williamdes/mariadb-mysql-kbs'
|
3838
|
+
producer:
|
3839
|
+
name: 'WDES SAS'
|
3840
|
+
url: 'https://wdes.fr/en/'
|
3841
|
+
|
3842
|
+
- regex: 'GitHubCopilotChat'
|
3843
|
+
name: 'GitHubCopilotChat'
|
3844
|
+
category: 'Crawler'
|
3845
|
+
url: 'https://github.com/aaamoon/copilot-gpt4-service'
|
3846
|
+
|
3847
|
+
- regex: '^pdrl\.fm'
|
3848
|
+
name: 'Podroll Analyzer'
|
3849
|
+
category: 'Crawler'
|
3850
|
+
url: 'https://podroll.fm'
|
3851
|
+
|
3852
|
+
- regex: 'PodUptime/'
|
3853
|
+
name: 'PodUptime'
|
3854
|
+
category: 'Site Monitor'
|
3855
|
+
url: 'https://poduptime.com'
|
3856
|
+
|
3857
|
+
- regex: 'anthropic-ai'
|
3858
|
+
name: 'Anthropic AI'
|
3859
|
+
category: 'Crawler'
|
3860
|
+
url: 'https://www.anthropic.com/'
|
3861
|
+
producer:
|
3862
|
+
name: 'Anthropic, PBC'
|
3863
|
+
url: 'https://www.anthropic.com/'
|
3864
|
+
|
3865
|
+
- regex: 'NetpeakCheckerBot/[\d.]+'
|
3866
|
+
name: 'Netpeak Checker'
|
3867
|
+
category: 'Crawler'
|
3868
|
+
url: 'https://netpeaksoftware.com/checker'
|
3869
|
+
producer:
|
3870
|
+
name: 'Netpeak LTD'
|
3871
|
+
url: 'https://netpeaksoftware.com/'
|
3872
|
+
|
3873
|
+
- regex: 'SandobaCrawler/[\d.]+'
|
3874
|
+
name: 'Sandoba//Crawler'
|
3875
|
+
category: 'Crawler'
|
3876
|
+
url: 'https://www.sandoba.com/en/crawler/'
|
3877
|
+
producer:
|
3878
|
+
name: 'SANDOBA//EBUSINESS SOLUTIONS'
|
3879
|
+
url: 'https://www.sandoba.com/'
|
3880
|
+
|
3881
|
+
- regex: 'SirdataBot'
|
3882
|
+
name: 'Sirdata'
|
3883
|
+
category: 'Crawler'
|
3884
|
+
url: 'https://semantic-api.docs.sirdata.net/contextual-api/contextual-api/introduction'
|
3885
|
+
producer:
|
3886
|
+
name: 'Sirdata SAS'
|
3887
|
+
url: 'https://www.sirdata.com/'
|
3888
|
+
|
3889
|
+
- regex: 'CheckMarkNetwork/[\d.]+'
|
3890
|
+
name: 'CheckMark Network'
|
3891
|
+
category: 'Crawler'
|
3892
|
+
url: 'https://www.checkmarknetwork.com/spider.html/'
|
3893
|
+
producer:
|
3894
|
+
name: 'Exipert, Inc.'
|
3895
|
+
url: 'https://www.checkmarknetwork.com/'
|
3896
|
+
|
3897
|
+
- regex: 'cohere-ai'
|
3898
|
+
name: 'Cohere AI'
|
3899
|
+
category: 'Crawler'
|
3900
|
+
url: 'https://cohere.com/'
|
3901
|
+
producer:
|
3902
|
+
name: 'Cohere, Inc.'
|
3903
|
+
url: 'https://cohere.com/'
|
3904
|
+
|
3905
|
+
- regex: 'PerplexityBot/[\d.]+'
|
3906
|
+
name: 'PerplexityBot'
|
3907
|
+
category: 'Crawler'
|
3908
|
+
url: 'https://docs.perplexity.ai/docs/perplexitybot'
|
3909
|
+
producer:
|
3910
|
+
name: 'Perplexity AI, Inc.'
|
3911
|
+
url: 'https://www.perplexity.ai/'
|
3912
|
+
|
3913
|
+
- regex: 'TTD-Content'
|
3914
|
+
name: 'The Trade Desk Content'
|
3915
|
+
category: 'Crawler'
|
3916
|
+
url: 'https://www.thetradedesk.com/us/ttd-content'
|
3917
|
+
producer:
|
3918
|
+
name: 'The Trade Desk, Inc.'
|
3919
|
+
url: 'https://www.thetradedesk.com/'
|
3920
|
+
|
3921
|
+
- regex: 'montastic-monitor'
|
3922
|
+
name: 'Montastic Monitor'
|
3923
|
+
category: 'Site Monitor'
|
3924
|
+
url: 'https://www.montastic.com/'
|
3925
|
+
producer:
|
3926
|
+
name: 'Metadot, Corp.'
|
3927
|
+
url: 'https://www.metadot.com/'
|
3928
|
+
|
3929
|
+
- regex: 'Ruby, Twurly v[\d.]+'
|
3930
|
+
name: 'Twurly'
|
3931
|
+
category: 'Crawler'
|
3932
|
+
url: 'https://twurly.org/'
|
3933
|
+
|
3934
|
+
- regex: 'Mixnode(?:(?:Cache)?/[\d.]+)?'
|
3935
|
+
name: 'Mixnode'
|
3936
|
+
category: 'Crawler'
|
3937
|
+
url: 'https://www.mixnode.com/'
|
3938
|
+
producer:
|
3939
|
+
name: 'Mixnode Technologies, Inc.'
|
3940
|
+
url: 'https://www.mixnode.com/'
|
3941
|
+
|
3942
|
+
- regex: 'CSSCheck/[\d.]+'
|
3943
|
+
name: 'CSSCheck'
|
3944
|
+
category: 'Validator'
|
3945
|
+
|
3946
|
+
- regex: 'MicrosoftPreview/[\d.]+'
|
3947
|
+
name: 'Microsoft Preview'
|
3948
|
+
category: 'Service Agent'
|
3949
|
+
url: 'https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0'
|
3950
|
+
producer:
|
3951
|
+
name: 'Microsoft Corporation'
|
3952
|
+
url: 'https://www.microsoft.com/'
|
3953
|
+
|
3954
|
+
- regex: 's~virustotalcloud'
|
3955
|
+
name: 'VirusTotal Cloud'
|
3956
|
+
category: 'Crawler'
|
3957
|
+
url: 'https://www.virustotal.com/'
|
3958
|
+
producer:
|
3959
|
+
name: 'Chronicle Security Ireland Limited'
|
3960
|
+
url: 'https://chronicle.security/'
|
3961
|
+
|
3962
|
+
- regex: 'TinEye/[\d.]+'
|
3963
|
+
name: 'TinEye'
|
3964
|
+
category: 'Crawler'
|
3965
|
+
url: 'https://tineye.com/'
|
3966
|
+
producer:
|
3967
|
+
name: 'Idée, Inc.'
|
3968
|
+
url: 'https://tineye.com/'
|
3969
|
+
|
3970
|
+
- regex: 'e~arsnova-filter-system'
|
3971
|
+
name: 'ARSNova Filter System'
|
3972
|
+
category: 'Crawler'
|
3973
|
+
url: 'https://particify.de/en/'
|
3974
|
+
producer:
|
3975
|
+
name: 'Particify Gerhardt & Weingarten OHG'
|
3976
|
+
url: 'https://particify.de/en/'
|
3977
|
+
|
3978
|
+
- regex: 'botify'
|
3979
|
+
name: 'Botify'
|
3980
|
+
category: 'Crawler'
|
3981
|
+
url: 'https://www.botify.com/'
|
3982
|
+
producer:
|
3983
|
+
name: 'BOTIFY SAS'
|
3984
|
+
url: 'https://www.botify.com/'
|
3985
|
+
|
3986
|
+
- regex: 'adscanner'
|
3987
|
+
name: 'Adscanner'
|
3988
|
+
category: 'Crawler'
|
3989
|
+
url: 'https://www.alleyesonscreens.com/'
|
3990
|
+
producer:
|
3991
|
+
name: 'AdScanner d.o.o'
|
3992
|
+
url: 'https://www.alleyesonscreens.com/'
|
3993
|
+
|
3994
|
+
- regex: 'online-webceo-bot/[\d.]+'
|
3995
|
+
name: 'WebCEO'
|
3996
|
+
category: 'Crawler'
|
3997
|
+
url: 'https://www.webceo.com/'
|
3998
|
+
producer:
|
3999
|
+
name: 'WebCEO, LLC'
|
4000
|
+
url: 'https://www.webceo.com/'
|
4001
|
+
|
4002
|
+
- regex: 'NetTrack'
|
4003
|
+
name: 'NetTrack'
|
4004
|
+
category: 'Crawler'
|
4005
|
+
url: 'https://web.archive.org/web/20160607151934/https://nettrack.info/'
|
4006
|
+
|
4007
|
+
- regex: 'htmlyse'
|
4008
|
+
name: 'htmlyse'
|
4009
|
+
category: 'Crawler'
|
4010
|
+
url: 'https://www.htmlyse.com/'
|
4011
|
+
producer:
|
4012
|
+
name: 'Vistex LTD'
|
4013
|
+
url: 'https://www.htmlyse.com/'
|
4014
|
+
|
4015
|
+
- regex: 'TrendsmapResolver/[\d.]+'
|
4016
|
+
name: 'Trendsmap'
|
4017
|
+
category: 'Crawler'
|
4018
|
+
url: 'https://www.trendsmap.com/'
|
4019
|
+
producer:
|
4020
|
+
name: 'Trendsmap Pty Ltd'
|
4021
|
+
url: 'https://www.trendsmap.com/'
|
4022
|
+
|
4023
|
+
- regex: 'Shareaholic(?:bot)?/[\d.]+'
|
4024
|
+
name: 'Steve Bot'
|
4025
|
+
category: 'Crawler'
|
4026
|
+
url: 'https://www.shareaholic.com/steve'
|
4027
|
+
producer:
|
4028
|
+
name: 'Shareaholic, Inc.'
|
4029
|
+
url: 'https://www.shareaholic.com/'
|
4030
|
+
|
4031
|
+
- regex: 'keycdn-tools:'
|
4032
|
+
name: 'KeyCDN Tools'
|
4033
|
+
category: 'Service Agent'
|
4034
|
+
url: 'https://tools.keycdn.com/geo'
|
4035
|
+
|
4036
|
+
- regex: 'keycdn-tools/'
|
4037
|
+
name: 'KeyCDN Tools'
|
4038
|
+
category: 'Service Agent'
|
4039
|
+
url: 'https://tools.keycdn.com/'
|
4040
|
+
producer:
|
4041
|
+
name: 'proinity LLC'
|
4042
|
+
url: 'https://www.keycdn.com/'
|
4043
|
+
|
4044
|
+
- regex: 'Arquivo-web-crawler'
|
4045
|
+
name: 'Arquivo.pt'
|
4046
|
+
category: 'Crawler'
|
4047
|
+
url: 'https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/'
|
4048
|
+
producer:
|
4049
|
+
name: 'FCT|FCCN'
|
4050
|
+
url: 'https://www.fct.pt/'
|
4051
|
+
|
4052
|
+
- regex: 'WhatsMyIP\.org'
|
4053
|
+
name: 'WhatsMyIP.org'
|
4054
|
+
category: 'Service Agent'
|
4055
|
+
url: 'https://www.whatsmyip.org/ua/'
|
4056
|
+
|
4057
|
+
- regex: 'SenutoBot/[\d.]+'
|
4058
|
+
name: 'Senuto'
|
4059
|
+
category: 'Crawler'
|
4060
|
+
url: 'https://www.senuto.com/'
|
4061
|
+
producer:
|
4062
|
+
name: 'Senuto Sp. z o.o.'
|
4063
|
+
url: 'https://www.senuto.com/'
|
4064
|
+
|
4065
|
+
- regex: 'spaziodati'
|
4066
|
+
name: 'SpazioDati'
|
4067
|
+
category: 'Crawler'
|
4068
|
+
url: 'https://www.spaziodati.eu/'
|
4069
|
+
producer:
|
4070
|
+
name: 'SpazioDati s.r.l.'
|
4071
|
+
url: 'https://www.spaziodati.eu/'
|
4072
|
+
|
4073
|
+
- regex: 'GozleBot'
|
4074
|
+
name: 'Gozle'
|
4075
|
+
category: 'Crawler'
|
4076
|
+
url: 'https://gozle.com.tm/en/blog/post/1'
|
4077
|
+
producer:
|
4078
|
+
name: 'Doly Horjun HJ'
|
4079
|
+
url: 'https://gozle.com.tm/'
|
4080
|
+
|
4081
|
+
- regex: 'Quantcastbot/[\d.]+'
|
4082
|
+
name: 'Quantcast'
|
4083
|
+
category: 'Crawler'
|
4084
|
+
url: 'https://www.quantcast.com/bot/'
|
4085
|
+
producer:
|
4086
|
+
name: 'Quantcast Corp.'
|
4087
|
+
url: 'https://www.quantcast.com/'
|
4088
|
+
|
4089
|
+
- regex: 'FontRadar'
|
4090
|
+
name: 'FontRadar'
|
4091
|
+
category: 'Crawler'
|
4092
|
+
url: 'https://www.fontradar.com/'
|
4093
|
+
producer:
|
4094
|
+
name: 'EMDASH SAS'
|
4095
|
+
url: 'https://www.fontradar.com/'
|
4096
|
+
|
4097
|
+
- regex: 'ViberUrlDownloader'
|
4098
|
+
name: 'Viber Url Downloader'
|
4099
|
+
category: 'Service Agent'
|
4100
|
+
url: 'https://www.viber.com/'
|
4101
|
+
producer:
|
4102
|
+
name: 'Viber Media S.à r.l.'
|
4103
|
+
url: 'https://www.viber.com/'
|
4104
|
+
|
4105
|
+
- regex: '^Zeno$'
|
4106
|
+
name: 'Zeno'
|
4107
|
+
category: 'Crawler'
|
4108
|
+
url: 'https://github.com/internetarchive/Zeno'
|
4109
|
+
producer:
|
4110
|
+
name: 'The Internet Archive'
|
4111
|
+
url: 'https://archive.org/'
|
4112
|
+
|
4113
|
+
- regex: 'Barracuda Sentinel'
|
4114
|
+
name: 'Barracuda Sentinel'
|
4115
|
+
category: 'Service Agent'
|
4116
|
+
url: 'https://sentinel.barracudanetworks.com/'
|
4117
|
+
producer:
|
4118
|
+
name: 'Barracuda Networks, Inc.'
|
4119
|
+
url: 'https://www.barracudanetworks.com/'
|
4120
|
+
|
4121
|
+
- regex: 'RuxitSynthetic/[\d.]+'
|
4122
|
+
name: 'RuxitSynthetic'
|
4123
|
+
category: 'Site Monitor'
|
4124
|
+
url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
|
4125
|
+
producer:
|
4126
|
+
name: 'Dynatrace LLC'
|
4127
|
+
url: 'https://www.dynatrace.com/'
|
4128
|
+
|
4129
|
+
- regex: 'DynatraceSynthetic/[\d.]+'
|
4130
|
+
name: 'DynatraceSynthetic'
|
4131
|
+
category: 'Site Monitor'
|
4132
|
+
url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
|
4133
|
+
producer:
|
4134
|
+
name: 'Dynatrace LLC'
|
4135
|
+
url: 'https://www.dynatrace.com/'
|
4136
|
+
|
4137
|
+
- regex: 'sitebulb'
|
4138
|
+
name: 'Sitebulb'
|
4139
|
+
category: 'Crawler'
|
4140
|
+
url: 'https://sitebulb.com/'
|
4141
|
+
producer:
|
4142
|
+
name: 'Sitebulb Limited'
|
4143
|
+
url: 'https://sitebulb.com/'
|
4144
|
+
|
4145
|
+
- regex: 'Monsidobot/[\d.]+'
|
4146
|
+
name: 'Monsidobot'
|
4147
|
+
category: 'Crawler'
|
4148
|
+
url: 'https://monsido.com/bot-html'
|
4149
|
+
producer:
|
4150
|
+
name: 'Monsido LLC'
|
4151
|
+
url: 'https://monsido.com/'
|
4152
|
+
|
4153
|
+
- regex: 'AccompanyBot'
|
4154
|
+
name: 'AccompanyBot'
|
4155
|
+
category: 'Crawler'
|
4156
|
+
url: 'https://www.accompany.com/'
|
4157
|
+
producer:
|
4158
|
+
name: 'Accompani, Inc'
|
4159
|
+
url: 'https://www.accompany.com/'
|
4160
|
+
|
4161
|
+
- regex: 'Ghost Inspector'
|
4162
|
+
name: 'Ghost Inspector'
|
4163
|
+
category: 'Site Monitor'
|
4164
|
+
url: 'https://docs.ghostinspector.com/faq/#how-do-i-detect-ghost-inspector-test-runner-traffic-on-my-site'
|
4165
|
+
producer:
|
4166
|
+
name: 'Ghost Inspector, Inc.'
|
4167
|
+
url: 'https://www.ghostinspector.com/'
|
4168
|
+
|
4169
|
+
- regex: 'Cypress/[\d.]+'
|
4170
|
+
name: 'Cypress'
|
4171
|
+
category: 'Site Monitor'
|
4172
|
+
url: 'https://github.com/cypress-io/cypress'
|
4173
|
+
producer:
|
4174
|
+
name: 'Cypress.io, Inc.'
|
4175
|
+
url: 'https://www.cypress.io/'
|
4176
|
+
|
4177
|
+
- regex: 'Google-Apps-Script'
|
4178
|
+
name: 'Google Apps Script'
|
4179
|
+
category: 'Service Agent'
|
4180
|
+
url: 'https://www.google.com/script/start/'
|
4181
|
+
|
4182
|
+
- regex: 'SiteOne-Crawler/[\d.]+'
|
4183
|
+
name: 'SiteOne Crawler'
|
4184
|
+
category: 'Crawler'
|
4185
|
+
url: 'https://crawler.siteone.io/bot/'
|
4186
|
+
producer:
|
4187
|
+
name: 'SiteOne s.r.o.'
|
4188
|
+
url: 'https://www.siteone.io/'
|
4189
|
+
|
4190
|
+
- regex: 'Detectify'
|
4191
|
+
name: 'Detectify'
|
4192
|
+
category: 'Security Checker'
|
4193
|
+
url: 'https://support.detectify.com/support/solutions/articles/48001049001-how-to-allow-detectify-to-access-your-site'
|
4194
|
+
producer:
|
4195
|
+
name: 'Detectify AB'
|
4196
|
+
url: 'https://detectify.com/'
|
4197
|
+
|
4198
|
+
- regex: 'DomCopBot'
|
4199
|
+
name: 'DomCop Bot'
|
4200
|
+
category: 'Crawler'
|
4201
|
+
url: 'https://www.domcop.com/bot'
|
4202
|
+
producer:
|
4203
|
+
name: 'Axeman Technology Solutions LLP'
|
4204
|
+
url: 'https://axemantech.com/'
|
4205
|
+
|
4206
|
+
- regex: 'Paqlebot/[\d.]+'
|
4207
|
+
name: 'Paqlebot'
|
4208
|
+
category: 'Crawler'
|
4209
|
+
url: 'https://www.paqle.dk/about/paqlebot'
|
4210
|
+
producer:
|
4211
|
+
name: 'Paqle A/S'
|
4212
|
+
url: 'https://www.paqle.dk/'
|
4213
|
+
|
4214
|
+
- regex: 'Wibybot'
|
4215
|
+
name: 'Wibybot'
|
4216
|
+
category: 'Crawler'
|
4217
|
+
url: 'https://www.wiby.me/'
|
4218
|
+
|
4219
|
+
- regex: 'Synapse'
|
4220
|
+
name: 'Synapse'
|
4221
|
+
category: 'Crawler'
|
4222
|
+
url: 'https://github.com/matrix-org/synapse'
|
4223
|
+
|
4224
|
+
- regex: 'OSZKbot/[\d.]+'
|
4225
|
+
name: 'OSZKbot'
|
4226
|
+
category: 'Crawler'
|
4227
|
+
url: 'http://mekosztaly.oszk.hu/mia/'
|
4228
|
+
producer:
|
4229
|
+
name: 'National Szechenyi Library'
|
4230
|
+
url: 'https://webarchivum.oszk.hu/'
|
4231
|
+
|
4232
|
+
- regex: 'ZoomBot'
|
4233
|
+
name: 'ZoomBot'
|
4234
|
+
category: 'Crawler'
|
4235
|
+
url: 'https://suite.seozoom.it/bot.html'
|
4236
|
+
producer:
|
4237
|
+
name: 'SEO Cube S.r.l.'
|
4238
|
+
url: 'https://www.seocube.it/'
|
4239
|
+
|
4240
|
+
- regex: 'RavenCrawler/[\d.]+'
|
4241
|
+
name: 'RavenCrawler'
|
4242
|
+
category: 'Crawler'
|
4243
|
+
url: 'https://raventools.com/site-auditor/'
|
4244
|
+
producer:
|
4245
|
+
name: 'TapClicks, Inc.'
|
4246
|
+
url: 'https://www.tapclicks.com/'
|
4247
|
+
|
4248
|
+
- regex: 'KadoBot'
|
4249
|
+
name: 'KadoBot'
|
4250
|
+
category: 'Crawler'
|
4251
|
+
url: 'https://www.kadolijst.nl/bot'
|
4252
|
+
producer:
|
4253
|
+
name: 'Kadolijst'
|
4254
|
+
url: 'https://www.kadolijst.nl/'
|
4255
|
+
|
4256
|
+
- regex: 'Dubbotbot/[\d.]+'
|
4257
|
+
name: 'Dubbotbot'
|
4258
|
+
category: 'Crawler'
|
4259
|
+
url: 'https://help.dubbot.com/en/articles/6746594-example-custom-user-agent'
|
4260
|
+
producer:
|
4261
|
+
name: 'DubBot'
|
4262
|
+
url: 'https://dubbot.com/'
|
4263
|
+
|
4264
|
+
- regex: 'Swiftbot/[\d.]+'
|
4265
|
+
name: 'Swiftbot'
|
4266
|
+
category: 'Crawler'
|
4267
|
+
url: 'https://swiftype.com/swiftbot'
|
4268
|
+
producer:
|
4269
|
+
name: 'Elasticsearch, B.V.'
|
4270
|
+
url: 'https://www.elastic.co/'
|
4271
|
+
|
4272
|
+
- regex: 'EyeMonIT'
|
4273
|
+
name: 'EyeMonit'
|
4274
|
+
category: 'Site Monitor'
|
4275
|
+
url: 'https://eyemonit.com/'
|
4276
|
+
producer:
|
4277
|
+
name: 'EyeMonit'
|
4278
|
+
url: 'https://eyemonit.com/'
|
4279
|
+
|
4280
|
+
- regex: 'ThousandEyes'
|
4281
|
+
name: 'ThousandEyes'
|
4282
|
+
category: 'Site Monitor'
|
4283
|
+
url: 'https://www.thousandeyes.com/'
|
4284
|
+
producer:
|
4285
|
+
name: 'Cisco Systems, Inc.'
|
4286
|
+
url: 'https://www.cisco.com/'
|
4287
|
+
|
4288
|
+
- regex: 'OmtrBot/[\d.]+'
|
4289
|
+
name: 'OmtrBot'
|
4290
|
+
category: 'Site Monitor'
|
4291
|
+
|
4292
|
+
- regex: 'WebMon/[\d.]+'
|
4293
|
+
name: 'WebMon'
|
4294
|
+
category: 'Site Monitor'
|
4295
|
+
|
4296
|
+
- regex: 'AdsTxtCrawlerTP/[\d.]+'
|
4297
|
+
name: 'AdsTxtCrawlerTP'
|
4298
|
+
category: 'Crawler'
|
4299
|
+
|
4300
|
+
- regex: 'fragFINN'
|
4301
|
+
name: 'fragFINN'
|
4302
|
+
category: 'Crawler'
|
4303
|
+
url: 'https://www.fragfinn.de/'
|
4304
|
+
producer:
|
4305
|
+
name: 'fragFINN e.V.'
|
4306
|
+
url: 'https://www.fragfinn.de/'
|
4307
|
+
|
4308
|
+
- regex: 'Clickagy'
|
4309
|
+
name: 'Clickagy'
|
4310
|
+
category: 'Crawler'
|
4311
|
+
url: 'https://www.clickagy.com/'
|
4312
|
+
producer:
|
4313
|
+
name: 'Clickagy, LLC'
|
4314
|
+
url: 'https://www.clickagy.com/'
|
4315
|
+
|
4316
|
+
- regex: 'kiwitcms-gitops/[\d.]+'
|
4317
|
+
name: 'Kiwi TCMS GitOps'
|
4318
|
+
category: 'Service Agent'
|
4319
|
+
url: 'https://kiwitcms.org'
|
4320
|
+
producer:
|
4321
|
+
name: 'Open Technologies Bulgaria, Ltd.'
|
4322
|
+
url: 'https://kiwitcms.org'
|
4323
|
+
|
4324
|
+
- regex: 'webtru_crawler'
|
4325
|
+
name: 'webtru'
|
4326
|
+
category: 'Crawler'
|
4327
|
+
url: 'https://webtru.io/'
|
4328
|
+
producer:
|
4329
|
+
name: 'DataSign Inc.'
|
4330
|
+
url: 'https://datasign.jp/'
|
4331
|
+
|
4332
|
+
- regex: 'URLSuMaBot'
|
4333
|
+
name: 'URLSuMaBot'
|
4334
|
+
category: 'Crawler'
|
4335
|
+
url: 'https://www.urlsuma.de/'
|
4336
|
+
|
4337
|
+
- regex: '360JK yunjiankong'
|
4338
|
+
name: '360JK'
|
4339
|
+
category: 'Site Monitor'
|
4340
|
+
url: 'http://jk.cloud.360.cn/'
|
4341
|
+
producer:
|
4342
|
+
name: '360 Security Technology Inc.'
|
4343
|
+
url: 'https://www.360.cn/'
|
4344
|
+
|
4345
|
+
- regex: 'UCSBNetworkMeasurement'
|
4346
|
+
name: 'UCSB Network Measurement'
|
4347
|
+
category: 'Crawler'
|
4348
|
+
url: 'https://www.it.ucsb.edu/'
|
4349
|
+
producer:
|
4350
|
+
name: 'University of California, Santa Barbara'
|
4351
|
+
url: 'https://www.it.ucsb.edu/'
|
4352
|
+
|
4353
|
+
- regex: 'Plesk screenshot bot'
|
4354
|
+
name: 'Plesk Screenshot Service'
|
4355
|
+
category: 'Service Agent'
|
4356
|
+
url: 'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service'
|
4357
|
+
producer:
|
4358
|
+
name: 'Plesk International GmbH'
|
4359
|
+
url: 'https://www.plesk.com/'
|
4360
|
+
|
4361
|
+
- regex: 'Who\.is'
|
4362
|
+
name: 'Who.is Bot'
|
4363
|
+
category: 'Crawler'
|
4364
|
+
url: 'https://who.is/'
|
4365
|
+
|
4366
|
+
- regex: 'Probely'
|
4367
|
+
name: 'Probely'
|
4368
|
+
category: 'Security Checker'
|
4369
|
+
url: 'https://probely.com/sos/'
|
4370
|
+
producer:
|
4371
|
+
name: 'Probely - Soluções de Cibersegurança, S.A.'
|
4372
|
+
url: 'https://probely.com/'
|
4373
|
+
|
4374
|
+
- regex: 'Uptimia(?:/[\d.]+)?'
|
4375
|
+
name: 'Uptimia'
|
4376
|
+
category: 'Site Monitor'
|
4377
|
+
url: 'https://www.uptimia.com/'
|
4378
|
+
producer:
|
4379
|
+
name: 'JJ Online GmbH'
|
4380
|
+
url: 'https://www.uptimia.com/'
|
4381
|
+
|
4382
|
+
- regex: '2GDPR/[\d.]+'
|
4383
|
+
name: '2GDPR'
|
4384
|
+
category: 'Service Agent'
|
4385
|
+
url: 'https://2gdpr.com/tos'
|
4386
|
+
producer:
|
4387
|
+
name: '2GDPR'
|
4388
|
+
url: 'https://2gdpr.com/'
|
4389
|
+
|
4390
|
+
- regex: 'abuse\.xmco\.fr'
|
4391
|
+
name: 'Serenety'
|
4392
|
+
category: 'Security Checker'
|
4393
|
+
url: 'https://abuse.xmco.fr/'
|
4394
|
+
producer:
|
4395
|
+
name: 'XMCO, SASU'
|
4396
|
+
url: 'https://www.xmco.fr/'
|
4397
|
+
|
4398
|
+
- regex: 'CheckHost'
|
4399
|
+
name: 'CheckHost'
|
4400
|
+
category: 'Site Monitor'
|
4401
|
+
url: 'https://check-host.net/'
|
4402
|
+
producer:
|
4403
|
+
name: 'CheckHost'
|
4404
|
+
url: 'https://check-host.net/'
|
4405
|
+
|
4406
|
+
- regex: 'LAC_IAHarvester/[\d.]+'
|
4407
|
+
name: 'LAC IA Harvester'
|
4408
|
+
category: 'Crawler'
|
4409
|
+
url: 'https://library-archives.canada.ca/eng/services/government-canada/web-social-media-preservation-program/Pages/web-archive.aspx'
|
4410
|
+
producer:
|
4411
|
+
name: 'Library and Archives Canada'
|
4412
|
+
url: 'https://library-archives.canada.ca/'
|
4413
|
+
|
4414
|
+
- regex: 'InsytfulBot/[\d.]+'
|
4415
|
+
name: 'InsytfulBot'
|
4416
|
+
category: 'Crawler'
|
4417
|
+
url: 'https://www.insytful.com/'
|
4418
|
+
producer:
|
4419
|
+
name: 'Zengenti Limited'
|
4420
|
+
url: 'https://www.zengenti.com/'
|
4421
|
+
|
4422
|
+
- regex: 'statista\.com'
|
4423
|
+
name: 'Statista'
|
4424
|
+
category: 'Crawler'
|
4425
|
+
url: 'https://www.statista.com/'
|
4426
|
+
producer:
|
4427
|
+
name: 'Statista, Inc.'
|
4428
|
+
url: 'https://www.statista.com/'
|
4429
|
+
|
4430
|
+
- regex: 'SubstackContentFetch/[\d.]+'
|
4431
|
+
name: 'Substack Content Fetch'
|
4432
|
+
category: 'Crawler'
|
4433
|
+
url: 'https://substack.com/'
|
4434
|
+
producer:
|
4435
|
+
name: 'Substack, Inc.'
|
4436
|
+
url: 'https://substack.com/'
|
4437
|
+
|
4438
|
+
- regex: '^ds9'
|
4439
|
+
name: 'Deep SEARCH 9'
|
4440
|
+
category: 'Crawler'
|
4441
|
+
url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/'
|
4442
|
+
producer:
|
4443
|
+
name: 'Copyright Clearance Center, Inc.'
|
4444
|
+
url: 'https://www.copyright.com/'
|
4445
|
+
|
4446
|
+
- regex: 'LiveJournal\.com'
|
4447
|
+
name: 'LiveJournal'
|
4448
|
+
url: 'https://www.livejournal.com/'
|
4449
|
+
category: 'Feed Fetcher'
|
4450
|
+
producer:
|
4451
|
+
name: 'ООО "СИМ"'
|
4452
|
+
url: 'https://www.livejournal.com/'
|
4453
|
+
|
4454
|
+
- regex: 'bitdiscovery'
|
4455
|
+
name: 'Tenable.asm'
|
4456
|
+
category: 'Security Checker'
|
4457
|
+
url: 'https://bitdiscovery.com/'
|
4458
|
+
producer:
|
4459
|
+
name: 'Tenable, Inc.'
|
4460
|
+
url: 'https://www.tenable.com/'
|
4461
|
+
|
4462
|
+
- regex: 'Castopod/[\d.]+'
|
4463
|
+
name: 'Castopod'
|
4464
|
+
category: 'Crawler'
|
4465
|
+
url: 'https://www.castopod.org/'
|
4466
|
+
|
4467
|
+
- regex: 'Elastic/Synthetics'
|
4468
|
+
name: 'Elastic Synthetics'
|
4469
|
+
category: 'Site Monitor'
|
4470
|
+
url: 'https://github.com/elastic/synthetics'
|
4471
|
+
producer:
|
4472
|
+
name: 'Elasticsearch B.V.'
|
4473
|
+
url: 'https://www.elastic.co/'
|
4474
|
+
|
4475
|
+
- regex: 'WDG_Validator/[\d.]+'
|
4476
|
+
name: 'WDG HTML Validator'
|
4477
|
+
category: 'Validator'
|
4478
|
+
url: 'http://www.htmlhelp.com/tools/validator/'
|
4479
|
+
|
4480
|
+
- regex: 'scan@aegis.network'
|
4481
|
+
name: 'Aegis'
|
4482
|
+
category: 'Crawler'
|
4483
|
+
url: 'https://web.archive.org/web/20180910002802/http://www.aegis.network/'
|
4484
|
+
|
4485
|
+
- regex: 'CrawlyProjectCrawler/[\d.]+'
|
4486
|
+
name: 'Crawly Project'
|
4487
|
+
category: 'Crawler'
|
4488
|
+
url: 'https://web.archive.org/web/20240326141952/https://crawlyproject.digitaldragon.dev/'
|
4489
|
+
|
4490
|
+
- regex: 'BDFetch'
|
4491
|
+
name: 'BDFetch'
|
4492
|
+
category: 'Crawler'
|
4493
|
+
url: 'https://web.archive.org/web/20130821043949/http://www.branddimensions.com/'
|
4494
|
+
|
4495
|
+
- regex: 'PunkMap'
|
4496
|
+
name: 'Punk Map'
|
4497
|
+
category: 'Security Checker'
|
4498
|
+
url: 'https://github.com/openeasm/punkmap'
|
4499
|
+
|
4500
|
+
- regex: 'GenomeCrawlerd/[\d.]+'
|
4501
|
+
name: 'Deepfield Genome'
|
4502
|
+
category: 'Crawler'
|
4503
|
+
url: 'https://www.nokia.com/networks/ip-networks/deepfield/genome/'
|
4504
|
+
producer:
|
4505
|
+
name: 'Nokia Corporation'
|
4506
|
+
url: 'https://www.nokia.com/'
|
4507
|
+
|
4508
|
+
- regex: 'Gaisbot/[\d.]+'
|
4509
|
+
name: 'Gaisbot'
|
4510
|
+
category: 'Crawler'
|
4511
|
+
url: 'https://web.archive.org/web/20090604121511/https://gais.cs.ccu.edu.tw/robot.php'
|
4512
|
+
|
4513
|
+
- regex: 'FAST-WebCrawler/[\d.]+'
|
4514
|
+
name: 'AlltheWeb'
|
4515
|
+
category: 'Crawler'
|
4516
|
+
url: 'https://web.archive.org/web/20041020050801/http://www.alltheweb.com/help/webmaster/crawler'
|
4517
|
+
|
4518
|
+
- regex: 'ducks\.party'
|
4519
|
+
name: 'ducks.party'
|
4520
|
+
category: 'Security Checker'
|
4521
|
+
url: 'https://ducks.party/'
|
4522
|
+
|
4523
|
+
- regex: 'DepSpid/[\d.]+'
|
4524
|
+
name: 'DepSpid'
|
4525
|
+
category: 'Crawler'
|
4526
|
+
url: 'https://web.archive.org/web/20080321224033/http://about.depspid.net/'
|
4527
|
+
|
4528
|
+
- regex: 'Website-info\.net'
|
4529
|
+
name: 'Website-info'
|
4530
|
+
category: 'Crawler'
|
4531
|
+
url: 'https://website-info.net/robot'
|
4532
|
+
producer:
|
4533
|
+
name: 'Meins und Vogel GmbH'
|
4534
|
+
url: 'https://muv.com/'
|
4535
|
+
|
4536
|
+
# Generic bots
|
4537
|
+
- regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|url|Zeus|ZmEu)$'
|
4538
|
+
name: 'Generic Bot'
|
4539
|
+
|
3511
4540
|
# Generic detections
|
3512
|
-
- regex: '[a-z0-
|
4541
|
+
- regex: '[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\d\.[x\d] |electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft |banshee-)project(?!or)|(?<!Google Wap |Blue |SpeedMode; )proxy|research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider(?! 8)|study|transcoder|uptime|user[ _]?agent|validator)(?:[^a-z]|$)'
|
3513
4542
|
name: 'Generic Bot'
|