device_detector 1.0.5 → 1.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +7 -6
  3. data/lib/device_detector/browser.rb +364 -0
  4. data/lib/device_detector/client.rb +8 -0
  5. data/lib/device_detector/device.rb +1124 -4
  6. data/lib/device_detector/os.rb +36 -11
  7. data/lib/device_detector/version.rb +1 -1
  8. data/lib/device_detector/version_extractor.rb +9 -0
  9. data/lib/device_detector.rb +57 -13
  10. data/regexes/bots.yml +551 -58
  11. data/regexes/client/browser_engine.yml +7 -1
  12. data/regexes/client/browsers.yml +594 -71
  13. data/regexes/client/feed_readers.yml +4 -10
  14. data/regexes/client/libraries.yml +47 -2
  15. data/regexes/client/mediaplayers.yml +25 -1
  16. data/regexes/client/mobile_apps.yml +447 -77
  17. data/regexes/client/pim.yml +49 -1
  18. data/regexes/device/cameras.yml +5 -5
  19. data/regexes/device/car_browsers.yml +16 -0
  20. data/regexes/device/consoles.yml +6 -0
  21. data/regexes/device/mobiles.yml +12905 -4561
  22. data/regexes/device/portable_media_player.yml +20 -3
  23. data/regexes/device/shell_tv.yml +117 -0
  24. data/regexes/device/televisions.yml +426 -35
  25. data/regexes/oss.yml +567 -139
  26. data/spec/device_detector/concrete_user_agent_spec.rb +30 -42
  27. data/spec/device_detector/detector_fixtures_spec.rb +9 -5
  28. data/spec/device_detector/device_spec.rb +26 -10
  29. data/spec/fixtures/client/browser.yml +1463 -391
  30. data/spec/fixtures/client/feed_reader.yml +0 -12
  31. data/spec/fixtures/client/library.yml +91 -37
  32. data/spec/fixtures/client/mediaplayer.yml +30 -0
  33. data/spec/fixtures/client/mobile_app.yml +498 -45
  34. data/spec/fixtures/client/pim.yml +60 -0
  35. data/spec/fixtures/detector/bots.yml +1189 -568
  36. data/spec/fixtures/detector/camera.yml +12 -26
  37. data/spec/fixtures/detector/car_browser.yml +151 -15
  38. data/spec/fixtures/detector/console.yml +70 -48
  39. data/spec/fixtures/detector/desktop.yml +2041 -916
  40. data/spec/fixtures/detector/feature_phone.yml +895 -189
  41. data/spec/fixtures/detector/feed_reader.yml +50 -77
  42. data/spec/fixtures/detector/mediaplayer.yml +79 -26
  43. data/spec/fixtures/detector/mobile_apps.yml +726 -72
  44. data/spec/fixtures/detector/peripheral.yml +271 -0
  45. data/spec/fixtures/detector/phablet.yml +3635 -1596
  46. data/spec/fixtures/detector/portable_media_player.yml +355 -46
  47. data/spec/fixtures/detector/smart_display.yml +183 -9
  48. data/spec/fixtures/detector/smart_speaker.yml +13 -8
  49. data/spec/fixtures/detector/smartphone-1.yml +4002 -4286
  50. data/spec/fixtures/detector/smartphone-10.yml +3771 -4763
  51. data/spec/fixtures/detector/smartphone-11.yml +3615 -4692
  52. data/spec/fixtures/detector/smartphone-12.yml +3856 -4764
  53. data/spec/fixtures/detector/smartphone-13.yml +4213 -4713
  54. data/spec/fixtures/detector/smartphone-14.yml +4039 -4497
  55. data/spec/fixtures/detector/smartphone-15.yml +5642 -2956
  56. data/spec/fixtures/detector/smartphone-16.yml +4739 -5082
  57. data/spec/fixtures/detector/smartphone-17.yml +4832 -4275
  58. data/spec/fixtures/detector/smartphone-18.yml +9806 -0
  59. data/spec/fixtures/detector/smartphone-19.yml +9965 -0
  60. data/spec/fixtures/detector/smartphone-2.yml +4842 -2589
  61. data/spec/fixtures/detector/smartphone-20.yml +9710 -0
  62. data/spec/fixtures/detector/smartphone-21.yml +8693 -0
  63. data/spec/fixtures/detector/smartphone-22.yml +10178 -0
  64. data/spec/fixtures/detector/smartphone-23.yml +9453 -0
  65. data/spec/fixtures/detector/smartphone-24.yml +9843 -0
  66. data/spec/fixtures/detector/smartphone-25.yml +9703 -0
  67. data/spec/fixtures/detector/smartphone-26.yml +10007 -0
  68. data/spec/fixtures/detector/smartphone-27.yml +4927 -0
  69. data/spec/fixtures/detector/smartphone-3.yml +4387 -4427
  70. data/spec/fixtures/detector/smartphone-4.yml +3597 -4582
  71. data/spec/fixtures/detector/smartphone-5.yml +4066 -5022
  72. data/spec/fixtures/detector/smartphone-6.yml +3455 -4621
  73. data/spec/fixtures/detector/smartphone-7.yml +3574 -4574
  74. data/spec/fixtures/detector/smartphone-8.yml +4617 -4704
  75. data/spec/fixtures/detector/smartphone-9.yml +4080 -5035
  76. data/spec/fixtures/detector/smartphone.yml +3244 -4234
  77. data/spec/fixtures/detector/tablet-1.yml +4652 -4492
  78. data/spec/fixtures/detector/tablet-2.yml +3515 -4434
  79. data/spec/fixtures/detector/tablet-3.yml +3418 -4351
  80. data/spec/fixtures/detector/tablet-4.yml +5149 -3200
  81. data/spec/fixtures/detector/tablet-5.yml +9273 -0
  82. data/spec/fixtures/detector/tablet-6.yml +4588 -0
  83. data/spec/fixtures/detector/tablet.yml +1621 -2613
  84. data/spec/fixtures/detector/tv-1.yml +2501 -0
  85. data/spec/fixtures/detector/tv.yml +7826 -3114
  86. data/spec/fixtures/detector/unknown.yml +370 -531
  87. data/spec/fixtures/detector/wearable.yml +863 -9
  88. data/spec/fixtures/parser/oss.yml +1350 -21
  89. data/spec/fixtures/parser/vendorfragments.yml +53 -53
  90. metadata +35 -5
data/regexes/bots.yml CHANGED
@@ -5,10 +5,10 @@
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
- - regex: '360Spider(-Image|-Video)?'
8
+ - regex: '360Spider'
9
9
  name: '360Spider'
10
10
  category: 'Search bot'
11
- url: 'http://www.so.com/help/help_3_2.html'
11
+ url: 'https://www.so.com/help/help_3_2.html'
12
12
  producer:
13
13
  name: 'Online Media Group, Inc.'
14
14
  url: ''
@@ -40,26 +40,34 @@
40
40
  - regex: 'AhrefsBot'
41
41
  name: 'aHrefs Bot'
42
42
  category: 'Crawler'
43
- url: 'http://ahrefs.com/robot'
43
+ url: 'https://ahrefs.com/robot'
44
44
  producer:
45
45
  name: 'Ahrefs Pte Ltd'
46
- url: 'http://ahrefs.com/robot'
46
+ url: 'https://ahrefs.com/robot'
47
47
 
48
48
  - regex: 'ia_archiver|alexabot|verifybot'
49
49
  name: 'Alexa Crawler'
50
50
  category: 'Search bot'
51
- url: 'https://alexa.zendesk.com/hc/en-us/sections/200100794-Crawlers'
51
+ url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
52
52
  producer:
53
53
  name: 'Alexa Internet'
54
- url: 'http://www.alexa.com'
54
+ url: 'https://www.alexa.com'
55
55
 
56
56
  - regex: 'alexa site audit'
57
57
  name: 'Alexa Site Audit'
58
58
  category: 'Site Monitor'
59
- url: 'http://www.alexa.com/help/webmasters'
59
+ url: 'https://support.alexa.com/hc/en-us/articles/200450194'
60
60
  producer:
61
61
  name: 'Alexa Internet'
62
- url: 'http://www.alexa.com'
62
+ url: 'https://www.alexa.com'
63
+
64
+ - regex: 'Amazonbot'
65
+ name: 'Amazon Bot'
66
+ category: 'Crawler'
67
+ url: 'https://developer.amazon.com/support/amazonbot'
68
+ producer:
69
+ name: 'Amazon.com, Inc.'
70
+ url: 'https://www.amazon.com/'
63
71
 
64
72
  - regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
65
73
  name: 'Amazon Route53 Health Check'
@@ -82,23 +90,31 @@
82
90
  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
83
91
  producer:
84
92
  name: 'The Apache Software Foundation'
85
- url: 'http://www.apache.org/foundation/'
93
+ url: 'https://www.apache.org/foundation/'
86
94
 
87
95
  - regex: 'Applebot'
88
96
  name: 'Applebot'
89
97
  category: 'Crawler'
90
- url: 'http://www.apple.com/go/applebot'
98
+ url: 'https://support.apple.com/en-us/HT204683'
91
99
  producer:
92
100
  name: 'Apple Inc'
93
- url: 'http://www.apple.com'
101
+ url: 'https://www.apple.com'
102
+
103
+ - regex: "AppSignalBot"
104
+ name: "AppSignalBot"
105
+ category: "Site Monitor"
106
+ url: "https://docs.appsignal.com/uptime-monitoring/"
107
+ producer:
108
+ name: "AppSignal"
109
+ url: "https://appsignal.com/"
94
110
 
95
111
  - regex: 'Arachni'
96
112
  name: 'Arachni'
97
113
  category: 'Security Checker'
98
- url: 'http://www.arachni-scanner.com'
114
+ url: 'https://www.arachni-scanner.com/'
99
115
  producer:
100
116
  name: 'Sarosys LLC'
101
- url: 'http://www.sarosys.com/'
117
+ url: 'https://www.sarosys.com/'
102
118
 
103
119
  - regex: 'AspiegelBot'
104
120
  name: 'AspiegelBot'
@@ -112,7 +128,7 @@
112
128
  name: 'Castro 2'
113
129
  category: 'Service Agent'
114
130
  url: 'http://supertop.co/castro/'
115
- producer:
131
+ producer:
116
132
  name: 'Supertop'
117
133
  url: 'http://supertop.co'
118
134
 
@@ -127,10 +143,10 @@
127
143
  - regex: 'archive\.org_bot|special_archiver'
128
144
  name: 'archive.org bot'
129
145
  category: 'Crawler'
130
- url: 'http://www.archive.org/details/archive.org_bot'
146
+ url: 'https://archive.org/details/archive.org_bot'
131
147
  producer:
132
148
  name: 'The Internet Archive'
133
- url: 'http://www.archive.org'
149
+ url: 'https://archive.org'
134
150
 
135
151
  - regex: 'Ask Jeeves/Teoma'
136
152
  name: 'Ask Jeeves'
@@ -156,7 +172,7 @@
156
172
  name: '2.0Promotion GbR'
157
173
  url: 'http://www.backlinktest.com'
158
174
 
159
- - regex: 'baiduspider(-image)?|baidu Transcoder|baidu.*spider'
175
+ - regex: 'Baidu.*spider|baidu Transcoder'
160
176
  name: 'Baidu Spider'
161
177
  category: 'Search bot'
162
178
  url: 'http://www.baidu.com/search/spider.htm'
@@ -172,6 +188,14 @@
172
188
  name: ''
173
189
  url: ''
174
190
 
191
+ - regex: 'Better Uptime Bot'
192
+ name: 'Better Uptime Bot'
193
+ category: 'Site Monitor'
194
+ url: 'https://betteruptime.com/faq'
195
+ producer:
196
+ name: 'Better Uptime'
197
+ url: 'https://betteruptime.com/'
198
+
175
199
  - regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
176
200
  name: 'BingBot'
177
201
  category: 'Search bot'
@@ -188,7 +212,7 @@
188
212
  name: 'Blekko'
189
213
  url: 'http://blekko.com'
190
214
 
191
- - regex: 'BLEXBot(Test)?'
215
+ - regex: 'BLEXBot'
192
216
  name: 'BLEXBot Crawler'
193
217
  category: 'Crawler'
194
218
  url: 'http://webmeup-crawler.com'
@@ -217,7 +241,7 @@
217
241
  category: 'Crawler'
218
242
  producer:
219
243
  name: 'BoardReader'
220
- url: 'http://boardreader.com/'
244
+ url: 'https://boardreader.com/'
221
245
 
222
246
  - regex: 'BountiiBot'
223
247
  name: 'Bountii Bot'
@@ -283,6 +307,14 @@
283
307
  name: 'CloudFlare'
284
308
  url: 'http://www.cloudflare.com'
285
309
 
310
+ - regex: 'CloudflareDiagnostics'
311
+ name: 'Cloudflare Diagnostics'
312
+ category: 'Site Monitor'
313
+ url: 'https://www.cloudflare.com/'
314
+ producer:
315
+ name: 'Cloudflare'
316
+ url: 'https://www.cloudflare.com'
317
+
286
318
  - regex: 'CloudFlare-AlwaysOnline'
287
319
  name: 'CloudFlare Always Online'
288
320
  category: 'Site Monitor'
@@ -291,7 +323,7 @@
291
323
  name: 'CloudFlare'
292
324
  url: 'http://www.cloudflare.com'
293
325
 
294
- - regex: 'coccoc|coccocbot(-ads|-fast|-image|-shopping|-web)?'
326
+ - regex: 'coccoc.com'
295
327
  name: 'Cốc Cốc Bot'
296
328
  url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
297
329
  category: 'Search bot'
@@ -339,7 +371,6 @@
339
371
  name: 'Datanyze'
340
372
  url: 'https://www.datanyze.com'
341
373
 
342
-
343
374
  - regex: 'Dataprovider'
344
375
  name: 'Dataprovider'
345
376
  category: 'Crawler'
@@ -364,7 +395,7 @@
364
395
  name: 'DAZOO.FR'
365
396
  url: 'http://dazoo.fr'
366
397
 
367
- - regex: 'discobot(-news)?'
398
+ - regex: 'discobot'
368
399
  name: 'Discobot'
369
400
  category: 'Search bot'
370
401
  url: 'http://discoveryengine.com/discobot.html'
@@ -427,7 +458,7 @@
427
458
  name: 'eVenture Capital Partners II, LLC'
428
459
  url: 'http://www.eventures.vc/'
429
460
 
430
- - regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
461
+ - regex: 'Exabot|ExaleadCloudview'
431
462
  name: 'ExaBot'
432
463
  category: 'Crawler'
433
464
  url: 'http://www.exabot.com/go/robot'
@@ -483,7 +514,7 @@
483
514
  name: 'David Smith & Developing Perspective, LLC'
484
515
  url: 'https://david-smith.org'
485
516
 
486
- - regex: '(Meta)?Feedly(Bot|App)?'
517
+ - regex: 'Feedly'
487
518
  name: 'Feedly'
488
519
  url: 'http://www.feedly.com'
489
520
  category: 'Feed Fetcher'
@@ -549,6 +580,10 @@
549
580
  name: ''
550
581
  url: ''
551
582
 
583
+ - regex: 'gobuster'
584
+ name: 'Gobuster'
585
+ url: 'https://github.com/OJ/gobuster'
586
+
552
587
  - regex: 'ichiro/mobile goo'
553
588
  name: 'Goo'
554
589
  category: 'Search bot'
@@ -557,6 +592,10 @@
557
592
  name: 'NTT Resonant'
558
593
  url: 'http://goo.ne.jp'
559
594
 
595
+ - regex: 'Storebot-Google'
596
+ name: 'Google StoreBot'
597
+ category: 'Crawler'
598
+
560
599
  - regex: 'Google Favicon'
561
600
  name: 'Google Favicon'
562
601
  category: 'Crawler'
@@ -649,7 +688,7 @@
649
688
  name: 'Visual Meta'
650
689
  url: 'https://www.shopalike.cz/'
651
690
 
652
- - regex: 'AdsBot-Google(-Mobile)?|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot(-Mobile|-Image|-Video|-News)?|GoogleProducer|Google.*/\+/web/snippet'
691
+ - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
653
692
  name: 'Googlebot'
654
693
  category: 'Search bot'
655
694
  url: 'http://www.google.com/bot.html'
@@ -663,7 +702,7 @@
663
702
  url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
664
703
  producer:
665
704
  name: 'The Internet Archive'
666
- url: 'http://www.archive.org'
705
+ url: 'https://archive.org'
667
706
 
668
707
  - regex: 'HubSpot '
669
708
  name: 'HubSpot'
@@ -672,7 +711,6 @@
672
711
  name: 'HubSpot Inc.'
673
712
  url: 'https://www.hubspot.com'
674
713
 
675
-
676
714
  - regex: 'HTTPMon'
677
715
  name: 'HTTPMon'
678
716
  category: 'Site Monitor'
@@ -704,7 +742,7 @@
704
742
 
705
743
  - regex: 'ips-agent'
706
744
  name: 'IPS Agent'
707
- category: 'crawler'
745
+ category: 'Crawler'
708
746
  producer:
709
747
  name: 'VeriSign, Inc'
710
748
  url: 'http://www.verisign.com/'
@@ -717,6 +755,10 @@
717
755
  name: ''
718
756
  url: 'https://ip-guide.com'
719
757
 
758
+ - regex: 'k6/[0-9\.]+'
759
+ name: 'K6'
760
+ url: 'https://k6.io/'
761
+
720
762
  - regex: 'kouio'
721
763
  name: 'Kouio'
722
764
  url: 'http://kouio.com/'
@@ -741,7 +783,7 @@
741
783
  name: 'Lighthouse'
742
784
  url: 'https://developers.google.com/web/tools/lighthouse'
743
785
 
744
- - regex: 'linkdexbot(-mobile)?|linkdex\.com'
786
+ - regex: 'linkdexbot|linkdex\.com'
745
787
  name: 'Linkdex Bot'
746
788
  category: 'Search bot'
747
789
  url: 'http://www.linkdex.com/bots'
@@ -764,7 +806,7 @@
764
806
  name: ''
765
807
  url: ''
766
808
 
767
- - regex: 'Mail\.RU(_Bot)?'
809
+ - regex: 'Mail\.RU'
768
810
  name: 'Mail.Ru Bot'
769
811
  category: 'Search bot'
770
812
  url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
@@ -788,7 +830,7 @@
788
830
  name: ''
789
831
  url: ''
790
832
 
791
- - regex : 'masscan'
833
+ - regex: 'masscan'
792
834
  name: 'masscan'
793
835
  url: 'https://github.com/robertdavidgraham/masscan'
794
836
  category: 'Crawler'
@@ -941,12 +983,12 @@
941
983
  category: 'Crawler'
942
984
  producer:
943
985
  name: 'Nuzzel'
944
- url: https://www.nuzzel.com/
986
+ url: 'https://www.nuzzel.com/'
945
987
 
946
988
  - regex: 'Octopus [0-9]'
947
989
  name: 'Octopus'
948
990
 
949
- - regex: 'omgili(?:bot)?'
991
+ - regex: 'omgili'
950
992
  name: 'Omgili bot'
951
993
  category: 'Search bot'
952
994
  url: 'http://www.omgili.com/Crawler.html'
@@ -1023,7 +1065,15 @@
1023
1065
  name: 'Bitlove'
1024
1066
  url: 'http://bitlove.org/'
1025
1067
 
1026
- - regex: 'psbot(-page)?'
1068
+ - regex: 'PRTG Network Monitor'
1069
+ name: 'PRTG Network Monitor'
1070
+ category: 'Network Monitor'
1071
+ url: 'https://www.paessler.com/prtg'
1072
+ producer:
1073
+ name: 'Paessler AG'
1074
+ url: 'https://www.paessler.com'
1075
+
1076
+ - regex: 'psbot'
1027
1077
  name: 'Picsearch bot'
1028
1078
  category: 'Search bot'
1029
1079
  url: 'http://www.picsearch.com/bot.html'
@@ -1031,7 +1081,7 @@
1031
1081
  name: 'Picsearch'
1032
1082
  url: 'http://www.picsearch.com'
1033
1083
 
1034
- - regex: 'Pingdom\.com'
1084
+ - regex: 'Pingdom(?:\.com|TMS)'
1035
1085
  name: 'Pingdom Bot'
1036
1086
  category: 'Site Monitor'
1037
1087
  url: ''
@@ -1047,6 +1097,14 @@
1047
1097
  name: 'Quora'
1048
1098
  url: 'http://www.quora.com'
1049
1099
 
1100
+ - regex: 'Quora-Bot'
1101
+ name: 'Quora Bot'
1102
+ category: 'Crawler'
1103
+ url: ''
1104
+ producer:
1105
+ name: 'Quora'
1106
+ url: 'https://www.quora.com/'
1107
+
1050
1108
  - regex: 'RamblerMail'
1051
1109
  name: 'RamblerMail Image Proxy'
1052
1110
  category: 'Crawler'
@@ -1275,6 +1333,14 @@
1275
1333
  name: 'Tencent Holdings'
1276
1334
  url: 'http://www.soso.com'
1277
1335
 
1336
+ - regex: 'Sprinklr'
1337
+ name: 'Sprinklr'
1338
+ category: 'Crawler'
1339
+ url: ''
1340
+ producer:
1341
+ name: 'Sprinklr, Inc.'
1342
+ url: 'https://www.sprinklr.com/'
1343
+
1278
1344
  - regex: 'sqlmap/'
1279
1345
  name: 'sqlmap'
1280
1346
  category: 'Security Checker'
@@ -1320,13 +1386,20 @@
1320
1386
  name: 'Tailrank Inc'
1321
1387
  url: 'http://spinn3r.com'
1322
1388
 
1323
- - regex: 'Sputnik(Image)?Bot'
1389
+ - regex: 'SputnikBot'
1324
1390
  name: 'Sputnik Bot'
1325
- category: ''
1391
+ category: 'Crawler'
1392
+ url: ''
1393
+
1394
+ - regex: 'SputnikFaviconBot'
1395
+ name: 'Sputnik Favicon Bot'
1396
+ category: 'Crawler'
1397
+ url: ''
1398
+
1399
+ - regex: 'SputnikImageBot'
1400
+ name: 'Sputnik Image Bot'
1401
+ category: 'Crawler'
1326
1402
  url: ''
1327
- producer:
1328
- name: ''
1329
- url: ''
1330
1403
 
1331
1404
  - regex: 'SurveyBot'
1332
1405
  name: 'Survey Bot'
@@ -1545,7 +1618,7 @@
1545
1618
  category: 'Site Monitor'
1546
1619
  url: 'https://www.webpagetest.org'
1547
1620
 
1548
- - regex: 'WeSEE(:Search)?'
1621
+ - regex: 'WeSEE'
1549
1622
  name: 'WeSEE:Search'
1550
1623
  category: 'Search bot'
1551
1624
  url: 'http://www.wesee.com/bot'
@@ -1617,6 +1690,14 @@
1617
1690
  name: 'Yahoo! Inc.'
1618
1691
  url: 'http://www.yahoo.com'
1619
1692
 
1693
+ - regex: 'YahooMailProxy'
1694
+ name: 'Yahoo! Mail Proxy'
1695
+ category: 'Service Agent'
1696
+ url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
1697
+ producer:
1698
+ name: 'Yahoo! Inc.'
1699
+ url: 'http://www.yahoo.com'
1700
+
1620
1701
  - regex: 'YahooCacheSystem'
1621
1702
  name: 'Yahoo! Cache System'
1622
1703
  category: 'Crawler'
@@ -1633,7 +1714,7 @@
1633
1714
  name: 'Yahoo! Japan Corp.'
1634
1715
  url: 'https://www.yahoo.co.jp/'
1635
1716
 
1636
- - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1717
+ - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1637
1718
  name: 'Yandex Bot'
1638
1719
  category: 'Search bot'
1639
1720
  url: 'http://www.yandex.com/bots'
@@ -1718,18 +1799,18 @@
1718
1799
  - regex: 'HubPages.*crawlingpolicy'
1719
1800
  name: 'HubPages'
1720
1801
  category: 'Crawler'
1721
- url: 'http://hubpages.com/help/crawlingpolicy'
1802
+ url: 'https://hubpages.com/help/crawlingpolicy'
1722
1803
  producer:
1723
- name: 'HubPages'
1724
- url: 'http://hubpages.com/'
1804
+ name: 'HubPages, Inc.'
1805
+ url: 'https://discover.hubpages.com/'
1725
1806
 
1726
1807
  - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
1727
1808
  name: 'Pinterest'
1728
- url: 'http://www.pinterest.com/bot.html'
1809
+ url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
1729
1810
  category: 'Crawler'
1730
1811
  producer:
1731
1812
  name: 'Pinterest'
1732
- url: 'http://www.pinterest.com/'
1813
+ url: 'https://www.pinterest.com/'
1733
1814
 
1734
1815
  - regex: 'Site24x7'
1735
1816
  name: 'Site24x7 Website Monitoring'
@@ -1771,13 +1852,13 @@
1771
1852
  name: 'Monitor.Us'
1772
1853
  url: 'http://www.monitor.us'
1773
1854
 
1774
- - regex: 'Catchpoint( bot)?'
1855
+ - regex: 'Catchpoint'
1775
1856
  name: 'Catchpoint'
1776
1857
  category: 'Site Monitor'
1777
- url: ''
1858
+ url: 'https://www.catchpoint.com/'
1778
1859
  producer:
1779
1860
  name: 'Catchpoint Systems'
1780
- url: 'http://www.catchpoint.com/'
1861
+ url: 'https://www.catchpoint.com/'
1781
1862
 
1782
1863
  - regex: 'bitlybot'
1783
1864
  name: 'BitlyBot'
@@ -1845,7 +1926,7 @@
1845
1926
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1846
1927
  name: 'RSSRadio Bot'
1847
1928
 
1848
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9)'
1929
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent)'
1849
1930
  name: 'Generic Bot'
1850
1931
 
1851
1932
  - regex: '^sentry'
@@ -1955,10 +2036,10 @@
1955
2036
  - regex: 'BoardReader Favicon Fetcher'
1956
2037
  name: 'BoardReader'
1957
2038
  category: 'Search bot'
1958
- url: 'http://boardreader.com/'
2039
+ url: 'https://boardreader.com/'
1959
2040
  producer:
1960
2041
  name: 'Effyis Inc'
1961
- url: 'http://boardreader.com/'
2042
+ url: 'https://boardreader.com/'
1962
2043
 
1963
2044
  - regex: 'IDG/IT'
1964
2045
  name: 'IDG/IT'
@@ -2003,7 +2084,7 @@
2003
2084
  - regex: 'oBot'
2004
2085
  name: 'oBot'
2005
2086
  category: 'Search bot'
2006
- url: 'http://www.xforce-security.com/crawler/'
2087
+ url: 'https://www.xforce-security.com/crawler/'
2007
2088
  producer:
2008
2089
  name: 'IBM Germany Research & Development GmbH'
2009
2090
  url: 'https://exchange.xforce.ibmcloud.com/'
@@ -2046,7 +2127,7 @@
2046
2127
  url: 'https://nutch.apache.org'
2047
2128
  producer:
2048
2129
  name: 'The Apache Software Foundation'
2049
- url: 'http://www.apache.org/foundation/'
2130
+ url: 'https://www.apache.org/foundation/'
2050
2131
 
2051
2132
  - regex: 'Seobility'
2052
2133
  name: 'Seobility'
@@ -2061,7 +2142,7 @@
2061
2142
  - regex: 'Grammarly'
2062
2143
  name: 'Grammarly'
2063
2144
  category: 'Service bot'
2064
- url: 'http://www.grammarly.com'
2145
+ url: 'https://www.grammarly.com'
2065
2146
 
2066
2147
  - regex: 'Robozilla'
2067
2148
  name: 'Robozilla'
@@ -2080,7 +2161,7 @@
2080
2161
  - regex: 'SerendeputyBot'
2081
2162
  name: 'Serendeputy Bot'
2082
2163
  category: 'Crawler'
2083
- url: 'http://serendeputy.com/about/serendeputy-bot'
2164
+ url: 'https://serendeputy.com/about/serendeputy-bot'
2084
2165
 
2085
2166
  - regex: 'ias-va.*admantx.*service-fetcher'
2086
2167
  name: 'ADmantX Service Fetcher'
@@ -2102,7 +2183,419 @@
2102
2183
  category: 'Crawler'
2103
2184
  url: 'http://www.exensa.com/crawl'
2104
2185
 
2105
- # Generic detections
2186
+ - regex: 'BDCbot'
2187
+ name: 'BDCbot'
2188
+ category: 'Crawler'
2189
+ url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
2190
+ producer:
2191
+ name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
2192
+ url: 'https://bigdatacorp.com.br/'
2193
+
2194
+ - regex: 'adbeat'
2195
+ name: 'Adbeat'
2196
+ category: 'Crawler'
2197
+ url: 'https://www.adbeat.com/operation_policy'
2198
+ producer:
2199
+ name: 'PPC Labs LLC'
2200
+ url: 'https://www.adbeat.com/'
2201
+
2202
+ - regex: 'BW/(?:(\d+[\.\d]+))'
2203
+ name: 'BuiltWith'
2204
+ category: 'Crawler'
2205
+ url: 'https://builtwith.com/biup'
2206
+ producer:
2207
+ name: 'BuiltWith Pty Ltd'
2208
+ url: 'https://builtwith.com/'
2209
+
2210
+ - regex: 'https://whatis.contentkingapp.com'
2211
+ name: 'ContentKing'
2212
+ category: 'Site Monitor'
2213
+ url: 'https://whatis.contentkingapp.com/'
2214
+ producer:
2215
+ name: 'ContentKing BV'
2216
+ url: 'https://www.contentkingapp.com/'
2217
+
2218
+ - regex: 'MicroAdBot'
2219
+ name: 'MicroAdBot'
2220
+ category: 'Crawler'
2221
+ url: 'https://www.microad.co.jp/'
2222
+ producer:
2223
+ name: 'MicroAd, Inc.'
2224
+ url: 'https://www.microad.co.jp/'
2225
+
2226
+ - regex: 'PingAdmin.Ru'
2227
+ name: 'PingAdmin.Ru'
2228
+ category: 'Site Monitor'
2229
+ url: 'https://ping-admin.ru/'
2230
+
2231
+ - regex: 'notifyninja.+monitoring'
2232
+ name: 'Notify Ninja'
2233
+ category: 'Site Monitor'
2234
+ url: 'http://notifyninja.com'
2235
+
2236
+ - regex: 'WebDataStats'
2237
+ name: 'WebDataStats'
2238
+ category: 'Crawler'
2239
+ url: 'https://webdatastats.com/policy.html'
2240
+ producer:
2241
+ name: 'WebTehRazrabotka LLC'
2242
+ url: 'https://webdatastats.com/'
2243
+
2244
+ - regex: 'parse.ly scraper'
2245
+ name: 'parse.ly'
2246
+ category: 'Crawler'
2247
+ url: 'https://www.parse.ly/help/integration/crawler'
2248
+ producer:
2249
+ name: 'Parsely, Inc.'
2250
+ url: 'https://www.parse.ly/'
2251
+
2252
+ - regex: 'Nimbostratus-Bot'
2253
+ name: 'Nimbostratus Bot'
2254
+ category: 'Site Monitor'
2255
+ url: 'http://cloudsystemnetworks.com'
2256
+
2257
+ - regex: 'HeartRails_Capture/\d'
2258
+ name: 'Heart Rails Capture'
2259
+ category: 'Service Agent'
2260
+ url: 'http://capture.heartrails.com'
2261
+
2262
+ - regex: 'Project-Resonance'
2263
+ name: 'Project Resonance'
2264
+ category: 'Crawler'
2265
+ url: 'http://project-resonance.com'
2266
+
2267
+ - regex: 'DataXu/\d'
2268
+ name: 'DataXu'
2269
+ category: 'Service Agent'
2270
+ url: 'https://advertising.roku.com/dataxu'
2271
+ producer:
2272
+ name: 'Roku, Inc.'
2273
+ url: 'https://roku.com'
2274
+
2275
+ - regex: 'Cocolyzebot'
2276
+ name: 'Cocolyzebot'
2277
+ category: 'Crawler'
2278
+ url: 'https://cocolyze.com/en/cocolyzebot'
2279
+ producer:
2280
+ name: 'VSI INNOVATION SAS'
2281
+ url: 'https://vsi-innovation.com/'
2282
+
2283
+ - regex: 'veryhip'
2284
+ name: 'VeryHip'
2285
+ category: 'Crawler'
2286
+ url: 'https://veryhip.com/'
2287
+ producer:
2288
+ name: 'VeryHip'
2289
+ url: 'https://veryhip.com/'
2290
+
2291
+ - regex: 'LinkpadBot'
2292
+ name: 'LinkpadBot'
2293
+ category: 'Crawler'
2294
+ url: 'https://www.linkpad.org/'
2295
+ producer:
2296
+ name: 'Solomono LLC'
2297
+ url: 'https://www.linkpad.org/'
2298
+
2299
+ - regex: 'MuscatFerret'
2300
+ name: 'MuscatFerret'
2301
+ category: 'Crawler'
2302
+ url: 'http://www.webtop.com/'
2303
+
2304
+ - regex: 'PageThing.com'
2305
+ name: 'PageThing'
2306
+ category: 'Crawler'
2307
+ url: 'https://www.pagething.com/'
2308
+ producer:
2309
+ name: 'SPECIALNOISE LTD'
2310
+ url: 'https://www.specialnoise.com/'
2311
+
2312
+ - regex: 'ArchiveBox'
2313
+ name: 'ArchiveBox'
2314
+ url: 'https://archivebox.io/'
2315
+ category: 'Crawler'
2316
+ producer:
2317
+ name: ''
2318
+ url: ''
2319
+
2320
+ - regex: 'Choosito'
2321
+ name: 'Choosito'
2322
+ url: 'https://www.choosito.com/'
2323
+ category: 'Crawler'
2324
+ producer:
2325
+ name: 'Choosito! Inc.'
2326
+ url: 'https://www.choosito.com/'
2327
+
2328
+ - regex: 'datagnionbot'
2329
+ name: 'datagnionbot'
2330
+ url: 'https://www.datagnion.com/bot.html'
2331
+ category: 'Crawler'
2332
+ producer:
2333
+ name: 'DATAGNION GMBH'
2334
+ url: 'https://www.datagnion.com/'
2335
+
2336
+ - regex: 'WhatCMS'
2337
+ name: 'WhatCMS'
2338
+ url: 'https://whatcms.org/'
2339
+ category: 'Crawler'
2340
+ producer:
2341
+ name: 'Nineteen Ten LLC'
2342
+ url: 'https://whatcms.org/'
2343
+
2344
+ - regex: 'httpx'
2345
+ name: 'httpx'
2346
+ url: 'https://github.com/projectdiscovery/httpx'
2347
+ category: 'Crawler'
2348
+ producer:
2349
+ name: ''
2350
+ url: ''
2351
+
2352
+ - regex: 'scaninfo@expanseinc.com'
2353
+ name: 'Expanse'
2354
+ category: 'Security Checker'
2355
+ url: 'https://expanse.co/'
2356
+ producer:
2357
+ name: 'Expanse Inc.'
2358
+ url: 'https://expanse.co/'
2359
+
2360
+ - regex: 'HuaweiWebCatBot'
2361
+ name: 'HuaweiWebCatBot'
2362
+ category: 'Crawler'
2363
+ url: 'https://isecurity.huawei.com'
2364
+ producer:
2365
+ name: 'Huawei Technologies Co., Ltd.'
2366
+ url: 'https://huawei.com'
2367
+
2368
+ - regex: 'Hatena-Favicon'
2369
+ name: 'Hatena Favicon'
2370
+ category: 'Crawler'
2371
+ url: 'https://www.hatena.ne.jp/faq/'
2372
+ producer:
2373
+ name: 'Hatena Co., Ltd.'
2374
+ url: 'https://www.hatena.ne.jp'
2375
+
2376
+ - regex: 'RyowlEngine/(\d+)'
2377
+ name: 'Ryowl'
2378
+ category: 'Crawler'
2379
+ url: 'https://ryowl.org'
2380
+
2381
+ - regex: 'OdklBot/(\d+)'
2382
+ name: 'Odnoklassniki Bot'
2383
+ category: 'Crawler'
2384
+ url: 'https://odnoklassniki.ru'
2385
+
2386
+ - regex: 'Mediatoolkitbot'
2387
+ name: 'Mediatoolkit Bot'
2388
+ category: 'Crawler'
2389
+ url: 'https://mediatoolkit.com'
2390
+
2391
+ - regex: 'ZoominfoBot'
2392
+ name: 'ZoominfoBot'
2393
+ category: 'Crawler'
2394
+ url: 'https://www.zoominfo.com'
2395
+
2396
+ - regex: 'WeViKaBot/([\d+\.])'
2397
+ name: 'WeViKaBot'
2398
+ category: 'Crawler'
2399
+ url: 'http://www.wevika.de'
2400
+
2401
+ - regex: 'SEOkicks'
2402
+ name: 'SEOkicks'
2403
+ category: 'Crawler'
2404
+ url: 'https://www.seokicks.de/robot.html'
2405
+
2406
+ - regex: 'Plukkie/([\d+\.])'
2407
+ name: 'Plukkie'
2408
+ category: 'Crawler'
2409
+ url: 'http://www.botje.com/plukkie.htm'
2410
+
2411
+ - regex: 'proximic;'
2412
+ name: 'Comscore'
2413
+ category: 'Crawler'
2414
+ url: 'https://www.comscore.com/Web-Crawler'
2415
+
2416
+ - regex: 'SurdotlyBot/([\d+\.])'
2417
+ name: 'SurdotlyBot'
2418
+ category: 'Crawler'
2419
+ url: 'http://sur.ly/bot.html'
2420
+
2421
+ - regex: 'Gowikibot/([\d+\.])'
2422
+ name: 'Gowikibot'
2423
+ category: 'Crawler'
2424
+ url: 'http:/www.gowikibot.com'
2425
+
2426
+ - regex: 'SabsimBot/([\d+\.])'
2427
+ name: 'SabsimBot'
2428
+ category: 'Crawler'
2429
+ url: 'https://sabsim.com'
2430
+
2431
+ - regex: 'LumtelBot/([\d+\.])'
2432
+ name: 'LumtelBot'
2433
+ category: 'Crawler'
2434
+ url: 'https://umtel.com'
2435
+
2436
+ - regex: 'PiplBot'
2437
+ name: 'PiplBot'
2438
+ category: 'Crawler'
2439
+ url: 'http://www.pipl.com/bot'
2440
+
2441
+ - regex: 'woobot/([\d+\.])'
2442
+ name: 'WooRank'
2443
+ category: 'Crawler'
2444
+ url: 'https://www.woorank.com/bot'
2445
+
2446
+ - regex: 'Cookiebot/([\d+\.])'
2447
+ name: 'Cookiebot'
2448
+ category: 'Crawler'
2449
+ url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
2450
+ producer:
2451
+ name: 'Cybot A/S'
2452
+ url: 'https://www.cybot.com/'
2453
+
2454
+ - regex: 'NetSystemsResearch'
2455
+ name: 'NetSystemsResearch'
2456
+ category: 'Security Checker'
2457
+ url: 'https://www.netsystemsresearch.com/'
2458
+ producer:
2459
+ name: 'NET SYSTEMS RESEARCH LLC'
2460
+ url: 'https://www.netsystemsresearch.com/'
2461
+
2462
+ - regex: 'CensysInspect/([\d+\.])'
2463
+ name: 'CensysInspect'
2464
+ category: 'Security Checker'
2465
+ url: 'https://about.censys.io/'
2466
+ producer:
2467
+ name: 'Censys, Inc.'
2468
+ url: 'https://censys.io/'
2106
2469
 
2470
+ - regex: 'gdnplus.com'
2471
+ name: 'GDNP'
2472
+ category: 'Crawler'
2473
+ url: 'https://gdnplus.com/'
2474
+ producer:
2475
+ name: 'Global Digital Network Plus, LLC'
2476
+ url: 'https://gdnplus.com/'
2477
+
2478
+ - regex: 'WellKnownBot/([\d+\.])'
2479
+ name: 'WellKnownBot'
2480
+ category: 'Crawler'
2481
+ url: 'https://well-known.dev'
2482
+
2483
+ - regex: 'Adsbot/([\d+\.])'
2484
+ name: 'Adsbot'
2485
+ category: 'Crawler'
2486
+ url: 'https://seostar.co/robot/'
2487
+
2488
+ - regex: 'MTRobot/([\d+\.])'
2489
+ name: 'MTRobot'
2490
+ category: 'Crawler'
2491
+ url: 'https://metrics-tools.de/robot.html'
2492
+ producer:
2493
+ name: 'Metrics Tools'
2494
+ url: 'https://metrics-tools.de/'
2495
+
2496
+ - regex: 'serpstatbot/([\d+\.])'
2497
+ name: 'serpstatbot'
2498
+ category: 'Crawler'
2499
+ url: 'http://serpstatbot.com/'
2500
+ producer:
2501
+ name: 'Netpeak Ltd'
2502
+ url: 'https://netpeak.net/'
2503
+
2504
+ - regex: 'colly'
2505
+ name: 'colly'
2506
+ category: 'Crawler'
2507
+ url: 'https://github.com/gocolly/colly/'
2508
+
2509
+ - regex: 'l9tcpid/v([\d+\.])'
2510
+ name: 'l9tcpid'
2511
+ category: 'Security Checker'
2512
+ url: 'https://github.com/LeakIX/l9tcpid'
2513
+
2514
+ - regex: 'MegaIndex.ru/([\d+\.])'
2515
+ name: 'MegaIndex'
2516
+ category: 'Crawler'
2517
+ url: 'https://megaindex.com/crawler'
2518
+
2519
+ - regex: 'Seekport'
2520
+ name: 'Seekport'
2521
+ category: 'Crawler'
2522
+ url: 'http://www.seekport.com/'
2523
+ producer:
2524
+ name: 'SISTRIX GmbH'
2525
+ url: 'https://www.sistrix.de/'
2526
+
2527
+ - regex: 'seolyt/([\d+\.])'
2528
+ name: 'seolyt'
2529
+ category: 'Crawler'
2530
+ url: 'https://seolyt.com/'
2531
+
2532
+ - regex: 'YaK/([\d+\.])'
2533
+ name: 'YaK'
2534
+ category: 'Crawler'
2535
+ url: 'https://www.linkfluence.com/'
2536
+ producer:
2537
+ name: 'Linkfluence SAS'
2538
+ url: 'https://www.linkfluence.com/'
2539
+
2540
+ - regex: 'KomodiaBot/([\d+\.])'
2541
+ name: 'KomodiaBot'
2542
+ category: 'Crawler'
2543
+ url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
2544
+ producer:
2545
+ name: 'Komodia Inc.'
2546
+ url: 'https://www.komodia.com/'
2547
+
2548
+ - regex: 'Neevabot/([\d+\.])'
2549
+ name: 'Neevabot'
2550
+ category: 'Search bot'
2551
+ url: 'https://neeva.com/neevabot'
2552
+ producer:
2553
+ name: 'Neeva Inc.'
2554
+ url: 'https://neeva.com/'
2555
+
2556
+ - regex: 'LinkPreview/([\d+\.])'
2557
+ name: 'LinkPreview'
2558
+ category: 'Service Agent'
2559
+ url: 'https://www.linkpreview.net/'
2560
+
2561
+ - regex: 'JungleKeyThumbnail/([\d+\.])'
2562
+ name: 'JungleKeyThumbnail'
2563
+ category: 'Crawler'
2564
+ url: 'https://junglekey.com/'
2565
+
2566
+ - regex: 'rocketmonitor(?: |bot/)([\d+\.])'
2567
+ name: 'RocketMonitorBot'
2568
+ category: 'Site Monitor'
2569
+ url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
2570
+ producer:
2571
+ name: 'Radio Mast, Inc.'
2572
+ url: 'https://www.radiomast.io/'
2573
+
2574
+ - regex: 'SitemapParser-VIPnytt/([\d+\.])'
2575
+ name: 'SitemapParser-VIPnytt'
2576
+ category: 'Crawler'
2577
+ url: 'https://github.com/VIPnytt/SitemapParser/'
2578
+
2579
+
2580
+ - regex: '^Turnitin'
2581
+ name: 'Turnitin'
2582
+ category: 'Crawler'
2583
+ url: 'https://turnitin.com/robot/crawlerinfo.html'
2584
+
2585
+ - regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
2586
+ name: 'Dotcom Monitor'
2587
+ category: 'Site Monitor'
2588
+ url: 'https://www.dotcom-monitor.com'
2589
+
2590
+ - regex: 'ThinkChaos/'
2591
+ name: 'ThinkChaos'
2592
+ category: 'Crawler'
2593
+
2594
+ - regex: 'DataForSeoBot'
2595
+ name: 'DataForSeoBot'
2596
+ category: 'Crawler'
2597
+ url: 'https://dataforseo.com/dataforseo-bot'
2598
+
2599
+ # Generic detections
2107
2600
  - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2108
2601
  name: 'Generic Bot'