device_detector 1.0.4 → 1.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -7
  4. data/lib/device_detector/browser.rb +364 -0
  5. data/lib/device_detector/client.rb +8 -0
  6. data/lib/device_detector/device.rb +1204 -2
  7. data/lib/device_detector/os.rb +39 -12
  8. data/lib/device_detector/parser.rb +1 -1
  9. data/lib/device_detector/version.rb +1 -1
  10. data/lib/device_detector/version_extractor.rb +28 -0
  11. data/lib/device_detector.rb +57 -13
  12. data/regexes/bots.yml +651 -61
  13. data/regexes/client/browser_engine.yml +7 -1
  14. data/regexes/client/browsers.yml +774 -78
  15. data/regexes/client/feed_readers.yml +4 -10
  16. data/regexes/client/libraries.yml +71 -2
  17. data/regexes/client/mediaplayers.yml +25 -1
  18. data/regexes/client/mobile_apps.yml +923 -73
  19. data/regexes/client/pim.yml +65 -2
  20. data/regexes/device/cameras.yml +5 -5
  21. data/regexes/device/car_browsers.yml +16 -0
  22. data/regexes/device/consoles.yml +13 -1
  23. data/regexes/device/mobiles.yml +17029 -3889
  24. data/regexes/device/notebooks.yml +114 -0
  25. data/regexes/device/portable_media_player.yml +36 -9
  26. data/regexes/device/shell_tv.yml +117 -0
  27. data/regexes/device/televisions.yml +439 -34
  28. data/regexes/oss.yml +620 -284
  29. metadata +9 -131
  30. data/.gitignore +0 -14
  31. data/.rubocop.yml +0 -49
  32. data/.ruby-version +0 -1
  33. data/.travis.yml +0 -14
  34. data/Gemfile +0 -8
  35. data/Rakefile +0 -85
  36. data/device_detector.gemspec +0 -27
  37. data/spec/device_detector/bot_fixtures_spec.rb +0 -30
  38. data/spec/device_detector/client_fixtures_spec.rb +0 -31
  39. data/spec/device_detector/concrete_user_agent_spec.rb +0 -135
  40. data/spec/device_detector/detector_fixtures_spec.rb +0 -100
  41. data/spec/device_detector/device_fixtures_spec.rb +0 -36
  42. data/spec/device_detector/device_spec.rb +0 -131
  43. data/spec/device_detector/memory_cache_spec.rb +0 -148
  44. data/spec/device_detector/model_extractor_spec.rb +0 -63
  45. data/spec/device_detector/os_fixtures_spec.rb +0 -26
  46. data/spec/device_detector/version_extractor_spec.rb +0 -79
  47. data/spec/device_detector_spec.rb +0 -189
  48. data/spec/fixtures/client/browser.yml +0 -2836
  49. data/spec/fixtures/client/feed_reader.yml +0 -199
  50. data/spec/fixtures/client/library.yml +0 -193
  51. data/spec/fixtures/client/mediaplayer.yml +0 -163
  52. data/spec/fixtures/client/mobile_app.yml +0 -217
  53. data/spec/fixtures/client/pim.yml +0 -115
  54. data/spec/fixtures/detector/bots.yml +0 -3726
  55. data/spec/fixtures/detector/camera.yml +0 -141
  56. data/spec/fixtures/detector/car_browser.yml +0 -81
  57. data/spec/fixtures/detector/console.yml +0 -321
  58. data/spec/fixtures/detector/desktop.yml +0 -5461
  59. data/spec/fixtures/detector/feature_phone.yml +0 -891
  60. data/spec/fixtures/detector/feed_reader.yml +0 -551
  61. data/spec/fixtures/detector/mediaplayer.yml +0 -253
  62. data/spec/fixtures/detector/mobile_apps.yml +0 -494
  63. data/spec/fixtures/detector/phablet.yml +0 -4167
  64. data/spec/fixtures/detector/portable_media_player.yml +0 -178
  65. data/spec/fixtures/detector/smart_display.yml +0 -61
  66. data/spec/fixtures/detector/smart_speaker.yml +0 -55
  67. data/spec/fixtures/detector/smartphone-1.yml +0 -9927
  68. data/spec/fixtures/detector/smartphone-10.yml +0 -9977
  69. data/spec/fixtures/detector/smartphone-11.yml +0 -9891
  70. data/spec/fixtures/detector/smartphone-12.yml +0 -9906
  71. data/spec/fixtures/detector/smartphone-13.yml +0 -9920
  72. data/spec/fixtures/detector/smartphone-14.yml +0 -2662
  73. data/spec/fixtures/detector/smartphone-2.yml +0 -9992
  74. data/spec/fixtures/detector/smartphone-3.yml +0 -9945
  75. data/spec/fixtures/detector/smartphone-4.yml +0 -9923
  76. data/spec/fixtures/detector/smartphone-5.yml +0 -9914
  77. data/spec/fixtures/detector/smartphone-6.yml +0 -9962
  78. data/spec/fixtures/detector/smartphone-7.yml +0 -9899
  79. data/spec/fixtures/detector/smartphone-8.yml +0 -9931
  80. data/spec/fixtures/detector/smartphone-9.yml +0 -9899
  81. data/spec/fixtures/detector/smartphone.yml +0 -9984
  82. data/spec/fixtures/detector/tablet-1.yml +0 -9995
  83. data/spec/fixtures/detector/tablet-2.yml +0 -9977
  84. data/spec/fixtures/detector/tablet-3.yml +0 -9959
  85. data/spec/fixtures/detector/tablet-4.yml +0 -4528
  86. data/spec/fixtures/detector/tablet.yml +0 -9971
  87. data/spec/fixtures/detector/tv.yml +0 -4933
  88. data/spec/fixtures/detector/unknown.yml +0 -3236
  89. data/spec/fixtures/detector/wearable.yml +0 -61
  90. data/spec/fixtures/device/camera.yml +0 -19
  91. data/spec/fixtures/device/car_browser.yml +0 -13
  92. data/spec/fixtures/device/console.yml +0 -79
  93. data/spec/fixtures/parser/oss.yml +0 -1082
  94. data/spec/fixtures/parser/vendorfragments.yml +0 -168
  95. data/spec/spec_helper.rb +0 -9
data/regexes/bots.yml CHANGED
@@ -5,10 +5,10 @@
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
- - regex: '360Spider(-Image|-Video)?'
8
+ - regex: '360Spider'
9
9
  name: '360Spider'
10
10
  category: 'Search bot'
11
- url: 'http://www.so.com/help/help_3_2.html'
11
+ url: 'https://www.so.com/help/help_3_2.html'
12
12
  producer:
13
13
  name: 'Online Media Group, Inc.'
14
14
  url: ''
@@ -40,26 +40,34 @@
40
40
  - regex: 'AhrefsBot'
41
41
  name: 'aHrefs Bot'
42
42
  category: 'Crawler'
43
- url: 'http://ahrefs.com/robot'
43
+ url: 'https://ahrefs.com/robot'
44
44
  producer:
45
45
  name: 'Ahrefs Pte Ltd'
46
- url: 'http://ahrefs.com/robot'
46
+ url: 'https://ahrefs.com/robot'
47
47
 
48
48
  - regex: 'ia_archiver|alexabot|verifybot'
49
49
  name: 'Alexa Crawler'
50
50
  category: 'Search bot'
51
- url: 'https://alexa.zendesk.com/hc/en-us/sections/200100794-Crawlers'
51
+ url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
52
52
  producer:
53
53
  name: 'Alexa Internet'
54
- url: 'http://www.alexa.com'
54
+ url: 'https://www.alexa.com'
55
55
 
56
56
  - regex: 'alexa site audit'
57
57
  name: 'Alexa Site Audit'
58
58
  category: 'Site Monitor'
59
- url: 'http://www.alexa.com/help/webmasters'
59
+ url: 'https://support.alexa.com/hc/en-us/articles/200450194'
60
60
  producer:
61
61
  name: 'Alexa Internet'
62
- url: 'http://www.alexa.com'
62
+ url: 'https://www.alexa.com'
63
+
64
+ - regex: 'Amazonbot'
65
+ name: 'Amazon Bot'
66
+ category: 'Crawler'
67
+ url: 'https://developer.amazon.com/support/amazonbot'
68
+ producer:
69
+ name: 'Amazon.com, Inc.'
70
+ url: 'https://www.amazon.com/'
63
71
 
64
72
  - regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
65
73
  name: 'Amazon Route53 Health Check'
@@ -82,23 +90,31 @@
82
90
  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
83
91
  producer:
84
92
  name: 'The Apache Software Foundation'
85
- url: 'http://www.apache.org/foundation/'
93
+ url: 'https://www.apache.org/foundation/'
86
94
 
87
95
  - regex: 'Applebot'
88
96
  name: 'Applebot'
89
97
  category: 'Crawler'
90
- url: 'http://www.apple.com/go/applebot'
98
+ url: 'https://support.apple.com/en-us/HT204683'
91
99
  producer:
92
100
  name: 'Apple Inc'
93
- url: 'http://www.apple.com'
101
+ url: 'https://www.apple.com'
102
+
103
+ - regex: "AppSignalBot"
104
+ name: "AppSignalBot"
105
+ category: "Site Monitor"
106
+ url: "https://docs.appsignal.com/uptime-monitoring/"
107
+ producer:
108
+ name: "AppSignal"
109
+ url: "https://appsignal.com/"
94
110
 
95
111
  - regex: 'Arachni'
96
112
  name: 'Arachni'
97
113
  category: 'Security Checker'
98
- url: 'http://www.arachni-scanner.com'
114
+ url: 'https://www.arachni-scanner.com/'
99
115
  producer:
100
116
  name: 'Sarosys LLC'
101
- url: 'http://www.sarosys.com/'
117
+ url: 'https://www.sarosys.com/'
102
118
 
103
119
  - regex: 'AspiegelBot'
104
120
  name: 'AspiegelBot'
@@ -112,7 +128,7 @@
112
128
  name: 'Castro 2'
113
129
  category: 'Service Agent'
114
130
  url: 'http://supertop.co/castro/'
115
- producer:
131
+ producer:
116
132
  name: 'Supertop'
117
133
  url: 'http://supertop.co'
118
134
 
@@ -127,10 +143,10 @@
127
143
  - regex: 'archive\.org_bot|special_archiver'
128
144
  name: 'archive.org bot'
129
145
  category: 'Crawler'
130
- url: 'http://www.archive.org/details/archive.org_bot'
146
+ url: 'https://archive.org/details/archive.org_bot'
131
147
  producer:
132
148
  name: 'The Internet Archive'
133
- url: 'http://www.archive.org'
149
+ url: 'https://archive.org'
134
150
 
135
151
  - regex: 'Ask Jeeves/Teoma'
136
152
  name: 'Ask Jeeves'
@@ -156,7 +172,7 @@
156
172
  name: '2.0Promotion GbR'
157
173
  url: 'http://www.backlinktest.com'
158
174
 
159
- - regex: 'baiduspider(-image)?|baidu Transcoder|baidu.*spider'
175
+ - regex: 'Baidu.*spider|baidu Transcoder'
160
176
  name: 'Baidu Spider'
161
177
  category: 'Search bot'
162
178
  url: 'http://www.baidu.com/search/spider.htm'
@@ -172,6 +188,14 @@
172
188
  name: ''
173
189
  url: ''
174
190
 
191
+ - regex: 'Better Uptime Bot'
192
+ name: 'Better Uptime Bot'
193
+ category: 'Site Monitor'
194
+ url: 'https://betteruptime.com/faq'
195
+ producer:
196
+ name: 'Better Uptime'
197
+ url: 'https://betteruptime.com/'
198
+
175
199
  - regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
176
200
  name: 'BingBot'
177
201
  category: 'Search bot'
@@ -188,7 +212,7 @@
188
212
  name: 'Blekko'
189
213
  url: 'http://blekko.com'
190
214
 
191
- - regex: 'BLEXBot(Test)?'
215
+ - regex: 'BLEXBot'
192
216
  name: 'BLEXBot Crawler'
193
217
  category: 'Crawler'
194
218
  url: 'http://webmeup-crawler.com'
@@ -217,7 +241,7 @@
217
241
  category: 'Crawler'
218
242
  producer:
219
243
  name: 'BoardReader'
220
- url: 'http://boardreader.com/'
244
+ url: 'https://boardreader.com/'
221
245
 
222
246
  - regex: 'BountiiBot'
223
247
  name: 'Bountii Bot'
@@ -283,6 +307,14 @@
283
307
  name: 'CloudFlare'
284
308
  url: 'http://www.cloudflare.com'
285
309
 
310
+ - regex: 'CloudflareDiagnostics'
311
+ name: 'Cloudflare Diagnostics'
312
+ category: 'Site Monitor'
313
+ url: 'https://www.cloudflare.com/'
314
+ producer:
315
+ name: 'Cloudflare'
316
+ url: 'https://www.cloudflare.com'
317
+
286
318
  - regex: 'CloudFlare-AlwaysOnline'
287
319
  name: 'CloudFlare Always Online'
288
320
  category: 'Site Monitor'
@@ -291,7 +323,7 @@
291
323
  name: 'CloudFlare'
292
324
  url: 'http://www.cloudflare.com'
293
325
 
294
- - regex: 'coccoc|coccocbot(-ads|-fast|-image|-shopping|-web)?'
326
+ - regex: 'coccoc.com'
295
327
  name: 'Cốc Cốc Bot'
296
328
  url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
297
329
  category: 'Search bot'
@@ -339,7 +371,6 @@
339
371
  name: 'Datanyze'
340
372
  url: 'https://www.datanyze.com'
341
373
 
342
-
343
374
  - regex: 'Dataprovider'
344
375
  name: 'Dataprovider'
345
376
  category: 'Crawler'
@@ -364,7 +395,7 @@
364
395
  name: 'DAZOO.FR'
365
396
  url: 'http://dazoo.fr'
366
397
 
367
- - regex: 'discobot(-news)?'
398
+ - regex: 'discobot'
368
399
  name: 'Discobot'
369
400
  category: 'Search bot'
370
401
  url: 'http://discoveryengine.com/discobot.html'
@@ -427,7 +458,7 @@
427
458
  name: 'eVenture Capital Partners II, LLC'
428
459
  url: 'http://www.eventures.vc/'
429
460
 
430
- - regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
461
+ - regex: 'Exabot|ExaleadCloudview'
431
462
  name: 'ExaBot'
432
463
  category: 'Crawler'
433
464
  url: 'http://www.exabot.com/go/robot'
@@ -451,7 +482,7 @@
451
482
  name: 'SEOmoz, Inc.'
452
483
  url: 'http://moz.com/'
453
484
 
454
- - regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
485
+ - regex: 'facebookexternalhit|facebookplatform|facebookexternalua|facebookcatalog'
455
486
  name: 'Facebook External Hit'
456
487
  category: 'Social Media Agent'
457
488
  url: 'https://www.facebook.com/externalhit_uatext.php'
@@ -483,7 +514,7 @@
483
514
  name: 'David Smith & Developing Perspective, LLC'
484
515
  url: 'https://david-smith.org'
485
516
 
486
- - regex: '(Meta)?Feedly(Bot|App)?'
517
+ - regex: 'Feedly'
487
518
  name: 'Feedly'
488
519
  url: 'http://www.feedly.com'
489
520
  category: 'Feed Fetcher'
@@ -549,6 +580,10 @@
549
580
  name: ''
550
581
  url: ''
551
582
 
583
+ - regex: 'gobuster'
584
+ name: 'Gobuster'
585
+ url: 'https://github.com/OJ/gobuster'
586
+
552
587
  - regex: 'ichiro/mobile goo'
553
588
  name: 'Goo'
554
589
  category: 'Search bot'
@@ -557,6 +592,10 @@
557
592
  name: 'NTT Resonant'
558
593
  url: 'http://goo.ne.jp'
559
594
 
595
+ - regex: 'Storebot-Google'
596
+ name: 'Google StoreBot'
597
+ category: 'Crawler'
598
+
560
599
  - regex: 'Google Favicon'
561
600
  name: 'Google Favicon'
562
601
  category: 'Crawler'
@@ -649,7 +688,7 @@
649
688
  name: 'Visual Meta'
650
689
  url: 'https://www.shopalike.cz/'
651
690
 
652
- - regex: 'AdsBot-Google(-Mobile)?|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot(-Mobile|-Image|-Video|-News)?|GoogleProducer|Google.*/\+/web/snippet'
691
+ - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
653
692
  name: 'Googlebot'
654
693
  category: 'Search bot'
655
694
  url: 'http://www.google.com/bot.html'
@@ -663,7 +702,7 @@
663
702
  url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
664
703
  producer:
665
704
  name: 'The Internet Archive'
666
- url: 'http://www.archive.org'
705
+ url: 'https://archive.org'
667
706
 
668
707
  - regex: 'HubSpot '
669
708
  name: 'HubSpot'
@@ -672,7 +711,6 @@
672
711
  name: 'HubSpot Inc.'
673
712
  url: 'https://www.hubspot.com'
674
713
 
675
-
676
714
  - regex: 'HTTPMon'
677
715
  name: 'HTTPMon'
678
716
  category: 'Site Monitor'
@@ -704,7 +742,7 @@
704
742
 
705
743
  - regex: 'ips-agent'
706
744
  name: 'IPS Agent'
707
- category: 'crawler'
745
+ category: 'Crawler'
708
746
  producer:
709
747
  name: 'VeriSign, Inc'
710
748
  url: 'http://www.verisign.com/'
@@ -717,6 +755,10 @@
717
755
  name: ''
718
756
  url: 'https://ip-guide.com'
719
757
 
758
+ - regex: 'k6/[0-9\.]+'
759
+ name: 'K6'
760
+ url: 'https://k6.io/'
761
+
720
762
  - regex: 'kouio'
721
763
  name: 'Kouio'
722
764
  url: 'http://kouio.com/'
@@ -741,7 +783,7 @@
741
783
  name: 'Lighthouse'
742
784
  url: 'https://developers.google.com/web/tools/lighthouse'
743
785
 
744
- - regex: 'linkdexbot(-mobile)?|linkdex\.com'
786
+ - regex: 'linkdexbot|linkdex\.com'
745
787
  name: 'Linkdex Bot'
746
788
  category: 'Search bot'
747
789
  url: 'http://www.linkdex.com/bots'
@@ -764,7 +806,7 @@
764
806
  name: ''
765
807
  url: ''
766
808
 
767
- - regex: 'Mail\.RU(_Bot)?'
809
+ - regex: 'Mail\.RU'
768
810
  name: 'Mail.Ru Bot'
769
811
  category: 'Search bot'
770
812
  url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
@@ -788,7 +830,7 @@
788
830
  name: ''
789
831
  url: ''
790
832
 
791
- - regex : 'masscan'
833
+ - regex: 'masscan'
792
834
  name: 'masscan'
793
835
  url: 'https://github.com/robertdavidgraham/masscan'
794
836
  category: 'Crawler'
@@ -941,12 +983,12 @@
941
983
  category: 'Crawler'
942
984
  producer:
943
985
  name: 'Nuzzel'
944
- url: https://www.nuzzel.com/
986
+ url: 'https://www.nuzzel.com/'
945
987
 
946
988
  - regex: 'Octopus [0-9]'
947
989
  name: 'Octopus'
948
990
 
949
- - regex: 'omgili(?:bot)?'
991
+ - regex: 'omgili'
950
992
  name: 'Omgili bot'
951
993
  category: 'Search bot'
952
994
  url: 'http://www.omgili.com/Crawler.html'
@@ -1023,7 +1065,15 @@
1023
1065
  name: 'Bitlove'
1024
1066
  url: 'http://bitlove.org/'
1025
1067
 
1026
- - regex: 'psbot(-page)?'
1068
+ - regex: 'PRTG Network Monitor'
1069
+ name: 'PRTG Network Monitor'
1070
+ category: 'Network Monitor'
1071
+ url: 'https://www.paessler.com/prtg'
1072
+ producer:
1073
+ name: 'Paessler AG'
1074
+ url: 'https://www.paessler.com'
1075
+
1076
+ - regex: 'psbot'
1027
1077
  name: 'Picsearch bot'
1028
1078
  category: 'Search bot'
1029
1079
  url: 'http://www.picsearch.com/bot.html'
@@ -1031,7 +1081,7 @@
1031
1081
  name: 'Picsearch'
1032
1082
  url: 'http://www.picsearch.com'
1033
1083
 
1034
- - regex: 'Pingdom\.com'
1084
+ - regex: 'Pingdom(?:\.com|TMS)'
1035
1085
  name: 'Pingdom Bot'
1036
1086
  category: 'Site Monitor'
1037
1087
  url: ''
@@ -1047,6 +1097,14 @@
1047
1097
  name: 'Quora'
1048
1098
  url: 'http://www.quora.com'
1049
1099
 
1100
+ - regex: 'Quora-Bot'
1101
+ name: 'Quora Bot'
1102
+ category: 'Crawler'
1103
+ url: ''
1104
+ producer:
1105
+ name: 'Quora'
1106
+ url: 'https://www.quora.com/'
1107
+
1050
1108
  - regex: 'RamblerMail'
1051
1109
  name: 'RamblerMail Image Proxy'
1052
1110
  category: 'Crawler'
@@ -1230,7 +1288,7 @@
1230
1288
  name: 'SISTRIX GmbH'
1231
1289
  url: 'http://www.sistrix.de'
1232
1290
 
1233
- - regex: 'SISTRIX Optimizer'
1291
+ - regex: 'compatible; (?:SISTRIX )?Optimizer'
1234
1292
  name: 'SISTRIX Optimizer'
1235
1293
  category: 'Crawler'
1236
1294
  url: 'https://optimizer.sistrix.com'
@@ -1275,6 +1333,14 @@
1275
1333
  name: 'Tencent Holdings'
1276
1334
  url: 'http://www.soso.com'
1277
1335
 
1336
+ - regex: 'Sprinklr'
1337
+ name: 'Sprinklr'
1338
+ category: 'Crawler'
1339
+ url: ''
1340
+ producer:
1341
+ name: 'Sprinklr, Inc.'
1342
+ url: 'https://www.sprinklr.com/'
1343
+
1278
1344
  - regex: 'sqlmap/'
1279
1345
  name: 'sqlmap'
1280
1346
  category: 'Security Checker'
@@ -1320,13 +1386,20 @@
1320
1386
  name: 'Tailrank Inc'
1321
1387
  url: 'http://spinn3r.com'
1322
1388
 
1323
- - regex: 'Sputnik(Image)?Bot'
1389
+ - regex: 'SputnikBot'
1324
1390
  name: 'Sputnik Bot'
1325
- category: ''
1391
+ category: 'Crawler'
1392
+ url: ''
1393
+
1394
+ - regex: 'SputnikFaviconBot'
1395
+ name: 'Sputnik Favicon Bot'
1396
+ category: 'Crawler'
1397
+ url: ''
1398
+
1399
+ - regex: 'SputnikImageBot'
1400
+ name: 'Sputnik Image Bot'
1401
+ category: 'Crawler'
1326
1402
  url: ''
1327
- producer:
1328
- name: ''
1329
- url: ''
1330
1403
 
1331
1404
  - regex: 'SurveyBot'
1332
1405
  name: 'Survey Bot'
@@ -1545,7 +1618,7 @@
1545
1618
  category: 'Site Monitor'
1546
1619
  url: 'https://www.webpagetest.org'
1547
1620
 
1548
- - regex: 'WeSEE(:Search)?'
1621
+ - regex: 'WeSEE'
1549
1622
  name: 'WeSEE:Search'
1550
1623
  category: 'Search bot'
1551
1624
  url: 'http://www.wesee.com/bot'
@@ -1617,6 +1690,14 @@
1617
1690
  name: 'Yahoo! Inc.'
1618
1691
  url: 'http://www.yahoo.com'
1619
1692
 
1693
+ - regex: 'YahooMailProxy'
1694
+ name: 'Yahoo! Mail Proxy'
1695
+ category: 'Service Agent'
1696
+ url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
1697
+ producer:
1698
+ name: 'Yahoo! Inc.'
1699
+ url: 'http://www.yahoo.com'
1700
+
1620
1701
  - regex: 'YahooCacheSystem'
1621
1702
  name: 'Yahoo! Cache System'
1622
1703
  category: 'Crawler'
@@ -1633,7 +1714,7 @@
1633
1714
  name: 'Yahoo! Japan Corp.'
1634
1715
  url: 'https://www.yahoo.co.jp/'
1635
1716
 
1636
- - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1717
+ - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1637
1718
  name: 'Yandex Bot'
1638
1719
  category: 'Search bot'
1639
1720
  url: 'http://www.yandex.com/bots'
@@ -1641,7 +1722,7 @@
1641
1722
  name: 'Yandex LLC'
1642
1723
  url: 'http://company.yandex.com'
1643
1724
 
1644
- - regex: 'Yeti|NaverJapan'
1725
+ - regex: 'Yeti|NaverJapan|AdsBot-Naver'
1645
1726
  name: 'Yeti/Naverbot'
1646
1727
  category: 'Search bot'
1647
1728
  url: 'http://help.naver.com/robots/'
@@ -1718,18 +1799,18 @@
1718
1799
  - regex: 'HubPages.*crawlingpolicy'
1719
1800
  name: 'HubPages'
1720
1801
  category: 'Crawler'
1721
- url: 'http://hubpages.com/help/crawlingpolicy'
1802
+ url: 'https://hubpages.com/help/crawlingpolicy'
1722
1803
  producer:
1723
- name: 'HubPages'
1724
- url: 'http://hubpages.com/'
1804
+ name: 'HubPages, Inc.'
1805
+ url: 'https://discover.hubpages.com/'
1725
1806
 
1726
1807
  - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
1727
1808
  name: 'Pinterest'
1728
- url: 'http://www.pinterest.com/bot.html'
1809
+ url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
1729
1810
  category: 'Crawler'
1730
1811
  producer:
1731
1812
  name: 'Pinterest'
1732
- url: 'http://www.pinterest.com/'
1813
+ url: 'https://www.pinterest.com/'
1733
1814
 
1734
1815
  - regex: 'Site24x7'
1735
1816
  name: 'Site24x7 Website Monitoring'
@@ -1771,13 +1852,13 @@
1771
1852
  name: 'Monitor.Us'
1772
1853
  url: 'http://www.monitor.us'
1773
1854
 
1774
- - regex: 'Catchpoint( bot)?'
1855
+ - regex: 'Catchpoint'
1775
1856
  name: 'Catchpoint'
1776
1857
  category: 'Site Monitor'
1777
- url: ''
1858
+ url: 'https://www.catchpoint.com/'
1778
1859
  producer:
1779
1860
  name: 'Catchpoint Systems'
1780
- url: 'http://www.catchpoint.com/'
1861
+ url: 'https://www.catchpoint.com/'
1781
1862
 
1782
1863
  - regex: 'bitlybot'
1783
1864
  name: 'BitlyBot'
@@ -1845,7 +1926,7 @@
1845
1926
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1846
1927
  name: 'RSSRadio Bot'
1847
1928
 
1848
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9)'
1929
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
1849
1930
  name: 'Generic Bot'
1850
1931
 
1851
1932
  - regex: '^sentry'
@@ -1955,10 +2036,10 @@
1955
2036
  - regex: 'BoardReader Favicon Fetcher'
1956
2037
  name: 'BoardReader'
1957
2038
  category: 'Search bot'
1958
- url: 'http://boardreader.com/'
2039
+ url: 'https://boardreader.com/'
1959
2040
  producer:
1960
2041
  name: 'Effyis Inc'
1961
- url: 'http://boardreader.com/'
2042
+ url: 'https://boardreader.com/'
1962
2043
 
1963
2044
  - regex: 'IDG/IT'
1964
2045
  name: 'IDG/IT'
@@ -2003,7 +2084,7 @@
2003
2084
  - regex: 'oBot'
2004
2085
  name: 'oBot'
2005
2086
  category: 'Search bot'
2006
- url: 'http://www.xforce-security.com/crawler/'
2087
+ url: 'https://www.xforce-security.com/crawler/'
2007
2088
  producer:
2008
2089
  name: 'IBM Germany Research & Development GmbH'
2009
2090
  url: 'https://exchange.xforce.ibmcloud.com/'
@@ -2040,15 +2121,524 @@
2040
2121
  name: 'Carbon60 Operating Co. Ltd.'
2041
2122
  url: 'https://www.carbon60.com/'
2042
2123
 
2043
- # Generic detections
2044
-
2045
2124
  - regex: 'Nutch'
2046
2125
  name: 'Nutch-based Bot'
2047
2126
  category: 'Crawler'
2048
2127
  url: 'https://nutch.apache.org'
2049
2128
  producer:
2050
2129
  name: 'The Apache Software Foundation'
2051
- url: 'http://www.apache.org/foundation/'
2130
+ url: 'https://www.apache.org/foundation/'
2131
+
2132
+ - regex: 'Seobility'
2133
+ name: 'Seobility'
2134
+ category: 'Crawler'
2135
+ url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
2136
+
2137
+ - regex: 'Vercelbot'
2138
+ name: 'Vercel Bot'
2139
+ category: 'Service bot'
2140
+ url: 'https://vercel.com'
2141
+
2142
+ - regex: 'Grammarly'
2143
+ name: 'Grammarly'
2144
+ category: 'Service bot'
2145
+ url: 'https://www.grammarly.com'
2146
+
2147
+ - regex: 'Robozilla'
2148
+ name: 'Robozilla'
2149
+ category: 'Crawler'
2150
+
2151
+ - regex: 'Domains Project'
2152
+ name: 'Domains Project'
2153
+ category: 'Crawler'
2154
+ url: 'https://domainsproject.org'
2155
+
2156
+ - regex: 'PetalBot'
2157
+ name: 'Petal Bot'
2158
+ category: 'Crawler'
2159
+ url: 'https://aspiegel.com/petalbot'
2160
+
2161
+ - regex: 'SerendeputyBot'
2162
+ name: 'Serendeputy Bot'
2163
+ category: 'Crawler'
2164
+ url: 'https://serendeputy.com/about/serendeputy-bot'
2165
+
2166
+ - regex: 'ias-va.*admantx.*service-fetcher'
2167
+ name: 'ADmantX Service Fetcher'
2168
+ category: 'Service bot'
2169
+ url: 'https://www.admantx.com/service-fetcher.html'
2170
+
2171
+ - regex: 'SemanticScholarBot'
2172
+ name: 'Semantic Scholar Bot'
2173
+ category: 'Crawler'
2174
+ url: 'https://www.semanticscholar.org/crawler'
2175
+
2176
+ - regex: 'VelenPublicWebCrawler'
2177
+ name: 'Velen Public Web Crawler'
2178
+ category: 'Crawler'
2179
+ url: 'https://hunter.io/robot'
2180
+
2181
+ - regex: 'Barkrowler'
2182
+ name: 'Barkrowler'
2183
+ category: 'Crawler'
2184
+ url: 'http://www.exensa.com/crawl'
2185
+
2186
+ - regex: 'BDCbot'
2187
+ name: 'BDCbot'
2188
+ category: 'Crawler'
2189
+ url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
2190
+ producer:
2191
+ name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
2192
+ url: 'https://bigdatacorp.com.br/'
2193
+
2194
+ - regex: 'adbeat'
2195
+ name: 'Adbeat'
2196
+ category: 'Crawler'
2197
+ url: 'https://www.adbeat.com/operation_policy'
2198
+ producer:
2199
+ name: 'PPC Labs LLC'
2200
+ url: 'https://www.adbeat.com/'
2201
+
2202
+ - regex: 'BW/(?:(\d+[\.\d]+))'
2203
+ name: 'BuiltWith'
2204
+ category: 'Crawler'
2205
+ url: 'https://builtwith.com/biup'
2206
+ producer:
2207
+ name: 'BuiltWith Pty Ltd'
2208
+ url: 'https://builtwith.com/'
2209
+
2210
+ - regex: 'https://whatis.contentkingapp.com'
2211
+ name: 'ContentKing'
2212
+ category: 'Site Monitor'
2213
+ url: 'https://whatis.contentkingapp.com/'
2214
+ producer:
2215
+ name: 'ContentKing BV'
2216
+ url: 'https://www.contentkingapp.com/'
2217
+
2218
+ - regex: 'MicroAdBot'
2219
+ name: 'MicroAdBot'
2220
+ category: 'Crawler'
2221
+ url: 'https://www.microad.co.jp/'
2222
+ producer:
2223
+ name: 'MicroAd, Inc.'
2224
+ url: 'https://www.microad.co.jp/'
2225
+
2226
+ - regex: 'PingAdmin.Ru'
2227
+ name: 'PingAdmin.Ru'
2228
+ category: 'Site Monitor'
2229
+ url: 'https://ping-admin.ru/'
2230
+
2231
+ - regex: 'notifyninja.+monitoring'
2232
+ name: 'Notify Ninja'
2233
+ category: 'Site Monitor'
2234
+ url: 'http://notifyninja.com'
2235
+
2236
+ - regex: 'WebDataStats'
2237
+ name: 'WebDataStats'
2238
+ category: 'Crawler'
2239
+ url: 'https://webdatastats.com/policy.html'
2240
+ producer:
2241
+ name: 'WebTehRazrabotka LLC'
2242
+ url: 'https://webdatastats.com/'
2243
+
2244
+ - regex: 'parse.ly scraper'
2245
+ name: 'parse.ly'
2246
+ category: 'Crawler'
2247
+ url: 'https://www.parse.ly/help/integration/crawler'
2248
+ producer:
2249
+ name: 'Parsely, Inc.'
2250
+ url: 'https://www.parse.ly/'
2251
+
2252
+ - regex: 'Nimbostratus-Bot'
2253
+ name: 'Nimbostratus Bot'
2254
+ category: 'Site Monitor'
2255
+ url: 'http://cloudsystemnetworks.com'
2052
2256
 
2053
- - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2257
+ - regex: 'HeartRails_Capture/\d'
2258
+ name: 'Heart Rails Capture'
2259
+ category: 'Service Agent'
2260
+ url: 'http://capture.heartrails.com'
2261
+
2262
+ - regex: 'Project-Resonance'
2263
+ name: 'Project Resonance'
2264
+ category: 'Crawler'
2265
+ url: 'http://project-resonance.com'
2266
+
2267
+ - regex: 'DataXu/\d'
2268
+ name: 'DataXu'
2269
+ category: 'Service Agent'
2270
+ url: 'https://advertising.roku.com/dataxu'
2271
+ producer:
2272
+ name: 'Roku, Inc.'
2273
+ url: 'https://roku.com'
2274
+
2275
+ - regex: 'Cocolyzebot'
2276
+ name: 'Cocolyzebot'
2277
+ category: 'Crawler'
2278
+ url: 'https://cocolyze.com/en/cocolyzebot'
2279
+ producer:
2280
+ name: 'VSI INNOVATION SAS'
2281
+ url: 'https://vsi-innovation.com/'
2282
+
2283
+ - regex: 'veryhip'
2284
+ name: 'VeryHip'
2285
+ category: 'Crawler'
2286
+ url: 'https://veryhip.com/'
2287
+ producer:
2288
+ name: 'VeryHip'
2289
+ url: 'https://veryhip.com/'
2290
+
2291
+ - regex: 'LinkpadBot'
2292
+ name: 'LinkpadBot'
2293
+ category: 'Crawler'
2294
+ url: 'https://www.linkpad.org/'
2295
+ producer:
2296
+ name: 'Solomono LLC'
2297
+ url: 'https://www.linkpad.org/'
2298
+
2299
+ - regex: 'MuscatFerret'
2300
+ name: 'MuscatFerret'
2301
+ category: 'Crawler'
2302
+ url: 'http://www.webtop.com/'
2303
+
2304
+ - regex: 'PageThing.com'
2305
+ name: 'PageThing'
2306
+ category: 'Crawler'
2307
+ url: 'https://www.pagething.com/'
2308
+ producer:
2309
+ name: 'SPECIALNOISE LTD'
2310
+ url: 'https://www.specialnoise.com/'
2311
+
2312
+ - regex: 'ArchiveBox'
2313
+ name: 'ArchiveBox'
2314
+ url: 'https://archivebox.io/'
2315
+ category: 'Crawler'
2316
+ producer:
2317
+ name: ''
2318
+ url: ''
2319
+
2320
+ - regex: 'Choosito'
2321
+ name: 'Choosito'
2322
+ url: 'https://www.choosito.com/'
2323
+ category: 'Crawler'
2324
+ producer:
2325
+ name: 'Choosito! Inc.'
2326
+ url: 'https://www.choosito.com/'
2327
+
2328
+ - regex: 'datagnionbot'
2329
+ name: 'datagnionbot'
2330
+ url: 'https://www.datagnion.com/bot.html'
2331
+ category: 'Crawler'
2332
+ producer:
2333
+ name: 'DATAGNION GMBH'
2334
+ url: 'https://www.datagnion.com/'
2335
+
2336
+ - regex: 'WhatCMS'
2337
+ name: 'WhatCMS'
2338
+ url: 'https://whatcms.org/'
2339
+ category: 'Crawler'
2340
+ producer:
2341
+ name: 'Nineteen Ten LLC'
2342
+ url: 'https://whatcms.org/'
2343
+
2344
+ - regex: 'httpx'
2345
+ name: 'httpx'
2346
+ url: 'https://github.com/projectdiscovery/httpx'
2347
+ category: 'Crawler'
2348
+ producer:
2349
+ name: ''
2350
+ url: ''
2351
+
2352
+ - regex: 'scaninfo@expanseinc.com'
2353
+ name: 'Expanse'
2354
+ category: 'Security Checker'
2355
+ url: 'https://expanse.co/'
2356
+ producer:
2357
+ name: 'Expanse Inc.'
2358
+ url: 'https://expanse.co/'
2359
+
2360
+ - regex: 'HuaweiWebCatBot'
2361
+ name: 'HuaweiWebCatBot'
2362
+ category: 'Crawler'
2363
+ url: 'https://isecurity.huawei.com'
2364
+ producer:
2365
+ name: 'Huawei Technologies Co., Ltd.'
2366
+ url: 'https://huawei.com'
2367
+
2368
+ - regex: 'Hatena-Favicon'
2369
+ name: 'Hatena Favicon'
2370
+ category: 'Crawler'
2371
+ url: 'https://www.hatena.ne.jp/faq/'
2372
+ producer:
2373
+ name: 'Hatena Co., Ltd.'
2374
+ url: 'https://www.hatena.ne.jp'
2375
+
2376
+ - regex: 'RyowlEngine/(\d+)'
2377
+ name: 'Ryowl'
2378
+ category: 'Crawler'
2379
+ url: 'https://ryowl.org'
2380
+
2381
+ - regex: 'OdklBot/(\d+)'
2382
+ name: 'Odnoklassniki Bot'
2383
+ category: 'Crawler'
2384
+ url: 'https://odnoklassniki.ru'
2385
+
2386
+ - regex: 'Mediatoolkitbot'
2387
+ name: 'Mediatoolkit Bot'
2388
+ category: 'Crawler'
2389
+ url: 'https://mediatoolkit.com'
2390
+
2391
+ - regex: 'ZoominfoBot'
2392
+ name: 'ZoominfoBot'
2393
+ category: 'Crawler'
2394
+ url: 'https://www.zoominfo.com'
2395
+
2396
+ - regex: 'WeViKaBot/([\d+\.])'
2397
+ name: 'WeViKaBot'
2398
+ category: 'Crawler'
2399
+ url: 'http://www.wevika.de'
2400
+
2401
+ - regex: 'SEOkicks'
2402
+ name: 'SEOkicks'
2403
+ category: 'Crawler'
2404
+ url: 'https://www.seokicks.de/robot.html'
2405
+
2406
+ - regex: 'Plukkie/([\d+\.])'
2407
+ name: 'Plukkie'
2408
+ category: 'Crawler'
2409
+ url: 'http://www.botje.com/plukkie.htm'
2410
+
2411
+ - regex: 'proximic;'
2412
+ name: 'Comscore'
2413
+ category: 'Crawler'
2414
+ url: 'https://www.comscore.com/Web-Crawler'
2415
+
2416
+ - regex: 'SurdotlyBot/([\d+\.])'
2417
+ name: 'SurdotlyBot'
2418
+ category: 'Crawler'
2419
+ url: 'http://sur.ly/bot.html'
2420
+
2421
+ - regex: 'Gowikibot/([\d+\.])'
2422
+ name: 'Gowikibot'
2423
+ category: 'Crawler'
2424
+ url: 'http://www.gowikibot.com'
2425
+
2426
+ - regex: 'SabsimBot/([\d+\.])'
2427
+ name: 'SabsimBot'
2428
+ category: 'Crawler'
2429
+ url: 'https://sabsim.com'
2430
+
2431
+ - regex: 'LumtelBot/([\d+\.])'
2432
+ name: 'LumtelBot'
2433
+ category: 'Crawler'
2434
+ url: 'https://umtel.com'
2435
+
2436
+ - regex: 'PiplBot'
2437
+ name: 'PiplBot'
2438
+ category: 'Crawler'
2439
+ url: 'http://www.pipl.com/bot'
2440
+
2441
+ - regex: 'woobot/([\d+\.])'
2442
+ name: 'WooRank'
2443
+ category: 'Crawler'
2444
+ url: 'https://www.woorank.com/bot'
2445
+
2446
+ - regex: 'Cookiebot/([\d+\.])'
2447
+ name: 'Cookiebot'
2448
+ category: 'Crawler'
2449
+ url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
2450
+ producer:
2451
+ name: 'Cybot A/S'
2452
+ url: 'https://www.cybot.com/'
2453
+
2454
+ - regex: 'NetSystemsResearch'
2455
+ name: 'NetSystemsResearch'
2456
+ category: 'Security Checker'
2457
+ url: 'https://www.netsystemsresearch.com/'
2458
+ producer:
2459
+ name: 'NET SYSTEMS RESEARCH LLC'
2460
+ url: 'https://www.netsystemsresearch.com/'
2461
+
2462
+ - regex: 'CensysInspect/([\d+\.])'
2463
+ name: 'CensysInspect'
2464
+ category: 'Security Checker'
2465
+ url: 'https://about.censys.io/'
2466
+ producer:
2467
+ name: 'Censys, Inc.'
2468
+ url: 'https://censys.io/'
2469
+
2470
+ - regex: 'gdnplus.com'
2471
+ name: 'GDNP'
2472
+ category: 'Crawler'
2473
+ url: 'https://gdnplus.com/'
2474
+ producer:
2475
+ name: 'Global Digital Network Plus, LLC'
2476
+ url: 'https://gdnplus.com/'
2477
+
2478
+ - regex: 'WellKnownBot/([\d+\.])'
2479
+ name: 'WellKnownBot'
2480
+ category: 'Crawler'
2481
+ url: 'https://well-known.dev'
2482
+
2483
+ - regex: 'Adsbot/([\d+\.])'
2484
+ name: 'Adsbot'
2485
+ category: 'Crawler'
2486
+ url: 'https://seostar.co/robot/'
2487
+
2488
+ - regex: 'MTRobot/([\d+\.])'
2489
+ name: 'MTRobot'
2490
+ category: 'Crawler'
2491
+ url: 'https://metrics-tools.de/robot.html'
2492
+ producer:
2493
+ name: 'Metrics Tools'
2494
+ url: 'https://metrics-tools.de/'
2495
+
2496
+ - regex: 'serpstatbot/([\d+\.])'
2497
+ name: 'serpstatbot'
2498
+ category: 'Crawler'
2499
+ url: 'http://serpstatbot.com/'
2500
+ producer:
2501
+ name: 'Netpeak Ltd'
2502
+ url: 'https://netpeak.net/'
2503
+
2504
+ - regex: 'colly'
2505
+ name: 'colly'
2506
+ category: 'Crawler'
2507
+ url: 'https://github.com/gocolly/colly/'
2508
+
2509
+ - regex: 'l9tcpid/v([\d+\.])'
2510
+ name: 'l9tcpid'
2511
+ category: 'Security Checker'
2512
+ url: 'https://github.com/LeakIX/l9tcpid'
2513
+
2514
+ - regex: 'MegaIndex.ru/([\d+\.])'
2515
+ name: 'MegaIndex'
2516
+ category: 'Crawler'
2517
+ url: 'https://megaindex.com/crawler'
2518
+
2519
+ - regex: 'Seekport'
2520
+ name: 'Seekport'
2521
+ category: 'Crawler'
2522
+ url: 'http://www.seekport.com/'
2523
+ producer:
2524
+ name: 'SISTRIX GmbH'
2525
+ url: 'https://www.sistrix.de/'
2526
+
2527
+ - regex: 'seolyt/([\d+\.])'
2528
+ name: 'seolyt'
2529
+ category: 'Crawler'
2530
+ url: 'https://seolyt.com/'
2531
+
2532
+ - regex: 'YaK/([\d+\.])'
2533
+ name: 'YaK'
2534
+ category: 'Crawler'
2535
+ url: 'https://www.linkfluence.com/'
2536
+ producer:
2537
+ name: 'Linkfluence SAS'
2538
+ url: 'https://www.linkfluence.com/'
2539
+
2540
+ - regex: 'KomodiaBot/([\d+\.])'
2541
+ name: 'KomodiaBot'
2542
+ category: 'Crawler'
2543
+ url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
2544
+ producer:
2545
+ name: 'Komodia Inc.'
2546
+ url: 'https://www.komodia.com/'
2547
+
2548
+ - regex: 'Neevabot/([\d+\.])'
2549
+ name: 'Neevabot'
2550
+ category: 'Search bot'
2551
+ url: 'https://neeva.com/neevabot'
2552
+ producer:
2553
+ name: 'Neeva Inc.'
2554
+ url: 'https://neeva.com/'
2555
+
2556
+ - regex: 'LinkPreview/([\d+\.])'
2557
+ name: 'LinkPreview'
2558
+ category: 'Service Agent'
2559
+ url: 'https://www.linkpreview.net/'
2560
+
2561
+ - regex: 'JungleKeyThumbnail/([\d+\.])'
2562
+ name: 'JungleKeyThumbnail'
2563
+ category: 'Crawler'
2564
+ url: 'https://junglekey.com/'
2565
+
2566
+ - regex: 'rocketmonitor(?: |bot/)([\d+\.])'
2567
+ name: 'RocketMonitorBot'
2568
+ category: 'Site Monitor'
2569
+ url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
2570
+ producer:
2571
+ name: 'Radio Mast, Inc.'
2572
+ url: 'https://www.radiomast.io/'
2573
+
2574
+ - regex: 'SitemapParser-VIPnytt/([\d+\.])'
2575
+ name: 'SitemapParser-VIPnytt'
2576
+ category: 'Crawler'
2577
+ url: 'https://github.com/VIPnytt/SitemapParser/'
2578
+
2579
+ - regex: '^Turnitin'
2580
+ name: 'Turnitin'
2581
+ category: 'Crawler'
2582
+ url: 'https://turnitin.com/robot/crawlerinfo.html'
2583
+
2584
+ - regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
2585
+ name: 'Dotcom Monitor'
2586
+ category: 'Site Monitor'
2587
+ url: 'https://www.dotcom-monitor.com'
2588
+
2589
+ - regex: 'ThinkChaos/'
2590
+ name: 'ThinkChaos'
2591
+ category: 'Crawler'
2592
+
2593
+ - regex: 'DataForSeoBot'
2594
+ name: 'DataForSeoBot'
2595
+ category: 'Crawler'
2596
+ url: 'https://dataforseo.com/dataforseo-bot'
2597
+
2598
+ - regex: 'Discordbot/([\d+.]+)'
2599
+ name: 'Discord Bot'
2600
+ category: 'Service Agent'
2601
+ url: 'https://discordapp.com'
2602
+
2603
+ - regex: 'Linespider/([\d+.]+)'
2604
+ name: 'Linespider'
2605
+ category: 'Crawler'
2606
+ url: 'https://lin.ee/4dwXkTH'
2607
+
2608
+ - regex: 'Cincraw/([\d+.]+)'
2609
+ name: 'Cincraw'
2610
+ category: 'Crawler'
2611
+ url: 'http://cincrawdata.net/bot/'
2612
+
2613
+ - regex: 'CISPA Web Analyzer'
2614
+ name: 'CISPA Web Analyzer'
2615
+ category: 'Crawler'
2616
+ url: 'https://notify.cispa.de/'
2617
+ producer:
2618
+ name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH'
2619
+ url: 'https://cispa.de/en'
2620
+
2621
+ - regex: 'IonCrawl'
2622
+ name: 'IONOS Crawler'
2623
+ category: 'Crawler'
2624
+ url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/'
2625
+ producer:
2626
+ name: 'IONOS SE'
2627
+ url: 'https://www.ionos.de/'
2628
+
2629
+ - regex: 'Crawldad'
2630
+ name: 'Crawldad'
2631
+ category: 'Crawler'
2632
+ url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
2633
+
2634
+ - regex: 'https://securitytxt-scan.cs.hm.edu/'
2635
+ name: 'security.txt scanserver'
2636
+ category: 'Security Checker'
2637
+ url: 'https://securitytxt-scan.cs.hm.edu/'
2638
+ producer:
2639
+ name: 'Hochschule für angewandte Wissenschaften München'
2640
+ url: 'https://www.hm.edu/'
2641
+
2642
+ # Generic detections
2643
+ - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2054
2644
  name: 'Generic Bot'