device_detector 1.0.4 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -7
  4. data/lib/device_detector/browser.rb +364 -0
  5. data/lib/device_detector/client.rb +8 -0
  6. data/lib/device_detector/device.rb +1204 -2
  7. data/lib/device_detector/os.rb +39 -12
  8. data/lib/device_detector/parser.rb +1 -1
  9. data/lib/device_detector/version.rb +1 -1
  10. data/lib/device_detector/version_extractor.rb +28 -0
  11. data/lib/device_detector.rb +57 -13
  12. data/regexes/bots.yml +651 -61
  13. data/regexes/client/browser_engine.yml +7 -1
  14. data/regexes/client/browsers.yml +774 -78
  15. data/regexes/client/feed_readers.yml +4 -10
  16. data/regexes/client/libraries.yml +71 -2
  17. data/regexes/client/mediaplayers.yml +25 -1
  18. data/regexes/client/mobile_apps.yml +923 -73
  19. data/regexes/client/pim.yml +65 -2
  20. data/regexes/device/cameras.yml +5 -5
  21. data/regexes/device/car_browsers.yml +16 -0
  22. data/regexes/device/consoles.yml +13 -1
  23. data/regexes/device/mobiles.yml +17029 -3889
  24. data/regexes/device/notebooks.yml +114 -0
  25. data/regexes/device/portable_media_player.yml +36 -9
  26. data/regexes/device/shell_tv.yml +117 -0
  27. data/regexes/device/televisions.yml +439 -34
  28. data/regexes/oss.yml +620 -284
  29. metadata +9 -131
  30. data/.gitignore +0 -14
  31. data/.rubocop.yml +0 -49
  32. data/.ruby-version +0 -1
  33. data/.travis.yml +0 -14
  34. data/Gemfile +0 -8
  35. data/Rakefile +0 -85
  36. data/device_detector.gemspec +0 -27
  37. data/spec/device_detector/bot_fixtures_spec.rb +0 -30
  38. data/spec/device_detector/client_fixtures_spec.rb +0 -31
  39. data/spec/device_detector/concrete_user_agent_spec.rb +0 -135
  40. data/spec/device_detector/detector_fixtures_spec.rb +0 -100
  41. data/spec/device_detector/device_fixtures_spec.rb +0 -36
  42. data/spec/device_detector/device_spec.rb +0 -131
  43. data/spec/device_detector/memory_cache_spec.rb +0 -148
  44. data/spec/device_detector/model_extractor_spec.rb +0 -63
  45. data/spec/device_detector/os_fixtures_spec.rb +0 -26
  46. data/spec/device_detector/version_extractor_spec.rb +0 -79
  47. data/spec/device_detector_spec.rb +0 -189
  48. data/spec/fixtures/client/browser.yml +0 -2836
  49. data/spec/fixtures/client/feed_reader.yml +0 -199
  50. data/spec/fixtures/client/library.yml +0 -193
  51. data/spec/fixtures/client/mediaplayer.yml +0 -163
  52. data/spec/fixtures/client/mobile_app.yml +0 -217
  53. data/spec/fixtures/client/pim.yml +0 -115
  54. data/spec/fixtures/detector/bots.yml +0 -3726
  55. data/spec/fixtures/detector/camera.yml +0 -141
  56. data/spec/fixtures/detector/car_browser.yml +0 -81
  57. data/spec/fixtures/detector/console.yml +0 -321
  58. data/spec/fixtures/detector/desktop.yml +0 -5461
  59. data/spec/fixtures/detector/feature_phone.yml +0 -891
  60. data/spec/fixtures/detector/feed_reader.yml +0 -551
  61. data/spec/fixtures/detector/mediaplayer.yml +0 -253
  62. data/spec/fixtures/detector/mobile_apps.yml +0 -494
  63. data/spec/fixtures/detector/phablet.yml +0 -4167
  64. data/spec/fixtures/detector/portable_media_player.yml +0 -178
  65. data/spec/fixtures/detector/smart_display.yml +0 -61
  66. data/spec/fixtures/detector/smart_speaker.yml +0 -55
  67. data/spec/fixtures/detector/smartphone-1.yml +0 -9927
  68. data/spec/fixtures/detector/smartphone-10.yml +0 -9977
  69. data/spec/fixtures/detector/smartphone-11.yml +0 -9891
  70. data/spec/fixtures/detector/smartphone-12.yml +0 -9906
  71. data/spec/fixtures/detector/smartphone-13.yml +0 -9920
  72. data/spec/fixtures/detector/smartphone-14.yml +0 -2662
  73. data/spec/fixtures/detector/smartphone-2.yml +0 -9992
  74. data/spec/fixtures/detector/smartphone-3.yml +0 -9945
  75. data/spec/fixtures/detector/smartphone-4.yml +0 -9923
  76. data/spec/fixtures/detector/smartphone-5.yml +0 -9914
  77. data/spec/fixtures/detector/smartphone-6.yml +0 -9962
  78. data/spec/fixtures/detector/smartphone-7.yml +0 -9899
  79. data/spec/fixtures/detector/smartphone-8.yml +0 -9931
  80. data/spec/fixtures/detector/smartphone-9.yml +0 -9899
  81. data/spec/fixtures/detector/smartphone.yml +0 -9984
  82. data/spec/fixtures/detector/tablet-1.yml +0 -9995
  83. data/spec/fixtures/detector/tablet-2.yml +0 -9977
  84. data/spec/fixtures/detector/tablet-3.yml +0 -9959
  85. data/spec/fixtures/detector/tablet-4.yml +0 -4528
  86. data/spec/fixtures/detector/tablet.yml +0 -9971
  87. data/spec/fixtures/detector/tv.yml +0 -4933
  88. data/spec/fixtures/detector/unknown.yml +0 -3236
  89. data/spec/fixtures/detector/wearable.yml +0 -61
  90. data/spec/fixtures/device/camera.yml +0 -19
  91. data/spec/fixtures/device/car_browser.yml +0 -13
  92. data/spec/fixtures/device/console.yml +0 -79
  93. data/spec/fixtures/parser/oss.yml +0 -1082
  94. data/spec/fixtures/parser/vendorfragments.yml +0 -168
  95. data/spec/spec_helper.rb +0 -9
data/regexes/bots.yml CHANGED
@@ -5,10 +5,10 @@
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
- - regex: '360Spider(-Image|-Video)?'
8
+ - regex: '360Spider'
9
9
  name: '360Spider'
10
10
  category: 'Search bot'
11
- url: 'http://www.so.com/help/help_3_2.html'
11
+ url: 'https://www.so.com/help/help_3_2.html'
12
12
  producer:
13
13
  name: 'Online Media Group, Inc.'
14
14
  url: ''
@@ -40,26 +40,34 @@
40
40
  - regex: 'AhrefsBot'
41
41
  name: 'aHrefs Bot'
42
42
  category: 'Crawler'
43
- url: 'http://ahrefs.com/robot'
43
+ url: 'https://ahrefs.com/robot'
44
44
  producer:
45
45
  name: 'Ahrefs Pte Ltd'
46
- url: 'http://ahrefs.com/robot'
46
+ url: 'https://ahrefs.com/robot'
47
47
 
48
48
  - regex: 'ia_archiver|alexabot|verifybot'
49
49
  name: 'Alexa Crawler'
50
50
  category: 'Search bot'
51
- url: 'https://alexa.zendesk.com/hc/en-us/sections/200100794-Crawlers'
51
+ url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
52
52
  producer:
53
53
  name: 'Alexa Internet'
54
- url: 'http://www.alexa.com'
54
+ url: 'https://www.alexa.com'
55
55
 
56
56
  - regex: 'alexa site audit'
57
57
  name: 'Alexa Site Audit'
58
58
  category: 'Site Monitor'
59
- url: 'http://www.alexa.com/help/webmasters'
59
+ url: 'https://support.alexa.com/hc/en-us/articles/200450194'
60
60
  producer:
61
61
  name: 'Alexa Internet'
62
- url: 'http://www.alexa.com'
62
+ url: 'https://www.alexa.com'
63
+
64
+ - regex: 'Amazonbot'
65
+ name: 'Amazon Bot'
66
+ category: 'Crawler'
67
+ url: 'https://developer.amazon.com/support/amazonbot'
68
+ producer:
69
+ name: 'Amazon.com, Inc.'
70
+ url: 'https://www.amazon.com/'
63
71
 
64
72
  - regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
65
73
  name: 'Amazon Route53 Health Check'
@@ -82,23 +90,31 @@
82
90
  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
83
91
  producer:
84
92
  name: 'The Apache Software Foundation'
85
- url: 'http://www.apache.org/foundation/'
93
+ url: 'https://www.apache.org/foundation/'
86
94
 
87
95
  - regex: 'Applebot'
88
96
  name: 'Applebot'
89
97
  category: 'Crawler'
90
- url: 'http://www.apple.com/go/applebot'
98
+ url: 'https://support.apple.com/en-us/HT204683'
91
99
  producer:
92
100
  name: 'Apple Inc'
93
- url: 'http://www.apple.com'
101
+ url: 'https://www.apple.com'
102
+
103
+ - regex: "AppSignalBot"
104
+ name: "AppSignalBot"
105
+ category: "Site Monitor"
106
+ url: "https://docs.appsignal.com/uptime-monitoring/"
107
+ producer:
108
+ name: "AppSignal"
109
+ url: "https://appsignal.com/"
94
110
 
95
111
  - regex: 'Arachni'
96
112
  name: 'Arachni'
97
113
  category: 'Security Checker'
98
- url: 'http://www.arachni-scanner.com'
114
+ url: 'https://www.arachni-scanner.com/'
99
115
  producer:
100
116
  name: 'Sarosys LLC'
101
- url: 'http://www.sarosys.com/'
117
+ url: 'https://www.sarosys.com/'
102
118
 
103
119
  - regex: 'AspiegelBot'
104
120
  name: 'AspiegelBot'
@@ -112,7 +128,7 @@
112
128
  name: 'Castro 2'
113
129
  category: 'Service Agent'
114
130
  url: 'http://supertop.co/castro/'
115
- producer:
131
+ producer:
116
132
  name: 'Supertop'
117
133
  url: 'http://supertop.co'
118
134
 
@@ -127,10 +143,10 @@
127
143
  - regex: 'archive\.org_bot|special_archiver'
128
144
  name: 'archive.org bot'
129
145
  category: 'Crawler'
130
- url: 'http://www.archive.org/details/archive.org_bot'
146
+ url: 'https://archive.org/details/archive.org_bot'
131
147
  producer:
132
148
  name: 'The Internet Archive'
133
- url: 'http://www.archive.org'
149
+ url: 'https://archive.org'
134
150
 
135
151
  - regex: 'Ask Jeeves/Teoma'
136
152
  name: 'Ask Jeeves'
@@ -156,7 +172,7 @@
156
172
  name: '2.0Promotion GbR'
157
173
  url: 'http://www.backlinktest.com'
158
174
 
159
- - regex: 'baiduspider(-image)?|baidu Transcoder|baidu.*spider'
175
+ - regex: 'Baidu.*spider|baidu Transcoder'
160
176
  name: 'Baidu Spider'
161
177
  category: 'Search bot'
162
178
  url: 'http://www.baidu.com/search/spider.htm'
@@ -172,6 +188,14 @@
172
188
  name: ''
173
189
  url: ''
174
190
 
191
+ - regex: 'Better Uptime Bot'
192
+ name: 'Better Uptime Bot'
193
+ category: 'Site Monitor'
194
+ url: 'https://betteruptime.com/faq'
195
+ producer:
196
+ name: 'Better Uptime'
197
+ url: 'https://betteruptime.com/'
198
+
175
199
  - regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
176
200
  name: 'BingBot'
177
201
  category: 'Search bot'
@@ -188,7 +212,7 @@
188
212
  name: 'Blekko'
189
213
  url: 'http://blekko.com'
190
214
 
191
- - regex: 'BLEXBot(Test)?'
215
+ - regex: 'BLEXBot'
192
216
  name: 'BLEXBot Crawler'
193
217
  category: 'Crawler'
194
218
  url: 'http://webmeup-crawler.com'
@@ -217,7 +241,7 @@
217
241
  category: 'Crawler'
218
242
  producer:
219
243
  name: 'BoardReader'
220
- url: 'http://boardreader.com/'
244
+ url: 'https://boardreader.com/'
221
245
 
222
246
  - regex: 'BountiiBot'
223
247
  name: 'Bountii Bot'
@@ -283,6 +307,14 @@
283
307
  name: 'CloudFlare'
284
308
  url: 'http://www.cloudflare.com'
285
309
 
310
+ - regex: 'CloudflareDiagnostics'
311
+ name: 'Cloudflare Diagnostics'
312
+ category: 'Site Monitor'
313
+ url: 'https://www.cloudflare.com/'
314
+ producer:
315
+ name: 'Cloudflare'
316
+ url: 'https://www.cloudflare.com'
317
+
286
318
  - regex: 'CloudFlare-AlwaysOnline'
287
319
  name: 'CloudFlare Always Online'
288
320
  category: 'Site Monitor'
@@ -291,7 +323,7 @@
291
323
  name: 'CloudFlare'
292
324
  url: 'http://www.cloudflare.com'
293
325
 
294
- - regex: 'coccoc|coccocbot(-ads|-fast|-image|-shopping|-web)?'
326
+ - regex: 'coccoc.com'
295
327
  name: 'Cốc Cốc Bot'
296
328
  url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
297
329
  category: 'Search bot'
@@ -339,7 +371,6 @@
339
371
  name: 'Datanyze'
340
372
  url: 'https://www.datanyze.com'
341
373
 
342
-
343
374
  - regex: 'Dataprovider'
344
375
  name: 'Dataprovider'
345
376
  category: 'Crawler'
@@ -364,7 +395,7 @@
364
395
  name: 'DAZOO.FR'
365
396
  url: 'http://dazoo.fr'
366
397
 
367
- - regex: 'discobot(-news)?'
398
+ - regex: 'discobot'
368
399
  name: 'Discobot'
369
400
  category: 'Search bot'
370
401
  url: 'http://discoveryengine.com/discobot.html'
@@ -427,7 +458,7 @@
427
458
  name: 'eVenture Capital Partners II, LLC'
428
459
  url: 'http://www.eventures.vc/'
429
460
 
430
- - regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
461
+ - regex: 'Exabot|ExaleadCloudview'
431
462
  name: 'ExaBot'
432
463
  category: 'Crawler'
433
464
  url: 'http://www.exabot.com/go/robot'
@@ -451,7 +482,7 @@
451
482
  name: 'SEOmoz, Inc.'
452
483
  url: 'http://moz.com/'
453
484
 
454
- - regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
485
+ - regex: 'facebookexternalhit|facebookplatform|facebookexternalua|facebookcatalog'
455
486
  name: 'Facebook External Hit'
456
487
  category: 'Social Media Agent'
457
488
  url: 'https://www.facebook.com/externalhit_uatext.php'
@@ -483,7 +514,7 @@
483
514
  name: 'David Smith & Developing Perspective, LLC'
484
515
  url: 'https://david-smith.org'
485
516
 
486
- - regex: '(Meta)?Feedly(Bot|App)?'
517
+ - regex: 'Feedly'
487
518
  name: 'Feedly'
488
519
  url: 'http://www.feedly.com'
489
520
  category: 'Feed Fetcher'
@@ -549,6 +580,10 @@
549
580
  name: ''
550
581
  url: ''
551
582
 
583
+ - regex: 'gobuster'
584
+ name: 'Gobuster'
585
+ url: 'https://github.com/OJ/gobuster'
586
+
552
587
  - regex: 'ichiro/mobile goo'
553
588
  name: 'Goo'
554
589
  category: 'Search bot'
@@ -557,6 +592,10 @@
557
592
  name: 'NTT Resonant'
558
593
  url: 'http://goo.ne.jp'
559
594
 
595
+ - regex: 'Storebot-Google'
596
+ name: 'Google StoreBot'
597
+ category: 'Crawler'
598
+
560
599
  - regex: 'Google Favicon'
561
600
  name: 'Google Favicon'
562
601
  category: 'Crawler'
@@ -649,7 +688,7 @@
649
688
  name: 'Visual Meta'
650
689
  url: 'https://www.shopalike.cz/'
651
690
 
652
- - regex: 'AdsBot-Google(-Mobile)?|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot(-Mobile|-Image|-Video|-News)?|GoogleProducer|Google.*/\+/web/snippet'
691
+ - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
653
692
  name: 'Googlebot'
654
693
  category: 'Search bot'
655
694
  url: 'http://www.google.com/bot.html'
@@ -663,7 +702,7 @@
663
702
  url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
664
703
  producer:
665
704
  name: 'The Internet Archive'
666
- url: 'http://www.archive.org'
705
+ url: 'https://archive.org'
667
706
 
668
707
  - regex: 'HubSpot '
669
708
  name: 'HubSpot'
@@ -672,7 +711,6 @@
672
711
  name: 'HubSpot Inc.'
673
712
  url: 'https://www.hubspot.com'
674
713
 
675
-
676
714
  - regex: 'HTTPMon'
677
715
  name: 'HTTPMon'
678
716
  category: 'Site Monitor'
@@ -704,7 +742,7 @@
704
742
 
705
743
  - regex: 'ips-agent'
706
744
  name: 'IPS Agent'
707
- category: 'crawler'
745
+ category: 'Crawler'
708
746
  producer:
709
747
  name: 'VeriSign, Inc'
710
748
  url: 'http://www.verisign.com/'
@@ -717,6 +755,10 @@
717
755
  name: ''
718
756
  url: 'https://ip-guide.com'
719
757
 
758
+ - regex: 'k6/[0-9\.]+'
759
+ name: 'K6'
760
+ url: 'https://k6.io/'
761
+
720
762
  - regex: 'kouio'
721
763
  name: 'Kouio'
722
764
  url: 'http://kouio.com/'
@@ -741,7 +783,7 @@
741
783
  name: 'Lighthouse'
742
784
  url: 'https://developers.google.com/web/tools/lighthouse'
743
785
 
744
- - regex: 'linkdexbot(-mobile)?|linkdex\.com'
786
+ - regex: 'linkdexbot|linkdex\.com'
745
787
  name: 'Linkdex Bot'
746
788
  category: 'Search bot'
747
789
  url: 'http://www.linkdex.com/bots'
@@ -764,7 +806,7 @@
764
806
  name: ''
765
807
  url: ''
766
808
 
767
- - regex: 'Mail\.RU(_Bot)?'
809
+ - regex: 'Mail\.RU'
768
810
  name: 'Mail.Ru Bot'
769
811
  category: 'Search bot'
770
812
  url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
@@ -788,7 +830,7 @@
788
830
  name: ''
789
831
  url: ''
790
832
 
791
- - regex : 'masscan'
833
+ - regex: 'masscan'
792
834
  name: 'masscan'
793
835
  url: 'https://github.com/robertdavidgraham/masscan'
794
836
  category: 'Crawler'
@@ -941,12 +983,12 @@
941
983
  category: 'Crawler'
942
984
  producer:
943
985
  name: 'Nuzzel'
944
- url: https://www.nuzzel.com/
986
+ url: 'https://www.nuzzel.com/'
945
987
 
946
988
  - regex: 'Octopus [0-9]'
947
989
  name: 'Octopus'
948
990
 
949
- - regex: 'omgili(?:bot)?'
991
+ - regex: 'omgili'
950
992
  name: 'Omgili bot'
951
993
  category: 'Search bot'
952
994
  url: 'http://www.omgili.com/Crawler.html'
@@ -1023,7 +1065,15 @@
1023
1065
  name: 'Bitlove'
1024
1066
  url: 'http://bitlove.org/'
1025
1067
 
1026
- - regex: 'psbot(-page)?'
1068
+ - regex: 'PRTG Network Monitor'
1069
+ name: 'PRTG Network Monitor'
1070
+ category: 'Network Monitor'
1071
+ url: 'https://www.paessler.com/prtg'
1072
+ producer:
1073
+ name: 'Paessler AG'
1074
+ url: 'https://www.paessler.com'
1075
+
1076
+ - regex: 'psbot'
1027
1077
  name: 'Picsearch bot'
1028
1078
  category: 'Search bot'
1029
1079
  url: 'http://www.picsearch.com/bot.html'
@@ -1031,7 +1081,7 @@
1031
1081
  name: 'Picsearch'
1032
1082
  url: 'http://www.picsearch.com'
1033
1083
 
1034
- - regex: 'Pingdom\.com'
1084
+ - regex: 'Pingdom(?:\.com|TMS)'
1035
1085
  name: 'Pingdom Bot'
1036
1086
  category: 'Site Monitor'
1037
1087
  url: ''
@@ -1047,6 +1097,14 @@
1047
1097
  name: 'Quora'
1048
1098
  url: 'http://www.quora.com'
1049
1099
 
1100
+ - regex: 'Quora-Bot'
1101
+ name: 'Quora Bot'
1102
+ category: 'Crawler'
1103
+ url: ''
1104
+ producer:
1105
+ name: 'Quora'
1106
+ url: 'https://www.quora.com/'
1107
+
1050
1108
  - regex: 'RamblerMail'
1051
1109
  name: 'RamblerMail Image Proxy'
1052
1110
  category: 'Crawler'
@@ -1230,7 +1288,7 @@
1230
1288
  name: 'SISTRIX GmbH'
1231
1289
  url: 'http://www.sistrix.de'
1232
1290
 
1233
- - regex: 'SISTRIX Optimizer'
1291
+ - regex: 'compatible; (?:SISTRIX )?Optimizer'
1234
1292
  name: 'SISTRIX Optimizer'
1235
1293
  category: 'Crawler'
1236
1294
  url: 'https://optimizer.sistrix.com'
@@ -1275,6 +1333,14 @@
1275
1333
  name: 'Tencent Holdings'
1276
1334
  url: 'http://www.soso.com'
1277
1335
 
1336
+ - regex: 'Sprinklr'
1337
+ name: 'Sprinklr'
1338
+ category: 'Crawler'
1339
+ url: ''
1340
+ producer:
1341
+ name: 'Sprinklr, Inc.'
1342
+ url: 'https://www.sprinklr.com/'
1343
+
1278
1344
  - regex: 'sqlmap/'
1279
1345
  name: 'sqlmap'
1280
1346
  category: 'Security Checker'
@@ -1320,13 +1386,20 @@
1320
1386
  name: 'Tailrank Inc'
1321
1387
  url: 'http://spinn3r.com'
1322
1388
 
1323
- - regex: 'Sputnik(Image)?Bot'
1389
+ - regex: 'SputnikBot'
1324
1390
  name: 'Sputnik Bot'
1325
- category: ''
1391
+ category: 'Crawler'
1392
+ url: ''
1393
+
1394
+ - regex: 'SputnikFaviconBot'
1395
+ name: 'Sputnik Favicon Bot'
1396
+ category: 'Crawler'
1397
+ url: ''
1398
+
1399
+ - regex: 'SputnikImageBot'
1400
+ name: 'Sputnik Image Bot'
1401
+ category: 'Crawler'
1326
1402
  url: ''
1327
- producer:
1328
- name: ''
1329
- url: ''
1330
1403
 
1331
1404
  - regex: 'SurveyBot'
1332
1405
  name: 'Survey Bot'
@@ -1545,7 +1618,7 @@
1545
1618
  category: 'Site Monitor'
1546
1619
  url: 'https://www.webpagetest.org'
1547
1620
 
1548
- - regex: 'WeSEE(:Search)?'
1621
+ - regex: 'WeSEE'
1549
1622
  name: 'WeSEE:Search'
1550
1623
  category: 'Search bot'
1551
1624
  url: 'http://www.wesee.com/bot'
@@ -1617,6 +1690,14 @@
1617
1690
  name: 'Yahoo! Inc.'
1618
1691
  url: 'http://www.yahoo.com'
1619
1692
 
1693
+ - regex: 'YahooMailProxy'
1694
+ name: 'Yahoo! Mail Proxy'
1695
+ category: 'Service Agent'
1696
+ url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
1697
+ producer:
1698
+ name: 'Yahoo! Inc.'
1699
+ url: 'http://www.yahoo.com'
1700
+
1620
1701
  - regex: 'YahooCacheSystem'
1621
1702
  name: 'Yahoo! Cache System'
1622
1703
  category: 'Crawler'
@@ -1633,7 +1714,7 @@
1633
1714
  name: 'Yahoo! Japan Corp.'
1634
1715
  url: 'https://www.yahoo.co.jp/'
1635
1716
 
1636
- - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1717
+ - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1637
1718
  name: 'Yandex Bot'
1638
1719
  category: 'Search bot'
1639
1720
  url: 'http://www.yandex.com/bots'
@@ -1641,7 +1722,7 @@
1641
1722
  name: 'Yandex LLC'
1642
1723
  url: 'http://company.yandex.com'
1643
1724
 
1644
- - regex: 'Yeti|NaverJapan'
1725
+ - regex: 'Yeti|NaverJapan|AdsBot-Naver'
1645
1726
  name: 'Yeti/Naverbot'
1646
1727
  category: 'Search bot'
1647
1728
  url: 'http://help.naver.com/robots/'
@@ -1718,18 +1799,18 @@
1718
1799
  - regex: 'HubPages.*crawlingpolicy'
1719
1800
  name: 'HubPages'
1720
1801
  category: 'Crawler'
1721
- url: 'http://hubpages.com/help/crawlingpolicy'
1802
+ url: 'https://hubpages.com/help/crawlingpolicy'
1722
1803
  producer:
1723
- name: 'HubPages'
1724
- url: 'http://hubpages.com/'
1804
+ name: 'HubPages, Inc.'
1805
+ url: 'https://discover.hubpages.com/'
1725
1806
 
1726
1807
  - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
1727
1808
  name: 'Pinterest'
1728
- url: 'http://www.pinterest.com/bot.html'
1809
+ url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
1729
1810
  category: 'Crawler'
1730
1811
  producer:
1731
1812
  name: 'Pinterest'
1732
- url: 'http://www.pinterest.com/'
1813
+ url: 'https://www.pinterest.com/'
1733
1814
 
1734
1815
  - regex: 'Site24x7'
1735
1816
  name: 'Site24x7 Website Monitoring'
@@ -1771,13 +1852,13 @@
1771
1852
  name: 'Monitor.Us'
1772
1853
  url: 'http://www.monitor.us'
1773
1854
 
1774
- - regex: 'Catchpoint( bot)?'
1855
+ - regex: 'Catchpoint'
1775
1856
  name: 'Catchpoint'
1776
1857
  category: 'Site Monitor'
1777
- url: ''
1858
+ url: 'https://www.catchpoint.com/'
1778
1859
  producer:
1779
1860
  name: 'Catchpoint Systems'
1780
- url: 'http://www.catchpoint.com/'
1861
+ url: 'https://www.catchpoint.com/'
1781
1862
 
1782
1863
  - regex: 'bitlybot'
1783
1864
  name: 'BitlyBot'
@@ -1845,7 +1926,7 @@
1845
1926
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1846
1927
  name: 'RSSRadio Bot'
1847
1928
 
1848
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9)'
1929
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
1849
1930
  name: 'Generic Bot'
1850
1931
 
1851
1932
  - regex: '^sentry'
@@ -1955,10 +2036,10 @@
1955
2036
  - regex: 'BoardReader Favicon Fetcher'
1956
2037
  name: 'BoardReader'
1957
2038
  category: 'Search bot'
1958
- url: 'http://boardreader.com/'
2039
+ url: 'https://boardreader.com/'
1959
2040
  producer:
1960
2041
  name: 'Effyis Inc'
1961
- url: 'http://boardreader.com/'
2042
+ url: 'https://boardreader.com/'
1962
2043
 
1963
2044
  - regex: 'IDG/IT'
1964
2045
  name: 'IDG/IT'
@@ -2003,7 +2084,7 @@
2003
2084
  - regex: 'oBot'
2004
2085
  name: 'oBot'
2005
2086
  category: 'Search bot'
2006
- url: 'http://www.xforce-security.com/crawler/'
2087
+ url: 'https://www.xforce-security.com/crawler/'
2007
2088
  producer:
2008
2089
  name: 'IBM Germany Research & Development GmbH'
2009
2090
  url: 'https://exchange.xforce.ibmcloud.com/'
@@ -2040,15 +2121,524 @@
2040
2121
  name: 'Carbon60 Operating Co. Ltd.'
2041
2122
  url: 'https://www.carbon60.com/'
2042
2123
 
2043
- # Generic detections
2044
-
2045
2124
  - regex: 'Nutch'
2046
2125
  name: 'Nutch-based Bot'
2047
2126
  category: 'Crawler'
2048
2127
  url: 'https://nutch.apache.org'
2049
2128
  producer:
2050
2129
  name: 'The Apache Software Foundation'
2051
- url: 'http://www.apache.org/foundation/'
2130
+ url: 'https://www.apache.org/foundation/'
2131
+
2132
+ - regex: 'Seobility'
2133
+ name: 'Seobility'
2134
+ category: 'Crawler'
2135
+ url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
2136
+
2137
+ - regex: 'Vercelbot'
2138
+ name: 'Vercel Bot'
2139
+ category: 'Service bot'
2140
+ url: 'https://vercel.com'
2141
+
2142
+ - regex: 'Grammarly'
2143
+ name: 'Grammarly'
2144
+ category: 'Service bot'
2145
+ url: 'https://www.grammarly.com'
2146
+
2147
+ - regex: 'Robozilla'
2148
+ name: 'Robozilla'
2149
+ category: 'Crawler'
2150
+
2151
+ - regex: 'Domains Project'
2152
+ name: 'Domains Project'
2153
+ category: 'Crawler'
2154
+ url: 'https://domainsproject.org'
2155
+
2156
+ - regex: 'PetalBot'
2157
+ name: 'Petal Bot'
2158
+ category: 'Crawler'
2159
+ url: 'https://aspiegel.com/petalbot'
2160
+
2161
+ - regex: 'SerendeputyBot'
2162
+ name: 'Serendeputy Bot'
2163
+ category: 'Crawler'
2164
+ url: 'https://serendeputy.com/about/serendeputy-bot'
2165
+
2166
+ - regex: 'ias-va.*admantx.*service-fetcher'
2167
+ name: 'ADmantX Service Fetcher'
2168
+ category: 'Service bot'
2169
+ url: 'https://www.admantx.com/service-fetcher.html'
2170
+
2171
+ - regex: 'SemanticScholarBot'
2172
+ name: 'Semantic Scholar Bot'
2173
+ category: 'Crawler'
2174
+ url: 'https://www.semanticscholar.org/crawler'
2175
+
2176
+ - regex: 'VelenPublicWebCrawler'
2177
+ name: 'Velen Public Web Crawler'
2178
+ category: 'Crawler'
2179
+ url: 'https://hunter.io/robot'
2180
+
2181
+ - regex: 'Barkrowler'
2182
+ name: 'Barkrowler'
2183
+ category: 'Crawler'
2184
+ url: 'http://www.exensa.com/crawl'
2185
+
2186
+ - regex: 'BDCbot'
2187
+ name: 'BDCbot'
2188
+ category: 'Crawler'
2189
+ url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
2190
+ producer:
2191
+ name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
2192
+ url: 'https://bigdatacorp.com.br/'
2193
+
2194
+ - regex: 'adbeat'
2195
+ name: 'Adbeat'
2196
+ category: 'Crawler'
2197
+ url: 'https://www.adbeat.com/operation_policy'
2198
+ producer:
2199
+ name: 'PPC Labs LLC'
2200
+ url: 'https://www.adbeat.com/'
2201
+
2202
+ - regex: 'BW/(?:(\d+[\.\d]+))'
2203
+ name: 'BuiltWith'
2204
+ category: 'Crawler'
2205
+ url: 'https://builtwith.com/biup'
2206
+ producer:
2207
+ name: 'BuiltWith Pty Ltd'
2208
+ url: 'https://builtwith.com/'
2209
+
2210
+ - regex: 'https://whatis.contentkingapp.com'
2211
+ name: 'ContentKing'
2212
+ category: 'Site Monitor'
2213
+ url: 'https://whatis.contentkingapp.com/'
2214
+ producer:
2215
+ name: 'ContentKing BV'
2216
+ url: 'https://www.contentkingapp.com/'
2217
+
2218
+ - regex: 'MicroAdBot'
2219
+ name: 'MicroAdBot'
2220
+ category: 'Crawler'
2221
+ url: 'https://www.microad.co.jp/'
2222
+ producer:
2223
+ name: 'MicroAd, Inc.'
2224
+ url: 'https://www.microad.co.jp/'
2225
+
2226
+ - regex: 'PingAdmin.Ru'
2227
+ name: 'PingAdmin.Ru'
2228
+ category: 'Site Monitor'
2229
+ url: 'https://ping-admin.ru/'
2230
+
2231
+ - regex: 'notifyninja.+monitoring'
2232
+ name: 'Notify Ninja'
2233
+ category: 'Site Monitor'
2234
+ url: 'http://notifyninja.com'
2235
+
2236
+ - regex: 'WebDataStats'
2237
+ name: 'WebDataStats'
2238
+ category: 'Crawler'
2239
+ url: 'https://webdatastats.com/policy.html'
2240
+ producer:
2241
+ name: 'WebTehRazrabotka LLC'
2242
+ url: 'https://webdatastats.com/'
2243
+
2244
+ - regex: 'parse.ly scraper'
2245
+ name: 'parse.ly'
2246
+ category: 'Crawler'
2247
+ url: 'https://www.parse.ly/help/integration/crawler'
2248
+ producer:
2249
+ name: 'Parsely, Inc.'
2250
+ url: 'https://www.parse.ly/'
2251
+
2252
+ - regex: 'Nimbostratus-Bot'
2253
+ name: 'Nimbostratus Bot'
2254
+ category: 'Site Monitor'
2255
+ url: 'http://cloudsystemnetworks.com'
2052
2256
 
2053
- - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2257
+ - regex: 'HeartRails_Capture/\d'
2258
+ name: 'Heart Rails Capture'
2259
+ category: 'Service Agent'
2260
+ url: 'http://capture.heartrails.com'
2261
+
2262
+ - regex: 'Project-Resonance'
2263
+ name: 'Project Resonance'
2264
+ category: 'Crawler'
2265
+ url: 'http://project-resonance.com'
2266
+
2267
+ - regex: 'DataXu/\d'
2268
+ name: 'DataXu'
2269
+ category: 'Service Agent'
2270
+ url: 'https://advertising.roku.com/dataxu'
2271
+ producer:
2272
+ name: 'Roku, Inc.'
2273
+ url: 'https://roku.com'
2274
+
2275
+ - regex: 'Cocolyzebot'
2276
+ name: 'Cocolyzebot'
2277
+ category: 'Crawler'
2278
+ url: 'https://cocolyze.com/en/cocolyzebot'
2279
+ producer:
2280
+ name: 'VSI INNOVATION SAS'
2281
+ url: 'https://vsi-innovation.com/'
2282
+
2283
+ - regex: 'veryhip'
2284
+ name: 'VeryHip'
2285
+ category: 'Crawler'
2286
+ url: 'https://veryhip.com/'
2287
+ producer:
2288
+ name: 'VeryHip'
2289
+ url: 'https://veryhip.com/'
2290
+
2291
+ - regex: 'LinkpadBot'
2292
+ name: 'LinkpadBot'
2293
+ category: 'Crawler'
2294
+ url: 'https://www.linkpad.org/'
2295
+ producer:
2296
+ name: 'Solomono LLC'
2297
+ url: 'https://www.linkpad.org/'
2298
+
2299
+ - regex: 'MuscatFerret'
2300
+ name: 'MuscatFerret'
2301
+ category: 'Crawler'
2302
+ url: 'http://www.webtop.com/'
2303
+
2304
+ - regex: 'PageThing.com'
2305
+ name: 'PageThing'
2306
+ category: 'Crawler'
2307
+ url: 'https://www.pagething.com/'
2308
+ producer:
2309
+ name: 'SPECIALNOISE LTD'
2310
+ url: 'https://www.specialnoise.com/'
2311
+
2312
+ - regex: 'ArchiveBox'
2313
+ name: 'ArchiveBox'
2314
+ url: 'https://archivebox.io/'
2315
+ category: 'Crawler'
2316
+ producer:
2317
+ name: ''
2318
+ url: ''
2319
+
2320
+ - regex: 'Choosito'
2321
+ name: 'Choosito'
2322
+ url: 'https://www.choosito.com/'
2323
+ category: 'Crawler'
2324
+ producer:
2325
+ name: 'Choosito! Inc.'
2326
+ url: 'https://www.choosito.com/'
2327
+
2328
+ - regex: 'datagnionbot'
2329
+ name: 'datagnionbot'
2330
+ url: 'https://www.datagnion.com/bot.html'
2331
+ category: 'Crawler'
2332
+ producer:
2333
+ name: 'DATAGNION GMBH'
2334
+ url: 'https://www.datagnion.com/'
2335
+
2336
+ - regex: 'WhatCMS'
2337
+ name: 'WhatCMS'
2338
+ url: 'https://whatcms.org/'
2339
+ category: 'Crawler'
2340
+ producer:
2341
+ name: 'Nineteen Ten LLC'
2342
+ url: 'https://whatcms.org/'
2343
+
2344
+ - regex: 'httpx'
2345
+ name: 'httpx'
2346
+ url: 'https://github.com/projectdiscovery/httpx'
2347
+ category: 'Crawler'
2348
+ producer:
2349
+ name: ''
2350
+ url: ''
2351
+
2352
+ - regex: 'scaninfo@expanseinc.com'
2353
+ name: 'Expanse'
2354
+ category: 'Security Checker'
2355
+ url: 'https://expanse.co/'
2356
+ producer:
2357
+ name: 'Expanse Inc.'
2358
+ url: 'https://expanse.co/'
2359
+
2360
+ - regex: 'HuaweiWebCatBot'
2361
+ name: 'HuaweiWebCatBot'
2362
+ category: 'Crawler'
2363
+ url: 'https://isecurity.huawei.com'
2364
+ producer:
2365
+ name: 'Huawei Technologies Co., Ltd.'
2366
+ url: 'https://huawei.com'
2367
+
2368
+ - regex: 'Hatena-Favicon'
2369
+ name: 'Hatena Favicon'
2370
+ category: 'Crawler'
2371
+ url: 'https://www.hatena.ne.jp/faq/'
2372
+ producer:
2373
+ name: 'Hatena Co., Ltd.'
2374
+ url: 'https://www.hatena.ne.jp'
2375
+
2376
+ - regex: 'RyowlEngine/(\d+)'
2377
+ name: 'Ryowl'
2378
+ category: 'Crawler'
2379
+ url: 'https://ryowl.org'
2380
+
2381
+ - regex: 'OdklBot/(\d+)'
2382
+ name: 'Odnoklassniki Bot'
2383
+ category: 'Crawler'
2384
+ url: 'https://odnoklassniki.ru'
2385
+
2386
+ - regex: 'Mediatoolkitbot'
2387
+ name: 'Mediatoolkit Bot'
2388
+ category: 'Crawler'
2389
+ url: 'https://mediatoolkit.com'
2390
+
2391
+ - regex: 'ZoominfoBot'
2392
+ name: 'ZoominfoBot'
2393
+ category: 'Crawler'
2394
+ url: 'https://www.zoominfo.com'
2395
+
2396
+ - regex: 'WeViKaBot/([\d+\.])'
2397
+ name: 'WeViKaBot'
2398
+ category: 'Crawler'
2399
+ url: 'http://www.wevika.de'
2400
+
2401
+ - regex: 'SEOkicks'
2402
+ name: 'SEOkicks'
2403
+ category: 'Crawler'
2404
+ url: 'https://www.seokicks.de/robot.html'
2405
+
2406
+ - regex: 'Plukkie/([\d+\.])'
2407
+ name: 'Plukkie'
2408
+ category: 'Crawler'
2409
+ url: 'http://www.botje.com/plukkie.htm'
2410
+
2411
+ - regex: 'proximic;'
2412
+ name: 'Comscore'
2413
+ category: 'Crawler'
2414
+ url: 'https://www.comscore.com/Web-Crawler'
2415
+
2416
+ - regex: 'SurdotlyBot/([\d+\.])'
2417
+ name: 'SurdotlyBot'
2418
+ category: 'Crawler'
2419
+ url: 'http://sur.ly/bot.html'
2420
+
2421
+ - regex: 'Gowikibot/([\d+\.])'
2422
+ name: 'Gowikibot'
2423
+ category: 'Crawler'
2424
+ url: 'http://www.gowikibot.com'
2425
+
2426
+ - regex: 'SabsimBot/([\d+\.])'
2427
+ name: 'SabsimBot'
2428
+ category: 'Crawler'
2429
+ url: 'https://sabsim.com'
2430
+
2431
+ - regex: 'LumtelBot/([\d+\.])'
2432
+ name: 'LumtelBot'
2433
+ category: 'Crawler'
2434
+ url: 'https://umtel.com'
2435
+
2436
+ - regex: 'PiplBot'
2437
+ name: 'PiplBot'
2438
+ category: 'Crawler'
2439
+ url: 'http://www.pipl.com/bot'
2440
+
2441
+ - regex: 'woobot/([\d+\.])'
2442
+ name: 'WooRank'
2443
+ category: 'Crawler'
2444
+ url: 'https://www.woorank.com/bot'
2445
+
2446
+ - regex: 'Cookiebot/([\d+\.])'
2447
+ name: 'Cookiebot'
2448
+ category: 'Crawler'
2449
+ url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
2450
+ producer:
2451
+ name: 'Cybot A/S'
2452
+ url: 'https://www.cybot.com/'
2453
+
2454
+ - regex: 'NetSystemsResearch'
2455
+ name: 'NetSystemsResearch'
2456
+ category: 'Security Checker'
2457
+ url: 'https://www.netsystemsresearch.com/'
2458
+ producer:
2459
+ name: 'NET SYSTEMS RESEARCH LLC'
2460
+ url: 'https://www.netsystemsresearch.com/'
2461
+
2462
+ - regex: 'CensysInspect/([\d+\.])'
2463
+ name: 'CensysInspect'
2464
+ category: 'Security Checker'
2465
+ url: 'https://about.censys.io/'
2466
+ producer:
2467
+ name: 'Censys, Inc.'
2468
+ url: 'https://censys.io/'
2469
+
2470
+ - regex: 'gdnplus.com'
2471
+ name: 'GDNP'
2472
+ category: 'Crawler'
2473
+ url: 'https://gdnplus.com/'
2474
+ producer:
2475
+ name: 'Global Digital Network Plus, LLC'
2476
+ url: 'https://gdnplus.com/'
2477
+
2478
+ - regex: 'WellKnownBot/([\d+\.])'
2479
+ name: 'WellKnownBot'
2480
+ category: 'Crawler'
2481
+ url: 'https://well-known.dev'
2482
+
2483
+ - regex: 'Adsbot/([\d+\.])'
2484
+ name: 'Adsbot'
2485
+ category: 'Crawler'
2486
+ url: 'https://seostar.co/robot/'
2487
+
2488
+ - regex: 'MTRobot/([\d+\.])'
2489
+ name: 'MTRobot'
2490
+ category: 'Crawler'
2491
+ url: 'https://metrics-tools.de/robot.html'
2492
+ producer:
2493
+ name: 'Metrics Tools'
2494
+ url: 'https://metrics-tools.de/'
2495
+
2496
+ - regex: 'serpstatbot/([\d+\.])'
2497
+ name: 'serpstatbot'
2498
+ category: 'Crawler'
2499
+ url: 'http://serpstatbot.com/'
2500
+ producer:
2501
+ name: 'Netpeak Ltd'
2502
+ url: 'https://netpeak.net/'
2503
+
2504
+ - regex: 'colly'
2505
+ name: 'colly'
2506
+ category: 'Crawler'
2507
+ url: 'https://github.com/gocolly/colly/'
2508
+
2509
+ - regex: 'l9tcpid/v([\d+\.])'
2510
+ name: 'l9tcpid'
2511
+ category: 'Security Checker'
2512
+ url: 'https://github.com/LeakIX/l9tcpid'
2513
+
2514
+ - regex: 'MegaIndex.ru/([\d+\.])'
2515
+ name: 'MegaIndex'
2516
+ category: 'Crawler'
2517
+ url: 'https://megaindex.com/crawler'
2518
+
2519
+ - regex: 'Seekport'
2520
+ name: 'Seekport'
2521
+ category: 'Crawler'
2522
+ url: 'http://www.seekport.com/'
2523
+ producer:
2524
+ name: 'SISTRIX GmbH'
2525
+ url: 'https://www.sistrix.de/'
2526
+
2527
+ - regex: 'seolyt/([\d+\.])'
2528
+ name: 'seolyt'
2529
+ category: 'Crawler'
2530
+ url: 'https://seolyt.com/'
2531
+
2532
+ - regex: 'YaK/([\d+\.])'
2533
+ name: 'YaK'
2534
+ category: 'Crawler'
2535
+ url: 'https://www.linkfluence.com/'
2536
+ producer:
2537
+ name: 'Linkfluence SAS'
2538
+ url: 'https://www.linkfluence.com/'
2539
+
2540
+ - regex: 'KomodiaBot/([\d+\.])'
2541
+ name: 'KomodiaBot'
2542
+ category: 'Crawler'
2543
+ url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
2544
+ producer:
2545
+ name: 'Komodia Inc.'
2546
+ url: 'https://www.komodia.com/'
2547
+
2548
+ - regex: 'Neevabot/([\d+\.])'
2549
+ name: 'Neevabot'
2550
+ category: 'Search bot'
2551
+ url: 'https://neeva.com/neevabot'
2552
+ producer:
2553
+ name: 'Neeva Inc.'
2554
+ url: 'https://neeva.com/'
2555
+
2556
+ - regex: 'LinkPreview/([\d+\.])'
2557
+ name: 'LinkPreview'
2558
+ category: 'Service Agent'
2559
+ url: 'https://www.linkpreview.net/'
2560
+
2561
+ - regex: 'JungleKeyThumbnail/([\d+\.])'
2562
+ name: 'JungleKeyThumbnail'
2563
+ category: 'Crawler'
2564
+ url: 'https://junglekey.com/'
2565
+
2566
+ - regex: 'rocketmonitor(?: |bot/)([\d+\.])'
2567
+ name: 'RocketMonitorBot'
2568
+ category: 'Site Monitor'
2569
+ url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
2570
+ producer:
2571
+ name: 'Radio Mast, Inc.'
2572
+ url: 'https://www.radiomast.io/'
2573
+
2574
+ - regex: 'SitemapParser-VIPnytt/([\d+\.])'
2575
+ name: 'SitemapParser-VIPnytt'
2576
+ category: 'Crawler'
2577
+ url: 'https://github.com/VIPnytt/SitemapParser/'
2578
+
2579
+ - regex: '^Turnitin'
2580
+ name: 'Turnitin'
2581
+ category: 'Crawler'
2582
+ url: 'https://turnitin.com/robot/crawlerinfo.html'
2583
+
2584
+ - regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
2585
+ name: 'Dotcom Monitor'
2586
+ category: 'Site Monitor'
2587
+ url: 'https://www.dotcom-monitor.com'
2588
+
2589
+ - regex: 'ThinkChaos/'
2590
+ name: 'ThinkChaos'
2591
+ category: 'Crawler'
2592
+
2593
+ - regex: 'DataForSeoBot'
2594
+ name: 'DataForSeoBot'
2595
+ category: 'Crawler'
2596
+ url: 'https://dataforseo.com/dataforseo-bot'
2597
+
2598
+ - regex: 'Discordbot/([\d+.]+)'
2599
+ name: 'Discord Bot'
2600
+ category: 'Service Agent'
2601
+ url: 'https://discordapp.com'
2602
+
2603
+ - regex: 'Linespider/([\d+.]+)'
2604
+ name: 'Linespider'
2605
+ category: 'Crawler'
2606
+ url: 'https://lin.ee/4dwXkTH'
2607
+
2608
+ - regex: 'Cincraw/([\d+.]+)'
2609
+ name: 'Cincraw'
2610
+ category: 'Crawler'
2611
+ url: 'http://cincrawdata.net/bot/'
2612
+
2613
+ - regex: 'CISPA Web Analyzer'
2614
+ name: 'CISPA Web Analyzer'
2615
+ category: 'Crawler'
2616
+ url: 'https://notify.cispa.de/'
2617
+ producer:
2618
+ name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH'
2619
+ url: 'https://cispa.de/en'
2620
+
2621
+ - regex: 'IonCrawl'
2622
+ name: 'IONOS Crawler'
2623
+ category: 'Crawler'
2624
+ url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/'
2625
+ producer:
2626
+ name: 'IONOS SE'
2627
+ url: 'https://www.ionos.de/'
2628
+
2629
+ - regex: 'Crawldad'
2630
+ name: 'Crawldad'
2631
+ category: 'Crawler'
2632
+ url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
2633
+
2634
+ - regex: 'https://securitytxt-scan.cs.hm.edu/'
2635
+ name: 'security.txt scanserver'
2636
+ category: 'Security Checker'
2637
+ url: 'https://securitytxt-scan.cs.hm.edu/'
2638
+ producer:
2639
+ name: 'Hochschule für angewandte Wissenschaften München'
2640
+ url: 'https://www.hm.edu/'
2641
+
2642
+ # Generic detections
2643
+ - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2054
2644
  name: 'Generic Bot'