device_detector 1.0.3 → 1.0.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (94) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +15 -0
  3. data/README.md +8 -7
  4. data/lib/device_detector/bot.rb +2 -2
  5. data/lib/device_detector/browser.rb +364 -0
  6. data/lib/device_detector/client.rb +11 -2
  7. data/lib/device_detector/device.rb +1247 -22
  8. data/lib/device_detector/memory_cache.rb +5 -5
  9. data/lib/device_detector/metadata_extractor.rb +7 -8
  10. data/lib/device_detector/model_extractor.rb +3 -3
  11. data/lib/device_detector/name_extractor.rb +2 -2
  12. data/lib/device_detector/os.rb +150 -116
  13. data/lib/device_detector/parser.rb +23 -10
  14. data/lib/device_detector/version.rb +1 -1
  15. data/lib/device_detector/version_extractor.rb +29 -2
  16. data/lib/device_detector.rb +73 -40
  17. data/regexes/bots.yml +868 -62
  18. data/regexes/client/browser_engine.yml +11 -2
  19. data/regexes/client/browsers.yml +1132 -112
  20. data/regexes/client/feed_readers.yml +5 -11
  21. data/regexes/client/libraries.yml +86 -2
  22. data/regexes/client/mediaplayers.yml +39 -3
  23. data/regexes/client/mobile_apps.yml +940 -66
  24. data/regexes/client/pim.yml +66 -3
  25. data/regexes/device/cameras.yml +6 -6
  26. data/regexes/device/car_browsers.yml +23 -3
  27. data/regexes/device/consoles.yml +15 -3
  28. data/regexes/device/mobiles.yml +18351 -3566
  29. data/regexes/device/notebooks.yml +114 -0
  30. data/regexes/device/portable_media_player.yml +36 -9
  31. data/regexes/device/shell_tv.yml +117 -0
  32. data/regexes/device/televisions.yml +440 -35
  33. data/regexes/oss.yml +635 -284
  34. data/regexes/vendorfragments.yml +5 -1
  35. metadata +21 -118
  36. data/.gitignore +0 -14
  37. data/.travis.yml +0 -14
  38. data/Gemfile +0 -8
  39. data/Rakefile +0 -96
  40. data/device_detector.gemspec +0 -26
  41. data/spec/device_detector/bot_fixtures_spec.rb +0 -30
  42. data/spec/device_detector/client_fixtures_spec.rb +0 -31
  43. data/spec/device_detector/concrete_user_agent_spec.rb +0 -135
  44. data/spec/device_detector/detector_fixtures_spec.rb +0 -100
  45. data/spec/device_detector/device_fixtures_spec.rb +0 -36
  46. data/spec/device_detector/device_spec.rb +0 -151
  47. data/spec/device_detector/memory_cache_spec.rb +0 -148
  48. data/spec/device_detector/model_extractor_spec.rb +0 -63
  49. data/spec/device_detector/os_fixtures_spec.rb +0 -26
  50. data/spec/device_detector/version_extractor_spec.rb +0 -79
  51. data/spec/device_detector_spec.rb +0 -189
  52. data/spec/fixtures/client/browser.yml +0 -2206
  53. data/spec/fixtures/client/feed_reader.yml +0 -199
  54. data/spec/fixtures/client/library.yml +0 -175
  55. data/spec/fixtures/client/mediaplayer.yml +0 -163
  56. data/spec/fixtures/client/mobile_app.yml +0 -193
  57. data/spec/fixtures/client/pim.yml +0 -115
  58. data/spec/fixtures/detector/bots.yml +0 -3260
  59. data/spec/fixtures/detector/camera.yml +0 -121
  60. data/spec/fixtures/detector/car_browser.yml +0 -21
  61. data/spec/fixtures/detector/console.yml +0 -281
  62. data/spec/fixtures/detector/desktop.yml +0 -5361
  63. data/spec/fixtures/detector/feature_phone.yml +0 -891
  64. data/spec/fixtures/detector/feed_reader.yml +0 -551
  65. data/spec/fixtures/detector/mediaplayer.yml +0 -210
  66. data/spec/fixtures/detector/mobile_apps.yml +0 -456
  67. data/spec/fixtures/detector/phablet.yml +0 -3785
  68. data/spec/fixtures/detector/portable_media_player.yml +0 -178
  69. data/spec/fixtures/detector/smart_display.yml +0 -61
  70. data/spec/fixtures/detector/smartphone-1.yml +0 -9953
  71. data/spec/fixtures/detector/smartphone-10.yml +0 -9924
  72. data/spec/fixtures/detector/smartphone-11.yml +0 -9889
  73. data/spec/fixtures/detector/smartphone-12.yml +0 -8655
  74. data/spec/fixtures/detector/smartphone-2.yml +0 -9967
  75. data/spec/fixtures/detector/smartphone-3.yml +0 -9887
  76. data/spec/fixtures/detector/smartphone-4.yml +0 -9911
  77. data/spec/fixtures/detector/smartphone-5.yml +0 -9933
  78. data/spec/fixtures/detector/smartphone-6.yml +0 -9923
  79. data/spec/fixtures/detector/smartphone-7.yml +0 -9892
  80. data/spec/fixtures/detector/smartphone-8.yml +0 -9896
  81. data/spec/fixtures/detector/smartphone-9.yml +0 -9928
  82. data/spec/fixtures/detector/smartphone.yml +0 -9984
  83. data/spec/fixtures/detector/tablet-1.yml +0 -10023
  84. data/spec/fixtures/detector/tablet-2.yml +0 -9968
  85. data/spec/fixtures/detector/tablet-3.yml +0 -7787
  86. data/spec/fixtures/detector/tablet.yml +0 -9951
  87. data/spec/fixtures/detector/tv.yml +0 -3333
  88. data/spec/fixtures/detector/unknown.yml +0 -3283
  89. data/spec/fixtures/device/camera.yml +0 -19
  90. data/spec/fixtures/device/car_browser.yml +0 -7
  91. data/spec/fixtures/device/console.yml +0 -79
  92. data/spec/fixtures/parser/oss.yml +0 -1047
  93. data/spec/fixtures/parser/vendorfragments.yml +0 -162
  94. data/spec/spec_helper.rb +0 -9
data/regexes/bots.yml CHANGED
@@ -1,14 +1,14 @@
1
1
  ###############
2
2
  # Device Detector - The Universal Device Detection library for parsing User Agents
3
3
  #
4
- # @link http://piwik.org
4
+ # @link https://matomo.org
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
- - regex: '360Spider(-Image|-Video)?'
8
+ - regex: '360Spider'
9
9
  name: '360Spider'
10
10
  category: 'Search bot'
11
- url: 'http://www.so.com/help/help_3_2.html'
11
+ url: 'https://www.so.com/help/help_3_2.html'
12
12
  producer:
13
13
  name: 'Online Media Group, Inc.'
14
14
  url: ''
@@ -40,26 +40,34 @@
40
40
  - regex: 'AhrefsBot'
41
41
  name: 'aHrefs Bot'
42
42
  category: 'Crawler'
43
- url: 'http://ahrefs.com/robot'
43
+ url: 'https://ahrefs.com/robot'
44
44
  producer:
45
45
  name: 'Ahrefs Pte Ltd'
46
- url: 'http://ahrefs.com/robot'
46
+ url: 'https://ahrefs.com/robot'
47
47
 
48
48
  - regex: 'ia_archiver|alexabot|verifybot'
49
49
  name: 'Alexa Crawler'
50
50
  category: 'Search bot'
51
- url: 'https://alexa.zendesk.com/hc/en-us/sections/200100794-Crawlers'
51
+ url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
52
52
  producer:
53
53
  name: 'Alexa Internet'
54
- url: 'http://www.alexa.com'
54
+ url: 'https://www.alexa.com'
55
55
 
56
56
  - regex: 'alexa site audit'
57
57
  name: 'Alexa Site Audit'
58
58
  category: 'Site Monitor'
59
- url: 'http://www.alexa.com/help/webmasters'
59
+ url: 'https://support.alexa.com/hc/en-us/articles/200450194'
60
60
  producer:
61
61
  name: 'Alexa Internet'
62
- url: 'http://www.alexa.com'
62
+ url: 'https://www.alexa.com'
63
+
64
+ - regex: 'Amazonbot'
65
+ name: 'Amazon Bot'
66
+ category: 'Crawler'
67
+ url: 'https://developer.amazon.com/support/amazonbot'
68
+ producer:
69
+ name: 'Amazon.com, Inc.'
70
+ url: 'https://www.amazon.com/'
63
71
 
64
72
  - regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
65
73
  name: 'Amazon Route53 Health Check'
@@ -82,29 +90,45 @@
82
90
  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
83
91
  producer:
84
92
  name: 'The Apache Software Foundation'
85
- url: 'http://www.apache.org/foundation/'
93
+ url: 'https://www.apache.org/foundation/'
86
94
 
87
95
  - regex: 'Applebot'
88
96
  name: 'Applebot'
89
97
  category: 'Crawler'
90
- url: 'http://www.apple.com/go/applebot'
98
+ url: 'https://support.apple.com/en-us/HT204683'
91
99
  producer:
92
100
  name: 'Apple Inc'
93
- url: 'http://www.apple.com'
101
+ url: 'https://www.apple.com'
102
+
103
+ - regex: "AppSignalBot"
104
+ name: "AppSignalBot"
105
+ category: "Site Monitor"
106
+ url: "https://docs.appsignal.com/uptime-monitoring/"
107
+ producer:
108
+ name: "AppSignal"
109
+ url: "https://appsignal.com/"
94
110
 
95
111
  - regex: 'Arachni'
96
112
  name: 'Arachni'
97
113
  category: 'Security Checker'
98
- url: 'http://www.arachni-scanner.com'
114
+ url: 'https://www.arachni-scanner.com/'
99
115
  producer:
100
116
  name: 'Sarosys LLC'
101
- url: 'http://www.sarosys.com/'
117
+ url: 'https://www.sarosys.com/'
118
+
119
+ - regex: 'AspiegelBot'
120
+ name: 'AspiegelBot'
121
+ category: 'Crawler'
122
+ url: 'https://aspiegel.com/'
123
+ producer:
124
+ name: 'Huawei'
125
+ url: 'https://www.huawei.com/'
102
126
 
103
127
  - regex: 'Castro 2, Episode Duration Lookup'
104
128
  name: 'Castro 2'
105
129
  category: 'Service Agent'
106
130
  url: 'http://supertop.co/castro/'
107
- producer:
131
+ producer:
108
132
  name: 'Supertop'
109
133
  url: 'http://supertop.co'
110
134
 
@@ -119,10 +143,10 @@
119
143
  - regex: 'archive\.org_bot|special_archiver'
120
144
  name: 'archive.org bot'
121
145
  category: 'Crawler'
122
- url: 'http://www.archive.org/details/archive.org_bot'
146
+ url: 'https://archive.org/details/archive.org_bot'
123
147
  producer:
124
148
  name: 'The Internet Archive'
125
- url: 'http://www.archive.org'
149
+ url: 'https://archive.org'
126
150
 
127
151
  - regex: 'Ask Jeeves/Teoma'
128
152
  name: 'Ask Jeeves'
@@ -148,7 +172,7 @@
148
172
  name: '2.0Promotion GbR'
149
173
  url: 'http://www.backlinktest.com'
150
174
 
151
- - regex: 'baiduspider(-image)?|baidu Transcoder|baidu.*spider'
175
+ - regex: 'Baidu.*spider|baidu Transcoder'
152
176
  name: 'Baidu Spider'
153
177
  category: 'Search bot'
154
178
  url: 'http://www.baidu.com/search/spider.htm'
@@ -164,6 +188,14 @@
164
188
  name: ''
165
189
  url: ''
166
190
 
191
+ - regex: 'Better Uptime Bot'
192
+ name: 'Better Uptime Bot'
193
+ category: 'Site Monitor'
194
+ url: 'https://betteruptime.com/faq'
195
+ producer:
196
+ name: 'Better Uptime'
197
+ url: 'https://betteruptime.com/'
198
+
167
199
  - regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
168
200
  name: 'BingBot'
169
201
  category: 'Search bot'
@@ -180,7 +212,7 @@
180
212
  name: 'Blekko'
181
213
  url: 'http://blekko.com'
182
214
 
183
- - regex: 'BLEXBot(Test)?'
215
+ - regex: 'BLEXBot'
184
216
  name: 'BLEXBot Crawler'
185
217
  category: 'Crawler'
186
218
  url: 'http://webmeup-crawler.com'
@@ -209,7 +241,7 @@
209
241
  category: 'Crawler'
210
242
  producer:
211
243
  name: 'BoardReader'
212
- url: 'http://boardreader.com/'
244
+ url: 'https://boardreader.com/'
213
245
 
214
246
  - regex: 'BountiiBot'
215
247
  name: 'Bountii Bot'
@@ -275,6 +307,14 @@
275
307
  name: 'CloudFlare'
276
308
  url: 'http://www.cloudflare.com'
277
309
 
310
+ - regex: 'CloudflareDiagnostics'
311
+ name: 'Cloudflare Diagnostics'
312
+ category: 'Site Monitor'
313
+ url: 'https://www.cloudflare.com/'
314
+ producer:
315
+ name: 'Cloudflare'
316
+ url: 'https://www.cloudflare.com'
317
+
278
318
  - regex: 'CloudFlare-AlwaysOnline'
279
319
  name: 'CloudFlare Always Online'
280
320
  category: 'Site Monitor'
@@ -283,13 +323,13 @@
283
323
  name: 'CloudFlare'
284
324
  url: 'http://www.cloudflare.com'
285
325
 
286
- - regex: 'coccoc/'
326
+ - regex: 'coccoc.com'
287
327
  name: 'Cốc Cốc Bot'
288
- url: 'http://help.coccoc.com/'
328
+ url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
289
329
  category: 'Search bot'
290
330
  producer:
291
331
  name: 'Cốc Cốc'
292
- url: 'http://coccoc.com/'
332
+ url: 'https://coccoc.com/'
293
333
 
294
334
  - regex: 'collectd'
295
335
  name: 'Collectd'
@@ -331,7 +371,6 @@
331
371
  name: 'Datanyze'
332
372
  url: 'https://www.datanyze.com'
333
373
 
334
-
335
374
  - regex: 'Dataprovider'
336
375
  name: 'Dataprovider'
337
376
  category: 'Crawler'
@@ -356,7 +395,7 @@
356
395
  name: 'DAZOO.FR'
357
396
  url: 'http://dazoo.fr'
358
397
 
359
- - regex: 'discobot(-news)?'
398
+ - regex: 'discobot'
360
399
  name: 'Discobot'
361
400
  category: 'Search bot'
362
401
  url: 'http://discoveryengine.com/discobot.html'
@@ -419,7 +458,7 @@
419
458
  name: 'eVenture Capital Partners II, LLC'
420
459
  url: 'http://www.eventures.vc/'
421
460
 
422
- - regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
461
+ - regex: 'Exabot|ExaleadCloudview'
423
462
  name: 'ExaBot'
424
463
  category: 'Crawler'
425
464
  url: 'http://www.exabot.com/go/robot'
@@ -443,7 +482,7 @@
443
482
  name: 'SEOmoz, Inc.'
444
483
  url: 'http://moz.com/'
445
484
 
446
- - regex: 'facebookexternalhit|facebookplatform'
485
+ - regex: 'facebookexternalhit|facebookplatform|facebookexternalua|facebookcatalog'
447
486
  name: 'Facebook External Hit'
448
487
  category: 'Social Media Agent'
449
488
  url: 'https://www.facebook.com/externalhit_uatext.php'
@@ -475,7 +514,7 @@
475
514
  name: 'David Smith & Developing Perspective, LLC'
476
515
  url: 'https://david-smith.org'
477
516
 
478
- - regex: '(Meta)?Feedly(Bot|App)?'
517
+ - regex: 'Feedly'
479
518
  name: 'Feedly'
480
519
  url: 'http://www.feedly.com'
481
520
  category: 'Feed Fetcher'
@@ -541,6 +580,10 @@
541
580
  name: ''
542
581
  url: ''
543
582
 
583
+ - regex: 'gobuster'
584
+ name: 'Gobuster'
585
+ url: 'https://github.com/OJ/gobuster'
586
+
544
587
  - regex: 'ichiro/mobile goo'
545
588
  name: 'Goo'
546
589
  category: 'Search bot'
@@ -549,6 +592,10 @@
549
592
  name: 'NTT Resonant'
550
593
  url: 'http://goo.ne.jp'
551
594
 
595
+ - regex: 'Storebot-Google'
596
+ name: 'Google StoreBot'
597
+ category: 'Crawler'
598
+
552
599
  - regex: 'Google Favicon'
553
600
  name: 'Google Favicon'
554
601
  category: 'Crawler'
@@ -577,6 +624,14 @@
577
624
  name: 'Google Inc.'
578
625
  url: 'http://www.google.com'
579
626
 
627
+ - regex: 'Google-Cloud-Scheduler'
628
+ name: 'Google Cloud Scheduler'
629
+ category: 'Crawler'
630
+ url: 'https://cloud.google.com/scheduler'
631
+ producer:
632
+ name: 'Google Inc.'
633
+ url: 'https://www.google.com'
634
+
580
635
  - regex: 'Google-Structured-Data-Testing-Tool'
581
636
  name: 'Google Structured Data Testing Tool'
582
637
  category: 'Validator'
@@ -585,6 +640,14 @@
585
640
  name: 'Google Inc.'
586
641
  url: 'http://www.google.com'
587
642
 
643
+ - regex: 'GoogleStackdriverMonitoring'
644
+ name: 'Google Stackdriver Monitoring'
645
+ category: 'Site Monitor'
646
+ url: 'https://cloud.google.com/monitoring'
647
+ producer:
648
+ name: 'Google Inc.'
649
+ url: 'https://www.google.com'
650
+
588
651
  - regex: 'via ggpht\.com GoogleImageProxy'
589
652
  name: 'Gmail Image Proxy'
590
653
  category: 'Crawler'
@@ -592,7 +655,7 @@
592
655
  producer:
593
656
  name: 'Google Inc.'
594
657
  url: 'http://www.google.com'
595
-
658
+
596
659
  - regex: 'SeznamEmailProxy'
597
660
  name: 'Seznam Email Proxy'
598
661
  category: 'Crawler'
@@ -625,7 +688,7 @@
625
688
  name: 'Visual Meta'
626
689
  url: 'https://www.shopalike.cz/'
627
690
 
628
- - regex: 'Googlebot(-Mobile|-Image|-Video|-News)?|Feedfetcher-Google|Google-Test|Google-Site-Verification|Google Web Preview|AdsBot-Google(-Mobile)?|Google-Adwords-Instant|APIs-Google|Mediapartners-Google|Google.*/\+/web/snippet|GoogleProducer|Google[ -]Publisher[ -]Plugin|Google-Shopping-Quality|Google-Adwords-DisplayAds|Google-Assess|Google-AdWords-Express|Google-speakr|Google-Read-Aloud'
691
+ - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
629
692
  name: 'Googlebot'
630
693
  category: 'Search bot'
631
694
  url: 'http://www.google.com/bot.html'
@@ -639,7 +702,7 @@
639
702
  url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
640
703
  producer:
641
704
  name: 'The Internet Archive'
642
- url: 'http://www.archive.org'
705
+ url: 'https://archive.org'
643
706
 
644
707
  - regex: 'HubSpot '
645
708
  name: 'HubSpot'
@@ -648,7 +711,6 @@
648
711
  name: 'HubSpot Inc.'
649
712
  url: 'https://www.hubspot.com'
650
713
 
651
-
652
714
  - regex: 'HTTPMon'
653
715
  name: 'HTTPMon'
654
716
  category: 'Site Monitor'
@@ -680,7 +742,7 @@
680
742
 
681
743
  - regex: 'ips-agent'
682
744
  name: 'IPS Agent'
683
- category: 'crawler'
745
+ category: 'Crawler'
684
746
  producer:
685
747
  name: 'VeriSign, Inc'
686
748
  url: 'http://www.verisign.com/'
@@ -693,6 +755,10 @@
693
755
  name: ''
694
756
  url: 'https://ip-guide.com'
695
757
 
758
+ - regex: 'k6/[0-9\.]+'
759
+ name: 'K6'
760
+ url: 'https://k6.io/'
761
+
696
762
  - regex: 'kouio'
697
763
  name: 'Kouio'
698
764
  url: 'http://kouio.com/'
@@ -717,7 +783,7 @@
717
783
  name: 'Lighthouse'
718
784
  url: 'https://developers.google.com/web/tools/lighthouse'
719
785
 
720
- - regex: 'linkdexbot(-mobile)?|linkdex\.com'
786
+ - regex: 'linkdexbot|linkdex\.com'
721
787
  name: 'Linkdex Bot'
722
788
  category: 'Search bot'
723
789
  url: 'http://www.linkdex.com/bots'
@@ -740,7 +806,7 @@
740
806
  name: ''
741
807
  url: ''
742
808
 
743
- - regex: 'Mail\.RU(_Bot)?'
809
+ - regex: 'Mail\.RU'
744
810
  name: 'Mail.Ru Bot'
745
811
  category: 'Search bot'
746
812
  url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
@@ -764,7 +830,7 @@
764
830
  name: ''
765
831
  url: ''
766
832
 
767
- - regex : 'masscan'
833
+ - regex: 'masscan'
768
834
  name: 'masscan'
769
835
  url: 'https://github.com/robertdavidgraham/masscan'
770
836
  category: 'Crawler'
@@ -917,12 +983,12 @@
917
983
  category: 'Crawler'
918
984
  producer:
919
985
  name: 'Nuzzel'
920
- url: https://www.nuzzel.com/
986
+ url: 'https://www.nuzzel.com/'
921
987
 
922
988
  - regex: 'Octopus [0-9]'
923
989
  name: 'Octopus'
924
990
 
925
- - regex: 'omgili(?:bot)?'
991
+ - regex: 'omgili'
926
992
  name: 'Omgili bot'
927
993
  category: 'Search bot'
928
994
  url: 'http://www.omgili.com/Crawler.html'
@@ -999,7 +1065,15 @@
999
1065
  name: 'Bitlove'
1000
1066
  url: 'http://bitlove.org/'
1001
1067
 
1002
- - regex: 'psbot(-page)?'
1068
+ - regex: 'PRTG Network Monitor'
1069
+ name: 'PRTG Network Monitor'
1070
+ category: 'Network Monitor'
1071
+ url: 'https://www.paessler.com/prtg'
1072
+ producer:
1073
+ name: 'Paessler AG'
1074
+ url: 'https://www.paessler.com'
1075
+
1076
+ - regex: 'psbot'
1003
1077
  name: 'Picsearch bot'
1004
1078
  category: 'Search bot'
1005
1079
  url: 'http://www.picsearch.com/bot.html'
@@ -1007,7 +1081,7 @@
1007
1081
  name: 'Picsearch'
1008
1082
  url: 'http://www.picsearch.com'
1009
1083
 
1010
- - regex: 'Pingdom\.com'
1084
+ - regex: 'Pingdom(?:\.com|TMS)'
1011
1085
  name: 'Pingdom Bot'
1012
1086
  category: 'Site Monitor'
1013
1087
  url: ''
@@ -1023,6 +1097,14 @@
1023
1097
  name: 'Quora'
1024
1098
  url: 'http://www.quora.com'
1025
1099
 
1100
+ - regex: 'Quora-Bot'
1101
+ name: 'Quora Bot'
1102
+ category: 'Crawler'
1103
+ url: ''
1104
+ producer:
1105
+ name: 'Quora'
1106
+ url: 'https://www.quora.com/'
1107
+
1026
1108
  - regex: 'RamblerMail'
1027
1109
  name: 'RamblerMail Image Proxy'
1028
1110
  category: 'Crawler'
@@ -1206,7 +1288,7 @@
1206
1288
  name: 'SISTRIX GmbH'
1207
1289
  url: 'http://www.sistrix.de'
1208
1290
 
1209
- - regex: 'SISTRIX Optimizer'
1291
+ - regex: 'compatible; (?:SISTRIX )?Optimizer'
1210
1292
  name: 'SISTRIX Optimizer'
1211
1293
  category: 'Crawler'
1212
1294
  url: 'https://optimizer.sistrix.com'
@@ -1251,6 +1333,14 @@
1251
1333
  name: 'Tencent Holdings'
1252
1334
  url: 'http://www.soso.com'
1253
1335
 
1336
+ - regex: 'Sprinklr'
1337
+ name: 'Sprinklr'
1338
+ category: 'Crawler'
1339
+ url: ''
1340
+ producer:
1341
+ name: 'Sprinklr, Inc.'
1342
+ url: 'https://www.sprinklr.com/'
1343
+
1254
1344
  - regex: 'sqlmap/'
1255
1345
  name: 'sqlmap'
1256
1346
  category: 'Security Checker'
@@ -1296,13 +1386,20 @@
1296
1386
  name: 'Tailrank Inc'
1297
1387
  url: 'http://spinn3r.com'
1298
1388
 
1299
- - regex: 'Sputnik(Image)?Bot'
1389
+ - regex: 'SputnikBot'
1300
1390
  name: 'Sputnik Bot'
1301
- category: ''
1391
+ category: 'Crawler'
1392
+ url: ''
1393
+
1394
+ - regex: 'SputnikFaviconBot'
1395
+ name: 'Sputnik Favicon Bot'
1396
+ category: 'Crawler'
1397
+ url: ''
1398
+
1399
+ - regex: 'SputnikImageBot'
1400
+ name: 'Sputnik Image Bot'
1401
+ category: 'Crawler'
1302
1402
  url: ''
1303
- producer:
1304
- name: ''
1305
- url: ''
1306
1403
 
1307
1404
  - regex: 'SurveyBot'
1308
1405
  name: 'Survey Bot'
@@ -1521,7 +1618,7 @@
1521
1618
  category: 'Site Monitor'
1522
1619
  url: 'https://www.webpagetest.org'
1523
1620
 
1524
- - regex: 'WeSEE(:Search)?'
1621
+ - regex: 'WeSEE'
1525
1622
  name: 'WeSEE:Search'
1526
1623
  category: 'Search bot'
1527
1624
  url: 'http://www.wesee.com/bot'
@@ -1561,6 +1658,14 @@
1561
1658
  name: 'Wotbox'
1562
1659
  url: 'http://www.wotbox.com'
1563
1660
 
1661
+ - regex: 'XenForo'
1662
+ name: 'XenForo'
1663
+ category: 'Service Agent'
1664
+ url: 'https://xenforo.com/'
1665
+ producer:
1666
+ name: 'XenForo Ltd.'
1667
+ url: 'https://xenforo.com/'
1668
+
1564
1669
  - regex: 'yacybot'
1565
1670
  name: 'YaCy'
1566
1671
  category: 'Search bot'
@@ -1585,6 +1690,14 @@
1585
1690
  name: 'Yahoo! Inc.'
1586
1691
  url: 'http://www.yahoo.com'
1587
1692
 
1693
+ - regex: 'YahooMailProxy'
1694
+ name: 'Yahoo! Mail Proxy'
1695
+ category: 'Service Agent'
1696
+ url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
1697
+ producer:
1698
+ name: 'Yahoo! Inc.'
1699
+ url: 'http://www.yahoo.com'
1700
+
1588
1701
  - regex: 'YahooCacheSystem'
1589
1702
  name: 'Yahoo! Cache System'
1590
1703
  category: 'Crawler'
@@ -1593,7 +1706,15 @@
1593
1706
  name: 'Yahoo! Inc.'
1594
1707
  url: 'http://www.yahoo.com'
1595
1708
 
1596
- - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher'
1709
+ - regex: 'Y!J-BRW'
1710
+ name: 'Yahoo! Japan BRW'
1711
+ category: 'Crawler'
1712
+ url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
1713
+ producer:
1714
+ name: 'Yahoo! Japan Corp.'
1715
+ url: 'https://www.yahoo.co.jp/'
1716
+
1717
+ - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1597
1718
  name: 'Yandex Bot'
1598
1719
  category: 'Search bot'
1599
1720
  url: 'http://www.yandex.com/bots'
@@ -1601,7 +1722,7 @@
1601
1722
  name: 'Yandex LLC'
1602
1723
  url: 'http://company.yandex.com'
1603
1724
 
1604
- - regex: 'Yeti'
1725
+ - regex: 'Yeti|NaverJapan|AdsBot-Naver'
1605
1726
  name: 'Yeti/Naverbot'
1606
1727
  category: 'Search bot'
1607
1728
  url: 'http://help.naver.com/robots/'
@@ -1678,18 +1799,18 @@
1678
1799
  - regex: 'HubPages.*crawlingpolicy'
1679
1800
  name: 'HubPages'
1680
1801
  category: 'Crawler'
1681
- url: 'http://hubpages.com/help/crawlingpolicy'
1802
+ url: 'https://hubpages.com/help/crawlingpolicy'
1682
1803
  producer:
1683
- name: 'HubPages'
1684
- url: 'http://hubpages.com/'
1804
+ name: 'HubPages, Inc.'
1805
+ url: 'https://discover.hubpages.com/'
1685
1806
 
1686
- - regex: 'Pinterest/\d\.\d.*www\.pinterest\.com.*'
1807
+ - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
1687
1808
  name: 'Pinterest'
1688
- url: ''
1809
+ url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
1689
1810
  category: 'Crawler'
1690
1811
  producer:
1691
1812
  name: 'Pinterest'
1692
- url: 'http://www.pinterest.com/'
1813
+ url: 'https://www.pinterest.com/'
1693
1814
 
1694
1815
  - regex: 'Site24x7'
1695
1816
  name: 'Site24x7 Website Monitoring'
@@ -1731,13 +1852,13 @@
1731
1852
  name: 'Monitor.Us'
1732
1853
  url: 'http://www.monitor.us'
1733
1854
 
1734
- - regex: 'Catchpoint( bot)?'
1855
+ - regex: 'Catchpoint'
1735
1856
  name: 'Catchpoint'
1736
1857
  category: 'Site Monitor'
1737
- url: ''
1858
+ url: 'https://www.catchpoint.com/'
1738
1859
  producer:
1739
1860
  name: 'Catchpoint Systems'
1740
- url: 'http://www.catchpoint.com/'
1861
+ url: 'https://www.catchpoint.com/'
1741
1862
 
1742
1863
  - regex: 'bitlybot'
1743
1864
  name: 'BitlyBot'
@@ -1805,7 +1926,7 @@
1805
1926
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1806
1927
  name: 'RSSRadio Bot'
1807
1928
 
1808
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
1929
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
1809
1930
  name: 'Generic Bot'
1810
1931
 
1811
1932
  - regex: '^sentry'
@@ -1824,7 +1945,181 @@
1824
1945
  name: 'The Knowledge AI'
1825
1946
  category: 'Crawler'
1826
1947
 
1827
- # Generic detections
1948
+ - regex: 'Embedly'
1949
+ name: 'Embedly'
1950
+ category: 'Crawler'
1951
+ url: 'https://support.embed.ly/hc/en-us'
1952
+ producer:
1953
+ name: 'A Medium, Corp.'
1954
+ url: 'https://medium.com/'
1955
+
1956
+ - regex: 'BrandVerity'
1957
+ name: 'BrandVerity'
1958
+ category: 'Crawler'
1959
+ url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
1960
+ producer:
1961
+ name: 'BrandVerity, Inc.'
1962
+ url: 'https://www.brandverity.com/'
1963
+
1964
+ - regex: 'Kaspersky Lab CFR link resolver'
1965
+ name: 'Kaspersky'
1966
+ category: 'Security Checker'
1967
+ url: 'https://www.kaspersky.com/'
1968
+ producer:
1969
+ name: 'AO Kaspersky Lab'
1970
+ url: 'https://www.kaspersky.com/'
1971
+
1972
+ - regex: 'eZ Publish Link Validator'
1973
+ name: 'eZ Publish Link Validator'
1974
+ category: 'Crawler'
1975
+ url: 'https://ez.no/'
1976
+ producer:
1977
+ name: 'eZ Systems AS'
1978
+ url: 'https://ez.no/'
1979
+
1980
+ - regex: 'woorankreview'
1981
+ name: 'WooRank'
1982
+ category: 'Search bot'
1983
+ url: 'https://www.woorank.com/'
1984
+ producer:
1985
+ name: 'WooRank sprl'
1986
+ url: 'https://www.woorank.com/'
1987
+
1988
+ - regex: '(Match|LinkCheck) by Siteimprove.com'
1989
+ name: 'Siteimprove'
1990
+ category: 'Search bot'
1991
+ url: 'https://siteimprove.com/'
1992
+ producer:
1993
+ name: 'Siteimprove GmbH'
1994
+ url: 'https://siteimprove.com/'
1995
+
1996
+ - regex: 'CATExplorador'
1997
+ name: 'CATExplorador'
1998
+ category: 'Search bot'
1999
+ url: 'https://fundacio.cat/ca/domini/'
2000
+ producer:
2001
+ name: 'Fundació puntCAT'
2002
+ url: 'https://fundacio.cat/ca/domini/'
2003
+
2004
+ - regex: 'Buck'
2005
+ name: 'Buck'
2006
+ category: 'Search bot'
2007
+ url: 'https://hypefactors.com/'
2008
+ producer:
2009
+ name: 'Hypefactors A/S'
2010
+ url: 'https://hypefactors.com/'
2011
+
2012
+ - regex: 'tracemyfile'
2013
+ name: 'TraceMyFile'
2014
+ category: 'Search bot'
2015
+ url: 'https://www.tracemyfile.com/'
2016
+ producer:
2017
+ name: 'Idee Inc.'
2018
+ url: 'http://ideeinc.com/'
2019
+
2020
+ - regex: 'zelist.ro feed parser'
2021
+ name: 'Ze List'
2022
+ url: 'https://www.zelist.ro/'
2023
+ category: 'Feed Fetcher'
2024
+ producer:
2025
+ name: 'Treeworks SRL'
2026
+ url: 'https://www.tree.ro/'
2027
+
2028
+ - regex: 'weborama-fetcher'
2029
+ name: 'Weborama'
2030
+ category: 'Search bot'
2031
+ url: 'https://weborama.com/'
2032
+ producer:
2033
+ name: 'Weborama SA'
2034
+ url: 'https://weborama.com/'
2035
+
2036
+ - regex: 'BoardReader Favicon Fetcher'
2037
+ name: 'BoardReader'
2038
+ category: 'Search bot'
2039
+ url: 'https://boardreader.com/'
2040
+ producer:
2041
+ name: 'Effyis Inc'
2042
+ url: 'https://boardreader.com/'
2043
+
2044
+ - regex: 'IDG/IT'
2045
+ name: 'IDG/IT'
2046
+ category: 'Search bot'
2047
+ url: 'https://spaziodati.eu/'
2048
+ producer:
2049
+ name: 'SpazioDati S.r.l.'
2050
+ url: 'https://spaziodati.eu/'
2051
+
2052
+ - regex: 'Bytespider'
2053
+ name: 'Bytespider'
2054
+ category: 'Search bot'
2055
+ url: 'https://bytedance.com/'
2056
+ producer:
2057
+ name: 'ByteDance Ltd.'
2058
+ url: 'https://bytedance.com/'
2059
+
2060
+ - regex: 'WikiDo'
2061
+ name: 'WikiDo'
2062
+ category: 'Search bot'
2063
+ url: 'https://www.wikido.com/'
2064
+ producer:
2065
+ name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
2066
+ url: 'https://www.wikido.com/'
2067
+
2068
+ - regex: 'AwarioSmartBot'
2069
+ name: 'Awario'
2070
+ category: 'Search bot'
2071
+ url: 'https://awario.com/bots.html'
2072
+ producer:
2073
+ name: 'Awario'
2074
+ url: 'https://awario.com/'
2075
+
2076
+ - regex: 'AwarioRssBot'
2077
+ name: 'Awario'
2078
+ category: 'Feed Fetcher'
2079
+ url: 'https://awario.com/bots.html'
2080
+ producer:
2081
+ name: 'Awario'
2082
+ url: 'https://awario.com/'
2083
+
2084
+ - regex: 'oBot'
2085
+ name: 'oBot'
2086
+ category: 'Search bot'
2087
+ url: 'https://www.xforce-security.com/crawler/'
2088
+ producer:
2089
+ name: 'IBM Germany Research & Development GmbH'
2090
+ url: 'https://exchange.xforce.ibmcloud.com/'
2091
+
2092
+ - regex: 'SMTBot'
2093
+ name: 'SMTBot'
2094
+ category: 'Search bot'
2095
+ url: 'https://www.similartech.com/smtbot'
2096
+ producer:
2097
+ name: 'SimilarTech Ltd.'
2098
+ url: 'https://www.similartech.com/'
2099
+
2100
+ - regex: 'LCC'
2101
+ name: 'LCC'
2102
+ category: 'Search bot'
2103
+ url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
2104
+ producer:
2105
+ name: 'Universität Leipzig'
2106
+ url: 'https://www.uni-leipzig.de/'
2107
+
2108
+ - regex: 'Startpagina-Linkchecker'
2109
+ name: 'Startpagina Linkchecker'
2110
+ category: 'Search bot'
2111
+ url: 'https://www.startpagina.nl/linkchecker'
2112
+ producer:
2113
+ name: 'Startpagina B.V.'
2114
+ url: 'https://www.startpagina.nl/'
2115
+
2116
+ - regex: 'GTmetrix'
2117
+ name: 'GTmetrix'
2118
+ category: 'Crawler'
2119
+ url: 'https://gtmetrix.com/'
2120
+ producer:
2121
+ name: 'Carbon60 Operating Co. Ltd.'
2122
+ url: 'https://www.carbon60.com/'
1828
2123
 
1829
2124
  - regex: 'Nutch'
1830
2125
  name: 'Nutch-based Bot'
@@ -1832,7 +2127,518 @@
1832
2127
  url: 'https://nutch.apache.org'
1833
2128
  producer:
1834
2129
  name: 'The Apache Software Foundation'
1835
- url: 'http://www.apache.org/foundation/'
2130
+ url: 'https://www.apache.org/foundation/'
2131
+
2132
+ - regex: 'Seobility'
2133
+ name: 'Seobility'
2134
+ category: 'Crawler'
2135
+ url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
2136
+
2137
+ - regex: 'Vercelbot'
2138
+ name: 'Vercel Bot'
2139
+ category: 'Service bot'
2140
+ url: 'https://vercel.com'
2141
+
2142
+ - regex: 'Grammarly'
2143
+ name: 'Grammarly'
2144
+ category: 'Service bot'
2145
+ url: 'https://www.grammarly.com'
2146
+
2147
+ - regex: 'Robozilla'
2148
+ name: 'Robozilla'
2149
+ category: 'Crawler'
2150
+
2151
+ - regex: 'Domains Project'
2152
+ name: 'Domains Project'
2153
+ category: 'Crawler'
2154
+ url: 'https://domainsproject.org'
2155
+
2156
+ - regex: 'PetalBot'
2157
+ name: 'Petal Bot'
2158
+ category: 'Crawler'
2159
+ url: 'https://aspiegel.com/petalbot'
2160
+
2161
+ - regex: 'SerendeputyBot'
2162
+ name: 'Serendeputy Bot'
2163
+ category: 'Crawler'
2164
+ url: 'https://serendeputy.com/about/serendeputy-bot'
2165
+
2166
+ - regex: 'ias-va.*admantx.*service-fetcher'
2167
+ name: 'ADmantX Service Fetcher'
2168
+ category: 'Service bot'
2169
+ url: 'https://www.admantx.com/service-fetcher.html'
2170
+
2171
+ - regex: 'SemanticScholarBot'
2172
+ name: 'Semantic Scholar Bot'
2173
+ category: 'Crawler'
2174
+ url: 'https://www.semanticscholar.org/crawler'
1836
2175
 
1837
- - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2176
+ - regex: 'VelenPublicWebCrawler'
2177
+ name: 'Velen Public Web Crawler'
2178
+ category: 'Crawler'
2179
+ url: 'https://hunter.io/robot'
2180
+
2181
+ - regex: 'Barkrowler'
2182
+ name: 'Barkrowler'
2183
+ category: 'Crawler'
2184
+ url: 'http://www.exensa.com/crawl'
2185
+
2186
+ - regex: 'BDCbot'
2187
+ name: 'BDCbot'
2188
+ category: 'Crawler'
2189
+ url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
2190
+ producer:
2191
+ name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
2192
+ url: 'https://bigdatacorp.com.br/'
2193
+
2194
+ - regex: 'adbeat'
2195
+ name: 'Adbeat'
2196
+ category: 'Crawler'
2197
+ url: 'https://www.adbeat.com/operation_policy'
2198
+ producer:
2199
+ name: 'PPC Labs LLC'
2200
+ url: 'https://www.adbeat.com/'
2201
+
2202
+ - regex: 'BW/(?:(\d+[\.\d]+))'
2203
+ name: 'BuiltWith'
2204
+ category: 'Crawler'
2205
+ url: 'https://builtwith.com/biup'
2206
+ producer:
2207
+ name: 'BuiltWith Pty Ltd'
2208
+ url: 'https://builtwith.com/'
2209
+
2210
+ - regex: 'https://whatis.contentkingapp.com'
2211
+ name: 'ContentKing'
2212
+ category: 'Site Monitor'
2213
+ url: 'https://whatis.contentkingapp.com/'
2214
+ producer:
2215
+ name: 'ContentKing BV'
2216
+ url: 'https://www.contentkingapp.com/'
2217
+
2218
+ - regex: 'MicroAdBot'
2219
+ name: 'MicroAdBot'
2220
+ category: 'Crawler'
2221
+ url: 'https://www.microad.co.jp/'
2222
+ producer:
2223
+ name: 'MicroAd, Inc.'
2224
+ url: 'https://www.microad.co.jp/'
2225
+
2226
+ - regex: 'PingAdmin.Ru'
2227
+ name: 'PingAdmin.Ru'
2228
+ category: 'Site Monitor'
2229
+ url: 'https://ping-admin.ru/'
2230
+
2231
+ - regex: 'notifyninja.+monitoring'
2232
+ name: 'Notify Ninja'
2233
+ category: 'Site Monitor'
2234
+ url: 'http://notifyninja.com'
2235
+
2236
+ - regex: 'WebDataStats'
2237
+ name: 'WebDataStats'
2238
+ category: 'Crawler'
2239
+ url: 'https://webdatastats.com/policy.html'
2240
+ producer:
2241
+ name: 'WebTehRazrabotka LLC'
2242
+ url: 'https://webdatastats.com/'
2243
+
2244
+ - regex: 'parse.ly scraper'
2245
+ name: 'parse.ly'
2246
+ category: 'Crawler'
2247
+ url: 'https://www.parse.ly/help/integration/crawler'
2248
+ producer:
2249
+ name: 'Parsely, Inc.'
2250
+ url: 'https://www.parse.ly/'
2251
+
2252
+ - regex: 'Nimbostratus-Bot'
2253
+ name: 'Nimbostratus Bot'
2254
+ category: 'Site Monitor'
2255
+ url: 'http://cloudsystemnetworks.com'
2256
+
2257
+ - regex: 'HeartRails_Capture/\d'
2258
+ name: 'Heart Rails Capture'
2259
+ category: 'Service Agent'
2260
+ url: 'http://capture.heartrails.com'
2261
+
2262
+ - regex: 'Project-Resonance'
2263
+ name: 'Project Resonance'
2264
+ category: 'Crawler'
2265
+ url: 'http://project-resonance.com'
2266
+
2267
+ - regex: 'DataXu/\d'
2268
+ name: 'DataXu'
2269
+ category: 'Service Agent'
2270
+ url: 'https://advertising.roku.com/dataxu'
2271
+ producer:
2272
+ name: 'Roku, Inc.'
2273
+ url: 'https://roku.com'
2274
+
2275
+ - regex: 'Cocolyzebot'
2276
+ name: 'Cocolyzebot'
2277
+ category: 'Crawler'
2278
+ url: 'https://cocolyze.com/en/cocolyzebot'
2279
+ producer:
2280
+ name: 'VSI INNOVATION SAS'
2281
+ url: 'https://vsi-innovation.com/'
2282
+
2283
+ - regex: 'veryhip'
2284
+ name: 'VeryHip'
2285
+ category: 'Crawler'
2286
+ url: 'https://veryhip.com/'
2287
+ producer:
2288
+ name: 'VeryHip'
2289
+ url: 'https://veryhip.com/'
2290
+
2291
+ - regex: 'LinkpadBot'
2292
+ name: 'LinkpadBot'
2293
+ category: 'Crawler'
2294
+ url: 'https://www.linkpad.org/'
2295
+ producer:
2296
+ name: 'Solomono LLC'
2297
+ url: 'https://www.linkpad.org/'
2298
+
2299
+ - regex: 'MuscatFerret'
2300
+ name: 'MuscatFerret'
2301
+ category: 'Crawler'
2302
+ url: 'http://www.webtop.com/'
2303
+
2304
+ - regex: 'PageThing.com'
2305
+ name: 'PageThing'
2306
+ category: 'Crawler'
2307
+ url: 'https://www.pagething.com/'
2308
+ producer:
2309
+ name: 'SPECIALNOISE LTD'
2310
+ url: 'https://www.specialnoise.com/'
2311
+
2312
+ - regex: 'ArchiveBox'
2313
+ name: 'ArchiveBox'
2314
+ url: 'https://archivebox.io/'
2315
+ category: 'Crawler'
2316
+ producer:
2317
+ name: ''
2318
+ url: ''
2319
+
2320
+ - regex: 'Choosito'
2321
+ name: 'Choosito'
2322
+ url: 'https://www.choosito.com/'
2323
+ category: 'Crawler'
2324
+ producer:
2325
+ name: 'Choosito! Inc.'
2326
+ url: 'https://www.choosito.com/'
2327
+
2328
+ - regex: 'datagnionbot'
2329
+ name: 'datagnionbot'
2330
+ url: 'https://www.datagnion.com/bot.html'
2331
+ category: 'Crawler'
2332
+ producer:
2333
+ name: 'DATAGNION GMBH'
2334
+ url: 'https://www.datagnion.com/'
2335
+
2336
+ - regex: 'WhatCMS'
2337
+ name: 'WhatCMS'
2338
+ url: 'https://whatcms.org/'
2339
+ category: 'Crawler'
2340
+ producer:
2341
+ name: 'Nineteen Ten LLC'
2342
+ url: 'https://whatcms.org/'
2343
+
2344
+ - regex: 'httpx'
2345
+ name: 'httpx'
2346
+ url: 'https://github.com/projectdiscovery/httpx'
2347
+ category: 'Crawler'
2348
+ producer:
2349
+ name: ''
2350
+ url: ''
2351
+
2352
+ - regex: 'scaninfo@expanseinc.com'
2353
+ name: 'Expanse'
2354
+ category: 'Security Checker'
2355
+ url: 'https://expanse.co/'
2356
+ producer:
2357
+ name: 'Expanse Inc.'
2358
+ url: 'https://expanse.co/'
2359
+
2360
+ - regex: 'HuaweiWebCatBot'
2361
+ name: 'HuaweiWebCatBot'
2362
+ category: 'Crawler'
2363
+ url: 'https://isecurity.huawei.com'
2364
+ producer:
2365
+ name: 'Huawei Technologies Co., Ltd.'
2366
+ url: 'https://huawei.com'
2367
+
2368
+ - regex: 'Hatena-Favicon'
2369
+ name: 'Hatena Favicon'
2370
+ category: 'Crawler'
2371
+ url: 'https://www.hatena.ne.jp/faq/'
2372
+ producer:
2373
+ name: 'Hatena Co., Ltd.'
2374
+ url: 'https://www.hatena.ne.jp'
2375
+
2376
+ - regex: 'RyowlEngine/(\d+)'
2377
+ name: 'Ryowl'
2378
+ category: 'Crawler'
2379
+ url: 'https://ryowl.org'
2380
+
2381
+ - regex: 'OdklBot/(\d+)'
2382
+ name: 'Odnoklassniki Bot'
2383
+ category: 'Crawler'
2384
+ url: 'https://odnoklassniki.ru'
2385
+
2386
+ - regex: 'Mediatoolkitbot'
2387
+ name: 'Mediatoolkit Bot'
2388
+ category: 'Crawler'
2389
+ url: 'https://mediatoolkit.com'
2390
+
2391
+ - regex: 'ZoominfoBot'
2392
+ name: 'ZoominfoBot'
2393
+ category: 'Crawler'
2394
+ url: 'https://www.zoominfo.com'
2395
+
2396
+ - regex: 'WeViKaBot/([\d+\.])'
2397
+ name: 'WeViKaBot'
2398
+ category: 'Crawler'
2399
+ url: 'http://www.wevika.de'
2400
+
2401
+ - regex: 'SEOkicks'
2402
+ name: 'SEOkicks'
2403
+ category: 'Crawler'
2404
+ url: 'https://www.seokicks.de/robot.html'
2405
+
2406
+ - regex: 'Plukkie/([\d+\.])'
2407
+ name: 'Plukkie'
2408
+ category: 'Crawler'
2409
+ url: 'http://www.botje.com/plukkie.htm'
2410
+
2411
+ - regex: 'proximic;'
2412
+ name: 'Comscore'
2413
+ category: 'Crawler'
2414
+ url: 'https://www.comscore.com/Web-Crawler'
2415
+
2416
+ - regex: 'SurdotlyBot/([\d+\.])'
2417
+ name: 'SurdotlyBot'
2418
+ category: 'Crawler'
2419
+ url: 'http://sur.ly/bot.html'
2420
+
2421
+ - regex: 'Gowikibot/([\d+\.])'
2422
+ name: 'Gowikibot'
2423
+ category: 'Crawler'
2424
+ url: 'http://www.gowikibot.com'
2425
+
2426
+ - regex: 'SabsimBot/([\d+\.])'
2427
+ name: 'SabsimBot'
2428
+ category: 'Crawler'
2429
+ url: 'https://sabsim.com'
2430
+
2431
+ - regex: 'LumtelBot/([\d+\.])'
2432
+ name: 'LumtelBot'
2433
+ category: 'Crawler'
2434
+ url: 'https://umtel.com'
2435
+
2436
+ - regex: 'PiplBot'
2437
+ name: 'PiplBot'
2438
+ category: 'Crawler'
2439
+ url: 'http://www.pipl.com/bot'
2440
+
2441
+ - regex: 'woobot/([\d+\.])'
2442
+ name: 'WooRank'
2443
+ category: 'Crawler'
2444
+ url: 'https://www.woorank.com/bot'
2445
+
2446
+ - regex: 'Cookiebot/([\d+\.])'
2447
+ name: 'Cookiebot'
2448
+ category: 'Crawler'
2449
+ url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
2450
+ producer:
2451
+ name: 'Cybot A/S'
2452
+ url: 'https://www.cybot.com/'
2453
+
2454
+ - regex: 'NetSystemsResearch'
2455
+ name: 'NetSystemsResearch'
2456
+ category: 'Security Checker'
2457
+ url: 'https://www.netsystemsresearch.com/'
2458
+ producer:
2459
+ name: 'NET SYSTEMS RESEARCH LLC'
2460
+ url: 'https://www.netsystemsresearch.com/'
2461
+
2462
+ - regex: 'CensysInspect/([\d+\.])'
2463
+ name: 'CensysInspect'
2464
+ category: 'Security Checker'
2465
+ url: 'https://about.censys.io/'
2466
+ producer:
2467
+ name: 'Censys, Inc.'
2468
+ url: 'https://censys.io/'
2469
+
2470
+ - regex: 'gdnplus.com'
2471
+ name: 'GDNP'
2472
+ category: 'Crawler'
2473
+ url: 'https://gdnplus.com/'
2474
+ producer:
2475
+ name: 'Global Digital Network Plus, LLC'
2476
+ url: 'https://gdnplus.com/'
2477
+
2478
+ - regex: 'WellKnownBot/([\d+\.])'
2479
+ name: 'WellKnownBot'
2480
+ category: 'Crawler'
2481
+ url: 'https://well-known.dev'
2482
+
2483
+ - regex: 'Adsbot/([\d+\.])'
2484
+ name: 'Adsbot'
2485
+ category: 'Crawler'
2486
+ url: 'https://seostar.co/robot/'
2487
+
2488
+ - regex: 'MTRobot/([\d+\.])'
2489
+ name: 'MTRobot'
2490
+ category: 'Crawler'
2491
+ url: 'https://metrics-tools.de/robot.html'
2492
+ producer:
2493
+ name: 'Metrics Tools'
2494
+ url: 'https://metrics-tools.de/'
2495
+
2496
+ - regex: 'serpstatbot/([\d+\.])'
2497
+ name: 'serpstatbot'
2498
+ category: 'Crawler'
2499
+ url: 'http://serpstatbot.com/'
2500
+ producer:
2501
+ name: 'Netpeak Ltd'
2502
+ url: 'https://netpeak.net/'
2503
+
2504
+ - regex: 'colly'
2505
+ name: 'colly'
2506
+ category: 'Crawler'
2507
+ url: 'https://github.com/gocolly/colly/'
2508
+
2509
+ - regex: 'l9tcpid/v([\d+\.])'
2510
+ name: 'l9tcpid'
2511
+ category: 'Security Checker'
2512
+ url: 'https://github.com/LeakIX/l9tcpid'
2513
+
2514
+ - regex: 'MegaIndex.ru/([\d+\.])'
2515
+ name: 'MegaIndex'
2516
+ category: 'Crawler'
2517
+ url: 'https://megaindex.com/crawler'
2518
+
2519
+ - regex: 'Seekport'
2520
+ name: 'Seekport'
2521
+ category: 'Crawler'
2522
+ url: 'http://www.seekport.com/'
2523
+ producer:
2524
+ name: 'SISTRIX GmbH'
2525
+ url: 'https://www.sistrix.de/'
2526
+
2527
+ - regex: 'seolyt/([\d+\.])'
2528
+ name: 'seolyt'
2529
+ category: 'Crawler'
2530
+ url: 'https://seolyt.com/'
2531
+
2532
+ - regex: 'YaK/([\d+\.])'
2533
+ name: 'YaK'
2534
+ category: 'Crawler'
2535
+ url: 'https://www.linkfluence.com/'
2536
+ producer:
2537
+ name: 'Linkfluence SAS'
2538
+ url: 'https://www.linkfluence.com/'
2539
+
2540
+ - regex: 'KomodiaBot/([\d+\.])'
2541
+ name: 'KomodiaBot'
2542
+ category: 'Crawler'
2543
+ url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
2544
+ producer:
2545
+ name: 'Komodia Inc.'
2546
+ url: 'https://www.komodia.com/'
2547
+
2548
+ - regex: 'Neevabot/([\d+\.])'
2549
+ name: 'Neevabot'
2550
+ category: 'Search bot'
2551
+ url: 'https://neeva.com/neevabot'
2552
+ producer:
2553
+ name: 'Neeva Inc.'
2554
+ url: 'https://neeva.com/'
2555
+
2556
+ - regex: 'LinkPreview/([\d+\.])'
2557
+ name: 'LinkPreview'
2558
+ category: 'Service Agent'
2559
+ url: 'https://www.linkpreview.net/'
2560
+
2561
+ - regex: 'JungleKeyThumbnail/([\d+\.])'
2562
+ name: 'JungleKeyThumbnail'
2563
+ category: 'Crawler'
2564
+ url: 'https://junglekey.com/'
2565
+
2566
+ - regex: 'rocketmonitor(?: |bot/)([\d+\.])'
2567
+ name: 'RocketMonitorBot'
2568
+ category: 'Site Monitor'
2569
+ url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
2570
+ producer:
2571
+ name: 'Radio Mast, Inc.'
2572
+ url: 'https://www.radiomast.io/'
2573
+
2574
+ - regex: 'SitemapParser-VIPnytt/([\d+\.])'
2575
+ name: 'SitemapParser-VIPnytt'
2576
+ category: 'Crawler'
2577
+ url: 'https://github.com/VIPnytt/SitemapParser/'
2578
+
2579
+ - regex: '^Turnitin'
2580
+ name: 'Turnitin'
2581
+ category: 'Crawler'
2582
+ url: 'https://turnitin.com/robot/crawlerinfo.html'
2583
+
2584
+ - regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
2585
+ name: 'Dotcom Monitor'
2586
+ category: 'Site Monitor'
2587
+ url: 'https://www.dotcom-monitor.com'
2588
+
2589
+ - regex: 'ThinkChaos/'
2590
+ name: 'ThinkChaos'
2591
+ category: 'Crawler'
2592
+
2593
+ - regex: 'DataForSeoBot'
2594
+ name: 'DataForSeoBot'
2595
+ category: 'Crawler'
2596
+ url: 'https://dataforseo.com/dataforseo-bot'
2597
+
2598
+ - regex: 'Discordbot/([\d+.]+)'
2599
+ name: 'Discord Bot'
2600
+ category: 'Service Agent'
2601
+ url: 'https://discordapp.com'
2602
+
2603
+ - regex: 'Linespider/([\d+.]+)'
2604
+ name: 'Linespider'
2605
+ category: 'Crawler'
2606
+ url: 'https://lin.ee/4dwXkTH'
2607
+
2608
+ - regex: 'Cincraw/([\d+.]+)'
2609
+ name: 'Cincraw'
2610
+ category: 'Crawler'
2611
+ url: 'http://cincrawdata.net/bot/'
2612
+
2613
+ - regex: 'CISPA Web Analyzer'
2614
+ name: 'CISPA Web Analyzer'
2615
+ category: 'Crawler'
2616
+ url: 'https://notify.cispa.de/'
2617
+ producer:
2618
+ name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH'
2619
+ url: 'https://cispa.de/en'
2620
+
2621
+ - regex: 'IonCrawl'
2622
+ name: 'IONOS Crawler'
2623
+ category: 'Crawler'
2624
+ url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/'
2625
+ producer:
2626
+ name: 'IONOS SE'
2627
+ url: 'https://www.ionos.de/'
2628
+
2629
+ - regex: 'Crawldad'
2630
+ name: 'Crawldad'
2631
+ category: 'Crawler'
2632
+ url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
2633
+
2634
+ - regex: 'https://securitytxt-scan.cs.hm.edu/'
2635
+ name: 'security.txt scanserver'
2636
+ category: 'Security Checker'
2637
+ url: 'https://securitytxt-scan.cs.hm.edu/'
2638
+ producer:
2639
+ name: 'Hochschule für angewandte Wissenschaften München'
2640
+ url: 'https://www.hm.edu/'
2641
+
2642
+ # Generic detections
2643
+ - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
1838
2644
  name: 'Generic Bot'