device_detector 1.0.3 → 1.0.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (94)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +15 -0
  3. data/README.md +8 -7
  4. data/lib/device_detector/bot.rb +2 -2
  5. data/lib/device_detector/browser.rb +364 -0
  6. data/lib/device_detector/client.rb +11 -2
  7. data/lib/device_detector/device.rb +1247 -22
  8. data/lib/device_detector/memory_cache.rb +5 -5
  9. data/lib/device_detector/metadata_extractor.rb +7 -8
  10. data/lib/device_detector/model_extractor.rb +3 -3
  11. data/lib/device_detector/name_extractor.rb +2 -2
  12. data/lib/device_detector/os.rb +150 -116
  13. data/lib/device_detector/parser.rb +23 -10
  14. data/lib/device_detector/version.rb +1 -1
  15. data/lib/device_detector/version_extractor.rb +29 -2
  16. data/lib/device_detector.rb +73 -40
  17. data/regexes/bots.yml +868 -62
  18. data/regexes/client/browser_engine.yml +11 -2
  19. data/regexes/client/browsers.yml +1132 -112
  20. data/regexes/client/feed_readers.yml +5 -11
  21. data/regexes/client/libraries.yml +86 -2
  22. data/regexes/client/mediaplayers.yml +39 -3
  23. data/regexes/client/mobile_apps.yml +940 -66
  24. data/regexes/client/pim.yml +66 -3
  25. data/regexes/device/cameras.yml +6 -6
  26. data/regexes/device/car_browsers.yml +23 -3
  27. data/regexes/device/consoles.yml +15 -3
  28. data/regexes/device/mobiles.yml +18351 -3566
  29. data/regexes/device/notebooks.yml +114 -0
  30. data/regexes/device/portable_media_player.yml +36 -9
  31. data/regexes/device/shell_tv.yml +117 -0
  32. data/regexes/device/televisions.yml +440 -35
  33. data/regexes/oss.yml +635 -284
  34. data/regexes/vendorfragments.yml +5 -1
  35. metadata +21 -118
  36. data/.gitignore +0 -14
  37. data/.travis.yml +0 -14
  38. data/Gemfile +0 -8
  39. data/Rakefile +0 -96
  40. data/device_detector.gemspec +0 -26
  41. data/spec/device_detector/bot_fixtures_spec.rb +0 -30
  42. data/spec/device_detector/client_fixtures_spec.rb +0 -31
  43. data/spec/device_detector/concrete_user_agent_spec.rb +0 -135
  44. data/spec/device_detector/detector_fixtures_spec.rb +0 -100
  45. data/spec/device_detector/device_fixtures_spec.rb +0 -36
  46. data/spec/device_detector/device_spec.rb +0 -151
  47. data/spec/device_detector/memory_cache_spec.rb +0 -148
  48. data/spec/device_detector/model_extractor_spec.rb +0 -63
  49. data/spec/device_detector/os_fixtures_spec.rb +0 -26
  50. data/spec/device_detector/version_extractor_spec.rb +0 -79
  51. data/spec/device_detector_spec.rb +0 -189
  52. data/spec/fixtures/client/browser.yml +0 -2206
  53. data/spec/fixtures/client/feed_reader.yml +0 -199
  54. data/spec/fixtures/client/library.yml +0 -175
  55. data/spec/fixtures/client/mediaplayer.yml +0 -163
  56. data/spec/fixtures/client/mobile_app.yml +0 -193
  57. data/spec/fixtures/client/pim.yml +0 -115
  58. data/spec/fixtures/detector/bots.yml +0 -3260
  59. data/spec/fixtures/detector/camera.yml +0 -121
  60. data/spec/fixtures/detector/car_browser.yml +0 -21
  61. data/spec/fixtures/detector/console.yml +0 -281
  62. data/spec/fixtures/detector/desktop.yml +0 -5361
  63. data/spec/fixtures/detector/feature_phone.yml +0 -891
  64. data/spec/fixtures/detector/feed_reader.yml +0 -551
  65. data/spec/fixtures/detector/mediaplayer.yml +0 -210
  66. data/spec/fixtures/detector/mobile_apps.yml +0 -456
  67. data/spec/fixtures/detector/phablet.yml +0 -3785
  68. data/spec/fixtures/detector/portable_media_player.yml +0 -178
  69. data/spec/fixtures/detector/smart_display.yml +0 -61
  70. data/spec/fixtures/detector/smartphone-1.yml +0 -9953
  71. data/spec/fixtures/detector/smartphone-10.yml +0 -9924
  72. data/spec/fixtures/detector/smartphone-11.yml +0 -9889
  73. data/spec/fixtures/detector/smartphone-12.yml +0 -8655
  74. data/spec/fixtures/detector/smartphone-2.yml +0 -9967
  75. data/spec/fixtures/detector/smartphone-3.yml +0 -9887
  76. data/spec/fixtures/detector/smartphone-4.yml +0 -9911
  77. data/spec/fixtures/detector/smartphone-5.yml +0 -9933
  78. data/spec/fixtures/detector/smartphone-6.yml +0 -9923
  79. data/spec/fixtures/detector/smartphone-7.yml +0 -9892
  80. data/spec/fixtures/detector/smartphone-8.yml +0 -9896
  81. data/spec/fixtures/detector/smartphone-9.yml +0 -9928
  82. data/spec/fixtures/detector/smartphone.yml +0 -9984
  83. data/spec/fixtures/detector/tablet-1.yml +0 -10023
  84. data/spec/fixtures/detector/tablet-2.yml +0 -9968
  85. data/spec/fixtures/detector/tablet-3.yml +0 -7787
  86. data/spec/fixtures/detector/tablet.yml +0 -9951
  87. data/spec/fixtures/detector/tv.yml +0 -3333
  88. data/spec/fixtures/detector/unknown.yml +0 -3283
  89. data/spec/fixtures/device/camera.yml +0 -19
  90. data/spec/fixtures/device/car_browser.yml +0 -7
  91. data/spec/fixtures/device/console.yml +0 -79
  92. data/spec/fixtures/parser/oss.yml +0 -1047
  93. data/spec/fixtures/parser/vendorfragments.yml +0 -162
  94. data/spec/spec_helper.rb +0 -9
data/regexes/bots.yml CHANGED
@@ -1,14 +1,14 @@
1
1
  ###############
2
2
  # Device Detector - The Universal Device Detection library for parsing User Agents
3
3
  #
4
- # @link http://piwik.org
4
+ # @link https://matomo.org
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
- - regex: '360Spider(-Image|-Video)?'
8
+ - regex: '360Spider'
9
9
  name: '360Spider'
10
10
  category: 'Search bot'
11
- url: 'http://www.so.com/help/help_3_2.html'
11
+ url: 'https://www.so.com/help/help_3_2.html'
12
12
  producer:
13
13
  name: 'Online Media Group, Inc.'
14
14
  url: ''
@@ -40,26 +40,34 @@
40
40
  - regex: 'AhrefsBot'
41
41
  name: 'aHrefs Bot'
42
42
  category: 'Crawler'
43
- url: 'http://ahrefs.com/robot'
43
+ url: 'https://ahrefs.com/robot'
44
44
  producer:
45
45
  name: 'Ahrefs Pte Ltd'
46
- url: 'http://ahrefs.com/robot'
46
+ url: 'https://ahrefs.com/robot'
47
47
 
48
48
  - regex: 'ia_archiver|alexabot|verifybot'
49
49
  name: 'Alexa Crawler'
50
50
  category: 'Search bot'
51
- url: 'https://alexa.zendesk.com/hc/en-us/sections/200100794-Crawlers'
51
+ url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
52
52
  producer:
53
53
  name: 'Alexa Internet'
54
- url: 'http://www.alexa.com'
54
+ url: 'https://www.alexa.com'
55
55
 
56
56
  - regex: 'alexa site audit'
57
57
  name: 'Alexa Site Audit'
58
58
  category: 'Site Monitor'
59
- url: 'http://www.alexa.com/help/webmasters'
59
+ url: 'https://support.alexa.com/hc/en-us/articles/200450194'
60
60
  producer:
61
61
  name: 'Alexa Internet'
62
- url: 'http://www.alexa.com'
62
+ url: 'https://www.alexa.com'
63
+
64
+ - regex: 'Amazonbot'
65
+ name: 'Amazon Bot'
66
+ category: 'Crawler'
67
+ url: 'https://developer.amazon.com/support/amazonbot'
68
+ producer:
69
+ name: 'Amazon.com, Inc.'
70
+ url: 'https://www.amazon.com/'
63
71
 
64
72
  - regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
65
73
  name: 'Amazon Route53 Health Check'
@@ -82,29 +90,45 @@
82
90
  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
83
91
  producer:
84
92
  name: 'The Apache Software Foundation'
85
- url: 'http://www.apache.org/foundation/'
93
+ url: 'https://www.apache.org/foundation/'
86
94
 
87
95
  - regex: 'Applebot'
88
96
  name: 'Applebot'
89
97
  category: 'Crawler'
90
- url: 'http://www.apple.com/go/applebot'
98
+ url: 'https://support.apple.com/en-us/HT204683'
91
99
  producer:
92
100
  name: 'Apple Inc'
93
- url: 'http://www.apple.com'
101
+ url: 'https://www.apple.com'
102
+
103
+ - regex: "AppSignalBot"
104
+ name: "AppSignalBot"
105
+ category: "Site Monitor"
106
+ url: "https://docs.appsignal.com/uptime-monitoring/"
107
+ producer:
108
+ name: "AppSignal"
109
+ url: "https://appsignal.com/"
94
110
 
95
111
  - regex: 'Arachni'
96
112
  name: 'Arachni'
97
113
  category: 'Security Checker'
98
- url: 'http://www.arachni-scanner.com'
114
+ url: 'https://www.arachni-scanner.com/'
99
115
  producer:
100
116
  name: 'Sarosys LLC'
101
- url: 'http://www.sarosys.com/'
117
+ url: 'https://www.sarosys.com/'
118
+
119
+ - regex: 'AspiegelBot'
120
+ name: 'AspiegelBot'
121
+ category: 'Crawler'
122
+ url: 'https://aspiegel.com/'
123
+ producer:
124
+ name: 'Huawei'
125
+ url: 'https://www.huawei.com/'
102
126
 
103
127
  - regex: 'Castro 2, Episode Duration Lookup'
104
128
  name: 'Castro 2'
105
129
  category: 'Service Agent'
106
130
  url: 'http://supertop.co/castro/'
107
- producer:
131
+ producer:
108
132
  name: 'Supertop'
109
133
  url: 'http://supertop.co'
110
134
 
@@ -119,10 +143,10 @@
119
143
  - regex: 'archive\.org_bot|special_archiver'
120
144
  name: 'archive.org bot'
121
145
  category: 'Crawler'
122
- url: 'http://www.archive.org/details/archive.org_bot'
146
+ url: 'https://archive.org/details/archive.org_bot'
123
147
  producer:
124
148
  name: 'The Internet Archive'
125
- url: 'http://www.archive.org'
149
+ url: 'https://archive.org'
126
150
 
127
151
  - regex: 'Ask Jeeves/Teoma'
128
152
  name: 'Ask Jeeves'
@@ -148,7 +172,7 @@
148
172
  name: '2.0Promotion GbR'
149
173
  url: 'http://www.backlinktest.com'
150
174
 
151
- - regex: 'baiduspider(-image)?|baidu Transcoder|baidu.*spider'
175
+ - regex: 'Baidu.*spider|baidu Transcoder'
152
176
  name: 'Baidu Spider'
153
177
  category: 'Search bot'
154
178
  url: 'http://www.baidu.com/search/spider.htm'
@@ -164,6 +188,14 @@
164
188
  name: ''
165
189
  url: ''
166
190
 
191
+ - regex: 'Better Uptime Bot'
192
+ name: 'Better Uptime Bot'
193
+ category: 'Site Monitor'
194
+ url: 'https://betteruptime.com/faq'
195
+ producer:
196
+ name: 'Better Uptime'
197
+ url: 'https://betteruptime.com/'
198
+
167
199
  - regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
168
200
  name: 'BingBot'
169
201
  category: 'Search bot'
@@ -180,7 +212,7 @@
180
212
  name: 'Blekko'
181
213
  url: 'http://blekko.com'
182
214
 
183
- - regex: 'BLEXBot(Test)?'
215
+ - regex: 'BLEXBot'
184
216
  name: 'BLEXBot Crawler'
185
217
  category: 'Crawler'
186
218
  url: 'http://webmeup-crawler.com'
@@ -209,7 +241,7 @@
209
241
  category: 'Crawler'
210
242
  producer:
211
243
  name: 'BoardReader'
212
- url: 'http://boardreader.com/'
244
+ url: 'https://boardreader.com/'
213
245
 
214
246
  - regex: 'BountiiBot'
215
247
  name: 'Bountii Bot'
@@ -275,6 +307,14 @@
275
307
  name: 'CloudFlare'
276
308
  url: 'http://www.cloudflare.com'
277
309
 
310
+ - regex: 'CloudflareDiagnostics'
311
+ name: 'Cloudflare Diagnostics'
312
+ category: 'Site Monitor'
313
+ url: 'https://www.cloudflare.com/'
314
+ producer:
315
+ name: 'Cloudflare'
316
+ url: 'https://www.cloudflare.com'
317
+
278
318
  - regex: 'CloudFlare-AlwaysOnline'
279
319
  name: 'CloudFlare Always Online'
280
320
  category: 'Site Monitor'
@@ -283,13 +323,13 @@
283
323
  name: 'CloudFlare'
284
324
  url: 'http://www.cloudflare.com'
285
325
 
286
- - regex: 'coccoc/'
326
+ - regex: 'coccoc.com'
287
327
  name: 'Cốc Cốc Bot'
288
- url: 'http://help.coccoc.com/'
328
+ url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
289
329
  category: 'Search bot'
290
330
  producer:
291
331
  name: 'Cốc Cốc'
292
- url: 'http://coccoc.com/'
332
+ url: 'https://coccoc.com/'
293
333
 
294
334
  - regex: 'collectd'
295
335
  name: 'Collectd'
@@ -331,7 +371,6 @@
331
371
  name: 'Datanyze'
332
372
  url: 'https://www.datanyze.com'
333
373
 
334
-
335
374
  - regex: 'Dataprovider'
336
375
  name: 'Dataprovider'
337
376
  category: 'Crawler'
@@ -356,7 +395,7 @@
356
395
  name: 'DAZOO.FR'
357
396
  url: 'http://dazoo.fr'
358
397
 
359
- - regex: 'discobot(-news)?'
398
+ - regex: 'discobot'
360
399
  name: 'Discobot'
361
400
  category: 'Search bot'
362
401
  url: 'http://discoveryengine.com/discobot.html'
@@ -419,7 +458,7 @@
419
458
  name: 'eVenture Capital Partners II, LLC'
420
459
  url: 'http://www.eventures.vc/'
421
460
 
422
- - regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
461
+ - regex: 'Exabot|ExaleadCloudview'
423
462
  name: 'ExaBot'
424
463
  category: 'Crawler'
425
464
  url: 'http://www.exabot.com/go/robot'
@@ -443,7 +482,7 @@
443
482
  name: 'SEOmoz, Inc.'
444
483
  url: 'http://moz.com/'
445
484
 
446
- - regex: 'facebookexternalhit|facebookplatform'
485
+ - regex: 'facebookexternalhit|facebookplatform|facebookexternalua|facebookcatalog'
447
486
  name: 'Facebook External Hit'
448
487
  category: 'Social Media Agent'
449
488
  url: 'https://www.facebook.com/externalhit_uatext.php'
@@ -475,7 +514,7 @@
475
514
  name: 'David Smith & Developing Perspective, LLC'
476
515
  url: 'https://david-smith.org'
477
516
 
478
- - regex: '(Meta)?Feedly(Bot|App)?'
517
+ - regex: 'Feedly'
479
518
  name: 'Feedly'
480
519
  url: 'http://www.feedly.com'
481
520
  category: 'Feed Fetcher'
@@ -541,6 +580,10 @@
541
580
  name: ''
542
581
  url: ''
543
582
 
583
+ - regex: 'gobuster'
584
+ name: 'Gobuster'
585
+ url: 'https://github.com/OJ/gobuster'
586
+
544
587
  - regex: 'ichiro/mobile goo'
545
588
  name: 'Goo'
546
589
  category: 'Search bot'
@@ -549,6 +592,10 @@
549
592
  name: 'NTT Resonant'
550
593
  url: 'http://goo.ne.jp'
551
594
 
595
+ - regex: 'Storebot-Google'
596
+ name: 'Google StoreBot'
597
+ category: 'Crawler'
598
+
552
599
  - regex: 'Google Favicon'
553
600
  name: 'Google Favicon'
554
601
  category: 'Crawler'
@@ -577,6 +624,14 @@
577
624
  name: 'Google Inc.'
578
625
  url: 'http://www.google.com'
579
626
 
627
+ - regex: 'Google-Cloud-Scheduler'
628
+ name: 'Google Cloud Scheduler'
629
+ category: 'Crawler'
630
+ url: 'https://cloud.google.com/scheduler'
631
+ producer:
632
+ name: 'Google Inc.'
633
+ url: 'https://www.google.com'
634
+
580
635
  - regex: 'Google-Structured-Data-Testing-Tool'
581
636
  name: 'Google Structured Data Testing Tool'
582
637
  category: 'Validator'
@@ -585,6 +640,14 @@
585
640
  name: 'Google Inc.'
586
641
  url: 'http://www.google.com'
587
642
 
643
+ - regex: 'GoogleStackdriverMonitoring'
644
+ name: 'Google Stackdriver Monitoring'
645
+ category: 'Site Monitor'
646
+ url: 'https://cloud.google.com/monitoring'
647
+ producer:
648
+ name: 'Google Inc.'
649
+ url: 'https://www.google.com'
650
+
588
651
  - regex: 'via ggpht\.com GoogleImageProxy'
589
652
  name: 'Gmail Image Proxy'
590
653
  category: 'Crawler'
@@ -592,7 +655,7 @@
592
655
  producer:
593
656
  name: 'Google Inc.'
594
657
  url: 'http://www.google.com'
595
-
658
+
596
659
  - regex: 'SeznamEmailProxy'
597
660
  name: 'Seznam Email Proxy'
598
661
  category: 'Crawler'
@@ -625,7 +688,7 @@
625
688
  name: 'Visual Meta'
626
689
  url: 'https://www.shopalike.cz/'
627
690
 
628
- - regex: 'Googlebot(-Mobile|-Image|-Video|-News)?|Feedfetcher-Google|Google-Test|Google-Site-Verification|Google Web Preview|AdsBot-Google(-Mobile)?|Google-Adwords-Instant|APIs-Google|Mediapartners-Google|Google.*/\+/web/snippet|GoogleProducer|Google[ -]Publisher[ -]Plugin|Google-Shopping-Quality|Google-Adwords-DisplayAds|Google-Assess|Google-AdWords-Express|Google-speakr|Google-Read-Aloud'
691
+ - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
629
692
  name: 'Googlebot'
630
693
  category: 'Search bot'
631
694
  url: 'http://www.google.com/bot.html'
@@ -639,7 +702,7 @@
639
702
  url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
640
703
  producer:
641
704
  name: 'The Internet Archive'
642
- url: 'http://www.archive.org'
705
+ url: 'https://archive.org'
643
706
 
644
707
  - regex: 'HubSpot '
645
708
  name: 'HubSpot'
@@ -648,7 +711,6 @@
648
711
  name: 'HubSpot Inc.'
649
712
  url: 'https://www.hubspot.com'
650
713
 
651
-
652
714
  - regex: 'HTTPMon'
653
715
  name: 'HTTPMon'
654
716
  category: 'Site Monitor'
@@ -680,7 +742,7 @@
680
742
 
681
743
  - regex: 'ips-agent'
682
744
  name: 'IPS Agent'
683
- category: 'crawler'
745
+ category: 'Crawler'
684
746
  producer:
685
747
  name: 'VeriSign, Inc'
686
748
  url: 'http://www.verisign.com/'
@@ -693,6 +755,10 @@
693
755
  name: ''
694
756
  url: 'https://ip-guide.com'
695
757
 
758
+ - regex: 'k6/[0-9\.]+'
759
+ name: 'K6'
760
+ url: 'https://k6.io/'
761
+
696
762
  - regex: 'kouio'
697
763
  name: 'Kouio'
698
764
  url: 'http://kouio.com/'
@@ -717,7 +783,7 @@
717
783
  name: 'Lighthouse'
718
784
  url: 'https://developers.google.com/web/tools/lighthouse'
719
785
 
720
- - regex: 'linkdexbot(-mobile)?|linkdex\.com'
786
+ - regex: 'linkdexbot|linkdex\.com'
721
787
  name: 'Linkdex Bot'
722
788
  category: 'Search bot'
723
789
  url: 'http://www.linkdex.com/bots'
@@ -740,7 +806,7 @@
740
806
  name: ''
741
807
  url: ''
742
808
 
743
- - regex: 'Mail\.RU(_Bot)?'
809
+ - regex: 'Mail\.RU'
744
810
  name: 'Mail.Ru Bot'
745
811
  category: 'Search bot'
746
812
  url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
@@ -764,7 +830,7 @@
764
830
  name: ''
765
831
  url: ''
766
832
 
767
- - regex : 'masscan'
833
+ - regex: 'masscan'
768
834
  name: 'masscan'
769
835
  url: 'https://github.com/robertdavidgraham/masscan'
770
836
  category: 'Crawler'
@@ -917,12 +983,12 @@
917
983
  category: 'Crawler'
918
984
  producer:
919
985
  name: 'Nuzzel'
920
- url: https://www.nuzzel.com/
986
+ url: 'https://www.nuzzel.com/'
921
987
 
922
988
  - regex: 'Octopus [0-9]'
923
989
  name: 'Octopus'
924
990
 
925
- - regex: 'omgili(?:bot)?'
991
+ - regex: 'omgili'
926
992
  name: 'Omgili bot'
927
993
  category: 'Search bot'
928
994
  url: 'http://www.omgili.com/Crawler.html'
@@ -999,7 +1065,15 @@
999
1065
  name: 'Bitlove'
1000
1066
  url: 'http://bitlove.org/'
1001
1067
 
1002
- - regex: 'psbot(-page)?'
1068
+ - regex: 'PRTG Network Monitor'
1069
+ name: 'PRTG Network Monitor'
1070
+ category: 'Network Monitor'
1071
+ url: 'https://www.paessler.com/prtg'
1072
+ producer:
1073
+ name: 'Paessler AG'
1074
+ url: 'https://www.paessler.com'
1075
+
1076
+ - regex: 'psbot'
1003
1077
  name: 'Picsearch bot'
1004
1078
  category: 'Search bot'
1005
1079
  url: 'http://www.picsearch.com/bot.html'
@@ -1007,7 +1081,7 @@
1007
1081
  name: 'Picsearch'
1008
1082
  url: 'http://www.picsearch.com'
1009
1083
 
1010
- - regex: 'Pingdom\.com'
1084
+ - regex: 'Pingdom(?:\.com|TMS)'
1011
1085
  name: 'Pingdom Bot'
1012
1086
  category: 'Site Monitor'
1013
1087
  url: ''
@@ -1023,6 +1097,14 @@
1023
1097
  name: 'Quora'
1024
1098
  url: 'http://www.quora.com'
1025
1099
 
1100
+ - regex: 'Quora-Bot'
1101
+ name: 'Quora Bot'
1102
+ category: 'Crawler'
1103
+ url: ''
1104
+ producer:
1105
+ name: 'Quora'
1106
+ url: 'https://www.quora.com/'
1107
+
1026
1108
  - regex: 'RamblerMail'
1027
1109
  name: 'RamblerMail Image Proxy'
1028
1110
  category: 'Crawler'
@@ -1206,7 +1288,7 @@
1206
1288
  name: 'SISTRIX GmbH'
1207
1289
  url: 'http://www.sistrix.de'
1208
1290
 
1209
- - regex: 'SISTRIX Optimizer'
1291
+ - regex: 'compatible; (?:SISTRIX )?Optimizer'
1210
1292
  name: 'SISTRIX Optimizer'
1211
1293
  category: 'Crawler'
1212
1294
  url: 'https://optimizer.sistrix.com'
@@ -1251,6 +1333,14 @@
1251
1333
  name: 'Tencent Holdings'
1252
1334
  url: 'http://www.soso.com'
1253
1335
 
1336
+ - regex: 'Sprinklr'
1337
+ name: 'Sprinklr'
1338
+ category: 'Crawler'
1339
+ url: ''
1340
+ producer:
1341
+ name: 'Sprinklr, Inc.'
1342
+ url: 'https://www.sprinklr.com/'
1343
+
1254
1344
  - regex: 'sqlmap/'
1255
1345
  name: 'sqlmap'
1256
1346
  category: 'Security Checker'
@@ -1296,13 +1386,20 @@
1296
1386
  name: 'Tailrank Inc'
1297
1387
  url: 'http://spinn3r.com'
1298
1388
 
1299
- - regex: 'Sputnik(Image)?Bot'
1389
+ - regex: 'SputnikBot'
1300
1390
  name: 'Sputnik Bot'
1301
- category: ''
1391
+ category: 'Crawler'
1392
+ url: ''
1393
+
1394
+ - regex: 'SputnikFaviconBot'
1395
+ name: 'Sputnik Favicon Bot'
1396
+ category: 'Crawler'
1397
+ url: ''
1398
+
1399
+ - regex: 'SputnikImageBot'
1400
+ name: 'Sputnik Image Bot'
1401
+ category: 'Crawler'
1302
1402
  url: ''
1303
- producer:
1304
- name: ''
1305
- url: ''
1306
1403
 
1307
1404
  - regex: 'SurveyBot'
1308
1405
  name: 'Survey Bot'
@@ -1521,7 +1618,7 @@
1521
1618
  category: 'Site Monitor'
1522
1619
  url: 'https://www.webpagetest.org'
1523
1620
 
1524
- - regex: 'WeSEE(:Search)?'
1621
+ - regex: 'WeSEE'
1525
1622
  name: 'WeSEE:Search'
1526
1623
  category: 'Search bot'
1527
1624
  url: 'http://www.wesee.com/bot'
@@ -1561,6 +1658,14 @@
1561
1658
  name: 'Wotbox'
1562
1659
  url: 'http://www.wotbox.com'
1563
1660
 
1661
+ - regex: 'XenForo'
1662
+ name: 'XenForo'
1663
+ category: 'Service Agent'
1664
+ url: 'https://xenforo.com/'
1665
+ producer:
1666
+ name: 'XenForo Ltd.'
1667
+ url: 'https://xenforo.com/'
1668
+
1564
1669
  - regex: 'yacybot'
1565
1670
  name: 'YaCy'
1566
1671
  category: 'Search bot'
@@ -1585,6 +1690,14 @@
1585
1690
  name: 'Yahoo! Inc.'
1586
1691
  url: 'http://www.yahoo.com'
1587
1692
 
1693
+ - regex: 'YahooMailProxy'
1694
+ name: 'Yahoo! Mail Proxy'
1695
+ category: 'Service Agent'
1696
+ url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
1697
+ producer:
1698
+ name: 'Yahoo! Inc.'
1699
+ url: 'http://www.yahoo.com'
1700
+
1588
1701
  - regex: 'YahooCacheSystem'
1589
1702
  name: 'Yahoo! Cache System'
1590
1703
  category: 'Crawler'
@@ -1593,7 +1706,15 @@
1593
1706
  name: 'Yahoo! Inc.'
1594
1707
  url: 'http://www.yahoo.com'
1595
1708
 
1596
- - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher'
1709
+ - regex: 'Y!J-BRW'
1710
+ name: 'Yahoo! Japan BRW'
1711
+ category: 'Crawler'
1712
+ url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
1713
+ producer:
1714
+ name: 'Yahoo! Japan Corp.'
1715
+ url: 'https://www.yahoo.co.jp/'
1716
+
1717
+ - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1597
1718
  name: 'Yandex Bot'
1598
1719
  category: 'Search bot'
1599
1720
  url: 'http://www.yandex.com/bots'
@@ -1601,7 +1722,7 @@
1601
1722
  name: 'Yandex LLC'
1602
1723
  url: 'http://company.yandex.com'
1603
1724
 
1604
- - regex: 'Yeti'
1725
+ - regex: 'Yeti|NaverJapan|AdsBot-Naver'
1605
1726
  name: 'Yeti/Naverbot'
1606
1727
  category: 'Search bot'
1607
1728
  url: 'http://help.naver.com/robots/'
@@ -1678,18 +1799,18 @@
1678
1799
  - regex: 'HubPages.*crawlingpolicy'
1679
1800
  name: 'HubPages'
1680
1801
  category: 'Crawler'
1681
- url: 'http://hubpages.com/help/crawlingpolicy'
1802
+ url: 'https://hubpages.com/help/crawlingpolicy'
1682
1803
  producer:
1683
- name: 'HubPages'
1684
- url: 'http://hubpages.com/'
1804
+ name: 'HubPages, Inc.'
1805
+ url: 'https://discover.hubpages.com/'
1685
1806
 
1686
- - regex: 'Pinterest/\d\.\d.*www\.pinterest\.com.*'
1807
+ - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
1687
1808
  name: 'Pinterest'
1688
- url: ''
1809
+ url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
1689
1810
  category: 'Crawler'
1690
1811
  producer:
1691
1812
  name: 'Pinterest'
1692
- url: 'http://www.pinterest.com/'
1813
+ url: 'https://www.pinterest.com/'
1693
1814
 
1694
1815
  - regex: 'Site24x7'
1695
1816
  name: 'Site24x7 Website Monitoring'
@@ -1731,13 +1852,13 @@
1731
1852
  name: 'Monitor.Us'
1732
1853
  url: 'http://www.monitor.us'
1733
1854
 
1734
- - regex: 'Catchpoint( bot)?'
1855
+ - regex: 'Catchpoint'
1735
1856
  name: 'Catchpoint'
1736
1857
  category: 'Site Monitor'
1737
- url: ''
1858
+ url: 'https://www.catchpoint.com/'
1738
1859
  producer:
1739
1860
  name: 'Catchpoint Systems'
1740
- url: 'http://www.catchpoint.com/'
1861
+ url: 'https://www.catchpoint.com/'
1741
1862
 
1742
1863
  - regex: 'bitlybot'
1743
1864
  name: 'BitlyBot'
@@ -1805,7 +1926,7 @@
1805
1926
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1806
1927
  name: 'RSSRadio Bot'
1807
1928
 
1808
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
1929
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
1809
1930
  name: 'Generic Bot'
1810
1931
 
1811
1932
  - regex: '^sentry'
@@ -1824,7 +1945,181 @@
1824
1945
  name: 'The Knowledge AI'
1825
1946
  category: 'Crawler'
1826
1947
 
1827
- # Generic detections
1948
+ - regex: 'Embedly'
1949
+ name: 'Embedly'
1950
+ category: 'Crawler'
1951
+ url: 'https://support.embed.ly/hc/en-us'
1952
+ producer:
1953
+ name: 'A Medium, Corp.'
1954
+ url: 'https://medium.com/'
1955
+
1956
+ - regex: 'BrandVerity'
1957
+ name: 'BrandVerity'
1958
+ category: 'Crawler'
1959
+ url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
1960
+ producer:
1961
+ name: 'BrandVerity, Inc.'
1962
+ url: 'https://www.brandverity.com/'
1963
+
1964
+ - regex: 'Kaspersky Lab CFR link resolver'
1965
+ name: 'Kaspersky'
1966
+ category: 'Security Checker'
1967
+ url: 'https://www.kaspersky.com/'
1968
+ producer:
1969
+ name: 'AO Kaspersky Lab'
1970
+ url: 'https://www.kaspersky.com/'
1971
+
1972
+ - regex: 'eZ Publish Link Validator'
1973
+ name: 'eZ Publish Link Validator'
1974
+ category: 'Crawler'
1975
+ url: 'https://ez.no/'
1976
+ producer:
1977
+ name: 'eZ Systems AS'
1978
+ url: 'https://ez.no/'
1979
+
1980
+ - regex: 'woorankreview'
1981
+ name: 'WooRank'
1982
+ category: 'Search bot'
1983
+ url: 'https://www.woorank.com/'
1984
+ producer:
1985
+ name: 'WooRank sprl'
1986
+ url: 'https://www.woorank.com/'
1987
+
1988
+ - regex: '(Match|LinkCheck) by Siteimprove.com'
1989
+ name: 'Siteimprove'
1990
+ category: 'Search bot'
1991
+ url: 'https://siteimprove.com/'
1992
+ producer:
1993
+ name: 'Siteimprove GmbH'
1994
+ url: 'https://siteimprove.com/'
1995
+
1996
+ - regex: 'CATExplorador'
1997
+ name: 'CATExplorador'
1998
+ category: 'Search bot'
1999
+ url: 'https://fundacio.cat/ca/domini/'
2000
+ producer:
2001
+ name: 'Fundació puntCAT'
2002
+ url: 'https://fundacio.cat/ca/domini/'
2003
+
2004
+ - regex: 'Buck'
2005
+ name: 'Buck'
2006
+ category: 'Search bot'
2007
+ url: 'https://hypefactors.com/'
2008
+ producer:
2009
+ name: 'Hypefactors A/S'
2010
+ url: 'https://hypefactors.com/'
2011
+
2012
+ - regex: 'tracemyfile'
2013
+ name: 'TraceMyFile'
2014
+ category: 'Search bot'
2015
+ url: 'https://www.tracemyfile.com/'
2016
+ producer:
2017
+ name: 'Idee Inc.'
2018
+ url: 'http://ideeinc.com/'
2019
+
2020
+ - regex: 'zelist.ro feed parser'
2021
+ name: 'Ze List'
2022
+ url: 'https://www.zelist.ro/'
2023
+ category: 'Feed Fetcher'
2024
+ producer:
2025
+ name: 'Treeworks SRL'
2026
+ url: 'https://www.tree.ro/'
2027
+
2028
+ - regex: 'weborama-fetcher'
2029
+ name: 'Weborama'
2030
+ category: 'Search bot'
2031
+ url: 'https://weborama.com/'
2032
+ producer:
2033
+ name: 'Weborama SA'
2034
+ url: 'https://weborama.com/'
2035
+
2036
+ - regex: 'BoardReader Favicon Fetcher'
2037
+ name: 'BoardReader'
2038
+ category: 'Search bot'
2039
+ url: 'https://boardreader.com/'
2040
+ producer:
2041
+ name: 'Effyis Inc'
2042
+ url: 'https://boardreader.com/'
2043
+
2044
+ - regex: 'IDG/IT'
2045
+ name: 'IDG/IT'
2046
+ category: 'Search bot'
2047
+ url: 'https://spaziodati.eu/'
2048
+ producer:
2049
+ name: 'SpazioDati S.r.l.'
2050
+ url: 'https://spaziodati.eu/'
2051
+
2052
+ - regex: 'Bytespider'
2053
+ name: 'Bytespider'
2054
+ category: 'Search bot'
2055
+ url: 'https://bytedance.com/'
2056
+ producer:
2057
+ name: 'ByteDance Ltd.'
2058
+ url: 'https://bytedance.com/'
2059
+
2060
+ - regex: 'WikiDo'
2061
+ name: 'WikiDo'
2062
+ category: 'Search bot'
2063
+ url: 'https://www.wikido.com/'
2064
+ producer:
2065
+ name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
2066
+ url: 'https://www.wikido.com/'
2067
+
2068
+ - regex: 'AwarioSmartBot'
2069
+ name: 'Awario'
2070
+ category: 'Search bot'
2071
+ url: 'https://awario.com/bots.html'
2072
+ producer:
2073
+ name: 'Awario'
2074
+ url: 'https://awario.com/'
2075
+
2076
+ - regex: 'AwarioRssBot'
2077
+ name: 'Awario'
2078
+ category: 'Feed Fetcher'
2079
+ url: 'https://awario.com/bots.html'
2080
+ producer:
2081
+ name: 'Awario'
2082
+ url: 'https://awario.com/'
2083
+
2084
+ - regex: 'oBot'
2085
+ name: 'oBot'
2086
+ category: 'Search bot'
2087
+ url: 'https://www.xforce-security.com/crawler/'
2088
+ producer:
2089
+ name: 'IBM Germany Research & Development GmbH'
2090
+ url: 'https://exchange.xforce.ibmcloud.com/'
2091
+
2092
+ - regex: 'SMTBot'
2093
+ name: 'SMTBot'
2094
+ category: 'Search bot'
2095
+ url: 'https://www.similartech.com/smtbot'
2096
+ producer:
2097
+ name: 'SimilarTech Ltd.'
2098
+ url: 'https://www.similartech.com/'
2099
+
2100
+ - regex: 'LCC'
2101
+ name: 'LCC'
2102
+ category: 'Search bot'
2103
+ url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
2104
+ producer:
2105
+ name: 'Universität Leipzig'
2106
+ url: 'https://www.uni-leipzig.de/'
2107
+
2108
+ - regex: 'Startpagina-Linkchecker'
2109
+ name: 'Startpagina Linkchecker'
2110
+ category: 'Search bot'
2111
+ url: 'https://www.startpagina.nl/linkchecker'
2112
+ producer:
2113
+ name: 'Startpagina B.V.'
2114
+ url: 'https://www.startpagina.nl/'
2115
+
2116
+ - regex: 'GTmetrix'
2117
+ name: 'GTmetrix'
2118
+ category: 'Crawler'
2119
+ url: 'https://gtmetrix.com/'
2120
+ producer:
2121
+ name: 'Carbon60 Operating Co. Ltd.'
2122
+ url: 'https://www.carbon60.com/'
1828
2123
 
1829
2124
  - regex: 'Nutch'
1830
2125
  name: 'Nutch-based Bot'
@@ -1832,7 +2127,518 @@
1832
2127
  url: 'https://nutch.apache.org'
1833
2128
  producer:
1834
2129
  name: 'The Apache Software Foundation'
1835
- url: 'http://www.apache.org/foundation/'
2130
+ url: 'https://www.apache.org/foundation/'
2131
+
2132
+ - regex: 'Seobility'
2133
+ name: 'Seobility'
2134
+ category: 'Crawler'
2135
+ url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
2136
+
2137
+ - regex: 'Vercelbot'
2138
+ name: 'Vercel Bot'
2139
+ category: 'Service bot'
2140
+ url: 'https://vercel.com'
2141
+
2142
+ - regex: 'Grammarly'
2143
+ name: 'Grammarly'
2144
+ category: 'Service bot'
2145
+ url: 'https://www.grammarly.com'
2146
+
2147
+ - regex: 'Robozilla'
2148
+ name: 'Robozilla'
2149
+ category: 'Crawler'
2150
+
2151
+ - regex: 'Domains Project'
2152
+ name: 'Domains Project'
2153
+ category: 'Crawler'
2154
+ url: 'https://domainsproject.org'
2155
+
2156
+ - regex: 'PetalBot'
2157
+ name: 'Petal Bot'
2158
+ category: 'Crawler'
2159
+ url: 'https://aspiegel.com/petalbot'
2160
+
2161
+ - regex: 'SerendeputyBot'
2162
+ name: 'Serendeputy Bot'
2163
+ category: 'Crawler'
2164
+ url: 'https://serendeputy.com/about/serendeputy-bot'
2165
+
2166
+ - regex: 'ias-va.*admantx.*service-fetcher'
2167
+ name: 'ADmantX Service Fetcher'
2168
+ category: 'Service bot'
2169
+ url: 'https://www.admantx.com/service-fetcher.html'
2170
+
2171
+ - regex: 'SemanticScholarBot'
2172
+ name: 'Semantic Scholar Bot'
2173
+ category: 'Crawler'
2174
+ url: 'https://www.semanticscholar.org/crawler'
1836
2175
 
1837
- - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2176
+ - regex: 'VelenPublicWebCrawler'
2177
+ name: 'Velen Public Web Crawler'
2178
+ category: 'Crawler'
2179
+ url: 'https://hunter.io/robot'
2180
+
2181
+ - regex: 'Barkrowler'
2182
+ name: 'Barkrowler'
2183
+ category: 'Crawler'
2184
+ url: 'http://www.exensa.com/crawl'
2185
+
2186
+ - regex: 'BDCbot'
2187
+ name: 'BDCbot'
2188
+ category: 'Crawler'
2189
+ url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
2190
+ producer:
2191
+ name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
2192
+ url: 'https://bigdatacorp.com.br/'
2193
+
2194
+ - regex: 'adbeat'
2195
+ name: 'Adbeat'
2196
+ category: 'Crawler'
2197
+ url: 'https://www.adbeat.com/operation_policy'
2198
+ producer:
2199
+ name: 'PPC Labs LLC'
2200
+ url: 'https://www.adbeat.com/'
2201
+
2202
+ - regex: 'BW/(?:(\d+[\.\d]+))'
2203
+ name: 'BuiltWith'
2204
+ category: 'Crawler'
2205
+ url: 'https://builtwith.com/biup'
2206
+ producer:
2207
+ name: 'BuiltWith Pty Ltd'
2208
+ url: 'https://builtwith.com/'
2209
+
2210
+ - regex: 'https://whatis.contentkingapp.com'
2211
+ name: 'ContentKing'
2212
+ category: 'Site Monitor'
2213
+ url: 'https://whatis.contentkingapp.com/'
2214
+ producer:
2215
+ name: 'ContentKing BV'
2216
+ url: 'https://www.contentkingapp.com/'
2217
+
2218
+ - regex: 'MicroAdBot'
2219
+ name: 'MicroAdBot'
2220
+ category: 'Crawler'
2221
+ url: 'https://www.microad.co.jp/'
2222
+ producer:
2223
+ name: 'MicroAd, Inc.'
2224
+ url: 'https://www.microad.co.jp/'
2225
+
2226
+ - regex: 'PingAdmin.Ru'
2227
+ name: 'PingAdmin.Ru'
2228
+ category: 'Site Monitor'
2229
+ url: 'https://ping-admin.ru/'
2230
+
2231
+ - regex: 'notifyninja.+monitoring'
2232
+ name: 'Notify Ninja'
2233
+ category: 'Site Monitor'
2234
+ url: 'http://notifyninja.com'
2235
+
2236
+ - regex: 'WebDataStats'
2237
+ name: 'WebDataStats'
2238
+ category: 'Crawler'
2239
+ url: 'https://webdatastats.com/policy.html'
2240
+ producer:
2241
+ name: 'WebTehRazrabotka LLC'
2242
+ url: 'https://webdatastats.com/'
2243
+
2244
+ - regex: 'parse.ly scraper'
2245
+ name: 'parse.ly'
2246
+ category: 'Crawler'
2247
+ url: 'https://www.parse.ly/help/integration/crawler'
2248
+ producer:
2249
+ name: 'Parsely, Inc.'
2250
+ url: 'https://www.parse.ly/'
2251
+
2252
+ - regex: 'Nimbostratus-Bot'
2253
+ name: 'Nimbostratus Bot'
2254
+ category: 'Site Monitor'
2255
+ url: 'http://cloudsystemnetworks.com'
2256
+
2257
+ - regex: 'HeartRails_Capture/\d'
2258
+ name: 'Heart Rails Capture'
2259
+ category: 'Service Agent'
2260
+ url: 'http://capture.heartrails.com'
2261
+
2262
+ - regex: 'Project-Resonance'
2263
+ name: 'Project Resonance'
2264
+ category: 'Crawler'
2265
+ url: 'http://project-resonance.com'
2266
+
2267
+ - regex: 'DataXu/\d'
2268
+ name: 'DataXu'
2269
+ category: 'Service Agent'
2270
+ url: 'https://advertising.roku.com/dataxu'
2271
+ producer:
2272
+ name: 'Roku, Inc.'
2273
+ url: 'https://roku.com'
2274
+
2275
+ - regex: 'Cocolyzebot'
2276
+ name: 'Cocolyzebot'
2277
+ category: 'Crawler'
2278
+ url: 'https://cocolyze.com/en/cocolyzebot'
2279
+ producer:
2280
+ name: 'VSI INNOVATION SAS'
2281
+ url: 'https://vsi-innovation.com/'
2282
+
2283
+ - regex: 'veryhip'
2284
+ name: 'VeryHip'
2285
+ category: 'Crawler'
2286
+ url: 'https://veryhip.com/'
2287
+ producer:
2288
+ name: 'VeryHip'
2289
+ url: 'https://veryhip.com/'
2290
+
2291
+ - regex: 'LinkpadBot'
2292
+ name: 'LinkpadBot'
2293
+ category: 'Crawler'
2294
+ url: 'https://www.linkpad.org/'
2295
+ producer:
2296
+ name: 'Solomono LLC'
2297
+ url: 'https://www.linkpad.org/'
2298
+
2299
+ - regex: 'MuscatFerret'
2300
+ name: 'MuscatFerret'
2301
+ category: 'Crawler'
2302
+ url: 'http://www.webtop.com/'
2303
+
2304
+ - regex: 'PageThing.com'
2305
+ name: 'PageThing'
2306
+ category: 'Crawler'
2307
+ url: 'https://www.pagething.com/'
2308
+ producer:
2309
+ name: 'SPECIALNOISE LTD'
2310
+ url: 'https://www.specialnoise.com/'
2311
+
2312
+ - regex: 'ArchiveBox'
2313
+ name: 'ArchiveBox'
2314
+ url: 'https://archivebox.io/'
2315
+ category: 'Crawler'
2316
+ producer:
2317
+ name: ''
2318
+ url: ''
2319
+
2320
+ - regex: 'Choosito'
2321
+ name: 'Choosito'
2322
+ url: 'https://www.choosito.com/'
2323
+ category: 'Crawler'
2324
+ producer:
2325
+ name: 'Choosito! Inc.'
2326
+ url: 'https://www.choosito.com/'
2327
+
2328
+ - regex: 'datagnionbot'
2329
+ name: 'datagnionbot'
2330
+ url: 'https://www.datagnion.com/bot.html'
2331
+ category: 'Crawler'
2332
+ producer:
2333
+ name: 'DATAGNION GMBH'
2334
+ url: 'https://www.datagnion.com/'
2335
+
2336
+ - regex: 'WhatCMS'
2337
+ name: 'WhatCMS'
2338
+ url: 'https://whatcms.org/'
2339
+ category: 'Crawler'
2340
+ producer:
2341
+ name: 'Nineteen Ten LLC'
2342
+ url: 'https://whatcms.org/'
2343
+
2344
+ - regex: 'httpx'
2345
+ name: 'httpx'
2346
+ url: 'https://github.com/projectdiscovery/httpx'
2347
+ category: 'Crawler'
2348
+ producer:
2349
+ name: ''
2350
+ url: ''
2351
+
2352
+ - regex: 'scaninfo@expanseinc.com'
2353
+ name: 'Expanse'
2354
+ category: 'Security Checker'
2355
+ url: 'https://expanse.co/'
2356
+ producer:
2357
+ name: 'Expanse Inc.'
2358
+ url: 'https://expanse.co/'
2359
+
2360
+ - regex: 'HuaweiWebCatBot'
2361
+ name: 'HuaweiWebCatBot'
2362
+ category: 'Crawler'
2363
+ url: 'https://isecurity.huawei.com'
2364
+ producer:
2365
+ name: 'Huawei Technologies Co., Ltd.'
2366
+ url: 'https://huawei.com'
2367
+
2368
+ - regex: 'Hatena-Favicon'
2369
+ name: 'Hatena Favicon'
2370
+ category: 'Crawler'
2371
+ url: 'https://www.hatena.ne.jp/faq/'
2372
+ producer:
2373
+ name: 'Hatena Co., Ltd.'
2374
+ url: 'https://www.hatena.ne.jp'
2375
+
2376
+ - regex: 'RyowlEngine/(\d+)'
2377
+ name: 'Ryowl'
2378
+ category: 'Crawler'
2379
+ url: 'https://ryowl.org'
2380
+
2381
+ - regex: 'OdklBot/(\d+)'
2382
+ name: 'Odnoklassniki Bot'
2383
+ category: 'Crawler'
2384
+ url: 'https://odnoklassniki.ru'
2385
+
2386
+ - regex: 'Mediatoolkitbot'
2387
+ name: 'Mediatoolkit Bot'
2388
+ category: 'Crawler'
2389
+ url: 'https://mediatoolkit.com'
2390
+
2391
+ - regex: 'ZoominfoBot'
2392
+ name: 'ZoominfoBot'
2393
+ category: 'Crawler'
2394
+ url: 'https://www.zoominfo.com'
2395
+
2396
+ - regex: 'WeViKaBot/([\d+\.])'
2397
+ name: 'WeViKaBot'
2398
+ category: 'Crawler'
2399
+ url: 'http://www.wevika.de'
2400
+
2401
+ - regex: 'SEOkicks'
2402
+ name: 'SEOkicks'
2403
+ category: 'Crawler'
2404
+ url: 'https://www.seokicks.de/robot.html'
2405
+
2406
+ - regex: 'Plukkie/([\d+\.])'
2407
+ name: 'Plukkie'
2408
+ category: 'Crawler'
2409
+ url: 'http://www.botje.com/plukkie.htm'
2410
+
2411
+ - regex: 'proximic;'
2412
+ name: 'Comscore'
2413
+ category: 'Crawler'
2414
+ url: 'https://www.comscore.com/Web-Crawler'
2415
+
2416
+ - regex: 'SurdotlyBot/([\d+\.])'
2417
+ name: 'SurdotlyBot'
2418
+ category: 'Crawler'
2419
+ url: 'http://sur.ly/bot.html'
2420
+
2421
+ - regex: 'Gowikibot/([\d+\.])'
2422
+ name: 'Gowikibot'
2423
+ category: 'Crawler'
2424
+ url: 'http:/www.gowikibot.com'
2425
+
2426
+ - regex: 'SabsimBot/([\d+\.])'
2427
+ name: 'SabsimBot'
2428
+ category: 'Crawler'
2429
+ url: 'https://sabsim.com'
2430
+
2431
+ - regex: 'LumtelBot/([\d+\.])'
2432
+ name: 'LumtelBot'
2433
+ category: 'Crawler'
2434
+ url: 'https://umtel.com'
2435
+
2436
+ - regex: 'PiplBot'
2437
+ name: 'PiplBot'
2438
+ category: 'Crawler'
2439
+ url: 'http://www.pipl.com/bot'
2440
+
2441
+ - regex: 'woobot/([\d+\.])'
2442
+ name: 'WooRank'
2443
+ category: 'Crawler'
2444
+ url: 'https://www.woorank.com/bot'
2445
+
2446
+ - regex: 'Cookiebot/([\d+\.])'
2447
+ name: 'Cookiebot'
2448
+ category: 'Crawler'
2449
+ url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
2450
+ producer:
2451
+ name: 'Cybot A/S'
2452
+ url: 'https://www.cybot.com/'
2453
+
2454
+ - regex: 'NetSystemsResearch'
2455
+ name: 'NetSystemsResearch'
2456
+ category: 'Security Checker'
2457
+ url: 'https://www.netsystemsresearch.com/'
2458
+ producer:
2459
+ name: 'NET SYSTEMS RESEARCH LLC'
2460
+ url: 'https://www.netsystemsresearch.com/'
2461
+
2462
+ - regex: 'CensysInspect/([\d+\.])'
2463
+ name: 'CensysInspect'
2464
+ category: 'Security Checker'
2465
+ url: 'https://about.censys.io/'
2466
+ producer:
2467
+ name: 'Censys, Inc.'
2468
+ url: 'https://censys.io/'
2469
+
2470
+ - regex: 'gdnplus.com'
2471
+ name: 'GDNP'
2472
+ category: 'Crawler'
2473
+ url: 'https://gdnplus.com/'
2474
+ producer:
2475
+ name: 'Global Digital Network Plus, LLC'
2476
+ url: 'https://gdnplus.com/'
2477
+
2478
+ - regex: 'WellKnownBot/([\d+\.])'
2479
+ name: 'WellKnownBot'
2480
+ category: 'Crawler'
2481
+ url: 'https://well-known.dev'
2482
+
2483
+ - regex: 'Adsbot/([\d+\.])'
2484
+ name: 'Adsbot'
2485
+ category: 'Crawler'
2486
+ url: 'https://seostar.co/robot/'
2487
+
2488
+ - regex: 'MTRobot/([\d+\.])'
2489
+ name: 'MTRobot'
2490
+ category: 'Crawler'
2491
+ url: 'https://metrics-tools.de/robot.html'
2492
+ producer:
2493
+ name: 'Metrics Tools'
2494
+ url: 'https://metrics-tools.de/'
2495
+
2496
+ - regex: 'serpstatbot/([\d+\.])'
2497
+ name: 'serpstatbot'
2498
+ category: 'Crawler'
2499
+ url: 'http://serpstatbot.com/'
2500
+ producer:
2501
+ name: 'Netpeak Ltd'
2502
+ url: 'https://netpeak.net/'
2503
+
2504
+ - regex: 'colly'
2505
+ name: 'colly'
2506
+ category: 'Crawler'
2507
+ url: 'https://github.com/gocolly/colly/'
2508
+
2509
+ - regex: 'l9tcpid/v([\d+\.])'
2510
+ name: 'l9tcpid'
2511
+ category: 'Security Checker'
2512
+ url: 'https://github.com/LeakIX/l9tcpid'
2513
+
2514
+ - regex: 'MegaIndex.ru/([\d+\.])'
2515
+ name: 'MegaIndex'
2516
+ category: 'Crawler'
2517
+ url: 'https://megaindex.com/crawler'
2518
+
2519
+ - regex: 'Seekport'
2520
+ name: 'Seekport'
2521
+ category: 'Crawler'
2522
+ url: 'http://www.seekport.com/'
2523
+ producer:
2524
+ name: 'SISTRIX GmbH'
2525
+ url: 'https://www.sistrix.de/'
2526
+
2527
+ - regex: 'seolyt/([\d+\.])'
2528
+ name: 'seolyt'
2529
+ category: 'Crawler'
2530
+ url: 'https://seolyt.com/'
2531
+
2532
+ - regex: 'YaK/([\d+\.])'
2533
+ name: 'YaK'
2534
+ category: 'Crawler'
2535
+ url: 'https://www.linkfluence.com/'
2536
+ producer:
2537
+ name: 'Linkfluence SAS'
2538
+ url: 'https://www.linkfluence.com/'
2539
+
2540
+ - regex: 'KomodiaBot/([\d+\.])'
2541
+ name: 'KomodiaBot'
2542
+ category: 'Crawler'
2543
+ url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
2544
+ producer:
2545
+ name: 'Komodia Inc.'
2546
+ url: 'https://www.komodia.com/'
2547
+
2548
+ - regex: 'Neevabot/([\d+\.])'
2549
+ name: 'Neevabot'
2550
+ category: 'Search bot'
2551
+ url: 'https://neeva.com/neevabot'
2552
+ producer:
2553
+ name: 'Neeva Inc.'
2554
+ url: 'https://neeva.com/'
2555
+
2556
+ - regex: 'LinkPreview/([\d+\.])'
2557
+ name: 'LinkPreview'
2558
+ category: 'Service Agent'
2559
+ url: 'https://www.linkpreview.net/'
2560
+
2561
+ - regex: 'JungleKeyThumbnail/([\d+\.])'
2562
+ name: 'JungleKeyThumbnail'
2563
+ category: 'Crawler'
2564
+ url: 'https://junglekey.com/'
2565
+
2566
+ - regex: 'rocketmonitor(?: |bot/)([\d+\.])'
2567
+ name: 'RocketMonitorBot'
2568
+ category: 'Site Monitor'
2569
+ url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
2570
+ producer:
2571
+ name: 'Radio Mast, Inc.'
2572
+ url: 'https://www.radiomast.io/'
2573
+
2574
+ - regex: 'SitemapParser-VIPnytt/([\d+\.])'
2575
+ name: 'SitemapParser-VIPnytt'
2576
+ category: 'Crawler'
2577
+ url: 'https://github.com/VIPnytt/SitemapParser/'
2578
+
2579
+ - regex: '^Turnitin'
2580
+ name: 'Turnitin'
2581
+ category: 'Crawler'
2582
+ url: 'https://turnitin.com/robot/crawlerinfo.html'
2583
+
2584
+ - regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
2585
+ name: 'Dotcom Monitor'
2586
+ category: 'Site Monitor'
2587
+ url: 'https://www.dotcom-monitor.com'
2588
+
2589
+ - regex: 'ThinkChaos/'
2590
+ name: 'ThinkChaos'
2591
+ category: 'Crawler'
2592
+
2593
+ - regex: 'DataForSeoBot'
2594
+ name: 'DataForSeoBot'
2595
+ category: 'Crawler'
2596
+ url: 'https://dataforseo.com/dataforseo-bot'
2597
+
2598
+ - regex: 'Discordbot/([\d+.]+)'
2599
+ name: 'Discord Bot'
2600
+ category: 'Service Agent'
2601
+ url: 'https://discordapp.com'
2602
+
2603
+ - regex: 'Linespider/([\d+.]+)'
2604
+ name: 'Linespider'
2605
+ category: 'Crawler'
2606
+ url: 'https://lin.ee/4dwXkTH'
2607
+
2608
+ - regex: 'Cincraw/([\d+.]+)'
2609
+ name: 'Cincraw'
2610
+ category: 'Crawler'
2611
+ url: 'http://cincrawdata.net/bot/'
2612
+
2613
+ - regex: 'CISPA Web Analyzer'
2614
+ name: 'CISPA Web Analyzer'
2615
+ category: 'Crawler'
2616
+ url: 'https://notify.cispa.de/'
2617
+ producer:
2618
+ name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH'
2619
+ url: 'https://cispa.de/en'
2620
+
2621
+ - regex: 'IonCrawl'
2622
+ name: 'IONOS Crawler'
2623
+ category: 'Crawler'
2624
+ url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/'
2625
+ producer:
2626
+ name: 'IONOS SE'
2627
+ url: 'https://www.ionos.de/'
2628
+
2629
+ - regex: 'Crawldad'
2630
+ name: 'Crawldad'
2631
+ category: 'Crawler'
2632
+ url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
2633
+
2634
+ - regex: 'https://securitytxt-scan.cs.hm.edu/'
2635
+ name: 'security.txt scanserver'
2636
+ category: 'Security Checker'
2637
+ url: 'https://securitytxt-scan.cs.hm.edu/'
2638
+ producer:
2639
+ name: 'Hochschule für angewandte Wissenschaften München'
2640
+ url: 'https://www.hm.edu/'
2641
+
2642
+ # Generic detections
2643
+ - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
1838
2644
  name: 'Generic Bot'