device_detector 1.0.2 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +49 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +5 -6
  5. data/CHANGELOG.md +11 -0
  6. data/README.md +8 -7
  7. data/Rakefile +7 -4
  8. data/device_detector.gemspec +1 -0
  9. data/lib/device_detector/bot.rb +2 -2
  10. data/lib/device_detector/browser.rb +364 -0
  11. data/lib/device_detector/client.rb +11 -2
  12. data/lib/device_detector/device.rb +1168 -22
  13. data/lib/device_detector/memory_cache.rb +5 -5
  14. data/lib/device_detector/metadata_extractor.rb +7 -8
  15. data/lib/device_detector/model_extractor.rb +3 -3
  16. data/lib/device_detector/name_extractor.rb +2 -2
  17. data/lib/device_detector/os.rb +148 -116
  18. data/lib/device_detector/parser.rb +22 -9
  19. data/lib/device_detector/version.rb +1 -1
  20. data/lib/device_detector/version_extractor.rb +10 -2
  21. data/lib/device_detector.rb +73 -40
  22. data/regexes/bots.yml +909 -59
  23. data/regexes/client/browser_engine.yml +14 -2
  24. data/regexes/client/browsers.yml +1356 -241
  25. data/regexes/client/feed_readers.yml +10 -10
  26. data/regexes/client/libraries.yml +75 -1
  27. data/regexes/client/mediaplayers.yml +39 -3
  28. data/regexes/client/mobile_apps.yml +524 -45
  29. data/regexes/client/pim.yml +55 -2
  30. data/regexes/device/cameras.yml +6 -6
  31. data/regexes/device/car_browsers.yml +23 -3
  32. data/regexes/device/consoles.yml +9 -3
  33. data/regexes/device/mobiles.yml +16903 -3199
  34. data/regexes/device/notebooks.yml +114 -0
  35. data/regexes/device/portable_media_player.yml +20 -3
  36. data/regexes/device/shell_tv.yml +117 -0
  37. data/regexes/device/televisions.yml +440 -35
  38. data/regexes/oss.yml +603 -165
  39. data/regexes/vendorfragments.yml +6 -2
  40. data/spec/device_detector/concrete_user_agent_spec.rb +38 -51
  41. data/spec/device_detector/detector_fixtures_spec.rb +36 -37
  42. data/spec/device_detector/device_spec.rb +52 -56
  43. data/spec/device_detector/memory_cache_spec.rb +19 -19
  44. data/spec/device_detector/model_extractor_spec.rb +3 -3
  45. data/spec/device_detector/version_extractor_spec.rb +5 -6
  46. data/spec/device_detector_spec.rb +26 -26
  47. data/spec/fixtures/client/browser.yml +2617 -555
  48. data/spec/fixtures/client/feed_reader.yml +34 -52
  49. data/spec/fixtures/client/library.yml +129 -38
  50. data/spec/fixtures/client/mediaplayer.yml +59 -28
  51. data/spec/fixtures/client/mobile_app.yml +589 -20
  52. data/spec/fixtures/client/pim.yml +92 -19
  53. data/spec/fixtures/detector/bots.yml +1749 -458
  54. data/spec/fixtures/detector/camera.yml +30 -24
  55. data/spec/fixtures/detector/car_browser.yml +199 -3
  56. data/spec/fixtures/detector/console.yml +107 -45
  57. data/spec/fixtures/detector/desktop.yml +3216 -758
  58. data/spec/fixtures/detector/feature_phone.yml +935 -181
  59. data/spec/fixtures/detector/feed_reader.yml +181 -193
  60. data/spec/fixtures/detector/mediaplayer.yml +157 -60
  61. data/spec/fixtures/detector/mobile_apps.yml +914 -136
  62. data/spec/fixtures/detector/peripheral.yml +271 -0
  63. data/spec/fixtures/detector/phablet.yml +5593 -1533
  64. data/spec/fixtures/detector/portable_media_player.yml +398 -32
  65. data/spec/fixtures/detector/smart_display.yml +183 -9
  66. data/spec/fixtures/detector/smart_speaker.yml +60 -0
  67. data/spec/fixtures/detector/smartphone-1.yml +3930 -4239
  68. data/spec/fixtures/detector/smartphone-10.yml +3985 -7366
  69. data/spec/fixtures/detector/smartphone-11.yml +5083 -3784
  70. data/spec/fixtures/detector/smartphone-12.yml +8989 -0
  71. data/spec/fixtures/detector/smartphone-13.yml +9412 -0
  72. data/spec/fixtures/detector/smartphone-14.yml +9477 -0
  73. data/spec/fixtures/detector/smartphone-15.yml +9281 -0
  74. data/spec/fixtures/detector/smartphone-16.yml +9678 -0
  75. data/spec/fixtures/detector/smartphone-17.yml +9965 -0
  76. data/spec/fixtures/detector/smartphone-18.yml +9806 -0
  77. data/spec/fixtures/detector/smartphone-19.yml +9965 -0
  78. data/spec/fixtures/detector/smartphone-2.yml +6670 -4375
  79. data/spec/fixtures/detector/smartphone-20.yml +9710 -0
  80. data/spec/fixtures/detector/smartphone-21.yml +8693 -0
  81. data/spec/fixtures/detector/smartphone-22.yml +10178 -0
  82. data/spec/fixtures/detector/smartphone-23.yml +9453 -0
  83. data/spec/fixtures/detector/smartphone-24.yml +9843 -0
  84. data/spec/fixtures/detector/smartphone-25.yml +9703 -0
  85. data/spec/fixtures/detector/smartphone-26.yml +10007 -0
  86. data/spec/fixtures/detector/smartphone-27.yml +4927 -0
  87. data/spec/fixtures/detector/smartphone-3.yml +4656 -4711
  88. data/spec/fixtures/detector/smartphone-4.yml +3518 -4479
  89. data/spec/fixtures/detector/smartphone-5.yml +3805 -4803
  90. data/spec/fixtures/detector/smartphone-6.yml +3745 -4693
  91. data/spec/fixtures/detector/smartphone-7.yml +3721 -4645
  92. data/spec/fixtures/detector/smartphone-8.yml +4564 -4699
  93. data/spec/fixtures/detector/smartphone-9.yml +3897 -4888
  94. data/spec/fixtures/detector/smartphone.yml +3154 -4141
  95. data/spec/fixtures/detector/tablet-1.yml +4742 -4576
  96. data/spec/fixtures/detector/tablet-2.yml +3803 -4731
  97. data/spec/fixtures/detector/tablet-3.yml +6210 -2309
  98. data/spec/fixtures/detector/tablet-4.yml +9062 -0
  99. data/spec/fixtures/detector/tablet-5.yml +9273 -0
  100. data/spec/fixtures/detector/tablet-6.yml +4588 -0
  101. data/spec/fixtures/detector/tablet.yml +3629 -4613
  102. data/spec/fixtures/detector/tv-1.yml +2501 -0
  103. data/spec/fixtures/detector/tv.yml +8856 -2064
  104. data/spec/fixtures/detector/unknown.yml +412 -587
  105. data/spec/fixtures/detector/wearable.yml +915 -0
  106. data/spec/fixtures/device/camera.yml +4 -3
  107. data/spec/fixtures/device/car_browser.yml +9 -2
  108. data/spec/fixtures/device/console.yml +15 -14
  109. data/spec/fixtures/device/notebook.yml +7 -0
  110. data/spec/fixtures/parser/oss.yml +1392 -21
  111. data/spec/fixtures/parser/vendorfragments.yml +57 -51
  112. metadata +70 -4
data/regexes/bots.yml CHANGED
@@ -1,14 +1,14 @@
1
1
  ###############
2
2
  # Device Detector - The Universal Device Detection library for parsing User Agents
3
3
  #
4
- # @link http://piwik.org
4
+ # @link https://matomo.org
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
8
- - regex: '360Spider(-Image|-Video)?'
8
+ - regex: '360Spider'
9
9
  name: '360Spider'
10
10
  category: 'Search bot'
11
- url: 'http://www.so.com/help/help_3_2.html'
11
+ url: 'https://www.so.com/help/help_3_2.html'
12
12
  producer:
13
13
  name: 'Online Media Group, Inc.'
14
14
  url: ''
@@ -40,26 +40,41 @@
40
40
  - regex: 'AhrefsBot'
41
41
  name: 'aHrefs Bot'
42
42
  category: 'Crawler'
43
- url: 'http://ahrefs.com/robot'
43
+ url: 'https://ahrefs.com/robot'
44
44
  producer:
45
45
  name: 'Ahrefs Pte Ltd'
46
- url: 'http://ahrefs.com/robot'
46
+ url: 'https://ahrefs.com/robot'
47
47
 
48
48
  - regex: 'ia_archiver|alexabot|verifybot'
49
49
  name: 'Alexa Crawler'
50
50
  category: 'Search bot'
51
- url: 'https://alexa.zendesk.com/hc/en-us/sections/200100794-Crawlers'
51
+ url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
52
52
  producer:
53
53
  name: 'Alexa Internet'
54
- url: 'http://www.alexa.com'
54
+ url: 'https://www.alexa.com'
55
55
 
56
56
  - regex: 'alexa site audit'
57
57
  name: 'Alexa Site Audit'
58
58
  category: 'Site Monitor'
59
- url: 'http://www.alexa.com/help/webmasters'
59
+ url: 'https://support.alexa.com/hc/en-us/articles/200450194'
60
60
  producer:
61
61
  name: 'Alexa Internet'
62
- url: 'http://www.alexa.com'
62
+ url: 'https://www.alexa.com'
63
+
64
+ - regex: 'Amazonbot'
65
+ name: 'Amazon Bot'
66
+ category: 'Crawler'
67
+ url: 'https://developer.amazon.com/support/amazonbot'
68
+ producer:
69
+ name: 'Amazon.com, Inc.'
70
+ url: 'https://www.amazon.com/'
71
+
72
+ - regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
73
+ name: 'Amazon Route53 Health Check'
74
+ category: 'Service Agent'
75
+ producer:
76
+ name: 'Amazon Web Services'
77
+ url: 'https://aws.amazon.com/'
63
78
 
64
79
  - regex: 'AmorankSpider'
65
80
  name: 'Amorank Spider'
@@ -75,29 +90,45 @@
75
90
  url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
76
91
  producer:
77
92
  name: 'The Apache Software Foundation'
78
- url: 'http://www.apache.org/foundation/'
93
+ url: 'https://www.apache.org/foundation/'
79
94
 
80
95
  - regex: 'Applebot'
81
96
  name: 'Applebot'
82
97
  category: 'Crawler'
83
- url: 'http://www.apple.com/go/applebot'
98
+ url: 'https://support.apple.com/en-us/HT204683'
84
99
  producer:
85
100
  name: 'Apple Inc'
86
- url: 'http://www.apple.com'
101
+ url: 'https://www.apple.com'
102
+
103
+ - regex: "AppSignalBot"
104
+ name: "AppSignalBot"
105
+ category: "Site Monitor"
106
+ url: "https://docs.appsignal.com/uptime-monitoring/"
107
+ producer:
108
+ name: "AppSignal"
109
+ url: "https://appsignal.com/"
87
110
 
88
111
  - regex: 'Arachni'
89
112
  name: 'Arachni'
90
113
  category: 'Security Checker'
91
- url: 'http://www.arachni-scanner.com'
114
+ url: 'https://www.arachni-scanner.com/'
92
115
  producer:
93
116
  name: 'Sarosys LLC'
94
- url: 'http://www.sarosys.com/'
117
+ url: 'https://www.sarosys.com/'
118
+
119
+ - regex: 'AspiegelBot'
120
+ name: 'AspiegelBot'
121
+ category: 'Crawler'
122
+ url: 'https://aspiegel.com/'
123
+ producer:
124
+ name: 'Huawei'
125
+ url: 'https://www.huawei.com/'
95
126
 
96
127
  - regex: 'Castro 2, Episode Duration Lookup'
97
128
  name: 'Castro 2'
98
129
  category: 'Service Agent'
99
130
  url: 'http://supertop.co/castro/'
100
- producer:
131
+ producer:
101
132
  name: 'Supertop'
102
133
  url: 'http://supertop.co'
103
134
 
@@ -112,10 +143,10 @@
112
143
  - regex: 'archive\.org_bot|special_archiver'
113
144
  name: 'archive.org bot'
114
145
  category: 'Crawler'
115
- url: 'http://www.archive.org/details/archive.org_bot'
146
+ url: 'https://archive.org/details/archive.org_bot'
116
147
  producer:
117
148
  name: 'The Internet Archive'
118
- url: 'http://www.archive.org'
149
+ url: 'https://archive.org'
119
150
 
120
151
  - regex: 'Ask Jeeves/Teoma'
121
152
  name: 'Ask Jeeves'
@@ -141,7 +172,7 @@
141
172
  name: '2.0Promotion GbR'
142
173
  url: 'http://www.backlinktest.com'
143
174
 
144
- - regex: 'baiduspider(-image)?|baidu Transcoder|baidu.*spider'
175
+ - regex: 'Baidu.*spider|baidu Transcoder'
145
176
  name: 'Baidu Spider'
146
177
  category: 'Search bot'
147
178
  url: 'http://www.baidu.com/search/spider.htm'
@@ -157,6 +188,14 @@
157
188
  name: ''
158
189
  url: ''
159
190
 
191
+ - regex: 'Better Uptime Bot'
192
+ name: 'Better Uptime Bot'
193
+ category: 'Site Monitor'
194
+ url: 'https://betteruptime.com/faq'
195
+ producer:
196
+ name: 'Better Uptime'
197
+ url: 'https://betteruptime.com/'
198
+
160
199
  - regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
161
200
  name: 'BingBot'
162
201
  category: 'Search bot'
@@ -173,7 +212,7 @@
173
212
  name: 'Blekko'
174
213
  url: 'http://blekko.com'
175
214
 
176
- - regex: 'BLEXBot(Test)?'
215
+ - regex: 'BLEXBot'
177
216
  name: 'BLEXBot Crawler'
178
217
  category: 'Crawler'
179
218
  url: 'http://webmeup-crawler.com'
@@ -197,6 +236,13 @@
197
236
  name: 'Blogtrottr Ltd'
198
237
  url: 'https://blogtrottr.com/'
199
238
 
239
+ - regex: 'BoardReader Blog Indexer'
240
+ name: 'BoardReader Blog Indexer'
241
+ category: 'Crawler'
242
+ producer:
243
+ name: 'BoardReader'
244
+ url: 'https://boardreader.com/'
245
+
200
246
  - regex: 'BountiiBot'
201
247
  name: 'Bountii Bot'
202
248
  category: 'Search bot'
@@ -261,6 +307,14 @@
261
307
  name: 'CloudFlare'
262
308
  url: 'http://www.cloudflare.com'
263
309
 
310
+ - regex: 'CloudflareDiagnostics'
311
+ name: 'Cloudflare Diagnostics'
312
+ category: 'Site Monitor'
313
+ url: 'https://www.cloudflare.com/'
314
+ producer:
315
+ name: 'Cloudflare'
316
+ url: 'https://www.cloudflare.com'
317
+
264
318
  - regex: 'CloudFlare-AlwaysOnline'
265
319
  name: 'CloudFlare Always Online'
266
320
  category: 'Site Monitor'
@@ -269,13 +323,13 @@
269
323
  name: 'CloudFlare'
270
324
  url: 'http://www.cloudflare.com'
271
325
 
272
- - regex: 'coccoc/'
326
+ - regex: 'coccoc.com'
273
327
  name: 'Cốc Cốc Bot'
274
- url: 'http://help.coccoc.com/'
328
+ url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
275
329
  category: 'Search bot'
276
330
  producer:
277
331
  name: 'Cốc Cốc'
278
- url: 'http://coccoc.com/'
332
+ url: 'https://coccoc.com/'
279
333
 
280
334
  - regex: 'collectd'
281
335
  name: 'Collectd'
@@ -309,6 +363,14 @@
309
363
  name: 'Datadog'
310
364
  url: 'https://www.datadoghq.com/'
311
365
 
366
+ - regex: 'Datanyze'
367
+ name: 'Datanyze'
368
+ url: ''
369
+ category: 'Crawler'
370
+ producer:
371
+ name: 'Datanyze'
372
+ url: 'https://www.datanyze.com'
373
+
312
374
  - regex: 'Dataprovider'
313
375
  name: 'Dataprovider'
314
376
  category: 'Crawler'
@@ -333,7 +395,7 @@
333
395
  name: 'DAZOO.FR'
334
396
  url: 'http://dazoo.fr'
335
397
 
336
- - regex: 'discobot(-news)?'
398
+ - regex: 'discobot'
337
399
  name: 'Discobot'
338
400
  category: 'Search bot'
339
401
  url: 'http://discoveryengine.com/discobot.html'
@@ -357,7 +419,7 @@
357
419
  name: 'SEOmoz, Inc.'
358
420
  url: 'http://moz.com/'
359
421
 
360
- - regex: 'DuckDuck'
422
+ - regex: 'DuckDuck(?:Go-Favicons-)?Bot'
361
423
  name: 'DuckDuckGo Bot'
362
424
  category: 'Search bot'
363
425
  url: 'https://duckduckgo.com/duckduckbot'
@@ -373,6 +435,13 @@
373
435
  name: 'easou ICP'
374
436
  url: 'http://www.easou.com'
375
437
 
438
+ - regex: 'eCairn-Grabber'
439
+ name: 'eCairn-Grabber'
440
+ category: 'Crawler'
441
+ producer:
442
+ name: 'eCairn'
443
+ url: 'https://ecairn.com'
444
+
376
445
  - regex: 'EMail Exractor'
377
446
  name: 'EMail Exractor'
378
447
  category: 'Crawler'
@@ -389,7 +458,7 @@
389
458
  name: 'eVenture Capital Partners II, LLC'
390
459
  url: 'http://www.eventures.vc/'
391
460
 
392
- - regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
461
+ - regex: 'Exabot|ExaleadCloudview'
393
462
  name: 'ExaBot'
394
463
  category: 'Crawler'
395
464
  url: 'http://www.exabot.com/go/robot'
@@ -413,7 +482,7 @@
413
482
  name: 'SEOmoz, Inc.'
414
483
  url: 'http://moz.com/'
415
484
 
416
- - regex: 'facebookexternalhit|facebookplatform'
485
+ - regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
417
486
  name: 'Facebook External Hit'
418
487
  category: 'Social Media Agent'
419
488
  url: 'https://www.facebook.com/externalhit_uatext.php'
@@ -445,7 +514,7 @@
445
514
  name: 'David Smith & Developing Perspective, LLC'
446
515
  url: 'https://david-smith.org'
447
516
 
448
- - regex: '(Meta)?Feedly(Bot|App)?'
517
+ - regex: 'Feedly'
449
518
  name: 'Feedly'
450
519
  url: 'http://www.feedly.com'
451
520
  category: 'Feed Fetcher'
@@ -482,6 +551,11 @@
482
551
  category: 'Crawler'
483
552
  url: 'http://www.findxbot.com'
484
553
 
554
+ - regex: 'FreshRSS'
555
+ name: 'FreshRSS'
556
+ category: 'Feed Fetcher'
557
+ url: 'https://freshrss.org/'
558
+
485
559
  - regex: 'Genieo'
486
560
  name: 'Genieo Web filter'
487
561
  category: ''
@@ -506,6 +580,10 @@
506
580
  name: ''
507
581
  url: ''
508
582
 
583
+ - regex: 'gobuster'
584
+ name: 'Gobuster'
585
+ url: 'https://github.com/OJ/gobuster'
586
+
509
587
  - regex: 'ichiro/mobile goo'
510
588
  name: 'Goo'
511
589
  category: 'Search bot'
@@ -514,6 +592,14 @@
514
592
  name: 'NTT Resonant'
515
593
  url: 'http://goo.ne.jp'
516
594
 
595
+ - regex: 'Storebot-Google'
596
+ name: 'Google StoreBot'
597
+ category: 'Crawler'
598
+
599
+ - regex: 'Google Favicon'
600
+ name: 'Google Favicon'
601
+ category: 'Crawler'
602
+
517
603
  - regex: 'Google Search Console'
518
604
  name: 'Google Search Console'
519
605
  category: 'Crawler'
@@ -538,6 +624,14 @@
538
624
  name: 'Google Inc.'
539
625
  url: 'http://www.google.com'
540
626
 
627
+ - regex: 'Google-Cloud-Scheduler'
628
+ name: 'Google Cloud Scheduler'
629
+ category: 'Crawler'
630
+ url: 'https://cloud.google.com/scheduler'
631
+ producer:
632
+ name: 'Google Inc.'
633
+ url: 'https://www.google.com'
634
+
541
635
  - regex: 'Google-Structured-Data-Testing-Tool'
542
636
  name: 'Google Structured Data Testing Tool'
543
637
  category: 'Validator'
@@ -546,6 +640,14 @@
546
640
  name: 'Google Inc.'
547
641
  url: 'http://www.google.com'
548
642
 
643
+ - regex: 'GoogleStackdriverMonitoring'
644
+ name: 'Google Stackdriver Monitoring'
645
+ category: 'Site Monitor'
646
+ url: 'https://cloud.google.com/monitoring'
647
+ producer:
648
+ name: 'Google Inc.'
649
+ url: 'https://www.google.com'
650
+
549
651
  - regex: 'via ggpht\.com GoogleImageProxy'
550
652
  name: 'Gmail Image Proxy'
551
653
  category: 'Crawler'
@@ -553,7 +655,7 @@
553
655
  producer:
554
656
  name: 'Google Inc.'
555
657
  url: 'http://www.google.com'
556
-
658
+
557
659
  - regex: 'SeznamEmailProxy'
558
660
  name: 'Seznam Email Proxy'
559
661
  category: 'Crawler'
@@ -586,7 +688,7 @@
586
688
  name: 'Visual Meta'
587
689
  url: 'https://www.shopalike.cz/'
588
690
 
589
- - regex: 'Googlebot(-Mobile|-Image|-Video|-News)?|Feedfetcher-Google|Google-Test|Google-Site-Verification|Google Web Preview|AdsBot-Google(-Mobile)?|Google-Adwords-Instant|APIs-Google|Mediapartners-Google|Google.*/\+/web/snippet|GoogleProducer|Google[ -]Publisher[ -]Plugin|Google-Shopping-Quality|Google-Adwords-DisplayAds|Google-Assess|Google-AdWords-Express|Google-speakr|Google-Read-Aloud'
691
+ - regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
590
692
  name: 'Googlebot'
591
693
  category: 'Search bot'
592
694
  url: 'http://www.google.com/bot.html'
@@ -600,7 +702,7 @@
600
702
  url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
601
703
  producer:
602
704
  name: 'The Internet Archive'
603
- url: 'http://www.archive.org'
705
+ url: 'https://archive.org'
604
706
 
605
707
  - regex: 'HubSpot '
606
708
  name: 'HubSpot'
@@ -609,7 +711,6 @@
609
711
  name: 'HubSpot Inc.'
610
712
  url: 'https://www.hubspot.com'
611
713
 
612
-
613
714
  - regex: 'HTTPMon'
614
715
  name: 'HTTPMon'
615
716
  category: 'Site Monitor'
@@ -626,6 +727,11 @@
626
727
  name: ''
627
728
  url: ''
628
729
 
730
+ - regex: 'inoreader.com'
731
+ name: 'inoreader'
732
+ category: 'Feed Reader'
733
+ url: 'https://www.inoreader.com'
734
+
629
735
  - regex: 'iisbot'
630
736
  name: 'IIS Site Analysis'
631
737
  category: 'Crawler'
@@ -636,7 +742,7 @@
636
742
 
637
743
  - regex: 'ips-agent'
638
744
  name: 'IPS Agent'
639
- category: 'crawler'
745
+ category: 'Crawler'
640
746
  producer:
641
747
  name: 'VeriSign, Inc'
642
748
  url: 'http://www.verisign.com/'
@@ -649,6 +755,10 @@
649
755
  name: ''
650
756
  url: 'https://ip-guide.com'
651
757
 
758
+ - regex: 'k6/[0-9\.]+'
759
+ name: 'K6'
760
+ url: 'https://k6.io/'
761
+
652
762
  - regex: 'kouio'
653
763
  name: 'Kouio'
654
764
  url: 'http://kouio.com/'
@@ -673,7 +783,7 @@
673
783
  name: 'Lighthouse'
674
784
  url: 'https://developers.google.com/web/tools/lighthouse'
675
785
 
676
- - regex: 'linkdexbot(-mobile)?|linkdex\.com'
786
+ - regex: 'linkdexbot|linkdex\.com'
677
787
  name: 'Linkdex Bot'
678
788
  category: 'Search bot'
679
789
  url: 'http://www.linkdex.com/bots'
@@ -696,7 +806,7 @@
696
806
  name: ''
697
807
  url: ''
698
808
 
699
- - regex: 'Mail\.RU(_Bot)?'
809
+ - regex: 'Mail\.RU'
700
810
  name: 'Mail.Ru Bot'
701
811
  category: 'Search bot'
702
812
  url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
@@ -720,7 +830,7 @@
720
830
  name: ''
721
831
  url: ''
722
832
 
723
- - regex : 'masscan'
833
+ - regex: 'masscan'
724
834
  name: 'masscan'
725
835
  url: 'https://github.com/robertdavidgraham/masscan'
726
836
  category: 'Crawler'
@@ -808,6 +918,10 @@
808
918
  name: 'Nagios Plugins Development Team'
809
919
  url: 'https://nagios.org'
810
920
 
921
+ - regex: 'nbertaupete95\(at\)gmail.com'
922
+ name: 'nbertaupete95'
923
+ category: 'Crawler'
924
+
811
925
  - regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
812
926
  name: 'Netcraft Survey Bot'
813
927
  category: 'Search bot'
@@ -818,7 +932,7 @@
818
932
 
819
933
  - regex: 'netEstate NE Crawler'
820
934
  name: 'netEstate'
821
- category: 'Analytics SEO Crawler'
935
+ category: 'Crawler'
822
936
  url: 'http://www.website-datenbank.de/Impressum'
823
937
  producer:
824
938
  name: 'netEstate GmbH'
@@ -864,10 +978,17 @@
864
978
  name: 'Nmap'
865
979
  url: 'https://nmap.org/'
866
980
 
981
+ - regex: 'Nuzzel'
982
+ name: 'Nuzzel'
983
+ category: 'Crawler'
984
+ producer:
985
+ name: 'Nuzzel'
986
+ url: 'https://www.nuzzel.com/'
987
+
867
988
  - regex: 'Octopus [0-9]'
868
989
  name: 'Octopus'
869
990
 
870
- - regex: 'omgili(?:bot)?'
991
+ - regex: 'omgili'
871
992
  name: 'Omgili bot'
872
993
  category: 'Search bot'
873
994
  url: 'http://www.omgili.com/Crawler.html'
@@ -944,7 +1065,15 @@
944
1065
  name: 'Bitlove'
945
1066
  url: 'http://bitlove.org/'
946
1067
 
947
- - regex: 'psbot(-page)?'
1068
+ - regex: 'PRTG Network Monitor'
1069
+ name: 'PRTG Network Monitor'
1070
+ category: 'Network Monitor'
1071
+ url: 'https://www.paessler.com/prtg'
1072
+ producer:
1073
+ name: 'Paessler AG'
1074
+ url: 'https://www.paessler.com'
1075
+
1076
+ - regex: 'psbot'
948
1077
  name: 'Picsearch bot'
949
1078
  category: 'Search bot'
950
1079
  url: 'http://www.picsearch.com/bot.html'
@@ -952,7 +1081,7 @@
952
1081
  name: 'Picsearch'
953
1082
  url: 'http://www.picsearch.com'
954
1083
 
955
- - regex: 'Pingdom\.com'
1084
+ - regex: 'Pingdom(?:\.com|TMS)'
956
1085
  name: 'Pingdom Bot'
957
1086
  category: 'Site Monitor'
958
1087
  url: ''
@@ -968,6 +1097,14 @@
968
1097
  name: 'Quora'
969
1098
  url: 'http://www.quora.com'
970
1099
 
1100
+ - regex: 'Quora-Bot'
1101
+ name: 'Quora Bot'
1102
+ category: 'Crawler'
1103
+ url: ''
1104
+ producer:
1105
+ name: 'Quora'
1106
+ url: 'https://www.quora.com/'
1107
+
971
1108
  - regex: 'RamblerMail'
972
1109
  name: 'RamblerMail Image Proxy'
973
1110
  category: 'Crawler'
@@ -1111,6 +1248,14 @@
1111
1248
  name: 'Seznam.cz, a.s.'
1112
1249
  url: 'http://www.seznam.cz/'
1113
1250
 
1251
+ - regex: 'shopify-partner-homepage-scraper'
1252
+ name: 'Shopify Partner'
1253
+ category: 'Crawler'
1254
+ url: 'https://www.shopify.com/partners'
1255
+ producer:
1256
+ name: 'Shopify'
1257
+ url: 'https://www.shopify.com/'
1258
+
1114
1259
  - regex: 'ShopWiki'
1115
1260
  name: 'ShopWiki'
1116
1261
  category: 'Search tools'
@@ -1143,6 +1288,14 @@
1143
1288
  name: 'SISTRIX GmbH'
1144
1289
  url: 'http://www.sistrix.de'
1145
1290
 
1291
+ - regex: 'compatible; (?:SISTRIX )?Optimizer'
1292
+ name: 'SISTRIX Optimizer'
1293
+ category: 'Crawler'
1294
+ url: 'https://optimizer.sistrix.com'
1295
+ producer:
1296
+ name: 'SISTRIX GmbH'
1297
+ url: 'http://www.sistrix.de'
1298
+
1146
1299
  - regex: 'SiteSucker'
1147
1300
  name: 'SiteSucker'
1148
1301
  category: 'Crawler'
@@ -1180,6 +1333,14 @@
1180
1333
  name: 'Tencent Holdings'
1181
1334
  url: 'http://www.soso.com'
1182
1335
 
1336
+ - regex: 'Sprinklr'
1337
+ name: 'Sprinklr'
1338
+ category: 'Crawler'
1339
+ url: ''
1340
+ producer:
1341
+ name: 'Sprinklr, Inc.'
1342
+ url: 'https://www.sprinklr.com/'
1343
+
1183
1344
  - regex: 'sqlmap/'
1184
1345
  name: 'sqlmap'
1185
1346
  category: 'Security Checker'
@@ -1225,13 +1386,20 @@
1225
1386
  name: 'Tailrank Inc'
1226
1387
  url: 'http://spinn3r.com'
1227
1388
 
1228
- - regex: 'Sputnik(Image)?Bot'
1389
+ - regex: 'SputnikBot'
1229
1390
  name: 'Sputnik Bot'
1230
- category: ''
1391
+ category: 'Crawler'
1392
+ url: ''
1393
+
1394
+ - regex: 'SputnikFaviconBot'
1395
+ name: 'Sputnik Favicon Bot'
1396
+ category: 'Crawler'
1397
+ url: ''
1398
+
1399
+ - regex: 'SputnikImageBot'
1400
+ name: 'Sputnik Image Bot'
1401
+ category: 'Crawler'
1231
1402
  url: ''
1232
- producer:
1233
- name: ''
1234
- url: ''
1235
1403
 
1236
1404
  - regex: 'SurveyBot'
1237
1405
  name: 'Survey Bot'
@@ -1274,6 +1442,11 @@
1274
1442
  name: ''
1275
1443
  url: ''
1276
1444
 
1445
+ - regex: 'theoldreader.com'
1446
+ name: 'theoldreader'
1447
+ category: 'Feed Reader'
1448
+ url: 'https://theoldreader.com'
1449
+
1277
1450
  - regex: 'trendictionbot'
1278
1451
  name: 'Trendiction Bot'
1279
1452
  category: 'Crawler'
@@ -1306,6 +1479,13 @@
1306
1479
  name: 'Mediasift'
1307
1480
  url: ''
1308
1481
 
1482
+ - regex: 'Twingly Recon'
1483
+ name: 'Twingly Recon'
1484
+ category: 'Crawler'
1485
+ producer:
1486
+ name: 'Twingly'
1487
+ url: 'https://www.twingly.com'
1488
+
1309
1489
  - regex: 'Twitterbot'
1310
1490
  name: 'Twitterbot'
1311
1491
  category: 'Social Media Agent'
@@ -1438,7 +1618,7 @@
1438
1618
  category: 'Site Monitor'
1439
1619
  url: 'https://www.webpagetest.org'
1440
1620
 
1441
- - regex: 'WeSEE(:Search)?'
1621
+ - regex: 'WeSEE'
1442
1622
  name: 'WeSEE:Search'
1443
1623
  category: 'Search bot'
1444
1624
  url: 'http://www.wesee.com/bot'
@@ -1478,6 +1658,14 @@
1478
1658
  name: 'Wotbox'
1479
1659
  url: 'http://www.wotbox.com'
1480
1660
 
1661
+ - regex: 'XenForo'
1662
+ name: 'XenForo'
1663
+ category: 'Service Agent'
1664
+ url: 'https://xenforo.com/'
1665
+ producer:
1666
+ name: 'XenForo Ltd.'
1667
+ url: 'https://xenforo.com/'
1668
+
1481
1669
  - regex: 'yacybot'
1482
1670
  name: 'YaCy'
1483
1671
  category: 'Search bot'
@@ -1502,6 +1690,14 @@
1502
1690
  name: 'Yahoo! Inc.'
1503
1691
  url: 'http://www.yahoo.com'
1504
1692
 
1693
+ - regex: 'YahooMailProxy'
1694
+ name: 'Yahoo! Mail Proxy'
1695
+ category: 'Service Agent'
1696
+ url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
1697
+ producer:
1698
+ name: 'Yahoo! Inc.'
1699
+ url: 'http://www.yahoo.com'
1700
+
1505
1701
  - regex: 'YahooCacheSystem'
1506
1702
  name: 'Yahoo! Cache System'
1507
1703
  category: 'Crawler'
@@ -1510,7 +1706,15 @@
1510
1706
  name: 'Yahoo! Inc.'
1511
1707
  url: 'http://www.yahoo.com'
1512
1708
 
1513
- - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher'
1709
+ - regex: 'Y!J-BRW'
1710
+ name: 'Yahoo! Japan BRW'
1711
+ category: 'Crawler'
1712
+ url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
1713
+ producer:
1714
+ name: 'Yahoo! Japan Corp.'
1715
+ url: 'https://www.yahoo.co.jp/'
1716
+
1717
+ - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1514
1718
  name: 'Yandex Bot'
1515
1719
  category: 'Search bot'
1516
1720
  url: 'http://www.yandex.com/bots'
@@ -1518,7 +1722,7 @@
1518
1722
  name: 'Yandex LLC'
1519
1723
  url: 'http://company.yandex.com'
1520
1724
 
1521
- - regex: 'Yeti'
1725
+ - regex: 'Yeti|NaverJapan'
1522
1726
  name: 'Yeti/Naverbot'
1523
1727
  category: 'Search bot'
1524
1728
  url: 'http://help.naver.com/robots/'
@@ -1595,18 +1799,18 @@
1595
1799
  - regex: 'HubPages.*crawlingpolicy'
1596
1800
  name: 'HubPages'
1597
1801
  category: 'Crawler'
1598
- url: 'http://hubpages.com/help/crawlingpolicy'
1802
+ url: 'https://hubpages.com/help/crawlingpolicy'
1599
1803
  producer:
1600
- name: 'HubPages'
1601
- url: 'http://hubpages.com/'
1804
+ name: 'HubPages, Inc.'
1805
+ url: 'https://discover.hubpages.com/'
1602
1806
 
1603
- - regex: 'Pinterest/\d\.\d.*www\.pinterest\.com.*'
1807
+ - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
1604
1808
  name: 'Pinterest'
1605
- url: ''
1809
+ url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
1606
1810
  category: 'Crawler'
1607
1811
  producer:
1608
1812
  name: 'Pinterest'
1609
- url: 'http://www.pinterest.com/'
1813
+ url: 'https://www.pinterest.com/'
1610
1814
 
1611
1815
  - regex: 'Site24x7'
1612
1816
  name: 'Site24x7 Website Monitoring'
@@ -1648,13 +1852,13 @@
1648
1852
  name: 'Monitor.Us'
1649
1853
  url: 'http://www.monitor.us'
1650
1854
 
1651
- - regex: 'Catchpoint( bot)?'
1855
+ - regex: 'Catchpoint'
1652
1856
  name: 'Catchpoint'
1653
1857
  category: 'Site Monitor'
1654
- url: ''
1858
+ url: 'https://www.catchpoint.com/'
1655
1859
  producer:
1656
1860
  name: 'Catchpoint Systems'
1657
- url: 'http://www.catchpoint.com/'
1861
+ url: 'https://www.catchpoint.com/'
1658
1862
 
1659
1863
  - regex: 'bitlybot'
1660
1864
  name: 'BitlyBot'
@@ -1722,7 +1926,7 @@
1722
1926
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1723
1927
  name: 'RSSRadio Bot'
1724
1928
 
1725
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
1929
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent)'
1726
1930
  name: 'Generic Bot'
1727
1931
 
1728
1932
  - regex: '^sentry'
@@ -1737,7 +1941,185 @@
1737
1941
  name: 'Spotify'
1738
1942
  url: 'https://www.spotify.com'
1739
1943
 
1740
- # Generic detections
1944
+ - regex: 'The Knowledge AI'
1945
+ name: 'The Knowledge AI'
1946
+ category: 'Crawler'
1947
+
1948
+ - regex: 'Embedly'
1949
+ name: 'Embedly'
1950
+ category: 'Crawler'
1951
+ url: 'https://support.embed.ly/hc/en-us'
1952
+ producer:
1953
+ name: 'A Medium, Corp.'
1954
+ url: 'https://medium.com/'
1955
+
1956
+ - regex: 'BrandVerity'
1957
+ name: 'BrandVerity'
1958
+ category: 'Crawler'
1959
+ url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
1960
+ producer:
1961
+ name: 'BrandVerity, Inc.'
1962
+ url: 'https://www.brandverity.com/'
1963
+
1964
+ - regex: 'Kaspersky Lab CFR link resolver'
1965
+ name: 'Kaspersky'
1966
+ category: 'Security Checker'
1967
+ url: 'https://www.kaspersky.com/'
1968
+ producer:
1969
+ name: 'AO Kaspersky Lab'
1970
+ url: 'https://www.kaspersky.com/'
1971
+
1972
+ - regex: 'eZ Publish Link Validator'
1973
+ name: 'eZ Publish Link Validator'
1974
+ category: 'Crawler'
1975
+ url: 'https://ez.no/'
1976
+ producer:
1977
+ name: 'eZ Systems AS'
1978
+ url: 'https://ez.no/'
1979
+
1980
+ - regex: 'woorankreview'
1981
+ name: 'WooRank'
1982
+ category: 'Search bot'
1983
+ url: 'https://www.woorank.com/'
1984
+ producer:
1985
+ name: 'WooRank sprl'
1986
+ url: 'https://www.woorank.com/'
1987
+
1988
+ - regex: '(Match|LinkCheck) by Siteimprove.com'
1989
+ name: 'Siteimprove'
1990
+ category: 'Search bot'
1991
+ url: 'https://siteimprove.com/'
1992
+ producer:
1993
+ name: 'Siteimprove GmbH'
1994
+ url: 'https://siteimprove.com/'
1995
+
1996
+ - regex: 'CATExplorador'
1997
+ name: 'CATExplorador'
1998
+ category: 'Search bot'
1999
+ url: 'https://fundacio.cat/ca/domini/'
2000
+ producer:
2001
+ name: 'Fundació puntCAT'
2002
+ url: 'https://fundacio.cat/ca/domini/'
2003
+
2004
+ - regex: 'Buck'
2005
+ name: 'Buck'
2006
+ category: 'Search bot'
2007
+ url: 'https://hypefactors.com/'
2008
+ producer:
2009
+ name: 'Hypefactors A/S'
2010
+ url: 'https://hypefactors.com/'
2011
+
2012
+ - regex: 'tracemyfile'
2013
+ name: 'TraceMyFile'
2014
+ category: 'Search bot'
2015
+ url: 'https://www.tracemyfile.com/'
2016
+ producer:
2017
+ name: 'Idee Inc.'
2018
+ url: 'http://ideeinc.com/'
2019
+
2020
+ - regex: 'zelist.ro feed parser'
2021
+ name: 'Ze List'
2022
+ url: 'https://www.zelist.ro/'
2023
+ category: 'Feed Fetcher'
2024
+ producer:
2025
+ name: 'Treeworks SRL'
2026
+ url: 'https://www.tree.ro/'
2027
+
2028
+ - regex: 'weborama-fetcher'
2029
+ name: 'Weborama'
2030
+ category: 'Search bot'
2031
+ url: 'https://weborama.com/'
2032
+ producer:
2033
+ name: 'Weborama SA'
2034
+ url: 'https://weborama.com/'
2035
+
2036
+ - regex: 'BoardReader Favicon Fetcher'
2037
+ name: 'BoardReader'
2038
+ category: 'Search bot'
2039
+ url: 'https://boardreader.com/'
2040
+ producer:
2041
+ name: 'Effyis Inc'
2042
+ url: 'https://boardreader.com/'
2043
+
2044
+ - regex: 'IDG/IT'
2045
+ name: 'IDG/IT'
2046
+ category: 'Search bot'
2047
+ url: 'https://spaziodati.eu/'
2048
+ producer:
2049
+ name: 'SpazioDati S.r.l.'
2050
+ url: 'https://spaziodati.eu/'
2051
+
2052
+ - regex: 'Bytespider'
2053
+ name: 'Bytespider'
2054
+ category: 'Search bot'
2055
+ url: 'https://bytedance.com/'
2056
+ producer:
2057
+ name: 'ByteDance Ltd.'
2058
+ url: 'https://bytedance.com/'
2059
+
2060
+ - regex: 'WikiDo'
2061
+ name: 'WikiDo'
2062
+ category: 'Search bot'
2063
+ url: 'https://www.wikido.com/'
2064
+ producer:
2065
+ name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
2066
+ url: 'https://www.wikido.com/'
2067
+
2068
+ - regex: 'AwarioSmartBot'
2069
+ name: 'Awario'
2070
+ category: 'Search bot'
2071
+ url: 'https://awario.com/bots.html'
2072
+ producer:
2073
+ name: 'Awario'
2074
+ url: 'https://awario.com/'
2075
+
2076
+ - regex: 'AwarioRssBot'
2077
+ name: 'Awario'
2078
+ category: 'Feed Fetcher'
2079
+ url: 'https://awario.com/bots.html'
2080
+ producer:
2081
+ name: 'Awario'
2082
+ url: 'https://awario.com/'
2083
+
2084
+ - regex: 'oBot'
2085
+ name: 'oBot'
2086
+ category: 'Search bot'
2087
+ url: 'https://www.xforce-security.com/crawler/'
2088
+ producer:
2089
+ name: 'IBM Germany Research & Development GmbH'
2090
+ url: 'https://exchange.xforce.ibmcloud.com/'
2091
+
2092
+ - regex: 'SMTBot'
2093
+ name: 'SMTBot'
2094
+ category: 'Search bot'
2095
+ url: 'https://www.similartech.com/smtbot'
2096
+ producer:
2097
+ name: 'SimilarTech Ltd.'
2098
+ url: 'https://www.similartech.com/'
2099
+
2100
+ - regex: 'LCC'
2101
+ name: 'LCC'
2102
+ category: 'Search bot'
2103
+ url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
2104
+ producer:
2105
+ name: 'Universität Leipzig'
2106
+ url: 'https://www.uni-leipzig.de/'
2107
+
2108
+ - regex: 'Startpagina-Linkchecker'
2109
+ name: 'Startpagina Linkchecker'
2110
+ category: 'Search bot'
2111
+ url: 'https://www.startpagina.nl/linkchecker'
2112
+ producer:
2113
+ name: 'Startpagina B.V.'
2114
+ url: 'https://www.startpagina.nl/'
2115
+
2116
+ - regex: 'GTmetrix'
2117
+ name: 'GTmetrix'
2118
+ category: 'Crawler'
2119
+ url: 'https://gtmetrix.com/'
2120
+ producer:
2121
+ name: 'Carbon60 Operating Co. Ltd.'
2122
+ url: 'https://www.carbon60.com/'
1741
2123
 
1742
2124
  - regex: 'Nutch'
1743
2125
  name: 'Nutch-based Bot'
@@ -1745,7 +2127,475 @@
1745
2127
  url: 'https://nutch.apache.org'
1746
2128
  producer:
1747
2129
  name: 'The Apache Software Foundation'
1748
- url: 'http://www.apache.org/foundation/'
2130
+ url: 'https://www.apache.org/foundation/'
2131
+
2132
+ - regex: 'Seobility'
2133
+ name: 'Seobility'
2134
+ category: 'Crawler'
2135
+ url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
2136
+
2137
+ - regex: 'Vercelbot'
2138
+ name: 'Vercel Bot'
2139
+ category: 'Service bot'
2140
+ url: 'https://vercel.com'
2141
+
2142
+ - regex: 'Grammarly'
2143
+ name: 'Grammarly'
2144
+ category: 'Service bot'
2145
+ url: 'https://www.grammarly.com'
2146
+
2147
+ - regex: 'Robozilla'
2148
+ name: 'Robozilla'
2149
+ category: 'Crawler'
2150
+
2151
+ - regex: 'Domains Project'
2152
+ name: 'Domains Project'
2153
+ category: 'Crawler'
2154
+ url: 'https://domainsproject.org'
1749
2155
 
2156
+ - regex: 'PetalBot'
2157
+ name: 'Petal Bot'
2158
+ category: 'Crawler'
2159
+ url: 'https://aspiegel.com/petalbot'
2160
+
2161
+ - regex: 'SerendeputyBot'
2162
+ name: 'Serendeputy Bot'
2163
+ category: 'Crawler'
2164
+ url: 'https://serendeputy.com/about/serendeputy-bot'
2165
+
2166
+ - regex: 'ias-va.*admantx.*service-fetcher'
2167
+ name: 'ADmantX Service Fetcher'
2168
+ category: 'Service bot'
2169
+ url: 'https://www.admantx.com/service-fetcher.html'
2170
+
2171
+ - regex: 'SemanticScholarBot'
2172
+ name: 'Semantic Scholar Bot'
2173
+ category: 'Crawler'
2174
+ url: 'https://www.semanticscholar.org/crawler'
2175
+
2176
+ - regex: 'VelenPublicWebCrawler'
2177
+ name: 'Velen Public Web Crawler'
2178
+ category: 'Crawler'
2179
+ url: 'https://hunter.io/robot'
2180
+
2181
+ - regex: 'Barkrowler'
2182
+ name: 'Barkrowler'
2183
+ category: 'Crawler'
2184
+ url: 'http://www.exensa.com/crawl'
2185
+
2186
+ - regex: 'BDCbot'
2187
+ name: 'BDCbot'
2188
+ category: 'Crawler'
2189
+ url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
2190
+ producer:
2191
+ name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
2192
+ url: 'https://bigdatacorp.com.br/'
2193
+
2194
+ - regex: 'adbeat'
2195
+ name: 'Adbeat'
2196
+ category: 'Crawler'
2197
+ url: 'https://www.adbeat.com/operation_policy'
2198
+ producer:
2199
+ name: 'PPC Labs LLC'
2200
+ url: 'https://www.adbeat.com/'
2201
+
2202
+ - regex: 'BW/(?:(\d+[\.\d]+))'
2203
+ name: 'BuiltWith'
2204
+ category: 'Crawler'
2205
+ url: 'https://builtwith.com/biup'
2206
+ producer:
2207
+ name: 'BuiltWith Pty Ltd'
2208
+ url: 'https://builtwith.com/'
2209
+
2210
+ - regex: 'https://whatis.contentkingapp.com'
2211
+ name: 'ContentKing'
2212
+ category: 'Site Monitor'
2213
+ url: 'https://whatis.contentkingapp.com/'
2214
+ producer:
2215
+ name: 'ContentKing BV'
2216
+ url: 'https://www.contentkingapp.com/'
2217
+
2218
+ - regex: 'MicroAdBot'
2219
+ name: 'MicroAdBot'
2220
+ category: 'Crawler'
2221
+ url: 'https://www.microad.co.jp/'
2222
+ producer:
2223
+ name: 'MicroAd, Inc.'
2224
+ url: 'https://www.microad.co.jp/'
2225
+
2226
+ - regex: 'PingAdmin.Ru'
2227
+ name: 'PingAdmin.Ru'
2228
+ category: 'Site Monitor'
2229
+ url: 'https://ping-admin.ru/'
2230
+
2231
+ - regex: 'notifyninja.+monitoring'
2232
+ name: 'Notify Ninja'
2233
+ category: 'Site Monitor'
2234
+ url: 'http://notifyninja.com'
2235
+
2236
+ - regex: 'WebDataStats'
2237
+ name: 'WebDataStats'
2238
+ category: 'Crawler'
2239
+ url: 'https://webdatastats.com/policy.html'
2240
+ producer:
2241
+ name: 'WebTehRazrabotka LLC'
2242
+ url: 'https://webdatastats.com/'
2243
+
2244
+ - regex: 'parse.ly scraper'
2245
+ name: 'parse.ly'
2246
+ category: 'Crawler'
2247
+ url: 'https://www.parse.ly/help/integration/crawler'
2248
+ producer:
2249
+ name: 'Parsely, Inc.'
2250
+ url: 'https://www.parse.ly/'
2251
+
2252
+ - regex: 'Nimbostratus-Bot'
2253
+ name: 'Nimbostratus Bot'
2254
+ category: 'Site Monitor'
2255
+ url: 'http://cloudsystemnetworks.com'
2256
+
2257
+ - regex: 'HeartRails_Capture/\d'
2258
+ name: 'Heart Rails Capture'
2259
+ category: 'Service Agent'
2260
+ url: 'http://capture.heartrails.com'
2261
+
2262
+ - regex: 'Project-Resonance'
2263
+ name: 'Project Resonance'
2264
+ category: 'Crawler'
2265
+ url: 'http://project-resonance.com'
2266
+
2267
+ - regex: 'DataXu/\d'
2268
+ name: 'DataXu'
2269
+ category: 'Service Agent'
2270
+ url: 'https://advertising.roku.com/dataxu'
2271
+ producer:
2272
+ name: 'Roku, Inc.'
2273
+ url: 'https://roku.com'
2274
+
2275
+ - regex: 'Cocolyzebot'
2276
+ name: 'Cocolyzebot'
2277
+ category: 'Crawler'
2278
+ url: 'https://cocolyze.com/en/cocolyzebot'
2279
+ producer:
2280
+ name: 'VSI INNOVATION SAS'
2281
+ url: 'https://vsi-innovation.com/'
2282
+
2283
+ - regex: 'veryhip'
2284
+ name: 'VeryHip'
2285
+ category: 'Crawler'
2286
+ url: 'https://veryhip.com/'
2287
+ producer:
2288
+ name: 'VeryHip'
2289
+ url: 'https://veryhip.com/'
2290
+
2291
+ - regex: 'LinkpadBot'
2292
+ name: 'LinkpadBot'
2293
+ category: 'Crawler'
2294
+ url: 'https://www.linkpad.org/'
2295
+ producer:
2296
+ name: 'Solomono LLC'
2297
+ url: 'https://www.linkpad.org/'
2298
+
2299
+ - regex: 'MuscatFerret'
2300
+ name: 'MuscatFerret'
2301
+ category: 'Crawler'
2302
+ url: 'http://www.webtop.com/'
2303
+
2304
+ - regex: 'PageThing.com'
2305
+ name: 'PageThing'
2306
+ category: 'Crawler'
2307
+ url: 'https://www.pagething.com/'
2308
+ producer:
2309
+ name: 'SPECIALNOISE LTD'
2310
+ url: 'https://www.specialnoise.com/'
2311
+
2312
+ - regex: 'ArchiveBox'
2313
+ name: 'ArchiveBox'
2314
+ url: 'https://archivebox.io/'
2315
+ category: 'Crawler'
2316
+ producer:
2317
+ name: ''
2318
+ url: ''
2319
+
2320
+ - regex: 'Choosito'
2321
+ name: 'Choosito'
2322
+ url: 'https://www.choosito.com/'
2323
+ category: 'Crawler'
2324
+ producer:
2325
+ name: 'Choosito! Inc.'
2326
+ url: 'https://www.choosito.com/'
2327
+
2328
+ - regex: 'datagnionbot'
2329
+ name: 'datagnionbot'
2330
+ url: 'https://www.datagnion.com/bot.html'
2331
+ category: 'Crawler'
2332
+ producer:
2333
+ name: 'DATAGNION GMBH'
2334
+ url: 'https://www.datagnion.com/'
2335
+
2336
+ - regex: 'WhatCMS'
2337
+ name: 'WhatCMS'
2338
+ url: 'https://whatcms.org/'
2339
+ category: 'Crawler'
2340
+ producer:
2341
+ name: 'Nineteen Ten LLC'
2342
+ url: 'https://whatcms.org/'
2343
+
2344
+ - regex: 'httpx'
2345
+ name: 'httpx'
2346
+ url: 'https://github.com/projectdiscovery/httpx'
2347
+ category: 'Crawler'
2348
+ producer:
2349
+ name: ''
2350
+ url: ''
2351
+
2352
+ - regex: 'scaninfo@expanseinc.com'
2353
+ name: 'Expanse'
2354
+ category: 'Security Checker'
2355
+ url: 'https://expanse.co/'
2356
+ producer:
2357
+ name: 'Expanse Inc.'
2358
+ url: 'https://expanse.co/'
2359
+
2360
+ - regex: 'HuaweiWebCatBot'
2361
+ name: 'HuaweiWebCatBot'
2362
+ category: 'Crawler'
2363
+ url: 'https://isecurity.huawei.com'
2364
+ producer:
2365
+ name: 'Huawei Technologies Co., Ltd.'
2366
+ url: 'https://huawei.com'
2367
+
2368
+ - regex: 'Hatena-Favicon'
2369
+ name: 'Hatena Favicon'
2370
+ category: 'Crawler'
2371
+ url: 'https://www.hatena.ne.jp/faq/'
2372
+ producer:
2373
+ name: 'Hatena Co., Ltd.'
2374
+ url: 'https://www.hatena.ne.jp'
2375
+
2376
+ - regex: 'RyowlEngine/(\d+)'
2377
+ name: 'Ryowl'
2378
+ category: 'Crawler'
2379
+ url: 'https://ryowl.org'
2380
+
2381
+ - regex: 'OdklBot/(\d+)'
2382
+ name: 'Odnoklassniki Bot'
2383
+ category: 'Crawler'
2384
+ url: 'https://odnoklassniki.ru'
2385
+
2386
+ - regex: 'Mediatoolkitbot'
2387
+ name: 'Mediatoolkit Bot'
2388
+ category: 'Crawler'
2389
+ url: 'https://mediatoolkit.com'
2390
+
2391
+ - regex: 'ZoominfoBot'
2392
+ name: 'ZoominfoBot'
2393
+ category: 'Crawler'
2394
+ url: 'https://www.zoominfo.com'
2395
+
2396
+ - regex: 'WeViKaBot/([\d+\.])'
2397
+ name: 'WeViKaBot'
2398
+ category: 'Crawler'
2399
+ url: 'http://www.wevika.de'
2400
+
2401
+ - regex: 'SEOkicks'
2402
+ name: 'SEOkicks'
2403
+ category: 'Crawler'
2404
+ url: 'https://www.seokicks.de/robot.html'
2405
+
2406
+ - regex: 'Plukkie/([\d+\.])'
2407
+ name: 'Plukkie'
2408
+ category: 'Crawler'
2409
+ url: 'http://www.botje.com/plukkie.htm'
2410
+
2411
+ - regex: 'proximic;'
2412
+ name: 'Comscore'
2413
+ category: 'Crawler'
2414
+ url: 'https://www.comscore.com/Web-Crawler'
2415
+
2416
+ - regex: 'SurdotlyBot/([\d+\.])'
2417
+ name: 'SurdotlyBot'
2418
+ category: 'Crawler'
2419
+ url: 'http://sur.ly/bot.html'
2420
+
2421
+ - regex: 'Gowikibot/([\d+\.])'
2422
+ name: 'Gowikibot'
2423
+ category: 'Crawler'
2424
+ url: 'http://www.gowikibot.com'
2425
+
2426
+ - regex: 'SabsimBot/([\d+\.])'
2427
+ name: 'SabsimBot'
2428
+ category: 'Crawler'
2429
+ url: 'https://sabsim.com'
2430
+
2431
+ - regex: 'LumtelBot/([\d+\.])'
2432
+ name: 'LumtelBot'
2433
+ category: 'Crawler'
2434
+ url: 'https://umtel.com'
2435
+
2436
+ - regex: 'PiplBot'
2437
+ name: 'PiplBot'
2438
+ category: 'Crawler'
2439
+ url: 'http://www.pipl.com/bot'
2440
+
2441
+ - regex: 'woobot/([\d+\.])'
2442
+ name: 'WooRank'
2443
+ category: 'Crawler'
2444
+ url: 'https://www.woorank.com/bot'
2445
+
2446
+ - regex: 'Cookiebot/([\d+\.])'
2447
+ name: 'Cookiebot'
2448
+ category: 'Crawler'
2449
+ url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
2450
+ producer:
2451
+ name: 'Cybot A/S'
2452
+ url: 'https://www.cybot.com/'
2453
+
2454
+ - regex: 'NetSystemsResearch'
2455
+ name: 'NetSystemsResearch'
2456
+ category: 'Security Checker'
2457
+ url: 'https://www.netsystemsresearch.com/'
2458
+ producer:
2459
+ name: 'NET SYSTEMS RESEARCH LLC'
2460
+ url: 'https://www.netsystemsresearch.com/'
2461
+
2462
+ - regex: 'CensysInspect/([\d+\.])'
2463
+ name: 'CensysInspect'
2464
+ category: 'Security Checker'
2465
+ url: 'https://about.censys.io/'
2466
+ producer:
2467
+ name: 'Censys, Inc.'
2468
+ url: 'https://censys.io/'
2469
+
2470
+ - regex: 'gdnplus.com'
2471
+ name: 'GDNP'
2472
+ category: 'Crawler'
2473
+ url: 'https://gdnplus.com/'
2474
+ producer:
2475
+ name: 'Global Digital Network Plus, LLC'
2476
+ url: 'https://gdnplus.com/'
2477
+
2478
+ - regex: 'WellKnownBot/([\d+\.])'
2479
+ name: 'WellKnownBot'
2480
+ category: 'Crawler'
2481
+ url: 'https://well-known.dev'
2482
+
2483
+ - regex: 'Adsbot/([\d+\.])'
2484
+ name: 'Adsbot'
2485
+ category: 'Crawler'
2486
+ url: 'https://seostar.co/robot/'
2487
+
2488
+ - regex: 'MTRobot/([\d+\.])'
2489
+ name: 'MTRobot'
2490
+ category: 'Crawler'
2491
+ url: 'https://metrics-tools.de/robot.html'
2492
+ producer:
2493
+ name: 'Metrics Tools'
2494
+ url: 'https://metrics-tools.de/'
2495
+
2496
+ - regex: 'serpstatbot/([\d+\.])'
2497
+ name: 'serpstatbot'
2498
+ category: 'Crawler'
2499
+ url: 'http://serpstatbot.com/'
2500
+ producer:
2501
+ name: 'Netpeak Ltd'
2502
+ url: 'https://netpeak.net/'
2503
+
2504
+ - regex: 'colly'
2505
+ name: 'colly'
2506
+ category: 'Crawler'
2507
+ url: 'https://github.com/gocolly/colly/'
2508
+
2509
+ - regex: 'l9tcpid/v([\d+\.])'
2510
+ name: 'l9tcpid'
2511
+ category: 'Security Checker'
2512
+ url: 'https://github.com/LeakIX/l9tcpid'
2513
+
2514
+ - regex: 'MegaIndex.ru/([\d+\.])'
2515
+ name: 'MegaIndex'
2516
+ category: 'Crawler'
2517
+ url: 'https://megaindex.com/crawler'
2518
+
2519
+ - regex: 'Seekport'
2520
+ name: 'Seekport'
2521
+ category: 'Crawler'
2522
+ url: 'http://www.seekport.com/'
2523
+ producer:
2524
+ name: 'SISTRIX GmbH'
2525
+ url: 'https://www.sistrix.de/'
2526
+
2527
+ - regex: 'seolyt/([\d+\.])'
2528
+ name: 'seolyt'
2529
+ category: 'Crawler'
2530
+ url: 'https://seolyt.com/'
2531
+
2532
+ - regex: 'YaK/([\d+\.])'
2533
+ name: 'YaK'
2534
+ category: 'Crawler'
2535
+ url: 'https://www.linkfluence.com/'
2536
+ producer:
2537
+ name: 'Linkfluence SAS'
2538
+ url: 'https://www.linkfluence.com/'
2539
+
2540
+ - regex: 'KomodiaBot/([\d+\.])'
2541
+ name: 'KomodiaBot'
2542
+ category: 'Crawler'
2543
+ url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
2544
+ producer:
2545
+ name: 'Komodia Inc.'
2546
+ url: 'https://www.komodia.com/'
2547
+
2548
+ - regex: 'Neevabot/([\d+\.])'
2549
+ name: 'Neevabot'
2550
+ category: 'Search bot'
2551
+ url: 'https://neeva.com/neevabot'
2552
+ producer:
2553
+ name: 'Neeva Inc.'
2554
+ url: 'https://neeva.com/'
2555
+
2556
+ - regex: 'LinkPreview/([\d+\.])'
2557
+ name: 'LinkPreview'
2558
+ category: 'Service Agent'
2559
+ url: 'https://www.linkpreview.net/'
2560
+
2561
+ - regex: 'JungleKeyThumbnail/([\d+\.])'
2562
+ name: 'JungleKeyThumbnail'
2563
+ category: 'Crawler'
2564
+ url: 'https://junglekey.com/'
2565
+
2566
+ - regex: 'rocketmonitor(?: |bot/)([\d+\.])'
2567
+ name: 'RocketMonitorBot'
2568
+ category: 'Site Monitor'
2569
+ url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
2570
+ producer:
2571
+ name: 'Radio Mast, Inc.'
2572
+ url: 'https://www.radiomast.io/'
2573
+
2574
+ - regex: 'SitemapParser-VIPnytt/([\d+\.])'
2575
+ name: 'SitemapParser-VIPnytt'
2576
+ category: 'Crawler'
2577
+ url: 'https://github.com/VIPnytt/SitemapParser/'
2578
+
2579
+
2580
+ - regex: '^Turnitin'
2581
+ name: 'Turnitin'
2582
+ category: 'Crawler'
2583
+ url: 'https://turnitin.com/robot/crawlerinfo.html'
2584
+
2585
+ - regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
2586
+ name: 'Dotcom Monitor'
2587
+ category: 'Site Monitor'
2588
+ url: 'https://www.dotcom-monitor.com'
2589
+
2590
+ - regex: 'ThinkChaos/'
2591
+ name: 'ThinkChaos'
2592
+ category: 'Crawler'
2593
+
2594
+ - regex: 'DataForSeoBot'
2595
+ name: 'DataForSeoBot'
2596
+ category: 'Crawler'
2597
+ url: 'https://dataforseo.com/dataforseo-bot'
2598
+
2599
+ # Generic detections
1750
2600
  - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
1751
2601
  name: 'Generic Bot'