device_detector 0.9.1 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91)
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +49 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +5 -9
  5. data/CHANGELOG.md +16 -3
  6. data/README.md +7 -9
  7. data/Rakefile +19 -13
  8. data/device_detector.gemspec +1 -0
  9. data/lib/device_detector.rb +32 -28
  10. data/lib/device_detector/bot.rb +2 -2
  11. data/lib/device_detector/client.rb +3 -2
  12. data/lib/device_detector/device.rb +44 -21
  13. data/lib/device_detector/memory_cache.rb +26 -19
  14. data/lib/device_detector/metadata_extractor.rb +7 -8
  15. data/lib/device_detector/model_extractor.rb +3 -3
  16. data/lib/device_detector/name_extractor.rb +2 -2
  17. data/lib/device_detector/os.rb +121 -111
  18. data/lib/device_detector/parser.rb +22 -9
  19. data/lib/device_detector/version.rb +3 -1
  20. data/lib/device_detector/version_extractor.rb +2 -3
  21. data/regexes/bots.yml +840 -20
  22. data/regexes/client/browser_engine.yml +11 -2
  23. data/regexes/client/browsers.yml +909 -108
  24. data/regexes/client/feed_readers.yml +38 -2
  25. data/regexes/client/libraries.yml +76 -2
  26. data/regexes/client/mediaplayers.yml +25 -5
  27. data/regexes/client/mobile_apps.yml +167 -2
  28. data/regexes/client/pim.yml +10 -1
  29. data/regexes/device/cameras.yml +1 -1
  30. data/regexes/device/car_browsers.yml +7 -3
  31. data/regexes/device/consoles.yml +3 -3
  32. data/regexes/device/mobiles.yml +10123 -465
  33. data/regexes/device/portable_media_player.yml +4 -6
  34. data/regexes/device/televisions.yml +18 -4
  35. data/regexes/oss.yml +115 -21
  36. data/regexes/vendorfragments.yml +6 -2
  37. data/spec/device_detector/concrete_user_agent_spec.rb +16 -17
  38. data/spec/device_detector/detector_fixtures_spec.rb +51 -11
  39. data/spec/device_detector/device_spec.rb +28 -48
  40. data/spec/device_detector/memory_cache_spec.rb +60 -28
  41. data/spec/device_detector/model_extractor_spec.rb +3 -3
  42. data/spec/device_detector/version_extractor_spec.rb +5 -6
  43. data/spec/device_detector_spec.rb +60 -69
  44. data/spec/fixtures/client/browser.yml +1785 -262
  45. data/spec/fixtures/client/feed_reader.yml +47 -35
  46. data/spec/fixtures/client/library.yml +112 -3
  47. data/spec/fixtures/client/mediaplayer.yml +32 -37
  48. data/spec/fixtures/client/mobile_app.yml +193 -6
  49. data/spec/fixtures/client/pim.yml +37 -18
  50. data/spec/fixtures/detector/bots.yml +1426 -118
  51. data/spec/fixtures/detector/camera.yml +36 -10
  52. data/spec/fixtures/detector/car_browser.yml +64 -3
  53. data/spec/fixtures/detector/console.yml +80 -26
  54. data/spec/fixtures/detector/desktop.yml +2222 -1589
  55. data/spec/fixtures/detector/feature_phone.yml +151 -42
  56. data/spec/fixtures/detector/feed_reader.yml +186 -121
  57. data/spec/fixtures/detector/mediaplayer.yml +113 -39
  58. data/spec/fixtures/detector/mobile_apps.yml +366 -21
  59. data/spec/fixtures/detector/phablet.yml +2597 -570
  60. data/spec/fixtures/detector/portable_media_player.yml +41 -16
  61. data/spec/fixtures/detector/smart_display.yml +8 -5
  62. data/spec/fixtures/detector/smart_speaker.yml +55 -0
  63. data/spec/fixtures/detector/smartphone-1.yml +5468 -5010
  64. data/spec/fixtures/detector/smartphone-10.yml +9977 -0
  65. data/spec/fixtures/detector/smartphone-11.yml +9891 -0
  66. data/spec/fixtures/detector/smartphone-12.yml +9906 -0
  67. data/spec/fixtures/detector/smartphone-13.yml +9920 -0
  68. data/spec/fixtures/detector/smartphone-14.yml +2662 -0
  69. data/spec/fixtures/detector/smartphone-2.yml +5213 -4635
  70. data/spec/fixtures/detector/smartphone-3.yml +5082 -4533
  71. data/spec/fixtures/detector/smartphone-4.yml +6806 -2625
  72. data/spec/fixtures/detector/smartphone-5.yml +9914 -0
  73. data/spec/fixtures/detector/smartphone-6.yml +9962 -0
  74. data/spec/fixtures/detector/smartphone-7.yml +9899 -0
  75. data/spec/fixtures/detector/smartphone-8.yml +9931 -0
  76. data/spec/fixtures/detector/smartphone-9.yml +9899 -0
  77. data/spec/fixtures/detector/smartphone.yml +5225 -4652
  78. data/spec/fixtures/detector/tablet-1.yml +4691 -4191
  79. data/spec/fixtures/detector/tablet-2.yml +9800 -71
  80. data/spec/fixtures/detector/tablet-3.yml +9959 -0
  81. data/spec/fixtures/detector/tablet-4.yml +4528 -0
  82. data/spec/fixtures/detector/tablet.yml +4664 -4177
  83. data/spec/fixtures/detector/tv.yml +3399 -1048
  84. data/spec/fixtures/detector/unknown.yml +1017 -977
  85. data/spec/fixtures/detector/wearable.yml +61 -0
  86. data/spec/fixtures/device/camera.yml +4 -3
  87. data/spec/fixtures/device/car_browser.yml +9 -2
  88. data/spec/fixtures/device/console.yml +15 -14
  89. data/spec/fixtures/parser/oss.yml +284 -2
  90. data/spec/fixtures/parser/vendorfragments.yml +8 -2
  91. metadata +50 -7
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 class DeviceDetector
-  VERSION = '0.9.1'
+  VERSION = '1.0.4'
 end
@@ -1,12 +1,11 @@
+# frozen_string_literal: true
+
 class DeviceDetector
   class VersionExtractor < MetadataExtractor
-
     private
 
     def metadata_string
       String(regex_meta[:version])
     end
-
   end
 end
-
@@ -1,7 +1,7 @@
1
1
  ###############
2
2
  # Device Detector - The Universal Device Detection library for parsing User Agents
3
3
  #
4
- # @link http://piwik.org
4
+ # @link https://matomo.org
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
@@ -53,6 +53,21 @@
53
53
  name: 'Alexa Internet'
54
54
  url: 'http://www.alexa.com'
55
55
 
56
+ - regex: 'alexa site audit'
57
+ name: 'Alexa Site Audit'
58
+ category: 'Site Monitor'
59
+ url: 'http://www.alexa.com/help/webmasters'
60
+ producer:
61
+ name: 'Alexa Internet'
62
+ url: 'http://www.alexa.com'
63
+
64
+ - regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
65
+ name: 'Amazon Route53 Health Check'
66
+ category: 'Service Agent'
67
+ producer:
68
+ name: 'Amazon Web Services'
69
+ url: 'https://aws.amazon.com/'
70
+
56
71
  - regex: 'AmorankSpider'
57
72
  name: 'Amorank Spider'
58
73
  category: 'Crawler'
@@ -61,6 +76,14 @@
61
76
  name: 'Amorank'
62
77
  url: 'http://www.amorank.com'
63
78
 
79
+ - regex: 'ApacheBench'
80
+ name: 'ApacheBench'
81
+ category: 'Benchmark'
82
+ url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
83
+ producer:
84
+ name: 'The Apache Software Foundation'
85
+ url: 'http://www.apache.org/foundation/'
86
+
64
87
  - regex: 'Applebot'
65
88
  name: 'Applebot'
66
89
  category: 'Crawler'
@@ -69,6 +92,30 @@
69
92
  name: 'Apple Inc'
70
93
  url: 'http://www.apple.com'
71
94
 
95
+ - regex: 'Arachni'
96
+ name: 'Arachni'
97
+ category: 'Security Checker'
98
+ url: 'http://www.arachni-scanner.com'
99
+ producer:
100
+ name: 'Sarosys LLC'
101
+ url: 'http://www.sarosys.com/'
102
+
103
+ - regex: 'AspiegelBot'
104
+ name: 'AspiegelBot'
105
+ category: 'Crawler'
106
+ url: 'https://aspiegel.com/'
107
+ producer:
108
+ name: 'Huawei'
109
+ url: 'https://www.huawei.com/'
110
+
111
+ - regex: 'Castro 2, Episode Duration Lookup'
112
+ name: 'Castro 2'
113
+ category: 'Service Agent'
114
+ url: 'http://supertop.co/castro/'
115
+ producer:
116
+ name: 'Supertop'
117
+ url: 'http://supertop.co'
118
+
72
119
  - regex: 'Curious George'
73
120
  name: 'Analytics SEO Crawler'
74
121
  category: 'Crawler'
@@ -93,8 +140,8 @@
93
140
  name: 'Ask Jeeves Inc.'
94
141
  url: 'http://www.ask.com'
95
142
 
96
- - regex: 'Backlink-Ceck\.de'
97
- name: 'Backlink-Ceck.de'
143
+ - regex: 'Backlink-Check\.de'
144
+ name: 'Backlink-Check.de'
98
145
  category: 'Crawler'
99
146
  url: 'http://www.backlink-check.de/bot.html'
100
147
  producer:
@@ -165,6 +212,13 @@
165
212
  name: 'Blogtrottr Ltd'
166
213
  url: 'https://blogtrottr.com/'
167
214
 
215
+ - regex: 'BoardReader Blog Indexer'
216
+ name: 'BoardReader Blog Indexer'
217
+ category: 'Crawler'
218
+ producer:
219
+ name: 'BoardReader'
220
+ url: 'http://boardreader.com/'
221
+
168
222
  - regex: 'BountiiBot'
169
223
  name: 'Bountii Bot'
170
224
  category: 'Search bot'
@@ -186,8 +240,8 @@
186
240
  category: 'Crawler'
187
241
  url: 'http://law.di.unimi.it/BUbiNG.html'
188
242
  producer:
189
- name: ''
190
- url: ''
243
+ name: 'The Laboratory for Web Algorithmics (LAW)'
244
+ url: 'http://law.di.unimi.it/software.php#buging'
191
245
 
192
246
  - regex: '(?<!HTC)[ _]Butterfly/'
193
247
  name: 'Butterfly Robot'
@@ -221,6 +275,14 @@
221
275
  name: '10betterpages GmbH'
222
276
  url: 'http://cliqz.com'
223
277
 
278
+ - regex: 'Cloudflare-AMP'
279
+ name: 'CloudFlare AMP Fetcher'
280
+ category: 'Crawler'
281
+ url: 'https://amp.cloudflare.com/doc/fetcher.html'
282
+ producer:
283
+ name: 'CloudFlare'
284
+ url: 'http://www.cloudflare.com'
285
+
224
286
  - regex: 'CloudFlare-AlwaysOnline'
225
287
  name: 'CloudFlare Always Online'
226
288
  category: 'Site Monitor'
@@ -229,13 +291,21 @@
229
291
  name: 'CloudFlare'
230
292
  url: 'http://www.cloudflare.com'
231
293
 
232
- - regex: 'coccoc/'
294
+ - regex: 'coccoc|coccocbot(-ads|-fast|-image|-shopping|-web)?'
233
295
  name: 'Cốc Cốc Bot'
234
- url: 'http://help.coccoc.com/'
296
+ url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
235
297
  category: 'Search bot'
236
298
  producer:
237
299
  name: 'Cốc Cốc'
238
- url: 'http://coccoc.com/'
300
+ url: 'https://coccoc.com/'
301
+
302
+ - regex: 'collectd'
303
+ name: 'Collectd'
304
+ url: 'https://collectd.org/'
305
+ category: 'Site Monitor'
306
+ producer:
307
+ name: 'Collectd'
308
+ url: 'https://collectd.org/'
239
309
 
240
310
  - regex: 'CommaFeed'
241
311
  name: 'CommaFeed'
@@ -245,6 +315,39 @@
245
315
  name: ''
246
316
  url: ''
247
317
 
318
+ - regex: 'CSS Certificate Spider'
319
+ name: 'CSS Certificate Spider'
320
+ category: 'Crawler'
321
+ url: 'http://www.css-security.com/certificatespider/'
322
+ producer:
323
+ name: 'Certified Security Solutions'
324
+ url: 'https://www.css-security.com/company/about-us/'
325
+
326
+ - regex: 'Datadog Agent'
327
+ name: 'Datadog Agent'
328
+ url: 'https://github.com/DataDog/dd-agent'
329
+ category: 'Site Monitor'
330
+ producer:
331
+ name: 'Datadog'
332
+ url: 'https://www.datadoghq.com/'
333
+
334
+ - regex: 'Datanyze'
335
+ name: 'Datanyze'
336
+ url: ''
337
+ category: 'Crawler'
338
+ producer:
339
+ name: 'Datanyze'
340
+ url: 'https://www.datanyze.com'
341
+
342
+
343
+ - regex: 'Dataprovider'
344
+ name: 'Dataprovider'
345
+ category: 'Crawler'
346
+ url: ''
347
+ producer:
348
+ name: 'Dataprovider B.V.'
349
+ url: 'https://www.dataprovider.com/'
350
+
248
351
  - regex: 'Daum(oa)?[ /][0-9]'
249
352
  name: 'Daum'
250
353
  category: 'Search bot'
@@ -285,6 +388,14 @@
285
388
  name: 'SEOmoz, Inc.'
286
389
  url: 'http://moz.com/'
287
390
 
391
+ - regex: 'DuckDuck(?:Go-Favicons-)?Bot'
392
+ name: 'DuckDuckGo Bot'
393
+ category: 'Search bot'
394
+ url: 'https://duckduckgo.com/duckduckbot'
395
+ producer:
396
+ name: 'DuckDuckGo'
397
+ url: 'https://duckduckgo.com/'
398
+
288
399
  - regex: 'EasouSpider'
289
400
  name: 'Easou Spider'
290
401
  category: 'Search bot'
@@ -293,6 +404,13 @@
293
404
  name: 'easou ICP'
294
405
  url: 'http://www.easou.com'
295
406
 
407
+ - regex: 'eCairn-Grabber'
408
+ name: 'eCairn-Grabber'
409
+ category: 'Crawler'
410
+ producer:
411
+ name: 'eCairn'
412
+ url: 'https://ecairn.com'
413
+
296
414
  - regex: 'EMail Exractor'
297
415
  name: 'EMail Exractor'
298
416
  category: 'Crawler'
@@ -301,6 +419,14 @@
301
419
  name: ''
302
420
  url: ''
303
421
 
422
+ - regex: 'evc-batch'
423
+ name: 'evc-batch'
424
+ category: 'Crawler'
425
+ url: ''
426
+ producer:
427
+ name: 'eVenture Capital Partners II, LLC'
428
+ url: 'http://www.eventures.vc/'
429
+
304
430
  - regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
305
431
  name: 'ExaBot'
306
432
  category: 'Crawler'
@@ -325,7 +451,7 @@
325
451
  name: 'SEOmoz, Inc.'
326
452
  url: 'http://moz.com/'
327
453
 
328
- - regex: 'facebookexternalhit|facebookplatform'
454
+ - regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
329
455
  name: 'Facebook External Hit'
330
456
  category: 'Social Media Agent'
331
457
  url: 'https://www.facebook.com/externalhit_uatext.php'
@@ -381,6 +507,24 @@
381
507
  name: ''
382
508
  url: ''
383
509
 
510
+ - regex: 'FlipboardProxy|FlipboardRSS'
511
+ name: 'Flipboard'
512
+ url: 'http://flipboard.com/browserproxy'
513
+ category: 'Feed Fetcher'
514
+ producer:
515
+ name: 'Flipboard'
516
+ url: 'http://flipboard.com/'
517
+
518
+ - regex: 'Findxbot'
519
+ name: 'Findxbot'
520
+ category: 'Crawler'
521
+ url: 'http://www.findxbot.com'
522
+
523
+ - regex: 'FreshRSS'
524
+ name: 'FreshRSS'
525
+ category: 'Feed Fetcher'
526
+ url: 'https://freshrss.org/'
527
+
384
528
  - regex: 'Genieo'
385
529
  name: 'Genieo Web filter'
386
530
  category: ''
@@ -389,6 +533,14 @@
389
533
  name: 'Genieo'
390
534
  url: 'http://www.genieo.com'
391
535
 
536
+ - regex: 'GigablastOpenSource'
537
+ name: 'Gigablast'
538
+ category: 'Search bot'
539
+ url: 'https://github.com/gigablast/open-source-search-engine'
540
+ producer:
541
+ name: 'Matt Wells'
542
+ url: 'http://www.gigablast.com/faq.html'
543
+
392
544
  - regex: 'Gluten Free Crawler'
393
545
  name: 'Gluten Free Crawler'
394
546
  category: 'Crawler'
@@ -405,6 +557,18 @@
405
557
  name: 'NTT Resonant'
406
558
  url: 'http://goo.ne.jp'
407
559
 
560
+ - regex: 'Google Favicon'
561
+ name: 'Google Favicon'
562
+ category: 'Crawler'
563
+
564
+ - regex: 'Google Search Console'
565
+ name: 'Google Search Console'
566
+ category: 'Crawler'
567
+ url: 'https://search.google.com/search-console/about'
568
+ producer:
569
+ name: 'Google Inc.'
570
+ url: 'http://www.google.com'
571
+
408
572
  - regex: 'Google Page Speed Insights'
409
573
  name: 'Google PageSpeed Insights'
410
574
  category: 'Site Monitor'
@@ -421,6 +585,30 @@
421
585
  name: 'Google Inc.'
422
586
  url: 'http://www.google.com'
423
587
 
588
+ - regex: 'Google-Cloud-Scheduler'
589
+ name: 'Google Cloud Scheduler'
590
+ category: 'Crawler'
591
+ url: 'https://cloud.google.com/scheduler'
592
+ producer:
593
+ name: 'Google Inc.'
594
+ url: 'https://www.google.com'
595
+
596
+ - regex: 'Google-Structured-Data-Testing-Tool'
597
+ name: 'Google Structured Data Testing Tool'
598
+ category: 'Validator'
599
+ url: 'https://search.google.com/structured-data/testing-tool'
600
+ producer:
601
+ name: 'Google Inc.'
602
+ url: 'http://www.google.com'
603
+
604
+ - regex: 'GoogleStackdriverMonitoring'
605
+ name: 'Google Stackdriver Monitoring'
606
+ category: 'Site Monitor'
607
+ url: 'https://cloud.google.com/monitoring'
608
+ producer:
609
+ name: 'Google Inc.'
610
+ url: 'https://www.google.com'
611
+
424
612
  - regex: 'via ggpht\.com GoogleImageProxy'
425
613
  name: 'Gmail Image Proxy'
426
614
  category: 'Crawler'
@@ -429,7 +617,39 @@
429
617
  name: 'Google Inc.'
430
618
  url: 'http://www.google.com'
431
619
 
432
- - regex: 'Googlebot(-Mobile|-Image|-Video|-News)?|Feedfetcher-Google|Google-Test|Google-Site-Verification|Google Web Preview|AdsBot-Google(-Mobile)?|Mediapartners-Google|Google.*/\+/web/snippet|GoogleProducer|Google[ -]Publisher[ -]Plugin'
620
+ - regex: 'SeznamEmailProxy'
621
+ name: 'Seznam Email Proxy'
622
+ category: 'Crawler'
623
+ url: ''
624
+ producer:
625
+ name: 'Seznam.cz, a.s.'
626
+ url: 'http://www.seznam.cz/'
627
+
628
+ - regex: 'Seznam-Zbozi-robot'
629
+ name: 'Seznam Zbozi.cz'
630
+ category: 'Crawler'
631
+ url: ''
632
+ producer:
633
+ name: 'Seznam.cz, a.s.'
634
+ url: 'https://www.zbozi.cz/'
635
+
636
+ - regex: 'Heurekabot-Feed'
637
+ name: 'Heureka Feed'
638
+ category: 'Crawler'
639
+ url: 'https://sluzby.heureka.cz/napoveda/heurekabot/'
640
+ producer:
641
+ name: 'Heureka.cz, a.s.'
642
+ url: 'https://www.heureka.cz/'
643
+
644
+ - regex: 'ShopAlike'
645
+ name: 'ShopAlike'
646
+ category: 'Crawler'
647
+ url: ''
648
+ producer:
649
+ name: 'Visual Meta'
650
+ url: 'https://www.shopalike.cz/'
651
+
652
+ - regex: 'AdsBot-Google(-Mobile)?|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot(-Mobile|-Image|-Video|-News)?|GoogleProducer|Google.*/\+/web/snippet'
433
653
  name: 'Googlebot'
434
654
  category: 'Search bot'
435
655
  url: 'http://www.google.com/bot.html'
@@ -445,6 +665,14 @@
445
665
  name: 'The Internet Archive'
446
666
  url: 'http://www.archive.org'
447
667
 
668
+ - regex: 'HubSpot '
669
+ name: 'HubSpot'
670
+ category: 'Crawler'
671
+ producer:
672
+ name: 'HubSpot Inc.'
673
+ url: 'https://www.hubspot.com'
674
+
675
+
448
676
  - regex: 'HTTPMon'
449
677
  name: 'HTTPMon'
450
678
  category: 'Site Monitor'
@@ -461,6 +689,11 @@
461
689
  name: ''
462
690
  url: ''
463
691
 
692
+ - regex: 'inoreader.com'
693
+ name: 'inoreader'
694
+ category: 'Feed Reader'
695
+ url: 'https://www.inoreader.com'
696
+
464
697
  - regex: 'iisbot'
465
698
  name: 'IIS Site Analysis'
466
699
  category: 'Crawler'
@@ -469,6 +702,21 @@
469
702
  name: 'Microsoft Corporation'
470
703
  url: 'http://www.microsoft.com'
471
704
 
705
+ - regex: 'ips-agent'
706
+ name: 'IPS Agent'
707
+ category: 'crawler'
708
+ producer:
709
+ name: 'VeriSign, Inc'
710
+ url: 'http://www.verisign.com/'
711
+
712
+ - regex: 'IP-Guide\.com'
713
+ name: 'IP-Guide Crawler'
714
+ category: 'Crawler'
715
+ url: ''
716
+ producer:
717
+ name: ''
718
+ url: 'https://ip-guide.com'
719
+
472
720
  - regex: 'kouio'
473
721
  name: 'Kouio'
474
722
  url: 'http://kouio.com/'
@@ -485,6 +733,14 @@
485
733
  name: ''
486
734
  url: ''
487
735
 
736
+ - regex: '([A-z0-9]*)-Lighthouse'
737
+ name: 'Lighthouse'
738
+ category: 'Site Monitor'
739
+ url: 'https://developers.google.com/web/tools/lighthouse'
740
+ producer:
741
+ name: 'Lighthouse'
742
+ url: 'https://developers.google.com/web/tools/lighthouse'
743
+
488
744
  - regex: 'linkdexbot(-mobile)?|linkdex\.com'
489
745
  name: 'Linkdex Bot'
490
746
  category: 'Search bot'
@@ -532,6 +788,18 @@
532
788
  name: ''
533
789
  url: ''
534
790
 
791
+ - regex : 'masscan'
792
+ name: 'masscan'
793
+ url: 'https://github.com/robertdavidgraham/masscan'
794
+ category: 'Crawler'
795
+ producer:
796
+ name: 'Robert Graham'
797
+ url: 'https://github.com/robertdavidgraham'
798
+
799
+ - regex: 'Mastodon/'
800
+ name: 'Mastodon Bot'
801
+ category: 'Social Media Agent'
802
+
535
803
  - regex: 'meanpathbot'
536
804
  name: 'Meanpath Bot'
537
805
  category: 'Search bot'
@@ -540,6 +808,19 @@
540
808
  name: 'Meanpath'
541
809
  url: 'http://www.meanpath.com'
542
810
 
811
+ - regex: 'MetaJobBot'
812
+ name: 'MetaJobBot'
813
+ category: 'Crawler'
814
+ url: 'http://www.metajob.at/the/crawler'
815
+ producer:
816
+ name: 'MetaJob'
817
+ url: 'http://www.metajob.at'
818
+
819
+ - regex: 'MetaInspector'
820
+ name: 'MetaInspector'
821
+ category: 'Crawler'
822
+ url: 'https://github.com/jaimeiniesta/metainspector'
823
+
543
824
  - regex: 'MixrankBot'
544
825
  name: 'Mixrank Bot'
545
826
  category: 'Crawler'
@@ -556,6 +837,13 @@
556
837
  name: 'Majestic-12'
557
838
  url: 'http://majestic12.co.uk'
558
839
 
840
+ - regex: 'Mnogosearch'
841
+ name: 'Mnogosearch'
842
+ category: 'Search bot'
843
+ url: 'http://www.mnogosearch.org/'
844
+ producer:
845
+ name: 'Lavtech.Com Corp.'
846
+ url: ''
559
847
  - regex: 'MojeekBot'
560
848
  name: 'MojeekBot'
561
849
  category: 'Search bot'
@@ -564,6 +852,14 @@
564
852
  name: 'Mojeek Ltd.'
565
853
  url: 'http://www.mojeek.com'
566
854
 
855
+ - regex: 'munin'
856
+ name: 'Munin'
857
+ category: 'Site Monitor'
858
+ url: 'http://munin-monitoring.org/'
859
+ producer:
860
+ name: 'Munin'
861
+ url: 'http://munin-monitoring.org/'
862
+
567
863
  - regex: 'NalezenCzBot'
568
864
  name: 'NalezenCzBot'
569
865
  category: 'Crawler'
@@ -572,7 +868,19 @@
572
868
  name: 'Jaroslav Kuboš'
573
869
  url: ''
574
870
 
575
- - regex: 'Netcraft Web Server Survey'
871
+ - regex: 'check_http/v'
872
+ name: 'Nagios check_http'
873
+ category: 'Site Monitor'
874
+ url: 'https://nagios.org'
875
+ producer:
876
+ name: 'Nagios Plugins Development Team'
877
+ url: 'https://nagios.org'
878
+
879
+ - regex: 'nbertaupete95\(at\)gmail.com'
880
+ name: 'nbertaupete95'
881
+ category: 'Crawler'
882
+
883
+ - regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
576
884
  name: 'Netcraft Survey Bot'
577
885
  category: 'Search bot'
578
886
  url: ''
@@ -580,6 +888,14 @@
580
888
  name: 'Netcraft'
581
889
  url: 'http://www.netcraft.com'
582
890
 
891
+ - regex: 'netEstate NE Crawler'
892
+ name: 'netEstate'
893
+ category: 'Crawler'
894
+ url: 'http://www.website-datenbank.de/Impressum'
895
+ producer:
896
+ name: 'netEstate GmbH'
897
+ url: 'https://www.netestate.de/en/'
898
+
583
899
  - regex: 'Netvibes'
584
900
  name: 'Netvibes'
585
901
  url: 'http://www.netvibes.com/'
@@ -612,7 +928,25 @@
612
928
  name: 'Northern Light'
613
929
  url: 'http://northernlight.com'
614
930
 
615
- - regex: 'omgilibot'
931
+ - regex: 'Nmap Scripting Engine'
932
+ name: 'Nmap'
933
+ category: 'Security Checker'
934
+ url: 'https://nmap.org/book/nse.html'
935
+ producer:
936
+ name: 'Nmap'
937
+ url: 'https://nmap.org/'
938
+
939
+ - regex: 'Nuzzel'
940
+ name: 'Nuzzel'
941
+ category: 'Crawler'
942
+ producer:
943
+ name: 'Nuzzel'
944
+ url: https://www.nuzzel.com/
945
+
946
+ - regex: 'Octopus [0-9]'
947
+ name: 'Octopus'
948
+
949
+ - regex: 'omgili(?:bot)?'
616
950
  name: 'Omgili bot'
617
951
  category: 'Search bot'
618
952
  url: 'http://www.omgili.com/Crawler.html'
@@ -660,6 +994,11 @@
660
994
  name: 'Smallrivers SA'
661
995
  url: 'http://www.paper.li'
662
996
 
997
+ - regex: 'phantomas/'
998
+ name: 'Phantomas'
999
+ category: 'Site Monitor'
1000
+ url: 'https://github.com/macbre/phantomas'
1001
+
663
1002
  - regex: 'phpservermon'
664
1003
  name: 'PHP Server Monitor'
665
1004
  category: 'Site Monitor'
@@ -668,6 +1007,22 @@
668
1007
  name: 'PHP Server Monitor'
669
1008
  url: 'http://www.phpservermonitor.org/'
670
1009
 
1010
+ - regex: 'PocketParser'
1011
+ name: 'PocketParser'
1012
+ category: 'Read-it-later Service'
1013
+ url: 'https://getpocket.com/pocketparser_ua'
1014
+ producer:
1015
+ name: 'Pocket'
1016
+ url: 'https://getpocket.com/'
1017
+
1018
+ - regex: 'PritTorrent'
1019
+ name: 'PritTorrent'
1020
+ category: 'Crawler'
1021
+ url: 'https://github.com/astro/prittorrent'
1022
+ producer:
1023
+ name: 'Bitlove'
1024
+ url: 'http://bitlove.org/'
1025
+
671
1026
  - regex: 'psbot(-page)?'
672
1027
  name: 'Picsearch bot'
673
1028
  category: 'Search bot'
@@ -684,6 +1039,22 @@
684
1039
  name: 'Pingdom AB'
685
1040
  url: 'https://www.pingdom.com'
686
1041
 
1042
+ - regex: 'Quora Link Preview'
1043
+ name: 'Quora Link Preview'
1044
+ category: 'Crawler'
1045
+ url: ''
1046
+ producer:
1047
+ name: 'Quora'
1048
+ url: 'http://www.quora.com'
1049
+
1050
+ - regex: 'RamblerMail'
1051
+ name: 'RamblerMail Image Proxy'
1052
+ category: 'Crawler'
1053
+ url: ''
1054
+ producer:
1055
+ name: 'Rambler&Co'
1056
+ url: 'https://rambler-co.ru/'
1057
+
687
1058
  - regex: 'QuerySeekerSpider'
688
1059
  name: 'QuerySeekerSpider'
689
1060
  category: 'Crawler'
@@ -692,6 +1063,19 @@
692
1063
  name: 'QueryEye Inc.'
693
1064
  url: 'http://queryeye.com'
694
1065
 
1066
+ - regex: 'Qwantify'
1067
+ name: 'Qwantify'
1068
+ category: 'Crawler'
1069
+ url: 'https://www.qwant.com/'
1070
+ producer:
1071
+ name: 'Qwant Corporation'
1072
+ url: 'https://www.qwant.com/'
1073
+
1074
+ - regex: 'Rainmeter'
1075
+ name: 'Rainmeter'
1076
+ category: 'Crawler'
1077
+ url: 'https://www.rainmeter.net'
1078
+
695
1079
  - regex: 'redditbot'
696
1080
  name: 'Reddit Bot'
697
1081
  category: 'Social Media Agent'
@@ -700,6 +1084,14 @@
700
1084
  name: 'reddit inc.'
701
1085
  url: 'http://www.reddit.com'
702
1086
 
1087
+ - regex: 'Riddler'
1088
+ name: 'Riddler'
1089
+ category: 'Security search bot'
1090
+ url: 'https://riddler.io/about'
1091
+ producer:
1092
+ name: 'F-Secure'
1093
+ url: 'https://www.f-secure.com'
1094
+
703
1095
  - regex: 'rogerbot'
704
1096
  name: 'Rogerbot'
705
1097
  category: 'Crawler'
@@ -716,6 +1108,14 @@
716
1108
  name: 'Roihunter a.s.'
717
1109
  url: 'http://roihunter.com/'
718
1110
 
1111
+ - regex: 'SafeDNSBot'
1112
+ name: 'SafeDNSBot'
1113
+ category: 'Crawler'
1114
+ url: 'https://www.safedns.com/searchbot'
1115
+ producer:
1116
+ name: 'SafeDNS, Inc.'
1117
+ url: 'https://www.safedns.com/'
1118
+
719
1119
  - regex: 'Scrapy'
720
1120
  name: 'Scrapy'
721
1121
  category: 'Crawler'
@@ -761,6 +1161,19 @@
761
1161
  name: 'SEO Engine'
762
1162
  url: 'http://www.seoengine.com'
763
1163
 
1164
+ - regex: 'SEOkicks-Robot'
1165
+ name: 'SEOkicks-Robot'
1166
+ category: 'Crawler'
1167
+ url: 'http://www.seokicks.de/robot.html'
1168
+ producer:
1169
+ name: 'SEOkicks'
1170
+ url: 'https://www.seokicks.de/'
1171
+
1172
+ - regex: 'seoscanners\.net'
1173
+ name: 'Seoscanners.net'
1174
+ category: 'Crawler'
1175
+ url: ''
1176
+
764
1177
  - regex: 'SkypeUriPreview'
765
1178
  name: 'Skype URI Preview'
766
1179
  category: 'Service Agent'
@@ -777,6 +1190,14 @@
777
1190
  name: 'Seznam.cz, a.s.'
778
1191
  url: 'http://www.seznam.cz/'
779
1192
 
1193
+ - regex: 'shopify-partner-homepage-scraper'
1194
+ name: 'Shopify Partner'
1195
+ category: 'Crawler'
1196
+ url: 'https://www.shopify.com/partners'
1197
+ producer:
1198
+ name: 'Shopify'
1199
+ url: 'https://www.shopify.com/'
1200
+
780
1201
  - regex: 'ShopWiki'
781
1202
  name: 'ShopWiki'
782
1203
  category: 'Search tools'
@@ -809,6 +1230,27 @@
809
1230
  name: 'SISTRIX GmbH'
810
1231
  url: 'http://www.sistrix.de'
811
1232
 
1233
+ - regex: 'SISTRIX Optimizer'
1234
+ name: 'SISTRIX Optimizer'
1235
+ category: 'Crawler'
1236
+ url: 'https://optimizer.sistrix.com'
1237
+ producer:
1238
+ name: 'SISTRIX GmbH'
1239
+ url: 'http://www.sistrix.de'
1240
+
1241
+ - regex: 'SiteSucker'
1242
+ name: 'SiteSucker'
1243
+ category: 'Crawler'
1244
+ url: 'http://ricks-apps.com/osx/sitesucker/'
1245
+
1246
+ - regex: 'sixy.ch'
1247
+ name: 'Sixy.ch'
1248
+ category: 'Site Monitor'
1249
+ url: 'http://sixy.ch'
1250
+ producer:
1251
+ name: 'Manuel Kasper'
1252
+ url: 'https://neon1.net/'
1253
+
812
1254
  - regex: 'Slackbot|Slack-ImgProxy'
813
1255
  name: 'Slackbot'
814
1256
  category: 'Crawler'
@@ -833,6 +1275,30 @@
833
1275
  name: 'Tencent Holdings'
834
1276
  url: 'http://www.soso.com'
835
1277
 
1278
+ - regex: 'sqlmap/'
1279
+ name: 'sqlmap'
1280
+ category: 'Security Checker'
1281
+ url: 'http://sqlmap.org/'
1282
+ producer:
1283
+ name: 'sqlmap'
1284
+ url: 'http://sqlmap.org/'
1285
+
1286
+ - regex: 'SSL Labs'
1287
+ name: 'SSL Labs'
1288
+ category: 'Validator'
1289
+ url: 'https://www.ssllabs.com/about/assessment.html'
1290
+ producer:
1291
+ name: 'SSL Labs'
1292
+ url: 'https://www.ssllabs.com/about/assessment.html'
1293
+
1294
+ - regex: 'StatusCake'
1295
+ name: 'StatusCake'
1296
+ category: 'Site Monitor'
1297
+ url: 'https://www.statuscake.com'
1298
+ producer:
1299
+ name: 'StatusCake'
1300
+ url: 'https://www.statuscake.com'
1301
+
836
1302
  - regex: 'Superfeedr bot'
837
1303
  name: 'Superfeedr Bot'
838
1304
  category: 'Feed Fetcher'
@@ -841,6 +1307,11 @@
841
1307
  name: 'Superfeedr'
842
1308
  url: 'https://superfeedr.com/'
843
1309
 
1310
+ - regex: 'Sparkler/[0-9]'
1311
+ name: 'Sparkler'
1312
+ category: 'Crawler'
1313
+ url: 'https://github.com/USCDataScience/sparkler'
1314
+
844
1315
  - regex: 'Spinn3r'
845
1316
  name: 'Spinn3r'
846
1317
  category: 'Crawler'
@@ -865,10 +1336,23 @@
865
1336
  name: 'Domain Tools'
866
1337
  url: 'http://www.domaintools.com'
867
1338
 
1339
+ - regex: 'TarmotGezgin'
1340
+ name: 'Tarmot Gezgin'
1341
+ url: 'http://www.tarmot.com/gezgin/'
1342
+ category: 'Search bot'
1343
+
868
1344
  - regex: 'TelegramBot'
869
- name: 'TelgramBot'
1345
+ name: 'TelegramBot'
870
1346
  url: 'https://telegram.org/blog/bot-revolution'
871
1347
 
1348
+ - regex: 'TLSProbe'
1349
+ name: 'TLSProbe'
1350
+ url: 'https://scan.trustnet.venafi.com/'
1351
+ category: 'Security search bot'
1352
+ producer:
1353
+ name: 'Venafi TrustNet'
1354
+ url: 'https://www.venafi.com'
1355
+
872
1356
  - regex: 'TinEye-bot'
873
1357
  name: 'TinEye Crawler'
874
1358
  category: 'Search bot'
@@ -885,6 +1369,19 @@
885
1369
  name: ''
886
1370
  url: ''
887
1371
 
1372
+ - regex: 'theoldreader.com'
1373
+ name: 'theoldreader'
1374
+ category: 'Feed Reader'
1375
+ url: 'https://theoldreader.com'
1376
+
1377
+ - regex: 'trendictionbot'
1378
+ name: 'Trendiction Bot'
1379
+ category: 'Crawler'
1380
+ url: 'http://www.trendiction.de/bot'
1381
+ producer:
1382
+ name: 'Talkwalker Inc.'
1383
+ url: 'http://www.talkwalker.com'
1384
+
888
1385
  - regex: 'TurnitinBot'
889
1386
  name: 'TurnitinBot'
890
1387
  category: 'Crawler'
@@ -909,6 +1406,13 @@
909
1406
  name: 'Mediasift'
910
1407
  url: ''
911
1408
 
1409
+ - regex: 'Twingly Recon'
1410
+ name: 'Twingly Recon'
1411
+ category: 'Crawler'
1412
+ producer:
1413
+ name: 'Twingly'
1414
+ url: 'https://www.twingly.com'
1415
+
912
1416
  - regex: 'Twitterbot'
913
1417
  name: 'Twitterbot'
914
1418
  category: 'Social Media Agent'
@@ -917,6 +1421,30 @@
917
1421
  name: 'Twitter'
918
1422
  url: 'http://www.twitter.com'
919
1423
 
1424
+ - regex: 'UniversalFeedParser'
1425
+ name: 'UniversalFeedParser'
1426
+ category: 'Feed Fetcher'
1427
+ url: 'https://github.com/kurtmckee/feedparser'
1428
+ producer:
1429
+ name: 'Kurt McKee'
1430
+ url: 'https://github.com/kurtmckee'
1431
+
1432
+ - regex: 'via secureurl\.fwdcdn\.com'
1433
+ name: 'UkrNet Mail Proxy'
1434
+ category: 'Crawler'
1435
+ url: ''
1436
+ producer:
1437
+ name: 'UkrNet Ltd'
1438
+ url: 'https://www.ukr.net/'
1439
+
1440
+ - regex: 'Uptimebot'
1441
+ name: 'Uptimebot'
1442
+ category: 'Site Monitor'
1443
+ url: 'https://uptime.com/uptimebot'
1444
+ producer:
1445
+ name: 'Uptime'
1446
+ url: 'https://uptime.com'
1447
+
920
1448
  - regex: 'UptimeRobot'
921
1449
  name: 'Uptime Robot'
922
1450
  category: 'Site Monitor'
@@ -933,6 +1461,22 @@
933
1461
  name: 'Profound Networks'
934
1462
  url: 'http://www.profound.net'
935
1463
 
1464
+ - regex: 'Vagabondo'
1465
+ name: 'Vagabondo'
1466
+ category: 'Crawler'
1467
+ url: ''
1468
+ producer:
1469
+ name: 'WiseGuys'
1470
+ url: 'http://www.wise-guys.nl/'
1471
+
1472
+ - regex: 'vkShare; '
1473
+ name: 'VK Share Button'
1474
+ category: 'Crawler'
1475
+ url: 'http://vk.com/dev/Share'
1476
+ producer:
1477
+ name: 'VK'
1478
+ url: 'http://vk.com/'
1479
+
936
1480
  - regex: 'VSMCrawler'
937
1481
  name: 'Visual Site Mapper Crawler'
938
1482
  category: 'Crawler'
@@ -965,7 +1509,7 @@
965
1509
  name: 'W3C'
966
1510
  url: 'http://www.w3.org'
967
1511
 
968
- - regex: 'W3C_Validator'
1512
+ - regex: 'W3C_Validator|Validator.nu'
969
1513
  name: 'W3C Markup Validation Service'
970
1514
  category: 'Validator'
971
1515
  url: 'http://validator.w3.org/services'
@@ -989,6 +1533,18 @@
989
1533
  name: 'W3C'
990
1534
  url: 'http://www.w3.org'
991
1535
 
1536
+ - regex: 'Wappalyzer'
1537
+ name: 'Wappalyzer'
1538
+ url: 'https://github.com/AliasIO/Wappalyzer'
1539
+ producer:
1540
+ name: 'AliasIO'
1541
+ url: 'https://github.com/AliasIO'
1542
+
1543
+ - regex: 'PTST/'
1544
+ name: 'WebPageTest'
1545
+ category: 'Site Monitor'
1546
+ url: 'https://www.webpagetest.org'
1547
+
992
1548
  - regex: 'WeSEE(:Search)?'
993
1549
  name: 'WeSEE:Search'
994
1550
  category: 'Search bot'
@@ -1013,6 +1569,14 @@
1013
1569
  name: 'WebSitePulse'
1014
1570
  url: 'http://www.websitepulse.com/'
1015
1571
 
1572
+ - regex: 'WordPress'
1573
+ name: 'WordPress'
1574
+ category: 'Service Agent'
1575
+ url: 'https://wordpress.org/'
1576
+ producer:
1577
+ name: 'Wordpress.org'
1578
+ url: 'https://wordpress.org/'
1579
+
1016
1580
  - regex: 'Wotbox'
1017
1581
  name: 'Wotbox'
1018
1582
  category: 'Search bot'
@@ -1021,6 +1585,14 @@
1021
1585
  name: 'Wotbox'
1022
1586
  url: 'http://www.wotbox.com'
1023
1587
 
1588
+ - regex: 'XenForo'
1589
+ name: 'XenForo'
1590
+ category: 'Service Agent'
1591
+ url: 'https://xenforo.com/'
1592
+ producer:
1593
+ name: 'XenForo Ltd.'
1594
+ url: 'https://xenforo.com/'
1595
+
1024
1596
  - regex: 'yacybot'
1025
1597
  name: 'YaCy'
1026
1598
  category: 'Search bot'
@@ -1053,7 +1625,15 @@
1053
1625
  name: 'Yahoo! Inc.'
1054
1626
  url: 'http://www.yahoo.com'
1055
1627
 
1056
- - regex: 'Yandex(Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher'
1628
+ - regex: 'Y!J-BRW'
1629
+ name: 'Yahoo! Japan BRW'
1630
+ category: 'Crawler'
1631
+ url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
1632
+ producer:
1633
+ name: 'Yahoo! Japan Corp.'
1634
+ url: 'https://www.yahoo.co.jp/'
1635
+
1636
+ - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1057
1637
  name: 'Yandex Bot'
1058
1638
  category: 'Search bot'
1059
1639
  url: 'http://www.yandex.com/bots'
@@ -1061,7 +1641,7 @@
1061
1641
  name: 'Yandex LLC'
1062
1642
  url: 'http://company.yandex.com'
1063
1643
 
1064
- - regex: 'Yeti'
1644
+ - regex: 'Yeti|NaverJapan'
1065
1645
  name: 'Yeti/Naverbot'
1066
1646
  category: 'Search bot'
1067
1647
  url: 'http://help.naver.com/robots/'
@@ -1090,6 +1670,11 @@
1090
1670
  name: 'YunYun'
1091
1671
  url: 'http://www.yunyun.com'
1092
1672
 
1673
+ - regex: 'zgrab'
1674
+ name: 'zgrab'
1675
+ category: 'Security Checker'
1676
+ url: 'https://github.com/zmap/zgrab'
1677
+
1093
1678
  - regex: 'Zookabot'
1094
1679
  name: 'Zookabot'
1095
1680
  category: 'Crawler'
@@ -1138,9 +1723,9 @@
1138
1723
  name: 'HubPages'
1139
1724
  url: 'http://hubpages.com/'
1140
1725
 
1141
- - regex: 'Pinterest/\d\.\d.*www\.pinterest\.com.*'
1726
+ - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
1142
1727
  name: 'Pinterest'
1143
- url: ''
1728
+ url: 'http://www.pinterest.com/bot.html'
1144
1729
  category: 'Crawler'
1145
1730
  producer:
1146
1731
  name: 'Pinterest'
@@ -1154,6 +1739,30 @@
1154
1739
  name: 'Site24x7'
1155
1740
  url: 'https://www.site24x7.com'
1156
1741
 
1742
+ - regex: 's~snapchat-proxy'
1743
+ name: 'Snapchat Proxy'
1744
+ category: 'Crawler'
1745
+ url: 'https://www.snapchat.com'
1746
+ producer:
1747
+ name: 'Snapchat Inc.'
1748
+ url: 'https://www.snapchat.com'
1749
+
1750
+ - regex: "Let's Encrypt validation server"
1751
+ name: "Let's Encrypt Validation"
1752
+ category: 'Service Agent'
1753
+ url: 'https://letsencrypt.org/how-it-works/'
1754
+ producer:
1755
+ name: "Let's Encrypt"
1756
+ url: 'https://letsencrypt.org'
1757
+
1758
+ - regex: 'GrapeshotCrawler'
1759
+ name: 'Grapeshot'
1760
+ category: 'Crawler'
1761
+ url: 'https://www.grapeshot.com/crawler'
1762
+ producer:
1763
+ name: 'Grapeshot'
1764
+ url: 'https://www.grapeshot.com'
1765
+
1157
1766
  - regex: 'www\.monitor\.us'
1158
1767
  name: 'Monitor.Us'
1159
1768
  category: 'Site Monitor'
@@ -1162,6 +1771,22 @@
1162
1771
  name: 'Monitor.Us'
1163
1772
  url: 'http://www.monitor.us'
1164
1773
 
1774
+ - regex: 'Catchpoint( bot)?'
1775
+ name: 'Catchpoint'
1776
+ category: 'Site Monitor'
1777
+ url: ''
1778
+ producer:
1779
+ name: 'Catchpoint Systems'
1780
+ url: 'http://www.catchpoint.com/'
1781
+
1782
+ - regex: 'bitlybot'
1783
+ name: 'BitlyBot'
1784
+ category: 'Crawler'
1785
+ url: 'https://bitly.com'
1786
+ producer:
1787
+ name: 'Bitly, Inc.'
1788
+ url: 'https://bitly.com'
1789
+
1165
1790
  - regex: 'Zao/'
1166
1791
  name: 'Zao'
1167
1792
  category: 'Crawler'
@@ -1217,9 +1842,204 @@
1217
1842
  - regex: 'Server Density Service Monitoring.*'
1218
1843
  name: 'Server Density'
1219
1844
 
1220
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Catchpoint bot|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
1845
+ - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1846
+ name: 'RSSRadio Bot'
1847
+
1848
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9)'
1221
1849
  name: 'Generic Bot'
1222
1850
 
1851
+ - regex: '^sentry'
1852
+ name: 'Sentry Bot'
1853
+ producer:
1854
+ name: 'Sentry'
1855
+ url: 'https://sentry.io'
1856
+
1857
+ - regex: '^Spotify'
1858
+ name: 'Spotify'
1859
+ producer:
1860
+ name: 'Spotify'
1861
+ url: 'https://www.spotify.com'
1862
+
1863
+ - regex: 'The Knowledge AI'
1864
+ name: 'The Knowledge AI'
1865
+ category: 'Crawler'
1866
+
1867
+ - regex: 'Embedly'
1868
+ name: 'Embedly'
1869
+ category: 'Crawler'
1870
+ url: 'https://support.embed.ly/hc/en-us'
1871
+ producer:
1872
+ name: 'A Medium, Corp.'
1873
+ url: 'https://medium.com/'
1874
+
1875
+ - regex: 'BrandVerity'
1876
+ name: 'BrandVerity'
1877
+ category: 'Crawler'
1878
+ url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
1879
+ producer:
1880
+ name: 'BrandVerity, Inc.'
1881
+ url: 'https://www.brandverity.com/'
1882
+
1883
+ - regex: 'Kaspersky Lab CFR link resolver'
1884
+ name: 'Kaspersky'
1885
+ category: 'Security Checker'
1886
+ url: 'https://www.kaspersky.com/'
1887
+ producer:
1888
+ name: 'AO Kaspersky Lab'
1889
+ url: 'https://www.kaspersky.com/'
1890
+
1891
+ - regex: 'eZ Publish Link Validator'
1892
+ name: 'eZ Publish Link Validator'
1893
+ category: 'Crawler'
1894
+ url: 'https://ez.no/'
1895
+ producer:
1896
+ name: 'eZ Systems AS'
1897
+ url: 'https://ez.no/'
1898
+
1899
+ - regex: 'woorankreview'
1900
+ name: 'WooRank'
1901
+ category: 'Search bot'
1902
+ url: 'https://www.woorank.com/'
1903
+ producer:
1904
+ name: 'WooRank sprl'
1905
+ url: 'https://www.woorank.com/'
1906
+
1907
+ - regex: '(Match|LinkCheck) by Siteimprove.com'
1908
+ name: 'Siteimprove'
1909
+ category: 'Search bot'
1910
+ url: 'https://siteimprove.com/'
1911
+ producer:
1912
+ name: 'Siteimprove GmbH'
1913
+ url: 'https://siteimprove.com/'
1914
+
1915
+ - regex: 'CATExplorador'
1916
+ name: 'CATExplorador'
1917
+ category: 'Search bot'
1918
+ url: 'https://fundacio.cat/ca/domini/'
1919
+ producer:
1920
+ name: 'Fundació puntCAT'
1921
+ url: 'https://fundacio.cat/ca/domini/'
1922
+
1923
+ - regex: 'Buck'
1924
+ name: 'Buck'
1925
+ category: 'Search bot'
1926
+ url: 'https://hypefactors.com/'
1927
+ producer:
1928
+ name: 'Hypefactors A/S'
1929
+ url: 'https://hypefactors.com/'
1930
+
1931
+ - regex: 'tracemyfile'
1932
+ name: 'TraceMyFile'
1933
+ category: 'Search bot'
1934
+ url: 'https://www.tracemyfile.com/'
1935
+ producer:
1936
+ name: 'Idee Inc.'
1937
+ url: 'http://ideeinc.com/'
1938
+
1939
+ - regex: 'zelist.ro feed parser'
1940
+ name: 'Ze List'
1941
+ url: 'https://www.zelist.ro/'
1942
+ category: 'Feed Fetcher'
1943
+ producer:
1944
+ name: 'Treeworks SRL'
1945
+ url: 'https://www.tree.ro/'
1946
+
1947
+ - regex: 'weborama-fetcher'
1948
+ name: 'Weborama'
1949
+ category: 'Search bot'
1950
+ url: 'https://weborama.com/'
1951
+ producer:
1952
+ name: 'Weborama SA'
1953
+ url: 'https://weborama.com/'
1954
+
1955
+ - regex: 'BoardReader Favicon Fetcher'
1956
+ name: 'BoardReader'
1957
+ category: 'Search bot'
1958
+ url: 'http://boardreader.com/'
1959
+ producer:
1960
+ name: 'Effyis Inc'
1961
+ url: 'http://boardreader.com/'
1962
+
1963
+ - regex: 'IDG/IT'
1964
+ name: 'IDG/IT'
1965
+ category: 'Search bot'
1966
+ url: 'https://spaziodati.eu/'
1967
+ producer:
1968
+ name: 'SpazioDati S.r.l.'
1969
+ url: 'https://spaziodati.eu/'
1970
+
1971
+ - regex: 'Bytespider'
1972
+ name: 'Bytespider'
1973
+ category: 'Search bot'
1974
+ url: 'https://bytedance.com/'
1975
+ producer:
1976
+ name: 'ByteDance Ltd.'
1977
+ url: 'https://bytedance.com/'
1978
+
1979
+ - regex: 'WikiDo'
1980
+ name: 'WikiDo'
1981
+ category: 'Search bot'
1982
+ url: 'https://www.wikido.com/'
1983
+ producer:
1984
+ name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
1985
+ url: 'https://www.wikido.com/'
1986
+
1987
+ - regex: 'AwarioSmartBot'
1988
+ name: 'Awario'
1989
+ category: 'Search bot'
1990
+ url: 'https://awario.com/bots.html'
1991
+ producer:
1992
+ name: 'Awario'
1993
+ url: 'https://awario.com/'
1994
+
1995
+ - regex: 'AwarioRssBot'
1996
+ name: 'Awario'
1997
+ category: 'Feed Fetcher'
1998
+ url: 'https://awario.com/bots.html'
1999
+ producer:
2000
+ name: 'Awario'
2001
+ url: 'https://awario.com/'
2002
+
2003
+ - regex: 'oBot'
2004
+ name: 'oBot'
2005
+ category: 'Search bot'
2006
+ url: 'http://www.xforce-security.com/crawler/'
2007
+ producer:
2008
+ name: 'IBM Germany Research & Development GmbH'
2009
+ url: 'https://exchange.xforce.ibmcloud.com/'
2010
+
2011
+ - regex: 'SMTBot'
2012
+ name: 'SMTBot'
2013
+ category: 'Search bot'
2014
+ url: 'https://www.similartech.com/smtbot'
2015
+ producer:
2016
+ name: 'SimilarTech Ltd.'
2017
+ url: 'https://www.similartech.com/'
2018
+
2019
+ - regex: 'LCC'
2020
+ name: 'LCC'
2021
+ category: 'Search bot'
2022
+ url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
2023
+ producer:
2024
+ name: 'Universität Leipzig'
2025
+ url: 'https://www.uni-leipzig.de/'
2026
+
2027
+ - regex: 'Startpagina-Linkchecker'
2028
+ name: 'Startpagina Linkchecker'
2029
+ category: 'Search bot'
2030
+ url: 'https://www.startpagina.nl/linkchecker'
2031
+ producer:
2032
+ name: 'Startpagina B.V.'
2033
+ url: 'https://www.startpagina.nl/'
2034
+
2035
+ - regex: 'GTmetrix'
2036
+ name: 'GTmetrix'
2037
+ category: 'Crawler'
2038
+ url: 'https://gtmetrix.com/'
2039
+ producer:
2040
+ name: 'Carbon60 Operating Co. Ltd.'
2041
+ url: 'https://www.carbon60.com/'
2042
+
1223
2043
  # Generic detections
1224
2044
 
1225
2045
  - regex: 'Nutch'
@@ -1230,5 +2050,5 @@
1230
2050
  name: 'The Apache Software Foundation'
1231
2051
  url: 'http://www.apache.org/foundation/'
1232
2052
 
1233
- - regex: '[a-z0-9\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)([^a-z]|$)'
2053
+ - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
1234
2054
  name: 'Generic Bot'