device_detector 0.9.1 → 1.0.4

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +49 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +5 -9
  5. data/CHANGELOG.md +16 -3
  6. data/README.md +7 -9
  7. data/Rakefile +19 -13
  8. data/device_detector.gemspec +1 -0
  9. data/lib/device_detector.rb +32 -28
  10. data/lib/device_detector/bot.rb +2 -2
  11. data/lib/device_detector/client.rb +3 -2
  12. data/lib/device_detector/device.rb +44 -21
  13. data/lib/device_detector/memory_cache.rb +26 -19
  14. data/lib/device_detector/metadata_extractor.rb +7 -8
  15. data/lib/device_detector/model_extractor.rb +3 -3
  16. data/lib/device_detector/name_extractor.rb +2 -2
  17. data/lib/device_detector/os.rb +121 -111
  18. data/lib/device_detector/parser.rb +22 -9
  19. data/lib/device_detector/version.rb +3 -1
  20. data/lib/device_detector/version_extractor.rb +2 -3
  21. data/regexes/bots.yml +840 -20
  22. data/regexes/client/browser_engine.yml +11 -2
  23. data/regexes/client/browsers.yml +909 -108
  24. data/regexes/client/feed_readers.yml +38 -2
  25. data/regexes/client/libraries.yml +76 -2
  26. data/regexes/client/mediaplayers.yml +25 -5
  27. data/regexes/client/mobile_apps.yml +167 -2
  28. data/regexes/client/pim.yml +10 -1
  29. data/regexes/device/cameras.yml +1 -1
  30. data/regexes/device/car_browsers.yml +7 -3
  31. data/regexes/device/consoles.yml +3 -3
  32. data/regexes/device/mobiles.yml +10123 -465
  33. data/regexes/device/portable_media_player.yml +4 -6
  34. data/regexes/device/televisions.yml +18 -4
  35. data/regexes/oss.yml +115 -21
  36. data/regexes/vendorfragments.yml +6 -2
  37. data/spec/device_detector/concrete_user_agent_spec.rb +16 -17
  38. data/spec/device_detector/detector_fixtures_spec.rb +51 -11
  39. data/spec/device_detector/device_spec.rb +28 -48
  40. data/spec/device_detector/memory_cache_spec.rb +60 -28
  41. data/spec/device_detector/model_extractor_spec.rb +3 -3
  42. data/spec/device_detector/version_extractor_spec.rb +5 -6
  43. data/spec/device_detector_spec.rb +60 -69
  44. data/spec/fixtures/client/browser.yml +1785 -262
  45. data/spec/fixtures/client/feed_reader.yml +47 -35
  46. data/spec/fixtures/client/library.yml +112 -3
  47. data/spec/fixtures/client/mediaplayer.yml +32 -37
  48. data/spec/fixtures/client/mobile_app.yml +193 -6
  49. data/spec/fixtures/client/pim.yml +37 -18
  50. data/spec/fixtures/detector/bots.yml +1426 -118
  51. data/spec/fixtures/detector/camera.yml +36 -10
  52. data/spec/fixtures/detector/car_browser.yml +64 -3
  53. data/spec/fixtures/detector/console.yml +80 -26
  54. data/spec/fixtures/detector/desktop.yml +2222 -1589
  55. data/spec/fixtures/detector/feature_phone.yml +151 -42
  56. data/spec/fixtures/detector/feed_reader.yml +186 -121
  57. data/spec/fixtures/detector/mediaplayer.yml +113 -39
  58. data/spec/fixtures/detector/mobile_apps.yml +366 -21
  59. data/spec/fixtures/detector/phablet.yml +2597 -570
  60. data/spec/fixtures/detector/portable_media_player.yml +41 -16
  61. data/spec/fixtures/detector/smart_display.yml +8 -5
  62. data/spec/fixtures/detector/smart_speaker.yml +55 -0
  63. data/spec/fixtures/detector/smartphone-1.yml +5468 -5010
  64. data/spec/fixtures/detector/smartphone-10.yml +9977 -0
  65. data/spec/fixtures/detector/smartphone-11.yml +9891 -0
  66. data/spec/fixtures/detector/smartphone-12.yml +9906 -0
  67. data/spec/fixtures/detector/smartphone-13.yml +9920 -0
  68. data/spec/fixtures/detector/smartphone-14.yml +2662 -0
  69. data/spec/fixtures/detector/smartphone-2.yml +5213 -4635
  70. data/spec/fixtures/detector/smartphone-3.yml +5082 -4533
  71. data/spec/fixtures/detector/smartphone-4.yml +6806 -2625
  72. data/spec/fixtures/detector/smartphone-5.yml +9914 -0
  73. data/spec/fixtures/detector/smartphone-6.yml +9962 -0
  74. data/spec/fixtures/detector/smartphone-7.yml +9899 -0
  75. data/spec/fixtures/detector/smartphone-8.yml +9931 -0
  76. data/spec/fixtures/detector/smartphone-9.yml +9899 -0
  77. data/spec/fixtures/detector/smartphone.yml +5225 -4652
  78. data/spec/fixtures/detector/tablet-1.yml +4691 -4191
  79. data/spec/fixtures/detector/tablet-2.yml +9800 -71
  80. data/spec/fixtures/detector/tablet-3.yml +9959 -0
  81. data/spec/fixtures/detector/tablet-4.yml +4528 -0
  82. data/spec/fixtures/detector/tablet.yml +4664 -4177
  83. data/spec/fixtures/detector/tv.yml +3399 -1048
  84. data/spec/fixtures/detector/unknown.yml +1017 -977
  85. data/spec/fixtures/detector/wearable.yml +61 -0
  86. data/spec/fixtures/device/camera.yml +4 -3
  87. data/spec/fixtures/device/car_browser.yml +9 -2
  88. data/spec/fixtures/device/console.yml +15 -14
  89. data/spec/fixtures/parser/oss.yml +284 -2
  90. data/spec/fixtures/parser/vendorfragments.yml +8 -2
  91. metadata +50 -7
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DeviceDetector
2
- VERSION = '0.9.1'
4
+ VERSION = '1.0.4'
3
5
  end
@@ -1,12 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DeviceDetector
2
4
  class VersionExtractor < MetadataExtractor
3
-
4
5
  private
5
6
 
6
7
  def metadata_string
7
8
  String(regex_meta[:version])
8
9
  end
9
-
10
10
  end
11
11
  end
12
-
@@ -1,7 +1,7 @@
1
1
  ###############
2
2
  # Device Detector - The Universal Device Detection library for parsing User Agents
3
3
  #
4
- # @link http://piwik.org
4
+ # @link https://matomo.org
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
@@ -53,6 +53,21 @@
53
53
  name: 'Alexa Internet'
54
54
  url: 'http://www.alexa.com'
55
55
 
56
+ - regex: 'alexa site audit'
57
+ name: 'Alexa Site Audit'
58
+ category: 'Site Monitor'
59
+ url: 'http://www.alexa.com/help/webmasters'
60
+ producer:
61
+ name: 'Alexa Internet'
62
+ url: 'http://www.alexa.com'
63
+
64
+ - regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
65
+ name: 'Amazon Route53 Health Check'
66
+ category: 'Service Agent'
67
+ producer:
68
+ name: 'Amazon Web Services'
69
+ url: 'https://aws.amazon.com/'
70
+
56
71
  - regex: 'AmorankSpider'
57
72
  name: 'Amorank Spider'
58
73
  category: 'Crawler'
@@ -61,6 +76,14 @@
61
76
  name: 'Amorank'
62
77
  url: 'http://www.amorank.com'
63
78
 
79
+ - regex: 'ApacheBench'
80
+ name: 'ApacheBench'
81
+ category: 'Benchmark'
82
+ url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
83
+ producer:
84
+ name: 'The Apache Software Foundation'
85
+ url: 'http://www.apache.org/foundation/'
86
+
64
87
  - regex: 'Applebot'
65
88
  name: 'Applebot'
66
89
  category: 'Crawler'
@@ -69,6 +92,30 @@
69
92
  name: 'Apple Inc'
70
93
  url: 'http://www.apple.com'
71
94
 
95
+ - regex: 'Arachni'
96
+ name: 'Arachni'
97
+ category: 'Security Checker'
98
+ url: 'http://www.arachni-scanner.com'
99
+ producer:
100
+ name: 'Sarosys LLC'
101
+ url: 'http://www.sarosys.com/'
102
+
103
+ - regex: 'AspiegelBot'
104
+ name: 'AspiegelBot'
105
+ category: 'Crawler'
106
+ url: 'https://aspiegel.com/'
107
+ producer:
108
+ name: 'Huawei'
109
+ url: 'https://www.huawei.com/'
110
+
111
+ - regex: 'Castro 2, Episode Duration Lookup'
112
+ name: 'Castro 2'
113
+ category: 'Service Agent'
114
+ url: 'http://supertop.co/castro/'
115
+ producer:
116
+ name: 'Supertop'
117
+ url: 'http://supertop.co'
118
+
72
119
  - regex: 'Curious George'
73
120
  name: 'Analytics SEO Crawler'
74
121
  category: 'Crawler'
@@ -93,8 +140,8 @@
93
140
  name: 'Ask Jeeves Inc.'
94
141
  url: 'http://www.ask.com'
95
142
 
96
- - regex: 'Backlink-Ceck\.de'
97
- name: 'Backlink-Ceck.de'
143
+ - regex: 'Backlink-Check\.de'
144
+ name: 'Backlink-Check.de'
98
145
  category: 'Crawler'
99
146
  url: 'http://www.backlink-check.de/bot.html'
100
147
  producer:
@@ -165,6 +212,13 @@
165
212
  name: 'Blogtrottr Ltd'
166
213
  url: 'https://blogtrottr.com/'
167
214
 
215
+ - regex: 'BoardReader Blog Indexer'
216
+ name: 'BoardReader Blog Indexer'
217
+ category: 'Crawler'
218
+ producer:
219
+ name: 'BoardReader'
220
+ url: 'http://boardreader.com/'
221
+
168
222
  - regex: 'BountiiBot'
169
223
  name: 'Bountii Bot'
170
224
  category: 'Search bot'
@@ -186,8 +240,8 @@
186
240
  category: 'Crawler'
187
241
  url: 'http://law.di.unimi.it/BUbiNG.html'
188
242
  producer:
189
- name: ''
190
- url: ''
243
+ name: 'The Laboratory for Web Algorithmics (LAW)'
244
+ url: 'http://law.di.unimi.it/software.php#buging'
191
245
 
192
246
  - regex: '(?<!HTC)[ _]Butterfly/'
193
247
  name: 'Butterfly Robot'
@@ -221,6 +275,14 @@
221
275
  name: '10betterpages GmbH'
222
276
  url: 'http://cliqz.com'
223
277
 
278
+ - regex: 'Cloudflare-AMP'
279
+ name: 'CloudFlare AMP Fetcher'
280
+ category: 'Crawler'
281
+ url: 'https://amp.cloudflare.com/doc/fetcher.html'
282
+ producer:
283
+ name: 'CloudFlare'
284
+ url: 'http://www.cloudflare.com'
285
+
224
286
  - regex: 'CloudFlare-AlwaysOnline'
225
287
  name: 'CloudFlare Always Online'
226
288
  category: 'Site Monitor'
@@ -229,13 +291,21 @@
229
291
  name: 'CloudFlare'
230
292
  url: 'http://www.cloudflare.com'
231
293
 
232
- - regex: 'coccoc/'
294
+ - regex: 'coccoc|coccocbot(-ads|-fast|-image|-shopping|-web)?'
233
295
  name: 'Cốc Cốc Bot'
234
- url: 'http://help.coccoc.com/'
296
+ url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
235
297
  category: 'Search bot'
236
298
  producer:
237
299
  name: 'Cốc Cốc'
238
- url: 'http://coccoc.com/'
300
+ url: 'https://coccoc.com/'
301
+
302
+ - regex: 'collectd'
303
+ name: 'Collectd'
304
+ url: 'https://collectd.org/'
305
+ category: 'Site Monitor'
306
+ producer:
307
+ name: 'Collectd'
308
+ url: 'https://collectd.org/'
239
309
 
240
310
  - regex: 'CommaFeed'
241
311
  name: 'CommaFeed'
@@ -245,6 +315,39 @@
245
315
  name: ''
246
316
  url: ''
247
317
 
318
+ - regex: 'CSS Certificate Spider'
319
+ name: 'CSS Certificate Spider'
320
+ category: 'Crawler'
321
+ url: 'http://www.css-security.com/certificatespider/'
322
+ producer:
323
+ name: 'Certified Security Solutions'
324
+ url: 'https://www.css-security.com/company/about-us/'
325
+
326
+ - regex: 'Datadog Agent'
327
+ name: 'Datadog Agent'
328
+ url: 'https://github.com/DataDog/dd-agent'
329
+ category: 'Site Monitor'
330
+ producer:
331
+ name: 'Datadog'
332
+ url: 'https://www.datadoghq.com/'
333
+
334
+ - regex: 'Datanyze'
335
+ name: 'Datanyze'
336
+ url: ''
337
+ category: 'Crawler'
338
+ producer:
339
+ name: 'Datanyze'
340
+ url: 'https://www.datanyze.com'
341
+
342
+
343
+ - regex: 'Dataprovider'
344
+ name: 'Dataprovider'
345
+ category: 'Crawler'
346
+ url: ''
347
+ producer:
348
+ name: 'Dataprovider B.V.'
349
+ url: 'https://www.dataprovider.com/'
350
+
248
351
  - regex: 'Daum(oa)?[ /][0-9]'
249
352
  name: 'Daum'
250
353
  category: 'Search bot'
@@ -285,6 +388,14 @@
285
388
  name: 'SEOmoz, Inc.'
286
389
  url: 'http://moz.com/'
287
390
 
391
+ - regex: 'DuckDuck(?:Go-Favicons-)?Bot'
392
+ name: 'DuckDuckGo Bot'
393
+ category: 'Search bot'
394
+ url: 'https://duckduckgo.com/duckduckbot'
395
+ producer:
396
+ name: 'DuckDuckGo'
397
+ url: 'https://duckduckgo.com/'
398
+
288
399
  - regex: 'EasouSpider'
289
400
  name: 'Easou Spider'
290
401
  category: 'Search bot'
@@ -293,6 +404,13 @@
293
404
  name: 'easou ICP'
294
405
  url: 'http://www.easou.com'
295
406
 
407
+ - regex: 'eCairn-Grabber'
408
+ name: 'eCairn-Grabber'
409
+ category: 'Crawler'
410
+ producer:
411
+ name: 'eCairn'
412
+ url: 'https://ecairn.com'
413
+
296
414
  - regex: 'EMail Exractor'
297
415
  name: 'EMail Exractor'
298
416
  category: 'Crawler'
@@ -301,6 +419,14 @@
301
419
  name: ''
302
420
  url: ''
303
421
 
422
+ - regex: 'evc-batch'
423
+ name: 'evc-batch'
424
+ category: 'Crawler'
425
+ url: ''
426
+ producer:
427
+ name: 'eVenture Capital Partners II, LLC'
428
+ url: 'http://www.eventures.vc/'
429
+
304
430
  - regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
305
431
  name: 'ExaBot'
306
432
  category: 'Crawler'
@@ -325,7 +451,7 @@
325
451
  name: 'SEOmoz, Inc.'
326
452
  url: 'http://moz.com/'
327
453
 
328
- - regex: 'facebookexternalhit|facebookplatform'
454
+ - regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
329
455
  name: 'Facebook External Hit'
330
456
  category: 'Social Media Agent'
331
457
  url: 'https://www.facebook.com/externalhit_uatext.php'
@@ -381,6 +507,24 @@
381
507
  name: ''
382
508
  url: ''
383
509
 
510
+ - regex: 'FlipboardProxy|FlipboardRSS'
511
+ name: 'Flipboard'
512
+ url: 'http://flipboard.com/browserproxy'
513
+ category: 'Feed Fetcher'
514
+ producer:
515
+ name: 'Flipboard'
516
+ url: 'http://flipboard.com/'
517
+
518
+ - regex: 'Findxbot'
519
+ name: 'Findxbot'
520
+ category: 'Crawler'
521
+ url: 'http://www.findxbot.com'
522
+
523
+ - regex: 'FreshRSS'
524
+ name: 'FreshRSS'
525
+ category: 'Feed Fetcher'
526
+ url: 'https://freshrss.org/'
527
+
384
528
  - regex: 'Genieo'
385
529
  name: 'Genieo Web filter'
386
530
  category: ''
@@ -389,6 +533,14 @@
389
533
  name: 'Genieo'
390
534
  url: 'http://www.genieo.com'
391
535
 
536
+ - regex: 'GigablastOpenSource'
537
+ name: 'Gigablast'
538
+ category: 'Search bot'
539
+ url: 'https://github.com/gigablast/open-source-search-engine'
540
+ producer:
541
+ name: 'Matt Wells'
542
+ url: 'http://www.gigablast.com/faq.html'
543
+
392
544
  - regex: 'Gluten Free Crawler'
393
545
  name: 'Gluten Free Crawler'
394
546
  category: 'Crawler'
@@ -405,6 +557,18 @@
405
557
  name: 'NTT Resonant'
406
558
  url: 'http://goo.ne.jp'
407
559
 
560
+ - regex: 'Google Favicon'
561
+ name: 'Google Favicon'
562
+ category: 'Crawler'
563
+
564
+ - regex: 'Google Search Console'
565
+ name: 'Google Search Console'
566
+ category: 'Crawler'
567
+ url: 'https://search.google.com/search-console/about'
568
+ producer:
569
+ name: 'Google Inc.'
570
+ url: 'http://www.google.com'
571
+
408
572
  - regex: 'Google Page Speed Insights'
409
573
  name: 'Google PageSpeed Insights'
410
574
  category: 'Site Monitor'
@@ -421,6 +585,30 @@
421
585
  name: 'Google Inc.'
422
586
  url: 'http://www.google.com'
423
587
 
588
+ - regex: 'Google-Cloud-Scheduler'
589
+ name: 'Google Cloud Scheduler'
590
+ category: 'Crawler'
591
+ url: 'https://cloud.google.com/scheduler'
592
+ producer:
593
+ name: 'Google Inc.'
594
+ url: 'https://www.google.com'
595
+
596
+ - regex: 'Google-Structured-Data-Testing-Tool'
597
+ name: 'Google Structured Data Testing Tool'
598
+ category: 'Validator'
599
+ url: 'https://search.google.com/structured-data/testing-tool'
600
+ producer:
601
+ name: 'Google Inc.'
602
+ url: 'http://www.google.com'
603
+
604
+ - regex: 'GoogleStackdriverMonitoring'
605
+ name: 'Google Stackdriver Monitoring'
606
+ category: 'Site Monitor'
607
+ url: 'https://cloud.google.com/monitoring'
608
+ producer:
609
+ name: 'Google Inc.'
610
+ url: 'https://www.google.com'
611
+
424
612
  - regex: 'via ggpht\.com GoogleImageProxy'
425
613
  name: 'Gmail Image Proxy'
426
614
  category: 'Crawler'
@@ -429,7 +617,39 @@
429
617
  name: 'Google Inc.'
430
618
  url: 'http://www.google.com'
431
619
 
432
- - regex: 'Googlebot(-Mobile|-Image|-Video|-News)?|Feedfetcher-Google|Google-Test|Google-Site-Verification|Google Web Preview|AdsBot-Google(-Mobile)?|Mediapartners-Google|Google.*/\+/web/snippet|GoogleProducer|Google[ -]Publisher[ -]Plugin'
620
+ - regex: 'SeznamEmailProxy'
621
+ name: 'Seznam Email Proxy'
622
+ category: 'Crawler'
623
+ url: ''
624
+ producer:
625
+ name: 'Seznam.cz, a.s.'
626
+ url: 'http://www.seznam.cz/'
627
+
628
+ - regex: 'Seznam-Zbozi-robot'
629
+ name: 'Seznam Zbozi.cz'
630
+ category: 'Crawler'
631
+ url: ''
632
+ producer:
633
+ name: 'Seznam.cz, a.s.'
634
+ url: 'https://www.zbozi.cz/'
635
+
636
+ - regex: 'Heurekabot-Feed'
637
+ name: 'Heureka Feed'
638
+ category: 'Crawler'
639
+ url: 'https://sluzby.heureka.cz/napoveda/heurekabot/'
640
+ producer:
641
+ name: 'Heureka.cz, a.s.'
642
+ url: 'https://www.heureka.cz/'
643
+
644
+ - regex: 'ShopAlike'
645
+ name: 'ShopAlike'
646
+ category: 'Crawler'
647
+ url: ''
648
+ producer:
649
+ name: 'Visual Meta'
650
+ url: 'https://www.shopalike.cz/'
651
+
652
+ - regex: 'AdsBot-Google(-Mobile)?|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot(-Mobile|-Image|-Video|-News)?|GoogleProducer|Google.*/\+/web/snippet'
433
653
  name: 'Googlebot'
434
654
  category: 'Search bot'
435
655
  url: 'http://www.google.com/bot.html'
@@ -445,6 +665,14 @@
445
665
  name: 'The Internet Archive'
446
666
  url: 'http://www.archive.org'
447
667
 
668
+ - regex: 'HubSpot '
669
+ name: 'HubSpot'
670
+ category: 'Crawler'
671
+ producer:
672
+ name: 'HubSpot Inc.'
673
+ url: 'https://www.hubspot.com'
674
+
675
+
448
676
  - regex: 'HTTPMon'
449
677
  name: 'HTTPMon'
450
678
  category: 'Site Monitor'
@@ -461,6 +689,11 @@
461
689
  name: ''
462
690
  url: ''
463
691
 
692
+ - regex: 'inoreader.com'
693
+ name: 'inoreader'
694
+ category: 'Feed Reader'
695
+ url: 'https://www.inoreader.com'
696
+
464
697
  - regex: 'iisbot'
465
698
  name: 'IIS Site Analysis'
466
699
  category: 'Crawler'
@@ -469,6 +702,21 @@
469
702
  name: 'Microsoft Corporation'
470
703
  url: 'http://www.microsoft.com'
471
704
 
705
+ - regex: 'ips-agent'
706
+ name: 'IPS Agent'
707
+ category: 'crawler'
708
+ producer:
709
+ name: 'VeriSign, Inc'
710
+ url: 'http://www.verisign.com/'
711
+
712
+ - regex: 'IP-Guide\.com'
713
+ name: 'IP-Guide Crawler'
714
+ category: 'Crawler'
715
+ url: ''
716
+ producer:
717
+ name: ''
718
+ url: 'https://ip-guide.com'
719
+
472
720
  - regex: 'kouio'
473
721
  name: 'Kouio'
474
722
  url: 'http://kouio.com/'
@@ -485,6 +733,14 @@
485
733
  name: ''
486
734
  url: ''
487
735
 
736
+ - regex: '([A-z0-9]*)-Lighthouse'
737
+ name: 'Lighthouse'
738
+ category: 'Site Monitor'
739
+ url: 'https://developers.google.com/web/tools/lighthouse'
740
+ producer:
741
+ name: 'Lighthouse'
742
+ url: 'https://developers.google.com/web/tools/lighthouse'
743
+
488
744
  - regex: 'linkdexbot(-mobile)?|linkdex\.com'
489
745
  name: 'Linkdex Bot'
490
746
  category: 'Search bot'
@@ -532,6 +788,18 @@
532
788
  name: ''
533
789
  url: ''
534
790
 
791
+ - regex : 'masscan'
792
+ name: 'masscan'
793
+ url: 'https://github.com/robertdavidgraham/masscan'
794
+ category: 'Crawler'
795
+ producer:
796
+ name: 'Robert Graham'
797
+ url: 'https://github.com/robertdavidgraham'
798
+
799
+ - regex: 'Mastodon/'
800
+ name: 'Mastodon Bot'
801
+ category: 'Social Media Agent'
802
+
535
803
  - regex: 'meanpathbot'
536
804
  name: 'Meanpath Bot'
537
805
  category: 'Search bot'
@@ -540,6 +808,19 @@
540
808
  name: 'Meanpath'
541
809
  url: 'http://www.meanpath.com'
542
810
 
811
+ - regex: 'MetaJobBot'
812
+ name: 'MetaJobBot'
813
+ category: 'Crawler'
814
+ url: 'http://www.metajob.at/the/crawler'
815
+ producer:
816
+ name: 'MetaJob'
817
+ url: 'http://www.metajob.at'
818
+
819
+ - regex: 'MetaInspector'
820
+ name: 'MetaInspector'
821
+ category: 'Crawler'
822
+ url: 'https://github.com/jaimeiniesta/metainspector'
823
+
543
824
  - regex: 'MixrankBot'
544
825
  name: 'Mixrank Bot'
545
826
  category: 'Crawler'
@@ -556,6 +837,13 @@
556
837
  name: 'Majestic-12'
557
838
  url: 'http://majestic12.co.uk'
558
839
 
840
+ - regex: 'Mnogosearch'
841
+ name: 'Mnogosearch'
842
+ category: 'Search bot'
843
+ url: 'http://www.mnogosearch.org/'
844
+ producer:
845
+ name: 'Lavtech.Com Corp.'
846
+ url: ''
559
847
  - regex: 'MojeekBot'
560
848
  name: 'MojeekBot'
561
849
  category: 'Search bot'
@@ -564,6 +852,14 @@
564
852
  name: 'Mojeek Ltd.'
565
853
  url: 'http://www.mojeek.com'
566
854
 
855
+ - regex: 'munin'
856
+ name: 'Munin'
857
+ category: 'Site Monitor'
858
+ url: 'http://munin-monitoring.org/'
859
+ producer:
860
+ name: 'Munin'
861
+ url: 'http://munin-monitoring.org/'
862
+
567
863
  - regex: 'NalezenCzBot'
568
864
  name: 'NalezenCzBot'
569
865
  category: 'Crawler'
@@ -572,7 +868,19 @@
572
868
  name: 'Jaroslav Kuboš'
573
869
  url: ''
574
870
 
575
- - regex: 'Netcraft Web Server Survey'
871
+ - regex: 'check_http/v'
872
+ name: 'Nagios check_http'
873
+ category: 'Site Monitor'
874
+ url: 'https://nagios.org'
875
+ producer:
876
+ name: 'Nagios Plugins Development Team'
877
+ url: 'https://nagios.org'
878
+
879
+ - regex: 'nbertaupete95\(at\)gmail.com'
880
+ name: 'nbertaupete95'
881
+ category: 'Crawler'
882
+
883
+ - regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
576
884
  name: 'Netcraft Survey Bot'
577
885
  category: 'Search bot'
578
886
  url: ''
@@ -580,6 +888,14 @@
580
888
  name: 'Netcraft'
581
889
  url: 'http://www.netcraft.com'
582
890
 
891
+ - regex: 'netEstate NE Crawler'
892
+ name: 'netEstate'
893
+ category: 'Crawler'
894
+ url: 'http://www.website-datenbank.de/Impressum'
895
+ producer:
896
+ name: 'netEstate GmbH'
897
+ url: 'https://www.netestate.de/en/'
898
+
583
899
  - regex: 'Netvibes'
584
900
  name: 'Netvibes'
585
901
  url: 'http://www.netvibes.com/'
@@ -612,7 +928,25 @@
612
928
  name: 'Northern Light'
613
929
  url: 'http://northernlight.com'
614
930
 
615
- - regex: 'omgilibot'
931
+ - regex: 'Nmap Scripting Engine'
932
+ name: 'Nmap'
933
+ category: 'Security Checker'
934
+ url: 'https://nmap.org/book/nse.html'
935
+ producer:
936
+ name: 'Nmap'
937
+ url: 'https://nmap.org/'
938
+
939
+ - regex: 'Nuzzel'
940
+ name: 'Nuzzel'
941
+ category: 'Crawler'
942
+ producer:
943
+ name: 'Nuzzel'
944
+ url: https://www.nuzzel.com/
945
+
946
+ - regex: 'Octopus [0-9]'
947
+ name: 'Octopus'
948
+
949
+ - regex: 'omgili(?:bot)?'
616
950
  name: 'Omgili bot'
617
951
  category: 'Search bot'
618
952
  url: 'http://www.omgili.com/Crawler.html'
@@ -660,6 +994,11 @@
660
994
  name: 'Smallrivers SA'
661
995
  url: 'http://www.paper.li'
662
996
 
997
+ - regex: 'phantomas/'
998
+ name: 'Phantomas'
999
+ category: 'Site Monitor'
1000
+ url: 'https://github.com/macbre/phantomas'
1001
+
663
1002
  - regex: 'phpservermon'
664
1003
  name: 'PHP Server Monitor'
665
1004
  category: 'Site Monitor'
@@ -668,6 +1007,22 @@
668
1007
  name: 'PHP Server Monitor'
669
1008
  url: 'http://www.phpservermonitor.org/'
670
1009
 
1010
+ - regex: 'PocketParser'
1011
+ name: 'PocketParser'
1012
+ category: 'Read-it-later Service'
1013
+ url: 'https://getpocket.com/pocketparser_ua'
1014
+ producer:
1015
+ name: 'Pocket'
1016
+ url: 'https://getpocket.com/'
1017
+
1018
+ - regex: 'PritTorrent'
1019
+ name: 'PritTorrent'
1020
+ category: 'Crawler'
1021
+ url: 'https://github.com/astro/prittorrent'
1022
+ producer:
1023
+ name: 'Bitlove'
1024
+ url: 'http://bitlove.org/'
1025
+
671
1026
  - regex: 'psbot(-page)?'
672
1027
  name: 'Picsearch bot'
673
1028
  category: 'Search bot'
@@ -684,6 +1039,22 @@
684
1039
  name: 'Pingdom AB'
685
1040
  url: 'https://www.pingdom.com'
686
1041
 
1042
+ - regex: 'Quora Link Preview'
1043
+ name: 'Quora Link Preview'
1044
+ category: 'Crawler'
1045
+ url: ''
1046
+ producer:
1047
+ name: 'Quora'
1048
+ url: 'http://www.quora.com'
1049
+
1050
+ - regex: 'RamblerMail'
1051
+ name: 'RamblerMail Image Proxy'
1052
+ category: 'Crawler'
1053
+ url: ''
1054
+ producer:
1055
+ name: 'Rambler&Co'
1056
+ url: 'https://rambler-co.ru/'
1057
+
687
1058
  - regex: 'QuerySeekerSpider'
688
1059
  name: 'QuerySeekerSpider'
689
1060
  category: 'Crawler'
@@ -692,6 +1063,19 @@
692
1063
  name: 'QueryEye Inc.'
693
1064
  url: 'http://queryeye.com'
694
1065
 
1066
+ - regex: 'Qwantify'
1067
+ name: 'Qwantify'
1068
+ category: 'Crawler'
1069
+ url: 'https://www.qwant.com/'
1070
+ producer:
1071
+ name: 'Qwant Corporation'
1072
+ url: 'https://www.qwant.com/'
1073
+
1074
+ - regex: 'Rainmeter'
1075
+ name: 'Rainmeter'
1076
+ category: 'Crawler'
1077
+ url: 'https://www.rainmeter.net'
1078
+
695
1079
  - regex: 'redditbot'
696
1080
  name: 'Reddit Bot'
697
1081
  category: 'Social Media Agent'
@@ -700,6 +1084,14 @@
700
1084
  name: 'reddit inc.'
701
1085
  url: 'http://www.reddit.com'
702
1086
 
1087
+ - regex: 'Riddler'
1088
+ name: 'Riddler'
1089
+ category: 'Security search bot'
1090
+ url: 'https://riddler.io/about'
1091
+ producer:
1092
+ name: 'F-Secure'
1093
+ url: 'https://www.f-secure.com'
1094
+
703
1095
  - regex: 'rogerbot'
704
1096
  name: 'Rogerbot'
705
1097
  category: 'Crawler'
@@ -716,6 +1108,14 @@
716
1108
  name: 'Roihunter a.s.'
717
1109
  url: 'http://roihunter.com/'
718
1110
 
1111
+ - regex: 'SafeDNSBot'
1112
+ name: 'SafeDNSBot'
1113
+ category: 'Crawler'
1114
+ url: 'https://www.safedns.com/searchbot'
1115
+ producer:
1116
+ name: 'SafeDNS, Inc.'
1117
+ url: 'https://www.safedns.com/'
1118
+
719
1119
  - regex: 'Scrapy'
720
1120
  name: 'Scrapy'
721
1121
  category: 'Crawler'
@@ -761,6 +1161,19 @@
761
1161
  name: 'SEO Engine'
762
1162
  url: 'http://www.seoengine.com'
763
1163
 
1164
+ - regex: 'SEOkicks-Robot'
1165
+ name: 'SEOkicks-Robot'
1166
+ category: 'Crawler'
1167
+ url: 'http://www.seokicks.de/robot.html'
1168
+ producer:
1169
+ name: 'SEOkicks'
1170
+ url: 'https://www.seokicks.de/'
1171
+
1172
+ - regex: 'seoscanners\.net'
1173
+ name: 'Seoscanners.net'
1174
+ category: 'Crawler'
1175
+ url: ''
1176
+
764
1177
  - regex: 'SkypeUriPreview'
765
1178
  name: 'Skype URI Preview'
766
1179
  category: 'Service Agent'
@@ -777,6 +1190,14 @@
777
1190
  name: 'Seznam.cz, a.s.'
778
1191
  url: 'http://www.seznam.cz/'
779
1192
 
1193
+ - regex: 'shopify-partner-homepage-scraper'
1194
+ name: 'Shopify Partner'
1195
+ category: 'Crawler'
1196
+ url: 'https://www.shopify.com/partners'
1197
+ producer:
1198
+ name: 'Shopify'
1199
+ url: 'https://www.shopify.com/'
1200
+
780
1201
  - regex: 'ShopWiki'
781
1202
  name: 'ShopWiki'
782
1203
  category: 'Search tools'
@@ -809,6 +1230,27 @@
809
1230
  name: 'SISTRIX GmbH'
810
1231
  url: 'http://www.sistrix.de'
811
1232
 
1233
+ - regex: 'SISTRIX Optimizer'
1234
+ name: 'SISTRIX Optimizer'
1235
+ category: 'Crawler'
1236
+ url: 'https://optimizer.sistrix.com'
1237
+ producer:
1238
+ name: 'SISTRIX GmbH'
1239
+ url: 'http://www.sistrix.de'
1240
+
1241
+ - regex: 'SiteSucker'
1242
+ name: 'SiteSucker'
1243
+ category: 'Crawler'
1244
+ url: 'http://ricks-apps.com/osx/sitesucker/'
1245
+
1246
+ - regex: 'sixy.ch'
1247
+ name: 'Sixy.ch'
1248
+ category: 'Site Monitor'
1249
+ url: 'http://sixy.ch'
1250
+ producer:
1251
+ name: 'Manuel Kasper'
1252
+ url: 'https://neon1.net/'
1253
+
812
1254
  - regex: 'Slackbot|Slack-ImgProxy'
813
1255
  name: 'Slackbot'
814
1256
  category: 'Crawler'
@@ -833,6 +1275,30 @@
833
1275
  name: 'Tencent Holdings'
834
1276
  url: 'http://www.soso.com'
835
1277
 
1278
+ - regex: 'sqlmap/'
1279
+ name: 'sqlmap'
1280
+ category: 'Security Checker'
1281
+ url: 'http://sqlmap.org/'
1282
+ producer:
1283
+ name: 'sqlmap'
1284
+ url: 'http://sqlmap.org/'
1285
+
1286
+ - regex: 'SSL Labs'
1287
+ name: 'SSL Labs'
1288
+ category: 'Validator'
1289
+ url: 'https://www.ssllabs.com/about/assessment.html'
1290
+ producer:
1291
+ name: 'SSL Labs'
1292
+ url: 'https://www.ssllabs.com/about/assessment.html'
1293
+
1294
+ - regex: 'StatusCake'
1295
+ name: 'StatusCake'
1296
+ category: 'Site Monitor'
1297
+ url: 'https://www.statuscake.com'
1298
+ producer:
1299
+ name: 'StatusCake'
1300
+ url: 'https://www.statuscake.com'
1301
+
836
1302
  - regex: 'Superfeedr bot'
837
1303
  name: 'Superfeedr Bot'
838
1304
  category: 'Feed Fetcher'
@@ -841,6 +1307,11 @@
841
1307
  name: 'Superfeedr'
842
1308
  url: 'https://superfeedr.com/'
843
1309
 
1310
+ - regex: 'Sparkler/[0-9]'
1311
+ name: 'Sparkler'
1312
+ category: 'Crawler'
1313
+ url: 'https://github.com/USCDataScience/sparkler'
1314
+
844
1315
  - regex: 'Spinn3r'
845
1316
  name: 'Spinn3r'
846
1317
  category: 'Crawler'
@@ -865,10 +1336,23 @@
865
1336
  name: 'Domain Tools'
866
1337
  url: 'http://www.domaintools.com'
867
1338
 
1339
+ - regex: 'TarmotGezgin'
1340
+ name: 'Tarmot Gezgin'
1341
+ url: 'http://www.tarmot.com/gezgin/'
1342
+ category: 'Search bot'
1343
+
868
1344
  - regex: 'TelegramBot'
869
- name: 'TelgramBot'
1345
+ name: 'TelegramBot'
870
1346
  url: 'https://telegram.org/blog/bot-revolution'
871
1347
 
1348
+ - regex: 'TLSProbe'
1349
+ name: 'TLSProbe'
1350
+ url: 'https://scan.trustnet.venafi.com/'
1351
+ category: 'Security search bot'
1352
+ producer:
1353
+ name: 'Venafi TrustNet'
1354
+ url: 'https://www.venafi.com'
1355
+
872
1356
  - regex: 'TinEye-bot'
873
1357
  name: 'TinEye Crawler'
874
1358
  category: 'Search bot'
@@ -885,6 +1369,19 @@
885
1369
  name: ''
886
1370
  url: ''
887
1371
 
1372
+ - regex: 'theoldreader.com'
1373
+ name: 'theoldreader'
1374
+ category: 'Feed Reader'
1375
+ url: 'https://theoldreader.com'
1376
+
1377
+ - regex: 'trendictionbot'
1378
+ name: 'Trendiction Bot'
1379
+ category: 'Crawler'
1380
+ url: 'http://www.trendiction.de/bot'
1381
+ producer:
1382
+ name: 'Talkwalker Inc.'
1383
+ url: 'http://www.talkwalker.com'
1384
+
888
1385
  - regex: 'TurnitinBot'
889
1386
  name: 'TurnitinBot'
890
1387
  category: 'Crawler'
@@ -909,6 +1406,13 @@
909
1406
  name: 'Mediasift'
910
1407
  url: ''
911
1408
 
1409
+ - regex: 'Twingly Recon'
1410
+ name: 'Twingly Recon'
1411
+ category: 'Crawler'
1412
+ producer:
1413
+ name: 'Twingly'
1414
+ url: 'https://www.twingly.com'
1415
+
912
1416
  - regex: 'Twitterbot'
913
1417
  name: 'Twitterbot'
914
1418
  category: 'Social Media Agent'
@@ -917,6 +1421,30 @@
917
1421
  name: 'Twitter'
918
1422
  url: 'http://www.twitter.com'
919
1423
 
1424
+ - regex: 'UniversalFeedParser'
1425
+ name: 'UniversalFeedParser'
1426
+ category: 'Feed Fetcher'
1427
+ url: 'https://github.com/kurtmckee/feedparser'
1428
+ producer:
1429
+ name: 'Kurt McKee'
1430
+ url: 'https://github.com/kurtmckee'
1431
+
1432
+ - regex: 'via secureurl\.fwdcdn\.com'
1433
+ name: 'UkrNet Mail Proxy'
1434
+ category: 'Crawler'
1435
+ url: ''
1436
+ producer:
1437
+ name: 'UkrNet Ltd'
1438
+ url: 'https://www.ukr.net/'
1439
+
1440
+ - regex: 'Uptimebot'
1441
+ name: 'Uptimebot'
1442
+ category: 'Site Monitor'
1443
+ url: 'https://uptime.com/uptimebot'
1444
+ producer:
1445
+ name: 'Uptime'
1446
+ url: 'https://uptime.com'
1447
+
920
1448
  - regex: 'UptimeRobot'
921
1449
  name: 'Uptime Robot'
922
1450
  category: 'Site Monitor'
@@ -933,6 +1461,22 @@
933
1461
  name: 'Profound Networks'
934
1462
  url: 'http://www.profound.net'
935
1463
 
1464
+ - regex: 'Vagabondo'
1465
+ name: 'Vagabondo'
1466
+ category: 'Crawler'
1467
+ url: ''
1468
+ producer:
1469
+ name: 'WiseGuys'
1470
+ url: 'http://www.wise-guys.nl/'
1471
+
1472
+ - regex: 'vkShare; '
1473
+ name: 'VK Share Button'
1474
+ category: 'Crawler'
1475
+ url: 'http://vk.com/dev/Share'
1476
+ producer:
1477
+ name: 'VK'
1478
+ url: 'http://vk.com/'
1479
+
936
1480
  - regex: 'VSMCrawler'
937
1481
  name: 'Visual Site Mapper Crawler'
938
1482
  category: 'Crawler'
@@ -965,7 +1509,7 @@
965
1509
  name: 'W3C'
966
1510
  url: 'http://www.w3.org'
967
1511
 
968
- - regex: 'W3C_Validator'
1512
+ - regex: 'W3C_Validator|Validator.nu'
969
1513
  name: 'W3C Markup Validation Service'
970
1514
  category: 'Validator'
971
1515
  url: 'http://validator.w3.org/services'
@@ -989,6 +1533,18 @@
989
1533
  name: 'W3C'
990
1534
  url: 'http://www.w3.org'
991
1535
 
1536
+ - regex: 'Wappalyzer'
1537
+ name: 'Wappalyzer'
1538
+ url: 'https://github.com/AliasIO/Wappalyzer'
1539
+ producer:
1540
+ name: 'AliasIO'
1541
+ url: 'https://github.com/AliasIO'
1542
+
1543
+ - regex: 'PTST/'
1544
+ name: 'WebPageTest'
1545
+ category: 'Site Monitor'
1546
+ url: 'https://www.webpagetest.org'
1547
+
992
1548
  - regex: 'WeSEE(:Search)?'
993
1549
  name: 'WeSEE:Search'
994
1550
  category: 'Search bot'
@@ -1013,6 +1569,14 @@
1013
1569
  name: 'WebSitePulse'
1014
1570
  url: 'http://www.websitepulse.com/'
1015
1571
 
1572
+ - regex: 'WordPress'
1573
+ name: 'WordPress'
1574
+ category: 'Service Agent'
1575
+ url: 'https://wordpress.org/'
1576
+ producer:
1577
+ name: 'Wordpress.org'
1578
+ url: 'https://wordpress.org/'
1579
+
1016
1580
  - regex: 'Wotbox'
1017
1581
  name: 'Wotbox'
1018
1582
  category: 'Search bot'
@@ -1021,6 +1585,14 @@
1021
1585
  name: 'Wotbox'
1022
1586
  url: 'http://www.wotbox.com'
1023
1587
 
1588
+ - regex: 'XenForo'
1589
+ name: 'XenForo'
1590
+ category: 'Service Agent'
1591
+ url: 'https://xenforo.com/'
1592
+ producer:
1593
+ name: 'XenForo Ltd.'
1594
+ url: 'https://xenforo.com/'
1595
+
1024
1596
  - regex: 'yacybot'
1025
1597
  name: 'YaCy'
1026
1598
  category: 'Search bot'
@@ -1053,7 +1625,15 @@
1053
1625
  name: 'Yahoo! Inc.'
1054
1626
  url: 'http://www.yahoo.com'
1055
1627
 
1056
- - regex: 'Yandex(Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher'
1628
+ - regex: 'Y!J-BRW'
1629
+ name: 'Yahoo! Japan BRW'
1630
+ category: 'Crawler'
1631
+ url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
1632
+ producer:
1633
+ name: 'Yahoo! Japan Corp.'
1634
+ url: 'https://www.yahoo.co.jp/'
1635
+
1636
+ - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1057
1637
  name: 'Yandex Bot'
1058
1638
  category: 'Search bot'
1059
1639
  url: 'http://www.yandex.com/bots'
@@ -1061,7 +1641,7 @@
1061
1641
  name: 'Yandex LLC'
1062
1642
  url: 'http://company.yandex.com'
1063
1643
 
1064
- - regex: 'Yeti'
1644
+ - regex: 'Yeti|NaverJapan'
1065
1645
  name: 'Yeti/Naverbot'
1066
1646
  category: 'Search bot'
1067
1647
  url: 'http://help.naver.com/robots/'
@@ -1090,6 +1670,11 @@
1090
1670
  name: 'YunYun'
1091
1671
  url: 'http://www.yunyun.com'
1092
1672
 
1673
+ - regex: 'zgrab'
1674
+ name: 'zgrab'
1675
+ category: 'Security Checker'
1676
+ url: 'https://github.com/zmap/zgrab'
1677
+
1093
1678
  - regex: 'Zookabot'
1094
1679
  name: 'Zookabot'
1095
1680
  category: 'Crawler'
@@ -1138,9 +1723,9 @@
1138
1723
  name: 'HubPages'
1139
1724
  url: 'http://hubpages.com/'
1140
1725
 
1141
- - regex: 'Pinterest/\d\.\d.*www\.pinterest\.com.*'
1726
+ - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
1142
1727
  name: 'Pinterest'
1143
- url: ''
1728
+ url: 'http://www.pinterest.com/bot.html'
1144
1729
  category: 'Crawler'
1145
1730
  producer:
1146
1731
  name: 'Pinterest'
@@ -1154,6 +1739,30 @@
1154
1739
  name: 'Site24x7'
1155
1740
  url: 'https://www.site24x7.com'
1156
1741
 
1742
+ - regex: 's~snapchat-proxy'
1743
+ name: 'Snapchat Proxy'
1744
+ category: 'Crawler'
1745
+ url: 'https://www.snapchat.com'
1746
+ producer:
1747
+ name: 'Snapchat Inc.'
1748
+ url: 'https://www.snapchat.com'
1749
+
1750
+ - regex: "Let's Encrypt validation server"
1751
+ name: "Let's Encrypt Validation"
1752
+ category: 'Service Agent'
1753
+ url: 'https://letsencrypt.org/how-it-works/'
1754
+ producer:
1755
+ name: "Let's Encrypt"
1756
+ url: 'https://letsencrypt.org'
1757
+
1758
+ - regex: 'GrapeshotCrawler'
1759
+ name: 'Grapeshot'
1760
+ category: 'Crawler'
1761
+ url: 'https://www.grapeshot.com/crawler'
1762
+ producer:
1763
+ name: 'Grapeshot'
1764
+ url: 'https://www.grapeshot.com'
1765
+
1157
1766
  - regex: 'www\.monitor\.us'
1158
1767
  name: 'Monitor.Us'
1159
1768
  category: 'Site Monitor'
@@ -1162,6 +1771,22 @@
1162
1771
  name: 'Monitor.Us'
1163
1772
  url: 'http://www.monitor.us'
1164
1773
 
1774
+ - regex: 'Catchpoint( bot)?'
1775
+ name: 'Catchpoint'
1776
+ category: 'Site Monitor'
1777
+ url: ''
1778
+ producer:
1779
+ name: 'Catchpoint Systems'
1780
+ url: 'http://www.catchpoint.com/'
1781
+
1782
+ - regex: 'bitlybot'
1783
+ name: 'BitlyBot'
1784
+ category: 'Crawler'
1785
+ url: 'https://bitly.com'
1786
+ producer:
1787
+ name: 'Bitly, Inc.'
1788
+ url: 'https://bitly.com'
1789
+
1165
1790
  - regex: 'Zao/'
1166
1791
  name: 'Zao'
1167
1792
  category: 'Crawler'
@@ -1217,9 +1842,204 @@
1217
1842
  - regex: 'Server Density Service Monitoring.*'
1218
1843
  name: 'Server Density'
1219
1844
 
1220
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Catchpoint bot|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
1845
+ - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1846
+ name: 'RSSRadio Bot'
1847
+
1848
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9)'
1221
1849
  name: 'Generic Bot'
1222
1850
 
1851
+ - regex: '^sentry'
1852
+ name: 'Sentry Bot'
1853
+ producer:
1854
+ name: 'Sentry'
1855
+ url: 'https://sentry.io'
1856
+
1857
+ - regex: '^Spotify'
1858
+ name: 'Spotify'
1859
+ producer:
1860
+ name: 'Spotify'
1861
+ url: 'https://www.spotify.com'
1862
+
1863
+ - regex: 'The Knowledge AI'
1864
+ name: 'The Knowledge AI'
1865
+ category: 'Crawler'
1866
+
1867
+ - regex: 'Embedly'
1868
+ name: 'Embedly'
1869
+ category: 'Crawler'
1870
+ url: 'https://support.embed.ly/hc/en-us'
1871
+ producer:
1872
+ name: 'A Medium, Corp.'
1873
+ url: 'https://medium.com/'
1874
+
1875
+ - regex: 'BrandVerity'
1876
+ name: 'BrandVerity'
1877
+ category: 'Crawler'
1878
+ url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
1879
+ producer:
1880
+ name: 'BrandVerity, Inc.'
1881
+ url: 'https://www.brandverity.com/'
1882
+
1883
+ - regex: 'Kaspersky Lab CFR link resolver'
1884
+ name: 'Kaspersky'
1885
+ category: 'Security Checker'
1886
+ url: 'https://www.kaspersky.com/'
1887
+ producer:
1888
+ name: 'AO Kaspersky Lab'
1889
+ url: 'https://www.kaspersky.com/'
1890
+
1891
+ - regex: 'eZ Publish Link Validator'
1892
+ name: 'eZ Publish Link Validator'
1893
+ category: 'Crawler'
1894
+ url: 'https://ez.no/'
1895
+ producer:
1896
+ name: 'eZ Systems AS'
1897
+ url: 'https://ez.no/'
1898
+
1899
+ - regex: 'woorankreview'
1900
+ name: 'WooRank'
1901
+ category: 'Search bot'
1902
+ url: 'https://www.woorank.com/'
1903
+ producer:
1904
+ name: 'WooRank sprl'
1905
+ url: 'https://www.woorank.com/'
1906
+
1907
+ - regex: '(Match|LinkCheck) by Siteimprove.com'
1908
+ name: 'Siteimprove'
1909
+ category: 'Search bot'
1910
+ url: 'https://siteimprove.com/'
1911
+ producer:
1912
+ name: 'Siteimprove GmbH'
1913
+ url: 'https://siteimprove.com/'
1914
+
1915
+ - regex: 'CATExplorador'
1916
+ name: 'CATExplorador'
1917
+ category: 'Search bot'
1918
+ url: 'https://fundacio.cat/ca/domini/'
1919
+ producer:
1920
+ name: 'Fundació puntCAT'
1921
+ url: 'https://fundacio.cat/ca/domini/'
1922
+
1923
+ - regex: 'Buck'
1924
+ name: 'Buck'
1925
+ category: 'Search bot'
1926
+ url: 'https://hypefactors.com/'
1927
+ producer:
1928
+ name: 'Hypefactors A/S'
1929
+ url: 'https://hypefactors.com/'
1930
+
1931
+ - regex: 'tracemyfile'
1932
+ name: 'TraceMyFile'
1933
+ category: 'Search bot'
1934
+ url: 'https://www.tracemyfile.com/'
1935
+ producer:
1936
+ name: 'Idee Inc.'
1937
+ url: 'http://ideeinc.com/'
1938
+
1939
+ - regex: 'zelist.ro feed parser'
1940
+ name: 'Ze List'
1941
+ url: 'https://www.zelist.ro/'
1942
+ category: 'Feed Fetcher'
1943
+ producer:
1944
+ name: 'Treeworks SRL'
1945
+ url: 'https://www.tree.ro/'
1946
+
1947
+ - regex: 'weborama-fetcher'
1948
+ name: 'Weborama'
1949
+ category: 'Search bot'
1950
+ url: 'https://weborama.com/'
1951
+ producer:
1952
+ name: 'Weborama SA'
1953
+ url: 'https://weborama.com/'
1954
+
1955
+ - regex: 'BoardReader Favicon Fetcher'
1956
+ name: 'BoardReader'
1957
+ category: 'Search bot'
1958
+ url: 'http://boardreader.com/'
1959
+ producer:
1960
+ name: 'Effyis Inc'
1961
+ url: 'http://boardreader.com/'
1962
+
1963
+ - regex: 'IDG/IT'
1964
+ name: 'IDG/IT'
1965
+ category: 'Search bot'
1966
+ url: 'https://spaziodati.eu/'
1967
+ producer:
1968
+ name: 'SpazioDati S.r.l.'
1969
+ url: 'https://spaziodati.eu/'
1970
+
1971
+ - regex: 'Bytespider'
1972
+ name: 'Bytespider'
1973
+ category: 'Search bot'
1974
+ url: 'https://bytedance.com/'
1975
+ producer:
1976
+ name: 'ByteDance Ltd.'
1977
+ url: 'https://bytedance.com/'
1978
+
1979
+ - regex: 'WikiDo'
1980
+ name: 'WikiDo'
1981
+ category: 'Search bot'
1982
+ url: 'https://www.wikido.com/'
1983
+ producer:
1984
+ name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
1985
+ url: 'https://www.wikido.com/'
1986
+
1987
+ - regex: 'AwarioSmartBot'
1988
+ name: 'Awario'
1989
+ category: 'Search bot'
1990
+ url: 'https://awario.com/bots.html'
1991
+ producer:
1992
+ name: 'Awario'
1993
+ url: 'https://awario.com/'
1994
+
1995
+ - regex: 'AwarioRssBot'
1996
+ name: 'Awario'
1997
+ category: 'Feed Fetcher'
1998
+ url: 'https://awario.com/bots.html'
1999
+ producer:
2000
+ name: 'Awario'
2001
+ url: 'https://awario.com/'
2002
+
2003
+ - regex: 'oBot'
2004
+ name: 'oBot'
2005
+ category: 'Search bot'
2006
+ url: 'http://www.xforce-security.com/crawler/'
2007
+ producer:
2008
+ name: 'IBM Germany Research & Development GmbH'
2009
+ url: 'https://exchange.xforce.ibmcloud.com/'
2010
+
2011
+ - regex: 'SMTBot'
2012
+ name: 'SMTBot'
2013
+ category: 'Search bot'
2014
+ url: 'https://www.similartech.com/smtbot'
2015
+ producer:
2016
+ name: 'SimilarTech Ltd.'
2017
+ url: 'https://www.similartech.com/'
2018
+
2019
+ - regex: 'LCC'
2020
+ name: 'LCC'
2021
+ category: 'Search bot'
2022
+ url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
2023
+ producer:
2024
+ name: 'Universität Leipzig'
2025
+ url: 'https://www.uni-leipzig.de/'
2026
+
2027
+ - regex: 'Startpagina-Linkchecker'
2028
+ name: 'Startpagina Linkchecker'
2029
+ category: 'Search bot'
2030
+ url: 'https://www.startpagina.nl/linkchecker'
2031
+ producer:
2032
+ name: 'Startpagina B.V.'
2033
+ url: 'https://www.startpagina.nl/'
2034
+
2035
+ - regex: 'GTmetrix'
2036
+ name: 'GTmetrix'
2037
+ category: 'Crawler'
2038
+ url: 'https://gtmetrix.com/'
2039
+ producer:
2040
+ name: 'Carbon60 Operating Co. Ltd.'
2041
+ url: 'https://www.carbon60.com/'
2042
+
1223
2043
  # Generic detections
1224
2044
 
1225
2045
  - regex: 'Nutch'
@@ -1230,5 +2050,5 @@
1230
2050
  name: 'The Apache Software Foundation'
1231
2051
  url: 'http://www.apache.org/foundation/'
1232
2052
 
1233
- - regex: '[a-z0-9\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)([^a-z]|$)'
2053
+ - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
1234
2054
  name: 'Generic Bot'