device_detector 1.0.1 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +49 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +5 -10
  5. data/CHANGELOG.md +14 -1
  6. data/README.md +6 -6
  7. data/Rakefile +20 -13
  8. data/device_detector.gemspec +1 -0
  9. data/lib/device_detector.rb +30 -26
  10. data/lib/device_detector/bot.rb +2 -2
  11. data/lib/device_detector/client.rb +3 -2
  12. data/lib/device_detector/device.rb +46 -20
  13. data/lib/device_detector/memory_cache.rb +26 -19
  14. data/lib/device_detector/metadata_extractor.rb +7 -8
  15. data/lib/device_detector/model_extractor.rb +3 -3
  16. data/lib/device_detector/name_extractor.rb +2 -2
  17. data/lib/device_detector/os.rb +121 -111
  18. data/lib/device_detector/parser.rb +22 -9
  19. data/lib/device_detector/version.rb +3 -1
  20. data/lib/device_detector/version_extractor.rb +2 -3
  21. data/regexes/bots.yml +442 -19
  22. data/regexes/client/browser_engine.yml +7 -1
  23. data/regexes/client/browsers.yml +773 -103
  24. data/regexes/client/feed_readers.yml +14 -8
  25. data/regexes/client/libraries.yml +43 -2
  26. data/regexes/client/mediaplayers.yml +21 -5
  27. data/regexes/client/mobile_apps.yml +131 -1
  28. data/regexes/client/pim.yml +6 -1
  29. data/regexes/device/cameras.yml +1 -1
  30. data/regexes/device/car_browsers.yml +7 -3
  31. data/regexes/device/consoles.yml +3 -3
  32. data/regexes/device/mobiles.yml +11365 -791
  33. data/regexes/device/notebooks.yml +114 -0
  34. data/regexes/device/portable_media_player.yml +2 -2
  35. data/regexes/device/televisions.yml +17 -3
  36. data/regexes/oss.yml +115 -47
  37. data/regexes/vendorfragments.yml +6 -2
  38. data/spec/device_detector/concrete_user_agent_spec.rb +16 -17
  39. data/spec/device_detector/detector_fixtures_spec.rb +30 -35
  40. data/spec/device_detector/device_spec.rb +28 -48
  41. data/spec/device_detector/memory_cache_spec.rb +60 -28
  42. data/spec/device_detector/model_extractor_spec.rb +3 -3
  43. data/spec/device_detector/version_extractor_spec.rb +5 -6
  44. data/spec/device_detector_spec.rb +49 -78
  45. data/spec/fixtures/client/browser.yml +1521 -406
  46. data/spec/fixtures/client/feed_reader.yml +39 -51
  47. data/spec/fixtures/client/library.yml +72 -11
  48. data/spec/fixtures/client/mediaplayer.yml +29 -40
  49. data/spec/fixtures/client/mobile_app.yml +172 -32
  50. data/spec/fixtures/client/pim.yml +32 -19
  51. data/spec/fixtures/detector/bots.yml +854 -19
  52. data/spec/fixtures/detector/camera.yml +22 -2
  53. data/spec/fixtures/detector/car_browser.yml +60 -0
  54. data/spec/fixtures/detector/console.yml +43 -3
  55. data/spec/fixtures/detector/desktop.yml +2860 -1527
  56. data/spec/fixtures/detector/feature_phone.yml +69 -1
  57. data/spec/fixtures/detector/feed_reader.yml +158 -130
  58. data/spec/fixtures/detector/mediaplayer.yml +113 -39
  59. data/spec/fixtures/detector/mobile_apps.yml +262 -89
  60. data/spec/fixtures/detector/phablet.yml +3444 -663
  61. data/spec/fixtures/detector/portable_media_player.yml +57 -0
  62. data/spec/fixtures/detector/smart_speaker.yml +55 -0
  63. data/spec/fixtures/detector/smartphone-1.yml +4739 -4765
  64. data/spec/fixtures/detector/smartphone-10.yml +9973 -0
  65. data/spec/fixtures/detector/smartphone-11.yml +10015 -0
  66. data/spec/fixtures/detector/smartphone-12.yml +9897 -0
  67. data/spec/fixtures/detector/smartphone-13.yml +9912 -0
  68. data/spec/fixtures/detector/smartphone-14.yml +9935 -0
  69. data/spec/fixtures/detector/smartphone-15.yml +6595 -0
  70. data/spec/fixtures/detector/smartphone-16.yml +10021 -0
  71. data/spec/fixtures/detector/smartphone-17.yml +9408 -0
  72. data/spec/fixtures/detector/smartphone-2.yml +4265 -4238
  73. data/spec/fixtures/detector/smartphone-3.yml +4487 -4391
  74. data/spec/fixtures/detector/smartphone-4.yml +4210 -4179
  75. data/spec/fixtures/detector/smartphone-5.yml +5794 -2901
  76. data/spec/fixtures/detector/smartphone-6.yml +10114 -0
  77. data/spec/fixtures/detector/smartphone-7.yml +9975 -0
  78. data/spec/fixtures/detector/smartphone-8.yml +9897 -0
  79. data/spec/fixtures/detector/smartphone-9.yml +9880 -0
  80. data/spec/fixtures/detector/smartphone.yml +4152 -4048
  81. data/spec/fixtures/detector/tablet-1.yml +3997 -3991
  82. data/spec/fixtures/detector/tablet-2.yml +6820 -1935
  83. data/spec/fixtures/detector/tablet-3.yml +9968 -0
  84. data/spec/fixtures/detector/tablet-4.yml +7113 -0
  85. data/spec/fixtures/detector/tablet.yml +3789 -3804
  86. data/spec/fixtures/detector/tv.yml +3889 -1495
  87. data/spec/fixtures/detector/unknown.yml +45 -179
  88. data/spec/fixtures/detector/wearable.yml +61 -0
  89. data/spec/fixtures/device/camera.yml +4 -3
  90. data/spec/fixtures/device/car_browser.yml +9 -2
  91. data/spec/fixtures/device/console.yml +15 -14
  92. data/spec/fixtures/device/notebook.yml +7 -0
  93. data/spec/fixtures/parser/oss.yml +177 -0
  94. data/spec/fixtures/parser/vendorfragments.yml +6 -0
  95. metadata +57 -7
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DeviceDetector
2
- VERSION = '1.0.1'
4
+ VERSION = '1.0.5'
3
5
  end
@@ -1,12 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DeviceDetector
2
4
  class VersionExtractor < MetadataExtractor
3
-
4
5
  private
5
6
 
6
7
  def metadata_string
7
8
  String(regex_meta[:version])
8
9
  end
9
-
10
10
  end
11
11
  end
12
-
@@ -1,7 +1,7 @@
1
1
  ###############
2
2
  # Device Detector - The Universal Device Detection library for parsing User Agents
3
3
  #
4
- # @link http://piwik.org
4
+ # @link https://matomo.org
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
@@ -53,6 +53,21 @@
53
53
  name: 'Alexa Internet'
54
54
  url: 'http://www.alexa.com'
55
55
 
56
+ - regex: 'alexa site audit'
57
+ name: 'Alexa Site Audit'
58
+ category: 'Site Monitor'
59
+ url: 'http://www.alexa.com/help/webmasters'
60
+ producer:
61
+ name: 'Alexa Internet'
62
+ url: 'http://www.alexa.com'
63
+
64
+ - regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
65
+ name: 'Amazon Route53 Health Check'
66
+ category: 'Service Agent'
67
+ producer:
68
+ name: 'Amazon Web Services'
69
+ url: 'https://aws.amazon.com/'
70
+
56
71
  - regex: 'AmorankSpider'
57
72
  name: 'Amorank Spider'
58
73
  category: 'Crawler'
@@ -77,6 +92,22 @@
77
92
  name: 'Apple Inc'
78
93
  url: 'http://www.apple.com'
79
94
 
95
+ - regex: 'Arachni'
96
+ name: 'Arachni'
97
+ category: 'Security Checker'
98
+ url: 'http://www.arachni-scanner.com'
99
+ producer:
100
+ name: 'Sarosys LLC'
101
+ url: 'http://www.sarosys.com/'
102
+
103
+ - regex: 'AspiegelBot'
104
+ name: 'AspiegelBot'
105
+ category: 'Crawler'
106
+ url: 'https://aspiegel.com/'
107
+ producer:
108
+ name: 'Huawei'
109
+ url: 'https://www.huawei.com/'
110
+
80
111
  - regex: 'Castro 2, Episode Duration Lookup'
81
112
  name: 'Castro 2'
82
113
  category: 'Service Agent'
@@ -181,6 +212,13 @@
181
212
  name: 'Blogtrottr Ltd'
182
213
  url: 'https://blogtrottr.com/'
183
214
 
215
+ - regex: 'BoardReader Blog Indexer'
216
+ name: 'BoardReader Blog Indexer'
217
+ category: 'Crawler'
218
+ producer:
219
+ name: 'BoardReader'
220
+ url: 'http://boardreader.com/'
221
+
184
222
  - regex: 'BountiiBot'
185
223
  name: 'Bountii Bot'
186
224
  category: 'Search bot'
@@ -253,13 +291,13 @@
253
291
  name: 'CloudFlare'
254
292
  url: 'http://www.cloudflare.com'
255
293
 
256
- - regex: 'coccoc/'
294
+ - regex: 'coccoc|coccocbot(-ads|-fast|-image|-shopping|-web)?'
257
295
  name: 'Cốc Cốc Bot'
258
- url: 'http://help.coccoc.com/'
296
+ url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
259
297
  category: 'Search bot'
260
298
  producer:
261
299
  name: 'Cốc Cốc'
262
- url: 'http://coccoc.com/'
300
+ url: 'https://coccoc.com/'
263
301
 
264
302
  - regex: 'collectd'
265
303
  name: 'Collectd'
@@ -293,6 +331,15 @@
293
331
  name: 'Datadog'
294
332
  url: 'https://www.datadoghq.com/'
295
333
 
334
+ - regex: 'Datanyze'
335
+ name: 'Datanyze'
336
+ url: ''
337
+ category: 'Crawler'
338
+ producer:
339
+ name: 'Datanyze'
340
+ url: 'https://www.datanyze.com'
341
+
342
+
296
343
  - regex: 'Dataprovider'
297
344
  name: 'Dataprovider'
298
345
  category: 'Crawler'
@@ -341,7 +388,7 @@
341
388
  name: 'SEOmoz, Inc.'
342
389
  url: 'http://moz.com/'
343
390
 
344
- - regex: 'DuckDuck'
391
+ - regex: 'DuckDuck(?:Go-Favicons-)?Bot'
345
392
  name: 'DuckDuckGo Bot'
346
393
  category: 'Search bot'
347
394
  url: 'https://duckduckgo.com/duckduckbot'
@@ -357,6 +404,13 @@
357
404
  name: 'easou ICP'
358
405
  url: 'http://www.easou.com'
359
406
 
407
+ - regex: 'eCairn-Grabber'
408
+ name: 'eCairn-Grabber'
409
+ category: 'Crawler'
410
+ producer:
411
+ name: 'eCairn'
412
+ url: 'https://ecairn.com'
413
+
360
414
  - regex: 'EMail Exractor'
361
415
  name: 'EMail Exractor'
362
416
  category: 'Crawler'
@@ -397,7 +451,7 @@
397
451
  name: 'SEOmoz, Inc.'
398
452
  url: 'http://moz.com/'
399
453
 
400
- - regex: 'facebookexternalhit|facebookplatform'
454
+ - regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
401
455
  name: 'Facebook External Hit'
402
456
  category: 'Social Media Agent'
403
457
  url: 'https://www.facebook.com/externalhit_uatext.php'
@@ -466,6 +520,11 @@
466
520
  category: 'Crawler'
467
521
  url: 'http://www.findxbot.com'
468
522
 
523
+ - regex: 'FreshRSS'
524
+ name: 'FreshRSS'
525
+ category: 'Feed Fetcher'
526
+ url: 'https://freshrss.org/'
527
+
469
528
  - regex: 'Genieo'
470
529
  name: 'Genieo Web filter'
471
530
  category: ''
@@ -498,6 +557,18 @@
498
557
  name: 'NTT Resonant'
499
558
  url: 'http://goo.ne.jp'
500
559
 
560
+ - regex: 'Google Favicon'
561
+ name: 'Google Favicon'
562
+ category: 'Crawler'
563
+
564
+ - regex: 'Google Search Console'
565
+ name: 'Google Search Console'
566
+ category: 'Crawler'
567
+ url: 'https://search.google.com/search-console/about'
568
+ producer:
569
+ name: 'Google Inc.'
570
+ url: 'http://www.google.com'
571
+
501
572
  - regex: 'Google Page Speed Insights'
502
573
  name: 'Google PageSpeed Insights'
503
574
  category: 'Site Monitor'
@@ -514,6 +585,14 @@
514
585
  name: 'Google Inc.'
515
586
  url: 'http://www.google.com'
516
587
 
588
+ - regex: 'Google-Cloud-Scheduler'
589
+ name: 'Google Cloud Scheduler'
590
+ category: 'Crawler'
591
+ url: 'https://cloud.google.com/scheduler'
592
+ producer:
593
+ name: 'Google Inc.'
594
+ url: 'https://www.google.com'
595
+
517
596
  - regex: 'Google-Structured-Data-Testing-Tool'
518
597
  name: 'Google Structured Data Testing Tool'
519
598
  category: 'Validator'
@@ -522,6 +601,14 @@
522
601
  name: 'Google Inc.'
523
602
  url: 'http://www.google.com'
524
603
 
604
+ - regex: 'GoogleStackdriverMonitoring'
605
+ name: 'Google Stackdriver Monitoring'
606
+ category: 'Site Monitor'
607
+ url: 'https://cloud.google.com/monitoring'
608
+ producer:
609
+ name: 'Google Inc.'
610
+ url: 'https://www.google.com'
611
+
525
612
  - regex: 'via ggpht\.com GoogleImageProxy'
526
613
  name: 'Gmail Image Proxy'
527
614
  category: 'Crawler'
@@ -529,7 +616,7 @@
529
616
  producer:
530
617
  name: 'Google Inc.'
531
618
  url: 'http://www.google.com'
532
-
619
+
533
620
  - regex: 'SeznamEmailProxy'
534
621
  name: 'Seznam Email Proxy'
535
622
  category: 'Crawler'
@@ -562,7 +649,7 @@
562
649
  name: 'Visual Meta'
563
650
  url: 'https://www.shopalike.cz/'
564
651
 
565
- - regex: 'Googlebot(-Mobile|-Image|-Video|-News)?|Feedfetcher-Google|Google-Test|Google-Site-Verification|Google Web Preview|AdsBot-Google(-Mobile)?|Google-Adwords-Instant|Mediapartners-Google|Google.*/\+/web/snippet|GoogleProducer|Google[ -]Publisher[ -]Plugin|Google-Shopping-Quality'
652
+ - regex: 'AdsBot-Google(-Mobile)?|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot(-Mobile|-Image|-Video|-News)?|GoogleProducer|Google.*/\+/web/snippet'
566
653
  name: 'Googlebot'
567
654
  category: 'Search bot'
568
655
  url: 'http://www.google.com/bot.html'
@@ -602,6 +689,11 @@
602
689
  name: ''
603
690
  url: ''
604
691
 
692
+ - regex: 'inoreader.com'
693
+ name: 'inoreader'
694
+ category: 'Feed Reader'
695
+ url: 'https://www.inoreader.com'
696
+
605
697
  - regex: 'iisbot'
606
698
  name: 'IIS Site Analysis'
607
699
  category: 'Crawler'
@@ -704,6 +796,10 @@
704
796
  name: 'Robert Graham'
705
797
  url: 'https://github.com/robertdavidgraham'
706
798
 
799
+ - regex: 'Mastodon/'
800
+ name: 'Mastodon Bot'
801
+ category: 'Social Media Agent'
802
+
707
803
  - regex: 'meanpathbot'
708
804
  name: 'Meanpath Bot'
709
805
  category: 'Search bot'
@@ -780,6 +876,10 @@
780
876
  name: 'Nagios Plugins Development Team'
781
877
  url: 'https://nagios.org'
782
878
 
879
+ - regex: 'nbertaupete95\(at\)gmail.com'
880
+ name: 'nbertaupete95'
881
+ category: 'Crawler'
882
+
783
883
  - regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
784
884
  name: 'Netcraft Survey Bot'
785
885
  category: 'Search bot'
@@ -790,7 +890,7 @@
790
890
 
791
891
  - regex: 'netEstate NE Crawler'
792
892
  name: 'netEstate'
793
- category: 'Analytics SEO Crawler'
893
+ category: 'Crawler'
794
894
  url: 'http://www.website-datenbank.de/Impressum'
795
895
  producer:
796
896
  name: 'netEstate GmbH'
@@ -836,10 +936,17 @@
836
936
  name: 'Nmap'
837
937
  url: 'https://nmap.org/'
838
938
 
939
+ - regex: 'Nuzzel'
940
+ name: 'Nuzzel'
941
+ category: 'Crawler'
942
+ producer:
943
+ name: 'Nuzzel'
944
+ url: https://www.nuzzel.com/
945
+
839
946
  - regex: 'Octopus [0-9]'
840
947
  name: 'Octopus'
841
948
 
842
- - regex: 'omgilibot'
949
+ - regex: 'omgili(?:bot)?'
843
950
  name: 'Omgili bot'
844
951
  category: 'Search bot'
845
952
  url: 'http://www.omgili.com/Crawler.html'
@@ -931,7 +1038,15 @@
931
1038
  producer:
932
1039
  name: 'Pingdom AB'
933
1040
  url: 'https://www.pingdom.com'
934
-
1041
+
1042
+ - regex: 'Quora Link Preview'
1043
+ name: 'Quora Link Preview'
1044
+ category: 'Crawler'
1045
+ url: ''
1046
+ producer:
1047
+ name: 'Quora'
1048
+ url: 'http://www.quora.com'
1049
+
935
1050
  - regex: 'RamblerMail'
936
1051
  name: 'RamblerMail Image Proxy'
937
1052
  category: 'Crawler'
@@ -1075,6 +1190,14 @@
1075
1190
  name: 'Seznam.cz, a.s.'
1076
1191
  url: 'http://www.seznam.cz/'
1077
1192
 
1193
+ - regex: 'shopify-partner-homepage-scraper'
1194
+ name: 'Shopify Partner'
1195
+ category: 'Crawler'
1196
+ url: 'https://www.shopify.com/partners'
1197
+ producer:
1198
+ name: 'Shopify'
1199
+ url: 'https://www.shopify.com/'
1200
+
1078
1201
  - regex: 'ShopWiki'
1079
1202
  name: 'ShopWiki'
1080
1203
  category: 'Search tools'
@@ -1107,6 +1230,14 @@
1107
1230
  name: 'SISTRIX GmbH'
1108
1231
  url: 'http://www.sistrix.de'
1109
1232
 
1233
+ - regex: 'compatible; (?:SISTRIX )?Optimizer'
1234
+ name: 'SISTRIX Optimizer'
1235
+ category: 'Crawler'
1236
+ url: 'https://optimizer.sistrix.com'
1237
+ producer:
1238
+ name: 'SISTRIX GmbH'
1239
+ url: 'http://www.sistrix.de'
1240
+
1110
1241
  - regex: 'SiteSucker'
1111
1242
  name: 'SiteSucker'
1112
1243
  category: 'Crawler'
@@ -1211,7 +1342,7 @@
1211
1342
  category: 'Search bot'
1212
1343
 
1213
1344
  - regex: 'TelegramBot'
1214
- name: 'TelgramBot'
1345
+ name: 'TelegramBot'
1215
1346
  url: 'https://telegram.org/blog/bot-revolution'
1216
1347
 
1217
1348
  - regex: 'TLSProbe'
@@ -1238,6 +1369,11 @@
1238
1369
  name: ''
1239
1370
  url: ''
1240
1371
 
1372
+ - regex: 'theoldreader.com'
1373
+ name: 'theoldreader'
1374
+ category: 'Feed Reader'
1375
+ url: 'https://theoldreader.com'
1376
+
1241
1377
  - regex: 'trendictionbot'
1242
1378
  name: 'Trendiction Bot'
1243
1379
  category: 'Crawler'
@@ -1270,6 +1406,13 @@
1270
1406
  name: 'Mediasift'
1271
1407
  url: ''
1272
1408
 
1409
+ - regex: 'Twingly Recon'
1410
+ name: 'Twingly Recon'
1411
+ category: 'Crawler'
1412
+ producer:
1413
+ name: 'Twingly'
1414
+ url: 'https://www.twingly.com'
1415
+
1273
1416
  - regex: 'Twitterbot'
1274
1417
  name: 'Twitterbot'
1275
1418
  category: 'Social Media Agent'
@@ -1326,6 +1469,14 @@
1326
1469
  name: 'WiseGuys'
1327
1470
  url: 'http://www.wise-guys.nl/'
1328
1471
 
1472
+ - regex: 'vkShare; '
1473
+ name: 'VK Share Button'
1474
+ category: 'Crawler'
1475
+ url: 'http://vk.com/dev/Share'
1476
+ producer:
1477
+ name: 'VK'
1478
+ url: 'http://vk.com/'
1479
+
1329
1480
  - regex: 'VSMCrawler'
1330
1481
  name: 'Visual Site Mapper Crawler'
1331
1482
  category: 'Crawler'
@@ -1389,6 +1540,11 @@
1389
1540
  name: 'AliasIO'
1390
1541
  url: 'https://github.com/AliasIO'
1391
1542
 
1543
+ - regex: 'PTST/'
1544
+ name: 'WebPageTest'
1545
+ category: 'Site Monitor'
1546
+ url: 'https://www.webpagetest.org'
1547
+
1392
1548
  - regex: 'WeSEE(:Search)?'
1393
1549
  name: 'WeSEE:Search'
1394
1550
  category: 'Search bot'
@@ -1429,6 +1585,14 @@
1429
1585
  name: 'Wotbox'
1430
1586
  url: 'http://www.wotbox.com'
1431
1587
 
1588
+ - regex: 'XenForo'
1589
+ name: 'XenForo'
1590
+ category: 'Service Agent'
1591
+ url: 'https://xenforo.com/'
1592
+ producer:
1593
+ name: 'XenForo Ltd.'
1594
+ url: 'https://xenforo.com/'
1595
+
1432
1596
  - regex: 'yacybot'
1433
1597
  name: 'YaCy'
1434
1598
  category: 'Search bot'
@@ -1461,7 +1625,15 @@
1461
1625
  name: 'Yahoo! Inc.'
1462
1626
  url: 'http://www.yahoo.com'
1463
1627
 
1464
- - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher'
1628
+ - regex: 'Y!J-BRW'
1629
+ name: 'Yahoo! Japan BRW'
1630
+ category: 'Crawler'
1631
+ url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
1632
+ producer:
1633
+ name: 'Yahoo! Japan Corp.'
1634
+ url: 'https://www.yahoo.co.jp/'
1635
+
1636
+ - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1465
1637
  name: 'Yandex Bot'
1466
1638
  category: 'Search bot'
1467
1639
  url: 'http://www.yandex.com/bots'
@@ -1469,7 +1641,7 @@
1469
1641
  name: 'Yandex LLC'
1470
1642
  url: 'http://company.yandex.com'
1471
1643
 
1472
- - regex: 'Yeti'
1644
+ - regex: 'Yeti|NaverJapan'
1473
1645
  name: 'Yeti/Naverbot'
1474
1646
  category: 'Search bot'
1475
1647
  url: 'http://help.naver.com/robots/'
@@ -1551,9 +1723,9 @@
1551
1723
  name: 'HubPages'
1552
1724
  url: 'http://hubpages.com/'
1553
1725
 
1554
- - regex: 'Pinterest/\d\.\d.*www\.pinterest\.com.*'
1726
+ - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
1555
1727
  name: 'Pinterest'
1556
- url: ''
1728
+ url: 'http://www.pinterest.com/bot.html'
1557
1729
  category: 'Crawler'
1558
1730
  producer:
1559
1731
  name: 'Pinterest'
@@ -1567,6 +1739,14 @@
1567
1739
  name: 'Site24x7'
1568
1740
  url: 'https://www.site24x7.com'
1569
1741
 
1742
+ - regex: 's~snapchat-proxy'
1743
+ name: 'Snapchat Proxy'
1744
+ category: 'Crawler'
1745
+ url: 'https://www.snapchat.com'
1746
+ producer:
1747
+ name: 'Snapchat Inc.'
1748
+ url: 'https://www.snapchat.com'
1749
+
1570
1750
  - regex: "Let's Encrypt validation server"
1571
1751
  name: "Let's Encrypt Validation"
1572
1752
  category: 'Service Agent'
@@ -1662,7 +1842,10 @@
1662
1842
  - regex: 'Server Density Service Monitoring.*'
1663
1843
  name: 'Server Density'
1664
1844
 
1665
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
1845
+ - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1846
+ name: 'RSSRadio Bot'
1847
+
1848
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9)'
1666
1849
  name: 'Generic Bot'
1667
1850
 
1668
1851
  - regex: '^sentry'
@@ -1671,7 +1854,191 @@
1671
1854
  name: 'Sentry'
1672
1855
  url: 'https://sentry.io'
1673
1856
 
1674
- # Generic detections
1857
+ - regex: '^Spotify'
1858
+ name: 'Spotify'
1859
+ producer:
1860
+ name: 'Spotify'
1861
+ url: 'https://www.spotify.com'
1862
+
1863
+ - regex: 'The Knowledge AI'
1864
+ name: 'The Knowledge AI'
1865
+ category: 'Crawler'
1866
+
1867
+ - regex: 'Embedly'
1868
+ name: 'Embedly'
1869
+ category: 'Crawler'
1870
+ url: 'https://support.embed.ly/hc/en-us'
1871
+ producer:
1872
+ name: 'A Medium, Corp.'
1873
+ url: 'https://medium.com/'
1874
+
1875
+ - regex: 'BrandVerity'
1876
+ name: 'BrandVerity'
1877
+ category: 'Crawler'
1878
+ url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
1879
+ producer:
1880
+ name: 'BrandVerity, Inc.'
1881
+ url: 'https://www.brandverity.com/'
1882
+
1883
+ - regex: 'Kaspersky Lab CFR link resolver'
1884
+ name: 'Kaspersky'
1885
+ category: 'Security Checker'
1886
+ url: 'https://www.kaspersky.com/'
1887
+ producer:
1888
+ name: 'AO Kaspersky Lab'
1889
+ url: 'https://www.kaspersky.com/'
1890
+
1891
+ - regex: 'eZ Publish Link Validator'
1892
+ name: 'eZ Publish Link Validator'
1893
+ category: 'Crawler'
1894
+ url: 'https://ez.no/'
1895
+ producer:
1896
+ name: 'eZ Systems AS'
1897
+ url: 'https://ez.no/'
1898
+
1899
+ - regex: 'woorankreview'
1900
+ name: 'WooRank'
1901
+ category: 'Search bot'
1902
+ url: 'https://www.woorank.com/'
1903
+ producer:
1904
+ name: 'WooRank sprl'
1905
+ url: 'https://www.woorank.com/'
1906
+
1907
+ - regex: '(Match|LinkCheck) by Siteimprove.com'
1908
+ name: 'Siteimprove'
1909
+ category: 'Search bot'
1910
+ url: 'https://siteimprove.com/'
1911
+ producer:
1912
+ name: 'Siteimprove GmbH'
1913
+ url: 'https://siteimprove.com/'
1914
+
1915
+ - regex: 'CATExplorador'
1916
+ name: 'CATExplorador'
1917
+ category: 'Search bot'
1918
+ url: 'https://fundacio.cat/ca/domini/'
1919
+ producer:
1920
+ name: 'Fundació puntCAT'
1921
+ url: 'https://fundacio.cat/ca/domini/'
1922
+
1923
+ - regex: 'Buck'
1924
+ name: 'Buck'
1925
+ category: 'Search bot'
1926
+ url: 'https://hypefactors.com/'
1927
+ producer:
1928
+ name: 'Hypefactors A/S'
1929
+ url: 'https://hypefactors.com/'
1930
+
1931
+ - regex: 'tracemyfile'
1932
+ name: 'TraceMyFile'
1933
+ category: 'Search bot'
1934
+ url: 'https://www.tracemyfile.com/'
1935
+ producer:
1936
+ name: 'Idee Inc.'
1937
+ url: 'http://ideeinc.com/'
1938
+
1939
+ - regex: 'zelist.ro feed parser'
1940
+ name: 'Ze List'
1941
+ url: 'https://www.zelist.ro/'
1942
+ category: 'Feed Fetcher'
1943
+ producer:
1944
+ name: 'Treeworks SRL'
1945
+ url: 'https://www.tree.ro/'
1946
+
1947
+ - regex: 'weborama-fetcher'
1948
+ name: 'Weborama'
1949
+ category: 'Search bot'
1950
+ url: 'https://weborama.com/'
1951
+ producer:
1952
+ name: 'Weborama SA'
1953
+ url: 'https://weborama.com/'
1954
+
1955
+ - regex: 'BoardReader Favicon Fetcher'
1956
+ name: 'BoardReader'
1957
+ category: 'Search bot'
1958
+ url: 'http://boardreader.com/'
1959
+ producer:
1960
+ name: 'Effyis Inc'
1961
+ url: 'http://boardreader.com/'
1962
+
1963
+ - regex: 'IDG/IT'
1964
+ name: 'IDG/IT'
1965
+ category: 'Search bot'
1966
+ url: 'https://spaziodati.eu/'
1967
+ producer:
1968
+ name: 'SpazioDati S.r.l.'
1969
+ url: 'https://spaziodati.eu/'
1970
+
1971
+ - regex: 'Bytespider'
1972
+ name: 'Bytespider'
1973
+ category: 'Search bot'
1974
+ url: 'https://bytedance.com/'
1975
+ producer:
1976
+ name: 'ByteDance Ltd.'
1977
+ url: 'https://bytedance.com/'
1978
+
1979
+ - regex: 'WikiDo'
1980
+ name: 'WikiDo'
1981
+ category: 'Search bot'
1982
+ url: 'https://www.wikido.com/'
1983
+ producer:
1984
+ name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
1985
+ url: 'https://www.wikido.com/'
1986
+
1987
+ - regex: 'AwarioSmartBot'
1988
+ name: 'Awario'
1989
+ category: 'Search bot'
1990
+ url: 'https://awario.com/bots.html'
1991
+ producer:
1992
+ name: 'Awario'
1993
+ url: 'https://awario.com/'
1994
+
1995
+ - regex: 'AwarioRssBot'
1996
+ name: 'Awario'
1997
+ category: 'Feed Fetcher'
1998
+ url: 'https://awario.com/bots.html'
1999
+ producer:
2000
+ name: 'Awario'
2001
+ url: 'https://awario.com/'
2002
+
2003
+ - regex: 'oBot'
2004
+ name: 'oBot'
2005
+ category: 'Search bot'
2006
+ url: 'http://www.xforce-security.com/crawler/'
2007
+ producer:
2008
+ name: 'IBM Germany Research & Development GmbH'
2009
+ url: 'https://exchange.xforce.ibmcloud.com/'
2010
+
2011
+ - regex: 'SMTBot'
2012
+ name: 'SMTBot'
2013
+ category: 'Search bot'
2014
+ url: 'https://www.similartech.com/smtbot'
2015
+ producer:
2016
+ name: 'SimilarTech Ltd.'
2017
+ url: 'https://www.similartech.com/'
2018
+
2019
+ - regex: 'LCC'
2020
+ name: 'LCC'
2021
+ category: 'Search bot'
2022
+ url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
2023
+ producer:
2024
+ name: 'Universität Leipzig'
2025
+ url: 'https://www.uni-leipzig.de/'
2026
+
2027
+ - regex: 'Startpagina-Linkchecker'
2028
+ name: 'Startpagina Linkchecker'
2029
+ category: 'Search bot'
2030
+ url: 'https://www.startpagina.nl/linkchecker'
2031
+ producer:
2032
+ name: 'Startpagina B.V.'
2033
+ url: 'https://www.startpagina.nl/'
2034
+
2035
+ - regex: 'GTmetrix'
2036
+ name: 'GTmetrix'
2037
+ category: 'Crawler'
2038
+ url: 'https://gtmetrix.com/'
2039
+ producer:
2040
+ name: 'Carbon60 Operating Co. Ltd.'
2041
+ url: 'https://www.carbon60.com/'
1675
2042
 
1676
2043
  - regex: 'Nutch'
1677
2044
  name: 'Nutch-based Bot'
@@ -1681,5 +2048,61 @@
1681
2048
  name: 'The Apache Software Foundation'
1682
2049
  url: 'http://www.apache.org/foundation/'
1683
2050
 
1684
- - regex: '[a-z0-9\-_]*((?<!cu|power )bot(?! TAB| ?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
2051
+ - regex: 'Seobility'
2052
+ name: 'Seobility'
2053
+ category: 'Crawler'
2054
+ url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
2055
+
2056
+ - regex: 'Vercelbot'
2057
+ name: 'Vercel Bot'
2058
+ category: 'Service bot'
2059
+ url: 'https://vercel.com'
2060
+
2061
+ - regex: 'Grammarly'
2062
+ name: 'Grammarly'
2063
+ category: 'Service bot'
2064
+ url: 'http://www.grammarly.com'
2065
+
2066
+ - regex: 'Robozilla'
2067
+ name: 'Robozilla'
2068
+ category: 'Crawler'
2069
+
2070
+ - regex: 'Domains Project'
2071
+ name: 'Domains Project'
2072
+ category: 'Crawler'
2073
+ url: 'https://domainsproject.org'
2074
+
2075
+ - regex: 'PetalBot'
2076
+ name: 'Petal Bot'
2077
+ category: 'Crawler'
2078
+ url: 'https://aspiegel.com/petalbot'
2079
+
2080
+ - regex: 'SerendeputyBot'
2081
+ name: 'Serendeputy Bot'
2082
+ category: 'Crawler'
2083
+ url: 'http://serendeputy.com/about/serendeputy-bot'
2084
+
2085
+ - regex: 'ias-va.*admantx.*service-fetcher'
2086
+ name: 'ADmantX Service Fetcher'
2087
+ category: 'Service bot'
2088
+ url: 'https://www.admantx.com/service-fetcher.html'
2089
+
2090
+ - regex: 'SemanticScholarBot'
2091
+ name: 'Semantic Scholar Bot'
2092
+ category: 'Crawler'
2093
+ url: 'https://www.semanticscholar.org/crawler'
2094
+
2095
+ - regex: 'VelenPublicWebCrawler'
2096
+ name: 'Velen Public Web Crawler'
2097
+ category: 'Crawler'
2098
+ url: 'https://hunter.io/robot'
2099
+
2100
+ - regex: 'Barkrowler'
2101
+ name: 'Barkrowler'
2102
+ category: 'Crawler'
2103
+ url: 'http://www.exensa.com/crawl'
2104
+
2105
+ # Generic detections
2106
+
2107
+ - regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
1685
2108
  name: 'Generic Bot'