device_detector 1.0.1 → 1.0.5
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rubocop.yml +49 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -10
- data/CHANGELOG.md +14 -1
- data/README.md +6 -6
- data/Rakefile +20 -13
- data/device_detector.gemspec +1 -0
- data/lib/device_detector.rb +30 -26
- data/lib/device_detector/bot.rb +2 -2
- data/lib/device_detector/client.rb +3 -2
- data/lib/device_detector/device.rb +46 -20
- data/lib/device_detector/memory_cache.rb +26 -19
- data/lib/device_detector/metadata_extractor.rb +7 -8
- data/lib/device_detector/model_extractor.rb +3 -3
- data/lib/device_detector/name_extractor.rb +2 -2
- data/lib/device_detector/os.rb +121 -111
- data/lib/device_detector/parser.rb +22 -9
- data/lib/device_detector/version.rb +3 -1
- data/lib/device_detector/version_extractor.rb +2 -3
- data/regexes/bots.yml +442 -19
- data/regexes/client/browser_engine.yml +7 -1
- data/regexes/client/browsers.yml +773 -103
- data/regexes/client/feed_readers.yml +14 -8
- data/regexes/client/libraries.yml +43 -2
- data/regexes/client/mediaplayers.yml +21 -5
- data/regexes/client/mobile_apps.yml +131 -1
- data/regexes/client/pim.yml +6 -1
- data/regexes/device/cameras.yml +1 -1
- data/regexes/device/car_browsers.yml +7 -3
- data/regexes/device/consoles.yml +3 -3
- data/regexes/device/mobiles.yml +11365 -791
- data/regexes/device/notebooks.yml +114 -0
- data/regexes/device/portable_media_player.yml +2 -2
- data/regexes/device/televisions.yml +17 -3
- data/regexes/oss.yml +115 -47
- data/regexes/vendorfragments.yml +6 -2
- data/spec/device_detector/concrete_user_agent_spec.rb +16 -17
- data/spec/device_detector/detector_fixtures_spec.rb +30 -35
- data/spec/device_detector/device_spec.rb +28 -48
- data/spec/device_detector/memory_cache_spec.rb +60 -28
- data/spec/device_detector/model_extractor_spec.rb +3 -3
- data/spec/device_detector/version_extractor_spec.rb +5 -6
- data/spec/device_detector_spec.rb +49 -78
- data/spec/fixtures/client/browser.yml +1521 -406
- data/spec/fixtures/client/feed_reader.yml +39 -51
- data/spec/fixtures/client/library.yml +72 -11
- data/spec/fixtures/client/mediaplayer.yml +29 -40
- data/spec/fixtures/client/mobile_app.yml +172 -32
- data/spec/fixtures/client/pim.yml +32 -19
- data/spec/fixtures/detector/bots.yml +854 -19
- data/spec/fixtures/detector/camera.yml +22 -2
- data/spec/fixtures/detector/car_browser.yml +60 -0
- data/spec/fixtures/detector/console.yml +43 -3
- data/spec/fixtures/detector/desktop.yml +2860 -1527
- data/spec/fixtures/detector/feature_phone.yml +69 -1
- data/spec/fixtures/detector/feed_reader.yml +158 -130
- data/spec/fixtures/detector/mediaplayer.yml +113 -39
- data/spec/fixtures/detector/mobile_apps.yml +262 -89
- data/spec/fixtures/detector/phablet.yml +3444 -663
- data/spec/fixtures/detector/portable_media_player.yml +57 -0
- data/spec/fixtures/detector/smart_speaker.yml +55 -0
- data/spec/fixtures/detector/smartphone-1.yml +4739 -4765
- data/spec/fixtures/detector/smartphone-10.yml +9973 -0
- data/spec/fixtures/detector/smartphone-11.yml +10015 -0
- data/spec/fixtures/detector/smartphone-12.yml +9897 -0
- data/spec/fixtures/detector/smartphone-13.yml +9912 -0
- data/spec/fixtures/detector/smartphone-14.yml +9935 -0
- data/spec/fixtures/detector/smartphone-15.yml +6595 -0
- data/spec/fixtures/detector/smartphone-16.yml +10021 -0
- data/spec/fixtures/detector/smartphone-17.yml +9408 -0
- data/spec/fixtures/detector/smartphone-2.yml +4265 -4238
- data/spec/fixtures/detector/smartphone-3.yml +4487 -4391
- data/spec/fixtures/detector/smartphone-4.yml +4210 -4179
- data/spec/fixtures/detector/smartphone-5.yml +5794 -2901
- data/spec/fixtures/detector/smartphone-6.yml +10114 -0
- data/spec/fixtures/detector/smartphone-7.yml +9975 -0
- data/spec/fixtures/detector/smartphone-8.yml +9897 -0
- data/spec/fixtures/detector/smartphone-9.yml +9880 -0
- data/spec/fixtures/detector/smartphone.yml +4152 -4048
- data/spec/fixtures/detector/tablet-1.yml +3997 -3991
- data/spec/fixtures/detector/tablet-2.yml +6820 -1935
- data/spec/fixtures/detector/tablet-3.yml +9968 -0
- data/spec/fixtures/detector/tablet-4.yml +7113 -0
- data/spec/fixtures/detector/tablet.yml +3789 -3804
- data/spec/fixtures/detector/tv.yml +3889 -1495
- data/spec/fixtures/detector/unknown.yml +45 -179
- data/spec/fixtures/detector/wearable.yml +61 -0
- data/spec/fixtures/device/camera.yml +4 -3
- data/spec/fixtures/device/car_browser.yml +9 -2
- data/spec/fixtures/device/console.yml +15 -14
- data/spec/fixtures/device/notebook.yml +7 -0
- data/spec/fixtures/parser/oss.yml +177 -0
- data/spec/fixtures/parser/vendorfragments.yml +6 -0
- metadata +57 -7
data/regexes/bots.yml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
###############
|
2
2
|
# Device Detector - The Universal Device Detection library for parsing User Agents
|
3
3
|
#
|
4
|
-
# @link
|
4
|
+
# @link https://matomo.org
|
5
5
|
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
6
6
|
###############
|
7
7
|
|
@@ -53,6 +53,21 @@
|
|
53
53
|
name: 'Alexa Internet'
|
54
54
|
url: 'http://www.alexa.com'
|
55
55
|
|
56
|
+
- regex: 'alexa site audit'
|
57
|
+
name: 'Alexa Site Audit'
|
58
|
+
category: 'Site Monitor'
|
59
|
+
url: 'http://www.alexa.com/help/webmasters'
|
60
|
+
producer:
|
61
|
+
name: 'Alexa Internet'
|
62
|
+
url: 'http://www.alexa.com'
|
63
|
+
|
64
|
+
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
|
65
|
+
name: 'Amazon Route53 Health Check'
|
66
|
+
category: 'Service Agent'
|
67
|
+
producer:
|
68
|
+
name: 'Amazon Web Services'
|
69
|
+
url: 'https://aws.amazon.com/'
|
70
|
+
|
56
71
|
- regex: 'AmorankSpider'
|
57
72
|
name: 'Amorank Spider'
|
58
73
|
category: 'Crawler'
|
@@ -77,6 +92,22 @@
|
|
77
92
|
name: 'Apple Inc'
|
78
93
|
url: 'http://www.apple.com'
|
79
94
|
|
95
|
+
- regex: 'Arachni'
|
96
|
+
name: 'Arachni'
|
97
|
+
category: 'Security Checker'
|
98
|
+
url: 'http://www.arachni-scanner.com'
|
99
|
+
producer:
|
100
|
+
name: 'Sarosys LLC'
|
101
|
+
url: 'http://www.sarosys.com/'
|
102
|
+
|
103
|
+
- regex: 'AspiegelBot'
|
104
|
+
name: 'AspiegelBot'
|
105
|
+
category: 'Crawler'
|
106
|
+
url: 'https://aspiegel.com/'
|
107
|
+
producer:
|
108
|
+
name: 'Huawei'
|
109
|
+
url: 'https://www.huawei.com/'
|
110
|
+
|
80
111
|
- regex: 'Castro 2, Episode Duration Lookup'
|
81
112
|
name: 'Castro 2'
|
82
113
|
category: 'Service Agent'
|
@@ -181,6 +212,13 @@
|
|
181
212
|
name: 'Blogtrottr Ltd'
|
182
213
|
url: 'https://blogtrottr.com/'
|
183
214
|
|
215
|
+
- regex: 'BoardReader Blog Indexer'
|
216
|
+
name: 'BoardReader Blog Indexer'
|
217
|
+
category: 'Crawler'
|
218
|
+
producer:
|
219
|
+
name: 'BoardReader'
|
220
|
+
url: 'http://boardreader.com/'
|
221
|
+
|
184
222
|
- regex: 'BountiiBot'
|
185
223
|
name: 'Bountii Bot'
|
186
224
|
category: 'Search bot'
|
@@ -253,13 +291,13 @@
|
|
253
291
|
name: 'CloudFlare'
|
254
292
|
url: 'http://www.cloudflare.com'
|
255
293
|
|
256
|
-
- regex: 'coccoc
|
294
|
+
- regex: 'coccoc|coccocbot(-ads|-fast|-image|-shopping|-web)?'
|
257
295
|
name: 'Cốc Cốc Bot'
|
258
|
-
url: '
|
296
|
+
url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
|
259
297
|
category: 'Search bot'
|
260
298
|
producer:
|
261
299
|
name: 'Cốc Cốc'
|
262
|
-
url: '
|
300
|
+
url: 'https://coccoc.com/'
|
263
301
|
|
264
302
|
- regex: 'collectd'
|
265
303
|
name: 'Collectd'
|
@@ -293,6 +331,15 @@
|
|
293
331
|
name: 'Datadog'
|
294
332
|
url: 'https://www.datadoghq.com/'
|
295
333
|
|
334
|
+
- regex: 'Datanyze'
|
335
|
+
name: 'Datanyze'
|
336
|
+
url: ''
|
337
|
+
category: 'Crawler'
|
338
|
+
producer:
|
339
|
+
name: 'Datanyze'
|
340
|
+
url: 'https://www.datanyze.com'
|
341
|
+
|
342
|
+
|
296
343
|
- regex: 'Dataprovider'
|
297
344
|
name: 'Dataprovider'
|
298
345
|
category: 'Crawler'
|
@@ -341,7 +388,7 @@
|
|
341
388
|
name: 'SEOmoz, Inc.'
|
342
389
|
url: 'http://moz.com/'
|
343
390
|
|
344
|
-
- regex: 'DuckDuck'
|
391
|
+
- regex: 'DuckDuck(?:Go-Favicons-)?Bot'
|
345
392
|
name: 'DuckDuckGo Bot'
|
346
393
|
category: 'Search bot'
|
347
394
|
url: 'https://duckduckgo.com/duckduckbot'
|
@@ -357,6 +404,13 @@
|
|
357
404
|
name: 'easou ICP'
|
358
405
|
url: 'http://www.easou.com'
|
359
406
|
|
407
|
+
- regex: 'eCairn-Grabber'
|
408
|
+
name: 'eCairn-Grabber'
|
409
|
+
category: 'Crawler'
|
410
|
+
producer:
|
411
|
+
name: 'eCairn'
|
412
|
+
url: 'https://ecairn.com'
|
413
|
+
|
360
414
|
- regex: 'EMail Exractor'
|
361
415
|
name: 'EMail Exractor'
|
362
416
|
category: 'Crawler'
|
@@ -397,7 +451,7 @@
|
|
397
451
|
name: 'SEOmoz, Inc.'
|
398
452
|
url: 'http://moz.com/'
|
399
453
|
|
400
|
-
- regex: 'facebookexternalhit|facebookplatform'
|
454
|
+
- regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
|
401
455
|
name: 'Facebook External Hit'
|
402
456
|
category: 'Social Media Agent'
|
403
457
|
url: 'https://www.facebook.com/externalhit_uatext.php'
|
@@ -466,6 +520,11 @@
|
|
466
520
|
category: 'Crawler'
|
467
521
|
url: 'http://www.findxbot.com'
|
468
522
|
|
523
|
+
- regex: 'FreshRSS'
|
524
|
+
name: 'FreshRSS'
|
525
|
+
category: 'Feed Fetcher'
|
526
|
+
url: 'https://freshrss.org/'
|
527
|
+
|
469
528
|
- regex: 'Genieo'
|
470
529
|
name: 'Genieo Web filter'
|
471
530
|
category: ''
|
@@ -498,6 +557,18 @@
|
|
498
557
|
name: 'NTT Resonant'
|
499
558
|
url: 'http://goo.ne.jp'
|
500
559
|
|
560
|
+
- regex: 'Google Favicon'
|
561
|
+
name: 'Google Favicon'
|
562
|
+
category: 'Crawler'
|
563
|
+
|
564
|
+
- regex: 'Google Search Console'
|
565
|
+
name: 'Google Search Console'
|
566
|
+
category: 'Crawler'
|
567
|
+
url: 'https://search.google.com/search-console/about'
|
568
|
+
producer:
|
569
|
+
name: 'Google Inc.'
|
570
|
+
url: 'http://www.google.com'
|
571
|
+
|
501
572
|
- regex: 'Google Page Speed Insights'
|
502
573
|
name: 'Google PageSpeed Insights'
|
503
574
|
category: 'Site Monitor'
|
@@ -514,6 +585,14 @@
|
|
514
585
|
name: 'Google Inc.'
|
515
586
|
url: 'http://www.google.com'
|
516
587
|
|
588
|
+
- regex: 'Google-Cloud-Scheduler'
|
589
|
+
name: 'Google Cloud Scheduler'
|
590
|
+
category: 'Crawler'
|
591
|
+
url: 'https://cloud.google.com/scheduler'
|
592
|
+
producer:
|
593
|
+
name: 'Google Inc.'
|
594
|
+
url: 'https://www.google.com'
|
595
|
+
|
517
596
|
- regex: 'Google-Structured-Data-Testing-Tool'
|
518
597
|
name: 'Google Structured Data Testing Tool'
|
519
598
|
category: 'Validator'
|
@@ -522,6 +601,14 @@
|
|
522
601
|
name: 'Google Inc.'
|
523
602
|
url: 'http://www.google.com'
|
524
603
|
|
604
|
+
- regex: 'GoogleStackdriverMonitoring'
|
605
|
+
name: 'Google Stackdriver Monitoring'
|
606
|
+
category: 'Site Monitor'
|
607
|
+
url: 'https://cloud.google.com/monitoring'
|
608
|
+
producer:
|
609
|
+
name: 'Google Inc.'
|
610
|
+
url: 'https://www.google.com'
|
611
|
+
|
525
612
|
- regex: 'via ggpht\.com GoogleImageProxy'
|
526
613
|
name: 'Gmail Image Proxy'
|
527
614
|
category: 'Crawler'
|
@@ -529,7 +616,7 @@
|
|
529
616
|
producer:
|
530
617
|
name: 'Google Inc.'
|
531
618
|
url: 'http://www.google.com'
|
532
|
-
|
619
|
+
|
533
620
|
- regex: 'SeznamEmailProxy'
|
534
621
|
name: 'Seznam Email Proxy'
|
535
622
|
category: 'Crawler'
|
@@ -562,7 +649,7 @@
|
|
562
649
|
name: 'Visual Meta'
|
563
650
|
url: 'https://www.shopalike.cz/'
|
564
651
|
|
565
|
-
- regex: '
|
652
|
+
- regex: 'AdsBot-Google(-Mobile)?|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot(-Mobile|-Image|-Video|-News)?|GoogleProducer|Google.*/\+/web/snippet'
|
566
653
|
name: 'Googlebot'
|
567
654
|
category: 'Search bot'
|
568
655
|
url: 'http://www.google.com/bot.html'
|
@@ -602,6 +689,11 @@
|
|
602
689
|
name: ''
|
603
690
|
url: ''
|
604
691
|
|
692
|
+
- regex: 'inoreader.com'
|
693
|
+
name: 'inoreader'
|
694
|
+
category: 'Feed Reader'
|
695
|
+
url: 'https://www.inoreader.com'
|
696
|
+
|
605
697
|
- regex: 'iisbot'
|
606
698
|
name: 'IIS Site Analysis'
|
607
699
|
category: 'Crawler'
|
@@ -704,6 +796,10 @@
|
|
704
796
|
name: 'Robert Graham'
|
705
797
|
url: 'https://github.com/robertdavidgraham'
|
706
798
|
|
799
|
+
- regex: 'Mastodon/'
|
800
|
+
name: 'Mastodon Bot'
|
801
|
+
category: 'Social Media Agent'
|
802
|
+
|
707
803
|
- regex: 'meanpathbot'
|
708
804
|
name: 'Meanpath Bot'
|
709
805
|
category: 'Search bot'
|
@@ -780,6 +876,10 @@
|
|
780
876
|
name: 'Nagios Plugins Development Team'
|
781
877
|
url: 'https://nagios.org'
|
782
878
|
|
879
|
+
- regex: 'nbertaupete95\(at\)gmail.com'
|
880
|
+
name: 'nbertaupete95'
|
881
|
+
category: 'Crawler'
|
882
|
+
|
783
883
|
- regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
|
784
884
|
name: 'Netcraft Survey Bot'
|
785
885
|
category: 'Search bot'
|
@@ -790,7 +890,7 @@
|
|
790
890
|
|
791
891
|
- regex: 'netEstate NE Crawler'
|
792
892
|
name: 'netEstate'
|
793
|
-
category: '
|
893
|
+
category: 'Crawler'
|
794
894
|
url: 'http://www.website-datenbank.de/Impressum'
|
795
895
|
producer:
|
796
896
|
name: 'netEstate GmbH'
|
@@ -836,10 +936,17 @@
|
|
836
936
|
name: 'Nmap'
|
837
937
|
url: 'https://nmap.org/'
|
838
938
|
|
939
|
+
- regex: 'Nuzzel'
|
940
|
+
name: 'Nuzzel'
|
941
|
+
category: 'Crawler'
|
942
|
+
producer:
|
943
|
+
name: 'Nuzzel'
|
944
|
+
url: https://www.nuzzel.com/
|
945
|
+
|
839
946
|
- regex: 'Octopus [0-9]'
|
840
947
|
name: 'Octopus'
|
841
948
|
|
842
|
-
- regex: '
|
949
|
+
- regex: 'omgili(?:bot)?'
|
843
950
|
name: 'Omgili bot'
|
844
951
|
category: 'Search bot'
|
845
952
|
url: 'http://www.omgili.com/Crawler.html'
|
@@ -931,7 +1038,15 @@
|
|
931
1038
|
producer:
|
932
1039
|
name: 'Pingdom AB'
|
933
1040
|
url: 'https://www.pingdom.com'
|
934
|
-
|
1041
|
+
|
1042
|
+
- regex: 'Quora Link Preview'
|
1043
|
+
name: 'Quora Link Preview'
|
1044
|
+
category: 'Crawler'
|
1045
|
+
url: ''
|
1046
|
+
producer:
|
1047
|
+
name: 'Quora'
|
1048
|
+
url: 'http://www.quora.com'
|
1049
|
+
|
935
1050
|
- regex: 'RamblerMail'
|
936
1051
|
name: 'RamblerMail Image Proxy'
|
937
1052
|
category: 'Crawler'
|
@@ -1075,6 +1190,14 @@
|
|
1075
1190
|
name: 'Seznam.cz, a.s.'
|
1076
1191
|
url: 'http://www.seznam.cz/'
|
1077
1192
|
|
1193
|
+
- regex: 'shopify-partner-homepage-scraper'
|
1194
|
+
name: 'Shopify Partner'
|
1195
|
+
category: 'Crawler'
|
1196
|
+
url: 'https://www.shopify.com/partners'
|
1197
|
+
producer:
|
1198
|
+
name: 'Shopify'
|
1199
|
+
url: 'https://www.shopify.com/'
|
1200
|
+
|
1078
1201
|
- regex: 'ShopWiki'
|
1079
1202
|
name: 'ShopWiki'
|
1080
1203
|
category: 'Search tools'
|
@@ -1107,6 +1230,14 @@
|
|
1107
1230
|
name: 'SISTRIX GmbH'
|
1108
1231
|
url: 'http://www.sistrix.de'
|
1109
1232
|
|
1233
|
+
- regex: 'compatible; (?:SISTRIX )?Optimizer'
|
1234
|
+
name: 'SISTRIX Optimizer'
|
1235
|
+
category: 'Crawler'
|
1236
|
+
url: 'https://optimizer.sistrix.com'
|
1237
|
+
producer:
|
1238
|
+
name: 'SISTRIX GmbH'
|
1239
|
+
url: 'http://www.sistrix.de'
|
1240
|
+
|
1110
1241
|
- regex: 'SiteSucker'
|
1111
1242
|
name: 'SiteSucker'
|
1112
1243
|
category: 'Crawler'
|
@@ -1211,7 +1342,7 @@
|
|
1211
1342
|
category: 'Search bot'
|
1212
1343
|
|
1213
1344
|
- regex: 'TelegramBot'
|
1214
|
-
name: '
|
1345
|
+
name: 'TelegramBot'
|
1215
1346
|
url: 'https://telegram.org/blog/bot-revolution'
|
1216
1347
|
|
1217
1348
|
- regex: 'TLSProbe'
|
@@ -1238,6 +1369,11 @@
|
|
1238
1369
|
name: ''
|
1239
1370
|
url: ''
|
1240
1371
|
|
1372
|
+
- regex: 'theoldreader.com'
|
1373
|
+
name: 'theoldreader'
|
1374
|
+
category: 'Feed Reader'
|
1375
|
+
url: 'https://theoldreader.com'
|
1376
|
+
|
1241
1377
|
- regex: 'trendictionbot'
|
1242
1378
|
name: 'Trendiction Bot'
|
1243
1379
|
category: 'Crawler'
|
@@ -1270,6 +1406,13 @@
|
|
1270
1406
|
name: 'Mediasift'
|
1271
1407
|
url: ''
|
1272
1408
|
|
1409
|
+
- regex: 'Twingly Recon'
|
1410
|
+
name: 'Twingly Recon'
|
1411
|
+
category: 'Crawler'
|
1412
|
+
producer:
|
1413
|
+
name: 'Twingly'
|
1414
|
+
url: 'https://www.twingly.com'
|
1415
|
+
|
1273
1416
|
- regex: 'Twitterbot'
|
1274
1417
|
name: 'Twitterbot'
|
1275
1418
|
category: 'Social Media Agent'
|
@@ -1326,6 +1469,14 @@
|
|
1326
1469
|
name: 'WiseGuys'
|
1327
1470
|
url: 'http://www.wise-guys.nl/'
|
1328
1471
|
|
1472
|
+
- regex: 'vkShare; '
|
1473
|
+
name: 'VK Share Button'
|
1474
|
+
category: 'Crawler'
|
1475
|
+
url: 'http://vk.com/dev/Share'
|
1476
|
+
producer:
|
1477
|
+
name: 'VK'
|
1478
|
+
url: 'http://vk.com/'
|
1479
|
+
|
1329
1480
|
- regex: 'VSMCrawler'
|
1330
1481
|
name: 'Visual Site Mapper Crawler'
|
1331
1482
|
category: 'Crawler'
|
@@ -1389,6 +1540,11 @@
|
|
1389
1540
|
name: 'AliasIO'
|
1390
1541
|
url: 'https://github.com/AliasIO'
|
1391
1542
|
|
1543
|
+
- regex: 'PTST/'
|
1544
|
+
name: 'WebPageTest'
|
1545
|
+
category: 'Site Monitor'
|
1546
|
+
url: 'https://www.webpagetest.org'
|
1547
|
+
|
1392
1548
|
- regex: 'WeSEE(:Search)?'
|
1393
1549
|
name: 'WeSEE:Search'
|
1394
1550
|
category: 'Search bot'
|
@@ -1429,6 +1585,14 @@
|
|
1429
1585
|
name: 'Wotbox'
|
1430
1586
|
url: 'http://www.wotbox.com'
|
1431
1587
|
|
1588
|
+
- regex: 'XenForo'
|
1589
|
+
name: 'XenForo'
|
1590
|
+
category: 'Service Agent'
|
1591
|
+
url: 'https://xenforo.com/'
|
1592
|
+
producer:
|
1593
|
+
name: 'XenForo Ltd.'
|
1594
|
+
url: 'https://xenforo.com/'
|
1595
|
+
|
1432
1596
|
- regex: 'yacybot'
|
1433
1597
|
name: 'YaCy'
|
1434
1598
|
category: 'Search bot'
|
@@ -1461,7 +1625,15 @@
|
|
1461
1625
|
name: 'Yahoo! Inc.'
|
1462
1626
|
url: 'http://www.yahoo.com'
|
1463
1627
|
|
1464
|
-
- regex: '
|
1628
|
+
- regex: 'Y!J-BRW'
|
1629
|
+
name: 'Yahoo! Japan BRW'
|
1630
|
+
category: 'Crawler'
|
1631
|
+
url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
|
1632
|
+
producer:
|
1633
|
+
name: 'Yahoo! Japan Corp.'
|
1634
|
+
url: 'https://www.yahoo.co.jp/'
|
1635
|
+
|
1636
|
+
- regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
|
1465
1637
|
name: 'Yandex Bot'
|
1466
1638
|
category: 'Search bot'
|
1467
1639
|
url: 'http://www.yandex.com/bots'
|
@@ -1469,7 +1641,7 @@
|
|
1469
1641
|
name: 'Yandex LLC'
|
1470
1642
|
url: 'http://company.yandex.com'
|
1471
1643
|
|
1472
|
-
- regex: 'Yeti'
|
1644
|
+
- regex: 'Yeti|NaverJapan'
|
1473
1645
|
name: 'Yeti/Naverbot'
|
1474
1646
|
category: 'Search bot'
|
1475
1647
|
url: 'http://help.naver.com/robots/'
|
@@ -1551,9 +1723,9 @@
|
|
1551
1723
|
name: 'HubPages'
|
1552
1724
|
url: 'http://hubpages.com/'
|
1553
1725
|
|
1554
|
-
- regex: 'Pinterest
|
1726
|
+
- regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
|
1555
1727
|
name: 'Pinterest'
|
1556
|
-
url: ''
|
1728
|
+
url: 'http://www.pinterest.com/bot.html'
|
1557
1729
|
category: 'Crawler'
|
1558
1730
|
producer:
|
1559
1731
|
name: 'Pinterest'
|
@@ -1567,6 +1739,14 @@
|
|
1567
1739
|
name: 'Site24x7'
|
1568
1740
|
url: 'https://www.site24x7.com'
|
1569
1741
|
|
1742
|
+
- regex: 's~snapchat-proxy'
|
1743
|
+
name: 'Snapchat Proxy'
|
1744
|
+
category: 'Crawler'
|
1745
|
+
url: 'https://www.snapchat.com'
|
1746
|
+
producer:
|
1747
|
+
name: 'Snapchat Inc.'
|
1748
|
+
url: 'https://www.snapchat.com'
|
1749
|
+
|
1570
1750
|
- regex: "Let's Encrypt validation server"
|
1571
1751
|
name: "Let's Encrypt Validation"
|
1572
1752
|
category: 'Service Agent'
|
@@ -1662,7 +1842,10 @@
|
|
1662
1842
|
- regex: 'Server Density Service Monitoring.*'
|
1663
1843
|
name: 'Server Density'
|
1664
1844
|
|
1665
|
-
- regex: '
|
1845
|
+
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
1846
|
+
name: 'RSSRadio Bot'
|
1847
|
+
|
1848
|
+
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9)'
|
1666
1849
|
name: 'Generic Bot'
|
1667
1850
|
|
1668
1851
|
- regex: '^sentry'
|
@@ -1671,7 +1854,191 @@
|
|
1671
1854
|
name: 'Sentry'
|
1672
1855
|
url: 'https://sentry.io'
|
1673
1856
|
|
1674
|
-
|
1857
|
+
- regex: '^Spotify'
|
1858
|
+
name: 'Spotify'
|
1859
|
+
producer:
|
1860
|
+
name: 'Spotify'
|
1861
|
+
url: 'https://www.spotify.com'
|
1862
|
+
|
1863
|
+
- regex: 'The Knowledge AI'
|
1864
|
+
name: 'The Knowledge AI'
|
1865
|
+
category: 'Crawler'
|
1866
|
+
|
1867
|
+
- regex: 'Embedly'
|
1868
|
+
name: 'Embedly'
|
1869
|
+
category: 'Crawler'
|
1870
|
+
url: 'https://support.embed.ly/hc/en-us'
|
1871
|
+
producer:
|
1872
|
+
name: 'A Medium, Corp.'
|
1873
|
+
url: 'https://medium.com/'
|
1874
|
+
|
1875
|
+
- regex: 'BrandVerity'
|
1876
|
+
name: 'BrandVerity'
|
1877
|
+
category: 'Crawler'
|
1878
|
+
url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
|
1879
|
+
producer:
|
1880
|
+
name: 'BrandVerity, Inc.'
|
1881
|
+
url: 'https://www.brandverity.com/'
|
1882
|
+
|
1883
|
+
- regex: 'Kaspersky Lab CFR link resolver'
|
1884
|
+
name: 'Kaspersky'
|
1885
|
+
category: 'Security Checker'
|
1886
|
+
url: 'https://www.kaspersky.com/'
|
1887
|
+
producer:
|
1888
|
+
name: 'AO Kaspersky Lab'
|
1889
|
+
url: 'https://www.kaspersky.com/'
|
1890
|
+
|
1891
|
+
- regex: 'eZ Publish Link Validator'
|
1892
|
+
name: 'eZ Publish Link Validator'
|
1893
|
+
category: 'Crawler'
|
1894
|
+
url: 'https://ez.no/'
|
1895
|
+
producer:
|
1896
|
+
name: 'eZ Systems AS'
|
1897
|
+
url: 'https://ez.no/'
|
1898
|
+
|
1899
|
+
- regex: 'woorankreview'
|
1900
|
+
name: 'WooRank'
|
1901
|
+
category: 'Search bot'
|
1902
|
+
url: 'https://www.woorank.com/'
|
1903
|
+
producer:
|
1904
|
+
name: 'WooRank sprl'
|
1905
|
+
url: 'https://www.woorank.com/'
|
1906
|
+
|
1907
|
+
- regex: '(Match|LinkCheck) by Siteimprove.com'
|
1908
|
+
name: 'Siteimprove'
|
1909
|
+
category: 'Search bot'
|
1910
|
+
url: 'https://siteimprove.com/'
|
1911
|
+
producer:
|
1912
|
+
name: 'Siteimprove GmbH'
|
1913
|
+
url: 'https://siteimprove.com/'
|
1914
|
+
|
1915
|
+
- regex: 'CATExplorador'
|
1916
|
+
name: 'CATExplorador'
|
1917
|
+
category: 'Search bot'
|
1918
|
+
url: 'https://fundacio.cat/ca/domini/'
|
1919
|
+
producer:
|
1920
|
+
name: 'Fundació puntCAT'
|
1921
|
+
url: 'https://fundacio.cat/ca/domini/'
|
1922
|
+
|
1923
|
+
- regex: 'Buck'
|
1924
|
+
name: 'Buck'
|
1925
|
+
category: 'Search bot'
|
1926
|
+
url: 'https://hypefactors.com/'
|
1927
|
+
producer:
|
1928
|
+
name: 'Hypefactors A/S'
|
1929
|
+
url: 'https://hypefactors.com/'
|
1930
|
+
|
1931
|
+
- regex: 'tracemyfile'
|
1932
|
+
name: 'TraceMyFile'
|
1933
|
+
category: 'Search bot'
|
1934
|
+
url: 'https://www.tracemyfile.com/'
|
1935
|
+
producer:
|
1936
|
+
name: 'Idee Inc.'
|
1937
|
+
url: 'http://ideeinc.com/'
|
1938
|
+
|
1939
|
+
- regex: 'zelist.ro feed parser'
|
1940
|
+
name: 'Ze List'
|
1941
|
+
url: 'https://www.zelist.ro/'
|
1942
|
+
category: 'Feed Fetcher'
|
1943
|
+
producer:
|
1944
|
+
name: 'Treeworks SRL'
|
1945
|
+
url: 'https://www.tree.ro/'
|
1946
|
+
|
1947
|
+
- regex: 'weborama-fetcher'
|
1948
|
+
name: 'Weborama'
|
1949
|
+
category: 'Search bot'
|
1950
|
+
url: 'https://weborama.com/'
|
1951
|
+
producer:
|
1952
|
+
name: 'Weborama SA'
|
1953
|
+
url: 'https://weborama.com/'
|
1954
|
+
|
1955
|
+
- regex: 'BoardReader Favicon Fetcher'
|
1956
|
+
name: 'BoardReader'
|
1957
|
+
category: 'Search bot'
|
1958
|
+
url: 'http://boardreader.com/'
|
1959
|
+
producer:
|
1960
|
+
name: 'Effyis Inc'
|
1961
|
+
url: 'http://boardreader.com/'
|
1962
|
+
|
1963
|
+
- regex: 'IDG/IT'
|
1964
|
+
name: 'IDG/IT'
|
1965
|
+
category: 'Search bot'
|
1966
|
+
url: 'https://spaziodati.eu/'
|
1967
|
+
producer:
|
1968
|
+
name: 'SpazioDati S.r.l.'
|
1969
|
+
url: 'https://spaziodati.eu/'
|
1970
|
+
|
1971
|
+
- regex: 'Bytespider'
|
1972
|
+
name: 'Bytespider'
|
1973
|
+
category: 'Search bot'
|
1974
|
+
url: 'https://bytedance.com/'
|
1975
|
+
producer:
|
1976
|
+
name: 'ByteDance Ltd.'
|
1977
|
+
url: 'https://bytedance.com/'
|
1978
|
+
|
1979
|
+
- regex: 'WikiDo'
|
1980
|
+
name: 'WikiDo'
|
1981
|
+
category: 'Search bot'
|
1982
|
+
url: 'https://www.wikido.com/'
|
1983
|
+
producer:
|
1984
|
+
name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
|
1985
|
+
url: 'https://www.wikido.com/'
|
1986
|
+
|
1987
|
+
- regex: 'AwarioSmartBot'
|
1988
|
+
name: 'Awario'
|
1989
|
+
category: 'Search bot'
|
1990
|
+
url: 'https://awario.com/bots.html'
|
1991
|
+
producer:
|
1992
|
+
name: 'Awario'
|
1993
|
+
url: 'https://awario.com/'
|
1994
|
+
|
1995
|
+
- regex: 'AwarioRssBot'
|
1996
|
+
name: 'Awario'
|
1997
|
+
category: 'Feed Fetcher'
|
1998
|
+
url: 'https://awario.com/bots.html'
|
1999
|
+
producer:
|
2000
|
+
name: 'Awario'
|
2001
|
+
url: 'https://awario.com/'
|
2002
|
+
|
2003
|
+
- regex: 'oBot'
|
2004
|
+
name: 'oBot'
|
2005
|
+
category: 'Search bot'
|
2006
|
+
url: 'http://www.xforce-security.com/crawler/'
|
2007
|
+
producer:
|
2008
|
+
name: 'IBM Germany Research & Development GmbH'
|
2009
|
+
url: 'https://exchange.xforce.ibmcloud.com/'
|
2010
|
+
|
2011
|
+
- regex: 'SMTBot'
|
2012
|
+
name: 'SMTBot'
|
2013
|
+
category: 'Search bot'
|
2014
|
+
url: 'https://www.similartech.com/smtbot'
|
2015
|
+
producer:
|
2016
|
+
name: 'SimilarTech Ltd.'
|
2017
|
+
url: 'https://www.similartech.com/'
|
2018
|
+
|
2019
|
+
- regex: 'LCC'
|
2020
|
+
name: 'LCC'
|
2021
|
+
category: 'Search bot'
|
2022
|
+
url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
|
2023
|
+
producer:
|
2024
|
+
name: 'Universität Leipzig'
|
2025
|
+
url: 'https://www.uni-leipzig.de/'
|
2026
|
+
|
2027
|
+
- regex: 'Startpagina-Linkchecker'
|
2028
|
+
name: 'Startpagina Linkchecker'
|
2029
|
+
category: 'Search bot'
|
2030
|
+
url: 'https://www.startpagina.nl/linkchecker'
|
2031
|
+
producer:
|
2032
|
+
name: 'Startpagina B.V.'
|
2033
|
+
url: 'https://www.startpagina.nl/'
|
2034
|
+
|
2035
|
+
- regex: 'GTmetrix'
|
2036
|
+
name: 'GTmetrix'
|
2037
|
+
category: 'Crawler'
|
2038
|
+
url: 'https://gtmetrix.com/'
|
2039
|
+
producer:
|
2040
|
+
name: 'Carbon60 Operating Co. Ltd.'
|
2041
|
+
url: 'https://www.carbon60.com/'
|
1675
2042
|
|
1676
2043
|
- regex: 'Nutch'
|
1677
2044
|
name: 'Nutch-based Bot'
|
@@ -1681,5 +2048,61 @@
|
|
1681
2048
|
name: 'The Apache Software Foundation'
|
1682
2049
|
url: 'http://www.apache.org/foundation/'
|
1683
2050
|
|
1684
|
-
- regex: '
|
2051
|
+
- regex: 'Seobility'
|
2052
|
+
name: 'Seobility'
|
2053
|
+
category: 'Crawler'
|
2054
|
+
url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
|
2055
|
+
|
2056
|
+
- regex: 'Vercelbot'
|
2057
|
+
name: 'Vercel Bot'
|
2058
|
+
category: 'Service bot'
|
2059
|
+
url: 'https://vercel.com'
|
2060
|
+
|
2061
|
+
- regex: 'Grammarly'
|
2062
|
+
name: 'Grammarly'
|
2063
|
+
category: 'Service bot'
|
2064
|
+
url: 'http://www.grammarly.com'
|
2065
|
+
|
2066
|
+
- regex: 'Robozilla'
|
2067
|
+
name: 'Robozilla'
|
2068
|
+
category: 'Crawler'
|
2069
|
+
|
2070
|
+
- regex: 'Domains Project'
|
2071
|
+
name: 'Domains Project'
|
2072
|
+
category: 'Crawler'
|
2073
|
+
url: 'https://domainsproject.org'
|
2074
|
+
|
2075
|
+
- regex: 'PetalBot'
|
2076
|
+
name: 'Petal Bot'
|
2077
|
+
category: 'Crawler'
|
2078
|
+
url: 'https://aspiegel.com/petalbot'
|
2079
|
+
|
2080
|
+
- regex: 'SerendeputyBot'
|
2081
|
+
name: 'Serendeputy Bot'
|
2082
|
+
category: 'Crawler'
|
2083
|
+
url: 'http://serendeputy.com/about/serendeputy-bot'
|
2084
|
+
|
2085
|
+
- regex: 'ias-va.*admantx.*service-fetcher'
|
2086
|
+
name: 'ADmantX Service Fetcher'
|
2087
|
+
category: 'Service bot'
|
2088
|
+
url: 'https://www.admantx.com/service-fetcher.html'
|
2089
|
+
|
2090
|
+
- regex: 'SemanticScholarBot'
|
2091
|
+
name: 'Semantic Scholar Bot'
|
2092
|
+
category: 'Crawler'
|
2093
|
+
url: 'https://www.semanticscholar.org/crawler'
|
2094
|
+
|
2095
|
+
- regex: 'VelenPublicWebCrawler'
|
2096
|
+
name: 'Velen Public Web Crawler'
|
2097
|
+
category: 'Crawler'
|
2098
|
+
url: 'https://hunter.io/robot'
|
2099
|
+
|
2100
|
+
- regex: 'Barkrowler'
|
2101
|
+
name: 'Barkrowler'
|
2102
|
+
category: 'Crawler'
|
2103
|
+
url: 'http://www.exensa.com/crawl'
|
2104
|
+
|
2105
|
+
# Generic detections
|
2106
|
+
|
2107
|
+
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
|
1685
2108
|
name: 'Generic Bot'
|