device_detector 0.9.1 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rubocop.yml +49 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -9
- data/CHANGELOG.md +16 -3
- data/README.md +7 -9
- data/Rakefile +19 -13
- data/device_detector.gemspec +1 -0
- data/lib/device_detector.rb +32 -28
- data/lib/device_detector/bot.rb +2 -2
- data/lib/device_detector/client.rb +3 -2
- data/lib/device_detector/device.rb +44 -21
- data/lib/device_detector/memory_cache.rb +26 -19
- data/lib/device_detector/metadata_extractor.rb +7 -8
- data/lib/device_detector/model_extractor.rb +3 -3
- data/lib/device_detector/name_extractor.rb +2 -2
- data/lib/device_detector/os.rb +121 -111
- data/lib/device_detector/parser.rb +22 -9
- data/lib/device_detector/version.rb +3 -1
- data/lib/device_detector/version_extractor.rb +2 -3
- data/regexes/bots.yml +840 -20
- data/regexes/client/browser_engine.yml +11 -2
- data/regexes/client/browsers.yml +909 -108
- data/regexes/client/feed_readers.yml +38 -2
- data/regexes/client/libraries.yml +76 -2
- data/regexes/client/mediaplayers.yml +25 -5
- data/regexes/client/mobile_apps.yml +167 -2
- data/regexes/client/pim.yml +10 -1
- data/regexes/device/cameras.yml +1 -1
- data/regexes/device/car_browsers.yml +7 -3
- data/regexes/device/consoles.yml +3 -3
- data/regexes/device/mobiles.yml +10123 -465
- data/regexes/device/portable_media_player.yml +4 -6
- data/regexes/device/televisions.yml +18 -4
- data/regexes/oss.yml +115 -21
- data/regexes/vendorfragments.yml +6 -2
- data/spec/device_detector/concrete_user_agent_spec.rb +16 -17
- data/spec/device_detector/detector_fixtures_spec.rb +51 -11
- data/spec/device_detector/device_spec.rb +28 -48
- data/spec/device_detector/memory_cache_spec.rb +60 -28
- data/spec/device_detector/model_extractor_spec.rb +3 -3
- data/spec/device_detector/version_extractor_spec.rb +5 -6
- data/spec/device_detector_spec.rb +60 -69
- data/spec/fixtures/client/browser.yml +1785 -262
- data/spec/fixtures/client/feed_reader.yml +47 -35
- data/spec/fixtures/client/library.yml +112 -3
- data/spec/fixtures/client/mediaplayer.yml +32 -37
- data/spec/fixtures/client/mobile_app.yml +193 -6
- data/spec/fixtures/client/pim.yml +37 -18
- data/spec/fixtures/detector/bots.yml +1426 -118
- data/spec/fixtures/detector/camera.yml +36 -10
- data/spec/fixtures/detector/car_browser.yml +64 -3
- data/spec/fixtures/detector/console.yml +80 -26
- data/spec/fixtures/detector/desktop.yml +2222 -1589
- data/spec/fixtures/detector/feature_phone.yml +151 -42
- data/spec/fixtures/detector/feed_reader.yml +186 -121
- data/spec/fixtures/detector/mediaplayer.yml +113 -39
- data/spec/fixtures/detector/mobile_apps.yml +366 -21
- data/spec/fixtures/detector/phablet.yml +2597 -570
- data/spec/fixtures/detector/portable_media_player.yml +41 -16
- data/spec/fixtures/detector/smart_display.yml +8 -5
- data/spec/fixtures/detector/smart_speaker.yml +55 -0
- data/spec/fixtures/detector/smartphone-1.yml +5468 -5010
- data/spec/fixtures/detector/smartphone-10.yml +9977 -0
- data/spec/fixtures/detector/smartphone-11.yml +9891 -0
- data/spec/fixtures/detector/smartphone-12.yml +9906 -0
- data/spec/fixtures/detector/smartphone-13.yml +9920 -0
- data/spec/fixtures/detector/smartphone-14.yml +2662 -0
- data/spec/fixtures/detector/smartphone-2.yml +5213 -4635
- data/spec/fixtures/detector/smartphone-3.yml +5082 -4533
- data/spec/fixtures/detector/smartphone-4.yml +6806 -2625
- data/spec/fixtures/detector/smartphone-5.yml +9914 -0
- data/spec/fixtures/detector/smartphone-6.yml +9962 -0
- data/spec/fixtures/detector/smartphone-7.yml +9899 -0
- data/spec/fixtures/detector/smartphone-8.yml +9931 -0
- data/spec/fixtures/detector/smartphone-9.yml +9899 -0
- data/spec/fixtures/detector/smartphone.yml +5225 -4652
- data/spec/fixtures/detector/tablet-1.yml +4691 -4191
- data/spec/fixtures/detector/tablet-2.yml +9800 -71
- data/spec/fixtures/detector/tablet-3.yml +9959 -0
- data/spec/fixtures/detector/tablet-4.yml +4528 -0
- data/spec/fixtures/detector/tablet.yml +4664 -4177
- data/spec/fixtures/detector/tv.yml +3399 -1048
- data/spec/fixtures/detector/unknown.yml +1017 -977
- data/spec/fixtures/detector/wearable.yml +61 -0
- data/spec/fixtures/device/camera.yml +4 -3
- data/spec/fixtures/device/car_browser.yml +9 -2
- data/spec/fixtures/device/console.yml +15 -14
- data/spec/fixtures/parser/oss.yml +284 -2
- data/spec/fixtures/parser/vendorfragments.yml +8 -2
- metadata +50 -7
data/regexes/bots.yml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
###############
|
2
2
|
# Device Detector - The Universal Device Detection library for parsing User Agents
|
3
3
|
#
|
4
|
-
# @link
|
4
|
+
# @link https://matomo.org
|
5
5
|
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
6
6
|
###############
|
7
7
|
|
@@ -53,6 +53,21 @@
|
|
53
53
|
name: 'Alexa Internet'
|
54
54
|
url: 'http://www.alexa.com'
|
55
55
|
|
56
|
+
- regex: 'alexa site audit'
|
57
|
+
name: 'Alexa Site Audit'
|
58
|
+
category: 'Site Monitor'
|
59
|
+
url: 'http://www.alexa.com/help/webmasters'
|
60
|
+
producer:
|
61
|
+
name: 'Alexa Internet'
|
62
|
+
url: 'http://www.alexa.com'
|
63
|
+
|
64
|
+
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
|
65
|
+
name: 'Amazon Route53 Health Check'
|
66
|
+
category: 'Service Agent'
|
67
|
+
producer:
|
68
|
+
name: 'Amazon Web Services'
|
69
|
+
url: 'https://aws.amazon.com/'
|
70
|
+
|
56
71
|
- regex: 'AmorankSpider'
|
57
72
|
name: 'Amorank Spider'
|
58
73
|
category: 'Crawler'
|
@@ -61,6 +76,14 @@
|
|
61
76
|
name: 'Amorank'
|
62
77
|
url: 'http://www.amorank.com'
|
63
78
|
|
79
|
+
- regex: 'ApacheBench'
|
80
|
+
name: 'ApacheBench'
|
81
|
+
category: 'Benchmark'
|
82
|
+
url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
|
83
|
+
producer:
|
84
|
+
name: 'The Apache Software Foundation'
|
85
|
+
url: 'http://www.apache.org/foundation/'
|
86
|
+
|
64
87
|
- regex: 'Applebot'
|
65
88
|
name: 'Applebot'
|
66
89
|
category: 'Crawler'
|
@@ -69,6 +92,30 @@
|
|
69
92
|
name: 'Apple Inc'
|
70
93
|
url: 'http://www.apple.com'
|
71
94
|
|
95
|
+
- regex: 'Arachni'
|
96
|
+
name: 'Arachni'
|
97
|
+
category: 'Security Checker'
|
98
|
+
url: 'http://www.arachni-scanner.com'
|
99
|
+
producer:
|
100
|
+
name: 'Sarosys LLC'
|
101
|
+
url: 'http://www.sarosys.com/'
|
102
|
+
|
103
|
+
- regex: 'AspiegelBot'
|
104
|
+
name: 'AspiegelBot'
|
105
|
+
category: 'Crawler'
|
106
|
+
url: 'https://aspiegel.com/'
|
107
|
+
producer:
|
108
|
+
name: 'Huawei'
|
109
|
+
url: 'https://www.huawei.com/'
|
110
|
+
|
111
|
+
- regex: 'Castro 2, Episode Duration Lookup'
|
112
|
+
name: 'Castro 2'
|
113
|
+
category: 'Service Agent'
|
114
|
+
url: 'http://supertop.co/castro/'
|
115
|
+
producer:
|
116
|
+
name: 'Supertop'
|
117
|
+
url: 'http://supertop.co'
|
118
|
+
|
72
119
|
- regex: 'Curious George'
|
73
120
|
name: 'Analytics SEO Crawler'
|
74
121
|
category: 'Crawler'
|
@@ -93,8 +140,8 @@
|
|
93
140
|
name: 'Ask Jeeves Inc.'
|
94
141
|
url: 'http://www.ask.com'
|
95
142
|
|
96
|
-
- regex: 'Backlink-
|
97
|
-
name: 'Backlink-
|
143
|
+
- regex: 'Backlink-Check\.de'
|
144
|
+
name: 'Backlink-Check.de'
|
98
145
|
category: 'Crawler'
|
99
146
|
url: 'http://www.backlink-check.de/bot.html'
|
100
147
|
producer:
|
@@ -165,6 +212,13 @@
|
|
165
212
|
name: 'Blogtrottr Ltd'
|
166
213
|
url: 'https://blogtrottr.com/'
|
167
214
|
|
215
|
+
- regex: 'BoardReader Blog Indexer'
|
216
|
+
name: 'BoardReader Blog Indexer'
|
217
|
+
category: 'Crawler'
|
218
|
+
producer:
|
219
|
+
name: 'BoardReader'
|
220
|
+
url: 'http://boardreader.com/'
|
221
|
+
|
168
222
|
- regex: 'BountiiBot'
|
169
223
|
name: 'Bountii Bot'
|
170
224
|
category: 'Search bot'
|
@@ -186,8 +240,8 @@
|
|
186
240
|
category: 'Crawler'
|
187
241
|
url: 'http://law.di.unimi.it/BUbiNG.html'
|
188
242
|
producer:
|
189
|
-
name: ''
|
190
|
-
url: ''
|
243
|
+
name: 'The Laboratory for Web Algorithmics (LAW)'
|
244
|
+
url: 'http://law.di.unimi.it/software.php#buging'
|
191
245
|
|
192
246
|
- regex: '(?<!HTC)[ _]Butterfly/'
|
193
247
|
name: 'Butterfly Robot'
|
@@ -221,6 +275,14 @@
|
|
221
275
|
name: '10betterpages GmbH'
|
222
276
|
url: 'http://cliqz.com'
|
223
277
|
|
278
|
+
- regex: 'Cloudflare-AMP'
|
279
|
+
name: 'CloudFlare AMP Fetcher'
|
280
|
+
category: 'Crawler'
|
281
|
+
url: 'https://amp.cloudflare.com/doc/fetcher.html'
|
282
|
+
producer:
|
283
|
+
name: 'CloudFlare'
|
284
|
+
url: 'http://www.cloudflare.com'
|
285
|
+
|
224
286
|
- regex: 'CloudFlare-AlwaysOnline'
|
225
287
|
name: 'CloudFlare Always Online'
|
226
288
|
category: 'Site Monitor'
|
@@ -229,13 +291,21 @@
|
|
229
291
|
name: 'CloudFlare'
|
230
292
|
url: 'http://www.cloudflare.com'
|
231
293
|
|
232
|
-
- regex: 'coccoc
|
294
|
+
- regex: 'coccoc|coccocbot(-ads|-fast|-image|-shopping|-web)?'
|
233
295
|
name: 'Cốc Cốc Bot'
|
234
|
-
url: '
|
296
|
+
url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
|
235
297
|
category: 'Search bot'
|
236
298
|
producer:
|
237
299
|
name: 'Cốc Cốc'
|
238
|
-
url: '
|
300
|
+
url: 'https://coccoc.com/'
|
301
|
+
|
302
|
+
- regex: 'collectd'
|
303
|
+
name: 'Collectd'
|
304
|
+
url: 'https://collectd.org/'
|
305
|
+
category: 'Site Monitor'
|
306
|
+
producer:
|
307
|
+
name: 'Collectd'
|
308
|
+
url: 'https://collectd.org/'
|
239
309
|
|
240
310
|
- regex: 'CommaFeed'
|
241
311
|
name: 'CommaFeed'
|
@@ -245,6 +315,39 @@
|
|
245
315
|
name: ''
|
246
316
|
url: ''
|
247
317
|
|
318
|
+
- regex: 'CSS Certificate Spider'
|
319
|
+
name: 'CSS Certificate Spider'
|
320
|
+
category: 'Crawler'
|
321
|
+
url: 'http://www.css-security.com/certificatespider/'
|
322
|
+
producer:
|
323
|
+
name: 'Certified Security Solutions'
|
324
|
+
url: 'https://www.css-security.com/company/about-us/'
|
325
|
+
|
326
|
+
- regex: 'Datadog Agent'
|
327
|
+
name: 'Datadog Agent'
|
328
|
+
url: 'https://github.com/DataDog/dd-agent'
|
329
|
+
category: 'Site Monitor'
|
330
|
+
producer:
|
331
|
+
name: 'Datadog'
|
332
|
+
url: 'https://www.datadoghq.com/'
|
333
|
+
|
334
|
+
- regex: 'Datanyze'
|
335
|
+
name: 'Datanyze'
|
336
|
+
url: ''
|
337
|
+
category: 'Crawler'
|
338
|
+
producer:
|
339
|
+
name: 'Datanyze'
|
340
|
+
url: 'https://www.datanyze.com'
|
341
|
+
|
342
|
+
|
343
|
+
- regex: 'Dataprovider'
|
344
|
+
name: 'Dataprovider'
|
345
|
+
category: 'Crawler'
|
346
|
+
url: ''
|
347
|
+
producer:
|
348
|
+
name: 'Dataprovider B.V.'
|
349
|
+
url: 'https://www.dataprovider.com/'
|
350
|
+
|
248
351
|
- regex: 'Daum(oa)?[ /][0-9]'
|
249
352
|
name: 'Daum'
|
250
353
|
category: 'Search bot'
|
@@ -285,6 +388,14 @@
|
|
285
388
|
name: 'SEOmoz, Inc.'
|
286
389
|
url: 'http://moz.com/'
|
287
390
|
|
391
|
+
- regex: 'DuckDuck(?:Go-Favicons-)?Bot'
|
392
|
+
name: 'DuckDuckGo Bot'
|
393
|
+
category: 'Search bot'
|
394
|
+
url: 'https://duckduckgo.com/duckduckbot'
|
395
|
+
producer:
|
396
|
+
name: 'DuckDuckGo'
|
397
|
+
url: 'https://duckduckgo.com/'
|
398
|
+
|
288
399
|
- regex: 'EasouSpider'
|
289
400
|
name: 'Easou Spider'
|
290
401
|
category: 'Search bot'
|
@@ -293,6 +404,13 @@
|
|
293
404
|
name: 'easou ICP'
|
294
405
|
url: 'http://www.easou.com'
|
295
406
|
|
407
|
+
- regex: 'eCairn-Grabber'
|
408
|
+
name: 'eCairn-Grabber'
|
409
|
+
category: 'Crawler'
|
410
|
+
producer:
|
411
|
+
name: 'eCairn'
|
412
|
+
url: 'https://ecairn.com'
|
413
|
+
|
296
414
|
- regex: 'EMail Exractor'
|
297
415
|
name: 'EMail Exractor'
|
298
416
|
category: 'Crawler'
|
@@ -301,6 +419,14 @@
|
|
301
419
|
name: ''
|
302
420
|
url: ''
|
303
421
|
|
422
|
+
- regex: 'evc-batch'
|
423
|
+
name: 'evc-batch'
|
424
|
+
category: 'Crawler'
|
425
|
+
url: ''
|
426
|
+
producer:
|
427
|
+
name: 'eVenture Capital Partners II, LLC'
|
428
|
+
url: 'http://www.eventures.vc/'
|
429
|
+
|
304
430
|
- regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
|
305
431
|
name: 'ExaBot'
|
306
432
|
category: 'Crawler'
|
@@ -325,7 +451,7 @@
|
|
325
451
|
name: 'SEOmoz, Inc.'
|
326
452
|
url: 'http://moz.com/'
|
327
453
|
|
328
|
-
- regex: 'facebookexternalhit|facebookplatform'
|
454
|
+
- regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
|
329
455
|
name: 'Facebook External Hit'
|
330
456
|
category: 'Social Media Agent'
|
331
457
|
url: 'https://www.facebook.com/externalhit_uatext.php'
|
@@ -381,6 +507,24 @@
|
|
381
507
|
name: ''
|
382
508
|
url: ''
|
383
509
|
|
510
|
+
- regex: 'FlipboardProxy|FlipboardRSS'
|
511
|
+
name: 'Flipboard'
|
512
|
+
url: 'http://flipboard.com/browserproxy'
|
513
|
+
category: 'Feed Fetcher'
|
514
|
+
producer:
|
515
|
+
name: 'Flipboard'
|
516
|
+
url: 'http://flipboard.com/'
|
517
|
+
|
518
|
+
- regex: 'Findxbot'
|
519
|
+
name: 'Findxbot'
|
520
|
+
category: 'Crawler'
|
521
|
+
url: 'http://www.findxbot.com'
|
522
|
+
|
523
|
+
- regex: 'FreshRSS'
|
524
|
+
name: 'FreshRSS'
|
525
|
+
category: 'Feed Fetcher'
|
526
|
+
url: 'https://freshrss.org/'
|
527
|
+
|
384
528
|
- regex: 'Genieo'
|
385
529
|
name: 'Genieo Web filter'
|
386
530
|
category: ''
|
@@ -389,6 +533,14 @@
|
|
389
533
|
name: 'Genieo'
|
390
534
|
url: 'http://www.genieo.com'
|
391
535
|
|
536
|
+
- regex: 'GigablastOpenSource'
|
537
|
+
name: 'Gigablast'
|
538
|
+
category: 'Search bot'
|
539
|
+
url: 'https://github.com/gigablast/open-source-search-engine'
|
540
|
+
producer:
|
541
|
+
name: 'Matt Wells'
|
542
|
+
url: 'http://www.gigablast.com/faq.html'
|
543
|
+
|
392
544
|
- regex: 'Gluten Free Crawler'
|
393
545
|
name: 'Gluten Free Crawler'
|
394
546
|
category: 'Crawler'
|
@@ -405,6 +557,18 @@
|
|
405
557
|
name: 'NTT Resonant'
|
406
558
|
url: 'http://goo.ne.jp'
|
407
559
|
|
560
|
+
- regex: 'Google Favicon'
|
561
|
+
name: 'Google Favicon'
|
562
|
+
category: 'Crawler'
|
563
|
+
|
564
|
+
- regex: 'Google Search Console'
|
565
|
+
name: 'Google Search Console'
|
566
|
+
category: 'Crawler'
|
567
|
+
url: 'https://search.google.com/search-console/about'
|
568
|
+
producer:
|
569
|
+
name: 'Google Inc.'
|
570
|
+
url: 'http://www.google.com'
|
571
|
+
|
408
572
|
- regex: 'Google Page Speed Insights'
|
409
573
|
name: 'Google PageSpeed Insights'
|
410
574
|
category: 'Site Monitor'
|
@@ -421,6 +585,30 @@
|
|
421
585
|
name: 'Google Inc.'
|
422
586
|
url: 'http://www.google.com'
|
423
587
|
|
588
|
+
- regex: 'Google-Cloud-Scheduler'
|
589
|
+
name: 'Google Cloud Scheduler'
|
590
|
+
category: 'Crawler'
|
591
|
+
url: 'https://cloud.google.com/scheduler'
|
592
|
+
producer:
|
593
|
+
name: 'Google Inc.'
|
594
|
+
url: 'https://www.google.com'
|
595
|
+
|
596
|
+
- regex: 'Google-Structured-Data-Testing-Tool'
|
597
|
+
name: 'Google Structured Data Testing Tool'
|
598
|
+
category: 'Validator'
|
599
|
+
url: 'https://search.google.com/structured-data/testing-tool'
|
600
|
+
producer:
|
601
|
+
name: 'Google Inc.'
|
602
|
+
url: 'http://www.google.com'
|
603
|
+
|
604
|
+
- regex: 'GoogleStackdriverMonitoring'
|
605
|
+
name: 'Google Stackdriver Monitoring'
|
606
|
+
category: 'Site Monitor'
|
607
|
+
url: 'https://cloud.google.com/monitoring'
|
608
|
+
producer:
|
609
|
+
name: 'Google Inc.'
|
610
|
+
url: 'https://www.google.com'
|
611
|
+
|
424
612
|
- regex: 'via ggpht\.com GoogleImageProxy'
|
425
613
|
name: 'Gmail Image Proxy'
|
426
614
|
category: 'Crawler'
|
@@ -429,7 +617,39 @@
|
|
429
617
|
name: 'Google Inc.'
|
430
618
|
url: 'http://www.google.com'
|
431
619
|
|
432
|
-
- regex: '
|
620
|
+
- regex: 'SeznamEmailProxy'
|
621
|
+
name: 'Seznam Email Proxy'
|
622
|
+
category: 'Crawler'
|
623
|
+
url: ''
|
624
|
+
producer:
|
625
|
+
name: 'Seznam.cz, a.s.'
|
626
|
+
url: 'http://www.seznam.cz/'
|
627
|
+
|
628
|
+
- regex: 'Seznam-Zbozi-robot'
|
629
|
+
name: 'Seznam Zbozi.cz'
|
630
|
+
category: 'Crawler'
|
631
|
+
url: ''
|
632
|
+
producer:
|
633
|
+
name: 'Seznam.cz, a.s.'
|
634
|
+
url: 'https://www.zbozi.cz/'
|
635
|
+
|
636
|
+
- regex: 'Heurekabot-Feed'
|
637
|
+
name: 'Heureka Feed'
|
638
|
+
category: 'Crawler'
|
639
|
+
url: 'https://sluzby.heureka.cz/napoveda/heurekabot/'
|
640
|
+
producer:
|
641
|
+
name: 'Heureka.cz, a.s.'
|
642
|
+
url: 'https://www.heureka.cz/'
|
643
|
+
|
644
|
+
- regex: 'ShopAlike'
|
645
|
+
name: 'ShopAlike'
|
646
|
+
category: 'Crawler'
|
647
|
+
url: ''
|
648
|
+
producer:
|
649
|
+
name: 'Visual Meta'
|
650
|
+
url: 'https://www.shopalike.cz/'
|
651
|
+
|
652
|
+
- regex: 'AdsBot-Google(-Mobile)?|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot(-Mobile|-Image|-Video|-News)?|GoogleProducer|Google.*/\+/web/snippet'
|
433
653
|
name: 'Googlebot'
|
434
654
|
category: 'Search bot'
|
435
655
|
url: 'http://www.google.com/bot.html'
|
@@ -445,6 +665,14 @@
|
|
445
665
|
name: 'The Internet Archive'
|
446
666
|
url: 'http://www.archive.org'
|
447
667
|
|
668
|
+
- regex: 'HubSpot '
|
669
|
+
name: 'HubSpot'
|
670
|
+
category: 'Crawler'
|
671
|
+
producer:
|
672
|
+
name: 'HubSpot Inc.'
|
673
|
+
url: 'https://www.hubspot.com'
|
674
|
+
|
675
|
+
|
448
676
|
- regex: 'HTTPMon'
|
449
677
|
name: 'HTTPMon'
|
450
678
|
category: 'Site Monitor'
|
@@ -461,6 +689,11 @@
|
|
461
689
|
name: ''
|
462
690
|
url: ''
|
463
691
|
|
692
|
+
- regex: 'inoreader.com'
|
693
|
+
name: 'inoreader'
|
694
|
+
category: 'Feed Reader'
|
695
|
+
url: 'https://www.inoreader.com'
|
696
|
+
|
464
697
|
- regex: 'iisbot'
|
465
698
|
name: 'IIS Site Analysis'
|
466
699
|
category: 'Crawler'
|
@@ -469,6 +702,21 @@
|
|
469
702
|
name: 'Microsoft Corporation'
|
470
703
|
url: 'http://www.microsoft.com'
|
471
704
|
|
705
|
+
- regex: 'ips-agent'
|
706
|
+
name: 'IPS Agent'
|
707
|
+
category: 'crawler'
|
708
|
+
producer:
|
709
|
+
name: 'VeriSign, Inc'
|
710
|
+
url: 'http://www.verisign.com/'
|
711
|
+
|
712
|
+
- regex: 'IP-Guide\.com'
|
713
|
+
name: 'IP-Guide Crawler'
|
714
|
+
category: 'Crawler'
|
715
|
+
url: ''
|
716
|
+
producer:
|
717
|
+
name: ''
|
718
|
+
url: 'https://ip-guide.com'
|
719
|
+
|
472
720
|
- regex: 'kouio'
|
473
721
|
name: 'Kouio'
|
474
722
|
url: 'http://kouio.com/'
|
@@ -485,6 +733,14 @@
|
|
485
733
|
name: ''
|
486
734
|
url: ''
|
487
735
|
|
736
|
+
- regex: '([A-z0-9]*)-Lighthouse'
|
737
|
+
name: 'Lighthouse'
|
738
|
+
category: 'Site Monitor'
|
739
|
+
url: 'https://developers.google.com/web/tools/lighthouse'
|
740
|
+
producer:
|
741
|
+
name: 'Lighthouse'
|
742
|
+
url: 'https://developers.google.com/web/tools/lighthouse'
|
743
|
+
|
488
744
|
- regex: 'linkdexbot(-mobile)?|linkdex\.com'
|
489
745
|
name: 'Linkdex Bot'
|
490
746
|
category: 'Search bot'
|
@@ -532,6 +788,18 @@
|
|
532
788
|
name: ''
|
533
789
|
url: ''
|
534
790
|
|
791
|
+
- regex : 'masscan'
|
792
|
+
name: 'masscan'
|
793
|
+
url: 'https://github.com/robertdavidgraham/masscan'
|
794
|
+
category: 'Crawler'
|
795
|
+
producer:
|
796
|
+
name: 'Robert Graham'
|
797
|
+
url: 'https://github.com/robertdavidgraham'
|
798
|
+
|
799
|
+
- regex: 'Mastodon/'
|
800
|
+
name: 'Mastodon Bot'
|
801
|
+
category: 'Social Media Agent'
|
802
|
+
|
535
803
|
- regex: 'meanpathbot'
|
536
804
|
name: 'Meanpath Bot'
|
537
805
|
category: 'Search bot'
|
@@ -540,6 +808,19 @@
|
|
540
808
|
name: 'Meanpath'
|
541
809
|
url: 'http://www.meanpath.com'
|
542
810
|
|
811
|
+
- regex: 'MetaJobBot'
|
812
|
+
name: 'MetaJobBot'
|
813
|
+
category: 'Crawler'
|
814
|
+
url: 'http://www.metajob.at/the/crawler'
|
815
|
+
producer:
|
816
|
+
name: 'MetaJob'
|
817
|
+
url: 'http://www.metajob.at'
|
818
|
+
|
819
|
+
- regex: 'MetaInspector'
|
820
|
+
name: 'MetaInspector'
|
821
|
+
category: 'Crawler'
|
822
|
+
url: 'https://github.com/jaimeiniesta/metainspector'
|
823
|
+
|
543
824
|
- regex: 'MixrankBot'
|
544
825
|
name: 'Mixrank Bot'
|
545
826
|
category: 'Crawler'
|
@@ -556,6 +837,13 @@
|
|
556
837
|
name: 'Majestic-12'
|
557
838
|
url: 'http://majestic12.co.uk'
|
558
839
|
|
840
|
+
- regex: 'Mnogosearch'
|
841
|
+
name: 'Mnogosearch'
|
842
|
+
category: 'Search bot'
|
843
|
+
url: 'http://www.mnogosearch.org/'
|
844
|
+
producer:
|
845
|
+
name: 'Lavtech.Com Corp.'
|
846
|
+
url: ''
|
559
847
|
- regex: 'MojeekBot'
|
560
848
|
name: 'MojeekBot'
|
561
849
|
category: 'Search bot'
|
@@ -564,6 +852,14 @@
|
|
564
852
|
name: 'Mojeek Ltd.'
|
565
853
|
url: 'http://www.mojeek.com'
|
566
854
|
|
855
|
+
- regex: 'munin'
|
856
|
+
name: 'Munin'
|
857
|
+
category: 'Site Monitor'
|
858
|
+
url: 'http://munin-monitoring.org/'
|
859
|
+
producer:
|
860
|
+
name: 'Munin'
|
861
|
+
url: 'http://munin-monitoring.org/'
|
862
|
+
|
567
863
|
- regex: 'NalezenCzBot'
|
568
864
|
name: 'NalezenCzBot'
|
569
865
|
category: 'Crawler'
|
@@ -572,7 +868,19 @@
|
|
572
868
|
name: 'Jaroslav Kuboš'
|
573
869
|
url: ''
|
574
870
|
|
575
|
-
- regex: '
|
871
|
+
- regex: 'check_http/v'
|
872
|
+
name: 'Nagios check_http'
|
873
|
+
category: 'Site Monitor'
|
874
|
+
url: 'https://nagios.org'
|
875
|
+
producer:
|
876
|
+
name: 'Nagios Plugins Development Team'
|
877
|
+
url: 'https://nagios.org'
|
878
|
+
|
879
|
+
- regex: 'nbertaupete95\(at\)gmail.com'
|
880
|
+
name: 'nbertaupete95'
|
881
|
+
category: 'Crawler'
|
882
|
+
|
883
|
+
- regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
|
576
884
|
name: 'Netcraft Survey Bot'
|
577
885
|
category: 'Search bot'
|
578
886
|
url: ''
|
@@ -580,6 +888,14 @@
|
|
580
888
|
name: 'Netcraft'
|
581
889
|
url: 'http://www.netcraft.com'
|
582
890
|
|
891
|
+
- regex: 'netEstate NE Crawler'
|
892
|
+
name: 'netEstate'
|
893
|
+
category: 'Crawler'
|
894
|
+
url: 'http://www.website-datenbank.de/Impressum'
|
895
|
+
producer:
|
896
|
+
name: 'netEstate GmbH'
|
897
|
+
url: 'https://www.netestate.de/en/'
|
898
|
+
|
583
899
|
- regex: 'Netvibes'
|
584
900
|
name: 'Netvibes'
|
585
901
|
url: 'http://www.netvibes.com/'
|
@@ -612,7 +928,25 @@
|
|
612
928
|
name: 'Northern Light'
|
613
929
|
url: 'http://northernlight.com'
|
614
930
|
|
615
|
-
- regex: '
|
931
|
+
- regex: 'Nmap Scripting Engine'
|
932
|
+
name: 'Nmap'
|
933
|
+
category: 'Security Checker'
|
934
|
+
url: 'https://nmap.org/book/nse.html'
|
935
|
+
producer:
|
936
|
+
name: 'Nmap'
|
937
|
+
url: 'https://nmap.org/'
|
938
|
+
|
939
|
+
- regex: 'Nuzzel'
|
940
|
+
name: 'Nuzzel'
|
941
|
+
category: 'Crawler'
|
942
|
+
producer:
|
943
|
+
name: 'Nuzzel'
|
944
|
+
url: https://www.nuzzel.com/
|
945
|
+
|
946
|
+
- regex: 'Octopus [0-9]'
|
947
|
+
name: 'Octopus'
|
948
|
+
|
949
|
+
- regex: 'omgili(?:bot)?'
|
616
950
|
name: 'Omgili bot'
|
617
951
|
category: 'Search bot'
|
618
952
|
url: 'http://www.omgili.com/Crawler.html'
|
@@ -660,6 +994,11 @@
|
|
660
994
|
name: 'Smallrivers SA'
|
661
995
|
url: 'http://www.paper.li'
|
662
996
|
|
997
|
+
- regex: 'phantomas/'
|
998
|
+
name: 'Phantomas'
|
999
|
+
category: 'Site Monitor'
|
1000
|
+
url: 'https://github.com/macbre/phantomas'
|
1001
|
+
|
663
1002
|
- regex: 'phpservermon'
|
664
1003
|
name: 'PHP Server Monitor'
|
665
1004
|
category: 'Site Monitor'
|
@@ -668,6 +1007,22 @@
|
|
668
1007
|
name: 'PHP Server Monitor'
|
669
1008
|
url: 'http://www.phpservermonitor.org/'
|
670
1009
|
|
1010
|
+
- regex: 'PocketParser'
|
1011
|
+
name: 'PocketParser'
|
1012
|
+
category: 'Read-it-later Service'
|
1013
|
+
url: 'https://getpocket.com/pocketparser_ua'
|
1014
|
+
producer:
|
1015
|
+
name: 'Pocket'
|
1016
|
+
url: 'https://getpocket.com/'
|
1017
|
+
|
1018
|
+
- regex: 'PritTorrent'
|
1019
|
+
name: 'PritTorrent'
|
1020
|
+
category: 'Crawler'
|
1021
|
+
url: 'https://github.com/astro/prittorrent'
|
1022
|
+
producer:
|
1023
|
+
name: 'Bitlove'
|
1024
|
+
url: 'http://bitlove.org/'
|
1025
|
+
|
671
1026
|
- regex: 'psbot(-page)?'
|
672
1027
|
name: 'Picsearch bot'
|
673
1028
|
category: 'Search bot'
|
@@ -684,6 +1039,22 @@
|
|
684
1039
|
name: 'Pingdom AB'
|
685
1040
|
url: 'https://www.pingdom.com'
|
686
1041
|
|
1042
|
+
- regex: 'Quora Link Preview'
|
1043
|
+
name: 'Quora Link Preview'
|
1044
|
+
category: 'Crawler'
|
1045
|
+
url: ''
|
1046
|
+
producer:
|
1047
|
+
name: 'Quora'
|
1048
|
+
url: 'http://www.quora.com'
|
1049
|
+
|
1050
|
+
- regex: 'RamblerMail'
|
1051
|
+
name: 'RamblerMail Image Proxy'
|
1052
|
+
category: 'Crawler'
|
1053
|
+
url: ''
|
1054
|
+
producer:
|
1055
|
+
name: 'Rambler&Co'
|
1056
|
+
url: 'https://rambler-co.ru/'
|
1057
|
+
|
687
1058
|
- regex: 'QuerySeekerSpider'
|
688
1059
|
name: 'QuerySeekerSpider'
|
689
1060
|
category: 'Crawler'
|
@@ -692,6 +1063,19 @@
|
|
692
1063
|
name: 'QueryEye Inc.'
|
693
1064
|
url: 'http://queryeye.com'
|
694
1065
|
|
1066
|
+
- regex: 'Qwantify'
|
1067
|
+
name: 'Qwantify'
|
1068
|
+
category: 'Crawler'
|
1069
|
+
url: 'https://www.qwant.com/'
|
1070
|
+
producer:
|
1071
|
+
name: 'Qwant Corporation'
|
1072
|
+
url: 'https://www.qwant.com/'
|
1073
|
+
|
1074
|
+
- regex: 'Rainmeter'
|
1075
|
+
name: 'Rainmeter'
|
1076
|
+
category: 'Crawler'
|
1077
|
+
url: 'https://www.rainmeter.net'
|
1078
|
+
|
695
1079
|
- regex: 'redditbot'
|
696
1080
|
name: 'Reddit Bot'
|
697
1081
|
category: 'Social Media Agent'
|
@@ -700,6 +1084,14 @@
|
|
700
1084
|
name: 'reddit inc.'
|
701
1085
|
url: 'http://www.reddit.com'
|
702
1086
|
|
1087
|
+
- regex: 'Riddler'
|
1088
|
+
name: 'Riddler'
|
1089
|
+
category: 'Security search bot'
|
1090
|
+
url: 'https://riddler.io/about'
|
1091
|
+
producer:
|
1092
|
+
name: 'F-Secure'
|
1093
|
+
url: 'https://www.f-secure.com'
|
1094
|
+
|
703
1095
|
- regex: 'rogerbot'
|
704
1096
|
name: 'Rogerbot'
|
705
1097
|
category: 'Crawler'
|
@@ -716,6 +1108,14 @@
|
|
716
1108
|
name: 'Roihunter a.s.'
|
717
1109
|
url: 'http://roihunter.com/'
|
718
1110
|
|
1111
|
+
- regex: 'SafeDNSBot'
|
1112
|
+
name: 'SafeDNSBot'
|
1113
|
+
category: 'Crawler'
|
1114
|
+
url: 'https://www.safedns.com/searchbot'
|
1115
|
+
producer:
|
1116
|
+
name: 'SafeDNS, Inc.'
|
1117
|
+
url: 'https://www.safedns.com/'
|
1118
|
+
|
719
1119
|
- regex: 'Scrapy'
|
720
1120
|
name: 'Scrapy'
|
721
1121
|
category: 'Crawler'
|
@@ -761,6 +1161,19 @@
|
|
761
1161
|
name: 'SEO Engine'
|
762
1162
|
url: 'http://www.seoengine.com'
|
763
1163
|
|
1164
|
+
- regex: 'SEOkicks-Robot'
|
1165
|
+
name: 'SEOkicks-Robot'
|
1166
|
+
category: 'Crawler'
|
1167
|
+
url: 'http://www.seokicks.de/robot.html'
|
1168
|
+
producer:
|
1169
|
+
name: 'SEOkicks'
|
1170
|
+
url: 'https://www.seokicks.de/'
|
1171
|
+
|
1172
|
+
- regex: 'seoscanners\.net'
|
1173
|
+
name: 'Seoscanners.net'
|
1174
|
+
category: 'Crawler'
|
1175
|
+
url: ''
|
1176
|
+
|
764
1177
|
- regex: 'SkypeUriPreview'
|
765
1178
|
name: 'Skype URI Preview'
|
766
1179
|
category: 'Service Agent'
|
@@ -777,6 +1190,14 @@
|
|
777
1190
|
name: 'Seznam.cz, a.s.'
|
778
1191
|
url: 'http://www.seznam.cz/'
|
779
1192
|
|
1193
|
+
- regex: 'shopify-partner-homepage-scraper'
|
1194
|
+
name: 'Shopify Partner'
|
1195
|
+
category: 'Crawler'
|
1196
|
+
url: 'https://www.shopify.com/partners'
|
1197
|
+
producer:
|
1198
|
+
name: 'Shopify'
|
1199
|
+
url: 'https://www.shopify.com/'
|
1200
|
+
|
780
1201
|
- regex: 'ShopWiki'
|
781
1202
|
name: 'ShopWiki'
|
782
1203
|
category: 'Search tools'
|
@@ -809,6 +1230,27 @@
|
|
809
1230
|
name: 'SISTRIX GmbH'
|
810
1231
|
url: 'http://www.sistrix.de'
|
811
1232
|
|
1233
|
+
- regex: 'SISTRIX Optimizer'
|
1234
|
+
name: 'SISTRIX Optimizer'
|
1235
|
+
category: 'Crawler'
|
1236
|
+
url: 'https://optimizer.sistrix.com'
|
1237
|
+
producer:
|
1238
|
+
name: 'SISTRIX GmbH'
|
1239
|
+
url: 'http://www.sistrix.de'
|
1240
|
+
|
1241
|
+
- regex: 'SiteSucker'
|
1242
|
+
name: 'SiteSucker'
|
1243
|
+
category: 'Crawler'
|
1244
|
+
url: 'http://ricks-apps.com/osx/sitesucker/'
|
1245
|
+
|
1246
|
+
- regex: 'sixy.ch'
|
1247
|
+
name: 'Sixy.ch'
|
1248
|
+
category: 'Site Monitor'
|
1249
|
+
url: 'http://sixy.ch'
|
1250
|
+
producer:
|
1251
|
+
name: 'Manuel Kasper'
|
1252
|
+
url: 'https://neon1.net/'
|
1253
|
+
|
812
1254
|
- regex: 'Slackbot|Slack-ImgProxy'
|
813
1255
|
name: 'Slackbot'
|
814
1256
|
category: 'Crawler'
|
@@ -833,6 +1275,30 @@
|
|
833
1275
|
name: 'Tencent Holdings'
|
834
1276
|
url: 'http://www.soso.com'
|
835
1277
|
|
1278
|
+
- regex: 'sqlmap/'
|
1279
|
+
name: 'sqlmap'
|
1280
|
+
category: 'Security Checker'
|
1281
|
+
url: 'http://sqlmap.org/'
|
1282
|
+
producer:
|
1283
|
+
name: 'sqlmap'
|
1284
|
+
url: 'http://sqlmap.org/'
|
1285
|
+
|
1286
|
+
- regex: 'SSL Labs'
|
1287
|
+
name: 'SSL Labs'
|
1288
|
+
category: 'Validator'
|
1289
|
+
url: 'https://www.ssllabs.com/about/assessment.html'
|
1290
|
+
producer:
|
1291
|
+
name: 'SSL Labs'
|
1292
|
+
url: 'https://www.ssllabs.com/about/assessment.html'
|
1293
|
+
|
1294
|
+
- regex: 'StatusCake'
|
1295
|
+
name: 'StatusCake'
|
1296
|
+
category: 'Site Monitor'
|
1297
|
+
url: 'https://www.statuscake.com'
|
1298
|
+
producer:
|
1299
|
+
name: 'StatusCake'
|
1300
|
+
url: 'https://www.statuscake.com'
|
1301
|
+
|
836
1302
|
- regex: 'Superfeedr bot'
|
837
1303
|
name: 'Superfeedr Bot'
|
838
1304
|
category: 'Feed Fetcher'
|
@@ -841,6 +1307,11 @@
|
|
841
1307
|
name: 'Superfeedr'
|
842
1308
|
url: 'https://superfeedr.com/'
|
843
1309
|
|
1310
|
+
- regex: 'Sparkler/[0-9]'
|
1311
|
+
name: 'Sparkler'
|
1312
|
+
category: 'Crawler'
|
1313
|
+
url: 'https://github.com/USCDataScience/sparkler'
|
1314
|
+
|
844
1315
|
- regex: 'Spinn3r'
|
845
1316
|
name: 'Spinn3r'
|
846
1317
|
category: 'Crawler'
|
@@ -865,10 +1336,23 @@
|
|
865
1336
|
name: 'Domain Tools'
|
866
1337
|
url: 'http://www.domaintools.com'
|
867
1338
|
|
1339
|
+
- regex: 'TarmotGezgin'
|
1340
|
+
name: 'Tarmot Gezgin'
|
1341
|
+
url: 'http://www.tarmot.com/gezgin/'
|
1342
|
+
category: 'Search bot'
|
1343
|
+
|
868
1344
|
- regex: 'TelegramBot'
|
869
|
-
name: '
|
1345
|
+
name: 'TelegramBot'
|
870
1346
|
url: 'https://telegram.org/blog/bot-revolution'
|
871
1347
|
|
1348
|
+
- regex: 'TLSProbe'
|
1349
|
+
name: 'TLSProbe'
|
1350
|
+
url: 'https://scan.trustnet.venafi.com/'
|
1351
|
+
category: 'Security search bot'
|
1352
|
+
producer:
|
1353
|
+
name: 'Venafi TrustNet'
|
1354
|
+
url: 'https://www.venafi.com'
|
1355
|
+
|
872
1356
|
- regex: 'TinEye-bot'
|
873
1357
|
name: 'TinEye Crawler'
|
874
1358
|
category: 'Search bot'
|
@@ -885,6 +1369,19 @@
|
|
885
1369
|
name: ''
|
886
1370
|
url: ''
|
887
1371
|
|
1372
|
+
- regex: 'theoldreader.com'
|
1373
|
+
name: 'theoldreader'
|
1374
|
+
category: 'Feed Reader'
|
1375
|
+
url: 'https://theoldreader.com'
|
1376
|
+
|
1377
|
+
- regex: 'trendictionbot'
|
1378
|
+
name: 'Trendiction Bot'
|
1379
|
+
category: 'Crawler'
|
1380
|
+
url: 'http://www.trendiction.de/bot'
|
1381
|
+
producer:
|
1382
|
+
name: 'Talkwalker Inc.'
|
1383
|
+
url: 'http://www.talkwalker.com'
|
1384
|
+
|
888
1385
|
- regex: 'TurnitinBot'
|
889
1386
|
name: 'TurnitinBot'
|
890
1387
|
category: 'Crawler'
|
@@ -909,6 +1406,13 @@
|
|
909
1406
|
name: 'Mediasift'
|
910
1407
|
url: ''
|
911
1408
|
|
1409
|
+
- regex: 'Twingly Recon'
|
1410
|
+
name: 'Twingly Recon'
|
1411
|
+
category: 'Crawler'
|
1412
|
+
producer:
|
1413
|
+
name: 'Twingly'
|
1414
|
+
url: 'https://www.twingly.com'
|
1415
|
+
|
912
1416
|
- regex: 'Twitterbot'
|
913
1417
|
name: 'Twitterbot'
|
914
1418
|
category: 'Social Media Agent'
|
@@ -917,6 +1421,30 @@
|
|
917
1421
|
name: 'Twitter'
|
918
1422
|
url: 'http://www.twitter.com'
|
919
1423
|
|
1424
|
+
- regex: 'UniversalFeedParser'
|
1425
|
+
name: 'UniversalFeedParser'
|
1426
|
+
category: 'Feed Fetcher'
|
1427
|
+
url: 'https://github.com/kurtmckee/feedparser'
|
1428
|
+
producer:
|
1429
|
+
name: 'Kurt McKee'
|
1430
|
+
url: 'https://github.com/kurtmckee'
|
1431
|
+
|
1432
|
+
- regex: 'via secureurl\.fwdcdn\.com'
|
1433
|
+
name: 'UkrNet Mail Proxy'
|
1434
|
+
category: 'Crawler'
|
1435
|
+
url: ''
|
1436
|
+
producer:
|
1437
|
+
name: 'UkrNet Ltd'
|
1438
|
+
url: 'https://www.ukr.net/'
|
1439
|
+
|
1440
|
+
- regex: 'Uptimebot'
|
1441
|
+
name: 'Uptimebot'
|
1442
|
+
category: 'Site Monitor'
|
1443
|
+
url: 'https://uptime.com/uptimebot'
|
1444
|
+
producer:
|
1445
|
+
name: 'Uptime'
|
1446
|
+
url: 'https://uptime.com'
|
1447
|
+
|
920
1448
|
- regex: 'UptimeRobot'
|
921
1449
|
name: 'Uptime Robot'
|
922
1450
|
category: 'Site Monitor'
|
@@ -933,6 +1461,22 @@
|
|
933
1461
|
name: 'Profound Networks'
|
934
1462
|
url: 'http://www.profound.net'
|
935
1463
|
|
1464
|
+
- regex: 'Vagabondo'
|
1465
|
+
name: 'Vagabondo'
|
1466
|
+
category: 'Crawler'
|
1467
|
+
url: ''
|
1468
|
+
producer:
|
1469
|
+
name: 'WiseGuys'
|
1470
|
+
url: 'http://www.wise-guys.nl/'
|
1471
|
+
|
1472
|
+
- regex: 'vkShare; '
|
1473
|
+
name: 'VK Share Button'
|
1474
|
+
category: 'Crawler'
|
1475
|
+
url: 'http://vk.com/dev/Share'
|
1476
|
+
producer:
|
1477
|
+
name: 'VK'
|
1478
|
+
url: 'http://vk.com/'
|
1479
|
+
|
936
1480
|
- regex: 'VSMCrawler'
|
937
1481
|
name: 'Visual Site Mapper Crawler'
|
938
1482
|
category: 'Crawler'
|
@@ -965,7 +1509,7 @@
|
|
965
1509
|
name: 'W3C'
|
966
1510
|
url: 'http://www.w3.org'
|
967
1511
|
|
968
|
-
- regex: 'W3C_Validator'
|
1512
|
+
- regex: 'W3C_Validator|Validator.nu'
|
969
1513
|
name: 'W3C Markup Validation Service'
|
970
1514
|
category: 'Validator'
|
971
1515
|
url: 'http://validator.w3.org/services'
|
@@ -989,6 +1533,18 @@
|
|
989
1533
|
name: 'W3C'
|
990
1534
|
url: 'http://www.w3.org'
|
991
1535
|
|
1536
|
+
- regex: 'Wappalyzer'
|
1537
|
+
name: 'Wappalyzer'
|
1538
|
+
url: 'https://github.com/AliasIO/Wappalyzer'
|
1539
|
+
producer:
|
1540
|
+
name: 'AliasIO'
|
1541
|
+
url: 'https://github.com/AliasIO'
|
1542
|
+
|
1543
|
+
- regex: 'PTST/'
|
1544
|
+
name: 'WebPageTest'
|
1545
|
+
category: 'Site Monitor'
|
1546
|
+
url: 'https://www.webpagetest.org'
|
1547
|
+
|
992
1548
|
- regex: 'WeSEE(:Search)?'
|
993
1549
|
name: 'WeSEE:Search'
|
994
1550
|
category: 'Search bot'
|
@@ -1013,6 +1569,14 @@
|
|
1013
1569
|
name: 'WebSitePulse'
|
1014
1570
|
url: 'http://www.websitepulse.com/'
|
1015
1571
|
|
1572
|
+
- regex: 'WordPress'
|
1573
|
+
name: 'WordPress'
|
1574
|
+
category: 'Service Agent'
|
1575
|
+
url: 'https://wordpress.org/'
|
1576
|
+
producer:
|
1577
|
+
name: 'Wordpress.org'
|
1578
|
+
url: 'https://wordpress.org/'
|
1579
|
+
|
1016
1580
|
- regex: 'Wotbox'
|
1017
1581
|
name: 'Wotbox'
|
1018
1582
|
category: 'Search bot'
|
@@ -1021,6 +1585,14 @@
|
|
1021
1585
|
name: 'Wotbox'
|
1022
1586
|
url: 'http://www.wotbox.com'
|
1023
1587
|
|
1588
|
+
- regex: 'XenForo'
|
1589
|
+
name: 'XenForo'
|
1590
|
+
category: 'Service Agent'
|
1591
|
+
url: 'https://xenforo.com/'
|
1592
|
+
producer:
|
1593
|
+
name: 'XenForo Ltd.'
|
1594
|
+
url: 'https://xenforo.com/'
|
1595
|
+
|
1024
1596
|
- regex: 'yacybot'
|
1025
1597
|
name: 'YaCy'
|
1026
1598
|
category: 'Search bot'
|
@@ -1053,7 +1625,15 @@
|
|
1053
1625
|
name: 'Yahoo! Inc.'
|
1054
1626
|
url: 'http://www.yahoo.com'
|
1055
1627
|
|
1056
|
-
- regex: '
|
1628
|
+
- regex: 'Y!J-BRW'
|
1629
|
+
name: 'Yahoo! Japan BRW'
|
1630
|
+
category: 'Crawler'
|
1631
|
+
url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
|
1632
|
+
producer:
|
1633
|
+
name: 'Yahoo! Japan Corp.'
|
1634
|
+
url: 'https://www.yahoo.co.jp/'
|
1635
|
+
|
1636
|
+
- regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
|
1057
1637
|
name: 'Yandex Bot'
|
1058
1638
|
category: 'Search bot'
|
1059
1639
|
url: 'http://www.yandex.com/bots'
|
@@ -1061,7 +1641,7 @@
|
|
1061
1641
|
name: 'Yandex LLC'
|
1062
1642
|
url: 'http://company.yandex.com'
|
1063
1643
|
|
1064
|
-
- regex: 'Yeti'
|
1644
|
+
- regex: 'Yeti|NaverJapan'
|
1065
1645
|
name: 'Yeti/Naverbot'
|
1066
1646
|
category: 'Search bot'
|
1067
1647
|
url: 'http://help.naver.com/robots/'
|
@@ -1090,6 +1670,11 @@
|
|
1090
1670
|
name: 'YunYun'
|
1091
1671
|
url: 'http://www.yunyun.com'
|
1092
1672
|
|
1673
|
+
- regex: 'zgrab'
|
1674
|
+
name: 'zgrab'
|
1675
|
+
category: 'Security Checker'
|
1676
|
+
url: 'https://github.com/zmap/zgrab'
|
1677
|
+
|
1093
1678
|
- regex: 'Zookabot'
|
1094
1679
|
name: 'Zookabot'
|
1095
1680
|
category: 'Crawler'
|
@@ -1138,9 +1723,9 @@
|
|
1138
1723
|
name: 'HubPages'
|
1139
1724
|
url: 'http://hubpages.com/'
|
1140
1725
|
|
1141
|
-
- regex: 'Pinterest
|
1726
|
+
- regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
|
1142
1727
|
name: 'Pinterest'
|
1143
|
-
url: ''
|
1728
|
+
url: 'http://www.pinterest.com/bot.html'
|
1144
1729
|
category: 'Crawler'
|
1145
1730
|
producer:
|
1146
1731
|
name: 'Pinterest'
|
@@ -1154,6 +1739,30 @@
|
|
1154
1739
|
name: 'Site24x7'
|
1155
1740
|
url: 'https://www.site24x7.com'
|
1156
1741
|
|
1742
|
+
- regex: 's~snapchat-proxy'
|
1743
|
+
name: 'Snapchat Proxy'
|
1744
|
+
category: 'Crawler'
|
1745
|
+
url: 'https://www.snapchat.com'
|
1746
|
+
producer:
|
1747
|
+
name: 'Snapchat Inc.'
|
1748
|
+
url: 'https://www.snapchat.com'
|
1749
|
+
|
1750
|
+
- regex: "Let's Encrypt validation server"
|
1751
|
+
name: "Let's Encrypt Validation"
|
1752
|
+
category: 'Service Agent'
|
1753
|
+
url: 'https://letsencrypt.org/how-it-works/'
|
1754
|
+
producer:
|
1755
|
+
name: "Let's Encrypt"
|
1756
|
+
url: 'https://letsencrypt.org'
|
1757
|
+
|
1758
|
+
- regex: 'GrapeshotCrawler'
|
1759
|
+
name: 'Grapeshot'
|
1760
|
+
category: 'Crawler'
|
1761
|
+
url: 'https://www.grapeshot.com/crawler'
|
1762
|
+
producer:
|
1763
|
+
name: 'Grapeshot'
|
1764
|
+
url: 'https://www.grapeshot.com'
|
1765
|
+
|
1157
1766
|
- regex: 'www\.monitor\.us'
|
1158
1767
|
name: 'Monitor.Us'
|
1159
1768
|
category: 'Site Monitor'
|
@@ -1162,6 +1771,22 @@
|
|
1162
1771
|
name: 'Monitor.Us'
|
1163
1772
|
url: 'http://www.monitor.us'
|
1164
1773
|
|
1774
|
+
- regex: 'Catchpoint( bot)?'
|
1775
|
+
name: 'Catchpoint'
|
1776
|
+
category: 'Site Monitor'
|
1777
|
+
url: ''
|
1778
|
+
producer:
|
1779
|
+
name: 'Catchpoint Systems'
|
1780
|
+
url: 'http://www.catchpoint.com/'
|
1781
|
+
|
1782
|
+
- regex: 'bitlybot'
|
1783
|
+
name: 'BitlyBot'
|
1784
|
+
category: 'Crawler'
|
1785
|
+
url: 'https://bitly.com'
|
1786
|
+
producer:
|
1787
|
+
name: 'Bitly, Inc.'
|
1788
|
+
url: 'https://bitly.com'
|
1789
|
+
|
1165
1790
|
- regex: 'Zao/'
|
1166
1791
|
name: 'Zao'
|
1167
1792
|
category: 'Crawler'
|
@@ -1217,9 +1842,204 @@
|
|
1217
1842
|
- regex: 'Server Density Service Monitoring.*'
|
1218
1843
|
name: 'Server Density'
|
1219
1844
|
|
1220
|
-
- regex: '
|
1845
|
+
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
1846
|
+
name: 'RSSRadio Bot'
|
1847
|
+
|
1848
|
+
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9)'
|
1221
1849
|
name: 'Generic Bot'
|
1222
1850
|
|
1851
|
+
- regex: '^sentry'
|
1852
|
+
name: 'Sentry Bot'
|
1853
|
+
producer:
|
1854
|
+
name: 'Sentry'
|
1855
|
+
url: 'https://sentry.io'
|
1856
|
+
|
1857
|
+
- regex: '^Spotify'
|
1858
|
+
name: 'Spotify'
|
1859
|
+
producer:
|
1860
|
+
name: 'Spotify'
|
1861
|
+
url: 'https://www.spotify.com'
|
1862
|
+
|
1863
|
+
- regex: 'The Knowledge AI'
|
1864
|
+
name: 'The Knowledge AI'
|
1865
|
+
category: 'Crawler'
|
1866
|
+
|
1867
|
+
- regex: 'Embedly'
|
1868
|
+
name: 'Embedly'
|
1869
|
+
category: 'Crawler'
|
1870
|
+
url: 'https://support.embed.ly/hc/en-us'
|
1871
|
+
producer:
|
1872
|
+
name: 'A Medium, Corp.'
|
1873
|
+
url: 'https://medium.com/'
|
1874
|
+
|
1875
|
+
- regex: 'BrandVerity'
|
1876
|
+
name: 'BrandVerity'
|
1877
|
+
category: 'Crawler'
|
1878
|
+
url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
|
1879
|
+
producer:
|
1880
|
+
name: 'BrandVerity, Inc.'
|
1881
|
+
url: 'https://www.brandverity.com/'
|
1882
|
+
|
1883
|
+
- regex: 'Kaspersky Lab CFR link resolver'
|
1884
|
+
name: 'Kaspersky'
|
1885
|
+
category: 'Security Checker'
|
1886
|
+
url: 'https://www.kaspersky.com/'
|
1887
|
+
producer:
|
1888
|
+
name: 'AO Kaspersky Lab'
|
1889
|
+
url: 'https://www.kaspersky.com/'
|
1890
|
+
|
1891
|
+
- regex: 'eZ Publish Link Validator'
|
1892
|
+
name: 'eZ Publish Link Validator'
|
1893
|
+
category: 'Crawler'
|
1894
|
+
url: 'https://ez.no/'
|
1895
|
+
producer:
|
1896
|
+
name: 'eZ Systems AS'
|
1897
|
+
url: 'https://ez.no/'
|
1898
|
+
|
1899
|
+
- regex: 'woorankreview'
|
1900
|
+
name: 'WooRank'
|
1901
|
+
category: 'Search bot'
|
1902
|
+
url: 'https://www.woorank.com/'
|
1903
|
+
producer:
|
1904
|
+
name: 'WooRank sprl'
|
1905
|
+
url: 'https://www.woorank.com/'
|
1906
|
+
|
1907
|
+
- regex: '(Match|LinkCheck) by Siteimprove.com'
|
1908
|
+
name: 'Siteimprove'
|
1909
|
+
category: 'Search bot'
|
1910
|
+
url: 'https://siteimprove.com/'
|
1911
|
+
producer:
|
1912
|
+
name: 'Siteimprove GmbH'
|
1913
|
+
url: 'https://siteimprove.com/'
|
1914
|
+
|
1915
|
+
- regex: 'CATExplorador'
|
1916
|
+
name: 'CATExplorador'
|
1917
|
+
category: 'Search bot'
|
1918
|
+
url: 'https://fundacio.cat/ca/domini/'
|
1919
|
+
producer:
|
1920
|
+
name: 'Fundació puntCAT'
|
1921
|
+
url: 'https://fundacio.cat/ca/domini/'
|
1922
|
+
|
1923
|
+
- regex: 'Buck'
|
1924
|
+
name: 'Buck'
|
1925
|
+
category: 'Search bot'
|
1926
|
+
url: 'https://hypefactors.com/'
|
1927
|
+
producer:
|
1928
|
+
name: 'Hypefactors A/S'
|
1929
|
+
url: 'https://hypefactors.com/'
|
1930
|
+
|
1931
|
+
- regex: 'tracemyfile'
|
1932
|
+
name: 'TraceMyFile'
|
1933
|
+
category: 'Search bot'
|
1934
|
+
url: 'https://www.tracemyfile.com/'
|
1935
|
+
producer:
|
1936
|
+
name: 'Idee Inc.'
|
1937
|
+
url: 'http://ideeinc.com/'
|
1938
|
+
|
1939
|
+
- regex: 'zelist.ro feed parser'
|
1940
|
+
name: 'Ze List'
|
1941
|
+
url: 'https://www.zelist.ro/'
|
1942
|
+
category: 'Feed Fetcher'
|
1943
|
+
producer:
|
1944
|
+
name: 'Treeworks SRL'
|
1945
|
+
url: 'https://www.tree.ro/'
|
1946
|
+
|
1947
|
+
- regex: 'weborama-fetcher'
|
1948
|
+
name: 'Weborama'
|
1949
|
+
category: 'Search bot'
|
1950
|
+
url: 'https://weborama.com/'
|
1951
|
+
producer:
|
1952
|
+
name: 'Weborama SA'
|
1953
|
+
url: 'https://weborama.com/'
|
1954
|
+
|
1955
|
+
- regex: 'BoardReader Favicon Fetcher'
|
1956
|
+
name: 'BoardReader'
|
1957
|
+
category: 'Search bot'
|
1958
|
+
url: 'http://boardreader.com/'
|
1959
|
+
producer:
|
1960
|
+
name: 'Effyis Inc'
|
1961
|
+
url: 'http://boardreader.com/'
|
1962
|
+
|
1963
|
+
- regex: 'IDG/IT'
|
1964
|
+
name: 'IDG/IT'
|
1965
|
+
category: 'Search bot'
|
1966
|
+
url: 'https://spaziodati.eu/'
|
1967
|
+
producer:
|
1968
|
+
name: 'SpazioDati S.r.l.'
|
1969
|
+
url: 'https://spaziodati.eu/'
|
1970
|
+
|
1971
|
+
- regex: 'Bytespider'
|
1972
|
+
name: 'Bytespider'
|
1973
|
+
category: 'Search bot'
|
1974
|
+
url: 'https://bytedance.com/'
|
1975
|
+
producer:
|
1976
|
+
name: 'ByteDance Ltd.'
|
1977
|
+
url: 'https://bytedance.com/'
|
1978
|
+
|
1979
|
+
- regex: 'WikiDo'
|
1980
|
+
name: 'WikiDo'
|
1981
|
+
category: 'Search bot'
|
1982
|
+
url: 'https://www.wikido.com/'
|
1983
|
+
producer:
|
1984
|
+
name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
|
1985
|
+
url: 'https://www.wikido.com/'
|
1986
|
+
|
1987
|
+
- regex: 'AwarioSmartBot'
|
1988
|
+
name: 'Awario'
|
1989
|
+
category: 'Search bot'
|
1990
|
+
url: 'https://awario.com/bots.html'
|
1991
|
+
producer:
|
1992
|
+
name: 'Awario'
|
1993
|
+
url: 'https://awario.com/'
|
1994
|
+
|
1995
|
+
- regex: 'AwarioRssBot'
|
1996
|
+
name: 'Awario'
|
1997
|
+
category: 'Feed Fetcher'
|
1998
|
+
url: 'https://awario.com/bots.html'
|
1999
|
+
producer:
|
2000
|
+
name: 'Awario'
|
2001
|
+
url: 'https://awario.com/'
|
2002
|
+
|
2003
|
+
- regex: 'oBot'
|
2004
|
+
name: 'oBot'
|
2005
|
+
category: 'Search bot'
|
2006
|
+
url: 'http://www.xforce-security.com/crawler/'
|
2007
|
+
producer:
|
2008
|
+
name: 'IBM Germany Research & Development GmbH'
|
2009
|
+
url: 'https://exchange.xforce.ibmcloud.com/'
|
2010
|
+
|
2011
|
+
- regex: 'SMTBot'
|
2012
|
+
name: 'SMTBot'
|
2013
|
+
category: 'Search bot'
|
2014
|
+
url: 'https://www.similartech.com/smtbot'
|
2015
|
+
producer:
|
2016
|
+
name: 'SimilarTech Ltd.'
|
2017
|
+
url: 'https://www.similartech.com/'
|
2018
|
+
|
2019
|
+
- regex: 'LCC'
|
2020
|
+
name: 'LCC'
|
2021
|
+
category: 'Search bot'
|
2022
|
+
url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
|
2023
|
+
producer:
|
2024
|
+
name: 'Universität Leipzig'
|
2025
|
+
url: 'https://www.uni-leipzig.de/'
|
2026
|
+
|
2027
|
+
- regex: 'Startpagina-Linkchecker'
|
2028
|
+
name: 'Startpagina Linkchecker'
|
2029
|
+
category: 'Search bot'
|
2030
|
+
url: 'https://www.startpagina.nl/linkchecker'
|
2031
|
+
producer:
|
2032
|
+
name: 'Startpagina B.V.'
|
2033
|
+
url: 'https://www.startpagina.nl/'
|
2034
|
+
|
2035
|
+
- regex: 'GTmetrix'
|
2036
|
+
name: 'GTmetrix'
|
2037
|
+
category: 'Crawler'
|
2038
|
+
url: 'https://gtmetrix.com/'
|
2039
|
+
producer:
|
2040
|
+
name: 'Carbon60 Operating Co. Ltd.'
|
2041
|
+
url: 'https://www.carbon60.com/'
|
2042
|
+
|
1223
2043
|
# Generic detections
|
1224
2044
|
|
1225
2045
|
- regex: 'Nutch'
|
@@ -1230,5 +2050,5 @@
|
|
1230
2050
|
name: 'The Apache Software Foundation'
|
1231
2051
|
url: 'http://www.apache.org/foundation/'
|
1232
2052
|
|
1233
|
-
- regex: '[a-z0-9\-_]*((?<!cu)bot|crawler|archiver|transcoder|spider)([^a-z]|$)'
|
2053
|
+
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
|
1234
2054
|
name: 'Generic Bot'
|