device_detector 1.0.3 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +15 -0
- data/README.md +8 -7
- data/lib/device_detector/bot.rb +2 -2
- data/lib/device_detector/browser.rb +364 -0
- data/lib/device_detector/client.rb +11 -2
- data/lib/device_detector/device.rb +1247 -22
- data/lib/device_detector/memory_cache.rb +5 -5
- data/lib/device_detector/metadata_extractor.rb +7 -8
- data/lib/device_detector/model_extractor.rb +3 -3
- data/lib/device_detector/name_extractor.rb +2 -2
- data/lib/device_detector/os.rb +150 -116
- data/lib/device_detector/parser.rb +23 -10
- data/lib/device_detector/version.rb +1 -1
- data/lib/device_detector/version_extractor.rb +29 -2
- data/lib/device_detector.rb +73 -40
- data/regexes/bots.yml +868 -62
- data/regexes/client/browser_engine.yml +11 -2
- data/regexes/client/browsers.yml +1132 -112
- data/regexes/client/feed_readers.yml +5 -11
- data/regexes/client/libraries.yml +86 -2
- data/regexes/client/mediaplayers.yml +39 -3
- data/regexes/client/mobile_apps.yml +940 -66
- data/regexes/client/pim.yml +66 -3
- data/regexes/device/cameras.yml +6 -6
- data/regexes/device/car_browsers.yml +23 -3
- data/regexes/device/consoles.yml +15 -3
- data/regexes/device/mobiles.yml +18351 -3566
- data/regexes/device/notebooks.yml +114 -0
- data/regexes/device/portable_media_player.yml +36 -9
- data/regexes/device/shell_tv.yml +117 -0
- data/regexes/device/televisions.yml +440 -35
- data/regexes/oss.yml +635 -284
- data/regexes/vendorfragments.yml +5 -1
- metadata +21 -118
- data/.gitignore +0 -14
- data/.travis.yml +0 -14
- data/Gemfile +0 -8
- data/Rakefile +0 -96
- data/device_detector.gemspec +0 -26
- data/spec/device_detector/bot_fixtures_spec.rb +0 -30
- data/spec/device_detector/client_fixtures_spec.rb +0 -31
- data/spec/device_detector/concrete_user_agent_spec.rb +0 -135
- data/spec/device_detector/detector_fixtures_spec.rb +0 -100
- data/spec/device_detector/device_fixtures_spec.rb +0 -36
- data/spec/device_detector/device_spec.rb +0 -151
- data/spec/device_detector/memory_cache_spec.rb +0 -148
- data/spec/device_detector/model_extractor_spec.rb +0 -63
- data/spec/device_detector/os_fixtures_spec.rb +0 -26
- data/spec/device_detector/version_extractor_spec.rb +0 -79
- data/spec/device_detector_spec.rb +0 -189
- data/spec/fixtures/client/browser.yml +0 -2206
- data/spec/fixtures/client/feed_reader.yml +0 -199
- data/spec/fixtures/client/library.yml +0 -175
- data/spec/fixtures/client/mediaplayer.yml +0 -163
- data/spec/fixtures/client/mobile_app.yml +0 -193
- data/spec/fixtures/client/pim.yml +0 -115
- data/spec/fixtures/detector/bots.yml +0 -3260
- data/spec/fixtures/detector/camera.yml +0 -121
- data/spec/fixtures/detector/car_browser.yml +0 -21
- data/spec/fixtures/detector/console.yml +0 -281
- data/spec/fixtures/detector/desktop.yml +0 -5361
- data/spec/fixtures/detector/feature_phone.yml +0 -891
- data/spec/fixtures/detector/feed_reader.yml +0 -551
- data/spec/fixtures/detector/mediaplayer.yml +0 -210
- data/spec/fixtures/detector/mobile_apps.yml +0 -456
- data/spec/fixtures/detector/phablet.yml +0 -3785
- data/spec/fixtures/detector/portable_media_player.yml +0 -178
- data/spec/fixtures/detector/smart_display.yml +0 -61
- data/spec/fixtures/detector/smartphone-1.yml +0 -9953
- data/spec/fixtures/detector/smartphone-10.yml +0 -9924
- data/spec/fixtures/detector/smartphone-11.yml +0 -9889
- data/spec/fixtures/detector/smartphone-12.yml +0 -8655
- data/spec/fixtures/detector/smartphone-2.yml +0 -9967
- data/spec/fixtures/detector/smartphone-3.yml +0 -9887
- data/spec/fixtures/detector/smartphone-4.yml +0 -9911
- data/spec/fixtures/detector/smartphone-5.yml +0 -9933
- data/spec/fixtures/detector/smartphone-6.yml +0 -9923
- data/spec/fixtures/detector/smartphone-7.yml +0 -9892
- data/spec/fixtures/detector/smartphone-8.yml +0 -9896
- data/spec/fixtures/detector/smartphone-9.yml +0 -9928
- data/spec/fixtures/detector/smartphone.yml +0 -9984
- data/spec/fixtures/detector/tablet-1.yml +0 -10023
- data/spec/fixtures/detector/tablet-2.yml +0 -9968
- data/spec/fixtures/detector/tablet-3.yml +0 -7787
- data/spec/fixtures/detector/tablet.yml +0 -9951
- data/spec/fixtures/detector/tv.yml +0 -3333
- data/spec/fixtures/detector/unknown.yml +0 -3283
- data/spec/fixtures/device/camera.yml +0 -19
- data/spec/fixtures/device/car_browser.yml +0 -7
- data/spec/fixtures/device/console.yml +0 -79
- data/spec/fixtures/parser/oss.yml +0 -1047
- data/spec/fixtures/parser/vendorfragments.yml +0 -162
- data/spec/spec_helper.rb +0 -9
data/regexes/bots.yml
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
###############
|
2
2
|
# Device Detector - The Universal Device Detection library for parsing User Agents
|
3
3
|
#
|
4
|
-
# @link
|
4
|
+
# @link https://matomo.org
|
5
5
|
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
6
6
|
###############
|
7
7
|
|
8
|
-
- regex: '360Spider
|
8
|
+
- regex: '360Spider'
|
9
9
|
name: '360Spider'
|
10
10
|
category: 'Search bot'
|
11
|
-
url: '
|
11
|
+
url: 'https://www.so.com/help/help_3_2.html'
|
12
12
|
producer:
|
13
13
|
name: 'Online Media Group, Inc.'
|
14
14
|
url: ''
|
@@ -40,26 +40,34 @@
|
|
40
40
|
- regex: 'AhrefsBot'
|
41
41
|
name: 'aHrefs Bot'
|
42
42
|
category: 'Crawler'
|
43
|
-
url: '
|
43
|
+
url: 'https://ahrefs.com/robot'
|
44
44
|
producer:
|
45
45
|
name: 'Ahrefs Pte Ltd'
|
46
|
-
url: '
|
46
|
+
url: 'https://ahrefs.com/robot'
|
47
47
|
|
48
48
|
- regex: 'ia_archiver|alexabot|verifybot'
|
49
49
|
name: 'Alexa Crawler'
|
50
50
|
category: 'Search bot'
|
51
|
-
url: 'https://alexa.
|
51
|
+
url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
|
52
52
|
producer:
|
53
53
|
name: 'Alexa Internet'
|
54
|
-
url: '
|
54
|
+
url: 'https://www.alexa.com'
|
55
55
|
|
56
56
|
- regex: 'alexa site audit'
|
57
57
|
name: 'Alexa Site Audit'
|
58
58
|
category: 'Site Monitor'
|
59
|
-
url: '
|
59
|
+
url: 'https://support.alexa.com/hc/en-us/articles/200450194'
|
60
60
|
producer:
|
61
61
|
name: 'Alexa Internet'
|
62
|
-
url: '
|
62
|
+
url: 'https://www.alexa.com'
|
63
|
+
|
64
|
+
- regex: 'Amazonbot'
|
65
|
+
name: 'Amazon Bot'
|
66
|
+
category: 'Crawler'
|
67
|
+
url: 'https://developer.amazon.com/support/amazonbot'
|
68
|
+
producer:
|
69
|
+
name: 'Amazon.com, Inc.'
|
70
|
+
url: 'https://www.amazon.com/'
|
63
71
|
|
64
72
|
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
|
65
73
|
name: 'Amazon Route53 Health Check'
|
@@ -82,29 +90,45 @@
|
|
82
90
|
url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
|
83
91
|
producer:
|
84
92
|
name: 'The Apache Software Foundation'
|
85
|
-
url: '
|
93
|
+
url: 'https://www.apache.org/foundation/'
|
86
94
|
|
87
95
|
- regex: 'Applebot'
|
88
96
|
name: 'Applebot'
|
89
97
|
category: 'Crawler'
|
90
|
-
url: '
|
98
|
+
url: 'https://support.apple.com/en-us/HT204683'
|
91
99
|
producer:
|
92
100
|
name: 'Apple Inc'
|
93
|
-
url: '
|
101
|
+
url: 'https://www.apple.com'
|
102
|
+
|
103
|
+
- regex: "AppSignalBot"
|
104
|
+
name: "AppSignalBot"
|
105
|
+
category: "Site Monitor"
|
106
|
+
url: "https://docs.appsignal.com/uptime-monitoring/"
|
107
|
+
producer:
|
108
|
+
name: "AppSignal"
|
109
|
+
url: "https://appsignal.com/"
|
94
110
|
|
95
111
|
- regex: 'Arachni'
|
96
112
|
name: 'Arachni'
|
97
113
|
category: 'Security Checker'
|
98
|
-
url: '
|
114
|
+
url: 'https://www.arachni-scanner.com/'
|
99
115
|
producer:
|
100
116
|
name: 'Sarosys LLC'
|
101
|
-
url: '
|
117
|
+
url: 'https://www.sarosys.com/'
|
118
|
+
|
119
|
+
- regex: 'AspiegelBot'
|
120
|
+
name: 'AspiegelBot'
|
121
|
+
category: 'Crawler'
|
122
|
+
url: 'https://aspiegel.com/'
|
123
|
+
producer:
|
124
|
+
name: 'Huawei'
|
125
|
+
url: 'https://www.huawei.com/'
|
102
126
|
|
103
127
|
- regex: 'Castro 2, Episode Duration Lookup'
|
104
128
|
name: 'Castro 2'
|
105
129
|
category: 'Service Agent'
|
106
130
|
url: 'http://supertop.co/castro/'
|
107
|
-
producer:
|
131
|
+
producer:
|
108
132
|
name: 'Supertop'
|
109
133
|
url: 'http://supertop.co'
|
110
134
|
|
@@ -119,10 +143,10 @@
|
|
119
143
|
- regex: 'archive\.org_bot|special_archiver'
|
120
144
|
name: 'archive.org bot'
|
121
145
|
category: 'Crawler'
|
122
|
-
url: '
|
146
|
+
url: 'https://archive.org/details/archive.org_bot'
|
123
147
|
producer:
|
124
148
|
name: 'The Internet Archive'
|
125
|
-
url: '
|
149
|
+
url: 'https://archive.org'
|
126
150
|
|
127
151
|
- regex: 'Ask Jeeves/Teoma'
|
128
152
|
name: 'Ask Jeeves'
|
@@ -148,7 +172,7 @@
|
|
148
172
|
name: '2.0Promotion GbR'
|
149
173
|
url: 'http://www.backlinktest.com'
|
150
174
|
|
151
|
-
- regex: '
|
175
|
+
- regex: 'Baidu.*spider|baidu Transcoder'
|
152
176
|
name: 'Baidu Spider'
|
153
177
|
category: 'Search bot'
|
154
178
|
url: 'http://www.baidu.com/search/spider.htm'
|
@@ -164,6 +188,14 @@
|
|
164
188
|
name: ''
|
165
189
|
url: ''
|
166
190
|
|
191
|
+
- regex: 'Better Uptime Bot'
|
192
|
+
name: 'Better Uptime Bot'
|
193
|
+
category: 'Site Monitor'
|
194
|
+
url: 'https://betteruptime.com/faq'
|
195
|
+
producer:
|
196
|
+
name: 'Better Uptime'
|
197
|
+
url: 'https://betteruptime.com/'
|
198
|
+
|
167
199
|
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
|
168
200
|
name: 'BingBot'
|
169
201
|
category: 'Search bot'
|
@@ -180,7 +212,7 @@
|
|
180
212
|
name: 'Blekko'
|
181
213
|
url: 'http://blekko.com'
|
182
214
|
|
183
|
-
- regex: 'BLEXBot
|
215
|
+
- regex: 'BLEXBot'
|
184
216
|
name: 'BLEXBot Crawler'
|
185
217
|
category: 'Crawler'
|
186
218
|
url: 'http://webmeup-crawler.com'
|
@@ -209,7 +241,7 @@
|
|
209
241
|
category: 'Crawler'
|
210
242
|
producer:
|
211
243
|
name: 'BoardReader'
|
212
|
-
url: '
|
244
|
+
url: 'https://boardreader.com/'
|
213
245
|
|
214
246
|
- regex: 'BountiiBot'
|
215
247
|
name: 'Bountii Bot'
|
@@ -275,6 +307,14 @@
|
|
275
307
|
name: 'CloudFlare'
|
276
308
|
url: 'http://www.cloudflare.com'
|
277
309
|
|
310
|
+
- regex: 'CloudflareDiagnostics'
|
311
|
+
name: 'Cloudflare Diagnostics'
|
312
|
+
category: 'Site Monitor'
|
313
|
+
url: 'https://www.cloudflare.com/'
|
314
|
+
producer:
|
315
|
+
name: 'Cloudflare'
|
316
|
+
url: 'https://www.cloudflare.com'
|
317
|
+
|
278
318
|
- regex: 'CloudFlare-AlwaysOnline'
|
279
319
|
name: 'CloudFlare Always Online'
|
280
320
|
category: 'Site Monitor'
|
@@ -283,13 +323,13 @@
|
|
283
323
|
name: 'CloudFlare'
|
284
324
|
url: 'http://www.cloudflare.com'
|
285
325
|
|
286
|
-
- regex: 'coccoc
|
326
|
+
- regex: 'coccoc.com'
|
287
327
|
name: 'Cốc Cốc Bot'
|
288
|
-
url: '
|
328
|
+
url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
|
289
329
|
category: 'Search bot'
|
290
330
|
producer:
|
291
331
|
name: 'Cốc Cốc'
|
292
|
-
url: '
|
332
|
+
url: 'https://coccoc.com/'
|
293
333
|
|
294
334
|
- regex: 'collectd'
|
295
335
|
name: 'Collectd'
|
@@ -331,7 +371,6 @@
|
|
331
371
|
name: 'Datanyze'
|
332
372
|
url: 'https://www.datanyze.com'
|
333
373
|
|
334
|
-
|
335
374
|
- regex: 'Dataprovider'
|
336
375
|
name: 'Dataprovider'
|
337
376
|
category: 'Crawler'
|
@@ -356,7 +395,7 @@
|
|
356
395
|
name: 'DAZOO.FR'
|
357
396
|
url: 'http://dazoo.fr'
|
358
397
|
|
359
|
-
- regex: 'discobot
|
398
|
+
- regex: 'discobot'
|
360
399
|
name: 'Discobot'
|
361
400
|
category: 'Search bot'
|
362
401
|
url: 'http://discoveryengine.com/discobot.html'
|
@@ -419,7 +458,7 @@
|
|
419
458
|
name: 'eVenture Capital Partners II, LLC'
|
420
459
|
url: 'http://www.eventures.vc/'
|
421
460
|
|
422
|
-
- regex: 'Exabot
|
461
|
+
- regex: 'Exabot|ExaleadCloudview'
|
423
462
|
name: 'ExaBot'
|
424
463
|
category: 'Crawler'
|
425
464
|
url: 'http://www.exabot.com/go/robot'
|
@@ -443,7 +482,7 @@
|
|
443
482
|
name: 'SEOmoz, Inc.'
|
444
483
|
url: 'http://moz.com/'
|
445
484
|
|
446
|
-
- regex: 'facebookexternalhit|facebookplatform'
|
485
|
+
- regex: 'facebookexternalhit|facebookplatform|facebookexternalua|facebookcatalog'
|
447
486
|
name: 'Facebook External Hit'
|
448
487
|
category: 'Social Media Agent'
|
449
488
|
url: 'https://www.facebook.com/externalhit_uatext.php'
|
@@ -475,7 +514,7 @@
|
|
475
514
|
name: 'David Smith & Developing Perspective, LLC'
|
476
515
|
url: 'https://david-smith.org'
|
477
516
|
|
478
|
-
- regex: '
|
517
|
+
- regex: 'Feedly'
|
479
518
|
name: 'Feedly'
|
480
519
|
url: 'http://www.feedly.com'
|
481
520
|
category: 'Feed Fetcher'
|
@@ -541,6 +580,10 @@
|
|
541
580
|
name: ''
|
542
581
|
url: ''
|
543
582
|
|
583
|
+
- regex: 'gobuster'
|
584
|
+
name: 'Gobuster'
|
585
|
+
url: 'https://github.com/OJ/gobuster'
|
586
|
+
|
544
587
|
- regex: 'ichiro/mobile goo'
|
545
588
|
name: 'Goo'
|
546
589
|
category: 'Search bot'
|
@@ -549,6 +592,10 @@
|
|
549
592
|
name: 'NTT Resonant'
|
550
593
|
url: 'http://goo.ne.jp'
|
551
594
|
|
595
|
+
- regex: 'Storebot-Google'
|
596
|
+
name: 'Google StoreBot'
|
597
|
+
category: 'Crawler'
|
598
|
+
|
552
599
|
- regex: 'Google Favicon'
|
553
600
|
name: 'Google Favicon'
|
554
601
|
category: 'Crawler'
|
@@ -577,6 +624,14 @@
|
|
577
624
|
name: 'Google Inc.'
|
578
625
|
url: 'http://www.google.com'
|
579
626
|
|
627
|
+
- regex: 'Google-Cloud-Scheduler'
|
628
|
+
name: 'Google Cloud Scheduler'
|
629
|
+
category: 'Crawler'
|
630
|
+
url: 'https://cloud.google.com/scheduler'
|
631
|
+
producer:
|
632
|
+
name: 'Google Inc.'
|
633
|
+
url: 'https://www.google.com'
|
634
|
+
|
580
635
|
- regex: 'Google-Structured-Data-Testing-Tool'
|
581
636
|
name: 'Google Structured Data Testing Tool'
|
582
637
|
category: 'Validator'
|
@@ -585,6 +640,14 @@
|
|
585
640
|
name: 'Google Inc.'
|
586
641
|
url: 'http://www.google.com'
|
587
642
|
|
643
|
+
- regex: 'GoogleStackdriverMonitoring'
|
644
|
+
name: 'Google Stackdriver Monitoring'
|
645
|
+
category: 'Site Monitor'
|
646
|
+
url: 'https://cloud.google.com/monitoring'
|
647
|
+
producer:
|
648
|
+
name: 'Google Inc.'
|
649
|
+
url: 'https://www.google.com'
|
650
|
+
|
588
651
|
- regex: 'via ggpht\.com GoogleImageProxy'
|
589
652
|
name: 'Gmail Image Proxy'
|
590
653
|
category: 'Crawler'
|
@@ -592,7 +655,7 @@
|
|
592
655
|
producer:
|
593
656
|
name: 'Google Inc.'
|
594
657
|
url: 'http://www.google.com'
|
595
|
-
|
658
|
+
|
596
659
|
- regex: 'SeznamEmailProxy'
|
597
660
|
name: 'Seznam Email Proxy'
|
598
661
|
category: 'Crawler'
|
@@ -625,7 +688,7 @@
|
|
625
688
|
name: 'Visual Meta'
|
626
689
|
url: 'https://www.shopalike.cz/'
|
627
690
|
|
628
|
-
- regex: '
|
691
|
+
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Stale-Content-Probe|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
|
629
692
|
name: 'Googlebot'
|
630
693
|
category: 'Search bot'
|
631
694
|
url: 'http://www.google.com/bot.html'
|
@@ -639,7 +702,7 @@
|
|
639
702
|
url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
|
640
703
|
producer:
|
641
704
|
name: 'The Internet Archive'
|
642
|
-
url: '
|
705
|
+
url: 'https://archive.org'
|
643
706
|
|
644
707
|
- regex: 'HubSpot '
|
645
708
|
name: 'HubSpot'
|
@@ -648,7 +711,6 @@
|
|
648
711
|
name: 'HubSpot Inc.'
|
649
712
|
url: 'https://www.hubspot.com'
|
650
713
|
|
651
|
-
|
652
714
|
- regex: 'HTTPMon'
|
653
715
|
name: 'HTTPMon'
|
654
716
|
category: 'Site Monitor'
|
@@ -680,7 +742,7 @@
|
|
680
742
|
|
681
743
|
- regex: 'ips-agent'
|
682
744
|
name: 'IPS Agent'
|
683
|
-
category: '
|
745
|
+
category: 'Crawler'
|
684
746
|
producer:
|
685
747
|
name: 'VeriSign, Inc'
|
686
748
|
url: 'http://www.verisign.com/'
|
@@ -693,6 +755,10 @@
|
|
693
755
|
name: ''
|
694
756
|
url: 'https://ip-guide.com'
|
695
757
|
|
758
|
+
- regex: 'k6/[0-9\.]+'
|
759
|
+
name: 'K6'
|
760
|
+
url: 'https://k6.io/'
|
761
|
+
|
696
762
|
- regex: 'kouio'
|
697
763
|
name: 'Kouio'
|
698
764
|
url: 'http://kouio.com/'
|
@@ -717,7 +783,7 @@
|
|
717
783
|
name: 'Lighthouse'
|
718
784
|
url: 'https://developers.google.com/web/tools/lighthouse'
|
719
785
|
|
720
|
-
- regex: 'linkdexbot
|
786
|
+
- regex: 'linkdexbot|linkdex\.com'
|
721
787
|
name: 'Linkdex Bot'
|
722
788
|
category: 'Search bot'
|
723
789
|
url: 'http://www.linkdex.com/bots'
|
@@ -740,7 +806,7 @@
|
|
740
806
|
name: ''
|
741
807
|
url: ''
|
742
808
|
|
743
|
-
- regex: 'Mail\.RU
|
809
|
+
- regex: 'Mail\.RU'
|
744
810
|
name: 'Mail.Ru Bot'
|
745
811
|
category: 'Search bot'
|
746
812
|
url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
|
@@ -764,7 +830,7 @@
|
|
764
830
|
name: ''
|
765
831
|
url: ''
|
766
832
|
|
767
|
-
- regex
|
833
|
+
- regex: 'masscan'
|
768
834
|
name: 'masscan'
|
769
835
|
url: 'https://github.com/robertdavidgraham/masscan'
|
770
836
|
category: 'Crawler'
|
@@ -917,12 +983,12 @@
|
|
917
983
|
category: 'Crawler'
|
918
984
|
producer:
|
919
985
|
name: 'Nuzzel'
|
920
|
-
url: https://www.nuzzel.com/
|
986
|
+
url: 'https://www.nuzzel.com/'
|
921
987
|
|
922
988
|
- regex: 'Octopus [0-9]'
|
923
989
|
name: 'Octopus'
|
924
990
|
|
925
|
-
- regex: 'omgili
|
991
|
+
- regex: 'omgili'
|
926
992
|
name: 'Omgili bot'
|
927
993
|
category: 'Search bot'
|
928
994
|
url: 'http://www.omgili.com/Crawler.html'
|
@@ -999,7 +1065,15 @@
|
|
999
1065
|
name: 'Bitlove'
|
1000
1066
|
url: 'http://bitlove.org/'
|
1001
1067
|
|
1002
|
-
- regex: '
|
1068
|
+
- regex: 'PRTG Network Monitor'
|
1069
|
+
name: 'PRTG Network Monitor'
|
1070
|
+
category: 'Network Monitor'
|
1071
|
+
url: 'https://www.paessler.com/prtg'
|
1072
|
+
producer:
|
1073
|
+
name: 'Paessler AG'
|
1074
|
+
url: 'https://www.paessler.com'
|
1075
|
+
|
1076
|
+
- regex: 'psbot'
|
1003
1077
|
name: 'Picsearch bot'
|
1004
1078
|
category: 'Search bot'
|
1005
1079
|
url: 'http://www.picsearch.com/bot.html'
|
@@ -1007,7 +1081,7 @@
|
|
1007
1081
|
name: 'Picsearch'
|
1008
1082
|
url: 'http://www.picsearch.com'
|
1009
1083
|
|
1010
|
-
- regex: 'Pingdom
|
1084
|
+
- regex: 'Pingdom(?:\.com|TMS)'
|
1011
1085
|
name: 'Pingdom Bot'
|
1012
1086
|
category: 'Site Monitor'
|
1013
1087
|
url: ''
|
@@ -1023,6 +1097,14 @@
|
|
1023
1097
|
name: 'Quora'
|
1024
1098
|
url: 'http://www.quora.com'
|
1025
1099
|
|
1100
|
+
- regex: 'Quora-Bot'
|
1101
|
+
name: 'Quora Bot'
|
1102
|
+
category: 'Crawler'
|
1103
|
+
url: ''
|
1104
|
+
producer:
|
1105
|
+
name: 'Quora'
|
1106
|
+
url: 'https://www.quora.com/'
|
1107
|
+
|
1026
1108
|
- regex: 'RamblerMail'
|
1027
1109
|
name: 'RamblerMail Image Proxy'
|
1028
1110
|
category: 'Crawler'
|
@@ -1206,7 +1288,7 @@
|
|
1206
1288
|
name: 'SISTRIX GmbH'
|
1207
1289
|
url: 'http://www.sistrix.de'
|
1208
1290
|
|
1209
|
-
- regex: 'SISTRIX Optimizer'
|
1291
|
+
- regex: 'compatible; (?:SISTRIX )?Optimizer'
|
1210
1292
|
name: 'SISTRIX Optimizer'
|
1211
1293
|
category: 'Crawler'
|
1212
1294
|
url: 'https://optimizer.sistrix.com'
|
@@ -1251,6 +1333,14 @@
|
|
1251
1333
|
name: 'Tencent Holdings'
|
1252
1334
|
url: 'http://www.soso.com'
|
1253
1335
|
|
1336
|
+
- regex: 'Sprinklr'
|
1337
|
+
name: 'Sprinklr'
|
1338
|
+
category: 'Crawler'
|
1339
|
+
url: ''
|
1340
|
+
producer:
|
1341
|
+
name: 'Sprinklr, Inc.'
|
1342
|
+
url: 'https://www.sprinklr.com/'
|
1343
|
+
|
1254
1344
|
- regex: 'sqlmap/'
|
1255
1345
|
name: 'sqlmap'
|
1256
1346
|
category: 'Security Checker'
|
@@ -1296,13 +1386,20 @@
|
|
1296
1386
|
name: 'Tailrank Inc'
|
1297
1387
|
url: 'http://spinn3r.com'
|
1298
1388
|
|
1299
|
-
- regex: '
|
1389
|
+
- regex: 'SputnikBot'
|
1300
1390
|
name: 'Sputnik Bot'
|
1301
|
-
category: ''
|
1391
|
+
category: 'Crawler'
|
1392
|
+
url: ''
|
1393
|
+
|
1394
|
+
- regex: 'SputnikFaviconBot'
|
1395
|
+
name: 'Sputnik Favicon Bot'
|
1396
|
+
category: 'Crawler'
|
1397
|
+
url: ''
|
1398
|
+
|
1399
|
+
- regex: 'SputnikImageBot'
|
1400
|
+
name: 'Sputnik Image Bot'
|
1401
|
+
category: 'Crawler'
|
1302
1402
|
url: ''
|
1303
|
-
producer:
|
1304
|
-
name: ''
|
1305
|
-
url: ''
|
1306
1403
|
|
1307
1404
|
- regex: 'SurveyBot'
|
1308
1405
|
name: 'Survey Bot'
|
@@ -1521,7 +1618,7 @@
|
|
1521
1618
|
category: 'Site Monitor'
|
1522
1619
|
url: 'https://www.webpagetest.org'
|
1523
1620
|
|
1524
|
-
- regex: 'WeSEE
|
1621
|
+
- regex: 'WeSEE'
|
1525
1622
|
name: 'WeSEE:Search'
|
1526
1623
|
category: 'Search bot'
|
1527
1624
|
url: 'http://www.wesee.com/bot'
|
@@ -1561,6 +1658,14 @@
|
|
1561
1658
|
name: 'Wotbox'
|
1562
1659
|
url: 'http://www.wotbox.com'
|
1563
1660
|
|
1661
|
+
- regex: 'XenForo'
|
1662
|
+
name: 'XenForo'
|
1663
|
+
category: 'Service Agent'
|
1664
|
+
url: 'https://xenforo.com/'
|
1665
|
+
producer:
|
1666
|
+
name: 'XenForo Ltd.'
|
1667
|
+
url: 'https://xenforo.com/'
|
1668
|
+
|
1564
1669
|
- regex: 'yacybot'
|
1565
1670
|
name: 'YaCy'
|
1566
1671
|
category: 'Search bot'
|
@@ -1585,6 +1690,14 @@
|
|
1585
1690
|
name: 'Yahoo! Inc.'
|
1586
1691
|
url: 'http://www.yahoo.com'
|
1587
1692
|
|
1693
|
+
- regex: 'YahooMailProxy'
|
1694
|
+
name: 'Yahoo! Mail Proxy'
|
1695
|
+
category: 'Service Agent'
|
1696
|
+
url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
|
1697
|
+
producer:
|
1698
|
+
name: 'Yahoo! Inc.'
|
1699
|
+
url: 'http://www.yahoo.com'
|
1700
|
+
|
1588
1701
|
- regex: 'YahooCacheSystem'
|
1589
1702
|
name: 'Yahoo! Cache System'
|
1590
1703
|
category: 'Crawler'
|
@@ -1593,7 +1706,15 @@
|
|
1593
1706
|
name: 'Yahoo! Inc.'
|
1594
1707
|
url: 'http://www.yahoo.com'
|
1595
1708
|
|
1596
|
-
- regex: '
|
1709
|
+
- regex: 'Y!J-BRW'
|
1710
|
+
name: 'Yahoo! Japan BRW'
|
1711
|
+
category: 'Crawler'
|
1712
|
+
url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
|
1713
|
+
producer:
|
1714
|
+
name: 'Yahoo! Japan Corp.'
|
1715
|
+
url: 'https://www.yahoo.co.jp/'
|
1716
|
+
|
1717
|
+
- regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
|
1597
1718
|
name: 'Yandex Bot'
|
1598
1719
|
category: 'Search bot'
|
1599
1720
|
url: 'http://www.yandex.com/bots'
|
@@ -1601,7 +1722,7 @@
|
|
1601
1722
|
name: 'Yandex LLC'
|
1602
1723
|
url: 'http://company.yandex.com'
|
1603
1724
|
|
1604
|
-
- regex: 'Yeti'
|
1725
|
+
- regex: 'Yeti|NaverJapan|AdsBot-Naver'
|
1605
1726
|
name: 'Yeti/Naverbot'
|
1606
1727
|
category: 'Search bot'
|
1607
1728
|
url: 'http://help.naver.com/robots/'
|
@@ -1678,18 +1799,18 @@
|
|
1678
1799
|
- regex: 'HubPages.*crawlingpolicy'
|
1679
1800
|
name: 'HubPages'
|
1680
1801
|
category: 'Crawler'
|
1681
|
-
url: '
|
1802
|
+
url: 'https://hubpages.com/help/crawlingpolicy'
|
1682
1803
|
producer:
|
1683
|
-
name: 'HubPages'
|
1684
|
-
url: '
|
1804
|
+
name: 'HubPages, Inc.'
|
1805
|
+
url: 'https://discover.hubpages.com/'
|
1685
1806
|
|
1686
|
-
- regex: 'Pinterest
|
1807
|
+
- regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
|
1687
1808
|
name: 'Pinterest'
|
1688
|
-
url: ''
|
1809
|
+
url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
|
1689
1810
|
category: 'Crawler'
|
1690
1811
|
producer:
|
1691
1812
|
name: 'Pinterest'
|
1692
|
-
url: '
|
1813
|
+
url: 'https://www.pinterest.com/'
|
1693
1814
|
|
1694
1815
|
- regex: 'Site24x7'
|
1695
1816
|
name: 'Site24x7 Website Monitoring'
|
@@ -1731,13 +1852,13 @@
|
|
1731
1852
|
name: 'Monitor.Us'
|
1732
1853
|
url: 'http://www.monitor.us'
|
1733
1854
|
|
1734
|
-
- regex: 'Catchpoint
|
1855
|
+
- regex: 'Catchpoint'
|
1735
1856
|
name: 'Catchpoint'
|
1736
1857
|
category: 'Site Monitor'
|
1737
|
-
url: ''
|
1858
|
+
url: 'https://www.catchpoint.com/'
|
1738
1859
|
producer:
|
1739
1860
|
name: 'Catchpoint Systems'
|
1740
|
-
url: '
|
1861
|
+
url: 'https://www.catchpoint.com/'
|
1741
1862
|
|
1742
1863
|
- regex: 'bitlybot'
|
1743
1864
|
name: 'BitlyBot'
|
@@ -1805,7 +1926,7 @@
|
|
1805
1926
|
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
1806
1927
|
name: 'RSSRadio Bot'
|
1807
1928
|
|
1808
|
-
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|
|
1929
|
+
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent|cortex)'
|
1809
1930
|
name: 'Generic Bot'
|
1810
1931
|
|
1811
1932
|
- regex: '^sentry'
|
@@ -1824,7 +1945,181 @@
|
|
1824
1945
|
name: 'The Knowledge AI'
|
1825
1946
|
category: 'Crawler'
|
1826
1947
|
|
1827
|
-
|
1948
|
+
- regex: 'Embedly'
|
1949
|
+
name: 'Embedly'
|
1950
|
+
category: 'Crawler'
|
1951
|
+
url: 'https://support.embed.ly/hc/en-us'
|
1952
|
+
producer:
|
1953
|
+
name: 'A Medium, Corp.'
|
1954
|
+
url: 'https://medium.com/'
|
1955
|
+
|
1956
|
+
- regex: 'BrandVerity'
|
1957
|
+
name: 'BrandVerity'
|
1958
|
+
category: 'Crawler'
|
1959
|
+
url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
|
1960
|
+
producer:
|
1961
|
+
name: 'BrandVerity, Inc.'
|
1962
|
+
url: 'https://www.brandverity.com/'
|
1963
|
+
|
1964
|
+
- regex: 'Kaspersky Lab CFR link resolver'
|
1965
|
+
name: 'Kaspersky'
|
1966
|
+
category: 'Security Checker'
|
1967
|
+
url: 'https://www.kaspersky.com/'
|
1968
|
+
producer:
|
1969
|
+
name: 'AO Kaspersky Lab'
|
1970
|
+
url: 'https://www.kaspersky.com/'
|
1971
|
+
|
1972
|
+
- regex: 'eZ Publish Link Validator'
|
1973
|
+
name: 'eZ Publish Link Validator'
|
1974
|
+
category: 'Crawler'
|
1975
|
+
url: 'https://ez.no/'
|
1976
|
+
producer:
|
1977
|
+
name: 'eZ Systems AS'
|
1978
|
+
url: 'https://ez.no/'
|
1979
|
+
|
1980
|
+
- regex: 'woorankreview'
|
1981
|
+
name: 'WooRank'
|
1982
|
+
category: 'Search bot'
|
1983
|
+
url: 'https://www.woorank.com/'
|
1984
|
+
producer:
|
1985
|
+
name: 'WooRank sprl'
|
1986
|
+
url: 'https://www.woorank.com/'
|
1987
|
+
|
1988
|
+
- regex: '(Match|LinkCheck) by Siteimprove.com'
|
1989
|
+
name: 'Siteimprove'
|
1990
|
+
category: 'Search bot'
|
1991
|
+
url: 'https://siteimprove.com/'
|
1992
|
+
producer:
|
1993
|
+
name: 'Siteimprove GmbH'
|
1994
|
+
url: 'https://siteimprove.com/'
|
1995
|
+
|
1996
|
+
- regex: 'CATExplorador'
|
1997
|
+
name: 'CATExplorador'
|
1998
|
+
category: 'Search bot'
|
1999
|
+
url: 'https://fundacio.cat/ca/domini/'
|
2000
|
+
producer:
|
2001
|
+
name: 'Fundació puntCAT'
|
2002
|
+
url: 'https://fundacio.cat/ca/domini/'
|
2003
|
+
|
2004
|
+
- regex: 'Buck'
|
2005
|
+
name: 'Buck'
|
2006
|
+
category: 'Search bot'
|
2007
|
+
url: 'https://hypefactors.com/'
|
2008
|
+
producer:
|
2009
|
+
name: 'Hypefactors A/S'
|
2010
|
+
url: 'https://hypefactors.com/'
|
2011
|
+
|
2012
|
+
- regex: 'tracemyfile'
|
2013
|
+
name: 'TraceMyFile'
|
2014
|
+
category: 'Search bot'
|
2015
|
+
url: 'https://www.tracemyfile.com/'
|
2016
|
+
producer:
|
2017
|
+
name: 'Idee Inc.'
|
2018
|
+
url: 'http://ideeinc.com/'
|
2019
|
+
|
2020
|
+
- regex: 'zelist.ro feed parser'
|
2021
|
+
name: 'Ze List'
|
2022
|
+
url: 'https://www.zelist.ro/'
|
2023
|
+
category: 'Feed Fetcher'
|
2024
|
+
producer:
|
2025
|
+
name: 'Treeworks SRL'
|
2026
|
+
url: 'https://www.tree.ro/'
|
2027
|
+
|
2028
|
+
- regex: 'weborama-fetcher'
|
2029
|
+
name: 'Weborama'
|
2030
|
+
category: 'Search bot'
|
2031
|
+
url: 'https://weborama.com/'
|
2032
|
+
producer:
|
2033
|
+
name: 'Weborama SA'
|
2034
|
+
url: 'https://weborama.com/'
|
2035
|
+
|
2036
|
+
- regex: 'BoardReader Favicon Fetcher'
|
2037
|
+
name: 'BoardReader'
|
2038
|
+
category: 'Search bot'
|
2039
|
+
url: 'https://boardreader.com/'
|
2040
|
+
producer:
|
2041
|
+
name: 'Effyis Inc'
|
2042
|
+
url: 'https://boardreader.com/'
|
2043
|
+
|
2044
|
+
- regex: 'IDG/IT'
|
2045
|
+
name: 'IDG/IT'
|
2046
|
+
category: 'Search bot'
|
2047
|
+
url: 'https://spaziodati.eu/'
|
2048
|
+
producer:
|
2049
|
+
name: 'SpazioDati S.r.l.'
|
2050
|
+
url: 'https://spaziodati.eu/'
|
2051
|
+
|
2052
|
+
- regex: 'Bytespider'
|
2053
|
+
name: 'Bytespider'
|
2054
|
+
category: 'Search bot'
|
2055
|
+
url: 'https://bytedance.com/'
|
2056
|
+
producer:
|
2057
|
+
name: 'ByteDance Ltd.'
|
2058
|
+
url: 'https://bytedance.com/'
|
2059
|
+
|
2060
|
+
- regex: 'WikiDo'
|
2061
|
+
name: 'WikiDo'
|
2062
|
+
category: 'Search bot'
|
2063
|
+
url: 'https://www.wikido.com/'
|
2064
|
+
producer:
|
2065
|
+
name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
|
2066
|
+
url: 'https://www.wikido.com/'
|
2067
|
+
|
2068
|
+
- regex: 'AwarioSmartBot'
|
2069
|
+
name: 'Awario'
|
2070
|
+
category: 'Search bot'
|
2071
|
+
url: 'https://awario.com/bots.html'
|
2072
|
+
producer:
|
2073
|
+
name: 'Awario'
|
2074
|
+
url: 'https://awario.com/'
|
2075
|
+
|
2076
|
+
- regex: 'AwarioRssBot'
|
2077
|
+
name: 'Awario'
|
2078
|
+
category: 'Feed Fetcher'
|
2079
|
+
url: 'https://awario.com/bots.html'
|
2080
|
+
producer:
|
2081
|
+
name: 'Awario'
|
2082
|
+
url: 'https://awario.com/'
|
2083
|
+
|
2084
|
+
- regex: 'oBot'
|
2085
|
+
name: 'oBot'
|
2086
|
+
category: 'Search bot'
|
2087
|
+
url: 'https://www.xforce-security.com/crawler/'
|
2088
|
+
producer:
|
2089
|
+
name: 'IBM Germany Research & Development GmbH'
|
2090
|
+
url: 'https://exchange.xforce.ibmcloud.com/'
|
2091
|
+
|
2092
|
+
- regex: 'SMTBot'
|
2093
|
+
name: 'SMTBot'
|
2094
|
+
category: 'Search bot'
|
2095
|
+
url: 'https://www.similartech.com/smtbot'
|
2096
|
+
producer:
|
2097
|
+
name: 'SimilarTech Ltd.'
|
2098
|
+
url: 'https://www.similartech.com/'
|
2099
|
+
|
2100
|
+
- regex: 'LCC'
|
2101
|
+
name: 'LCC'
|
2102
|
+
category: 'Search bot'
|
2103
|
+
url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
|
2104
|
+
producer:
|
2105
|
+
name: 'Universität Leipzig'
|
2106
|
+
url: 'https://www.uni-leipzig.de/'
|
2107
|
+
|
2108
|
+
- regex: 'Startpagina-Linkchecker'
|
2109
|
+
name: 'Startpagina Linkchecker'
|
2110
|
+
category: 'Search bot'
|
2111
|
+
url: 'https://www.startpagina.nl/linkchecker'
|
2112
|
+
producer:
|
2113
|
+
name: 'Startpagina B.V.'
|
2114
|
+
url: 'https://www.startpagina.nl/'
|
2115
|
+
|
2116
|
+
- regex: 'GTmetrix'
|
2117
|
+
name: 'GTmetrix'
|
2118
|
+
category: 'Crawler'
|
2119
|
+
url: 'https://gtmetrix.com/'
|
2120
|
+
producer:
|
2121
|
+
name: 'Carbon60 Operating Co. Ltd.'
|
2122
|
+
url: 'https://www.carbon60.com/'
|
1828
2123
|
|
1829
2124
|
- regex: 'Nutch'
|
1830
2125
|
name: 'Nutch-based Bot'
|
@@ -1832,7 +2127,518 @@
|
|
1832
2127
|
url: 'https://nutch.apache.org'
|
1833
2128
|
producer:
|
1834
2129
|
name: 'The Apache Software Foundation'
|
1835
|
-
url: '
|
2130
|
+
url: 'https://www.apache.org/foundation/'
|
2131
|
+
|
2132
|
+
- regex: 'Seobility'
|
2133
|
+
name: 'Seobility'
|
2134
|
+
category: 'Crawler'
|
2135
|
+
url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
|
2136
|
+
|
2137
|
+
- regex: 'Vercelbot'
|
2138
|
+
name: 'Vercel Bot'
|
2139
|
+
category: 'Service bot'
|
2140
|
+
url: 'https://vercel.com'
|
2141
|
+
|
2142
|
+
- regex: 'Grammarly'
|
2143
|
+
name: 'Grammarly'
|
2144
|
+
category: 'Service bot'
|
2145
|
+
url: 'https://www.grammarly.com'
|
2146
|
+
|
2147
|
+
- regex: 'Robozilla'
|
2148
|
+
name: 'Robozilla'
|
2149
|
+
category: 'Crawler'
|
2150
|
+
|
2151
|
+
- regex: 'Domains Project'
|
2152
|
+
name: 'Domains Project'
|
2153
|
+
category: 'Crawler'
|
2154
|
+
url: 'https://domainsproject.org'
|
2155
|
+
|
2156
|
+
- regex: 'PetalBot'
|
2157
|
+
name: 'Petal Bot'
|
2158
|
+
category: 'Crawler'
|
2159
|
+
url: 'https://aspiegel.com/petalbot'
|
2160
|
+
|
2161
|
+
- regex: 'SerendeputyBot'
|
2162
|
+
name: 'Serendeputy Bot'
|
2163
|
+
category: 'Crawler'
|
2164
|
+
url: 'https://serendeputy.com/about/serendeputy-bot'
|
2165
|
+
|
2166
|
+
- regex: 'ias-va.*admantx.*service-fetcher'
|
2167
|
+
name: 'ADmantX Service Fetcher'
|
2168
|
+
category: 'Service bot'
|
2169
|
+
url: 'https://www.admantx.com/service-fetcher.html'
|
2170
|
+
|
2171
|
+
- regex: 'SemanticScholarBot'
|
2172
|
+
name: 'Semantic Scholar Bot'
|
2173
|
+
category: 'Crawler'
|
2174
|
+
url: 'https://www.semanticscholar.org/crawler'
|
1836
2175
|
|
1837
|
-
- regex: '
|
2176
|
+
- regex: 'VelenPublicWebCrawler'
|
2177
|
+
name: 'Velen Public Web Crawler'
|
2178
|
+
category: 'Crawler'
|
2179
|
+
url: 'https://hunter.io/robot'
|
2180
|
+
|
2181
|
+
- regex: 'Barkrowler'
|
2182
|
+
name: 'Barkrowler'
|
2183
|
+
category: 'Crawler'
|
2184
|
+
url: 'http://www.exensa.com/crawl'
|
2185
|
+
|
2186
|
+
- regex: 'BDCbot'
|
2187
|
+
name: 'BDCbot'
|
2188
|
+
category: 'Crawler'
|
2189
|
+
url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
|
2190
|
+
producer:
|
2191
|
+
name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
|
2192
|
+
url: 'https://bigdatacorp.com.br/'
|
2193
|
+
|
2194
|
+
- regex: 'adbeat'
|
2195
|
+
name: 'Adbeat'
|
2196
|
+
category: 'Crawler'
|
2197
|
+
url: 'https://www.adbeat.com/operation_policy'
|
2198
|
+
producer:
|
2199
|
+
name: 'PPC Labs LLC'
|
2200
|
+
url: 'https://www.adbeat.com/'
|
2201
|
+
|
2202
|
+
- regex: 'BW/(?:(\d+[\.\d]+))'
|
2203
|
+
name: 'BuiltWith'
|
2204
|
+
category: 'Crawler'
|
2205
|
+
url: 'https://builtwith.com/biup'
|
2206
|
+
producer:
|
2207
|
+
name: 'BuiltWith Pty Ltd'
|
2208
|
+
url: 'https://builtwith.com/'
|
2209
|
+
|
2210
|
+
- regex: 'https://whatis.contentkingapp.com'
|
2211
|
+
name: 'ContentKing'
|
2212
|
+
category: 'Site Monitor'
|
2213
|
+
url: 'https://whatis.contentkingapp.com/'
|
2214
|
+
producer:
|
2215
|
+
name: 'ContentKing BV'
|
2216
|
+
url: 'https://www.contentkingapp.com/'
|
2217
|
+
|
2218
|
+
- regex: 'MicroAdBot'
|
2219
|
+
name: 'MicroAdBot'
|
2220
|
+
category: 'Crawler'
|
2221
|
+
url: 'https://www.microad.co.jp/'
|
2222
|
+
producer:
|
2223
|
+
name: 'MicroAd, Inc.'
|
2224
|
+
url: 'https://www.microad.co.jp/'
|
2225
|
+
|
2226
|
+
- regex: 'PingAdmin.Ru'
|
2227
|
+
name: 'PingAdmin.Ru'
|
2228
|
+
category: 'Site Monitor'
|
2229
|
+
url: 'https://ping-admin.ru/'
|
2230
|
+
|
2231
|
+
- regex: 'notifyninja.+monitoring'
|
2232
|
+
name: 'Notify Ninja'
|
2233
|
+
category: 'Site Monitor'
|
2234
|
+
url: 'http://notifyninja.com'
|
2235
|
+
|
2236
|
+
- regex: 'WebDataStats'
|
2237
|
+
name: 'WebDataStats'
|
2238
|
+
category: 'Crawler'
|
2239
|
+
url: 'https://webdatastats.com/policy.html'
|
2240
|
+
producer:
|
2241
|
+
name: 'WebTehRazrabotka LLC'
|
2242
|
+
url: 'https://webdatastats.com/'
|
2243
|
+
|
2244
|
+
- regex: 'parse.ly scraper'
|
2245
|
+
name: 'parse.ly'
|
2246
|
+
category: 'Crawler'
|
2247
|
+
url: 'https://www.parse.ly/help/integration/crawler'
|
2248
|
+
producer:
|
2249
|
+
name: 'Parsely, Inc.'
|
2250
|
+
url: 'https://www.parse.ly/'
|
2251
|
+
|
2252
|
+
- regex: 'Nimbostratus-Bot'
|
2253
|
+
name: 'Nimbostratus Bot'
|
2254
|
+
category: 'Site Monitor'
|
2255
|
+
url: 'http://cloudsystemnetworks.com'
|
2256
|
+
|
2257
|
+
- regex: 'HeartRails_Capture/\d'
|
2258
|
+
name: 'Heart Rails Capture'
|
2259
|
+
category: 'Service Agent'
|
2260
|
+
url: 'http://capture.heartrails.com'
|
2261
|
+
|
2262
|
+
- regex: 'Project-Resonance'
|
2263
|
+
name: 'Project Resonance'
|
2264
|
+
category: 'Crawler'
|
2265
|
+
url: 'http://project-resonance.com'
|
2266
|
+
|
2267
|
+
- regex: 'DataXu/\d'
|
2268
|
+
name: 'DataXu'
|
2269
|
+
category: 'Service Agent'
|
2270
|
+
url: 'https://advertising.roku.com/dataxu'
|
2271
|
+
producer:
|
2272
|
+
name: 'Roku, Inc.'
|
2273
|
+
url: 'https://roku.com'
|
2274
|
+
|
2275
|
+
- regex: 'Cocolyzebot'
|
2276
|
+
name: 'Cocolyzebot'
|
2277
|
+
category: 'Crawler'
|
2278
|
+
url: 'https://cocolyze.com/en/cocolyzebot'
|
2279
|
+
producer:
|
2280
|
+
name: 'VSI INNOVATION SAS'
|
2281
|
+
url: 'https://vsi-innovation.com/'
|
2282
|
+
|
2283
|
+
- regex: 'veryhip'
|
2284
|
+
name: 'VeryHip'
|
2285
|
+
category: 'Crawler'
|
2286
|
+
url: 'https://veryhip.com/'
|
2287
|
+
producer:
|
2288
|
+
name: 'VeryHip'
|
2289
|
+
url: 'https://veryhip.com/'
|
2290
|
+
|
2291
|
+
- regex: 'LinkpadBot'
|
2292
|
+
name: 'LinkpadBot'
|
2293
|
+
category: 'Crawler'
|
2294
|
+
url: 'https://www.linkpad.org/'
|
2295
|
+
producer:
|
2296
|
+
name: 'Solomono LLC'
|
2297
|
+
url: 'https://www.linkpad.org/'
|
2298
|
+
|
2299
|
+
- regex: 'MuscatFerret'
|
2300
|
+
name: 'MuscatFerret'
|
2301
|
+
category: 'Crawler'
|
2302
|
+
url: 'http://www.webtop.com/'
|
2303
|
+
|
2304
|
+
- regex: 'PageThing.com'
|
2305
|
+
name: 'PageThing'
|
2306
|
+
category: 'Crawler'
|
2307
|
+
url: 'https://www.pagething.com/'
|
2308
|
+
producer:
|
2309
|
+
name: 'SPECIALNOISE LTD'
|
2310
|
+
url: 'https://www.specialnoise.com/'
|
2311
|
+
|
2312
|
+
- regex: 'ArchiveBox'
|
2313
|
+
name: 'ArchiveBox'
|
2314
|
+
url: 'https://archivebox.io/'
|
2315
|
+
category: 'Crawler'
|
2316
|
+
producer:
|
2317
|
+
name: ''
|
2318
|
+
url: ''
|
2319
|
+
|
2320
|
+
- regex: 'Choosito'
|
2321
|
+
name: 'Choosito'
|
2322
|
+
url: 'https://www.choosito.com/'
|
2323
|
+
category: 'Crawler'
|
2324
|
+
producer:
|
2325
|
+
name: 'Choosito! Inc.'
|
2326
|
+
url: 'https://www.choosito.com/'
|
2327
|
+
|
2328
|
+
- regex: 'datagnionbot'
|
2329
|
+
name: 'datagnionbot'
|
2330
|
+
url: 'https://www.datagnion.com/bot.html'
|
2331
|
+
category: 'Crawler'
|
2332
|
+
producer:
|
2333
|
+
name: 'DATAGNION GMBH'
|
2334
|
+
url: 'https://www.datagnion.com/'
|
2335
|
+
|
2336
|
+
- regex: 'WhatCMS'
|
2337
|
+
name: 'WhatCMS'
|
2338
|
+
url: 'https://whatcms.org/'
|
2339
|
+
category: 'Crawler'
|
2340
|
+
producer:
|
2341
|
+
name: 'Nineteen Ten LLC'
|
2342
|
+
url: 'https://whatcms.org/'
|
2343
|
+
|
2344
|
+
- regex: 'httpx'
|
2345
|
+
name: 'httpx'
|
2346
|
+
url: 'https://github.com/projectdiscovery/httpx'
|
2347
|
+
category: 'Crawler'
|
2348
|
+
producer:
|
2349
|
+
name: ''
|
2350
|
+
url: ''
|
2351
|
+
|
2352
|
+
- regex: 'scaninfo@expanseinc.com'
|
2353
|
+
name: 'Expanse'
|
2354
|
+
category: 'Security Checker'
|
2355
|
+
url: 'https://expanse.co/'
|
2356
|
+
producer:
|
2357
|
+
name: 'Expanse Inc.'
|
2358
|
+
url: 'https://expanse.co/'
|
2359
|
+
|
2360
|
+
- regex: 'HuaweiWebCatBot'
|
2361
|
+
name: 'HuaweiWebCatBot'
|
2362
|
+
category: 'Crawler'
|
2363
|
+
url: 'https://isecurity.huawei.com'
|
2364
|
+
producer:
|
2365
|
+
name: 'Huawei Technologies Co., Ltd.'
|
2366
|
+
url: 'https://huawei.com'
|
2367
|
+
|
2368
|
+
- regex: 'Hatena-Favicon'
|
2369
|
+
name: 'Hatena Favicon'
|
2370
|
+
category: 'Crawler'
|
2371
|
+
url: 'https://www.hatena.ne.jp/faq/'
|
2372
|
+
producer:
|
2373
|
+
name: 'Hatena Co., Ltd.'
|
2374
|
+
url: 'https://www.hatena.ne.jp'
|
2375
|
+
|
2376
|
+
- regex: 'RyowlEngine/(\d+)'
|
2377
|
+
name: 'Ryowl'
|
2378
|
+
category: 'Crawler'
|
2379
|
+
url: 'https://ryowl.org'
|
2380
|
+
|
2381
|
+
- regex: 'OdklBot/(\d+)'
|
2382
|
+
name: 'Odnoklassniki Bot'
|
2383
|
+
category: 'Crawler'
|
2384
|
+
url: 'https://odnoklassniki.ru'
|
2385
|
+
|
2386
|
+
- regex: 'Mediatoolkitbot'
|
2387
|
+
name: 'Mediatoolkit Bot'
|
2388
|
+
category: 'Crawler'
|
2389
|
+
url: 'https://mediatoolkit.com'
|
2390
|
+
|
2391
|
+
- regex: 'ZoominfoBot'
|
2392
|
+
name: 'ZoominfoBot'
|
2393
|
+
category: 'Crawler'
|
2394
|
+
url: 'https://www.zoominfo.com'
|
2395
|
+
|
2396
|
+
- regex: 'WeViKaBot/([\d+\.])'
|
2397
|
+
name: 'WeViKaBot'
|
2398
|
+
category: 'Crawler'
|
2399
|
+
url: 'http://www.wevika.de'
|
2400
|
+
|
2401
|
+
- regex: 'SEOkicks'
|
2402
|
+
name: 'SEOkicks'
|
2403
|
+
category: 'Crawler'
|
2404
|
+
url: 'https://www.seokicks.de/robot.html'
|
2405
|
+
|
2406
|
+
- regex: 'Plukkie/([\d+\.])'
|
2407
|
+
name: 'Plukkie'
|
2408
|
+
category: 'Crawler'
|
2409
|
+
url: 'http://www.botje.com/plukkie.htm'
|
2410
|
+
|
2411
|
+
- regex: 'proximic;'
|
2412
|
+
name: 'Comscore'
|
2413
|
+
category: 'Crawler'
|
2414
|
+
url: 'https://www.comscore.com/Web-Crawler'
|
2415
|
+
|
2416
|
+
- regex: 'SurdotlyBot/([\d+\.])'
|
2417
|
+
name: 'SurdotlyBot'
|
2418
|
+
category: 'Crawler'
|
2419
|
+
url: 'http://sur.ly/bot.html'
|
2420
|
+
|
2421
|
+
- regex: 'Gowikibot/([\d+\.])'
|
2422
|
+
name: 'Gowikibot'
|
2423
|
+
category: 'Crawler'
|
2424
|
+
url: 'http://www.gowikibot.com'
|
2425
|
+
|
2426
|
+
- regex: 'SabsimBot/([\d+\.])'
|
2427
|
+
name: 'SabsimBot'
|
2428
|
+
category: 'Crawler'
|
2429
|
+
url: 'https://sabsim.com'
|
2430
|
+
|
2431
|
+
- regex: 'LumtelBot/([\d+\.])'
|
2432
|
+
name: 'LumtelBot'
|
2433
|
+
category: 'Crawler'
|
2434
|
+
url: 'https://umtel.com'
|
2435
|
+
|
2436
|
+
- regex: 'PiplBot'
|
2437
|
+
name: 'PiplBot'
|
2438
|
+
category: 'Crawler'
|
2439
|
+
url: 'http://www.pipl.com/bot'
|
2440
|
+
|
2441
|
+
- regex: 'woobot/([\d+\.])'
|
2442
|
+
name: 'WooRank'
|
2443
|
+
category: 'Crawler'
|
2444
|
+
url: 'https://www.woorank.com/bot'
|
2445
|
+
|
2446
|
+
- regex: 'Cookiebot/([\d+\.])'
|
2447
|
+
name: 'Cookiebot'
|
2448
|
+
category: 'Crawler'
|
2449
|
+
url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
|
2450
|
+
producer:
|
2451
|
+
name: 'Cybot A/S'
|
2452
|
+
url: 'https://www.cybot.com/'
|
2453
|
+
|
2454
|
+
- regex: 'NetSystemsResearch'
|
2455
|
+
name: 'NetSystemsResearch'
|
2456
|
+
category: 'Security Checker'
|
2457
|
+
url: 'https://www.netsystemsresearch.com/'
|
2458
|
+
producer:
|
2459
|
+
name: 'NET SYSTEMS RESEARCH LLC'
|
2460
|
+
url: 'https://www.netsystemsresearch.com/'
|
2461
|
+
|
2462
|
+
- regex: 'CensysInspect/([\d+\.])'
|
2463
|
+
name: 'CensysInspect'
|
2464
|
+
category: 'Security Checker'
|
2465
|
+
url: 'https://about.censys.io/'
|
2466
|
+
producer:
|
2467
|
+
name: 'Censys, Inc.'
|
2468
|
+
url: 'https://censys.io/'
|
2469
|
+
|
2470
|
+
- regex: 'gdnplus.com'
|
2471
|
+
name: 'GDNP'
|
2472
|
+
category: 'Crawler'
|
2473
|
+
url: 'https://gdnplus.com/'
|
2474
|
+
producer:
|
2475
|
+
name: 'Global Digital Network Plus, LLC'
|
2476
|
+
url: 'https://gdnplus.com/'
|
2477
|
+
|
2478
|
+
- regex: 'WellKnownBot/([\d+\.])'
|
2479
|
+
name: 'WellKnownBot'
|
2480
|
+
category: 'Crawler'
|
2481
|
+
url: 'https://well-known.dev'
|
2482
|
+
|
2483
|
+
- regex: 'Adsbot/([\d+\.])'
|
2484
|
+
name: 'Adsbot'
|
2485
|
+
category: 'Crawler'
|
2486
|
+
url: 'https://seostar.co/robot/'
|
2487
|
+
|
2488
|
+
- regex: 'MTRobot/([\d+\.])'
|
2489
|
+
name: 'MTRobot'
|
2490
|
+
category: 'Crawler'
|
2491
|
+
url: 'https://metrics-tools.de/robot.html'
|
2492
|
+
producer:
|
2493
|
+
name: 'Metrics Tools'
|
2494
|
+
url: 'https://metrics-tools.de/'
|
2495
|
+
|
2496
|
+
- regex: 'serpstatbot/([\d+\.])'
|
2497
|
+
name: 'serpstatbot'
|
2498
|
+
category: 'Crawler'
|
2499
|
+
url: 'http://serpstatbot.com/'
|
2500
|
+
producer:
|
2501
|
+
name: 'Netpeak Ltd'
|
2502
|
+
url: 'https://netpeak.net/'
|
2503
|
+
|
2504
|
+
- regex: 'colly'
|
2505
|
+
name: 'colly'
|
2506
|
+
category: 'Crawler'
|
2507
|
+
url: 'https://github.com/gocolly/colly/'
|
2508
|
+
|
2509
|
+
- regex: 'l9tcpid/v([\d+\.])'
|
2510
|
+
name: 'l9tcpid'
|
2511
|
+
category: 'Security Checker'
|
2512
|
+
url: 'https://github.com/LeakIX/l9tcpid'
|
2513
|
+
|
2514
|
+
- regex: 'MegaIndex.ru/([\d+\.])'
|
2515
|
+
name: 'MegaIndex'
|
2516
|
+
category: 'Crawler'
|
2517
|
+
url: 'https://megaindex.com/crawler'
|
2518
|
+
|
2519
|
+
- regex: 'Seekport'
|
2520
|
+
name: 'Seekport'
|
2521
|
+
category: 'Crawler'
|
2522
|
+
url: 'http://www.seekport.com/'
|
2523
|
+
producer:
|
2524
|
+
name: 'SISTRIX GmbH'
|
2525
|
+
url: 'https://www.sistrix.de/'
|
2526
|
+
|
2527
|
+
- regex: 'seolyt/([\d+\.])'
|
2528
|
+
name: 'seolyt'
|
2529
|
+
category: 'Crawler'
|
2530
|
+
url: 'https://seolyt.com/'
|
2531
|
+
|
2532
|
+
- regex: 'YaK/([\d+\.])'
|
2533
|
+
name: 'YaK'
|
2534
|
+
category: 'Crawler'
|
2535
|
+
url: 'https://www.linkfluence.com/'
|
2536
|
+
producer:
|
2537
|
+
name: 'Linkfluence SAS'
|
2538
|
+
url: 'https://www.linkfluence.com/'
|
2539
|
+
|
2540
|
+
- regex: 'KomodiaBot/([\d+\.])'
|
2541
|
+
name: 'KomodiaBot'
|
2542
|
+
category: 'Crawler'
|
2543
|
+
url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
|
2544
|
+
producer:
|
2545
|
+
name: 'Komodia Inc.'
|
2546
|
+
url: 'https://www.komodia.com/'
|
2547
|
+
|
2548
|
+
- regex: 'Neevabot/([\d+\.])'
|
2549
|
+
name: 'Neevabot'
|
2550
|
+
category: 'Search bot'
|
2551
|
+
url: 'https://neeva.com/neevabot'
|
2552
|
+
producer:
|
2553
|
+
name: 'Neeva Inc.'
|
2554
|
+
url: 'https://neeva.com/'
|
2555
|
+
|
2556
|
+
- regex: 'LinkPreview/([\d+\.])'
|
2557
|
+
name: 'LinkPreview'
|
2558
|
+
category: 'Service Agent'
|
2559
|
+
url: 'https://www.linkpreview.net/'
|
2560
|
+
|
2561
|
+
- regex: 'JungleKeyThumbnail/([\d+\.])'
|
2562
|
+
name: 'JungleKeyThumbnail'
|
2563
|
+
category: 'Crawler'
|
2564
|
+
url: 'https://junglekey.com/'
|
2565
|
+
|
2566
|
+
- regex: 'rocketmonitor(?: |bot/)([\d+\.])'
|
2567
|
+
name: 'RocketMonitorBot'
|
2568
|
+
category: 'Site Monitor'
|
2569
|
+
url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
|
2570
|
+
producer:
|
2571
|
+
name: 'Radio Mast, Inc.'
|
2572
|
+
url: 'https://www.radiomast.io/'
|
2573
|
+
|
2574
|
+
- regex: 'SitemapParser-VIPnytt/([\d+\.])'
|
2575
|
+
name: 'SitemapParser-VIPnytt'
|
2576
|
+
category: 'Crawler'
|
2577
|
+
url: 'https://github.com/VIPnytt/SitemapParser/'
|
2578
|
+
|
2579
|
+
- regex: '^Turnitin'
|
2580
|
+
name: 'Turnitin'
|
2581
|
+
category: 'Crawler'
|
2582
|
+
url: 'https://turnitin.com/robot/crawlerinfo.html'
|
2583
|
+
|
2584
|
+
- regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
|
2585
|
+
name: 'Dotcom Monitor'
|
2586
|
+
category: 'Site Monitor'
|
2587
|
+
url: 'https://www.dotcom-monitor.com'
|
2588
|
+
|
2589
|
+
- regex: 'ThinkChaos/'
|
2590
|
+
name: 'ThinkChaos'
|
2591
|
+
category: 'Crawler'
|
2592
|
+
|
2593
|
+
- regex: 'DataForSeoBot'
|
2594
|
+
name: 'DataForSeoBot'
|
2595
|
+
category: 'Crawler'
|
2596
|
+
url: 'https://dataforseo.com/dataforseo-bot'
|
2597
|
+
|
2598
|
+
- regex: 'Discordbot/([\d+.]+)'
|
2599
|
+
name: 'Discord Bot'
|
2600
|
+
category: 'Service Agent'
|
2601
|
+
url: 'https://discordapp.com'
|
2602
|
+
|
2603
|
+
- regex: 'Linespider/([\d+.]+)'
|
2604
|
+
name: 'Linespider'
|
2605
|
+
category: 'Crawler'
|
2606
|
+
url: 'https://lin.ee/4dwXkTH'
|
2607
|
+
|
2608
|
+
- regex: 'Cincraw/([\d+.]+)'
|
2609
|
+
name: 'Cincraw'
|
2610
|
+
category: 'Crawler'
|
2611
|
+
url: 'http://cincrawdata.net/bot/'
|
2612
|
+
|
2613
|
+
- regex: 'CISPA Web Analyzer'
|
2614
|
+
name: 'CISPA Web Analyzer'
|
2615
|
+
category: 'Crawler'
|
2616
|
+
url: 'https://notify.cispa.de/'
|
2617
|
+
producer:
|
2618
|
+
name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH'
|
2619
|
+
url: 'https://cispa.de/en'
|
2620
|
+
|
2621
|
+
- regex: 'IonCrawl'
|
2622
|
+
name: 'IONOS Crawler'
|
2623
|
+
category: 'Crawler'
|
2624
|
+
url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/'
|
2625
|
+
producer:
|
2626
|
+
name: 'IONOS SE'
|
2627
|
+
url: 'https://www.ionos.de/'
|
2628
|
+
|
2629
|
+
- regex: 'Crawldad'
|
2630
|
+
name: 'Crawldad'
|
2631
|
+
category: 'Crawler'
|
2632
|
+
url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
|
2633
|
+
|
2634
|
+
- regex: 'https://securitytxt-scan.cs.hm.edu/'
|
2635
|
+
name: 'security.txt scanserver'
|
2636
|
+
category: 'Security Checker'
|
2637
|
+
url: 'https://securitytxt-scan.cs.hm.edu/'
|
2638
|
+
producer:
|
2639
|
+
name: 'Hochschule für angewandte Wissenschaften München'
|
2640
|
+
url: 'https://www.hm.edu/'
|
2641
|
+
|
2642
|
+
# Generic detections
|
2643
|
+
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
|
1838
2644
|
name: 'Generic Bot'
|