device_detector 1.0.2 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.rubocop.yml +49 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -6
- data/CHANGELOG.md +11 -0
- data/README.md +8 -7
- data/Rakefile +7 -4
- data/device_detector.gemspec +1 -0
- data/lib/device_detector/bot.rb +2 -2
- data/lib/device_detector/browser.rb +364 -0
- data/lib/device_detector/client.rb +11 -2
- data/lib/device_detector/device.rb +1168 -22
- data/lib/device_detector/memory_cache.rb +5 -5
- data/lib/device_detector/metadata_extractor.rb +7 -8
- data/lib/device_detector/model_extractor.rb +3 -3
- data/lib/device_detector/name_extractor.rb +2 -2
- data/lib/device_detector/os.rb +148 -116
- data/lib/device_detector/parser.rb +22 -9
- data/lib/device_detector/version.rb +1 -1
- data/lib/device_detector/version_extractor.rb +10 -2
- data/lib/device_detector.rb +73 -40
- data/regexes/bots.yml +909 -59
- data/regexes/client/browser_engine.yml +14 -2
- data/regexes/client/browsers.yml +1356 -241
- data/regexes/client/feed_readers.yml +10 -10
- data/regexes/client/libraries.yml +75 -1
- data/regexes/client/mediaplayers.yml +39 -3
- data/regexes/client/mobile_apps.yml +524 -45
- data/regexes/client/pim.yml +55 -2
- data/regexes/device/cameras.yml +6 -6
- data/regexes/device/car_browsers.yml +23 -3
- data/regexes/device/consoles.yml +9 -3
- data/regexes/device/mobiles.yml +16903 -3199
- data/regexes/device/notebooks.yml +114 -0
- data/regexes/device/portable_media_player.yml +20 -3
- data/regexes/device/shell_tv.yml +117 -0
- data/regexes/device/televisions.yml +440 -35
- data/regexes/oss.yml +603 -165
- data/regexes/vendorfragments.yml +6 -2
- data/spec/device_detector/concrete_user_agent_spec.rb +38 -51
- data/spec/device_detector/detector_fixtures_spec.rb +36 -37
- data/spec/device_detector/device_spec.rb +52 -56
- data/spec/device_detector/memory_cache_spec.rb +19 -19
- data/spec/device_detector/model_extractor_spec.rb +3 -3
- data/spec/device_detector/version_extractor_spec.rb +5 -6
- data/spec/device_detector_spec.rb +26 -26
- data/spec/fixtures/client/browser.yml +2617 -555
- data/spec/fixtures/client/feed_reader.yml +34 -52
- data/spec/fixtures/client/library.yml +129 -38
- data/spec/fixtures/client/mediaplayer.yml +59 -28
- data/spec/fixtures/client/mobile_app.yml +589 -20
- data/spec/fixtures/client/pim.yml +92 -19
- data/spec/fixtures/detector/bots.yml +1749 -458
- data/spec/fixtures/detector/camera.yml +30 -24
- data/spec/fixtures/detector/car_browser.yml +199 -3
- data/spec/fixtures/detector/console.yml +107 -45
- data/spec/fixtures/detector/desktop.yml +3216 -758
- data/spec/fixtures/detector/feature_phone.yml +935 -181
- data/spec/fixtures/detector/feed_reader.yml +181 -193
- data/spec/fixtures/detector/mediaplayer.yml +157 -60
- data/spec/fixtures/detector/mobile_apps.yml +914 -136
- data/spec/fixtures/detector/peripheral.yml +271 -0
- data/spec/fixtures/detector/phablet.yml +5593 -1533
- data/spec/fixtures/detector/portable_media_player.yml +398 -32
- data/spec/fixtures/detector/smart_display.yml +183 -9
- data/spec/fixtures/detector/smart_speaker.yml +60 -0
- data/spec/fixtures/detector/smartphone-1.yml +3930 -4239
- data/spec/fixtures/detector/smartphone-10.yml +3985 -7366
- data/spec/fixtures/detector/smartphone-11.yml +5083 -3784
- data/spec/fixtures/detector/smartphone-12.yml +8989 -0
- data/spec/fixtures/detector/smartphone-13.yml +9412 -0
- data/spec/fixtures/detector/smartphone-14.yml +9477 -0
- data/spec/fixtures/detector/smartphone-15.yml +9281 -0
- data/spec/fixtures/detector/smartphone-16.yml +9678 -0
- data/spec/fixtures/detector/smartphone-17.yml +9965 -0
- data/spec/fixtures/detector/smartphone-18.yml +9806 -0
- data/spec/fixtures/detector/smartphone-19.yml +9965 -0
- data/spec/fixtures/detector/smartphone-2.yml +6670 -4375
- data/spec/fixtures/detector/smartphone-20.yml +9710 -0
- data/spec/fixtures/detector/smartphone-21.yml +8693 -0
- data/spec/fixtures/detector/smartphone-22.yml +10178 -0
- data/spec/fixtures/detector/smartphone-23.yml +9453 -0
- data/spec/fixtures/detector/smartphone-24.yml +9843 -0
- data/spec/fixtures/detector/smartphone-25.yml +9703 -0
- data/spec/fixtures/detector/smartphone-26.yml +10007 -0
- data/spec/fixtures/detector/smartphone-27.yml +4927 -0
- data/spec/fixtures/detector/smartphone-3.yml +4656 -4711
- data/spec/fixtures/detector/smartphone-4.yml +3518 -4479
- data/spec/fixtures/detector/smartphone-5.yml +3805 -4803
- data/spec/fixtures/detector/smartphone-6.yml +3745 -4693
- data/spec/fixtures/detector/smartphone-7.yml +3721 -4645
- data/spec/fixtures/detector/smartphone-8.yml +4564 -4699
- data/spec/fixtures/detector/smartphone-9.yml +3897 -4888
- data/spec/fixtures/detector/smartphone.yml +3154 -4141
- data/spec/fixtures/detector/tablet-1.yml +4742 -4576
- data/spec/fixtures/detector/tablet-2.yml +3803 -4731
- data/spec/fixtures/detector/tablet-3.yml +6210 -2309
- data/spec/fixtures/detector/tablet-4.yml +9062 -0
- data/spec/fixtures/detector/tablet-5.yml +9273 -0
- data/spec/fixtures/detector/tablet-6.yml +4588 -0
- data/spec/fixtures/detector/tablet.yml +3629 -4613
- data/spec/fixtures/detector/tv-1.yml +2501 -0
- data/spec/fixtures/detector/tv.yml +8856 -2064
- data/spec/fixtures/detector/unknown.yml +412 -587
- data/spec/fixtures/detector/wearable.yml +915 -0
- data/spec/fixtures/device/camera.yml +4 -3
- data/spec/fixtures/device/car_browser.yml +9 -2
- data/spec/fixtures/device/console.yml +15 -14
- data/spec/fixtures/device/notebook.yml +7 -0
- data/spec/fixtures/parser/oss.yml +1392 -21
- data/spec/fixtures/parser/vendorfragments.yml +57 -51
- metadata +70 -4
data/regexes/bots.yml
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
###############
|
|
2
2
|
# Device Detector - The Universal Device Detection library for parsing User Agents
|
|
3
3
|
#
|
|
4
|
-
# @link
|
|
4
|
+
# @link https://matomo.org
|
|
5
5
|
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
|
6
6
|
###############
|
|
7
7
|
|
|
8
|
-
- regex: '360Spider
|
|
8
|
+
- regex: '360Spider'
|
|
9
9
|
name: '360Spider'
|
|
10
10
|
category: 'Search bot'
|
|
11
|
-
url: '
|
|
11
|
+
url: 'https://www.so.com/help/help_3_2.html'
|
|
12
12
|
producer:
|
|
13
13
|
name: 'Online Media Group, Inc.'
|
|
14
14
|
url: ''
|
|
@@ -40,26 +40,41 @@
|
|
|
40
40
|
- regex: 'AhrefsBot'
|
|
41
41
|
name: 'aHrefs Bot'
|
|
42
42
|
category: 'Crawler'
|
|
43
|
-
url: '
|
|
43
|
+
url: 'https://ahrefs.com/robot'
|
|
44
44
|
producer:
|
|
45
45
|
name: 'Ahrefs Pte Ltd'
|
|
46
|
-
url: '
|
|
46
|
+
url: 'https://ahrefs.com/robot'
|
|
47
47
|
|
|
48
48
|
- regex: 'ia_archiver|alexabot|verifybot'
|
|
49
49
|
name: 'Alexa Crawler'
|
|
50
50
|
category: 'Search bot'
|
|
51
|
-
url: 'https://alexa.
|
|
51
|
+
url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
|
|
52
52
|
producer:
|
|
53
53
|
name: 'Alexa Internet'
|
|
54
|
-
url: '
|
|
54
|
+
url: 'https://www.alexa.com'
|
|
55
55
|
|
|
56
56
|
- regex: 'alexa site audit'
|
|
57
57
|
name: 'Alexa Site Audit'
|
|
58
58
|
category: 'Site Monitor'
|
|
59
|
-
url: '
|
|
59
|
+
url: 'https://support.alexa.com/hc/en-us/articles/200450194'
|
|
60
60
|
producer:
|
|
61
61
|
name: 'Alexa Internet'
|
|
62
|
-
url: '
|
|
62
|
+
url: 'https://www.alexa.com'
|
|
63
|
+
|
|
64
|
+
- regex: 'Amazonbot'
|
|
65
|
+
name: 'Amazon Bot'
|
|
66
|
+
category: 'Crawler'
|
|
67
|
+
url: 'https://developer.amazon.com/support/amazonbot'
|
|
68
|
+
producer:
|
|
69
|
+
name: 'Amazon.com, Inc.'
|
|
70
|
+
url: 'https://www.amazon.com/'
|
|
71
|
+
|
|
72
|
+
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
|
|
73
|
+
name: 'Amazon Route53 Health Check'
|
|
74
|
+
category: 'Service Agent'
|
|
75
|
+
producer:
|
|
76
|
+
name: 'Amazon Web Services'
|
|
77
|
+
url: 'https://aws.amazon.com/'
|
|
63
78
|
|
|
64
79
|
- regex: 'AmorankSpider'
|
|
65
80
|
name: 'Amorank Spider'
|
|
@@ -75,29 +90,45 @@
|
|
|
75
90
|
url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
|
|
76
91
|
producer:
|
|
77
92
|
name: 'The Apache Software Foundation'
|
|
78
|
-
url: '
|
|
93
|
+
url: 'https://www.apache.org/foundation/'
|
|
79
94
|
|
|
80
95
|
- regex: 'Applebot'
|
|
81
96
|
name: 'Applebot'
|
|
82
97
|
category: 'Crawler'
|
|
83
|
-
url: '
|
|
98
|
+
url: 'https://support.apple.com/en-us/HT204683'
|
|
84
99
|
producer:
|
|
85
100
|
name: 'Apple Inc'
|
|
86
|
-
url: '
|
|
101
|
+
url: 'https://www.apple.com'
|
|
102
|
+
|
|
103
|
+
- regex: "AppSignalBot"
|
|
104
|
+
name: "AppSignalBot"
|
|
105
|
+
category: "Site Monitor"
|
|
106
|
+
url: "https://docs.appsignal.com/uptime-monitoring/"
|
|
107
|
+
producer:
|
|
108
|
+
name: "AppSignal"
|
|
109
|
+
url: "https://appsignal.com/"
|
|
87
110
|
|
|
88
111
|
- regex: 'Arachni'
|
|
89
112
|
name: 'Arachni'
|
|
90
113
|
category: 'Security Checker'
|
|
91
|
-
url: '
|
|
114
|
+
url: 'https://www.arachni-scanner.com/'
|
|
92
115
|
producer:
|
|
93
116
|
name: 'Sarosys LLC'
|
|
94
|
-
url: '
|
|
117
|
+
url: 'https://www.sarosys.com/'
|
|
118
|
+
|
|
119
|
+
- regex: 'AspiegelBot'
|
|
120
|
+
name: 'AspiegelBot'
|
|
121
|
+
category: 'Crawler'
|
|
122
|
+
url: 'https://aspiegel.com/'
|
|
123
|
+
producer:
|
|
124
|
+
name: 'Huawei'
|
|
125
|
+
url: 'https://www.huawei.com/'
|
|
95
126
|
|
|
96
127
|
- regex: 'Castro 2, Episode Duration Lookup'
|
|
97
128
|
name: 'Castro 2'
|
|
98
129
|
category: 'Service Agent'
|
|
99
130
|
url: 'http://supertop.co/castro/'
|
|
100
|
-
producer:
|
|
131
|
+
producer:
|
|
101
132
|
name: 'Supertop'
|
|
102
133
|
url: 'http://supertop.co'
|
|
103
134
|
|
|
@@ -112,10 +143,10 @@
|
|
|
112
143
|
- regex: 'archive\.org_bot|special_archiver'
|
|
113
144
|
name: 'archive.org bot'
|
|
114
145
|
category: 'Crawler'
|
|
115
|
-
url: '
|
|
146
|
+
url: 'https://archive.org/details/archive.org_bot'
|
|
116
147
|
producer:
|
|
117
148
|
name: 'The Internet Archive'
|
|
118
|
-
url: '
|
|
149
|
+
url: 'https://archive.org'
|
|
119
150
|
|
|
120
151
|
- regex: 'Ask Jeeves/Teoma'
|
|
121
152
|
name: 'Ask Jeeves'
|
|
@@ -141,7 +172,7 @@
|
|
|
141
172
|
name: '2.0Promotion GbR'
|
|
142
173
|
url: 'http://www.backlinktest.com'
|
|
143
174
|
|
|
144
|
-
- regex: '
|
|
175
|
+
- regex: 'Baidu.*spider|baidu Transcoder'
|
|
145
176
|
name: 'Baidu Spider'
|
|
146
177
|
category: 'Search bot'
|
|
147
178
|
url: 'http://www.baidu.com/search/spider.htm'
|
|
@@ -157,6 +188,14 @@
|
|
|
157
188
|
name: ''
|
|
158
189
|
url: ''
|
|
159
190
|
|
|
191
|
+
- regex: 'Better Uptime Bot'
|
|
192
|
+
name: 'Better Uptime Bot'
|
|
193
|
+
category: 'Site Monitor'
|
|
194
|
+
url: 'https://betteruptime.com/faq'
|
|
195
|
+
producer:
|
|
196
|
+
name: 'Better Uptime'
|
|
197
|
+
url: 'https://betteruptime.com/'
|
|
198
|
+
|
|
160
199
|
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
|
|
161
200
|
name: 'BingBot'
|
|
162
201
|
category: 'Search bot'
|
|
@@ -173,7 +212,7 @@
|
|
|
173
212
|
name: 'Blekko'
|
|
174
213
|
url: 'http://blekko.com'
|
|
175
214
|
|
|
176
|
-
- regex: 'BLEXBot
|
|
215
|
+
- regex: 'BLEXBot'
|
|
177
216
|
name: 'BLEXBot Crawler'
|
|
178
217
|
category: 'Crawler'
|
|
179
218
|
url: 'http://webmeup-crawler.com'
|
|
@@ -197,6 +236,13 @@
|
|
|
197
236
|
name: 'Blogtrottr Ltd'
|
|
198
237
|
url: 'https://blogtrottr.com/'
|
|
199
238
|
|
|
239
|
+
- regex: 'BoardReader Blog Indexer'
|
|
240
|
+
name: 'BoardReader Blog Indexer'
|
|
241
|
+
category: 'Crawler'
|
|
242
|
+
producer:
|
|
243
|
+
name: 'BoardReader'
|
|
244
|
+
url: 'https://boardreader.com/'
|
|
245
|
+
|
|
200
246
|
- regex: 'BountiiBot'
|
|
201
247
|
name: 'Bountii Bot'
|
|
202
248
|
category: 'Search bot'
|
|
@@ -261,6 +307,14 @@
|
|
|
261
307
|
name: 'CloudFlare'
|
|
262
308
|
url: 'http://www.cloudflare.com'
|
|
263
309
|
|
|
310
|
+
- regex: 'CloudflareDiagnostics'
|
|
311
|
+
name: 'Cloudflare Diagnostics'
|
|
312
|
+
category: 'Site Monitor'
|
|
313
|
+
url: 'https://www.cloudflare.com/'
|
|
314
|
+
producer:
|
|
315
|
+
name: 'Cloudflare'
|
|
316
|
+
url: 'https://www.cloudflare.com'
|
|
317
|
+
|
|
264
318
|
- regex: 'CloudFlare-AlwaysOnline'
|
|
265
319
|
name: 'CloudFlare Always Online'
|
|
266
320
|
category: 'Site Monitor'
|
|
@@ -269,13 +323,13 @@
|
|
|
269
323
|
name: 'CloudFlare'
|
|
270
324
|
url: 'http://www.cloudflare.com'
|
|
271
325
|
|
|
272
|
-
- regex: 'coccoc
|
|
326
|
+
- regex: 'coccoc.com'
|
|
273
327
|
name: 'Cốc Cốc Bot'
|
|
274
|
-
url: '
|
|
328
|
+
url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
|
|
275
329
|
category: 'Search bot'
|
|
276
330
|
producer:
|
|
277
331
|
name: 'Cốc Cốc'
|
|
278
|
-
url: '
|
|
332
|
+
url: 'https://coccoc.com/'
|
|
279
333
|
|
|
280
334
|
- regex: 'collectd'
|
|
281
335
|
name: 'Collectd'
|
|
@@ -309,6 +363,14 @@
|
|
|
309
363
|
name: 'Datadog'
|
|
310
364
|
url: 'https://www.datadoghq.com/'
|
|
311
365
|
|
|
366
|
+
- regex: 'Datanyze'
|
|
367
|
+
name: 'Datanyze'
|
|
368
|
+
url: ''
|
|
369
|
+
category: 'Crawler'
|
|
370
|
+
producer:
|
|
371
|
+
name: 'Datanyze'
|
|
372
|
+
url: 'https://www.datanyze.com'
|
|
373
|
+
|
|
312
374
|
- regex: 'Dataprovider'
|
|
313
375
|
name: 'Dataprovider'
|
|
314
376
|
category: 'Crawler'
|
|
@@ -333,7 +395,7 @@
|
|
|
333
395
|
name: 'DAZOO.FR'
|
|
334
396
|
url: 'http://dazoo.fr'
|
|
335
397
|
|
|
336
|
-
- regex: 'discobot
|
|
398
|
+
- regex: 'discobot'
|
|
337
399
|
name: 'Discobot'
|
|
338
400
|
category: 'Search bot'
|
|
339
401
|
url: 'http://discoveryengine.com/discobot.html'
|
|
@@ -357,7 +419,7 @@
|
|
|
357
419
|
name: 'SEOmoz, Inc.'
|
|
358
420
|
url: 'http://moz.com/'
|
|
359
421
|
|
|
360
|
-
- regex: 'DuckDuck'
|
|
422
|
+
- regex: 'DuckDuck(?:Go-Favicons-)?Bot'
|
|
361
423
|
name: 'DuckDuckGo Bot'
|
|
362
424
|
category: 'Search bot'
|
|
363
425
|
url: 'https://duckduckgo.com/duckduckbot'
|
|
@@ -373,6 +435,13 @@
|
|
|
373
435
|
name: 'easou ICP'
|
|
374
436
|
url: 'http://www.easou.com'
|
|
375
437
|
|
|
438
|
+
- regex: 'eCairn-Grabber'
|
|
439
|
+
name: 'eCairn-Grabber'
|
|
440
|
+
category: 'Crawler'
|
|
441
|
+
producer:
|
|
442
|
+
name: 'eCairn'
|
|
443
|
+
url: 'https://ecairn.com'
|
|
444
|
+
|
|
376
445
|
- regex: 'EMail Exractor'
|
|
377
446
|
name: 'EMail Exractor'
|
|
378
447
|
category: 'Crawler'
|
|
@@ -389,7 +458,7 @@
|
|
|
389
458
|
name: 'eVenture Capital Partners II, LLC'
|
|
390
459
|
url: 'http://www.eventures.vc/'
|
|
391
460
|
|
|
392
|
-
- regex: 'Exabot
|
|
461
|
+
- regex: 'Exabot|ExaleadCloudview'
|
|
393
462
|
name: 'ExaBot'
|
|
394
463
|
category: 'Crawler'
|
|
395
464
|
url: 'http://www.exabot.com/go/robot'
|
|
@@ -413,7 +482,7 @@
|
|
|
413
482
|
name: 'SEOmoz, Inc.'
|
|
414
483
|
url: 'http://moz.com/'
|
|
415
484
|
|
|
416
|
-
- regex: 'facebookexternalhit|facebookplatform'
|
|
485
|
+
- regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
|
|
417
486
|
name: 'Facebook External Hit'
|
|
418
487
|
category: 'Social Media Agent'
|
|
419
488
|
url: 'https://www.facebook.com/externalhit_uatext.php'
|
|
@@ -445,7 +514,7 @@
|
|
|
445
514
|
name: 'David Smith & Developing Perspective, LLC'
|
|
446
515
|
url: 'https://david-smith.org'
|
|
447
516
|
|
|
448
|
-
- regex: '
|
|
517
|
+
- regex: 'Feedly'
|
|
449
518
|
name: 'Feedly'
|
|
450
519
|
url: 'http://www.feedly.com'
|
|
451
520
|
category: 'Feed Fetcher'
|
|
@@ -482,6 +551,11 @@
|
|
|
482
551
|
category: 'Crawler'
|
|
483
552
|
url: 'http://www.findxbot.com'
|
|
484
553
|
|
|
554
|
+
- regex: 'FreshRSS'
|
|
555
|
+
name: 'FreshRSS'
|
|
556
|
+
category: 'Feed Fetcher'
|
|
557
|
+
url: 'https://freshrss.org/'
|
|
558
|
+
|
|
485
559
|
- regex: 'Genieo'
|
|
486
560
|
name: 'Genieo Web filter'
|
|
487
561
|
category: ''
|
|
@@ -506,6 +580,10 @@
|
|
|
506
580
|
name: ''
|
|
507
581
|
url: ''
|
|
508
582
|
|
|
583
|
+
- regex: 'gobuster'
|
|
584
|
+
name: 'Gobuster'
|
|
585
|
+
url: 'https://github.com/OJ/gobuster'
|
|
586
|
+
|
|
509
587
|
- regex: 'ichiro/mobile goo'
|
|
510
588
|
name: 'Goo'
|
|
511
589
|
category: 'Search bot'
|
|
@@ -514,6 +592,14 @@
|
|
|
514
592
|
name: 'NTT Resonant'
|
|
515
593
|
url: 'http://goo.ne.jp'
|
|
516
594
|
|
|
595
|
+
- regex: 'Storebot-Google'
|
|
596
|
+
name: 'Google StoreBot'
|
|
597
|
+
category: 'Crawler'
|
|
598
|
+
|
|
599
|
+
- regex: 'Google Favicon'
|
|
600
|
+
name: 'Google Favicon'
|
|
601
|
+
category: 'Crawler'
|
|
602
|
+
|
|
517
603
|
- regex: 'Google Search Console'
|
|
518
604
|
name: 'Google Search Console'
|
|
519
605
|
category: 'Crawler'
|
|
@@ -538,6 +624,14 @@
|
|
|
538
624
|
name: 'Google Inc.'
|
|
539
625
|
url: 'http://www.google.com'
|
|
540
626
|
|
|
627
|
+
- regex: 'Google-Cloud-Scheduler'
|
|
628
|
+
name: 'Google Cloud Scheduler'
|
|
629
|
+
category: 'Crawler'
|
|
630
|
+
url: 'https://cloud.google.com/scheduler'
|
|
631
|
+
producer:
|
|
632
|
+
name: 'Google Inc.'
|
|
633
|
+
url: 'https://www.google.com'
|
|
634
|
+
|
|
541
635
|
- regex: 'Google-Structured-Data-Testing-Tool'
|
|
542
636
|
name: 'Google Structured Data Testing Tool'
|
|
543
637
|
category: 'Validator'
|
|
@@ -546,6 +640,14 @@
|
|
|
546
640
|
name: 'Google Inc.'
|
|
547
641
|
url: 'http://www.google.com'
|
|
548
642
|
|
|
643
|
+
- regex: 'GoogleStackdriverMonitoring'
|
|
644
|
+
name: 'Google Stackdriver Monitoring'
|
|
645
|
+
category: 'Site Monitor'
|
|
646
|
+
url: 'https://cloud.google.com/monitoring'
|
|
647
|
+
producer:
|
|
648
|
+
name: 'Google Inc.'
|
|
649
|
+
url: 'https://www.google.com'
|
|
650
|
+
|
|
549
651
|
- regex: 'via ggpht\.com GoogleImageProxy'
|
|
550
652
|
name: 'Gmail Image Proxy'
|
|
551
653
|
category: 'Crawler'
|
|
@@ -553,7 +655,7 @@
|
|
|
553
655
|
producer:
|
|
554
656
|
name: 'Google Inc.'
|
|
555
657
|
url: 'http://www.google.com'
|
|
556
|
-
|
|
658
|
+
|
|
557
659
|
- regex: 'SeznamEmailProxy'
|
|
558
660
|
name: 'Seznam Email Proxy'
|
|
559
661
|
category: 'Crawler'
|
|
@@ -586,7 +688,7 @@
|
|
|
586
688
|
name: 'Visual Meta'
|
|
587
689
|
url: 'https://www.shopalike.cz/'
|
|
588
690
|
|
|
589
|
-
- regex: '
|
|
691
|
+
- regex: 'AdsBot-Google|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Ads-Qualify|Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot|GoogleProducer|Google.*/\+/web/snippet'
|
|
590
692
|
name: 'Googlebot'
|
|
591
693
|
category: 'Search bot'
|
|
592
694
|
url: 'http://www.google.com/bot.html'
|
|
@@ -600,7 +702,7 @@
|
|
|
600
702
|
url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
|
|
601
703
|
producer:
|
|
602
704
|
name: 'The Internet Archive'
|
|
603
|
-
url: '
|
|
705
|
+
url: 'https://archive.org'
|
|
604
706
|
|
|
605
707
|
- regex: 'HubSpot '
|
|
606
708
|
name: 'HubSpot'
|
|
@@ -609,7 +711,6 @@
|
|
|
609
711
|
name: 'HubSpot Inc.'
|
|
610
712
|
url: 'https://www.hubspot.com'
|
|
611
713
|
|
|
612
|
-
|
|
613
714
|
- regex: 'HTTPMon'
|
|
614
715
|
name: 'HTTPMon'
|
|
615
716
|
category: 'Site Monitor'
|
|
@@ -626,6 +727,11 @@
|
|
|
626
727
|
name: ''
|
|
627
728
|
url: ''
|
|
628
729
|
|
|
730
|
+
- regex: 'inoreader.com'
|
|
731
|
+
name: 'inoreader'
|
|
732
|
+
category: 'Feed Reader'
|
|
733
|
+
url: 'https://www.inoreader.com'
|
|
734
|
+
|
|
629
735
|
- regex: 'iisbot'
|
|
630
736
|
name: 'IIS Site Analysis'
|
|
631
737
|
category: 'Crawler'
|
|
@@ -636,7 +742,7 @@
|
|
|
636
742
|
|
|
637
743
|
- regex: 'ips-agent'
|
|
638
744
|
name: 'IPS Agent'
|
|
639
|
-
category: '
|
|
745
|
+
category: 'Crawler'
|
|
640
746
|
producer:
|
|
641
747
|
name: 'VeriSign, Inc'
|
|
642
748
|
url: 'http://www.verisign.com/'
|
|
@@ -649,6 +755,10 @@
|
|
|
649
755
|
name: ''
|
|
650
756
|
url: 'https://ip-guide.com'
|
|
651
757
|
|
|
758
|
+
- regex: 'k6/[0-9\.]+'
|
|
759
|
+
name: 'K6'
|
|
760
|
+
url: 'https://k6.io/'
|
|
761
|
+
|
|
652
762
|
- regex: 'kouio'
|
|
653
763
|
name: 'Kouio'
|
|
654
764
|
url: 'http://kouio.com/'
|
|
@@ -673,7 +783,7 @@
|
|
|
673
783
|
name: 'Lighthouse'
|
|
674
784
|
url: 'https://developers.google.com/web/tools/lighthouse'
|
|
675
785
|
|
|
676
|
-
- regex: 'linkdexbot
|
|
786
|
+
- regex: 'linkdexbot|linkdex\.com'
|
|
677
787
|
name: 'Linkdex Bot'
|
|
678
788
|
category: 'Search bot'
|
|
679
789
|
url: 'http://www.linkdex.com/bots'
|
|
@@ -696,7 +806,7 @@
|
|
|
696
806
|
name: ''
|
|
697
807
|
url: ''
|
|
698
808
|
|
|
699
|
-
- regex: 'Mail\.RU
|
|
809
|
+
- regex: 'Mail\.RU'
|
|
700
810
|
name: 'Mail.Ru Bot'
|
|
701
811
|
category: 'Search bot'
|
|
702
812
|
url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
|
|
@@ -720,7 +830,7 @@
|
|
|
720
830
|
name: ''
|
|
721
831
|
url: ''
|
|
722
832
|
|
|
723
|
-
- regex
|
|
833
|
+
- regex: 'masscan'
|
|
724
834
|
name: 'masscan'
|
|
725
835
|
url: 'https://github.com/robertdavidgraham/masscan'
|
|
726
836
|
category: 'Crawler'
|
|
@@ -808,6 +918,10 @@
|
|
|
808
918
|
name: 'Nagios Plugins Development Team'
|
|
809
919
|
url: 'https://nagios.org'
|
|
810
920
|
|
|
921
|
+
- regex: 'nbertaupete95\(at\)gmail.com'
|
|
922
|
+
name: 'nbertaupete95'
|
|
923
|
+
category: 'Crawler'
|
|
924
|
+
|
|
811
925
|
- regex: 'Netcraft( Web Server Survey| SSL Server Survey|SurveyAgent)'
|
|
812
926
|
name: 'Netcraft Survey Bot'
|
|
813
927
|
category: 'Search bot'
|
|
@@ -818,7 +932,7 @@
|
|
|
818
932
|
|
|
819
933
|
- regex: 'netEstate NE Crawler'
|
|
820
934
|
name: 'netEstate'
|
|
821
|
-
category: '
|
|
935
|
+
category: 'Crawler'
|
|
822
936
|
url: 'http://www.website-datenbank.de/Impressum'
|
|
823
937
|
producer:
|
|
824
938
|
name: 'netEstate GmbH'
|
|
@@ -864,10 +978,17 @@
|
|
|
864
978
|
name: 'Nmap'
|
|
865
979
|
url: 'https://nmap.org/'
|
|
866
980
|
|
|
981
|
+
- regex: 'Nuzzel'
|
|
982
|
+
name: 'Nuzzel'
|
|
983
|
+
category: 'Crawler'
|
|
984
|
+
producer:
|
|
985
|
+
name: 'Nuzzel'
|
|
986
|
+
url: 'https://www.nuzzel.com/'
|
|
987
|
+
|
|
867
988
|
- regex: 'Octopus [0-9]'
|
|
868
989
|
name: 'Octopus'
|
|
869
990
|
|
|
870
|
-
- regex: 'omgili
|
|
991
|
+
- regex: 'omgili'
|
|
871
992
|
name: 'Omgili bot'
|
|
872
993
|
category: 'Search bot'
|
|
873
994
|
url: 'http://www.omgili.com/Crawler.html'
|
|
@@ -944,7 +1065,15 @@
|
|
|
944
1065
|
name: 'Bitlove'
|
|
945
1066
|
url: 'http://bitlove.org/'
|
|
946
1067
|
|
|
947
|
-
- regex: '
|
|
1068
|
+
- regex: 'PRTG Network Monitor'
|
|
1069
|
+
name: 'PRTG Network Monitor'
|
|
1070
|
+
category: 'Network Monitor'
|
|
1071
|
+
url: 'https://www.paessler.com/prtg'
|
|
1072
|
+
producer:
|
|
1073
|
+
name: 'Paessler AG'
|
|
1074
|
+
url: 'https://www.paessler.com'
|
|
1075
|
+
|
|
1076
|
+
- regex: 'psbot'
|
|
948
1077
|
name: 'Picsearch bot'
|
|
949
1078
|
category: 'Search bot'
|
|
950
1079
|
url: 'http://www.picsearch.com/bot.html'
|
|
@@ -952,7 +1081,7 @@
|
|
|
952
1081
|
name: 'Picsearch'
|
|
953
1082
|
url: 'http://www.picsearch.com'
|
|
954
1083
|
|
|
955
|
-
- regex: 'Pingdom
|
|
1084
|
+
- regex: 'Pingdom(?:\.com|TMS)'
|
|
956
1085
|
name: 'Pingdom Bot'
|
|
957
1086
|
category: 'Site Monitor'
|
|
958
1087
|
url: ''
|
|
@@ -968,6 +1097,14 @@
|
|
|
968
1097
|
name: 'Quora'
|
|
969
1098
|
url: 'http://www.quora.com'
|
|
970
1099
|
|
|
1100
|
+
- regex: 'Quora-Bot'
|
|
1101
|
+
name: 'Quora Bot'
|
|
1102
|
+
category: 'Crawler'
|
|
1103
|
+
url: ''
|
|
1104
|
+
producer:
|
|
1105
|
+
name: 'Quora'
|
|
1106
|
+
url: 'https://www.quora.com/'
|
|
1107
|
+
|
|
971
1108
|
- regex: 'RamblerMail'
|
|
972
1109
|
name: 'RamblerMail Image Proxy'
|
|
973
1110
|
category: 'Crawler'
|
|
@@ -1111,6 +1248,14 @@
|
|
|
1111
1248
|
name: 'Seznam.cz, a.s.'
|
|
1112
1249
|
url: 'http://www.seznam.cz/'
|
|
1113
1250
|
|
|
1251
|
+
- regex: 'shopify-partner-homepage-scraper'
|
|
1252
|
+
name: 'Shopify Partner'
|
|
1253
|
+
category: 'Crawler'
|
|
1254
|
+
url: 'https://www.shopify.com/partners'
|
|
1255
|
+
producer:
|
|
1256
|
+
name: 'Shopify'
|
|
1257
|
+
url: 'https://www.shopify.com/'
|
|
1258
|
+
|
|
1114
1259
|
- regex: 'ShopWiki'
|
|
1115
1260
|
name: 'ShopWiki'
|
|
1116
1261
|
category: 'Search tools'
|
|
@@ -1143,6 +1288,14 @@
|
|
|
1143
1288
|
name: 'SISTRIX GmbH'
|
|
1144
1289
|
url: 'http://www.sistrix.de'
|
|
1145
1290
|
|
|
1291
|
+
- regex: 'compatible; (?:SISTRIX )?Optimizer'
|
|
1292
|
+
name: 'SISTRIX Optimizer'
|
|
1293
|
+
category: 'Crawler'
|
|
1294
|
+
url: 'https://optimizer.sistrix.com'
|
|
1295
|
+
producer:
|
|
1296
|
+
name: 'SISTRIX GmbH'
|
|
1297
|
+
url: 'http://www.sistrix.de'
|
|
1298
|
+
|
|
1146
1299
|
- regex: 'SiteSucker'
|
|
1147
1300
|
name: 'SiteSucker'
|
|
1148
1301
|
category: 'Crawler'
|
|
@@ -1180,6 +1333,14 @@
|
|
|
1180
1333
|
name: 'Tencent Holdings'
|
|
1181
1334
|
url: 'http://www.soso.com'
|
|
1182
1335
|
|
|
1336
|
+
- regex: 'Sprinklr'
|
|
1337
|
+
name: 'Sprinklr'
|
|
1338
|
+
category: 'Crawler'
|
|
1339
|
+
url: ''
|
|
1340
|
+
producer:
|
|
1341
|
+
name: 'Sprinklr, Inc.'
|
|
1342
|
+
url: 'https://www.sprinklr.com/'
|
|
1343
|
+
|
|
1183
1344
|
- regex: 'sqlmap/'
|
|
1184
1345
|
name: 'sqlmap'
|
|
1185
1346
|
category: 'Security Checker'
|
|
@@ -1225,13 +1386,20 @@
|
|
|
1225
1386
|
name: 'Tailrank Inc'
|
|
1226
1387
|
url: 'http://spinn3r.com'
|
|
1227
1388
|
|
|
1228
|
-
- regex: '
|
|
1389
|
+
- regex: 'SputnikBot'
|
|
1229
1390
|
name: 'Sputnik Bot'
|
|
1230
|
-
category: ''
|
|
1391
|
+
category: 'Crawler'
|
|
1392
|
+
url: ''
|
|
1393
|
+
|
|
1394
|
+
- regex: 'SputnikFaviconBot'
|
|
1395
|
+
name: 'Sputnik Favicon Bot'
|
|
1396
|
+
category: 'Crawler'
|
|
1397
|
+
url: ''
|
|
1398
|
+
|
|
1399
|
+
- regex: 'SputnikImageBot'
|
|
1400
|
+
name: 'Sputnik Image Bot'
|
|
1401
|
+
category: 'Crawler'
|
|
1231
1402
|
url: ''
|
|
1232
|
-
producer:
|
|
1233
|
-
name: ''
|
|
1234
|
-
url: ''
|
|
1235
1403
|
|
|
1236
1404
|
- regex: 'SurveyBot'
|
|
1237
1405
|
name: 'Survey Bot'
|
|
@@ -1274,6 +1442,11 @@
|
|
|
1274
1442
|
name: ''
|
|
1275
1443
|
url: ''
|
|
1276
1444
|
|
|
1445
|
+
- regex: 'theoldreader.com'
|
|
1446
|
+
name: 'theoldreader'
|
|
1447
|
+
category: 'Feed Reader'
|
|
1448
|
+
url: 'https://theoldreader.com'
|
|
1449
|
+
|
|
1277
1450
|
- regex: 'trendictionbot'
|
|
1278
1451
|
name: 'Trendiction Bot'
|
|
1279
1452
|
category: 'Crawler'
|
|
@@ -1306,6 +1479,13 @@
|
|
|
1306
1479
|
name: 'Mediasift'
|
|
1307
1480
|
url: ''
|
|
1308
1481
|
|
|
1482
|
+
- regex: 'Twingly Recon'
|
|
1483
|
+
name: 'Twingly Recon'
|
|
1484
|
+
category: 'Crawler'
|
|
1485
|
+
producer:
|
|
1486
|
+
name: 'Twingly'
|
|
1487
|
+
url: 'https://www.twingly.com'
|
|
1488
|
+
|
|
1309
1489
|
- regex: 'Twitterbot'
|
|
1310
1490
|
name: 'Twitterbot'
|
|
1311
1491
|
category: 'Social Media Agent'
|
|
@@ -1438,7 +1618,7 @@
|
|
|
1438
1618
|
category: 'Site Monitor'
|
|
1439
1619
|
url: 'https://www.webpagetest.org'
|
|
1440
1620
|
|
|
1441
|
-
- regex: 'WeSEE
|
|
1621
|
+
- regex: 'WeSEE'
|
|
1442
1622
|
name: 'WeSEE:Search'
|
|
1443
1623
|
category: 'Search bot'
|
|
1444
1624
|
url: 'http://www.wesee.com/bot'
|
|
@@ -1478,6 +1658,14 @@
|
|
|
1478
1658
|
name: 'Wotbox'
|
|
1479
1659
|
url: 'http://www.wotbox.com'
|
|
1480
1660
|
|
|
1661
|
+
- regex: 'XenForo'
|
|
1662
|
+
name: 'XenForo'
|
|
1663
|
+
category: 'Service Agent'
|
|
1664
|
+
url: 'https://xenforo.com/'
|
|
1665
|
+
producer:
|
|
1666
|
+
name: 'XenForo Ltd.'
|
|
1667
|
+
url: 'https://xenforo.com/'
|
|
1668
|
+
|
|
1481
1669
|
- regex: 'yacybot'
|
|
1482
1670
|
name: 'YaCy'
|
|
1483
1671
|
category: 'Search bot'
|
|
@@ -1502,6 +1690,14 @@
|
|
|
1502
1690
|
name: 'Yahoo! Inc.'
|
|
1503
1691
|
url: 'http://www.yahoo.com'
|
|
1504
1692
|
|
|
1693
|
+
- regex: 'YahooMailProxy'
|
|
1694
|
+
name: 'Yahoo! Mail Proxy'
|
|
1695
|
+
category: 'Service Agent'
|
|
1696
|
+
url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
|
|
1697
|
+
producer:
|
|
1698
|
+
name: 'Yahoo! Inc.'
|
|
1699
|
+
url: 'http://www.yahoo.com'
|
|
1700
|
+
|
|
1505
1701
|
- regex: 'YahooCacheSystem'
|
|
1506
1702
|
name: 'Yahoo! Cache System'
|
|
1507
1703
|
category: 'Crawler'
|
|
@@ -1510,7 +1706,15 @@
|
|
|
1510
1706
|
name: 'Yahoo! Inc.'
|
|
1511
1707
|
url: 'http://www.yahoo.com'
|
|
1512
1708
|
|
|
1513
|
-
- regex: '
|
|
1709
|
+
- regex: 'Y!J-BRW'
|
|
1710
|
+
name: 'Yahoo! Japan BRW'
|
|
1711
|
+
category: 'Crawler'
|
|
1712
|
+
url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
|
|
1713
|
+
producer:
|
|
1714
|
+
name: 'Yahoo! Japan Corp.'
|
|
1715
|
+
url: 'https://www.yahoo.co.jp/'
|
|
1716
|
+
|
|
1717
|
+
- regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
|
|
1514
1718
|
name: 'Yandex Bot'
|
|
1515
1719
|
category: 'Search bot'
|
|
1516
1720
|
url: 'http://www.yandex.com/bots'
|
|
@@ -1518,7 +1722,7 @@
|
|
|
1518
1722
|
name: 'Yandex LLC'
|
|
1519
1723
|
url: 'http://company.yandex.com'
|
|
1520
1724
|
|
|
1521
|
-
- regex: 'Yeti'
|
|
1725
|
+
- regex: 'Yeti|NaverJapan'
|
|
1522
1726
|
name: 'Yeti/Naverbot'
|
|
1523
1727
|
category: 'Search bot'
|
|
1524
1728
|
url: 'http://help.naver.com/robots/'
|
|
@@ -1595,18 +1799,18 @@
|
|
|
1595
1799
|
- regex: 'HubPages.*crawlingpolicy'
|
|
1596
1800
|
name: 'HubPages'
|
|
1597
1801
|
category: 'Crawler'
|
|
1598
|
-
url: '
|
|
1802
|
+
url: 'https://hubpages.com/help/crawlingpolicy'
|
|
1599
1803
|
producer:
|
|
1600
|
-
name: 'HubPages'
|
|
1601
|
-
url: '
|
|
1804
|
+
name: 'HubPages, Inc.'
|
|
1805
|
+
url: 'https://discover.hubpages.com/'
|
|
1602
1806
|
|
|
1603
|
-
- regex: 'Pinterest
|
|
1807
|
+
- regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
|
|
1604
1808
|
name: 'Pinterest'
|
|
1605
|
-
url: ''
|
|
1809
|
+
url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
|
|
1606
1810
|
category: 'Crawler'
|
|
1607
1811
|
producer:
|
|
1608
1812
|
name: 'Pinterest'
|
|
1609
|
-
url: '
|
|
1813
|
+
url: 'https://www.pinterest.com/'
|
|
1610
1814
|
|
|
1611
1815
|
- regex: 'Site24x7'
|
|
1612
1816
|
name: 'Site24x7 Website Monitoring'
|
|
@@ -1648,13 +1852,13 @@
|
|
|
1648
1852
|
name: 'Monitor.Us'
|
|
1649
1853
|
url: 'http://www.monitor.us'
|
|
1650
1854
|
|
|
1651
|
-
- regex: 'Catchpoint
|
|
1855
|
+
- regex: 'Catchpoint'
|
|
1652
1856
|
name: 'Catchpoint'
|
|
1653
1857
|
category: 'Site Monitor'
|
|
1654
|
-
url: ''
|
|
1858
|
+
url: 'https://www.catchpoint.com/'
|
|
1655
1859
|
producer:
|
|
1656
1860
|
name: 'Catchpoint Systems'
|
|
1657
|
-
url: '
|
|
1861
|
+
url: 'https://www.catchpoint.com/'
|
|
1658
1862
|
|
|
1659
1863
|
- regex: 'bitlybot'
|
|
1660
1864
|
name: 'BitlyBot'
|
|
@@ -1722,7 +1926,7 @@
|
|
|
1722
1926
|
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
|
1723
1927
|
name: 'RSSRadio Bot'
|
|
1724
1928
|
|
|
1725
|
-
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|
|
|
1929
|
+
- regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|My User Agent)'
|
|
1726
1930
|
name: 'Generic Bot'
|
|
1727
1931
|
|
|
1728
1932
|
- regex: '^sentry'
|
|
@@ -1737,7 +1941,185 @@
|
|
|
1737
1941
|
name: 'Spotify'
|
|
1738
1942
|
url: 'https://www.spotify.com'
|
|
1739
1943
|
|
|
1740
|
-
|
|
1944
|
+
- regex: 'The Knowledge AI'
|
|
1945
|
+
name: 'The Knowledge AI'
|
|
1946
|
+
category: 'Crawler'
|
|
1947
|
+
|
|
1948
|
+
- regex: 'Embedly'
|
|
1949
|
+
name: 'Embedly'
|
|
1950
|
+
category: 'Crawler'
|
|
1951
|
+
url: 'https://support.embed.ly/hc/en-us'
|
|
1952
|
+
producer:
|
|
1953
|
+
name: 'A Medium, Corp.'
|
|
1954
|
+
url: 'https://medium.com/'
|
|
1955
|
+
|
|
1956
|
+
- regex: 'BrandVerity'
|
|
1957
|
+
name: 'BrandVerity'
|
|
1958
|
+
category: 'Crawler'
|
|
1959
|
+
url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
|
|
1960
|
+
producer:
|
|
1961
|
+
name: 'BrandVerity, Inc.'
|
|
1962
|
+
url: 'https://www.brandverity.com/'
|
|
1963
|
+
|
|
1964
|
+
- regex: 'Kaspersky Lab CFR link resolver'
|
|
1965
|
+
name: 'Kaspersky'
|
|
1966
|
+
category: 'Security Checker'
|
|
1967
|
+
url: 'https://www.kaspersky.com/'
|
|
1968
|
+
producer:
|
|
1969
|
+
name: 'AO Kaspersky Lab'
|
|
1970
|
+
url: 'https://www.kaspersky.com/'
|
|
1971
|
+
|
|
1972
|
+
- regex: 'eZ Publish Link Validator'
|
|
1973
|
+
name: 'eZ Publish Link Validator'
|
|
1974
|
+
category: 'Crawler'
|
|
1975
|
+
url: 'https://ez.no/'
|
|
1976
|
+
producer:
|
|
1977
|
+
name: 'eZ Systems AS'
|
|
1978
|
+
url: 'https://ez.no/'
|
|
1979
|
+
|
|
1980
|
+
- regex: 'woorankreview'
|
|
1981
|
+
name: 'WooRank'
|
|
1982
|
+
category: 'Search bot'
|
|
1983
|
+
url: 'https://www.woorank.com/'
|
|
1984
|
+
producer:
|
|
1985
|
+
name: 'WooRank sprl'
|
|
1986
|
+
url: 'https://www.woorank.com/'
|
|
1987
|
+
|
|
1988
|
+
- regex: '(Match|LinkCheck) by Siteimprove.com'
|
|
1989
|
+
name: 'Siteimprove'
|
|
1990
|
+
category: 'Search bot'
|
|
1991
|
+
url: 'https://siteimprove.com/'
|
|
1992
|
+
producer:
|
|
1993
|
+
name: 'Siteimprove GmbH'
|
|
1994
|
+
url: 'https://siteimprove.com/'
|
|
1995
|
+
|
|
1996
|
+
- regex: 'CATExplorador'
|
|
1997
|
+
name: 'CATExplorador'
|
|
1998
|
+
category: 'Search bot'
|
|
1999
|
+
url: 'https://fundacio.cat/ca/domini/'
|
|
2000
|
+
producer:
|
|
2001
|
+
name: 'Fundació puntCAT'
|
|
2002
|
+
url: 'https://fundacio.cat/ca/domini/'
|
|
2003
|
+
|
|
2004
|
+
- regex: 'Buck'
|
|
2005
|
+
name: 'Buck'
|
|
2006
|
+
category: 'Search bot'
|
|
2007
|
+
url: 'https://hypefactors.com/'
|
|
2008
|
+
producer:
|
|
2009
|
+
name: 'Hypefactors A/S'
|
|
2010
|
+
url: 'https://hypefactors.com/'
|
|
2011
|
+
|
|
2012
|
+
- regex: 'tracemyfile'
|
|
2013
|
+
name: 'TraceMyFile'
|
|
2014
|
+
category: 'Search bot'
|
|
2015
|
+
url: 'https://www.tracemyfile.com/'
|
|
2016
|
+
producer:
|
|
2017
|
+
name: 'Idee Inc.'
|
|
2018
|
+
url: 'http://ideeinc.com/'
|
|
2019
|
+
|
|
2020
|
+
- regex: 'zelist.ro feed parser'
|
|
2021
|
+
name: 'Ze List'
|
|
2022
|
+
url: 'https://www.zelist.ro/'
|
|
2023
|
+
category: 'Feed Fetcher'
|
|
2024
|
+
producer:
|
|
2025
|
+
name: 'Treeworks SRL'
|
|
2026
|
+
url: 'https://www.tree.ro/'
|
|
2027
|
+
|
|
2028
|
+
- regex: 'weborama-fetcher'
|
|
2029
|
+
name: 'Weborama'
|
|
2030
|
+
category: 'Search bot'
|
|
2031
|
+
url: 'https://weborama.com/'
|
|
2032
|
+
producer:
|
|
2033
|
+
name: 'Weborama SA'
|
|
2034
|
+
url: 'https://weborama.com/'
|
|
2035
|
+
|
|
2036
|
+
- regex: 'BoardReader Favicon Fetcher'
|
|
2037
|
+
name: 'BoardReader'
|
|
2038
|
+
category: 'Search bot'
|
|
2039
|
+
url: 'https://boardreader.com/'
|
|
2040
|
+
producer:
|
|
2041
|
+
name: 'Effyis Inc'
|
|
2042
|
+
url: 'https://boardreader.com/'
|
|
2043
|
+
|
|
2044
|
+
- regex: 'IDG/IT'
|
|
2045
|
+
name: 'IDG/IT'
|
|
2046
|
+
category: 'Search bot'
|
|
2047
|
+
url: 'https://spaziodati.eu/'
|
|
2048
|
+
producer:
|
|
2049
|
+
name: 'SpazioDati S.r.l.'
|
|
2050
|
+
url: 'https://spaziodati.eu/'
|
|
2051
|
+
|
|
2052
|
+
- regex: 'Bytespider'
|
|
2053
|
+
name: 'Bytespider'
|
|
2054
|
+
category: 'Search bot'
|
|
2055
|
+
url: 'https://bytedance.com/'
|
|
2056
|
+
producer:
|
|
2057
|
+
name: 'ByteDance Ltd.'
|
|
2058
|
+
url: 'https://bytedance.com/'
|
|
2059
|
+
|
|
2060
|
+
- regex: 'WikiDo'
|
|
2061
|
+
name: 'WikiDo'
|
|
2062
|
+
category: 'Search bot'
|
|
2063
|
+
url: 'https://www.wikido.com/'
|
|
2064
|
+
producer:
|
|
2065
|
+
name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
|
|
2066
|
+
url: 'https://www.wikido.com/'
|
|
2067
|
+
|
|
2068
|
+
- regex: 'AwarioSmartBot'
|
|
2069
|
+
name: 'Awario'
|
|
2070
|
+
category: 'Search bot'
|
|
2071
|
+
url: 'https://awario.com/bots.html'
|
|
2072
|
+
producer:
|
|
2073
|
+
name: 'Awario'
|
|
2074
|
+
url: 'https://awario.com/'
|
|
2075
|
+
|
|
2076
|
+
- regex: 'AwarioRssBot'
|
|
2077
|
+
name: 'Awario'
|
|
2078
|
+
category: 'Feed Fetcher'
|
|
2079
|
+
url: 'https://awario.com/bots.html'
|
|
2080
|
+
producer:
|
|
2081
|
+
name: 'Awario'
|
|
2082
|
+
url: 'https://awario.com/'
|
|
2083
|
+
|
|
2084
|
+
- regex: 'oBot'
|
|
2085
|
+
name: 'oBot'
|
|
2086
|
+
category: 'Search bot'
|
|
2087
|
+
url: 'https://www.xforce-security.com/crawler/'
|
|
2088
|
+
producer:
|
|
2089
|
+
name: 'IBM Germany Research & Development GmbH'
|
|
2090
|
+
url: 'https://exchange.xforce.ibmcloud.com/'
|
|
2091
|
+
|
|
2092
|
+
- regex: 'SMTBot'
|
|
2093
|
+
name: 'SMTBot'
|
|
2094
|
+
category: 'Search bot'
|
|
2095
|
+
url: 'https://www.similartech.com/smtbot'
|
|
2096
|
+
producer:
|
|
2097
|
+
name: 'SimilarTech Ltd.'
|
|
2098
|
+
url: 'https://www.similartech.com/'
|
|
2099
|
+
|
|
2100
|
+
- regex: 'LCC'
|
|
2101
|
+
name: 'LCC'
|
|
2102
|
+
category: 'Search bot'
|
|
2103
|
+
url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
|
|
2104
|
+
producer:
|
|
2105
|
+
name: 'Universität Leipzig'
|
|
2106
|
+
url: 'https://www.uni-leipzig.de/'
|
|
2107
|
+
|
|
2108
|
+
- regex: 'Startpagina-Linkchecker'
|
|
2109
|
+
name: 'Startpagina Linkchecker'
|
|
2110
|
+
category: 'Search bot'
|
|
2111
|
+
url: 'https://www.startpagina.nl/linkchecker'
|
|
2112
|
+
producer:
|
|
2113
|
+
name: 'Startpagina B.V.'
|
|
2114
|
+
url: 'https://www.startpagina.nl/'
|
|
2115
|
+
|
|
2116
|
+
- regex: 'GTmetrix'
|
|
2117
|
+
name: 'GTmetrix'
|
|
2118
|
+
category: 'Crawler'
|
|
2119
|
+
url: 'https://gtmetrix.com/'
|
|
2120
|
+
producer:
|
|
2121
|
+
name: 'Carbon60 Operating Co. Ltd.'
|
|
2122
|
+
url: 'https://www.carbon60.com/'
|
|
1741
2123
|
|
|
1742
2124
|
- regex: 'Nutch'
|
|
1743
2125
|
name: 'Nutch-based Bot'
|
|
@@ -1745,7 +2127,475 @@
|
|
|
1745
2127
|
url: 'https://nutch.apache.org'
|
|
1746
2128
|
producer:
|
|
1747
2129
|
name: 'The Apache Software Foundation'
|
|
1748
|
-
url: '
|
|
2130
|
+
url: 'https://www.apache.org/foundation/'
|
|
2131
|
+
|
|
2132
|
+
- regex: 'Seobility'
|
|
2133
|
+
name: 'Seobility'
|
|
2134
|
+
category: 'Crawler'
|
|
2135
|
+
url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
|
|
2136
|
+
|
|
2137
|
+
- regex: 'Vercelbot'
|
|
2138
|
+
name: 'Vercel Bot'
|
|
2139
|
+
category: 'Service bot'
|
|
2140
|
+
url: 'https://vercel.com'
|
|
2141
|
+
|
|
2142
|
+
- regex: 'Grammarly'
|
|
2143
|
+
name: 'Grammarly'
|
|
2144
|
+
category: 'Service bot'
|
|
2145
|
+
url: 'https://www.grammarly.com'
|
|
2146
|
+
|
|
2147
|
+
- regex: 'Robozilla'
|
|
2148
|
+
name: 'Robozilla'
|
|
2149
|
+
category: 'Crawler'
|
|
2150
|
+
|
|
2151
|
+
- regex: 'Domains Project'
|
|
2152
|
+
name: 'Domains Project'
|
|
2153
|
+
category: 'Crawler'
|
|
2154
|
+
url: 'https://domainsproject.org'
|
|
1749
2155
|
|
|
2156
|
+
- regex: 'PetalBot'
|
|
2157
|
+
name: 'Petal Bot'
|
|
2158
|
+
category: 'Crawler'
|
|
2159
|
+
url: 'https://aspiegel.com/petalbot'
|
|
2160
|
+
|
|
2161
|
+
- regex: 'SerendeputyBot'
|
|
2162
|
+
name: 'Serendeputy Bot'
|
|
2163
|
+
category: 'Crawler'
|
|
2164
|
+
url: 'https://serendeputy.com/about/serendeputy-bot'
|
|
2165
|
+
|
|
2166
|
+
- regex: 'ias-va.*admantx.*service-fetcher'
|
|
2167
|
+
name: 'ADmantX Service Fetcher'
|
|
2168
|
+
category: 'Service bot'
|
|
2169
|
+
url: 'https://www.admantx.com/service-fetcher.html'
|
|
2170
|
+
|
|
2171
|
+
- regex: 'SemanticScholarBot'
|
|
2172
|
+
name: 'Semantic Scholar Bot'
|
|
2173
|
+
category: 'Crawler'
|
|
2174
|
+
url: 'https://www.semanticscholar.org/crawler'
|
|
2175
|
+
|
|
2176
|
+
- regex: 'VelenPublicWebCrawler'
|
|
2177
|
+
name: 'Velen Public Web Crawler'
|
|
2178
|
+
category: 'Crawler'
|
|
2179
|
+
url: 'https://hunter.io/robot'
|
|
2180
|
+
|
|
2181
|
+
- regex: 'Barkrowler'
|
|
2182
|
+
name: 'Barkrowler'
|
|
2183
|
+
category: 'Crawler'
|
|
2184
|
+
url: 'http://www.exensa.com/crawl'
|
|
2185
|
+
|
|
2186
|
+
- regex: 'BDCbot'
|
|
2187
|
+
name: 'BDCbot'
|
|
2188
|
+
category: 'Crawler'
|
|
2189
|
+
url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
|
|
2190
|
+
producer:
|
|
2191
|
+
name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
|
|
2192
|
+
url: 'https://bigdatacorp.com.br/'
|
|
2193
|
+
|
|
2194
|
+
- regex: 'adbeat'
|
|
2195
|
+
name: 'Adbeat'
|
|
2196
|
+
category: 'Crawler'
|
|
2197
|
+
url: 'https://www.adbeat.com/operation_policy'
|
|
2198
|
+
producer:
|
|
2199
|
+
name: 'PPC Labs LLC'
|
|
2200
|
+
url: 'https://www.adbeat.com/'
|
|
2201
|
+
|
|
2202
|
+
- regex: 'BW/(?:(\d+[\.\d]+))'
|
|
2203
|
+
name: 'BuiltWith'
|
|
2204
|
+
category: 'Crawler'
|
|
2205
|
+
url: 'https://builtwith.com/biup'
|
|
2206
|
+
producer:
|
|
2207
|
+
name: 'BuiltWith Pty Ltd'
|
|
2208
|
+
url: 'https://builtwith.com/'
|
|
2209
|
+
|
|
2210
|
+
- regex: 'https://whatis.contentkingapp.com'
|
|
2211
|
+
name: 'ContentKing'
|
|
2212
|
+
category: 'Site Monitor'
|
|
2213
|
+
url: 'https://whatis.contentkingapp.com/'
|
|
2214
|
+
producer:
|
|
2215
|
+
name: 'ContentKing BV'
|
|
2216
|
+
url: 'https://www.contentkingapp.com/'
|
|
2217
|
+
|
|
2218
|
+
- regex: 'MicroAdBot'
|
|
2219
|
+
name: 'MicroAdBot'
|
|
2220
|
+
category: 'Crawler'
|
|
2221
|
+
url: 'https://www.microad.co.jp/'
|
|
2222
|
+
producer:
|
|
2223
|
+
name: 'MicroAd, Inc.'
|
|
2224
|
+
url: 'https://www.microad.co.jp/'
|
|
2225
|
+
|
|
2226
|
+
- regex: 'PingAdmin.Ru'
|
|
2227
|
+
name: 'PingAdmin.Ru'
|
|
2228
|
+
category: 'Site Monitor'
|
|
2229
|
+
url: 'https://ping-admin.ru/'
|
|
2230
|
+
|
|
2231
|
+
- regex: 'notifyninja.+monitoring'
|
|
2232
|
+
name: 'Notify Ninja'
|
|
2233
|
+
category: 'Site Monitor'
|
|
2234
|
+
url: 'http://notifyninja.com'
|
|
2235
|
+
|
|
2236
|
+
- regex: 'WebDataStats'
|
|
2237
|
+
name: 'WebDataStats'
|
|
2238
|
+
category: 'Crawler'
|
|
2239
|
+
url: 'https://webdatastats.com/policy.html'
|
|
2240
|
+
producer:
|
|
2241
|
+
name: 'WebTehRazrabotka LLC'
|
|
2242
|
+
url: 'https://webdatastats.com/'
|
|
2243
|
+
|
|
2244
|
+
- regex: 'parse.ly scraper'
|
|
2245
|
+
name: 'parse.ly'
|
|
2246
|
+
category: 'Crawler'
|
|
2247
|
+
url: 'https://www.parse.ly/help/integration/crawler'
|
|
2248
|
+
producer:
|
|
2249
|
+
name: 'Parsely, Inc.'
|
|
2250
|
+
url: 'https://www.parse.ly/'
|
|
2251
|
+
|
|
2252
|
+
- regex: 'Nimbostratus-Bot'
|
|
2253
|
+
name: 'Nimbostratus Bot'
|
|
2254
|
+
category: 'Site Monitor'
|
|
2255
|
+
url: 'http://cloudsystemnetworks.com'
|
|
2256
|
+
|
|
2257
|
+
- regex: 'HeartRails_Capture/\d'
|
|
2258
|
+
name: 'Heart Rails Capture'
|
|
2259
|
+
category: 'Service Agent'
|
|
2260
|
+
url: 'http://capture.heartrails.com'
|
|
2261
|
+
|
|
2262
|
+
- regex: 'Project-Resonance'
|
|
2263
|
+
name: 'Project Resonance'
|
|
2264
|
+
category: 'Crawler'
|
|
2265
|
+
url: 'http://project-resonance.com'
|
|
2266
|
+
|
|
2267
|
+
- regex: 'DataXu/\d'
|
|
2268
|
+
name: 'DataXu'
|
|
2269
|
+
category: 'Service Agent'
|
|
2270
|
+
url: 'https://advertising.roku.com/dataxu'
|
|
2271
|
+
producer:
|
|
2272
|
+
name: 'Roku, Inc.'
|
|
2273
|
+
url: 'https://roku.com'
|
|
2274
|
+
|
|
2275
|
+
- regex: 'Cocolyzebot'
|
|
2276
|
+
name: 'Cocolyzebot'
|
|
2277
|
+
category: 'Crawler'
|
|
2278
|
+
url: 'https://cocolyze.com/en/cocolyzebot'
|
|
2279
|
+
producer:
|
|
2280
|
+
name: 'VSI INNOVATION SAS'
|
|
2281
|
+
url: 'https://vsi-innovation.com/'
|
|
2282
|
+
|
|
2283
|
+
- regex: 'veryhip'
|
|
2284
|
+
name: 'VeryHip'
|
|
2285
|
+
category: 'Crawler'
|
|
2286
|
+
url: 'https://veryhip.com/'
|
|
2287
|
+
producer:
|
|
2288
|
+
name: 'VeryHip'
|
|
2289
|
+
url: 'https://veryhip.com/'
|
|
2290
|
+
|
|
2291
|
+
- regex: 'LinkpadBot'
|
|
2292
|
+
name: 'LinkpadBot'
|
|
2293
|
+
category: 'Crawler'
|
|
2294
|
+
url: 'https://www.linkpad.org/'
|
|
2295
|
+
producer:
|
|
2296
|
+
name: 'Solomono LLC'
|
|
2297
|
+
url: 'https://www.linkpad.org/'
|
|
2298
|
+
|
|
2299
|
+
- regex: 'MuscatFerret'
|
|
2300
|
+
name: 'MuscatFerret'
|
|
2301
|
+
category: 'Crawler'
|
|
2302
|
+
url: 'http://www.webtop.com/'
|
|
2303
|
+
|
|
2304
|
+
- regex: 'PageThing.com'
|
|
2305
|
+
name: 'PageThing'
|
|
2306
|
+
category: 'Crawler'
|
|
2307
|
+
url: 'https://www.pagething.com/'
|
|
2308
|
+
producer:
|
|
2309
|
+
name: 'SPECIALNOISE LTD'
|
|
2310
|
+
url: 'https://www.specialnoise.com/'
|
|
2311
|
+
|
|
2312
|
+
- regex: 'ArchiveBox'
|
|
2313
|
+
name: 'ArchiveBox'
|
|
2314
|
+
url: 'https://archivebox.io/'
|
|
2315
|
+
category: 'Crawler'
|
|
2316
|
+
producer:
|
|
2317
|
+
name: ''
|
|
2318
|
+
url: ''
|
|
2319
|
+
|
|
2320
|
+
- regex: 'Choosito'
|
|
2321
|
+
name: 'Choosito'
|
|
2322
|
+
url: 'https://www.choosito.com/'
|
|
2323
|
+
category: 'Crawler'
|
|
2324
|
+
producer:
|
|
2325
|
+
name: 'Choosito! Inc.'
|
|
2326
|
+
url: 'https://www.choosito.com/'
|
|
2327
|
+
|
|
2328
|
+
- regex: 'datagnionbot'
|
|
2329
|
+
name: 'datagnionbot'
|
|
2330
|
+
url: 'https://www.datagnion.com/bot.html'
|
|
2331
|
+
category: 'Crawler'
|
|
2332
|
+
producer:
|
|
2333
|
+
name: 'DATAGNION GMBH'
|
|
2334
|
+
url: 'https://www.datagnion.com/'
|
|
2335
|
+
|
|
2336
|
+
- regex: 'WhatCMS'
|
|
2337
|
+
name: 'WhatCMS'
|
|
2338
|
+
url: 'https://whatcms.org/'
|
|
2339
|
+
category: 'Crawler'
|
|
2340
|
+
producer:
|
|
2341
|
+
name: 'Nineteen Ten LLC'
|
|
2342
|
+
url: 'https://whatcms.org/'
|
|
2343
|
+
|
|
2344
|
+
- regex: 'httpx'
|
|
2345
|
+
name: 'httpx'
|
|
2346
|
+
url: 'https://github.com/projectdiscovery/httpx'
|
|
2347
|
+
category: 'Crawler'
|
|
2348
|
+
producer:
|
|
2349
|
+
name: ''
|
|
2350
|
+
url: ''
|
|
2351
|
+
|
|
2352
|
+
- regex: 'scaninfo@expanseinc.com'
|
|
2353
|
+
name: 'Expanse'
|
|
2354
|
+
category: 'Security Checker'
|
|
2355
|
+
url: 'https://expanse.co/'
|
|
2356
|
+
producer:
|
|
2357
|
+
name: 'Expanse Inc.'
|
|
2358
|
+
url: 'https://expanse.co/'
|
|
2359
|
+
|
|
2360
|
+
- regex: 'HuaweiWebCatBot'
|
|
2361
|
+
name: 'HuaweiWebCatBot'
|
|
2362
|
+
category: 'Crawler'
|
|
2363
|
+
url: 'https://isecurity.huawei.com'
|
|
2364
|
+
producer:
|
|
2365
|
+
name: 'Huawei Technologies Co., Ltd.'
|
|
2366
|
+
url: 'https://huawei.com'
|
|
2367
|
+
|
|
2368
|
+
- regex: 'Hatena-Favicon'
|
|
2369
|
+
name: 'Hatena Favicon'
|
|
2370
|
+
category: 'Crawler'
|
|
2371
|
+
url: 'https://www.hatena.ne.jp/faq/'
|
|
2372
|
+
producer:
|
|
2373
|
+
name: 'Hatena Co., Ltd.'
|
|
2374
|
+
url: 'https://www.hatena.ne.jp'
|
|
2375
|
+
|
|
2376
|
+
- regex: 'RyowlEngine/(\d+)'
|
|
2377
|
+
name: 'Ryowl'
|
|
2378
|
+
category: 'Crawler'
|
|
2379
|
+
url: 'https://ryowl.org'
|
|
2380
|
+
|
|
2381
|
+
- regex: 'OdklBot/(\d+)'
|
|
2382
|
+
name: 'Odnoklassniki Bot'
|
|
2383
|
+
category: 'Crawler'
|
|
2384
|
+
url: 'https://odnoklassniki.ru'
|
|
2385
|
+
|
|
2386
|
+
- regex: 'Mediatoolkitbot'
|
|
2387
|
+
name: 'Mediatoolkit Bot'
|
|
2388
|
+
category: 'Crawler'
|
|
2389
|
+
url: 'https://mediatoolkit.com'
|
|
2390
|
+
|
|
2391
|
+
- regex: 'ZoominfoBot'
|
|
2392
|
+
name: 'ZoominfoBot'
|
|
2393
|
+
category: 'Crawler'
|
|
2394
|
+
url: 'https://www.zoominfo.com'
|
|
2395
|
+
|
|
2396
|
+
- regex: 'WeViKaBot/([\d+\.])'
|
|
2397
|
+
name: 'WeViKaBot'
|
|
2398
|
+
category: 'Crawler'
|
|
2399
|
+
url: 'http://www.wevika.de'
|
|
2400
|
+
|
|
2401
|
+
- regex: 'SEOkicks'
|
|
2402
|
+
name: 'SEOkicks'
|
|
2403
|
+
category: 'Crawler'
|
|
2404
|
+
url: 'https://www.seokicks.de/robot.html'
|
|
2405
|
+
|
|
2406
|
+
- regex: 'Plukkie/([\d+\.])'
|
|
2407
|
+
name: 'Plukkie'
|
|
2408
|
+
category: 'Crawler'
|
|
2409
|
+
url: 'http://www.botje.com/plukkie.htm'
|
|
2410
|
+
|
|
2411
|
+
- regex: 'proximic;'
|
|
2412
|
+
name: 'Comscore'
|
|
2413
|
+
category: 'Crawler'
|
|
2414
|
+
url: 'https://www.comscore.com/Web-Crawler'
|
|
2415
|
+
|
|
2416
|
+
- regex: 'SurdotlyBot/([\d+\.])'
|
|
2417
|
+
name: 'SurdotlyBot'
|
|
2418
|
+
category: 'Crawler'
|
|
2419
|
+
url: 'http://sur.ly/bot.html'
|
|
2420
|
+
|
|
2421
|
+
- regex: 'Gowikibot/([\d+\.])'
|
|
2422
|
+
name: 'Gowikibot'
|
|
2423
|
+
category: 'Crawler'
|
|
2424
|
+
url: 'http:/www.gowikibot.com'
|
|
2425
|
+
|
|
2426
|
+
- regex: 'SabsimBot/([\d+\.])'
|
|
2427
|
+
name: 'SabsimBot'
|
|
2428
|
+
category: 'Crawler'
|
|
2429
|
+
url: 'https://sabsim.com'
|
|
2430
|
+
|
|
2431
|
+
- regex: 'LumtelBot/([\d+\.])'
|
|
2432
|
+
name: 'LumtelBot'
|
|
2433
|
+
category: 'Crawler'
|
|
2434
|
+
url: 'https://umtel.com'
|
|
2435
|
+
|
|
2436
|
+
- regex: 'PiplBot'
|
|
2437
|
+
name: 'PiplBot'
|
|
2438
|
+
category: 'Crawler'
|
|
2439
|
+
url: 'http://www.pipl.com/bot'
|
|
2440
|
+
|
|
2441
|
+
- regex: 'woobot/([\d+\.])'
|
|
2442
|
+
name: 'WooRank'
|
|
2443
|
+
category: 'Crawler'
|
|
2444
|
+
url: 'https://www.woorank.com/bot'
|
|
2445
|
+
|
|
2446
|
+
- regex: 'Cookiebot/([\d+\.])'
|
|
2447
|
+
name: 'Cookiebot'
|
|
2448
|
+
category: 'Crawler'
|
|
2449
|
+
url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
|
|
2450
|
+
producer:
|
|
2451
|
+
name: 'Cybot A/S'
|
|
2452
|
+
url: 'https://www.cybot.com/'
|
|
2453
|
+
|
|
2454
|
+
- regex: 'NetSystemsResearch'
|
|
2455
|
+
name: 'NetSystemsResearch'
|
|
2456
|
+
category: 'Security Checker'
|
|
2457
|
+
url: 'https://www.netsystemsresearch.com/'
|
|
2458
|
+
producer:
|
|
2459
|
+
name: 'NET SYSTEMS RESEARCH LLC'
|
|
2460
|
+
url: 'https://www.netsystemsresearch.com/'
|
|
2461
|
+
|
|
2462
|
+
- regex: 'CensysInspect/([\d+\.])'
|
|
2463
|
+
name: 'CensysInspect'
|
|
2464
|
+
category: 'Security Checker'
|
|
2465
|
+
url: 'https://about.censys.io/'
|
|
2466
|
+
producer:
|
|
2467
|
+
name: 'Censys, Inc.'
|
|
2468
|
+
url: 'https://censys.io/'
|
|
2469
|
+
|
|
2470
|
+
- regex: 'gdnplus.com'
|
|
2471
|
+
name: 'GDNP'
|
|
2472
|
+
category: 'Crawler'
|
|
2473
|
+
url: 'https://gdnplus.com/'
|
|
2474
|
+
producer:
|
|
2475
|
+
name: 'Global Digital Network Plus, LLC'
|
|
2476
|
+
url: 'https://gdnplus.com/'
|
|
2477
|
+
|
|
2478
|
+
- regex: 'WellKnownBot/([\d+\.])'
|
|
2479
|
+
name: 'WellKnownBot'
|
|
2480
|
+
category: 'Crawler'
|
|
2481
|
+
url: 'https://well-known.dev'
|
|
2482
|
+
|
|
2483
|
+
- regex: 'Adsbot/([\d+\.])'
|
|
2484
|
+
name: 'Adsbot'
|
|
2485
|
+
category: 'Crawler'
|
|
2486
|
+
url: 'https://seostar.co/robot/'
|
|
2487
|
+
|
|
2488
|
+
- regex: 'MTRobot/([\d+\.])'
|
|
2489
|
+
name: 'MTRobot'
|
|
2490
|
+
category: 'Crawler'
|
|
2491
|
+
url: 'https://metrics-tools.de/robot.html'
|
|
2492
|
+
producer:
|
|
2493
|
+
name: 'Metrics Tools'
|
|
2494
|
+
url: 'https://metrics-tools.de/'
|
|
2495
|
+
|
|
2496
|
+
- regex: 'serpstatbot/([\d+\.])'
|
|
2497
|
+
name: 'serpstatbot'
|
|
2498
|
+
category: 'Crawler'
|
|
2499
|
+
url: 'http://serpstatbot.com/'
|
|
2500
|
+
producer:
|
|
2501
|
+
name: 'Netpeak Ltd'
|
|
2502
|
+
url: 'https://netpeak.net/'
|
|
2503
|
+
|
|
2504
|
+
- regex: 'colly'
|
|
2505
|
+
name: 'colly'
|
|
2506
|
+
category: 'Crawler'
|
|
2507
|
+
url: 'https://github.com/gocolly/colly/'
|
|
2508
|
+
|
|
2509
|
+
- regex: 'l9tcpid/v([\d+\.])'
|
|
2510
|
+
name: 'l9tcpid'
|
|
2511
|
+
category: 'Security Checker'
|
|
2512
|
+
url: 'https://github.com/LeakIX/l9tcpid'
|
|
2513
|
+
|
|
2514
|
+
- regex: 'MegaIndex.ru/([\d+\.])'
|
|
2515
|
+
name: 'MegaIndex'
|
|
2516
|
+
category: 'Crawler'
|
|
2517
|
+
url: 'https://megaindex.com/crawler'
|
|
2518
|
+
|
|
2519
|
+
- regex: 'Seekport'
|
|
2520
|
+
name: 'Seekport'
|
|
2521
|
+
category: 'Crawler'
|
|
2522
|
+
url: 'http://www.seekport.com/'
|
|
2523
|
+
producer:
|
|
2524
|
+
name: 'SISTRIX GmbH'
|
|
2525
|
+
url: 'https://www.sistrix.de/'
|
|
2526
|
+
|
|
2527
|
+
- regex: 'seolyt/([\d+\.])'
|
|
2528
|
+
name: 'seolyt'
|
|
2529
|
+
category: 'Crawler'
|
|
2530
|
+
url: 'https://seolyt.com/'
|
|
2531
|
+
|
|
2532
|
+
- regex: 'YaK/([\d+\.])'
|
|
2533
|
+
name: 'YaK'
|
|
2534
|
+
category: 'Crawler'
|
|
2535
|
+
url: 'https://www.linkfluence.com/'
|
|
2536
|
+
producer:
|
|
2537
|
+
name: 'Linkfluence SAS'
|
|
2538
|
+
url: 'https://www.linkfluence.com/'
|
|
2539
|
+
|
|
2540
|
+
- regex: 'KomodiaBot/([\d+\.])'
|
|
2541
|
+
name: 'KomodiaBot'
|
|
2542
|
+
category: 'Crawler'
|
|
2543
|
+
url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
|
|
2544
|
+
producer:
|
|
2545
|
+
name: 'Komodia Inc.'
|
|
2546
|
+
url: 'https://www.komodia.com/'
|
|
2547
|
+
|
|
2548
|
+
- regex: 'Neevabot/([\d+\.])'
|
|
2549
|
+
name: 'Neevabot'
|
|
2550
|
+
category: 'Search bot'
|
|
2551
|
+
url: 'https://neeva.com/neevabot'
|
|
2552
|
+
producer:
|
|
2553
|
+
name: 'Neeva Inc.'
|
|
2554
|
+
url: 'https://neeva.com/'
|
|
2555
|
+
|
|
2556
|
+
- regex: 'LinkPreview/([\d+\.])'
|
|
2557
|
+
name: 'LinkPreview'
|
|
2558
|
+
category: 'Service Agent'
|
|
2559
|
+
url: 'https://www.linkpreview.net/'
|
|
2560
|
+
|
|
2561
|
+
- regex: 'JungleKeyThumbnail/([\d+\.])'
|
|
2562
|
+
name: 'JungleKeyThumbnail'
|
|
2563
|
+
category: 'Crawler'
|
|
2564
|
+
url: 'https://junglekey.com/'
|
|
2565
|
+
|
|
2566
|
+
- regex: 'rocketmonitor(?: |bot/)([\d+\.])'
|
|
2567
|
+
name: 'RocketMonitorBot'
|
|
2568
|
+
category: 'Site Monitor'
|
|
2569
|
+
url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
|
|
2570
|
+
producer:
|
|
2571
|
+
name: 'Radio Mast, Inc.'
|
|
2572
|
+
url: 'https://www.radiomast.io/'
|
|
2573
|
+
|
|
2574
|
+
- regex: 'SitemapParser-VIPnytt/([\d+\.])'
|
|
2575
|
+
name: 'SitemapParser-VIPnytt'
|
|
2576
|
+
category: 'Crawler'
|
|
2577
|
+
url: 'https://github.com/VIPnytt/SitemapParser/'
|
|
2578
|
+
|
|
2579
|
+
|
|
2580
|
+
- regex: '^Turnitin'
|
|
2581
|
+
name: 'Turnitin'
|
|
2582
|
+
category: 'Crawler'
|
|
2583
|
+
url: 'https://turnitin.com/robot/crawlerinfo.html'
|
|
2584
|
+
|
|
2585
|
+
- regex: 'DMBrowser/\d+|DMBrowser-[UB]V'
|
|
2586
|
+
name: 'Dotcom Monitor'
|
|
2587
|
+
category: 'Site Monitor'
|
|
2588
|
+
url: 'https://www.dotcom-monitor.com'
|
|
2589
|
+
|
|
2590
|
+
- regex: 'ThinkChaos/'
|
|
2591
|
+
name: 'ThinkChaos'
|
|
2592
|
+
category: 'Crawler'
|
|
2593
|
+
|
|
2594
|
+
- regex: 'DataForSeoBot'
|
|
2595
|
+
name: 'DataForSeoBot'
|
|
2596
|
+
category: 'Crawler'
|
|
2597
|
+
url: 'https://dataforseo.com/dataforseo-bot'
|
|
2598
|
+
|
|
2599
|
+
# Generic detections
|
|
1750
2600
|
- regex: '[a-z0-9\-_]*((?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9])|crawler|crawl|checker|archiver|transcoder|spider)([^a-z]|$)'
|
|
1751
2601
|
name: 'Generic Bot'
|