device_detector 1.0.0 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +58 -4
- data/README.md +57 -21
- data/lib/device_detector/bot.rb +2 -2
- data/lib/device_detector/browser.rb +691 -0
- data/lib/device_detector/client.rb +11 -2
- data/lib/device_detector/client_hint.rb +249 -0
- data/lib/device_detector/device.rb +1954 -23
- data/lib/device_detector/memory_cache.rb +26 -19
- data/lib/device_detector/metadata_extractor.rb +7 -8
- data/lib/device_detector/model_extractor.rb +3 -3
- data/lib/device_detector/name_extractor.rb +2 -2
- data/lib/device_detector/os.rb +289 -112
- data/lib/device_detector/parser.rb +49 -13
- data/lib/device_detector/vendor_fragment.rb +25 -0
- data/lib/device_detector/version.rb +3 -1
- data/lib/device_detector/version_extractor.rb +29 -2
- data/lib/device_detector.rb +192 -44
- data/regexes/bots.yml +3399 -91
- data/regexes/client/browser_engine.yml +28 -4
- data/regexes/client/browsers.yml +2697 -408
- data/regexes/client/feed_readers.yml +60 -22
- data/regexes/client/hints/apps.yml +150 -0
- data/regexes/client/hints/browsers.yml +292 -0
- data/regexes/client/libraries.yml +598 -4
- data/regexes/client/mediaplayers.yml +110 -5
- data/regexes/client/mobile_apps.yml +2451 -14
- data/regexes/client/pim.yml +128 -3
- data/regexes/device/cameras.yml +6 -6
- data/regexes/device/car_browsers.yml +39 -3
- data/regexes/device/consoles.yml +40 -6
- data/regexes/device/mobiles.yml +38844 -2907
- data/regexes/device/notebooks.yml +127 -0
- data/regexes/device/portable_media_player.yml +75 -12
- data/regexes/device/shell_tv.yml +145 -0
- data/regexes/device/televisions.yml +981 -40
- data/regexes/oss.yml +1560 -311
- data/regexes/vendorfragments.yml +6 -2
- metadata +31 -105
- data/.gitignore +0 -14
- data/.travis.yml +0 -18
- data/Gemfile +0 -8
- data/Rakefile +0 -79
- data/device_detector.gemspec +0 -26
- data/spec/device_detector/bot_fixtures_spec.rb +0 -30
- data/spec/device_detector/client_fixtures_spec.rb +0 -31
- data/spec/device_detector/concrete_user_agent_spec.rb +0 -136
- data/spec/device_detector/detector_fixtures_spec.rb +0 -60
- data/spec/device_detector/device_fixtures_spec.rb +0 -36
- data/spec/device_detector/device_spec.rb +0 -151
- data/spec/device_detector/memory_cache_spec.rb +0 -116
- data/spec/device_detector/model_extractor_spec.rb +0 -63
- data/spec/device_detector/os_fixtures_spec.rb +0 -26
- data/spec/device_detector/version_extractor_spec.rb +0 -80
- data/spec/device_detector_spec.rb +0 -198
- data/spec/fixtures/client/browser.yml +0 -1313
- data/spec/fixtures/client/feed_reader.yml +0 -187
- data/spec/fixtures/client/library.yml +0 -84
- data/spec/fixtures/client/mediaplayer.yml +0 -168
- data/spec/fixtures/client/mobile_app.yml +0 -30
- data/spec/fixtures/client/pim.yml +0 -96
- data/spec/fixtures/detector/bots.yml +0 -2418
- data/spec/fixtures/detector/camera.yml +0 -115
- data/spec/fixtures/detector/car_browser.yml +0 -20
- data/spec/fixtures/detector/console.yml +0 -267
- data/spec/fixtures/detector/desktop.yml +0 -4828
- data/spec/fixtures/detector/feature_phone.yml +0 -782
- data/spec/fixtures/detector/feed_reader.yml +0 -486
- data/spec/fixtures/detector/mediaplayer.yml +0 -179
- data/spec/fixtures/detector/mobile_apps.yml +0 -149
- data/spec/fixtures/detector/phablet.yml +0 -2140
- data/spec/fixtures/detector/portable_media_player.yml +0 -153
- data/spec/fixtures/detector/smart_display.yml +0 -58
- data/spec/fixtures/detector/smartphone-1.yml +0 -9469
- data/spec/fixtures/detector/smartphone-2.yml +0 -9414
- data/spec/fixtures/detector/smartphone-3.yml +0 -9396
- data/spec/fixtures/detector/smartphone-4.yml +0 -5742
- data/spec/fixtures/detector/smartphone.yml +0 -9411
- data/spec/fixtures/detector/tablet-1.yml +0 -9495
- data/spec/fixtures/detector/tablet-2.yml +0 -248
- data/spec/fixtures/detector/tablet.yml +0 -9484
- data/spec/fixtures/detector/tv.yml +0 -2582
- data/spec/fixtures/detector/unknown.yml +0 -3196
- data/spec/fixtures/device/camera.yml +0 -18
- data/spec/fixtures/device/car_browser.yml +0 -6
- data/spec/fixtures/device/console.yml +0 -78
- data/spec/fixtures/parser/oss.yml +0 -800
- data/spec/fixtures/parser/vendorfragments.yml +0 -162
- data/spec/spec_helper.rb +0 -9
data/regexes/bots.yml
CHANGED
@@ -1,14 +1,35 @@
|
|
1
1
|
###############
|
2
2
|
# Device Detector - The Universal Device Detection library for parsing User Agents
|
3
3
|
#
|
4
|
-
# @link
|
4
|
+
# @link https://matomo.org
|
5
5
|
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
6
6
|
###############
|
7
7
|
|
8
|
-
- regex: '
|
8
|
+
- regex: 'WireReaderBot(?:/([\d+.]+))?'
|
9
|
+
name: 'WireReaderBot'
|
10
|
+
category: 'Feed Fetcher'
|
11
|
+
url: 'https://wirereader.app/'
|
12
|
+
|
13
|
+
- regex: 'monitoring360bot'
|
14
|
+
name: '360 Monitoring'
|
15
|
+
category: 'Site Monitor'
|
16
|
+
url: 'https://www.360monitoring.io'
|
17
|
+
producer:
|
18
|
+
name: 'Plesk International GmbH'
|
19
|
+
url: 'https://www.plesk.com'
|
20
|
+
|
21
|
+
- regex: 'Cloudflare-Healthchecks'
|
22
|
+
name: 'Cloudflare Health Checks'
|
23
|
+
category: 'Service Agent'
|
24
|
+
url: 'https://developers.cloudflare.com/health-checks/'
|
25
|
+
producer:
|
26
|
+
name: 'CloudFlare'
|
27
|
+
url: 'https://www.cloudflare.com/'
|
28
|
+
|
29
|
+
- regex: '360Spider'
|
9
30
|
name: '360Spider'
|
10
31
|
category: 'Search bot'
|
11
|
-
url: '
|
32
|
+
url: 'https://www.so.com/help/help_3_2.html'
|
12
33
|
producer:
|
13
34
|
name: 'Online Media Group, Inc.'
|
14
35
|
url: ''
|
@@ -40,18 +61,57 @@
|
|
40
61
|
- regex: 'AhrefsBot'
|
41
62
|
name: 'aHrefs Bot'
|
42
63
|
category: 'Crawler'
|
43
|
-
url: '
|
64
|
+
url: 'https://ahrefs.com/robot'
|
65
|
+
producer:
|
66
|
+
name: 'Ahrefs Pte Ltd'
|
67
|
+
url: 'https://ahrefs.com/robot'
|
68
|
+
|
69
|
+
- regex: 'AhrefsSiteAudit/[\d.]+'
|
70
|
+
name: 'AhrefsSiteAudit'
|
71
|
+
category: 'Site Monitor'
|
72
|
+
url: 'https://ahrefs.com/robot/site-audit'
|
44
73
|
producer:
|
45
74
|
name: 'Ahrefs Pte Ltd'
|
46
|
-
url: '
|
75
|
+
url: 'https://ahrefs.com/'
|
47
76
|
|
48
77
|
- regex: 'ia_archiver|alexabot|verifybot'
|
49
78
|
name: 'Alexa Crawler'
|
50
79
|
category: 'Search bot'
|
51
|
-
url: 'https://alexa.
|
80
|
+
url: 'https://support.alexa.com/hc/en-us/sections/200100794-Crawlers'
|
81
|
+
producer:
|
82
|
+
name: 'Alexa Internet'
|
83
|
+
url: 'https://www.alexa.com'
|
84
|
+
|
85
|
+
- regex: 'alexa site audit'
|
86
|
+
name: 'Alexa Site Audit'
|
87
|
+
category: 'Site Monitor'
|
88
|
+
url: 'https://support.alexa.com/hc/en-us/articles/200450194'
|
52
89
|
producer:
|
53
90
|
name: 'Alexa Internet'
|
54
|
-
url: '
|
91
|
+
url: 'https://www.alexa.com'
|
92
|
+
|
93
|
+
- regex: 'Amazonbot/[\d.]+'
|
94
|
+
name: 'Amazon Bot'
|
95
|
+
category: 'Crawler'
|
96
|
+
url: 'https://developer.amazon.com/support/amazonbot'
|
97
|
+
producer:
|
98
|
+
name: 'Amazon.com, Inc.'
|
99
|
+
url: 'https://www.amazon.com/'
|
100
|
+
|
101
|
+
- regex: 'AmazonAdBot/[\d.]+'
|
102
|
+
name: 'Amazon AdBot'
|
103
|
+
category: 'Crawler'
|
104
|
+
url: 'https://adbot.amazon.com/'
|
105
|
+
producer:
|
106
|
+
name: 'Amazon.com, Inc.'
|
107
|
+
url: 'https://www.amazon.com/'
|
108
|
+
|
109
|
+
- regex: 'Amazon[ -]Route ?53[ -]Health[ -]Check[ -]Service'
|
110
|
+
name: 'Amazon Route53 Health Check'
|
111
|
+
category: 'Service Agent'
|
112
|
+
producer:
|
113
|
+
name: 'Amazon Web Services'
|
114
|
+
url: 'https://aws.amazon.com/'
|
55
115
|
|
56
116
|
- regex: 'AmorankSpider'
|
57
117
|
name: 'Amorank Spider'
|
@@ -61,13 +121,61 @@
|
|
61
121
|
name: 'Amorank'
|
62
122
|
url: 'http://www.amorank.com'
|
63
123
|
|
124
|
+
- regex: 'ApacheBench'
|
125
|
+
name: 'ApacheBench'
|
126
|
+
category: 'Benchmark'
|
127
|
+
url: 'https://httpd.apache.org/docs/2.4/programs/ab.html'
|
128
|
+
producer:
|
129
|
+
name: 'The Apache Software Foundation'
|
130
|
+
url: 'https://www.apache.org/foundation/'
|
131
|
+
|
64
132
|
- regex: 'Applebot'
|
65
133
|
name: 'Applebot'
|
66
134
|
category: 'Crawler'
|
67
|
-
url: '
|
135
|
+
url: 'https://support.apple.com/en-us/119829'
|
136
|
+
producer:
|
137
|
+
name: 'Apple Inc'
|
138
|
+
url: 'https://www.apple.com/'
|
139
|
+
|
140
|
+
- regex: 'iTMS'
|
141
|
+
name: 'iTMS'
|
142
|
+
category: 'Crawler'
|
143
|
+
url: 'https://support.apple.com/en-us/119829'
|
68
144
|
producer:
|
69
145
|
name: 'Apple Inc'
|
70
|
-
url: '
|
146
|
+
url: 'https://www.apple.com/'
|
147
|
+
|
148
|
+
- regex: 'AppSignalBot'
|
149
|
+
name: 'AppSignalBot'
|
150
|
+
category: 'Site Monitor'
|
151
|
+
url: 'https://docs.appsignal.com/uptime-monitoring/'
|
152
|
+
producer:
|
153
|
+
name: 'AppSignal'
|
154
|
+
url: 'https://appsignal.com/'
|
155
|
+
|
156
|
+
- regex: 'Arachni'
|
157
|
+
name: 'Arachni'
|
158
|
+
category: 'Security Checker'
|
159
|
+
url: 'https://www.arachni-scanner.com/'
|
160
|
+
producer:
|
161
|
+
name: 'Sarosys LLC'
|
162
|
+
url: 'https://www.sarosys.com/'
|
163
|
+
|
164
|
+
- regex: 'AspiegelBot'
|
165
|
+
name: 'AspiegelBot'
|
166
|
+
category: 'Crawler'
|
167
|
+
url: 'https://aspiegel.com/'
|
168
|
+
producer:
|
169
|
+
name: 'Huawei'
|
170
|
+
url: 'https://www.huawei.com/'
|
171
|
+
|
172
|
+
- regex: 'Castro 2, Episode Duration Lookup'
|
173
|
+
name: 'Castro 2'
|
174
|
+
category: 'Service Agent'
|
175
|
+
url: 'http://supertop.co/castro/'
|
176
|
+
producer:
|
177
|
+
name: 'Supertop'
|
178
|
+
url: 'http://supertop.co'
|
71
179
|
|
72
180
|
- regex: 'Curious George'
|
73
181
|
name: 'Analytics SEO Crawler'
|
@@ -80,10 +188,10 @@
|
|
80
188
|
- regex: 'archive\.org_bot|special_archiver'
|
81
189
|
name: 'archive.org bot'
|
82
190
|
category: 'Crawler'
|
83
|
-
url: '
|
191
|
+
url: 'https://archive.org/details/archive.org_bot'
|
84
192
|
producer:
|
85
193
|
name: 'The Internet Archive'
|
86
|
-
url: '
|
194
|
+
url: 'https://archive.org'
|
87
195
|
|
88
196
|
- regex: 'Ask Jeeves/Teoma'
|
89
197
|
name: 'Ask Jeeves'
|
@@ -93,8 +201,8 @@
|
|
93
201
|
name: 'Ask Jeeves Inc.'
|
94
202
|
url: 'http://www.ask.com'
|
95
203
|
|
96
|
-
- regex: 'Backlink-
|
97
|
-
name: 'Backlink-
|
204
|
+
- regex: 'Backlink-Check\.de'
|
205
|
+
name: 'Backlink-Check.de'
|
98
206
|
category: 'Crawler'
|
99
207
|
url: 'http://www.backlink-check.de/bot.html'
|
100
208
|
producer:
|
@@ -109,7 +217,7 @@
|
|
109
217
|
name: '2.0Promotion GbR'
|
110
218
|
url: 'http://www.backlinktest.com'
|
111
219
|
|
112
|
-
- regex: '
|
220
|
+
- regex: 'Baidu.*spider|baidu Transcoder'
|
113
221
|
name: 'Baidu Spider'
|
114
222
|
category: 'Search bot'
|
115
223
|
url: 'http://www.baidu.com/search/spider.htm'
|
@@ -125,7 +233,15 @@
|
|
125
233
|
name: ''
|
126
234
|
url: ''
|
127
235
|
|
128
|
-
- regex: '
|
236
|
+
- regex: 'Better Uptime Bot'
|
237
|
+
name: 'Better Uptime Bot'
|
238
|
+
category: 'Site Monitor'
|
239
|
+
url: 'https://betteruptime.com/faq'
|
240
|
+
producer:
|
241
|
+
name: 'Better Uptime'
|
242
|
+
url: 'https://betteruptime.com/'
|
243
|
+
|
244
|
+
- regex: 'MSNBot|msrbot|bingbot|bingadsbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
|
129
245
|
name: 'BingBot'
|
130
246
|
category: 'Search bot'
|
131
247
|
url: 'http://search.msn.com/msnbot.htmn'
|
@@ -141,7 +257,7 @@
|
|
141
257
|
name: 'Blekko'
|
142
258
|
url: 'http://blekko.com'
|
143
259
|
|
144
|
-
- regex: 'BLEXBot
|
260
|
+
- regex: 'BLEXBot'
|
145
261
|
name: 'BLEXBot Crawler'
|
146
262
|
category: 'Crawler'
|
147
263
|
url: 'http://webmeup-crawler.com'
|
@@ -165,6 +281,13 @@
|
|
165
281
|
name: 'Blogtrottr Ltd'
|
166
282
|
url: 'https://blogtrottr.com/'
|
167
283
|
|
284
|
+
- regex: 'BoardReader Blog Indexer'
|
285
|
+
name: 'BoardReader Blog Indexer'
|
286
|
+
category: 'Crawler'
|
287
|
+
producer:
|
288
|
+
name: 'BoardReader'
|
289
|
+
url: 'https://boardreader.com/'
|
290
|
+
|
168
291
|
- regex: 'BountiiBot'
|
169
292
|
name: 'Bountii Bot'
|
170
293
|
category: 'Search bot'
|
@@ -186,8 +309,8 @@
|
|
186
309
|
category: 'Crawler'
|
187
310
|
url: 'http://law.di.unimi.it/BUbiNG.html'
|
188
311
|
producer:
|
189
|
-
name: ''
|
190
|
-
url: ''
|
312
|
+
name: 'The Laboratory for Web Algorithmics (LAW)'
|
313
|
+
url: 'http://law.di.unimi.it/software.php#buging'
|
191
314
|
|
192
315
|
- regex: '(?<!HTC)[ _]Butterfly/'
|
193
316
|
name: 'Butterfly Robot'
|
@@ -221,21 +344,93 @@
|
|
221
344
|
name: '10betterpages GmbH'
|
222
345
|
url: 'http://cliqz.com'
|
223
346
|
|
347
|
+
- regex: 'Cloudflare-AMP'
|
348
|
+
name: 'CloudFlare AMP Fetcher'
|
349
|
+
category: 'Crawler'
|
350
|
+
url: 'https://amp.cloudflare.com/doc/fetcher.html'
|
351
|
+
producer:
|
352
|
+
name: 'CloudFlare'
|
353
|
+
url: 'http://www.cloudflare.com'
|
354
|
+
|
355
|
+
- regex: 'Cloudflare-?Diagnostics'
|
356
|
+
name: 'Cloudflare Diagnostics'
|
357
|
+
category: 'Site Monitor'
|
358
|
+
url: 'https://www.cloudflare.com/'
|
359
|
+
producer:
|
360
|
+
name: 'Cloudflare'
|
361
|
+
url: 'https://www.cloudflare.com/'
|
362
|
+
|
224
363
|
- regex: 'CloudFlare-AlwaysOnline'
|
225
364
|
name: 'CloudFlare Always Online'
|
226
365
|
category: 'Site Monitor'
|
227
|
-
url: '
|
366
|
+
url: 'https://www.cloudflare.com/always-online'
|
228
367
|
producer:
|
229
368
|
name: 'CloudFlare'
|
230
|
-
url: '
|
369
|
+
url: 'https://www.cloudflare.com/'
|
370
|
+
|
371
|
+
- regex: 'Cloudflare-SSLDetector'
|
372
|
+
name: 'Cloudflare SSL Detector'
|
373
|
+
category: 'Site Monitor'
|
374
|
+
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
375
|
+
producer:
|
376
|
+
name: 'CloudFlare'
|
377
|
+
url: 'https://www.cloudflare.com/'
|
378
|
+
|
379
|
+
- regex: 'Cloudflare Custom Hostname Verification'
|
380
|
+
name: 'Cloudflare Custom Hostname Verification'
|
381
|
+
category: 'Service Agent'
|
382
|
+
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
383
|
+
producer:
|
384
|
+
name: 'CloudFlare'
|
385
|
+
url: 'https://www.cloudflare.com/'
|
386
|
+
|
387
|
+
- regex: 'Cloudflare-Traffic-Manager'
|
388
|
+
name: 'Cloudflare Traffic Manager'
|
389
|
+
category: 'Site Monitor'
|
390
|
+
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
391
|
+
producer:
|
392
|
+
name: 'CloudFlare'
|
393
|
+
url: 'https://www.cloudflare.com/'
|
394
|
+
|
395
|
+
- regex: 'Cloudflare-Smart-Transit'
|
396
|
+
name: 'Cloudflare Smart Transit'
|
397
|
+
category: 'Site Monitor'
|
398
|
+
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
399
|
+
producer:
|
400
|
+
name: 'CloudFlare'
|
401
|
+
url: 'https://www.cloudflare.com/'
|
402
|
+
|
403
|
+
- regex: 'CloudflareObservatory'
|
404
|
+
name: 'Cloudflare Observatory'
|
405
|
+
category: 'Site Monitor'
|
406
|
+
url: 'https://developers.cloudflare.com/speed/speed-test/run-speed-test'
|
407
|
+
producer:
|
408
|
+
name: 'CloudFlare'
|
409
|
+
url: 'https://www.cloudflare.com/'
|
410
|
+
|
411
|
+
- regex: 'https://developers\.cloudflare\.com/security-center/'
|
412
|
+
name: 'Cloudflare Security Insights'
|
413
|
+
category: 'Site Monitor'
|
414
|
+
url: 'https://developers.cloudflare.com/fundamentals/reference/cloudflare-site-crawling/'
|
415
|
+
producer:
|
416
|
+
name: 'CloudFlare'
|
417
|
+
url: 'https://www.cloudflare.com/'
|
231
418
|
|
232
|
-
- regex: 'coccoc
|
419
|
+
- regex: 'coccoc\.com'
|
233
420
|
name: 'Cốc Cốc Bot'
|
234
|
-
url: '
|
421
|
+
url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
|
235
422
|
category: 'Search bot'
|
236
423
|
producer:
|
237
424
|
name: 'Cốc Cốc'
|
238
|
-
url: '
|
425
|
+
url: 'https://coccoc.com/'
|
426
|
+
|
427
|
+
- regex: 'collectd'
|
428
|
+
name: 'Collectd'
|
429
|
+
url: 'https://collectd.org/'
|
430
|
+
category: 'Site Monitor'
|
431
|
+
producer:
|
432
|
+
name: 'Collectd'
|
433
|
+
url: 'https://collectd.org/'
|
239
434
|
|
240
435
|
- regex: 'CommaFeed'
|
241
436
|
name: 'CommaFeed'
|
@@ -245,7 +440,39 @@
|
|
245
440
|
name: ''
|
246
441
|
url: ''
|
247
442
|
|
248
|
-
- regex: '
|
443
|
+
- regex: 'CSS Certificate Spider'
|
444
|
+
name: 'CSS Certificate Spider'
|
445
|
+
category: 'Crawler'
|
446
|
+
url: 'http://www.css-security.com/certificatespider/'
|
447
|
+
producer:
|
448
|
+
name: 'Certified Security Solutions'
|
449
|
+
url: 'https://www.css-security.com/company/about-us/'
|
450
|
+
|
451
|
+
- regex: 'Datadog Agent|Datadog/?Synthetics'
|
452
|
+
name: 'Datadog Agent'
|
453
|
+
url: 'https://github.com/DataDog/dd-agent'
|
454
|
+
category: 'Site Monitor'
|
455
|
+
producer:
|
456
|
+
name: 'Datadog'
|
457
|
+
url: 'https://www.datadoghq.com/'
|
458
|
+
|
459
|
+
- regex: 'Datanyze'
|
460
|
+
name: 'Datanyze'
|
461
|
+
url: ''
|
462
|
+
category: 'Crawler'
|
463
|
+
producer:
|
464
|
+
name: 'Datanyze'
|
465
|
+
url: 'https://www.datanyze.com'
|
466
|
+
|
467
|
+
- regex: 'Dataprovider'
|
468
|
+
name: 'Dataprovider'
|
469
|
+
category: 'Crawler'
|
470
|
+
url: ''
|
471
|
+
producer:
|
472
|
+
name: 'Dataprovider B.V.'
|
473
|
+
url: 'https://www.dataprovider.com/'
|
474
|
+
|
475
|
+
- regex: 'Daum(?!(?:Apps|Device))'
|
249
476
|
name: 'Daum'
|
250
477
|
category: 'Search bot'
|
251
478
|
url: 'http://tab.search.daum.net/aboutWebSearch_en.html'
|
@@ -261,7 +488,7 @@
|
|
261
488
|
name: 'DAZOO.FR'
|
262
489
|
url: 'http://dazoo.fr'
|
263
490
|
|
264
|
-
- regex: 'discobot
|
491
|
+
- regex: 'discobot'
|
265
492
|
name: 'Discobot'
|
266
493
|
category: 'Search bot'
|
267
494
|
url: 'http://discoveryengine.com/discobot.html'
|
@@ -269,7 +496,7 @@
|
|
269
496
|
name: 'Discovery Engine'
|
270
497
|
url: 'http://discoveryengine.com'
|
271
498
|
|
272
|
-
- regex: 'Domain Re-Animator Bot|support@domainreanimator
|
499
|
+
- regex: 'Domain Re-Animator Bot|support@domainreanimator\.com'
|
273
500
|
name: 'Domain Re-Animator Bot'
|
274
501
|
category: 'Crawler'
|
275
502
|
url: ''
|
@@ -285,6 +512,14 @@
|
|
285
512
|
name: 'SEOmoz, Inc.'
|
286
513
|
url: 'http://moz.com/'
|
287
514
|
|
515
|
+
- regex: 'DuckDuck(?:Go-Favicons-)?Bot'
|
516
|
+
name: 'DuckDuckGo Bot'
|
517
|
+
category: 'Search bot'
|
518
|
+
url: 'https://duckduckgo.com/duckduckbot'
|
519
|
+
producer:
|
520
|
+
name: 'DuckDuckGo'
|
521
|
+
url: 'https://duckduckgo.com/'
|
522
|
+
|
288
523
|
- regex: 'EasouSpider'
|
289
524
|
name: 'Easou Spider'
|
290
525
|
category: 'Search bot'
|
@@ -293,6 +528,13 @@
|
|
293
528
|
name: 'easou ICP'
|
294
529
|
url: 'http://www.easou.com'
|
295
530
|
|
531
|
+
- regex: 'eCairn-Grabber'
|
532
|
+
name: 'eCairn-Grabber'
|
533
|
+
category: 'Crawler'
|
534
|
+
producer:
|
535
|
+
name: 'eCairn'
|
536
|
+
url: 'https://ecairn.com'
|
537
|
+
|
296
538
|
- regex: 'EMail Exractor'
|
297
539
|
name: 'EMail Exractor'
|
298
540
|
category: 'Crawler'
|
@@ -301,7 +543,15 @@
|
|
301
543
|
name: ''
|
302
544
|
url: ''
|
303
545
|
|
304
|
-
- regex: '
|
546
|
+
- regex: 'evc-batch'
|
547
|
+
name: 'evc-batch'
|
548
|
+
category: 'Crawler'
|
549
|
+
url: ''
|
550
|
+
producer:
|
551
|
+
name: 'eVenture Capital Partners II, LLC'
|
552
|
+
url: 'http://www.eventures.vc/'
|
553
|
+
|
554
|
+
- regex: 'Exabot|ExaleadCloudview'
|
305
555
|
name: 'ExaBot'
|
306
556
|
category: 'Crawler'
|
307
557
|
url: 'http://www.exabot.com/go/robot'
|
@@ -325,13 +575,21 @@
|
|
325
575
|
name: 'SEOmoz, Inc.'
|
326
576
|
url: 'http://moz.com/'
|
327
577
|
|
328
|
-
- regex: '
|
329
|
-
name: 'Facebook
|
578
|
+
- regex: 'facebook(?:catalog|externalhit|externalua|platform|scraper)'
|
579
|
+
name: 'Facebook Crawler'
|
330
580
|
category: 'Social Media Agent'
|
331
|
-
url: 'https://
|
581
|
+
url: 'https://developers.facebook.com/docs/sharing/webmasters/crawler/'
|
332
582
|
producer:
|
333
|
-
name: '
|
334
|
-
url: '
|
583
|
+
name: 'Meta Platforms, Inc.'
|
584
|
+
url: 'https://www.meta.com/'
|
585
|
+
|
586
|
+
- regex: 'FacebookBot/[\d.]+'
|
587
|
+
name: 'FacebookBot'
|
588
|
+
category: 'Crawler'
|
589
|
+
url: 'https://developers.facebook.com/docs/sharing/bot'
|
590
|
+
producer:
|
591
|
+
name: 'Meta Platforms, Inc.'
|
592
|
+
url: 'https://www.meta.com/'
|
335
593
|
|
336
594
|
- regex: 'Feedbin'
|
337
595
|
name: 'Feedbin'
|
@@ -357,7 +615,7 @@
|
|
357
615
|
name: 'David Smith & Developing Perspective, LLC'
|
358
616
|
url: 'https://david-smith.org'
|
359
617
|
|
360
|
-
- regex: '
|
618
|
+
- regex: 'Feedly'
|
361
619
|
name: 'Feedly'
|
362
620
|
url: 'http://www.feedly.com'
|
363
621
|
category: 'Feed Fetcher'
|
@@ -381,6 +639,24 @@
|
|
381
639
|
name: ''
|
382
640
|
url: ''
|
383
641
|
|
642
|
+
- regex: 'FlipboardProxy|FlipboardRSS'
|
643
|
+
name: 'Flipboard'
|
644
|
+
url: 'http://flipboard.com/browserproxy'
|
645
|
+
category: 'Feed Fetcher'
|
646
|
+
producer:
|
647
|
+
name: 'Flipboard'
|
648
|
+
url: 'http://flipboard.com/'
|
649
|
+
|
650
|
+
- regex: 'Findxbot'
|
651
|
+
name: 'Findxbot'
|
652
|
+
category: 'Crawler'
|
653
|
+
url: 'http://www.findxbot.com'
|
654
|
+
|
655
|
+
- regex: 'FreshRSS'
|
656
|
+
name: 'FreshRSS'
|
657
|
+
category: 'Feed Fetcher'
|
658
|
+
url: 'https://freshrss.org/'
|
659
|
+
|
384
660
|
- regex: 'Genieo'
|
385
661
|
name: 'Genieo Web filter'
|
386
662
|
category: ''
|
@@ -389,6 +665,14 @@
|
|
389
665
|
name: 'Genieo'
|
390
666
|
url: 'http://www.genieo.com'
|
391
667
|
|
668
|
+
- regex: 'GigablastOpenSource'
|
669
|
+
name: 'Gigablast'
|
670
|
+
category: 'Search bot'
|
671
|
+
url: 'https://github.com/gigablast/open-source-search-engine'
|
672
|
+
producer:
|
673
|
+
name: 'Matt Wells'
|
674
|
+
url: 'http://www.gigablast.com/faq.html'
|
675
|
+
|
392
676
|
- regex: 'Gluten Free Crawler'
|
393
677
|
name: 'Gluten Free Crawler'
|
394
678
|
category: 'Crawler'
|
@@ -397,6 +681,10 @@
|
|
397
681
|
name: ''
|
398
682
|
url: ''
|
399
683
|
|
684
|
+
- regex: 'gobuster'
|
685
|
+
name: 'Gobuster'
|
686
|
+
url: 'https://github.com/OJ/gobuster'
|
687
|
+
|
400
688
|
- regex: 'ichiro/mobile goo'
|
401
689
|
name: 'Goo'
|
402
690
|
category: 'Search bot'
|
@@ -405,13 +693,29 @@
|
|
405
693
|
name: 'NTT Resonant'
|
406
694
|
url: 'http://goo.ne.jp'
|
407
695
|
|
696
|
+
- regex: 'Storebot-Google'
|
697
|
+
name: 'Google StoreBot'
|
698
|
+
category: 'Crawler'
|
699
|
+
|
700
|
+
- regex: 'Google Favicon'
|
701
|
+
name: 'Google Favicon'
|
702
|
+
category: 'Crawler'
|
703
|
+
|
704
|
+
- regex: 'Google Search Console'
|
705
|
+
name: 'Google Search Console'
|
706
|
+
category: 'Crawler'
|
707
|
+
url: 'https://search.google.com/search-console/about'
|
708
|
+
producer:
|
709
|
+
name: 'Google Inc.'
|
710
|
+
url: 'https://www.google.com/'
|
711
|
+
|
408
712
|
- regex: 'Google Page Speed Insights'
|
409
713
|
name: 'Google PageSpeed Insights'
|
410
714
|
category: 'Site Monitor'
|
411
715
|
url: 'http://developers.google.com/speed/pagespeed/insights/'
|
412
716
|
producer:
|
413
717
|
name: 'Google Inc.'
|
414
|
-
url: '
|
718
|
+
url: 'https://www.google.com/'
|
415
719
|
|
416
720
|
- regex: 'google_partner_monitoring'
|
417
721
|
name: 'Google Partner Monitoring'
|
@@ -419,7 +723,39 @@
|
|
419
723
|
url: ''
|
420
724
|
producer:
|
421
725
|
name: 'Google Inc.'
|
422
|
-
url: '
|
726
|
+
url: 'https://www.google.com/'
|
727
|
+
|
728
|
+
- regex: 'Google-Cloud-Scheduler'
|
729
|
+
name: 'Google Cloud Scheduler'
|
730
|
+
category: 'Crawler'
|
731
|
+
url: 'https://cloud.google.com/scheduler'
|
732
|
+
producer:
|
733
|
+
name: 'Google Inc.'
|
734
|
+
url: 'https://www.google.com'
|
735
|
+
|
736
|
+
- regex: 'Google-Structured-Data-Testing-Tool'
|
737
|
+
name: 'Google Structured Data Testing Tool'
|
738
|
+
category: 'Validator'
|
739
|
+
url: 'https://search.google.com/structured-data/testing-tool'
|
740
|
+
producer:
|
741
|
+
name: 'Google Inc.'
|
742
|
+
url: 'https://www.google.com/'
|
743
|
+
|
744
|
+
- regex: 'GoogleStackdriverMonitoring'
|
745
|
+
name: 'Google Stackdriver Monitoring'
|
746
|
+
category: 'Site Monitor'
|
747
|
+
url: 'https://cloud.google.com/monitoring'
|
748
|
+
producer:
|
749
|
+
name: 'Google Inc.'
|
750
|
+
url: 'https://www.google.com'
|
751
|
+
|
752
|
+
- regex: 'Google-Transparency-Report'
|
753
|
+
name: 'Google Transparency Report'
|
754
|
+
category: 'Site Monitor'
|
755
|
+
url: 'https://transparencyreport.google.com/'
|
756
|
+
producer:
|
757
|
+
name: 'Google Inc.'
|
758
|
+
url: 'https://www.google.com/'
|
423
759
|
|
424
760
|
- regex: 'via ggpht\.com GoogleImageProxy'
|
425
761
|
name: 'Gmail Image Proxy'
|
@@ -427,15 +763,71 @@
|
|
427
763
|
url: ''
|
428
764
|
producer:
|
429
765
|
name: 'Google Inc.'
|
430
|
-
url: '
|
766
|
+
url: 'https://www.google.com/'
|
767
|
+
|
768
|
+
- regex: 'SeznamEmailProxy'
|
769
|
+
name: 'Seznam Email Proxy'
|
770
|
+
category: 'Crawler'
|
771
|
+
url: ''
|
772
|
+
producer:
|
773
|
+
name: 'Seznam.cz, a.s.'
|
774
|
+
url: 'http://www.seznam.cz/'
|
775
|
+
|
776
|
+
- regex: 'Seznam-Zbozi-robot'
|
777
|
+
name: 'Seznam Zbozi.cz'
|
778
|
+
category: 'Crawler'
|
779
|
+
url: ''
|
780
|
+
producer:
|
781
|
+
name: 'Seznam.cz, a.s.'
|
782
|
+
url: 'https://www.zbozi.cz/'
|
783
|
+
|
784
|
+
- regex: 'Heurekabot-Feed'
|
785
|
+
name: 'Heureka Feed'
|
786
|
+
category: 'Crawler'
|
787
|
+
url: 'https://sluzby.heureka.cz/napoveda/heurekabot/'
|
788
|
+
producer:
|
789
|
+
name: 'Heureka.cz, a.s.'
|
790
|
+
url: 'https://www.heureka.cz/'
|
791
|
+
|
792
|
+
- regex: 'ShopAlike'
|
793
|
+
name: 'ShopAlike'
|
794
|
+
category: 'Crawler'
|
795
|
+
url: ''
|
796
|
+
producer:
|
797
|
+
name: 'Visual Meta'
|
798
|
+
url: 'https://www.shopalike.cz/'
|
799
|
+
|
800
|
+
- regex: 'Googlebot-News'
|
801
|
+
name: 'Googlebot News'
|
802
|
+
category: 'Search bot'
|
803
|
+
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
|
804
|
+
producer:
|
805
|
+
name: 'Google Inc.'
|
806
|
+
url: 'https://www.google.com/'
|
807
|
+
|
808
|
+
- regex: 'Adwords-(?:DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(?:adstxt|Ads-Conversions|Ads-Qualify|Adwords|AMPHTML|Assess|Extended|HotelAdsVerifier|InspectionTool|Lens|PageRenderer|Read-Aloud|Safety|Shopping-Quality|Site-Verification|Sites-Thumbnails|speakr|Stale-Content-Probe|Test|Youtube-Links)|(?:AdsBot|APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google(?:-Mobile)?|Google(?:AdSenseInfeed|AssociationService|bot|Other|Prober|Producer|Sites)|Google.*/\+/web/snippet'
|
809
|
+
name: 'Googlebot'
|
810
|
+
category: 'Search bot'
|
811
|
+
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
|
812
|
+
producer:
|
813
|
+
name: 'Google Inc.'
|
814
|
+
url: 'https://www.google.com/'
|
431
815
|
|
432
|
-
- regex: '
|
816
|
+
- regex: '^Google$'
|
433
817
|
name: 'Googlebot'
|
434
818
|
category: 'Search bot'
|
435
|
-
url: '
|
819
|
+
url: 'https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers'
|
820
|
+
producer:
|
821
|
+
name: 'Google Inc.'
|
822
|
+
url: 'https://www.google.com/'
|
823
|
+
|
824
|
+
- regex: 'Google-Area120-PrivacyPolicyFetcher'
|
825
|
+
name: 'Google Area 120 Privacy Policy Fetcher'
|
826
|
+
category: 'Crawler'
|
827
|
+
url: 'https://area120.google.com/'
|
436
828
|
producer:
|
437
829
|
name: 'Google Inc.'
|
438
|
-
url: '
|
830
|
+
url: 'https://www.google.com/'
|
439
831
|
|
440
832
|
- regex: 'heritrix'
|
441
833
|
name: 'Heritrix'
|
@@ -443,9 +835,21 @@
|
|
443
835
|
url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
|
444
836
|
producer:
|
445
837
|
name: 'The Internet Archive'
|
446
|
-
url: '
|
838
|
+
url: 'https://archive.org'
|
839
|
+
|
840
|
+
- regex: 'HubSpot '
|
841
|
+
name: 'HubSpot'
|
842
|
+
category: 'Crawler'
|
843
|
+
producer:
|
844
|
+
name: 'HubSpot Inc.'
|
845
|
+
url: 'https://www.hubspot.com'
|
447
846
|
|
448
|
-
- regex: '
|
847
|
+
- regex: 'vuhuvBot'
|
848
|
+
name: 'Vuhuv Bot'
|
849
|
+
category: 'Crawler'
|
850
|
+
url: 'http://vuhuv.com/bot.html'
|
851
|
+
|
852
|
+
- regex: 'HTTPMon/[\d.]+'
|
449
853
|
name: 'HTTPMon'
|
450
854
|
category: 'Site Monitor'
|
451
855
|
url: 'http://www.httpmon.com'
|
@@ -461,6 +865,11 @@
|
|
461
865
|
name: ''
|
462
866
|
url: ''
|
463
867
|
|
868
|
+
- regex: 'inoreader\.com'
|
869
|
+
name: 'inoreader'
|
870
|
+
category: 'Feed Reader'
|
871
|
+
url: 'https://www.inoreader.com'
|
872
|
+
|
464
873
|
- regex: 'iisbot'
|
465
874
|
name: 'IIS Site Analysis'
|
466
875
|
category: 'Crawler'
|
@@ -469,6 +878,25 @@
|
|
469
878
|
name: 'Microsoft Corporation'
|
470
879
|
url: 'http://www.microsoft.com'
|
471
880
|
|
881
|
+
- regex: 'ips-agent'
|
882
|
+
name: 'IPS Agent'
|
883
|
+
category: 'Crawler'
|
884
|
+
producer:
|
885
|
+
name: 'VeriSign, Inc'
|
886
|
+
url: 'http://www.verisign.com/'
|
887
|
+
|
888
|
+
- regex: 'IP-Guide\.com'
|
889
|
+
name: 'IP-Guide Crawler'
|
890
|
+
category: 'Crawler'
|
891
|
+
url: ''
|
892
|
+
producer:
|
893
|
+
name: ''
|
894
|
+
url: 'https://ip-guide.com'
|
895
|
+
|
896
|
+
- regex: 'k6/[0-9\.]+'
|
897
|
+
name: 'K6'
|
898
|
+
url: 'https://k6.io/'
|
899
|
+
|
472
900
|
- regex: 'kouio'
|
473
901
|
name: 'Kouio'
|
474
902
|
url: 'http://kouio.com/'
|
@@ -485,7 +913,23 @@
|
|
485
913
|
name: ''
|
486
914
|
url: ''
|
487
915
|
|
488
|
-
- regex: '
|
916
|
+
- regex: '[A-z0-9]*-Lighthouse'
|
917
|
+
name: 'Lighthouse'
|
918
|
+
category: 'Site Monitor'
|
919
|
+
url: 'https://developers.google.com/web/tools/lighthouse'
|
920
|
+
producer:
|
921
|
+
name: 'Lighthouse'
|
922
|
+
url: 'https://developers.google.com/web/tools/lighthouse'
|
923
|
+
|
924
|
+
- regex: 'last-modified\.com'
|
925
|
+
name: 'LastMod Bot'
|
926
|
+
category: 'Site Monitor'
|
927
|
+
url: 'https://last-modified.com/en/about'
|
928
|
+
producer:
|
929
|
+
name: ''
|
930
|
+
url: 'https://last-modified.com/en'
|
931
|
+
|
932
|
+
- regex: 'linkdexbot|linkdex\.com'
|
489
933
|
name: 'Linkdex Bot'
|
490
934
|
category: 'Search bot'
|
491
935
|
url: 'http://www.linkdex.com/bots'
|
@@ -503,12 +947,13 @@
|
|
503
947
|
|
504
948
|
- regex: 'ltx71'
|
505
949
|
name: 'LTX71'
|
506
|
-
|
950
|
+
category: 'Security Checker'
|
951
|
+
url: 'https://ltx71.com/'
|
507
952
|
producer:
|
508
953
|
name: ''
|
509
954
|
url: ''
|
510
955
|
|
511
|
-
- regex: 'Mail\.RU
|
956
|
+
- regex: 'Mail\.RU'
|
512
957
|
name: 'Mail.Ru Bot'
|
513
958
|
category: 'Search bot'
|
514
959
|
url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
|
@@ -532,6 +977,26 @@
|
|
532
977
|
name: ''
|
533
978
|
url: ''
|
534
979
|
|
980
|
+
- regex: 'masscan-ng/[\d.]+'
|
981
|
+
name: 'masscan-ng'
|
982
|
+
url: 'https://github.com/bi-zone/masscan-ng'
|
983
|
+
category: 'Crawler'
|
984
|
+
producer:
|
985
|
+
name: 'BIZON, OOO'
|
986
|
+
url: 'https://bi.zone/'
|
987
|
+
|
988
|
+
- regex: '.*masscan'
|
989
|
+
name: 'masscan'
|
990
|
+
url: 'https://github.com/robertdavidgraham/masscan'
|
991
|
+
category: 'Crawler'
|
992
|
+
producer:
|
993
|
+
name: 'Robert Graham'
|
994
|
+
url: 'https://github.com/robertdavidgraham'
|
995
|
+
|
996
|
+
- regex: 'Mastodon/'
|
997
|
+
name: 'Mastodon Bot'
|
998
|
+
category: 'Social Media Agent'
|
999
|
+
|
535
1000
|
- regex: 'meanpathbot'
|
536
1001
|
name: 'Meanpath Bot'
|
537
1002
|
category: 'Search bot'
|
@@ -540,6 +1005,19 @@
|
|
540
1005
|
name: 'Meanpath'
|
541
1006
|
url: 'http://www.meanpath.com'
|
542
1007
|
|
1008
|
+
- regex: 'MetaJobBot'
|
1009
|
+
name: 'MetaJobBot'
|
1010
|
+
category: 'Crawler'
|
1011
|
+
url: 'http://www.metajob.at/the/crawler'
|
1012
|
+
producer:
|
1013
|
+
name: 'MetaJob'
|
1014
|
+
url: 'http://www.metajob.at'
|
1015
|
+
|
1016
|
+
- regex: 'MetaInspector'
|
1017
|
+
name: 'MetaInspector'
|
1018
|
+
category: 'Crawler'
|
1019
|
+
url: 'https://github.com/jaimeiniesta/metainspector'
|
1020
|
+
|
543
1021
|
- regex: 'MixrankBot'
|
544
1022
|
name: 'Mixrank Bot'
|
545
1023
|
category: 'Crawler'
|
@@ -556,6 +1034,13 @@
|
|
556
1034
|
name: 'Majestic-12'
|
557
1035
|
url: 'http://majestic12.co.uk'
|
558
1036
|
|
1037
|
+
- regex: 'Mnogosearch'
|
1038
|
+
name: 'Mnogosearch'
|
1039
|
+
category: 'Search bot'
|
1040
|
+
url: 'http://www.mnogosearch.org/'
|
1041
|
+
producer:
|
1042
|
+
name: 'Lavtech.Com Corp.'
|
1043
|
+
url: ''
|
559
1044
|
- regex: 'MojeekBot'
|
560
1045
|
name: 'MojeekBot'
|
561
1046
|
category: 'Search bot'
|
@@ -564,6 +1049,14 @@
|
|
564
1049
|
name: 'Mojeek Ltd.'
|
565
1050
|
url: 'http://www.mojeek.com'
|
566
1051
|
|
1052
|
+
- regex: 'munin'
|
1053
|
+
name: 'Munin'
|
1054
|
+
category: 'Site Monitor'
|
1055
|
+
url: 'http://munin-monitoring.org/'
|
1056
|
+
producer:
|
1057
|
+
name: 'Munin'
|
1058
|
+
url: 'http://munin-monitoring.org/'
|
1059
|
+
|
567
1060
|
- regex: 'NalezenCzBot'
|
568
1061
|
name: 'NalezenCzBot'
|
569
1062
|
category: 'Crawler'
|
@@ -572,7 +1065,19 @@
|
|
572
1065
|
name: 'Jaroslav Kuboš'
|
573
1066
|
url: ''
|
574
1067
|
|
575
|
-
- regex: '
|
1068
|
+
- regex: 'check_http/v'
|
1069
|
+
name: 'Nagios check_http'
|
1070
|
+
category: 'Site Monitor'
|
1071
|
+
url: 'https://nagios.org'
|
1072
|
+
producer:
|
1073
|
+
name: 'Nagios Plugins Development Team'
|
1074
|
+
url: 'https://nagios.org'
|
1075
|
+
|
1076
|
+
- regex: 'nbertaupete95\(at\)gmail\.com'
|
1077
|
+
name: 'nbertaupete95'
|
1078
|
+
category: 'Crawler'
|
1079
|
+
|
1080
|
+
- regex: 'Netcraft(?: Web Server Survey| SSL Server Survey|SurveyAgent)'
|
576
1081
|
name: 'Netcraft Survey Bot'
|
577
1082
|
category: 'Search bot'
|
578
1083
|
url: ''
|
@@ -580,6 +1085,14 @@
|
|
580
1085
|
name: 'Netcraft'
|
581
1086
|
url: 'http://www.netcraft.com'
|
582
1087
|
|
1088
|
+
- regex: 'netEstate NE Crawler'
|
1089
|
+
name: 'netEstate'
|
1090
|
+
category: 'Crawler'
|
1091
|
+
url: 'http://www.website-datenbank.de/Impressum'
|
1092
|
+
producer:
|
1093
|
+
name: 'netEstate GmbH'
|
1094
|
+
url: 'https://www.netestate.de/en/'
|
1095
|
+
|
583
1096
|
- regex: 'Netvibes'
|
584
1097
|
name: 'Netvibes'
|
585
1098
|
url: 'http://www.netvibes.com/'
|
@@ -588,7 +1101,7 @@
|
|
588
1101
|
name: ''
|
589
1102
|
url: ''
|
590
1103
|
|
591
|
-
- regex: 'NewsBlur .*(Fetcher|Finder)'
|
1104
|
+
- regex: 'NewsBlur .*(?:Fetcher|Finder)'
|
592
1105
|
name: 'NewsBlur'
|
593
1106
|
url: 'http://www.newsblur.com'
|
594
1107
|
category: 'Feed Fetcher'
|
@@ -612,7 +1125,41 @@
|
|
612
1125
|
name: 'Northern Light'
|
613
1126
|
url: 'http://northernlight.com'
|
614
1127
|
|
615
|
-
- regex: '
|
1128
|
+
- regex: 'Nmap Scripting Engine'
|
1129
|
+
name: 'Nmap'
|
1130
|
+
category: 'Security Checker'
|
1131
|
+
url: 'https://nmap.org/book/nse.html'
|
1132
|
+
producer:
|
1133
|
+
name: 'Nmap'
|
1134
|
+
url: 'https://nmap.org/'
|
1135
|
+
|
1136
|
+
- regex: 'Nuzzel'
|
1137
|
+
name: 'Nuzzel'
|
1138
|
+
category: 'Crawler'
|
1139
|
+
producer:
|
1140
|
+
name: 'Nuzzel'
|
1141
|
+
url: 'https://www.nuzzel.com/'
|
1142
|
+
|
1143
|
+
- regex: 'NodePing'
|
1144
|
+
name: 'NodePing'
|
1145
|
+
category: 'Site Monitor'
|
1146
|
+
url: 'https://nodeping.com'
|
1147
|
+
producer:
|
1148
|
+
name: 'NodePing'
|
1149
|
+
url: 'https://nodeping.com'
|
1150
|
+
|
1151
|
+
- regex: 'Octopus [0-9]'
|
1152
|
+
name: 'Octopus'
|
1153
|
+
|
1154
|
+
- regex: 'OnlineOrNot\.com_bot'
|
1155
|
+
name: 'OnlineOrNot Bot'
|
1156
|
+
category: 'Site Monitor'
|
1157
|
+
url: 'https://onlineornot.com/website-monitoring'
|
1158
|
+
producer:
|
1159
|
+
name: 'OnlineOrNot'
|
1160
|
+
url: 'https://onlineornot.com'
|
1161
|
+
|
1162
|
+
- regex: 'omgili'
|
616
1163
|
name: 'Omgili bot'
|
617
1164
|
category: 'Search bot'
|
618
1165
|
url: 'http://www.omgili.com/Crawler.html'
|
@@ -660,6 +1207,11 @@
|
|
660
1207
|
name: 'Smallrivers SA'
|
661
1208
|
url: 'http://www.paper.li'
|
662
1209
|
|
1210
|
+
- regex: 'phantomas/'
|
1211
|
+
name: 'Phantomas'
|
1212
|
+
category: 'Site Monitor'
|
1213
|
+
url: 'https://github.com/macbre/phantomas'
|
1214
|
+
|
663
1215
|
- regex: 'phpservermon'
|
664
1216
|
name: 'PHP Server Monitor'
|
665
1217
|
category: 'Site Monitor'
|
@@ -668,7 +1220,31 @@
|
|
668
1220
|
name: 'PHP Server Monitor'
|
669
1221
|
url: 'http://www.phpservermonitor.org/'
|
670
1222
|
|
671
|
-
- regex: '
|
1223
|
+
- regex: 'Pocket(?:ImageCache|Parser)/[\d.]+'
|
1224
|
+
name: 'Pocket'
|
1225
|
+
category: 'Read-it-later Service'
|
1226
|
+
url: 'https://getpocket.com/pocketparser_ua'
|
1227
|
+
producer:
|
1228
|
+
name: 'Read It Later, Inc.'
|
1229
|
+
url: 'https://getpocket.com/'
|
1230
|
+
|
1231
|
+
- regex: 'PritTorrent'
|
1232
|
+
name: 'PritTorrent'
|
1233
|
+
category: 'Crawler'
|
1234
|
+
url: 'https://github.com/astro/prittorrent'
|
1235
|
+
producer:
|
1236
|
+
name: 'Bitlove'
|
1237
|
+
url: 'http://bitlove.org/'
|
1238
|
+
|
1239
|
+
- regex: 'PRTG Network Monitor'
|
1240
|
+
name: 'PRTG Network Monitor'
|
1241
|
+
category: 'Network Monitor'
|
1242
|
+
url: 'https://www.paessler.com/prtg'
|
1243
|
+
producer:
|
1244
|
+
name: 'Paessler AG'
|
1245
|
+
url: 'https://www.paessler.com'
|
1246
|
+
|
1247
|
+
- regex: 'psbot'
|
672
1248
|
name: 'Picsearch bot'
|
673
1249
|
category: 'Search bot'
|
674
1250
|
url: 'http://www.picsearch.com/bot.html'
|
@@ -676,7 +1252,7 @@
|
|
676
1252
|
name: 'Picsearch'
|
677
1253
|
url: 'http://www.picsearch.com'
|
678
1254
|
|
679
|
-
- regex: 'Pingdom
|
1255
|
+
- regex: 'Pingdom(?:\.com|TMS)'
|
680
1256
|
name: 'Pingdom Bot'
|
681
1257
|
category: 'Site Monitor'
|
682
1258
|
url: ''
|
@@ -684,14 +1260,51 @@
|
|
684
1260
|
name: 'Pingdom AB'
|
685
1261
|
url: 'https://www.pingdom.com'
|
686
1262
|
|
687
|
-
- regex: '
|
688
|
-
name: '
|
1263
|
+
- regex: 'Quora Link Preview'
|
1264
|
+
name: 'Quora Link Preview'
|
689
1265
|
category: 'Crawler'
|
690
|
-
url: '
|
1266
|
+
url: ''
|
691
1267
|
producer:
|
692
|
-
name: '
|
1268
|
+
name: 'Quora'
|
1269
|
+
url: 'http://www.quora.com'
|
1270
|
+
|
1271
|
+
- regex: 'Quora-Bot'
|
1272
|
+
name: 'Quora Bot'
|
1273
|
+
category: 'Crawler'
|
1274
|
+
url: ''
|
1275
|
+
producer:
|
1276
|
+
name: 'Quora'
|
1277
|
+
url: 'https://www.quora.com/'
|
1278
|
+
|
1279
|
+
- regex: 'RamblerMail'
|
1280
|
+
name: 'RamblerMail Image Proxy'
|
1281
|
+
category: 'Crawler'
|
1282
|
+
url: ''
|
1283
|
+
producer:
|
1284
|
+
name: 'Rambler&Co'
|
1285
|
+
url: 'https://rambler-co.ru/'
|
1286
|
+
|
1287
|
+
- regex: 'QuerySeekerSpider'
|
1288
|
+
name: 'QuerySeekerSpider'
|
1289
|
+
category: 'Crawler'
|
1290
|
+
url: 'http://queryseeker.com/bot.html'
|
1291
|
+
producer:
|
1292
|
+
name: 'QueryEye Inc.'
|
693
1293
|
url: 'http://queryeye.com'
|
694
1294
|
|
1295
|
+
- regex: 'Qwantify'
|
1296
|
+
name: 'Qwantify'
|
1297
|
+
category: 'Crawler'
|
1298
|
+
url: 'https://www.qwant.com/'
|
1299
|
+
producer:
|
1300
|
+
name: 'Qwant Corporation'
|
1301
|
+
url: 'https://www.qwant.com/'
|
1302
|
+
|
1303
|
+
- regex: 'Rainmeter'
|
1304
|
+
name: 'Rainmeter'
|
1305
|
+
category: 'Crawler'
|
1306
|
+
url: 'https://www.rainmeter.net'
|
1307
|
+
|
695
1308
|
- regex: 'redditbot'
|
696
1309
|
name: 'Reddit Bot'
|
697
1310
|
category: 'Social Media Agent'
|
@@ -700,6 +1313,14 @@
|
|
700
1313
|
name: 'reddit inc.'
|
701
1314
|
url: 'http://www.reddit.com'
|
702
1315
|
|
1316
|
+
- regex: 'Riddler'
|
1317
|
+
name: 'Riddler'
|
1318
|
+
category: 'Security search bot'
|
1319
|
+
url: 'https://riddler.io/about'
|
1320
|
+
producer:
|
1321
|
+
name: 'F-Secure'
|
1322
|
+
url: 'https://www.f-secure.com'
|
1323
|
+
|
703
1324
|
- regex: 'rogerbot'
|
704
1325
|
name: 'Rogerbot'
|
705
1326
|
category: 'Crawler'
|
@@ -716,6 +1337,14 @@
|
|
716
1337
|
name: 'Roihunter a.s.'
|
717
1338
|
url: 'http://roihunter.com/'
|
718
1339
|
|
1340
|
+
- regex: 'SafeDNSBot'
|
1341
|
+
name: 'SafeDNSBot'
|
1342
|
+
category: 'Crawler'
|
1343
|
+
url: 'https://www.safedns.com/searchbot'
|
1344
|
+
producer:
|
1345
|
+
name: 'SafeDNS, Inc.'
|
1346
|
+
url: 'https://www.safedns.com/'
|
1347
|
+
|
719
1348
|
- regex: 'Scrapy'
|
720
1349
|
name: 'Scrapy'
|
721
1350
|
category: 'Crawler'
|
@@ -738,12 +1367,36 @@
|
|
738
1367
|
url: ''
|
739
1368
|
|
740
1369
|
- regex: 'SemrushBot'
|
741
|
-
name: '
|
1370
|
+
name: 'SemrushBot'
|
1371
|
+
category: 'Crawler'
|
1372
|
+
url: 'https://www.semrush.com/bot/'
|
1373
|
+
producer:
|
1374
|
+
name: 'Semrush Inc.'
|
1375
|
+
url: 'https://www.semrush.com/'
|
1376
|
+
|
1377
|
+
- regex: 'SerpReputationManagementAgent/[\d.]+'
|
1378
|
+
name: 'Semrush Reputation Management'
|
1379
|
+
category: 'Service Agent'
|
1380
|
+
url: 'https://www.semrush.com/bot/'
|
1381
|
+
producer:
|
1382
|
+
name: 'Semrush Inc.'
|
1383
|
+
url: 'https://www.semrush.com/'
|
1384
|
+
|
1385
|
+
- regex: 'SplitSignalBot'
|
1386
|
+
name: 'SplitSignalBot'
|
1387
|
+
category: 'Crawler'
|
1388
|
+
url: 'https://www.semrush.com/bot/'
|
1389
|
+
producer:
|
1390
|
+
name: 'Semrush Inc.'
|
1391
|
+
url: 'https://www.semrush.com/'
|
1392
|
+
|
1393
|
+
- regex: 'SiteAuditBot/[\d.]+'
|
1394
|
+
name: 'SiteAuditBot'
|
742
1395
|
category: 'Crawler'
|
743
|
-
url: '
|
1396
|
+
url: 'https://www.semrush.com/bot/'
|
744
1397
|
producer:
|
745
|
-
name: '
|
746
|
-
url: '
|
1398
|
+
name: 'Semrush Inc.'
|
1399
|
+
url: 'https://www.semrush.com/'
|
747
1400
|
|
748
1401
|
- regex: 'SensikaBot'
|
749
1402
|
name: 'Sensika Bot'
|
@@ -753,7 +1406,7 @@
|
|
753
1406
|
name: 'Sensika'
|
754
1407
|
url: 'http://sensika.com'
|
755
1408
|
|
756
|
-
- regex: 'SEOENG(World)?Bot'
|
1409
|
+
- regex: 'SEOENG(?:World)?Bot'
|
757
1410
|
name: 'SEOENGBot'
|
758
1411
|
category: 'Crawler'
|
759
1412
|
url: 'http://www.seoengine.com/seoengbot.htm'
|
@@ -761,6 +1414,19 @@
|
|
761
1414
|
name: 'SEO Engine'
|
762
1415
|
url: 'http://www.seoengine.com'
|
763
1416
|
|
1417
|
+
- regex: 'SEOkicks-Robot'
|
1418
|
+
name: 'SEOkicks-Robot'
|
1419
|
+
category: 'Crawler'
|
1420
|
+
url: 'http://www.seokicks.de/robot.html'
|
1421
|
+
producer:
|
1422
|
+
name: 'SEOkicks'
|
1423
|
+
url: 'https://www.seokicks.de/'
|
1424
|
+
|
1425
|
+
- regex: 'seoscanners\.net'
|
1426
|
+
name: 'Seoscanners.net'
|
1427
|
+
category: 'Crawler'
|
1428
|
+
url: ''
|
1429
|
+
|
764
1430
|
- regex: 'SkypeUriPreview'
|
765
1431
|
name: 'Skype URI Preview'
|
766
1432
|
category: 'Service Agent'
|
@@ -777,6 +1443,14 @@
|
|
777
1443
|
name: 'Seznam.cz, a.s.'
|
778
1444
|
url: 'http://www.seznam.cz/'
|
779
1445
|
|
1446
|
+
- regex: 'shopify-partner-homepage-scraper'
|
1447
|
+
name: 'Shopify Partner'
|
1448
|
+
category: 'Crawler'
|
1449
|
+
url: 'https://www.shopify.com/partners'
|
1450
|
+
producer:
|
1451
|
+
name: 'Shopify'
|
1452
|
+
url: 'https://www.shopify.com/'
|
1453
|
+
|
780
1454
|
- regex: 'ShopWiki'
|
781
1455
|
name: 'ShopWiki'
|
782
1456
|
category: 'Search tools'
|
@@ -809,6 +1483,27 @@
|
|
809
1483
|
name: 'SISTRIX GmbH'
|
810
1484
|
url: 'http://www.sistrix.de'
|
811
1485
|
|
1486
|
+
- regex: 'compatible; (?:SISTRIX )?Optimizer'
|
1487
|
+
name: 'SISTRIX Optimizer'
|
1488
|
+
category: 'Crawler'
|
1489
|
+
url: 'https://optimizer.sistrix.com'
|
1490
|
+
producer:
|
1491
|
+
name: 'SISTRIX GmbH'
|
1492
|
+
url: 'http://www.sistrix.de'
|
1493
|
+
|
1494
|
+
- regex: 'SiteSucker'
|
1495
|
+
name: 'SiteSucker'
|
1496
|
+
category: 'Crawler'
|
1497
|
+
url: 'http://ricks-apps.com/osx/sitesucker/'
|
1498
|
+
|
1499
|
+
- regex: 'sixy\.ch'
|
1500
|
+
name: 'Sixy.ch'
|
1501
|
+
category: 'Site Monitor'
|
1502
|
+
url: 'http://sixy.ch'
|
1503
|
+
producer:
|
1504
|
+
name: 'Manuel Kasper'
|
1505
|
+
url: 'https://neon1.net/'
|
1506
|
+
|
812
1507
|
- regex: 'Slackbot|Slack-ImgProxy'
|
813
1508
|
name: 'Slackbot'
|
814
1509
|
category: 'Crawler'
|
@@ -817,7 +1512,7 @@
|
|
817
1512
|
name: 'Slack Technologies'
|
818
1513
|
url: 'http://slack.com'
|
819
1514
|
|
820
|
-
- regex: '
|
1515
|
+
- regex: 'Sogou[ -](?:head|inst|Orion|Pic|Test|web)[ -]spider|New-Sogou-Spider'
|
821
1516
|
name: 'Sogou Spider'
|
822
1517
|
category: 'Search bot'
|
823
1518
|
url: 'http://www.sogou.com/docs/help/webmasters.htm'
|
@@ -833,6 +1528,38 @@
|
|
833
1528
|
name: 'Tencent Holdings'
|
834
1529
|
url: 'http://www.soso.com'
|
835
1530
|
|
1531
|
+
- regex: 'Sprinklr'
|
1532
|
+
name: 'Sprinklr'
|
1533
|
+
category: 'Crawler'
|
1534
|
+
url: ''
|
1535
|
+
producer:
|
1536
|
+
name: 'Sprinklr, Inc.'
|
1537
|
+
url: 'https://www.sprinklr.com/'
|
1538
|
+
|
1539
|
+
- regex: 'sqlmap/'
|
1540
|
+
name: 'sqlmap'
|
1541
|
+
category: 'Security Checker'
|
1542
|
+
url: 'http://sqlmap.org/'
|
1543
|
+
producer:
|
1544
|
+
name: 'sqlmap'
|
1545
|
+
url: 'http://sqlmap.org/'
|
1546
|
+
|
1547
|
+
- regex: 'SSL Labs'
|
1548
|
+
name: 'SSL Labs'
|
1549
|
+
category: 'Validator'
|
1550
|
+
url: 'https://www.ssllabs.com/about/assessment.html'
|
1551
|
+
producer:
|
1552
|
+
name: 'SSL Labs'
|
1553
|
+
url: 'https://www.ssllabs.com/about/assessment.html'
|
1554
|
+
|
1555
|
+
- regex: 'StatusCake'
|
1556
|
+
name: 'StatusCake'
|
1557
|
+
category: 'Site Monitor'
|
1558
|
+
url: 'https://www.statuscake.com'
|
1559
|
+
producer:
|
1560
|
+
name: 'StatusCake'
|
1561
|
+
url: 'https://www.statuscake.com'
|
1562
|
+
|
836
1563
|
- regex: 'Superfeedr bot'
|
837
1564
|
name: 'Superfeedr Bot'
|
838
1565
|
category: 'Feed Fetcher'
|
@@ -841,6 +1568,11 @@
|
|
841
1568
|
name: 'Superfeedr'
|
842
1569
|
url: 'https://superfeedr.com/'
|
843
1570
|
|
1571
|
+
- regex: 'Sparkler/[0-9]'
|
1572
|
+
name: 'Sparkler'
|
1573
|
+
category: 'Crawler'
|
1574
|
+
url: 'https://github.com/USCDataScience/sparkler'
|
1575
|
+
|
844
1576
|
- regex: 'Spinn3r'
|
845
1577
|
name: 'Spinn3r'
|
846
1578
|
category: 'Crawler'
|
@@ -849,13 +1581,20 @@
|
|
849
1581
|
name: 'Tailrank Inc'
|
850
1582
|
url: 'http://spinn3r.com'
|
851
1583
|
|
852
|
-
- regex: '
|
1584
|
+
- regex: 'SputnikBot'
|
853
1585
|
name: 'Sputnik Bot'
|
854
|
-
category: ''
|
1586
|
+
category: 'Crawler'
|
1587
|
+
url: ''
|
1588
|
+
|
1589
|
+
- regex: 'SputnikFaviconBot'
|
1590
|
+
name: 'Sputnik Favicon Bot'
|
1591
|
+
category: 'Crawler'
|
1592
|
+
url: ''
|
1593
|
+
|
1594
|
+
- regex: 'SputnikImageBot'
|
1595
|
+
name: 'Sputnik Image Bot'
|
1596
|
+
category: 'Crawler'
|
855
1597
|
url: ''
|
856
|
-
producer:
|
857
|
-
name: ''
|
858
|
-
url: ''
|
859
1598
|
|
860
1599
|
- regex: 'SurveyBot'
|
861
1600
|
name: 'Survey Bot'
|
@@ -865,10 +1604,23 @@
|
|
865
1604
|
name: 'Domain Tools'
|
866
1605
|
url: 'http://www.domaintools.com'
|
867
1606
|
|
1607
|
+
- regex: 'TarmotGezgin'
|
1608
|
+
name: 'Tarmot Gezgin'
|
1609
|
+
url: 'http://www.tarmot.com/gezgin/'
|
1610
|
+
category: 'Search bot'
|
1611
|
+
|
868
1612
|
- regex: 'TelegramBot'
|
869
|
-
name: '
|
1613
|
+
name: 'TelegramBot'
|
870
1614
|
url: 'https://telegram.org/blog/bot-revolution'
|
871
1615
|
|
1616
|
+
- regex: 'TLSProbe'
|
1617
|
+
name: 'TLSProbe'
|
1618
|
+
url: 'https://scan.trustnet.venafi.com/'
|
1619
|
+
category: 'Security search bot'
|
1620
|
+
producer:
|
1621
|
+
name: 'Venafi TrustNet'
|
1622
|
+
url: 'https://www.venafi.com'
|
1623
|
+
|
872
1624
|
- regex: 'TinEye-bot'
|
873
1625
|
name: 'TinEye Crawler'
|
874
1626
|
category: 'Search bot'
|
@@ -885,6 +1637,27 @@
|
|
885
1637
|
name: ''
|
886
1638
|
url: ''
|
887
1639
|
|
1640
|
+
- regex: 'theoldreader\.com'
|
1641
|
+
name: 'theoldreader'
|
1642
|
+
category: 'Feed Reader'
|
1643
|
+
url: 'https://theoldreader.com'
|
1644
|
+
|
1645
|
+
- regex: 'Trackable/0\.1'
|
1646
|
+
name: 'Chartable'
|
1647
|
+
category: 'Site Monitor'
|
1648
|
+
url: 'https://help.chartable.com/article/34-what-is-the-trackable-analytics-prefix'
|
1649
|
+
producer:
|
1650
|
+
name: 'Chartable'
|
1651
|
+
url: 'https://chartable.com'
|
1652
|
+
|
1653
|
+
- regex: 'trendictionbot'
|
1654
|
+
name: 'Trendiction Bot'
|
1655
|
+
category: 'Crawler'
|
1656
|
+
url: 'http://www.trendiction.de/bot'
|
1657
|
+
producer:
|
1658
|
+
name: 'Talkwalker Inc.'
|
1659
|
+
url: 'http://www.talkwalker.com'
|
1660
|
+
|
888
1661
|
- regex: 'TurnitinBot'
|
889
1662
|
name: 'TurnitinBot'
|
890
1663
|
category: 'Crawler'
|
@@ -893,13 +1666,13 @@
|
|
893
1666
|
name: 'iParadigms, LLC.'
|
894
1667
|
url: 'http://www.turnitin.com'
|
895
1668
|
|
896
|
-
- regex: 'TweetedTimes
|
1669
|
+
- regex: 'TweetedTimes'
|
897
1670
|
name: 'TweetedTimes Bot'
|
898
1671
|
category: 'Crawler'
|
899
|
-
url: '
|
1672
|
+
url: 'https://tweetedtimes.com/'
|
900
1673
|
producer:
|
901
1674
|
name: 'TweetedTimes'
|
902
|
-
url: '
|
1675
|
+
url: 'https://tweetedtimes.com/'
|
903
1676
|
|
904
1677
|
- regex: 'TweetmemeBot'
|
905
1678
|
name: 'Tweetmeme Bot'
|
@@ -909,6 +1682,13 @@
|
|
909
1682
|
name: 'Mediasift'
|
910
1683
|
url: ''
|
911
1684
|
|
1685
|
+
- regex: 'Twingly Recon'
|
1686
|
+
name: 'Twingly Recon'
|
1687
|
+
category: 'Crawler'
|
1688
|
+
producer:
|
1689
|
+
name: 'Twingly'
|
1690
|
+
url: 'https://www.twingly.com'
|
1691
|
+
|
912
1692
|
- regex: 'Twitterbot'
|
913
1693
|
name: 'Twitterbot'
|
914
1694
|
category: 'Social Media Agent'
|
@@ -917,13 +1697,37 @@
|
|
917
1697
|
name: 'Twitter'
|
918
1698
|
url: 'http://www.twitter.com'
|
919
1699
|
|
1700
|
+
- regex: 'UniversalFeedParser'
|
1701
|
+
name: 'UniversalFeedParser'
|
1702
|
+
category: 'Feed Fetcher'
|
1703
|
+
url: 'https://github.com/kurtmckee/feedparser'
|
1704
|
+
producer:
|
1705
|
+
name: 'Kurt McKee'
|
1706
|
+
url: 'https://github.com/kurtmckee'
|
1707
|
+
|
1708
|
+
- regex: 'via secureurl\.fwdcdn\.com'
|
1709
|
+
name: 'UkrNet Mail Proxy'
|
1710
|
+
category: 'Crawler'
|
1711
|
+
url: ''
|
1712
|
+
producer:
|
1713
|
+
name: 'UkrNet Ltd'
|
1714
|
+
url: 'https://www.ukr.net/'
|
1715
|
+
|
1716
|
+
- regex: 'Uptime(?:bot)?/[\d.]+'
|
1717
|
+
name: 'Uptimebot'
|
1718
|
+
category: 'Site Monitor'
|
1719
|
+
url: 'https://uptime.com/uptime-bot'
|
1720
|
+
producer:
|
1721
|
+
name: 'Uptime'
|
1722
|
+
url: 'https://uptime.com/'
|
1723
|
+
|
920
1724
|
- regex: 'UptimeRobot'
|
921
|
-
name: '
|
1725
|
+
name: 'UptimeRobot'
|
922
1726
|
category: 'Site Monitor'
|
923
|
-
url: ''
|
1727
|
+
url: 'https://uptimerobot.com/'
|
924
1728
|
producer:
|
925
1729
|
name: 'Uptime Robot'
|
926
|
-
url: '
|
1730
|
+
url: 'https://uptimerobot.com/'
|
927
1731
|
|
928
1732
|
- regex: 'URLAppendBot'
|
929
1733
|
name: 'URLAppendBot'
|
@@ -933,6 +1737,30 @@
|
|
933
1737
|
name: 'Profound Networks'
|
934
1738
|
url: 'http://www.profound.net'
|
935
1739
|
|
1740
|
+
- regex: 'Vagabondo'
|
1741
|
+
name: 'Vagabondo'
|
1742
|
+
category: 'Crawler'
|
1743
|
+
url: ''
|
1744
|
+
producer:
|
1745
|
+
name: 'WiseGuys'
|
1746
|
+
url: 'http://www.wise-guys.nl/'
|
1747
|
+
|
1748
|
+
- regex: 'vkShare; '
|
1749
|
+
name: 'VK Share Button'
|
1750
|
+
category: 'Crawler'
|
1751
|
+
url: 'https://dev.vk.com/en/widgets/share'
|
1752
|
+
producer:
|
1753
|
+
name: 'VK'
|
1754
|
+
url: 'https://vk.com/'
|
1755
|
+
|
1756
|
+
- regex: 'VKRobot'
|
1757
|
+
name: 'VK Robot'
|
1758
|
+
category: 'Crawler'
|
1759
|
+
url: 'https://dev.vk.com/en/'
|
1760
|
+
producer:
|
1761
|
+
name: 'VK'
|
1762
|
+
url: 'https://vk.com/'
|
1763
|
+
|
936
1764
|
- regex: 'VSMCrawler'
|
937
1765
|
name: 'Visual Site Mapper Crawler'
|
938
1766
|
category: 'Crawler'
|
@@ -965,7 +1793,7 @@
|
|
965
1793
|
name: 'W3C'
|
966
1794
|
url: 'http://www.w3.org'
|
967
1795
|
|
968
|
-
- regex: 'W3C_Validator'
|
1796
|
+
- regex: 'W3C_Validator|Validator\.nu'
|
969
1797
|
name: 'W3C Markup Validation Service'
|
970
1798
|
category: 'Validator'
|
971
1799
|
url: 'http://validator.w3.org/services'
|
@@ -989,7 +1817,27 @@
|
|
989
1817
|
name: 'W3C'
|
990
1818
|
url: 'http://www.w3.org'
|
991
1819
|
|
992
|
-
- regex: '
|
1820
|
+
- regex: 'P3P Validator'
|
1821
|
+
name: 'W3C P3P Validator'
|
1822
|
+
category: 'Validator'
|
1823
|
+
url: 'https://www.w3.org/P3P/validator.html'
|
1824
|
+
producer:
|
1825
|
+
name: 'W3C'
|
1826
|
+
url: 'https://www.w3.org'
|
1827
|
+
|
1828
|
+
- regex: 'Wappalyzer'
|
1829
|
+
name: 'Wappalyzer'
|
1830
|
+
url: 'https://github.com/AliasIO/Wappalyzer'
|
1831
|
+
producer:
|
1832
|
+
name: 'AliasIO'
|
1833
|
+
url: 'https://github.com/AliasIO'
|
1834
|
+
|
1835
|
+
- regex: 'PTST/'
|
1836
|
+
name: 'WebPageTest'
|
1837
|
+
category: 'Site Monitor'
|
1838
|
+
url: 'https://www.webpagetest.org'
|
1839
|
+
|
1840
|
+
- regex: 'WeSEE'
|
993
1841
|
name: 'WeSEE:Search'
|
994
1842
|
category: 'Search bot'
|
995
1843
|
url: 'http://www.wesee.com/bot'
|
@@ -1013,6 +1861,30 @@
|
|
1013
1861
|
name: 'WebSitePulse'
|
1014
1862
|
url: 'http://www.websitepulse.com/'
|
1015
1863
|
|
1864
|
+
- regex: 'WordPress.+isitwp\.com'
|
1865
|
+
name: 'IsItWP'
|
1866
|
+
category: 'Crawler'
|
1867
|
+
url: 'https://www.isitwp.com/'
|
1868
|
+
producer:
|
1869
|
+
name: 'WPBeginner, LLC'
|
1870
|
+
url: 'https://www.wpbeginner.com/'
|
1871
|
+
|
1872
|
+
- regex: 'Automattic Analytics Crawler/[\d.]+'
|
1873
|
+
name: 'Automattic Analytics'
|
1874
|
+
category: 'Crawler'
|
1875
|
+
url: 'https://wordpress.com/crawler/'
|
1876
|
+
producer:
|
1877
|
+
name: 'Wordpress.org'
|
1878
|
+
url: 'https://wordpress.org/'
|
1879
|
+
|
1880
|
+
- regex: 'WordPress'
|
1881
|
+
name: 'WordPress'
|
1882
|
+
category: 'Service Agent'
|
1883
|
+
url: 'https://wordpress.org/'
|
1884
|
+
producer:
|
1885
|
+
name: 'Wordpress.org'
|
1886
|
+
url: 'https://wordpress.org/'
|
1887
|
+
|
1016
1888
|
- regex: 'Wotbox'
|
1017
1889
|
name: 'Wotbox'
|
1018
1890
|
category: 'Search bot'
|
@@ -1021,6 +1893,14 @@
|
|
1021
1893
|
name: 'Wotbox'
|
1022
1894
|
url: 'http://www.wotbox.com'
|
1023
1895
|
|
1896
|
+
- regex: 'XenForo'
|
1897
|
+
name: 'XenForo'
|
1898
|
+
category: 'Service Agent'
|
1899
|
+
url: 'https://xenforo.com/'
|
1900
|
+
producer:
|
1901
|
+
name: 'XenForo Ltd.'
|
1902
|
+
url: 'https://xenforo.com/'
|
1903
|
+
|
1024
1904
|
- regex: 'yacybot'
|
1025
1905
|
name: 'YaCy'
|
1026
1906
|
category: 'Search bot'
|
@@ -1045,6 +1925,14 @@
|
|
1045
1925
|
name: 'Yahoo! Inc.'
|
1046
1926
|
url: 'http://www.yahoo.com'
|
1047
1927
|
|
1928
|
+
- regex: 'YahooMailProxy'
|
1929
|
+
name: 'Yahoo! Mail Proxy'
|
1930
|
+
category: 'Service Agent'
|
1931
|
+
url: 'https://help.yahoo.com/kb/yahoo-mail-proxy-SLN28749.html'
|
1932
|
+
producer:
|
1933
|
+
name: 'Yahoo! Inc.'
|
1934
|
+
url: 'http://www.yahoo.com'
|
1935
|
+
|
1048
1936
|
- regex: 'YahooCacheSystem'
|
1049
1937
|
name: 'Yahoo! Cache System'
|
1050
1938
|
category: 'Crawler'
|
@@ -1053,15 +1941,47 @@
|
|
1053
1941
|
name: 'Yahoo! Inc.'
|
1054
1942
|
url: 'http://www.yahoo.com'
|
1055
1943
|
|
1056
|
-
- regex: '
|
1944
|
+
- regex: 'Y!J-BRW'
|
1945
|
+
name: 'Yahoo! Japan BRW'
|
1946
|
+
category: 'Crawler'
|
1947
|
+
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
1948
|
+
producer:
|
1949
|
+
name: 'Yahoo! Japan Corp.'
|
1950
|
+
url: 'https://www.yahoo.co.jp/'
|
1951
|
+
|
1952
|
+
- regex: 'Y!J-WSC'
|
1953
|
+
name: 'Yahoo! Japan WSC'
|
1954
|
+
category: 'Crawler'
|
1955
|
+
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
1956
|
+
producer:
|
1957
|
+
name: 'Yahoo! Japan Corp.'
|
1958
|
+
url: 'https://www.yahoo.co.jp/'
|
1959
|
+
|
1960
|
+
- regex: 'Y!J-ASR'
|
1961
|
+
name: 'Yahoo! Japan ASR'
|
1962
|
+
category: 'Crawler'
|
1963
|
+
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
1964
|
+
producer:
|
1965
|
+
name: 'Yahoo! Japan Corp.'
|
1966
|
+
url: 'https://www.yahoo.co.jp/'
|
1967
|
+
|
1968
|
+
- regex: '^Y!J'
|
1969
|
+
name: 'Yahoo! Japan'
|
1970
|
+
category: 'Crawler'
|
1971
|
+
url: 'https://support.yahoo-net.jp/PccSearch/s/article/H000007955'
|
1972
|
+
producer:
|
1973
|
+
name: 'Yahoo! Japan Corp.'
|
1974
|
+
url: 'https://www.yahoo.co.jp/'
|
1975
|
+
|
1976
|
+
- regex: 'Yandex(?:(?:\.Gazeta |Accessibility|Mobile|MobileScreenShot|RenderResources|Screenshot|Sprav)?Bot|(?:AdNet|Antivirus|Blogs|Calendar|Catalog|Direct|Favicons|ForDomain|ImageResizer|Images|Market|Media|Metrika|News|OntoDB(?:API)?|Pagechecker|Partner|RCA|SearchShop|(?:News|Site)links|Tracker|Turbo|Userproxy|Verticals|Vertis|Video|Webmaster))|YaDirectFetcher'
|
1057
1977
|
name: 'Yandex Bot'
|
1058
1978
|
category: 'Search bot'
|
1059
|
-
url: '
|
1979
|
+
url: 'https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html'
|
1060
1980
|
producer:
|
1061
1981
|
name: 'Yandex LLC'
|
1062
|
-
url: '
|
1982
|
+
url: 'https://yandex.com/company/'
|
1063
1983
|
|
1064
|
-
- regex: 'Yeti'
|
1984
|
+
- regex: 'Yeti|NaverJapan|AdsBot-Naver'
|
1065
1985
|
name: 'Yeti/Naverbot'
|
1066
1986
|
category: 'Search bot'
|
1067
1987
|
url: 'http://help.naver.com/robots/'
|
@@ -1090,6 +2010,11 @@
|
|
1090
2010
|
name: 'YunYun'
|
1091
2011
|
url: 'http://www.yunyun.com'
|
1092
2012
|
|
2013
|
+
- regex: 'zgrab'
|
2014
|
+
name: 'zgrab'
|
2015
|
+
category: 'Security Checker'
|
2016
|
+
url: 'https://github.com/zmap/zgrab'
|
2017
|
+
|
1093
2018
|
- regex: 'Zookabot'
|
1094
2019
|
name: 'Zookabot'
|
1095
2020
|
category: 'Crawler'
|
@@ -1114,7 +2039,7 @@
|
|
1114
2039
|
name: 'Yottaa'
|
1115
2040
|
url: 'http://www.yottaa.com/'
|
1116
2041
|
|
1117
|
-
- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857
|
2042
|
+
- regex: 'Yahoo Ad monitoring.*yahoo-ad-monitoring-SLN24857'
|
1118
2043
|
name: 'Yahoo Gemini'
|
1119
2044
|
category: 'Crawler'
|
1120
2045
|
url: 'https://help.yahoo.com/kb/yahoo-ad-monitoring-SLN24857.html'
|
@@ -1133,20 +2058,20 @@
|
|
1133
2058
|
- regex: 'HubPages.*crawlingpolicy'
|
1134
2059
|
name: 'HubPages'
|
1135
2060
|
category: 'Crawler'
|
1136
|
-
url: '
|
2061
|
+
url: 'https://hubpages.com/help/crawlingpolicy'
|
1137
2062
|
producer:
|
1138
|
-
name: 'HubPages'
|
1139
|
-
url: '
|
2063
|
+
name: 'HubPages, Inc.'
|
2064
|
+
url: 'https://discover.hubpages.com/'
|
1140
2065
|
|
1141
|
-
- regex: 'Pinterest
|
2066
|
+
- regex: 'Pinterest(?:bot)?/[\d.]+.*www\.pinterest\.com'
|
1142
2067
|
name: 'Pinterest'
|
1143
|
-
url: ''
|
2068
|
+
url: 'https://help.pinterest.com/en/business/article/pinterest-crawler'
|
1144
2069
|
category: 'Crawler'
|
1145
2070
|
producer:
|
1146
2071
|
name: 'Pinterest'
|
1147
|
-
url: '
|
2072
|
+
url: 'https://www.pinterest.com/'
|
1148
2073
|
|
1149
|
-
- regex: 'Site24x7'
|
2074
|
+
- regex: '.*Site24x7'
|
1150
2075
|
name: 'Site24x7 Website Monitoring'
|
1151
2076
|
category: 'Site Monitor'
|
1152
2077
|
url: 'https://www.site24x7.com/site24x7-faq.html'
|
@@ -1154,6 +2079,54 @@
|
|
1154
2079
|
name: 'Site24x7'
|
1155
2080
|
url: 'https://www.site24x7.com'
|
1156
2081
|
|
2082
|
+
- regex: '.* HLB/[\d.]+'
|
2083
|
+
name: 'Site24x7 Defacement Monitor'
|
2084
|
+
category: 'Site Monitor'
|
2085
|
+
url: 'https://support.site24x7.com/portal/en/kb/articles/default-user-agent-used-in-website-defacement-monitor'
|
2086
|
+
producer:
|
2087
|
+
name: 'Site24x7'
|
2088
|
+
url: 'https://www.site24x7.com/'
|
2089
|
+
|
2090
|
+
- regex: 's~snapchat-proxy'
|
2091
|
+
name: 'Snapchat Proxy'
|
2092
|
+
category: 'Crawler'
|
2093
|
+
url: 'https://www.snapchat.com'
|
2094
|
+
producer:
|
2095
|
+
name: 'Snapchat Inc.'
|
2096
|
+
url: 'https://www.snapchat.com'
|
2097
|
+
|
2098
|
+
- regex: 'Snap URL Preview Service'
|
2099
|
+
name: 'Snap URL Preview Service'
|
2100
|
+
category: 'Service Agent'
|
2101
|
+
url: 'https://developers.snap.com/robots'
|
2102
|
+
producer:
|
2103
|
+
name: 'Snapchat Inc.'
|
2104
|
+
url: 'https://www.snapchat.com/'
|
2105
|
+
|
2106
|
+
- regex: 'SnapchatAds/[\d.]+'
|
2107
|
+
name: 'Snapchat Ads'
|
2108
|
+
category: 'Crawler'
|
2109
|
+
url: 'https://businesshelp.snapchat.com/s/article/adsbot-crawler?language=en_US'
|
2110
|
+
producer:
|
2111
|
+
name: 'Snapchat Inc.'
|
2112
|
+
url: 'https://www.snapchat.com/'
|
2113
|
+
|
2114
|
+
- regex: "Let's Encrypt validation server"
|
2115
|
+
name: "Let's Encrypt Validation"
|
2116
|
+
category: 'Service Agent'
|
2117
|
+
url: 'https://letsencrypt.org/how-it-works/'
|
2118
|
+
producer:
|
2119
|
+
name: "Let's Encrypt"
|
2120
|
+
url: 'https://letsencrypt.org'
|
2121
|
+
|
2122
|
+
- regex: 'GrapeshotCrawler'
|
2123
|
+
name: 'Grapeshot'
|
2124
|
+
category: 'Crawler'
|
2125
|
+
url: 'https://www.grapeshot.com/crawler'
|
2126
|
+
producer:
|
2127
|
+
name: 'Grapeshot'
|
2128
|
+
url: 'https://www.grapeshot.com'
|
2129
|
+
|
1157
2130
|
- regex: 'www\.monitor\.us'
|
1158
2131
|
name: 'Monitor.Us'
|
1159
2132
|
category: 'Site Monitor'
|
@@ -1162,6 +2135,22 @@
|
|
1162
2135
|
name: 'Monitor.Us'
|
1163
2136
|
url: 'http://www.monitor.us'
|
1164
2137
|
|
2138
|
+
- regex: 'Catchpoint'
|
2139
|
+
name: 'Catchpoint'
|
2140
|
+
category: 'Site Monitor'
|
2141
|
+
url: 'https://www.catchpoint.com/'
|
2142
|
+
producer:
|
2143
|
+
name: 'Catchpoint Systems'
|
2144
|
+
url: 'https://www.catchpoint.com/'
|
2145
|
+
|
2146
|
+
- regex: 'bitlybot'
|
2147
|
+
name: 'BitlyBot'
|
2148
|
+
category: 'Crawler'
|
2149
|
+
url: 'https://bitly.com'
|
2150
|
+
producer:
|
2151
|
+
name: 'Bitly, Inc.'
|
2152
|
+
url: 'https://bitly.com'
|
2153
|
+
|
1165
2154
|
- regex: 'Zao/'
|
1166
2155
|
name: 'Zao'
|
1167
2156
|
category: 'Crawler'
|
@@ -1214,13 +2203,211 @@
|
|
1214
2203
|
- regex: 'AdMantX.*admantx\.com'
|
1215
2204
|
name: 'ADMantX'
|
1216
2205
|
|
1217
|
-
- regex: 'Server Density Service Monitoring
|
2206
|
+
- regex: 'Server Density Service Monitoring'
|
1218
2207
|
name: 'Server Density'
|
1219
2208
|
|
1220
|
-
- regex: '
|
1221
|
-
name: '
|
2209
|
+
- regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
|
2210
|
+
name: 'RSSRadio Bot'
|
1222
2211
|
|
1223
|
-
|
2212
|
+
- regex: '^sentry'
|
2213
|
+
name: 'Sentry Bot'
|
2214
|
+
producer:
|
2215
|
+
name: 'Sentry'
|
2216
|
+
url: 'https://sentry.io'
|
2217
|
+
|
2218
|
+
- regex: '^Spotify/[\d.]+$'
|
2219
|
+
name: 'Spotify'
|
2220
|
+
producer:
|
2221
|
+
name: 'Spotify'
|
2222
|
+
url: 'https://www.spotify.com'
|
2223
|
+
|
2224
|
+
- regex: 'The Knowledge AI'
|
2225
|
+
name: 'The Knowledge AI'
|
2226
|
+
category: 'Crawler'
|
2227
|
+
|
2228
|
+
- regex: 'Embedly'
|
2229
|
+
name: 'Embedly'
|
2230
|
+
category: 'Crawler'
|
2231
|
+
url: 'https://support.embed.ly/hc/en-us'
|
2232
|
+
producer:
|
2233
|
+
name: 'A Medium, Corp.'
|
2234
|
+
url: 'https://medium.com/'
|
2235
|
+
|
2236
|
+
- regex: 'BrandVerity'
|
2237
|
+
name: 'BrandVerity'
|
2238
|
+
category: 'Crawler'
|
2239
|
+
url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
|
2240
|
+
producer:
|
2241
|
+
name: 'BrandVerity, Inc.'
|
2242
|
+
url: 'https://www.brandverity.com/'
|
2243
|
+
|
2244
|
+
- regex: 'Kaspersky Lab CFR link resolver'
|
2245
|
+
name: 'Kaspersky'
|
2246
|
+
category: 'Security Checker'
|
2247
|
+
url: 'https://www.kaspersky.com/'
|
2248
|
+
producer:
|
2249
|
+
name: 'AO Kaspersky Lab'
|
2250
|
+
url: 'https://www.kaspersky.com/'
|
2251
|
+
|
2252
|
+
- regex: 'eZ Publish Link Validator'
|
2253
|
+
name: 'eZ Publish Link Validator'
|
2254
|
+
category: 'Crawler'
|
2255
|
+
url: 'https://ez.no/'
|
2256
|
+
producer:
|
2257
|
+
name: 'eZ Systems AS'
|
2258
|
+
url: 'https://ez.no/'
|
2259
|
+
|
2260
|
+
- regex: 'woorankreview'
|
2261
|
+
name: 'WooRank'
|
2262
|
+
category: 'Search bot'
|
2263
|
+
url: 'https://www.woorank.com/'
|
2264
|
+
producer:
|
2265
|
+
name: 'WooRank sprl'
|
2266
|
+
url: 'https://www.woorank.com/'
|
2267
|
+
|
2268
|
+
- regex: 'by Siteimprove\.com'
|
2269
|
+
name: 'Siteimprove'
|
2270
|
+
category: 'Search bot'
|
2271
|
+
url: 'https://siteimprove.com/'
|
2272
|
+
producer:
|
2273
|
+
name: 'Siteimprove GmbH'
|
2274
|
+
url: 'https://siteimprove.com/'
|
2275
|
+
|
2276
|
+
- regex: 'CATExplorador'
|
2277
|
+
name: 'CATExplorador'
|
2278
|
+
category: 'Search bot'
|
2279
|
+
url: 'https://fundacio.cat/ca/domini/'
|
2280
|
+
producer:
|
2281
|
+
name: 'Fundació puntCAT'
|
2282
|
+
url: 'https://fundacio.cat/ca/domini/'
|
2283
|
+
|
2284
|
+
- regex: 'Buck'
|
2285
|
+
name: 'Buck'
|
2286
|
+
category: 'Search bot'
|
2287
|
+
url: 'https://hypefactors.com/'
|
2288
|
+
producer:
|
2289
|
+
name: 'Hypefactors A/S'
|
2290
|
+
url: 'https://hypefactors.com/'
|
2291
|
+
|
2292
|
+
- regex: 'tracemyfile'
|
2293
|
+
name: 'TraceMyFile'
|
2294
|
+
category: 'Search bot'
|
2295
|
+
url: 'https://www.tracemyfile.com/'
|
2296
|
+
producer:
|
2297
|
+
name: 'Idee Inc.'
|
2298
|
+
url: 'http://ideeinc.com/'
|
2299
|
+
|
2300
|
+
- regex: 'zelist\.ro feed parser'
|
2301
|
+
name: 'Ze List'
|
2302
|
+
url: 'https://www.zelist.ro/'
|
2303
|
+
category: 'Feed Fetcher'
|
2304
|
+
producer:
|
2305
|
+
name: 'Treeworks SRL'
|
2306
|
+
url: 'https://www.tree.ro/'
|
2307
|
+
|
2308
|
+
- regex: 'weborama-fetcher'
|
2309
|
+
name: 'Weborama'
|
2310
|
+
category: 'Search bot'
|
2311
|
+
url: 'https://weborama.com/'
|
2312
|
+
producer:
|
2313
|
+
name: 'Weborama SA'
|
2314
|
+
url: 'https://weborama.com/'
|
2315
|
+
|
2316
|
+
- regex: 'BoardReader Favicon Fetcher'
|
2317
|
+
name: 'BoardReader'
|
2318
|
+
category: 'Search bot'
|
2319
|
+
url: 'https://boardreader.com/'
|
2320
|
+
producer:
|
2321
|
+
name: 'Effyis Inc'
|
2322
|
+
url: 'https://boardreader.com/'
|
2323
|
+
|
2324
|
+
- regex: 'IDG/IT'
|
2325
|
+
name: 'IDG/IT'
|
2326
|
+
category: 'Search bot'
|
2327
|
+
url: 'https://spaziodati.eu/'
|
2328
|
+
producer:
|
2329
|
+
name: 'SpazioDati S.r.l.'
|
2330
|
+
url: 'https://spaziodati.eu/'
|
2331
|
+
|
2332
|
+
- regex: 'Bytespider'
|
2333
|
+
name: 'Bytespider'
|
2334
|
+
category: 'Search bot'
|
2335
|
+
url: 'https://bytedance.com/'
|
2336
|
+
producer:
|
2337
|
+
name: 'ByteDance Ltd.'
|
2338
|
+
url: 'https://bytedance.com/'
|
2339
|
+
|
2340
|
+
- regex: 'WikiDo'
|
2341
|
+
name: 'WikiDo'
|
2342
|
+
category: 'Search bot'
|
2343
|
+
url: 'https://www.wikido.com/'
|
2344
|
+
producer:
|
2345
|
+
name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
|
2346
|
+
url: 'https://www.wikido.com/'
|
2347
|
+
|
2348
|
+
- regex: 'Awario(?:Smart)?Bot'
|
2349
|
+
name: 'Awario'
|
2350
|
+
category: 'Search bot'
|
2351
|
+
url: 'https://awario.com/bots.html'
|
2352
|
+
producer:
|
2353
|
+
name: 'TechFusion Ltd.'
|
2354
|
+
url: 'https://www.techfusion.com.cy/'
|
2355
|
+
|
2356
|
+
- regex: 'AwarioRssBot'
|
2357
|
+
name: 'Awario'
|
2358
|
+
category: 'Feed Fetcher'
|
2359
|
+
url: 'https://awario.com/bots.html'
|
2360
|
+
producer:
|
2361
|
+
name: 'TechFusion Ltd.'
|
2362
|
+
url: 'https://www.techfusion.com.cy/'
|
2363
|
+
|
2364
|
+
- regex: 'oBot'
|
2365
|
+
name: 'oBot'
|
2366
|
+
category: 'Search bot'
|
2367
|
+
url: 'https://www.xforce-security.com/crawler/'
|
2368
|
+
producer:
|
2369
|
+
name: 'IBM Germany Research & Development GmbH'
|
2370
|
+
url: 'https://exchange.xforce.ibmcloud.com/'
|
2371
|
+
|
2372
|
+
- regex: 'SMTBot'
|
2373
|
+
name: 'SMTBot'
|
2374
|
+
category: 'Search bot'
|
2375
|
+
url: 'https://www.similartech.com/smtbot'
|
2376
|
+
producer:
|
2377
|
+
name: 'SimilarTech Ltd.'
|
2378
|
+
url: 'https://www.similartech.com/'
|
2379
|
+
|
2380
|
+
- regex: 'LCC'
|
2381
|
+
name: 'LCC'
|
2382
|
+
category: 'Search bot'
|
2383
|
+
url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
|
2384
|
+
producer:
|
2385
|
+
name: 'Universität Leipzig'
|
2386
|
+
url: 'https://www.uni-leipzig.de/'
|
2387
|
+
|
2388
|
+
- regex: 'Startpagina-Linkchecker'
|
2389
|
+
name: 'Startpagina Linkchecker'
|
2390
|
+
category: 'Search bot'
|
2391
|
+
url: 'https://www.startpagina.nl/linkchecker'
|
2392
|
+
producer:
|
2393
|
+
name: 'Startpagina B.V.'
|
2394
|
+
url: 'https://www.startpagina.nl/'
|
2395
|
+
|
2396
|
+
- regex: 'MoodleBot-Linkchecker'
|
2397
|
+
name: 'MoodleBot Linkchecker'
|
2398
|
+
category: 'Search bot'
|
2399
|
+
url: 'https://docs.moodle.org/en/Usage'
|
2400
|
+
producer:
|
2401
|
+
name: 'Moodle Pty Ltd'
|
2402
|
+
url: 'https://moodle.org/'
|
2403
|
+
|
2404
|
+
- regex: 'GTmetrix'
|
2405
|
+
name: 'GTmetrix'
|
2406
|
+
category: 'Crawler'
|
2407
|
+
url: 'https://gtmetrix.com/'
|
2408
|
+
producer:
|
2409
|
+
name: 'Carbon60 Operating Co. Ltd.'
|
2410
|
+
url: 'https://www.carbon60.com/'
|
1224
2411
|
|
1225
2412
|
- regex: 'Nutch'
|
1226
2413
|
name: 'Nutch-based Bot'
|
@@ -1228,7 +2415,2128 @@
|
|
1228
2415
|
url: 'https://nutch.apache.org'
|
1229
2416
|
producer:
|
1230
2417
|
name: 'The Apache Software Foundation'
|
1231
|
-
url: '
|
2418
|
+
url: 'https://www.apache.org/foundation/'
|
2419
|
+
|
2420
|
+
- regex: 'Seobility'
|
2421
|
+
name: 'Seobility'
|
2422
|
+
category: 'Crawler'
|
2423
|
+
url: 'https://www.seobility.net/en/faq/?category=crawling#!aboutourbot'
|
2424
|
+
|
2425
|
+
- regex: 'Vercelbot'
|
2426
|
+
name: 'Vercel Bot'
|
2427
|
+
category: 'Service bot'
|
2428
|
+
url: 'https://vercel.com'
|
2429
|
+
|
2430
|
+
- regex: 'Grammarly'
|
2431
|
+
name: 'Grammarly'
|
2432
|
+
category: 'Service bot'
|
2433
|
+
url: 'https://www.grammarly.com'
|
1232
2434
|
|
1233
|
-
- regex: '
|
2435
|
+
- regex: 'Robozilla'
|
2436
|
+
name: 'Robozilla'
|
2437
|
+
category: 'Crawler'
|
2438
|
+
|
2439
|
+
- regex: 'Domains Project'
|
2440
|
+
name: 'Domains Project'
|
2441
|
+
category: 'Crawler'
|
2442
|
+
url: 'https://domainsproject.org'
|
2443
|
+
|
2444
|
+
- regex: 'PetalBot'
|
2445
|
+
name: 'Petal Bot'
|
2446
|
+
category: 'Crawler'
|
2447
|
+
url: 'https://aspiegel.com/petalbot'
|
2448
|
+
|
2449
|
+
- regex: 'SerendeputyBot'
|
2450
|
+
name: 'Serendeputy Bot'
|
2451
|
+
category: 'Crawler'
|
2452
|
+
url: 'https://serendeputy.com/about/serendeputy-bot'
|
2453
|
+
|
2454
|
+
- regex: 'ias-(?:va|sg).*admantx.*service-fetcher|admantx\.com.*service-fetcher'
|
2455
|
+
name: 'ADmantX Service Fetcher'
|
2456
|
+
category: 'Service bot'
|
2457
|
+
url: 'https://www.admantx.com/service-fetcher.html'
|
2458
|
+
|
2459
|
+
- regex: 'SemanticScholarBot'
|
2460
|
+
name: 'Semantic Scholar Bot'
|
2461
|
+
category: 'Crawler'
|
2462
|
+
url: 'https://www.semanticscholar.org/crawler'
|
2463
|
+
|
2464
|
+
- regex: 'VelenPublicWebCrawler'
|
2465
|
+
name: 'Velen Public Web Crawler'
|
2466
|
+
category: 'Crawler'
|
2467
|
+
url: 'https://hunter.io/robot'
|
2468
|
+
|
2469
|
+
- regex: 'Barkrowler'
|
2470
|
+
name: 'Barkrowler'
|
2471
|
+
category: 'Crawler'
|
2472
|
+
url: 'http://www.exensa.com/crawl'
|
2473
|
+
|
2474
|
+
- regex: 'BDCbot'
|
2475
|
+
name: 'BDCbot'
|
2476
|
+
category: 'Crawler'
|
2477
|
+
url: 'https://bigweb.bigdatacorp.com.br/pages/faq.aspx'
|
2478
|
+
producer:
|
2479
|
+
name: 'BIG Data Solucoes Em Tecnologia de Informatica LTDA'
|
2480
|
+
url: 'https://bigdatacorp.com.br/'
|
2481
|
+
|
2482
|
+
- regex: 'adbeat'
|
2483
|
+
name: 'Adbeat'
|
2484
|
+
category: 'Crawler'
|
2485
|
+
url: 'https://www.adbeat.com/operation_policy'
|
2486
|
+
producer:
|
2487
|
+
name: 'PPC Labs LLC'
|
2488
|
+
url: 'https://www.adbeat.com/'
|
2489
|
+
|
2490
|
+
- regex: '(?:BuiltWith|BW)/[\d.]+'
|
2491
|
+
name: 'BuiltWith'
|
2492
|
+
category: 'Crawler'
|
2493
|
+
url: 'https://builtwith.com/biup'
|
2494
|
+
producer:
|
2495
|
+
name: 'BuiltWith Pty Ltd'
|
2496
|
+
url: 'https://builtwith.com/'
|
2497
|
+
|
2498
|
+
- regex: 'https://whatis\.contentkingapp\.com'
|
2499
|
+
name: 'ContentKing'
|
2500
|
+
category: 'Site Monitor'
|
2501
|
+
url: 'https://whatis.contentkingapp.com/'
|
2502
|
+
producer:
|
2503
|
+
name: 'ContentKing BV'
|
2504
|
+
url: 'https://www.contentkingapp.com/'
|
2505
|
+
|
2506
|
+
- regex: 'MicroAdBot'
|
2507
|
+
name: 'MicroAdBot'
|
2508
|
+
category: 'Crawler'
|
2509
|
+
url: 'https://www.microad.co.jp/'
|
2510
|
+
producer:
|
2511
|
+
name: 'MicroAd, Inc.'
|
2512
|
+
url: 'https://www.microad.co.jp/'
|
2513
|
+
|
2514
|
+
- regex: 'PingAdmin\.Ru'
|
2515
|
+
name: 'PingAdmin.Ru'
|
2516
|
+
category: 'Site Monitor'
|
2517
|
+
url: 'https://ping-admin.ru/'
|
2518
|
+
|
2519
|
+
- regex: 'notifyninja.+monitoring'
|
2520
|
+
name: 'Notify Ninja'
|
2521
|
+
category: 'Site Monitor'
|
2522
|
+
url: 'http://notifyninja.com'
|
2523
|
+
|
2524
|
+
- regex: 'WebDataStats'
|
2525
|
+
name: 'WebDataStats'
|
2526
|
+
category: 'Crawler'
|
2527
|
+
url: 'https://webdatastats.com/policy.html'
|
2528
|
+
producer:
|
2529
|
+
name: 'WebTehRazrabotka LLC'
|
2530
|
+
url: 'https://webdatastats.com/'
|
2531
|
+
|
2532
|
+
- regex: 'parse\.ly scraper'
|
2533
|
+
name: 'parse.ly'
|
2534
|
+
category: 'Crawler'
|
2535
|
+
url: 'https://www.parse.ly/help/integration/crawler'
|
2536
|
+
producer:
|
2537
|
+
name: 'Parsely, Inc.'
|
2538
|
+
url: 'https://www.parse.ly/'
|
2539
|
+
|
2540
|
+
- regex: 'Nimbostratus-Bot'
|
2541
|
+
name: 'Nimbostratus Bot'
|
2542
|
+
category: 'Site Monitor'
|
2543
|
+
url: 'http://cloudsystemnetworks.com'
|
2544
|
+
|
2545
|
+
- regex: 'HeartRails_Capture/[\d.]+'
|
2546
|
+
name: 'Heart Rails Capture'
|
2547
|
+
category: 'Service Agent'
|
2548
|
+
url: 'http://capture.heartrails.com'
|
2549
|
+
|
2550
|
+
- regex: 'Project-Resonance'
|
2551
|
+
name: 'Project Resonance'
|
2552
|
+
category: 'Crawler'
|
2553
|
+
url: 'https://project-resonance.com/'
|
2554
|
+
producer:
|
2555
|
+
name: 'RedHunt Labs Limited'
|
2556
|
+
url: 'https://redhuntlabs.com/'
|
2557
|
+
|
2558
|
+
- regex: 'DataXu/[\d.]+'
|
2559
|
+
name: 'DataXu'
|
2560
|
+
category: 'Service Agent'
|
2561
|
+
url: 'https://advertising.roku.com/dataxu'
|
2562
|
+
producer:
|
2563
|
+
name: 'Roku, Inc.'
|
2564
|
+
url: 'https://roku.com'
|
2565
|
+
|
2566
|
+
- regex: 'Cocolyzebot'
|
2567
|
+
name: 'Cocolyzebot'
|
2568
|
+
category: 'Crawler'
|
2569
|
+
url: 'https://cocolyze.com/en/cocolyzebot'
|
2570
|
+
producer:
|
2571
|
+
name: 'VSI INNOVATION SAS'
|
2572
|
+
url: 'https://vsi-innovation.com/'
|
2573
|
+
|
2574
|
+
- regex: 'veryhip'
|
2575
|
+
name: 'VeryHip'
|
2576
|
+
category: 'Crawler'
|
2577
|
+
url: 'https://veryhip.com/'
|
2578
|
+
producer:
|
2579
|
+
name: 'VeryHip'
|
2580
|
+
url: 'https://veryhip.com/'
|
2581
|
+
|
2582
|
+
- regex: 'LinkpadBot'
|
2583
|
+
name: 'LinkpadBot'
|
2584
|
+
category: 'Crawler'
|
2585
|
+
url: 'https://www.linkpad.org/'
|
2586
|
+
producer:
|
2587
|
+
name: 'Solomono LLC'
|
2588
|
+
url: 'https://www.linkpad.org/'
|
2589
|
+
|
2590
|
+
- regex: 'MuscatFerret'
|
2591
|
+
name: 'MuscatFerret'
|
2592
|
+
category: 'Crawler'
|
2593
|
+
url: 'http://www.webtop.com/'
|
2594
|
+
|
2595
|
+
- regex: 'PageThing\.com'
|
2596
|
+
name: 'PageThing'
|
2597
|
+
category: 'Crawler'
|
2598
|
+
url: 'https://www.pagething.com/'
|
2599
|
+
producer:
|
2600
|
+
name: 'SPECIALNOISE LTD'
|
2601
|
+
url: 'https://www.specialnoise.com/'
|
2602
|
+
|
2603
|
+
- regex: 'ArchiveBox'
|
2604
|
+
name: 'ArchiveBox'
|
2605
|
+
url: 'https://archivebox.io/'
|
2606
|
+
category: 'Crawler'
|
2607
|
+
producer:
|
2608
|
+
name: ''
|
2609
|
+
url: ''
|
2610
|
+
|
2611
|
+
- regex: 'Choosito'
|
2612
|
+
name: 'Choosito'
|
2613
|
+
url: 'https://www.choosito.com/'
|
2614
|
+
category: 'Crawler'
|
2615
|
+
producer:
|
2616
|
+
name: 'Choosito! Inc.'
|
2617
|
+
url: 'https://www.choosito.com/'
|
2618
|
+
|
2619
|
+
- regex: 'datagnionbot'
|
2620
|
+
name: 'datagnionbot'
|
2621
|
+
url: 'https://www.datagnion.com/bot.html'
|
2622
|
+
category: 'Crawler'
|
2623
|
+
producer:
|
2624
|
+
name: 'DATAGNION GMBH'
|
2625
|
+
url: 'https://www.datagnion.com/'
|
2626
|
+
|
2627
|
+
- regex: 'WhatCMS'
|
2628
|
+
name: 'WhatCMS'
|
2629
|
+
url: 'https://whatcms.org/'
|
2630
|
+
category: 'Crawler'
|
2631
|
+
producer:
|
2632
|
+
name: 'Nineteen Ten LLC'
|
2633
|
+
url: 'https://whatcms.org/'
|
2634
|
+
|
2635
|
+
- regex: 'httpx'
|
2636
|
+
name: 'httpx'
|
2637
|
+
url: 'https://github.com/projectdiscovery/httpx'
|
2638
|
+
category: 'Crawler'
|
2639
|
+
producer:
|
2640
|
+
name: 'ProjectDiscovery, Inc.'
|
2641
|
+
url: 'https://projectdiscovery.io/'
|
2642
|
+
|
2643
|
+
- regex: '.*\.oast\.'
|
2644
|
+
name: 'Interactsh'
|
2645
|
+
category: 'Security Checker'
|
2646
|
+
url: 'https://github.com/projectdiscovery/interactsh'
|
2647
|
+
producer:
|
2648
|
+
name: 'ProjectDiscovery, Inc.'
|
2649
|
+
url: 'https://projectdiscovery.io/'
|
2650
|
+
|
2651
|
+
- regex: 'scaninfo@(?:expanseinc|paloaltonetworks)\.com'
|
2652
|
+
name: 'Expanse'
|
2653
|
+
category: 'Security Checker'
|
2654
|
+
url: 'https://expanse.co/'
|
2655
|
+
producer:
|
2656
|
+
name: 'Expanse Inc.'
|
2657
|
+
url: 'https://expanse.co/'
|
2658
|
+
|
2659
|
+
- regex: 'HuaweiWebCatBot'
|
2660
|
+
name: 'HuaweiWebCatBot'
|
2661
|
+
category: 'Crawler'
|
2662
|
+
url: 'https://isecurity.huawei.com'
|
2663
|
+
producer:
|
2664
|
+
name: 'Huawei Technologies Co., Ltd.'
|
2665
|
+
url: 'https://huawei.com'
|
2666
|
+
|
2667
|
+
- regex: 'Hatena-Favicon'
|
2668
|
+
name: 'Hatena Favicon'
|
2669
|
+
category: 'Crawler'
|
2670
|
+
url: 'https://www.hatena.ne.jp/faq/'
|
2671
|
+
producer:
|
2672
|
+
name: 'Hatena Co., Ltd.'
|
2673
|
+
url: 'https://www.hatena.ne.jp'
|
2674
|
+
- regex: 'Hatena-?Bookmark'
|
2675
|
+
name: 'Hatena Bookmark'
|
2676
|
+
category: 'Crawler'
|
2677
|
+
url: 'https://www.hatena.ne.jp/faq/'
|
2678
|
+
producer:
|
2679
|
+
name: 'Hatena Co., Ltd.'
|
2680
|
+
url: 'https://www.hatena.ne.jp'
|
2681
|
+
|
2682
|
+
- regex: 'RyowlEngine/[\d.]+'
|
2683
|
+
name: 'Ryowl'
|
2684
|
+
category: 'Crawler'
|
2685
|
+
url: 'https://ryowl.org'
|
2686
|
+
|
2687
|
+
- regex: 'OdklBot/[\d.]+'
|
2688
|
+
name: 'Odnoklassniki Bot'
|
2689
|
+
category: 'Crawler'
|
2690
|
+
url: 'https://odnoklassniki.ru'
|
2691
|
+
|
2692
|
+
- regex: 'Mediatoolkitbot'
|
2693
|
+
name: 'Mediatoolkit Bot'
|
2694
|
+
category: 'Crawler'
|
2695
|
+
url: 'https://mediatoolkit.com'
|
2696
|
+
|
2697
|
+
- regex: 'ZoominfoBot'
|
2698
|
+
name: 'ZoominfoBot'
|
2699
|
+
category: 'Crawler'
|
2700
|
+
url: 'https://www.zoominfo.com'
|
2701
|
+
|
2702
|
+
- regex: 'WeViKaBot/[\d.]+'
|
2703
|
+
name: 'WeViKaBot'
|
2704
|
+
category: 'Crawler'
|
2705
|
+
url: 'http://www.wevika.de'
|
2706
|
+
|
2707
|
+
- regex: 'SEOkicks'
|
2708
|
+
name: 'SEOkicks'
|
2709
|
+
category: 'Crawler'
|
2710
|
+
url: 'https://www.seokicks.de/robot.html'
|
2711
|
+
|
2712
|
+
- regex: 'Plukkie/[\d.]+'
|
2713
|
+
name: 'Plukkie'
|
2714
|
+
category: 'Crawler'
|
2715
|
+
url: 'http://www.botje.com/plukkie.htm'
|
2716
|
+
|
2717
|
+
- regex: 'proximic;'
|
2718
|
+
name: 'Comscore'
|
2719
|
+
category: 'Crawler'
|
2720
|
+
url: 'https://www.comscore.com/Web-Crawler'
|
2721
|
+
|
2722
|
+
- regex: 'SurdotlyBot/[\d.]+'
|
2723
|
+
name: 'SurdotlyBot'
|
2724
|
+
category: 'Crawler'
|
2725
|
+
url: 'http://sur.ly/bot.html'
|
2726
|
+
|
2727
|
+
- regex: 'Gowikibot/[\d.]+'
|
2728
|
+
name: 'Gowikibot'
|
2729
|
+
category: 'Crawler'
|
2730
|
+
url: 'http://www.gowikibot.com'
|
2731
|
+
|
2732
|
+
- regex: 'SabsimBot/[\d.]+'
|
2733
|
+
name: 'SabsimBot'
|
2734
|
+
category: 'Crawler'
|
2735
|
+
url: 'https://sabsim.com'
|
2736
|
+
|
2737
|
+
- regex: 'LumtelBot/[\d.]+'
|
2738
|
+
name: 'LumtelBot'
|
2739
|
+
category: 'Crawler'
|
2740
|
+
url: 'https://umtel.com'
|
2741
|
+
|
2742
|
+
- regex: 'PiplBot'
|
2743
|
+
name: 'PiplBot'
|
2744
|
+
category: 'Crawler'
|
2745
|
+
url: 'http://www.pipl.com/bot'
|
2746
|
+
|
2747
|
+
- regex: 'woobot/[\d.]+'
|
2748
|
+
name: 'WooRank'
|
2749
|
+
category: 'Crawler'
|
2750
|
+
url: 'https://www.woorank.com/bot'
|
2751
|
+
|
2752
|
+
- regex: 'Cookiebot/[\d.]+'
|
2753
|
+
name: 'Cookiebot'
|
2754
|
+
category: 'Crawler'
|
2755
|
+
url: 'https://support.cookiebot.com/hc/en-us/articles/360014264140-Scanner-User-Agent'
|
2756
|
+
producer:
|
2757
|
+
name: 'Cybot A/S'
|
2758
|
+
url: 'https://www.cybot.com/'
|
2759
|
+
|
2760
|
+
- regex: 'NetSystemsResearch'
|
2761
|
+
name: 'NetSystemsResearch'
|
2762
|
+
category: 'Security Checker'
|
2763
|
+
url: 'https://www.netsystemsresearch.com/'
|
2764
|
+
producer:
|
2765
|
+
name: 'NET SYSTEMS RESEARCH LLC'
|
2766
|
+
url: 'https://www.netsystemsresearch.com/'
|
2767
|
+
|
2768
|
+
- regex: 'CensysInspect/[\d.]+'
|
2769
|
+
name: 'CensysInspect'
|
2770
|
+
category: 'Security Checker'
|
2771
|
+
url: 'https://about.censys.io/'
|
2772
|
+
producer:
|
2773
|
+
name: 'Censys, Inc.'
|
2774
|
+
url: 'https://censys.io/'
|
2775
|
+
|
2776
|
+
- regex: 'gdnplus\.com'
|
2777
|
+
name: 'GDNP'
|
2778
|
+
category: 'Crawler'
|
2779
|
+
url: 'https://gdnplus.com/'
|
2780
|
+
producer:
|
2781
|
+
name: 'Global Digital Network Plus, LLC'
|
2782
|
+
url: 'https://gdnplus.com/'
|
2783
|
+
|
2784
|
+
- regex: 'WellKnownBot/[\d.]+'
|
2785
|
+
name: 'WellKnownBot'
|
2786
|
+
category: 'Crawler'
|
2787
|
+
url: 'https://well-known.dev'
|
2788
|
+
|
2789
|
+
- regex: 'Adsbot/[\d.]+'
|
2790
|
+
name: 'Adsbot'
|
2791
|
+
category: 'Crawler'
|
2792
|
+
url: 'https://seostar.co/robot/'
|
2793
|
+
|
2794
|
+
- regex: 'MTRobot/[\d.]+'
|
2795
|
+
name: 'MTRobot'
|
2796
|
+
category: 'Crawler'
|
2797
|
+
url: 'https://metrics-tools.de/robot.html'
|
2798
|
+
producer:
|
2799
|
+
name: 'Metrics Tools'
|
2800
|
+
url: 'https://metrics-tools.de/'
|
2801
|
+
|
2802
|
+
- regex: 'serpstatbot/[\d.]+'
|
2803
|
+
name: 'serpstatbot'
|
2804
|
+
category: 'Crawler'
|
2805
|
+
url: 'http://serpstatbot.com/'
|
2806
|
+
producer:
|
2807
|
+
name: 'Netpeak Ltd'
|
2808
|
+
url: 'https://netpeak.net/'
|
2809
|
+
|
2810
|
+
- regex: 'colly'
|
2811
|
+
name: 'colly'
|
2812
|
+
category: 'Crawler'
|
2813
|
+
url: 'https://github.com/gocolly/colly/'
|
2814
|
+
|
2815
|
+
- regex: 'l9tcpid/v[\d.]+'
|
2816
|
+
name: 'l9tcpid'
|
2817
|
+
category: 'Security Checker'
|
2818
|
+
url: 'https://github.com/LeakIX/l9tcpid'
|
2819
|
+
|
2820
|
+
- regex: 'l9explore/[\d.]+'
|
2821
|
+
name: 'l9explore'
|
2822
|
+
category: 'Security Checker'
|
2823
|
+
url: 'https://github.com/LeakIX/l9explore'
|
2824
|
+
|
2825
|
+
- regex: 'l9scan/|^Lkx-.*/[\d.]+'
|
2826
|
+
name: 'LeakIX'
|
2827
|
+
category: 'Security Checker'
|
2828
|
+
url: 'https://leakix.net/'
|
2829
|
+
producer:
|
2830
|
+
name: 'BaDaaS SRL'
|
2831
|
+
url: 'https://leakix.net/'
|
2832
|
+
|
2833
|
+
- regex: 'MegaIndex\.ru/[\d.]+'
|
2834
|
+
name: 'MegaIndex'
|
2835
|
+
category: 'Crawler'
|
2836
|
+
url: 'https://megaindex.com/crawler'
|
2837
|
+
|
2838
|
+
- regex: 'Seekport'
|
2839
|
+
name: 'Seekport'
|
2840
|
+
category: 'Crawler'
|
2841
|
+
url: 'https://bot.seekport.com/'
|
2842
|
+
producer:
|
2843
|
+
name: 'SISTRIX GmbH'
|
2844
|
+
url: 'https://www.sistrix.de/'
|
2845
|
+
|
2846
|
+
- regex: 'seolyt/[\d.]+'
|
2847
|
+
name: 'seolyt'
|
2848
|
+
category: 'Crawler'
|
2849
|
+
url: 'https://seolyt.com/'
|
2850
|
+
|
2851
|
+
- regex: 'YaK/[\d.]+'
|
2852
|
+
name: 'YaK'
|
2853
|
+
category: 'Crawler'
|
2854
|
+
url: 'https://www.linkfluence.com/'
|
2855
|
+
producer:
|
2856
|
+
name: 'Linkfluence SAS'
|
2857
|
+
url: 'https://www.linkfluence.com/'
|
2858
|
+
|
2859
|
+
- regex: 'KomodiaBot/[\d.]+'
|
2860
|
+
name: 'KomodiaBot'
|
2861
|
+
category: 'Crawler'
|
2862
|
+
url: 'http://www.komodia.com/newwiki/index.php/URL_server_crawler'
|
2863
|
+
producer:
|
2864
|
+
name: 'Komodia Inc.'
|
2865
|
+
url: 'https://www.komodia.com/'
|
2866
|
+
|
2867
|
+
- regex: 'Neevabot/[\d.]+'
|
2868
|
+
name: 'Neevabot'
|
2869
|
+
category: 'Search bot'
|
2870
|
+
url: 'https://neeva.com/neevabot'
|
2871
|
+
producer:
|
2872
|
+
name: 'Neeva Inc.'
|
2873
|
+
url: 'https://neeva.com/'
|
2874
|
+
|
2875
|
+
- regex: 'LinkPreview/[\d.]+'
|
2876
|
+
name: 'LinkPreview'
|
2877
|
+
category: 'Service Agent'
|
2878
|
+
url: 'https://www.linkpreview.net/'
|
2879
|
+
|
2880
|
+
- regex: 'JungleKeyThumbnail/[\d.]+'
|
2881
|
+
name: 'JungleKeyThumbnail'
|
2882
|
+
category: 'Crawler'
|
2883
|
+
url: 'https://junglekey.com/'
|
2884
|
+
|
2885
|
+
- regex: 'rocketmonitor(?: |bot/)[\d.]+'
|
2886
|
+
name: 'RocketMonitorBot'
|
2887
|
+
category: 'Site Monitor'
|
2888
|
+
url: 'https://www.radiomast.io/docs/stream-monitoring/technical_details.html'
|
2889
|
+
producer:
|
2890
|
+
name: 'Radio Mast, Inc.'
|
2891
|
+
url: 'https://www.radiomast.io/'
|
2892
|
+
|
2893
|
+
- regex: 'SitemapParser-VIPnytt/[\d.]+'
|
2894
|
+
name: 'SitemapParser-VIPnytt'
|
2895
|
+
category: 'Crawler'
|
2896
|
+
url: 'https://github.com/VIPnytt/SitemapParser/'
|
2897
|
+
|
2898
|
+
- regex: '^Turnitin'
|
2899
|
+
name: 'Turnitin'
|
2900
|
+
category: 'Crawler'
|
2901
|
+
url: 'https://turnitin.com/robot/crawlerinfo.html'
|
2902
|
+
|
2903
|
+
- regex: 'DMBrowser/[\d.]+|DMBrowser-[UB]V'
|
2904
|
+
name: 'Dotcom Monitor'
|
2905
|
+
category: 'Site Monitor'
|
2906
|
+
url: 'https://www.dotcom-monitor.com'
|
2907
|
+
|
2908
|
+
- regex: 'ThinkChaos/'
|
2909
|
+
name: 'ThinkChaos'
|
2910
|
+
category: 'Crawler'
|
2911
|
+
|
2912
|
+
- regex: 'DataForSeoBot'
|
2913
|
+
name: 'DataForSeoBot'
|
2914
|
+
category: 'Crawler'
|
2915
|
+
url: 'https://dataforseo.com/dataforseo-bot'
|
2916
|
+
|
2917
|
+
- regex: 'Discordbot/[\d.]+'
|
2918
|
+
name: 'Discord Bot'
|
2919
|
+
category: 'Service Agent'
|
2920
|
+
url: 'https://discordapp.com'
|
2921
|
+
|
2922
|
+
- regex: 'Linespider/[\d.]+'
|
2923
|
+
name: 'Linespider'
|
2924
|
+
category: 'Crawler'
|
2925
|
+
url: 'https://lin.ee/4dwXkTH'
|
2926
|
+
|
2927
|
+
- regex: 'Cincraw/[\d.]+'
|
2928
|
+
name: 'Cincraw'
|
2929
|
+
category: 'Crawler'
|
2930
|
+
url: 'http://cincrawdata.net/bot/'
|
2931
|
+
|
2932
|
+
- regex: 'CISPA Web Analyzer'
|
2933
|
+
name: 'CISPA Web Analyzer'
|
2934
|
+
category: 'Crawler'
|
2935
|
+
url: 'https://notify.cispa.de/'
|
2936
|
+
producer:
|
2937
|
+
name: 'CISPA - Helmholtz-Zentrum für Informationssicherheit gGmbH'
|
2938
|
+
url: 'https://cispa.de/en'
|
2939
|
+
|
2940
|
+
- regex: 'IonCrawl'
|
2941
|
+
name: 'IONOS Crawler'
|
2942
|
+
category: 'Crawler'
|
2943
|
+
url: 'https://www.ionos.de/terms-gtc/faq-crawler-en/'
|
2944
|
+
producer:
|
2945
|
+
name: 'IONOS SE'
|
2946
|
+
url: 'https://www.ionos.de/'
|
2947
|
+
|
2948
|
+
- regex: 'Crawldad'
|
2949
|
+
name: 'Crawldad'
|
2950
|
+
category: 'Crawler'
|
2951
|
+
url: 'https://gist.github.com/jayhardee9/2f2a2c4dba26564ee040ae32e0dd0972'
|
2952
|
+
|
2953
|
+
- regex: 'https://securitytxt-scan\.cs\.hm\.edu/'
|
2954
|
+
name: 'security.txt scanserver'
|
2955
|
+
category: 'Security Checker'
|
2956
|
+
url: 'https://securitytxt-scan.cs.hm.edu/'
|
2957
|
+
producer:
|
2958
|
+
name: 'Hochschule für angewandte Wissenschaften München'
|
2959
|
+
url: 'https://www.hm.edu/'
|
2960
|
+
|
2961
|
+
- regex: 'TigerBot/[\d.]+'
|
2962
|
+
name: 'TigerBot'
|
2963
|
+
category: 'Crawler'
|
2964
|
+
url: 'https://tiger.ch/'
|
2965
|
+
|
2966
|
+
- regex: 'TestCrawler/[\d.]+'
|
2967
|
+
name: 'TestCrawler'
|
2968
|
+
category: 'Crawler'
|
2969
|
+
url: 'https://www.comcepta.com/'
|
2970
|
+
|
2971
|
+
- regex: 'CrowdTanglebot/[\d.]+'
|
2972
|
+
name: 'CrowdTangle'
|
2973
|
+
category: 'Crawler'
|
2974
|
+
url: 'https://help.crowdtangle.com/en/articles/3009319-crowdtangle-bot'
|
2975
|
+
producer:
|
2976
|
+
name: 'CrowdTangle, Inc.'
|
2977
|
+
url: 'https://www.crowdtangle.com/'
|
2978
|
+
|
2979
|
+
- regex: 'Sellers\.Guide Crawler by Primis'
|
2980
|
+
name: 'Sellers.Guide'
|
2981
|
+
category: 'Crawler'
|
2982
|
+
url: 'https://sellers.guide/'
|
2983
|
+
producer:
|
2984
|
+
name: 'McCann Disciplines, Ltd.'
|
2985
|
+
url: 'https://www.primis.tech/'
|
2986
|
+
|
2987
|
+
- regex: 'OnalyticaBot'
|
2988
|
+
name: 'Onalytica'
|
2989
|
+
category: 'Crawler'
|
2990
|
+
url: 'https://www.airslate.com/bot/explore/onalytica-bot'
|
2991
|
+
producer:
|
2992
|
+
name: 'airSlate, Inc.'
|
2993
|
+
url: 'https://www.airslate.com/'
|
2994
|
+
|
2995
|
+
- regex: 'deepnoc'
|
2996
|
+
name: 'deepnoc'
|
2997
|
+
category: 'Crawler'
|
2998
|
+
url: 'https://deepnoc.com/bot'
|
2999
|
+
producer:
|
3000
|
+
name: 'deepnoc, GmbH'
|
3001
|
+
url: 'https://deepnoc.com/'
|
3002
|
+
|
3003
|
+
- regex: 'Newslitbot/[\d.]+'
|
3004
|
+
name: 'Newslitbot'
|
3005
|
+
category: 'Crawler'
|
3006
|
+
url: 'https://www.newslit.co/'
|
3007
|
+
producer:
|
3008
|
+
name: 'Newslit, LLC.'
|
3009
|
+
url: 'https://www.newslit.co/'
|
3010
|
+
|
3011
|
+
- regex: 'um-LN/[\d.]+'
|
3012
|
+
name: 'uMBot'
|
3013
|
+
category: 'Crawler'
|
3014
|
+
url: 'https://www.ubermetrics-technologies.com/'
|
3015
|
+
producer:
|
3016
|
+
name: 'Ubermetrics Technologies GmbH'
|
3017
|
+
url: 'https://www.ubermetrics-technologies.com/'
|
3018
|
+
|
3019
|
+
- regex: 'Abonti/[\d.]+'
|
3020
|
+
name: 'Abonti'
|
3021
|
+
category: 'Crawler'
|
3022
|
+
url: 'http://abonti.com/'
|
3023
|
+
|
3024
|
+
- regex: 'collection@infegy\.com'
|
3025
|
+
name: 'Infegy'
|
3026
|
+
category: 'Crawler'
|
3027
|
+
url: 'https://infegy.com/'
|
3028
|
+
producer:
|
3029
|
+
name: 'Infegy, Inc.'
|
3030
|
+
url: 'https://infegy.com/'
|
3031
|
+
|
3032
|
+
- regex: 'HTTP Banner Detection \(https://security\.ipip\.net\)'
|
3033
|
+
name: 'IPIP'
|
3034
|
+
category: 'Security Checker'
|
3035
|
+
url: 'https://security.ipip.net/'
|
3036
|
+
producer:
|
3037
|
+
name: 'Beijing Tiantexin Tech. Co., Ltd.'
|
3038
|
+
url: 'https://en.ipip.net/'
|
3039
|
+
|
3040
|
+
- regex: 'ev-crawler/[\d.]+'
|
3041
|
+
name: 'Headline'
|
3042
|
+
category: 'Crawler'
|
3043
|
+
url: 'https://headline.com/legal/crawler'
|
3044
|
+
producer:
|
3045
|
+
name: 'e.ventures Managementgesellschaft mbH'
|
3046
|
+
url: 'https://headline.com/'
|
3047
|
+
|
3048
|
+
- regex: 'webprosbot/[\d.]+'
|
3049
|
+
name: 'WebPros'
|
3050
|
+
category: 'Crawler'
|
3051
|
+
url: 'https://webpros.com/'
|
3052
|
+
producer:
|
3053
|
+
name: 'WebPros Holdco B.V.'
|
3054
|
+
url: 'https://webpros.com/'
|
3055
|
+
|
3056
|
+
- regex: 'ELB-HealthChecker'
|
3057
|
+
name: 'Amazon ELB'
|
3058
|
+
category: 'Site Monitor'
|
3059
|
+
url: 'https://aws.amazon.com/elasticloadbalancing/'
|
3060
|
+
producer:
|
3061
|
+
name: 'Amazon.com, Inc.'
|
3062
|
+
url: 'https://www.amazon.com/'
|
3063
|
+
|
3064
|
+
- regex: 'Wheregoes\.com Redirect Checker/[\d.]+'
|
3065
|
+
name: 'WhereGoes'
|
3066
|
+
category: 'Crawler'
|
3067
|
+
url: 'https://wheregoes.com/'
|
3068
|
+
|
3069
|
+
- regex: 'project_patchwatch'
|
3070
|
+
name: 'Project Patchwatch'
|
3071
|
+
category: 'Crawler'
|
3072
|
+
url: 'http://66.240.192.82/'
|
3073
|
+
|
3074
|
+
- regex: 'InternetMeasurement/[\d.]+'
|
3075
|
+
name: 'InternetMeasurement'
|
3076
|
+
category: 'Crawler'
|
3077
|
+
url: 'https://internet-measurement.com/'
|
3078
|
+
|
3079
|
+
- regex: 'DomainAppender /[\d.]+'
|
3080
|
+
name: 'DomainAppender'
|
3081
|
+
category: 'Crawler'
|
3082
|
+
url: 'https://www.profound.net/product/domain_append/'
|
3083
|
+
producer:
|
3084
|
+
name: 'Profound Networks, LLC'
|
3085
|
+
url: 'https://www.profound.net/'
|
3086
|
+
|
3087
|
+
- regex: 'FreeWebMonitoring SiteChecker/[\d.]+'
|
3088
|
+
name: 'FreeWebMonitoring'
|
3089
|
+
category: 'Site Monitor'
|
3090
|
+
url: 'https://www.freewebmonitoring.com/bot.html'
|
3091
|
+
producer:
|
3092
|
+
name: 'GreenWave Online, Inc.'
|
3093
|
+
url: 'http://www.greenwaveonline.com/'
|
3094
|
+
|
3095
|
+
- regex: 'Page Modified Pinger'
|
3096
|
+
name: 'Page Modified Pinger'
|
3097
|
+
category: 'Site Monitor'
|
3098
|
+
url: 'https://www.pagemodified.com/'
|
3099
|
+
producer:
|
3100
|
+
name: 'Valley Hosting, LLC'
|
3101
|
+
url: 'https://www.pagemodified.com/'
|
3102
|
+
|
3103
|
+
- regex: 'adstxtlab\.com'
|
3104
|
+
name: 'adstxtlab.com'
|
3105
|
+
category: 'Crawler'
|
3106
|
+
url: 'https://adstxtlab.com/validator.php'
|
3107
|
+
producer:
|
3108
|
+
name: 'Jaohawi AB'
|
3109
|
+
url: 'https://adstxtlab.com/'
|
3110
|
+
|
3111
|
+
- regex: 'Iframely/[\d.]+'
|
3112
|
+
name: 'Iframely'
|
3113
|
+
category: 'Crawler'
|
3114
|
+
url: 'https://iframely.com/'
|
3115
|
+
producer:
|
3116
|
+
name: 'Itteco Software, Corp.'
|
3117
|
+
url: 'https://iframely.com/'
|
3118
|
+
|
3119
|
+
- regex: 'DomainStatsBot/[\d.]+'
|
3120
|
+
name: 'DomainStatsBot'
|
3121
|
+
category: 'Crawler'
|
3122
|
+
url: 'https://domainstats.com/pages/our-bot'
|
3123
|
+
producer:
|
3124
|
+
name: 'Domainstats Ltd'
|
3125
|
+
url: 'https://domainstats.com/'
|
3126
|
+
|
3127
|
+
- regex: 'aiHitBot/[\d.]+'
|
3128
|
+
name: 'aiHitBot'
|
3129
|
+
category: 'Crawler'
|
3130
|
+
url: 'https://www.aihitdata.com/about'
|
3131
|
+
|
3132
|
+
- regex: 'DomainCrawler/'
|
3133
|
+
name: 'DomainCrawler'
|
3134
|
+
category: 'Crawler'
|
3135
|
+
url: 'https://domaincrawler.com/about-us/'
|
3136
|
+
|
3137
|
+
- regex: 'DNSResearchBot'
|
3138
|
+
name: 'DNSResearchBot'
|
3139
|
+
category: 'Crawler'
|
3140
|
+
|
3141
|
+
- regex: 'GitCrawlerBot'
|
3142
|
+
name: 'GitCrawlerBot'
|
3143
|
+
category: 'Crawler'
|
3144
|
+
|
3145
|
+
- regex: 'AdAuth/[\d.]+'
|
3146
|
+
name: 'AdAuth'
|
3147
|
+
category: 'Crawler'
|
3148
|
+
url: 'https://www.adauth.com'
|
3149
|
+
|
3150
|
+
- regex: 'faveeo\.com'
|
3151
|
+
name: 'Faveeo'
|
3152
|
+
category: 'Crawler'
|
3153
|
+
url: 'http://www.faveeo.com'
|
3154
|
+
|
3155
|
+
- regex: 'kozmonavt\.'
|
3156
|
+
name: 'Kozmonavt'
|
3157
|
+
category: 'Crawler'
|
3158
|
+
url: 'https://kozmonavt.ml'
|
3159
|
+
|
3160
|
+
- regex: 'CriteoBot/'
|
3161
|
+
name: 'CriteoBot'
|
3162
|
+
category: 'Crawler'
|
3163
|
+
url: 'https://www.criteo.com/criteo-crawler/'
|
3164
|
+
|
3165
|
+
- regex: 'PayPal IPN'
|
3166
|
+
name: 'PayPal IPN'
|
3167
|
+
category: 'Service Agent'
|
3168
|
+
url: 'https://developer.paypal.com/api/nvp-soap/ipn/IPNIntro/'
|
3169
|
+
producer:
|
3170
|
+
name: 'PayPal, Inc.'
|
3171
|
+
url: 'https://www.paypal.com/'
|
3172
|
+
|
3173
|
+
- regex: 'MaCoCu'
|
3174
|
+
name: 'MaCoCu'
|
3175
|
+
category: 'Crawler'
|
3176
|
+
url: 'https://www.clarin.si/info/macocu-massive-collection-and-curation-of-monolingual-and-bilingual-data/'
|
3177
|
+
producer:
|
3178
|
+
name: 'Jožef Stefan Institute'
|
3179
|
+
url: 'https://www.ijs.si/ijsw/JSI'
|
3180
|
+
|
3181
|
+
- regex: 'dnt-policy@eff\.org'
|
3182
|
+
name: 'EFF Do Not Track Verifier'
|
3183
|
+
category: 'Crawler'
|
3184
|
+
url: 'https://www.eff.org/issues/do-not-track'
|
3185
|
+
producer:
|
3186
|
+
name: 'Electronic Frontier Foundation'
|
3187
|
+
url: 'https://www.eff.org/'
|
3188
|
+
|
3189
|
+
- regex: 'InfoTigerBot'
|
3190
|
+
name: 'InfoTigerBot'
|
3191
|
+
category: 'Crawler'
|
3192
|
+
url: 'https://infotiger.com/bot'
|
3193
|
+
producer:
|
3194
|
+
name: 'Infotiger UG'
|
3195
|
+
url: 'https://infotiger.com/'
|
3196
|
+
|
3197
|
+
- regex: '(?:Birdcrawlerbot|CrawlaDeBot)'
|
3198
|
+
name: 'Birdcrawlerbot'
|
3199
|
+
category: 'Crawler'
|
3200
|
+
url: 'https://crawla.de/de/index.php'
|
3201
|
+
producer:
|
3202
|
+
name: 'Swoppen Systems GmbH'
|
3203
|
+
url: 'https://www.swoppen.com/de'
|
3204
|
+
|
3205
|
+
- regex: 'ScamadviserExternalHit/[\d.]+'
|
3206
|
+
name: 'Scamadviser External Hit'
|
3207
|
+
category: 'Crawler'
|
3208
|
+
url: 'https://www.scamadviser.com/'
|
3209
|
+
producer:
|
3210
|
+
name: 'Ecommerce Operations B.V.'
|
3211
|
+
url: 'https://www.scamadviser.com/'
|
3212
|
+
|
3213
|
+
- regex: 'ZaldamoSearchBot'
|
3214
|
+
name: 'Zaldamo'
|
3215
|
+
category: 'Crawler'
|
3216
|
+
url: 'https://www.zaldamo.com/search.html'
|
3217
|
+
producer:
|
3218
|
+
name: 'Zaldamo, LLC.'
|
3219
|
+
url: 'https://www.zaldamo.com/'
|
3220
|
+
|
3221
|
+
- regex: 'AFB/[\d.]+'
|
3222
|
+
name: 'Allloadin Favicon Bot'
|
3223
|
+
category: 'Crawler'
|
3224
|
+
url: 'https://allloadin.com/'
|
3225
|
+
|
3226
|
+
- regex: 'SeolytBot/[\d.]+'
|
3227
|
+
name: 'Seolyt Bot'
|
3228
|
+
category: 'Crawler'
|
3229
|
+
url: 'https://seolyt.com'
|
3230
|
+
|
3231
|
+
- regex: 'LinkWalker/[\d.]+'
|
3232
|
+
name: 'LinkWalker'
|
3233
|
+
category: 'Crawler'
|
3234
|
+
url: 'https://www.phishlabs.com/'
|
3235
|
+
producer:
|
3236
|
+
name: 'PhishLabs, Inc.'
|
3237
|
+
url: 'https://www.phishlabs.com/'
|
3238
|
+
|
3239
|
+
- regex: 'RenovateBot/[\d.]+'
|
3240
|
+
name: 'RenovateBot'
|
3241
|
+
category: 'Security Checker'
|
3242
|
+
url: 'https://github.com/renovatebot/renovate'
|
3243
|
+
producer:
|
3244
|
+
name: 'White Source Ltd.'
|
3245
|
+
url: 'https://www.mend.io/free-developer-tools/renovate/'
|
3246
|
+
|
3247
|
+
- regex: 'INETDEX-BOT/[\d.]+'
|
3248
|
+
name: 'Inetdex Bot'
|
3249
|
+
category: 'Crawler'
|
3250
|
+
url: 'https://www.inetdex.com/'
|
3251
|
+
|
3252
|
+
- regex: 'NETZZAPPEN'
|
3253
|
+
name: 'NETZZAPPEN'
|
3254
|
+
category: 'Crawler'
|
3255
|
+
url: 'https://www.netzzappen.com/'
|
3256
|
+
producer:
|
3257
|
+
name: 'Marc Huemer'
|
3258
|
+
url: 'https://www.netzzappen.com/'
|
3259
|
+
|
3260
|
+
- regex: 'panscient\.com'
|
3261
|
+
name: 'Panscient'
|
3262
|
+
category: 'Crawler'
|
3263
|
+
url: 'https://www.panscient.com/faq.htm'
|
3264
|
+
producer:
|
3265
|
+
name: 'Panscient, Inc.'
|
3266
|
+
url: 'https://www.panscient.com/'
|
3267
|
+
|
3268
|
+
- regex: 'research@pdrlabs\.net'
|
3269
|
+
name: 'PDR Labs'
|
3270
|
+
category: 'Security Checker'
|
3271
|
+
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
3272
|
+
producer:
|
3273
|
+
name: 'PDR Labs'
|
3274
|
+
url: 'https://web.archive.org/web/20220420054123/http://www.pdrlabs.net/'
|
3275
|
+
|
3276
|
+
- regex: 'Nicecrawler/[\d.]+'
|
3277
|
+
name: 'NiceCrawler'
|
3278
|
+
category: 'Crawler'
|
3279
|
+
url: 'https://www.nicecrawler.com/'
|
3280
|
+
producer:
|
3281
|
+
name: 'Intelium Corp.'
|
3282
|
+
url: 'https://www.intelium.com/'
|
3283
|
+
|
3284
|
+
- regex: 't3versionsBot/[\d.]+'
|
3285
|
+
name: 't3versions'
|
3286
|
+
category: 'Crawler'
|
3287
|
+
url: 'https://www.t3versions.com/bot'
|
3288
|
+
producer:
|
3289
|
+
name: 'Torben Hansen'
|
3290
|
+
url: 'https://www.t3versions.com/'
|
3291
|
+
|
3292
|
+
- regex: 'Crawlson/[\d.]+'
|
3293
|
+
name: 'Crawlson'
|
3294
|
+
category: 'Crawler'
|
3295
|
+
url: 'https://www.crawlson.com/about'
|
3296
|
+
producer:
|
3297
|
+
name: 'Crawlson'
|
3298
|
+
url: 'https://www.crawlson.com/'
|
3299
|
+
|
3300
|
+
- regex: 'tchelebi/[\d.]+'
|
3301
|
+
name: 'tchelebi'
|
3302
|
+
category: 'Crawler'
|
3303
|
+
url: 'https://tchelebi.io/'
|
3304
|
+
producer:
|
3305
|
+
name: 'NormShield, Inc.'
|
3306
|
+
url: 'https://blackkite.com/'
|
3307
|
+
|
3308
|
+
- regex: 'JobboerseBot'
|
3309
|
+
name: 'JobboerseBot'
|
3310
|
+
category: 'Crawler'
|
3311
|
+
url: 'https://www.xing.com/jobs'
|
3312
|
+
producer:
|
3313
|
+
name: 'New Work SE'
|
3314
|
+
url: 'https://www.xing.com/'
|
3315
|
+
|
3316
|
+
- regex: 'RepoLookoutBot/v?[\d.]+'
|
3317
|
+
name: 'Repo Lookout'
|
3318
|
+
category: 'Security Checker'
|
3319
|
+
url: 'https://www.repo-lookout.org/'
|
3320
|
+
producer:
|
3321
|
+
name: 'Crissy Field GmbH'
|
3322
|
+
url: 'https://www.crissyfield.de/'
|
3323
|
+
|
3324
|
+
- regex: 'PATHspider'
|
3325
|
+
name: 'PATHspider'
|
3326
|
+
category: 'Security Checker'
|
3327
|
+
url: 'https://pathspider.net/'
|
3328
|
+
producer:
|
3329
|
+
name: 'MAMI Project'
|
3330
|
+
url: 'https://mami-project.eu/'
|
3331
|
+
|
3332
|
+
- regex: 'everyfeed-spider/[\d.]+'
|
3333
|
+
name: 'Everyfeed'
|
3334
|
+
url: 'https://web.archive.org/web/20050930235914/http://www.everyfeed.com/'
|
3335
|
+
category: 'Feed Fetcher'
|
3336
|
+
producer:
|
3337
|
+
name: ''
|
3338
|
+
url: ''
|
3339
|
+
|
3340
|
+
- regex: 'Exchange check'
|
3341
|
+
name: 'Exchange check'
|
3342
|
+
category: 'Security Checker'
|
3343
|
+
url: 'https://github.com/GossiTheDog/scanning'
|
3344
|
+
producer:
|
3345
|
+
name: 'Kevin Beaumont'
|
3346
|
+
url: 'https://doublepulsar.com/'
|
3347
|
+
|
3348
|
+
- regex: 'Sublinq'
|
3349
|
+
name: 'Sublinq'
|
3350
|
+
category: 'Crawler'
|
3351
|
+
url: 'https://web.archive.org/web/20220626191617/https://sublinq.com/'
|
3352
|
+
producer:
|
3353
|
+
name: ''
|
3354
|
+
url: ''
|
3355
|
+
|
3356
|
+
- regex: 'Gregarius/[\d.]+'
|
3357
|
+
name: 'Gregarius'
|
3358
|
+
category: 'Feed Fetcher'
|
3359
|
+
url: 'https://web.archive.org/web/20100614011837/http://devlog.gregarius.net/docs/ua/'
|
3360
|
+
producer:
|
3361
|
+
name: ''
|
3362
|
+
url: ''
|
3363
|
+
|
3364
|
+
- regex: 'COMODO DCV'
|
3365
|
+
name: 'COMODO DCV'
|
3366
|
+
category: 'Service Agent'
|
3367
|
+
url: 'https://www.comodo.com/'
|
3368
|
+
producer:
|
3369
|
+
name: 'Comodo Security Solutions, Inc.'
|
3370
|
+
url: 'https://www.comodo.com/'
|
3371
|
+
|
3372
|
+
- regex: 'Sectigo DCV|acme\.sectigo\.com'
|
3373
|
+
name: 'Sectigo DCV'
|
3374
|
+
category: 'Service Agent'
|
3375
|
+
url: 'https://sectigo.com/'
|
3376
|
+
producer:
|
3377
|
+
name: 'Sectigo Limited'
|
3378
|
+
url: 'https://sectigo.com/'
|
3379
|
+
|
3380
|
+
- regex: 'KlarnaBot-(?:DownloadProductImage|EnrichProducts|PriceWatcher)/[\d.]+'
|
3381
|
+
name: 'KlarnaBot'
|
3382
|
+
category: 'Crawler'
|
3383
|
+
url: 'https://docs.klarna.com/klarna-bot/'
|
3384
|
+
producer:
|
3385
|
+
name: 'Klarna Bank AB'
|
3386
|
+
url: 'https://www.klarna.com/'
|
3387
|
+
|
3388
|
+
- regex: 'Taboolabot/[\d.]+'
|
3389
|
+
name: 'Taboolabot'
|
3390
|
+
category: 'Crawler'
|
3391
|
+
url: 'https://help.taboola.com/hc/en-us/articles/115002347594-The-Taboola-Crawler'
|
3392
|
+
producer:
|
3393
|
+
name: 'Taboola, Inc.'
|
3394
|
+
url: 'https://www.taboola.com/'
|
3395
|
+
|
3396
|
+
- regex: 'Asana/[\d.]+'
|
3397
|
+
name: 'Asana'
|
3398
|
+
category: 'Crawler'
|
3399
|
+
url: 'https://asana.com/'
|
3400
|
+
producer:
|
3401
|
+
name: 'Asana, Inc.'
|
3402
|
+
url: 'https://asana.com/'
|
3403
|
+
|
3404
|
+
- regex: 'Chrome Privacy Preserving Prefetch Proxy'
|
3405
|
+
name: 'Chrome Privacy Preserving Prefetch Proxy'
|
3406
|
+
category: 'Service Agent'
|
3407
|
+
url: 'https://developer.chrome.com/blog/private-prefetch-proxy/'
|
3408
|
+
producer:
|
3409
|
+
name: 'Google Inc.'
|
3410
|
+
url: 'https://www.google.com/'
|
3411
|
+
|
3412
|
+
- regex: 'URLinspectorBot/[\d.]+'
|
3413
|
+
name: 'URLinspector'
|
3414
|
+
category: 'Site Monitor'
|
3415
|
+
url: 'https://www.urlinspector.com/bot/'
|
3416
|
+
producer:
|
3417
|
+
name: 'LinkResearchTools GmbH'
|
3418
|
+
url: 'https://www.linkresearchtools.com/'
|
3419
|
+
|
3420
|
+
- regex: 'EntferBot/[\d.]+'
|
3421
|
+
name: 'Entfer'
|
3422
|
+
category: 'Crawler'
|
3423
|
+
url: 'https://entfer.com/'
|
3424
|
+
producer:
|
3425
|
+
name: 'Entfer Ltd.'
|
3426
|
+
url: 'https://entfer.com/'
|
3427
|
+
|
3428
|
+
- regex: 'TagInspector/[\d.]+'
|
3429
|
+
name: 'Tag Inspector'
|
3430
|
+
category: 'Crawler'
|
3431
|
+
url: 'https://taginspector.com/'
|
3432
|
+
producer:
|
3433
|
+
name: 'InfoTrust, LLC'
|
3434
|
+
url: 'https://infotrust.com/'
|
3435
|
+
|
3436
|
+
- regex: 'pageburst'
|
3437
|
+
name: 'Pageburst'
|
3438
|
+
category: 'Crawler'
|
3439
|
+
url: 'https://pageburstls.elsevier.com/'
|
3440
|
+
producer:
|
3441
|
+
name: 'Elsevier Ltd'
|
3442
|
+
url: 'https://www.elsevier.com/'
|
3443
|
+
|
3444
|
+
- regex: '.+diffbot'
|
3445
|
+
name: 'Diffbot'
|
3446
|
+
category: 'Crawler'
|
3447
|
+
url: 'https://docs.diffbot.com/docs/getting-started-with-crawl'
|
3448
|
+
producer:
|
3449
|
+
name: 'Diffbot Technologies Corp.'
|
3450
|
+
url: 'https://www.diffbot.com/'
|
3451
|
+
|
3452
|
+
- regex: 'DisqusAdstxtCrawler/[\d.]+'
|
3453
|
+
name: 'Disqus'
|
3454
|
+
category: 'Crawler'
|
3455
|
+
url: 'https://help.disqus.com/en/articles/1765357-ads-txt-implementation-guide'
|
3456
|
+
producer:
|
3457
|
+
name: 'Disqus, Inc.'
|
3458
|
+
url: 'https://disqus.com/'
|
3459
|
+
|
3460
|
+
- regex: 'startmebot/[\d.]+'
|
3461
|
+
name: 'start.me'
|
3462
|
+
category: 'Crawler'
|
3463
|
+
url: 'https://about.start.me/'
|
3464
|
+
producer:
|
3465
|
+
name: 'start.me BV'
|
3466
|
+
url: 'https://about.start.me/'
|
3467
|
+
|
3468
|
+
- regex: '2ip bot/[\d.]+'
|
3469
|
+
name: '2ip'
|
3470
|
+
category: 'Crawler'
|
3471
|
+
url: 'https://2ip.io/'
|
3472
|
+
|
3473
|
+
- regex: 'ReqBin Curl Client/[\d.]+'
|
3474
|
+
name: 'ReqBin'
|
3475
|
+
category: 'Crawler'
|
3476
|
+
url: 'https://reqbin.com/curl'
|
3477
|
+
|
3478
|
+
- regex: 'XoviBot/[\d.]+'
|
3479
|
+
name: 'XoviBot'
|
3480
|
+
category: 'Crawler'
|
3481
|
+
url: 'https://www.xovibot.net'
|
3482
|
+
producer:
|
3483
|
+
name: 'Xovi GmbH'
|
3484
|
+
url: 'http://www.xovi.de'
|
3485
|
+
|
3486
|
+
- regex: 'Overcast/[\d.]+ Podcast Sync'
|
3487
|
+
name: 'Overcast Podcast Sync'
|
3488
|
+
category: 'Service Agent'
|
3489
|
+
url: 'https://overcast.fm/podcasterinfo'
|
3490
|
+
|
3491
|
+
- regex: '^Verity/[\d.]+'
|
3492
|
+
name: 'GumGum Verity'
|
3493
|
+
category: 'Service Agent'
|
3494
|
+
url: 'https://gumgum.com/verity'
|
3495
|
+
|
3496
|
+
- regex: 'hackermention'
|
3497
|
+
name: 'hackermention'
|
3498
|
+
category: 'Feed Reader'
|
3499
|
+
url: 'https://github.com/snarfed/hackermention'
|
3500
|
+
|
3501
|
+
- regex: 'BitSightBot/[\d.]+'
|
3502
|
+
name: 'BitSight'
|
3503
|
+
category: 'Security Checker'
|
3504
|
+
url: 'https://www.bitsight.com/'
|
3505
|
+
producer:
|
3506
|
+
name: 'BitSight Technologies, Inc.'
|
3507
|
+
url: 'https://www.bitsight.com/'
|
3508
|
+
|
3509
|
+
- regex: 'Ezgif/[\d.]+'
|
3510
|
+
name: 'Ezgif'
|
3511
|
+
category: 'Service Agent'
|
3512
|
+
url: 'https://ezgif.com/about'
|
3513
|
+
|
3514
|
+
- regex: 'intelx\.io_bot'
|
3515
|
+
name: 'Intelligence X'
|
3516
|
+
category: 'Crawler'
|
3517
|
+
url: 'https://intelx.io/'
|
3518
|
+
producer:
|
3519
|
+
name: 'Kleissner Investments s.r.o.'
|
3520
|
+
url: 'https://intelx.io/'
|
3521
|
+
|
3522
|
+
- regex: 'FemtosearchBot/[\d.]+'
|
3523
|
+
name: 'Femtosearch'
|
3524
|
+
category: 'Crawler'
|
3525
|
+
url: 'http://femtosearch.com/'
|
3526
|
+
producer:
|
3527
|
+
name: 'Grier Forensics, LLC'
|
3528
|
+
url: 'https://www.grierforensics.com/'
|
3529
|
+
|
3530
|
+
- regex: 'AdsTxtCrawler/[\d.]+'
|
3531
|
+
name: 'AdsTxtCrawler'
|
3532
|
+
category: 'Crawler'
|
3533
|
+
url: 'https://github.com/InteractiveAdvertisingBureau/adstxtcrawler'
|
3534
|
+
producer:
|
3535
|
+
name: 'IAB Technology Laboratory, Inc.'
|
3536
|
+
url: 'https://iabtechlab.com/'
|
3537
|
+
|
3538
|
+
- regex: 'Morningscore'
|
3539
|
+
name: 'Morningscore Bot'
|
3540
|
+
category: 'Crawler'
|
3541
|
+
url: 'https://morningscore.io/'
|
3542
|
+
producer:
|
3543
|
+
name: 'Morningscore'
|
3544
|
+
url: 'https://morningscore.io/'
|
3545
|
+
|
3546
|
+
- regex: 'Uptime-Kuma/[\d.]+'
|
3547
|
+
name: 'Uptime-Kuma'
|
3548
|
+
category: 'Site Monitor'
|
3549
|
+
url: 'https://github.com/louislam/uptime-kuma'
|
3550
|
+
|
3551
|
+
- regex: 'ChatGPT-User'
|
3552
|
+
name: 'ChatGPT'
|
3553
|
+
category: 'Crawler'
|
3554
|
+
url: 'https://platform.openai.com/docs/plugins/bot'
|
3555
|
+
producer:
|
3556
|
+
name: 'OpenAI OpCo, LLC'
|
3557
|
+
url: 'https://openai.com/'
|
3558
|
+
|
3559
|
+
- regex: 'BrightEdge Crawler/[\d.]+'
|
3560
|
+
name: 'BrightEdge'
|
3561
|
+
category: 'Crawler'
|
3562
|
+
url: 'https://www.brightedge.com/'
|
3563
|
+
producer:
|
3564
|
+
name: 'BrightEdge Technologies, Inc'
|
3565
|
+
url: 'https://www.brightedge.com/'
|
3566
|
+
|
3567
|
+
- regex: 'sfFeedReader/[\d.]+'
|
3568
|
+
name: 'sfFeedReader'
|
3569
|
+
url: 'https://github.com/diem-project/sfFeed2Plugin'
|
3570
|
+
category: 'Feed Fetcher'
|
3571
|
+
|
3572
|
+
- regex: 'cyberscan\.io'
|
3573
|
+
name: 'Cyberscan'
|
3574
|
+
category: 'Security Checker'
|
3575
|
+
url: 'https://www.cyberscan.io/'
|
3576
|
+
producer:
|
3577
|
+
name: 'DGC Verwaltungs GmbH'
|
3578
|
+
url: 'https://dgc.org/'
|
3579
|
+
|
3580
|
+
- regex: 'deepcrawl\.com'
|
3581
|
+
name: 'Lumar'
|
3582
|
+
category: 'Crawler'
|
3583
|
+
url: 'https://deepcrawl.com/bot'
|
3584
|
+
producer:
|
3585
|
+
name: 'Lumar'
|
3586
|
+
url: 'https://www.lumar.io/'
|
3587
|
+
|
3588
|
+
- regex: 'researchscan\.comsys\.rwth-aachen\.de'
|
3589
|
+
name: 'Research Scan'
|
3590
|
+
category: 'Crawler'
|
3591
|
+
url: 'http://researchscan.comsys.rwth-aachen.de/'
|
3592
|
+
producer:
|
3593
|
+
name: 'RWTH Aachen University'
|
3594
|
+
url: 'https://www.comsys.rwth-aachen.de/'
|
3595
|
+
|
3596
|
+
- regex: 'newspaper/[\d.]+'
|
3597
|
+
name: 'Scraping Robot'
|
3598
|
+
category: 'Crawler'
|
3599
|
+
url: 'https://scrapingrobot.com/'
|
3600
|
+
producer:
|
3601
|
+
name: 'Sprious LLC'
|
3602
|
+
url: 'https://sprious.com/'
|
3603
|
+
|
3604
|
+
- regex: 'GPTBot/[\d.]+'
|
3605
|
+
name: 'GPTBot'
|
3606
|
+
category: 'Crawler'
|
3607
|
+
url: 'https://platform.openai.com/docs/gptbot'
|
3608
|
+
producer:
|
3609
|
+
name: 'OpenAI OpCo, LLC'
|
3610
|
+
url: 'https://openai.com/'
|
3611
|
+
|
3612
|
+
- regex: 'Ant(?:\.com beta|Bot)(?:/([\d+.]+))?'
|
3613
|
+
name: 'Ant'
|
3614
|
+
category: 'Crawler'
|
3615
|
+
url: 'https://www.ant.com/'
|
3616
|
+
producer:
|
3617
|
+
name: 'Ant.com Ltd.'
|
3618
|
+
url: 'https://www.ant.com/'
|
3619
|
+
|
3620
|
+
- regex: 'WebwikiBot/[\d.]+'
|
3621
|
+
name: 'Webwiki'
|
3622
|
+
category: 'Crawler'
|
3623
|
+
url: 'https://www.webwiki.com/'
|
3624
|
+
producer:
|
3625
|
+
name: 'webwiki GmbH'
|
3626
|
+
url: 'https://www.webwiki.com/'
|
3627
|
+
|
3628
|
+
- regex: 'phpMyAdmin'
|
3629
|
+
name: 'phpMyAdmin'
|
3630
|
+
category: 'Service Agent'
|
3631
|
+
url: 'https://www.phpmyadmin.net/'
|
3632
|
+
|
3633
|
+
- regex: 'Matomo/[\d.]+'
|
3634
|
+
name: 'Matomo'
|
3635
|
+
category: 'Service Agent'
|
3636
|
+
url: 'https://github.com/matomo-org/matomo'
|
3637
|
+
producer:
|
3638
|
+
name: 'InnoCraft Ltd'
|
3639
|
+
url: 'https://matomo.org/'
|
3640
|
+
|
3641
|
+
- regex: 'Prometheus/[\d.]+'
|
3642
|
+
name: 'Prometheus'
|
3643
|
+
category: 'Service Agent'
|
3644
|
+
url: 'https://github.com/prometheus/prometheus'
|
3645
|
+
producer:
|
3646
|
+
name: 'The Linux Foundation'
|
3647
|
+
url: 'https://www.cncf.io/'
|
3648
|
+
|
3649
|
+
- regex: 'ArchiveTeam ArchiveBot'
|
3650
|
+
name: 'ArchiveBot'
|
3651
|
+
category: 'Crawler'
|
3652
|
+
url: 'https://wiki.archiveteam.org/index.php?title=ArchiveBot'
|
3653
|
+
producer:
|
3654
|
+
name: 'ArchiveTeam'
|
3655
|
+
url: 'https://wiki.archiveteam.org/'
|
3656
|
+
|
3657
|
+
- regex: 'MADBbot/[\d.]+'
|
3658
|
+
name: 'MADBbot'
|
3659
|
+
category: 'Crawler'
|
3660
|
+
url: 'https://madb.zapto.org/bot.html'
|
3661
|
+
|
3662
|
+
- regex: 'MeltwaterNews'
|
3663
|
+
name: 'MeltwaterNews'
|
3664
|
+
category: 'Crawler'
|
3665
|
+
producer:
|
3666
|
+
name: 'Meltwater Deutschland GmbH'
|
3667
|
+
url: 'https://www.meltwater.com/'
|
3668
|
+
|
3669
|
+
- regex: '(?:Owler@ows\.eu|OWLer)/[\d.]+'
|
3670
|
+
name: 'OWLer'
|
3671
|
+
category: 'Crawler'
|
3672
|
+
url: 'https://openwebsearch.eu/owler/'
|
3673
|
+
producer:
|
3674
|
+
name: 'Open Search Foundation e.V.'
|
3675
|
+
url: 'https://openwebsearch.eu/'
|
3676
|
+
|
3677
|
+
- regex: 'bbc\.co\.uk/display/men/Page\+Monitor'
|
3678
|
+
name: 'BBC Page Monitor'
|
3679
|
+
category: 'Site Monitor'
|
3680
|
+
url: 'https://confluence.dev.bbc.co.uk/display/men/Page+Monitor'
|
3681
|
+
producer:
|
3682
|
+
name: 'BBC'
|
3683
|
+
url: 'https://www.bbc.com/'
|
3684
|
+
|
3685
|
+
- regex: 'BBC-Forge-URL-Monitor-Twisted'
|
3686
|
+
name: 'BBC Forge URL Monitor'
|
3687
|
+
category: 'Site Monitor'
|
3688
|
+
url: 'https://www.bbc.com/'
|
3689
|
+
producer:
|
3690
|
+
name: 'BBC'
|
3691
|
+
url: 'https://www.bbc.com/'
|
3692
|
+
|
3693
|
+
- regex: 'ClaudeBot'
|
3694
|
+
name: 'ClaudeBot'
|
3695
|
+
category: 'Crawler'
|
3696
|
+
url: 'https://github.com/ClaudeBot/ClaudeBot'
|
3697
|
+
|
3698
|
+
- regex: 'Imagesift'
|
3699
|
+
name: 'ImageSift'
|
3700
|
+
category: 'Crawler'
|
3701
|
+
url: 'https://imagesift.com/'
|
3702
|
+
producer:
|
3703
|
+
name: 'Castle Global, Inc.'
|
3704
|
+
url: 'https://thehive.ai/'
|
3705
|
+
|
3706
|
+
- regex: 'TactiScout'
|
3707
|
+
name: 'TactiScout'
|
3708
|
+
category: 'Crawler'
|
3709
|
+
url: 'https://find-it.world/TempCrawl/Crawltheque.php'
|
3710
|
+
producer:
|
3711
|
+
name: 'Tactikast'
|
3712
|
+
|
3713
|
+
- regex: 'Brightbot ([\d+.]+)'
|
3714
|
+
name: 'BrightBot'
|
3715
|
+
category: 'Crawler'
|
3716
|
+
url: 'https://www.brightbot.app/'
|
3717
|
+
producer:
|
3718
|
+
name: 'Bright Interactive Ltd'
|
3719
|
+
url: 'https://www.builtbybright.com/'
|
3720
|
+
|
3721
|
+
- regex: 'DaspeedBot/([\d+.]+)'
|
3722
|
+
name: 'DaspeedBot'
|
3723
|
+
category: 'Crawler'
|
3724
|
+
url: 'https://daspeed.io/'
|
3725
|
+
producer:
|
3726
|
+
name: 'DAWAP SARL'
|
3727
|
+
url: 'https://dawap.fr/'
|
3728
|
+
|
3729
|
+
- regex: 'StractBot(?:/([\d+.]+))?'
|
3730
|
+
name: 'Stract'
|
3731
|
+
category: 'Crawler'
|
3732
|
+
url: 'https://stract.com/webmasters'
|
3733
|
+
producer:
|
3734
|
+
name: 'Stract'
|
3735
|
+
url: 'https://github.com/StractOrg/stract/'
|
3736
|
+
|
3737
|
+
- regex: 'GeedoBot(?:/([\d+.]+))?'
|
3738
|
+
name: 'GeedoBot'
|
3739
|
+
category: 'Crawler'
|
3740
|
+
url: 'https://geedo.com/bot/'
|
3741
|
+
|
3742
|
+
- regex: 'GeedoProductSearch'
|
3743
|
+
name: 'GeedoProductSearch'
|
3744
|
+
category: 'Crawler'
|
3745
|
+
url: 'https://geedo.com/product-search/'
|
3746
|
+
|
3747
|
+
- regex: 'BackupLand(?:/([\d+.]+))?'
|
3748
|
+
name: 'BackupLand'
|
3749
|
+
category: 'Crawler'
|
3750
|
+
url: 'https://go.backupland.com/'
|
3751
|
+
producer:
|
3752
|
+
name: 'ООО «КВАРТА»'
|
3753
|
+
url: 'https://go.backupland.com/'
|
3754
|
+
|
3755
|
+
- regex: 'Konturbot(?:/([\d+.]+))?'
|
3756
|
+
name: 'Konturbot'
|
3757
|
+
category: 'Crawler'
|
3758
|
+
url: 'https://kontur.ru/'
|
3759
|
+
producer:
|
3760
|
+
name: 'АО «ПФ «СКБ Контур»'
|
3761
|
+
url: 'https://kontur.ru/'
|
3762
|
+
|
3763
|
+
- regex: 'keys-so-bot'
|
3764
|
+
name: 'Keys.so'
|
3765
|
+
category: 'Crawler'
|
3766
|
+
url: 'https://www.keys.so/'
|
3767
|
+
producer:
|
3768
|
+
name: 'ООО «МОДЕСКО»'
|
3769
|
+
url: 'https://www.modesco.ru/'
|
3770
|
+
|
3771
|
+
- regex: 'LetsearchBot(?:/([\d+.]+))?'
|
3772
|
+
name: 'LetSearch'
|
3773
|
+
category: 'Crawler'
|
3774
|
+
url: 'https://letsearch.ru/bots'
|
3775
|
+
|
3776
|
+
- regex: 'Example3(?:/([\d+.]+))?'
|
3777
|
+
name: 'Example3'
|
3778
|
+
category: 'Crawler'
|
3779
|
+
url: 'https://www.example3.com/'
|
3780
|
+
|
3781
|
+
- regex: 'StatOnlineRuBot(?:/([\d+.]+))?'
|
3782
|
+
name: 'StatOnline.ru'
|
3783
|
+
category: 'Crawler'
|
3784
|
+
url: 'https://statonline.ru/'
|
3785
|
+
producer:
|
3786
|
+
name: 'ООО «Регистратор доменных имен РЕГ.РУ»'
|
3787
|
+
url: 'https://statonline.ru/'
|
3788
|
+
|
3789
|
+
- regex: 'Spawning-AI'
|
3790
|
+
name: 'Spawning AI'
|
3791
|
+
category: 'Crawler'
|
3792
|
+
url: 'https://spawning.ai/'
|
3793
|
+
producer:
|
3794
|
+
name: 'Spawning, Inc'
|
3795
|
+
url: 'https://spawning.ai/'
|
3796
|
+
|
3797
|
+
- regex: 'domain research project'
|
3798
|
+
name: 'Domain Research Project'
|
3799
|
+
category: 'Crawler'
|
3800
|
+
url: 'https://trentwil.es/domains.html'
|
3801
|
+
producer:
|
3802
|
+
name: 'Trent Wiles'
|
3803
|
+
url: 'https://trentwil.es/'
|
3804
|
+
|
3805
|
+
- regex: 'getodin\.com'
|
3806
|
+
name: 'Odin'
|
3807
|
+
category: 'Security Checker'
|
3808
|
+
url: 'https://docs.getodin.com/'
|
3809
|
+
producer:
|
3810
|
+
name: 'Cyble Inc.'
|
3811
|
+
url: 'https://cyble.com/'
|
3812
|
+
|
3813
|
+
- regex: 'YouBot'
|
3814
|
+
name: 'YouBot'
|
3815
|
+
category: 'Crawler'
|
3816
|
+
url: 'https://about.you.com/youbot/'
|
3817
|
+
producer:
|
3818
|
+
name: 'SuSea, Inc.'
|
3819
|
+
url: 'https://you.com/'
|
3820
|
+
|
3821
|
+
- regex: 'SiteScoreBot'
|
3822
|
+
name: 'SiteScore'
|
3823
|
+
category: 'Crawler'
|
3824
|
+
url: 'https://sitescore.ai/'
|
3825
|
+
|
3826
|
+
- regex: 'MBCrawler'
|
3827
|
+
name: 'Monitor Backlinks'
|
3828
|
+
category: 'Crawler'
|
3829
|
+
url: 'https://www.seoptimer.com/monitor-backlinks/'
|
3830
|
+
producer:
|
3831
|
+
name: 'SEOptimer'
|
3832
|
+
url: 'https://www.seoptimer.com/'
|
3833
|
+
|
3834
|
+
- regex: 'mariadb-mysql-kbs-bot'
|
3835
|
+
name: 'MariaDB/MySQL Knowledge Base'
|
3836
|
+
category: 'Crawler'
|
3837
|
+
url: 'https://github.com/williamdes/mariadb-mysql-kbs'
|
3838
|
+
producer:
|
3839
|
+
name: 'WDES SAS'
|
3840
|
+
url: 'https://wdes.fr/en/'
|
3841
|
+
|
3842
|
+
- regex: 'GitHubCopilotChat'
|
3843
|
+
name: 'GitHubCopilotChat'
|
3844
|
+
category: 'Crawler'
|
3845
|
+
url: 'https://github.com/aaamoon/copilot-gpt4-service'
|
3846
|
+
|
3847
|
+
- regex: '^pdrl\.fm'
|
3848
|
+
name: 'Podroll Analyzer'
|
3849
|
+
category: 'Crawler'
|
3850
|
+
url: 'https://podroll.fm'
|
3851
|
+
|
3852
|
+
- regex: 'PodUptime/'
|
3853
|
+
name: 'PodUptime'
|
3854
|
+
category: 'Site Monitor'
|
3855
|
+
url: 'https://poduptime.com'
|
3856
|
+
|
3857
|
+
- regex: 'anthropic-ai'
|
3858
|
+
name: 'Anthropic AI'
|
3859
|
+
category: 'Crawler'
|
3860
|
+
url: 'https://www.anthropic.com/'
|
3861
|
+
producer:
|
3862
|
+
name: 'Anthropic, PBC'
|
3863
|
+
url: 'https://www.anthropic.com/'
|
3864
|
+
|
3865
|
+
- regex: 'NetpeakCheckerBot/[\d.]+'
|
3866
|
+
name: 'Netpeak Checker'
|
3867
|
+
category: 'Crawler'
|
3868
|
+
url: 'https://netpeaksoftware.com/checker'
|
3869
|
+
producer:
|
3870
|
+
name: 'Netpeak LTD'
|
3871
|
+
url: 'https://netpeaksoftware.com/'
|
3872
|
+
|
3873
|
+
- regex: 'SandobaCrawler/[\d.]+'
|
3874
|
+
name: 'Sandoba//Crawler'
|
3875
|
+
category: 'Crawler'
|
3876
|
+
url: 'https://www.sandoba.com/en/crawler/'
|
3877
|
+
producer:
|
3878
|
+
name: 'SANDOBA//EBUSINESS SOLUTIONS'
|
3879
|
+
url: 'https://www.sandoba.com/'
|
3880
|
+
|
3881
|
+
- regex: 'SirdataBot'
|
3882
|
+
name: 'Sirdata'
|
3883
|
+
category: 'Crawler'
|
3884
|
+
url: 'https://semantic-api.docs.sirdata.net/contextual-api/contextual-api/introduction'
|
3885
|
+
producer:
|
3886
|
+
name: 'Sirdata SAS'
|
3887
|
+
url: 'https://www.sirdata.com/'
|
3888
|
+
|
3889
|
+
- regex: 'CheckMarkNetwork/[\d.]+'
|
3890
|
+
name: 'CheckMark Network'
|
3891
|
+
category: 'Crawler'
|
3892
|
+
url: 'https://www.checkmarknetwork.com/spider.html/'
|
3893
|
+
producer:
|
3894
|
+
name: 'Exipert, Inc.'
|
3895
|
+
url: 'https://www.checkmarknetwork.com/'
|
3896
|
+
|
3897
|
+
- regex: 'cohere-ai'
|
3898
|
+
name: 'Cohere AI'
|
3899
|
+
category: 'Crawler'
|
3900
|
+
url: 'https://cohere.com/'
|
3901
|
+
producer:
|
3902
|
+
name: 'Cohere, Inc.'
|
3903
|
+
url: 'https://cohere.com/'
|
3904
|
+
|
3905
|
+
- regex: 'PerplexityBot/[\d.]+'
|
3906
|
+
name: 'PerplexityBot'
|
3907
|
+
category: 'Crawler'
|
3908
|
+
url: 'https://docs.perplexity.ai/docs/perplexitybot'
|
3909
|
+
producer:
|
3910
|
+
name: 'Perplexity AI, Inc.'
|
3911
|
+
url: 'https://www.perplexity.ai/'
|
3912
|
+
|
3913
|
+
- regex: 'TTD-Content'
|
3914
|
+
name: 'The Trade Desk Content'
|
3915
|
+
category: 'Crawler'
|
3916
|
+
url: 'https://www.thetradedesk.com/us/ttd-content'
|
3917
|
+
producer:
|
3918
|
+
name: 'The Trade Desk, Inc.'
|
3919
|
+
url: 'https://www.thetradedesk.com/'
|
3920
|
+
|
3921
|
+
- regex: 'montastic-monitor'
|
3922
|
+
name: 'Montastic Monitor'
|
3923
|
+
category: 'Site Monitor'
|
3924
|
+
url: 'https://www.montastic.com/'
|
3925
|
+
producer:
|
3926
|
+
name: 'Metadot, Corp.'
|
3927
|
+
url: 'https://www.metadot.com/'
|
3928
|
+
|
3929
|
+
- regex: 'Ruby, Twurly v[\d.]+'
|
3930
|
+
name: 'Twurly'
|
3931
|
+
category: 'Crawler'
|
3932
|
+
url: 'https://twurly.org/'
|
3933
|
+
|
3934
|
+
- regex: 'Mixnode(?:(?:Cache)?/[\d.]+)?'
|
3935
|
+
name: 'Mixnode'
|
3936
|
+
category: 'Crawler'
|
3937
|
+
url: 'https://www.mixnode.com/'
|
3938
|
+
producer:
|
3939
|
+
name: 'Mixnode Technologies, Inc.'
|
3940
|
+
url: 'https://www.mixnode.com/'
|
3941
|
+
|
3942
|
+
- regex: 'CSSCheck/[\d.]+'
|
3943
|
+
name: 'CSSCheck'
|
3944
|
+
category: 'Validator'
|
3945
|
+
|
3946
|
+
- regex: 'MicrosoftPreview/[\d.]+'
|
3947
|
+
name: 'Microsoft Preview'
|
3948
|
+
category: 'Service Agent'
|
3949
|
+
url: 'https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0'
|
3950
|
+
producer:
|
3951
|
+
name: 'Microsoft Corporation'
|
3952
|
+
url: 'https://www.microsoft.com/'
|
3953
|
+
|
3954
|
+
- regex: 's~virustotalcloud'
|
3955
|
+
name: 'VirusTotal Cloud'
|
3956
|
+
category: 'Crawler'
|
3957
|
+
url: 'https://www.virustotal.com/'
|
3958
|
+
producer:
|
3959
|
+
name: 'Chronicle Security Ireland Limited'
|
3960
|
+
url: 'https://chronicle.security/'
|
3961
|
+
|
3962
|
+
- regex: 'TinEye/[\d.]+'
|
3963
|
+
name: 'TinEye'
|
3964
|
+
category: 'Crawler'
|
3965
|
+
url: 'https://tineye.com/'
|
3966
|
+
producer:
|
3967
|
+
name: 'Idée, Inc.'
|
3968
|
+
url: 'https://tineye.com/'
|
3969
|
+
|
3970
|
+
- regex: 'e~arsnova-filter-system'
|
3971
|
+
name: 'ARSNova Filter System'
|
3972
|
+
category: 'Crawler'
|
3973
|
+
url: 'https://particify.de/en/'
|
3974
|
+
producer:
|
3975
|
+
name: 'Particify Gerhardt & Weingarten OHG'
|
3976
|
+
url: 'https://particify.de/en/'
|
3977
|
+
|
3978
|
+
- regex: 'botify'
|
3979
|
+
name: 'Botify'
|
3980
|
+
category: 'Crawler'
|
3981
|
+
url: 'https://www.botify.com/'
|
3982
|
+
producer:
|
3983
|
+
name: 'BOTIFY SAS'
|
3984
|
+
url: 'https://www.botify.com/'
|
3985
|
+
|
3986
|
+
- regex: 'adscanner'
|
3987
|
+
name: 'Adscanner'
|
3988
|
+
category: 'Crawler'
|
3989
|
+
url: 'https://www.alleyesonscreens.com/'
|
3990
|
+
producer:
|
3991
|
+
name: 'AdScanner d.o.o'
|
3992
|
+
url: 'https://www.alleyesonscreens.com/'
|
3993
|
+
|
3994
|
+
- regex: 'online-webceo-bot/[\d.]+'
|
3995
|
+
name: 'WebCEO'
|
3996
|
+
category: 'Crawler'
|
3997
|
+
url: 'https://www.webceo.com/'
|
3998
|
+
producer:
|
3999
|
+
name: 'WebCEO, LLC'
|
4000
|
+
url: 'https://www.webceo.com/'
|
4001
|
+
|
4002
|
+
- regex: 'NetTrack'
|
4003
|
+
name: 'NetTrack'
|
4004
|
+
category: 'Crawler'
|
4005
|
+
url: 'https://web.archive.org/web/20160607151934/https://nettrack.info/'
|
4006
|
+
|
4007
|
+
- regex: 'htmlyse'
|
4008
|
+
name: 'htmlyse'
|
4009
|
+
category: 'Crawler'
|
4010
|
+
url: 'https://www.htmlyse.com/'
|
4011
|
+
producer:
|
4012
|
+
name: 'Vistex LTD'
|
4013
|
+
url: 'https://www.htmlyse.com/'
|
4014
|
+
|
4015
|
+
- regex: 'TrendsmapResolver/[\d.]+'
|
4016
|
+
name: 'Trendsmap'
|
4017
|
+
category: 'Crawler'
|
4018
|
+
url: 'https://www.trendsmap.com/'
|
4019
|
+
producer:
|
4020
|
+
name: 'Trendsmap Pty Ltd'
|
4021
|
+
url: 'https://www.trendsmap.com/'
|
4022
|
+
|
4023
|
+
- regex: 'Shareaholic(?:bot)?/[\d.]+'
|
4024
|
+
name: 'Steve Bot'
|
4025
|
+
category: 'Crawler'
|
4026
|
+
url: 'https://www.shareaholic.com/steve'
|
4027
|
+
producer:
|
4028
|
+
name: 'Shareaholic, Inc.'
|
4029
|
+
url: 'https://www.shareaholic.com/'
|
4030
|
+
|
4031
|
+
- regex: 'keycdn-tools:'
|
4032
|
+
name: 'KeyCDN Tools'
|
4033
|
+
category: 'Service Agent'
|
4034
|
+
url: 'https://tools.keycdn.com/geo'
|
4035
|
+
|
4036
|
+
- regex: 'keycdn-tools/'
|
4037
|
+
name: 'KeyCDN Tools'
|
4038
|
+
category: 'Service Agent'
|
4039
|
+
url: 'https://tools.keycdn.com/'
|
4040
|
+
producer:
|
4041
|
+
name: 'proinity LLC'
|
4042
|
+
url: 'https://www.keycdn.com/'
|
4043
|
+
|
4044
|
+
- regex: 'Arquivo-web-crawler'
|
4045
|
+
name: 'Arquivo.pt'
|
4046
|
+
category: 'Crawler'
|
4047
|
+
url: 'https://sobre.arquivo.pt/en/help/crawling-and-archiving-web-content/'
|
4048
|
+
producer:
|
4049
|
+
name: 'FCT|FCCN'
|
4050
|
+
url: 'https://www.fct.pt/'
|
4051
|
+
|
4052
|
+
- regex: 'WhatsMyIP\.org'
|
4053
|
+
name: 'WhatsMyIP.org'
|
4054
|
+
category: 'Service Agent'
|
4055
|
+
url: 'https://www.whatsmyip.org/ua/'
|
4056
|
+
|
4057
|
+
- regex: 'SenutoBot/[\d.]+'
|
4058
|
+
name: 'Senuto'
|
4059
|
+
category: 'Crawler'
|
4060
|
+
url: 'https://www.senuto.com/'
|
4061
|
+
producer:
|
4062
|
+
name: 'Senuto Sp. z o.o.'
|
4063
|
+
url: 'https://www.senuto.com/'
|
4064
|
+
|
4065
|
+
- regex: 'spaziodati'
|
4066
|
+
name: 'SpazioDati'
|
4067
|
+
category: 'Crawler'
|
4068
|
+
url: 'https://www.spaziodati.eu/'
|
4069
|
+
producer:
|
4070
|
+
name: 'SpazioDati s.r.l.'
|
4071
|
+
url: 'https://www.spaziodati.eu/'
|
4072
|
+
|
4073
|
+
- regex: 'GozleBot'
|
4074
|
+
name: 'Gozle'
|
4075
|
+
category: 'Crawler'
|
4076
|
+
url: 'https://gozle.com.tm/en/blog/post/1'
|
4077
|
+
producer:
|
4078
|
+
name: 'Doly Horjun HJ'
|
4079
|
+
url: 'https://gozle.com.tm/'
|
4080
|
+
|
4081
|
+
- regex: 'Quantcastbot/[\d.]+'
|
4082
|
+
name: 'Quantcast'
|
4083
|
+
category: 'Crawler'
|
4084
|
+
url: 'https://www.quantcast.com/bot/'
|
4085
|
+
producer:
|
4086
|
+
name: 'Quantcast Corp.'
|
4087
|
+
url: 'https://www.quantcast.com/'
|
4088
|
+
|
4089
|
+
- regex: 'FontRadar'
|
4090
|
+
name: 'FontRadar'
|
4091
|
+
category: 'Crawler'
|
4092
|
+
url: 'https://www.fontradar.com/'
|
4093
|
+
producer:
|
4094
|
+
name: 'EMDASH SAS'
|
4095
|
+
url: 'https://www.fontradar.com/'
|
4096
|
+
|
4097
|
+
- regex: 'ViberUrlDownloader'
|
4098
|
+
name: 'Viber Url Downloader'
|
4099
|
+
category: 'Service Agent'
|
4100
|
+
url: 'https://www.viber.com/'
|
4101
|
+
producer:
|
4102
|
+
name: 'Viber Media S.à r.l.'
|
4103
|
+
url: 'https://www.viber.com/'
|
4104
|
+
|
4105
|
+
- regex: '^Zeno$'
|
4106
|
+
name: 'Zeno'
|
4107
|
+
category: 'Crawler'
|
4108
|
+
url: 'https://github.com/internetarchive/Zeno'
|
4109
|
+
producer:
|
4110
|
+
name: 'The Internet Archive'
|
4111
|
+
url: 'https://archive.org/'
|
4112
|
+
|
4113
|
+
- regex: 'Barracuda Sentinel'
|
4114
|
+
name: 'Barracuda Sentinel'
|
4115
|
+
category: 'Service Agent'
|
4116
|
+
url: 'https://sentinel.barracudanetworks.com/'
|
4117
|
+
producer:
|
4118
|
+
name: 'Barracuda Networks, Inc.'
|
4119
|
+
url: 'https://www.barracudanetworks.com/'
|
4120
|
+
|
4121
|
+
- regex: 'RuxitSynthetic/[\d.]+'
|
4122
|
+
name: 'RuxitSynthetic'
|
4123
|
+
category: 'Site Monitor'
|
4124
|
+
url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
|
4125
|
+
producer:
|
4126
|
+
name: 'Dynatrace LLC'
|
4127
|
+
url: 'https://www.dynatrace.com/'
|
4128
|
+
|
4129
|
+
- regex: 'DynatraceSynthetic/[\d.]+'
|
4130
|
+
name: 'DynatraceSynthetic'
|
4131
|
+
category: 'Site Monitor'
|
4132
|
+
url: 'https://community.dynatrace.com/t5/Troubleshooting/Basic-Commands-for-Synthetic/ta-p/198164'
|
4133
|
+
producer:
|
4134
|
+
name: 'Dynatrace LLC'
|
4135
|
+
url: 'https://www.dynatrace.com/'
|
4136
|
+
|
4137
|
+
- regex: 'sitebulb'
|
4138
|
+
name: 'Sitebulb'
|
4139
|
+
category: 'Crawler'
|
4140
|
+
url: 'https://sitebulb.com/'
|
4141
|
+
producer:
|
4142
|
+
name: 'Sitebulb Limited'
|
4143
|
+
url: 'https://sitebulb.com/'
|
4144
|
+
|
4145
|
+
- regex: 'Monsidobot/[\d.]+'
|
4146
|
+
name: 'Monsidobot'
|
4147
|
+
category: 'Crawler'
|
4148
|
+
url: 'https://monsido.com/bot-html'
|
4149
|
+
producer:
|
4150
|
+
name: 'Monsido LLC'
|
4151
|
+
url: 'https://monsido.com/'
|
4152
|
+
|
4153
|
+
- regex: 'AccompanyBot'
|
4154
|
+
name: 'AccompanyBot'
|
4155
|
+
category: 'Crawler'
|
4156
|
+
url: 'https://www.accompany.com/'
|
4157
|
+
producer:
|
4158
|
+
name: 'Accompani, Inc'
|
4159
|
+
url: 'https://www.accompany.com/'
|
4160
|
+
|
4161
|
+
- regex: 'Ghost Inspector'
|
4162
|
+
name: 'Ghost Inspector'
|
4163
|
+
category: 'Site Monitor'
|
4164
|
+
url: 'https://docs.ghostinspector.com/faq/#how-do-i-detect-ghost-inspector-test-runner-traffic-on-my-site'
|
4165
|
+
producer:
|
4166
|
+
name: 'Ghost Inspector, Inc.'
|
4167
|
+
url: 'https://www.ghostinspector.com/'
|
4168
|
+
|
4169
|
+
- regex: 'Cypress/[\d.]+'
|
4170
|
+
name: 'Cypress'
|
4171
|
+
category: 'Site Monitor'
|
4172
|
+
url: 'https://github.com/cypress-io/cypress'
|
4173
|
+
producer:
|
4174
|
+
name: 'Cypress.io, Inc.'
|
4175
|
+
url: 'https://www.cypress.io/'
|
4176
|
+
|
4177
|
+
- regex: 'Google-Apps-Script'
|
4178
|
+
name: 'Google Apps Script'
|
4179
|
+
category: 'Service Agent'
|
4180
|
+
url: 'https://www.google.com/script/start/'
|
4181
|
+
|
4182
|
+
- regex: 'SiteOne-Crawler/[\d.]+'
|
4183
|
+
name: 'SiteOne Crawler'
|
4184
|
+
category: 'Crawler'
|
4185
|
+
url: 'https://crawler.siteone.io/bot/'
|
4186
|
+
producer:
|
4187
|
+
name: 'SiteOne s.r.o.'
|
4188
|
+
url: 'https://www.siteone.io/'
|
4189
|
+
|
4190
|
+
- regex: 'Detectify'
|
4191
|
+
name: 'Detectify'
|
4192
|
+
category: 'Security Checker'
|
4193
|
+
url: 'https://support.detectify.com/support/solutions/articles/48001049001-how-to-allow-detectify-to-access-your-site'
|
4194
|
+
producer:
|
4195
|
+
name: 'Detectify AB'
|
4196
|
+
url: 'https://detectify.com/'
|
4197
|
+
|
4198
|
+
- regex: 'DomCopBot'
|
4199
|
+
name: 'DomCop Bot'
|
4200
|
+
category: 'Crawler'
|
4201
|
+
url: 'https://www.domcop.com/bot'
|
4202
|
+
producer:
|
4203
|
+
name: 'Axeman Technology Solutions LLP'
|
4204
|
+
url: 'https://axemantech.com/'
|
4205
|
+
|
4206
|
+
- regex: 'Paqlebot/[\d.]+'
|
4207
|
+
name: 'Paqlebot'
|
4208
|
+
category: 'Crawler'
|
4209
|
+
url: 'https://www.paqle.dk/about/paqlebot'
|
4210
|
+
producer:
|
4211
|
+
name: 'Paqle A/S'
|
4212
|
+
url: 'https://www.paqle.dk/'
|
4213
|
+
|
4214
|
+
- regex: 'Wibybot'
|
4215
|
+
name: 'Wibybot'
|
4216
|
+
category: 'Crawler'
|
4217
|
+
url: 'https://www.wiby.me/'
|
4218
|
+
|
4219
|
+
- regex: 'Synapse'
|
4220
|
+
name: 'Synapse'
|
4221
|
+
category: 'Crawler'
|
4222
|
+
url: 'https://github.com/matrix-org/synapse'
|
4223
|
+
|
4224
|
+
- regex: 'OSZKbot/[\d.]+'
|
4225
|
+
name: 'OSZKbot'
|
4226
|
+
category: 'Crawler'
|
4227
|
+
url: 'http://mekosztaly.oszk.hu/mia/'
|
4228
|
+
producer:
|
4229
|
+
name: 'National Szechenyi Library'
|
4230
|
+
url: 'https://webarchivum.oszk.hu/'
|
4231
|
+
|
4232
|
+
- regex: 'ZoomBot'
|
4233
|
+
name: 'ZoomBot'
|
4234
|
+
category: 'Crawler'
|
4235
|
+
url: 'https://suite.seozoom.it/bot.html'
|
4236
|
+
producer:
|
4237
|
+
name: 'SEO Cube S.r.l.'
|
4238
|
+
url: 'https://www.seocube.it/'
|
4239
|
+
|
4240
|
+
- regex: 'RavenCrawler/[\d.]+'
|
4241
|
+
name: 'RavenCrawler'
|
4242
|
+
category: 'Crawler'
|
4243
|
+
url: 'https://raventools.com/site-auditor/'
|
4244
|
+
producer:
|
4245
|
+
name: 'TapClicks, Inc.'
|
4246
|
+
url: 'https://www.tapclicks.com/'
|
4247
|
+
|
4248
|
+
- regex: 'KadoBot'
|
4249
|
+
name: 'KadoBot'
|
4250
|
+
category: 'Crawler'
|
4251
|
+
url: 'https://www.kadolijst.nl/bot'
|
4252
|
+
producer:
|
4253
|
+
name: 'Kadolijst'
|
4254
|
+
url: 'https://www.kadolijst.nl/'
|
4255
|
+
|
4256
|
+
- regex: 'Dubbotbot/[\d.]+'
|
4257
|
+
name: 'Dubbotbot'
|
4258
|
+
category: 'Crawler'
|
4259
|
+
url: 'https://help.dubbot.com/en/articles/6746594-example-custom-user-agent'
|
4260
|
+
producer:
|
4261
|
+
name: 'DubBot'
|
4262
|
+
url: 'https://dubbot.com/'
|
4263
|
+
|
4264
|
+
- regex: 'Swiftbot/[\d.]+'
|
4265
|
+
name: 'Swiftbot'
|
4266
|
+
category: 'Crawler'
|
4267
|
+
url: 'https://swiftype.com/swiftbot'
|
4268
|
+
producer:
|
4269
|
+
name: 'Elasticsearch, B.V.'
|
4270
|
+
url: 'https://www.elastic.co/'
|
4271
|
+
|
4272
|
+
- regex: 'EyeMonIT'
|
4273
|
+
name: 'EyeMonit'
|
4274
|
+
category: 'Site Monitor'
|
4275
|
+
url: 'https://eyemonit.com/'
|
4276
|
+
producer:
|
4277
|
+
name: 'EyeMonit'
|
4278
|
+
url: 'https://eyemonit.com/'
|
4279
|
+
|
4280
|
+
- regex: 'ThousandEyes'
|
4281
|
+
name: 'ThousandEyes'
|
4282
|
+
category: 'Site Monitor'
|
4283
|
+
url: 'https://www.thousandeyes.com/'
|
4284
|
+
producer:
|
4285
|
+
name: 'Cisco Systems, Inc.'
|
4286
|
+
url: 'https://www.cisco.com/'
|
4287
|
+
|
4288
|
+
- regex: 'OmtrBot/[\d.]+'
|
4289
|
+
name: 'OmtrBot'
|
4290
|
+
category: 'Site Monitor'
|
4291
|
+
|
4292
|
+
- regex: 'WebMon/[\d.]+'
|
4293
|
+
name: 'WebMon'
|
4294
|
+
category: 'Site Monitor'
|
4295
|
+
|
4296
|
+
- regex: 'AdsTxtCrawlerTP/[\d.]+'
|
4297
|
+
name: 'AdsTxtCrawlerTP'
|
4298
|
+
category: 'Crawler'
|
4299
|
+
|
4300
|
+
- regex: 'fragFINN'
|
4301
|
+
name: 'fragFINN'
|
4302
|
+
category: 'Crawler'
|
4303
|
+
url: 'https://www.fragfinn.de/'
|
4304
|
+
producer:
|
4305
|
+
name: 'fragFINN e.V.'
|
4306
|
+
url: 'https://www.fragfinn.de/'
|
4307
|
+
|
4308
|
+
- regex: 'Clickagy'
|
4309
|
+
name: 'Clickagy'
|
4310
|
+
category: 'Crawler'
|
4311
|
+
url: 'https://www.clickagy.com/'
|
4312
|
+
producer:
|
4313
|
+
name: 'Clickagy, LLC'
|
4314
|
+
url: 'https://www.clickagy.com/'
|
4315
|
+
|
4316
|
+
- regex: 'kiwitcms-gitops/[\d.]+'
|
4317
|
+
name: 'Kiwi TCMS GitOps'
|
4318
|
+
category: 'Service Agent'
|
4319
|
+
url: 'https://kiwitcms.org'
|
4320
|
+
producer:
|
4321
|
+
name: 'Open Technologies Bulgaria, Ltd.'
|
4322
|
+
url: 'https://kiwitcms.org'
|
4323
|
+
|
4324
|
+
- regex: 'webtru_crawler'
|
4325
|
+
name: 'webtru'
|
4326
|
+
category: 'Crawler'
|
4327
|
+
url: 'https://webtru.io/'
|
4328
|
+
producer:
|
4329
|
+
name: 'DataSign Inc.'
|
4330
|
+
url: 'https://datasign.jp/'
|
4331
|
+
|
4332
|
+
- regex: 'URLSuMaBot'
|
4333
|
+
name: 'URLSuMaBot'
|
4334
|
+
category: 'Crawler'
|
4335
|
+
url: 'https://www.urlsuma.de/'
|
4336
|
+
|
4337
|
+
- regex: '360JK yunjiankong'
|
4338
|
+
name: '360JK'
|
4339
|
+
category: 'Site Monitor'
|
4340
|
+
url: 'http://jk.cloud.360.cn/'
|
4341
|
+
producer:
|
4342
|
+
name: '360 Security Technology Inc.'
|
4343
|
+
url: 'https://www.360.cn/'
|
4344
|
+
|
4345
|
+
- regex: 'UCSBNetworkMeasurement'
|
4346
|
+
name: 'UCSB Network Measurement'
|
4347
|
+
category: 'Crawler'
|
4348
|
+
url: 'https://www.it.ucsb.edu/'
|
4349
|
+
producer:
|
4350
|
+
name: 'University of California, Santa Barbara'
|
4351
|
+
url: 'https://www.it.ucsb.edu/'
|
4352
|
+
|
4353
|
+
- regex: 'Plesk screenshot bot'
|
4354
|
+
name: 'Plesk Screenshot Service'
|
4355
|
+
category: 'Service Agent'
|
4356
|
+
url: 'https://support.plesk.com/hc/en-us/articles/13302778306199-What-is-Plesk-Screenshot-Service'
|
4357
|
+
producer:
|
4358
|
+
name: 'Plesk International GmbH'
|
4359
|
+
url: 'https://www.plesk.com/'
|
4360
|
+
|
4361
|
+
- regex: 'Who\.is'
|
4362
|
+
name: 'Who.is Bot'
|
4363
|
+
category: 'Crawler'
|
4364
|
+
url: 'https://who.is/'
|
4365
|
+
|
4366
|
+
- regex: 'Probely'
|
4367
|
+
name: 'Probely'
|
4368
|
+
category: 'Security Checker'
|
4369
|
+
url: 'https://probely.com/sos/'
|
4370
|
+
producer:
|
4371
|
+
name: 'Probely - Soluções de Cibersegurança, S.A.'
|
4372
|
+
url: 'https://probely.com/'
|
4373
|
+
|
4374
|
+
- regex: 'Uptimia(?:/[\d.]+)?'
|
4375
|
+
name: 'Uptimia'
|
4376
|
+
category: 'Site Monitor'
|
4377
|
+
url: 'https://www.uptimia.com/'
|
4378
|
+
producer:
|
4379
|
+
name: 'JJ Online GmbH'
|
4380
|
+
url: 'https://www.uptimia.com/'
|
4381
|
+
|
4382
|
+
- regex: '2GDPR/[\d.]+'
|
4383
|
+
name: '2GDPR'
|
4384
|
+
category: 'Service Agent'
|
4385
|
+
url: 'https://2gdpr.com/tos'
|
4386
|
+
producer:
|
4387
|
+
name: '2GDPR'
|
4388
|
+
url: 'https://2gdpr.com/'
|
4389
|
+
|
4390
|
+
- regex: 'abuse\.xmco\.fr'
|
4391
|
+
name: 'Serenety'
|
4392
|
+
category: 'Security Checker'
|
4393
|
+
url: 'https://abuse.xmco.fr/'
|
4394
|
+
producer:
|
4395
|
+
name: 'XMCO, SASU'
|
4396
|
+
url: 'https://www.xmco.fr/'
|
4397
|
+
|
4398
|
+
- regex: 'CheckHost'
|
4399
|
+
name: 'CheckHost'
|
4400
|
+
category: 'Site Monitor'
|
4401
|
+
url: 'https://check-host.net/'
|
4402
|
+
producer:
|
4403
|
+
name: 'CheckHost'
|
4404
|
+
url: 'https://check-host.net/'
|
4405
|
+
|
4406
|
+
- regex: 'LAC_IAHarvester/[\d.]+'
|
4407
|
+
name: 'LAC IA Harvester'
|
4408
|
+
category: 'Crawler'
|
4409
|
+
url: 'https://library-archives.canada.ca/eng/services/government-canada/web-social-media-preservation-program/Pages/web-archive.aspx'
|
4410
|
+
producer:
|
4411
|
+
name: 'Library and Archives Canada'
|
4412
|
+
url: 'https://library-archives.canada.ca/'
|
4413
|
+
|
4414
|
+
- regex: 'InsytfulBot/[\d.]+'
|
4415
|
+
name: 'InsytfulBot'
|
4416
|
+
category: 'Crawler'
|
4417
|
+
url: 'https://www.insytful.com/'
|
4418
|
+
producer:
|
4419
|
+
name: 'Zengenti Limited'
|
4420
|
+
url: 'https://www.zengenti.com/'
|
4421
|
+
|
4422
|
+
- regex: 'statista\.com'
|
4423
|
+
name: 'Statista'
|
4424
|
+
category: 'Crawler'
|
4425
|
+
url: 'https://www.statista.com/'
|
4426
|
+
producer:
|
4427
|
+
name: 'Statista, Inc.'
|
4428
|
+
url: 'https://www.statista.com/'
|
4429
|
+
|
4430
|
+
- regex: 'SubstackContentFetch/[\d.]+'
|
4431
|
+
name: 'Substack Content Fetch'
|
4432
|
+
category: 'Crawler'
|
4433
|
+
url: 'https://substack.com/'
|
4434
|
+
producer:
|
4435
|
+
name: 'Substack, Inc.'
|
4436
|
+
url: 'https://substack.com/'
|
4437
|
+
|
4438
|
+
- regex: '^ds9'
|
4439
|
+
name: 'Deep SEARCH 9'
|
4440
|
+
category: 'Crawler'
|
4441
|
+
url: 'https://www.copyright.com/blog/ccc-expands-corporate-solutions-offering-with-new-technology/'
|
4442
|
+
producer:
|
4443
|
+
name: 'Copyright Clearance Center, Inc.'
|
4444
|
+
url: 'https://www.copyright.com/'
|
4445
|
+
|
4446
|
+
- regex: 'LiveJournal\.com'
|
4447
|
+
name: 'LiveJournal'
|
4448
|
+
url: 'https://www.livejournal.com/'
|
4449
|
+
category: 'Feed Fetcher'
|
4450
|
+
producer:
|
4451
|
+
name: 'ООО "СИМ"'
|
4452
|
+
url: 'https://www.livejournal.com/'
|
4453
|
+
|
4454
|
+
- regex: 'bitdiscovery'
|
4455
|
+
name: 'Tenable.asm'
|
4456
|
+
category: 'Security Checker'
|
4457
|
+
url: 'https://bitdiscovery.com/'
|
4458
|
+
producer:
|
4459
|
+
name: 'Tenable, Inc.'
|
4460
|
+
url: 'https://www.tenable.com/'
|
4461
|
+
|
4462
|
+
- regex: 'Castopod/[\d.]+'
|
4463
|
+
name: 'Castopod'
|
4464
|
+
category: 'Crawler'
|
4465
|
+
url: 'https://www.castopod.org/'
|
4466
|
+
|
4467
|
+
- regex: 'Elastic/Synthetics'
|
4468
|
+
name: 'Elastic Synthetics'
|
4469
|
+
category: 'Site Monitor'
|
4470
|
+
url: 'https://github.com/elastic/synthetics'
|
4471
|
+
producer:
|
4472
|
+
name: 'Elasticsearch B.V.'
|
4473
|
+
url: 'https://www.elastic.co/'
|
4474
|
+
|
4475
|
+
- regex: 'WDG_Validator/[\d.]+'
|
4476
|
+
name: 'WDG HTML Validator'
|
4477
|
+
category: 'Validator'
|
4478
|
+
url: 'http://www.htmlhelp.com/tools/validator/'
|
4479
|
+
|
4480
|
+
- regex: 'scan@aegis.network'
|
4481
|
+
name: 'Aegis'
|
4482
|
+
category: 'Crawler'
|
4483
|
+
url: 'https://web.archive.org/web/20180910002802/http://www.aegis.network/'
|
4484
|
+
|
4485
|
+
- regex: 'CrawlyProjectCrawler/[\d.]+'
|
4486
|
+
name: 'Crawly Project'
|
4487
|
+
category: 'Crawler'
|
4488
|
+
url: 'https://web.archive.org/web/20240326141952/https://crawlyproject.digitaldragon.dev/'
|
4489
|
+
|
4490
|
+
- regex: 'BDFetch'
|
4491
|
+
name: 'BDFetch'
|
4492
|
+
category: 'Crawler'
|
4493
|
+
url: 'https://web.archive.org/web/20130821043949/http://www.branddimensions.com/'
|
4494
|
+
|
4495
|
+
- regex: 'PunkMap'
|
4496
|
+
name: 'Punk Map'
|
4497
|
+
category: 'Security Checker'
|
4498
|
+
url: 'https://github.com/openeasm/punkmap'
|
4499
|
+
|
4500
|
+
- regex: 'GenomeCrawlerd/[\d.]+'
|
4501
|
+
name: 'Deepfield Genome'
|
4502
|
+
category: 'Crawler'
|
4503
|
+
url: 'https://www.nokia.com/networks/ip-networks/deepfield/genome/'
|
4504
|
+
producer:
|
4505
|
+
name: 'Nokia Corporation'
|
4506
|
+
url: 'https://www.nokia.com/'
|
4507
|
+
|
4508
|
+
- regex: 'Gaisbot/[\d.]+'
|
4509
|
+
name: 'Gaisbot'
|
4510
|
+
category: 'Crawler'
|
4511
|
+
url: 'https://web.archive.org/web/20090604121511/https://gais.cs.ccu.edu.tw/robot.php'
|
4512
|
+
|
4513
|
+
- regex: 'FAST-WebCrawler/[\d.]+'
|
4514
|
+
name: 'AlltheWeb'
|
4515
|
+
category: 'Crawler'
|
4516
|
+
url: 'https://web.archive.org/web/20041020050801/http://www.alltheweb.com/help/webmaster/crawler'
|
4517
|
+
|
4518
|
+
- regex: 'ducks\.party'
|
4519
|
+
name: 'ducks.party'
|
4520
|
+
category: 'Security Checker'
|
4521
|
+
url: 'https://ducks.party/'
|
4522
|
+
|
4523
|
+
- regex: 'DepSpid/[\d.]+'
|
4524
|
+
name: 'DepSpid'
|
4525
|
+
category: 'Crawler'
|
4526
|
+
url: 'https://web.archive.org/web/20080321224033/http://about.depspid.net/'
|
4527
|
+
|
4528
|
+
- regex: 'Website-info\.net'
|
4529
|
+
name: 'Website-info'
|
4530
|
+
category: 'Crawler'
|
4531
|
+
url: 'https://website-info.net/robot'
|
4532
|
+
producer:
|
4533
|
+
name: 'Meins und Vogel GmbH'
|
4534
|
+
url: 'https://muv.com/'
|
4535
|
+
|
4536
|
+
# Generic bots
|
4537
|
+
- regex: 'nuhk|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr\.com|teoma|oegp|http%20client|htdig|mogimogi|larbin|scrubby|searchsight|semanticdiscovery|snappy|vortex(?!(?: Build|Plus| CM62| HD65))|zeal(?!ot)|dataparksearch|findlinks|BrowserMob|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|7Siters|centuryb\.o\.t9|InterNaetBoten|EasyBib AutoCite|Bidtellect|tomnomnom/meg|cortex|Re-re Studio|adreview|AHC/|NameOfAgent|Request-Promise|ALittle Client|Hello,? world|wp_is_mobile|0xAbyssalDoesntExist|Anarchy99|^revolt|nvd0rz|xfa1|Hakai|gbrmss|fuck-your-hp|IDBTE4M CODE87|Antoine|Insomania|Hells-Net|b3astmode|Linux Gnu \(cow\)|Test Certificate Info|iplabel|Magellan|TheSafex?Internetx?Search|Searcherweb|kirkland-signature|LinkChain|survey-security-dot-txt|infrawatch|Time/|r00ts3c-owned-you|nvdorz|Root Slut|NiggaBalls|BotPoke|GlobalWebSearch|^xenu|^(?:chrome|firefox|Abcd|Dark|KvshClient|url|Zeus|ZmEu)$'
|
4538
|
+
name: 'Generic Bot'
|
4539
|
+
|
4540
|
+
# Generic detections
|
4541
|
+
- regex: '[a-z0-9_-]*(?:(?<!cu|power[ _]|m[ _])bot(?![ _]TAB|[ _]?5[0-9]|[ _]Senior|[ _]Junior)|analyzer|appengine|archiver?|checker|collector|crawl|crawler|(?<!node-|uclient-|Mikrotik/\d\.[x\d] |electron-)fetch(?:er)?|indexer|inspector|monitor|(?<!Microsoft |banshee-)project(?!or)|(?<!Google Wap |Blue |SpeedMode; )proxy|research|resolver|robots|(?<!Cam)scanner|scraper|script|searcher|(?<!-)security|spider(?! 8)|study|transcoder|uptime|user[ _]?agent|validator)(?:[^a-z]|$)'
|
1234
4542
|
name: 'Generic Bot'
|