@j0hanz/fetch-url-mcp 1.3.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -21
- package/dist/cli.d.ts +3 -3
- package/dist/cli.js +15 -8
- package/dist/http/auth.d.ts +6 -6
- package/dist/http/auth.js +78 -23
- package/dist/http/health.d.ts +1 -2
- package/dist/http/health.js +7 -18
- package/dist/http/helpers.d.ts +3 -11
- package/dist/http/helpers.js +28 -26
- package/dist/http/native.d.ts +0 -1
- package/dist/http/native.js +63 -41
- package/dist/http/rate-limit.d.ts +2 -2
- package/dist/http/rate-limit.js +11 -16
- package/dist/index.d.ts +0 -1
- package/dist/index.js +17 -20
- package/dist/{markdown-cleanup.d.ts → lib/content.d.ts} +4 -2
- package/dist/lib/content.js +1356 -0
- package/dist/lib/core.d.ts +253 -0
- package/dist/lib/core.js +1228 -0
- package/dist/{tool-pipeline.d.ts → lib/fetch-pipeline.d.ts} +1 -3
- package/dist/{tool-pipeline.js → lib/fetch-pipeline.js} +18 -44
- package/dist/{fetch.d.ts → lib/http.d.ts} +7 -9
- package/dist/{fetch.js → lib/http.js} +721 -1004
- package/dist/lib/mcp-tools.d.ts +28 -0
- package/dist/lib/mcp-tools.js +107 -0
- package/dist/{tool-progress.d.ts → lib/progress.d.ts} +0 -2
- package/dist/{tool-progress.js → lib/progress.js} +9 -14
- package/dist/lib/task-handlers.d.ts +5 -0
- package/dist/{mcp.js → lib/task-handlers.js} +95 -31
- package/dist/lib/url.d.ts +70 -0
- package/dist/lib/url.js +686 -0
- package/dist/lib/utils.d.ts +58 -0
- package/dist/lib/utils.js +304 -0
- package/dist/{prompts.d.ts → prompts/index.d.ts} +0 -1
- package/dist/{prompts.js → prompts/index.js} +1 -2
- package/dist/{resources.d.ts → resources/index.d.ts} +0 -1
- package/dist/{resources.js → resources/index.js} +87 -64
- package/dist/{instructions.d.ts → resources/instructions.d.ts} +0 -1
- package/dist/{instructions.js → resources/instructions.js} +5 -3
- package/dist/schemas/inputs.d.ts +7 -0
- package/dist/schemas/inputs.js +24 -0
- package/dist/schemas/outputs.d.ts +23 -0
- package/dist/schemas/outputs.js +77 -0
- package/dist/server.d.ts +0 -1
- package/dist/server.js +26 -25
- package/dist/tasks/execution.d.ts +0 -1
- package/dist/tasks/execution.js +106 -70
- package/dist/tasks/manager.d.ts +11 -3
- package/dist/tasks/manager.js +97 -73
- package/dist/tasks/owner.d.ts +3 -3
- package/dist/tasks/owner.js +2 -2
- package/dist/tasks/tool-registry.d.ts +11 -0
- package/dist/tasks/tool-registry.js +13 -0
- package/dist/tools/fetch-url.d.ts +28 -0
- package/dist/{tools.js → tools/fetch-url.js} +95 -147
- package/dist/tools/index.d.ts +2 -0
- package/dist/tools/index.js +4 -0
- package/dist/transform/html-translators.d.ts +1 -0
- package/dist/transform/html-translators.js +454 -0
- package/dist/transform/metadata.d.ts +4 -0
- package/dist/transform/metadata.js +183 -0
- package/dist/transform/transform.d.ts +0 -1
- package/dist/transform/transform.js +44 -679
- package/dist/transform/types.d.ts +9 -12
- package/dist/transform/types.js +0 -1
- package/dist/transform/worker-pool.d.ts +0 -1
- package/dist/transform/worker-pool.js +7 -16
- package/dist/transform/workers/shared.d.ts +7 -0
- package/dist/transform/workers/shared.js +130 -0
- package/dist/transform/workers/transform-child.d.ts +0 -1
- package/dist/transform/workers/transform-child.js +5 -135
- package/dist/transform/workers/transform-worker.d.ts +0 -1
- package/dist/transform/workers/transform-worker.js +7 -128
- package/package.json +11 -7
- package/dist/cache.d.ts +0 -54
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js +0 -261
- package/dist/cache.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts +0 -141
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js +0 -473
- package/dist/config.js.map +0 -1
- package/dist/crypto.d.ts +0 -4
- package/dist/crypto.d.ts.map +0 -1
- package/dist/crypto.js +0 -56
- package/dist/crypto.js.map +0 -1
- package/dist/dom-noise-removal.d.ts +0 -2
- package/dist/dom-noise-removal.d.ts.map +0 -1
- package/dist/dom-noise-removal.js +0 -494
- package/dist/dom-noise-removal.js.map +0 -1
- package/dist/download.d.ts +0 -4
- package/dist/download.d.ts.map +0 -1
- package/dist/download.js +0 -106
- package/dist/download.js.map +0 -1
- package/dist/errors.d.ts +0 -11
- package/dist/errors.d.ts.map +0 -1
- package/dist/errors.js +0 -65
- package/dist/errors.js.map +0 -1
- package/dist/examples/mcp-fetch-url-client.js +0 -329
- package/dist/examples/mcp-fetch-url-client.js.map +0 -1
- package/dist/fetch-content.d.ts +0 -5
- package/dist/fetch-content.d.ts.map +0 -1
- package/dist/fetch-content.js +0 -164
- package/dist/fetch-content.js.map +0 -1
- package/dist/fetch-stream.d.ts +0 -5
- package/dist/fetch-stream.d.ts.map +0 -1
- package/dist/fetch-stream.js +0 -29
- package/dist/fetch-stream.js.map +0 -1
- package/dist/fetch.d.ts.map +0 -1
- package/dist/fetch.js.map +0 -1
- package/dist/host-normalization.d.ts +0 -2
- package/dist/host-normalization.d.ts.map +0 -1
- package/dist/host-normalization.js +0 -91
- package/dist/host-normalization.js.map +0 -1
- package/dist/http/auth.d.ts.map +0 -1
- package/dist/http/auth.js.map +0 -1
- package/dist/http/health.d.ts.map +0 -1
- package/dist/http/health.js.map +0 -1
- package/dist/http/helpers.d.ts.map +0 -1
- package/dist/http/helpers.js.map +0 -1
- package/dist/http/native.d.ts.map +0 -1
- package/dist/http/native.js.map +0 -1
- package/dist/http/rate-limit.d.ts.map +0 -1
- package/dist/http/rate-limit.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/instructions.d.ts.map +0 -1
- package/dist/instructions.js.map +0 -1
- package/dist/ip-blocklist.d.ts +0 -9
- package/dist/ip-blocklist.d.ts.map +0 -1
- package/dist/ip-blocklist.js +0 -79
- package/dist/ip-blocklist.js.map +0 -1
- package/dist/json.d.ts +0 -2
- package/dist/json.d.ts.map +0 -1
- package/dist/json.js +0 -45
- package/dist/json.js.map +0 -1
- package/dist/language-detection.d.ts +0 -3
- package/dist/language-detection.d.ts.map +0 -1
- package/dist/language-detection.js +0 -355
- package/dist/language-detection.js.map +0 -1
- package/dist/markdown-cleanup.d.ts.map +0 -1
- package/dist/markdown-cleanup.js +0 -534
- package/dist/markdown-cleanup.js.map +0 -1
- package/dist/mcp-validator.d.ts +0 -17
- package/dist/mcp-validator.d.ts.map +0 -1
- package/dist/mcp-validator.js +0 -45
- package/dist/mcp-validator.js.map +0 -1
- package/dist/mcp.d.ts +0 -4
- package/dist/mcp.d.ts.map +0 -1
- package/dist/mcp.js.map +0 -1
- package/dist/observability.d.ts +0 -23
- package/dist/observability.d.ts.map +0 -1
- package/dist/observability.js +0 -238
- package/dist/observability.js.map +0 -1
- package/dist/prompts.d.ts.map +0 -1
- package/dist/prompts.js.map +0 -1
- package/dist/resources.d.ts.map +0 -1
- package/dist/resources.js.map +0 -1
- package/dist/server-tuning.d.ts +0 -15
- package/dist/server-tuning.d.ts.map +0 -1
- package/dist/server-tuning.js +0 -49
- package/dist/server-tuning.js.map +0 -1
- package/dist/server.d.ts.map +0 -1
- package/dist/server.js.map +0 -1
- package/dist/session.d.ts +0 -42
- package/dist/session.d.ts.map +0 -1
- package/dist/session.js +0 -255
- package/dist/session.js.map +0 -1
- package/dist/tasks/execution.d.ts.map +0 -1
- package/dist/tasks/execution.js.map +0 -1
- package/dist/tasks/manager.d.ts.map +0 -1
- package/dist/tasks/manager.js.map +0 -1
- package/dist/tasks/owner.d.ts.map +0 -1
- package/dist/tasks/owner.js.map +0 -1
- package/dist/timer-utils.d.ts +0 -6
- package/dist/timer-utils.d.ts.map +0 -1
- package/dist/timer-utils.js +0 -27
- package/dist/timer-utils.js.map +0 -1
- package/dist/tool-errors.d.ts +0 -12
- package/dist/tool-errors.d.ts.map +0 -1
- package/dist/tool-errors.js +0 -55
- package/dist/tool-errors.js.map +0 -1
- package/dist/tool-pipeline.d.ts.map +0 -1
- package/dist/tool-pipeline.js.map +0 -1
- package/dist/tool-progress.d.ts.map +0 -1
- package/dist/tool-progress.js.map +0 -1
- package/dist/tools.d.ts +0 -54
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js.map +0 -1
- package/dist/transform/transform.d.ts.map +0 -1
- package/dist/transform/transform.js.map +0 -1
- package/dist/transform/types.d.ts.map +0 -1
- package/dist/transform/types.js.map +0 -1
- package/dist/transform/worker-pool.d.ts.map +0 -1
- package/dist/transform/worker-pool.js.map +0 -1
- package/dist/transform/workers/transform-child.d.ts.map +0 -1
- package/dist/transform/workers/transform-child.js.map +0 -1
- package/dist/transform/workers/transform-worker.d.ts.map +0 -1
- package/dist/transform/workers/transform-worker.js.map +0 -1
- package/dist/type-guards.d.ts +0 -16
- package/dist/type-guards.d.ts.map +0 -1
- package/dist/type-guards.js +0 -13
- package/dist/type-guards.js.map +0 -1
|
@@ -1,551 +1,277 @@
|
|
|
1
1
|
import { Buffer } from 'node:buffer';
|
|
2
2
|
import { randomUUID } from 'node:crypto';
|
|
3
3
|
import diagnosticsChannel from 'node:diagnostics_channel';
|
|
4
|
-
import
|
|
4
|
+
import {} from 'node:http';
|
|
5
5
|
import { isIP } from 'node:net';
|
|
6
|
+
import { posix as pathPosix } from 'node:path';
|
|
6
7
|
import { performance } from 'node:perf_hooks';
|
|
7
8
|
import { PassThrough, Readable, Transform } from 'node:stream';
|
|
8
9
|
import { buffer as consumeBuffer } from 'node:stream/consumers';
|
|
9
10
|
import { finished, pipeline } from 'node:stream/promises';
|
|
11
|
+
import {} from 'node:stream/web';
|
|
12
|
+
import tls from 'node:tls';
|
|
10
13
|
import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
|
|
11
14
|
import { Agent } from 'undici';
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
-
import {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
warn: logWarn,
|
|
22
|
-
error: logError,
|
|
15
|
+
import { z } from 'zod';
|
|
16
|
+
import { get as cacheGet, config, getOperationId, getRequestId, logDebug, logError, logWarn, parseCachedPayload, redactUrl, resolveCachedPayloadContent, } from './core.js';
|
|
17
|
+
import { BLOCKED_HOST_SUFFIXES, createDnsPreflight, IpBlocker, RawUrlTransformer, SafeDnsResolver, UrlNormalizer, VALIDATION_ERROR_CODE, } from './url.js';
|
|
18
|
+
import { createErrorWithCode, FetchError, isError, isObject, isSystemError, toError, } from './utils.js';
|
|
19
|
+
/**
 * Tunables used when deriving a download file name.
 * The regular expressions carry the `g` flag and are only meant to be used
 * with `String.prototype.replace`.
 */
const FILENAME_RULES = {
  // Upper bound, in UTF-16 code units, for the final name including extension.
  MAX_LEN: 200,
  // Characters stripped from a candidate: < > : " / \ | ? * and every
  // code point in Unicode general category "Other" (\p{C}).
  UNSAFE_CHARS: /[<>:"/\\|?*\p{C}]/gu,
  // Runs of whitespace, replaced as a unit.
  WHITESPACE: /\s+/g,
  // Trailing web-page extensions removed from a URL basename.
  EXTENSIONS: /\.(html?|php|aspx?|jsp)$/i,
};
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
const defaultFetch = (input, init) => globalThis.fetch(input, init);
|
|
32
|
-
function isLocalFetchAllowed() {
|
|
33
|
-
return process.env['ALLOW_LOCAL_FETCH'] === 'true';
|
|
34
|
-
}
|
|
35
|
-
class IpBlocker {
|
|
36
|
-
security;
|
|
37
|
-
blockList = createDefaultBlockList();
|
|
38
|
-
constructor(security) {
|
|
39
|
-
this.security = security;
|
|
40
|
-
}
|
|
41
|
-
isBlockedIp(candidate) {
|
|
42
|
-
const normalized = candidate.trim().toLowerCase();
|
|
43
|
-
if (isCloudMetadataHost(normalized))
|
|
44
|
-
return true;
|
|
45
|
-
if (isLocalFetchAllowed())
|
|
46
|
-
return false;
|
|
47
|
-
if (!normalized)
|
|
48
|
-
return false;
|
|
49
|
-
if (this.security.blockedHosts.has(normalized))
|
|
50
|
-
return true;
|
|
51
|
-
const normalizedIp = normalizeIpForBlockList(normalized);
|
|
52
|
-
return normalizedIp
|
|
53
|
-
? this.blockList.check(normalizedIp.ip, normalizedIp.family)
|
|
54
|
-
: false;
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
const VALIDATION_ERROR_CODE = 'VALIDATION_ERROR';
|
|
58
|
-
function createValidationError(message) {
|
|
59
|
-
return createErrorWithCode(message, VALIDATION_ERROR_CODE);
|
|
60
|
-
}
|
|
61
|
-
const BLOCKED_HOST_SUFFIXES = ['.local', '.internal'];
|
|
62
|
-
// This list is not exhaustive but covers the most common cloud metadata endpoints.
|
|
63
|
-
const CLOUD_METADATA_HOSTS = new Set([
|
|
64
|
-
'169.254.169.254', // AWS / GCP / Azure
|
|
65
|
-
'metadata.google.internal', // GCP
|
|
66
|
-
'100.100.100.200', // Alibaba Cloud
|
|
67
|
-
'fd00:ec2::254', // AWS IPv6
|
|
68
|
-
]);
|
|
69
|
-
function isCloudMetadataHost(hostname) {
|
|
70
|
-
const lowered = hostname.toLowerCase();
|
|
71
|
-
if (CLOUD_METADATA_HOSTS.has(lowered))
|
|
72
|
-
return true;
|
|
73
|
-
const normalized = normalizeIpForBlockList(lowered);
|
|
74
|
-
return normalized !== null && CLOUD_METADATA_HOSTS.has(normalized.ip);
|
|
75
|
-
}
|
|
76
|
-
class UrlNormalizer {
|
|
77
|
-
constants;
|
|
78
|
-
security;
|
|
79
|
-
ipBlocker;
|
|
80
|
-
blockedHostSuffixes;
|
|
81
|
-
constructor(constants, security, ipBlocker, blockedHostSuffixes) {
|
|
82
|
-
this.constants = constants;
|
|
83
|
-
this.security = security;
|
|
84
|
-
this.ipBlocker = ipBlocker;
|
|
85
|
-
this.blockedHostSuffixes = blockedHostSuffixes;
|
|
86
|
-
}
|
|
87
|
-
normalize(urlString) {
|
|
88
|
-
const trimmedUrl = this.requireTrimmedUrl(urlString);
|
|
89
|
-
if (trimmedUrl.length > this.constants.maxUrlLength) {
|
|
90
|
-
throw createValidationError(`URL exceeds maximum length of ${this.constants.maxUrlLength} characters`);
|
|
91
|
-
}
|
|
92
|
-
let url;
|
|
93
|
-
try {
|
|
94
|
-
url = new URL(trimmedUrl);
|
|
95
|
-
}
|
|
96
|
-
catch {
|
|
97
|
-
throw createValidationError('Invalid URL format');
|
|
98
|
-
}
|
|
99
|
-
if (url.protocol !== 'http:' && url.protocol !== 'https:') {
|
|
100
|
-
throw createValidationError(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`);
|
|
101
|
-
}
|
|
102
|
-
if (url.username || url.password) {
|
|
103
|
-
throw createValidationError('URLs with embedded credentials are not allowed');
|
|
104
|
-
}
|
|
105
|
-
const hostname = this.normalizeHostname(url);
|
|
106
|
-
this.assertHostnameAllowed(hostname);
|
|
107
|
-
url.hostname = hostname;
|
|
108
|
-
return { normalizedUrl: url.href, hostname };
|
|
109
|
-
}
|
|
110
|
-
validateAndNormalize(urlString) {
|
|
111
|
-
return this.normalize(urlString).normalizedUrl;
|
|
112
|
-
}
|
|
113
|
-
requireTrimmedUrl(urlString) {
|
|
114
|
-
if (!urlString || typeof urlString !== 'string') {
|
|
115
|
-
throw createValidationError('URL is required');
|
|
116
|
-
}
|
|
117
|
-
const trimmed = urlString.trim();
|
|
118
|
-
if (!trimmed)
|
|
119
|
-
throw createValidationError('URL cannot be empty');
|
|
120
|
-
return trimmed;
|
|
121
|
-
}
|
|
122
|
-
normalizeHostname(url) {
|
|
123
|
-
const hostname = url.hostname.toLowerCase().replace(/\.+$/, '');
|
|
124
|
-
if (!hostname) {
|
|
125
|
-
throw createValidationError('URL must have a valid hostname');
|
|
126
|
-
}
|
|
127
|
-
return hostname;
|
|
128
|
-
}
|
|
129
|
-
assertHostnameAllowed(hostname) {
|
|
130
|
-
this.assertNotBlockedHost(hostname);
|
|
131
|
-
this.assertNotBlockedIp(hostname);
|
|
132
|
-
this.assertNotBlockedHostnameSuffix(hostname);
|
|
133
|
-
}
|
|
134
|
-
assertNotBlockedHost(hostname) {
|
|
135
|
-
if (isCloudMetadataHost(hostname)) {
|
|
136
|
-
throw createValidationError(`Blocked host: ${hostname}. Cloud metadata endpoints are not allowed`);
|
|
137
|
-
}
|
|
138
|
-
if (isLocalFetchAllowed())
|
|
139
|
-
return;
|
|
140
|
-
if (!this.security.blockedHosts.has(hostname))
|
|
141
|
-
return;
|
|
142
|
-
throw createValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`);
|
|
143
|
-
}
|
|
144
|
-
assertNotBlockedIp(hostname) {
|
|
145
|
-
if (isCloudMetadataHost(hostname)) {
|
|
146
|
-
throw createValidationError(`Blocked IP range: ${hostname}. Cloud metadata endpoints are not allowed`);
|
|
147
|
-
}
|
|
148
|
-
if (isLocalFetchAllowed())
|
|
149
|
-
return;
|
|
150
|
-
if (!this.ipBlocker.isBlockedIp(hostname))
|
|
151
|
-
return;
|
|
152
|
-
throw createValidationError(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
|
|
153
|
-
}
|
|
154
|
-
assertNotBlockedHostnameSuffix(hostname) {
|
|
155
|
-
const blocked = this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix));
|
|
156
|
-
if (!blocked)
|
|
157
|
-
return;
|
|
158
|
-
throw createValidationError(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
|
|
159
|
-
}
|
|
25
|
+
/**
 * Turns arbitrary text into a lower-case, dash-separated file-name fragment.
 *
 * Steps, in order: lower-case, drop everything matched by
 * FILENAME_RULES.UNSAFE_CHARS, turn each whitespace run into "-", collapse
 * repeated dashes, then strip a leading and a trailing dash.
 *
 * @param {string} input - Text to sanitize.
 * @returns {string} The sanitized fragment (may be empty).
 */
function sanitizeString(input) {
  const lowered = input.toLowerCase();
  const withoutUnsafe = lowered.replace(FILENAME_RULES.UNSAFE_CHARS, '');
  const dashed = withoutUnsafe.replace(FILENAME_RULES.WHITESPACE, '-');
  const collapsed = dashed.replace(/-+/g, '-');
  return collapsed.replace(/(?:^-|-$)/g, '');
}
|
|
161
|
-
function
|
|
162
|
-
const
|
|
163
|
-
if (
|
|
33
|
+
/**
 * Derives a file-name candidate from the last path segment of a URL.
 *
 * @param {string} url - Absolute URL; `new URL(url)` throws when it is invalid.
 * @returns {string|null} Sanitized basename without a web-page extension, or
 *   null when the URL is not http(s), has no basename, or the basename is
 *   "index".
 */
function resolveUrlFilenameCandidate(url) {
  const { protocol, pathname } = new URL(url);
  const isHttp = protocol === 'http:' || protocol === 'https:';
  if (!isHttp) {
    return null;
  }
  const lastSegment = pathPosix.basename(pathname);
  if (lastSegment === '' || lastSegment === 'index') {
    return null;
  }
  const candidate = sanitizeString(lastSegment.replace(FILENAME_RULES.EXTENSIONS, ''));
  // "index" can reappear once the extension is gone and the text is lower-cased.
  if (candidate === 'index') {
    return null;
  }
  return candidate || null;
}
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
protocol: 'http{s}?',
|
|
176
|
-
hostname: 'gist.github.com',
|
|
177
|
-
pathname: '/:user/:gistId',
|
|
178
|
-
});
|
|
179
|
-
const GITHUB_GIST_RAW_PATTERN = new URLPattern({
|
|
180
|
-
protocol: 'http{s}?',
|
|
181
|
-
hostname: 'gist.github.com',
|
|
182
|
-
pathname: '/:user/:gistId/raw/:filePath+',
|
|
183
|
-
});
|
|
184
|
-
const GITLAB_BLOB_PATTERNS = [
|
|
185
|
-
new URLPattern({
|
|
186
|
-
protocol: 'http{s}?',
|
|
187
|
-
hostname: 'gitlab.com',
|
|
188
|
-
pathname: '/:base+/-/blob/:branch/:path+',
|
|
189
|
-
}),
|
|
190
|
-
new URLPattern({
|
|
191
|
-
protocol: 'http{s}?',
|
|
192
|
-
hostname: '*:sub.gitlab.com',
|
|
193
|
-
pathname: '/:base+/-/blob/:branch/:path+',
|
|
194
|
-
}),
|
|
195
|
-
];
|
|
196
|
-
const BITBUCKET_SRC_PATTERN = new URLPattern({
|
|
197
|
-
protocol: 'http{s}?',
|
|
198
|
-
hostname: '{:sub.}?bitbucket.org',
|
|
199
|
-
pathname: '/:owner/:repo/src/:branch/:path+',
|
|
200
|
-
});
|
|
201
|
-
const BITBUCKET_RAW_RE = /bitbucket\.org\/[^/]+\/[^/]+\/raw\//;
|
|
202
|
-
const RAW_TEXT_EXTENSIONS = new Set([
|
|
203
|
-
'.md',
|
|
204
|
-
'.markdown',
|
|
205
|
-
'.txt',
|
|
206
|
-
'.json',
|
|
207
|
-
'.yaml',
|
|
208
|
-
'.yml',
|
|
209
|
-
'.toml',
|
|
210
|
-
'.xml',
|
|
211
|
-
'.csv',
|
|
212
|
-
'.rst',
|
|
213
|
-
'.adoc',
|
|
214
|
-
'.org',
|
|
215
|
-
]);
|
|
216
|
-
class RawUrlTransformer {
|
|
217
|
-
logger;
|
|
218
|
-
constructor(logger) {
|
|
219
|
-
this.logger = logger;
|
|
220
|
-
}
|
|
221
|
-
transformToRawUrl(url) {
|
|
222
|
-
if (!url)
|
|
223
|
-
return { url, transformed: false };
|
|
224
|
-
if (this.isRawUrl(url))
|
|
225
|
-
return { url, transformed: false };
|
|
226
|
-
let base;
|
|
227
|
-
let hash;
|
|
228
|
-
let parsed;
|
|
229
|
-
try {
|
|
230
|
-
parsed = new URL(url);
|
|
231
|
-
base = parsed.origin + parsed.pathname;
|
|
232
|
-
({ hash } = parsed);
|
|
233
|
-
}
|
|
234
|
-
catch {
|
|
235
|
-
({ base, hash } = this.splitParams(url));
|
|
236
|
-
}
|
|
237
|
-
const match = this.tryTransformWithUrl(base, hash, parsed);
|
|
238
|
-
if (!match)
|
|
239
|
-
return { url, transformed: false };
|
|
240
|
-
this.logger.debug('URL transformed to raw content URL', {
|
|
241
|
-
platform: match.platform,
|
|
242
|
-
original: url.substring(0, 100),
|
|
243
|
-
transformed: match.url.substring(0, 100),
|
|
244
|
-
});
|
|
245
|
-
return { url: match.url, transformed: true, platform: match.platform };
|
|
246
|
-
}
|
|
247
|
-
isRawTextContentUrl(urlString) {
|
|
248
|
-
if (!urlString)
|
|
249
|
-
return false;
|
|
250
|
-
if (this.isRawUrl(urlString))
|
|
251
|
-
return true;
|
|
252
|
-
try {
|
|
253
|
-
const url = new URL(urlString);
|
|
254
|
-
const pathname = url.pathname.toLowerCase();
|
|
255
|
-
const lastDot = pathname.lastIndexOf('.');
|
|
256
|
-
if (lastDot === -1)
|
|
257
|
-
return false;
|
|
258
|
-
return RAW_TEXT_EXTENSIONS.has(pathname.slice(lastDot));
|
|
259
|
-
}
|
|
260
|
-
catch {
|
|
261
|
-
const { base } = this.splitParams(urlString);
|
|
262
|
-
const lowerBase = base.toLowerCase();
|
|
263
|
-
const lastDot = lowerBase.lastIndexOf('.');
|
|
264
|
-
if (lastDot === -1)
|
|
265
|
-
return false;
|
|
266
|
-
return RAW_TEXT_EXTENSIONS.has(lowerBase.slice(lastDot));
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
isRawUrl(url) {
|
|
270
|
-
const lower = url.toLowerCase();
|
|
271
|
-
return (lower.includes('raw.githubusercontent.com') ||
|
|
272
|
-
lower.includes('gist.githubusercontent.com') ||
|
|
273
|
-
lower.includes('/-/raw/') ||
|
|
274
|
-
BITBUCKET_RAW_RE.test(lower));
|
|
275
|
-
}
|
|
276
|
-
splitParams(urlString) {
|
|
277
|
-
const hashIndex = urlString.indexOf('#');
|
|
278
|
-
const queryIndex = urlString.indexOf('?');
|
|
279
|
-
const endIndex = Math.min(queryIndex === -1 ? urlString.length : queryIndex, hashIndex === -1 ? urlString.length : hashIndex);
|
|
280
|
-
const hash = hashIndex !== -1 ? urlString.slice(hashIndex) : '';
|
|
281
|
-
return { base: urlString.slice(0, endIndex), hash };
|
|
282
|
-
}
|
|
283
|
-
tryTransformWithUrl(base, hash, preParsed) {
|
|
284
|
-
let parsed = preParsed ?? null;
|
|
285
|
-
if (!parsed) {
|
|
286
|
-
try {
|
|
287
|
-
parsed = new URL(base);
|
|
288
|
-
}
|
|
289
|
-
catch {
|
|
290
|
-
// Ignore invalid URLs
|
|
291
|
-
}
|
|
292
|
-
}
|
|
293
|
-
if (!parsed)
|
|
294
|
-
return null;
|
|
295
|
-
if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
|
|
296
|
-
return null;
|
|
297
|
-
const gist = this.transformGithubGist(base, hash);
|
|
298
|
-
if (gist)
|
|
299
|
-
return gist;
|
|
300
|
-
const github = this.transformGithubBlob(base);
|
|
301
|
-
if (github)
|
|
302
|
-
return github;
|
|
303
|
-
const gitlab = this.transformGitLab(base, parsed.origin);
|
|
304
|
-
if (gitlab)
|
|
305
|
-
return gitlab;
|
|
306
|
-
const bitbucket = this.transformBitbucket(base, parsed.origin);
|
|
307
|
-
if (bitbucket)
|
|
308
|
-
return bitbucket;
|
|
46
|
+
/**
 * Shortens a file-name base so that base + extension fits within
 * FILENAME_RULES.MAX_LEN UTF-16 code units.
 *
 * The cut never leaves half of a surrogate pair at the end of the result: a
 * dangling high surrogate is dropped, because a lone surrogate makes
 * `encodeURIComponent` throw `URIError` when the name is later encoded.
 *
 * @param {string} name - File-name base without extension.
 * @param {string} extension - Extension that will be appended (e.g. ".md").
 * @returns {string} `name` unchanged when it already fits, otherwise a prefix.
 */
function truncateFilenameBase(name, extension) {
  const maxBase = Math.max(0, FILENAME_RULES.MAX_LEN - extension.length);
  if (name.length <= maxBase) {
    return name;
  }
  let cut = maxBase;
  if (cut > 0) {
    const lastUnit = name.charCodeAt(cut - 1);
    // High surrogate (U+D800..U+DBFF) whose low half would be cut off.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      cut -= 1;
    }
  }
  return name.substring(0, cut);
}
|
|
50
|
+
/**
 * Derives a file-name candidate from a document title.
 *
 * @param {string|undefined|null} title - Title text, if any.
 * @returns {string|null} Sanitized title, or null when the title is missing
 *   or sanitizes to an empty string.
 */
function resolveTitleFilenameCandidate(title) {
  const candidate = title ? sanitizeString(title) : '';
  return candidate || null;
}
|
|
55
|
+
/**
 * Picks the base of a download file name, trying sources in order:
 * the URL, then the title, then the first 16 characters of `hashFallback`,
 * and finally a `download-<timestamp>` name.
 *
 * @param {string} url - Source URL of the content.
 * @param {string|undefined} title - Document title, if known.
 * @param {string|undefined} hashFallback - Hash used when URL and title give nothing.
 * @returns {string} A non-empty file-name base without extension.
 */
function resolveFilenameBase(url, title, hashFallback) {
  let candidate = null;
  try {
    candidate = resolveUrlFilenameCandidate(url);
  }
  catch {
    // Ignore URL parsing errors and continue fallbacks.
  }
  if (!candidate) {
    candidate = resolveTitleFilenameCandidate(title);
  }
  if (candidate) {
    return candidate;
  }
  return hashFallback ? hashFallback.substring(0, 16) : `download-${Date.now()}`;
}
|
|
71
|
+
export function generateSafeFilename(url, title, hashFallback, extension = '.md') {
|
|
72
|
+
const name = resolveFilenameBase(url, title, hashFallback);
|
|
73
|
+
return `${truncateFilenameBase(name, extension)}${extension}`;
|
|
74
|
+
}
|
|
75
|
+
/**
 * Validation schema for download requests.
 * - `namespace` must be the literal "markdown".
 * - `hash` must be 8 to 64 characters drawn from hex digits and ".".
 * Unknown keys are rejected (strict object).
 */
const DownloadParamsSchema = z.strictObject({
  namespace: z.literal('markdown'),
  hash: z.string().regex(/^[a-f0-9.]+$/i).min(8).max(64),
});
|
|
83
|
+
/**
 * Sends a JSON error response and ends it.
 *
 * @param {object} res - Response object exposing `writeHead` and `end`.
 * @param {number} status - HTTP status code.
 * @param {string} message - Message placed in the `error` field.
 * @param {string} code - Value placed in the `code` field.
 * @returns {void}
 */
function writeJsonError(res, status, message, code) {
  const headers = { 'Content-Type': 'application/json' };
  res.writeHead(status, headers);
  const body = { error: message, code };
  res.end(JSON.stringify(body));
}
|
|
87
|
+
/**
 * Serves a cached markdown document as a file download.
 *
 * Responds with a JSON error when the parameters fail validation (400), when
 * no cache entry exists for `<namespace>:<hash>` (404), or when the entry has
 * no resolvable content (404). Otherwise it sets the download headers and
 * ends the response with the content.
 *
 * @param {object} res - Response object (`setHeader`, `writeHead`, `end`).
 * @param {string} namespace - Cache namespace from the request.
 * @param {string} hash - Cache hash from the request.
 * @returns {void}
 */
export function handleDownload(res, namespace, hash) {
  const parsed = DownloadParamsSchema.safeParse({ namespace, hash });
  if (!parsed.success) {
    writeJsonError(res, 400, 'Invalid namespace or hash', 'BAD_REQUEST');
    return;
  }
  const cacheKey = `${parsed.data.namespace}:${parsed.data.hash}`;
  const entry = cacheGet(cacheKey, { force: true });
  if (!entry) {
    writeJsonError(res, 404, 'Not found or expired', 'NOT_FOUND');
    return;
  }
  const payload = parseCachedPayload(entry.content);
  const content = payload ? resolveCachedPayloadContent(payload) : null;
  if (!content) {
    writeJsonError(res, 404, 'Content missing', 'NOT_FOUND');
    return;
  }
  const fileName = generateSafeFilename(entry.url, payload?.title, parsed.data.hash);
  res.setHeader('Content-Type', 'text/markdown; charset=utf-8');
  res.setHeader('Content-Disposition', buildAttachmentDisposition(fileName));
  res.setHeader('Cache-Control', `private, max-age=${config.cache.ttl}`);
  res.setHeader('X-Content-Type-Options', 'nosniff');
  res.end(content);
}

/**
 * Builds a `Content-Disposition: attachment` value carrying both an ASCII
 * `filename` fallback and an RFC 5987 `filename*` parameter.
 *
 * The quoted fallback must stay ASCII: Node rejects header values containing
 * characters above U+00FF, so a raw non-Latin file name would make
 * `setHeader` throw instead of starting the download.
 *
 * @param {string} fileName - File name, possibly containing non-ASCII text.
 * @returns {string} Header value.
 */
function buildAttachmentDisposition(fileName) {
  // Lone surrogates make encodeURIComponent throw; swap them for U+FFFD first.
  const wellFormed = fileName.replace(/[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g, '\uFFFD');
  // Anything outside printable ASCII, plus the quoted-string specials, becomes "_".
  const asciiFallback = wellFormed.replace(/[^\x20-\x7e]|["\\]/gu, '_');
  // encodeURIComponent leaves ' ( ) * untouched, but RFC 5987 attr-char excludes them.
  const encoded = encodeURIComponent(wellFormed).replace(/['()*]/g, (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`);
  return `attachment; filename="${asciiFallback}"; filename*=UTF-8''${encoded}`;
}
|
|
396
|
-
const
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
return;
|
|
415
|
-
try {
|
|
416
|
-
signal.removeEventListener('abort', abortListener);
|
|
417
|
-
}
|
|
418
|
-
catch {
|
|
419
|
-
// Ignore listener cleanup failures; they are non-fatal by design.
|
|
420
|
-
}
|
|
421
|
-
abortListener = null;
|
|
422
|
-
};
|
|
423
|
-
return { abortPromise, cleanup };
|
|
424
|
-
}
|
|
425
|
-
async function withTimeout(promise, timeoutMs, onTimeout, signal, onAbort) {
|
|
426
|
-
const timeoutSignal = timeoutMs > 0 ? AbortSignal.timeout(timeoutMs) : undefined;
|
|
427
|
-
const raceSignal = signal && timeoutSignal
|
|
428
|
-
? AbortSignal.any([signal, timeoutSignal])
|
|
429
|
-
: (signal ?? timeoutSignal);
|
|
430
|
-
if (!raceSignal)
|
|
431
|
-
return promise;
|
|
432
|
-
const abortRace = createSignalAbortRace(raceSignal, () => signal?.aborted === true, onTimeout, onAbort ?? (() => new Error('Request was canceled')));
|
|
114
|
+
// Label of the decoder used when no usable encoding is known.
const UTF8_ENCODING = 'utf-8';

/**
 * Extracts the `charset` parameter from a Content-Type header value.
 *
 * @param {string|undefined|null} contentType - Raw header value.
 * @returns {string|undefined} The charset with surrounding whitespace and one
 *   pair of double quotes removed, or undefined when the header is empty or
 *   has no `charset=` parameter.
 */
function getCharsetFromContentType(contentType) {
  if (!contentType) {
    return undefined;
  }
  const rawValue = /charset=([^;]+)/i.exec(contentType)?.[1];
  if (!rawValue) {
    return undefined;
  }
  const trimmed = rawValue.trim();
  const isQuoted = trimmed.startsWith('"') && trimmed.endsWith('"');
  const unquoted = isQuoted ? trimmed.slice(1, -1) : trimmed;
  return unquoted.trim();
}
|
|
128
|
+
/**
 * Creates a TextDecoder for the given encoding label.
 *
 * @param {string|undefined} encoding - Encoding label, if known.
 * @returns {TextDecoder} Decoder for `encoding`, or a UTF-8 decoder when the
 *   label is missing or rejected by TextDecoder.
 */
function createDecoder(encoding) {
  if (encoding) {
    try {
      return new TextDecoder(encoding);
    }
    catch {
      // Label rejected by TextDecoder: fall through to the UTF-8 decoder.
    }
  }
  return new TextDecoder(UTF8_ENCODING);
}
|
|
440
|
-
function
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
}
|
|
469
|
-
if (process.env['ALLOW_LOCAL_FETCH'] !== 'true' &&
|
|
470
|
-
this.ipBlocker.isBlockedIp(normalizedHostname)) {
|
|
471
|
-
throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Private IPs are not allowed`, 'EBLOCKED');
|
|
472
|
-
}
|
|
473
|
-
return normalizedHostname;
|
|
474
|
-
}
|
|
475
|
-
await this.assertNoBlockedCname(normalizedHostname, signal);
|
|
476
|
-
const resultPromise = dns.promises.lookup(normalizedHostname, {
|
|
477
|
-
all: true,
|
|
478
|
-
order: 'verbatim',
|
|
479
|
-
});
|
|
480
|
-
const addresses = await withTimeout(resultPromise, DNS_LOOKUP_TIMEOUT_MS, () => createErrorWithCode(`DNS lookup timed out for ${normalizedHostname}`, 'ETIMEOUT'), signal, createAbortSignalError);
|
|
481
|
-
if (addresses.length === 0 || !addresses[0]) {
|
|
482
|
-
throw createErrorWithCode(`No DNS results returned for ${normalizedHostname}`, 'ENODATA');
|
|
483
|
-
}
|
|
484
|
-
for (const addr of addresses) {
|
|
485
|
-
if (addr.family !== 4 && addr.family !== 6) {
|
|
486
|
-
throw createErrorWithCode(`Invalid address family returned for ${normalizedHostname}`, 'EINVAL');
|
|
487
|
-
}
|
|
488
|
-
if (isCloudMetadataHost(addr.address)) {
|
|
489
|
-
throw createErrorWithCode(`Blocked IP detected for ${normalizedHostname}`, 'EBLOCKED');
|
|
490
|
-
}
|
|
491
|
-
if (!isLocalFetchAllowed() && this.ipBlocker.isBlockedIp(addr.address)) {
|
|
492
|
-
throw createErrorWithCode(`Blocked IP detected for ${normalizedHostname}`, 'EBLOCKED');
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
return addresses[0].address;
|
|
496
|
-
}
|
|
497
|
-
isBlockedHostname(hostname) {
|
|
498
|
-
if (isCloudMetadataHost(hostname))
|
|
499
|
-
return true;
|
|
500
|
-
if (isLocalFetchAllowed())
|
|
139
|
+
/**
 * Decodes bytes to text using the decoder returned by `createDecoder`.
 *
 * @param {Uint8Array} buffer - Bytes to decode.
 * @param {string|undefined} encoding - Encoding label passed to `createDecoder`.
 * @returns {string} Decoded text.
 */
function decodeBuffer(buffer, encoding) {
  const decoder = createDecoder(encoding);
  return decoder.decode(buffer);
}
|
|
142
|
+
function normalizeEncodingLabel(encoding) {
|
|
143
|
+
return encoding?.trim().toLowerCase() ?? '';
|
|
144
|
+
}
|
|
145
|
+
function isUnicodeWideEncoding(encoding) {
|
|
146
|
+
const normalized = normalizeEncodingLabel(encoding);
|
|
147
|
+
return (normalized.startsWith('utf-16') ||
|
|
148
|
+
normalized.startsWith('utf-32') ||
|
|
149
|
+
normalized === 'ucs-2' ||
|
|
150
|
+
normalized === 'unicodefffe' ||
|
|
151
|
+
normalized === 'unicodefeff');
|
|
152
|
+
}
|
|
153
|
+
const BOM_SIGNATURES = [
|
|
154
|
+
// 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
|
|
155
|
+
{ bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
|
|
156
|
+
{ bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
|
|
157
|
+
{ bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
|
|
158
|
+
{ bytes: [0xff, 0xfe], encoding: 'utf-16le' },
|
|
159
|
+
{ bytes: [0xfe, 0xff], encoding: 'utf-16be' },
|
|
160
|
+
];
|
|
161
|
+
function startsWithBytes(buffer, signature) {
|
|
162
|
+
const sigLen = signature.length;
|
|
163
|
+
if (buffer.length < sigLen)
|
|
164
|
+
return false;
|
|
165
|
+
for (let i = 0; i < sigLen; i += 1) {
|
|
166
|
+
if (buffer[i] !== signature[i])
|
|
501
167
|
return false;
|
|
502
|
-
if (this.security.blockedHosts.has(hostname))
|
|
503
|
-
return true;
|
|
504
|
-
return this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix));
|
|
505
168
|
}
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
seen.add(current);
|
|
513
|
-
const cnames = await this.resolveCname(current, signal);
|
|
514
|
-
if (cnames.length === 0)
|
|
515
|
-
return;
|
|
516
|
-
for (const cname of cnames) {
|
|
517
|
-
if (this.isBlockedHostname(cname)) {
|
|
518
|
-
throw createErrorWithCode(`Blocked DNS CNAME detected for ${hostname}: ${cname}`, 'EBLOCKED');
|
|
519
|
-
}
|
|
520
|
-
}
|
|
521
|
-
current = cnames[0] ?? '';
|
|
522
|
-
}
|
|
169
|
+
return true;
|
|
170
|
+
}
|
|
171
|
+
function detectBomEncoding(buffer) {
|
|
172
|
+
for (const { bytes, encoding } of BOM_SIGNATURES) {
|
|
173
|
+
if (startsWithBytes(buffer, bytes))
|
|
174
|
+
return encoding;
|
|
523
175
|
}
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
176
|
+
return undefined;
|
|
177
|
+
}
|
|
178
|
+
function readQuotedValue(input, startIndex) {
|
|
179
|
+
const first = input[startIndex];
|
|
180
|
+
if (!first)
|
|
181
|
+
return '';
|
|
182
|
+
const quoted = first === '"' || first === "'";
|
|
183
|
+
if (quoted) {
|
|
184
|
+
const end = input.indexOf(first, startIndex + 1);
|
|
185
|
+
return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
|
|
186
|
+
}
|
|
187
|
+
const tail = input.slice(startIndex);
|
|
188
|
+
const stop = tail.search(/[\s/>]/);
|
|
189
|
+
return (stop === -1 ? tail : tail.slice(0, stop)).trim();
|
|
190
|
+
}
|
|
191
|
+
function findTokenValue(original, lower, token, fromIndex = 0) {
|
|
192
|
+
const tokenIndex = lower.indexOf(token, fromIndex);
|
|
193
|
+
if (tokenIndex === -1)
|
|
194
|
+
return undefined;
|
|
195
|
+
const valueStart = tokenIndex + token.length;
|
|
196
|
+
const value = readQuotedValue(original, valueStart);
|
|
197
|
+
return value || undefined;
|
|
198
|
+
}
|
|
199
|
+
function extractHtmlCharset(headSnippet) {
|
|
200
|
+
const lower = headSnippet.toLowerCase();
|
|
201
|
+
const charset = findTokenValue(headSnippet, lower, 'charset=');
|
|
202
|
+
return charset ? charset.toLowerCase() : undefined;
|
|
203
|
+
}
|
|
204
|
+
function extractXmlEncoding(headSnippet) {
|
|
205
|
+
const lower = headSnippet.toLowerCase();
|
|
206
|
+
const xmlStart = lower.indexOf('<?xml');
|
|
207
|
+
if (xmlStart === -1)
|
|
208
|
+
return undefined;
|
|
209
|
+
const xmlEnd = lower.indexOf('?>', xmlStart);
|
|
210
|
+
const declaration = xmlEnd === -1
|
|
211
|
+
? headSnippet.slice(xmlStart)
|
|
212
|
+
: headSnippet.slice(xmlStart, xmlEnd + 2);
|
|
213
|
+
const declarationLower = declaration.toLowerCase();
|
|
214
|
+
const encoding = findTokenValue(declaration, declarationLower, 'encoding=');
|
|
215
|
+
return encoding ? encoding.toLowerCase() : undefined;
|
|
216
|
+
}
|
|
217
|
+
function detectHtmlDeclaredEncoding(buffer) {
|
|
218
|
+
const scanSize = Math.min(buffer.length, 8_192);
|
|
219
|
+
if (scanSize === 0)
|
|
220
|
+
return undefined;
|
|
221
|
+
const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
|
|
222
|
+
return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
|
|
223
|
+
}
|
|
224
|
+
function resolveEncoding(declaredEncoding, sample) {
|
|
225
|
+
const bomEncoding = detectBomEncoding(sample);
|
|
226
|
+
if (bomEncoding)
|
|
227
|
+
return bomEncoding;
|
|
228
|
+
if (declaredEncoding)
|
|
229
|
+
return declaredEncoding;
|
|
230
|
+
return detectHtmlDeclaredEncoding(sample);
|
|
231
|
+
}
|
|
232
|
+
const BINARY_SIGNATURES = [
|
|
233
|
+
[0x25, 0x50, 0x44, 0x46],
|
|
234
|
+
[0x89, 0x50, 0x4e, 0x47],
|
|
235
|
+
[0x47, 0x49, 0x46, 0x38],
|
|
236
|
+
[0xff, 0xd8, 0xff],
|
|
237
|
+
[0x52, 0x49, 0x46, 0x46],
|
|
238
|
+
[0x42, 0x4d],
|
|
239
|
+
[0x49, 0x49, 0x2a, 0x00],
|
|
240
|
+
[0x4d, 0x4d, 0x00, 0x2a],
|
|
241
|
+
[0x00, 0x00, 0x01, 0x00],
|
|
242
|
+
[0x50, 0x4b, 0x03, 0x04],
|
|
243
|
+
[0x1f, 0x8b],
|
|
244
|
+
[0x42, 0x5a, 0x68],
|
|
245
|
+
[0x52, 0x61, 0x72, 0x21],
|
|
246
|
+
[0x37, 0x7a, 0xbc, 0xaf],
|
|
247
|
+
[0x7f, 0x45, 0x4c, 0x46],
|
|
248
|
+
[0x4d, 0x5a],
|
|
249
|
+
[0xcf, 0xfa, 0xed, 0xfe],
|
|
250
|
+
[0x00, 0x61, 0x73, 0x6d],
|
|
251
|
+
[0x1a, 0x45, 0xdf, 0xa3],
|
|
252
|
+
[0x66, 0x74, 0x79, 0x70],
|
|
253
|
+
[0x46, 0x4c, 0x56],
|
|
254
|
+
[0x49, 0x44, 0x33],
|
|
255
|
+
[0xff, 0xfb],
|
|
256
|
+
[0xff, 0xfa],
|
|
257
|
+
[0x4f, 0x67, 0x67, 0x53],
|
|
258
|
+
[0x66, 0x4c, 0x61, 0x43],
|
|
259
|
+
[0x4d, 0x54, 0x68, 0x64],
|
|
260
|
+
[0x77, 0x4f, 0x46, 0x46],
|
|
261
|
+
[0x00, 0x01, 0x00, 0x00],
|
|
262
|
+
[0x4f, 0x54, 0x54, 0x4f],
|
|
263
|
+
[0x53, 0x51, 0x4c, 0x69],
|
|
264
|
+
];
|
|
265
|
+
function hasNullByte(buffer, limit) {
|
|
266
|
+
const checkLen = Math.min(buffer.length, limit);
|
|
267
|
+
return buffer.subarray(0, checkLen).includes(0x00);
|
|
268
|
+
}
|
|
269
|
+
function isBinaryContent(buffer, encoding) {
|
|
270
|
+
for (const signature of BINARY_SIGNATURES) {
|
|
271
|
+
if (startsWithBytes(buffer, signature))
|
|
272
|
+
return true;
|
|
548
273
|
}
|
|
274
|
+
return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
|
|
549
275
|
}
|
|
550
276
|
function parseRetryAfter(header) {
|
|
551
277
|
if (!header)
|
|
@@ -563,43 +289,31 @@ function parseRetryAfter(header) {
|
|
|
563
289
|
return 0;
|
|
564
290
|
return Math.ceil(deltaMs / 1000);
|
|
565
291
|
}
|
|
566
|
-
function
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
}
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
return `Network error: Could not reach ${url}`;
|
|
592
|
-
}
|
|
593
|
-
function createNetworkFetchError(url, message) {
|
|
594
|
-
return new FetchError(buildNetworkErrorMessage(url), url, undefined, message ? { message } : {});
|
|
595
|
-
}
|
|
596
|
-
function createUnknownFetchError(url, message) {
|
|
597
|
-
return new FetchError(message, url);
|
|
598
|
-
}
|
|
599
|
-
function createAbortedFetchError(url) {
|
|
600
|
-
return new FetchError('Request was aborted during response read', url, 499, {
|
|
601
|
-
reason: 'aborted',
|
|
602
|
-
});
|
|
292
|
+
function createFetchError(input, url) {
|
|
293
|
+
switch (input.kind) {
|
|
294
|
+
case 'canceled':
|
|
295
|
+
return new FetchError('Request was canceled', url, 499, {
|
|
296
|
+
reason: 'aborted',
|
|
297
|
+
});
|
|
298
|
+
case 'aborted':
|
|
299
|
+
return new FetchError('Request was aborted during response read', url, 499, { reason: 'aborted' });
|
|
300
|
+
case 'timeout':
|
|
301
|
+
return new FetchError(`Request timeout after ${input.timeout}ms`, url, 504, { timeout: input.timeout });
|
|
302
|
+
case 'rate-limited':
|
|
303
|
+
return new FetchError('Too many requests', url, 429, {
|
|
304
|
+
retryAfter: parseRetryAfter(input.retryAfter),
|
|
305
|
+
});
|
|
306
|
+
case 'http':
|
|
307
|
+
return new FetchError(`HTTP ${input.status}: ${input.statusText}`, url, input.status);
|
|
308
|
+
case 'too-many-redirects':
|
|
309
|
+
return new FetchError('Too many redirects', url);
|
|
310
|
+
case 'missing-redirect-location':
|
|
311
|
+
return new FetchError('Redirect response missing Location header', url);
|
|
312
|
+
case 'network':
|
|
313
|
+
return new FetchError(`Network error: Could not reach ${url}`, url, undefined, { message: input.message });
|
|
314
|
+
case 'unknown':
|
|
315
|
+
return new FetchError(input.message ?? 'Unexpected error', url);
|
|
316
|
+
}
|
|
603
317
|
}
|
|
604
318
|
function isAbortError(error) {
|
|
605
319
|
return (isError(error) &&
|
|
@@ -622,15 +336,15 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
|
|
|
622
336
|
const url = resolveErrorUrl(error, fallbackUrl);
|
|
623
337
|
if (isAbortError(error)) {
|
|
624
338
|
return isTimeoutError(error)
|
|
625
|
-
?
|
|
626
|
-
:
|
|
339
|
+
? createFetchError({ kind: 'timeout', timeout: timeoutMs }, url)
|
|
340
|
+
: createFetchError({ kind: 'canceled' }, url);
|
|
627
341
|
}
|
|
628
342
|
if (!isError(error))
|
|
629
|
-
return
|
|
343
|
+
return createFetchError({ kind: 'unknown', message: 'Unexpected error' }, url);
|
|
630
344
|
if (!isSystemError(error)) {
|
|
631
345
|
const err = error;
|
|
632
346
|
const causeStr = err.cause instanceof Error ? err.cause.message : String(err.cause);
|
|
633
|
-
return
|
|
347
|
+
return createFetchError({ kind: 'network', message: `${err.message}. Cause: ${causeStr}` }, url);
|
|
634
348
|
}
|
|
635
349
|
const { code } = error;
|
|
636
350
|
if (code === 'ETIMEOUT') {
|
|
@@ -643,158 +357,7 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
|
|
|
643
357
|
code === 'EINVAL') {
|
|
644
358
|
return new FetchError(error.message, url, 400, { code });
|
|
645
359
|
}
|
|
646
|
-
return
|
|
647
|
-
code,
|
|
648
|
-
message: error.message,
|
|
649
|
-
});
|
|
650
|
-
}
|
|
651
|
-
const fetchChannel = diagnosticsChannel.channel('fetch-url-mcp.fetch');
|
|
652
|
-
const SLOW_REQUEST_THRESHOLD_MS = 5000;
|
|
653
|
-
class FetchTelemetry {
|
|
654
|
-
logger;
|
|
655
|
-
context;
|
|
656
|
-
redactor;
|
|
657
|
-
constructor(logger, context, redactor) {
|
|
658
|
-
this.logger = logger;
|
|
659
|
-
this.context = context;
|
|
660
|
-
this.redactor = redactor;
|
|
661
|
-
}
|
|
662
|
-
redact(url) {
|
|
663
|
-
return this.redactor.redact(url);
|
|
664
|
-
}
|
|
665
|
-
start(url, method) {
|
|
666
|
-
const safeUrl = this.redactor.redact(url);
|
|
667
|
-
const contextRequestId = this.context.getRequestId();
|
|
668
|
-
const operationId = this.context.getOperationId();
|
|
669
|
-
const ctx = {
|
|
670
|
-
requestId: randomUUID(),
|
|
671
|
-
startTime: performance.now(),
|
|
672
|
-
url: safeUrl,
|
|
673
|
-
method: method.toUpperCase(),
|
|
674
|
-
};
|
|
675
|
-
if (contextRequestId)
|
|
676
|
-
ctx.contextRequestId = contextRequestId;
|
|
677
|
-
if (operationId)
|
|
678
|
-
ctx.operationId = operationId;
|
|
679
|
-
const event = {
|
|
680
|
-
v: 1,
|
|
681
|
-
type: 'start',
|
|
682
|
-
requestId: ctx.requestId,
|
|
683
|
-
method: ctx.method,
|
|
684
|
-
url: ctx.url,
|
|
685
|
-
};
|
|
686
|
-
if (ctx.contextRequestId)
|
|
687
|
-
event.contextRequestId = ctx.contextRequestId;
|
|
688
|
-
if (ctx.operationId)
|
|
689
|
-
event.operationId = ctx.operationId;
|
|
690
|
-
this.publish(event);
|
|
691
|
-
const logData = {
|
|
692
|
-
requestId: ctx.requestId,
|
|
693
|
-
method: ctx.method,
|
|
694
|
-
url: ctx.url,
|
|
695
|
-
};
|
|
696
|
-
if (ctx.contextRequestId)
|
|
697
|
-
logData['contextRequestId'] = ctx.contextRequestId;
|
|
698
|
-
if (ctx.operationId)
|
|
699
|
-
logData['operationId'] = ctx.operationId;
|
|
700
|
-
this.logger.debug('HTTP Request', logData);
|
|
701
|
-
return ctx;
|
|
702
|
-
}
|
|
703
|
-
recordResponse(context, response, contentSize) {
|
|
704
|
-
const duration = performance.now() - context.startTime;
|
|
705
|
-
const durationLabel = `${Math.round(duration)}ms`;
|
|
706
|
-
const event = {
|
|
707
|
-
v: 1,
|
|
708
|
-
type: 'end',
|
|
709
|
-
requestId: context.requestId,
|
|
710
|
-
status: response.status,
|
|
711
|
-
duration,
|
|
712
|
-
};
|
|
713
|
-
if (context.contextRequestId)
|
|
714
|
-
event.contextRequestId = context.contextRequestId;
|
|
715
|
-
if (context.operationId)
|
|
716
|
-
event.operationId = context.operationId;
|
|
717
|
-
this.publish(event);
|
|
718
|
-
const contentType = response.headers.get('content-type') ?? undefined;
|
|
719
|
-
const contentLengthHeader = response.headers.get('content-length');
|
|
720
|
-
const size = contentLengthHeader ??
|
|
721
|
-
(contentSize === undefined ? undefined : String(contentSize));
|
|
722
|
-
const logData = {
|
|
723
|
-
requestId: context.requestId,
|
|
724
|
-
status: response.status,
|
|
725
|
-
url: context.url,
|
|
726
|
-
duration: durationLabel,
|
|
727
|
-
};
|
|
728
|
-
if (context.contextRequestId)
|
|
729
|
-
logData['contextRequestId'] = context.contextRequestId;
|
|
730
|
-
if (context.operationId)
|
|
731
|
-
logData['operationId'] = context.operationId;
|
|
732
|
-
if (contentType)
|
|
733
|
-
logData['contentType'] = contentType;
|
|
734
|
-
if (size)
|
|
735
|
-
logData['size'] = size;
|
|
736
|
-
this.logger.debug('HTTP Response', logData);
|
|
737
|
-
if (duration > SLOW_REQUEST_THRESHOLD_MS) {
|
|
738
|
-
const warnData = {
|
|
739
|
-
requestId: context.requestId,
|
|
740
|
-
url: context.url,
|
|
741
|
-
duration: durationLabel,
|
|
742
|
-
};
|
|
743
|
-
if (context.contextRequestId)
|
|
744
|
-
warnData['contextRequestId'] = context.contextRequestId;
|
|
745
|
-
if (context.operationId)
|
|
746
|
-
warnData['operationId'] = context.operationId;
|
|
747
|
-
this.logger.warn('Slow HTTP request detected', warnData);
|
|
748
|
-
}
|
|
749
|
-
}
|
|
750
|
-
recordError(context, error, status) {
|
|
751
|
-
const duration = performance.now() - context.startTime;
|
|
752
|
-
const err = isError(error) ? error : new Error(String(error));
|
|
753
|
-
const code = isSystemError(err) ? err.code : undefined;
|
|
754
|
-
const event = {
|
|
755
|
-
v: 1,
|
|
756
|
-
type: 'error',
|
|
757
|
-
requestId: context.requestId,
|
|
758
|
-
url: context.url,
|
|
759
|
-
error: err.message,
|
|
760
|
-
duration,
|
|
761
|
-
};
|
|
762
|
-
if (code !== undefined)
|
|
763
|
-
event.code = code;
|
|
764
|
-
if (status !== undefined)
|
|
765
|
-
event.status = status;
|
|
766
|
-
if (context.contextRequestId)
|
|
767
|
-
event.contextRequestId = context.contextRequestId;
|
|
768
|
-
if (context.operationId)
|
|
769
|
-
event.operationId = context.operationId;
|
|
770
|
-
this.publish(event);
|
|
771
|
-
const logData = {
|
|
772
|
-
requestId: context.requestId,
|
|
773
|
-
url: context.url,
|
|
774
|
-
status,
|
|
775
|
-
code,
|
|
776
|
-
error: err.message,
|
|
777
|
-
};
|
|
778
|
-
if (context.contextRequestId)
|
|
779
|
-
logData['contextRequestId'] = context.contextRequestId;
|
|
780
|
-
if (context.operationId)
|
|
781
|
-
logData['operationId'] = context.operationId;
|
|
782
|
-
if (status === 429) {
|
|
783
|
-
this.logger.warn('HTTP Request Error', logData);
|
|
784
|
-
return;
|
|
785
|
-
}
|
|
786
|
-
this.logger.error('HTTP Request Error', logData);
|
|
787
|
-
}
|
|
788
|
-
publish(event) {
|
|
789
|
-
if (!fetchChannel.hasSubscribers)
|
|
790
|
-
return;
|
|
791
|
-
try {
|
|
792
|
-
fetchChannel.publish(event);
|
|
793
|
-
}
|
|
794
|
-
catch {
|
|
795
|
-
// Best-effort telemetry; never crash request path.
|
|
796
|
-
}
|
|
797
|
-
}
|
|
360
|
+
return createFetchError({ kind: 'network', message: error.message }, url);
|
|
798
361
|
}
|
|
799
362
|
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
|
|
800
363
|
function isRedirectStatus(status) {
|
|
@@ -835,7 +398,7 @@ class RedirectFollower {
|
|
|
835
398
|
return { response, url: currentUrl };
|
|
836
399
|
currentUrl = nextUrl;
|
|
837
400
|
}
|
|
838
|
-
throw
|
|
401
|
+
throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
|
|
839
402
|
}
|
|
840
403
|
async performFetchCycle(currentUrl, init, redirectLimit, redirectCount, ipAddress) {
|
|
841
404
|
const fetchInit = {
|
|
@@ -843,6 +406,7 @@ class RedirectFollower {
|
|
|
843
406
|
redirect: 'manual',
|
|
844
407
|
};
|
|
845
408
|
if (ipAddress) {
|
|
409
|
+
const ca = tls.rootCertificates.length > 0 ? tls.rootCertificates : undefined;
|
|
846
410
|
const agent = new Agent({
|
|
847
411
|
connect: {
|
|
848
412
|
lookup: (hostname, options, callback) => {
|
|
@@ -854,6 +418,8 @@ class RedirectFollower {
|
|
|
854
418
|
callback(null, ipAddress, family);
|
|
855
419
|
}
|
|
856
420
|
},
|
|
421
|
+
timeout: config.fetcher.timeout,
|
|
422
|
+
...(ca ? { ca } : {}),
|
|
857
423
|
},
|
|
858
424
|
pipelining: 1,
|
|
859
425
|
connections: 1,
|
|
@@ -865,7 +431,10 @@ class RedirectFollower {
|
|
|
865
431
|
const response = await this.fetchFn(currentUrl, fetchInit);
|
|
866
432
|
if (!isRedirectStatus(response.status))
|
|
867
433
|
return { response };
|
|
868
|
-
|
|
434
|
+
if (redirectCount >= redirectLimit) {
|
|
435
|
+
cancelResponseBody(response);
|
|
436
|
+
throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
|
|
437
|
+
}
|
|
869
438
|
const location = this.getRedirectLocation(response, currentUrl);
|
|
870
439
|
cancelResponseBody(response);
|
|
871
440
|
const nextUrl = this.resolveRedirectTarget(currentUrl, location);
|
|
@@ -874,221 +443,57 @@ class RedirectFollower {
|
|
|
874
443
|
parsedNextUrl.protocol !== 'https:') {
|
|
875
444
|
throw createErrorWithCode(`Unsupported redirect protocol: ${parsedNextUrl.protocol}`, 'EUNSUPPORTEDPROTOCOL');
|
|
876
445
|
}
|
|
877
|
-
return {
|
|
878
|
-
response,
|
|
879
|
-
nextUrl,
|
|
880
|
-
};
|
|
881
|
-
}
|
|
882
|
-
assertRedirectWithinLimit(response, currentUrl, redirectLimit, redirectCount) {
|
|
883
|
-
if (redirectCount < redirectLimit)
|
|
884
|
-
return;
|
|
885
|
-
cancelResponseBody(response);
|
|
886
|
-
throw createTooManyRedirectsFetchError(currentUrl);
|
|
887
|
-
}
|
|
888
|
-
getRedirectLocation(response, currentUrl) {
|
|
889
|
-
const location = response.headers.get('location');
|
|
890
|
-
if (location)
|
|
891
|
-
return location;
|
|
892
|
-
cancelResponseBody(response);
|
|
893
|
-
throw createMissingRedirectLocationFetchError(currentUrl);
|
|
894
|
-
}
|
|
895
|
-
resolveRedirectTarget(baseUrl, location) {
|
|
896
|
-
let resolved;
|
|
897
|
-
try {
|
|
898
|
-
resolved = new URL(location, baseUrl);
|
|
899
|
-
}
|
|
900
|
-
catch {
|
|
901
|
-
throw createErrorWithCode('Invalid redirect target', 'EBADREDIRECT');
|
|
902
|
-
}
|
|
903
|
-
if (resolved.username || resolved.password) {
|
|
904
|
-
throw createErrorWithCode('Redirect target includes credentials', 'EBADREDIRECT');
|
|
905
|
-
}
|
|
906
|
-
return this.normalizeUrl(resolved.href);
|
|
907
|
-
}
|
|
908
|
-
annotateRedirectError(error, url) {
|
|
909
|
-
if (!isObject(error))
|
|
910
|
-
return;
|
|
911
|
-
error['requestUrl'] = url;
|
|
912
|
-
}
|
|
913
|
-
async withRedirectErrorContext(url, fn) {
|
|
914
|
-
try {
|
|
915
|
-
return await fn();
|
|
916
|
-
}
|
|
917
|
-
catch (error) {
|
|
918
|
-
this.annotateRedirectError(error, url);
|
|
919
|
-
throw error;
|
|
920
|
-
}
|
|
921
|
-
}
|
|
922
|
-
}
|
|
923
|
-
class ResponseTextReader {
|
|
924
|
-
async read(response, url, maxBytes, signal, encoding) {
|
|
925
|
-
const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
|
|
926
|
-
const text = decodeBuffer(buffer, effectiveEncoding);
|
|
927
|
-
return { text, size: buffer.byteLength, truncated };
|
|
928
|
-
}
|
|
929
|
-
async readBuffer(response, url, maxBytes, signal, encoding) {
|
|
930
|
-
if (signal?.aborted) {
|
|
931
|
-
cancelResponseBody(response);
|
|
932
|
-
throw createAbortedFetchError(url);
|
|
933
|
-
}
|
|
934
|
-
if (!response.body) {
|
|
935
|
-
return this.readNonStreamBuffer(response, url, maxBytes, signal, encoding);
|
|
936
|
-
}
|
|
937
|
-
return this.readStreamToBuffer(response.body, url, maxBytes, signal, encoding);
|
|
938
|
-
}
|
|
939
|
-
async readNonStreamBuffer(response, url, maxBytes, signal, encoding) {
|
|
940
|
-
if (signal?.aborted)
|
|
941
|
-
throw createCanceledFetchError(url);
|
|
942
|
-
const limit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
|
|
943
|
-
let buffer;
|
|
944
|
-
let truncated = false;
|
|
945
|
-
try {
|
|
946
|
-
// Try safe blob slicing if available (Node 18+) to avoid OOM
|
|
947
|
-
const blob = await response.blob();
|
|
948
|
-
if (Number.isFinite(limit) && blob.size > limit) {
|
|
949
|
-
const sliced = blob.slice(0, limit);
|
|
950
|
-
buffer = new Uint8Array(await sliced.arrayBuffer());
|
|
951
|
-
truncated = true;
|
|
952
|
-
}
|
|
953
|
-
else {
|
|
954
|
-
buffer = new Uint8Array(await blob.arrayBuffer());
|
|
955
|
-
}
|
|
956
|
-
}
|
|
957
|
-
catch {
|
|
958
|
-
// Fallback if blob() fails
|
|
959
|
-
const arrayBuffer = await response.arrayBuffer();
|
|
960
|
-
const length = Math.min(arrayBuffer.byteLength, limit);
|
|
961
|
-
buffer = new Uint8Array(arrayBuffer, 0, length);
|
|
962
|
-
truncated = Number.isFinite(limit) && arrayBuffer.byteLength > limit;
|
|
963
|
-
}
|
|
964
|
-
const effectiveEncoding = resolveEncoding(encoding, buffer) ?? encoding ?? 'utf-8';
|
|
965
|
-
if (isBinaryContent(buffer, effectiveEncoding)) {
|
|
966
|
-
throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
|
|
967
|
-
}
|
|
968
|
-
return {
|
|
969
|
-
buffer,
|
|
970
|
-
encoding: effectiveEncoding,
|
|
971
|
-
size: buffer.byteLength,
|
|
972
|
-
truncated,
|
|
973
|
-
};
|
|
974
|
-
}
|
|
975
|
-
async readStreamToBuffer(stream, url, maxBytes, signal, encoding) {
|
|
976
|
-
const byteLimit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
|
|
977
|
-
const captureChunks = byteLimit !== Number.POSITIVE_INFINITY;
|
|
978
|
-
let effectiveEncoding = encoding ?? 'utf-8';
|
|
979
|
-
let encodingResolved = false;
|
|
980
|
-
let total = 0;
|
|
981
|
-
const chunks = [];
|
|
982
|
-
const source = Readable.fromWeb(toNodeReadableStream(stream, url, 'response:read-stream-buffer'));
|
|
983
|
-
const guard = new Transform({
|
|
984
|
-
transform(chunk, _encoding, callback) {
|
|
985
|
-
try {
|
|
986
|
-
const buf = Buffer.isBuffer(chunk)
|
|
987
|
-
? chunk
|
|
988
|
-
: Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength);
|
|
989
|
-
if (!encodingResolved) {
|
|
990
|
-
encodingResolved = true;
|
|
991
|
-
effectiveEncoding =
|
|
992
|
-
resolveEncoding(encoding, buf) ?? encoding ?? 'utf-8';
|
|
993
|
-
}
|
|
994
|
-
if (isBinaryContent(buf, effectiveEncoding)) {
|
|
995
|
-
callback(new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' }));
|
|
996
|
-
return;
|
|
997
|
-
}
|
|
998
|
-
const newTotal = total + buf.length;
|
|
999
|
-
if (newTotal > byteLimit) {
|
|
1000
|
-
const remaining = byteLimit - total;
|
|
1001
|
-
if (remaining > 0) {
|
|
1002
|
-
const slice = buf.subarray(0, remaining);
|
|
1003
|
-
total += remaining;
|
|
1004
|
-
if (captureChunks)
|
|
1005
|
-
chunks.push(slice);
|
|
1006
|
-
this.push(slice);
|
|
1007
|
-
}
|
|
1008
|
-
callback(new MaxBytesError());
|
|
1009
|
-
return;
|
|
1010
|
-
}
|
|
1011
|
-
total = newTotal;
|
|
1012
|
-
if (captureChunks)
|
|
1013
|
-
chunks.push(buf);
|
|
1014
|
-
callback(null, buf);
|
|
1015
|
-
}
|
|
1016
|
-
catch (error) {
|
|
1017
|
-
callback(error instanceof Error ? error : new Error(String(error)));
|
|
1018
|
-
}
|
|
1019
|
-
},
|
|
1020
|
-
});
|
|
1021
|
-
const guarded = source.pipe(guard);
|
|
1022
|
-
const abortHandler = () => {
|
|
1023
|
-
source.destroy();
|
|
1024
|
-
guard.destroy();
|
|
446
|
+
return {
|
|
447
|
+
response,
|
|
448
|
+
nextUrl,
|
|
1025
449
|
};
|
|
1026
|
-
|
|
1027
|
-
|
|
450
|
+
}
|
|
451
|
+
getRedirectLocation(response, currentUrl) {
|
|
452
|
+
const location = response.headers.get('location');
|
|
453
|
+
if (location)
|
|
454
|
+
return location;
|
|
455
|
+
cancelResponseBody(response);
|
|
456
|
+
throw createFetchError({ kind: 'missing-redirect-location' }, currentUrl);
|
|
457
|
+
}
|
|
458
|
+
resolveRedirectTarget(baseUrl, location) {
|
|
459
|
+
let resolved;
|
|
460
|
+
try {
|
|
461
|
+
resolved = new URL(location, baseUrl);
|
|
1028
462
|
}
|
|
463
|
+
catch {
|
|
464
|
+
throw createErrorWithCode('Invalid redirect target', 'EBADREDIRECT');
|
|
465
|
+
}
|
|
466
|
+
if (resolved.username || resolved.password) {
|
|
467
|
+
throw createErrorWithCode('Redirect target includes credentials', 'EBADREDIRECT');
|
|
468
|
+
}
|
|
469
|
+
return this.normalizeUrl(resolved.href);
|
|
470
|
+
}
|
|
471
|
+
annotateRedirectError(error, url) {
|
|
472
|
+
if (!isObject(error))
|
|
473
|
+
return;
|
|
474
|
+
error['requestUrl'] = url;
|
|
475
|
+
}
|
|
476
|
+
async withRedirectErrorContext(url, fn) {
|
|
1029
477
|
try {
|
|
1030
|
-
|
|
1031
|
-
return {
|
|
1032
|
-
buffer,
|
|
1033
|
-
encoding: effectiveEncoding,
|
|
1034
|
-
size: total,
|
|
1035
|
-
truncated: false,
|
|
1036
|
-
};
|
|
478
|
+
return await fn();
|
|
1037
479
|
}
|
|
1038
480
|
catch (error) {
|
|
1039
|
-
|
|
1040
|
-
throw createAbortedFetchError(url);
|
|
1041
|
-
if (error instanceof FetchError)
|
|
1042
|
-
throw error;
|
|
1043
|
-
if (error instanceof MaxBytesError) {
|
|
1044
|
-
source.destroy();
|
|
1045
|
-
guard.destroy();
|
|
1046
|
-
return {
|
|
1047
|
-
buffer: Buffer.concat(chunks, total),
|
|
1048
|
-
encoding: effectiveEncoding,
|
|
1049
|
-
size: total,
|
|
1050
|
-
truncated: true,
|
|
1051
|
-
};
|
|
1052
|
-
}
|
|
481
|
+
this.annotateRedirectError(error, url);
|
|
1053
482
|
throw error;
|
|
1054
483
|
}
|
|
1055
|
-
finally {
|
|
1056
|
-
if (signal) {
|
|
1057
|
-
signal.removeEventListener('abort', abortHandler);
|
|
1058
|
-
}
|
|
1059
|
-
}
|
|
1060
484
|
}
|
|
1061
485
|
}
|
|
1062
|
-
const DEFAULT_HEADERS = {
|
|
1063
|
-
'User-Agent': config.fetcher.userAgent,
|
|
1064
|
-
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
1065
|
-
'Accept-Language': 'en-US,en;q=0.5',
|
|
1066
|
-
'Accept-Encoding': 'gzip, deflate, br',
|
|
1067
|
-
Connection: 'keep-alive',
|
|
1068
|
-
};
|
|
1069
|
-
function buildHeaders() {
|
|
1070
|
-
return DEFAULT_HEADERS;
|
|
1071
|
-
}
|
|
1072
|
-
function buildRequestSignal(timeoutMs, external) {
|
|
1073
|
-
if (timeoutMs <= 0)
|
|
1074
|
-
return external;
|
|
1075
|
-
const timeoutSignal = AbortSignal.timeout(timeoutMs);
|
|
1076
|
-
return external ? AbortSignal.any([external, timeoutSignal]) : timeoutSignal;
|
|
1077
|
-
}
|
|
1078
|
-
function buildRequestInit(headers, signal) {
|
|
1079
|
-
return {
|
|
1080
|
-
method: 'GET',
|
|
1081
|
-
headers,
|
|
1082
|
-
...(signal ? { signal } : {}),
|
|
1083
|
-
};
|
|
1084
|
-
}
|
|
1085
486
|
function resolveResponseError(response, finalUrl) {
|
|
1086
487
|
if (response.status === 429) {
|
|
1087
|
-
return
|
|
488
|
+
return createFetchError({ kind: 'rate-limited', retryAfter: response.headers.get('retry-after') }, finalUrl);
|
|
1088
489
|
}
|
|
1089
490
|
return response.ok
|
|
1090
491
|
? null
|
|
1091
|
-
:
|
|
492
|
+
: createFetchError({
|
|
493
|
+
kind: 'http',
|
|
494
|
+
status: response.status,
|
|
495
|
+
statusText: response.statusText,
|
|
496
|
+
}, finalUrl);
|
|
1092
497
|
}
|
|
1093
498
|
function resolveMediaType(contentType) {
|
|
1094
499
|
if (!contentType)
|
|
@@ -1220,85 +625,224 @@ async function decodeResponseIfNeeded(response, url, signal) {
|
|
|
1220
625
|
if (!isSupportedContentEncoding(encoding)) {
|
|
1221
626
|
throw createUnsupportedContentEncodingError(url, encodingHeader ?? encoding);
|
|
1222
627
|
}
|
|
1223
|
-
}
|
|
1224
|
-
if (!response.body)
|
|
1225
|
-
return response;
|
|
1226
|
-
const [decodeBranch, passthroughBranch] = response.body.tee();
|
|
1227
|
-
const decodeOrder = encodings
|
|
1228
|
-
.slice()
|
|
1229
|
-
.reverse()
|
|
1230
|
-
.filter(isSupportedContentEncoding);
|
|
1231
|
-
const decompressors = decodeOrder.map((encoding) => createDecompressor(encoding));
|
|
1232
|
-
const decodeSource = Readable.fromWeb(toNodeReadableStream(decodeBranch, url, 'response:decode-content-encoding'));
|
|
1233
|
-
const decodedNodeStream = new PassThrough();
|
|
1234
|
-
const decodedPipeline = pipeline([
|
|
1235
|
-
decodeSource,
|
|
1236
|
-
...decompressors,
|
|
1237
|
-
decodedNodeStream,
|
|
1238
|
-
]);
|
|
1239
|
-
const headers = new Headers(response.headers);
|
|
1240
|
-
headers.delete('content-encoding');
|
|
1241
|
-
headers.delete('content-length');
|
|
1242
|
-
const abortDecodePipeline = () => {
|
|
1243
|
-
decodeSource.destroy();
|
|
1244
|
-
for (const decompressor of decompressors) {
|
|
1245
|
-
decompressor.destroy();
|
|
628
|
+
}
|
|
629
|
+
if (!response.body)
|
|
630
|
+
return response;
|
|
631
|
+
const [decodeBranch, passthroughBranch] = response.body.tee();
|
|
632
|
+
const decodeOrder = encodings
|
|
633
|
+
.slice()
|
|
634
|
+
.reverse()
|
|
635
|
+
.filter(isSupportedContentEncoding);
|
|
636
|
+
const decompressors = decodeOrder.map((encoding) => createDecompressor(encoding));
|
|
637
|
+
const decodeSource = Readable.fromWeb(toNodeReadableStream(decodeBranch, url, 'response:decode-content-encoding'));
|
|
638
|
+
const decodedNodeStream = new PassThrough();
|
|
639
|
+
const decodedPipeline = pipeline([
|
|
640
|
+
decodeSource,
|
|
641
|
+
...decompressors,
|
|
642
|
+
decodedNodeStream,
|
|
643
|
+
]);
|
|
644
|
+
const headers = new Headers(response.headers);
|
|
645
|
+
headers.delete('content-encoding');
|
|
646
|
+
headers.delete('content-length');
|
|
647
|
+
const abortDecodePipeline = () => {
|
|
648
|
+
decodeSource.destroy();
|
|
649
|
+
for (const decompressor of decompressors) {
|
|
650
|
+
decompressor.destroy();
|
|
651
|
+
}
|
|
652
|
+
decodedNodeStream.destroy();
|
|
653
|
+
};
|
|
654
|
+
if (signal) {
|
|
655
|
+
signal.addEventListener('abort', abortDecodePipeline, { once: true });
|
|
656
|
+
}
|
|
657
|
+
void decodedPipeline.catch((error) => {
|
|
658
|
+
decodedNodeStream.destroy(toError(error));
|
|
659
|
+
});
|
|
660
|
+
const decodedBodyStream = toWebReadableStream(decodedNodeStream, url, 'response:decode-content-encoding');
|
|
661
|
+
const decodedReader = decodedBodyStream.getReader();
|
|
662
|
+
const clearAbortListener = () => {
|
|
663
|
+
if (!signal)
|
|
664
|
+
return;
|
|
665
|
+
signal.removeEventListener('abort', abortDecodePipeline);
|
|
666
|
+
};
|
|
667
|
+
try {
|
|
668
|
+
const first = await decodedReader.read();
|
|
669
|
+
if (first.done) {
|
|
670
|
+
clearAbortListener();
|
|
671
|
+
void passthroughBranch.cancel().catch(() => undefined);
|
|
672
|
+
return new Response(null, {
|
|
673
|
+
status: response.status,
|
|
674
|
+
statusText: response.statusText,
|
|
675
|
+
headers,
|
|
676
|
+
});
|
|
677
|
+
}
|
|
678
|
+
void passthroughBranch.cancel().catch(() => undefined);
|
|
679
|
+
const body = createPumpedStream(first.value, decodedReader);
|
|
680
|
+
if (signal) {
|
|
681
|
+
void finished(decodedNodeStream, { cleanup: true })
|
|
682
|
+
.catch(() => { })
|
|
683
|
+
.finally(() => {
|
|
684
|
+
clearAbortListener();
|
|
685
|
+
});
|
|
686
|
+
}
|
|
687
|
+
return new Response(body, {
|
|
688
|
+
status: response.status,
|
|
689
|
+
statusText: response.statusText,
|
|
690
|
+
headers,
|
|
691
|
+
});
|
|
692
|
+
}
|
|
693
|
+
catch (error) {
|
|
694
|
+
clearAbortListener();
|
|
695
|
+
abortDecodePipeline();
|
|
696
|
+
void decodedReader.cancel(error).catch(() => undefined);
|
|
697
|
+
logDebug('Content-Encoding decode failed; using passthrough body', {
|
|
698
|
+
url: redactUrl(url),
|
|
699
|
+
encoding: encodingHeader ?? encodings.join(','),
|
|
700
|
+
error: isError(error) ? error.message : String(error),
|
|
701
|
+
});
|
|
702
|
+
return new Response(passthroughBranch, {
|
|
703
|
+
status: response.status,
|
|
704
|
+
statusText: response.statusText,
|
|
705
|
+
headers,
|
|
706
|
+
});
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
/**
 * Reads a fetch `Response` body into memory with an optional byte cap,
 * resolving the text encoding and rejecting content detected as binary.
 */
class ResponseTextReader {
    /**
     * Reads the response body and decodes it to text.
     *
     * @param response Response whose body should be read.
     * @param url URL used when building errors.
     * @param maxBytes Byte cap; values <= 0 disable the cap.
     * @param signal Optional AbortSignal that cancels the read.
     * @param encoding Optional declared encoding, used as a hint.
     * @returns `{ text, size, truncated }` where `size` is the byte length of the buffer that was read.
     */
    async read(response, url, maxBytes, signal, encoding) {
        const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
        const text = decodeBuffer(buffer, effectiveEncoding);
        return { text, size: buffer.byteLength, truncated };
    }
    /**
     * Reads the response body into a buffer, streaming when `response.body`
     * is present and falling back to a non-streaming read otherwise.
     *
     * @returns `{ buffer, encoding, size, truncated }`.
     * @throws The error built by `createFetchError({ kind: 'aborted' }, url)` when the signal is already aborted.
     */
    async readBuffer(response, url, maxBytes, signal, encoding) {
        if (signal?.aborted) {
            cancelResponseBody(response);
            throw createFetchError({ kind: 'aborted' }, url);
        }
        if (!response.body) {
            return this.readNonStreamBuffer(response, url, maxBytes, signal, encoding);
        }
        return this.readStreamToBuffer(response.body, url, maxBytes, signal, encoding);
    }
    /**
     * Reads a response that exposes no body stream, via `blob()` with an
     * `arrayBuffer()` fallback, truncating to `maxBytes` when a cap is set.
     *
     * @returns `{ buffer, encoding, size, truncated }`.
     * @throws FetchError with reason `binary_content_detected` when the buffer is detected as binary.
     */
    async readNonStreamBuffer(response, url, maxBytes, signal, encoding) {
        if (signal?.aborted)
            throw createFetchError({ kind: 'canceled' }, url);
        const limit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
        let buffer;
        let truncated = false;
        try {
            // Try safe blob slicing if available (Node 18+) to avoid OOM
            const blob = await response.blob();
            if (Number.isFinite(limit) && blob.size > limit) {
                const sliced = blob.slice(0, limit);
                buffer = new Uint8Array(await sliced.arrayBuffer());
                truncated = true;
            }
            else {
                buffer = new Uint8Array(await blob.arrayBuffer());
            }
        }
        catch {
            // Fallback if blob() fails
            const arrayBuffer = await response.arrayBuffer();
            const length = Math.min(arrayBuffer.byteLength, limit);
            buffer = new Uint8Array(arrayBuffer, 0, length);
            truncated = Number.isFinite(limit) && arrayBuffer.byteLength > limit;
        }
        const effectiveEncoding = resolveEncoding(encoding, buffer) ?? encoding ?? 'utf-8';
        if (isBinaryContent(buffer, effectiveEncoding)) {
            throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
        }
        return {
            buffer,
            encoding: effectiveEncoding,
            size: buffer.byteLength,
            truncated,
        };
    }
    /**
     * Streams the body through a guarding Transform that resolves the
     * encoding from the first chunk, rejects binary chunks, and stops once
     * the byte cap is exceeded.
     *
     * @param stream Web ReadableStream taken from `response.body`.
     * @param url URL used when building errors.
     * @param maxBytes Byte cap; values <= 0 disable the cap.
     * @param signal Optional AbortSignal; aborting destroys both streams.
     * @param encoding Optional declared encoding, used as a hint.
     * @returns `{ buffer, encoding, size, truncated }`; `truncated` is true only when the cap was hit.
     * @throws FetchError for binary content, the `aborted` fetch error when the signal fired, or the underlying stream error.
     */
    async readStreamToBuffer(stream, url, maxBytes, signal, encoding) {
        const byteLimit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
        // Chunks are only retained when a cap exists; they rebuild the
        // truncated buffer after the guard fails with MaxBytesError.
        const captureChunks = byteLimit !== Number.POSITIVE_INFINITY;
        let effectiveEncoding = encoding ?? 'utf-8';
        let encodingResolved = false;
        let total = 0;
        const chunks = [];
        const source = Readable.fromWeb(toNodeReadableStream(stream, url, 'response:read-stream-buffer'));
        const guard = new Transform({
            transform(chunk, _encoding, callback) {
                try {
                    const buf = Buffer.isBuffer(chunk)
                        ? chunk
                        : Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength);
                    if (!encodingResolved) {
                        // Only the first chunk is used to resolve the encoding.
                        encodingResolved = true;
                        effectiveEncoding =
                            resolveEncoding(encoding, buf) ?? encoding ?? 'utf-8';
                    }
                    if (isBinaryContent(buf, effectiveEncoding)) {
                        callback(new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' }));
                        return;
                    }
                    const newTotal = total + buf.length;
                    if (newTotal > byteLimit) {
                        // Keep the part of this chunk that still fits, then stop.
                        const remaining = byteLimit - total;
                        if (remaining > 0) {
                            const slice = buf.subarray(0, remaining);
                            total += remaining;
                            if (captureChunks)
                                chunks.push(slice);
                            this.push(slice);
                        }
                        callback(new MaxBytesError());
                        return;
                    }
                    total = newTotal;
                    if (captureChunks)
                        chunks.push(buf);
                    callback(null, buf);
                }
                catch (error) {
                    callback(toError(error));
                }
            },
        });
        const guarded = source.pipe(guard);
        // pipe() does not forward source errors to the destination: without
        // this listener a failing body stream would emit an unhandled 'error'
        // on `source` and leave `guard` open, so the read below would never
        // settle. Destroying the guard with the error surfaces it instead.
        source.once('error', (error) => {
            guard.destroy(toError(error));
        });
        const abortHandler = () => {
            source.destroy();
            guard.destroy();
        };
        if (signal) {
            signal.addEventListener('abort', abortHandler, { once: true });
        }
        try {
            const buffer = await consumeBuffer(guarded);
            return {
                buffer,
                encoding: effectiveEncoding,
                size: total,
                truncated: false,
            };
        }
        catch (error) {
            // Release the upstream body on every failure path, not only when
            // the byte cap was hit; destroy() is a no-op on destroyed streams.
            source.destroy();
            guard.destroy();
            if (signal?.aborted)
                throw createFetchError({ kind: 'aborted' }, url);
            if (error instanceof FetchError)
                throw error;
            if (error instanceof MaxBytesError) {
                return {
                    buffer: Buffer.concat(chunks, total),
                    encoding: effectiveEncoding,
                    size: total,
                    truncated: true,
                };
            }
            throw error;
        }
        finally {
            if (signal) {
                signal.removeEventListener('abort', abortHandler);
            }
        }
    }
}
|
|
1304
848
|
async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry, reader, maxBytes, mode, signal) {
|
|
@@ -1320,20 +864,172 @@ async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry,
|
|
|
1320
864
|
telemetry.recordResponse(ctx, decodedResponse, size);
|
|
1321
865
|
return { kind: 'buffer', buffer, encoding, size, truncated };
|
|
1322
866
|
}
|
|
1323
|
-
function
|
|
1324
|
-
|
|
1325
|
-
return
|
|
867
|
+
/**
 * Duck-type check for a web ReadableStream: the value must be an object with
 * `getReader`, `cancel` and `tee` functions and a boolean `locked` property.
 *
 * @param value Candidate value.
 * @returns `true` when every probe matches, otherwise `false`.
 */
function isReadableStreamLike(value) {
    if (!isObject(value)) {
        return false;
    }
    // Probed in the same order as listed; `every` stops at the first miss.
    const hasStreamMethods = ['getReader', 'cancel', 'tee'].every((method) => typeof value[method] === 'function');
    return hasStreamMethods && typeof value['locked'] === 'boolean';
}
|
|
875
|
+
/**
 * Throws unless `stream` passes `isReadableStreamLike`.
 *
 * @param stream Value expected to be a web ReadableStream.
 * @param url URL attached to the thrown error.
 * @param stage Label of the pipeline stage, included in the error details.
 * @throws FetchError (status 500, reason `invalid_stream`) when the check fails.
 */
function assertReadableStreamLike(stream, url, stage) {
    if (!isReadableStreamLike(stream)) {
        const details = { reason: 'invalid_stream', stage };
        throw new FetchError('Invalid response stream', url, 500, details);
    }
}
|
|
883
|
+
/**
 * Validates that `stream` looks like a web ReadableStream and hands back the
 * very same object; no conversion happens here.
 *
 * @param stream Stream to validate.
 * @param url URL attached to the validation error.
 * @param stage Label of the pipeline stage, included in the error details.
 * @returns The `stream` argument, unchanged.
 */
function toNodeReadableStream(stream, url, stage) {
    const validated = stream;
    assertReadableStreamLike(validated, url, stage);
    return validated;
}
|
|
887
|
+
/**
 * Converts a Node.js Readable into a web ReadableStream with
 * `Readable.toWeb` and validates the result before returning it.
 *
 * @param stream Node.js Readable to convert.
 * @param url URL attached to the validation error.
 * @param stage Label of the pipeline stage, included in the error details.
 * @returns The converted web stream.
 */
function toWebReadableStream(stream, url, stage) {
    const webStream = Readable.toWeb(stream);
    assertReadableStreamLike(webStream, url, stage);
    return webStream;
}
|
|
892
|
+
// Diagnostics channel that FetchTelemetry publishes its events on.
const fetchChannel = diagnosticsChannel.channel('fetch-url-mcp.fetch');
// Requests whose duration exceeds this many milliseconds are logged as slow.
const SLOW_REQUEST_THRESHOLD_MS = 5_000;
|
|
894
|
+
/**
 * Records the lifecycle of an outbound HTTP request: publishes `start`,
 * `end` and `error` events on the fetch diagnostics channel and writes
 * matching log entries through the injected logger.
 */
class FetchTelemetry {
    logger;
    context;
    redactor;
    /**
     * @param logger Object exposing `debug`, `warn` and `error`.
     * @param context Object exposing `getRequestId()` and `getOperationId()`.
     * @param redactor Object exposing `redact(url)`.
     */
    constructor(logger, context, redactor) {
        this.logger = logger;
        this.context = context;
        this.redactor = redactor;
    }
    /** Returns `url` after passing it through the injected redactor. */
    redact(url) {
        return this.redactor.redact(url);
    }
    /**
     * Picks the correlation ids out of a request context.
     *
     * @returns An object holding `contextRequestId` and/or `operationId`, each only when truthy.
     */
    contextFields(ctx) {
        const fields = {};
        if (ctx.contextRequestId) {
            fields.contextRequestId = ctx.contextRequestId;
        }
        if (ctx.operationId) {
            fields.operationId = ctx.operationId;
        }
        return fields;
    }
    /**
     * Opens a telemetry context for a request, then publishes and logs it.
     *
     * @param url Request URL; stored and emitted in redacted form only.
     * @param method HTTP method; upper-cased before use.
     * @returns The context to pass to `recordResponse` / `recordError`.
     */
    start(url, method) {
        const redactedUrl = this.redactor.redact(url);
        const parentRequestId = this.context.getRequestId();
        const parentOperationId = this.context.getOperationId();
        const ctx = {
            requestId: randomUUID(),
            startTime: performance.now(),
            url: redactedUrl,
            method: method.toUpperCase(),
        };
        if (parentRequestId) {
            ctx.contextRequestId = parentRequestId;
        }
        if (parentOperationId) {
            ctx.operationId = parentOperationId;
        }
        const summary = {
            requestId: ctx.requestId,
            method: ctx.method,
            url: ctx.url,
            ...this.contextFields(ctx),
        };
        this.publish({ v: 1, type: 'start', ...summary });
        this.logger.debug('HTTP Request', summary);
        return ctx;
    }
    /**
     * Publishes the `end` event and logs the response, adding a warning when
     * the request ran longer than `SLOW_REQUEST_THRESHOLD_MS`.
     *
     * @param context Context returned by `start`.
     * @param response Response whose status and headers are reported.
     * @param contentSize Optional size, logged when no content-length header exists.
     */
    recordResponse(context, response, contentSize) {
        const elapsedMs = performance.now() - context.startTime;
        const elapsedLabel = `${Math.round(elapsedMs)}ms`;
        const correlation = this.contextFields(context);
        this.publish({
            v: 1,
            type: 'end',
            requestId: context.requestId,
            status: response.status,
            duration: elapsedMs,
            ...correlation,
        });
        const contentType = response.headers.get('content-type') ?? undefined;
        // The header wins; the measured size is only a fallback.
        let size = response.headers.get('content-length') ?? undefined;
        if (size === undefined && contentSize !== undefined) {
            size = String(contentSize);
        }
        const entry = {
            requestId: context.requestId,
            status: response.status,
            url: context.url,
            duration: elapsedLabel,
            ...correlation,
        };
        if (contentType) {
            entry.contentType = contentType;
        }
        if (size) {
            entry.size = size;
        }
        this.logger.debug('HTTP Response', entry);
        if (elapsedMs <= SLOW_REQUEST_THRESHOLD_MS) {
            return;
        }
        this.logger.warn('Slow HTTP request detected', {
            requestId: context.requestId,
            url: context.url,
            duration: elapsedLabel,
            ...correlation,
        });
    }
    /**
     * Publishes the `error` event and logs the failure; status 429 is logged
     * at warn level, everything else at error level.
     *
     * @param context Context returned by `start`.
     * @param error Thrown value; normalised with `toError`.
     * @param status Optional HTTP status associated with the failure.
     */
    recordError(context, error, status) {
        const elapsedMs = performance.now() - context.startTime;
        const err = toError(error);
        const code = isSystemError(err) ? err.code : undefined;
        const correlation = this.contextFields(context);
        const event = {
            v: 1,
            type: 'error',
            requestId: context.requestId,
            url: context.url,
            error: err.message,
            duration: elapsedMs,
        };
        if (code !== undefined) {
            event.code = code;
        }
        if (status !== undefined) {
            event.status = status;
        }
        Object.assign(event, correlation);
        this.publish(event);
        const logData = {
            requestId: context.requestId,
            url: context.url,
            status,
            code,
            error: err.message,
            ...correlation,
        };
        const level = status === 429 ? 'warn' : 'error';
        this.logger[level]('HTTP Request Error', logData);
    }
    /** Publishes `event` on the fetch channel when it has subscribers; failures are ignored. */
    publish(event) {
        if (fetchChannel.hasSubscribers) {
            try {
                fetchChannel.publish(event);
            }
            catch {
                // Best-effort telemetry; never crash request path.
            }
        }
    }
}
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1020
|
+
// Default collaborators wired into the module-level fetch pipeline.

// Logger backed by the module's log helpers.
const defaultLogger = { debug: logDebug, warn: logWarn, error: logError };

// Supplies the ambient request/operation ids used to correlate telemetry.
const defaultContext = { getRequestId: getRequestId, getOperationId: getOperationId };

// Redacts URLs before they are logged or published.
const defaultRedactor = { redact: redactUrl };

// Looks up `globalThis.fetch` on each call rather than capturing it once.
const defaultFetch = (input, init) => globalThis.fetch(input, init);
|
|
1337
1033
|
class HttpFetcher {
|
|
1338
1034
|
fetcherConfig;
|
|
1339
1035
|
redirectFollower;
|
|
@@ -1387,6 +1083,29 @@ class HttpFetcher {
|
|
|
1387
1083
|
}
|
|
1388
1084
|
}
|
|
1389
1085
|
}
|
|
1086
|
+
// Request headers sent with every outbound fetch.
const DEFAULT_HEADERS = {
    'User-Agent': config.fetcher.userAgent,
    Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    Connection: 'keep-alive',
};
/**
 * Builds the header set for an outbound request.
 *
 * @returns A fresh shallow copy of `DEFAULT_HEADERS`, so a caller that adds
 * or changes a header cannot alter the defaults used by later requests.
 */
function buildHeaders() {
    return { ...DEFAULT_HEADERS };
}
|
|
1096
|
+
/**
 * Combines an optional caller signal with a request timeout.
 *
 * @param timeoutMs Timeout in milliseconds; values <= 0 disable the timeout.
 * @param external Optional caller-supplied AbortSignal.
 * @returns `external` unchanged when no timeout applies; otherwise the
 * timeout signal, merged with `external` when one was supplied.
 */
function buildRequestSignal(timeoutMs, external) {
    const hasTimeout = timeoutMs > 0;
    if (!hasTimeout) {
        return external;
    }
    const timeoutSignal = AbortSignal.timeout(timeoutMs);
    if (external) {
        return AbortSignal.any([external, timeoutSignal]);
    }
    return timeoutSignal;
}
|
|
1102
|
+
/**
 * Builds the `init` object for a GET request.
 *
 * @param headers Headers to send.
 * @param signal Optional AbortSignal; the `signal` key is omitted when falsy.
 * @returns `{ method: 'GET', headers }`, plus `signal` when provided.
 */
function buildRequestInit(headers, signal) {
    const init = { method: 'GET', headers };
    if (signal) {
        init.signal = signal;
    }
    return init;
}
|
|
1390
1109
|
const ipBlocker = new IpBlocker(config.security);
|
|
1391
1110
|
const urlNormalizer = new UrlNormalizer(config.constants, config.security, ipBlocker, BLOCKED_HOST_SUFFIXES);
|
|
1392
1111
|
const rawUrlTransformer = new RawUrlTransformer(defaultLogger);
|
|
@@ -1394,7 +1113,6 @@ const dnsResolver = new SafeDnsResolver(ipBlocker, config.security, BLOCKED_HOST
|
|
|
1394
1113
|
const telemetry = new FetchTelemetry(defaultLogger, defaultContext, defaultRedactor);
|
|
1395
1114
|
const normalizeRedirectUrl = (url) => urlNormalizer.validateAndNormalize(url);
|
|
1396
1115
|
const dnsPreflight = createDnsPreflight(dnsResolver);
|
|
1397
|
-
// Redirect follower with per-hop DNS preflight.
|
|
1398
1116
|
const secureRedirectFollower = new RedirectFollower(defaultFetch, normalizeRedirectUrl, dnsPreflight);
|
|
1399
1117
|
const responseReader = new ResponseTextReader();
|
|
1400
1118
|
const httpFetcher = new HttpFetcher(config.fetcher, secureRedirectFollower, responseReader, telemetry);
|
|
@@ -1436,4 +1154,3 @@ export async function fetchNormalizedUrl(normalizedUrl, options) {
|
|
|
1436
1154
|
/**
 * Public entry point that delegates to the module-level `httpFetcher`.
 *
 * @param normalizedUrl URL to fetch; NOTE(review): presumably already validated and normalized by the caller — confirm.
 * @param options Options forwarded untouched to the fetcher.
 * @returns Whatever `httpFetcher.fetchNormalizedUrlBuffer` resolves to.
 */
export async function fetchNormalizedUrlBuffer(normalizedUrl, options) {
    const pending = httpFetcher.fetchNormalizedUrlBuffer(normalizedUrl, options);
    return pending;
}
|
|
1439
|
-
//# sourceMappingURL=fetch.js.map
|