@j0hanz/fetch-url-mcp 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.d.ts +2 -3
- package/dist/cli.js +1 -2
- package/dist/http/auth.d.ts +5 -3
- package/dist/http/auth.js +64 -15
- package/dist/http/health.d.ts +1 -2
- package/dist/http/health.js +7 -18
- package/dist/http/helpers.d.ts +3 -4
- package/dist/http/helpers.js +21 -21
- package/dist/http/native.d.ts +0 -1
- package/dist/http/native.js +34 -26
- package/dist/http/rate-limit.d.ts +0 -1
- package/dist/http/rate-limit.js +3 -4
- package/dist/index.d.ts +0 -1
- package/dist/index.js +17 -18
- package/dist/lib/{markdown-cleanup.d.ts → content.d.ts} +4 -2
- package/dist/lib/content.js +1356 -0
- package/dist/lib/core.d.ts +253 -0
- package/dist/lib/core.js +1228 -0
- package/dist/lib/{tool-pipeline.d.ts → fetch-pipeline.d.ts} +1 -2
- package/dist/lib/{tool-pipeline.js → fetch-pipeline.js} +10 -19
- package/dist/lib/{fetch.d.ts → http.d.ts} +7 -9
- package/dist/lib/{fetch.js → http.js} +706 -944
- package/dist/lib/mcp-tools.d.ts +28 -0
- package/dist/lib/mcp-tools.js +107 -0
- package/dist/lib/{tool-progress.d.ts → progress.d.ts} +0 -1
- package/dist/lib/{tool-progress.js → progress.js} +8 -13
- package/dist/lib/task-handlers.d.ts +5 -0
- package/dist/lib/{mcp.js → task-handlers.js} +56 -12
- package/dist/lib/url.d.ts +70 -0
- package/dist/lib/url.js +686 -0
- package/dist/lib/utils.d.ts +58 -0
- package/dist/lib/utils.js +304 -0
- package/dist/prompts/index.d.ts +0 -1
- package/dist/prompts/index.js +0 -1
- package/dist/resources/index.d.ts +0 -1
- package/dist/resources/index.js +74 -33
- package/dist/resources/instructions.d.ts +0 -1
- package/dist/resources/instructions.js +2 -2
- package/dist/schemas/inputs.d.ts +0 -1
- package/dist/schemas/inputs.js +2 -3
- package/dist/schemas/outputs.d.ts +0 -1
- package/dist/schemas/outputs.js +1 -2
- package/dist/server.d.ts +0 -1
- package/dist/server.js +16 -26
- package/dist/tasks/execution.d.ts +0 -1
- package/dist/tasks/execution.js +27 -24
- package/dist/tasks/manager.d.ts +7 -3
- package/dist/tasks/manager.js +53 -34
- package/dist/tasks/owner.d.ts +1 -2
- package/dist/tasks/owner.js +1 -2
- package/dist/tasks/tool-registry.d.ts +1 -2
- package/dist/tasks/tool-registry.js +0 -1
- package/dist/tools/fetch-url.d.ts +1 -2
- package/dist/tools/fetch-url.js +39 -31
- package/dist/tools/index.d.ts +0 -1
- package/dist/tools/index.js +0 -1
- package/dist/transform/html-translators.d.ts +1 -0
- package/dist/transform/html-translators.js +454 -0
- package/dist/transform/metadata.d.ts +4 -0
- package/dist/transform/metadata.js +183 -0
- package/dist/transform/transform.d.ts +0 -1
- package/dist/transform/transform.js +24 -641
- package/dist/transform/types.d.ts +9 -11
- package/dist/transform/types.js +0 -1
- package/dist/transform/worker-pool.d.ts +0 -1
- package/dist/transform/worker-pool.js +7 -16
- package/dist/transform/workers/shared.d.ts +0 -1
- package/dist/transform/workers/shared.js +1 -2
- package/dist/transform/workers/transform-child.d.ts +0 -1
- package/dist/transform/workers/transform-child.js +0 -1
- package/dist/transform/workers/transform-worker.d.ts +0 -1
- package/dist/transform/workers/transform-worker.js +0 -1
- package/package.json +6 -3
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/http/auth.d.ts.map +0 -1
- package/dist/http/auth.js.map +0 -1
- package/dist/http/health.d.ts.map +0 -1
- package/dist/http/health.js.map +0 -1
- package/dist/http/helpers.d.ts.map +0 -1
- package/dist/http/helpers.js.map +0 -1
- package/dist/http/native.d.ts.map +0 -1
- package/dist/http/native.js.map +0 -1
- package/dist/http/rate-limit.d.ts.map +0 -1
- package/dist/http/rate-limit.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/lib/cache.d.ts +0 -54
- package/dist/lib/cache.d.ts.map +0 -1
- package/dist/lib/cache.js +0 -264
- package/dist/lib/cache.js.map +0 -1
- package/dist/lib/config.d.ts +0 -143
- package/dist/lib/config.d.ts.map +0 -1
- package/dist/lib/config.js +0 -476
- package/dist/lib/config.js.map +0 -1
- package/dist/lib/crypto.d.ts +0 -4
- package/dist/lib/crypto.d.ts.map +0 -1
- package/dist/lib/crypto.js +0 -56
- package/dist/lib/crypto.js.map +0 -1
- package/dist/lib/dom-noise-removal.d.ts +0 -2
- package/dist/lib/dom-noise-removal.d.ts.map +0 -1
- package/dist/lib/dom-noise-removal.js +0 -494
- package/dist/lib/dom-noise-removal.js.map +0 -1
- package/dist/lib/download.d.ts +0 -4
- package/dist/lib/download.d.ts.map +0 -1
- package/dist/lib/download.js +0 -106
- package/dist/lib/download.js.map +0 -1
- package/dist/lib/errors.d.ts +0 -14
- package/dist/lib/errors.d.ts.map +0 -1
- package/dist/lib/errors.js +0 -72
- package/dist/lib/errors.js.map +0 -1
- package/dist/lib/fetch-content.d.ts +0 -5
- package/dist/lib/fetch-content.d.ts.map +0 -1
- package/dist/lib/fetch-content.js +0 -164
- package/dist/lib/fetch-content.js.map +0 -1
- package/dist/lib/fetch-stream.d.ts +0 -5
- package/dist/lib/fetch-stream.d.ts.map +0 -1
- package/dist/lib/fetch-stream.js +0 -29
- package/dist/lib/fetch-stream.js.map +0 -1
- package/dist/lib/fetch.d.ts.map +0 -1
- package/dist/lib/fetch.js.map +0 -1
- package/dist/lib/host-normalization.d.ts +0 -2
- package/dist/lib/host-normalization.d.ts.map +0 -1
- package/dist/lib/host-normalization.js +0 -91
- package/dist/lib/host-normalization.js.map +0 -1
- package/dist/lib/ip-blocklist.d.ts +0 -9
- package/dist/lib/ip-blocklist.d.ts.map +0 -1
- package/dist/lib/ip-blocklist.js +0 -79
- package/dist/lib/ip-blocklist.js.map +0 -1
- package/dist/lib/json.d.ts +0 -2
- package/dist/lib/json.d.ts.map +0 -1
- package/dist/lib/json.js +0 -45
- package/dist/lib/json.js.map +0 -1
- package/dist/lib/language-detection.d.ts +0 -3
- package/dist/lib/language-detection.d.ts.map +0 -1
- package/dist/lib/language-detection.js +0 -355
- package/dist/lib/language-detection.js.map +0 -1
- package/dist/lib/markdown-cleanup.d.ts.map +0 -1
- package/dist/lib/markdown-cleanup.js +0 -532
- package/dist/lib/markdown-cleanup.js.map +0 -1
- package/dist/lib/mcp-lifecycle.d.ts +0 -5
- package/dist/lib/mcp-lifecycle.d.ts.map +0 -1
- package/dist/lib/mcp-lifecycle.js +0 -51
- package/dist/lib/mcp-lifecycle.js.map +0 -1
- package/dist/lib/mcp-validator.d.ts +0 -17
- package/dist/lib/mcp-validator.d.ts.map +0 -1
- package/dist/lib/mcp-validator.js +0 -45
- package/dist/lib/mcp-validator.js.map +0 -1
- package/dist/lib/mcp.d.ts +0 -4
- package/dist/lib/mcp.d.ts.map +0 -1
- package/dist/lib/mcp.js.map +0 -1
- package/dist/lib/observability.d.ts +0 -23
- package/dist/lib/observability.d.ts.map +0 -1
- package/dist/lib/observability.js +0 -238
- package/dist/lib/observability.js.map +0 -1
- package/dist/lib/server-tuning.d.ts +0 -15
- package/dist/lib/server-tuning.d.ts.map +0 -1
- package/dist/lib/server-tuning.js +0 -49
- package/dist/lib/server-tuning.js.map +0 -1
- package/dist/lib/session.d.ts +0 -45
- package/dist/lib/session.d.ts.map +0 -1
- package/dist/lib/session.js +0 -263
- package/dist/lib/session.js.map +0 -1
- package/dist/lib/timer-utils.d.ts +0 -13
- package/dist/lib/timer-utils.d.ts.map +0 -1
- package/dist/lib/timer-utils.js +0 -44
- package/dist/lib/timer-utils.js.map +0 -1
- package/dist/lib/tool-errors.d.ts +0 -12
- package/dist/lib/tool-errors.d.ts.map +0 -1
- package/dist/lib/tool-errors.js +0 -55
- package/dist/lib/tool-errors.js.map +0 -1
- package/dist/lib/tool-pipeline.d.ts.map +0 -1
- package/dist/lib/tool-pipeline.js.map +0 -1
- package/dist/lib/tool-progress.d.ts.map +0 -1
- package/dist/lib/tool-progress.js.map +0 -1
- package/dist/lib/type-guards.d.ts +0 -16
- package/dist/lib/type-guards.d.ts.map +0 -1
- package/dist/lib/type-guards.js +0 -13
- package/dist/lib/type-guards.js.map +0 -1
- package/dist/prompts/index.d.ts.map +0 -1
- package/dist/prompts/index.js.map +0 -1
- package/dist/resources/index.d.ts.map +0 -1
- package/dist/resources/index.js.map +0 -1
- package/dist/resources/instructions.d.ts.map +0 -1
- package/dist/resources/instructions.js.map +0 -1
- package/dist/schemas/inputs.d.ts.map +0 -1
- package/dist/schemas/inputs.js.map +0 -1
- package/dist/schemas/outputs.d.ts.map +0 -1
- package/dist/schemas/outputs.js.map +0 -1
- package/dist/server.d.ts.map +0 -1
- package/dist/server.js.map +0 -1
- package/dist/tasks/execution.d.ts.map +0 -1
- package/dist/tasks/execution.js.map +0 -1
- package/dist/tasks/manager.d.ts.map +0 -1
- package/dist/tasks/manager.js.map +0 -1
- package/dist/tasks/owner.d.ts.map +0 -1
- package/dist/tasks/owner.js.map +0 -1
- package/dist/tasks/tool-registry.d.ts.map +0 -1
- package/dist/tasks/tool-registry.js.map +0 -1
- package/dist/tools/fetch-url.d.ts.map +0 -1
- package/dist/tools/fetch-url.js.map +0 -1
- package/dist/tools/index.d.ts.map +0 -1
- package/dist/tools/index.js.map +0 -1
- package/dist/transform/transform.d.ts.map +0 -1
- package/dist/transform/transform.js.map +0 -1
- package/dist/transform/types.d.ts.map +0 -1
- package/dist/transform/types.js.map +0 -1
- package/dist/transform/worker-pool.d.ts.map +0 -1
- package/dist/transform/worker-pool.js.map +0 -1
- package/dist/transform/workers/shared.d.ts.map +0 -1
- package/dist/transform/workers/shared.js.map +0 -1
- package/dist/transform/workers/transform-child.d.ts.map +0 -1
- package/dist/transform/workers/transform-child.js.map +0 -1
- package/dist/transform/workers/transform-worker.d.ts.map +0 -1
- package/dist/transform/workers/transform-worker.js.map +0 -1
|
@@ -1,537 +1,277 @@
|
|
|
1
1
|
import { Buffer } from 'node:buffer';
|
|
2
2
|
import { randomUUID } from 'node:crypto';
|
|
3
3
|
import diagnosticsChannel from 'node:diagnostics_channel';
|
|
4
|
-
import
|
|
4
|
+
import {} from 'node:http';
|
|
5
5
|
import { isIP } from 'node:net';
|
|
6
|
+
import { posix as pathPosix } from 'node:path';
|
|
6
7
|
import { performance } from 'node:perf_hooks';
|
|
7
8
|
import { PassThrough, Readable, Transform } from 'node:stream';
|
|
8
9
|
import { buffer as consumeBuffer } from 'node:stream/consumers';
|
|
9
10
|
import { finished, pipeline } from 'node:stream/promises';
|
|
11
|
+
import {} from 'node:stream/web';
|
|
10
12
|
import tls from 'node:tls';
|
|
11
13
|
import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
|
|
12
14
|
import { Agent } from 'undici';
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
15
|
-
import {
|
|
16
|
-
import {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
warn: logWarn,
|
|
23
|
-
error: logError,
|
|
24
|
-
};
|
|
25
|
-
const defaultContext = {
|
|
26
|
-
getRequestId,
|
|
27
|
-
getOperationId,
|
|
15
|
+
import { z } from 'zod';
|
|
16
|
+
import { get as cacheGet, config, getOperationId, getRequestId, logDebug, logError, logWarn, parseCachedPayload, redactUrl, resolveCachedPayloadContent, } from './core.js';
|
|
17
|
+
import { BLOCKED_HOST_SUFFIXES, createDnsPreflight, IpBlocker, RawUrlTransformer, SafeDnsResolver, UrlNormalizer, VALIDATION_ERROR_CODE, } from './url.js';
|
|
18
|
+
import { createErrorWithCode, FetchError, isError, isObject, isSystemError, toError, } from './utils.js';
|
|
19
|
+
const FILENAME_RULES = {
|
|
20
|
+
MAX_LEN: 200,
|
|
21
|
+
UNSAFE_CHARS: /[<>:"/\\|?*\p{C}]/gu,
|
|
22
|
+
WHITESPACE: /\s+/g,
|
|
23
|
+
EXTENSIONS: /\.(html?|php|aspx?|jsp)$/i,
|
|
28
24
|
};
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class IpBlocker {
|
|
37
|
-
security;
|
|
38
|
-
blockList = createDefaultBlockList();
|
|
39
|
-
constructor(security) {
|
|
40
|
-
this.security = security;
|
|
41
|
-
}
|
|
42
|
-
isBlockedIp(candidate) {
|
|
43
|
-
const normalized = candidate.trim().toLowerCase();
|
|
44
|
-
if (isCloudMetadataHost(normalized))
|
|
45
|
-
return true;
|
|
46
|
-
if (isLocalFetchAllowed())
|
|
47
|
-
return false;
|
|
48
|
-
if (!normalized)
|
|
49
|
-
return false;
|
|
50
|
-
if (this.security.blockedHosts.has(normalized))
|
|
51
|
-
return true;
|
|
52
|
-
const normalizedIp = normalizeIpForBlockList(normalized);
|
|
53
|
-
return normalizedIp
|
|
54
|
-
? this.blockList.check(normalizedIp.ip, normalizedIp.family)
|
|
55
|
-
: false;
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
const VALIDATION_ERROR_CODE = 'VALIDATION_ERROR';
|
|
59
|
-
function createValidationError(message) {
|
|
60
|
-
return createErrorWithCode(message, VALIDATION_ERROR_CODE);
|
|
61
|
-
}
|
|
62
|
-
const BLOCKED_HOST_SUFFIXES = ['.local', '.internal'];
|
|
63
|
-
// This list is not exhaustive but covers the most common cloud metadata endpoints.
|
|
64
|
-
const CLOUD_METADATA_HOSTS = new Set([
|
|
65
|
-
'169.254.169.254', // AWS / GCP / Azure
|
|
66
|
-
'metadata.google.internal', // GCP
|
|
67
|
-
'100.100.100.200', // Alibaba Cloud
|
|
68
|
-
'fd00:ec2::254', // AWS IPv6
|
|
69
|
-
]);
|
|
70
|
-
function isCloudMetadataHost(hostname) {
|
|
71
|
-
const lowered = hostname.toLowerCase();
|
|
72
|
-
if (CLOUD_METADATA_HOSTS.has(lowered))
|
|
73
|
-
return true;
|
|
74
|
-
const normalized = normalizeIpForBlockList(lowered);
|
|
75
|
-
return normalized !== null && CLOUD_METADATA_HOSTS.has(normalized.ip);
|
|
76
|
-
}
|
|
77
|
-
class UrlNormalizer {
|
|
78
|
-
constants;
|
|
79
|
-
security;
|
|
80
|
-
ipBlocker;
|
|
81
|
-
blockedHostSuffixes;
|
|
82
|
-
constructor(constants, security, ipBlocker, blockedHostSuffixes) {
|
|
83
|
-
this.constants = constants;
|
|
84
|
-
this.security = security;
|
|
85
|
-
this.ipBlocker = ipBlocker;
|
|
86
|
-
this.blockedHostSuffixes = blockedHostSuffixes;
|
|
87
|
-
}
|
|
88
|
-
normalize(urlString) {
|
|
89
|
-
const trimmedUrl = this.requireTrimmedUrl(urlString);
|
|
90
|
-
if (trimmedUrl.length > this.constants.maxUrlLength) {
|
|
91
|
-
throw createValidationError(`URL exceeds maximum length of ${this.constants.maxUrlLength} characters`);
|
|
92
|
-
}
|
|
93
|
-
let url;
|
|
94
|
-
try {
|
|
95
|
-
url = new URL(trimmedUrl);
|
|
96
|
-
}
|
|
97
|
-
catch {
|
|
98
|
-
throw createValidationError('Invalid URL format');
|
|
99
|
-
}
|
|
100
|
-
if (url.protocol !== 'http:' && url.protocol !== 'https:') {
|
|
101
|
-
throw createValidationError(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`);
|
|
102
|
-
}
|
|
103
|
-
if (url.username || url.password) {
|
|
104
|
-
throw createValidationError('URLs with embedded credentials are not allowed');
|
|
105
|
-
}
|
|
106
|
-
const hostname = this.normalizeHostname(url);
|
|
107
|
-
this.assertHostnameAllowed(hostname);
|
|
108
|
-
url.hostname = hostname;
|
|
109
|
-
return { normalizedUrl: url.href, hostname };
|
|
110
|
-
}
|
|
111
|
-
validateAndNormalize(urlString) {
|
|
112
|
-
return this.normalize(urlString).normalizedUrl;
|
|
113
|
-
}
|
|
114
|
-
requireTrimmedUrl(urlString) {
|
|
115
|
-
if (!urlString || typeof urlString !== 'string') {
|
|
116
|
-
throw createValidationError('URL is required');
|
|
117
|
-
}
|
|
118
|
-
const trimmed = urlString.trim();
|
|
119
|
-
if (!trimmed)
|
|
120
|
-
throw createValidationError('URL cannot be empty');
|
|
121
|
-
return trimmed;
|
|
122
|
-
}
|
|
123
|
-
normalizeHostname(url) {
|
|
124
|
-
const hostname = url.hostname.toLowerCase().replace(/\.+$/, '');
|
|
125
|
-
if (!hostname) {
|
|
126
|
-
throw createValidationError('URL must have a valid hostname');
|
|
127
|
-
}
|
|
128
|
-
return hostname;
|
|
129
|
-
}
|
|
130
|
-
assertHostnameAllowed(hostname) {
|
|
131
|
-
if (isCloudMetadataHost(hostname)) {
|
|
132
|
-
throw createValidationError(`Blocked host: ${hostname}. Cloud metadata endpoints are not allowed`);
|
|
133
|
-
}
|
|
134
|
-
if (!isLocalFetchAllowed()) {
|
|
135
|
-
if (this.security.blockedHosts.has(hostname)) {
|
|
136
|
-
throw createValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`);
|
|
137
|
-
}
|
|
138
|
-
if (this.ipBlocker.isBlockedIp(hostname)) {
|
|
139
|
-
throw createValidationError(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
if (this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix))) {
|
|
143
|
-
throw createValidationError(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
|
|
144
|
-
}
|
|
145
|
-
}
|
|
25
|
+
function sanitizeString(input) {
|
|
26
|
+
return input
|
|
27
|
+
.toLowerCase()
|
|
28
|
+
.replace(FILENAME_RULES.UNSAFE_CHARS, '')
|
|
29
|
+
.replace(FILENAME_RULES.WHITESPACE, '-')
|
|
30
|
+
.replace(/-+/g, '-')
|
|
31
|
+
.replace(/(?:^-|-$)/g, '');
|
|
146
32
|
}
|
|
147
|
-
function
|
|
148
|
-
const
|
|
149
|
-
if (
|
|
33
|
+
function resolveUrlFilenameCandidate(url) {
|
|
34
|
+
const parsed = new URL(url);
|
|
35
|
+
if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
|
|
36
|
+
return null;
|
|
37
|
+
const basename = pathPosix.basename(parsed.pathname);
|
|
38
|
+
if (!basename || basename === 'index')
|
|
150
39
|
return null;
|
|
151
|
-
|
|
40
|
+
const cleaned = basename.replace(FILENAME_RULES.EXTENSIONS, '');
|
|
41
|
+
const sanitized = sanitizeString(cleaned);
|
|
42
|
+
if (sanitized === 'index')
|
|
152
43
|
return null;
|
|
153
|
-
return
|
|
44
|
+
return sanitized || null;
|
|
154
45
|
}
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
protocol: 'http{s}?',
|
|
162
|
-
hostname: 'gist.github.com',
|
|
163
|
-
pathname: '/:user/:gistId',
|
|
164
|
-
});
|
|
165
|
-
const GITHUB_GIST_RAW_PATTERN = new URLPattern({
|
|
166
|
-
protocol: 'http{s}?',
|
|
167
|
-
hostname: 'gist.github.com',
|
|
168
|
-
pathname: '/:user/:gistId/raw/:filePath+',
|
|
169
|
-
});
|
|
170
|
-
const GITLAB_BLOB_PATTERNS = [
|
|
171
|
-
new URLPattern({
|
|
172
|
-
protocol: 'http{s}?',
|
|
173
|
-
hostname: 'gitlab.com',
|
|
174
|
-
pathname: '/:base+/-/blob/:branch/:path+',
|
|
175
|
-
}),
|
|
176
|
-
new URLPattern({
|
|
177
|
-
protocol: 'http{s}?',
|
|
178
|
-
hostname: '*:sub.gitlab.com',
|
|
179
|
-
pathname: '/:base+/-/blob/:branch/:path+',
|
|
180
|
-
}),
|
|
181
|
-
];
|
|
182
|
-
const BITBUCKET_SRC_PATTERN = new URLPattern({
|
|
183
|
-
protocol: 'http{s}?',
|
|
184
|
-
hostname: '{:sub.}?bitbucket.org',
|
|
185
|
-
pathname: '/:owner/:repo/src/:branch/:path+',
|
|
186
|
-
});
|
|
187
|
-
const BITBUCKET_RAW_RE = /bitbucket\.org\/[^/]+\/[^/]+\/raw\//;
|
|
188
|
-
const RAW_TEXT_EXTENSIONS = new Set([
|
|
189
|
-
'.md',
|
|
190
|
-
'.markdown',
|
|
191
|
-
'.txt',
|
|
192
|
-
'.json',
|
|
193
|
-
'.yaml',
|
|
194
|
-
'.yml',
|
|
195
|
-
'.toml',
|
|
196
|
-
'.xml',
|
|
197
|
-
'.csv',
|
|
198
|
-
'.rst',
|
|
199
|
-
'.adoc',
|
|
200
|
-
'.org',
|
|
201
|
-
]);
|
|
202
|
-
class RawUrlTransformer {
|
|
203
|
-
logger;
|
|
204
|
-
constructor(logger) {
|
|
205
|
-
this.logger = logger;
|
|
206
|
-
}
|
|
207
|
-
transformToRawUrl(url) {
|
|
208
|
-
if (!url)
|
|
209
|
-
return { url, transformed: false };
|
|
210
|
-
if (this.isRawUrl(url))
|
|
211
|
-
return { url, transformed: false };
|
|
212
|
-
let base;
|
|
213
|
-
let hash;
|
|
214
|
-
let parsed;
|
|
215
|
-
try {
|
|
216
|
-
parsed = new URL(url);
|
|
217
|
-
base = parsed.origin + parsed.pathname;
|
|
218
|
-
({ hash } = parsed);
|
|
219
|
-
}
|
|
220
|
-
catch {
|
|
221
|
-
({ base, hash } = this.splitParams(url));
|
|
222
|
-
}
|
|
223
|
-
const match = this.tryTransformWithUrl(base, hash, parsed);
|
|
224
|
-
if (!match)
|
|
225
|
-
return { url, transformed: false };
|
|
226
|
-
this.logger.debug('URL transformed to raw content URL', {
|
|
227
|
-
platform: match.platform,
|
|
228
|
-
original: url.substring(0, 100),
|
|
229
|
-
transformed: match.url.substring(0, 100),
|
|
230
|
-
});
|
|
231
|
-
return { url: match.url, transformed: true, platform: match.platform };
|
|
232
|
-
}
|
|
233
|
-
isRawTextContentUrl(urlString) {
|
|
234
|
-
if (!urlString)
|
|
235
|
-
return false;
|
|
236
|
-
if (this.isRawUrl(urlString))
|
|
237
|
-
return true;
|
|
238
|
-
try {
|
|
239
|
-
const url = new URL(urlString);
|
|
240
|
-
const pathname = url.pathname.toLowerCase();
|
|
241
|
-
const lastDot = pathname.lastIndexOf('.');
|
|
242
|
-
if (lastDot === -1)
|
|
243
|
-
return false;
|
|
244
|
-
return RAW_TEXT_EXTENSIONS.has(pathname.slice(lastDot));
|
|
245
|
-
}
|
|
246
|
-
catch {
|
|
247
|
-
const { base } = this.splitParams(urlString);
|
|
248
|
-
const lowerBase = base.toLowerCase();
|
|
249
|
-
const lastDot = lowerBase.lastIndexOf('.');
|
|
250
|
-
if (lastDot === -1)
|
|
251
|
-
return false;
|
|
252
|
-
return RAW_TEXT_EXTENSIONS.has(lowerBase.slice(lastDot));
|
|
253
|
-
}
|
|
254
|
-
}
|
|
255
|
-
isRawUrl(url) {
|
|
256
|
-
const lower = url.toLowerCase();
|
|
257
|
-
return (lower.includes('raw.githubusercontent.com') ||
|
|
258
|
-
lower.includes('gist.githubusercontent.com') ||
|
|
259
|
-
lower.includes('/-/raw/') ||
|
|
260
|
-
BITBUCKET_RAW_RE.test(lower));
|
|
261
|
-
}
|
|
262
|
-
splitParams(urlString) {
|
|
263
|
-
const hashIndex = urlString.indexOf('#');
|
|
264
|
-
const queryIndex = urlString.indexOf('?');
|
|
265
|
-
const endIndex = Math.min(queryIndex === -1 ? urlString.length : queryIndex, hashIndex === -1 ? urlString.length : hashIndex);
|
|
266
|
-
const hash = hashIndex !== -1 ? urlString.slice(hashIndex) : '';
|
|
267
|
-
return { base: urlString.slice(0, endIndex), hash };
|
|
268
|
-
}
|
|
269
|
-
tryTransformWithUrl(base, hash, preParsed) {
|
|
270
|
-
let parsed = preParsed ?? null;
|
|
271
|
-
if (!parsed) {
|
|
272
|
-
try {
|
|
273
|
-
parsed = new URL(base);
|
|
274
|
-
}
|
|
275
|
-
catch {
|
|
276
|
-
// Ignore invalid URLs
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
if (!parsed)
|
|
280
|
-
return null;
|
|
281
|
-
if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
|
|
282
|
-
return null;
|
|
283
|
-
const gist = this.transformGithubGist(base, hash);
|
|
284
|
-
if (gist)
|
|
285
|
-
return gist;
|
|
286
|
-
const github = this.transformGithubBlob(base);
|
|
287
|
-
if (github)
|
|
288
|
-
return github;
|
|
289
|
-
const gitlab = this.transformGitLab(base, parsed.origin);
|
|
290
|
-
if (gitlab)
|
|
291
|
-
return gitlab;
|
|
292
|
-
const bitbucket = this.transformBitbucket(base, parsed.origin);
|
|
293
|
-
if (bitbucket)
|
|
294
|
-
return bitbucket;
|
|
46
|
+
function truncateFilenameBase(name, extension) {
|
|
47
|
+
const maxBase = FILENAME_RULES.MAX_LEN - extension.length;
|
|
48
|
+
return name.length > maxBase ? name.substring(0, maxBase) : name;
|
|
49
|
+
}
|
|
50
|
+
function resolveTitleFilenameCandidate(title) {
|
|
51
|
+
if (!title)
|
|
295
52
|
return null;
|
|
53
|
+
return sanitizeString(title) || null;
|
|
54
|
+
}
|
|
55
|
+
function resolveFilenameBase(url, title, hashFallback) {
|
|
56
|
+
try {
|
|
57
|
+
const fromUrl = resolveUrlFilenameCandidate(url);
|
|
58
|
+
if (fromUrl)
|
|
59
|
+
return fromUrl;
|
|
296
60
|
}
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
const match = GITHUB_GIST_PATTERN.exec(url);
|
|
329
|
-
if (!match)
|
|
330
|
-
return null;
|
|
331
|
-
const groups = match.pathname.groups;
|
|
332
|
-
const user = getPatternGroup(groups, 'user');
|
|
333
|
-
const gistId = getPatternGroup(groups, 'gistId');
|
|
334
|
-
if (!user || !gistId)
|
|
335
|
-
return null;
|
|
336
|
-
let filePath = '';
|
|
337
|
-
if (hash.startsWith('#file-')) {
|
|
338
|
-
const filename = hash.slice('#file-'.length).replace(/-/g, '.');
|
|
339
|
-
if (filename)
|
|
340
|
-
filePath = `/${filename}`;
|
|
341
|
-
}
|
|
342
|
-
return {
|
|
343
|
-
url: `https://gist.githubusercontent.com/${user}/${gistId}/raw${filePath}`,
|
|
344
|
-
platform: 'github-gist',
|
|
345
|
-
};
|
|
61
|
+
catch {
|
|
62
|
+
// Ignore URL parsing errors and continue fallbacks.
|
|
63
|
+
}
|
|
64
|
+
const fromTitle = resolveTitleFilenameCandidate(title);
|
|
65
|
+
if (fromTitle)
|
|
66
|
+
return fromTitle;
|
|
67
|
+
if (hashFallback)
|
|
68
|
+
return hashFallback.substring(0, 16);
|
|
69
|
+
return `download-${Date.now()}`;
|
|
70
|
+
}
|
|
71
|
+
export function generateSafeFilename(url, title, hashFallback, extension = '.md') {
|
|
72
|
+
const name = resolveFilenameBase(url, title, hashFallback);
|
|
73
|
+
return `${truncateFilenameBase(name, extension)}${extension}`;
|
|
74
|
+
}
|
|
75
|
+
const DownloadParamsSchema = z.strictObject({
|
|
76
|
+
namespace: z.literal('markdown'),
|
|
77
|
+
hash: z
|
|
78
|
+
.string()
|
|
79
|
+
.regex(/^[a-f0-9.]+$/i)
|
|
80
|
+
.min(8)
|
|
81
|
+
.max(64),
|
|
82
|
+
});
|
|
83
|
+
function writeJsonError(res, status, message, code) {
|
|
84
|
+
res.writeHead(status, { 'Content-Type': 'application/json' });
|
|
85
|
+
res.end(JSON.stringify({ error: message, code }));
|
|
86
|
+
}
|
|
87
|
+
export function handleDownload(res, namespace, hash) {
|
|
88
|
+
const parsed = DownloadParamsSchema.safeParse({ namespace, hash });
|
|
89
|
+
if (!parsed.success) {
|
|
90
|
+
writeJsonError(res, 400, 'Invalid namespace or hash', 'BAD_REQUEST');
|
|
91
|
+
return;
|
|
346
92
|
}
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
const groups = match.pathname.groups;
|
|
353
|
-
const base = getPatternGroup(groups, 'base');
|
|
354
|
-
const branch = getPatternGroup(groups, 'branch');
|
|
355
|
-
const path = getPatternGroup(groups, 'path');
|
|
356
|
-
if (!base || !branch || !path)
|
|
357
|
-
return null;
|
|
358
|
-
return {
|
|
359
|
-
url: `${origin}/${base}/-/raw/${branch}/${path}`,
|
|
360
|
-
platform: 'gitlab',
|
|
361
|
-
};
|
|
362
|
-
}
|
|
363
|
-
return null;
|
|
93
|
+
const cacheKey = `${parsed.data.namespace}:${parsed.data.hash}`;
|
|
94
|
+
const entry = cacheGet(cacheKey, { force: true });
|
|
95
|
+
if (!entry) {
|
|
96
|
+
writeJsonError(res, 404, 'Not found or expired', 'NOT_FOUND');
|
|
97
|
+
return;
|
|
364
98
|
}
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
const owner = getPatternGroup(groups, 'owner');
|
|
371
|
-
const repo = getPatternGroup(groups, 'repo');
|
|
372
|
-
const branch = getPatternGroup(groups, 'branch');
|
|
373
|
-
const path = getPatternGroup(groups, 'path');
|
|
374
|
-
if (!owner || !repo || !branch || !path)
|
|
375
|
-
return null;
|
|
376
|
-
return {
|
|
377
|
-
url: `${origin}/${owner}/${repo}/raw/${branch}/${path}`,
|
|
378
|
-
platform: 'bitbucket',
|
|
379
|
-
};
|
|
99
|
+
const payload = parseCachedPayload(entry.content);
|
|
100
|
+
const content = payload ? resolveCachedPayloadContent(payload) : null;
|
|
101
|
+
if (!content) {
|
|
102
|
+
writeJsonError(res, 404, 'Content missing', 'NOT_FOUND');
|
|
103
|
+
return;
|
|
380
104
|
}
|
|
105
|
+
const fileName = generateSafeFilename(entry.url, payload?.title, parsed.data.hash);
|
|
106
|
+
// Safe header generation — RFC 5987 encoding for non-ASCII filenames
|
|
107
|
+
const encoded = encodeURIComponent(fileName).replace(/'/g, '%27');
|
|
108
|
+
res.setHeader('Content-Type', 'text/markdown; charset=utf-8');
|
|
109
|
+
res.setHeader('Content-Disposition', `attachment; filename="${fileName}"; filename*=UTF-8''${encoded}`);
|
|
110
|
+
res.setHeader('Cache-Control', `private, max-age=${config.cache.ttl}`);
|
|
111
|
+
res.setHeader('X-Content-Type-Options', 'nosniff');
|
|
112
|
+
res.end(content);
|
|
381
113
|
}
|
|
382
|
-
const
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
return;
|
|
401
|
-
try {
|
|
402
|
-
signal.removeEventListener('abort', abortListener);
|
|
403
|
-
}
|
|
404
|
-
catch {
|
|
405
|
-
// Ignore listener cleanup failures; they are non-fatal by design.
|
|
406
|
-
}
|
|
407
|
-
abortListener = null;
|
|
408
|
-
};
|
|
409
|
-
return { abortPromise, cleanup };
|
|
410
|
-
}
|
|
411
|
-
async function withTimeout(promise, timeoutMs, onTimeout, signal, onAbort) {
|
|
412
|
-
const timeoutSignal = timeoutMs > 0 ? AbortSignal.timeout(timeoutMs) : undefined;
|
|
413
|
-
const raceSignal = signal && timeoutSignal
|
|
414
|
-
? AbortSignal.any([signal, timeoutSignal])
|
|
415
|
-
: (signal ?? timeoutSignal);
|
|
416
|
-
if (!raceSignal)
|
|
417
|
-
return promise;
|
|
418
|
-
const abortRace = createSignalAbortRace(raceSignal, () => signal?.aborted === true, onTimeout, onAbort ?? (() => new Error('Request was canceled')));
|
|
114
|
+
const UTF8_ENCODING = 'utf-8';
|
|
115
|
+
function getCharsetFromContentType(contentType) {
|
|
116
|
+
if (!contentType)
|
|
117
|
+
return undefined;
|
|
118
|
+
const match = /charset=([^;]+)/i.exec(contentType);
|
|
119
|
+
const charsetGroup = match?.[1];
|
|
120
|
+
if (!charsetGroup)
|
|
121
|
+
return undefined;
|
|
122
|
+
let charset = charsetGroup.trim();
|
|
123
|
+
if (charset.startsWith('"') && charset.endsWith('"')) {
|
|
124
|
+
charset = charset.slice(1, -1);
|
|
125
|
+
}
|
|
126
|
+
return charset.trim();
|
|
127
|
+
}
|
|
128
|
+
function createDecoder(encoding) {
|
|
129
|
+
const fallback = () => new TextDecoder(UTF8_ENCODING);
|
|
130
|
+
if (!encoding)
|
|
131
|
+
return fallback();
|
|
419
132
|
try {
|
|
420
|
-
return
|
|
133
|
+
return new TextDecoder(encoding);
|
|
421
134
|
}
|
|
422
|
-
|
|
423
|
-
|
|
135
|
+
catch {
|
|
136
|
+
return fallback();
|
|
424
137
|
}
|
|
425
138
|
}
|
|
426
|
-
function
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
}
|
|
455
|
-
if (process.env['ALLOW_LOCAL_FETCH'] !== 'true' &&
|
|
456
|
-
this.ipBlocker.isBlockedIp(normalizedHostname)) {
|
|
457
|
-
throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Private IPs are not allowed`, 'EBLOCKED');
|
|
458
|
-
}
|
|
459
|
-
return normalizedHostname;
|
|
460
|
-
}
|
|
461
|
-
await this.assertNoBlockedCname(normalizedHostname, signal);
|
|
462
|
-
const resultPromise = dns.promises.lookup(normalizedHostname, {
|
|
463
|
-
all: true,
|
|
464
|
-
order: 'verbatim',
|
|
465
|
-
});
|
|
466
|
-
const addresses = await withTimeout(resultPromise, DNS_LOOKUP_TIMEOUT_MS, () => createErrorWithCode(`DNS lookup timed out for ${normalizedHostname}`, 'ETIMEOUT'), signal, createAbortSignalError);
|
|
467
|
-
if (addresses.length === 0 || !addresses[0]) {
|
|
468
|
-
throw createErrorWithCode(`No DNS results returned for ${normalizedHostname}`, 'ENODATA');
|
|
469
|
-
}
|
|
470
|
-
for (const addr of addresses) {
|
|
471
|
-
if (addr.family !== 4 && addr.family !== 6) {
|
|
472
|
-
throw createErrorWithCode(`Invalid address family returned for ${normalizedHostname}`, 'EINVAL');
|
|
473
|
-
}
|
|
474
|
-
if (isCloudMetadataHost(addr.address)) {
|
|
475
|
-
throw createErrorWithCode(`Blocked IP detected for ${normalizedHostname}`, 'EBLOCKED');
|
|
476
|
-
}
|
|
477
|
-
if (!isLocalFetchAllowed() && this.ipBlocker.isBlockedIp(addr.address)) {
|
|
478
|
-
throw createErrorWithCode(`Blocked IP detected for ${normalizedHostname}`, 'EBLOCKED');
|
|
479
|
-
}
|
|
480
|
-
}
|
|
481
|
-
return addresses[0].address;
|
|
482
|
-
}
|
|
483
|
-
isBlockedHostname(hostname) {
|
|
484
|
-
if (isCloudMetadataHost(hostname))
|
|
485
|
-
return true;
|
|
486
|
-
if (isLocalFetchAllowed())
|
|
139
|
+
function decodeBuffer(buffer, encoding) {
|
|
140
|
+
return createDecoder(encoding).decode(buffer);
|
|
141
|
+
}
|
|
142
|
+
/**
 * Canonicalise an encoding label for comparison: trimmed and lower-cased.
 *
 * @param {string | null | undefined} encoding - Raw label, possibly missing.
 * @returns {string} The normalised label, or `''` when `encoding` is nullish.
 */
function normalizeEncodingLabel(encoding) {
    if (encoding === null || encoding === undefined) {
        return '';
    }
    const trimmed = encoding.trim();
    return trimmed.toLowerCase();
}
|
|
145
|
+
/**
 * Report whether `encoding` names a multi-byte-unit Unicode encoding
 * (UTF-16 / UTF-32 family), whose text legitimately contains 0x00 bytes.
 *
 * Besides the `utf-16*` / `utf-32*` prefixes, every WHATWG label that maps
 * to UTF-16LE or UTF-16BE is accepted. The labels `unicode`, `csunicode` and
 * `iso-10646-ucs-2` were previously missing, so text declared with them was
 * not recognised as wide.
 *
 * @param {string | null | undefined} encoding - Encoding label in any case.
 * @returns {boolean} `true` for UTF-16 / UTF-32 style encodings.
 */
function isUnicodeWideEncoding(encoding) {
    const label = normalizeEncodingLabel(encoding);
    if (label.startsWith('utf-16') || label.startsWith('utf-32')) {
        return true;
    }
    switch (label) {
        // WHATWG labels for UTF-16LE.
        case 'ucs-2':
        case 'iso-10646-ucs-2':
        case 'csunicode':
        case 'unicode':
        case 'unicodefeff':
        // WHATWG label for UTF-16BE.
        case 'unicodefffe':
            return true;
        default:
            return false;
    }
}
|
|
153
|
+
/**
 * Byte-order-mark prefixes and the encoding each one announces.
 *
 * Order is significant: the list is scanned front to back and the first
 * matching prefix wins. The UTF-32LE mark (FF FE 00 00) begins with the
 * UTF-16LE mark (FF FE), so the 4-byte entries must precede the 2-byte ones.
 */
const BOM_SIGNATURES = [
    // 4-byte marks first.
    { encoding: 'utf-32le', bytes: [0xff, 0xfe, 0x00, 0x00] },
    { encoding: 'utf-32be', bytes: [0x00, 0x00, 0xfe, 0xff] },
    // 3-byte mark.
    { encoding: 'utf-8', bytes: [0xef, 0xbb, 0xbf] },
    // 2-byte marks last.
    { encoding: 'utf-16le', bytes: [0xff, 0xfe] },
    { encoding: 'utf-16be', bytes: [0xfe, 0xff] },
];
|
|
161
|
+
function startsWithBytes(buffer, signature) {
|
|
162
|
+
const sigLen = signature.length;
|
|
163
|
+
if (buffer.length < sigLen)
|
|
164
|
+
return false;
|
|
165
|
+
for (let i = 0; i < sigLen; i += 1) {
|
|
166
|
+
if (buffer[i] !== signature[i])
|
|
487
167
|
return false;
|
|
488
|
-
if (this.security.blockedHosts.has(hostname))
|
|
489
|
-
return true;
|
|
490
|
-
return this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix));
|
|
491
168
|
}
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
seen.add(current);
|
|
499
|
-
const cnames = await this.resolveCname(current, signal);
|
|
500
|
-
if (cnames.length === 0)
|
|
501
|
-
return;
|
|
502
|
-
for (const cname of cnames) {
|
|
503
|
-
if (this.isBlockedHostname(cname)) {
|
|
504
|
-
throw createErrorWithCode(`Blocked DNS CNAME detected for ${hostname}: ${cname}`, 'EBLOCKED');
|
|
505
|
-
}
|
|
506
|
-
}
|
|
507
|
-
current = cnames[0] ?? '';
|
|
508
|
-
}
|
|
169
|
+
return true;
|
|
170
|
+
}
|
|
171
|
+
/**
 * Identify the encoding announced by a byte-order mark at the start of
 * `buffer`.
 *
 * @param {Uint8Array} buffer - Leading bytes of the payload.
 * @returns {string | undefined} Encoding of the first entry in
 *   `BOM_SIGNATURES` whose bytes prefix `buffer`, or `undefined` if none do.
 */
function detectBomEncoding(buffer) {
    const match = BOM_SIGNATURES.find(({ bytes }) => startsWithBytes(buffer, bytes));
    return match === undefined ? undefined : match.encoding;
}
|
|
178
|
+
/**
 * Read an attribute-style value that begins at `startIndex` in `input`.
 *
 * - If the character at `startIndex` is `"` or `'`, the value is everything
 *   up to the next occurrence of that same quote (trimmed). An unterminated
 *   quote yields `''`.
 * - Otherwise the value is unquoted and runs until the first whitespace,
 *   `/`, `>`, `;`, `"` or `'`.
 *
 * The last three terminators matter when the token sits inside another
 * quoted attribute, e.g. `content="text/html; charset=utf-8">`. Without them
 * the closing quote (or a following `;param`) leaks into the returned value.
 *
 * @param {string} input - Text to read from.
 * @param {number} startIndex - Index of the first character of the value.
 * @returns {string} The extracted value, or `''` when nothing can be read.
 */
function readQuotedValue(input, startIndex) {
    const first = input[startIndex];
    if (!first) {
        // Out of range (or empty input): there is no value to read.
        return '';
    }
    if (first === '"' || first === "'") {
        const closing = input.indexOf(first, startIndex + 1);
        return closing === -1 ? '' : input.slice(startIndex + 1, closing).trim();
    }
    const tail = input.slice(startIndex);
    // Unquoted value: also stop at quotes and ';' so enclosing-attribute
    // delimiters and trailing parameters are not swallowed.
    const stop = tail.search(/[\s/>"';]/);
    return (stop === -1 ? tail : tail.slice(0, stop)).trim();
}
|
|
191
|
+
/**
 * Find `token` in the lower-cased text and read the value that follows it
 * from the original-cased text.
 *
 * `lower` is searched, but the value is sliced out of `original` at the same
 * offset, so the two strings must be index-aligned.
 *
 * @param {string} original - Text in its original casing.
 * @param {string} lower - Lower-cased counterpart of `original`.
 * @param {string} token - Lower-case token to search for, e.g. `'charset='`.
 * @param {number} [fromIndex=0] - Offset at which the search starts.
 * @returns {string | undefined} The value after the first match, or
 *   `undefined` when the token is absent or the value is empty.
 */
function findTokenValue(original, lower, token, fromIndex = 0) {
    const matchAt = lower.indexOf(token, fromIndex);
    if (matchAt === -1) {
        return undefined;
    }
    const parsed = readQuotedValue(original, matchAt + token.length);
    return parsed ? parsed : undefined;
}
|
|
199
|
+
/**
 * Pull the value of the first `charset=` occurrence out of a document head.
 *
 * @param {string} headSnippet - Leading portion of the document as text.
 * @returns {string | undefined} Lower-cased charset value, or `undefined`
 *   when no non-empty `charset=` value is present.
 */
function extractHtmlCharset(headSnippet) {
    const declared = findTokenValue(headSnippet, headSnippet.toLowerCase(), 'charset=');
    if (!declared) {
        return undefined;
    }
    return declared.toLowerCase();
}
|
|
204
|
+
/**
 * Read the `encoding=` pseudo-attribute of an XML declaration (`<?xml … ?>`).
 *
 * The search for `encoding=` is restricted to the declaration itself. If the
 * closing `?>` is missing, everything from `<?xml` onwards is searched.
 *
 * @param {string} headSnippet - Leading portion of the document as text.
 * @returns {string | undefined} Lower-cased encoding label, or `undefined`
 *   when there is no XML declaration or it declares no encoding.
 */
function extractXmlEncoding(headSnippet) {
    const haystack = headSnippet.toLowerCase();
    const declStart = haystack.indexOf('<?xml');
    if (declStart === -1) {
        return undefined;
    }
    const declEnd = haystack.indexOf('?>', declStart);
    let declaration;
    if (declEnd === -1) {
        declaration = headSnippet.slice(declStart);
    }
    else {
        // +2 keeps the closing "?>" inside the slice.
        declaration = headSnippet.slice(declStart, declEnd + 2);
    }
    const label = findTokenValue(declaration, declaration.toLowerCase(), 'encoding=');
    if (!label) {
        return undefined;
    }
    return label.toLowerCase();
}
|
|
217
|
+
/**
 * Sniff an in-document encoding declaration from the first bytes of `buffer`.
 *
 * At most the first 8,192 bytes are examined. They are decoded as latin1,
 * which maps every byte to exactly one character, so the bytes can be
 * searched as text before the real encoding is known. An HTML `charset=`
 * value takes precedence over an XML declaration's `encoding=`.
 *
 * @param {Uint8Array} buffer - Leading bytes of the payload.
 * @returns {string | undefined} Declared encoding label (lower-cased), or
 *   `undefined` when the buffer is empty or declares nothing.
 */
function detectHtmlDeclaredEncoding(buffer) {
    const scanLength = Math.min(buffer.length, 8_192);
    if (scanLength === 0) {
        return undefined;
    }
    // View over the same memory, limited to the scan window.
    const window = Buffer.from(buffer.buffer, buffer.byteOffset, scanLength);
    const headText = window.toString('latin1');
    const htmlCharset = extractHtmlCharset(headText);
    return htmlCharset ?? extractXmlEncoding(headText);
}
|
|
224
|
+
/**
 * Choose the encoding to decode a payload with.
 *
 * Precedence, first truthy value wins:
 *   1. a byte-order mark found in `sample`,
 *   2. the caller-supplied `declaredEncoding`,
 *   3. an encoding declared inside the document (HTML charset / XML encoding).
 *
 * @param {string | undefined} declaredEncoding - Encoding known to the caller.
 * @param {Uint8Array} sample - Leading bytes of the payload.
 * @returns {string | undefined} The selected encoding, or `undefined` when
 *   none of the three sources yields one.
 */
function resolveEncoding(declaredEncoding, sample) {
    return (detectBomEncoding(sample) ||
        declaredEncoding ||
        detectHtmlDeclaredEncoding(sample));
}
|
|
232
|
+
/**
 * Leading byte sequences ("magic numbers") that mark a payload as binary.
 *
 * Each entry is compared against the start of a buffer. The trailing comments
 * give the ASCII rendering of the bytes, where printable, and the format that
 * conventionally owns the signature.
 */
const BINARY_SIGNATURES = [
    // Documents and images
    [0x25, 0x50, 0x44, 0x46], // "%PDF"  PDF
    [0x89, 0x50, 0x4e, 0x47], // ".PNG"  PNG
    [0x47, 0x49, 0x46, 0x38], // "GIF8"  GIF
    [0xff, 0xd8, 0xff], //               JPEG
    [0x52, 0x49, 0x46, 0x46], // "RIFF"  RIFF container (WAV, AVI, WebP)
    [0x42, 0x4d], //             "BM"    BMP
    [0x49, 0x49, 0x2a, 0x00], // "II*\0" TIFF, little-endian
    [0x4d, 0x4d, 0x00, 0x2a], // "MM\0*" TIFF, big-endian
    [0x00, 0x00, 0x01, 0x00], //         ICO
    // Archives and compressed streams
    [0x50, 0x4b, 0x03, 0x04], // "PK.."  ZIP
    [0x1f, 0x8b], //                     gzip
    [0x42, 0x5a, 0x68], //       "BZh"   bzip2
    [0x52, 0x61, 0x72, 0x21], // "Rar!"  RAR
    [0x37, 0x7a, 0xbc, 0xaf], // "7z.."  7-Zip
    // Executables and bytecode
    [0x7f, 0x45, 0x4c, 0x46], // ".ELF"  ELF
    [0x4d, 0x5a], //             "MZ"    DOS/PE executable
    [0xcf, 0xfa, 0xed, 0xfe], //         Mach-O (64-bit)
    [0x00, 0x61, 0x73, 0x6d], // "\0asm" WebAssembly
    // Audio and video
    [0x1a, 0x45, 0xdf, 0xa3], //         EBML (Matroska / WebM)
    [0x66, 0x74, 0x79, 0x70], // "ftyp"  ISO base media brand box
    [0x46, 0x4c, 0x56], //       "FLV"   Flash video
    [0x49, 0x44, 0x33], //       "ID3"   MP3 with ID3 tag
    [0xff, 0xfb], //                     MPEG audio frame sync
    [0xff, 0xfa], //                     MPEG audio frame sync
    [0x4f, 0x67, 0x67, 0x53], // "OggS"  Ogg
    [0x66, 0x4c, 0x61, 0x43], // "fLaC"  FLAC
    [0x4d, 0x54, 0x68, 0x64], // "MThd"  MIDI
    // Fonts
    [0x77, 0x4f, 0x46, 0x46], // "wOFF"  WOFF
    [0x00, 0x01, 0x00, 0x00], //         TrueType
    [0x4f, 0x54, 0x54, 0x4f], // "OTTO"  OpenType (CFF)
    // Databases
    [0x53, 0x51, 0x4c, 0x69], // "SQLi"  SQLite
];
|
|
265
|
+
/**
 * Check whether a 0x00 byte occurs within the first `limit` bytes of `buffer`.
 *
 * @param {Uint8Array} buffer - Bytes to inspect.
 * @param {number} limit - Maximum number of leading bytes to look at.
 * @returns {boolean} `true` if a NUL byte is found inside that window.
 */
function hasNullByte(buffer, limit) {
    const windowEnd = Math.min(buffer.length, limit);
    const window = buffer.subarray(0, windowEnd);
    return window.indexOf(0x00) !== -1;
}
|
|
269
|
+
/**
 * Heuristically decide whether `buffer` holds binary rather than text data.
 *
 * A buffer is binary when it starts with any entry of `BINARY_SIGNATURES`,
 * or when it has a NUL byte in its first 1000 bytes and `encoding` is not a
 * wide Unicode encoding (where NUL bytes are expected).
 *
 * @param {Uint8Array} buffer - Bytes to classify.
 * @param {string | undefined} encoding - Encoding the bytes will be decoded with.
 * @returns {boolean} `true` when the content looks binary.
 */
function isBinaryContent(buffer, encoding) {
    const hasBinaryMagic = BINARY_SIGNATURES.some((signature) => startsWithBytes(buffer, signature));
    if (hasBinaryMagic) {
        return true;
    }
    if (isUnicodeWideEncoding(encoding)) {
        return false;
    }
    return hasNullByte(buffer, 1000);
}
|
|
536
276
|
function parseRetryAfter(header) {
|
|
537
277
|
if (!header)
|
|
@@ -619,221 +359,93 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
|
|
|
619
359
|
}
|
|
620
360
|
return createFetchError({ kind: 'network', message: error.message }, url);
|
|
621
361
|
}
|
|
622
|
-
const
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
362
|
+
/** HTTP status codes that are followed as redirects. */
const REDIRECT_STATUSES = new Set([
    301,
    302,
    303,
    307,
    308,
]);

/**
 * Tell whether an HTTP status code is one of the followed redirect codes.
 *
 * @param {number} status - HTTP response status.
 * @returns {boolean} `true` for 301, 302, 303, 307 and 308.
 */
function isRedirectStatus(status) {
    const followsRedirect = REDIRECT_STATUSES.has(status);
    return followsRedirect;
}
|
|
366
|
+
/**
 * Best-effort cancellation of a response body that will not be read.
 *
 * Any rejection from `cancel()` is deliberately swallowed. The cancellation
 * is fire-and-forget and is never awaited.
 *
 * @param {{ body?: { cancel(): Promise<unknown> } | null }} response -
 *   Response whose body should be released.
 * @returns {void}
 */
function cancelResponseBody(response) {
    const pending = response.body?.cancel();
    if (pending) {
        // Attach a no-op handler so a failed cancel cannot surface as an
        // unhandled rejection.
        void pending.catch(() => undefined);
    }
}
|
|
372
|
+
/**
 * Sentinel error passed to a stream callback when the byte limit has been
 * reached, so the reader can tell "limit hit" apart from a genuine failure.
 *
 * `name` is set explicitly so the error is identifiable in logs and stack
 * traces instead of appearing as a generic `Error`.
 */
class MaxBytesError extends Error {
    constructor() {
        super('max-bytes-reached');
        this.name = 'MaxBytesError';
    }
}
|
|
377
|
+
class RedirectFollower {
|
|
378
|
+
fetchFn;
|
|
379
|
+
normalizeUrl;
|
|
380
|
+
preflight;
|
|
381
|
+
constructor(fetchFn, normalizeUrl, preflight) {
|
|
382
|
+
this.fetchFn = fetchFn;
|
|
383
|
+
this.normalizeUrl = normalizeUrl;
|
|
384
|
+
this.preflight = preflight;
|
|
635
385
|
}
|
|
636
|
-
|
|
386
|
+
async fetchWithRedirects(url, init, maxRedirects) {
|
|
387
|
+
let currentUrl = url;
|
|
388
|
+
const redirectLimit = Math.max(0, maxRedirects);
|
|
389
|
+
for (let redirectCount = 0; redirectCount <= redirectLimit; redirectCount += 1) {
|
|
390
|
+
const { response, nextUrl } = await this.withRedirectErrorContext(currentUrl, async () => {
|
|
391
|
+
let ipAddress;
|
|
392
|
+
if (this.preflight) {
|
|
393
|
+
ipAddress = await this.preflight(currentUrl, init.signal ?? undefined);
|
|
394
|
+
}
|
|
395
|
+
return this.performFetchCycle(currentUrl, init, redirectLimit, redirectCount, ipAddress);
|
|
396
|
+
});
|
|
397
|
+
if (!nextUrl)
|
|
398
|
+
return { response, url: currentUrl };
|
|
399
|
+
currentUrl = nextUrl;
|
|
400
|
+
}
|
|
401
|
+
throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
|
|
402
|
+
}
|
|
403
|
+
async performFetchCycle(currentUrl, init, redirectLimit, redirectCount, ipAddress) {
|
|
404
|
+
const fetchInit = {
|
|
405
|
+
...init,
|
|
406
|
+
redirect: 'manual',
|
|
407
|
+
};
|
|
408
|
+
if (ipAddress) {
|
|
409
|
+
const ca = tls.rootCertificates.length > 0 ? tls.rootCertificates : undefined;
|
|
410
|
+
const agent = new Agent({
|
|
411
|
+
connect: {
|
|
412
|
+
lookup: (hostname, options, callback) => {
|
|
413
|
+
const family = isIP(ipAddress) === 6 ? 6 : 4;
|
|
414
|
+
if (options.all) {
|
|
415
|
+
callback(null, [{ address: ipAddress, family }]);
|
|
416
|
+
}
|
|
417
|
+
else {
|
|
418
|
+
callback(null, ipAddress, family);
|
|
419
|
+
}
|
|
420
|
+
},
|
|
421
|
+
timeout: config.fetcher.timeout,
|
|
422
|
+
...(ca ? { ca } : {}),
|
|
423
|
+
},
|
|
424
|
+
pipelining: 1,
|
|
425
|
+
connections: 1,
|
|
426
|
+
keepAliveTimeout: 1000,
|
|
427
|
+
keepAliveMaxTimeout: 1000,
|
|
428
|
+
});
|
|
429
|
+
fetchInit.dispatcher = agent;
|
|
430
|
+
}
|
|
431
|
+
const response = await this.fetchFn(currentUrl, fetchInit);
|
|
432
|
+
if (!isRedirectStatus(response.status))
|
|
433
|
+
return { response };
|
|
434
|
+
if (redirectCount >= redirectLimit) {
|
|
435
|
+
cancelResponseBody(response);
|
|
436
|
+
throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
|
|
437
|
+
}
|
|
438
|
+
const location = this.getRedirectLocation(response, currentUrl);
|
|
439
|
+
cancelResponseBody(response);
|
|
440
|
+
const nextUrl = this.resolveRedirectTarget(currentUrl, location);
|
|
441
|
+
const parsedNextUrl = new URL(nextUrl);
|
|
442
|
+
if (parsedNextUrl.protocol !== 'http:' &&
|
|
443
|
+
parsedNextUrl.protocol !== 'https:') {
|
|
444
|
+
throw createErrorWithCode(`Unsupported redirect protocol: ${parsedNextUrl.protocol}`, 'EUNSUPPORTEDPROTOCOL');
|
|
445
|
+
}
|
|
637
446
|
return {
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
: {}),
|
|
641
|
-
...(ctx.operationId ? { operationId: ctx.operationId } : {}),
|
|
642
|
-
};
|
|
643
|
-
}
|
|
644
|
-
start(url, method) {
|
|
645
|
-
const safeUrl = this.redactor.redact(url);
|
|
646
|
-
const contextRequestId = this.context.getRequestId();
|
|
647
|
-
const operationId = this.context.getOperationId();
|
|
648
|
-
const ctx = {
|
|
649
|
-
requestId: randomUUID(),
|
|
650
|
-
startTime: performance.now(),
|
|
651
|
-
url: safeUrl,
|
|
652
|
-
method: method.toUpperCase(),
|
|
653
|
-
};
|
|
654
|
-
if (contextRequestId)
|
|
655
|
-
ctx.contextRequestId = contextRequestId;
|
|
656
|
-
if (operationId)
|
|
657
|
-
ctx.operationId = operationId;
|
|
658
|
-
const ctxFields = this.contextFields(ctx);
|
|
659
|
-
this.publish({
|
|
660
|
-
v: 1,
|
|
661
|
-
type: 'start',
|
|
662
|
-
requestId: ctx.requestId,
|
|
663
|
-
method: ctx.method,
|
|
664
|
-
url: ctx.url,
|
|
665
|
-
...ctxFields,
|
|
666
|
-
});
|
|
667
|
-
this.logger.debug('HTTP Request', {
|
|
668
|
-
requestId: ctx.requestId,
|
|
669
|
-
method: ctx.method,
|
|
670
|
-
url: ctx.url,
|
|
671
|
-
...ctxFields,
|
|
672
|
-
});
|
|
673
|
-
return ctx;
|
|
674
|
-
}
|
|
675
|
-
recordResponse(context, response, contentSize) {
|
|
676
|
-
const duration = performance.now() - context.startTime;
|
|
677
|
-
const durationLabel = `${Math.round(duration)}ms`;
|
|
678
|
-
const ctxFields = this.contextFields(context);
|
|
679
|
-
this.publish({
|
|
680
|
-
v: 1,
|
|
681
|
-
type: 'end',
|
|
682
|
-
requestId: context.requestId,
|
|
683
|
-
status: response.status,
|
|
684
|
-
duration,
|
|
685
|
-
...ctxFields,
|
|
686
|
-
});
|
|
687
|
-
const contentType = response.headers.get('content-type') ?? undefined;
|
|
688
|
-
const contentLengthHeader = response.headers.get('content-length');
|
|
689
|
-
const size = contentLengthHeader ??
|
|
690
|
-
(contentSize === undefined ? undefined : String(contentSize));
|
|
691
|
-
this.logger.debug('HTTP Response', {
|
|
692
|
-
requestId: context.requestId,
|
|
693
|
-
status: response.status,
|
|
694
|
-
url: context.url,
|
|
695
|
-
duration: durationLabel,
|
|
696
|
-
...ctxFields,
|
|
697
|
-
...(contentType ? { contentType } : {}),
|
|
698
|
-
...(size ? { size } : {}),
|
|
699
|
-
});
|
|
700
|
-
if (duration > SLOW_REQUEST_THRESHOLD_MS) {
|
|
701
|
-
this.logger.warn('Slow HTTP request detected', {
|
|
702
|
-
requestId: context.requestId,
|
|
703
|
-
url: context.url,
|
|
704
|
-
duration: durationLabel,
|
|
705
|
-
...ctxFields,
|
|
706
|
-
});
|
|
707
|
-
}
|
|
708
|
-
}
|
|
709
|
-
recordError(context, error, status) {
|
|
710
|
-
const duration = performance.now() - context.startTime;
|
|
711
|
-
const err = toError(error);
|
|
712
|
-
const code = isSystemError(err) ? err.code : undefined;
|
|
713
|
-
const ctxFields = this.contextFields(context);
|
|
714
|
-
this.publish({
|
|
715
|
-
v: 1,
|
|
716
|
-
type: 'error',
|
|
717
|
-
requestId: context.requestId,
|
|
718
|
-
url: context.url,
|
|
719
|
-
error: err.message,
|
|
720
|
-
duration,
|
|
721
|
-
...(code !== undefined ? { code } : {}),
|
|
722
|
-
...(status !== undefined ? { status } : {}),
|
|
723
|
-
...ctxFields,
|
|
724
|
-
});
|
|
725
|
-
const logData = {
|
|
726
|
-
requestId: context.requestId,
|
|
727
|
-
url: context.url,
|
|
728
|
-
status,
|
|
729
|
-
code,
|
|
730
|
-
error: err.message,
|
|
731
|
-
...ctxFields,
|
|
732
|
-
};
|
|
733
|
-
if (status === 429) {
|
|
734
|
-
this.logger.warn('HTTP Request Error', logData);
|
|
735
|
-
return;
|
|
736
|
-
}
|
|
737
|
-
this.logger.error('HTTP Request Error', logData);
|
|
738
|
-
}
|
|
739
|
-
publish(event) {
|
|
740
|
-
if (!fetchChannel.hasSubscribers)
|
|
741
|
-
return;
|
|
742
|
-
try {
|
|
743
|
-
fetchChannel.publish(event);
|
|
744
|
-
}
|
|
745
|
-
catch {
|
|
746
|
-
// Best-effort telemetry; never crash request path.
|
|
747
|
-
}
|
|
748
|
-
}
|
|
749
|
-
}
|
|
750
|
-
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
|
|
751
|
-
function isRedirectStatus(status) {
|
|
752
|
-
return REDIRECT_STATUSES.has(status);
|
|
753
|
-
}
|
|
754
|
-
function cancelResponseBody(response) {
|
|
755
|
-
const cancelPromise = response.body?.cancel();
|
|
756
|
-
if (!cancelPromise)
|
|
757
|
-
return;
|
|
758
|
-
void cancelPromise.catch(() => undefined);
|
|
759
|
-
}
|
|
760
|
-
class MaxBytesError extends Error {
|
|
761
|
-
constructor() {
|
|
762
|
-
super('max-bytes-reached');
|
|
763
|
-
}
|
|
764
|
-
}
|
|
765
|
-
class RedirectFollower {
|
|
766
|
-
fetchFn;
|
|
767
|
-
normalizeUrl;
|
|
768
|
-
preflight;
|
|
769
|
-
constructor(fetchFn, normalizeUrl, preflight) {
|
|
770
|
-
this.fetchFn = fetchFn;
|
|
771
|
-
this.normalizeUrl = normalizeUrl;
|
|
772
|
-
this.preflight = preflight;
|
|
773
|
-
}
|
|
774
|
-
async fetchWithRedirects(url, init, maxRedirects) {
|
|
775
|
-
let currentUrl = url;
|
|
776
|
-
const redirectLimit = Math.max(0, maxRedirects);
|
|
777
|
-
for (let redirectCount = 0; redirectCount <= redirectLimit; redirectCount += 1) {
|
|
778
|
-
const { response, nextUrl } = await this.withRedirectErrorContext(currentUrl, async () => {
|
|
779
|
-
let ipAddress;
|
|
780
|
-
if (this.preflight) {
|
|
781
|
-
ipAddress = await this.preflight(currentUrl, init.signal ?? undefined);
|
|
782
|
-
}
|
|
783
|
-
return this.performFetchCycle(currentUrl, init, redirectLimit, redirectCount, ipAddress);
|
|
784
|
-
});
|
|
785
|
-
if (!nextUrl)
|
|
786
|
-
return { response, url: currentUrl };
|
|
787
|
-
currentUrl = nextUrl;
|
|
788
|
-
}
|
|
789
|
-
throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
|
|
790
|
-
}
|
|
791
|
-
async performFetchCycle(currentUrl, init, redirectLimit, redirectCount, ipAddress) {
|
|
792
|
-
const fetchInit = {
|
|
793
|
-
...init,
|
|
794
|
-
redirect: 'manual',
|
|
795
|
-
};
|
|
796
|
-
if (ipAddress) {
|
|
797
|
-
const ca = tls.rootCertificates.length > 0 ? tls.rootCertificates : undefined;
|
|
798
|
-
const agent = new Agent({
|
|
799
|
-
connect: {
|
|
800
|
-
lookup: (hostname, options, callback) => {
|
|
801
|
-
const family = isIP(ipAddress) === 6 ? 6 : 4;
|
|
802
|
-
if (options.all) {
|
|
803
|
-
callback(null, [{ address: ipAddress, family }]);
|
|
804
|
-
}
|
|
805
|
-
else {
|
|
806
|
-
callback(null, ipAddress, family);
|
|
807
|
-
}
|
|
808
|
-
},
|
|
809
|
-
timeout: config.fetcher.timeout,
|
|
810
|
-
...(ca ? { ca } : {}),
|
|
811
|
-
},
|
|
812
|
-
pipelining: 1,
|
|
813
|
-
connections: 1,
|
|
814
|
-
keepAliveTimeout: 1000,
|
|
815
|
-
keepAliveMaxTimeout: 1000,
|
|
816
|
-
});
|
|
817
|
-
fetchInit.dispatcher = agent;
|
|
818
|
-
}
|
|
819
|
-
const response = await this.fetchFn(currentUrl, fetchInit);
|
|
820
|
-
if (!isRedirectStatus(response.status))
|
|
821
|
-
return { response };
|
|
822
|
-
if (redirectCount >= redirectLimit) {
|
|
823
|
-
cancelResponseBody(response);
|
|
824
|
-
throw createFetchError({ kind: 'too-many-redirects' }, currentUrl);
|
|
825
|
-
}
|
|
826
|
-
const location = this.getRedirectLocation(response, currentUrl);
|
|
827
|
-
cancelResponseBody(response);
|
|
828
|
-
const nextUrl = this.resolveRedirectTarget(currentUrl, location);
|
|
829
|
-
const parsedNextUrl = new URL(nextUrl);
|
|
830
|
-
if (parsedNextUrl.protocol !== 'http:' &&
|
|
831
|
-
parsedNextUrl.protocol !== 'https:') {
|
|
832
|
-
throw createErrorWithCode(`Unsupported redirect protocol: ${parsedNextUrl.protocol}`, 'EUNSUPPORTEDPROTOCOL');
|
|
833
|
-
}
|
|
834
|
-
return {
|
|
835
|
-
response,
|
|
836
|
-
nextUrl,
|
|
447
|
+
response,
|
|
448
|
+
nextUrl,
|
|
837
449
|
};
|
|
838
450
|
}
|
|
839
451
|
getRedirectLocation(response, currentUrl) {
|
|
@@ -871,168 +483,6 @@ class RedirectFollower {
|
|
|
871
483
|
}
|
|
872
484
|
}
|
|
873
485
|
}
|
|
874
|
-
class ResponseTextReader {
|
|
875
|
-
async read(response, url, maxBytes, signal, encoding) {
|
|
876
|
-
const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
|
|
877
|
-
const text = decodeBuffer(buffer, effectiveEncoding);
|
|
878
|
-
return { text, size: buffer.byteLength, truncated };
|
|
879
|
-
}
|
|
880
|
-
async readBuffer(response, url, maxBytes, signal, encoding) {
|
|
881
|
-
if (signal?.aborted) {
|
|
882
|
-
cancelResponseBody(response);
|
|
883
|
-
throw createFetchError({ kind: 'aborted' }, url);
|
|
884
|
-
}
|
|
885
|
-
if (!response.body) {
|
|
886
|
-
return this.readNonStreamBuffer(response, url, maxBytes, signal, encoding);
|
|
887
|
-
}
|
|
888
|
-
return this.readStreamToBuffer(response.body, url, maxBytes, signal, encoding);
|
|
889
|
-
}
|
|
890
|
-
async readNonStreamBuffer(response, url, maxBytes, signal, encoding) {
|
|
891
|
-
if (signal?.aborted)
|
|
892
|
-
throw createFetchError({ kind: 'canceled' }, url);
|
|
893
|
-
const limit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
|
|
894
|
-
let buffer;
|
|
895
|
-
let truncated = false;
|
|
896
|
-
try {
|
|
897
|
-
// Try safe blob slicing if available (Node 18+) to avoid OOM
|
|
898
|
-
const blob = await response.blob();
|
|
899
|
-
if (Number.isFinite(limit) && blob.size > limit) {
|
|
900
|
-
const sliced = blob.slice(0, limit);
|
|
901
|
-
buffer = new Uint8Array(await sliced.arrayBuffer());
|
|
902
|
-
truncated = true;
|
|
903
|
-
}
|
|
904
|
-
else {
|
|
905
|
-
buffer = new Uint8Array(await blob.arrayBuffer());
|
|
906
|
-
}
|
|
907
|
-
}
|
|
908
|
-
catch {
|
|
909
|
-
// Fallback if blob() fails
|
|
910
|
-
const arrayBuffer = await response.arrayBuffer();
|
|
911
|
-
const length = Math.min(arrayBuffer.byteLength, limit);
|
|
912
|
-
buffer = new Uint8Array(arrayBuffer, 0, length);
|
|
913
|
-
truncated = Number.isFinite(limit) && arrayBuffer.byteLength > limit;
|
|
914
|
-
}
|
|
915
|
-
const effectiveEncoding = resolveEncoding(encoding, buffer) ?? encoding ?? 'utf-8';
|
|
916
|
-
if (isBinaryContent(buffer, effectiveEncoding)) {
|
|
917
|
-
throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
|
|
918
|
-
}
|
|
919
|
-
return {
|
|
920
|
-
buffer,
|
|
921
|
-
encoding: effectiveEncoding,
|
|
922
|
-
size: buffer.byteLength,
|
|
923
|
-
truncated,
|
|
924
|
-
};
|
|
925
|
-
}
|
|
926
|
-
async readStreamToBuffer(stream, url, maxBytes, signal, encoding) {
|
|
927
|
-
const byteLimit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
|
|
928
|
-
const captureChunks = byteLimit !== Number.POSITIVE_INFINITY;
|
|
929
|
-
let effectiveEncoding = encoding ?? 'utf-8';
|
|
930
|
-
let encodingResolved = false;
|
|
931
|
-
let total = 0;
|
|
932
|
-
const chunks = [];
|
|
933
|
-
const source = Readable.fromWeb(toNodeReadableStream(stream, url, 'response:read-stream-buffer'));
|
|
934
|
-
const guard = new Transform({
|
|
935
|
-
transform(chunk, _encoding, callback) {
|
|
936
|
-
try {
|
|
937
|
-
const buf = Buffer.isBuffer(chunk)
|
|
938
|
-
? chunk
|
|
939
|
-
: Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength);
|
|
940
|
-
if (!encodingResolved) {
|
|
941
|
-
encodingResolved = true;
|
|
942
|
-
effectiveEncoding =
|
|
943
|
-
resolveEncoding(encoding, buf) ?? encoding ?? 'utf-8';
|
|
944
|
-
}
|
|
945
|
-
if (isBinaryContent(buf, effectiveEncoding)) {
|
|
946
|
-
callback(new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' }));
|
|
947
|
-
return;
|
|
948
|
-
}
|
|
949
|
-
const newTotal = total + buf.length;
|
|
950
|
-
if (newTotal > byteLimit) {
|
|
951
|
-
const remaining = byteLimit - total;
|
|
952
|
-
if (remaining > 0) {
|
|
953
|
-
const slice = buf.subarray(0, remaining);
|
|
954
|
-
total += remaining;
|
|
955
|
-
if (captureChunks)
|
|
956
|
-
chunks.push(slice);
|
|
957
|
-
this.push(slice);
|
|
958
|
-
}
|
|
959
|
-
callback(new MaxBytesError());
|
|
960
|
-
return;
|
|
961
|
-
}
|
|
962
|
-
total = newTotal;
|
|
963
|
-
if (captureChunks)
|
|
964
|
-
chunks.push(buf);
|
|
965
|
-
callback(null, buf);
|
|
966
|
-
}
|
|
967
|
-
catch (error) {
|
|
968
|
-
callback(toError(error));
|
|
969
|
-
}
|
|
970
|
-
},
|
|
971
|
-
});
|
|
972
|
-
const guarded = source.pipe(guard);
|
|
973
|
-
const abortHandler = () => {
|
|
974
|
-
source.destroy();
|
|
975
|
-
guard.destroy();
|
|
976
|
-
};
|
|
977
|
-
if (signal) {
|
|
978
|
-
signal.addEventListener('abort', abortHandler, { once: true });
|
|
979
|
-
}
|
|
980
|
-
try {
|
|
981
|
-
const buffer = await consumeBuffer(guarded);
|
|
982
|
-
return {
|
|
983
|
-
buffer,
|
|
984
|
-
encoding: effectiveEncoding,
|
|
985
|
-
size: total,
|
|
986
|
-
truncated: false,
|
|
987
|
-
};
|
|
988
|
-
}
|
|
989
|
-
catch (error) {
|
|
990
|
-
if (signal?.aborted)
|
|
991
|
-
throw createFetchError({ kind: 'aborted' }, url);
|
|
992
|
-
if (error instanceof FetchError)
|
|
993
|
-
throw error;
|
|
994
|
-
if (error instanceof MaxBytesError) {
|
|
995
|
-
source.destroy();
|
|
996
|
-
guard.destroy();
|
|
997
|
-
return {
|
|
998
|
-
buffer: Buffer.concat(chunks, total),
|
|
999
|
-
encoding: effectiveEncoding,
|
|
1000
|
-
size: total,
|
|
1001
|
-
truncated: true,
|
|
1002
|
-
};
|
|
1003
|
-
}
|
|
1004
|
-
throw error;
|
|
1005
|
-
}
|
|
1006
|
-
finally {
|
|
1007
|
-
if (signal) {
|
|
1008
|
-
signal.removeEventListener('abort', abortHandler);
|
|
1009
|
-
}
|
|
1010
|
-
}
|
|
1011
|
-
}
|
|
1012
|
-
}
|
|
1013
|
-
const DEFAULT_HEADERS = {
|
|
1014
|
-
'User-Agent': config.fetcher.userAgent,
|
|
1015
|
-
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
1016
|
-
'Accept-Language': 'en-US,en;q=0.5',
|
|
1017
|
-
'Accept-Encoding': 'gzip, deflate, br',
|
|
1018
|
-
Connection: 'keep-alive',
|
|
1019
|
-
};
|
|
1020
|
-
function buildHeaders() {
|
|
1021
|
-
return DEFAULT_HEADERS;
|
|
1022
|
-
}
|
|
1023
|
-
function buildRequestSignal(timeoutMs, external) {
|
|
1024
|
-
if (timeoutMs <= 0)
|
|
1025
|
-
return external;
|
|
1026
|
-
const timeoutSignal = AbortSignal.timeout(timeoutMs);
|
|
1027
|
-
return external ? AbortSignal.any([external, timeoutSignal]) : timeoutSignal;
|
|
1028
|
-
}
|
|
1029
|
-
function buildRequestInit(headers, signal) {
|
|
1030
|
-
return {
|
|
1031
|
-
method: 'GET',
|
|
1032
|
-
headers,
|
|
1033
|
-
...(signal ? { signal } : {}),
|
|
1034
|
-
};
|
|
1035
|
-
}
|
|
1036
486
|
function resolveResponseError(response, finalUrl) {
|
|
1037
487
|
if (response.status === 429) {
|
|
1038
488
|
return createFetchError({ kind: 'rate-limited', retryAfter: response.headers.get('retry-after') }, finalUrl);
|
|
@@ -1199,61 +649,200 @@ async function decodeResponseIfNeeded(response, url, signal) {
|
|
|
1199
649
|
for (const decompressor of decompressors) {
|
|
1200
650
|
decompressor.destroy();
|
|
1201
651
|
}
|
|
1202
|
-
decodedNodeStream.destroy();
|
|
1203
|
-
};
|
|
1204
|
-
if (signal) {
|
|
1205
|
-
signal.addEventListener('abort', abortDecodePipeline, { once: true });
|
|
1206
|
-
}
|
|
1207
|
-
void decodedPipeline.catch((error) => {
|
|
1208
|
-
decodedNodeStream.destroy(toError(error));
|
|
1209
|
-
});
|
|
1210
|
-
const decodedBodyStream = toWebReadableStream(decodedNodeStream, url, 'response:decode-content-encoding');
|
|
1211
|
-
const decodedReader = decodedBodyStream.getReader();
|
|
1212
|
-
const clearAbortListener = () => {
|
|
1213
|
-
if (!signal)
|
|
1214
|
-
return;
|
|
1215
|
-
signal.removeEventListener('abort', abortDecodePipeline);
|
|
1216
|
-
};
|
|
1217
|
-
try {
|
|
1218
|
-
const first = await decodedReader.read();
|
|
1219
|
-
if (first.done) {
|
|
1220
|
-
clearAbortListener();
|
|
1221
|
-
void passthroughBranch.cancel().catch(() => undefined);
|
|
1222
|
-
return new Response(null, {
|
|
1223
|
-
status: response.status,
|
|
1224
|
-
statusText: response.statusText,
|
|
1225
|
-
headers,
|
|
1226
|
-
});
|
|
652
|
+
decodedNodeStream.destroy();
|
|
653
|
+
};
|
|
654
|
+
if (signal) {
|
|
655
|
+
signal.addEventListener('abort', abortDecodePipeline, { once: true });
|
|
656
|
+
}
|
|
657
|
+
void decodedPipeline.catch((error) => {
|
|
658
|
+
decodedNodeStream.destroy(toError(error));
|
|
659
|
+
});
|
|
660
|
+
const decodedBodyStream = toWebReadableStream(decodedNodeStream, url, 'response:decode-content-encoding');
|
|
661
|
+
const decodedReader = decodedBodyStream.getReader();
|
|
662
|
+
const clearAbortListener = () => {
|
|
663
|
+
if (!signal)
|
|
664
|
+
return;
|
|
665
|
+
signal.removeEventListener('abort', abortDecodePipeline);
|
|
666
|
+
};
|
|
667
|
+
try {
|
|
668
|
+
const first = await decodedReader.read();
|
|
669
|
+
if (first.done) {
|
|
670
|
+
clearAbortListener();
|
|
671
|
+
void passthroughBranch.cancel().catch(() => undefined);
|
|
672
|
+
return new Response(null, {
|
|
673
|
+
status: response.status,
|
|
674
|
+
statusText: response.statusText,
|
|
675
|
+
headers,
|
|
676
|
+
});
|
|
677
|
+
}
|
|
678
|
+
void passthroughBranch.cancel().catch(() => undefined);
|
|
679
|
+
const body = createPumpedStream(first.value, decodedReader);
|
|
680
|
+
if (signal) {
|
|
681
|
+
void finished(decodedNodeStream, { cleanup: true })
|
|
682
|
+
.catch(() => { })
|
|
683
|
+
.finally(() => {
|
|
684
|
+
clearAbortListener();
|
|
685
|
+
});
|
|
686
|
+
}
|
|
687
|
+
return new Response(body, {
|
|
688
|
+
status: response.status,
|
|
689
|
+
statusText: response.statusText,
|
|
690
|
+
headers,
|
|
691
|
+
});
|
|
692
|
+
}
|
|
693
|
+
catch (error) {
|
|
694
|
+
clearAbortListener();
|
|
695
|
+
abortDecodePipeline();
|
|
696
|
+
void decodedReader.cancel(error).catch(() => undefined);
|
|
697
|
+
logDebug('Content-Encoding decode failed; using passthrough body', {
|
|
698
|
+
url: redactUrl(url),
|
|
699
|
+
encoding: encodingHeader ?? encodings.join(','),
|
|
700
|
+
error: isError(error) ? error.message : String(error),
|
|
701
|
+
});
|
|
702
|
+
return new Response(passthroughBranch, {
|
|
703
|
+
status: response.status,
|
|
704
|
+
statusText: response.statusText,
|
|
705
|
+
headers,
|
|
706
|
+
});
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
class ResponseTextReader {
|
|
710
|
+
async read(response, url, maxBytes, signal, encoding) {
|
|
711
|
+
const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
|
|
712
|
+
const text = decodeBuffer(buffer, effectiveEncoding);
|
|
713
|
+
return { text, size: buffer.byteLength, truncated };
|
|
714
|
+
}
|
|
715
|
+
async readBuffer(response, url, maxBytes, signal, encoding) {
|
|
716
|
+
if (signal?.aborted) {
|
|
717
|
+
cancelResponseBody(response);
|
|
718
|
+
throw createFetchError({ kind: 'aborted' }, url);
|
|
719
|
+
}
|
|
720
|
+
if (!response.body) {
|
|
721
|
+
return this.readNonStreamBuffer(response, url, maxBytes, signal, encoding);
|
|
722
|
+
}
|
|
723
|
+
return this.readStreamToBuffer(response.body, url, maxBytes, signal, encoding);
|
|
724
|
+
}
|
|
725
|
+
async readNonStreamBuffer(response, url, maxBytes, signal, encoding) {
|
|
726
|
+
if (signal?.aborted)
|
|
727
|
+
throw createFetchError({ kind: 'canceled' }, url);
|
|
728
|
+
const limit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
|
|
729
|
+
let buffer;
|
|
730
|
+
let truncated = false;
|
|
731
|
+
try {
|
|
732
|
+
// Try safe blob slicing if available (Node 18+) to avoid OOM
|
|
733
|
+
const blob = await response.blob();
|
|
734
|
+
if (Number.isFinite(limit) && blob.size > limit) {
|
|
735
|
+
const sliced = blob.slice(0, limit);
|
|
736
|
+
buffer = new Uint8Array(await sliced.arrayBuffer());
|
|
737
|
+
truncated = true;
|
|
738
|
+
}
|
|
739
|
+
else {
|
|
740
|
+
buffer = new Uint8Array(await blob.arrayBuffer());
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
catch {
|
|
744
|
+
// Fallback if blob() fails
|
|
745
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
746
|
+
const length = Math.min(arrayBuffer.byteLength, limit);
|
|
747
|
+
buffer = new Uint8Array(arrayBuffer, 0, length);
|
|
748
|
+
truncated = Number.isFinite(limit) && arrayBuffer.byteLength > limit;
|
|
749
|
+
}
|
|
750
|
+
const effectiveEncoding = resolveEncoding(encoding, buffer) ?? encoding ?? 'utf-8';
|
|
751
|
+
if (isBinaryContent(buffer, effectiveEncoding)) {
|
|
752
|
+
throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
|
|
753
|
+
}
|
|
754
|
+
return {
|
|
755
|
+
buffer,
|
|
756
|
+
encoding: effectiveEncoding,
|
|
757
|
+
size: buffer.byteLength,
|
|
758
|
+
truncated,
|
|
759
|
+
};
|
|
760
|
+
}
|
|
761
|
+
async readStreamToBuffer(stream, url, maxBytes, signal, encoding) {
|
|
762
|
+
const byteLimit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
|
|
763
|
+
const captureChunks = byteLimit !== Number.POSITIVE_INFINITY;
|
|
764
|
+
let effectiveEncoding = encoding ?? 'utf-8';
|
|
765
|
+
let encodingResolved = false;
|
|
766
|
+
let total = 0;
|
|
767
|
+
const chunks = [];
|
|
768
|
+
const source = Readable.fromWeb(toNodeReadableStream(stream, url, 'response:read-stream-buffer'));
|
|
769
|
+
const guard = new Transform({
|
|
770
|
+
transform(chunk, _encoding, callback) {
|
|
771
|
+
try {
|
|
772
|
+
const buf = Buffer.isBuffer(chunk)
|
|
773
|
+
? chunk
|
|
774
|
+
: Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength);
|
|
775
|
+
if (!encodingResolved) {
|
|
776
|
+
encodingResolved = true;
|
|
777
|
+
effectiveEncoding =
|
|
778
|
+
resolveEncoding(encoding, buf) ?? encoding ?? 'utf-8';
|
|
779
|
+
}
|
|
780
|
+
if (isBinaryContent(buf, effectiveEncoding)) {
|
|
781
|
+
callback(new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' }));
|
|
782
|
+
return;
|
|
783
|
+
}
|
|
784
|
+
const newTotal = total + buf.length;
|
|
785
|
+
if (newTotal > byteLimit) {
|
|
786
|
+
const remaining = byteLimit - total;
|
|
787
|
+
if (remaining > 0) {
|
|
788
|
+
const slice = buf.subarray(0, remaining);
|
|
789
|
+
total += remaining;
|
|
790
|
+
if (captureChunks)
|
|
791
|
+
chunks.push(slice);
|
|
792
|
+
this.push(slice);
|
|
793
|
+
}
|
|
794
|
+
callback(new MaxBytesError());
|
|
795
|
+
return;
|
|
796
|
+
}
|
|
797
|
+
total = newTotal;
|
|
798
|
+
if (captureChunks)
|
|
799
|
+
chunks.push(buf);
|
|
800
|
+
callback(null, buf);
|
|
801
|
+
}
|
|
802
|
+
catch (error) {
|
|
803
|
+
callback(toError(error));
|
|
804
|
+
}
|
|
805
|
+
},
|
|
806
|
+
});
|
|
807
|
+
const guarded = source.pipe(guard);
|
|
808
|
+
const abortHandler = () => {
|
|
809
|
+
source.destroy();
|
|
810
|
+
guard.destroy();
|
|
811
|
+
};
|
|
812
|
+
if (signal) {
|
|
813
|
+
signal.addEventListener('abort', abortHandler, { once: true });
|
|
814
|
+
}
|
|
815
|
+
try {
|
|
816
|
+
const buffer = await consumeBuffer(guarded);
|
|
817
|
+
return {
|
|
818
|
+
buffer,
|
|
819
|
+
encoding: effectiveEncoding,
|
|
820
|
+
size: total,
|
|
821
|
+
truncated: false,
|
|
822
|
+
};
|
|
1227
823
|
}
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
824
|
+
catch (error) {
|
|
825
|
+
if (signal?.aborted)
|
|
826
|
+
throw createFetchError({ kind: 'aborted' }, url);
|
|
827
|
+
if (error instanceof FetchError)
|
|
828
|
+
throw error;
|
|
829
|
+
if (error instanceof MaxBytesError) {
|
|
830
|
+
source.destroy();
|
|
831
|
+
guard.destroy();
|
|
832
|
+
return {
|
|
833
|
+
buffer: Buffer.concat(chunks, total),
|
|
834
|
+
encoding: effectiveEncoding,
|
|
835
|
+
size: total,
|
|
836
|
+
truncated: true,
|
|
837
|
+
};
|
|
838
|
+
}
|
|
839
|
+
throw error;
|
|
840
|
+
}
|
|
841
|
+
finally {
|
|
842
|
+
if (signal) {
|
|
843
|
+
signal.removeEventListener('abort', abortHandler);
|
|
844
|
+
}
|
|
1236
845
|
}
|
|
1237
|
-
return new Response(body, {
|
|
1238
|
-
status: response.status,
|
|
1239
|
-
statusText: response.statusText,
|
|
1240
|
-
headers,
|
|
1241
|
-
});
|
|
1242
|
-
}
|
|
1243
|
-
catch (error) {
|
|
1244
|
-
clearAbortListener();
|
|
1245
|
-
abortDecodePipeline();
|
|
1246
|
-
void decodedReader.cancel(error).catch(() => undefined);
|
|
1247
|
-
logDebug('Content-Encoding decode failed; using passthrough body', {
|
|
1248
|
-
url: redactUrl(url),
|
|
1249
|
-
encoding: encodingHeader ?? encodings.join(','),
|
|
1250
|
-
error: isError(error) ? error.message : String(error),
|
|
1251
|
-
});
|
|
1252
|
-
return new Response(passthroughBranch, {
|
|
1253
|
-
status: response.status,
|
|
1254
|
-
statusText: response.statusText,
|
|
1255
|
-
headers,
|
|
1256
|
-
});
|
|
1257
846
|
}
|
|
1258
847
|
}
|
|
1259
848
|
async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry, reader, maxBytes, mode, signal) {
|
|
@@ -1275,20 +864,172 @@ async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry,
|
|
|
1275
864
|
telemetry.recordResponse(ctx, decodedResponse, size);
|
|
1276
865
|
return { kind: 'buffer', buffer, encoding, size, truncated };
|
|
1277
866
|
}
|
|
1278
|
-
function
|
|
1279
|
-
|
|
1280
|
-
return
|
|
867
|
+
/**
 * Duck-type check for a web ReadableStream.
 *
 * @param value Candidate value of any type.
 * @returns true when `value` passes isObject, exposes `getReader`, `cancel`
 *          and `tee` as functions, and has a boolean `locked` property.
 */
function isReadableStreamLike(value) {
    if (!isObject(value)) {
        return false;
    }
    for (const method of ['getReader', 'cancel', 'tee']) {
        if (typeof value[method] !== 'function') {
            return false;
        }
    }
    return typeof value['locked'] === 'boolean';
}
|
|
875
|
+
/**
 * Throws unless `stream` passes isReadableStreamLike.
 *
 * @param stream Value expected to be a web ReadableStream.
 * @param url    URL recorded on the thrown FetchError.
 * @param stage  Label of the calling stage, reported in the error details.
 * @throws FetchError (status 500, reason `invalid_stream`) when the check fails.
 */
function assertReadableStreamLike(stream, url, stage) {
    if (!isReadableStreamLike(stream)) {
        const details = { reason: 'invalid_stream', stage };
        throw new FetchError('Invalid response stream', url, 500, details);
    }
}
|
|
883
|
+
/**
 * Validates `stream` with assertReadableStreamLike and hands the same object
 * back unchanged, so the result can be passed straight to a consumer.
 *
 * @param stream Value expected to be a web ReadableStream.
 * @param url    URL forwarded to the assertion for error reporting.
 * @param stage  Stage label forwarded to the assertion.
 * @returns The `stream` argument itself.
 */
function toNodeReadableStream(stream, url, stage) {
    // Validation only: no conversion or wrapping happens here.
    assertReadableStreamLike(stream, url, stage);
    return stream;
}
|
|
887
|
+
/**
 * Converts a Node.js Readable into a web ReadableStream and validates the
 * converted value before returning it.
 *
 * @param stream Node.js Readable to convert with `Readable.toWeb`.
 * @param url    URL forwarded to the assertion for error reporting.
 * @param stage  Stage label forwarded to the assertion.
 * @returns The converted web stream.
 */
function toWebReadableStream(stream, url, stage) {
    const webStream = Readable.toWeb(stream);
    assertReadableStreamLike(webStream, url, stage);
    return webStream;
}
|
|
892
|
+
// Diagnostics channel on which every fetch lifecycle event is published.
const fetchChannel = diagnosticsChannel.channel('fetch-url-mcp.fetch');
// Requests taking longer than this many milliseconds also log a warning.
const SLOW_REQUEST_THRESHOLD_MS = 5000;
/**
 * Emits structured telemetry for outgoing HTTP requests: one diagnostics-channel
 * event plus one log entry for each of start, response and error.
 *
 * Collaborators are injected:
 *  - `logger`   exposes `debug`, `warn` and `error`;
 *  - `context`  exposes `getRequestId()` and `getOperationId()`;
 *  - `redactor` exposes `redact(url)`.
 */
class FetchTelemetry {
    logger;
    context;
    redactor;
    constructor(logger, context, redactor) {
        this.logger = logger;
        this.context = context;
        this.redactor = redactor;
    }
    /** Returns `url` as transformed by the injected redactor. */
    redact(url) {
        return this.redactor.redact(url);
    }
    /**
     * Picks the optional correlation ids off a request context, leaving out any
     * that are missing or falsy.
     */
    contextFields(ctx) {
        const fields = {};
        if (ctx.contextRequestId) {
            fields.contextRequestId = ctx.contextRequestId;
        }
        if (ctx.operationId) {
            fields.operationId = ctx.operationId;
        }
        return fields;
    }
    /**
     * Opens a telemetry context for a request, then publishes and logs the
     * `start` event.
     *
     * @returns The context object to hand to recordResponse / recordError. It
     *          holds a fresh request id, the start timestamp, the redacted URL
     *          and the upper-cased method.
     */
    start(url, method) {
        const redactedUrl = this.redactor.redact(url);
        const outerRequestId = this.context.getRequestId();
        const outerOperationId = this.context.getOperationId();
        const ctx = {
            requestId: randomUUID(),
            startTime: performance.now(),
            url: redactedUrl,
            method: method.toUpperCase(),
        };
        if (outerRequestId) {
            ctx.contextRequestId = outerRequestId;
        }
        if (outerOperationId) {
            ctx.operationId = outerOperationId;
        }
        const summary = {
            requestId: ctx.requestId,
            method: ctx.method,
            url: ctx.url,
            ...this.contextFields(ctx),
        };
        this.publish({ v: 1, type: 'start', ...summary });
        this.logger.debug('HTTP Request', summary);
        return ctx;
    }
    /**
     * Publishes and logs the `end` event for a completed request, and warns when
     * the request exceeded SLOW_REQUEST_THRESHOLD_MS.
     *
     * @param contentSize Optional body size, logged only when the response has
     *                    no `content-length` header.
     */
    recordResponse(context, response, contentSize) {
        const duration = performance.now() - context.startTime;
        const durationLabel = `${Math.round(duration)}ms`;
        const ctxFields = this.contextFields(context);
        const { requestId } = context;
        this.publish({
            v: 1,
            type: 'end',
            requestId,
            status: response.status,
            duration,
            ...ctxFields,
        });
        const contentType = response.headers.get('content-type') ?? undefined;
        // Prefer the declared content-length; otherwise use the measured size.
        let size = response.headers.get('content-length') ?? undefined;
        if (size === undefined && contentSize !== undefined) {
            size = String(contentSize);
        }
        const details = {
            requestId,
            status: response.status,
            url: context.url,
            duration: durationLabel,
            ...ctxFields,
        };
        if (contentType) {
            details.contentType = contentType;
        }
        if (size) {
            details.size = size;
        }
        this.logger.debug('HTTP Response', details);
        if (duration <= SLOW_REQUEST_THRESHOLD_MS) {
            return;
        }
        this.logger.warn('Slow HTTP request detected', {
            requestId,
            url: context.url,
            duration: durationLabel,
            ...ctxFields,
        });
    }
    /**
     * Publishes and logs the `error` event for a failed request. A 429 status is
     * logged as a warning; every other failure is logged as an error.
     */
    recordError(context, error, status) {
        const duration = performance.now() - context.startTime;
        const err = toError(error);
        const code = isSystemError(err) ? err.code : undefined;
        const ctxFields = this.contextFields(context);
        const event = {
            v: 1,
            type: 'error',
            requestId: context.requestId,
            url: context.url,
            error: err.message,
            duration,
        };
        // The published event omits absent fields; the log entry keeps the keys.
        if (code !== undefined) {
            event.code = code;
        }
        if (status !== undefined) {
            event.status = status;
        }
        this.publish({ ...event, ...ctxFields });
        const logData = {
            requestId: context.requestId,
            url: context.url,
            status,
            code,
            error: err.message,
            ...ctxFields,
        };
        const level = status === 429 ? 'warn' : 'error';
        this.logger[level]('HTTP Request Error', logData);
    }
    /** Publishes `event` on the fetch channel when anyone is subscribed. */
    publish(event) {
        if (fetchChannel.hasSubscribers) {
            try {
                fetchChannel.publish(event);
            }
            catch {
                // Best-effort telemetry; never crash request path.
            }
        }
    }
}
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1020
|
+
// Logger adapter exposing the module's log helpers under the
// debug / warn / error names that FetchTelemetry calls.
const defaultLogger = { debug: logDebug, warn: logWarn, error: logError };
// Context adapter exposing the getRequestId / getOperationId accessors that
// FetchTelemetry.start() reads.
const defaultContext = { getRequestId, getOperationId };
// Redactor adapter that delegates to redactUrl.
const defaultRedactor = { redact: redactUrl };
// Looks up globalThis.fetch on every call instead of capturing it once, so a
// fetch replaced after module load is the one that gets used.
const defaultFetch = (input, init) => {
    return globalThis.fetch(input, init);
};
|
|
1292
1033
|
class HttpFetcher {
|
|
1293
1034
|
fetcherConfig;
|
|
1294
1035
|
redirectFollower;
|
|
@@ -1342,6 +1083,29 @@ class HttpFetcher {
|
|
|
1342
1083
|
}
|
|
1343
1084
|
}
|
|
1344
1085
|
}
|
|
1086
|
+
// Request headers returned by buildHeaders(). The User-Agent comes from the
// fetcher configuration; the remaining values are fixed.
const DEFAULT_HEADERS = {
    'User-Agent': config.fetcher.userAgent,
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'Connection': 'keep-alive',
};
|
|
1093
|
+
/**
 * Returns the default request headers for an outgoing fetch.
 *
 * A fresh shallow copy is produced on every call, so a caller that adds or
 * overrides a header cannot alter the shared DEFAULT_HEADERS object and leak
 * that change into later requests.
 *
 * @returns {Record<string, string>} A new object holding the same header
 *          names and values as DEFAULT_HEADERS.
 */
function buildHeaders() {
    return { ...DEFAULT_HEADERS };
}
|
|
1096
|
+
/**
 * Builds the AbortSignal for a request from a timeout and an optional
 * caller-supplied signal.
 *
 * @param timeoutMs Timeout in milliseconds; a value <= 0 disables the timeout.
 * @param external  Optional caller-supplied AbortSignal.
 * @returns `external` unchanged when the timeout is disabled (possibly
 *          undefined); the timeout signal alone when there is no external
 *          signal; otherwise a signal that aborts when either one does.
 */
function buildRequestSignal(timeoutMs, external) {
    if (timeoutMs <= 0) {
        return external;
    }
    const deadline = AbortSignal.timeout(timeoutMs);
    if (!external) {
        return deadline;
    }
    return AbortSignal.any([external, deadline]);
}
|
|
1102
|
+
/**
 * Assembles the init object for a GET request.
 *
 * @param headers Headers to send with the request.
 * @param signal  Optional AbortSignal; the `signal` key is left out entirely
 *                when this is falsy.
 * @returns `{ method: 'GET', headers }`, plus `signal` when one was given.
 */
function buildRequestInit(headers, signal) {
    const init = { method: 'GET', headers };
    if (signal) {
        init.signal = signal;
    }
    return init;
}
|
|
1345
1109
|
// Module-level singletons. Declaration order matters: each instance is passed
// to the constructors that follow it.
// IP blocker built from the security configuration.
const ipBlocker = new IpBlocker(config.security);
// URL normalizer; receives the IP blocker and the blocked host suffix list.
const urlNormalizer = new UrlNormalizer(config.constants, config.security, ipBlocker, BLOCKED_HOST_SUFFIXES);
// Raw-URL transformer; reports through the default logger.
const rawUrlTransformer = new RawUrlTransformer(defaultLogger);
|
|
@@ -1349,7 +1113,6 @@ const dnsResolver = new SafeDnsResolver(ipBlocker, config.security, BLOCKED_HOST
|
|
|
1349
1113
|
// Telemetry built from the default logger, context accessors and redactor.
const telemetry = new FetchTelemetry(defaultLogger, defaultContext, defaultRedactor);
// Redirect targets pass through the same validation and normalization as any
// other URL handled by urlNormalizer.
const normalizeRedirectUrl = (url) => urlNormalizer.validateAndNormalize(url);
const dnsPreflight = createDnsPreflight(dnsResolver);
// The redirect follower is given the fetch implementation, the redirect URL
// normalizer and the DNS preflight.
const secureRedirectFollower = new RedirectFollower(defaultFetch, normalizeRedirectUrl, dnsPreflight);
const responseReader = new ResponseTextReader();
// Fetcher used by the exported fetch helpers, assembled from the pieces above.
const httpFetcher = new HttpFetcher(config.fetcher, secureRedirectFollower, responseReader, telemetry);
|
|
@@ -1391,4 +1154,3 @@ export async function fetchNormalizedUrl(normalizedUrl, options) {
|
|
|
1391
1154
|
/**
 * Fetches an already-normalized URL through the module's shared HttpFetcher,
 * using its buffer-returning entry point.
 *
 * @param normalizedUrl URL to fetch; passed through unchanged.
 * @param options       Fetch options; passed through unchanged.
 * @returns A promise for whatever `httpFetcher.fetchNormalizedUrlBuffer`
 *          resolves to.
 */
export async function fetchNormalizedUrlBuffer(normalizedUrl, options) {
    // Thin delegation: all behaviour lives in the HttpFetcher instance.
    return httpFetcher.fetchNormalizedUrlBuffer(normalizedUrl, options);
}
|
|
1394
|
-
//# sourceMappingURL=fetch.js.map
|