@j0hanz/superfetch 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache.js +14 -12
- package/dist/config.js +51 -39
- package/dist/dom-noise-removal.js +4 -1
- package/dist/fetch.d.ts +1 -0
- package/dist/fetch.js +160 -97
- package/dist/http-native.js +31 -14
- package/dist/language-detection.js +28 -4
- package/dist/mcp.js +7 -1
- package/dist/tasks.d.ts +1 -0
- package/dist/tasks.js +129 -95
- package/dist/tools.d.ts +2 -0
- package/dist/tools.js +4 -3
- package/dist/transform-types.d.ts +1 -0
- package/dist/transform.js +122 -17
- package/package.json +1 -1
package/dist/cache.js
CHANGED
|
@@ -102,9 +102,12 @@ class InMemoryCacheStore {
|
|
|
102
102
|
if (!this.isEnabled())
|
|
103
103
|
return [];
|
|
104
104
|
const now = Date.now();
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
.
|
|
105
|
+
const result = [];
|
|
106
|
+
for (const [key, entry] of this.entries) {
|
|
107
|
+
if (entry.expiresAtMs > now)
|
|
108
|
+
result.push(key);
|
|
109
|
+
}
|
|
110
|
+
return result;
|
|
108
111
|
}
|
|
109
112
|
onUpdate(listener) {
|
|
110
113
|
const wrapped = (event) => {
|
|
@@ -131,7 +134,8 @@ class InMemoryCacheStore {
|
|
|
131
134
|
const entry = this.entries.get(cacheKey);
|
|
132
135
|
if (!entry)
|
|
133
136
|
return undefined;
|
|
134
|
-
|
|
137
|
+
const now = Date.now();
|
|
138
|
+
if (entry.expiresAtMs <= now) {
|
|
135
139
|
this.entries.delete(cacheKey);
|
|
136
140
|
return undefined;
|
|
137
141
|
}
|
|
@@ -157,12 +161,11 @@ class InMemoryCacheStore {
|
|
|
157
161
|
};
|
|
158
162
|
this.entries.delete(cacheKey);
|
|
159
163
|
this.entries.set(cacheKey, entry);
|
|
160
|
-
// Eviction
|
|
164
|
+
// Eviction (LRU: first insertion-order key)
|
|
161
165
|
if (this.entries.size > this.max) {
|
|
162
166
|
const firstKey = this.entries.keys().next();
|
|
163
|
-
if (!firstKey.done)
|
|
167
|
+
if (!firstKey.done)
|
|
164
168
|
this.entries.delete(firstKey.value);
|
|
165
|
-
}
|
|
166
169
|
}
|
|
167
170
|
this.notify(cacheKey);
|
|
168
171
|
}
|
|
@@ -170,9 +173,9 @@ class InMemoryCacheStore {
|
|
|
170
173
|
if (this.updateEmitter.listenerCount('update') === 0)
|
|
171
174
|
return;
|
|
172
175
|
const parts = parseCacheKey(cacheKey);
|
|
173
|
-
if (parts)
|
|
174
|
-
|
|
175
|
-
}
|
|
176
|
+
if (!parts)
|
|
177
|
+
return;
|
|
178
|
+
this.updateEmitter.emit('update', { cacheKey, ...parts });
|
|
176
179
|
}
|
|
177
180
|
logError(message, cacheKey, error) {
|
|
178
181
|
logWarn(message, {
|
|
@@ -231,7 +234,6 @@ function resolveCachedMarkdownText(raw) {
|
|
|
231
234
|
return raw;
|
|
232
235
|
}
|
|
233
236
|
export function registerCachedContentResource(server, serverIcons) {
|
|
234
|
-
// Resource Registration
|
|
235
237
|
server.registerResource('cached-content', new ResourceTemplate('superfetch://cache/{namespace}/{urlHash}', {
|
|
236
238
|
list: listCachedResources,
|
|
237
239
|
}), {
|
|
@@ -288,7 +290,7 @@ export function registerCachedContentResource(server, serverIcons) {
|
|
|
288
290
|
store.onUpdate(({ cacheKey }) => {
|
|
289
291
|
if (!server.isConnected() || !initialized)
|
|
290
292
|
return;
|
|
291
|
-
// Check capabilities via unsafe cast
|
|
293
|
+
// Check capabilities via unsafe cast (SDK limitation)
|
|
292
294
|
const capabilities = server.server.getClientCapabilities();
|
|
293
295
|
const uri = toResourceUri(cacheKey);
|
|
294
296
|
if (capabilities?.resources?.subscribe && uri && subscriptions.has(uri)) {
|
package/dist/config.js
CHANGED
|
@@ -3,16 +3,19 @@ import { findPackageJSON } from 'node:module';
|
|
|
3
3
|
import { isIP } from 'node:net';
|
|
4
4
|
import process from 'node:process';
|
|
5
5
|
import { domainToASCII } from 'node:url';
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
|
11
|
-
if (typeof packageJson.version !== 'string') {
|
|
12
|
-
|
|
6
|
+
function readServerVersion(moduleUrl) {
|
|
7
|
+
const packageJsonPath = findPackageJSON(moduleUrl);
|
|
8
|
+
if (!packageJsonPath)
|
|
9
|
+
throw new Error('package.json not found');
|
|
10
|
+
const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
|
11
|
+
if (typeof packageJson.version !== 'string') {
|
|
12
|
+
throw new Error('package.json version is missing');
|
|
13
|
+
}
|
|
14
|
+
return packageJson.version;
|
|
13
15
|
}
|
|
14
|
-
export const serverVersion =
|
|
16
|
+
export const serverVersion = readServerVersion(import.meta.url);
|
|
15
17
|
const LOG_LEVELS = ['debug', 'info', 'warn', 'error'];
|
|
18
|
+
const ALLOWED_LOG_LEVELS = new Set(LOG_LEVELS);
|
|
16
19
|
const DEFAULT_HEADING_KEYWORDS = [
|
|
17
20
|
'overview',
|
|
18
21
|
'introduction',
|
|
@@ -34,6 +37,9 @@ const DEFAULT_HEADING_KEYWORDS = [
|
|
|
34
37
|
'acknowledgments',
|
|
35
38
|
'appendix',
|
|
36
39
|
];
|
|
40
|
+
class ConfigError extends Error {
|
|
41
|
+
name = 'ConfigError';
|
|
42
|
+
}
|
|
37
43
|
function isMissingEnvFileError(error) {
|
|
38
44
|
if (!error || typeof error !== 'object')
|
|
39
45
|
return false;
|
|
@@ -54,23 +60,20 @@ function loadEnvFileIfAvailable() {
|
|
|
54
60
|
}
|
|
55
61
|
loadEnvFileIfAvailable();
|
|
56
62
|
const { env } = process;
|
|
57
|
-
class ConfigError extends Error {
|
|
58
|
-
name = 'ConfigError';
|
|
59
|
-
}
|
|
60
63
|
function buildIpv4(parts) {
|
|
61
64
|
return parts.join('.');
|
|
62
65
|
}
|
|
63
|
-
function formatHostForUrl(hostname) {
|
|
64
|
-
if (hostname.includes(':') && !hostname.startsWith('['))
|
|
65
|
-
return `[${hostname}]`;
|
|
66
|
-
return hostname;
|
|
67
|
-
}
|
|
68
66
|
function stripTrailingDots(value) {
|
|
69
67
|
let result = value;
|
|
70
68
|
while (result.endsWith('.'))
|
|
71
69
|
result = result.slice(0, -1);
|
|
72
70
|
return result;
|
|
73
71
|
}
|
|
72
|
+
function formatHostForUrl(hostname) {
|
|
73
|
+
if (hostname.includes(':') && !hostname.startsWith('['))
|
|
74
|
+
return `[${hostname}]`;
|
|
75
|
+
return hostname;
|
|
76
|
+
}
|
|
74
77
|
function normalizeHostname(value) {
|
|
75
78
|
const trimmed = value.trim();
|
|
76
79
|
if (!trimmed)
|
|
@@ -86,24 +89,29 @@ function normalizeHostValue(value) {
|
|
|
86
89
|
const raw = value.trim();
|
|
87
90
|
if (!raw)
|
|
88
91
|
return null;
|
|
92
|
+
// Full URL
|
|
89
93
|
if (raw.includes('://')) {
|
|
90
94
|
if (!URL.canParse(raw))
|
|
91
95
|
return null;
|
|
92
96
|
return normalizeHostname(new URL(raw).hostname);
|
|
93
97
|
}
|
|
98
|
+
// host[:port]
|
|
94
99
|
const candidateUrl = `http://${raw}`;
|
|
95
100
|
if (URL.canParse(candidateUrl)) {
|
|
96
101
|
return normalizeHostname(new URL(candidateUrl).hostname);
|
|
97
102
|
}
|
|
98
103
|
const lowered = raw.toLowerCase();
|
|
104
|
+
// [::1]:port
|
|
99
105
|
if (lowered.startsWith('[')) {
|
|
100
106
|
const end = lowered.indexOf(']');
|
|
101
107
|
if (end === -1)
|
|
102
108
|
return null;
|
|
103
109
|
return normalizeHostname(lowered.slice(1, end));
|
|
104
110
|
}
|
|
111
|
+
// Bare IPv6
|
|
105
112
|
if (isIP(lowered) === 6)
|
|
106
113
|
return stripTrailingDots(lowered);
|
|
114
|
+
// Split host:port (single colon only)
|
|
107
115
|
const firstColon = lowered.indexOf(':');
|
|
108
116
|
if (firstColon === -1)
|
|
109
117
|
return normalizeHostname(lowered);
|
|
@@ -159,27 +167,6 @@ function normalizeLocale(value) {
|
|
|
159
167
|
return undefined;
|
|
160
168
|
return trimmed;
|
|
161
169
|
}
|
|
162
|
-
function parseUrlEnv(value, name) {
|
|
163
|
-
if (!value)
|
|
164
|
-
return undefined;
|
|
165
|
-
if (!URL.canParse(value)) {
|
|
166
|
-
throw new ConfigError(`Invalid ${name} value: ${value}`);
|
|
167
|
-
}
|
|
168
|
-
return new URL(value);
|
|
169
|
-
}
|
|
170
|
-
function readUrlEnv(name) {
|
|
171
|
-
return parseUrlEnv(env[name], name);
|
|
172
|
-
}
|
|
173
|
-
function parseAllowedHosts(envValue) {
|
|
174
|
-
const hosts = new Set();
|
|
175
|
-
for (const entry of parseList(envValue)) {
|
|
176
|
-
const normalized = normalizeHostValue(entry);
|
|
177
|
-
if (normalized)
|
|
178
|
-
hosts.add(normalized);
|
|
179
|
-
}
|
|
180
|
-
return hosts;
|
|
181
|
-
}
|
|
182
|
-
const ALLOWED_LOG_LEVELS = new Set(LOG_LEVELS);
|
|
183
170
|
function isLogLevel(value) {
|
|
184
171
|
return ALLOWED_LOG_LEVELS.has(value);
|
|
185
172
|
}
|
|
@@ -202,6 +189,26 @@ function parsePort(envValue) {
|
|
|
202
189
|
return 0;
|
|
203
190
|
return parseInteger(envValue, 3000, 1024, 65535);
|
|
204
191
|
}
|
|
192
|
+
function parseUrlEnv(value, name) {
|
|
193
|
+
if (!value)
|
|
194
|
+
return undefined;
|
|
195
|
+
if (!URL.canParse(value)) {
|
|
196
|
+
throw new ConfigError(`Invalid ${name} value: ${value}`);
|
|
197
|
+
}
|
|
198
|
+
return new URL(value);
|
|
199
|
+
}
|
|
200
|
+
function readUrlEnv(name) {
|
|
201
|
+
return parseUrlEnv(env[name], name);
|
|
202
|
+
}
|
|
203
|
+
function parseAllowedHosts(envValue) {
|
|
204
|
+
const hosts = new Set();
|
|
205
|
+
for (const entry of parseList(envValue)) {
|
|
206
|
+
const normalized = normalizeHostValue(entry);
|
|
207
|
+
if (normalized)
|
|
208
|
+
hosts.add(normalized);
|
|
209
|
+
}
|
|
210
|
+
return hosts;
|
|
211
|
+
}
|
|
205
212
|
const MAX_HTML_BYTES = 10 * 1024 * 1024; // 10 MB
|
|
206
213
|
const MAX_INLINE_CONTENT_CHARS = 0;
|
|
207
214
|
const DEFAULT_SESSION_TTL_MS = 30 * 60 * 1000;
|
|
@@ -216,23 +223,28 @@ const DEFAULT_TOOL_TIMEOUT_MS = DEFAULT_FETCH_TIMEOUT_MS +
|
|
|
216
223
|
DEFAULT_TOOL_TIMEOUT_PADDING_MS;
|
|
217
224
|
function resolveWorkerResourceLimits() {
|
|
218
225
|
const limits = {};
|
|
226
|
+
let hasAny = false;
|
|
219
227
|
const maxOldGenerationSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_MAX_OLD_GENERATION_MB, 1);
|
|
220
228
|
const maxYoungGenerationSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_MAX_YOUNG_GENERATION_MB, 1);
|
|
221
229
|
const codeRangeSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_CODE_RANGE_MB, 1);
|
|
222
230
|
const stackSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_STACK_MB, 1);
|
|
223
231
|
if (maxOldGenerationSizeMb !== undefined) {
|
|
224
232
|
limits.maxOldGenerationSizeMb = maxOldGenerationSizeMb;
|
|
233
|
+
hasAny = true;
|
|
225
234
|
}
|
|
226
235
|
if (maxYoungGenerationSizeMb !== undefined) {
|
|
227
236
|
limits.maxYoungGenerationSizeMb = maxYoungGenerationSizeMb;
|
|
237
|
+
hasAny = true;
|
|
228
238
|
}
|
|
229
239
|
if (codeRangeSizeMb !== undefined) {
|
|
230
240
|
limits.codeRangeSizeMb = codeRangeSizeMb;
|
|
241
|
+
hasAny = true;
|
|
231
242
|
}
|
|
232
243
|
if (stackSizeMb !== undefined) {
|
|
233
244
|
limits.stackSizeMb = stackSizeMb;
|
|
245
|
+
hasAny = true;
|
|
234
246
|
}
|
|
235
|
-
return
|
|
247
|
+
return hasAny ? limits : undefined;
|
|
236
248
|
}
|
|
237
249
|
function readOAuthUrls(baseUrl) {
|
|
238
250
|
const issuerUrl = readUrlEnv('OAUTH_ISSUER_URL');
|
|
@@ -318,8 +330,8 @@ const host = (env.HOST ?? LOOPBACK_V4).trim();
|
|
|
318
330
|
const port = parsePort(env.PORT);
|
|
319
331
|
const maxConnections = parseInteger(env.SERVER_MAX_CONNECTIONS, 0, 0);
|
|
320
332
|
const blockPrivateConnections = parseBoolean(env.SERVER_BLOCK_PRIVATE_CONNECTIONS, false);
|
|
321
|
-
const baseUrl = new URL(`http://${formatHostForUrl(host)}:${port}`);
|
|
322
333
|
const allowRemote = parseBoolean(env.ALLOW_REMOTE, false);
|
|
334
|
+
const baseUrl = new URL(`http://${formatHostForUrl(host)}:${port}`);
|
|
323
335
|
const runtimeState = {
|
|
324
336
|
httpMode: false,
|
|
325
337
|
};
|
package/dist/dom-noise-removal.js
CHANGED
|
@@ -8,6 +8,7 @@ const DIALOG_MIN_CHARS_FOR_PRESERVATION = 500;
|
|
|
8
8
|
const NAV_FOOTER_MIN_CHARS_FOR_PRESERVATION = 500;
|
|
9
9
|
// Merged markers for fast rejection
|
|
10
10
|
const HTML_DOCUMENT_MARKERS = /<\s*(?:!doctype|html|head|body)\b/i;
|
|
11
|
+
const HTML_FRAGMENT_MARKERS = /<\s*(?:article|main|section|div|nav|footer|header|aside|table|ul|ol)\b/i;
|
|
11
12
|
// Split into smaller regexes to stay within sonarjs/regex-complexity limit
|
|
12
13
|
const NOISE_PATTERNS = [
|
|
13
14
|
/<\s*(?:script|style|noscript|iframe|nav|footer|header|form|button|input|select|textarea|svg|canvas)\b/i,
|
|
@@ -451,7 +452,9 @@ function mayContainNoise(html) {
|
|
|
451
452
|
return NOISE_PATTERNS.some((re) => re.test(sample));
|
|
452
453
|
}
|
|
453
454
|
export function removeNoiseFromHtml(html, document, baseUrl) {
|
|
454
|
-
const shouldParse = isFullDocumentHtml(html) ||
|
|
455
|
+
const shouldParse = isFullDocumentHtml(html) ||
|
|
456
|
+
mayContainNoise(html) ||
|
|
457
|
+
HTML_FRAGMENT_MARKERS.test(html);
|
|
455
458
|
if (!shouldParse)
|
|
456
459
|
return html;
|
|
457
460
|
try {
|
package/dist/fetch.d.ts
CHANGED
package/dist/fetch.js
CHANGED
|
@@ -4,7 +4,9 @@ import diagnosticsChannel from 'node:diagnostics_channel';
|
|
|
4
4
|
import dns from 'node:dns';
|
|
5
5
|
import { isIP } from 'node:net';
|
|
6
6
|
import { performance } from 'node:perf_hooks';
|
|
7
|
-
import { Readable } from 'node:stream';
|
|
7
|
+
import { PassThrough, Readable, Transform } from 'node:stream';
|
|
8
|
+
import { buffer as consumeBuffer } from 'node:stream/consumers';
|
|
9
|
+
import { finished, pipeline } from 'node:stream/promises';
|
|
8
10
|
import { setTimeout as delay } from 'node:timers/promises';
|
|
9
11
|
import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
|
|
10
12
|
import { config } from './config.js';
|
|
@@ -64,10 +66,13 @@ class UrlNormalizer {
|
|
|
64
66
|
if (trimmedUrl.length > this.constants.maxUrlLength) {
|
|
65
67
|
throw createValidationError(`URL exceeds maximum length of ${this.constants.maxUrlLength} characters`);
|
|
66
68
|
}
|
|
67
|
-
|
|
69
|
+
let url;
|
|
70
|
+
try {
|
|
71
|
+
url = new URL(trimmedUrl);
|
|
72
|
+
}
|
|
73
|
+
catch {
|
|
68
74
|
throw createValidationError('Invalid URL format');
|
|
69
75
|
}
|
|
70
|
-
const url = new URL(trimmedUrl);
|
|
71
76
|
if (url.protocol !== 'http:' && url.protocol !== 'https:') {
|
|
72
77
|
throw createValidationError(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`);
|
|
73
78
|
}
|
|
@@ -767,6 +772,11 @@ function cancelResponseBody(response) {
|
|
|
767
772
|
return;
|
|
768
773
|
void cancelPromise.catch(() => undefined);
|
|
769
774
|
}
|
|
775
|
+
class MaxBytesError extends Error {
|
|
776
|
+
constructor() {
|
|
777
|
+
super('max-bytes-reached');
|
|
778
|
+
}
|
|
779
|
+
}
|
|
770
780
|
class RedirectFollower {
|
|
771
781
|
fetchFn;
|
|
772
782
|
normalizeUrl;
|
|
@@ -821,10 +831,13 @@ class RedirectFollower {
|
|
|
821
831
|
throw createMissingRedirectLocationFetchError(currentUrl);
|
|
822
832
|
}
|
|
823
833
|
resolveRedirectTarget(baseUrl, location) {
|
|
824
|
-
|
|
834
|
+
let resolved;
|
|
835
|
+
try {
|
|
836
|
+
resolved = new URL(location, baseUrl);
|
|
837
|
+
}
|
|
838
|
+
catch {
|
|
825
839
|
throw createErrorWithCode('Invalid redirect target', 'EBADREDIRECT');
|
|
826
840
|
}
|
|
827
|
-
const resolved = new URL(location, baseUrl);
|
|
828
841
|
if (resolved.username || resolved.password) {
|
|
829
842
|
throw createErrorWithCode('Redirect target includes credentials', 'EBADREDIRECT');
|
|
830
843
|
}
|
|
@@ -868,6 +881,9 @@ function createDecoder(encoding) {
|
|
|
868
881
|
return new TextDecoder('utf-8');
|
|
869
882
|
}
|
|
870
883
|
}
|
|
884
|
+
function decodeBuffer(buffer, encoding) {
|
|
885
|
+
return createDecoder(encoding).decode(buffer);
|
|
886
|
+
}
|
|
871
887
|
function normalizeEncodingLabel(encoding) {
|
|
872
888
|
return encoding?.trim().toLowerCase() ?? '';
|
|
873
889
|
}
|
|
@@ -1006,102 +1022,136 @@ function isBinaryContent(buffer, encoding) {
|
|
|
1006
1022
|
}
|
|
1007
1023
|
class ResponseTextReader {
|
|
1008
1024
|
async read(response, url, maxBytes, signal, encoding) {
|
|
1009
|
-
const { buffer, encoding: effectiveEncoding } = await this.readBuffer(response, url, maxBytes, signal, encoding);
|
|
1010
|
-
const
|
|
1011
|
-
|
|
1012
|
-
return { text, size: buffer.byteLength };
|
|
1025
|
+
const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
|
|
1026
|
+
const text = decodeBuffer(buffer, effectiveEncoding);
|
|
1027
|
+
return { text, size: buffer.byteLength, truncated };
|
|
1013
1028
|
}
|
|
1014
1029
|
async readBuffer(response, url, maxBytes, signal, encoding) {
|
|
1015
1030
|
if (signal?.aborted) {
|
|
1016
1031
|
cancelResponseBody(response);
|
|
1017
1032
|
throw createAbortedFetchError(url);
|
|
1018
1033
|
}
|
|
1019
|
-
const limit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
|
|
1020
1034
|
if (!response.body) {
|
|
1021
|
-
|
|
1022
|
-
throw createCanceledFetchError(url);
|
|
1023
|
-
const arrayBuffer = await response.arrayBuffer();
|
|
1024
|
-
const length = Math.min(arrayBuffer.byteLength, limit);
|
|
1025
|
-
const buffer = new Uint8Array(arrayBuffer, 0, length);
|
|
1026
|
-
const effectiveEncoding = resolveEncoding(encoding, buffer) ?? encoding ?? 'utf-8';
|
|
1027
|
-
if (isBinaryContent(buffer, effectiveEncoding)) {
|
|
1028
|
-
throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
|
|
1029
|
-
}
|
|
1030
|
-
return { buffer, encoding: effectiveEncoding, size: buffer.byteLength };
|
|
1035
|
+
return this.readNonStreamBuffer(response, url, maxBytes, signal, encoding);
|
|
1031
1036
|
}
|
|
1032
|
-
return this.readStreamToBuffer(response.body, url,
|
|
1037
|
+
return this.readStreamToBuffer(response.body, url, maxBytes, signal, encoding);
|
|
1033
1038
|
}
|
|
1034
|
-
async
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1039
|
+
async readNonStreamBuffer(response, url, maxBytes, signal, encoding) {
|
|
1040
|
+
if (signal?.aborted)
|
|
1041
|
+
throw createCanceledFetchError(url);
|
|
1042
|
+
const limit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
|
|
1043
|
+
const contentLengthHeader = response.headers.get('content-length');
|
|
1044
|
+
if (contentLengthHeader && Number.isFinite(limit)) {
|
|
1045
|
+
const declared = Number.parseInt(contentLengthHeader, 10);
|
|
1046
|
+
if (!Number.isNaN(declared) && declared > limit) {
|
|
1047
|
+
throw new FetchError(`Response exceeds maximum size (${limit} bytes)`, url, 413, {
|
|
1048
|
+
reason: 'max_content_length',
|
|
1049
|
+
contentLength: declared,
|
|
1050
|
+
maxBytes: limit,
|
|
1051
|
+
});
|
|
1052
|
+
}
|
|
1053
|
+
}
|
|
1054
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
1055
|
+
const length = Math.min(arrayBuffer.byteLength, limit);
|
|
1056
|
+
const buffer = new Uint8Array(arrayBuffer, 0, length);
|
|
1057
|
+
const effectiveEncoding = resolveEncoding(encoding, buffer) ?? encoding ?? 'utf-8';
|
|
1058
|
+
if (isBinaryContent(buffer, effectiveEncoding)) {
|
|
1059
|
+
throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
|
|
1060
|
+
}
|
|
1061
|
+
const truncated = Number.isFinite(limit)
|
|
1062
|
+
? arrayBuffer.byteLength > limit
|
|
1063
|
+
: false;
|
|
1064
|
+
return {
|
|
1065
|
+
buffer,
|
|
1066
|
+
encoding: effectiveEncoding,
|
|
1067
|
+
size: buffer.byteLength,
|
|
1068
|
+
truncated,
|
|
1069
|
+
};
|
|
1038
1070
|
}
|
|
1039
1071
|
async readStreamToBuffer(stream, url, maxBytes, signal, encoding) {
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1072
|
+
const byteLimit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
|
|
1073
|
+
const captureChunks = byteLimit !== Number.POSITIVE_INFINITY;
|
|
1074
|
+
let effectiveEncoding = encoding ?? 'utf-8';
|
|
1075
|
+
let checkedBinary = false;
|
|
1043
1076
|
let total = 0;
|
|
1044
|
-
const
|
|
1045
|
-
const
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
|
|
1077
|
+
const chunks = [];
|
|
1078
|
+
const source = Readable.fromWeb(stream);
|
|
1079
|
+
const guard = new Transform({
|
|
1080
|
+
transform(chunk, _encoding, callback) {
|
|
1081
|
+
try {
|
|
1082
|
+
const buf = Buffer.isBuffer(chunk)
|
|
1083
|
+
? chunk
|
|
1084
|
+
: Buffer.from(chunk.buffer, chunk.byteOffset, chunk.byteLength);
|
|
1085
|
+
if (!checkedBinary) {
|
|
1086
|
+
checkedBinary = true;
|
|
1087
|
+
effectiveEncoding =
|
|
1088
|
+
resolveEncoding(encoding, buf) ?? encoding ?? 'utf-8';
|
|
1089
|
+
if (isBinaryContent(buf, effectiveEncoding)) {
|
|
1090
|
+
callback(new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' }));
|
|
1091
|
+
return;
|
|
1092
|
+
}
|
|
1061
1093
|
}
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1094
|
+
const newTotal = total + buf.length;
|
|
1095
|
+
if (newTotal > byteLimit) {
|
|
1096
|
+
const remaining = byteLimit - total;
|
|
1097
|
+
if (remaining > 0) {
|
|
1098
|
+
const slice = buf.subarray(0, remaining);
|
|
1099
|
+
total += remaining;
|
|
1100
|
+
if (captureChunks)
|
|
1101
|
+
chunks.push(slice);
|
|
1102
|
+
this.push(slice);
|
|
1103
|
+
}
|
|
1104
|
+
callback(new MaxBytesError());
|
|
1105
|
+
return;
|
|
1069
1106
|
}
|
|
1070
|
-
|
|
1071
|
-
|
|
1107
|
+
total = newTotal;
|
|
1108
|
+
if (captureChunks)
|
|
1109
|
+
chunks.push(buf);
|
|
1110
|
+
callback(null, buf);
|
|
1072
1111
|
}
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
}
|
|
1077
|
-
}
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1112
|
+
catch (error) {
|
|
1113
|
+
callback(error instanceof Error ? error : new Error(String(error)));
|
|
1114
|
+
}
|
|
1115
|
+
},
|
|
1116
|
+
});
|
|
1117
|
+
const guarded = source.pipe(guard);
|
|
1118
|
+
const abortHandler = () => {
|
|
1119
|
+
source.destroy();
|
|
1120
|
+
guard.destroy();
|
|
1121
|
+
};
|
|
1122
|
+
if (signal) {
|
|
1123
|
+
signal.addEventListener('abort', abortHandler, { once: true });
|
|
1081
1124
|
}
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1125
|
+
try {
|
|
1126
|
+
const buffer = await consumeBuffer(guarded);
|
|
1127
|
+
return {
|
|
1128
|
+
buffer,
|
|
1129
|
+
encoding: effectiveEncoding,
|
|
1130
|
+
size: total,
|
|
1131
|
+
truncated: false,
|
|
1132
|
+
};
|
|
1085
1133
|
}
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1134
|
+
catch (error) {
|
|
1135
|
+
if (signal?.aborted)
|
|
1136
|
+
throw createAbortedFetchError(url);
|
|
1137
|
+
if (error instanceof FetchError)
|
|
1138
|
+
throw error;
|
|
1139
|
+
if (error instanceof MaxBytesError) {
|
|
1140
|
+
source.destroy();
|
|
1141
|
+
guard.destroy();
|
|
1142
|
+
return {
|
|
1143
|
+
buffer: Buffer.concat(chunks, total),
|
|
1144
|
+
encoding: effectiveEncoding,
|
|
1145
|
+
size: total,
|
|
1146
|
+
truncated: true,
|
|
1147
|
+
};
|
|
1148
|
+
}
|
|
1094
1149
|
throw error;
|
|
1095
|
-
if (signal?.aborted)
|
|
1096
|
-
throw createAbortedFetchError(url);
|
|
1097
|
-
throw error;
|
|
1098
|
-
}
|
|
1099
|
-
async cancelReaderQuietly(reader) {
|
|
1100
|
-
try {
|
|
1101
|
-
await reader.cancel();
|
|
1102
1150
|
}
|
|
1103
|
-
|
|
1104
|
-
|
|
1151
|
+
finally {
|
|
1152
|
+
if (signal) {
|
|
1153
|
+
signal.removeEventListener('abort', abortHandler);
|
|
1154
|
+
}
|
|
1105
1155
|
}
|
|
1106
1156
|
}
|
|
1107
1157
|
}
|
|
@@ -1169,8 +1219,12 @@ function isTextLikeMediaType(mediaType) {
|
|
|
1169
1219
|
}
|
|
1170
1220
|
function assertSupportedContentType(contentType, url) {
|
|
1171
1221
|
const mediaType = resolveMediaType(contentType);
|
|
1172
|
-
if (!mediaType)
|
|
1222
|
+
if (!mediaType) {
|
|
1223
|
+
logDebug('No Content-Type header; relying on binary-content detection', {
|
|
1224
|
+
url: redactUrl(url),
|
|
1225
|
+
});
|
|
1173
1226
|
return;
|
|
1227
|
+
}
|
|
1174
1228
|
if (!isTextLikeMediaType(mediaType)) {
|
|
1175
1229
|
throw new FetchError(`Unsupported content type: ${mediaType}`, url);
|
|
1176
1230
|
}
|
|
@@ -1327,7 +1381,8 @@ async function decodeResponseIfNeeded(response, url, signal) {
|
|
|
1327
1381
|
throw createUnsupportedContentEncodingError(url, encodingHeader ?? encoding);
|
|
1328
1382
|
}
|
|
1329
1383
|
const sourceStream = Readable.fromWeb(createPumpedStream(initialChunk, reader));
|
|
1330
|
-
const decodedNodeStream =
|
|
1384
|
+
const decodedNodeStream = new PassThrough();
|
|
1385
|
+
const pipelinePromise = pipeline(sourceStream, decompressor, decodedNodeStream);
|
|
1331
1386
|
const abortHandler = () => {
|
|
1332
1387
|
sourceStream.destroy();
|
|
1333
1388
|
decompressor.destroy();
|
|
@@ -1336,15 +1391,15 @@ async function decodeResponseIfNeeded(response, url, signal) {
|
|
|
1336
1391
|
if (signal) {
|
|
1337
1392
|
signal.addEventListener('abort', abortHandler, { once: true });
|
|
1338
1393
|
}
|
|
1394
|
+
void pipelinePromise.catch((error) => {
|
|
1395
|
+
decodedNodeStream.destroy(error instanceof Error ? error : new Error(String(error)));
|
|
1396
|
+
});
|
|
1339
1397
|
const decodedBody = Readable.toWeb(decodedNodeStream);
|
|
1340
1398
|
const headers = new Headers(response.headers);
|
|
1341
1399
|
headers.delete('content-encoding');
|
|
1342
1400
|
headers.delete('content-length');
|
|
1343
1401
|
if (signal) {
|
|
1344
|
-
decodedNodeStream
|
|
1345
|
-
signal.removeEventListener('abort', abortHandler);
|
|
1346
|
-
});
|
|
1347
|
-
decodedNodeStream.once('error', () => {
|
|
1402
|
+
void finished(decodedNodeStream, { cleanup: true }).finally(() => {
|
|
1348
1403
|
signal.removeEventListener('abort', abortHandler);
|
|
1349
1404
|
});
|
|
1350
1405
|
}
|
|
@@ -1365,19 +1420,21 @@ async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry,
|
|
|
1365
1420
|
assertSupportedContentType(contentType, finalUrl);
|
|
1366
1421
|
const declaredEncoding = getCharsetFromContentType(contentType ?? null);
|
|
1367
1422
|
if (mode === 'text') {
|
|
1368
|
-
const { text, size } = await reader.read(decodedResponse, finalUrl, maxBytes, signal, declaredEncoding);
|
|
1423
|
+
const { text, size, truncated } = await reader.read(decodedResponse, finalUrl, maxBytes, signal, declaredEncoding);
|
|
1369
1424
|
telemetry.recordResponse(ctx, decodedResponse, size);
|
|
1370
|
-
return { kind: 'text', text, size };
|
|
1425
|
+
return { kind: 'text', text, size, truncated };
|
|
1371
1426
|
}
|
|
1372
|
-
const { buffer, encoding, size } = await reader.readBuffer(decodedResponse, finalUrl, maxBytes, signal, declaredEncoding);
|
|
1427
|
+
const { buffer, encoding, size, truncated } = await reader.readBuffer(decodedResponse, finalUrl, maxBytes, signal, declaredEncoding);
|
|
1373
1428
|
telemetry.recordResponse(ctx, decodedResponse, size);
|
|
1374
|
-
return { kind: 'buffer', buffer, encoding, size };
|
|
1429
|
+
return { kind: 'buffer', buffer, encoding, size, truncated };
|
|
1375
1430
|
}
|
|
1376
1431
|
function extractHostname(url) {
|
|
1377
|
-
|
|
1432
|
+
try {
|
|
1433
|
+
return new URL(url).hostname;
|
|
1434
|
+
}
|
|
1435
|
+
catch {
|
|
1378
1436
|
throw createErrorWithCode('Invalid URL', 'EINVAL');
|
|
1379
1437
|
}
|
|
1380
|
-
return new URL(url).hostname;
|
|
1381
1438
|
}
|
|
1382
1439
|
function createDnsPreflight(dnsResolver) {
|
|
1383
1440
|
return async (url, signal) => {
|
|
@@ -1418,7 +1475,11 @@ class HttpFetcher {
|
|
|
1418
1475
|
const payload = await readAndRecordDecodedResponse(response, finalUrl, ctx, this.telemetry, this.reader, this.fetcherConfig.maxContentLength, mode, init.signal ?? undefined);
|
|
1419
1476
|
if (payload.kind === 'text')
|
|
1420
1477
|
return payload.text;
|
|
1421
|
-
return {
|
|
1478
|
+
return {
|
|
1479
|
+
buffer: payload.buffer,
|
|
1480
|
+
encoding: payload.encoding,
|
|
1481
|
+
truncated: payload.truncated,
|
|
1482
|
+
};
|
|
1422
1483
|
}
|
|
1423
1484
|
catch (error) {
|
|
1424
1485
|
const mapped = mapFetchError(error, normalizedUrl, timeoutMs);
|
|
@@ -1468,11 +1529,13 @@ export async function fetchWithRedirects(url, init, maxRedirects) {
|
|
|
1468
1529
|
}
|
|
1469
1530
|
export async function readResponseText(response, url, maxBytes, signal, encoding) {
|
|
1470
1531
|
const decodedResponse = await decodeResponseIfNeeded(response, url, signal);
|
|
1471
|
-
|
|
1532
|
+
const { text, size } = await responseReader.read(decodedResponse, url, maxBytes, signal, encoding);
|
|
1533
|
+
return { text, size };
|
|
1472
1534
|
}
|
|
1473
1535
|
export async function readResponseBuffer(response, url, maxBytes, signal, encoding) {
|
|
1474
1536
|
const decodedResponse = await decodeResponseIfNeeded(response, url, signal);
|
|
1475
|
-
|
|
1537
|
+
const { buffer, encoding: resolvedEncoding, size, } = await responseReader.readBuffer(decodedResponse, url, maxBytes, signal, encoding);
|
|
1538
|
+
return { buffer, encoding: resolvedEncoding, size };
|
|
1476
1539
|
}
|
|
1477
1540
|
export async function fetchNormalizedUrl(normalizedUrl, options) {
|
|
1478
1541
|
return httpFetcher.fetchNormalizedUrl(normalizedUrl, options);
|