@j0hanz/fetch-url-mcp 1.1.1 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/AGENTS.md +3 -2
- package/dist/cache.js +5 -2
- package/dist/cli.js +12 -5
- package/dist/config.js +19 -18
- package/dist/crypto.js +7 -4
- package/dist/dom-noise-removal.js +14 -11
- package/dist/errors.js +5 -2
- package/dist/fetch-content.d.ts +4 -0
- package/dist/fetch-content.js +163 -0
- package/dist/fetch-stream.d.ts +4 -0
- package/dist/fetch-stream.js +28 -0
- package/dist/fetch.js +7 -181
- package/dist/host-normalization.js +7 -6
- package/dist/http-native.js +6 -4
- package/dist/index.js +7 -4
- package/dist/ip-blocklist.js +3 -3
- package/dist/json.js +3 -2
- package/dist/language-detection.js +1 -5
- package/dist/markdown-cleanup.js +8 -5
- package/dist/mcp-validator.js +7 -8
- package/dist/mcp.js +32 -17
- package/dist/observability.js +9 -8
- package/dist/prompts.js +2 -10
- package/dist/resources.js +6 -4
- package/dist/server-tuning.js +20 -15
- package/dist/server.js +4 -8
- package/dist/session.js +7 -4
- package/dist/tasks.js +4 -4
- package/dist/timer-utils.js +4 -1
- package/dist/tools.js +8 -6
- package/dist/transform.js +6 -3
- package/dist/type-guards.js +3 -3
- package/dist/workers/transform-child.js +8 -4
- package/dist/workers/transform-worker.js +4 -3
- package/package.json +2 -1
package/dist/AGENTS.md
CHANGED
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
- `tests/` — Unit/integration tests (46+ test files) using Node.js built-in test runner
|
|
35
35
|
- `scripts/` — Build & test orchestration (`tasks.mjs`)
|
|
36
36
|
- `assets/` — Server icon (`logo.svg`)
|
|
37
|
-
- `.github/workflows/` — CI/CD (`release.yml`: lint → type-check → test → build → publish to npm, MCP Registry, Docker)
|
|
37
|
+
- `.github/workflows/` — CI/CD (`release.yml`: lint → type-check → type-check:tests → test → build → publish to npm, MCP Registry, Docker)
|
|
38
38
|
|
|
39
39
|
> Ignore: `dist/`, `node_modules/`, `coverage/`, `.cache/`, `.tsbuildinfo`
|
|
40
40
|
|
|
@@ -49,6 +49,7 @@ All commands verified from `.github/workflows/release.yml` (CI) and `package.jso
|
|
|
49
49
|
- **Start:** `npm run start` → `node dist/index.js` (see `package.json`)
|
|
50
50
|
- **Build:** `npm run build` → `node scripts/tasks.mjs build` — cleans `dist/`, compiles TS, validates `instructions.md`, copies assets, sets executable bit (see `scripts/tasks.mjs`, `package.json`)
|
|
51
51
|
- **Type-check:** `npm run type-check` → `tsc -p tsconfig.json --noEmit` (see `scripts/tasks.mjs`, `.github/workflows/release.yml`)
|
|
52
|
+
- **Type-check (tests):** `npm run type-check:tests` → build output + `tsc -p tsconfig.tests.json --noEmit` (see `scripts/tasks.mjs`, `.github/workflows/release.yml`)
|
|
52
53
|
- **Lint:** `npm run lint` → `eslint .` (see `package.json`, `.github/workflows/release.yml`)
|
|
53
54
|
- **Lint (fix):** `npm run lint:fix` → `eslint . --fix` (see `package.json`)
|
|
54
55
|
- **Format:** `npm run format` → `prettier --write .` (see `package.json`)
|
|
@@ -135,7 +136,7 @@ All commands verified from `.github/workflows/release.yml` (CI) and `package.jso
|
|
|
135
136
|
- Config values temporarily overridden per test with `try/finally` cleanup (observed in `tests/fetch-url-tool.test.ts`)
|
|
136
137
|
- Worker pool shutdown in `after()` hooks for clean teardown (observed in `tests/fetch-url-tool.test.ts`)
|
|
137
138
|
- No external services (DB/containers) required for tests
|
|
138
|
-
- **CI validation order:** `lint` → `type-check` → `test` → `build` (see `.github/workflows/release.yml`)
|
|
139
|
+
- **CI validation order:** `lint` → `type-check` → `type-check:tests` → `test` → `build` (see `.github/workflows/release.yml`)
|
|
139
140
|
|
|
140
141
|
## 7) Common Pitfalls (Verified Only)
|
|
141
142
|
|
package/dist/cache.js
CHANGED
|
@@ -93,13 +93,16 @@ class InMemoryCacheStore {
|
|
|
93
93
|
isEnabled() {
|
|
94
94
|
return config.cache.enabled;
|
|
95
95
|
}
|
|
96
|
+
isExpired(entry, now = Date.now()) {
|
|
97
|
+
return entry.expiresAtMs <= now;
|
|
98
|
+
}
|
|
96
99
|
keys() {
|
|
97
100
|
if (!this.isEnabled())
|
|
98
101
|
return [];
|
|
99
102
|
const now = Date.now();
|
|
100
103
|
const result = [];
|
|
101
104
|
for (const [key, entry] of this.entries) {
|
|
102
|
-
if (entry
|
|
105
|
+
if (!this.isExpired(entry, now))
|
|
103
106
|
result.push(key);
|
|
104
107
|
}
|
|
105
108
|
return result;
|
|
@@ -130,7 +133,7 @@ class InMemoryCacheStore {
|
|
|
130
133
|
if (!entry)
|
|
131
134
|
return undefined;
|
|
132
135
|
const now = Date.now();
|
|
133
|
-
if (entry
|
|
136
|
+
if (this.isExpired(entry, now)) {
|
|
134
137
|
this.delete(cacheKey);
|
|
135
138
|
this.notify(cacheKey, true);
|
|
136
139
|
return undefined;
|
package/dist/cli.js
CHANGED
|
@@ -19,6 +19,17 @@ const optionSchema = {
|
|
|
19
19
|
function toErrorMessage(error) {
|
|
20
20
|
return error instanceof Error ? error.message : String(error);
|
|
21
21
|
}
|
|
22
|
+
function toBoolean(value) {
|
|
23
|
+
return value === true;
|
|
24
|
+
}
|
|
25
|
+
function buildCliValues(values) {
|
|
26
|
+
const { stdio, help, version } = values;
|
|
27
|
+
return {
|
|
28
|
+
stdio: toBoolean(stdio),
|
|
29
|
+
help: toBoolean(help),
|
|
30
|
+
version: toBoolean(version),
|
|
31
|
+
};
|
|
32
|
+
}
|
|
22
33
|
export function renderCliUsage() {
|
|
23
34
|
return `${usageLines.join('\n')}\n`;
|
|
24
35
|
}
|
|
@@ -32,11 +43,7 @@ export function parseCliArgs(args) {
|
|
|
32
43
|
});
|
|
33
44
|
return {
|
|
34
45
|
ok: true,
|
|
35
|
-
values:
|
|
36
|
-
stdio: values.stdio,
|
|
37
|
-
help: values.help,
|
|
38
|
-
version: values.version,
|
|
39
|
-
},
|
|
46
|
+
values: buildCliValues(values),
|
|
40
47
|
};
|
|
41
48
|
}
|
|
42
49
|
catch (error) {
|
package/dist/config.js
CHANGED
|
@@ -233,24 +233,25 @@ const RESOLVED_TASKS_MAX_PER_OWNER = Math.min(DEFAULT_TASKS_MAX_PER_OWNER, DEFAU
|
|
|
233
233
|
function resolveWorkerResourceLimits() {
|
|
234
234
|
const limits = {};
|
|
235
235
|
let hasAny = false;
|
|
236
|
-
const
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
236
|
+
const entries = [
|
|
237
|
+
[
|
|
238
|
+
'maxOldGenerationSizeMb',
|
|
239
|
+
parseOptionalInteger(env['TRANSFORM_WORKER_MAX_OLD_GENERATION_MB'], 1),
|
|
240
|
+
],
|
|
241
|
+
[
|
|
242
|
+
'maxYoungGenerationSizeMb',
|
|
243
|
+
parseOptionalInteger(env['TRANSFORM_WORKER_MAX_YOUNG_GENERATION_MB'], 1),
|
|
244
|
+
],
|
|
245
|
+
[
|
|
246
|
+
'codeRangeSizeMb',
|
|
247
|
+
parseOptionalInteger(env['TRANSFORM_WORKER_CODE_RANGE_MB'], 1),
|
|
248
|
+
],
|
|
249
|
+
['stackSizeMb', parseOptionalInteger(env['TRANSFORM_WORKER_STACK_MB'], 1)],
|
|
250
|
+
];
|
|
251
|
+
for (const [key, value] of entries) {
|
|
252
|
+
if (value === undefined)
|
|
253
|
+
continue;
|
|
254
|
+
limits[key] = value;
|
|
254
255
|
hasAny = true;
|
|
255
256
|
}
|
|
256
257
|
return hasAny ? limits : undefined;
|
package/dist/crypto.js
CHANGED
|
@@ -18,6 +18,11 @@ function assertAllowedAlgorithm(algorithm) {
|
|
|
18
18
|
throw new Error(`Hash algorithm not allowed: ${algorithm}`);
|
|
19
19
|
}
|
|
20
20
|
}
|
|
21
|
+
function padBuffer(buffer, length) {
|
|
22
|
+
const padded = Buffer.alloc(length);
|
|
23
|
+
buffer.copy(padded);
|
|
24
|
+
return padded;
|
|
25
|
+
}
|
|
21
26
|
export function timingSafeEqualUtf8(a, b) {
|
|
22
27
|
const aBuffer = Buffer.from(a, 'utf8');
|
|
23
28
|
const bBuffer = Buffer.from(b, 'utf8');
|
|
@@ -26,10 +31,8 @@ export function timingSafeEqualUtf8(a, b) {
|
|
|
26
31
|
}
|
|
27
32
|
// Avoid early return timing differences on length mismatch.
|
|
28
33
|
const maxLength = Math.max(aBuffer.length, bBuffer.length);
|
|
29
|
-
const paddedA =
|
|
30
|
-
const paddedB =
|
|
31
|
-
aBuffer.copy(paddedA);
|
|
32
|
-
bBuffer.copy(paddedB);
|
|
34
|
+
const paddedA = padBuffer(aBuffer, maxLength);
|
|
35
|
+
const paddedB = padBuffer(bBuffer, maxLength);
|
|
33
36
|
return timingSafeEqual(paddedA, paddedB) && aBuffer.length === bBuffer.length;
|
|
34
37
|
}
|
|
35
38
|
function hashHex(algorithm, input) {
|
|
@@ -103,22 +103,25 @@ function buildTokenRegex(tokens) {
|
|
|
103
103
|
return NO_MATCH_REGEX;
|
|
104
104
|
return new RegExp(`(?:^|[^a-z0-9])(?:${[...tokens].map(escapeRegexLiteral).join('|')})(?:$|[^a-z0-9])`, 'i');
|
|
105
105
|
}
|
|
106
|
+
function addTokens(target, tokens) {
|
|
107
|
+
for (const token of tokens)
|
|
108
|
+
target.add(token);
|
|
109
|
+
}
|
|
106
110
|
function getPromoMatchers(currentConfig, flags) {
|
|
107
111
|
const baseTokens = new Set(PROMO_TOKENS_ALWAYS);
|
|
108
112
|
const aggressiveTokens = new Set();
|
|
109
113
|
if (currentConfig.aggressiveMode) {
|
|
110
|
-
|
|
111
|
-
|
|
114
|
+
addTokens(aggressiveTokens, PROMO_TOKENS_AGGRESSIVE);
|
|
115
|
+
}
|
|
116
|
+
if (flags.cookieBanners) {
|
|
117
|
+
addTokens(baseTokens, PROMO_TOKENS_BY_CATEGORY['cookie-banners']);
|
|
118
|
+
}
|
|
119
|
+
if (flags.newsletters) {
|
|
120
|
+
addTokens(baseTokens, PROMO_TOKENS_BY_CATEGORY['newsletters']);
|
|
121
|
+
}
|
|
122
|
+
if (flags.socialShare) {
|
|
123
|
+
addTokens(baseTokens, PROMO_TOKENS_BY_CATEGORY['social-share']);
|
|
112
124
|
}
|
|
113
|
-
if (flags.cookieBanners)
|
|
114
|
-
for (const t of PROMO_TOKENS_BY_CATEGORY['cookie-banners'])
|
|
115
|
-
baseTokens.add(t);
|
|
116
|
-
if (flags.newsletters)
|
|
117
|
-
for (const t of PROMO_TOKENS_BY_CATEGORY['newsletters'])
|
|
118
|
-
baseTokens.add(t);
|
|
119
|
-
if (flags.socialShare)
|
|
120
|
-
for (const t of PROMO_TOKENS_BY_CATEGORY['social-share'])
|
|
121
|
-
baseTokens.add(t);
|
|
122
125
|
for (const t of currentConfig.extraTokens) {
|
|
123
126
|
const n = t.toLowerCase().trim();
|
|
124
127
|
if (n)
|
package/dist/errors.js
CHANGED
|
@@ -19,17 +19,20 @@ export class FetchError extends Error {
|
|
|
19
19
|
export function getErrorMessage(error) {
|
|
20
20
|
if (isError(error))
|
|
21
21
|
return error.message;
|
|
22
|
-
if (
|
|
22
|
+
if (isNonEmptyString(error))
|
|
23
23
|
return error;
|
|
24
24
|
if (isErrorWithMessage(error))
|
|
25
25
|
return error.message;
|
|
26
26
|
return formatUnknownError(error);
|
|
27
27
|
}
|
|
28
|
+
function isNonEmptyString(value) {
|
|
29
|
+
return typeof value === 'string' && value.length > 0;
|
|
30
|
+
}
|
|
28
31
|
function isErrorWithMessage(error) {
|
|
29
32
|
if (!isObject(error))
|
|
30
33
|
return false;
|
|
31
34
|
const { message } = error;
|
|
32
|
-
return
|
|
35
|
+
return isNonEmptyString(message);
|
|
33
36
|
}
|
|
34
37
|
function formatUnknownError(error) {
|
|
35
38
|
if (error === null || error === undefined)
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export declare function getCharsetFromContentType(contentType: string | null): string | undefined;
|
|
2
|
+
export declare function decodeBuffer(buffer: Uint8Array, encoding: string): string;
|
|
3
|
+
export declare function resolveEncoding(declaredEncoding: string | undefined, sample: Uint8Array): string | undefined;
|
|
4
|
+
export declare function isBinaryContent(buffer: Uint8Array, encoding?: string): boolean;
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { Buffer } from 'node:buffer';
|
|
2
|
+
export function getCharsetFromContentType(contentType) {
|
|
3
|
+
if (!contentType)
|
|
4
|
+
return undefined;
|
|
5
|
+
const match = /charset=([^;]+)/i.exec(contentType);
|
|
6
|
+
const charsetGroup = match?.[1];
|
|
7
|
+
if (!charsetGroup)
|
|
8
|
+
return undefined;
|
|
9
|
+
let charset = charsetGroup.trim();
|
|
10
|
+
if (charset.startsWith('"') && charset.endsWith('"')) {
|
|
11
|
+
charset = charset.slice(1, -1);
|
|
12
|
+
}
|
|
13
|
+
return charset.trim();
|
|
14
|
+
}
|
|
15
|
+
function createDecoder(encoding) {
|
|
16
|
+
if (!encoding)
|
|
17
|
+
return new TextDecoder('utf-8');
|
|
18
|
+
try {
|
|
19
|
+
return new TextDecoder(encoding);
|
|
20
|
+
}
|
|
21
|
+
catch {
|
|
22
|
+
return new TextDecoder('utf-8');
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
export function decodeBuffer(buffer, encoding) {
|
|
26
|
+
return createDecoder(encoding).decode(buffer);
|
|
27
|
+
}
|
|
28
|
+
function normalizeEncodingLabel(encoding) {
|
|
29
|
+
return encoding?.trim().toLowerCase() ?? '';
|
|
30
|
+
}
|
|
31
|
+
function isUnicodeWideEncoding(encoding) {
|
|
32
|
+
const normalized = normalizeEncodingLabel(encoding);
|
|
33
|
+
return (normalized.startsWith('utf-16') ||
|
|
34
|
+
normalized.startsWith('utf-32') ||
|
|
35
|
+
normalized === 'ucs-2' ||
|
|
36
|
+
normalized === 'unicodefffe' ||
|
|
37
|
+
normalized === 'unicodefeff');
|
|
38
|
+
}
|
|
39
|
+
const BOM_SIGNATURES = [
|
|
40
|
+
// 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
|
|
41
|
+
{ bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
|
|
42
|
+
{ bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
|
|
43
|
+
{ bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
|
|
44
|
+
{ bytes: [0xff, 0xfe], encoding: 'utf-16le' },
|
|
45
|
+
{ bytes: [0xfe, 0xff], encoding: 'utf-16be' },
|
|
46
|
+
];
|
|
47
|
+
function startsWithBytes(buffer, signature) {
|
|
48
|
+
const sigLen = signature.length;
|
|
49
|
+
if (buffer.length < sigLen)
|
|
50
|
+
return false;
|
|
51
|
+
for (let i = 0; i < sigLen; i += 1) {
|
|
52
|
+
if (buffer[i] !== signature[i])
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
return true;
|
|
56
|
+
}
|
|
57
|
+
function detectBomEncoding(buffer) {
|
|
58
|
+
for (const { bytes, encoding } of BOM_SIGNATURES) {
|
|
59
|
+
if (startsWithBytes(buffer, bytes))
|
|
60
|
+
return encoding;
|
|
61
|
+
}
|
|
62
|
+
return undefined;
|
|
63
|
+
}
|
|
64
|
+
function readQuotedValue(input, startIndex) {
|
|
65
|
+
const first = input[startIndex];
|
|
66
|
+
if (!first)
|
|
67
|
+
return '';
|
|
68
|
+
const quoted = first === '"' || first === "'";
|
|
69
|
+
if (quoted) {
|
|
70
|
+
const end = input.indexOf(first, startIndex + 1);
|
|
71
|
+
return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
|
|
72
|
+
}
|
|
73
|
+
const tail = input.slice(startIndex);
|
|
74
|
+
const stop = tail.search(/[\s/>]/);
|
|
75
|
+
return (stop === -1 ? tail : tail.slice(0, stop)).trim();
|
|
76
|
+
}
|
|
77
|
+
function extractHtmlCharset(headSnippet) {
|
|
78
|
+
const lower = headSnippet.toLowerCase();
|
|
79
|
+
const charsetToken = 'charset=';
|
|
80
|
+
const charsetIdx = lower.indexOf(charsetToken);
|
|
81
|
+
if (charsetIdx === -1)
|
|
82
|
+
return undefined;
|
|
83
|
+
const valueStart = charsetIdx + charsetToken.length;
|
|
84
|
+
const charset = readQuotedValue(headSnippet, valueStart);
|
|
85
|
+
return charset ? charset.toLowerCase() : undefined;
|
|
86
|
+
}
|
|
87
|
+
function extractXmlEncoding(headSnippet) {
|
|
88
|
+
const lower = headSnippet.toLowerCase();
|
|
89
|
+
const xmlStart = lower.indexOf('<?xml');
|
|
90
|
+
if (xmlStart === -1)
|
|
91
|
+
return undefined;
|
|
92
|
+
const xmlEnd = lower.indexOf('?>', xmlStart);
|
|
93
|
+
const declaration = xmlEnd === -1
|
|
94
|
+
? headSnippet.slice(xmlStart)
|
|
95
|
+
: headSnippet.slice(xmlStart, xmlEnd + 2);
|
|
96
|
+
const declarationLower = declaration.toLowerCase();
|
|
97
|
+
const encodingToken = 'encoding=';
|
|
98
|
+
const encodingIdx = declarationLower.indexOf(encodingToken);
|
|
99
|
+
if (encodingIdx === -1)
|
|
100
|
+
return undefined;
|
|
101
|
+
const valueStart = encodingIdx + encodingToken.length;
|
|
102
|
+
const encoding = readQuotedValue(declaration, valueStart);
|
|
103
|
+
return encoding ? encoding.toLowerCase() : undefined;
|
|
104
|
+
}
|
|
105
|
+
function detectHtmlDeclaredEncoding(buffer) {
|
|
106
|
+
const scanSize = Math.min(buffer.length, 8_192);
|
|
107
|
+
if (scanSize === 0)
|
|
108
|
+
return undefined;
|
|
109
|
+
const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
|
|
110
|
+
return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
|
|
111
|
+
}
|
|
112
|
+
export function resolveEncoding(declaredEncoding, sample) {
|
|
113
|
+
const bomEncoding = detectBomEncoding(sample);
|
|
114
|
+
if (bomEncoding)
|
|
115
|
+
return bomEncoding;
|
|
116
|
+
if (declaredEncoding)
|
|
117
|
+
return declaredEncoding;
|
|
118
|
+
return detectHtmlDeclaredEncoding(sample);
|
|
119
|
+
}
|
|
120
|
+
const BINARY_SIGNATURES = [
|
|
121
|
+
[0x25, 0x50, 0x44, 0x46],
|
|
122
|
+
[0x89, 0x50, 0x4e, 0x47],
|
|
123
|
+
[0x47, 0x49, 0x46, 0x38],
|
|
124
|
+
[0xff, 0xd8, 0xff],
|
|
125
|
+
[0x52, 0x49, 0x46, 0x46],
|
|
126
|
+
[0x42, 0x4d],
|
|
127
|
+
[0x49, 0x49, 0x2a, 0x00],
|
|
128
|
+
[0x4d, 0x4d, 0x00, 0x2a],
|
|
129
|
+
[0x00, 0x00, 0x01, 0x00],
|
|
130
|
+
[0x50, 0x4b, 0x03, 0x04],
|
|
131
|
+
[0x1f, 0x8b],
|
|
132
|
+
[0x42, 0x5a, 0x68],
|
|
133
|
+
[0x52, 0x61, 0x72, 0x21],
|
|
134
|
+
[0x37, 0x7a, 0xbc, 0xaf],
|
|
135
|
+
[0x7f, 0x45, 0x4c, 0x46],
|
|
136
|
+
[0x4d, 0x5a],
|
|
137
|
+
[0xcf, 0xfa, 0xed, 0xfe],
|
|
138
|
+
[0x00, 0x61, 0x73, 0x6d],
|
|
139
|
+
[0x1a, 0x45, 0xdf, 0xa3],
|
|
140
|
+
[0x66, 0x74, 0x79, 0x70],
|
|
141
|
+
[0x46, 0x4c, 0x56],
|
|
142
|
+
[0x49, 0x44, 0x33],
|
|
143
|
+
[0xff, 0xfb],
|
|
144
|
+
[0xff, 0xfa],
|
|
145
|
+
[0x4f, 0x67, 0x67, 0x53],
|
|
146
|
+
[0x66, 0x4c, 0x61, 0x43],
|
|
147
|
+
[0x4d, 0x54, 0x68, 0x64],
|
|
148
|
+
[0x77, 0x4f, 0x46, 0x46],
|
|
149
|
+
[0x00, 0x01, 0x00, 0x00],
|
|
150
|
+
[0x4f, 0x54, 0x54, 0x4f],
|
|
151
|
+
[0x53, 0x51, 0x4c, 0x69],
|
|
152
|
+
];
|
|
153
|
+
function hasNullByte(buffer, limit) {
|
|
154
|
+
const checkLen = Math.min(buffer.length, limit);
|
|
155
|
+
return buffer.subarray(0, checkLen).includes(0x00);
|
|
156
|
+
}
|
|
157
|
+
export function isBinaryContent(buffer, encoding) {
|
|
158
|
+
for (const signature of BINARY_SIGNATURES) {
|
|
159
|
+
if (startsWithBytes(buffer, signature))
|
|
160
|
+
return true;
|
|
161
|
+
}
|
|
162
|
+
return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
|
|
163
|
+
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import type { ReadableStream as NodeReadableStream } from 'node:stream/web';
|
|
3
|
+
export declare function toNodeReadableStream(stream: ReadableStream<Uint8Array>, url: string, stage: string): NodeReadableStream<Uint8Array>;
|
|
4
|
+
export declare function toWebReadableStream(stream: Readable, url: string, stage: string): ReadableStream<Uint8Array>;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import { FetchError } from './errors.js';
|
|
3
|
+
import { isObject } from './type-guards.js';
|
|
4
|
+
function isReadableStreamLike(value) {
|
|
5
|
+
if (!isObject(value))
|
|
6
|
+
return false;
|
|
7
|
+
return (typeof value['getReader'] === 'function' &&
|
|
8
|
+
typeof value['cancel'] === 'function' &&
|
|
9
|
+
typeof value['tee'] === 'function' &&
|
|
10
|
+
typeof value['locked'] === 'boolean');
|
|
11
|
+
}
|
|
12
|
+
function assertReadableStreamLike(stream, url, stage) {
|
|
13
|
+
if (isReadableStreamLike(stream))
|
|
14
|
+
return;
|
|
15
|
+
throw new FetchError('Invalid response stream', url, 500, {
|
|
16
|
+
reason: 'invalid_stream',
|
|
17
|
+
stage,
|
|
18
|
+
});
|
|
19
|
+
}
|
|
20
|
+
export function toNodeReadableStream(stream, url, stage) {
|
|
21
|
+
assertReadableStreamLike(stream, url, stage);
|
|
22
|
+
return stream;
|
|
23
|
+
}
|
|
24
|
+
export function toWebReadableStream(stream, url, stage) {
|
|
25
|
+
const converted = Readable.toWeb(stream);
|
|
26
|
+
assertReadableStreamLike(converted, url, stage);
|
|
27
|
+
return converted;
|
|
28
|
+
}
|
package/dist/fetch.js
CHANGED
|
@@ -10,6 +10,8 @@ import { finished, pipeline } from 'node:stream/promises';
|
|
|
10
10
|
import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
|
|
11
11
|
import { config } from './config.js';
|
|
12
12
|
import { createErrorWithCode, FetchError, isSystemError } from './errors.js';
|
|
13
|
+
import { decodeBuffer, getCharsetFromContentType, isBinaryContent, resolveEncoding, } from './fetch-content.js';
|
|
14
|
+
import { toNodeReadableStream, toWebReadableStream } from './fetch-stream.js';
|
|
13
15
|
import { createDefaultBlockList, normalizeIpForBlockList, } from './ip-blocklist.js';
|
|
14
16
|
import { getOperationId, getRequestId, logDebug, logError, logWarn, redactUrl, } from './observability.js';
|
|
15
17
|
import { isError, isObject } from './type-guards.js';
|
|
@@ -26,23 +28,6 @@ const defaultRedactor = {
|
|
|
26
28
|
redact: redactUrl,
|
|
27
29
|
};
|
|
28
30
|
const defaultFetch = (input, init) => globalThis.fetch(input, init);
|
|
29
|
-
function assertReadableStreamLike(stream, url, stage) {
|
|
30
|
-
if (isObject(stream) && typeof stream['getReader'] === 'function')
|
|
31
|
-
return;
|
|
32
|
-
throw new FetchError('Invalid response stream', url, 500, {
|
|
33
|
-
reason: 'invalid_stream',
|
|
34
|
-
stage,
|
|
35
|
-
});
|
|
36
|
-
}
|
|
37
|
-
function toNodeReadableStream(stream, url, stage) {
|
|
38
|
-
assertReadableStreamLike(stream, url, stage);
|
|
39
|
-
return stream;
|
|
40
|
-
}
|
|
41
|
-
function toWebReadableStream(stream, url, stage) {
|
|
42
|
-
const converted = Readable.toWeb(stream);
|
|
43
|
-
assertReadableStreamLike(converted, url, stage);
|
|
44
|
-
return converted;
|
|
45
|
-
}
|
|
46
31
|
class IpBlocker {
|
|
47
32
|
security;
|
|
48
33
|
blockList = createDefaultBlockList();
|
|
@@ -569,8 +554,11 @@ function createTooManyRedirectsFetchError(url) {
|
|
|
569
554
|
function createMissingRedirectLocationFetchError(url) {
|
|
570
555
|
return new FetchError('Redirect response missing Location header', url);
|
|
571
556
|
}
|
|
557
|
+
function buildNetworkErrorMessage(url) {
|
|
558
|
+
return `Network error: Could not reach ${url}`;
|
|
559
|
+
}
|
|
572
560
|
function createNetworkFetchError(url, message) {
|
|
573
|
-
return new FetchError(
|
|
561
|
+
return new FetchError(buildNetworkErrorMessage(url), url, undefined, message ? { message } : {});
|
|
574
562
|
}
|
|
575
563
|
function createUnknownFetchError(url, message) {
|
|
576
564
|
return new FetchError(message, url);
|
|
@@ -619,7 +607,7 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
|
|
|
619
607
|
code === 'EINVAL') {
|
|
620
608
|
return new FetchError(error.message, url, 400, { code });
|
|
621
609
|
}
|
|
622
|
-
return new FetchError(
|
|
610
|
+
return new FetchError(buildNetworkErrorMessage(url), url, undefined, {
|
|
623
611
|
code,
|
|
624
612
|
message: error.message,
|
|
625
613
|
});
|
|
@@ -868,168 +856,6 @@ class RedirectFollower {
|
|
|
868
856
|
}
|
|
869
857
|
}
|
|
870
858
|
}
|
|
871
|
-
function getCharsetFromContentType(contentType) {
|
|
872
|
-
if (!contentType)
|
|
873
|
-
return undefined;
|
|
874
|
-
const match = /charset=([^;]+)/i.exec(contentType);
|
|
875
|
-
const charsetGroup = match?.[1];
|
|
876
|
-
if (!charsetGroup)
|
|
877
|
-
return undefined;
|
|
878
|
-
let charset = charsetGroup.trim();
|
|
879
|
-
if (charset.startsWith('"') && charset.endsWith('"')) {
|
|
880
|
-
charset = charset.slice(1, -1);
|
|
881
|
-
}
|
|
882
|
-
return charset.trim();
|
|
883
|
-
}
|
|
884
|
-
function createDecoder(encoding) {
|
|
885
|
-
if (!encoding)
|
|
886
|
-
return new TextDecoder('utf-8');
|
|
887
|
-
try {
|
|
888
|
-
return new TextDecoder(encoding);
|
|
889
|
-
}
|
|
890
|
-
catch {
|
|
891
|
-
return new TextDecoder('utf-8');
|
|
892
|
-
}
|
|
893
|
-
}
|
|
894
|
-
function decodeBuffer(buffer, encoding) {
|
|
895
|
-
return createDecoder(encoding).decode(buffer);
|
|
896
|
-
}
|
|
897
|
-
function normalizeEncodingLabel(encoding) {
|
|
898
|
-
return encoding?.trim().toLowerCase() ?? '';
|
|
899
|
-
}
|
|
900
|
-
function isUnicodeWideEncoding(encoding) {
|
|
901
|
-
const normalized = normalizeEncodingLabel(encoding);
|
|
902
|
-
return (normalized.startsWith('utf-16') ||
|
|
903
|
-
normalized.startsWith('utf-32') ||
|
|
904
|
-
normalized === 'ucs-2' ||
|
|
905
|
-
normalized === 'unicodefffe' ||
|
|
906
|
-
normalized === 'unicodefeff');
|
|
907
|
-
}
|
|
908
|
-
const BOM_SIGNATURES = [
|
|
909
|
-
// 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
|
|
910
|
-
{ bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
|
|
911
|
-
{ bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
|
|
912
|
-
{ bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
|
|
913
|
-
{ bytes: [0xff, 0xfe], encoding: 'utf-16le' },
|
|
914
|
-
{ bytes: [0xfe, 0xff], encoding: 'utf-16be' },
|
|
915
|
-
];
|
|
916
|
-
function detectBomEncoding(buffer) {
|
|
917
|
-
for (const { bytes, encoding } of BOM_SIGNATURES) {
|
|
918
|
-
if (startsWithBytes(buffer, bytes))
|
|
919
|
-
return encoding;
|
|
920
|
-
}
|
|
921
|
-
return undefined;
|
|
922
|
-
}
|
|
923
|
-
function readQuotedValue(input, startIndex) {
|
|
924
|
-
const first = input[startIndex];
|
|
925
|
-
if (!first)
|
|
926
|
-
return '';
|
|
927
|
-
const quoted = first === '"' || first === "'";
|
|
928
|
-
if (quoted) {
|
|
929
|
-
const end = input.indexOf(first, startIndex + 1);
|
|
930
|
-
return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
|
|
931
|
-
}
|
|
932
|
-
const tail = input.slice(startIndex);
|
|
933
|
-
const stop = tail.search(/[\s/>]/);
|
|
934
|
-
return (stop === -1 ? tail : tail.slice(0, stop)).trim();
|
|
935
|
-
}
|
|
936
|
-
function extractHtmlCharset(headSnippet) {
|
|
937
|
-
const lower = headSnippet.toLowerCase();
|
|
938
|
-
const charsetToken = 'charset=';
|
|
939
|
-
const charsetIdx = lower.indexOf(charsetToken);
|
|
940
|
-
if (charsetIdx === -1)
|
|
941
|
-
return undefined;
|
|
942
|
-
const valueStart = charsetIdx + charsetToken.length;
|
|
943
|
-
const charset = readQuotedValue(headSnippet, valueStart);
|
|
944
|
-
return charset ? charset.toLowerCase() : undefined;
|
|
945
|
-
}
|
|
946
|
-
function extractXmlEncoding(headSnippet) {
|
|
947
|
-
const lower = headSnippet.toLowerCase();
|
|
948
|
-
const xmlStart = lower.indexOf('<?xml');
|
|
949
|
-
if (xmlStart === -1)
|
|
950
|
-
return undefined;
|
|
951
|
-
const xmlEnd = lower.indexOf('?>', xmlStart);
|
|
952
|
-
const declaration = xmlEnd === -1
|
|
953
|
-
? headSnippet.slice(xmlStart)
|
|
954
|
-
: headSnippet.slice(xmlStart, xmlEnd + 2);
|
|
955
|
-
const declarationLower = declaration.toLowerCase();
|
|
956
|
-
const encodingToken = 'encoding=';
|
|
957
|
-
const encodingIdx = declarationLower.indexOf(encodingToken);
|
|
958
|
-
if (encodingIdx === -1)
|
|
959
|
-
return undefined;
|
|
960
|
-
const valueStart = encodingIdx + encodingToken.length;
|
|
961
|
-
const encoding = readQuotedValue(declaration, valueStart);
|
|
962
|
-
return encoding ? encoding.toLowerCase() : undefined;
|
|
963
|
-
}
|
|
964
|
-
function detectHtmlDeclaredEncoding(buffer) {
|
|
965
|
-
const scanSize = Math.min(buffer.length, 8_192);
|
|
966
|
-
if (scanSize === 0)
|
|
967
|
-
return undefined;
|
|
968
|
-
const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
|
|
969
|
-
return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
|
|
970
|
-
}
|
|
971
|
-
function resolveEncoding(declaredEncoding, sample) {
|
|
972
|
-
const bomEncoding = detectBomEncoding(sample);
|
|
973
|
-
if (bomEncoding)
|
|
974
|
-
return bomEncoding;
|
|
975
|
-
if (declaredEncoding)
|
|
976
|
-
return declaredEncoding;
|
|
977
|
-
return detectHtmlDeclaredEncoding(sample);
|
|
978
|
-
}
|
|
979
|
-
const BINARY_SIGNATURES = [
|
|
980
|
-
[0x25, 0x50, 0x44, 0x46],
|
|
981
|
-
[0x89, 0x50, 0x4e, 0x47],
|
|
982
|
-
[0x47, 0x49, 0x46, 0x38],
|
|
983
|
-
[0xff, 0xd8, 0xff],
|
|
984
|
-
[0x52, 0x49, 0x46, 0x46],
|
|
985
|
-
[0x42, 0x4d],
|
|
986
|
-
[0x49, 0x49, 0x2a, 0x00],
|
|
987
|
-
[0x4d, 0x4d, 0x00, 0x2a],
|
|
988
|
-
[0x00, 0x00, 0x01, 0x00],
|
|
989
|
-
[0x50, 0x4b, 0x03, 0x04],
|
|
990
|
-
[0x1f, 0x8b],
|
|
991
|
-
[0x42, 0x5a, 0x68],
|
|
992
|
-
[0x52, 0x61, 0x72, 0x21],
|
|
993
|
-
[0x37, 0x7a, 0xbc, 0xaf],
|
|
994
|
-
[0x7f, 0x45, 0x4c, 0x46],
|
|
995
|
-
[0x4d, 0x5a],
|
|
996
|
-
[0xcf, 0xfa, 0xed, 0xfe],
|
|
997
|
-
[0x00, 0x61, 0x73, 0x6d],
|
|
998
|
-
[0x1a, 0x45, 0xdf, 0xa3],
|
|
999
|
-
[0x66, 0x74, 0x79, 0x70],
|
|
1000
|
-
[0x46, 0x4c, 0x56],
|
|
1001
|
-
[0x49, 0x44, 0x33],
|
|
1002
|
-
[0xff, 0xfb],
|
|
1003
|
-
[0xff, 0xfa],
|
|
1004
|
-
[0x4f, 0x67, 0x67, 0x53],
|
|
1005
|
-
[0x66, 0x4c, 0x61, 0x43],
|
|
1006
|
-
[0x4d, 0x54, 0x68, 0x64],
|
|
1007
|
-
[0x77, 0x4f, 0x46, 0x46],
|
|
1008
|
-
[0x00, 0x01, 0x00, 0x00],
|
|
1009
|
-
[0x4f, 0x54, 0x54, 0x4f],
|
|
1010
|
-
[0x53, 0x51, 0x4c, 0x69],
|
|
1011
|
-
];
|
|
1012
|
-
function startsWithBytes(buffer, signature) {
|
|
1013
|
-
const sigLen = signature.length;
|
|
1014
|
-
if (buffer.length < sigLen)
|
|
1015
|
-
return false;
|
|
1016
|
-
for (let i = 0; i < sigLen; i += 1) {
|
|
1017
|
-
if (buffer[i] !== signature[i])
|
|
1018
|
-
return false;
|
|
1019
|
-
}
|
|
1020
|
-
return true;
|
|
1021
|
-
}
|
|
1022
|
-
function hasNullByte(buffer, limit) {
|
|
1023
|
-
const checkLen = Math.min(buffer.length, limit);
|
|
1024
|
-
return buffer.subarray(0, checkLen).includes(0x00);
|
|
1025
|
-
}
|
|
1026
|
-
function isBinaryContent(buffer, encoding) {
|
|
1027
|
-
for (const signature of BINARY_SIGNATURES) {
|
|
1028
|
-
if (startsWithBytes(buffer, signature))
|
|
1029
|
-
return true;
|
|
1030
|
-
}
|
|
1031
|
-
return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
|
|
1032
|
-
}
|
|
1033
859
|
class ResponseTextReader {
|
|
1034
860
|
async read(response, url, maxBytes, signal, encoding) {
|
|
1035
861
|
const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
|