@j0hanz/fetch-url-mcp 1.1.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -0
- package/dist/AGENTS.md +3 -2
- package/dist/examples/mcp-fetch-url-client.js +313 -0
- package/dist/examples/mcp-fetch-url-client.js.map +1 -0
- package/dist/fetch-content.d.ts +4 -0
- package/dist/fetch-content.js +163 -0
- package/dist/fetch-stream.d.ts +4 -0
- package/dist/fetch-stream.js +28 -0
- package/dist/fetch.js +61 -255
- package/dist/tools.js +28 -6
- package/dist/transform.js +3 -7
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -52,6 +52,7 @@ URL → Validate → DNS Preflight → HTTP Fetch → Decompress
|
|
|
52
52
|
```text
|
|
53
53
|
fetch-url-mcp/
|
|
54
54
|
├── assets/ # Server icon (logo.svg)
|
|
55
|
+
├── examples/ # Client examples
|
|
55
56
|
├── scripts/ # Build & test orchestration
|
|
56
57
|
├── src/
|
|
57
58
|
│ ├── workers/ # Worker-thread child for HTML transforms
|
|
@@ -95,6 +96,23 @@ Add to your MCP client configuration:
|
|
|
95
96
|
}
|
|
96
97
|
```
|
|
97
98
|
|
|
99
|
+
## Client Example (CLI)
|
|
100
|
+
|
|
101
|
+
Build the server and examples, then run the client:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
npm run build
|
|
105
|
+
node dist/examples/mcp-fetch-url-client.js https://example.com
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Optional flags:
|
|
109
|
+
|
|
110
|
+
- `--full` reads the cached markdown resource to avoid inline truncation.
|
|
111
|
+
- `--task` enables task-based execution with streamed status updates.
|
|
112
|
+
- `--task-ttl <ms>` sets task TTL; `--task-poll <ms>` sets poll interval.
|
|
113
|
+
- `--http http://localhost:3000/mcp` connects to the Streamable HTTP server.
|
|
114
|
+
- Progress updates (when emitted) are printed to stderr.
|
|
115
|
+
|
|
98
116
|
## Installation
|
|
99
117
|
|
|
100
118
|
### NPX (Recommended)
|
package/dist/AGENTS.md
CHANGED
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
- `tests/` — Unit/integration tests (46+ test files) using Node.js built-in test runner
|
|
35
35
|
- `scripts/` — Build & test orchestration (`tasks.mjs`)
|
|
36
36
|
- `assets/` — Server icon (`logo.svg`)
|
|
37
|
-
- `.github/workflows/` — CI/CD (`release.yml`: lint → type-check → test → build → publish to npm, MCP Registry, Docker)
|
|
37
|
+
- `.github/workflows/` — CI/CD (`release.yml`: lint → type-check → type-check:tests → test → build → publish to npm, MCP Registry, Docker)
|
|
38
38
|
|
|
39
39
|
> Ignore: `dist/`, `node_modules/`, `coverage/`, `.cache/`, `.tsbuildinfo`
|
|
40
40
|
|
|
@@ -49,6 +49,7 @@ All commands verified from `.github/workflows/release.yml` (CI) and `package.jso
|
|
|
49
49
|
- **Start:** `npm run start` → `node dist/index.js` (see `package.json`)
|
|
50
50
|
- **Build:** `npm run build` → `node scripts/tasks.mjs build` — cleans `dist/`, compiles TS, validates `instructions.md`, copies assets, sets executable bit (see `scripts/tasks.mjs`, `package.json`)
|
|
51
51
|
- **Type-check:** `npm run type-check` → `tsc -p tsconfig.json --noEmit` (see `scripts/tasks.mjs`, `.github/workflows/release.yml`)
|
|
52
|
+
- **Type-check (tests):** `npm run type-check:tests` → build output + `tsc -p tsconfig.tests.json --noEmit` (see `scripts/tasks.mjs`, `.github/workflows/release.yml`)
|
|
52
53
|
- **Lint:** `npm run lint` → `eslint .` (see `package.json`, `.github/workflows/release.yml`)
|
|
53
54
|
- **Lint (fix):** `npm run lint:fix` → `eslint . --fix` (see `package.json`)
|
|
54
55
|
- **Format:** `npm run format` → `prettier --write .` (see `package.json`)
|
|
@@ -135,7 +136,7 @@ All commands verified from `.github/workflows/release.yml` (CI) and `package.jso
|
|
|
135
136
|
- Config values temporarily overridden per test with `try/finally` cleanup (observed in `tests/fetch-url-tool.test.ts`)
|
|
136
137
|
- Worker pool shutdown in `after()` hooks for clean teardown (observed in `tests/fetch-url-tool.test.ts`)
|
|
137
138
|
- No external services (DB/containers) required for tests
|
|
138
|
-
- **CI validation order:** `lint` → `type-check` → `test` → `build` (see `.github/workflows/release.yml`)
|
|
139
|
+
- **CI validation order:** `lint` → `type-check` → `type-check:tests` → `test` → `build` (see `.github/workflows/release.yml`)
|
|
139
140
|
|
|
140
141
|
## 7) Common Pitfalls (Verified Only)
|
|
141
142
|
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
import { access, writeFile } from 'node:fs/promises';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import process from 'node:process';
|
|
4
|
+
import { fileURLToPath } from 'node:url';
|
|
5
|
+
import { parseArgs } from 'node:util';
|
|
6
|
+
import { Client } from '@modelcontextprotocol/sdk/client';
|
|
7
|
+
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
|
|
8
|
+
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
|
|
9
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
10
|
+
const __dirname = path.dirname(__filename);
|
|
11
|
+
/**
 * Print the command-line help text for the example client to stderr.
 *
 * The text starts with a blank line and ends with a trailing newline.
 */
function printUsage() {
  const helpLines = [
    '',
    'Usage:',
    'node dist/examples/mcp-fetch-url-client.js <url> [options]',
    '',
    'Options:',
    '--http <url> Connect via Streamable HTTP (e.g. http://localhost:3000/mcp)',
    '--task Use task-based execution with streamed status updates',
    '--task-ttl <ms> Task TTL in milliseconds (optional)',
    '--task-poll <ms> Task poll interval in milliseconds (optional)',
    '--no-noise Skip noise removal',
    '--force Force refresh (bypass cache)',
    '--max-inline <n> Max inline chars before truncation',
    '--full If truncated, read cached resource for full markdown',
    '--out <path> Write markdown to file instead of stdout',
    '--json Print full structured JSON instead of markdown',
    '--cmd <executable> Stdio: command to spawn (default: node)',
    '--server <path> Stdio: server entry (default: dist/index.js)',
    '--cwd <path> Stdio: working directory for server (default: repo root)',
    '--env KEY=VALUE Stdio: add/override environment variable (repeatable)',
    '-h, --help Show help',
    '',
  ];
  process.stderr.write(helpLines.join('\n'));
}
|
|
35
|
+
/**
 * Walk upwards from `startDir` until a directory containing `package.json` is found.
 *
 * @param {string} startDir - Directory to start searching from.
 * @returns {Promise<string>} The first ancestor (or `startDir` itself) that holds a
 *   `package.json`; `startDir` unchanged when the filesystem root is reached without a match.
 */
async function findRepoRoot(startDir) {
  for (let dir = startDir; ; ) {
    const manifest = path.join(dir, 'package.json');
    const found = await access(manifest).then(
      () => true,
      () => false,
    );
    if (found) {
      return dir;
    }
    const up = path.dirname(dir);
    // dirname() of the filesystem root is the root itself: nothing left to climb.
    if (up === dir) {
      return startDir;
    }
    dir = up;
  }
}
|
|
52
|
+
/**
 * Turn repeated `--env KEY=VALUE` arguments into an environment map.
 *
 * The key is everything before the first `=` (trimmed); the value is everything after it,
 * kept verbatim. Later entries overwrite earlier ones with the same key.
 *
 * @param {string[] | undefined} values - Raw `KEY=VALUE` strings, if any were given.
 * @returns {Record<string, string>} Parsed overrides (empty when `values` is falsy).
 * @throws {Error} When an entry has no `=`, starts with `=`, or has a blank key.
 */
function parseEnvOverrides(values) {
  const overrides = {};
  for (const entry of values || []) {
    const separator = entry.indexOf('=');
    if (separator < 1) {
      throw new Error(`Invalid --env value: ${entry}`);
    }
    const name = entry.slice(0, separator).trim();
    if (name === '') {
      throw new Error(`Invalid --env key in: ${entry}`);
    }
    overrides[name] = entry.slice(separator + 1);
  }
  return overrides;
}
|
|
71
|
+
/**
 * Snapshot the current process environment as a plain object.
 *
 * Only entries whose value is a string are copied.
 *
 * @returns {Record<string, string>} Copy of `process.env` containing string values only.
 */
function buildInheritedEnv() {
  return Object.keys(process.env).reduce((snapshot, name) => {
    const value = process.env[name];
    if (typeof value === 'string') {
      snapshot[name] = value;
    }
    return snapshot;
  }, {});
}
|
|
80
|
+
/**
 * Return the `text` of the first resource entry that carries one.
 *
 * @param {Iterable<object>} contents - Resource content entries.
 * @returns {string | null} The first string-valued `text` field, or `null` if none exists.
 */
function pickTextResource(contents) {
  for (const { text } of contents) {
    if (typeof text === 'string') {
      return text;
    }
  }
  return null;
}
|
|
89
|
+
/**
 * Render a progress notification as a short human-readable string.
 *
 * With a positive numeric `total` the result is a rounded percentage; otherwise the raw
 * `progress` value is shown. A non-empty `message` is appended after a space.
 *
 * @param {{ progress: number, total?: number, message?: string }} progress
 * @returns {string} For example `"25% fetching"` or `"3"`.
 */
function formatProgress(progress) {
  const suffix = progress.message ? ` ${progress.message}` : '';
  const hasTotal = typeof progress.total === 'number' && progress.total > 0;
  if (!hasTotal) {
    return `${progress.progress}${suffix}`;
  }
  const percent = Math.round((progress.progress / progress.total) * 100);
  return `${percent}%${suffix}`;
}
|
|
97
|
+
/**
 * Extract the `structuredContent` object from a tool result.
 *
 * @param {unknown} result - Value returned by the tool call.
 * @returns {object | null} `result.structuredContent` when it is a non-array object,
 *   otherwise `null` (including when `result` itself is not an object).
 */
function getStructuredContent(result) {
  const isObject = typeof result === 'object' && result !== null;
  const payload = isObject ? result.structuredContent : null;
  const isRecord = Boolean(payload) && typeof payload === 'object' && !Array.isArray(payload);
  return isRecord ? payload : null;
}
|
|
109
|
+
/**
 * Tell whether a tool result is flagged as an error.
 *
 * @param {unknown} result - Value returned by the tool call.
 * @returns {boolean} `true` only when `result` is an object whose `isError` is exactly `true`.
 */
function isToolError(result) {
  return typeof result === 'object' && result !== null && result.isError === true;
}
|
|
116
|
+
/**
 * Read a string-valued field from a structured-content object.
 *
 * @param {object | null} structured - Structured content, or a falsy value when absent.
 * @param {string} key - Field name to read.
 * @returns {string | null} The field when it is a string; `null` otherwise.
 */
function getStringField(structured, key) {
  if (structured) {
    const field = structured[key];
    if (typeof field === 'string') {
      return field;
    }
  }
  return null;
}
|
|
123
|
+
// Shared option shapes for util.parseArgs: a boolean switch and a string-valued option.
const booleanFlag = { type: 'boolean' };
const stringOption = { type: 'string' };

// Command-line schema for the example client; key order matches the help text groups.
const options = {
  help: { ...booleanFlag, short: 'h' },
  http: stringOption,
  task: booleanFlag,
  'task-ttl': stringOption,
  'task-poll': stringOption,
  'no-noise': booleanFlag,
  force: booleanFlag,
  'max-inline': stringOption,
  full: booleanFlag,
  out: stringOption,
  json: booleanFlag,
  cmd: stringOption,
  server: stringOption,
  cwd: stringOption,
  env: { ...stringOption, multiple: true },
};

// Parse process arguments; the target URL arrives as a positional argument.
const parsedArgs = parseArgs({ allowPositionals: true, options });
const { values, positionals } = parsedArgs;

// --help short-circuits everything else: print usage and exit successfully.
if (values.help) {
  printUsage();
  process.exit(0);
}
|
|
148
|
+
/**
 * Return the target URL given as the first positional argument.
 *
 * @param {string[]} positionals - Positional command-line arguments.
 * @returns {string} The first positional argument.
 * @throws {Error} `Missing URL.` (after printing usage) when it is absent or empty.
 */
function requireUrl(positionals) {
  const [candidate] = positionals;
  const isPresent = typeof candidate === 'string' && candidate.length > 0;
  if (isPresent) {
    return candidate;
  }
  printUsage();
  throw new Error('Missing URL.');
}
|
|
156
|
+
/**
 * Parse an optional numeric command-line flag.
 *
 * `Number()` alone is too lenient for CLI input: it turns `""` and whitespace into `0`,
 * and it accepts `Infinity`, which `JSON.stringify` then serialises as `null`. Both are
 * rejected here along with anything that is not a number.
 *
 * @param {string} flag - Flag name without the leading dashes (used in the error message).
 * @param {string | undefined} raw - Raw flag value, or `undefined` when the flag was not given.
 * @returns {number | undefined} The parsed finite number, or `undefined` when not given.
 * @throws {Error} `Invalid --<flag> value: <raw>` when the value is blank or not finite.
 */
function parseNumericOption(flag, raw) {
  if (raw === undefined) {
    return undefined;
  }
  const parsed = raw.trim() === '' ? Number.NaN : Number(raw);
  if (!Number.isFinite(parsed)) {
    throw new Error(`Invalid --${flag} value: ${raw}`);
  }
  return parsed;
}

// Resolve and validate all user input up front, before any connection is opened.
const targetUrl = requireUrl(positionals);
const maxInlineChars = parseNumericOption('max-inline', values['max-inline']);
const taskTtl = parseNumericOption('task-ttl', values['task-ttl']);
const taskPoll = parseNumericOption('task-poll', values['task-poll']);
|
|
172
|
+
/**
 * Progress callback handed to the MCP client: prints each update on its own stderr line,
 * prefixed with `[progress]`.
 */
const onProgress = (progress) => {
  const rendered = formatProgress(progress);
  process.stderr.write(`[progress] ${rendered}\n`);
};
|
|
175
|
+
/**
 * Connect to the server, call the `fetch-url` tool once, and emit the result.
 *
 * Transport: Streamable HTTP when `--http` is given, otherwise a spawned stdio server.
 * Output: structured JSON with `--json`, otherwise markdown to `--out` or stdout.
 * A tool-level error is printed to stderr and sets `process.exitCode` to 2.
 * The transport, once created, is always closed on the way out.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the stdio server entry is missing or no markdown is returned.
 */
async function run() {
  const client = new Client(
    { name: 'fetch-url-mcp-client', version: '0.1.0' },
    { capabilities: {} },
  );
  let transport = null;

  try {
    if (values.http) {
      transport = new StreamableHTTPClientTransport(new URL(values.http));
    } else {
      const command = values.cmd ?? process.execPath;
      const repoRoot = await findRepoRoot(__dirname);
      const serverPath = values.server ?? path.join(repoRoot, 'dist/index.js');
      const cwd = values.cwd ?? repoRoot;

      // Fail early with an actionable message instead of a spawn error later on.
      const serverExists = await access(serverPath).then(
        () => true,
        () => false,
      );
      if (!serverExists) {
        throw new Error(`Server entry not found at ${serverPath}. Run \"npm run build\" first or set --server.`);
      }

      transport = new StdioClientTransport({
        command,
        args: [serverPath, '--stdio'],
        cwd,
        // Explicit --env entries win over inherited variables.
        env: { ...buildInheritedEnv(), ...parseEnvOverrides(values.env) },
        stderr: 'inherit',
      });
    }

    await client.connect(transport);

    const toolArguments = {
      url: targetUrl,
      skipNoiseRemoval: values['no-noise'] ?? false,
      forceRefresh: values.force ?? false,
      ...(typeof maxInlineChars === 'number' ? { maxInlineChars } : {}),
    };

    // Task options are attached only in --task mode.
    const requestOptions = { onprogress: onProgress };
    if (values.task) {
      requestOptions.task = {
        ...(typeof taskTtl === 'number' ? { ttl: taskTtl } : {}),
        ...(typeof taskPoll === 'number' ? { pollInterval: taskPoll } : {}),
      };
    }

    let result;
    if (values.task) {
      result = await callToolStream(client, toolArguments, requestOptions);
    } else {
      const toolCall = { name: 'fetch-url', arguments: toolArguments };
      result = await client.callTool(toolCall, undefined, requestOptions);
    }

    const structured = getStructuredContent(result);

    if (isToolError(result)) {
      const errorPayload = structured ?? { message: 'Fetch failed' };
      process.stderr.write(`${JSON.stringify(errorPayload, null, 2)}\n`);
      process.exitCode = 2;
      return;
    }

    if (values.json) {
      process.stdout.write(`${JSON.stringify(structured ?? result, null, 2)}\n`);
      return;
    }

    let markdown = getStringField(structured, 'markdown');

    // --full: prefer the cached resource text over the inline markdown when available.
    const cacheResourceUri = values.full
      ? getStringField(structured, 'cacheResourceUri')
      : null;
    if (cacheResourceUri) {
      const resource = await client.readResource({ uri: cacheResourceUri });
      const fullText = pickTextResource(resource.contents);
      if (fullText) {
        markdown = fullText;
      }
    }

    if (!markdown) {
      throw new Error('No markdown returned from fetch-url.');
    }

    if (values.out) {
      await writeFile(values.out, markdown, 'utf8');
      return;
    }
    process.stdout.write(`${markdown}\n`);
  } finally {
    if (transport) {
      await transport.close();
    }
  }
}
|
|
274
|
+
/**
 * Call the `fetch-url` tool through the client's experimental task stream and wait for
 * its final result, logging task lifecycle events to stderr along the way.
 *
 * @param {object} client - MCP client exposing `experimental.tasks.callToolStream`.
 * @param {object} toolArguments - Arguments passed to the `fetch-url` tool.
 * @param {object} requestOptions - Request options forwarded to the stream call.
 * @returns {Promise<unknown>} The payload of the last `result` message.
 * @throws {Error} On an `error` message, or when the stream ends without a result.
 */
async function callToolStream(client, toolArguments, requestOptions) {
  const request = { name: 'fetch-url', arguments: toolArguments };
  const events = client.experimental.tasks.callToolStream(request, undefined, requestOptions);

  let outcome = null;
  for await (const event of events) {
    switch (event.type) {
      case 'taskCreated':
        process.stderr.write(`[task] created ${event.task.taskId}\n`);
        break;
      case 'taskStatus': {
        const { status, statusMessage } = event.task;
        const suffix = statusMessage ? ` ${statusMessage}` : '';
        process.stderr.write(`[task] ${status}${suffix}\n`);
        break;
      }
      case 'result':
        outcome = event.result;
        break;
      case 'error':
        throw new Error(event.error.message ?? 'Task failed');
      default:
        // Unknown message types are ignored.
        break;
    }
  }

  if (outcome === null) {
    throw new Error('Task stream ended without a result.');
  }
  return outcome;
}
|
|
305
|
+
// Entry point: run the client and turn any failure into a one-line stderr message plus
// exit code 1, letting pending output flush before the process ends.
try {
  await run();
} catch (error) {
  let reason;
  if (error instanceof Error) {
    reason = error.message;
  } else {
    reason = String(error);
  }
  process.stderr.write(`${reason}\n`);
  process.exitCode = 1;
}
|
|
313
|
+
//# sourceMappingURL=mcp-fetch-url-client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"mcp-fetch-url-client.js","sourceRoot":"","sources":["../../examples/mcp-fetch-url-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,OAAO,MAAM,cAAc,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,MAAM,EAAE,MAAM,kCAAkC,CAAC;AAC1D,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,6BAA6B,EAAE,MAAM,oDAAoD,CAAC;AAUnG,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAE3C,SAAS,UAAU;IACjB,MAAM,KAAK,GAAG;;;;;;;;;;;;;;;;;;;;CAoBf,CAAC;IACA,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;AAC9B,CAAC;AAED,KAAK,UAAU,YAAY,CAAC,QAAgB;IAC1C,IAAI,OAAO,GAAG,QAAQ,CAAC;IACvB,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC,CAAC;QACrD,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;YACxB,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACrC,IAAI,MAAM,KAAK,OAAO,EAAE,CAAC;gBACvB,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,OAAO,GAAG,MAAM,CAAC;QACnB,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CACxB,MAA4B;IAE5B,MAAM,GAAG,GAA2B,EAAE,CAAC;IACvC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,GAAG,CAAC;IACb,CAAC;IACD,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;QAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAChC,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,EAAE,CAAC,CAAC;QAClD,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QACpC,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,MAAM,IAAI,KAAK,CAAC,yBAAyB,IAAI,EAAE,CAAC,CAAC;QACnD,CAAC;QACD,GAAG,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;IACnB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,iBAAiB;IACxB,MAAM,GAAG,GAA2B,EAAE,CAAC;IACvC,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QACvD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,GAAG,CAAC,GAAG,CAAC,GA
AG,KAAK,CAAC;QACnB,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,gBAAgB,CACvB,QAAwC;IAExC,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3B,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC7B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,QAAkB;IACxC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,QAAQ,CAAC;IACpC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,QAAQ,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;QAC9D,OAAO,GAAG,OAAO,IAAI,OAAO,CAAC,CAAC,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IACtD,CAAC;IACD,OAAO,OAAO,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,QAAQ,IAAI,OAAO,EAAE,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,QAAQ,EAAE,CAAC;AAC9E,CAAC;AAED,SAAS,oBAAoB,CAAC,MAAe;IAC3C,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;QAClD,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,SAAS,GAAG,MAAoB,CAAC;IACvC,IACE,SAAS,CAAC,iBAAiB;QAC3B,OAAO,SAAS,CAAC,iBAAiB,KAAK,QAAQ;QAC/C,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,iBAAiB,CAAC,EAC3C,CAAC;QACD,OAAO,SAAS,CAAC,iBAAiB,CAAC;IACrC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,WAAW,CAAC,MAAe;IAClC,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;QAClD,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,SAAS,GAAG,MAAoB,CAAC;IACvC,OAAO,SAAS,CAAC,OAAO,KAAK,IAAI,CAAC;AACpC,CAAC;AAED,SAAS,cAAc,CACrB,UAAoC,EACpC,GAAW;IAEX,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;IAC9B,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;AAClD,CAAC;AAED,MAAM,OAAO,GAAG;IACd,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,GAAG,EAAE;IACrC,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IACxB,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;IACzB,UAAU,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IAC9B,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IAC/B,UAAU,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;IAC/B,KAAK,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;IAC1B,YAAY,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IAChC,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;IACzB,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IACvB,IAAI,EAAE,EA
AE,IAAI,EAAE,SAAS,EAAE;IACzB,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IACvB,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IAC1B,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IACvB,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE;CAC/B,CAAC;AAEX,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,SAAS,CAAC;IACxC,gBAAgB,EAAE,IAAI;IACtB,OAAO;CACR,CAAC,CAAC;AAEH,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;IAChB,UAAU,EAAE,CAAC;IACb,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,SAAS,UAAU,CAAC,WAAqB;IACvC,MAAM,GAAG,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;IAC3B,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChD,UAAU,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,cAAc,CAAC,CAAC;IAClC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,SAAS,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC;AAE1C,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC;AAC1C,MAAM,cAAc,GAClB,YAAY,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AAChE,IAAI,YAAY,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,EAAE,CAAC;IAC/D,MAAM,IAAI,KAAK,CAAC,+BAA+B,YAAY,EAAE,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC;AACtC,MAAM,OAAO,GAAG,UAAU,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AAC1E,IAAI,UAAU,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;IACtD,MAAM,IAAI,KAAK,CAAC,6BAA6B,UAAU,EAAE,CAAC,CAAC;AAC7D,CAAC;AAED,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC;AACxC,MAAM,QAAQ,GAAG,WAAW,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AAC7E,IAAI,WAAW,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC;IACxD,MAAM,IAAI,KAAK,CAAC,8BAA8B,WAAW,EAAE,CAAC,CAAC;AAC/D,CAAC;AAED,MAAM,UAAU,GAAG,CAAC,QAAkB,EAAQ,EAAE;IAC9C,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACnE,CAAC,CAAC;AAEF,KAAK,UAAU,GAAG;IAChB,IAAI,SAAS,GAAqB,IAAI,CAAC;IACvC,MAAM,MAAM,GAAG,IAAI,MAAM,CACvB,EAAE,IAAI,EAAE,sBAAsB,EAAE,OAAO,EAAE,OAAO,EAAE,EAClD,EAAE,YAAY,EAAE,EAAE,EAAE,CACrB,CAAC;IAEF,IAAI,CAAC;QACH,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YAChB,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YACtC,SAAS,GAAG,IAAI,6BAA6B,CAAC,Q
AAQ,CAAc,CAAC;QACvE,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,IAAI,OAAO,CAAC,QAAQ,CAAC;YAC/C,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,SAAS,CAAC,CAAC;YAC/C,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,eAAe,CAAC,CAAC;YACzE,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,IAAI,QAAQ,CAAC;YAEnC,IAAI,CAAC;gBACH,MAAM,MAAM,CAAC,UAAU,CAAC,CAAC;YAC3B,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,IAAI,KAAK,CACb,6BAA6B,UAAU,gDAAgD,CACxF,CAAC;YACJ,CAAC;YAED,MAAM,GAAG,GAAG;gBACV,GAAG,iBAAiB,EAAE;gBACtB,GAAG,iBAAiB,CAAC,MAAM,CAAC,GAAG,CAAC;aACjC,CAAC;YAEF,SAAS,GAAG,IAAI,oBAAoB,CAAC;gBACnC,OAAO;gBACP,IAAI,EAAE,CAAC,UAAU,EAAE,SAAS,CAAC;gBAC7B,GAAG;gBACH,GAAG;gBACH,MAAM,EAAE,SAAS;aAClB,CAAc,CAAC;QAClB,CAAC;QAED,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAEhC,MAAM,aAAa,GAKf;YACF,GAAG,EAAE,SAAS;YACd,gBAAgB,EAAE,MAAM,CAAC,UAAU,CAAC,IAAI,KAAK;YAC7C,YAAY,EAAE,MAAM,CAAC,KAAK,IAAI,KAAK;SACpC,CAAC;QAEF,IAAI,OAAO,cAAc,KAAK,QAAQ,EAAE,CAAC;YACvC,aAAa,CAAC,cAAc,GAAG,cAAc,CAAC;QAChD,CAAC;QAED,MAAM,WAAW,GAA4C,EAAE,CAAC;QAChE,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;YAChC,WAAW,CAAC,GAAG,GAAG,OAAO,CAAC;QAC5B,CAAC;QACD,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;YACjC,WAAW,CAAC,YAAY,GAAG,QAAQ,CAAC;QACtC,CAAC;QAED,MAAM,cAAc,GAAG,MAAM,CAAC,IAAI;YAChC,CAAC,CAAC,EAAE,UAAU,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE;YAC/C,CAAC,CAAC,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC;QAE/B,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI;YACxB,CAAC,CAAC,MAAM,cAAc,CAAC,MAAM,EAAE,aAAa,EAAE,cAAc,CAAC;YAC7D,CAAC,CAAC,MAAM,MAAM,CAAC,QAAQ,CACnB;gBACE,IAAI,EAAE,WAAW;gBACjB,SAAS,EAAE,aAAa;aACzB,EACD,SAAS,EACT,cAAc,CACf,CAAC;QAEN,IAAI,WAAW,CAAC,MAAM,CAAC,EAAE,CAAC;YACxB,MAAM,YAAY,GAAG,oBAAoB,CAAC,MAAM,CAAC,IAAI;gBACnD,OAAO,EAAE,cAAc;aACxB,CAAC;YACF,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;YACnE,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;YACrB,OAAO;QACT,CAAC;QAED,MAAM,UAAU,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAEhD,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YAChB,MAAM,OAAO,GAAG,UAAU,IAAI,MAAM,CAAC;YACrC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAA
I,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;YAC9D,OAAO;QACT,CAAC;QAED,IAAI,QAAQ,GAAG,cAAc,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;QACtD,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YAChB,MAAM,gBAAgB,GAAG,cAAc,CAAC,UAAU,EAAE,kBAAkB,CAAC,CAAC;YACxE,IAAI,gBAAgB,EAAE,CAAC;gBACrB,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC;oBACzC,GAAG,EAAE,gBAAgB;iBACtB,CAAC,CAAC;gBACH,MAAM,IAAI,GAAG,gBAAgB,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;gBACjD,IAAI,IAAI,EAAE,CAAC;oBACT,QAAQ,GAAG,IAAI,CAAC;gBAClB,CAAC;YACH,CAAC;QACH,CAAC;QAED,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,IAAI,MAAM,CAAC,GAAG,EAAE,CAAC;YACf,MAAM,SAAS,CAAC,MAAM,CAAC,GAAG,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;QAChD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,QAAQ,IAAI,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;YAAS,CAAC;QACT,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,SAAS,CAAC,KAAK,EAAE,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC;AAED,KAAK,UAAU,cAAc,CAC3B,MAAc,EACd,aAKC,EACD,cAMC;IAED,MAAM,MAAM,GAAG,MAAM,CAAC,YAAY,CAAC,KAAK,CAAC,cAAc,CACrD;QACE,IAAI,EAAE,WAAW;QACjB,SAAS,EAAE,aAAa;KACzB,EACD,SAAS,EACT,cAAc,CACf,CAAC;IAEF,IAAI,WAAW,GAAY,IAAI,CAAC;IAEhC,IAAI,KAAK,EAAE,MAAM,OAAO,IAAI,MAAM,EAAE,CAAC;QACnC,IAAI,OAAO,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YACnC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,kBAAkB,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC;YAChE,SAAS;QACX,CAAC;QACD,IAAI,OAAO,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YAClC,MAAM,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC,aAAa;gBAC9C,CAAC,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE;gBAClC,CAAC,CAAC,EAAE,CAAC;YACP,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,aAAa,IAAI,CAAC,CAAC;YACxE,SAAS;QACX,CAAC;QACD,IAAI,OAAO,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC9B,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;YAC7B,SAAS;QACX,CAAC;QACD,IAAI,OAAO,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,IAAI,aAAa,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IAED,IAAI,WAAW,KAAK,IAAI,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;IACzD,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,IAAI,CAAC;IACH,MAAM,GAAG,EAAE,CAAC;AACd,CAAC;AAAC,OAAO,KAAK,EAAE,CAAC;IACf,MAAM,OAAO,G
AAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACvE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;IACrC,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;AACvB,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
/**
 * Extract the `charset` parameter from a Content-Type header value.
 *
 * Surrounding whitespace and one pair of enclosing double quotes are removed.
 *
 * @param contentType - Raw header value, or `null` when the header is absent.
 * @returns The charset label as written in the header, or `undefined` when the input is
 *   empty or carries no `charset=` parameter.
 */
export declare function getCharsetFromContentType(contentType: string | null): string | undefined;
|
|
2
|
+
/**
 * Decode bytes into a string with a `TextDecoder` for the given encoding label.
 *
 * Falls back to UTF-8 when the label is empty or the `TextDecoder` constructor rejects it.
 *
 * @param buffer - Bytes to decode.
 * @param encoding - Encoding label to decode with.
 * @returns The decoded text.
 */
export declare function decodeBuffer(buffer: Uint8Array, encoding: string): string;
|
|
3
|
+
/**
 * Choose the encoding to decode a response with.
 *
 * Precedence: a byte-order mark at the start of `sample`, then `declaredEncoding`, then an
 * encoding declared inside the first 8,192 bytes of `sample` (an HTML `charset=` value or
 * an XML declaration's `encoding=` value).
 *
 * @param declaredEncoding - Encoding announced out of band, if any.
 * @param sample - Leading bytes of the body.
 * @returns The selected encoding label, or `undefined` when nothing indicates one.
 */
export declare function resolveEncoding(declaredEncoding: string | undefined, sample: Uint8Array): string | undefined;
|
|
4
|
+
/**
 * NOTE(review): the implementation is not visible alongside this declaration. Presumably
 * reports whether `buffer` looks like binary (non-text) data, with `encoding` as an
 * optional hint — confirm against fetch-content.js before relying on details.
 */
export declare function isBinaryContent(buffer: Uint8Array, encoding?: string): boolean;
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { Buffer } from 'node:buffer';
|
|
2
|
+
/**
 * Extract the `charset` parameter from a Content-Type header value.
 *
 * @param {string | null} contentType - Raw header value.
 * @returns {string | undefined} The charset with whitespace and one pair of enclosing
 *   double quotes removed; `undefined` when the input is empty or has no `charset=` part.
 */
export function getCharsetFromContentType(contentType) {
  const raw = contentType ? /charset=([^;]+)/i.exec(contentType)?.[1] : undefined;
  if (!raw) {
    return undefined;
  }
  const trimmed = raw.trim();
  const isQuoted = trimmed.startsWith('"') && trimmed.endsWith('"');
  const unquoted = isQuoted ? trimmed.slice(1, -1) : trimmed;
  return unquoted.trim();
}
|
|
15
|
+
/**
 * Build a `TextDecoder` for `encoding`, using UTF-8 whenever the label is missing or the
 * `TextDecoder` constructor throws for it.
 *
 * @param {string | undefined} encoding - Requested encoding label.
 * @returns {TextDecoder} Decoder for the requested encoding, or a UTF-8 decoder.
 */
function createDecoder(encoding) {
  const utf8 = () => new TextDecoder('utf-8');
  if (!encoding) {
    return utf8();
  }
  try {
    return new TextDecoder(encoding);
  } catch {
    return utf8();
  }
}
|
|
25
|
+
/**
 * Decode bytes into a string using the decoder chosen for `encoding`.
 *
 * @param {Uint8Array} buffer - Bytes to decode.
 * @param {string} encoding - Encoding label; the decoder factory decides the fallback.
 * @returns {string} The decoded text.
 */
export function decodeBuffer(buffer, encoding) {
  const decoder = createDecoder(encoding);
  return decoder.decode(buffer);
}
|
|
28
|
+
/**
 * Canonicalise an encoding label for comparison: trimmed and lower-cased.
 *
 * @param {string | null | undefined} encoding - Label to normalise.
 * @returns {string} The normalised label, or `''` when `encoding` is null or undefined.
 */
function normalizeEncodingLabel(encoding) {
  if (encoding == null) {
    return '';
  }
  return encoding.trim().toLowerCase();
}
|
|
31
|
+
/**
 * Tell whether an encoding label names a UTF-16 / UTF-32 style encoding.
 *
 * @param {string | undefined} encoding - Encoding label (any case, may have whitespace).
 * @returns {boolean} `true` for labels starting with `utf-16` or `utf-32`, and for the
 *   exact labels `ucs-2`, `unicodefffe` and `unicodefeff`.
 */
function isUnicodeWideEncoding(encoding) {
  const label = normalizeEncodingLabel(encoding);
  if (label.startsWith('utf-16') || label.startsWith('utf-32')) {
    return true;
  }
  return ['ucs-2', 'unicodefffe', 'unicodefeff'].includes(label);
}
|
|
39
|
+
// Byte-order marks, checked in order. The 4-byte UTF-32 marks come first because the
// UTF-32LE mark begins with the same two bytes as the UTF-16LE mark.
const BOM_SIGNATURES = [
  { encoding: 'utf-32le', bytes: [0xff, 0xfe, 0x00, 0x00] },
  { encoding: 'utf-32be', bytes: [0x00, 0x00, 0xfe, 0xff] },
  { encoding: 'utf-8', bytes: [0xef, 0xbb, 0xbf] },
  { encoding: 'utf-16le', bytes: [0xff, 0xfe] },
  { encoding: 'utf-16be', bytes: [0xfe, 0xff] },
];
|
|
47
|
+
/**
 * Check whether `buffer` begins with the exact byte sequence `signature`.
 *
 * @param {Uint8Array} buffer - Bytes to inspect.
 * @param {number[]} signature - Expected leading bytes.
 * @returns {boolean} `true` when every signature byte matches; `false` when the buffer is
 *   shorter than the signature or any byte differs.
 */
function startsWithBytes(buffer, signature) {
  if (signature.length > buffer.length) {
    return false;
  }
  return signature.every((byte, index) => buffer[index] === byte);
}
|
|
57
|
+
/**
 * Identify the encoding announced by a byte-order mark at the start of `buffer`.
 *
 * @param {Uint8Array} buffer - Leading bytes of the content.
 * @returns {string | undefined} Encoding of the first matching entry in the BOM table, or
 *   `undefined` when the buffer starts with none of them.
 */
function detectBomEncoding(buffer) {
  const match = BOM_SIGNATURES.find(({ bytes }) => startsWithBytes(buffer, bytes));
  return match?.encoding;
}
|
|
64
|
+
/**
 * Read an attribute-style value beginning at `startIndex`.
 *
 * A value opening with `"` or `'` runs to the matching closing quote; an unquoted value
 * runs to the next whitespace, `/` or `>` (or to the end of the input).
 *
 * @param {string} input - Text containing the value.
 * @param {number} startIndex - Index of the first character of the value.
 * @returns {string} The trimmed value; `''` when `startIndex` is past the end of the
 *   input or a quoted value is never closed.
 */
function readQuotedValue(input, startIndex) {
  const opener = input[startIndex];
  if (!opener) {
    return '';
  }

  if (opener !== '"' && opener !== "'") {
    const rest = input.slice(startIndex);
    const cut = rest.search(/[\s/>]/);
    const bare = cut === -1 ? rest : rest.slice(0, cut);
    return bare.trim();
  }

  const closer = input.indexOf(opener, startIndex + 1);
  if (closer === -1) {
    return '';
  }
  return input.slice(startIndex + 1, closer).trim();
}
|
|
77
|
+
/**
 * Find the first `charset=` value in a snippet of markup (matched case-insensitively).
 *
 * @param {string} headSnippet - Leading portion of the document as text.
 * @returns {string | undefined} The lower-cased charset value, or `undefined` when no
 *   `charset=` token exists or its value is empty.
 */
function extractHtmlCharset(headSnippet) {
  const token = 'charset=';
  const tokenAt = headSnippet.toLowerCase().indexOf(token);
  if (tokenAt === -1) {
    return undefined;
  }
  const value = readQuotedValue(headSnippet, tokenAt + token.length);
  if (!value) {
    return undefined;
  }
  return value.toLowerCase();
}
|
|
87
|
+
/**
 * Read the `encoding=` value from an XML declaration (`<?xml ... ?>`) in a snippet.
 *
 * When the declaration is not terminated by `?>`, everything from `<?xml` to the end of
 * the snippet is searched.
 *
 * @param {string} headSnippet - Leading portion of the document as text.
 * @returns {string | undefined} The lower-cased encoding, or `undefined` when there is no
 *   XML declaration, no `encoding=` inside it, or the value is empty.
 */
function extractXmlEncoding(headSnippet) {
  const lowered = headSnippet.toLowerCase();
  const declStart = lowered.indexOf('<?xml');
  if (declStart === -1) {
    return undefined;
  }

  const closeAt = lowered.indexOf('?>', declStart);
  const declEnd = closeAt === -1 ? undefined : closeAt + 2;
  const declaration = headSnippet.slice(declStart, declEnd);

  const token = 'encoding=';
  const tokenAt = declaration.toLowerCase().indexOf(token);
  if (tokenAt === -1) {
    return undefined;
  }

  const value = readQuotedValue(declaration, tokenAt + token.length);
  if (!value) {
    return undefined;
  }
  return value.toLowerCase();
}
|
|
105
|
+
/**
 * Look for an encoding declared inside the document itself.
 *
 * At most the first 8,192 bytes are decoded as latin1 (one character per
 * byte) and scanned, first for an HTML charset and then for an XML encoding
 * declaration.
 *
 * @param {Uint8Array} buffer - Raw document bytes; read through its underlying ArrayBuffer.
 * @returns {string | undefined} The declared encoding, or undefined when none is found or the buffer is empty.
 */
function detectHtmlDeclaredEncoding(buffer) {
    const scanSize = Math.min(buffer.length, 8_192);
    if (scanSize === 0) {
        return undefined;
    }
    // View over the same memory; no bytes are copied before decoding.
    const scanWindow = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize);
    const headSnippet = scanWindow.toString('latin1');
    const htmlCharset = extractHtmlCharset(headSnippet);
    if (htmlCharset !== undefined && htmlCharset !== null) {
        return htmlCharset;
    }
    return extractXmlEncoding(headSnippet);
}
|
|
112
|
+
/**
 * Choose the encoding used to decode a response body.
 *
 * Precedence: a byte-order mark in `sample`, then the caller-declared
 * encoding, then an encoding declared inside the document.
 *
 * @param {string | undefined} declaredEncoding - Encoding supplied by the caller, if any.
 * @param {Uint8Array} sample - Leading bytes of the body.
 * @returns {string | undefined} The selected encoding, or undefined when no source provides one.
 */
export function resolveEncoding(declaredEncoding, sample) {
    // `||` mirrors truthiness checks: an empty declared encoding falls through.
    return (detectBomEncoding(sample) ||
        declaredEncoding ||
        detectHtmlDeclaredEncoding(sample));
}
|
|
120
|
+
/**
 * Leading byte sequences treated as proof that a payload is binary.
 * Each entry is compared against the start of the buffer. The trailing
 * comments give the ASCII rendering and the format each signature is
 * commonly associated with.
 */
const BINARY_SIGNATURES = [
    // Documents and images
    [0x25, 0x50, 0x44, 0x46], // "%PDF"
    [0x89, 0x50, 0x4e, 0x47], // "\x89PNG"
    [0x47, 0x49, 0x46, 0x38], // "GIF8"
    [0xff, 0xd8, 0xff], // JPEG start-of-image marker
    [0x52, 0x49, 0x46, 0x46], // "RIFF" container
    [0x42, 0x4d], // "BM" (BMP)
    [0x49, 0x49, 0x2a, 0x00], // "II*\0" (TIFF, little-endian)
    [0x4d, 0x4d, 0x00, 0x2a], // "MM\0*" (TIFF, big-endian)
    [0x00, 0x00, 0x01, 0x00], // ICO header
    // Archives and compressed data
    [0x50, 0x4b, 0x03, 0x04], // "PK\x03\x04" (ZIP)
    [0x1f, 0x8b], // gzip
    [0x42, 0x5a, 0x68], // "BZh" (bzip2)
    [0x52, 0x61, 0x72, 0x21], // "Rar!"
    [0x37, 0x7a, 0xbc, 0xaf], // "7z\xbc\xaf"
    // Executables and bytecode
    [0x7f, 0x45, 0x4c, 0x46], // "\x7fELF"
    [0x4d, 0x5a], // "MZ"
    [0xcf, 0xfa, 0xed, 0xfe], // Mach-O 64-bit magic
    [0x00, 0x61, 0x73, 0x6d], // "\0asm" (WebAssembly)
    // Audio and video
    [0x1a, 0x45, 0xdf, 0xa3], // EBML header (Matroska/WebM)
    [0x66, 0x74, 0x79, 0x70], // "ftyp" — NOTE(review): ISO-BMFF files usually carry this at offset 4; confirm intent
    [0x46, 0x4c, 0x56], // "FLV"
    [0x49, 0x44, 0x33], // "ID3"
    [0xff, 0xfb], // MPEG audio frame sync
    [0xff, 0xfa], // MPEG audio frame sync
    [0x4f, 0x67, 0x67, 0x53], // "OggS"
    [0x66, 0x4c, 0x61, 0x43], // "fLaC"
    [0x4d, 0x54, 0x68, 0x64], // "MThd" (MIDI)
    // Fonts and databases
    [0x77, 0x4f, 0x46, 0x46], // "wOFF"
    [0x00, 0x01, 0x00, 0x00], // TrueType sfnt version
    [0x4f, 0x54, 0x54, 0x4f], // "OTTO" (OpenType/CFF)
    [0x53, 0x51, 0x4c, 0x69], // "SQLi" (SQLite header prefix)
];
|
|
153
|
+
/**
 * Report whether a zero byte occurs within the first `limit` bytes.
 *
 * @param {Uint8Array} buffer - Bytes to inspect (needs `length` and `subarray`).
 * @param {number} limit - Maximum number of leading bytes to examine.
 * @returns {boolean} True when a 0x00 byte lies inside the inspected window.
 */
function hasNullByte(buffer, limit) {
    const windowEnd = Math.min(buffer.length, limit);
    const inspected = buffer.subarray(0, windowEnd);
    return inspected.indexOf(0x00) !== -1;
}
|
|
157
|
+
/**
 * Decide whether a payload should be treated as binary.
 *
 * A buffer is binary when it starts with any entry of BINARY_SIGNATURES.
 * Otherwise it is binary when a zero byte occurs in the first 1000 bytes,
 * unless the encoding is a wide Unicode one (where zero bytes are expected).
 *
 * @param {Uint8Array} buffer - Leading bytes of the payload.
 * @param {string | undefined} encoding - Encoding label associated with the payload.
 * @returns {boolean} True when the content looks binary.
 */
export function isBinaryContent(buffer, encoding) {
    const matchesSignature = BINARY_SIGNATURES.some((signature) => startsWithBytes(buffer, signature));
    if (matchesSignature) {
        return true;
    }
    if (isUnicodeWideEncoding(encoding)) {
        return false;
    }
    return hasNullByte(buffer, 1000);
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import type { ReadableStream as NodeReadableStream } from 'node:stream/web';
|
|
3
|
+
export declare function toNodeReadableStream(stream: ReadableStream<Uint8Array>, url: string, stage: string): NodeReadableStream<Uint8Array>;
|
|
4
|
+
export declare function toWebReadableStream(stream: Readable, url: string, stage: string): ReadableStream<Uint8Array>;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { Readable } from 'node:stream';
|
|
2
|
+
import { FetchError } from './errors.js';
|
|
3
|
+
import { isObject } from './type-guards.js';
|
|
4
|
+
/**
 * Duck-type check for a web ReadableStream.
 *
 * The value must be an object exposing `getReader`, `cancel` and `tee` as
 * functions and `locked` as a boolean.
 *
 * @param {unknown} value - Candidate stream.
 * @returns {boolean} True when every required member has the expected type.
 */
function isReadableStreamLike(value) {
    if (!isObject(value)) {
        return false;
    }
    const requiredMethods = ['getReader', 'cancel', 'tee'];
    const hasMethods = requiredMethods.every((name) => typeof value[name] === 'function');
    return hasMethods && typeof value['locked'] === 'boolean';
}
|
|
12
|
+
/**
 * Throw unless `stream` passes the ReadableStream duck-type check.
 *
 * @param {unknown} stream - Candidate stream.
 * @param {string} url - URL passed through to the error.
 * @param {string} stage - Pipeline stage recorded in the error details.
 * @throws {FetchError} With message 'Invalid response stream', code 500 and
 *   details `{ reason: 'invalid_stream', stage }` when the check fails.
 */
function assertReadableStreamLike(stream, url, stage) {
    if (!isReadableStreamLike(stream)) {
        const details = { reason: 'invalid_stream', stage };
        throw new FetchError('Invalid response stream', url, 500, details);
    }
}
|
|
20
|
+
/**
 * Validate a web stream and hand it back unchanged.
 *
 * No conversion happens at runtime: the same object is returned once it has
 * passed `assertReadableStreamLike`.
 *
 * @param {ReadableStream<Uint8Array>} stream - Stream to validate.
 * @param {string} url - URL forwarded to the validation error.
 * @param {string} stage - Stage label forwarded to the validation error.
 * @returns {ReadableStream<Uint8Array>} The very same `stream` instance.
 */
export function toNodeReadableStream(stream, url, stage) {
    assertReadableStreamLike(stream, url, stage);
    const validated = stream;
    return validated;
}
|
|
24
|
+
/**
 * Convert a Node.js Readable into a web ReadableStream and validate the result.
 *
 * @param {import('node:stream').Readable} stream - Node stream to convert.
 * @param {string} url - URL forwarded to the validation error.
 * @param {string} stage - Stage label forwarded to the validation error.
 * @returns {ReadableStream<Uint8Array>} The stream produced by `Readable.toWeb`.
 */
export function toWebReadableStream(stream, url, stage) {
    const webStream = Readable.toWeb(stream);
    assertReadableStreamLike(webStream, url, stage);
    return webStream;
}
|
package/dist/fetch.js
CHANGED
|
@@ -10,6 +10,8 @@ import { finished, pipeline } from 'node:stream/promises';
|
|
|
10
10
|
import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
|
|
11
11
|
import { config } from './config.js';
|
|
12
12
|
import { createErrorWithCode, FetchError, isSystemError } from './errors.js';
|
|
13
|
+
import { decodeBuffer, getCharsetFromContentType, isBinaryContent, resolveEncoding, } from './fetch-content.js';
|
|
14
|
+
import { toNodeReadableStream, toWebReadableStream } from './fetch-stream.js';
|
|
13
15
|
import { createDefaultBlockList, normalizeIpForBlockList, } from './ip-blocklist.js';
|
|
14
16
|
import { getOperationId, getRequestId, logDebug, logError, logWarn, redactUrl, } from './observability.js';
|
|
15
17
|
import { isError, isObject } from './type-guards.js';
|
|
@@ -26,23 +28,6 @@ const defaultRedactor = {
|
|
|
26
28
|
redact: redactUrl,
|
|
27
29
|
};
|
|
28
30
|
const defaultFetch = (input, init) => globalThis.fetch(input, init);
|
|
29
|
-
/**
 * Throw unless `stream` is an object exposing a `getReader` function.
 *
 * @param {unknown} stream - Candidate stream.
 * @param {string} url - URL passed through to the error.
 * @param {string} stage - Pipeline stage recorded in the error details.
 * @throws {FetchError} With message 'Invalid response stream', code 500 and
 *   details `{ reason: 'invalid_stream', stage }` when the check fails.
 */
function assertReadableStreamLike(stream, url, stage) {
    const hasReader = isObject(stream) && typeof stream['getReader'] === 'function';
    if (!hasReader) {
        const details = { reason: 'invalid_stream', stage };
        throw new FetchError('Invalid response stream', url, 500, details);
    }
}
|
|
37
|
-
/**
 * Validate a web stream and hand it back unchanged.
 *
 * No conversion happens at runtime: the same object is returned once it has
 * passed `assertReadableStreamLike`.
 *
 * @param {ReadableStream<Uint8Array>} stream - Stream to validate.
 * @param {string} url - URL forwarded to the validation error.
 * @param {string} stage - Stage label forwarded to the validation error.
 * @returns {ReadableStream<Uint8Array>} The very same `stream` instance.
 */
function toNodeReadableStream(stream, url, stage) {
    assertReadableStreamLike(stream, url, stage);
    const validated = stream;
    return validated;
}
|
|
41
|
-
/**
 * Convert a Node.js Readable into a web ReadableStream and validate the result.
 *
 * @param {import('node:stream').Readable} stream - Node stream to convert.
 * @param {string} url - URL forwarded to the validation error.
 * @param {string} stage - Stage label forwarded to the validation error.
 * @returns {ReadableStream<Uint8Array>} The stream produced by `Readable.toWeb`.
 */
function toWebReadableStream(stream, url, stage) {
    const webStream = Readable.toWeb(stream);
    assertReadableStreamLike(webStream, url, stage);
    return webStream;
}
|
|
46
31
|
class IpBlocker {
|
|
47
32
|
security;
|
|
48
33
|
blockList = createDefaultBlockList();
|
|
@@ -456,6 +441,9 @@ class SafeDnsResolver {
|
|
|
456
441
|
if (signal?.aborted) {
|
|
457
442
|
throw createAbortSignalError();
|
|
458
443
|
}
|
|
444
|
+
if (this.isBlockedHostname(normalizedHostname)) {
|
|
445
|
+
throw createErrorWithCode(`Blocked host: ${normalizedHostname}. Internal hosts are not allowed`, 'EBLOCKED');
|
|
446
|
+
}
|
|
459
447
|
if (isIP(normalizedHostname)) {
|
|
460
448
|
if (this.ipBlocker.isBlockedIp(normalizedHostname)) {
|
|
461
449
|
throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Private IPs are not allowed`, 'EBLOCKED');
|
|
@@ -871,168 +859,6 @@ class RedirectFollower {
|
|
|
871
859
|
}
|
|
872
860
|
}
|
|
873
861
|
}
|
|
874
|
-
/**
 * Pull the `charset` parameter out of a Content-Type header value.
 *
 * The value runs up to the next `;`. Surrounding whitespace and one pair of
 * enclosing double quotes are removed.
 *
 * @param {string | null | undefined} contentType - Raw header value.
 * @returns {string | undefined} The charset text, or undefined when the
 *   header is empty or has no charset parameter.
 */
function getCharsetFromContentType(contentType) {
    if (!contentType) {
        return undefined;
    }
    const captured = /charset=([^;]+)/i.exec(contentType)?.[1];
    if (!captured) {
        return undefined;
    }
    const trimmed = captured.trim();
    const isDoubleQuoted = trimmed.startsWith('"') && trimmed.endsWith('"');
    const unwrapped = isDoubleQuoted ? trimmed.slice(1, -1) : trimmed;
    return unwrapped.trim();
}
|
|
887
|
-
/**
 * Build a TextDecoder for `encoding`, falling back to UTF-8.
 *
 * UTF-8 is used when no encoding is given and when the TextDecoder
 * constructor rejects the label.
 *
 * @param {string | undefined} encoding - Requested encoding label.
 * @returns {TextDecoder} Decoder for the requested encoding or for UTF-8.
 */
function createDecoder(encoding) {
    if (encoding) {
        try {
            return new TextDecoder(encoding);
        }
        catch {
            // Unsupported label: deliberately fall through to UTF-8.
        }
    }
    return new TextDecoder('utf-8');
}
|
|
897
|
-
/**
 * Decode bytes into text using the decoder chosen for `encoding`.
 *
 * @param {Uint8Array} buffer - Bytes to decode.
 * @param {string | undefined} encoding - Encoding label handed to `createDecoder`.
 * @returns {string} The decoded text.
 */
function decodeBuffer(buffer, encoding) {
    const decoder = createDecoder(encoding);
    return decoder.decode(buffer);
}
|
|
900
|
-
/**
 * Canonicalise an encoding label for comparison.
 *
 * @param {string | null | undefined} encoding - Raw label.
 * @returns {string} The label trimmed and lower-cased, or '' when it is null or undefined.
 */
function normalizeEncodingLabel(encoding) {
    if (encoding === undefined || encoding === null) {
        return '';
    }
    return encoding.trim().toLowerCase();
}
|
|
903
|
-
/**
 * Tell whether a label names a multi-byte-unit Unicode encoding.
 *
 * After normalisation the label matches when it starts with `utf-16` or
 * `utf-32`, or equals `ucs-2`, `unicodefffe` or `unicodefeff`.
 *
 * @param {string | undefined} encoding - Encoding label.
 * @returns {boolean} True for the wide encodings listed above.
 */
function isUnicodeWideEncoding(encoding) {
    const label = normalizeEncodingLabel(encoding);
    if (label.startsWith('utf-16') || label.startsWith('utf-32')) {
        return true;
    }
    const exactAliases = ['ucs-2', 'unicodefffe', 'unicodefeff'];
    return exactAliases.includes(label);
}
|
|
911
|
-
/**
 * Byte-order marks and the encodings they announce.
 *
 * Order matters: the UTF-32LE mark begins with the same two bytes as the
 * UTF-16LE mark, so the 4-byte entries are listed ahead of the 2-byte ones
 * to keep a first-match scan from picking the shorter prefix.
 */
const BOM_SIGNATURES = [
    {
        bytes: [0xff, 0xfe, 0x00, 0x00],
        encoding: 'utf-32le',
    },
    {
        bytes: [0x00, 0x00, 0xfe, 0xff],
        encoding: 'utf-32be',
    },
    {
        bytes: [0xef, 0xbb, 0xbf],
        encoding: 'utf-8',
    },
    {
        bytes: [0xff, 0xfe],
        encoding: 'utf-16le',
    },
    {
        bytes: [0xfe, 0xff],
        encoding: 'utf-16be',
    },
];
|
|
919
|
-
/**
 * Identify an encoding from a byte-order mark at the start of `buffer`.
 *
 * BOM_SIGNATURES is searched in declaration order and the first signature
 * that prefixes the buffer wins.
 *
 * @param {Uint8Array} buffer - Leading bytes of the payload.
 * @returns {string | undefined} Encoding of the first matching signature, otherwise undefined.
 */
function detectBomEncoding(buffer) {
    const hit = BOM_SIGNATURES.find((signature) => startsWithBytes(buffer, signature.bytes));
    return hit === undefined ? undefined : hit.encoding;
}
|
|
926
|
-
/**
 * Read an attribute-style value that begins at `startIndex` in `input`.
 *
 * A value opening with `"` or `'` runs to the matching closing quote; an
 * unterminated quoted value yields ''. An unquoted value runs until the
 * first delimiter.
 *
 * @param {string} input - Text containing the value.
 * @param {number} startIndex - Index of the first character of the value.
 * @returns {string} The trimmed value, or '' when nothing usable is found.
 */
function readQuotedValue(input, startIndex) {
    const first = input[startIndex];
    if (!first)
        return '';
    if (first === '"' || first === "'") {
        const end = input.indexOf(first, startIndex + 1);
        return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
    }
    const tail = input.slice(startIndex);
    // Quotes and ';' also end an unquoted value. Without them, a charset
    // embedded in another attribute (content="text/html; charset=utf-8">)
    // would be returned with the attribute's closing quote attached.
    const stop = tail.search(/[\s/>"';]/);
    return (stop === -1 ? tail : tail.slice(0, stop)).trim();
}
|
|
939
|
-
/**
 * Extract the first `charset=` value found in an HTML head snippet.
 *
 * The token is matched case-insensitively and whitespace around `=` is
 * tolerated, so both `charset="utf-8"` and `charset = "utf-8"` are found.
 *
 * @param {string} headSnippet - Leading portion of the document as text.
 * @returns {string | undefined} Lower-cased charset label, or undefined when
 *   no token is present or its value is empty.
 */
function extractHtmlCharset(headSnippet) {
    const token = /charset\s*=\s*/i.exec(headSnippet);
    if (!token)
        return undefined;
    // Matching on the original string keeps indexes aligned with the text
    // handed to readQuotedValue, without building a lower-cased copy.
    const valueStart = token.index + token[0].length;
    const charset = readQuotedValue(headSnippet, valueStart);
    return charset ? charset.toLowerCase() : undefined;
}
|
|
949
|
-
/**
 * Extract the `encoding` pseudo-attribute from an XML declaration.
 *
 * Only the text between the first `<?xml` and the following `?>` is searched
 * (or the rest of the snippet when `?>` is missing). Whitespace around `=` is
 * accepted, as the XML grammar allows it.
 *
 * @param {string} headSnippet - Leading portion of the document as text.
 * @returns {string | undefined} Lower-cased encoding label, or undefined when
 *   there is no declaration, no encoding token, or an empty value.
 */
function extractXmlEncoding(headSnippet) {
    const xmlStart = headSnippet.search(/<\?xml/i);
    if (xmlStart === -1)
        return undefined;
    const xmlEnd = headSnippet.indexOf('?>', xmlStart);
    const declaration = xmlEnd === -1
        ? headSnippet.slice(xmlStart)
        : headSnippet.slice(xmlStart, xmlEnd + 2);
    const token = /encoding\s*=\s*/i.exec(declaration);
    if (!token)
        return undefined;
    const encoding = readQuotedValue(declaration, token.index + token[0].length);
    return encoding ? encoding.toLowerCase() : undefined;
}
|
|
967
|
-
/**
 * Look for an encoding declared inside the document itself.
 *
 * At most the first 8,192 bytes are decoded as latin1 (one character per
 * byte) and scanned, first for an HTML charset and then for an XML encoding
 * declaration.
 *
 * @param {Uint8Array} buffer - Raw document bytes; read through its underlying ArrayBuffer.
 * @returns {string | undefined} The declared encoding, or undefined when none is found or the buffer is empty.
 */
function detectHtmlDeclaredEncoding(buffer) {
    const scanSize = Math.min(buffer.length, 8_192);
    if (scanSize === 0) {
        return undefined;
    }
    // View over the same memory; no bytes are copied before decoding.
    const scanWindow = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize);
    const headSnippet = scanWindow.toString('latin1');
    const htmlCharset = extractHtmlCharset(headSnippet);
    if (htmlCharset !== undefined && htmlCharset !== null) {
        return htmlCharset;
    }
    return extractXmlEncoding(headSnippet);
}
|
|
974
|
-
/**
 * Choose the encoding used to decode a response body.
 *
 * Precedence: a byte-order mark in `sample`, then the caller-declared
 * encoding, then an encoding declared inside the document.
 *
 * @param {string | undefined} declaredEncoding - Encoding supplied by the caller, if any.
 * @param {Uint8Array} sample - Leading bytes of the body.
 * @returns {string | undefined} The selected encoding, or undefined when no source provides one.
 */
function resolveEncoding(declaredEncoding, sample) {
    // `||` mirrors truthiness checks: an empty declared encoding falls through.
    return (detectBomEncoding(sample) ||
        declaredEncoding ||
        detectHtmlDeclaredEncoding(sample));
}
|
|
982
|
-
/**
 * Leading byte sequences treated as proof that a payload is binary.
 * Each entry is compared against the start of the buffer. The trailing
 * comments give the ASCII rendering and the format each signature is
 * commonly associated with.
 */
const BINARY_SIGNATURES = [
    // Documents and images
    [0x25, 0x50, 0x44, 0x46], // "%PDF"
    [0x89, 0x50, 0x4e, 0x47], // "\x89PNG"
    [0x47, 0x49, 0x46, 0x38], // "GIF8"
    [0xff, 0xd8, 0xff], // JPEG start-of-image marker
    [0x52, 0x49, 0x46, 0x46], // "RIFF" container
    [0x42, 0x4d], // "BM" (BMP)
    [0x49, 0x49, 0x2a, 0x00], // "II*\0" (TIFF, little-endian)
    [0x4d, 0x4d, 0x00, 0x2a], // "MM\0*" (TIFF, big-endian)
    [0x00, 0x00, 0x01, 0x00], // ICO header
    // Archives and compressed data
    [0x50, 0x4b, 0x03, 0x04], // "PK\x03\x04" (ZIP)
    [0x1f, 0x8b], // gzip
    [0x42, 0x5a, 0x68], // "BZh" (bzip2)
    [0x52, 0x61, 0x72, 0x21], // "Rar!"
    [0x37, 0x7a, 0xbc, 0xaf], // "7z\xbc\xaf"
    // Executables and bytecode
    [0x7f, 0x45, 0x4c, 0x46], // "\x7fELF"
    [0x4d, 0x5a], // "MZ"
    [0xcf, 0xfa, 0xed, 0xfe], // Mach-O 64-bit magic
    [0x00, 0x61, 0x73, 0x6d], // "\0asm" (WebAssembly)
    // Audio and video
    [0x1a, 0x45, 0xdf, 0xa3], // EBML header (Matroska/WebM)
    [0x66, 0x74, 0x79, 0x70], // "ftyp" — NOTE(review): ISO-BMFF files usually carry this at offset 4; confirm intent
    [0x46, 0x4c, 0x56], // "FLV"
    [0x49, 0x44, 0x33], // "ID3"
    [0xff, 0xfb], // MPEG audio frame sync
    [0xff, 0xfa], // MPEG audio frame sync
    [0x4f, 0x67, 0x67, 0x53], // "OggS"
    [0x66, 0x4c, 0x61, 0x43], // "fLaC"
    [0x4d, 0x54, 0x68, 0x64], // "MThd" (MIDI)
    // Fonts and databases
    [0x77, 0x4f, 0x46, 0x46], // "wOFF"
    [0x00, 0x01, 0x00, 0x00], // TrueType sfnt version
    [0x4f, 0x54, 0x54, 0x4f], // "OTTO" (OpenType/CFF)
    [0x53, 0x51, 0x4c, 0x69], // "SQLi" (SQLite header prefix)
];
|
|
1015
|
-
/**
 * Check whether `buffer` begins with the byte sequence `signature`.
 *
 * @param {Uint8Array} buffer - Bytes to test.
 * @param {number[]} signature - Expected leading bytes.
 * @returns {boolean} True when the buffer is at least as long as the
 *   signature and every leading byte matches; an empty signature always matches.
 */
function startsWithBytes(buffer, signature) {
    if (buffer.length < signature.length) {
        return false;
    }
    return signature.every((expected, index) => buffer[index] === expected);
}
|
|
1025
|
-
/**
 * Report whether a zero byte occurs within the first `limit` bytes.
 *
 * @param {Uint8Array} buffer - Bytes to inspect (needs `length` and `subarray`).
 * @param {number} limit - Maximum number of leading bytes to examine.
 * @returns {boolean} True when a 0x00 byte lies inside the inspected window.
 */
function hasNullByte(buffer, limit) {
    const windowEnd = Math.min(buffer.length, limit);
    const inspected = buffer.subarray(0, windowEnd);
    return inspected.indexOf(0x00) !== -1;
}
|
|
1029
|
-
/**
 * Decide whether a payload should be treated as binary.
 *
 * A buffer is binary when it starts with any entry of BINARY_SIGNATURES.
 * Otherwise it is binary when a zero byte occurs in the first 1000 bytes,
 * unless the encoding is a wide Unicode one (where zero bytes are expected).
 *
 * @param {Uint8Array} buffer - Leading bytes of the payload.
 * @param {string | undefined} encoding - Encoding label associated with the payload.
 * @returns {boolean} True when the content looks binary.
 */
function isBinaryContent(buffer, encoding) {
    const matchesSignature = BINARY_SIGNATURES.some((signature) => startsWithBytes(buffer, signature));
    if (matchesSignature) {
        return true;
    }
    if (isUnicodeWideEncoding(encoding)) {
        return false;
    }
    return hasNullByte(buffer, 1000);
}
|
|
1036
862
|
class ResponseTextReader {
|
|
1037
863
|
async read(response, url, maxBytes, signal, encoding) {
|
|
1038
864
|
const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
|
|
@@ -1298,7 +1124,7 @@ function createDecompressor(encoding) {
|
|
|
1298
1124
|
function createPumpedStream(initialChunk, reader) {
|
|
1299
1125
|
return new ReadableStream({
|
|
1300
1126
|
start(controller) {
|
|
1301
|
-
if (initialChunk.byteLength > 0) {
|
|
1127
|
+
if (initialChunk && initialChunk.byteLength > 0) {
|
|
1302
1128
|
controller.enqueue(initialChunk);
|
|
1303
1129
|
}
|
|
1304
1130
|
},
|
|
@@ -1321,31 +1147,6 @@ function createPumpedStream(initialChunk, reader) {
|
|
|
1321
1147
|
},
|
|
1322
1148
|
});
|
|
1323
1149
|
}
|
|
1324
|
-
/**
 * Heuristically decide whether `chunk` really carries data compressed with
 * `encoding`.
 *
 * - `gzip`: the chunk must start with bytes 0x1f 0x8b.
 * - `deflate`: the low nibble of the first byte must be 8 and the first two
 *   bytes, read as a big-endian number, must be divisible by 31.
 * - any other encoding: more than 10% of the first 50 bytes must be control
 *   bytes (below 0x09, between 0x0e and 0x1f, or 0x7f).
 *
 * @param {Uint8Array} chunk - First chunk of the response body.
 * @param {string} encoding - Content-encoding token being checked.
 * @returns {boolean} True when the chunk looks compressed; always false for an empty chunk.
 */
function isLikelyCompressed(chunk, encoding) {
    if (chunk.byteLength === 0) {
        return false;
    }
    switch (encoding) {
        case 'gzip':
            return chunk.byteLength >= 2 && chunk[0] === 0x1f && chunk[1] === 0x8b;
        case 'deflate': {
            if (chunk.byteLength < 2) {
                return false;
            }
            const headerByte = chunk[0] ?? 0;
            const flagByte = chunk[1] ?? 0;
            const compressionMethod = headerByte & 0x0f;
            return compressionMethod === 8 && (headerByte * 256 + flagByte) % 31 === 0;
        }
        default: {
            const sampleSize = Math.min(chunk.length, 50);
            const isControlByte = (value) => value < 0x09 || (value > 0x0d && value < 0x20) || value === 0x7f;
            let controlCount = 0;
            for (let index = 0; index < sampleSize; index += 1) {
                if (isControlByte(chunk[index] ?? 0)) {
                    controlCount += 1;
                }
            }
            return controlCount / sampleSize > 0.1;
        }
    }
}
|
|
1349
1150
|
async function decodeResponseIfNeeded(response, url, signal) {
|
|
1350
1151
|
const encodingHeader = response.headers.get('content-encoding');
|
|
1351
1152
|
const parsedEncodings = parseContentEncodings(encodingHeader);
|
|
@@ -1361,76 +1162,81 @@ async function decodeResponseIfNeeded(response, url, signal) {
|
|
|
1361
1162
|
}
|
|
1362
1163
|
if (!response.body)
|
|
1363
1164
|
return response;
|
|
1364
|
-
|
|
1365
|
-
const reader = response.body.getReader();
|
|
1366
|
-
let initialChunk;
|
|
1367
|
-
try {
|
|
1368
|
-
const { done, value } = await reader.read();
|
|
1369
|
-
if (done) {
|
|
1370
|
-
return new Response(null, {
|
|
1371
|
-
status: response.status,
|
|
1372
|
-
statusText: response.statusText,
|
|
1373
|
-
headers: response.headers,
|
|
1374
|
-
});
|
|
1375
|
-
}
|
|
1376
|
-
initialChunk = value;
|
|
1377
|
-
}
|
|
1378
|
-
catch (error) {
|
|
1379
|
-
// If read fails, throw properly
|
|
1380
|
-
throw new FetchError(`Failed to read response body: ${isError(error) ? error.message : String(error)}`, url, 502);
|
|
1381
|
-
}
|
|
1165
|
+
const [decodeBranch, passthroughBranch] = response.body.tee();
|
|
1382
1166
|
const decodeOrder = encodings
|
|
1383
1167
|
.slice()
|
|
1384
1168
|
.reverse()
|
|
1385
1169
|
.filter(isSupportedContentEncoding);
|
|
1386
|
-
const firstDecodeEncoding = decodeOrder[0];
|
|
1387
|
-
if (!firstDecodeEncoding ||
|
|
1388
|
-
!isLikelyCompressed(initialChunk, firstDecodeEncoding)) {
|
|
1389
|
-
const body = createPumpedStream(initialChunk, reader);
|
|
1390
|
-
const headers = new Headers(response.headers);
|
|
1391
|
-
headers.delete('content-encoding');
|
|
1392
|
-
headers.delete('content-length');
|
|
1393
|
-
return new Response(body, {
|
|
1394
|
-
status: response.status,
|
|
1395
|
-
statusText: response.statusText,
|
|
1396
|
-
headers,
|
|
1397
|
-
});
|
|
1398
|
-
}
|
|
1399
1170
|
const decompressors = decodeOrder.map((encoding) => createDecompressor(encoding));
|
|
1400
|
-
const
|
|
1171
|
+
const decodeSource = Readable.fromWeb(toNodeReadableStream(decodeBranch, url, 'response:decode-content-encoding'));
|
|
1401
1172
|
const decodedNodeStream = new PassThrough();
|
|
1402
|
-
const
|
|
1403
|
-
|
|
1173
|
+
const decodedPipeline = pipeline([
|
|
1174
|
+
decodeSource,
|
|
1404
1175
|
...decompressors,
|
|
1405
1176
|
decodedNodeStream,
|
|
1406
1177
|
]);
|
|
1407
|
-
const
|
|
1408
|
-
|
|
1178
|
+
const headers = new Headers(response.headers);
|
|
1179
|
+
headers.delete('content-encoding');
|
|
1180
|
+
headers.delete('content-length');
|
|
1181
|
+
const abortDecodePipeline = () => {
|
|
1182
|
+
decodeSource.destroy();
|
|
1409
1183
|
for (const decompressor of decompressors) {
|
|
1410
1184
|
decompressor.destroy();
|
|
1411
1185
|
}
|
|
1412
1186
|
decodedNodeStream.destroy();
|
|
1413
1187
|
};
|
|
1414
1188
|
if (signal) {
|
|
1415
|
-
signal.addEventListener('abort',
|
|
1189
|
+
signal.addEventListener('abort', abortDecodePipeline, { once: true });
|
|
1416
1190
|
}
|
|
1417
|
-
void
|
|
1191
|
+
void decodedPipeline.catch((error) => {
|
|
1418
1192
|
decodedNodeStream.destroy(error instanceof Error ? error : new Error(String(error)));
|
|
1419
1193
|
});
|
|
1420
|
-
const
|
|
1421
|
-
const
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1194
|
+
const decodedBodyStream = toWebReadableStream(decodedNodeStream, url, 'response:decode-content-encoding');
|
|
1195
|
+
const decodedReader = decodedBodyStream.getReader();
|
|
1196
|
+
const clearAbortListener = () => {
|
|
1197
|
+
if (!signal)
|
|
1198
|
+
return;
|
|
1199
|
+
signal.removeEventListener('abort', abortDecodePipeline);
|
|
1200
|
+
};
|
|
1201
|
+
try {
|
|
1202
|
+
const first = await decodedReader.read();
|
|
1203
|
+
if (first.done) {
|
|
1204
|
+
clearAbortListener();
|
|
1205
|
+
void passthroughBranch.cancel().catch(() => undefined);
|
|
1206
|
+
return new Response(null, {
|
|
1207
|
+
status: response.status,
|
|
1208
|
+
statusText: response.statusText,
|
|
1209
|
+
headers,
|
|
1210
|
+
});
|
|
1211
|
+
}
|
|
1212
|
+
void passthroughBranch.cancel().catch(() => undefined);
|
|
1213
|
+
const body = createPumpedStream(first.value, decodedReader);
|
|
1214
|
+
if (signal) {
|
|
1215
|
+
void finished(decodedNodeStream, { cleanup: true }).finally(() => {
|
|
1216
|
+
clearAbortListener();
|
|
1217
|
+
});
|
|
1218
|
+
}
|
|
1219
|
+
return new Response(body, {
|
|
1220
|
+
status: response.status,
|
|
1221
|
+
statusText: response.statusText,
|
|
1222
|
+
headers,
|
|
1223
|
+
});
|
|
1224
|
+
}
|
|
1225
|
+
catch (error) {
|
|
1226
|
+
clearAbortListener();
|
|
1227
|
+
abortDecodePipeline();
|
|
1228
|
+
void decodedReader.cancel(error).catch(() => undefined);
|
|
1229
|
+
logDebug('Content-Encoding decode failed; using passthrough body', {
|
|
1230
|
+
url: redactUrl(url),
|
|
1231
|
+
encoding: encodingHeader ?? encodings.join(','),
|
|
1232
|
+
error: isError(error) ? error.message : String(error),
|
|
1233
|
+
});
|
|
1234
|
+
return new Response(passthroughBranch, {
|
|
1235
|
+
status: response.status,
|
|
1236
|
+
statusText: response.statusText,
|
|
1237
|
+
headers,
|
|
1427
1238
|
});
|
|
1428
1239
|
}
|
|
1429
|
-
return new Response(decodedBody, {
|
|
1430
|
-
status: response.status,
|
|
1431
|
-
statusText: response.statusText,
|
|
1432
|
-
headers,
|
|
1433
|
-
});
|
|
1434
1240
|
}
|
|
1435
1241
|
async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry, reader, maxBytes, mode, signal) {
|
|
1436
1242
|
const responseError = resolveResponseError(response, finalUrl);
|
package/dist/tools.js
CHANGED
|
@@ -446,8 +446,21 @@ function buildToolContentBlocks(structuredContent, resourceLink, embeddedResourc
|
|
|
446
446
|
}
|
|
447
447
|
/**
 * Normalise a URL and, where applicable, swap it for its raw-content form.
 *
 * @param {string} url - URL supplied by the caller.
 * @returns {{ normalizedUrl: string, originalUrl: string, transformed: boolean }}
 *   `originalUrl` is always the validated input URL; `normalizedUrl` is the
 *   re-normalised raw URL when `transformed` is true, otherwise the validated URL.
 */
function resolveNormalizedUrl(url) {
    const { normalizedUrl: validatedUrl } = normalizeUrl(url);
    const rawCandidate = transformToRawUrl(validatedUrl);
    const resolved = {
        normalizedUrl: validatedUrl,
        originalUrl: validatedUrl,
        transformed: false,
    };
    if (rawCandidate.transformed) {
        // Re-validate transformed URLs so blocked-host and length policies still apply.
        resolved.normalizedUrl = normalizeUrl(rawCandidate.url).normalizedUrl;
        resolved.transformed = true;
    }
    return resolved;
}
|
|
452
465
|
function logRawUrlTransformation(resolvedUrl) {
|
|
453
466
|
if (!resolvedUrl.transformed)
|
|
@@ -493,10 +506,12 @@ function attemptCacheRetrieval(params) {
|
|
|
493
506
|
return null;
|
|
494
507
|
}
|
|
495
508
|
logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
|
|
509
|
+
const finalUrl = cached.url !== normalizedUrl ? cached.url : undefined;
|
|
496
510
|
return {
|
|
497
511
|
data,
|
|
498
512
|
fromCache: true,
|
|
499
513
|
url: normalizedUrl,
|
|
514
|
+
...(finalUrl ? { finalUrl } : {}),
|
|
500
515
|
fetchedAt: cached.fetchedAt,
|
|
501
516
|
cacheKey,
|
|
502
517
|
};
|
|
@@ -675,12 +690,16 @@ export function parseCachedMarkdownResult(cached) {
|
|
|
675
690
|
if (typeof markdown !== 'string')
|
|
676
691
|
return undefined;
|
|
677
692
|
const metadata = normalizeExtractedMetadata(result.data.metadata);
|
|
693
|
+
const truncated = result.data.truncated ?? false;
|
|
694
|
+
const persistedMarkdown = truncated
|
|
695
|
+
? appendTruncationMarker(markdown, TRUNCATION_MARKER)
|
|
696
|
+
: markdown;
|
|
678
697
|
return {
|
|
679
|
-
content:
|
|
680
|
-
markdown,
|
|
698
|
+
content: persistedMarkdown,
|
|
699
|
+
markdown: persistedMarkdown,
|
|
681
700
|
title: result.data.title,
|
|
682
701
|
...(metadata ? { metadata } : {}),
|
|
683
|
-
truncated
|
|
702
|
+
truncated,
|
|
684
703
|
};
|
|
685
704
|
}
|
|
686
705
|
const markdownTransform = async (input, url, signal, skipNoiseRemoval) => {
|
|
@@ -695,8 +714,11 @@ const markdownTransform = async (input, url, signal, skipNoiseRemoval) => {
|
|
|
695
714
|
return { ...result, content: result.markdown, truncated };
|
|
696
715
|
};
|
|
697
716
|
function serializeMarkdownResult(result) {
|
|
717
|
+
const persistedMarkdown = result.truncated
|
|
718
|
+
? appendTruncationMarker(result.markdown, TRUNCATION_MARKER)
|
|
719
|
+
: result.markdown;
|
|
698
720
|
return JSON.stringify({
|
|
699
|
-
markdown:
|
|
721
|
+
markdown: persistedMarkdown,
|
|
700
722
|
title: result.title,
|
|
701
723
|
metadata: result.metadata,
|
|
702
724
|
truncated: result.truncated,
|
package/dist/transform.js
CHANGED
|
@@ -2369,19 +2369,15 @@ function resolveWorkerFallback(error, htmlOrBuffer, url, options) {
|
|
|
2369
2369
|
});
|
|
2370
2370
|
return transformInputInProcess(htmlOrBuffer, url, options);
|
|
2371
2371
|
}
|
|
2372
|
+
abortPolicy.throwIfAborted(options.signal, url, 'transform:worker-fallback');
|
|
2372
2373
|
if (error instanceof FetchError)
|
|
2373
2374
|
throw error;
|
|
2374
|
-
abortPolicy.throwIfAborted(options.signal, url, 'transform:worker-fallback');
|
|
2375
2375
|
const message = getErrorMessage(error);
|
|
2376
|
-
logWarn('Transform worker failed;
|
|
2376
|
+
logWarn('Transform worker failed; falling back to in-process', {
|
|
2377
2377
|
url: redactUrl(url),
|
|
2378
2378
|
error: message,
|
|
2379
2379
|
});
|
|
2380
|
-
|
|
2381
|
-
reason: 'worker_failed',
|
|
2382
|
-
stage: 'transform:worker',
|
|
2383
|
-
error: message,
|
|
2384
|
-
});
|
|
2380
|
+
return transformInputInProcess(htmlOrBuffer, url, options);
|
|
2385
2381
|
}
|
|
2386
2382
|
async function runWorkerTransformWithFallback(htmlOrBuffer, url, options) {
|
|
2387
2383
|
const workerStage = stageTracker.start(url, 'transform:worker');
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@j0hanz/fetch-url-mcp",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"mcpName": "io.github.j0hanz/fetch-url-mcp",
|
|
5
5
|
"description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
|
|
6
6
|
"type": "module",
|
|
@@ -52,6 +52,7 @@
|
|
|
52
52
|
"start": "node dist/index.js",
|
|
53
53
|
"format": "prettier --write .",
|
|
54
54
|
"type-check": "node scripts/tasks.mjs type-check",
|
|
55
|
+
"type-check:tests": "node scripts/tasks.mjs type-check:tests",
|
|
55
56
|
"type-check:diagnostics": "tsc --noEmit --extendedDiagnostics",
|
|
56
57
|
"type-check:trace": "node -e \"require('fs').rmSync('.ts-trace',{recursive:true,force:true})\" && tsc --noEmit --generateTrace .ts-trace",
|
|
57
58
|
"lint": "eslint .",
|