@j0hanz/fetch-url-mcp 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -52,6 +52,7 @@ URL → Validate → DNS Preflight → HTTP Fetch → Decompress
52
52
  ```text
53
53
  fetch-url-mcp/
54
54
  ├── assets/ # Server icon (logo.svg)
55
+ ├── examples/ # Client examples
55
56
  ├── scripts/ # Build & test orchestration
56
57
  ├── src/
57
58
  │ ├── workers/ # Worker-thread child for HTML transforms
@@ -95,6 +96,23 @@ Add to your MCP client configuration:
95
96
  }
96
97
  ```
97
98
 
99
+ ## Client Example (CLI)
100
+
101
+ Build the server and examples, then run the client:
102
+
103
+ ```bash
104
+ npm run build
105
+ node dist/examples/mcp-fetch-url-client.js https://example.com
106
+ ```
107
+
108
+ Optional flags:
109
+
110
+ - `--full` reads the cached markdown resource to avoid inline truncation.
111
+ - `--task` enables task-based execution with streamed status updates.
112
+ - `--task-ttl <ms>` sets task TTL; `--task-poll <ms>` sets poll interval.
113
+ - `--http http://localhost:3000/mcp` connects to the Streamable HTTP server.
114
+ - Progress updates (when emitted) are printed to stderr.
115
+
98
116
  ## Installation
99
117
 
100
118
  ### NPX (Recommended)
package/dist/AGENTS.md CHANGED
@@ -34,7 +34,7 @@
34
34
  - `tests/` — Unit/integration tests (46+ test files) using Node.js built-in test runner
35
35
  - `scripts/` — Build & test orchestration (`tasks.mjs`)
36
36
  - `assets/` — Server icon (`logo.svg`)
37
- - `.github/workflows/` — CI/CD (`release.yml`: lint → type-check → test → build → publish to npm, MCP Registry, Docker)
37
+ - `.github/workflows/` — CI/CD (`release.yml`: lint → type-check → type-check:tests → test → build → publish to npm, MCP Registry, Docker)
38
38
 
39
39
  > Ignore: `dist/`, `node_modules/`, `coverage/`, `.cache/`, `.tsbuildinfo`
40
40
 
@@ -49,6 +49,7 @@ All commands verified from `.github/workflows/release.yml` (CI) and `package.jso
49
49
  - **Start:** `npm run start` → `node dist/index.js` (see `package.json`)
50
50
  - **Build:** `npm run build` → `node scripts/tasks.mjs build` — cleans `dist/`, compiles TS, validates `instructions.md`, copies assets, sets executable bit (see `scripts/tasks.mjs`, `package.json`)
51
51
  - **Type-check:** `npm run type-check` → `tsc -p tsconfig.json --noEmit` (see `scripts/tasks.mjs`, `.github/workflows/release.yml`)
52
+ - **Type-check (tests):** `npm run type-check:tests` → build output + `tsc -p tsconfig.tests.json --noEmit` (see `scripts/tasks.mjs`, `.github/workflows/release.yml`)
52
53
  - **Lint:** `npm run lint` → `eslint .` (see `package.json`, `.github/workflows/release.yml`)
53
54
  - **Lint (fix):** `npm run lint:fix` → `eslint . --fix` (see `package.json`)
54
55
  - **Format:** `npm run format` → `prettier --write .` (see `package.json`)
@@ -135,7 +136,7 @@ All commands verified from `.github/workflows/release.yml` (CI) and `package.jso
135
136
  - Config values temporarily overridden per test with `try/finally` cleanup (observed in `tests/fetch-url-tool.test.ts`)
136
137
  - Worker pool shutdown in `after()` hooks for clean teardown (observed in `tests/fetch-url-tool.test.ts`)
137
138
  - No external services (DB/containers) required for tests
138
- - **CI validation order:** `lint` → `type-check` → `test` → `build` (see `.github/workflows/release.yml`)
139
+ - **CI validation order:** `lint` → `type-check` → `type-check:tests` → `test` → `build` (see `.github/workflows/release.yml`)
139
140
 
140
141
  ## 7) Common Pitfalls (Verified Only)
141
142
 
@@ -0,0 +1,313 @@
1
+ import { access, writeFile } from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import process from 'node:process';
4
+ import { fileURLToPath } from 'node:url';
5
+ import { parseArgs } from 'node:util';
6
+ import { Client } from '@modelcontextprotocol/sdk/client';
7
+ import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
8
+ import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
9
+ const __filename = fileURLToPath(import.meta.url);
10
+ const __dirname = path.dirname(__filename);
11
/**
 * Writes the command-line help text for the example client to stderr.
 *
 * The text starts with a blank line and ends with a trailing newline.
 */
function printUsage() {
    const helpLines = [
        '',
        'Usage:',
        'node dist/examples/mcp-fetch-url-client.js <url> [options]',
        '',
        'Options:',
        '--http <url> Connect via Streamable HTTP (e.g. http://localhost:3000/mcp)',
        '--task Use task-based execution with streamed status updates',
        '--task-ttl <ms> Task TTL in milliseconds (optional)',
        '--task-poll <ms> Task poll interval in milliseconds (optional)',
        '--no-noise Skip noise removal',
        '--force Force refresh (bypass cache)',
        '--max-inline <n> Max inline chars before truncation',
        '--full If truncated, read cached resource for full markdown',
        '--out <path> Write markdown to file instead of stdout',
        '--json Print full structured JSON instead of markdown',
        '--cmd <executable> Stdio: command to spawn (default: node)',
        '--server <path> Stdio: server entry (default: dist/index.js)',
        '--cwd <path> Stdio: working directory for server (default: repo root)',
        '--env KEY=VALUE Stdio: add/override environment variable (repeatable)',
        '-h, --help Show help',
        '',
    ];
    process.stderr.write(helpLines.join('\n'));
}
35
/**
 * Walks upward from `startDir` looking for a directory that contains an
 * accessible `package.json`.
 *
 * @param {string} startDir Directory where the search begins.
 * @returns {Promise<string>} The first directory (starting directory included)
 *   holding a `package.json`, or `startDir` itself when the filesystem root is
 *   reached without finding one.
 */
async function findRepoRoot(startDir) {
    for (let dir = startDir;;) {
        const manifest = path.join(dir, 'package.json');
        const found = await access(manifest).then(() => true, () => false);
        if (found) {
            return dir;
        }
        const up = path.dirname(dir);
        // dirname() of the filesystem root is the root itself: nothing left to scan.
        if (up === dir) {
            return startDir;
        }
        dir = up;
    }
}
52
/**
 * Parses repeated `--env KEY=VALUE` flags into an environment map.
 *
 * The key is everything before the first `=` (trimmed); the value is
 * everything after it, untouched, so values may themselves contain `=`.
 * Later occurrences of a key override earlier ones.
 *
 * @param {string[] | undefined} values Raw `KEY=VALUE` strings, if any.
 * @returns {Record<string, string>} Parsed overrides (empty when none given).
 * @throws {Error} When an item has no `=`, starts with `=`, or has a blank key.
 */
function parseEnvOverrides(values) {
    // Collect into a Map and materialise with Object.fromEntries so that every
    // key becomes an own data property. Plain `env[key] = value` silently drops
    // a key named `__proto__` because it hits the prototype setter instead.
    const overrides = new Map();
    for (const item of values ?? []) {
        const index = item.indexOf('=');
        if (index <= 0) {
            throw new Error(`Invalid --env value: ${item}`);
        }
        const key = item.slice(0, index).trim();
        if (!key) {
            throw new Error(`Invalid --env key in: ${item}`);
        }
        overrides.set(key, item.slice(index + 1));
    }
    return Object.fromEntries(overrides);
}
71
/**
 * Takes a plain-object snapshot of the current process environment.
 *
 * Only entries whose value is a string are copied.
 *
 * @returns {Record<string, string>} A fresh object, detached from `process.env`.
 */
function buildInheritedEnv() {
    const snapshot = {};
    Object.entries(process.env).forEach(([name, current]) => {
        if (typeof current === 'string') {
            snapshot[name] = current;
        }
    });
    return snapshot;
}
80
/**
 * Returns the `text` of the first resource entry that carries a string `text`.
 *
 * @param {Iterable<object>} contents Resource content entries.
 * @returns {string | null} The first string `text` found, or `null` if none.
 */
function pickTextResource(contents) {
    for (const { text: candidate } of contents) {
        if (typeof candidate === 'string') {
            return candidate;
        }
    }
    return null;
}
89
/**
 * Renders a progress notification as a short human-readable string.
 *
 * With a positive numeric `total` the output is a rounded percentage;
 * otherwise the raw `progress` value is printed. A non-empty `message`
 * is appended after a single space in both cases.
 *
 * @param {{ progress: number, total?: number, message?: string }} progress
 * @returns {string}
 */
function formatProgress(progress) {
    const suffix = progress.message ? ` ${progress.message}` : '';
    const hasUsableTotal = typeof progress.total === 'number' && progress.total > 0;
    if (!hasUsableTotal) {
        return `${progress.progress}${suffix}`;
    }
    const percent = Math.round((progress.progress / progress.total) * 100);
    return `${percent}%${suffix}`;
}
97
/**
 * Extracts `structuredContent` from a tool result when it is a non-array object.
 *
 * @param {unknown} result Value returned by a tool call.
 * @returns {object | null} The structured payload, or `null` when `result` is
 *   not an object or its `structuredContent` is missing, falsy, an array, or
 *   not an object.
 */
function getStructuredContent(result) {
    if (result === null || typeof result !== 'object') {
        return null;
    }
    const payload = result.structuredContent;
    const isRecord = Boolean(payload) && typeof payload === 'object' && !Array.isArray(payload);
    return isRecord ? payload : null;
}
109
/**
 * Tells whether a tool result is flagged as an error.
 *
 * @param {unknown} result Value returned by a tool call.
 * @returns {boolean} `true` only when `result` is an object whose `isError`
 *   property is exactly `true`.
 */
function isToolError(result) {
    const isObjectResult = typeof result === 'object' && result !== null;
    return isObjectResult && result.isError === true;
}
116
/**
 * Reads a string-valued field from a structured payload.
 *
 * @param {object | null} structured Payload to read from (may be null).
 * @param {string} key Field name.
 * @returns {string | null} The field value when it is a string, else `null`.
 */
function getStringField(structured, key) {
    const field = structured ? structured[key] : undefined;
    if (typeof field === 'string') {
        return field;
    }
    return null;
}
123
// Flag schema handed to node:util parseArgs, grouped by kind of flag.
const options = {
    // Help
    help: { type: 'boolean', short: 'h' },
    // Boolean switches
    task: { type: 'boolean' },
    'no-noise': { type: 'boolean' },
    force: { type: 'boolean' },
    full: { type: 'boolean' },
    json: { type: 'boolean' },
    // Flags that take a value
    http: { type: 'string' },
    'task-ttl': { type: 'string' },
    'task-poll': { type: 'string' },
    'max-inline': { type: 'string' },
    out: { type: 'string' },
    cmd: { type: 'string' },
    server: { type: 'string' },
    cwd: { type: 'string' },
    // Repeatable KEY=VALUE pairs
    env: { type: 'string', multiple: true },
};
// The target URL arrives as a positional argument, so positionals are allowed.
const { values, positionals } = parseArgs({ options, allowPositionals: true });
// --help short-circuits everything else: print usage and exit successfully.
if (values.help) {
    printUsage();
    process.exit(0);
}
148
/**
 * Returns the first positional argument as the target URL.
 *
 * @param {string[]} positionals Positional command-line arguments.
 * @returns {string} The first positional, when it is a non-empty string.
 * @throws {Error} `Missing URL.` (after printing usage) when it is absent or empty.
 */
function requireUrl(positionals) {
    const [url] = positionals;
    if (typeof url === 'string' && url.length > 0) {
        return url;
    }
    printUsage();
    throw new Error('Missing URL.');
}
156
const targetUrl = requireUrl(positionals);
/**
 * Converts an optional numeric command-line flag to a number.
 *
 * `Number('')` and `Number('   ')` evaluate to 0, and `Number('Infinity')`
 * is not NaN, so a bare `Number.isNaN` check lets blank and non-finite input
 * through as if it were a valid setting. Both are rejected here.
 *
 * @param {string} flag Flag name without the leading dashes (used in the error).
 * @param {string | undefined} raw Raw flag value; `undefined` when not supplied.
 * @returns {number | undefined} The parsed finite number, or `undefined` when
 *   the flag was not supplied.
 * @throws {Error} `Invalid --<flag> value: <raw>` for blank or non-finite input.
 */
function parseNumericFlag(flag, raw) {
    if (raw === undefined) {
        return undefined;
    }
    const parsed = raw.trim() === '' ? Number.NaN : Number(raw);
    if (!Number.isFinite(parsed)) {
        throw new Error(`Invalid --${flag} value: ${raw}`);
    }
    return parsed;
}
const maxInlineRaw = values['max-inline'];
const maxInlineChars = parseNumericFlag('max-inline', maxInlineRaw);
const taskTtlRaw = values['task-ttl'];
const taskTtl = parseNumericFlag('task-ttl', taskTtlRaw);
const taskPollRaw = values['task-poll'];
const taskPoll = parseNumericFlag('task-poll', taskPollRaw);
172
// Progress callback passed with tool requests: each update is written to
// stderr on its own line, prefixed with "[progress]".
const onProgress = (progress) => {
    const rendered = formatProgress(progress);
    process.stderr.write(`[progress] ${rendered}\n`);
};
175
/**
 * Builds the transport selected on the command line.
 *
 * With `--http` a Streamable HTTP transport pointing at that URL is returned.
 * Otherwise a stdio transport is prepared that spawns the server entry
 * (`--server`, default `<repo root>/dist/index.js`) with `--cmd`
 * (default: the current Node executable), `--cwd` and `--env` applied.
 *
 * @throws {Error} When the stdio server entry file is not accessible.
 */
async function createTransport() {
    if (values.http) {
        return new StreamableHTTPClientTransport(new URL(values.http));
    }
    const repoRoot = await findRepoRoot(__dirname);
    const serverPath = values.server ?? path.join(repoRoot, 'dist/index.js');
    try {
        await access(serverPath);
    }
    catch {
        throw new Error(`Server entry not found at ${serverPath}. Run \"npm run build\" first or set --server.`);
    }
    return new StdioClientTransport({
        command: values.cmd ?? process.execPath,
        args: [serverPath, '--stdio'],
        cwd: values.cwd ?? repoRoot,
        // Explicit --env overrides win over inherited variables.
        env: { ...buildInheritedEnv(), ...parseEnvOverrides(values.env) },
        stderr: 'inherit',
    });
}
/**
 * Reads the cached resource referenced by `cacheResourceUri`, if present.
 *
 * @returns {Promise<string | null>} The resource text, or `null` when the
 *   payload has no cache URI or the resource has no non-empty text entry.
 */
async function readCachedMarkdown(client, structured) {
    const cacheResourceUri = getStringField(structured, 'cacheResourceUri');
    if (!cacheResourceUri) {
        return null;
    }
    const resource = await client.readResource({ uri: cacheResourceUri });
    return pickTextResource(resource.contents) || null;
}
/**
 * Runs the example end to end: connect, call the `fetch-url` tool, print or
 * save the result, and always close the transport afterwards.
 *
 * Sets `process.exitCode` to 2 and prints the error payload to stderr when the
 * tool reports an error.
 *
 * @throws {Error} When no markdown is returned, or on any connection/tool failure.
 */
async function run() {
    const client = new Client({ name: 'fetch-url-mcp-client', version: '0.1.0' }, { capabilities: {} });
    let transport = null;
    try {
        transport = await createTransport();
        await client.connect(transport);
        const toolArguments = {
            url: targetUrl,
            skipNoiseRemoval: values['no-noise'] ?? false,
            forceRefresh: values.force ?? false,
            ...(typeof maxInlineChars === 'number' ? { maxInlineChars } : {}),
        };
        const taskOptions = {
            ...(typeof taskTtl === 'number' ? { ttl: taskTtl } : {}),
            ...(typeof taskPoll === 'number' ? { pollInterval: taskPoll } : {}),
        };
        const requestOptions = { onprogress: onProgress };
        let result;
        if (values.task) {
            requestOptions.task = taskOptions;
            result = await callToolStream(client, toolArguments, requestOptions);
        }
        else {
            result = await client.callTool({ name: 'fetch-url', arguments: toolArguments }, undefined, requestOptions);
        }
        if (isToolError(result)) {
            const errorPayload = getStructuredContent(result) ?? { message: 'Fetch failed' };
            process.stderr.write(`${JSON.stringify(errorPayload, null, 2)}\n`);
            process.exitCode = 2;
            return;
        }
        const structured = getStructuredContent(result);
        if (values.json) {
            // Fall back to the raw result when there is no structured payload.
            process.stdout.write(`${JSON.stringify(structured ?? result, null, 2)}\n`);
            return;
        }
        let markdown = getStringField(structured, 'markdown');
        if (values.full) {
            const cached = await readCachedMarkdown(client, structured);
            if (cached) {
                markdown = cached;
            }
        }
        if (!markdown) {
            throw new Error('No markdown returned from fetch-url.');
        }
        if (values.out) {
            await writeFile(values.out, markdown, 'utf8');
            return;
        }
        process.stdout.write(`${markdown}\n`);
    }
    finally {
        if (transport) {
            await transport.close();
        }
    }
}
274
/**
 * Calls the `fetch-url` tool through the client's experimental task stream,
 * echoing task lifecycle events to stderr while waiting for the result.
 *
 * @param client Object exposing `experimental.tasks.callToolStream`.
 * @param toolArguments Arguments forwarded to the tool.
 * @param requestOptions Options forwarded as the third call argument.
 * @returns The `result` carried by the last `result` message of the stream.
 * @throws {Error} On an `error` message (its message, or `Task failed`), or
 *   when the stream finishes without delivering a result.
 */
async function callToolStream(client, toolArguments, requestOptions) {
    const request = { name: 'fetch-url', arguments: toolArguments };
    const events = client.experimental.tasks.callToolStream(request, undefined, requestOptions);
    let outcome = null;
    for await (const event of events) {
        switch (event.type) {
            case 'taskCreated':
                process.stderr.write(`[task] created ${event.task.taskId}\n`);
                break;
            case 'taskStatus': {
                const { status, statusMessage } = event.task;
                const detail = statusMessage ? ` ${statusMessage}` : '';
                process.stderr.write(`[task] ${status}${detail}\n`);
                break;
            }
            case 'result':
                outcome = event.result;
                break;
            case 'error':
                throw new Error(event.error.message ?? 'Task failed');
            default:
                // Unknown message kinds are ignored.
                break;
        }
    }
    if (outcome === null) {
        throw new Error('Task stream ended without a result.');
    }
    return outcome;
}
305
// Entry point: run the client; on failure print the message to stderr and
// mark the process as failed (exit code 1) without forcing an exit.
await run().catch((error) => {
    const message = error instanceof Error ? error.message : String(error);
    process.stderr.write(`${message}\n`);
    process.exitCode = 1;
});
313
+ //# sourceMappingURL=mcp-fetch-url-client.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"mcp-fetch-url-client.js","sourceRoot":"","sources":["../../examples/mcp-fetch-url-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACrD,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,OAAO,MAAM,cAAc,CAAC;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AAEtC,OAAO,EAAE,MAAM,EAAE,MAAM,kCAAkC,CAAC;AAC1D,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,6BAA6B,EAAE,MAAM,oDAAoD,CAAC;AAUnG,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAE3C,SAAS,UAAU;IACjB,MAAM,KAAK,GAAG;;;;;;;;;;;;;;;;;;;;CAoBf,CAAC;IACA,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;AAC9B,CAAC;AAED,KAAK,UAAU,YAAY,CAAC,QAAgB;IAC1C,IAAI,OAAO,GAAG,QAAQ,CAAC;IACvB,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,cAAc,CAAC,CAAC;QACrD,IAAI,CAAC;YACH,MAAM,MAAM,CAAC,SAAS,CAAC,CAAC;YACxB,OAAO,OAAO,CAAC;QACjB,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACrC,IAAI,MAAM,KAAK,OAAO,EAAE,CAAC;gBACvB,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,OAAO,GAAG,MAAM,CAAC;QACnB,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,iBAAiB,CACxB,MAA4B;IAE5B,MAAM,GAAG,GAA2B,EAAE,CAAC;IACvC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,GAAG,CAAC;IACb,CAAC;IACD,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;QAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAChC,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,EAAE,CAAC,CAAC;QAClD,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QACxC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC;QACpC,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,MAAM,IAAI,KAAK,CAAC,yBAAyB,IAAI,EAAE,CAAC,CAAC;QACnD,CAAC;QACD,GAAG,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;IACnB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,iBAAiB;IACxB,MAAM,GAAG,GAA2B,EAAE,CAAC;IACvC,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;QACvD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,GAAG,CAAC,GAAG,CAAC,
GAAG,KAAK,CAAC;QACnB,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,gBAAgB,CACvB,QAAwC;IAExC,KAAK,MAAM,KAAK,IAAI,QAAQ,EAAE,CAAC;QAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3B,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC7B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,QAAkB;IACxC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,QAAQ,CAAC;IACpC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,QAAQ,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;QAC9D,OAAO,GAAG,OAAO,IAAI,OAAO,CAAC,CAAC,CAAC,IAAI,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IACtD,CAAC;IACD,OAAO,OAAO,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,QAAQ,IAAI,OAAO,EAAE,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,QAAQ,EAAE,CAAC;AAC9E,CAAC;AAED,SAAS,oBAAoB,CAAC,MAAe;IAC3C,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;QAClD,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,SAAS,GAAG,MAAoB,CAAC;IACvC,IACE,SAAS,CAAC,iBAAiB;QAC3B,OAAO,SAAS,CAAC,iBAAiB,KAAK,QAAQ;QAC/C,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,iBAAiB,CAAC,EAC3C,CAAC;QACD,OAAO,SAAS,CAAC,iBAAiB,CAAC;IACrC,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,WAAW,CAAC,MAAe;IAClC,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;QAClD,OAAO,KAAK,CAAC;IACf,CAAC;IACD,MAAM,SAAS,GAAG,MAAoB,CAAC;IACvC,OAAO,SAAS,CAAC,OAAO,KAAK,IAAI,CAAC;AACpC,CAAC;AAED,SAAS,cAAc,CACrB,UAAoC,EACpC,GAAW;IAEX,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;IAC9B,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;AAClD,CAAC;AAED,MAAM,OAAO,GAAG;IACd,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,GAAG,EAAE;IACrC,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IACxB,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;IACzB,UAAU,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IAC9B,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IAC/B,UAAU,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;IAC/B,KAAK,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;IAC1B,YAAY,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IAChC,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE;IACzB,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IACvB,IAAI,EAAE,
EAAE,IAAI,EAAE,SAAS,EAAE;IACzB,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IACvB,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IAC1B,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;IACvB,GAAG,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,IAAI,EAAE;CAC/B,CAAC;AAEX,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,SAAS,CAAC;IACxC,gBAAgB,EAAE,IAAI;IACtB,OAAO;CACR,CAAC,CAAC;AAEH,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;IAChB,UAAU,EAAE,CAAC;IACb,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,SAAS,UAAU,CAAC,WAAqB;IACvC,MAAM,GAAG,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;IAC3B,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChD,UAAU,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,cAAc,CAAC,CAAC;IAClC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,SAAS,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC;AAE1C,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC;AAC1C,MAAM,cAAc,GAClB,YAAY,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AAChE,IAAI,YAAY,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,EAAE,CAAC;IAC/D,MAAM,IAAI,KAAK,CAAC,+BAA+B,YAAY,EAAE,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,UAAU,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC;AACtC,MAAM,OAAO,GAAG,UAAU,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AAC1E,IAAI,UAAU,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;IACtD,MAAM,IAAI,KAAK,CAAC,6BAA6B,UAAU,EAAE,CAAC,CAAC;AAC7D,CAAC;AAED,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC;AACxC,MAAM,QAAQ,GAAG,WAAW,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AAC7E,IAAI,WAAW,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC;IACxD,MAAM,IAAI,KAAK,CAAC,8BAA8B,WAAW,EAAE,CAAC,CAAC;AAC/D,CAAC;AAED,MAAM,UAAU,GAAG,CAAC,QAAkB,EAAQ,EAAE;IAC9C,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACnE,CAAC,CAAC;AAEF,KAAK,UAAU,GAAG;IAChB,IAAI,SAAS,GAAqB,IAAI,CAAC;IACvC,MAAM,MAAM,GAAG,IAAI,MAAM,CACvB,EAAE,IAAI,EAAE,sBAAsB,EAAE,OAAO,EAAE,OAAO,EAAE,EAClD,EAAE,YAAY,EAAE,EAAE,EAAE,CACrB,CAAC;IAEF,IAAI,CAAC;QACH,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YAChB,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YACtC,SAAS,GAAG,IAAI,6BAA6B,CAAC
,QAAQ,CAAc,CAAC;QACvE,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,IAAI,OAAO,CAAC,QAAQ,CAAC;YAC/C,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,SAAS,CAAC,CAAC;YAC/C,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,eAAe,CAAC,CAAC;YACzE,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,IAAI,QAAQ,CAAC;YAEnC,IAAI,CAAC;gBACH,MAAM,MAAM,CAAC,UAAU,CAAC,CAAC;YAC3B,CAAC;YAAC,MAAM,CAAC;gBACP,MAAM,IAAI,KAAK,CACb,6BAA6B,UAAU,gDAAgD,CACxF,CAAC;YACJ,CAAC;YAED,MAAM,GAAG,GAAG;gBACV,GAAG,iBAAiB,EAAE;gBACtB,GAAG,iBAAiB,CAAC,MAAM,CAAC,GAAG,CAAC;aACjC,CAAC;YAEF,SAAS,GAAG,IAAI,oBAAoB,CAAC;gBACnC,OAAO;gBACP,IAAI,EAAE,CAAC,UAAU,EAAE,SAAS,CAAC;gBAC7B,GAAG;gBACH,GAAG;gBACH,MAAM,EAAE,SAAS;aAClB,CAAc,CAAC;QAClB,CAAC;QAED,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;QAEhC,MAAM,aAAa,GAKf;YACF,GAAG,EAAE,SAAS;YACd,gBAAgB,EAAE,MAAM,CAAC,UAAU,CAAC,IAAI,KAAK;YAC7C,YAAY,EAAE,MAAM,CAAC,KAAK,IAAI,KAAK;SACpC,CAAC;QAEF,IAAI,OAAO,cAAc,KAAK,QAAQ,EAAE,CAAC;YACvC,aAAa,CAAC,cAAc,GAAG,cAAc,CAAC;QAChD,CAAC;QAED,MAAM,WAAW,GAA4C,EAAE,CAAC;QAChE,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;YAChC,WAAW,CAAC,GAAG,GAAG,OAAO,CAAC;QAC5B,CAAC;QACD,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;YACjC,WAAW,CAAC,YAAY,GAAG,QAAQ,CAAC;QACtC,CAAC;QAED,MAAM,cAAc,GAAG,MAAM,CAAC,IAAI;YAChC,CAAC,CAAC,EAAE,UAAU,EAAE,UAAU,EAAE,IAAI,EAAE,WAAW,EAAE;YAC/C,CAAC,CAAC,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC;QAE/B,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI;YACxB,CAAC,CAAC,MAAM,cAAc,CAAC,MAAM,EAAE,aAAa,EAAE,cAAc,CAAC;YAC7D,CAAC,CAAC,MAAM,MAAM,CAAC,QAAQ,CACnB;gBACE,IAAI,EAAE,WAAW;gBACjB,SAAS,EAAE,aAAa;aACzB,EACD,SAAS,EACT,cAAc,CACf,CAAC;QAEN,IAAI,WAAW,CAAC,MAAM,CAAC,EAAE,CAAC;YACxB,MAAM,YAAY,GAAG,oBAAoB,CAAC,MAAM,CAAC,IAAI;gBACnD,OAAO,EAAE,cAAc;aACxB,CAAC;YACF,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;YACnE,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;YACrB,OAAO;QACT,CAAC;QAED,MAAM,UAAU,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAC;QAEhD,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YAChB,MAAM,OAAO,GAAG,UAAU,IAAI,MAAM,CAAC;YACrC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,I
AAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;YAC9D,OAAO;QACT,CAAC;QAED,IAAI,QAAQ,GAAG,cAAc,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;QACtD,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;YAChB,MAAM,gBAAgB,GAAG,cAAc,CAAC,UAAU,EAAE,kBAAkB,CAAC,CAAC;YACxE,IAAI,gBAAgB,EAAE,CAAC;gBACrB,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC;oBACzC,GAAG,EAAE,gBAAgB;iBACtB,CAAC,CAAC;gBACH,MAAM,IAAI,GAAG,gBAAgB,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;gBACjD,IAAI,IAAI,EAAE,CAAC;oBACT,QAAQ,GAAG,IAAI,CAAC;gBAClB,CAAC;YACH,CAAC;QACH,CAAC;QAED,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,IAAI,MAAM,CAAC,GAAG,EAAE,CAAC;YACf,MAAM,SAAS,CAAC,MAAM,CAAC,GAAG,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;QAChD,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,QAAQ,IAAI,CAAC,CAAC;QACxC,CAAC;IACH,CAAC;YAAS,CAAC;QACT,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,SAAS,CAAC,KAAK,EAAE,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC;AAED,KAAK,UAAU,cAAc,CAC3B,MAAc,EACd,aAKC,EACD,cAMC;IAED,MAAM,MAAM,GAAG,MAAM,CAAC,YAAY,CAAC,KAAK,CAAC,cAAc,CACrD;QACE,IAAI,EAAE,WAAW;QACjB,SAAS,EAAE,aAAa;KACzB,EACD,SAAS,EACT,cAAc,CACf,CAAC;IAEF,IAAI,WAAW,GAAY,IAAI,CAAC;IAEhC,IAAI,KAAK,EAAE,MAAM,OAAO,IAAI,MAAM,EAAE,CAAC;QACnC,IAAI,OAAO,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YACnC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,kBAAkB,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC;YAChE,SAAS;QACX,CAAC;QACD,IAAI,OAAO,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;YAClC,MAAM,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC,aAAa;gBAC9C,CAAC,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,aAAa,EAAE;gBAClC,CAAC,CAAC,EAAE,CAAC;YACP,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,aAAa,IAAI,CAAC,CAAC;YACxE,SAAS;QACX,CAAC;QACD,IAAI,OAAO,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC9B,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;YAC7B,SAAS;QACX,CAAC;QACD,IAAI,OAAO,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,IAAI,aAAa,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IAED,IAAI,WAAW,KAAK,IAAI,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;IACzD,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,IAAI,CAAC;IACH,MAAM,GAAG,EAAE,CAAC;AACd,CAAC;AAAC,OAAO,KAAK,EAAE,CAAC;IACf,MAAM,OAAO
,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACvE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;IACrC,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;AACvB,CAAC"}
@@ -0,0 +1,4 @@
1
+ export declare function getCharsetFromContentType(contentType: string | null): string | undefined;
2
+ export declare function decodeBuffer(buffer: Uint8Array, encoding: string): string;
3
+ export declare function resolveEncoding(declaredEncoding: string | undefined, sample: Uint8Array): string | undefined;
4
+ export declare function isBinaryContent(buffer: Uint8Array, encoding?: string): boolean;
@@ -0,0 +1,163 @@
1
+ import { Buffer } from 'node:buffer';
2
/**
 * Extracts the `charset` parameter from a Content-Type header value.
 *
 * Surrounding whitespace and one pair of enclosing double quotes are removed.
 *
 * @param {string | null} contentType Raw header value.
 * @returns {string | undefined} The charset label as written in the header, or
 *   `undefined` when the header is missing, has no `charset=` parameter, or
 *   the parameter is empty (e.g. `charset=""`).
 */
export function getCharsetFromContentType(contentType) {
    if (!contentType) {
        return undefined;
    }
    const match = /charset=([^;]+)/i.exec(contentType);
    const raw = match?.[1]?.trim();
    if (!raw) {
        return undefined;
    }
    const isQuoted = raw.startsWith('"') && raw.endsWith('"');
    const charset = (isQuoted ? raw.slice(1, -1) : raw).trim();
    // An empty label carries no information: report it as "no charset"
    // instead of returning an empty string.
    return charset || undefined;
}
15
/**
 * Creates a TextDecoder for `encoding`, falling back to UTF-8 when the label
 * is missing/empty or not accepted by TextDecoder.
 *
 * @param {string | undefined} encoding Encoding label.
 * @returns {TextDecoder}
 */
function createDecoder(encoding) {
    const fallback = () => new TextDecoder('utf-8');
    if (!encoding) {
        return fallback();
    }
    try {
        return new TextDecoder(encoding);
    }
    catch {
        // Unknown label: decode as UTF-8 rather than failing.
        return fallback();
    }
}
25
/**
 * Decodes bytes to a string using `encoding` (see `createDecoder` for the
 * fallback applied to missing or unsupported labels).
 *
 * @param {Uint8Array} buffer Bytes to decode.
 * @param {string} encoding Encoding label.
 * @returns {string}
 */
export function decodeBuffer(buffer, encoding) {
    const decoder = createDecoder(encoding);
    return decoder.decode(buffer);
}
28
/**
 * Normalizes an encoding label for comparison: trimmed and lower-cased.
 *
 * @param {string | undefined | null} encoding
 * @returns {string} The normalized label, or `''` when none was given.
 */
function normalizeEncodingLabel(encoding) {
    if (encoding === undefined || encoding === null) {
        return '';
    }
    return encoding.trim().toLowerCase();
}
31
/**
 * Tells whether a label names one of the encodings this module treats as
 * "wide" Unicode: anything starting with `utf-16` or `utf-32`, plus the exact
 * labels `ucs-2`, `unicodefffe` and `unicodefeff` (case-insensitive).
 *
 * @param {string | undefined} encoding
 * @returns {boolean}
 */
function isUnicodeWideEncoding(encoding) {
    const label = normalizeEncodingLabel(encoding);
    if (label.startsWith('utf-16') || label.startsWith('utf-32')) {
        return true;
    }
    return ['ucs-2', 'unicodefffe', 'unicodefeff'].includes(label);
}
39
// Byte-order-mark prefixes and the encoding each one selects.
// Order matters: the 4-byte marks are listed before the 2-byte marks because
// FF FE is also the start of FF FE 00 00, and the first match wins.
const BOM_SIGNATURES = [
    { encoding: 'utf-32le', bytes: [0xff, 0xfe, 0x00, 0x00] },
    { encoding: 'utf-32be', bytes: [0x00, 0x00, 0xfe, 0xff] },
    { encoding: 'utf-8', bytes: [0xef, 0xbb, 0xbf] },
    { encoding: 'utf-16le', bytes: [0xff, 0xfe] },
    { encoding: 'utf-16be', bytes: [0xfe, 0xff] },
];
47
/**
 * Checks whether `buffer` begins with the exact byte sequence `signature`.
 *
 * @param {Uint8Array} buffer Bytes to inspect.
 * @param {number[]} signature Expected leading bytes.
 * @returns {boolean} `false` when the buffer is shorter than the signature.
 */
function startsWithBytes(buffer, signature) {
    if (signature.length > buffer.length) {
        return false;
    }
    return signature.every((expected, offset) => buffer[offset] === expected);
}
57
/**
 * Identifies the encoding announced by a byte-order mark at the start of
 * `buffer`, using the first matching entry of `BOM_SIGNATURES`.
 *
 * @param {Uint8Array} buffer
 * @returns {string | undefined} The encoding, or `undefined` without a known BOM.
 */
function detectBomEncoding(buffer) {
    const hit = BOM_SIGNATURES.find(({ bytes }) => startsWithBytes(buffer, bytes));
    return hit?.encoding;
}
64
/**
 * Reads an attribute-style value that begins at `startIndex` in `input`.
 *
 * - If the value opens with `"` or `'`, everything up to the matching closing
 *   quote is returned (trimmed); an unterminated quote yields `''`.
 * - Otherwise the value runs until the first whitespace, `/`, `>`, quote
 *   character or `;`.
 *
 * Quotes and `;` terminate an unquoted value because `charset=` commonly sits
 * inside another quoted attribute, e.g.
 * `<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />`.
 * Without them the attribute's closing quote leaks into the result
 * (`utf-8"`), which is not a usable encoding label.
 *
 * @param {string} input Text being scanned.
 * @param {number} startIndex Index of the first character of the value.
 * @returns {string} The extracted value, or `''` when there is none.
 */
function readQuotedValue(input, startIndex) {
    const first = input[startIndex];
    if (!first) {
        return '';
    }
    if (first === '"' || first === "'") {
        const end = input.indexOf(first, startIndex + 1);
        return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
    }
    const tail = input.slice(startIndex);
    const stop = tail.search(/[\s/>"';]/);
    return (stop === -1 ? tail : tail.slice(0, stop)).trim();
}
77
/**
 * Finds the first `charset=` occurrence (case-insensitive) in a document
 * snippet and returns the value that follows it, lower-cased.
 *
 * @param {string} headSnippet Leading portion of the document as text.
 * @returns {string | undefined} The charset label, or `undefined` when there
 *   is no `charset=` token or its value is empty.
 */
function extractHtmlCharset(headSnippet) {
    const TOKEN = 'charset=';
    const tokenAt = headSnippet.toLowerCase().indexOf(TOKEN);
    if (tokenAt === -1) {
        return undefined;
    }
    // Read from the original snippet so the value's own characters are used.
    const value = readQuotedValue(headSnippet, tokenAt + TOKEN.length);
    if (!value) {
        return undefined;
    }
    return value.toLowerCase();
}
87
/**
 * Reads the `encoding=` pseudo-attribute of an XML declaration (`<?xml … ?>`)
 * found in a document snippet.
 *
 * @param {string} headSnippet Leading portion of the document as text.
 * @returns {string | undefined} The lower-cased encoding, or `undefined` when
 *   there is no XML declaration, no `encoding=` inside it, or the value is empty.
 */
function extractXmlEncoding(headSnippet) {
    const haystack = headSnippet.toLowerCase();
    const declStart = haystack.indexOf('<?xml');
    if (declStart === -1) {
        return undefined;
    }
    // Without a closing "?>" the declaration is taken to run to the end of the snippet.
    const closeAt = haystack.indexOf('?>', declStart);
    const declEnd = closeAt === -1 ? headSnippet.length : closeAt + 2;
    const declaration = headSnippet.slice(declStart, declEnd);
    const TOKEN = 'encoding=';
    const tokenAt = declaration.toLowerCase().indexOf(TOKEN);
    if (tokenAt === -1) {
        return undefined;
    }
    const value = readQuotedValue(declaration, tokenAt + TOKEN.length);
    if (!value) {
        return undefined;
    }
    return value.toLowerCase();
}
105
/**
 * Sniffs an in-document encoding declaration from the first 8,192 bytes of
 * `buffer`: a `charset=` token is tried first, then an XML declaration.
 *
 * @param {Uint8Array} buffer Raw document bytes.
 * @returns {string | undefined} The declared encoding, if one is found.
 */
function detectHtmlDeclaredEncoding(buffer) {
    if (buffer.length === 0) {
        return undefined;
    }
    const scanSize = Math.min(buffer.length, 8_192);
    // View over the same memory, read as latin1 so every byte maps to exactly
    // one character.
    const head = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize);
    const headSnippet = head.toString('latin1');
    const fromCharset = extractHtmlCharset(headSnippet);
    return fromCharset ?? extractXmlEncoding(headSnippet);
}
112
/**
 * Chooses the encoding to decode a response with, in priority order:
 * 1. a byte-order mark at the start of `sample`;
 * 2. the externally declared encoding, when non-empty;
 * 3. a declaration sniffed from the document itself.
 *
 * @param {string | undefined} declaredEncoding Encoding declared outside the body.
 * @param {Uint8Array} sample Leading bytes of the body.
 * @returns {string | undefined} The chosen encoding, or `undefined` if none applies.
 */
export function resolveEncoding(declaredEncoding, sample) {
    return (detectBomEncoding(sample) ||
        declaredEncoding ||
        detectHtmlDeclaredEncoding(sample));
}
120
+ const BINARY_SIGNATURES = [
121
+ [0x25, 0x50, 0x44, 0x46],
122
+ [0x89, 0x50, 0x4e, 0x47],
123
+ [0x47, 0x49, 0x46, 0x38],
124
+ [0xff, 0xd8, 0xff],
125
+ [0x52, 0x49, 0x46, 0x46],
126
+ [0x42, 0x4d],
127
+ [0x49, 0x49, 0x2a, 0x00],
128
+ [0x4d, 0x4d, 0x00, 0x2a],
129
+ [0x00, 0x00, 0x01, 0x00],
130
+ [0x50, 0x4b, 0x03, 0x04],
131
+ [0x1f, 0x8b],
132
+ [0x42, 0x5a, 0x68],
133
+ [0x52, 0x61, 0x72, 0x21],
134
+ [0x37, 0x7a, 0xbc, 0xaf],
135
+ [0x7f, 0x45, 0x4c, 0x46],
136
+ [0x4d, 0x5a],
137
+ [0xcf, 0xfa, 0xed, 0xfe],
138
+ [0x00, 0x61, 0x73, 0x6d],
139
+ [0x1a, 0x45, 0xdf, 0xa3],
140
+ [0x66, 0x74, 0x79, 0x70],
141
+ [0x46, 0x4c, 0x56],
142
+ [0x49, 0x44, 0x33],
143
+ [0xff, 0xfb],
144
+ [0xff, 0xfa],
145
+ [0x4f, 0x67, 0x67, 0x53],
146
+ [0x66, 0x4c, 0x61, 0x43],
147
+ [0x4d, 0x54, 0x68, 0x64],
148
+ [0x77, 0x4f, 0x46, 0x46],
149
+ [0x00, 0x01, 0x00, 0x00],
150
+ [0x4f, 0x54, 0x54, 0x4f],
151
+ [0x53, 0x51, 0x4c, 0x69],
152
+ ];
153
/**
 * Reports whether a 0x00 byte occurs within the first `limit` bytes of `buffer`.
 *
 * @param {Uint8Array} buffer Bytes to inspect.
 * @param {number} limit Upper bound on how many leading bytes are examined.
 * @returns {boolean}
 */
function hasNullByte(buffer, limit) {
    const end = Math.min(buffer.length, limit);
    const inspected = buffer.subarray(0, end);
    return inspected.indexOf(0x00) !== -1;
}
157
/**
 * Heuristically decides whether `buffer` holds binary (non-text) content.
 *
 * A buffer is binary when it starts with any entry of `BINARY_SIGNATURES`,
 * or when it contains a 0x00 byte within its first 1000 bytes. The null-byte
 * check is skipped for wide Unicode encodings (see `isUnicodeWideEncoding`).
 *
 * @param {Uint8Array} buffer Bytes to classify.
 * @param {string} [encoding] Encoding label for the content, if known.
 * @returns {boolean}
 */
export function isBinaryContent(buffer, encoding) {
    const matchesSignature = BINARY_SIGNATURES.some((signature) => startsWithBytes(buffer, signature));
    if (matchesSignature) {
        return true;
    }
    if (isUnicodeWideEncoding(encoding)) {
        return false;
    }
    return hasNullByte(buffer, 1000);
}
@@ -0,0 +1,4 @@
1
+ import { Readable } from 'node:stream';
2
+ import type { ReadableStream as NodeReadableStream } from 'node:stream/web';
3
+ export declare function toNodeReadableStream(stream: ReadableStream<Uint8Array>, url: string, stage: string): NodeReadableStream<Uint8Array>;
4
+ export declare function toWebReadableStream(stream: Readable, url: string, stage: string): ReadableStream<Uint8Array>;
@@ -0,0 +1,28 @@
1
+ import { Readable } from 'node:stream';
2
+ import { FetchError } from './errors.js';
3
+ import { isObject } from './type-guards.js';
4
/**
 * Duck-type check for a web ReadableStream: an object with `getReader`,
 * `cancel` and `tee` functions and a boolean `locked` property.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isReadableStreamLike(value) {
    if (!isObject(value)) {
        return false;
    }
    const hasMethods = ['getReader', 'cancel', 'tee'].every((method) => typeof value[method] === 'function');
    return hasMethods && typeof value['locked'] === 'boolean';
}
12
/**
 * Throws unless `stream` passes `isReadableStreamLike`.
 *
 * @param {unknown} stream Candidate stream.
 * @param {string} url URL the stream belongs to (attached to the error).
 * @param {string} stage Pipeline stage name recorded in the error details.
 * @throws {FetchError} `Invalid response stream` (status 500, reason
 *   `invalid_stream`) when the check fails.
 */
function assertReadableStreamLike(stream, url, stage) {
    if (!isReadableStreamLike(stream)) {
        const details = { reason: 'invalid_stream', stage };
        throw new FetchError('Invalid response stream', url, 500, details);
    }
}
20
/**
 * Validates a web ReadableStream and hands back the very same object; no
 * conversion happens at runtime.
 *
 * @param stream Stream to validate.
 * @param {string} url URL used for error reporting.
 * @param {string} stage Pipeline stage used for error reporting.
 * @returns The `stream` argument, unchanged.
 * @throws {FetchError} When `stream` is not ReadableStream-like.
 */
export function toNodeReadableStream(stream, url, stage) {
    const validated = stream;
    assertReadableStreamLike(validated, url, stage);
    return validated;
}
24
/**
 * Converts a Node.js Readable into a web ReadableStream via `Readable.toWeb`
 * and validates the converted stream before returning it.
 *
 * @param {Readable} stream Node.js stream to convert.
 * @param {string} url URL used for error reporting.
 * @param {string} stage Pipeline stage used for error reporting.
 * @returns The converted web stream.
 * @throws {FetchError} When the converted value is not ReadableStream-like.
 */
export function toWebReadableStream(stream, url, stage) {
    const webStream = Readable.toWeb(stream);
    assertReadableStreamLike(webStream, url, stage);
    return webStream;
}
package/dist/fetch.js CHANGED
@@ -10,6 +10,8 @@ import { finished, pipeline } from 'node:stream/promises';
10
10
  import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
11
11
  import { config } from './config.js';
12
12
  import { createErrorWithCode, FetchError, isSystemError } from './errors.js';
13
+ import { decodeBuffer, getCharsetFromContentType, isBinaryContent, resolveEncoding, } from './fetch-content.js';
14
+ import { toNodeReadableStream, toWebReadableStream } from './fetch-stream.js';
13
15
  import { createDefaultBlockList, normalizeIpForBlockList, } from './ip-blocklist.js';
14
16
  import { getOperationId, getRequestId, logDebug, logError, logWarn, redactUrl, } from './observability.js';
15
17
  import { isError, isObject } from './type-guards.js';
@@ -26,23 +28,6 @@ const defaultRedactor = {
26
28
  redact: redactUrl,
27
29
  };
28
30
  const defaultFetch = (input, init) => globalThis.fetch(input, init);
29
- function assertReadableStreamLike(stream, url, stage) {
30
- if (isObject(stream) && typeof stream['getReader'] === 'function')
31
- return;
32
- throw new FetchError('Invalid response stream', url, 500, {
33
- reason: 'invalid_stream',
34
- stage,
35
- });
36
- }
37
- function toNodeReadableStream(stream, url, stage) {
38
- assertReadableStreamLike(stream, url, stage);
39
- return stream;
40
- }
41
- function toWebReadableStream(stream, url, stage) {
42
- const converted = Readable.toWeb(stream);
43
- assertReadableStreamLike(converted, url, stage);
44
- return converted;
45
- }
46
31
  class IpBlocker {
47
32
  security;
48
33
  blockList = createDefaultBlockList();
@@ -456,6 +441,9 @@ class SafeDnsResolver {
456
441
  if (signal?.aborted) {
457
442
  throw createAbortSignalError();
458
443
  }
444
+ if (this.isBlockedHostname(normalizedHostname)) {
445
+ throw createErrorWithCode(`Blocked host: ${normalizedHostname}. Internal hosts are not allowed`, 'EBLOCKED');
446
+ }
459
447
  if (isIP(normalizedHostname)) {
460
448
  if (this.ipBlocker.isBlockedIp(normalizedHostname)) {
461
449
  throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Private IPs are not allowed`, 'EBLOCKED');
@@ -871,168 +859,6 @@ class RedirectFollower {
871
859
  }
872
860
  }
873
861
  }
874
- function getCharsetFromContentType(contentType) {
875
- if (!contentType)
876
- return undefined;
877
- const match = /charset=([^;]+)/i.exec(contentType);
878
- const charsetGroup = match?.[1];
879
- if (!charsetGroup)
880
- return undefined;
881
- let charset = charsetGroup.trim();
882
- if (charset.startsWith('"') && charset.endsWith('"')) {
883
- charset = charset.slice(1, -1);
884
- }
885
- return charset.trim();
886
- }
887
- function createDecoder(encoding) {
888
- if (!encoding)
889
- return new TextDecoder('utf-8');
890
- try {
891
- return new TextDecoder(encoding);
892
- }
893
- catch {
894
- return new TextDecoder('utf-8');
895
- }
896
- }
897
- function decodeBuffer(buffer, encoding) {
898
- return createDecoder(encoding).decode(buffer);
899
- }
900
- function normalizeEncodingLabel(encoding) {
901
- return encoding?.trim().toLowerCase() ?? '';
902
- }
903
- function isUnicodeWideEncoding(encoding) {
904
- const normalized = normalizeEncodingLabel(encoding);
905
- return (normalized.startsWith('utf-16') ||
906
- normalized.startsWith('utf-32') ||
907
- normalized === 'ucs-2' ||
908
- normalized === 'unicodefffe' ||
909
- normalized === 'unicodefeff');
910
- }
911
- const BOM_SIGNATURES = [
912
- // 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
913
- { bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
914
- { bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
915
- { bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
916
- { bytes: [0xff, 0xfe], encoding: 'utf-16le' },
917
- { bytes: [0xfe, 0xff], encoding: 'utf-16be' },
918
- ];
919
- function detectBomEncoding(buffer) {
920
- for (const { bytes, encoding } of BOM_SIGNATURES) {
921
- if (startsWithBytes(buffer, bytes))
922
- return encoding;
923
- }
924
- return undefined;
925
- }
926
- function readQuotedValue(input, startIndex) {
927
- const first = input[startIndex];
928
- if (!first)
929
- return '';
930
- const quoted = first === '"' || first === "'";
931
- if (quoted) {
932
- const end = input.indexOf(first, startIndex + 1);
933
- return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
934
- }
935
- const tail = input.slice(startIndex);
936
- const stop = tail.search(/[\s/>]/);
937
- return (stop === -1 ? tail : tail.slice(0, stop)).trim();
938
- }
939
- function extractHtmlCharset(headSnippet) {
940
- const lower = headSnippet.toLowerCase();
941
- const charsetToken = 'charset=';
942
- const charsetIdx = lower.indexOf(charsetToken);
943
- if (charsetIdx === -1)
944
- return undefined;
945
- const valueStart = charsetIdx + charsetToken.length;
946
- const charset = readQuotedValue(headSnippet, valueStart);
947
- return charset ? charset.toLowerCase() : undefined;
948
- }
949
- function extractXmlEncoding(headSnippet) {
950
- const lower = headSnippet.toLowerCase();
951
- const xmlStart = lower.indexOf('<?xml');
952
- if (xmlStart === -1)
953
- return undefined;
954
- const xmlEnd = lower.indexOf('?>', xmlStart);
955
- const declaration = xmlEnd === -1
956
- ? headSnippet.slice(xmlStart)
957
- : headSnippet.slice(xmlStart, xmlEnd + 2);
958
- const declarationLower = declaration.toLowerCase();
959
- const encodingToken = 'encoding=';
960
- const encodingIdx = declarationLower.indexOf(encodingToken);
961
- if (encodingIdx === -1)
962
- return undefined;
963
- const valueStart = encodingIdx + encodingToken.length;
964
- const encoding = readQuotedValue(declaration, valueStart);
965
- return encoding ? encoding.toLowerCase() : undefined;
966
- }
967
- function detectHtmlDeclaredEncoding(buffer) {
968
- const scanSize = Math.min(buffer.length, 8_192);
969
- if (scanSize === 0)
970
- return undefined;
971
- const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
972
- return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
973
- }
974
- function resolveEncoding(declaredEncoding, sample) {
975
- const bomEncoding = detectBomEncoding(sample);
976
- if (bomEncoding)
977
- return bomEncoding;
978
- if (declaredEncoding)
979
- return declaredEncoding;
980
- return detectHtmlDeclaredEncoding(sample);
981
- }
982
- const BINARY_SIGNATURES = [
983
- [0x25, 0x50, 0x44, 0x46],
984
- [0x89, 0x50, 0x4e, 0x47],
985
- [0x47, 0x49, 0x46, 0x38],
986
- [0xff, 0xd8, 0xff],
987
- [0x52, 0x49, 0x46, 0x46],
988
- [0x42, 0x4d],
989
- [0x49, 0x49, 0x2a, 0x00],
990
- [0x4d, 0x4d, 0x00, 0x2a],
991
- [0x00, 0x00, 0x01, 0x00],
992
- [0x50, 0x4b, 0x03, 0x04],
993
- [0x1f, 0x8b],
994
- [0x42, 0x5a, 0x68],
995
- [0x52, 0x61, 0x72, 0x21],
996
- [0x37, 0x7a, 0xbc, 0xaf],
997
- [0x7f, 0x45, 0x4c, 0x46],
998
- [0x4d, 0x5a],
999
- [0xcf, 0xfa, 0xed, 0xfe],
1000
- [0x00, 0x61, 0x73, 0x6d],
1001
- [0x1a, 0x45, 0xdf, 0xa3],
1002
- [0x66, 0x74, 0x79, 0x70],
1003
- [0x46, 0x4c, 0x56],
1004
- [0x49, 0x44, 0x33],
1005
- [0xff, 0xfb],
1006
- [0xff, 0xfa],
1007
- [0x4f, 0x67, 0x67, 0x53],
1008
- [0x66, 0x4c, 0x61, 0x43],
1009
- [0x4d, 0x54, 0x68, 0x64],
1010
- [0x77, 0x4f, 0x46, 0x46],
1011
- [0x00, 0x01, 0x00, 0x00],
1012
- [0x4f, 0x54, 0x54, 0x4f],
1013
- [0x53, 0x51, 0x4c, 0x69],
1014
- ];
1015
- function startsWithBytes(buffer, signature) {
1016
- const sigLen = signature.length;
1017
- if (buffer.length < sigLen)
1018
- return false;
1019
- for (let i = 0; i < sigLen; i += 1) {
1020
- if (buffer[i] !== signature[i])
1021
- return false;
1022
- }
1023
- return true;
1024
- }
1025
- function hasNullByte(buffer, limit) {
1026
- const checkLen = Math.min(buffer.length, limit);
1027
- return buffer.subarray(0, checkLen).includes(0x00);
1028
- }
1029
- function isBinaryContent(buffer, encoding) {
1030
- for (const signature of BINARY_SIGNATURES) {
1031
- if (startsWithBytes(buffer, signature))
1032
- return true;
1033
- }
1034
- return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
1035
- }
1036
862
  class ResponseTextReader {
1037
863
  async read(response, url, maxBytes, signal, encoding) {
1038
864
  const { buffer, encoding: effectiveEncoding, truncated, } = await this.readBuffer(response, url, maxBytes, signal, encoding);
@@ -1298,7 +1124,7 @@ function createDecompressor(encoding) {
1298
1124
  function createPumpedStream(initialChunk, reader) {
1299
1125
  return new ReadableStream({
1300
1126
  start(controller) {
1301
- if (initialChunk.byteLength > 0) {
1127
+ if (initialChunk && initialChunk.byteLength > 0) {
1302
1128
  controller.enqueue(initialChunk);
1303
1129
  }
1304
1130
  },
@@ -1321,31 +1147,6 @@ function createPumpedStream(initialChunk, reader) {
1321
1147
  },
1322
1148
  });
1323
1149
  }
1324
- function isLikelyCompressed(chunk, encoding) {
1325
- if (chunk.byteLength === 0)
1326
- return false;
1327
- if (encoding === 'gzip') {
1328
- return chunk.byteLength >= 2 && chunk[0] === 0x1f && chunk[1] === 0x8b;
1329
- }
1330
- if (encoding === 'deflate') {
1331
- if (chunk.byteLength < 2)
1332
- return false;
1333
- const byte0 = chunk[0] ?? 0;
1334
- const byte1 = chunk[1] ?? 0;
1335
- const cm = byte0 & 0x0f;
1336
- if (cm !== 8)
1337
- return false;
1338
- return (byte0 * 256 + byte1) % 31 === 0;
1339
- }
1340
- let nonPrintable = 0;
1341
- const limit = Math.min(chunk.length, 50);
1342
- for (let i = 0; i < limit; i += 1) {
1343
- const b = chunk[i] ?? 0;
1344
- if (b < 0x09 || (b > 0x0d && b < 0x20) || b === 0x7f)
1345
- nonPrintable += 1;
1346
- }
1347
- return nonPrintable / limit > 0.1;
1348
- }
1349
1150
  async function decodeResponseIfNeeded(response, url, signal) {
1350
1151
  const encodingHeader = response.headers.get('content-encoding');
1351
1152
  const parsedEncodings = parseContentEncodings(encodingHeader);
@@ -1361,76 +1162,81 @@ async function decodeResponseIfNeeded(response, url, signal) {
1361
1162
  }
1362
1163
  if (!response.body)
1363
1164
  return response;
1364
- // Peek at first chunk to check if actually compressed
1365
- const reader = response.body.getReader();
1366
- let initialChunk;
1367
- try {
1368
- const { done, value } = await reader.read();
1369
- if (done) {
1370
- return new Response(null, {
1371
- status: response.status,
1372
- statusText: response.statusText,
1373
- headers: response.headers,
1374
- });
1375
- }
1376
- initialChunk = value;
1377
- }
1378
- catch (error) {
1379
- // If read fails, throw properly
1380
- throw new FetchError(`Failed to read response body: ${isError(error) ? error.message : String(error)}`, url, 502);
1381
- }
1165
+ const [decodeBranch, passthroughBranch] = response.body.tee();
1382
1166
  const decodeOrder = encodings
1383
1167
  .slice()
1384
1168
  .reverse()
1385
1169
  .filter(isSupportedContentEncoding);
1386
- const firstDecodeEncoding = decodeOrder[0];
1387
- if (!firstDecodeEncoding ||
1388
- !isLikelyCompressed(initialChunk, firstDecodeEncoding)) {
1389
- const body = createPumpedStream(initialChunk, reader);
1390
- const headers = new Headers(response.headers);
1391
- headers.delete('content-encoding');
1392
- headers.delete('content-length');
1393
- return new Response(body, {
1394
- status: response.status,
1395
- statusText: response.statusText,
1396
- headers,
1397
- });
1398
- }
1399
1170
  const decompressors = decodeOrder.map((encoding) => createDecompressor(encoding));
1400
- const sourceStream = Readable.fromWeb(toNodeReadableStream(createPumpedStream(initialChunk, reader), url, 'response:decode-content-encoding'));
1171
+ const decodeSource = Readable.fromWeb(toNodeReadableStream(decodeBranch, url, 'response:decode-content-encoding'));
1401
1172
  const decodedNodeStream = new PassThrough();
1402
- const pipelinePromise = pipeline([
1403
- sourceStream,
1173
+ const decodedPipeline = pipeline([
1174
+ decodeSource,
1404
1175
  ...decompressors,
1405
1176
  decodedNodeStream,
1406
1177
  ]);
1407
- const abortHandler = () => {
1408
- sourceStream.destroy();
1178
+ const headers = new Headers(response.headers);
1179
+ headers.delete('content-encoding');
1180
+ headers.delete('content-length');
1181
+ const abortDecodePipeline = () => {
1182
+ decodeSource.destroy();
1409
1183
  for (const decompressor of decompressors) {
1410
1184
  decompressor.destroy();
1411
1185
  }
1412
1186
  decodedNodeStream.destroy();
1413
1187
  };
1414
1188
  if (signal) {
1415
- signal.addEventListener('abort', abortHandler, { once: true });
1189
+ signal.addEventListener('abort', abortDecodePipeline, { once: true });
1416
1190
  }
1417
- void pipelinePromise.catch((error) => {
1191
+ void decodedPipeline.catch((error) => {
1418
1192
  decodedNodeStream.destroy(error instanceof Error ? error : new Error(String(error)));
1419
1193
  });
1420
- const decodedBody = toWebReadableStream(decodedNodeStream, url, 'response:decode-content-encoding');
1421
- const headers = new Headers(response.headers);
1422
- headers.delete('content-encoding');
1423
- headers.delete('content-length');
1424
- if (signal) {
1425
- void finished(decodedNodeStream, { cleanup: true }).finally(() => {
1426
- signal.removeEventListener('abort', abortHandler);
1194
+ const decodedBodyStream = toWebReadableStream(decodedNodeStream, url, 'response:decode-content-encoding');
1195
+ const decodedReader = decodedBodyStream.getReader();
1196
+ const clearAbortListener = () => {
1197
+ if (!signal)
1198
+ return;
1199
+ signal.removeEventListener('abort', abortDecodePipeline);
1200
+ };
1201
+ try {
1202
+ const first = await decodedReader.read();
1203
+ if (first.done) {
1204
+ clearAbortListener();
1205
+ void passthroughBranch.cancel().catch(() => undefined);
1206
+ return new Response(null, {
1207
+ status: response.status,
1208
+ statusText: response.statusText,
1209
+ headers,
1210
+ });
1211
+ }
1212
+ void passthroughBranch.cancel().catch(() => undefined);
1213
+ const body = createPumpedStream(first.value, decodedReader);
1214
+ if (signal) {
1215
+ void finished(decodedNodeStream, { cleanup: true }).finally(() => {
1216
+ clearAbortListener();
1217
+ });
1218
+ }
1219
+ return new Response(body, {
1220
+ status: response.status,
1221
+ statusText: response.statusText,
1222
+ headers,
1223
+ });
1224
+ }
1225
+ catch (error) {
1226
+ clearAbortListener();
1227
+ abortDecodePipeline();
1228
+ void decodedReader.cancel(error).catch(() => undefined);
1229
+ logDebug('Content-Encoding decode failed; using passthrough body', {
1230
+ url: redactUrl(url),
1231
+ encoding: encodingHeader ?? encodings.join(','),
1232
+ error: isError(error) ? error.message : String(error),
1233
+ });
1234
+ return new Response(passthroughBranch, {
1235
+ status: response.status,
1236
+ statusText: response.statusText,
1237
+ headers,
1427
1238
  });
1428
1239
  }
1429
- return new Response(decodedBody, {
1430
- status: response.status,
1431
- statusText: response.statusText,
1432
- headers,
1433
- });
1434
1240
  }
1435
1241
  async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry, reader, maxBytes, mode, signal) {
1436
1242
  const responseError = resolveResponseError(response, finalUrl);
package/dist/tools.js CHANGED
@@ -446,8 +446,21 @@ function buildToolContentBlocks(structuredContent, resourceLink, embeddedResourc
446
446
  }
447
447
  function resolveNormalizedUrl(url) {
448
448
  const { normalizedUrl: validatedUrl } = normalizeUrl(url);
449
- const { url: normalizedUrl, transformed } = transformToRawUrl(validatedUrl);
450
- return { normalizedUrl, originalUrl: validatedUrl, transformed };
449
+ const transformedResult = transformToRawUrl(validatedUrl);
450
+ if (!transformedResult.transformed) {
451
+ return {
452
+ normalizedUrl: validatedUrl,
453
+ originalUrl: validatedUrl,
454
+ transformed: false,
455
+ };
456
+ }
457
+ // Re-validate transformed URLs so blocked-host and length policies still apply.
458
+ const { normalizedUrl: transformedUrl } = normalizeUrl(transformedResult.url);
459
+ return {
460
+ normalizedUrl: transformedUrl,
461
+ originalUrl: validatedUrl,
462
+ transformed: true,
463
+ };
451
464
  }
452
465
  function logRawUrlTransformation(resolvedUrl) {
453
466
  if (!resolvedUrl.transformed)
@@ -493,10 +506,12 @@ function attemptCacheRetrieval(params) {
493
506
  return null;
494
507
  }
495
508
  logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
509
+ const finalUrl = cached.url !== normalizedUrl ? cached.url : undefined;
496
510
  return {
497
511
  data,
498
512
  fromCache: true,
499
513
  url: normalizedUrl,
514
+ ...(finalUrl ? { finalUrl } : {}),
500
515
  fetchedAt: cached.fetchedAt,
501
516
  cacheKey,
502
517
  };
@@ -675,12 +690,16 @@ export function parseCachedMarkdownResult(cached) {
675
690
  if (typeof markdown !== 'string')
676
691
  return undefined;
677
692
  const metadata = normalizeExtractedMetadata(result.data.metadata);
693
+ const truncated = result.data.truncated ?? false;
694
+ const persistedMarkdown = truncated
695
+ ? appendTruncationMarker(markdown, TRUNCATION_MARKER)
696
+ : markdown;
678
697
  return {
679
- content: markdown,
680
- markdown,
698
+ content: persistedMarkdown,
699
+ markdown: persistedMarkdown,
681
700
  title: result.data.title,
682
701
  ...(metadata ? { metadata } : {}),
683
- truncated: result.data.truncated ?? false,
702
+ truncated,
684
703
  };
685
704
  }
686
705
  const markdownTransform = async (input, url, signal, skipNoiseRemoval) => {
@@ -695,8 +714,11 @@ const markdownTransform = async (input, url, signal, skipNoiseRemoval) => {
695
714
  return { ...result, content: result.markdown, truncated };
696
715
  };
697
716
  function serializeMarkdownResult(result) {
717
+ const persistedMarkdown = result.truncated
718
+ ? appendTruncationMarker(result.markdown, TRUNCATION_MARKER)
719
+ : result.markdown;
698
720
  return JSON.stringify({
699
- markdown: result.markdown,
721
+ markdown: persistedMarkdown,
700
722
  title: result.title,
701
723
  metadata: result.metadata,
702
724
  truncated: result.truncated,
package/dist/transform.js CHANGED
@@ -2369,19 +2369,15 @@ function resolveWorkerFallback(error, htmlOrBuffer, url, options) {
2369
2369
  });
2370
2370
  return transformInputInProcess(htmlOrBuffer, url, options);
2371
2371
  }
2372
+ abortPolicy.throwIfAborted(options.signal, url, 'transform:worker-fallback');
2372
2373
  if (error instanceof FetchError)
2373
2374
  throw error;
2374
- abortPolicy.throwIfAborted(options.signal, url, 'transform:worker-fallback');
2375
2375
  const message = getErrorMessage(error);
2376
- logWarn('Transform worker failed; refusing in-process fallback', {
2376
+ logWarn('Transform worker failed; falling back to in-process', {
2377
2377
  url: redactUrl(url),
2378
2378
  error: message,
2379
2379
  });
2380
- throw new FetchError('Transform worker failed', url, 503, {
2381
- reason: 'worker_failed',
2382
- stage: 'transform:worker',
2383
- error: message,
2384
- });
2380
+ return transformInputInProcess(htmlOrBuffer, url, options);
2385
2381
  }
2386
2382
  async function runWorkerTransformWithFallback(htmlOrBuffer, url, options) {
2387
2383
  const workerStage = stageTracker.start(url, 'transform:worker');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/fetch-url-mcp",
3
- "version": "1.1.2",
3
+ "version": "1.2.0",
4
4
  "mcpName": "io.github.j0hanz/fetch-url-mcp",
5
5
  "description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
6
6
  "type": "module",
@@ -52,6 +52,7 @@
52
52
  "start": "node dist/index.js",
53
53
  "format": "prettier --write .",
54
54
  "type-check": "node scripts/tasks.mjs type-check",
55
+ "type-check:tests": "node scripts/tasks.mjs type-check:tests",
55
56
  "type-check:diagnostics": "tsc --noEmit --extendedDiagnostics",
56
57
  "type-check:trace": "node -e \"require('fs').rmSync('.ts-trace',{recursive:true,force:true})\" && tsc --noEmit --generateTrace .ts-trace",
57
58
  "lint": "eslint .",