@j0hanz/fetch-url-mcp 1.3.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -21
- package/dist/cli.d.ts +3 -3
- package/dist/cli.js +15 -8
- package/dist/http/auth.d.ts +6 -6
- package/dist/http/auth.js +78 -23
- package/dist/http/health.d.ts +1 -2
- package/dist/http/health.js +7 -18
- package/dist/http/helpers.d.ts +3 -11
- package/dist/http/helpers.js +28 -26
- package/dist/http/native.d.ts +0 -1
- package/dist/http/native.js +63 -41
- package/dist/http/rate-limit.d.ts +2 -2
- package/dist/http/rate-limit.js +11 -16
- package/dist/index.d.ts +0 -1
- package/dist/index.js +17 -20
- package/dist/{markdown-cleanup.d.ts → lib/content.d.ts} +4 -2
- package/dist/lib/content.js +1356 -0
- package/dist/lib/core.d.ts +253 -0
- package/dist/lib/core.js +1228 -0
- package/dist/{tool-pipeline.d.ts → lib/fetch-pipeline.d.ts} +1 -3
- package/dist/{tool-pipeline.js → lib/fetch-pipeline.js} +18 -44
- package/dist/{fetch.d.ts → lib/http.d.ts} +7 -9
- package/dist/{fetch.js → lib/http.js} +721 -1004
- package/dist/lib/mcp-tools.d.ts +28 -0
- package/dist/lib/mcp-tools.js +107 -0
- package/dist/{tool-progress.d.ts → lib/progress.d.ts} +0 -2
- package/dist/{tool-progress.js → lib/progress.js} +9 -14
- package/dist/lib/task-handlers.d.ts +5 -0
- package/dist/{mcp.js → lib/task-handlers.js} +95 -31
- package/dist/lib/url.d.ts +70 -0
- package/dist/lib/url.js +686 -0
- package/dist/lib/utils.d.ts +58 -0
- package/dist/lib/utils.js +304 -0
- package/dist/{prompts.d.ts → prompts/index.d.ts} +0 -1
- package/dist/{prompts.js → prompts/index.js} +1 -2
- package/dist/{resources.d.ts → resources/index.d.ts} +0 -1
- package/dist/{resources.js → resources/index.js} +87 -64
- package/dist/{instructions.d.ts → resources/instructions.d.ts} +0 -1
- package/dist/{instructions.js → resources/instructions.js} +5 -3
- package/dist/schemas/inputs.d.ts +7 -0
- package/dist/schemas/inputs.js +24 -0
- package/dist/schemas/outputs.d.ts +23 -0
- package/dist/schemas/outputs.js +77 -0
- package/dist/server.d.ts +0 -1
- package/dist/server.js +26 -25
- package/dist/tasks/execution.d.ts +0 -1
- package/dist/tasks/execution.js +106 -70
- package/dist/tasks/manager.d.ts +11 -3
- package/dist/tasks/manager.js +97 -73
- package/dist/tasks/owner.d.ts +3 -3
- package/dist/tasks/owner.js +2 -2
- package/dist/tasks/tool-registry.d.ts +11 -0
- package/dist/tasks/tool-registry.js +13 -0
- package/dist/tools/fetch-url.d.ts +28 -0
- package/dist/{tools.js → tools/fetch-url.js} +95 -147
- package/dist/tools/index.d.ts +2 -0
- package/dist/tools/index.js +4 -0
- package/dist/transform/html-translators.d.ts +1 -0
- package/dist/transform/html-translators.js +454 -0
- package/dist/transform/metadata.d.ts +4 -0
- package/dist/transform/metadata.js +183 -0
- package/dist/transform/transform.d.ts +0 -1
- package/dist/transform/transform.js +44 -679
- package/dist/transform/types.d.ts +9 -12
- package/dist/transform/types.js +0 -1
- package/dist/transform/worker-pool.d.ts +0 -1
- package/dist/transform/worker-pool.js +7 -16
- package/dist/transform/workers/shared.d.ts +7 -0
- package/dist/transform/workers/shared.js +130 -0
- package/dist/transform/workers/transform-child.d.ts +0 -1
- package/dist/transform/workers/transform-child.js +5 -135
- package/dist/transform/workers/transform-worker.d.ts +0 -1
- package/dist/transform/workers/transform-worker.js +7 -128
- package/package.json +11 -7
- package/dist/cache.d.ts +0 -54
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js +0 -261
- package/dist/cache.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/config.d.ts +0 -141
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js +0 -473
- package/dist/config.js.map +0 -1
- package/dist/crypto.d.ts +0 -4
- package/dist/crypto.d.ts.map +0 -1
- package/dist/crypto.js +0 -56
- package/dist/crypto.js.map +0 -1
- package/dist/dom-noise-removal.d.ts +0 -2
- package/dist/dom-noise-removal.d.ts.map +0 -1
- package/dist/dom-noise-removal.js +0 -494
- package/dist/dom-noise-removal.js.map +0 -1
- package/dist/download.d.ts +0 -4
- package/dist/download.d.ts.map +0 -1
- package/dist/download.js +0 -106
- package/dist/download.js.map +0 -1
- package/dist/errors.d.ts +0 -11
- package/dist/errors.d.ts.map +0 -1
- package/dist/errors.js +0 -65
- package/dist/errors.js.map +0 -1
- package/dist/examples/mcp-fetch-url-client.js +0 -329
- package/dist/examples/mcp-fetch-url-client.js.map +0 -1
- package/dist/fetch-content.d.ts +0 -5
- package/dist/fetch-content.d.ts.map +0 -1
- package/dist/fetch-content.js +0 -164
- package/dist/fetch-content.js.map +0 -1
- package/dist/fetch-stream.d.ts +0 -5
- package/dist/fetch-stream.d.ts.map +0 -1
- package/dist/fetch-stream.js +0 -29
- package/dist/fetch-stream.js.map +0 -1
- package/dist/fetch.d.ts.map +0 -1
- package/dist/fetch.js.map +0 -1
- package/dist/host-normalization.d.ts +0 -2
- package/dist/host-normalization.d.ts.map +0 -1
- package/dist/host-normalization.js +0 -91
- package/dist/host-normalization.js.map +0 -1
- package/dist/http/auth.d.ts.map +0 -1
- package/dist/http/auth.js.map +0 -1
- package/dist/http/health.d.ts.map +0 -1
- package/dist/http/health.js.map +0 -1
- package/dist/http/helpers.d.ts.map +0 -1
- package/dist/http/helpers.js.map +0 -1
- package/dist/http/native.d.ts.map +0 -1
- package/dist/http/native.js.map +0 -1
- package/dist/http/rate-limit.d.ts.map +0 -1
- package/dist/http/rate-limit.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/instructions.d.ts.map +0 -1
- package/dist/instructions.js.map +0 -1
- package/dist/ip-blocklist.d.ts +0 -9
- package/dist/ip-blocklist.d.ts.map +0 -1
- package/dist/ip-blocklist.js +0 -79
- package/dist/ip-blocklist.js.map +0 -1
- package/dist/json.d.ts +0 -2
- package/dist/json.d.ts.map +0 -1
- package/dist/json.js +0 -45
- package/dist/json.js.map +0 -1
- package/dist/language-detection.d.ts +0 -3
- package/dist/language-detection.d.ts.map +0 -1
- package/dist/language-detection.js +0 -355
- package/dist/language-detection.js.map +0 -1
- package/dist/markdown-cleanup.d.ts.map +0 -1
- package/dist/markdown-cleanup.js +0 -534
- package/dist/markdown-cleanup.js.map +0 -1
- package/dist/mcp-validator.d.ts +0 -17
- package/dist/mcp-validator.d.ts.map +0 -1
- package/dist/mcp-validator.js +0 -45
- package/dist/mcp-validator.js.map +0 -1
- package/dist/mcp.d.ts +0 -4
- package/dist/mcp.d.ts.map +0 -1
- package/dist/mcp.js.map +0 -1
- package/dist/observability.d.ts +0 -23
- package/dist/observability.d.ts.map +0 -1
- package/dist/observability.js +0 -238
- package/dist/observability.js.map +0 -1
- package/dist/prompts.d.ts.map +0 -1
- package/dist/prompts.js.map +0 -1
- package/dist/resources.d.ts.map +0 -1
- package/dist/resources.js.map +0 -1
- package/dist/server-tuning.d.ts +0 -15
- package/dist/server-tuning.d.ts.map +0 -1
- package/dist/server-tuning.js +0 -49
- package/dist/server-tuning.js.map +0 -1
- package/dist/server.d.ts.map +0 -1
- package/dist/server.js.map +0 -1
- package/dist/session.d.ts +0 -42
- package/dist/session.d.ts.map +0 -1
- package/dist/session.js +0 -255
- package/dist/session.js.map +0 -1
- package/dist/tasks/execution.d.ts.map +0 -1
- package/dist/tasks/execution.js.map +0 -1
- package/dist/tasks/manager.d.ts.map +0 -1
- package/dist/tasks/manager.js.map +0 -1
- package/dist/tasks/owner.d.ts.map +0 -1
- package/dist/tasks/owner.js.map +0 -1
- package/dist/timer-utils.d.ts +0 -6
- package/dist/timer-utils.d.ts.map +0 -1
- package/dist/timer-utils.js +0 -27
- package/dist/timer-utils.js.map +0 -1
- package/dist/tool-errors.d.ts +0 -12
- package/dist/tool-errors.d.ts.map +0 -1
- package/dist/tool-errors.js +0 -55
- package/dist/tool-errors.js.map +0 -1
- package/dist/tool-pipeline.d.ts.map +0 -1
- package/dist/tool-pipeline.js.map +0 -1
- package/dist/tool-progress.d.ts.map +0 -1
- package/dist/tool-progress.js.map +0 -1
- package/dist/tools.d.ts +0 -54
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js.map +0 -1
- package/dist/transform/transform.d.ts.map +0 -1
- package/dist/transform/transform.js.map +0 -1
- package/dist/transform/types.d.ts.map +0 -1
- package/dist/transform/types.js.map +0 -1
- package/dist/transform/worker-pool.d.ts.map +0 -1
- package/dist/transform/worker-pool.js.map +0 -1
- package/dist/transform/workers/transform-child.d.ts.map +0 -1
- package/dist/transform/workers/transform-child.js.map +0 -1
- package/dist/transform/workers/transform-worker.d.ts.map +0 -1
- package/dist/transform/workers/transform-worker.js.map +0 -1
- package/dist/type-guards.d.ts +0 -16
- package/dist/type-guards.d.ts.map +0 -1
- package/dist/type-guards.js +0 -13
- package/dist/type-guards.js.map +0 -1
|
@@ -43,14 +43,17 @@ export interface ExtractionResult {
|
|
|
43
43
|
article: ExtractedArticle | null;
|
|
44
44
|
metadata: ExtractedMetadata;
|
|
45
45
|
}
|
|
46
|
+
interface MarkdownPayload {
|
|
47
|
+
markdown: string;
|
|
48
|
+
title?: string | undefined;
|
|
49
|
+
truncated: boolean;
|
|
50
|
+
metadata?: ExtractedMetadata;
|
|
51
|
+
}
|
|
46
52
|
/**
|
|
47
53
|
* Result of HTML to markdown transformation.
|
|
48
54
|
*/
|
|
49
|
-
export interface MarkdownTransformResult {
|
|
50
|
-
markdown: string;
|
|
55
|
+
export interface MarkdownTransformResult extends MarkdownPayload {
|
|
51
56
|
title: string | undefined;
|
|
52
|
-
truncated: boolean;
|
|
53
|
-
metadata?: ExtractedMetadata;
|
|
54
57
|
}
|
|
55
58
|
/**
|
|
56
59
|
* Options for transform operations.
|
|
@@ -109,14 +112,8 @@ export interface TransformWorkerCancelledMessage {
|
|
|
109
112
|
export interface TransformWorkerResultMessage {
|
|
110
113
|
type: 'result';
|
|
111
114
|
id: string;
|
|
112
|
-
result:
|
|
113
|
-
markdown: string;
|
|
114
|
-
title?: string;
|
|
115
|
-
truncated: boolean;
|
|
116
|
-
metadata?: ExtractedMetadata;
|
|
117
|
-
};
|
|
115
|
+
result: MarkdownPayload;
|
|
118
116
|
}
|
|
119
|
-
export type TransformWorkerPayload = TransformWorkerResultMessage['result'];
|
|
120
117
|
export interface TransformWorkerErrorMessage {
|
|
121
118
|
type: 'error';
|
|
122
119
|
id: string;
|
|
@@ -129,4 +126,4 @@ export interface TransformWorkerErrorMessage {
|
|
|
129
126
|
};
|
|
130
127
|
}
|
|
131
128
|
export type TransformWorkerOutgoingMessage = TransformWorkerResultMessage | TransformWorkerErrorMessage | TransformWorkerCancelledMessage;
|
|
132
|
-
|
|
129
|
+
export {};
|
package/dist/transform/types.js
CHANGED
|
@@ -5,20 +5,12 @@ import { availableParallelism } from 'node:os';
|
|
|
5
5
|
import { fileURLToPath } from 'node:url';
|
|
6
6
|
import { isSharedArrayBuffer } from 'node:util/types';
|
|
7
7
|
import { Worker, } from 'node:worker_threads';
|
|
8
|
-
import { config } from '../
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
|
|
14
|
-
// Abort helper (inlined to avoid circular dependency with transform.ts)
|
|
15
|
-
// ---------------------------------------------------------------------------
|
|
16
|
-
function createAbortError(url, stage) {
|
|
17
|
-
return new FetchError('Request was canceled', url, 499, {
|
|
18
|
-
reason: 'aborted',
|
|
19
|
-
stage,
|
|
20
|
-
});
|
|
21
|
-
}
|
|
8
|
+
import { config } from '../lib/core.js';
|
|
9
|
+
import { logWarn } from '../lib/core.js';
|
|
10
|
+
import { createAbortError } from '../lib/utils.js';
|
|
11
|
+
import { FetchError, getErrorMessage } from '../lib/utils.js';
|
|
12
|
+
import { createUnrefTimeout } from '../lib/utils.js';
|
|
13
|
+
import { isObject } from '../lib/utils.js';
|
|
22
14
|
// ---------------------------------------------------------------------------
|
|
23
15
|
// Worker message validation
|
|
24
16
|
// ---------------------------------------------------------------------------
|
|
@@ -282,7 +274,7 @@ class WorkerPool {
|
|
|
282
274
|
? 0
|
|
283
275
|
: Math.max(this.minCapacity, Math.min(size, this.maxCapacity));
|
|
284
276
|
this.timeoutMs = timeoutMs;
|
|
285
|
-
this.queueMax = this.maxCapacity *
|
|
277
|
+
this.queueMax = this.maxCapacity * 4;
|
|
286
278
|
this.spawnWorkerImpl = spawnWorker;
|
|
287
279
|
}
|
|
288
280
|
async transform(htmlOrBuffer, url, options) {
|
|
@@ -754,4 +746,3 @@ export async function shutdownWorkerPool() {
|
|
|
754
746
|
await workerPool.close();
|
|
755
747
|
workerPool = null;
|
|
756
748
|
}
|
|
757
|
-
//# sourceMappingURL=worker-pool.js.map
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { MarkdownTransformResult, TransformOptions, TransformWorkerOutgoingMessage } from '../types.js';
|
|
2
|
+
interface WorkerMessageHandlerOptions {
|
|
3
|
+
sendMessage: (message: TransformWorkerOutgoingMessage) => void;
|
|
4
|
+
runTransform: (html: string, url: string, options: TransformOptions) => MarkdownTransformResult;
|
|
5
|
+
}
|
|
6
|
+
export declare function createTransformMessageHandler(options: WorkerMessageHandlerOptions): (raw: unknown) => void;
|
|
7
|
+
export {};
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import { FetchError, getErrorMessage } from '../../lib/utils.js';
|
|
2
|
+
function isTransformMessage(message) {
|
|
3
|
+
if (!message || typeof message !== 'object')
|
|
4
|
+
return false;
|
|
5
|
+
const value = message;
|
|
6
|
+
const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = value;
|
|
7
|
+
return (typeof id === 'string' &&
|
|
8
|
+
typeof url === 'string' &&
|
|
9
|
+
typeof includeMetadata === 'boolean' &&
|
|
10
|
+
(html === undefined || typeof html === 'string') &&
|
|
11
|
+
(htmlBuffer === undefined || htmlBuffer instanceof Uint8Array) &&
|
|
12
|
+
(encoding === undefined || typeof encoding === 'string') &&
|
|
13
|
+
(skipNoiseRemoval === undefined || typeof skipNoiseRemoval === 'boolean') &&
|
|
14
|
+
(inputTruncated === undefined || typeof inputTruncated === 'boolean'));
|
|
15
|
+
}
|
|
16
|
+
function decodeHtml(html, htmlBuffer, encoding, decoder) {
|
|
17
|
+
if (!htmlBuffer)
|
|
18
|
+
return html ?? '';
|
|
19
|
+
if (!encoding || encoding === 'utf-8') {
|
|
20
|
+
return decoder.decode(htmlBuffer);
|
|
21
|
+
}
|
|
22
|
+
try {
|
|
23
|
+
return new TextDecoder(encoding).decode(htmlBuffer);
|
|
24
|
+
}
|
|
25
|
+
catch {
|
|
26
|
+
return decoder.decode(htmlBuffer);
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
function createErrorMessage(id, url, error) {
|
|
30
|
+
if (error instanceof FetchError) {
|
|
31
|
+
return {
|
|
32
|
+
type: 'error',
|
|
33
|
+
id,
|
|
34
|
+
error: {
|
|
35
|
+
name: error.name,
|
|
36
|
+
message: error.message,
|
|
37
|
+
url: error.url,
|
|
38
|
+
statusCode: error.statusCode,
|
|
39
|
+
details: { ...error.details },
|
|
40
|
+
},
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
return {
|
|
44
|
+
type: 'error',
|
|
45
|
+
id,
|
|
46
|
+
error: {
|
|
47
|
+
name: error instanceof Error ? error.name : 'Error',
|
|
48
|
+
message: getErrorMessage(error),
|
|
49
|
+
url,
|
|
50
|
+
},
|
|
51
|
+
};
|
|
52
|
+
}
|
|
53
|
+
function createResultMessage(id, result) {
|
|
54
|
+
return {
|
|
55
|
+
type: 'result',
|
|
56
|
+
id,
|
|
57
|
+
result: {
|
|
58
|
+
markdown: result.markdown,
|
|
59
|
+
...(result.metadata ? { metadata: result.metadata } : {}),
|
|
60
|
+
...(result.title !== undefined ? { title: result.title } : {}),
|
|
61
|
+
truncated: result.truncated,
|
|
62
|
+
},
|
|
63
|
+
};
|
|
64
|
+
}
|
|
65
|
+
export function createTransformMessageHandler(options) {
|
|
66
|
+
const { sendMessage, runTransform } = options;
|
|
67
|
+
const controllersById = new Map();
|
|
68
|
+
const decoder = new TextDecoder('utf-8');
|
|
69
|
+
return (raw) => {
|
|
70
|
+
if (!raw || typeof raw !== 'object')
|
|
71
|
+
return;
|
|
72
|
+
const message = raw;
|
|
73
|
+
const messageType = message['type'];
|
|
74
|
+
const messageId = message['id'];
|
|
75
|
+
if (messageType === 'cancel') {
|
|
76
|
+
if (typeof messageId !== 'string')
|
|
77
|
+
return;
|
|
78
|
+
const controller = controllersById.get(messageId);
|
|
79
|
+
if (controller)
|
|
80
|
+
controller.abort(new Error('Canceled'));
|
|
81
|
+
sendMessage({ type: 'cancelled', id: messageId });
|
|
82
|
+
return;
|
|
83
|
+
}
|
|
84
|
+
if (messageType !== 'transform' || !isTransformMessage(message))
|
|
85
|
+
return;
|
|
86
|
+
const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = message;
|
|
87
|
+
if (!id.trim()) {
|
|
88
|
+
sendMessage({
|
|
89
|
+
type: 'error',
|
|
90
|
+
id,
|
|
91
|
+
error: {
|
|
92
|
+
name: 'ValidationError',
|
|
93
|
+
message: 'Missing transform message id',
|
|
94
|
+
url: url || '',
|
|
95
|
+
},
|
|
96
|
+
});
|
|
97
|
+
return;
|
|
98
|
+
}
|
|
99
|
+
if (!url.trim()) {
|
|
100
|
+
sendMessage({
|
|
101
|
+
type: 'error',
|
|
102
|
+
id,
|
|
103
|
+
error: {
|
|
104
|
+
name: 'ValidationError',
|
|
105
|
+
message: 'Missing transform URL',
|
|
106
|
+
url,
|
|
107
|
+
},
|
|
108
|
+
});
|
|
109
|
+
return;
|
|
110
|
+
}
|
|
111
|
+
const controller = new AbortController();
|
|
112
|
+
controllersById.set(id, controller);
|
|
113
|
+
try {
|
|
114
|
+
const content = decodeHtml(html, htmlBuffer, encoding, decoder);
|
|
115
|
+
const result = runTransform(content, url, {
|
|
116
|
+
includeMetadata,
|
|
117
|
+
signal: controller.signal,
|
|
118
|
+
...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
|
|
119
|
+
...(inputTruncated ? { inputTruncated: true } : {}),
|
|
120
|
+
});
|
|
121
|
+
sendMessage(createResultMessage(id, result));
|
|
122
|
+
}
|
|
123
|
+
catch (error) {
|
|
124
|
+
sendMessage(createErrorMessage(id, url, error));
|
|
125
|
+
}
|
|
126
|
+
finally {
|
|
127
|
+
controllersById.delete(id);
|
|
128
|
+
}
|
|
129
|
+
};
|
|
130
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import process from 'node:process';
|
|
2
|
-
import { FetchError, getErrorMessage } from '../../errors.js';
|
|
3
2
|
import { transformHtmlToMarkdownInProcess } from '../transform.js';
|
|
3
|
+
import { createTransformMessageHandler } from './shared.js';
|
|
4
4
|
const send = process.send?.bind(process);
|
|
5
5
|
if (!send)
|
|
6
6
|
throw new Error('transform-child started without IPC channel');
|
|
@@ -8,138 +8,8 @@ const sendMessage = send;
|
|
|
8
8
|
function postMessage(message) {
|
|
9
9
|
sendMessage(message);
|
|
10
10
|
}
|
|
11
|
-
const
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
if (error instanceof FetchError) {
|
|
15
|
-
postMessage({
|
|
16
|
-
type: 'error',
|
|
17
|
-
id,
|
|
18
|
-
error: {
|
|
19
|
-
name: error.name,
|
|
20
|
-
message: error.message,
|
|
21
|
-
url: error.url,
|
|
22
|
-
statusCode: error.statusCode,
|
|
23
|
-
details: { ...error.details },
|
|
24
|
-
},
|
|
25
|
-
});
|
|
26
|
-
return;
|
|
27
|
-
}
|
|
28
|
-
postMessage({
|
|
29
|
-
type: 'error',
|
|
30
|
-
id,
|
|
31
|
-
error: {
|
|
32
|
-
name: error instanceof Error ? error.name : 'Error',
|
|
33
|
-
message: getErrorMessage(error),
|
|
34
|
-
url,
|
|
35
|
-
},
|
|
36
|
-
});
|
|
37
|
-
}
|
|
38
|
-
function isValidMessage(msg) {
|
|
39
|
-
const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = msg;
|
|
40
|
-
if (typeof id !== 'string')
|
|
41
|
-
return false;
|
|
42
|
-
if (typeof url !== 'string')
|
|
43
|
-
return false;
|
|
44
|
-
if (typeof includeMetadata !== 'boolean')
|
|
45
|
-
return false;
|
|
46
|
-
if (html !== undefined && typeof html !== 'string')
|
|
47
|
-
return false;
|
|
48
|
-
if (htmlBuffer !== undefined && !(htmlBuffer instanceof Uint8Array))
|
|
49
|
-
return false;
|
|
50
|
-
if (encoding !== undefined && typeof encoding !== 'string')
|
|
51
|
-
return false;
|
|
52
|
-
if (skipNoiseRemoval !== undefined && typeof skipNoiseRemoval !== 'boolean')
|
|
53
|
-
return false;
|
|
54
|
-
if (inputTruncated !== undefined && typeof inputTruncated !== 'boolean')
|
|
55
|
-
return false;
|
|
56
|
-
return true;
|
|
57
|
-
}
|
|
58
|
-
function postValidationError(id, url, message) {
|
|
59
|
-
postMessage({
|
|
60
|
-
type: 'error',
|
|
61
|
-
id,
|
|
62
|
-
error: { name: 'ValidationError', message, url },
|
|
63
|
-
});
|
|
64
|
-
}
|
|
65
|
-
function decodeHtml(html, htmlBuffer, encoding) {
|
|
66
|
-
if (!htmlBuffer)
|
|
67
|
-
return html ?? '';
|
|
68
|
-
if (!encoding || encoding === 'utf-8')
|
|
69
|
-
return decoder.decode(htmlBuffer);
|
|
70
|
-
try {
|
|
71
|
-
return new TextDecoder(encoding).decode(htmlBuffer);
|
|
72
|
-
}
|
|
73
|
-
catch {
|
|
74
|
-
// Fall back to UTF-8 when server-provided charset labels are invalid.
|
|
75
|
-
return decoder.decode(htmlBuffer);
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
function buildTransformResultMessage(result) {
|
|
79
|
-
return {
|
|
80
|
-
markdown: result.markdown,
|
|
81
|
-
...(result.metadata ? { metadata: result.metadata } : {}),
|
|
82
|
-
...(result.title !== undefined ? { title: result.title } : {}),
|
|
83
|
-
truncated: result.truncated,
|
|
84
|
-
};
|
|
85
|
-
}
|
|
86
|
-
function handleTransform(msg) {
|
|
87
|
-
if (!isValidMessage(msg))
|
|
88
|
-
return;
|
|
89
|
-
const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = msg;
|
|
90
|
-
if (!id.trim()) {
|
|
91
|
-
postValidationError(id, url || '', 'Missing transform message id');
|
|
92
|
-
return;
|
|
93
|
-
}
|
|
94
|
-
if (!url.trim()) {
|
|
95
|
-
postValidationError(id, url, 'Missing transform URL');
|
|
96
|
-
return;
|
|
97
|
-
}
|
|
98
|
-
const controller = new AbortController();
|
|
99
|
-
controllersById.set(id, controller);
|
|
100
|
-
try {
|
|
101
|
-
const content = decodeHtml(html, htmlBuffer, encoding);
|
|
102
|
-
const result = transformHtmlToMarkdownInProcess(content, url, {
|
|
103
|
-
includeMetadata,
|
|
104
|
-
signal: controller.signal,
|
|
105
|
-
...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
|
|
106
|
-
...(inputTruncated ? { inputTruncated: true } : {}),
|
|
107
|
-
});
|
|
108
|
-
const { markdown, metadata, title, truncated } = result;
|
|
109
|
-
postMessage({
|
|
110
|
-
type: 'result',
|
|
111
|
-
id,
|
|
112
|
-
result: buildTransformResultMessage({
|
|
113
|
-
markdown,
|
|
114
|
-
...(metadata ? { metadata } : {}),
|
|
115
|
-
...(title === undefined ? {} : { title }),
|
|
116
|
-
truncated,
|
|
117
|
-
}),
|
|
118
|
-
});
|
|
119
|
-
}
|
|
120
|
-
catch (error) {
|
|
121
|
-
postError(id, url, error);
|
|
122
|
-
}
|
|
123
|
-
finally {
|
|
124
|
-
controllersById.delete(id);
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
process.on('message', (raw) => {
|
|
128
|
-
if (!raw || typeof raw !== 'object')
|
|
129
|
-
return;
|
|
130
|
-
const msg = raw;
|
|
131
|
-
const { type, id } = msg;
|
|
132
|
-
if (type === 'cancel') {
|
|
133
|
-
if (typeof id !== 'string')
|
|
134
|
-
return;
|
|
135
|
-
const controller = controllersById.get(id);
|
|
136
|
-
if (controller)
|
|
137
|
-
controller.abort(new Error('Canceled'));
|
|
138
|
-
postMessage({ type: 'cancelled', id });
|
|
139
|
-
return;
|
|
140
|
-
}
|
|
141
|
-
if (type === 'transform') {
|
|
142
|
-
handleTransform(msg);
|
|
143
|
-
}
|
|
11
|
+
const onMessage = createTransformMessageHandler({
|
|
12
|
+
sendMessage: postMessage,
|
|
13
|
+
runTransform: transformHtmlToMarkdownInProcess,
|
|
144
14
|
});
|
|
145
|
-
|
|
15
|
+
process.on('message', onMessage);
|
|
@@ -1,134 +1,13 @@
|
|
|
1
1
|
import { parentPort } from 'node:worker_threads';
|
|
2
|
-
import { FetchError, getErrorMessage } from '../../errors.js';
|
|
3
2
|
import { transformHtmlToMarkdownInProcess } from '../transform.js';
|
|
3
|
+
import { createTransformMessageHandler } from './shared.js';
|
|
4
4
|
if (!parentPort)
|
|
5
5
|
throw new Error('transform-worker started without parentPort');
|
|
6
6
|
const port = parentPort;
|
|
7
|
-
const
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
type: 'error',
|
|
13
|
-
id,
|
|
14
|
-
error: {
|
|
15
|
-
name: error.name,
|
|
16
|
-
message: error.message,
|
|
17
|
-
url: error.url,
|
|
18
|
-
statusCode: error.statusCode,
|
|
19
|
-
details: { ...error.details },
|
|
20
|
-
},
|
|
21
|
-
});
|
|
22
|
-
return;
|
|
23
|
-
}
|
|
24
|
-
port.postMessage({
|
|
25
|
-
type: 'error',
|
|
26
|
-
id,
|
|
27
|
-
error: {
|
|
28
|
-
name: error instanceof Error ? error.name : 'Error',
|
|
29
|
-
message: getErrorMessage(error),
|
|
30
|
-
url,
|
|
31
|
-
},
|
|
32
|
-
});
|
|
33
|
-
}
|
|
34
|
-
function isValidMessage(msg) {
|
|
35
|
-
const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = msg;
|
|
36
|
-
return (typeof id === 'string' &&
|
|
37
|
-
typeof url === 'string' &&
|
|
38
|
-
typeof includeMetadata === 'boolean' &&
|
|
39
|
-
(html === undefined || typeof html === 'string') &&
|
|
40
|
-
(htmlBuffer === undefined || htmlBuffer instanceof Uint8Array) &&
|
|
41
|
-
(encoding === undefined || typeof encoding === 'string') &&
|
|
42
|
-
(skipNoiseRemoval === undefined || typeof skipNoiseRemoval === 'boolean') &&
|
|
43
|
-
(inputTruncated === undefined || typeof inputTruncated === 'boolean'));
|
|
44
|
-
}
|
|
45
|
-
function postValidationError(id, message, url) {
|
|
46
|
-
port.postMessage({
|
|
47
|
-
type: 'error',
|
|
48
|
-
id,
|
|
49
|
-
error: { name: 'ValidationError', message, url },
|
|
50
|
-
});
|
|
51
|
-
}
|
|
52
|
-
function decodeHtmlBuffer(htmlBuffer, encoding) {
|
|
53
|
-
if (!encoding || encoding === 'utf-8') {
|
|
54
|
-
return decoder.decode(htmlBuffer);
|
|
55
|
-
}
|
|
56
|
-
try {
|
|
57
|
-
return new TextDecoder(encoding).decode(htmlBuffer);
|
|
58
|
-
}
|
|
59
|
-
catch {
|
|
60
|
-
// Fall back to UTF-8 when server-provided charset labels are invalid.
|
|
61
|
-
return decoder.decode(htmlBuffer);
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
function resolveHtmlContent(html, htmlBuffer, encoding) {
|
|
65
|
-
return htmlBuffer ? decodeHtmlBuffer(htmlBuffer, encoding) : (html ?? '');
|
|
66
|
-
}
|
|
67
|
-
function buildTransformResultMessage(result) {
|
|
68
|
-
return {
|
|
69
|
-
markdown: result.markdown,
|
|
70
|
-
...(result.metadata ? { metadata: result.metadata } : {}),
|
|
71
|
-
...(result.title !== undefined ? { title: result.title } : {}),
|
|
72
|
-
truncated: result.truncated,
|
|
73
|
-
};
|
|
74
|
-
}
|
|
75
|
-
function handleTransform(msg) {
|
|
76
|
-
if (!isValidMessage(msg))
|
|
77
|
-
return;
|
|
78
|
-
const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = msg;
|
|
79
|
-
if (!id.trim()) {
|
|
80
|
-
postValidationError(id, 'Missing transform message id', url || '');
|
|
81
|
-
return;
|
|
82
|
-
}
|
|
83
|
-
if (!url.trim()) {
|
|
84
|
-
postValidationError(id, 'Missing transform URL', url);
|
|
85
|
-
return;
|
|
86
|
-
}
|
|
87
|
-
const controller = new AbortController();
|
|
88
|
-
controllersById.set(id, controller);
|
|
89
|
-
try {
|
|
90
|
-
const content = resolveHtmlContent(html, htmlBuffer, encoding);
|
|
91
|
-
const result = transformHtmlToMarkdownInProcess(content, url, {
|
|
92
|
-
includeMetadata,
|
|
93
|
-
signal: controller.signal,
|
|
94
|
-
...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
|
|
95
|
-
...(inputTruncated ? { inputTruncated: true } : {}),
|
|
96
|
-
});
|
|
97
|
-
const { markdown, metadata, title, truncated } = result;
|
|
98
|
-
port.postMessage({
|
|
99
|
-
type: 'result',
|
|
100
|
-
id,
|
|
101
|
-
result: buildTransformResultMessage({
|
|
102
|
-
markdown,
|
|
103
|
-
...(metadata ? { metadata } : {}),
|
|
104
|
-
...(title === undefined ? {} : { title }),
|
|
105
|
-
truncated,
|
|
106
|
-
}),
|
|
107
|
-
});
|
|
108
|
-
}
|
|
109
|
-
catch (error) {
|
|
110
|
-
postError(id, url, error);
|
|
111
|
-
}
|
|
112
|
-
finally {
|
|
113
|
-
controllersById.delete(id);
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
port.on('message', (raw) => {
|
|
117
|
-
if (!raw || typeof raw !== 'object')
|
|
118
|
-
return;
|
|
119
|
-
const msg = raw;
|
|
120
|
-
const { type, id } = msg;
|
|
121
|
-
if (type === 'cancel') {
|
|
122
|
-
if (typeof id !== 'string')
|
|
123
|
-
return;
|
|
124
|
-
const controller = controllersById.get(id);
|
|
125
|
-
if (controller)
|
|
126
|
-
controller.abort(new Error('Canceled'));
|
|
127
|
-
port.postMessage({ type: 'cancelled', id });
|
|
128
|
-
return;
|
|
129
|
-
}
|
|
130
|
-
if (type === 'transform') {
|
|
131
|
-
handleTransform(msg);
|
|
132
|
-
}
|
|
7
|
+
const onMessage = createTransformMessageHandler({
|
|
8
|
+
sendMessage: (message) => {
|
|
9
|
+
port.postMessage(message);
|
|
10
|
+
},
|
|
11
|
+
runTransform: transformHtmlToMarkdownInProcess,
|
|
133
12
|
});
|
|
134
|
-
|
|
13
|
+
port.on('message', onMessage);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@j0hanz/fetch-url-mcp",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.5.0",
|
|
4
4
|
"mcpName": "io.github.j0hanz/fetch-url-mcp",
|
|
5
5
|
"description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
|
|
6
6
|
"type": "module",
|
|
@@ -51,12 +51,15 @@
|
|
|
51
51
|
"start": "node dist/index.js",
|
|
52
52
|
"format": "prettier --write .",
|
|
53
53
|
"type-check": "node scripts/tasks.mjs type-check",
|
|
54
|
+
"type-check:src": "node node_modules/typescript/bin/tsc -p tsconfig.json --noEmit",
|
|
55
|
+
"type-check:tests": "node node_modules/typescript/bin/tsc -p tsconfig.test.json --noEmit",
|
|
54
56
|
"type-check:diagnostics": "tsc --noEmit --extendedDiagnostics",
|
|
55
57
|
"type-check:trace": "node -e \"require('fs').rmSync('.ts-trace',{recursive:true,force:true})\" && tsc --noEmit --generateTrace .ts-trace",
|
|
56
58
|
"lint": "eslint .",
|
|
59
|
+
"lint:tests": "eslint src/__tests__",
|
|
57
60
|
"lint:fix": "eslint . --fix",
|
|
58
61
|
"test": "node scripts/tasks.mjs test",
|
|
59
|
-
"test:fast": "node --test --import tsx/esm src/__tests__/**/*.test.ts",
|
|
62
|
+
"test:fast": "node --test --import tsx/esm src/__tests__/**/*.test.ts node-tests/**/*.test.ts",
|
|
60
63
|
"test:coverage": "node scripts/tasks.mjs test --coverage",
|
|
61
64
|
"knip": "knip",
|
|
62
65
|
"knip:fix": "knip --fix",
|
|
@@ -64,7 +67,7 @@
|
|
|
64
67
|
"prepublishOnly": "npm run lint && npm run type-check && npm run build"
|
|
65
68
|
},
|
|
66
69
|
"dependencies": {
|
|
67
|
-
"@modelcontextprotocol/sdk": "^1.
|
|
70
|
+
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
68
71
|
"@mozilla/readability": "^0.6.0",
|
|
69
72
|
"linkedom": "^0.18.12",
|
|
70
73
|
"node-html-markdown": "^2.0.0",
|
|
@@ -75,15 +78,16 @@
|
|
|
75
78
|
"@eslint/js": "^10.0.1",
|
|
76
79
|
"@trivago/prettier-plugin-sort-imports": "^6.0.2",
|
|
77
80
|
"@types/node": "^24",
|
|
78
|
-
"eslint": "^10.0.
|
|
81
|
+
"eslint": "^10.0.2",
|
|
79
82
|
"eslint-config-prettier": "^10.1.8",
|
|
80
|
-
"eslint-plugin-de-morgan": "^2.
|
|
83
|
+
"eslint-plugin-de-morgan": "^2.1.1",
|
|
81
84
|
"eslint-plugin-depend": "^1.4.0",
|
|
82
85
|
"eslint-plugin-unused-imports": "^4.4.1",
|
|
83
|
-
"knip": "^5.
|
|
86
|
+
"knip": "^5.85.0",
|
|
84
87
|
"prettier": "^3.8.1",
|
|
88
|
+
"tsx": "^4.21.0",
|
|
85
89
|
"typescript": "^5.9.3",
|
|
86
|
-
"typescript-eslint": "^8.56.
|
|
90
|
+
"typescript-eslint": "^8.56.1"
|
|
87
91
|
},
|
|
88
92
|
"engines": {
|
|
89
93
|
"node": ">=24"
|
package/dist/cache.d.ts
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import { z } from 'zod';
|
|
2
|
-
declare const CachedPayloadSchema: z.ZodObject<{
|
|
3
|
-
content: z.ZodOptional<z.ZodString>;
|
|
4
|
-
markdown: z.ZodOptional<z.ZodString>;
|
|
5
|
-
title: z.ZodOptional<z.ZodString>;
|
|
6
|
-
}, z.core.$strict>;
|
|
7
|
-
type CachedPayload = z.infer<typeof CachedPayloadSchema>;
|
|
8
|
-
interface CacheEntry {
|
|
9
|
-
url: string;
|
|
10
|
-
title?: string;
|
|
11
|
-
content: string;
|
|
12
|
-
fetchedAt: string;
|
|
13
|
-
expiresAt: string;
|
|
14
|
-
}
|
|
15
|
-
interface CacheKeyParts {
|
|
16
|
-
namespace: string;
|
|
17
|
-
urlHash: string;
|
|
18
|
-
}
|
|
19
|
-
interface CacheSetOptions {
|
|
20
|
-
force?: boolean;
|
|
21
|
-
}
|
|
22
|
-
interface CacheGetOptions {
|
|
23
|
-
force?: boolean;
|
|
24
|
-
}
|
|
25
|
-
interface CacheEntryMetadata {
|
|
26
|
-
url: string;
|
|
27
|
-
title?: string;
|
|
28
|
-
}
|
|
29
|
-
interface CacheUpdateEvent {
|
|
30
|
-
cacheKey: string;
|
|
31
|
-
namespace: string;
|
|
32
|
-
urlHash: string;
|
|
33
|
-
listChanged: boolean;
|
|
34
|
-
}
|
|
35
|
-
type CacheUpdateListener = (event: CacheUpdateEvent) => unknown;
|
|
36
|
-
export declare function parseCachedPayload(raw: string): CachedPayload | null;
|
|
37
|
-
export declare function resolveCachedPayloadContent(payload: CachedPayload): string | null;
|
|
38
|
-
export declare function createCacheKey(namespace: string, url: string, vary?: Record<string, unknown> | string): string | null;
|
|
39
|
-
export declare function parseCacheKey(cacheKey: string): CacheKeyParts | null;
|
|
40
|
-
export declare function onCacheUpdate(listener: CacheUpdateListener): () => void;
|
|
41
|
-
export declare function get(cacheKey: string | null, options?: CacheGetOptions): CacheEntry | undefined;
|
|
42
|
-
export declare function set(cacheKey: string | null, content: string, metadata: CacheEntryMetadata, options?: CacheSetOptions): void;
|
|
43
|
-
export declare function keys(): readonly string[];
|
|
44
|
-
/**
|
|
45
|
-
* Return lightweight metadata (url and optional page title) for a cache entry.
|
|
46
|
-
* Returns `undefined` if the key is not found or cache is disabled.
|
|
47
|
-
*/
|
|
48
|
-
export declare function getEntryMeta(cacheKey: string): {
|
|
49
|
-
url: string;
|
|
50
|
-
title?: string;
|
|
51
|
-
} | undefined;
|
|
52
|
-
export declare function isEnabled(): boolean;
|
|
53
|
-
export {};
|
|
54
|
-
//# sourceMappingURL=cache.d.ts.map
|
package/dist/cache.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAYxB,QAAA,MAAM,mBAAmB;;;;kBAIvB,CAAC;AACH,KAAK,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAGzD,UAAU,UAAU;IAClB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,UAAU,aAAa;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,UAAU,eAAe;IACvB,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,UAAU,eAAe;IACvB,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,UAAU,kBAAkB;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAMD,UAAU,gBAAgB;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,KAAK,mBAAmB,GAAG,CAAC,KAAK,EAAE,gBAAgB,KAAK,OAAO,CAAC;AAWhE,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAOpE;AAED,wBAAgB,2BAA2B,CACzC,OAAO,EAAE,aAAa,GACrB,MAAM,GAAG,IAAI,CAEf;AA4BD,wBAAgB,cAAc,CAC5B,SAAS,EAAE,MAAM,EACjB,GAAG,EAAE,MAAM,EACX,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,GACtC,MAAM,GAAG,IAAI,CAoBf;AAED,wBAAgB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAMpE;AA6LD,wBAAgB,aAAa,CAAC,QAAQ,EAAE,mBAAmB,GAAG,MAAM,IAAI,CAEvE;AAED,wBAAgB,GAAG,CACjB,QAAQ,EAAE,MAAM,GAAG,IAAI,EACvB,OAAO,CAAC,EAAE,eAAe,GACxB,UAAU,GAAG,SAAS,CAExB;AAED,wBAAgB,GAAG,CACjB,QAAQ,EAAE,MAAM,GAAG,IAAI,EACvB,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,kBAAkB,EAC5B,OAAO,CAAC,EAAE,eAAe,GACxB,IAAI,CAEN;AAED,wBAAgB,IAAI,IAAI,SAAS,MAAM,EAAE,CAExC;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,EAAE,MAAM,GACf;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,SAAS,CAM7C;AAED,wBAAgB,SAAS,IAAI,OAAO,CAEnC"}
|