@j0hanz/fetch-url-mcp 1.3.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. package/README.md +24 -21
  2. package/dist/cli.d.ts +3 -3
  3. package/dist/cli.js +15 -8
  4. package/dist/http/auth.d.ts +6 -6
  5. package/dist/http/auth.js +78 -23
  6. package/dist/http/health.d.ts +1 -2
  7. package/dist/http/health.js +7 -18
  8. package/dist/http/helpers.d.ts +3 -11
  9. package/dist/http/helpers.js +28 -26
  10. package/dist/http/native.d.ts +0 -1
  11. package/dist/http/native.js +63 -41
  12. package/dist/http/rate-limit.d.ts +2 -2
  13. package/dist/http/rate-limit.js +11 -16
  14. package/dist/index.d.ts +0 -1
  15. package/dist/index.js +17 -20
  16. package/dist/{markdown-cleanup.d.ts → lib/content.d.ts} +4 -2
  17. package/dist/lib/content.js +1356 -0
  18. package/dist/lib/core.d.ts +253 -0
  19. package/dist/lib/core.js +1228 -0
  20. package/dist/{tool-pipeline.d.ts → lib/fetch-pipeline.d.ts} +1 -3
  21. package/dist/{tool-pipeline.js → lib/fetch-pipeline.js} +18 -44
  22. package/dist/{fetch.d.ts → lib/http.d.ts} +7 -9
  23. package/dist/{fetch.js → lib/http.js} +721 -1004
  24. package/dist/lib/mcp-tools.d.ts +28 -0
  25. package/dist/lib/mcp-tools.js +107 -0
  26. package/dist/{tool-progress.d.ts → lib/progress.d.ts} +0 -2
  27. package/dist/{tool-progress.js → lib/progress.js} +9 -14
  28. package/dist/lib/task-handlers.d.ts +5 -0
  29. package/dist/{mcp.js → lib/task-handlers.js} +95 -31
  30. package/dist/lib/url.d.ts +70 -0
  31. package/dist/lib/url.js +686 -0
  32. package/dist/lib/utils.d.ts +58 -0
  33. package/dist/lib/utils.js +304 -0
  34. package/dist/{prompts.d.ts → prompts/index.d.ts} +0 -1
  35. package/dist/{prompts.js → prompts/index.js} +1 -2
  36. package/dist/{resources.d.ts → resources/index.d.ts} +0 -1
  37. package/dist/{resources.js → resources/index.js} +87 -64
  38. package/dist/{instructions.d.ts → resources/instructions.d.ts} +0 -1
  39. package/dist/{instructions.js → resources/instructions.js} +5 -3
  40. package/dist/schemas/inputs.d.ts +7 -0
  41. package/dist/schemas/inputs.js +24 -0
  42. package/dist/schemas/outputs.d.ts +23 -0
  43. package/dist/schemas/outputs.js +77 -0
  44. package/dist/server.d.ts +0 -1
  45. package/dist/server.js +26 -25
  46. package/dist/tasks/execution.d.ts +0 -1
  47. package/dist/tasks/execution.js +106 -70
  48. package/dist/tasks/manager.d.ts +11 -3
  49. package/dist/tasks/manager.js +97 -73
  50. package/dist/tasks/owner.d.ts +3 -3
  51. package/dist/tasks/owner.js +2 -2
  52. package/dist/tasks/tool-registry.d.ts +11 -0
  53. package/dist/tasks/tool-registry.js +13 -0
  54. package/dist/tools/fetch-url.d.ts +28 -0
  55. package/dist/{tools.js → tools/fetch-url.js} +95 -147
  56. package/dist/tools/index.d.ts +2 -0
  57. package/dist/tools/index.js +4 -0
  58. package/dist/transform/html-translators.d.ts +1 -0
  59. package/dist/transform/html-translators.js +454 -0
  60. package/dist/transform/metadata.d.ts +4 -0
  61. package/dist/transform/metadata.js +183 -0
  62. package/dist/transform/transform.d.ts +0 -1
  63. package/dist/transform/transform.js +44 -679
  64. package/dist/transform/types.d.ts +9 -12
  65. package/dist/transform/types.js +0 -1
  66. package/dist/transform/worker-pool.d.ts +0 -1
  67. package/dist/transform/worker-pool.js +7 -16
  68. package/dist/transform/workers/shared.d.ts +7 -0
  69. package/dist/transform/workers/shared.js +130 -0
  70. package/dist/transform/workers/transform-child.d.ts +0 -1
  71. package/dist/transform/workers/transform-child.js +5 -135
  72. package/dist/transform/workers/transform-worker.d.ts +0 -1
  73. package/dist/transform/workers/transform-worker.js +7 -128
  74. package/package.json +11 -7
  75. package/dist/cache.d.ts +0 -54
  76. package/dist/cache.d.ts.map +0 -1
  77. package/dist/cache.js +0 -261
  78. package/dist/cache.js.map +0 -1
  79. package/dist/cli.d.ts.map +0 -1
  80. package/dist/cli.js.map +0 -1
  81. package/dist/config.d.ts +0 -141
  82. package/dist/config.d.ts.map +0 -1
  83. package/dist/config.js +0 -473
  84. package/dist/config.js.map +0 -1
  85. package/dist/crypto.d.ts +0 -4
  86. package/dist/crypto.d.ts.map +0 -1
  87. package/dist/crypto.js +0 -56
  88. package/dist/crypto.js.map +0 -1
  89. package/dist/dom-noise-removal.d.ts +0 -2
  90. package/dist/dom-noise-removal.d.ts.map +0 -1
  91. package/dist/dom-noise-removal.js +0 -494
  92. package/dist/dom-noise-removal.js.map +0 -1
  93. package/dist/download.d.ts +0 -4
  94. package/dist/download.d.ts.map +0 -1
  95. package/dist/download.js +0 -106
  96. package/dist/download.js.map +0 -1
  97. package/dist/errors.d.ts +0 -11
  98. package/dist/errors.d.ts.map +0 -1
  99. package/dist/errors.js +0 -65
  100. package/dist/errors.js.map +0 -1
  101. package/dist/examples/mcp-fetch-url-client.js +0 -329
  102. package/dist/examples/mcp-fetch-url-client.js.map +0 -1
  103. package/dist/fetch-content.d.ts +0 -5
  104. package/dist/fetch-content.d.ts.map +0 -1
  105. package/dist/fetch-content.js +0 -164
  106. package/dist/fetch-content.js.map +0 -1
  107. package/dist/fetch-stream.d.ts +0 -5
  108. package/dist/fetch-stream.d.ts.map +0 -1
  109. package/dist/fetch-stream.js +0 -29
  110. package/dist/fetch-stream.js.map +0 -1
  111. package/dist/fetch.d.ts.map +0 -1
  112. package/dist/fetch.js.map +0 -1
  113. package/dist/host-normalization.d.ts +0 -2
  114. package/dist/host-normalization.d.ts.map +0 -1
  115. package/dist/host-normalization.js +0 -91
  116. package/dist/host-normalization.js.map +0 -1
  117. package/dist/http/auth.d.ts.map +0 -1
  118. package/dist/http/auth.js.map +0 -1
  119. package/dist/http/health.d.ts.map +0 -1
  120. package/dist/http/health.js.map +0 -1
  121. package/dist/http/helpers.d.ts.map +0 -1
  122. package/dist/http/helpers.js.map +0 -1
  123. package/dist/http/native.d.ts.map +0 -1
  124. package/dist/http/native.js.map +0 -1
  125. package/dist/http/rate-limit.d.ts.map +0 -1
  126. package/dist/http/rate-limit.js.map +0 -1
  127. package/dist/index.d.ts.map +0 -1
  128. package/dist/index.js.map +0 -1
  129. package/dist/instructions.d.ts.map +0 -1
  130. package/dist/instructions.js.map +0 -1
  131. package/dist/ip-blocklist.d.ts +0 -9
  132. package/dist/ip-blocklist.d.ts.map +0 -1
  133. package/dist/ip-blocklist.js +0 -79
  134. package/dist/ip-blocklist.js.map +0 -1
  135. package/dist/json.d.ts +0 -2
  136. package/dist/json.d.ts.map +0 -1
  137. package/dist/json.js +0 -45
  138. package/dist/json.js.map +0 -1
  139. package/dist/language-detection.d.ts +0 -3
  140. package/dist/language-detection.d.ts.map +0 -1
  141. package/dist/language-detection.js +0 -355
  142. package/dist/language-detection.js.map +0 -1
  143. package/dist/markdown-cleanup.d.ts.map +0 -1
  144. package/dist/markdown-cleanup.js +0 -534
  145. package/dist/markdown-cleanup.js.map +0 -1
  146. package/dist/mcp-validator.d.ts +0 -17
  147. package/dist/mcp-validator.d.ts.map +0 -1
  148. package/dist/mcp-validator.js +0 -45
  149. package/dist/mcp-validator.js.map +0 -1
  150. package/dist/mcp.d.ts +0 -4
  151. package/dist/mcp.d.ts.map +0 -1
  152. package/dist/mcp.js.map +0 -1
  153. package/dist/observability.d.ts +0 -23
  154. package/dist/observability.d.ts.map +0 -1
  155. package/dist/observability.js +0 -238
  156. package/dist/observability.js.map +0 -1
  157. package/dist/prompts.d.ts.map +0 -1
  158. package/dist/prompts.js.map +0 -1
  159. package/dist/resources.d.ts.map +0 -1
  160. package/dist/resources.js.map +0 -1
  161. package/dist/server-tuning.d.ts +0 -15
  162. package/dist/server-tuning.d.ts.map +0 -1
  163. package/dist/server-tuning.js +0 -49
  164. package/dist/server-tuning.js.map +0 -1
  165. package/dist/server.d.ts.map +0 -1
  166. package/dist/server.js.map +0 -1
  167. package/dist/session.d.ts +0 -42
  168. package/dist/session.d.ts.map +0 -1
  169. package/dist/session.js +0 -255
  170. package/dist/session.js.map +0 -1
  171. package/dist/tasks/execution.d.ts.map +0 -1
  172. package/dist/tasks/execution.js.map +0 -1
  173. package/dist/tasks/manager.d.ts.map +0 -1
  174. package/dist/tasks/manager.js.map +0 -1
  175. package/dist/tasks/owner.d.ts.map +0 -1
  176. package/dist/tasks/owner.js.map +0 -1
  177. package/dist/timer-utils.d.ts +0 -6
  178. package/dist/timer-utils.d.ts.map +0 -1
  179. package/dist/timer-utils.js +0 -27
  180. package/dist/timer-utils.js.map +0 -1
  181. package/dist/tool-errors.d.ts +0 -12
  182. package/dist/tool-errors.d.ts.map +0 -1
  183. package/dist/tool-errors.js +0 -55
  184. package/dist/tool-errors.js.map +0 -1
  185. package/dist/tool-pipeline.d.ts.map +0 -1
  186. package/dist/tool-pipeline.js.map +0 -1
  187. package/dist/tool-progress.d.ts.map +0 -1
  188. package/dist/tool-progress.js.map +0 -1
  189. package/dist/tools.d.ts +0 -54
  190. package/dist/tools.d.ts.map +0 -1
  191. package/dist/tools.js.map +0 -1
  192. package/dist/transform/transform.d.ts.map +0 -1
  193. package/dist/transform/transform.js.map +0 -1
  194. package/dist/transform/types.d.ts.map +0 -1
  195. package/dist/transform/types.js.map +0 -1
  196. package/dist/transform/worker-pool.d.ts.map +0 -1
  197. package/dist/transform/worker-pool.js.map +0 -1
  198. package/dist/transform/workers/transform-child.d.ts.map +0 -1
  199. package/dist/transform/workers/transform-child.js.map +0 -1
  200. package/dist/transform/workers/transform-worker.d.ts.map +0 -1
  201. package/dist/transform/workers/transform-worker.js.map +0 -1
  202. package/dist/type-guards.d.ts +0 -16
  203. package/dist/type-guards.d.ts.map +0 -1
  204. package/dist/type-guards.js +0 -13
  205. package/dist/type-guards.js.map +0 -1
@@ -43,14 +43,17 @@ export interface ExtractionResult {
43
43
  article: ExtractedArticle | null;
44
44
  metadata: ExtractedMetadata;
45
45
  }
46
+ interface MarkdownPayload {
47
+ markdown: string;
48
+ title?: string | undefined;
49
+ truncated: boolean;
50
+ metadata?: ExtractedMetadata;
51
+ }
46
52
  /**
47
53
  * Result of HTML to markdown transformation.
48
54
  */
49
- export interface MarkdownTransformResult {
50
- markdown: string;
55
+ export interface MarkdownTransformResult extends MarkdownPayload {
51
56
  title: string | undefined;
52
- truncated: boolean;
53
- metadata?: ExtractedMetadata;
54
57
  }
55
58
  /**
56
59
  * Options for transform operations.
@@ -109,14 +112,8 @@ export interface TransformWorkerCancelledMessage {
109
112
  export interface TransformWorkerResultMessage {
110
113
  type: 'result';
111
114
  id: string;
112
- result: {
113
- markdown: string;
114
- title?: string;
115
- truncated: boolean;
116
- metadata?: ExtractedMetadata;
117
- };
115
+ result: MarkdownPayload;
118
116
  }
119
- export type TransformWorkerPayload = TransformWorkerResultMessage['result'];
120
117
  export interface TransformWorkerErrorMessage {
121
118
  type: 'error';
122
119
  id: string;
@@ -129,4 +126,4 @@ export interface TransformWorkerErrorMessage {
129
126
  };
130
127
  }
131
128
  export type TransformWorkerOutgoingMessage = TransformWorkerResultMessage | TransformWorkerErrorMessage | TransformWorkerCancelledMessage;
132
- //# sourceMappingURL=types.d.ts.map
129
+ export {};
@@ -3,4 +3,3 @@
3
3
  * Extracted to avoid circular dependencies between transform modules.
4
4
  */
5
5
  export {};
6
- //# sourceMappingURL=types.js.map
@@ -90,4 +90,3 @@ export declare function getWorkerPoolStats(): {
90
90
  } | null;
91
91
  export declare function shutdownWorkerPool(): Promise<void>;
92
92
  export {};
93
- //# sourceMappingURL=worker-pool.d.ts.map
@@ -5,20 +5,12 @@ import { availableParallelism } from 'node:os';
5
5
  import { fileURLToPath } from 'node:url';
6
6
  import { isSharedArrayBuffer } from 'node:util/types';
7
7
  import { Worker, } from 'node:worker_threads';
8
- import { config } from '../config.js';
9
- import { FetchError, getErrorMessage } from '../errors.js';
10
- import { logWarn } from '../observability.js';
11
- import { createUnrefTimeout } from '../timer-utils.js';
12
- import { isObject } from '../type-guards.js';
13
- // ---------------------------------------------------------------------------
14
- // Abort helper (inlined to avoid circular dependency with transform.ts)
15
- // ---------------------------------------------------------------------------
16
- function createAbortError(url, stage) {
17
- return new FetchError('Request was canceled', url, 499, {
18
- reason: 'aborted',
19
- stage,
20
- });
21
- }
8
+ import { config } from '../lib/core.js';
9
+ import { logWarn } from '../lib/core.js';
10
+ import { createAbortError } from '../lib/utils.js';
11
+ import { FetchError, getErrorMessage } from '../lib/utils.js';
12
+ import { createUnrefTimeout } from '../lib/utils.js';
13
+ import { isObject } from '../lib/utils.js';
22
14
  // ---------------------------------------------------------------------------
23
15
  // Worker message validation
24
16
  // ---------------------------------------------------------------------------
@@ -282,7 +274,7 @@ class WorkerPool {
282
274
  ? 0
283
275
  : Math.max(this.minCapacity, Math.min(size, this.maxCapacity));
284
276
  this.timeoutMs = timeoutMs;
285
- this.queueMax = this.maxCapacity * 32;
277
+ this.queueMax = this.maxCapacity * 4;
286
278
  this.spawnWorkerImpl = spawnWorker;
287
279
  }
288
280
  async transform(htmlOrBuffer, url, options) {
@@ -754,4 +746,3 @@ export async function shutdownWorkerPool() {
754
746
  await workerPool.close();
755
747
  workerPool = null;
756
748
  }
757
- //# sourceMappingURL=worker-pool.js.map
@@ -0,0 +1,7 @@
1
+ import type { MarkdownTransformResult, TransformOptions, TransformWorkerOutgoingMessage } from '../types.js';
2
+ interface WorkerMessageHandlerOptions {
3
+ sendMessage: (message: TransformWorkerOutgoingMessage) => void;
4
+ runTransform: (html: string, url: string, options: TransformOptions) => MarkdownTransformResult;
5
+ }
6
+ export declare function createTransformMessageHandler(options: WorkerMessageHandlerOptions): (raw: unknown) => void;
7
+ export {};
@@ -0,0 +1,130 @@
1
+ import { FetchError, getErrorMessage } from '../../lib/utils.js';
2
+ function isTransformMessage(message) {
3
+ if (!message || typeof message !== 'object')
4
+ return false;
5
+ const value = message;
6
+ const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = value;
7
+ return (typeof id === 'string' &&
8
+ typeof url === 'string' &&
9
+ typeof includeMetadata === 'boolean' &&
10
+ (html === undefined || typeof html === 'string') &&
11
+ (htmlBuffer === undefined || htmlBuffer instanceof Uint8Array) &&
12
+ (encoding === undefined || typeof encoding === 'string') &&
13
+ (skipNoiseRemoval === undefined || typeof skipNoiseRemoval === 'boolean') &&
14
+ (inputTruncated === undefined || typeof inputTruncated === 'boolean'));
15
+ }
16
+ function decodeHtml(html, htmlBuffer, encoding, decoder) {
17
+ if (!htmlBuffer)
18
+ return html ?? '';
19
+ if (!encoding || encoding === 'utf-8') {
20
+ return decoder.decode(htmlBuffer);
21
+ }
22
+ try {
23
+ return new TextDecoder(encoding).decode(htmlBuffer);
24
+ }
25
+ catch {
26
+ return decoder.decode(htmlBuffer);
27
+ }
28
+ }
29
+ function createErrorMessage(id, url, error) {
30
+ if (error instanceof FetchError) {
31
+ return {
32
+ type: 'error',
33
+ id,
34
+ error: {
35
+ name: error.name,
36
+ message: error.message,
37
+ url: error.url,
38
+ statusCode: error.statusCode,
39
+ details: { ...error.details },
40
+ },
41
+ };
42
+ }
43
+ return {
44
+ type: 'error',
45
+ id,
46
+ error: {
47
+ name: error instanceof Error ? error.name : 'Error',
48
+ message: getErrorMessage(error),
49
+ url,
50
+ },
51
+ };
52
+ }
53
+ function createResultMessage(id, result) {
54
+ return {
55
+ type: 'result',
56
+ id,
57
+ result: {
58
+ markdown: result.markdown,
59
+ ...(result.metadata ? { metadata: result.metadata } : {}),
60
+ ...(result.title !== undefined ? { title: result.title } : {}),
61
+ truncated: result.truncated,
62
+ },
63
+ };
64
+ }
65
+ export function createTransformMessageHandler(options) {
66
+ const { sendMessage, runTransform } = options;
67
+ const controllersById = new Map();
68
+ const decoder = new TextDecoder('utf-8');
69
+ return (raw) => {
70
+ if (!raw || typeof raw !== 'object')
71
+ return;
72
+ const message = raw;
73
+ const messageType = message['type'];
74
+ const messageId = message['id'];
75
+ if (messageType === 'cancel') {
76
+ if (typeof messageId !== 'string')
77
+ return;
78
+ const controller = controllersById.get(messageId);
79
+ if (controller)
80
+ controller.abort(new Error('Canceled'));
81
+ sendMessage({ type: 'cancelled', id: messageId });
82
+ return;
83
+ }
84
+ if (messageType !== 'transform' || !isTransformMessage(message))
85
+ return;
86
+ const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = message;
87
+ if (!id.trim()) {
88
+ sendMessage({
89
+ type: 'error',
90
+ id,
91
+ error: {
92
+ name: 'ValidationError',
93
+ message: 'Missing transform message id',
94
+ url: url || '',
95
+ },
96
+ });
97
+ return;
98
+ }
99
+ if (!url.trim()) {
100
+ sendMessage({
101
+ type: 'error',
102
+ id,
103
+ error: {
104
+ name: 'ValidationError',
105
+ message: 'Missing transform URL',
106
+ url,
107
+ },
108
+ });
109
+ return;
110
+ }
111
+ const controller = new AbortController();
112
+ controllersById.set(id, controller);
113
+ try {
114
+ const content = decodeHtml(html, htmlBuffer, encoding, decoder);
115
+ const result = runTransform(content, url, {
116
+ includeMetadata,
117
+ signal: controller.signal,
118
+ ...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
119
+ ...(inputTruncated ? { inputTruncated: true } : {}),
120
+ });
121
+ sendMessage(createResultMessage(id, result));
122
+ }
123
+ catch (error) {
124
+ sendMessage(createErrorMessage(id, url, error));
125
+ }
126
+ finally {
127
+ controllersById.delete(id);
128
+ }
129
+ };
130
+ }
@@ -1,2 +1 @@
1
1
  export {};
2
- //# sourceMappingURL=transform-child.d.ts.map
@@ -1,6 +1,6 @@
1
1
  import process from 'node:process';
2
- import { FetchError, getErrorMessage } from '../../errors.js';
3
2
  import { transformHtmlToMarkdownInProcess } from '../transform.js';
3
+ import { createTransformMessageHandler } from './shared.js';
4
4
  const send = process.send?.bind(process);
5
5
  if (!send)
6
6
  throw new Error('transform-child started without IPC channel');
@@ -8,138 +8,8 @@ const sendMessage = send;
8
8
  function postMessage(message) {
9
9
  sendMessage(message);
10
10
  }
11
- const controllersById = new Map();
12
- const decoder = new TextDecoder('utf-8');
13
- function postError(id, url, error) {
14
- if (error instanceof FetchError) {
15
- postMessage({
16
- type: 'error',
17
- id,
18
- error: {
19
- name: error.name,
20
- message: error.message,
21
- url: error.url,
22
- statusCode: error.statusCode,
23
- details: { ...error.details },
24
- },
25
- });
26
- return;
27
- }
28
- postMessage({
29
- type: 'error',
30
- id,
31
- error: {
32
- name: error instanceof Error ? error.name : 'Error',
33
- message: getErrorMessage(error),
34
- url,
35
- },
36
- });
37
- }
38
- function isValidMessage(msg) {
39
- const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = msg;
40
- if (typeof id !== 'string')
41
- return false;
42
- if (typeof url !== 'string')
43
- return false;
44
- if (typeof includeMetadata !== 'boolean')
45
- return false;
46
- if (html !== undefined && typeof html !== 'string')
47
- return false;
48
- if (htmlBuffer !== undefined && !(htmlBuffer instanceof Uint8Array))
49
- return false;
50
- if (encoding !== undefined && typeof encoding !== 'string')
51
- return false;
52
- if (skipNoiseRemoval !== undefined && typeof skipNoiseRemoval !== 'boolean')
53
- return false;
54
- if (inputTruncated !== undefined && typeof inputTruncated !== 'boolean')
55
- return false;
56
- return true;
57
- }
58
- function postValidationError(id, url, message) {
59
- postMessage({
60
- type: 'error',
61
- id,
62
- error: { name: 'ValidationError', message, url },
63
- });
64
- }
65
- function decodeHtml(html, htmlBuffer, encoding) {
66
- if (!htmlBuffer)
67
- return html ?? '';
68
- if (!encoding || encoding === 'utf-8')
69
- return decoder.decode(htmlBuffer);
70
- try {
71
- return new TextDecoder(encoding).decode(htmlBuffer);
72
- }
73
- catch {
74
- // Fall back to UTF-8 when server-provided charset labels are invalid.
75
- return decoder.decode(htmlBuffer);
76
- }
77
- }
78
- function buildTransformResultMessage(result) {
79
- return {
80
- markdown: result.markdown,
81
- ...(result.metadata ? { metadata: result.metadata } : {}),
82
- ...(result.title !== undefined ? { title: result.title } : {}),
83
- truncated: result.truncated,
84
- };
85
- }
86
- function handleTransform(msg) {
87
- if (!isValidMessage(msg))
88
- return;
89
- const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = msg;
90
- if (!id.trim()) {
91
- postValidationError(id, url || '', 'Missing transform message id');
92
- return;
93
- }
94
- if (!url.trim()) {
95
- postValidationError(id, url, 'Missing transform URL');
96
- return;
97
- }
98
- const controller = new AbortController();
99
- controllersById.set(id, controller);
100
- try {
101
- const content = decodeHtml(html, htmlBuffer, encoding);
102
- const result = transformHtmlToMarkdownInProcess(content, url, {
103
- includeMetadata,
104
- signal: controller.signal,
105
- ...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
106
- ...(inputTruncated ? { inputTruncated: true } : {}),
107
- });
108
- const { markdown, metadata, title, truncated } = result;
109
- postMessage({
110
- type: 'result',
111
- id,
112
- result: buildTransformResultMessage({
113
- markdown,
114
- ...(metadata ? { metadata } : {}),
115
- ...(title === undefined ? {} : { title }),
116
- truncated,
117
- }),
118
- });
119
- }
120
- catch (error) {
121
- postError(id, url, error);
122
- }
123
- finally {
124
- controllersById.delete(id);
125
- }
126
- }
127
- process.on('message', (raw) => {
128
- if (!raw || typeof raw !== 'object')
129
- return;
130
- const msg = raw;
131
- const { type, id } = msg;
132
- if (type === 'cancel') {
133
- if (typeof id !== 'string')
134
- return;
135
- const controller = controllersById.get(id);
136
- if (controller)
137
- controller.abort(new Error('Canceled'));
138
- postMessage({ type: 'cancelled', id });
139
- return;
140
- }
141
- if (type === 'transform') {
142
- handleTransform(msg);
143
- }
11
+ const onMessage = createTransformMessageHandler({
12
+ sendMessage: postMessage,
13
+ runTransform: transformHtmlToMarkdownInProcess,
144
14
  });
145
- //# sourceMappingURL=transform-child.js.map
15
+ process.on('message', onMessage);
@@ -1,2 +1 @@
1
1
  export {};
2
- //# sourceMappingURL=transform-worker.d.ts.map
@@ -1,134 +1,13 @@
1
1
  import { parentPort } from 'node:worker_threads';
2
- import { FetchError, getErrorMessage } from '../../errors.js';
3
2
  import { transformHtmlToMarkdownInProcess } from '../transform.js';
3
+ import { createTransformMessageHandler } from './shared.js';
4
4
  if (!parentPort)
5
5
  throw new Error('transform-worker started without parentPort');
6
6
  const port = parentPort;
7
- const controllersById = new Map();
8
- const decoder = new TextDecoder('utf-8');
9
- function postError(id, url, error) {
10
- if (error instanceof FetchError) {
11
- port.postMessage({
12
- type: 'error',
13
- id,
14
- error: {
15
- name: error.name,
16
- message: error.message,
17
- url: error.url,
18
- statusCode: error.statusCode,
19
- details: { ...error.details },
20
- },
21
- });
22
- return;
23
- }
24
- port.postMessage({
25
- type: 'error',
26
- id,
27
- error: {
28
- name: error instanceof Error ? error.name : 'Error',
29
- message: getErrorMessage(error),
30
- url,
31
- },
32
- });
33
- }
34
- function isValidMessage(msg) {
35
- const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = msg;
36
- return (typeof id === 'string' &&
37
- typeof url === 'string' &&
38
- typeof includeMetadata === 'boolean' &&
39
- (html === undefined || typeof html === 'string') &&
40
- (htmlBuffer === undefined || htmlBuffer instanceof Uint8Array) &&
41
- (encoding === undefined || typeof encoding === 'string') &&
42
- (skipNoiseRemoval === undefined || typeof skipNoiseRemoval === 'boolean') &&
43
- (inputTruncated === undefined || typeof inputTruncated === 'boolean'));
44
- }
45
- function postValidationError(id, message, url) {
46
- port.postMessage({
47
- type: 'error',
48
- id,
49
- error: { name: 'ValidationError', message, url },
50
- });
51
- }
52
- function decodeHtmlBuffer(htmlBuffer, encoding) {
53
- if (!encoding || encoding === 'utf-8') {
54
- return decoder.decode(htmlBuffer);
55
- }
56
- try {
57
- return new TextDecoder(encoding).decode(htmlBuffer);
58
- }
59
- catch {
60
- // Fall back to UTF-8 when server-provided charset labels are invalid.
61
- return decoder.decode(htmlBuffer);
62
- }
63
- }
64
- function resolveHtmlContent(html, htmlBuffer, encoding) {
65
- return htmlBuffer ? decodeHtmlBuffer(htmlBuffer, encoding) : (html ?? '');
66
- }
67
- function buildTransformResultMessage(result) {
68
- return {
69
- markdown: result.markdown,
70
- ...(result.metadata ? { metadata: result.metadata } : {}),
71
- ...(result.title !== undefined ? { title: result.title } : {}),
72
- truncated: result.truncated,
73
- };
74
- }
75
- function handleTransform(msg) {
76
- if (!isValidMessage(msg))
77
- return;
78
- const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = msg;
79
- if (!id.trim()) {
80
- postValidationError(id, 'Missing transform message id', url || '');
81
- return;
82
- }
83
- if (!url.trim()) {
84
- postValidationError(id, 'Missing transform URL', url);
85
- return;
86
- }
87
- const controller = new AbortController();
88
- controllersById.set(id, controller);
89
- try {
90
- const content = resolveHtmlContent(html, htmlBuffer, encoding);
91
- const result = transformHtmlToMarkdownInProcess(content, url, {
92
- includeMetadata,
93
- signal: controller.signal,
94
- ...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
95
- ...(inputTruncated ? { inputTruncated: true } : {}),
96
- });
97
- const { markdown, metadata, title, truncated } = result;
98
- port.postMessage({
99
- type: 'result',
100
- id,
101
- result: buildTransformResultMessage({
102
- markdown,
103
- ...(metadata ? { metadata } : {}),
104
- ...(title === undefined ? {} : { title }),
105
- truncated,
106
- }),
107
- });
108
- }
109
- catch (error) {
110
- postError(id, url, error);
111
- }
112
- finally {
113
- controllersById.delete(id);
114
- }
115
- }
116
- port.on('message', (raw) => {
117
- if (!raw || typeof raw !== 'object')
118
- return;
119
- const msg = raw;
120
- const { type, id } = msg;
121
- if (type === 'cancel') {
122
- if (typeof id !== 'string')
123
- return;
124
- const controller = controllersById.get(id);
125
- if (controller)
126
- controller.abort(new Error('Canceled'));
127
- port.postMessage({ type: 'cancelled', id });
128
- return;
129
- }
130
- if (type === 'transform') {
131
- handleTransform(msg);
132
- }
7
+ const onMessage = createTransformMessageHandler({
8
+ sendMessage: (message) => {
9
+ port.postMessage(message);
10
+ },
11
+ runTransform: transformHtmlToMarkdownInProcess,
133
12
  });
134
- //# sourceMappingURL=transform-worker.js.map
13
+ port.on('message', onMessage);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/fetch-url-mcp",
3
- "version": "1.3.1",
3
+ "version": "1.5.0",
4
4
  "mcpName": "io.github.j0hanz/fetch-url-mcp",
5
5
  "description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
6
6
  "type": "module",
@@ -51,12 +51,15 @@
51
51
  "start": "node dist/index.js",
52
52
  "format": "prettier --write .",
53
53
  "type-check": "node scripts/tasks.mjs type-check",
54
+ "type-check:src": "node node_modules/typescript/bin/tsc -p tsconfig.json --noEmit",
55
+ "type-check:tests": "node node_modules/typescript/bin/tsc -p tsconfig.test.json --noEmit",
54
56
  "type-check:diagnostics": "tsc --noEmit --extendedDiagnostics",
55
57
  "type-check:trace": "node -e \"require('fs').rmSync('.ts-trace',{recursive:true,force:true})\" && tsc --noEmit --generateTrace .ts-trace",
56
58
  "lint": "eslint .",
59
+ "lint:tests": "eslint src/__tests__",
57
60
  "lint:fix": "eslint . --fix",
58
61
  "test": "node scripts/tasks.mjs test",
59
- "test:fast": "node --test --import tsx/esm src/__tests__/**/*.test.ts",
62
+ "test:fast": "node --test --import tsx/esm src/__tests__/**/*.test.ts node-tests/**/*.test.ts",
60
63
  "test:coverage": "node scripts/tasks.mjs test --coverage",
61
64
  "knip": "knip",
62
65
  "knip:fix": "knip --fix",
@@ -64,7 +67,7 @@
64
67
  "prepublishOnly": "npm run lint && npm run type-check && npm run build"
65
68
  },
66
69
  "dependencies": {
67
- "@modelcontextprotocol/sdk": "^1.26.0",
70
+ "@modelcontextprotocol/sdk": "^1.27.1",
68
71
  "@mozilla/readability": "^0.6.0",
69
72
  "linkedom": "^0.18.12",
70
73
  "node-html-markdown": "^2.0.0",
@@ -75,15 +78,16 @@
75
78
  "@eslint/js": "^10.0.1",
76
79
  "@trivago/prettier-plugin-sort-imports": "^6.0.2",
77
80
  "@types/node": "^24",
78
- "eslint": "^10.0.0",
81
+ "eslint": "^10.0.2",
79
82
  "eslint-config-prettier": "^10.1.8",
80
- "eslint-plugin-de-morgan": "^2.0.0",
83
+ "eslint-plugin-de-morgan": "^2.1.1",
81
84
  "eslint-plugin-depend": "^1.4.0",
82
85
  "eslint-plugin-unused-imports": "^4.4.1",
83
- "knip": "^5.83.1",
86
+ "knip": "^5.85.0",
84
87
  "prettier": "^3.8.1",
88
+ "tsx": "^4.21.0",
85
89
  "typescript": "^5.9.3",
86
- "typescript-eslint": "^8.56.0"
90
+ "typescript-eslint": "^8.56.1"
87
91
  },
88
92
  "engines": {
89
93
  "node": ">=24"
package/dist/cache.d.ts DELETED
@@ -1,54 +0,0 @@
1
- import { z } from 'zod';
2
- declare const CachedPayloadSchema: z.ZodObject<{
3
- content: z.ZodOptional<z.ZodString>;
4
- markdown: z.ZodOptional<z.ZodString>;
5
- title: z.ZodOptional<z.ZodString>;
6
- }, z.core.$strict>;
7
- type CachedPayload = z.infer<typeof CachedPayloadSchema>;
8
- interface CacheEntry {
9
- url: string;
10
- title?: string;
11
- content: string;
12
- fetchedAt: string;
13
- expiresAt: string;
14
- }
15
- interface CacheKeyParts {
16
- namespace: string;
17
- urlHash: string;
18
- }
19
- interface CacheSetOptions {
20
- force?: boolean;
21
- }
22
- interface CacheGetOptions {
23
- force?: boolean;
24
- }
25
- interface CacheEntryMetadata {
26
- url: string;
27
- title?: string;
28
- }
29
- interface CacheUpdateEvent {
30
- cacheKey: string;
31
- namespace: string;
32
- urlHash: string;
33
- listChanged: boolean;
34
- }
35
- type CacheUpdateListener = (event: CacheUpdateEvent) => unknown;
36
- export declare function parseCachedPayload(raw: string): CachedPayload | null;
37
- export declare function resolveCachedPayloadContent(payload: CachedPayload): string | null;
38
- export declare function createCacheKey(namespace: string, url: string, vary?: Record<string, unknown> | string): string | null;
39
- export declare function parseCacheKey(cacheKey: string): CacheKeyParts | null;
40
- export declare function onCacheUpdate(listener: CacheUpdateListener): () => void;
41
- export declare function get(cacheKey: string | null, options?: CacheGetOptions): CacheEntry | undefined;
42
- export declare function set(cacheKey: string | null, content: string, metadata: CacheEntryMetadata, options?: CacheSetOptions): void;
43
- export declare function keys(): readonly string[];
44
- /**
45
- * Return lightweight metadata (url and optional page title) for a cache entry.
46
- * Returns `undefined` if the key is not found or cache is disabled.
47
- */
48
- export declare function getEntryMeta(cacheKey: string): {
49
- url: string;
50
- title?: string;
51
- } | undefined;
52
- export declare function isEnabled(): boolean;
53
- export {};
54
- //# sourceMappingURL=cache.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"cache.d.ts","sourceRoot":"","sources":["../src/cache.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAYxB,QAAA,MAAM,mBAAmB;;;;kBAIvB,CAAC;AACH,KAAK,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAGzD,UAAU,UAAU;IAClB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,UAAU,aAAa;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,UAAU,eAAe;IACvB,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,UAAU,eAAe;IACvB,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED,UAAU,kBAAkB;IAC1B,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAMD,UAAU,gBAAgB;IACxB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,OAAO,CAAC;CACtB;AAED,KAAK,mBAAmB,GAAG,CAAC,KAAK,EAAE,gBAAgB,KAAK,OAAO,CAAC;AAWhE,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAOpE;AAED,wBAAgB,2BAA2B,CACzC,OAAO,EAAE,aAAa,GACrB,MAAM,GAAG,IAAI,CAEf;AA4BD,wBAAgB,cAAc,CAC5B,SAAS,EAAE,MAAM,EACjB,GAAG,EAAE,MAAM,EACX,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,GACtC,MAAM,GAAG,IAAI,CAoBf;AAED,wBAAgB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAMpE;AA6LD,wBAAgB,aAAa,CAAC,QAAQ,EAAE,mBAAmB,GAAG,MAAM,IAAI,CAEvE;AAED,wBAAgB,GAAG,CACjB,QAAQ,EAAE,MAAM,GAAG,IAAI,EACvB,OAAO,CAAC,EAAE,eAAe,GACxB,UAAU,GAAG,SAAS,CAExB;AAED,wBAAgB,GAAG,CACjB,QAAQ,EAAE,MAAM,GAAG,IAAI,EACvB,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,kBAAkB,EAC5B,OAAO,CAAC,EAAE,eAAe,GACxB,IAAI,CAEN;AAED,wBAAgB,IAAI,IAAI,SAAS,MAAM,EAAE,CAExC;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAC1B,QAAQ,EAAE,MAAM,GACf;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GAAG,SAAS,CAM7C;AAED,wBAAgB,SAAS,IAAI,OAAO,CAEnC"}