@j0hanz/fetch-url-mcp 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184):
  1. package/dist/cache.d.ts +9 -3
  2. package/dist/cache.d.ts.map +1 -0
  3. package/dist/cache.js +44 -110
  4. package/dist/cache.js.map +1 -0
  5. package/dist/cli.d.ts +1 -0
  6. package/dist/cli.d.ts.map +1 -0
  7. package/dist/cli.js +9 -4
  8. package/dist/cli.js.map +1 -0
  9. package/dist/config.d.ts +2 -3
  10. package/dist/config.d.ts.map +1 -0
  11. package/dist/config.js +18 -25
  12. package/dist/config.js.map +1 -0
  13. package/dist/crypto.d.ts +1 -0
  14. package/dist/crypto.d.ts.map +1 -0
  15. package/dist/crypto.js +1 -0
  16. package/dist/crypto.js.map +1 -0
  17. package/dist/dom-noise-removal.d.ts +2 -1
  18. package/dist/dom-noise-removal.d.ts.map +1 -0
  19. package/dist/dom-noise-removal.js +8 -4
  20. package/dist/dom-noise-removal.js.map +1 -0
  21. package/dist/download.d.ts +4 -0
  22. package/dist/download.d.ts.map +1 -0
  23. package/dist/download.js +106 -0
  24. package/dist/download.js.map +1 -0
  25. package/dist/errors.d.ts +1 -0
  26. package/dist/errors.d.ts.map +1 -0
  27. package/dist/errors.js +1 -0
  28. package/dist/errors.js.map +1 -0
  29. package/dist/examples/mcp-fetch-url-client.js +19 -3
  30. package/dist/examples/mcp-fetch-url-client.js.map +1 -1
  31. package/dist/fetch-content.d.ts +1 -0
  32. package/dist/fetch-content.d.ts.map +1 -0
  33. package/dist/fetch-content.js +14 -14
  34. package/dist/fetch-content.js.map +1 -0
  35. package/dist/fetch-stream.d.ts +1 -0
  36. package/dist/fetch-stream.d.ts.map +1 -0
  37. package/dist/fetch-stream.js +6 -3
  38. package/dist/fetch-stream.js.map +1 -0
  39. package/dist/fetch.d.ts +1 -0
  40. package/dist/fetch.d.ts.map +1 -0
  41. package/dist/fetch.js +120 -51
  42. package/dist/fetch.js.map +1 -0
  43. package/dist/host-normalization.d.ts +1 -0
  44. package/dist/host-normalization.d.ts.map +1 -0
  45. package/dist/host-normalization.js +19 -6
  46. package/dist/host-normalization.js.map +1 -0
  47. package/dist/http/auth.d.ts +35 -0
  48. package/dist/http/auth.d.ts.map +1 -0
  49. package/dist/http/auth.js +283 -0
  50. package/dist/http/auth.js.map +1 -0
  51. package/dist/http/health.d.ts +7 -0
  52. package/dist/http/health.d.ts.map +1 -0
  53. package/dist/http/health.js +166 -0
  54. package/dist/http/health.js.map +1 -0
  55. package/dist/http/helpers.d.ts +58 -0
  56. package/dist/http/helpers.d.ts.map +1 -0
  57. package/dist/http/helpers.js +372 -0
  58. package/dist/http/helpers.js.map +1 -0
  59. package/dist/{http-native.d.ts → http/native.d.ts} +1 -0
  60. package/dist/http/native.d.ts.map +1 -0
  61. package/dist/http/native.js +529 -0
  62. package/dist/http/native.js.map +1 -0
  63. package/dist/http/rate-limit.d.ts +13 -0
  64. package/dist/http/rate-limit.d.ts.map +1 -0
  65. package/dist/http/rate-limit.js +81 -0
  66. package/dist/http/rate-limit.js.map +1 -0
  67. package/dist/index.d.ts +1 -0
  68. package/dist/index.d.ts.map +1 -0
  69. package/dist/index.js +2 -1
  70. package/dist/index.js.map +1 -0
  71. package/dist/instructions.d.ts +2 -0
  72. package/dist/instructions.d.ts.map +1 -0
  73. package/dist/instructions.js +108 -0
  74. package/dist/instructions.js.map +1 -0
  75. package/dist/ip-blocklist.d.ts +1 -0
  76. package/dist/ip-blocklist.d.ts.map +1 -0
  77. package/dist/ip-blocklist.js +2 -0
  78. package/dist/ip-blocklist.js.map +1 -0
  79. package/dist/json.d.ts +2 -1
  80. package/dist/json.d.ts.map +1 -0
  81. package/dist/json.js +19 -6
  82. package/dist/json.js.map +1 -0
  83. package/dist/language-detection.d.ts +1 -0
  84. package/dist/language-detection.d.ts.map +1 -0
  85. package/dist/language-detection.js +1 -0
  86. package/dist/language-detection.js.map +1 -0
  87. package/dist/markdown-cleanup.d.ts +2 -1
  88. package/dist/markdown-cleanup.d.ts.map +1 -0
  89. package/dist/markdown-cleanup.js +51 -52
  90. package/dist/markdown-cleanup.js.map +1 -0
  91. package/dist/mcp-validator.d.ts +1 -0
  92. package/dist/mcp-validator.d.ts.map +1 -0
  93. package/dist/mcp-validator.js +16 -8
  94. package/dist/mcp-validator.js.map +1 -0
  95. package/dist/mcp.d.ts +2 -2
  96. package/dist/mcp.d.ts.map +1 -0
  97. package/dist/mcp.js +17 -333
  98. package/dist/mcp.js.map +1 -0
  99. package/dist/observability.d.ts +2 -0
  100. package/dist/observability.d.ts.map +1 -0
  101. package/dist/observability.js +30 -5
  102. package/dist/observability.js.map +1 -0
  103. package/dist/prompts.d.ts +1 -0
  104. package/dist/prompts.d.ts.map +1 -0
  105. package/dist/prompts.js +15 -3
  106. package/dist/prompts.js.map +1 -0
  107. package/dist/resources.d.ts +1 -0
  108. package/dist/resources.d.ts.map +1 -0
  109. package/dist/resources.js +30 -23
  110. package/dist/resources.js.map +1 -0
  111. package/dist/server-tuning.d.ts +1 -0
  112. package/dist/server-tuning.d.ts.map +1 -0
  113. package/dist/server-tuning.js +11 -15
  114. package/dist/server-tuning.js.map +1 -0
  115. package/dist/server.d.ts +1 -0
  116. package/dist/server.d.ts.map +1 -0
  117. package/dist/server.js +23 -23
  118. package/dist/server.js.map +1 -0
  119. package/dist/session.d.ts +1 -0
  120. package/dist/session.d.ts.map +1 -0
  121. package/dist/session.js +55 -28
  122. package/dist/session.js.map +1 -0
  123. package/dist/tasks/execution.d.ts +42 -0
  124. package/dist/tasks/execution.d.ts.map +1 -0
  125. package/dist/tasks/execution.js +232 -0
  126. package/dist/tasks/execution.js.map +1 -0
  127. package/dist/{tasks.d.ts → tasks/manager.d.ts} +6 -0
  128. package/dist/tasks/manager.d.ts.map +1 -0
  129. package/dist/{tasks.js → tasks/manager.js} +86 -37
  130. package/dist/tasks/manager.js.map +1 -0
  131. package/dist/tasks/owner.d.ts +33 -0
  132. package/dist/tasks/owner.d.ts.map +1 -0
  133. package/dist/tasks/owner.js +99 -0
  134. package/dist/tasks/owner.js.map +1 -0
  135. package/dist/timer-utils.d.ts +1 -0
  136. package/dist/timer-utils.d.ts.map +1 -0
  137. package/dist/timer-utils.js +12 -5
  138. package/dist/timer-utils.js.map +1 -0
  139. package/dist/tool-errors.d.ts +12 -0
  140. package/dist/tool-errors.d.ts.map +1 -0
  141. package/dist/tool-errors.js +52 -0
  142. package/dist/tool-errors.js.map +1 -0
  143. package/dist/tool-pipeline.d.ts +72 -0
  144. package/dist/tool-pipeline.d.ts.map +1 -0
  145. package/dist/tool-pipeline.js +407 -0
  146. package/dist/tool-pipeline.js.map +1 -0
  147. package/dist/tool-progress.d.ts +32 -0
  148. package/dist/tool-progress.d.ts.map +1 -0
  149. package/dist/tool-progress.js +123 -0
  150. package/dist/tool-progress.js.map +1 -0
  151. package/dist/tools.d.ts +35 -111
  152. package/dist/tools.d.ts.map +1 -0
  153. package/dist/tools.js +93 -566
  154. package/dist/tools.js.map +1 -0
  155. package/dist/{transform.d.ts → transform/transform.d.ts} +2 -1
  156. package/dist/transform/transform.d.ts.map +1 -0
  157. package/dist/{transform.js → transform/transform.js} +73 -769
  158. package/dist/transform/transform.js.map +1 -0
  159. package/dist/{transform-types.d.ts → transform/types.d.ts} +1 -0
  160. package/dist/transform/types.d.ts.map +1 -0
  161. package/dist/{transform-types.js → transform/types.js} +1 -0
  162. package/dist/transform/types.js.map +1 -0
  163. package/dist/transform/worker-pool.d.ts +93 -0
  164. package/dist/transform/worker-pool.d.ts.map +1 -0
  165. package/dist/transform/worker-pool.js +759 -0
  166. package/dist/transform/worker-pool.js.map +1 -0
  167. package/dist/transform/workers/transform-child.d.ts +2 -0
  168. package/dist/transform/workers/transform-child.d.ts.map +1 -0
  169. package/dist/{workers → transform/workers}/transform-child.js +3 -1
  170. package/dist/transform/workers/transform-child.js.map +1 -0
  171. package/dist/transform/workers/transform-worker.d.ts +2 -0
  172. package/dist/transform/workers/transform-worker.d.ts.map +1 -0
  173. package/dist/{workers → transform/workers}/transform-worker.js +2 -1
  174. package/dist/transform/workers/transform-worker.js.map +1 -0
  175. package/dist/type-guards.d.ts +1 -0
  176. package/dist/type-guards.d.ts.map +1 -0
  177. package/dist/type-guards.js +1 -0
  178. package/dist/type-guards.js.map +1 -0
  179. package/package.json +6 -7
  180. package/dist/AGENTS.md +0 -152
  181. package/dist/http-native.js +0 -1320
  182. package/dist/instructions.md +0 -113
  183. package/dist/workers/transform-child.d.ts +0 -1
  184. package/dist/workers/transform-worker.d.ts +0 -1
@@ -1,24 +1,18 @@
1
- import { AsyncLocalStorage, AsyncResource } from 'node:async_hooks';
2
1
  import { Buffer } from 'node:buffer';
3
- import { fork } from 'node:child_process';
4
2
  import diagnosticsChannel from 'node:diagnostics_channel';
5
- import { availableParallelism } from 'node:os';
6
3
  import { performance } from 'node:perf_hooks';
7
- import { fileURLToPath } from 'node:url';
8
- import { isSharedArrayBuffer } from 'node:util/types';
9
- import { Worker, } from 'node:worker_threads';
4
+ import { isProbablyReaderable, Readability } from '@mozilla/readability';
10
5
  import { parseHTML } from 'linkedom';
11
6
  import { NodeHtmlMarkdown, } from 'node-html-markdown';
12
- import { isProbablyReaderable, Readability } from '@mozilla/readability';
13
- import { config } from './config.js';
14
- import { removeNoiseFromHtml } from './dom-noise-removal.js';
15
- import { FetchError, getErrorMessage } from './errors.js';
16
- import { isRawTextContentUrl } from './fetch.js';
17
- import { detectLanguageFromCode, resolveLanguageFromAttributes, } from './language-detection.js';
18
- import { addSourceToMarkdown, buildMetadataFooter, cleanupMarkdownArtifacts, extractTitleFromRawMarkdown, isRawTextContent, } from './markdown-cleanup.js';
19
- import { getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from './observability.js';
20
- import { createUnrefTimeout } from './timer-utils.js';
21
- import { isLikeNode, isObject } from './type-guards.js';
7
+ import { config } from '../config.js';
8
+ import { removeNoiseFromHtml } from '../dom-noise-removal.js';
9
+ import { FetchError, getErrorMessage } from '../errors.js';
10
+ import { isRawTextContentUrl } from '../fetch.js';
11
+ import { detectLanguageFromCode, resolveLanguageFromAttributes, } from '../language-detection.js';
12
+ import { addSourceToMarkdown, buildMetadataFooter, cleanupMarkdownArtifacts, extractTitleFromRawMarkdown, isRawTextContent, } from '../markdown-cleanup.js';
13
+ import { getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from '../observability.js';
14
+ import { isLikeNode, isObject } from '../type-guards.js';
15
+ import { getOrCreateWorkerPool, getWorkerPoolStats, shutdownWorkerPool, } from './worker-pool.js';
22
16
  const utf8Decoder = new TextDecoder('utf-8');
23
17
  function decodeInput(input, encoding) {
24
18
  if (typeof input === 'string')
@@ -219,26 +213,27 @@ function truncateHtml(html, inputTruncated = false) {
219
213
  const maxSize = config.constants.maxHtmlSize;
220
214
  if (maxSize <= 0)
221
215
  return { html, truncated: false };
222
- // Fast path: V8 optimized byte length check (no allocation)
223
- const byteLength = Buffer.byteLength(html, 'utf8');
224
- if (byteLength <= maxSize && !inputTruncated)
225
- return { html, truncated: false };
216
+ if (html.length <= maxSize) {
217
+ const byteLength = getUtf8ByteLength(html);
218
+ if (byteLength <= maxSize && !inputTruncated)
219
+ return { html, truncated: false };
220
+ }
226
221
  const sliced = html.slice(0, maxSize);
227
- if (Buffer.byteLength(sliced, 'utf8') <= maxSize) {
222
+ if (getUtf8ByteLength(sliced) <= maxSize) {
228
223
  return { html: trimDanglingTagFragment(sliced), truncated: true };
229
224
  }
230
225
  const htmlBuffer = Buffer.from(sliced, 'utf8');
231
226
  const content = trimDanglingTagFragment(trimUtf8Buffer(htmlBuffer, maxSize).toString('utf8'));
232
227
  logWarn('HTML content exceeds maximum size, truncating', {
233
- size: byteLength,
228
+ size: getUtf8ByteLength(html),
234
229
  maxSize,
235
- truncatedSize: Buffer.byteLength(content, 'utf8'),
230
+ truncatedSize: getUtf8ByteLength(content),
236
231
  });
237
232
  return { html: content, truncated: true };
238
233
  }
239
234
  function willTruncate(html) {
240
235
  const maxSize = config.constants.maxHtmlSize;
241
- return maxSize > 0 && getUtf8ByteLength(html) > maxSize;
236
+ return (maxSize > 0 && (html.length > maxSize || getUtf8ByteLength(html) > maxSize));
242
237
  }
243
238
  const HEAD_END_PATTERN = /<\/head\s*>|<body\b/i;
244
239
  const MAX_HEAD_SCAN_LENGTH = 50_000;
@@ -694,11 +689,12 @@ function buildInlineCodeTranslator() {
694
689
  };
695
690
  }
696
691
  function buildCodeTranslator(ctx) {
692
+ const inlineCodeTranslator = buildInlineCodeTranslator();
697
693
  if (!isObject(ctx))
698
- return buildInlineCodeTranslator();
694
+ return inlineCodeTranslator;
699
695
  const { parent } = ctx;
700
696
  if (!isCodeBlock(parent))
701
- return buildInlineCodeTranslator();
697
+ return inlineCodeTranslator;
702
698
  return { noEscape: true, preserveWhitespace: true };
703
699
  }
704
700
  function extractFirstSrcsetUrl(srcset) {
@@ -713,14 +709,17 @@ const LAZY_SRC_ATTRIBUTES = [
713
709
  'data-original',
714
710
  'data-srcset',
715
711
  ];
712
+ function isDataUri(value) {
713
+ return value.startsWith('data:');
714
+ }
716
715
  function extractNonDataSrcsetUrl(value) {
717
716
  const url = extractFirstSrcsetUrl(value);
718
- return url && !url.startsWith('data:') ? url : undefined;
717
+ return url && !isDataUri(url) ? url : undefined;
719
718
  }
720
719
  function resolveLazySrc(getAttribute) {
721
720
  for (const attr of LAZY_SRC_ATTRIBUTES) {
722
721
  const lazy = getAttribute(attr);
723
- if (!lazy || lazy.startsWith('data:'))
722
+ if (!lazy || isDataUri(lazy))
724
723
  continue;
725
724
  if (attr === 'data-srcset') {
726
725
  const url = extractNonDataSrcsetUrl(lazy);
@@ -736,7 +735,7 @@ function resolveImageSrc(getAttribute) {
736
735
  if (!getAttribute)
737
736
  return '';
738
737
  const srcRaw = getAttribute('src') ?? '';
739
- if (srcRaw && !srcRaw.startsWith('data:'))
738
+ if (srcRaw && !isDataUri(srcRaw))
740
739
  return srcRaw;
741
740
  // First check common lazy-loading attributes that may contain non-data URLs before falling back to the native srcset, as some sites use data URIs in lazy attributes while still providing valid URLs in srcset.
742
741
  const lazySrc = resolveLazySrc(getAttribute);
@@ -750,7 +749,7 @@ function resolveImageSrc(getAttribute) {
750
749
  return url;
751
750
  }
752
751
  // If the only available src is a data URI, we choose to omit it rather than include the raw data in the alt text or URL, as data URIs can be very long and are not useful in Markdown output.
753
- if (srcRaw.startsWith('data:'))
752
+ if (isDataUri(srcRaw))
754
753
  return '[data URI removed]';
755
754
  return '';
756
755
  }
@@ -1099,7 +1098,7 @@ function resolveRelativeUrlsInSegment(markdown, baseUrl, origin) {
1099
1098
  }
1100
1099
  return output;
1101
1100
  }
1102
- function resolveRelativeUrls(markdown, baseUrl) {
1101
+ function resolveRelativeUrls(markdown, baseUrl, signal) {
1103
1102
  let origin;
1104
1103
  try {
1105
1104
  ({ origin } = new URL(baseUrl));
@@ -1109,7 +1108,6 @@ function resolveRelativeUrls(markdown, baseUrl) {
1109
1108
  }
1110
1109
  if (!markdown)
1111
1110
  return markdown;
1112
- const lines = markdown.split('\n');
1113
1111
  let output = '';
1114
1112
  let buffer = '';
1115
1113
  let fenceMarker = null;
@@ -1119,26 +1117,51 @@ function resolveRelativeUrls(markdown, baseUrl) {
1119
1117
  output += resolveRelativeUrlsInSegment(buffer, baseUrl, origin);
1120
1118
  buffer = '';
1121
1119
  };
1122
- for (let i = 0; i < lines.length; i += 1) {
1123
- const line = lines[i] ?? '';
1120
+ const len = markdown.length;
1121
+ let lastIndex = 0;
1122
+ let lineCount = 0;
1123
+ while (lastIndex < len) {
1124
+ if (++lineCount % 500 === 0 && signal?.aborted) {
1125
+ throw new Error('Transform aborted during URL resolution');
1126
+ }
1127
+ let nextIndex = markdown.indexOf('\n', lastIndex);
1128
+ let line;
1129
+ let lineWithNewline;
1130
+ if (nextIndex === -1) {
1131
+ line = markdown.slice(lastIndex);
1132
+ lineWithNewline = line;
1133
+ nextIndex = len;
1134
+ }
1135
+ else {
1136
+ if (nextIndex > lastIndex && markdown.charCodeAt(nextIndex - 1) === 13) {
1137
+ line = markdown.slice(lastIndex, nextIndex - 1);
1138
+ }
1139
+ else {
1140
+ line = markdown.slice(lastIndex, nextIndex);
1141
+ }
1142
+ lineWithNewline = markdown.slice(lastIndex, nextIndex + 1);
1143
+ nextIndex++; // Skip \n
1144
+ }
1124
1145
  const trimmed = line.trimStart();
1125
- const lineWithNewline = i < lines.length - 1 ? `${line}\n` : line;
1126
1146
  if (fenceMarker) {
1127
1147
  output += lineWithNewline;
1128
1148
  if (trimmed.startsWith(fenceMarker) &&
1129
1149
  trimmed.slice(fenceMarker.length).trim() === '') {
1130
1150
  fenceMarker = null;
1131
1151
  }
1132
- continue;
1133
1152
  }
1134
- const fenceMatch = FENCE_LINE_PATTERN.exec(line);
1135
- if (fenceMatch?.[1]) {
1136
- flushBuffer();
1137
- output += lineWithNewline;
1138
- fenceMarker = fenceMatch[1];
1139
- continue;
1153
+ else {
1154
+ const fenceMatch = FENCE_LINE_PATTERN.exec(line);
1155
+ if (fenceMatch?.[1]) {
1156
+ flushBuffer();
1157
+ output += lineWithNewline;
1158
+ fenceMarker = fenceMatch[1];
1159
+ }
1160
+ else {
1161
+ buffer += lineWithNewline;
1162
+ }
1140
1163
  }
1141
- buffer += lineWithNewline;
1164
+ lastIndex = nextIndex;
1142
1165
  }
1143
1166
  flushBuffer();
1144
1167
  return output;
@@ -1148,12 +1171,12 @@ function translateHtmlToMarkdown(params) {
1148
1171
  abortPolicy.throwIfAborted(signal, url, 'markdown:begin');
1149
1172
  const cleanedHtml = skipNoiseRemoval
1150
1173
  ? html
1151
- : stageTracker.run(url, 'markdown:noise', () => removeNoiseFromHtml(html, document, url));
1174
+ : stageTracker.run(url, 'markdown:noise', () => removeNoiseFromHtml(html, document, url, signal));
1152
1175
  abortPolicy.throwIfAborted(signal, url, 'markdown:cleaned');
1153
1176
  const content = stageTracker.run(url, 'markdown:translate', () => translateHtmlFragmentToMarkdown(cleanedHtml));
1154
1177
  abortPolicy.throwIfAborted(signal, url, 'markdown:translated');
1155
1178
  const cleaned = cleanupMarkdownArtifacts(content, signal ? { signal, url } : { url });
1156
- return url ? resolveRelativeUrls(cleaned, url) : cleaned;
1179
+ return url ? resolveRelativeUrls(cleaned, url, signal) : cleaned;
1157
1180
  }
1158
1181
  function appendMetadataFooter(content, metadata, url) {
1159
1182
  const footer = buildMetadataFooter(metadata, url);
@@ -1448,13 +1471,13 @@ function shouldUseArticleContent(article, originalHtmlOrDocument) {
1448
1471
  return !hasTruncatedSentences(article.textContent);
1449
1472
  }
1450
1473
  function buildContentSource(params) {
1451
- const { html, url, article, extractedMeta, includeMetadata, useArticleContent, document, truncated, skipNoiseRemoval, } = params;
1474
+ const { html, url, article, extractedMeta, includeMetadata, useArticleContent, document, truncated, skipNoiseRemoval, signal, } = params;
1452
1475
  const metadata = createContentMetadataBlock(url, article, extractedMeta, useArticleContent, includeMetadata);
1453
1476
  if (useArticleContent && article) {
1454
1477
  // Readability output can still be noisy (unless user requested skip).
1455
1478
  const cleanedArticleHtml = skipNoiseRemoval
1456
1479
  ? article.content
1457
- : removeNoiseFromHtml(article.content, undefined, url);
1480
+ : removeNoiseFromHtml(article.content, undefined, url, signal);
1458
1481
  return {
1459
1482
  sourceHtml: cleanedArticleHtml,
1460
1483
  title: article.title,
@@ -1468,7 +1491,7 @@ function buildContentSource(params) {
1468
1491
  if (document) {
1469
1492
  const cleanedHtml = skipNoiseRemoval
1470
1493
  ? html
1471
- : removeNoiseFromHtml(html, document, url);
1494
+ : removeNoiseFromHtml(html, document, url, signal);
1472
1495
  const contentRoot = findContentRoot(document);
1473
1496
  if (contentRoot) {
1474
1497
  return {
@@ -1521,6 +1544,7 @@ function resolveContentSource(params) {
1521
1544
  document,
1522
1545
  truncated: truncated ?? false,
1523
1546
  ...(params.skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
1547
+ ...(params.signal ? { signal: params.signal } : {}),
1524
1548
  });
1525
1549
  }
1526
1550
  function buildMarkdownFromContext(context, url, signal) {
@@ -1603,727 +1627,6 @@ export function transformHtmlToMarkdownInProcess(html, url, options) {
1603
1627
  endTotalTransformStage(totalStage, completed);
1604
1628
  }
1605
1629
  }
1606
- function isWorkerResultPayload(value) {
1607
- if (!isObject(value))
1608
- return false;
1609
- const { markdown, metadata, title, truncated } = value;
1610
- const isMetadataObject = metadata === undefined || isObject(metadata);
1611
- if (!isMetadataObject)
1612
- return false;
1613
- if (metadata && !isExtractedMetadataPayload(metadata)) {
1614
- return false;
1615
- }
1616
- return (typeof markdown === 'string' &&
1617
- typeof truncated === 'boolean' &&
1618
- (title === undefined || typeof title === 'string'));
1619
- }
1620
- function isExtractedMetadataPayload(value) {
1621
- if (!isObject(value))
1622
- return false;
1623
- const { author, description, favicon, image, modifiedAt, publishedAt, title, } = value;
1624
- return ((title === undefined || typeof title === 'string') &&
1625
- (description === undefined || typeof description === 'string') &&
1626
- (author === undefined || typeof author === 'string') &&
1627
- (image === undefined || typeof image === 'string') &&
1628
- (favicon === undefined || typeof favicon === 'string') &&
1629
- (publishedAt === undefined || typeof publishedAt === 'string') &&
1630
- (modifiedAt === undefined || typeof modifiedAt === 'string'));
1631
- }
1632
- function isWorkerErrorPayload(value) {
1633
- if (!isObject(value))
1634
- return false;
1635
- const { details, message, name, statusCode, url } = value;
1636
- return (typeof name === 'string' &&
1637
- typeof message === 'string' &&
1638
- typeof url === 'string' &&
1639
- (statusCode === undefined || typeof statusCode === 'number') &&
1640
- (details === undefined || isObject(details)));
1641
- }
1642
- function isWorkerResponse(raw) {
1643
- if (!isObject(raw))
1644
- return false;
1645
- if (typeof raw['id'] !== 'string')
1646
- return false;
1647
- if (raw['type'] === 'result') {
1648
- return isWorkerResultPayload(raw['result']);
1649
- }
1650
- if (raw['type'] === 'error') {
1651
- return isWorkerErrorPayload(raw['error']);
1652
- }
1653
- if (raw['type'] === 'cancelled') {
1654
- return true;
1655
- }
1656
- return false;
1657
- }
1658
- function createTaskContext() {
1659
- const runWithStore = AsyncLocalStorage.snapshot();
1660
- const asyncResource = new AsyncResource('fetch-url-mcp.transform.task');
1661
- let disposed = false;
1662
- return {
1663
- run: (fn) => {
1664
- runWithStore(() => {
1665
- asyncResource.runInAsyncScope(fn);
1666
- });
1667
- },
1668
- dispose: () => {
1669
- if (disposed)
1670
- return;
1671
- disposed = true;
1672
- asyncResource.emitDestroy();
1673
- },
1674
- };
1675
- }
1676
- function buildWorkerDispatchPayload(task, supportsTransferList) {
1677
- const message = {
1678
- type: 'transform',
1679
- id: task.id,
1680
- url: task.url,
1681
- includeMetadata: task.includeMetadata,
1682
- ...(task.skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
1683
- ...(task.inputTruncated ? { inputTruncated: true } : {}),
1684
- };
1685
- if (!task.htmlBuffer) {
1686
- message.html = task.html;
1687
- return { message };
1688
- }
1689
- const htmlBuffer = ensureTightBuffer(task.htmlBuffer);
1690
- if (!supportsTransferList) {
1691
- message.htmlBuffer = htmlBuffer;
1692
- if (task.encoding)
1693
- message.encoding = task.encoding;
1694
- return { message };
1695
- }
1696
- const transferableHtmlBuffer = Uint8Array.from(htmlBuffer);
1697
- message.htmlBuffer = transferableHtmlBuffer;
1698
- if (task.encoding)
1699
- message.encoding = task.encoding;
1700
- const backingBuffer = transferableHtmlBuffer.buffer;
1701
- if (isSharedArrayBuffer(backingBuffer))
1702
- return { message };
1703
- return { message, transferList: [backingBuffer] };
1704
- }
1705
- /**
1706
- * Worker Pool Sizing Configuration
1707
- *
1708
- * Default: min(4, floor(availableParallelism() / 2)), constrained to [2, N]
1709
- *
1710
- * Tuning Guidance:
1711
- * - **Default behavior**: Appropriate for most deployments. Uses half of available
1712
- * CPU threads (capped at 4) to balance throughput with system resource availability.
1713
- *
1714
- * - **CPU-limited containers**: If running in a container with strict CPU limits
1715
- * (e.g., Docker with --cpus=2), the default may over-subscribe. Consider setting
1716
- * maxWorkerScale to match the container's CPU limit.
1717
- *
1718
- * - **High-concurrency workloads**: For dedicated servers handling many concurrent
1719
- * fetch requests, increasing maxWorkerScale to (availableParallelism() + 2) may
1720
- * improve throughput by overlapping I/O wait with computation.
1721
- *
1722
- * - **Memory-constrained environments**: Each worker allocates ~50-100MB for DOM
1723
- * parsing. If memory is limited, reduce maxWorkerScale to (availableParallelism() / 2)
1724
- * or lower to prevent OOM errors.
1725
- *
1726
- * - **Shared hosting**: On shared systems where CPU is contested, reducing the pool
1727
- * size prevents starving other processes. Consider maxWorkerScale = 2 or using
1728
- * process-based workers (TRANSFORM_WORKER_MODE=process) for better isolation.
1729
- *
1730
- * Configuration:
1731
- * - TRANSFORM_MAX_WORKER_SCALE env var (default: availableParallelism())
1732
- * - TRANSFORM_WORKER_MODE env var: 'threads' (default) or 'process'
1733
- *
1734
- * See config.ts for full worker configuration options.
1735
- */
1736
- const POOL_MIN_WORKERS = Math.max(2, Math.min(4, Math.floor(availableParallelism() / 2)));
1737
- const POOL_MAX_WORKERS = config.transform.maxWorkerScale;
1738
- const POOL_SCALE_THRESHOLD = 0.5;
1739
- const WORKER_NAME_PREFIX = 'fetch-url-mcp-transform';
1740
- const DEFAULT_TIMEOUT_MS = config.transform.timeoutMs;
1741
- const TRANSFORM_CHILD_PATH = fileURLToPath(new URL('./workers/transform-child.js', import.meta.url));
1742
- function ensureTightBuffer(buffer) {
1743
- if (buffer.byteOffset === 0 &&
1744
- buffer.byteLength === buffer.buffer.byteLength) {
1745
- return buffer;
1746
- }
1747
- return Buffer.from(buffer);
1748
- }
1749
- function createThreadWorkerHost(_workerIndex, name) {
1750
- const resourceLimits = config.transform.workerResourceLimits;
1751
- const worker = new Worker(new URL('./workers/transform-worker.js', import.meta.url), {
1752
- name,
1753
- ...(resourceLimits ? { resourceLimits } : {}),
1754
- });
1755
- return {
1756
- kind: 'thread',
1757
- supportsTransferList: true,
1758
- threadId: worker.threadId,
1759
- postMessage: (message, transferList) => {
1760
- worker.postMessage(message, transferList);
1761
- },
1762
- terminate: async () => {
1763
- await worker.terminate();
1764
- },
1765
- unref: () => {
1766
- worker.unref();
1767
- },
1768
- onMessage: (handler) => {
1769
- worker.on('message', handler);
1770
- },
1771
- onError: (handler) => {
1772
- worker.on('error', handler);
1773
- worker.on('messageerror', handler);
1774
- },
1775
- onExit: (handler) => {
1776
- worker.on('exit', (code) => {
1777
- handler(code, null);
1778
- });
1779
- },
1780
- };
1781
- }
1782
- function createProcessWorkerHost(workerIndex, name) {
1783
- const child = fork(TRANSFORM_CHILD_PATH, [], {
1784
- stdio: ['ignore', 'ignore', 'ignore', 'ipc'],
1785
- serialization: 'advanced',
1786
- env: {
1787
- ...process.env,
1788
- FETCH_URL_MCP_WORKER_INDEX: String(workerIndex),
1789
- FETCH_URL_MCP_WORKER_NAME: name,
1790
- },
1791
- });
1792
- if (child.pid === undefined) {
1793
- throw new Error('Failed to fork process');
1794
- }
1795
- return {
1796
- kind: 'process',
1797
- supportsTransferList: false,
1798
- pid: child.pid,
1799
- postMessage: (message) => {
1800
- if (!child.connected) {
1801
- throw new Error('Transform worker IPC channel is closed');
1802
- }
1803
- child.send(message);
1804
- },
1805
- terminate: () => new Promise((resolve) => {
1806
- if (child.exitCode !== null || child.killed) {
1807
- resolve();
1808
- return;
1809
- }
1810
- child.once('exit', () => {
1811
- resolve();
1812
- });
1813
- try {
1814
- child.kill();
1815
- }
1816
- catch {
1817
- resolve();
1818
- }
1819
- }),
1820
- unref: () => {
1821
- child.unref();
1822
- },
1823
- onMessage: (handler) => {
1824
- child.on('message', handler);
1825
- },
1826
- onError: (handler) => {
1827
- child.on('error', handler);
1828
- },
1829
- onExit: (handler) => {
1830
- child.on('exit', (code, signal) => {
1831
- handler(code, signal);
1832
- });
1833
- },
1834
- };
1835
- }
1836
- class WorkerPool {
1837
- static CLOSED_MESSAGE = 'Transform worker pool closed';
1838
- workers = [];
1839
- capacity;
1840
- minCapacity = POOL_MIN_WORKERS;
1841
- maxCapacity = POOL_MAX_WORKERS;
1842
- queue = [];
1843
- queueHead = 0;
1844
- inflight = new Map();
1845
- cancelAcks = new Map();
1846
- timeoutMs;
1847
- queueMax;
1848
- spawnWorkerImpl;
1849
- closed = false;
1850
- taskIdSeq = 0;
1851
- constructor(size, timeoutMs, spawnWorker) {
1852
- if (size === 0) {
1853
- this.capacity = 0;
1854
- }
1855
- else {
1856
- this.capacity = Math.max(this.minCapacity, Math.min(size, this.maxCapacity));
1857
- }
1858
- this.timeoutMs = timeoutMs;
1859
- this.queueMax = this.maxCapacity * 32;
1860
- this.spawnWorkerImpl = spawnWorker;
1861
- }
1862
- async transform(htmlOrBuffer, url, options) {
1863
- this.ensureOpen();
1864
- if (options.signal?.aborted)
1865
- throw abortPolicy.createAbortError(url, 'transform:enqueue');
1866
- if (this.getQueueDepth() >= this.queueMax) {
1867
- throw new FetchError('Transform worker queue is full', url, 503, {
1868
- reason: 'queue_full',
1869
- stage: 'transform:enqueue',
1870
- });
1871
- }
1872
- return new Promise((resolve, reject) => {
1873
- const task = this.createPendingTask(htmlOrBuffer, url, options, resolve, reject);
1874
- this.queue.push(task);
1875
- this.drainQueue();
1876
- });
1877
- }
1878
- getQueueDepth() {
1879
- const depth = this.queue.length - this.queueHead;
1880
- return depth > 0 ? depth : 0;
1881
- }
1882
- getActiveWorkers() {
1883
- return this.workers.filter((s) => s?.busy).length;
1884
- }
1885
- getCapacity() {
1886
- return this.capacity;
1887
- }
1888
- resize(size) {
1889
- const newCapacity = Math.max(this.minCapacity, Math.min(size, this.maxCapacity));
1890
- if (newCapacity === this.capacity)
1891
- return;
1892
- this.capacity = newCapacity;
1893
- this.drainQueue();
1894
- }
1895
- async close() {
1896
- if (this.closed)
1897
- return;
1898
- this.closed = true;
1899
- const terminations = this.workers
1900
- .map((slot) => slot?.host.terminate())
1901
- .filter((p) => p !== undefined);
1902
- this.workers.fill(undefined);
1903
- this.workers.length = 0;
1904
- for (const id of Array.from(this.inflight.keys())) {
1905
- const inflight = this.takeInflight(id);
1906
- if (!inflight)
1907
- continue;
1908
- this.finalizeTask(inflight.context, () => {
1909
- inflight.reject(new Error(WorkerPool.CLOSED_MESSAGE));
1910
- });
1911
- }
1912
- for (let i = this.queueHead; i < this.queue.length; i += 1) {
1913
- const task = this.queue[i];
1914
- if (!task)
1915
- continue;
1916
- this.clearAbortListener(task.signal, task.abortListener);
1917
- this.finalizeTask(task.context, () => {
1918
- task.reject(new Error(WorkerPool.CLOSED_MESSAGE));
1919
- });
1920
- }
1921
- this.queue.length = 0;
1922
- this.queueHead = 0;
1923
- await Promise.allSettled(terminations);
1924
- }
1925
- ensureOpen() {
1926
- if (this.closed)
1927
- throw new Error(WorkerPool.CLOSED_MESSAGE);
1928
- }
1929
- createPendingTask(htmlOrBuffer, url, options, resolve, reject) {
1930
- const id = (this.taskIdSeq++).toString(36);
1931
- // Preserve request context for resolve/reject even when callbacks fire
1932
- // from worker thread events.
1933
- const context = createTaskContext();
1934
- let abortListener;
1935
- if (options.signal) {
1936
- abortListener = () => {
1937
- this.onAbortSignal(id, url, context, reject);
1938
- };
1939
- options.signal.addEventListener('abort', abortListener, { once: true });
1940
- }
1941
- const task = {
1942
- id,
1943
- url,
1944
- includeMetadata: options.includeMetadata,
1945
- ...(options.skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
1946
- ...(options.inputTruncated ? { inputTruncated: true } : {}),
1947
- signal: options.signal,
1948
- abortListener,
1949
- context,
1950
- resolve,
1951
- reject,
1952
- };
1953
- if (typeof htmlOrBuffer === 'string') {
1954
- task.html = htmlOrBuffer;
1955
- }
1956
- else {
1957
- task.htmlBuffer = htmlOrBuffer;
1958
- if (options.encoding) {
1959
- task.encoding = options.encoding;
1960
- }
1961
- }
1962
- return task;
1963
- }
1964
- onAbortSignal(id, url, context, reject) {
1965
- if (this.closed) {
1966
- this.finalizeTask(context, () => {
1967
- reject(new Error(WorkerPool.CLOSED_MESSAGE));
1968
- });
1969
- return;
1970
- }
1971
- const inflight = this.inflight.get(id);
1972
- if (inflight) {
1973
- void this.abortInflight(id, url, inflight.workerIndex);
1974
- return;
1975
- }
1976
- const queuedIndex = this.findQueuedIndex(id);
1977
- if (queuedIndex !== null) {
1978
- const task = this.queue[queuedIndex];
1979
- if (task)
1980
- this.clearAbortListener(task.signal, task.abortListener);
1981
- this.queue.splice(queuedIndex, 1);
1982
- if (task) {
1983
- this.finalizeTask(task.context, () => {
1984
- task.reject(abortPolicy.createAbortError(url, 'transform:queued-abort'));
1985
- });
1986
- }
1987
- else {
1988
- this.finalizeTask(context, () => {
1989
- reject(abortPolicy.createAbortError(url, 'transform:queued-abort'));
1990
- });
1991
- }
1992
- this.maybeCompactQueue();
1993
- }
1994
- }
1995
- resolveCancelAck(id) {
1996
- const pending = this.cancelAcks.get(id);
1997
- if (!pending)
1998
- return;
1999
- pending.timeout.cancel();
2000
- pending.resolve();
2001
- }
2002
- waitForCancelAck(id) {
2003
- const existing = this.cancelAcks.get(id);
2004
- if (existing) {
2005
- return existing.promise;
2006
- }
2007
- let resolve = () => { };
2008
- const timeout = createUnrefTimeout(200, undefined);
2009
- const racePromise = new Promise((finish) => {
2010
- resolve = finish;
2011
- });
2012
- const promise = Promise.race([racePromise, timeout.promise]).finally(() => {
2013
- this.cancelAcks.delete(id);
2014
- timeout.cancel();
2015
- });
2016
- this.cancelAcks.set(id, { promise, resolve, timeout });
2017
- return promise;
2018
- }
2019
- async abortInflight(id, url, workerIndex) {
2020
- const slot = this.workers[workerIndex];
2021
- const inflight = this.inflight.get(id);
2022
- if (inflight) {
2023
- inflight.cancelPending = true;
2024
- }
2025
- if (slot) {
2026
- try {
2027
- slot.host.postMessage({ type: 'cancel', id });
2028
- }
2029
- catch {
2030
- // Worker may be unavailable; failure is acceptable during abort
2031
- }
2032
- }
2033
- await this.waitForCancelAck(id);
2034
- this.failTask(id, abortPolicy.createAbortError(url, 'transform:signal-abort'));
2035
- if (slot)
2036
- this.restartWorker(workerIndex, slot);
2037
- }
2038
- clearAbortListener(signal, listener) {
2039
- if (!signal || !listener)
2040
- return;
2041
- try {
2042
- signal.removeEventListener('abort', listener);
2043
- }
2044
- catch {
2045
- // Defensive: removeEventListener should not throw, but handle edge cases
2046
- }
2047
- }
2048
- spawnWorker(workerIndex) {
2049
- const name = `${WORKER_NAME_PREFIX}-${workerIndex + 1}`;
2050
- const host = this.spawnWorkerImpl(workerIndex, name);
2051
- host.unref();
2052
- host.onMessage((raw) => {
2053
- this.onWorkerMessage(workerIndex, raw);
2054
- });
2055
- host.onError((error) => {
2056
- this.onWorkerBroken(workerIndex, `Transform worker error: ${getErrorMessage(error)}`);
2057
- });
2058
- host.onExit((code, signal) => {
2059
- const suffix = signal ? `signal ${signal}` : `code ${code ?? 'unknown'}`;
2060
- this.onWorkerBroken(workerIndex, `Transform worker exited (${suffix})`);
2061
- });
2062
- return { host, busy: false, currentTaskId: null, name };
2063
- }
2064
- onWorkerBroken(workerIndex, message) {
2065
- if (this.closed)
2066
- return;
2067
- const slot = this.workers[workerIndex];
2068
- if (!slot)
2069
- return;
2070
- logWarn('Transform worker unavailable; restarting', {
2071
- reason: message,
2072
- workerIndex,
2073
- workerKind: slot.host.kind,
2074
- workerName: slot.name,
2075
- ...(slot.host.kind === 'process'
2076
- ? { pid: slot.host.pid }
2077
- : { threadId: slot.host.threadId }),
2078
- });
2079
- if (slot.busy && slot.currentTaskId) {
2080
- this.failTask(slot.currentTaskId, new Error(message));
2081
- }
2082
- this.restartWorker(workerIndex, slot);
2083
- }
2084
- restartWorker(workerIndex, slot) {
2085
- if (this.closed)
2086
- return;
2087
- const target = slot ?? this.workers[workerIndex];
2088
- if (target) {
2089
- target.host.terminate().catch(() => undefined);
2090
- }
2091
- this.workers[workerIndex] = this.spawnWorker(workerIndex);
2092
- this.drainQueue();
2093
- }
2094
- onWorkerMessage(workerIndex, raw) {
2095
- if (!isWorkerResponse(raw))
2096
- return;
2097
- const message = raw;
2098
- if (message.type === 'cancelled') {
2099
- this.resolveCancelAck(message.id);
2100
- return;
2101
- }
2102
- const inflightPeek = this.inflight.get(message.id);
2103
- if (inflightPeek?.cancelPending) {
2104
- this.resolveCancelAck(message.id);
2105
- return;
2106
- }
2107
- const inflight = this.takeInflight(message.id);
2108
- if (!inflight)
2109
- return;
2110
- this.markIdle(workerIndex);
2111
- if (message.type === 'result') {
2112
- this.finalizeTask(inflight.context, () => {
2113
- inflight.resolve({
2114
- markdown: message.result.markdown,
2115
- truncated: message.result.truncated,
2116
- title: message.result.title,
2117
- ...(message.result.metadata
2118
- ? { metadata: message.result.metadata }
2119
- : {}),
2120
- });
2121
- });
2122
- }
2123
- else {
2124
- const err = message.error;
2125
- if (err.name === 'FetchError') {
2126
- this.finalizeTask(inflight.context, () => {
2127
- inflight.reject(new FetchError(err.message, err.url, err.statusCode, err.details ?? {}));
2128
- });
2129
- }
2130
- else {
2131
- this.finalizeTask(inflight.context, () => {
2132
- inflight.reject(new Error(err.message));
2133
- });
2134
- }
2135
- }
2136
- this.drainQueue();
2137
- }
2138
- takeInflight(id) {
2139
- const inflight = this.inflight.get(id);
2140
- if (!inflight)
2141
- return null;
2142
- inflight.timeout.cancel();
2143
- this.clearAbortListener(inflight.signal, inflight.abortListener);
2144
- this.inflight.delete(id);
2145
- return inflight;
2146
- }
2147
- markIdle(workerIndex) {
2148
- const slot = this.workers[workerIndex];
2149
- if (!slot)
2150
- return;
2151
- slot.busy = false;
2152
- slot.currentTaskId = null;
2153
- }
2154
- failTask(id, error) {
2155
- const inflight = this.takeInflight(id);
2156
- if (!inflight)
2157
- return;
2158
- this.finalizeTask(inflight.context, () => {
2159
- inflight.reject(error);
2160
- });
2161
- this.markIdle(inflight.workerIndex);
2162
- }
2163
- maybeScaleUp() {
2164
- if (this.getQueueDepth() > this.capacity * POOL_SCALE_THRESHOLD &&
2165
- this.capacity < this.maxCapacity) {
2166
- this.capacity += 1;
2167
- }
2168
- }
2169
- drainQueue() {
2170
- if (this.closed || this.getQueueDepth() === 0)
2171
- return;
2172
- this.maybeScaleUp();
2173
- for (let i = 0; i < this.workers.length; i += 1) {
2174
- const slot = this.workers[i];
2175
- if (slot && !slot.busy) {
2176
- this.dispatchFromQueue(i, slot);
2177
- if (this.getQueueDepth() === 0)
2178
- return;
2179
- }
2180
- }
2181
- if (this.workers.length < this.capacity && this.getQueueDepth() > 0) {
2182
- const workerIndex = this.workers.length;
2183
- const slot = this.spawnWorker(workerIndex);
2184
- this.workers.push(slot);
2185
- this.dispatchFromQueue(workerIndex, slot);
2186
- if (this.workers.length < this.capacity && this.getQueueDepth() > 0) {
2187
- setImmediate(() => {
2188
- this.drainQueue();
2189
- });
2190
- }
2191
- }
2192
- }
2193
- takeNextQueuedTask() {
2194
- while (this.queueHead < this.queue.length) {
2195
- const task = this.queue[this.queueHead];
2196
- this.queueHead += 1;
2197
- if (task) {
2198
- this.maybeCompactQueue();
2199
- return task;
2200
- }
2201
- }
2202
- this.maybeCompactQueue();
2203
- return null;
2204
- }
2205
- dispatchFromQueue(workerIndex, slot) {
2206
- const task = this.takeNextQueuedTask();
2207
- if (!task)
2208
- return;
2209
- if (this.closed) {
2210
- this.clearAbortListener(task.signal, task.abortListener);
2211
- this.finalizeTask(task.context, () => {
2212
- task.reject(new Error(WorkerPool.CLOSED_MESSAGE));
2213
- });
2214
- return;
2215
- }
2216
- if (task.signal?.aborted) {
2217
- this.clearAbortListener(task.signal, task.abortListener);
2218
- this.finalizeTask(task.context, () => {
2219
- task.reject(abortPolicy.createAbortError(task.url, 'transform:dispatch'));
2220
- });
2221
- return;
2222
- }
2223
- slot.busy = true;
2224
- slot.currentTaskId = task.id;
2225
- const timeout = createUnrefTimeout(this.timeoutMs, null);
2226
- void timeout.promise
2227
- .then(() => {
2228
- try {
2229
- slot.host.postMessage({ type: 'cancel', id: task.id });
2230
- }
2231
- catch {
2232
- // Worker may be unavailable; proceed with timeout handling
2233
- }
2234
- const inflight = this.takeInflight(task.id);
2235
- if (!inflight)
2236
- return;
2237
- this.finalizeTask(inflight.context, () => {
2238
- inflight.reject(new FetchError('Request timeout', task.url, 504, {
2239
- reason: 'timeout',
2240
- stage: 'transform:worker-timeout',
2241
- }));
2242
- });
2243
- this.restartWorker(workerIndex, slot);
2244
- })
2245
- .catch((error) => {
2246
- this.failTask(task.id, error);
2247
- });
2248
- this.inflight.set(task.id, {
2249
- resolve: task.resolve,
2250
- reject: task.reject,
2251
- timeout,
2252
- signal: task.signal,
2253
- abortListener: task.abortListener,
2254
- workerIndex,
2255
- context: task.context,
2256
- cancelPending: false,
2257
- });
2258
- try {
2259
- const { message, transferList } = buildWorkerDispatchPayload(task, slot.host.supportsTransferList);
2260
- slot.host.postMessage(message, transferList);
2261
- }
2262
- catch (error) {
2263
- timeout.cancel();
2264
- this.clearAbortListener(task.signal, task.abortListener);
2265
- this.inflight.delete(task.id);
2266
- this.markIdle(workerIndex);
2267
- this.finalizeTask(task.context, () => {
2268
- task.reject(error instanceof Error
2269
- ? error
2270
- : new Error('Failed to dispatch transform worker message'));
2271
- });
2272
- this.restartWorker(workerIndex, slot);
2273
- }
2274
- }
2275
- finalizeTask(context, fn) {
2276
- try {
2277
- context.run(fn);
2278
- }
2279
- finally {
2280
- context.dispose();
2281
- }
2282
- }
2283
- findQueuedIndex(id) {
2284
- for (let i = this.queueHead; i < this.queue.length; i += 1) {
2285
- const task = this.queue[i];
2286
- if (task?.id === id)
2287
- return i;
2288
- }
2289
- return null;
2290
- }
2291
- maybeCompactQueue() {
2292
- if (this.queueHead === 0)
2293
- return;
2294
- if (this.queueHead >= this.queue.length ||
2295
- (this.queueHead > 1024 && this.queueHead > this.queue.length / 2)) {
2296
- this.queue.splice(0, this.queueHead);
2297
- this.queueHead = 0;
2298
- }
2299
- }
2300
- }
2301
- let workerPool = null;
2302
- function resolveWorkerSpawner() {
2303
- return config.transform.workerMode === 'process'
2304
- ? createProcessWorkerHost
2305
- : createThreadWorkerHost;
2306
- }
2307
- function getOrCreateWorkerPool() {
2308
- const size = config.transform.maxWorkerScale === 0 ? 0 : POOL_MIN_WORKERS;
2309
- workerPool ??= new WorkerPool(size, DEFAULT_TIMEOUT_MS, resolveWorkerSpawner());
2310
- return workerPool;
2311
- }
2312
- function getWorkerPoolStats() {
2313
- if (!workerPool)
2314
- return null;
2315
- return {
2316
- queueDepth: workerPool.getQueueDepth(),
2317
- activeWorkers: workerPool.getActiveWorkers(),
2318
- capacity: workerPool.getCapacity(),
2319
- };
2320
- }
2321
- async function shutdownWorkerPool() {
2322
- if (!workerPool)
2323
- return;
2324
- await workerPool.close();
2325
- workerPool = null;
2326
- }
2327
1630
  export function getTransformPoolStats() {
2328
1631
  return getWorkerPoolStats();
2329
1632
  }
@@ -2410,3 +1713,4 @@ export async function transformHtmlToMarkdown(html, url, options) {
2410
1713
  export async function transformBufferToMarkdown(htmlBuffer, url, options) {
2411
1714
  return transformInputToMarkdown(htmlBuffer, url, options);
2412
1715
  }
1716
+ //# sourceMappingURL=transform.js.map