@j0hanz/fetch-url-mcp 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/README.md +24 -21
  2. package/dist/cli.d.ts +1 -0
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +15 -7
  5. package/dist/cli.js.map +1 -1
  6. package/dist/http/auth.d.ts +1 -3
  7. package/dist/http/auth.d.ts.map +1 -1
  8. package/dist/http/auth.js +18 -12
  9. package/dist/http/auth.js.map +1 -1
  10. package/dist/http/health.d.ts +1 -1
  11. package/dist/http/health.d.ts.map +1 -1
  12. package/dist/http/health.js +2 -2
  13. package/dist/http/health.js.map +1 -1
  14. package/dist/http/helpers.d.ts +1 -8
  15. package/dist/http/helpers.d.ts.map +1 -1
  16. package/dist/http/helpers.js +12 -10
  17. package/dist/http/helpers.js.map +1 -1
  18. package/dist/http/native.d.ts.map +1 -1
  19. package/dist/http/native.js +41 -27
  20. package/dist/http/native.js.map +1 -1
  21. package/dist/http/rate-limit.d.ts +2 -1
  22. package/dist/http/rate-limit.d.ts.map +1 -1
  23. package/dist/http/rate-limit.js +11 -15
  24. package/dist/http/rate-limit.js.map +1 -1
  25. package/dist/index.js +4 -6
  26. package/dist/index.js.map +1 -1
  27. package/dist/lib/cache.d.ts.map +1 -0
  28. package/dist/{cache.js → lib/cache.js} +5 -2
  29. package/dist/lib/cache.js.map +1 -0
  30. package/dist/{config.d.ts → lib/config.d.ts} +2 -0
  31. package/dist/lib/config.d.ts.map +1 -0
  32. package/dist/{config.js → lib/config.js} +3 -0
  33. package/dist/lib/config.js.map +1 -0
  34. package/dist/lib/crypto.d.ts.map +1 -0
  35. package/dist/lib/crypto.js.map +1 -0
  36. package/dist/lib/dom-noise-removal.d.ts.map +1 -0
  37. package/dist/lib/dom-noise-removal.js.map +1 -0
  38. package/dist/lib/download.d.ts.map +1 -0
  39. package/dist/lib/download.js.map +1 -0
  40. package/dist/{errors.d.ts → lib/errors.d.ts} +3 -0
  41. package/dist/lib/errors.d.ts.map +1 -0
  42. package/dist/{errors.js → lib/errors.js} +7 -0
  43. package/dist/lib/errors.js.map +1 -0
  44. package/dist/lib/fetch-content.d.ts.map +1 -0
  45. package/dist/lib/fetch-content.js.map +1 -0
  46. package/dist/lib/fetch-stream.d.ts.map +1 -0
  47. package/dist/lib/fetch-stream.js.map +1 -0
  48. package/dist/lib/fetch.d.ts.map +1 -0
  49. package/dist/{fetch.js → lib/fetch.js} +97 -142
  50. package/dist/lib/fetch.js.map +1 -0
  51. package/dist/lib/host-normalization.d.ts.map +1 -0
  52. package/dist/lib/host-normalization.js.map +1 -0
  53. package/dist/lib/ip-blocklist.d.ts.map +1 -0
  54. package/dist/lib/ip-blocklist.js.map +1 -0
  55. package/dist/lib/json.d.ts.map +1 -0
  56. package/dist/lib/json.js.map +1 -0
  57. package/dist/lib/language-detection.d.ts.map +1 -0
  58. package/dist/lib/language-detection.js.map +1 -0
  59. package/dist/{markdown-cleanup.d.ts → lib/markdown-cleanup.d.ts} +1 -1
  60. package/dist/lib/markdown-cleanup.d.ts.map +1 -0
  61. package/dist/{markdown-cleanup.js → lib/markdown-cleanup.js} +1 -3
  62. package/dist/lib/markdown-cleanup.js.map +1 -0
  63. package/dist/lib/mcp-lifecycle.d.ts +5 -0
  64. package/dist/lib/mcp-lifecycle.d.ts.map +1 -0
  65. package/dist/lib/mcp-lifecycle.js +51 -0
  66. package/dist/lib/mcp-lifecycle.js.map +1 -0
  67. package/dist/lib/mcp-validator.d.ts.map +1 -0
  68. package/dist/{mcp-validator.js → lib/mcp-validator.js} +1 -1
  69. package/dist/lib/mcp-validator.js.map +1 -0
  70. package/dist/{mcp.d.ts → lib/mcp.d.ts} +1 -1
  71. package/dist/lib/mcp.d.ts.map +1 -0
  72. package/dist/{mcp.js → lib/mcp.js} +39 -19
  73. package/dist/lib/mcp.js.map +1 -0
  74. package/dist/lib/observability.d.ts.map +1 -0
  75. package/dist/lib/observability.js.map +1 -0
  76. package/dist/lib/server-tuning.d.ts.map +1 -0
  77. package/dist/lib/server-tuning.js.map +1 -0
  78. package/dist/{session.d.ts → lib/session.d.ts} +4 -1
  79. package/dist/lib/session.d.ts.map +1 -0
  80. package/dist/{session.js → lib/session.js} +43 -35
  81. package/dist/lib/session.js.map +1 -0
  82. package/dist/lib/timer-utils.d.ts +13 -0
  83. package/dist/lib/timer-utils.d.ts.map +1 -0
  84. package/dist/lib/timer-utils.js +44 -0
  85. package/dist/lib/timer-utils.js.map +1 -0
  86. package/dist/lib/tool-errors.d.ts.map +1 -0
  87. package/dist/{tool-errors.js → lib/tool-errors.js} +2 -2
  88. package/dist/lib/tool-errors.js.map +1 -0
  89. package/dist/{tool-pipeline.d.ts → lib/tool-pipeline.d.ts} +1 -2
  90. package/dist/lib/tool-pipeline.d.ts.map +1 -0
  91. package/dist/{tool-pipeline.js → lib/tool-pipeline.js} +8 -25
  92. package/dist/lib/tool-pipeline.js.map +1 -0
  93. package/dist/{tool-progress.d.ts → lib/tool-progress.d.ts} +0 -1
  94. package/dist/lib/tool-progress.d.ts.map +1 -0
  95. package/dist/{tool-progress.js → lib/tool-progress.js} +1 -1
  96. package/dist/lib/tool-progress.js.map +1 -0
  97. package/dist/lib/type-guards.d.ts.map +1 -0
  98. package/dist/lib/type-guards.js.map +1 -0
  99. package/dist/{prompts.d.ts → prompts/index.d.ts} +1 -1
  100. package/dist/prompts/index.d.ts.map +1 -0
  101. package/dist/{prompts.js → prompts/index.js} +2 -2
  102. package/dist/prompts/index.js.map +1 -0
  103. package/dist/{resources.d.ts → resources/index.d.ts} +1 -1
  104. package/dist/resources/index.d.ts.map +1 -0
  105. package/dist/{resources.js → resources/index.js} +19 -37
  106. package/dist/resources/index.js.map +1 -0
  107. package/dist/resources/instructions.d.ts.map +1 -0
  108. package/dist/{instructions.js → resources/instructions.js} +4 -2
  109. package/dist/resources/instructions.js.map +1 -0
  110. package/dist/schemas/inputs.d.ts +8 -0
  111. package/dist/schemas/inputs.d.ts.map +1 -0
  112. package/dist/schemas/inputs.js +25 -0
  113. package/dist/schemas/inputs.js.map +1 -0
  114. package/dist/schemas/outputs.d.ts +24 -0
  115. package/dist/schemas/outputs.d.ts.map +1 -0
  116. package/dist/schemas/outputs.js +78 -0
  117. package/dist/schemas/outputs.js.map +1 -0
  118. package/dist/server.d.ts.map +1 -1
  119. package/dist/server.js +25 -14
  120. package/dist/server.js.map +1 -1
  121. package/dist/tasks/execution.d.ts.map +1 -1
  122. package/dist/tasks/execution.js +88 -55
  123. package/dist/tasks/execution.js.map +1 -1
  124. package/dist/tasks/manager.d.ts +4 -0
  125. package/dist/tasks/manager.d.ts.map +1 -1
  126. package/dist/tasks/manager.js +51 -46
  127. package/dist/tasks/manager.js.map +1 -1
  128. package/dist/tasks/owner.d.ts +3 -2
  129. package/dist/tasks/owner.d.ts.map +1 -1
  130. package/dist/tasks/owner.js +2 -1
  131. package/dist/tasks/owner.js.map +1 -1
  132. package/dist/tasks/tool-registry.d.ts +12 -0
  133. package/dist/tasks/tool-registry.d.ts.map +1 -0
  134. package/dist/tasks/tool-registry.js +14 -0
  135. package/dist/tasks/tool-registry.js.map +1 -0
  136. package/dist/tools/fetch-url.d.ts +29 -0
  137. package/dist/tools/fetch-url.d.ts.map +1 -0
  138. package/dist/{tools.js → tools/fetch-url.js} +72 -132
  139. package/dist/tools/fetch-url.js.map +1 -0
  140. package/dist/tools/index.d.ts +3 -0
  141. package/dist/tools/index.d.ts.map +1 -0
  142. package/dist/tools/index.js +5 -0
  143. package/dist/tools/index.js.map +1 -0
  144. package/dist/transform/transform.d.ts.map +1 -1
  145. package/dist/transform/transform.js +156 -174
  146. package/dist/transform/transform.js.map +1 -1
  147. package/dist/transform/types.d.ts +0 -1
  148. package/dist/transform/types.d.ts.map +1 -1
  149. package/dist/transform/worker-pool.d.ts.map +1 -1
  150. package/dist/transform/worker-pool.js +5 -5
  151. package/dist/transform/worker-pool.js.map +1 -1
  152. package/dist/transform/workers/shared.d.ts +8 -0
  153. package/dist/transform/workers/shared.d.ts.map +1 -0
  154. package/dist/transform/workers/shared.js +131 -0
  155. package/dist/transform/workers/shared.js.map +1 -0
  156. package/dist/transform/workers/transform-child.js +5 -134
  157. package/dist/transform/workers/transform-child.js.map +1 -1
  158. package/dist/transform/workers/transform-worker.js +7 -127
  159. package/dist/transform/workers/transform-worker.js.map +1 -1
  160. package/package.json +8 -7
  161. package/dist/cache.d.ts.map +0 -1
  162. package/dist/cache.js.map +0 -1
  163. package/dist/config.d.ts.map +0 -1
  164. package/dist/config.js.map +0 -1
  165. package/dist/crypto.d.ts.map +0 -1
  166. package/dist/crypto.js.map +0 -1
  167. package/dist/dom-noise-removal.d.ts.map +0 -1
  168. package/dist/dom-noise-removal.js.map +0 -1
  169. package/dist/download.d.ts.map +0 -1
  170. package/dist/download.js.map +0 -1
  171. package/dist/errors.d.ts.map +0 -1
  172. package/dist/errors.js.map +0 -1
  173. package/dist/examples/mcp-fetch-url-client.js +0 -329
  174. package/dist/examples/mcp-fetch-url-client.js.map +0 -1
  175. package/dist/fetch-content.d.ts.map +0 -1
  176. package/dist/fetch-content.js.map +0 -1
  177. package/dist/fetch-stream.d.ts.map +0 -1
  178. package/dist/fetch-stream.js.map +0 -1
  179. package/dist/fetch.d.ts.map +0 -1
  180. package/dist/fetch.js.map +0 -1
  181. package/dist/host-normalization.d.ts.map +0 -1
  182. package/dist/host-normalization.js.map +0 -1
  183. package/dist/instructions.d.ts.map +0 -1
  184. package/dist/instructions.js.map +0 -1
  185. package/dist/ip-blocklist.d.ts.map +0 -1
  186. package/dist/ip-blocklist.js.map +0 -1
  187. package/dist/json.d.ts.map +0 -1
  188. package/dist/json.js.map +0 -1
  189. package/dist/language-detection.d.ts.map +0 -1
  190. package/dist/language-detection.js.map +0 -1
  191. package/dist/markdown-cleanup.d.ts.map +0 -1
  192. package/dist/markdown-cleanup.js.map +0 -1
  193. package/dist/mcp-validator.d.ts.map +0 -1
  194. package/dist/mcp-validator.js.map +0 -1
  195. package/dist/mcp.d.ts.map +0 -1
  196. package/dist/mcp.js.map +0 -1
  197. package/dist/observability.d.ts.map +0 -1
  198. package/dist/observability.js.map +0 -1
  199. package/dist/prompts.d.ts.map +0 -1
  200. package/dist/prompts.js.map +0 -1
  201. package/dist/resources.d.ts.map +0 -1
  202. package/dist/resources.js.map +0 -1
  203. package/dist/server-tuning.d.ts.map +0 -1
  204. package/dist/server-tuning.js.map +0 -1
  205. package/dist/session.d.ts.map +0 -1
  206. package/dist/session.js.map +0 -1
  207. package/dist/timer-utils.d.ts +0 -6
  208. package/dist/timer-utils.d.ts.map +0 -1
  209. package/dist/timer-utils.js +0 -27
  210. package/dist/timer-utils.js.map +0 -1
  211. package/dist/tool-errors.d.ts.map +0 -1
  212. package/dist/tool-errors.js.map +0 -1
  213. package/dist/tool-pipeline.d.ts.map +0 -1
  214. package/dist/tool-pipeline.js.map +0 -1
  215. package/dist/tool-progress.d.ts.map +0 -1
  216. package/dist/tool-progress.js.map +0 -1
  217. package/dist/tools.d.ts +0 -54
  218. package/dist/tools.d.ts.map +0 -1
  219. package/dist/tools.js.map +0 -1
  220. package/dist/type-guards.d.ts.map +0 -1
  221. package/dist/type-guards.js.map +0 -1
  222. /package/dist/{cache.d.ts → lib/cache.d.ts} +0 -0
  223. /package/dist/{crypto.d.ts → lib/crypto.d.ts} +0 -0
  224. /package/dist/{crypto.js → lib/crypto.js} +0 -0
  225. /package/dist/{dom-noise-removal.d.ts → lib/dom-noise-removal.d.ts} +0 -0
  226. /package/dist/{dom-noise-removal.js → lib/dom-noise-removal.js} +0 -0
  227. /package/dist/{download.d.ts → lib/download.d.ts} +0 -0
  228. /package/dist/{download.js → lib/download.js} +0 -0
  229. /package/dist/{fetch-content.d.ts → lib/fetch-content.d.ts} +0 -0
  230. /package/dist/{fetch-content.js → lib/fetch-content.js} +0 -0
  231. /package/dist/{fetch-stream.d.ts → lib/fetch-stream.d.ts} +0 -0
  232. /package/dist/{fetch-stream.js → lib/fetch-stream.js} +0 -0
  233. /package/dist/{fetch.d.ts → lib/fetch.d.ts} +0 -0
  234. /package/dist/{host-normalization.d.ts → lib/host-normalization.d.ts} +0 -0
  235. /package/dist/{host-normalization.js → lib/host-normalization.js} +0 -0
  236. /package/dist/{ip-blocklist.d.ts → lib/ip-blocklist.d.ts} +0 -0
  237. /package/dist/{ip-blocklist.js → lib/ip-blocklist.js} +0 -0
  238. /package/dist/{json.d.ts → lib/json.d.ts} +0 -0
  239. /package/dist/{json.js → lib/json.js} +0 -0
  240. /package/dist/{language-detection.d.ts → lib/language-detection.d.ts} +0 -0
  241. /package/dist/{language-detection.js → lib/language-detection.js} +0 -0
  242. /package/dist/{mcp-validator.d.ts → lib/mcp-validator.d.ts} +0 -0
  243. /package/dist/{observability.d.ts → lib/observability.d.ts} +0 -0
  244. /package/dist/{observability.js → lib/observability.js} +0 -0
  245. /package/dist/{server-tuning.d.ts → lib/server-tuning.d.ts} +0 -0
  246. /package/dist/{server-tuning.js → lib/server-tuning.js} +0 -0
  247. /package/dist/{tool-errors.d.ts → lib/tool-errors.d.ts} +0 -0
  248. /package/dist/{type-guards.d.ts → lib/type-guards.d.ts} +0 -0
  249. /package/dist/{type-guards.js → lib/type-guards.js} +0 -0
  250. /package/dist/{instructions.d.ts → resources/instructions.d.ts} +0 -0
@@ -4,14 +4,14 @@ import { performance } from 'node:perf_hooks';
4
4
  import { isProbablyReaderable, Readability } from '@mozilla/readability';
5
5
  import { parseHTML } from 'linkedom';
6
6
  import { NodeHtmlMarkdown, } from 'node-html-markdown';
7
- import { config } from '../config.js';
8
- import { removeNoiseFromHtml } from '../dom-noise-removal.js';
9
- import { FetchError, getErrorMessage } from '../errors.js';
10
- import { isRawTextContentUrl } from '../fetch.js';
11
- import { detectLanguageFromCode, resolveLanguageFromAttributes, } from '../language-detection.js';
12
- import { addSourceToMarkdown, buildMetadataFooter, cleanupMarkdownArtifacts, extractTitleFromRawMarkdown, isRawTextContent, } from '../markdown-cleanup.js';
13
- import { getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from '../observability.js';
14
- import { isLikeNode, isObject } from '../type-guards.js';
7
+ import { config } from '../lib/config.js';
8
+ import { removeNoiseFromHtml } from '../lib/dom-noise-removal.js';
9
+ import { FetchError, getErrorMessage } from '../lib/errors.js';
10
+ import { isRawTextContentUrl } from '../lib/fetch.js';
11
+ import { detectLanguageFromCode, resolveLanguageFromAttributes, } from '../lib/language-detection.js';
12
+ import { addSourceToMarkdown, buildMetadataFooter, cleanupMarkdownArtifacts, extractTitleFromRawMarkdown, isRawTextContent, } from '../lib/markdown-cleanup.js';
13
+ import { getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from '../lib/observability.js';
14
+ import { isLikeNode, isObject } from '../lib/type-guards.js';
15
15
  import { getOrCreateWorkerPool, getWorkerPoolStats, shutdownWorkerPool, } from './worker-pool.js';
16
16
  const utf8Decoder = new TextDecoder('utf-8');
17
17
  function decodeInput(input, encoding) {
@@ -213,7 +213,11 @@ function trimUtf8Buffer(buffer, maxBytes) {
213
213
  function trimDanglingTagFragment(content) {
214
214
  const lastOpen = content.lastIndexOf('<');
215
215
  const lastClose = content.lastIndexOf('>');
216
- return lastOpen > lastClose ? content.substring(0, lastOpen) : content;
216
+ if (lastOpen > lastClose &&
217
+ /^<([a-zA-Z/!?]|$)/.test(content.substring(lastOpen))) {
218
+ return content.substring(0, lastOpen);
219
+ }
220
+ return content;
217
221
  }
218
222
  function truncateHtml(html, inputTruncated = false) {
219
223
  const maxSize = config.constants.maxHtmlSize;
@@ -270,24 +274,19 @@ function mergeMetadata(early, late) {
270
274
  if (!early)
271
275
  return late;
272
276
  const merged = {};
273
- const title = late.title ?? early.title;
274
- const description = late.description ?? early.description;
275
- const author = late.author ?? early.author;
276
- const image = late.image ?? early.image;
277
- const publishedAt = late.publishedAt ?? early.publishedAt;
278
- const modifiedAt = late.modifiedAt ?? early.modifiedAt;
279
- if (title !== undefined)
280
- merged.title = title;
281
- if (description !== undefined)
282
- merged.description = description;
283
- if (author !== undefined)
284
- merged.author = author;
285
- if (image !== undefined)
286
- merged.image = image;
287
- if (publishedAt !== undefined)
288
- merged.publishedAt = publishedAt;
289
- if (modifiedAt !== undefined)
290
- merged.modifiedAt = modifiedAt;
277
+ const keys = [
278
+ 'title',
279
+ 'description',
280
+ 'author',
281
+ 'image',
282
+ 'publishedAt',
283
+ 'modifiedAt',
284
+ ];
285
+ for (const key of keys) {
286
+ const value = late[key] ?? early[key];
287
+ if (value !== undefined)
288
+ merged[key] = value;
289
+ }
291
290
  return merged;
292
291
  }
293
292
  const META_PROPERTY_HANDLERS = new Map([
@@ -653,9 +652,6 @@ function isCodeBlock(parent) {
653
652
  const tagName = getTagName(parent);
654
653
  return tagName === 'PRE' || tagName === 'WRAPPED-PRE';
655
654
  }
656
- function isAnchor(node) {
657
- return getTagName(node) === 'A';
658
- }
659
655
  function resolveAttributeLanguage(node) {
660
656
  const getAttribute = hasGetAttribute(node)
661
657
  ? node.getAttribute.bind(node)
@@ -762,7 +758,7 @@ function resolveImageSrc(getAttribute) {
762
758
  function buildImageTranslator(ctx) {
763
759
  if (!isObject(ctx))
764
760
  return { content: '' };
765
- const { node, parent } = ctx;
761
+ const { node } = ctx;
766
762
  const getAttribute = hasGetAttribute(node)
767
763
  ? node.getAttribute.bind(node)
768
764
  : undefined;
@@ -770,10 +766,7 @@ function buildImageTranslator(ctx) {
770
766
  const existingAlt = getAttribute?.('alt') ?? '';
771
767
  const alt = existingAlt.trim() || deriveAltFromImageUrl(src);
772
768
  const markdown = `![${alt}](${src})`;
773
- if (isAnchor(parent)) {
774
- return { content: markdown };
775
- }
776
- return { content: `\n\n${markdown}\n\n` };
769
+ return { content: markdown };
777
770
  }
778
771
  const GFM_ALERT_MAP = new Map([
779
772
  ['note', 'NOTE'],
@@ -831,6 +824,112 @@ function buildPreTranslator(ctx) {
831
824
  postprocess: createCodeBlockPostprocessor(attributeLanguage),
832
825
  };
833
826
  }
827
+ function getNodeAttr(node) {
828
+ if (!isLikeNode(node))
829
+ return undefined;
830
+ return typeof node.getAttribute === 'function'
831
+ ? node.getAttribute.bind(node)
832
+ : undefined;
833
+ }
834
+ function buildDivTranslator(ctx) {
835
+ if (!isObject(ctx))
836
+ return {};
837
+ const { node } = ctx;
838
+ const getAttribute = getNodeAttr(node);
839
+ if (!getAttribute)
840
+ return {};
841
+ const className = getAttribute('class') ?? '';
842
+ if (className.includes('mermaid')) {
843
+ return {
844
+ noEscape: true,
845
+ preserveWhitespace: true,
846
+ postprocess: ({ content }) => `\n\n\`\`\`mermaid\n${content.trim()}\n\`\`\`\n\n`,
847
+ };
848
+ }
849
+ const isAdmonition = className.includes('admonition') ||
850
+ className.includes('callout') ||
851
+ className.includes('custom-block') ||
852
+ getAttribute('role') === 'alert' ||
853
+ /\b(note|tip|info|warning|danger|caution|important)\b/i.test(className);
854
+ if (isAdmonition) {
855
+ return {
856
+ postprocess: ({ content }) => {
857
+ const alertType = resolveGfmAlertType(className);
858
+ const lines = content.trim().split('\n');
859
+ const header = alertType ? `> [!${alertType}]\n` : '';
860
+ return `\n\n${header}> ${lines.join('\n> ')}\n\n`;
861
+ },
862
+ };
863
+ }
864
+ if (!className.includes('type'))
865
+ return {};
866
+ return {
867
+ postprocess: ({ content }) => {
868
+ const lines = content.split('\n');
869
+ const separated = [];
870
+ for (let i = 0; i < lines.length; i++) {
871
+ const line = lines[i] ?? '';
872
+ const nextLine = i < lines.length - 1 ? (lines[i + 1] ?? '') : '';
873
+ separated.push(line);
874
+ if (line.trim() &&
875
+ nextLine.trim() &&
876
+ line.includes(':') &&
877
+ nextLine.includes(':') &&
878
+ !line.startsWith(' ') &&
879
+ !nextLine.startsWith(' ')) {
880
+ separated.push('');
881
+ }
882
+ }
883
+ return separated.join('\n');
884
+ },
885
+ };
886
+ }
887
+ function buildSectionTranslator(ctx) {
888
+ if (isObject(ctx)) {
889
+ const { node } = ctx;
890
+ const getAttribute = getNodeAttr(node);
891
+ if (getAttribute?.('class')?.includes('tsd-member')) {
892
+ return {
893
+ postprocess: ({ content }) => `\n\n&nbsp;\n\n${content}\n\n`,
894
+ };
895
+ }
896
+ }
897
+ return {
898
+ postprocess: ({ content }) => `\n\n${content}\n\n`,
899
+ };
900
+ }
901
+ function buildSpanTranslator(ctx) {
902
+ if (!isObject(ctx))
903
+ return {};
904
+ const { node } = ctx;
905
+ const getAttribute = getNodeAttr(node);
906
+ if (!getAttribute)
907
+ return {};
908
+ const dataAs = getAttribute('data-as') ?? '';
909
+ if (dataAs === 'p') {
910
+ return {
911
+ postprocess: ({ content }) => `\n\n${content.trim()}\n\n`,
912
+ };
913
+ }
914
+ return {};
915
+ }
916
+ function buildMermaidPreTranslator(ctx) {
917
+ if (!isObject(ctx))
918
+ return buildPreTranslator(ctx);
919
+ const { node } = ctx;
920
+ const getAttribute = getNodeAttr(node);
921
+ if (!getAttribute)
922
+ return buildPreTranslator(ctx);
923
+ const className = getAttribute('class') ?? '';
924
+ if (className.includes('mermaid')) {
925
+ return {
926
+ noEscape: true,
927
+ preserveWhitespace: true,
928
+ postprocess: ({ content }) => `\n\n\`\`\`mermaid\n${content.trim()}\n\`\`\`\n\n`,
929
+ };
930
+ }
931
+ return buildPreTranslator(ctx);
932
+ }
834
933
  function createCustomTranslators() {
835
934
  return {
836
935
  code: (ctx) => buildCodeTranslator(ctx),
@@ -866,61 +965,7 @@ function createCustomTranslators() {
866
965
  }
867
966
  return { content: items ? `\n${items}\n` : '' };
868
967
  },
869
- div: (ctx) => {
870
- if (!isObject(ctx))
871
- return {};
872
- const { node } = ctx;
873
- if (!isLikeNode(node))
874
- return {};
875
- const getAttribute = typeof node.getAttribute === 'function'
876
- ? node.getAttribute.bind(node)
877
- : undefined;
878
- const className = getAttribute?.('class') ?? '';
879
- if (className.includes('mermaid')) {
880
- return {
881
- noEscape: true,
882
- preserveWhitespace: true,
883
- postprocess: ({ content }) => `\n\n\`\`\`mermaid\n${content.trim()}\n\`\`\`\n\n`,
884
- };
885
- }
886
- const isAdmonition = className.includes('admonition') ||
887
- className.includes('callout') ||
888
- className.includes('custom-block') ||
889
- getAttribute?.('role') === 'alert' ||
890
- /\b(note|tip|info|warning|danger|caution|important)\b/i.test(className);
891
- if (isAdmonition) {
892
- return {
893
- postprocess: ({ content }) => {
894
- const alertType = resolveGfmAlertType(className);
895
- const lines = content.trim().split('\n');
896
- const header = alertType ? `> [!${alertType}]\n` : '';
897
- return `\n\n${header}> ${lines.join('\n> ')}\n\n`;
898
- },
899
- };
900
- }
901
- if (!className.includes('type'))
902
- return {};
903
- return {
904
- postprocess: ({ content }) => {
905
- const lines = content.split('\n');
906
- const separated = [];
907
- for (let i = 0; i < lines.length; i++) {
908
- const line = lines[i] ?? '';
909
- const nextLine = i < lines.length - 1 ? (lines[i + 1] ?? '') : '';
910
- separated.push(line);
911
- if (line.trim() &&
912
- nextLine.trim() &&
913
- line.includes(':') &&
914
- nextLine.includes(':') &&
915
- !line.startsWith(' ') &&
916
- !nextLine.startsWith(' ')) {
917
- separated.push('');
918
- }
919
- }
920
- return separated.join('\n');
921
- },
922
- };
923
- },
968
+ div: buildDivTranslator,
924
969
  kbd: () => ({
925
970
  postprocess: ({ content }) => `\`${content}\``,
926
971
  }),
@@ -933,24 +978,7 @@ function createCustomTranslators() {
933
978
  sup: () => ({
934
979
  postprocess: ({ content }) => `^${content}^`,
935
980
  }),
936
- section: (ctx) => {
937
- if (isObject(ctx)) {
938
- const { node } = ctx;
939
- if (isLikeNode(node)) {
940
- const getAttribute = typeof node.getAttribute === 'function'
941
- ? node.getAttribute.bind(node)
942
- : undefined;
943
- if (getAttribute?.('class')?.includes('tsd-member')) {
944
- return {
945
- postprocess: ({ content }) => `\n\n&nbsp;\n\n${content}\n\n`,
946
- };
947
- }
948
- }
949
- }
950
- return {
951
- postprocess: ({ content }) => `\n\n${content}\n\n`,
952
- };
953
- },
981
+ section: buildSectionTranslator,
954
982
  details: () => ({
955
983
  postprocess: ({ content }) => {
956
984
  const trimmed = content.trim();
@@ -962,43 +990,8 @@ function createCustomTranslators() {
962
990
  summary: () => ({
963
991
  postprocess: ({ content }) => `${content.trim()}\n\n`,
964
992
  }),
965
- span: (ctx) => {
966
- if (!isObject(ctx))
967
- return {};
968
- const { node } = ctx;
969
- if (!isLikeNode(node))
970
- return {};
971
- const getAttribute = typeof node.getAttribute === 'function'
972
- ? node.getAttribute.bind(node)
973
- : undefined;
974
- const dataAs = getAttribute?.('data-as') ?? '';
975
- if (dataAs === 'p') {
976
- return {
977
- postprocess: ({ content }) => `\n\n${content.trim()}\n\n`,
978
- };
979
- }
980
- return {};
981
- },
982
- pre: (ctx) => {
983
- if (!isObject(ctx))
984
- return buildPreTranslator(ctx);
985
- const { node } = ctx;
986
- if (!isLikeNode(node)) {
987
- return buildPreTranslator(ctx);
988
- }
989
- const getAttribute = typeof node.getAttribute === 'function'
990
- ? node.getAttribute.bind(node)
991
- : undefined;
992
- const className = getAttribute?.('class') ?? '';
993
- if (className.includes('mermaid')) {
994
- return {
995
- noEscape: true,
996
- preserveWhitespace: true,
997
- postprocess: ({ content }) => `\n\n\`\`\`mermaid\n${content.trim()}\n\`\`\`\n\n`,
998
- };
999
- }
1000
- return buildPreTranslator(ctx);
1001
- },
993
+ span: buildSpanTranslator,
994
+ pre: buildMermaidPreTranslator,
1002
995
  };
1003
996
  }
1004
997
  let markdownConverter = null;
@@ -1127,8 +1120,8 @@ function resolveRelativeUrls(markdown, baseUrl, signal) {
1127
1120
  let lastIndex = 0;
1128
1121
  let lineCount = 0;
1129
1122
  while (lastIndex < len) {
1130
- if (++lineCount % 500 === 0 && signal?.aborted) {
1131
- throw new Error('Transform aborted during URL resolution');
1123
+ if (++lineCount % 500 === 0) {
1124
+ abortPolicy.throwIfAborted(signal, baseUrl, 'markdown:resolve-urls');
1132
1125
  }
1133
1126
  let nextIndex = markdown.indexOf('\n', lastIndex);
1134
1127
  let line;
@@ -1315,7 +1308,12 @@ function getTextContentSkippingHidden(node, parts) {
1315
1308
  }
1316
1309
  if (nodeType !== 1)
1317
1310
  return;
1318
- const { tagName } = node;
1311
+ const element = node;
1312
+ if (element.hasAttribute('hidden') ||
1313
+ element.getAttribute('aria-hidden') === 'true') {
1314
+ return;
1315
+ }
1316
+ const { tagName } = element;
1319
1317
  if (tagName === 'SCRIPT' || tagName === 'STYLE' || tagName === 'NOSCRIPT')
1320
1318
  return;
1321
1319
  const { childNodes } = node;
@@ -1479,19 +1477,21 @@ function shouldUseArticleContent(article, originalHtmlOrDocument) {
1479
1477
  function buildContentSource(params) {
1480
1478
  const { html, url, article, extractedMeta, includeMetadata, useArticleContent, document, truncated, skipNoiseRemoval, signal, } = params;
1481
1479
  const metadata = createContentMetadataBlock(url, article, extractedMeta, useArticleContent, includeMetadata);
1480
+ const base = {
1481
+ favicon: extractedMeta.favicon,
1482
+ metadata,
1483
+ extractedMetadata: extractedMeta,
1484
+ truncated,
1485
+ };
1482
1486
  if (useArticleContent && article) {
1483
- // Readability output can still be noisy (unless user requested skip).
1484
1487
  const cleanedArticleHtml = skipNoiseRemoval
1485
1488
  ? article.content
1486
1489
  : removeNoiseFromHtml(article.content, undefined, url, signal);
1487
1490
  return {
1491
+ ...base,
1488
1492
  sourceHtml: cleanedArticleHtml,
1489
1493
  title: article.title,
1490
- favicon: extractedMeta.favicon,
1491
- metadata,
1492
- extractedMetadata: extractedMeta,
1493
1494
  skipNoiseRemoval: true,
1494
- truncated,
1495
1495
  };
1496
1496
  }
1497
1497
  if (document) {
@@ -1499,36 +1499,18 @@ function buildContentSource(params) {
1499
1499
  ? html
1500
1500
  : removeNoiseFromHtml(html, document, url, signal);
1501
1501
  const contentRoot = findContentRoot(document);
1502
- if (contentRoot) {
1503
- return {
1504
- sourceHtml: contentRoot,
1505
- title: extractedMeta.title,
1506
- favicon: extractedMeta.favicon,
1507
- metadata,
1508
- extractedMetadata: extractedMeta,
1509
- skipNoiseRemoval: true,
1510
- document,
1511
- truncated,
1512
- };
1513
- }
1514
1502
  return {
1515
- sourceHtml: cleanedHtml,
1503
+ ...base,
1504
+ sourceHtml: contentRoot ?? cleanedHtml,
1516
1505
  title: extractedMeta.title,
1517
- favicon: extractedMeta.favicon,
1518
- metadata,
1519
- extractedMetadata: extractedMeta,
1520
1506
  skipNoiseRemoval: true,
1521
1507
  document,
1522
- truncated,
1523
1508
  };
1524
1509
  }
1525
1510
  return {
1511
+ ...base,
1526
1512
  sourceHtml: html,
1527
1513
  title: extractedMeta.title,
1528
- favicon: extractedMeta.favicon,
1529
- metadata,
1530
- extractedMetadata: extractedMeta,
1531
- truncated,
1532
1514
  };
1533
1515
  }
1534
1516
  function resolveContentSource(params) {