@j0hanz/fetch-url-mcp 1.10.18 → 1.10.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ export declare function resolveDocumentBody(document: Document): Element;
5
5
  export declare function serializeDocumentForMarkdown(document: Document, fallback: string): string;
6
6
  /** Surface hidden tab panels, then strip unselected tab triggers. */
7
7
  export declare function normalizeTabContent(document: Document): void;
8
+ export declare function surfaceCodeEditorContent(document: Document): void;
8
9
  export declare function prepareDocumentForMarkdown(document: Document, baseUrl?: string, signal?: AbortSignal): void;
9
10
  export declare function removeNoiseFromHtml(html: string, document?: Document, baseUrl?: string, signal?: AbortSignal): string;
10
11
  export declare function getVisibleTextLength(htmlOrDocument: string | Document): number;
@@ -1 +1 @@
1
- {"version":3,"file":"dom-prep.d.ts","sourceRoot":"","sources":["../../src/lib/dom-prep.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AA8mB9D,eAAO,MAAM,sBAAsB,QAAmB,CAAC;AAyCvD,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CA6B9D;AAuBD,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAY/D;AAED,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,MAAM,GACf,MAAM,CASR;AA0CD,qEAAqE;AACrE,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAG5D;AAmUD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,IAAI,CAON;AA4BD,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,MAAM,CAcR;AA0ED,wBAAgB,oBAAoB,CAClC,cAAc,EAAE,MAAM,GAAG,QAAQ,GAChC,MAAM,CAaR;AA6ID,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,EACzB,QAAQ,EAAE,QAAQ,GACjB,QAAQ,GAAG,IAAI,CAsBjB"}
1
+ {"version":3,"file":"dom-prep.d.ts","sourceRoot":"","sources":["../../src/lib/dom-prep.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAonB9D,eAAO,MAAM,sBAAsB,QAAmB,CAAC;AAyCvD,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CA6B9D;AAuBD,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAY/D;AAED,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,MAAM,GACf,MAAM,CASR;AA0CD,qEAAqE;AACrE,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAG5D;AA0RD,wBAAgB,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CA2BjE;AAyDD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,IAAI,CAON;AA4BD,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,MAAM,CAcR;AA0ED,wBAAgB,oBAAoB,CAClC,cAAc,EAAE,MAAM,GAAG,QAAQ,GAChC,MAAM,CAaR;AA6ID,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,EACzB,QAAQ,EAAE,QAAQ,GACjB,QAAQ,GAAG,IAAI,CAsBjB"}
@@ -271,6 +271,15 @@ function isNavigationAside(element) {
271
271
  return true;
272
272
  return links.length / (textLen / 100) >= ASIDE_NAV_LINK_DENSITY_THRESHOLD;
273
273
  }
274
+ function isNavigationSidebar(element) {
275
+ const links = element.querySelectorAll('a[href]');
276
+ if (links.length < ASIDE_NAV_MIN_LINKS)
277
+ return false;
278
+ const textLen = (element.textContent || '').trim().length;
279
+ if (textLen === 0)
280
+ return true;
281
+ return links.length / (textLen / 100) >= ASIDE_NAV_LINK_DENSITY_THRESHOLD;
282
+ }
274
283
  function shouldPreserve(element, tagName) {
275
284
  // Check Dialog
276
285
  const role = element.getAttribute('role');
@@ -282,12 +291,15 @@ function shouldPreserve(element, tagName) {
282
291
  return true;
283
292
  return element.querySelector('h1,h2,h3,h4,h5,h6') !== null;
284
293
  }
285
- // Check Nav/Footer
286
294
  if (tagName === 'nav' || tagName === 'footer') {
287
295
  if (element.querySelector('article,main,section,[role="main"]'))
288
296
  return true;
289
- return ((element.textContent || '').trim().length >=
290
- NAV_FOOTER_MIN_CHARS_FOR_PRESERVATION);
297
+ const textLen = (element.textContent || '').trim().length;
298
+ if (textLen < NAV_FOOTER_MIN_CHARS_FOR_PRESERVATION)
299
+ return false;
300
+ if (isNavigationSidebar(element))
301
+ return false;
302
+ return true;
291
303
  }
292
304
  // Check Aside — preserve only if it looks like article content, not navigation
293
305
  if (tagName === 'aside') {
@@ -885,6 +897,35 @@ function separateAdjacentInlineElements(document) {
885
897
  }
886
898
  }
887
899
  }
900
+ const CODE_EDITOR_LANG_REGEX = /\blanguage-(\S+)/;
901
+ // Some documentation sites render code examples as highlighted, aria-hidden blocks with a textarea containing the raw code for accessibility.
902
+ // Surface the textarea content and remove the redundant highlighted block to produce cleaner markdown output.
903
+ export function surfaceCodeEditorContent(document) {
904
+ for (const pre of document.querySelectorAll('pre[aria-hidden="true"]')) {
905
+ const codeChild = pre.querySelector('code');
906
+ if (!codeChild)
907
+ continue;
908
+ const container = pre.parentElement;
909
+ if (!container)
910
+ continue;
911
+ const textarea = container.querySelector('textarea');
912
+ if (!textarea)
913
+ continue;
914
+ // Extract language from the highlighted code element
915
+ const langMatch = CODE_EDITOR_LANG_REGEX.exec(codeChild.getAttribute('class') ?? '');
916
+ const lang = langMatch?.[1] ?? '';
917
+ // Build a clean pre>code block from the textarea plain text
918
+ const newPre = document.createElement('pre');
919
+ const newCode = document.createElement('code');
920
+ if (lang)
921
+ newCode.setAttribute('class', `language-${lang}`);
922
+ newCode.textContent = textarea.textContent || '';
923
+ newPre.appendChild(newCode);
924
+ container.insertBefore(newPre, pre);
925
+ pre.remove();
926
+ textarea.remove();
927
+ }
928
+ }
888
929
  function stripDocsControls(document) {
889
930
  removeNodes(document.querySelectorAll(DOCS_CONTROL_SELECTORS.join(',')));
890
931
  }
@@ -898,6 +939,7 @@ function stripAriaLiveInstructions(document) {
898
939
  }
899
940
  function runDocsControlPass(document) {
900
941
  normalizeTabContent(document);
942
+ surfaceCodeEditorContent(document);
901
943
  cleanHeadings(document);
902
944
  stripDocsControls(document);
903
945
  stripAriaLiveInstructions(document);
@@ -910,7 +952,15 @@ function runStructuralNoisePass(document, signal) {
910
952
  function runCodeExamplePass(document) {
911
953
  cleanCodeExamples(document);
912
954
  }
955
+ function unwrapOrphanedTableCells(document) {
956
+ for (const cell of document.querySelectorAll('td, th')) {
957
+ if (!cell.closest('table')) {
958
+ cell.replaceWith(...Array.from(cell.childNodes));
959
+ }
960
+ }
961
+ }
913
962
  function runTableNormalizationPass(document) {
963
+ unwrapOrphanedTableCells(document);
914
964
  normalizeTableCells(document);
915
965
  normalizeTableStructure(document);
916
966
  }
@@ -1 +1 @@
1
- {"version":3,"file":"html-translators.d.ts","sourceRoot":"","sources":["../../src/transform/html-translators.ts"],"names":[],"mappings":"AAyhBA,wBAAgB,+BAA+B,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEpE;AAqPD,wBAAgB,4BAA4B,CAC1C,SAAS,EAAE,MAAM,GAChB,MAAM,GAAG,SAAS,CAuBpB;AAOD,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,MAAM,GAAG,SAAS,CAKpB;AACD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAKvE"}
1
+ {"version":3,"file":"html-translators.d.ts","sourceRoot":"","sources":["../../src/transform/html-translators.ts"],"names":[],"mappings":"AAohBA,wBAAgB,+BAA+B,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEpE;AAqPD,wBAAgB,4BAA4B,CAC1C,SAAS,EAAE,MAAM,GAChB,MAAM,GAAG,SAAS,CAuBpB;AAOD,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,MAAM,GAAG,SAAS,CAKpB;AACD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAKvE"}
@@ -364,26 +364,15 @@ function buildDdTranslator() {
364
364
  postprocess: ({ content }) => content.trim() ? `: ${content.trim()}\n` : '',
365
365
  };
366
366
  }
367
- function buildKbdTranslator() {
367
+ function wrapTranslator(prefix, suffix) {
368
368
  return {
369
- postprocess: ({ content }) => `\`${content}\``,
370
- };
371
- }
372
- function buildMarkTranslator() {
373
- return {
374
- postprocess: ({ content }) => `==${content}==`,
375
- };
376
- }
377
- function buildSubTranslator() {
378
- return {
379
- postprocess: ({ content }) => `~${content}~`,
380
- };
381
- }
382
- function buildSupTranslator() {
383
- return {
384
- postprocess: ({ content }) => `^${content}^`,
369
+ postprocess: ({ content }) => `${prefix}${content}${suffix}`,
385
370
  };
386
371
  }
372
+ const buildKbdTranslator = () => wrapTranslator('`', '`');
373
+ const buildMarkTranslator = () => wrapTranslator('==', '==');
374
+ const buildSubTranslator = () => wrapTranslator('~', '~');
375
+ const buildSupTranslator = () => wrapTranslator('^', '^');
387
376
  function buildDetailsTranslator() {
388
377
  return {
389
378
  postprocess: ({ content }) => {
@@ -4,6 +4,7 @@ interface CleanupOptions {
4
4
  url?: string;
5
5
  }
6
6
  export declare function processFencedContent(content: string, processTextSegment: (text: string) => string): string;
7
+ export declare function finalizeMarkdownSections(content: string, options?: Pick<CleanupOptions, 'signal' | 'url'>): string;
7
8
  export declare function cleanupMarkdownArtifacts(content: string, options?: CleanupOptions): string;
8
9
  export {};
9
10
  //# sourceMappingURL=markdown-cleanup.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"markdown-cleanup.d.ts","sourceRoot":"","sources":["../../src/transform/markdown-cleanup.ts"],"names":[],"mappings":"AAqHA,UAAU,cAAc;IACtB,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAofD,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,MAAM,EACf,kBAAkB,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,GAC3C,MAAM,CAyBR;AAaD,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,cAAc,GACvB,MAAM,CAqBR"}
1
+ {"version":3,"file":"markdown-cleanup.d.ts","sourceRoot":"","sources":["../../src/transform/markdown-cleanup.ts"],"names":[],"mappings":"AAqHA,UAAU,cAAc;IACtB,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAofD,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,MAAM,EACf,kBAAkB,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,GAC3C,MAAM,CAyBR;AAaD,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,IAAI,CAAC,cAAc,EAAE,QAAQ,GAAG,KAAK,CAAC,GAC/C,MAAM,CAUR;AAED,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,cAAc,GACvB,MAAM,CAkBR"}
@@ -513,14 +513,22 @@ function stripLeadingBreadcrumbNoise(text) {
513
513
  function stripCopyButtonText(text) {
514
514
  return text.replace(/\[Copy\]\(#copy\)\s*/gi, '');
515
515
  }
516
+ export function finalizeMarkdownSections(content, options) {
517
+ if (!content)
518
+ return '';
519
+ throwIfAborted(options?.signal, options?.url ?? '', 'markdown:cleanup:empty-headings');
520
+ return stripLeadingBreadcrumbNoise(stripLeadingDocsChrome(removeEmptyHeadingSections(content)));
521
+ }
516
522
  export function cleanupMarkdownArtifacts(content, options) {
517
523
  if (!content)
518
524
  return '';
519
525
  throwIfAborted(options?.signal, options?.url ?? '', 'markdown:cleanup:begin');
520
526
  let result = stripCopyButtonText(processFencedContent(content, (text) => processTextBuffer(text.split('\n'), options)).trim());
521
527
  if (!options?.preserveEmptyHeadings) {
522
- throwIfAborted(options?.signal, options?.url ?? '', 'markdown:cleanup:empty-headings');
523
- result = removeEmptyHeadingSections(result);
528
+ result = finalizeMarkdownSections(result, options);
524
529
  }
525
- return stripLeadingBreadcrumbNoise(stripLeadingDocsChrome(result));
530
+ else {
531
+ result = stripLeadingBreadcrumbNoise(stripLeadingDocsChrome(result));
532
+ }
533
+ return result;
526
534
  }
@@ -1,4 +1,4 @@
1
- import { cleanupMarkdownArtifacts, processFencedContent } from './markdown-cleanup.js';
1
+ import { cleanupMarkdownArtifacts, finalizeMarkdownSections, processFencedContent } from './markdown-cleanup.js';
2
2
  import type { ExtractedArticle, ExtractedMetadata, ExtractionResult, MarkdownTransformResult, MetadataBlock, TransformOptions, TransformStageContext } from './types.js';
3
3
  interface StageBudget {
4
4
  totalBudgetMs: number;
@@ -34,5 +34,5 @@ type TransformExecutionOptions = TransformOptions & {
34
34
  };
35
35
  export declare function transformHtmlToMarkdown(html: string, url: string, options: TransformOptions): Promise<MarkdownTransformResult>;
36
36
  export declare function transformBufferToMarkdown(htmlBuffer: Uint8Array, url: string, options: TransformExecutionOptions): Promise<MarkdownTransformResult>;
37
- export { cleanupMarkdownArtifacts, processFencedContent };
37
+ export { cleanupMarkdownArtifacts, finalizeMarkdownSections, processFencedContent, };
38
38
  //# sourceMappingURL=transform.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AAwCA,OAAO,EACL,wBAAwB,EACxB,oBAAoB,EACrB,MAAM,uBAAuB,CAAC;AAqB/B,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AA+BpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AAgJD,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;AA6XD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,WAAW,CAAA;CAExD,GACA,gBAAgB,CAGlB;AAyKD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,aAAa,EACxB,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,WAAW,GAAG,SAAS,CAAC;IACjC,QAAQ,CAAC,EAAE,QAAQ,GAAG,SAAS,CAAC;IAChC,gBAAgB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACxC,GACA,MAAM,CAyBR;AA2DD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,sBAAsB,EAAE,MAAM,GAAG,QAAQ,GACxC,OAAO,CAQT;AAKD,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAuB3B;AA4bD,wBAAgB,gCAAgC,CAC9C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,uBAAuB,CAgBzB;AAaD,UAAU,kBAAkB;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,qBAAqB,IAAI,kBAAkB,GAAG,IAAI,CAEjE;AAED,wBAAsB,2BAA2B,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjE;AAED,KAAK,yBAAyB,GAAG,gBAAgB,GAAG;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAgH1E,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,wBAAsB,yBAAyB,CAC7C,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,OAAO,EAAE,wBAAwB,EAAE,oBAAoB,EAAE,CAAC"}
1
+ {"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AAyCA,OAAO,EACL,wBAAwB,EACxB,wBAAwB,EACxB,oBAAoB,EACrB,MAAM,uBAAuB,CAAC;AAqB/B,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AA+BpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AAgJD,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;AA8XD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,WAAW,CAAA;CAExD,GACA,gBAAgB,CAGlB;AAyKD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,aAAa,EACxB,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,WAAW,GAAG,SAAS,CAAC;IACjC,QAAQ,CAAC,EAAE,QAAQ,GAAG,SAAS,CAAC;IAChC,gBAAgB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACxC,GACA,MAAM,CAyBR;AA2DD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,sBAAsB,EAAE,MAAM,GAAG,QAAQ,GACxC,OAAO,CAQT;AAKD,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAuB3B;AA4bD,wBAAgB,gCAAgC,CAC9C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,uBAAuB,CAgBzB;AAaD,UAAU,kBAAkB;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,qBAAqB,IAAI,kBAAkB,GAAG,IAAI,CAEjE;AAED,wBAAsB,2BAA2B,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjE;AAED,KAAK,yBAAyB,GAAG,gBAAgB,GAAG;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAgH1E,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,wBAAsB,yBAAyB,CAC7C,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,OAAO,EACL,wBAAwB,EACxB,wBAAwB,EACxB,oBAAoB,GACrB,CAAC"}
@@ -3,12 +3,12 @@ import { isProbablyReaderable, Readability } from '@mozilla/readability';
3
3
  import { parseHTML } from 'linkedom';
4
4
  import { config } from '../lib/core.js';
5
5
  import { getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from '../lib/core.js';
6
- import { evaluateArticleContent, extractNoscriptImages, getVisibleTextLength, normalizeTabContent, prepareDocumentForMarkdown, removeNoiseFromHtml, serializeDocumentForMarkdown, } from '../lib/dom-prep.js';
6
+ import { evaluateArticleContent, extractNoscriptImages, getVisibleTextLength, normalizeTabContent, prepareDocumentForMarkdown, removeNoiseFromHtml, serializeDocumentForMarkdown, surfaceCodeEditorContent, } from '../lib/dom-prep.js';
7
7
  import { isRawTextContentUrl } from '../lib/http.js';
8
8
  import { composeAbortSignal, FetchError, getErrorMessage, getUtf8ByteLength, isAsciiOnly, isObject, throwIfAborted, toError, trimDanglingTagFragment, truncateToUtf8Boundary, } from '../lib/utils.js';
9
9
  import { extractLanguageFromClassName } from './html-translators.js';
10
10
  import { translateHtmlFragmentToMarkdown } from './html-translators.js';
11
- import { cleanupMarkdownArtifacts, processFencedContent, } from './markdown-cleanup.js';
11
+ import { cleanupMarkdownArtifacts, finalizeMarkdownSections, processFencedContent, } from './markdown-cleanup.js';
12
12
  import { addSourceToMarkdown, buildMetadataFooter, extractTitleFromRawMarkdown, isRawTextContent, } from './metadata.js';
13
13
  import { extractMetadata, extractMetadataFromHead, mergeMetadata, normalizeDocumentTitle, } from './metadata.js';
14
14
  import { supplementMarkdownFromNextFlight } from './next-flight.js';
@@ -249,6 +249,7 @@ function prepareReadabilityDocument(readabilityDoc) {
249
249
  preserveAlertElements(readabilityDoc);
250
250
  preserveCodeLanguageAttributes(readabilityDoc);
251
251
  normalizeTabContent(readabilityDoc);
252
+ surfaceCodeEditorContent(readabilityDoc);
252
253
  for (const el of readabilityDoc.querySelectorAll('[class*="breadcrumb"],[class*="pagination"]')) {
253
254
  if (el.tagName === 'HTML' || el.tagName === 'BODY')
254
255
  continue;
@@ -825,7 +826,7 @@ function postprocessMarkdownStage({ context, url, signal }, markdown) {
825
826
  let content = maybeStripGithubPrimaryHeading(markdown, context.primaryHeading, url);
826
827
  content = maybePrependSyntheticTitle(content, context, url);
827
828
  content = supplementMarkdownFromNextFlight(content, context.originalHtml);
828
- content = cleanupMarkdownArtifacts(content, signal ? { signal, url } : { url });
829
+ content = finalizeMarkdownSections(content, signal ? { signal, url } : { url });
829
830
  return {
830
831
  markdown: content,
831
832
  title: context.title,
@@ -972,4 +973,4 @@ export async function transformHtmlToMarkdown(html, url, options) {
972
973
  export async function transformBufferToMarkdown(htmlBuffer, url, options) {
973
974
  return transformInputToMarkdown(htmlBuffer, url, options);
974
975
  }
975
- export { cleanupMarkdownArtifacts, processFencedContent };
976
+ export { cleanupMarkdownArtifacts, finalizeMarkdownSections, processFencedContent, };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/fetch-url-mcp",
3
- "version": "1.10.18",
3
+ "version": "1.10.19",
4
4
  "mcpName": "io.github.j0hanz/fetch-url-mcp",
5
5
  "description": "A web content fetcher MCP server that converts HTML to clean, AI and human readable markdown.",
6
6
  "type": "module",