@j0hanz/fetch-url-mcp 1.10.5 → 1.10.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"code-lang.d.ts","sourceRoot":"","sources":["../../src/lib/code-lang.ts"],"names":[],"mappings":"AAyQA,wBAAgB,4BAA4B,CAC1C,SAAS,EAAE,MAAM,GAChB,MAAM,GAAG,SAAS,CAuBpB;AAqBD,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,MAAM,GAAG,SAAS,CAKpB;AACD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAqBvE"}
1
+ {"version":3,"file":"code-lang.d.ts","sourceRoot":"","sources":["../../src/lib/code-lang.ts"],"names":[],"mappings":"AA2QA,wBAAgB,4BAA4B,CAC1C,SAAS,EAAE,MAAM,GAChB,MAAM,GAAG,SAAS,CAuBpB;AAqBD,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,MAAM,GAAG,SAAS,CAKpB;AACD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAqBvE"}
@@ -1,6 +1,5 @@
1
1
  const ASCII_SPACE = 32;
2
2
  const ASCII_TAB = 9;
3
- const ASCII_LT = 60;
4
3
  const ASCII_DIGIT_0 = 48;
5
4
  const ASCII_DIGIT_9 = 57;
6
5
  const ASCII_UPPER_A = 65;
@@ -83,16 +82,12 @@ const PYTHON_UNIQUE_REGEX = /\b(?:def |elif |except |finally:|yield |lambda |rai
83
82
  const JS_SIGNAL_REGEX = /\b(?:const |let |var |function |require\(|=>|===|!==|console\.)/;
84
83
  const CSS_REGEX = /@media|@import|@keyframes|@theme\b|@utility\b|@layer\b|@apply\b|@variant\b|@custom-variant\b|@reference\b|@source\b/;
85
84
  const CSS_PROPERTY_REGEX = /^\s*[a-z][\w-]*\s*:/;
85
+ const PYTHON_REPL_PROMPT_REGEX = /^\s*(?:>>>|\.\.\.)\s/m;
86
+ const PYTHON_OUTPUT_HINT_REGEX = /<(?:QuerySet|[A-Z][A-Za-z0-9_]*:\s)|\bdatetime\.datetime\(|\bDoesNotExist:/;
87
+ const WINDOWS_SHELL_PROMPT_REGEX = /^\s*\.\.\.\\?>\s+\S/m;
88
+ const JSX_TAG_REGEX = /<\/?[A-Z][A-Za-z0-9]*(?:\s+[A-Za-z_:][\w:.-]*(?:\s*=\s*(?:"[^"]*"|'[^']*'|\{[^}]*\}))?)*\s*\/?>/m;
86
89
  function containsJsxTag(code) {
87
- const len = code.length;
88
- for (let i = 0; i < len - 1; i++) {
89
- if (code.charCodeAt(i) === ASCII_LT) {
90
- const next = code.charCodeAt(i + 1);
91
- if (next >= ASCII_UPPER_A && next <= ASCII_UPPER_Z)
92
- return true;
93
- }
94
- }
95
- return false;
90
+ return JSX_TAG_REGEX.test(code);
96
91
  }
97
92
  function isBashLine(line) {
98
93
  const trimmed = line.trimStart();
@@ -101,7 +96,7 @@ function isBashLine(line) {
101
96
  // Shell Prefix
102
97
  if (trimmed.startsWith('#!') ||
103
98
  trimmed.startsWith('$ ') ||
104
- trimmed.startsWith('# ')) {
99
+ WINDOWS_SHELL_PROMPT_REGEX.test(trimmed)) {
105
100
  return true;
106
101
  }
107
102
  const spaceIdx = trimmed.indexOf(' ');
@@ -122,8 +117,9 @@ function detectCssStructure(lines) {
122
117
  const trimmed = line.trimStart();
123
118
  if (trimmed.length === 0)
124
119
  continue;
125
- const hasSelector = (trimmed.startsWith('.') || trimmed.startsWith('#')) &&
126
- trimmed.includes('{');
120
+ if (trimmed.startsWith('# ') || trimmed.startsWith('//'))
121
+ continue;
122
+ const hasSelector = /^[.#][A-Za-z_-][\w-]*\s*\{/.test(trimmed);
127
123
  if (hasSelector)
128
124
  return true;
129
125
  if (trimmed.includes(';') &&
@@ -192,6 +188,14 @@ function matchPython(ctx) {
192
188
  if (matchHtml(ctx))
193
189
  return false;
194
190
  const l = ctx.lower;
191
+ if (PYTHON_REPL_PROMPT_REGEX.test(ctx.code))
192
+ return true;
193
+ if (PYTHON_OUTPUT_HINT_REGEX.test(ctx.code))
194
+ return true;
195
+ if (/^\s*[A-Za-z_][\w.]*\s*=\s*[A-Z][\w.]*\(/m.test(ctx.code))
196
+ return true;
197
+ if (/^\s*[A-Za-z_][\w.]*\.[A-Za-z_][\w]*\s*$/m.test(ctx.code))
198
+ return true;
195
199
  if (l.includes('print(') || l.includes('__name__'))
196
200
  return true;
197
201
  if (l.includes('self.') || l.includes('elif '))
@@ -1 +1 @@
1
- {"version":3,"file":"dom-prep.d.ts","sourceRoot":"","sources":["../../src/lib/dom-prep.ts"],"names":[],"mappings":"AA4kBA,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,MAAM,GACf,MAAM,CAQR;AA0CD,qEAAqE;AACrE,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAG5D;AA0PD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,IAAI,CAWN;AA4BD,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,MAAM,CAcR"}
1
+ {"version":3,"file":"dom-prep.d.ts","sourceRoot":"","sources":["../../src/lib/dom-prep.ts"],"names":[],"mappings":"AA4mBA,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,MAAM,GACf,MAAM,CASR;AA0CD,qEAAqE;AACrE,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI,CAG5D;AA0RD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,IAAI,CAMN;AA4BD,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,MAAM,CAcR"}
@@ -123,6 +123,22 @@ const BASE_NOISE_SELECTORS = {
123
123
  cookieBanners: '[role="dialog"]',
124
124
  hidden: '[style*="display: none"],[style*="display:none"],[style*="visibility: hidden"],[style*="visibility:hidden"],[hidden],[aria-hidden="true"]',
125
125
  };
126
+ const DOCS_CONTROL_SELECTORS = [
127
+ '.content-icon-container',
128
+ '.edit-this-page',
129
+ '.toc-overlay-icon',
130
+ '.theme-toggle-container',
131
+ '.sidebar-toggle',
132
+ '.sidebar-drawer',
133
+ '.toc-drawer',
134
+ '.mobile-header',
135
+ '.overlay.sidebar-overlay',
136
+ '.overlay.toc-overlay',
137
+ '.back-to-top',
138
+ '.backtotop',
139
+ '.headerlink',
140
+ '[title="Edit this page"]',
141
+ ];
126
142
  let cachedContext;
127
143
  let lastContextKey;
128
144
  function escapeRegexLiteral(value) {
@@ -506,8 +522,22 @@ function resolveUrls(document, baseUrlStr) {
506
522
  processUrlElement(el, 'srcset', base, true);
507
523
  }
508
524
  }
525
+ function resolveDocumentBody(document) {
526
+ const { body } = document;
527
+ if (body.innerHTML.trim().length > MIN_BODY_CONTENT_LENGTH)
528
+ return body;
529
+ const { children } = document.documentElement;
530
+ for (const child of children) {
531
+ if (child.tagName === 'BODY' &&
532
+ child.innerHTML.trim().length > MIN_BODY_CONTENT_LENGTH) {
533
+ return child;
534
+ }
535
+ }
536
+ return body;
537
+ }
509
538
  export function serializeDocumentForMarkdown(document, fallback) {
510
- const bodyHtml = document.body.innerHTML;
539
+ const body = resolveDocumentBody(document);
540
+ const bodyHtml = body.innerHTML;
511
541
  if (bodyHtml.trim().length > MIN_BODY_CONTENT_LENGTH)
512
542
  return bodyHtml;
513
543
  const outerHtml = document.documentElement.outerHTML;
@@ -732,21 +762,40 @@ function separateAdjacentInlineElements(document) {
732
762
  }
733
763
  }
734
764
  }
735
- // Called on both raw documents (pre-article path) and article fragments
736
- // (post-Readability). Some passes (stripTabTriggers, etc.) are no-ops
737
- // on Readability output since tabs are already stripped or absent.
738
- export function prepareDocumentForMarkdown(document, baseUrl, signal) {
765
+ function stripDocsControls(document) {
766
+ removeNodes(document.querySelectorAll(DOCS_CONTROL_SELECTORS.join(',')));
767
+ }
768
+ function runDocsControlPass(document) {
739
769
  normalizeTabContent(document);
740
770
  cleanHeadings(document);
741
- stripNoise(document, signal);
771
+ stripDocsControls(document);
742
772
  stripPromoLinks(document);
743
- cleanCodeExamples(document);
744
773
  separateAdjacentInlineElements(document);
774
+ }
775
+ function runStructuralNoisePass(document, signal) {
776
+ stripNoise(document, signal);
777
+ }
778
+ function runCodeExamplePass(document) {
779
+ cleanCodeExamples(document);
780
+ }
781
+ function runTableNormalizationPass(document) {
745
782
  normalizeTableCells(document);
746
783
  normalizeTableStructure(document);
784
+ }
785
+ function runUrlResolutionPass(document, baseUrl) {
747
786
  if (baseUrl)
748
787
  resolveUrls(document, baseUrl);
749
788
  }
789
+ // Called on both raw documents (pre-article path) and article fragments
790
+ // (post-Readability). Some passes (stripTabTriggers, etc.) are no-ops
791
+ // on Readability output since tabs are already stripped or absent.
792
+ export function prepareDocumentForMarkdown(document, baseUrl, signal) {
793
+ runDocsControlPass(document);
794
+ runStructuralNoisePass(document, signal);
795
+ runCodeExamplePass(document);
796
+ runTableNormalizationPass(document);
797
+ runUrlResolutionPass(document, baseUrl);
798
+ }
750
799
  // Some sites put tbody/thead/tfoot inside td/th, which breaks markdown tables.
751
800
  function normalizeTableStructure(document) {
752
801
  for (const table of document.querySelectorAll('table')) {
@@ -14,7 +14,6 @@ interface FetchTransformInput {
14
14
  }
15
15
  export declare function appendTruncationMarker(content: string, marker: string): string;
16
16
  export declare function finalizeInlineMarkdown(markdown: string | undefined, options?: {
17
- truncated?: boolean;
18
17
  maxChars?: number;
19
18
  }): string | undefined;
20
19
  interface FetchPipelineOptions<T> {
@@ -1 +1 @@
1
- {"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../src/lib/fetch-pipeline.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,KAAK,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAgBrE,OAAO,EAEL,gBAAgB,EAEhB,UAAU,EACX,MAAM,YAAY,CAAC;AAEpB,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,CAAC;AAExC,eAAO,MAAM,iBAAiB,mBAAmB,CAAC;AAClD,MAAM,WAAW,mBAAmB;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AACD,UAAU,mBAAmB;IAC3B,MAAM,EAAE,UAAU,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAwED,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,GACb,MAAM,CAkBR;AAWD,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,OAAO,GAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAO,GACvD,MAAM,GAAG,SAAS,CAYpB;AAiBD,UAAU,oBAAoB,CAAC,CAAC;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC;IAC7C,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAC5C,SAAS,EAAE,CAAC,KAAK,EAAE,mBAAmB,EAAE,GAAG,EAAE,MAAM,KAAK,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IACvE,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,MAAM,CAAC;IAClC,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,CAAC,GAAG,SAAS,CAAC;CACjD;AACD,MAAM,WAAW,cAAc,CAAC,CAAC;IAC/B,IAAI,EAAE,CAAC,CAAC;IACR,SAAS,EAAE,OAAO,CAAC;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B;AACD,MAAM,MAAM,gBAAgB,GACxB,aAAa,GACb,aAAa,GACb,WAAW,GACX,eAAe,GACf,cAAc,GACd,gBAAgB,GAChB,iBAAiB,GACjB,gBAAgB,GAChB,iBAAiB,CAAC;AAkMtB,wBAAsB,oBAAoB,CAAC,CAAC,EAC1C,OAAO,EAAE,oBAAoB,CAAC,CAAC,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAgD5B;AAED,MAAM,MAAM,sBAAsB,GAAG,uBAAuB,GAAG;IAC7D,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B,CAAC;AAsBF,wBAAgB,yBAAyB,CACvC,MAAM,EAAE,MAAM,GACb,sBAAsB,GAAG,SAAS,CAcpC;AAED,eAAO,MAAM,iBAAiB,GAC5B,OAAO,mBAAmB,EAC1B,KAAK,MAAM,EACX,SAAS,WAAW,KACnB,OAAO,CAAC,sBAAsB,CAchC,CAAC;AAEF,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,sBAAsB,GAC7B,MAAM,CASR;AAED,UAAU,kBAAkB;IAC1B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,MAAM,CAAC,EAAE,WAAW,CAAC;IAC9B,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC;IACtD,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;IACrD,QAAQ,CAAC,SAAS,EAAE,CAClB,KAAK,EAAE,mBAAmB,EAC1B,aAAa,EAAE,MAAM,KAClB,sBAAsB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;IAC9D,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,sBAAsB,KAAK,MAAM,CAAC;IAChE,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,sBAAsB,GAAG,SAAS,CAAC;CAC/E;AACD,UAAU,eAAe;IACvB,QAAQ,CAAC,oBAAoB,CAAC,EAAE,OAAO,oBAAoB,CAAC;CAC7D;AAiBD,wBAAsB,kBAAkB,CACtC,OAAO,EAAE,kBAAkB,EAC3B,IAAI,GAAE,eAAoB,GACzB,OAAO,CAAC;IACT,QAAQ,EAAE,cAAc,CAAC,sBAAsB,CAAC,CAAC;IACjD,YAAY,EAAE,mBAAmB,CAAC;CACnC,CAAC,CAUD"}
1
+ {"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../src/lib/fetch-pipeline.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,KAAK,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAgBrE,OAAO,EAEL,gBAAgB,EAEhB,UAAU,EACX,MAAM,YAAY,CAAC;AAEpB,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,CAAC;AAExC,eAAO,MAAM,iBAAiB,mBAAmB,CAAC;AAClD,MAAM,WAAW,mBAAmB;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AACD,UAAU,mBAAmB;IAC3B,MAAM,EAAE,UAAU,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAwED,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,GACb,MAAM,CAkBR;AAWD,wBAAgB,sBAAsB,CACpC,QAAQ,EAAE,MAAM,GAAG,SAAS,EAC5B,OAAO,GAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAO,GAClC,MAAM,GAAG,SAAS,CAQpB;AAyBD,UAAU,oBAAoB,CAAC,CAAC;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC;IAC7C,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAC5C,SAAS,EAAE,CAAC,KAAK,EAAE,mBAAmB,EAAE,GAAG,EAAE,MAAM,KAAK,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IACvE,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,MAAM,CAAC;IAClC,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,CAAC,GAAG,SAAS,CAAC;CACjD;AACD,MAAM,WAAW,cAAc,CAAC,CAAC;IAC/B,IAAI,EAAE,CAAC,CAAC;IACR,SAAS,EAAE,OAAO,CAAC;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B;AACD,MAAM,MAAM,gBAAgB,GACxB,aAAa,GACb,aAAa,GACb,WAAW,GACX,eAAe,GACf,cAAc,GACd,gBAAgB,GAChB,iBAAiB,GACjB,gBAAgB,GAChB,iBAAiB,CAAC;AAkMtB,wBAAsB,oBAAoB,CAAC,CAAC,EAC1C,OAAO,EAAE,oBAAoB,CAAC,CAAC,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAgD5B;AAED,MAAM,MAAM,sBAAsB,GAAG,uBAAuB,GAAG;IAC7D,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B,CAAC;AAsBF,wBAAgB,yBAAyB,CACvC,MAAM,EAAE,MAAM,GACb,sBAAsB,GAAG,SAAS,CAcpC;AAED,eAAO,MAAM,iBAAiB,GAC5B,OAAO,mBAAmB,EAC1B,KAAK,MAAM,EACX,SAAS,WAAW,KACnB,OAAO,CAAC,sBAAsB,CAchC,CAAC;AAEF,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,sBAAsB,GAC7B,MAAM,CASR;AAED,UAAU,kBAAkB;IAC1B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,MAAM,CAAC,EAAE,WAAW,CAAC;IAC9B,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC;IACtD,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;IACrD,QAAQ,CAAC,SAAS,EAAE,CAClB,KAAK,EAAE,mBAAmB,EAC1B,aAAa,EAAE,MAAM,KAClB,sBAAsB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;IAC9D,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,sBAAsB,KAAK,MAAM,CAAC;IAChE,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,sBAAsB,GAAG,SAAS,CAAC;CAC/E;AACD,UAAU,eAAe;IACvB,QAAQ,CAAC,oBAAoB,CAAC,EAAE,OAAO,oBAAoB,CAAC;CAC7D;AAiBD,wBAAsB,kBAAkB,CACtC,OAAO,EAAE,kBAAkB,EAC3B,IAAI,GAAE,eAAoB,GACzB,OAAO,CAAC;IACT,QAAQ,EAAE,cAAc,CAAC,sBAAsB,CAAC,CAAC;IACjD,YAAY,EAAE,mBAAmB,CAAC;CACnC,CAAC,CAaD"}
@@ -89,20 +89,24 @@ function normalizeMarkdownForTruncation(markdown, truncated) {
89
89
  export function finalizeInlineMarkdown(markdown, options = {}) {
90
90
  if (markdown === undefined)
91
91
  return undefined;
92
- const normalized = normalizeMarkdownForTruncation(markdown, options.truncated ?? false);
93
92
  const maxChars = options.maxChars ?? 0;
94
- return maxChars > 0 && normalized.length > maxChars
95
- ? truncateWithMarker(normalized, maxChars, TRUNCATION_MARKER)
96
- : normalized;
93
+ return maxChars > 0 && markdown.length > maxChars
94
+ ? truncateWithMarker(markdown, maxChars, TRUNCATION_MARKER)
95
+ : markdown;
97
96
  }
98
- function applyInlineContentLimit(content) {
99
- const contentSize = content.length;
97
+ function applyInlineContentLimit(content, truncated = false) {
98
+ const normalized = normalizeMarkdownForTruncation(content, truncated);
99
+ const contentSize = normalized.length;
100
100
  const inlineLimit = config.constants.maxInlineContentChars;
101
101
  if (inlineLimit <= 0 || contentSize <= inlineLimit) {
102
- return { content, contentSize };
102
+ return {
103
+ content: normalized,
104
+ contentSize,
105
+ ...(truncated ? { truncated } : {}),
106
+ };
103
107
  }
104
108
  return {
105
- content: truncateWithMarker(content, inlineLimit, TRUNCATION_MARKER),
109
+ content: truncateWithMarker(normalized, inlineLimit, TRUNCATION_MARKER),
106
110
  contentSize,
107
111
  truncated: true,
108
112
  };
@@ -327,6 +331,6 @@ export async function performSharedFetch(options, deps = {}) {
327
331
  const pipeline = await executePipeline(buildSharedFetchPipelineOptions(options));
328
332
  options.onStage?.('prepare_output');
329
333
  options.onStage?.('finalize_output');
330
- const inlineResult = applyInlineContentLimit(pipeline.data.content);
334
+ const inlineResult = applyInlineContentLimit(pipeline.data.content, pipeline.data.truncated);
331
335
  return { pipeline, inlineResult };
332
336
  }
@@ -1 +1 @@
1
- {"version":3,"file":"md-cleanup.d.ts","sourceRoot":"","sources":["../../src/lib/md-cleanup.ts"],"names":[],"mappings":"AAiGA,UAAU,cAAc;IACtB,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAkeD;;;;GAIG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,MAAM,EACf,kBAAkB,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,GAC3C,MAAM,CAuCR;AAWD,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,cAAc,GACvB,MAAM,CAmBR"}
1
+ {"version":3,"file":"md-cleanup.d.ts","sourceRoot":"","sources":["../../src/lib/md-cleanup.ts"],"names":[],"mappings":"AA0GA,UAAU,cAAc;IACtB,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AA8eD;;;;GAIG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,MAAM,EACf,kBAAkB,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,GAC3C,MAAM,CAuCR;AAWD,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,cAAc,GACvB,MAAM,CAmBR"}
@@ -60,6 +60,14 @@ const REGEX = {
60
60
  const HEADING_KEYWORDS = new Set(config.markdownCleanup.headingKeywords.map((value) => value.toLocaleLowerCase(config.i18n.locale)));
61
61
  // ── Prefix patterns ─────────────────────────────────────────────────
62
62
  const SPECIAL_PREFIXES = /^(?:example|note|tip|warning|important|caution):\s+\S/i;
63
+ const REPL_PROMPT_LINE = /^(?:>>>|\.\.\.|In \[\d+\]:|Out\[\d+\]:|\.\.\.\\?>)\s*/;
64
+ const LEADING_DOCS_CHROME_PATTERNS = [
65
+ /^Edit this page$/i,
66
+ /^Toggle table of contents sidebar$/i,
67
+ /^Toggle site navigation sidebar$/i,
68
+ /^Toggle Light \/ Dark \/ Auto color theme$/i,
69
+ /^Back to top$/i,
70
+ ];
63
71
  // ── TypeDoc prefixes ────────────────────────────────────────────────
64
72
  const TYPEDOC_PREFIXES = [
65
73
  'Defined in:',
@@ -127,6 +135,8 @@ function isTitleCaseOrKeyword(trimmed) {
127
135
  function getHeadingPrefix(trimmed) {
128
136
  if (trimmed.length > MAX_LINE_LENGTH)
129
137
  return null;
138
+ if (REPL_PROMPT_LINE.test(trimmed))
139
+ return null;
130
140
  // Fast path: Check common markdown markers first
131
141
  const firstChar = trimmed.charCodeAt(0);
132
142
  if (firstChar === ASCII_HASH ||
@@ -396,6 +406,18 @@ function normalizeMarkdownSpacing(text) {
396
406
  result = escapeAngleBracketsInMarkdownTables(result);
397
407
  return normalizeNestedListIndentation(result);
398
408
  }
409
+ function stripLeadingDocsChrome(text) {
410
+ const lines = text.split('\n');
411
+ const cleaned = lines.map((line, index) => {
412
+ if (index >= 12)
413
+ return line;
414
+ const trimmed = line.trim();
415
+ return LEADING_DOCS_CHROME_PATTERNS.some((pattern) => pattern.test(trimmed))
416
+ ? ''
417
+ : line;
418
+ });
419
+ return cleaned.join('\n').replace(REGEX.DOUBLE_NEWLINE_REDUCER, '\n\n');
420
+ }
399
421
  function fixConcatenatedProperties(text) {
400
422
  let result = text;
401
423
  for (let k = 0; k < PROPERTY_FIX_MAX_PASSES; k++) {
@@ -515,5 +537,5 @@ export function cleanupMarkdownArtifacts(content, options) {
515
537
  throwIfAborted(options?.signal, options?.url ?? '', 'markdown:cleanup:empty-headings');
516
538
  result = removeEmptyHeadingSections(result);
517
539
  }
518
- return stripLeadingBreadcrumbNoise(result);
540
+ return stripLeadingBreadcrumbNoise(stripLeadingDocsChrome(result));
519
541
  }
@@ -1 +1 @@
1
- {"version":3,"file":"fetch-url.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-url.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACzE,OAAO,KAAK,EACV,YAAY,EAEb,MAAM,oCAAoC,CAAC;AAE5C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAexB,OAAO,EAEL,KAAK,gBAAgB,EACtB,MAAM,oBAAoB,CAAC;AAI5B,OAAO,EACL,mBAAmB,EAIpB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAIL,KAAK,sBAAsB,EAE5B,MAAM,2BAA2B,CAAC;AAMnC,KAAK,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEzD,UAAU,gBAAgB;IACxB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,SAAS,CAAC;IACxD,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,eAAO,MAAM,mBAAmB,cAAc,CAAC;AA6L/C,wBAAsB,mBAAmB,CACvC,KAAK,EAAE,aAAa,EACpB,KAAK,CAAC,EAAE,gBAAgB,GACvB,OAAO,CAAC,gBAAgB,CAAC,CAQ3B;AAqBD,MAAM,WAAW,wBAAwB;IACvC,cAAc,EAAE,CAAC,OAAO,EAAE,sBAAsB,KAAK,IAAI,CAAC;CAC3D;AA4BD,wBAAgB,aAAa,CAAC,MAAM,EAAE,SAAS,GAAG,wBAAwB,CAkCzE"}
1
+ {"version":3,"file":"fetch-url.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-url.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACzE,OAAO,KAAK,EACV,YAAY,EAEb,MAAM,oCAAoC,CAAC;AAE5C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAexB,OAAO,EAEL,KAAK,gBAAgB,EACtB,MAAM,oBAAoB,CAAC;AAI5B,OAAO,EACL,mBAAmB,EAIpB,MAAM,eAAe,CAAC;AAEvB,OAAO,EAIL,KAAK,sBAAsB,EAE5B,MAAM,2BAA2B,CAAC;AAMnC,KAAK,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEzD,UAAU,gBAAgB;IACxB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,OAAO,EAAE,YAAY,EAAE,CAAC;IACxB,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,SAAS,CAAC;IACxD,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,eAAO,MAAM,mBAAmB,cAAc,CAAC;AA4L/C,wBAAsB,mBAAmB,CACvC,KAAK,EAAE,aAAa,EACpB,KAAK,CAAC,EAAE,gBAAgB,GACvB,OAAO,CAAC,gBAAgB,CAAC,CAQ3B;AAqBD,MAAM,WAAW,wBAAwB;IACvC,cAAc,EAAE,CAAC,OAAO,EAAE,sBAAsB,KAAK,IAAI,CAAC;CAC3D;AA4BD,wBAAgB,aAAa,CAAC,MAAM,EAAE,SAAS,GAAG,wBAAwB,CAkCzE"}
@@ -59,7 +59,6 @@ function getUrlContext(urlStr) {
59
59
  function buildStructuredContent(pipeline, inlineResult, inputUrl) {
60
60
  const truncated = inlineResult.truncated ?? pipeline.data.truncated;
61
61
  const markdown = finalizeInlineMarkdown(inlineResult.content, {
62
- truncated: pipeline.data.truncated,
63
62
  maxChars: config.constants.maxInlineContentChars,
64
63
  });
65
64
  const metadata = normalizeExtractedMetadata(pipeline.data.metadata);
@@ -1 +1 @@
1
- {"version":3,"file":"html-translators.d.ts","sourceRoot":"","sources":["../../src/transform/html-translators.ts"],"names":[],"mappings":"AAigBA,wBAAgB,+BAA+B,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEpE"}
1
+ {"version":3,"file":"html-translators.d.ts","sourceRoot":"","sources":["../../src/transform/html-translators.ts"],"names":[],"mappings":"AAsgBA,wBAAgB,+BAA+B,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEpE"}
@@ -351,25 +351,26 @@ function buildSpanTranslator(ctx) {
351
351
  // ---------------------------------------------------------------------------
352
352
  // DL helpers
353
353
  // ---------------------------------------------------------------------------
354
- function resolveDlNodeName(child) {
355
- if (!isLikeNode(child))
354
+ function normalizeDefinitionListContent(content) {
355
+ const lines = content
356
+ .split('\n')
357
+ .map((line) => line.trim())
358
+ .filter(Boolean);
359
+ if (lines.length === 0)
356
360
  return '';
357
- const raw = child.nodeName;
358
- return typeof raw === 'string' ? raw.toUpperCase() : '';
359
- }
360
- function resolveDlTextContent(child) {
361
- if (!isLikeNode(child))
362
- return '';
363
- const raw = child.textContent;
364
- return typeof raw === 'string' ? raw.trim() : '';
365
- }
366
- function buildDlChildFragment(child) {
367
- const nodeName = resolveDlNodeName(child);
368
- if (nodeName === 'DT')
369
- return `**${resolveDlTextContent(child)}**\n`;
370
- if (nodeName === 'DD')
371
- return `: ${resolveDlTextContent(child)}\n`;
372
- return null;
361
+ const normalized = [];
362
+ for (const line of lines) {
363
+ const isDefinition = line.startsWith(': ');
364
+ const previous = normalized[normalized.length - 1];
365
+ if (previous &&
366
+ previous.length > 0 &&
367
+ !previous.startsWith(': ') &&
368
+ !isDefinition) {
369
+ normalized.push('');
370
+ }
371
+ normalized.push(line);
372
+ }
373
+ return normalized.join('\n');
373
374
  }
374
375
  // ---------------------------------------------------------------------------
375
376
  // Translator registry + converter singleton
@@ -378,21 +379,18 @@ function createCustomTranslators() {
378
379
  return {
379
380
  code: (ctx) => buildCodeTranslator(ctx),
380
381
  img: (ctx) => buildImageTranslator(ctx),
381
- dl: (ctx) => {
382
- if (!isObject(ctx))
383
- return { content: '' };
384
- const { node } = ctx;
385
- if (!isLikeNode(node))
386
- return { content: '' };
387
- const childNodes = Array.from(node.childNodes ?? []);
388
- let items = '';
389
- for (const child of childNodes) {
390
- const fragment = buildDlChildFragment(child);
391
- if (fragment !== null)
392
- items += fragment;
393
- }
394
- return { content: items ? `\n${items}\n` : '' };
395
- },
382
+ dl: () => ({
383
+ postprocess: ({ content }) => {
384
+ const normalized = normalizeDefinitionListContent(content);
385
+ return normalized ? `\n\n${normalized}\n\n` : '';
386
+ },
387
+ }),
388
+ dt: () => ({
389
+ postprocess: ({ content }) => `${content.trim()}\n`,
390
+ }),
391
+ dd: () => ({
392
+ postprocess: ({ content }) => content.trim() ? `: ${content.trim()}\n` : '',
393
+ }),
396
394
  div: buildDivTranslator,
397
395
  kbd: () => ({
398
396
  postprocess: ({ content }) => `\`${content}\``,
@@ -1 +1 @@
1
- {"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AAsDA,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AAqCpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AAiJD,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;AA6YD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,WAAW,CAAA;CAExD,GACA,gBAAgB,CAGlB;AAmLD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,aAAa,EACxB,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B,GACA,MAAM,CAsBR;AAkJD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,sBAAsB,EAAE,MAAM,GAAG,QAAQ,GACxC,OAAO,CAQT;AAgED,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAuB3B;AA4cD,wBAAgB,gCAAgC,CAC9C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,uBAAuB,CAgBzB;AAaD,UAAU,kBAAkB;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,qBAAqB,IAAI,kBAAkB,GAAG,IAAI,CAEjE;AAED,wBAAsB,2BAA2B,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjE;AAED,KAAK,yBAAyB,GAAG,gBAAgB,GAAG;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AA+G1E,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,wBAAsB,yBAAyB,CAC7C,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,uBAAuB,CAAC,CAElC"}
1
+ {"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AAsDA,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AAqCpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AAiJD,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;AA6YD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,WAAW,CAAA;CAExD,GACA,gBAAgB,CAGlB;AAmLD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,aAAa,EACxB,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B,GACA,MAAM,CAsBR;AA+JD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,sBAAsB,EAAE,MAAM,GAAG,QAAQ,GACxC,OAAO,CAQT;AAgED,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAuB3B;AAweD,wBAAgB,gCAAgC,CAC9C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,uBAAuB,CAgBzB;AAaD,UAAU,kBAAkB;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,qBAAqB,IAAI,kBAAkB,GAAG,IAAI,CAEjE;AAED,wBAAsB,2BAA2B,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjE;AAED,KAAK,yBAAyB,GAAG,gBAAgB,GAAG;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AA+G1E,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,wBAAsB,yBAAyB,CAC7C,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,uBAAuB,CAAC,CAElC"}
@@ -640,14 +640,9 @@ const RETENTION_RULES = [
640
640
  { selector: 'pre', minOriginal: 1, ratio: 0.15 },
641
641
  { selector: 'table', minOriginal: 1, ratio: 0.5 },
642
642
  { selector: 'img', minOriginal: 4, ratio: 0.2 },
643
- {
644
- selector: 'button,[role="tab"],[role="tabpanel"],[aria-controls]',
645
- minOriginal: 6,
646
- ratio: 0.1,
647
- },
648
643
  ];
649
644
  const MIN_HEADINGS_FOR_EMPTY_SECTION_GATE = 5;
650
- const MAX_EMPTY_SECTION_RATIO = 0.05;
645
+ const MAX_EMPTY_SECTION_RATIO = 0.15;
651
646
  const MIN_LINE_LENGTH_FOR_TRUNCATION_CHECK = 20;
652
647
  const MAX_TRUNCATED_LINE_RATIO = 0.95;
653
648
  function resolveHtmlDocument(htmlOrDocument) {
@@ -695,16 +690,31 @@ function getTextContentSkippingHidden(node, parts) {
695
690
  }
696
691
  }
697
692
  }
693
+ function resolveBody(document) {
694
+ const { body } = document;
695
+ if ((body.textContent || '').trim().length > 0)
696
+ return body;
697
+ const { children } = document.documentElement;
698
+ for (const child of children) {
699
+ if (child.tagName === 'BODY' &&
700
+ (child.textContent || '').trim().length > 0) {
701
+ return child;
702
+ }
703
+ }
704
+ return body;
705
+ }
698
706
  function getVisibleTextLength(htmlOrDocument) {
699
707
  if (typeof htmlOrDocument === 'string') {
700
708
  const doc = resolveHtmlDocument(htmlOrDocument);
701
- for (const el of doc.body.querySelectorAll('script,style,noscript')) {
709
+ const body = resolveBody(doc);
710
+ for (const el of body.querySelectorAll('script,style,noscript')) {
702
711
  el.remove();
703
712
  }
704
- return (doc.body.textContent || '').replace(/\s+/g, ' ').trim().length;
713
+ return (body.textContent || '').replace(/\s+/g, ' ').trim().length;
705
714
  }
715
+ const body = resolveBody(htmlOrDocument);
706
716
  const parts = [];
707
- getTextContentSkippingHidden(htmlOrDocument.body, parts);
717
+ getTextContentSkippingHidden(body, parts);
708
718
  return parts.join('').replace(/\s+/g, ' ').trim().length;
709
719
  }
710
720
  export function isExtractionSufficient(article, originalHtmlOrDocument) {
@@ -1026,14 +1036,16 @@ function resolveContentSource(params) {
1026
1036
  ...(params.signal ? { signal: params.signal } : {}),
1027
1037
  });
1028
1038
  }
1029
- function buildMarkdownFromContext(context, url, signal) {
1030
- let content = stageTracker.run(url, 'transform:markdown', () => htmlToMarkdown(context.sourceHtml, context.metadata, {
1039
+ function renderMarkdownStage({ context, url, signal, }) {
1040
+ return stageTracker.run(url, 'transform:markdown', () => htmlToMarkdown(context.sourceHtml, context.metadata, {
1031
1041
  url,
1032
1042
  ...(signal ? { signal } : {}),
1033
1043
  ...(context.document ? { document: context.document } : {}),
1034
1044
  ...(context.skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
1035
1045
  }));
1036
- content = maybeStripGithubPrimaryHeading(content, context.primaryHeading, url);
1046
+ }
1047
+ function postprocessMarkdownStage({ context, url, signal }, markdown) {
1048
+ let content = maybeStripGithubPrimaryHeading(markdown, context.primaryHeading, url);
1037
1049
  content = maybePrependSyntheticTitle(content, context, url);
1038
1050
  content = supplementMarkdownFromNextFlight(content, context.originalHtml);
1039
1051
  content = cleanupMarkdownArtifacts(content, signal ? { signal, url } : { url });
@@ -1044,6 +1056,11 @@ function buildMarkdownFromContext(context, url, signal) {
1044
1056
  metadata: context.extractedMetadata,
1045
1057
  };
1046
1058
  }
1059
+ function buildMarkdownFromContext(context, url, signal) {
1060
+ const renderContext = { context, url, signal };
1061
+ const markdown = renderMarkdownStage(renderContext);
1062
+ return postprocessMarkdownStage(renderContext, markdown);
1063
+ }
1047
1064
  function resolveTransformContentResult(html, url, options, signal) {
1048
1065
  const rawResult = stageTracker.run(url, 'transform:raw', () => tryTransformRawContent({
1049
1066
  html,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/fetch-url-mcp",
3
- "version": "1.10.5",
3
+ "version": "1.10.6",
4
4
  "mcpName": "io.github.j0hanz/fetch-url-mcp",
5
5
  "description": "A web content fetcher MCP server that converts HTML to clean, AI and human readable markdown.",
6
6
  "type": "module",