@j0hanz/fetch-url-mcp 1.9.4 → 1.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"code-lang.d.ts","sourceRoot":"","sources":["../../src/lib/code-lang.ts"],"names":[],"mappings":"AA6QA,wBAAgB,4BAA4B,CAC1C,SAAS,EAAE,MAAM,GAChB,MAAM,GAAG,SAAS,CAuBpB;AAqBD,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,MAAM,GAAG,SAAS,CAKpB;AACD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAqBvE"}
1
+ {"version":3,"file":"code-lang.d.ts","sourceRoot":"","sources":["../../src/lib/code-lang.ts"],"names":[],"mappings":"AAgRA,wBAAgB,4BAA4B,CAC1C,SAAS,EAAE,MAAM,GAChB,MAAM,GAAG,SAAS,CAuBpB;AAqBD,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,MAAM,GAAG,SAAS,CAKpB;AACD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CAqBvE"}
@@ -82,7 +82,7 @@ const RUST_REGEX = /\b(?:fn|impl|struct|enum)\b/;
82
82
  const JS_REGEX = /\b(?:const|let|var|function|class|async|await|export|import)\b/;
83
83
  const PYTHON_UNIQUE_REGEX = /\b(?:def |elif |except |finally:|yield |lambda |raise |pass$)/m;
84
84
  const JS_SIGNAL_REGEX = /\b(?:const |let |var |function |require\(|=>|===|!==|console\.)/;
85
- const CSS_REGEX = /@media|@import|@keyframes/;
85
+ const CSS_REGEX = /@media|@import|@keyframes|@theme\b|@utility\b|@layer\b|@apply\b|@variant\b|@custom-variant\b|@reference\b|@source\b/;
86
86
  const CSS_PROPERTY_REGEX = /^\s*[a-z][\w-]*\s*:/;
87
87
  function containsJsxTag(code) {
88
88
  const len = code.length;
@@ -195,6 +195,8 @@ function hasJsSignals(lowerCode) {
195
195
  lowerCode.includes("from '"));
196
196
  }
197
197
  function matchPython(ctx) {
198
+ if (matchHtml(ctx))
199
+ return false;
198
200
  const l = ctx.lower;
199
201
  if (l.includes('print(') || l.includes('__name__'))
200
202
  return true;
@@ -229,6 +231,7 @@ const LANGUAGES = [
229
231
  { lang: 'jsx', weight: 22, match: matchJsx },
230
232
  { lang: 'typescript', weight: 20, match: matchTypeScript },
231
233
  { lang: 'sql', weight: 20, match: matchSql },
234
+ { lang: 'html', weight: 19, match: matchHtml },
232
235
  { lang: 'python', weight: 18, match: matchPython },
233
236
  {
234
237
  lang: 'css',
@@ -238,7 +241,6 @@ const LANGUAGES = [
238
241
  { lang: 'bash', weight: 15, match: (ctx) => detectBashIndicators(ctx.lines) },
239
242
  { lang: 'yaml', weight: 15, match: (ctx) => detectYamlStructure(ctx.lines) },
240
243
  { lang: 'javascript', weight: 15, match: (ctx) => JS_REGEX.test(ctx.lower) },
241
- { lang: 'html', weight: 12, match: matchHtml },
242
244
  {
243
245
  lang: 'json',
244
246
  weight: 10,
@@ -1 +1 @@
1
- {"version":3,"file":"dom-prep.d.ts","sourceRoot":"","sources":["../../src/lib/dom-prep.ts"],"names":[],"mappings":"AA8hBA,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,MAAM,GACf,MAAM,CAQR;AA4DD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,IAAI,CAiBN;AA+DD,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,MAAM,CAcR"}
1
+ {"version":3,"file":"dom-prep.d.ts","sourceRoot":"","sources":["../../src/lib/dom-prep.ts"],"names":[],"mappings":"AAwlBA,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,MAAM,GACf,MAAM,CAQR;AAmPD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,IAAI,CAkBN;AAiED,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,MAAM,CAcR"}
@@ -17,6 +17,8 @@ const NOISE_PATTERNS = [
17
17
  ];
18
18
  const HEADER_NOISE_PATTERN = /\b(site-header|masthead|topbar|navbar|nav(?:bar)?|menu|header-nav)\b/i;
19
19
  const FIXED_OR_HIGH_Z_PATTERN = /\b(?:fixed|sticky|z-(?:4\d|50)|isolate)\b/;
20
+ const HEADING_PERMALINK_TEXT_PATTERN = /^(?:#|¶|§|¤|🔗)$/u;
21
+ const HEADING_PERMALINK_CLASS_PATTERN = /\b(?:mark|permalink|hash-link|anchor(?:js)?-?link|header-?link|heading-anchor|deep-link)\b/i;
20
22
  const SKIP_URL_PREFIXES = [
21
23
  '#',
22
24
  'javascript:',
@@ -50,6 +52,9 @@ const NAVIGATION_ROLES = new Set([
50
52
  'alertdialog',
51
53
  'search',
52
54
  ]);
55
+ const INLINE_DEMO_INSTRUCTION_MAX_CHARS = 160;
56
+ const REDUNDANT_PREVIEW_SEGMENT_MAX_CHARS = 60;
57
+ const REDUNDANT_PREVIEW_MAX_SEGMENTS = 12;
53
58
  const INTERACTIVE_CONTENT_ROLES = new Set([
54
59
  'tabpanel',
55
60
  'tab',
@@ -362,11 +367,8 @@ function cleanHeadings(document) {
362
367
  const a = anchors[j];
363
368
  if (!a?.parentNode)
364
369
  continue;
365
- const href = a.getAttribute('href') ?? '';
366
- const txt = (a.textContent || '').replace(/[\u200B\s]/g, '');
367
- if (href.startsWith('#') && txt.length === 0) {
370
+ if (isHeadingPermalinkAnchor(a))
368
371
  a.remove();
369
- }
370
372
  }
371
373
  // Strip zero-width spaces from text nodes
372
374
  const walker = document.createTreeWalker(h, NODE_FILTER_SHOW_TEXT);
@@ -378,6 +380,47 @@ function cleanHeadings(document) {
378
380
  }
379
381
  }
380
382
  }
383
+ function getCollapsedHeadingAnchorText(anchor) {
384
+ return (anchor.textContent || '').replace(/[\u200B\s]/g, '');
385
+ }
386
+ function isHeadingPermalinkAnchor(anchor) {
387
+ const href = anchor.getAttribute('href') ?? '';
388
+ if (!href.startsWith('#'))
389
+ return false;
390
+ const text = getCollapsedHeadingAnchorText(anchor);
391
+ if (text.length === 0 || HEADING_PERMALINK_TEXT_PATTERN.test(text)) {
392
+ return true;
393
+ }
394
+ const className = anchor.getAttribute('class') ?? '';
395
+ if (HEADING_PERMALINK_CLASS_PATTERN.test(className) && text.length <= 2) {
396
+ return true;
397
+ }
398
+ const ariaHidden = anchor.getAttribute('aria-hidden');
399
+ const tabindex = anchor.getAttribute('tabindex');
400
+ return (ariaHidden === 'true' || tabindex === '-1') && text.length <= 2;
401
+ }
402
+ function getDirectRows(section) {
403
+ return Array.from(section.children).filter((child) => child.tagName === 'TR');
404
+ }
405
+ function getDirectCells(row) {
406
+ return Array.from(row.children).filter((child) => child.tagName === 'TH' || child.tagName === 'TD');
407
+ }
408
+ function hoistNestedRows(table) {
409
+ const sections = Array.from(table.querySelectorAll('thead,tbody,tfoot'));
410
+ for (const section of sections) {
411
+ const rows = getDirectRows(section);
412
+ for (const row of rows) {
413
+ let insertAfter = row;
414
+ for (const cell of getDirectCells(row)) {
415
+ const nestedRows = Array.from(cell.querySelectorAll('tr')).filter((nested) => nested.closest('table') === table);
416
+ for (const nestedRow of nestedRows) {
417
+ insertAfter.after(nestedRow);
418
+ insertAfter = nestedRow;
419
+ }
420
+ }
421
+ }
422
+ }
423
+ }
381
424
  function stripNoise(document, context, signal) {
382
425
  cleanHeadings(document);
383
426
  // Structural Removal
@@ -506,6 +549,130 @@ function escapeTableCellPipes(document) {
506
549
  }
507
550
  }
508
551
  }
552
+ function normalizeWhitespace(value) {
553
+ return value.replace(/\s+/g, ' ').trim();
554
+ }
555
+ function hasDirectPreDescendant(element) {
556
+ return (element.tagName === 'PRE' ||
557
+ Array.from(element.children).some((child) => child.tagName === 'PRE' || child.querySelector('pre') !== null));
558
+ }
559
+ function collectLeafTextSegments(element) {
560
+ const seen = new Set();
561
+ const segments = [];
562
+ const candidates = element.querySelectorAll('p,li,div,span');
563
+ for (const candidate of candidates) {
564
+ if (candidate.children.length > 0 ||
565
+ candidate.querySelector('pre,code,table,ul,ol,blockquote,figure') !== null) {
566
+ continue;
567
+ }
568
+ const text = normalizeWhitespace(candidate.textContent || '');
569
+ if (text.length === 0 ||
570
+ text.length > REDUNDANT_PREVIEW_SEGMENT_MAX_CHARS ||
571
+ seen.has(text)) {
572
+ continue;
573
+ }
574
+ seen.add(text);
575
+ segments.push(text);
576
+ }
577
+ if (segments.length > 0)
578
+ return segments;
579
+ const fallback = normalizeWhitespace(element.textContent || '');
580
+ return fallback ? [fallback] : [];
581
+ }
582
+ function isHostnameLike(value) {
583
+ return /^[a-z0-9.-]+\.[a-z]{2,}$/i.test(value);
584
+ }
585
+ function hasPreviewMedia(element) {
586
+ return element.querySelector('svg,canvas') !== null;
587
+ }
588
+ function isRedundantCodePreview(preview, codeContainer) {
589
+ if (preview.tagName === 'FIGCAPTION' ||
590
+ preview.querySelector('a[href],button,input,select,textarea,form,video,audio,iframe,table,ul,ol,blockquote') !== null) {
591
+ return false;
592
+ }
593
+ const segments = collectLeafTextSegments(preview);
594
+ if (segments.length === 0 ||
595
+ segments.length > REDUNDANT_PREVIEW_MAX_SEGMENTS ||
596
+ segments.some((segment) => segment.length > REDUNDANT_PREVIEW_SEGMENT_MAX_CHARS)) {
597
+ return false;
598
+ }
599
+ const codeText = normalizeWhitespace(codeContainer.textContent || '');
600
+ if (!codeText)
601
+ return false;
602
+ const matchingSegments = segments.filter((segment) => codeText.includes(segment));
603
+ if (matchingSegments.length === segments.length)
604
+ return true;
605
+ return ((hasPreviewMedia(preview) ||
606
+ segments.some((segment) => isHostnameLike(segment))) &&
607
+ matchingSegments.length > 0 &&
608
+ segments.every((segment) => segment.length <= REDUNDANT_PREVIEW_SEGMENT_MAX_CHARS));
609
+ }
610
+ function pruneFigurePreviewPanes(document) {
611
+ for (const figure of document.querySelectorAll('figure')) {
612
+ const directChildren = Array.from(figure.children);
613
+ const codeChild = directChildren.find((child) => hasDirectPreDescendant(child));
614
+ if (!codeChild)
615
+ continue;
616
+ for (const child of directChildren) {
617
+ if (child === codeChild || child.tagName === 'FIGCAPTION')
618
+ continue;
619
+ if (isRedundantCodePreview(child, codeChild))
620
+ child.remove();
621
+ }
622
+ }
623
+ }
624
+ function isDemoInstructionBlock(element) {
625
+ if (element.querySelector('a[href],pre,code,table,ul,ol,blockquote,figure,h1,h2,h3,h4,h5,h6') !== null) {
626
+ return false;
627
+ }
628
+ const text = normalizeWhitespace(element.textContent || '');
629
+ if (text.length === 0 ||
630
+ text.length > INLINE_DEMO_INSTRUCTION_MAX_CHARS ||
631
+ /[.!?]$/.test(text)) {
632
+ return false;
633
+ }
634
+ return collectLeafTextSegments(element).length <= 3;
635
+ }
636
+ function pruneDemoInstructionBlocks(document) {
637
+ for (const container of document.querySelectorAll('div,section,article')) {
638
+ const children = Array.from(container.children);
639
+ const figureIndex = children.findIndex((child) => child.tagName === 'FIGURE' && child.querySelector('pre') !== null);
640
+ if (figureIndex <= 0)
641
+ continue;
642
+ for (let i = 0; i < figureIndex; i++) {
643
+ const child = children[i];
644
+ if (child && isDemoInstructionBlock(child))
645
+ child.remove();
646
+ }
647
+ }
648
+ }
649
+ function normalizeHighlightedCodeLines(document) {
650
+ for (const code of document.querySelectorAll('pre > code')) {
651
+ const directChildren = Array.from(code.children);
652
+ if (directChildren.length < 2)
653
+ continue;
654
+ const directSpans = directChildren.filter((child) => child.tagName === 'SPAN');
655
+ if (directSpans.length !== directChildren.length)
656
+ continue;
657
+ const hasLineClass = directSpans.some((child) => (child.getAttribute('class') ?? '').split(/\s+/).includes('line'));
658
+ const hasNewlineNode = Array.from(code.childNodes).some((node) => node.nodeType === 3 && /[\r\n]/.test(node.textContent ?? ''));
659
+ if (hasNewlineNode || !hasLineClass)
660
+ continue;
661
+ for (let i = 0; i < directSpans.length - 1; i++) {
662
+ const current = directSpans[i];
663
+ const next = current?.nextSibling;
664
+ if (next?.nodeType === 3 && (next.textContent ?? '').startsWith('\n')) {
665
+ continue;
666
+ }
667
+ current?.after(document.createTextNode('\n'));
668
+ }
669
+ }
670
+ }
671
+ function cleanCodeExamples(document) {
672
+ pruneFigurePreviewPanes(document);
673
+ pruneDemoInstructionBlocks(document);
674
+ normalizeHighlightedCodeLines(document);
675
+ }
509
676
  function separateAdjacentInlineElements(document) {
510
677
  const badges = document.querySelectorAll('span.chakra-badge, [data-scope="badge"], [class*="badge"]');
511
678
  for (const badge of badges) {
@@ -524,6 +691,7 @@ export function prepareDocumentForMarkdown(document, baseUrl, signal) {
524
691
  }
525
692
  stripNoise(document, context, signal);
526
693
  stripTabTriggers(document);
694
+ cleanCodeExamples(document);
527
695
  separateAdjacentInlineElements(document);
528
696
  flattenTableCellBreaks(document);
529
697
  escapeTableCellPipes(document);
@@ -552,6 +720,7 @@ function normalizeTableStructure(document) {
552
720
  }
553
721
  }
554
722
  }
723
+ hoistNestedRows(table);
555
724
  }
556
725
  }
557
726
  function flattenTableCellBreaks(document) {
@@ -1,4 +1,5 @@
1
1
  interface CleanupOptions {
2
+ preserveEmptyHeadings?: boolean;
2
3
  signal?: AbortSignal;
3
4
  url?: string;
4
5
  }
@@ -1 +1 @@
1
- {"version":3,"file":"md-cleanup.d.ts","sourceRoot":"","sources":["../../src/lib/md-cleanup.ts"],"names":[],"mappings":"AAmEA,UAAU,cAAc;IACtB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AA6VD;;;;GAIG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,MAAM,EACf,kBAAkB,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,GAC3C,MAAM,CAuCR;AAWD,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,cAAc,GACvB,MAAM,CAUR"}
1
+ {"version":3,"file":"md-cleanup.d.ts","sourceRoot":"","sources":["../../src/lib/md-cleanup.ts"],"names":[],"mappings":"AAwEA,UAAU,cAAc;IACtB,qBAAqB,CAAC,EAAE,OAAO,CAAC;IAChC,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AA6cD;;;;GAIG;AACH,wBAAgB,oBAAoB,CAClC,OAAO,EAAE,MAAM,EACf,kBAAkB,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,GAC3C,MAAM,CAuCR;AAWD,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,cAAc,GACvB,MAAM,CAmBR"}
@@ -22,6 +22,7 @@ const REGEX = {
22
22
  HEADING_STRICT: /^#{1,6}\s+/m,
23
23
  EMPTY_HEADING_LINE: /^#{1,6}[ \t\u00A0]*$/,
24
24
  ANCHOR_ONLY_HEADING: /^#{1,6}\s+\[[^\]]+\]\(#[^)]+\)\s*$/,
25
+ HEADING_TRAILING_PERMALINK: /^(#{1,6}\s+.+?)\s*\[(?:#|¶|§|¤|🔗)\]\(#[^)]+\)\s*$/gmu,
25
26
  FENCE_START: FENCE_PATTERN,
26
27
  LIST_MARKER: /^(?:[-*+])\s/m,
27
28
  TOC_LINK: /^- \[[^\]]+\]\(#[^)]+\)\s*$/,
@@ -36,7 +37,10 @@ const REGEX = {
36
37
  HEADING_CODE_BLOCK: /(^#{1,6}\s+\w+)```/gm,
37
38
  SPACING_LINK_FIX: /\]\(([^)]+)\)\[/g,
38
39
  SPACING_ADJ_COMBINED: /(?:\]\([^)]+\)|`[^`]+`)(?=[A-Za-z0-9])/g,
40
+ SPACING_CODE_PAD_BEFORE: /(\S)[ \t]{2,}(?=`[^`\n]+`)/g,
41
+ SPACING_CODE_PAD_AFTER: /(`[^`\n]+`)[ \t]{2,}(?=\S)/g,
39
42
  SPACING_CODE_DASH: /(`[^`]+`)\s*\\-\s*/g,
43
+ SPACING_ESCAPED_DASH: /(?<=[\w)\]`])\s*\\-\s*(?=[A-Za-z0-9([])/g,
40
44
  SPACING_ESCAPES: /\\([[\].])/g,
41
45
  SPACING_LIST_NUM_COMBINED: /^((?![-*+] |\d+\. |[ \t]).+)\n((?:[-*+]|\d+\.) )/gm,
42
46
  PUNCT_ONLY_LIST_ARTIFACT: /^(?:[-*+]|\d+\.)\s*(?:\\[-*+|/]|[-*+|/])(?:\s+(?:\\[-*+|/]|[-*+|/]))*\s*$/gm,
@@ -72,6 +76,14 @@ function hasFollowingContent(lines, startIndex) {
72
76
  }
73
77
  return false;
74
78
  }
79
+ function findNextNonBlankLine(lines, startIndex) {
80
+ for (let i = startIndex + 1; i < Math.min(lines.length, startIndex + HAS_FOLLOWING_LOOKAHEAD); i++) {
81
+ const line = lines[i];
82
+ if (!isBlank(line))
83
+ return line?.trim();
84
+ }
85
+ return undefined;
86
+ }
75
87
  function stripAnchorOnlyHeading(line) {
76
88
  return line.replace(/^(#{1,6})\s+\[([^\]]+)\]\(#[^)]+\)\s*$/, '$1 $2');
77
89
  }
@@ -191,6 +203,11 @@ function tryPromoteOrphan(lines, i, trimmed) {
191
203
  const isSpecialPrefix = SPECIAL_PREFIXES.test(trimmed);
192
204
  if (!isSpecialPrefix && !hasFollowingContent(lines, i))
193
205
  return null;
206
+ if (!isSpecialPrefix) {
207
+ const nextLine = findNextNonBlankLine(lines, i);
208
+ if (nextLine && REGEX.HEADING_MARKER.test(nextLine))
209
+ return null;
210
+ }
194
211
  return `${prefix}${trimmed}`;
195
212
  }
196
213
  function shouldSkipAsToc(lines, i, trimmed, removeToc, options) {
@@ -205,13 +222,16 @@ function shouldSkipAsToc(lines, i, trimmed, removeToc, options) {
205
222
  throwIfAborted(options?.signal, options?.url ?? '', 'markdown:cleanup:toc');
206
223
  return skipTocLines(lines, i + 1);
207
224
  }
208
- function normalizePreprocessLine(lines, i, trimmed, line) {
225
+ function normalizePreprocessLine(lines, i, trimmed, line, options) {
209
226
  if (REGEX.EMPTY_HEADING_LINE.test(trimmed))
210
227
  return null;
211
228
  if (!REGEX.ANCHOR_ONLY_HEADING.test(trimmed))
212
229
  return line;
213
- if (!hasFollowingContent(lines, i))
214
- return null;
230
+ if (!hasFollowingContent(lines, i)) {
231
+ return options?.preserveEmptyHeadings
232
+ ? stripAnchorOnlyHeading(trimmed)
233
+ : null;
234
+ }
215
235
  return stripAnchorOnlyHeading(trimmed);
216
236
  }
217
237
  function maybeSkipTocBlock(lines, i, trimmed, options) {
@@ -235,7 +255,7 @@ function preprocessLines(lines, options) {
235
255
  if (currentLine === undefined)
236
256
  continue;
237
257
  const trimmed = currentLine.trim();
238
- const normalizedLine = normalizePreprocessLine(lines, i, trimmed, currentLine);
258
+ const normalizedLine = normalizePreprocessLine(lines, i, trimmed, currentLine, options);
239
259
  if (normalizedLine === null)
240
260
  continue;
241
261
  const tocSkip = maybeSkipTocBlock(lines, i, trimmed, options);
@@ -269,21 +289,91 @@ function removeSkipLinks(text) {
269
289
  function normalizeInlineCodeTokens(text) {
270
290
  return text.replace(/`([^`\n]+)`/g, (match, inner) => {
271
291
  const trimmed = inner.trim();
272
- if (trimmed === inner)
273
- return match;
274
292
  if (!/[A-Za-z0-9]/.test(trimmed))
275
293
  return match;
276
294
  const parts = /^(\s*)(.*?)(\s*)$/.exec(inner);
277
295
  if (!parts)
278
296
  return match;
279
- return `${parts[1] ?? ''}\`${parts[2] ?? ''}\`${parts[3] ?? ''}`;
297
+ const normalized = collapseQualifiedIdentifierSpacing(parts[2] ?? '');
298
+ if (trimmed === inner && normalized === inner)
299
+ return match;
300
+ return `${parts[1] ?? ''}\`${normalized}\`${parts[3] ?? ''}`;
280
301
  });
281
302
  }
303
+ function collapseQualifiedIdentifierSpacing(text) {
304
+ let result = text;
305
+ for (let i = 0; i < PROPERTY_FIX_MAX_PASSES; i++) {
306
+ const next = result.replace(/\b([A-Za-z_$][\w$]*)\.\s+(?=[A-Za-z_$<])/g, '$1.');
307
+ if (next === result)
308
+ break;
309
+ result = next;
310
+ }
311
+ return result;
312
+ }
313
+ function normalizeMarkdownLinkText(text) {
314
+ const normalized = collapseQualifiedIdentifierSpacing(text.replace(/\\`/g, '`').replace(/\\</g, '<').replace(/\\>/g, '>'));
315
+ return normalized.replace(/</g, '\\<').replace(/>/g, '\\>');
316
+ }
317
+ function normalizeMarkdownLinkLabels(text) {
318
+ return text.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_match, linkText, url) => `[${normalizeMarkdownLinkText(linkText)}](${url})`);
319
+ }
320
+ function collapseInlineCodePadding(text) {
321
+ return text
322
+ .replace(/(\S)[ \t]{2,}(?=`[^`\n]+`)/g, '$1 ')
323
+ .replace(/(`[^`\n]+`)[ \t]{2,}(?=\S)/g, '$1 ');
324
+ }
325
+ function escapeAngleBracketsInMarkdownTables(text) {
326
+ return text.replace(/^(?!\|\s*[-: ]+\|)(\|.*\|)\s*$/gm, (line) => line
327
+ .replace(/<\/([A-Za-z][A-Za-z0-9-]*)>/g, '\\</$1\\>')
328
+ .replace(/<([A-Za-z][A-Za-z0-9-]*)>/g, '\\<$1\\>'));
329
+ }
330
+ function stripTrailingHeadingPermalinks(text) {
331
+ return text
332
+ .replace(REGEX.HEADING_TRAILING_PERMALINK, '$1')
333
+ .replace(/^(#{1,6})\s{2,}/gm, '$1 ')
334
+ .replace(/^(#{1,6}\s+.*?)[ \t]+$/gm, '$1');
335
+ }
336
+ function getHeadingInfo(line) {
337
+ const match = /^(#{1,6})\s+/.exec(line.trim());
338
+ if (!match)
339
+ return null;
340
+ return { level: match[1]?.length ?? 0 };
341
+ }
342
+ function removeEmptyHeadingSections(text) {
343
+ const lines = text.split('\n');
344
+ const kept = [];
345
+ for (let i = 0; i < lines.length; i++) {
346
+ const line = lines[i] ?? '';
347
+ const heading = getHeadingInfo(line);
348
+ if (!heading) {
349
+ kept.push(line);
350
+ continue;
351
+ }
352
+ let nextIndex = i + 1;
353
+ while (nextIndex < lines.length && isBlank(lines[nextIndex])) {
354
+ nextIndex += 1;
355
+ }
356
+ const nextLine = lines[nextIndex];
357
+ if (nextLine === undefined) {
358
+ kept.push(line);
359
+ continue;
360
+ }
361
+ const nextHeading = getHeadingInfo(nextLine);
362
+ if (nextHeading && nextHeading.level <= heading.level) {
363
+ continue;
364
+ }
365
+ kept.push(line);
366
+ }
367
+ return kept.join('\n').replace(REGEX.DOUBLE_NEWLINE_REDUCER, '\n\n');
368
+ }
282
369
  function normalizeMarkdownSpacing(text) {
283
370
  let result = text
284
- .replace(REGEX.SPACING_LINK_FIX, ']($1)\n\n[')
371
+ .replace(REGEX.SPACING_LINK_FIX, ']($1) [')
285
372
  .replace(REGEX.SPACING_ADJ_COMBINED, '$& ')
373
+ .replace(REGEX.SPACING_CODE_PAD_BEFORE, '$1 ')
374
+ .replace(REGEX.SPACING_CODE_PAD_AFTER, '$1 ')
286
375
  .replace(REGEX.SPACING_CODE_DASH, '$1 - ')
376
+ .replace(REGEX.SPACING_ESCAPED_DASH, ' - ')
287
377
  .replace(REGEX.SPACING_ESCAPES, '$1')
288
378
  .replace(REGEX.SPACING_LIST_NUM_COMBINED, '$1\n\n$2')
289
379
  .replace(REGEX.PUNCT_ONLY_LIST_ARTIFACT, '')
@@ -292,9 +382,9 @@ function normalizeMarkdownSpacing(text) {
292
382
  result = result.replace(/([.!?:;])([A-Z])/g, '$1 $2');
293
383
  // Trim whitespace around token-like inline code spans.
294
384
  result = normalizeInlineCodeTokens(result);
295
- // Unescape backticks inside markdown link text
296
- result = result.replace(/\[([^\]]*\\`[^\]]*)\]\(([^)]+)\)/g, (_match, linkText, url) => `[${linkText.replace(/\\`/g, '`')}](${url})`);
297
- result = result.replace(/\[([^\]]*<[^\]]*)\]\(([^)]+)\)/g, (_match, linkText, url) => `[${linkText.replace(/</g, '\\<').replace(/>/g, '\\>')}](${url})`);
385
+ result = collapseInlineCodePadding(result);
386
+ result = normalizeMarkdownLinkLabels(result);
387
+ result = escapeAngleBracketsInMarkdownTables(result);
298
388
  return normalizeNestedListIndentation(result);
299
389
  }
300
390
  function fixConcatenatedProperties(text) {
@@ -325,7 +415,8 @@ function applyGlobalRegexes(text, options) {
325
415
  checkAbort('markdown:cleanup:spacing');
326
416
  result = normalizeMarkdownSpacing(result);
327
417
  checkAbort('markdown:cleanup:properties');
328
- return fixConcatenatedProperties(result);
418
+ result = fixConcatenatedProperties(result);
419
+ return stripTrailingHeadingPermalinks(result);
329
420
  }
330
421
  function normalizeNestedListIndentation(text) {
331
422
  return text.replace(REGEX.NESTED_LIST_INDENT, (match, spaces, marker) => {
@@ -386,6 +477,10 @@ export function cleanupMarkdownArtifacts(content, options) {
386
477
  if (!content)
387
478
  return '';
388
479
  throwIfAborted(options?.signal, options?.url ?? '', 'markdown:cleanup:begin');
389
- const result = processFencedContent(content, (text) => processTextBuffer(text.split('\n'), options)).trim();
480
+ let result = processFencedContent(content, (text) => processTextBuffer(text.split('\n'), options)).trim();
481
+ if (!options?.preserveEmptyHeadings) {
482
+ throwIfAborted(options?.signal, options?.url ?? '', 'markdown:cleanup:empty-headings');
483
+ result = removeEmptyHeadingSections(result);
484
+ }
390
485
  return stripLeadingBreadcrumbNoise(result);
391
486
  }
@@ -1 +1 @@
1
- {"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AAgDA,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AAqCpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AAqJD,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;AAwYD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,WAAW,CAAA;CAExD,GACA,gBAAgB,CAGlB;AA8KD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,aAAa,EACxB,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B,GACA,MAAM,CAsBR;AAuJD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,sBAAsB,EAAE,MAAM,GAAG,QAAQ,GACxC,OAAO,CAQT;AAiED,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAuB3B;AAuCD,iBAAS,eAAe,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM,GAAG,SAAS,CAc/D;AAED,iBAAS,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM,GAAG,SAAS,CAYlE;AA6CD,iBAAS,yBAAyB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAcvD;AAED,eAAO,MAAM,mBAAmB;;;;CAItB,CAAC;AA2vBX,wBAAgB,gCAAgC,CAC9C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,uBAAuB,CAqCzB;AAaD,UAAU,kBAAkB;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,qBAAqB,IAAI,kBAAkB,GAAG,IAAI,CAEjE;AAED,wBAAsB,2BAA2B,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjE;AAED,KAAK,yBAAyB,GAAG,gBAAgB,GAAG;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAkH1E,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,wBAAsB,yBAAyB,CAC7C,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,uBAAuB,CAAC,CAElC"}
1
+ {"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AAgDA,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AAqCpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AAqJD,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;AAwYD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,WAAW,CAAA;CAExD,GACA,gBAAgB,CAGlB;AAgLD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,aAAa,EACxB,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B,GACA,MAAM,CAsBR;AAuJD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,sBAAsB,EAAE,MAAM,GAAG,QAAQ,GACxC,OAAO,CAQT;AAiED,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAuB3B;AA4DD,iBAAS,eAAe,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM,GAAG,SAAS,CAc/D;AAED,iBAAS,kBAAkB,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM,GAAG,SAAS,CAyBlE;AA6CD,iBAAS,yBAAyB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAcvD;AAED,eAAO,MAAM,mBAAmB;;;;CAItB,CAAC;AA8xBX,wBAAgB,gCAAgC,CAC9C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,uBAAuB,CAqCzB;AAaD,UAAU,kBAAkB;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,qBAAqB,IAAI,kBAAkB,GAAG,IAAI,CAEjE;AAED,wBAAsB,2BAA2B,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjE;AAED,KAAK,yBAAyB,GAAG,gBAAgB,GAAG;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAkH1E,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,wBAAsB,yBAAyB,CAC7C,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,uBAAuB,CAAC,CAElC"}
@@ -553,7 +553,9 @@ function translateHtmlToMarkdown(params) {
553
553
  throwIfAborted(signal, url, 'markdown:cleaned');
554
554
  const content = stageTracker.run(url, 'markdown:translate', () => translateHtmlFragmentToMarkdown(cleanedHtml));
555
555
  throwIfAborted(signal, url, 'markdown:translated');
556
- const cleaned = cleanupMarkdownArtifacts(content, signal ? { signal, url } : { url });
556
+ const cleaned = cleanupMarkdownArtifacts(content, signal
557
+ ? { preserveEmptyHeadings: true, signal, url }
558
+ : { preserveEmptyHeadings: true, url });
557
559
  return url ? resolveRelativeUrls(cleaned, url, signal) : cleaned;
558
560
  }
559
561
  function appendMetadataFooter(content, metadata, url) {
@@ -815,6 +817,22 @@ const PRIMARY_HEADING_ROOT_SELECTORS = [
815
817
  '.entry-content',
816
818
  '[itemprop="text"]',
817
819
  ];
820
+ function normalizeSyntheticTitleToken(value) {
821
+ return (value ?? '').replace(/\s+/g, ' ').trim().toLowerCase();
822
+ }
823
+ function shouldPreferPrimaryHeadingTitle(primaryHeading, title) {
824
+ const primary = normalizeSyntheticTitleToken(primaryHeading);
825
+ if (!primary)
826
+ return false;
827
+ const normalizedTitle = normalizeSyntheticTitleToken(title);
828
+ if (!normalizedTitle)
829
+ return true;
830
+ if (normalizedTitle === primary)
831
+ return true;
832
+ return normalizedTitle
833
+ .split(/\s*(?:[-|:•·]|–|—)\s*/u)
834
+ .some((part) => part === primary);
835
+ }
818
836
  function findContentRoot(document) {
819
837
  for (const selector of CONTENT_ROOT_SELECTORS) {
820
838
  const element = document.querySelector(selector);
@@ -829,17 +847,31 @@ function findContentRoot(document) {
829
847
  return undefined;
830
848
  }
831
849
  function findPrimaryHeading(document) {
832
- for (const selector of PRIMARY_HEADING_ROOT_SELECTORS) {
833
- const root = document.querySelector(selector);
834
- if (!root)
835
- continue;
836
- const heading = root.querySelector('h1, h2');
850
+ for (const headingSelector of ['[data-title="true"]', 'h1']) {
851
+ const heading = document.querySelector(headingSelector);
837
852
  if (!heading)
838
853
  continue;
839
854
  const text = heading.textContent.trim();
840
855
  if (text)
841
856
  return text;
842
857
  }
858
+ for (const selector of PRIMARY_HEADING_ROOT_SELECTORS) {
859
+ const root = document.querySelector(selector);
860
+ if (!root)
861
+ continue;
862
+ for (const headingSelector of [
863
+ '[data-title="true"]',
864
+ 'h1',
865
+ 'h2',
866
+ ]) {
867
+ const heading = root.querySelector(headingSelector);
868
+ if (!heading)
869
+ continue;
870
+ const text = heading.textContent.trim();
871
+ if (text)
872
+ return text;
873
+ }
874
+ }
843
875
  return undefined;
844
876
  }
845
877
  function countMatchingElements(root, selector) {
@@ -971,39 +1003,56 @@ function shouldUseArticleContent(article, document) {
971
1003
  function buildContentSource(params) {
972
1004
  const { html, url, article, extractedMeta, includeMetadata, useArticleContent, document, truncated, signal, } = params;
973
1005
  const metadata = createContentMetadataBlock(url, article, extractedMeta, useArticleContent, includeMetadata);
1006
+ const preparedDocument = document;
1007
+ let primaryHeading = document
1008
+ ? TransformHeuristics.findPrimaryHeading(document)
1009
+ : undefined;
1010
+ if (preparedDocument) {
1011
+ prepareDocumentForMarkdown(preparedDocument, url, signal);
1012
+ primaryHeading =
1013
+ TransformHeuristics.findPrimaryHeading(preparedDocument) ??
1014
+ primaryHeading;
1015
+ }
974
1016
  const base = {
975
1017
  favicon: extractedMeta.favicon,
976
1018
  metadata,
977
1019
  extractedMetadata: extractedMeta,
978
1020
  truncated,
979
- primaryHeading: document
980
- ? TransformHeuristics.findPrimaryHeading(document)
981
- : undefined,
1021
+ primaryHeading,
982
1022
  originalHtml: html,
983
1023
  };
984
1024
  if (useArticleContent && article) {
985
1025
  const { document: articleDoc } = parseHTML(`<!DOCTYPE html><html><body>${article.content}</body></html>`);
986
1026
  prepareDocumentForMarkdown(articleDoc, url, signal);
987
- const preferPrimaryHeading = TransformHeuristics.isGithubRepositoryRootUrl(url);
1027
+ const articleTitle = article.title !== undefined
1028
+ ? normalizeDocumentTitle(article.title, url)
1029
+ : extractedMeta.title;
1030
+ const preferPrimaryHeading = TransformHeuristics.isGithubRepositoryRootUrl(url) ||
1031
+ shouldPreferPrimaryHeadingTitle(base.primaryHeading, articleTitle);
1032
+ const resolvedTitle = (preferPrimaryHeading ? base.primaryHeading : undefined) ?? articleTitle;
988
1033
  return {
989
1034
  ...base,
990
1035
  sourceHtml: articleDoc.body.innerHTML,
991
- title: (preferPrimaryHeading ? base.primaryHeading : undefined) ??
992
- (article.title !== undefined
993
- ? normalizeDocumentTitle(article.title, url)
994
- : undefined),
1036
+ title: resolvedTitle,
1037
+ suppressSyntheticFavicon: normalizeSyntheticTitleToken(resolvedTitle) ===
1038
+ normalizeSyntheticTitleToken(base.primaryHeading),
995
1039
  skipNoiseRemoval: true,
996
1040
  };
997
1041
  }
998
1042
  if (document) {
999
- prepareDocumentForMarkdown(document, url, signal);
1000
- const contentRoot = TransformHeuristics.findContentRoot(document);
1043
+ const resolvedDocument = preparedDocument ?? document;
1044
+ const contentRoot = TransformHeuristics.findContentRoot(resolvedDocument);
1045
+ const preferPrimaryHeading = shouldPreferPrimaryHeadingTitle(base.primaryHeading, extractedMeta.title);
1046
+ const resolvedTitle = (preferPrimaryHeading ? base.primaryHeading : undefined) ??
1047
+ extractedMeta.title;
1001
1048
  return {
1002
1049
  ...base,
1003
- sourceHtml: contentRoot ?? serializeDocumentForMarkdown(document, html),
1004
- title: extractedMeta.title,
1050
+ sourceHtml: contentRoot ?? serializeDocumentForMarkdown(resolvedDocument, html),
1051
+ title: resolvedTitle,
1052
+ suppressSyntheticFavicon: normalizeSyntheticTitleToken(resolvedTitle) ===
1053
+ normalizeSyntheticTitleToken(base.primaryHeading),
1005
1054
  skipNoiseRemoval: true,
1006
- document,
1055
+ document: resolvedDocument,
1007
1056
  };
1008
1057
  }
1009
1058
  return {
@@ -1326,8 +1375,8 @@ function maybeStripGithubPrimaryHeading(markdown, context, url) {
1326
1375
  return markdown;
1327
1376
  return stripLeadingHeading(markdown, context.primaryHeading ?? '');
1328
1377
  }
1329
- function buildSyntheticTitlePrefix(url, favicon) {
1330
- if (!favicon)
1378
+ function buildSyntheticTitlePrefix(url, favicon, suppressFavicon) {
1379
+ if (!favicon || suppressFavicon)
1331
1380
  return ' ';
1332
1381
  let alt = '';
1333
1382
  try {
@@ -1342,7 +1391,7 @@ function maybePrependSyntheticTitle(markdown, context, url) {
1342
1391
  if (!context.title || /^(#{1,6})\s/.test(markdown.trimStart())) {
1343
1392
  return markdown;
1344
1393
  }
1345
- return `#${buildSyntheticTitlePrefix(url, context.favicon)}${context.title}\n\n${markdown}`;
1394
+ return `#${buildSyntheticTitlePrefix(url, context.favicon, context.suppressSyntheticFavicon)}${context.title}\n\n${markdown}`;
1346
1395
  }
1347
1396
  function buildMarkdownFromContext(context, url, signal) {
1348
1397
  let content = stageTracker.run(url, 'transform:markdown', () => htmlToMarkdown(context.sourceHtml, context.metadata, {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/fetch-url-mcp",
3
- "version": "1.9.4",
3
+ "version": "1.9.5",
4
4
  "mcpName": "io.github.j0hanz/fetch-url-mcp",
5
5
  "description": "A web content fetcher MCP server that converts HTML to clean, AI and human readable markdown.",
6
6
  "type": "module",
@@ -75,7 +75,7 @@
75
75
  "linkedom": "^0.18.12",
76
76
  "node-html-markdown": "^2.0.0",
77
77
  "ts-morph": "^27.0.2",
78
- "undici": "^7.24.1",
78
+ "undici": "^7.24.3",
79
79
  "zod": "^4.3.6"
80
80
  },
81
81
  "devDependencies": {