@j0hanz/fetch-url-mcp 1.8.4 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"content.d.ts","sourceRoot":"","sources":["../../src/lib/content.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,KAAK,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAghB3D,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,MAAM,GACf,MAAM,CAQR;AAWD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,IAAI,CAYN;AACD,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,MAAM,CAcR;AAkVD,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,MAAM,GAAG,SAAS,CAKpB;AACD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CA6BvE;AA+CD,UAAU,cAAc;IACtB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AA4QD,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,cAAc,GACvB,MAAM,CA6DR;AA2GD,wBAAgB,2BAA2B,CACzC,OAAO,EAAE,MAAM,GACd,MAAM,GAAG,SAAS,CAOpB;AACD,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAmCxE;AAcD,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAczD;AAaD,wBAAgB,mBAAmB,CACjC,QAAQ,CAAC,EAAE,aAAa,EACxB,WAAW,CAAC,EAAE,MAAM,GACnB,MAAM,CAmBR"}
1
+ {"version":3,"file":"content.d.ts","sourceRoot":"","sources":["../../src/lib/content.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,KAAK,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAiiB3D,wBAAgB,4BAA4B,CAC1C,QAAQ,EAAE,QAAQ,EAClB,QAAQ,EAAE,MAAM,GACf,MAAM,CAQR;AAuCD,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,IAAI,CAiBN;AA0BD,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,QAAQ,EACnB,OAAO,CAAC,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,WAAW,GACnB,MAAM,CAcR;AAkVD,wBAAgB,6BAA6B,CAC3C,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,GACf,MAAM,GAAG,SAAS,CAKpB;AACD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,CA6BvE;AA+CD,UAAU,cAAc;IACtB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAyRD,wBAAgB,wBAAwB,CACtC,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,cAAc,GACvB,MAAM,CA6DR;AA2GD,wBAAgB,2BAA2B,CACzC,OAAO,EAAE,MAAM,GACd,MAAM,GAAG,SAAS,CAOpB;AACD,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAmCxE;AAcD,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAczD;AAaD,wBAAgB,mBAAmB,CACjC,QAAQ,CAAC,EAAE,aAAa,EACxB,WAAW,CAAC,EAAE,MAAM,GACnB,MAAM,CAmBR"}
@@ -6,6 +6,7 @@ const NOISE_SCAN_LIMIT = 50_000;
6
6
  const MIN_BODY_CONTENT_LENGTH = 100;
7
7
  const DIALOG_MIN_CHARS_FOR_PRESERVATION = 500;
8
8
  const NAV_FOOTER_MIN_CHARS_FOR_PRESERVATION = 500;
9
+ const ABORT_CHECK_INTERVAL = 500;
9
10
  const HTML_DOCUMENT_MARKERS = /<\s*(?:!doctype|html|head|body)\b/i;
10
11
  const HTML_FRAGMENT_MARKERS = /<\s*(?:article|main|section|div|nav|footer|header|aside|table|ul|ol)\b/i;
11
12
  const NOISE_PATTERNS = [
@@ -30,6 +31,7 @@ const BASE_STRUCTURAL_TAGS = new Set([
30
31
  'style',
31
32
  'noscript',
32
33
  'iframe',
34
+ 'template',
33
35
  'form',
34
36
  'button',
35
37
  'input',
@@ -76,6 +78,10 @@ const PROMO_TOKENS_ALWAYS = [
76
78
  'pagination',
77
79
  'pager',
78
80
  'taglist',
81
+ 'twitter-tweet',
82
+ 'fb-post',
83
+ 'instagram-media',
84
+ 'social-embed',
79
85
  ];
80
86
  const PROMO_TOKENS_AGGRESSIVE = ['ad', 'related', 'comment'];
81
87
  const PROMO_TOKENS_BY_CATEGORY = {
@@ -86,7 +92,7 @@ const PROMO_TOKENS_BY_CATEGORY = {
86
92
  const BASE_NOISE_SELECTORS = {
87
93
  navFooter: 'nav,footer,header[class*="site"],header[class*="nav"],header[class*="menu"],[role="banner"],[role="navigation"]',
88
94
  cookieBanners: '[role="dialog"]',
89
- hidden: '[style*="display: none"],[style*="display:none"],[hidden],[aria-hidden="true"]',
95
+ hidden: '[style*="display: none"],[style*="display:none"],[style*="visibility: hidden"],[style*="visibility:hidden"],[hidden],[aria-hidden="true"]',
90
96
  };
91
97
  const NO_MATCH_REGEX = /a^/i;
92
98
  let cachedContext;
@@ -188,8 +194,9 @@ function getContext() {
188
194
  function isInteractive(element, role) {
189
195
  if (role && INTERACTIVE_CONTENT_ROLES.has(role))
190
196
  return true;
197
+ const tag = element.tagName.toLowerCase();
191
198
  const ds = element.getAttribute('data-state');
192
- if (ds === 'inactive' || ds === 'closed')
199
+ if ((ds === 'inactive' || ds === 'closed') && !BASE_STRUCTURAL_TAGS.has(tag))
193
200
  return true;
194
201
  const dataOrientation = element.getAttribute('data-orientation');
195
202
  if (dataOrientation === 'horizontal' || dataOrientation === 'vertical')
@@ -209,6 +216,19 @@ function isWithinPrimaryContent(element) {
209
216
  }
210
217
  return false;
211
218
  }
/** Links per 100 characters of text at or above which an aside counts as navigation. */
const ASIDE_NAV_LINK_DENSITY_THRESHOLD = 0.5;
/** Asides with fewer `a[href]` links than this are never treated as navigation by density. */
const ASIDE_NAV_MIN_LINKS = 10;

/**
 * Tells whether an element looks like a navigation sidebar rather than content.
 *
 * The element is navigation when it contains a `<nav>` descendant, or when it
 * holds at least ASIDE_NAV_MIN_LINKS `a[href]` links and its link count per
 * 100 characters of trimmed text reaches ASIDE_NAV_LINK_DENSITY_THRESHOLD.
 *
 * @param {Element} element - Element to inspect.
 * @returns {boolean} True when the element looks like navigation.
 */
function isNavigationAside(element) {
    const hasNestedNav = Boolean(element.querySelector('nav'));
    if (hasNestedNav) {
        return true;
    }

    const linkCount = element.querySelectorAll('a[href]').length;
    if (linkCount < ASIDE_NAV_MIN_LINKS) {
        return false;
    }

    const trimmedText = (element.textContent || '').trim();
    // Many links with no text at all is treated as infinitely dense.
    const linksPerHundredChars = trimmedText.length === 0
        ? Infinity
        : linkCount / (trimmedText.length / 100);
    return linksPerHundredChars >= ASIDE_NAV_LINK_DENSITY_THRESHOLD;
}
212
232
  function shouldPreserve(element, tagName) {
213
233
  // Check Dialog
214
234
  const role = element.getAttribute('role');
@@ -227,6 +247,12 @@ function shouldPreserve(element, tagName) {
227
247
  return ((element.textContent || '').trim().length >=
228
248
  NAV_FOOTER_MIN_CHARS_FOR_PRESERVATION);
229
249
  }
250
+ // Check Aside — preserve only if it looks like article content, not navigation
251
+ if (tagName === 'aside') {
252
+ if (!isWithinPrimaryContent(element))
253
+ return false;
254
+ return !isNavigationAside(element);
255
+ }
230
256
  return false;
231
257
  }
232
258
  function removeNodes(nodes) {
@@ -237,20 +263,24 @@ function removeNodes(nodes) {
237
263
  }
238
264
  }
239
265
  }
240
- function scoreNavFooter(tagName, role, className, id, weights) {
266
+ function scoreNavFooter(meta, weights) {
241
267
  let score = 0;
242
- if (ALWAYS_NOISE_TAGS.has(tagName))
268
+ if (ALWAYS_NOISE_TAGS.has(meta.tagName))
243
269
  score += weights.structural;
244
270
  // Header Boilerplate
245
- if (tagName === 'header') {
246
- if ((role && NAVIGATION_ROLES.has(role)) ||
247
- HEADER_NOISE_PATTERN.test(`${className} ${id}`)) {
271
+ if (meta.tagName === 'header') {
272
+ if ((meta.role && NAVIGATION_ROLES.has(meta.role)) ||
273
+ HEADER_NOISE_PATTERN.test(`${meta.className} ${meta.id}`)) {
248
274
  score += weights.structural;
249
275
  }
250
276
  }
277
+ // Aside (sidebar/complementary) — noise unless inside primary content
278
+ if (meta.tagName === 'aside') {
279
+ score += weights.structural;
280
+ }
251
281
  // Role Noise
252
- if (role && NAVIGATION_ROLES.has(role)) {
253
- if (tagName !== 'aside' || role !== 'complementary') {
282
+ if (meta.role && NAVIGATION_ROLES.has(meta.role)) {
283
+ if (meta.tagName !== 'aside' || meta.role !== 'complementary') {
254
284
  score += weights.structural;
255
285
  }
256
286
  }
@@ -287,7 +317,7 @@ function isNoiseElement(element, context) {
287
317
  }
288
318
  // Nav/Footer Scoring
289
319
  if (context.flags.navFooter) {
290
- score += scoreNavFooter(meta.tagName, meta.role, meta.className, meta.id, weights);
320
+ score += scoreNavFooter(meta, weights);
291
321
  }
292
322
  // Hidden
293
323
  if (meta.isHidden && !meta.isInteractive) {
@@ -375,7 +405,7 @@ function stripNoise(document, context, signal) {
375
405
  // Candidates
376
406
  const candidates = document.querySelectorAll(context.candidateSelector);
377
407
  for (let i = candidates.length - 1; i >= 0; i--) {
378
- if (i % 500 === 0 && signal?.aborted) {
408
+ if (i % ABORT_CHECK_INTERVAL === 0 && signal?.aborted) {
379
409
  throw new Error('Noise removal aborted');
380
410
  }
381
411
  const node = candidates[i];
@@ -463,6 +493,29 @@ function mayContainNoise(html) {
463
493
  : `${html.substring(0, NOISE_SCAN_LIMIT)}\n${html.substring(html.length - NOISE_SCAN_LIMIT)}`;
464
494
  return NOISE_PATTERNS.some((re) => re.test(sample));
465
495
  }
/**
 * Removes every `button[role="tab"]` element from the document.
 *
 * Matches are removed last-to-first; missing entries are skipped.
 *
 * @param {Document} document - Document to mutate in place.
 * @returns {void}
 */
function stripTabTriggers(document) {
    const tabButtons = Array.from(document.querySelectorAll('button[role="tab"]'));
    for (const tabButton of tabButtons.reverse()) {
        tabButton?.remove();
    }
}
/**
 * Backslash-escapes every `|` in the text of `<code>` elements inside table cells.
 *
 * Only `td code` / `th code` elements whose text contains a pipe are rewritten.
 * Rewriting assigns `textContent`, so the element ends up holding plain text.
 *
 * @param {Document} document - Document to mutate in place.
 * @returns {void}
 */
function escapeTableCellPipes(document) {
    document.querySelectorAll('td code, th code').forEach((codeElement) => {
        const currentText = codeElement.textContent;
        if (!currentText.includes('|')) {
            return;
        }
        codeElement.textContent = currentText.split('|').join('\\|');
    });
}
/**
 * Inserts a single-space text node after each badge-like element that is
 * immediately followed by another element.
 *
 * Badges are matched by `span.chakra-badge`, `[data-scope="badge"]`, or any
 * class attribute containing "badge". Badges followed by a text node, or by
 * nothing, are left alone.
 *
 * @param {Document} document - Document to mutate in place.
 * @returns {void}
 */
function separateAdjacentInlineElements(document) {
    const ELEMENT_NODE = 1;
    const badgeSelector = 'span.chakra-badge, [data-scope="badge"], [class*="badge"]';

    document.querySelectorAll(badgeSelector).forEach((badgeElement) => {
        const follower = badgeElement.nextSibling;
        const followedByElement = follower != null && follower.nodeType === ELEMENT_NODE;
        if (followedByElement) {
            badgeElement.after(document.createTextNode(' '));
        }
    });
}
466
519
  export function prepareDocumentForMarkdown(document, baseUrl, signal) {
467
520
  const context = getContext();
468
521
  if (config.noiseRemoval.debug) {
@@ -471,9 +524,37 @@ export function prepareDocumentForMarkdown(document, baseUrl, signal) {
471
524
  });
472
525
  }
473
526
  stripNoise(document, context, signal);
527
+ stripTabTriggers(document);
528
+ separateAdjacentInlineElements(document);
529
+ flattenTableCellBreaks(document);
530
+ escapeTableCellPipes(document);
531
+ normalizeTableStructure(document);
474
532
  if (baseUrl)
475
533
  resolveUrls(document, baseUrl);
476
534
  }
/**
 * Hoists misplaced table sections out of table cells.
 *
 * Some sites put `tbody`/`thead`/`tfoot` inside `td`/`th`, which breaks
 * markdown tables. Each such section is appended to the table that owns the
 * cell. Per cell, `tbody` sections are hoisted first, then `thead`, then `tfoot`.
 *
 * A section is hoisted only when its closest `<table>` ancestor is the table
 * being processed. Sections of a real nested table inside a cell stay where
 * they are, and a nested table's own misplaced sections are fixed on that
 * table's own pass.
 *
 * @param {Document} document - Document to mutate in place.
 * @returns {void}
 */
function normalizeTableStructure(document) {
    const sectionTags = ['tbody', 'thead', 'tfoot'];
    for (const table of document.querySelectorAll('table')) {
        for (const cell of table.querySelectorAll('th, td')) {
            // Cells of a nested table are handled when that table is visited.
            if (cell.closest('table') !== table) {
                continue;
            }
            for (const tag of sectionTags) {
                // querySelectorAll returns a static list, so moving nodes is safe here.
                for (const section of cell.querySelectorAll(tag)) {
                    if (section.closest('table') === table) {
                        table.appendChild(section);
                    }
                }
            }
        }
    }
}
/**
 * Replaces every `<br>` inside a table cell (`td` or `th`) with a single space.
 *
 * @param {Document} document - Document to mutate in place.
 * @returns {void}
 */
function flattenTableCellBreaks(document) {
    document.querySelectorAll('td, th').forEach((tableCell) => {
        tableCell.querySelectorAll('br').forEach((lineBreak) => {
            lineBreak.replaceWith(' ');
        });
    });
}
477
558
  export function removeNoiseFromHtml(html, document, baseUrl, signal) {
478
559
  const shouldParse = isFullDocumentHtml(html) ||
479
560
  mayContainNoise(html) ||
@@ -1070,6 +1151,8 @@ function processTextBuffer(lines, options) {
1070
1151
  function applyGlobalRegexes(text, options) {
1071
1152
  let result = text;
1072
1153
  const checkAbort = createAbortChecker(options);
1154
+ // Normalize non-breaking spaces to regular spaces
1155
+ result = result.replace(/\u00A0/g, ' ');
1073
1156
  checkAbort('markdown:cleanup:headings');
1074
1157
  // fixAndSpaceHeadings
1075
1158
  result = result
@@ -1099,6 +1182,10 @@ function applyGlobalRegexes(text, options) {
1099
1182
  .replace(REGEX.SPACING_LIST_NUM_COMBINED, '$1\n\n$2')
1100
1183
  .replace(REGEX.PUNCT_ONLY_LIST_ARTIFACT, '')
1101
1184
  .replace(REGEX.DOUBLE_NEWLINE_REDUCER, '\n\n');
1185
+ // Trim leading whitespace inside inline code spans
1186
+ result = result.replace(/(?<=\s|^)`\s+([^`]+)`/gm, '`$1`');
1187
+ // Unescape backticks inside markdown link text
1188
+ result = result.replace(/\[([^\]]*\\`[^\]]*)\]\(([^)]+)\)/g, (_match, linkText, url) => `[${linkText.replace(/\\`/g, '`')}](${url})`);
1102
1189
  result = normalizeNestedListIndentation(result);
1103
1190
  checkAbort('markdown:cleanup:properties');
1104
1191
  // fixProperties
@@ -45,14 +45,13 @@ export declare const markdownTransform: (input: {
45
45
  buffer: Uint8Array;
46
46
  encoding: string;
47
47
  truncated?: boolean;
48
- }, url: string, signal?: AbortSignal, skipNoiseRemoval?: boolean) => Promise<MarkdownPipelineResult>;
48
+ }, url: string, signal?: AbortSignal) => Promise<MarkdownPipelineResult>;
49
49
  export declare function serializeMarkdownResult(result: MarkdownPipelineResult): string;
50
50
  interface SharedFetchOptions {
51
51
  readonly url: string;
52
52
  readonly signal?: AbortSignal;
53
53
  readonly cacheVary?: Record<string, unknown> | string;
54
54
  readonly forceRefresh?: boolean;
55
- readonly maxInlineChars?: number;
56
55
  readonly onStage?: (stage: SharedFetchStage) => void;
57
56
  readonly transform: (input: {
58
57
  buffer: Uint8Array;
@@ -1 +1 @@
1
- {"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../src/lib/fetch-pipeline.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,KAAK,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAqBrE,KAAK,UAAU,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAY1C,wBAAgB,gBAAgB,CAC9B,GAAG,EAAE,OAAO,EACZ,IAAI,EAAE,SAAS,MAAM,EAAE,GACtB,UAAU,GAAG,SAAS,CAOxB;AACD,wBAAgB,UAAU,CACxB,MAAM,CAAC,EAAE,WAAW,GACnB;IAAE,MAAM,EAAE,WAAW,CAAA;CAAE,GAAG,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAEjD;AAMD,eAAO,MAAM,iBAAiB,mBAAmB,CAAC;AAClD,MAAM,WAAW,mBAAmB;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAuED,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,GACb,MAAM,CAkBR;AAoDD,UAAU,oBAAoB,CAAC,CAAC;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC;IAC7C,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAC5C,SAAS,EAAE,CACT,KAAK,EAAE;QAAE,MAAM,EAAE,UAAU,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,EACpE,GAAG,EAAE,MAAM,KACR,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IACpB,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,MAAM,CAAC;IAClC,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,CAAC,GAAG,SAAS,CAAC;CACjD;AACD,MAAM,WAAW,cAAc,CAAC,CAAC;IAC/B,IAAI,EAAE,CAAC,CAAC;IACR,SAAS,EAAE,OAAO,CAAC;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B;AACD,MAAM,MAAM,gBAAgB,GACxB,aAAa,GACb,aAAa,GACb,WAAW,GACX,eAAe,GACf,cAAc,GACd,gBAAgB,GAChB,iBAAiB,GACjB,gBAAgB,GAChB,iBAAiB,CAAC;AAmMtB,wBAAsB,oBAAoB,CAAC,CAAC,EAC1C,OAAO,EAAE,oBAAoB,CAAC,CAAC,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAmE5B;AAMD,MAAM,MAAM,sBAAsB,GAAG,uBAAuB,GAAG;IAC7D,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B,CAAC;AACF,wBAAgB,yBAAyB,CACvC,MAAM,EAAE,MAAM,GACb,sBAAsB,GAAG,SAAS,CAqBpC;AACD,eAAO,MAAM,iBAAiB,GAC5B,OAAO;IAAE,MAAM,EAAE,UAAU,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,OAAO
,CAAA;CAAE,EACpE,KAAK,MAAM,EACX,SAAS,WAAW,EACpB,mBAAmB,OAAO,KACzB,OAAO,CAAC,sBAAsB,CAUhC,CAAC;AACF,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,sBAAsB,GAC7B,MAAM,CAaR;AAMD,UAAU,kBAAkB;IAC1B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,MAAM,CAAC,EAAE,WAAW,CAAC;IAC9B,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC;IACtD,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;IACrD,QAAQ,CAAC,SAAS,EAAE,CAClB,KAAK,EAAE;QAAE,MAAM,EAAE,UAAU,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,EACpE,aAAa,EAAE,MAAM,KAClB,sBAAsB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;IAC9D,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,sBAAsB,KAAK,MAAM,CAAC;IAChE,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,sBAAsB,GAAG,SAAS,CAAC;CAC/E;AACD,UAAU,eAAe;IACvB,QAAQ,CAAC,oBAAoB,CAAC,EAAE,OAAO,oBAAoB,CAAC;CAC7D;AAgBD,wBAAsB,kBAAkB,CACtC,OAAO,EAAE,kBAAkB,EAC3B,IAAI,GAAE,eAAoB,GACzB,OAAO,CAAC;IACT,QAAQ,EAAE,cAAc,CAAC,sBAAsB,CAAC,CAAC;IACjD,YAAY,EAAE,mBAAmB,CAAC;CACnC,CAAC,CAaD"}
1
+ {"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../src/lib/fetch-pipeline.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,KAAK,uBAAuB,EAAE,MAAM,uBAAuB,CAAC;AAqBrE,KAAK,UAAU,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAY1C,wBAAgB,gBAAgB,CAC9B,GAAG,EAAE,OAAO,EACZ,IAAI,EAAE,SAAS,MAAM,EAAE,GACtB,UAAU,GAAG,SAAS,CAOxB;AACD,wBAAgB,UAAU,CACxB,MAAM,CAAC,EAAE,WAAW,GACnB;IAAE,MAAM,EAAE,WAAW,CAAA;CAAE,GAAG,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAEjD;AAMD,eAAO,MAAM,iBAAiB,mBAAmB,CAAC;AAClD,MAAM,WAAW,mBAAmB;IAClC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAuED,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,GACb,MAAM,CAkBR;AAsCD,UAAU,oBAAoB,CAAC,CAAC;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC;IAC7C,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;IAC5C,SAAS,EAAE,CACT,KAAK,EAAE;QAAE,MAAM,EAAE,UAAU,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,EACpE,GAAG,EAAE,MAAM,KACR,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IACpB,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,MAAM,CAAC;IAClC,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,CAAC,GAAG,SAAS,CAAC;CACjD;AACD,MAAM,WAAW,cAAc,CAAC,CAAC;IAC/B,IAAI,EAAE,CAAC,CAAC;IACR,SAAS,EAAE,OAAO,CAAC;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CAC1B;AACD,MAAM,MAAM,gBAAgB,GACxB,aAAa,GACb,aAAa,GACb,WAAW,GACX,eAAe,GACf,cAAc,GACd,gBAAgB,GAChB,iBAAiB,GACjB,gBAAgB,GAChB,iBAAiB,CAAC;AAmMtB,wBAAsB,oBAAoB,CAAC,CAAC,EAC1C,OAAO,EAAE,oBAAoB,CAAC,CAAC,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAmE5B;AAMD,MAAM,MAAM,sBAAsB,GAAG,uBAAuB,GAAG;IAC7D,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;CAC1B,CAAC;AACF,wBAAgB,yBAAyB,CACvC,MAAM,EAAE,MAAM,GACb,sBAAsB,GAAG,SAAS,CAqBpC;AACD,eAAO,MAAM,iBAAiB,GAC5B,OAAO;IAAE,MAAM,EAAE,UAAU,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,OAAO
,CAAA;CAAE,EACpE,KAAK,MAAM,EACX,SAAS,WAAW,KACnB,OAAO,CAAC,sBAAsB,CAShC,CAAC;AACF,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,sBAAsB,GAC7B,MAAM,CAaR;AAMD,UAAU,kBAAkB;IAC1B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,MAAM,CAAC,EAAE,WAAW,CAAC;IAC9B,QAAQ,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC;IACtD,QAAQ,CAAC,YAAY,CAAC,EAAE,OAAO,CAAC;IAChC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,KAAK,IAAI,CAAC;IACrD,QAAQ,CAAC,SAAS,EAAE,CAClB,KAAK,EAAE;QAAE,MAAM,EAAE,UAAU,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAC;QAAC,SAAS,CAAC,EAAE,OAAO,CAAA;KAAE,EACpE,aAAa,EAAE,MAAM,KAClB,sBAAsB,GAAG,OAAO,CAAC,sBAAsB,CAAC,CAAC;IAC9D,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC,MAAM,EAAE,sBAAsB,KAAK,MAAM,CAAC;IAChE,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,sBAAsB,GAAG,SAAS,CAAC;CAC/E;AACD,UAAU,eAAe;IACvB,QAAQ,CAAC,oBAAoB,CAAC,EAAE,OAAO,oBAAoB,CAAC;CAC7D;AAgBD,wBAAsB,kBAAkB,CACtC,OAAO,EAAE,kBAAkB,EAC3B,IAAI,GAAE,eAAoB,GACzB,OAAO,CAAC;IACT,QAAQ,EAAE,cAAc,CAAC,sBAAsB,CAAC,CAAC;IACjD,YAAY,EAAE,mBAAmB,CAAC;CACnC,CAAC,CAUD"}
@@ -107,9 +107,9 @@ export function appendTruncationMarker(content, marker) {
107
107
  return `${contentWithFence}${marker}`;
108
108
  }
109
109
  class InlineContentLimiter {
110
- apply(content, inlineLimitOverride) {
110
+ apply(content) {
111
111
  const contentSize = content.length;
112
- const inlineLimit = this.resolveInlineLimit(inlineLimitOverride);
112
+ const inlineLimit = config.constants.maxInlineContentChars;
113
113
  if (isWithinInlineLimit(contentSize, inlineLimit)) {
114
114
  return { content, contentSize };
115
115
  }
@@ -120,22 +120,13 @@ class InlineContentLimiter {
120
120
  truncated: true,
121
121
  };
122
122
  }
123
- resolveInlineLimit(inlineLimitOverride) {
124
- const globalLimit = config.constants.maxInlineContentChars;
125
- if (inlineLimitOverride === undefined)
126
- return globalLimit;
127
- if (globalLimit > 0 && inlineLimitOverride > 0) {
128
- return Math.min(inlineLimitOverride, globalLimit);
129
- }
130
- return inlineLimitOverride;
131
- }
132
123
  }
/**
 * Reports whether content of the given size fits the inline limit.
 *
 * A limit of zero or below always fits.
 *
 * @param {number} contentSize - Size of the content being checked.
 * @param {number} inlineLimit - Maximum allowed size.
 * @returns {boolean} True when the content fits.
 */
function isWithinInlineLimit(contentSize, inlineLimit) {
    if (inlineLimit <= 0) {
        return true;
    }
    return contentSize <= inlineLimit;
}
136
127
  const inlineLimiter = new InlineContentLimiter();
137
- function applyInlineContentLimit(content, inlineLimitOverride) {
138
- return inlineLimiter.apply(content, inlineLimitOverride);
128
+ function applyInlineContentLimit(content) {
129
+ return inlineLimiter.apply(content);
139
130
  }
140
131
  function resolveNormalizedUrl(url) {
141
132
  const { normalizedUrl: validatedUrl } = normalizeUrl(url);
@@ -341,12 +332,11 @@ export function parseCachedMarkdownResult(cached) {
341
332
  truncated,
342
333
  };
343
334
  }
344
- export const markdownTransform = async (input, url, signal, skipNoiseRemoval) => {
335
+ export const markdownTransform = async (input, url, signal) => {
345
336
  const result = await transformBufferToMarkdown(input.buffer, url, {
346
337
  includeMetadata: true,
347
338
  encoding: input.encoding,
348
339
  ...withSignal(signal),
349
- ...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
350
340
  ...(input.truncated ? { inputTruncated: true } : {}),
351
341
  });
352
342
  const truncated = Boolean(result.truncated || input.truncated);
@@ -382,6 +372,6 @@ export async function performSharedFetch(options, deps = {}) {
382
372
  const pipeline = await executePipeline(buildSharedFetchPipelineOptions(options));
383
373
  options.onStage?.('prepare_output');
384
374
  options.onStage?.('finalize_output');
385
- const inlineResult = applyInlineContentLimit(pipeline.data.content, options.maxInlineChars);
375
+ const inlineResult = applyInlineContentLimit(pipeline.data.content);
386
376
  return { pipeline, inlineResult };
387
377
  }
@@ -1 +1 @@
1
- {"version":3,"file":"instructions.d.ts","sourceRoot":"","sources":["../../src/resources/instructions.ts"],"names":[],"mappings":"AAIA,wBAAgB,uBAAuB,IAAI,MAAM,CA2ChD"}
1
+ {"version":3,"file":"instructions.d.ts","sourceRoot":"","sources":["../../src/resources/instructions.ts"],"names":[],"mappings":"AAIA,wBAAgB,uBAAuB,IAAI,MAAM,CA0ChD"}
@@ -18,8 +18,7 @@ export function buildServerInstructions() {
18
18
  <workflows>
19
19
  1. Standard: Call \`${FETCH_URL_TOOL_NAME}\` -> Read \`markdown\`. If \`truncated: true\`, retry with \`forceRefresh: true\`.
20
20
  2. Fresh: Set \`forceRefresh: true\` to bypass cache.
21
- 3. Full-Fidelity: Set \`skipNoiseRemoval: true\` to preserve nav/footers.
22
- 4. Async: Add \`task: { ttl: <ms> }\` to \`tools/call\` -> Poll \`tasks/get\` -> Call \`tasks/result\`.
21
+ 3. Async: Add \`task: { ttl: <ms> }\` to \`tools/call\` -> Poll \`tasks/get\` -> Call \`tasks/result\`.
23
22
  </workflows>
24
23
 
25
24
  <constraints>
package/dist/schemas.d.ts CHANGED
@@ -30,9 +30,7 @@ export declare const cachedPayloadSchema: z.ZodObject<{
30
30
  export type CachedPayload = z.infer<typeof cachedPayloadSchema>;
31
31
  export declare const fetchUrlInputSchema: z.ZodObject<{
32
32
  url: z.ZodURL;
33
- skipNoiseRemoval: z.ZodOptional<z.ZodBoolean>;
34
33
  forceRefresh: z.ZodOptional<z.ZodBoolean>;
35
- maxInlineChars: z.ZodOptional<z.ZodNumber>;
36
34
  }, z.core.$strict>;
37
35
  export declare const fetchUrlOutputSchema: z.ZodObject<{
38
36
  url: z.ZodURL;
@@ -1 +1 @@
1
- {"version":3,"file":"schemas.d.ts","sourceRoot":"","sources":["../src/schemas.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAI9D,eAAO,MAAM,eAAe;;;;;;;;CAQlB,CAAC;AAiCX,eAAO,MAAM,uBAAuB;;;;;;;;kBAQlC,CAAC;AAgBH,wBAAgB,0BAA0B,CACxC,KAAK,EAAE,OAAO,GACb,iBAAiB,GAAG,SAAS,CAQ/B;AAED,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,GAAG,SAAS,CAErE;AAUD,eAAO,MAAM,mBAAmB;;;;;;iBA2B7B,CAAC;AAEJ,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEhE,eAAO,MAAM,mBAAmB;;;;;kBAuB9B,CAAC;AAEH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;kBAqC/B,CAAC;AAEH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAkBpE;AAED,wBAAgB,2BAA2B,CACzC,OAAO,EAAE,aAAa,GACrB,MAAM,GAAG,IAAI,CAEf"}
1
+ {"version":3,"file":"schemas.d.ts","sourceRoot":"","sources":["../src/schemas.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAC;AAI9D,eAAO,MAAM,eAAe;;;;;;;;CAQlB,CAAC;AAiCX,eAAO,MAAM,uBAAuB;;;;;;;;kBAQlC,CAAC;AAgBH,wBAAgB,0BAA0B,CACxC,KAAK,EAAE,OAAO,GACb,iBAAiB,GAAG,SAAS,CAQ/B;AAED,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,OAAO,GAAG,MAAM,GAAG,SAAS,CAErE;AAUD,eAAO,MAAM,mBAAmB;;;;;;iBA2B7B,CAAC;AAEJ,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEhE,eAAO,MAAM,mBAAmB;;;kBAU9B,CAAC;AAEH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;kBAqC/B,CAAC;AAEH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,aAAa,GAAG,IAAI,CAkBpE;AAED,wBAAgB,2BAA2B,CACzC,OAAO,EAAE,aAAa,GACrB,MAAM,GAAG,IAAI,CAEf"}
package/dist/schemas.js CHANGED
@@ -95,21 +95,10 @@ export const fetchUrlInputSchema = z.strictObject({
95
95
  .min(1)
96
96
  .max(config.constants.maxUrlLength)
97
97
  .describe(`Target URL. Max ${config.constants.maxUrlLength} chars.`),
98
- skipNoiseRemoval: z
99
- .boolean()
100
- .optional()
101
- .describe('Preserve navigation/footers (disable noise filtering).'),
102
98
  forceRefresh: z
103
99
  .boolean()
104
100
  .optional()
105
101
  .describe('Bypass cache and fetch fresh content.'),
106
- maxInlineChars: z
107
- .number()
108
- .int()
109
- .min(0)
110
- .max(config.constants.maxHtmlSize)
111
- .optional()
112
- .describe(`Inline markdown limit (0-${config.constants.maxHtmlSize}, 0=unlimited). Lower of this or global limit applies.`),
113
102
  });
114
103
  export const fetchUrlOutputSchema = z.strictObject({
115
104
  url: z.httpUrl().max(config.constants.maxUrlLength).describe('Fetched URL.'),
@@ -1 +1 @@
1
- {"version":3,"file":"fetch-url.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-url.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,SAAS,EAEV,MAAM,yCAAyC,CAAC;AACjD,OAAO,KAAK,EACV,YAAY,EAEb,MAAM,oCAAoC,CAAC;AAE5C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAyBxB,OAAO,EAGL,KAAK,gBAAgB,EACtB,MAAM,qBAAqB,CAAC;AAI7B,OAAO,EACL,mBAAmB,EAIpB,MAAM,eAAe,CAAC;AAMvB,KAAK,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEzD,KAAK,qBAAqB,GAAG,YAAY,CAAC;AAE1C,UAAU,gBAAgB;IACxB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,OAAO,EAAE,qBAAqB,EAAE,CAAC;IACjC,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,SAAS,CAAC;IACxD,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,eAAO,MAAM,mBAAmB,cAAc,CAAC;AAyU/C,wBAAsB,mBAAmB,CACvC,KAAK,EAAE,aAAa,EACpB,KAAK,CAAC,EAAE,gBAAgB,GACvB,OAAO,CAAC,gBAAgB,CAAC,CAK3B;AAgDD;;;;;;GAMG;AACH,wBAAgB,2BAA2B,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,EAC5E,OAAO,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,GAC7D,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAmBvD;AAwBD,wBAAgB,aAAa,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,CAwCrD"}
1
+ {"version":3,"file":"fetch-url.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-url.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,SAAS,EAEV,MAAM,yCAAyC,CAAC;AACjD,OAAO,KAAK,EACV,YAAY,EAEb,MAAM,oCAAoC,CAAC;AAE5C,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAyBxB,OAAO,EAGL,KAAK,gBAAgB,EACtB,MAAM,qBAAqB,CAAC;AAI7B,OAAO,EACL,mBAAmB,EAIpB,MAAM,eAAe,CAAC;AAMvB,KAAK,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEzD,KAAK,qBAAqB,GAAG,YAAY,CAAC;AAE1C,UAAU,gBAAgB;IACxB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;IACvB,OAAO,EAAE,qBAAqB,EAAE,CAAC;IACjC,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,SAAS,CAAC;IACxD,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,eAAO,MAAM,mBAAmB,cAAc,CAAC;AAwT/C,wBAAsB,mBAAmB,CACvC,KAAK,EAAE,aAAa,EACpB,KAAK,CAAC,EAAE,gBAAgB,GACvB,OAAO,CAAC,gBAAgB,CAAC,CAK3B;AAgDD;;;;;;GAMG;AACH,wBAAgB,2BAA2B,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,EAC5E,OAAO,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,GAC7D,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAmBvD;AAwBD,wBAAgB,aAAa,CAAC,MAAM,EAAE,SAAS,GAAG,IAAI,CAwCrD"}
@@ -179,26 +179,24 @@ function mapFetchStageToProgress(stage, context) {
179
179
  return { step: 7, message: 'Finalizing output' };
180
180
  }
181
181
  }
182
- function buildFetchOptions(url, context, signal, progress, skipNoiseRemoval, forceRefresh, maxInlineChars) {
182
+ function buildFetchOptions(url, context, signal, progress, forceRefresh) {
183
183
  return {
184
184
  url,
185
185
  ...withSignal(signal),
186
- ...(skipNoiseRemoval ? { cacheVary: { skipNoiseRemoval: true } } : {}),
187
186
  ...(forceRefresh ? { forceRefresh: true } : {}),
188
- ...(maxInlineChars !== undefined ? { maxInlineChars } : {}),
189
187
  onStage: (stage) => {
190
188
  const update = mapFetchStageToProgress(stage, context);
191
189
  reportProgress(progress, update.step, update.message);
192
190
  },
193
191
  transform: async ({ buffer, encoding, truncated }, normalizedUrl) => {
194
- return markdownTransform({ buffer, encoding, ...(truncated ? { truncated } : {}) }, normalizedUrl, signal, skipNoiseRemoval);
192
+ return markdownTransform({ buffer, encoding, ...(truncated ? { truncated } : {}) }, normalizedUrl, signal);
195
193
  },
196
194
  serialize: serializeMarkdownResult,
197
195
  deserialize: parseCachedMarkdownResult,
198
196
  };
199
197
  }
200
- async function fetchPipeline(url, context, signal, progress, skipNoiseRemoval, forceRefresh, maxInlineChars) {
201
- return performSharedFetch(buildFetchOptions(url, context, signal, progress, skipNoiseRemoval, forceRefresh, maxInlineChars));
198
+ async function fetchPipeline(url, context, signal, progress, forceRefresh) {
199
+ return performSharedFetch(buildFetchOptions(url, context, signal, progress, forceRefresh));
202
200
  }
203
201
  function formatContentSize(chars) {
204
202
  if (chars < 1000)
@@ -220,7 +218,7 @@ async function executeFetch(input, extra) {
220
218
  logDebug('Fetching URL', { url });
221
219
  try {
222
220
  reportProgress(progress, 1, 'Preparing request');
223
- const { pipeline, inlineResult } = await fetchPipeline(url, context, signal, progress, input.skipNoiseRemoval, input.forceRefresh, input.maxInlineChars);
221
+ const { pipeline, inlineResult } = await fetchPipeline(url, context, signal, progress, input.forceRefresh);
224
222
  const size = formatContentSize(inlineResult.contentSize);
225
223
  reportProgress(progress, 8, `Done — ${size}`);
226
224
  return buildResponse(pipeline, inlineResult, url);
@@ -1 +1 @@
1
- {"version":3,"file":"html-translators.d.ts","sourceRoot":"","sources":["../../src/transform/html-translators.ts"],"names":[],"mappings":"AAghBA,wBAAgB,+BAA+B,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEpE"}
1
+ {"version":3,"file":"html-translators.d.ts","sourceRoot":"","sources":["../../src/transform/html-translators.ts"],"names":[],"mappings":"AA4fA,wBAAgB,+BAA+B,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEpE"}
@@ -346,14 +346,8 @@ function buildSpanTranslator(ctx) {
346
346
  return {};
347
347
  }
348
348
  // ---------------------------------------------------------------------------
349
- // Table / DL helpers
349
+ // DL helpers
350
350
  // ---------------------------------------------------------------------------
351
- function hasComplexTableLayout(node) {
352
- if (!isLikeNode(node))
353
- return false;
354
- const innerHTML = typeof node.innerHTML === 'string' ? node.innerHTML : '';
355
- return /(?:colspan|rowspan)=["']?[2-9]/i.test(innerHTML);
356
- }
357
351
  function resolveDlNodeName(child) {
358
352
  if (!isLikeNode(child))
359
353
  return '';
@@ -381,22 +375,6 @@ function createCustomTranslators() {
381
375
  return {
382
376
  code: (ctx) => buildCodeTranslator(ctx),
383
377
  img: (ctx) => buildImageTranslator(ctx),
384
- table: (ctx) => {
385
- if (!isObject(ctx))
386
- return {};
387
- const { node } = ctx;
388
- if (hasComplexTableLayout(node)) {
389
- return {
390
- postprocess: ({ content }) => {
391
- const trimmed = content.trim();
392
- if (!trimmed)
393
- return '';
394
- return `\n\n${trimmed}\n\n`;
395
- },
396
- };
397
- }
398
- return {};
399
- },
400
378
  dl: (ctx) => {
401
379
  if (!isObject(ctx))
402
380
  return { content: '' };
@@ -1,4 +1,5 @@
1
1
  import type { ExtractedMetadata } from './types.js';
2
+ export declare function normalizeDocumentTitle(title: string, baseUrl?: string): string;
2
3
  export declare function extractMetadata(document: Document, baseUrl?: string): ExtractedMetadata;
3
4
  export declare function extractMetadataFromHead(html: string, baseUrl?: string): ExtractedMetadata | null;
4
5
  export declare function mergeMetadata(early: ExtractedMetadata | null, late: ExtractedMetadata): ExtractedMetadata;
@@ -1 +1 @@
1
- {"version":3,"file":"metadata.d.ts","sourceRoot":"","sources":["../../src/transform/metadata.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAyKpD,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,GACf,iBAAiB,CAenB;AAED,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,MAAM,EACZ,OAAO,CAAC,EAAE,MAAM,GACf,iBAAiB,GAAG,IAAI,CAY1B;AAED,wBAAgB,aAAa,CAC3B,KAAK,EAAE,iBAAiB,GAAG,IAAI,EAC/B,IAAI,EAAE,iBAAiB,GACtB,iBAAiB,CAmBnB"}
1
+ {"version":3,"file":"metadata.d.ts","sourceRoot":"","sources":["../../src/transform/metadata.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAoCpD,wBAAgB,sBAAsB,CACpC,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE,MAAM,GACf,MAAM,CAsBR;AAuID,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,QAAQ,EAClB,OAAO,CAAC,EAAE,MAAM,GACf,iBAAiB,CAkBnB;AAED,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,MAAM,EACZ,OAAO,CAAC,EAAE,MAAM,GACf,iBAAiB,GAAG,IAAI,CAY1B;AAED,wBAAgB,aAAa,CAC3B,KAAK,EAAE,iBAAiB,GAAG,IAAI,EAC/B,IAAI,EAAE,iBAAiB,GACtB,iBAAiB,CAmBnB"}
@@ -15,6 +15,28 @@ function extractHeadSection(html) {
15
15
  return null;
16
16
  return html.substring(0, match.index);
17
17
  }
18
+ export function normalizeDocumentTitle(title, baseUrl) {
19
+ if (!baseUrl || !title.startsWith('GitHub - '))
20
+ return title;
21
+ let parsed;
22
+ try {
23
+ parsed = new URL(baseUrl);
24
+ }
25
+ catch {
26
+ return title;
27
+ }
28
+ const hostname = parsed.hostname.toLowerCase();
29
+ if (hostname !== 'github.com' && hostname !== 'www.github.com') {
30
+ return title;
31
+ }
32
+ const segments = parsed.pathname.split('/').filter(Boolean);
33
+ if (segments.length !== 2)
34
+ return title;
35
+ const [owner, repo] = segments;
36
+ if (!owner || !repo)
37
+ return title;
38
+ return `${owner}/${repo}`;
39
+ }
18
40
  const META_PROPERTY_HANDLERS = new Map([
19
41
  [
20
42
  'og:title',
@@ -139,6 +161,9 @@ function resolveFaviconUrl(href, baseUrl) {
139
161
  export function extractMetadata(document, baseUrl) {
140
162
  const ctx = buildMetaContext(document);
141
163
  const metadata = resolveMetadataFromContext(ctx);
164
+ if (metadata.title) {
165
+ metadata.title = normalizeDocumentTitle(metadata.title, baseUrl);
166
+ }
142
167
  if (baseUrl) {
143
168
  const icon32 = document.querySelector('link[rel="icon"][sizes="32x32"]');
144
169
  const href = icon32?.getAttribute('href');
@@ -1 +1 @@
1
- {"version":3,"file":"shared.d.ts","sourceRoot":"","sources":["../../src/transform/shared.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,uBAAuB,EACvB,gBAAgB,EAChB,8BAA8B,EAE/B,MAAM,YAAY,CAAC;AAEpB,UAAU,2BAA2B;IACnC,WAAW,EAAE,CAAC,OAAO,EAAE,8BAA8B,KAAK,IAAI,CAAC;IAC/D,YAAY,EAAE,CACZ,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,KACtB,uBAAuB,CAAC;CAC9B;AAkGD,wBAAgB,6BAA6B,CAC3C,OAAO,EAAE,2BAA2B,GACnC,CAAC,GAAG,EAAE,OAAO,KAAK,IAAI,CAgFxB"}
1
+ {"version":3,"file":"shared.d.ts","sourceRoot":"","sources":["../../src/transform/shared.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,uBAAuB,EACvB,gBAAgB,EAChB,8BAA8B,EAE/B,MAAM,YAAY,CAAC;AAEpB,UAAU,2BAA2B;IACnC,WAAW,EAAE,CAAC,OAAO,EAAE,8BAA8B,KAAK,IAAI,CAAC;IAC/D,YAAY,EAAE,CACZ,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,KACtB,uBAAuB,CAAC;CAC9B;AAgGD,wBAAgB,6BAA6B,CAC3C,OAAO,EAAE,2BAA2B,GACnC,CAAC,GAAG,EAAE,OAAO,KAAK,IAAI,CA8ExB"}
@@ -3,14 +3,13 @@ function isTransformMessage(message) {
3
3
  if (!message || typeof message !== 'object')
4
4
  return false;
5
5
  const value = message;
6
- const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = value;
6
+ const { id, url, html, htmlBuffer, encoding, includeMetadata, inputTruncated, } = value;
7
7
  return (typeof id === 'string' &&
8
8
  typeof url === 'string' &&
9
9
  typeof includeMetadata === 'boolean' &&
10
10
  (html === undefined || typeof html === 'string') &&
11
11
  (htmlBuffer === undefined || htmlBuffer instanceof Uint8Array) &&
12
12
  (encoding === undefined || typeof encoding === 'string') &&
13
- (skipNoiseRemoval === undefined || typeof skipNoiseRemoval === 'boolean') &&
14
13
  (inputTruncated === undefined || typeof inputTruncated === 'boolean'));
15
14
  }
16
15
  function decodeHtml(html, htmlBuffer, encoding, decoder) {
@@ -83,7 +82,7 @@ export function createTransformMessageHandler(options) {
83
82
  }
84
83
  if (messageType !== 'transform' || !isTransformMessage(message))
85
84
  return;
86
- const { id, url, html, htmlBuffer, encoding, includeMetadata, skipNoiseRemoval, inputTruncated, } = message;
85
+ const { id, url, html, htmlBuffer, encoding, includeMetadata, inputTruncated, } = message;
87
86
  if (!id.trim()) {
88
87
  sendMessage({
89
88
  type: 'error',
@@ -115,7 +114,6 @@ export function createTransformMessageHandler(options) {
115
114
  const result = runTransform(content, url, {
116
115
  includeMetadata,
117
116
  signal: controller.signal,
118
- ...(skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
119
117
  ...(inputTruncated ? { inputTruncated: true } : {}),
120
118
  });
121
119
  sendMessage(createResultMessage(id, result));
@@ -1 +1 @@
1
- {"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AAsCA,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AAqCpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AA4ID,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;AAkUD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,WAAW,CAAA;CAExD,GACA,gBAAgB,CAGlB;AAqPD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,aAAa,EACxB,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B,GACA,MAAM,CAsBR;AAuKD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,sBAAsB,EAAE,MAAM,GAAG,QAAQ,GACxC,OAAO,CAQT;AA6DD,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAqB3B;AAmQD,wBAAgB,gCAAgC,CAC9C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,uBAAuB,CA8CzB;AAED,UAAU,kBAAkB;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,qBAAqB,IAAI,kBAAkB,GAAG,IAAI,CAEjE;AAED,wBAAsB,2BAA2B,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjE;AAED,KAAK,yBAAyB,GAAG,gBAAgB,GAAG;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAoI1E,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,wBAAsB,yBAAyB,CAC7C,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,uBAAuB,CAAC,CAElC"}
1
+ {"version":3,"file":"transform.d.ts","sourceRoot":"","sources":["../../src/transform/transform.ts"],"names":[],"mappings":"AAuCA,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,gBAAgB,EAChB,qBAAqB,EAEtB,MAAM,YAAY,CAAC;AAqCpB,UAAU,WAAW;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,SAAS,EAAE,MAAM,CAAC;CACnB;AA4ID,wBAAgB,mBAAmB,CACjC,GAAG,EAAE,MAAM,EACX,KAAK,EAAE,MAAM,EACb,MAAM,CAAC,EAAE,WAAW,GACnB,qBAAqB,GAAG,IAAI,CAE9B;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,qBAAqB,GAAG,IAAI,EACrC,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,OAAO,CAAA;CAAE,GAChC,MAAM,CAER;AA6UD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE;IAAE,cAAc,CAAC,EAAE,OAAO,CAAC;IAAC,MAAM,CAAC,EAAE,WAAW,CAAA;CAExD,GACA,gBAAgB,CAGlB;AAqPD,wBAAgB,cAAc,CAC5B,IAAI,EAAE,MAAM,EACZ,QAAQ,CAAC,EAAE,aAAa,EACxB,OAAO,CAAC,EAAE;IACR,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;CAC5B,GACA,MAAM,CAsBR;AAuKD,wBAAgB,sBAAsB,CACpC,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,sBAAsB,EAAE,MAAM,GAAG,QAAQ,GACxC,OAAO,CAQT;AA6DD,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAuB3B;AAyUD,wBAAgB,gCAAgC,CAC9C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,uBAAuB,CA6CzB;AAED,UAAU,kBAAkB;IAC1B,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,wBAAgB,qBAAqB,IAAI,kBAAkB,GAAG,IAAI,CAEjE;AAED,wBAAsB,2BAA2B,IAAI,OAAO,CAAC,IAAI,CAAC,CAEjE;AAED,KAAK,yBAAyB,GAAG,gBAAgB,GAAG;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC;AAkI1E,wBAAsB,uBAAuB,CAC3C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GACxB,OAAO,CAAC,uBAAuB,CAAC,CAElC;AAED,wBAAsB,yBAAyB,CAC7C,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,yBAAyB,GACjC,OAAO,CAAC,uBAAuB,CAAC,CAElC"}
@@ -11,7 +11,7 @@ import { createAbortError, throwIfAborted } from '../lib/utils.js';
11
11
  import { FetchError, getErrorMessage, toError } from '../lib/utils.js';
12
12
  import { isObject } from '../lib/utils.js';
13
13
  import { translateHtmlFragmentToMarkdown } from './html-translators.js';
14
- import { extractMetadata, extractMetadataFromHead, mergeMetadata, } from './metadata.js';
14
+ import { extractMetadata, extractMetadataFromHead, mergeMetadata, normalizeDocumentTitle, } from './metadata.js';
15
15
  import { getOrCreateWorkerPool, getWorkerPoolStats, shutdownWorkerPool, } from './worker-pool.js';
16
16
  function decodeInput(input, encoding) {
17
17
  if (typeof input === 'string')
@@ -270,6 +270,14 @@ function resolveCollapsedTextLengthUpTo(text, max) {
270
270
  }
271
271
  return length;
272
272
  }
273
+ function preserveAlertElements(doc) {
274
+ const alerts = doc.querySelectorAll('[role="alert"], .admonition, .callout');
275
+ for (const el of alerts) {
276
+ const bq = doc.createElement('blockquote');
277
+ bq.innerHTML = el.innerHTML;
278
+ el.replaceWith(bq);
279
+ }
280
+ }
273
281
  function extractArticle(document, url, signal) {
274
282
  if (!isReadabilityCompatible(document)) {
275
283
  logWarn('Document not compatible with Readability');
@@ -298,6 +306,7 @@ function extractArticle(document, url, signal) {
298
306
  const readabilityDoc = typeof doc.cloneNode === 'function'
299
307
  ? doc.cloneNode(true)
300
308
  : doc;
309
+ preserveAlertElements(readabilityDoc);
301
310
  // F1: Check abort before heavy Readability parse
302
311
  abortPolicy.throwIfAborted(signal, url, 'extract:article:parse');
303
312
  const reader = new Readability(readabilityDoc, {
@@ -797,8 +806,9 @@ export function createContentMetadataBlock(url, article, extractedMeta, shouldEx
797
806
  fetchedAt: new Date().toISOString(),
798
807
  };
799
808
  if (shouldExtractFromArticle && article) {
800
- if (article.title !== undefined)
801
- metadata.title = article.title;
809
+ if (article.title !== undefined) {
810
+ metadata.title = normalizeDocumentTitle(article.title, url);
811
+ }
802
812
  if (article.byline !== undefined)
803
813
  metadata.author = article.byline;
804
814
  }
@@ -828,6 +838,12 @@ const CONTENT_ROOT_SELECTORS = [
828
838
  '.post-body',
829
839
  '.article-body',
830
840
  ];
841
+ const PRIMARY_HEADING_ROOT_SELECTORS = [
842
+ ...CONTENT_ROOT_SELECTORS,
843
+ '.markdown-body',
844
+ '.entry-content',
845
+ '[itemprop="text"]',
846
+ ];
831
847
  function findContentRoot(document) {
832
848
  for (const selector of CONTENT_ROOT_SELECTORS) {
833
849
  const element = document.querySelector(selector);
@@ -841,6 +857,34 @@ function findContentRoot(document) {
841
857
  }
842
858
  return undefined;
843
859
  }
860
+ function findPrimaryHeading(document) {
861
+ for (const selector of PRIMARY_HEADING_ROOT_SELECTORS) {
862
+ const root = document.querySelector(selector);
863
+ if (!root)
864
+ continue;
865
+ const heading = root.querySelector('h1, h2');
866
+ if (!heading)
867
+ continue;
868
+ const text = heading.textContent.trim();
869
+ if (text)
870
+ return text;
871
+ }
872
+ return undefined;
873
+ }
874
+ function isGithubRepositoryRootUrl(url) {
875
+ let parsed;
876
+ try {
877
+ parsed = new URL(url);
878
+ }
879
+ catch {
880
+ return false;
881
+ }
882
+ const hostname = parsed.hostname.toLowerCase();
883
+ if (hostname !== 'github.com' && hostname !== 'www.github.com') {
884
+ return false;
885
+ }
886
+ return parsed.pathname.split('/').filter(Boolean).length === 2;
887
+ }
844
888
  function shouldUseArticleContent(article, originalHtmlOrDocument) {
845
889
  const articleLength = article.textContent.length;
846
890
  const originalLength = getVisibleTextLength(originalHtmlOrDocument);
@@ -868,35 +912,30 @@ function shouldUseArticleContent(article, originalHtmlOrDocument) {
868
912
  return !hasTruncatedSentences(article.textContent);
869
913
  }
870
914
  function buildContentSource(params) {
871
- const { html, url, article, extractedMeta, includeMetadata, useArticleContent, document, truncated, skipNoiseRemoval, signal, } = params;
915
+ const { html, url, article, extractedMeta, includeMetadata, useArticleContent, document, truncated, signal, } = params;
872
916
  const metadata = createContentMetadataBlock(url, article, extractedMeta, useArticleContent, includeMetadata);
873
917
  const base = {
874
918
  favicon: extractedMeta.favicon,
875
919
  metadata,
876
920
  extractedMetadata: extractedMeta,
877
921
  truncated,
922
+ primaryHeading: document ? findPrimaryHeading(document) : undefined,
878
923
  };
879
924
  if (useArticleContent && article) {
880
- const cleanedArticleHtml = skipNoiseRemoval
881
- ? article.content
882
- : removeNoiseFromHtml(article.content, undefined, url, signal);
925
+ const { document: articleDoc } = parseHTML(`<!DOCTYPE html><html><body>${article.content}</body></html>`);
926
+ prepareDocumentForMarkdown(articleDoc, url, signal);
927
+ const preferPrimaryHeading = isGithubRepositoryRootUrl(url);
883
928
  return {
884
929
  ...base,
885
- sourceHtml: cleanedArticleHtml,
886
- title: article.title,
930
+ sourceHtml: articleDoc.body.innerHTML,
931
+ title: (preferPrimaryHeading ? base.primaryHeading : undefined) ??
932
+ (article.title !== undefined
933
+ ? normalizeDocumentTitle(article.title, url)
934
+ : undefined),
887
935
  skipNoiseRemoval: true,
888
936
  };
889
937
  }
890
938
  if (document) {
891
- if (skipNoiseRemoval) {
892
- return {
893
- ...base,
894
- sourceHtml: html,
895
- title: extractedMeta.title,
896
- skipNoiseRemoval: true,
897
- document,
898
- };
899
- }
900
939
  prepareDocumentForMarkdown(document, url, signal);
901
940
  const contentRoot = findContentRoot(document);
902
941
  return {
@@ -919,7 +958,7 @@ function resolveContentSource(params) {
919
958
  ...(params.signal ? { signal: params.signal } : {}),
920
959
  ...(params.inputTruncated ? { inputTruncated: true } : {}),
921
960
  });
922
- const useArticleContent = !params.skipNoiseRemoval && article
961
+ const useArticleContent = article
923
962
  ? shouldUseArticleContent(article, document)
924
963
  : false;
925
964
  return buildContentSource({
@@ -931,7 +970,6 @@ function resolveContentSource(params) {
931
970
  useArticleContent,
932
971
  document,
933
972
  truncated: truncated ?? false,
934
- ...(params.skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
935
973
  ...(params.signal ? { signal: params.signal } : {}),
936
974
  });
937
975
  }
@@ -942,7 +980,10 @@ function buildMarkdownFromContext(context, url, signal) {
942
980
  ...(context.document ? { document: context.document } : {}),
943
981
  ...(context.skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
944
982
  }));
945
- if (context.title && !content.trim().startsWith('# ')) {
983
+ if (context.primaryHeading && isGithubRepositoryRootUrl(url)) {
984
+ content = stripLeadingHeading(content, context.primaryHeading);
985
+ }
986
+ if (context.title && !/^(#{1,6})\s/.test(content.trimStart())) {
946
987
  const icon = context.favicon;
947
988
  let prefix = ' ';
948
989
  if (icon) {
@@ -964,6 +1005,34 @@ function buildMarkdownFromContext(context, url, signal) {
964
1005
  metadata: context.extractedMetadata,
965
1006
  };
966
1007
  }
1008
+ function normalizeHeadingText(value) {
1009
+ return value.replace(/\s+/g, ' ').trim().toLowerCase();
1010
+ }
1011
+ function stripLeadingHeading(markdown, headingText) {
1012
+ if (!markdown)
1013
+ return markdown;
1014
+ const lines = markdown.split('\n');
1015
+ const target = normalizeHeadingText(headingText);
1016
+ let nonEmptySeen = 0;
1017
+ for (let i = 0; i < lines.length && nonEmptySeen < 12; i += 1) {
1018
+ const trimmed = lines[i]?.trim() ?? '';
1019
+ if (!trimmed)
1020
+ continue;
1021
+ nonEmptySeen += 1;
1022
+ const match = /^(#{1,6})\s+(.+?)\s*$/.exec(trimmed);
1023
+ if (!match)
1024
+ continue;
1025
+ const current = normalizeHeadingText(match[2] ?? '');
1026
+ if (current !== target)
1027
+ return markdown;
1028
+ lines.splice(i, 1);
1029
+ if ((lines[i] ?? '').trim() === '') {
1030
+ lines.splice(i, 1);
1031
+ }
1032
+ return lines.join('\n');
1033
+ }
1034
+ return markdown;
1035
+ }
967
1036
  const REPLACEMENT_CHAR = '\ufffd';
968
1037
  const BINARY_INDICATOR_THRESHOLD = 0.1;
969
1038
  function hasBinaryIndicators(content) {
@@ -1004,7 +1073,6 @@ export function transformHtmlToMarkdownInProcess(html, url, options) {
1004
1073
  url,
1005
1074
  includeMetadata: options.includeMetadata,
1006
1075
  ...(signal ? { signal } : {}),
1007
- ...(options.skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
1008
1076
  ...(options.inputTruncated ? { inputTruncated: true } : {}),
1009
1077
  }));
1010
1078
  const result = buildMarkdownFromContext(context, url, signal);
@@ -1035,7 +1103,6 @@ function buildWorkerTransformOptions(options) {
1035
1103
  return {
1036
1104
  includeMetadata: options.includeMetadata,
1037
1105
  ...(options.signal ? { signal: options.signal } : {}),
1038
- ...(options.skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
1039
1106
  ...(options.inputTruncated ? { inputTruncated: true } : {}),
1040
1107
  };
1041
1108
  }
@@ -61,7 +61,6 @@ export interface MarkdownTransformResult extends MarkdownPayload {
61
61
  export interface TransformOptions {
62
62
  includeMetadata: boolean;
63
63
  signal?: AbortSignal;
64
- skipNoiseRemoval?: boolean;
65
64
  inputTruncated?: boolean;
66
65
  }
67
66
  /**
@@ -98,7 +97,6 @@ export interface TransformWorkerTransformMessage {
98
97
  encoding?: string | undefined;
99
98
  url: string;
100
99
  includeMetadata: boolean;
101
- skipNoiseRemoval?: boolean | undefined;
102
100
  inputTruncated?: boolean | undefined;
103
101
  }
104
102
  export interface TransformWorkerCancelledMessage {
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/transform/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,UAAU,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACjC,QAAQ,EAAE,iBAAiB,CAAC;CAC7B;AAED,UAAU,eAAe;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,SAAS,EAAE,OAAO,CAAC;IACnB,QAAQ,CAAC,EAAE,iBAAiB,CAAC;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,uBAAwB,SAAQ,eAAe;IAC9D,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,eAAe,EAAE,OAAO,CAAC;IACzB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,CAAC,EAAE,CAAC,CAAC;IACL,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,aAAa,CAAC,EAAE,MAAM,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,WAAW,CAAC;IAClB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC1B,UAAU,CAAC,EAAE,UAAU,GAAG,SAAS,CAAC;IACpC,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,
GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,EAAE,OAAO,CAAC;IACzB,gBAAgB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACvC,cAAc,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACtC;AAED,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,WAAW,CAAC;IAClB,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,4BAA4B;IAC3C,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,eAAe,CAAC;CACzB;AAED,MAAM,WAAW,2BAA2B;IAC1C,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE;QACL,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,GAAG,EAAE,MAAM,CAAC;QACZ,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACnC,CAAC;CACH;AAED,MAAM,MAAM,8BAA8B,GACtC,4BAA4B,GAC5B,2BAA2B,GAC3B,+BAA+B,CAAC"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/transform/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,UAAU,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACjC,QAAQ,EAAE,iBAAiB,CAAC;CAC7B;AAED,UAAU,eAAe;IACvB,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,SAAS,EAAE,OAAO,CAAC;IACnB,QAAQ,CAAC,EAAE,iBAAiB,CAAC;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,uBAAwB,SAAQ,eAAe;IAC9D,KAAK,EAAE,MAAM,GAAG,SAAS,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,eAAe,EAAE,OAAO,CAAC;IACzB,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,cAAc,CAAC,EAAE,OAAO,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,CAAC,EAAE,CAAC,CAAC;IACL,IAAI,EAAE,OAAO,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,aAAa,CAAC,EAAE,MAAM,CAAC;CACjC;AAED;;GAEG;AACH,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,WAAW,CAAC;IAClB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC1B,UAAU,CAAC,EAAE,UAAU,GAAG,SAAS,CAAC;IACpC,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,EAA
E,OAAO,CAAC;IACzB,cAAc,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACtC;AAED,MAAM,WAAW,+BAA+B;IAC9C,IAAI,EAAE,WAAW,CAAC;IAClB,EAAE,EAAE,MAAM,CAAC;CACZ;AAED,MAAM,WAAW,4BAA4B;IAC3C,IAAI,EAAE,QAAQ,CAAC;IACf,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,eAAe,CAAC;CACzB;AAED,MAAM,WAAW,2BAA2B;IAC1C,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;IACX,KAAK,EAAE;QACL,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,GAAG,EAAE,MAAM,CAAC;QACZ,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACnC,CAAC;CACH;AAED,MAAM,MAAM,8BAA8B,GACtC,4BAA4B,GAC5B,2BAA2B,GAC3B,+BAA+B,CAAC"}
@@ -3,7 +3,6 @@ interface TransformWorkerPool {
3
3
  transform(html: string, url: string, options: {
4
4
  includeMetadata: boolean;
5
5
  signal?: AbortSignal;
6
- skipNoiseRemoval?: boolean;
7
6
  inputTruncated?: boolean;
8
7
  }): Promise<MarkdownTransformResult>;
9
8
  close(): Promise<void>;
@@ -29,13 +28,11 @@ declare class WorkerPool implements TransformWorkerPool {
29
28
  transform(html: string, url: string, options: {
30
29
  includeMetadata: boolean;
31
30
  signal?: AbortSignal;
32
- skipNoiseRemoval?: boolean;
33
31
  inputTruncated?: boolean;
34
32
  }): Promise<MarkdownTransformResult>;
35
33
  transform(htmlBuffer: Uint8Array, url: string, options: {
36
34
  includeMetadata: boolean;
37
35
  signal?: AbortSignal;
38
- skipNoiseRemoval?: boolean;
39
36
  inputTruncated?: boolean;
40
37
  encoding?: string;
41
38
  }): Promise<MarkdownTransformResult>;
@@ -1 +1 @@
1
- {"version":3,"file":"worker-pool.d.ts","sourceRoot":"","sources":["../../src/transform/worker-pool.ts"],"names":[],"mappings":"AAuBA,OAAO,KAAK,EACV,uBAAuB,EAGxB,MAAM,YAAY,CAAC;AAuJpB,UAAU,mBAAmB;IAC3B,SAAS,CACP,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,eAAe,EAAE,OAAO,CAAC;QACzB,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAC3B,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,GACA,OAAO,CAAC,uBAAuB,CAAC,CAAC;IACpC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,aAAa,IAAI,MAAM,CAAC;IACxB,gBAAgB,IAAI,MAAM,CAAC;IAC3B,WAAW,IAAI,MAAM,CAAC;CACvB;AAkBD,cAAM,UAAW,YAAW,mBAAmB;IAC7C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAAkC;IAExE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkC;IAC1D,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAoB;IAChD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAoB;IAEhD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAqB;IAC3C,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAmC;IAC5D,OAAO,CAAC,QAAQ,CAAC,UAAU,CAOvB;IAEJ,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAK;gBAEV,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM;IASrC,SAAS,CACb,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,eAAe,EAAE,OAAO,CAAC;QACzB,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAC3B,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,GACA,OAAO,CAAC,uBAAuB,CAAC;IAC7B,SAAS,CACb,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,eAAe,EAAE,OAAO,CAAC;QACzB,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAC3B,cAAc,CAAC,EAAE,OAAO,CAAC;QACzB,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,GACA,OAAO,CAAC,uBAAuB,CAAC;IAoCnC,aAAa,IAAI,MAAM;IAKvB,gBAAgB,IAAI,MAAM;IAI1B,WAAW,IAAI,MAAM;IAIrB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAWpB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAiC5B,OAAO,CAAC,UAAU;IAIlB,OAAO,CAAC,iBAAiB;IAoDzB,OAAO,CAAC,aAAa;IAsCrB,OAAO,CAAC,gBAAgB;IAOxB,OAAO,CAAC,gBAAgB;YAyBV,aAAa;IA2B3B,OAAO,CAAC,kBAAkB;IAY1B,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,cAAc;IAuBtB,OAAO,CAAC,aAAa;IAYrB,OAAO,CAAC,eAAe;IAsDvB,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,QAAQ;IAOhB,OAAO,CAAC,QAAQ;IAWhB,OAAO,CAAC,YAAY;IASpB,
OAAO,CAAC,UAAU;IA2BlB,OAAO,CAAC,kBAAkB;IAe1B,OAAO,CAAC,iBAAiB;IAiFzB,OAAO,CAAC,YAAY;IAQpB,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,iBAAiB;CAW1B;AAMD,wBAAgB,qBAAqB,IAAI,UAAU,CAIlD;AAED,wBAAgB,kBAAkB,IAAI;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB,GAAG,IAAI,CAOP;AAED,wBAAsB,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAIxD"}
1
+ {"version":3,"file":"worker-pool.d.ts","sourceRoot":"","sources":["../../src/transform/worker-pool.ts"],"names":[],"mappings":"AAuBA,OAAO,KAAK,EACV,uBAAuB,EAGxB,MAAM,YAAY,CAAC;AAqJpB,UAAU,mBAAmB;IAC3B,SAAS,CACP,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,eAAe,EAAE,OAAO,CAAC;QACzB,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,GACA,OAAO,CAAC,uBAAuB,CAAC,CAAC;IACpC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACvB,aAAa,IAAI,MAAM,CAAC;IACxB,gBAAgB,IAAI,MAAM,CAAC;IAC3B,WAAW,IAAI,MAAM,CAAC;CACvB;AAkBD,cAAM,UAAW,YAAW,mBAAmB;IAC7C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAAkC;IAExE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkC;IAC1D,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAoB;IAChD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAoB;IAEhD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAqB;IAC3C,OAAO,CAAC,SAAS,CAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAmC;IAC5D,OAAO,CAAC,QAAQ,CAAC,UAAU,CAOvB;IAEJ,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAK;gBAEV,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM;IASrC,SAAS,CACb,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,eAAe,EAAE,OAAO,CAAC;QACzB,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,cAAc,CAAC,EAAE,OAAO,CAAC;KAC1B,GACA,OAAO,CAAC,uBAAuB,CAAC;IAC7B,SAAS,CACb,UAAU,EAAE,UAAU,EACtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE;QACP,eAAe,EAAE,OAAO,CAAC;QACzB,MAAM,CAAC,EAAE,WAAW,CAAC;QACrB,cAAc,CAAC,EAAE,OAAO,CAAC;QACzB,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,GACA,OAAO,CAAC,uBAAuB,CAAC;IAmCnC,aAAa,IAAI,MAAM;IAKvB,gBAAgB,IAAI,MAAM;IAI1B,WAAW,IAAI,MAAM;IAIrB,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAWpB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAiC5B,OAAO,CAAC,UAAU;IAIlB,OAAO,CAAC,iBAAiB;IAkDzB,OAAO,CAAC,aAAa;IAsCrB,OAAO,CAAC,gBAAgB;IAOxB,OAAO,CAAC,gBAAgB;YAyBV,aAAa;IA2B3B,OAAO,CAAC,kBAAkB;IAY1B,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,cAAc;IAuBtB,OAAO,CAAC,aAAa;IAYrB,OAAO,CAAC,eAAe;IAsDvB,OAAO,CAAC,YAAY;IAWpB,OAAO,CAAC,QAAQ;IAOhB,OAAO,CAAC,QAAQ;IAWhB,OAAO,CAAC,YAAY;IASpB,OAAO,CAAC,UAAU;IA2BlB,OAAO,CAAC,kBAAkB;IAe1B,OAAO,CAAC,iBAAiB;IAiFzB,OAAO,CAAC,YAAY;IAQpB,OAAO,CAAC
,eAAe;IAQvB,OAAO,CAAC,iBAAiB;CAW1B;AAMD,wBAAgB,qBAAqB,IAAI,UAAU,CAIlD;AAED,wBAAgB,kBAAkB,IAAI;IACpC,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;CAClB,GAAG,IAAI,CAOP;AAED,wBAAsB,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAIxD"}
@@ -83,7 +83,6 @@ function buildWorkerDispatchPayload(task) {
83
83
  id: task.id,
84
84
  url: task.url,
85
85
  includeMetadata: task.includeMetadata,
86
- ...(task.skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
87
86
  ...(task.inputTruncated ? { inputTruncated: true } : {}),
88
87
  };
89
88
  if (!task.htmlBuffer) {
@@ -214,7 +213,6 @@ class WorkerPool {
214
213
  id,
215
214
  url,
216
215
  includeMetadata: options.includeMetadata,
217
- ...(options.skipNoiseRemoval ? { skipNoiseRemoval: true } : {}),
218
216
  ...(options.inputTruncated ? { inputTruncated: true } : {}),
219
217
  signal: options.signal,
220
218
  abortListener,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@j0hanz/fetch-url-mcp",
3
- "version": "1.8.4",
3
+ "version": "1.9.1",
4
4
  "mcpName": "io.github.j0hanz/fetch-url-mcp",
5
5
  "description": "A web content fetcher MCP server that converts HTML to clean, AI and human readable markdown.",
6
6
  "type": "module",
@@ -74,7 +74,7 @@
74
74
  "@mozilla/readability": "^0.6.0",
75
75
  "linkedom": "^0.18.12",
76
76
  "node-html-markdown": "^2.0.0",
77
- "undici": "^7.22.0",
77
+ "undici": "^7.24.1",
78
78
  "zod": "^4.3.6"
79
79
  },
80
80
  "devDependencies": {