@j0hanz/superfetch 2.2.2 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +358 -363
- package/dist/assets/logo.svg +24835 -0
- package/dist/cache.d.ts +0 -1
- package/dist/cache.js +71 -29
- package/dist/config.d.ts +2 -1
- package/dist/config.js +11 -7
- package/dist/crypto.d.ts +0 -1
- package/dist/crypto.js +0 -1
- package/dist/dom-noise-removal.d.ts +0 -1
- package/dist/dom-noise-removal.js +50 -45
- package/dist/errors.d.ts +0 -1
- package/dist/errors.js +0 -1
- package/dist/fetch.d.ts +0 -1
- package/dist/fetch.js +61 -54
- package/dist/host-normalization.d.ts +1 -0
- package/dist/host-normalization.js +47 -0
- package/dist/http-native.d.ts +0 -1
- package/dist/http-native.js +92 -28
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -1
- package/dist/instructions.md +41 -41
- package/dist/json.d.ts +0 -1
- package/dist/json.js +0 -1
- package/dist/language-detection.d.ts +0 -1
- package/dist/language-detection.js +10 -2
- package/dist/markdown-cleanup.d.ts +6 -13
- package/dist/markdown-cleanup.js +252 -34
- package/dist/mcp-validator.d.ts +14 -0
- package/dist/mcp-validator.js +22 -0
- package/dist/mcp.d.ts +0 -1
- package/dist/mcp.js +20 -10
- package/dist/observability.d.ts +2 -1
- package/dist/observability.js +30 -3
- package/dist/server-tuning.d.ts +9 -0
- package/dist/server-tuning.js +30 -0
- package/dist/{http-utils.d.ts → session.d.ts} +0 -25
- package/dist/{http-utils.js → session.js} +11 -104
- package/dist/tools.d.ts +5 -4
- package/dist/tools.js +46 -41
- package/dist/transform-types.d.ts +38 -1
- package/dist/transform-types.js +0 -1
- package/dist/transform.d.ts +12 -7
- package/dist/transform.js +205 -344
- package/dist/type-guards.d.ts +0 -1
- package/dist/type-guards.js +0 -1
- package/dist/workers/transform-worker.d.ts +0 -1
- package/dist/workers/transform-worker.js +29 -19
- package/package.json +84 -85
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/crypto.d.ts.map +0 -1
- package/dist/crypto.js.map +0 -1
- package/dist/dom-noise-removal.d.ts.map +0 -1
- package/dist/dom-noise-removal.js.map +0 -1
- package/dist/errors.d.ts.map +0 -1
- package/dist/errors.js.map +0 -1
- package/dist/fetch.d.ts.map +0 -1
- package/dist/fetch.js.map +0 -1
- package/dist/http-native.d.ts.map +0 -1
- package/dist/http-native.js.map +0 -1
- package/dist/http-utils.d.ts.map +0 -1
- package/dist/http-utils.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/json.d.ts.map +0 -1
- package/dist/json.js.map +0 -1
- package/dist/language-detection.d.ts.map +0 -1
- package/dist/language-detection.js.map +0 -1
- package/dist/markdown-cleanup.d.ts.map +0 -1
- package/dist/markdown-cleanup.js.map +0 -1
- package/dist/mcp.d.ts.map +0 -1
- package/dist/mcp.js.map +0 -1
- package/dist/observability.d.ts.map +0 -1
- package/dist/observability.js.map +0 -1
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js.map +0 -1
- package/dist/transform-types.d.ts.map +0 -1
- package/dist/transform-types.js.map +0 -1
- package/dist/transform.d.ts.map +0 -1
- package/dist/transform.js.map +0 -1
- package/dist/type-guards.d.ts.map +0 -1
- package/dist/type-guards.js.map +0 -1
- package/dist/workers/transform-worker.d.ts.map +0 -1
- package/dist/workers/transform-worker.js.map +0 -1
package/dist/transform.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { randomUUID } from 'node:crypto';
|
|
2
2
|
import diagnosticsChannel from 'node:diagnostics_channel';
|
|
3
|
-
import os from 'node:os';
|
|
4
3
|
import { performance } from 'node:perf_hooks';
|
|
5
4
|
import { Worker } from 'node:worker_threads';
|
|
6
5
|
import { parseHTML } from 'linkedom';
|
|
@@ -12,15 +11,9 @@ import { removeNoiseFromHtml } from './dom-noise-removal.js';
|
|
|
12
11
|
import { FetchError, getErrorMessage } from './errors.js';
|
|
13
12
|
import { isRawTextContentUrl } from './fetch.js';
|
|
14
13
|
import { detectLanguageFromCode, resolveLanguageFromAttributes, } from './language-detection.js';
|
|
15
|
-
import { cleanupMarkdownArtifacts,
|
|
14
|
+
import { addSourceToMarkdown, buildMetadataFooter, cleanupMarkdownArtifacts, extractTitleFromRawMarkdown, isLikelyHtmlContent, isRawTextContent, } from './markdown-cleanup.js';
|
|
16
15
|
import { getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from './observability.js';
|
|
17
16
|
import { isObject } from './type-guards.js';
|
|
18
|
-
// Re-export language detection for backward compatibility
|
|
19
|
-
export { detectLanguageFromCode, resolveLanguageFromAttributes, } from './language-detection.js';
|
|
20
|
-
// Re-export markdown cleanup for backward compatibility
|
|
21
|
-
export { cleanupMarkdownArtifacts, promoteOrphanHeadings, } from './markdown-cleanup.js';
|
|
22
|
-
// Re-export DOM noise removal for backward compatibility
|
|
23
|
-
export { removeNoiseFromHtml } from './dom-noise-removal.js';
|
|
24
17
|
function getAbortReason(signal) {
|
|
25
18
|
if (!isObject(signal))
|
|
26
19
|
return undefined;
|
|
@@ -34,6 +27,10 @@ const CODE_BLOCK = {
|
|
|
34
27
|
},
|
|
35
28
|
};
|
|
36
29
|
const transformChannel = diagnosticsChannel.channel('superfetch.transform');
|
|
30
|
+
const LOG_URL_MAX = 80;
|
|
31
|
+
function truncateUrlForLog(url) {
|
|
32
|
+
return url.substring(0, LOG_URL_MAX);
|
|
33
|
+
}
|
|
37
34
|
function publishTransformEvent(event) {
|
|
38
35
|
if (!transformChannel.hasSubscribers)
|
|
39
36
|
return;
|
|
@@ -44,25 +41,48 @@ function publishTransformEvent(event) {
|
|
|
44
41
|
/* empty */
|
|
45
42
|
}
|
|
46
43
|
}
|
|
47
|
-
export function startTransformStage(url, stage) {
|
|
48
|
-
if (!transformChannel.hasSubscribers)
|
|
44
|
+
export function startTransformStage(url, stage, budget) {
|
|
45
|
+
if (!transformChannel.hasSubscribers && !budget)
|
|
49
46
|
return null;
|
|
50
|
-
|
|
47
|
+
const remainingBudgetMs = budget
|
|
48
|
+
? budget.totalBudgetMs - budget.elapsedMs
|
|
49
|
+
: undefined;
|
|
50
|
+
const base = {
|
|
51
51
|
stage,
|
|
52
52
|
startTime: performance.now(),
|
|
53
53
|
url: redactUrl(url),
|
|
54
54
|
};
|
|
55
|
+
if (remainingBudgetMs !== undefined && budget) {
|
|
56
|
+
return {
|
|
57
|
+
...base,
|
|
58
|
+
budgetMs: remainingBudgetMs,
|
|
59
|
+
totalBudgetMs: budget.totalBudgetMs,
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
return base;
|
|
55
63
|
}
|
|
56
64
|
export function endTransformStage(context, options) {
|
|
57
65
|
if (!context)
|
|
58
|
-
return;
|
|
66
|
+
return 0;
|
|
67
|
+
const durationMs = performance.now() - context.startTime;
|
|
59
68
|
const requestId = getRequestId();
|
|
60
69
|
const operationId = getOperationId();
|
|
70
|
+
if (context.totalBudgetMs !== undefined) {
|
|
71
|
+
const warnThresholdMs = context.totalBudgetMs * config.transform.stageWarnRatio;
|
|
72
|
+
if (durationMs > warnThresholdMs) {
|
|
73
|
+
logWarn('Transform stage exceeded warning threshold', {
|
|
74
|
+
stage: context.stage,
|
|
75
|
+
durationMs: Math.round(durationMs),
|
|
76
|
+
thresholdMs: Math.round(warnThresholdMs),
|
|
77
|
+
url: context.url,
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
}
|
|
61
81
|
const event = {
|
|
62
82
|
v: 1,
|
|
63
83
|
type: 'stage',
|
|
64
84
|
stage: context.stage,
|
|
65
|
-
durationMs
|
|
85
|
+
durationMs,
|
|
66
86
|
url: context.url,
|
|
67
87
|
...(requestId ? { requestId } : {}),
|
|
68
88
|
...(operationId ? { operationId } : {}),
|
|
@@ -71,14 +91,22 @@ export function endTransformStage(context, options) {
|
|
|
71
91
|
: {}),
|
|
72
92
|
};
|
|
73
93
|
publishTransformEvent(event);
|
|
94
|
+
return durationMs;
|
|
74
95
|
}
|
|
75
|
-
function runTransformStage(url, stage, fn) {
|
|
76
|
-
|
|
96
|
+
function runTransformStage(url, stage, fn, budget) {
|
|
97
|
+
if (budget && budget.elapsedMs >= budget.totalBudgetMs) {
|
|
98
|
+
throw new FetchError('Transform budget exhausted', url, 504, {
|
|
99
|
+
reason: 'timeout',
|
|
100
|
+
stage: `${stage}:budget_exhausted`,
|
|
101
|
+
elapsedMs: budget.elapsedMs,
|
|
102
|
+
totalBudgetMs: budget.totalBudgetMs,
|
|
103
|
+
});
|
|
104
|
+
}
|
|
105
|
+
const context = startTransformStage(url, stage, budget);
|
|
77
106
|
try {
|
|
78
107
|
return fn();
|
|
79
108
|
}
|
|
80
109
|
finally {
|
|
81
|
-
// Emit duration even if the stage throws; callers decide how to handle the error.
|
|
82
110
|
endTransformStage(context);
|
|
83
111
|
}
|
|
84
112
|
}
|
|
@@ -336,21 +364,22 @@ function applyBaseUri(document, url) {
|
|
|
336
364
|
});
|
|
337
365
|
}
|
|
338
366
|
}
|
|
339
|
-
// DOM noise removal functions moved to ./dom-noise-removal.ts
|
|
340
367
|
function buildInlineCode(content) {
|
|
341
|
-
|
|
342
|
-
let
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
368
|
+
let maxBackticks = 0;
|
|
369
|
+
let currentRun = 0;
|
|
370
|
+
for (const char of content) {
|
|
371
|
+
if (char === '`') {
|
|
372
|
+
currentRun++;
|
|
373
|
+
}
|
|
374
|
+
else {
|
|
375
|
+
if (currentRun > maxBackticks)
|
|
376
|
+
maxBackticks = currentRun;
|
|
377
|
+
currentRun = 0;
|
|
348
378
|
}
|
|
349
379
|
}
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
// CommonMark recommends padding when the code starts/ends with a backtick.
|
|
380
|
+
if (currentRun > maxBackticks)
|
|
381
|
+
maxBackticks = currentRun;
|
|
382
|
+
const delimiter = '`'.repeat(maxBackticks + 1);
|
|
354
383
|
const padding = content.startsWith('`') || content.endsWith('`') ? ' ' : '';
|
|
355
384
|
return `${delimiter}${padding}${content}${padding}${delimiter}`;
|
|
356
385
|
}
|
|
@@ -527,8 +556,7 @@ function translateHtmlToMarkdown(html, url, signal, document, skipNoiseRemoval)
|
|
|
527
556
|
throwIfAborted(signal, url, 'markdown:cleaned');
|
|
528
557
|
const content = runTransformStage(url, 'markdown:translate', () => getMarkdownConverter().translate(cleanedHtml).trim());
|
|
529
558
|
throwIfAborted(signal, url, 'markdown:translated');
|
|
530
|
-
|
|
531
|
-
return promoteOrphanHeadings(cleaned);
|
|
559
|
+
return cleanupMarkdownArtifacts(content);
|
|
532
560
|
}
|
|
533
561
|
function appendMetadataFooter(content, metadata, url) {
|
|
534
562
|
const footer = buildMetadataFooter(metadata, url);
|
|
@@ -550,223 +578,6 @@ export function htmlToMarkdown(html, metadata, options) {
|
|
|
550
578
|
return buildMetadataFooter(metadata, url);
|
|
551
579
|
}
|
|
552
580
|
}
|
|
553
|
-
// Markdown cleanup functions moved to ./markdown-cleanup.ts
|
|
554
|
-
function formatFetchedDate(isoString) {
|
|
555
|
-
try {
|
|
556
|
-
const date = new Date(isoString);
|
|
557
|
-
const day = String(date.getDate()).padStart(2, '0');
|
|
558
|
-
const month = String(date.getMonth() + 1).padStart(2, '0');
|
|
559
|
-
const year = date.getFullYear();
|
|
560
|
-
return `${day}-${month}-${year}`;
|
|
561
|
-
}
|
|
562
|
-
catch {
|
|
563
|
-
return isoString;
|
|
564
|
-
}
|
|
565
|
-
}
|
|
566
|
-
function buildMetadataFooter(metadata, fallbackUrl) {
|
|
567
|
-
if (!metadata)
|
|
568
|
-
return '';
|
|
569
|
-
const lines = ['---', ''];
|
|
570
|
-
const url = metadata.url || fallbackUrl;
|
|
571
|
-
const parts = [];
|
|
572
|
-
if (metadata.title)
|
|
573
|
-
parts.push(`_${metadata.title}_`);
|
|
574
|
-
if (metadata.author)
|
|
575
|
-
parts.push(`_${metadata.author}_`);
|
|
576
|
-
if (url)
|
|
577
|
-
parts.push(`[_Original Source_](${url})`);
|
|
578
|
-
if (metadata.fetchedAt) {
|
|
579
|
-
const formattedDate = formatFetchedDate(metadata.fetchedAt);
|
|
580
|
-
parts.push(`_${formattedDate}_`);
|
|
581
|
-
}
|
|
582
|
-
if (parts.length > 0) {
|
|
583
|
-
lines.push(` ${parts.join(' | ')}`);
|
|
584
|
-
}
|
|
585
|
-
if (metadata.description) {
|
|
586
|
-
lines.push(` <sub>${metadata.description}</sub>`);
|
|
587
|
-
}
|
|
588
|
-
return lines.join('\n');
|
|
589
|
-
}
|
|
590
|
-
const HEADING_PATTERN = /^#{1,6}\s/m;
|
|
591
|
-
const LIST_PATTERN = /^(?:[-*+])\s/m;
|
|
592
|
-
const HTML_DOCUMENT_PATTERN = /^(<!doctype|<html)/i;
|
|
593
|
-
function containsMarkdownHeading(content) {
|
|
594
|
-
return HEADING_PATTERN.test(content);
|
|
595
|
-
}
|
|
596
|
-
function containsMarkdownList(content) {
|
|
597
|
-
return LIST_PATTERN.test(content);
|
|
598
|
-
}
|
|
599
|
-
function containsFencedCodeBlock(content) {
|
|
600
|
-
const first = content.indexOf('```');
|
|
601
|
-
if (first === -1)
|
|
602
|
-
return false;
|
|
603
|
-
return content.includes('```', first + 3);
|
|
604
|
-
}
|
|
605
|
-
function looksLikeMarkdown(content) {
|
|
606
|
-
return (containsMarkdownHeading(content) ||
|
|
607
|
-
containsMarkdownList(content) ||
|
|
608
|
-
containsFencedCodeBlock(content));
|
|
609
|
-
}
|
|
610
|
-
function detectLineEnding(content) {
|
|
611
|
-
return content.includes('\r\n') ? '\r\n' : '\n';
|
|
612
|
-
}
|
|
613
|
-
const FRONTMATTER_DELIMITER = '---';
|
|
614
|
-
function findFrontmatterLines(content) {
|
|
615
|
-
const lineEnding = detectLineEnding(content);
|
|
616
|
-
const lines = content.split(lineEnding);
|
|
617
|
-
if (lines[0] !== FRONTMATTER_DELIMITER)
|
|
618
|
-
return null;
|
|
619
|
-
const endIndex = lines.indexOf(FRONTMATTER_DELIMITER, 1);
|
|
620
|
-
if (endIndex === -1)
|
|
621
|
-
return null;
|
|
622
|
-
return { lineEnding, lines, endIndex };
|
|
623
|
-
}
|
|
624
|
-
function stripOptionalQuotes(value) {
|
|
625
|
-
const trimmed = value.trim();
|
|
626
|
-
if (trimmed.length < 2)
|
|
627
|
-
return trimmed;
|
|
628
|
-
const first = trimmed[0];
|
|
629
|
-
const last = trimmed[trimmed.length - 1];
|
|
630
|
-
if ((first === '"' && last === '"') || (first === "'" && last === "'")) {
|
|
631
|
-
return trimmed.slice(1, -1).trim();
|
|
632
|
-
}
|
|
633
|
-
return trimmed;
|
|
634
|
-
}
|
|
635
|
-
function parseFrontmatterEntry(line) {
|
|
636
|
-
const trimmed = line.trim();
|
|
637
|
-
if (!trimmed)
|
|
638
|
-
return null;
|
|
639
|
-
const separatorIndex = trimmed.indexOf(':');
|
|
640
|
-
if (separatorIndex <= 0)
|
|
641
|
-
return null;
|
|
642
|
-
const key = trimmed.slice(0, separatorIndex).trim().toLowerCase();
|
|
643
|
-
const value = trimmed.slice(separatorIndex + 1);
|
|
644
|
-
return { key, value };
|
|
645
|
-
}
|
|
646
|
-
function isTitleKey(key) {
|
|
647
|
-
return key === 'title' || key === 'name';
|
|
648
|
-
}
|
|
649
|
-
function extractTitleFromHeading(content) {
|
|
650
|
-
const lineEnding = detectLineEnding(content);
|
|
651
|
-
const lines = content.split(lineEnding);
|
|
652
|
-
for (const line of lines) {
|
|
653
|
-
const trimmed = line.trim();
|
|
654
|
-
if (!trimmed)
|
|
655
|
-
continue;
|
|
656
|
-
let index = 0;
|
|
657
|
-
while (index < trimmed.length && trimmed[index] === '#') {
|
|
658
|
-
index += 1;
|
|
659
|
-
}
|
|
660
|
-
if (index === 0 || index > 6)
|
|
661
|
-
return undefined;
|
|
662
|
-
const nextChar = trimmed[index];
|
|
663
|
-
if (nextChar !== ' ' && nextChar !== '\t')
|
|
664
|
-
return undefined;
|
|
665
|
-
const heading = trimmed.slice(index).trim();
|
|
666
|
-
return heading.length > 0 ? heading : undefined;
|
|
667
|
-
}
|
|
668
|
-
return undefined;
|
|
669
|
-
}
|
|
670
|
-
function extractTitleFromRawMarkdown(content) {
|
|
671
|
-
const frontmatter = findFrontmatterLines(content);
|
|
672
|
-
if (!frontmatter) {
|
|
673
|
-
return extractTitleFromHeading(content);
|
|
674
|
-
}
|
|
675
|
-
const { lines, endIndex } = frontmatter;
|
|
676
|
-
const entry = lines
|
|
677
|
-
.slice(1, endIndex)
|
|
678
|
-
.map((line) => parseFrontmatterEntry(line))
|
|
679
|
-
.find((parsed) => parsed !== null && isTitleKey(parsed.key));
|
|
680
|
-
if (!entry)
|
|
681
|
-
return undefined;
|
|
682
|
-
const value = stripOptionalQuotes(entry.value);
|
|
683
|
-
return value || undefined;
|
|
684
|
-
}
|
|
685
|
-
function hasMarkdownSourceLine(content) {
|
|
686
|
-
const lineEnding = detectLineEnding(content);
|
|
687
|
-
const lines = content.split(lineEnding);
|
|
688
|
-
const limit = Math.min(lines.length, 50);
|
|
689
|
-
for (let index = 0; index < limit; index += 1) {
|
|
690
|
-
const line = lines[index];
|
|
691
|
-
if (!line)
|
|
692
|
-
continue;
|
|
693
|
-
if (line.trimStart().toLowerCase().startsWith('source:')) {
|
|
694
|
-
return true;
|
|
695
|
-
}
|
|
696
|
-
}
|
|
697
|
-
return false;
|
|
698
|
-
}
|
|
699
|
-
function addSourceToMarkdownMarkdownFormat(content, url) {
|
|
700
|
-
if (hasMarkdownSourceLine(content))
|
|
701
|
-
return content;
|
|
702
|
-
const lineEnding = detectLineEnding(content);
|
|
703
|
-
const lines = content.split(lineEnding);
|
|
704
|
-
const firstNonEmptyIndex = lines.findIndex((line) => line.trim().length > 0);
|
|
705
|
-
if (firstNonEmptyIndex !== -1) {
|
|
706
|
-
const firstLine = lines[firstNonEmptyIndex];
|
|
707
|
-
if (firstLine && /^#{1,6}\s+/.test(firstLine.trim())) {
|
|
708
|
-
const insertAt = firstNonEmptyIndex + 1;
|
|
709
|
-
const updated = [
|
|
710
|
-
...lines.slice(0, insertAt),
|
|
711
|
-
'',
|
|
712
|
-
`Source: ${url}`,
|
|
713
|
-
'',
|
|
714
|
-
...lines.slice(insertAt),
|
|
715
|
-
];
|
|
716
|
-
return updated.join(lineEnding);
|
|
717
|
-
}
|
|
718
|
-
}
|
|
719
|
-
return [`Source: ${url}`, '', content].join(lineEnding);
|
|
720
|
-
}
|
|
721
|
-
function addSourceToMarkdown(content, url) {
|
|
722
|
-
const frontmatter = findFrontmatterLines(content);
|
|
723
|
-
if (config.transform.metadataFormat === 'markdown' && !frontmatter) {
|
|
724
|
-
return addSourceToMarkdownMarkdownFormat(content, url);
|
|
725
|
-
}
|
|
726
|
-
if (!frontmatter) {
|
|
727
|
-
return `---\nsource: "${url}"\n---\n\n${content}`;
|
|
728
|
-
}
|
|
729
|
-
const { lineEnding, lines, endIndex } = frontmatter;
|
|
730
|
-
const bodyLines = lines.slice(1, endIndex);
|
|
731
|
-
const hasSource = bodyLines.some((line) => line.trimStart().toLowerCase().startsWith('source:'));
|
|
732
|
-
if (hasSource)
|
|
733
|
-
return content;
|
|
734
|
-
const updatedLines = [
|
|
735
|
-
lines[0],
|
|
736
|
-
...bodyLines,
|
|
737
|
-
`source: "${url}"`,
|
|
738
|
-
...lines.slice(endIndex),
|
|
739
|
-
];
|
|
740
|
-
return updatedLines.join(lineEnding);
|
|
741
|
-
}
|
|
742
|
-
function hasFrontmatter(trimmed) {
|
|
743
|
-
return trimmed.startsWith('---\n') || trimmed.startsWith('---\r\n');
|
|
744
|
-
}
|
|
745
|
-
function looksLikeHtmlDocument(trimmed) {
|
|
746
|
-
return HTML_DOCUMENT_PATTERN.test(trimmed);
|
|
747
|
-
}
|
|
748
|
-
function countCommonHtmlTags(content) {
|
|
749
|
-
const matches = content.match(/<(html|head|body|div|span|script|style|meta|link)\b/gi) ??
|
|
750
|
-
[];
|
|
751
|
-
return matches.length;
|
|
752
|
-
}
|
|
753
|
-
function isRawTextContent(content) {
|
|
754
|
-
const trimmed = content.trim();
|
|
755
|
-
const isHtmlDocument = looksLikeHtmlDocument(trimmed);
|
|
756
|
-
const hasMarkdownFrontmatter = hasFrontmatter(trimmed);
|
|
757
|
-
const hasTooManyHtmlTags = countCommonHtmlTags(content) > 2;
|
|
758
|
-
const isMarkdown = looksLikeMarkdown(content);
|
|
759
|
-
return (!isHtmlDocument &&
|
|
760
|
-
(hasMarkdownFrontmatter || (!hasTooManyHtmlTags && isMarkdown)));
|
|
761
|
-
}
|
|
762
|
-
function isLikelyHtmlContent(content) {
|
|
763
|
-
const trimmed = content.trim();
|
|
764
|
-
if (!trimmed)
|
|
765
|
-
return false;
|
|
766
|
-
if (looksLikeHtmlDocument(trimmed))
|
|
767
|
-
return true;
|
|
768
|
-
return countCommonHtmlTags(content) > 2;
|
|
769
|
-
}
|
|
770
581
|
function shouldPreserveRawContent(url, content) {
|
|
771
582
|
if (isRawTextContentUrl(url)) {
|
|
772
583
|
return !isLikelyHtmlContent(content);
|
|
@@ -780,13 +591,9 @@ function buildRawMarkdownPayload({ rawContent, url, includeMetadata, }) {
|
|
|
780
591
|
: rawContent;
|
|
781
592
|
return { content, title };
|
|
782
593
|
}
|
|
783
|
-
function
|
|
784
|
-
if (!shouldPreserveRawContent(url, html)) {
|
|
785
|
-
return null;
|
|
786
|
-
}
|
|
787
|
-
logDebug('Preserving raw markdown content', { url: url.substring(0, 80) });
|
|
594
|
+
function buildRawMarkdownResult({ rawContent, url, includeMetadata, }) {
|
|
788
595
|
const { content, title } = buildRawMarkdownPayload({
|
|
789
|
-
rawContent
|
|
596
|
+
rawContent,
|
|
790
597
|
url,
|
|
791
598
|
includeMetadata,
|
|
792
599
|
});
|
|
@@ -796,36 +603,21 @@ function tryTransformRawContent({ html, url, includeMetadata, }) {
|
|
|
796
603
|
truncated: false,
|
|
797
604
|
};
|
|
798
605
|
}
|
|
606
|
+
function tryTransformRawContent({ html, url, includeMetadata, }) {
|
|
607
|
+
if (!shouldPreserveRawContent(url, html)) {
|
|
608
|
+
return null;
|
|
609
|
+
}
|
|
610
|
+
logDebug('Preserving raw markdown content', { url: truncateUrlForLog(url) });
|
|
611
|
+
return buildRawMarkdownResult({
|
|
612
|
+
rawContent: html,
|
|
613
|
+
url,
|
|
614
|
+
includeMetadata,
|
|
615
|
+
});
|
|
616
|
+
}
|
|
799
617
|
const MIN_CONTENT_RATIO = 0.3;
|
|
800
618
|
const MIN_HTML_LENGTH_FOR_GATE = 100;
|
|
801
619
|
const MIN_HEADING_RETENTION_RATIO = 0.7;
|
|
802
620
|
const MIN_CODE_BLOCK_RETENTION_RATIO = 0.5;
|
|
803
|
-
/**
|
|
804
|
-
* Count headings using DOM querySelectorAll.
|
|
805
|
-
* Handles nested content like <h2><span>Text</span></h2> correctly.
|
|
806
|
-
*/
|
|
807
|
-
function countHeadingsDom(htmlOrDocument) {
|
|
808
|
-
if (typeof htmlOrDocument === 'string') {
|
|
809
|
-
// Wrap fragments in document structure for proper parsing
|
|
810
|
-
const htmlToParse = needsDocumentWrapper(htmlOrDocument)
|
|
811
|
-
? wrapHtmlFragment(htmlOrDocument)
|
|
812
|
-
: htmlOrDocument;
|
|
813
|
-
const { document: doc } = parseHTML(htmlToParse);
|
|
814
|
-
return doc.querySelectorAll('h1,h2,h3,h4,h5,h6').length;
|
|
815
|
-
}
|
|
816
|
-
return htmlOrDocument.querySelectorAll('h1,h2,h3,h4,h5,h6').length;
|
|
817
|
-
}
|
|
818
|
-
function countCodeBlocksDom(htmlOrDocument) {
|
|
819
|
-
if (typeof htmlOrDocument === 'string') {
|
|
820
|
-
// Wrap fragments in document structure for proper parsing
|
|
821
|
-
const htmlToParse = needsDocumentWrapper(htmlOrDocument)
|
|
822
|
-
? wrapHtmlFragment(htmlOrDocument)
|
|
823
|
-
: htmlOrDocument;
|
|
824
|
-
const { document: doc } = parseHTML(htmlToParse);
|
|
825
|
-
return doc.querySelectorAll('pre').length;
|
|
826
|
-
}
|
|
827
|
-
return htmlOrDocument.querySelectorAll('pre').length;
|
|
828
|
-
}
|
|
829
621
|
/**
|
|
830
622
|
* Check if HTML string needs document wrapper for proper parsing.
|
|
831
623
|
* Fragments without doctype/html/body tags need wrapping.
|
|
@@ -842,40 +634,53 @@ function needsDocumentWrapper(html) {
|
|
|
842
634
|
function wrapHtmlFragment(html) {
|
|
843
635
|
return `<!DOCTYPE html><html><body>${html}</body></html>`;
|
|
844
636
|
}
|
|
637
|
+
function resolveHtmlDocument(htmlOrDocument) {
|
|
638
|
+
if (typeof htmlOrDocument !== 'string') {
|
|
639
|
+
return htmlOrDocument;
|
|
640
|
+
}
|
|
641
|
+
const htmlToParse = needsDocumentWrapper(htmlOrDocument)
|
|
642
|
+
? wrapHtmlFragment(htmlOrDocument)
|
|
643
|
+
: htmlOrDocument;
|
|
644
|
+
return parseHTML(htmlToParse).document;
|
|
645
|
+
}
|
|
646
|
+
function countDomSelector(htmlOrDocument, selector) {
|
|
647
|
+
return resolveHtmlDocument(htmlOrDocument).querySelectorAll(selector).length;
|
|
648
|
+
}
|
|
845
649
|
/**
|
|
846
|
-
*
|
|
847
|
-
*
|
|
650
|
+
* Count headings using DOM querySelectorAll.
|
|
651
|
+
* Handles nested content like <h2><span>Text</span></h2> correctly.
|
|
848
652
|
*/
|
|
849
|
-
function
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
// Note: linkedom may return null for body on HTML fragments despite types
|
|
863
|
-
const body = doc.body;
|
|
864
|
-
const docElement = doc.documentElement;
|
|
865
|
-
const text = body?.textContent ?? docElement?.textContent ?? '';
|
|
866
|
-
return text.replace(/\s+/g, ' ').trim().length;
|
|
867
|
-
}
|
|
868
|
-
// For Document input, clone to avoid mutation
|
|
869
|
-
const workDoc = htmlOrDocument.cloneNode(true);
|
|
870
|
-
// Remove non-visible content that inflates text length
|
|
871
|
-
for (const el of workDoc.querySelectorAll('script,style,noscript')) {
|
|
653
|
+
function countHeadingsDom(htmlOrDocument) {
|
|
654
|
+
return countDomSelector(htmlOrDocument, 'h1,h2,h3,h4,h5,h6');
|
|
655
|
+
}
|
|
656
|
+
function countCodeBlocksDom(htmlOrDocument) {
|
|
657
|
+
return countDomSelector(htmlOrDocument, 'pre');
|
|
658
|
+
}
|
|
659
|
+
function cloneDocumentIfNeeded(htmlOrDocument, doc) {
|
|
660
|
+
return typeof htmlOrDocument === 'string'
|
|
661
|
+
? doc
|
|
662
|
+
: doc.cloneNode(true);
|
|
663
|
+
}
|
|
664
|
+
function stripNonVisibleNodes(doc) {
|
|
665
|
+
for (const el of doc.querySelectorAll('script,style,noscript')) {
|
|
872
666
|
el.remove();
|
|
873
667
|
}
|
|
874
|
-
|
|
668
|
+
}
|
|
669
|
+
function resolveDocumentText(doc) {
|
|
875
670
|
// Note: linkedom may return null for body on HTML fragments despite types
|
|
876
|
-
const body =
|
|
877
|
-
const docElement =
|
|
878
|
-
|
|
671
|
+
const body = doc.body;
|
|
672
|
+
const docElement = doc.documentElement;
|
|
673
|
+
return body?.textContent ?? docElement?.textContent ?? '';
|
|
674
|
+
}
|
|
675
|
+
/**
|
|
676
|
+
* Get visible text length from HTML, excluding script/style/noscript content.
|
|
677
|
+
* Fixes the bug where stripHtmlTagsForLength() counted JS/CSS as visible text.
|
|
678
|
+
*/
|
|
679
|
+
function getVisibleTextLength(htmlOrDocument) {
|
|
680
|
+
const doc = resolveHtmlDocument(htmlOrDocument);
|
|
681
|
+
const workDoc = cloneDocumentIfNeeded(htmlOrDocument, doc);
|
|
682
|
+
stripNonVisibleNodes(workDoc);
|
|
683
|
+
const text = resolveDocumentText(workDoc);
|
|
879
684
|
return text.replace(/\s+/g, ' ').trim().length;
|
|
880
685
|
}
|
|
881
686
|
export function isExtractionSufficient(article, originalHtmlOrDocument) {
|
|
@@ -995,7 +800,7 @@ function buildContentSource({ html, url, article, extractedMeta, includeMetadata
|
|
|
995
800
|
const contentRoot = findContentRoot(cleanedDoc);
|
|
996
801
|
if (contentRoot) {
|
|
997
802
|
logDebug('Using content root fallback instead of full HTML', {
|
|
998
|
-
url: url
|
|
803
|
+
url: truncateUrlForLog(url),
|
|
999
804
|
contentLength: contentRoot.length,
|
|
1000
805
|
});
|
|
1001
806
|
return {
|
|
@@ -1015,31 +820,39 @@ function buildContentSource({ html, url, article, extractedMeta, includeMetadata
|
|
|
1015
820
|
...(document ? { document } : {}),
|
|
1016
821
|
};
|
|
1017
822
|
}
|
|
1018
|
-
function logQualityGateFallback({
|
|
823
|
+
function logQualityGateFallback({ safeUrl, articleLength, }) {
|
|
1019
824
|
logDebug('Quality gate: Readability extraction below threshold, using full HTML', {
|
|
1020
|
-
url:
|
|
825
|
+
url: safeUrl,
|
|
1021
826
|
articleLength,
|
|
1022
827
|
});
|
|
1023
828
|
}
|
|
1024
829
|
function shouldUseArticleContent(article, originalHtmlOrDocument, url) {
|
|
1025
830
|
const articleLength = article.textContent.length;
|
|
1026
831
|
const originalLength = getVisibleTextLength(originalHtmlOrDocument);
|
|
832
|
+
const safeUrl = truncateUrlForLog(url);
|
|
833
|
+
let articleDocument = null;
|
|
834
|
+
const getArticleDocument = () => {
|
|
835
|
+
if (articleDocument)
|
|
836
|
+
return articleDocument;
|
|
837
|
+
articleDocument = resolveHtmlDocument(article.content);
|
|
838
|
+
return articleDocument;
|
|
839
|
+
};
|
|
1027
840
|
// If the document is tiny, don't gate too aggressively.
|
|
1028
841
|
if (originalLength >= MIN_HTML_LENGTH_FOR_GATE) {
|
|
1029
842
|
const ratio = articleLength / originalLength;
|
|
1030
843
|
if (ratio < MIN_CONTENT_RATIO) {
|
|
1031
|
-
logQualityGateFallback({
|
|
844
|
+
logQualityGateFallback({ safeUrl, articleLength });
|
|
1032
845
|
return false;
|
|
1033
846
|
}
|
|
1034
847
|
}
|
|
1035
848
|
// Heading structure retention (compute counts once to avoid repeated DOM queries/parses).
|
|
1036
849
|
const originalHeadings = countHeadingsDom(originalHtmlOrDocument);
|
|
1037
850
|
if (originalHeadings > 0) {
|
|
1038
|
-
const articleHeadings = countHeadingsDom(
|
|
851
|
+
const articleHeadings = countHeadingsDom(getArticleDocument());
|
|
1039
852
|
const retentionRatio = articleHeadings / originalHeadings;
|
|
1040
853
|
if (retentionRatio < MIN_HEADING_RETENTION_RATIO) {
|
|
1041
854
|
logDebug('Quality gate: Readability broke heading structure, using full HTML', {
|
|
1042
|
-
url:
|
|
855
|
+
url: safeUrl,
|
|
1043
856
|
originalHeadings,
|
|
1044
857
|
articleHeadings,
|
|
1045
858
|
});
|
|
@@ -1048,18 +861,18 @@ function shouldUseArticleContent(article, originalHtmlOrDocument, url) {
|
|
|
1048
861
|
}
|
|
1049
862
|
const originalCodeBlocks = countCodeBlocksDom(originalHtmlOrDocument);
|
|
1050
863
|
if (originalCodeBlocks > 0) {
|
|
1051
|
-
const articleCodeBlocks = countCodeBlocksDom(
|
|
864
|
+
const articleCodeBlocks = countCodeBlocksDom(getArticleDocument());
|
|
1052
865
|
const codeRetentionRatio = articleCodeBlocks / originalCodeBlocks;
|
|
1053
866
|
// Always log code block counts for debugging
|
|
1054
867
|
logDebug('Code block retention check', {
|
|
1055
|
-
url:
|
|
868
|
+
url: safeUrl,
|
|
1056
869
|
originalCodeBlocks,
|
|
1057
870
|
articleCodeBlocks,
|
|
1058
871
|
codeRetentionRatio,
|
|
1059
872
|
});
|
|
1060
873
|
if (codeRetentionRatio < MIN_CODE_BLOCK_RETENTION_RATIO) {
|
|
1061
874
|
logDebug('Quality gate: Readability removed code blocks, using full HTML', {
|
|
1062
|
-
url:
|
|
875
|
+
url: safeUrl,
|
|
1063
876
|
originalCodeBlocks,
|
|
1064
877
|
articleCodeBlocks,
|
|
1065
878
|
});
|
|
@@ -1068,7 +881,7 @@ function shouldUseArticleContent(article, originalHtmlOrDocument, url) {
|
|
|
1068
881
|
}
|
|
1069
882
|
// Layout extraction issue: truncated/fragmented lines.
|
|
1070
883
|
if (hasTruncatedSentences(article.textContent)) {
|
|
1071
|
-
logDebug('Quality gate: Extracted text has many truncated sentences, using full HTML', { url:
|
|
884
|
+
logDebug('Quality gate: Extracted text has many truncated sentences, using full HTML', { url: safeUrl });
|
|
1072
885
|
return false;
|
|
1073
886
|
}
|
|
1074
887
|
return true;
|
|
@@ -1078,7 +891,7 @@ function resolveContentSource({ html, url, includeMetadata, signal, }) {
|
|
|
1078
891
|
extractArticle: true,
|
|
1079
892
|
...(signal ? { signal } : {}),
|
|
1080
893
|
});
|
|
1081
|
-
const originalDocument =
|
|
894
|
+
const originalDocument = document;
|
|
1082
895
|
const useArticleContent = article
|
|
1083
896
|
? shouldUseArticleContent(article, originalDocument, url)
|
|
1084
897
|
: false;
|
|
@@ -1129,11 +942,14 @@ function runTotalTransformStage(url, fn) {
|
|
|
1129
942
|
return result;
|
|
1130
943
|
}
|
|
1131
944
|
finally {
|
|
1132
|
-
|
|
1133
|
-
endTransformStage(totalStage, { truncated: false });
|
|
1134
|
-
}
|
|
945
|
+
finalizeTotalTransformStage(totalStage, success);
|
|
1135
946
|
}
|
|
1136
947
|
}
|
|
948
|
+
function finalizeTotalTransformStage(stage, success) {
|
|
949
|
+
if (!success)
|
|
950
|
+
return;
|
|
951
|
+
endTransformStage(stage, { truncated: false });
|
|
952
|
+
}
|
|
1137
953
|
async function runTotalTransformStageAsync(url, fn) {
|
|
1138
954
|
const totalStage = startTransformStage(url, 'transform:total');
|
|
1139
955
|
let success = false;
|
|
@@ -1143,9 +959,7 @@ async function runTotalTransformStageAsync(url, fn) {
|
|
|
1143
959
|
return result;
|
|
1144
960
|
}
|
|
1145
961
|
finally {
|
|
1146
|
-
|
|
1147
|
-
endTransformStage(totalStage, { truncated: false });
|
|
1148
|
-
}
|
|
962
|
+
finalizeTotalTransformStage(totalStage, success);
|
|
1149
963
|
}
|
|
1150
964
|
}
|
|
1151
965
|
export function transformHtmlToMarkdownInProcess(html, url, options) {
|
|
@@ -1182,11 +996,11 @@ const workerMessageSchema = z.discriminatedUnion('type', [
|
|
|
1182
996
|
}),
|
|
1183
997
|
]);
|
|
1184
998
|
let pool = null;
|
|
999
|
+
const POOL_MIN_WORKERS = 2;
|
|
1000
|
+
const POOL_MAX_WORKERS = 4;
|
|
1001
|
+
const POOL_SCALE_THRESHOLD = 0.5;
|
|
1185
1002
|
function resolveDefaultWorkerCount() {
|
|
1186
|
-
|
|
1187
|
-
? os.availableParallelism()
|
|
1188
|
-
: os.cpus().length;
|
|
1189
|
-
return Math.min(16, Math.max(1, parallelism - 1));
|
|
1003
|
+
return POOL_MIN_WORKERS;
|
|
1190
1004
|
}
|
|
1191
1005
|
const DEFAULT_TIMEOUT_MS = config.transform.timeoutMs;
|
|
1192
1006
|
function getOrCreateTransformWorkerPool() {
|
|
@@ -1199,8 +1013,20 @@ export async function shutdownTransformWorkerPool() {
|
|
|
1199
1013
|
await pool.close();
|
|
1200
1014
|
pool = null;
|
|
1201
1015
|
}
|
|
1016
|
+
export function getTransformPoolStats() {
|
|
1017
|
+
if (!pool)
|
|
1018
|
+
return null;
|
|
1019
|
+
return {
|
|
1020
|
+
queueDepth: pool.getQueueDepth(),
|
|
1021
|
+
activeWorkers: pool.getActiveWorkers(),
|
|
1022
|
+
capacity: pool.getCapacity(),
|
|
1023
|
+
};
|
|
1024
|
+
}
|
|
1202
1025
|
class WorkerPool {
|
|
1203
1026
|
workers = [];
|
|
1027
|
+
capacity;
|
|
1028
|
+
minCapacity;
|
|
1029
|
+
maxCapacity;
|
|
1204
1030
|
queue = [];
|
|
1205
1031
|
inflight = new Map();
|
|
1206
1032
|
timeoutMs;
|
|
@@ -1316,12 +1142,11 @@ class WorkerPool {
|
|
|
1316
1142
|
});
|
|
1317
1143
|
}
|
|
1318
1144
|
constructor(size, timeoutMs) {
|
|
1319
|
-
|
|
1145
|
+
this.minCapacity = POOL_MIN_WORKERS;
|
|
1146
|
+
this.maxCapacity = POOL_MAX_WORKERS;
|
|
1147
|
+
this.capacity = Math.max(this.minCapacity, Math.min(size, this.maxCapacity));
|
|
1320
1148
|
this.timeoutMs = timeoutMs;
|
|
1321
|
-
this.queueMax =
|
|
1322
|
-
for (let index = 0; index < safeSize; index += 1) {
|
|
1323
|
-
this.workers.push(this.spawnWorker(index));
|
|
1324
|
-
}
|
|
1149
|
+
this.queueMax = this.maxCapacity * 32;
|
|
1325
1150
|
}
|
|
1326
1151
|
spawnWorker(workerIndex) {
|
|
1327
1152
|
const worker = new Worker(new URL('./workers/transform-worker.js', import.meta.url));
|
|
@@ -1419,20 +1244,45 @@ class WorkerPool {
|
|
|
1419
1244
|
this.drainQueue();
|
|
1420
1245
|
});
|
|
1421
1246
|
}
|
|
1247
|
+
/** Scale capacity up if queue pressure exceeds threshold. */
|
|
1248
|
+
maybeScaleUp() {
|
|
1249
|
+
if (this.queue.length > this.capacity * POOL_SCALE_THRESHOLD &&
|
|
1250
|
+
this.capacity < this.maxCapacity) {
|
|
1251
|
+
this.capacity += 1;
|
|
1252
|
+
}
|
|
1253
|
+
}
|
|
1422
1254
|
drainQueue() {
|
|
1255
|
+
if (this.closed)
|
|
1256
|
+
return;
|
|
1423
1257
|
if (this.queue.length === 0)
|
|
1424
1258
|
return;
|
|
1259
|
+
this.maybeScaleUp();
|
|
1260
|
+
// First pass: try to find an idle existing worker
|
|
1425
1261
|
for (let workerIndex = 0; workerIndex < this.workers.length; workerIndex += 1) {
|
|
1426
1262
|
const slot = this.workers[workerIndex];
|
|
1427
|
-
if (
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
this.dispatch(workerIndex, slot, task);
|
|
1433
|
-
if (this.queue.length === 0)
|
|
1434
|
-
return;
|
|
1263
|
+
if (slot && !slot.busy) {
|
|
1264
|
+
this.dispatchQueueTask(workerIndex, slot);
|
|
1265
|
+
if (this.queue.length === 0)
|
|
1266
|
+
return;
|
|
1267
|
+
}
|
|
1435
1268
|
}
|
|
1269
|
+
if (this.workers.length < this.capacity && this.queue.length > 0) {
|
|
1270
|
+
const workerIndex = this.workers.length;
|
|
1271
|
+
const slot = this.spawnWorker(workerIndex);
|
|
1272
|
+
this.workers.push(slot);
|
|
1273
|
+
this.dispatchQueueTask(workerIndex, slot);
|
|
1274
|
+
if (this.workers.length < this.capacity && this.queue.length > 0) {
|
|
1275
|
+
setImmediate(() => {
|
|
1276
|
+
this.drainQueue();
|
|
1277
|
+
});
|
|
1278
|
+
}
|
|
1279
|
+
}
|
|
1280
|
+
}
|
|
1281
|
+
dispatchQueueTask(workerIndex, slot) {
|
|
1282
|
+
const task = this.queue.shift();
|
|
1283
|
+
if (!task)
|
|
1284
|
+
return;
|
|
1285
|
+
this.dispatch(workerIndex, slot, task);
|
|
1436
1286
|
}
|
|
1437
1287
|
dispatch(workerIndex, slot, task) {
|
|
1438
1288
|
if (this.rejectIfAborted(task))
|
|
@@ -1503,11 +1353,23 @@ class WorkerPool {
|
|
|
1503
1353
|
task.reject(message);
|
|
1504
1354
|
this.restartWorker(workerIndex, slot);
|
|
1505
1355
|
}
|
|
1356
|
+
getQueueDepth() {
|
|
1357
|
+
return this.queue.length;
|
|
1358
|
+
}
|
|
1359
|
+
getActiveWorkers() {
|
|
1360
|
+
return this.workers.filter((s) => s?.busy).length;
|
|
1361
|
+
}
|
|
1362
|
+
getCapacity() {
|
|
1363
|
+
return this.capacity;
|
|
1364
|
+
}
|
|
1506
1365
|
async close() {
|
|
1507
1366
|
if (this.closed)
|
|
1508
1367
|
return;
|
|
1509
1368
|
this.closed = true;
|
|
1510
|
-
const terminations = this.workers
|
|
1369
|
+
const terminations = this.workers
|
|
1370
|
+
.map((slot) => slot?.worker.terminate())
|
|
1371
|
+
.filter((p) => p !== undefined);
|
|
1372
|
+
this.workers.fill(undefined);
|
|
1511
1373
|
this.workers.length = 0;
|
|
1512
1374
|
for (const [id, inflight] of this.inflight.entries()) {
|
|
1513
1375
|
clearTimeout(inflight.timer);
|
|
@@ -1556,4 +1418,3 @@ export async function transformHtmlToMarkdown(html, url, options) {
|
|
|
1556
1418
|
}
|
|
1557
1419
|
});
|
|
1558
1420
|
}
|
|
1559
|
-
//# sourceMappingURL=transform.js.map
|