@j0hanz/superfetch 2.3.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -9
- package/dist/assets/logo.svg +24835 -0
- package/dist/cache.js +58 -4
- package/dist/config.d.ts +2 -0
- package/dist/config.js +2 -0
- package/dist/dom-noise-removal.js +15 -13
- package/dist/fetch.js +16 -25
- package/dist/http-native.js +19 -3
- package/dist/markdown-cleanup.d.ts +6 -12
- package/dist/markdown-cleanup.js +243 -25
- package/dist/mcp.js +20 -9
- package/dist/observability.d.ts +2 -0
- package/dist/observability.js +25 -0
- package/dist/tools.d.ts +5 -3
- package/dist/tools.js +27 -12
- package/dist/transform-types.d.ts +38 -0
- package/dist/transform.d.ts +12 -6
- package/dist/transform.js +120 -265
- package/package.json +1 -2
package/dist/transform.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { randomUUID } from 'node:crypto';
|
|
2
2
|
import diagnosticsChannel from 'node:diagnostics_channel';
|
|
3
|
-
import os from 'node:os';
|
|
4
3
|
import { performance } from 'node:perf_hooks';
|
|
5
4
|
import { Worker } from 'node:worker_threads';
|
|
6
5
|
import { parseHTML } from 'linkedom';
|
|
@@ -12,15 +11,9 @@ import { removeNoiseFromHtml } from './dom-noise-removal.js';
|
|
|
12
11
|
import { FetchError, getErrorMessage } from './errors.js';
|
|
13
12
|
import { isRawTextContentUrl } from './fetch.js';
|
|
14
13
|
import { detectLanguageFromCode, resolveLanguageFromAttributes, } from './language-detection.js';
|
|
15
|
-
import { cleanupMarkdownArtifacts, promoteOrphanHeadings, } from './markdown-cleanup.js';
|
|
14
|
+
import { addSourceToMarkdown, buildMetadataFooter, cleanupMarkdownArtifacts, extractTitleFromRawMarkdown, isLikelyHtmlContent, isRawTextContent, } from './markdown-cleanup.js';
|
|
16
15
|
import { getOperationId, getRequestId, logDebug, logError, logInfo, logWarn, redactUrl, } from './observability.js';
|
|
17
16
|
import { isObject } from './type-guards.js';
|
|
18
|
-
// Re-export language detection for backward compatibility
|
|
19
|
-
export { detectLanguageFromCode, resolveLanguageFromAttributes, } from './language-detection.js';
|
|
20
|
-
// Re-export markdown cleanup for backward compatibility
|
|
21
|
-
export { cleanupMarkdownArtifacts, promoteOrphanHeadings, } from './markdown-cleanup.js';
|
|
22
|
-
// Re-export DOM noise removal for backward compatibility
|
|
23
|
-
export { removeNoiseFromHtml } from './dom-noise-removal.js';
|
|
24
17
|
function getAbortReason(signal) {
|
|
25
18
|
if (!isObject(signal))
|
|
26
19
|
return undefined;
|
|
@@ -48,25 +41,48 @@ function publishTransformEvent(event) {
|
|
|
48
41
|
/* empty */
|
|
49
42
|
}
|
|
50
43
|
}
|
|
51
|
-
export function startTransformStage(url, stage) {
|
|
52
|
-
if (!transformChannel.hasSubscribers)
|
|
44
|
+
export function startTransformStage(url, stage, budget) {
|
|
45
|
+
if (!transformChannel.hasSubscribers && !budget)
|
|
53
46
|
return null;
|
|
54
|
-
|
|
47
|
+
const remainingBudgetMs = budget
|
|
48
|
+
? budget.totalBudgetMs - budget.elapsedMs
|
|
49
|
+
: undefined;
|
|
50
|
+
const base = {
|
|
55
51
|
stage,
|
|
56
52
|
startTime: performance.now(),
|
|
57
53
|
url: redactUrl(url),
|
|
58
54
|
};
|
|
55
|
+
if (remainingBudgetMs !== undefined && budget) {
|
|
56
|
+
return {
|
|
57
|
+
...base,
|
|
58
|
+
budgetMs: remainingBudgetMs,
|
|
59
|
+
totalBudgetMs: budget.totalBudgetMs,
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
return base;
|
|
59
63
|
}
|
|
60
64
|
export function endTransformStage(context, options) {
|
|
61
65
|
if (!context)
|
|
62
|
-
return;
|
|
66
|
+
return 0;
|
|
67
|
+
const durationMs = performance.now() - context.startTime;
|
|
63
68
|
const requestId = getRequestId();
|
|
64
69
|
const operationId = getOperationId();
|
|
70
|
+
if (context.totalBudgetMs !== undefined) {
|
|
71
|
+
const warnThresholdMs = context.totalBudgetMs * config.transform.stageWarnRatio;
|
|
72
|
+
if (durationMs > warnThresholdMs) {
|
|
73
|
+
logWarn('Transform stage exceeded warning threshold', {
|
|
74
|
+
stage: context.stage,
|
|
75
|
+
durationMs: Math.round(durationMs),
|
|
76
|
+
thresholdMs: Math.round(warnThresholdMs),
|
|
77
|
+
url: context.url,
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
}
|
|
65
81
|
const event = {
|
|
66
82
|
v: 1,
|
|
67
83
|
type: 'stage',
|
|
68
84
|
stage: context.stage,
|
|
69
|
-
durationMs: performance.now() - context.startTime,
|
|
85
|
+
durationMs,
|
|
70
86
|
url: context.url,
|
|
71
87
|
...(requestId ? { requestId } : {}),
|
|
72
88
|
...(operationId ? { operationId } : {}),
|
|
@@ -75,14 +91,22 @@ export function endTransformStage(context, options) {
|
|
|
75
91
|
: {}),
|
|
76
92
|
};
|
|
77
93
|
publishTransformEvent(event);
|
|
94
|
+
return durationMs;
|
|
78
95
|
}
|
|
79
|
-
function runTransformStage(url, stage, fn) {
|
|
80
|
-
|
|
96
|
+
function runTransformStage(url, stage, fn, budget) {
|
|
97
|
+
if (budget && budget.elapsedMs >= budget.totalBudgetMs) {
|
|
98
|
+
throw new FetchError('Transform budget exhausted', url, 504, {
|
|
99
|
+
reason: 'timeout',
|
|
100
|
+
stage: `${stage}:budget_exhausted`,
|
|
101
|
+
elapsedMs: budget.elapsedMs,
|
|
102
|
+
totalBudgetMs: budget.totalBudgetMs,
|
|
103
|
+
});
|
|
104
|
+
}
|
|
105
|
+
const context = startTransformStage(url, stage, budget);
|
|
81
106
|
try {
|
|
82
107
|
return fn();
|
|
83
108
|
}
|
|
84
109
|
finally {
|
|
85
|
-
// Emit duration even if the stage throws; callers decide how to handle the error.
|
|
86
110
|
endTransformStage(context);
|
|
87
111
|
}
|
|
88
112
|
}
|
|
@@ -340,21 +364,22 @@ function applyBaseUri(document, url) {
|
|
|
340
364
|
});
|
|
341
365
|
}
|
|
342
366
|
}
|
|
343
|
-
// DOM noise removal functions moved to ./dom-noise-removal.ts
|
|
344
367
|
function buildInlineCode(content) {
|
|
345
|
-
|
|
346
|
-
let
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
368
|
+
let maxBackticks = 0;
|
|
369
|
+
let currentRun = 0;
|
|
370
|
+
for (const char of content) {
|
|
371
|
+
if (char === '`') {
|
|
372
|
+
currentRun++;
|
|
373
|
+
}
|
|
374
|
+
else {
|
|
375
|
+
if (currentRun > maxBackticks)
|
|
376
|
+
maxBackticks = currentRun;
|
|
377
|
+
currentRun = 0;
|
|
352
378
|
}
|
|
353
379
|
}
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
// CommonMark recommends padding when the code starts/ends with a backtick.
|
|
380
|
+
if (currentRun > maxBackticks)
|
|
381
|
+
maxBackticks = currentRun;
|
|
382
|
+
const delimiter = '`'.repeat(maxBackticks + 1);
|
|
358
383
|
const padding = content.startsWith('`') || content.endsWith('`') ? ' ' : '';
|
|
359
384
|
return `${delimiter}${padding}${content}${padding}${delimiter}`;
|
|
360
385
|
}
|
|
@@ -531,8 +556,7 @@ function translateHtmlToMarkdown(html, url, signal, document, skipNoiseRemoval)
|
|
|
531
556
|
throwIfAborted(signal, url, 'markdown:cleaned');
|
|
532
557
|
const content = runTransformStage(url, 'markdown:translate', () => getMarkdownConverter().translate(cleanedHtml).trim());
|
|
533
558
|
throwIfAborted(signal, url, 'markdown:translated');
|
|
534
|
-
|
|
535
|
-
return promoteOrphanHeadings(cleaned);
|
|
559
|
+
return cleanupMarkdownArtifacts(content);
|
|
536
560
|
}
|
|
537
561
|
function appendMetadataFooter(content, metadata, url) {
|
|
538
562
|
const footer = buildMetadataFooter(metadata, url);
|
|
@@ -554,223 +578,6 @@ export function htmlToMarkdown(html, metadata, options) {
|
|
|
554
578
|
return buildMetadataFooter(metadata, url);
|
|
555
579
|
}
|
|
556
580
|
}
|
|
557
|
-
// Markdown cleanup functions moved to ./markdown-cleanup.ts
|
|
558
|
-
function formatFetchedDate(isoString) {
|
|
559
|
-
try {
|
|
560
|
-
const date = new Date(isoString);
|
|
561
|
-
const day = String(date.getDate()).padStart(2, '0');
|
|
562
|
-
const month = String(date.getMonth() + 1).padStart(2, '0');
|
|
563
|
-
const year = date.getFullYear();
|
|
564
|
-
return `${day}-${month}-${year}`;
|
|
565
|
-
}
|
|
566
|
-
catch {
|
|
567
|
-
return isoString;
|
|
568
|
-
}
|
|
569
|
-
}
|
|
570
|
-
function buildMetadataFooter(metadata, fallbackUrl) {
|
|
571
|
-
if (!metadata)
|
|
572
|
-
return '';
|
|
573
|
-
const lines = ['---', ''];
|
|
574
|
-
const url = metadata.url || fallbackUrl;
|
|
575
|
-
const parts = [];
|
|
576
|
-
if (metadata.title)
|
|
577
|
-
parts.push(`_${metadata.title}_`);
|
|
578
|
-
if (metadata.author)
|
|
579
|
-
parts.push(`_${metadata.author}_`);
|
|
580
|
-
if (url)
|
|
581
|
-
parts.push(`[_Original Source_](${url})`);
|
|
582
|
-
if (metadata.fetchedAt) {
|
|
583
|
-
const formattedDate = formatFetchedDate(metadata.fetchedAt);
|
|
584
|
-
parts.push(`_${formattedDate}_`);
|
|
585
|
-
}
|
|
586
|
-
if (parts.length > 0) {
|
|
587
|
-
lines.push(` ${parts.join(' | ')}`);
|
|
588
|
-
}
|
|
589
|
-
if (metadata.description) {
|
|
590
|
-
lines.push(` <sub>${metadata.description}</sub>`);
|
|
591
|
-
}
|
|
592
|
-
return lines.join('\n');
|
|
593
|
-
}
|
|
594
|
-
const HEADING_PATTERN = /^#{1,6}\s/m;
|
|
595
|
-
const LIST_PATTERN = /^(?:[-*+])\s/m;
|
|
596
|
-
const HTML_DOCUMENT_PATTERN = /^(<!doctype|<html)/i;
|
|
597
|
-
function containsMarkdownHeading(content) {
|
|
598
|
-
return HEADING_PATTERN.test(content);
|
|
599
|
-
}
|
|
600
|
-
function containsMarkdownList(content) {
|
|
601
|
-
return LIST_PATTERN.test(content);
|
|
602
|
-
}
|
|
603
|
-
function containsFencedCodeBlock(content) {
|
|
604
|
-
const first = content.indexOf('```');
|
|
605
|
-
if (first === -1)
|
|
606
|
-
return false;
|
|
607
|
-
return content.includes('```', first + 3);
|
|
608
|
-
}
|
|
609
|
-
function looksLikeMarkdown(content) {
|
|
610
|
-
return (containsMarkdownHeading(content) ||
|
|
611
|
-
containsMarkdownList(content) ||
|
|
612
|
-
containsFencedCodeBlock(content));
|
|
613
|
-
}
|
|
614
|
-
function detectLineEnding(content) {
|
|
615
|
-
return content.includes('\r\n') ? '\r\n' : '\n';
|
|
616
|
-
}
|
|
617
|
-
const FRONTMATTER_DELIMITER = '---';
|
|
618
|
-
function findFrontmatterLines(content) {
|
|
619
|
-
const lineEnding = detectLineEnding(content);
|
|
620
|
-
const lines = content.split(lineEnding);
|
|
621
|
-
if (lines[0] !== FRONTMATTER_DELIMITER)
|
|
622
|
-
return null;
|
|
623
|
-
const endIndex = lines.indexOf(FRONTMATTER_DELIMITER, 1);
|
|
624
|
-
if (endIndex === -1)
|
|
625
|
-
return null;
|
|
626
|
-
return { lineEnding, lines, endIndex };
|
|
627
|
-
}
|
|
628
|
-
function stripOptionalQuotes(value) {
|
|
629
|
-
const trimmed = value.trim();
|
|
630
|
-
if (trimmed.length < 2)
|
|
631
|
-
return trimmed;
|
|
632
|
-
const first = trimmed[0];
|
|
633
|
-
const last = trimmed[trimmed.length - 1];
|
|
634
|
-
if ((first === '"' && last === '"') || (first === "'" && last === "'")) {
|
|
635
|
-
return trimmed.slice(1, -1).trim();
|
|
636
|
-
}
|
|
637
|
-
return trimmed;
|
|
638
|
-
}
|
|
639
|
-
function parseFrontmatterEntry(line) {
|
|
640
|
-
const trimmed = line.trim();
|
|
641
|
-
if (!trimmed)
|
|
642
|
-
return null;
|
|
643
|
-
const separatorIndex = trimmed.indexOf(':');
|
|
644
|
-
if (separatorIndex <= 0)
|
|
645
|
-
return null;
|
|
646
|
-
const key = trimmed.slice(0, separatorIndex).trim().toLowerCase();
|
|
647
|
-
const value = trimmed.slice(separatorIndex + 1);
|
|
648
|
-
return { key, value };
|
|
649
|
-
}
|
|
650
|
-
function isTitleKey(key) {
|
|
651
|
-
return key === 'title' || key === 'name';
|
|
652
|
-
}
|
|
653
|
-
function extractTitleFromHeading(content) {
|
|
654
|
-
const lineEnding = detectLineEnding(content);
|
|
655
|
-
const lines = content.split(lineEnding);
|
|
656
|
-
for (const line of lines) {
|
|
657
|
-
const trimmed = line.trim();
|
|
658
|
-
if (!trimmed)
|
|
659
|
-
continue;
|
|
660
|
-
let index = 0;
|
|
661
|
-
while (index < trimmed.length && trimmed[index] === '#') {
|
|
662
|
-
index += 1;
|
|
663
|
-
}
|
|
664
|
-
if (index === 0 || index > 6)
|
|
665
|
-
return undefined;
|
|
666
|
-
const nextChar = trimmed[index];
|
|
667
|
-
if (nextChar !== ' ' && nextChar !== '\t')
|
|
668
|
-
return undefined;
|
|
669
|
-
const heading = trimmed.slice(index).trim();
|
|
670
|
-
return heading.length > 0 ? heading : undefined;
|
|
671
|
-
}
|
|
672
|
-
return undefined;
|
|
673
|
-
}
|
|
674
|
-
function extractTitleFromRawMarkdown(content) {
|
|
675
|
-
const frontmatter = findFrontmatterLines(content);
|
|
676
|
-
if (!frontmatter) {
|
|
677
|
-
return extractTitleFromHeading(content);
|
|
678
|
-
}
|
|
679
|
-
const { lines, endIndex } = frontmatter;
|
|
680
|
-
const entry = lines
|
|
681
|
-
.slice(1, endIndex)
|
|
682
|
-
.map((line) => parseFrontmatterEntry(line))
|
|
683
|
-
.find((parsed) => parsed !== null && isTitleKey(parsed.key));
|
|
684
|
-
if (!entry)
|
|
685
|
-
return undefined;
|
|
686
|
-
const value = stripOptionalQuotes(entry.value);
|
|
687
|
-
return value || undefined;
|
|
688
|
-
}
|
|
689
|
-
function hasMarkdownSourceLine(content) {
|
|
690
|
-
const lineEnding = detectLineEnding(content);
|
|
691
|
-
const lines = content.split(lineEnding);
|
|
692
|
-
const limit = Math.min(lines.length, 50);
|
|
693
|
-
for (let index = 0; index < limit; index += 1) {
|
|
694
|
-
const line = lines[index];
|
|
695
|
-
if (!line)
|
|
696
|
-
continue;
|
|
697
|
-
if (line.trimStart().toLowerCase().startsWith('source:')) {
|
|
698
|
-
return true;
|
|
699
|
-
}
|
|
700
|
-
}
|
|
701
|
-
return false;
|
|
702
|
-
}
|
|
703
|
-
function addSourceToMarkdownMarkdownFormat(content, url) {
|
|
704
|
-
if (hasMarkdownSourceLine(content))
|
|
705
|
-
return content;
|
|
706
|
-
const lineEnding = detectLineEnding(content);
|
|
707
|
-
const lines = content.split(lineEnding);
|
|
708
|
-
const firstNonEmptyIndex = lines.findIndex((line) => line.trim().length > 0);
|
|
709
|
-
if (firstNonEmptyIndex !== -1) {
|
|
710
|
-
const firstLine = lines[firstNonEmptyIndex];
|
|
711
|
-
if (firstLine && /^#{1,6}\s+/.test(firstLine.trim())) {
|
|
712
|
-
const insertAt = firstNonEmptyIndex + 1;
|
|
713
|
-
const updated = [
|
|
714
|
-
...lines.slice(0, insertAt),
|
|
715
|
-
'',
|
|
716
|
-
`Source: ${url}`,
|
|
717
|
-
'',
|
|
718
|
-
...lines.slice(insertAt),
|
|
719
|
-
];
|
|
720
|
-
return updated.join(lineEnding);
|
|
721
|
-
}
|
|
722
|
-
}
|
|
723
|
-
return [`Source: ${url}`, '', content].join(lineEnding);
|
|
724
|
-
}
|
|
725
|
-
function addSourceToMarkdown(content, url) {
|
|
726
|
-
const frontmatter = findFrontmatterLines(content);
|
|
727
|
-
if (config.transform.metadataFormat === 'markdown' && !frontmatter) {
|
|
728
|
-
return addSourceToMarkdownMarkdownFormat(content, url);
|
|
729
|
-
}
|
|
730
|
-
if (!frontmatter) {
|
|
731
|
-
return `---\nsource: "${url}"\n---\n\n${content}`;
|
|
732
|
-
}
|
|
733
|
-
const { lineEnding, lines, endIndex } = frontmatter;
|
|
734
|
-
const bodyLines = lines.slice(1, endIndex);
|
|
735
|
-
const hasSource = bodyLines.some((line) => line.trimStart().toLowerCase().startsWith('source:'));
|
|
736
|
-
if (hasSource)
|
|
737
|
-
return content;
|
|
738
|
-
const updatedLines = [
|
|
739
|
-
lines[0],
|
|
740
|
-
...bodyLines,
|
|
741
|
-
`source: "${url}"`,
|
|
742
|
-
...lines.slice(endIndex),
|
|
743
|
-
];
|
|
744
|
-
return updatedLines.join(lineEnding);
|
|
745
|
-
}
|
|
746
|
-
function hasFrontmatter(trimmed) {
|
|
747
|
-
return trimmed.startsWith('---\n') || trimmed.startsWith('---\r\n');
|
|
748
|
-
}
|
|
749
|
-
function looksLikeHtmlDocument(trimmed) {
|
|
750
|
-
return HTML_DOCUMENT_PATTERN.test(trimmed);
|
|
751
|
-
}
|
|
752
|
-
function countCommonHtmlTags(content) {
|
|
753
|
-
const matches = content.match(/<(html|head|body|div|span|script|style|meta|link)\b/gi) ??
|
|
754
|
-
[];
|
|
755
|
-
return matches.length;
|
|
756
|
-
}
|
|
757
|
-
function isRawTextContent(content) {
|
|
758
|
-
const trimmed = content.trim();
|
|
759
|
-
const isHtmlDocument = looksLikeHtmlDocument(trimmed);
|
|
760
|
-
const hasMarkdownFrontmatter = hasFrontmatter(trimmed);
|
|
761
|
-
const hasTooManyHtmlTags = countCommonHtmlTags(content) > 2;
|
|
762
|
-
const isMarkdown = looksLikeMarkdown(content);
|
|
763
|
-
return (!isHtmlDocument &&
|
|
764
|
-
(hasMarkdownFrontmatter || (!hasTooManyHtmlTags && isMarkdown)));
|
|
765
|
-
}
|
|
766
|
-
function isLikelyHtmlContent(content) {
|
|
767
|
-
const trimmed = content.trim();
|
|
768
|
-
if (!trimmed)
|
|
769
|
-
return false;
|
|
770
|
-
if (looksLikeHtmlDocument(trimmed))
|
|
771
|
-
return true;
|
|
772
|
-
return countCommonHtmlTags(content) > 2;
|
|
773
|
-
}
|
|
774
581
|
function shouldPreserveRawContent(url, content) {
|
|
775
582
|
if (isRawTextContentUrl(url)) {
|
|
776
583
|
return !isLikelyHtmlContent(content);
|
|
@@ -1189,11 +996,11 @@ const workerMessageSchema = z.discriminatedUnion('type', [
|
|
|
1189
996
|
}),
|
|
1190
997
|
]);
|
|
1191
998
|
let pool = null;
|
|
999
|
+
const POOL_MIN_WORKERS = 2;
|
|
1000
|
+
const POOL_MAX_WORKERS = 4;
|
|
1001
|
+
const POOL_SCALE_THRESHOLD = 0.5;
|
|
1192
1002
|
function resolveDefaultWorkerCount() {
|
|
1193
|
-
|
|
1194
|
-
? os.availableParallelism()
|
|
1195
|
-
: os.cpus().length;
|
|
1196
|
-
return Math.min(16, Math.max(1, parallelism - 1));
|
|
1003
|
+
return POOL_MIN_WORKERS;
|
|
1197
1004
|
}
|
|
1198
1005
|
const DEFAULT_TIMEOUT_MS = config.transform.timeoutMs;
|
|
1199
1006
|
function getOrCreateTransformWorkerPool() {
|
|
@@ -1206,8 +1013,20 @@ export async function shutdownTransformWorkerPool() {
|
|
|
1206
1013
|
await pool.close();
|
|
1207
1014
|
pool = null;
|
|
1208
1015
|
}
|
|
1016
|
+
export function getTransformPoolStats() {
|
|
1017
|
+
if (!pool)
|
|
1018
|
+
return null;
|
|
1019
|
+
return {
|
|
1020
|
+
queueDepth: pool.getQueueDepth(),
|
|
1021
|
+
activeWorkers: pool.getActiveWorkers(),
|
|
1022
|
+
capacity: pool.getCapacity(),
|
|
1023
|
+
};
|
|
1024
|
+
}
|
|
1209
1025
|
class WorkerPool {
|
|
1210
1026
|
workers = [];
|
|
1027
|
+
capacity;
|
|
1028
|
+
minCapacity;
|
|
1029
|
+
maxCapacity;
|
|
1211
1030
|
queue = [];
|
|
1212
1031
|
inflight = new Map();
|
|
1213
1032
|
timeoutMs;
|
|
@@ -1323,12 +1142,11 @@ class WorkerPool {
|
|
|
1323
1142
|
});
|
|
1324
1143
|
}
|
|
1325
1144
|
constructor(size, timeoutMs) {
|
|
1326
|
-
|
|
1145
|
+
this.minCapacity = POOL_MIN_WORKERS;
|
|
1146
|
+
this.maxCapacity = POOL_MAX_WORKERS;
|
|
1147
|
+
this.capacity = Math.max(this.minCapacity, Math.min(size, this.maxCapacity));
|
|
1327
1148
|
this.timeoutMs = timeoutMs;
|
|
1328
|
-
this.queueMax =
|
|
1329
|
-
for (let index = 0; index < safeSize; index += 1) {
|
|
1330
|
-
this.workers.push(this.spawnWorker(index));
|
|
1331
|
-
}
|
|
1149
|
+
this.queueMax = this.maxCapacity * 32;
|
|
1332
1150
|
}
|
|
1333
1151
|
spawnWorker(workerIndex) {
|
|
1334
1152
|
const worker = new Worker(new URL('./workers/transform-worker.js', import.meta.url));
|
|
@@ -1426,21 +1244,46 @@ class WorkerPool {
|
|
|
1426
1244
|
this.drainQueue();
|
|
1427
1245
|
});
|
|
1428
1246
|
}
|
|
1247
|
+
/** Scale capacity up if queue pressure exceeds threshold. */
|
|
1248
|
+
maybeScaleUp() {
|
|
1249
|
+
if (this.queue.length > this.capacity * POOL_SCALE_THRESHOLD &&
|
|
1250
|
+
this.capacity < this.maxCapacity) {
|
|
1251
|
+
this.capacity += 1;
|
|
1252
|
+
}
|
|
1253
|
+
}
|
|
1429
1254
|
drainQueue() {
|
|
1255
|
+
if (this.closed)
|
|
1256
|
+
return;
|
|
1430
1257
|
if (this.queue.length === 0)
|
|
1431
1258
|
return;
|
|
1259
|
+
this.maybeScaleUp();
|
|
1260
|
+
// First pass: try to find an idle existing worker
|
|
1432
1261
|
for (let workerIndex = 0; workerIndex < this.workers.length; workerIndex += 1) {
|
|
1433
1262
|
const slot = this.workers[workerIndex];
|
|
1434
|
-
if (
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1263
|
+
if (slot && !slot.busy) {
|
|
1264
|
+
this.dispatchQueueTask(workerIndex, slot);
|
|
1265
|
+
if (this.queue.length === 0)
|
|
1266
|
+
return;
|
|
1267
|
+
}
|
|
1268
|
+
}
|
|
1269
|
+
if (this.workers.length < this.capacity && this.queue.length > 0) {
|
|
1270
|
+
const workerIndex = this.workers.length;
|
|
1271
|
+
const slot = this.spawnWorker(workerIndex);
|
|
1272
|
+
this.workers.push(slot);
|
|
1273
|
+
this.dispatchQueueTask(workerIndex, slot);
|
|
1274
|
+
if (this.workers.length < this.capacity && this.queue.length > 0) {
|
|
1275
|
+
setImmediate(() => {
|
|
1276
|
+
this.drainQueue();
|
|
1277
|
+
});
|
|
1278
|
+
}
|
|
1442
1279
|
}
|
|
1443
1280
|
}
|
|
1281
|
+
dispatchQueueTask(workerIndex, slot) {
|
|
1282
|
+
const task = this.queue.shift();
|
|
1283
|
+
if (!task)
|
|
1284
|
+
return;
|
|
1285
|
+
this.dispatch(workerIndex, slot, task);
|
|
1286
|
+
}
|
|
1444
1287
|
dispatch(workerIndex, slot, task) {
|
|
1445
1288
|
if (this.rejectIfAborted(task))
|
|
1446
1289
|
return;
|
|
@@ -1510,11 +1353,23 @@ class WorkerPool {
|
|
|
1510
1353
|
task.reject(message);
|
|
1511
1354
|
this.restartWorker(workerIndex, slot);
|
|
1512
1355
|
}
|
|
1356
|
+
getQueueDepth() {
|
|
1357
|
+
return this.queue.length;
|
|
1358
|
+
}
|
|
1359
|
+
getActiveWorkers() {
|
|
1360
|
+
return this.workers.filter((s) => s?.busy).length;
|
|
1361
|
+
}
|
|
1362
|
+
getCapacity() {
|
|
1363
|
+
return this.capacity;
|
|
1364
|
+
}
|
|
1513
1365
|
async close() {
|
|
1514
1366
|
if (this.closed)
|
|
1515
1367
|
return;
|
|
1516
1368
|
this.closed = true;
|
|
1517
|
-
const terminations = this.workers
|
|
1369
|
+
const terminations = this.workers
|
|
1370
|
+
.map((slot) => slot?.worker.terminate())
|
|
1371
|
+
.filter((p) => p !== undefined);
|
|
1372
|
+
this.workers.fill(undefined);
|
|
1518
1373
|
this.workers.length = 0;
|
|
1519
1374
|
for (const [id, inflight] of this.inflight.entries()) {
|
|
1520
1375
|
clearTimeout(inflight.timer);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@j0hanz/superfetch",
|
|
3
|
-
"version": "2.3.0",
|
|
3
|
+
"version": "2.4.0",
|
|
4
4
|
"mcpName": "io.github.j0hanz/superfetch",
|
|
5
5
|
"description": "Intelligent web content fetcher MCP server that converts HTML to clean, AI-readable Markdown",
|
|
6
6
|
"type": "module",
|
|
@@ -59,7 +59,6 @@
|
|
|
59
59
|
"@modelcontextprotocol/sdk": "^1.25.3",
|
|
60
60
|
"@mozilla/readability": "^0.6.0",
|
|
61
61
|
"linkedom": "^0.18.12",
|
|
62
|
-
"lru-cache": "^11.2.5",
|
|
63
62
|
"node-html-markdown": "^2.0.0",
|
|
64
63
|
"undici": "^7.19.2",
|
|
65
64
|
"zod": "^4.3.6"
|