@j0hanz/superfetch 2.7.0 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache.js +12 -11
- package/dist/markdown-cleanup.js +0 -2
- package/dist/resources.js +9 -21
- package/dist/transform.js +46 -73
- package/package.json +1 -1
package/dist/cache.js
CHANGED
|
@@ -41,14 +41,6 @@ export function parseCachedPayload(raw) {
|
|
|
41
41
|
export function resolveCachedPayloadContent(payload) {
|
|
42
42
|
return payload.markdown ?? payload.content ?? null;
|
|
43
43
|
}
|
|
44
|
-
function stableStringify(value) {
|
|
45
|
-
try {
|
|
46
|
-
return stableJsonStringify(value);
|
|
47
|
-
}
|
|
48
|
-
catch {
|
|
49
|
-
return null;
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
44
|
function createHashFragment(input, length) {
|
|
53
45
|
return sha256Hex(input).substring(0, length);
|
|
54
46
|
}
|
|
@@ -63,9 +55,18 @@ export function createCacheKey(namespace, url, vary) {
|
|
|
63
55
|
const urlHash = createHashFragment(url, CACHE_CONSTANTS.URL_HASH_LENGTH);
|
|
64
56
|
let varyHash;
|
|
65
57
|
if (vary) {
|
|
66
|
-
|
|
67
|
-
if (
|
|
68
|
-
|
|
58
|
+
let varyString;
|
|
59
|
+
if (typeof vary === 'string') {
|
|
60
|
+
varyString = vary;
|
|
61
|
+
}
|
|
62
|
+
else {
|
|
63
|
+
try {
|
|
64
|
+
varyString = stableJsonStringify(vary);
|
|
65
|
+
}
|
|
66
|
+
catch {
|
|
67
|
+
return null;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
69
70
|
if (varyString) {
|
|
70
71
|
varyHash = createHashFragment(varyString, CACHE_CONSTANTS.VARY_HASH_LENGTH);
|
|
71
72
|
}
|
package/dist/markdown-cleanup.js
CHANGED
|
@@ -22,7 +22,6 @@ const REGEX = {
|
|
|
22
22
|
SPACING_ADJ_COMBINED: /(?:\]\([^)]+\)|`[^`]+`)(?=[A-Za-z0-9])/g,
|
|
23
23
|
SPACING_CODE_DASH: /(`[^`]+`)\s*\\-\s*/g,
|
|
24
24
|
SPACING_ESCAPES: /\\([[\].])/g,
|
|
25
|
-
SPACING_URL_ENC: /\]\([^)]*%5[Ff][^)]*\)/g,
|
|
26
25
|
SPACING_LIST_NUM_COMBINED: /^((?![-*+] |\d+\. |[ \t]).+)\n((?:[-*+]|\d+\.) )/gm,
|
|
27
26
|
TYPEDOC: /(`+)(?:(?!\1)[\s\S])*?\1|\s?\/\\?\*[\s\S]*?\\?\*\//g,
|
|
28
27
|
};
|
|
@@ -203,7 +202,6 @@ function applyGlobalRegexes(text) {
|
|
|
203
202
|
.replace(REGEX.SPACING_ADJ_COMBINED, '$& ')
|
|
204
203
|
.replace(REGEX.SPACING_CODE_DASH, '$1 - ')
|
|
205
204
|
.replace(REGEX.SPACING_ESCAPES, '$1')
|
|
206
|
-
.replace(REGEX.SPACING_URL_ENC, (m) => m.replace(/%5[Ff]/g, '_'))
|
|
207
205
|
.replace(REGEX.SPACING_LIST_NUM_COMBINED, '$1\n\n$2')
|
|
208
206
|
.replace(REGEX.DOUBLE_NEWLINE_REDUCER, '\n\n');
|
|
209
207
|
// fixProperties
|
package/dist/resources.js
CHANGED
|
@@ -8,30 +8,18 @@ const REDACTED = '<REDACTED>';
|
|
|
8
8
|
const CONFIG_RESOURCE_NAME = 'config';
|
|
9
9
|
const CONFIG_RESOURCE_URI = 'internal://config';
|
|
10
10
|
const JSON_MIME = 'application/json';
|
|
11
|
-
function redactIfPresent(value) {
|
|
12
|
-
return value ? REDACTED : undefined;
|
|
13
|
-
}
|
|
14
|
-
function redactArray(values) {
|
|
15
|
-
return values.map(() => REDACTED);
|
|
16
|
-
}
|
|
17
|
-
function scrubAuth(auth) {
|
|
18
|
-
return {
|
|
19
|
-
...auth,
|
|
20
|
-
clientSecret: redactIfPresent(auth.clientSecret),
|
|
21
|
-
staticTokens: redactArray(auth.staticTokens),
|
|
22
|
-
};
|
|
23
|
-
}
|
|
24
|
-
function scrubSecurity(security) {
|
|
25
|
-
return {
|
|
26
|
-
...security,
|
|
27
|
-
apiKey: redactIfPresent(security.apiKey),
|
|
28
|
-
};
|
|
29
|
-
}
|
|
30
11
|
function scrubConfig(source) {
|
|
31
12
|
return {
|
|
32
13
|
...source,
|
|
33
|
-
auth:
|
|
34
|
-
|
|
14
|
+
auth: {
|
|
15
|
+
...source.auth,
|
|
16
|
+
clientSecret: source.auth.clientSecret ? REDACTED : undefined,
|
|
17
|
+
staticTokens: source.auth.staticTokens.map(() => REDACTED),
|
|
18
|
+
},
|
|
19
|
+
security: {
|
|
20
|
+
...source.security,
|
|
21
|
+
apiKey: source.security.apiKey ? REDACTED : undefined,
|
|
22
|
+
},
|
|
35
23
|
};
|
|
36
24
|
}
|
|
37
25
|
export function registerConfigResource(server) {
|
package/dist/transform.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { AsyncLocalStorage, AsyncResource } from 'node:async_hooks';
|
|
2
2
|
import { Buffer } from 'node:buffer';
|
|
3
3
|
import { fork } from 'node:child_process';
|
|
4
|
-
import { randomUUID } from 'node:crypto';
|
|
5
4
|
import diagnosticsChannel from 'node:diagnostics_channel';
|
|
6
5
|
import { availableParallelism } from 'node:os';
|
|
7
6
|
import { performance } from 'node:perf_hooks';
|
|
@@ -202,7 +201,11 @@ function truncateHtml(html) {
|
|
|
202
201
|
const byteLength = Buffer.byteLength(html, 'utf8');
|
|
203
202
|
if (byteLength <= maxSize)
|
|
204
203
|
return { html, truncated: false };
|
|
205
|
-
const
|
|
204
|
+
const sliced = html.slice(0, maxSize);
|
|
205
|
+
if (Buffer.byteLength(sliced, 'utf8') <= maxSize) {
|
|
206
|
+
return { html: sliced, truncated: true };
|
|
207
|
+
}
|
|
208
|
+
const htmlBuffer = Buffer.from(sliced, 'utf8');
|
|
206
209
|
let content = trimUtf8Buffer(htmlBuffer, maxSize).toString('utf8');
|
|
207
210
|
// Avoid truncating inside tags.
|
|
208
211
|
const lastOpen = content.lastIndexOf('<');
|
|
@@ -224,8 +227,11 @@ function willTruncate(html) {
|
|
|
224
227
|
const HEAD_END_PATTERN = /<\/head\s*>|<body\b/i;
|
|
225
228
|
const MAX_HEAD_SCAN_LENGTH = 50_000;
|
|
226
229
|
function extractHeadSection(html) {
|
|
227
|
-
|
|
228
|
-
|
|
230
|
+
if (html.length <= MAX_HEAD_SCAN_LENGTH) {
|
|
231
|
+
const match = HEAD_END_PATTERN.exec(html);
|
|
232
|
+
return match ? html.substring(0, match.index) : null;
|
|
233
|
+
}
|
|
234
|
+
const searchText = html.substring(0, MAX_HEAD_SCAN_LENGTH);
|
|
229
235
|
const match = HEAD_END_PATTERN.exec(searchText);
|
|
230
236
|
if (!match)
|
|
231
237
|
return null;
|
|
@@ -875,9 +881,22 @@ function createCustomTranslators() {
|
|
|
875
881
|
sup: () => ({
|
|
876
882
|
postprocess: ({ content }) => `^${content}^`,
|
|
877
883
|
}),
|
|
878
|
-
section: () =>
|
|
879
|
-
|
|
880
|
-
|
|
884
|
+
section: (ctx) => {
|
|
885
|
+
if (isObject(ctx) && isObject(ctx.node)) {
|
|
886
|
+
const { node } = ctx;
|
|
887
|
+
const getAttribute = hasGetAttribute(node)
|
|
888
|
+
? node.getAttribute.bind(node)
|
|
889
|
+
: undefined;
|
|
890
|
+
if (getAttribute?.('class')?.includes('tsd-member')) {
|
|
891
|
+
return {
|
|
892
|
+
postprocess: ({ content }) => `\n\n \n\n${content}\n\n`,
|
|
893
|
+
};
|
|
894
|
+
}
|
|
895
|
+
}
|
|
896
|
+
return {
|
|
897
|
+
postprocess: ({ content }) => `\n\n${content}\n\n`,
|
|
898
|
+
};
|
|
899
|
+
},
|
|
881
900
|
details: () => ({
|
|
882
901
|
postprocess: ({ content }) => {
|
|
883
902
|
const trimmed = content.trim();
|
|
@@ -948,48 +967,6 @@ function containsWhitespace(value) {
|
|
|
948
967
|
}
|
|
949
968
|
return false;
|
|
950
969
|
}
|
|
951
|
-
function extractClassAttribute(openTag) {
|
|
952
|
-
const lower = openTag.toLowerCase();
|
|
953
|
-
const classIndex = lower.indexOf('class');
|
|
954
|
-
if (classIndex === -1)
|
|
955
|
-
return null;
|
|
956
|
-
let i = classIndex + 5;
|
|
957
|
-
while (i < lower.length && isWhitespaceChar(lower.charCodeAt(i)))
|
|
958
|
-
i += 1;
|
|
959
|
-
if (lower[i] !== '=')
|
|
960
|
-
return null;
|
|
961
|
-
i += 1;
|
|
962
|
-
while (i < lower.length && isWhitespaceChar(lower.charCodeAt(i)))
|
|
963
|
-
i += 1;
|
|
964
|
-
const quote = openTag[i];
|
|
965
|
-
if (quote !== '"' && quote !== "'")
|
|
966
|
-
return null;
|
|
967
|
-
i += 1;
|
|
968
|
-
const end = openTag.indexOf(quote, i);
|
|
969
|
-
if (end === -1)
|
|
970
|
-
return null;
|
|
971
|
-
return openTag.slice(i, end);
|
|
972
|
-
}
|
|
973
|
-
function skipWhitespace(text, start) {
|
|
974
|
-
let index = start;
|
|
975
|
-
while (index < text.length && isWhitespaceChar(text.charCodeAt(index))) {
|
|
976
|
-
index += 1;
|
|
977
|
-
}
|
|
978
|
-
return index;
|
|
979
|
-
}
|
|
980
|
-
function isTsdMemberSectionTag(openTag) {
|
|
981
|
-
const classValue = extractClassAttribute(openTag);
|
|
982
|
-
return classValue ? classValue.toLowerCase().includes('tsd-member') : false;
|
|
983
|
-
}
|
|
984
|
-
function findTsdMemberSectionStart(html, scan) {
|
|
985
|
-
if (scan >= html.length || !html.startsWith('<section', scan))
|
|
986
|
-
return null;
|
|
987
|
-
const tagEnd = html.indexOf('>', scan);
|
|
988
|
-
if (tagEnd === -1)
|
|
989
|
-
return null;
|
|
990
|
-
const openTag = html.slice(scan, tagEnd + 1);
|
|
991
|
-
return isTsdMemberSectionTag(openTag) ? scan : null;
|
|
992
|
-
}
|
|
993
970
|
function resolveRelativeHref(href, baseUrl, origin) {
|
|
994
971
|
const trimmedHref = href.trim();
|
|
995
972
|
if (!trimmedHref || containsWhitespace(trimmedHref))
|
|
@@ -1045,26 +1022,6 @@ function findInlineLink(markdown, start) {
|
|
|
1045
1022
|
}
|
|
1046
1023
|
return null;
|
|
1047
1024
|
}
|
|
1048
|
-
function preprocessPropertySections(html) {
|
|
1049
|
-
const closeTag = '</section>';
|
|
1050
|
-
let cursor = 0;
|
|
1051
|
-
let output = '';
|
|
1052
|
-
for (let closeIndex = html.indexOf(closeTag, cursor); closeIndex !== -1; closeIndex = html.indexOf(closeTag, cursor)) {
|
|
1053
|
-
const afterClose = closeIndex + closeTag.length;
|
|
1054
|
-
output += html.slice(cursor, afterClose);
|
|
1055
|
-
const scan = skipWhitespace(html, afterClose);
|
|
1056
|
-
const sectionStart = findTsdMemberSectionStart(html, scan);
|
|
1057
|
-
if (sectionStart !== null) {
|
|
1058
|
-
output += '<p> </p>';
|
|
1059
|
-
cursor = sectionStart;
|
|
1060
|
-
continue;
|
|
1061
|
-
}
|
|
1062
|
-
output += html.slice(afterClose, scan);
|
|
1063
|
-
cursor = scan;
|
|
1064
|
-
}
|
|
1065
|
-
output += html.slice(cursor);
|
|
1066
|
-
return output;
|
|
1067
|
-
}
|
|
1068
1025
|
function isAbsoluteOrSpecialUrl(href) {
|
|
1069
1026
|
const trimmedHref = href.trim();
|
|
1070
1027
|
if (!trimmedHref)
|
|
@@ -1102,8 +1059,7 @@ function translateHtmlToMarkdown(params) {
|
|
|
1102
1059
|
? html
|
|
1103
1060
|
: stageTracker.run(url, 'markdown:noise', () => removeNoiseFromHtml(html, document, url));
|
|
1104
1061
|
abortPolicy.throwIfAborted(signal, url, 'markdown:cleaned');
|
|
1105
|
-
const
|
|
1106
|
-
const content = stageTracker.run(url, 'markdown:translate', () => translateHtmlFragmentToMarkdown(preprocessedHtml));
|
|
1062
|
+
const content = stageTracker.run(url, 'markdown:translate', () => translateHtmlFragmentToMarkdown(cleanedHtml));
|
|
1107
1063
|
abortPolicy.throwIfAborted(signal, url, 'markdown:translated');
|
|
1108
1064
|
const cleaned = cleanupMarkdownArtifacts(content);
|
|
1109
1065
|
return url ? resolveRelativeUrls(cleaned, url) : cleaned;
|
|
@@ -1673,6 +1629,7 @@ class WorkerPool {
|
|
|
1673
1629
|
queueMax;
|
|
1674
1630
|
spawnWorkerImpl;
|
|
1675
1631
|
closed = false;
|
|
1632
|
+
taskIdSeq = 0;
|
|
1676
1633
|
constructor(size, timeoutMs, spawnWorker) {
|
|
1677
1634
|
if (size === 0) {
|
|
1678
1635
|
this.capacity = 0;
|
|
@@ -1752,7 +1709,7 @@ class WorkerPool {
|
|
|
1752
1709
|
throw new Error(WorkerPool.CLOSED_MESSAGE);
|
|
1753
1710
|
}
|
|
1754
1711
|
createPendingTask(htmlOrBuffer, url, options, resolve, reject) {
|
|
1755
|
-
const id =
|
|
1712
|
+
const id = (this.taskIdSeq++).toString(36);
|
|
1756
1713
|
// Preserve request context for resolve/reject even when callbacks fire
|
|
1757
1714
|
// from worker thread events.
|
|
1758
1715
|
const context = createTaskContext();
|
|
@@ -2153,10 +2110,26 @@ async function transformWithWorkerPool(htmlOrBuffer, url, options) {
|
|
|
2153
2110
|
});
|
|
2154
2111
|
}
|
|
2155
2112
|
function resolveWorkerFallback(error, htmlOrBuffer, url, options) {
|
|
2113
|
+
const isQueueFull = error instanceof FetchError && error.details.reason === 'queue_full';
|
|
2114
|
+
if (isQueueFull) {
|
|
2115
|
+
logWarn('Transform worker queue full; falling back to in-process', {
|
|
2116
|
+
url: redactUrl(url),
|
|
2117
|
+
});
|
|
2118
|
+
return transformHtmlToMarkdownInProcess(decodeUtf8Input(htmlOrBuffer), url, options);
|
|
2119
|
+
}
|
|
2156
2120
|
if (error instanceof FetchError)
|
|
2157
2121
|
throw error;
|
|
2158
2122
|
abortPolicy.throwIfAborted(options.signal, url, 'transform:worker-fallback');
|
|
2159
|
-
|
|
2123
|
+
const message = getErrorMessage(error);
|
|
2124
|
+
logWarn('Transform worker failed; refusing in-process fallback', {
|
|
2125
|
+
url: redactUrl(url),
|
|
2126
|
+
error: message,
|
|
2127
|
+
});
|
|
2128
|
+
throw new FetchError('Transform worker failed', url, 503, {
|
|
2129
|
+
reason: 'worker_failed',
|
|
2130
|
+
stage: 'transform:worker',
|
|
2131
|
+
error: message,
|
|
2132
|
+
});
|
|
2160
2133
|
}
|
|
2161
2134
|
async function transformInputToMarkdown(htmlOrBuffer, url, options) {
|
|
2162
2135
|
const totalStage = stageTracker.start(url, 'transform:total');
|
package/package.json
CHANGED