@j0hanz/superfetch 2.6.0 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cache.js +26 -23
- package/dist/config.js +51 -39
- package/dist/dom-noise-removal.js +4 -1
- package/dist/fetch.d.ts +1 -0
- package/dist/fetch.js +160 -97
- package/dist/http-native.js +31 -14
- package/dist/language-detection.js +28 -4
- package/dist/markdown-cleanup.js +0 -2
- package/dist/mcp.js +7 -1
- package/dist/resources.js +9 -21
- package/dist/tasks.d.ts +1 -0
- package/dist/tasks.js +129 -95
- package/dist/tools.d.ts +2 -0
- package/dist/tools.js +4 -3
- package/dist/transform-types.d.ts +1 -0
- package/dist/transform.js +168 -90
- package/package.json +1 -1
package/dist/cache.js
CHANGED
|
@@ -41,14 +41,6 @@ export function parseCachedPayload(raw) {
|
|
|
41
41
|
export function resolveCachedPayloadContent(payload) {
|
|
42
42
|
return payload.markdown ?? payload.content ?? null;
|
|
43
43
|
}
|
|
44
|
-
function stableStringify(value) {
|
|
45
|
-
try {
|
|
46
|
-
return stableJsonStringify(value);
|
|
47
|
-
}
|
|
48
|
-
catch {
|
|
49
|
-
return null;
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
44
|
function createHashFragment(input, length) {
|
|
53
45
|
return sha256Hex(input).substring(0, length);
|
|
54
46
|
}
|
|
@@ -63,9 +55,18 @@ export function createCacheKey(namespace, url, vary) {
|
|
|
63
55
|
const urlHash = createHashFragment(url, CACHE_CONSTANTS.URL_HASH_LENGTH);
|
|
64
56
|
let varyHash;
|
|
65
57
|
if (vary) {
|
|
66
|
-
|
|
67
|
-
if (
|
|
68
|
-
|
|
58
|
+
let varyString;
|
|
59
|
+
if (typeof vary === 'string') {
|
|
60
|
+
varyString = vary;
|
|
61
|
+
}
|
|
62
|
+
else {
|
|
63
|
+
try {
|
|
64
|
+
varyString = stableJsonStringify(vary);
|
|
65
|
+
}
|
|
66
|
+
catch {
|
|
67
|
+
return null;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
69
70
|
if (varyString) {
|
|
70
71
|
varyHash = createHashFragment(varyString, CACHE_CONSTANTS.VARY_HASH_LENGTH);
|
|
71
72
|
}
|
|
@@ -102,9 +103,12 @@ class InMemoryCacheStore {
|
|
|
102
103
|
if (!this.isEnabled())
|
|
103
104
|
return [];
|
|
104
105
|
const now = Date.now();
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
.
|
|
106
|
+
const result = [];
|
|
107
|
+
for (const [key, entry] of this.entries) {
|
|
108
|
+
if (entry.expiresAtMs > now)
|
|
109
|
+
result.push(key);
|
|
110
|
+
}
|
|
111
|
+
return result;
|
|
108
112
|
}
|
|
109
113
|
onUpdate(listener) {
|
|
110
114
|
const wrapped = (event) => {
|
|
@@ -131,7 +135,8 @@ class InMemoryCacheStore {
|
|
|
131
135
|
const entry = this.entries.get(cacheKey);
|
|
132
136
|
if (!entry)
|
|
133
137
|
return undefined;
|
|
134
|
-
|
|
138
|
+
const now = Date.now();
|
|
139
|
+
if (entry.expiresAtMs <= now) {
|
|
135
140
|
this.entries.delete(cacheKey);
|
|
136
141
|
return undefined;
|
|
137
142
|
}
|
|
@@ -157,12 +162,11 @@ class InMemoryCacheStore {
|
|
|
157
162
|
};
|
|
158
163
|
this.entries.delete(cacheKey);
|
|
159
164
|
this.entries.set(cacheKey, entry);
|
|
160
|
-
// Eviction
|
|
165
|
+
// Eviction (LRU: first insertion-order key)
|
|
161
166
|
if (this.entries.size > this.max) {
|
|
162
167
|
const firstKey = this.entries.keys().next();
|
|
163
|
-
if (!firstKey.done)
|
|
168
|
+
if (!firstKey.done)
|
|
164
169
|
this.entries.delete(firstKey.value);
|
|
165
|
-
}
|
|
166
170
|
}
|
|
167
171
|
this.notify(cacheKey);
|
|
168
172
|
}
|
|
@@ -170,9 +174,9 @@ class InMemoryCacheStore {
|
|
|
170
174
|
if (this.updateEmitter.listenerCount('update') === 0)
|
|
171
175
|
return;
|
|
172
176
|
const parts = parseCacheKey(cacheKey);
|
|
173
|
-
if (parts)
|
|
174
|
-
|
|
175
|
-
}
|
|
177
|
+
if (!parts)
|
|
178
|
+
return;
|
|
179
|
+
this.updateEmitter.emit('update', { cacheKey, ...parts });
|
|
176
180
|
}
|
|
177
181
|
logError(message, cacheKey, error) {
|
|
178
182
|
logWarn(message, {
|
|
@@ -231,7 +235,6 @@ function resolveCachedMarkdownText(raw) {
|
|
|
231
235
|
return raw;
|
|
232
236
|
}
|
|
233
237
|
export function registerCachedContentResource(server, serverIcons) {
|
|
234
|
-
// Resource Registration
|
|
235
238
|
server.registerResource('cached-content', new ResourceTemplate('superfetch://cache/{namespace}/{urlHash}', {
|
|
236
239
|
list: listCachedResources,
|
|
237
240
|
}), {
|
|
@@ -288,7 +291,7 @@ export function registerCachedContentResource(server, serverIcons) {
|
|
|
288
291
|
store.onUpdate(({ cacheKey }) => {
|
|
289
292
|
if (!server.isConnected() || !initialized)
|
|
290
293
|
return;
|
|
291
|
-
// Check capabilities via unsafe cast
|
|
294
|
+
// Check capabilities via unsafe cast (SDK limitation)
|
|
292
295
|
const capabilities = server.server.getClientCapabilities();
|
|
293
296
|
const uri = toResourceUri(cacheKey);
|
|
294
297
|
if (capabilities?.resources?.subscribe && uri && subscriptions.has(uri)) {
|
package/dist/config.js
CHANGED
|
@@ -3,16 +3,19 @@ import { findPackageJSON } from 'node:module';
|
|
|
3
3
|
import { isIP } from 'node:net';
|
|
4
4
|
import process from 'node:process';
|
|
5
5
|
import { domainToASCII } from 'node:url';
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
|
11
|
-
if (typeof packageJson.version !== 'string') {
|
|
12
|
-
|
|
6
|
+
function readServerVersion(moduleUrl) {
|
|
7
|
+
const packageJsonPath = findPackageJSON(moduleUrl);
|
|
8
|
+
if (!packageJsonPath)
|
|
9
|
+
throw new Error('package.json not found');
|
|
10
|
+
const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
|
11
|
+
if (typeof packageJson.version !== 'string') {
|
|
12
|
+
throw new Error('package.json version is missing');
|
|
13
|
+
}
|
|
14
|
+
return packageJson.version;
|
|
13
15
|
}
|
|
14
|
-
export const serverVersion =
|
|
16
|
+
export const serverVersion = readServerVersion(import.meta.url);
|
|
15
17
|
const LOG_LEVELS = ['debug', 'info', 'warn', 'error'];
|
|
18
|
+
const ALLOWED_LOG_LEVELS = new Set(LOG_LEVELS);
|
|
16
19
|
const DEFAULT_HEADING_KEYWORDS = [
|
|
17
20
|
'overview',
|
|
18
21
|
'introduction',
|
|
@@ -34,6 +37,9 @@ const DEFAULT_HEADING_KEYWORDS = [
|
|
|
34
37
|
'acknowledgments',
|
|
35
38
|
'appendix',
|
|
36
39
|
];
|
|
40
|
+
class ConfigError extends Error {
|
|
41
|
+
name = 'ConfigError';
|
|
42
|
+
}
|
|
37
43
|
function isMissingEnvFileError(error) {
|
|
38
44
|
if (!error || typeof error !== 'object')
|
|
39
45
|
return false;
|
|
@@ -54,23 +60,20 @@ function loadEnvFileIfAvailable() {
|
|
|
54
60
|
}
|
|
55
61
|
loadEnvFileIfAvailable();
|
|
56
62
|
const { env } = process;
|
|
57
|
-
class ConfigError extends Error {
|
|
58
|
-
name = 'ConfigError';
|
|
59
|
-
}
|
|
60
63
|
function buildIpv4(parts) {
|
|
61
64
|
return parts.join('.');
|
|
62
65
|
}
|
|
63
|
-
function formatHostForUrl(hostname) {
|
|
64
|
-
if (hostname.includes(':') && !hostname.startsWith('['))
|
|
65
|
-
return `[${hostname}]`;
|
|
66
|
-
return hostname;
|
|
67
|
-
}
|
|
68
66
|
function stripTrailingDots(value) {
|
|
69
67
|
let result = value;
|
|
70
68
|
while (result.endsWith('.'))
|
|
71
69
|
result = result.slice(0, -1);
|
|
72
70
|
return result;
|
|
73
71
|
}
|
|
72
|
+
function formatHostForUrl(hostname) {
|
|
73
|
+
if (hostname.includes(':') && !hostname.startsWith('['))
|
|
74
|
+
return `[${hostname}]`;
|
|
75
|
+
return hostname;
|
|
76
|
+
}
|
|
74
77
|
function normalizeHostname(value) {
|
|
75
78
|
const trimmed = value.trim();
|
|
76
79
|
if (!trimmed)
|
|
@@ -86,24 +89,29 @@ function normalizeHostValue(value) {
|
|
|
86
89
|
const raw = value.trim();
|
|
87
90
|
if (!raw)
|
|
88
91
|
return null;
|
|
92
|
+
// Full URL
|
|
89
93
|
if (raw.includes('://')) {
|
|
90
94
|
if (!URL.canParse(raw))
|
|
91
95
|
return null;
|
|
92
96
|
return normalizeHostname(new URL(raw).hostname);
|
|
93
97
|
}
|
|
98
|
+
// host[:port]
|
|
94
99
|
const candidateUrl = `http://${raw}`;
|
|
95
100
|
if (URL.canParse(candidateUrl)) {
|
|
96
101
|
return normalizeHostname(new URL(candidateUrl).hostname);
|
|
97
102
|
}
|
|
98
103
|
const lowered = raw.toLowerCase();
|
|
104
|
+
// [::1]:port
|
|
99
105
|
if (lowered.startsWith('[')) {
|
|
100
106
|
const end = lowered.indexOf(']');
|
|
101
107
|
if (end === -1)
|
|
102
108
|
return null;
|
|
103
109
|
return normalizeHostname(lowered.slice(1, end));
|
|
104
110
|
}
|
|
111
|
+
// Bare IPv6
|
|
105
112
|
if (isIP(lowered) === 6)
|
|
106
113
|
return stripTrailingDots(lowered);
|
|
114
|
+
// Split host:port (single colon only)
|
|
107
115
|
const firstColon = lowered.indexOf(':');
|
|
108
116
|
if (firstColon === -1)
|
|
109
117
|
return normalizeHostname(lowered);
|
|
@@ -159,27 +167,6 @@ function normalizeLocale(value) {
|
|
|
159
167
|
return undefined;
|
|
160
168
|
return trimmed;
|
|
161
169
|
}
|
|
162
|
-
function parseUrlEnv(value, name) {
|
|
163
|
-
if (!value)
|
|
164
|
-
return undefined;
|
|
165
|
-
if (!URL.canParse(value)) {
|
|
166
|
-
throw new ConfigError(`Invalid ${name} value: ${value}`);
|
|
167
|
-
}
|
|
168
|
-
return new URL(value);
|
|
169
|
-
}
|
|
170
|
-
function readUrlEnv(name) {
|
|
171
|
-
return parseUrlEnv(env[name], name);
|
|
172
|
-
}
|
|
173
|
-
function parseAllowedHosts(envValue) {
|
|
174
|
-
const hosts = new Set();
|
|
175
|
-
for (const entry of parseList(envValue)) {
|
|
176
|
-
const normalized = normalizeHostValue(entry);
|
|
177
|
-
if (normalized)
|
|
178
|
-
hosts.add(normalized);
|
|
179
|
-
}
|
|
180
|
-
return hosts;
|
|
181
|
-
}
|
|
182
|
-
const ALLOWED_LOG_LEVELS = new Set(LOG_LEVELS);
|
|
183
170
|
function isLogLevel(value) {
|
|
184
171
|
return ALLOWED_LOG_LEVELS.has(value);
|
|
185
172
|
}
|
|
@@ -202,6 +189,26 @@ function parsePort(envValue) {
|
|
|
202
189
|
return 0;
|
|
203
190
|
return parseInteger(envValue, 3000, 1024, 65535);
|
|
204
191
|
}
|
|
192
|
+
function parseUrlEnv(value, name) {
|
|
193
|
+
if (!value)
|
|
194
|
+
return undefined;
|
|
195
|
+
if (!URL.canParse(value)) {
|
|
196
|
+
throw new ConfigError(`Invalid ${name} value: ${value}`);
|
|
197
|
+
}
|
|
198
|
+
return new URL(value);
|
|
199
|
+
}
|
|
200
|
+
function readUrlEnv(name) {
|
|
201
|
+
return parseUrlEnv(env[name], name);
|
|
202
|
+
}
|
|
203
|
+
function parseAllowedHosts(envValue) {
|
|
204
|
+
const hosts = new Set();
|
|
205
|
+
for (const entry of parseList(envValue)) {
|
|
206
|
+
const normalized = normalizeHostValue(entry);
|
|
207
|
+
if (normalized)
|
|
208
|
+
hosts.add(normalized);
|
|
209
|
+
}
|
|
210
|
+
return hosts;
|
|
211
|
+
}
|
|
205
212
|
const MAX_HTML_BYTES = 10 * 1024 * 1024; // 10 MB
|
|
206
213
|
const MAX_INLINE_CONTENT_CHARS = 0;
|
|
207
214
|
const DEFAULT_SESSION_TTL_MS = 30 * 60 * 1000;
|
|
@@ -216,23 +223,28 @@ const DEFAULT_TOOL_TIMEOUT_MS = DEFAULT_FETCH_TIMEOUT_MS +
|
|
|
216
223
|
DEFAULT_TOOL_TIMEOUT_PADDING_MS;
|
|
217
224
|
function resolveWorkerResourceLimits() {
|
|
218
225
|
const limits = {};
|
|
226
|
+
let hasAny = false;
|
|
219
227
|
const maxOldGenerationSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_MAX_OLD_GENERATION_MB, 1);
|
|
220
228
|
const maxYoungGenerationSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_MAX_YOUNG_GENERATION_MB, 1);
|
|
221
229
|
const codeRangeSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_CODE_RANGE_MB, 1);
|
|
222
230
|
const stackSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_STACK_MB, 1);
|
|
223
231
|
if (maxOldGenerationSizeMb !== undefined) {
|
|
224
232
|
limits.maxOldGenerationSizeMb = maxOldGenerationSizeMb;
|
|
233
|
+
hasAny = true;
|
|
225
234
|
}
|
|
226
235
|
if (maxYoungGenerationSizeMb !== undefined) {
|
|
227
236
|
limits.maxYoungGenerationSizeMb = maxYoungGenerationSizeMb;
|
|
237
|
+
hasAny = true;
|
|
228
238
|
}
|
|
229
239
|
if (codeRangeSizeMb !== undefined) {
|
|
230
240
|
limits.codeRangeSizeMb = codeRangeSizeMb;
|
|
241
|
+
hasAny = true;
|
|
231
242
|
}
|
|
232
243
|
if (stackSizeMb !== undefined) {
|
|
233
244
|
limits.stackSizeMb = stackSizeMb;
|
|
245
|
+
hasAny = true;
|
|
234
246
|
}
|
|
235
|
-
return
|
|
247
|
+
return hasAny ? limits : undefined;
|
|
236
248
|
}
|
|
237
249
|
function readOAuthUrls(baseUrl) {
|
|
238
250
|
const issuerUrl = readUrlEnv('OAUTH_ISSUER_URL');
|
|
@@ -318,8 +330,8 @@ const host = (env.HOST ?? LOOPBACK_V4).trim();
|
|
|
318
330
|
const port = parsePort(env.PORT);
|
|
319
331
|
const maxConnections = parseInteger(env.SERVER_MAX_CONNECTIONS, 0, 0);
|
|
320
332
|
const blockPrivateConnections = parseBoolean(env.SERVER_BLOCK_PRIVATE_CONNECTIONS, false);
|
|
321
|
-
const baseUrl = new URL(`http://${formatHostForUrl(host)}:${port}`);
|
|
322
333
|
const allowRemote = parseBoolean(env.ALLOW_REMOTE, false);
|
|
334
|
+
const baseUrl = new URL(`http://${formatHostForUrl(host)}:${port}`);
|
|
323
335
|
const runtimeState = {
|
|
324
336
|
httpMode: false,
|
|
325
337
|
};
|
package/dist/dom-noise-removal.js
CHANGED
|
@@ -8,6 +8,7 @@ const DIALOG_MIN_CHARS_FOR_PRESERVATION = 500;
|
|
|
8
8
|
const NAV_FOOTER_MIN_CHARS_FOR_PRESERVATION = 500;
|
|
9
9
|
// Merged markers for fast rejection
|
|
10
10
|
const HTML_DOCUMENT_MARKERS = /<\s*(?:!doctype|html|head|body)\b/i;
|
|
11
|
+
const HTML_FRAGMENT_MARKERS = /<\s*(?:article|main|section|div|nav|footer|header|aside|table|ul|ol)\b/i;
|
|
11
12
|
// Split into smaller regexes to stay within sonarjs/regex-complexity limit
|
|
12
13
|
const NOISE_PATTERNS = [
|
|
13
14
|
/<\s*(?:script|style|noscript|iframe|nav|footer|header|form|button|input|select|textarea|svg|canvas)\b/i,
|
|
@@ -451,7 +452,9 @@ function mayContainNoise(html) {
|
|
|
451
452
|
return NOISE_PATTERNS.some((re) => re.test(sample));
|
|
452
453
|
}
|
|
453
454
|
export function removeNoiseFromHtml(html, document, baseUrl) {
|
|
454
|
-
const shouldParse = isFullDocumentHtml(html) ||
|
|
455
|
+
const shouldParse = isFullDocumentHtml(html) ||
|
|
456
|
+
mayContainNoise(html) ||
|
|
457
|
+
HTML_FRAGMENT_MARKERS.test(html);
|
|
455
458
|
if (!shouldParse)
|
|
456
459
|
return html;
|
|
457
460
|
try {
|
package/dist/fetch.d.ts
CHANGED