@j0hanz/superfetch 2.2.2 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +358 -363
- package/dist/assets/logo.svg +24835 -0
- package/dist/cache.d.ts +0 -1
- package/dist/cache.js +71 -29
- package/dist/config.d.ts +2 -1
- package/dist/config.js +11 -7
- package/dist/crypto.d.ts +0 -1
- package/dist/crypto.js +0 -1
- package/dist/dom-noise-removal.d.ts +0 -1
- package/dist/dom-noise-removal.js +50 -45
- package/dist/errors.d.ts +0 -1
- package/dist/errors.js +0 -1
- package/dist/fetch.d.ts +0 -1
- package/dist/fetch.js +61 -54
- package/dist/host-normalization.d.ts +1 -0
- package/dist/host-normalization.js +47 -0
- package/dist/http-native.d.ts +0 -1
- package/dist/http-native.js +92 -28
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -1
- package/dist/instructions.md +41 -41
- package/dist/json.d.ts +0 -1
- package/dist/json.js +0 -1
- package/dist/language-detection.d.ts +0 -1
- package/dist/language-detection.js +10 -2
- package/dist/markdown-cleanup.d.ts +6 -13
- package/dist/markdown-cleanup.js +252 -34
- package/dist/mcp-validator.d.ts +14 -0
- package/dist/mcp-validator.js +22 -0
- package/dist/mcp.d.ts +0 -1
- package/dist/mcp.js +20 -10
- package/dist/observability.d.ts +2 -1
- package/dist/observability.js +30 -3
- package/dist/server-tuning.d.ts +9 -0
- package/dist/server-tuning.js +30 -0
- package/dist/{http-utils.d.ts → session.d.ts} +0 -25
- package/dist/{http-utils.js → session.js} +11 -104
- package/dist/tools.d.ts +5 -4
- package/dist/tools.js +46 -41
- package/dist/transform-types.d.ts +38 -1
- package/dist/transform-types.js +0 -1
- package/dist/transform.d.ts +12 -7
- package/dist/transform.js +205 -344
- package/dist/type-guards.d.ts +0 -1
- package/dist/type-guards.js +0 -1
- package/dist/workers/transform-worker.d.ts +0 -1
- package/dist/workers/transform-worker.js +29 -19
- package/package.json +84 -85
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/crypto.d.ts.map +0 -1
- package/dist/crypto.js.map +0 -1
- package/dist/dom-noise-removal.d.ts.map +0 -1
- package/dist/dom-noise-removal.js.map +0 -1
- package/dist/errors.d.ts.map +0 -1
- package/dist/errors.js.map +0 -1
- package/dist/fetch.d.ts.map +0 -1
- package/dist/fetch.js.map +0 -1
- package/dist/http-native.d.ts.map +0 -1
- package/dist/http-native.js.map +0 -1
- package/dist/http-utils.d.ts.map +0 -1
- package/dist/http-utils.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/json.d.ts.map +0 -1
- package/dist/json.js.map +0 -1
- package/dist/language-detection.d.ts.map +0 -1
- package/dist/language-detection.js.map +0 -1
- package/dist/markdown-cleanup.d.ts.map +0 -1
- package/dist/markdown-cleanup.js.map +0 -1
- package/dist/mcp.d.ts.map +0 -1
- package/dist/mcp.js.map +0 -1
- package/dist/observability.d.ts.map +0 -1
- package/dist/observability.js.map +0 -1
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js.map +0 -1
- package/dist/transform-types.d.ts.map +0 -1
- package/dist/transform-types.js.map +0 -1
- package/dist/transform.d.ts.map +0 -1
- package/dist/transform.js.map +0 -1
- package/dist/type-guards.d.ts.map +0 -1
- package/dist/type-guards.js.map +0 -1
- package/dist/workers/transform-worker.d.ts.map +0 -1
- package/dist/workers/transform-worker.js.map +0 -1
package/dist/cache.d.ts
CHANGED
|
@@ -40,4 +40,3 @@ export declare function registerCachedContentResource(server: McpServer): void;
|
|
|
40
40
|
export declare function generateSafeFilename(url: string, title?: string, hashFallback?: string, extension?: string): string;
|
|
41
41
|
export declare function handleDownload(res: ServerResponse, namespace: string, hash: string): void;
|
|
42
42
|
export {};
|
|
43
|
-
//# sourceMappingURL=cache.d.ts.map
|
package/dist/cache.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { LRUCache } from 'lru-cache';
|
|
2
1
|
import { ResourceTemplate } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
3
2
|
import { ErrorCode, McpError, SubscribeRequestSchema, UnsubscribeRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
4
3
|
import { config } from './config.js';
|
|
@@ -105,10 +104,65 @@ export function toResourceUri(cacheKey) {
|
|
|
105
104
|
return null;
|
|
106
105
|
return buildCacheResourceUri(parts.namespace, parts.urlHash);
|
|
107
106
|
}
|
|
108
|
-
|
|
107
|
+
// Cache behavior contract (native implementation):
|
|
108
|
+
// - Max entries: config.cache.maxKeys
|
|
109
|
+
// - TTL in ms: config.cache.ttl * 1000
|
|
110
|
+
// - Access does NOT extend TTL
|
|
111
|
+
class NativeLruCache {
|
|
112
|
+
max;
|
|
113
|
+
ttlMs;
|
|
114
|
+
entries = new Map();
|
|
115
|
+
constructor({ max, ttlMs }) {
|
|
116
|
+
this.max = max;
|
|
117
|
+
this.ttlMs = ttlMs;
|
|
118
|
+
}
|
|
119
|
+
get(key) {
|
|
120
|
+
const entry = this.entries.get(key);
|
|
121
|
+
if (!entry)
|
|
122
|
+
return undefined;
|
|
123
|
+
if (this.isExpired(entry, Date.now())) {
|
|
124
|
+
this.entries.delete(key);
|
|
125
|
+
return undefined;
|
|
126
|
+
}
|
|
127
|
+
// Refresh LRU order without extending TTL.
|
|
128
|
+
this.entries.delete(key);
|
|
129
|
+
this.entries.set(key, entry);
|
|
130
|
+
return entry.value;
|
|
131
|
+
}
|
|
132
|
+
set(key, value) {
|
|
133
|
+
if (this.max <= 0 || this.ttlMs <= 0)
|
|
134
|
+
return;
|
|
135
|
+
this.entries.delete(key);
|
|
136
|
+
this.entries.set(key, {
|
|
137
|
+
value,
|
|
138
|
+
expiresAtMs: Date.now() + this.ttlMs,
|
|
139
|
+
});
|
|
140
|
+
this.purgeExpired(Date.now());
|
|
141
|
+
while (this.entries.size > this.max) {
|
|
142
|
+
const oldestKey = this.entries.keys().next().value;
|
|
143
|
+
if (oldestKey === undefined)
|
|
144
|
+
break;
|
|
145
|
+
this.entries.delete(oldestKey);
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
keys() {
|
|
149
|
+
this.purgeExpired(Date.now());
|
|
150
|
+
return [...this.entries.keys()];
|
|
151
|
+
}
|
|
152
|
+
purgeExpired(now) {
|
|
153
|
+
for (const [key, entry] of this.entries) {
|
|
154
|
+
if (this.isExpired(entry, now)) {
|
|
155
|
+
this.entries.delete(key);
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
isExpired(entry, now) {
|
|
160
|
+
return entry.expiresAtMs <= now;
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
const contentCache = new NativeLruCache({
|
|
109
164
|
max: config.cache.maxKeys,
|
|
110
|
-
|
|
111
|
-
updateAgeOnGet: false,
|
|
165
|
+
ttlMs: config.cache.ttl * 1000,
|
|
112
166
|
});
|
|
113
167
|
const updateListeners = new Set();
|
|
114
168
|
export function onCacheUpdate(listener) {
|
|
@@ -396,43 +450,32 @@ function buildMarkdownContentResponse(uri, content) {
|
|
|
396
450
|
],
|
|
397
451
|
};
|
|
398
452
|
}
|
|
399
|
-
function isSingleParam(value) {
|
|
400
|
-
return typeof value === 'string';
|
|
401
|
-
}
|
|
402
453
|
function parseDownloadParams(namespace, hash) {
|
|
403
|
-
|
|
454
|
+
const resolvedNamespace = resolveStringParam(namespace);
|
|
455
|
+
const resolvedHash = resolveStringParam(hash);
|
|
456
|
+
if (!resolvedNamespace || !resolvedHash)
|
|
404
457
|
return null;
|
|
405
|
-
if (!
|
|
458
|
+
if (!isValidNamespace(resolvedNamespace))
|
|
406
459
|
return null;
|
|
407
|
-
if (!
|
|
460
|
+
if (!isValidHash(resolvedHash))
|
|
408
461
|
return null;
|
|
409
|
-
|
|
410
|
-
return null;
|
|
411
|
-
return { namespace, hash };
|
|
462
|
+
return { namespace: resolvedNamespace, hash: resolvedHash };
|
|
412
463
|
}
|
|
413
464
|
function buildCacheKeyFromParams(params) {
|
|
414
465
|
return `${params.namespace}:${params.hash}`;
|
|
415
466
|
}
|
|
467
|
+
function sendJsonError(res, status, error, code) {
|
|
468
|
+
res.writeHead(status, { 'Content-Type': 'application/json' });
|
|
469
|
+
res.end(JSON.stringify({ error, code }));
|
|
470
|
+
}
|
|
416
471
|
function respondBadRequest(res, message) {
|
|
417
|
-
res
|
|
418
|
-
res.end(JSON.stringify({
|
|
419
|
-
error: message,
|
|
420
|
-
code: 'BAD_REQUEST',
|
|
421
|
-
}));
|
|
472
|
+
sendJsonError(res, 400, message, 'BAD_REQUEST');
|
|
422
473
|
}
|
|
423
474
|
function respondNotFound(res) {
|
|
424
|
-
res
|
|
425
|
-
res.end(JSON.stringify({
|
|
426
|
-
error: 'Content not found or expired',
|
|
427
|
-
code: 'NOT_FOUND',
|
|
428
|
-
}));
|
|
475
|
+
sendJsonError(res, 404, 'Content not found or expired', 'NOT_FOUND');
|
|
429
476
|
}
|
|
430
477
|
function respondServiceUnavailable(res) {
|
|
431
|
-
res
|
|
432
|
-
res.end(JSON.stringify({
|
|
433
|
-
error: 'Download service is disabled',
|
|
434
|
-
code: 'SERVICE_UNAVAILABLE',
|
|
435
|
-
}));
|
|
478
|
+
sendJsonError(res, 503, 'Download service is disabled', 'SERVICE_UNAVAILABLE');
|
|
436
479
|
}
|
|
437
480
|
export function generateSafeFilename(url, title, hashFallback, extension = '.md') {
|
|
438
481
|
const fromUrl = extractFilenameFromUrl(url);
|
|
@@ -567,4 +610,3 @@ export function handleDownload(res, namespace, hash) {
|
|
|
567
610
|
logDebug('Serving download', { cacheKey, fileName: payload.fileName });
|
|
568
611
|
sendDownloadPayload(res, payload);
|
|
569
612
|
}
|
|
570
|
-
//# sourceMappingURL=cache.js.map
|
package/dist/config.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
export declare const serverVersion: string;
|
|
1
2
|
export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
|
|
2
3
|
export type TransformMetadataFormat = 'markdown' | 'frontmatter';
|
|
3
4
|
interface AuthConfig {
|
|
@@ -43,6 +44,7 @@ export declare const config: {
|
|
|
43
44
|
};
|
|
44
45
|
transform: {
|
|
45
46
|
timeoutMs: number;
|
|
47
|
+
stageWarnRatio: number;
|
|
46
48
|
metadataFormat: TransformMetadataFormat;
|
|
47
49
|
};
|
|
48
50
|
tools: {
|
|
@@ -89,4 +91,3 @@ export declare const config: {
|
|
|
89
91
|
};
|
|
90
92
|
export declare function enableHttpMode(): void;
|
|
91
93
|
export {};
|
|
92
|
-
//# sourceMappingURL=config.d.ts.map
|
package/dist/config.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import packageJson from '../package.json' with { type: 'json' };
|
|
2
|
+
export const serverVersion = packageJson.version;
|
|
2
3
|
function buildIpv4(parts) {
|
|
3
4
|
return parts.join('.');
|
|
4
5
|
}
|
|
@@ -73,6 +74,9 @@ function parseUrlEnv(value, name) {
|
|
|
73
74
|
}
|
|
74
75
|
return new URL(value);
|
|
75
76
|
}
|
|
77
|
+
function readUrlEnv(name) {
|
|
78
|
+
return parseUrlEnv(process.env[name], name);
|
|
79
|
+
}
|
|
76
80
|
function parseAllowedHosts(envValue) {
|
|
77
81
|
const hosts = new Set();
|
|
78
82
|
for (const entry of parseList(envValue)) {
|
|
@@ -108,16 +112,16 @@ const DEFAULT_TOOL_TIMEOUT_MS = TIMEOUT.DEFAULT_FETCH_TIMEOUT_MS +
|
|
|
108
112
|
5000;
|
|
109
113
|
function readCoreOAuthUrls() {
|
|
110
114
|
return {
|
|
111
|
-
issuerUrl:
|
|
112
|
-
authorizationUrl:
|
|
113
|
-
tokenUrl:
|
|
115
|
+
issuerUrl: readUrlEnv('OAUTH_ISSUER_URL'),
|
|
116
|
+
authorizationUrl: readUrlEnv('OAUTH_AUTHORIZATION_URL'),
|
|
117
|
+
tokenUrl: readUrlEnv('OAUTH_TOKEN_URL'),
|
|
114
118
|
};
|
|
115
119
|
}
|
|
116
120
|
function readOptionalOAuthUrls(baseUrl) {
|
|
117
121
|
return {
|
|
118
|
-
revocationUrl:
|
|
119
|
-
registrationUrl:
|
|
120
|
-
introspectionUrl:
|
|
122
|
+
revocationUrl: readUrlEnv('OAUTH_REVOCATION_URL'),
|
|
123
|
+
registrationUrl: readUrlEnv('OAUTH_REGISTRATION_URL'),
|
|
124
|
+
introspectionUrl: readUrlEnv('OAUTH_INTROSPECTION_URL'),
|
|
121
125
|
resourceUrl: parseUrlEnv(process.env.OAUTH_RESOURCE_URL, 'OAUTH_RESOURCE_URL') ??
|
|
122
126
|
new URL('/mcp', baseUrl),
|
|
123
127
|
};
|
|
@@ -196,6 +200,7 @@ export const config = {
|
|
|
196
200
|
},
|
|
197
201
|
transform: {
|
|
198
202
|
timeoutMs: TIMEOUT.DEFAULT_TRANSFORM_TIMEOUT_MS,
|
|
203
|
+
stageWarnRatio: parseFloat(process.env.TRANSFORM_STAGE_WARN_RATIO ?? '0.5'),
|
|
199
204
|
metadataFormat: parseTransformMetadataFormat(process.env.TRANSFORM_METADATA_FORMAT),
|
|
200
205
|
},
|
|
201
206
|
tools: {
|
|
@@ -271,4 +276,3 @@ export const config = {
|
|
|
271
276
|
export function enableHttpMode() {
|
|
272
277
|
runtimeState.httpMode = true;
|
|
273
278
|
}
|
|
274
|
-
//# sourceMappingURL=config.js.map
|
package/dist/crypto.d.ts
CHANGED
package/dist/crypto.js
CHANGED
|
@@ -56,6 +56,37 @@ const STRUCTURAL_TAGS = new Set([
|
|
|
56
56
|
'canvas',
|
|
57
57
|
]);
|
|
58
58
|
const ALWAYS_NOISE_TAGS = new Set(['nav', 'footer']);
|
|
59
|
+
const BASE_NOISE_SELECTORS = [
|
|
60
|
+
'nav',
|
|
61
|
+
'footer',
|
|
62
|
+
'header[class*="site"]',
|
|
63
|
+
'header[class*="nav"]',
|
|
64
|
+
'header[class*="menu"]',
|
|
65
|
+
'[role="banner"]',
|
|
66
|
+
'[role="navigation"]',
|
|
67
|
+
'[role="dialog"]',
|
|
68
|
+
'[style*="display: none"]',
|
|
69
|
+
'[style*="display:none"]',
|
|
70
|
+
'[hidden]',
|
|
71
|
+
'[aria-hidden="true"]',
|
|
72
|
+
];
|
|
73
|
+
const BASE_NOISE_SELECTOR = BASE_NOISE_SELECTORS.join(',');
|
|
74
|
+
const CANDIDATE_NOISE_SELECTOR = [
|
|
75
|
+
...STRUCTURAL_TAGS,
|
|
76
|
+
...ALWAYS_NOISE_TAGS,
|
|
77
|
+
'aside',
|
|
78
|
+
'header',
|
|
79
|
+
'[class]',
|
|
80
|
+
'[id]',
|
|
81
|
+
'[role]',
|
|
82
|
+
'[style]',
|
|
83
|
+
].join(',');
|
|
84
|
+
function buildNoiseSelector(extraSelectors) {
|
|
85
|
+
const extra = extraSelectors.filter((selector) => selector.trim().length > 0);
|
|
86
|
+
if (extra.length === 0)
|
|
87
|
+
return BASE_NOISE_SELECTOR;
|
|
88
|
+
return `${BASE_NOISE_SELECTOR},${extra.join(',')}`;
|
|
89
|
+
}
|
|
59
90
|
const NAVIGATION_ROLES = new Set([
|
|
60
91
|
'navigation',
|
|
61
92
|
'banner',
|
|
@@ -126,6 +157,16 @@ function getPromoTokens() {
|
|
|
126
157
|
promoTokensCache = tokens;
|
|
127
158
|
return tokens;
|
|
128
159
|
}
|
|
160
|
+
let promoRegexCache = null;
|
|
161
|
+
function getPromoRegex() {
|
|
162
|
+
if (promoRegexCache)
|
|
163
|
+
return promoRegexCache;
|
|
164
|
+
const tokens = Array.from(getPromoTokens());
|
|
165
|
+
const escaped = tokens.map((t) => t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
|
|
166
|
+
const pattern = `(?:^|[^a-z0-9])(?:${escaped.join('|')})(?:$|[^a-z0-9])`;
|
|
167
|
+
promoRegexCache = new RegExp(pattern, 'i');
|
|
168
|
+
return promoRegexCache;
|
|
169
|
+
}
|
|
129
170
|
const HEADER_NOISE_PATTERN = /\b(site-header|masthead|topbar|navbar|nav(?:bar)?|menu|header-nav)\b/i;
|
|
130
171
|
const FIXED_PATTERN = /\b(fixed|sticky)\b/;
|
|
131
172
|
const HIGH_Z_PATTERN = /\bz-(?:4\d|50)\b/;
|
|
@@ -230,18 +271,9 @@ function isElementHidden(element) {
|
|
|
230
271
|
function hasNoiseRole(role) {
|
|
231
272
|
return role !== null && NAVIGATION_ROLES.has(role);
|
|
232
273
|
}
|
|
233
|
-
function tokenizeIdentifierLikeText(value) {
|
|
234
|
-
return value
|
|
235
|
-
.toLowerCase()
|
|
236
|
-
.replace(/[^a-z0-9]+/g, ' ')
|
|
237
|
-
.trim()
|
|
238
|
-
.split(' ')
|
|
239
|
-
.filter(Boolean);
|
|
240
|
-
}
|
|
241
274
|
function matchesPromoIdOrClass(className, id) {
|
|
242
|
-
const
|
|
243
|
-
|
|
244
|
-
return tokens.some((token) => promoTokens.has(token));
|
|
275
|
+
const regex = getPromoRegex();
|
|
276
|
+
return regex.test(className) || regex.test(id);
|
|
245
277
|
}
|
|
246
278
|
function matchesFixedOrHighZIsolate(className) {
|
|
247
279
|
return (FIXED_PATTERN.test(className) ||
|
|
@@ -279,9 +311,10 @@ function isNodeListLike(value) {
|
|
|
279
311
|
return isObject(value) && typeof value.length === 'number';
|
|
280
312
|
}
|
|
281
313
|
function tryGetNodeListItem(nodes, index) {
|
|
282
|
-
if (typeof nodes.item === 'function')
|
|
314
|
+
if ('item' in nodes && typeof nodes.item === 'function') {
|
|
283
315
|
return nodes.item(index);
|
|
284
|
-
|
|
316
|
+
}
|
|
317
|
+
return nodes[index] ?? null;
|
|
285
318
|
}
|
|
286
319
|
function removeNoiseFromNodeListLike(nodes, shouldCheckNoise) {
|
|
287
320
|
for (let index = nodes.length - 1; index >= 0; index -= 1) {
|
|
@@ -309,39 +342,12 @@ function removeNoiseNodes(nodes, shouldCheckNoise = true) {
|
|
|
309
342
|
function stripNoiseNodes(document) {
|
|
310
343
|
// Pass 1: Trusted selectors (Common noise)
|
|
311
344
|
// We trust these selectors match actual noise, so we skip the expensive isNoiseElement check
|
|
312
|
-
const baseSelectors = [
|
|
313
|
-
'nav',
|
|
314
|
-
'footer',
|
|
315
|
-
'header[class*="site"]',
|
|
316
|
-
'header[class*="nav"]',
|
|
317
|
-
'header[class*="menu"]',
|
|
318
|
-
'[role="banner"]',
|
|
319
|
-
'[role="navigation"]',
|
|
320
|
-
'[role="dialog"]',
|
|
321
|
-
'[style*="display: none"]',
|
|
322
|
-
'[style*="display:none"]',
|
|
323
|
-
'[hidden]',
|
|
324
|
-
'[aria-hidden="true"]',
|
|
325
|
-
];
|
|
326
345
|
// Add user-configured extra selectors
|
|
327
|
-
const
|
|
328
|
-
const
|
|
329
|
-
|
|
330
|
-
const potentialNoiseNodes = document.querySelectorAll(targetSelectors);
|
|
331
|
-
removeNoiseNodes(potentialNoiseNodes, false);
|
|
332
|
-
}
|
|
346
|
+
const targetSelectors = buildNoiseSelector(config.noiseRemoval.extraSelectors);
|
|
347
|
+
const potentialNoiseNodes = document.querySelectorAll(targetSelectors);
|
|
348
|
+
removeNoiseNodes(potentialNoiseNodes, false);
|
|
333
349
|
// Second pass: check remaining elements for noise patterns (promo, fixed positioning, etc.)
|
|
334
|
-
const
|
|
335
|
-
...STRUCTURAL_TAGS,
|
|
336
|
-
...ALWAYS_NOISE_TAGS,
|
|
337
|
-
'aside',
|
|
338
|
-
'header',
|
|
339
|
-
'[class]',
|
|
340
|
-
'[id]',
|
|
341
|
-
'[role]',
|
|
342
|
-
'[style]',
|
|
343
|
-
].join(',');
|
|
344
|
-
const allElements = document.querySelectorAll(candidateSelectors);
|
|
350
|
+
const allElements = document.querySelectorAll(CANDIDATE_NOISE_SELECTOR);
|
|
345
351
|
removeNoiseNodes(allElements, true);
|
|
346
352
|
}
|
|
347
353
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -479,4 +485,3 @@ export function removeNoiseFromHtml(html, document, baseUrl) {
|
|
|
479
485
|
return html;
|
|
480
486
|
}
|
|
481
487
|
}
|
|
482
|
-
//# sourceMappingURL=dom-noise-removal.js.map
|
package/dist/errors.d.ts
CHANGED
|
@@ -8,4 +8,3 @@ export declare class FetchError extends Error {
|
|
|
8
8
|
export declare function getErrorMessage(error: unknown): string;
|
|
9
9
|
export declare function createErrorWithCode(message: string, code: string): NodeJS.ErrnoException;
|
|
10
10
|
export declare function isSystemError(error: unknown): error is NodeJS.ErrnoException;
|
|
11
|
-
//# sourceMappingURL=errors.d.ts.map
|
package/dist/errors.js
CHANGED
package/dist/fetch.d.ts
CHANGED
package/dist/fetch.js
CHANGED
|
@@ -300,11 +300,11 @@ export function isRawTextContentUrl(url) {
|
|
|
300
300
|
return hasKnownRawTextExtension(lowerBase);
|
|
301
301
|
}
|
|
302
302
|
function hasKnownRawTextExtension(urlBaseLower) {
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
return
|
|
303
|
+
const lastDot = urlBaseLower.lastIndexOf('.');
|
|
304
|
+
if (lastDot === -1)
|
|
305
|
+
return false;
|
|
306
|
+
const ext = urlBaseLower.slice(lastDot);
|
|
307
|
+
return RAW_TEXT_EXTENSIONS.has(ext);
|
|
308
308
|
}
|
|
309
309
|
const DNS_LOOKUP_TIMEOUT_MS = 5000;
|
|
310
310
|
const SLOW_REQUEST_THRESHOLD_MS = 5000;
|
|
@@ -314,25 +314,6 @@ function normalizeLookupResults(addresses, family) {
|
|
|
314
314
|
}
|
|
315
315
|
return [{ address: addresses, family: family ?? 4 }];
|
|
316
316
|
}
|
|
317
|
-
function findBlockedIpError(list, hostname) {
|
|
318
|
-
for (const addr of list) {
|
|
319
|
-
const ip = typeof addr === 'string' ? addr : addr.address;
|
|
320
|
-
if (!isBlockedIp(ip)) {
|
|
321
|
-
continue;
|
|
322
|
-
}
|
|
323
|
-
return createErrorWithCode(`Blocked IP detected for ${hostname}`, 'EBLOCKED');
|
|
324
|
-
}
|
|
325
|
-
return null;
|
|
326
|
-
}
|
|
327
|
-
function findInvalidFamilyError(list, hostname) {
|
|
328
|
-
for (const addr of list) {
|
|
329
|
-
const family = typeof addr === 'string' ? 0 : addr.family;
|
|
330
|
-
if (family === 4 || family === 6)
|
|
331
|
-
continue;
|
|
332
|
-
return createErrorWithCode(`Invalid address family returned for ${hostname}`, 'EINVAL');
|
|
333
|
-
}
|
|
334
|
-
return null;
|
|
335
|
-
}
|
|
336
317
|
function createNoDnsResultsError(hostname) {
|
|
337
318
|
return createErrorWithCode(`No DNS results returned for ${hostname}`, 'ENODATA');
|
|
338
319
|
}
|
|
@@ -358,7 +339,17 @@ function selectLookupResult(list, useAll, hostname) {
|
|
|
358
339
|
};
|
|
359
340
|
}
|
|
360
341
|
function findLookupError(list, hostname) {
|
|
361
|
-
|
|
342
|
+
for (const addr of list) {
|
|
343
|
+
const family = typeof addr === 'string' ? 0 : addr.family;
|
|
344
|
+
if (family !== 4 && family !== 6) {
|
|
345
|
+
return createErrorWithCode(`Invalid address family returned for ${hostname}`, 'EINVAL');
|
|
346
|
+
}
|
|
347
|
+
const ip = typeof addr === 'string' ? addr : addr.address;
|
|
348
|
+
if (isBlockedIp(ip)) {
|
|
349
|
+
return createErrorWithCode(`Blocked IP detected for ${hostname}`, 'EBLOCKED');
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
return null;
|
|
362
353
|
}
|
|
363
354
|
function normalizeAndValidateLookupResults(addresses, resolvedFamily, hostname) {
|
|
364
355
|
const list = normalizeLookupResults(addresses, resolvedFamily);
|
|
@@ -503,6 +494,12 @@ function createRateLimitError(url, headerValue) {
|
|
|
503
494
|
function createHttpError(url, status, statusText) {
|
|
504
495
|
return new FetchError(`HTTP ${status}: ${statusText}`, url, status);
|
|
505
496
|
}
|
|
497
|
+
function createTooManyRedirectsError(url) {
|
|
498
|
+
return new FetchError('Too many redirects', url);
|
|
499
|
+
}
|
|
500
|
+
function createMissingRedirectLocationError(url) {
|
|
501
|
+
return new FetchError('Redirect response missing Location header', url);
|
|
502
|
+
}
|
|
506
503
|
function createSizeLimitError(url, maxBytes) {
|
|
507
504
|
return new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
|
|
508
505
|
}
|
|
@@ -534,21 +531,29 @@ function resolveErrorUrl(error, fallback) {
|
|
|
534
531
|
return requestUrl;
|
|
535
532
|
return fallback;
|
|
536
533
|
}
|
|
537
|
-
function
|
|
538
|
-
if (error
|
|
539
|
-
return
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
if (isTimeoutError(error)) {
|
|
543
|
-
return createTimeoutError(url, timeoutMs);
|
|
544
|
-
}
|
|
545
|
-
return createCanceledError(url);
|
|
534
|
+
function resolveAbortFetchError(error, url, timeoutMs) {
|
|
535
|
+
if (!isAbortError(error))
|
|
536
|
+
return null;
|
|
537
|
+
if (isTimeoutError(error)) {
|
|
538
|
+
return createTimeoutError(url, timeoutMs);
|
|
546
539
|
}
|
|
540
|
+
return createCanceledError(url);
|
|
541
|
+
}
|
|
542
|
+
function resolveUnexpectedFetchError(error, url) {
|
|
547
543
|
if (error instanceof Error) {
|
|
548
544
|
return createNetworkError(url, error.message);
|
|
549
545
|
}
|
|
550
546
|
return createUnknownError(url, 'Unexpected error');
|
|
551
547
|
}
|
|
548
|
+
function mapFetchError(error, fallbackUrl, timeoutMs) {
|
|
549
|
+
if (error instanceof FetchError)
|
|
550
|
+
return error;
|
|
551
|
+
const url = resolveErrorUrl(error, fallbackUrl);
|
|
552
|
+
const abortError = resolveAbortFetchError(error, url, timeoutMs);
|
|
553
|
+
if (abortError)
|
|
554
|
+
return abortError;
|
|
555
|
+
return resolveUnexpectedFetchError(error, url);
|
|
556
|
+
}
|
|
552
557
|
const fetchChannel = diagnosticsChannel.channel('superfetch.fetch');
|
|
553
558
|
function publishFetchEvent(event) {
|
|
554
559
|
if (!fetchChannel.hasSubscribers)
|
|
@@ -713,14 +718,14 @@ function assertRedirectWithinLimit(response, currentUrl, redirectLimit, redirect
|
|
|
713
718
|
if (redirectCount < redirectLimit)
|
|
714
719
|
return;
|
|
715
720
|
cancelResponseBody(response);
|
|
716
|
-
throw
|
|
721
|
+
throw createTooManyRedirectsError(currentUrl);
|
|
717
722
|
}
|
|
718
723
|
function getRedirectLocation(response, currentUrl) {
|
|
719
724
|
const location = response.headers.get('location');
|
|
720
725
|
if (location)
|
|
721
726
|
return location;
|
|
722
727
|
cancelResponseBody(response);
|
|
723
|
-
throw
|
|
728
|
+
throw createMissingRedirectLocationError(currentUrl);
|
|
724
729
|
}
|
|
725
730
|
function annotateRedirectError(error, url) {
|
|
726
731
|
if (!isObject(error))
|
|
@@ -737,26 +742,26 @@ function resolveRedirectTarget(baseUrl, location) {
|
|
|
737
742
|
}
|
|
738
743
|
return validateAndNormalizeUrl(resolved.href);
|
|
739
744
|
}
|
|
745
|
+
async function withRedirectErrorContext(url, fn) {
|
|
746
|
+
try {
|
|
747
|
+
return await fn();
|
|
748
|
+
}
|
|
749
|
+
catch (error) {
|
|
750
|
+
annotateRedirectError(error, url);
|
|
751
|
+
throw error;
|
|
752
|
+
}
|
|
753
|
+
}
|
|
740
754
|
export async function fetchWithRedirects(url, init, maxRedirects) {
|
|
741
755
|
let currentUrl = url;
|
|
742
756
|
const redirectLimit = Math.max(0, maxRedirects);
|
|
743
757
|
for (let redirectCount = 0; redirectCount <= redirectLimit; redirectCount += 1) {
|
|
744
|
-
const { response, nextUrl } = await
|
|
758
|
+
const { response, nextUrl } = await withRedirectErrorContext(currentUrl, () => performFetchCycle(currentUrl, init, redirectLimit, redirectCount));
|
|
745
759
|
if (!nextUrl) {
|
|
746
760
|
return { response, url: currentUrl };
|
|
747
761
|
}
|
|
748
762
|
currentUrl = nextUrl;
|
|
749
763
|
}
|
|
750
|
-
throw
|
|
751
|
-
}
|
|
752
|
-
async function performFetchCycleSafely(currentUrl, init, redirectLimit, redirectCount) {
|
|
753
|
-
try {
|
|
754
|
-
return await performFetchCycle(currentUrl, init, redirectLimit, redirectCount);
|
|
755
|
-
}
|
|
756
|
-
catch (error) {
|
|
757
|
-
annotateRedirectError(error, currentUrl);
|
|
758
|
-
throw error;
|
|
759
|
-
}
|
|
764
|
+
throw createTooManyRedirectsError(currentUrl);
|
|
760
765
|
}
|
|
761
766
|
function assertContentLengthWithinLimit(response, url, maxBytes) {
|
|
762
767
|
const contentLengthHeader = response.headers.get('content-length');
|
|
@@ -841,15 +846,18 @@ async function readStreamWithLimit(stream, url, maxBytes, signal) {
|
|
|
841
846
|
finalizeRead(state);
|
|
842
847
|
return { text: state.parts.join(''), size: state.total };
|
|
843
848
|
}
|
|
849
|
+
async function readResponseTextFallback(response, url, maxBytes) {
|
|
850
|
+
const text = await response.text();
|
|
851
|
+
const size = Buffer.byteLength(text);
|
|
852
|
+
if (size > maxBytes) {
|
|
853
|
+
throw createSizeLimitError(url, maxBytes);
|
|
854
|
+
}
|
|
855
|
+
return { text, size };
|
|
856
|
+
}
|
|
844
857
|
export async function readResponseText(response, url, maxBytes, signal) {
|
|
845
858
|
assertContentLengthWithinLimit(response, url, maxBytes);
|
|
846
859
|
if (!response.body) {
|
|
847
|
-
|
|
848
|
-
const size = Buffer.byteLength(text);
|
|
849
|
-
if (size > maxBytes) {
|
|
850
|
-
throw createSizeLimitError(url, maxBytes);
|
|
851
|
-
}
|
|
852
|
-
return { text, size };
|
|
860
|
+
return readResponseTextFallback(response, url, maxBytes);
|
|
853
861
|
}
|
|
854
862
|
return readStreamWithLimit(response.body, url, maxBytes, signal);
|
|
855
863
|
}
|
|
@@ -925,4 +933,3 @@ export async function fetchNormalizedUrl(normalizedUrl, options) {
|
|
|
925
933
|
const requestInit = buildRequestInit(headers, signal);
|
|
926
934
|
return fetchWithTelemetry(normalizedUrl, requestInit, timeoutMs);
|
|
927
935
|
}
|
|
928
|
-
//# sourceMappingURL=fetch.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function normalizeHost(value: string): string | null;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { isIP } from 'node:net';
|
|
2
|
+
export function normalizeHost(value) {
|
|
3
|
+
const trimmed = value.trim().toLowerCase();
|
|
4
|
+
if (!trimmed)
|
|
5
|
+
return null;
|
|
6
|
+
const first = takeFirstHostValue(trimmed);
|
|
7
|
+
if (!first)
|
|
8
|
+
return null;
|
|
9
|
+
const ipv6 = stripIpv6Brackets(first);
|
|
10
|
+
if (ipv6)
|
|
11
|
+
return stripTrailingDots(ipv6);
|
|
12
|
+
if (isIpV6Literal(first)) {
|
|
13
|
+
return stripTrailingDots(first);
|
|
14
|
+
}
|
|
15
|
+
return stripTrailingDots(stripPortIfPresent(first));
|
|
16
|
+
}
|
|
17
|
+
function takeFirstHostValue(value) {
|
|
18
|
+
const first = value.split(',')[0];
|
|
19
|
+
if (!first)
|
|
20
|
+
return null;
|
|
21
|
+
const trimmed = first.trim();
|
|
22
|
+
return trimmed ? trimmed : null;
|
|
23
|
+
}
|
|
24
|
+
function stripIpv6Brackets(value) {
|
|
25
|
+
if (!value.startsWith('['))
|
|
26
|
+
return null;
|
|
27
|
+
const end = value.indexOf(']');
|
|
28
|
+
if (end === -1)
|
|
29
|
+
return null;
|
|
30
|
+
return value.slice(1, end);
|
|
31
|
+
}
|
|
32
|
+
function stripPortIfPresent(value) {
|
|
33
|
+
const colonIndex = value.indexOf(':');
|
|
34
|
+
if (colonIndex === -1)
|
|
35
|
+
return value;
|
|
36
|
+
return value.slice(0, colonIndex);
|
|
37
|
+
}
|
|
38
|
+
function isIpV6Literal(value) {
|
|
39
|
+
return isIP(value) === 6;
|
|
40
|
+
}
|
|
41
|
+
function stripTrailingDots(value) {
|
|
42
|
+
let result = value;
|
|
43
|
+
while (result.endsWith('.')) {
|
|
44
|
+
result = result.slice(0, -1);
|
|
45
|
+
}
|
|
46
|
+
return result;
|
|
47
|
+
}
|
package/dist/http-native.d.ts
CHANGED