@j0hanz/superfetch 2.2.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +243 -494
- package/dist/cache.d.ts +2 -3
- package/dist/cache.js +51 -241
- package/dist/config.d.ts +6 -1
- package/dist/config.js +29 -34
- package/dist/crypto.d.ts +0 -1
- package/dist/crypto.js +0 -1
- package/dist/dom-noise-removal.d.ts +5 -0
- package/dist/dom-noise-removal.js +485 -0
- package/dist/errors.d.ts +0 -1
- package/dist/errors.js +8 -6
- package/dist/fetch.d.ts +0 -1
- package/dist/fetch.js +71 -61
- package/dist/host-normalization.d.ts +1 -0
- package/dist/host-normalization.js +47 -0
- package/dist/http-native.d.ts +5 -0
- package/dist/http-native.js +693 -0
- package/dist/index.d.ts +0 -1
- package/dist/index.js +1 -2
- package/dist/instructions.md +22 -20
- package/dist/json.d.ts +1 -0
- package/dist/json.js +29 -0
- package/dist/language-detection.d.ts +12 -0
- package/dist/language-detection.js +291 -0
- package/dist/markdown-cleanup.d.ts +18 -0
- package/dist/markdown-cleanup.js +283 -0
- package/dist/mcp-validator.d.ts +14 -0
- package/dist/mcp-validator.js +22 -0
- package/dist/mcp.d.ts +0 -1
- package/dist/mcp.js +0 -1
- package/dist/observability.d.ts +1 -1
- package/dist/observability.js +15 -3
- package/dist/server-tuning.d.ts +9 -0
- package/dist/server-tuning.js +30 -0
- package/dist/session.d.ts +36 -0
- package/dist/session.js +159 -0
- package/dist/tools.d.ts +0 -1
- package/dist/tools.js +23 -33
- package/dist/transform-types.d.ts +80 -0
- package/dist/transform-types.js +5 -0
- package/dist/transform.d.ts +7 -53
- package/dist/transform.js +434 -856
- package/dist/type-guards.d.ts +1 -2
- package/dist/type-guards.js +1 -2
- package/dist/workers/transform-worker.d.ts +0 -1
- package/dist/workers/transform-worker.js +52 -43
- package/package.json +11 -12
- package/dist/cache.d.ts.map +0 -1
- package/dist/cache.js.map +0 -1
- package/dist/config.d.ts.map +0 -1
- package/dist/config.js.map +0 -1
- package/dist/crypto.d.ts.map +0 -1
- package/dist/crypto.js.map +0 -1
- package/dist/errors.d.ts.map +0 -1
- package/dist/errors.js.map +0 -1
- package/dist/fetch.d.ts.map +0 -1
- package/dist/fetch.js.map +0 -1
- package/dist/http.d.ts +0 -90
- package/dist/http.d.ts.map +0 -1
- package/dist/http.js +0 -1576
- package/dist/http.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/mcp.d.ts.map +0 -1
- package/dist/mcp.js.map +0 -1
- package/dist/observability.d.ts.map +0 -1
- package/dist/observability.js.map +0 -1
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js.map +0 -1
- package/dist/transform.d.ts.map +0 -1
- package/dist/transform.js.map +0 -1
- package/dist/type-guards.d.ts.map +0 -1
- package/dist/type-guards.js.map +0 -1
- package/dist/workers/transform-worker.d.ts.map +0 -1
- package/dist/workers/transform-worker.js.map +0 -1
package/dist/fetch.js
CHANGED
|
@@ -8,14 +8,13 @@ import { Agent } from 'undici';
|
|
|
8
8
|
import { config } from './config.js';
|
|
9
9
|
import { createErrorWithCode, FetchError, isSystemError } from './errors.js';
|
|
10
10
|
import { getOperationId, getRequestId, logDebug, logError, logWarn, redactUrl, } from './observability.js';
|
|
11
|
-
import {
|
|
11
|
+
import { isObject } from './type-guards.js';
|
|
12
12
|
/**
 * Joins the given parts into a dotted IPv4 string.
 *
 * @param parts Array of address parts, joined in order.
 * @returns The parts separated by `.`.
 */
function buildIpv4(parts) {
    const dotted = parts.join('.');
    return dotted;
}
|
|
15
15
|
/**
 * Joins the given parts into a colon-separated IPv6 string.
 *
 * Each part is converted with `String(...)` first, so numbers and strings
 * can be mixed.
 *
 * @param parts Array of address groups, joined in order.
 * @returns The stringified groups separated by `:`.
 */
function buildIpv6(parts) {
    const groups = parts.map((part) => String(part));
    return groups.join(':');
}
|
|
18
|
-
const BLOCK_LIST = new BlockList();
|
|
19
18
|
const IPV6_ZERO = buildIpv6([0, 0, 0, 0, 0, 0, 0, 0]);
|
|
20
19
|
const IPV6_LOOPBACK = buildIpv6([0, 0, 0, 0, 0, 0, 0, 1]);
|
|
21
20
|
const IPV6_64_FF9B = buildIpv6(['64', 'ff9b', 0, 0, 0, 0, 0, 0]);
|
|
@@ -47,19 +46,22 @@ const BLOCKED_IPV6_SUBNETS = [
|
|
|
47
46
|
{ subnet: IPV6_FE80, prefix: 10 },
|
|
48
47
|
{ subnet: IPV6_FF00, prefix: 8 },
|
|
49
48
|
];
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
return true;
|
|
49
|
+
let cachedBlockList;
|
|
50
|
+
/**
 * Returns the shared BlockList, building it on first use.
 *
 * Every entry of BLOCKED_IPV4_SUBNETS is added as an 'ipv4' subnet and every
 * entry of BLOCKED_IPV6_SUBNETS as an 'ipv6' subnet.
 *
 * The list is populated in a local variable and only stored in the cache
 * after every subnet was added. If `addSubnet` throws part-way through, the
 * next call therefore retries instead of silently reusing a half-populated
 * list.
 *
 * @returns The cached, fully populated BlockList.
 */
function getBlockList() {
    if (cachedBlockList) {
        return cachedBlockList;
    }
    const blockList = new BlockList();
    for (const { subnet, prefix } of BLOCKED_IPV4_SUBNETS) {
        blockList.addSubnet(subnet, prefix, 'ipv4');
    }
    for (const { subnet, prefix } of BLOCKED_IPV6_SUBNETS) {
        blockList.addSubnet(subnet, prefix, 'ipv6');
    }
    // Publish only once the list is complete.
    cachedBlockList = blockList;
    return blockList;
}
|
|
62
|
+
/**
 * Tests an IP string against the configured blocked-IP patterns.
 *
 * @param resolvedIp The IP string to test.
 * @returns true when `config.security.blockedIpPattern` or
 *   `config.security.blockedIpv4MappedPattern` matches.
 */
function matchesBlockedIpPatterns(resolvedIp) {
    if (config.security.blockedIpPattern.test(resolvedIp)) {
        return true;
    }
    // The second pattern is only consulted when the first did not match.
    return config.security.blockedIpv4MappedPattern.test(resolvedIp);
}
|
|
64
66
|
export function isBlockedIp(ip) {
|
|
65
67
|
if (config.security.blockedHosts.has(ip)) {
|
|
@@ -78,10 +80,11 @@ function resolveIpType(ip) {
|
|
|
78
80
|
return ipType === 4 || ipType === 6 ? ipType : null;
|
|
79
81
|
}
|
|
80
82
|
/**
 * Checks an IP against the shared block list.
 *
 * @param ip The IP string to check.
 * @param ipType 4 selects the 'ipv4' family; any other value selects 'ipv6'.
 * @returns The result of `BlockList.check` for that address and family.
 */
function isBlockedByList(ip, ipType) {
    const blockList = getBlockList();
    const family = ipType === 4 ? 'ipv4' : 'ipv6';
    return blockList.check(ip, family);
}
|
|
86
89
|
export function normalizeUrl(urlString) {
|
|
87
90
|
const trimmedUrl = requireTrimmedUrl(urlString);
|
|
@@ -217,28 +220,18 @@ const TRANSFORM_RULES = [
|
|
|
217
220
|
GITLAB_BLOB_RULE,
|
|
218
221
|
BITBUCKET_SRC_RULE,
|
|
219
222
|
];
|
|
223
|
+
// Matches "bitbucket.org/<segment>/<segment>/raw/" anywhere in the string.
const BITBUCKET_RAW_RE = /bitbucket\.org\/[^/]+\/[^/]+\/raw\//;
/**
 * Reports whether a URL contains one of the known raw-content markers.
 *
 * The comparison is case-insensitive: the URL is lower-cased first.
 *
 * @param url The URL string to inspect.
 * @returns true when the lower-cased URL contains one of the literal
 *   markers or matches BITBUCKET_RAW_RE.
 */
function isRawUrl(url) {
    const candidate = url.toLowerCase();
    const literalMarkers = [
        'raw.githubusercontent.com',
        'gist.githubusercontent.com',
        '/-/raw/',
    ];
    if (literalMarkers.some((marker) => candidate.includes(marker))) {
        return true;
    }
    return BITBUCKET_RAW_RE.test(candidate);
}
|
|
227
231
|
function getUrlWithoutParams(url) {
|
|
228
232
|
const hashIndex = url.indexOf('#');
|
|
229
233
|
const queryIndex = url.indexOf('?');
|
|
230
|
-
|
|
231
|
-
if (queryIndex !== -1) {
|
|
232
|
-
if (hashIndex !== -1) {
|
|
233
|
-
endIndex = Math.min(queryIndex, hashIndex);
|
|
234
|
-
}
|
|
235
|
-
else {
|
|
236
|
-
endIndex = queryIndex;
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
|
-
else if (hashIndex !== -1) {
|
|
240
|
-
endIndex = hashIndex;
|
|
241
|
-
}
|
|
234
|
+
const endIndex = Math.min(queryIndex === -1 ? url.length : queryIndex, hashIndex === -1 ? url.length : hashIndex);
|
|
242
235
|
const hash = hashIndex !== -1 ? url.slice(hashIndex) : '';
|
|
243
236
|
return {
|
|
244
237
|
base: url.slice(0, endIndex),
|
|
@@ -314,6 +307,7 @@ function hasKnownRawTextExtension(urlBaseLower) {
|
|
|
314
307
|
return false;
|
|
315
308
|
}
|
|
316
309
|
const DNS_LOOKUP_TIMEOUT_MS = 5000;
|
|
310
|
+
const SLOW_REQUEST_THRESHOLD_MS = 5000;
|
|
317
311
|
function normalizeLookupResults(addresses, family) {
|
|
318
312
|
if (Array.isArray(addresses)) {
|
|
319
313
|
return addresses;
|
|
@@ -422,7 +416,7 @@ function resolveResultOrder(options) {
|
|
|
422
416
|
return DEFAULT_DNS_ORDER;
|
|
423
417
|
}
|
|
424
418
|
function getLegacyVerbatim(options) {
|
|
425
|
-
if (
|
|
419
|
+
if (isObject(options)) {
|
|
426
420
|
const { verbatim } = options;
|
|
427
421
|
return typeof verbatim === 'boolean' ? verbatim : undefined;
|
|
428
422
|
}
|
|
@@ -509,6 +503,12 @@ function createRateLimitError(url, headerValue) {
|
|
|
509
503
|
/**
 * Builds the FetchError for an HTTP error response.
 *
 * @param url URL passed to the FetchError.
 * @param status HTTP status, used in the message and passed to the FetchError.
 * @param statusText HTTP status text, used in the message.
 * @returns A FetchError with the message `HTTP <status>: <statusText>`.
 */
function createHttpError(url, status, statusText) {
    const message = `HTTP ${status}: ${statusText}`;
    return new FetchError(message, url, status);
}
|
|
506
|
+
/**
 * Builds the FetchError raised when the redirect limit is exhausted.
 *
 * @param url URL passed to the FetchError.
 * @returns A FetchError with the message "Too many redirects".
 */
function createTooManyRedirectsError(url) {
    const message = 'Too many redirects';
    return new FetchError(message, url);
}
|
|
509
|
+
/**
 * Builds the FetchError raised when a redirect response has no Location
 * header.
 *
 * @param url URL passed to the FetchError.
 * @returns A FetchError with the message
 *   "Redirect response missing Location header".
 */
function createMissingRedirectLocationError(url) {
    const message = 'Redirect response missing Location header';
    return new FetchError(message, url);
}
|
|
512
512
|
/**
 * Builds the FetchError raised when a response is larger than allowed.
 *
 * @param url URL passed to the FetchError.
 * @param maxBytes The byte limit, reported in the message.
 * @returns A FetchError with the message
 *   `Response exceeds maximum size of <maxBytes> bytes`.
 */
function createSizeLimitError(url, maxBytes) {
    const message = `Response exceeds maximum size of ${maxBytes} bytes`;
    return new FetchError(message, url);
}
|
|
@@ -533,28 +533,36 @@ function getRequestUrl(record) {
|
|
|
533
533
|
/**
 * Picks the URL to associate with an error.
 *
 * @param error The thrown value.
 * @param fallback URL returned when the error carries none.
 * @returns `error.url` for a FetchError; otherwise the truthy result of
 *   `getRequestUrl(error)` when the error is an object; otherwise `fallback`.
 */
function resolveErrorUrl(error, fallback) {
    if (error instanceof FetchError) {
        return error.url;
    }
    if (isObject(error)) {
        const annotatedUrl = getRequestUrl(error);
        if (annotatedUrl) {
            return annotatedUrl;
        }
    }
    return fallback;
}
|
|
543
|
-
function
|
|
544
|
-
if (error
|
|
545
|
-
return
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
if (isTimeoutError(error)) {
|
|
549
|
-
return createTimeoutError(url, timeoutMs);
|
|
550
|
-
}
|
|
551
|
-
return createCanceledError(url);
|
|
543
|
+
/**
 * Maps an abort-type error to a timeout or cancellation error.
 *
 * @param error The thrown value.
 * @param url URL passed to the created error.
 * @param timeoutMs Timeout passed to the timeout error.
 * @returns null when `isAbortError(error)` is false; the result of
 *   `createTimeoutError` when `isTimeoutError(error)` is true; otherwise
 *   the result of `createCanceledError`.
 */
function resolveAbortFetchError(error, url, timeoutMs) {
    if (!isAbortError(error)) {
        return null;
    }
    return isTimeoutError(error)
        ? createTimeoutError(url, timeoutMs)
        : createCanceledError(url);
}
|
|
551
|
+
/**
 * Maps any remaining thrown value to a network or unknown error.
 *
 * @param error The thrown value.
 * @param url URL passed to the created error.
 * @returns The result of `createNetworkError` (with the error's message)
 *   for Error instances; otherwise the result of `createUnknownError` with
 *   the message "Unexpected error".
 */
function resolveUnexpectedFetchError(error, url) {
    return error instanceof Error
        ? createNetworkError(url, error.message)
        : createUnknownError(url, 'Unexpected error');
}
|
|
557
|
+
/**
 * Converts an arbitrary thrown value into a fetch error.
 *
 * A FetchError is returned unchanged. Otherwise the URL is resolved with
 * `resolveErrorUrl`, abort-type errors are mapped first, and everything
 * else goes through `resolveUnexpectedFetchError`.
 *
 * @param error The thrown value.
 * @param fallbackUrl URL used when the error carries none.
 * @param timeoutMs Timeout forwarded to the abort mapping.
 * @returns The mapped error.
 */
function mapFetchError(error, fallbackUrl, timeoutMs) {
    if (error instanceof FetchError) {
        return error;
    }
    const resolvedUrl = resolveErrorUrl(error, fallbackUrl);
    return (resolveAbortFetchError(error, resolvedUrl, timeoutMs) ||
        resolveUnexpectedFetchError(error, resolvedUrl));
}
|
|
558
566
|
const fetchChannel = diagnosticsChannel.channel('superfetch.fetch');
|
|
559
567
|
function publishFetchEvent(event) {
|
|
560
568
|
if (!fetchChannel.hasSubscribers)
|
|
@@ -589,7 +597,7 @@ function buildResponseMetadata(response, contentSize) {
|
|
|
589
597
|
return metadata;
|
|
590
598
|
}
|
|
591
599
|
function logSlowRequest(context, duration, durationLabel, contextFields) {
|
|
592
|
-
if (duration <=
|
|
600
|
+
if (duration <= SLOW_REQUEST_THRESHOLD_MS)
|
|
593
601
|
return;
|
|
594
602
|
logWarn('Slow HTTP request detected', {
|
|
595
603
|
requestId: context.requestId,
|
|
@@ -719,17 +727,17 @@ function assertRedirectWithinLimit(response, currentUrl, redirectLimit, redirect
|
|
|
719
727
|
if (redirectCount < redirectLimit)
|
|
720
728
|
return;
|
|
721
729
|
cancelResponseBody(response);
|
|
722
|
-
throw
|
|
730
|
+
throw createTooManyRedirectsError(currentUrl);
|
|
723
731
|
}
|
|
724
732
|
/**
 * Reads the `location` header of a redirect response.
 *
 * @param response The response whose headers are read.
 * @param currentUrl URL passed to the error when the header is missing.
 * @returns The header value when it is truthy.
 * @throws The error from `createMissingRedirectLocationError`, after
 *   `cancelResponseBody(response)` was called, when the header is absent
 *   or empty.
 */
function getRedirectLocation(response, currentUrl) {
    const target = response.headers.get('location');
    if (!target) {
        cancelResponseBody(response);
        throw createMissingRedirectLocationError(currentUrl);
    }
    return target;
}
|
|
731
739
|
/**
 * Records the request URL on an error object.
 *
 * Values for which `isObject` is false are left untouched.
 *
 * @param error The thrown value; mutated in place when it is an object.
 * @param url Value assigned to `error.requestUrl`.
 */
function annotateRedirectError(error, url) {
    if (isObject(error)) {
        error.requestUrl = url;
    }
}
|
|
@@ -743,26 +751,26 @@ function resolveRedirectTarget(baseUrl, location) {
|
|
|
743
751
|
}
|
|
744
752
|
return validateAndNormalizeUrl(resolved.href);
|
|
745
753
|
}
|
|
754
|
+
/**
 * Runs `fn` and, if it fails, annotates the error with `url` before
 * rethrowing it.
 *
 * @param url URL handed to `annotateRedirectError` on failure.
 * @param fn Callback to invoke; its result is awaited.
 * @returns The awaited result of `fn`.
 * @throws Whatever `fn` threw or rejected with, after annotation.
 */
async function withRedirectErrorContext(url, fn) {
    let outcome;
    try {
        outcome = await fn();
    }
    catch (failure) {
        annotateRedirectError(failure, url);
        throw failure;
    }
    return outcome;
}
|
|
746
763
|
/**
 * Fetches a URL, following redirects one cycle at a time.
 *
 * Each cycle runs `performFetchCycle` inside `withRedirectErrorContext`, so
 * a failure is annotated with the URL of the cycle that failed. A cycle that
 * yields no `nextUrl` ends the loop.
 *
 * @param url The URL to start from.
 * @param init Request init forwarded to every cycle.
 * @param maxRedirects Redirect limit; values below 0 are treated as 0.
 * @returns The final response together with the URL it was fetched from.
 * @throws The error from `createTooManyRedirectsError` when the loop ends
 *   without a final response.
 */
export async function fetchWithRedirects(url, init, maxRedirects) {
    const redirectLimit = Math.max(0, maxRedirects);
    let currentUrl = url;
    let redirectCount = 0;
    while (redirectCount <= redirectLimit) {
        // Snapshot the loop state so the callback sees this cycle's values.
        const cycleUrl = currentUrl;
        const cycleIndex = redirectCount;
        const runCycle = () => performFetchCycle(cycleUrl, init, redirectLimit, cycleIndex);
        const { response, nextUrl } = await withRedirectErrorContext(cycleUrl, runCycle);
        if (!nextUrl) {
            return { response, url: cycleUrl };
        }
        currentUrl = nextUrl;
        redirectCount += 1;
    }
    throw createTooManyRedirectsError(currentUrl);
}
|
|
767
775
|
function assertContentLengthWithinLimit(response, url, maxBytes) {
|
|
768
776
|
const contentLengthHeader = response.headers.get('content-length');
|
|
@@ -847,15 +855,18 @@ async function readStreamWithLimit(stream, url, maxBytes, signal) {
|
|
|
847
855
|
finalizeRead(state);
|
|
848
856
|
return { text: state.parts.join(''), size: state.total };
|
|
849
857
|
}
|
|
858
|
+
/**
 * Reads a response through `response.text()` and enforces the size limit
 * afterwards.
 *
 * The whole text is read before its byte length is checked.
 *
 * @param response The response to read.
 * @param url URL passed to the size-limit error.
 * @param maxBytes Maximum allowed byte length of the text.
 * @returns The text and its byte length as `{ text, size }`.
 * @throws The error from `createSizeLimitError` when the byte length
 *   exceeds `maxBytes`.
 */
async function readResponseTextFallback(response, url, maxBytes) {
    const body = await response.text();
    const byteLength = Buffer.byteLength(body);
    const overLimit = byteLength > maxBytes;
    if (overLimit) {
        throw createSizeLimitError(url, maxBytes);
    }
    return { text: body, size: byteLength };
}
|
|
850
866
|
/**
 * Reads a response body as text while enforcing a byte limit.
 *
 * `assertContentLengthWithinLimit` runs first. A response with a body is
 * read through `readStreamWithLimit`; one without a body goes through
 * `readResponseTextFallback`.
 *
 * @param response The response to read.
 * @param url URL forwarded to the helpers.
 * @param maxBytes Maximum allowed size in bytes.
 * @param signal Forwarded to the stream reader.
 * @returns Whatever the selected reader resolves to.
 */
export async function readResponseText(response, url, maxBytes, signal) {
    assertContentLengthWithinLimit(response, url, maxBytes);
    return response.body
        ? readStreamWithLimit(response.body, url, maxBytes, signal)
        : readResponseTextFallback(response, url, maxBytes);
}
|
|
@@ -931,4 +942,3 @@ export async function fetchNormalizedUrl(normalizedUrl, options) {
|
|
|
931
942
|
const requestInit = buildRequestInit(headers, signal);
|
|
932
943
|
return fetchWithTelemetry(normalizedUrl, requestInit, timeoutMs);
|
|
933
944
|
}
|
|
934
|
-
//# sourceMappingURL=fetch.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Normalizes a raw host value.
 *
 * The value is trimmed and lower-cased, only the first comma-separated
 * entry is kept, IPv6 brackets and a port suffix are removed, and trailing
 * dots are dropped.
 *
 * @param value The raw host string.
 * @returns The normalized host, or null when the value is blank or its
 *   first comma-separated entry is empty.
 */
export declare function normalizeHost(value: string): string | null;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { isIP } from 'node:net';
|
|
2
|
+
/**
 * Normalizes a raw host value.
 *
 * Steps, in order:
 *  1. Trim and lower-case the value.
 *  2. Keep only the first comma-separated entry.
 *  3. For a bracketed value (`[::1]:443`), keep the text inside the brackets.
 *  4. Leave a bare IPv6 literal as is, because its colons are not a port
 *     separator.
 *  5. Otherwise drop everything from the first `:` onward (the port).
 *  6. Drop trailing dots.
 *
 * @param value The raw host string.
 * @returns The normalized host, or null when nothing is left. Inputs such
 *   as `'.'`, `':8080'` or `'[.]'` reduce to an empty string and now yield
 *   null rather than `''`, so callers never receive an empty host.
 */
export function normalizeHost(value) {
    const trimmed = value.trim().toLowerCase();
    if (!trimmed)
        return null;
    const first = takeFirstHostValue(trimmed);
    if (!first)
        return null;
    const bracketed = stripIpv6Brackets(first);
    let host;
    if (bracketed) {
        host = bracketed;
    }
    else if (isIpV6Literal(first)) {
        host = first;
    }
    else {
        host = stripPortIfPresent(first);
    }
    const normalized = stripTrailingDots(host);
    // An empty result is not a usable host; report it as "no host".
    return normalized ? normalized : null;
}
|
|
17
|
+
/**
 * Extracts the first entry of a comma-separated list.
 *
 * @param value The list string.
 * @returns The trimmed text before the first comma, or null when that text
 *   is empty or whitespace only.
 */
function takeFirstHostValue(value) {
    const commaAt = value.indexOf(',');
    const head = commaAt === -1 ? value : value.slice(0, commaAt);
    return head.trim() || null;
}
|
|
24
|
+
/**
 * Extracts the text between a leading `[` and the first `]`.
 *
 * @param value The string to inspect.
 * @returns The bracketed text (possibly empty), or null when the value does
 *   not start with `[` or contains no `]`.
 */
function stripIpv6Brackets(value) {
    const closeAt = value.startsWith('[') ? value.indexOf(']') : -1;
    return closeAt === -1 ? null : value.slice(1, closeAt);
}
|
|
32
|
+
/**
 * Removes everything from the first `:` onward.
 *
 * @param value The string to cut, for example `host:port`.
 * @returns The text before the first colon, or the whole value when it
 *   contains no colon.
 */
function stripPortIfPresent(value) {
    const [beforeColon] = value.split(':');
    return beforeColon;
}
|
|
38
|
+
/**
 * Reports whether a string is an IPv6 address according to `net.isIP`.
 *
 * @param value The string to classify.
 * @returns true when `isIP(value)` returns 6.
 */
function isIpV6Literal(value) {
    const family = isIP(value);
    return family === 6;
}
|
|
41
|
+
/**
 * Removes every `.` at the end of a string.
 *
 * @param value The string to trim.
 * @returns The value without its trailing dots; an all-dots value yields
 *   the empty string.
 */
function stripTrailingDots(value) {
    let end = value.length;
    // Walk back over the trailing dots, then cut once.
    while (end > 0 && value.charAt(end - 1) === '.') {
        end -= 1;
    }
    return value.slice(0, end);
}
|