@j0hanz/superfetch 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/README.md +243 -494
  2. package/dist/cache.d.ts +2 -3
  3. package/dist/cache.js +51 -241
  4. package/dist/config.d.ts +6 -1
  5. package/dist/config.js +29 -34
  6. package/dist/crypto.d.ts +0 -1
  7. package/dist/crypto.js +0 -1
  8. package/dist/dom-noise-removal.d.ts +5 -0
  9. package/dist/dom-noise-removal.js +485 -0
  10. package/dist/errors.d.ts +0 -1
  11. package/dist/errors.js +8 -6
  12. package/dist/fetch.d.ts +0 -1
  13. package/dist/fetch.js +71 -61
  14. package/dist/host-normalization.d.ts +1 -0
  15. package/dist/host-normalization.js +47 -0
  16. package/dist/http-native.d.ts +5 -0
  17. package/dist/http-native.js +693 -0
  18. package/dist/index.d.ts +0 -1
  19. package/dist/index.js +1 -2
  20. package/dist/instructions.md +22 -20
  21. package/dist/json.d.ts +1 -0
  22. package/dist/json.js +29 -0
  23. package/dist/language-detection.d.ts +12 -0
  24. package/dist/language-detection.js +291 -0
  25. package/dist/markdown-cleanup.d.ts +18 -0
  26. package/dist/markdown-cleanup.js +283 -0
  27. package/dist/mcp-validator.d.ts +14 -0
  28. package/dist/mcp-validator.js +22 -0
  29. package/dist/mcp.d.ts +0 -1
  30. package/dist/mcp.js +0 -1
  31. package/dist/observability.d.ts +1 -1
  32. package/dist/observability.js +15 -3
  33. package/dist/server-tuning.d.ts +9 -0
  34. package/dist/server-tuning.js +30 -0
  35. package/dist/session.d.ts +36 -0
  36. package/dist/session.js +159 -0
  37. package/dist/tools.d.ts +0 -1
  38. package/dist/tools.js +23 -33
  39. package/dist/transform-types.d.ts +80 -0
  40. package/dist/transform-types.js +5 -0
  41. package/dist/transform.d.ts +7 -53
  42. package/dist/transform.js +434 -856
  43. package/dist/type-guards.d.ts +1 -2
  44. package/dist/type-guards.js +1 -2
  45. package/dist/workers/transform-worker.d.ts +0 -1
  46. package/dist/workers/transform-worker.js +52 -43
  47. package/package.json +11 -12
  48. package/dist/cache.d.ts.map +0 -1
  49. package/dist/cache.js.map +0 -1
  50. package/dist/config.d.ts.map +0 -1
  51. package/dist/config.js.map +0 -1
  52. package/dist/crypto.d.ts.map +0 -1
  53. package/dist/crypto.js.map +0 -1
  54. package/dist/errors.d.ts.map +0 -1
  55. package/dist/errors.js.map +0 -1
  56. package/dist/fetch.d.ts.map +0 -1
  57. package/dist/fetch.js.map +0 -1
  58. package/dist/http.d.ts +0 -90
  59. package/dist/http.d.ts.map +0 -1
  60. package/dist/http.js +0 -1576
  61. package/dist/http.js.map +0 -1
  62. package/dist/index.d.ts.map +0 -1
  63. package/dist/index.js.map +0 -1
  64. package/dist/mcp.d.ts.map +0 -1
  65. package/dist/mcp.js.map +0 -1
  66. package/dist/observability.d.ts.map +0 -1
  67. package/dist/observability.js.map +0 -1
  68. package/dist/tools.d.ts.map +0 -1
  69. package/dist/tools.js.map +0 -1
  70. package/dist/transform.d.ts.map +0 -1
  71. package/dist/transform.js.map +0 -1
  72. package/dist/type-guards.d.ts.map +0 -1
  73. package/dist/type-guards.js.map +0 -1
  74. package/dist/workers/transform-worker.d.ts.map +0 -1
  75. package/dist/workers/transform-worker.js.map +0 -1
package/dist/fetch.js CHANGED
@@ -8,14 +8,13 @@ import { Agent } from 'undici';
8
8
  import { config } from './config.js';
9
9
  import { createErrorWithCode, FetchError, isSystemError } from './errors.js';
10
10
  import { getOperationId, getRequestId, logDebug, logError, logWarn, redactUrl, } from './observability.js';
11
- import { isRecord } from './type-guards.js';
11
+ import { isObject } from './type-guards.js';
12
12
  function buildIpv4(parts) {
13
13
  return parts.join('.');
14
14
  }
15
15
  function buildIpv6(parts) {
16
16
  return parts.map(String).join(':');
17
17
  }
18
- const BLOCK_LIST = new BlockList();
19
18
  const IPV6_ZERO = buildIpv6([0, 0, 0, 0, 0, 0, 0, 0]);
20
19
  const IPV6_LOOPBACK = buildIpv6([0, 0, 0, 0, 0, 0, 0, 1]);
21
20
  const IPV6_64_FF9B = buildIpv6(['64', 'ff9b', 0, 0, 0, 0, 0, 0]);
@@ -47,19 +46,22 @@ const BLOCKED_IPV6_SUBNETS = [
47
46
  { subnet: IPV6_FE80, prefix: 10 },
48
47
  { subnet: IPV6_FF00, prefix: 8 },
49
48
  ];
50
- for (const entry of BLOCKED_IPV4_SUBNETS) {
51
- BLOCK_LIST.addSubnet(entry.subnet, entry.prefix, 'ipv4');
52
- }
53
- for (const entry of BLOCKED_IPV6_SUBNETS) {
54
- BLOCK_LIST.addSubnet(entry.subnet, entry.prefix, 'ipv6');
55
- }
56
- function matchesBlockedIpPatterns(resolvedIp) {
57
- for (const pattern of config.security.blockedIpPatterns) {
58
- if (pattern.test(resolvedIp)) {
59
- return true;
49
+ let cachedBlockList;
50
+ function getBlockList() {
51
+ if (!cachedBlockList) {
52
+ cachedBlockList = new BlockList();
53
+ for (const entry of BLOCKED_IPV4_SUBNETS) {
54
+ cachedBlockList.addSubnet(entry.subnet, entry.prefix, 'ipv4');
55
+ }
56
+ for (const entry of BLOCKED_IPV6_SUBNETS) {
57
+ cachedBlockList.addSubnet(entry.subnet, entry.prefix, 'ipv6');
60
58
  }
61
59
  }
62
- return false;
60
+ return cachedBlockList;
61
+ }
62
+ function matchesBlockedIpPatterns(resolvedIp) {
63
+ return (config.security.blockedIpPattern.test(resolvedIp) ||
64
+ config.security.blockedIpv4MappedPattern.test(resolvedIp));
63
65
  }
64
66
  export function isBlockedIp(ip) {
65
67
  if (config.security.blockedHosts.has(ip)) {
@@ -78,10 +80,11 @@ function resolveIpType(ip) {
78
80
  return ipType === 4 || ipType === 6 ? ipType : null;
79
81
  }
80
82
  function isBlockedByList(ip, ipType) {
83
+ const blockList = getBlockList();
81
84
  if (ipType === 4) {
82
- return BLOCK_LIST.check(ip, 'ipv4');
85
+ return blockList.check(ip, 'ipv4');
83
86
  }
84
- return BLOCK_LIST.check(ip, 'ipv6');
87
+ return blockList.check(ip, 'ipv6');
85
88
  }
86
89
  export function normalizeUrl(urlString) {
87
90
  const trimmedUrl = requireTrimmedUrl(urlString);
@@ -217,28 +220,18 @@ const TRANSFORM_RULES = [
217
220
  GITLAB_BLOB_RULE,
218
221
  BITBUCKET_SRC_RULE,
219
222
  ];
223
+ const BITBUCKET_RAW_RE = /bitbucket\.org\/[^/]+\/[^/]+\/raw\//;
220
224
  function isRawUrl(url) {
221
225
  const lowerUrl = url.toLowerCase();
222
226
  return (lowerUrl.includes('raw.githubusercontent.com') ||
223
227
  lowerUrl.includes('gist.githubusercontent.com') ||
224
228
  lowerUrl.includes('/-/raw/') ||
225
- /bitbucket\.org\/[^/]+\/[^/]+\/raw\//.test(lowerUrl));
229
+ BITBUCKET_RAW_RE.test(lowerUrl));
226
230
  }
227
231
  function getUrlWithoutParams(url) {
228
232
  const hashIndex = url.indexOf('#');
229
233
  const queryIndex = url.indexOf('?');
230
- let endIndex = url.length;
231
- if (queryIndex !== -1) {
232
- if (hashIndex !== -1) {
233
- endIndex = Math.min(queryIndex, hashIndex);
234
- }
235
- else {
236
- endIndex = queryIndex;
237
- }
238
- }
239
- else if (hashIndex !== -1) {
240
- endIndex = hashIndex;
241
- }
234
+ const endIndex = Math.min(queryIndex === -1 ? url.length : queryIndex, hashIndex === -1 ? url.length : hashIndex);
242
235
  const hash = hashIndex !== -1 ? url.slice(hashIndex) : '';
243
236
  return {
244
237
  base: url.slice(0, endIndex),
@@ -314,6 +307,7 @@ function hasKnownRawTextExtension(urlBaseLower) {
314
307
  return false;
315
308
  }
316
309
  const DNS_LOOKUP_TIMEOUT_MS = 5000;
310
+ const SLOW_REQUEST_THRESHOLD_MS = 5000;
317
311
  function normalizeLookupResults(addresses, family) {
318
312
  if (Array.isArray(addresses)) {
319
313
  return addresses;
@@ -422,7 +416,7 @@ function resolveResultOrder(options) {
422
416
  return DEFAULT_DNS_ORDER;
423
417
  }
424
418
  function getLegacyVerbatim(options) {
425
- if (isRecord(options)) {
419
+ if (isObject(options)) {
426
420
  const { verbatim } = options;
427
421
  return typeof verbatim === 'boolean' ? verbatim : undefined;
428
422
  }
@@ -509,6 +503,12 @@ function createRateLimitError(url, headerValue) {
509
503
  function createHttpError(url, status, statusText) {
510
504
  return new FetchError(`HTTP ${status}: ${statusText}`, url, status);
511
505
  }
506
+ function createTooManyRedirectsError(url) {
507
+ return new FetchError('Too many redirects', url);
508
+ }
509
+ function createMissingRedirectLocationError(url) {
510
+ return new FetchError('Redirect response missing Location header', url);
511
+ }
512
512
  function createSizeLimitError(url, maxBytes) {
513
513
  return new FetchError(`Response exceeds maximum size of ${maxBytes} bytes`, url);
514
514
  }
@@ -533,28 +533,36 @@ function getRequestUrl(record) {
533
533
  function resolveErrorUrl(error, fallback) {
534
534
  if (error instanceof FetchError)
535
535
  return error.url;
536
- if (!isRecord(error))
536
+ if (!isObject(error))
537
537
  return fallback;
538
538
  const requestUrl = getRequestUrl(error);
539
539
  if (requestUrl)
540
540
  return requestUrl;
541
541
  return fallback;
542
542
  }
543
- function mapFetchError(error, fallbackUrl, timeoutMs) {
544
- if (error instanceof FetchError)
545
- return error;
546
- const url = resolveErrorUrl(error, fallbackUrl);
547
- if (isAbortError(error)) {
548
- if (isTimeoutError(error)) {
549
- return createTimeoutError(url, timeoutMs);
550
- }
551
- return createCanceledError(url);
543
+ function resolveAbortFetchError(error, url, timeoutMs) {
544
+ if (!isAbortError(error))
545
+ return null;
546
+ if (isTimeoutError(error)) {
547
+ return createTimeoutError(url, timeoutMs);
552
548
  }
549
+ return createCanceledError(url);
550
+ }
551
+ function resolveUnexpectedFetchError(error, url) {
553
552
  if (error instanceof Error) {
554
553
  return createNetworkError(url, error.message);
555
554
  }
556
555
  return createUnknownError(url, 'Unexpected error');
557
556
  }
557
+ function mapFetchError(error, fallbackUrl, timeoutMs) {
558
+ if (error instanceof FetchError)
559
+ return error;
560
+ const url = resolveErrorUrl(error, fallbackUrl);
561
+ const abortError = resolveAbortFetchError(error, url, timeoutMs);
562
+ if (abortError)
563
+ return abortError;
564
+ return resolveUnexpectedFetchError(error, url);
565
+ }
558
566
  const fetchChannel = diagnosticsChannel.channel('superfetch.fetch');
559
567
  function publishFetchEvent(event) {
560
568
  if (!fetchChannel.hasSubscribers)
@@ -589,7 +597,7 @@ function buildResponseMetadata(response, contentSize) {
589
597
  return metadata;
590
598
  }
591
599
  function logSlowRequest(context, duration, durationLabel, contextFields) {
592
- if (duration <= 5000)
600
+ if (duration <= SLOW_REQUEST_THRESHOLD_MS)
593
601
  return;
594
602
  logWarn('Slow HTTP request detected', {
595
603
  requestId: context.requestId,
@@ -719,17 +727,17 @@ function assertRedirectWithinLimit(response, currentUrl, redirectLimit, redirect
719
727
  if (redirectCount < redirectLimit)
720
728
  return;
721
729
  cancelResponseBody(response);
722
- throw new FetchError('Too many redirects', currentUrl);
730
+ throw createTooManyRedirectsError(currentUrl);
723
731
  }
724
732
  function getRedirectLocation(response, currentUrl) {
725
733
  const location = response.headers.get('location');
726
734
  if (location)
727
735
  return location;
728
736
  cancelResponseBody(response);
729
- throw new FetchError('Redirect response missing Location header', currentUrl);
737
+ throw createMissingRedirectLocationError(currentUrl);
730
738
  }
731
739
  function annotateRedirectError(error, url) {
732
- if (!isRecord(error))
740
+ if (!isObject(error))
733
741
  return;
734
742
  error.requestUrl = url;
735
743
  }
@@ -743,26 +751,26 @@ function resolveRedirectTarget(baseUrl, location) {
743
751
  }
744
752
  return validateAndNormalizeUrl(resolved.href);
745
753
  }
754
+ async function withRedirectErrorContext(url, fn) {
755
+ try {
756
+ return await fn();
757
+ }
758
+ catch (error) {
759
+ annotateRedirectError(error, url);
760
+ throw error;
761
+ }
762
+ }
746
763
  export async function fetchWithRedirects(url, init, maxRedirects) {
747
764
  let currentUrl = url;
748
765
  const redirectLimit = Math.max(0, maxRedirects);
749
766
  for (let redirectCount = 0; redirectCount <= redirectLimit; redirectCount += 1) {
750
- const { response, nextUrl } = await performFetchCycleSafely(currentUrl, init, redirectLimit, redirectCount);
767
+ const { response, nextUrl } = await withRedirectErrorContext(currentUrl, () => performFetchCycle(currentUrl, init, redirectLimit, redirectCount));
751
768
  if (!nextUrl) {
752
769
  return { response, url: currentUrl };
753
770
  }
754
771
  currentUrl = nextUrl;
755
772
  }
756
- throw new FetchError('Too many redirects', currentUrl);
757
- }
758
- async function performFetchCycleSafely(currentUrl, init, redirectLimit, redirectCount) {
759
- try {
760
- return await performFetchCycle(currentUrl, init, redirectLimit, redirectCount);
761
- }
762
- catch (error) {
763
- annotateRedirectError(error, currentUrl);
764
- throw error;
765
- }
773
+ throw createTooManyRedirectsError(currentUrl);
766
774
  }
767
775
  function assertContentLengthWithinLimit(response, url, maxBytes) {
768
776
  const contentLengthHeader = response.headers.get('content-length');
@@ -847,15 +855,18 @@ async function readStreamWithLimit(stream, url, maxBytes, signal) {
847
855
  finalizeRead(state);
848
856
  return { text: state.parts.join(''), size: state.total };
849
857
  }
858
+ async function readResponseTextFallback(response, url, maxBytes) {
859
+ const text = await response.text();
860
+ const size = Buffer.byteLength(text);
861
+ if (size > maxBytes) {
862
+ throw createSizeLimitError(url, maxBytes);
863
+ }
864
+ return { text, size };
865
+ }
850
866
  export async function readResponseText(response, url, maxBytes, signal) {
851
867
  assertContentLengthWithinLimit(response, url, maxBytes);
852
868
  if (!response.body) {
853
- const text = await response.text();
854
- const size = Buffer.byteLength(text);
855
- if (size > maxBytes) {
856
- throw createSizeLimitError(url, maxBytes);
857
- }
858
- return { text, size };
869
+ return readResponseTextFallback(response, url, maxBytes);
859
870
  }
860
871
  return readStreamWithLimit(response.body, url, maxBytes, signal);
861
872
  }
@@ -931,4 +942,3 @@ export async function fetchNormalizedUrl(normalizedUrl, options) {
931
942
  const requestInit = buildRequestInit(headers, signal);
932
943
  return fetchWithTelemetry(normalizedUrl, requestInit, timeoutMs);
933
944
  }
934
- //# sourceMappingURL=fetch.js.map
package/dist/host-normalization.d.ts ADDED
@@ -0,0 +1 @@
1
+ export declare function normalizeHost(value: string): string | null;
package/dist/host-normalization.js ADDED
@@ -0,0 +1,47 @@
1
+ import { isIP } from 'node:net';
2
+ export function normalizeHost(value) {
3
+ const trimmed = value.trim().toLowerCase();
4
+ if (!trimmed)
5
+ return null;
6
+ const first = takeFirstHostValue(trimmed);
7
+ if (!first)
8
+ return null;
9
+ const ipv6 = stripIpv6Brackets(first);
10
+ if (ipv6)
11
+ return stripTrailingDots(ipv6);
12
+ if (isIpV6Literal(first)) {
13
+ return stripTrailingDots(first);
14
+ }
15
+ return stripTrailingDots(stripPortIfPresent(first));
16
+ }
17
+ function takeFirstHostValue(value) {
18
+ const first = value.split(',')[0];
19
+ if (!first)
20
+ return null;
21
+ const trimmed = first.trim();
22
+ return trimmed ? trimmed : null;
23
+ }
24
+ function stripIpv6Brackets(value) {
25
+ if (!value.startsWith('['))
26
+ return null;
27
+ const end = value.indexOf(']');
28
+ if (end === -1)
29
+ return null;
30
+ return value.slice(1, end);
31
+ }
32
+ function stripPortIfPresent(value) {
33
+ const colonIndex = value.indexOf(':');
34
+ if (colonIndex === -1)
35
+ return value;
36
+ return value.slice(0, colonIndex);
37
+ }
38
+ function isIpV6Literal(value) {
39
+ return isIP(value) === 6;
40
+ }
41
+ function stripTrailingDots(value) {
42
+ let result = value;
43
+ while (result.endsWith('.')) {
44
+ result = result.slice(0, -1);
45
+ }
46
+ return result;
47
+ }
package/dist/http-native.d.ts ADDED
@@ -0,0 +1,5 @@
1
+ export declare function startHttpServer(): Promise<{
2
+ shutdown: (signal: string) => Promise<void>;
3
+ port: number;
4
+ host: string;
5
+ }>;