@j0hanz/superfetch 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/README.md +120 -38
  2. package/dist/cache.d.ts +42 -0
  3. package/dist/cache.js +565 -0
  4. package/dist/config/env-parsers.d.ts +1 -0
  5. package/dist/config/env-parsers.js +12 -0
  6. package/dist/config/index.d.ts +7 -0
  7. package/dist/config/index.js +10 -3
  8. package/dist/config/types/content.d.ts +1 -0
  9. package/dist/config.d.ts +77 -0
  10. package/dist/config.js +261 -0
  11. package/dist/crypto.d.ts +2 -0
  12. package/dist/crypto.js +32 -0
  13. package/dist/errors.d.ts +10 -0
  14. package/dist/errors.js +28 -0
  15. package/dist/fetch.d.ts +40 -0
  16. package/dist/fetch.js +910 -0
  17. package/dist/http/base-middleware.d.ts +7 -0
  18. package/dist/http/base-middleware.js +143 -0
  19. package/dist/http/cors.d.ts +0 -5
  20. package/dist/http/cors.js +0 -6
  21. package/dist/http/download-routes.js +6 -2
  22. package/dist/http/error-handler.d.ts +2 -0
  23. package/dist/http/error-handler.js +55 -0
  24. package/dist/http/mcp-routes.js +2 -2
  25. package/dist/http/mcp-sessions.d.ts +3 -5
  26. package/dist/http/mcp-sessions.js +8 -8
  27. package/dist/http/server-tuning.d.ts +9 -0
  28. package/dist/http/server-tuning.js +45 -0
  29. package/dist/http/server.d.ts +0 -10
  30. package/dist/http/server.js +33 -333
  31. package/dist/http.d.ts +78 -0
  32. package/dist/http.js +1437 -0
  33. package/dist/index.js +3 -3
  34. package/dist/mcp.d.ts +3 -0
  35. package/dist/mcp.js +94 -0
  36. package/dist/observability.d.ts +16 -0
  37. package/dist/observability.js +78 -0
  38. package/dist/server.js +20 -5
  39. package/dist/services/cache.d.ts +1 -1
  40. package/dist/services/context.d.ts +2 -0
  41. package/dist/services/context.js +3 -0
  42. package/dist/services/extractor.d.ts +1 -0
  43. package/dist/services/extractor.js +28 -2
  44. package/dist/services/fetcher.d.ts +2 -0
  45. package/dist/services/fetcher.js +35 -14
  46. package/dist/services/logger.js +4 -1
  47. package/dist/services/telemetry.d.ts +19 -0
  48. package/dist/services/telemetry.js +43 -0
  49. package/dist/services/transform-worker-pool.d.ts +10 -3
  50. package/dist/services/transform-worker-pool.js +213 -184
  51. package/dist/tools/handlers/fetch-url.tool.js +8 -6
  52. package/dist/tools/index.d.ts +1 -0
  53. package/dist/tools/index.js +13 -1
  54. package/dist/tools/schemas.d.ts +2 -0
  55. package/dist/tools/schemas.js +8 -0
  56. package/dist/tools/utils/content-transform-core.d.ts +5 -0
  57. package/dist/tools/utils/content-transform-core.js +180 -0
  58. package/dist/tools/utils/content-transform-workers.d.ts +1 -0
  59. package/dist/tools/utils/content-transform-workers.js +1 -0
  60. package/dist/tools/utils/content-transform.d.ts +3 -5
  61. package/dist/tools/utils/content-transform.js +35 -148
  62. package/dist/tools/utils/raw-markdown.js +15 -1
  63. package/dist/tools.d.ts +104 -0
  64. package/dist/tools.js +421 -0
  65. package/dist/transform.d.ts +69 -0
  66. package/dist/transform.js +1509 -0
  67. package/dist/transformers/markdown.d.ts +4 -1
  68. package/dist/transformers/markdown.js +182 -53
  69. package/dist/utils/cancellation.d.ts +1 -0
  70. package/dist/utils/cancellation.js +18 -0
  71. package/dist/utils/code-language.d.ts +0 -9
  72. package/dist/utils/code-language.js +5 -5
  73. package/dist/utils/host-normalizer.d.ts +1 -0
  74. package/dist/utils/host-normalizer.js +37 -0
  75. package/dist/utils/url-redactor.d.ts +1 -0
  76. package/dist/utils/url-redactor.js +13 -0
  77. package/dist/utils/url-validator.js +8 -5
  78. package/dist/workers/transform-worker.js +82 -38
  79. package/package.json +8 -7
@@ -0,0 +1,77 @@
1
/** Severity names accepted for `config.logging.level`. */
export type LogLevel =
    | 'debug'
    | 'info'
    | 'warn'
    | 'error';

/** Authentication settings exposed as `config.auth`. */
interface AuthConfig {
    /** Selected authentication strategy. */
    mode: 'oauth' | 'static';
    issuerUrl: URL | undefined;
    authorizationUrl: URL | undefined;
    tokenUrl: URL | undefined;
    revocationUrl: URL | undefined;
    registrationUrl: URL | undefined;
    introspectionUrl: URL | undefined;
    /** Always present, unlike the other endpoint URLs. */
    resourceUrl: URL;
    requiredScopes: string[];
    clientId: string | undefined;
    clientSecret: string | undefined;
    introspectionTimeoutMs: number;
    staticTokens: string[];
}

/** Mutable process-wide flags exposed as `config.runtime`. */
interface RuntimeState {
    httpMode: boolean;
}

/** Optional HTTP server timeouts and shutdown switches. */
interface HttpServerTuning {
    headersTimeoutMs: number | undefined;
    requestTimeoutMs: number | undefined;
    keepAliveTimeoutMs: number | undefined;
    shutdownCloseIdleConnections: boolean;
    shutdownCloseAllConnections: boolean;
}

/** Identity, listen address and session limits of the server. */
interface ServerSettings {
    name: string;
    version: string;
    port: number;
    host: string;
    sessionTtlMs: number;
    sessionInitTimeoutMs: number;
    maxSessions: number;
    http: HttpServerTuning;
}

/** Outbound fetch limits. */
interface FetcherSettings {
    timeout: number;
    maxRedirects: number;
    userAgent: string;
    maxContentLength: number;
}

/** Cache switch and sizing. */
interface CacheSettings {
    enabled: boolean;
    ttl: number;
    maxKeys: number;
}

/** Content-extraction thresholds. */
interface ExtractionSettings {
    maxBlockLength: number;
    minParagraphLength: number;
}

/** Logging settings. */
interface LoggingSettings {
    level: LogLevel;
}

/** Fixed size limits. */
interface ConstantLimits {
    maxHtmlSize: number;
    maxUrlLength: number;
    maxInlineContentChars: number;
}

/** Host/IP block and allow lists plus access switches. */
interface SecuritySettings {
    blockedHosts: Set<string>;
    blockedIpPatterns: RegExp[];
    allowedHosts: Set<string>;
    apiKey: string | undefined;
    allowRemote: boolean;
}

/** Rate-limiter settings. */
interface RateLimitSettings {
    enabled: boolean;
    maxRequests: number;
    windowMs: number;
    cleanupIntervalMs: number;
}

/** Process-wide configuration object. */
export declare const config: {
    server: ServerSettings;
    fetcher: FetcherSettings;
    cache: CacheSettings;
    extraction: ExtractionSettings;
    logging: LoggingSettings;
    constants: ConstantLimits;
    security: SecuritySettings;
    auth: AuthConfig;
    rateLimit: RateLimitSettings;
    runtime: RuntimeState;
};

/** Switches the runtime state into HTTP mode. */
export declare function enableHttpMode(): void;
77
+ export {};
package/dist/config.js ADDED
@@ -0,0 +1,261 @@
1
+ import packageJson from '../package.json' with { type: 'json' };
2
/**
 * Renders a list of octets as a dotted address string.
 *
 * @param parts Octet values, in order.
 * @returns The values joined with `.`.
 */
function buildIpv4(parts) {
    const dotted = parts.join('.');
    return dotted;
}
5
/**
 * Makes a hostname safe to embed in a URL authority.
 *
 * A value containing `:` that is not already bracketed is wrapped in
 * `[...]`; everything else is returned untouched.
 *
 * @param hostname Host name or IP literal.
 * @returns The hostname, bracketed when required.
 */
function formatHostForUrl(hostname) {
    const needsBrackets = hostname.includes(':') && !hostname.startsWith('[');
    return needsBrackets ? `[${hostname}]` : hostname;
}
11
/**
 * Normalizes one host entry (as found in a host allow-list) to a bare,
 * lower-cased host name or IP literal without port or brackets.
 *
 * Accepted shapes:
 * - `host` / `host:port`           -> `host`
 * - `[ipv6]` / `[ipv6]:port`       -> `ipv6`
 * - bare IPv6 such as `::1`        -> returned unchanged
 *
 * @param value Raw entry text.
 * @returns The normalized host, or `null` when the entry is empty or
 *   malformed (blank, unterminated bracket, or nothing left after stripping).
 */
function normalizeHostValue(value) {
    const trimmed = value.trim().toLowerCase();
    if (!trimmed)
        return null;
    if (trimmed.startsWith('[')) {
        const end = trimmed.indexOf(']');
        if (end === -1)
            return null;
        // `[]` carries no host; report it as invalid rather than as ''.
        return trimmed.slice(1, end) || null;
    }
    const firstColon = trimmed.indexOf(':');
    if (firstColon === -1)
        return trimmed;
    // More than one colon cannot be `host:port`; it is an unbracketed IPv6
    // literal and must not be cut at the first colon.
    if (firstColon !== trimmed.lastIndexOf(':'))
        return trimmed;
    // Exactly one colon: strip the port. A value like `:8080` has no host.
    return trimmed.slice(0, firstColon) || null;
}
27
/** Log level names this module recognizes. */
const ALLOWED_LOG_LEVELS = new Set(['debug', 'info', 'warn', 'error']);

/**
 * Tells whether `value` is one of the recognized log level names.
 *
 * @param value Candidate level (compared exactly, no case folding).
 * @returns `true` when the value is in {@link ALLOWED_LOG_LEVELS}.
 */
function isLogLevel(value) {
    const recognized = ALLOWED_LOG_LEVELS.has(value);
    return recognized;
}
36
/**
 * Lower-bound check that treats an omitted bound as "no limit".
 *
 * @param value Number under test.
 * @param min Inclusive minimum, or `undefined` for none.
 * @returns `true` only when a minimum is given and `value` is below it.
 */
function isBelowMin(value, min) {
    return min !== undefined && value < min;
}

/**
 * Upper-bound check that treats an omitted bound as "no limit".
 *
 * @param value Number under test.
 * @param max Inclusive maximum, or `undefined` for none.
 * @returns `true` only when a maximum is given and `value` is above it.
 */
function isAboveMax(value, max) {
    return max !== undefined && value > max;
}

/**
 * Parses an integer setting, falling back to `defaultValue` when the text is
 * missing, malformed, or outside `[min, max]`.
 *
 * @param envValue Raw text, typically from `process.env`.
 * @param defaultValue Value returned when parsing or validation fails.
 * @param min Optional inclusive minimum.
 * @param max Optional inclusive maximum.
 * @returns The parsed integer or `defaultValue`.
 */
function parseInteger(envValue, defaultValue, min, max) {
    // Single source of truth for parsing rules; only the fallback differs.
    return parseOptionalInteger(envValue, min, max) ?? defaultValue;
}

/**
 * Parses an integer setting, returning `undefined` when the text is missing,
 * malformed, or outside `[min, max]`.
 *
 * Only an optionally signed run of decimal digits (surrounding whitespace
 * allowed) is accepted. `parseInt` alone would silently accept a numeric
 * prefix, so `8080abc`, `3000.9` and `1e3` are rejected here instead of
 * becoming 8080, 3000 and 1.
 *
 * @param envValue Raw text, typically from `process.env`.
 * @param min Optional inclusive minimum.
 * @param max Optional inclusive maximum.
 * @returns The parsed integer, or `undefined`.
 */
function parseOptionalInteger(envValue, min, max) {
    if (!envValue)
        return undefined;
    const text = envValue.trim();
    if (!/^[+-]?\d+$/.test(text))
        return undefined;
    const parsed = Number.parseInt(text, 10);
    // Digit runs too long to be represented exactly are treated as invalid.
    if (!Number.isSafeInteger(parsed))
        return undefined;
    if (isBelowMin(parsed, min) || isAboveMax(parsed, max))
        return undefined;
    return parsed;
}
70
/**
 * Parses a boolean setting.
 *
 * Missing or blank text yields `defaultValue`. Otherwise the text is trimmed
 * and lower-cased, and the spellings `false`, `0`, `no` and `off` mean
 * `false`. Every other non-empty value means `true`, which keeps existing
 * truthy configurations working.
 *
 * The comparison is case- and whitespace-insensitive on purpose: a strict
 * `!== 'false'` check would turn `FALSE` or `0` into `true`, silently
 * enabling opt-in switches.
 *
 * @param envValue Raw text, typically from `process.env`.
 * @param defaultValue Value used when the text is missing or blank.
 * @returns The parsed boolean.
 */
function parseBoolean(envValue, defaultValue) {
    if (!envValue)
        return defaultValue;
    const normalized = envValue.trim().toLowerCase();
    if (!normalized)
        return defaultValue;
    return !['false', '0', 'no', 'off'].includes(normalized);
}
75
/**
 * Splits a whitespace- and/or comma-separated setting into its entries.
 *
 * @param envValue Raw text, typically from `process.env`.
 * @returns The non-empty, trimmed entries in their original order; an empty
 *   array when the text is missing or empty.
 */
function parseList(envValue) {
    const entries = [];
    if (!envValue)
        return entries;
    for (const piece of envValue.split(/[\s,]+/)) {
        const cleaned = piece.trim();
        if (cleaned.length > 0) {
            entries.push(cleaned);
        }
    }
    return entries;
}
83
/**
 * Converts a URL-valued setting into a `URL` object.
 *
 * @param value Raw text, typically from `process.env`.
 * @param name Setting name, used only in the error message.
 * @returns The parsed URL, or `undefined` when the text is missing or empty.
 * @throws {Error} When the text is present but is not a parseable URL.
 */
function parseUrlEnv(value, name) {
    if (!value)
        return undefined;
    if (URL.canParse(value)) {
        return new URL(value);
    }
    throw new Error(`Invalid ${name} value: ${value}`);
}
91
/**
 * Builds the set of allowed hosts from a list-valued setting.
 *
 * Each entry produced by `parseList` is passed through
 * `normalizeHostValue`; entries that normalize to a falsy value are dropped.
 *
 * @param envValue Raw text, typically from `process.env`.
 * @returns A set of normalized host values.
 */
function parseAllowedHosts(envValue) {
    const normalizedEntries = parseList(envValue)
        .map((entry) => normalizeHostValue(entry))
        .filter((entry) => Boolean(entry));
    return new Set(normalizedEntries);
}
101
/**
 * Resolves the log level from a setting, case-insensitively.
 *
 * @param envValue Raw text, typically from `process.env`.
 * @returns The lower-cased level when it is recognized, otherwise `'info'`.
 */
function parseLogLevel(envValue) {
    const candidate = envValue?.toLowerCase();
    if (candidate && isLogLevel(candidate)) {
        return candidate;
    }
    return 'info';
}
107
/** Byte-size limits shared by the configuration below. */
const SIZE_LIMITS = { TEN_MB: 10 * 1024 * 1024 };

/** Default durations, in milliseconds. */
const TIMEOUT = {
    // 15 seconds
    DEFAULT_FETCH_TIMEOUT_MS: 15 * 1000,
    // 30 minutes
    DEFAULT_SESSION_TTL_MS: 30 * 60 * 1000,
};
114
/**
 * Reads the issuer, authorization and token endpoint URLs from the
 * environment.
 *
 * @returns An object whose fields are `URL` instances or `undefined` when the
 *   corresponding variable is unset.
 * @throws {Error} Propagated from `parseUrlEnv` when a variable holds an
 *   unparseable URL.
 */
function readCoreOAuthUrls() {
    const env = process.env;
    const issuerUrl = parseUrlEnv(env.OAUTH_ISSUER_URL, 'OAUTH_ISSUER_URL');
    const authorizationUrl = parseUrlEnv(env.OAUTH_AUTHORIZATION_URL, 'OAUTH_AUTHORIZATION_URL');
    const tokenUrl = parseUrlEnv(env.OAUTH_TOKEN_URL, 'OAUTH_TOKEN_URL');
    return { issuerUrl, authorizationUrl, tokenUrl };
}
121
/**
 * Reads the revocation, registration, introspection and resource URLs from
 * the environment.
 *
 * @param baseUrl Base against which the default resource URL (`/mcp`) is
 *   resolved when `OAUTH_RESOURCE_URL` is unset.
 * @returns An object with the four URLs; only `resourceUrl` is guaranteed to
 *   be defined.
 * @throws {Error} Propagated from `parseUrlEnv` when a variable holds an
 *   unparseable URL.
 */
function readOptionalOAuthUrls(baseUrl) {
    const env = process.env;
    const revocationUrl = parseUrlEnv(env.OAUTH_REVOCATION_URL, 'OAUTH_REVOCATION_URL');
    const registrationUrl = parseUrlEnv(env.OAUTH_REGISTRATION_URL, 'OAUTH_REGISTRATION_URL');
    const introspectionUrl = parseUrlEnv(env.OAUTH_INTROSPECTION_URL, 'OAUTH_INTROSPECTION_URL');
    const configuredResource = parseUrlEnv(env.OAUTH_RESOURCE_URL, 'OAUTH_RESOURCE_URL');
    const resourceUrl = configuredResource ?? new URL('/mcp', baseUrl);
    return { revocationUrl, registrationUrl, introspectionUrl, resourceUrl };
}
130
/**
 * Collects every OAuth-related URL into one object.
 *
 * @param baseUrl Base used for the default resource URL.
 * @returns The core URLs followed by the optional URLs, merged.
 */
function readOAuthUrls(baseUrl) {
    const coreUrls = readCoreOAuthUrls();
    const optionalUrls = readOptionalOAuthUrls(baseUrl);
    return Object.assign({}, coreUrls, optionalUrls);
}
133
/**
 * Decides which authentication mode is active.
 *
 * An explicit `'oauth'` or `'static'` request wins. Otherwise the mode is
 * `'oauth'` when any of the issuer, authorization, token or introspection
 * URLs is defined, and `'static'` when none is.
 *
 * @param authModeEnv Requested mode (already lower-cased by the caller), or
 *   `undefined`.
 * @param urls Object carrying the OAuth endpoint URLs.
 * @returns `'oauth'` or `'static'`.
 */
function resolveAuthMode(authModeEnv, urls) {
    switch (authModeEnv) {
        case 'oauth':
        case 'static':
            return authModeEnv;
        default:
            break;
    }
    const { issuerUrl, authorizationUrl, tokenUrl, introspectionUrl } = urls;
    for (const endpoint of [issuerUrl, authorizationUrl, tokenUrl, introspectionUrl]) {
        if (endpoint !== undefined) {
            return 'oauth';
        }
    }
    return 'static';
}
146
/**
 * Gathers the static bearer tokens from `ACCESS_TOKENS` plus, when set,
 * `API_KEY`.
 *
 * @returns The distinct tokens, in first-seen order.
 */
function collectStaticTokens() {
    const candidates = parseList(process.env.ACCESS_TOKENS);
    const apiKey = process.env.API_KEY;
    if (apiKey) {
        candidates.push(apiKey);
    }
    // A Set drops duplicates while preserving first-insertion order.
    return [...new Set(candidates)];
}
153
/**
 * Assembles the authentication section of the configuration from the
 * environment.
 *
 * @param baseUrl Base used for the default OAuth resource URL.
 * @returns The auth configuration: resolved mode, endpoint URLs, required
 *   scopes, client credentials, introspection timeout (default 5000 ms,
 *   accepted range 1000-30000) and static tokens.
 */
function buildAuthConfig(baseUrl) {
    const urls = readOAuthUrls(baseUrl);
    const requestedMode = process.env.AUTH_MODE?.toLowerCase();
    const authConfig = {
        mode: resolveAuthMode(requestedMode, urls),
        ...urls,
        requiredScopes: parseList(process.env.OAUTH_REQUIRED_SCOPES),
        clientId: process.env.OAUTH_CLIENT_ID,
        clientSecret: process.env.OAUTH_CLIENT_SECRET,
        introspectionTimeoutMs: parseInteger(process.env.OAUTH_INTROSPECTION_TIMEOUT_MS, 5000, 1000, 30000),
        staticTokens: collectStaticTokens(),
    };
    return authConfig;
}
166
// Well-known IPv4 addresses, assembled from octets.
const LOOPBACK_V4 = buildIpv4([127, 0, 0, 1]);
const ANY_V4 = buildIpv4([0, 0, 0, 0]);
const METADATA_V4_AWS = buildIpv4([169, 254, 169, 254]);
// NOTE(review): 100.100.100.200 is commonly documented as Alibaba Cloud's
// metadata address - confirm the intended provider. The name is kept because
// other code in this module refers to it.
const METADATA_V4_AZURE = buildIpv4([100, 100, 100, 200]);

// Listen address: HOST defaults to loopback; PORT defaults to 3000 and must
// lie in 1024-65535, otherwise the default is used.
const host = process.env.HOST ?? LOOPBACK_V4;
const port = parseInteger(process.env.PORT, 3000, 1024, 65535);
const baseUrl = new URL('http://' + formatHostForUrl(host) + ':' + port);

// Remote access is opt-in.
const allowRemote = parseBoolean(process.env.ALLOW_REMOTE, false);

// Mutable runtime flags, shared by reference with `config.runtime`.
const runtimeState = { httpMode: false };
177
/**
 * Process-wide configuration, resolved once at module load from the
 * environment and built-in defaults.
 *
 * The `security.blockedIpPatterns` entries are prefix tests on textual IP
 * addresses. Three of them cover complete reserved ranges rather than a
 * single prefix inside the range:
 * - 100.64.0.0/10 (shared address space): second octet 64-127.
 * - fc00::/7 (unique local IPv6): first group fc00-fdff.
 * - fe80::/10 (link-local IPv6): first group fe80-febf.
 */
export const config = {
    server: {
        name: 'superFetch',
        version: packageJson.version,
        port,
        host,
        sessionTtlMs: TIMEOUT.DEFAULT_SESSION_TTL_MS,
        sessionInitTimeoutMs: 10000,
        maxSessions: 200,
        http: {
            // Optional timeouts: undefined unless set to 1000-600000 ms.
            headersTimeoutMs: parseOptionalInteger(process.env.SERVER_HEADERS_TIMEOUT_MS, 1000, 600000),
            requestTimeoutMs: parseOptionalInteger(process.env.SERVER_REQUEST_TIMEOUT_MS, 1000, 600000),
            keepAliveTimeoutMs: parseOptionalInteger(process.env.SERVER_KEEP_ALIVE_TIMEOUT_MS, 1000, 600000),
            shutdownCloseIdleConnections: parseBoolean(process.env.SERVER_SHUTDOWN_CLOSE_IDLE, false),
            shutdownCloseAllConnections: parseBoolean(process.env.SERVER_SHUTDOWN_CLOSE_ALL, false),
        },
    },
    fetcher: {
        timeout: TIMEOUT.DEFAULT_FETCH_TIMEOUT_MS,
        maxRedirects: 5,
        userAgent: process.env.USER_AGENT ?? 'superFetch-MCP/2.0',
        maxContentLength: SIZE_LIMITS.TEN_MB,
    },
    cache: {
        enabled: parseBoolean(process.env.CACHE_ENABLED, true),
        // Default 3600; values outside 60-86400 fall back to the default.
        ttl: parseInteger(process.env.CACHE_TTL, 3600, 60, 86400),
        maxKeys: 100,
    },
    extraction: {
        maxBlockLength: 5000,
        minParagraphLength: 10,
    },
    logging: {
        level: parseLogLevel(process.env.LOG_LEVEL),
    },
    constants: {
        maxHtmlSize: SIZE_LIMITS.TEN_MB,
        maxUrlLength: 2048,
        maxInlineContentChars: 20000,
    },
    security: {
        blockedHosts: new Set([
            'localhost',
            LOOPBACK_V4,
            ANY_V4,
            '::1',
            METADATA_V4_AWS,
            'metadata.google.internal',
            'metadata.azure.com',
            METADATA_V4_AZURE,
            'instance-data',
        ]),
        blockedIpPatterns: [
            /^10\./,
            /^172\.(1[6-9]|2\d|3[01])\./,
            /^192\.168\./,
            /^127\./,
            /^0\./,
            /^169\.254\./,
            // 100.64.0.0/10: second octet 64-127, not just 100.64.x.x.
            /^100\.(6[4-9]|[7-9]\d|1[01]\d|12[0-7])\./,
            // fc00::/7: first group fc00-fdff, not just the literal fc00/fd00.
            /^f[cd][0-9a-f]{2}:/i,
            // fe80::/10: first group fe80-febf, not just the literal fe80.
            /^fe[89ab][0-9a-f]:/i,
            /^::ffff:127\./,
            /^::ffff:10\./,
            /^::ffff:172\.(1[6-9]|2\d|3[01])\./,
            /^::ffff:192\.168\./,
            /^::ffff:169\.254\./,
        ],
        allowedHosts: parseAllowedHosts(process.env.ALLOWED_HOSTS),
        apiKey: process.env.API_KEY,
        allowRemote,
    },
    auth: buildAuthConfig(baseUrl),
    rateLimit: {
        enabled: true,
        maxRequests: 100,
        windowMs: 60000,
        cleanupIntervalMs: 60000,
    },
    // Shared by reference so that enableHttpMode() is visible through config.
    runtime: runtimeState,
};
259
/**
 * Marks the process as running in HTTP mode by setting
 * `runtimeState.httpMode` to `true`.
 */
export function enableHttpMode() {
    Object.assign(runtimeState, { httpMode: true });
}
@@ -0,0 +1,2 @@
1
/**
 * Compares two strings by their UTF-8 bytes.
 *
 * @param a First string.
 * @param b Second string.
 * @returns Whether the two strings are equal.
 */
export declare function timingSafeEqualUtf8(
    a: string,
    b: string,
): boolean;

/**
 * Computes the SHA-256 digest of the input.
 *
 * @param input Text or raw bytes to hash.
 * @returns The digest as a hexadecimal string.
 */
export declare function sha256Hex(
    input: string | Uint8Array,
): string;
package/dist/crypto.js ADDED
@@ -0,0 +1,32 @@
1
+ import { createHash, hash as oneShotHash, timingSafeEqual } from 'node:crypto';
2
/** Inputs up to this many bytes (5 MiB) are hashed with the one-shot API. */
const MAX_HASH_INPUT_BYTES = 5 * 2 ** 20;

/** Digest algorithms this module is willing to compute. */
const ALLOWED_HASH_ALGORITHMS = new Set(['sha256', 'sha512']);
7
/**
 * Measures the size of a hash input in bytes.
 *
 * @param input A string (measured as UTF-8) or a value exposing `byteLength`.
 * @returns The byte count.
 */
function byteLength(input) {
    if (typeof input === 'string') {
        return Buffer.byteLength(input, 'utf8');
    }
    return input.byteLength;
}
12
/**
 * Compares two strings by their UTF-8 encodings using
 * `crypto.timingSafeEqual`.
 *
 * Inputs whose encoded lengths differ are reported as unequal before the
 * byte comparison runs, since `timingSafeEqual` requires equal lengths.
 *
 * @param a First string.
 * @param b Second string.
 * @returns `true` when both strings encode to identical bytes.
 */
export function timingSafeEqualUtf8(a, b) {
    const [left, right] = [a, b].map((text) => Buffer.from(text, 'utf8'));
    return left.length === right.length && timingSafeEqual(left, right);
}
19
/**
 * Hashes `input` with an allow-listed algorithm and returns the hex digest.
 *
 * Inputs no larger than `MAX_HASH_INPUT_BYTES` go through the one-shot
 * `crypto.hash` call; larger inputs go through a `createHash` instance.
 *
 * @param algorithm Digest algorithm name; must be in
 *   `ALLOWED_HASH_ALGORITHMS`.
 * @param input Text or raw bytes to hash.
 * @returns The digest as a hexadecimal string.
 * @throws {Error} When the algorithm is not allow-listed.
 */
function hashHex(algorithm, input) {
    if (!ALLOWED_HASH_ALGORITHMS.has(algorithm)) {
        throw new Error(`Hash algorithm not allowed: ${algorithm}`);
    }
    const fitsOneShot = byteLength(input) <= MAX_HASH_INPUT_BYTES;
    return fitsOneShot
        ? oneShotHash(algorithm, input, 'hex')
        : createHash(algorithm).update(input).digest('hex');
}
30
/**
 * Computes the SHA-256 digest of `input`.
 *
 * @param input Text or raw bytes to hash.
 * @returns The digest as a hexadecimal string.
 */
export function sha256Hex(input) {
    const digest = hashHex('sha256', input);
    return digest;
}
@@ -0,0 +1,10 @@
1
/** Error describing a failed fetch of a URL. */
export declare class FetchError extends Error {
    /** URL the failure relates to. */
    readonly url: string;
    /** HTTP status associated with the failure. */
    readonly statusCode: number;
    /** Machine-readable error code. */
    readonly code: string;
    /** Additional read-only context. */
    readonly details: Readonly<Record<string, unknown>>;
    constructor(
        message: string,
        url: string,
        httpStatus?: number,
        details?: Record<string, unknown>,
    );
}

/** Produces a message string for an arbitrary thrown value. */
export declare function getErrorMessage(
    error: unknown,
): string;

/** Creates an error object that carries the given `code`. */
export declare function createErrorWithCode(
    message: string,
    code: string,
): NodeJS.ErrnoException;

/** Type guard for errors that carry a `code`. */
export declare function isSystemError(
    error: unknown,
): error is NodeJS.ErrnoException;
package/dist/errors.js ADDED
@@ -0,0 +1,28 @@
1
/** Status reported when the caller supplies none. */
const DEFAULT_HTTP_STATUS = 502;

/**
 * Error describing a failed fetch of a URL.
 *
 * - `statusCode` is the supplied status, or 502 when it is `null`/`undefined`.
 * - `code` is `HTTP_<status>` for a truthy status, otherwise `FETCH_ERROR`.
 * - `details` is a frozen object holding `url`, `httpStatus` and any extra
 *   entries supplied by the caller (extras win on key collisions).
 */
export class FetchError extends Error {
    url;
    statusCode;
    code;
    details;
    /**
     * @param message Human-readable description.
     * @param url URL the failure relates to.
     * @param httpStatus Optional HTTP status.
     * @param details Optional extra context merged into `details`.
     */
    constructor(message, url, httpStatus, details = {}) {
        super(message);
        this.url = url;
        this.name = 'FetchError';
        this.statusCode = httpStatus ?? DEFAULT_HTTP_STATUS;
        if (httpStatus) {
            this.code = `HTTP_${httpStatus}`;
        }
        else {
            this.code = 'FETCH_ERROR';
        }
        const context = { url, httpStatus, ...details };
        this.details = Object.freeze(context);
        // Omit this constructor's own frame from the captured stack.
        Error.captureStackTrace(this, this.constructor);
    }
}
17
/**
 * Extracts a message from an arbitrary thrown value.
 *
 * @param error Any value.
 * @returns `error.message` for `Error` instances, otherwise
 *   `'Unknown error'`.
 */
export function getErrorMessage(error) {
    if (error instanceof Error) {
        return error.message;
    }
    return 'Unknown error';
}
20
/**
 * Creates an `Error` carrying a `code` property.
 *
 * @param message Error message.
 * @param code Value stored on the error's `code` property.
 * @returns The new error object.
 */
export function createErrorWithCode(message, code) {
    return Object.assign(new Error(message), { code });
}
24
/**
 * Checks whether a value is an `Error` with a string `code` property.
 *
 * @param error Any value.
 * @returns `true` only for `Error` instances whose `code` is a string.
 */
export function isSystemError(error) {
    if (!(error instanceof Error))
        return false;
    if (!('code' in error))
        return false;
    return typeof Reflect.get(error, 'code') === 'string';
}
@@ -0,0 +1,40 @@
1
+ import type { Dispatcher } from 'undici';
2
/** Options accepted by {@link fetchNormalizedUrl}. */
export interface FetchOptions {
    /** Optional abort signal. */
    signal?: AbortSignal;
}

/** Reports whether the given IP address text is blocked. */
export declare function isBlockedIp(ip: string): boolean;

/** Result shape of {@link normalizeUrl}. */
interface NormalizedUrl {
    normalizedUrl: string;
    hostname: string;
}

/** Normalizes a URL string and reports its hostname alongside it. */
export declare function normalizeUrl(urlString: string): NormalizedUrl;

/**
 * Validates and normalizes a URL string.
 * NOTE(review): presumably throws on invalid input - confirm against the
 * implementation, which is not part of this declaration file.
 */
export declare function validateAndNormalizeUrl(urlString: string): string;

/** Outcome of {@link transformToRawUrl}. */
export interface TransformResult {
    /** Resulting URL. */
    readonly url: string;
    /** Whether a transformation took place. */
    readonly transformed: boolean;
    /** Platform name, when one is reported. */
    readonly platform?: string;
}

/** Maps a URL to its raw-content form and reports what was done. */
export declare function transformToRawUrl(url: string): TransformResult;

/** Reports whether the URL refers to raw text content. */
export declare function isRawTextContentUrl(url: string): boolean;

/** Shared undici dispatcher. */
export declare const dispatcher: Dispatcher;

/** Tears down the module's agents. */
export declare function destroyAgents(): void;

/** Bookkeeping carried from the start of a fetch to its recorded outcome. */
interface FetchTelemetryContext {
    requestId: string;
    startTime: number;
    url: string;
    method: string;
    contextRequestId?: string;
    operationId?: string;
}

/** Opens a telemetry context for a request. */
export declare function startFetchTelemetry(
    url: string,
    method: string,
): FetchTelemetryContext;

/** Records a received response against a telemetry context. */
export declare function recordFetchResponse(
    context: FetchTelemetryContext,
    response: Response,
    contentSize?: number,
): void;

/** Records a failure against a telemetry context. */
export declare function recordFetchError(
    context: FetchTelemetryContext,
    error: unknown,
    status?: number,
): void;

/** Result shape of {@link fetchWithRedirects}. */
interface RedirectedResponse {
    response: Response;
    url: string;
}

/** Performs a fetch with a cap on the number of redirects. */
export declare function fetchWithRedirects(
    url: string,
    init: RequestInit,
    maxRedirects: number,
): Promise<RedirectedResponse>;

/** Result shape of {@link readResponseText}. */
interface ResponseText {
    text: string;
    size: number;
}

/** Reads a response body as text, given a byte limit and optional signal. */
export declare function readResponseText(
    response: Response,
    url: string,
    maxBytes: number,
    signal?: AbortSignal,
): Promise<ResponseText>;

/** Fetches an already-normalized URL and resolves to a string. */
export declare function fetchNormalizedUrl(
    normalizedUrl: string,
    options?: FetchOptions,
): Promise<string>;
40
+ export {};