@j0hanz/superfetch 2.5.3 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +350 -226
  2. package/dist/assets/logo.svg +24837 -24835
  3. package/dist/cache.d.ts +28 -20
  4. package/dist/cache.js +292 -514
  5. package/dist/config.d.ts +41 -7
  6. package/dist/config.js +298 -148
  7. package/dist/crypto.js +25 -12
  8. package/dist/dom-noise-removal.js +379 -421
  9. package/dist/errors.d.ts +2 -2
  10. package/dist/errors.js +25 -8
  11. package/dist/fetch.d.ts +18 -16
  12. package/dist/fetch.js +1132 -526
  13. package/dist/host-normalization.js +40 -10
  14. package/dist/http-native.js +628 -287
  15. package/dist/index.js +67 -7
  16. package/dist/instructions.md +44 -31
  17. package/dist/ip-blocklist.d.ts +8 -0
  18. package/dist/ip-blocklist.js +65 -0
  19. package/dist/json.js +14 -9
  20. package/dist/language-detection.d.ts +2 -11
  21. package/dist/language-detection.js +289 -280
  22. package/dist/markdown-cleanup.d.ts +0 -1
  23. package/dist/markdown-cleanup.js +391 -429
  24. package/dist/mcp-validator.js +4 -2
  25. package/dist/mcp.js +184 -135
  26. package/dist/observability.js +89 -21
  27. package/dist/resources.js +16 -6
  28. package/dist/server-tuning.d.ts +2 -0
  29. package/dist/server-tuning.js +25 -23
  30. package/dist/session.d.ts +1 -0
  31. package/dist/session.js +41 -33
  32. package/dist/tasks.d.ts +2 -0
  33. package/dist/tasks.js +91 -9
  34. package/dist/timer-utils.d.ts +5 -0
  35. package/dist/timer-utils.js +20 -0
  36. package/dist/tools.d.ts +28 -5
  37. package/dist/tools.js +317 -183
  38. package/dist/transform-types.d.ts +5 -1
  39. package/dist/transform.d.ts +3 -2
  40. package/dist/transform.js +1138 -421
  41. package/dist/type-guards.d.ts +1 -0
  42. package/dist/type-guards.js +7 -0
  43. package/dist/workers/transform-child.d.ts +1 -0
  44. package/dist/workers/transform-child.js +118 -0
  45. package/dist/workers/transform-worker.js +87 -78
  46. package/package.json +14 -6
package/dist/config.d.ts CHANGED
@@ -1,8 +1,17 @@
1
1
  export declare const serverVersion: string;
2
2
  export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
3
- export type TransformMetadataFormat = 'markdown' | 'frontmatter';
3
+ /** Hardcoded to 'markdown'. Type retained for consumer compatibility. */
4
+ export type TransformMetadataFormat = 'markdown';
5
+ export type TransformWorkerMode = 'threads' | 'process';
6
+ type AuthMode = 'oauth' | 'static';
7
+ interface WorkerResourceLimits {
8
+ maxOldGenerationSizeMb?: number;
9
+ maxYoungGenerationSizeMb?: number;
10
+ codeRangeSizeMb?: number;
11
+ stackSizeMb?: number;
12
+ }
4
13
  interface AuthConfig {
5
- mode: 'oauth' | 'static';
14
+ mode: AuthMode;
6
15
  issuerUrl: URL | undefined;
7
16
  authorizationUrl: URL | undefined;
8
17
  tokenUrl: URL | undefined;
@@ -29,9 +38,11 @@ export declare const config: {
29
38
  sessionInitTimeoutMs: number;
30
39
  maxSessions: number;
31
40
  http: {
32
- headersTimeoutMs: number | undefined;
33
- requestTimeoutMs: number | undefined;
34
- keepAliveTimeoutMs: number | undefined;
41
+ headersTimeoutMs: undefined;
42
+ requestTimeoutMs: undefined;
43
+ keepAliveTimeoutMs: undefined;
44
+ maxConnections: number;
45
+ blockPrivateConnections: boolean;
35
46
  shutdownCloseIdleConnections: boolean;
36
47
  shutdownCloseAllConnections: boolean;
37
48
  };
@@ -45,8 +56,10 @@ export declare const config: {
45
56
  transform: {
46
57
  timeoutMs: number;
47
58
  stageWarnRatio: number;
48
- metadataFormat: TransformMetadataFormat;
59
+ metadataFormat: string;
49
60
  maxWorkerScale: number;
61
+ workerMode: TransformWorkerMode;
62
+ workerResourceLimits: WorkerResourceLimits | undefined;
50
63
  };
51
64
  tools: {
52
65
  enabled: string[];
@@ -64,6 +77,27 @@ export declare const config: {
64
77
  noiseRemoval: {
65
78
  extraTokens: string[];
66
79
  extraSelectors: string[];
80
+ enabledCategories: string[];
81
+ debug: boolean;
82
+ aggressiveMode: boolean;
83
+ preserveSvgCanvas: boolean;
84
+ weights: {
85
+ hidden: number;
86
+ structural: number;
87
+ promo: number;
88
+ stickyFixed: number;
89
+ threshold: number;
90
+ };
91
+ };
92
+ markdownCleanup: {
93
+ promoteOrphanHeadings: boolean;
94
+ removeSkipLinks: boolean;
95
+ removeTocBlocks: boolean;
96
+ removeTypeDocComments: boolean;
97
+ headingKeywords: string[];
98
+ };
99
+ i18n: {
100
+ locale: string | undefined;
67
101
  };
68
102
  logging: {
69
103
  level: LogLevel;
@@ -75,7 +109,7 @@ export declare const config: {
75
109
  };
76
110
  security: {
77
111
  blockedHosts: Set<string>;
78
- blockedIpPatterns: readonly [RegExp, RegExp, RegExp, RegExp, RegExp, RegExp, RegExp, RegExp, RegExp, RegExp, RegExp, RegExp, RegExp, RegExp, RegExp];
112
+ blockedIpPatterns: readonly RegExp[];
79
113
  blockedIpPattern: RegExp;
80
114
  blockedIpv4MappedPattern: RegExp;
81
115
  allowedHosts: Set<string>;
package/dist/config.js CHANGED
@@ -1,62 +1,140 @@
1
- import packageJson from '../package.json' with { type: 'json' };
1
+ import { readFileSync } from 'node:fs';
2
+ import { findPackageJSON } from 'node:module';
3
+ import { isIP } from 'node:net';
4
+ import process from 'node:process';
5
+ import { domainToASCII } from 'node:url';
6
+ const packageJsonPath = findPackageJSON(import.meta.url);
7
+ if (!packageJsonPath) {
8
+ throw new Error('package.json not found');
9
+ }
10
+ const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
11
+ if (typeof packageJson.version !== 'string') {
12
+ throw new Error('package.json version is missing');
13
+ }
2
14
  export const serverVersion = packageJson.version;
15
+ const LOG_LEVELS = ['debug', 'info', 'warn', 'error'];
16
+ const DEFAULT_HEADING_KEYWORDS = [
17
+ 'overview',
18
+ 'introduction',
19
+ 'summary',
20
+ 'conclusion',
21
+ 'prerequisites',
22
+ 'requirements',
23
+ 'installation',
24
+ 'configuration',
25
+ 'usage',
26
+ 'features',
27
+ 'limitations',
28
+ 'troubleshooting',
29
+ 'faq',
30
+ 'resources',
31
+ 'references',
32
+ 'changelog',
33
+ 'license',
34
+ 'acknowledgments',
35
+ 'appendix',
36
+ ];
37
+ function isMissingEnvFileError(error) {
38
+ if (!error || typeof error !== 'object')
39
+ return false;
40
+ const { code } = error;
41
+ return code === 'ENOENT' || code === 'ERR_ENV_FILE_NOT_FOUND';
42
+ }
43
+ function loadEnvFileIfAvailable() {
44
+ if (typeof process.loadEnvFile !== 'function')
45
+ return;
46
+ try {
47
+ process.loadEnvFile();
48
+ }
49
+ catch (error) {
50
+ if (isMissingEnvFileError(error))
51
+ return;
52
+ throw error;
53
+ }
54
+ }
55
+ loadEnvFileIfAvailable();
56
+ const { env } = process;
57
+ class ConfigError extends Error {
58
+ name = 'ConfigError';
59
+ }
3
60
  function buildIpv4(parts) {
4
61
  return parts.join('.');
5
62
  }
6
63
  function formatHostForUrl(hostname) {
7
- if (hostname.includes(':') && !hostname.startsWith('[')) {
64
+ if (hostname.includes(':') && !hostname.startsWith('['))
8
65
  return `[${hostname}]`;
9
- }
10
66
  return hostname;
11
67
  }
12
- function normalizeHostValue(value) {
13
- const trimmed = value.trim().toLowerCase();
68
+ function stripTrailingDots(value) {
69
+ let result = value;
70
+ while (result.endsWith('.'))
71
+ result = result.slice(0, -1);
72
+ return result;
73
+ }
74
+ function normalizeHostname(value) {
75
+ const trimmed = value.trim();
14
76
  if (!trimmed)
15
77
  return null;
16
- if (trimmed.startsWith('[')) {
17
- const end = trimmed.indexOf(']');
18
- if (end === -1)
78
+ const lowered = trimmed.toLowerCase();
79
+ const ipType = isIP(lowered);
80
+ if (ipType)
81
+ return stripTrailingDots(lowered);
82
+ const ascii = domainToASCII(lowered);
83
+ return ascii ? stripTrailingDots(ascii) : null;
84
+ }
85
+ function normalizeHostValue(value) {
86
+ const raw = value.trim();
87
+ if (!raw)
88
+ return null;
89
+ if (raw.includes('://')) {
90
+ if (!URL.canParse(raw))
19
91
  return null;
20
- return trimmed.slice(1, end);
92
+ return normalizeHostname(new URL(raw).hostname);
21
93
  }
22
- const colonIndex = trimmed.indexOf(':');
23
- if (colonIndex !== -1) {
24
- return trimmed.slice(0, colonIndex);
94
+ const candidateUrl = `http://${raw}`;
95
+ if (URL.canParse(candidateUrl)) {
96
+ return normalizeHostname(new URL(candidateUrl).hostname);
25
97
  }
26
- return trimmed;
27
- }
28
- const ALLOWED_LOG_LEVELS = new Set([
29
- 'debug',
30
- 'info',
31
- 'warn',
32
- 'error',
33
- ]);
34
- function isLogLevel(value) {
35
- return ALLOWED_LOG_LEVELS.has(value);
36
- }
37
- function isOutsideRange(value, min, max) {
38
- return ((min !== undefined && value < min) || (max !== undefined && value > max));
98
+ const lowered = raw.toLowerCase();
99
+ if (lowered.startsWith('[')) {
100
+ const end = lowered.indexOf(']');
101
+ if (end === -1)
102
+ return null;
103
+ return normalizeHostname(lowered.slice(1, end));
104
+ }
105
+ if (isIP(lowered) === 6)
106
+ return stripTrailingDots(lowered);
107
+ const firstColon = lowered.indexOf(':');
108
+ if (firstColon === -1)
109
+ return normalizeHostname(lowered);
110
+ if (lowered.includes(':', firstColon + 1))
111
+ return null;
112
+ const host = lowered.slice(0, firstColon);
113
+ return host ? normalizeHostname(host) : null;
39
114
  }
40
115
  function parseIntegerValue(envValue, min, max) {
41
116
  if (!envValue)
42
117
  return null;
43
- const parsed = parseInt(envValue, 10);
118
+ const parsed = Number.parseInt(envValue, 10);
44
119
  if (Number.isNaN(parsed))
45
120
  return null;
46
- if (isOutsideRange(parsed, min, max))
121
+ if (min !== undefined && parsed < min)
122
+ return null;
123
+ if (max !== undefined && parsed > max)
47
124
  return null;
48
125
  return parsed;
49
126
  }
127
+ function parseOptionalInteger(envValue, min, max) {
128
+ const parsed = parseIntegerValue(envValue, min, max);
129
+ return parsed ?? undefined;
130
+ }
50
131
  function parseInteger(envValue, defaultValue, min, max) {
51
132
  return parseIntegerValue(envValue, min, max) ?? defaultValue;
52
133
  }
53
- function parseOptionalInteger(envValue, min, max) {
54
- return parseIntegerValue(envValue, min, max) ?? undefined;
55
- }
56
134
  function parseBoolean(envValue, defaultValue) {
57
135
  if (!envValue)
58
136
  return defaultValue;
59
- return envValue !== 'false';
137
+ return envValue.trim().toLowerCase() !== 'false';
60
138
  }
61
139
  function parseList(envValue) {
62
140
  if (!envValue)
@@ -66,74 +144,115 @@ function parseList(envValue) {
66
144
  .map((entry) => entry.trim())
67
145
  .filter((entry) => entry.length > 0);
68
146
  }
147
+ function parseListOrDefault(envValue, defaultValue) {
148
+ const parsed = parseList(envValue);
149
+ return parsed.length > 0 ? parsed : [...defaultValue];
150
+ }
151
+ function normalizeLocale(value) {
152
+ if (!value)
153
+ return undefined;
154
+ const trimmed = value.trim();
155
+ if (!trimmed)
156
+ return undefined;
157
+ const lowered = trimmed.toLowerCase();
158
+ if (lowered === 'system' || lowered === 'default')
159
+ return undefined;
160
+ return trimmed;
161
+ }
69
162
  function parseUrlEnv(value, name) {
70
163
  if (!value)
71
164
  return undefined;
72
165
  if (!URL.canParse(value)) {
73
- throw new Error(`Invalid ${name} value: ${value}`);
166
+ throw new ConfigError(`Invalid ${name} value: ${value}`);
74
167
  }
75
168
  return new URL(value);
76
169
  }
77
170
  function readUrlEnv(name) {
78
- return parseUrlEnv(process.env[name], name);
171
+ return parseUrlEnv(env[name], name);
79
172
  }
80
173
  function parseAllowedHosts(envValue) {
81
174
  const hosts = new Set();
82
175
  for (const entry of parseList(envValue)) {
83
176
  const normalized = normalizeHostValue(entry);
84
- if (normalized) {
177
+ if (normalized)
85
178
  hosts.add(normalized);
86
- }
87
179
  }
88
180
  return hosts;
89
181
  }
182
+ const ALLOWED_LOG_LEVELS = new Set(LOG_LEVELS);
183
+ function isLogLevel(value) {
184
+ return ALLOWED_LOG_LEVELS.has(value);
185
+ }
90
186
  function parseLogLevel(envValue) {
91
- const level = envValue?.toLowerCase();
92
- if (!level)
187
+ if (!envValue)
93
188
  return 'info';
189
+ const level = envValue.toLowerCase();
94
190
  return isLogLevel(level) ? level : 'info';
95
191
  }
96
- function parseTransformMetadataFormat(envValue) {
97
- const normalized = envValue?.trim().toLowerCase();
98
- if (normalized === 'frontmatter')
99
- return 'frontmatter';
100
- return 'markdown';
192
+ function parseTransformWorkerMode(envValue) {
193
+ if (!envValue)
194
+ return 'threads';
195
+ const normalized = envValue.trim().toLowerCase();
196
+ if (normalized === 'process' || normalized === 'fork')
197
+ return 'process';
198
+ return 'threads';
101
199
  }
102
- const SIZE_LIMITS = {
103
- TEN_MB: 10 * 1024 * 1024,
104
- };
105
- const TIMEOUT = {
106
- DEFAULT_FETCH_TIMEOUT_MS: parseInteger(process.env.FETCH_TIMEOUT_MS, 15000, 1000, 60000),
107
- DEFAULT_SESSION_TTL_MS: 30 * 60 * 1000,
108
- DEFAULT_TRANSFORM_TIMEOUT_MS: parseInteger(process.env.TRANSFORM_TIMEOUT_MS, 30000, 5000, 120000),
109
- };
110
- const DEFAULT_TOOL_TIMEOUT_MS = TIMEOUT.DEFAULT_FETCH_TIMEOUT_MS +
111
- TIMEOUT.DEFAULT_TRANSFORM_TIMEOUT_MS +
112
- 5000;
113
- function readCoreOAuthUrls() {
114
- return {
115
- issuerUrl: readUrlEnv('OAUTH_ISSUER_URL'),
116
- authorizationUrl: readUrlEnv('OAUTH_AUTHORIZATION_URL'),
117
- tokenUrl: readUrlEnv('OAUTH_TOKEN_URL'),
118
- };
200
+ function parsePort(envValue) {
201
+ if (envValue?.trim() === '0')
202
+ return 0;
203
+ return parseInteger(envValue, 3000, 1024, 65535);
119
204
  }
120
- function readOptionalOAuthUrls(baseUrl) {
121
- return {
122
- revocationUrl: readUrlEnv('OAUTH_REVOCATION_URL'),
123
- registrationUrl: readUrlEnv('OAUTH_REGISTRATION_URL'),
124
- introspectionUrl: readUrlEnv('OAUTH_INTROSPECTION_URL'),
125
- resourceUrl: parseUrlEnv(process.env.OAUTH_RESOURCE_URL, 'OAUTH_RESOURCE_URL') ??
126
- new URL('/mcp', baseUrl),
127
- };
205
+ const MAX_HTML_BYTES = 10 * 1024 * 1024; // 10 MB
206
+ const MAX_INLINE_CONTENT_CHARS = 0;
207
+ const DEFAULT_SESSION_TTL_MS = 30 * 60 * 1000;
208
+ const DEFAULT_SESSION_INIT_TIMEOUT_MS = 10000;
209
+ const DEFAULT_MAX_SESSIONS = 200;
210
+ const DEFAULT_USER_AGENT = `superFetch-MCP/${serverVersion}`;
211
+ const DEFAULT_TOOL_TIMEOUT_PADDING_MS = 5000;
212
+ const DEFAULT_TRANSFORM_TIMEOUT_MS = 30000;
213
+ const DEFAULT_FETCH_TIMEOUT_MS = parseInteger(env.FETCH_TIMEOUT_MS, 15000, 1000, 60000);
214
+ const DEFAULT_TOOL_TIMEOUT_MS = DEFAULT_FETCH_TIMEOUT_MS +
215
+ DEFAULT_TRANSFORM_TIMEOUT_MS +
216
+ DEFAULT_TOOL_TIMEOUT_PADDING_MS;
217
+ function resolveWorkerResourceLimits() {
218
+ const limits = {};
219
+ const maxOldGenerationSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_MAX_OLD_GENERATION_MB, 1);
220
+ const maxYoungGenerationSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_MAX_YOUNG_GENERATION_MB, 1);
221
+ const codeRangeSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_CODE_RANGE_MB, 1);
222
+ const stackSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_STACK_MB, 1);
223
+ if (maxOldGenerationSizeMb !== undefined) {
224
+ limits.maxOldGenerationSizeMb = maxOldGenerationSizeMb;
225
+ }
226
+ if (maxYoungGenerationSizeMb !== undefined) {
227
+ limits.maxYoungGenerationSizeMb = maxYoungGenerationSizeMb;
228
+ }
229
+ if (codeRangeSizeMb !== undefined) {
230
+ limits.codeRangeSizeMb = codeRangeSizeMb;
231
+ }
232
+ if (stackSizeMb !== undefined) {
233
+ limits.stackSizeMb = stackSizeMb;
234
+ }
235
+ return Object.keys(limits).length > 0 ? limits : undefined;
128
236
  }
129
237
  function readOAuthUrls(baseUrl) {
130
- return { ...readCoreOAuthUrls(), ...readOptionalOAuthUrls(baseUrl) };
238
+ const issuerUrl = readUrlEnv('OAUTH_ISSUER_URL');
239
+ const authorizationUrl = readUrlEnv('OAUTH_AUTHORIZATION_URL');
240
+ const tokenUrl = readUrlEnv('OAUTH_TOKEN_URL');
241
+ const revocationUrl = readUrlEnv('OAUTH_REVOCATION_URL');
242
+ const registrationUrl = readUrlEnv('OAUTH_REGISTRATION_URL');
243
+ const introspectionUrl = readUrlEnv('OAUTH_INTROSPECTION_URL');
244
+ const resourceUrl = new URL('/mcp', baseUrl);
245
+ return {
246
+ issuerUrl,
247
+ authorizationUrl,
248
+ tokenUrl,
249
+ revocationUrl,
250
+ registrationUrl,
251
+ introspectionUrl,
252
+ resourceUrl,
253
+ };
131
254
  }
132
- function resolveAuthMode(authModeEnv, urls) {
133
- if (authModeEnv === 'oauth')
134
- return 'oauth';
135
- if (authModeEnv === 'static')
136
- return 'static';
255
+ function resolveAuthMode(urls) {
137
256
  const oauthConfigured = [
138
257
  urls.issuerUrl,
139
258
  urls.authorizationUrl,
@@ -143,22 +262,21 @@ function resolveAuthMode(authModeEnv, urls) {
143
262
  return oauthConfigured ? 'oauth' : 'static';
144
263
  }
145
264
  function collectStaticTokens() {
146
- const staticTokens = new Set(parseList(process.env.ACCESS_TOKENS));
147
- if (process.env.API_KEY) {
148
- staticTokens.add(process.env.API_KEY);
149
- }
265
+ const staticTokens = new Set(parseList(env.ACCESS_TOKENS));
266
+ if (env.API_KEY)
267
+ staticTokens.add(env.API_KEY);
150
268
  return [...staticTokens];
151
269
  }
152
270
  function buildAuthConfig(baseUrl) {
153
271
  const urls = readOAuthUrls(baseUrl);
154
- const mode = resolveAuthMode(process.env.AUTH_MODE?.toLowerCase(), urls);
272
+ const mode = resolveAuthMode(urls);
155
273
  return {
156
274
  mode,
157
275
  ...urls,
158
- requiredScopes: parseList(process.env.OAUTH_REQUIRED_SCOPES),
159
- clientId: process.env.OAUTH_CLIENT_ID,
160
- clientSecret: process.env.OAUTH_CLIENT_SECRET,
161
- introspectionTimeoutMs: parseInteger(process.env.OAUTH_INTROSPECTION_TIMEOUT_MS, 5000, 1000, 30000),
276
+ requiredScopes: parseList(env.OAUTH_REQUIRED_SCOPES),
277
+ clientId: env.OAUTH_CLIENT_ID,
278
+ clientSecret: env.OAUTH_CLIENT_SECRET,
279
+ introspectionTimeoutMs: 5000,
162
280
  staticTokens: collectStaticTokens(),
163
281
  };
164
282
  }
@@ -166,51 +284,85 @@ const LOOPBACK_V4 = buildIpv4([127, 0, 0, 1]);
166
284
  const ANY_V4 = buildIpv4([0, 0, 0, 0]);
167
285
  const METADATA_V4_AWS = buildIpv4([169, 254, 169, 254]);
168
286
  const METADATA_V4_AZURE = buildIpv4([100, 100, 100, 200]);
169
- const host = process.env.HOST ?? LOOPBACK_V4;
170
- const port = process.env.PORT?.trim() === '0'
171
- ? 0
172
- : parseInteger(process.env.PORT, 3000, 1024, 65535);
287
+ const BLOCKED_HOSTS = new Set([
288
+ 'localhost',
289
+ LOOPBACK_V4,
290
+ ANY_V4,
291
+ '::1',
292
+ METADATA_V4_AWS,
293
+ 'metadata.google.internal',
294
+ 'metadata.azure.com',
295
+ METADATA_V4_AZURE,
296
+ 'instance-data',
297
+ ]);
298
+ const BLOCKED_IP_PATTERNS = [
299
+ /^10\./,
300
+ /^172\.(1[6-9]|2\d|3[01])\./,
301
+ /^192\.168\./,
302
+ /^127\./,
303
+ /^0\./,
304
+ /^169\.254\./,
305
+ /^100\.64\./,
306
+ /^fc00:/i,
307
+ /^fd00:/i,
308
+ /^fe80:/i,
309
+ /^::ffff:127\./,
310
+ /^::ffff:10\./,
311
+ /^::ffff:172\.(1[6-9]|2\d|3[01])\./,
312
+ /^::ffff:192\.168\./,
313
+ /^::ffff:169\.254\./,
314
+ ];
315
+ const BLOCKED_IP_PATTERN = /^(?:10\.|172\.(?:1[6-9]|2\d|3[01])\.|192\.168\.|127\.|0\.|169\.254\.|100\.64\.|fc00:|fd00:|fe80:)/i;
316
+ const BLOCKED_IPV4_MAPPED_PATTERN = /^::ffff:(?:127\.|10\.|172\.(?:1[6-9]|2\d|3[01])\.|192\.168\.|169\.254\.)/i;
317
+ const host = (env.HOST ?? LOOPBACK_V4).trim();
318
+ const port = parsePort(env.PORT);
319
+ const maxConnections = parseInteger(env.SERVER_MAX_CONNECTIONS, 0, 0);
320
+ const blockPrivateConnections = parseBoolean(env.SERVER_BLOCK_PRIVATE_CONNECTIONS, false);
173
321
  const baseUrl = new URL(`http://${formatHostForUrl(host)}:${port}`);
174
- const allowRemote = parseBoolean(process.env.ALLOW_REMOTE, false);
322
+ const allowRemote = parseBoolean(env.ALLOW_REMOTE, false);
175
323
  const runtimeState = {
176
324
  httpMode: false,
177
325
  };
178
326
  export const config = {
179
327
  server: {
180
328
  name: 'superFetch',
181
- version: packageJson.version,
329
+ version: serverVersion,
182
330
  port,
183
331
  host,
184
- sessionTtlMs: TIMEOUT.DEFAULT_SESSION_TTL_MS,
185
- sessionInitTimeoutMs: 10000,
186
- maxSessions: 200,
332
+ sessionTtlMs: DEFAULT_SESSION_TTL_MS,
333
+ sessionInitTimeoutMs: DEFAULT_SESSION_INIT_TIMEOUT_MS,
334
+ maxSessions: DEFAULT_MAX_SESSIONS,
187
335
  http: {
188
- headersTimeoutMs: parseOptionalInteger(process.env.SERVER_HEADERS_TIMEOUT_MS, 1000, 600000),
189
- requestTimeoutMs: parseOptionalInteger(process.env.SERVER_REQUEST_TIMEOUT_MS, 1000, 600000),
190
- keepAliveTimeoutMs: parseOptionalInteger(process.env.SERVER_KEEP_ALIVE_TIMEOUT_MS, 1000, 600000),
191
- shutdownCloseIdleConnections: parseBoolean(process.env.SERVER_SHUTDOWN_CLOSE_IDLE, false),
192
- shutdownCloseAllConnections: parseBoolean(process.env.SERVER_SHUTDOWN_CLOSE_ALL, false),
336
+ headersTimeoutMs: undefined,
337
+ requestTimeoutMs: undefined,
338
+ keepAliveTimeoutMs: undefined,
339
+ maxConnections,
340
+ blockPrivateConnections,
341
+ shutdownCloseIdleConnections: true,
342
+ shutdownCloseAllConnections: false,
193
343
  },
194
344
  },
195
345
  fetcher: {
196
- timeout: TIMEOUT.DEFAULT_FETCH_TIMEOUT_MS,
346
+ timeout: DEFAULT_FETCH_TIMEOUT_MS,
197
347
  maxRedirects: 5,
198
- userAgent: process.env.USER_AGENT ?? 'superFetch-MCP/2.0',
199
- maxContentLength: SIZE_LIMITS.TEN_MB,
348
+ userAgent: env.USER_AGENT ?? DEFAULT_USER_AGENT,
349
+ maxContentLength: MAX_HTML_BYTES,
200
350
  },
201
351
  transform: {
202
- timeoutMs: TIMEOUT.DEFAULT_TRANSFORM_TIMEOUT_MS,
203
- stageWarnRatio: parseFloat(process.env.TRANSFORM_STAGE_WARN_RATIO ?? '0.5'),
204
- metadataFormat: parseTransformMetadataFormat(process.env.TRANSFORM_METADATA_FORMAT),
205
- maxWorkerScale: parseInteger(process.env.TRANSFORM_WORKER_MAX_SCALE, 4, 1, 16),
352
+ timeoutMs: DEFAULT_TRANSFORM_TIMEOUT_MS,
353
+ stageWarnRatio: 0.5,
354
+ metadataFormat: 'markdown',
355
+ maxWorkerScale: 4,
356
+ workerMode: parseTransformWorkerMode(env.TRANSFORM_WORKER_MODE),
357
+ workerResourceLimits: resolveWorkerResourceLimits(),
206
358
  },
207
359
  tools: {
208
- enabled: parseList(process.env.ENABLED_TOOLS ?? 'fetch-url'),
209
- timeoutMs: parseInteger(process.env.TOOL_TIMEOUT_MS, DEFAULT_TOOL_TIMEOUT_MS, 1000, 300000),
360
+ enabled: ['fetch-url'],
361
+ timeoutMs: DEFAULT_TOOL_TIMEOUT_MS,
210
362
  },
211
363
  cache: {
212
- enabled: parseBoolean(process.env.CACHE_ENABLED, true),
213
- ttl: parseInteger(process.env.CACHE_TTL, 3600, 60, 86400),
364
+ enabled: parseBoolean(env.CACHE_ENABLED, true),
365
+ ttl: 86400,
214
366
  maxKeys: 100,
215
367
  },
216
368
  extraction: {
@@ -218,59 +370,57 @@ export const config = {
218
370
  minParagraphLength: 10,
219
371
  },
220
372
  noiseRemoval: {
221
- extraTokens: parseList(process.env.SUPERFETCH_EXTRA_NOISE_TOKENS),
222
- extraSelectors: parseList(process.env.SUPERFETCH_EXTRA_NOISE_SELECTORS),
373
+ extraTokens: parseList(env.SUPERFETCH_EXTRA_NOISE_TOKENS),
374
+ extraSelectors: parseList(env.SUPERFETCH_EXTRA_NOISE_SELECTORS),
375
+ enabledCategories: [
376
+ 'cookie-banners',
377
+ 'newsletters',
378
+ 'social-share',
379
+ 'nav-footer',
380
+ ],
381
+ debug: false,
382
+ aggressiveMode: false,
383
+ preserveSvgCanvas: false,
384
+ weights: {
385
+ hidden: 50,
386
+ structural: 50,
387
+ promo: 35,
388
+ stickyFixed: 30,
389
+ threshold: 50,
390
+ },
391
+ },
392
+ markdownCleanup: {
393
+ promoteOrphanHeadings: true,
394
+ removeSkipLinks: true,
395
+ removeTocBlocks: true,
396
+ removeTypeDocComments: true,
397
+ headingKeywords: parseListOrDefault(env.MARKDOWN_HEADING_KEYWORDS, DEFAULT_HEADING_KEYWORDS),
398
+ },
399
+ i18n: {
400
+ locale: normalizeLocale(env.SUPERFETCH_LOCALE),
223
401
  },
224
402
  logging: {
225
- level: parseLogLevel(process.env.LOG_LEVEL),
403
+ level: parseLogLevel(env.LOG_LEVEL),
226
404
  },
227
405
  constants: {
228
- maxHtmlSize: SIZE_LIMITS.TEN_MB,
406
+ maxHtmlSize: MAX_HTML_BYTES,
229
407
  maxUrlLength: 2048,
230
- maxInlineContentChars: 20000,
408
+ maxInlineContentChars: MAX_INLINE_CONTENT_CHARS,
231
409
  },
232
410
  security: {
233
- blockedHosts: new Set([
234
- 'localhost',
235
- LOOPBACK_V4,
236
- ANY_V4,
237
- '::1',
238
- METADATA_V4_AWS,
239
- 'metadata.google.internal',
240
- 'metadata.azure.com',
241
- METADATA_V4_AZURE,
242
- 'instance-data',
243
- ]),
244
- blockedIpPatterns: [
245
- /^10\./,
246
- /^172\.(1[6-9]|2\d|3[01])\./,
247
- /^192\.168\./,
248
- /^127\./,
249
- /^0\./,
250
- /^169\.254\./,
251
- /^100\.64\./,
252
- /^fc00:/i,
253
- /^fd00:/i,
254
- /^fe80:/i,
255
- /^::ffff:127\./,
256
- /^::ffff:10\./,
257
- /^::ffff:172\.(1[6-9]|2\d|3[01])\./,
258
- /^::ffff:192\.168\./,
259
- /^::ffff:169\.254\./,
260
- ],
261
- // Combined regex patterns for fast IP blocking (used in fetch.ts)
262
- // Split into two patterns to reduce complexity while maintaining performance
263
- blockedIpPattern: /^(?:10\.|172\.(?:1[6-9]|2\d|3[01])\.|192\.168\.|127\.|0\.|169\.254\.|100\.64\.|fc00:|fd00:|fe80:)/i,
264
- blockedIpv4MappedPattern: /^::ffff:(?:127\.|10\.|172\.(?:1[6-9]|2\d|3[01])\.|192\.168\.|169\.254\.)/i,
265
- allowedHosts: parseAllowedHosts(process.env.ALLOWED_HOSTS),
266
- apiKey: process.env.API_KEY,
411
+ blockedHosts: BLOCKED_HOSTS,
412
+ blockedIpPatterns: BLOCKED_IP_PATTERNS,
413
+ blockedIpPattern: BLOCKED_IP_PATTERN,
414
+ blockedIpv4MappedPattern: BLOCKED_IPV4_MAPPED_PATTERN,
415
+ allowedHosts: parseAllowedHosts(env.ALLOWED_HOSTS),
416
+ apiKey: env.API_KEY,
267
417
  allowRemote,
268
418
  },
269
419
  auth: buildAuthConfig(baseUrl),
270
420
  rateLimit: {
271
421
  enabled: true,
272
- maxRequests: parseInteger(process.env.RATE_LIMIT_MAX, 100, 1, 10000),
273
- windowMs: parseInteger(process.env.RATE_LIMIT_WINDOW_MS, 60000, 1000, 3600000),
422
+ maxRequests: 100,
423
+ windowMs: 60000,
274
424
  cleanupIntervalMs: 60000,
275
425
  },
276
426
  runtime: runtimeState,