@j0hanz/superfetch 2.5.3 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +350 -226
- package/dist/assets/logo.svg +24837 -24835
- package/dist/cache.d.ts +28 -20
- package/dist/cache.js +292 -514
- package/dist/config.d.ts +41 -7
- package/dist/config.js +298 -148
- package/dist/crypto.js +25 -12
- package/dist/dom-noise-removal.js +379 -421
- package/dist/errors.d.ts +2 -2
- package/dist/errors.js +25 -8
- package/dist/fetch.d.ts +18 -16
- package/dist/fetch.js +1132 -526
- package/dist/host-normalization.js +40 -10
- package/dist/http-native.js +628 -287
- package/dist/index.js +67 -7
- package/dist/instructions.md +44 -31
- package/dist/ip-blocklist.d.ts +8 -0
- package/dist/ip-blocklist.js +65 -0
- package/dist/json.js +14 -9
- package/dist/language-detection.d.ts +2 -11
- package/dist/language-detection.js +289 -280
- package/dist/markdown-cleanup.d.ts +0 -1
- package/dist/markdown-cleanup.js +391 -429
- package/dist/mcp-validator.js +4 -2
- package/dist/mcp.js +184 -135
- package/dist/observability.js +89 -21
- package/dist/resources.js +16 -6
- package/dist/server-tuning.d.ts +2 -0
- package/dist/server-tuning.js +25 -23
- package/dist/session.d.ts +1 -0
- package/dist/session.js +41 -33
- package/dist/tasks.d.ts +2 -0
- package/dist/tasks.js +91 -9
- package/dist/timer-utils.d.ts +5 -0
- package/dist/timer-utils.js +20 -0
- package/dist/tools.d.ts +28 -5
- package/dist/tools.js +317 -183
- package/dist/transform-types.d.ts +5 -1
- package/dist/transform.d.ts +3 -2
- package/dist/transform.js +1138 -421
- package/dist/type-guards.d.ts +1 -0
- package/dist/type-guards.js +7 -0
- package/dist/workers/transform-child.d.ts +1 -0
- package/dist/workers/transform-child.js +118 -0
- package/dist/workers/transform-worker.js +87 -78
- package/package.json +14 -6
package/dist/config.d.ts
CHANGED
|
@@ -1,8 +1,17 @@
|
|
|
1
1
|
export declare const serverVersion: string;
|
|
2
2
|
export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
|
|
3
|
-
|
|
3
|
+
/** Hardcoded to 'markdown'. Type retained for consumer compatibility. */
|
|
4
|
+
export type TransformMetadataFormat = 'markdown';
|
|
5
|
+
export type TransformWorkerMode = 'threads' | 'process';
|
|
6
|
+
type AuthMode = 'oauth' | 'static';
|
|
7
|
+
interface WorkerResourceLimits {
|
|
8
|
+
maxOldGenerationSizeMb?: number;
|
|
9
|
+
maxYoungGenerationSizeMb?: number;
|
|
10
|
+
codeRangeSizeMb?: number;
|
|
11
|
+
stackSizeMb?: number;
|
|
12
|
+
}
|
|
4
13
|
interface AuthConfig {
|
|
5
|
-
mode:
|
|
14
|
+
mode: AuthMode;
|
|
6
15
|
issuerUrl: URL | undefined;
|
|
7
16
|
authorizationUrl: URL | undefined;
|
|
8
17
|
tokenUrl: URL | undefined;
|
|
@@ -29,9 +38,11 @@ export declare const config: {
|
|
|
29
38
|
sessionInitTimeoutMs: number;
|
|
30
39
|
maxSessions: number;
|
|
31
40
|
http: {
|
|
32
|
-
headersTimeoutMs:
|
|
33
|
-
requestTimeoutMs:
|
|
34
|
-
keepAliveTimeoutMs:
|
|
41
|
+
headersTimeoutMs: undefined;
|
|
42
|
+
requestTimeoutMs: undefined;
|
|
43
|
+
keepAliveTimeoutMs: undefined;
|
|
44
|
+
maxConnections: number;
|
|
45
|
+
blockPrivateConnections: boolean;
|
|
35
46
|
shutdownCloseIdleConnections: boolean;
|
|
36
47
|
shutdownCloseAllConnections: boolean;
|
|
37
48
|
};
|
|
@@ -45,8 +56,10 @@ export declare const config: {
|
|
|
45
56
|
transform: {
|
|
46
57
|
timeoutMs: number;
|
|
47
58
|
stageWarnRatio: number;
|
|
48
|
-
metadataFormat:
|
|
59
|
+
metadataFormat: string;
|
|
49
60
|
maxWorkerScale: number;
|
|
61
|
+
workerMode: TransformWorkerMode;
|
|
62
|
+
workerResourceLimits: WorkerResourceLimits | undefined;
|
|
50
63
|
};
|
|
51
64
|
tools: {
|
|
52
65
|
enabled: string[];
|
|
@@ -64,6 +77,27 @@ export declare const config: {
|
|
|
64
77
|
noiseRemoval: {
|
|
65
78
|
extraTokens: string[];
|
|
66
79
|
extraSelectors: string[];
|
|
80
|
+
enabledCategories: string[];
|
|
81
|
+
debug: boolean;
|
|
82
|
+
aggressiveMode: boolean;
|
|
83
|
+
preserveSvgCanvas: boolean;
|
|
84
|
+
weights: {
|
|
85
|
+
hidden: number;
|
|
86
|
+
structural: number;
|
|
87
|
+
promo: number;
|
|
88
|
+
stickyFixed: number;
|
|
89
|
+
threshold: number;
|
|
90
|
+
};
|
|
91
|
+
};
|
|
92
|
+
markdownCleanup: {
|
|
93
|
+
promoteOrphanHeadings: boolean;
|
|
94
|
+
removeSkipLinks: boolean;
|
|
95
|
+
removeTocBlocks: boolean;
|
|
96
|
+
removeTypeDocComments: boolean;
|
|
97
|
+
headingKeywords: string[];
|
|
98
|
+
};
|
|
99
|
+
i18n: {
|
|
100
|
+
locale: string | undefined;
|
|
67
101
|
};
|
|
68
102
|
logging: {
|
|
69
103
|
level: LogLevel;
|
|
@@ -75,7 +109,7 @@ export declare const config: {
|
|
|
75
109
|
};
|
|
76
110
|
security: {
|
|
77
111
|
blockedHosts: Set<string>;
|
|
78
|
-
blockedIpPatterns: readonly [
|
|
112
|
+
blockedIpPatterns: readonly RegExp[];
|
|
79
113
|
blockedIpPattern: RegExp;
|
|
80
114
|
blockedIpv4MappedPattern: RegExp;
|
|
81
115
|
allowedHosts: Set<string>;
|
package/dist/config.js
CHANGED
|
@@ -1,62 +1,140 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { readFileSync } from 'node:fs';
|
|
2
|
+
import { findPackageJSON } from 'node:module';
|
|
3
|
+
import { isIP } from 'node:net';
|
|
4
|
+
import process from 'node:process';
|
|
5
|
+
import { domainToASCII } from 'node:url';
|
|
6
|
+
const packageJsonPath = findPackageJSON(import.meta.url);
|
|
7
|
+
if (!packageJsonPath) {
|
|
8
|
+
throw new Error('package.json not found');
|
|
9
|
+
}
|
|
10
|
+
const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
|
11
|
+
if (typeof packageJson.version !== 'string') {
|
|
12
|
+
throw new Error('package.json version is missing');
|
|
13
|
+
}
|
|
2
14
|
export const serverVersion = packageJson.version;
|
|
15
|
+
const LOG_LEVELS = ['debug', 'info', 'warn', 'error'];
|
|
16
|
+
const DEFAULT_HEADING_KEYWORDS = [
|
|
17
|
+
'overview',
|
|
18
|
+
'introduction',
|
|
19
|
+
'summary',
|
|
20
|
+
'conclusion',
|
|
21
|
+
'prerequisites',
|
|
22
|
+
'requirements',
|
|
23
|
+
'installation',
|
|
24
|
+
'configuration',
|
|
25
|
+
'usage',
|
|
26
|
+
'features',
|
|
27
|
+
'limitations',
|
|
28
|
+
'troubleshooting',
|
|
29
|
+
'faq',
|
|
30
|
+
'resources',
|
|
31
|
+
'references',
|
|
32
|
+
'changelog',
|
|
33
|
+
'license',
|
|
34
|
+
'acknowledgments',
|
|
35
|
+
'appendix',
|
|
36
|
+
];
|
|
37
|
+
function isMissingEnvFileError(error) {
|
|
38
|
+
if (!error || typeof error !== 'object')
|
|
39
|
+
return false;
|
|
40
|
+
const { code } = error;
|
|
41
|
+
return code === 'ENOENT' || code === 'ERR_ENV_FILE_NOT_FOUND';
|
|
42
|
+
}
|
|
43
|
+
function loadEnvFileIfAvailable() {
|
|
44
|
+
if (typeof process.loadEnvFile !== 'function')
|
|
45
|
+
return;
|
|
46
|
+
try {
|
|
47
|
+
process.loadEnvFile();
|
|
48
|
+
}
|
|
49
|
+
catch (error) {
|
|
50
|
+
if (isMissingEnvFileError(error))
|
|
51
|
+
return;
|
|
52
|
+
throw error;
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
loadEnvFileIfAvailable();
|
|
56
|
+
const { env } = process;
|
|
57
|
+
class ConfigError extends Error {
|
|
58
|
+
name = 'ConfigError';
|
|
59
|
+
}
|
|
3
60
|
function buildIpv4(parts) {
|
|
4
61
|
return parts.join('.');
|
|
5
62
|
}
|
|
6
63
|
function formatHostForUrl(hostname) {
|
|
7
|
-
if (hostname.includes(':') && !hostname.startsWith('['))
|
|
64
|
+
if (hostname.includes(':') && !hostname.startsWith('['))
|
|
8
65
|
return `[${hostname}]`;
|
|
9
|
-
}
|
|
10
66
|
return hostname;
|
|
11
67
|
}
|
|
12
|
-
function
|
|
13
|
-
|
|
68
|
+
function stripTrailingDots(value) {
|
|
69
|
+
let result = value;
|
|
70
|
+
while (result.endsWith('.'))
|
|
71
|
+
result = result.slice(0, -1);
|
|
72
|
+
return result;
|
|
73
|
+
}
|
|
74
|
+
function normalizeHostname(value) {
|
|
75
|
+
const trimmed = value.trim();
|
|
14
76
|
if (!trimmed)
|
|
15
77
|
return null;
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
78
|
+
const lowered = trimmed.toLowerCase();
|
|
79
|
+
const ipType = isIP(lowered);
|
|
80
|
+
if (ipType)
|
|
81
|
+
return stripTrailingDots(lowered);
|
|
82
|
+
const ascii = domainToASCII(lowered);
|
|
83
|
+
return ascii ? stripTrailingDots(ascii) : null;
|
|
84
|
+
}
|
|
85
|
+
function normalizeHostValue(value) {
|
|
86
|
+
const raw = value.trim();
|
|
87
|
+
if (!raw)
|
|
88
|
+
return null;
|
|
89
|
+
if (raw.includes('://')) {
|
|
90
|
+
if (!URL.canParse(raw))
|
|
19
91
|
return null;
|
|
20
|
-
return
|
|
92
|
+
return normalizeHostname(new URL(raw).hostname);
|
|
21
93
|
}
|
|
22
|
-
const
|
|
23
|
-
if (
|
|
24
|
-
return
|
|
94
|
+
const candidateUrl = `http://${raw}`;
|
|
95
|
+
if (URL.canParse(candidateUrl)) {
|
|
96
|
+
return normalizeHostname(new URL(candidateUrl).hostname);
|
|
25
97
|
}
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
const
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
98
|
+
const lowered = raw.toLowerCase();
|
|
99
|
+
if (lowered.startsWith('[')) {
|
|
100
|
+
const end = lowered.indexOf(']');
|
|
101
|
+
if (end === -1)
|
|
102
|
+
return null;
|
|
103
|
+
return normalizeHostname(lowered.slice(1, end));
|
|
104
|
+
}
|
|
105
|
+
if (isIP(lowered) === 6)
|
|
106
|
+
return stripTrailingDots(lowered);
|
|
107
|
+
const firstColon = lowered.indexOf(':');
|
|
108
|
+
if (firstColon === -1)
|
|
109
|
+
return normalizeHostname(lowered);
|
|
110
|
+
if (lowered.includes(':', firstColon + 1))
|
|
111
|
+
return null;
|
|
112
|
+
const host = lowered.slice(0, firstColon);
|
|
113
|
+
return host ? normalizeHostname(host) : null;
|
|
39
114
|
}
|
|
40
115
|
function parseIntegerValue(envValue, min, max) {
|
|
41
116
|
if (!envValue)
|
|
42
117
|
return null;
|
|
43
|
-
const parsed = parseInt(envValue, 10);
|
|
118
|
+
const parsed = Number.parseInt(envValue, 10);
|
|
44
119
|
if (Number.isNaN(parsed))
|
|
45
120
|
return null;
|
|
46
|
-
if (
|
|
121
|
+
if (min !== undefined && parsed < min)
|
|
122
|
+
return null;
|
|
123
|
+
if (max !== undefined && parsed > max)
|
|
47
124
|
return null;
|
|
48
125
|
return parsed;
|
|
49
126
|
}
|
|
127
|
+
function parseOptionalInteger(envValue, min, max) {
|
|
128
|
+
const parsed = parseIntegerValue(envValue, min, max);
|
|
129
|
+
return parsed ?? undefined;
|
|
130
|
+
}
|
|
50
131
|
function parseInteger(envValue, defaultValue, min, max) {
|
|
51
132
|
return parseIntegerValue(envValue, min, max) ?? defaultValue;
|
|
52
133
|
}
|
|
53
|
-
function parseOptionalInteger(envValue, min, max) {
|
|
54
|
-
return parseIntegerValue(envValue, min, max) ?? undefined;
|
|
55
|
-
}
|
|
56
134
|
function parseBoolean(envValue, defaultValue) {
|
|
57
135
|
if (!envValue)
|
|
58
136
|
return defaultValue;
|
|
59
|
-
return envValue !== 'false';
|
|
137
|
+
return envValue.trim().toLowerCase() !== 'false';
|
|
60
138
|
}
|
|
61
139
|
function parseList(envValue) {
|
|
62
140
|
if (!envValue)
|
|
@@ -66,74 +144,115 @@ function parseList(envValue) {
|
|
|
66
144
|
.map((entry) => entry.trim())
|
|
67
145
|
.filter((entry) => entry.length > 0);
|
|
68
146
|
}
|
|
147
|
+
function parseListOrDefault(envValue, defaultValue) {
|
|
148
|
+
const parsed = parseList(envValue);
|
|
149
|
+
return parsed.length > 0 ? parsed : [...defaultValue];
|
|
150
|
+
}
|
|
151
|
+
function normalizeLocale(value) {
|
|
152
|
+
if (!value)
|
|
153
|
+
return undefined;
|
|
154
|
+
const trimmed = value.trim();
|
|
155
|
+
if (!trimmed)
|
|
156
|
+
return undefined;
|
|
157
|
+
const lowered = trimmed.toLowerCase();
|
|
158
|
+
if (lowered === 'system' || lowered === 'default')
|
|
159
|
+
return undefined;
|
|
160
|
+
return trimmed;
|
|
161
|
+
}
|
|
69
162
|
function parseUrlEnv(value, name) {
|
|
70
163
|
if (!value)
|
|
71
164
|
return undefined;
|
|
72
165
|
if (!URL.canParse(value)) {
|
|
73
|
-
throw new
|
|
166
|
+
throw new ConfigError(`Invalid ${name} value: ${value}`);
|
|
74
167
|
}
|
|
75
168
|
return new URL(value);
|
|
76
169
|
}
|
|
77
170
|
function readUrlEnv(name) {
|
|
78
|
-
return parseUrlEnv(
|
|
171
|
+
return parseUrlEnv(env[name], name);
|
|
79
172
|
}
|
|
80
173
|
function parseAllowedHosts(envValue) {
|
|
81
174
|
const hosts = new Set();
|
|
82
175
|
for (const entry of parseList(envValue)) {
|
|
83
176
|
const normalized = normalizeHostValue(entry);
|
|
84
|
-
if (normalized)
|
|
177
|
+
if (normalized)
|
|
85
178
|
hosts.add(normalized);
|
|
86
|
-
}
|
|
87
179
|
}
|
|
88
180
|
return hosts;
|
|
89
181
|
}
|
|
182
|
+
const ALLOWED_LOG_LEVELS = new Set(LOG_LEVELS);
|
|
183
|
+
function isLogLevel(value) {
|
|
184
|
+
return ALLOWED_LOG_LEVELS.has(value);
|
|
185
|
+
}
|
|
90
186
|
function parseLogLevel(envValue) {
|
|
91
|
-
|
|
92
|
-
if (!level)
|
|
187
|
+
if (!envValue)
|
|
93
188
|
return 'info';
|
|
189
|
+
const level = envValue.toLowerCase();
|
|
94
190
|
return isLogLevel(level) ? level : 'info';
|
|
95
191
|
}
|
|
96
|
-
function
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
192
|
+
function parseTransformWorkerMode(envValue) {
|
|
193
|
+
if (!envValue)
|
|
194
|
+
return 'threads';
|
|
195
|
+
const normalized = envValue.trim().toLowerCase();
|
|
196
|
+
if (normalized === 'process' || normalized === 'fork')
|
|
197
|
+
return 'process';
|
|
198
|
+
return 'threads';
|
|
101
199
|
}
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
DEFAULT_FETCH_TIMEOUT_MS: parseInteger(process.env.FETCH_TIMEOUT_MS, 15000, 1000, 60000),
|
|
107
|
-
DEFAULT_SESSION_TTL_MS: 30 * 60 * 1000,
|
|
108
|
-
DEFAULT_TRANSFORM_TIMEOUT_MS: parseInteger(process.env.TRANSFORM_TIMEOUT_MS, 30000, 5000, 120000),
|
|
109
|
-
};
|
|
110
|
-
const DEFAULT_TOOL_TIMEOUT_MS = TIMEOUT.DEFAULT_FETCH_TIMEOUT_MS +
|
|
111
|
-
TIMEOUT.DEFAULT_TRANSFORM_TIMEOUT_MS +
|
|
112
|
-
5000;
|
|
113
|
-
function readCoreOAuthUrls() {
|
|
114
|
-
return {
|
|
115
|
-
issuerUrl: readUrlEnv('OAUTH_ISSUER_URL'),
|
|
116
|
-
authorizationUrl: readUrlEnv('OAUTH_AUTHORIZATION_URL'),
|
|
117
|
-
tokenUrl: readUrlEnv('OAUTH_TOKEN_URL'),
|
|
118
|
-
};
|
|
200
|
+
function parsePort(envValue) {
|
|
201
|
+
if (envValue?.trim() === '0')
|
|
202
|
+
return 0;
|
|
203
|
+
return parseInteger(envValue, 3000, 1024, 65535);
|
|
119
204
|
}
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
205
|
+
const MAX_HTML_BYTES = 10 * 1024 * 1024; // 10 MB
|
|
206
|
+
const MAX_INLINE_CONTENT_CHARS = 0;
|
|
207
|
+
const DEFAULT_SESSION_TTL_MS = 30 * 60 * 1000;
|
|
208
|
+
const DEFAULT_SESSION_INIT_TIMEOUT_MS = 10000;
|
|
209
|
+
const DEFAULT_MAX_SESSIONS = 200;
|
|
210
|
+
const DEFAULT_USER_AGENT = `superFetch-MCP/${serverVersion}`;
|
|
211
|
+
const DEFAULT_TOOL_TIMEOUT_PADDING_MS = 5000;
|
|
212
|
+
const DEFAULT_TRANSFORM_TIMEOUT_MS = 30000;
|
|
213
|
+
const DEFAULT_FETCH_TIMEOUT_MS = parseInteger(env.FETCH_TIMEOUT_MS, 15000, 1000, 60000);
|
|
214
|
+
const DEFAULT_TOOL_TIMEOUT_MS = DEFAULT_FETCH_TIMEOUT_MS +
|
|
215
|
+
DEFAULT_TRANSFORM_TIMEOUT_MS +
|
|
216
|
+
DEFAULT_TOOL_TIMEOUT_PADDING_MS;
|
|
217
|
+
function resolveWorkerResourceLimits() {
|
|
218
|
+
const limits = {};
|
|
219
|
+
const maxOldGenerationSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_MAX_OLD_GENERATION_MB, 1);
|
|
220
|
+
const maxYoungGenerationSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_MAX_YOUNG_GENERATION_MB, 1);
|
|
221
|
+
const codeRangeSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_CODE_RANGE_MB, 1);
|
|
222
|
+
const stackSizeMb = parseOptionalInteger(env.TRANSFORM_WORKER_STACK_MB, 1);
|
|
223
|
+
if (maxOldGenerationSizeMb !== undefined) {
|
|
224
|
+
limits.maxOldGenerationSizeMb = maxOldGenerationSizeMb;
|
|
225
|
+
}
|
|
226
|
+
if (maxYoungGenerationSizeMb !== undefined) {
|
|
227
|
+
limits.maxYoungGenerationSizeMb = maxYoungGenerationSizeMb;
|
|
228
|
+
}
|
|
229
|
+
if (codeRangeSizeMb !== undefined) {
|
|
230
|
+
limits.codeRangeSizeMb = codeRangeSizeMb;
|
|
231
|
+
}
|
|
232
|
+
if (stackSizeMb !== undefined) {
|
|
233
|
+
limits.stackSizeMb = stackSizeMb;
|
|
234
|
+
}
|
|
235
|
+
return Object.keys(limits).length > 0 ? limits : undefined;
|
|
128
236
|
}
|
|
129
237
|
function readOAuthUrls(baseUrl) {
|
|
130
|
-
|
|
238
|
+
const issuerUrl = readUrlEnv('OAUTH_ISSUER_URL');
|
|
239
|
+
const authorizationUrl = readUrlEnv('OAUTH_AUTHORIZATION_URL');
|
|
240
|
+
const tokenUrl = readUrlEnv('OAUTH_TOKEN_URL');
|
|
241
|
+
const revocationUrl = readUrlEnv('OAUTH_REVOCATION_URL');
|
|
242
|
+
const registrationUrl = readUrlEnv('OAUTH_REGISTRATION_URL');
|
|
243
|
+
const introspectionUrl = readUrlEnv('OAUTH_INTROSPECTION_URL');
|
|
244
|
+
const resourceUrl = new URL('/mcp', baseUrl);
|
|
245
|
+
return {
|
|
246
|
+
issuerUrl,
|
|
247
|
+
authorizationUrl,
|
|
248
|
+
tokenUrl,
|
|
249
|
+
revocationUrl,
|
|
250
|
+
registrationUrl,
|
|
251
|
+
introspectionUrl,
|
|
252
|
+
resourceUrl,
|
|
253
|
+
};
|
|
131
254
|
}
|
|
132
|
-
function resolveAuthMode(
|
|
133
|
-
if (authModeEnv === 'oauth')
|
|
134
|
-
return 'oauth';
|
|
135
|
-
if (authModeEnv === 'static')
|
|
136
|
-
return 'static';
|
|
255
|
+
function resolveAuthMode(urls) {
|
|
137
256
|
const oauthConfigured = [
|
|
138
257
|
urls.issuerUrl,
|
|
139
258
|
urls.authorizationUrl,
|
|
@@ -143,22 +262,21 @@ function resolveAuthMode(authModeEnv, urls) {
|
|
|
143
262
|
return oauthConfigured ? 'oauth' : 'static';
|
|
144
263
|
}
|
|
145
264
|
function collectStaticTokens() {
|
|
146
|
-
const staticTokens = new Set(parseList(
|
|
147
|
-
if (
|
|
148
|
-
staticTokens.add(
|
|
149
|
-
}
|
|
265
|
+
const staticTokens = new Set(parseList(env.ACCESS_TOKENS));
|
|
266
|
+
if (env.API_KEY)
|
|
267
|
+
staticTokens.add(env.API_KEY);
|
|
150
268
|
return [...staticTokens];
|
|
151
269
|
}
|
|
152
270
|
function buildAuthConfig(baseUrl) {
|
|
153
271
|
const urls = readOAuthUrls(baseUrl);
|
|
154
|
-
const mode = resolveAuthMode(
|
|
272
|
+
const mode = resolveAuthMode(urls);
|
|
155
273
|
return {
|
|
156
274
|
mode,
|
|
157
275
|
...urls,
|
|
158
|
-
requiredScopes: parseList(
|
|
159
|
-
clientId:
|
|
160
|
-
clientSecret:
|
|
161
|
-
introspectionTimeoutMs:
|
|
276
|
+
requiredScopes: parseList(env.OAUTH_REQUIRED_SCOPES),
|
|
277
|
+
clientId: env.OAUTH_CLIENT_ID,
|
|
278
|
+
clientSecret: env.OAUTH_CLIENT_SECRET,
|
|
279
|
+
introspectionTimeoutMs: 5000,
|
|
162
280
|
staticTokens: collectStaticTokens(),
|
|
163
281
|
};
|
|
164
282
|
}
|
|
@@ -166,51 +284,85 @@ const LOOPBACK_V4 = buildIpv4([127, 0, 0, 1]);
|
|
|
166
284
|
const ANY_V4 = buildIpv4([0, 0, 0, 0]);
|
|
167
285
|
const METADATA_V4_AWS = buildIpv4([169, 254, 169, 254]);
|
|
168
286
|
const METADATA_V4_AZURE = buildIpv4([100, 100, 100, 200]);
|
|
169
|
-
const
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
287
|
+
const BLOCKED_HOSTS = new Set([
|
|
288
|
+
'localhost',
|
|
289
|
+
LOOPBACK_V4,
|
|
290
|
+
ANY_V4,
|
|
291
|
+
'::1',
|
|
292
|
+
METADATA_V4_AWS,
|
|
293
|
+
'metadata.google.internal',
|
|
294
|
+
'metadata.azure.com',
|
|
295
|
+
METADATA_V4_AZURE,
|
|
296
|
+
'instance-data',
|
|
297
|
+
]);
|
|
298
|
+
const BLOCKED_IP_PATTERNS = [
|
|
299
|
+
/^10\./,
|
|
300
|
+
/^172\.(1[6-9]|2\d|3[01])\./,
|
|
301
|
+
/^192\.168\./,
|
|
302
|
+
/^127\./,
|
|
303
|
+
/^0\./,
|
|
304
|
+
/^169\.254\./,
|
|
305
|
+
/^100\.64\./,
|
|
306
|
+
/^fc00:/i,
|
|
307
|
+
/^fd00:/i,
|
|
308
|
+
/^fe80:/i,
|
|
309
|
+
/^::ffff:127\./,
|
|
310
|
+
/^::ffff:10\./,
|
|
311
|
+
/^::ffff:172\.(1[6-9]|2\d|3[01])\./,
|
|
312
|
+
/^::ffff:192\.168\./,
|
|
313
|
+
/^::ffff:169\.254\./,
|
|
314
|
+
];
|
|
315
|
+
const BLOCKED_IP_PATTERN = /^(?:10\.|172\.(?:1[6-9]|2\d|3[01])\.|192\.168\.|127\.|0\.|169\.254\.|100\.64\.|fc00:|fd00:|fe80:)/i;
|
|
316
|
+
const BLOCKED_IPV4_MAPPED_PATTERN = /^::ffff:(?:127\.|10\.|172\.(?:1[6-9]|2\d|3[01])\.|192\.168\.|169\.254\.)/i;
|
|
317
|
+
const host = (env.HOST ?? LOOPBACK_V4).trim();
|
|
318
|
+
const port = parsePort(env.PORT);
|
|
319
|
+
const maxConnections = parseInteger(env.SERVER_MAX_CONNECTIONS, 0, 0);
|
|
320
|
+
const blockPrivateConnections = parseBoolean(env.SERVER_BLOCK_PRIVATE_CONNECTIONS, false);
|
|
173
321
|
const baseUrl = new URL(`http://${formatHostForUrl(host)}:${port}`);
|
|
174
|
-
const allowRemote = parseBoolean(
|
|
322
|
+
const allowRemote = parseBoolean(env.ALLOW_REMOTE, false);
|
|
175
323
|
const runtimeState = {
|
|
176
324
|
httpMode: false,
|
|
177
325
|
};
|
|
178
326
|
export const config = {
|
|
179
327
|
server: {
|
|
180
328
|
name: 'superFetch',
|
|
181
|
-
version:
|
|
329
|
+
version: serverVersion,
|
|
182
330
|
port,
|
|
183
331
|
host,
|
|
184
|
-
sessionTtlMs:
|
|
185
|
-
sessionInitTimeoutMs:
|
|
186
|
-
maxSessions:
|
|
332
|
+
sessionTtlMs: DEFAULT_SESSION_TTL_MS,
|
|
333
|
+
sessionInitTimeoutMs: DEFAULT_SESSION_INIT_TIMEOUT_MS,
|
|
334
|
+
maxSessions: DEFAULT_MAX_SESSIONS,
|
|
187
335
|
http: {
|
|
188
|
-
headersTimeoutMs:
|
|
189
|
-
requestTimeoutMs:
|
|
190
|
-
keepAliveTimeoutMs:
|
|
191
|
-
|
|
192
|
-
|
|
336
|
+
headersTimeoutMs: undefined,
|
|
337
|
+
requestTimeoutMs: undefined,
|
|
338
|
+
keepAliveTimeoutMs: undefined,
|
|
339
|
+
maxConnections,
|
|
340
|
+
blockPrivateConnections,
|
|
341
|
+
shutdownCloseIdleConnections: true,
|
|
342
|
+
shutdownCloseAllConnections: false,
|
|
193
343
|
},
|
|
194
344
|
},
|
|
195
345
|
fetcher: {
|
|
196
|
-
timeout:
|
|
346
|
+
timeout: DEFAULT_FETCH_TIMEOUT_MS,
|
|
197
347
|
maxRedirects: 5,
|
|
198
|
-
userAgent:
|
|
199
|
-
maxContentLength:
|
|
348
|
+
userAgent: env.USER_AGENT ?? DEFAULT_USER_AGENT,
|
|
349
|
+
maxContentLength: MAX_HTML_BYTES,
|
|
200
350
|
},
|
|
201
351
|
transform: {
|
|
202
|
-
timeoutMs:
|
|
203
|
-
stageWarnRatio:
|
|
204
|
-
metadataFormat:
|
|
205
|
-
maxWorkerScale:
|
|
352
|
+
timeoutMs: DEFAULT_TRANSFORM_TIMEOUT_MS,
|
|
353
|
+
stageWarnRatio: 0.5,
|
|
354
|
+
metadataFormat: 'markdown',
|
|
355
|
+
maxWorkerScale: 4,
|
|
356
|
+
workerMode: parseTransformWorkerMode(env.TRANSFORM_WORKER_MODE),
|
|
357
|
+
workerResourceLimits: resolveWorkerResourceLimits(),
|
|
206
358
|
},
|
|
207
359
|
tools: {
|
|
208
|
-
enabled:
|
|
209
|
-
timeoutMs:
|
|
360
|
+
enabled: ['fetch-url'],
|
|
361
|
+
timeoutMs: DEFAULT_TOOL_TIMEOUT_MS,
|
|
210
362
|
},
|
|
211
363
|
cache: {
|
|
212
|
-
enabled: parseBoolean(
|
|
213
|
-
ttl:
|
|
364
|
+
enabled: parseBoolean(env.CACHE_ENABLED, true),
|
|
365
|
+
ttl: 86400,
|
|
214
366
|
maxKeys: 100,
|
|
215
367
|
},
|
|
216
368
|
extraction: {
|
|
@@ -218,59 +370,57 @@ export const config = {
|
|
|
218
370
|
minParagraphLength: 10,
|
|
219
371
|
},
|
|
220
372
|
noiseRemoval: {
|
|
221
|
-
extraTokens: parseList(
|
|
222
|
-
extraSelectors: parseList(
|
|
373
|
+
extraTokens: parseList(env.SUPERFETCH_EXTRA_NOISE_TOKENS),
|
|
374
|
+
extraSelectors: parseList(env.SUPERFETCH_EXTRA_NOISE_SELECTORS),
|
|
375
|
+
enabledCategories: [
|
|
376
|
+
'cookie-banners',
|
|
377
|
+
'newsletters',
|
|
378
|
+
'social-share',
|
|
379
|
+
'nav-footer',
|
|
380
|
+
],
|
|
381
|
+
debug: false,
|
|
382
|
+
aggressiveMode: false,
|
|
383
|
+
preserveSvgCanvas: false,
|
|
384
|
+
weights: {
|
|
385
|
+
hidden: 50,
|
|
386
|
+
structural: 50,
|
|
387
|
+
promo: 35,
|
|
388
|
+
stickyFixed: 30,
|
|
389
|
+
threshold: 50,
|
|
390
|
+
},
|
|
391
|
+
},
|
|
392
|
+
markdownCleanup: {
|
|
393
|
+
promoteOrphanHeadings: true,
|
|
394
|
+
removeSkipLinks: true,
|
|
395
|
+
removeTocBlocks: true,
|
|
396
|
+
removeTypeDocComments: true,
|
|
397
|
+
headingKeywords: parseListOrDefault(env.MARKDOWN_HEADING_KEYWORDS, DEFAULT_HEADING_KEYWORDS),
|
|
398
|
+
},
|
|
399
|
+
i18n: {
|
|
400
|
+
locale: normalizeLocale(env.SUPERFETCH_LOCALE),
|
|
223
401
|
},
|
|
224
402
|
logging: {
|
|
225
|
-
level: parseLogLevel(
|
|
403
|
+
level: parseLogLevel(env.LOG_LEVEL),
|
|
226
404
|
},
|
|
227
405
|
constants: {
|
|
228
|
-
maxHtmlSize:
|
|
406
|
+
maxHtmlSize: MAX_HTML_BYTES,
|
|
229
407
|
maxUrlLength: 2048,
|
|
230
|
-
maxInlineContentChars:
|
|
408
|
+
maxInlineContentChars: MAX_INLINE_CONTENT_CHARS,
|
|
231
409
|
},
|
|
232
410
|
security: {
|
|
233
|
-
blockedHosts:
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
'metadata.google.internal',
|
|
240
|
-
'metadata.azure.com',
|
|
241
|
-
METADATA_V4_AZURE,
|
|
242
|
-
'instance-data',
|
|
243
|
-
]),
|
|
244
|
-
blockedIpPatterns: [
|
|
245
|
-
/^10\./,
|
|
246
|
-
/^172\.(1[6-9]|2\d|3[01])\./,
|
|
247
|
-
/^192\.168\./,
|
|
248
|
-
/^127\./,
|
|
249
|
-
/^0\./,
|
|
250
|
-
/^169\.254\./,
|
|
251
|
-
/^100\.64\./,
|
|
252
|
-
/^fc00:/i,
|
|
253
|
-
/^fd00:/i,
|
|
254
|
-
/^fe80:/i,
|
|
255
|
-
/^::ffff:127\./,
|
|
256
|
-
/^::ffff:10\./,
|
|
257
|
-
/^::ffff:172\.(1[6-9]|2\d|3[01])\./,
|
|
258
|
-
/^::ffff:192\.168\./,
|
|
259
|
-
/^::ffff:169\.254\./,
|
|
260
|
-
],
|
|
261
|
-
// Combined regex patterns for fast IP blocking (used in fetch.ts)
|
|
262
|
-
// Split into two patterns to reduce complexity while maintaining performance
|
|
263
|
-
blockedIpPattern: /^(?:10\.|172\.(?:1[6-9]|2\d|3[01])\.|192\.168\.|127\.|0\.|169\.254\.|100\.64\.|fc00:|fd00:|fe80:)/i,
|
|
264
|
-
blockedIpv4MappedPattern: /^::ffff:(?:127\.|10\.|172\.(?:1[6-9]|2\d|3[01])\.|192\.168\.|169\.254\.)/i,
|
|
265
|
-
allowedHosts: parseAllowedHosts(process.env.ALLOWED_HOSTS),
|
|
266
|
-
apiKey: process.env.API_KEY,
|
|
411
|
+
blockedHosts: BLOCKED_HOSTS,
|
|
412
|
+
blockedIpPatterns: BLOCKED_IP_PATTERNS,
|
|
413
|
+
blockedIpPattern: BLOCKED_IP_PATTERN,
|
|
414
|
+
blockedIpv4MappedPattern: BLOCKED_IPV4_MAPPED_PATTERN,
|
|
415
|
+
allowedHosts: parseAllowedHosts(env.ALLOWED_HOSTS),
|
|
416
|
+
apiKey: env.API_KEY,
|
|
267
417
|
allowRemote,
|
|
268
418
|
},
|
|
269
419
|
auth: buildAuthConfig(baseUrl),
|
|
270
420
|
rateLimit: {
|
|
271
421
|
enabled: true,
|
|
272
|
-
maxRequests:
|
|
273
|
-
windowMs:
|
|
422
|
+
maxRequests: 100,
|
|
423
|
+
windowMs: 60000,
|
|
274
424
|
cleanupIntervalMs: 60000,
|
|
275
425
|
},
|
|
276
426
|
runtime: runtimeState,
|