@j0hanz/superfetch 2.5.3 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +350 -226
- package/dist/assets/logo.svg +24837 -24835
- package/dist/cache.d.ts +28 -20
- package/dist/cache.js +292 -514
- package/dist/config.d.ts +41 -7
- package/dist/config.js +298 -148
- package/dist/crypto.js +25 -12
- package/dist/dom-noise-removal.js +379 -421
- package/dist/errors.d.ts +2 -2
- package/dist/errors.js +25 -8
- package/dist/fetch.d.ts +18 -16
- package/dist/fetch.js +1132 -526
- package/dist/host-normalization.js +40 -10
- package/dist/http-native.js +628 -287
- package/dist/index.js +67 -7
- package/dist/instructions.md +44 -31
- package/dist/ip-blocklist.d.ts +8 -0
- package/dist/ip-blocklist.js +65 -0
- package/dist/json.js +14 -9
- package/dist/language-detection.d.ts +2 -11
- package/dist/language-detection.js +289 -280
- package/dist/markdown-cleanup.d.ts +0 -1
- package/dist/markdown-cleanup.js +391 -429
- package/dist/mcp-validator.js +4 -2
- package/dist/mcp.js +184 -135
- package/dist/observability.js +89 -21
- package/dist/resources.js +16 -6
- package/dist/server-tuning.d.ts +2 -0
- package/dist/server-tuning.js +25 -23
- package/dist/session.d.ts +1 -0
- package/dist/session.js +41 -33
- package/dist/tasks.d.ts +2 -0
- package/dist/tasks.js +91 -9
- package/dist/timer-utils.d.ts +5 -0
- package/dist/timer-utils.js +20 -0
- package/dist/tools.d.ts +28 -5
- package/dist/tools.js +317 -183
- package/dist/transform-types.d.ts +5 -1
- package/dist/transform.d.ts +3 -2
- package/dist/transform.js +1138 -421
- package/dist/type-guards.d.ts +1 -0
- package/dist/type-guards.js +7 -0
- package/dist/workers/transform-child.d.ts +1 -0
- package/dist/workers/transform-child.js +118 -0
- package/dist/workers/transform-worker.js +87 -78
- package/package.json +14 -6
package/dist/fetch.js
CHANGED
|
@@ -1,106 +1,81 @@
|
|
|
1
|
+
import { Buffer } from 'node:buffer';
|
|
1
2
|
import { randomUUID } from 'node:crypto';
|
|
2
3
|
import diagnosticsChannel from 'node:diagnostics_channel';
|
|
3
4
|
import dns from 'node:dns';
|
|
4
|
-
import {
|
|
5
|
+
import { isIP } from 'node:net';
|
|
5
6
|
import { performance } from 'node:perf_hooks';
|
|
7
|
+
import { Readable } from 'node:stream';
|
|
8
|
+
import { setTimeout as delay } from 'node:timers/promises';
|
|
9
|
+
import { createBrotliDecompress, createGunzip, createInflate } from 'node:zlib';
|
|
6
10
|
import { config } from './config.js';
|
|
7
11
|
import { createErrorWithCode, FetchError, isSystemError } from './errors.js';
|
|
12
|
+
import { createDefaultBlockList, normalizeIpForBlockList, } from './ip-blocklist.js';
|
|
8
13
|
import { getOperationId, getRequestId, logDebug, logError, logWarn, redactUrl, } from './observability.js';
|
|
9
|
-
import { isObject } from './type-guards.js';
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
const
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
const
|
|
23
|
-
const IPV6_FE80 = buildIpv6(['fe80', 0, 0, 0, 0, 0, 0, 0]);
|
|
24
|
-
const IPV6_FF00 = buildIpv6(['ff00', 0, 0, 0, 0, 0, 0, 0]);
|
|
25
|
-
const BLOCKED_IPV4_SUBNETS = [
|
|
26
|
-
{ subnet: buildIpv4([0, 0, 0, 0]), prefix: 8 },
|
|
27
|
-
{ subnet: buildIpv4([10, 0, 0, 0]), prefix: 8 },
|
|
28
|
-
{ subnet: buildIpv4([100, 64, 0, 0]), prefix: 10 },
|
|
29
|
-
{ subnet: buildIpv4([127, 0, 0, 0]), prefix: 8 },
|
|
30
|
-
{ subnet: buildIpv4([169, 254, 0, 0]), prefix: 16 },
|
|
31
|
-
{ subnet: buildIpv4([172, 16, 0, 0]), prefix: 12 },
|
|
32
|
-
{ subnet: buildIpv4([192, 168, 0, 0]), prefix: 16 },
|
|
33
|
-
{ subnet: buildIpv4([224, 0, 0, 0]), prefix: 4 },
|
|
34
|
-
{ subnet: buildIpv4([240, 0, 0, 0]), prefix: 4 },
|
|
35
|
-
];
|
|
36
|
-
const BLOCKED_IPV6_SUBNETS = [
|
|
37
|
-
{ subnet: IPV6_ZERO, prefix: 128 },
|
|
38
|
-
{ subnet: IPV6_LOOPBACK, prefix: 128 },
|
|
39
|
-
{ subnet: IPV6_64_FF9B, prefix: 96 },
|
|
40
|
-
{ subnet: IPV6_64_FF9B_1, prefix: 48 },
|
|
41
|
-
{ subnet: IPV6_2001, prefix: 32 },
|
|
42
|
-
{ subnet: IPV6_2002, prefix: 16 },
|
|
43
|
-
{ subnet: IPV6_FC00, prefix: 7 },
|
|
44
|
-
{ subnet: IPV6_FE80, prefix: 10 },
|
|
45
|
-
{ subnet: IPV6_FF00, prefix: 8 },
|
|
46
|
-
];
|
|
14
|
+
import { isError, isObject } from './type-guards.js';
|
|
15
|
+
const defaultLogger = {
|
|
16
|
+
debug: logDebug,
|
|
17
|
+
warn: logWarn,
|
|
18
|
+
error: logError,
|
|
19
|
+
};
|
|
20
|
+
const defaultContext = {
|
|
21
|
+
getRequestId,
|
|
22
|
+
getOperationId,
|
|
23
|
+
};
|
|
24
|
+
const defaultRedactor = {
|
|
25
|
+
redact: redactUrl,
|
|
26
|
+
};
|
|
27
|
+
const defaultFetch = (input, init) => globalThis.fetch(input, init);
|
|
47
28
|
class IpBlocker {
|
|
48
|
-
|
|
29
|
+
security;
|
|
30
|
+
blockList = createDefaultBlockList();
|
|
31
|
+
constructor(security) {
|
|
32
|
+
this.security = security;
|
|
33
|
+
}
|
|
49
34
|
isBlockedIp(candidate) {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
const ipType = this.resolveIpType(candidate);
|
|
53
|
-
if (!ipType)
|
|
35
|
+
const normalized = candidate.trim().toLowerCase();
|
|
36
|
+
if (!normalized)
|
|
54
37
|
return false;
|
|
55
|
-
|
|
56
|
-
if (this.isBlockedBySubnetList(normalized, ipType))
|
|
38
|
+
if (this.security.blockedHosts.has(normalized))
|
|
57
39
|
return true;
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
const ipType = isIP(ip);
|
|
63
|
-
return ipType === 4 || ipType === 6 ? ipType : null;
|
|
64
|
-
}
|
|
65
|
-
isBlockedBySubnetList(ip, ipType) {
|
|
66
|
-
const list = this.getBlockList();
|
|
67
|
-
return ipType === 4 ? list.check(ip, 'ipv4') : list.check(ip, 'ipv6');
|
|
68
|
-
}
|
|
69
|
-
getBlockList() {
|
|
70
|
-
if (!this.cachedBlockList) {
|
|
71
|
-
const list = new BlockList();
|
|
72
|
-
for (const entry of BLOCKED_IPV4_SUBNETS)
|
|
73
|
-
list.addSubnet(entry.subnet, entry.prefix, 'ipv4');
|
|
74
|
-
for (const entry of BLOCKED_IPV6_SUBNETS)
|
|
75
|
-
list.addSubnet(entry.subnet, entry.prefix, 'ipv6');
|
|
76
|
-
this.cachedBlockList = list;
|
|
77
|
-
}
|
|
78
|
-
return this.cachedBlockList;
|
|
40
|
+
const normalizedIp = normalizeIpForBlockList(normalized);
|
|
41
|
+
if (!normalizedIp)
|
|
42
|
+
return false;
|
|
43
|
+
return this.blockList.check(normalizedIp.ip, normalizedIp.family);
|
|
79
44
|
}
|
|
80
45
|
}
|
|
81
|
-
const ipBlocker = new IpBlocker();
|
|
82
|
-
/** Backwards-compatible export */
|
|
83
|
-
export function isBlockedIp(ip) {
|
|
84
|
-
return ipBlocker.isBlockedIp(ip);
|
|
85
|
-
}
|
|
86
|
-
/* -------------------------------------------------------------------------------------------------
|
|
87
|
-
* URL normalization & hostname policy
|
|
88
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
89
46
|
const VALIDATION_ERROR_CODE = 'VALIDATION_ERROR';
|
|
90
47
|
function createValidationError(message) {
|
|
91
48
|
return createErrorWithCode(message, VALIDATION_ERROR_CODE);
|
|
92
49
|
}
|
|
93
50
|
const BLOCKED_HOST_SUFFIXES = ['.local', '.internal'];
|
|
94
51
|
class UrlNormalizer {
|
|
52
|
+
constants;
|
|
53
|
+
security;
|
|
54
|
+
ipBlocker;
|
|
55
|
+
blockedHostSuffixes;
|
|
56
|
+
constructor(constants, security, ipBlocker, blockedHostSuffixes) {
|
|
57
|
+
this.constants = constants;
|
|
58
|
+
this.security = security;
|
|
59
|
+
this.ipBlocker = ipBlocker;
|
|
60
|
+
this.blockedHostSuffixes = blockedHostSuffixes;
|
|
61
|
+
}
|
|
95
62
|
normalize(urlString) {
|
|
96
63
|
const trimmedUrl = this.requireTrimmedUrl(urlString);
|
|
97
|
-
this.
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
64
|
+
if (trimmedUrl.length > this.constants.maxUrlLength) {
|
|
65
|
+
throw createValidationError(`URL exceeds maximum length of ${this.constants.maxUrlLength} characters`);
|
|
66
|
+
}
|
|
67
|
+
if (!URL.canParse(trimmedUrl)) {
|
|
68
|
+
throw createValidationError('Invalid URL format');
|
|
69
|
+
}
|
|
70
|
+
const url = new URL(trimmedUrl);
|
|
71
|
+
if (url.protocol !== 'http:' && url.protocol !== 'https:') {
|
|
72
|
+
throw createValidationError(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`);
|
|
73
|
+
}
|
|
74
|
+
if (url.username || url.password) {
|
|
75
|
+
throw createValidationError('URLs with embedded credentials are not allowed');
|
|
76
|
+
}
|
|
101
77
|
const hostname = this.normalizeHostname(url);
|
|
102
78
|
this.assertHostnameAllowed(hostname);
|
|
103
|
-
// Canonicalize hostname to avoid trailing-dot variants and keep url.href consistent.
|
|
104
79
|
url.hostname = hostname;
|
|
105
80
|
return { normalizedUrl: url.href, hostname };
|
|
106
81
|
}
|
|
@@ -116,32 +91,13 @@ class UrlNormalizer {
|
|
|
116
91
|
throw createValidationError('URL cannot be empty');
|
|
117
92
|
return trimmed;
|
|
118
93
|
}
|
|
119
|
-
assertUrlLength(url) {
|
|
120
|
-
if (url.length <= config.constants.maxUrlLength)
|
|
121
|
-
return;
|
|
122
|
-
throw createValidationError(`URL exceeds maximum length of ${config.constants.maxUrlLength} characters`);
|
|
123
|
-
}
|
|
124
|
-
parseUrl(urlString) {
|
|
125
|
-
if (!URL.canParse(urlString))
|
|
126
|
-
throw createValidationError('Invalid URL format');
|
|
127
|
-
return new URL(urlString);
|
|
128
|
-
}
|
|
129
|
-
assertHttpProtocol(url) {
|
|
130
|
-
if (url.protocol === 'http:' || url.protocol === 'https:')
|
|
131
|
-
return;
|
|
132
|
-
throw createValidationError(`Invalid protocol: ${url.protocol}. Only http: and https: are allowed`);
|
|
133
|
-
}
|
|
134
|
-
assertNoCredentials(url) {
|
|
135
|
-
if (!url.username && !url.password)
|
|
136
|
-
return;
|
|
137
|
-
throw createValidationError('URLs with embedded credentials are not allowed');
|
|
138
|
-
}
|
|
139
94
|
normalizeHostname(url) {
|
|
140
95
|
let hostname = url.hostname.toLowerCase();
|
|
141
96
|
while (hostname.endsWith('.'))
|
|
142
97
|
hostname = hostname.slice(0, -1);
|
|
143
|
-
if (!hostname)
|
|
98
|
+
if (!hostname) {
|
|
144
99
|
throw createValidationError('URL must have a valid hostname');
|
|
100
|
+
}
|
|
145
101
|
return hostname;
|
|
146
102
|
}
|
|
147
103
|
assertHostnameAllowed(hostname) {
|
|
@@ -150,80 +106,62 @@ class UrlNormalizer {
|
|
|
150
106
|
this.assertNotBlockedHostnameSuffix(hostname);
|
|
151
107
|
}
|
|
152
108
|
assertNotBlockedHost(hostname) {
|
|
153
|
-
if (!
|
|
109
|
+
if (!this.security.blockedHosts.has(hostname))
|
|
154
110
|
return;
|
|
155
111
|
throw createValidationError(`Blocked host: ${hostname}. Internal hosts are not allowed`);
|
|
156
112
|
}
|
|
157
113
|
assertNotBlockedIp(hostname) {
|
|
158
|
-
if (!ipBlocker.isBlockedIp(hostname))
|
|
114
|
+
if (!this.ipBlocker.isBlockedIp(hostname))
|
|
159
115
|
return;
|
|
160
116
|
throw createValidationError(`Blocked IP range: ${hostname}. Private IPs are not allowed`);
|
|
161
117
|
}
|
|
162
118
|
assertNotBlockedHostnameSuffix(hostname) {
|
|
163
|
-
const blocked =
|
|
119
|
+
const blocked = this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix));
|
|
164
120
|
if (!blocked)
|
|
165
121
|
return;
|
|
166
122
|
throw createValidationError(`Blocked hostname pattern: ${hostname}. Internal domain suffixes are not allowed`);
|
|
167
123
|
}
|
|
168
124
|
}
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
return
|
|
125
|
+
function getPatternGroup(groups, key) {
|
|
126
|
+
const value = groups[key];
|
|
127
|
+
if (value === undefined)
|
|
128
|
+
return null;
|
|
129
|
+
if (value === '')
|
|
130
|
+
return null;
|
|
131
|
+
return value;
|
|
176
132
|
}
|
|
177
|
-
const
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
pattern: /^(https?:\/\/(?:[^/]+\.)?gitlab\.com\/[^/]+\/[^/]+)\/-\/blob\/([^/]+)\/(.+)$/i,
|
|
204
|
-
transform: (match) => {
|
|
205
|
-
const baseUrl = match[1] ?? '';
|
|
206
|
-
const branch = match[2] ?? '';
|
|
207
|
-
const path = match[3] ?? '';
|
|
208
|
-
return `${baseUrl}/-/raw/${branch}/${path}`;
|
|
209
|
-
},
|
|
210
|
-
};
|
|
211
|
-
const BITBUCKET_SRC_RULE = {
|
|
212
|
-
name: 'bitbucket',
|
|
213
|
-
pattern: /^(https?:\/\/(?:www\.)?bitbucket\.org\/[^/]+\/[^/]+)\/src\/([^/]+)\/(.+)$/i,
|
|
214
|
-
transform: (match) => {
|
|
215
|
-
const baseUrl = match[1] ?? '';
|
|
216
|
-
const branch = match[2] ?? '';
|
|
217
|
-
const path = match[3] ?? '';
|
|
218
|
-
return `${baseUrl}/raw/${branch}/${path}`;
|
|
219
|
-
},
|
|
220
|
-
};
|
|
221
|
-
const TRANSFORM_RULES = [
|
|
222
|
-
GITHUB_BLOB_RULE,
|
|
223
|
-
GITHUB_GIST_RULE,
|
|
224
|
-
GITLAB_BLOB_RULE,
|
|
225
|
-
BITBUCKET_SRC_RULE,
|
|
133
|
+
const GITHUB_BLOB_PATTERN = new URLPattern({
|
|
134
|
+
protocol: 'http{s}?',
|
|
135
|
+
hostname: '{:sub.}?github.com',
|
|
136
|
+
pathname: '/:owner/:repo/blob/:branch/:path+',
|
|
137
|
+
});
|
|
138
|
+
const GITHUB_GIST_PATTERN = new URLPattern({
|
|
139
|
+
protocol: 'http{s}?',
|
|
140
|
+
hostname: 'gist.github.com',
|
|
141
|
+
pathname: '/:user/:gistId',
|
|
142
|
+
});
|
|
143
|
+
const GITHUB_GIST_RAW_PATTERN = new URLPattern({
|
|
144
|
+
protocol: 'http{s}?',
|
|
145
|
+
hostname: 'gist.github.com',
|
|
146
|
+
pathname: '/:user/:gistId/raw/:filePath+',
|
|
147
|
+
});
|
|
148
|
+
const GITLAB_BLOB_PATTERNS = [
|
|
149
|
+
new URLPattern({
|
|
150
|
+
protocol: 'http{s}?',
|
|
151
|
+
hostname: 'gitlab.com',
|
|
152
|
+
pathname: '/:base+/-/blob/:branch/:path+',
|
|
153
|
+
}),
|
|
154
|
+
new URLPattern({
|
|
155
|
+
protocol: 'http{s}?',
|
|
156
|
+
hostname: '*:sub.gitlab.com',
|
|
157
|
+
pathname: '/:base+/-/blob/:branch/:path+',
|
|
158
|
+
}),
|
|
226
159
|
];
|
|
160
|
+
const BITBUCKET_SRC_PATTERN = new URLPattern({
|
|
161
|
+
protocol: 'http{s}?',
|
|
162
|
+
hostname: '{:sub.}?bitbucket.org',
|
|
163
|
+
pathname: '/:owner/:repo/src/:branch/:path+',
|
|
164
|
+
});
|
|
227
165
|
const BITBUCKET_RAW_RE = /bitbucket\.org\/[^/]+\/[^/]+\/raw\//;
|
|
228
166
|
const RAW_TEXT_EXTENSIONS = new Set([
|
|
229
167
|
'.md',
|
|
@@ -240,33 +178,57 @@ const RAW_TEXT_EXTENSIONS = new Set([
|
|
|
240
178
|
'.org',
|
|
241
179
|
]);
|
|
242
180
|
class RawUrlTransformer {
|
|
181
|
+
logger;
|
|
182
|
+
constructor(logger) {
|
|
183
|
+
this.logger = logger;
|
|
184
|
+
}
|
|
243
185
|
transformToRawUrl(url) {
|
|
244
186
|
if (!url)
|
|
245
187
|
return { url, transformed: false };
|
|
246
188
|
if (this.isRawUrl(url))
|
|
247
189
|
return { url, transformed: false };
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
190
|
+
let base;
|
|
191
|
+
let hash;
|
|
192
|
+
let parsed;
|
|
193
|
+
try {
|
|
194
|
+
parsed = new URL(url);
|
|
195
|
+
base = parsed.origin + parsed.pathname;
|
|
196
|
+
({ hash } = parsed);
|
|
197
|
+
}
|
|
198
|
+
catch {
|
|
199
|
+
({ base, hash } = this.splitParams(url));
|
|
200
|
+
}
|
|
201
|
+
const match = this.tryTransformWithUrl(base, hash, parsed);
|
|
202
|
+
if (!match)
|
|
251
203
|
return { url, transformed: false };
|
|
252
|
-
|
|
253
|
-
platform:
|
|
204
|
+
this.logger.debug('URL transformed to raw content URL', {
|
|
205
|
+
platform: match.platform,
|
|
254
206
|
original: url.substring(0, 100),
|
|
255
|
-
transformed:
|
|
207
|
+
transformed: match.url.substring(0, 100),
|
|
256
208
|
});
|
|
257
|
-
return { url:
|
|
209
|
+
return { url: match.url, transformed: true, platform: match.platform };
|
|
258
210
|
}
|
|
259
|
-
isRawTextContentUrl(
|
|
260
|
-
if (!
|
|
211
|
+
isRawTextContentUrl(urlString) {
|
|
212
|
+
if (!urlString)
|
|
261
213
|
return false;
|
|
262
|
-
if (this.isRawUrl(
|
|
214
|
+
if (this.isRawUrl(urlString))
|
|
263
215
|
return true;
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
216
|
+
try {
|
|
217
|
+
const url = new URL(urlString);
|
|
218
|
+
const pathname = url.pathname.toLowerCase();
|
|
219
|
+
const lastDot = pathname.lastIndexOf('.');
|
|
220
|
+
if (lastDot === -1)
|
|
221
|
+
return false;
|
|
222
|
+
return RAW_TEXT_EXTENSIONS.has(pathname.slice(lastDot));
|
|
223
|
+
}
|
|
224
|
+
catch {
|
|
225
|
+
const { base } = this.splitParams(urlString);
|
|
226
|
+
const lowerBase = base.toLowerCase();
|
|
227
|
+
const lastDot = lowerBase.lastIndexOf('.');
|
|
228
|
+
if (lastDot === -1)
|
|
229
|
+
return false;
|
|
230
|
+
return RAW_TEXT_EXTENSIONS.has(lowerBase.slice(lastDot));
|
|
231
|
+
}
|
|
270
232
|
}
|
|
271
233
|
isRawUrl(url) {
|
|
272
234
|
const lower = url.toLowerCase();
|
|
@@ -275,226 +237,340 @@ class RawUrlTransformer {
|
|
|
275
237
|
lower.includes('/-/raw/') ||
|
|
276
238
|
BITBUCKET_RAW_RE.test(lower));
|
|
277
239
|
}
|
|
278
|
-
splitParams(
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
return { base: url.slice(0, endIndex), hash };
|
|
284
|
-
}
|
|
285
|
-
applyRules(base, hash) {
|
|
286
|
-
for (const rule of TRANSFORM_RULES) {
|
|
287
|
-
const urlToMatch = rule.name === 'github-gist' && hash.startsWith('#file-')
|
|
288
|
-
? base + hash
|
|
289
|
-
: base;
|
|
290
|
-
const match = rule.pattern.exec(urlToMatch);
|
|
291
|
-
if (match)
|
|
292
|
-
return { url: rule.transform(match), platform: rule.name };
|
|
293
|
-
}
|
|
294
|
-
return null;
|
|
295
|
-
}
|
|
296
|
-
}
|
|
297
|
-
const rawUrlTransformer = new RawUrlTransformer();
|
|
298
|
-
/** Backwards-compatible exports */
|
|
299
|
-
export function transformToRawUrl(url) {
|
|
300
|
-
return rawUrlTransformer.transformToRawUrl(url);
|
|
301
|
-
}
|
|
302
|
-
export function isRawTextContentUrl(url) {
|
|
303
|
-
return rawUrlTransformer.isRawTextContentUrl(url);
|
|
304
|
-
}
|
|
305
|
-
const DNS_LOOKUP_TIMEOUT_MS = 5000;
|
|
306
|
-
class SafeDnsLookup {
|
|
307
|
-
lookup(hostname, options, callback) {
|
|
308
|
-
const normalizedOptions = this.normalizeOptions(options);
|
|
309
|
-
const useAll = Boolean(normalizedOptions.all);
|
|
310
|
-
const resolvedFamily = this.resolveFamily(normalizedOptions.family);
|
|
311
|
-
const lookupOptions = {
|
|
312
|
-
family: normalizedOptions.family,
|
|
313
|
-
hints: normalizedOptions.hints,
|
|
314
|
-
all: true, // Always request all results; we select based on caller preference.
|
|
315
|
-
order: this.resolveOrder(normalizedOptions),
|
|
316
|
-
};
|
|
317
|
-
const timeout = this.createTimeout(hostname, callback);
|
|
318
|
-
const safeCallback = (err, address, family) => {
|
|
319
|
-
if (timeout.isDone())
|
|
320
|
-
return;
|
|
321
|
-
timeout.markDone();
|
|
322
|
-
callback(err, address, family);
|
|
323
|
-
};
|
|
324
|
-
(async () => {
|
|
325
|
-
try {
|
|
326
|
-
const result = await dns.promises.lookup(hostname, lookupOptions);
|
|
327
|
-
const addresses = Array.isArray(result) ? result : [result];
|
|
328
|
-
this.handleLookupResult(null, addresses, hostname, resolvedFamily, useAll, safeCallback);
|
|
329
|
-
}
|
|
330
|
-
catch (error) {
|
|
331
|
-
this.handleLookupResult(error, [], hostname, resolvedFamily, useAll, safeCallback);
|
|
332
|
-
}
|
|
333
|
-
})().catch((error) => {
|
|
334
|
-
if (!timeout.isDone()) {
|
|
335
|
-
safeCallback(error, []);
|
|
336
|
-
}
|
|
337
|
-
});
|
|
338
|
-
}
|
|
339
|
-
normalizeOptions(options) {
|
|
340
|
-
return typeof options === 'number' ? { family: options } : options;
|
|
341
|
-
}
|
|
342
|
-
resolveFamily(family) {
|
|
343
|
-
if (family === 'IPv4')
|
|
344
|
-
return 4;
|
|
345
|
-
if (family === 'IPv6')
|
|
346
|
-
return 6;
|
|
347
|
-
return family;
|
|
348
|
-
}
|
|
349
|
-
resolveOrder(options) {
|
|
350
|
-
if (options.order)
|
|
351
|
-
return options.order;
|
|
352
|
-
// legacy `verbatim` option support
|
|
353
|
-
if (isObject(options)) {
|
|
354
|
-
const legacy = options.verbatim;
|
|
355
|
-
if (typeof legacy === 'boolean')
|
|
356
|
-
return legacy ? 'verbatim' : 'ipv4first';
|
|
357
|
-
}
|
|
358
|
-
return 'verbatim';
|
|
359
|
-
}
|
|
360
|
-
handleLookupResult(error, addresses, hostname, resolvedFamily, useAll, callback) {
|
|
361
|
-
if (error) {
|
|
362
|
-
callback(error, addresses);
|
|
363
|
-
return;
|
|
364
|
-
}
|
|
365
|
-
const list = this.normalizeResults(addresses, resolvedFamily);
|
|
366
|
-
const validationError = this.validateResults(list, hostname);
|
|
367
|
-
if (validationError) {
|
|
368
|
-
callback(validationError, list);
|
|
369
|
-
return;
|
|
240
|
+
splitParams(urlString) {
|
|
241
|
+
try {
|
|
242
|
+
const url = new URL(urlString);
|
|
243
|
+
const base = url.origin + url.pathname;
|
|
244
|
+
return { base, hash: url.hash };
|
|
370
245
|
}
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
246
|
+
catch {
|
|
247
|
+
const hashIndex = urlString.indexOf('#');
|
|
248
|
+
const queryIndex = urlString.indexOf('?');
|
|
249
|
+
const endIndex = Math.min(queryIndex === -1 ? urlString.length : queryIndex, hashIndex === -1 ? urlString.length : hashIndex);
|
|
250
|
+
const hash = hashIndex !== -1 ? urlString.slice(hashIndex) : '';
|
|
251
|
+
return { base: urlString.slice(0, endIndex), hash };
|
|
375
252
|
}
|
|
376
|
-
callback(null, selection.address, selection.family);
|
|
377
253
|
}
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
}
|
|
383
|
-
validateResults(list, hostname) {
|
|
384
|
-
if (list.length === 0) {
|
|
385
|
-
return createErrorWithCode(`No DNS results returned for ${hostname}`, 'ENODATA');
|
|
254
|
+
tryTransformWithUrl(base, hash, preParsed) {
|
|
255
|
+
let parsed = null;
|
|
256
|
+
if (preParsed?.href.startsWith(base)) {
|
|
257
|
+
parsed = preParsed;
|
|
386
258
|
}
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
return createErrorWithCode(`Invalid address family returned for ${hostname}`, 'EINVAL');
|
|
390
|
-
}
|
|
391
|
-
if (ipBlocker.isBlockedIp(addr.address)) {
|
|
392
|
-
return createErrorWithCode(`Blocked IP detected for ${hostname}`, 'EBLOCKED');
|
|
393
|
-
}
|
|
259
|
+
else if (URL.canParse(base)) {
|
|
260
|
+
parsed = new URL(base);
|
|
394
261
|
}
|
|
262
|
+
if (!parsed)
|
|
263
|
+
return null;
|
|
264
|
+
if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')
|
|
265
|
+
return null;
|
|
266
|
+
const gist = this.transformGithubGist(base, hash);
|
|
267
|
+
if (gist)
|
|
268
|
+
return gist;
|
|
269
|
+
const github = this.transformGithubBlob(base);
|
|
270
|
+
if (github)
|
|
271
|
+
return github;
|
|
272
|
+
const gitlab = this.transformGitLab(base, parsed.origin);
|
|
273
|
+
if (gitlab)
|
|
274
|
+
return gitlab;
|
|
275
|
+
const bitbucket = this.transformBitbucket(base, parsed.origin);
|
|
276
|
+
if (bitbucket)
|
|
277
|
+
return bitbucket;
|
|
395
278
|
return null;
|
|
396
279
|
}
|
|
397
|
-
|
|
398
|
-
|
|
280
|
+
transformGithubBlob(url) {
|
|
281
|
+
const match = GITHUB_BLOB_PATTERN.exec(url);
|
|
282
|
+
if (!match)
|
|
283
|
+
return null;
|
|
284
|
+
const groups = match.pathname.groups;
|
|
285
|
+
const owner = getPatternGroup(groups, 'owner');
|
|
286
|
+
const repo = getPatternGroup(groups, 'repo');
|
|
287
|
+
const branch = getPatternGroup(groups, 'branch');
|
|
288
|
+
const path = getPatternGroup(groups, 'path');
|
|
289
|
+
if (!owner || !repo || !branch || !path)
|
|
290
|
+
return null;
|
|
291
|
+
return {
|
|
292
|
+
url: `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${path}`,
|
|
293
|
+
platform: 'github',
|
|
294
|
+
};
|
|
295
|
+
}
|
|
296
|
+
transformGithubGist(url, hash) {
|
|
297
|
+
const rawMatch = GITHUB_GIST_RAW_PATTERN.exec(url);
|
|
298
|
+
if (rawMatch) {
|
|
299
|
+
const groups = rawMatch.pathname.groups;
|
|
300
|
+
const user = getPatternGroup(groups, 'user');
|
|
301
|
+
const gistId = getPatternGroup(groups, 'gistId');
|
|
302
|
+
const filePath = getPatternGroup(groups, 'filePath');
|
|
303
|
+
if (!user || !gistId)
|
|
304
|
+
return null;
|
|
305
|
+
const resolvedFilePath = filePath ? `/${filePath}` : '';
|
|
399
306
|
return {
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
address: [],
|
|
307
|
+
url: `https://gist.githubusercontent.com/${user}/${gistId}/raw${resolvedFilePath}`,
|
|
308
|
+
platform: 'github-gist',
|
|
403
309
|
};
|
|
404
310
|
}
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
311
|
+
const match = GITHUB_GIST_PATTERN.exec(url);
|
|
312
|
+
if (!match)
|
|
313
|
+
return null;
|
|
314
|
+
const groups = match.pathname.groups;
|
|
315
|
+
const user = getPatternGroup(groups, 'user');
|
|
316
|
+
const gistId = getPatternGroup(groups, 'gistId');
|
|
317
|
+
if (!user || !gistId)
|
|
318
|
+
return null;
|
|
319
|
+
let filePath = '';
|
|
320
|
+
if (hash.startsWith('#file-')) {
|
|
321
|
+
const filename = hash.slice('#file-'.length).replace(/-/g, '.');
|
|
322
|
+
if (filename)
|
|
323
|
+
filePath = `/${filename}`;
|
|
324
|
+
}
|
|
325
|
+
return {
|
|
326
|
+
url: `https://gist.githubusercontent.com/${user}/${gistId}/raw${filePath}`,
|
|
327
|
+
platform: 'github-gist',
|
|
328
|
+
};
|
|
329
|
+
}
|
|
330
|
+
transformGitLab(url, origin) {
|
|
331
|
+
for (const pattern of GITLAB_BLOB_PATTERNS) {
|
|
332
|
+
const match = pattern.exec(url);
|
|
333
|
+
if (!match)
|
|
334
|
+
continue;
|
|
335
|
+
const groups = match.pathname.groups;
|
|
336
|
+
const base = getPatternGroup(groups, 'base');
|
|
337
|
+
const branch = getPatternGroup(groups, 'branch');
|
|
338
|
+
const path = getPatternGroup(groups, 'path');
|
|
339
|
+
if (!base || !branch || !path)
|
|
340
|
+
return null;
|
|
409
341
|
return {
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
address: [],
|
|
342
|
+
url: `${origin}/${base}/-/raw/${branch}/${path}`,
|
|
343
|
+
platform: 'gitlab',
|
|
413
344
|
};
|
|
414
345
|
}
|
|
415
|
-
return
|
|
346
|
+
return null;
|
|
416
347
|
}
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
348
|
+
transformBitbucket(url, origin) {
|
|
349
|
+
const match = BITBUCKET_SRC_PATTERN.exec(url);
|
|
350
|
+
if (!match)
|
|
351
|
+
return null;
|
|
352
|
+
const groups = match.pathname.groups;
|
|
353
|
+
const owner = getPatternGroup(groups, 'owner');
|
|
354
|
+
const repo = getPatternGroup(groups, 'repo');
|
|
355
|
+
const branch = getPatternGroup(groups, 'branch');
|
|
356
|
+
const path = getPatternGroup(groups, 'path');
|
|
357
|
+
if (!owner || !repo || !branch || !path)
|
|
358
|
+
return null;
|
|
426
359
|
return {
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
done = true;
|
|
430
|
-
clearTimeout(timer);
|
|
431
|
-
},
|
|
360
|
+
url: `${origin}/${owner}/${repo}/raw/${branch}/${path}`,
|
|
361
|
+
platform: 'bitbucket',
|
|
432
362
|
};
|
|
433
363
|
}
|
|
434
364
|
}
|
|
435
|
-
const
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
}
|
|
443
|
-
resolve();
|
|
444
|
-
});
|
|
445
|
-
});
|
|
446
|
-
}
|
|
447
|
-
/* -------------------------------------------------------------------------------------------------
|
|
448
|
-
* Fetch error mapping (request-level)
|
|
449
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
450
|
-
function parseRetryAfter(header) {
|
|
451
|
-
if (!header)
|
|
452
|
-
return 60;
|
|
453
|
-
const parsed = Number.parseInt(header, 10);
|
|
454
|
-
return Number.isNaN(parsed) ? 60 : parsed;
|
|
365
|
+
const DNS_LOOKUP_TIMEOUT_MS = 5000;
|
|
366
|
+
const CNAME_LOOKUP_MAX_DEPTH = 5;
|
|
367
|
+
function normalizeDnsName(value) {
|
|
368
|
+
let normalized = value.trim().toLowerCase();
|
|
369
|
+
while (normalized.endsWith('.'))
|
|
370
|
+
normalized = normalized.slice(0, -1);
|
|
371
|
+
return normalized;
|
|
455
372
|
}
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
return
|
|
459
|
-
reason: 'aborted',
|
|
460
|
-
});
|
|
373
|
+
function createAbortRace(signal, onAbort) {
|
|
374
|
+
if (!signal) {
|
|
375
|
+
return { abortPromise: null, cleanup: () => { } };
|
|
461
376
|
}
|
|
462
|
-
|
|
463
|
-
return
|
|
464
|
-
|
|
465
|
-
|
|
377
|
+
if (signal.aborted) {
|
|
378
|
+
return {
|
|
379
|
+
abortPromise: Promise.reject(onAbort()),
|
|
380
|
+
cleanup: () => { },
|
|
381
|
+
};
|
|
466
382
|
}
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
383
|
+
let abortListener = null;
|
|
384
|
+
const abortPromise = new Promise((_, reject) => {
|
|
385
|
+
abortListener = () => {
|
|
386
|
+
reject(onAbort());
|
|
387
|
+
};
|
|
388
|
+
signal.addEventListener('abort', abortListener, { once: true });
|
|
389
|
+
});
|
|
390
|
+
const cleanup = () => {
|
|
391
|
+
if (!abortListener)
|
|
392
|
+
return;
|
|
393
|
+
try {
|
|
394
|
+
signal.removeEventListener('abort', abortListener);
|
|
395
|
+
}
|
|
396
|
+
catch {
|
|
397
|
+
// Ignore listener cleanup failures; they are non-fatal by design.
|
|
398
|
+
}
|
|
399
|
+
abortListener = null;
|
|
400
|
+
};
|
|
401
|
+
return { abortPromise, cleanup };
|
|
402
|
+
}
|
|
403
|
+
async function withTimeout(promise, timeoutMs, onTimeout, signal, onAbort) {
|
|
404
|
+
const controller = new AbortController();
|
|
405
|
+
const timeoutPromise = delay(timeoutMs, null, {
|
|
406
|
+
ref: false,
|
|
407
|
+
signal: controller.signal,
|
|
408
|
+
})
|
|
409
|
+
.then(() => Promise.reject(onTimeout()))
|
|
410
|
+
.catch((err) => {
|
|
411
|
+
if (isError(err) && err.name === 'AbortError')
|
|
412
|
+
return new Promise(() => { });
|
|
413
|
+
throw err;
|
|
414
|
+
});
|
|
415
|
+
const abortRace = createAbortRace(signal, onAbort ?? (() => new Error('Request was canceled')));
|
|
416
|
+
try {
|
|
417
|
+
return await Promise.race(abortRace.abortPromise
|
|
418
|
+
? [promise, timeoutPromise, abortRace.abortPromise]
|
|
419
|
+
: [promise, timeoutPromise]);
|
|
471
420
|
}
|
|
472
|
-
|
|
473
|
-
|
|
421
|
+
finally {
|
|
422
|
+
controller.abort();
|
|
423
|
+
abortRace.cleanup();
|
|
474
424
|
}
|
|
475
|
-
|
|
476
|
-
|
|
425
|
+
}
|
|
426
|
+
function createAbortSignalError() {
|
|
427
|
+
const err = new Error('Request was canceled');
|
|
428
|
+
err.name = 'AbortError';
|
|
429
|
+
return err;
|
|
430
|
+
}
|
|
431
|
+
class SafeDnsResolver {
|
|
432
|
+
ipBlocker;
|
|
433
|
+
security;
|
|
434
|
+
blockedHostSuffixes;
|
|
435
|
+
constructor(ipBlocker, security, blockedHostSuffixes) {
|
|
436
|
+
this.ipBlocker = ipBlocker;
|
|
437
|
+
this.security = security;
|
|
438
|
+
this.blockedHostSuffixes = blockedHostSuffixes;
|
|
477
439
|
}
|
|
478
|
-
|
|
479
|
-
|
|
440
|
+
async assertSafeHostname(hostname, signal) {
|
|
441
|
+
const normalizedHostname = normalizeDnsName(hostname);
|
|
442
|
+
if (!normalizedHostname) {
|
|
443
|
+
throw createErrorWithCode('Invalid hostname provided', 'EINVAL');
|
|
444
|
+
}
|
|
445
|
+
if (signal?.aborted) {
|
|
446
|
+
throw createAbortSignalError();
|
|
447
|
+
}
|
|
448
|
+
if (isIP(normalizedHostname)) {
|
|
449
|
+
if (this.ipBlocker.isBlockedIp(normalizedHostname)) {
|
|
450
|
+
throw createErrorWithCode(`Blocked IP range: ${normalizedHostname}. Private IPs are not allowed`, 'EBLOCKED');
|
|
451
|
+
}
|
|
452
|
+
return;
|
|
453
|
+
}
|
|
454
|
+
await this.assertNoBlockedCname(normalizedHostname, signal);
|
|
455
|
+
const resultPromise = dns.promises.lookup(normalizedHostname, {
|
|
456
|
+
all: true,
|
|
457
|
+
order: 'verbatim',
|
|
458
|
+
});
|
|
459
|
+
const addresses = await withTimeout(resultPromise, DNS_LOOKUP_TIMEOUT_MS, () => createErrorWithCode(`DNS lookup timed out for ${normalizedHostname}`, 'ETIMEOUT'), signal, createAbortSignalError);
|
|
460
|
+
if (addresses.length === 0) {
|
|
461
|
+
throw createErrorWithCode(`No DNS results returned for ${normalizedHostname}`, 'ENODATA');
|
|
462
|
+
}
|
|
463
|
+
for (const addr of addresses) {
|
|
464
|
+
if (addr.family !== 4 && addr.family !== 6) {
|
|
465
|
+
throw createErrorWithCode(`Invalid address family returned for ${normalizedHostname}`, 'EINVAL');
|
|
466
|
+
}
|
|
467
|
+
if (this.ipBlocker.isBlockedIp(addr.address)) {
|
|
468
|
+
throw createErrorWithCode(`Blocked IP detected for ${normalizedHostname}`, 'EBLOCKED');
|
|
469
|
+
}
|
|
470
|
+
}
|
|
480
471
|
}
|
|
481
|
-
|
|
482
|
-
|
|
472
|
+
isBlockedHostname(hostname) {
|
|
473
|
+
if (this.security.blockedHosts.has(hostname))
|
|
474
|
+
return true;
|
|
475
|
+
return this.blockedHostSuffixes.some((suffix) => hostname.endsWith(suffix));
|
|
483
476
|
}
|
|
484
|
-
|
|
485
|
-
|
|
477
|
+
async assertNoBlockedCname(hostname, signal) {
|
|
478
|
+
let current = hostname;
|
|
479
|
+
const seen = new Set();
|
|
480
|
+
for (let depth = 0; depth < CNAME_LOOKUP_MAX_DEPTH; depth += 1) {
|
|
481
|
+
if (!current || seen.has(current))
|
|
482
|
+
return;
|
|
483
|
+
seen.add(current);
|
|
484
|
+
const cnames = await this.resolveCname(current, signal);
|
|
485
|
+
if (cnames.length === 0)
|
|
486
|
+
return;
|
|
487
|
+
for (const cname of cnames) {
|
|
488
|
+
if (this.isBlockedHostname(cname)) {
|
|
489
|
+
throw createErrorWithCode(`Blocked DNS CNAME detected for ${hostname}: ${cname}`, 'EBLOCKED');
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
current = cnames[0] ?? '';
|
|
493
|
+
}
|
|
486
494
|
}
|
|
487
|
-
|
|
488
|
-
|
|
495
|
+
async resolveCname(hostname, signal) {
|
|
496
|
+
try {
|
|
497
|
+
const resultPromise = dns.promises.resolveCname(hostname);
|
|
498
|
+
const cnames = await withTimeout(resultPromise, DNS_LOOKUP_TIMEOUT_MS, () => createErrorWithCode(`DNS CNAME lookup timed out for ${hostname}`, 'ETIMEOUT'), signal, createAbortSignalError);
|
|
499
|
+
return cnames
|
|
500
|
+
.map((value) => normalizeDnsName(value))
|
|
501
|
+
.filter((value) => value.length > 0);
|
|
502
|
+
}
|
|
503
|
+
catch (error) {
|
|
504
|
+
if (isError(error) && error.name === 'AbortError') {
|
|
505
|
+
throw error;
|
|
506
|
+
}
|
|
507
|
+
if (isSystemError(error) &&
|
|
508
|
+
(error.code === 'ENODATA' ||
|
|
509
|
+
error.code === 'ENOTFOUND' ||
|
|
510
|
+
error.code === 'ENODOMAIN')) {
|
|
511
|
+
return [];
|
|
512
|
+
}
|
|
513
|
+
return [];
|
|
514
|
+
}
|
|
489
515
|
}
|
|
490
516
|
}
|
|
491
|
-
|
|
517
|
+
function parseRetryAfter(header) {
|
|
518
|
+
if (!header)
|
|
519
|
+
return 60;
|
|
520
|
+
const trimmed = header.trim();
|
|
521
|
+
// Retry-After can be seconds or an HTTP-date.
|
|
522
|
+
const seconds = Number.parseInt(trimmed, 10);
|
|
523
|
+
if (!Number.isNaN(seconds) && seconds >= 0)
|
|
524
|
+
return seconds;
|
|
525
|
+
const dateMs = Date.parse(trimmed);
|
|
526
|
+
if (Number.isNaN(dateMs))
|
|
527
|
+
return 60;
|
|
528
|
+
const deltaMs = dateMs - Date.now();
|
|
529
|
+
if (deltaMs <= 0)
|
|
530
|
+
return 0;
|
|
531
|
+
return Math.ceil(deltaMs / 1000);
|
|
532
|
+
}
|
|
533
|
+
function createCanceledFetchError(url) {
|
|
534
|
+
return new FetchError('Request was canceled', url, 499, {
|
|
535
|
+
reason: 'aborted',
|
|
536
|
+
});
|
|
537
|
+
}
|
|
538
|
+
function createTimeoutFetchError(url, timeoutMs) {
|
|
539
|
+
return new FetchError(`Request timeout after ${timeoutMs}ms`, url, 504, {
|
|
540
|
+
timeout: timeoutMs,
|
|
541
|
+
});
|
|
542
|
+
}
|
|
543
|
+
function createRateLimitedFetchError(url, retryAfterHeader) {
|
|
544
|
+
return new FetchError('Too many requests', url, 429, {
|
|
545
|
+
retryAfter: parseRetryAfter(retryAfterHeader),
|
|
546
|
+
});
|
|
547
|
+
}
|
|
548
|
+
function createHttpFetchError(url, status, statusText) {
|
|
549
|
+
return new FetchError(`HTTP ${status}: ${statusText}`, url, status);
|
|
550
|
+
}
|
|
551
|
+
function createTooManyRedirectsFetchError(url) {
|
|
552
|
+
return new FetchError('Too many redirects', url);
|
|
553
|
+
}
|
|
554
|
+
function createMissingRedirectLocationFetchError(url) {
|
|
555
|
+
return new FetchError('Redirect response missing Location header', url);
|
|
556
|
+
}
|
|
557
|
+
function createNetworkFetchError(url, message) {
|
|
558
|
+
return new FetchError(`Network error: Could not reach ${url}`, url, undefined, message ? { message } : {});
|
|
559
|
+
}
|
|
560
|
+
function createUnknownFetchError(url, message) {
|
|
561
|
+
return new FetchError(message, url);
|
|
562
|
+
}
|
|
563
|
+
function createAbortedFetchError(url) {
|
|
564
|
+
return new FetchError('Request was aborted during response read', url, 499, {
|
|
565
|
+
reason: 'aborted',
|
|
566
|
+
});
|
|
567
|
+
}
|
|
492
568
|
function isAbortError(error) {
|
|
493
|
-
return (error
|
|
569
|
+
return (isError(error) &&
|
|
494
570
|
(error.name === 'AbortError' || error.name === 'TimeoutError'));
|
|
495
571
|
}
|
|
496
572
|
function isTimeoutError(error) {
|
|
497
|
-
return error
|
|
573
|
+
return isError(error) && error.name === 'TimeoutError';
|
|
498
574
|
}
|
|
499
575
|
function resolveErrorUrl(error, fallback) {
|
|
500
576
|
if (error instanceof FetchError)
|
|
@@ -510,122 +586,165 @@ function mapFetchError(error, fallbackUrl, timeoutMs) {
|
|
|
510
586
|
const url = resolveErrorUrl(error, fallbackUrl);
|
|
511
587
|
if (isAbortError(error)) {
|
|
512
588
|
return isTimeoutError(error)
|
|
513
|
-
?
|
|
514
|
-
:
|
|
589
|
+
? createTimeoutFetchError(url, timeoutMs)
|
|
590
|
+
: createCanceledFetchError(url);
|
|
591
|
+
}
|
|
592
|
+
if (!isError(error))
|
|
593
|
+
return createUnknownFetchError(url, 'Unexpected error');
|
|
594
|
+
if (!isSystemError(error))
|
|
595
|
+
return createNetworkFetchError(url, error.message);
|
|
596
|
+
const { code } = error;
|
|
597
|
+
if (code === 'ETIMEOUT') {
|
|
598
|
+
return new FetchError(error.message, url, 504, { code });
|
|
515
599
|
}
|
|
516
|
-
if (
|
|
517
|
-
|
|
518
|
-
|
|
600
|
+
if (code === VALIDATION_ERROR_CODE ||
|
|
601
|
+
code === 'EBADREDIRECT' ||
|
|
602
|
+
code === 'EBLOCKED' ||
|
|
603
|
+
code === 'ENODATA' ||
|
|
604
|
+
code === 'EINVAL') {
|
|
605
|
+
return new FetchError(error.message, url, 400, { code });
|
|
606
|
+
}
|
|
607
|
+
return new FetchError(`Network error: Could not reach ${url}`, url, undefined, {
|
|
608
|
+
code,
|
|
609
|
+
message: error.message,
|
|
610
|
+
});
|
|
519
611
|
}
|
|
520
612
|
const fetchChannel = diagnosticsChannel.channel('superfetch.fetch');
|
|
521
613
|
const SLOW_REQUEST_THRESHOLD_MS = 5000;
|
|
522
614
|
class FetchTelemetry {
|
|
615
|
+
logger;
|
|
616
|
+
context;
|
|
617
|
+
redactor;
|
|
618
|
+
constructor(logger, context, redactor) {
|
|
619
|
+
this.logger = logger;
|
|
620
|
+
this.context = context;
|
|
621
|
+
this.redactor = redactor;
|
|
622
|
+
}
|
|
623
|
+
redact(url) {
|
|
624
|
+
return this.redactor.redact(url);
|
|
625
|
+
}
|
|
523
626
|
start(url, method) {
|
|
524
|
-
const safeUrl =
|
|
525
|
-
const contextRequestId = getRequestId();
|
|
526
|
-
const operationId = getOperationId();
|
|
627
|
+
const safeUrl = this.redactor.redact(url);
|
|
628
|
+
const contextRequestId = this.context.getRequestId();
|
|
629
|
+
const operationId = this.context.getOperationId();
|
|
527
630
|
const ctx = {
|
|
528
631
|
requestId: randomUUID(),
|
|
529
632
|
startTime: performance.now(),
|
|
530
633
|
url: safeUrl,
|
|
531
634
|
method: method.toUpperCase(),
|
|
532
|
-
...(contextRequestId ? { contextRequestId } : {}),
|
|
533
|
-
...(operationId ? { operationId } : {}),
|
|
534
635
|
};
|
|
535
|
-
|
|
636
|
+
if (contextRequestId)
|
|
637
|
+
ctx.contextRequestId = contextRequestId;
|
|
638
|
+
if (operationId)
|
|
639
|
+
ctx.operationId = operationId;
|
|
640
|
+
const event = {
|
|
536
641
|
v: 1,
|
|
537
642
|
type: 'start',
|
|
538
643
|
requestId: ctx.requestId,
|
|
539
644
|
method: ctx.method,
|
|
540
645
|
url: ctx.url,
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
646
|
+
};
|
|
647
|
+
if (ctx.contextRequestId)
|
|
648
|
+
event.contextRequestId = ctx.contextRequestId;
|
|
649
|
+
if (ctx.operationId)
|
|
650
|
+
event.operationId = ctx.operationId;
|
|
651
|
+
this.publish(event);
|
|
652
|
+
const logData = {
|
|
547
653
|
requestId: ctx.requestId,
|
|
548
654
|
method: ctx.method,
|
|
549
655
|
url: ctx.url,
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
656
|
+
};
|
|
657
|
+
if (ctx.contextRequestId)
|
|
658
|
+
logData.contextRequestId = ctx.contextRequestId;
|
|
659
|
+
if (ctx.operationId)
|
|
660
|
+
logData.operationId = ctx.operationId;
|
|
661
|
+
this.logger.debug('HTTP Request', logData);
|
|
555
662
|
return ctx;
|
|
556
663
|
}
|
|
557
664
|
recordResponse(context, response, contentSize) {
|
|
558
665
|
const duration = performance.now() - context.startTime;
|
|
559
666
|
const durationLabel = `${Math.round(duration)}ms`;
|
|
560
|
-
|
|
667
|
+
const event = {
|
|
561
668
|
v: 1,
|
|
562
669
|
type: 'end',
|
|
563
670
|
requestId: context.requestId,
|
|
564
671
|
status: response.status,
|
|
565
672
|
duration,
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
673
|
+
};
|
|
674
|
+
if (context.contextRequestId)
|
|
675
|
+
event.contextRequestId = context.contextRequestId;
|
|
676
|
+
if (context.operationId)
|
|
677
|
+
event.operationId = context.operationId;
|
|
678
|
+
this.publish(event);
|
|
571
679
|
const contentType = response.headers.get('content-type') ?? undefined;
|
|
572
680
|
const contentLengthHeader = response.headers.get('content-length');
|
|
573
681
|
const size = contentLengthHeader ??
|
|
574
682
|
(contentSize === undefined ? undefined : String(contentSize));
|
|
575
|
-
|
|
683
|
+
const logData = {
|
|
576
684
|
requestId: context.requestId,
|
|
577
685
|
status: response.status,
|
|
578
686
|
url: context.url,
|
|
579
687
|
duration: durationLabel,
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
688
|
+
};
|
|
689
|
+
if (context.contextRequestId)
|
|
690
|
+
logData.contextRequestId = context.contextRequestId;
|
|
691
|
+
if (context.operationId)
|
|
692
|
+
logData.operationId = context.operationId;
|
|
693
|
+
if (contentType)
|
|
694
|
+
logData.contentType = contentType;
|
|
695
|
+
if (size)
|
|
696
|
+
logData.size = size;
|
|
697
|
+
this.logger.debug('HTTP Response', logData);
|
|
587
698
|
if (duration > SLOW_REQUEST_THRESHOLD_MS) {
|
|
588
|
-
|
|
699
|
+
const warnData = {
|
|
589
700
|
requestId: context.requestId,
|
|
590
701
|
url: context.url,
|
|
591
702
|
duration: durationLabel,
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
703
|
+
};
|
|
704
|
+
if (context.contextRequestId)
|
|
705
|
+
warnData.contextRequestId = context.contextRequestId;
|
|
706
|
+
if (context.operationId)
|
|
707
|
+
warnData.operationId = context.operationId;
|
|
708
|
+
this.logger.warn('Slow HTTP request detected', warnData);
|
|
597
709
|
}
|
|
598
710
|
}
|
|
599
711
|
recordError(context, error, status) {
|
|
600
712
|
const duration = performance.now() - context.startTime;
|
|
601
|
-
const err = error
|
|
713
|
+
const err = isError(error) ? error : new Error(String(error));
|
|
602
714
|
const code = isSystemError(err) ? err.code : undefined;
|
|
603
|
-
|
|
715
|
+
const event = {
|
|
604
716
|
v: 1,
|
|
605
717
|
type: 'error',
|
|
606
718
|
requestId: context.requestId,
|
|
607
719
|
url: context.url,
|
|
608
720
|
error: err.message,
|
|
609
721
|
duration,
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
722
|
+
};
|
|
723
|
+
if (code !== undefined)
|
|
724
|
+
event.code = code;
|
|
725
|
+
if (status !== undefined)
|
|
726
|
+
event.status = status;
|
|
727
|
+
if (context.contextRequestId)
|
|
728
|
+
event.contextRequestId = context.contextRequestId;
|
|
729
|
+
if (context.operationId)
|
|
730
|
+
event.operationId = context.operationId;
|
|
731
|
+
this.publish(event);
|
|
732
|
+
const logData = {
|
|
619
733
|
requestId: context.requestId,
|
|
620
734
|
url: context.url,
|
|
621
735
|
status,
|
|
622
736
|
code,
|
|
623
737
|
error: err.message,
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
738
|
+
};
|
|
739
|
+
if (context.contextRequestId)
|
|
740
|
+
logData.contextRequestId = context.contextRequestId;
|
|
741
|
+
if (context.operationId)
|
|
742
|
+
logData.operationId = context.operationId;
|
|
743
|
+
if (status === 429) {
|
|
744
|
+
this.logger.warn('HTTP Request Error', logData);
|
|
745
|
+
return;
|
|
746
|
+
}
|
|
747
|
+
this.logger.error('HTTP Request Error', logData);
|
|
629
748
|
}
|
|
630
749
|
publish(event) {
|
|
631
750
|
if (!fetchChannel.hasSubscribers)
|
|
@@ -634,49 +753,50 @@ class FetchTelemetry {
|
|
|
634
753
|
fetchChannel.publish(event);
|
|
635
754
|
}
|
|
636
755
|
catch {
|
|
637
|
-
// Best-effort;
|
|
756
|
+
// Best-effort telemetry; never crash request path.
|
|
638
757
|
}
|
|
639
758
|
}
|
|
640
759
|
}
|
|
641
|
-
const telemetry = new FetchTelemetry();
|
|
642
|
-
/** Backwards-compatible exports */
|
|
643
|
-
export function startFetchTelemetry(url, method) {
|
|
644
|
-
return telemetry.start(url, method);
|
|
645
|
-
}
|
|
646
|
-
export function recordFetchResponse(context, response, contentSize) {
|
|
647
|
-
telemetry.recordResponse(context, response, contentSize);
|
|
648
|
-
}
|
|
649
|
-
export function recordFetchError(context, error, status) {
|
|
650
|
-
telemetry.recordError(context, error, status);
|
|
651
|
-
}
|
|
652
|
-
/* -------------------------------------------------------------------------------------------------
|
|
653
|
-
* Redirect handling
|
|
654
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
655
760
|
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
|
|
656
761
|
function isRedirectStatus(status) {
|
|
657
762
|
return REDIRECT_STATUSES.has(status);
|
|
658
763
|
}
|
|
659
764
|
function cancelResponseBody(response) {
|
|
660
765
|
const cancelPromise = response.body?.cancel();
|
|
661
|
-
if (cancelPromise)
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
});
|
|
766
|
+
if (!cancelPromise)
|
|
767
|
+
return;
|
|
768
|
+
void cancelPromise.catch(() => undefined);
|
|
665
769
|
}
|
|
666
770
|
class RedirectFollower {
|
|
771
|
+
fetchFn;
|
|
772
|
+
normalizeUrl;
|
|
773
|
+
preflight;
|
|
774
|
+
constructor(fetchFn, normalizeUrl, preflight) {
|
|
775
|
+
this.fetchFn = fetchFn;
|
|
776
|
+
this.normalizeUrl = normalizeUrl;
|
|
777
|
+
this.preflight = preflight;
|
|
778
|
+
}
|
|
667
779
|
async fetchWithRedirects(url, init, maxRedirects) {
|
|
668
780
|
let currentUrl = url;
|
|
669
781
|
const redirectLimit = Math.max(0, maxRedirects);
|
|
670
782
|
for (let redirectCount = 0; redirectCount <= redirectLimit; redirectCount += 1) {
|
|
671
|
-
const { response, nextUrl } = await this.withRedirectErrorContext(currentUrl, async () =>
|
|
783
|
+
const { response, nextUrl } = await this.withRedirectErrorContext(currentUrl, async () => {
|
|
784
|
+
if (this.preflight) {
|
|
785
|
+
await this.preflight(currentUrl, init.signal ?? undefined);
|
|
786
|
+
}
|
|
787
|
+
return this.performFetchCycle(currentUrl, init, redirectLimit, redirectCount);
|
|
788
|
+
});
|
|
672
789
|
if (!nextUrl)
|
|
673
790
|
return { response, url: currentUrl };
|
|
674
791
|
currentUrl = nextUrl;
|
|
675
792
|
}
|
|
676
|
-
throw
|
|
793
|
+
throw createTooManyRedirectsFetchError(currentUrl);
|
|
677
794
|
}
|
|
678
795
|
async performFetchCycle(currentUrl, init, redirectLimit, redirectCount) {
|
|
679
|
-
const response = await
|
|
796
|
+
const response = await this.fetchFn(currentUrl, {
|
|
797
|
+
...init,
|
|
798
|
+
redirect: 'manual',
|
|
799
|
+
});
|
|
680
800
|
if (!isRedirectStatus(response.status))
|
|
681
801
|
return { response };
|
|
682
802
|
this.assertRedirectWithinLimit(response, currentUrl, redirectLimit, redirectCount);
|
|
@@ -691,23 +811,24 @@ class RedirectFollower {
|
|
|
691
811
|
if (redirectCount < redirectLimit)
|
|
692
812
|
return;
|
|
693
813
|
cancelResponseBody(response);
|
|
694
|
-
throw
|
|
814
|
+
throw createTooManyRedirectsFetchError(currentUrl);
|
|
695
815
|
}
|
|
696
816
|
getRedirectLocation(response, currentUrl) {
|
|
697
817
|
const location = response.headers.get('location');
|
|
698
818
|
if (location)
|
|
699
819
|
return location;
|
|
700
820
|
cancelResponseBody(response);
|
|
701
|
-
throw
|
|
821
|
+
throw createMissingRedirectLocationFetchError(currentUrl);
|
|
702
822
|
}
|
|
703
823
|
resolveRedirectTarget(baseUrl, location) {
|
|
704
|
-
if (!URL.canParse(location, baseUrl))
|
|
824
|
+
if (!URL.canParse(location, baseUrl)) {
|
|
705
825
|
throw createErrorWithCode('Invalid redirect target', 'EBADREDIRECT');
|
|
826
|
+
}
|
|
706
827
|
const resolved = new URL(location, baseUrl);
|
|
707
828
|
if (resolved.username || resolved.password) {
|
|
708
829
|
throw createErrorWithCode('Redirect target includes credentials', 'EBADREDIRECT');
|
|
709
830
|
}
|
|
710
|
-
return
|
|
831
|
+
return this.normalizeUrl(resolved.href);
|
|
711
832
|
}
|
|
712
833
|
annotateRedirectError(error, url) {
|
|
713
834
|
if (!isObject(error))
|
|
@@ -724,94 +845,266 @@ class RedirectFollower {
|
|
|
724
845
|
}
|
|
725
846
|
}
|
|
726
847
|
}
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
848
|
+
function getCharsetFromContentType(contentType) {
|
|
849
|
+
if (!contentType)
|
|
850
|
+
return undefined;
|
|
851
|
+
const match = /charset=([^;]+)/i.exec(contentType);
|
|
852
|
+
const charsetGroup = match?.[1];
|
|
853
|
+
if (!charsetGroup)
|
|
854
|
+
return undefined;
|
|
855
|
+
let charset = charsetGroup.trim();
|
|
856
|
+
if (charset.startsWith('"') && charset.endsWith('"')) {
|
|
857
|
+
charset = charset.slice(1, -1);
|
|
858
|
+
}
|
|
859
|
+
return charset.trim();
|
|
731
860
|
}
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
861
|
+
function createDecoder(encoding) {
|
|
862
|
+
if (!encoding)
|
|
863
|
+
return new TextDecoder('utf-8');
|
|
864
|
+
try {
|
|
865
|
+
return new TextDecoder(encoding);
|
|
866
|
+
}
|
|
867
|
+
catch {
|
|
868
|
+
return new TextDecoder('utf-8');
|
|
869
|
+
}
|
|
870
|
+
}
|
|
871
|
+
function normalizeEncodingLabel(encoding) {
|
|
872
|
+
return encoding?.trim().toLowerCase() ?? '';
|
|
873
|
+
}
|
|
874
|
+
function isUnicodeWideEncoding(encoding) {
|
|
875
|
+
const normalized = normalizeEncodingLabel(encoding);
|
|
876
|
+
return (normalized.startsWith('utf-16') ||
|
|
877
|
+
normalized.startsWith('utf-32') ||
|
|
878
|
+
normalized === 'ucs-2' ||
|
|
879
|
+
normalized === 'unicodefffe' ||
|
|
880
|
+
normalized === 'unicodefeff');
|
|
881
|
+
}
|
|
882
|
+
const BOM_SIGNATURES = [
|
|
883
|
+
// 4-byte BOMs must come first to avoid false matches with 2-byte prefixes
|
|
884
|
+
{ bytes: [0xff, 0xfe, 0x00, 0x00], encoding: 'utf-32le' },
|
|
885
|
+
{ bytes: [0x00, 0x00, 0xfe, 0xff], encoding: 'utf-32be' },
|
|
886
|
+
{ bytes: [0xef, 0xbb, 0xbf], encoding: 'utf-8' },
|
|
887
|
+
{ bytes: [0xff, 0xfe], encoding: 'utf-16le' },
|
|
888
|
+
{ bytes: [0xfe, 0xff], encoding: 'utf-16be' },
|
|
889
|
+
];
|
|
890
|
+
function detectBomEncoding(buffer) {
|
|
891
|
+
for (const { bytes, encoding } of BOM_SIGNATURES) {
|
|
892
|
+
if (startsWithBytes(buffer, bytes))
|
|
893
|
+
return encoding;
|
|
894
|
+
}
|
|
895
|
+
return undefined;
|
|
896
|
+
}
|
|
897
|
+
function readQuotedValue(input, startIndex) {
|
|
898
|
+
const first = input[startIndex];
|
|
899
|
+
if (!first)
|
|
900
|
+
return '';
|
|
901
|
+
const quoted = first === '"' || first === "'";
|
|
902
|
+
if (quoted) {
|
|
903
|
+
const end = input.indexOf(first, startIndex + 1);
|
|
904
|
+
return end === -1 ? '' : input.slice(startIndex + 1, end).trim();
|
|
905
|
+
}
|
|
906
|
+
const tail = input.slice(startIndex);
|
|
907
|
+
const stop = tail.search(/[\s/>]/);
|
|
908
|
+
return (stop === -1 ? tail : tail.slice(0, stop)).trim();
|
|
909
|
+
}
|
|
910
|
+
function extractHtmlCharset(headSnippet) {
|
|
911
|
+
const lower = headSnippet.toLowerCase();
|
|
912
|
+
const charsetToken = 'charset=';
|
|
913
|
+
const charsetIdx = lower.indexOf(charsetToken);
|
|
914
|
+
if (charsetIdx === -1)
|
|
915
|
+
return undefined;
|
|
916
|
+
const valueStart = charsetIdx + charsetToken.length;
|
|
917
|
+
const charset = readQuotedValue(headSnippet, valueStart);
|
|
918
|
+
return charset ? charset.toLowerCase() : undefined;
|
|
919
|
+
}
|
|
920
|
+
function extractXmlEncoding(headSnippet) {
|
|
921
|
+
const lower = headSnippet.toLowerCase();
|
|
922
|
+
const xmlStart = lower.indexOf('<?xml');
|
|
923
|
+
if (xmlStart === -1)
|
|
924
|
+
return undefined;
|
|
925
|
+
const xmlEnd = lower.indexOf('?>', xmlStart);
|
|
926
|
+
const declaration = xmlEnd === -1
|
|
927
|
+
? headSnippet.slice(xmlStart)
|
|
928
|
+
: headSnippet.slice(xmlStart, xmlEnd + 2);
|
|
929
|
+
const declarationLower = declaration.toLowerCase();
|
|
930
|
+
const encodingToken = 'encoding=';
|
|
931
|
+
const encodingIdx = declarationLower.indexOf(encodingToken);
|
|
932
|
+
if (encodingIdx === -1)
|
|
933
|
+
return undefined;
|
|
934
|
+
const valueStart = encodingIdx + encodingToken.length;
|
|
935
|
+
const encoding = readQuotedValue(declaration, valueStart);
|
|
936
|
+
return encoding ? encoding.toLowerCase() : undefined;
|
|
937
|
+
}
|
|
938
|
+
function detectHtmlDeclaredEncoding(buffer) {
|
|
939
|
+
const scanSize = Math.min(buffer.length, 8_192);
|
|
940
|
+
if (scanSize === 0)
|
|
941
|
+
return undefined;
|
|
942
|
+
const headSnippet = Buffer.from(buffer.buffer, buffer.byteOffset, scanSize).toString('latin1');
|
|
943
|
+
return extractHtmlCharset(headSnippet) ?? extractXmlEncoding(headSnippet);
|
|
944
|
+
}
|
|
945
|
+
function resolveEncoding(declaredEncoding, sample) {
|
|
946
|
+
const bomEncoding = detectBomEncoding(sample);
|
|
947
|
+
if (bomEncoding)
|
|
948
|
+
return bomEncoding;
|
|
949
|
+
if (declaredEncoding)
|
|
950
|
+
return declaredEncoding;
|
|
951
|
+
return detectHtmlDeclaredEncoding(sample);
|
|
952
|
+
}
|
|
953
|
+
const BINARY_SIGNATURES = [
|
|
954
|
+
[0x25, 0x50, 0x44, 0x46],
|
|
955
|
+
[0x89, 0x50, 0x4e, 0x47],
|
|
956
|
+
[0x47, 0x49, 0x46, 0x38],
|
|
957
|
+
[0xff, 0xd8, 0xff],
|
|
958
|
+
[0x52, 0x49, 0x46, 0x46],
|
|
959
|
+
[0x42, 0x4d],
|
|
960
|
+
[0x49, 0x49, 0x2a, 0x00],
|
|
961
|
+
[0x4d, 0x4d, 0x00, 0x2a],
|
|
962
|
+
[0x00, 0x00, 0x01, 0x00],
|
|
963
|
+
[0x50, 0x4b, 0x03, 0x04],
|
|
964
|
+
[0x1f, 0x8b],
|
|
965
|
+
[0x42, 0x5a, 0x68],
|
|
966
|
+
[0x52, 0x61, 0x72, 0x21],
|
|
967
|
+
[0x37, 0x7a, 0xbc, 0xaf],
|
|
968
|
+
[0x7f, 0x45, 0x4c, 0x46],
|
|
969
|
+
[0x4d, 0x5a],
|
|
970
|
+
[0xcf, 0xfa, 0xed, 0xfe],
|
|
971
|
+
[0x00, 0x61, 0x73, 0x6d],
|
|
972
|
+
[0x1a, 0x45, 0xdf, 0xa3],
|
|
973
|
+
[0x66, 0x74, 0x79, 0x70],
|
|
974
|
+
[0x46, 0x4c, 0x56],
|
|
975
|
+
[0x49, 0x44, 0x33],
|
|
976
|
+
[0xff, 0xfb],
|
|
977
|
+
[0xff, 0xfa],
|
|
978
|
+
[0x4f, 0x67, 0x67, 0x53],
|
|
979
|
+
[0x66, 0x4c, 0x61, 0x43],
|
|
980
|
+
[0x4d, 0x54, 0x68, 0x64],
|
|
981
|
+
[0x77, 0x4f, 0x46, 0x46],
|
|
982
|
+
[0x00, 0x01, 0x00, 0x00],
|
|
983
|
+
[0x4f, 0x54, 0x54, 0x4f],
|
|
984
|
+
[0x53, 0x51, 0x4c, 0x69],
|
|
985
|
+
];
|
|
986
|
+
function startsWithBytes(buffer, signature) {
|
|
987
|
+
const sigLen = signature.length;
|
|
988
|
+
if (buffer.length < sigLen)
|
|
989
|
+
return false;
|
|
990
|
+
for (let i = 0; i < sigLen; i += 1) {
|
|
991
|
+
if (buffer[i] !== signature[i])
|
|
992
|
+
return false;
|
|
993
|
+
}
|
|
994
|
+
return true;
|
|
995
|
+
}
|
|
996
|
+
function hasNullByte(buffer, limit) {
|
|
997
|
+
const checkLen = Math.min(buffer.length, limit);
|
|
998
|
+
return buffer.subarray(0, checkLen).includes(0x00);
|
|
999
|
+
}
|
|
1000
|
+
function isBinaryContent(buffer, encoding) {
|
|
1001
|
+
for (const signature of BINARY_SIGNATURES) {
|
|
1002
|
+
if (startsWithBytes(buffer, signature))
|
|
1003
|
+
return true;
|
|
1004
|
+
}
|
|
1005
|
+
return !isUnicodeWideEncoding(encoding) && hasNullByte(buffer, 1000);
|
|
744
1006
|
}
|
|
745
1007
|
class ResponseTextReader {
|
|
746
|
-
async read(response, url, maxBytes, signal) {
|
|
747
|
-
|
|
1008
|
+
async read(response, url, maxBytes, signal, encoding) {
|
|
1009
|
+
const { buffer, encoding: effectiveEncoding } = await this.readBuffer(response, url, maxBytes, signal, encoding);
|
|
1010
|
+
const decoder = createDecoder(effectiveEncoding);
|
|
1011
|
+
const text = decoder.decode(buffer);
|
|
1012
|
+
return { text, size: buffer.byteLength };
|
|
1013
|
+
}
|
|
1014
|
+
async readBuffer(response, url, maxBytes, signal, encoding) {
|
|
1015
|
+
if (signal?.aborted) {
|
|
1016
|
+
cancelResponseBody(response);
|
|
1017
|
+
throw createAbortedFetchError(url);
|
|
1018
|
+
}
|
|
1019
|
+
const limit = maxBytes <= 0 ? Number.POSITIVE_INFINITY : maxBytes;
|
|
748
1020
|
if (!response.body) {
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
1021
|
+
if (signal?.aborted)
|
|
1022
|
+
throw createCanceledFetchError(url);
|
|
1023
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
1024
|
+
const length = Math.min(arrayBuffer.byteLength, limit);
|
|
1025
|
+
const buffer = new Uint8Array(arrayBuffer, 0, length);
|
|
1026
|
+
const effectiveEncoding = resolveEncoding(encoding, buffer) ?? encoding ?? 'utf-8';
|
|
1027
|
+
if (isBinaryContent(buffer, effectiveEncoding)) {
|
|
1028
|
+
throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
|
|
1029
|
+
}
|
|
1030
|
+
return { buffer, encoding: effectiveEncoding, size: buffer.byteLength };
|
|
754
1031
|
}
|
|
755
|
-
return this.
|
|
1032
|
+
return this.readStreamToBuffer(response.body, url, limit, signal, encoding);
|
|
756
1033
|
}
|
|
757
|
-
async
|
|
758
|
-
|
|
759
|
-
|
|
1034
|
+
async readNext(reader, abortPromise) {
|
|
1035
|
+
return abortPromise
|
|
1036
|
+
? await Promise.race([reader.read(), abortPromise])
|
|
1037
|
+
: await reader.read();
|
|
1038
|
+
}
|
|
1039
|
+
async readStreamToBuffer(stream, url, maxBytes, signal, encoding) {
|
|
1040
|
+
let effectiveEncoding = encoding;
|
|
1041
|
+
let decoder = null;
|
|
1042
|
+
const chunks = [];
|
|
760
1043
|
let total = 0;
|
|
761
1044
|
const reader = stream.getReader();
|
|
1045
|
+
const abortRace = createAbortRace(signal, () => createAbortedFetchError(url));
|
|
762
1046
|
try {
|
|
763
|
-
await this.
|
|
764
|
-
|
|
1047
|
+
let result = await this.readNext(reader, abortRace.abortPromise);
|
|
1048
|
+
if (!result.done) {
|
|
1049
|
+
effectiveEncoding =
|
|
1050
|
+
resolveEncoding(encoding, result.value) ?? encoding ?? 'utf-8';
|
|
1051
|
+
decoder = createDecoder(effectiveEncoding);
|
|
1052
|
+
}
|
|
1053
|
+
let checkedBinary = false;
|
|
765
1054
|
while (!result.done) {
|
|
766
|
-
|
|
767
|
-
if (
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
1055
|
+
const chunk = result.value;
|
|
1056
|
+
if (!checkedBinary) {
|
|
1057
|
+
checkedBinary = true;
|
|
1058
|
+
if (isBinaryContent(chunk, decoder?.encoding)) {
|
|
1059
|
+
await this.cancelReaderQuietly(reader);
|
|
1060
|
+
throw new FetchError('Detailed content type check failed: binary content detected', url, 500, { reason: 'binary_content_detected' });
|
|
1061
|
+
}
|
|
1062
|
+
}
|
|
1063
|
+
const newTotal = total + chunk.length;
|
|
1064
|
+
if (newTotal > maxBytes) {
|
|
1065
|
+
const remaining = maxBytes - total;
|
|
1066
|
+
if (remaining > 0) {
|
|
1067
|
+
chunks.push(chunk.subarray(0, remaining));
|
|
1068
|
+
total += remaining;
|
|
1069
|
+
}
|
|
1070
|
+
await this.cancelReaderQuietly(reader);
|
|
1071
|
+
break;
|
|
1072
|
+
}
|
|
1073
|
+
chunks.push(chunk);
|
|
1074
|
+
total = newTotal;
|
|
1075
|
+
result = await this.readNext(reader, abortRace.abortPromise);
|
|
774
1076
|
}
|
|
775
1077
|
}
|
|
776
1078
|
catch (error) {
|
|
777
1079
|
await this.cancelReaderQuietly(reader);
|
|
778
|
-
|
|
779
|
-
throw new FetchError('Request was aborted during response read', url, 499, { reason: 'aborted' });
|
|
780
|
-
throw error;
|
|
1080
|
+
this.handleReadingError(error, url, signal);
|
|
781
1081
|
}
|
|
782
1082
|
finally {
|
|
1083
|
+
abortRace.cleanup();
|
|
783
1084
|
reader.releaseLock();
|
|
784
1085
|
}
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
1086
|
+
return {
|
|
1087
|
+
buffer: Buffer.concat(chunks, total),
|
|
1088
|
+
encoding: effectiveEncoding ?? 'utf-8',
|
|
1089
|
+
size: total,
|
|
1090
|
+
};
|
|
789
1091
|
}
|
|
790
|
-
|
|
791
|
-
if (
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
});
|
|
1092
|
+
handleReadingError(error, url, signal) {
|
|
1093
|
+
if (error instanceof FetchError)
|
|
1094
|
+
throw error;
|
|
1095
|
+
if (signal?.aborted)
|
|
1096
|
+
throw createAbortedFetchError(url);
|
|
1097
|
+
throw error;
|
|
797
1098
|
}
|
|
798
1099
|
async cancelReaderQuietly(reader) {
|
|
799
1100
|
try {
|
|
800
1101
|
await reader.cancel();
|
|
801
1102
|
}
|
|
802
1103
|
catch {
|
|
803
|
-
//
|
|
1104
|
+
// Ignore cancellation failures; stream teardown must proceed.
|
|
804
1105
|
}
|
|
805
1106
|
}
|
|
806
1107
|
}
|
|
807
|
-
const responseReader = new ResponseTextReader();
|
|
808
|
-
/** Backwards-compatible export */
|
|
809
|
-
export async function readResponseText(response, url, maxBytes, signal) {
|
|
810
|
-
return responseReader.read(response, url, maxBytes, signal);
|
|
811
|
-
}
|
|
812
|
-
/* -------------------------------------------------------------------------------------------------
|
|
813
|
-
* HTTP fetcher (headers, signals, response handling)
|
|
814
|
-
* ------------------------------------------------------------------------------------------------- */
|
|
815
1108
|
const DEFAULT_HEADERS = {
|
|
816
1109
|
'User-Agent': config.fetcher.userAgent,
|
|
817
1110
|
Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
|
@@ -820,57 +1113,370 @@ const DEFAULT_HEADERS = {
|
|
|
820
1113
|
Connection: 'keep-alive',
|
|
821
1114
|
};
|
|
822
1115
|
function buildHeaders() {
|
|
823
|
-
return
|
|
1116
|
+
return DEFAULT_HEADERS;
|
|
824
1117
|
}
|
|
825
1118
|
function buildRequestSignal(timeoutMs, external) {
|
|
1119
|
+
if (timeoutMs <= 0)
|
|
1120
|
+
return external;
|
|
826
1121
|
const timeoutSignal = AbortSignal.timeout(timeoutMs);
|
|
827
1122
|
return external ? AbortSignal.any([external, timeoutSignal]) : timeoutSignal;
|
|
828
1123
|
}
|
|
829
1124
|
function buildRequestInit(headers, signal) {
|
|
830
|
-
return {
|
|
1125
|
+
return {
|
|
1126
|
+
method: 'GET',
|
|
1127
|
+
headers,
|
|
1128
|
+
...(signal ? { signal } : {}),
|
|
1129
|
+
};
|
|
831
1130
|
}
|
|
832
1131
|
function resolveResponseError(response, finalUrl) {
|
|
833
1132
|
if (response.status === 429) {
|
|
834
|
-
return
|
|
1133
|
+
return createRateLimitedFetchError(finalUrl, response.headers.get('retry-after'));
|
|
835
1134
|
}
|
|
836
1135
|
return response.ok
|
|
837
1136
|
? null
|
|
838
|
-
:
|
|
1137
|
+
: createHttpFetchError(finalUrl, response.status, response.statusText);
|
|
1138
|
+
}
|
|
1139
|
+
function resolveMediaType(contentType) {
|
|
1140
|
+
if (!contentType)
|
|
1141
|
+
return null;
|
|
1142
|
+
const semiIndex = contentType.indexOf(';');
|
|
1143
|
+
const mediaType = semiIndex === -1 ? contentType : contentType.slice(0, semiIndex);
|
|
1144
|
+
const trimmed = mediaType.trim();
|
|
1145
|
+
return trimmed ? trimmed.toLowerCase() : null;
|
|
1146
|
+
}
|
|
1147
|
+
/** Media types outside the `text/*` family that are nevertheless accepted as text. */
const TEXTUAL_MEDIA_TYPES = new Set([
    'application/json',
    'application/ld+json',
    'application/xml',
    'application/xhtml+xml',
    'application/javascript',
    'application/ecmascript',
    'application/x-javascript',
    'application/x-yaml',
    'application/yaml',
    'application/markdown',
]);
/**
 * Decides whether a media type is treated as textual content.
 *
 * A type qualifies when it is in the `text/` family, is listed in
 * TEXTUAL_MEDIA_TYPES, or ends with one of the suffixes
 * `+json`, `+xml`, `+yaml`, `+text`, `+markdown`.
 *
 * @param {string} mediaType - Media type without parameters; comparisons are
 *   case-sensitive against lower-case literals.
 * @returns {boolean} `true` when the type is considered text-like.
 */
function isTextLikeMediaType(mediaType) {
    const isTextFamily = mediaType.startsWith('text/');
    if (isTextFamily || TEXTUAL_MEDIA_TYPES.has(mediaType)) {
        return true;
    }
    const textualSuffixes = ['+json', '+xml', '+yaml', '+text', '+markdown'];
    return textualSuffixes.some((suffix) => mediaType.endsWith(suffix));
}
|
|
1170
|
+
/**
 * Rejects responses whose declared content type is not text-like.
 *
 * A missing or empty content type is allowed through without complaint.
 *
 * @param {string | null | undefined} contentType - Raw Content-Type header value.
 * @param {string} url - URL attached to the error for context.
 * @throws {FetchError} When a media type is present and is not text-like.
 */
function assertSupportedContentType(contentType, url) {
    const mediaType = resolveMediaType(contentType);
    const isAcceptable = !mediaType || isTextLikeMediaType(mediaType);
    if (isAcceptable) {
        return;
    }
    throw new FetchError(`Unsupported content type: ${mediaType}`, url);
}
|
|
1178
|
+
/**
 * Splits a comma-separated Content-Encoding header value into its tokens.
 *
 * For each comma-delimited segment, leading code units <= 0x20 (controls and
 * space) are skipped, the remainder is trimmed and lower-cased, and empty
 * results are discarded.
 *
 * @param {string} value - Raw header value.
 * @returns {string[]} Normalised tokens in header order.
 */
function extractEncodingTokens(value) {
    const tokens = [];
    for (const segment of value.split(',')) {
        // Leading bytes up to and including U+0020 never start a token.
        const withoutLeadingControls = segment.replace(/^[\u0000-\u0020]+/, '');
        const token = withoutLeadingControls.trim().toLowerCase();
        if (token !== '') {
            tokens.push(token);
        }
    }
    return tokens;
}
|
|
1200
|
+
/**
 * Interprets a Content-Encoding header that is expected to name at most one coding.
 *
 * @param {string | null | undefined} value - Raw header value.
 * @returns {string | null | undefined} `null` when no coding is declared, the
 *   lower-cased coding when exactly one is declared, and `undefined` when several
 *   codings are listed.
 */
function parseSingleContentEncoding(value) {
    if (!value) {
        return null;
    }
    const tokens = extractEncodingTokens(value);
    switch (tokens.length) {
        case 0:
            return null;
        case 1:
            return tokens[0] ?? null;
        default:
            return undefined;
    }
}
|
|
1210
|
+
/**
 * Builds the error raised for a Content-Encoding that cannot be decoded.
 *
 * @param {string} url - URL of the offending response.
 * @param {string} encodingHeader - Header text echoed in the message and details.
 * @returns {FetchError} Error with status 415 and reason `unsupported_content_encoding`.
 */
function createUnsupportedContentEncodingError(url, encodingHeader) {
    const message = `Unsupported Content-Encoding: ${encodingHeader}`;
    const details = {
        reason: 'unsupported_content_encoding',
        encoding: encodingHeader,
    };
    return new FetchError(message, url, 415, details);
}
|
|
1216
|
+
/**
 * Rebuilds a readable stream from a chunk that was already read plus the
 * reader holding the remainder of the body.
 *
 * @param {Uint8Array} initialChunk - First chunk; emitted first unless it is empty.
 * @param {ReadableStreamDefaultReader<Uint8Array>} reader - Reader for the remaining chunks.
 * @returns {ReadableStream<Uint8Array>} Stream yielding the initial chunk followed by
 *   whatever the reader produces. Cancelling it cancels the reader (best effort).
 */
function createPumpedStream(initialChunk, reader) {
    const underlyingSource = {
        start(controller) {
            if (initialChunk.byteLength > 0) {
                controller.enqueue(initialChunk);
            }
        },
        async pull(controller) {
            try {
                const next = await reader.read();
                if (!next.done) {
                    controller.enqueue(next.value);
                    return;
                }
                controller.close();
            }
            catch (error) {
                // Surface read (or enqueue/close) failures to the consumer.
                controller.error(error);
            }
        },
        cancel(reason) {
            // Failures while cancelling the upstream reader are deliberately ignored.
            void reader.cancel(reason).catch(() => undefined);
        },
    };
    return new ReadableStream(underlyingSource);
}
|
|
1242
|
+
/**
 * Heuristically decides whether the first body chunk still looks compressed.
 *
 * - `gzip`: the chunk must start with the bytes 0x1f 0x8b.
 * - `deflate`: the first two bytes must form a zlib-style header (low nibble of
 *   byte 0 equals 8, and byte0 * 256 + byte1 is a multiple of 31).
 * - any other encoding: more than 10% of the first (up to) 50 bytes must be
 *   control bytes (below 0x09, 0x0e-0x1f, or 0x7f).
 *
 * @param {Uint8Array} chunk - First chunk of the body.
 * @param {string} encoding - Lower-cased content coding.
 * @returns {boolean} `true` when the chunk appears to be compressed data; always
 *   `false` for an empty chunk.
 */
function isLikelyCompressed(chunk, encoding) {
    const size = chunk.byteLength;
    if (size === 0) {
        return false;
    }
    switch (encoding) {
        case 'gzip':
            return size >= 2 && chunk[0] === 0x1f && chunk[1] === 0x8b;
        case 'deflate': {
            if (size < 2) {
                return false;
            }
            const headerByte0 = chunk[0] ?? 0;
            const headerByte1 = chunk[1] ?? 0;
            const usesDeflateMethod = (headerByte0 & 0x0f) === 8;
            return usesDeflateMethod && (headerByte0 * 256 + headerByte1) % 31 === 0;
        }
        default: {
            const sampleSize = Math.min(chunk.length, 50);
            let controlBytes = 0;
            for (let index = 0; index < sampleSize; index += 1) {
                const byte = chunk[index] ?? 0;
                const isControl = byte < 0x09 || (byte > 0x0d && byte < 0x20) || byte === 0x7f;
                if (isControl) {
                    controlBytes += 1;
                }
            }
            return controlBytes / sampleSize > 0.1;
        }
    }
}
|
|
840
|
-
async function
|
|
1267
|
+
/**
 * Returns a response whose body has been decoded according to Content-Encoding.
 *
 * The response is returned untouched when no coding (or `identity`) is declared
 * or when it has no body. For `gzip`, `deflate` and `br` the first chunk is
 * peeked: if it does not look compressed, the body is passed through unchanged;
 * otherwise it is piped through the matching zlib decompressor. In both of those
 * cases the `content-encoding` and `content-length` headers are removed from the
 * returned response because they no longer describe the body.
 *
 * @param {Response} response - Response as received from fetch.
 * @param {string} url - URL used for error reporting.
 * @param {AbortSignal} [signal] - When aborted, tears down the decoding streams.
 * @returns {Promise<Response>} The original or a re-wrapped response.
 * @throws {FetchError} 415 when several codings or an unknown coding are declared;
 *   502 when the first body chunk cannot be read.
 */
async function decodeResponseIfNeeded(response, url, signal) {
    const encodingHeader = response.headers.get('content-encoding');
    const encoding = parseSingleContentEncoding(encodingHeader);
    if (encoding === null || encoding === 'identity')
        return response;
    if (encoding === undefined) {
        // Several codings were listed; stacked codings are not supported.
        throw createUnsupportedContentEncodingError(url, encodingHeader ?? '');
    }
    if (encoding !== 'gzip' && encoding !== 'deflate' && encoding !== 'br') {
        throw createUnsupportedContentEncodingError(url, encodingHeader ?? encoding);
    }
    if (!response.body)
        return response;
    // Peek at the first chunk to check whether the body is actually compressed.
    const reader = response.body.getReader();
    let initialChunk;
    try {
        const { done, value } = await reader.read();
        if (done) {
            // Empty body: nothing to decode, headers are passed along unchanged.
            return new Response(null, {
                status: response.status,
                statusText: response.statusText,
                headers: response.headers,
            });
        }
        initialChunk = value;
    }
    catch (error) {
        throw new FetchError(`Failed to read response body: ${isError(error) ? error.message : String(error)}`, url, 502);
    }
    const headers = new Headers(response.headers);
    headers.delete('content-encoding');
    headers.delete('content-length');
    const responseInit = {
        status: response.status,
        statusText: response.statusText,
        headers,
    };
    if (!isLikelyCompressed(initialChunk, encoding)) {
        return new Response(createPumpedStream(initialChunk, reader), responseInit);
    }
    // `encoding` was narrowed to gzip | deflate | br above, so the default is brotli.
    let decompressor;
    switch (encoding) {
        case 'gzip':
            decompressor = createGunzip();
            break;
        case 'deflate':
            decompressor = createInflate();
            break;
        default:
            decompressor = createBrotliDecompress();
    }
    const sourceStream = Readable.fromWeb(createPumpedStream(initialChunk, reader));
    // pipe() does not forward source failures to the destination. Without this,
    // a network error mid-body would surface as an unhandled 'error' event on
    // the source while the decoded body never settles.
    sourceStream.once('error', (error) => {
        decompressor.destroy(error);
    });
    // pipe() returns its destination, so this is the decompressor itself.
    const decodedNodeStream = sourceStream.pipe(decompressor);
    const abortHandler = () => {
        sourceStream.destroy();
        decodedNodeStream.destroy();
    };
    const cleanup = () => {
        signal?.removeEventListener('abort', abortHandler);
        // If decoding stopped early (corrupt data, consumer cancel), release the
        // upstream reader instead of leaving the source paused forever.
        if (!sourceStream.destroyed) {
            sourceStream.destroy();
        }
    };
    decodedNodeStream.once('close', cleanup);
    decodedNodeStream.once('error', cleanup);
    const decodedBody = Readable.toWeb(decodedNodeStream);
    if (signal) {
        if (signal.aborted) {
            // An 'abort' listener would never fire for an already-aborted signal.
            abortHandler();
        }
        else {
            signal.addEventListener('abort', abortHandler, { once: true });
        }
    }
    return new Response(decodedBody, responseInit);
}
|
|
1357
|
+
/**
 * Validates, decodes and reads a fetch response, then records it in telemetry.
 *
 * Steps: reject non-OK statuses, decode the body per Content-Encoding, reject
 * non-text content types, then read the body either as text or as a buffer.
 * On both rejection paths the pending body is cancelled before the error is
 * thrown so the underlying connection is not left waiting on an unread body.
 *
 * @param {Response} response - Response returned by the redirect follower.
 * @param {string} finalUrl - URL the response was served from.
 * @param {object} ctx - Telemetry context passed to `telemetry.recordResponse`.
 * @param {object} telemetry - Provides `recordResponse(ctx, response, size)`.
 * @param {object} reader - Provides `read(...)` and `readBuffer(...)`.
 * @param {number} maxBytes - Size limit forwarded to the reader.
 * @param {'text' | string} mode - `'text'` reads text; any other value reads a buffer.
 * @param {AbortSignal} [signal] - Abort signal forwarded to decoding and reading.
 * @returns {Promise<{kind: 'text', text: string, size: number} |
 *   {kind: 'buffer', buffer: unknown, encoding: unknown, size: number}>}
 * @throws Whatever `resolveResponseError` returns for a failed status, and any
 *   error raised while decoding, validating or reading the body.
 */
async function readAndRecordDecodedResponse(response, finalUrl, ctx, telemetry, reader, maxBytes, mode, signal) {
    const responseError = resolveResponseError(response, finalUrl);
    if (responseError) {
        cancelResponseBody(response);
        throw responseError;
    }
    const decodedResponse = await decodeResponseIfNeeded(response, finalUrl, signal);
    const contentType = decodedResponse.headers.get('content-type');
    try {
        assertSupportedContentType(contentType, finalUrl);
    }
    catch (error) {
        // Mirror the HTTP-error path above: do not abandon an unread body.
        // NOTE(review): presumes cancelResponseBody tolerates a null body, as it
        // is already called for arbitrary non-OK responses — confirm.
        cancelResponseBody(decodedResponse);
        throw error;
    }
    const declaredEncoding = getCharsetFromContentType(contentType ?? null);
    if (mode === 'text') {
        const { text, size } = await reader.read(decodedResponse, finalUrl, maxBytes, signal, declaredEncoding);
        telemetry.recordResponse(ctx, decodedResponse, size);
        return { kind: 'text', text, size };
    }
    const { buffer, encoding, size } = await reader.readBuffer(decodedResponse, finalUrl, maxBytes, signal, declaredEncoding);
    telemetry.recordResponse(ctx, decodedResponse, size);
    return { kind: 'buffer', buffer, encoding, size };
}
|
|
1376
|
+
/**
 * Reads the hostname component of a URL string.
 *
 * @param {string} url - Absolute URL.
 * @returns {string} The hostname as reported by the WHATWG URL parser.
 * @throws An error created with code `EINVAL` when the string cannot be parsed as a URL.
 */
function extractHostname(url) {
    if (URL.canParse(url)) {
        return new URL(url).hostname;
    }
    throw createErrorWithCode('Invalid URL', 'EINVAL');
}
|
|
1382
|
+
/**
 * Creates a preflight check that vets a URL's hostname through the given resolver.
 *
 * @param {{ assertSafeHostname(hostname: string, signal?: AbortSignal): Promise<unknown> }} dnsResolver
 *   Resolver used to validate hostnames.
 * @returns {(url: string, signal?: AbortSignal) => Promise<void>} Async function that
 *   extracts the hostname from `url` and awaits `dnsResolver.assertSafeHostname`.
 */
function createDnsPreflight(dnsResolver) {
    const preflight = async (url, signal) => {
        const targetHost = extractHostname(url);
        await dnsResolver.assertSafeHostname(targetHost, signal);
    };
    return preflight;
}
|
|
850
1388
|
/**
 * Performs GET requests for already-normalised URLs.
 *
 * Each fetch runs a DNS safety check on the hostname, follows redirects through
 * the injected redirect follower, decodes and reads the body, and reports the
 * outcome to telemetry.
 */
class HttpFetcher {
    fetcherConfig;
    dnsResolver;
    redirectFollower;
    reader;
    telemetry;
    /**
     * @param {object} fetcherConfig - Supplies `timeout`, `maxRedirects` and `maxContentLength`.
     * @param {object} dnsResolver - Supplies `assertSafeHostname(hostname, signal)`.
     * @param {object} redirectFollower - Supplies `fetchWithRedirects(url, init, maxRedirects)`.
     * @param {object} reader - Body reader handed to `readAndRecordDecodedResponse`.
     * @param {object} telemetry - Supplies `start`, `redact`, `recordResponse`, `recordError`.
     */
    constructor(fetcherConfig, dnsResolver, redirectFollower, reader, telemetry) {
        Object.assign(this, {
            fetcherConfig,
            dnsResolver,
            redirectFollower,
            reader,
            telemetry,
        });
    }
    /**
     * Fetches a URL and resolves with its body as text.
     * @param {string} normalizedUrl
     * @param {{ signal?: AbortSignal }} [options]
     * @returns {Promise<string>}
     */
    async fetchNormalizedUrl(normalizedUrl, options) {
        return this.fetchNormalized(normalizedUrl, 'text', options);
    }
    /**
     * Fetches a URL and resolves with its body as a buffer plus encoding.
     * @param {string} normalizedUrl
     * @param {{ signal?: AbortSignal }} [options]
     * @returns {Promise<{ buffer: unknown, encoding: unknown }>}
     */
    async fetchNormalizedUrlBuffer(normalizedUrl, options) {
        return this.fetchNormalized(normalizedUrl, 'buffer', options);
    }
    /**
     * Shared implementation behind the text and buffer entry points.
     *
     * Any failure after telemetry has started is converted with `mapFetchError`,
     * recorded, and rethrown. An unparsable URL throws before telemetry starts.
     *
     * @param {string} normalizedUrl
     * @param {'text' | 'buffer'} mode
     * @param {{ signal?: AbortSignal }} [options]
     */
    async fetchNormalized(normalizedUrl, mode, options) {
        const hostname = extractHostname(normalizedUrl);
        const { timeout: timeoutMs } = this.fetcherConfig;
        const requestHeaders = buildHeaders();
        const requestSignal = buildRequestSignal(timeoutMs, options?.signal);
        const init = buildRequestInit(requestHeaders, requestSignal);
        const ctx = this.telemetry.start(normalizedUrl, 'GET');
        try {
            await this.dnsResolver.assertSafeHostname(hostname, requestSignal ?? undefined);
            const { response, url: finalUrl } = await this.redirectFollower.fetchWithRedirects(
                normalizedUrl,
                init,
                this.fetcherConfig.maxRedirects,
            );
            ctx.url = this.telemetry.redact(finalUrl);
            const payload = await readAndRecordDecodedResponse(
                response,
                finalUrl,
                ctx,
                this.telemetry,
                this.reader,
                this.fetcherConfig.maxContentLength,
                mode,
                init.signal ?? undefined,
            );
            return payload.kind === 'text'
                ? payload.text
                : { buffer: payload.buffer, encoding: payload.encoding };
        }
        catch (error) {
            const mapped = mapFetchError(error, normalizedUrl, timeoutMs);
            ctx.url = this.telemetry.redact(mapped.url);
            this.telemetry.recordError(ctx, mapped, mapped.statusCode);
            throw mapped;
        }
    }
}
|
|
872
|
-
const
|
|
873
|
-
|
|
1431
|
+
// Module-level collaborators, constructed once at load time and shared by the
// exported helper functions of this module.
const ipBlocker = new IpBlocker(config.security);
const urlNormalizer = new UrlNormalizer(
    config.constants,
    config.security,
    ipBlocker,
    BLOCKED_HOST_SUFFIXES,
);
const rawUrlTransformer = new RawUrlTransformer(defaultLogger);
const dnsResolver = new SafeDnsResolver(
    ipBlocker,
    config.security,
    BLOCKED_HOST_SUFFIXES,
);
const telemetry = new FetchTelemetry(
    defaultLogger,
    defaultContext,
    defaultRedactor,
);
// Each redirect target is validated and normalised before it is followed.
const normalizeRedirectUrl = (url) => {
    return urlNormalizer.validateAndNormalize(url);
};
const dnsPreflight = createDnsPreflight(dnsResolver);
// Redirect follower with per-hop DNS preflight.
const secureRedirectFollower = new RedirectFollower(
    defaultFetch,
    normalizeRedirectUrl,
    dnsPreflight,
);
const responseReader = new ResponseTextReader();
const httpFetcher = new HttpFetcher(
    config.fetcher,
    dnsResolver,
    secureRedirectFollower,
    responseReader,
    telemetry,
);
|
|
1442
|
+
/**
 * Checks an IP address against the module-level IP blocker.
 *
 * @param {string} ip - Address to test.
 * @returns The result of `ipBlocker.isBlockedIp(ip)`.
 */
export function isBlockedIp(ip) {
    const verdict = ipBlocker.isBlockedIp(ip);
    return verdict;
}
|
|
1445
|
+
/**
 * Normalises a URL string using the module-level URL normalizer.
 *
 * @param {string} urlString - URL to normalise.
 * @returns The result of `urlNormalizer.normalize(urlString)`.
 */
export function normalizeUrl(urlString) {
    const normalized = urlNormalizer.normalize(urlString);
    return normalized;
}
|
|
1448
|
+
/**
 * Validates and normalises a URL string using the module-level URL normalizer.
 *
 * @param {string} urlString - URL to validate and normalise.
 * @returns The result of `urlNormalizer.validateAndNormalize(urlString)`.
 */
export function validateAndNormalizeUrl(urlString) {
    const normalized = urlNormalizer.validateAndNormalize(urlString);
    return normalized;
}
|
|
1451
|
+
/**
 * Runs a URL through the module-level raw-URL transformer.
 *
 * @param {string} url - URL to transform.
 * @returns The result of `rawUrlTransformer.transformToRawUrl(url)`.
 */
export function transformToRawUrl(url) {
    const transformed = rawUrlTransformer.transformToRawUrl(url);
    return transformed;
}
|
|
1454
|
+
/**
 * Asks the module-level raw-URL transformer whether a URL points at raw text content.
 *
 * @param {string} url - URL to classify.
 * @returns The result of `rawUrlTransformer.isRawTextContentUrl(url)`.
 */
export function isRawTextContentUrl(url) {
    const verdict = rawUrlTransformer.isRawTextContentUrl(url);
    return verdict;
}
|
|
1457
|
+
/**
 * Opens a telemetry context for a request on the module-level telemetry instance.
 *
 * @param {string} url - Request URL.
 * @param {string} method - HTTP method.
 * @returns The context produced by `telemetry.start(url, method)`.
 */
export function startFetchTelemetry(url, method) {
    const context = telemetry.start(url, method);
    return context;
}
|
|
1460
|
+
/**
 * Records a completed response on the module-level telemetry instance.
 *
 * @param context - Telemetry context for the request.
 * @param {Response} response - The response that was received.
 * @param {number} [contentSize] - Size of the body that was read.
 * @returns {void}
 */
export function recordFetchResponse(context, response, contentSize) {
    const sink = telemetry;
    sink.recordResponse(context, response, contentSize);
}
|
|
1463
|
+
/**
 * Records a failed request on the module-level telemetry instance.
 *
 * @param context - Telemetry context for the request.
 * @param error - The failure to record.
 * @param {number} [status] - HTTP status associated with the failure, if any.
 * @returns {void}
 */
export function recordFetchError(context, error, status) {
    const sink = telemetry;
    sink.recordError(context, error, status);
}
|
|
1466
|
+
/**
 * Fetches a URL through the module-level redirect follower.
 *
 * @param {string} url - Starting URL.
 * @param {RequestInit} init - Request init passed to each hop.
 * @param {number} maxRedirects - Redirect limit.
 * @returns {Promise} Resolves with whatever `secureRedirectFollower.fetchWithRedirects` yields.
 */
export async function fetchWithRedirects(url, init, maxRedirects) {
    const outcome = secureRedirectFollower.fetchWithRedirects(url, init, maxRedirects);
    return outcome;
}
|
|
1469
|
+
/**
 * Decodes a response per its Content-Encoding and reads the body as text.
 *
 * @param {Response} response - Response to read.
 * @param {string} url - URL used for error reporting.
 * @param {number} maxBytes - Size limit forwarded to the reader.
 * @param {AbortSignal} [signal] - Abort signal forwarded to decoding and reading.
 * @param {string} [encoding] - Character encoding forwarded to the reader.
 * @returns {Promise} Resolves with the result of `responseReader.read`.
 */
export async function readResponseText(response, url, maxBytes, signal, encoding) {
    const decoded = await decodeResponseIfNeeded(response, url, signal);
    const textResult = responseReader.read(decoded, url, maxBytes, signal, encoding);
    return textResult;
}
|
|
1473
|
+
/**
 * Decodes a response per its Content-Encoding and reads the body as a buffer.
 *
 * @param {Response} response - Response to read.
 * @param {string} url - URL used for error reporting.
 * @param {number} maxBytes - Size limit forwarded to the reader.
 * @param {AbortSignal} [signal] - Abort signal forwarded to decoding and reading.
 * @param {string} [encoding] - Character encoding forwarded to the reader.
 * @returns {Promise} Resolves with the result of `responseReader.readBuffer`.
 */
export async function readResponseBuffer(response, url, maxBytes, signal, encoding) {
    const decoded = await decodeResponseIfNeeded(response, url, signal);
    const bufferResult = responseReader.readBuffer(decoded, url, maxBytes, signal, encoding);
    return bufferResult;
}
|
|
874
1477
|
/**
 * Fetches an already-normalised URL as text via the module-level HttpFetcher.
 *
 * @param {string} normalizedUrl - URL to fetch.
 * @param {{ signal?: AbortSignal }} [options] - Optional abort signal.
 * @returns {Promise} Resolves with the result of `httpFetcher.fetchNormalizedUrl`.
 */
export async function fetchNormalizedUrl(normalizedUrl, options) {
    const pendingText = httpFetcher.fetchNormalizedUrl(normalizedUrl, options);
    return pendingText;
}
|
|
1480
|
+
/**
 * Fetches an already-normalised URL as a buffer via the module-level HttpFetcher.
 *
 * @param {string} normalizedUrl - URL to fetch.
 * @param {{ signal?: AbortSignal }} [options] - Optional abort signal.
 * @returns {Promise} Resolves with the result of `httpFetcher.fetchNormalizedUrlBuffer`.
 */
export async function fetchNormalizedUrlBuffer(normalizedUrl, options) {
    const pendingBuffer = httpFetcher.fetchNormalizedUrlBuffer(normalizedUrl, options);
    return pendingBuffer;
}
|