maven-proxy 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +466 -420
- package/bin/maven-proxy.js +585 -573
- package/package.json +54 -54
- package/scripts/truststore.js +96 -96
- package/src/cache/cache-path.js +50 -50
- package/src/cache/downloader.js +350 -350
- package/src/cert/cert-manager.js +194 -194
- package/src/cert/truststore-utils.js +383 -289
- package/src/common/console-log-file.js +61 -61
- package/src/common/daily-log-file.js +78 -78
- package/src/common/domain-match.js +39 -39
- package/src/common/download-log-writer.js +26 -26
- package/src/common/ecosystem.js +63 -63
- package/src/common/java-home.js +327 -327
- package/src/config/config.js +224 -213
- package/src/index.js +93 -93
- package/src/proxy/proxy-connect-handler.js +173 -173
- package/src/proxy/proxy-http-handler.js +187 -187
- package/src/proxy/proxy-server.js +35 -35
- package/src/proxy/upstream-proxy.js +236 -236
- package/src/repo/repo-server.js +120 -120
package/src/cache/downloader.js
CHANGED
|
@@ -1,350 +1,350 @@
|
|
|
1
|
-
import fs from "node:fs";
|
|
2
|
-
import http from "node:http";
|
|
3
|
-
import https from "node:https";
|
|
4
|
-
import path from "node:path";
|
|
5
|
-
import { pipeline } from "node:stream/promises";
|
|
6
|
-
import { DownloadLogWriter } from "../common/download-log-writer.js";
|
|
7
|
-
|
|
8
|
-
// HTTP status codes that requestWithRedirect follows when a Location header is present.
const REDIRECT_STATUS = new Set([301, 302, 303, 307, 308]);
|
|
9
|
-
// Upper bound on the redirect counter in requestWithRedirect; exceeding it throws.
const MAX_REDIRECTS = 5;
|
|
10
|
-
|
|
11
|
-
/**
 * Choose the Node core module used to issue a request.
 *
 * @param {string} protocol - URL protocol including the trailing colon.
 * @returns {object} The `https` module for "https:", otherwise the `http` module.
 */
function pickClient(protocol) {
  if (protocol === "https:") {
    return https;
  }

  return http;
}
|
|
14
|
-
|
|
15
|
-
/**
 * Return a copy of `headers` without hop-by-hop / proxy-only fields.
 *
 * Header names are matched case-insensitively, so "Connection" and
 * "connection" are both removed. The input object is never mutated.
 *
 * @param {object} [headers] - Header name to value map; null/undefined yields {}.
 * @returns {object} New object containing only the headers that may be forwarded.
 */
function stripHopByHopHeaders(headers = {}) {
  const blocked = new Set([
    "connection",
    "proxy-connection",
    "keep-alive",
    "transfer-encoding",
    "upgrade",
    "te",
    "trailer",
    "proxy-authenticate",
    "proxy-authorization",
  ]);

  const result = {};
  for (const [name, value] of Object.entries(headers ?? {})) {
    // Field names are case-insensitive, so compare on the lowercased name.
    if (!blocked.has(name.toLowerCase())) {
      result[name] = value;
    }
  }

  return result;
}
|
|
36
|
-
|
|
37
|
-
/**
 * Send one body-less HTTP(S) request and resolve once response headers arrive.
 *
 * No redirect handling happens here; the caller receives the raw response
 * stream and is responsible for consuming or draining it.
 *
 * @param {URL} urlObj - Target URL.
 * @param {object} options
 * @param {string} options.method - HTTP method.
 * @param {object} options.headers - Request headers.
 * @param {number} options.timeoutMs - Socket timeout; on expiry the request is destroyed with an error.
 * @param {Function|null} [options.getAgent] - Optional `(urlObj) => agent` factory.
 * @returns {Promise<{req: object, res: object}>} Rejects on any request error.
 */
function requestRaw(urlObj, { method, headers, timeoutMs, getAgent }) {
  const transport = pickClient(urlObj.protocol);
  const agent = getAgent ? getAgent(urlObj) : undefined;

  return new Promise((resolve, reject) => {
    const fallbackPort = urlObj.protocol === "https:" ? 443 : 80;
    const requestOptions = {
      protocol: urlObj.protocol,
      hostname: urlObj.hostname,
      port: urlObj.port || fallbackPort,
      path: `${urlObj.pathname}${urlObj.search}`,
      method,
      headers,
      agent,
    };

    const req = transport.request(requestOptions, (res) => {
      resolve({ req, res });
    });

    req.on("error", reject);
    req.setTimeout(timeoutMs, () => {
      // Destroying with an error routes the timeout through the "error" handler above.
      req.destroy(new Error(`Request timeout after ${timeoutMs}ms: ${urlObj.href}`));
    });
    req.end();
  });
}
|
|
63
|
-
|
|
64
|
-
/**
 * Issue a request and follow redirects until a non-redirect response arrives.
 *
 * A response counts as a redirect when its status is in REDIRECT_STATUS and
 * it carries a Location header. When a redirect changes origin, the
 * origin-bound request headers (host, authorization, cookie) are dropped so
 * the next hop gets a correct Host header and no credentials meant for the
 * previous origin.
 *
 * @param {URL} urlObj - URL to request.
 * @param {object} options - Options passed through to requestRaw.
 * @param {number} [redirectCount=0] - Redirects followed so far.
 * @returns {Promise<{urlObj: URL, res: object}>} Final URL and its response.
 * @throws {Error} When more than MAX_REDIRECTS redirects would be followed.
 */
async function requestWithRedirect(urlObj, options, redirectCount = 0) {
  if (redirectCount > MAX_REDIRECTS) {
    throw new Error(`Too many redirects while requesting ${urlObj.href}`);
  }

  const { res } = await requestRaw(urlObj, options);

  if (REDIRECT_STATUS.has(res.statusCode) && res.headers.location) {
    // Drain the redirect body so the socket can be released.
    res.resume();
    const nextUrl = new URL(res.headers.location, urlObj);
    const nextOptions =
      nextUrl.origin === urlObj.origin
        ? options
        : { ...options, headers: withoutOriginBoundHeaders(options.headers) };
    return requestWithRedirect(nextUrl, nextOptions, redirectCount + 1);
  }

  return { urlObj, res };
}

/** Copy `headers` without the fields that are only valid for the original origin. */
function withoutOriginBoundHeaders(headers = {}) {
  const originBound = new Set(["host", "authorization", "cookie"]);
  return Object.fromEntries(
    Object.entries(headers ?? {}).filter(([name]) => !originBound.has(name.toLowerCase())),
  );
}
|
|
79
|
-
|
|
80
|
-
/**
 * Best-effort HEAD request that discovers the post-redirect URL, the
 * advertised size and whether byte ranges are accepted.
 *
 * Never rejects: any failure, or a status >= 400, yields
 * `contentLength: null` and `acceptRanges: false`.
 *
 * @param {URL} urlObj - URL to probe.
 * @param {object} headers - Request headers.
 * @param {number} timeoutMs - Request timeout in milliseconds.
 * @param {Function|null} getAgent - Optional agent factory.
 * @returns {Promise<{finalUrl: URL, contentLength: number|null, acceptRanges: boolean}>}
 */
async function probe(urlObj, headers, timeoutMs, getAgent) {
  const unknownFor = (finalUrl) => ({ finalUrl, contentLength: null, acceptRanges: false });

  try {
    const headOptions = { method: "HEAD", headers, timeoutMs, getAgent };
    const { urlObj: finalUrl, res } = await requestWithRedirect(urlObj, headOptions);

    const responseHeaders = res.headers;
    const declaredLength = Number.parseInt(responseHeaders["content-length"], 10);
    const rangeUnits = String(responseHeaders["accept-ranges"] || "").toLowerCase();
    const acceptRanges = rangeUnits.includes("bytes");
    const statusCode = res.statusCode || 0;
    res.resume();

    if (statusCode >= 400) {
      return unknownFor(finalUrl);
    }

    return {
      finalUrl,
      contentLength: Number.isFinite(declaredLength) ? declaredLength : null,
      acceptRanges,
    };
  } catch {
    // The probe is advisory only; the real GET surfaces any genuine error.
    return unknownFor(urlObj);
  }
}
|
|
107
|
-
|
|
108
|
-
/**
 * Download a URL to `tempPath` with a single GET, following redirects.
 *
 * @param {URL} urlObj - URL to download.
 * @param {string} tempPath - File to create or overwrite.
 * @param {object} headers - Request headers; hop-by-hop headers are stripped and
 *   `accept-encoding: identity` is forced.
 * @param {number} timeoutMs - Request timeout in milliseconds.
 * @param {Function|null} getAgent - Optional agent factory.
 * @returns {Promise<{finalUrl: URL, contentLength: number|null}>} Final URL and
 *   the response's parsed content-length, or null when absent or invalid.
 * @throws {Error} Status >= 400: error carrying `statusCode` and `upstreamBody`
 *   (at most 2048 bytes). Any other non-2xx status: plain Error.
 */
async function downloadSingle(urlObj, tempPath, headers, timeoutMs, getAgent) {
  const requestHeaders = {
    ...stripHopByHopHeaders(headers),
    "accept-encoding": "identity",
  };

  const { urlObj: finalUrl, res } = await requestWithRedirect(
    urlObj,
    {
      method: "GET",
      headers: requestHeaders,
      timeoutMs,
      getAgent,
    },
    0,
  );

  const statusCode = res.statusCode || 0;
  if (statusCode >= 400) {
    const maxBodyBytes = 2048;
    const chunks = [];
    let collectedBytes = 0;
    for await (const chunk of res) {
      chunks.push(chunk);
      // Track the size incrementally instead of re-concatenating on every chunk.
      collectedBytes += chunk.length;
      if (collectedBytes > maxBodyBytes) {
        break;
      }
    }
    const body = Buffer.concat(chunks).subarray(0, maxBodyBytes).toString("utf8");
    throw Object.assign(new Error(`Upstream GET failed (${statusCode}) ${finalUrl.href}`), {
      statusCode,
      upstreamBody: body,
    });
  }

  if (statusCode < 200 || statusCode >= 300) {
    // Drain the unused body so the socket is released before failing.
    res.resume();
    throw new Error(`Unexpected status code ${statusCode} for ${finalUrl.href}`);
  }

  const contentLength = Number.parseInt(res.headers["content-length"], 10);

  const stream = fs.createWriteStream(tempPath, { flags: "w" });
  await pipeline(res, stream);

  return {
    finalUrl,
    contentLength: Number.isFinite(contentLength) ? contentLength : null,
  };
}
|
|
155
|
-
|
|
156
|
-
/**
 * Fetch bytes `start`..`end` (inclusive) and write them into the existing
 * file `tempPath` at offset `start`.
 *
 * Redirects are not followed. The number of bytes written is checked against
 * the requested range: the file is opened in place ("r+"), so a short
 * response would otherwise leave the file size unchanged and go unnoticed.
 *
 * @param {URL} urlObj - URL to download from.
 * @param {string} tempPath - Existing file to write into.
 * @param {number} start - First byte offset (inclusive).
 * @param {number} end - Last byte offset (inclusive).
 * @param {object} headers - Request headers; hop-by-hop headers are stripped.
 * @param {number} timeoutMs - Request timeout in milliseconds.
 * @param {Function|null} getAgent - Optional agent factory.
 * @returns {Promise<void>}
 * @throws {Error} When the status is not 206 or the byte count does not match.
 */
async function downloadRange(urlObj, tempPath, start, end, headers, timeoutMs, getAgent) {
  const requestHeaders = {
    ...stripHopByHopHeaders(headers),
    "accept-encoding": "identity",
    range: `bytes=${start}-${end}`,
  };

  const { res } = await requestRaw(urlObj, {
    method: "GET",
    headers: requestHeaders,
    timeoutMs,
    getAgent,
  });

  const statusCode = res.statusCode || 0;
  if (statusCode !== 206) {
    res.resume();
    throw new Error(`Range request failed with status ${statusCode} (${start}-${end})`);
  }

  const writeStream = fs.createWriteStream(tempPath, {
    flags: "r+",
    start,
  });

  await pipeline(res, writeStream);

  const expectedBytes = end - start + 1;
  if (writeStream.bytesWritten !== expectedBytes) {
    throw new Error(
      `Range request returned ${writeStream.bytesWritten} bytes, expected ${expectedBytes} (${start}-${end})`,
    );
  }
}
|
|
183
|
-
|
|
184
|
-
/**
 * Download a resource as parallel byte-range requests into `tempPath`.
 *
 * The file is created and sized to `contentLength` first, then each range is
 * written at its own offset. All range tasks are awaited before a failure is
 * reported, so none is still writing when the caller cleans up.
 *
 * @param {URL} urlObj - URL to download.
 * @param {string} tempPath - File to create or overwrite.
 * @param {object} headers - Request headers.
 * @param {number} timeoutMs - Per-request timeout in milliseconds.
 * @param {number} contentLength - Total size in bytes.
 * @param {number} threadCount - Desired number of ranges; values that are not a
 *   finite number >= 1 fall back to 1.
 * @param {Function|null} getAgent - Optional agent factory.
 * @returns {Promise<void>}
 * @throws {Error} The first range failure, after every range has settled.
 */
async function downloadMultiThread(urlObj, tempPath, headers, timeoutMs, contentLength, threadCount, getAgent) {
  const handle = await fs.promises.open(tempPath, "w");
  try {
    await handle.truncate(contentLength);
  } finally {
    await handle.close();
  }

  // A count of 0 or NaN would schedule no ranges and leave a zero-filled file
  // of the expected size, so always use at least one range.
  const requestedCount = Math.floor(Number(threadCount));
  const workerCount = Number.isFinite(requestedCount) && requestedCount >= 1 ? requestedCount : 1;

  const partSize = Math.ceil(contentLength / workerCount);
  const tasks = [];

  for (let index = 0; index < workerCount; index += 1) {
    const start = index * partSize;
    const end = Math.min(contentLength - 1, start + partSize - 1);

    if (start > end) {
      continue;
    }

    tasks.push(downloadRange(urlObj, tempPath, start, end, headers, timeoutMs, getAgent));
  }

  const outcomes = await Promise.allSettled(tasks);
  const failure = outcomes.find((outcome) => outcome.status === "rejected");
  if (failure) {
    throw failure.reason;
  }
}
|
|
205
|
-
|
|
206
|
-
/**
 * Report whether `filePath` is an existing regular file.
 *
 * @param {string} filePath - Path to inspect.
 * @returns {Promise<boolean>} False when the path is missing or is not a regular file.
 * @throws {Error} Any stat failure other than ENOENT.
 */
async function fileExists(filePath) {
  let stats;

  try {
    stats = await fs.promises.stat(filePath);
  } catch (error) {
    if (error.code !== "ENOENT") {
      throw error;
    }
    return false;
  }

  return stats.isFile();
}
|
|
217
|
-
|
|
218
|
-
/**
 * Check that a file has exactly `expectedSize` bytes.
 *
 * The check is skipped when `expectedSize` is not a finite number.
 *
 * @param {string} filePath - File to measure.
 * @param {number|null} expectedSize - Expected size in bytes.
 * @returns {Promise<void>}
 * @throws {Error} When the on-disk size differs from `expectedSize`.
 */
async function verifyFileSize(filePath, expectedSize) {
  if (Number.isFinite(expectedSize)) {
    const { size } = await fs.promises.stat(filePath);
    const matches = size === expectedSize;

    if (!matches) {
      throw new Error(`Integrity check failed: expected ${expectedSize} bytes, got ${size}`);
    }
  }
}
|
|
228
|
-
|
|
229
|
-
/**
 * Delete `filePath`, treating an already-missing file as success.
 *
 * @param {string} filePath - File to unlink.
 * @returns {Promise<void>}
 * @throws {Error} Any unlink failure other than ENOENT.
 */
async function removeIfExists(filePath) {
  try {
    await fs.promises.unlink(filePath);
  } catch (error) {
    const alreadyGone = error.code === "ENOENT";
    if (!alreadyGone) {
      throw error;
    }
  }
}
|
|
238
|
-
|
|
239
|
-
/**
 * Downloads upstream files into a cache path, sharing one download between
 * concurrent requests for the same target file.
 */
export class Downloader {
  /**
   * @param {object} config - Reads downloadLogDir, logRetentionDays,
   *   downloadTimeoutMs, multiThreadDomains, multiThreadMinSizeBytes and
   *   multiThreadCount.
   * @param {Function} domainMatcher - Called as
   *   `domainMatcher(hostname, config.multiThreadDomains)`.
   * @param {object|null} [upstreamProxyManager] - Optional; when set it must
   *   provide `getAgentForUrl(url)` and `hasProxyFor(protocol, hostname)`.
   */
  constructor(config, domainMatcher, upstreamProxyManager = null) {
    this.config = config;
    this.domainMatcher = domainMatcher;
    this.upstreamProxyManager = upstreamProxyManager;
    // finalPath -> promise of the download currently producing that file.
    this.inflight = new Map();
    this.downloadLogWriter = new DownloadLogWriter(config.downloadLogDir, config.logRetentionDays);
  }

  /**
   * Write one download event to the console and to the download log writer.
   *
   * @param {string} event - Event label.
   * @param {URL|string} urlObj - URL the event refers to.
   * @param {object} [details] - Extra key/value pairs; undefined, null and
   *   empty-string values are left out of the console line.
   */
  logDownload(event, urlObj, details = {}) {
    const url = typeof urlObj === "string" ? urlObj : urlObj?.href;
    const detailText = Object.entries(details)
      .filter(([, value]) => value !== undefined && value !== null && value !== "")
      .map(([key, value]) => `${key}=${value}`)
      .join(" ");

    console.log(`[downloader] ${event} url=${url}${detailText ? ` ${detailText}` : ""}`);
    this.downloadLogWriter.write(event, url, details);
  }

  /**
   * Make sure `finalPath` exists, downloading `urlObj` if necessary.
   *
   * @param {URL} urlObj - Upstream URL.
   * @param {string} finalPath - Cache file path.
   * @param {object} [requestHeaders] - Headers to forward upstream.
   * @returns {Promise<{cacheHit: boolean, finalPath: string}>} `cacheHit` is
   *   false only for the call that started the download.
   * @throws {Error} Whatever the underlying download rejects with.
   */
  async ensureCached(urlObj, finalPath, requestHeaders = {}) {
    if (await fileExists(finalPath)) {
      return { cacheHit: true, finalPath };
    }

    const existing = this.inflight.get(finalPath);
    if (existing) {
      // Another caller is already fetching this file; wait for its result.
      await existing;
      return { cacheHit: true, finalPath };
    }

    const downloadPromise = this.#downloadAtomic(urlObj, finalPath, requestHeaders);
    this.inflight.set(finalPath, downloadPromise);

    try {
      await downloadPromise;
    } finally {
      this.inflight.delete(finalPath);
    }

    return { cacheHit: false, finalPath };
  }

  /**
   * Download into `<finalPath>.temp`, check its size, then rename it onto
   * `finalPath`. The temp file is removed on any failure.
   */
  async #downloadAtomic(urlObj, finalPath, requestHeaders) {
    await fs.promises.mkdir(path.dirname(finalPath), { recursive: true });
    const tempPath = `${finalPath}.temp`;
    await removeIfExists(tempPath);

    try {
      const headers = stripHopByHopHeaders(requestHeaders);
      const getAgent = this.upstreamProxyManager
        ? (currentUrl) => this.upstreamProxyManager.getAgentForUrl(currentUrl)
        : null;
      const metadata = await probe(urlObj, headers, this.config.downloadTimeoutMs, getAgent);
      const downloadUrl = metadata.finalUrl;
      const hostname = downloadUrl.hostname;

      this.logDownload("download start", downloadUrl, { host: hostname, targetPath: finalPath });

      if (getAgent && this.upstreamProxyManager.hasProxyFor(downloadUrl.protocol, hostname)) {
        this.logDownload("outbound via upstream proxy", downloadUrl, {
          host: hostname,
          protocol: downloadUrl.protocol,
        });
      }

      const shouldUseMulti =
        this.domainMatcher(hostname, this.config.multiThreadDomains) &&
        Number.isFinite(metadata.contentLength) &&
        metadata.contentLength >= this.config.multiThreadMinSizeBytes &&
        metadata.acceptRanges;

      // Result of a single-thread GET, when one was performed.
      let single = null;

      if (shouldUseMulti) {
        this.logDownload("multi-thread download enabled", downloadUrl, {
          host: hostname,
          size: metadata.contentLength,
          threads: this.config.multiThreadCount,
        });
        try {
          await downloadMultiThread(
            downloadUrl,
            tempPath,
            headers,
            this.config.downloadTimeoutMs,
            metadata.contentLength,
            this.config.multiThreadCount,
            getAgent,
          );
        } catch (error) {
          await removeIfExists(tempPath);
          this.logDownload("multi-thread fallback to single-thread", downloadUrl, {
            host: hostname,
            reason: error.message,
          });
          single = await downloadSingle(downloadUrl, tempPath, headers, this.config.downloadTimeoutMs, getAgent);
        }
      } else {
        this.logDownload("single-thread download", downloadUrl, { host: hostname });
        single = await downloadSingle(downloadUrl, tempPath, headers, this.config.downloadTimeoutMs, getAgent);
      }

      // Prefer the size reported by the GET that produced the file over the
      // HEAD probe, on the fallback path as well as the plain one.
      if (single?.contentLength != null) {
        metadata.contentLength = single.contentLength;
      }

      await verifyFileSize(tempPath, metadata.contentLength);
      await fs.promises.rename(tempPath, finalPath);
    } catch (error) {
      await removeIfExists(tempPath);
      throw error;
    }
  }
}
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import http from "node:http";
|
|
3
|
+
import https from "node:https";
|
|
4
|
+
import path from "node:path";
|
|
5
|
+
import { pipeline } from "node:stream/promises";
|
|
6
|
+
import { DownloadLogWriter } from "../common/download-log-writer.js";
|
|
7
|
+
|
|
8
|
+
// HTTP status codes that requestWithRedirect follows when a Location header is present.
const REDIRECT_STATUS = new Set([301, 302, 303, 307, 308]);
|
|
9
|
+
// Upper bound on the redirect counter in requestWithRedirect; exceeding it throws.
const MAX_REDIRECTS = 5;
|
|
10
|
+
|
|
11
|
+
/**
 * Choose the Node core module used to issue a request.
 *
 * @param {string} protocol - URL protocol including the trailing colon.
 * @returns {object} The `https` module for "https:", otherwise the `http` module.
 */
function pickClient(protocol) {
  if (protocol === "https:") {
    return https;
  }

  return http;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Return a copy of `headers` without hop-by-hop / proxy-only fields.
 *
 * Header names are matched case-insensitively, so "Connection" and
 * "connection" are both removed. The input object is never mutated.
 *
 * @param {object} [headers] - Header name to value map; null/undefined yields {}.
 * @returns {object} New object containing only the headers that may be forwarded.
 */
function stripHopByHopHeaders(headers = {}) {
  const blocked = new Set([
    "connection",
    "proxy-connection",
    "keep-alive",
    "transfer-encoding",
    "upgrade",
    "te",
    "trailer",
    "proxy-authenticate",
    "proxy-authorization",
  ]);

  const result = {};
  for (const [name, value] of Object.entries(headers ?? {})) {
    // Field names are case-insensitive, so compare on the lowercased name.
    if (!blocked.has(name.toLowerCase())) {
      result[name] = value;
    }
  }

  return result;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Send one body-less HTTP(S) request and resolve once response headers arrive.
 *
 * No redirect handling happens here; the caller receives the raw response
 * stream and is responsible for consuming or draining it.
 *
 * @param {URL} urlObj - Target URL.
 * @param {object} options
 * @param {string} options.method - HTTP method.
 * @param {object} options.headers - Request headers.
 * @param {number} options.timeoutMs - Socket timeout; on expiry the request is destroyed with an error.
 * @param {Function|null} [options.getAgent] - Optional `(urlObj) => agent` factory.
 * @returns {Promise<{req: object, res: object}>} Rejects on any request error.
 */
function requestRaw(urlObj, { method, headers, timeoutMs, getAgent }) {
  const transport = pickClient(urlObj.protocol);
  const agent = getAgent ? getAgent(urlObj) : undefined;

  return new Promise((resolve, reject) => {
    const fallbackPort = urlObj.protocol === "https:" ? 443 : 80;
    const requestOptions = {
      protocol: urlObj.protocol,
      hostname: urlObj.hostname,
      port: urlObj.port || fallbackPort,
      path: `${urlObj.pathname}${urlObj.search}`,
      method,
      headers,
      agent,
    };

    const req = transport.request(requestOptions, (res) => {
      resolve({ req, res });
    });

    req.on("error", reject);
    req.setTimeout(timeoutMs, () => {
      // Destroying with an error routes the timeout through the "error" handler above.
      req.destroy(new Error(`Request timeout after ${timeoutMs}ms: ${urlObj.href}`));
    });
    req.end();
  });
}
|
|
63
|
+
|
|
64
|
+
/**
 * Issue a request and follow redirects until a non-redirect response arrives.
 *
 * A response counts as a redirect when its status is in REDIRECT_STATUS and
 * it carries a Location header. When a redirect changes origin, the
 * origin-bound request headers (host, authorization, cookie) are dropped so
 * the next hop gets a correct Host header and no credentials meant for the
 * previous origin.
 *
 * @param {URL} urlObj - URL to request.
 * @param {object} options - Options passed through to requestRaw.
 * @param {number} [redirectCount=0] - Redirects followed so far.
 * @returns {Promise<{urlObj: URL, res: object}>} Final URL and its response.
 * @throws {Error} When more than MAX_REDIRECTS redirects would be followed.
 */
async function requestWithRedirect(urlObj, options, redirectCount = 0) {
  if (redirectCount > MAX_REDIRECTS) {
    throw new Error(`Too many redirects while requesting ${urlObj.href}`);
  }

  const { res } = await requestRaw(urlObj, options);

  if (REDIRECT_STATUS.has(res.statusCode) && res.headers.location) {
    // Drain the redirect body so the socket can be released.
    res.resume();
    const nextUrl = new URL(res.headers.location, urlObj);
    const nextOptions =
      nextUrl.origin === urlObj.origin
        ? options
        : { ...options, headers: withoutOriginBoundHeaders(options.headers) };
    return requestWithRedirect(nextUrl, nextOptions, redirectCount + 1);
  }

  return { urlObj, res };
}

/** Copy `headers` without the fields that are only valid for the original origin. */
function withoutOriginBoundHeaders(headers = {}) {
  const originBound = new Set(["host", "authorization", "cookie"]);
  return Object.fromEntries(
    Object.entries(headers ?? {}).filter(([name]) => !originBound.has(name.toLowerCase())),
  );
}
|
|
79
|
+
|
|
80
|
+
/**
 * Best-effort HEAD request that discovers the post-redirect URL, the
 * advertised size and whether byte ranges are accepted.
 *
 * Never rejects: any failure, or a status >= 400, yields
 * `contentLength: null` and `acceptRanges: false`.
 *
 * @param {URL} urlObj - URL to probe.
 * @param {object} headers - Request headers.
 * @param {number} timeoutMs - Request timeout in milliseconds.
 * @param {Function|null} getAgent - Optional agent factory.
 * @returns {Promise<{finalUrl: URL, contentLength: number|null, acceptRanges: boolean}>}
 */
async function probe(urlObj, headers, timeoutMs, getAgent) {
  const unknownFor = (finalUrl) => ({ finalUrl, contentLength: null, acceptRanges: false });

  try {
    const headOptions = { method: "HEAD", headers, timeoutMs, getAgent };
    const { urlObj: finalUrl, res } = await requestWithRedirect(urlObj, headOptions);

    const responseHeaders = res.headers;
    const declaredLength = Number.parseInt(responseHeaders["content-length"], 10);
    const rangeUnits = String(responseHeaders["accept-ranges"] || "").toLowerCase();
    const acceptRanges = rangeUnits.includes("bytes");
    const statusCode = res.statusCode || 0;
    res.resume();

    if (statusCode >= 400) {
      return unknownFor(finalUrl);
    }

    return {
      finalUrl,
      contentLength: Number.isFinite(declaredLength) ? declaredLength : null,
      acceptRanges,
    };
  } catch {
    // The probe is advisory only; the real GET surfaces any genuine error.
    return unknownFor(urlObj);
  }
}
|
|
107
|
+
|
|
108
|
+
/**
 * Download a URL to `tempPath` with a single GET, following redirects.
 *
 * @param {URL} urlObj - URL to download.
 * @param {string} tempPath - File to create or overwrite.
 * @param {object} headers - Request headers; hop-by-hop headers are stripped and
 *   `accept-encoding: identity` is forced.
 * @param {number} timeoutMs - Request timeout in milliseconds.
 * @param {Function|null} getAgent - Optional agent factory.
 * @returns {Promise<{finalUrl: URL, contentLength: number|null}>} Final URL and
 *   the response's parsed content-length, or null when absent or invalid.
 * @throws {Error} Status >= 400: error carrying `statusCode` and `upstreamBody`
 *   (at most 2048 bytes). Any other non-2xx status: plain Error.
 */
async function downloadSingle(urlObj, tempPath, headers, timeoutMs, getAgent) {
  const requestHeaders = {
    ...stripHopByHopHeaders(headers),
    "accept-encoding": "identity",
  };

  const { urlObj: finalUrl, res } = await requestWithRedirect(
    urlObj,
    {
      method: "GET",
      headers: requestHeaders,
      timeoutMs,
      getAgent,
    },
    0,
  );

  const statusCode = res.statusCode || 0;
  if (statusCode >= 400) {
    const maxBodyBytes = 2048;
    const chunks = [];
    let collectedBytes = 0;
    for await (const chunk of res) {
      chunks.push(chunk);
      // Track the size incrementally instead of re-concatenating on every chunk.
      collectedBytes += chunk.length;
      if (collectedBytes > maxBodyBytes) {
        break;
      }
    }
    const body = Buffer.concat(chunks).subarray(0, maxBodyBytes).toString("utf8");
    throw Object.assign(new Error(`Upstream GET failed (${statusCode}) ${finalUrl.href}`), {
      statusCode,
      upstreamBody: body,
    });
  }

  if (statusCode < 200 || statusCode >= 300) {
    // Drain the unused body so the socket is released before failing.
    res.resume();
    throw new Error(`Unexpected status code ${statusCode} for ${finalUrl.href}`);
  }

  const contentLength = Number.parseInt(res.headers["content-length"], 10);

  const stream = fs.createWriteStream(tempPath, { flags: "w" });
  await pipeline(res, stream);

  return {
    finalUrl,
    contentLength: Number.isFinite(contentLength) ? contentLength : null,
  };
}
|
|
155
|
+
|
|
156
|
+
/**
 * Fetch bytes `start`..`end` (inclusive) and write them into the existing
 * file `tempPath` at offset `start`.
 *
 * Redirects are not followed. The number of bytes written is checked against
 * the requested range: the file is opened in place ("r+"), so a short
 * response would otherwise leave the file size unchanged and go unnoticed.
 *
 * @param {URL} urlObj - URL to download from.
 * @param {string} tempPath - Existing file to write into.
 * @param {number} start - First byte offset (inclusive).
 * @param {number} end - Last byte offset (inclusive).
 * @param {object} headers - Request headers; hop-by-hop headers are stripped.
 * @param {number} timeoutMs - Request timeout in milliseconds.
 * @param {Function|null} getAgent - Optional agent factory.
 * @returns {Promise<void>}
 * @throws {Error} When the status is not 206 or the byte count does not match.
 */
async function downloadRange(urlObj, tempPath, start, end, headers, timeoutMs, getAgent) {
  const requestHeaders = {
    ...stripHopByHopHeaders(headers),
    "accept-encoding": "identity",
    range: `bytes=${start}-${end}`,
  };

  const { res } = await requestRaw(urlObj, {
    method: "GET",
    headers: requestHeaders,
    timeoutMs,
    getAgent,
  });

  const statusCode = res.statusCode || 0;
  if (statusCode !== 206) {
    res.resume();
    throw new Error(`Range request failed with status ${statusCode} (${start}-${end})`);
  }

  const writeStream = fs.createWriteStream(tempPath, {
    flags: "r+",
    start,
  });

  await pipeline(res, writeStream);

  const expectedBytes = end - start + 1;
  if (writeStream.bytesWritten !== expectedBytes) {
    throw new Error(
      `Range request returned ${writeStream.bytesWritten} bytes, expected ${expectedBytes} (${start}-${end})`,
    );
  }
}
|
|
183
|
+
|
|
184
|
+
/**
 * Download a resource as parallel byte-range requests into `tempPath`.
 *
 * The file is created and sized to `contentLength` first, then each range is
 * written at its own offset. All range tasks are awaited before a failure is
 * reported, so none is still writing when the caller cleans up.
 *
 * @param {URL} urlObj - URL to download.
 * @param {string} tempPath - File to create or overwrite.
 * @param {object} headers - Request headers.
 * @param {number} timeoutMs - Per-request timeout in milliseconds.
 * @param {number} contentLength - Total size in bytes.
 * @param {number} threadCount - Desired number of ranges; values that are not a
 *   finite number >= 1 fall back to 1.
 * @param {Function|null} getAgent - Optional agent factory.
 * @returns {Promise<void>}
 * @throws {Error} The first range failure, after every range has settled.
 */
async function downloadMultiThread(urlObj, tempPath, headers, timeoutMs, contentLength, threadCount, getAgent) {
  const handle = await fs.promises.open(tempPath, "w");
  try {
    await handle.truncate(contentLength);
  } finally {
    await handle.close();
  }

  // A count of 0 or NaN would schedule no ranges and leave a zero-filled file
  // of the expected size, so always use at least one range.
  const requestedCount = Math.floor(Number(threadCount));
  const workerCount = Number.isFinite(requestedCount) && requestedCount >= 1 ? requestedCount : 1;

  const partSize = Math.ceil(contentLength / workerCount);
  const tasks = [];

  for (let index = 0; index < workerCount; index += 1) {
    const start = index * partSize;
    const end = Math.min(contentLength - 1, start + partSize - 1);

    if (start > end) {
      continue;
    }

    tasks.push(downloadRange(urlObj, tempPath, start, end, headers, timeoutMs, getAgent));
  }

  const outcomes = await Promise.allSettled(tasks);
  const failure = outcomes.find((outcome) => outcome.status === "rejected");
  if (failure) {
    throw failure.reason;
  }
}
|
|
205
|
+
|
|
206
|
+
/**
 * Report whether `filePath` is an existing regular file.
 *
 * @param {string} filePath - Path to inspect.
 * @returns {Promise<boolean>} False when the path is missing or is not a regular file.
 * @throws {Error} Any stat failure other than ENOENT.
 */
async function fileExists(filePath) {
  let stats;

  try {
    stats = await fs.promises.stat(filePath);
  } catch (error) {
    if (error.code !== "ENOENT") {
      throw error;
    }
    return false;
  }

  return stats.isFile();
}
|
|
217
|
+
|
|
218
|
+
/**
 * Check that a file has exactly `expectedSize` bytes.
 *
 * The check is skipped when `expectedSize` is not a finite number.
 *
 * @param {string} filePath - File to measure.
 * @param {number|null} expectedSize - Expected size in bytes.
 * @returns {Promise<void>}
 * @throws {Error} When the on-disk size differs from `expectedSize`.
 */
async function verifyFileSize(filePath, expectedSize) {
  if (Number.isFinite(expectedSize)) {
    const { size } = await fs.promises.stat(filePath);
    const matches = size === expectedSize;

    if (!matches) {
      throw new Error(`Integrity check failed: expected ${expectedSize} bytes, got ${size}`);
    }
  }
}
|
|
228
|
+
|
|
229
|
+
/**
 * Delete `filePath`, treating an already-missing file as success.
 *
 * @param {string} filePath - File to unlink.
 * @returns {Promise<void>}
 * @throws {Error} Any unlink failure other than ENOENT.
 */
async function removeIfExists(filePath) {
  try {
    await fs.promises.unlink(filePath);
  } catch (error) {
    const alreadyGone = error.code === "ENOENT";
    if (!alreadyGone) {
      throw error;
    }
  }
}
|
|
238
|
+
|
|
239
|
+
/**
 * Downloads upstream files into a cache path, sharing one download between
 * concurrent requests for the same target file.
 */
export class Downloader {
  /**
   * @param {object} config - Reads downloadLogDir, logRetentionDays,
   *   downloadTimeoutMs, multiThreadDomains, multiThreadMinSizeBytes and
   *   multiThreadCount.
   * @param {Function} domainMatcher - Called as
   *   `domainMatcher(hostname, config.multiThreadDomains)`.
   * @param {object|null} [upstreamProxyManager] - Optional; when set it must
   *   provide `getAgentForUrl(url)` and `hasProxyFor(protocol, hostname)`.
   */
  constructor(config, domainMatcher, upstreamProxyManager = null) {
    this.config = config;
    this.domainMatcher = domainMatcher;
    this.upstreamProxyManager = upstreamProxyManager;
    // finalPath -> promise of the download currently producing that file.
    this.inflight = new Map();
    this.downloadLogWriter = new DownloadLogWriter(config.downloadLogDir, config.logRetentionDays);
  }

  /**
   * Write one download event to the console and to the download log writer.
   *
   * @param {string} event - Event label.
   * @param {URL|string} urlObj - URL the event refers to.
   * @param {object} [details] - Extra key/value pairs; undefined, null and
   *   empty-string values are left out of the console line.
   */
  logDownload(event, urlObj, details = {}) {
    const url = typeof urlObj === "string" ? urlObj : urlObj?.href;
    const detailText = Object.entries(details)
      .filter(([, value]) => value !== undefined && value !== null && value !== "")
      .map(([key, value]) => `${key}=${value}`)
      .join(" ");

    console.log(`[downloader] ${event} url=${url}${detailText ? ` ${detailText}` : ""}`);
    this.downloadLogWriter.write(event, url, details);
  }

  /**
   * Make sure `finalPath` exists, downloading `urlObj` if necessary.
   *
   * @param {URL} urlObj - Upstream URL.
   * @param {string} finalPath - Cache file path.
   * @param {object} [requestHeaders] - Headers to forward upstream.
   * @returns {Promise<{cacheHit: boolean, finalPath: string}>} `cacheHit` is
   *   false only for the call that started the download.
   * @throws {Error} Whatever the underlying download rejects with.
   */
  async ensureCached(urlObj, finalPath, requestHeaders = {}) {
    if (await fileExists(finalPath)) {
      return { cacheHit: true, finalPath };
    }

    const existing = this.inflight.get(finalPath);
    if (existing) {
      // Another caller is already fetching this file; wait for its result.
      await existing;
      return { cacheHit: true, finalPath };
    }

    const downloadPromise = this.#downloadAtomic(urlObj, finalPath, requestHeaders);
    this.inflight.set(finalPath, downloadPromise);

    try {
      await downloadPromise;
    } finally {
      this.inflight.delete(finalPath);
    }

    return { cacheHit: false, finalPath };
  }

  /**
   * Download into `<finalPath>.temp`, check its size, then rename it onto
   * `finalPath`. The temp file is removed on any failure.
   */
  async #downloadAtomic(urlObj, finalPath, requestHeaders) {
    await fs.promises.mkdir(path.dirname(finalPath), { recursive: true });
    const tempPath = `${finalPath}.temp`;
    await removeIfExists(tempPath);

    try {
      const headers = stripHopByHopHeaders(requestHeaders);
      const getAgent = this.upstreamProxyManager
        ? (currentUrl) => this.upstreamProxyManager.getAgentForUrl(currentUrl)
        : null;
      const metadata = await probe(urlObj, headers, this.config.downloadTimeoutMs, getAgent);
      const downloadUrl = metadata.finalUrl;
      const hostname = downloadUrl.hostname;

      this.logDownload("download start", downloadUrl, { host: hostname, targetPath: finalPath });

      if (getAgent && this.upstreamProxyManager.hasProxyFor(downloadUrl.protocol, hostname)) {
        this.logDownload("outbound via upstream proxy", downloadUrl, {
          host: hostname,
          protocol: downloadUrl.protocol,
        });
      }

      const shouldUseMulti =
        this.domainMatcher(hostname, this.config.multiThreadDomains) &&
        Number.isFinite(metadata.contentLength) &&
        metadata.contentLength >= this.config.multiThreadMinSizeBytes &&
        metadata.acceptRanges;

      // Result of a single-thread GET, when one was performed.
      let single = null;

      if (shouldUseMulti) {
        this.logDownload("multi-thread download enabled", downloadUrl, {
          host: hostname,
          size: metadata.contentLength,
          threads: this.config.multiThreadCount,
        });
        try {
          await downloadMultiThread(
            downloadUrl,
            tempPath,
            headers,
            this.config.downloadTimeoutMs,
            metadata.contentLength,
            this.config.multiThreadCount,
            getAgent,
          );
        } catch (error) {
          await removeIfExists(tempPath);
          this.logDownload("multi-thread fallback to single-thread", downloadUrl, {
            host: hostname,
            reason: error.message,
          });
          single = await downloadSingle(downloadUrl, tempPath, headers, this.config.downloadTimeoutMs, getAgent);
        }
      } else {
        this.logDownload("single-thread download", downloadUrl, { host: hostname });
        single = await downloadSingle(downloadUrl, tempPath, headers, this.config.downloadTimeoutMs, getAgent);
      }

      // Prefer the size reported by the GET that produced the file over the
      // HEAD probe, on the fallback path as well as the plain one.
      if (single?.contentLength != null) {
        metadata.contentLength = single.contentLength;
      }

      await verifyFileSize(tempPath, metadata.contentLength);
      await fs.promises.rename(tempPath, finalPath);
    } catch (error) {
      await removeIfExists(tempPath);
      throw error;
    }
  }
}
|