@juzi/file-box 1.7.20 → 1.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/dist/cjs/src/config.d.ts +5 -5
- package/dist/cjs/src/config.d.ts.map +1 -1
- package/dist/cjs/src/config.js +7 -9
- package/dist/cjs/src/config.js.map +1 -1
- package/dist/cjs/src/file-box.js +1 -1
- package/dist/cjs/src/file-box.js.map +1 -1
- package/dist/cjs/src/misc.d.ts.map +1 -1
- package/dist/cjs/src/misc.js +185 -69
- package/dist/cjs/src/misc.js.map +1 -1
- package/dist/cjs/src/misc.spec.js +26 -17
- package/dist/cjs/src/misc.spec.js.map +1 -1
- package/dist/cjs/src/version.d.ts.map +1 -1
- package/dist/cjs/src/version.js +1 -1
- package/dist/cjs/src/version.js.map +1 -1
- package/dist/cjs/tests/chunk-download.spec.js +62 -90
- package/dist/cjs/tests/chunk-download.spec.js.map +1 -1
- package/dist/cjs/tests/misc-error-handling.spec.js +134 -30
- package/dist/cjs/tests/misc-error-handling.spec.js.map +1 -1
- package/dist/cjs/tests/network-timeout.spec.js +101 -105
- package/dist/cjs/tests/network-timeout.spec.js.map +1 -1
- package/dist/esm/src/config.d.ts +5 -5
- package/dist/esm/src/config.d.ts.map +1 -1
- package/dist/esm/src/config.js +6 -8
- package/dist/esm/src/config.js.map +1 -1
- package/dist/esm/src/file-box.js +2 -2
- package/dist/esm/src/file-box.js.map +1 -1
- package/dist/esm/src/misc.d.ts.map +1 -1
- package/dist/esm/src/misc.js +187 -71
- package/dist/esm/src/misc.js.map +1 -1
- package/dist/esm/src/misc.spec.js +26 -17
- package/dist/esm/src/misc.spec.js.map +1 -1
- package/dist/esm/src/version.d.ts.map +1 -1
- package/dist/esm/src/version.js +1 -1
- package/dist/esm/src/version.js.map +1 -1
- package/dist/esm/tests/chunk-download.spec.js +62 -90
- package/dist/esm/tests/chunk-download.spec.js.map +1 -1
- package/dist/esm/tests/misc-error-handling.spec.js +134 -30
- package/dist/esm/tests/misc-error-handling.spec.js.map +1 -1
- package/dist/esm/tests/network-timeout.spec.js +103 -107
- package/dist/esm/tests/network-timeout.spec.js.map +1 -1
- package/package.json +2 -2
- package/src/config.ts +6 -12
- package/src/file-box.ts +2 -2
- package/src/misc.spec.ts +29 -14
- package/src/misc.ts +200 -75
- package/src/version.ts +1 -1
package/src/misc.ts
CHANGED
|
@@ -2,20 +2,19 @@ import assert from 'assert'
|
|
|
2
2
|
import { randomUUID } from 'crypto'
|
|
3
3
|
import { once } from 'events'
|
|
4
4
|
import { createReadStream, createWriteStream } from 'fs'
|
|
5
|
-
import { rm } from 'fs/promises'
|
|
5
|
+
import { rm, stat } from 'fs/promises'
|
|
6
6
|
import http, { RequestOptions } from 'http'
|
|
7
7
|
import https from 'https'
|
|
8
8
|
import { HttpsProxyAgent } from 'https-proxy-agent'
|
|
9
9
|
import { tmpdir } from 'os'
|
|
10
10
|
import { join } from 'path'
|
|
11
11
|
import type { Readable } from 'stream'
|
|
12
|
+
import { Transform } from 'stream'
|
|
13
|
+
import { pipeline } from 'stream/promises'
|
|
14
|
+
import { setTimeout } from 'timers/promises'
|
|
12
15
|
import { URL } from 'url'
|
|
13
16
|
|
|
14
|
-
import {
|
|
15
|
-
HTTP_CHUNK_SIZE,
|
|
16
|
-
HTTP_REQUEST_TIMEOUT,
|
|
17
|
-
HTTP_RESPONSE_TIMEOUT,
|
|
18
|
-
} from './config.js'
|
|
17
|
+
import { CONFIG } from './config.js'
|
|
19
18
|
|
|
20
19
|
const protocolMap: {
|
|
21
20
|
[key: string]: { agent: http.Agent; request: typeof http.request }
|
|
@@ -27,6 +26,16 @@ const protocolMap: {
|
|
|
27
26
|
const noop = () => { }
|
|
28
27
|
const unsupportedRangeDomains = new Set<string>()
|
|
29
28
|
|
|
29
|
+
/**
 * Custom error marking that the download has to fall back to the
 * non-chunked (no `Range` header) mode.
 *
 * The message is always the given reason prefixed with
 * `Fallback required: `, and `name` is set to `FallbackError`.
 */
class FallbackError extends Error {

  constructor (reason: string) {
    const message = 'Fallback required: ' + reason
    super(message)
    this.name = 'FallbackError'
  }

}
|
|
38
|
+
|
|
30
39
|
function getProtocol (protocol: string) {
|
|
31
40
|
assert(protocolMap[protocol], new Error('unknown protocol: ' + protocol))
|
|
32
41
|
return protocolMap[protocol]!
|
|
@@ -65,8 +74,6 @@ export async function httpHeadHeader (url: string, headers: http.OutgoingHttpHea
|
|
|
65
74
|
return res.headers
|
|
66
75
|
}
|
|
67
76
|
|
|
68
|
-
// console.log('302 found for ' + url)
|
|
69
|
-
|
|
70
77
|
if (!res.headers.location) {
|
|
71
78
|
throw new Error('302 found but no location!')
|
|
72
79
|
}
|
|
@@ -98,7 +105,7 @@ export async function httpStream (url: string, headers: http.OutgoingHttpHeaders
|
|
|
98
105
|
if (headHeaders.location) {
|
|
99
106
|
url = headHeaders.location
|
|
100
107
|
}
|
|
101
|
-
const { protocol, hostname } = new URL(url)
|
|
108
|
+
const { protocol, hostname, port } = new URL(url)
|
|
102
109
|
getProtocol(protocol)
|
|
103
110
|
|
|
104
111
|
const options: http.RequestOptions = {
|
|
@@ -106,17 +113,17 @@ export async function httpStream (url: string, headers: http.OutgoingHttpHeaders
|
|
|
106
113
|
method: 'GET',
|
|
107
114
|
}
|
|
108
115
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
116
|
+
// 使用 hostname:port 作为域名标识,避免不同端口的服务互相影响
|
|
117
|
+
const defaultPort = protocol === 'https:' ? '443' : '80'
|
|
118
|
+
const hostKey = `${hostname}:${port || defaultPort}`
|
|
119
|
+
|
|
120
|
+
// 直接尝试分片下载,不检查 Accept-Ranges 和 fileSize
|
|
121
|
+
// 原因:
|
|
122
|
+
// 1. 有些服务器 HEAD 不返回 Accept-Ranges 但实际支持分片
|
|
123
|
+
// 2. 有些服务器 HEAD 返回 fileSize=0 但实际支持分片
|
|
124
|
+
// downloadFileInChunks 内部有完善的回退机制处理不支持的情况
|
|
125
|
+
const result = await downloadFileInChunks(url, options, proxyUrl, hostKey)
|
|
126
|
+
return result
|
|
120
127
|
}
|
|
121
128
|
|
|
122
129
|
async function fetch (url: string, options: http.RequestOptions, proxyUrl?: string): Promise<http.IncomingMessage> {
|
|
@@ -149,10 +156,10 @@ async function fetch (url: string, options: http.RequestOptions, proxyUrl?: stri
|
|
|
149
156
|
req.off('error', noop)
|
|
150
157
|
})
|
|
151
158
|
// request timeout:只用于“拿到 response 之前”(连接/握手/首包)
|
|
152
|
-
.setTimeout(HTTP_REQUEST_TIMEOUT, () => {
|
|
159
|
+
.setTimeout(CONFIG.HTTP_REQUEST_TIMEOUT, () => {
|
|
153
160
|
// 已经拿到 response 时,不要再用 request timeout 误伤(会导致 aborted/ECONNRESET)
|
|
154
161
|
if (res) return
|
|
155
|
-
abortController.abort(new Error(`FileBox: Http request timeout (${HTTP_REQUEST_TIMEOUT})!`))
|
|
162
|
+
abortController.abort(new Error(`FileBox: Http request timeout (${CONFIG.HTTP_REQUEST_TIMEOUT})!`))
|
|
156
163
|
})
|
|
157
164
|
.end()
|
|
158
165
|
|
|
@@ -165,7 +172,6 @@ async function fetch (url: string, options: http.RequestOptions, proxyUrl?: stri
|
|
|
165
172
|
res.on('error', noop)
|
|
166
173
|
signal.throwIfAborted()
|
|
167
174
|
} catch (e) {
|
|
168
|
-
// once(...) 被 signal abort 时通常会抛 AbortError;优先抛出 abort(reason) 的真实原因
|
|
169
175
|
const reason = signal.reason as unknown
|
|
170
176
|
const err = reason instanceof Error
|
|
171
177
|
? reason
|
|
@@ -193,21 +199,44 @@ async function fetch (url: string, options: http.RequestOptions, proxyUrl?: stri
|
|
|
193
199
|
signal.removeEventListener('abort', onAbort)
|
|
194
200
|
res!.off('error', noop)
|
|
195
201
|
})
|
|
196
|
-
.setTimeout(HTTP_RESPONSE_TIMEOUT, () => {
|
|
197
|
-
abortController.abort(new Error(`FileBox: Http response timeout (${HTTP_RESPONSE_TIMEOUT})!`))
|
|
202
|
+
.setTimeout(CONFIG.HTTP_RESPONSE_TIMEOUT, () => {
|
|
203
|
+
abortController.abort(new Error(`FileBox: Http response timeout (${CONFIG.HTTP_RESPONSE_TIMEOUT})!`))
|
|
198
204
|
})
|
|
199
205
|
return res!
|
|
200
206
|
}
|
|
201
207
|
|
|
208
|
+
/**
 * Build a Transform stream that drops the first `skipBytes` bytes flowing
 * through it and forwards everything after that unchanged.
 *
 * @param skipBytes number of leading bytes to discard
 * @returns a Transform that emits only the data past the skipped prefix
 */
function createSkipTransform (skipBytes: number): Transform {
  // Bytes that still have to be discarded before data is forwarded.
  let pending = skipBytes

  return new Transform({
    transform (chunk, _encoding, callback) {
      // Prefix fully consumed (or nothing to skip): pass the chunk through.
      if (pending <= 0) {
        callback(null, chunk)
        return
      }

      // The whole chunk lies inside the prefix: swallow it entirely.
      if (chunk.length <= pending) {
        pending -= chunk.length
        callback()
        return
      }

      // The chunk straddles the boundary: forward only the part after it.
      const tail = chunk.subarray(pending)
      pending = 0
      callback(null, tail)
    },
  })
}
|
|
231
|
+
|
|
202
232
|
async function downloadFileInChunks (
|
|
203
233
|
url: string,
|
|
204
234
|
options: http.RequestOptions,
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
proxyUrl?: string,
|
|
235
|
+
proxyUrl: string | undefined,
|
|
236
|
+
hostname: string,
|
|
208
237
|
): Promise<Readable> {
|
|
209
238
|
const tmpFile = join(tmpdir(), `filebox-${randomUUID()}`)
|
|
210
|
-
|
|
239
|
+
let writeStream = createWriteStream(tmpFile)
|
|
211
240
|
const writeAbortController = new AbortController()
|
|
212
241
|
const signal = writeAbortController.signal
|
|
213
242
|
const onWriteError = (err: unknown) => {
|
|
@@ -219,78 +248,174 @@ async function downloadFileInChunks (
|
|
|
219
248
|
headers: {},
|
|
220
249
|
...options,
|
|
221
250
|
}
|
|
222
|
-
|
|
251
|
+
// 预期文件大小(初始为 null,从首次 206 响应中获取)
|
|
252
|
+
let expectedTotal: number | null = null
|
|
223
253
|
let start = 0
|
|
224
|
-
let end = 0
|
|
225
254
|
let downSize = 0
|
|
226
255
|
let retries = 3
|
|
256
|
+
// 控制是否使用 Range 请求(根据域名黑名单初始化)
|
|
257
|
+
let useRange = !unsupportedRangeDomains.has(hostname)
|
|
258
|
+
let useChunked = false
|
|
259
|
+
|
|
260
|
+
do {
|
|
261
|
+
// 每次循环前检查文件实际大小,作为真实的下载进度
|
|
262
|
+
// 这样在重试时可以从实际写入的位置继续,避免数据重复
|
|
263
|
+
const fileStats = await stat(tmpFile).then(stats => stats.size).catch(() => 0)
|
|
264
|
+
if (fileStats !== downSize) {
|
|
265
|
+
// 文件实际大小与记录的不一致,使用实际大小
|
|
266
|
+
downSize = fileStats
|
|
267
|
+
start = fileStats
|
|
268
|
+
}
|
|
227
269
|
|
|
228
|
-
while (downSize < fileSize) {
|
|
229
|
-
end = Math.min(start + chunkSize, fileSize - 1)
|
|
230
|
-
const range = `bytes=${start}-${end}`
|
|
231
270
|
const requestOptions = Object.assign({}, requestBaseOptions)
|
|
232
271
|
assert(requestOptions.headers, 'Errors that should not happen: Invalid headers')
|
|
233
|
-
|
|
272
|
+
const headers = requestOptions.headers as http.OutgoingHttpHeaders
|
|
273
|
+
|
|
274
|
+
// 根据 useRange flag 决定是否添加 Range header
|
|
275
|
+
if (useRange) {
|
|
276
|
+
const range = `bytes=${start}-`
|
|
277
|
+
headers['Range'] = range
|
|
278
|
+
} else {
|
|
279
|
+
delete headers['Range']
|
|
280
|
+
}
|
|
234
281
|
|
|
282
|
+
let res: http.IncomingMessage
|
|
235
283
|
try {
|
|
236
|
-
|
|
284
|
+
res = await fetch(url, requestOptions, proxyUrl)
|
|
237
285
|
if (res.statusCode === 416) {
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
writeStream.destroy()
|
|
241
|
-
try {
|
|
242
|
-
await once(writeStream, 'close', { signal })
|
|
243
|
-
} catch {}
|
|
244
|
-
await rm(tmpFile, { force: true })
|
|
245
|
-
return await fetch(url, requestBaseOptions, proxyUrl)
|
|
286
|
+
// 416: Range Not Satisfiable,服务器不支持此范围或文件大小不匹配
|
|
287
|
+
throw new FallbackError('416 Range Not Satisfiable')
|
|
246
288
|
}
|
|
247
289
|
assert(allowStatusCode.includes(res.statusCode ?? 0), `Request failed with status code ${res.statusCode}`)
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
290
|
+
const contentLength = Number(res.headers['content-length']) || 0
|
|
291
|
+
assert(contentLength >= 0, `Server returned ${contentLength} bytes of data`)
|
|
292
|
+
|
|
293
|
+
// 206: 部分内容,继续分片下载
|
|
294
|
+
// 200: 完整内容,服务器不支持 range 或返回全部数据
|
|
295
|
+
if (res.statusCode === 206) {
|
|
296
|
+
// 206 响应必须包含有效的 Content-Range 头(RFC 7233)
|
|
297
|
+
const contentRange = res.headers['content-range']
|
|
298
|
+
if (!contentRange) {
|
|
299
|
+
// Content-Range 缺失,服务器不规范,回退到非分片下载
|
|
300
|
+
throw new FallbackError('Missing Content-Range header')
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
let end: number
|
|
304
|
+
let total: number
|
|
305
|
+
let actualStart: number
|
|
306
|
+
try {
|
|
307
|
+
const parsed = parseContentRange(contentRange)
|
|
308
|
+
actualStart = parsed.start
|
|
309
|
+
end = parsed.end
|
|
310
|
+
total = parsed.total
|
|
311
|
+
} catch (error) {
|
|
312
|
+
// Content-Range 格式错误,服务器不规范,回退到非分片下载
|
|
313
|
+
throw new FallbackError(`Invalid Content-Range: ${contentRange}`)
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
if (expectedTotal === null) {
|
|
317
|
+
// 首次获得文件总大小
|
|
252
318
|
// 某些云服务商(如腾讯云)在 head 方法中返回的 size 是原图大小,但下载时返回的是压缩后的图片,会比原图小。
|
|
253
|
-
// 这种在首次下载时虽然请求了原图大小的范围,可能比缩略图大,但会一次性返回完整的原图,而不是报错 416,通过修正
|
|
254
|
-
|
|
319
|
+
// 这种在首次下载时虽然请求了原图大小的范围,可能比缩略图大,但会一次性返回完整的原图,而不是报错 416,通过修正 expectedTotal 跳出循环即可。
|
|
320
|
+
expectedTotal = total
|
|
321
|
+
} else if (total !== expectedTotal) {
|
|
322
|
+
// 服务器返回的文件总大小出现了变化
|
|
323
|
+
throw new Error(`File size mismatch: expected ${expectedTotal}, but server returned ${total}`)
|
|
255
324
|
}
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
325
|
+
|
|
326
|
+
// 标记使用了分片下载
|
|
327
|
+
useChunked = true
|
|
328
|
+
|
|
329
|
+
// 验证服务器返回的范围是否与请求匹配
|
|
330
|
+
if (actualStart !== start) {
|
|
331
|
+
if (actualStart > start) {
|
|
332
|
+
// 服务器跳过了部分数据,这是严重错误
|
|
333
|
+
throw new Error(`Range mismatch: requested start=${start}, but server returned start=${actualStart} (gap detected)`)
|
|
334
|
+
} else {
|
|
335
|
+
// actualStart < start: 服务器返回了重叠数据,需要跳过前面的字节
|
|
336
|
+
const skipBytes = start - actualStart
|
|
337
|
+
const skipTransform = createSkipTransform(skipBytes)
|
|
338
|
+
await pipeline(res, skipTransform, writeStream, { end: false, signal })
|
|
339
|
+
// 更新进度时使用我们请求的范围,而不是服务器返回的范围
|
|
340
|
+
downSize += end - actualStart + 1 - skipBytes
|
|
341
|
+
start = downSize
|
|
342
|
+
retries = 3 // 成功后重置重试次数
|
|
343
|
+
continue
|
|
266
344
|
}
|
|
267
345
|
}
|
|
346
|
+
// 使用 pipeline,但不关闭 writeStream(继续下载下一个分片)
|
|
347
|
+
await pipeline(res, writeStream, { end: false, signal })
|
|
348
|
+
// pipeline 成功后才更新下载进度
|
|
349
|
+
// end 是最后一个字节的索引,下次从 end+1 开始
|
|
350
|
+
downSize += end - start + 1
|
|
351
|
+
start = downSize
|
|
352
|
+
} else if (res.statusCode === 200) {
|
|
353
|
+
// 200: 服务器返回完整文件
|
|
354
|
+
if (useChunked || start > 0) {
|
|
355
|
+
// 之前以分片模式下载过数据
|
|
356
|
+
writeStream.destroy()
|
|
357
|
+
await rm(tmpFile, { force: true }).catch(() => {})
|
|
358
|
+
writeStream = createWriteStream(tmpFile)
|
|
359
|
+
writeStream.on('error', onWriteError)
|
|
360
|
+
start = 0
|
|
361
|
+
downSize = 0
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
// 处理完整文件响应
|
|
365
|
+
expectedTotal = contentLength
|
|
366
|
+
await pipeline(res, writeStream, { end: false, signal })
|
|
367
|
+
downSize = contentLength
|
|
368
|
+
break
|
|
369
|
+
} else {
|
|
370
|
+
throw new Error(`Unexpected status code: ${res.statusCode}`)
|
|
268
371
|
}
|
|
269
|
-
|
|
372
|
+
// 成功后重置重试次数
|
|
373
|
+
retries = 3
|
|
270
374
|
} catch (error) {
|
|
375
|
+
if (error instanceof FallbackError) {
|
|
376
|
+
// 回退逻辑:记录域名、重置状态,在下次循环中以非 range 模式请求
|
|
377
|
+
unsupportedRangeDomains.add(hostname)
|
|
378
|
+
|
|
379
|
+
// 关闭当前写入流
|
|
380
|
+
writeStream.destroy()
|
|
381
|
+
await rm(tmpFile, { force: true }).catch(() => {})
|
|
382
|
+
|
|
383
|
+
// 检查是否已经是非 Range 模式,避免无限回退
|
|
384
|
+
if (!useRange) {
|
|
385
|
+
// 已经是非 Range 模式还失败,无法继续
|
|
386
|
+
throw new Error(`Download failed even in non-chunked mode: ${(error as Error).message}`)
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
writeStream = createWriteStream(tmpFile)
|
|
390
|
+
writeStream.once('error', onWriteError)
|
|
391
|
+
|
|
392
|
+
// 重置所有状态
|
|
393
|
+
expectedTotal = null
|
|
394
|
+
downSize = 0
|
|
395
|
+
start = 0
|
|
396
|
+
useChunked = false
|
|
397
|
+
useRange = false
|
|
398
|
+
retries = 3
|
|
399
|
+
continue
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
// 普通错误:重试
|
|
271
403
|
const err = error instanceof Error ? error : new Error(String(error))
|
|
272
404
|
if (--retries <= 0) {
|
|
273
405
|
writeStream.destroy()
|
|
274
|
-
|
|
406
|
+
await rm(tmpFile, { force: true }).catch(() => {})
|
|
275
407
|
throw new Error(`Download file with chunk failed! ${err.message}`, { cause: err })
|
|
276
408
|
}
|
|
409
|
+
// 失败后等待一小段时间再重试
|
|
410
|
+
await setTimeout(100)
|
|
277
411
|
}
|
|
278
|
-
|
|
279
|
-
start = downSize
|
|
280
|
-
}
|
|
412
|
+
} while (expectedTotal === null || downSize < expectedTotal)
|
|
281
413
|
|
|
282
|
-
writeStream.
|
|
283
|
-
|
|
414
|
+
if (!writeStream.destroyed && !writeStream.writableFinished) {
|
|
415
|
+
writeStream.end()
|
|
284
416
|
await once(writeStream, 'finish', { signal })
|
|
285
|
-
} catch (e) {
|
|
286
|
-
const reason = signal.reason as unknown
|
|
287
|
-
if (reason instanceof Error) {
|
|
288
|
-
throw reason
|
|
289
|
-
}
|
|
290
|
-
throw e
|
|
291
|
-
} finally {
|
|
292
|
-
writeStream.off('error', onWriteError)
|
|
293
417
|
}
|
|
418
|
+
writeStream.off('error', onWriteError)
|
|
294
419
|
|
|
295
420
|
const readStream = createReadStream(tmpFile)
|
|
296
421
|
readStream.once('close', () => {
|
package/src/version.ts
CHANGED