@juzi/file-box 1.7.20 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/README.md +4 -2
  2. package/dist/cjs/src/config.d.ts +5 -5
  3. package/dist/cjs/src/config.d.ts.map +1 -1
  4. package/dist/cjs/src/config.js +7 -9
  5. package/dist/cjs/src/config.js.map +1 -1
  6. package/dist/cjs/src/file-box.js +1 -1
  7. package/dist/cjs/src/file-box.js.map +1 -1
  8. package/dist/cjs/src/misc.d.ts.map +1 -1
  9. package/dist/cjs/src/misc.js +185 -69
  10. package/dist/cjs/src/misc.js.map +1 -1
  11. package/dist/cjs/src/misc.spec.js +26 -17
  12. package/dist/cjs/src/misc.spec.js.map +1 -1
  13. package/dist/cjs/src/version.d.ts.map +1 -1
  14. package/dist/cjs/src/version.js +1 -1
  15. package/dist/cjs/src/version.js.map +1 -1
  16. package/dist/cjs/tests/chunk-download.spec.js +62 -90
  17. package/dist/cjs/tests/chunk-download.spec.js.map +1 -1
  18. package/dist/cjs/tests/misc-error-handling.spec.js +134 -30
  19. package/dist/cjs/tests/misc-error-handling.spec.js.map +1 -1
  20. package/dist/cjs/tests/network-timeout.spec.js +101 -105
  21. package/dist/cjs/tests/network-timeout.spec.js.map +1 -1
  22. package/dist/esm/src/config.d.ts +5 -5
  23. package/dist/esm/src/config.d.ts.map +1 -1
  24. package/dist/esm/src/config.js +6 -8
  25. package/dist/esm/src/config.js.map +1 -1
  26. package/dist/esm/src/file-box.js +2 -2
  27. package/dist/esm/src/file-box.js.map +1 -1
  28. package/dist/esm/src/misc.d.ts.map +1 -1
  29. package/dist/esm/src/misc.js +187 -71
  30. package/dist/esm/src/misc.js.map +1 -1
  31. package/dist/esm/src/misc.spec.js +26 -17
  32. package/dist/esm/src/misc.spec.js.map +1 -1
  33. package/dist/esm/src/version.d.ts.map +1 -1
  34. package/dist/esm/src/version.js +1 -1
  35. package/dist/esm/src/version.js.map +1 -1
  36. package/dist/esm/tests/chunk-download.spec.js +62 -90
  37. package/dist/esm/tests/chunk-download.spec.js.map +1 -1
  38. package/dist/esm/tests/misc-error-handling.spec.js +134 -30
  39. package/dist/esm/tests/misc-error-handling.spec.js.map +1 -1
  40. package/dist/esm/tests/network-timeout.spec.js +103 -107
  41. package/dist/esm/tests/network-timeout.spec.js.map +1 -1
  42. package/package.json +2 -2
  43. package/src/config.ts +6 -12
  44. package/src/file-box.ts +2 -2
  45. package/src/misc.spec.ts +29 -14
  46. package/src/misc.ts +200 -75
  47. package/src/version.ts +1 -1
package/src/misc.ts CHANGED
@@ -2,20 +2,19 @@ import assert from 'assert'
2
2
  import { randomUUID } from 'crypto'
3
3
  import { once } from 'events'
4
4
  import { createReadStream, createWriteStream } from 'fs'
5
- import { rm } from 'fs/promises'
5
+ import { rm, stat } from 'fs/promises'
6
6
  import http, { RequestOptions } from 'http'
7
7
  import https from 'https'
8
8
  import { HttpsProxyAgent } from 'https-proxy-agent'
9
9
  import { tmpdir } from 'os'
10
10
  import { join } from 'path'
11
11
  import type { Readable } from 'stream'
12
+ import { Transform } from 'stream'
13
+ import { pipeline } from 'stream/promises'
14
+ import { setTimeout } from 'timers/promises'
12
15
  import { URL } from 'url'
13
16
 
14
- import {
15
- HTTP_CHUNK_SIZE,
16
- HTTP_REQUEST_TIMEOUT,
17
- HTTP_RESPONSE_TIMEOUT,
18
- } from './config.js'
17
+ import { CONFIG } from './config.js'
19
18
 
20
19
  const protocolMap: {
21
20
  [key: string]: { agent: http.Agent; request: typeof http.request }
@@ -27,6 +26,16 @@ const protocolMap: {
27
26
  const noop = () => { }
28
27
  const unsupportedRangeDomains = new Set<string>()
29
28
 
29
+ // Custom Error: signals that the download must fall back to a non-chunked (non-Range) request
30
+ class FallbackError extends Error {
31
+
32
+ constructor (reason: string) {
33
+ super(`Fallback required: ${reason}`)
34
+ this.name = 'FallbackError'
35
+ }
36
+
37
+ }
38
+
30
39
  function getProtocol (protocol: string) {
31
40
  assert(protocolMap[protocol], new Error('unknown protocol: ' + protocol))
32
41
  return protocolMap[protocol]!
@@ -65,8 +74,6 @@ export async function httpHeadHeader (url: string, headers: http.OutgoingHttpHea
65
74
  return res.headers
66
75
  }
67
76
 
68
- // console.log('302 found for ' + url)
69
-
70
77
  if (!res.headers.location) {
71
78
  throw new Error('302 found but no location!')
72
79
  }
@@ -98,7 +105,7 @@ export async function httpStream (url: string, headers: http.OutgoingHttpHeaders
98
105
  if (headHeaders.location) {
99
106
  url = headHeaders.location
100
107
  }
101
- const { protocol, hostname } = new URL(url)
108
+ const { protocol, hostname, port } = new URL(url)
102
109
  getProtocol(protocol)
103
110
 
104
111
  const options: http.RequestOptions = {
@@ -106,17 +113,17 @@ export async function httpStream (url: string, headers: http.OutgoingHttpHeaders
106
113
  method: 'GET',
107
114
  }
108
115
 
109
- const fileSize = Number(headHeaders['content-length'])
110
-
111
- // 运行时读取 env:方便测试/调用方动态调整
112
- const noSliceDown = process.env['FILEBOX_NO_SLICE_DOWN'] === 'true'
113
- const chunkSize = Number(process.env['FILEBOX_HTTP_CHUNK_SIZE']) || HTTP_CHUNK_SIZE
114
-
115
- if (!unsupportedRangeDomains.has(hostname) && !noSliceDown && headHeaders['accept-ranges'] === 'bytes' && fileSize > chunkSize) {
116
- return await downloadFileInChunks(url, options, fileSize, chunkSize, proxyUrl)
117
- } else {
118
- return await fetch(url, options, proxyUrl)
119
- }
116
+ // 使用 hostname:port 作为域名标识,避免不同端口的服务互相影响
117
+ const defaultPort = protocol === 'https:' ? '443' : '80'
118
+ const hostKey = `${hostname}:${port || defaultPort}`
119
+
120
+ // 直接尝试分片下载,不检查 Accept-Ranges fileSize
121
+ // 原因:
122
+ // 1. 有些服务器 HEAD 不返回 Accept-Ranges 但实际支持分片
123
+ // 2. 有些服务器 HEAD 返回 fileSize=0 但实际支持分片
124
+ // downloadFileInChunks 内部有完善的回退机制处理不支持的情况
125
+ const result = await downloadFileInChunks(url, options, proxyUrl, hostKey)
126
+ return result
120
127
  }
121
128
 
122
129
  async function fetch (url: string, options: http.RequestOptions, proxyUrl?: string): Promise<http.IncomingMessage> {
@@ -149,10 +156,10 @@ async function fetch (url: string, options: http.RequestOptions, proxyUrl?: stri
149
156
  req.off('error', noop)
150
157
  })
151
158
  // request timeout:只用于“拿到 response 之前”(连接/握手/首包)
152
- .setTimeout(HTTP_REQUEST_TIMEOUT, () => {
159
+ .setTimeout(CONFIG.HTTP_REQUEST_TIMEOUT, () => {
153
160
  // 已经拿到 response 时,不要再用 request timeout 误伤(会导致 aborted/ECONNRESET)
154
161
  if (res) return
155
- abortController.abort(new Error(`FileBox: Http request timeout (${HTTP_REQUEST_TIMEOUT})!`))
162
+ abortController.abort(new Error(`FileBox: Http request timeout (${CONFIG.HTTP_REQUEST_TIMEOUT})!`))
156
163
  })
157
164
  .end()
158
165
 
@@ -165,7 +172,6 @@ async function fetch (url: string, options: http.RequestOptions, proxyUrl?: stri
165
172
  res.on('error', noop)
166
173
  signal.throwIfAborted()
167
174
  } catch (e) {
168
- // once(...) 被 signal abort 时通常会抛 AbortError;优先抛出 abort(reason) 的真实原因
169
175
  const reason = signal.reason as unknown
170
176
  const err = reason instanceof Error
171
177
  ? reason
@@ -193,21 +199,44 @@ async function fetch (url: string, options: http.RequestOptions, proxyUrl?: stri
193
199
  signal.removeEventListener('abort', onAbort)
194
200
  res!.off('error', noop)
195
201
  })
196
- .setTimeout(HTTP_RESPONSE_TIMEOUT, () => {
197
- abortController.abort(new Error(`FileBox: Http response timeout (${HTTP_RESPONSE_TIMEOUT})!`))
202
+ .setTimeout(CONFIG.HTTP_RESPONSE_TIMEOUT, () => {
203
+ abortController.abort(new Error(`FileBox: Http response timeout (${CONFIG.HTTP_RESPONSE_TIMEOUT})!`))
198
204
  })
199
205
  return res!
200
206
  }
201
207
 
208
+ function createSkipTransform (skipBytes: number): Transform { // Returns a Transform that discards the first `skipBytes` bytes of the stream and forwards the rest
209
+ let skipped = 0 // running count of bytes discarded so far
210
+ return new Transform({
211
+ transform (chunk, _encoding, callback) {
212
+ if (skipped < skipBytes) {
213
+ const remaining = skipBytes - skipped
214
+ if (chunk.length <= remaining) {
215
+ // The whole chunk lies within the bytes still to be skipped: emit nothing
216
+ skipped += chunk.length
217
+ callback()
218
+ return
219
+ } else {
220
+ // Only the leading part of this chunk must be skipped: forward the tail
221
+ skipped = skipBytes
222
+ callback(null, chunk.subarray(remaining))
223
+ return
224
+ }
225
+ }
226
+ // Enough bytes have already been skipped: pass the chunk through unchanged
227
+ callback(null, chunk)
228
+ },
229
+ })
230
+ }
231
+
202
232
  async function downloadFileInChunks (
203
233
  url: string,
204
234
  options: http.RequestOptions,
205
- fileSize: number,
206
- chunkSize = HTTP_CHUNK_SIZE,
207
- proxyUrl?: string,
235
+ proxyUrl: string | undefined,
236
+ hostname: string,
208
237
  ): Promise<Readable> {
209
238
  const tmpFile = join(tmpdir(), `filebox-${randomUUID()}`)
210
- const writeStream = createWriteStream(tmpFile)
239
+ let writeStream = createWriteStream(tmpFile)
211
240
  const writeAbortController = new AbortController()
212
241
  const signal = writeAbortController.signal
213
242
  const onWriteError = (err: unknown) => {
@@ -219,78 +248,174 @@ async function downloadFileInChunks (
219
248
  headers: {},
220
249
  ...options,
221
250
  }
222
- let chunkSeq = 0
251
+ // 预期文件大小(初始为 null,从首次 206 响应中获取)
252
+ let expectedTotal: number | null = null
223
253
  let start = 0
224
- let end = 0
225
254
  let downSize = 0
226
255
  let retries = 3
256
+ // 控制是否使用 Range 请求(根据域名黑名单初始化)
257
+ let useRange = !unsupportedRangeDomains.has(hostname)
258
+ let useChunked = false
259
+
260
+ do {
261
+ // 每次循环前检查文件实际大小,作为真实的下载进度
262
+ // 这样在重试时可以从实际写入的位置继续,避免数据重复
263
+ const fileStats = await stat(tmpFile).then(stats => stats.size).catch(() => 0)
264
+ if (fileStats !== downSize) {
265
+ // 文件实际大小与记录的不一致,使用实际大小
266
+ downSize = fileStats
267
+ start = fileStats
268
+ }
227
269
 
228
- while (downSize < fileSize) {
229
- end = Math.min(start + chunkSize, fileSize - 1)
230
- const range = `bytes=${start}-${end}`
231
270
  const requestOptions = Object.assign({}, requestBaseOptions)
232
271
  assert(requestOptions.headers, 'Errors that should not happen: Invalid headers')
233
- ;(requestOptions.headers as http.OutgoingHttpHeaders)['Range'] = range
272
+ const headers = requestOptions.headers as http.OutgoingHttpHeaders
273
+
274
+ // 根据 useRange flag 决定是否添加 Range header
275
+ if (useRange) {
276
+ const range = `bytes=${start}-`
277
+ headers['Range'] = range
278
+ } else {
279
+ delete headers['Range']
280
+ }
234
281
 
282
+ let res: http.IncomingMessage
235
283
  try {
236
- const res = await fetch(url, requestOptions, proxyUrl)
284
+ res = await fetch(url, requestOptions, proxyUrl)
237
285
  if (res.statusCode === 416) {
238
- unsupportedRangeDomains.add(new URL(url).hostname)
239
- // 某些云服务商对分片下载的支持可能不规范,需要保留一个回退的方式
240
- writeStream.destroy()
241
- try {
242
- await once(writeStream, 'close', { signal })
243
- } catch {}
244
- await rm(tmpFile, { force: true })
245
- return await fetch(url, requestBaseOptions, proxyUrl)
286
+ // 416: Range Not Satisfiable,服务器不支持此范围或文件大小不匹配
287
+ throw new FallbackError('416 Range Not Satisfiable')
246
288
  }
247
289
  assert(allowStatusCode.includes(res.statusCode ?? 0), `Request failed with status code ${res.statusCode}`)
248
- assert(Number(res.headers['content-length']) > 0, 'Server returned 0 bytes of data')
249
- try {
250
- const { total } = parseContentRange(res.headers['content-range'] ?? '')
251
- if (total > 0 && total < fileSize) {
290
+ const contentLength = Number(res.headers['content-length']) || 0
291
+ assert(contentLength >= 0, `Server returned ${contentLength} bytes of data`)
292
+
293
+ // 206: 部分内容,继续分片下载
294
+ // 200: 完整内容,服务器不支持 range 或返回全部数据
295
+ if (res.statusCode === 206) {
296
+ // 206 响应必须包含有效的 Content-Range 头(RFC 7233)
297
+ const contentRange = res.headers['content-range']
298
+ if (!contentRange) {
299
+ // Content-Range 缺失,服务器不规范,回退到非分片下载
300
+ throw new FallbackError('Missing Content-Range header')
301
+ }
302
+
303
+ let end: number
304
+ let total: number
305
+ let actualStart: number
306
+ try {
307
+ const parsed = parseContentRange(contentRange)
308
+ actualStart = parsed.start
309
+ end = parsed.end
310
+ total = parsed.total
311
+ } catch (error) {
312
+ // Content-Range 格式错误,服务器不规范,回退到非分片下载
313
+ throw new FallbackError(`Invalid Content-Range: ${contentRange}`)
314
+ }
315
+
316
+ if (expectedTotal === null) {
317
+ // 首次获得文件总大小
252
318
  // 某些云服务商(如腾讯云)在 head 方法中返回的 size 是原图大小,但下载时返回的是压缩后的图片,会比原图小。
253
- // 这种在首次下载时虽然请求了原图大小的范围,可能比缩略图大,但会一次性返回完整的原图,而不是报错 416,通过修正 fileSize 跳出循环即可。
254
- fileSize = total
319
+ // 这种在首次下载时虽然请求了原图大小的范围,可能比缩略图大,但会一次性返回完整的原图,而不是报错 416,通过修正 expectedTotal 跳出循环即可。
320
+ expectedTotal = total
321
+ } else if (total !== expectedTotal) {
322
+ // 服务器返回的文件总大小出现了变化
323
+ throw new Error(`File size mismatch: expected ${expectedTotal}, but server returned ${total}`)
255
324
  }
256
- } catch (error) {}
257
- for await (const chunk of res) {
258
- assert(Buffer.isBuffer(chunk))
259
- downSize += chunk.length
260
- if (!writeStream.write(chunk)) {
261
- try {
262
- await once(writeStream, 'drain', { signal })
263
- } catch (e) {
264
- const reason = signal.reason as unknown
265
- throw reason instanceof Error ? reason : (e as Error)
325
+
326
+ // 标记使用了分片下载
327
+ useChunked = true
328
+
329
+ // 验证服务器返回的范围是否与请求匹配
330
+ if (actualStart !== start) {
331
+ if (actualStart > start) {
332
+ // 服务器跳过了部分数据,这是严重错误
333
+ throw new Error(`Range mismatch: requested start=${start}, but server returned start=${actualStart} (gap detected)`)
334
+ } else {
335
+ // actualStart < start: 服务器返回了重叠数据,需要跳过前面的字节
336
+ const skipBytes = start - actualStart
337
+ const skipTransform = createSkipTransform(skipBytes)
338
+ await pipeline(res, skipTransform, writeStream, { end: false, signal })
339
+ // 更新进度时使用我们请求的范围,而不是服务器返回的范围
340
+ downSize += end - actualStart + 1 - skipBytes
341
+ start = downSize
342
+ retries = 3 // 成功后重置重试次数
343
+ continue
266
344
  }
267
345
  }
346
+ // 使用 pipeline,但不关闭 writeStream(继续下载下一个分片)
347
+ await pipeline(res, writeStream, { end: false, signal })
348
+ // pipeline 成功后才更新下载进度
349
+ // end 是最后一个字节的索引,下次从 end+1 开始
350
+ downSize += end - start + 1
351
+ start = downSize
352
+ } else if (res.statusCode === 200) {
353
+ // 200: 服务器返回完整文件
354
+ if (useChunked || start > 0) {
355
+ // 之前以分片模式下载过数据
356
+ writeStream.destroy()
357
+ await rm(tmpFile, { force: true }).catch(() => {})
358
+ writeStream = createWriteStream(tmpFile)
359
+ writeStream.on('error', onWriteError)
360
+ start = 0
361
+ downSize = 0
362
+ }
363
+
364
+ // 处理完整文件响应
365
+ expectedTotal = contentLength
366
+ await pipeline(res, writeStream, { end: false, signal })
367
+ downSize = contentLength
368
+ break
369
+ } else {
370
+ throw new Error(`Unexpected status code: ${res.statusCode}`)
268
371
  }
269
- res.destroy()
372
+ // 成功后重置重试次数
373
+ retries = 3
270
374
  } catch (error) {
375
+ if (error instanceof FallbackError) {
376
+ // 回退逻辑:记录域名、重置状态,在下次循环中以非 range 模式请求
377
+ unsupportedRangeDomains.add(hostname)
378
+
379
+ // 关闭当前写入流
380
+ writeStream.destroy()
381
+ await rm(tmpFile, { force: true }).catch(() => {})
382
+
383
+ // 检查是否已经是非 Range 模式,避免无限回退
384
+ if (!useRange) {
385
+ // 已经是非 Range 模式还失败,无法继续
386
+ throw new Error(`Download failed even in non-chunked mode: ${(error as Error).message}`)
387
+ }
388
+
389
+ writeStream = createWriteStream(tmpFile)
390
+ writeStream.once('error', onWriteError)
391
+
392
+ // 重置所有状态
393
+ expectedTotal = null
394
+ downSize = 0
395
+ start = 0
396
+ useChunked = false
397
+ useRange = false
398
+ retries = 3
399
+ continue
400
+ }
401
+
402
+ // 普通错误:重试
271
403
  const err = error instanceof Error ? error : new Error(String(error))
272
404
  if (--retries <= 0) {
273
405
  writeStream.destroy()
274
- void rm(tmpFile, { force: true })
406
+ await rm(tmpFile, { force: true }).catch(() => {})
275
407
  throw new Error(`Download file with chunk failed! ${err.message}`, { cause: err })
276
408
  }
409
+ // 失败后等待一小段时间再重试
410
+ await setTimeout(100)
277
411
  }
278
- chunkSeq++
279
- start = downSize
280
- }
412
+ } while (expectedTotal === null || downSize < expectedTotal)
281
413
 
282
- writeStream.end()
283
- try {
414
+ if (!writeStream.destroyed && !writeStream.writableFinished) {
415
+ writeStream.end()
284
416
  await once(writeStream, 'finish', { signal })
285
- } catch (e) {
286
- const reason = signal.reason as unknown
287
- if (reason instanceof Error) {
288
- throw reason
289
- }
290
- throw e
291
- } finally {
292
- writeStream.off('error', onWriteError)
293
417
  }
418
+ writeStream.off('error', onWriteError)
294
419
 
295
420
  const readStream = createReadStream(tmpFile)
296
421
  readStream.once('close', () => {
package/src/version.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  /**
2
2
  * This file was auto generated from scripts/generate-version.sh
3
3
  */
4
- export const VERSION: string = '1.7.20'
4
+ export const VERSION: string = '1.8.1'