@juzi/file-box 1.7.20 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +2 -2
  2. package/dist/cjs/src/config.d.ts +0 -1
  3. package/dist/cjs/src/config.d.ts.map +1 -1
  4. package/dist/cjs/src/config.js +1 -3
  5. package/dist/cjs/src/config.js.map +1 -1
  6. package/dist/cjs/src/misc.d.ts.map +1 -1
  7. package/dist/cjs/src/misc.js +143 -38
  8. package/dist/cjs/src/misc.js.map +1 -1
  9. package/dist/cjs/src/misc.spec.js +20 -17
  10. package/dist/cjs/src/misc.spec.js.map +1 -1
  11. package/dist/cjs/src/version.d.ts.map +1 -1
  12. package/dist/cjs/src/version.js +1 -1
  13. package/dist/cjs/src/version.js.map +1 -1
  14. package/dist/cjs/tests/chunk-download.spec.js +59 -43
  15. package/dist/cjs/tests/chunk-download.spec.js.map +1 -1
  16. package/dist/cjs/tests/misc-error-handling.spec.js +98 -4
  17. package/dist/cjs/tests/misc-error-handling.spec.js.map +1 -1
  18. package/dist/cjs/tests/network-timeout.spec.js +15 -2
  19. package/dist/cjs/tests/network-timeout.spec.js.map +1 -1
  20. package/dist/esm/src/config.d.ts +0 -1
  21. package/dist/esm/src/config.d.ts.map +1 -1
  22. package/dist/esm/src/config.js +0 -2
  23. package/dist/esm/src/config.js.map +1 -1
  24. package/dist/esm/src/misc.d.ts.map +1 -1
  25. package/dist/esm/src/misc.js +145 -40
  26. package/dist/esm/src/misc.js.map +1 -1
  27. package/dist/esm/src/misc.spec.js +20 -17
  28. package/dist/esm/src/misc.spec.js.map +1 -1
  29. package/dist/esm/src/version.d.ts.map +1 -1
  30. package/dist/esm/src/version.js +1 -1
  31. package/dist/esm/src/version.js.map +1 -1
  32. package/dist/esm/tests/chunk-download.spec.js +59 -43
  33. package/dist/esm/tests/chunk-download.spec.js.map +1 -1
  34. package/dist/esm/tests/misc-error-handling.spec.js +98 -4
  35. package/dist/esm/tests/misc-error-handling.spec.js.map +1 -1
  36. package/dist/esm/tests/network-timeout.spec.js +15 -2
  37. package/dist/esm/tests/network-timeout.spec.js.map +1 -1
  38. package/package.json +2 -2
  39. package/src/config.ts +0 -3
  40. package/src/misc.spec.ts +22 -14
  41. package/src/misc.ts +145 -39
  42. package/src/version.ts +1 -1
package/src/misc.ts CHANGED
@@ -2,17 +2,18 @@ import assert from 'assert'
2
2
  import { randomUUID } from 'crypto'
3
3
  import { once } from 'events'
4
4
  import { createReadStream, createWriteStream } from 'fs'
5
- import { rm } from 'fs/promises'
5
+ import { rm, stat } from 'fs/promises'
6
6
  import http, { RequestOptions } from 'http'
7
7
  import https from 'https'
8
8
  import { HttpsProxyAgent } from 'https-proxy-agent'
9
9
  import { tmpdir } from 'os'
10
10
  import { join } from 'path'
11
11
  import type { Readable } from 'stream'
12
+ import { Transform } from 'stream'
13
+ import { pipeline } from 'stream/promises'
12
14
  import { URL } from 'url'
13
15
 
14
16
  import {
15
- HTTP_CHUNK_SIZE,
16
17
  HTTP_REQUEST_TIMEOUT,
17
18
  HTTP_RESPONSE_TIMEOUT,
18
19
  } from './config.js'
@@ -110,10 +111,12 @@ export async function httpStream (url: string, headers: http.OutgoingHttpHeaders
110
111
 
111
112
  // 运行时读取 env:方便测试/调用方动态调整
112
113
  const noSliceDown = process.env['FILEBOX_NO_SLICE_DOWN'] === 'true'
113
- const chunkSize = Number(process.env['FILEBOX_HTTP_CHUNK_SIZE']) || HTTP_CHUNK_SIZE
114
114
 
115
- if (!unsupportedRangeDomains.has(hostname) && !noSliceDown && headHeaders['accept-ranges'] === 'bytes' && fileSize > chunkSize) {
116
- return await downloadFileInChunks(url, options, fileSize, chunkSize, proxyUrl)
115
+ // 检查服务器是否支持 range 请求
116
+ const supportsRange = headHeaders['accept-ranges'] === 'bytes'
117
+
118
+ if (!unsupportedRangeDomains.has(hostname) && !noSliceDown && supportsRange && fileSize > 0) {
119
+ return await downloadFileInChunks(url, options, proxyUrl)
117
120
  } else {
118
121
  return await fetch(url, options, proxyUrl)
119
122
  }
@@ -199,11 +202,33 @@ async function fetch (url: string, options: http.RequestOptions, proxyUrl?: stri
199
202
  return res!
200
203
  }
201
204
 
205
+ function createSkipTransform (skipBytes: number): Transform {
206
+ let skipped = 0
207
+ return new Transform({
208
+ transform (chunk, _encoding, callback) {
209
+ if (skipped < skipBytes) {
210
+ const remaining = skipBytes - skipped
211
+ if (chunk.length <= remaining) {
212
+ // 整个 chunk 都需要跳过
213
+ skipped += chunk.length
214
+ callback()
215
+ return
216
+ } else {
217
+ // 跳过部分 chunk
218
+ skipped = skipBytes
219
+ callback(null, chunk.subarray(remaining))
220
+ return
221
+ }
222
+ }
223
+ // 已经跳过足够的字节,直接传递
224
+ callback(null, chunk)
225
+ },
226
+ })
227
+ }
228
+
202
229
  async function downloadFileInChunks (
203
230
  url: string,
204
231
  options: http.RequestOptions,
205
- fileSize: number,
206
- chunkSize = HTTP_CHUNK_SIZE,
207
232
  proxyUrl?: string,
208
233
  ): Promise<Readable> {
209
234
  const tmpFile = join(tmpdir(), `filebox-${randomUUID()}`)
@@ -219,64 +244,145 @@ async function downloadFileInChunks (
219
244
  headers: {},
220
245
  ...options,
221
246
  }
222
- let chunkSeq = 0
247
+ // 预期文件大小(初始为 null,从首次 206 响应中获取)
248
+ let expectedTotal: number | null = null
223
249
  let start = 0
224
- let end = 0
225
250
  let downSize = 0
226
251
  let retries = 3
252
+ // 标识是否需要回退到非分片下载
253
+ let shouldFallback = false
227
254
 
228
- while (downSize < fileSize) {
229
- end = Math.min(start + chunkSize, fileSize - 1)
230
- const range = `bytes=${start}-${end}`
255
+ do {
256
+ // 每次循环前检查文件实际大小,作为真实的下载进度
257
+ // 这样在重试时可以从实际写入的位置继续,避免数据重复
258
+ try {
259
+ const fileStats = await stat(tmpFile)
260
+ const actualSize = fileStats.size
261
+ if (actualSize > downSize) {
262
+ // 文件实际大小比记录的大,说明之前有部分写入
263
+ downSize = actualSize
264
+ start = actualSize
265
+ }
266
+ } catch (error) {
267
+ // 文件不存在或无法访问,使用当前的 downSize
268
+ }
269
+
270
+ const range = `bytes=${start}-`
231
271
  const requestOptions = Object.assign({}, requestBaseOptions)
232
272
  assert(requestOptions.headers, 'Errors that should not happen: Invalid headers')
233
273
  ;(requestOptions.headers as http.OutgoingHttpHeaders)['Range'] = range
234
274
 
275
+ // 每次请求创建独立的 AbortController 来管理当前请求的生命周期
276
+ const requestAbortController = new AbortController()
277
+ requestOptions.signal = requestAbortController.signal
278
+
235
279
  try {
236
280
  const res = await fetch(url, requestOptions, proxyUrl)
237
281
  if (res.statusCode === 416) {
238
- unsupportedRangeDomains.add(new URL(url).hostname)
239
282
  // 某些云服务商对分片下载的支持可能不规范,需要保留一个回退的方式
240
- writeStream.destroy()
241
- try {
242
- await once(writeStream, 'close', { signal })
243
- } catch {}
244
- await rm(tmpFile, { force: true })
245
- return await fetch(url, requestBaseOptions, proxyUrl)
283
+ shouldFallback = true
284
+ break
246
285
  }
247
286
  assert(allowStatusCode.includes(res.statusCode ?? 0), `Request failed with status code ${res.statusCode}`)
248
- assert(Number(res.headers['content-length']) > 0, 'Server returned 0 bytes of data')
249
- try {
250
- const { total } = parseContentRange(res.headers['content-range'] ?? '')
251
- if (total > 0 && total < fileSize) {
287
+ const contentLength = Number(res.headers['content-length'])
288
+ assert(contentLength > 0, 'Server returned 0 bytes of data')
289
+
290
+ // 206: 部分内容,继续分片下载
291
+ // 200: 完整内容,服务器不支持 range 或返回全部数据
292
+ if (res.statusCode === 206) {
293
+ // 206 响应必须包含有效的 Content-Range 头(RFC 7233)
294
+ const contentRange = res.headers['content-range']
295
+ if (!contentRange) {
296
+ // Content-Range 缺失,服务器不规范,回退到非分片下载
297
+ shouldFallback = true
298
+ break
299
+ }
300
+
301
+ let end: number
302
+ let total: number
303
+ let actualStart: number
304
+ try {
305
+ const parsed = parseContentRange(contentRange)
306
+ actualStart = parsed.start
307
+ end = parsed.end
308
+ total = parsed.total
309
+ } catch (error) {
310
+ // Content-Range 格式错误,服务器不规范,回退到非分片下载
311
+ shouldFallback = true
312
+ break
313
+ }
314
+
315
+ if (expectedTotal === null) {
316
+ // 首次获得文件总大小
252
317
  // 某些云服务商(如腾讯云)在 head 方法中返回的 size 是原图大小,但下载时返回的是压缩后的图片,会比原图小。
253
- // 这种在首次下载时虽然请求了原图大小的范围,可能比缩略图大,但会一次性返回完整的原图,而不是报错 416,通过修正 fileSize 跳出循环即可。
254
- fileSize = total
318
+ // 这种在首次下载时虽然请求了原图大小的范围,可能比缩略图大,但会一次性返回完整的原图,而不是报错 416,通过修正 expectedTotal 跳出循环即可。
319
+ expectedTotal = total
320
+ } else if (total !== expectedTotal) {
321
+ // 服务器返回的文件总大小出现了变化
322
+ throw new Error(`File size mismatch: expected ${expectedTotal}, but server returned ${total}`)
255
323
  }
256
- } catch (error) {}
257
- for await (const chunk of res) {
258
- assert(Buffer.isBuffer(chunk))
259
- downSize += chunk.length
260
- if (!writeStream.write(chunk)) {
261
- try {
262
- await once(writeStream, 'drain', { signal })
263
- } catch (e) {
264
- const reason = signal.reason as unknown
265
- throw reason instanceof Error ? reason : (e as Error)
324
+
325
+ // 验证服务器返回的范围是否与请求匹配
326
+ if (actualStart !== start) {
327
+ if (actualStart > start) {
328
+ // 服务器跳过了部分数据,这是严重错误
329
+ throw new Error(`Range mismatch: requested start=${start}, but server returned start=${actualStart} (gap detected)`)
330
+ } else {
331
+ // actualStart < start: 服务器返回了重叠数据,需要跳过前面的字节
332
+ const skipBytes = start - actualStart
333
+ const skipTransform = createSkipTransform(skipBytes)
334
+ await pipeline(res, skipTransform, writeStream, { end: false, signal })
335
+ // 更新进度时使用我们请求的范围,而不是服务器返回的范围
336
+ downSize += end - actualStart + 1 - skipBytes
337
+ start = downSize
338
+ retries = 3 // 成功后重置重试次数
339
+ continue
266
340
  }
267
341
  }
342
+ // 使用 pipeline,但不关闭 writeStream(继续下载下一个分片)
343
+ await pipeline(res, writeStream, { end: false, signal })
344
+ // pipeline 成功后才更新下载进度
345
+ // end 是最后一个字节的索引,下次从 end+1 开始
346
+ downSize += end - start + 1
347
+ start = downSize
348
+ } else {
349
+ // 200: 服务器返回完整文件,不支持 range
350
+ if (start > 0) {
351
+ // 中途收到 200,服务器停止支持 range,标记并回退到普通下载
352
+ shouldFallback = true
353
+ break
354
+ }
355
+ // 首次请求返回 200,正常处理
356
+ await pipeline(res, writeStream, { signal })
357
+ downSize = contentLength
358
+ break
268
359
  }
269
- res.destroy()
360
+ // 成功后重置重试次数
361
+ retries = 3
270
362
  } catch (error) {
271
363
  const err = error instanceof Error ? error : new Error(String(error))
272
364
  if (--retries <= 0) {
273
365
  writeStream.destroy()
274
- void rm(tmpFile, { force: true })
366
+ await rm(tmpFile, { force: true })
275
367
  throw new Error(`Download file with chunk failed! ${err.message}`, { cause: err })
276
368
  }
369
+ // 失败后等待一小段时间再重试
370
+ await new Promise(resolve => setTimeout(resolve, 100))
371
+ } finally {
372
+ // 确保请求被清理(成功时也需要 abort 以释放资源)
373
+ requestAbortController.abort()
277
374
  }
278
- chunkSeq++
279
- start = downSize
375
+ } while (expectedTotal === null || downSize < expectedTotal)
376
+
377
+ // 统一处理回退到非分片下载的情况
378
+ if (shouldFallback) {
379
+ unsupportedRangeDomains.add(new URL(url).hostname)
380
+ writeStream.destroy()
381
+ try {
382
+ await once(writeStream, 'close', { signal })
383
+ } catch {}
384
+ await rm(tmpFile, { force: true })
385
+ return await fetch(url, requestBaseOptions, proxyUrl)
280
386
  }
281
387
 
282
388
  writeStream.end()
package/src/version.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  /**
2
2
  * This file was auto generated from scripts/generate-version.sh
3
3
  */
4
- export const VERSION: string = '1.7.20'
4
+ export const VERSION: string = '1.8.0'