@juzi/file-box 1.8.4 → 1.8.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/misc.spec.ts CHANGED
@@ -12,6 +12,8 @@ import {
12
12
  httpHeadHeader,
13
13
  httpStream,
14
14
  streamToBuffer,
15
+ __clearUnsupportedRangeDomains,
16
+ __addUnsupportedRangeDomain,
15
17
  } from './misc.js'
16
18
 
17
19
  // 设置短超时用于测试
@@ -200,3 +202,217 @@ test('httpStream in chunks', async (t) => {
200
202
  const buffer = await streamToBuffer(res)
201
203
  t.equal(buffer.length, FILE_SIZE, 'should get data in chunks right')
202
204
  })
205
+
206
+ test('httpStream: HEAD Accept-Ranges=none 时不发 Range 请求(A2)', async (t) => { // title (zh): "no Range request is sent when HEAD reports Accept-Ranges=none (A2)"
207
+ __clearUnsupportedRangeDomains() // start from an empty blacklist so earlier tests cannot affect this one
208
+
209
+ const TRUE_DATA = Buffer.from('TRUE-DATA-A2', 'utf8')
210
+ let getCallCount = 0
211
+ let getHadRangeHeader: boolean | undefined
212
+
213
+ const server = createServer((req, res) => {
214
+ if (req.method === 'HEAD') { // HEAD explicitly advertises Accept-Ranges: none
215
+ res.writeHead(200, {
216
+ 'Accept-Ranges': 'none',
217
+ 'Content-Length': String(TRUE_DATA.length),
218
+ })
219
+ res.end()
220
+ return
221
+ }
222
+ getCallCount += 1 // every non-HEAD request is counted as a GET
223
+ getHadRangeHeader = 'range' in req.headers // remember whether the latest GET carried a Range header
224
+ res.writeHead(200, { 'Content-Length': String(TRUE_DATA.length) })
225
+ res.end(TRUE_DATA)
226
+ })
227
+
228
+ const host = await new Promise<string>((resolve) => {
229
+ server.listen(0, '127.0.0.1', () => { // port 0: let the OS pick a free port
230
+ const addr = server.address() as AddressInfo
231
+ resolve(`http://127.0.0.1:${addr.port}`)
232
+ })
233
+ })
234
+ t.teardown(() => { server.close() })
235
+
236
+ const stream = await httpStream(`${host}/file`)
237
+ const buffer = await streamToBuffer(stream)
238
+
239
+ t.equal(getCallCount, 1, 'GET 应只被调用 1 次') // exactly one GET is expected
240
+ t.equal(getHadRangeHeader, false, 'GET 请求不应携带 Range header') // that GET must not carry a Range header
241
+ t.equal(buffer.toString('utf8'), TRUE_DATA.toString('utf8'), '应拿到真实数据') // body must equal TRUE_DATA
242
+ })
243
+
244
+ test('httpStream: HEAD Accept-Ranges=none 端到端持久化,第二次请求直接跳过 Range(A2 持久化)', async (t) => { // title (zh): "end-to-end persistence of HEAD Accept-Ranges=none: the second request skips Range directly (A2 persistence)"
245
+ __clearUnsupportedRangeDomains() // start from an empty blacklist
246
+
247
+ const TRUE_DATA = Buffer.from('E2E-A2-PERSISTENCE', 'utf8')
248
+ let getCallCount = 0
249
+ const getRangeHeaderByCall: (string | undefined)[] = [] // Range header value seen on each GET, in call order
250
+
251
+ const server = createServer((req, res) => {
252
+ if (req.method === 'HEAD') {
253
+ res.writeHead(200, {
254
+ 'Accept-Ranges': 'none',
255
+ 'Content-Length': String(TRUE_DATA.length),
256
+ })
257
+ res.end()
258
+ return
259
+ }
260
+ getCallCount += 1
261
+ const rangeHeader = req.headers.range
262
+ getRangeHeaderByCall.push(typeof rangeHeader === 'string' ? rangeHeader : undefined)
263
+ res.writeHead(200, { 'Content-Length': String(TRUE_DATA.length) })
264
+ res.end(TRUE_DATA)
265
+ })
266
+
267
+ const host = await new Promise<string>((resolve) => {
268
+ server.listen(0, '127.0.0.1', () => {
269
+ const addr = server.address() as AddressInfo
270
+ resolve(`http://127.0.0.1:${addr.port}`)
271
+ })
272
+ })
273
+ t.teardown(() => { server.close() })
274
+
275
+ // First request: HEAD reports Accept-Ranges=none, so A2 adds the host to the blacklist
276
+ const stream1 = await httpStream(`${host}/file`)
277
+ await streamToBuffer(stream1)
278
+
279
+ // Second request: same host; the A2 blacklist entry is expected to hit, so GET carries no Range
280
+ const stream2 = await httpStream(`${host}/file`)
281
+ const buffer2 = await streamToBuffer(stream2)
282
+
283
+ t.equal(getCallCount, 2, 'GET 应被调用 2 次(每次请求各 1 次,无 Range 重试)') // one GET per request, no Range retry
284
+ t.equal(getRangeHeaderByCall[0], undefined, '第 1 次 GET 不带 Range(本次 HEAD 已宣告 none)') // 1st GET: no Range (HEAD already said none)
285
+ t.equal(getRangeHeaderByCall[1], undefined, '第 2 次 GET 不带 Range(黑名单命中)') // 2nd GET: no Range (blacklist hit)
286
+ t.equal(buffer2.toString('utf8'), TRUE_DATA.toString('utf8'), '第二次应拿到真实数据') // second download must return TRUE_DATA
287
+ })
288
+
289
+ test('httpStream: 带 Range 却收到 200 时回退重发不带 Range(B1 - CMSV6 场景)', async (t) => { // title (zh): "when a Range request is answered with 200, fall back and resend without Range (B1 - CMSV6 scenario)"
290
+ __clearUnsupportedRangeDomains() // start from an empty blacklist
291
+
292
+ const FAKE_DATA = Buffer.from('FAKE-DATA-FROM-WRONG-STREAM', 'utf8')
293
+ const TRUE_DATA = Buffer.alloc(FAKE_DATA.length, 'T') // same length, different content
294
+ let getCallCount = 0
295
+ const getRangeHeaderByCall: (string | undefined)[] = []
296
+
297
+ const server = createServer((req, res) => {
298
+ if (req.method === 'HEAD') {
299
+ // Note: Accept-Ranges is deliberately omitted, simulating CMSV6
300
+ res.writeHead(200, { 'Content-Length': String(TRUE_DATA.length) })
301
+ res.end()
302
+ return
303
+ }
304
+ getCallCount += 1
305
+ const rangeHeader = req.headers.range
306
+ getRangeHeaderByCall.push(typeof rangeHeader === 'string' ? rangeHeader : undefined)
307
+
308
+ // CMSV6 behavior: always answers 200 with the correct Content-Length, whether or not Range was sent,
309
+ // but the body differs depending on whether a Range header is present
310
+ res.writeHead(200, { 'Content-Length': String(TRUE_DATA.length) })
311
+ if (rangeHeader) {
312
+ res.end(FAKE_DATA)
313
+ } else {
314
+ res.end(TRUE_DATA)
315
+ }
316
+ })
317
+
318
+ const host = await new Promise<string>((resolve) => {
319
+ server.listen(0, '127.0.0.1', () => {
320
+ const addr = server.address() as AddressInfo
321
+ resolve(`http://127.0.0.1:${addr.port}`)
322
+ })
323
+ })
324
+ t.teardown(() => { server.close() })
325
+
326
+ const stream = await httpStream(`${host}/file`)
327
+ const buffer = await streamToBuffer(stream)
328
+
329
+ t.equal(getCallCount, 2, 'GET 应被调用 2 次(第一次带 Range 触发 B1,第二次回退)') // 1st GET with Range triggers B1, 2nd is the fallback
330
+ t.ok(getRangeHeaderByCall[0], '第 1 次 GET 应携带 Range header') // 1st GET must carry Range
331
+ t.equal(getRangeHeaderByCall[1], undefined, '第 2 次 GET 不应携带 Range header') // 2nd GET must not carry Range
332
+ t.equal(buffer.toString('utf8'), TRUE_DATA.toString('utf8'), '最终数据应为 TRUE_DATA(回退后拿到的)') // final data is what the fallback GET returned
333
+ })
334
+
335
+ test('httpStream: HEAD 返回 4xx 且 Range GET 返回 400 时回退到非 Range 模式(B2 - HEAD 不支持场景)', async (t) => { // title (zh): "fall back to non-Range mode when HEAD returns 4xx and the Range GET returns 400 (B2 - HEAD unsupported scenario)"
336
+ __clearUnsupportedRangeDomains() // start from an empty blacklist
337
+
338
+ const TRUE_DATA = Buffer.from('TRUE-DATA-HEAD-UNSUPPORTED', 'utf8')
339
+ let getCallCount = 0
340
+ const getRangeHeaderByCall: (string | undefined)[] = []
341
+
342
+ const server = createServer((req, res) => {
343
+ if (req.method === 'HEAD') {
344
+ // Simulate a server that does not support the HEAD method: respond with 425
345
+ res.writeHead(425)
346
+ res.end()
347
+ return
348
+ }
349
+ getCallCount += 1
350
+ const rangeHeader = req.headers.range
351
+ getRangeHeaderByCall.push(typeof rangeHeader === 'string' ? rangeHeader : undefined)
352
+
353
+ if (rangeHeader) {
354
+ // The server does not support Range either: respond with 400
355
+ res.writeHead(400)
356
+ res.end('Bad Request')
357
+ return
358
+ }
359
+
360
+ // A GET without Range succeeds normally
361
+ res.writeHead(200, { 'Content-Length': String(TRUE_DATA.length) })
362
+ res.end(TRUE_DATA)
363
+ })
364
+
365
+ const host = await new Promise<string>((resolve) => {
366
+ server.listen(0, '127.0.0.1', () => {
367
+ const addr = server.address() as AddressInfo
368
+ resolve(`http://127.0.0.1:${addr.port}`)
369
+ })
370
+ })
371
+ t.teardown(() => { server.close() })
372
+
373
+ const stream = await httpStream(`${host}/file`)
374
+ const buffer = await streamToBuffer(stream)
375
+
376
+ t.equal(getCallCount, 2, 'GET 应被调用 2 次(第一次带 Range 收到 400 触发回退,第二次不带 Range)') // 1st GET with Range gets 400 and triggers fallback, 2nd has no Range
377
+ t.ok(getRangeHeaderByCall[0], '第 1 次 GET 应携带 Range header') // 1st GET must carry Range
378
+ t.equal(getRangeHeaderByCall[1], undefined, '第 2 次 GET 不应携带 Range header') // 2nd GET must not carry Range
379
+ t.equal(buffer.toString('utf8'), TRUE_DATA.toString('utf8'), '最终数据应为 TRUE_DATA') // final data must equal TRUE_DATA
380
+ })
381
+
382
+ test('httpStream: 黑名单登记的 host 后续请求直接跳过 Range(B1 黑名单持久化)', async (t) => { // title (zh): "requests to a blacklisted host skip Range directly (B1 blacklist persistence)"
383
+ __clearUnsupportedRangeDomains() // start from an empty blacklist
384
+
385
+ const TRUE_DATA = Buffer.from('SECOND-DOWNLOAD-AFTER-BLACKLIST', 'utf8')
386
+ let getCallCount = 0
387
+ let getHadRangeHeader: boolean | undefined
388
+
389
+ const server = createServer((req, res) => {
390
+ if (req.method === 'HEAD') { // HEAD here sends no Accept-Ranges header at all
391
+ res.writeHead(200, { 'Content-Length': String(TRUE_DATA.length) })
392
+ res.end()
393
+ return
394
+ }
395
+ getCallCount += 1
396
+ getHadRangeHeader = 'range' in req.headers
397
+ res.writeHead(200, { 'Content-Length': String(TRUE_DATA.length) })
398
+ res.end(TRUE_DATA)
399
+ })
400
+
401
+ const port = await new Promise<number>((resolve) => {
402
+ server.listen(0, '127.0.0.1', () => {
403
+ const addr = server.address() as AddressInfo
404
+ resolve(addr.port)
405
+ })
406
+ })
407
+ t.teardown(() => { server.close() })
408
+
409
+ // Manually seed the hostKey into the blacklist beforehand, simulating a host previously added by B1
410
+ __addUnsupportedRangeDomain(`127.0.0.1:${port}`)
411
+
412
+ const stream = await httpStream(`http://127.0.0.1:${port}/file`)
413
+ const buffer = await streamToBuffer(stream)
414
+
415
+ t.equal(getCallCount, 1, '黑名单命中后 GET 只调用 1 次') // after a blacklist hit only one GET is made
416
+ t.equal(getHadRangeHeader, false, '黑名单命中后 GET 不带 Range header') // and that GET carries no Range header
417
+ t.equal(buffer.toString('utf8'), TRUE_DATA.toString('utf8'), '应拿到真实数据') // body must equal TRUE_DATA
418
+ })
package/src/misc.ts CHANGED
@@ -24,8 +24,27 @@ const protocolMap: {
24
24
  }
25
25
 
26
26
  const noop = () => { }
27
+
28
+ // Runtime blacklist of hosts known not to support Range requests (key format: `hostname:port`).
29
+ // A Set iterates in insertion order, so once the cap is exceeded the earliest-inserted entry is evicted (FIFO).
30
+ // The cap keeps memory from growing without bound in long-running processes (e.g. gateways).
31
+ const UNSUPPORTED_RANGE_DOMAINS_MAX = 1024
27
32
  const unsupportedRangeDomains = new Set<string>()
28
33
 
34
+ function addUnsupportedRangeDomain (hostKey: string): void { // registers hostKey in the blacklist, evicting the oldest entry when over the cap
35
+ if (unsupportedRangeDomains.has(hostKey)) {
36
+ return // already present: nothing to add, and its insertion position is left unchanged
37
+ }
38
+ unsupportedRangeDomains.add(hostKey)
39
+ if (unsupportedRangeDomains.size > UNSUPPORTED_RANGE_DOMAINS_MAX) {
40
+ // Set.values() iterates in insertion order, so the first value is the oldest entry; deleting it is enough
41
+ const oldest = unsupportedRangeDomains.values().next().value
42
+ if (oldest !== undefined) {
43
+ unsupportedRangeDomains.delete(oldest)
44
+ }
45
+ }
46
+ }
47
+
29
48
  // 自定义 Error:标记需要回退到非分片下载
30
49
  class FallbackError extends Error {
31
50
 
@@ -36,6 +55,16 @@ class FallbackError extends Error {
36
55
 
37
56
  }
38
57
 
58
+ // Test-only: empties the runtime blacklist (the module-level Set would otherwise leak state across tests)
59
+ export function __clearUnsupportedRangeDomains (): void {
60
+ unsupportedRangeDomains.clear()
61
+ }
62
+
63
+ // Test-only: manually registers a host in the blacklist (used to verify that later requests skip Range)
64
+ export function __addUnsupportedRangeDomain (hostKey: string): void {
65
+ addUnsupportedRangeDomain(hostKey)
66
+ }
67
+
39
68
  function getProtocol (protocol: string) {
40
69
  assert(protocolMap[protocol], new Error('unknown protocol: ' + protocol))
41
70
  return protocolMap[protocol]!
@@ -114,11 +143,20 @@ export async function httpStream (url: string, headers: http.OutgoingHttpHeaders
114
143
  const defaultPort = protocol === 'https:' ? '443' : '80'
115
144
  const hostKey = `${hostname}:${port || defaultPort}`
116
145
 
117
- // 直接尝试分片下载,不检查 Accept-Ranges 和 fileSize
146
+ // A2:若 HEAD 明确声明 Accept-Ranges: none,记录到运行期黑名单
147
+ // 以便 downloadFileInChunks 本次请求就直接以非 Range 模式发起
148
+ // Accept-Ranges header 可能是 string | string[],归一化后匹配 'none'
149
+ const acceptRangesRaw = headHeaders['accept-ranges']
150
+ const acceptRanges = Array.isArray(acceptRangesRaw) ? acceptRangesRaw[0] : acceptRangesRaw
151
+ if (typeof acceptRanges === 'string' && acceptRanges.trim().toLowerCase() === 'none') {
152
+ addUnsupportedRangeDomain(hostKey)
153
+ }
154
+
155
+ // 直接尝试分片下载,不检查 fileSize
118
156
  // 原因:
119
157
  // 1. 有些服务器 HEAD 不返回 Accept-Ranges 但实际支持分片
120
158
  // 2. 有些服务器 HEAD 返回 fileSize=0 但实际支持分片
121
- // downloadFileInChunks 内部有完善的回退机制处理不支持的情况
159
+ // downloadFileInChunks 内部有完善的回退机制处理不支持的情况(见 B1)
122
160
  const result = await downloadFileInChunks(url, options, proxyUrl, hostKey)
123
161
  return result
124
162
  }
@@ -292,7 +330,7 @@ async function downloadFileInChunks (
292
330
  url: string,
293
331
  options: http.RequestOptions,
294
332
  proxyUrl: string | undefined,
295
- hostname: string,
333
+ hostKey: string,
296
334
  ): Promise<Readable> {
297
335
  const tmpFile = join(tmpdir(), `filebox-${randomUUID()}`)
298
336
  let writeStream = createWriteStream(tmpFile)
@@ -313,8 +351,7 @@ async function downloadFileInChunks (
313
351
  let downSize = 0
314
352
  let retries = 3
315
353
  // 控制是否使用 Range 请求(根据域名黑名单初始化)
316
- let useRange = !unsupportedRangeDomains.has(hostname)
317
- let useChunked = false
354
+ let useRange = !unsupportedRangeDomains.has(hostKey)
318
355
 
319
356
  do {
320
357
  // 每次循环前检查文件实际大小,作为真实的下载进度
@@ -345,6 +382,12 @@ async function downloadFileInChunks (
345
382
  // 416: Range Not Satisfiable,服务器不支持此范围或文件大小不匹配
346
383
  throw new FallbackError('416 Range Not Satisfiable')
347
384
  }
385
+ // B2:带 Range 请求收到 4xx 时,视为服务器不支持 Range,触发回退
386
+ // 典型场景:HEAD 返回非标准状态码(如 425),httpHeadHeader 无法判断是否支持 Range,
387
+ // 随后带 Range 的 GET 收到 400/403/405 等
388
+ if (useRange && !allowStatusCode.includes(res.statusCode ?? 0) && res.statusCode && res.statusCode >= 400 && res.statusCode < 500) {
389
+ throw new FallbackError(`Server returned ${res.statusCode} for Range request`)
390
+ }
348
391
  assert(allowStatusCode.includes(res.statusCode ?? 0), `Request failed with status code ${res.statusCode}`)
349
392
  const contentLength = Number(res.headers['content-length']) || 0
350
393
  assert(contentLength >= 0, `Server returned ${contentLength} bytes of data`)
@@ -382,9 +425,6 @@ async function downloadFileInChunks (
382
425
  throw new Error(`File size mismatch: expected ${expectedTotal}, but server returned ${total}`)
383
426
  }
384
427
 
385
- // 标记使用了分片下载
386
- useChunked = true
387
-
388
428
  // 验证服务器返回的范围是否与请求匹配
389
429
  if (actualStart !== start) {
390
430
  if (actualStart > start) {
@@ -409,18 +449,16 @@ async function downloadFileInChunks (
409
449
  downSize += end - start + 1
410
450
  start = downSize
411
451
  } else if (res.statusCode === 200) {
412
- // 200: 服务器返回完整文件
413
- if (useChunked || start > 0) {
414
- // 之前以分片模式下载过数据
415
- writeStream.destroy()
416
- await rm(tmpFile, { force: true }).catch(() => {})
417
- writeStream = createWriteStream(tmpFile)
418
- writeStream.on('error', onWriteError)
419
- start = 0
420
- downSize = 0
452
+ if (useRange) {
453
+ // B1:发了 Range 却收到 200 —— 服务器未实现 Range
454
+ // 响应体是"对带 Range 请求的回答",不可信(见 CMSV6 场景)
455
+ // 交给 FallbackError 的 catch 分支统一处理:销毁流、删 tmp、加入黑名单、重发
456
+ throw new FallbackError('Server returned 200 for Range request')
421
457
  }
422
-
423
- // 处理完整文件响应
458
+ // 200: 服务器返回完整文件(本次未带 Range)
459
+ // B1 保证带 Range 收 200 一定经 FallbackError 回退,catch 里已重置
460
+ // expectedTotal/downSize/start/useRange,进入此分支时一定是
461
+ // 全新的非 Range 请求,无须再清理已写数据
424
462
  expectedTotal = contentLength
425
463
  await pipeline(res, writeStream, { end: false, signal })
426
464
  downSize = contentLength
@@ -433,7 +471,7 @@ async function downloadFileInChunks (
433
471
  } catch (error) {
434
472
  if (error instanceof FallbackError) {
435
473
  // 回退逻辑:记录域名、重置状态,在下次循环中以非 range 模式请求
436
- unsupportedRangeDomains.add(hostname)
474
+ addUnsupportedRangeDomain(hostKey)
437
475
 
438
476
  // 关闭当前写入流
439
477
  writeStream.destroy()
@@ -452,7 +490,6 @@ async function downloadFileInChunks (
452
490
  expectedTotal = null
453
491
  downSize = 0
454
492
  start = 0
455
- useChunked = false
456
493
  useRange = false
457
494
  retries = 3
458
495
  continue
package/src/version.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  /**
2
2
  * This file was auto generated from scripts/generate-version.sh
3
3
  */
4
- export const VERSION: string = '1.8.4'
4
+ export const VERSION: string = '1.8.6'