@juzi/file-box 1.8.2 → 1.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/misc.spec.ts CHANGED
@@ -12,12 +12,16 @@ import {
12
12
  httpHeadHeader,
13
13
  httpStream,
14
14
  streamToBuffer,
15
+ __clearUnsupportedRangeDomains,
16
+ __addUnsupportedRangeDomain,
15
17
  } from './misc.js'
16
18
 
17
19
  // 设置短超时用于测试
18
20
  CONFIG.HTTP_REQUEST_TIMEOUT = 1000
19
21
  CONFIG.HTTP_RESPONSE_TIMEOUT = 1000
20
22
 
23
+ const TC_QQ_DOWNLOAD_URL = 'http://wxapp.tc.qq.com/270/20304/stodownload?m=775ba582fe1d27e158806a4c10230a45&filekey=30350201010421301f0202010e040253480410775ba582fe1d27e158806a4c10230a450203017a31040d00000004627466730000000132&hy=SH&storeid=2685965c90008afdf000000000000010e00004f50534801c33031571b54757&bizid=1023'
24
+
21
25
  test('dataUrl to base64', async t => {
22
26
  const base64 = [
23
27
  'R0lGODlhEAAQAMQAAORHHOVSKudfOulrSOp3WOyDZu6QdvCchPGolfO0o/XBs/fNwfjZ0frl',
@@ -72,6 +76,17 @@ test('httpHeadHeader', async t => {
72
76
  )
73
77
  })
74
78
 
79
// NOTE(review): this test talks to a live third-party host, so it needs network access.
test('httpHeadHeader with tc qq download url', async t => {
  // Temporarily raise the request timeout; the previous value is restored on teardown.
  const savedTimeout = CONFIG.HTTP_REQUEST_TIMEOUT
  t.teardown(() => { CONFIG.HTTP_REQUEST_TIMEOUT = savedTimeout })
  CONFIG.HTTP_REQUEST_TIMEOUT = 10000

  const {
    'accept-ranges': acceptRanges,
    'content-type': contentType,
    'content-length': contentLength,
  } = await httpHeadHeader(TC_QQ_DOWNLOAD_URL)

  t.equal(acceptRanges, 'bytes', 'should support byte range')
  t.equal(contentType, 'image/jpg', 'should get content type')
  t.ok(Number(contentLength) > 0, 'should get content length')
})
89
+
75
90
  test('httpHeaderToFileName', async t => {
76
91
  const HEADERS_QUOTATION_MARK: any = {
77
92
  'content-disposition': 'attachment; filename="db-0.0.19.zip"',
@@ -187,3 +202,170 @@ test('httpStream in chunks', async (t) => {
187
202
  const buffer = await streamToBuffer(res)
188
203
  t.equal(buffer.length, FILE_SIZE, 'should get data in chunks right')
189
204
  })
205
+
206
// A2: when HEAD answers `Accept-Ranges: none`, the GET must be sent without a Range header.
test('httpStream: HEAD Accept-Ranges=none 时不发 Range 请求(A2)', async (t) => {
  __clearUnsupportedRangeDomains()

  const payload = Buffer.from('TRUE-DATA-A2', 'utf8')
  const payloadLength = String(payload.length)
  let getCount = 0
  let sawRangeOnGet: boolean | undefined

  const server = createServer((req, res) => {
    if (req.method !== 'HEAD') {
      // Any non-HEAD request is counted as the download GET.
      getCount += 1
      sawRangeOnGet = 'range' in req.headers
      res.writeHead(200, { 'Content-Length': payloadLength })
      res.end(payload)
      return
    }
    res.writeHead(200, {
      'Accept-Ranges': 'none',
      'Content-Length': payloadLength,
    })
    res.end()
  })

  // Bind to an ephemeral port on loopback and build the base URL from it.
  const origin = await new Promise<string>((resolve) => {
    server.listen(0, '127.0.0.1', () => {
      const { port } = server.address() as AddressInfo
      resolve(`http://127.0.0.1:${port}`)
    })
  })
  t.teardown(() => { server.close() })

  const received = await streamToBuffer(await httpStream(`${origin}/file`))

  t.equal(getCount, 1, 'GET 应只被调用 1 次')
  t.equal(sawRangeOnGet, false, 'GET 请求不应携带 Range header')
  t.equal(received.toString('utf8'), payload.toString('utf8'), '应拿到真实数据')
})
243
+
244
// A2 persistence: two consecutive downloads from a host whose HEAD says
// `Accept-Ranges: none` must both issue exactly one GET without a Range header.
test('httpStream: HEAD Accept-Ranges=none 端到端持久化,第二次请求直接跳过 Range(A2 持久化)', async (t) => {
  __clearUnsupportedRangeDomains()

  const payload = Buffer.from('E2E-A2-PERSISTENCE', 'utf8')
  const payloadLength = String(payload.length)
  const rangeSeenPerGet: (string | undefined)[] = []
  let getCount = 0

  const server = createServer((req, res) => {
    if (req.method !== 'HEAD') {
      getCount += 1
      const { range } = req.headers
      rangeSeenPerGet.push(typeof range === 'string' ? range : undefined)
      res.writeHead(200, { 'Content-Length': payloadLength })
      res.end(payload)
      return
    }
    res.writeHead(200, {
      'Accept-Ranges': 'none',
      'Content-Length': payloadLength,
    })
    res.end()
  })

  const origin = await new Promise<string>((resolve) => {
    server.listen(0, '127.0.0.1', () => {
      const { port } = server.address() as AddressInfo
      resolve(`http://127.0.0.1:${port}`)
    })
  })
  t.teardown(() => { server.close() })

  const fileUrl = `${origin}/file`

  // First download: HEAD reports Accept-Ranges=none, so the host gets blacklisted.
  await streamToBuffer(await httpStream(fileUrl))

  // Second download from the same host: the blacklist is expected to hit, so GET carries no Range.
  const secondBody = await streamToBuffer(await httpStream(fileUrl))

  t.equal(getCount, 2, 'GET 应被调用 2 次(每次请求各 1 次,无 Range 重试)')
  t.equal(rangeSeenPerGet[0], undefined, '第 1 次 GET 不带 Range(本次 HEAD 已宣告 none)')
  t.equal(rangeSeenPerGet[1], undefined, '第 2 次 GET 不带 Range(黑名单命中)')
  t.equal(secondBody.toString('utf8'), payload.toString('utf8'), '第二次应拿到真实数据')
})
288
+
289
// B1 (CMSV6 scenario): a ranged GET answered with 200 must be discarded and
// the download retried without a Range header.
test('httpStream: 带 Range 却收到 200 时回退重发不带 Range(B1 - CMSV6 场景)', async (t) => {
  __clearUnsupportedRangeDomains()

  const bogusBody = Buffer.from('FAKE-DATA-FROM-WRONG-STREAM', 'utf8')
  // Same length as the bogus body, different content.
  const genuineBody = Buffer.alloc(bogusBody.length, 'T')
  const genuineLength = String(genuineBody.length)
  const rangeSeenPerGet: (string | undefined)[] = []
  let getCount = 0

  const server = createServer((req, res) => {
    if (req.method === 'HEAD') {
      // Deliberately omits Accept-Ranges to mimic CMSV6.
      res.writeHead(200, { 'Content-Length': genuineLength })
      res.end()
      return
    }

    getCount += 1
    const { range } = req.headers
    rangeSeenPerGet.push(typeof range === 'string' ? range : undefined)

    // CMSV6 behaviour: always 200 with the correct Content-Length, but the
    // body differs depending on whether a Range header was sent.
    res.writeHead(200, { 'Content-Length': genuineLength })
    res.end(range ? bogusBody : genuineBody)
  })

  const origin = await new Promise<string>((resolve) => {
    server.listen(0, '127.0.0.1', () => {
      const { port } = server.address() as AddressInfo
      resolve(`http://127.0.0.1:${port}`)
    })
  })
  t.teardown(() => { server.close() })

  const received = await streamToBuffer(await httpStream(`${origin}/file`))

  t.equal(getCount, 2, 'GET 应被调用 2 次(第一次带 Range 触发 B1,第二次回退)')
  t.ok(rangeSeenPerGet[0], '第 1 次 GET 应携带 Range header')
  t.equal(rangeSeenPerGet[1], undefined, '第 2 次 GET 不应携带 Range header')
  t.equal(received.toString('utf8'), genuineBody.toString('utf8'), '最终数据应为 TRUE_DATA(回退后拿到的)')
})
334
+
335
// B1 blacklist persistence: a host that is already blacklisted must be
// downloaded with a single GET that carries no Range header.
test('httpStream: 黑名单登记的 host 后续请求直接跳过 Range(B1 黑名单持久化)', async (t) => {
  __clearUnsupportedRangeDomains()

  const payload = Buffer.from('SECOND-DOWNLOAD-AFTER-BLACKLIST', 'utf8')
  const payloadLength = String(payload.length)
  let getCount = 0
  let sawRangeOnGet: boolean | undefined

  const server = createServer((req, res) => {
    // HEAD and GET share the same status line and Content-Length here.
    res.writeHead(200, { 'Content-Length': payloadLength })
    if (req.method === 'HEAD') {
      res.end()
      return
    }
    getCount += 1
    sawRangeOnGet = 'range' in req.headers
    res.end(payload)
  })

  const port = await new Promise<number>((resolve) => {
    server.listen(0, '127.0.0.1', () => {
      resolve((server.address() as AddressInfo).port)
    })
  })
  t.teardown(() => { server.close() })

  // Seed the host key by hand to simulate an earlier B1 fallback for this host.
  const hostKey = `127.0.0.1:${port}`
  __addUnsupportedRangeDomain(hostKey)

  const received = await streamToBuffer(await httpStream(`http://${hostKey}/file`))

  t.equal(getCount, 1, '黑名单命中后 GET 只调用 1 次')
  t.equal(sawRangeOnGet, false, '黑名单命中后 GET 不带 Range header')
  t.equal(received.toString('utf8'), payload.toString('utf8'), '应拿到真实数据')
})
package/src/misc.ts CHANGED
@@ -24,8 +24,27 @@ const protocolMap: {
24
24
  }
25
25
 
26
26
  const noop = () => { }
27
+
28
/**
 * Maximum number of hosts kept in `unsupportedRangeDomains`.
 * The cap keeps memory bounded in long-running processes (e.g. gateways).
 */
const UNSUPPORTED_RANGE_DOMAINS_MAX = 1024

/**
 * Runtime blacklist of hosts known not to support Range requests.
 * Entries use the `hostname:port` format. A Set iterates in insertion order,
 * which is what the FIFO eviction below relies on.
 */
const unsupportedRangeDomains = new Set<string>()

/**
 * Adds `hostKey` to the runtime blacklist.
 *
 * A host that is already listed is left untouched (its position in the
 * eviction order does not change). When the insertion pushes the Set past
 * `UNSUPPORTED_RANGE_DOMAINS_MAX`, the earliest-inserted entry is removed.
 *
 * @param hostKey Host identifier in `hostname:port` form.
 */
function addUnsupportedRangeDomain (hostKey: string): void {
  const alreadyListed = unsupportedRangeDomains.has(hostKey)
  if (alreadyListed) {
    return
  }

  unsupportedRangeDomains.add(hostKey)

  const overflow = unsupportedRangeDomains.size - UNSUPPORTED_RANGE_DOMAINS_MAX
  if (overflow <= 0) {
    return
  }

  // Iteration starts at the oldest entry; drop it and stop immediately.
  for (const earliest of unsupportedRangeDomains) {
    unsupportedRangeDomains.delete(earliest)
    break
  }
}
47
+
29
48
  // 自定义 Error:标记需要回退到非分片下载
30
49
  class FallbackError extends Error {
31
50
 
@@ -36,6 +55,16 @@ class FallbackError extends Error {
36
55
 
37
56
  }
38
57
 
58
/**
 * Test-only helper: empties the runtime blacklist.
 *
 * The blacklist is a module-level Set, so without this reset entries added by
 * one test would leak into the next.
 */
export function __clearUnsupportedRangeDomains (): void {
  unsupportedRangeDomains.clear()
}
62
+
63
/**
 * Test-only helper: registers a host in the runtime blacklist by hand, so a
 * test can verify that later requests to it skip Range.
 *
 * @param hostKey Host identifier passed straight to `addUnsupportedRangeDomain`.
 */
export function __addUnsupportedRangeDomain (hostKey: string): void {
  addUnsupportedRangeDomain(hostKey)
}
67
+
39
68
  function getProtocol (protocol: string) {
40
69
  assert(protocolMap[protocol], new Error('unknown protocol: ' + protocol))
41
70
  return protocolMap[protocol]!
@@ -61,10 +90,7 @@ export async function httpHeadHeader (url: string, headers: http.OutgoingHttpHea
61
90
  throw new Error(`ttl expired! too many(>${REDIRECT_TTL}) 302 redirection.`)
62
91
  }
63
92
 
64
- const res = await fetch(url, {
65
- headers,
66
- method: 'HEAD',
67
- }, proxyUrl)
93
+ const res = await fetchHead(url, headers, proxyUrl)
68
94
  res.destroy()
69
95
 
70
96
  if (!/^3/.test(String(res.statusCode))) {
@@ -117,11 +143,20 @@ export async function httpStream (url: string, headers: http.OutgoingHttpHeaders
117
143
  const defaultPort = protocol === 'https:' ? '443' : '80'
118
144
  const hostKey = `${hostname}:${port || defaultPort}`
119
145
 
120
- // 直接尝试分片下载,不检查 Accept-Ranges 和 fileSize
146
+ // A2:若 HEAD 明确声明 Accept-Ranges: none,记录到运行期黑名单
147
+ // 以便 downloadFileInChunks 本次请求就直接以非 Range 模式发起
148
+ // Accept-Ranges header 可能是 string | string[],归一化后匹配 'none'
149
+ const acceptRangesRaw = headHeaders['accept-ranges']
150
+ const acceptRanges = Array.isArray(acceptRangesRaw) ? acceptRangesRaw[0] : acceptRangesRaw
151
+ if (typeof acceptRanges === 'string' && acceptRanges.trim().toLowerCase() === 'none') {
152
+ addUnsupportedRangeDomain(hostKey)
153
+ }
154
+
155
+ // 直接尝试分片下载,不检查 fileSize
121
156
  // 原因:
122
157
  // 1. 有些服务器 HEAD 不返回 Accept-Ranges 但实际支持分片
123
158
  // 2. 有些服务器 HEAD 返回 fileSize=0 但实际支持分片
124
- // downloadFileInChunks 内部有完善的回退机制处理不支持的情况
159
+ // downloadFileInChunks 内部有完善的回退机制处理不支持的情况(见 B1)
125
160
  const result = await downloadFileInChunks(url, options, proxyUrl, hostKey)
126
161
  return result
127
162
  }
@@ -205,6 +240,68 @@ async function fetch (url: string, options: http.RequestOptions, proxyUrl?: stri
205
240
  return res!
206
241
  }
207
242
 
243
/**
 * Issues a HEAD request and, when that fails in a way
 * `shouldFallbackHeadToRangeGet` accepts, retries as a Range-probe GET.
 *
 * @param url Target URL.
 * @param headers Request headers forwarded to both attempts.
 * @param proxyUrl Optional proxy URL forwarded to `fetch`.
 * @returns The response of whichever attempt succeeded.
 * @throws The original HEAD error when no fallback applies, or when the
 *   fallback itself fails (the fallback's own error is discarded).
 */
async function fetchHead (url: string, headers: http.OutgoingHttpHeaders = {}, proxyUrl?: string): Promise<http.IncomingMessage> {
  let headError: unknown

  try {
    return await fetch(url, {
      headers,
      method: 'HEAD',
    }, proxyUrl)
  } catch (error) {
    headError = error
  }

  if (!shouldFallbackHeadToRangeGet(headError)) {
    throw headError
  }

  try {
    return await fetchRangeHeader(url, headers, proxyUrl)
  } catch {
    // Surface the HEAD failure rather than the probe failure.
    throw headError
  }
}
261
+
262
/**
 * Decides whether a failed HEAD request should be retried as a Range GET.
 *
 * @param error Whatever the HEAD attempt threw.
 * @returns `true` when the error's `code` starts with `HPE_` or its message
 *   (or string form, for non-Error values) contains `Parse Error`.
 */
function shouldFallbackHeadToRangeGet (error: unknown): boolean {
  let code = ''
  if (typeof error === 'object' && error && 'code' in error) {
    code = String((error as NodeJS.ErrnoException).code)
  }

  let message: string
  if (error instanceof Error) {
    message = error.message
  } else {
    message = String(error)
  }

  if (code.startsWith('HPE_')) {
    return true
  }
  return message.includes('Parse Error')
}
270
+
271
/**
 * Fetches response headers with a GET that asks for a single byte
 * (see `createRangeProbeHeaders`) and then rewrites the received headers in
 * place via `normalizeRangeProbeHeaders`.
 *
 * @param url Target URL.
 * @param headers Caller headers; any Range header among them is replaced.
 * @param proxyUrl Optional proxy URL forwarded to `fetch`.
 * @returns The probe response, with its `headers` object already normalized.
 */
async function fetchRangeHeader (url: string, headers: http.OutgoingHttpHeaders = {}, proxyUrl?: string): Promise<http.IncomingMessage> {
  const probeOptions: http.RequestOptions = {
    headers: createRangeProbeHeaders(headers),
    method: 'GET',
  }
  const probeResponse = await fetch(url, probeOptions, proxyUrl)
  normalizeRangeProbeHeaders(probeResponse.headers)
  return probeResponse
}
279
+
280
/**
 * Builds the header set for a one-byte Range probe.
 *
 * Copies every header except those named `range` (compared
 * case-insensitively) into a new object, then sets `Range: bytes=0-0`.
 * The input object is not modified.
 *
 * @param headers Headers supplied by the caller.
 * @returns A new header object ending with the probe's `Range` entry.
 */
function createRangeProbeHeaders (headers: http.OutgoingHttpHeaders): http.OutgoingHttpHeaders {
  const probeHeaders: http.OutgoingHttpHeaders = {}

  Object.keys(headers)
    .filter(name => name.toLowerCase() !== 'range')
    .forEach(name => {
      probeHeaders[name] = headers[name]
    })

  probeHeaders['Range'] = 'bytes=0-0'
  return probeHeaders
}
293
+
294
/**
 * Rewrites, in place, the headers of a one-byte Range probe response so that
 * `content-length` describes the whole resource rather than the probe body.
 *
 * - `Content-Range: bytes a-b/<total>`: `content-length` is replaced by
 *   `<total>`.
 * - `Content-Range: bytes a-b/*` (total unknown): `content-length` is removed,
 *   because the value sent with a partial response is only the size of the
 *   returned slice and would otherwise be read as the file size.
 * - No (or unrecognised) `Content-Range`: the headers are left untouched.
 *
 * The range unit is matched case-insensitively and any amount of whitespace
 * is accepted after it.
 *
 * @param headers Response headers to normalize; mutated directly.
 */
function normalizeRangeProbeHeaders (headers: http.IncomingHttpHeaders): void {
  const rawContentRange = headers['content-range']
  const contentRange = Array.isArray(rawContentRange) ? rawContentRange[0] : rawContentRange
  if (typeof contentRange !== 'string') {
    return
  }

  const total = /bytes\s+\d+-\d+\/(\d+|\*)/i.exec(contentRange)?.[1]
  if (total === undefined) {
    return
  }

  if (total === '*') {
    // Complete length unknown: drop the slice length instead of reporting it as the file size.
    delete headers['content-length']
    return
  }

  headers['content-length'] = total
}
304
+
208
305
  function createSkipTransform (skipBytes: number): Transform {
209
306
  let skipped = 0
210
307
  return new Transform({
@@ -233,7 +330,7 @@ async function downloadFileInChunks (
233
330
  url: string,
234
331
  options: http.RequestOptions,
235
332
  proxyUrl: string | undefined,
236
- hostname: string,
333
+ hostKey: string,
237
334
  ): Promise<Readable> {
238
335
  const tmpFile = join(tmpdir(), `filebox-${randomUUID()}`)
239
336
  let writeStream = createWriteStream(tmpFile)
@@ -254,8 +351,7 @@ async function downloadFileInChunks (
254
351
  let downSize = 0
255
352
  let retries = 3
256
353
  // 控制是否使用 Range 请求(根据域名黑名单初始化)
257
- let useRange = !unsupportedRangeDomains.has(hostname)
258
- let useChunked = false
354
+ let useRange = !unsupportedRangeDomains.has(hostKey)
259
355
 
260
356
  do {
261
357
  // 每次循环前检查文件实际大小,作为真实的下载进度
@@ -323,9 +419,6 @@ async function downloadFileInChunks (
323
419
  throw new Error(`File size mismatch: expected ${expectedTotal}, but server returned ${total}`)
324
420
  }
325
421
 
326
- // 标记使用了分片下载
327
- useChunked = true
328
-
329
422
  // 验证服务器返回的范围是否与请求匹配
330
423
  if (actualStart !== start) {
331
424
  if (actualStart > start) {
@@ -350,18 +443,16 @@ async function downloadFileInChunks (
350
443
  downSize += end - start + 1
351
444
  start = downSize
352
445
  } else if (res.statusCode === 200) {
353
- // 200: 服务器返回完整文件
354
- if (useChunked || start > 0) {
355
- // 之前以分片模式下载过数据
356
- writeStream.destroy()
357
- await rm(tmpFile, { force: true }).catch(() => {})
358
- writeStream = createWriteStream(tmpFile)
359
- writeStream.on('error', onWriteError)
360
- start = 0
361
- downSize = 0
446
+ if (useRange) {
447
+ // B1:发了 Range 却收到 200 —— 服务器未实现 Range
448
+ // 响应体是"对带 Range 请求的回答",不可信(见 CMSV6 场景)
449
+ // 交给 FallbackError 的 catch 分支统一处理:销毁流、删 tmp、加入黑名单、重发
450
+ throw new FallbackError('Server returned 200 for Range request')
362
451
  }
363
-
364
- // 处理完整文件响应
452
+ // 200: 服务器返回完整文件(本次未带 Range)
453
+ // B1 保证带 Range 收 200 一定经 FallbackError 回退,catch 里已重置
454
+ // expectedTotal/downSize/start/useRange,进入此分支时一定是
455
+ // 全新的非 Range 请求,无须再清理已写数据
365
456
  expectedTotal = contentLength
366
457
  await pipeline(res, writeStream, { end: false, signal })
367
458
  downSize = contentLength
@@ -374,7 +465,7 @@ async function downloadFileInChunks (
374
465
  } catch (error) {
375
466
  if (error instanceof FallbackError) {
376
467
  // 回退逻辑:记录域名、重置状态,在下次循环中以非 range 模式请求
377
- unsupportedRangeDomains.add(hostname)
468
+ addUnsupportedRangeDomain(hostKey)
378
469
 
379
470
  // 关闭当前写入流
380
471
  writeStream.destroy()
@@ -393,7 +484,6 @@ async function downloadFileInChunks (
393
484
  expectedTotal = null
394
485
  downSize = 0
395
486
  start = 0
396
- useChunked = false
397
487
  useRange = false
398
488
  retries = 3
399
489
  continue
package/src/version.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  /**
2
2
  * This file was auto generated from scripts/generate-version.sh
3
3
  */
4
- export const VERSION: string = '1.8.2'
4
+ export const VERSION: string = '1.8.5'