@audio/decode-webm 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/decode-webm.js +174 -48
  2. package/package.json +4 -2
package/decode-webm.js CHANGED
@@ -189,6 +189,7 @@ function parseWebm(buf) {
189
189
 
190
190
  return {
191
191
  codec: audioTrack.codec,
192
+ trackNum: audioTrack.number,
192
193
  sampleRate: audioTrack.sampleRate,
193
194
  channels: audioTrack.channels,
194
195
  codecPrivate: audioTrack.codecPrivate,
@@ -314,20 +315,11 @@ function makeOggPage(packets, granule, serial, seq, flags) {
314
315
  }
315
316
 
316
317
  /**
317
- * Wrap raw Vorbis header packets and audio frames into an OGG bitstream.
318
- * Max 255 segments per OGG page. Granule on EOS page set high to avoid truncation
319
- * (exact sample count is unknown without deep Vorbis mode parsing).
318
+ * Wrap raw Vorbis frames into OGG page(s) for incremental feeding to OggVorbisDecoder.
319
+ * Granule = -1 (not set) — decoder uses internal sample counting.
320
320
  */
321
- function vorbisToOgg(headers, frames) {
322
- let serial = 0x564F5242, pages = [], seq = 0 // "VORB"
323
-
324
- // Page 0: BOS — identification header only, granule 0
325
- pages.push(makeOggPage([headers[0]], 0, serial, seq++, 0x02))
326
- // Page 1: comment + setup headers, granule 0
327
- pages.push(makeOggPage([headers[1], headers[2]], 0, serial, seq++, 0))
328
-
329
- // Audio pages — pack frames respecting 255-segment limit
330
- let i = 0
321
+ function framesToOgg(frames, serial, seqRef) {
322
+ let pages = [], i = 0
331
323
  while (i < frames.length) {
332
324
  let pkt = [], segCount = 0
333
325
  while (i < frames.length) {
@@ -337,44 +329,31 @@ function vorbisToOgg(headers, frames) {
337
329
  segCount += needed
338
330
  i++
339
331
  }
340
- let isLast = i >= frames.length
341
- // Granule: -1 (not set) on intermediate pages; max safe int on EOS to avoid truncation
342
- pages.push(makeOggPage(pkt, isLast ? 0x1FFFFFFFFFFFFF : -1, serial, seq++, isLast ? 0x04 : 0))
332
+ pages.push(makeOggPage(pkt, -1, serial, seqRef.n++, 0))
343
333
  }
344
-
345
334
  let totalLen = 0
346
335
  for (let p of pages) totalLen += p.length
347
- let ogg = new Uint8Array(totalLen)
348
- let off = 0
349
- for (let p of pages) { ogg.set(p, off); off += p.length }
350
- return ogg
336
+ return concat(pages, totalLen)
351
337
  }
352
338
 
353
339
  /**
354
- * Decode raw Vorbis frames via ogg-vorbis decoder
340
+ * Create OggVorbisDecoder initialized with Vorbis headers from WebM CodecPrivate
355
341
  */
356
- async function decodeVorbis(info) {
342
+ async function createVorbisStream(info) {
357
343
  let { OggVorbisDecoder } = await import('@wasm-audio-decoders/ogg-vorbis')
358
-
359
344
  let headers = parseVorbisPrivate(info.codecPrivate)
360
345
  if (!headers) throw Error('Invalid Vorbis CodecPrivate')
361
346
 
362
- if (!info.frames.length) return EMPTY
363
-
364
- let ogg = vorbisToOgg(headers, info.frames)
365
347
  let dec = new OggVorbisDecoder()
366
348
  await dec.ready
367
349
 
368
- let result = await dec.decodeFile(ogg)
369
- dec.free()
370
-
371
- if (!result?.channelData?.length) return EMPTY
372
-
373
- let { channelData, samplesDecoded, sampleRate } = result
374
- if (samplesDecoded != null && samplesDecoded < channelData[0].length)
375
- channelData = channelData.map(ch => ch.subarray(0, samplesDecoded))
350
+ let serial = 0x564F5242, seq = { n: 0 }
351
+ // Feed header pages: BOS (identification) + comment/setup
352
+ let bos = makeOggPage([headers[0]], 0, serial, seq.n++, 0x02)
353
+ let hdr = makeOggPage([headers[1], headers[2]], 0, serial, seq.n++, 0)
354
+ await dec.decode(concat([bos, hdr], bos.length + hdr.length))
376
355
 
377
- return { channelData, sampleRate }
356
+ return { dec, serial, seq }
378
357
  }
379
358
 
380
359
  /**
@@ -402,34 +381,181 @@ export default async function decode(src) {
402
381
  * @returns {Promise<{decode(chunk: Uint8Array): Promise<AudioData>, flush(): Promise<AudioData>, free(): void}>}
403
382
  */
404
383
  export async function decoder() {
405
- let freed = false, chunks = [], totalLen = 0
384
+ let freed = false
385
+ let codecDec = null, info = null
386
+ let accum = [], accumLen = 0 // header parsing accumulator
387
+ let scanner = null
406
388
 
407
389
  return {
408
390
  async decode(data) {
409
391
  if (freed) throw Error('Decoder already freed')
410
392
  if (!data?.length) return EMPTY
411
- chunks.push(data instanceof Uint8Array ? data : new Uint8Array(data))
412
- totalLen += data.length
393
+ let chunk = data instanceof Uint8Array ? data : new Uint8Array(data)
394
+
395
+ // Phase 1: parse header to get track info
396
+ if (!info) {
397
+ accum.push(chunk)
398
+ accumLen += chunk.length
399
+ let buf = accum.length === 1 ? accum[0] : concat(accum, accumLen)
400
+ try { info = parseWebm(buf) } catch {
401
+ if (accumLen < 8192) return EMPTY
402
+ throw Error('Not a WebM file')
403
+ }
413
404
 
414
- let buf = chunks.length === 1 ? chunks[0] : concat(chunks, totalLen)
415
- let info = parseWebm(buf)
416
- if (!info.frames.length) return EMPTY
405
+ if (info.codec === 'A_VORBIS') {
406
+ codecDec = await createVorbisStream(info)
407
+ } else if (info.codec === 'A_OPUS') {
408
+ codecDec = await createOpusStream(info)
409
+ } else {
410
+ throw Error('Unsupported WebM codec: ' + info.codec)
411
+ }
417
412
 
418
- if (info.codec === 'A_OPUS') return decodeOpus(info)
419
- if (info.codec === 'A_VORBIS') return decodeVorbis(info)
420
- throw Error('Unsupported WebM codec: ' + info.codec)
413
+ // Init incremental scanner — walk initial buffer to establish position
414
+ scanner = new EBMLScanner(info.trackNum)
415
+ scanner.init(buf)
416
+ accum = []; accumLen = 0
417
+
418
+ // Decode initial frames found by parseWebm
419
+ if (info.frames.length) {
420
+ if (info.codec === 'A_VORBIS') {
421
+ let ogg = framesToOgg(info.frames, codecDec.serial, codecDec.seq)
422
+ let result = await codecDec.dec.decode(ogg)
423
+ return normResult(result)
424
+ }
425
+ let result = codecDec.dec.decodeFrames(info.frames)
426
+ return normResult(result)
427
+ }
428
+ return EMPTY
429
+ }
430
+
431
+ // Phase 2: incremental scanning
432
+ let frames = scanner.feed(chunk)
433
+ if (!frames.length) return EMPTY
434
+ if (info.codec === 'A_VORBIS') {
435
+ let ogg = framesToOgg(frames, codecDec.serial, codecDec.seq)
436
+ let result = await codecDec.dec.decode(ogg)
437
+ return normResult(result)
438
+ }
439
+ let result = codecDec.dec.decodeFrames(frames)
440
+ return normResult(result)
421
441
  },
422
442
  async flush() {
423
443
  if (freed) return EMPTY
424
- freed = true
425
- chunks = []; totalLen = 0
444
+
445
+ if (codecDec) {
446
+ let result = await codecDec.dec.flush?.()
447
+ let r = normResult(result)
448
+ codecDec.dec.free?.()
449
+ codecDec = null
450
+ freed = true; scanner = null
451
+ return r
452
+ }
453
+
454
+ freed = true; scanner = null
426
455
  return EMPTY
427
456
  },
428
457
  free() {
458
+ if (freed) return
429
459
  freed = true
430
- chunks = []; totalLen = 0
460
+ if (codecDec) { codecDec.dec.free?.(); codecDec = null }
461
+ scanner = null
462
+ }
463
+ }
464
+ }
465
+
466
+ /**
467
+ * Incremental EBML scanner — extracts audio frames from Cluster/SimpleBlock
468
+ * elements without re-parsing the entire buffer.
469
+ */
470
+ class EBMLScanner {
471
+ constructor(trackNum) {
472
+ this.trackNum = trackNum
473
+ this.left = null
474
+ }
475
+
476
+ // Walk initial buffer to establish position (frames already decoded by parseWebm)
477
+ init(buf) {
478
+ this.left = null
479
+ let pos = 0
480
+ while (pos < buf.length) {
481
+ let eid = readId(buf, pos)
482
+ if (!eid) break
483
+ let siz = readSize(buf, pos + eid.len)
484
+ if (!siz) break
485
+ let dataOff = pos + eid.len + siz.len
486
+ let id = eid.val, dataLen = siz.val
487
+ if (id === ID_SEGMENT || id === ID_CLUSTER || id === ID_BLOCK_GROUP) { pos = dataOff; continue }
488
+ if (dataLen < 0) break
489
+ if (dataOff + dataLen > buf.length) break
490
+ pos = dataOff + dataLen
431
491
  }
492
+ if (pos < buf.length) this.left = buf.subarray(pos).slice()
432
493
  }
494
+
495
+ // Feed new data, return extracted audio frames
496
+ feed(chunk) {
497
+ let buf = chunk
498
+ if (this.left) {
499
+ buf = new Uint8Array(this.left.length + chunk.length)
500
+ buf.set(this.left); buf.set(chunk, this.left.length)
501
+ this.left = null
502
+ }
503
+ let frames = [], pos = 0
504
+ while (pos < buf.length) {
505
+ let eid = readId(buf, pos)
506
+ if (!eid) break
507
+ let siz = readSize(buf, pos + eid.len)
508
+ if (!siz) break
509
+ let dataOff = pos + eid.len + siz.len
510
+ let id = eid.val, dataLen = siz.val
511
+ // Master elements: descend (skip element header)
512
+ if (id === ID_SEGMENT || id === ID_CLUSTER || id === ID_BLOCK_GROUP) { pos = dataOff; continue }
513
+ if (dataLen < 0) break // unknown-size non-master
514
+ if (dataOff + dataLen > buf.length) break // incomplete element
515
+ // SimpleBlock / Block: extract audio frame
516
+ if ((id === ID_SIMPLE_BLOCK || id === ID_BLOCK) && dataLen > 4) {
517
+ let bp = dataOff
518
+ let tn = readSize(buf, bp)
519
+ if (tn && tn.val === this.trackNum) {
520
+ bp += tn.len + 3
521
+ if (bp < dataOff + dataLen) frames.push(buf.slice(bp, dataOff + dataLen))
522
+ }
523
+ }
524
+ pos = dataOff + dataLen
525
+ }
526
+ if (pos < buf.length) this.left = buf.subarray(pos).slice()
527
+ return frames
528
+ }
529
+ }
530
+
531
+ async function createOpusStream(info) {
532
+ let { OpusDecoder } = await import('opus-decoder')
533
+ let head = info.codecPrivate ? parseOpusHead(info.codecPrivate) : null
534
+ let channels = head?.channels || info.channels || 2
535
+ let preSkip = head?.preSkip || 0
536
+ if (!preSkip && info.codecDelay) preSkip = Math.round(info.codecDelay / 1e9 * 48000)
537
+ let opts = { channels, sampleRate: 48000, preSkip }
538
+ if (head && head.mappingFamily > 0) {
539
+ opts.streamCount = head.streamCount
540
+ opts.coupledStreamCount = head.coupledStreamCount
541
+ opts.channelMappingTable = head.channelMappingTable
542
+ } else if (channels === 1) {
543
+ opts.streamCount = 1; opts.coupledStreamCount = 0; opts.channelMappingTable = [0]
544
+ } else if (channels === 2) {
545
+ opts.streamCount = 1; opts.coupledStreamCount = 1; opts.channelMappingTable = [0, 1]
546
+ }
547
+ let dec = new OpusDecoder(opts)
548
+ await dec.ready
549
+ return { dec, channels }
550
+ }
551
+
552
+ function normResult(result) {
553
+ if (!result?.channelData?.length) return EMPTY
554
+ let { channelData, samplesDecoded, sampleRate } = result
555
+ if (samplesDecoded != null && samplesDecoded < channelData[0].length)
556
+ channelData = channelData.map(ch => ch.subarray(0, samplesDecoded))
557
+ if (!channelData[0]?.length) return EMPTY
558
+ return { channelData, sampleRate }
433
559
  }
434
560
 
435
561
  function concat(parts, totalLen) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@audio/decode-webm",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "description": "Decode WebM audio (Opus, Vorbis) to PCM samples",
5
5
  "type": "module",
6
6
  "main": "decode-webm.js",
@@ -29,7 +29,9 @@
29
29
  "decoder",
30
30
  "pcm"
31
31
  ],
32
- "publishConfig": { "access": "public" },
32
+ "publishConfig": {
33
+ "access": "public"
34
+ },
33
35
  "license": "MIT",
34
36
  "author": "audiojs",
35
37
  "homepage": "https://github.com/audiojs/decode-webm#readme",