npm - @audio/decode-webm - Versions diffs - 1.0.0 → 1.1.0 - Mend

@audio/decode-webm 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/decode-webm.js +174 -48
package/package.json +4 -2

package/decode-webm.js CHANGED Viewed

@@ -189,6 +189,7 @@ function parseWebm(buf) {
 	return {
 		codec: audioTrack.codec,
+		trackNum: audioTrack.number,
 		sampleRate: audioTrack.sampleRate,
 		channels: audioTrack.channels,
 		codecPrivate: audioTrack.codecPrivate,
@@ -314,20 +315,11 @@ function makeOggPage(packets, granule, serial, seq, flags) {
 }
 /**
- * Wrap raw Vorbis header packets and audio frames into an OGG bitstream.
- * Max 255 segments per OGG page. Granule on EOS page set high to avoid truncation
- * (exact sample count is unknown without deep Vorbis mode parsing).
+ * Wrap raw Vorbis frames into OGG page(s) for incremental feeding to OggVorbisDecoder.
+ * Granule = -1 (not set) — decoder uses internal sample counting.
  */
-function vorbisToOgg(headers, frames) {
-	let serial = 0x564F5242, pages = [], seq = 0 // "VORB"
-	// Page 0: BOS — identification header only, granule 0
-	pages.push(makeOggPage([headers[0]], 0, serial, seq++, 0x02))
-	// Page 1: comment + setup headers, granule 0
-	pages.push(makeOggPage([headers[1], headers[2]], 0, serial, seq++, 0))
-	// Audio pages — pack frames respecting 255-segment limit
-	let i = 0
+function framesToOgg(frames, serial, seqRef) {
+	let pages = [], i = 0
 	while (i < frames.length) {
 		let pkt = [], segCount = 0
 		while (i < frames.length) {
@@ -337,44 +329,31 @@ function vorbisToOgg(headers, frames) {
 			segCount += needed
 			i++
 		}
-		let isLast = i >= frames.length
-		// Granule: -1 (not set) on intermediate pages; max safe int on EOS to avoid truncation
-		pages.push(makeOggPage(pkt, isLast ? 0x1FFFFFFFFFFFFF : -1, serial, seq++, isLast ? 0x04 : 0))
+		pages.push(makeOggPage(pkt, -1, serial, seqRef.n++, 0))
 	}
 	let totalLen = 0
 	for (let p of pages) totalLen += p.length
-	let ogg = new Uint8Array(totalLen)
-	let off = 0
-	for (let p of pages) { ogg.set(p, off); off += p.length }
-	return ogg
+	return concat(pages, totalLen)
 }
 /**
- * Decode raw Vorbis frames via ogg-vorbis decoder
+ * Create OggVorbisDecoder initialized with Vorbis headers from WebM CodecPrivate
+ *
+ * Builds two OGG header pages (identification; comment + setup) from the parsed
+ * CodecPrivate and feeds them to the decoder, so later calls only need to supply
+ * audio pages.
+ * @param {object} info - track info; the new version reads only info.codecPrivate
+ * @returns {Promise<{dec: object, serial: number, seq: {n: number}}>} the decoder,
+ *   the OGG stream serial, and the shared page-sequence counter
+ * @throws {Error} 'Invalid Vorbis CodecPrivate' when parseVorbisPrivate returns a falsy value
  */
-async function decodeVorbis(info) {
+async function createVorbisStream(info) {
 	let { OggVorbisDecoder } = await import('@wasm-audio-decoders/ogg-vorbis') // loaded on demand via dynamic import
 	let headers = parseVorbisPrivate(info.codecPrivate) // helper defined outside this view; indexed below as headers[0..2]
 	if (!headers) throw Error('Invalid Vorbis CodecPrivate')
-	if (!info.frames.length) return EMPTY
-	let ogg = vorbisToOgg(headers, info.frames)
 	let dec = new OggVorbisDecoder()
 	await dec.ready // wait on the decoder's ready signal before feeding any data
-	let result = await dec.decodeFile(ogg)
-	dec.free()
-	if (!result?.channelData?.length) return EMPTY
-	let { channelData, samplesDecoded, sampleRate } = result
-	if (samplesDecoded != null && samplesDecoded < channelData[0].length)
-		channelData = channelData.map(ch => ch.subarray(0, samplesDecoded))
+	let serial = 0x564F5242, seq = { n: 0 } // serial bytes spell "VORB"; seq is an object so framesToOgg can advance the same counter
+	// Feed header pages: BOS (identification) + comment/setup
+	let bos = makeOggPage([headers[0]], 0, serial, seq.n++, 0x02) // 0x02 flag marks the beginning-of-stream page
+	let hdr = makeOggPage([headers[1], headers[2]], 0, serial, seq.n++, 0)
+	await dec.decode(concat([bos, hdr], bos.length + hdr.length)) // NOTE(review): dec is not freed if this rejects
-	return { channelData, sampleRate }
+	return { dec, serial, seq }
 }
 /**
@@ -402,34 +381,181 @@ export default async function decode(src) {
  * @returns {Promise<{decode(chunk: Uint8Array): Promise<AudioData>, flush(): Promise<AudioData>, free(): void}>}
  */
 export async function decoder() {
-	let freed = false, chunks = [], totalLen = 0
+	let freed = false
+	let codecDec = null, info = null // codecDec: object returned by create*Stream; info: parseWebm result
+	let accum = [], accumLen = 0 // header parsing accumulator: chunks buffered until parseWebm succeeds
+	let scanner = null // EBMLScanner instance, set once the header has been parsed
 	return {
 		async decode(data) {
 			if (freed) throw Error('Decoder already freed')
 			if (!data?.length) return EMPTY // null/empty input yields EMPTY without touching state
-			chunks.push(data instanceof Uint8Array ? data : new Uint8Array(data))
-			totalLen += data.length
+			let chunk = data instanceof Uint8Array ? data : new Uint8Array(data) // other inputs are wrapped via the Uint8Array constructor
+			// Phase 1: parse header to get track info (repeats on each call until parseWebm succeeds)
+			if (!info) {
+				accum.push(chunk)
+				accumLen += chunk.length
+				let buf = accum.length === 1 ? accum[0] : concat(accum, accumLen) // no copy when only one chunk is buffered
+				try { info = parseWebm(buf) } catch {
+					if (accumLen < 8192) return EMPTY // fewer than 8192 bytes buffered: treat the parse failure as "need more data"
+					throw Error('Not a WebM file')
+				}
-			let buf = chunks.length === 1 ? chunks[0] : concat(chunks, totalLen)
-			let info = parseWebm(buf)
-			if (!info.frames.length) return EMPTY
+				if (info.codec === 'A_VORBIS') { // NOTE(review): info is already set; if stream creation throws (or the codec is unsupported), later decode() calls reach Phase 2 with scanner still null
+					codecDec = await createVorbisStream(info)
+				} else if (info.codec === 'A_OPUS') {
+					codecDec = await createOpusStream(info)
+				} else {
+					throw Error('Unsupported WebM codec: ' + info.codec)
+				}
-			if (info.codec === 'A_OPUS') return decodeOpus(info)
-			if (info.codec === 'A_VORBIS') return decodeVorbis(info)
-			throw Error('Unsupported WebM codec: ' + info.codec)
+				// Init incremental scanner — walk initial buffer to establish position
+				scanner = new EBMLScanner(info.trackNum)
+				scanner.init(buf) // leaves any unparsed tail in scanner.left for the next feed()
+				accum = []; accumLen = 0 // header buffer is no longer needed
+				// Decode initial frames found by parseWebm
+				if (info.frames.length) {
+					if (info.codec === 'A_VORBIS') {
+						let ogg = framesToOgg(info.frames, codecDec.serial, codecDec.seq)
+						let result = await codecDec.dec.decode(ogg)
+						return normResult(result)
+					}
+					let result = codecDec.dec.decodeFrames(info.frames) // Opus path; not awaited — NOTE(review): confirm decodeFrames is synchronous
+					return normResult(result)
+				}
+				return EMPTY
+			}
+			// Phase 2: incremental scanning
+			let frames = scanner.feed(chunk) // frames completed by this chunk (may be empty)
+			if (!frames.length) return EMPTY
+			if (info.codec === 'A_VORBIS') {
+				let ogg = framesToOgg(frames, codecDec.serial, codecDec.seq)
+				let result = await codecDec.dec.decode(ogg)
+				return normResult(result)
+			}
+			let result = codecDec.dec.decodeFrames(frames) // Opus path, same as in Phase 1
+			return normResult(result)
 		},
 		async flush() {
 			if (freed) return EMPTY
-			freed = true
-			chunks = []; totalLen = 0
+			if (codecDec) {
+				let result = await codecDec.dec.flush?.() // flush is called only if the decoder has one; otherwise result is undefined and normResult returns EMPTY
+				let r = normResult(result)
+				codecDec.dec.free?.() // NOTE(review): skipped if flush rejects, since there is no try/finally
+				codecDec = null
+				freed = true; scanner = null
+				return r
+			}
+			freed = true; scanner = null // no codec decoder was ever created
 			return EMPTY
 		},
 		free() {
+			if (freed) return // repeated free() calls, or free() after flush(), are no-ops
 			freed = true
-			chunks = []; totalLen = 0
+			if (codecDec) { codecDec.dec.free?.(); codecDec = null }
+			scanner = null
+		}
+	}
+}
+/**
+ * Incremental EBML scanner — extracts audio frames from Cluster/SimpleBlock
+ * elements without re-parsing the entire buffer.
+ *
+ * Usage as seen in decoder(): construct with the audio track number, call
+ * init(buf) once with the buffer parseWebm already handled, then call
+ * feed(chunk) for each later chunk. Bytes that do not yet form a complete
+ * element are kept in this.left and prepended to the next chunk.
+ * Relies on readId/readSize and the ID_* constants defined outside this view.
+ */
+class EBMLScanner {
+	constructor(trackNum) {
+		this.trackNum = trackNum // compared against the track number read from each block
+		this.left = null // unconsumed trailing bytes carried over to the next feed()
+	}
+	// Walk initial buffer to establish position (frames already decoded by parseWebm)
+	init(buf) {
+		this.left = null
+		let pos = 0
+		while (pos < buf.length) {
+			let eid = readId(buf, pos)
+			if (!eid) break // falsy result from readId: stop scanning here
+			let siz = readSize(buf, pos + eid.len)
+			if (!siz) break // falsy result from readSize: stop scanning here
+			let dataOff = pos + eid.len + siz.len
+			let id = eid.val, dataLen = siz.val
+			if (id === ID_SEGMENT || id === ID_CLUSTER || id === ID_BLOCK_GROUP) { pos = dataOff; continue } // master elements: step inside by skipping only the header
+			if (dataLen < 0) break // negative size on a non-master element (labelled "unknown-size" in feed())
+			if (dataOff + dataLen > buf.length) break // element body extends past the buffer
+			pos = dataOff + dataLen
 		}
+		if (pos < buf.length) this.left = buf.subarray(pos).slice() // copy the unparsed tail so it does not alias buf
 	}
+	// Feed new data, return extracted audio frames
+	feed(chunk) {
+		let buf = chunk
+		if (this.left) {
+			buf = new Uint8Array(this.left.length + chunk.length) // leftover bytes first, then the new chunk
+			buf.set(this.left); buf.set(chunk, this.left.length)
+			this.left = null
+		}
+		let frames = [], pos = 0
+		while (pos < buf.length) {
+			let eid = readId(buf, pos)
+			if (!eid) break
+			let siz = readSize(buf, pos + eid.len)
+			if (!siz) break
+			let dataOff = pos + eid.len + siz.len
+			let id = eid.val, dataLen = siz.val
+			// Master elements: descend (skip element header)
+			if (id === ID_SEGMENT || id === ID_CLUSTER || id === ID_BLOCK_GROUP) { pos = dataOff; continue }
+			if (dataLen < 0) break // unknown-size non-master
+			if (dataOff + dataLen > buf.length) break // incomplete element
+			// SimpleBlock / Block: extract audio frame
+			if ((id === ID_SIMPLE_BLOCK || id === ID_BLOCK) && dataLen > 4) {
+				let bp = dataOff
+				let tn = readSize(buf, bp) // track number is read with the same helper as element sizes
+				if (tn && tn.val === this.trackNum) {
+					bp += tn.len + 3 // skip track number plus 3 header bytes (presumably 2-byte timecode + 1 flags byte — confirm against the Matroska spec)
+					if (bp < dataOff + dataLen) frames.push(buf.slice(bp, dataOff + dataLen)) // copied payload; NOTE(review): the flags byte is skipped, so laced blocks are not unpacked — confirm inputs are unlaced
+				}
+			}
+			pos = dataOff + dataLen
+		}
+		if (pos < buf.length) this.left = buf.subarray(pos).slice() // keep the incomplete tail for the next call
+		return frames
+	}
+}
+async function createOpusStream(info) { // Build an OpusDecoder configured from the WebM track info, wait on dec.ready, and return { dec, channels }
+	let { OpusDecoder } = await import('opus-decoder') // loaded on demand via dynamic import
+	let head = info.codecPrivate ? parseOpusHead(info.codecPrivate) : null // parseOpusHead is defined outside this view
+	let channels = head?.channels || info.channels || 2 // channel count: parsed head, then track info, then default 2
+	let preSkip = head?.preSkip || 0
+	if (!preSkip && info.codecDelay) preSkip = Math.round(info.codecDelay / 1e9 * 48000) // fallback: treats codecDelay as nanoseconds and converts to samples at 48000 Hz
+	let opts = { channels, sampleRate: 48000, preSkip }
+	if (head && head.mappingFamily > 0) { // mapping family > 0: take the stream layout from the parsed head
+		opts.streamCount = head.streamCount
+		opts.coupledStreamCount = head.coupledStreamCount
+		opts.channelMappingTable = head.channelMappingTable
+	} else if (channels === 1) { // otherwise a fixed mono layout
+		opts.streamCount = 1; opts.coupledStreamCount = 0; opts.channelMappingTable = [0]
+	} else if (channels === 2) { // or a fixed stereo layout; other channel counts leave these options unset
+		opts.streamCount = 1; opts.coupledStreamCount = 1; opts.channelMappingTable = [0, 1]
+	}
+	let dec = new OpusDecoder(opts)
+	await dec.ready
+	return { dec, channels }
+}
+function normResult(result) { // Reduce a decoder result to { channelData, sampleRate }, or EMPTY when it holds no samples
+	if (!result?.channelData?.length) return EMPTY // covers undefined result, missing channelData, and zero channels
+	let { channelData, samplesDecoded, sampleRate } = result
+	if (samplesDecoded != null && samplesDecoded < channelData[0].length) // fewer samples decoded than the first channel's buffer holds
+		channelData = channelData.map(ch => ch.subarray(0, samplesDecoded)) // trim every channel to samplesDecoded (subarray view, no copy)
+	if (!channelData[0]?.length) return EMPTY // a zero-length first channel also counts as empty
+	return { channelData, sampleRate }
 }
 function concat(parts, totalLen) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@audio/decode-webm",
-  "version": "1.0.0",
+  "version": "1.1.0",
   "description": "Decode WebM audio (Opus, Vorbis) to PCM samples",
   "type": "module",
   "main": "decode-webm.js",
@@ -29,7 +29,9 @@
     "decoder",
     "pcm"
   ],
-  "publishConfig": { "access": "public" },
+  "publishConfig": {
+    "access": "public"
+  },
   "license": "MIT",
   "author": "audiojs",
   "homepage": "https://github.com/audiojs/decode-webm#readme",