@e9g/buffered-audio-nodes 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,4479 @@
1
+ import { SourceNode, TargetNode, WHOLE_FILE, TransformNode, BufferedSourceStream, BufferedTargetStream, BufferedTransformStream, FileChunkBuffer } from 'buffered-audio-nodes-core';
2
+ export * from 'buffered-audio-nodes-core';
3
+ import { deinterleaveBuffer, hanningWindow, createFftWorkspace, detectFftBackend, getFftAddon, replaceChannel, lowPassCoefficients, zeroPhaseBiquadFilter, initFftBackend, stft, istft, MixedRadixFft, resampleDirect, applyBandpass, fft, bandPassCoefficients, biquadFilter, smoothEnvelope, preFilterCoefficients, rlbFilterCoefficients, interleave } from 'buffered-audio-nodes-utils';
4
+ export * from 'buffered-audio-nodes-utils';
5
+ import { extname } from 'path';
6
+ import { z } from 'zod';
7
+ import { spawn } from 'child_process';
8
+ import { open, stat, readFile } from 'fs/promises';
9
+ import { WaveFile } from 'wavefile';
10
+ import { createRequire } from 'module';
11
+
12
// src/index.ts
// Shared property schema for FFmpeg-backed nodes: the input file plus the
// locations of the ffmpeg/ffprobe binaries. The `meta` entries drive the
// host UI (file pickers, binary download hints).
var ffmpegSchema = z.object({
  path: z.string().default("").meta({ input: "file", mode: "open" }),
  ffmpegPath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffmpeg", download: "https://ffmpeg.org/download.html" }).describe("FFmpeg \u2014 audio/video processing tool"),
  ffprobePath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffprobe", download: "https://ffmpeg.org/download.html" }).describe("FFprobe \u2014 media file analyzer (included with FFmpeg)")
});
// Frames requested per _read() call (one second of audio at 44.1 kHz).
var DEFAULT_CHUNK_SIZE = 44100;
19
/**
 * Source stream that decodes an arbitrary audio file by piping it through an
 * external `ffmpeg` process as little-endian 32-bit float PCM, probing the
 * file's format up front with `ffprobe`.
 */
var ReadFfmpegStream = class extends BufferedSourceStream {
  constructor() {
    super(...arguments);
    this.frameOffset = 0;      // frames already emitted; offset of the next chunk
    this.outputChannels = 0;   // channel count after optional channel selection
    this.sourceSampleRate = 0; // sample rate reported by ffprobe
    this.sourceBitDepth = 0;   // always set to 32 (ffmpeg emits f32le)
  }
  /**
   * Probes the file and reports sample rate, output channel count (after
   * `properties.channels` selection) and estimated duration in frames.
   */
  async getMetadata() {
    const probe = await this.probe(this.properties.ffprobePath, this.properties.path);
    const selectedChannels = this.properties.channels;
    const channels = selectedChannels ? selectedChannels.length : probe.channels;
    return {
      sampleRate: probe.sampleRate,
      channels,
      // Duration in frames, derived from ffprobe's duration in seconds.
      durationFrames: Math.round(probe.duration * probe.sampleRate)
    };
  }
  /**
   * Lazily spawns the ffmpeg decode process (idempotent: a no-op once the
   * process exists). When specific source channels are requested, a `pan`
   * filter remaps them onto the output channels in the requested order.
   */
  async ensureInitialized() {
    if (this.ffmpegProcess) return;
    const probe = await this.probe(this.properties.ffprobePath, this.properties.path);
    const selectedChannels = this.properties.channels;
    this.outputChannels = selectedChannels ? selectedChannels.length : probe.channels;
    this.sourceSampleRate = probe.sampleRate;
    this.sourceBitDepth = 32;
    // Decode to raw f32le PCM at the source sample rate, written to stdout.
    const args = ["-i", this.properties.path, "-f", "f32le", "-acodec", "pcm_f32le", "-ar", String(probe.sampleRate)];
    if (selectedChannels) {
      // e.g. channels [2,0] -> "pan=2c|c0=c2|c1=c0"
      const panParts = selectedChannels.map((srcCh, outCh) => `c${outCh}=c${srcCh}`);
      const layout = this.outputChannels === 1 ? "mono" : `${this.outputChannels}c`;
      args.push("-af", `pan=${layout}|${panParts.join("|")}`);
      args.push("-ac", String(this.outputChannels));
    } else {
      args.push("-ac", String(probe.channels));
    }
    args.push("pipe:1");
    const proc = spawn(this.properties.ffmpegPath, args, {
      stdio: ["ignore", "pipe", "pipe"]
    });
    this.ffmpegProcess = proc;
    this.stdout = proc.stdout;
    this.remainder = void 0;
    this.frameOffset = 0;
    // Drain stderr so ffmpeg's log output cannot fill the pipe and block it.
    proc.stderr.resume();
  }
  /**
   * Reads roughly DEFAULT_CHUNK_SIZE frames from ffmpeg, deinterleaves them
   * per channel, and returns a chunk; returns undefined at end of stream.
   */
  async _read() {
    await this.ensureInitialized();
    const bytesPerFrame = this.outputChannels * 4;
    const targetBytes = DEFAULT_CHUNK_SIZE * bytesPerFrame;
    const data = await this.readBytes(targetBytes);
    if (!data || data.length === 0) {
      return void 0;
    }
    // Only whole frames are emitted; a trailing partial frame is kept for
    // the next read via `remainder`.
    const usableBytes = Math.floor(data.length / bytesPerFrame) * bytesPerFrame;
    if (usableBytes === 0) {
      return void 0;
    }
    const leftover = data.length - usableBytes;
    if (leftover > 0) {
      this.remainder = Buffer.from(data.buffer, data.byteOffset + usableBytes, leftover);
    }
    const frames = usableBytes / bytesPerFrame;
    const sampleBuffer = Buffer.from(data.buffer, data.byteOffset, usableBytes);
    const samples = deinterleaveBuffer(sampleBuffer, this.outputChannels);
    const offset = this.frameOffset;
    this.frameOffset += frames;
    return {
      samples,
      offset,
      sampleRate: this.sourceSampleRate,
      bitDepth: this.sourceBitDepth
    };
  }
  /** Kills the ffmpeg process and waits for it to close, then clears state. */
  async _flush() {
    const proc = this.ffmpegProcess;
    if (proc) {
      proc.kill();
      await new Promise((resolve) => {
        proc.on("close", () => resolve());
        // If the process already exited, "close" may never fire again.
        if (proc.exitCode !== null) resolve();
      });
      this.ffmpegProcess = void 0;
    }
    this.stdout = void 0;
    this.remainder = void 0;
  }
  /** Synchronous best-effort cleanup: kill the process without waiting. */
  _teardown() {
    if (this.ffmpegProcess) {
      this.ffmpegProcess.kill();
      this.ffmpegProcess = void 0;
    }
    this.stdout = void 0;
    this.remainder = void 0;
  }
  /**
   * Runs ffprobe on the first audio stream of `filePath` and returns
   * { sampleRate, channels, duration }, with fallbacks of 44100 Hz / 1
   * channel / 0 s when fields are missing or unparsable.
   * Rejects if ffprobe cannot be spawned, exits non-zero, or reports no
   * audio stream.
   */
  async probe(ffprobePath, filePath) {
    const proc = spawn(ffprobePath, ["-v", "quiet", "-print_format", "json", "-show_streams", "-select_streams", "a:0", filePath], {
      stdio: ["ignore", "pipe", "pipe"]
    });
    const chunks = [];
    proc.stdout.on("data", (chunk) => {
      chunks.push(chunk);
    });
    proc.stderr.resume();
    await new Promise((resolve, reject) => {
      proc.on("close", (code) => {
        if (code !== 0) {
          reject(new Error(`ffprobe exited with code ${code} for "${filePath}"`));
        } else {
          resolve();
        }
      });
      proc.on("error", (error) => {
        reject(new Error(`Failed to spawn ffprobe: ${error.message}`));
      });
    });
    const json = JSON.parse(Buffer.concat(chunks).toString("utf-8"));
    const stream = json.streams?.[0];
    if (!stream) {
      throw new Error(`No audio stream found in "${filePath}"`);
    }
    return {
      sampleRate: Number(stream.sample_rate) || 44100,
      channels: stream.channels ?? 1,
      duration: Number(stream.duration) || 0
    };
  }
  /**
   * Resolves with up to `targetBytes` read from ffmpeg's stdout, prepending
   * any remainder carried over from the previous read. Resolves undefined
   * when the stream has ended and no data is pending.
   * NOTE: stream.read(n) returns null until n bytes are buffered, except at
   * end-of-stream where it returns whatever remains — hence the
   * readable/end listeners below.
   */
  readBytes(targetBytes) {
    return new Promise((resolve) => {
      const stdout = this.stdout;
      if (!stdout) {
        resolve(void 0);
        return;
      }
      const existing = this.remainder;
      this.remainder = void 0;
      const read2 = () => {
        const raw = stdout.read(targetBytes - (existing?.length ?? 0));
        if (raw) {
          resolve(existing ? Buffer.concat([existing, raw]) : raw);
          return;
        }
        if (stdout.readableEnded) {
          resolve(existing && existing.length > 0 ? existing : void 0);
          return;
        }
        const onReadable = () => {
          cleanup();
          read2();
        };
        const onEnd = () => {
          cleanup();
          resolve(existing && existing.length > 0 ? existing : void 0);
        };
        // Remove both listeners so whichever event fires first doesn't leave
        // the other one dangling.
        const cleanup = () => {
          stdout.removeListener("readable", onReadable);
          stdout.removeListener("end", onEnd);
        };
        stdout.once("readable", onReadable);
        stdout.once("end", onEnd);
      };
      read2();
    });
  }
};
182
/** Source node that decodes audio files through an external FFmpeg process. */
var _ReadFfmpegNode = class _ReadFfmpegNode extends SourceNode {
  constructor() {
    super(...arguments);
    this.type = ["buffered-audio-node", "source", "read-ffmpeg"];
  }
  /** Returns a copy of this node with `changes` merged over the current properties. */
  clone(changes) {
    const merged = { ...this.properties, previousProperties: this.properties, ...changes };
    return new _ReadFfmpegNode(merged);
  }
  /** Creates the FFmpeg-backed source stream for this node's properties. */
  createStream() {
    return new ReadFfmpegStream(this.properties);
  }
};
_ReadFfmpegNode.moduleName = "ReadFfmpeg";
_ReadFfmpegNode.moduleDescription = "Read audio from a file using FFmpeg";
_ReadFfmpegNode.schema = ffmpegSchema;
var ReadFfmpegNode = _ReadFfmpegNode;
198
/**
 * Convenience factory for a ReadFfmpegNode.
 *
 * Fix: the original dereferenced `options.channels` etc. unconditionally and
 * threw a TypeError when called without options, unlike the sibling factories
 * `read()` and `readWav()` which use optional chaining. Missing binary paths
 * now fall back to the schema default of "".
 *
 * @param path    Path of the media file to decode.
 * @param options Optional: `channels` (source channel indices to extract),
 *                `ffmpegPath`, `ffprobePath`.
 * @returns A configured ReadFfmpegNode.
 */
function readFfmpeg(path, options) {
  return new ReadFfmpegNode({
    path,
    channels: options?.channels,
    ffmpegPath: options?.ffmpegPath ?? "",
    ffprobePath: options?.ffprobePath ?? ""
  });
}
201
// Property schema for the built-in WAV reader: only the input file path.
var wavSchema = z.object({
  path: z.string().default("").meta({ input: "file", mode: "open" })
});
// Frames requested per _read() call for the WAV reader.
var DEFAULT_CHUNK_SIZE2 = 44100;
205
/**
 * Decodes one PCM sample from `data` at byte `offset` and normalizes it to
 * the [-1, 1) range.
 *
 * @param data          Buffer of raw little-endian WAV sample data.
 * @param offset        Byte offset of the sample.
 * @param bitsPerSample 8, 16, 24, 32 (int) or 32/64 (IEEE float).
 * @param audioFormat   WAV format tag: 3 = IEEE float, otherwise integer PCM.
 * @returns The sample value, or 0 for unsupported layouts.
 */
function readSample(data, offset, bitsPerSample, audioFormat) {
  const IEEE_FLOAT = 3;
  if (audioFormat === IEEE_FLOAT && bitsPerSample === 32) return data.readFloatLE(offset);
  if (audioFormat === IEEE_FLOAT && bitsPerSample === 64) return data.readDoubleLE(offset);
  switch (bitsPerSample) {
    case 8:
      // 8-bit WAV is unsigned, centered on 128.
      return ((data[offset] ?? 128) - 128) / 128;
    case 16:
      return data.readInt16LE(offset) / 32768;
    case 24: {
      // Assemble the little-endian 24-bit value, then sign-extend manually.
      const low = data[offset] ?? 0;
      const mid = data[offset + 1] ?? 0;
      const high = data[offset + 2] ?? 0;
      const unsigned = low | mid << 8 | high << 16;
      const signed = unsigned > 8388607 ? unsigned - 16777216 : unsigned;
      return signed / 8388608;
    }
    case 32:
      return data.readInt32LE(offset) / 2147483648;
    default:
      return 0;
  }
}
222
/**
 * Scans the RIFF/RF64 chunk list of the WAV file behind `fh` and returns its
 * format descriptor: { sampleRate, channels, bitsPerSample, audioFormat,
 * blockAlign, dataOffset, dataSize }.
 *
 * Throws if the file is not RIFF/RF64 WAVE, the fmt chunk is too small, a
 * data chunk precedes the fmt chunk, or no data chunk is found.
 */
async function parseWavFormat(fh, path) {
  const fileInfo = await stat(path);
  const header = Buffer.alloc(12);
  await fh.read(header, 0, 12, 0);
  const magic = header.toString("ascii", 0, 4);
  const wave = header.toString("ascii", 8, 12);
  if (magic !== "RIFF" && magic !== "RF64" || wave !== "WAVE") {
    throw new Error(`Not a WAV file: "${path}"`);
  }
  const isRf64 = magic === "RF64";
  let ds64DataSize; // 64-bit data-chunk size from the ds64 chunk (RF64 only)
  let offset = 12;  // byte position of the next chunk header
  const fileSize = fileInfo.size;
  let format;
  const chunkHeader = Buffer.alloc(8);
  // Walk the chunk list until the data chunk (or end of file).
  while (offset < fileSize) {
    await fh.read(chunkHeader, 0, 8, offset);
    const chunkId = chunkHeader.toString("ascii", 0, 4);
    const chunkSize = chunkHeader.readUInt32LE(4);
    if (chunkId === "ds64") {
      // ds64 payload: riffSize (u64 at 0), dataSize (u64 at 8), ...
      const ds64Data = Buffer.alloc(Math.min(chunkSize, 28));
      await fh.read(ds64Data, 0, ds64Data.length, offset + 8);
      ds64DataSize = Number(ds64Data.readBigUInt64LE(8));
    } else if (chunkId === "JUNK") ; else if (chunkId === "fmt ") {
      if (chunkSize < 16) throw new Error("WAV fmt chunk too small");
      const fmtData = Buffer.alloc(chunkSize);
      await fh.read(fmtData, 0, chunkSize, offset + 8);
      const audioFormat = fmtData.readUInt16LE(0);
      const channels = fmtData.readUInt16LE(2);
      const sampleRate = fmtData.readUInt32LE(4);
      const blockAlign = fmtData.readUInt16LE(12);
      const bitsPerSample = fmtData.readUInt16LE(14);
      // dataOffset/dataSize are filled in once the data chunk is found.
      format = { sampleRate, channels, bitsPerSample, audioFormat, blockAlign, dataOffset: 0, dataSize: 0 };
    } else if (chunkId === "data") {
      if (!format) throw new Error("WAV file has data chunk before fmt chunk");
      // RF64 stores the real (64-bit) data size in ds64; the 32-bit field
      // in the chunk header is a placeholder in that case.
      const dataSize = isRf64 && ds64DataSize !== void 0 ? ds64DataSize : chunkSize;
      format = { ...format, dataOffset: offset + 8, dataSize };
      break;
    }
    offset += 8 + chunkSize;
    // RIFF chunks are word-aligned: odd-sized chunks carry a pad byte.
    if (chunkSize % 2 !== 0) offset++;
  }
  if (!format || format.dataOffset === 0) {
    throw new Error(`Invalid WAV file: "${path}"`);
  }
  return format;
}
269
/**
 * Source stream that reads WAV files directly from disk (no external
 * binaries), decoding integer or float PCM to Float32 channel arrays.
 */
var ReadWavStream = class extends BufferedSourceStream {
  constructor() {
    super(...arguments);
    this.bytesRead = 0;        // bytes of the data chunk consumed so far
    this.sourceSampleRate = 0; // from the fmt chunk
    this.sourceBitDepth = 0;   // from the fmt chunk
  }
  /**
   * Opens the file just long enough to parse the header and report sample
   * rate, output channel count (after `properties.channels` selection) and
   * total frame count.
   */
  async getMetadata() {
    const fh = await open(this.properties.path, "r");
    try {
      const format = await parseWavFormat(fh, this.properties.path);
      const selectedChannels = this.properties.channels;
      const outputChannels = selectedChannels ? selectedChannels.length : format.channels;
      const totalFrames = Math.floor(format.dataSize / format.blockAlign);
      return {
        sampleRate: format.sampleRate,
        channels: outputChannels,
        durationFrames: totalFrames
      };
    } finally {
      await fh.close();
    }
  }
  /** Lazily opens the file and parses its header (idempotent). */
  async ensureInitialized() {
    if (this.format) return;
    this.fileHandle = await open(this.properties.path, "r");
    const format = await parseWavFormat(this.fileHandle, this.properties.path);
    this.format = format;
    this.bytesRead = 0;
    this.sourceSampleRate = format.sampleRate;
    this.sourceBitDepth = format.bitsPerSample;
  }
  /**
   * Reads up to DEFAULT_CHUNK_SIZE2 frames from the data chunk, decodes every
   * file channel, then applies the optional channel selection. Returns
   * undefined once the data chunk is exhausted.
   */
  async _read() {
    await this.ensureInitialized();
    const fh = this.fileHandle;
    const format = this.format;
    if (!fh || !format) {
      return void 0;
    }
    const remaining = format.dataSize - this.bytesRead;
    if (remaining <= 0) {
      return void 0;
    }
    const framesWanted = DEFAULT_CHUNK_SIZE2;
    const bytesWanted = Math.min(framesWanted * format.blockAlign, remaining);
    const chunk = Buffer.alloc(bytesWanted);
    const { bytesRead } = await fh.read(chunk, 0, bytesWanted, format.dataOffset + this.bytesRead);
    if (bytesRead === 0) {
      return void 0;
    }
    // Only whole frames are consumed; a short read's trailing partial frame
    // is left for the next call.
    const frames = Math.floor(bytesRead / format.blockAlign);
    this.bytesRead += frames * format.blockAlign;
    const fileChannels = format.channels;
    const selectedChannels = this.properties.channels;
    const allChannels = [];
    for (let ch = 0; ch < fileChannels; ch++) {
      allChannels.push(new Float32Array(frames));
    }
    // Deinterleave and decode sample-by-sample via readSample.
    for (let frame = 0; frame < frames; frame++) {
      for (let ch = 0; ch < fileChannels; ch++) {
        const byteOffset = frame * format.blockAlign + ch * (format.bitsPerSample / 8);
        const channel = allChannels[ch];
        if (channel) {
          channel[frame] = readSample(chunk, byteOffset, format.bitsPerSample, format.audioFormat);
        }
      }
    }
    let samples;
    if (selectedChannels) {
      // Out-of-range channel indices yield silent channels.
      samples = selectedChannels.map((srcCh) => allChannels[srcCh] ?? new Float32Array(frames));
    } else {
      samples = allChannels;
    }
    // Frame offset of this chunk = bytes consumed before this read / frame size.
    const frameOffset = Math.floor((this.bytesRead - frames * format.blockAlign) / format.blockAlign);
    return {
      samples,
      offset: frameOffset,
      sampleRate: this.sourceSampleRate,
      bitDepth: this.sourceBitDepth
    };
  }
  /** Closes the file handle, awaiting the close. */
  async _flush() {
    if (this.fileHandle) {
      await this.fileHandle.close();
      this.fileHandle = void 0;
    }
  }
  /** Synchronous best-effort cleanup: close without awaiting, swallow errors. */
  _teardown() {
    if (this.fileHandle) {
      this.fileHandle.close().catch(() => void 0);
      this.fileHandle = void 0;
    }
  }
};
363
/** Source node that reads WAV files directly, without external binaries. */
var _ReadWavNode = class _ReadWavNode extends SourceNode {
  constructor() {
    super(...arguments);
    this.type = ["buffered-audio-node", "source", "read-wav"];
  }
  /** Returns a copy of this node with `changes` merged over the current properties. */
  clone(changes) {
    const merged = { ...this.properties, previousProperties: this.properties, ...changes };
    return new _ReadWavNode(merged);
  }
  /** Creates the WAV-decoding source stream for this node's properties. */
  createStream() {
    return new ReadWavStream(this.properties);
  }
};
_ReadWavNode.moduleName = "ReadWav";
_ReadWavNode.moduleDescription = "Read audio from a WAV file";
_ReadWavNode.schema = wavSchema;
var ReadWavNode = _ReadWavNode;
379
/**
 * Convenience factory for a ReadWavNode.
 *
 * @param path    Path of the WAV file to read.
 * @param options Optional: `channels` (source channel indices to extract).
 * @returns A configured ReadWavNode.
 */
function readWav(path, options) {
  const channels = options?.channels;
  return new ReadWavNode({ path, channels });
}
382
+
383
// src/sources/read/index.ts
// Properties for the generic Read node: input file plus ffmpeg/ffprobe
// locations (only required for non-WAV inputs; see _ReadNode.createStream).
var schema = z.object({
  path: z.string().default("").meta({ input: "file", mode: "open" }),
  ffmpegPath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffmpeg", download: "https://ffmpeg.org/download.html" }).describe("FFmpeg \u2014 audio/video processing tool"),
  ffprobePath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffprobe", download: "https://ffmpeg.org/download.html" }).describe("FFprobe \u2014 media file analyzer (included with FFmpeg)")
});
389
/** Source node that dispatches to the WAV or FFmpeg reader by file extension. */
var _ReadNode = class _ReadNode extends SourceNode {
  constructor() {
    super(...arguments);
    this.type = ["buffered-audio-node", "source", "read"];
  }
  /**
   * `.wav` files use the built-in WAV stream; every other extension is
   * decoded through FFmpeg and therefore needs both binary paths configured.
   */
  createStream() {
    const { path, ffmpegPath, ffprobePath } = this.properties;
    if (extname(path).toLowerCase() === ".wav") {
      return new ReadWavStream(this.properties);
    }
    if (!ffmpegPath || !ffprobePath) {
      throw new Error(`Non-WAV file requires ffmpegPath and ffprobePath: "${path}"`);
    }
    return new ReadFfmpegStream(this.properties);
  }
  /** Returns a copy of this node with `changes` merged over the current properties. */
  clone(changes) {
    const merged = { ...this.properties, previousProperties: this.properties, ...changes };
    return new _ReadNode(merged);
  }
};
_ReadNode.moduleName = "Read";
_ReadNode.moduleDescription = "Read audio from a file";
_ReadNode.schema = schema;
var ReadNode = _ReadNode;
412
/**
 * Convenience factory for a ReadNode.
 *
 * @param path    Path of the audio file to read.
 * @param options Optional: `channels`, `ffmpegPath`, `ffprobePath`.
 *                Missing binary paths default to "".
 * @returns A configured ReadNode.
 */
function read(path, options) {
  const properties = {
    path,
    channels: options?.channels,
    ffmpegPath: options?.ffmpegPath ?? "",
    ffprobePath: options?.ffprobePath ?? ""
  };
  return new ReadNode(properties);
}
415
/**
 * Concatenates a list of Float32Array chunks into a single Float32Array of
 * `totalFrames` samples (caller guarantees the chunks fit).
 */
function flattenBuffers(chunks, totalFrames) {
  const flat = new Float32Array(totalFrames);
  let cursor = 0;
  for (const piece of chunks) {
    flat.set(piece, cursor);
    cursor += piece.length;
  }
  return flat;
}
424
/**
 * Flattens each channel's buffered chunks and applies the two K-weighting
 * stages (pre-filter, then RLB filter) used by the loudness measurements.
 * Channels with no buffer entry are skipped.
 */
function applyKWeighting(channelBuffers, channels, frames, sampleRate) {
  const weighted = [];
  for (let ch = 0; ch < channels; ch++) {
    const chunks = channelBuffers[ch];
    if (!chunks) continue;
    const contiguous = flattenBuffers(chunks, frames);
    const preFiltered = applyPreFilter(contiguous, sampleRate);
    weighted.push(applyRlbFilter(preFiltered, sampleRate));
  }
  return weighted;
}
436
/** Applies the K-weighting pre-filter stage as a biquad. */
function applyPreFilter(samples, sampleRate) {
  const coefficients = preFilterCoefficients(sampleRate);
  return biquadFilter(samples, coefficients.fb, coefficients.fa);
}
440
/** Applies the K-weighting RLB filter stage as a biquad. */
function applyRlbFilter(samples, sampleRate) {
  const coefficients = rlbFilterCoefficients(sampleRate);
  return biquadFilter(samples, coefficients.fb, coefficients.fa);
}
444
/**
 * Slides a window of `blockSize` frames across the K-weighted channels in
 * steps of `stepSize` and returns the loudness of each full block, summing
 * the per-channel mean squares before converting to dB.
 */
function computeBlockLoudness(kWeighted, channels, frames, blockSize, stepSize) {
  const loudnessPerBlock = [];
  for (let start = 0; start + blockSize <= frames; start += stepSize) {
    let energy = 0;
    for (let ch = 0; ch < channels; ch++) {
      const samples = kWeighted[ch];
      if (!samples) continue;
      const end = start + blockSize;
      let squares = 0;
      for (let i = start; i < end; i++) {
        const v = samples[i] ?? 0;
        squares += v * v;
      }
      energy += squares / blockSize;
    }
    // -0.691 dB offset; the 1e-10 floor avoids log10(0) on silence.
    loudnessPerBlock.push(-0.691 + 10 * Math.log10(Math.max(energy, 1e-10)));
  }
  return loudnessPerBlock;
}
463
/**
 * Integrated loudness over all blocks with two-stage gating: an absolute
 * gate at -70, then a relative gate 10 dB below the mean of the surviving
 * blocks. Returns -Infinity when no blocks survive a gate.
 */
function computeIntegratedLoudness(kWeighted, channels, frames, blockSize, stepSize) {
  const meanEnergy = (values) =>
    values.reduce((sum, v) => sum + Math.pow(10, v / 10), 0) / values.length;
  const blocks = computeBlockLoudness(kWeighted, channels, frames, blockSize, stepSize);
  if (blocks.length === 0) return -Infinity;
  const absGated = blocks.filter((v) => v > -70);
  if (absGated.length === 0) return -Infinity;
  const relThreshold = 10 * Math.log10(meanEnergy(absGated)) - 10;
  const relGated = absGated.filter((v) => v > relThreshold);
  if (relGated.length === 0) return -Infinity;
  return 10 * Math.log10(meanEnergy(relGated));
}
475
/**
 * Loudness range (LRA): gates short-term loudness values absolutely at -70
 * and relatively 20 dB below the gated mean, then returns the spread between
 * the 10th and 95th percentiles. Returns 0 with fewer than two gated values.
 */
function computeLra(shortTermLoudness) {
  const gated = shortTermLoudness.filter((v) => v > -70);
  if (gated.length < 2) return 0;
  const meanEnergy = gated.reduce((sum, v) => sum + Math.pow(10, v / 10), 0) / gated.length;
  const threshold = 10 * Math.log10(meanEnergy) - 20;
  const kept = gated.filter((v) => v > threshold).sort((a, b) => a - b);
  if (kept.length < 2) return 0;
  const p10Index = Math.floor(kept.length * 0.1);
  const p95Index = Math.min(Math.ceil(kept.length * 0.95) - 1, kept.length - 1);
  return (kept[p95Index] ?? 0) - (kept[p10Index] ?? 0);
}
487
+
488
// src/targets/loudness-stats/index.ts
// The loudness-stats target exposes no configurable properties.
var schema2 = z.object({});
490
/**
 * Target stream that buffers the entire signal, then computes loudness
 * statistics (integrated, short-term, momentary, peak, range) on close.
 */
var LoudnessStatsStream = class extends BufferedTargetStream {
  constructor() {
    super(...arguments);
    this.channels = 0;
    this.sampleRate = 0;
    this.truePeakValue = 0;    // max absolute sample seen so far
    this.channelBuffers = [];  // per channel: list of copied Float32Array chunks
    this.totalFrames = 0;
    this.statsInitialized = false;
  }
  // Populated by _close(); undefined until the stream finishes.
  get stats() {
    return this._stats;
  }
  /** Captures channel count / sample rate from the first chunk (idempotent). */
  ensureInit(chunk) {
    if (this.statsInitialized) return;
    this.statsInitialized = true;
    this.channels = chunk.samples.length;
    this.sampleRate = chunk.sampleRate;
    for (let ch = 0; ch < this.channels; ch++) {
      this.channelBuffers.push([]);
    }
  }
  /**
   * Copies each chunk's channel data for later analysis and tracks the
   * running absolute sample peak.
   */
  // eslint-disable-next-line @typescript-eslint/require-await
  async _write(chunk) {
    this.ensureInit(chunk);
    for (let ch = 0; ch < this.channels; ch++) {
      const samples = chunk.samples[ch];
      if (!samples) continue;
      const channelBuffer = this.channelBuffers[ch];
      // Copy: the incoming array may be reused by the producer.
      if (channelBuffer) channelBuffer.push(new Float32Array(samples));
      for (const sample of samples) {
        const abs = Math.abs(sample);
        if (abs > this.truePeakValue) {
          this.truePeakValue = abs;
        }
      }
    }
    this.totalFrames += chunk.samples[0]?.length ?? 0;
  }
  /**
   * Computes the final statistics from the buffered signal and releases the
   * buffers. Momentary uses 400 ms blocks, short-term 3 s blocks, both with
   * a 100 ms hop; the loudness range is derived from the short-term values.
   * NOTE(review): `truePeak` is the max absolute sample value in dB — no
   * oversampling is applied, so it is a sample peak, not an inter-sample
   * true peak; confirm this is intended.
   */
  // eslint-disable-next-line @typescript-eslint/require-await
  async _close() {
    const channels = this.channels;
    const frames = this.totalFrames;
    const sampleRate = this.sampleRate;
    const kWeighted = applyKWeighting(this.channelBuffers, channels, frames, sampleRate);
    const blockSize400ms = Math.round(sampleRate * 0.4);
    const stepSize = Math.round(sampleRate * 0.1);
    const blockSize3s = sampleRate * 3;
    const momentary = computeBlockLoudness(kWeighted, channels, frames, blockSize400ms, stepSize);
    const shortTerm = computeBlockLoudness(kWeighted, channels, frames, blockSize3s, stepSize);
    const integrated = computeIntegratedLoudness(kWeighted, channels, frames, blockSize400ms, stepSize);
    const truePeak = 20 * Math.log10(Math.max(this.truePeakValue, 1e-10));
    const range = computeLra(shortTerm);
    this._stats = { integrated, shortTerm, momentary, truePeak, range };
    this.channelBuffers = [];
  }
};
547
/**
 * Target node measuring loudness statistics. Buffers the whole file
 * (bufferSize/latency default to WHOLE_FILE) so stats cover the full signal.
 */
var _LoudnessStatsNode = class _LoudnessStatsNode extends TargetNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "target", "loudness-stats"];
  }
  /** Type guard: is `value` a loudness-stats target node? */
  static is(value) {
    return TargetNode.is(value) && value.type[2] === "loudness-stats";
  }
  /** Stats from the most recent stream, falling back to the cached copy. */
  get stats() {
    const current = this.streams[this.streams.length - 1];
    if (current instanceof LoudnessStatsStream) return current.stats;
    return this.cachedStats;
  }
  /** Preserves the latest stream's stats before the stream goes away. */
  _teardown() {
    const current = this.streams[this.streams.length - 1];
    if (current instanceof LoudnessStatsStream && current.stats) {
      this.cachedStats = current.stats;
    }
  }
  /** Creates the analysis stream for this node's properties. */
  createStream() {
    return new LoudnessStatsStream(this.properties);
  }
  /** Returns a copy of this node with `changes` merged over the current properties. */
  clone(changes) {
    const merged = { ...this.properties, previousProperties: this.properties, ...changes };
    return new _LoudnessStatsNode(merged);
  }
};
_LoudnessStatsNode.moduleName = "Loudness Stats";
_LoudnessStatsNode.moduleDescription = "Measure integrated loudness, true peak, loudness range, and short-term/momentary loudness per EBU R128";
_LoudnessStatsNode.schema = schema2;
var LoudnessStatsNode = _LoudnessStatsNode;
576
/**
 * Convenience factory for a LoudnessStatsNode.
 *
 * @param options Optional: `id` for the node.
 * @returns A configured LoudnessStatsNode.
 */
function loudnessStats(options) {
  return new LoudnessStatsNode({ id: options?.id });
}
581
/**
 * Converts one FFT frame (split re/im arrays starting at `reOffset`) into
 * magnitude bins. Without `bandMappings` the first `outputBins` linear
 * magnitudes are returned directly; with mappings, the linear bins are
 * averaged into scaled bands using fractional weights on the edge bins.
 *
 * @param magnitudeBuffer Scratch Float32Array (>= linearBins) reused across frames.
 * @returns A fresh Float32Array of `outputBins` magnitudes.
 */
function computeFrameMagnitudes(re, im, reOffset, linearBins, magScale, outputBins, bandMappings, magnitudeBuffer) {
  const frame = new Float32Array(outputBins);
  const magnitudeAt = (bin) => {
    const real = re[reOffset + bin];
    const imag = im[reOffset + bin];
    return Math.sqrt(real * real + imag * imag) * magScale;
  };
  if (!bandMappings) {
    // Linear scale: output bins map 1:1 onto FFT bins.
    for (let bin = 0; bin < outputBins; bin++) {
      frame[bin] = magnitudeAt(bin);
    }
    return frame;
  }
  // Scaled output: materialize every linear magnitude once...
  for (let bin = 0; bin < linearBins; bin++) {
    magnitudeBuffer[bin] = magnitudeAt(bin);
  }
  // ...then take a weighted average over each band's bin range.
  for (let band = 0; band < outputBins; band++) {
    const mapping = bandMappings[band];
    if (!mapping) continue;
    let weighted = 0;
    let totalWeight = 0;
    for (let bin = mapping.binStart; bin <= mapping.binEnd; bin++) {
      let weight = 1;
      if (bin === mapping.binStart) weight = mapping.weightStart;
      else if (bin === mapping.binEnd) weight = mapping.weightEnd;
      weighted += magnitudeBuffer[bin] * weight;
      totalWeight += weight;
    }
    frame[band] = totalWeight > 0 ? weighted / totalWeight : 0;
  }
  return frame;
}
612
/**
 * Splits `samples` into `batchFrames` hops of `hopSize`, applies the analysis
 * window, runs the FFT (native addon batch path when available, per-frame JS
 * fallback otherwise), and returns one magnitude frame per hop.
 */
function computeSpectrogramFrames(samples, batchFrames, fftSize, hopSize, linearBins, magScale, outputBins, windowCoefficients, workspace, addon, bandMappings, magnitudeBuffer) {
  const frames = [];
  if (addon) {
    // Native path: window all hops into one contiguous buffer and transform
    // them with a single batched call.
    const batchInput = new Float32Array(fftSize * batchFrames);
    for (let frame = 0; frame < batchFrames; frame++) {
      const srcStart = frame * hopSize;
      const dstStart = frame * fftSize;
      for (let i = 0; i < fftSize; i++) {
        batchInput[dstStart + i] = samples[srcStart + i] * windowCoefficients[i];
      }
    }
    const { re: batchRe, im: batchIm } = addon.batchFft(batchInput, fftSize, batchFrames);
    for (let frame = 0; frame < batchFrames; frame++) {
      frames.push(computeFrameMagnitudes(batchRe, batchIm, frame * linearBins, linearBins, magScale, outputBins, bandMappings, magnitudeBuffer));
    }
    return frames;
  }
  // JS fallback: window and transform one hop at a time, reusing a scratch buffer.
  const windowed = new Float32Array(fftSize);
  for (let frame = 0; frame < batchFrames; frame++) {
    const srcStart = frame * hopSize;
    for (let i = 0; i < fftSize; i++) {
      windowed[i] = samples[srcStart + i] * windowCoefficients[i];
    }
    const { re, im } = fft(windowed, workspace);
    frames.push(computeFrameMagnitudes(re, im, 0, linearBins, magScale, outputBins, bandMappings, magnitudeBuffer));
  }
  return frames;
}
640
+
641
// src/targets/spectrogram/utils/frequency.ts
// Byte codes written to the spectrogram file header (byte 24) to identify
// the frequency scale used for the output bands.
var FREQUENCY_SCALE_BYTE = { linear: 0, log: 1, mel: 2, erb: 3 };
643
/** Converts a frequency in Hz to the mel scale. */
function freqToMel(freq) {
  const ratio = 1 + freq / 700;
  return 2595 * Math.log10(ratio);
}
646
/** Converts a mel-scale value back to a frequency in Hz (inverse of freqToMel). */
function melToFreq(mel) {
  const ratio = Math.pow(10, mel / 2595);
  return 700 * (ratio - 1);
}
649
/** Converts a frequency in Hz to the ERB-rate scale. */
function freqToErb(freq) {
  const ratio = 1 + 437e-5 * freq;
  return 21.4 * Math.log10(ratio);
}
652
/** Converts an ERB-rate value back to a frequency in Hz (inverse of freqToErb). */
function erbToFreq(erb) {
  const ratio = Math.pow(10, erb / 21.4);
  return (ratio - 1) / 437e-5;
}
655
/**
 * Builds, for each of `numBands` bands spaced evenly on an arbitrary
 * frequency scale, the range of linear FFT bins it covers plus fractional
 * weights for the partially-covered edge bins.
 *
 * @param toScale   Maps Hz onto the scale (e.g. freqToMel).
 * @param fromScale Inverse of `toScale` (e.g. melToFreq).
 * @returns Array of { binStart, binEnd, weightStart, weightEnd }.
 */
function computeScaledBandMappings(numBands, minFreq, maxFreq, sampleRate, fftSize, toScale, fromScale) {
  const lo = toScale(minFreq);
  const hi = toScale(maxFreq);
  const step = (hi - lo) / numBands;
  const binWidth = sampleRate / fftSize;
  const lastBin = fftSize / 2 + 1 - 1; // highest valid linear bin index
  const clamp01 = (x) => Math.max(0, Math.min(1, x));
  const mappings = [];
  for (let band = 0; band < numBands; band++) {
    const lowHz = fromScale(lo + band * step);
    const highHz = fromScale(lo + (band + 1) * step);
    const lowBin = lowHz / binWidth;
    const highBin = highHz / binWidth;
    const binStart = Math.max(0, Math.floor(lowBin));
    const binEnd = Math.min(lastBin, Math.ceil(highBin));
    mappings.push({
      binStart,
      binEnd: Math.max(binStart, binEnd),
      weightStart: clamp01(1 - (lowBin - binStart)),
      weightEnd: clamp01(1 - (binEnd - highBin))
    });
  }
  return mappings;
}
680
/** Band mappings with `numBands` bands spaced evenly on the mel scale. */
function computeMelBandMappings(numBands, minFreq, maxFreq, sampleRate, fftSize) {
  return computeScaledBandMappings(numBands, minFreq, maxFreq, sampleRate, fftSize, freqToMel, melToFreq);
}
683
/** Band mappings with `numBands` bands spaced evenly on the ERB-rate scale. */
function computeErbBandMappings(numBands, minFreq, maxFreq, sampleRate, fftSize) {
  return computeScaledBandMappings(numBands, minFreq, maxFreq, sampleRate, fftSize, freqToErb, erbToFreq);
}
686
/**
 * Band mappings with `numBands` bands spaced evenly on a logarithmic
 * frequency scale.
 *
 * Fix: the original duplicated computeScaledBandMappings line-for-line with
 * Math.log/Math.exp inlined. Delegating performs the identical arithmetic
 * (scaleMin = log(minFreq), freqLow = exp(scaleMin + band * step), same
 * bin/weight clamping) and keeps this helper consistent with the mel/ERB
 * variants. As before, `minFreq` must be > 0 (log(0) is -Infinity).
 */
function computeLogBandMappings(numBands, minFreq, maxFreq, sampleRate, fftSize) {
  return computeScaledBandMappings(numBands, minFreq, maxFreq, sampleRate, fftSize, Math.log, Math.exp);
}
711
+
712
// src/targets/spectrogram/index.ts
// Spectrogram target properties: output file, FFT/hop sizes, and an optional
// native FFTW addon path for faster transforms.
var schema3 = z.object({
  outputPath: z.string().default("").meta({ input: "file", mode: "save" }).describe("Output Path"),
  fftSize: z.number().min(256).max(8192).multipleOf(256).default(2048).describe("FFT Size"),
  hopSize: z.number().min(64).max(8192).multipleOf(64).default(512).describe("Hop Size"),
  fftwAddonPath: z.string().default("").meta({ input: "file", mode: "open", binary: "fftw-addon" }).describe("FFTW Addon")
});
// Fixed byte length of the spectrogram file header (see SpectrogramStream.initialize).
var HEADER_SIZE = 33;
720
// Accumulates incoming PCM chunks, runs overlapping FFT frames over them, and
// writes per-channel magnitude spectra to a binary file: a HEADER_SIZE-byte
// header followed by float32 frame data.
//
// Fix vs. original: _close() and flushWriteBuffer() previously dereferenced
// this.fileHandle without a guard and would throw a TypeError if the handle
// was never opened (or already cleared). They now guard the handle the same
// way the sibling WaveformStream does.
var SpectrogramStream = class extends BufferedTargetStream {
  constructor() {
    super(...arguments);
    this.channels = 0;
    this.linearBins = 0; // fftSize / 2 + 1 (set in _setup)
    this.outputBins = 0; // bins per written frame after optional band mapping
    this.numFrames = 0; // frame counter stored in the header at close
    this.fileOffset = HEADER_SIZE;
    this.windowCoefficients = new Float32Array(0);
    this.addon = null; // optional native FFT backend
    this.magnitudes = new Float32Array(0); // scratch reused per frame
    this.sampleBuffers = []; // per-channel accumulation buffers
    this.sampleBufferOffset = 0; // frames currently accumulated
    this.sampleBufferCapacity = 0;
    this.writeBufferOffset = 0;
    this.writeBufferFileOffset = HEADER_SIZE;
    this.WRITE_BATCH_FRAMES = 1e3; // frames batched per file write
    this.initialized = false;
  }
  // Allocate FFT workspace/window, pick the FFT backend, and open the output
  // file. Channel-dependent state is deferred to initialize() (first chunk).
  async _setup(input, context) {
    this.linearBins = this.properties.fftSize / 2 + 1;
    this.windowCoefficients = hanningWindow(this.properties.fftSize);
    this.workspace = createFftWorkspace(this.properties.fftSize);
    const fftAddonOptions = { fftwPath: this.properties.fftwAddonPath || void 0 };
    const fftBackend = detectFftBackend(context.executionProviders, fftAddonOptions);
    this.addon = getFftAddon(fftBackend, fftAddonOptions);
    this.magnitudes = new Float32Array(this.linearBins);
    this.numFrames = 0;
    this.fileOffset = HEADER_SIZE;
    this.sampleBufferOffset = 0;
    // fftSize plus ~2M extra frames (8 MiB of float32) of headroom.
    this.sampleBufferCapacity = this.properties.fftSize + 8 * 1024 * 1024 / 4;
    this.fileHandle = await open(this.properties.outputPath, "w");
    return super._setup(input, context);
  }
  // First-chunk initialization: derive channel count and frequency-band
  // mapping from the chunk, then write the file header (frame count at
  // offset 16 is left 0 and patched in _close).
  async initialize(chunk) {
    if (this.initialized) return;
    this.initialized = true;
    this.channels = chunk.samples.length;
    const scale = this.properties.frequencyScale ?? "log";
    const numBands = this.properties.numBands ?? 512;
    const minFreq = this.properties.minFrequency ?? 20;
    const maxFreq = this.properties.maxFrequency ?? chunk.sampleRate / 2;
    if (scale === "linear") {
      this.bandMappings = void 0;
      this.outputBins = this.linearBins;
    } else {
      const computeFn = scale === "mel" ? computeMelBandMappings : scale === "erb" ? computeErbBandMappings : computeLogBandMappings;
      this.bandMappings = computeFn(numBands, minFreq, maxFreq, chunk.sampleRate, this.properties.fftSize);
      this.outputBins = numBands;
    }
    this.sampleBuffers = [];
    for (let ch = 0; ch < this.channels; ch++) {
      this.sampleBuffers.push(new Float32Array(this.sampleBufferCapacity));
    }
    if (!this.fileHandle) return;
    const header = Buffer.alloc(HEADER_SIZE);
    header.writeUInt32LE(chunk.sampleRate, 0);
    header.writeUInt32LE(this.channels, 4);
    header.writeUInt32LE(this.properties.fftSize, 8);
    header.writeUInt32LE(this.properties.hopSize, 12);
    header.writeUInt32LE(0, 16); // frame count placeholder, patched in _close
    header.writeUInt32LE(this.outputBins, 20);
    header.writeUInt8(FREQUENCY_SCALE_BYTE[scale], 24);
    header.writeFloatLE(minFreq, 25);
    header.writeFloatLE(maxFreq, 29);
    await this.fileHandle.write(header, 0, HEADER_SIZE, 0);
  }
  // Append the chunk to the per-channel accumulation buffers (growing them
  // geometrically if needed), then emit any complete FFT frames.
  async _write(chunk) {
    await this.initialize(chunk);
    const frames = chunk.samples[0]?.length ?? 0;
    if (this.sampleBufferOffset + frames > this.sampleBufferCapacity) {
      const newCapacity = Math.max(this.sampleBufferCapacity * 2, this.sampleBufferOffset + frames);
      for (let ch = 0; ch < this.channels; ch++) {
        const newBuf = new Float32Array(newCapacity);
        newBuf.set(this.sampleBuffers[ch].subarray(0, this.sampleBufferOffset));
        this.sampleBuffers[ch] = newBuf;
      }
      this.sampleBufferCapacity = newCapacity;
    }
    for (let ch = 0; ch < this.channels; ch++) {
      const src = chunk.samples[ch];
      if (!src) continue;
      this.sampleBuffers[ch].set(src, this.sampleBufferOffset);
    }
    this.sampleBufferOffset += frames;
    await this.processAccumulatedSamples(false);
  }
  // Flush remaining (zero-padded) samples, drain the write buffer, patch the
  // frame count into the header, and close the file. Safe to call when no
  // file handle was ever opened.
  async _close() {
    await this.processAccumulatedSamples(true);
    const fh = this.fileHandle;
    if (this.writeBuffer && this.writeBufferOffset > 0 && fh) {
      await fh.write(this.writeBuffer, 0, this.writeBufferOffset, this.writeBufferFileOffset);
      this.writeBufferOffset = 0;
    }
    this.writeBuffer = void 0;
    if (!fh) return;
    const header = Buffer.alloc(4);
    header.writeUInt32LE(this.numFrames, 0);
    await fh.write(header, 0, 4, 16);
    await fh.close();
    this.fileHandle = void 0;
  }
  // Convert as many complete windows as possible into spectrogram frames.
  // When `flush` is set, a final partial window is zero-padded to fftSize.
  async processAccumulatedSamples(flush) {
    const { fftSize, hopSize } = this.properties;
    const { addon } = this;
    const halfSize = this.linearBins;
    const magScale = 2 / fftSize;
    if (flush && this.sampleBufferOffset > 0 && this.sampleBufferOffset < fftSize) {
      for (let ch = 0; ch < this.channels; ch++) {
        const buf = this.sampleBuffers[ch];
        buf.fill(0, this.sampleBufferOffset, fftSize);
      }
      this.sampleBufferOffset = fftSize;
    }
    if (this.sampleBufferOffset < fftSize) return;
    const batchFrames = Math.floor((this.sampleBufferOffset - fftSize) / hopSize) + 1;
    if (batchFrames === 0) return;
    const frameByteSize = this.outputBins * this.channels * 4;
    const batchBytes = this.WRITE_BATCH_FRAMES * frameByteSize;
    if (!this.writeBuffer) {
      this.writeBuffer = Buffer.alloc(batchBytes);
      this.writeBufferOffset = 0;
      this.writeBufferFileOffset = this.fileOffset;
    }
    // NOTE(review): frames are produced channel-major within a batch and
    // writeFrame advances fileOffset/numFrames once per (channel, frame)
    // pair — for multi-channel input the stored frame count counts
    // channel-frames. Verify this matches the reader's expectation.
    for (let ch = 0; ch < this.channels; ch++) {
      const frames = computeSpectrogramFrames(
        this.sampleBuffers[ch],
        batchFrames,
        fftSize,
        hopSize,
        halfSize,
        magScale,
        this.outputBins,
        this.windowCoefficients,
        this.workspace,
        addon,
        this.bandMappings,
        this.magnitudes
      );
      for (const frame of frames) {
        await this.writeFrame(ch, frame);
      }
    }
    await this.flushWriteBuffer();
    // Keep the tail that has not yet been consumed by a full window.
    const keepFrom = batchFrames * hopSize;
    const keepCount = this.sampleBufferOffset - keepFrom;
    if (keepCount > 0) {
      for (let ch = 0; ch < this.channels; ch++) {
        const buf = this.sampleBuffers[ch];
        buf.copyWithin(0, keepFrom, keepFrom + keepCount);
      }
    }
    this.sampleBufferOffset = keepCount > 0 ? keepCount : 0;
  }
  // Serialize one channel's frame into the batched write buffer, flushing
  // the buffer to disk first when it would overflow.
  async writeFrame(ch, frame) {
    const frameByteSize = this.outputBins * this.channels * 4;
    if (this.writeBuffer && this.writeBufferOffset + frameByteSize > this.writeBuffer.length) {
      await this.flushWriteBuffer();
    }
    const buf = this.writeBuffer;
    if (!buf) return;
    const offset = this.writeBufferOffset;
    for (let bin = 0; bin < this.outputBins; bin++) {
      buf.writeFloatLE(frame[bin], offset + (ch * this.outputBins + bin) * 4);
    }
    this.writeBufferOffset += frameByteSize;
    this.fileOffset += frameByteSize;
    this.numFrames++;
  }
  // Write the batched frames to disk at the tracked file offset. No-op when
  // the buffer is empty or the file handle is absent (guard added to match
  // WaveformStream.flushWriteBuffer).
  async flushWriteBuffer() {
    if (!this.writeBuffer || this.writeBufferOffset === 0 || !this.fileHandle) return;
    await this.fileHandle.write(this.writeBuffer, 0, this.writeBufferOffset, this.writeBufferFileOffset);
    this.writeBufferFileOffset += this.writeBufferOffset;
    this.writeBufferOffset = 0;
  }
};
// Graph node wrapper for SpectrogramStream. Buffering is pinned to
// WHOLE_FILE so the stream receives the complete signal.
var _SpectrogramNode = class _SpectrogramNode extends TargetNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "target", "spectrogram"];
  }
  /** Type guard: true when `value` is a spectrogram target node. */
  static is(value) {
    if (!TargetNode.is(value)) return false;
    return value.type[2] === "spectrogram";
  }
  /** Build the stream that performs the STFT and file writing. */
  createStream() {
    return new SpectrogramStream(this.properties);
  }
  /** Immutable update: new node with `overrides` layered on top. */
  clone(overrides) {
    const next = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _SpectrogramNode(next);
  }
};
_SpectrogramNode.moduleName = "Spectrogram";
_SpectrogramNode.moduleDescription = "Generate spectrogram visualization data";
_SpectrogramNode.schema = schema3;
var SpectrogramNode = _SpectrogramNode;
/**
 * Convenience factory for a SpectrogramNode.
 * @param outputPath destination file for the binary spectrogram data
 * @param options optional FFT / frequency-scale settings
 */
function spectrogram(outputPath, options) {
  const opts = options ?? {};
  return new SpectrogramNode({
    outputPath,
    fftSize: opts.fftSize ?? 2048,
    hopSize: opts.hopSize ?? 512,
    frequencyScale: opts.frequencyScale,
    numBands: opts.numBands,
    minFrequency: opts.minFrequency,
    maxFrequency: opts.maxFrequency,
    fftwAddonPath: opts.fftwAddonPath ?? ""
  });
}
+
926
+ // src/targets/waveform/utils/minmax.ts
927
/**
 * Widen the running per-channel min/max with the sample at `frame`.
 * Missing channels/samples count as 0; channels whose min/max slot is
 * undefined are left untouched.
 */
function updateMinMax(samples, frame, channels, min, max) {
  for (let channel = 0; channel < channels; channel++) {
    const value = samples[channel]?.[frame] ?? 0;
    const lo = min[channel];
    if (lo !== undefined && value < lo) {
      min[channel] = value;
    }
    const hi = max[channel];
    if (hi !== undefined && value > hi) {
      max[channel] = value;
    }
  }
}
/**
 * Serialize one waveform point into `target` at `offset`.
 * Layout per channel: [min: float32le][max: float32le] — 8 bytes/channel.
 * Missing entries are written as 0.
 */
function writeMinMaxPoint(min, max, channels, target, offset) {
  let cursor = offset;
  for (let channel = 0; channel < channels; channel++) {
    cursor = target.writeFloatLE(min[channel] ?? 0, cursor);
    cursor = target.writeFloatLE(max[channel] ?? 0, cursor);
  }
}
+
943
+ // src/targets/waveform/index.ts
944
// Properties for WaveformNode: output file plus points-per-second resolution.
var schema4 = z.object({
  outputPath: z.string().default("").meta({ input: "file", mode: "save" }).describe("Output Path"),
  resolution: z.number().min(100).max(1e4).multipleOf(100).default(1e3).describe("Resolution")
});
// Byte size of the waveform file header written by WaveformStream
// (4 uint32 fields: sampleRate, channels, resolution, totalPoints).
var HEADER_SIZE2 = 16;
// Reduces incoming PCM into per-channel min/max points (one point per
// `samplesPerPoint` frames) and writes them to a binary file: a 16-byte
// header followed by float32 min/max pairs. Writes are batched in memory
// (WRITE_BATCH_POINTS points) before hitting disk.
var WaveformStream = class extends BufferedTargetStream {
  constructor() {
    super(...arguments);
    this.channels = 0;
    this.samplesPerPoint = 1; // frames folded into one min/max point
    this.totalPoints = 0; // patched into the header at close
    this.fileOffset = HEADER_SIZE2;
    this.samplesInCurrentWindow = 0;
    this.currentMin = new Float32Array(0);
    this.currentMax = new Float32Array(0);
    this.writeBuffer = Buffer.alloc(0);
    this.writeBufferOffset = 0;
    this.writeBufferFileOffset = HEADER_SIZE2;
    this.WRITE_BATCH_POINTS = 1e3;
    this.initialized = false;
  }
  // Reset counters and open the output file; channel-dependent state is
  // deferred to initialize() on the first chunk.
  async _setup(input, context) {
    this.writeBufferOffset = 0;
    this.writeBufferFileOffset = HEADER_SIZE2;
    this.totalPoints = 0;
    this.fileOffset = HEADER_SIZE2;
    this.samplesInCurrentWindow = 0;
    this.fileHandle = await open(this.properties.outputPath, "w");
    return super._setup(input, context);
  }
  // First-chunk initialization: channel count, window size, and min/max
  // accumulators. Seeds are 1/-1 so any in-range sample replaces them.
  initialize(chunk) {
    if (this.initialized) return;
    this.initialized = true;
    this.channels = chunk.samples.length;
    this.samplesPerPoint = Math.max(1, Math.round(chunk.sampleRate / this.properties.resolution));
    this.currentMin = new Float32Array(this.channels).fill(1);
    this.currentMax = new Float32Array(this.channels).fill(-1);
    const pointByteSize = this.channels * 8; // min + max float32 per channel
    this.writeBuffer = Buffer.alloc(this.WRITE_BATCH_POINTS * pointByteSize);
  }
  // Write the file header; the point count at offset 12 is left 0 and
  // patched in _close().
  async writeHeader(chunk) {
    if (!this.fileHandle) return;
    const header = Buffer.alloc(HEADER_SIZE2);
    header.writeUInt32LE(chunk.sampleRate, 0);
    header.writeUInt32LE(this.channels, 4);
    header.writeUInt32LE(this.properties.resolution, 8);
    header.writeUInt32LE(0, 12); // totalPoints placeholder
    await this.fileHandle.write(header, 0, HEADER_SIZE2, 0);
  }
  // Fold each incoming frame into the current min/max window, emitting a
  // point whenever the window fills.
  async _write(chunk) {
    if (!this.initialized) {
      this.initialize(chunk);
      await this.writeHeader(chunk);
    }
    const frames = chunk.samples[0]?.length ?? 0;
    for (let frame = 0; frame < frames; frame++) {
      updateMinMax(chunk.samples, frame, this.channels, this.currentMin, this.currentMax);
      this.samplesInCurrentWindow++;
      if (this.samplesInCurrentWindow >= this.samplesPerPoint) {
        await this.flushPoint();
      }
    }
  }
  // Emit any partial final point, drain the write buffer, patch the point
  // count into the header, and close the file.
  async _close() {
    if (this.samplesInCurrentWindow > 0) {
      await this.flushPoint();
    }
    const fh = this.fileHandle;
    if (!fh) return;
    if (this.writeBufferOffset > 0) {
      await fh.write(this.writeBuffer, 0, this.writeBufferOffset, this.writeBufferFileOffset);
      this.writeBufferOffset = 0;
    }
    const header = Buffer.alloc(4);
    header.writeUInt32LE(this.totalPoints, 0);
    await fh.write(header, 0, 4, 12);
    await fh.close();
    this.fileHandle = void 0;
  }
  // Serialize the current window's min/max into the batch buffer (flushing
  // to disk first if full) and reset the window accumulators.
  async flushPoint() {
    const pointByteSize = this.channels * 8;
    if (this.writeBufferOffset + pointByteSize > this.writeBuffer.length) {
      await this.flushWriteBuffer();
    }
    writeMinMaxPoint(this.currentMin, this.currentMax, this.channels, this.writeBuffer, this.writeBufferOffset);
    this.writeBufferOffset += pointByteSize;
    this.fileOffset += pointByteSize;
    this.totalPoints++;
    this.samplesInCurrentWindow = 0;
    this.currentMin.fill(1);
    this.currentMax.fill(-1);
  }
  // Write batched points to disk at the tracked file offset; no-op when
  // empty or when the file handle is absent.
  async flushWriteBuffer() {
    if (this.writeBufferOffset === 0 || !this.fileHandle) return;
    await this.fileHandle.write(this.writeBuffer, 0, this.writeBufferOffset, this.writeBufferFileOffset);
    this.writeBufferFileOffset += this.writeBufferOffset;
    this.writeBufferOffset = 0;
  }
};
// Graph node wrapper for WaveformStream. Whole-file buffering guarantees
// the stream sees the entire signal before points are finalized.
var _WaveformNode = class _WaveformNode extends TargetNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "target", "waveform"];
  }
  /** Type guard: true when `value` is a waveform target node. */
  static is(value) {
    if (!TargetNode.is(value)) return false;
    return value.type[2] === "waveform";
  }
  /** Build the min/max-reduction stream for this node's properties. */
  createStream() {
    return new WaveformStream(this.properties);
  }
  /** Immutable update: new node with `overrides` layered on top. */
  clone(overrides) {
    const next = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _WaveformNode(next);
  }
};
_WaveformNode.moduleName = "Waveform";
_WaveformNode.moduleDescription = "Generate waveform visualization data";
_WaveformNode.schema = schema4;
var WaveformNode = _WaveformNode;
/**
 * Convenience factory for a WaveformNode.
 * @param outputPath destination file for the binary waveform data
 * @param options optional `resolution` (points/second, default 1000)
 */
function waveform(outputPath, options) {
  const resolution = options?.resolution ?? 1e3;
  return new WaveformNode({ outputPath, resolution });
}
+
1069
+ // src/utils/ffmpeg.ts
1070
/**
 * Resolve when `stdin` emits 'drain'; reject if the process errors or
 * closes first. All three listeners are detached on whichever event fires,
 * so no listener leaks across repeated backpressure waits.
 */
function waitForDrain(proc, stdin) {
  return new Promise((resolve, reject) => {
    const onDrain = () => {
      detach();
      resolve();
    };
    const onError = (error) => {
      detach();
      reject(error);
    };
    const onClose = (code) => {
      detach();
      reject(new Error(`ffmpeg exited with code ${code} while writing stdin`));
    };
    function detach() {
      stdin.removeListener("drain", onDrain);
      proc.removeListener("error", onError);
      proc.removeListener("close", onClose);
    }
    stdin.once("drain", onDrain);
    proc.once("error", onError);
    proc.once("close", onClose);
  });
}
+
1095
+ // src/targets/write/utils/wav.ts
1096
// Largest value storable in a RIFF uint32 size field; data beyond this
// requires the RF64 header variant (see buildRf64Header).
var UINT32_MAX = 4294967295;
/**
 * Byte width of one sample for a supported bit depth string.
 * "32" (int PCM) and "32f" (float PCM) are both four bytes wide;
 * unknown inputs yield undefined.
 */
function getBytesPerSample(bitDepth) {
  const widths = { "16": 2, "24": 3, "32": 4, "32f": 4 };
  return widths[bitDepth];
}
/**
 * Quantize one float sample into `buffer` at `offset` and return the offset
 * just past it. Integer depths clamp to [-1, 1] and scale asymmetrically
 * (full negative code range, max positive code); "32f" stores the raw IEEE
 * float unclamped. Unknown bit depths write nothing and return undefined.
 */
function writeSample(buffer, offset, sample, bitDepth) {
  if (bitDepth === "32f") {
    buffer.writeFloatLE(sample, offset);
    return offset + 4;
  }
  const clamped = Math.max(-1, Math.min(1, sample));
  if (bitDepth === "16") {
    const scaled = clamped < 0 ? clamped * 32768 : clamped * 32767;
    return buffer.writeInt16LE(Math.round(scaled), offset);
  }
  if (bitDepth === "24") {
    // writeIntLE emits the signed 24-bit two's-complement bytes directly.
    const scaled = Math.round(clamped < 0 ? clamped * 8388608 : clamped * 8388607);
    return buffer.writeIntLE(scaled, offset, 3);
  }
  if (bitDepth === "32") {
    const scaled = clamped < 0 ? clamped * 2147483648 : clamped * 2147483647;
    return buffer.writeInt32LE(Math.round(scaled), offset);
  }
  return void 0;
}
/**
 * Write a standard PCM "fmt " chunk (16-byte body) followed by the "data"
 * chunk header into `header` starting at `offset`. Format code 3 = IEEE
 * float ("32f"), 1 = integer PCM.
 */
function writeFmtAndDataChunks(header, offset, sampleRate, channels, bitDepth, dataSize) {
  const bytesPerSample = getBytesPerSample(bitDepth);
  const blockAlign = channels * bytesPerSample;
  const formatCode = bitDepth === "32f" ? 3 : 1;
  header.write("fmt ", offset);
  header.writeUInt32LE(16, offset + 4); // fmt chunk body size
  header.writeUInt16LE(formatCode, offset + 8);
  header.writeUInt16LE(channels, offset + 10);
  header.writeUInt32LE(sampleRate, offset + 12);
  header.writeUInt32LE(sampleRate * blockAlign, offset + 16); // byte rate
  header.writeUInt16LE(blockAlign, offset + 20);
  header.writeUInt16LE(bytesPerSample * 8, offset + 22); // bits per sample
  header.write("data", offset + 24);
  header.writeUInt32LE(dataSize, offset + 28);
}
/**
 * Build the 80-byte RIFF/WAVE header: RIFF + WAVE ids, a 28-byte JUNK
 * placeholder chunk (same size/position as the ds64 chunk in
 * buildRf64Header, so the header can be rewritten in place), then the
 * fmt and data chunk headers at offset 48.
 */
function buildWavHeader(dataSize, sampleRate, channels, bitDepth) {
  const HEADER_BYTES = 80;
  const header = Buffer.alloc(HEADER_BYTES);
  header.write("RIFF", 0);
  header.writeUInt32LE(HEADER_BYTES - 8 + dataSize, 4); // RIFF chunk size
  header.write("WAVE", 8);
  header.write("JUNK", 12);
  header.writeUInt32LE(28, 16); // JUNK body size
  writeFmtAndDataChunks(header, 48, sampleRate, channels, bitDepth, dataSize);
  return header;
}
/**
 * Build the 80-byte RF64 header for files whose data exceeds uint32 range.
 * The 32-bit RIFF and data size fields hold UINT32_MAX sentinels; real
 * 64-bit sizes live in the ds64 chunk at offset 12.
 */
function buildRf64Header(dataSize, sampleRate, channels, bitDepth) {
  const HEADER_BYTES = 80;
  const header = Buffer.alloc(HEADER_BYTES);
  const bytesPerSample = getBytesPerSample(bitDepth);
  const blockAlign = channels * bytesPerSample;
  const sampleCount = Math.floor(dataSize / blockAlign);
  header.write("RF64", 0);
  header.writeUInt32LE(UINT32_MAX, 4); // sentinel; real size is in ds64
  header.write("WAVE", 8);
  header.write("ds64", 12);
  header.writeUInt32LE(28, 16); // ds64 body size
  writeBigUInt64LE(header, 20, HEADER_BYTES - 8 + dataSize); // 64-bit RIFF size
  writeBigUInt64LE(header, 28, dataSize); // 64-bit data size
  writeBigUInt64LE(header, 36, sampleCount); // 64-bit sample count
  header.writeUInt32LE(0, 44); // table length (no extra entries)
  writeFmtAndDataChunks(header, 48, sampleRate, channels, bitDepth, UINT32_MAX);
  return header;
}
/**
 * Write a JS number as an unsigned little-endian 64-bit integer.
 * The fractional part is truncated before the BigInt conversion.
 */
function writeBigUInt64LE(buffer, offset, value) {
  const whole = BigInt(Math.floor(value));
  buffer.writeBigUInt64LE(whole, offset);
}
+
1186
+ // src/targets/write/index.ts
1187
// Properties for WriteNode: output path, optional ffmpeg binary (needed for
// non-WAV encodings), and PCM bit depth.
var schema5 = z.object({
  path: z.string().default("").meta({ input: "file", mode: "save" }),
  ffmpegPath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffmpeg", download: "https://ffmpeg.org/download.html" }).describe("FFmpeg \u2014 audio/video processing tool"),
  bitDepth: z.enum(["16", "24", "32", "32f"]).default("16")
});
// Size of the WAV/RF64 header produced by buildWavHeader/buildRf64Header.
var WAV_HEADER_SIZE = 80;
// Threshold above which the final header must be rewritten as RF64.
var UINT32_MAX2 = 4294967295;
// Writes incoming PCM either directly to a WAV/RF64 file, or — when a
// non-WAV encoding is requested and an ffmpeg binary is configured — pipes
// a WAV stream into a spawned ffmpeg process that encodes to `path`.
var WriteStream = class extends BufferedTargetStream {
  constructor() {
    super(...arguments);
    this.sampleRate = 0;
    this.channels = 0;
    this.bytesWritten = 0; // PCM payload bytes written so far
    this.useEncoding = false; // true when piping through ffmpeg
    this.headerWritten = false; // streaming WAV header sent to ffmpeg stdin
    this.initialized = false;
  }
  // Decide between direct file output and ffmpeg piping; spawn ffmpeg and
  // wire up its completion promise, or open the output file.
  async _setup(input, context) {
    this.bytesWritten = 0;
    this.headerWritten = false;
    this.initialized = false;
    const encoding = this.properties.encoding;
    if (encoding && encoding.format !== "wav" && !this.properties.ffmpegPath) {
      throw new Error(`Encoding to ${encoding.format} requires ffmpegPath`);
    }
    this.useEncoding = encoding !== void 0 && encoding.format !== "wav" && !!this.properties.ffmpegPath;
    if (this.useEncoding && encoding) {
      const ffmpegPath = this.properties.ffmpegPath;
      if (!ffmpegPath) throw new Error("ffmpegPath is required for encoding");
      const args = this.buildFfmpegArgs(encoding);
      const proc = spawn(ffmpegPath, args, {
        stdio: ["pipe", "ignore", "pipe"]
      });
      this.ffmpegProcess = proc;
      this.ffmpegStdin = proc.stdin;
      // Swallow EPIPE-style stdin errors; process exit is surfaced via
      // ffmpegDone instead.
      this.ffmpegStdin.on("error", () => {
      });
      this.ffmpegDone = new Promise((resolve, reject) => {
        proc.on("close", (code) => {
          if (code !== 0) {
            reject(new Error(`ffmpeg exited with code ${code}`));
          } else {
            resolve();
          }
        });
        proc.on("error", (error) => {
          reject(new Error(`Failed to spawn ffmpeg: ${error.message}`));
        });
      });
    } else {
      this.fileHandle = await open(this.properties.path, "w");
    }
    return super._setup(input, context);
  }
  // First-chunk initialization: capture format and, for direct file output,
  // reserve header space with a zero-size placeholder (rewritten in _close).
  async initialize(chunk) {
    if (this.initialized) return;
    this.initialized = true;
    this.sampleRate = chunk.sampleRate;
    this.channels = chunk.samples.length;
    if (!this.useEncoding && this.fileHandle) {
      const header = buildWavHeader(0, this.sampleRate, this.channels, this.properties.bitDepth);
      await this.fileHandle.write(header, 0, WAV_HEADER_SIZE, 0);
    }
  }
  // Convert the chunk to interleaved PCM and either pipe it to ffmpeg
  // (preceded once by a max-size streaming WAV header) or append it to the
  // file after the header region.
  async _write(chunk) {
    await this.initialize(chunk);
    const bytes = this.convertChunk(chunk);
    if (this.useEncoding && this.ffmpegStdin) {
      if (!this.headerWritten) {
        // Unknown final length when streaming: advertise UINT32_MAX.
        const header = buildWavHeader(4294967295, this.sampleRate, this.channels, this.properties.bitDepth);
        await this.writeToStdin(header);
        this.headerWritten = true;
      }
      await this.writeToStdin(bytes);
    } else if (this.fileHandle) {
      await this.fileHandle.write(bytes, 0, bytes.length, WAV_HEADER_SIZE + this.bytesWritten);
    }
    this.bytesWritten += bytes.length;
  }
  // Finish: close ffmpeg stdin and await its exit, or rewrite the final
  // header (RF64 when the payload exceeds uint32 range) and close the file.
  async _close() {
    if (this.useEncoding) {
      if (this.ffmpegStdin) {
        this.ffmpegStdin.end();
      }
      if (this.ffmpegDone) {
        await this.ffmpegDone;
      }
      this.ffmpegProcess = void 0;
      this.ffmpegStdin = void 0;
      this.ffmpegDone = void 0;
    } else if (this.fileHandle) {
      const header = this.bytesWritten > UINT32_MAX2 ? buildRf64Header(this.bytesWritten, this.sampleRate, this.channels, this.properties.bitDepth) : buildWavHeader(this.bytesWritten, this.sampleRate, this.channels, this.properties.bitDepth);
      await this.fileHandle.write(header, 0, WAV_HEADER_SIZE, 0);
      await this.fileHandle.close();
      this.fileHandle = void 0;
    }
  }
  // Interleave planar float channels into packed PCM at the configured
  // bit depth; missing samples become 0.
  convertChunk(chunk) {
    const frames = chunk.samples[0]?.length ?? 0;
    const channels = chunk.samples.length;
    const bytesPerSample = getBytesPerSample(this.properties.bitDepth);
    const buffer = Buffer.alloc(frames * channels * bytesPerSample);
    let offset = 0;
    for (let frame = 0; frame < frames; frame++) {
      for (let ch = 0; ch < channels; ch++) {
        const sample = chunk.samples[ch]?.[frame] ?? 0;
        offset = writeSample(buffer, offset, sample, this.properties.bitDepth);
      }
    }
    return buffer;
  }
  // Assemble the ffmpeg command line: WAV on stdin, encoder flags per
  // format, overwrite (-y) the output path.
  buildFfmpegArgs(encoding) {
    const args = ["-f", "wav", "-i", "pipe:0"];
    switch (encoding.format) {
      case "flac":
        args.push("-codec:a", "flac");
        break;
      case "mp3":
        args.push("-codec:a", "libmp3lame");
        if (encoding.vbr !== void 0) {
          args.push("-q:a", String(encoding.vbr));
        } else {
          args.push("-b:a", encoding.bitrate ?? "192k");
        }
        break;
      case "aac":
        args.push("-codec:a", "aac", "-b:a", encoding.bitrate ?? "192k");
        break;
    }
    args.push("-y", this.properties.path);
    return args;
  }
  // Write to ffmpeg stdin, honoring backpressure: when the internal buffer
  // is full, wait for 'drain' (or process error/close) before returning.
  writeToStdin(data) {
    const stdin = this.ffmpegStdin;
    const proc = this.ffmpegProcess;
    if (!stdin || !proc) return Promise.resolve();
    const canWrite = stdin.write(data);
    if (!canWrite) {
      return waitForDrain(proc, stdin);
    }
    return Promise.resolve();
  }
};
// Graph node wrapper for WriteStream (direct WAV or ffmpeg-encoded output).
var _WriteNode = class _WriteNode extends TargetNode {
  constructor() {
    super(...arguments);
    this.type = ["buffered-audio-node", "target", "write"];
  }
  /** Build the stream that writes WAV directly or pipes through ffmpeg. */
  createStream() {
    return new WriteStream(this.properties);
  }
  /** Immutable update: new node with `overrides` layered on top. */
  clone(overrides) {
    const next = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _WriteNode(next);
  }
};
_WriteNode.moduleName = "Write";
_WriteNode.moduleDescription = "Write audio to a file";
_WriteNode.schema = schema5;
var WriteNode = _WriteNode;
/**
 * Convenience factory for a WriteNode.
 * @param path output file path
 * @param options optional bitDepth / ffmpegPath / encoding settings
 */
function write(path, options) {
  const opts = options ?? {};
  return new WriteNode({
    path,
    bitDepth: opts.bitDepth ?? "16",
    ffmpegPath: opts.ffmpegPath ?? "",
    encoding: opts.encoding
  });
}
// One region to remove, in seconds. `end >= start` is not enforced here;
// CutStream simply produces an empty cut for inverted regions.
var cutRegionSchema = z.object({
  start: z.number().min(0).describe("Start (seconds)"),
  end: z.number().min(0).describe("End (seconds)")
});
// Properties for CutNode: zero or more regions to remove.
var schema6 = z.object({
  regions: z.array(cutRegionSchema).default([]).describe("Regions")
});
// Removes the configured time regions from the stream. Regions are sorted
// by start time once; as chunks pass through, frames inside regions are
// dropped and chunk offsets are shifted left by the running total of
// removed frames so downstream timing stays contiguous.
var CutStream = class extends BufferedTransformStream {
  constructor(properties) {
    super(properties);
    this.removedFrames = 0; // frames removed across all previous chunks
    this.sortedRegions = [...this.properties.regions].sort((left, right) => left.start - right.start);
  }
  // Compute the frame ranges of this chunk to keep, then either pass the
  // chunk through (offset-adjusted), drop it entirely, or copy the kept
  // ranges into fresh channel buffers.
  _unbuffer(chunk) {
    const sampleRate = chunk.sampleRate;
    const chunkFrames = chunk.samples[0]?.length ?? 0;
    const chunkStartSec = chunk.offset / sampleRate;
    const keepRanges = [];
    let cursor = 0; // first frame not yet claimed by a cut
    for (const region of this.sortedRegions) {
      // Region bounds in chunk-local frames, clamped to the chunk.
      const cutStart = Math.max(0, Math.round((region.start - chunkStartSec) * sampleRate));
      const cutEnd = Math.min(chunkFrames, Math.round((region.end - chunkStartSec) * sampleRate));
      if (cutEnd <= 0 || cutStart >= chunkFrames) continue;
      const clampedStart = Math.max(cursor, 0);
      // Overlapping regions can put cutStart before the cursor; the max
      // collapses the kept range to empty in that case.
      const clampedEnd = Math.max(clampedStart, cutStart);
      if (clampedEnd > clampedStart) {
        keepRanges.push({ start: clampedStart, end: clampedEnd });
      }
      cursor = Math.max(cursor, cutEnd);
    }
    if (cursor < chunkFrames) {
      keepRanges.push({ start: cursor, end: chunkFrames });
    }
    // Entire chunk removed: emit nothing.
    if (keepRanges.length === 0) return void 0;
    const totalKept = keepRanges.reduce((sum, range) => sum + (range.end - range.start), 0);
    const removedFrames = chunkFrames - totalKept;
    // Offset shift uses the total removed BEFORE this chunk.
    const adjustedOffset = chunk.offset - this.removedFrames;
    this.removedFrames += removedFrames;
    if (totalKept === chunkFrames) {
      // Nothing cut in this chunk: reuse the sample buffers as-is.
      return { samples: chunk.samples, offset: adjustedOffset, sampleRate: chunk.sampleRate, bitDepth: chunk.bitDepth };
    }
    const channels = chunk.samples.length;
    const output = [];
    for (let ch = 0; ch < channels; ch++) {
      const channel = chunk.samples[ch];
      if (!channel) {
        output.push(new Float32Array(totalKept));
        continue;
      }
      const out = new Float32Array(totalKept);
      let writeOffset = 0;
      for (const range of keepRanges) {
        out.set(channel.subarray(range.start, range.end), writeOffset);
        writeOffset += range.end - range.start;
      }
      output.push(out);
    }
    return { samples: output, offset: adjustedOffset, sampleRate: chunk.sampleRate, bitDepth: chunk.bitDepth };
  }
};
// Graph node wrapper for CutStream.
var _CutNode = class _CutNode extends TransformNode {
  constructor() {
    super(...arguments);
    this.type = ["buffered-audio-node", "transform", "cut"];
  }
  /** Type guard: true when `value` is a cut transform node. */
  static is(value) {
    if (!TransformNode.is(value)) return false;
    return value.type[2] === "cut";
  }
  /** Build the region-removal stream for this node's properties. */
  createStream() {
    const streamProperties = { ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 };
    return new CutStream(streamProperties);
  }
  /** Immutable update: new node with `overrides` layered on top. */
  clone(overrides) {
    const next = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _CutNode(next);
  }
};
_CutNode.moduleName = "Cut";
_CutNode.moduleDescription = "Remove a region of audio";
_CutNode.schema = schema6;
var CutNode = _CutNode;
/**
 * Convenience factory for a CutNode.
 * @param regions array of { start, end } regions (seconds) to remove
 * @param options optional node id
 */
function cut(regions, options) {
  const id = options?.id;
  return new CutNode({ regions, id });
}
// Properties for DitherNode: target quantization depth and optional
// first-order noise shaping (error feedback).
var schema7 = z.object({
  bitDepth: z.union([z.literal(16), z.literal(24)]).default(16).describe("Bit Depth"),
  noiseShaping: z.boolean().default(false).describe("Noise Shaping")
});
// Applies TPDF dither (triangular noise of one LSB peak amplitude, from the
// difference of two uniform randoms) before quantizing to the target bit
// depth; optional first-order error feedback ("noise shaping") carries the
// previous quantization error per channel into the next sample.
var DitherStream = class extends BufferedTransformStream {
  constructor() {
    super(...arguments);
    this.lastError = []; // per-channel quantization error from prior sample
  }
  // Tag buffered audio with the target bit depth.
  async _buffer(chunk, buffer) {
    await buffer.append(chunk.samples, chunk.sampleRate, this.properties.bitDepth);
  }
  // Dither and quantize every sample. Output is non-deterministic by design
  // (Math.random noise), but bounded within ~1 LSB of the input.
  _unbuffer(chunk) {
    const { bitDepth, noiseShaping } = this.properties;
    const quantizationLevels = Math.pow(2, bitDepth - 1);
    const lsb = 1 / quantizationLevels;
    // Grow error state lazily as channels appear.
    while (this.lastError.length < chunk.samples.length) {
      this.lastError.push(0);
    }
    const samples = chunk.samples.map((channel, ch) => {
      const output = new Float32Array(channel.length);
      for (let index = 0; index < channel.length; index++) {
        const sample = channel[index] ?? 0;
        const tpdfNoise = (Math.random() - Math.random()) * lsb;
        let dithered = sample + tpdfNoise;
        if (noiseShaping) {
          dithered += this.lastError[ch] ?? 0;
        }
        const quantized = Math.round(dithered * quantizationLevels) / quantizationLevels;
        if (noiseShaping) {
          this.lastError[ch] = dithered - quantized;
        }
        output[index] = quantized;
      }
      return output;
    });
    return { samples, offset: chunk.offset, sampleRate: chunk.sampleRate, bitDepth: this.properties.bitDepth };
  }
};
// Graph node wrapper for DitherStream.
var _DitherNode = class _DitherNode extends TransformNode {
  constructor() {
    super(...arguments);
    this.type = ["buffered-audio-node", "transform", "dither"];
  }
  /** Type guard: true when `value` is a dither transform node. */
  static is(value) {
    if (!TransformNode.is(value)) return false;
    return value.type[2] === "dither";
  }
  /** Build the dithering stream for this node's properties. */
  createStream() {
    const streamProperties = { ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 };
    return new DitherStream(streamProperties);
  }
  /** Immutable update: new node with `overrides` layered on top. */
  clone(overrides) {
    const next = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _DitherNode(next);
  }
};
_DitherNode.moduleName = "Dither";
_DitherNode.moduleDescription = "Add shaped noise to reduce quantization distortion";
_DitherNode.schema = schema7;
var DitherNode = _DitherNode;
/**
 * Convenience factory for a DitherNode. Inputs are validated through the
 * zod schema so defaults and the 16/24 bit-depth union are enforced.
 */
function dither(bitDepth, options) {
  const parsed = schema7.parse({ bitDepth, noiseShaping: options?.noiseShaping });
  const id = options?.id;
  return new DitherNode({ ...parsed, id });
}
// Properties for NormalizeNode: peak target in linear amplitude (1 = 0 dBFS).
var schema8 = z.object({
  ceiling: z.number().min(0).max(1).multipleOf(0.01).default(1).describe("Ceiling")
});
// Peak normalization: scans the whole file for its absolute peak while
// buffering, then scales all samples so the peak hits `ceiling`.
var NormalizeStream = class extends BufferedTransformStream {
  constructor() {
    super(...arguments);
    this.peak = 0; // largest finite |sample| seen so far
    this.scale = 1; // gain applied during unbuffering
  }
  // Pass the chunk to the base buffer, then fold it into the running peak.
  async _buffer(chunk, buffer) {
    await super._buffer(chunk, buffer);
    for (const channel of chunk.samples) {
      const data = channel ?? new Float32Array(0);
      for (let i = 0; i < data.length; i++) {
        const magnitude = Math.abs(data[i] ?? 0);
        if (Number.isFinite(magnitude) && magnitude > this.peak) {
          this.peak = magnitude;
        }
      }
    }
  }
  // Derive the gain once the whole file has been seen. Silence keeps unity
  // gain, and a non-finite ratio falls back to 1.
  _process(_buffer) {
    const ratio = this.peak === 0 ? 1 : this.properties.ceiling / this.peak;
    this.scale = Number.isFinite(ratio) ? ratio : 1;
  }
  // Apply the gain on the way out; unity gain passes chunks through untouched.
  _unbuffer(chunk) {
    if (this.scale === 1) return chunk;
    const samples = chunk.samples.map((channel) => {
      const out = new Float32Array(channel.length);
      for (let i = 0; i < channel.length; i++) {
        out[i] = (channel[i] ?? 0) * this.scale;
      }
      return out;
    });
    return { samples, offset: chunk.offset, sampleRate: chunk.sampleRate, bitDepth: chunk.bitDepth };
  }
};
// Graph node wrapper for NormalizeStream. Whole-file buffering is required
// because the peak must be known before any sample can be scaled.
var _NormalizeNode = class _NormalizeNode extends TransformNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "transform", "normalize"];
  }
  /** Type guard: true when `value` is a normalize transform node. */
  static is(value) {
    if (!TransformNode.is(value)) return false;
    return value.type[2] === "normalize";
  }
  /** Build the peak-normalization stream for this node's properties. */
  createStream() {
    const streamProperties = { ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 };
    return new NormalizeStream(streamProperties);
  }
  /** Immutable update: new node with `overrides` layered on top. */
  clone(overrides) {
    const next = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _NormalizeNode(next);
  }
};
_NormalizeNode.moduleName = "Normalize";
_NormalizeNode.moduleDescription = "Adjust peak or loudness level to a target ceiling";
_NormalizeNode.schema = schema8;
var NormalizeNode = _NormalizeNode;
/**
 * Convenience factory for a NormalizeNode (ceiling defaults to 1 = 0 dBFS).
 */
function normalize(options) {
  const ceiling = options?.ceiling ?? 1;
  const id = options?.id;
  return new NormalizeNode({ ceiling, id });
}
// Properties for PadNode: seconds of silence to add before/after the audio.
var schema9 = z.object({
  before: z.number().min(0).multipleOf(1e-3).default(0).describe("Before"),
  after: z.number().min(0).multipleOf(1e-3).default(0).describe("After")
});
// Adds leading/trailing silence once the whole file is buffered.
var PadStream = class extends BufferedTransformStream {
  /**
   * Rewrite the buffer with silence padding. Durations come from the
   * node's `before`/`after` properties (seconds).
   * NOTE(review): assumes this.sampleRate is populated by the base stream;
   * falls back to 44100 when absent — confirm against BufferedTransformStream.
   */
  async _process(buffer) {
    const { before, after } = this.properties;
    const channelCount = buffer.channels;
    const rate = this.sampleRate ?? 44100;
    if (before > 0) {
      // Leading silence requires a rebuild: read everything, shift it
      // right by the pad amount, then truncate and re-append.
      const padFrames = Math.round(before * rate);
      const existing = await buffer.read(0, buffer.frames);
      const shifted = [];
      for (let ch = 0; ch < channelCount; ch++) {
        const source = existing.samples[ch] ?? new Float32Array(0);
        const target = new Float32Array(padFrames + source.length);
        target.set(source, padFrames);
        shifted.push(target);
      }
      await buffer.truncate(0);
      await buffer.append(shifted);
    }
    if (after > 0) {
      // Trailing silence is just zeroed channels appended at the end.
      const padFrames = Math.round(after * rate);
      const silence = [];
      for (let ch = 0; ch < channelCount; ch++) {
        silence.push(new Float32Array(padFrames));
      }
      await buffer.append(silence);
    }
  }
};
// Pad node: changes total length, so it buffers the full file.
var _PadNode = class _PadNode extends TransformNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "transform", "pad"];
  }
  static is(value) {
    return TransformNode.is(value) && value.type[2] === "pad";
  }
  createStream() {
    return new PadStream({ ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 });
  }
  clone(overrides) {
    return new _PadNode({ ...this.properties, previousProperties: this.properties, ...overrides });
  }
};
_PadNode.moduleName = "Pad";
_PadNode.moduleDescription = "Add silence to start or end of audio";
_PadNode.schema = schema9;
var PadNode = _PadNode;
/**
 * Factory for PadNode.
 * Fix: `pad()` with no argument previously threw — `schema9.parse(undefined)`
 * rejects non-objects and `options.id` dereferenced undefined. Guard with
 * `?? {}` / `?.` so defaults apply, matching the `trim()` factory.
 */
function pad(options) {
  const parsed = schema9.parse(options ?? {});
  return new PadNode({ ...parsed, id: options?.id });
}
1611
// Phase properties: polarity flip by default, or an explicit rotation angle in degrees.
var schema10 = z.object({
  invert: z.boolean().default(true).describe("Invert"),
  angle: z.number().min(-180).max(180).multipleOf(1).optional().describe("Angle")
});
1615
// Phase stream: polarity inversion or an allpass-based phase rotation.
var PhaseStream = class extends BufferedTransformStream {
  constructor() {
    super(...arguments);
    // One filter state value per channel, persisted across chunks.
    this.allpassState = [];
  }
  // An explicit angle takes precedence over the invert flag; with neither,
  // the chunk passes through untouched.
  _unbuffer(chunk) {
    const { invert: invert2, angle } = this.properties;
    if (angle !== void 0) {
      return this.applyPhaseRotation(chunk, angle);
    }
    if (invert2) {
      return this.applyInvert(chunk);
    }
    return chunk;
  }
  // Polarity flip: negate every sample of every channel into a fresh array.
  applyInvert(chunk) {
    const samples = chunk.samples.map((channel) => {
      const output = new Float32Array(channel.length);
      for (let index = 0; index < channel.length; index++) {
        output[index] = -(channel[index] ?? 0);
      }
      return output;
    });
    return { samples, offset: chunk.offset, sampleRate: chunk.sampleRate, bitDepth: chunk.bitDepth };
  }
  // First-order allpass section driven by a coefficient derived from the
  // requested angle. NOTE(review): a single first-order allpass is
  // frequency-dependent, so the rotation is approximate across the band —
  // confirm intent against the TypeScript source.
  applyPhaseRotation(chunk, angle) {
    const radians = angle * Math.PI / 180;
    const coefficient = Math.tan((radians - Math.PI) / 4);
    // Lazily grow per-channel state if more channels appear.
    while (this.allpassState.length < chunk.samples.length) {
      this.allpassState.push(0);
    }
    const samples = chunk.samples.map((channel, ch) => {
      const output = new Float32Array(channel.length);
      let state = this.allpassState[ch] ?? 0;
      for (let index = 0; index < channel.length; index++) {
        const input = channel[index] ?? 0;
        const allpassOut = coefficient * input + state;
        state = input - coefficient * allpassOut;
        output[index] = allpassOut;
      }
      // Carry filter state into the next chunk for continuity.
      this.allpassState[ch] = state;
      return output;
    });
    return { samples, offset: chunk.offset, sampleRate: chunk.sampleRate, bitDepth: chunk.bitDepth };
  }
};
1661
// Phase node: streaming transform (no whole-file buffering defaults).
var _PhaseNode = class _PhaseNode extends TransformNode {
  constructor(...args) {
    super(...args);
    this.type = ["buffered-audio-node", "transform", "phase"];
  }
  static is(value) {
    if (!TransformNode.is(value)) return false;
    return value.type[2] === "phase";
  }
  createStream() {
    const props = this.properties;
    return new PhaseStream({ ...props, bufferSize: this.bufferSize, overlap: props.overlap ?? 0 });
  }
  clone(overrides) {
    const next = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _PhaseNode(next);
  }
};
Object.assign(_PhaseNode, {
  moduleName: "Phase",
  moduleDescription: "Invert or rotate signal phase",
  schema: schema10
});
var PhaseNode = _PhaseNode;
// Factory: invert defaults to true; angle stays optional.
function phase(options) {
  const { invert: flip, angle, id } = options ?? {};
  return new PhaseNode({ invert: flip ?? true, angle, id });
}
// Convenience wrapper for a plain polarity inversion.
function invert(options) {
  return phase({ invert: true, id: options?.id });
}
1690
// Reverse has no configurable properties.
var schema11 = z.object({});
1691
// Reverse stream: mirrors the input into a spare buffer while streaming, then
// rebuilds the output back-to-front in chunk-sized pieces.
var ReverseStream = class extends BufferedTransformStream {
  constructor() {
    super(...arguments);
    this.spareChunkSize = 44100;
    this.spareInitialized = false;
  }
  async _setup(input, context) {
    // Remember the pipeline memory limit for the spare (copy) buffer.
    this.reverseMemoryLimit = context.memoryLimit;
    return super._setup(input, context);
  }
  // Lazily create the spare buffer on the first chunk, once the channel count
  // and sample rate are known.
  ensureSpareBuffer(chunk) {
    if (this.spareInitialized) return;
    this.spareInitialized = true;
    this.spareChunkSize = chunk.sampleRate;
    this.spareBuffer = new FileChunkBuffer(Infinity, chunk.samples.length, this.reverseMemoryLimit);
  }
  // Every incoming chunk goes to both the main buffer and the spare copy.
  async _buffer(chunk, buffer) {
    this.ensureSpareBuffer(chunk);
    await super._buffer(chunk, buffer);
    await this.spareBuffer?.append(chunk.samples);
  }
  // Rebuild the main buffer in reverse: read trailing chunks from the spare
  // copy, reverse each channel in place, and append in reverse order.
  async _process(buffer) {
    if (!this.spareBuffer) return;
    await buffer.truncate(0);
    let remaining = this.spareBuffer.frames;
    while (remaining > 0) {
      const frames = Math.min(this.spareChunkSize, remaining);
      const offset = remaining - frames;
      const chunk = await this.spareBuffer.read(offset, frames);
      for (const channel of chunk.samples) {
        channel.reverse();
      }
      await buffer.append(chunk.samples);
      remaining = offset;
    }
    // Release the spare buffer's backing storage once done.
    await this.spareBuffer.close();
    this.spareBuffer = void 0;
  }
};
1730
// Reverse node: needs the complete file before output can begin.
var _ReverseNode = class _ReverseNode extends TransformNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "transform", "reverse"];
  }
  static is(value) {
    if (!TransformNode.is(value)) return false;
    return value.type[2] === "reverse";
  }
  createStream() {
    const props = this.properties;
    return new ReverseStream({ ...props, bufferSize: this.bufferSize, overlap: props.overlap ?? 0 });
  }
  clone(overrides) {
    const next = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _ReverseNode(next);
  }
};
Object.assign(_ReverseNode, {
  moduleName: "Reverse",
  moduleDescription: "Reverse audio playback direction",
  schema: schema11
});
var ReverseNode = _ReverseNode;
// Factory: reverse takes no audio parameters, only an optional node id.
function reverse(options) {
  return new ReverseNode({ id: options?.id });
}
1752
// Decode a WAV file into planar 32-bit float channel data.
// Returns { samples, sampleRate, channels, durationFrames }.
async function readWavSamples(path) {
  const data = await readFile(path);
  const wav = new WaveFile(new Uint8Array(data.buffer, data.byteOffset, data.byteLength));
  wav.toBitDepth("32f");
  const fmt = wav.fmt;
  const rawSamples = wav.getSamples(false, Float64Array);
  const sampleRate = fmt.sampleRate;
  const channels = fmt.numChannels;
  // Mono files come back as a single typed array instead of an array of channels.
  const samples = channels === 1 ? [new Float32Array(rawSamples)] : rawSamples.map((channel) => new Float32Array(channel));
  const durationFrames = samples[0]?.length ?? 0;
  return { samples, sampleRate, channels, durationFrames };
}
1769
// Load an entire WAV file into an in-memory chunk buffer plus its stream context.
async function readToBuffer(path) {
  const decoded = await readWavSamples(path);
  const { samples, sampleRate, channels, durationFrames } = decoded;
  const buffer = new FileChunkBuffer(durationFrames, channels);
  await buffer.append(samples);
  return { buffer, context: { sampleRate, channels, durationFrames } };
}
1775

// src/transforms/splice/index.ts
// Splice properties: WAV file to overlay and the frame offset where it lands.
var schema12 = z.object({
  insertPath: z.string().default("").meta({ input: "file", mode: "open", accept: ".wav" }).describe("Insert File Path"),
  insertAt: z.number().min(0).default(0).describe("Insert At (frames)")
});
1781
// Splice stream: overlays a pre-loaded WAV onto the stream starting at a
// fixed frame offset, either onto mapped target channels or channel-for-channel.
var SpliceStream = class extends BufferedTransformStream {
  constructor() {
    super(...arguments);
    this.insertSampleRate = 0;
    this.insertLength = 0;
    this.sampleRateChecked = false;
  }
  // Load the insert WAV once and validate the requested target channels.
  async _setup(input, context) {
    const { samples, sampleRate } = await readWavSamples(this.properties.insertPath);
    const targetChannels = this.properties.channels;
    if (targetChannels) {
      for (const ch of targetChannels) {
        if (ch < 0) {
          throw new Error(`Splice: target channel ${ch} is out of range`);
        }
      }
    }
    this.insertSamples = samples;
    this.insertSampleRate = sampleRate;
    this.insertLength = samples[0]?.length ?? 0;
    return super._setup(input, context);
  }
  // Copy the overlapping slice of `insertChannel` over `channelSamples`.
  // (Fix: this loop was duplicated verbatim in both branches of _unbuffer;
  // extracted into one helper.)
  _copyInsert(channelSamples, insertChannel, overlapStart, overlapEnd, insertOffset) {
    for (let frame = overlapStart; frame < overlapEnd; frame++) {
      const insertIndex = insertOffset + frame - overlapStart;
      const insertSample = insertChannel[insertIndex];
      if (insertSample !== void 0) {
        channelSamples[frame] = insertSample;
      }
    }
  }
  _unbuffer(chunk) {
    // Verify once that the insert file matches the stream's sample rate.
    if (!this.sampleRateChecked) {
      this.sampleRateChecked = true;
      if (this.sampleRate !== void 0 && this.insertSampleRate !== this.sampleRate) {
        throw new Error(`Splice: insert file sample rate ${this.insertSampleRate} does not match stream sample rate ${this.sampleRate}`);
      }
    }
    const chunkFrames = chunk.samples[0]?.length ?? 0;
    const chunkStart = chunk.offset;
    const chunkEnd = chunkStart + chunkFrames;
    const insertEnd = this.properties.insertAt + this.insertLength;
    // Chunks entirely outside the insert region pass through unchanged.
    if (chunkEnd <= this.properties.insertAt || chunkStart >= insertEnd) {
      return chunk;
    }
    const samples = chunk.samples.map((channel) => new Float32Array(channel));
    const overlapStart = Math.max(0, this.properties.insertAt - chunkStart);
    const overlapEnd = Math.min(chunkFrames, insertEnd - chunkStart);
    const insertOffset = Math.max(0, chunkStart - this.properties.insertAt);
    const targetChannels = this.properties.channels;
    if (targetChannels) {
      // Explicit mapping: insert channel i lands on stream channel targetChannels[i].
      for (let insertCh = 0; insertCh < targetChannels.length; insertCh++) {
        const primaryCh = targetChannels[insertCh];
        if (primaryCh === void 0) continue;
        const channelSamples = samples[primaryCh];
        const insertChannel = this.insertSamples[insertCh];
        if (!channelSamples || !insertChannel) continue;
        this._copyInsert(channelSamples, insertChannel, overlapStart, overlapEnd, insertOffset);
      }
    } else {
      // Default: channel-for-channel overlay.
      for (let ch = 0; ch < samples.length; ch++) {
        const channelSamples = samples[ch];
        const insertChannel = this.insertSamples[ch];
        if (!channelSamples || !insertChannel) continue;
        this._copyInsert(channelSamples, insertChannel, overlapStart, overlapEnd, insertOffset);
      }
    }
    return { samples, offset: chunk.offset, sampleRate: chunk.sampleRate, bitDepth: chunk.bitDepth };
  }
};
1854
// Splice node: streaming transform (no whole-file buffering defaults).
var _SpliceNode = class _SpliceNode extends TransformNode {
  constructor() {
    super(...arguments);
    this.type = ["buffered-audio-node", "transform", "splice"];
  }
  static is(value) {
    return TransformNode.is(value) && value.type[2] === "splice";
  }
  createStream() {
    return new SpliceStream({ ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 });
  }
  clone(overrides) {
    return new _SpliceNode({ ...this.properties, previousProperties: this.properties, ...overrides });
  }
};
_SpliceNode.moduleName = "Splice";
_SpliceNode.moduleDescription = "Replace a region of audio with processed content";
_SpliceNode.schema = schema12;
var SpliceNode = _SpliceNode;
/**
 * Factory for SpliceNode.
 * Fix: forward `options.id` like every other factory helper in this module
 * (it was previously dropped). Backward compatible — id stays undefined when
 * not supplied.
 */
function splice(insertPath, insertAt, options) {
  return new SpliceNode({ insertPath, insertAt, channels: options?.channels, id: options?.id });
}
1876

// src/transforms/trim/utils/silence.ts
// Forward scan: first frame where ANY channel exceeds the threshold in
// absolute value; returns `frames` when the whole span is below threshold.
function findFirstAbove(samples, frames, threshold) {
  for (let frame = 0; frame < frames; frame++) {
    const loud = samples.some((channel) => Math.abs(channel[frame] ?? 0) > threshold);
    if (loud) return frame;
  }
  return frames;
}
1888
// Backward scan: last frame where ANY channel exceeds the threshold in
// absolute value; returns 0 when nothing is above threshold.
function findLastAbove(samples, frames, threshold) {
  for (let frame = frames - 1; frame >= 0; frame--) {
    const loud = samples.some((channel) => Math.abs(channel[frame] ?? 0) > threshold);
    if (loud) return frame;
  }
  return 0;
}
1898

// src/transforms/trim/index.ts
// Trim properties: amplitude threshold, margin of audio kept around the
// detected region (seconds), and whether to trim the start and/or end.
var schema13 = z.object({
  threshold: z.number().min(0).max(1).multipleOf(1e-3).default(1e-3).describe("Threshold"),
  margin: z.number().min(0).max(1).multipleOf(1e-3).default(0.01).describe("Margin"),
  start: z.boolean().default(true).describe("Start"),
  end: z.boolean().default(true).describe("End")
});
1906
// Trim stream: whole-file pass that locates the first/last samples above the
// threshold and keeps only that span plus a margin.
var TrimStream = class extends BufferedTransformStream {
  async _process(buffer) {
    const frames = buffer.frames;
    const allAudio = await buffer.read(0, frames);
    const channels = allAudio.samples.length;
    if (channels === 0 || frames === 0) return;
    const threshold = this.properties.threshold;
    const marginSeconds = this.properties.margin;
    const marginFrames = Math.round(marginSeconds * (this.sampleRate ?? 44100));
    const trimStart = this.properties.start;
    const trimEnd = this.properties.end;
    const firstAbove = findFirstAbove(allAudio.samples, frames, threshold);
    if (firstAbove >= frames) {
      // Entirely silent: the result is an empty buffer.
      await buffer.truncate(0);
      return;
    }
    let startFrame = 0;
    let endFrame = frames;
    if (trimStart) {
      startFrame = Math.max(0, firstAbove - marginFrames);
    }
    if (trimEnd) {
      // +1 converts the last loud index into an exclusive end bound.
      endFrame = findLastAbove(allAudio.samples, frames, threshold) + 1;
      endFrame = Math.min(frames, endFrame + marginFrames);
    }
    if (startFrame >= endFrame) return;
    if (startFrame === 0 && endFrame === frames) return; // nothing to trim
    const trimmedLength = endFrame - startFrame;
    const trimmed = [];
    for (let ch = 0; ch < channels; ch++) {
      const channel = allAudio.samples[ch];
      if (!channel) {
        trimmed.push(new Float32Array(trimmedLength));
        continue;
      }
      // subarray is a view into the already-read copy — no extra allocation,
      // and it stays valid because truncate() only touches the buffer, not
      // the arrays returned by read().
      trimmed.push(channel.subarray(startFrame, endFrame));
    }
    await buffer.truncate(0);
    await buffer.append(trimmed);
  }
};
1947
// Trim node: the silent span is only known once the whole file is buffered.
var _TrimNode = class _TrimNode extends TransformNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "transform", "trim"];
  }
  static is(value) {
    if (!TransformNode.is(value)) return false;
    return value.type[2] === "trim";
  }
  createStream() {
    const props = this.properties;
    return new TrimStream({ ...props, bufferSize: this.bufferSize, overlap: props.overlap ?? 0 });
  }
  clone(overrides) {
    const next = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _TrimNode(next);
  }
};
Object.assign(_TrimNode, {
  moduleName: "Trim",
  moduleDescription: "Remove silence from start and end",
  schema: schema13
});
var TrimNode = _TrimNode;
// Factory: schema defaults apply when no options are given.
function trim(options) {
  const parsed = schema13.parse(options ?? {});
  return new TrimNode({ ...parsed, id: options?.id });
}
1970
// Spawn ffmpeg reading its input from a file path (no stdin piping) and
// collect f32le PCM from stdout, deinterleaved into `channels` planar arrays.
function runFfmpegWithFile(binaryPath, args, channels) {
  return new Promise((resolve, reject) => {
    const proc = spawn(binaryPath, args, { stdio: ["ignore", "pipe", "pipe"] });
    if (!proc.stdout || !proc.stderr) {
      reject(new Error("Failed to create ffmpeg stdio streams"));
      return;
    }
    const outputChunks = [];
    const stderrChunks = [];
    proc.stdout.on("data", (chunk) => outputChunks.push(chunk));
    proc.stderr.on("data", (chunk) => stderrChunks.push(chunk));
    proc.on("error", (error) => {
      reject(new Error(`Failed to spawn ffmpeg: ${error.message}`));
    });
    proc.on("close", (code) => {
      if (code !== 0) {
        const stderrOutput = Buffer.concat(stderrChunks).toString();
        reject(new Error(`ffmpeg exited with code ${code}: ${stderrOutput}`));
        return;
      }
      resolve(deinterleaveBuffer(Buffer.concat(outputChunks), channels));
    });
  });
}
2002
// Spawn ffmpeg fed with f32le PCM on stdin (streamed from `buffer`) and
// collect f32le PCM from stdout, deinterleaved into `channels` planar arrays.
function runFfmpeg(binaryPath, args, buffer, channels) {
  return new Promise((resolve, reject) => {
    const proc = spawn(binaryPath, args, {
      stdio: ["pipe", "pipe", "pipe"]
    });
    if (!proc.stdout || !proc.stderr || !proc.stdin) {
      reject(new Error("Failed to create ffmpeg stdio streams"));
      return;
    }
    const stdout = proc.stdout;
    const stderr = proc.stderr;
    const stdin = proc.stdin;
    const outputChunks = [];
    const stderrChunks = [];
    stdout.on("data", (chunk) => {
      outputChunks.push(chunk);
    });
    stderr.on("data", (chunk) => {
      stderrChunks.push(chunk);
    });
    proc.on("error", (error) => {
      reject(new Error(`Failed to spawn ffmpeg: ${error.message}`));
    });
    proc.on("close", (code) => {
      if (code !== 0) {
        const stderrOutput = Buffer.concat(stderrChunks).toString();
        reject(new Error(`ffmpeg exited with code ${code}: ${stderrOutput}`));
        return;
      }
      const outputBuffer = Buffer.concat(outputChunks);
      const samples = deinterleaveBuffer(outputBuffer, channels);
      resolve(samples);
    });
    // Swallow stdin errors (e.g. EPIPE when ffmpeg exits early); failure is
    // reported through the process exit code on "close" instead.
    stdin.on("error", () => {
    });
    // Fire-and-forget writer; its failures are likewise surfaced via "close".
    void writeBufferToStdin(proc, stdin, buffer).catch(() => {
    });
  });
}
2041
// Stream the buffer's samples to ffmpeg stdin as interleaved f32le PCM,
// honoring backpressure via `waitForDrain`. NOTE(review): `waitForDrain` is
// defined elsewhere in this module — presumably it also resolves when the
// process exits, to avoid hanging; confirm in the source.
async function writeBufferToStdin(proc, stdin, buffer) {
  const chunkSize = 44100;
  for await (const chunk of buffer.iterate(chunkSize)) {
    const interleaved = interleave(chunk.samples, chunk.samples[0]?.length ?? 0, chunk.samples.length);
    const buf = Buffer.from(interleaved.buffer, interleaved.byteOffset, interleaved.byteLength);
    const canWrite = stdin.write(buf);
    if (!canWrite) {
      await waitForDrain(proc, stdin);
    }
  }
  stdin.end();
}
2053

// src/transforms/ffmpeg/index.ts
// FFmpeg passthrough properties: binary location plus raw filter arguments.
var schema14 = z.object({
  ffmpegPath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffmpeg", download: "https://ffmpeg.org/download.html" }).describe("FFmpeg \u2014 audio/video processing tool"),
  args: z.array(z.string()).default([])
});
2059
// FFmpeg stream: pipes the whole buffered file through one ffmpeg invocation
// and replaces the buffer contents with ffmpeg's f32le output.
var FfmpegStream = class extends BufferedTransformStream {
  constructor() {
    super(...arguments);
    // Channel count of the buffer being processed; drives the output -ac flag.
    this._ffmpegChannels = 1;
  }
  async _setup(input, context) {
    // Keep the stream context so argument builders can consult it later.
    this.streamContext = context;
    return super._setup(input, context);
  }
  // Filter arguments may be a static array or a function of the context.
  _buildArgs(_context) {
    const { args } = this.properties;
    if (!args) return [];
    return typeof args === "function" ? args(_context) : args;
  }
  // Output leg: raw 32-bit float PCM on stdout at the stream's rate/channels.
  _buildOutputArgs(_context) {
    return ["-f", "f32le", "-ar", String(this.sampleRate ?? 44100), "-ac", String(this.ffmpegChannels), "pipe:1"];
  }
  get ffmpegChannels() {
    return this._ffmpegChannels;
  }
  async _process(buffer) {
    if (!this.streamContext) throw new Error("FfmpegStream._process called before setup()");
    this._ffmpegChannels = buffer.channels;
    const sr = this.sampleRate ?? 44100;
    const channels = buffer.channels;
    const filterArgs = this._buildArgs(this.streamContext);
    const outputArgs = this._buildOutputArgs(this.streamContext);
    // File-backed buffers are handed to ffmpeg by path, avoiding stdin piping.
    const filePath = buffer instanceof FileChunkBuffer ? buffer.filePath : void 0;
    const inputArgs = ["-f", "f32le", "-ar", String(sr), "-ac", String(channels), "-i", filePath ?? "pipe:0"];
    const result = filePath ? await runFfmpegWithFile(this.properties.ffmpegPath, [...inputArgs, ...filterArgs, ...outputArgs], channels) : await runFfmpeg(this.properties.ffmpegPath, [...inputArgs, ...filterArgs, ...outputArgs], buffer, channels);
    // Swap the buffer contents for ffmpeg's output.
    await buffer.truncate(0);
    const frames = result[0]?.length ?? 0;
    if (frames > 0) {
      await buffer.append(result);
    }
  }
};
2096
// FFmpeg node: the entire buffer goes through one ffmpeg invocation, so it
// buffers the whole file.
var _FfmpegNode = class _FfmpegNode extends TransformNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "transform", "ffmpeg"];
  }
  static is(value) {
    if (!TransformNode.is(value)) return false;
    return value.type[2] === "ffmpeg";
  }
  createStream() {
    const props = this.properties;
    return new FfmpegStream({ ...props, bufferSize: this.bufferSize, overlap: props.overlap ?? 0 });
  }
  clone(overrides) {
    const next = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _FfmpegNode(next);
  }
};
Object.assign(_FfmpegNode, {
  moduleName: "FFmpeg",
  moduleDescription: "Process audio through FFmpeg filters",
  schema: schema14
});
var FfmpegNode = _FfmpegNode;
// Factory: pass-through of binary path, raw filter args, and node id.
function ffmpeg(options) {
  const { ffmpegPath, args, id } = options;
  return new FfmpegNode({ ffmpegPath, args, id });
}
2122
// First loudnorm pass: run ffmpeg with print_format=json over the buffered
// audio and parse the measured loudness values from stderr.
// Fixes: (1) the floating writeToStdin promise had no .catch() and stdin had
// no "error" handler, so an early ffmpeg exit (EPIPE) raised an unhandled
// rejection — now guarded exactly like runFfmpeg; (2) JSON.parse inside the
// "close" handler could throw uncaught — now rejects the promise instead.
async function measureLoudness(buffer, sampleRate, channels, properties) {
  const binaryPath = properties.ffmpegPath;
  const parts = [`I=${properties.target}`, `TP=${properties.truePeak}`, properties.lra !== void 0 ? `LRA=${properties.lra}` : "", "print_format=json"].filter(Boolean);
  const args = ["-f", "f32le", "-ar", String(sampleRate), "-ac", String(channels), "-i", "pipe:0", "-af", `loudnorm=${parts.join(":")}`, "-f", "null", "-"];
  return new Promise((resolve, reject) => {
    const proc = spawn(binaryPath, args, {
      stdio: ["pipe", "pipe", "pipe"]
    });
    const stdin = proc.stdin;
    const stderrStream = proc.stderr;
    const stderrChunks = [];
    stderrStream.on("data", (chunk) => {
      stderrChunks.push(chunk);
    });
    proc.on("error", (error) => {
      reject(new Error(`Failed to spawn ffmpeg: ${error.message}`));
    });
    proc.on("close", (code) => {
      const stderr = Buffer.concat(stderrChunks).toString();
      if (code !== 0) {
        reject(new Error(`ffmpeg measurement pass exited with code ${code}: ${stderr}`));
        return;
      }
      // loudnorm prints its JSON report to stderr.
      const jsonMatch = /\{[^}]*"input_i"[^}]*\}/s.exec(stderr);
      if (!jsonMatch) {
        reject(new Error("Failed to parse loudnorm measurement output"));
        return;
      }
      let measured;
      try {
        measured = JSON.parse(jsonMatch[0]);
      } catch {
        reject(new Error("Failed to parse loudnorm measurement output"));
        return;
      }
      resolve({
        inputI: measured.input_i ?? "0",
        inputTp: measured.input_tp ?? "0",
        inputLra: measured.input_lra ?? "0",
        inputThresh: measured.input_thresh ?? "0",
        targetOffset: measured.target_offset ?? "0"
      });
    });
    // Swallow stdin errors (EPIPE on early exit); failure is reported via "close".
    stdin.on("error", () => {
    });
    void writeToStdin(stdin, buffer, channels).catch(() => {
    });
  });
}
2162
// Stream the buffer to ffmpeg stdin as interleaved f32le PCM with backpressure.
// Fix: the drain wait only listened for "drain", so if ffmpeg exited before
// draining this function hung forever (and any stdin error was unhandled).
// It now also resolves on "error"/"close" and stops writing once the stream
// has failed or been destroyed.
async function writeToStdin(stdin, buffer, channels) {
  let failed = false;
  // Persistent handler: swallow EPIPE-style errors; the caller reports the
  // ffmpeg failure from the process exit code instead.
  stdin.on("error", () => {
    failed = true;
  });
  for await (const chunk of buffer.iterate(44100)) {
    if (failed || stdin.destroyed) break;
    const interleaved = interleave(chunk.samples, chunk.samples[0]?.length ?? 0, channels);
    const buf = Buffer.from(interleaved.buffer, interleaved.byteOffset, interleaved.byteLength);
    const canWrite = stdin.write(buf);
    if (!canWrite) {
      await new Promise((resolve) => {
        stdin.once("drain", resolve);
        stdin.once("close", resolve);
        stdin.once("error", resolve);
      });
    }
  }
  stdin.end();
}
2175

// src/transforms/loudness/index.ts
// Loudness (EBU R128 loudnorm) properties: integrated target in LUFS,
// true-peak ceiling in dBTP, and loudness range (LRA) in LU.
var schema15 = z.object({
  ffmpegPath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffmpeg", download: "https://ffmpeg.org/download.html" }).describe("FFmpeg \u2014 audio/video processing tool"),
  target: z.number().min(-50).max(0).multipleOf(0.1).default(-14).describe("Target"),
  truePeak: z.number().min(-10).max(0).multipleOf(0.1).default(-1).describe("True Peak"),
  lra: z.number().min(0).max(20).multipleOf(0.1).default(0).describe("LRA")
});
2183
// Loudness stream: two-pass loudnorm — measure first, then normalize in
// linear mode using the measured values.
var LoudnessStream = class extends FfmpegStream {
  _buildArgs(_context) {
    return this.buildArgsWithMeasurement();
  }
  async _process(buffer) {
    const sr = this.sampleRate ?? 44100;
    const ch = buffer.channels;
    // Pass 1: measure. The values are consumed by _buildArgs during pass 2.
    this.measuredValues = await measureLoudness(buffer, sr, ch, this.properties);
    await super._process(buffer);
    this.measuredValues = void 0;
  }
  buildArgsWithMeasurement() {
    const { target, truePeak, lra } = this.properties;
    if (this.measuredValues) {
      // Pass 2: feed measured_* values back so loudnorm runs linearly.
      const { inputI, inputTp, inputLra, inputThresh, targetOffset } = this.measuredValues;
      const parts2 = [
        `I=${target}`,
        `TP=${truePeak}`,
        lra !== void 0 ? `LRA=${lra}` : "",
        `measured_I=${inputI}`,
        `measured_TP=${inputTp}`,
        `measured_LRA=${inputLra}`,
        `measured_thresh=${inputThresh}`,
        `offset=${targetOffset}`,
        "linear=true"
      ].filter(Boolean);
      return ["-af", `loudnorm=${parts2.join(":")}`];
    }
    // Fallback: single-pass (dynamic) loudnorm when no measurement exists.
    const parts = [`I=${target}`, `TP=${truePeak}`, lra !== void 0 ? `LRA=${lra}` : ""].filter(Boolean);
    return ["-af", `loudnorm=${parts.join(":")}`];
  }
};
2215
// Loudness node: an FfmpegNode specialization running two-pass loudnorm.
var _LoudnessNode = class _LoudnessNode extends FfmpegNode {
  constructor(...args) {
    super(...args);
    this.type = ["buffered-audio-node", "transform", "ffmpeg", "loudness"];
  }
  static is(value) {
    if (!FfmpegNode.is(value)) return false;
    return value.type[3] === "loudness";
  }
  createStream() {
    const props = this.properties;
    return new LoudnessStream({ ...props, bufferSize: this.bufferSize, overlap: props.overlap ?? 0 });
  }
  clone(overrides) {
    const next = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _LoudnessNode(next);
  }
};
Object.assign(_LoudnessNode, {
  moduleName: "Loudness",
  moduleDescription: "Measure integrated, short-term, and momentary loudness",
  schema: schema15
});
var LoudnessNode = _LoudnessNode;
// Factory: defaults match common streaming targets (-14 LUFS, -1 dBTP).
function loudness(ffmpegPath, options) {
  const { target, truePeak, lra, id } = options ?? {};
  return new LoudnessNode({
    ffmpegPath,
    target: target ?? -14,
    truePeak: truePeak ?? -1,
    lra,
    id
  });
}
2243
// Resample properties: target sample rate and dither method for the soxr resampler.
var schema16 = z.object({
  ffmpegPath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffmpeg", download: "https://ffmpeg.org/download.html" }).describe("FFmpeg \u2014 audio/video processing tool"),
  sampleRate: z.number().min(8e3).max(192e3).multipleOf(100).default(44100).describe("Sample Rate"),
  dither: z.enum(["triangular", "lipshitz", "none"]).default("triangular").describe("Dither")
});
2248
// Resample stream: ffmpeg aresample (soxr) pass, then record the new rate.
var ResampleStream = class extends FfmpegStream {
  async _process(buffer) {
    await super._process(buffer);
    // The buffer must advertise the post-resample rate to downstream nodes.
    buffer.setSampleRate(this.properties.sampleRate);
  }
  _buildArgs(_context) {
    const { sampleRate, dither: dither2 } = this.properties;
    const ditherMethod = dither2 ?? "triangular";
    return ["-af", `aresample=${sampleRate}:resampler=soxr:dither_method=${ditherMethod}`];
  }
  // Output leg runs at the TARGET rate, not the incoming stream rate.
  _buildOutputArgs(_context) {
    const { sampleRate } = this.properties;
    return ["-f", "f32le", "-ar", String(sampleRate), "-ac", String(this.ffmpegChannels), "pipe:1"];
  }
};
2262
// Resample node: an FfmpegNode specialization using aresample/soxr.
var _ResampleNode = class _ResampleNode extends FfmpegNode {
  constructor(...args) {
    super(...args);
    this.type = ["buffered-audio-node", "transform", "ffmpeg", "resample"];
  }
  static is(value) {
    if (!FfmpegNode.is(value)) return false;
    return value.type[3] === "resample";
  }
  createStream() {
    const props = this.properties;
    return new ResampleStream({ ...props, bufferSize: this.bufferSize, overlap: props.overlap ?? 0 });
  }
  clone(overrides) {
    const next = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _ResampleNode(next);
  }
};
Object.assign(_ResampleNode, {
  moduleName: "Resample",
  moduleDescription: "Change sample rate",
  schema: schema16
});
var ResampleNode = _ResampleNode;
// Factory: dither stays undefined unless supplied (stream falls back to triangular).
function resample(ffmpegPath, sampleRate, options) {
  const { dither: ditherMethod, id } = options ?? {};
  return new ResampleNode({ ffmpegPath, sampleRate, dither: ditherMethod, id });
}
2289
// Build smoothed amplitude envelopes of the full-band signal and of a
// band-passed "breath band" between the given low/high frequency edges.
// Returns { wideband, breathBand } as linear-amplitude Float32Arrays.
function computeBreathEnvelopes(channel, sampleRate, breathBandLow, breathBandHigh) {
  const frames = channel.length;
  const envSmooth = Math.round(sampleRate * 0.01); // 10 ms smoothing window
  const wideband = new Float32Array(frames);
  const breathBand = new Float32Array(frames);
  // Instantaneous power of the raw signal.
  for (let index = 0; index < frames; index++) {
    wideband[index] = (channel[index] ?? 0) ** 2;
  }
  // Band-pass centered on the geometric mean of the edges; Q from bandwidth.
  const centerFreq = Math.sqrt(breathBandLow * breathBandHigh);
  const quality = centerFreq / (breathBandHigh - breathBandLow);
  const { fb, fa } = bandPassCoefficients(sampleRate, centerFreq, quality);
  const breathBandSignal = biquadFilter(channel, fb, fa);
  for (let index = 0; index < frames; index++) {
    breathBand[index] = (breathBandSignal[index] ?? 0) ** 2;
  }
  // Smooth both power envelopes in place, sharing one scratch buffer.
  const scratch = new Float32Array(frames);
  smoothEnvelope(wideband, envSmooth, scratch);
  smoothEnvelope(breathBand, envSmooth, scratch);
  // Convert smoothed power back to amplitude.
  for (let index = 0; index < frames; index++) {
    wideband[index] = Math.sqrt(wideband[index] ?? 0);
    breathBand[index] = Math.sqrt(breathBand[index] ?? 0);
  }
  return { wideband, breathBand };
}
2313
// Widen each detected region outward until speech-level audio is reached,
// then shrink past near-silent edges so gain fades land on audible material.
// Mutates the region objects in place.
function expandBreathRegions(regions, widebandEnvelope, speechThreshold) {
  const frames = widebandEnvelope.length;
  const noiseFloor = speechThreshold * 0.3;
  const level = (index) => widebandEnvelope[index] ?? 0;
  for (const region of regions) {
    // Grow left/right while below the speech threshold.
    while (region.start > 0 && level(region.start - 1) < speechThreshold) {
      region.start--;
    }
    while (region.end < frames && level(region.end) < speechThreshold) {
      region.end++;
    }
    // Trim near-silent material back off both edges.
    while (region.start < region.end && level(region.start) < noiseFloor) {
      region.start++;
    }
    while (region.end > region.start && level(region.end - 1) < noiseFloor) {
      region.end--;
    }
  }
}
2331
// Build a per-sample gain curve: unity everywhere, `targetGain` inside each
// region, with linear ramps of fadeIn/fadeOut samples on either edge.
function buildGainEnvelope(regions, length, fadeInSamples, fadeOutSamples, targetGain) {
  const envelope = new Float32Array(length);
  envelope.fill(1);
  const blend = (fraction) => 1 + (targetGain - 1) * fraction;
  for (const region of regions) {
    for (let frame = region.start; frame < region.end; frame++) {
      envelope[frame] = targetGain;
    }
    // Ramp down into the region over the samples preceding it...
    for (let step = 0; step < fadeInSamples; step++) {
      const pos = region.start - fadeInSamples + step;
      if (pos < 0 || pos >= length) continue;
      envelope[pos] = blend((step + 1) / (fadeInSamples + 1));
    }
    // ...and ramp back up over the samples following it.
    for (let step = 0; step < fadeOutSamples; step++) {
      const pos = region.end + step;
      if (pos < 0 || pos >= length) continue;
      envelope[pos] = blend(1 - (step + 1) / (fadeOutSamples + 1));
    }
  }
  return envelope;
}
2355
+
2356
+ // src/transforms/breath-control/utils/regions.ts
2357
/**
 * Scan a 0/1 mask and return the [start, end) spans of consecutive non-zero
 * samples that last at least `minDuration` samples; shorter runs are dropped.
 * Iterates one index past `length` so a run touching the end is closed.
 */
function findRegions(mask, minDuration, length) {
  const spans = [];
  let runStart = -1;
  for (let pos = 0; pos <= length; pos++) {
    const inRun = pos < length && (mask[pos] ?? 0) > 0;
    if (inRun) {
      if (runStart === -1) runStart = pos;
      continue;
    }
    if (runStart === -1) continue;
    if (pos - runStart >= minDuration) {
      spans.push({ start: runStart, end: pos });
    }
    runStart = -1;
  }
  return spans;
}
2373
+
2374
+ // src/transforms/breath-control/index.ts
2375
// Parameter schema for the breath-control transform:
// - sensitivity: 0..1 detection sensitivity (step 0.01)
// - reduction: gain applied to detected breaths, in dB (-60..0)
// - mode: "remove" hard-gates breaths; "attenuate" applies `reduction`
var schema17 = z.object({
  sensitivity: z.number().min(0).max(1).multipleOf(0.01).default(0.5).describe("Sensitivity"),
  reduction: z.number().min(-60).max(0).multipleOf(1).default(-12).describe("Reduction"),
  mode: z.enum(["remove", "attenuate"]).default("attenuate").describe("Mode")
});
2380
// Whole-file transform stream: detects breath noises on channel 0 and applies
// the resulting gain envelope to every channel.
var BreathControlStream = class extends BufferedTransformStream {
  /**
   * Single whole-file pass: envelope analysis -> thresholding -> region
   * finding/expansion -> gain envelope with 15 ms fades -> buffer rewrite.
   */
  async _process(buffer) {
    const frames = buffer.frames;
    const channels = buffer.channels;
    // NOTE(review): falls back to 44.1 kHz when the stream sample rate is
    // unset — confirm this.sampleRate is always populated before _process.
    const sampleRate = this.sampleRate ?? 44100;
    const { sensitivity, reduction, mode } = this.properties;
    // "remove" mode hard-gates breaths at -96 dB instead of the user setting.
    const gainDb = mode === "remove" ? -96 : reduction;
    const gainLinear = Math.pow(10, gainDb / 20);
    const allAudio = await buffer.read(0, frames);
    // Detection runs on the first channel only.
    const channel = allAudio.samples[0];
    if (!channel) return;
    // Wideband envelope tracks speech; 1-6 kHz band envelope tracks breathiness.
    const { wideband, breathBand } = computeBreathEnvelopes(channel, sampleRate, 1e3, 6e3);
    // Higher sensitivity lowers the speech threshold and raises the breath one.
    const speechThreshold = 0.015 * (1 - sensitivity * 0.5);
    const breathThreshold = 2e-3 * sensitivity;
    const isBreath = new Uint8Array(frames);
    for (let index = 0; index < frames; index++) {
      // Breath = quiet in the wideband (speech gap) AND energetic in the band.
      const isSpeechGap = (wideband[index] ?? 0) < speechThreshold;
      const isBreathy = (breathBand[index] ?? 0) > breathThreshold;
      isBreath[index] = isSpeechGap && isBreathy ? 1 : 0;
    }
    // Ignore candidate breaths shorter than 80 ms.
    const minBreathDuration = Math.round(sampleRate * 0.08);
    const regions = findRegions(isBreath, minBreathDuration, frames);
    expandBreathRegions(regions, wideband, speechThreshold);
    // 15 ms linear fades on both sides of each attenuated region.
    const fadeLength = Math.round(sampleRate * 0.015);
    const gainEnvelope = buildGainEnvelope(regions, frames, fadeLength, fadeLength, gainLinear);
    for (let ch = 0; ch < channels; ch++) {
      const chData = allAudio.samples[ch];
      if (!chData) continue;
      for (let index = 0; index < frames; index++) {
        chData[index] = (chData[index] ?? 0) * (gainEnvelope[index] ?? 1);
      }
    }
    // Replace the buffer contents with the processed audio.
    await buffer.truncate(0);
    await buffer.append(allAudio.samples);
  }
};
2416
// Node wrapper for BreathControlStream. Buffers the whole file because breath
// detection needs global context (bufferSize/latency = WHOLE_FILE).
var _BreathControlNode = class _BreathControlNode extends TransformNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "transform", "breath-control"];
  }
  // Type guard: matches transform nodes tagged "breath-control".
  static is(value) {
    return TransformNode.is(value) && value.type[2] === "breath-control";
  }
  createStream() {
    return new BreathControlStream({ ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 });
  }
  // Copy with `overrides` applied; keeps the old properties for history/undo.
  clone(overrides) {
    return new _BreathControlNode({ ...this.properties, previousProperties: this.properties, ...overrides });
  }
};
_BreathControlNode.moduleName = "Breath Control";
_BreathControlNode.moduleDescription = "Attenuate or remove breath sounds between phrases";
_BreathControlNode.schema = schema17;
var BreathControlNode = _BreathControlNode;
/**
 * Factory for a BreathControlNode; defaults match schema17
 * (sensitivity 0.5, reduction -12 dB, mode "attenuate").
 */
function breathControl(options) {
  return new BreathControlNode({
    sensitivity: options?.sensitivity ?? 0.5,
    reduction: options?.reduction ?? -12,
    mode: options?.mode ?? "attenuate",
    id: options?.id
  });
}
2443
+
2444
+ // src/transforms/de-bleed/utils/nlms.ts
2445
/**
 * Normalized least-mean-squares (NLMS) adaptive filter.
 * Predicts `signal` from `reference` with an FIR filter of `filterLength`
 * taps adapted per sample, writing the prediction error (the signal with the
 * reference contribution cancelled) into `output`. The adaptation step is
 * `stepSize` normalized by a sliding window of reference power, so
 * convergence is level-independent. Out-of-range reference samples are
 * treated as zero.
 */
function nlmsAdaptiveFilter(signal, reference, filterLength, stepSize, output) {
  const total = signal.length;
  const taps = new Float32Array(filterLength);
  let windowPower = 0;
  for (let n = 0; n < total; n++) {
    // Maintain reference power over the last `filterLength` samples.
    const incoming = n < reference.length ? reference[n] : 0;
    windowPower += incoming * incoming;
    const dropped = n - filterLength;
    if (dropped >= 0 && dropped < reference.length) {
      windowPower -= reference[dropped] * reference[dropped];
    }
    if (windowPower < 0) windowPower = 0; // guard against float drift
    // FIR prediction from the current taps.
    let estimate = 0;
    for (let k = 0; k < filterLength; k++) {
      const j = n - k;
      if (j >= 0 && j < reference.length) {
        estimate += taps[k] * reference[j];
      }
    }
    const err = signal[n] - estimate;
    output[n] = err;
    // NLMS update: step normalized by the windowed reference power.
    const mu = windowPower > 1e-10 ? stepSize / (windowPower + 1e-10) : 0;
    for (let k = 0; k < filterLength; k++) {
      const j = n - k;
      if (j >= 0 && j < reference.length) {
        taps[k] = taps[k] + mu * err * reference[j];
      }
    }
  }
}
2475
+
2476
+ // src/transforms/de-bleed/index.ts
2477
// Parameter schema for the de-bleed transform:
// - referencePath: audio file containing the bleed source signal
// - filterLength: NLMS adaptive-filter taps (64..8192, multiples of 64)
// - stepSize: NLMS adaptation rate (0.001..1)
var schema18 = z.object({
  referencePath: z.string().default("").describe("Reference Path"),
  filterLength: z.number().min(64).max(8192).multipleOf(64).default(1024).describe("Filter Length"),
  stepSize: z.number().min(1e-3).max(1).multipleOf(1e-3).default(0.1).describe("Step Size")
});
2482
// Whole-file transform that subtracts the NLMS-filtered reference (bleed)
// signal from every channel.
var DeBleedStream = class extends BufferedTransformStream {
  /**
   * Load channel 0 of the reference file into memory before streaming
   * starts. NOTE(review): the entire reference is held in RAM — large
   * reference files will be expensive.
   */
  async _setup(input, context) {
    const { buffer: refBuffer } = await readToBuffer(this.properties.referencePath);
    const chunk = await refBuffer.read(0, refBuffer.frames);
    const channel = chunk.samples[0];
    this.referenceSignal = channel ? Float32Array.from(channel) : new Float32Array(0);
    await refBuffer.close();
    return super._setup(input, context);
  }
  // Runs the NLMS canceller independently per channel, writing each
  // channel's error signal back into the buffer.
  // NOTE(review): the full buffer is re-read once per channel; hoisting the
  // read out of the loop may be possible if replaceChannel does not require
  // a freshly-read chunk — verify before changing.
  async _process(buffer) {
    const frames = buffer.frames;
    const channels = buffer.channels;
    const { filterLength, stepSize } = this.properties;
    const reference = this.referenceSignal;
    const output = new Float32Array(frames);
    for (let ch = 0; ch < channels; ch++) {
      const chunk = await buffer.read(0, frames);
      const channel = chunk.samples[ch];
      if (!channel) continue;
      output.fill(0);
      nlmsAdaptiveFilter(channel, reference, filterLength, stepSize, output);
      await buffer.write(0, replaceChannel(chunk, ch, output, channels));
    }
  }
};
2507
// Node wrapper for DeBleedStream; whole-file buffering because the NLMS
// canceller runs over the complete signal.
var _DeBleedNode = class _DeBleedNode extends TransformNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "transform", "de-bleed"];
  }
  // Type guard: matches transform nodes tagged "de-bleed".
  static is(value) {
    return TransformNode.is(value) && value.type[2] === "de-bleed";
  }
  createStream() {
    return new DeBleedStream({ ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 });
  }
  // Copy with overrides; retains previous properties for history/undo.
  clone(overrides) {
    return new _DeBleedNode({ ...this.properties, previousProperties: this.properties, ...overrides });
  }
};
_DeBleedNode.moduleName = "De-Bleed";
_DeBleedNode.moduleDescription = "Reduce microphone bleed between channels";
_DeBleedNode.schema = schema18;
var DeBleedNode = _DeBleedNode;
/**
 * Factory for a DeBleedNode. `referencePath` points at the bleed source
 * recording; defaults match schema18 (1024 taps, step 0.1).
 */
function deBleed(referencePath, options) {
  return new DeBleedNode({
    referencePath,
    filterLength: options?.filterLength ?? 1024,
    stepSize: options?.stepSize ?? 0.1,
    id: options?.id
  });
}
2534
/**
 * Build a per-sample click mask for `signal`:
 * 1. one-pole high-pass at 4 kHz to isolate transient content;
 * 2. squared + smoothed (~0.5 ms window) RMS envelope of the high-passed
 *    signal;
 * 3. threshold at a multiple (5x..25x, shrinking as `sensitivity` rises) of
 *    the envelope's approximate median;
 * 4. clear any flagged run longer than `maxClickDuration` samples — long
 *    runs are treated as program material rather than clicks.
 * Returns a Uint8Array with 1 at click samples.
 */
function detectClickMask(signal, sampleRate, sensitivity, maxClickDuration) {
  const mask = new Uint8Array(signal.length);
  // First-order RC high-pass at 4 kHz.
  const hpCutoff = 4e3;
  const rc = 1 / (2 * Math.PI * hpCutoff);
  const dt = 1 / sampleRate;
  const alpha = rc / (rc + dt);
  const highPassed = new Float32Array(signal.length);
  let prevSample = 0;
  let prevHP = 0;
  for (let index = 0; index < signal.length; index++) {
    const sample = signal[index] ?? 0;
    // y[n] = alpha * (y[n-1] + x[n] - x[n-1])
    highPassed[index] = alpha * (prevHP + sample - prevSample);
    prevSample = sample;
    prevHP = highPassed[index] ?? 0;
  }
  // ~0.5 ms smoothing window for the power envelope.
  const envSmooth = Math.round(sampleRate * 5e-4);
  const envelope = new Float32Array(signal.length);
  for (let index = 0; index < signal.length; index++) {
    envelope[index] = (highPassed[index] ?? 0) * (highPassed[index] ?? 0);
  }
  smoothEnvelope(envelope, envSmooth);
  for (let index = 0; index < signal.length; index++) {
    envelope[index] = Math.sqrt(envelope[index] ?? 0);
  }
  // Threshold relative to the typical (median) envelope level; higher
  // sensitivity lowers the multiplier from 25x toward 5x.
  const median = approximateMedian(envelope);
  const threshold = median * (5 + 20 * (1 - sensitivity));
  for (let index = 0; index < signal.length; index++) {
    if ((envelope[index] ?? 0) > threshold) {
      mask[index] = 1;
    }
  }
  // Drop flagged runs longer than maxClickDuration samples (not click-like).
  let regionStart = -1;
  for (let index = 0; index <= signal.length; index++) {
    const active = index < signal.length && (mask[index] ?? 0) > 0;
    if (active && regionStart === -1) {
      regionStart = index;
    } else if (!active && regionStart !== -1) {
      if (index - regionStart > maxClickDuration) {
        for (let clear = regionStart; clear < index; clear++) {
          mask[clear] = 0;
        }
      }
      regionStart = -1;
    }
  }
  return mask;
}
2581
/**
 * Convert a 0/1 click mask into a 0..1 crossfade envelope: 1 inside each
 * masked run, with linear ramps of `fadeSamples` on each side. Overlapping
 * ramps keep the larger value (and never overwrite a full-strength 1), so
 * adjacent runs don't dip artificially.
 */
function buildBlendEnvelope(mask, length, fadeSamples) {
  const blend = new Float32Array(length);
  let pos = 0;
  while (pos < length) {
    if ((mask[pos] ?? 0) === 0) {
      pos += 1;
      continue;
    }
    // Found the start of a masked run; locate its end.
    const runStart = pos;
    let runEnd = pos;
    while (runEnd < length && (mask[runEnd] ?? 0) > 0) {
      runEnd += 1;
    }
    for (let i = runStart; i < runEnd; i++) {
      blend[i] = 1;
    }
    for (let step = 0; step < fadeSamples; step++) {
      // Ramp up before the run.
      const before = runStart - fadeSamples + step;
      if (before >= 0 && (blend[before] ?? 0) < 1) {
        const rampUp = (step + 1) / (fadeSamples + 1);
        blend[before] = Math.max(blend[before] ?? 0, rampUp);
      }
      // Ramp down after the run.
      const after = runEnd + step;
      if (after < length && (blend[after] ?? 0) < 1) {
        const rampDown = 1 - (step + 1) / (fadeSamples + 1);
        blend[after] = Math.max(blend[after] ?? 0, rampDown);
      }
    }
    pos = runEnd;
  }
  return blend;
}
2613
/**
 * Histogram-based approximate median: buckets the values into 1024 bins
 * spanning [min, max] and returns the centre of the bin holding the middle
 * element. Exact for constant input; returns 0 for an empty array.
 * O(n) time, O(1) extra space.
 */
function approximateMedian(values) {
  const total = values.length;
  if (total === 0) return 0;
  // Single pass for the value range.
  let lo = values[0] ?? 0;
  let hi = values[0] ?? 0;
  for (let i = 1; i < total; i++) {
    const v = values[i] ?? 0;
    if (v < lo) lo = v;
    else if (v > hi) hi = v;
  }
  if (lo === hi) return lo;
  const BIN_COUNT = 1024;
  const histogram = new Uint32Array(BIN_COUNT);
  const scale = (BIN_COUNT - 1) / (hi - lo);
  for (let i = 0; i < total; i++) {
    histogram[Math.floor(((values[i] ?? 0) - lo) * scale)]++;
  }
  // Walk bins until we pass the middle element.
  const half = total >>> 1;
  let seen = 0;
  for (let b = 0; b < BIN_COUNT; b++) {
    seen += histogram[b];
    if (seen > half) {
      return lo + (b + 0.5) / scale;
    }
  }
  return hi;
}
2641
+
2642
+ // src/transforms/de-click/index.ts
2643
// Parameter schema for the de-click transform:
// - sensitivity: 0..1 detection sensitivity (step 0.01)
// - maxClickDuration: longest flagged run (in samples, per detectClickMask
//   usage) still treated as a click (1..1000)
var schema19 = z.object({
  sensitivity: z.number().min(0).max(1).multipleOf(0.01).default(0.5).describe("Sensitivity"),
  maxClickDuration: z.number().min(1).max(1e3).multipleOf(1).default(200).describe("Max Click Duration")
});
2647
// Whole-file transform: detects click transients on channel 0 and crossfades
// all channels to a 2.5 kHz low-passed copy at click positions.
var DeClickStream = class extends BufferedTransformStream {
  async _process(buffer) {
    const frames = buffer.frames;
    const channels = buffer.channels;
    // NOTE(review): assumes 44.1 kHz if the stream sample rate is unset.
    const sampleRate = this.sampleRate ?? 44100;
    const { sensitivity, maxClickDuration } = this.properties;
    const allAudio = await buffer.read(0, frames);
    // Detection runs on the first channel; the same mask is applied to all.
    const refChannel = allAudio.samples[0];
    if (!refChannel) return;
    const clickMask = detectClickMask(refChannel, sampleRate, sensitivity, maxClickDuration);
    // 1 ms crossfade ramps around each repaired click.
    const fadeSamples = Math.round(sampleRate * 1e-3);
    const blendEnv = buildBlendEnvelope(clickMask, frames, fadeSamples);
    // Skip the filtering pass entirely when nothing was flagged.
    let hasClicks = false;
    for (let index = 0; index < frames; index++) {
      if ((blendEnv[index] ?? 0) > 0) {
        hasClicks = true;
        break;
      }
    }
    if (!hasClicks) return;
    // Replacement material: zero-phase low-pass at 2.5 kHz strips the
    // high-frequency transient while preserving the underlying tone.
    const lpfCutoff = 2500;
    const lpfCoeffs = lowPassCoefficients(sampleRate, lpfCutoff);
    for (let ch = 0; ch < channels; ch++) {
      const channel = allAudio.samples[ch];
      if (!channel) continue;
      const filtered = Float32Array.from(channel);
      zeroPhaseBiquadFilter(filtered, lpfCoeffs);
      for (let index = 0; index < frames; index++) {
        // Crossfade original -> filtered by the blend envelope.
        const blend = blendEnv[index] ?? 0;
        if (blend > 0) {
          channel[index] = (channel[index] ?? 0) * (1 - blend) + (filtered[index] ?? 0) * blend;
        }
      }
    }
    // Write the repaired audio back over the original buffer contents.
    await buffer.truncate(0);
    await buffer.append(allAudio.samples);
  }
};
2685
// Node wrapper for DeClickStream; whole-file buffering.
var _DeClickNode = class _DeClickNode extends TransformNode {
  constructor(properties) {
    // NOTE(review): also spreads schema19.encode(properties), unlike the
    // other whole-file nodes in this file — presumably to normalize values
    // through the schema codec; confirm the round-trip is intended.
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties, ...schema19.encode(properties) });
    this.type = ["buffered-audio-node", "transform", "de-click"];
  }
  // Type guard: matches transform nodes tagged "de-click" (and subclasses).
  static is(value) {
    return TransformNode.is(value) && value.type[2] === "de-click";
  }
  createStream() {
    return new DeClickStream({ ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 });
  }
  // Copy with overrides; retains previous properties for history/undo.
  clone(overrides) {
    return new _DeClickNode({ ...this.properties, previousProperties: this.properties, ...overrides });
  }
};
_DeClickNode.moduleName = "De-Click";
_DeClickNode.moduleDescription = "Remove clicks, pops, and impulse artifacts";
_DeClickNode.schema = schema19;
var DeClickNode = _DeClickNode;
// Factory: validates options against schema19 (applying defaults) first.
function deClick(options) {
  const parsed = schema19.parse(options ?? {});
  return new DeClickNode({ ...parsed, id: options?.id });
}
2708
// De-crackle: a DeClickNode subclass tuned for dense, very short crackle
// transients — same parameters, but the default max duration is 20 samples
// instead of de-click's 200.
var schema20 = z.object({
  sensitivity: z.number().min(0).max(1).multipleOf(0.01).default(0.5).describe("Sensitivity"),
  maxClickDuration: z.number().min(1).max(1e3).multipleOf(1).default(20).describe("Max Click Duration")
});
var _DeCrackleNode = class _DeCrackleNode extends DeClickNode {
  constructor(properties) {
    super({ ...properties, ...schema20.encode(properties) });
    // Fourth type element distinguishes the subclass from plain de-click.
    this.type = ["buffered-audio-node", "transform", "de-click", "de-crackle"];
  }
  static is(value) {
    return DeClickNode.is(value) && value.type[3] === "de-crackle";
  }
  clone(overrides) {
    return new _DeCrackleNode({ ...this.properties, previousProperties: this.properties, ...overrides });
  }
};
_DeCrackleNode.moduleName = "De-Crackle";
_DeCrackleNode.schema = schema20;
var DeCrackleNode = _DeCrackleNode;
// Factory: validates options against schema20 (applying defaults) first.
function deCrackle(options) {
  const parsed = schema20.parse(options ?? {});
  return new DeCrackleNode({ ...parsed, id: options?.id });
}
2731
// Mouth de-click: a DeClickNode subclass tuned for mouth noises — higher
// default sensitivity (0.7) and a 50-sample default max duration.
var mouthDeClickSchema = z.object({
  sensitivity: z.number().min(0).max(1).multipleOf(0.01).default(0.7).describe("Sensitivity"),
  maxClickDuration: z.number().min(1).max(1e3).multipleOf(1).default(50).describe("Max Click Duration")
});
var _MouthDeClickNode = class _MouthDeClickNode extends DeClickNode {
  constructor(properties) {
    super({ ...properties, ...mouthDeClickSchema.encode(properties) });
    // Fourth type element distinguishes the subclass from plain de-click.
    this.type = ["buffered-audio-node", "transform", "de-click", "mouth-de-click"];
  }
  static is(value) {
    return DeClickNode.is(value) && value.type[3] === "mouth-de-click";
  }
  clone(overrides) {
    return new _MouthDeClickNode({ ...this.properties, previousProperties: this.properties, ...overrides });
  }
};
_MouthDeClickNode.moduleName = "Mouth De-Click";
_MouthDeClickNode.schema = mouthDeClickSchema;
var MouthDeClickNode = _MouthDeClickNode;
// Factory: validates options against mouthDeClickSchema (defaults) first.
function mouthDeClick(options) {
  const parsed = mouthDeClickSchema.parse(options ?? {});
  return new MouthDeClickNode({ ...parsed, id: options?.id });
}
2754
+
2755
+ // src/transforms/de-clip/utils/clip-detection.ts
2756
/**
 * Find maximal runs of samples whose absolute value reaches `threshold`.
 * Returns [start, end) index pairs; a run touching the end of the signal
 * is closed at signal.length.
 */
function detectClippedRegions(signal, threshold) {
  const clipped = [];
  let runStart = -1;
  for (let pos = 0; pos < signal.length; pos++) {
    const over = Math.abs(signal[pos] ?? 0) >= threshold;
    if (over) {
      if (runStart === -1) runStart = pos;
    } else if (runStart !== -1) {
      clipped.push({ start: runStart, end: pos });
      runStart = -1;
    }
  }
  // Close a run that extends to the final sample.
  if (runStart !== -1) {
    clipped.push({ start: runStart, end: signal.length });
  }
  return clipped;
}
2773
/**
 * Repair one clipped region [start, end) of `signal` in place using
 * autoregressive extrapolation: fit an AR(16) model on a context window
 * around the region, then iteratively re-predict the clipped samples
 * (5 sweeps). Each repaired sample keeps the AR prediction when its
 * magnitude already reaches the clip threshold; otherwise it is pushed out
 * to +/- threshold using the sign of the current (clipped) sample — the
 * reconstructed peak should not be quieter than the clip ceiling.
 */
function reconstructClippedRegion(signal, start, end, threshold) {
  const arOrder = 16;
  // Context window: up to 4x the AR order on each side of the region.
  const contextBefore = Math.max(0, start - arOrder * 4);
  const contextAfter = Math.min(signal.length, end + arOrder * 4);
  const contextSignal = signal.slice(contextBefore, contextAfter);
  const arCoeffs = fitArModelForDeclip(contextSignal, arOrder);
  const iterations = 5;
  // Region bounds in context-local coordinates.
  const localStart = start - contextBefore;
  const localEnd = end - contextBefore;
  for (let iter = 0; iter < iterations; iter++) {
    for (let index = localStart; index < localEnd; index++) {
      // One-step AR prediction from the preceding (partly repaired) samples.
      let predicted = 0;
      for (let coeff = 0; coeff < arOrder; coeff++) {
        const sampleIdx = index - 1 - coeff;
        if (sampleIdx >= 0) {
          predicted += (arCoeffs[coeff] ?? 0) * (contextSignal[sampleIdx] ?? 0);
        }
      }
      const sign = (contextSignal[index] ?? 0) >= 0 ? 1 : -1;
      const constrained = Math.abs(predicted) >= threshold ? predicted : sign * threshold;
      contextSignal[index] = constrained;
    }
  }
  // Copy the repaired span back into the caller's signal.
  for (let index = localStart; index < localEnd; index++) {
    signal[contextBefore + index] = contextSignal[index] ?? 0;
  }
}
2800
/**
 * Estimate AR model coefficients for de-clipping: compute the biased
 * autocorrelation of `signal` up to `order` lags, then solve for the AR
 * coefficients with the Levinson-Durbin recursion.
 */
function fitArModelForDeclip(signal, order) {
  const total = signal.length;
  const autocorr = new Float32Array(order + 1);
  for (let lag = 0; lag <= order; lag++) {
    // Biased estimator: divide by the full length regardless of lag.
    let acc = 0;
    for (let pos = lag; pos < total; pos++) {
      acc += (signal[pos] ?? 0) * (signal[pos - lag] ?? 0);
    }
    autocorr[lag] = acc / total;
  }
  return levinsonDurbin(autocorr, order);
}
2811
/**
 * Levinson-Durbin recursion: solve the Toeplitz normal equations for
 * forward linear-prediction coefficients given autocorrelation values
 * autocorr[0..order], so x[n] ~ sum_k coeffs[k] * x[n-1-k].
 * Returns all zeros when autocorr[0] is 0; stops early if the prediction
 * error becomes non-positive (numerically singular input).
 */
function levinsonDurbin(autocorr, order) {
  const lpc = new Float32Array(order);
  const snapshot = new Float32Array(order);
  const zeroLag = autocorr[0] ?? 1;
  if (zeroLag === 0) return lpc;
  // Order-1 solution seeds the recursion.
  const k0 = (autocorr[1] ?? 0) / zeroLag;
  lpc[0] = k0;
  let residual = zeroLag * (1 - k0 * k0);
  for (let m = 1; m < order; m++) {
    // Reflection coefficient for order m+1.
    let acc = 0;
    for (let j = 0; j < m; j++) {
      acc += (lpc[j] ?? 0) * (autocorr[m - j] ?? 0);
    }
    const reflection = ((autocorr[m + 1] ?? 0) - acc) / Math.max(residual, 1e-10);
    // Update the coefficient vector from a snapshot of the previous order.
    snapshot.set(lpc);
    for (let j = 0; j < m; j++) {
      lpc[j] = (snapshot[j] ?? 0) - reflection * (snapshot[m - 1 - j] ?? 0);
    }
    lpc[m] = reflection;
    residual *= 1 - reflection * reflection;
    if (residual <= 0) break;
  }
  return lpc;
}
2835
+
2836
+ // src/transforms/de-clip/index.ts
2837
// Parameter schema for the de-clip transform:
// - threshold: |sample| level treated as clipped (linear, 0..1)
// - method: NOTE(review) declared here but not read by DeClipStream —
//   "sparse" currently behaves identically to "ar"; confirm intent.
var schema21 = z.object({
  threshold: z.number().min(0).max(1).multipleOf(0.01).default(0.99).describe("Threshold"),
  method: z.enum(["ar", "sparse"]).default("ar").describe("Method")
});
2841
// Chunked transform that finds clipped spans per channel and reconstructs
// them with AR extrapolation.
var DeClipStream = class extends BufferedTransformStream {
  // Lazily size the processing chunk to ~50 ms once the sample rate is known.
  _buffer(chunk, buffer) {
    if (this.bufferSize === 0) {
      this.bufferSize = Math.round(chunk.sampleRate * 0.05);
    }
    return super._buffer(chunk, buffer);
  }
  async _process(buffer) {
    for await (const chunk of buffer.iterate(this.bufferSize)) {
      const samples = chunk.samples.map((channel) => {
        // Detect on the original channel, reconstruct into a copy so the
        // detection pass is unaffected by in-progress repairs.
        const output = new Float32Array(channel);
        const regions = detectClippedRegions(channel, this.properties.threshold);
        for (const region of regions) {
          reconstructClippedRegion(output, region.start, region.end, this.properties.threshold);
        }
        return output;
      });
      await buffer.write(chunk.offset, samples);
    }
  }
};
2862
// Node wrapper for DeClipStream. Unlike the whole-file nodes above, the
// buffer size is left to the stream, which lazily sizes it to ~50 ms chunks.
var _DeClipNode = class _DeClipNode extends TransformNode {
  constructor() {
    super(...arguments);
    this.type = ["buffered-audio-node", "transform", "de-clip"];
  }
  // Type guard: matches transform nodes tagged "de-clip".
  static is(value) {
    return TransformNode.is(value) && value.type[2] === "de-clip";
  }
  createStream() {
    return new DeClipStream({ ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 });
  }
  // Copy with overrides; retains previous properties for history/undo.
  clone(overrides) {
    return new _DeClipNode({ ...this.properties, previousProperties: this.properties, ...overrides });
  }
};
_DeClipNode.moduleName = "De-Clip";
_DeClipNode.moduleDescription = "Restore clipped audio peaks";
_DeClipNode.schema = schema21;
var DeClipNode = _DeClipNode;
// Factory: validates options against schema21 (applying defaults) first.
function deClip(options) {
  const parsed = schema21.parse(options ?? {});
  return new DeClipNode({ ...parsed, id: options?.id });
}
2885
+
2886
+ // src/transforms/de-plosive/utils/plosive.ts
2887
/**
 * Classify one block of audio as a plosive burst. Runs a one-pole low-pass
 * (coefficient `cutoffCoeff`, state carried across blocks via
 * `initialLpState`) and compares low-band energy against total energy: the
 * block is flagged when more than half its energy is low-frequency AND the
 * low-band RMS exceeds `threshold`. Returns the verdict and the filter
 * state for the next block.
 */
function detectPlosive(channel, cutoffCoeff, threshold, initialLpState) {
  let lowPass = initialLpState;
  let lowEnergy = 0;
  let totalEnergy = 0;
  for (const value of channel) {
    lowPass = lowPass * cutoffCoeff + value * (1 - cutoffCoeff);
    lowEnergy += lowPass * lowPass;
    totalEnergy += value * value;
  }
  const lowRatio = totalEnergy > 0 ? lowEnergy / totalEnergy : 0;
  const lowRms = Math.sqrt(lowEnergy / channel.length);
  const isPlosive = lowRatio > 0.5 && lowRms > threshold;
  return { isPlosive, lpState: lowPass };
}
2900
/**
 * Attenuate a plosive block by subtracting 80% of its one-pole low-pass
 * content, crossfading the repair in/out over `fadeLength` samples at the
 * block edges so it splices cleanly into untouched audio. Returns a new
 * array; the input block is not modified.
 */
function removePlosive(channel, cutoffCoeff, initialLpState, fadeLength) {
  const total = channel.length;
  const repaired = new Float32Array(total);
  let lowPass = initialLpState;
  for (let pos = 0; pos < total; pos++) {
    const value = channel[pos] ?? 0;
    lowPass = lowPass * cutoffCoeff + value * (1 - cutoffCoeff);
    const filtered = value - lowPass * 0.8;
    // Edge crossfade: 0 at the very edges, 1 in the block interior.
    let mix = 1;
    if (pos < fadeLength) {
      mix = pos / fadeLength;
    } else if (pos > total - fadeLength) {
      mix = (total - pos) / fadeLength;
    }
    repaired[pos] = value * (1 - mix) + filtered * mix;
  }
  return repaired;
}
2917
+
2918
+ // src/transforms/de-plosive/index.ts
2919
// Parameter schema for the de-plosive transform:
// - sensitivity: 0..1 (higher lowers the detection threshold)
// - frequency: one-pole low-pass cutoff in Hz used for plosive energy
//   (50..500, steps of 10)
var schema22 = z.object({
  sensitivity: z.number().min(0).max(1).multipleOf(0.01).default(0.5).describe("Sensitivity"),
  frequency: z.number().min(50).max(500).multipleOf(10).default(200).describe("Frequency")
});
2923
// Block-based transform (~20 ms blocks) that detects low-frequency plosive
// bursts per channel and subtracts their low-pass content.
var DePlosiveStream = class extends BufferedTransformStream {
  constructor() {
    super(...arguments);
    // Per-channel one-pole low-pass state, carried across blocks.
    this.lpState = [];
  }
  // Lazily size both the buffer and the stream chunk to ~20 ms blocks.
  _buffer(chunk, buffer) {
    if (this.bufferSize === 0) {
      const blockSize = Math.round(chunk.sampleRate * 0.02);
      this.bufferSize = blockSize;
      this.streamChunkSize = blockSize;
    }
    return super._buffer(chunk, buffer);
  }
  // Classify each block per channel; plosive blocks get 80% of their
  // low-frequency content subtracted with 5 ms edge fades.
  _unbuffer(chunk) {
    const { sensitivity, frequency } = this.properties;
    // One-pole coefficient for the configured plosive cutoff frequency.
    const cutoffCoeff = Math.exp(-2 * Math.PI * frequency / chunk.sampleRate);
    const threshold = 0.1 * (1 - sensitivity);
    // Grow the state array to cover all channels.
    while (this.lpState.length < chunk.samples.length) {
      this.lpState.push(0);
    }
    const samples = chunk.samples.map((channel, ch) => {
      const detection = detectPlosive(channel, cutoffCoeff, threshold, this.lpState[ch] ?? 0);
      this.lpState[ch] = detection.lpState;
      if (detection.isPlosive) {
        const fadeLength = Math.min(channel.length, Math.round(chunk.sampleRate * 5e-3));
        // NOTE(review): removePlosive restarts its low-pass from the
        // post-detection state rather than the block-entry state — verify
        // this filter continuity is intended.
        return removePlosive(channel, cutoffCoeff, detection.lpState, fadeLength);
      }
      return Float32Array.from(channel);
    });
    return { samples, offset: chunk.offset, sampleRate: chunk.sampleRate, bitDepth: chunk.bitDepth };
  }
};
2955
// Node wrapper for DePlosiveStream (streaming, block-based — no whole-file
// buffering required).
var _DePlosiveNode = class _DePlosiveNode extends TransformNode {
  constructor() {
    super(...arguments);
    this.type = ["buffered-audio-node", "transform", "de-plosive"];
  }
  // Type guard: matches transform nodes tagged "de-plosive".
  static is(value) {
    return TransformNode.is(value) && value.type[2] === "de-plosive";
  }
  createStream() {
    return new DePlosiveStream({ ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 });
  }
  // Copy with overrides; retains previous properties for history/undo.
  clone(overrides) {
    return new _DePlosiveNode({ ...this.properties, previousProperties: this.properties, ...overrides });
  }
};
_DePlosiveNode.moduleName = "De-Plosive";
_DePlosiveNode.moduleDescription = "Reduce plosive bursts (p, b, t, d sounds)";
_DePlosiveNode.schema = schema22;
var DePlosiveNode = _DePlosiveNode;
// Factory with defaults matching schema22.
function dePlosive(options) {
  return new DePlosiveNode({
    sensitivity: options?.sensitivity ?? 0.5,
    frequency: options?.frequency ?? 200,
    id: options?.id
  });
}
2981
+
2982
+ // src/transforms/de-reverb/utils/transpose.ts
2983
/**
 * Repack frame-major STFT data (arrays of per-frame spectra) into flat
 * bin-major arrays laid out as [bin * numFrames + frame], so each frequency
 * bin's time series is contiguous for the WPE solver. Frames missing a real
 * or imaginary array are left untouched.
 */
function transposeToBinMajor(stftReal, stftImag, numFrames, numBins, realT, imagT) {
  for (let t = 0; t < numFrames; t++) {
    const specRe = stftReal[t];
    const specIm = stftImag[t];
    if (!specRe || !specIm) continue;
    for (let k = 0; k < numBins; k++) {
      const flat = k * numFrames + t;
      realT[flat] = specRe[k];
      imagT[flat] = specIm[k];
    }
  }
}
2994
/**
 * Inverse of transposeToBinMajor: copy flat bin-major data (layout
 * [bin * numFrames + frame]) back into the frame-major per-frame spectra.
 * Frames missing a real or imaginary array are skipped.
 */
function transposeToFrameMajor(realT, imagT, stftReal, stftImag, numFrames, numBins) {
  for (let t = 0; t < numFrames; t++) {
    const specRe = stftReal[t];
    const specIm = stftImag[t];
    if (!specRe || !specIm) continue;
    for (let k = 0; k < numBins; k++) {
      const flat = k * numFrames + t;
      specRe[k] = realT[flat];
      specIm[k] = imagT[flat];
    }
  }
}
3005
+
3006
+ // src/transforms/de-reverb/utils/wpe.ts
3007
/**
 * From bin-major STFT data, write the per-sample spectral power |X|^2
 * (floored at 1e-10 so later divisions stay finite) into `powerT`, and the
 * total power of each frequency bin across all frames into `binEnergy`.
 */
function computeBinPowerAndEnergy(realT, imagT, numBins, numFrames, powerT, binEnergy) {
  const count = numBins * numFrames;
  for (let flat = 0; flat < count; flat++) {
    const re = realT[flat];
    const im = imagT[flat];
    powerT[flat] = Math.max(re * re + im * im, 1e-10);
  }
  for (let k = 0; k < numBins; k++) {
    const base = k * numFrames;
    let total = 0;
    for (let t = 0; t < numFrames; t++) {
      total += powerT[base + t];
    }
    binEnergy[k] = total;
  }
}
3021
/**
 * Apply a solved WPE (weighted prediction error) filter to one frequency
 * bin: for each frame past the warm-up span, subtract the complex linear
 * prediction formed from frames at least `predictionDelay + 1` steps in the
 * past. The dereverberated sample is power-clamped so it never exceeds the
 * original magnitude, preventing the canceller from adding energy.
 * `binOffset` is this bin's base index into the flat bin-major arrays.
 */
function applyWpePrediction(realT, imagT, originalPowerT, binOffset, numFrames, predictionDelay, filterLen, filterReal, filterImag) {
  for (let frame = predictionDelay + filterLen; frame < numFrames; frame++) {
    // Complex dot product of filter taps with delayed past samples.
    let predR = 0;
    let predI = 0;
    for (let tap = 0; tap < filterLen; tap++) {
      const pastOffset = binOffset + frame - predictionDelay - tap - 1;
      const pR = realT[pastOffset];
      const pI = imagT[pastOffset];
      predR += filterReal[tap] * pR - filterImag[tap] * pI;
      predI += filterReal[tap] * pI + filterImag[tap] * pR;
    }
    const pos = binOffset + frame;
    // Residual = observed - prediction (the dereverberated sample).
    const newR = realT[pos] - predR;
    const newI = imagT[pos] - predI;
    const newPow = newR * newR + newI * newI;
    const origPow = originalPowerT[pos];
    if (newPow > origPow) {
      // Clamp: keep the residual no louder than the original bin power.
      const scale = Math.sqrt(origPow / newPow);
      realT[pos] = newR * scale;
      imagT[pos] = newI * scale;
    } else {
      realT[pos] = newR;
      imagT[pos] = newI;
    }
  }
}
3047
/**
 * Solve the WPE prediction filter for one frequency bin: accumulate the
 * power-weighted (1/power) covariance of delayed past samples
 * (`corrReal`/`corrImag`, Hermitian — built upper-triangular then mirrored)
 * and their cross-correlation with the current sample
 * (`crossReal`/`crossImag`), apply diagonal loading (1e-6), then solve the
 * complex linear system into `outReal`/`outImag`. All matrix/vector
 * arguments are caller-provided scratch buffers to avoid per-bin allocation.
 */
function solveWpeFilter(realT, imagT, powerT, binOffset, numFrames, predictionDelay, filterLength, outReal, outImag, corrReal, corrImag, crossReal, crossImag, arWork, aiWork, brWork, biWork) {
  corrReal.fill(0);
  corrImag.fill(0);
  crossReal.fill(0);
  crossImag.fill(0);
  const filterLen = filterLength;
  const delay = predictionDelay;
  for (let frame = delay + filterLen; frame < numFrames; frame++) {
    // WPE weighting: each frame contributes inversely to its power.
    const weight = 1 / powerT[binOffset + frame];
    const targetR = realT[binOffset + frame];
    const targetI = imagT[binOffset + frame];
    for (let tap1 = 0; tap1 < filterLen; tap1++) {
      const pastIdx1 = binOffset + frame - delay - tap1 - 1;
      const pR1 = realT[pastIdx1];
      const pI1 = imagT[pastIdx1];
      // Cross-correlation of (conjugated) past sample with the target.
      crossReal[tap1] = (crossReal[tap1] ?? 0) + weight * (pR1 * targetR + pI1 * targetI);
      crossImag[tap1] = (crossImag[tap1] ?? 0) + weight * (pR1 * targetI - pI1 * targetR);
      // Upper triangle of the Hermitian covariance matrix.
      for (let tap2 = tap1; tap2 < filterLen; tap2++) {
        const pastIdx2 = binOffset + frame - delay - tap2 - 1;
        const pR2 = realT[pastIdx2];
        const pI2 = imagT[pastIdx2];
        corrReal[tap1 * filterLen + tap2] = (corrReal[tap1 * filterLen + tap2] ?? 0) + weight * (pR1 * pR2 + pI1 * pI2);
        corrImag[tap1 * filterLen + tap2] = (corrImag[tap1 * filterLen + tap2] ?? 0) + weight * (pR1 * pI2 - pI1 * pR2);
      }
    }
  }
  // Mirror the upper triangle (Hermitian: conjugate transpose).
  for (let tap1 = 1; tap1 < filterLen; tap1++) {
    for (let tap2 = 0; tap2 < tap1; tap2++) {
      corrReal[tap1 * filterLen + tap2] = corrReal[tap2 * filterLen + tap1];
      corrImag[tap1 * filterLen + tap2] = -corrImag[tap2 * filterLen + tap1];
    }
  }
  // Diagonal loading for numerical stability.
  for (let tap = 0; tap < filterLen; tap++) {
    corrReal[tap * filterLen + tap] = (corrReal[tap * filterLen + tap] ?? 0) + 1e-6;
  }
  solveLinearSystem(corrReal, corrImag, crossReal, crossImag, filterLen, outReal, outImag, arWork, aiWork, brWork, biWork);
}
3084
/**
 * Solve the complex linear system A x = b via Gaussian elimination with
 * partial pivoting, followed by back substitution.
 *
 * All matrices/vectors are split into separate real and imaginary Float32Arrays.
 * A is `size` x `size` in row-major order (aReal/aImag); b is length `size`
 * (bReal/bImag). The solution is written into outReal/outImag. ar/ai/br/bi are
 * caller-provided scratch buffers so the inputs are never mutated; they must be
 * at least as large as A and b respectively.
 *
 * Note: rows whose pivot magnitude is below 1e-20 are skipped during
 * elimination, and rows with a near-zero diagonal are skipped during back
 * substitution — outReal/outImag keep whatever value they held for those
 * entries (the caller zero-fills them beforehand).
 */
function solveLinearSystem(aReal, aImag, bReal, bImag, size, outReal, outImag, ar, ai, br, bi) {
  // Work on copies so the caller's correlation matrices survive.
  ar.set(aReal);
  ai.set(aImag);
  br.set(bReal);
  bi.set(bImag);
  for (let col = 0; col < size; col++) {
    // Partial pivoting: find the row (at or below `col`) with the largest
    // squared magnitude in this column.
    let maxMag = 0;
    let maxRow = col;
    for (let row = col; row < size; row++) {
      const re = ar[row * size + col] ?? 0;
      const im = ai[row * size + col] ?? 0;
      const mag = re * re + im * im;
      if (mag > maxMag) {
        maxMag = mag;
        maxRow = row;
      }
    }
    // Column is numerically zero — nothing to eliminate.
    if (maxMag < 1e-20) continue;
    if (maxRow !== col) {
      // Swap rows `col` and `maxRow` (only columns >= col matter; earlier
      // columns are already zeroed).
      for (let sc = col; sc < size; sc++) {
        const tmpR = ar[col * size + sc] ?? 0;
        const tmpI = ai[col * size + sc] ?? 0;
        ar[col * size + sc] = ar[maxRow * size + sc] ?? 0;
        ai[col * size + sc] = ai[maxRow * size + sc] ?? 0;
        ar[maxRow * size + sc] = tmpR;
        ai[maxRow * size + sc] = tmpI;
      }
      const tmpBr = br[col] ?? 0;
      const tmpBi = bi[col] ?? 0;
      br[col] = br[maxRow] ?? 0;
      bi[col] = bi[maxRow] ?? 0;
      br[maxRow] = tmpBr;
      bi[maxRow] = tmpBi;
    }
    const pivR = ar[col * size + col] ?? 0;
    const pivI = ai[col * size + col] ?? 0;
    const pivMag2 = pivR * pivR + pivI * pivI;
    // Eliminate the column below the pivot. fact = elem / piv, computed as
    // elem * conj(piv) / |piv|^2 (complex division).
    for (let row = col + 1; row < size; row++) {
      const elemR = ar[row * size + col] ?? 0;
      const elemI = ai[row * size + col] ?? 0;
      const factR = (elemR * pivR + elemI * pivI) / pivMag2;
      const factI = (elemI * pivR - elemR * pivI) / pivMag2;
      for (let ec = col + 1; ec < size; ec++) {
        const ajR = ar[col * size + ec] ?? 0;
        const ajI = ai[col * size + ec] ?? 0;
        ar[row * size + ec] = (ar[row * size + ec] ?? 0) - (factR * ajR - factI * ajI);
        ai[row * size + ec] = (ai[row * size + ec] ?? 0) - (factR * ajI + factI * ajR);
      }
      // br[col]/bi[col] are read before br[row]/bi[row] are overwritten, and
      // row > col, so the right-hand side update is order-safe.
      br[row] = (br[row] ?? 0) - (factR * (br[col] ?? 0) - factI * (bi[col] ?? 0));
      bi[row] = (bi[row] ?? 0) - (factR * (bi[col] ?? 0) + factI * (br[col] ?? 0));
      ar[row * size + col] = 0;
      ai[row * size + col] = 0;
    }
  }
  // Back substitution on the upper-triangular system, again dividing by the
  // diagonal via multiplication with its conjugate.
  for (let row = size - 1; row >= 0; row--) {
    let sumR = br[row] ?? 0;
    let sumI = bi[row] ?? 0;
    for (let bc = row + 1; bc < size; bc++) {
      const ajR = ar[row * size + bc] ?? 0;
      const ajI = ai[row * size + bc] ?? 0;
      const xjR = outReal[bc] ?? 0;
      const xjI = outImag[bc] ?? 0;
      sumR -= ajR * xjR - ajI * xjI;
      sumI -= ajR * xjI + ajI * xjR;
    }
    const diagR = ar[row * size + row] ?? 0;
    const diagI = ai[row * size + row] ?? 0;
    const diagMag2 = diagR * diagR + diagI * diagI;
    if (diagMag2 > 1e-20) {
      outReal[row] = (sumR * diagR + sumI * diagI) / diagMag2;
      outImag[row] = (sumI * diagR - sumR * diagI) / diagMag2;
    }
  }
}
+
3159
+ // src/transforms/de-reverb/index.ts
3160
// src/transforms/de-reverb/index.ts
// Zod schema for the WPE de-reverb node's user-editable properties.
// The `meta({ input: "file", ... })` entries drive a host UI file picker for
// optional native FFT addons.
var schema23 = z.object({
  predictionDelay: z.number().min(1).max(10).multipleOf(1).default(4).describe("Prediction Delay"),
  filterLength: z.number().min(5).max(30).multipleOf(1).default(12).describe("Filter Length"),
  iterations: z.number().min(1).max(10).multipleOf(1).default(4).describe("Iterations"),
  vkfftAddonPath: z.string().default("").meta({ input: "file", mode: "open", binary: "vkfft-addon", download: "https://github.com/visionsofparadise/vkfft-addon" }).describe("VkFFT native addon \u2014 GPU FFT acceleration"),
  fftwAddonPath: z.string().default("").meta({ input: "file", mode: "open", binary: "fftw-addon", download: "https://github.com/visionsofparadise/fftw-addon" }).describe("FFTW native addon \u2014 CPU FFT acceleration")
});
// Whole-file transform stream implementing WPE (Weighted Prediction Error)
// dereverberation per channel in the STFT domain.
var DeReverbStream = class extends BufferedTransformStream {
  // Pick an FFT backend (native addon or JS fallback) before the base setup.
  async _setup(input, context) {
    const fft2 = initFftBackend(context.executionProviders, this.properties);
    this.fftBackend = fft2.backend;
    this.fftAddonOptions = fft2.addonOptions;
    return super._setup(input, context);
  }
  // Processes the entire buffered file (bufferSize is WHOLE_FILE on this node).
  async _process(buffer) {
    const { frames, channels } = buffer;
    // Fixed STFT geometry: 1024-point FFT, 75% overlap, one-sided spectrum.
    const fftSize = 1024;
    const hopSize = fftSize / 4;
    const numBins = fftSize / 2 + 1;
    // Short inputs are zero-padded up to one full FFT frame below.
    const paddedLength = Math.max(frames, fftSize);
    const numStftFrames = Math.floor((paddedLength - fftSize) / hopSize) + 1;
    // Frame-major STFT output, reused across channels.
    const stftOutput = {
      real: Array.from({ length: numStftFrames }, () => new Float32Array(numBins)),
      imag: Array.from({ length: numStftFrames }, () => new Float32Array(numBins))
    };
    const chunk = await buffer.read(0, frames);
    // Bin-major (transposed) working copies plus power estimates.
    const flatSize = numBins * numStftFrames;
    const realT = new Float32Array(flatSize);
    const imagT = new Float32Array(flatSize);
    const originalPowerT = new Float32Array(flatSize);
    const iterPowerT = new Float32Array(flatSize);
    const binEnergy = new Float32Array(numBins);
    const { predictionDelay, filterLength, iterations } = this.properties;
    const filterLen = filterLength;
    // Per-bin correlation matrix / cross-correlation vector / filter taps,
    // plus scratch buffers handed to solveLinearSystem.
    const corrReal = new Float32Array(filterLen * filterLen);
    const corrImag = new Float32Array(filterLen * filterLen);
    const crossReal = new Float32Array(filterLen);
    const crossImag = new Float32Array(filterLen);
    const filterReal = new Float32Array(filterLen);
    const filterImag = new Float32Array(filterLen);
    const arWork = new Float32Array(filterLen * filterLen);
    const aiWork = new Float32Array(filterLen * filterLen);
    const brWork = new Float32Array(filterLen);
    const biWork = new Float32Array(filterLen);
    for (let ch = 0; ch < channels; ch++) {
      let channel = chunk.samples[ch];
      if (!channel) continue;
      if (channel.length < fftSize) {
        const padded = new Float32Array(fftSize);
        padded.set(channel);
        channel = padded;
      }
      const stftResult = stft(channel, fftSize, hopSize, stftOutput, this.fftBackend, this.fftAddonOptions);
      const numFrames = stftResult.frames;
      // Work in bin-major layout so each bin's time series is contiguous.
      transposeToBinMajor(stftResult.real, stftResult.imag, numFrames, numBins, realT, imagT);
      computeBinPowerAndEnergy(realT, imagT, numBins, numFrames, originalPowerT, binEnergy);
      // Bins carrying < 1e-4 of the mean energy are left untouched.
      const meanEnergy = binEnergy.reduce((sum, sample) => sum + sample, 0) / numBins;
      const energyThreshold = meanEnergy * 1e-4;
      const usedSize = numBins * numFrames;
      for (let iter = 0; iter < iterations; iter++) {
        // WPE weighting: first pass uses the observed power; later passes use
        // the power of the current (partially dereverberated) estimate.
        let powerT;
        if (iter === 0) {
          powerT = originalPowerT;
        } else {
          for (let ci = 0; ci < usedSize; ci++) {
            iterPowerT[ci] = Math.max(realT[ci] * realT[ci] + imagT[ci] * imagT[ci], 1e-10);
          }
          powerT = iterPowerT;
        }
        for (let bin = 0; bin < numBins; bin++) {
          if (binEnergy[bin] < energyThreshold) continue;
          const bo = bin * numFrames;
          filterReal.fill(0);
          filterImag.fill(0);
          // Estimate the linear-prediction filter for this bin, then subtract
          // the predicted late reverberation in place (siblings defined
          // earlier in this module).
          solveWpeFilter(realT, imagT, powerT, bo, numFrames, predictionDelay, filterLen, filterReal, filterImag, corrReal, corrImag, crossReal, crossImag, arWork, aiWork, brWork, biWork);
          applyWpePrediction(realT, imagT, originalPowerT, bo, numFrames, predictionDelay, filterLen, filterReal, filterImag);
        }
      }
      // Back to frame-major, resynthesize, and trim padding before writing.
      transposeToFrameMajor(realT, imagT, stftResult.real, stftResult.imag, numFrames, numBins);
      const dereverberated = istft(stftResult, hopSize, paddedLength, this.fftBackend, this.fftAddonOptions).subarray(0, frames);
      await buffer.write(0, replaceChannel(chunk, ch, dereverberated, channels));
    }
  }
};
// Graph node wrapping DeReverbStream. Runs on the whole file at once
// (WHOLE_FILE buffer size and latency) since WPE needs the full STFT history.
var _DeReverbNode = class _DeReverbNode extends TransformNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "transform", "de-reverb"];
  }
  // Type guard for de-reverb nodes.
  static is(value) {
    return TransformNode.is(value) && value.type[2] === "de-reverb";
  }
  // Build the processing stream with the node's resolved buffering options.
  createStream() {
    const streamProperties = {
      ...this.properties,
      bufferSize: this.bufferSize,
      overlap: this.properties.overlap ?? 0
    };
    return new DeReverbStream(streamProperties);
  }
  // Duplicate this node, keeping the prior config under `previousProperties`.
  clone(overrides) {
    const nextProperties = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _DeReverbNode(nextProperties);
  }
};
_DeReverbNode.moduleName = "De-Reverb (WPE)";
_DeReverbNode.moduleDescription = "Reduce room reverb using Weighted Prediction Error \u2014 classical DSP, fully tunable, no model required";
_DeReverbNode.schema = schema23;
var DeReverbNode = _DeReverbNode;
/**
 * Factory for a DeReverbNode.
 *
 * A single `sensitivity` knob (0..1, default 0.5, clamped) derives sensible
 * values for predictionDelay, filterLength and iterations; any explicitly
 * supplied option wins over the derived value.
 */
function deReverb(options) {
  const clamped = Math.min(1, Math.max(0, options?.sensitivity ?? 0.5));
  const properties = {
    // Higher sensitivity -> shorter delay, longer filter, more iterations.
    predictionDelay: options?.predictionDelay ?? Math.round(2 + (1 - clamped) * 4),
    filterLength: options?.filterLength ?? Math.round(5 + clamped * 15),
    iterations: options?.iterations ?? Math.round(2 + clamped * 4),
    vkfftAddonPath: options?.vkfftAddonPath ?? "",
    fftwAddonPath: options?.fftwAddonPath ?? "",
    id: options?.id
  };
  return new DeReverbNode(properties);
}
+
3275
+ // src/utils/onnx-providers.ts
3276
// src/utils/onnx-providers.ts
/**
 * Strip execution providers that ONNX Runtime does not accept ("gpu" and
 * "cpu-native" are internal FFT-backend hints, not ORT providers).
 * Falls back to ["cpu"] when nothing usable remains.
 */
function filterOnnxProviders(providers) {
  const internalOnly = new Set(["gpu", "cpu-native"]);
  const usable = [];
  for (const provider of providers) {
    if (!internalOnly.has(provider)) usable.push(provider);
  }
  return usable.length === 0 ? ["cpu"] : usable;
}
// CommonJS `require` reconstructed for this ES module; needed to load native
// .node addons (ONNX Runtime), which cannot be loaded via ESM `import`.
var require2 = createRequire(import.meta.url);
/**
 * Load the ONNX Runtime native addon at `addonPath` and open an inference
 * session for `modelPath`.
 *
 * options.executionProviders overrides the default ["cuda", "cpu"] list.
 * Returns a thin wrapper exposing run(inputs) and dispose(); load/session
 * failures are rethrown as descriptive Errors.
 */
function createOnnxSession(addonPath, modelPath, options) {
  const describe = (error) => error instanceof Error ? error.message : String(error);
  let addon;
  try {
    addon = require2(addonPath);
  } catch (error) {
    throw new Error(`Failed to load ONNX Runtime addon from "${addonPath}": ${describe(error)}`);
  }
  // Copy the caller's provider list so the addon cannot mutate it.
  const executionProviders = options?.executionProviders ? [...options.executionProviders] : ["cuda", "cpu"];
  let session;
  try {
    session = addon.createSession(modelPath, { executionProviders });
  } catch (error) {
    throw new Error(`Failed to create ONNX session for model "${modelPath}": ${describe(error)}`);
  }
  return {
    run: (inputs) => session.run(inputs),
    dispose: () => session.dispose()
  };
}
// STFT geometry fixed by the MDX-Net model's expected tensor layout.
var N_FFT = 7680; // analysis FFT length in samples
var HOP_SIZE = 1024; // hop between successive STFT frames
var DIM_F = 3072; // frequency bins kept in the model tensor (subset of NB_BINS)
var DIM_T = 256; // time frames per model segment
var NB_BINS = N_FFT / 2 + 1; // full one-sided spectrum size (3841)
/**
 * Hann-windowed STFT of `signal` written directly into a flat model tensor.
 *
 * For each frame, the first DIM_F bins are stored frequency-major:
 * tensor[realOffset + freq * DIM_T + frame] / tensor[imagOffset + ...].
 * Reuses fft2.frameRe/frameIm as scratch input buffers; results come from
 * fft2.outRe/outIm. Trailing samples shorter than N_FFT are dropped.
 */
function stft7680IntoTensor(fft2, signal, tensor, realOffset, imagOffset) {
  const win = hanningWindow(N_FFT);
  const realInput = fft2.frameRe;
  const imagInput = fft2.frameIm;
  // Purely real input: zero the imaginary plane once up front.
  imagInput.fill(0);
  let frameIndex = 0;
  let start = 0;
  while (start + N_FFT <= signal.length) {
    for (let index = 0; index < N_FFT; index++) {
      realInput[index] = (signal[start + index] ?? 0) * (win[index] ?? 0);
    }
    fft2.fft(realInput, imagInput, fft2.outRe, fft2.outIm);
    for (let freq = 0; freq < DIM_F; freq++) {
      const cell = freq * DIM_T + frameIndex;
      tensor[realOffset + cell] = fft2.outRe[freq] ?? 0;
      tensor[imagOffset + cell] = fft2.outIm[freq] ?? 0;
    }
    frameIndex += 1;
    start += HOP_SIZE;
  }
}
/**
 * Inverse STFT from a flat model tensor back to a time-domain signal using
 * windowed overlap-add.
 *
 * The tensor holds DIM_F frequency-major bins per frame (see
 * stft7680IntoTensor); bins above DIM_F are treated as zero. `scale` is a
 * per-bin gain (model output compensation). `output` accumulates windowed
 * frames and `windowSum` the squared window, then output is normalized by
 * windowSum at the end — both must be zero-filled by the caller.
 */
function istft7680FromTensor(fft2, tensor, realOffset, imagOffset, numFrames, scale, output, windowSum) {
  const win = hanningWindow(N_FFT);
  // Reuse the FFT workspace buffers for the full-length spectrum.
  const fullRe = fft2.frameRe;
  const fullIm = fft2.frameIm;
  const outputLength = output.length;
  for (let frame = 0; frame < numFrames; frame++) {
    fullRe.fill(0);
    fullIm.fill(0);
    for (let freq = 0; freq < DIM_F; freq++) {
      fullRe[freq] = (tensor[realOffset + freq * DIM_T + frame] ?? 0) * scale;
      fullIm[freq] = (tensor[imagOffset + freq * DIM_T + frame] ?? 0) * scale;
    }
    // Rebuild the conjugate-symmetric upper half so the IFFT output is real.
    for (let index = 1; index < NB_BINS - 1; index++) {
      fullRe[N_FFT - index] = fullRe[index] ?? 0;
      fullIm[N_FFT - index] = -(fullIm[index] ?? 0);
    }
    fft2.ifft(fullRe, fullIm, fft2.outRe, fft2.outIm);
    // Windowed overlap-add into the output, clipped to its length.
    const frameOffset = frame * HOP_SIZE;
    for (let index = 0; index < N_FFT; index++) {
      const pos = frameOffset + index;
      if (pos < outputLength) {
        const wt = win[index] ?? 0;
        output[pos] = (output[pos] ?? 0) + (fft2.outRe[index] ?? 0) * wt;
        windowSum[pos] = (windowSum[pos] ?? 0) + wt * wt;
      }
    }
  }
  // Normalize by the accumulated squared window; skip near-zero sums to avoid
  // amplifying noise at the signal edges.
  for (let index = 0; index < outputLength; index++) {
    const ws = windowSum[index] ?? 0;
    if (ws > 1e-8) {
      output[index] = (output[index] ?? 0) / ws;
    }
  }
}
+
3363
+ // src/transforms/dialogue-isolate/utils/segment.ts
3364
// src/transforms/dialogue-isolate/utils/segment.ts
// Tensor dimensions, duplicated per-module by the bundler (match DIM_F/DIM_T).
var DIM_F2 = 3072;
var DIM_T2 = 256;
// Elements per (channel, real/imag) plane inside the flat model input tensor.
var CHANNEL_STRIDE = DIM_F2 * DIM_T2;
/**
 * Symmetric cross-fade window for overlap-adding processed segments.
 *
 * Both halves ramp as ((i + 1) / half) ** transitionPower, mirrored around the
 * center; with transitionPower = 1 this is a linear fade in/out.
 */
function buildTransitionWindow(segmentLength, transitionPower) {
  const window = new Float32Array(segmentLength);
  const half = segmentLength / 2;
  const rampLength = Math.ceil(half);
  for (let position = 0; position < rampLength; position++) {
    const gain = Math.pow((position + 1) / half, transitionPower);
    // Write the ramp value at both mirrored ends.
    window[position] = gain;
    window[segmentLength - 1 - position] = gain;
  }
  return window;
}
/**
 * Allocate all reusable scratch buffers needed by processSegment for one
 * segment length: per-channel input/output slices, the flat 4-plane model
 * input tensor, and the iSTFT window-sum accumulator.
 */
function createSegmentWorkspace(segmentLength) {
  const segmentBuffer = () => new Float32Array(segmentLength);
  return {
    segLeft: segmentBuffer(),
    segRight: segmentBuffer(),
    // [L-real, R-real, L-imag, R-imag] planes, CHANNEL_STRIDE elements each.
    inputData: new Float32Array(4 * CHANNEL_STRIDE),
    segOutLeft: segmentBuffer(),
    segOutRight: segmentBuffer(),
    istftWindowSum: segmentBuffer()
  };
}
/**
 * Run one SEGMENT_SAMPLES-long window of stereo audio through the MDX-Net
 * ONNX session and return the separated left/right signals.
 *
 * Copies chunkLen samples starting at `offset` into the zero-padded workspace
 * slices, builds the 4-plane [L-re, R-re, L-im, R-im] input tensor, runs the
 * model, and resynthesizes via istft7680FromTensor scaled by `compensate`.
 * Returns undefined when the session yields no `output` tensor. The returned
 * arrays alias workspace buffers — consume them before the next call.
 */
function processSegment(left, right, offset, chunkLen, isMono, workspace, fft2, session, compensate) {
  const { segLeft, segRight, inputData, segOutLeft, segOutRight, istftWindowSum } = workspace;
  segLeft.fill(0);
  for (let index = 0; index < chunkLen; index++) {
    segLeft[index] = left[offset + index] ?? 0;
  }
  inputData.fill(0);
  stft7680IntoTensor(fft2, segLeft, inputData, 0 * CHANNEL_STRIDE, 2 * CHANNEL_STRIDE);
  if (isMono) {
    // Mono: duplicate the left channel's real/imag planes into the right slots.
    inputData.copyWithin(1 * CHANNEL_STRIDE, 0 * CHANNEL_STRIDE, 1 * CHANNEL_STRIDE);
    inputData.copyWithin(3 * CHANNEL_STRIDE, 2 * CHANNEL_STRIDE, 3 * CHANNEL_STRIDE);
  } else {
    segRight.fill(0);
    for (let index = 0; index < chunkLen; index++) {
      segRight[index] = right[offset + index] ?? 0;
    }
    stft7680IntoTensor(fft2, segRight, inputData, 1 * CHANNEL_STRIDE, 3 * CHANNEL_STRIDE);
  }
  // NOTE(review): session.run is called synchronously here — presumably the
  // addon exposes a blocking run; confirm against the addon's API.
  const result = session.run({
    input: { data: inputData, dims: [1, 4, DIM_F2, DIM_T2] }
  });
  const modelOutput = result.output;
  if (!modelOutput) return void 0;
  segOutLeft.fill(0);
  istftWindowSum.fill(0);
  istft7680FromTensor(fft2, modelOutput.data, 0 * CHANNEL_STRIDE, 2 * CHANNEL_STRIDE, DIM_T2, compensate, segOutLeft, istftWindowSum);
  if (isMono) {
    segOutRight.set(segOutLeft);
  } else {
    segOutRight.fill(0);
    istftWindowSum.fill(0);
    istft7680FromTensor(fft2, modelOutput.data, 1 * CHANNEL_STRIDE, 3 * CHANNEL_STRIDE, DIM_T2, compensate, segOutRight, istftWindowSum);
  }
  return { left: segOutLeft, right: segOutRight };
}
/**
 * Divide each accumulated overlap-add sample by its summed window weight,
 * in place. Samples whose total weight is zero (or negative) are left as-is.
 */
function normalizeOverlapAdd(output, weights, length) {
  let index = 0;
  while (index < length) {
    const totalWeight = weights[index] ?? 1;
    if (totalWeight > 0) {
      const accumulated = output[index] ?? 0;
      output[index] = accumulated / totalWeight;
    }
    index += 1;
  }
}
+
3431
+ // src/transforms/dialogue-isolate/index.ts
3432
// src/transforms/dialogue-isolate/index.ts
// Zod schema for the dialogue-isolate node: required external binaries/model
// plus post-separation band-pass corner frequencies.
var schema24 = z.object({
  modelPath: z.string().default("").meta({ input: "file", mode: "open", accept: ".onnx", binary: "Kim_Vocal_2", download: "https://huggingface.co/seanghay/uvr_models" }).describe("MDX-Net vocal isolation model (.onnx)"),
  ffmpegPath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffmpeg", download: "https://ffmpeg.org/download.html" }).describe("FFmpeg \u2014 audio/video processing tool"),
  onnxAddonPath: z.string().default("").meta({ input: "file", mode: "open", binary: "onnx-addon", download: "https://github.com/visionsofparadise/onnx-runtime-addon" }).describe("ONNX Runtime native addon"),
  highPass: z.number().min(20).max(500).multipleOf(10).default(80).describe("High Pass"),
  lowPass: z.number().min(1e3).max(22050).multipleOf(100).default(2e4).describe("Low Pass")
});
// Processing constants for dialogue isolation.
var SAMPLE_RATE = 44100; // the MDX-Net model operates at 44.1 kHz
var N_FFT2 = 7680;
var HOP_SIZE2 = 1024;
var DIM_T3 = 256;
// Output gain compensation applied during resynthesis — value appears
// model-specific (MDX-Net convention); verify against the model config.
var COMPENSATE = 1.009;
// One model segment: 256 STFT frames' worth of audio (261120 samples).
var SEGMENT_SAMPLES = N_FFT2 + (DIM_T3 - 1) * HOP_SIZE2;
var OVERLAP = 0.25; // fraction of each segment overlapped with the next
var TRANSITION_POWER = 1; // cross-fade window shape (1 = linear)
// Whole-file transform stream: isolates vocals/dialogue with an MDX-Net ONNX
// model, processing the audio in overlapping fixed-size segments at 44.1 kHz.
var DialogueIsolateStream = class extends BufferedTransformStream {
  constructor(properties) {
    super(properties);
    // FFT sized to the model's analysis window; reused for every segment.
    this.fftInstance = new MixedRadixFft(N_FFT2);
  }
  // Open the ONNX session before the base setup runs.
  async _setup(input, context) {
    this.session = createOnnxSession(this.properties.onnxAddonPath, this.properties.modelPath, { executionProviders: filterOnnxProviders(context.executionProviders) });
    return super._setup(input, context);
  }
  async _process(buffer) {
    const frames = buffer.frames;
    const channels = buffer.channels;
    const chunk = await buffer.read(0, frames);
    // Collapse to at most stereo; mono is detected by reference identity.
    const left = chunk.samples[0] ?? new Float32Array(frames);
    const right = channels >= 2 ? chunk.samples[1] ?? left : left;
    const isMono = left === right;
    // Resample to the model's 44.1 kHz via ffmpeg when needed.
    let left44k = left;
    let right44k = right;
    if ((this.sampleRate ?? 44100) !== SAMPLE_RATE) {
      const resampled = await resampleDirect(this.properties.ffmpegPath, [left, right], this.sampleRate ?? 44100, SAMPLE_RATE);
      left44k = resampled[0] ?? left;
      right44k = resampled[1] ?? right;
    }
    const samples44k = left44k.length;
    // Segments advance by (1 - OVERLAP) of their length; overlapping regions
    // are cross-faded with `weight` and renormalized afterwards.
    const stride = Math.round((1 - OVERLAP) * SEGMENT_SAMPLES);
    const outputLeft = new Float32Array(samples44k);
    const outputRight = new Float32Array(samples44k);
    const sumWeight = new Float32Array(samples44k);
    const weight = buildTransitionWindow(SEGMENT_SAMPLES, TRANSITION_POWER);
    const workspace = createSegmentWorkspace(SEGMENT_SAMPLES);
    for (let offset = 0; offset < samples44k; offset += stride) {
      const chunkLen = Math.min(SEGMENT_SAMPLES, samples44k - offset);
      const processed = processSegment(left44k, right44k, offset, chunkLen, isMono, workspace, this.fftInstance, this.session, COMPENSATE);
      // Skipped segments (no model output) simply contribute no weight.
      if (!processed) continue;
      for (let index = 0; index < chunkLen; index++) {
        const wt = weight[index] ?? 1;
        outputLeft[offset + index] = (outputLeft[offset + index] ?? 0) + (processed.left[index] ?? 0) * wt;
        outputRight[offset + index] = (outputRight[offset + index] ?? 0) + (processed.right[index] ?? 0) * wt;
        sumWeight[offset + index] = (sumWeight[offset + index] ?? 0) + wt;
      }
    }
    normalizeOverlapAdd(outputLeft, sumWeight, samples44k);
    normalizeOverlapAdd(outputRight, sumWeight, samples44k);
    // Resample back to the stream's native rate when we converted above.
    let finalLeft = outputLeft;
    let finalRight = outputRight;
    if ((this.sampleRate ?? 44100) !== SAMPLE_RATE) {
      const resampled = await resampleDirect(this.properties.ffmpegPath, [outputLeft, outputRight], SAMPLE_RATE, this.sampleRate ?? 44100);
      finalLeft = resampled[0] ?? outputLeft;
      finalRight = resampled[1] ?? outputRight;
    }
    // Fan the stereo result back out to the original channel count
    // (channels beyond 2 receive the right channel).
    const outputChannels = [];
    for (let ch = 0; ch < channels; ch++) {
      const out = new Float32Array(frames);
      const srcCh = Math.min(ch, 1);
      const src = srcCh === 0 ? finalLeft : finalRight;
      out.set(src.subarray(0, Math.min(src.length, frames)));
      outputChannels.push(out);
    }
    // Final clean-up band-pass using the node's highPass/lowPass corners.
    applyBandpass(outputChannels, this.sampleRate ?? 44100, this.properties.highPass, this.properties.lowPass);
    await buffer.write(0, outputChannels);
  }
};
// Graph node wrapping DialogueIsolateStream; whole-file buffering because the
// model processes overlapping fixed-size segments of the entire signal.
var _DialogueIsolateNode = class _DialogueIsolateNode extends TransformNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "transform", "dialogue-isolate"];
  }
  // Type guard for dialogue-isolate nodes.
  static is(value) {
    return TransformNode.is(value) && value.type[2] === "dialogue-isolate";
  }
  // Build the processing stream with the node's resolved buffering options.
  createStream() {
    const streamProperties = {
      ...this.properties,
      bufferSize: this.bufferSize,
      overlap: this.properties.overlap ?? 0
    };
    return new DialogueIsolateStream(streamProperties);
  }
  // Duplicate this node, keeping the prior config under `previousProperties`.
  clone(overrides) {
    const nextProperties = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _DialogueIsolateNode(nextProperties);
  }
};
_DialogueIsolateNode.moduleName = "Dialogue Isolate";
_DialogueIsolateNode.moduleDescription = "Isolate dialogue from background using MDX-Net vocal separation";
_DialogueIsolateNode.schema = schema24;
var DialogueIsolateNode = _DialogueIsolateNode;
/**
 * Factory for a DialogueIsolateNode. `modelPath` and `ffmpegPath` are
 * required; the remaining options fall back to the schema defaults.
 */
function dialogueIsolate(options) {
  const properties = {
    modelPath: options.modelPath,
    ffmpegPath: options.ffmpegPath,
    onnxAddonPath: options.onnxAddonPath ?? "",
    highPass: options.highPass ?? 80,
    lowPass: options.lowPass ?? 2e4,
    id: options.id
  };
  return new DialogueIsolateNode(properties);
}
/**
 * Average magnitude spectrum of `signal` via a 2048-point STFT with 75%
 * overlap. Returns a Float32Array of fftSize/2 + 1 averaged bin magnitudes
 * (all zeros when the signal yields no frames).
 *
 * The per-bin averaging previously duplicated averageSpectrumFromStft
 * verbatim; it now delegates to that helper (a hoisted function declaration
 * in this module), keeping a single implementation of the averaging loop.
 *
 * @param signal      time-domain samples
 * @param _sampleRate unused; kept for interface compatibility with callers
 */
function computeAverageSpectrum(signal, _sampleRate) {
  const fftSize = 2048;
  const hopSize = fftSize / 4;
  const halfSize = fftSize / 2 + 1;
  const result = stft(signal, fftSize, hopSize);
  return averageSpectrumFromStft(result, halfSize);
}
/**
 * Average the per-bin magnitude of an STFT result across all frames.
 *
 * `result` is { frames, real: Float32Array[], imag: Float32Array[] };
 * frames with a missing real or imaginary plane are skipped. Returns a
 * Float32Array of `halfSize` averaged magnitudes (zeros when frames === 0).
 */
function averageSpectrumFromStft(result, halfSize) {
  const avgMagnitude = new Float32Array(halfSize);
  const frameCount = result.frames;
  for (let frame = 0; frame < frameCount; frame++) {
    const realFrame = result.real[frame];
    const imagFrame = result.imag[frame];
    if (!realFrame || !imagFrame) continue;
    for (let bin = 0; bin < halfSize; bin++) {
      const re = realFrame[bin] ?? 0;
      const im = imagFrame[bin] ?? 0;
      avgMagnitude[bin] += Math.sqrt(re * re + im * im);
    }
  }
  if (frameCount > 0) {
    for (let bin = 0; bin < halfSize; bin++) {
      avgMagnitude[bin] /= frameCount;
    }
  }
  return avgMagnitude;
}
/**
 * Per-bin EQ correction curve in dB: reference level minus input level,
 * with magnitudes floored at 1e-10 before the log to avoid -Infinity.
 * The raw curve is smoothed over `smoothingOctaves` before being returned.
 */
function computeCorrection(reference, input, smoothingOctaves) {
  const size = Math.min(reference.length, input.length);
  const correctionDb = new Float32Array(size);
  for (let bin = 0; bin < size; bin++) {
    const refMag = Math.max(reference[bin] ?? 0, 1e-10);
    const inMag = Math.max(input[bin] ?? 0, 1e-10);
    correctionDb[bin] = 20 * Math.log10(refMag) - 20 * Math.log10(inMag);
  }
  return smoothSpectrum(correctionDb, smoothingOctaves);
}
/**
 * Fractional-octave smoothing: each bin (except DC) becomes the mean of its
 * neighbors within +/- octaves/2, clamped to [1, length - 1]. Bin 0 is copied
 * through unchanged.
 */
function smoothSpectrum(spectrum, octaves) {
  const length = spectrum.length;
  const smoothed = new Float32Array(length);
  smoothed[0] = spectrum[0] ?? 0;
  // Half-band ratio is constant across bins; hoist it out of the loop.
  const halfBandFactor = Math.pow(2, octaves / 2);
  for (let bin = 1; bin < length; bin++) {
    const lowerBin = Math.max(1, Math.round(bin / halfBandFactor));
    const upperBin = Math.min(length - 1, Math.round(bin * halfBandFactor));
    let total = 0;
    let count = 0;
    for (let neighbor = lowerBin; neighbor <= upperBin; neighbor++) {
      total += spectrum[neighbor] ?? 0;
      count += 1;
    }
    smoothed[bin] = count > 0 ? total / count : spectrum[bin] ?? 0;
  }
  return smoothed;
}
+
3607
+ // src/transforms/eq-match/index.ts
3608
// src/transforms/eq-match/index.ts
// Zod schema for the EQ-match node: reference audio path, smoothing width in
// octaves, and optional native FFT addon paths.
var schema25 = z.object({
  referencePath: z.string().default("").describe("Reference Path"),
  smoothing: z.number().min(0).max(1).multipleOf(0.01).default(1 / 3).describe("Smoothing"),
  vkfftAddonPath: z.string().default("").meta({ input: "file", mode: "open", binary: "vkfft-addon", download: "https://github.com/visionsofparadise/vkfft-addon" }).describe("VkFFT native addon \u2014 GPU FFT acceleration"),
  fftwAddonPath: z.string().default("").meta({ input: "file", mode: "open", binary: "fftw-addon", download: "https://github.com/visionsofparadise/fftw-addon" }).describe("FFTW native addon \u2014 CPU FFT acceleration")
});
// Whole-file transform stream that matches the input's average spectrum to a
// reference recording by applying a smoothed per-bin gain in the STFT domain.
var EqMatchStream = class extends BufferedTransformStream {
  // Load the reference file and precompute its average spectrum once.
  async _setup(input, context) {
    const fft2 = initFftBackend(context.executionProviders, this.properties);
    this.fftBackend = fft2.backend;
    this.fftAddonOptions = fft2.addonOptions;
    // readToBuffer is defined elsewhere in this module; it opens the
    // reference file into a readable audio buffer.
    const { buffer: refBuffer } = await readToBuffer(this.properties.referencePath);
    const refFrames = refBuffer.frames;
    const chunk = await refBuffer.read(0, refFrames);
    // Only the reference's first channel is analyzed.
    const channel = chunk.samples[0];
    if (channel) {
      this.referenceSpectrum = computeAverageSpectrum(channel, this.sampleRate ?? 44100);
    }
    await refBuffer.close();
    return super._setup(input, context);
  }
  async _process(buffer) {
    // No reference spectrum (e.g. empty reference file): pass audio through.
    if (!this.referenceSpectrum) return;
    const frames = buffer.frames;
    const channels = buffer.channels;
    // Same STFT geometry as computeAverageSpectrum, so bins line up 1:1.
    const fftSize = 2048;
    const hopSize = fftSize / 4;
    const halfSize = fftSize / 2 + 1;
    const paddedLength = Math.max(frames, fftSize);
    const numStftFrames = Math.floor((paddedLength - fftSize) / hopSize) + 1;
    const stftOutput = {
      real: Array.from({ length: numStftFrames }, () => new Float32Array(halfSize)),
      imag: Array.from({ length: numStftFrames }, () => new Float32Array(halfSize))
    };
    const chunk = await buffer.read(0, frames);
    for (let ch = 0; ch < channels; ch++) {
      let channel = chunk.samples[ch];
      if (!channel) continue;
      if (channel.length < fftSize) {
        const padded = new Float32Array(fftSize);
        padded.set(channel);
        channel = padded;
      }
      const stftResult = stft(channel, fftSize, hopSize, stftOutput, this.fftBackend, this.fftAddonOptions);
      // Per-channel correction: reference dB minus this channel's dB,
      // smoothed, then converted to linear gains.
      const inputSpectrum = averageSpectrumFromStft(stftResult, halfSize);
      const correctionDb = computeCorrection(this.referenceSpectrum, inputSpectrum, this.properties.smoothing);
      const correctionLinear = correctionDb.map((db) => Math.pow(10, db / 20));
      for (let frame = 0; frame < stftResult.frames; frame++) {
        const realFrame = stftResult.real[frame];
        const imagFrame = stftResult.imag[frame];
        if (!realFrame || !imagFrame) continue;
        for (let bin = 0; bin < realFrame.length; bin++) {
          // Bins beyond the correction curve reuse its last gain.
          const correctionIdx = Math.min(bin, correctionLinear.length - 1);
          const gain = correctionLinear[correctionIdx] ?? 1;
          realFrame[bin] = (realFrame[bin] ?? 0) * gain;
          imagFrame[bin] = (imagFrame[bin] ?? 0) * gain;
        }
      }
      const matched = istft(stftResult, hopSize, paddedLength, this.fftBackend, this.fftAddonOptions).subarray(0, frames);
      await buffer.write(0, replaceChannel(chunk, ch, matched, channels));
    }
  }
};
// Graph node wrapping EqMatchStream; whole-file buffering because the average
// spectrum is computed over the entire signal.
var _EqMatchNode = class _EqMatchNode extends TransformNode {
  constructor(properties) {
    super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
    this.type = ["buffered-audio-node", "transform", "eq-match"];
  }
  // Type guard for eq-match nodes.
  static is(value) {
    return TransformNode.is(value) && value.type[2] === "eq-match";
  }
  // Build the processing stream with the node's resolved buffering options.
  createStream() {
    const streamProperties = {
      ...this.properties,
      bufferSize: this.bufferSize,
      overlap: this.properties.overlap ?? 0
    };
    return new EqMatchStream(streamProperties);
  }
  // Duplicate this node, keeping the prior config under `previousProperties`.
  clone(overrides) {
    const nextProperties = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _EqMatchNode(nextProperties);
  }
};
_EqMatchNode.moduleName = "EQ Match";
_EqMatchNode.moduleDescription = "Match frequency response to a reference profile";
_EqMatchNode.schema = schema25;
var EqMatchNode = _EqMatchNode;
/**
 * Factory for an EqMatchNode targeting the spectrum of the audio file at
 * `referencePath`. Options default to 1/3-octave smoothing and no native
 * FFT addons.
 */
function eqMatch(referencePath, options) {
  const properties = {
    referencePath,
    smoothing: options?.smoothing ?? 1 / 3,
    vkfftAddonPath: options?.vkfftAddonPath ?? "",
    fftwAddonPath: options?.fftwAddonPath ?? "",
    id: options?.id
  };
  return new EqMatchNode(properties);
}
+
3700
+ // src/transforms/leveler/utils/rms.ts
3701
// src/transforms/leveler/utils/rms.ts
// Levels at or below this are treated as silence and do not move the leveler.
var GATE_THRESHOLD_DB = -60;
/**
 * Root-mean-square level over every sample of every channel.
 * Returns 0 for empty input (no channels or all channels empty).
 */
function computeRms(samples) {
  let sumOfSquares = 0;
  let totalSamples = 0;
  for (const channel of samples) {
    for (const value of channel) {
      sumOfSquares += value * value;
      totalSamples += 1;
    }
  }
  if (totalSamples === 0) return 0;
  return Math.sqrt(sumOfSquares / totalSamples);
}
/**
 * Gain (in dB) that would bring `rms` to `targetLoudness`, clamped to
 * [-minGain, maxGain]. Returns undefined when the level is at or below the
 * silence gate, signalling the caller to hold its current gain.
 * (Note: `minGain` is the maximum allowed cut, applied with a negative sign.)
 */
function computeTargetGain(rms, targetLoudness, maxGain, minGain) {
  const rmsDb = 20 * Math.log10(Math.max(rms, 1e-10));
  // Gated: silence should not drag the smoothed gain around.
  if (rmsDb <= GATE_THRESHOLD_DB) return void 0;
  const desiredGainDb = targetLoudness - rmsDb;
  if (desiredGainDb > maxGain) return maxGain;
  if (desiredGainDb < -minGain) return -minGain;
  return desiredGainDb;
}
+
3720
+ // src/transforms/leveler/index.ts
3721
// src/transforms/leveler/index.ts
// Zod schema for the leveler node: target level (dB RMS), analysis window
// (seconds), smoothing speed, and gain/cut limits (dB).
var schema26 = z.object({
  target: z.number().min(-60).max(0).multipleOf(1).default(-20).describe("Target"),
  window: z.number().min(0.01).max(5).multipleOf(0.01).default(0.5).describe("Window"),
  speed: z.number().min(0.01).max(1).multipleOf(0.01).default(0.1).describe("Speed"),
  maxGain: z.number().min(0).max(40).multipleOf(1).default(12).describe("Max Gain"),
  maxCut: z.number().min(0).max(40).multipleOf(1).default(12).describe("Max Cut")
});
// Windowed RMS leveler: measures each buffered window's RMS, exponentially
// smooths a gain toward the target level, and applies it on unbuffering.
var LevelerStream = class extends BufferedTransformStream {
  constructor(properties) {
    super(properties);
    this.windowSamples = 0; // resolved lazily once the sample rate is known
    this.currentGainDb = 0; // smoothed gain state carried across windows
    this.windowSeconds = this.properties.window;
  }
  // Lazily size the internal buffer to one analysis window of audio.
  _buffer(chunk, buffer) {
    if (this.bufferSize === 0) {
      this.bufferSize = Math.round(chunk.sampleRate * this.properties.window);
    }
    return super._buffer(chunk, buffer);
  }
  // All work happens per-window in _unbuffer; nothing to do here.
  _process(_buffer) {
  }
  _unbuffer(chunk) {
    if (this.windowSamples === 0) {
      this.windowSamples = Math.round(this.properties.window * chunk.sampleRate);
    }
    const { target, speed, maxGain, maxCut } = this.properties;
    const rms = computeRms(chunk.samples);
    // undefined when gated (below -60 dB): hold the current gain.
    const targetGainDb = computeTargetGain(rms, target, maxGain, maxCut);
    if (targetGainDb !== void 0) {
      // windowSamples / windowSeconds equals the sample rate, so
      // alpha = 1 - exp(-1 / (speed * sampleRate)).
      // NOTE(review): that makes alpha a per-sample coefficient applied once
      // per window — verify the intended time constant.
      const alpha = 1 - Math.exp(-1 / (speed * (this.windowSamples / this.windowSeconds)));
      this.currentGainDb += alpha * (targetGainDb - this.currentGainDb);
    }
    // One linear gain applied uniformly across the whole window.
    const gainLinear = Math.pow(10, this.currentGainDb / 20);
    const samples = chunk.samples.map((channel) => {
      const output = new Float32Array(channel.length);
      for (let index = 0; index < channel.length; index++) {
        output[index] = (channel[index] ?? 0) * gainLinear;
      }
      return output;
    });
    return { samples, offset: chunk.offset, sampleRate: chunk.sampleRate, bitDepth: chunk.bitDepth };
  }
};
// Graph node wrapping LevelerStream. Unlike the whole-file nodes above it
// forwards the caller's properties untouched (streaming, windowed operation).
var _LevelerNode = class _LevelerNode extends TransformNode {
  constructor() {
    super(...arguments);
    this.type = ["buffered-audio-node", "transform", "leveler"];
  }
  // Type guard for leveler nodes.
  static is(value) {
    return TransformNode.is(value) && value.type[2] === "leveler";
  }
  // Build the processing stream with the node's resolved buffering options.
  createStream() {
    const streamProperties = {
      ...this.properties,
      bufferSize: this.bufferSize,
      overlap: this.properties.overlap ?? 0
    };
    return new LevelerStream(streamProperties);
  }
  // Duplicate this node, keeping the prior config under `previousProperties`.
  clone(overrides) {
    const nextProperties = { ...this.properties, previousProperties: this.properties, ...overrides };
    return new _LevelerNode(nextProperties);
  }
};
_LevelerNode.moduleName = "Leveler";
_LevelerNode.moduleDescription = "Smooth volume variations for consistent loudness";
_LevelerNode.schema = schema26;
var LevelerNode = _LevelerNode;
/**
 * Factory for a LevelerNode. All options are optional and default to the
 * schema defaults (-20 dB target, 0.5 s window, 0.1 speed, +/-12 dB limits).
 */
function leveler(options) {
  const properties = {
    target: options?.target ?? -20,
    window: options?.window ?? 0.5,
    speed: options?.speed ?? 0.1,
    maxGain: options?.maxGain ?? 12,
    maxCut: options?.maxCut ?? 12,
    id: options?.id
  };
  return new LevelerNode(properties);
}
// STFT geometry for the sqrt(N)-normalized transform helpers below.
var FFT_SIZE = 4096;
var HOP_SIZE3 = 1024;
/**
 * Symmetric (edge-repeating) reflection padding: copies `signal` into the
 * middle of a new Float32Array of `totalLen` and mirrors it outward into
 * `padLeft` / `padRight` samples on each side, e.g. [1,2,3] with 2/2 pads
 * becomes [2,1,1,2,3,3,2]. Caller guarantees totalLen >= padLeft +
 * signal.length + padRight.
 */
function reflectPad(signal, padLeft, padRight, totalLen) {
  const padded = new Float32Array(totalLen);
  padded.set(signal, padLeft);
  // Left pad mirrors around the boundary: padded[padLeft - 1 - i] = padded[padLeft + i].
  for (let index = padLeft - 1; index >= 0; index--) {
    padded[index] = padded[2 * padLeft - 1 - index] ?? 0;
  }
  // Right pad mirrors around the last signal sample.
  const lastSample = padLeft + signal.length - 1;
  for (let index = 0; index < padRight; index++) {
    padded[lastSample + 1 + index] = padded[lastSample - index] ?? 0;
  }
  return padded;
}
+ function computeStftScaled(signal) {
3809
+ const scale = 1 / Math.sqrt(FFT_SIZE);
3810
+ const result = stft(signal, FFT_SIZE, HOP_SIZE3);
3811
+ for (const frame of result.real) {
3812
+ for (let index = 0; index < frame.length; index++) {
3813
+ frame[index] = (frame[index] ?? 0) * scale;
3814
+ }
3815
+ }
3816
+ for (const frame of result.imag) {
3817
+ for (let index = 0; index < frame.length; index++) {
3818
+ frame[index] = (frame[index] ?? 0) * scale;
3819
+ }
3820
+ }
3821
+ return result;
3822
+ }
3823
+ function computeIstftScaled(real, imag, outputLength) {
3824
+ const scale = Math.sqrt(FFT_SIZE);
3825
+ for (const frame of real) {
3826
+ for (let index = 0; index < frame.length; index++) {
3827
+ frame[index] = (frame[index] ?? 0) * scale;
3828
+ }
3829
+ }
3830
+ for (const frame of imag) {
3831
+ for (let index = 0; index < frame.length; index++) {
3832
+ frame[index] = (frame[index] ?? 0) * scale;
3833
+ }
3834
+ }
3835
+ return istft({ real, imag, frames: real.length, fftSize: FFT_SIZE }, HOP_SIZE3, outputLength);
3836
+ }
3837
+
3838
+ // src/transforms/music-rebalance/utils/stems.ts
3839
+ function normalizeAudio(left, right, frames) {
3840
+ const stereo = new Float32Array(2 * frames);
3841
+ stereo.set(left, 0);
3842
+ stereo.set(right, frames);
3843
+ let sum = 0;
3844
+ for (const sample of stereo) {
3845
+ sum += sample;
3846
+ }
3847
+ const mean = sum / stereo.length;
3848
+ let variance = 0;
3849
+ for (const sample of stereo) {
3850
+ const diff = sample - mean;
3851
+ variance += diff * diff;
3852
+ }
3853
+ const std = Math.sqrt(variance / stereo.length) || 1;
3854
+ const normalizedLeft = new Float32Array(frames);
3855
+ const normalizedRight = new Float32Array(frames);
3856
+ for (let index = 0; index < frames; index++) {
3857
+ normalizedLeft[index] = ((left[index] ?? 0) - mean) / std;
3858
+ normalizedRight[index] = ((right[index] ?? 0) - mean) / std;
3859
+ }
3860
+ return { normalizedLeft, normalizedRight, stats: { mean, std } };
3861
+ }
3862
+ function buildModelInput(segLeft, segRight, stftLeft, stftRight, segmentLength, xBins, xFrames) {
3863
+ const xData = new Float32Array(4 * xBins * xFrames);
3864
+ for (let ch = 0; ch < 2; ch++) {
3865
+ const stftCh = ch === 0 ? stftLeft : stftRight;
3866
+ for (let freq = 0; freq < xBins; freq++) {
3867
+ for (let frame = 0; frame < xFrames; frame++) {
3868
+ const realIdx = 2 * ch * xBins * xFrames + freq * xFrames + frame;
3869
+ const imagIdx = (2 * ch + 1) * xBins * xFrames + freq * xFrames + frame;
3870
+ const srcFrame = frame + 2;
3871
+ xData[realIdx] = stftCh.real[srcFrame]?.[freq] ?? 0;
3872
+ xData[imagIdx] = stftCh.imag[srcFrame]?.[freq] ?? 0;
3873
+ }
3874
+ }
3875
+ }
3876
+ const inputData = new Float32Array(2 * segmentLength);
3877
+ inputData.set(segLeft, 0);
3878
+ inputData.set(segRight, segmentLength);
3879
+ return { inputData, xData };
3880
+ }
3881
+ function extractStems(xtOut, xOut, workspace, stemOutputs, weight, segmentOffset, chunkLength, segmentLength) {
3882
+ const { freqRealBuffers, freqImagBuffers, nbFrames, stftLen, stftPad, pad: pad2, xBins, xFrames } = workspace;
3883
+ for (let source = 0; source < 4; source++) {
3884
+ for (let ch = 0; ch < 2; ch++) {
3885
+ const xtIndex = source * 2 * segmentLength + ch * segmentLength;
3886
+ for (let frame = 0; frame < nbFrames; frame++) {
3887
+ freqRealBuffers[frame]?.fill(0);
3888
+ freqImagBuffers[frame]?.fill(0);
3889
+ }
3890
+ if (xOut) {
3891
+ const baseOffset = source * 4 * xBins * xFrames;
3892
+ for (let freq = 0; freq < xBins; freq++) {
3893
+ for (let frame = 0; frame < xFrames; frame++) {
3894
+ const realIdx = baseOffset + 2 * ch * xBins * xFrames + freq * xFrames + frame;
3895
+ const imagIdx = baseOffset + (2 * ch + 1) * xBins * xFrames + freq * xFrames + frame;
3896
+ const destFrame = frame + 2;
3897
+ const realArr = freqRealBuffers[destFrame];
3898
+ const imagArr = freqImagBuffers[destFrame];
3899
+ if (realArr && imagArr) {
3900
+ realArr[freq] = xOut.data[realIdx] ?? 0;
3901
+ imagArr[freq] = xOut.data[imagIdx] ?? 0;
3902
+ }
3903
+ }
3904
+ }
3905
+ }
3906
+ const freqWaveform = computeIstftScaled(freqRealBuffers, freqImagBuffers, stftLen);
3907
+ const freqOffset = stftPad + pad2;
3908
+ for (let index = 0; index < chunkLength; index++) {
3909
+ const timeVal = xtOut ? xtOut.data[xtIndex + index] ?? 0 : 0;
3910
+ const freqVal = freqWaveform[freqOffset + index] ?? 0;
3911
+ const combined = timeVal + freqVal;
3912
+ const wt = weight[index] ?? 1;
3913
+ const outIdx = source * 2 + ch;
3914
+ const arr = stemOutputs[outIdx];
3915
+ if (arr) {
3916
+ arr[segmentOffset + index] = (arr[segmentOffset + index] ?? 0) + combined * wt;
3917
+ }
3918
+ }
3919
+ }
3920
+ }
3921
+ }
3922
+ function mixStems(stemOutputs, sumWeight, stemGains, stats, frames, channels) {
3923
+ const outputChannels = [];
3924
+ for (let ch = 0; ch < channels; ch++) {
3925
+ const output = new Float32Array(frames);
3926
+ const srcCh = Math.min(ch, 1);
3927
+ for (let index = 0; index < frames; index++) {
3928
+ const sw = sumWeight[index] ?? 1;
3929
+ let normalizedSum = 0;
3930
+ for (let source = 0; source < 4; source++) {
3931
+ const gain = stemGains[source] ?? 1;
3932
+ if (gain === 0) continue;
3933
+ const arr = stemOutputs[source * 2 + srcCh];
3934
+ normalizedSum += (arr ? (arr[index] ?? 0) / sw : 0) * gain;
3935
+ }
3936
+ output[index] = normalizedSum * stats.std + stats.mean;
3937
+ }
3938
+ outputChannels.push(output);
3939
+ }
3940
+ return outputChannels;
3941
+ }
3942
+
3943
+ // src/transforms/music-rebalance/index.ts
3944
+ var schema27 = z.object({
3945
+ modelPath: z.string().default("").meta({ input: "file", mode: "open", accept: ".onnx", binary: "htdemucs", download: "https://github.com/facebookresearch/demucs" }).describe("HTDemucs source separation model (.onnx) \u2014 requires .onnx.data file alongside"),
3946
+ ffmpegPath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffmpeg", download: "https://ffmpeg.org/download.html" }).describe("FFmpeg \u2014 audio/video processing tool"),
3947
+ onnxAddonPath: z.string().default("").meta({ input: "file", mode: "open", binary: "onnx-addon", download: "https://github.com/visionsofparadise/onnx-runtime-addon" }).describe("ONNX Runtime native addon"),
3948
+ highPass: z.number().min(0).max(500).multipleOf(10).default(0).describe("High Pass"),
3949
+ lowPass: z.number().min(0).max(22050).multipleOf(100).default(0).describe("Low Pass")
3950
+ });
3951
+ var HTDEMUCS_SAMPLE_RATE = 44100;
3952
+ var FFT_SIZE2 = 4096;
3953
+ var HOP_SIZE4 = 1024;
3954
+ var SEGMENT_SAMPLES2 = 343980;
3955
+ var OVERLAP2 = 0.25;
3956
+ var TRANSITION_POWER2 = 1;
3957
+ var MusicRebalanceStream = class extends BufferedTransformStream {
3958
+ async _setup(input, context) {
3959
+ this.session = createOnnxSession(this.properties.onnxAddonPath, this.properties.modelPath, { executionProviders: filterOnnxProviders(context.executionProviders) });
3960
+ return super._setup(input, context);
3961
+ }
3962
+ async _process(buffer) {
3963
+ const originalFrames = buffer.frames;
3964
+ const channels = buffer.channels;
3965
+ const chunk = await buffer.read(0, originalFrames);
3966
+ let left = chunk.samples[0] ?? new Float32Array(originalFrames);
3967
+ let right = channels >= 2 ? chunk.samples[1] ?? left : left;
3968
+ if ((this.sampleRate ?? 44100) !== HTDEMUCS_SAMPLE_RATE) {
3969
+ const resampled = await resampleDirect(this.properties.ffmpegPath, [left, right], this.sampleRate ?? 44100, HTDEMUCS_SAMPLE_RATE);
3970
+ left = resampled[0] ?? left;
3971
+ right = resampled[1] ?? right;
3972
+ }
3973
+ const frames = left.length;
3974
+ const { normalizedLeft, normalizedRight, stats } = normalizeAudio(left, right, frames);
3975
+ const stridesamples = Math.round((1 - OVERLAP2) * SEGMENT_SAMPLES2);
3976
+ const stemOutputs = new Array(4 * 2);
3977
+ for (let si = 0; si < 8; si++) {
3978
+ stemOutputs[si] = new Float32Array(frames);
3979
+ }
3980
+ const sumWeight = new Float32Array(frames);
3981
+ const weight = new Float32Array(SEGMENT_SAMPLES2);
3982
+ const half = SEGMENT_SAMPLES2 / 2;
3983
+ for (let index = 0; index < half; index++) {
3984
+ weight[index] = Math.pow((index + 1) / half, TRANSITION_POWER2);
3985
+ }
3986
+ for (let index = 0; index < half; index++) {
3987
+ weight[SEGMENT_SAMPLES2 - 1 - index] = weight[index] ?? 0;
3988
+ }
3989
+ const pad2 = Math.floor(HOP_SIZE4 / 2) * 3;
3990
+ const le = Math.ceil(SEGMENT_SAMPLES2 / HOP_SIZE4);
3991
+ const padEnd = pad2 + le * HOP_SIZE4 - SEGMENT_SAMPLES2;
3992
+ const paddedLen = SEGMENT_SAMPLES2 + pad2 + padEnd;
3993
+ const stftPadConst = FFT_SIZE2 / 2;
3994
+ const stftLenConst = paddedLen + FFT_SIZE2;
3995
+ const nbBinsConst = FFT_SIZE2 / 2 + 1;
3996
+ const nbFramesConst = Math.floor((stftLenConst - FFT_SIZE2) / HOP_SIZE4) + 1;
3997
+ const xBinsConst = nbBinsConst - 1;
3998
+ const xFramesConst = nbFramesConst - 4;
3999
+ const segLeft = new Float32Array(SEGMENT_SAMPLES2);
4000
+ const segRight = new Float32Array(SEGMENT_SAMPLES2);
4001
+ const freqRealBuffers = [];
4002
+ const freqImagBuffers = [];
4003
+ for (let frame = 0; frame < nbFramesConst; frame++) {
4004
+ freqRealBuffers.push(new Float32Array(nbBinsConst));
4005
+ freqImagBuffers.push(new Float32Array(nbBinsConst));
4006
+ }
4007
+ const workspace = {
4008
+ freqRealBuffers,
4009
+ freqImagBuffers,
4010
+ nbFrames: nbFramesConst,
4011
+ stftLen: stftLenConst,
4012
+ stftPad: stftPadConst,
4013
+ pad: pad2,
4014
+ xBins: xBinsConst,
4015
+ xFrames: xFramesConst
4016
+ };
4017
+ for (let segmentOffset = 0; segmentOffset < frames; segmentOffset += stridesamples) {
4018
+ const chunkLength = Math.min(SEGMENT_SAMPLES2, frames - segmentOffset);
4019
+ segLeft.fill(0);
4020
+ segRight.fill(0);
4021
+ for (let index = 0; index < chunkLength; index++) {
4022
+ segLeft[index] = normalizedLeft[segmentOffset + index] ?? 0;
4023
+ segRight[index] = normalizedRight[segmentOffset + index] ?? 0;
4024
+ }
4025
+ const paddedLeft = reflectPad(segLeft, pad2, padEnd, paddedLen);
4026
+ const paddedRight = reflectPad(segRight, pad2, padEnd, paddedLen);
4027
+ const stftInputLeft = reflectPad(paddedLeft, stftPadConst, stftPadConst, stftLenConst);
4028
+ const stftInputRight = reflectPad(paddedRight, stftPadConst, stftPadConst, stftLenConst);
4029
+ const stftLeft = computeStftScaled(stftInputLeft);
4030
+ const stftRight = computeStftScaled(stftInputRight);
4031
+ const { inputData, xData } = buildModelInput(segLeft, segRight, stftLeft, stftRight, SEGMENT_SAMPLES2, xBinsConst, xFramesConst);
4032
+ const result = this.session.run({
4033
+ input: { data: inputData, dims: [1, 2, SEGMENT_SAMPLES2] },
4034
+ x: { data: xData, dims: [1, 4, xBinsConst, xFramesConst] }
4035
+ });
4036
+ const xtOut = result.add_67 ?? result[Object.keys(result).pop() ?? ""];
4037
+ const xOut = result.output ?? result[Object.keys(result)[0] ?? ""];
4038
+ extractStems(xtOut, xOut, workspace, stemOutputs, weight, segmentOffset, chunkLength, SEGMENT_SAMPLES2);
4039
+ for (let index = 0; index < chunkLength; index++) {
4040
+ sumWeight[segmentOffset + index] = (sumWeight[segmentOffset + index] ?? 0) + (weight[index] ?? 0);
4041
+ }
4042
+ }
4043
+ const { stems } = this.properties;
4044
+ const stemGains = [stems.drums, stems.bass, stems.other, stems.vocals];
4045
+ const outputChannels = mixStems(stemOutputs, sumWeight, stemGains, stats, frames, channels);
4046
+ applyBandpass(outputChannels, HTDEMUCS_SAMPLE_RATE, this.properties.highPass, this.properties.lowPass);
4047
+ if ((this.sampleRate ?? 44100) !== HTDEMUCS_SAMPLE_RATE) {
4048
+ const resampled = await resampleDirect(this.properties.ffmpegPath, outputChannels, HTDEMUCS_SAMPLE_RATE, this.sampleRate ?? 44100);
4049
+ for (let ch = 0; ch < outputChannels.length; ch++) {
4050
+ const resampledCh = resampled[ch];
4051
+ if (!resampledCh) continue;
4052
+ const finalCh = new Float32Array(originalFrames);
4053
+ finalCh.set(resampledCh.subarray(0, Math.min(resampledCh.length, originalFrames)));
4054
+ outputChannels[ch] = finalCh;
4055
+ }
4056
+ }
4057
+ await buffer.write(0, outputChannels);
4058
+ }
4059
+ };
4060
+ var _MusicRebalanceNode = class _MusicRebalanceNode extends TransformNode {
4061
+ constructor(properties) {
4062
+ super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
4063
+ this.type = ["buffered-audio-node", "transform", "music-rebalance"];
4064
+ }
4065
+ static is(value) {
4066
+ return TransformNode.is(value) && value.type[2] === "music-rebalance";
4067
+ }
4068
+ createStream() {
4069
+ return new MusicRebalanceStream({ ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 });
4070
+ }
4071
+ clone(overrides) {
4072
+ return new _MusicRebalanceNode({ ...this.properties, previousProperties: this.properties, ...overrides });
4073
+ }
4074
+ };
4075
+ _MusicRebalanceNode.moduleName = "Music Rebalance";
4076
+ _MusicRebalanceNode.moduleDescription = "Rebalance stem volumes using HTDemucs source separation";
4077
+ _MusicRebalanceNode.schema = schema27;
4078
+ var MusicRebalanceNode = _MusicRebalanceNode;
4079
+ function musicRebalance(modelPath, stems, options) {
4080
+ const parsed = schema27.parse({
4081
+ modelPath,
4082
+ ffmpegPath: options?.ffmpegPath,
4083
+ onnxAddonPath: options?.onnxAddonPath
4084
+ });
4085
+ return new MusicRebalanceNode({
4086
+ ...parsed,
4087
+ stems: {
4088
+ vocals: stems.vocals ?? 1,
4089
+ drums: stems.drums ?? 1,
4090
+ bass: stems.bass ?? 1,
4091
+ other: stems.other ?? 1
4092
+ },
4093
+ id: options?.id
4094
+ });
4095
+ }
4096
+ var schema28 = z.object({
4097
+ ffmpegPath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffmpeg", download: "https://ffmpeg.org/download.html" }).describe("FFmpeg \u2014 audio/video processing tool"),
4098
+ semitones: z.number().min(-24).max(24).multipleOf(1).default(0).describe("Semitones"),
4099
+ cents: z.number().min(-100).max(100).multipleOf(1).default(0).describe("Cents")
4100
+ });
4101
+ var PitchShiftStream = class extends FfmpegStream {
4102
+ _buildArgs(_context) {
4103
+ const { semitones, cents } = this.properties;
4104
+ const totalSemitones = semitones + (cents ?? 0) / 100;
4105
+ const pitchRatio = Math.pow(2, totalSemitones / 12);
4106
+ return ["-af", `rubberband=pitch=${pitchRatio}`];
4107
+ }
4108
+ };
4109
+ var _PitchShiftNode = class _PitchShiftNode extends FfmpegNode {
4110
+ constructor() {
4111
+ super(...arguments);
4112
+ this.type = ["buffered-audio-node", "transform", "ffmpeg", "pitch-shift"];
4113
+ }
4114
+ static is(value) {
4115
+ return FfmpegNode.is(value) && value.type[3] === "pitch-shift";
4116
+ }
4117
+ createStream() {
4118
+ return new PitchShiftStream({ ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 });
4119
+ }
4120
+ clone(overrides) {
4121
+ return new _PitchShiftNode({ ...this.properties, previousProperties: this.properties, ...overrides });
4122
+ }
4123
+ };
4124
+ _PitchShiftNode.moduleName = "Pitch Shift";
4125
+ _PitchShiftNode.moduleDescription = "Change pitch without affecting duration";
4126
+ _PitchShiftNode.schema = schema28;
4127
+ var PitchShiftNode = _PitchShiftNode;
4128
+ function pitchShift(ffmpegPath, semitones, options) {
4129
+ return new PitchShiftNode({
4130
+ ffmpegPath,
4131
+ semitones,
4132
+ cents: options?.cents,
4133
+ id: options?.id
4134
+ });
4135
+ }
4136
+
4137
+ // src/transforms/spectral-repair/utils/interpolation.ts
4138
+ function interpolateTfRegion(real, imag, startFrame, endFrame, startBin, endBin) {
4139
+ const iterations = 5;
4140
+ const clampedStart = Math.max(0, startFrame);
4141
+ const clampedEnd = Math.min(real.length, endFrame);
4142
+ if (clampedStart >= clampedEnd) return;
4143
+ const halfSize = real[0]?.length ?? 0;
4144
+ const clampedStartBin = Math.max(0, startBin);
4145
+ const clampedEndBin = Math.min(halfSize, endBin);
4146
+ const regionFrames = clampedEnd - clampedStart;
4147
+ const regionBins = clampedEndBin - clampedStartBin;
4148
+ const writeReal = new Float32Array(regionFrames * regionBins);
4149
+ const writeImag = new Float32Array(regionFrames * regionBins);
4150
+ for (let iter = 0; iter < iterations; iter++) {
4151
+ for (let frame = clampedStart; frame < clampedEnd; frame++) {
4152
+ const realFrame = real[frame];
4153
+ const imagFrame = imag[frame];
4154
+ if (!realFrame || !imagFrame) continue;
4155
+ for (let bin = clampedStartBin; bin < clampedEndBin; bin++) {
4156
+ let realSum = 0;
4157
+ let imagSum = 0;
4158
+ let count = 0;
4159
+ const prevFrame = real[frame - 1];
4160
+ const nextFrame = real[frame + 1];
4161
+ const prevImag = imag[frame - 1];
4162
+ const nextImag = imag[frame + 1];
4163
+ if (prevFrame && prevImag) {
4164
+ realSum += prevFrame[bin] ?? 0;
4165
+ imagSum += prevImag[bin] ?? 0;
4166
+ count++;
4167
+ }
4168
+ if (nextFrame && nextImag) {
4169
+ realSum += nextFrame[bin] ?? 0;
4170
+ imagSum += nextImag[bin] ?? 0;
4171
+ count++;
4172
+ }
4173
+ if (bin > 0) {
4174
+ realSum += realFrame[bin - 1] ?? 0;
4175
+ imagSum += imagFrame[bin - 1] ?? 0;
4176
+ count++;
4177
+ }
4178
+ if (bin < halfSize - 1) {
4179
+ realSum += realFrame[bin + 1] ?? 0;
4180
+ imagSum += imagFrame[bin + 1] ?? 0;
4181
+ count++;
4182
+ }
4183
+ if (count > 0) {
4184
+ const bufferIndex = (frame - clampedStart) * regionBins + (bin - clampedStartBin);
4185
+ writeReal[bufferIndex] = realSum / count;
4186
+ writeImag[bufferIndex] = imagSum / count;
4187
+ }
4188
+ }
4189
+ }
4190
+ for (let frame = clampedStart; frame < clampedEnd; frame++) {
4191
+ const realFrame = real[frame];
4192
+ const imagFrame = imag[frame];
4193
+ if (!realFrame || !imagFrame) continue;
4194
+ for (let bin = clampedStartBin; bin < clampedEndBin; bin++) {
4195
+ const bufferIndex = (frame - clampedStart) * regionBins + (bin - clampedStartBin);
4196
+ realFrame[bin] = writeReal[bufferIndex] ?? 0;
4197
+ imagFrame[bin] = writeImag[bufferIndex] ?? 0;
4198
+ }
4199
+ }
4200
+ }
4201
+ }
4202
+
4203
+ // src/transforms/spectral-repair/index.ts
4204
+ var schema29 = z.object({
4205
+ method: z.enum(["ar", "nmf"]).default("ar").describe("Method"),
4206
+ vkfftAddonPath: z.string().default("").meta({ input: "file", mode: "open", binary: "vkfft-addon", download: "https://github.com/visionsofparadise/vkfft-addon" }).describe("VkFFT native addon \u2014 GPU FFT acceleration"),
4207
+ fftwAddonPath: z.string().default("").meta({ input: "file", mode: "open", binary: "fftw-addon", download: "https://github.com/visionsofparadise/fftw-addon" }).describe("FFTW native addon \u2014 CPU FFT acceleration")
4208
+ });
4209
+ var SpectralRepairStream = class extends BufferedTransformStream {
4210
+ async _setup(input, context) {
4211
+ const fft2 = initFftBackend(context.executionProviders, this.properties);
4212
+ this.fftBackend = fft2.backend;
4213
+ this.fftAddonOptions = fft2.addonOptions;
4214
+ return super._setup(input, context);
4215
+ }
4216
+ async _process(buffer) {
4217
+ const sampleRate = this.sampleRate ?? 44100;
4218
+ const channels = buffer.channels;
4219
+ const frames = buffer.frames;
4220
+ const fftSize = 2048;
4221
+ const hopSize = fftSize / 4;
4222
+ const halfSize = fftSize / 2 + 1;
4223
+ const paddedLength = Math.max(frames, fftSize);
4224
+ const numStftFrames = Math.floor((paddedLength - fftSize) / hopSize) + 1;
4225
+ const stftOutput = {
4226
+ real: Array.from({ length: numStftFrames }, () => new Float32Array(halfSize)),
4227
+ imag: Array.from({ length: numStftFrames }, () => new Float32Array(halfSize))
4228
+ };
4229
+ const chunk = await buffer.read(0, frames);
4230
+ for (let ch = 0; ch < channels; ch++) {
4231
+ let channel = chunk.samples[ch];
4232
+ if (!channel) continue;
4233
+ if (channel.length < fftSize) {
4234
+ const padded = new Float32Array(fftSize);
4235
+ padded.set(channel);
4236
+ channel = padded;
4237
+ }
4238
+ const stftResult = stft(channel, fftSize, hopSize, stftOutput, this.fftBackend, this.fftAddonOptions);
4239
+ const freqPerBin = sampleRate / fftSize;
4240
+ const timePerFrame = hopSize / sampleRate;
4241
+ for (const region of this.properties.regions) {
4242
+ const startFrame = Math.floor(region.startTime / timePerFrame);
4243
+ const endFrame = Math.ceil(region.endTime / timePerFrame);
4244
+ const startBin = Math.floor(region.startFreq / freqPerBin);
4245
+ const endBin = Math.ceil(region.endFreq / freqPerBin);
4246
+ interpolateTfRegion(stftResult.real, stftResult.imag, startFrame, endFrame, startBin, endBin);
4247
+ }
4248
+ const repaired = istft(stftResult, hopSize, paddedLength, this.fftBackend, this.fftAddonOptions).subarray(0, frames);
4249
+ await buffer.write(0, replaceChannel(chunk, ch, repaired, channels));
4250
+ }
4251
+ }
4252
+ };
4253
+ var _SpectralRepairNode = class _SpectralRepairNode extends TransformNode {
4254
+ constructor(properties) {
4255
+ super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
4256
+ this.type = ["buffered-audio-node", "transform", "spectral-repair"];
4257
+ }
4258
+ static is(value) {
4259
+ return TransformNode.is(value) && value.type[2] === "spectral-repair";
4260
+ }
4261
+ createStream() {
4262
+ return new SpectralRepairStream({ ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 });
4263
+ }
4264
+ clone(overrides) {
4265
+ return new _SpectralRepairNode({ ...this.properties, previousProperties: this.properties, ...overrides });
4266
+ }
4267
+ };
4268
+ _SpectralRepairNode.moduleName = "Spectral Repair";
4269
+ _SpectralRepairNode.moduleDescription = "Repair spectral artifacts by interpolating from surrounding content";
4270
+ _SpectralRepairNode.schema = schema29;
4271
+ var SpectralRepairNode = _SpectralRepairNode;
4272
+ function spectralRepair(regions, options) {
4273
+ return new SpectralRepairNode({
4274
+ regions,
4275
+ method: options?.method ?? "ar",
4276
+ vkfftAddonPath: options?.vkfftAddonPath ?? "",
4277
+ fftwAddonPath: options?.fftwAddonPath ?? "",
4278
+ id: options?.id
4279
+ });
4280
+ }
4281
+ var schema30 = z.object({
4282
+ ffmpegPath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffmpeg", download: "https://ffmpeg.org/download.html" }).describe("FFmpeg \u2014 audio/video processing tool"),
4283
+ rate: z.number().min(0.25).max(4).multipleOf(0.01).default(1).describe("Rate")
4284
+ });
4285
+ var TimeStretchStream = class extends FfmpegStream {
4286
+ _buildArgs(_context) {
4287
+ const filters = buildAtempoChain(this.properties.rate);
4288
+ return ["-af", filters.join(",")];
4289
+ }
4290
+ };
4291
+ var _TimeStretchNode = class _TimeStretchNode extends FfmpegNode {
4292
+ constructor() {
4293
+ super(...arguments);
4294
+ this.type = ["buffered-audio-node", "transform", "ffmpeg", "time-stretch"];
4295
+ }
4296
+ static is(value) {
4297
+ return FfmpegNode.is(value) && value.type[3] === "time-stretch";
4298
+ }
4299
+ createStream() {
4300
+ return new TimeStretchStream({ ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 });
4301
+ }
4302
+ clone(overrides) {
4303
+ return new _TimeStretchNode({ ...this.properties, previousProperties: this.properties, ...overrides });
4304
+ }
4305
+ };
4306
+ _TimeStretchNode.moduleName = "Time Stretch";
4307
+ _TimeStretchNode.moduleDescription = "Change duration without affecting pitch";
4308
+ _TimeStretchNode.schema = schema30;
4309
+ var TimeStretchNode = _TimeStretchNode;
4310
+ function buildAtempoChain(rate) {
4311
+ const filters = [];
4312
+ let remaining = rate;
4313
+ while (remaining > 2) {
4314
+ filters.push("atempo=2.0");
4315
+ remaining /= 2;
4316
+ }
4317
+ while (remaining < 0.5) {
4318
+ filters.push("atempo=0.5");
4319
+ remaining /= 0.5;
4320
+ }
4321
+ filters.push(`atempo=${remaining}`);
4322
+ return filters;
4323
+ }
4324
+ function timeStretch(ffmpegPath, rate, options) {
4325
+ return new TimeStretchNode({
4326
+ ffmpegPath,
4327
+ rate,
4328
+ id: options?.id
4329
+ });
4330
+ }
4331
+ var BLOCK_LEN = 512;
4332
+ var BLOCK_SHIFT = 128;
4333
+ var FFT_BINS = BLOCK_LEN / 2 + 1;
4334
+ var LSTM_UNITS = 128;
4335
+ function processDtlnFrames(signal, session1, session2, fftBackend, fftAddonOptions) {
4336
+ const originalLength = signal.length;
4337
+ if (originalLength < BLOCK_LEN) {
4338
+ const padded = new Float32Array(BLOCK_LEN);
4339
+ padded.set(signal);
4340
+ signal = padded;
4341
+ }
4342
+ const totalFrames = signal.length;
4343
+ const output = new Float32Array(totalFrames);
4344
+ const stateSize = 1 * 2 * LSTM_UNITS * 2;
4345
+ let states1 = new Float32Array(stateSize);
4346
+ let states2 = new Float32Array(stateSize);
4347
+ const inputBuffer = new Float32Array(BLOCK_LEN);
4348
+ const magnitude = new Float32Array(FFT_BINS);
4349
+ const maskedReal = new Float32Array(FFT_BINS);
4350
+ const maskedImag = new Float32Array(FFT_BINS);
4351
+ const maskedStft = {
4352
+ real: [maskedReal],
4353
+ imag: [maskedImag],
4354
+ frames: 1,
4355
+ fftSize: BLOCK_LEN
4356
+ };
4357
+ const stftOutput = { real: [new Float32Array(FFT_BINS)], imag: [new Float32Array(FFT_BINS)] };
4358
+ for (let offset = 0; offset + BLOCK_LEN <= totalFrames; offset += BLOCK_SHIFT) {
4359
+ inputBuffer.set(signal.subarray(offset, offset + BLOCK_LEN));
4360
+ const stftResult = stft(inputBuffer, BLOCK_LEN, BLOCK_LEN, stftOutput, fftBackend, fftAddonOptions);
4361
+ const realFrame = stftResult.real[0];
4362
+ const imagFrame = stftResult.imag[0];
4363
+ if (!realFrame || !imagFrame) continue;
4364
+ for (let bin = 0; bin < FFT_BINS; bin++) {
4365
+ const re = realFrame[bin] ?? 0;
4366
+ const im = imagFrame[bin] ?? 0;
4367
+ magnitude[bin] = Math.log(Math.sqrt(re * re + im * im) + 1e-7);
4368
+ }
4369
+ const result1 = session1.run({
4370
+ input_2: { data: magnitude, dims: [1, 1, FFT_BINS] },
4371
+ input_3: { data: states1, dims: [1, 2, LSTM_UNITS, 2] }
4372
+ });
4373
+ const mask = result1.activation_2;
4374
+ states1 = result1.tf_op_layer_stack_2 ? new Float32Array(result1.tf_op_layer_stack_2.data) : states1;
4375
+ if (!mask) continue;
4376
+ for (let bin = 0; bin < FFT_BINS; bin++) {
4377
+ const maskVal = mask.data[bin] ?? 0;
4378
+ maskedReal[bin] = (realFrame[bin] ?? 0) * maskVal;
4379
+ maskedImag[bin] = (imagFrame[bin] ?? 0) * maskVal;
4380
+ }
4381
+ const maskedTimeDomain = istft(maskedStft, BLOCK_LEN, BLOCK_LEN, fftBackend, fftAddonOptions);
4382
+ const result2 = session2.run({
4383
+ input_4: { data: maskedTimeDomain, dims: [1, 1, BLOCK_LEN] },
4384
+ input_5: { data: states2, dims: [1, 2, LSTM_UNITS, 2] }
4385
+ });
4386
+ const denoisedFrame = result2.conv1d_3;
4387
+ states2 = result2.tf_op_layer_stack_5 ? new Float32Array(result2.tf_op_layer_stack_5.data) : states2;
4388
+ if (!denoisedFrame) continue;
4389
+ for (let index = 0; index < BLOCK_LEN; index++) {
4390
+ const outIdx = offset + index;
4391
+ if (outIdx < totalFrames) {
4392
+ output[outIdx] = (output[outIdx] ?? 0) + (denoisedFrame.data[index] ?? 0);
4393
+ }
4394
+ }
4395
+ }
4396
+ return originalLength < output.length ? output.subarray(0, originalLength) : output;
4397
+ }
4398
+
4399
+ // src/transforms/voice-denoise/index.ts
4400
+ var schema31 = z.object({
4401
+ modelPath1: z.string().default("").meta({ input: "file", mode: "open", accept: ".onnx", binary: "dtln-model_1", download: "https://github.com/breizhn/DTLN" }).describe("DTLN magnitude mask model (.onnx)"),
4402
+ modelPath2: z.string().default("").meta({ input: "file", mode: "open", accept: ".onnx", binary: "dtln-model_2", download: "https://github.com/breizhn/DTLN" }).describe("DTLN time-domain model (.onnx)"),
4403
+ ffmpegPath: z.string().default("").meta({ input: "file", mode: "open", binary: "ffmpeg", download: "https://ffmpeg.org/download.html" }).describe("FFmpeg \u2014 audio/video processing tool"),
4404
+ onnxAddonPath: z.string().default("").meta({ input: "file", mode: "open", binary: "onnx-addon", download: "https://github.com/visionsofparadise/onnx-runtime-addon" }).describe("ONNX Runtime native addon"),
4405
+ vkfftAddonPath: z.string().default("").meta({ input: "file", mode: "open", binary: "vkfft-addon", download: "https://github.com/visionsofparadise/vkfft-addon" }).describe("VkFFT native addon \u2014 GPU FFT acceleration"),
4406
+ fftwAddonPath: z.string().default("").meta({ input: "file", mode: "open", binary: "fftw-addon", download: "https://github.com/visionsofparadise/fftw-addon" }).describe("FFTW native addon \u2014 CPU FFT acceleration")
4407
+ });
4408
+ var DTLN_SAMPLE_RATE = 16e3;
4409
+ var VoiceDenoiseStream = class extends BufferedTransformStream {
4410
+ async _setup(input, context) {
4411
+ const onnxProviders = filterOnnxProviders(context.executionProviders);
4412
+ this.session1 = createOnnxSession(this.properties.onnxAddonPath, this.properties.modelPath1, { executionProviders: onnxProviders });
4413
+ this.session2 = createOnnxSession(this.properties.onnxAddonPath, this.properties.modelPath2, { executionProviders: onnxProviders });
4414
+ const cpuProviders = context.executionProviders.filter((ep) => ep !== "gpu");
4415
+ const fft2 = initFftBackend(cpuProviders.length > 0 ? cpuProviders : ["cpu"], this.properties);
4416
+ this.fftBackend = fft2.backend;
4417
+ this.fftAddonOptions = fft2.addonOptions;
4418
+ return super._setup(input, context);
4419
+ }
4420
+ async _process(buffer) {
4421
+ const frames = buffer.frames;
4422
+ const channels = buffer.channels;
4423
+ for (let ch = 0; ch < channels; ch++) {
4424
+ const chunk = await buffer.read(0, frames);
4425
+ const channel = chunk.samples[ch];
4426
+ if (!channel) continue;
4427
+ let input16k = channel;
4428
+ if ((this.sampleRate ?? 44100) !== DTLN_SAMPLE_RATE) {
4429
+ const resampled = await resampleDirect(this.properties.ffmpegPath, [channel], this.sampleRate ?? 44100, DTLN_SAMPLE_RATE);
4430
+ input16k = resampled[0] ?? channel;
4431
+ }
4432
+ const denoised16k = processDtlnFrames(input16k, this.session1, this.session2, this.fftBackend, this.fftAddonOptions);
4433
+ let output = denoised16k;
4434
+ if ((this.sampleRate ?? 44100) !== DTLN_SAMPLE_RATE) {
4435
+ const resampled = await resampleDirect(this.properties.ffmpegPath, [denoised16k], DTLN_SAMPLE_RATE, this.sampleRate ?? 44100);
4436
+ output = resampled[0] ?? denoised16k;
4437
+ }
4438
+ const finalOutput = new Float32Array(frames);
4439
+ finalOutput.set(output.subarray(0, Math.min(output.length, frames)));
4440
+ const allChannels = [];
4441
+ for (let writeCh = 0; writeCh < channels; writeCh++) {
4442
+ allChannels.push(writeCh === ch ? finalOutput : chunk.samples[writeCh] ?? new Float32Array(frames));
4443
+ }
4444
+ await buffer.write(0, allChannels);
4445
+ }
4446
+ }
4447
+ };
4448
+ var _VoiceDenoiseNode = class _VoiceDenoiseNode extends TransformNode {
4449
+ constructor(properties) {
4450
+ super({ bufferSize: WHOLE_FILE, latency: WHOLE_FILE, ...properties });
4451
+ this.type = ["buffered-audio-node", "transform", "voice-denoise"];
4452
+ }
4453
+ static is(value) {
4454
+ return TransformNode.is(value) && value.type[2] === "voice-denoise";
4455
+ }
4456
+ createStream() {
4457
+ return new VoiceDenoiseStream({ ...this.properties, bufferSize: this.bufferSize, overlap: this.properties.overlap ?? 0 });
4458
+ }
4459
+ clone(overrides) {
4460
+ return new _VoiceDenoiseNode({ ...this.properties, previousProperties: this.properties, ...overrides });
4461
+ }
4462
+ };
4463
+ _VoiceDenoiseNode.moduleName = "Voice Denoise";
4464
+ _VoiceDenoiseNode.moduleDescription = "Remove background noise from speech using DTLN neural network";
4465
+ _VoiceDenoiseNode.schema = schema31;
4466
+ var VoiceDenoiseNode = _VoiceDenoiseNode;
4467
+ function voiceDenoise(options) {
4468
+ return new VoiceDenoiseNode({
4469
+ modelPath1: options.modelPath1,
4470
+ modelPath2: options.modelPath2,
4471
+ ffmpegPath: options.ffmpegPath,
4472
+ onnxAddonPath: options.onnxAddonPath ?? "",
4473
+ vkfftAddonPath: options.vkfftAddonPath ?? "",
4474
+ fftwAddonPath: options.fftwAddonPath ?? "",
4475
+ id: options.id
4476
+ });
4477
+ }
4478
+
4479
+ export { BreathControlNode, BreathControlStream, CutNode, CutStream, DeBleedNode, DeBleedStream, DeClickNode, DeClickStream, DeClipNode, DeClipStream, DeCrackleNode, DePlosiveNode, DePlosiveStream, DeReverbNode, DeReverbStream, DialogueIsolateNode, DialogueIsolateStream, DitherNode, DitherStream, EqMatchNode, EqMatchStream, FfmpegNode, FfmpegStream, LevelerNode, LevelerStream, LoudnessNode, LoudnessStatsNode, LoudnessStatsStream, LoudnessStream, MouthDeClickNode, MusicRebalanceNode, MusicRebalanceStream, NormalizeNode, NormalizeStream, PadNode, PadStream, PhaseNode, PhaseStream, PitchShiftNode, ReadFfmpegNode, ReadFfmpegStream, ReadNode, ReadWavNode, ReadWavStream, ResampleNode, ReverseNode, ReverseStream, SpectralRepairNode, SpectralRepairStream, SpectrogramNode, SpectrogramStream, SpliceNode, SpliceStream, TimeStretchNode, TrimNode, TrimStream, VoiceDenoiseNode, VoiceDenoiseStream, WaveformNode, WaveformStream, WriteNode, WriteStream, breathControl, cut, deBleed, deClick, deClip, deCrackle, dePlosive, deReverb, dialogueIsolate, dither, eqMatch, ffmpeg, ffmpegSchema, invert, leveler, loudness, loudnessStats, mouthDeClick, musicRebalance, normalize, pad, phase, pitchShift, read, readFfmpeg, readSample, readWav, resample, reverse, spectralRepair, spectrogram, splice, timeStretch, trim, voiceDenoise, wavSchema, waveform, write };