@livekit/agents 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audio.cjs +10 -0
- package/dist/audio.cjs.map +1 -1
- package/dist/audio.d.cts +1 -1
- package/dist/audio.d.ts +1 -1
- package/dist/audio.d.ts.map +1 -1
- package/dist/audio.js +10 -0
- package/dist/audio.js.map +1 -1
- package/dist/inference/api_protos.d.cts +26 -26
- package/dist/inference/api_protos.d.ts +26 -26
- package/dist/inference/tts.cjs +14 -1
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +24 -3
- package/dist/inference/tts.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +7 -2
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +7 -2
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/ipc/supervised_proc.cjs +4 -1
- package/dist/ipc/supervised_proc.cjs.map +1 -1
- package/dist/ipc/supervised_proc.d.ts.map +1 -1
- package/dist/ipc/supervised_proc.js +4 -1
- package/dist/ipc/supervised_proc.js.map +1 -1
- package/dist/ipc/supervised_proc.test.cjs +82 -0
- package/dist/ipc/supervised_proc.test.cjs.map +1 -1
- package/dist/ipc/supervised_proc.test.js +82 -0
- package/dist/ipc/supervised_proc.test.js.map +1 -1
- package/dist/job.cjs +2 -1
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +2 -1
- package/dist/job.js.map +1 -1
- package/dist/utils.cjs +28 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +18 -0
- package/dist/utils.d.ts +18 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +25 -0
- package/dist/utils.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent_activity.cjs +10 -0
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +11 -0
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +1 -1
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +4 -2
- package/dist/voice/agent_session.d.ts +4 -2
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +1 -1
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/events.cjs +11 -0
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +12 -1
- package/dist/voice/events.d.ts +12 -1
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +10 -0
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/generation.cjs +23 -4
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +32 -5
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/generation_tts_timeout.test.cjs +85 -0
- package/dist/voice/generation_tts_timeout.test.cjs.map +1 -0
- package/dist/voice/generation_tts_timeout.test.js +84 -0
- package/dist/voice/generation_tts_timeout.test.js.map +1 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -1
- package/dist/voice/index.d.ts +1 -1
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +3 -1
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/recorder_io/recorder_io.cjs +1 -2
- package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
- package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
- package/dist/voice/recorder_io/recorder_io.js +2 -3
- package/dist/voice/recorder_io/recorder_io.js.map +1 -1
- package/dist/voice/report.cjs +1 -1
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.js +1 -1
- package/dist/voice/report.js.map +1 -1
- package/dist/voice/report.test.cjs +70 -0
- package/dist/voice/report.test.cjs.map +1 -1
- package/dist/voice/report.test.js +70 -0
- package/dist/voice/report.test.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +5 -1
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +5 -1
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/room_io/room_io.test.cjs +18 -0
- package/dist/voice/room_io/room_io.test.cjs.map +1 -0
- package/dist/voice/room_io/room_io.test.js +17 -0
- package/dist/voice/room_io/room_io.test.js.map +1 -0
- package/package.json +1 -1
- package/src/audio.ts +12 -1
- package/src/inference/tts.ts +25 -3
- package/src/ipc/job_proc_lazy_main.ts +7 -2
- package/src/ipc/supervised_proc.test.ts +96 -0
- package/src/ipc/supervised_proc.ts +8 -1
- package/src/job.ts +1 -0
- package/src/utils.ts +43 -0
- package/src/voice/agent_activity.ts +11 -0
- package/src/voice/agent_session.ts +13 -7
- package/src/voice/events.ts +21 -0
- package/src/voice/generation.ts +35 -8
- package/src/voice/generation_tts_timeout.test.ts +112 -0
- package/src/voice/index.ts +6 -1
- package/src/voice/recorder_io/recorder_io.ts +2 -7
- package/src/voice/report.test.ts +78 -0
- package/src/voice/report.ts +1 -1
- package/src/voice/room_io/room_io.test.ts +38 -0
- package/src/voice/room_io/room_io.ts +7 -2
package/dist/audio.cjs
CHANGED
|
@@ -39,6 +39,7 @@ var import_rtc_node = require("@livekit/rtc-node");
|
|
|
39
39
|
var import_fluent_ffmpeg = __toESM(require("fluent-ffmpeg"), 1);
|
|
40
40
|
var import_log = require("./log.cjs");
|
|
41
41
|
var import_stream_channel = require("./stream/stream_channel.cjs");
|
|
42
|
+
var import_utils = require("./utils.cjs");
|
|
42
43
|
import_fluent_ffmpeg.default.setFfmpegPath(import_ffmpeg.default.path);
|
|
43
44
|
function calculateAudioDurationSeconds(frame) {
|
|
44
45
|
return Array.isArray(frame) ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0) : frame.samplesPerChannel / frame.sampleRate;
|
|
@@ -118,6 +119,15 @@ function audioFramesFromFile(filePath, options = {}) {
|
|
|
118
119
|
command.kill("SIGKILL");
|
|
119
120
|
}
|
|
120
121
|
};
|
|
122
|
+
command.on("error", (err) => {
|
|
123
|
+
if ((0, import_utils.isFfmpegTeardownError)(err)) {
|
|
124
|
+
logger.debug("FFmpeg command ended during shutdown");
|
|
125
|
+
} else {
|
|
126
|
+
logger.error(err, "FFmpeg command error");
|
|
127
|
+
}
|
|
128
|
+
commandRunning = false;
|
|
129
|
+
onClose();
|
|
130
|
+
});
|
|
121
131
|
const outputStream = command.pipe();
|
|
122
132
|
(_a = options.abortSignal) == null ? void 0 : _a.addEventListener("abort", onClose, { once: true });
|
|
123
133
|
outputStream.on("data", (chunk) => {
|
package/dist/audio.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/audio.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport ffmpegInstaller from '@ffmpeg-installer/ffmpeg';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport ffmpeg from 'fluent-ffmpeg';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from './log.js';\nimport { createStreamChannel } from './stream/stream_channel.js';\nimport type
|
|
1
|
+
{"version":3,"sources":["../src/audio.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport ffmpegInstaller from '@ffmpeg-installer/ffmpeg';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport ffmpeg from 'fluent-ffmpeg';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from './log.js';\nimport { createStreamChannel } from './stream/stream_channel.js';\nimport { type AudioBuffer, isFfmpegTeardownError } from './utils.js';\n\nffmpeg.setFfmpegPath(ffmpegInstaller.path);\n\nexport interface AudioDecodeOptions {\n sampleRate?: number;\n numChannels?: number;\n /**\n * Audio format hint (e.g., 'mp3', 'ogg', 'wav', 'opus')\n * If not provided, FFmpeg will auto-detect\n */\n format?: string;\n abortSignal?: AbortSignal;\n}\n\nexport function calculateAudioDurationSeconds(frame: AudioBuffer) {\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n return Array.isArray(frame)\n ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0)\n : frame.samplesPerChannel / frame.sampleRate;\n}\n\n/** AudioByteStream translates between LiveKit AudioFrame packets and raw byte data. 
*/\nexport class AudioByteStream {\n #sampleRate: number;\n #numChannels: number;\n #bytesPerFrame: number;\n #buf: Int8Array;\n #logger = log();\n\n constructor(sampleRate: number, numChannels: number, samplesPerChannel: number | null = null) {\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n\n if (samplesPerChannel === null) {\n samplesPerChannel = Math.floor(sampleRate / 10); // 100ms by default\n }\n\n this.#bytesPerFrame = numChannels * samplesPerChannel * 2; // 2 bytes per sample (Int16)\n this.#buf = new Int8Array();\n }\n\n write(data: ArrayBuffer): AudioFrame[] {\n this.#buf = new Int8Array([...this.#buf, ...new Int8Array(data)]);\n\n const frames: AudioFrame[] = [];\n while (this.#buf.length >= this.#bytesPerFrame) {\n const frameData = this.#buf.slice(0, this.#bytesPerFrame);\n this.#buf = this.#buf.slice(this.#bytesPerFrame);\n\n frames.push(\n new AudioFrame(\n new Int16Array(frameData.buffer),\n this.#sampleRate,\n this.#numChannels,\n frameData.length / 2,\n ),\n );\n }\n\n return frames;\n }\n\n flush(): AudioFrame[] {\n if (this.#buf.length % (2 * this.#numChannels) !== 0) {\n this.#logger.warn('AudioByteStream: incomplete frame during flush, dropping');\n return [];\n }\n\n const frames = [\n new AudioFrame(\n new Int16Array(this.#buf.buffer),\n this.#sampleRate,\n this.#numChannels,\n this.#buf.length / 2,\n ),\n ];\n\n this.#buf = new Int8Array(); // Clear buffer after flushing\n return frames;\n }\n}\n\n/**\n * Decode an audio file into AudioFrame instances\n *\n * @param filePath - Path to the audio file\n * @param options - Decoding options\n * @returns AsyncGenerator that yields AudioFrame objects\n *\n * @example\n * ```typescript\n * for await (const frame of audioFramesFromFile('audio.ogg', { sampleRate: 48000 })) {\n * console.log('Frame:', frame.samplesPerChannel, 'samples');\n * }\n * ```\n */\nexport function audioFramesFromFile(\n filePath: string,\n options: AudioDecodeOptions = {},\n): 
ReadableStream<AudioFrame> {\n const sampleRate = options.sampleRate ?? 48000;\n const numChannels = options.numChannels ?? 1;\n\n const audioStream = new AudioByteStream(sampleRate, numChannels);\n const channel = createStreamChannel<AudioFrame>();\n const logger = log();\n\n // TODO (Brian): decode WAV using a custom decoder instead of FFmpeg\n const command = ffmpeg(filePath)\n .inputOptions([\n '-probesize',\n '32',\n '-analyzeduration',\n '0',\n '-fflags',\n '+nobuffer+flush_packets',\n '-flags',\n 'low_delay',\n ])\n .format('s16le') // signed 16-bit little-endian PCM to be consistent cross-platform\n .audioChannels(numChannels)\n .audioFrequency(sampleRate);\n\n let commandRunning = true;\n\n const onClose = () => {\n logger.debug('Audio file playback aborted');\n\n channel.close();\n if (commandRunning) {\n commandRunning = false;\n command.kill('SIGKILL');\n }\n };\n\n command.on('error', (err: Error) => {\n if (isFfmpegTeardownError(err)) {\n // Expected during teardown — not an error\n logger.debug('FFmpeg command ended during shutdown');\n } else {\n logger.error(err, 'FFmpeg command error');\n }\n commandRunning = false;\n onClose();\n });\n\n const outputStream = command.pipe();\n options.abortSignal?.addEventListener('abort', onClose, { once: true });\n\n outputStream.on('data', (chunk: Buffer) => {\n const arrayBuffer = chunk.buffer.slice(\n chunk.byteOffset,\n chunk.byteOffset + chunk.byteLength,\n ) as ArrayBuffer;\n\n const frames = audioStream.write(arrayBuffer);\n for (const frame of frames) {\n channel.write(frame);\n }\n });\n\n outputStream.on('end', () => {\n const frames = audioStream.flush();\n for (const frame of frames) {\n channel.write(frame);\n }\n commandRunning = false;\n channel.close();\n });\n\n outputStream.on('error', (err: Error) => {\n logger.error(err);\n commandRunning = false;\n onClose();\n });\n\n return channel.stream();\n}\n\n/**\n * Loop audio frames from a file indefinitely\n *\n * @param filePath - Path to the 
audio file\n * @param options - Decoding options\n * @returns AsyncGenerator that yields AudioFrame objects in an infinite loop\n */\nexport async function* loopAudioFramesFromFile(\n filePath: string,\n options: AudioDecodeOptions = {},\n): AsyncGenerator<AudioFrame, void, unknown> {\n const frames: AudioFrame[] = [];\n const logger = log();\n\n for await (const frame of audioFramesFromFile(filePath, options)) {\n frames.push(frame);\n yield frame;\n }\n\n while (!options.abortSignal?.aborted) {\n for (const frame of frames) {\n yield frame;\n }\n }\n\n logger.debug('Audio file playback loop finished');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA4B;AAC5B,sBAA2B;AAC3B,2BAAmB;AAEnB,iBAAoB;AACpB,4BAAoC;AACpC,mBAAwD;AAExD,qBAAAA,QAAO,cAAc,cAAAC,QAAgB,IAAI;AAalC,SAAS,8BAA8B,OAAoB;AAEhE,SAAO,MAAM,QAAQ,KAAK,IACtB,MAAM,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,oBAAoB,EAAE,YAAY,CAAC,IACpE,MAAM,oBAAoB,MAAM;AACtC;AAGO,MAAM,gBAAgB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,cAAU,gBAAI;AAAA,EAEd,YAAY,YAAoB,aAAqB,oBAAmC,MAAM;AAC5F,SAAK,cAAc;AACnB,SAAK,eAAe;AAEpB,QAAI,sBAAsB,MAAM;AAC9B,0BAAoB,KAAK,MAAM,aAAa,EAAE;AAAA,IAChD;AAEA,SAAK,iBAAiB,cAAc,oBAAoB;AACxD,SAAK,OAAO,IAAI,UAAU;AAAA,EAC5B;AAAA,EAEA,MAAM,MAAiC;AACrC,SAAK,OAAO,IAAI,UAAU,CAAC,GAAG,KAAK,MAAM,GAAG,IAAI,UAAU,IAAI,CAAC,CAAC;AAEhE,UAAM,SAAuB,CAAC;AAC9B,WAAO,KAAK,KAAK,UAAU,KAAK,gBAAgB;AAC9C,YAAM,YAAY,KAAK,KAAK,MAAM,GAAG,KAAK,cAAc;AACxD,WAAK,OAAO,KAAK,KAAK,MAAM,KAAK,cAAc;AAE/C,aAAO;AAAA,QACL,IAAI;AAAA,UACF,IAAI,WAAW,UAAU,MAAM;AAAA,UAC/B,KAAK;AAAA,UACL,KAAK;AAAA,UACL,UAAU,SAAS;AAAA,QACrB;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAAA,EAEA,QAAsB;AACpB,QAAI,KAAK,KAAK,UAAU,IAAI,KAAK,kBAAkB,GAAG;AACpD,WAAK,QAAQ,KAAK,0DAA0D;AAC5E,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,SAAS;AAAA,MACb,IAAI;AAAA,QACF,IAAI,WAAW,KAAK,KAAK,MAAM;AAAA,QAC/B,KAAK;AAAA,QACL,KAAK;AAAA,QACL,KAAK,KAAK,SAAS;AAAA,MACrB;AAAA,IACF;AAEA,SAAK,OAAO,IAAI,UAAU;AAC1B,WAAO;AAAA,EACT;AACF;AAgBO,SAAS,oBACd,UACA,UAA8B,CAAC,GACH;AA7G9B;A
A8GE,QAAM,aAAa,QAAQ,cAAc;AACzC,QAAM,cAAc,QAAQ,eAAe;AAE3C,QAAM,cAAc,IAAI,gBAAgB,YAAY,WAAW;AAC/D,QAAM,cAAU,2CAAgC;AAChD,QAAM,aAAS,gBAAI;AAGnB,QAAM,cAAU,qBAAAD,SAAO,QAAQ,EAC5B,aAAa;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC,EACA,OAAO,OAAO,EACd,cAAc,WAAW,EACzB,eAAe,UAAU;AAE5B,MAAI,iBAAiB;AAErB,QAAM,UAAU,MAAM;AACpB,WAAO,MAAM,6BAA6B;AAE1C,YAAQ,MAAM;AACd,QAAI,gBAAgB;AAClB,uBAAiB;AACjB,cAAQ,KAAK,SAAS;AAAA,IACxB;AAAA,EACF;AAEA,UAAQ,GAAG,SAAS,CAAC,QAAe;AAClC,YAAI,oCAAsB,GAAG,GAAG;AAE9B,aAAO,MAAM,sCAAsC;AAAA,IACrD,OAAO;AACL,aAAO,MAAM,KAAK,sBAAsB;AAAA,IAC1C;AACA,qBAAiB;AACjB,YAAQ;AAAA,EACV,CAAC;AAED,QAAM,eAAe,QAAQ,KAAK;AAClC,gBAAQ,gBAAR,mBAAqB,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK;AAErE,eAAa,GAAG,QAAQ,CAAC,UAAkB;AACzC,UAAM,cAAc,MAAM,OAAO;AAAA,MAC/B,MAAM;AAAA,MACN,MAAM,aAAa,MAAM;AAAA,IAC3B;AAEA,UAAM,SAAS,YAAY,MAAM,WAAW;AAC5C,eAAW,SAAS,QAAQ;AAC1B,cAAQ,MAAM,KAAK;AAAA,IACrB;AAAA,EACF,CAAC;AAED,eAAa,GAAG,OAAO,MAAM;AAC3B,UAAM,SAAS,YAAY,MAAM;AACjC,eAAW,SAAS,QAAQ;AAC1B,cAAQ,MAAM,KAAK;AAAA,IACrB;AACA,qBAAiB;AACjB,YAAQ,MAAM;AAAA,EAChB,CAAC;AAED,eAAa,GAAG,SAAS,CAAC,QAAe;AACvC,WAAO,MAAM,GAAG;AAChB,qBAAiB;AACjB,YAAQ;AAAA,EACV,CAAC;AAED,SAAO,QAAQ,OAAO;AACxB;AASA,gBAAuB,wBACrB,UACA,UAA8B,CAAC,GACY;AAvM7C;AAwME,QAAM,SAAuB,CAAC;AAC9B,QAAM,aAAS,gBAAI;AAEnB,mBAAiB,SAAS,oBAAoB,UAAU,OAAO,GAAG;AAChE,WAAO,KAAK,KAAK;AACjB,UAAM;AAAA,EACR;AAEA,SAAO,GAAC,aAAQ,gBAAR,mBAAqB,UAAS;AACpC,eAAW,SAAS,QAAQ;AAC1B,YAAM;AAAA,IACR;AAAA,EACF;AAEA,SAAO,MAAM,mCAAmC;AAClD;","names":["ffmpeg","ffmpegInstaller"]}
|
package/dist/audio.d.cts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/// <reference types="node" resolution-mode="require"/>
|
|
2
2
|
import { AudioFrame } from '@livekit/rtc-node';
|
|
3
3
|
import type { ReadableStream } from 'node:stream/web';
|
|
4
|
-
import type { AudioBuffer } from './utils.js';
|
|
4
|
+
import { type AudioBuffer } from './utils.js';
|
|
5
5
|
export interface AudioDecodeOptions {
|
|
6
6
|
sampleRate?: number;
|
|
7
7
|
numChannels?: number;
|
package/dist/audio.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/// <reference types="node" resolution-mode="require"/>
|
|
2
2
|
import { AudioFrame } from '@livekit/rtc-node';
|
|
3
3
|
import type { ReadableStream } from 'node:stream/web';
|
|
4
|
-
import type { AudioBuffer } from './utils.js';
|
|
4
|
+
import { type AudioBuffer } from './utils.js';
|
|
5
5
|
export interface AudioDecodeOptions {
|
|
6
6
|
sampleRate?: number;
|
|
7
7
|
numChannels?: number;
|
package/dist/audio.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audio.d.ts","sourceRoot":"","sources":["../src/audio.ts"],"names":[],"mappings":";AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAGtD,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"audio.d.ts","sourceRoot":"","sources":["../src/audio.ts"],"names":[],"mappings":";AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAGtD,OAAO,EAAE,KAAK,WAAW,EAAyB,MAAM,YAAY,CAAC;AAIrE,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,WAAW,CAAC;CAC3B;AAED,wBAAgB,6BAA6B,CAAC,KAAK,EAAE,WAAW,UAK/D;AAED,uFAAuF;AACvF,qBAAa,eAAe;;gBAOd,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,iBAAiB,GAAE,MAAM,GAAG,IAAW;IAY5F,KAAK,CAAC,IAAI,EAAE,WAAW,GAAG,UAAU,EAAE;IAqBtC,KAAK,IAAI,UAAU,EAAE;CAkBtB;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,kBAAuB,GAC/B,cAAc,CAAC,UAAU,CAAC,CA8E5B;AAED;;;;;;GAMG;AACH,wBAAuB,uBAAuB,CAC5C,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,kBAAuB,GAC/B,cAAc,CAAC,UAAU,EAAE,IAAI,EAAE,OAAO,CAAC,CAgB3C"}
|
package/dist/audio.js
CHANGED
|
@@ -3,6 +3,7 @@ import { AudioFrame } from "@livekit/rtc-node";
|
|
|
3
3
|
import ffmpeg from "fluent-ffmpeg";
|
|
4
4
|
import { log } from "./log.js";
|
|
5
5
|
import { createStreamChannel } from "./stream/stream_channel.js";
|
|
6
|
+
import { isFfmpegTeardownError } from "./utils.js";
|
|
6
7
|
ffmpeg.setFfmpegPath(ffmpegInstaller.path);
|
|
7
8
|
function calculateAudioDurationSeconds(frame) {
|
|
8
9
|
return Array.isArray(frame) ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0) : frame.samplesPerChannel / frame.sampleRate;
|
|
@@ -82,6 +83,15 @@ function audioFramesFromFile(filePath, options = {}) {
|
|
|
82
83
|
command.kill("SIGKILL");
|
|
83
84
|
}
|
|
84
85
|
};
|
|
86
|
+
command.on("error", (err) => {
|
|
87
|
+
if (isFfmpegTeardownError(err)) {
|
|
88
|
+
logger.debug("FFmpeg command ended during shutdown");
|
|
89
|
+
} else {
|
|
90
|
+
logger.error(err, "FFmpeg command error");
|
|
91
|
+
}
|
|
92
|
+
commandRunning = false;
|
|
93
|
+
onClose();
|
|
94
|
+
});
|
|
85
95
|
const outputStream = command.pipe();
|
|
86
96
|
(_a = options.abortSignal) == null ? void 0 : _a.addEventListener("abort", onClose, { once: true });
|
|
87
97
|
outputStream.on("data", (chunk) => {
|
package/dist/audio.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/audio.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport ffmpegInstaller from '@ffmpeg-installer/ffmpeg';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport ffmpeg from 'fluent-ffmpeg';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from './log.js';\nimport { createStreamChannel } from './stream/stream_channel.js';\nimport type
|
|
1
|
+
{"version":3,"sources":["../src/audio.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport ffmpegInstaller from '@ffmpeg-installer/ffmpeg';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport ffmpeg from 'fluent-ffmpeg';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from './log.js';\nimport { createStreamChannel } from './stream/stream_channel.js';\nimport { type AudioBuffer, isFfmpegTeardownError } from './utils.js';\n\nffmpeg.setFfmpegPath(ffmpegInstaller.path);\n\nexport interface AudioDecodeOptions {\n sampleRate?: number;\n numChannels?: number;\n /**\n * Audio format hint (e.g., 'mp3', 'ogg', 'wav', 'opus')\n * If not provided, FFmpeg will auto-detect\n */\n format?: string;\n abortSignal?: AbortSignal;\n}\n\nexport function calculateAudioDurationSeconds(frame: AudioBuffer) {\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n return Array.isArray(frame)\n ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0)\n : frame.samplesPerChannel / frame.sampleRate;\n}\n\n/** AudioByteStream translates between LiveKit AudioFrame packets and raw byte data. 
*/\nexport class AudioByteStream {\n #sampleRate: number;\n #numChannels: number;\n #bytesPerFrame: number;\n #buf: Int8Array;\n #logger = log();\n\n constructor(sampleRate: number, numChannels: number, samplesPerChannel: number | null = null) {\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n\n if (samplesPerChannel === null) {\n samplesPerChannel = Math.floor(sampleRate / 10); // 100ms by default\n }\n\n this.#bytesPerFrame = numChannels * samplesPerChannel * 2; // 2 bytes per sample (Int16)\n this.#buf = new Int8Array();\n }\n\n write(data: ArrayBuffer): AudioFrame[] {\n this.#buf = new Int8Array([...this.#buf, ...new Int8Array(data)]);\n\n const frames: AudioFrame[] = [];\n while (this.#buf.length >= this.#bytesPerFrame) {\n const frameData = this.#buf.slice(0, this.#bytesPerFrame);\n this.#buf = this.#buf.slice(this.#bytesPerFrame);\n\n frames.push(\n new AudioFrame(\n new Int16Array(frameData.buffer),\n this.#sampleRate,\n this.#numChannels,\n frameData.length / 2,\n ),\n );\n }\n\n return frames;\n }\n\n flush(): AudioFrame[] {\n if (this.#buf.length % (2 * this.#numChannels) !== 0) {\n this.#logger.warn('AudioByteStream: incomplete frame during flush, dropping');\n return [];\n }\n\n const frames = [\n new AudioFrame(\n new Int16Array(this.#buf.buffer),\n this.#sampleRate,\n this.#numChannels,\n this.#buf.length / 2,\n ),\n ];\n\n this.#buf = new Int8Array(); // Clear buffer after flushing\n return frames;\n }\n}\n\n/**\n * Decode an audio file into AudioFrame instances\n *\n * @param filePath - Path to the audio file\n * @param options - Decoding options\n * @returns AsyncGenerator that yields AudioFrame objects\n *\n * @example\n * ```typescript\n * for await (const frame of audioFramesFromFile('audio.ogg', { sampleRate: 48000 })) {\n * console.log('Frame:', frame.samplesPerChannel, 'samples');\n * }\n * ```\n */\nexport function audioFramesFromFile(\n filePath: string,\n options: AudioDecodeOptions = {},\n): 
ReadableStream<AudioFrame> {\n const sampleRate = options.sampleRate ?? 48000;\n const numChannels = options.numChannels ?? 1;\n\n const audioStream = new AudioByteStream(sampleRate, numChannels);\n const channel = createStreamChannel<AudioFrame>();\n const logger = log();\n\n // TODO (Brian): decode WAV using a custom decoder instead of FFmpeg\n const command = ffmpeg(filePath)\n .inputOptions([\n '-probesize',\n '32',\n '-analyzeduration',\n '0',\n '-fflags',\n '+nobuffer+flush_packets',\n '-flags',\n 'low_delay',\n ])\n .format('s16le') // signed 16-bit little-endian PCM to be consistent cross-platform\n .audioChannels(numChannels)\n .audioFrequency(sampleRate);\n\n let commandRunning = true;\n\n const onClose = () => {\n logger.debug('Audio file playback aborted');\n\n channel.close();\n if (commandRunning) {\n commandRunning = false;\n command.kill('SIGKILL');\n }\n };\n\n command.on('error', (err: Error) => {\n if (isFfmpegTeardownError(err)) {\n // Expected during teardown — not an error\n logger.debug('FFmpeg command ended during shutdown');\n } else {\n logger.error(err, 'FFmpeg command error');\n }\n commandRunning = false;\n onClose();\n });\n\n const outputStream = command.pipe();\n options.abortSignal?.addEventListener('abort', onClose, { once: true });\n\n outputStream.on('data', (chunk: Buffer) => {\n const arrayBuffer = chunk.buffer.slice(\n chunk.byteOffset,\n chunk.byteOffset + chunk.byteLength,\n ) as ArrayBuffer;\n\n const frames = audioStream.write(arrayBuffer);\n for (const frame of frames) {\n channel.write(frame);\n }\n });\n\n outputStream.on('end', () => {\n const frames = audioStream.flush();\n for (const frame of frames) {\n channel.write(frame);\n }\n commandRunning = false;\n channel.close();\n });\n\n outputStream.on('error', (err: Error) => {\n logger.error(err);\n commandRunning = false;\n onClose();\n });\n\n return channel.stream();\n}\n\n/**\n * Loop audio frames from a file indefinitely\n *\n * @param filePath - Path to the 
audio file\n * @param options - Decoding options\n * @returns AsyncGenerator that yields AudioFrame objects in an infinite loop\n */\nexport async function* loopAudioFramesFromFile(\n filePath: string,\n options: AudioDecodeOptions = {},\n): AsyncGenerator<AudioFrame, void, unknown> {\n const frames: AudioFrame[] = [];\n const logger = log();\n\n for await (const frame of audioFramesFromFile(filePath, options)) {\n frames.push(frame);\n yield frame;\n }\n\n while (!options.abortSignal?.aborted) {\n for (const frame of frames) {\n yield frame;\n }\n }\n\n logger.debug('Audio file playback loop finished');\n}\n"],"mappings":"AAGA,OAAO,qBAAqB;AAC5B,SAAS,kBAAkB;AAC3B,OAAO,YAAY;AAEnB,SAAS,WAAW;AACpB,SAAS,2BAA2B;AACpC,SAA2B,6BAA6B;AAExD,OAAO,cAAc,gBAAgB,IAAI;AAalC,SAAS,8BAA8B,OAAoB;AAEhE,SAAO,MAAM,QAAQ,KAAK,IACtB,MAAM,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,oBAAoB,EAAE,YAAY,CAAC,IACpE,MAAM,oBAAoB,MAAM;AACtC;AAGO,MAAM,gBAAgB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,YAAoB,aAAqB,oBAAmC,MAAM;AAC5F,SAAK,cAAc;AACnB,SAAK,eAAe;AAEpB,QAAI,sBAAsB,MAAM;AAC9B,0BAAoB,KAAK,MAAM,aAAa,EAAE;AAAA,IAChD;AAEA,SAAK,iBAAiB,cAAc,oBAAoB;AACxD,SAAK,OAAO,IAAI,UAAU;AAAA,EAC5B;AAAA,EAEA,MAAM,MAAiC;AACrC,SAAK,OAAO,IAAI,UAAU,CAAC,GAAG,KAAK,MAAM,GAAG,IAAI,UAAU,IAAI,CAAC,CAAC;AAEhE,UAAM,SAAuB,CAAC;AAC9B,WAAO,KAAK,KAAK,UAAU,KAAK,gBAAgB;AAC9C,YAAM,YAAY,KAAK,KAAK,MAAM,GAAG,KAAK,cAAc;AACxD,WAAK,OAAO,KAAK,KAAK,MAAM,KAAK,cAAc;AAE/C,aAAO;AAAA,QACL,IAAI;AAAA,UACF,IAAI,WAAW,UAAU,MAAM;AAAA,UAC/B,KAAK;AAAA,UACL,KAAK;AAAA,UACL,UAAU,SAAS;AAAA,QACrB;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAAA,EAEA,QAAsB;AACpB,QAAI,KAAK,KAAK,UAAU,IAAI,KAAK,kBAAkB,GAAG;AACpD,WAAK,QAAQ,KAAK,0DAA0D;AAC5E,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,SAAS;AAAA,MACb,IAAI;AAAA,QACF,IAAI,WAAW,KAAK,KAAK,MAAM;AAAA,QAC/B,KAAK;AAAA,QACL,KAAK;AAAA,QACL,KAAK,KAAK,SAAS;AAAA,MACrB;AAAA,IACF;AAEA,SAAK,OAAO,IAAI,UAAU;AAC1B,WAAO;AAAA,EACT;AACF;AAgBO,SAAS,oBACd,UACA,UAA8B,CAAC,GACH;AA7G9B;AA8GE,QAAM,aAAa,QAAQ,cAAc;AACzC,QAAM,cAAc,QAAQ,eAAe;AAE
3C,QAAM,cAAc,IAAI,gBAAgB,YAAY,WAAW;AAC/D,QAAM,UAAU,oBAAgC;AAChD,QAAM,SAAS,IAAI;AAGnB,QAAM,UAAU,OAAO,QAAQ,EAC5B,aAAa;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC,EACA,OAAO,OAAO,EACd,cAAc,WAAW,EACzB,eAAe,UAAU;AAE5B,MAAI,iBAAiB;AAErB,QAAM,UAAU,MAAM;AACpB,WAAO,MAAM,6BAA6B;AAE1C,YAAQ,MAAM;AACd,QAAI,gBAAgB;AAClB,uBAAiB;AACjB,cAAQ,KAAK,SAAS;AAAA,IACxB;AAAA,EACF;AAEA,UAAQ,GAAG,SAAS,CAAC,QAAe;AAClC,QAAI,sBAAsB,GAAG,GAAG;AAE9B,aAAO,MAAM,sCAAsC;AAAA,IACrD,OAAO;AACL,aAAO,MAAM,KAAK,sBAAsB;AAAA,IAC1C;AACA,qBAAiB;AACjB,YAAQ;AAAA,EACV,CAAC;AAED,QAAM,eAAe,QAAQ,KAAK;AAClC,gBAAQ,gBAAR,mBAAqB,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK;AAErE,eAAa,GAAG,QAAQ,CAAC,UAAkB;AACzC,UAAM,cAAc,MAAM,OAAO;AAAA,MAC/B,MAAM;AAAA,MACN,MAAM,aAAa,MAAM;AAAA,IAC3B;AAEA,UAAM,SAAS,YAAY,MAAM,WAAW;AAC5C,eAAW,SAAS,QAAQ;AAC1B,cAAQ,MAAM,KAAK;AAAA,IACrB;AAAA,EACF,CAAC;AAED,eAAa,GAAG,OAAO,MAAM;AAC3B,UAAM,SAAS,YAAY,MAAM;AACjC,eAAW,SAAS,QAAQ;AAC1B,cAAQ,MAAM,KAAK;AAAA,IACrB;AACA,qBAAiB;AACjB,YAAQ,MAAM;AAAA,EAChB,CAAC;AAED,eAAa,GAAG,SAAS,CAAC,QAAe;AACvC,WAAO,MAAM,GAAG;AAChB,qBAAiB;AACjB,YAAQ;AAAA,EACV,CAAC;AAED,SAAO,QAAQ,OAAO;AACxB;AASA,gBAAuB,wBACrB,UACA,UAA8B,CAAC,GACY;AAvM7C;AAwME,QAAM,SAAuB,CAAC;AAC9B,QAAM,SAAS,IAAI;AAEnB,mBAAiB,SAAS,oBAAoB,UAAU,OAAO,GAAG;AAChE,WAAO,KAAK,KAAK;AACjB,UAAM;AAAA,EACR;AAEA,SAAO,GAAC,aAAQ,gBAAR,mBAAqB,UAAS;AACpC,eAAW,SAAS,QAAQ;AAC1B,YAAM;AAAA,IACR;AAAA,EACF;AAEA,SAAO,MAAM,mCAAmC;AAClD;","names":[]}
|
|
@@ -227,15 +227,15 @@ export declare const sttWordSchema: z.ZodObject<{
|
|
|
227
227
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
228
228
|
}, "strip", z.ZodTypeAny, {
|
|
229
229
|
end: number;
|
|
230
|
-
confidence: number;
|
|
231
230
|
start: number;
|
|
231
|
+
confidence: number;
|
|
232
232
|
word: string;
|
|
233
233
|
extra?: unknown;
|
|
234
234
|
}, {
|
|
235
235
|
end?: number | undefined;
|
|
236
|
+
start?: number | undefined;
|
|
236
237
|
extra?: unknown;
|
|
237
238
|
confidence?: number | undefined;
|
|
238
|
-
start?: number | undefined;
|
|
239
239
|
word?: string | undefined;
|
|
240
240
|
}>;
|
|
241
241
|
export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
|
|
@@ -254,29 +254,29 @@ export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
|
|
|
254
254
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
255
255
|
}, "strip", z.ZodTypeAny, {
|
|
256
256
|
end: number;
|
|
257
|
-
confidence: number;
|
|
258
257
|
start: number;
|
|
258
|
+
confidence: number;
|
|
259
259
|
word: string;
|
|
260
260
|
extra?: unknown;
|
|
261
261
|
}, {
|
|
262
262
|
end?: number | undefined;
|
|
263
|
+
start?: number | undefined;
|
|
263
264
|
extra?: unknown;
|
|
264
265
|
confidence?: number | undefined;
|
|
265
|
-
start?: number | undefined;
|
|
266
266
|
word?: string | undefined;
|
|
267
267
|
}>, "many">>>;
|
|
268
268
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
269
269
|
}, "strip", z.ZodTypeAny, {
|
|
270
|
+
start: number;
|
|
270
271
|
type: "interim_transcript";
|
|
271
272
|
transcript: string;
|
|
272
273
|
language: string;
|
|
273
274
|
confidence: number;
|
|
274
|
-
start: number;
|
|
275
275
|
duration: number;
|
|
276
276
|
words: {
|
|
277
277
|
end: number;
|
|
278
|
-
confidence: number;
|
|
279
278
|
start: number;
|
|
279
|
+
confidence: number;
|
|
280
280
|
word: string;
|
|
281
281
|
extra?: unknown;
|
|
282
282
|
}[];
|
|
@@ -284,18 +284,18 @@ export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
|
|
|
284
284
|
session_id?: string | undefined;
|
|
285
285
|
}, {
|
|
286
286
|
type: "interim_transcript";
|
|
287
|
+
start?: number | undefined;
|
|
287
288
|
extra?: unknown;
|
|
288
289
|
transcript?: string | undefined;
|
|
289
290
|
session_id?: string | undefined;
|
|
290
291
|
language?: string | undefined;
|
|
291
292
|
confidence?: number | undefined;
|
|
292
|
-
start?: number | undefined;
|
|
293
293
|
duration?: number | undefined;
|
|
294
294
|
words?: {
|
|
295
295
|
end?: number | undefined;
|
|
296
|
+
start?: number | undefined;
|
|
296
297
|
extra?: unknown;
|
|
297
298
|
confidence?: number | undefined;
|
|
298
|
-
start?: number | undefined;
|
|
299
299
|
word?: string | undefined;
|
|
300
300
|
}[] | undefined;
|
|
301
301
|
}>;
|
|
@@ -315,29 +315,29 @@ export declare const sttFinalTranscriptEventSchema: z.ZodObject<{
|
|
|
315
315
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
316
316
|
}, "strip", z.ZodTypeAny, {
|
|
317
317
|
end: number;
|
|
318
|
-
confidence: number;
|
|
319
318
|
start: number;
|
|
319
|
+
confidence: number;
|
|
320
320
|
word: string;
|
|
321
321
|
extra?: unknown;
|
|
322
322
|
}, {
|
|
323
323
|
end?: number | undefined;
|
|
324
|
+
start?: number | undefined;
|
|
324
325
|
extra?: unknown;
|
|
325
326
|
confidence?: number | undefined;
|
|
326
|
-
start?: number | undefined;
|
|
327
327
|
word?: string | undefined;
|
|
328
328
|
}>, "many">>>;
|
|
329
329
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
330
330
|
}, "strip", z.ZodTypeAny, {
|
|
331
|
+
start: number;
|
|
331
332
|
type: "final_transcript";
|
|
332
333
|
transcript: string;
|
|
333
334
|
language: string;
|
|
334
335
|
confidence: number;
|
|
335
|
-
start: number;
|
|
336
336
|
duration: number;
|
|
337
337
|
words: {
|
|
338
338
|
end: number;
|
|
339
|
-
confidence: number;
|
|
340
339
|
start: number;
|
|
340
|
+
confidence: number;
|
|
341
341
|
word: string;
|
|
342
342
|
extra?: unknown;
|
|
343
343
|
}[];
|
|
@@ -345,18 +345,18 @@ export declare const sttFinalTranscriptEventSchema: z.ZodObject<{
|
|
|
345
345
|
session_id?: string | undefined;
|
|
346
346
|
}, {
|
|
347
347
|
type: "final_transcript";
|
|
348
|
+
start?: number | undefined;
|
|
348
349
|
extra?: unknown;
|
|
349
350
|
transcript?: string | undefined;
|
|
350
351
|
session_id?: string | undefined;
|
|
351
352
|
language?: string | undefined;
|
|
352
353
|
confidence?: number | undefined;
|
|
353
|
-
start?: number | undefined;
|
|
354
354
|
duration?: number | undefined;
|
|
355
355
|
words?: {
|
|
356
356
|
end?: number | undefined;
|
|
357
|
+
start?: number | undefined;
|
|
357
358
|
extra?: unknown;
|
|
358
359
|
confidence?: number | undefined;
|
|
359
|
-
start?: number | undefined;
|
|
360
360
|
word?: string | undefined;
|
|
361
361
|
}[] | undefined;
|
|
362
362
|
}>;
|
|
@@ -434,29 +434,29 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
434
434
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
435
435
|
}, "strip", z.ZodTypeAny, {
|
|
436
436
|
end: number;
|
|
437
|
-
confidence: number;
|
|
438
437
|
start: number;
|
|
438
|
+
confidence: number;
|
|
439
439
|
word: string;
|
|
440
440
|
extra?: unknown;
|
|
441
441
|
}, {
|
|
442
442
|
end?: number | undefined;
|
|
443
|
+
start?: number | undefined;
|
|
443
444
|
extra?: unknown;
|
|
444
445
|
confidence?: number | undefined;
|
|
445
|
-
start?: number | undefined;
|
|
446
446
|
word?: string | undefined;
|
|
447
447
|
}>, "many">>>;
|
|
448
448
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
449
449
|
}, "strip", z.ZodTypeAny, {
|
|
450
|
+
start: number;
|
|
450
451
|
type: "interim_transcript";
|
|
451
452
|
transcript: string;
|
|
452
453
|
language: string;
|
|
453
454
|
confidence: number;
|
|
454
|
-
start: number;
|
|
455
455
|
duration: number;
|
|
456
456
|
words: {
|
|
457
457
|
end: number;
|
|
458
|
-
confidence: number;
|
|
459
458
|
start: number;
|
|
459
|
+
confidence: number;
|
|
460
460
|
word: string;
|
|
461
461
|
extra?: unknown;
|
|
462
462
|
}[];
|
|
@@ -464,18 +464,18 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
464
464
|
session_id?: string | undefined;
|
|
465
465
|
}, {
|
|
466
466
|
type: "interim_transcript";
|
|
467
|
+
start?: number | undefined;
|
|
467
468
|
extra?: unknown;
|
|
468
469
|
transcript?: string | undefined;
|
|
469
470
|
session_id?: string | undefined;
|
|
470
471
|
language?: string | undefined;
|
|
471
472
|
confidence?: number | undefined;
|
|
472
|
-
start?: number | undefined;
|
|
473
473
|
duration?: number | undefined;
|
|
474
474
|
words?: {
|
|
475
475
|
end?: number | undefined;
|
|
476
|
+
start?: number | undefined;
|
|
476
477
|
extra?: unknown;
|
|
477
478
|
confidence?: number | undefined;
|
|
478
|
-
start?: number | undefined;
|
|
479
479
|
word?: string | undefined;
|
|
480
480
|
}[] | undefined;
|
|
481
481
|
}>, z.ZodObject<{
|
|
@@ -494,29 +494,29 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
494
494
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
495
495
|
}, "strip", z.ZodTypeAny, {
|
|
496
496
|
end: number;
|
|
497
|
-
confidence: number;
|
|
498
497
|
start: number;
|
|
498
|
+
confidence: number;
|
|
499
499
|
word: string;
|
|
500
500
|
extra?: unknown;
|
|
501
501
|
}, {
|
|
502
502
|
end?: number | undefined;
|
|
503
|
+
start?: number | undefined;
|
|
503
504
|
extra?: unknown;
|
|
504
505
|
confidence?: number | undefined;
|
|
505
|
-
start?: number | undefined;
|
|
506
506
|
word?: string | undefined;
|
|
507
507
|
}>, "many">>>;
|
|
508
508
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
509
509
|
}, "strip", z.ZodTypeAny, {
|
|
510
|
+
start: number;
|
|
510
511
|
type: "final_transcript";
|
|
511
512
|
transcript: string;
|
|
512
513
|
language: string;
|
|
513
514
|
confidence: number;
|
|
514
|
-
start: number;
|
|
515
515
|
duration: number;
|
|
516
516
|
words: {
|
|
517
517
|
end: number;
|
|
518
|
-
confidence: number;
|
|
519
518
|
start: number;
|
|
519
|
+
confidence: number;
|
|
520
520
|
word: string;
|
|
521
521
|
extra?: unknown;
|
|
522
522
|
}[];
|
|
@@ -524,18 +524,18 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
524
524
|
session_id?: string | undefined;
|
|
525
525
|
}, {
|
|
526
526
|
type: "final_transcript";
|
|
527
|
+
start?: number | undefined;
|
|
527
528
|
extra?: unknown;
|
|
528
529
|
transcript?: string | undefined;
|
|
529
530
|
session_id?: string | undefined;
|
|
530
531
|
language?: string | undefined;
|
|
531
532
|
confidence?: number | undefined;
|
|
532
|
-
start?: number | undefined;
|
|
533
533
|
duration?: number | undefined;
|
|
534
534
|
words?: {
|
|
535
535
|
end?: number | undefined;
|
|
536
|
+
start?: number | undefined;
|
|
536
537
|
extra?: unknown;
|
|
537
538
|
confidence?: number | undefined;
|
|
538
|
-
start?: number | undefined;
|
|
539
539
|
word?: string | undefined;
|
|
540
540
|
}[] | undefined;
|
|
541
541
|
}>, z.ZodObject<{
|
|
@@ -227,15 +227,15 @@ export declare const sttWordSchema: z.ZodObject<{
|
|
|
227
227
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
228
228
|
}, "strip", z.ZodTypeAny, {
|
|
229
229
|
end: number;
|
|
230
|
-
confidence: number;
|
|
231
230
|
start: number;
|
|
231
|
+
confidence: number;
|
|
232
232
|
word: string;
|
|
233
233
|
extra?: unknown;
|
|
234
234
|
}, {
|
|
235
235
|
end?: number | undefined;
|
|
236
|
+
start?: number | undefined;
|
|
236
237
|
extra?: unknown;
|
|
237
238
|
confidence?: number | undefined;
|
|
238
|
-
start?: number | undefined;
|
|
239
239
|
word?: string | undefined;
|
|
240
240
|
}>;
|
|
241
241
|
export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
|
|
@@ -254,29 +254,29 @@ export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
|
|
|
254
254
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
255
255
|
}, "strip", z.ZodTypeAny, {
|
|
256
256
|
end: number;
|
|
257
|
-
confidence: number;
|
|
258
257
|
start: number;
|
|
258
|
+
confidence: number;
|
|
259
259
|
word: string;
|
|
260
260
|
extra?: unknown;
|
|
261
261
|
}, {
|
|
262
262
|
end?: number | undefined;
|
|
263
|
+
start?: number | undefined;
|
|
263
264
|
extra?: unknown;
|
|
264
265
|
confidence?: number | undefined;
|
|
265
|
-
start?: number | undefined;
|
|
266
266
|
word?: string | undefined;
|
|
267
267
|
}>, "many">>>;
|
|
268
268
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
269
269
|
}, "strip", z.ZodTypeAny, {
|
|
270
|
+
start: number;
|
|
270
271
|
type: "interim_transcript";
|
|
271
272
|
transcript: string;
|
|
272
273
|
language: string;
|
|
273
274
|
confidence: number;
|
|
274
|
-
start: number;
|
|
275
275
|
duration: number;
|
|
276
276
|
words: {
|
|
277
277
|
end: number;
|
|
278
|
-
confidence: number;
|
|
279
278
|
start: number;
|
|
279
|
+
confidence: number;
|
|
280
280
|
word: string;
|
|
281
281
|
extra?: unknown;
|
|
282
282
|
}[];
|
|
@@ -284,18 +284,18 @@ export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
|
|
|
284
284
|
session_id?: string | undefined;
|
|
285
285
|
}, {
|
|
286
286
|
type: "interim_transcript";
|
|
287
|
+
start?: number | undefined;
|
|
287
288
|
extra?: unknown;
|
|
288
289
|
transcript?: string | undefined;
|
|
289
290
|
session_id?: string | undefined;
|
|
290
291
|
language?: string | undefined;
|
|
291
292
|
confidence?: number | undefined;
|
|
292
|
-
start?: number | undefined;
|
|
293
293
|
duration?: number | undefined;
|
|
294
294
|
words?: {
|
|
295
295
|
end?: number | undefined;
|
|
296
|
+
start?: number | undefined;
|
|
296
297
|
extra?: unknown;
|
|
297
298
|
confidence?: number | undefined;
|
|
298
|
-
start?: number | undefined;
|
|
299
299
|
word?: string | undefined;
|
|
300
300
|
}[] | undefined;
|
|
301
301
|
}>;
|
|
@@ -315,29 +315,29 @@ export declare const sttFinalTranscriptEventSchema: z.ZodObject<{
|
|
|
315
315
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
316
316
|
}, "strip", z.ZodTypeAny, {
|
|
317
317
|
end: number;
|
|
318
|
-
confidence: number;
|
|
319
318
|
start: number;
|
|
319
|
+
confidence: number;
|
|
320
320
|
word: string;
|
|
321
321
|
extra?: unknown;
|
|
322
322
|
}, {
|
|
323
323
|
end?: number | undefined;
|
|
324
|
+
start?: number | undefined;
|
|
324
325
|
extra?: unknown;
|
|
325
326
|
confidence?: number | undefined;
|
|
326
|
-
start?: number | undefined;
|
|
327
327
|
word?: string | undefined;
|
|
328
328
|
}>, "many">>>;
|
|
329
329
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
330
330
|
}, "strip", z.ZodTypeAny, {
|
|
331
|
+
start: number;
|
|
331
332
|
type: "final_transcript";
|
|
332
333
|
transcript: string;
|
|
333
334
|
language: string;
|
|
334
335
|
confidence: number;
|
|
335
|
-
start: number;
|
|
336
336
|
duration: number;
|
|
337
337
|
words: {
|
|
338
338
|
end: number;
|
|
339
|
-
confidence: number;
|
|
340
339
|
start: number;
|
|
340
|
+
confidence: number;
|
|
341
341
|
word: string;
|
|
342
342
|
extra?: unknown;
|
|
343
343
|
}[];
|
|
@@ -345,18 +345,18 @@ export declare const sttFinalTranscriptEventSchema: z.ZodObject<{
|
|
|
345
345
|
session_id?: string | undefined;
|
|
346
346
|
}, {
|
|
347
347
|
type: "final_transcript";
|
|
348
|
+
start?: number | undefined;
|
|
348
349
|
extra?: unknown;
|
|
349
350
|
transcript?: string | undefined;
|
|
350
351
|
session_id?: string | undefined;
|
|
351
352
|
language?: string | undefined;
|
|
352
353
|
confidence?: number | undefined;
|
|
353
|
-
start?: number | undefined;
|
|
354
354
|
duration?: number | undefined;
|
|
355
355
|
words?: {
|
|
356
356
|
end?: number | undefined;
|
|
357
|
+
start?: number | undefined;
|
|
357
358
|
extra?: unknown;
|
|
358
359
|
confidence?: number | undefined;
|
|
359
|
-
start?: number | undefined;
|
|
360
360
|
word?: string | undefined;
|
|
361
361
|
}[] | undefined;
|
|
362
362
|
}>;
|
|
@@ -434,29 +434,29 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
434
434
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
435
435
|
}, "strip", z.ZodTypeAny, {
|
|
436
436
|
end: number;
|
|
437
|
-
confidence: number;
|
|
438
437
|
start: number;
|
|
438
|
+
confidence: number;
|
|
439
439
|
word: string;
|
|
440
440
|
extra?: unknown;
|
|
441
441
|
}, {
|
|
442
442
|
end?: number | undefined;
|
|
443
|
+
start?: number | undefined;
|
|
443
444
|
extra?: unknown;
|
|
444
445
|
confidence?: number | undefined;
|
|
445
|
-
start?: number | undefined;
|
|
446
446
|
word?: string | undefined;
|
|
447
447
|
}>, "many">>>;
|
|
448
448
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
449
449
|
}, "strip", z.ZodTypeAny, {
|
|
450
|
+
start: number;
|
|
450
451
|
type: "interim_transcript";
|
|
451
452
|
transcript: string;
|
|
452
453
|
language: string;
|
|
453
454
|
confidence: number;
|
|
454
|
-
start: number;
|
|
455
455
|
duration: number;
|
|
456
456
|
words: {
|
|
457
457
|
end: number;
|
|
458
|
-
confidence: number;
|
|
459
458
|
start: number;
|
|
459
|
+
confidence: number;
|
|
460
460
|
word: string;
|
|
461
461
|
extra?: unknown;
|
|
462
462
|
}[];
|
|
@@ -464,18 +464,18 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
464
464
|
session_id?: string | undefined;
|
|
465
465
|
}, {
|
|
466
466
|
type: "interim_transcript";
|
|
467
|
+
start?: number | undefined;
|
|
467
468
|
extra?: unknown;
|
|
468
469
|
transcript?: string | undefined;
|
|
469
470
|
session_id?: string | undefined;
|
|
470
471
|
language?: string | undefined;
|
|
471
472
|
confidence?: number | undefined;
|
|
472
|
-
start?: number | undefined;
|
|
473
473
|
duration?: number | undefined;
|
|
474
474
|
words?: {
|
|
475
475
|
end?: number | undefined;
|
|
476
|
+
start?: number | undefined;
|
|
476
477
|
extra?: unknown;
|
|
477
478
|
confidence?: number | undefined;
|
|
478
|
-
start?: number | undefined;
|
|
479
479
|
word?: string | undefined;
|
|
480
480
|
}[] | undefined;
|
|
481
481
|
}>, z.ZodObject<{
|
|
@@ -494,29 +494,29 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
494
494
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
495
495
|
}, "strip", z.ZodTypeAny, {
|
|
496
496
|
end: number;
|
|
497
|
-
confidence: number;
|
|
498
497
|
start: number;
|
|
498
|
+
confidence: number;
|
|
499
499
|
word: string;
|
|
500
500
|
extra?: unknown;
|
|
501
501
|
}, {
|
|
502
502
|
end?: number | undefined;
|
|
503
|
+
start?: number | undefined;
|
|
503
504
|
extra?: unknown;
|
|
504
505
|
confidence?: number | undefined;
|
|
505
|
-
start?: number | undefined;
|
|
506
506
|
word?: string | undefined;
|
|
507
507
|
}>, "many">>>;
|
|
508
508
|
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
509
509
|
}, "strip", z.ZodTypeAny, {
|
|
510
|
+
start: number;
|
|
510
511
|
type: "final_transcript";
|
|
511
512
|
transcript: string;
|
|
512
513
|
language: string;
|
|
513
514
|
confidence: number;
|
|
514
|
-
start: number;
|
|
515
515
|
duration: number;
|
|
516
516
|
words: {
|
|
517
517
|
end: number;
|
|
518
|
-
confidence: number;
|
|
519
518
|
start: number;
|
|
519
|
+
confidence: number;
|
|
520
520
|
word: string;
|
|
521
521
|
extra?: unknown;
|
|
522
522
|
}[];
|
|
@@ -524,18 +524,18 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
524
524
|
session_id?: string | undefined;
|
|
525
525
|
}, {
|
|
526
526
|
type: "final_transcript";
|
|
527
|
+
start?: number | undefined;
|
|
527
528
|
extra?: unknown;
|
|
528
529
|
transcript?: string | undefined;
|
|
529
530
|
session_id?: string | undefined;
|
|
530
531
|
language?: string | undefined;
|
|
531
532
|
confidence?: number | undefined;
|
|
532
|
-
start?: number | undefined;
|
|
533
533
|
duration?: number | undefined;
|
|
534
534
|
words?: {
|
|
535
535
|
end?: number | undefined;
|
|
536
|
+
start?: number | undefined;
|
|
536
537
|
extra?: unknown;
|
|
537
538
|
confidence?: number | undefined;
|
|
538
|
-
start?: number | undefined;
|
|
539
539
|
word?: string | undefined;
|
|
540
540
|
}[] | undefined;
|
|
541
541
|
}>, z.ZodObject<{
|
package/dist/inference/tts.cjs
CHANGED
|
@@ -356,13 +356,18 @@ class SynthesizeStream extends import_tts.SynthesizeStream {
|
|
|
356
356
|
};
|
|
357
357
|
const createRecvTask = async (signal) => {
|
|
358
358
|
let currentSessionId = null;
|
|
359
|
+
const recvTimeoutMs = this.connOptions.timeoutMs;
|
|
359
360
|
const bstream = new import_audio.AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);
|
|
360
361
|
const serverEventStream = eventChannel.stream();
|
|
361
362
|
const reader = serverEventStream.getReader();
|
|
362
363
|
try {
|
|
363
364
|
await inputSentEvent.wait();
|
|
364
365
|
while (!this.closed && !signal.aborted) {
|
|
365
|
-
const result = await
|
|
366
|
+
const result = await (0, import_utils.waitUntilTimeout)(
|
|
367
|
+
reader.read(),
|
|
368
|
+
recvTimeoutMs,
|
|
369
|
+
() => new import_exceptions.APITimeoutError({ message: "TTS recv idle timeout" })
|
|
370
|
+
);
|
|
366
371
|
if (signal.aborted) return;
|
|
367
372
|
if (result.done) return;
|
|
368
373
|
const serverEvent = result.value;
|
|
@@ -406,6 +411,14 @@ class SynthesizeStream extends import_tts.SynthesizeStream {
|
|
|
406
411
|
break;
|
|
407
412
|
}
|
|
408
413
|
}
|
|
414
|
+
} catch (e) {
|
|
415
|
+
if (e instanceof import_exceptions.APITimeoutError) {
|
|
416
|
+
this.#logger.warn("TTS recv task timed out waiting for server message");
|
|
417
|
+
await resourceCleanup();
|
|
418
|
+
completionFuture.reject(e);
|
|
419
|
+
return;
|
|
420
|
+
}
|
|
421
|
+
throw e;
|
|
409
422
|
} finally {
|
|
410
423
|
reader.releaseLock();
|
|
411
424
|
try {
|