@livekit/agents 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/dist/audio.cjs +10 -0
  2. package/dist/audio.cjs.map +1 -1
  3. package/dist/audio.d.cts +1 -1
  4. package/dist/audio.d.ts +1 -1
  5. package/dist/audio.d.ts.map +1 -1
  6. package/dist/audio.js +10 -0
  7. package/dist/audio.js.map +1 -1
  8. package/dist/inference/api_protos.d.cts +26 -26
  9. package/dist/inference/api_protos.d.ts +26 -26
  10. package/dist/inference/tts.cjs +14 -1
  11. package/dist/inference/tts.cjs.map +1 -1
  12. package/dist/inference/tts.d.ts.map +1 -1
  13. package/dist/inference/tts.js +24 -3
  14. package/dist/inference/tts.js.map +1 -1
  15. package/dist/ipc/job_proc_lazy_main.cjs +7 -2
  16. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  17. package/dist/ipc/job_proc_lazy_main.js +7 -2
  18. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  19. package/dist/ipc/supervised_proc.cjs +4 -1
  20. package/dist/ipc/supervised_proc.cjs.map +1 -1
  21. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  22. package/dist/ipc/supervised_proc.js +4 -1
  23. package/dist/ipc/supervised_proc.js.map +1 -1
  24. package/dist/ipc/supervised_proc.test.cjs +82 -0
  25. package/dist/ipc/supervised_proc.test.cjs.map +1 -1
  26. package/dist/ipc/supervised_proc.test.js +82 -0
  27. package/dist/ipc/supervised_proc.test.js.map +1 -1
  28. package/dist/job.cjs +2 -1
  29. package/dist/job.cjs.map +1 -1
  30. package/dist/job.d.ts.map +1 -1
  31. package/dist/job.js +2 -1
  32. package/dist/job.js.map +1 -1
  33. package/dist/utils.cjs +28 -0
  34. package/dist/utils.cjs.map +1 -1
  35. package/dist/utils.d.cts +18 -0
  36. package/dist/utils.d.ts +18 -0
  37. package/dist/utils.d.ts.map +1 -1
  38. package/dist/utils.js +25 -0
  39. package/dist/utils.js.map +1 -1
  40. package/dist/version.cjs +1 -1
  41. package/dist/version.js +1 -1
  42. package/dist/voice/agent_activity.cjs +10 -0
  43. package/dist/voice/agent_activity.cjs.map +1 -1
  44. package/dist/voice/agent_activity.d.ts.map +1 -1
  45. package/dist/voice/agent_activity.js +11 -0
  46. package/dist/voice/agent_activity.js.map +1 -1
  47. package/dist/voice/agent_session.cjs +1 -1
  48. package/dist/voice/agent_session.cjs.map +1 -1
  49. package/dist/voice/agent_session.d.cts +4 -2
  50. package/dist/voice/agent_session.d.ts +4 -2
  51. package/dist/voice/agent_session.d.ts.map +1 -1
  52. package/dist/voice/agent_session.js +1 -1
  53. package/dist/voice/agent_session.js.map +1 -1
  54. package/dist/voice/events.cjs +11 -0
  55. package/dist/voice/events.cjs.map +1 -1
  56. package/dist/voice/events.d.cts +12 -1
  57. package/dist/voice/events.d.ts +12 -1
  58. package/dist/voice/events.d.ts.map +1 -1
  59. package/dist/voice/events.js +10 -0
  60. package/dist/voice/events.js.map +1 -1
  61. package/dist/voice/generation.cjs +23 -4
  62. package/dist/voice/generation.cjs.map +1 -1
  63. package/dist/voice/generation.d.ts.map +1 -1
  64. package/dist/voice/generation.js +32 -5
  65. package/dist/voice/generation.js.map +1 -1
  66. package/dist/voice/generation_tts_timeout.test.cjs +85 -0
  67. package/dist/voice/generation_tts_timeout.test.cjs.map +1 -0
  68. package/dist/voice/generation_tts_timeout.test.js +84 -0
  69. package/dist/voice/generation_tts_timeout.test.js.map +1 -0
  70. package/dist/voice/index.cjs.map +1 -1
  71. package/dist/voice/index.d.cts +1 -1
  72. package/dist/voice/index.d.ts +1 -1
  73. package/dist/voice/index.d.ts.map +1 -1
  74. package/dist/voice/index.js +3 -1
  75. package/dist/voice/index.js.map +1 -1
  76. package/dist/voice/recorder_io/recorder_io.cjs +1 -2
  77. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
  78. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
  79. package/dist/voice/recorder_io/recorder_io.js +2 -3
  80. package/dist/voice/recorder_io/recorder_io.js.map +1 -1
  81. package/dist/voice/report.cjs +1 -1
  82. package/dist/voice/report.cjs.map +1 -1
  83. package/dist/voice/report.js +1 -1
  84. package/dist/voice/report.js.map +1 -1
  85. package/dist/voice/report.test.cjs +70 -0
  86. package/dist/voice/report.test.cjs.map +1 -1
  87. package/dist/voice/report.test.js +70 -0
  88. package/dist/voice/report.test.js.map +1 -1
  89. package/dist/voice/room_io/room_io.cjs +5 -1
  90. package/dist/voice/room_io/room_io.cjs.map +1 -1
  91. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  92. package/dist/voice/room_io/room_io.js +5 -1
  93. package/dist/voice/room_io/room_io.js.map +1 -1
  94. package/dist/voice/room_io/room_io.test.cjs +18 -0
  95. package/dist/voice/room_io/room_io.test.cjs.map +1 -0
  96. package/dist/voice/room_io/room_io.test.js +17 -0
  97. package/dist/voice/room_io/room_io.test.js.map +1 -0
  98. package/package.json +1 -1
  99. package/src/audio.ts +12 -1
  100. package/src/inference/tts.ts +25 -3
  101. package/src/ipc/job_proc_lazy_main.ts +7 -2
  102. package/src/ipc/supervised_proc.test.ts +96 -0
  103. package/src/ipc/supervised_proc.ts +8 -1
  104. package/src/job.ts +1 -0
  105. package/src/utils.ts +43 -0
  106. package/src/voice/agent_activity.ts +11 -0
  107. package/src/voice/agent_session.ts +13 -7
  108. package/src/voice/events.ts +21 -0
  109. package/src/voice/generation.ts +35 -8
  110. package/src/voice/generation_tts_timeout.test.ts +112 -0
  111. package/src/voice/index.ts +6 -1
  112. package/src/voice/recorder_io/recorder_io.ts +2 -7
  113. package/src/voice/report.test.ts +78 -0
  114. package/src/voice/report.ts +1 -1
  115. package/src/voice/room_io/room_io.test.ts +38 -0
  116. package/src/voice/room_io/room_io.ts +7 -2
package/dist/audio.cjs CHANGED
@@ -39,6 +39,7 @@ var import_rtc_node = require("@livekit/rtc-node");
39
39
  var import_fluent_ffmpeg = __toESM(require("fluent-ffmpeg"), 1);
40
40
  var import_log = require("./log.cjs");
41
41
  var import_stream_channel = require("./stream/stream_channel.cjs");
42
+ var import_utils = require("./utils.cjs");
42
43
  import_fluent_ffmpeg.default.setFfmpegPath(import_ffmpeg.default.path);
43
44
  function calculateAudioDurationSeconds(frame) {
44
45
  return Array.isArray(frame) ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0) : frame.samplesPerChannel / frame.sampleRate;
@@ -118,6 +119,15 @@ function audioFramesFromFile(filePath, options = {}) {
118
119
  command.kill("SIGKILL");
119
120
  }
120
121
  };
122
+ command.on("error", (err) => {
123
+ if ((0, import_utils.isFfmpegTeardownError)(err)) {
124
+ logger.debug("FFmpeg command ended during shutdown");
125
+ } else {
126
+ logger.error(err, "FFmpeg command error");
127
+ }
128
+ commandRunning = false;
129
+ onClose();
130
+ });
121
131
  const outputStream = command.pipe();
122
132
  (_a = options.abortSignal) == null ? void 0 : _a.addEventListener("abort", onClose, { once: true });
123
133
  outputStream.on("data", (chunk) => {
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/audio.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport ffmpegInstaller from '@ffmpeg-installer/ffmpeg';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport ffmpeg from 'fluent-ffmpeg';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from './log.js';\nimport { createStreamChannel } from './stream/stream_channel.js';\nimport type { AudioBuffer } from './utils.js';\n\nffmpeg.setFfmpegPath(ffmpegInstaller.path);\n\nexport interface AudioDecodeOptions {\n sampleRate?: number;\n numChannels?: number;\n /**\n * Audio format hint (e.g., 'mp3', 'ogg', 'wav', 'opus')\n * If not provided, FFmpeg will auto-detect\n */\n format?: string;\n abortSignal?: AbortSignal;\n}\n\nexport function calculateAudioDurationSeconds(frame: AudioBuffer) {\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n return Array.isArray(frame)\n ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0)\n : frame.samplesPerChannel / frame.sampleRate;\n}\n\n/** AudioByteStream translates between LiveKit AudioFrame packets and raw byte data. */\nexport class AudioByteStream {\n #sampleRate: number;\n #numChannels: number;\n #bytesPerFrame: number;\n #buf: Int8Array;\n #logger = log();\n\n constructor(sampleRate: number, numChannels: number, samplesPerChannel: number | null = null) {\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n\n if (samplesPerChannel === null) {\n samplesPerChannel = Math.floor(sampleRate / 10); // 100ms by default\n }\n\n this.#bytesPerFrame = numChannels * samplesPerChannel * 2; // 2 bytes per sample (Int16)\n this.#buf = new Int8Array();\n }\n\n write(data: ArrayBuffer): AudioFrame[] {\n this.#buf = new Int8Array([...this.#buf, ...new Int8Array(data)]);\n\n const frames: AudioFrame[] = [];\n while (this.#buf.length >= this.#bytesPerFrame) {\n const frameData = this.#buf.slice(0, this.#bytesPerFrame);\n this.#buf = this.#buf.slice(this.#bytesPerFrame);\n\n frames.push(\n new AudioFrame(\n new Int16Array(frameData.buffer),\n this.#sampleRate,\n this.#numChannels,\n frameData.length / 2,\n ),\n );\n }\n\n return frames;\n }\n\n flush(): AudioFrame[] {\n if (this.#buf.length % (2 * this.#numChannels) !== 0) {\n this.#logger.warn('AudioByteStream: incomplete frame during flush, dropping');\n return [];\n }\n\n const frames = [\n new AudioFrame(\n new Int16Array(this.#buf.buffer),\n this.#sampleRate,\n this.#numChannels,\n this.#buf.length / 2,\n ),\n ];\n\n this.#buf = new Int8Array(); // Clear buffer after flushing\n return frames;\n }\n}\n\n/**\n * Decode an audio file into AudioFrame instances\n *\n * @param filePath - Path to the audio file\n * @param options - Decoding options\n * @returns AsyncGenerator that yields AudioFrame objects\n *\n * @example\n * ```typescript\n * for await (const frame of audioFramesFromFile('audio.ogg', { sampleRate: 48000 })) {\n * console.log('Frame:', frame.samplesPerChannel, 'samples');\n * }\n * ```\n */\nexport function audioFramesFromFile(\n filePath: string,\n options: AudioDecodeOptions = {},\n): ReadableStream<AudioFrame> {\n const sampleRate = options.sampleRate ?? 48000;\n const numChannels = options.numChannels ?? 1;\n\n const audioStream = new AudioByteStream(sampleRate, numChannels);\n const channel = createStreamChannel<AudioFrame>();\n const logger = log();\n\n // TODO (Brian): decode WAV using a custom decoder instead of FFmpeg\n const command = ffmpeg(filePath)\n .inputOptions([\n '-probesize',\n '32',\n '-analyzeduration',\n '0',\n '-fflags',\n '+nobuffer+flush_packets',\n '-flags',\n 'low_delay',\n ])\n .format('s16le') // signed 16-bit little-endian PCM to be consistent cross-platform\n .audioChannels(numChannels)\n .audioFrequency(sampleRate);\n\n let commandRunning = true;\n\n const onClose = () => {\n logger.debug('Audio file playback aborted');\n\n channel.close();\n if (commandRunning) {\n commandRunning = false;\n command.kill('SIGKILL');\n }\n };\n\n const outputStream = command.pipe();\n options.abortSignal?.addEventListener('abort', onClose, { once: true });\n\n outputStream.on('data', (chunk: Buffer) => {\n const arrayBuffer = chunk.buffer.slice(\n chunk.byteOffset,\n chunk.byteOffset + chunk.byteLength,\n ) as ArrayBuffer;\n\n const frames = audioStream.write(arrayBuffer);\n for (const frame of frames) {\n channel.write(frame);\n }\n });\n\n outputStream.on('end', () => {\n const frames = audioStream.flush();\n for (const frame of frames) {\n channel.write(frame);\n }\n commandRunning = false;\n channel.close();\n });\n\n outputStream.on('error', (err: Error) => {\n logger.error(err);\n commandRunning = false;\n onClose();\n });\n\n return channel.stream();\n}\n\n/**\n * Loop audio frames from a file indefinitely\n *\n * @param filePath - Path to the audio file\n * @param options - Decoding options\n * @returns AsyncGenerator that yields AudioFrame objects in an infinite loop\n */\nexport async function* loopAudioFramesFromFile(\n filePath: string,\n options: AudioDecodeOptions = {},\n): AsyncGenerator<AudioFrame, void, unknown> {\n const frames: AudioFrame[] = [];\n const logger = log();\n\n for await (const frame of audioFramesFromFile(filePath, options)) {\n frames.push(frame);\n yield frame;\n }\n\n while (!options.abortSignal?.aborted) {\n for (const frame of frames) {\n yield frame;\n }\n }\n\n logger.debug('Audio file playback loop finished');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA4B;AAC5B,sBAA2B;AAC3B,2BAAmB;AAEnB,iBAAoB;AACpB,4BAAoC;AAGpC,qBAAAA,QAAO,cAAc,cAAAC,QAAgB,IAAI;AAalC,SAAS,8BAA8B,OAAoB;AAEhE,SAAO,MAAM,QAAQ,KAAK,IACtB,MAAM,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,oBAAoB,EAAE,YAAY,CAAC,IACpE,MAAM,oBAAoB,MAAM;AACtC;AAGO,MAAM,gBAAgB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,cAAU,gBAAI;AAAA,EAEd,YAAY,YAAoB,aAAqB,oBAAmC,MAAM;AAC5F,SAAK,cAAc;AACnB,SAAK,eAAe;AAEpB,QAAI,sBAAsB,MAAM;AAC9B,0BAAoB,KAAK,MAAM,aAAa,EAAE;AAAA,IAChD;AAEA,SAAK,iBAAiB,cAAc,oBAAoB;AACxD,SAAK,OAAO,IAAI,UAAU;AAAA,EAC5B;AAAA,EAEA,MAAM,MAAiC;AACrC,SAAK,OAAO,IAAI,UAAU,CAAC,GAAG,KAAK,MAAM,GAAG,IAAI,UAAU,IAAI,CAAC,CAAC;AAEhE,UAAM,SAAuB,CAAC;AAC9B,WAAO,KAAK,KAAK,UAAU,KAAK,gBAAgB;AAC9C,YAAM,YAAY,KAAK,KAAK,MAAM,GAAG,KAAK,cAAc;AACxD,WAAK,OAAO,KAAK,KAAK,MAAM,KAAK,cAAc;AAE/C,aAAO;AAAA,QACL,IAAI;AAAA,UACF,IAAI,WAAW,UAAU,MAAM;AAAA,UAC/B,KAAK;AAAA,UACL,KAAK;AAAA,UACL,UAAU,SAAS;AAAA,QACrB;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAAA,EAEA,QAAsB;AACpB,QAAI,KAAK,KAAK,UAAU,IAAI,KAAK,kBAAkB,GAAG;AACpD,WAAK,QAAQ,KAAK,0DAA0D;AAC5E,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,SAAS;AAAA,MACb,IAAI;AAAA,QACF,IAAI,WAAW,KAAK,KAAK,MAAM;AAAA,QAC/B,KAAK;AAAA,QACL,KAAK;AAAA,QACL,KAAK,KAAK,SAAS;AAAA,MACrB;AAAA,IACF;AAEA,SAAK,OAAO,IAAI,UAAU;AAC1B,WAAO;AAAA,EACT;AACF;AAgBO,SAAS,oBACd,UACA,UAA8B,CAAC,GACH;AA7G9B;AA8GE,QAAM,aAAa,QAAQ,cAAc;AACzC,QAAM,cAAc,QAAQ,eAAe;AAE3C,QAAM,cAAc,IAAI,gBAAgB,YAAY,WAAW;AAC/D,QAAM,cAAU,2CAAgC;AAChD,QAAM,aAAS,gBAAI;AAGnB,QAAM,cAAU,qBAAAD,SAAO,QAAQ,EAC5B,aAAa;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC,EACA,OAAO,OAAO,EACd,cAAc,WAAW,EACzB,eAAe,UAAU;AAE5B,MAAI,iBAAiB;AAErB,QAAM,UAAU,MAAM;AACpB,WAAO,MAAM,6BAA6B;AAE1C,YAAQ,MAAM;AACd,QAAI,gBAAgB;AAClB,uBAAiB;AACjB,cAAQ,KAAK,SAAS;AAAA,IACxB;AAAA,EACF;AAEA,QAAM,eAAe,QAAQ,KAAK;AAClC,gBAAQ,gBAAR,mBAAqB,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK;AAErE,eAAa,GAAG,QAAQ,CAAC,UAAkB;AACzC,UAAM,cAAc,MAAM,OAAO;AAAA,MAC/B,MAAM;AAAA,MACN,MAAM,aAAa,MAAM;AAAA,IAC3B;AAEA,UAAM,SAAS,YAAY,MAAM,WAAW;AAC5C,eAAW,SAAS,QAAQ;AAC1B,cAAQ,MAAM,KAAK;AAAA,IACrB;AAAA,EACF,CAAC;AAED,eAAa,GAAG,OAAO,MAAM;AAC3B,UAAM,SAAS,YAAY,MAAM;AACjC,eAAW,SAAS,QAAQ;AAC1B,cAAQ,MAAM,KAAK;AAAA,IACrB;AACA,qBAAiB;AACjB,YAAQ,MAAM;AAAA,EAChB,CAAC;AAED,eAAa,GAAG,SAAS,CAAC,QAAe;AACvC,WAAO,MAAM,GAAG;AAChB,qBAAiB;AACjB,YAAQ;AAAA,EACV,CAAC;AAED,SAAO,QAAQ,OAAO;AACxB;AASA,gBAAuB,wBACrB,UACA,UAA8B,CAAC,GACY;AA5L7C;AA6LE,QAAM,SAAuB,CAAC;AAC9B,QAAM,aAAS,gBAAI;AAEnB,mBAAiB,SAAS,oBAAoB,UAAU,OAAO,GAAG;AAChE,WAAO,KAAK,KAAK;AACjB,UAAM;AAAA,EACR;AAEA,SAAO,GAAC,aAAQ,gBAAR,mBAAqB,UAAS;AACpC,eAAW,SAAS,QAAQ;AAC1B,YAAM;AAAA,IACR;AAAA,EACF;AAEA,SAAO,MAAM,mCAAmC;AAClD;","names":["ffmpeg","ffmpegInstaller"]}
1
+ {"version":3,"sources":["../src/audio.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport ffmpegInstaller from '@ffmpeg-installer/ffmpeg';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport ffmpeg from 'fluent-ffmpeg';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from './log.js';\nimport { createStreamChannel } from './stream/stream_channel.js';\nimport { type AudioBuffer, isFfmpegTeardownError } from './utils.js';\n\nffmpeg.setFfmpegPath(ffmpegInstaller.path);\n\nexport interface AudioDecodeOptions {\n sampleRate?: number;\n numChannels?: number;\n /**\n * Audio format hint (e.g., 'mp3', 'ogg', 'wav', 'opus')\n * If not provided, FFmpeg will auto-detect\n */\n format?: string;\n abortSignal?: AbortSignal;\n}\n\nexport function calculateAudioDurationSeconds(frame: AudioBuffer) {\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n return Array.isArray(frame)\n ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0)\n : frame.samplesPerChannel / frame.sampleRate;\n}\n\n/** AudioByteStream translates between LiveKit AudioFrame packets and raw byte data. */\nexport class AudioByteStream {\n #sampleRate: number;\n #numChannels: number;\n #bytesPerFrame: number;\n #buf: Int8Array;\n #logger = log();\n\n constructor(sampleRate: number, numChannels: number, samplesPerChannel: number | null = null) {\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n\n if (samplesPerChannel === null) {\n samplesPerChannel = Math.floor(sampleRate / 10); // 100ms by default\n }\n\n this.#bytesPerFrame = numChannels * samplesPerChannel * 2; // 2 bytes per sample (Int16)\n this.#buf = new Int8Array();\n }\n\n write(data: ArrayBuffer): AudioFrame[] {\n this.#buf = new Int8Array([...this.#buf, ...new Int8Array(data)]);\n\n const frames: AudioFrame[] = [];\n while (this.#buf.length >= this.#bytesPerFrame) {\n const frameData = this.#buf.slice(0, this.#bytesPerFrame);\n this.#buf = this.#buf.slice(this.#bytesPerFrame);\n\n frames.push(\n new AudioFrame(\n new Int16Array(frameData.buffer),\n this.#sampleRate,\n this.#numChannels,\n frameData.length / 2,\n ),\n );\n }\n\n return frames;\n }\n\n flush(): AudioFrame[] {\n if (this.#buf.length % (2 * this.#numChannels) !== 0) {\n this.#logger.warn('AudioByteStream: incomplete frame during flush, dropping');\n return [];\n }\n\n const frames = [\n new AudioFrame(\n new Int16Array(this.#buf.buffer),\n this.#sampleRate,\n this.#numChannels,\n this.#buf.length / 2,\n ),\n ];\n\n this.#buf = new Int8Array(); // Clear buffer after flushing\n return frames;\n }\n}\n\n/**\n * Decode an audio file into AudioFrame instances\n *\n * @param filePath - Path to the audio file\n * @param options - Decoding options\n * @returns AsyncGenerator that yields AudioFrame objects\n *\n * @example\n * ```typescript\n * for await (const frame of audioFramesFromFile('audio.ogg', { sampleRate: 48000 })) {\n * console.log('Frame:', frame.samplesPerChannel, 'samples');\n * }\n * ```\n */\nexport function audioFramesFromFile(\n filePath: string,\n options: AudioDecodeOptions = {},\n): ReadableStream<AudioFrame> {\n const sampleRate = options.sampleRate ?? 48000;\n const numChannels = options.numChannels ?? 1;\n\n const audioStream = new AudioByteStream(sampleRate, numChannels);\n const channel = createStreamChannel<AudioFrame>();\n const logger = log();\n\n // TODO (Brian): decode WAV using a custom decoder instead of FFmpeg\n const command = ffmpeg(filePath)\n .inputOptions([\n '-probesize',\n '32',\n '-analyzeduration',\n '0',\n '-fflags',\n '+nobuffer+flush_packets',\n '-flags',\n 'low_delay',\n ])\n .format('s16le') // signed 16-bit little-endian PCM to be consistent cross-platform\n .audioChannels(numChannels)\n .audioFrequency(sampleRate);\n\n let commandRunning = true;\n\n const onClose = () => {\n logger.debug('Audio file playback aborted');\n\n channel.close();\n if (commandRunning) {\n commandRunning = false;\n command.kill('SIGKILL');\n }\n };\n\n command.on('error', (err: Error) => {\n if (isFfmpegTeardownError(err)) {\n // Expected during teardown — not an error\n logger.debug('FFmpeg command ended during shutdown');\n } else {\n logger.error(err, 'FFmpeg command error');\n }\n commandRunning = false;\n onClose();\n });\n\n const outputStream = command.pipe();\n options.abortSignal?.addEventListener('abort', onClose, { once: true });\n\n outputStream.on('data', (chunk: Buffer) => {\n const arrayBuffer = chunk.buffer.slice(\n chunk.byteOffset,\n chunk.byteOffset + chunk.byteLength,\n ) as ArrayBuffer;\n\n const frames = audioStream.write(arrayBuffer);\n for (const frame of frames) {\n channel.write(frame);\n }\n });\n\n outputStream.on('end', () => {\n const frames = audioStream.flush();\n for (const frame of frames) {\n channel.write(frame);\n }\n commandRunning = false;\n channel.close();\n });\n\n outputStream.on('error', (err: Error) => {\n logger.error(err);\n commandRunning = false;\n onClose();\n });\n\n return channel.stream();\n}\n\n/**\n * Loop audio frames from a file indefinitely\n *\n * @param filePath - Path to the audio file\n * @param options - Decoding options\n * @returns AsyncGenerator that yields AudioFrame objects in an infinite loop\n */\nexport async function* loopAudioFramesFromFile(\n filePath: string,\n options: AudioDecodeOptions = {},\n): AsyncGenerator<AudioFrame, void, unknown> {\n const frames: AudioFrame[] = [];\n const logger = log();\n\n for await (const frame of audioFramesFromFile(filePath, options)) {\n frames.push(frame);\n yield frame;\n }\n\n while (!options.abortSignal?.aborted) {\n for (const frame of frames) {\n yield frame;\n }\n }\n\n logger.debug('Audio file playback loop finished');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,oBAA4B;AAC5B,sBAA2B;AAC3B,2BAAmB;AAEnB,iBAAoB;AACpB,4BAAoC;AACpC,mBAAwD;AAExD,qBAAAA,QAAO,cAAc,cAAAC,QAAgB,IAAI;AAalC,SAAS,8BAA8B,OAAoB;AAEhE,SAAO,MAAM,QAAQ,KAAK,IACtB,MAAM,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,oBAAoB,EAAE,YAAY,CAAC,IACpE,MAAM,oBAAoB,MAAM;AACtC;AAGO,MAAM,gBAAgB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,cAAU,gBAAI;AAAA,EAEd,YAAY,YAAoB,aAAqB,oBAAmC,MAAM;AAC5F,SAAK,cAAc;AACnB,SAAK,eAAe;AAEpB,QAAI,sBAAsB,MAAM;AAC9B,0BAAoB,KAAK,MAAM,aAAa,EAAE;AAAA,IAChD;AAEA,SAAK,iBAAiB,cAAc,oBAAoB;AACxD,SAAK,OAAO,IAAI,UAAU;AAAA,EAC5B;AAAA,EAEA,MAAM,MAAiC;AACrC,SAAK,OAAO,IAAI,UAAU,CAAC,GAAG,KAAK,MAAM,GAAG,IAAI,UAAU,IAAI,CAAC,CAAC;AAEhE,UAAM,SAAuB,CAAC;AAC9B,WAAO,KAAK,KAAK,UAAU,KAAK,gBAAgB;AAC9C,YAAM,YAAY,KAAK,KAAK,MAAM,GAAG,KAAK,cAAc;AACxD,WAAK,OAAO,KAAK,KAAK,MAAM,KAAK,cAAc;AAE/C,aAAO;AAAA,QACL,IAAI;AAAA,UACF,IAAI,WAAW,UAAU,MAAM;AAAA,UAC/B,KAAK;AAAA,UACL,KAAK;AAAA,UACL,UAAU,SAAS;AAAA,QACrB;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAAA,EAEA,QAAsB;AACpB,QAAI,KAAK,KAAK,UAAU,IAAI,KAAK,kBAAkB,GAAG;AACpD,WAAK,QAAQ,KAAK,0DAA0D;AAC5E,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,SAAS;AAAA,MACb,IAAI;AAAA,QACF,IAAI,WAAW,KAAK,KAAK,MAAM;AAAA,QAC/B,KAAK;AAAA,QACL,KAAK;AAAA,QACL,KAAK,KAAK,SAAS;AAAA,MACrB;AAAA,IACF;AAEA,SAAK,OAAO,IAAI,UAAU;AAC1B,WAAO;AAAA,EACT;AACF;AAgBO,SAAS,oBACd,UACA,UAA8B,CAAC,GACH;AA7G9B;AA8GE,QAAM,aAAa,QAAQ,cAAc;AACzC,QAAM,cAAc,QAAQ,eAAe;AAE3C,QAAM,cAAc,IAAI,gBAAgB,YAAY,WAAW;AAC/D,QAAM,cAAU,2CAAgC;AAChD,QAAM,aAAS,gBAAI;AAGnB,QAAM,cAAU,qBAAAD,SAAO,QAAQ,EAC5B,aAAa;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC,EACA,OAAO,OAAO,EACd,cAAc,WAAW,EACzB,eAAe,UAAU;AAE5B,MAAI,iBAAiB;AAErB,QAAM,UAAU,MAAM;AACpB,WAAO,MAAM,6BAA6B;AAE1C,YAAQ,MAAM;AACd,QAAI,gBAAgB;AAClB,uBAAiB;AACjB,cAAQ,KAAK,SAAS;AAAA,IACxB;AAAA,EACF;AAEA,UAAQ,GAAG,SAAS,CAAC,QAAe;AAClC,YAAI,oCAAsB,GAAG,GAAG;AAE9B,aAAO,MAAM,sCAAsC;AAAA,IACrD,OAAO;AACL,aAAO,MAAM,KAAK,sBAAsB;AAAA,IAC1C;AACA,qBAAiB;AACjB,YAAQ;AAAA,EACV,CAAC;AAED,QAAM,eAAe,QAAQ,KAAK;AAClC,gBAAQ,gBAAR,mBAAqB,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK;AAErE,eAAa,GAAG,QAAQ,CAAC,UAAkB;AACzC,UAAM,cAAc,MAAM,OAAO;AAAA,MAC/B,MAAM;AAAA,MACN,MAAM,aAAa,MAAM;AAAA,IAC3B;AAEA,UAAM,SAAS,YAAY,MAAM,WAAW;AAC5C,eAAW,SAAS,QAAQ;AAC1B,cAAQ,MAAM,KAAK;AAAA,IACrB;AAAA,EACF,CAAC;AAED,eAAa,GAAG,OAAO,MAAM;AAC3B,UAAM,SAAS,YAAY,MAAM;AACjC,eAAW,SAAS,QAAQ;AAC1B,cAAQ,MAAM,KAAK;AAAA,IACrB;AACA,qBAAiB;AACjB,YAAQ,MAAM;AAAA,EAChB,CAAC;AAED,eAAa,GAAG,SAAS,CAAC,QAAe;AACvC,WAAO,MAAM,GAAG;AAChB,qBAAiB;AACjB,YAAQ;AAAA,EACV,CAAC;AAED,SAAO,QAAQ,OAAO;AACxB;AASA,gBAAuB,wBACrB,UACA,UAA8B,CAAC,GACY;AAvM7C;AAwME,QAAM,SAAuB,CAAC;AAC9B,QAAM,aAAS,gBAAI;AAEnB,mBAAiB,SAAS,oBAAoB,UAAU,OAAO,GAAG;AAChE,WAAO,KAAK,KAAK;AACjB,UAAM;AAAA,EACR;AAEA,SAAO,GAAC,aAAQ,gBAAR,mBAAqB,UAAS;AACpC,eAAW,SAAS,QAAQ;AAC1B,YAAM;AAAA,IACR;AAAA,EACF;AAEA,SAAO,MAAM,mCAAmC;AAClD;","names":["ffmpeg","ffmpegInstaller"]}
package/dist/audio.d.cts CHANGED
@@ -1,7 +1,7 @@
1
1
  /// <reference types="node" resolution-mode="require"/>
2
2
  import { AudioFrame } from '@livekit/rtc-node';
3
3
  import type { ReadableStream } from 'node:stream/web';
4
- import type { AudioBuffer } from './utils.js';
4
+ import { type AudioBuffer } from './utils.js';
5
5
  export interface AudioDecodeOptions {
6
6
  sampleRate?: number;
7
7
  numChannels?: number;
package/dist/audio.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  /// <reference types="node" resolution-mode="require"/>
2
2
  import { AudioFrame } from '@livekit/rtc-node';
3
3
  import type { ReadableStream } from 'node:stream/web';
4
- import type { AudioBuffer } from './utils.js';
4
+ import { type AudioBuffer } from './utils.js';
5
5
  export interface AudioDecodeOptions {
6
6
  sampleRate?: number;
7
7
  numChannels?: number;
@@ -1 +1 @@
1
- {"version":3,"file":"audio.d.ts","sourceRoot":"","sources":["../src/audio.ts"],"names":[],"mappings":";AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAGtD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAI9C,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,WAAW,CAAC;CAC3B;AAED,wBAAgB,6BAA6B,CAAC,KAAK,EAAE,WAAW,UAK/D;AAED,uFAAuF;AACvF,qBAAa,eAAe;;gBAOd,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,iBAAiB,GAAE,MAAM,GAAG,IAAW;IAY5F,KAAK,CAAC,IAAI,EAAE,WAAW,GAAG,UAAU,EAAE;IAqBtC,KAAK,IAAI,UAAU,EAAE;CAkBtB;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,kBAAuB,GAC/B,cAAc,CAAC,UAAU,CAAC,CAmE5B;AAED;;;;;;GAMG;AACH,wBAAuB,uBAAuB,CAC5C,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,kBAAuB,GAC/B,cAAc,CAAC,UAAU,EAAE,IAAI,EAAE,OAAO,CAAC,CAgB3C"}
1
+ {"version":3,"file":"audio.d.ts","sourceRoot":"","sources":["../src/audio.ts"],"names":[],"mappings":";AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAGtD,OAAO,EAAE,KAAK,WAAW,EAAyB,MAAM,YAAY,CAAC;AAIrE,MAAM,WAAW,kBAAkB;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,WAAW,CAAC;CAC3B;AAED,wBAAgB,6BAA6B,CAAC,KAAK,EAAE,WAAW,UAK/D;AAED,uFAAuF;AACvF,qBAAa,eAAe;;gBAOd,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,iBAAiB,GAAE,MAAM,GAAG,IAAW;IAY5F,KAAK,CAAC,IAAI,EAAE,WAAW,GAAG,UAAU,EAAE;IAqBtC,KAAK,IAAI,UAAU,EAAE;CAkBtB;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,kBAAuB,GAC/B,cAAc,CAAC,UAAU,CAAC,CA8E5B;AAED;;;;;;GAMG;AACH,wBAAuB,uBAAuB,CAC5C,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,kBAAuB,GAC/B,cAAc,CAAC,UAAU,EAAE,IAAI,EAAE,OAAO,CAAC,CAgB3C"}
package/dist/audio.js CHANGED
@@ -3,6 +3,7 @@ import { AudioFrame } from "@livekit/rtc-node";
3
3
  import ffmpeg from "fluent-ffmpeg";
4
4
  import { log } from "./log.js";
5
5
  import { createStreamChannel } from "./stream/stream_channel.js";
6
+ import { isFfmpegTeardownError } from "./utils.js";
6
7
  ffmpeg.setFfmpegPath(ffmpegInstaller.path);
7
8
  function calculateAudioDurationSeconds(frame) {
8
9
  return Array.isArray(frame) ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0) : frame.samplesPerChannel / frame.sampleRate;
@@ -82,6 +83,15 @@ function audioFramesFromFile(filePath, options = {}) {
82
83
  command.kill("SIGKILL");
83
84
  }
84
85
  };
86
+ command.on("error", (err) => {
87
+ if (isFfmpegTeardownError(err)) {
88
+ logger.debug("FFmpeg command ended during shutdown");
89
+ } else {
90
+ logger.error(err, "FFmpeg command error");
91
+ }
92
+ commandRunning = false;
93
+ onClose();
94
+ });
85
95
  const outputStream = command.pipe();
86
96
  (_a = options.abortSignal) == null ? void 0 : _a.addEventListener("abort", onClose, { once: true });
87
97
  outputStream.on("data", (chunk) => {
package/dist/audio.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/audio.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport ffmpegInstaller from '@ffmpeg-installer/ffmpeg';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport ffmpeg from 'fluent-ffmpeg';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from './log.js';\nimport { createStreamChannel } from './stream/stream_channel.js';\nimport type { AudioBuffer } from './utils.js';\n\nffmpeg.setFfmpegPath(ffmpegInstaller.path);\n\nexport interface AudioDecodeOptions {\n sampleRate?: number;\n numChannels?: number;\n /**\n * Audio format hint (e.g., 'mp3', 'ogg', 'wav', 'opus')\n * If not provided, FFmpeg will auto-detect\n */\n format?: string;\n abortSignal?: AbortSignal;\n}\n\nexport function calculateAudioDurationSeconds(frame: AudioBuffer) {\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n return Array.isArray(frame)\n ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0)\n : frame.samplesPerChannel / frame.sampleRate;\n}\n\n/** AudioByteStream translates between LiveKit AudioFrame packets and raw byte data. */\nexport class AudioByteStream {\n #sampleRate: number;\n #numChannels: number;\n #bytesPerFrame: number;\n #buf: Int8Array;\n #logger = log();\n\n constructor(sampleRate: number, numChannels: number, samplesPerChannel: number | null = null) {\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n\n if (samplesPerChannel === null) {\n samplesPerChannel = Math.floor(sampleRate / 10); // 100ms by default\n }\n\n this.#bytesPerFrame = numChannels * samplesPerChannel * 2; // 2 bytes per sample (Int16)\n this.#buf = new Int8Array();\n }\n\n write(data: ArrayBuffer): AudioFrame[] {\n this.#buf = new Int8Array([...this.#buf, ...new Int8Array(data)]);\n\n const frames: AudioFrame[] = [];\n while (this.#buf.length >= this.#bytesPerFrame) {\n const frameData = this.#buf.slice(0, this.#bytesPerFrame);\n this.#buf = this.#buf.slice(this.#bytesPerFrame);\n\n frames.push(\n new AudioFrame(\n new Int16Array(frameData.buffer),\n this.#sampleRate,\n this.#numChannels,\n frameData.length / 2,\n ),\n );\n }\n\n return frames;\n }\n\n flush(): AudioFrame[] {\n if (this.#buf.length % (2 * this.#numChannels) !== 0) {\n this.#logger.warn('AudioByteStream: incomplete frame during flush, dropping');\n return [];\n }\n\n const frames = [\n new AudioFrame(\n new Int16Array(this.#buf.buffer),\n this.#sampleRate,\n this.#numChannels,\n this.#buf.length / 2,\n ),\n ];\n\n this.#buf = new Int8Array(); // Clear buffer after flushing\n return frames;\n }\n}\n\n/**\n * Decode an audio file into AudioFrame instances\n *\n * @param filePath - Path to the audio file\n * @param options - Decoding options\n * @returns AsyncGenerator that yields AudioFrame objects\n *\n * @example\n * ```typescript\n * for await (const frame of audioFramesFromFile('audio.ogg', { sampleRate: 48000 })) {\n * console.log('Frame:', frame.samplesPerChannel, 'samples');\n * }\n * ```\n */\nexport function audioFramesFromFile(\n filePath: string,\n options: AudioDecodeOptions = {},\n): ReadableStream<AudioFrame> {\n const sampleRate = options.sampleRate ?? 48000;\n const numChannels = options.numChannels ?? 1;\n\n const audioStream = new AudioByteStream(sampleRate, numChannels);\n const channel = createStreamChannel<AudioFrame>();\n const logger = log();\n\n // TODO (Brian): decode WAV using a custom decoder instead of FFmpeg\n const command = ffmpeg(filePath)\n .inputOptions([\n '-probesize',\n '32',\n '-analyzeduration',\n '0',\n '-fflags',\n '+nobuffer+flush_packets',\n '-flags',\n 'low_delay',\n ])\n .format('s16le') // signed 16-bit little-endian PCM to be consistent cross-platform\n .audioChannels(numChannels)\n .audioFrequency(sampleRate);\n\n let commandRunning = true;\n\n const onClose = () => {\n logger.debug('Audio file playback aborted');\n\n channel.close();\n if (commandRunning) {\n commandRunning = false;\n command.kill('SIGKILL');\n }\n };\n\n const outputStream = command.pipe();\n options.abortSignal?.addEventListener('abort', onClose, { once: true });\n\n outputStream.on('data', (chunk: Buffer) => {\n const arrayBuffer = chunk.buffer.slice(\n chunk.byteOffset,\n chunk.byteOffset + chunk.byteLength,\n ) as ArrayBuffer;\n\n const frames = audioStream.write(arrayBuffer);\n for (const frame of frames) {\n channel.write(frame);\n }\n });\n\n outputStream.on('end', () => {\n const frames = audioStream.flush();\n for (const frame of frames) {\n channel.write(frame);\n }\n commandRunning = false;\n channel.close();\n });\n\n outputStream.on('error', (err: Error) => {\n logger.error(err);\n commandRunning = false;\n onClose();\n });\n\n return channel.stream();\n}\n\n/**\n * Loop audio frames from a file indefinitely\n *\n * @param filePath - Path to the audio file\n * @param options - Decoding options\n * @returns AsyncGenerator that yields AudioFrame objects in an infinite loop\n */\nexport async function* loopAudioFramesFromFile(\n filePath: string,\n options: AudioDecodeOptions = {},\n): AsyncGenerator<AudioFrame, void, unknown> {\n const frames: AudioFrame[] = [];\n const logger = log();\n\n for await (const frame of audioFramesFromFile(filePath, options)) {\n frames.push(frame);\n yield frame;\n }\n\n while (!options.abortSignal?.aborted) {\n for (const frame of frames) {\n yield frame;\n }\n }\n\n logger.debug('Audio file playback loop finished');\n}\n"],"mappings":"AAGA,OAAO,qBAAqB;AAC5B,SAAS,kBAAkB;AAC3B,OAAO,YAAY;AAEnB,SAAS,WAAW;AACpB,SAAS,2BAA2B;AAGpC,OAAO,cAAc,gBAAgB,IAAI;AAalC,SAAS,8BAA8B,OAAoB;AAEhE,SAAO,MAAM,QAAQ,KAAK,IACtB,MAAM,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,oBAAoB,EAAE,YAAY,CAAC,IACpE,MAAM,oBAAoB,MAAM;AACtC;AAGO,MAAM,gBAAgB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,YAAoB,aAAqB,oBAAmC,MAAM;AAC5F,SAAK,cAAc;AACnB,SAAK,eAAe;AAEpB,QAAI,sBAAsB,MAAM;AAC9B,0BAAoB,KAAK,MAAM,aAAa,EAAE;AAAA,IAChD;AAEA,SAAK,iBAAiB,cAAc,oBAAoB;AACxD,SAAK,OAAO,IAAI,UAAU;AAAA,EAC5B;AAAA,EAEA,MAAM,MAAiC;AACrC,SAAK,OAAO,IAAI,UAAU,CAAC,GAAG,KAAK,MAAM,GAAG,IAAI,UAAU,IAAI,CAAC,CAAC;AAEhE,UAAM,SAAuB,CAAC;AAC9B,WAAO,KAAK,KAAK,UAAU,KAAK,gBAAgB;AAC9C,YAAM,YAAY,KAAK,KAAK,MAAM,GAAG,KAAK,cAAc;AACxD,WAAK,OAAO,KAAK,KAAK,MAAM,KAAK,cAAc;AAE/C,aAAO;AAAA,QACL,IAAI;AAAA,UACF,IAAI,WAAW,UAAU,MAAM;AAAA,UAC/B,KAAK;AAAA,UACL,KAAK;AAAA,UACL,UAAU,SAAS;AAAA,QACrB;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAAA,EAEA,QAAsB;AACpB,QAAI,KAAK,KAAK,UAAU,IAAI,KAAK,kBAAkB,GAAG;AACpD,WAAK,QAAQ,KAAK,0DAA0D;AAC5E,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,SAAS;AAAA,MACb,IAAI;AAAA,QACF,IAAI,WAAW,KAAK,KAAK,MAAM;AAAA,QAC/B,KAAK;AAAA,QACL,KAAK;AAAA,QACL,KAAK,KAAK,SAAS;AAAA,MACrB;AAAA,IACF;AAEA,SAAK,OAAO,IAAI,UAAU;AAC1B,WAAO;AAAA,EACT;AACF;AAgBO,SAAS,oBACd,UACA,UAA8B,CAAC,GACH;AA7G9B;AA8GE,QAAM,aAAa,QAAQ,cAAc;AACzC,QAAM,cAAc,QAAQ,eAAe;AAE3C,QAAM,cAAc,IAAI,gBAAgB,YAAY,WAAW;AAC/D,QAAM,UAAU,oBAAgC;AAChD,QAAM,SAAS,IAAI;AAGnB,QAAM,UAAU,OAAO,QAAQ,EAC5B,aAAa;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC,EACA,OAAO,OAAO,EACd,cAAc,WAAW,EACzB,eAAe,UAAU;AAE5B,MAAI,iBAAiB;AAErB,QAAM,UAAU,MAAM;AACpB,WAAO,MAAM,6BAA6B;AAE1C,YAAQ,MAAM;AACd,QAAI,gBAAgB;AAClB,uBAAiB;AACjB,cAAQ,KAAK,SAAS;AAAA,IACxB;AAAA,EACF;AAEA,QAAM,eAAe,QAAQ,KAAK;AAClC,gBAAQ,gBAAR,mBAAqB,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK;AAErE,eAAa,GAAG,QAAQ,CAAC,UAAkB;AACzC,UAAM,cAAc,MAAM,OAAO;AAAA,MAC/B,MAAM;AAAA,MACN,MAAM,aAAa,MAAM;AAAA,IAC3B;AAEA,UAAM,SAAS,YAAY,MAAM,WAAW;AAC5C,eAAW,SAAS,QAAQ;AAC1B,cAAQ,MAAM,KAAK;AAAA,IACrB;AAAA,EACF,CAAC;AAED,eAAa,GAAG,OAAO,MAAM;AAC3B,UAAM,SAAS,YAAY,MAAM;AACjC,eAAW,SAAS,QAAQ;AAC1B,cAAQ,MAAM,KAAK;AAAA,IACrB;AACA,qBAAiB;AACjB,YAAQ,MAAM;AAAA,EAChB,CAAC;AAED,eAAa,GAAG,SAAS,CAAC,QAAe;AACvC,WAAO,MAAM,GAAG;AAChB,qBAAiB;AACjB,YAAQ;AAAA,EACV,CAAC;AAED,SAAO,QAAQ,OAAO;AACxB;AASA,gBAAuB,wBACrB,UACA,UAA8B,CAAC,GACY;AA5L7C;AA6LE,QAAM,SAAuB,CAAC;AAC9B,QAAM,SAAS,IAAI;AAEnB,mBAAiB,SAAS,oBAAoB,UAAU,OAAO,GAAG;AAChE,WAAO,KAAK,KAAK;AACjB,UAAM;AAAA,EACR;AAEA,SAAO,GAAC,aAAQ,gBAAR,mBAAqB,UAAS;AACpC,eAAW,SAAS,QAAQ;AAC1B,YAAM;AAAA,IACR;AAAA,EACF;AAEA,SAAO,MAAM,mCAAmC;AAClD;","names":[]}
1
+ {"version":3,"sources":["../src/audio.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport ffmpegInstaller from '@ffmpeg-installer/ffmpeg';\nimport { AudioFrame } from '@livekit/rtc-node';\nimport ffmpeg from 'fluent-ffmpeg';\nimport type { ReadableStream } from 'node:stream/web';\nimport { log } from './log.js';\nimport { createStreamChannel } from './stream/stream_channel.js';\nimport { type AudioBuffer, isFfmpegTeardownError } from './utils.js';\n\nffmpeg.setFfmpegPath(ffmpegInstaller.path);\n\nexport interface AudioDecodeOptions {\n sampleRate?: number;\n numChannels?: number;\n /**\n * Audio format hint (e.g., 'mp3', 'ogg', 'wav', 'opus')\n * If not provided, FFmpeg will auto-detect\n */\n format?: string;\n abortSignal?: AbortSignal;\n}\n\nexport function calculateAudioDurationSeconds(frame: AudioBuffer) {\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n return Array.isArray(frame)\n ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0)\n : frame.samplesPerChannel / frame.sampleRate;\n}\n\n/** AudioByteStream translates between LiveKit AudioFrame packets and raw byte data. */\nexport class AudioByteStream {\n #sampleRate: number;\n #numChannels: number;\n #bytesPerFrame: number;\n #buf: Int8Array;\n #logger = log();\n\n constructor(sampleRate: number, numChannels: number, samplesPerChannel: number | null = null) {\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n\n if (samplesPerChannel === null) {\n samplesPerChannel = Math.floor(sampleRate / 10); // 100ms by default\n }\n\n this.#bytesPerFrame = numChannels * samplesPerChannel * 2; // 2 bytes per sample (Int16)\n this.#buf = new Int8Array();\n }\n\n write(data: ArrayBuffer): AudioFrame[] {\n this.#buf = new Int8Array([...this.#buf, ...new Int8Array(data)]);\n\n const frames: AudioFrame[] = [];\n while (this.#buf.length >= this.#bytesPerFrame) {\n const frameData = this.#buf.slice(0, this.#bytesPerFrame);\n this.#buf = this.#buf.slice(this.#bytesPerFrame);\n\n frames.push(\n new AudioFrame(\n new Int16Array(frameData.buffer),\n this.#sampleRate,\n this.#numChannels,\n frameData.length / 2,\n ),\n );\n }\n\n return frames;\n }\n\n flush(): AudioFrame[] {\n if (this.#buf.length % (2 * this.#numChannels) !== 0) {\n this.#logger.warn('AudioByteStream: incomplete frame during flush, dropping');\n return [];\n }\n\n const frames = [\n new AudioFrame(\n new Int16Array(this.#buf.buffer),\n this.#sampleRate,\n this.#numChannels,\n this.#buf.length / 2,\n ),\n ];\n\n this.#buf = new Int8Array(); // Clear buffer after flushing\n return frames;\n }\n}\n\n/**\n * Decode an audio file into AudioFrame instances\n *\n * @param filePath - Path to the audio file\n * @param options - Decoding options\n * @returns AsyncGenerator that yields AudioFrame objects\n *\n * @example\n * ```typescript\n * for await (const frame of audioFramesFromFile('audio.ogg', { sampleRate: 48000 })) {\n * console.log('Frame:', frame.samplesPerChannel, 'samples');\n * }\n * ```\n */\nexport function audioFramesFromFile(\n filePath: string,\n options: AudioDecodeOptions = {},\n): ReadableStream<AudioFrame> {\n const sampleRate = options.sampleRate ?? 48000;\n const numChannels = options.numChannels ?? 1;\n\n const audioStream = new AudioByteStream(sampleRate, numChannels);\n const channel = createStreamChannel<AudioFrame>();\n const logger = log();\n\n // TODO (Brian): decode WAV using a custom decoder instead of FFmpeg\n const command = ffmpeg(filePath)\n .inputOptions([\n '-probesize',\n '32',\n '-analyzeduration',\n '0',\n '-fflags',\n '+nobuffer+flush_packets',\n '-flags',\n 'low_delay',\n ])\n .format('s16le') // signed 16-bit little-endian PCM to be consistent cross-platform\n .audioChannels(numChannels)\n .audioFrequency(sampleRate);\n\n let commandRunning = true;\n\n const onClose = () => {\n logger.debug('Audio file playback aborted');\n\n channel.close();\n if (commandRunning) {\n commandRunning = false;\n command.kill('SIGKILL');\n }\n };\n\n command.on('error', (err: Error) => {\n if (isFfmpegTeardownError(err)) {\n // Expected during teardown — not an error\n logger.debug('FFmpeg command ended during shutdown');\n } else {\n logger.error(err, 'FFmpeg command error');\n }\n commandRunning = false;\n onClose();\n });\n\n const outputStream = command.pipe();\n options.abortSignal?.addEventListener('abort', onClose, { once: true });\n\n outputStream.on('data', (chunk: Buffer) => {\n const arrayBuffer = chunk.buffer.slice(\n chunk.byteOffset,\n chunk.byteOffset + chunk.byteLength,\n ) as ArrayBuffer;\n\n const frames = audioStream.write(arrayBuffer);\n for (const frame of frames) {\n channel.write(frame);\n }\n });\n\n outputStream.on('end', () => {\n const frames = audioStream.flush();\n for (const frame of frames) {\n channel.write(frame);\n }\n commandRunning = false;\n channel.close();\n });\n\n outputStream.on('error', (err: Error) => {\n logger.error(err);\n commandRunning = false;\n onClose();\n });\n\n return channel.stream();\n}\n\n/**\n * Loop audio frames from a file indefinitely\n *\n * @param filePath - Path to the audio file\n * @param options - Decoding options\n * @returns AsyncGenerator that yields AudioFrame objects in an infinite loop\n */\nexport async function* loopAudioFramesFromFile(\n filePath: string,\n options: AudioDecodeOptions = {},\n): AsyncGenerator<AudioFrame, void, unknown> {\n const frames: AudioFrame[] = [];\n const logger = log();\n\n for await (const frame of audioFramesFromFile(filePath, options)) {\n frames.push(frame);\n yield frame;\n }\n\n while (!options.abortSignal?.aborted) {\n for (const frame of frames) {\n yield frame;\n }\n }\n\n logger.debug('Audio file playback loop finished');\n}\n"],"mappings":"AAGA,OAAO,qBAAqB;AAC5B,SAAS,kBAAkB;AAC3B,OAAO,YAAY;AAEnB,SAAS,WAAW;AACpB,SAAS,2BAA2B;AACpC,SAA2B,6BAA6B;AAExD,OAAO,cAAc,gBAAgB,IAAI;AAalC,SAAS,8BAA8B,OAAoB;AAEhE,SAAO,MAAM,QAAQ,KAAK,IACtB,MAAM,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,oBAAoB,EAAE,YAAY,CAAC,IACpE,MAAM,oBAAoB,MAAM;AACtC;AAGO,MAAM,gBAAgB;AAAA,EAC3B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,YAAoB,aAAqB,oBAAmC,MAAM;AAC5F,SAAK,cAAc;AACnB,SAAK,eAAe;AAEpB,QAAI,sBAAsB,MAAM;AAC9B,0BAAoB,KAAK,MAAM,aAAa,EAAE;AAAA,IAChD;AAEA,SAAK,iBAAiB,cAAc,oBAAoB;AACxD,SAAK,OAAO,IAAI,UAAU;AAAA,EAC5B;AAAA,EAEA,MAAM,MAAiC;AACrC,SAAK,OAAO,IAAI,UAAU,CAAC,GAAG,KAAK,MAAM,GAAG,IAAI,UAAU,IAAI,CAAC,CAAC;AAEhE,UAAM,SAAuB,CAAC;AAC9B,WAAO,KAAK,KAAK,UAAU,KAAK,gBAAgB;AAC9C,YAAM,YAAY,KAAK,KAAK,MAAM,GAAG,KAAK,cAAc;AACxD,WAAK,OAAO,KAAK,KAAK,MAAM,KAAK,cAAc;AAE/C,aAAO;AAAA,QACL,IAAI;AAAA,UACF,IAAI,WAAW,UAAU,MAAM;AAAA,UAC/B,KAAK;AAAA,UACL,KAAK;AAAA,UACL,UAAU,SAAS;AAAA,QACrB;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AAAA,EAEA,QAAsB;AACpB,QAAI,KAAK,KAAK,UAAU,IAAI,KAAK,kBAAkB,GAAG;AACpD,WAAK,QAAQ,KAAK,0DAA0D;AAC5E,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,SAAS;AAAA,MACb,IAAI;AAAA,QACF,IAAI,WAAW,KAAK,KAAK,MAAM;AAAA,QAC/B,KAAK;AAAA,QACL,KAAK;AAAA,QACL,KAAK,KAAK,SAAS;AAAA,MACrB;AAAA,IACF;AAEA,SAAK,OAAO,IAAI,UAAU;AAC1B,WAAO;AAAA,EACT;AACF;AAgBO,SAAS,oBACd,UACA,UAA8B,CAAC,GACH;AA7G9B;AA8GE,QAAM,aAAa,QAAQ,cAAc;AACzC,QAAM,cAAc,QAAQ,eAAe;AAE3C,QAAM,cAAc,IAAI,gBAAgB,YAAY,WAAW;AAC/D,QAAM,UAAU,oBAAgC;AAChD,QAAM,SAAS,IAAI;AAGnB,QAAM,UAAU,OAAO,QAAQ,EAC5B,aAAa;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,CAAC,EACA,OAAO,OAAO,EACd,cAAc,WAAW,EACzB,eAAe,UAAU;AAE5B,MAAI,iBAAiB;AAErB,QAAM,UAAU,MAAM;AACpB,WAAO,MAAM,6BAA6B;AAE1C,YAAQ,MAAM;AACd,QAAI,gBAAgB;AAClB,uBAAiB;AACjB,cAAQ,KAAK,SAAS;AAAA,IACxB;AAAA,EACF;AAEA,UAAQ,GAAG,SAAS,CAAC,QAAe;AAClC,QAAI,sBAAsB,GAAG,GAAG;AAE9B,aAAO,MAAM,sCAAsC;AAAA,IACrD,OAAO;AACL,aAAO,MAAM,KAAK,sBAAsB;AAAA,IAC1C;AACA,qBAAiB;AACjB,YAAQ;AAAA,EACV,CAAC;AAED,QAAM,eAAe,QAAQ,KAAK;AAClC,gBAAQ,gBAAR,mBAAqB,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK;AAErE,eAAa,GAAG,QAAQ,CAAC,UAAkB;AACzC,UAAM,cAAc,MAAM,OAAO;AAAA,MAC/B,MAAM;AAAA,MACN,MAAM,aAAa,MAAM;AAAA,IAC3B;AAEA,UAAM,SAAS,YAAY,MAAM,WAAW;AAC5C,eAAW,SAAS,QAAQ;AAC1B,cAAQ,MAAM,KAAK;AAAA,IACrB;AAAA,EACF,CAAC;AAED,eAAa,GAAG,OAAO,MAAM;AAC3B,UAAM,SAAS,YAAY,MAAM;AACjC,eAAW,SAAS,QAAQ;AAC1B,cAAQ,MAAM,KAAK;AAAA,IACrB;AACA,qBAAiB;AACjB,YAAQ,MAAM;AAAA,EAChB,CAAC;AAED,eAAa,GAAG,SAAS,CAAC,QAAe;AACvC,WAAO,MAAM,GAAG;AAChB,qBAAiB;AACjB,YAAQ;AAAA,EACV,CAAC;AAED,SAAO,QAAQ,OAAO;AACxB;AASA,gBAAuB,wBACrB,UACA,UAA8B,CAAC,GACY;AAvM7C;AAwME,QAAM,SAAuB,CAAC;AAC9B,QAAM,SAAS,IAAI;AAEnB,mBAAiB,SAAS,oBAAoB,UAAU,OAAO,GAAG;AAChE,WAAO,KAAK,KAAK;AACjB,UAAM;AAAA,EACR;AAEA,SAAO,GAAC,aAAQ,gBAAR,mBAAqB,UAAS;AACpC,eAAW,SAAS,QAAQ;AAC1B,YAAM;AAAA,IACR;AAAA,EACF;AAEA,SAAO,MAAM,mCAAmC;AAClD;","names":[]}
@@ -227,15 +227,15 @@ export declare const sttWordSchema: z.ZodObject<{
227
227
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
228
228
  }, "strip", z.ZodTypeAny, {
229
229
  end: number;
230
- confidence: number;
231
230
  start: number;
231
+ confidence: number;
232
232
  word: string;
233
233
  extra?: unknown;
234
234
  }, {
235
235
  end?: number | undefined;
236
+ start?: number | undefined;
236
237
  extra?: unknown;
237
238
  confidence?: number | undefined;
238
- start?: number | undefined;
239
239
  word?: string | undefined;
240
240
  }>;
241
241
  export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
@@ -254,29 +254,29 @@ export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
254
254
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
255
255
  }, "strip", z.ZodTypeAny, {
256
256
  end: number;
257
- confidence: number;
258
257
  start: number;
258
+ confidence: number;
259
259
  word: string;
260
260
  extra?: unknown;
261
261
  }, {
262
262
  end?: number | undefined;
263
+ start?: number | undefined;
263
264
  extra?: unknown;
264
265
  confidence?: number | undefined;
265
- start?: number | undefined;
266
266
  word?: string | undefined;
267
267
  }>, "many">>>;
268
268
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
269
269
  }, "strip", z.ZodTypeAny, {
270
+ start: number;
270
271
  type: "interim_transcript";
271
272
  transcript: string;
272
273
  language: string;
273
274
  confidence: number;
274
- start: number;
275
275
  duration: number;
276
276
  words: {
277
277
  end: number;
278
- confidence: number;
279
278
  start: number;
279
+ confidence: number;
280
280
  word: string;
281
281
  extra?: unknown;
282
282
  }[];
@@ -284,18 +284,18 @@ export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
284
284
  session_id?: string | undefined;
285
285
  }, {
286
286
  type: "interim_transcript";
287
+ start?: number | undefined;
287
288
  extra?: unknown;
288
289
  transcript?: string | undefined;
289
290
  session_id?: string | undefined;
290
291
  language?: string | undefined;
291
292
  confidence?: number | undefined;
292
- start?: number | undefined;
293
293
  duration?: number | undefined;
294
294
  words?: {
295
295
  end?: number | undefined;
296
+ start?: number | undefined;
296
297
  extra?: unknown;
297
298
  confidence?: number | undefined;
298
- start?: number | undefined;
299
299
  word?: string | undefined;
300
300
  }[] | undefined;
301
301
  }>;
@@ -315,29 +315,29 @@ export declare const sttFinalTranscriptEventSchema: z.ZodObject<{
315
315
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
316
316
  }, "strip", z.ZodTypeAny, {
317
317
  end: number;
318
- confidence: number;
319
318
  start: number;
319
+ confidence: number;
320
320
  word: string;
321
321
  extra?: unknown;
322
322
  }, {
323
323
  end?: number | undefined;
324
+ start?: number | undefined;
324
325
  extra?: unknown;
325
326
  confidence?: number | undefined;
326
- start?: number | undefined;
327
327
  word?: string | undefined;
328
328
  }>, "many">>>;
329
329
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
330
330
  }, "strip", z.ZodTypeAny, {
331
+ start: number;
331
332
  type: "final_transcript";
332
333
  transcript: string;
333
334
  language: string;
334
335
  confidence: number;
335
- start: number;
336
336
  duration: number;
337
337
  words: {
338
338
  end: number;
339
- confidence: number;
340
339
  start: number;
340
+ confidence: number;
341
341
  word: string;
342
342
  extra?: unknown;
343
343
  }[];
@@ -345,18 +345,18 @@ export declare const sttFinalTranscriptEventSchema: z.ZodObject<{
345
345
  session_id?: string | undefined;
346
346
  }, {
347
347
  type: "final_transcript";
348
+ start?: number | undefined;
348
349
  extra?: unknown;
349
350
  transcript?: string | undefined;
350
351
  session_id?: string | undefined;
351
352
  language?: string | undefined;
352
353
  confidence?: number | undefined;
353
- start?: number | undefined;
354
354
  duration?: number | undefined;
355
355
  words?: {
356
356
  end?: number | undefined;
357
+ start?: number | undefined;
357
358
  extra?: unknown;
358
359
  confidence?: number | undefined;
359
- start?: number | undefined;
360
360
  word?: string | undefined;
361
361
  }[] | undefined;
362
362
  }>;
@@ -434,29 +434,29 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
434
434
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
435
435
  }, "strip", z.ZodTypeAny, {
436
436
  end: number;
437
- confidence: number;
438
437
  start: number;
438
+ confidence: number;
439
439
  word: string;
440
440
  extra?: unknown;
441
441
  }, {
442
442
  end?: number | undefined;
443
+ start?: number | undefined;
443
444
  extra?: unknown;
444
445
  confidence?: number | undefined;
445
- start?: number | undefined;
446
446
  word?: string | undefined;
447
447
  }>, "many">>>;
448
448
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
449
449
  }, "strip", z.ZodTypeAny, {
450
+ start: number;
450
451
  type: "interim_transcript";
451
452
  transcript: string;
452
453
  language: string;
453
454
  confidence: number;
454
- start: number;
455
455
  duration: number;
456
456
  words: {
457
457
  end: number;
458
- confidence: number;
459
458
  start: number;
459
+ confidence: number;
460
460
  word: string;
461
461
  extra?: unknown;
462
462
  }[];
@@ -464,18 +464,18 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
464
464
  session_id?: string | undefined;
465
465
  }, {
466
466
  type: "interim_transcript";
467
+ start?: number | undefined;
467
468
  extra?: unknown;
468
469
  transcript?: string | undefined;
469
470
  session_id?: string | undefined;
470
471
  language?: string | undefined;
471
472
  confidence?: number | undefined;
472
- start?: number | undefined;
473
473
  duration?: number | undefined;
474
474
  words?: {
475
475
  end?: number | undefined;
476
+ start?: number | undefined;
476
477
  extra?: unknown;
477
478
  confidence?: number | undefined;
478
- start?: number | undefined;
479
479
  word?: string | undefined;
480
480
  }[] | undefined;
481
481
  }>, z.ZodObject<{
@@ -494,29 +494,29 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
494
494
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
495
495
  }, "strip", z.ZodTypeAny, {
496
496
  end: number;
497
- confidence: number;
498
497
  start: number;
498
+ confidence: number;
499
499
  word: string;
500
500
  extra?: unknown;
501
501
  }, {
502
502
  end?: number | undefined;
503
+ start?: number | undefined;
503
504
  extra?: unknown;
504
505
  confidence?: number | undefined;
505
- start?: number | undefined;
506
506
  word?: string | undefined;
507
507
  }>, "many">>>;
508
508
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
509
509
  }, "strip", z.ZodTypeAny, {
510
+ start: number;
510
511
  type: "final_transcript";
511
512
  transcript: string;
512
513
  language: string;
513
514
  confidence: number;
514
- start: number;
515
515
  duration: number;
516
516
  words: {
517
517
  end: number;
518
- confidence: number;
519
518
  start: number;
519
+ confidence: number;
520
520
  word: string;
521
521
  extra?: unknown;
522
522
  }[];
@@ -524,18 +524,18 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
524
524
  session_id?: string | undefined;
525
525
  }, {
526
526
  type: "final_transcript";
527
+ start?: number | undefined;
527
528
  extra?: unknown;
528
529
  transcript?: string | undefined;
529
530
  session_id?: string | undefined;
530
531
  language?: string | undefined;
531
532
  confidence?: number | undefined;
532
- start?: number | undefined;
533
533
  duration?: number | undefined;
534
534
  words?: {
535
535
  end?: number | undefined;
536
+ start?: number | undefined;
536
537
  extra?: unknown;
537
538
  confidence?: number | undefined;
538
- start?: number | undefined;
539
539
  word?: string | undefined;
540
540
  }[] | undefined;
541
541
  }>, z.ZodObject<{
@@ -227,15 +227,15 @@ export declare const sttWordSchema: z.ZodObject<{
227
227
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
228
228
  }, "strip", z.ZodTypeAny, {
229
229
  end: number;
230
- confidence: number;
231
230
  start: number;
231
+ confidence: number;
232
232
  word: string;
233
233
  extra?: unknown;
234
234
  }, {
235
235
  end?: number | undefined;
236
+ start?: number | undefined;
236
237
  extra?: unknown;
237
238
  confidence?: number | undefined;
238
- start?: number | undefined;
239
239
  word?: string | undefined;
240
240
  }>;
241
241
  export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
@@ -254,29 +254,29 @@ export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
254
254
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
255
255
  }, "strip", z.ZodTypeAny, {
256
256
  end: number;
257
- confidence: number;
258
257
  start: number;
258
+ confidence: number;
259
259
  word: string;
260
260
  extra?: unknown;
261
261
  }, {
262
262
  end?: number | undefined;
263
+ start?: number | undefined;
263
264
  extra?: unknown;
264
265
  confidence?: number | undefined;
265
- start?: number | undefined;
266
266
  word?: string | undefined;
267
267
  }>, "many">>>;
268
268
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
269
269
  }, "strip", z.ZodTypeAny, {
270
+ start: number;
270
271
  type: "interim_transcript";
271
272
  transcript: string;
272
273
  language: string;
273
274
  confidence: number;
274
- start: number;
275
275
  duration: number;
276
276
  words: {
277
277
  end: number;
278
- confidence: number;
279
278
  start: number;
279
+ confidence: number;
280
280
  word: string;
281
281
  extra?: unknown;
282
282
  }[];
@@ -284,18 +284,18 @@ export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
284
284
  session_id?: string | undefined;
285
285
  }, {
286
286
  type: "interim_transcript";
287
+ start?: number | undefined;
287
288
  extra?: unknown;
288
289
  transcript?: string | undefined;
289
290
  session_id?: string | undefined;
290
291
  language?: string | undefined;
291
292
  confidence?: number | undefined;
292
- start?: number | undefined;
293
293
  duration?: number | undefined;
294
294
  words?: {
295
295
  end?: number | undefined;
296
+ start?: number | undefined;
296
297
  extra?: unknown;
297
298
  confidence?: number | undefined;
298
- start?: number | undefined;
299
299
  word?: string | undefined;
300
300
  }[] | undefined;
301
301
  }>;
@@ -315,29 +315,29 @@ export declare const sttFinalTranscriptEventSchema: z.ZodObject<{
315
315
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
316
316
  }, "strip", z.ZodTypeAny, {
317
317
  end: number;
318
- confidence: number;
319
318
  start: number;
319
+ confidence: number;
320
320
  word: string;
321
321
  extra?: unknown;
322
322
  }, {
323
323
  end?: number | undefined;
324
+ start?: number | undefined;
324
325
  extra?: unknown;
325
326
  confidence?: number | undefined;
326
- start?: number | undefined;
327
327
  word?: string | undefined;
328
328
  }>, "many">>>;
329
329
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
330
330
  }, "strip", z.ZodTypeAny, {
331
+ start: number;
331
332
  type: "final_transcript";
332
333
  transcript: string;
333
334
  language: string;
334
335
  confidence: number;
335
- start: number;
336
336
  duration: number;
337
337
  words: {
338
338
  end: number;
339
- confidence: number;
340
339
  start: number;
340
+ confidence: number;
341
341
  word: string;
342
342
  extra?: unknown;
343
343
  }[];
@@ -345,18 +345,18 @@ export declare const sttFinalTranscriptEventSchema: z.ZodObject<{
345
345
  session_id?: string | undefined;
346
346
  }, {
347
347
  type: "final_transcript";
348
+ start?: number | undefined;
348
349
  extra?: unknown;
349
350
  transcript?: string | undefined;
350
351
  session_id?: string | undefined;
351
352
  language?: string | undefined;
352
353
  confidence?: number | undefined;
353
- start?: number | undefined;
354
354
  duration?: number | undefined;
355
355
  words?: {
356
356
  end?: number | undefined;
357
+ start?: number | undefined;
357
358
  extra?: unknown;
358
359
  confidence?: number | undefined;
359
- start?: number | undefined;
360
360
  word?: string | undefined;
361
361
  }[] | undefined;
362
362
  }>;
@@ -434,29 +434,29 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
434
434
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
435
435
  }, "strip", z.ZodTypeAny, {
436
436
  end: number;
437
- confidence: number;
438
437
  start: number;
438
+ confidence: number;
439
439
  word: string;
440
440
  extra?: unknown;
441
441
  }, {
442
442
  end?: number | undefined;
443
+ start?: number | undefined;
443
444
  extra?: unknown;
444
445
  confidence?: number | undefined;
445
- start?: number | undefined;
446
446
  word?: string | undefined;
447
447
  }>, "many">>>;
448
448
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
449
449
  }, "strip", z.ZodTypeAny, {
450
+ start: number;
450
451
  type: "interim_transcript";
451
452
  transcript: string;
452
453
  language: string;
453
454
  confidence: number;
454
- start: number;
455
455
  duration: number;
456
456
  words: {
457
457
  end: number;
458
- confidence: number;
459
458
  start: number;
459
+ confidence: number;
460
460
  word: string;
461
461
  extra?: unknown;
462
462
  }[];
@@ -464,18 +464,18 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
464
464
  session_id?: string | undefined;
465
465
  }, {
466
466
  type: "interim_transcript";
467
+ start?: number | undefined;
467
468
  extra?: unknown;
468
469
  transcript?: string | undefined;
469
470
  session_id?: string | undefined;
470
471
  language?: string | undefined;
471
472
  confidence?: number | undefined;
472
- start?: number | undefined;
473
473
  duration?: number | undefined;
474
474
  words?: {
475
475
  end?: number | undefined;
476
+ start?: number | undefined;
476
477
  extra?: unknown;
477
478
  confidence?: number | undefined;
478
- start?: number | undefined;
479
479
  word?: string | undefined;
480
480
  }[] | undefined;
481
481
  }>, z.ZodObject<{
@@ -494,29 +494,29 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
494
494
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
495
495
  }, "strip", z.ZodTypeAny, {
496
496
  end: number;
497
- confidence: number;
498
497
  start: number;
498
+ confidence: number;
499
499
  word: string;
500
500
  extra?: unknown;
501
501
  }, {
502
502
  end?: number | undefined;
503
+ start?: number | undefined;
503
504
  extra?: unknown;
504
505
  confidence?: number | undefined;
505
- start?: number | undefined;
506
506
  word?: string | undefined;
507
507
  }>, "many">>>;
508
508
  extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
509
509
  }, "strip", z.ZodTypeAny, {
510
+ start: number;
510
511
  type: "final_transcript";
511
512
  transcript: string;
512
513
  language: string;
513
514
  confidence: number;
514
- start: number;
515
515
  duration: number;
516
516
  words: {
517
517
  end: number;
518
- confidence: number;
519
518
  start: number;
519
+ confidence: number;
520
520
  word: string;
521
521
  extra?: unknown;
522
522
  }[];
@@ -524,18 +524,18 @@ export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
524
524
  session_id?: string | undefined;
525
525
  }, {
526
526
  type: "final_transcript";
527
+ start?: number | undefined;
527
528
  extra?: unknown;
528
529
  transcript?: string | undefined;
529
530
  session_id?: string | undefined;
530
531
  language?: string | undefined;
531
532
  confidence?: number | undefined;
532
- start?: number | undefined;
533
533
  duration?: number | undefined;
534
534
  words?: {
535
535
  end?: number | undefined;
536
+ start?: number | undefined;
536
537
  extra?: unknown;
537
538
  confidence?: number | undefined;
538
- start?: number | undefined;
539
539
  word?: string | undefined;
540
540
  }[] | undefined;
541
541
  }>, z.ZodObject<{
@@ -356,13 +356,18 @@ class SynthesizeStream extends import_tts.SynthesizeStream {
356
356
  };
357
357
  const createRecvTask = async (signal) => {
358
358
  let currentSessionId = null;
359
+ const recvTimeoutMs = this.connOptions.timeoutMs;
359
360
  const bstream = new import_audio.AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);
360
361
  const serverEventStream = eventChannel.stream();
361
362
  const reader = serverEventStream.getReader();
362
363
  try {
363
364
  await inputSentEvent.wait();
364
365
  while (!this.closed && !signal.aborted) {
365
- const result = await reader.read();
366
+ const result = await (0, import_utils.waitUntilTimeout)(
367
+ reader.read(),
368
+ recvTimeoutMs,
369
+ () => new import_exceptions.APITimeoutError({ message: "TTS recv idle timeout" })
370
+ );
366
371
  if (signal.aborted) return;
367
372
  if (result.done) return;
368
373
  const serverEvent = result.value;
@@ -406,6 +411,14 @@ class SynthesizeStream extends import_tts.SynthesizeStream {
406
411
  break;
407
412
  }
408
413
  }
414
+ } catch (e) {
415
+ if (e instanceof import_exceptions.APITimeoutError) {
416
+ this.#logger.warn("TTS recv task timed out waiting for server message");
417
+ await resourceCleanup();
418
+ completionFuture.reject(e);
419
+ return;
420
+ }
421
+ throw e;
409
422
  } finally {
410
423
  reader.releaseLock();
411
424
  try {