@livekit/agents 1.0.9 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/dist/audio.cjs +3 -3
  2. package/dist/audio.cjs.map +1 -1
  3. package/dist/audio.d.cts +1 -1
  4. package/dist/audio.d.ts +1 -1
  5. package/dist/audio.d.ts.map +1 -1
  6. package/dist/audio.js +2 -2
  7. package/dist/audio.js.map +1 -1
  8. package/dist/llm/llm.cjs +7 -4
  9. package/dist/llm/llm.cjs.map +1 -1
  10. package/dist/llm/llm.d.ts.map +1 -1
  11. package/dist/llm/llm.js +7 -4
  12. package/dist/llm/llm.js.map +1 -1
  13. package/dist/metrics/base.cjs.map +1 -1
  14. package/dist/metrics/base.d.cts +23 -18
  15. package/dist/metrics/base.d.ts +23 -18
  16. package/dist/metrics/base.d.ts.map +1 -1
  17. package/dist/metrics/usage_collector.cjs +2 -2
  18. package/dist/metrics/usage_collector.cjs.map +1 -1
  19. package/dist/metrics/usage_collector.d.cts +1 -1
  20. package/dist/metrics/usage_collector.d.ts +1 -1
  21. package/dist/metrics/usage_collector.d.ts.map +1 -1
  22. package/dist/metrics/usage_collector.js +2 -2
  23. package/dist/metrics/usage_collector.js.map +1 -1
  24. package/dist/metrics/utils.cjs +14 -7
  25. package/dist/metrics/utils.cjs.map +1 -1
  26. package/dist/metrics/utils.d.ts.map +1 -1
  27. package/dist/metrics/utils.js +14 -7
  28. package/dist/metrics/utils.js.map +1 -1
  29. package/dist/stt/stt.cjs +5 -5
  30. package/dist/stt/stt.cjs.map +1 -1
  31. package/dist/stt/stt.js +6 -6
  32. package/dist/stt/stt.js.map +1 -1
  33. package/dist/tts/tts.cjs +11 -10
  34. package/dist/tts/tts.cjs.map +1 -1
  35. package/dist/tts/tts.d.ts.map +1 -1
  36. package/dist/tts/tts.js +11 -10
  37. package/dist/tts/tts.js.map +1 -1
  38. package/dist/vad.cjs +5 -5
  39. package/dist/vad.cjs.map +1 -1
  40. package/dist/vad.js +5 -5
  41. package/dist/vad.js.map +1 -1
  42. package/dist/voice/agent_activity.cjs +7 -4
  43. package/dist/voice/agent_activity.cjs.map +1 -1
  44. package/dist/voice/agent_activity.d.ts.map +1 -1
  45. package/dist/voice/agent_activity.js +7 -4
  46. package/dist/voice/agent_activity.js.map +1 -1
  47. package/dist/voice/generation_tools.test.cjs +236 -0
  48. package/dist/voice/generation_tools.test.cjs.map +1 -0
  49. package/dist/voice/generation_tools.test.js +235 -0
  50. package/dist/voice/generation_tools.test.js.map +1 -0
  51. package/dist/voice/index.cjs +3 -1
  52. package/dist/voice/index.cjs.map +1 -1
  53. package/dist/voice/index.d.cts +1 -0
  54. package/dist/voice/index.d.ts +1 -0
  55. package/dist/voice/index.d.ts.map +1 -1
  56. package/dist/voice/index.js +1 -0
  57. package/dist/voice/index.js.map +1 -1
  58. package/package.json +1 -1
  59. package/src/audio.ts +1 -1
  60. package/src/llm/llm.ts +7 -4
  61. package/src/metrics/base.ts +23 -18
  62. package/src/metrics/usage_collector.ts +3 -3
  63. package/src/metrics/utils.ts +16 -7
  64. package/src/stt/stt.ts +6 -6
  65. package/src/tts/tts.ts +11 -10
  66. package/src/vad.ts +5 -5
  67. package/src/voice/agent_activity.ts +8 -4
  68. package/src/voice/generation_tools.test.ts +268 -0
  69. package/src/voice/index.ts +1 -0
package/dist/tts/tts.js CHANGED
@@ -120,21 +120,22 @@ class SynthesizeStream {
120
120
  }
121
121
  async monitorMetrics() {
122
122
  const startTime = process.hrtime.bigint();
123
- let audioDuration = 0;
123
+ let audioDurationMs = 0;
124
124
  let ttfb = BigInt(-1);
125
125
  let requestId = "";
126
126
  const emit = () => {
127
127
  if (this.#metricsPendingTexts.length) {
128
128
  const text = this.#metricsPendingTexts.shift();
129
129
  const duration = process.hrtime.bigint() - startTime;
130
+ const roundedAudioDurationMs = Math.round(audioDurationMs);
130
131
  const metrics = {
131
132
  type: "tts_metrics",
132
133
  timestamp: Date.now(),
133
134
  requestId,
134
- ttfb: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1e6))),
135
- duration: Math.trunc(Number(duration / BigInt(1e6))),
135
+ ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1e6))),
136
+ durationMs: Math.trunc(Number(duration / BigInt(1e6))),
136
137
  charactersCount: text.length,
137
- audioDuration,
138
+ audioDurationMs: roundedAudioDurationMs,
138
139
  cancelled: this.abortController.signal.aborted,
139
140
  label: this.#tts.label,
140
141
  streamed: false
@@ -152,7 +153,7 @@ class SynthesizeStream {
152
153
  if (ttfb === BigInt(-1)) {
153
154
  ttfb = process.hrtime.bigint() - startTime;
154
155
  }
155
- audioDuration += audio.frame.samplesPerChannel / audio.frame.sampleRate;
156
+ audioDurationMs += audio.frame.samplesPerChannel / audio.frame.sampleRate * 1e3;
156
157
  if (audio.final) {
157
158
  emit();
158
159
  }
@@ -278,7 +279,7 @@ class ChunkedStream {
278
279
  }
279
280
  async monitorMetrics() {
280
281
  const startTime = process.hrtime.bigint();
281
- let audioDuration = 0;
282
+ let audioDurationMs = 0;
282
283
  let ttfb = BigInt(-1);
283
284
  let requestId = "";
284
285
  for await (const audio of this.queue) {
@@ -287,7 +288,7 @@ class ChunkedStream {
287
288
  if (ttfb === BigInt(-1)) {
288
289
  ttfb = process.hrtime.bigint() - startTime;
289
290
  }
290
- audioDuration += audio.frame.samplesPerChannel / audio.frame.sampleRate;
291
+ audioDurationMs += audio.frame.samplesPerChannel / audio.frame.sampleRate * 1e3;
291
292
  }
292
293
  this.output.close();
293
294
  const duration = process.hrtime.bigint() - startTime;
@@ -295,10 +296,10 @@ class ChunkedStream {
295
296
  type: "tts_metrics",
296
297
  timestamp: Date.now(),
297
298
  requestId,
298
- ttfb: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1e6))),
299
- duration: Math.trunc(Number(duration / BigInt(1e6))),
299
+ ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1e6))),
300
+ durationMs: Math.trunc(Number(duration / BigInt(1e6))),
300
301
  charactersCount: this.#text.length,
301
- audioDuration,
302
+ audioDurationMs: Math.round(audioDurationMs),
302
303
  cancelled: false,
303
304
  // TODO(AJS-186): support ChunkedStream with 1.0 - add this.abortController.signal.aborted here
304
305
  label: this.#tts.label,
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/tts/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIStatusError } from '../_exceptions.js';\nimport { log } from '../log.js';\nimport type { TTSMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { AsyncIterableQueue, delay, mergeFrames, startSoon, toError } from '../utils.js';\n\n/** SynthesizedAudio is a packet of speech synthesis as returned by the TTS. */\nexport interface SynthesizedAudio {\n /** Request ID (one segment could be made up of multiple requests) */\n requestId: string;\n /** Segment ID, each segment is separated by a flush */\n segmentId: string;\n /** Synthesized audio frame */\n frame: AudioFrame;\n /** Current segment of the synthesized audio */\n deltaText?: string;\n /** Whether this is the last frame of the segment (streaming only) */\n final: boolean;\n}\n\n/**\n * Describes the capabilities of the TTS provider.\n *\n * @remarks\n * At present, only `streaming` is supplied to this interface, and the framework only supports\n * providers that do have a streaming endpoint.\n */\nexport interface TTSCapabilities {\n streaming: boolean;\n}\n\nexport interface TTSError {\n type: 'tts_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type TTSCallbacks = {\n ['metrics_collected']: (metrics: TTSMetrics) => void;\n ['error']: (error: TTSError) => void;\n};\n\n/**\n * An instance of a text-to-speech adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child TTS class, which inherits this class's methods.\n */\nexport abstract class TTS extends (EventEmitter as new () => TypedEmitter<TTSCallbacks>) {\n #capabilities: TTSCapabilities;\n #sampleRate: number;\n #numChannels: number;\n abstract label: string;\n\n constructor(sampleRate: number, numChannels: number, capabilities: TTSCapabilities) {\n super();\n this.#capabilities = capabilities;\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n }\n\n /** Returns this TTS's capabilities */\n get capabilities(): TTSCapabilities {\n return this.#capabilities;\n }\n\n /** Returns the sample rate of audio frames returned by this TTS */\n get sampleRate(): number {\n return this.#sampleRate;\n }\n\n /** Returns the channel count of audio frames returned by this TTS */\n get numChannels(): number {\n return this.#numChannels;\n }\n\n /**\n * Receives text and returns synthesis in the form of a {@link ChunkedStream}\n */\n abstract synthesize(text: string): ChunkedStream;\n\n /**\n * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data\n */\n abstract stream(): SynthesizeStream;\n}\n\n/**\n * An instance of a text-to-speech stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * await source.captureFrame(event.frame);\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SynthesizeStream class, which inherits this class's methods.\n */\nexport abstract class SynthesizeStream\n implements AsyncIterableIterator<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>\n{\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n static readonly END_OF_STREAM = Symbol('END_OF_STREAM');\n protected input = new AsyncIterableQueue<string | typeof SynthesizeStream.FLUSH_SENTINEL>();\n protected queue = new AsyncIterableQueue<\n SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM\n >();\n protected output = new AsyncIterableQueue<\n SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM\n >();\n protected closed = false;\n abstract label: string;\n #tts: TTS;\n #metricsPendingTexts: string[] = [];\n #metricsText = '';\n #monitorMetricsTask?: Promise<void>;\n private _connOptions: APIConnectOptions;\n protected abortController = new AbortController();\n\n private deferredInputStream: DeferredReadableStream<\n string | typeof SynthesizeStream.FLUSH_SENTINEL\n >;\n private logger = log();\n\n constructor(tts: TTS, connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS) {\n this.#tts = tts;\n this._connOptions = connOptions;\n this.deferredInputStream = new DeferredReadableStream();\n this.pumpInput();\n this.abortController.signal.addEventListener('abort', () => {\n this.deferredInputStream.detachSource();\n // TODO (AJS-36) clean this up when we refactor with streams\n this.input.close();\n this.output.close();\n this.closed = true;\n });\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIStatusError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#tts.label, attempt: i + 1, error },\n `failed to synthesize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#tts.emit('error', {\n type: 'tts_error',\n timestamp: Date.now(),\n label: this.#tts.label,\n error,\n recoverable,\n });\n }\n\n // TODO(AJS-37) Remove when refactoring TTS to use streams\n protected async pumpInput() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done || value === SynthesizeStream.FLUSH_SENTINEL) {\n break;\n }\n this.pushText(value);\n }\n this.endInput();\n } catch (error) {\n this.logger.error(error, 'Error reading deferred input stream');\n } finally {\n reader.releaseLock();\n // Ensure output is closed when the stream ends\n if (!this.#monitorMetricsTask) {\n // No text was received, close the output directly\n this.output.close();\n }\n }\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let audioDuration = 0;\n let ttfb: bigint = BigInt(-1);\n let requestId = '';\n\n const emit = () => {\n if (this.#metricsPendingTexts.length) {\n const text = this.#metricsPendingTexts.shift()!;\n const duration = process.hrtime.bigint() - startTime;\n const metrics: TTSMetrics = {\n type: 'tts_metrics',\n timestamp: Date.now(),\n requestId,\n ttfb: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n charactersCount: text.length,\n audioDuration,\n cancelled: this.abortController.signal.aborted,\n label: this.#tts.label,\n streamed: false,\n };\n this.#tts.emit('metrics_collected', metrics);\n }\n };\n\n for await (const audio of this.queue) {\n if (this.abortController.signal.aborted) {\n break;\n }\n this.output.put(audio);\n if (audio === SynthesizeStream.END_OF_STREAM) continue;\n requestId = audio.requestId;\n if (ttfb === BigInt(-1)) {\n ttfb = process.hrtime.bigint() - startTime;\n }\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n audioDuration += audio.frame.samplesPerChannel / audio.frame.sampleRate;\n if (audio.final) {\n emit();\n }\n }\n\n if (requestId) {\n emit();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(text: ReadableStream<string>) {\n this.deferredInputStream.setSource(text);\n }\n\n /** Push a string of text to the TTS */\n /** @deprecated Use `updateInputStream` instead */\n pushText(text: string) {\n if (!this.#monitorMetricsTask) {\n this.#monitorMetricsTask = this.monitorMetrics();\n // Close output when metrics task completes\n this.#monitorMetricsTask.finally(() => this.output.close());\n }\n this.#metricsText += text;\n\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(text);\n }\n\n /** Flush the TTS, causing it to process all pending text */\n flush() {\n if (this.#metricsText) {\n this.#metricsPendingTexts.push(this.#metricsText);\n this.#metricsText = '';\n }\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SynthesizeStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n this.flush();\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the TTS stream */\n close() {\n this.abortController.abort();\n }\n\n [Symbol.asyncIterator](): SynthesizeStream {\n return this;\n }\n}\n\n/**\n * An instance of a text-to-speech response, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * await source.captureFrame(event.frame);\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child ChunkedStream class, which inherits this class's methods.\n */\nexport abstract class ChunkedStream implements AsyncIterableIterator<SynthesizedAudio> {\n protected queue = new AsyncIterableQueue<SynthesizedAudio>();\n protected output = new AsyncIterableQueue<SynthesizedAudio>();\n protected closed = false;\n abstract label: string;\n #text: string;\n #tts: TTS;\n private _connOptions: APIConnectOptions;\n private logger = log();\n\n constructor(\n text: string,\n tts: TTS,\n connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#text = text;\n this.#tts = tts;\n this._connOptions = connOptions;\n\n this.monitorMetrics();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n Promise.resolve().then(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIStatusError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#tts.label, attempt: i + 1, error },\n `failed to generate TTS completion, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#tts.emit('error', {\n type: 'tts_error',\n timestamp: Date.now(),\n label: this.#tts.label,\n error,\n recoverable,\n });\n }\n\n protected abstract run(): Promise<void>;\n\n get inputText(): string {\n return this.#text;\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let audioDuration = 0;\n let ttfb: bigint = BigInt(-1);\n let requestId = '';\n\n for await (const audio of this.queue) {\n this.output.put(audio);\n requestId = audio.requestId;\n if (ttfb === BigInt(-1)) {\n ttfb = process.hrtime.bigint() - startTime;\n }\n audioDuration += audio.frame.samplesPerChannel / audio.frame.sampleRate;\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: TTSMetrics = {\n type: 'tts_metrics',\n timestamp: Date.now(),\n requestId,\n ttfb: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n charactersCount: this.#text.length,\n audioDuration,\n cancelled: false, // TODO(AJS-186): support ChunkedStream with 1.0 - add this.abortController.signal.aborted here\n label: this.#tts.label,\n streamed: false,\n };\n this.#tts.emit('metrics_collected', metrics);\n }\n\n /** Collect every frame into one in a single call */\n async collect(): Promise<AudioFrame> {\n const frames = [];\n for await (const event of this) {\n frames.push(event.frame);\n }\n return mergeFrames(frames);\n }\n\n next(): Promise<IteratorResult<SynthesizedAudio>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the TTS stream */\n close() {\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): ChunkedStream {\n return this;\n }\n}\n"],"mappings":"AAKA,SAAS,oBAAoB;AAE7B,SAAS,oBAAoB,sBAAsB;AACnD,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAiC,mCAAmC;AACpE,SAAS,oBAAoB,OAAO,aAAa,WAAW,eAAe;AA+CpE,MAAe,YAAa,aAAsD;AAAA,EACvF;AAAA,EACA;AAAA,EACA;AAAA,EAGA,YAAY,YAAoB,aAAqB,cAA+B;AAClF,UAAM;AACN,SAAK,gBAAgB;AACrB,SAAK,cAAc;AACnB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,cAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAWF;AAgBO,MAAe,iBAEtB;AAAA,EACE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EAClE,OAAgB,gBAAgB,OAAO,eAAe;AAAA,EAC5C,QAAQ,IAAI,mBAAoE;AAAA,EAChF,QAAQ,IAAI,mBAEpB;AAAA,EACQ,SAAS,IAAI,mBAErB;AAAA,EACQ,SAAS;AAAA,EAEnB;AAAA,EACA,uBAAiC,CAAC;AAAA,EAClC,eAAe;AAAA,EACf;AAAA,EACQ;AAAA,EACE,kBAAkB,IAAI,gBAAgB;AAAA,EAExC;AAAA,EAGA,SAAS,IAAI;AAAA,EAErB,YAAY,KAAU,cAAiC,6BAA6B;AAClF,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,uBAAuB;AACtD,SAAK,UAAU;AACf,SAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,WAAK,oBAAoB,aAAa;AAEtC,WAAK,MAAM,MAAM;AACjB,WAAK,OAAO,MAAM;AAClB,WAAK,SAAS;AAAA,IAChB,CAAC;AAMD,cAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,gBAAgB;AACnC,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,6CAA6C,aAAa;AAAA,YAC5D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA;AAAA,EAGA,MAAgB,YAAY;AAC1B,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,QAAQ,UAAU,iBAAiB,gBAAgB;AACrD;AAAA,QACF;AACA,aAAK,SAAS,KAAK;AAAA,MACrB;AACA,WAAK,SAAS;AAAA,IAChB,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,OAAO,qCAAqC;AAAA,IAChE,UAAE;AACA,aAAO,YAAY;AAEnB,UAAI,CAAC,KAAK,qBAAqB;AAE7B,aAAK,OAAO,MAAM;AAAA,MACpB;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,gBAAgB;AACpB,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAEhB,UAAM,OAAO,MAAM;AACjB,UAAI,KAAK,qBAAqB,QAAQ;AACpC,cAAM,OAAO,KAAK,qBAAqB,MAAM;AAC7C,cAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,cAAM,UAAsB;AAAA,UAC1B,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,UACA,MAAM,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,UAC1E,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,UACvD,iBAAiB,KAAK;AAAA,UACtB;AAAA,UACA,WAAW,KAAK,gBAAgB,OAAO;AAAA,UACvC,OAAO,KAAK,KAAK;AAAA,UACjB,UAAU;AAAA,QACZ;AACA,aAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,MAC7C;AAAA,IACF;AAEA,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,MACF;AACA,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,UAAU,iBAAiB,cAAe;AAC9C,kBAAY,MAAM;AAClB,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AAEA,uBAAiB,MAAM,MAAM,oBAAoB,MAAM,MAAM;AAC7D,UAAI,MAAM,OAAO;AACf,aAAK;AAAA,MACP;AAAA,IACF;AAEA,QAAI,WAAW;AACb,WAAK;AAAA,IACP;AAAA,EACF;AAAA,EAIA,kBAAkB,MAA8B;AAC9C,SAAK,oBAAoB,UAAU,IAAI;AAAA,EACzC;AAAA;AAAA;AAAA,EAIA,SAAS,MAAc;AACrB,QAAI,CAAC,KAAK,qBAAqB;AAC7B,WAAK,sBAAsB,KAAK,eAAe;AAE/C,WAAK,oBAAoB,QAAQ,MAAM,KAAK,OAAO,MAAM,CAAC;AAAA,IAC5D;AACA,SAAK,gBAAgB;AAErB,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,IAAI;AAAA,EACrB;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,cAAc;AACrB,WAAK,qBAAqB,KAAK,KAAK,YAAY;AAChD,WAAK,eAAe;AAAA,IACtB;AACA,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,iBAAiB,cAAc;AAAA,EAChD;AAAA;AAAA,EAGA,WAAW;AACT,SAAK,MAAM;AACX,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA0F;AACxF,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AAAA,EAEA,CAAC,OAAO,aAAa,IAAsB;AACzC,WAAO;AAAA,EACT;AACF;AAgBO,MAAe,cAAiE;AAAA,EAC3E,QAAQ,IAAI,mBAAqC;AAAA,EACjD,SAAS,IAAI,mBAAqC;AAAA,EAClD,SAAS;AAAA,EAEnB;AAAA,EACA;AAAA,EACQ;AAAA,EACA,SAAS,IAAI;AAAA,EAErB,YACE,MACA,KACA,cAAiC,6BACjC;AACA,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,eAAe;AAEpB,SAAK,eAAe;AAMpB,YAAQ,QAAQ,EAAE,KAAK,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAC7E;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,gBAAgB;AACnC,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,kDAAkD,aAAa;AAAA,YACjE;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAIA,IAAI,YAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,gBAAgB;AACpB,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAEhB,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,kBAAY,MAAM;AAClB,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,uBAAiB,MAAM,MAAM,oBAAoB,MAAM,MAAM;AAAA,IAC/D;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,MAAM,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC1E,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACvD,iBAAiB,KAAK,MAAM;AAAA,MAC5B;AAAA,MACA,WAAW;AAAA;AAAA,MACX,OAAO,KAAK,KAAK;AAAA,MACjB,UAAU;AAAA,IACZ;AACA,SAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,EAC7C;AAAA;AAAA,EAGA,MAAM,UAA+B;AACnC,UAAM,SAAS,CAAC;AAChB,qBAAiB,SAAS,MAAM;AAC9B,aAAO,KAAK,MAAM,KAAK;AAAA,IACzB;AACA,WAAO,YAAY,MAAM;AAAA,EAC3B;AAAA,EAEA,OAAkD;AAChD,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAmB;AACtC,WAAO;AAAA,EACT;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/tts/tts.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIStatusError } from '../_exceptions.js';\nimport { log } from '../log.js';\nimport type { TTSMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { AsyncIterableQueue, delay, mergeFrames, startSoon, toError } from '../utils.js';\n\n/** SynthesizedAudio is a packet of speech synthesis as returned by the TTS. */\nexport interface SynthesizedAudio {\n /** Request ID (one segment could be made up of multiple requests) */\n requestId: string;\n /** Segment ID, each segment is separated by a flush */\n segmentId: string;\n /** Synthesized audio frame */\n frame: AudioFrame;\n /** Current segment of the synthesized audio */\n deltaText?: string;\n /** Whether this is the last frame of the segment (streaming only) */\n final: boolean;\n}\n\n/**\n * Describes the capabilities of the TTS provider.\n *\n * @remarks\n * At present, only `streaming` is supplied to this interface, and the framework only supports\n * providers that do have a streaming endpoint.\n */\nexport interface TTSCapabilities {\n streaming: boolean;\n}\n\nexport interface TTSError {\n type: 'tts_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type TTSCallbacks = {\n ['metrics_collected']: (metrics: TTSMetrics) => void;\n ['error']: (error: TTSError) => void;\n};\n\n/**\n * An instance of a text-to-speech adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child TTS class, which inherits this class's methods.\n */\nexport abstract class TTS extends (EventEmitter as new () => TypedEmitter<TTSCallbacks>) {\n #capabilities: TTSCapabilities;\n #sampleRate: number;\n #numChannels: number;\n abstract label: string;\n\n constructor(sampleRate: number, numChannels: number, capabilities: TTSCapabilities) {\n super();\n this.#capabilities = capabilities;\n this.#sampleRate = sampleRate;\n this.#numChannels = numChannels;\n }\n\n /** Returns this TTS's capabilities */\n get capabilities(): TTSCapabilities {\n return this.#capabilities;\n }\n\n /** Returns the sample rate of audio frames returned by this TTS */\n get sampleRate(): number {\n return this.#sampleRate;\n }\n\n /** Returns the channel count of audio frames returned by this TTS */\n get numChannels(): number {\n return this.#numChannels;\n }\n\n /**\n * Receives text and returns synthesis in the form of a {@link ChunkedStream}\n */\n abstract synthesize(text: string): ChunkedStream;\n\n /**\n * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data\n */\n abstract stream(): SynthesizeStream;\n}\n\n/**\n * An instance of a text-to-speech stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * await source.captureFrame(event.frame);\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SynthesizeStream class, which inherits this class's methods.\n */\nexport abstract class SynthesizeStream\n implements AsyncIterableIterator<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>\n{\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n static readonly END_OF_STREAM = Symbol('END_OF_STREAM');\n protected input = new AsyncIterableQueue<string | typeof SynthesizeStream.FLUSH_SENTINEL>();\n protected queue = new AsyncIterableQueue<\n SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM\n >();\n protected output = new AsyncIterableQueue<\n SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM\n >();\n protected closed = false;\n abstract label: string;\n #tts: TTS;\n #metricsPendingTexts: string[] = [];\n #metricsText = '';\n #monitorMetricsTask?: Promise<void>;\n private _connOptions: APIConnectOptions;\n protected abortController = new AbortController();\n\n private deferredInputStream: DeferredReadableStream<\n string | typeof SynthesizeStream.FLUSH_SENTINEL\n >;\n private logger = log();\n\n constructor(tts: TTS, connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS) {\n this.#tts = tts;\n this._connOptions = connOptions;\n this.deferredInputStream = new DeferredReadableStream();\n this.pumpInput();\n this.abortController.signal.addEventListener('abort', () => {\n this.deferredInputStream.detachSource();\n // TODO (AJS-36) clean this up when we refactor with streams\n this.input.close();\n this.output.close();\n this.closed = true;\n });\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIStatusError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#tts.label, attempt: i + 1, error },\n `failed to synthesize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#tts.emit('error', {\n type: 'tts_error',\n timestamp: Date.now(),\n label: this.#tts.label,\n error,\n recoverable,\n });\n }\n\n // TODO(AJS-37) Remove when refactoring TTS to use streams\n protected async pumpInput() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done || value === SynthesizeStream.FLUSH_SENTINEL) {\n break;\n }\n this.pushText(value);\n }\n this.endInput();\n } catch (error) {\n this.logger.error(error, 'Error reading deferred input stream');\n } finally {\n reader.releaseLock();\n // Ensure output is closed when the stream ends\n if (!this.#monitorMetricsTask) {\n // No text was received, close the output directly\n this.output.close();\n }\n }\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let audioDurationMs = 0;\n let ttfb: bigint = BigInt(-1);\n let requestId = '';\n\n const emit = () => {\n if (this.#metricsPendingTexts.length) {\n const text = this.#metricsPendingTexts.shift()!;\n const duration = process.hrtime.bigint() - startTime;\n const roundedAudioDurationMs = Math.round(audioDurationMs);\n const metrics: TTSMetrics = {\n type: 'tts_metrics',\n timestamp: Date.now(),\n requestId,\n ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),\n durationMs: Math.trunc(Number(duration / BigInt(1000000))),\n charactersCount: text.length,\n audioDurationMs: roundedAudioDurationMs,\n cancelled: this.abortController.signal.aborted,\n label: this.#tts.label,\n streamed: false,\n };\n this.#tts.emit('metrics_collected', metrics);\n }\n };\n\n for await (const audio of this.queue) {\n if (this.abortController.signal.aborted) {\n break;\n }\n this.output.put(audio);\n if (audio === SynthesizeStream.END_OF_STREAM) continue;\n requestId = audio.requestId;\n if (ttfb === BigInt(-1)) {\n ttfb = process.hrtime.bigint() - startTime;\n }\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n audioDurationMs += (audio.frame.samplesPerChannel / audio.frame.sampleRate) * 1000;\n if (audio.final) {\n emit();\n }\n }\n\n if (requestId) {\n emit();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(text: ReadableStream<string>) {\n this.deferredInputStream.setSource(text);\n }\n\n /** Push a string of text to the TTS */\n /** @deprecated Use `updateInputStream` instead */\n pushText(text: string) {\n if (!this.#monitorMetricsTask) {\n this.#monitorMetricsTask = this.monitorMetrics();\n // Close output when metrics task completes\n this.#monitorMetricsTask.finally(() => this.output.close());\n }\n this.#metricsText += text;\n\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(text);\n }\n\n /** Flush the TTS, causing it to process all pending text */\n flush() {\n if (this.#metricsText) {\n this.#metricsPendingTexts.push(this.#metricsText);\n this.#metricsText = '';\n }\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SynthesizeStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n this.flush();\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the TTS stream */\n close() {\n this.abortController.abort();\n }\n\n [Symbol.asyncIterator](): SynthesizeStream {\n return this;\n }\n}\n\n/**\n * An instance of a text-to-speech response, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * await source.captureFrame(event.frame);\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child ChunkedStream class, which inherits this class's methods.\n */\nexport abstract class ChunkedStream implements AsyncIterableIterator<SynthesizedAudio> {\n protected queue = new AsyncIterableQueue<SynthesizedAudio>();\n protected output = new AsyncIterableQueue<SynthesizedAudio>();\n protected closed = false;\n abstract label: string;\n #text: string;\n #tts: TTS;\n private _connOptions: APIConnectOptions;\n private logger = log();\n\n constructor(\n text: string,\n tts: TTS,\n connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#text = text;\n this.#tts = tts;\n this._connOptions = connOptions;\n\n this.monitorMetrics();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n Promise.resolve().then(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIStatusError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#tts.label, attempt: i + 1, error },\n `failed to generate TTS completion, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#tts.emit('error', {\n type: 'tts_error',\n timestamp: Date.now(),\n label: this.#tts.label,\n error,\n recoverable,\n });\n }\n\n protected abstract run(): Promise<void>;\n\n get inputText(): string {\n return this.#text;\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n let audioDurationMs = 0;\n let ttfb: bigint = BigInt(-1);\n let requestId = '';\n\n for await (const audio of this.queue) {\n this.output.put(audio);\n requestId = audio.requestId;\n if (ttfb === BigInt(-1)) {\n ttfb = process.hrtime.bigint() - startTime;\n }\n audioDurationMs += (audio.frame.samplesPerChannel / audio.frame.sampleRate) * 1000;\n }\n this.output.close();\n\n const duration = process.hrtime.bigint() - startTime;\n const metrics: TTSMetrics = {\n type: 'tts_metrics',\n timestamp: Date.now(),\n requestId,\n ttfbMs: ttfb === BigInt(-1) ? -1 : Math.trunc(Number(ttfb / BigInt(1000000))),\n durationMs: Math.trunc(Number(duration / BigInt(1000000))),\n charactersCount: this.#text.length,\n audioDurationMs: Math.round(audioDurationMs),\n cancelled: false, // TODO(AJS-186): support ChunkedStream with 1.0 - add this.abortController.signal.aborted here\n label: this.#tts.label,\n streamed: false,\n };\n this.#tts.emit('metrics_collected', metrics);\n }\n\n /** Collect every frame into one in a single call */\n async collect(): Promise<AudioFrame> {\n const frames = [];\n for await (const event of this) {\n frames.push(event.frame);\n }\n return mergeFrames(frames);\n }\n\n next(): Promise<IteratorResult<SynthesizedAudio>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the TTS stream */\n close() {\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): ChunkedStream {\n return this;\n }\n}\n"],"mappings":"AAKA,SAAS,oBAAoB;AAE7B,SAAS,oBAAoB,sBAAsB;AACnD,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAiC,mCAAmC;AACpE,SAAS,oBAAoB,OAAO,aAAa,WAAW,eAAe;AA+CpE,MAAe,YAAa,aAAsD;AAAA,EACvF;AAAA,EACA;AAAA,EACA;AAAA,EAGA,YAAY,YAAoB,aAAqB,cAA+B;AAClF,UAAM;AACN,SAAK,gBAAgB;AACrB,SAAK,cAAc;AACnB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,aAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,IAAI,cAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAWF;AAgBO,MAAe,iBAEtB;AAAA,EACE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EAClE,OAAgB,gBAAgB,OAAO,eAAe;AAAA,EAC5C,QAAQ,IAAI,mBAAoE;AAAA,EAChF,QAAQ,IAAI,mBAEpB;AAAA,EACQ,SAAS,IAAI,mBAErB;AAAA,EACQ,SAAS;AAAA,EAEnB;AAAA,EACA,uBAAiC,CAAC;AAAA,EAClC,eAAe;AAAA,EACf;AAAA,EACQ;AAAA,EACE,kBAAkB,IAAI,gBAAgB;AAAA,EAExC;AAAA,EAGA,SAAS,IAAI;AAAA,EAErB,YAAY,KAAU,cAAiC,6BAA6B;AAClF,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,uBAAuB;AACtD,SAAK,UAAU;AACf,SAAK,gBAAgB,OAAO,iBAAiB,SAAS,MAAM;AAC1D,WAAK,oBAAoB,aAAa;AAEtC,WAAK,MAAM,MAAM;AACjB,WAAK,OAAO,MAAM;AAClB,WAAK,SAAS;AAAA,IAChB,CAAC;AAMD,cAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,gBAAgB;AACnC,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,6CAA6C,aAAa;AAAA,YAC5D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA;AAAA,EAGA,MAAgB,YAAY;AAC1B,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,QAAQ,UAAU,iBAAiB,gBAAgB;AACrD;AAAA,QACF;AACA,aAAK,SAAS,KAAK;AAAA,MACrB;AACA,WAAK,SAAS;AAAA,IAChB,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,OAAO,qCAAqC;AAAA,IAChE,UAAE;AACA,aAAO,YAAY;AAEnB,UAAI,CAAC,KAAK,qBAAqB;AAE7B,aAAK,OAAO,MAAM;AAAA,MACpB;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,kBAAkB;AACtB,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAEhB,UAAM,OAAO,MAAM;AACjB,UAAI,KAAK,qBAAqB,QAAQ;AACpC,cAAM,OAAO,KAAK,qBAAqB,MAAM;AAC7C,cAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,cAAM,yBAAyB,KAAK,MAAM,eAAe;AACzD,cAAM,UAAsB;AAAA,UAC1B,MAAM;AAAA,UACN,WAAW,KAAK,IAAI;AAAA,UACpB;AAAA,UACA,QAAQ,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,UAC5E,YAAY,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,UACzD,iBAAiB,KAAK;AAAA,UACtB,iBAAiB;AAAA,UACjB,WAAW,KAAK,gBAAgB,OAAO;AAAA,UACvC,OAAO,KAAK,KAAK;AAAA,UACjB,UAAU;AAAA,QACZ;AACA,aAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,MAC7C;AAAA,IACF;AAEA,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,KAAK,gBAAgB,OAAO,SAAS;AACvC;AAAA,MACF;AACA,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,UAAU,iBAAiB,cAAe;AAC9C,kBAAY,MAAM;AAClB,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AAEA,yBAAoB,MAAM,MAAM,oBAAoB,MAAM,MAAM,aAAc;AAC9E,UAAI,MAAM,OAAO;AACf,aAAK;AAAA,MACP;AAAA,IACF;AAEA,QAAI,WAAW;AACb,WAAK;AAAA,IACP;AAAA,EACF;AAAA,EAIA,kBAAkB,MAA8B;AAC9C,SAAK,oBAAoB,UAAU,IAAI;AAAA,EACzC;AAAA;AAAA;AAAA,EAIA,SAAS,MAAc;AACrB,QAAI,CAAC,KAAK,qBAAqB;AAC7B,WAAK,sBAAsB,KAAK,eAAe;AAE/C,WAAK,oBAAoB,QAAQ,MAAM,KAAK,OAAO,MAAM,CAAC;AAAA,IAC5D;AACA,SAAK,gBAAgB;AAErB,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,IAAI;AAAA,EACrB;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,cAAc;AACrB,WAAK,qBAAqB,KAAK,KAAK,YAAY;AAChD,WAAK,eAAe;AAAA,IACtB;AACA,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,iBAAiB,cAAc;AAAA,EAChD;AAAA;AAAA,EAGA,WAAW;AACT,SAAK,MAAM;AACX,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA0F;AACxF,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,gBAAgB,MAAM;AAAA,EAC7B;AAAA,EAEA,CAAC,OAAO,aAAa,IAAsB;AACzC,WAAO;AAAA,EACT;AACF;AAgBO,MAAe,cAAiE;AAAA,EAC3E,QAAQ,IAAI,mBAAqC;AAAA,EACjD,SAAS,IAAI,mBAAqC;AAAA,EAClD,SAAS;AAAA,EAEnB;AAAA,EACA;AAAA,EACQ;AAAA,EACA,SAAS,IAAI;AAAA,EAErB,YACE,MACA,KACA,cAAiC,6BACjC;AACA,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,eAAe;AAEpB,SAAK,eAAe;AAMpB,YAAQ,QAAQ,EAAE,KAAK,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAC7E;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,gBAAgB;AACnC,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,2CAA2C,KAAK,aAAa,WAAW,CAAC;AAAA,cAClF,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,kDAAkD,aAAa;AAAA,YACjE;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAIA,IAAI,YAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,QAAI,kBAAkB;AACtB,QAAI,OAAe,OAAO,EAAE;AAC5B,QAAI,YAAY;AAEhB,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,kBAAY,MAAM;AAClB,UAAI,SAAS,OAAO,EAAE,GAAG;AACvB,eAAO,QAAQ,OAAO,OAAO,IAAI;AAAA,MACnC;AACA,yBAAoB,MAAM,MAAM,oBAAoB,MAAM,MAAM,aAAc;AAAA,IAChF;AACA,SAAK,OAAO,MAAM;AAElB,UAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,UAAM,UAAsB;AAAA,MAC1B,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,QAAQ,SAAS,OAAO,EAAE,IAAI,KAAK,KAAK,MAAM,OAAO,OAAO,OAAO,GAAO,CAAC,CAAC;AAAA,MAC5E,YAAY,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,MACzD,iBAAiB,KAAK,MAAM;AAAA,MAC5B,iBAAiB,KAAK,MAAM,eAAe;AAAA,MAC3C,WAAW;AAAA;AAAA,MACX,OAAO,KAAK,KAAK;AAAA,MACjB,UAAU;AAAA,IACZ;AACA,SAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,EAC7C;AAAA;AAAA,EAGA,MAAM,UAA+B;AACnC,UAAM,SAAS,CAAC;AAChB,qBAAiB,SAAS,MAAM;AAC9B,aAAO,KAAK,MAAM,KAAK;AAAA,IACzB;AACA,WAAO,YAAY,MAAM;AAAA,EAC3B;AAAA,EAEA,OAAkD;AAChD,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAmB;AACtC,WAAO;AAAA,EACT;AACF;","names":[]}
package/dist/vad.cjs CHANGED
@@ -94,7 +94,7 @@ class VADStream {
94
94
  }
95
95
  }
96
96
  async monitorMetrics() {
97
- let inferenceDurationTotal = 0;
97
+ let inferenceDurationTotalMs = 0;
98
98
  let inferenceCount = 0;
99
99
  const metricsReader = this.metricsStream.getReader();
100
100
  while (true) {
@@ -109,19 +109,19 @@ class VADStream {
109
109
  this.#vad.emit("metrics_collected", {
110
110
  type: "vad_metrics",
111
111
  timestamp: Date.now(),
112
- idleTime: Math.trunc(
112
+ idleTimeMs: Math.trunc(
113
113
  Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1e6))
114
114
  ),
115
- inferenceDurationTotal,
115
+ inferenceDurationTotalMs,
116
116
  inferenceCount,
117
117
  label: this.#vad.label
118
118
  });
119
119
  inferenceCount = 0;
120
- inferenceDurationTotal = 0;
120
+ inferenceDurationTotalMs = 0;
121
121
  }
122
122
  break;
123
123
  case 1 /* INFERENCE_DONE */:
124
- inferenceDurationTotal += value.inferenceDuration;
124
+ inferenceDurationTotalMs += Math.round(value.inferenceDuration);
125
125
  this.#lastActivityTime = process.hrtime.bigint();
126
126
  break;
127
127
  case 2 /* END_OF_SPEECH */:
package/dist/vad.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type {\n ReadableStream,\n ReadableStreamDefaultReader,\n WritableStreamDefaultWriter,\n} from 'node:stream/web';\nimport { log } from './log.js';\nimport type { VADMetrics } from './metrics/base.js';\nimport { DeferredReadableStream } from './stream/deferred_stream.js';\nimport { IdentityTransform } from './stream/identity_transform.js';\n\nexport enum VADEventType {\n START_OF_SPEECH,\n INFERENCE_DONE,\n END_OF_SPEECH,\n METRICS_COLLECTED,\n}\n\nexport interface VADEvent {\n /** Type of the VAD event (e.g., start of speech, end of speech, inference done). */\n type: VADEventType;\n /**\n * Index of the audio sample where the event occurred, relative to the inference sample rate.\n */\n samplesIndex: number;\n /** Timestamp when the event was fired. */\n timestamp: number;\n /** Duration of the speech segment. */\n speechDuration: number;\n /** Duration of the silence segment. */\n silenceDuration: number;\n /**\n * List of audio frames associated with the speech.\n *\n * @remarks\n * - For `start_of_speech` events, this contains the audio chunks that triggered the detection.\n * - For `inference_done` events, this contains the audio chunks that were processed.\n * - For `end_of_speech` events, this contains the complete user speech.\n */\n frames: AudioFrame[];\n /** Probability that speech is present (only for `INFERENCE_DONE` events). */\n probability: number;\n /** Time taken to perform the inference, in seconds (only for `INFERENCE_DONE` events). */\n inferenceDuration: number;\n /** Indicates whether speech was detected in the frames. */\n speaking: boolean;\n /** Threshold used to detect silence. */\n rawAccumulatedSilence: number;\n /** Threshold used to detect speech. */\n rawAccumulatedSpeech: number;\n}\n\nexport interface VADCapabilities {\n updateInterval: number;\n}\n\nexport type VADCallbacks = {\n ['metrics_collected']: (metrics: VADMetrics) => void;\n};\n\nexport abstract class VAD extends (EventEmitter as new () => TypedEmitter<VADCallbacks>) {\n #capabilities: VADCapabilities;\n abstract label: string;\n\n constructor(capabilities: VADCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n get capabilities(): VADCapabilities {\n return this.#capabilities;\n }\n\n /**\n * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.\n */\n abstract stream(): VADStream;\n}\n\nexport abstract class VADStream implements AsyncIterableIterator<VADEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new IdentityTransform<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();\n protected output = new IdentityTransform<VADEvent>();\n protected inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected inputReader: ReadableStreamDefaultReader<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected outputWriter: WritableStreamDefaultWriter<VADEvent>;\n protected outputReader: ReadableStreamDefaultReader<VADEvent>;\n protected closed = false;\n protected inputClosed = false;\n\n #vad: VAD;\n #lastActivityTime = BigInt(0);\n private logger = log();\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n\n private metricsStream: ReadableStream<VADEvent>;\n constructor(vad: VAD) {\n this.#vad = vad;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n\n this.inputWriter = this.input.writable.getWriter();\n this.inputReader = this.input.readable.getReader();\n this.outputWriter = this.output.writable.getWriter();\n\n const [outputStream, metricsStream] = this.output.readable.tee();\n this.metricsStream = metricsStream;\n this.outputReader = outputStream.getReader();\n\n this.pumpDeferredStream();\n this.monitorMetrics();\n }\n\n /**\n * Reads from the deferred input stream and forwards chunks to the input writer.\n *\n * Note: we can't just do this.deferredInputStream.stream.pipeTo(this.input.writable)\n * because the inputWriter locks the this.input.writable stream. All writes must go through\n * the inputWriter.\n */\n private async pumpDeferredStream() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n await this.inputWriter.write(value);\n }\n } catch (e) {\n this.logger.error(`Error pumping deferred stream: ${e}`);\n throw e;\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n let inferenceDurationTotal = 0;\n let inferenceCount = 0;\n const metricsReader = this.metricsStream.getReader();\n while (true) {\n const { done, value } = await metricsReader.read();\n if (done) {\n break;\n }\n switch (value.type) {\n case VADEventType.START_OF_SPEECH:\n inferenceCount++;\n if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {\n this.#vad.emit('metrics_collected', {\n type: 'vad_metrics',\n timestamp: Date.now(),\n idleTime: Math.trunc(\n Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),\n ),\n inferenceDurationTotal,\n inferenceCount,\n label: this.#vad.label,\n });\n\n inferenceCount = 0;\n inferenceDurationTotal = 0;\n }\n break;\n case VADEventType.INFERENCE_DONE:\n inferenceDurationTotal += value.inferenceDuration;\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n case VADEventType.END_OF_SPEECH:\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n }\n }\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** @deprecated Use `updateInputStream` instead */\n pushFrame(frame: AudioFrame) {\n // TODO(AJS-395): remove this method\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(frame);\n }\n\n flush() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(VADStream.FLUSH_SENTINEL);\n }\n\n endInput() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputClosed = true;\n this.input.writable.close();\n }\n\n async next(): Promise<IteratorResult<VADEvent>> {\n return this.outputReader.read().then(({ done, value }) => {\n if (done) {\n return { done: true, value: undefined };\n }\n return { done: false, value };\n });\n }\n\n close() {\n this.outputWriter.releaseLock();\n this.outputReader.cancel();\n this.output.writable.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): VADStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAA6B;AAM7B,iBAAoB;AAEpB,6BAAuC;AACvC,gCAAkC;AAE3B,IAAK,eAAL,kBAAKA,kBAAL;AACL,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AAJU,SAAAA;AAAA,GAAA;AAiDL,MAAe,YAAa,gCAAsD;AAAA,EACvF;AAAA,EAGA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAMF;AAEO,MAAe,UAAqD;AAAA,EACzE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,4CAAgE;AAAA,EAC5E,SAAS,IAAI,4CAA4B;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,SAAS;AAAA,EACT,cAAc;AAAA,EAExB;AAAA,EACA,oBAAoB,OAAO,CAAC;AAAA,EACpB,aAAS,gBAAI;AAAA,EACb;AAAA,EAEA;AAAA,EACR,YAAY,KAAU;AACpB,SAAK,OAAO;AACZ,SAAK,sBAAsB,IAAI,8CAAmC;AAElE,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,eAAe,KAAK,OAAO,SAAS,UAAU;AAEnD,UAAM,CAAC,cAAc,aAAa,IAAI,KAAK,OAAO,SAAS,IAAI;AAC/D,SAAK,gBAAgB;AACrB,SAAK,eAAe,aAAa,UAAU;AAE3C,SAAK,mBAAmB;AACxB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,qBAAqB;AACjC,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,cAAM,KAAK,YAAY,MAAM,KAAK;AAAA,MACpC;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,kCAAkC,CAAC,EAAE;AACvD,YAAM;AAAA,IACR,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,QAAI,yBAAyB;AAC7B,QAAI,iBAAiB;AACrB,UAAM,gBAAgB,KAAK,cAAc,UAAU;AACnD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,MAAM,IAAI,MAAM,cAAc,KAAK;AACjD,UAAI,MAAM;AACR;AAAA,MACF;AACA,cAAQ,MAAM,MAAM;AAAA,QAClB,KAAK;AACH;AACA,cAAI,kBAAkB,IAAI,KAAK,KAAK,aAAa,gBAAgB;AAC/D,iBAAK,KAAK,KAAK,qBAAqB;AAAA,cAClC,MAAM;AAAA,cACN,WAAW,KAAK,IAAI;AAAA,cACpB,UAAU,KAAK;AAAA,gBACb,QAAQ,QAAQ,OAAO,OAAO,IAAI,KAAK,qBAAqB,OAAO,GAAO,CAAC;AAAA,cAC7E;AAAA,cACA;AAAA,cACA;AAAA,cACA,OAAO,KAAK,KAAK;AAAA,YACnB,CAAC;AAED,6BAAiB;AACjB,qCAAyB;AAAA,UAC3B;AACA;AAAA,QACF,KAAK;AACH,oCAA0B,MAAM;AAChC,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,QACF,KAAK;AACH,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAE3B,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,KAAK;AAAA,EAC9B;AAAA,EAEA,QAAQ;AACN,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,UAAU,cAAc;AAAA,EACjD;AAAA,EAEA,WAAW;AACT,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,cAAc;AACnB,SAAK,MAAM,SAAS,MAAM;AAAA,EAC5B;AAAA,EAEA,MAAM,OAA0C;AAC9C,WAAO,KAAK,aAAa,KAAK,EAAE,KAAK,CAAC,EAAE,MAAM,MAAM,MAAM;AACxD,UAAI,MAAM;AACR,eAAO,EAAE,MAAM,MAAM,OAAO,OAAU;AAAA,MACxC;AACA,aAAO,EAAE,MAAM,OAAO,MAAM;AAAA,IAC9B,CAAC;AAAA,EACH;AAAA,EAEA,QAAQ;AACN,SAAK,aAAa,YAAY;AAC9B,SAAK,aAAa,OAAO;AACzB,SAAK,OAAO,SAAS,MAAM;AAC3B,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["VADEventType"]}
1
+ {"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type {\n ReadableStream,\n ReadableStreamDefaultReader,\n WritableStreamDefaultWriter,\n} from 'node:stream/web';\nimport { log } from './log.js';\nimport type { VADMetrics } from './metrics/base.js';\nimport { DeferredReadableStream } from './stream/deferred_stream.js';\nimport { IdentityTransform } from './stream/identity_transform.js';\n\nexport enum VADEventType {\n START_OF_SPEECH,\n INFERENCE_DONE,\n END_OF_SPEECH,\n METRICS_COLLECTED,\n}\n\nexport interface VADEvent {\n /** Type of the VAD event (e.g., start of speech, end of speech, inference done). */\n type: VADEventType;\n /**\n * Index of the audio sample where the event occurred, relative to the inference sample rate.\n */\n samplesIndex: number;\n /** Timestamp when the event was fired. */\n timestamp: number;\n /** Duration of the speech segment. */\n speechDuration: number;\n /** Duration of the silence segment. */\n silenceDuration: number;\n /**\n * List of audio frames associated with the speech.\n *\n * @remarks\n * - For `start_of_speech` events, this contains the audio chunks that triggered the detection.\n * - For `inference_done` events, this contains the audio chunks that were processed.\n * - For `end_of_speech` events, this contains the complete user speech.\n */\n frames: AudioFrame[];\n /** Probability that speech is present (only for `INFERENCE_DONE` events). */\n probability: number;\n /** Time taken to perform the inference, in seconds (only for `INFERENCE_DONE` events). */\n inferenceDuration: number;\n /** Indicates whether speech was detected in the frames. */\n speaking: boolean;\n /** Threshold used to detect silence. */\n rawAccumulatedSilence: number;\n /** Threshold used to detect speech. */\n rawAccumulatedSpeech: number;\n}\n\nexport interface VADCapabilities {\n updateInterval: number;\n}\n\nexport type VADCallbacks = {\n ['metrics_collected']: (metrics: VADMetrics) => void;\n};\n\nexport abstract class VAD extends (EventEmitter as new () => TypedEmitter<VADCallbacks>) {\n #capabilities: VADCapabilities;\n abstract label: string;\n\n constructor(capabilities: VADCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n get capabilities(): VADCapabilities {\n return this.#capabilities;\n }\n\n /**\n * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.\n */\n abstract stream(): VADStream;\n}\n\nexport abstract class VADStream implements AsyncIterableIterator<VADEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new IdentityTransform<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();\n protected output = new IdentityTransform<VADEvent>();\n protected inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected inputReader: ReadableStreamDefaultReader<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected outputWriter: WritableStreamDefaultWriter<VADEvent>;\n protected outputReader: ReadableStreamDefaultReader<VADEvent>;\n protected closed = false;\n protected inputClosed = false;\n\n #vad: VAD;\n #lastActivityTime = BigInt(0);\n private logger = log();\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n\n private metricsStream: ReadableStream<VADEvent>;\n constructor(vad: VAD) {\n this.#vad = vad;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n\n this.inputWriter = this.input.writable.getWriter();\n this.inputReader = this.input.readable.getReader();\n this.outputWriter = this.output.writable.getWriter();\n\n const [outputStream, metricsStream] = this.output.readable.tee();\n this.metricsStream = metricsStream;\n this.outputReader = outputStream.getReader();\n\n this.pumpDeferredStream();\n this.monitorMetrics();\n }\n\n /**\n * Reads from the deferred input stream and forwards chunks to the input writer.\n *\n * Note: we can't just do this.deferredInputStream.stream.pipeTo(this.input.writable)\n * because the inputWriter locks the this.input.writable stream. All writes must go through\n * the inputWriter.\n */\n private async pumpDeferredStream() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n await this.inputWriter.write(value);\n }\n } catch (e) {\n this.logger.error(`Error pumping deferred stream: ${e}`);\n throw e;\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n let inferenceDurationTotalMs = 0;\n let inferenceCount = 0;\n const metricsReader = this.metricsStream.getReader();\n while (true) {\n const { done, value } = await metricsReader.read();\n if (done) {\n break;\n }\n switch (value.type) {\n case VADEventType.START_OF_SPEECH:\n inferenceCount++;\n if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {\n this.#vad.emit('metrics_collected', {\n type: 'vad_metrics',\n timestamp: Date.now(),\n idleTimeMs: Math.trunc(\n Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),\n ),\n inferenceDurationTotalMs,\n inferenceCount,\n label: this.#vad.label,\n });\n\n inferenceCount = 0;\n inferenceDurationTotalMs = 0;\n }\n break;\n case VADEventType.INFERENCE_DONE:\n inferenceDurationTotalMs += Math.round(value.inferenceDuration);\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n case VADEventType.END_OF_SPEECH:\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n }\n }\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** @deprecated Use `updateInputStream` instead */\n pushFrame(frame: AudioFrame) {\n // TODO(AJS-395): remove this method\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(frame);\n }\n\n flush() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(VADStream.FLUSH_SENTINEL);\n }\n\n endInput() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputClosed = true;\n this.input.writable.close();\n }\n\n async next(): Promise<IteratorResult<VADEvent>> {\n return this.outputReader.read().then(({ done, value }) => {\n if (done) {\n return { done: true, value: undefined };\n }\n return { done: false, value };\n });\n }\n\n close() {\n this.outputWriter.releaseLock();\n this.outputReader.cancel();\n this.output.writable.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): VADStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAA6B;AAM7B,iBAAoB;AAEpB,6BAAuC;AACvC,gCAAkC;AAE3B,IAAK,eAAL,kBAAKA,kBAAL;AACL,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AAJU,SAAAA;AAAA,GAAA;AAiDL,MAAe,YAAa,gCAAsD;AAAA,EACvF;AAAA,EAGA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAMF;AAEO,MAAe,UAAqD;AAAA,EACzE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,4CAAgE;AAAA,EAC5E,SAAS,IAAI,4CAA4B;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,SAAS;AAAA,EACT,cAAc;AAAA,EAExB;AAAA,EACA,oBAAoB,OAAO,CAAC;AAAA,EACpB,aAAS,gBAAI;AAAA,EACb;AAAA,EAEA;AAAA,EACR,YAAY,KAAU;AACpB,SAAK,OAAO;AACZ,SAAK,sBAAsB,IAAI,8CAAmC;AAElE,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,eAAe,KAAK,OAAO,SAAS,UAAU;AAEnD,UAAM,CAAC,cAAc,aAAa,IAAI,KAAK,OAAO,SAAS,IAAI;AAC/D,SAAK,gBAAgB;AACrB,SAAK,eAAe,aAAa,UAAU;AAE3C,SAAK,mBAAmB;AACxB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,qBAAqB;AACjC,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,cAAM,KAAK,YAAY,MAAM,KAAK;AAAA,MACpC;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,kCAAkC,CAAC,EAAE;AACvD,YAAM;AAAA,IACR,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,QAAI,2BAA2B;AAC/B,QAAI,iBAAiB;AACrB,UAAM,gBAAgB,KAAK,cAAc,UAAU;AACnD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,MAAM,IAAI,MAAM,cAAc,KAAK;AACjD,UAAI,MAAM;AACR;AAAA,MACF;AACA,cAAQ,MAAM,MAAM;AAAA,QAClB,KAAK;AACH;AACA,cAAI,kBAAkB,IAAI,KAAK,KAAK,aAAa,gBAAgB;AAC/D,iBAAK,KAAK,KAAK,qBAAqB;AAAA,cAClC,MAAM;AAAA,cACN,WAAW,KAAK,IAAI;AAAA,cACpB,YAAY,KAAK;AAAA,gBACf,QAAQ,QAAQ,OAAO,OAAO,IAAI,KAAK,qBAAqB,OAAO,GAAO,CAAC;AAAA,cAC7E;AAAA,cACA;AAAA,cACA;AAAA,cACA,OAAO,KAAK,KAAK;AAAA,YACnB,CAAC;AAED,6BAAiB;AACjB,uCAA2B;AAAA,UAC7B;AACA;AAAA,QACF,KAAK;AACH,sCAA4B,KAAK,MAAM,MAAM,iBAAiB;AAC9D,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,QACF,KAAK;AACH,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAE3B,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,KAAK;AAAA,EAC9B;AAAA,EAEA,QAAQ;AACN,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,UAAU,cAAc;AAAA,EACjD;AAAA,EAEA,WAAW;AACT,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,cAAc;AACnB,SAAK,MAAM,SAAS,MAAM;AAAA,EAC5B;AAAA,EAEA,MAAM,OAA0C;AAC9C,WAAO,KAAK,aAAa,KAAK,EAAE,KAAK,CAAC,EAAE,MAAM,MAAM,MAAM;AACxD,UAAI,MAAM;AACR,eAAO,EAAE,MAAM,MAAM,OAAO,OAAU;AAAA,MACxC;AACA,aAAO,EAAE,MAAM,OAAO,MAAM;AAAA,IAC9B,CAAC;AAAA,EACH;AAAA,EAEA,QAAQ;AACN,SAAK,aAAa,YAAY;AAC9B,SAAK,aAAa,OAAO;AACzB,SAAK,OAAO,SAAS,MAAM;AAC3B,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["VADEventType"]}
package/dist/vad.js CHANGED
@@ -69,7 +69,7 @@ class VADStream {
69
69
  }
70
70
  }
71
71
  async monitorMetrics() {
72
- let inferenceDurationTotal = 0;
72
+ let inferenceDurationTotalMs = 0;
73
73
  let inferenceCount = 0;
74
74
  const metricsReader = this.metricsStream.getReader();
75
75
  while (true) {
@@ -84,19 +84,19 @@ class VADStream {
84
84
  this.#vad.emit("metrics_collected", {
85
85
  type: "vad_metrics",
86
86
  timestamp: Date.now(),
87
- idleTime: Math.trunc(
87
+ idleTimeMs: Math.trunc(
88
88
  Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1e6))
89
89
  ),
90
- inferenceDurationTotal,
90
+ inferenceDurationTotalMs,
91
91
  inferenceCount,
92
92
  label: this.#vad.label
93
93
  });
94
94
  inferenceCount = 0;
95
- inferenceDurationTotal = 0;
95
+ inferenceDurationTotalMs = 0;
96
96
  }
97
97
  break;
98
98
  case 1 /* INFERENCE_DONE */:
99
- inferenceDurationTotal += value.inferenceDuration;
99
+ inferenceDurationTotalMs += Math.round(value.inferenceDuration);
100
100
  this.#lastActivityTime = process.hrtime.bigint();
101
101
  break;
102
102
  case 2 /* END_OF_SPEECH */:
package/dist/vad.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type {\n ReadableStream,\n ReadableStreamDefaultReader,\n WritableStreamDefaultWriter,\n} from 'node:stream/web';\nimport { log } from './log.js';\nimport type { VADMetrics } from './metrics/base.js';\nimport { DeferredReadableStream } from './stream/deferred_stream.js';\nimport { IdentityTransform } from './stream/identity_transform.js';\n\nexport enum VADEventType {\n START_OF_SPEECH,\n INFERENCE_DONE,\n END_OF_SPEECH,\n METRICS_COLLECTED,\n}\n\nexport interface VADEvent {\n /** Type of the VAD event (e.g., start of speech, end of speech, inference done). */\n type: VADEventType;\n /**\n * Index of the audio sample where the event occurred, relative to the inference sample rate.\n */\n samplesIndex: number;\n /** Timestamp when the event was fired. */\n timestamp: number;\n /** Duration of the speech segment. */\n speechDuration: number;\n /** Duration of the silence segment. */\n silenceDuration: number;\n /**\n * List of audio frames associated with the speech.\n *\n * @remarks\n * - For `start_of_speech` events, this contains the audio chunks that triggered the detection.\n * - For `inference_done` events, this contains the audio chunks that were processed.\n * - For `end_of_speech` events, this contains the complete user speech.\n */\n frames: AudioFrame[];\n /** Probability that speech is present (only for `INFERENCE_DONE` events). */\n probability: number;\n /** Time taken to perform the inference, in seconds (only for `INFERENCE_DONE` events). */\n inferenceDuration: number;\n /** Indicates whether speech was detected in the frames. */\n speaking: boolean;\n /** Threshold used to detect silence. */\n rawAccumulatedSilence: number;\n /** Threshold used to detect speech. */\n rawAccumulatedSpeech: number;\n}\n\nexport interface VADCapabilities {\n updateInterval: number;\n}\n\nexport type VADCallbacks = {\n ['metrics_collected']: (metrics: VADMetrics) => void;\n};\n\nexport abstract class VAD extends (EventEmitter as new () => TypedEmitter<VADCallbacks>) {\n #capabilities: VADCapabilities;\n abstract label: string;\n\n constructor(capabilities: VADCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n get capabilities(): VADCapabilities {\n return this.#capabilities;\n }\n\n /**\n * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.\n */\n abstract stream(): VADStream;\n}\n\nexport abstract class VADStream implements AsyncIterableIterator<VADEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new IdentityTransform<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();\n protected output = new IdentityTransform<VADEvent>();\n protected inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected inputReader: ReadableStreamDefaultReader<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected outputWriter: WritableStreamDefaultWriter<VADEvent>;\n protected outputReader: ReadableStreamDefaultReader<VADEvent>;\n protected closed = false;\n protected inputClosed = false;\n\n #vad: VAD;\n #lastActivityTime = BigInt(0);\n private logger = log();\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n\n private metricsStream: ReadableStream<VADEvent>;\n constructor(vad: VAD) {\n this.#vad = vad;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n\n this.inputWriter = this.input.writable.getWriter();\n this.inputReader = this.input.readable.getReader();\n this.outputWriter = this.output.writable.getWriter();\n\n const [outputStream, metricsStream] = this.output.readable.tee();\n this.metricsStream = metricsStream;\n this.outputReader = outputStream.getReader();\n\n this.pumpDeferredStream();\n this.monitorMetrics();\n }\n\n /**\n * Reads from the deferred input stream and forwards chunks to the input writer.\n *\n * Note: we can't just do this.deferredInputStream.stream.pipeTo(this.input.writable)\n * because the inputWriter locks the this.input.writable stream. All writes must go through\n * the inputWriter.\n */\n private async pumpDeferredStream() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n await this.inputWriter.write(value);\n }\n } catch (e) {\n this.logger.error(`Error pumping deferred stream: ${e}`);\n throw e;\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n let inferenceDurationTotal = 0;\n let inferenceCount = 0;\n const metricsReader = this.metricsStream.getReader();\n while (true) {\n const { done, value } = await metricsReader.read();\n if (done) {\n break;\n }\n switch (value.type) {\n case VADEventType.START_OF_SPEECH:\n inferenceCount++;\n if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {\n this.#vad.emit('metrics_collected', {\n type: 'vad_metrics',\n timestamp: Date.now(),\n idleTime: Math.trunc(\n Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),\n ),\n inferenceDurationTotal,\n inferenceCount,\n label: this.#vad.label,\n });\n\n inferenceCount = 0;\n inferenceDurationTotal = 0;\n }\n break;\n case VADEventType.INFERENCE_DONE:\n inferenceDurationTotal += value.inferenceDuration;\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n case VADEventType.END_OF_SPEECH:\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n }\n }\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** @deprecated Use `updateInputStream` instead */\n pushFrame(frame: AudioFrame) {\n // TODO(AJS-395): remove this method\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(frame);\n }\n\n flush() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(VADStream.FLUSH_SENTINEL);\n }\n\n endInput() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputClosed = true;\n this.input.writable.close();\n }\n\n async next(): Promise<IteratorResult<VADEvent>> {\n return this.outputReader.read().then(({ done, value }) => {\n if (done) {\n return { done: true, value: undefined };\n }\n return { done: false, value };\n });\n }\n\n close() {\n this.outputWriter.releaseLock();\n this.outputReader.cancel();\n this.output.writable.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): VADStream {\n return this;\n }\n}\n"],"mappings":"AAKA,SAAS,oBAAoB;AAM7B,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAS,yBAAyB;AAE3B,IAAK,eAAL,kBAAKA,kBAAL;AACL,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AAJU,SAAAA;AAAA,GAAA;AAiDL,MAAe,YAAa,aAAsD;AAAA,EACvF;AAAA,EAGA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAMF;AAEO,MAAe,UAAqD;AAAA,EACzE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,kBAAgE;AAAA,EAC5E,SAAS,IAAI,kBAA4B;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,SAAS;AAAA,EACT,cAAc;AAAA,EAExB;AAAA,EACA,oBAAoB,OAAO,CAAC;AAAA,EACpB,SAAS,IAAI;AAAA,EACb;AAAA,EAEA;AAAA,EACR,YAAY,KAAU;AACpB,SAAK,OAAO;AACZ,SAAK,sBAAsB,IAAI,uBAAmC;AAElE,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,eAAe,KAAK,OAAO,SAAS,UAAU;AAEnD,UAAM,CAAC,cAAc,aAAa,IAAI,KAAK,OAAO,SAAS,IAAI;AAC/D,SAAK,gBAAgB;AACrB,SAAK,eAAe,aAAa,UAAU;AAE3C,SAAK,mBAAmB;AACxB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,qBAAqB;AACjC,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,cAAM,KAAK,YAAY,MAAM,KAAK;AAAA,MACpC;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,kCAAkC,CAAC,EAAE;AACvD,YAAM;AAAA,IACR,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,QAAI,yBAAyB;AAC7B,QAAI,iBAAiB;AACrB,UAAM,gBAAgB,KAAK,cAAc,UAAU;AACnD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,MAAM,IAAI,MAAM,cAAc,KAAK;AACjD,UAAI,MAAM;AACR;AAAA,MACF;AACA,cAAQ,MAAM,MAAM;AAAA,QAClB,KAAK;AACH;AACA,cAAI,kBAAkB,IAAI,KAAK,KAAK,aAAa,gBAAgB;AAC/D,iBAAK,KAAK,KAAK,qBAAqB;AAAA,cAClC,MAAM;AAAA,cACN,WAAW,KAAK,IAAI;AAAA,cACpB,UAAU,KAAK;AAAA,gBACb,QAAQ,QAAQ,OAAO,OAAO,IAAI,KAAK,qBAAqB,OAAO,GAAO,CAAC;AAAA,cAC7E;AAAA,cACA;AAAA,cACA;AAAA,cACA,OAAO,KAAK,KAAK;AAAA,YACnB,CAAC;AAED,6BAAiB;AACjB,qCAAyB;AAAA,UAC3B;AACA;AAAA,QACF,KAAK;AACH,oCAA0B,MAAM;AAChC,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,QACF,KAAK;AACH,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAE3B,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,KAAK;AAAA,EAC9B;AAAA,EAEA,QAAQ;AACN,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,UAAU,cAAc;AAAA,EACjD;AAAA,EAEA,WAAW;AACT,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,cAAc;AACnB,SAAK,MAAM,SAAS,MAAM;AAAA,EAC5B;AAAA,EAEA,MAAM,OAA0C;AAC9C,WAAO,KAAK,aAAa,KAAK,EAAE,KAAK,CAAC,EAAE,MAAM,MAAM,MAAM;AACxD,UAAI,MAAM;AACR,eAAO,EAAE,MAAM,MAAM,OAAO,OAAU;AAAA,MACxC;AACA,aAAO,EAAE,MAAM,OAAO,MAAM;AAAA,IAC9B,CAAC;AAAA,EACH;AAAA,EAEA,QAAQ;AACN,SAAK,aAAa,YAAY;AAC9B,SAAK,aAAa,OAAO;AACzB,SAAK,OAAO,SAAS,MAAM;AAC3B,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["VADEventType"]}
1
+ {"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type {\n ReadableStream,\n ReadableStreamDefaultReader,\n WritableStreamDefaultWriter,\n} from 'node:stream/web';\nimport { log } from './log.js';\nimport type { VADMetrics } from './metrics/base.js';\nimport { DeferredReadableStream } from './stream/deferred_stream.js';\nimport { IdentityTransform } from './stream/identity_transform.js';\n\nexport enum VADEventType {\n START_OF_SPEECH,\n INFERENCE_DONE,\n END_OF_SPEECH,\n METRICS_COLLECTED,\n}\n\nexport interface VADEvent {\n /** Type of the VAD event (e.g., start of speech, end of speech, inference done). */\n type: VADEventType;\n /**\n * Index of the audio sample where the event occurred, relative to the inference sample rate.\n */\n samplesIndex: number;\n /** Timestamp when the event was fired. */\n timestamp: number;\n /** Duration of the speech segment. */\n speechDuration: number;\n /** Duration of the silence segment. */\n silenceDuration: number;\n /**\n * List of audio frames associated with the speech.\n *\n * @remarks\n * - For `start_of_speech` events, this contains the audio chunks that triggered the detection.\n * - For `inference_done` events, this contains the audio chunks that were processed.\n * - For `end_of_speech` events, this contains the complete user speech.\n */\n frames: AudioFrame[];\n /** Probability that speech is present (only for `INFERENCE_DONE` events). */\n probability: number;\n /** Time taken to perform the inference, in seconds (only for `INFERENCE_DONE` events). */\n inferenceDuration: number;\n /** Indicates whether speech was detected in the frames. */\n speaking: boolean;\n /** Threshold used to detect silence. */\n rawAccumulatedSilence: number;\n /** Threshold used to detect speech. */\n rawAccumulatedSpeech: number;\n}\n\nexport interface VADCapabilities {\n updateInterval: number;\n}\n\nexport type VADCallbacks = {\n ['metrics_collected']: (metrics: VADMetrics) => void;\n};\n\nexport abstract class VAD extends (EventEmitter as new () => TypedEmitter<VADCallbacks>) {\n #capabilities: VADCapabilities;\n abstract label: string;\n\n constructor(capabilities: VADCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n get capabilities(): VADCapabilities {\n return this.#capabilities;\n }\n\n /**\n * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.\n */\n abstract stream(): VADStream;\n}\n\nexport abstract class VADStream implements AsyncIterableIterator<VADEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new IdentityTransform<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();\n protected output = new IdentityTransform<VADEvent>();\n protected inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected inputReader: ReadableStreamDefaultReader<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected outputWriter: WritableStreamDefaultWriter<VADEvent>;\n protected outputReader: ReadableStreamDefaultReader<VADEvent>;\n protected closed = false;\n protected inputClosed = false;\n\n #vad: VAD;\n #lastActivityTime = BigInt(0);\n private logger = log();\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n\n private metricsStream: ReadableStream<VADEvent>;\n constructor(vad: VAD) {\n this.#vad = vad;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n\n this.inputWriter = this.input.writable.getWriter();\n this.inputReader = this.input.readable.getReader();\n this.outputWriter = this.output.writable.getWriter();\n\n const [outputStream, metricsStream] = this.output.readable.tee();\n this.metricsStream = metricsStream;\n this.outputReader = outputStream.getReader();\n\n this.pumpDeferredStream();\n this.monitorMetrics();\n }\n\n /**\n * Reads from the deferred input stream and forwards chunks to the input writer.\n *\n * Note: we can't just do this.deferredInputStream.stream.pipeTo(this.input.writable)\n * because the inputWriter locks the this.input.writable stream. All writes must go through\n * the inputWriter.\n */\n private async pumpDeferredStream() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n await this.inputWriter.write(value);\n }\n } catch (e) {\n this.logger.error(`Error pumping deferred stream: ${e}`);\n throw e;\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n let inferenceDurationTotalMs = 0;\n let inferenceCount = 0;\n const metricsReader = this.metricsStream.getReader();\n while (true) {\n const { done, value } = await metricsReader.read();\n if (done) {\n break;\n }\n switch (value.type) {\n case VADEventType.START_OF_SPEECH:\n inferenceCount++;\n if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {\n this.#vad.emit('metrics_collected', {\n type: 'vad_metrics',\n timestamp: Date.now(),\n idleTimeMs: Math.trunc(\n Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),\n ),\n inferenceDurationTotalMs,\n inferenceCount,\n label: this.#vad.label,\n });\n\n inferenceCount = 0;\n inferenceDurationTotalMs = 0;\n }\n break;\n case VADEventType.INFERENCE_DONE:\n inferenceDurationTotalMs += Math.round(value.inferenceDuration);\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n case VADEventType.END_OF_SPEECH:\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n }\n }\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** @deprecated Use `updateInputStream` instead */\n pushFrame(frame: AudioFrame) {\n // TODO(AJS-395): remove this method\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(frame);\n }\n\n flush() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(VADStream.FLUSH_SENTINEL);\n }\n\n endInput() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputClosed = true;\n this.input.writable.close();\n }\n\n async next(): Promise<IteratorResult<VADEvent>> {\n return this.outputReader.read().then(({ done, value }) => {\n if (done) {\n return { done: true, value: undefined };\n }\n return { done: false, value };\n });\n }\n\n close() {\n this.outputWriter.releaseLock();\n this.outputReader.cancel();\n this.output.writable.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): VADStream {\n return this;\n }\n}\n"],"mappings":"AAKA,SAAS,oBAAoB;AAM7B,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAS,yBAAyB;AAE3B,IAAK,eAAL,kBAAKA,kBAAL;AACL,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AAJU,SAAAA;AAAA,GAAA;AAiDL,MAAe,YAAa,aAAsD;AAAA,EACvF;AAAA,EAGA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAMF;AAEO,MAAe,UAAqD;AAAA,EACzE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,kBAAgE;AAAA,EAC5E,SAAS,IAAI,kBAA4B;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,SAAS;AAAA,EACT,cAAc;AAAA,EAExB;AAAA,EACA,oBAAoB,OAAO,CAAC;AAAA,EACpB,SAAS,IAAI;AAAA,EACb;AAAA,EAEA;AAAA,EACR,YAAY,KAAU;AACpB,SAAK,OAAO;AACZ,SAAK,sBAAsB,IAAI,uBAAmC;AAElE,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,eAAe,KAAK,OAAO,SAAS,UAAU;AAEnD,UAAM,CAAC,cAAc,aAAa,IAAI,KAAK,OAAO,SAAS,IAAI;AAC/D,SAAK,gBAAgB;AACrB,SAAK,eAAe,aAAa,UAAU;AAE3C,SAAK,mBAAmB;AACxB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,qBAAqB;AACjC,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,cAAM,KAAK,YAAY,MAAM,KAAK;AAAA,MACpC;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,kCAAkC,CAAC,EAAE;AACvD,YAAM;AAAA,IACR,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,QAAI,2BAA2B;AAC/B,QAAI,iBAAiB;AACrB,UAAM,gBAAgB,KAAK,cAAc,UAAU;AACnD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,MAAM,IAAI,MAAM,cAAc,KAAK;AACjD,UAAI,MAAM;AACR;AAAA,MACF;AACA,cAAQ,MAAM,MAAM;AAAA,QAClB,KAAK;AACH;AACA,cAAI,kBAAkB,IAAI,KAAK,KAAK,aAAa,gBAAgB;AAC/D,iBAAK,KAAK,KAAK,qBAAqB;AAAA,cAClC,MAAM;AAAA,cACN,WAAW,KAAK,IAAI;AAAA,cACpB,YAAY,KAAK;AAAA,gBACf,QAAQ,QAAQ,OAAO,OAAO,IAAI,KAAK,qBAAqB,OAAO,GAAO,CAAC;AAAA,cAC7E;AAAA,cACA;AAAA,cACA;AAAA,cACA,OAAO,KAAK,KAAK;AAAA,YACnB,CAAC;AAED,6BAAiB;AACjB,uCAA2B;AAAA,UAC7B;AACA;AAAA,QACF,KAAK;AACH,sCAA4B,KAAK,MAAM,MAAM,iBAAiB;AAC9D,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,QACF,KAAK;AACH,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAE3B,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,KAAK;AAAA,EAC9B;AAAA,EAEA,QAAQ;AACN,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,UAAU,cAAc;AAAA,EACjD;AAAA,EAEA,WAAW;AACT,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,cAAc;AACnB,SAAK,MAAM,SAAS,MAAM;AAAA,EAC5B;AAAA,EAEA,MAAM,OAA0C;AAC9C,WAAO,KAAK,aAAa,KAAK,EAAE,KAAK,CAAC,EAAE,MAAM,MAAM,MAAM;AACxD,UAAI,MAAM;AACR,eAAO,EAAE,MAAM,MAAM,OAAO,OAAU;AAAA,MACxC;AACA,aAAO,EAAE,MAAM,OAAO,MAAM;AAAA,IAC9B,CAAC;AAAA,EACH;AAAA,EAEA,QAAQ;AACN,SAAK,aAAa,YAAY;AAC9B,SAAK,aAAa,OAAO;AACzB,SAAK,OAAO,SAAS,MAAM;AAC3B,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["VADEventType"]}
@@ -713,9 +713,9 @@ ${instructions}` : instructions,
713
713
  const eouMetrics = {
714
714
  type: "eou_metrics",
715
715
  timestamp: Date.now(),
716
- endOfUtteranceDelay: info.endOfUtteranceDelay,
717
- transcriptionDelay: info.transcriptionDelay,
718
- onUserTurnCompletedDelay: callbackDuration,
716
+ endOfUtteranceDelayMs: info.endOfUtteranceDelay,
717
+ transcriptionDelayMs: info.transcriptionDelay,
718
+ onUserTurnCompletedDelayMs: callbackDuration,
719
719
  speechId: speechHandle.id
720
720
  };
721
721
  this.agentSession.emit(
@@ -1115,6 +1115,9 @@ ${instructions}` : instructions,
1115
1115
  this.agentSession._updateAgentState("speaking");
1116
1116
  };
1117
1117
  const readMessages = async (abortController, outputs) => {
1118
+ replyAbortController.signal.addEventListener("abort", () => abortController.abort(), {
1119
+ once: true
1120
+ });
1118
1121
  const forwardTasks = [];
1119
1122
  try {
1120
1123
  for await (const msg of ev.messageStream) {
@@ -1171,7 +1174,7 @@ ${instructions}` : instructions,
1171
1174
  const tasks = [
1172
1175
  import_utils.Task.from(
1173
1176
  (controller) => readMessages(controller, messageOutputs),
1174
- replyAbortController,
1177
+ void 0,
1175
1178
  "AgentActivity.realtime_generation.read_messages"
1176
1179
  )
1177
1180
  ];