@livekit/agents 1.0.19 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/stt/stt.cjs +3 -0
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +1 -0
- package/dist/stt/stt.d.ts +1 -0
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +3 -0
- package/dist/stt/stt.js.map +1 -1
- package/dist/tts/tts.cjs +3 -0
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +1 -0
- package/dist/tts/tts.d.ts +1 -0
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +3 -0
- package/dist/tts/tts.js.map +1 -1
- package/dist/vad.cjs +3 -0
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.cts +1 -0
- package/dist/vad.d.ts +1 -0
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +3 -0
- package/dist/vad.js.map +1 -1
- package/dist/voice/agent_activity.cjs +3 -0
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +3 -0
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +1 -1
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.js +1 -1
- package/dist/voice/audio_recognition.js.map +1 -1
- package/package.json +1 -1
- package/src/stt/stt.ts +4 -0
- package/src/tts/tts.ts +4 -0
- package/src/vad.ts +4 -0
- package/src/voice/agent_activity.ts +3 -0
- package/src/voice/audio_recognition.ts +1 -1
package/dist/vad.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vad.d.ts","sourceRoot":"","sources":["../src/vad.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EACV,cAAc,EACd,2BAA2B,EAC3B,2BAA2B,EAC5B,MAAM,iBAAiB,CAAC;AAEzB,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AAEnE,oBAAY,YAAY;IACtB,eAAe,IAAA;IACf,cAAc,IAAA;IACd,aAAa,IAAA;IACb,iBAAiB,IAAA;CAClB;AAED,MAAM,WAAW,QAAQ;IACvB,oFAAoF;IACpF,IAAI,EAAE,YAAY,CAAC;IACnB;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB,0CAA0C;IAC1C,SAAS,EAAE,MAAM,CAAC;IAClB,sCAAsC;IACtC,cAAc,EAAE,MAAM,CAAC;IACvB,uCAAuC;IACvC,eAAe,EAAE,MAAM,CAAC;IACxB;;;;;;;OAOG;IACH,MAAM,EAAE,UAAU,EAAE,CAAC;IACrB,6EAA6E;IAC7E,WAAW,EAAE,MAAM,CAAC;IACpB,0FAA0F;IAC1F,iBAAiB,EAAE,MAAM,CAAC;IAC1B,2DAA2D;IAC3D,QAAQ,EAAE,OAAO,CAAC;IAClB,wCAAwC;IACxC,qBAAqB,EAAE,MAAM,CAAC;IAC9B,uCAAuC;IACvC,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,eAAe;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;CACtD,CAAC;kCAE2D,aAAa,YAAY,CAAC;AAAvF,8BAAsB,GAAI,SAAQ,QAAsD;;IAEtF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAEX,YAAY,EAAE,eAAe;IAKzC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED;;OAEG;IACH,QAAQ,CAAC,MAAM,IAAI,SAAS;
|
|
1
|
+
{"version":3,"file":"vad.d.ts","sourceRoot":"","sources":["../src/vad.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EACV,cAAc,EACd,2BAA2B,EAC3B,2BAA2B,EAC5B,MAAM,iBAAiB,CAAC;AAEzB,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,EAAE,iBAAiB,EAAE,MAAM,gCAAgC,CAAC;AAEnE,oBAAY,YAAY;IACtB,eAAe,IAAA;IACf,cAAc,IAAA;IACd,aAAa,IAAA;IACb,iBAAiB,IAAA;CAClB;AAED,MAAM,WAAW,QAAQ;IACvB,oFAAoF;IACpF,IAAI,EAAE,YAAY,CAAC;IACnB;;OAEG;IACH,YAAY,EAAE,MAAM,CAAC;IACrB,0CAA0C;IAC1C,SAAS,EAAE,MAAM,CAAC;IAClB,sCAAsC;IACtC,cAAc,EAAE,MAAM,CAAC;IACvB,uCAAuC;IACvC,eAAe,EAAE,MAAM,CAAC;IACxB;;;;;;;OAOG;IACH,MAAM,EAAE,UAAU,EAAE,CAAC;IACrB,6EAA6E;IAC7E,WAAW,EAAE,MAAM,CAAC;IACpB,0FAA0F;IAC1F,iBAAiB,EAAE,MAAM,CAAC;IAC1B,2DAA2D;IAC3D,QAAQ,EAAE,OAAO,CAAC;IAClB,wCAAwC;IACxC,qBAAqB,EAAE,MAAM,CAAC;IAC9B,uCAAuC;IACvC,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAED,MAAM,WAAW,eAAe;IAC9B,cAAc,EAAE,MAAM,CAAC;CACxB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,mBAAmB,CAAC,EAAE,CAAC,OAAO,EAAE,UAAU,KAAK,IAAI,CAAC;CACtD,CAAC;kCAE2D,aAAa,YAAY,CAAC;AAAvF,8BAAsB,GAAI,SAAQ,QAAsD;;IAEtF,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;gBAEX,YAAY,EAAE,eAAe;IAKzC,IAAI,YAAY,IAAI,eAAe,CAElC;IAED;;OAEG;IACH,QAAQ,CAAC,MAAM,IAAI,SAAS;IAEtB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAG7B;AAED,8BAAsB,SAAU,YAAW,qBAAqB,CAAC,QAAQ,CAAC;;IACxE,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IACpE,SAAS,CAAC,KAAK,kEAAyE;IACxF,SAAS,CAAC,MAAM,8BAAqC;IACrD,SAAS,CAAC,WAAW,EAAE,2BAA2B,CAAC,UAAU,GAAG,OAAO,SAAS,CAAC,cAAc,CAAC,CAAC;IACjG,SAAS,CAAC,WAAW,EAAE,2BAA2B,CAAC,UAAU,GAAG,OAAO,SAAS,CAAC,cAAc,CAAC,CAAC;IACjG,SAAS,CAAC,YAAY,EAAE,2BAA2B,CAAC,QAAQ,CAAC,CAAC;IAC9D,SAAS,CAAC,YAAY,EAAE,2BAA2B,CAAC,QAAQ,CAAC,CAAC;IAC9D,SAAS,CAAC,MAAM,UAAS;IACzB,SAAS,CAAC,WAAW,UAAS;IAI9B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,mBAAmB,CAAqC;IAEhE,OAAO,CAAC,aAAa,CAA2B;gBACpC,GAAG,EAAE,GAAG;IAgBpB;;;;;;OAMG;YACW,kBAAkB;cAgBhB,cAAc;IAuC9B;;;;OAIG;IACH,SAAS,CAAC,YAAY,CAAC,KAAK,EAAE,QAAQ,GAAG,OAAO;IAahD,iBAAiB,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,CAAC;IAIzD,iBAAiB;IAIjB,kDAAkD;IAClD,SAAS,CAAC,KAAK,EAAE,UAAU;IAW3B,KAAK;IAUL,QAAQ;IAWF,IAAI,IAAI,OAAO,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAS/C,KAAK;IAOL,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,SAAS;CAGpC"}
|
package/dist/vad.js
CHANGED
package/dist/vad.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type {\n ReadableStream,\n ReadableStreamDefaultReader,\n WritableStreamDefaultWriter,\n} from 'node:stream/web';\nimport { log } from './log.js';\nimport type { VADMetrics } from './metrics/base.js';\nimport { DeferredReadableStream } from './stream/deferred_stream.js';\nimport { IdentityTransform } from './stream/identity_transform.js';\n\nexport enum VADEventType {\n START_OF_SPEECH,\n INFERENCE_DONE,\n END_OF_SPEECH,\n METRICS_COLLECTED,\n}\n\nexport interface VADEvent {\n /** Type of the VAD event (e.g., start of speech, end of speech, inference done). */\n type: VADEventType;\n /**\n * Index of the audio sample where the event occurred, relative to the inference sample rate.\n */\n samplesIndex: number;\n /** Timestamp when the event was fired. */\n timestamp: number;\n /** Duration of the speech segment. */\n speechDuration: number;\n /** Duration of the silence segment. */\n silenceDuration: number;\n /**\n * List of audio frames associated with the speech.\n *\n * @remarks\n * - For `start_of_speech` events, this contains the audio chunks that triggered the detection.\n * - For `inference_done` events, this contains the audio chunks that were processed.\n * - For `end_of_speech` events, this contains the complete user speech.\n */\n frames: AudioFrame[];\n /** Probability that speech is present (only for `INFERENCE_DONE` events). */\n probability: number;\n /** Time taken to perform the inference, in seconds (only for `INFERENCE_DONE` events). */\n inferenceDuration: number;\n /** Indicates whether speech was detected in the frames. */\n speaking: boolean;\n /** Threshold used to detect silence. */\n rawAccumulatedSilence: number;\n /** Threshold used to detect speech. */\n rawAccumulatedSpeech: number;\n}\n\nexport interface VADCapabilities {\n updateInterval: number;\n}\n\nexport type VADCallbacks = {\n ['metrics_collected']: (metrics: VADMetrics) => void;\n};\n\nexport abstract class VAD extends (EventEmitter as new () => TypedEmitter<VADCallbacks>) {\n #capabilities: VADCapabilities;\n abstract label: string;\n\n constructor(capabilities: VADCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n get capabilities(): VADCapabilities {\n return this.#capabilities;\n }\n\n /**\n * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.\n */\n abstract stream(): VADStream;\n}\n\nexport abstract class VADStream implements AsyncIterableIterator<VADEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new IdentityTransform<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();\n protected output = new IdentityTransform<VADEvent>();\n protected inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected inputReader: ReadableStreamDefaultReader<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected outputWriter: WritableStreamDefaultWriter<VADEvent>;\n protected outputReader: ReadableStreamDefaultReader<VADEvent>;\n protected closed = false;\n protected inputClosed = false;\n\n #vad: VAD;\n #lastActivityTime = BigInt(0);\n private logger = log();\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n\n private metricsStream: ReadableStream<VADEvent>;\n constructor(vad: VAD) {\n this.#vad = vad;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n\n this.inputWriter = this.input.writable.getWriter();\n this.inputReader = this.input.readable.getReader();\n this.outputWriter = this.output.writable.getWriter();\n\n const [outputStream, metricsStream] = this.output.readable.tee();\n this.metricsStream = metricsStream;\n this.outputReader = outputStream.getReader();\n\n this.pumpDeferredStream();\n this.monitorMetrics();\n }\n\n /**\n * Reads from the deferred input stream and forwards chunks to the input writer.\n *\n * Note: we can't just do this.deferredInputStream.stream.pipeTo(this.input.writable)\n * because the inputWriter locks the this.input.writable stream. All writes must go through\n * the inputWriter.\n */\n private async pumpDeferredStream() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n await this.inputWriter.write(value);\n }\n } catch (e) {\n this.logger.error(`Error pumping deferred stream: ${e}`);\n throw e;\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n let inferenceDurationTotalMs = 0;\n let inferenceCount = 0;\n const metricsReader = this.metricsStream.getReader();\n while (true) {\n const { done, value } = await metricsReader.read();\n if (done) {\n break;\n }\n switch (value.type) {\n case VADEventType.START_OF_SPEECH:\n inferenceCount++;\n if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {\n this.#vad.emit('metrics_collected', {\n type: 'vad_metrics',\n timestamp: Date.now(),\n idleTimeMs: Math.trunc(\n Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),\n ),\n inferenceDurationTotalMs,\n inferenceCount,\n label: this.#vad.label,\n });\n\n inferenceCount = 0;\n inferenceDurationTotalMs = 0;\n }\n break;\n case VADEventType.INFERENCE_DONE:\n inferenceDurationTotalMs += Math.round(value.inferenceDuration);\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n case VADEventType.END_OF_SPEECH:\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n }\n }\n }\n\n /**\n * Safely send a VAD event to the output stream, handling writer release errors during shutdown.\n * @returns true if the event was sent, false if the stream is closing\n * @throws Error if an unexpected error occurs\n */\n protected sendVADEvent(event: VADEvent): boolean {\n if (this.closed) {\n return false;\n }\n\n try {\n this.outputWriter.write(event);\n return true;\n } catch (e) {\n throw e;\n }\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** @deprecated Use `updateInputStream` instead */\n pushFrame(frame: AudioFrame) {\n // TODO(AJS-395): remove this method\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(frame);\n }\n\n flush() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(VADStream.FLUSH_SENTINEL);\n }\n\n endInput() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputClosed = true;\n this.input.writable.close();\n }\n\n async next(): Promise<IteratorResult<VADEvent>> {\n return this.outputReader.read().then(({ done, value }) => {\n if (done) {\n return { done: true, value: undefined };\n }\n return { done: false, value };\n });\n }\n\n close() {\n this.outputWriter.releaseLock();\n this.outputReader.cancel();\n this.output.writable.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): VADStream {\n return this;\n }\n}\n"],"mappings":"AAKA,SAAS,oBAAoB;AAM7B,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAS,yBAAyB;AAE3B,IAAK,eAAL,kBAAKA,kBAAL;AACL,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AAJU,SAAAA;AAAA,GAAA;AAiDL,MAAe,YAAa,aAAsD;AAAA,EACvF;AAAA,EAGA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAMF;AAEO,MAAe,UAAqD;AAAA,EACzE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,kBAAgE;AAAA,EAC5E,SAAS,IAAI,kBAA4B;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,SAAS;AAAA,EACT,cAAc;AAAA,EAExB;AAAA,EACA,oBAAoB,OAAO,CAAC;AAAA,EACpB,SAAS,IAAI;AAAA,EACb;AAAA,EAEA;AAAA,EACR,YAAY,KAAU;AACpB,SAAK,OAAO;AACZ,SAAK,sBAAsB,IAAI,uBAAmC;AAElE,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,eAAe,KAAK,OAAO,SAAS,UAAU;AAEnD,UAAM,CAAC,cAAc,aAAa,IAAI,KAAK,OAAO,SAAS,IAAI;AAC/D,SAAK,gBAAgB;AACrB,SAAK,eAAe,aAAa,UAAU;AAE3C,SAAK,mBAAmB;AACxB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,qBAAqB;AACjC,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,cAAM,KAAK,YAAY,MAAM,KAAK;AAAA,MACpC;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,kCAAkC,CAAC,EAAE;AACvD,YAAM;AAAA,IACR,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,QAAI,2BAA2B;AAC/B,QAAI,iBAAiB;AACrB,UAAM,gBAAgB,KAAK,cAAc,UAAU;AACnD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,MAAM,IAAI,MAAM,cAAc,KAAK;AACjD,UAAI,MAAM;AACR;AAAA,MACF;AACA,cAAQ,MAAM,MAAM;AAAA,QAClB,KAAK;AACH;AACA,cAAI,kBAAkB,IAAI,KAAK,KAAK,aAAa,gBAAgB;AAC/D,iBAAK,KAAK,KAAK,qBAAqB;AAAA,cAClC,MAAM;AAAA,cACN,WAAW,KAAK,IAAI;AAAA,cACpB,YAAY,KAAK;AAAA,gBACf,QAAQ,QAAQ,OAAO,OAAO,IAAI,KAAK,qBAAqB,OAAO,GAAO,CAAC;AAAA,cAC7E;AAAA,cACA;AAAA,cACA;AAAA,cACA,OAAO,KAAK,KAAK;AAAA,YACnB,CAAC;AAED,6BAAiB;AACjB,uCAA2B;AAAA,UAC7B;AACA;AAAA,QACF,KAAK;AACH,sCAA4B,KAAK,MAAM,MAAM,iBAAiB;AAC9D,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,QACF,KAAK;AACH,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOU,aAAa,OAA0B;AAC/C,QAAI,KAAK,QAAQ;AACf,aAAO;AAAA,IACT;AAEA,QAAI;AACF,WAAK,aAAa,MAAM,KAAK;AAC7B,aAAO;AAAA,IACT,SAAS,GAAG;AACV,YAAM;AAAA,IACR;AAAA,EACF;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAE3B,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,KAAK;AAAA,EAC9B;AAAA,EAEA,QAAQ;AACN,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,UAAU,cAAc;AAAA,EACjD;AAAA,EAEA,WAAW;AACT,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,cAAc;AACnB,SAAK,MAAM,SAAS,MAAM;AAAA,EAC5B;AAAA,EAEA,MAAM,OAA0C;AAC9C,WAAO,KAAK,aAAa,KAAK,EAAE,KAAK,CAAC,EAAE,MAAM,MAAM,MAAM;AACxD,UAAI,MAAM;AACR,eAAO,EAAE,MAAM,MAAM,OAAO,OAAU;AAAA,MACxC;AACA,aAAO,EAAE,MAAM,OAAO,MAAM;AAAA,IAC9B,CAAC;AAAA,EACH;AAAA,EAEA,QAAQ;AACN,SAAK,aAAa,YAAY;AAC9B,SAAK,aAAa,OAAO;AACzB,SAAK,OAAO,SAAS,MAAM;AAC3B,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["VADEventType"]}
|
|
1
|
+
{"version":3,"sources":["../src/vad.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type {\n ReadableStream,\n ReadableStreamDefaultReader,\n WritableStreamDefaultWriter,\n} from 'node:stream/web';\nimport { log } from './log.js';\nimport type { VADMetrics } from './metrics/base.js';\nimport { DeferredReadableStream } from './stream/deferred_stream.js';\nimport { IdentityTransform } from './stream/identity_transform.js';\n\nexport enum VADEventType {\n START_OF_SPEECH,\n INFERENCE_DONE,\n END_OF_SPEECH,\n METRICS_COLLECTED,\n}\n\nexport interface VADEvent {\n /** Type of the VAD event (e.g., start of speech, end of speech, inference done). */\n type: VADEventType;\n /**\n * Index of the audio sample where the event occurred, relative to the inference sample rate.\n */\n samplesIndex: number;\n /** Timestamp when the event was fired. */\n timestamp: number;\n /** Duration of the speech segment. */\n speechDuration: number;\n /** Duration of the silence segment. */\n silenceDuration: number;\n /**\n * List of audio frames associated with the speech.\n *\n * @remarks\n * - For `start_of_speech` events, this contains the audio chunks that triggered the detection.\n * - For `inference_done` events, this contains the audio chunks that were processed.\n * - For `end_of_speech` events, this contains the complete user speech.\n */\n frames: AudioFrame[];\n /** Probability that speech is present (only for `INFERENCE_DONE` events). */\n probability: number;\n /** Time taken to perform the inference, in seconds (only for `INFERENCE_DONE` events). */\n inferenceDuration: number;\n /** Indicates whether speech was detected in the frames. */\n speaking: boolean;\n /** Threshold used to detect silence. */\n rawAccumulatedSilence: number;\n /** Threshold used to detect speech. */\n rawAccumulatedSpeech: number;\n}\n\nexport interface VADCapabilities {\n updateInterval: number;\n}\n\nexport type VADCallbacks = {\n ['metrics_collected']: (metrics: VADMetrics) => void;\n};\n\nexport abstract class VAD extends (EventEmitter as new () => TypedEmitter<VADCallbacks>) {\n #capabilities: VADCapabilities;\n abstract label: string;\n\n constructor(capabilities: VADCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n get capabilities(): VADCapabilities {\n return this.#capabilities;\n }\n\n /**\n * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.\n */\n abstract stream(): VADStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\nexport abstract class VADStream implements AsyncIterableIterator<VADEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new IdentityTransform<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();\n protected output = new IdentityTransform<VADEvent>();\n protected inputWriter: WritableStreamDefaultWriter<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected inputReader: ReadableStreamDefaultReader<AudioFrame | typeof VADStream.FLUSH_SENTINEL>;\n protected outputWriter: WritableStreamDefaultWriter<VADEvent>;\n protected outputReader: ReadableStreamDefaultReader<VADEvent>;\n protected closed = false;\n protected inputClosed = false;\n\n #vad: VAD;\n #lastActivityTime = BigInt(0);\n private logger = log();\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n\n private metricsStream: ReadableStream<VADEvent>;\n constructor(vad: VAD) {\n this.#vad = vad;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n\n this.inputWriter = this.input.writable.getWriter();\n this.inputReader = this.input.readable.getReader();\n this.outputWriter = this.output.writable.getWriter();\n\n const [outputStream, metricsStream] = this.output.readable.tee();\n this.metricsStream = metricsStream;\n this.outputReader = outputStream.getReader();\n\n this.pumpDeferredStream();\n this.monitorMetrics();\n }\n\n /**\n * Reads from the deferred input stream and forwards chunks to the input writer.\n *\n * Note: we can't just do this.deferredInputStream.stream.pipeTo(this.input.writable)\n * because the inputWriter locks the this.input.writable stream. All writes must go through\n * the inputWriter.\n */\n private async pumpDeferredStream() {\n const reader = this.deferredInputStream.stream.getReader();\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n await this.inputWriter.write(value);\n }\n } catch (e) {\n this.logger.error(`Error pumping deferred stream: ${e}`);\n throw e;\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n let inferenceDurationTotalMs = 0;\n let inferenceCount = 0;\n const metricsReader = this.metricsStream.getReader();\n while (true) {\n const { done, value } = await metricsReader.read();\n if (done) {\n break;\n }\n switch (value.type) {\n case VADEventType.START_OF_SPEECH:\n inferenceCount++;\n if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {\n this.#vad.emit('metrics_collected', {\n type: 'vad_metrics',\n timestamp: Date.now(),\n idleTimeMs: Math.trunc(\n Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),\n ),\n inferenceDurationTotalMs,\n inferenceCount,\n label: this.#vad.label,\n });\n\n inferenceCount = 0;\n inferenceDurationTotalMs = 0;\n }\n break;\n case VADEventType.INFERENCE_DONE:\n inferenceDurationTotalMs += Math.round(value.inferenceDuration);\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n case VADEventType.END_OF_SPEECH:\n this.#lastActivityTime = process.hrtime.bigint();\n break;\n }\n }\n }\n\n /**\n * Safely send a VAD event to the output stream, handling writer release errors during shutdown.\n * @returns true if the event was sent, false if the stream is closing\n * @throws Error if an unexpected error occurs\n */\n protected sendVADEvent(event: VADEvent): boolean {\n if (this.closed) {\n return false;\n }\n\n try {\n this.outputWriter.write(event);\n return true;\n } catch (e) {\n throw e;\n }\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** @deprecated Use `updateInputStream` instead */\n pushFrame(frame: AudioFrame) {\n // TODO(AJS-395): remove this method\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(frame);\n }\n\n flush() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputWriter.write(VADStream.FLUSH_SENTINEL);\n }\n\n endInput() {\n if (this.inputClosed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.inputClosed = true;\n this.input.writable.close();\n }\n\n async next(): Promise<IteratorResult<VADEvent>> {\n return this.outputReader.read().then(({ done, value }) => {\n if (done) {\n return { done: true, value: undefined };\n }\n return { done: false, value };\n });\n }\n\n close() {\n this.outputWriter.releaseLock();\n this.outputReader.cancel();\n this.output.writable.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): VADStream {\n return this;\n }\n}\n"],"mappings":"AAKA,SAAS,oBAAoB;AAM7B,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAS,yBAAyB;AAE3B,IAAK,eAAL,kBAAKA,kBAAL;AACL,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AACA,EAAAA,4BAAA;AAJU,SAAAA;AAAA,GAAA;AAiDL,MAAe,YAAa,aAAsD;AAAA,EACvF;AAAA,EAGA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA,EAOA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAEO,MAAe,UAAqD;AAAA,EACzE,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,kBAAgE;AAAA,EAC5E,SAAS,IAAI,kBAA4B;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,SAAS;AAAA,EACT,cAAc;AAAA,EAExB;AAAA,EACA,oBAAoB,OAAO,CAAC;AAAA,EACpB,SAAS,IAAI;AAAA,EACb;AAAA,EAEA;AAAA,EACR,YAAY,KAAU;AACpB,SAAK,OAAO;AACZ,SAAK,sBAAsB,IAAI,uBAAmC;AAElE,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,cAAc,KAAK,MAAM,SAAS,UAAU;AACjD,SAAK,eAAe,KAAK,OAAO,SAAS,UAAU;AAEnD,UAAM,CAAC,cAAc,aAAa,IAAI,KAAK,OAAO,SAAS,IAAI;AAC/D,SAAK,gBAAgB;AACrB,SAAK,eAAe,aAAa,UAAU;AAE3C,SAAK,mBAAmB;AACxB,SAAK,eAAe;AAAA,EACtB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAc,qBAAqB;AACjC,UAAM,SAAS,KAAK,oBAAoB,OAAO,UAAU;AACzD,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,cAAM,KAAK,YAAY,MAAM,KAAK;AAAA,MACpC;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,kCAAkC,CAAC,EAAE;AACvD,YAAM;AAAA,IACR,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,QAAI,2BAA2B;AAC/B,QAAI,iBAAiB;AACrB,UAAM,gBAAgB,KAAK,cAAc,UAAU;AACnD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,MAAM,IAAI,MAAM,cAAc,KAAK;AACjD,UAAI,MAAM;AACR;AAAA,MACF;AACA,cAAQ,MAAM,MAAM;AAAA,QAClB,KAAK;AACH;AACA,cAAI,kBAAkB,IAAI,KAAK,KAAK,aAAa,gBAAgB;AAC/D,iBAAK,KAAK,KAAK,qBAAqB;AAAA,cAClC,MAAM;AAAA,cACN,WAAW,KAAK,IAAI;AAAA,cACpB,YAAY,KAAK;AAAA,gBACf,QAAQ,QAAQ,OAAO,OAAO,IAAI,KAAK,qBAAqB,OAAO,GAAO,CAAC;AAAA,cAC7E;AAAA,cACA;AAAA,cACA;AAAA,cACA,OAAO,KAAK,KAAK;AAAA,YACnB,CAAC;AAED,6BAAiB;AACjB,uCAA2B;AAAA,UAC7B;AACA;AAAA,QACF,KAAK;AACH,sCAA4B,KAAK,MAAM,MAAM,iBAAiB;AAC9D,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,QACF,KAAK;AACH,eAAK,oBAAoB,QAAQ,OAAO,OAAO;AAC/C;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOU,aAAa,OAA0B;AAC/C,QAAI,KAAK,QAAQ;AACf,aAAO;AAAA,IACT;AAEA,QAAI;AACF,WAAK,aAAa,MAAM,KAAK;AAC7B,aAAO;AAAA,IACT,SAAS,GAAG;AACV,YAAM;AAAA,IACR;AAAA,EACF;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAE3B,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,KAAK;AAAA,EAC9B;AAAA,EAEA,QAAQ;AACN,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,YAAY,MAAM,UAAU,cAAc;AAAA,EACjD;AAAA,EAEA,WAAW;AACT,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,cAAc;AACnB,SAAK,MAAM,SAAS,MAAM;AAAA,EAC5B;AAAA,EAEA,MAAM,OAA0C;AAC9C,WAAO,KAAK,aAAa,KAAK,EAAE,KAAK,CAAC,EAAE,MAAM,MAAM,MAAM;AACxD,UAAI,MAAM;AACR,eAAO,EAAE,MAAM,MAAM,OAAO,OAAU;AAAA,MACxC;AACA,aAAO,EAAE,MAAM,OAAO,MAAM;AAAA,IAC9B,CAAC;AAAA,EACH;AAAA,EAEA,QAAQ;AACN,SAAK,aAAa,YAAY;AAC9B,SAAK,aAAa,OAAO;AACzB,SAAK,OAAO,SAAS,MAAM;AAC3B,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAe;AAClC,WAAO;AAAA,EACT;AACF;","names":["VADEventType"]}
|
|
@@ -1601,12 +1601,15 @@ ${instructions}` : instructions,
|
|
|
1601
1601
|
}
|
|
1602
1602
|
if (this.stt instanceof import_stt.STT) {
|
|
1603
1603
|
this.stt.off("metrics_collected", this.onMetricsCollected);
|
|
1604
|
+
await this.stt.close();
|
|
1604
1605
|
}
|
|
1605
1606
|
if (this.tts instanceof import_tts.TTS) {
|
|
1606
1607
|
this.tts.off("metrics_collected", this.onMetricsCollected);
|
|
1608
|
+
await this.tts.close();
|
|
1607
1609
|
}
|
|
1608
1610
|
if (this.vad instanceof import_vad.VAD) {
|
|
1609
1611
|
this.vad.off("metrics_collected", this.onMetricsCollected);
|
|
1612
|
+
await this.vad.close();
|
|
1610
1613
|
}
|
|
1611
1614
|
this.detachAudioInput();
|
|
1612
1615
|
await ((_a = this.realtimeSession) == null ? void 0 : _a.close());
|