@livekit/agents 1.0.0-next.0 → 1.0.0-next.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/llm/llm.cjs +1 -2
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +1 -2
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/utils.cjs +5 -4
- package/dist/llm/utils.cjs.map +1 -1
- package/dist/llm/utils.d.cts +2 -2
- package/dist/llm/utils.d.ts +2 -2
- package/dist/llm/utils.d.ts.map +1 -1
- package/dist/llm/utils.js +5 -4
- package/dist/llm/utils.js.map +1 -1
- package/dist/stream/deferred_stream.test.cjs +10 -10
- package/dist/stream/deferred_stream.test.cjs.map +1 -1
- package/dist/stream/deferred_stream.test.js +1 -1
- package/dist/stream/deferred_stream.test.js.map +1 -1
- package/dist/stt/stt.cjs +1 -2
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +1 -2
- package/dist/stt/stt.js.map +1 -1
- package/dist/tts/tts.cjs +2 -3
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +1 -2
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs +19 -2
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +11 -0
- package/dist/utils.d.ts +11 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +17 -1
- package/dist/utils.js.map +1 -1
- package/dist/utils.test.cjs +30 -31
- package/dist/utils.test.cjs.map +1 -1
- package/dist/utils.test.js +1 -1
- package/dist/utils.test.js.map +1 -1
- package/dist/voice/agent_session.cjs +3 -0
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +1 -0
- package/dist/voice/agent_session.d.ts +1 -0
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +3 -0
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +2 -3
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +1 -2
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/transcription/synchronizer.cjs +1 -2
- package/dist/voice/transcription/synchronizer.cjs.map +1 -1
- package/dist/voice/transcription/synchronizer.d.ts.map +1 -1
- package/dist/voice/transcription/synchronizer.js +1 -2
- package/dist/voice/transcription/synchronizer.js.map +1 -1
- package/package.json +1 -2
- package/src/llm/llm.ts +1 -2
- package/src/llm/utils.ts +9 -5
- package/src/stream/deferred_stream.test.ts +1 -1
- package/src/stt/stt.ts +1 -2
- package/src/tts/tts.ts +1 -2
- package/src/utils.test.ts +1 -1
- package/src/utils.ts +28 -1
- package/src/voice/agent_session.ts +4 -0
- package/src/voice/audio_recognition.ts +1 -2
- package/src/voice/transcription/synchronizer.ts +1 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame } from '@livekit/rtc-node';\nimport { delay } from '@std/async';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { Task } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\n\nexport interface EndOfTurnInfo {\n newTranscript: string;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: string) => Promise<number | undefined>;\n supportsLanguage: (language?: string) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n recognitionHooks: RecognitionHooks;\n stt?: STTNode;\n vad?: VAD;\n turnDetector?: _TurnDetector;\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n minEndpointingDelay: number;\n maxEndpointingDelay: number;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: string;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private lastSpeakingTime = 0;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n this.hooks.onFinalTranscript(ev);\n const transcript = ev.alternatives?.[0]?.text;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.audioInterimTranscript = '';\n\n if (!this.speaking) {\n if (!this.vad) {\n // Copied from python agents:\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for _last_speaking_time\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n this.userTurnCommitted = true;\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask = (lastSpeakingTime: number) => async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n // TODO(AJS-74): need to support actual turn detection model plugins for following code to run\n if (turnDetector) {\n this.logger.debug('Running turn detector model');\n if (!turnDetector.supportsLanguage(this.lastLanguage)) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n const endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n this.logger.debug(\n { endOfTurnProbability, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n const unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n this.logger.debug(\n {\n unlikelyThreshold,\n endOfTurnProbability,\n language: this.lastLanguage,\n transcript: this.audioTranscript,\n },\n 'EOU Detection',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n }\n }\n\n const extraSleep = lastSpeakingTime + endpointingDelay - Date.now();\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptionDelay: Math.max(this.lastFinalTranscriptTime - lastSpeakingTime, 0),\n endOfUtteranceDelay: Date.now() - lastSpeakingTime,\n });\n\n if (committed) {\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n this.bounceEOUTask = Task.from(bounceEOUTask(this.lastSpeakingTime));\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n this.hooks.onStartOfSpeech(ev);\n this.speaking = true;\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n this.hooks.onEndOfSpeech(ev);\n this.speaking = false;\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.lastSpeakingTime = Date.now() - ev.silenceDuration;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":"AAGA,SAAS,kBAAkB;AAC3B,SAAS,aAAa;AAEtB,SAAS,sBAAsB;AAC/B,eAAiC;AACjC,SAAS,WAAW;AACpB,SAAS,wBAAwB,kCAAkC;AACnE,SAAS,yBAAyB;AAClC,SAAS,4BAA4B;AACrC,SAA2B,uBAAuB;AAClD,SAAS,YAAY;AACrB,SAAkC,oBAAoB;AAqC/C,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,SAAS,IAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,kBAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AAEpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,iBAAiB,qBAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEA,MAAc,WAAW,IAAiB;AAzH5C;AA0HI,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,gBAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,gBAAgB;AACnB,aAAK,MAAM,kBAAkB,EAAE;AAC/B,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,UAAU;AAClB,cAAI,CAAC,KAAK,KAAK;AAOb,iBAAK,mBAAmB,KAAK,IAAI;AAAA,UACnC;AAEA,cAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC,aAAK,oBAAoB;AAEzB,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AA1MhD;AA2MI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBAAgB,CAAC,qBAA6B,OAAO,eAAgC;AACzF,UAAI,mBAAmB,KAAK;AAG5B,UAAI,cAAc;AAChB,aAAK,OAAO,MAAM,6BAA6B;AAC/C,YAAI,CAAC,aAAa,iBAAiB,KAAK,YAAY,GAAG;AACrD,eAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,QAClF,OAAO;AACL,gBAAM,uBAAuB,MAAM,aAAa,iBAAiB,OAAO;AACxE,eAAK,OAAO;AAAA,YACV,EAAE,sBAAsB,UAAU,KAAK,aAAa;AAAA,YACpD;AAAA,UACF;AAEA,gBAAM,oBAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAChF,eAAK,OAAO;AAAA,YACV;AAAA,cACE;AAAA,cACA;AAAA,cACA,UAAU,KAAK;AAAA,cACf,YAAY,KAAK;AAAA,YACnB;AAAA,YACA;AAAA,UACF;AAEA,cAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,+BAAmB,KAAK;AAAA,UAC1B;AAAA,QACF;AAAA,MACF;AAEA,YAAM,aAAa,mBAAmB,mBAAmB,KAAK,IAAI;AAElE,YAAM,MAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAElE,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,oBAAoB,KAAK,IAAI,KAAK,0BAA0B,kBAAkB,CAAC;AAAA,QAC/E,qBAAqB,KAAK,IAAI,IAAI;AAAA,MACpC,CAAC;AAED,UAAI,WAAW;AAEb,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGA,eAAK,kBAAL,mBAAoB;AACpB,SAAK,gBAAgB,KAAK,KAAK,cAAc,KAAK,gBAAgB,CAAC;AAEnE,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,gBAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,YAAI,2BAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AA3VzE;AA4VI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C,iBAAK,MAAM,gBAAgB,EAAE;AAC7B,iBAAK,WAAW;AAEhB,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAChC;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C,iBAAK,MAAM,cAAc,EAAE;AAC3B,iBAAK,WAAW;AAEhB,iBAAK,mBAAmB,KAAK,IAAI,IAAI,GAAG;AAExC,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AAvZlB;AAwZI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AApazC;AAqaI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,WAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,cAAM,MAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,KAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AA9chB;AA+cI,SAAK,uBAAuB;AAC5B,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/voice/audio_recognition.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { AudioFrame } from '@livekit/rtc-node';\nimport type { WritableStreamDefaultWriter } from 'node:stream/web';\nimport { ReadableStream } from 'node:stream/web';\nimport { type ChatContext } from '../llm/chat_context.js';\nimport { log } from '../log.js';\nimport { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';\nimport { IdentityTransform } from '../stream/identity_transform.js';\nimport { mergeReadableStreams } from '../stream/merge_readable_streams.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { Task, delay } from '../utils.js';\nimport { type VAD, type VADEvent, VADEventType } from '../vad.js';\nimport type { TurnDetectionMode } from './agent_session.js';\nimport type { STTNode } from './io.js';\n\nexport interface EndOfTurnInfo {\n newTranscript: string;\n transcriptionDelay: number;\n endOfUtteranceDelay: number;\n}\n\nexport interface RecognitionHooks {\n onStartOfSpeech: (ev: VADEvent) => void;\n onVADInferenceDone: (ev: VADEvent) => void;\n onEndOfSpeech: (ev: VADEvent) => void;\n onInterimTranscript: (ev: SpeechEvent) => void;\n onFinalTranscript: (ev: SpeechEvent) => void;\n onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;\n\n retrieveChatCtx: () => ChatContext;\n}\n\nexport interface _TurnDetector {\n unlikelyThreshold: (language?: string) => Promise<number | undefined>;\n supportsLanguage: (language?: string) => Promise<boolean>;\n predictEndOfTurn(chatCtx: ChatContext): Promise<number>;\n}\n\nexport interface AudioRecognitionOptions {\n recognitionHooks: RecognitionHooks;\n stt?: STTNode;\n vad?: VAD;\n turnDetector?: _TurnDetector;\n turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n minEndpointingDelay: number;\n maxEndpointingDelay: number;\n}\n\nexport class AudioRecognition {\n private hooks: RecognitionHooks;\n private stt?: STTNode;\n private vad?: VAD;\n private turnDetector?: _TurnDetector;\n private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;\n private minEndpointingDelay: number;\n private maxEndpointingDelay: number;\n private lastLanguage?: string;\n\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private lastFinalTranscriptTime = 0;\n private audioTranscript = '';\n private audioInterimTranscript = '';\n private lastSpeakingTime = 0;\n private userTurnCommitted = false;\n private speaking = false;\n private sampleRate?: number;\n\n private vadInputStream: ReadableStream<AudioFrame>;\n private sttInputStream: ReadableStream<AudioFrame>;\n private silenceAudioTransform = new IdentityTransform<AudioFrame>();\n private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;\n\n // all cancellable tasks\n private bounceEOUTask?: Task<void>;\n private commitUserTurnTask?: Task<void>;\n private vadTask?: Task<void>;\n private sttTask?: Task<void>;\n\n constructor(opts: AudioRecognitionOptions) {\n this.hooks = opts.recognitionHooks;\n this.stt = opts.stt;\n this.vad = opts.vad;\n this.turnDetector = opts.turnDetector;\n this.turnDetectionMode = opts.turnDetectionMode;\n this.minEndpointingDelay = opts.minEndpointingDelay;\n this.maxEndpointingDelay = opts.maxEndpointingDelay;\n this.lastLanguage = undefined;\n\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();\n this.vadInputStream = vadInputStream;\n this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);\n this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();\n }\n\n /**\n * Current transcript of the user's speech, including interim transcript if available.\n */\n get currentTranscript(): string {\n if (this.audioInterimTranscript) {\n return `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n return this.audioTranscript;\n }\n\n async start() {\n this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));\n this.vadTask.result.catch((err) => {\n this.logger.error(`Error running VAD task: ${err}`);\n });\n\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n }\n\n private async onSTTEvent(ev: SpeechEvent) {\n if (\n this.turnDetectionMode === 'manual' &&\n this.userTurnCommitted &&\n (this.bounceEOUTask === undefined ||\n this.bounceEOUTask.done ||\n ev.type == SpeechEventType.INTERIM_TRANSCRIPT)\n ) {\n // ignore stt event if user turn already committed and EOU task is done\n // or it's an interim transcript\n this.logger.debug(\n {\n userTurnCommitted: this.userTurnCommitted,\n eouTaskDone: this.bounceEOUTask?.done,\n evType: ev.type,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'ignoring stt event',\n );\n return;\n }\n\n switch (ev.type) {\n case SpeechEventType.FINAL_TRANSCRIPT:\n this.hooks.onFinalTranscript(ev);\n const transcript = ev.alternatives?.[0]?.text;\n this.lastLanguage = ev.alternatives?.[0]?.language;\n\n if (!transcript) {\n // stt final transcript received but no transcript\n return;\n }\n\n this.logger.debug(\n {\n user_transcript: transcript,\n language: this.lastLanguage,\n },\n 'received user transcript',\n );\n\n this.lastFinalTranscriptTime = Date.now();\n this.audioTranscript += ` ${transcript}`;\n this.audioTranscript = this.audioTranscript.trimStart();\n this.audioInterimTranscript = '';\n\n if (!this.speaking) {\n if (!this.vad) {\n // Copied from python agents:\n // vad disabled, use stt timestamp\n // TODO: this would screw up transcription latency metrics\n // but we'll live with it for now.\n // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH\n // and using that timestamp for _last_speaking_time\n this.lastSpeakingTime = Date.now();\n }\n\n if (this.vadBaseTurnDetection || this.userTurnCommitted) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');\n this.runEOUDetection(chatCtx);\n }\n }\n break;\n case SpeechEventType.INTERIM_TRANSCRIPT:\n this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');\n this.hooks.onInterimTranscript(ev);\n this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';\n break;\n case SpeechEventType.END_OF_SPEECH:\n if (this.turnDetectionMode !== 'stt') break;\n this.userTurnCommitted = true;\n\n if (!this.speaking) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on stt END_OF_SPEECH');\n this.runEOUDetection(chatCtx);\n }\n }\n }\n\n private runEOUDetection(chatCtx: ChatContext) {\n this.logger.debug(\n {\n stt: this.stt,\n audioTranscript: this.audioTranscript,\n turnDetectionMode: this.turnDetectionMode,\n },\n 'running EOU detection',\n );\n\n if (this.stt && !this.audioTranscript && this.turnDetectionMode !== 'manual') {\n // stt enabled but no transcript yet\n this.logger.debug('skipping EOU detection');\n return;\n }\n\n chatCtx = chatCtx.copy();\n chatCtx.addMessage({ role: 'user', content: this.audioTranscript });\n\n const turnDetector =\n // disable EOU model if manual turn detection enabled\n this.audioTranscript && this.turnDetectionMode !== 'manual' ? this.turnDetector : undefined;\n\n const bounceEOUTask = (lastSpeakingTime: number) => async (controller: AbortController) => {\n let endpointingDelay = this.minEndpointingDelay;\n\n // TODO(AJS-74): need to support actual turn detection model plugins for following code to run\n if (turnDetector) {\n this.logger.debug('Running turn detector model');\n if (!turnDetector.supportsLanguage(this.lastLanguage)) {\n this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);\n } else {\n const endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);\n this.logger.debug(\n { endOfTurnProbability, language: this.lastLanguage },\n 'end of turn probability',\n );\n\n const unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);\n this.logger.debug(\n {\n unlikelyThreshold,\n endOfTurnProbability,\n language: this.lastLanguage,\n transcript: this.audioTranscript,\n },\n 'EOU Detection',\n );\n\n if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {\n endpointingDelay = this.maxEndpointingDelay;\n }\n }\n }\n\n const extraSleep = lastSpeakingTime + endpointingDelay - Date.now();\n // add delay to see if there's a potential upcoming EOU task that cancels this one\n await delay(Math.max(extraSleep, 0), { signal: controller.signal });\n\n this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');\n\n const committed = await this.hooks.onEndOfTurn({\n newTranscript: this.audioTranscript,\n transcriptionDelay: Math.max(this.lastFinalTranscriptTime - lastSpeakingTime, 0),\n endOfUtteranceDelay: Date.now() - lastSpeakingTime,\n });\n\n if (committed) {\n // clear the transcript if the user turn was committed\n this.audioTranscript = '';\n }\n\n this.userTurnCommitted = false;\n };\n\n // cancel any existing EOU task\n this.bounceEOUTask?.cancel();\n this.bounceEOUTask = Task.from(bounceEOUTask(this.lastSpeakingTime));\n\n this.bounceEOUTask.result\n .then(() => {\n this.logger.debug('EOU detection task completed');\n })\n .catch((err: unknown) => {\n if (err instanceof Error && err.message.includes('This operation was aborted')) {\n // ignore aborted errors\n return;\n }\n this.logger.error(err, 'Error in EOU detection task:');\n });\n }\n\n private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {\n if (!stt) return;\n\n this.logger.debug('createSttTask: create stt stream from stt node');\n\n const sttStream = await stt(this.sttInputStream, {});\n\n if (signal.aborted || sttStream === null) return;\n\n if (sttStream instanceof ReadableStream) {\n const reader = sttStream.getReader();\n\n signal.addEventListener('abort', async () => {\n try {\n reader.releaseLock();\n await sttStream?.cancel();\n } catch (e) {\n this.logger.debug('createSttTask: error during abort handler:', e);\n }\n });\n\n try {\n while (true) {\n if (signal.aborted) break;\n\n const { done, value: ev } = await reader.read();\n if (done) break;\n\n if (typeof ev === 'string') {\n throw new Error('STT node must yield SpeechEvent');\n } else {\n await this.onSTTEvent(ev);\n }\n }\n } catch (e) {\n if (isStreamReaderReleaseError(e)) {\n return;\n }\n this.logger.error({ error: e }, 'createSttTask: error reading sttStream');\n } finally {\n reader.releaseLock();\n try {\n await sttStream.cancel();\n } catch (e) {\n this.logger.debug(\n 'createSttTask: error cancelling sttStream (may already be cancelled):',\n e,\n );\n }\n }\n }\n }\n\n private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {\n if (!vad) return;\n\n const vadStream = vad.stream();\n vadStream.updateInputStream(this.vadInputStream);\n\n const abortHandler = () => {\n vadStream.detachInputStream();\n vadStream.close();\n signal.removeEventListener('abort', abortHandler);\n };\n signal.addEventListener('abort', abortHandler);\n\n try {\n for await (const ev of vadStream) {\n if (signal.aborted) break;\n\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.logger.debug('VAD task: START_OF_SPEECH');\n this.hooks.onStartOfSpeech(ev);\n this.speaking = true;\n\n this.bounceEOUTask?.cancel();\n break;\n case VADEventType.INFERENCE_DONE:\n this.hooks.onVADInferenceDone(ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.logger.debug('VAD task: END_OF_SPEECH');\n this.hooks.onEndOfSpeech(ev);\n this.speaking = false;\n // when VAD fires END_OF_SPEECH, it already waited for the silence_duration\n this.lastSpeakingTime = Date.now() - ev.silenceDuration;\n\n if (\n this.vadBaseTurnDetection ||\n (this.turnDetectionMode === 'stt' && this.userTurnCommitted)\n ) {\n const chatCtx = this.hooks.retrieveChatCtx();\n this.runEOUDetection(chatCtx);\n }\n break;\n }\n }\n } catch (e) {\n this.logger.error(e, 'Error in VAD task');\n } finally {\n this.logger.debug('VAD task closed');\n }\n }\n\n setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputAudioStream() {\n this.deferredInputStream.detachSource();\n }\n\n clearUserTurn() {\n this.audioTranscript = '';\n this.audioInterimTranscript = '';\n this.userTurnCommitted = false;\n\n this.sttTask?.cancelAndWait().finally(() => {\n this.sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));\n this.sttTask.result.catch((err) => {\n this.logger.error(`Error running STT task: ${err}`);\n });\n });\n }\n\n commitUserTurn(audioDetached: boolean) {\n const commitUserTurnTask =\n (delayDuration: number = 500) =>\n async (controller: AbortController) => {\n if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {\n // flush the stt by pushing silence\n if (audioDetached && this.sampleRate !== undefined) {\n const numSamples = Math.floor(this.sampleRate * 0.5);\n const silence = new Int16Array(numSamples * 2);\n const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);\n this.silenceAudioWriter.write(silenceFrame);\n }\n\n // wait for the final transcript to be available\n await delay(delayDuration, { signal: controller.signal });\n }\n\n if (this.audioInterimTranscript) {\n // append interim transcript in case the final transcript is not ready\n this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();\n }\n this.audioInterimTranscript = '';\n\n const chatCtx = this.hooks.retrieveChatCtx();\n this.logger.debug('running EOU detection on commitUserTurn');\n this.runEOUDetection(chatCtx);\n this.userTurnCommitted = true;\n };\n\n // cancel any existing commit user turn task\n this.commitUserTurnTask?.cancel();\n this.commitUserTurnTask = Task.from(commitUserTurnTask());\n\n this.commitUserTurnTask.result\n .then(() => {\n this.logger.debug('User turn committed');\n })\n .catch((err: unknown) => {\n this.logger.error(err, 'Error in user turn commit task:');\n });\n }\n\n async close() {\n this.detachInputAudioStream();\n await this.commitUserTurnTask?.cancelAndWait();\n await this.sttTask?.cancelAndWait();\n await this.vadTask?.cancelAndWait();\n await this.bounceEOUTask?.cancelAndWait();\n }\n\n private get vadBaseTurnDetection() {\n return ['vad', undefined].includes(this.turnDetectionMode);\n }\n}\n"],"mappings":"AAGA,SAAS,kBAAkB;AAE3B,SAAS,sBAAsB;AAC/B,eAAiC;AACjC,SAAS,WAAW;AACpB,SAAS,wBAAwB,kCAAkC;AACnE,SAAS,yBAAyB;AAClC,SAAS,4BAA4B;AACrC,SAA2B,uBAAuB;AAClD,SAAS,MAAM,aAAa;AAC5B,SAAkC,oBAAoB;AAqC/C,MAAM,iBAAiB;AAAA,EACpB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA;AAAA,EACA,SAAS,IAAI;AAAA,EACb,0BAA0B;AAAA,EAC1B,kBAAkB;AAAA,EAClB,yBAAyB;AAAA,EACzB,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,WAAW;AAAA,EACX;AAAA,EAEA;AAAA,EACA;AAAA,EACA,wBAAwB,IAAI,kBAA8B;AAAA,EAC1D;AAAA;AAAA,EAGA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,MAA+B;AACzC,SAAK,QAAQ,KAAK;AAClB,SAAK,MAAM,KAAK;AAChB,SAAK,MAAM,KAAK;AAChB,SAAK,eAAe,KAAK;AACzB,SAAK,oBAAoB,KAAK;AAC9B,SAAK,sBAAsB,KAAK;AAChC,SAAK,sBAAsB,KAAK;AAChC,SAAK,eAAe;AAEpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,UAAM,CAAC,gBAAgB,cAAc,IAAI,KAAK,oBAAoB,OAAO,IAAI;AAC7E,SAAK,iBAAiB;AACtB,SAAK,iBAAiB,qBAAqB,gBAAgB,KAAK,sBAAsB,QAAQ;AAC9F,SAAK,qBAAqB,KAAK,sBAAsB,SAAS,UAAU;AAAA,EAC1E;AAAA;AAAA;AAAA;AAAA,EAKA,IAAI,oBAA4B;AAC9B,QAAI,KAAK,wBAAwB;AAC/B,aAAO,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,IACvE;AACA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAED,SAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,SAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,WAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,IACpD,CAAC;AAAA,EACH;AAAA,EAEA,MAAc,WAAW,IAAiB;AAxH5C;AAyHI,QACE,KAAK,sBAAsB,YAC3B,KAAK,sBACJ,KAAK,kBAAkB,UACtB,KAAK,cAAc,QACnB,GAAG,QAAQ,gBAAgB,qBAC7B;AAGA,WAAK,OAAO;AAAA,QACV;AAAA,UACE,mBAAmB,KAAK;AAAA,UACxB,cAAa,UAAK,kBAAL,mBAAoB;AAAA,UACjC,QAAQ,GAAG;AAAA,UACX,mBAAmB,KAAK;AAAA,QAC1B;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF;AAEA,YAAQ,GAAG,MAAM;AAAA,MACf,KAAK,gBAAgB;AACnB,aAAK,MAAM,kBAAkB,EAAE;AAC/B,cAAM,cAAa,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AACzC,aAAK,gBAAe,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB;AAE1C,YAAI,CAAC,YAAY;AAEf;AAAA,QACF;AAEA,aAAK,OAAO;AAAA,UACV;AAAA,YACE,iBAAiB;AAAA,YACjB,UAAU,KAAK;AAAA,UACjB;AAAA,UACA;AAAA,QACF;AAEA,aAAK,0BAA0B,KAAK,IAAI;AACxC,aAAK,mBAAmB,IAAI,UAAU;AACtC,aAAK,kBAAkB,KAAK,gBAAgB,UAAU;AACtD,aAAK,yBAAyB;AAE9B,YAAI,CAAC,KAAK,UAAU;AAClB,cAAI,CAAC,KAAK,KAAK;AAOb,iBAAK,mBAAmB,KAAK,IAAI;AAAA,UACnC;AAEA,cAAI,KAAK,wBAAwB,KAAK,mBAAmB;AACvD,kBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,iBAAK,OAAO,MAAM,+CAA+C;AACjE,iBAAK,gBAAgB,OAAO;AAAA,UAC9B;AAAA,QACF;AACA;AAAA,MACF,KAAK,gBAAgB;AACnB,aAAK,OAAO,MAAM,EAAE,aAAY,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,KAAK,GAAG,oBAAoB;AAClF,aAAK,MAAM,oBAAoB,EAAE;AACjC,aAAK,2BAAyB,cAAG,iBAAH,mBAAkB,OAAlB,mBAAsB,SAAQ;AAC5D;AAAA,MACF,KAAK,gBAAgB;AACnB,YAAI,KAAK,sBAAsB,MAAO;AACtC,aAAK,oBAAoB;AAEzB,YAAI,CAAC,KAAK,UAAU;AAClB,gBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,eAAK,OAAO,MAAM,4CAA4C;AAC9D,eAAK,gBAAgB,OAAO;AAAA,QAC9B;AAAA,IACJ;AAAA,EACF;AAAA,EAEQ,gBAAgB,SAAsB;AAzMhD;AA0MI,SAAK,OAAO;AAAA,MACV;AAAA,QACE,KAAK,KAAK;AAAA,QACV,iBAAiB,KAAK;AAAA,QACtB,mBAAmB,KAAK;AAAA,MAC1B;AAAA,MACA;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,CAAC,KAAK,mBAAmB,KAAK,sBAAsB,UAAU;AAE5E,WAAK,OAAO,MAAM,wBAAwB;AAC1C;AAAA,IACF;AAEA,cAAU,QAAQ,KAAK;AACvB,YAAQ,WAAW,EAAE,MAAM,QAAQ,SAAS,KAAK,gBAAgB,CAAC;AAElE,UAAM;AAAA;AAAA,MAEJ,KAAK,mBAAmB,KAAK,sBAAsB,WAAW,KAAK,eAAe;AAAA;AAEpF,UAAM,gBAAgB,CAAC,qBAA6B,OAAO,eAAgC;AACzF,UAAI,mBAAmB,KAAK;AAG5B,UAAI,cAAc;AAChB,aAAK,OAAO,MAAM,6BAA6B;AAC/C,YAAI,CAAC,aAAa,iBAAiB,KAAK,YAAY,GAAG;AACrD,eAAK,OAAO,MAAM,2CAA2C,KAAK,YAAY,EAAE;AAAA,QAClF,OAAO;AACL,gBAAM,uBAAuB,MAAM,aAAa,iBAAiB,OAAO;AACxE,eAAK,OAAO;AAAA,YACV,EAAE,sBAAsB,UAAU,KAAK,aAAa;AAAA,YACpD;AAAA,UACF;AAEA,gBAAM,oBAAoB,MAAM,aAAa,kBAAkB,KAAK,YAAY;AAChF,eAAK,OAAO;AAAA,YACV;AAAA,cACE;AAAA,cACA;AAAA,cACA,UAAU,KAAK;AAAA,cACf,YAAY,KAAK;AAAA,YACnB;AAAA,YACA;AAAA,UACF;AAEA,cAAI,qBAAqB,uBAAuB,mBAAmB;AACjE,+BAAmB,KAAK;AAAA,UAC1B;AAAA,QACF;AAAA,MACF;AAEA,YAAM,aAAa,mBAAmB,mBAAmB,KAAK,IAAI;AAElE,YAAM,MAAM,KAAK,IAAI,YAAY,CAAC,GAAG,EAAE,QAAQ,WAAW,OAAO,CAAC;AAElE,WAAK,OAAO,MAAM,EAAE,YAAY,KAAK,gBAAgB,GAAG,kBAAkB;AAE1E,YAAM,YAAY,MAAM,KAAK,MAAM,YAAY;AAAA,QAC7C,eAAe,KAAK;AAAA,QACpB,oBAAoB,KAAK,IAAI,KAAK,0BAA0B,kBAAkB,CAAC;AAAA,QAC/E,qBAAqB,KAAK,IAAI,IAAI;AAAA,MACpC,CAAC;AAED,UAAI,WAAW;AAEb,aAAK,kBAAkB;AAAA,MACzB;AAEA,WAAK,oBAAoB;AAAA,IAC3B;AAGA,eAAK,kBAAL,mBAAoB;AACpB,SAAK,gBAAgB,KAAK,KAAK,cAAc,KAAK,gBAAgB,CAAC;AAEnE,SAAK,cAAc,OAChB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,8BAA8B;AAAA,IAClD,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,UAAI,eAAe,SAAS,IAAI,QAAQ,SAAS,4BAA4B,GAAG;AAE9E;AAAA,MACF;AACA,WAAK,OAAO,MAAM,KAAK,8BAA8B;AAAA,IACvD,CAAC;AAAA,EACL;AAAA,EAEA,MAAc,cAAc,KAA0B,QAAqB;AACzE,QAAI,CAAC,IAAK;AAEV,SAAK,OAAO,MAAM,gDAAgD;AAElE,UAAM,YAAY,MAAM,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAEnD,QAAI,OAAO,WAAW,cAAc,KAAM;AAE1C,QAAI,qBAAqB,gBAAgB;AACvC,YAAM,SAAS,UAAU,UAAU;AAEnC,aAAO,iBAAiB,SAAS,YAAY;AAC3C,YAAI;AACF,iBAAO,YAAY;AACnB,iBAAM,uCAAW;AAAA,QACnB,SAAS,GAAG;AACV,eAAK,OAAO,MAAM,8CAA8C,CAAC;AAAA,QACnE;AAAA,MACF,CAAC;AAED,UAAI;AACF,eAAO,MAAM;AACX,cAAI,OAAO,QAAS;AAEpB,gBAAM,EAAE,MAAM,OAAO,GAAG,IAAI,MAAM,OAAO,KAAK;AAC9C,cAAI,KAAM;AAEV,cAAI,OAAO,OAAO,UAAU;AAC1B,kBAAM,IAAI,MAAM,iCAAiC;AAAA,UACnD,OAAO;AACL,kBAAM,KAAK,WAAW,EAAE;AAAA,UAC1B;AAAA,QACF;AAAA,MACF,SAAS,GAAG;AACV,YAAI,2BAA2B,CAAC,GAAG;AACjC;AAAA,QACF;AACA,aAAK,OAAO,MAAM,EAAE,OAAO,EAAE,GAAG,wCAAwC;AAAA,MAC1E,UAAE;AACA,eAAO,YAAY;AACnB,YAAI;AACF,gBAAM,UAAU,OAAO;AAAA,QACzB,SAAS,GAAG;AACV,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,cAAc,KAAsB,QAAqB;AA1VzE;AA2VI,QAAI,CAAC,IAAK;AAEV,UAAM,YAAY,IAAI,OAAO;AAC7B,cAAU,kBAAkB,KAAK,cAAc;AAE/C,UAAM,eAAe,MAAM;AACzB,gBAAU,kBAAkB;AAC5B,gBAAU,MAAM;AAChB,aAAO,oBAAoB,SAAS,YAAY;AAAA,IAClD;AACA,WAAO,iBAAiB,SAAS,YAAY;AAE7C,QAAI;AACF,uBAAiB,MAAM,WAAW;AAChC,YAAI,OAAO,QAAS;AAEpB,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,2BAA2B;AAC7C,iBAAK,MAAM,gBAAgB,EAAE;AAC7B,iBAAK,WAAW;AAEhB,uBAAK,kBAAL,mBAAoB;AACpB;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,MAAM,mBAAmB,EAAE;AAChC;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,MAAM,yBAAyB;AAC3C,iBAAK,MAAM,cAAc,EAAE;AAC3B,iBAAK,WAAW;AAEhB,iBAAK,mBAAmB,KAAK,IAAI,IAAI,GAAG;AAExC,gBACE,KAAK,wBACJ,KAAK,sBAAsB,SAAS,KAAK,mBAC1C;AACA,oBAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,mBAAK,gBAAgB,OAAO;AAAA,YAC9B;AACA;AAAA,QACJ;AAAA,MACF;AAAA,IACF,SAAS,GAAG;AACV,WAAK,OAAO,MAAM,GAAG,mBAAmB;AAAA,IAC1C,UAAE;AACA,WAAK,OAAO,MAAM,iBAAiB;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,oBAAoB,aAAyC;AAC3D,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,yBAAyB;AACvB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA,EAEA,gBAAgB;AAtZlB;AAuZI,SAAK,kBAAkB;AACvB,SAAK,yBAAyB;AAC9B,SAAK,oBAAoB;AAEzB,eAAK,YAAL,mBAAc,gBAAgB,QAAQ,MAAM;AAC1C,WAAK,UAAU,KAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,cAAc,KAAK,KAAK,MAAM,CAAC;AAC7E,WAAK,QAAQ,OAAO,MAAM,CAAC,QAAQ;AACjC,aAAK,OAAO,MAAM,2BAA2B,GAAG,EAAE;AAAA,MACpD,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,eAAe,eAAwB;AAnazC;AAoaI,UAAM,qBACJ,CAAC,gBAAwB,QACzB,OAAO,eAAgC;AACrC,UAAI,KAAK,IAAI,IAAI,KAAK,0BAA0B,eAAe;AAE7D,YAAI,iBAAiB,KAAK,eAAe,QAAW;AAClD,gBAAM,aAAa,KAAK,MAAM,KAAK,aAAa,GAAG;AACnD,gBAAM,UAAU,IAAI,WAAW,aAAa,CAAC;AAC7C,gBAAM,eAAe,IAAI,WAAW,SAAS,KAAK,YAAY,GAAG,UAAU;AAC3E,eAAK,mBAAmB,MAAM,YAAY;AAAA,QAC5C;AAGA,cAAM,MAAM,eAAe,EAAE,QAAQ,WAAW,OAAO,CAAC;AAAA,MAC1D;AAEA,UAAI,KAAK,wBAAwB;AAE/B,aAAK,kBAAkB,GAAG,KAAK,eAAe,IAAI,KAAK,sBAAsB,GAAG,KAAK;AAAA,MACvF;AACA,WAAK,yBAAyB;AAE9B,YAAM,UAAU,KAAK,MAAM,gBAAgB;AAC3C,WAAK,OAAO,MAAM,yCAAyC;AAC3D,WAAK,gBAAgB,OAAO;AAC5B,WAAK,oBAAoB;AAAA,IAC3B;AAGF,eAAK,uBAAL,mBAAyB;AACzB,SAAK,qBAAqB,KAAK,KAAK,mBAAmB,CAAC;AAExD,SAAK,mBAAmB,OACrB,KAAK,MAAM;AACV,WAAK,OAAO,MAAM,qBAAqB;AAAA,IACzC,CAAC,EACA,MAAM,CAAC,QAAiB;AACvB,WAAK,OAAO,MAAM,KAAK,iCAAiC;AAAA,IAC1D,CAAC;AAAA,EACL;AAAA,EAEA,MAAM,QAAQ;AA7chB;AA8cI,SAAK,uBAAuB;AAC5B,YAAM,UAAK,uBAAL,mBAAyB;AAC/B,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,YAAL,mBAAc;AACpB,YAAM,UAAK,kBAAL,mBAAoB;AAAA,EAC5B;AAAA,EAEA,IAAY,uBAAuB;AACjC,WAAO,CAAC,OAAO,MAAS,EAAE,SAAS,KAAK,iBAAiB;AAAA,EAC3D;AACF;","names":[]}
|
|
@@ -22,7 +22,6 @@ __export(synchronizer_exports, {
|
|
|
22
22
|
defaultTextSyncOptions: () => defaultTextSyncOptions
|
|
23
23
|
});
|
|
24
24
|
module.exports = __toCommonJS(synchronizer_exports);
|
|
25
|
-
var import_async = require("@std/async");
|
|
26
25
|
var import_log = require("../../log.cjs");
|
|
27
26
|
var import_identity_transform = require("../../stream/identity_transform.cjs");
|
|
28
27
|
var import_tokenize = require("../../tokenize/index.cjs");
|
|
@@ -194,7 +193,7 @@ class SegmentSynchronizerImpl {
|
|
|
194
193
|
if (this.closed) {
|
|
195
194
|
return;
|
|
196
195
|
}
|
|
197
|
-
await (0,
|
|
196
|
+
await (0, import_utils.delay)(sleepTimeSeconds * 1e3);
|
|
198
197
|
}
|
|
199
198
|
async close() {
|
|
200
199
|
if (this.closed) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/voice/transcription/synchronizer.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { delay } from '@std/async';\nimport type { ReadableStream, WritableStreamDefaultWriter } from 'node:stream/web';\nimport { log } from '../../log.js';\nimport { IdentityTransform } from '../../stream/identity_transform.js';\nimport type { SentenceStream, SentenceTokenizer } from '../../tokenize/index.js';\nimport { basic } from '../../tokenize/index.js';\nimport { Future, Task } from '../../utils.js';\nimport { AudioOutput, type PlaybackFinishedEvent, TextOutput } from '../io.js';\n\nconst STANDARD_SPEECH_RATE = 3.83; // hyphens (syllables) per second\n\ninterface TextSyncOptions {\n speed: number;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n sentenceTokenizer: SentenceTokenizer;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedText: string;\n}\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\nclass SegmentSynchronizerImpl {\n private textData: TextData;\n private audioData: AudioData;\n private speed: number;\n private outputStream: IdentityTransform<string>;\n private outputStreamWriter: WritableStreamDefaultWriter<string>;\n private captureTask: Promise<void>;\n private startWallTime?: number;\n\n private startFuture: Future = new Future();\n private closedFuture: Future = new Future();\n private playbackCompleted: boolean = false;\n\n private logger = log();\n\n constructor(\n private readonly options: TextSyncOptions,\n private readonly nextInChain: TextOutput,\n ) {\n this.speed = options.speed * STANDARD_SPEECH_RATE; // hyphens per second\n this.textData = {\n sentenceStream: options.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedText: '',\n };\n this.audioData = {\n pushedDuration: 0,\n done: false,\n };\n this.outputStream = new IdentityTransform();\n this.outputStreamWriter = this.outputStream.writable.getWriter();\n\n this.mainTask()\n .then(() => {\n this.outputStreamWriter.close();\n })\n .catch((error) => {\n this.logger.error({ error }, 'mainTask SegmentSynchronizerImpl');\n });\n this.captureTask = this.captureTaskImpl();\n }\n\n get closed() {\n return this.closedFuture.done;\n }\n\n get audioInputEnded() {\n return this.audioData.done;\n }\n\n get textInputEnded() {\n return this.textData.done;\n }\n\n get readable(): ReadableStream<string> {\n return this.outputStream.readable;\n }\n\n pushAudio(frame: AudioFrame) {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.pushAudio called after close');\n return;\n }\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n\n if (!this.startWallTime && frameDuration > 0) {\n this.startWallTime = Date.now();\n this.startFuture.resolve();\n }\n\n this.audioData.pushedDuration += frameDuration;\n }\n\n endAudioInput() {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.endAudioInput called after close');\n return;\n }\n\n this.audioData.done = true;\n }\n\n pushText(text: string) {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.pushText called after close');\n return;\n }\n\n this.textData.sentenceStream.pushText(text);\n this.textData.pushedText += text;\n }\n\n endTextInput() {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.endTextInput called after close');\n return;\n }\n\n this.textData.done = true;\n this.textData.sentenceStream.endInput();\n }\n\n markPlaybackFinished(_playbackPosition: number, interrupted: boolean) {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.markPlaybackFinished called after close');\n return;\n }\n\n if (!this.textData.done || !this.audioData.done) {\n this.logger.warn(\n { textDone: this.textData.done, audioDone: this.audioData.done },\n 'SegmentSynchronizerImpl.markPlaybackFinished called before text/audio input is done',\n );\n return;\n }\n\n if (!interrupted) {\n this.playbackCompleted = true;\n }\n }\n\n get synchronizedTranscript(): string {\n if (this.playbackCompleted) {\n return this.textData.pushedText;\n }\n return this.textData.forwardedText;\n }\n\n private async captureTaskImpl() {\n // Don't use a for-await loop here, because exiting the loop will close the writer in the\n // outputStream, which will cause an error in the mainTask.then method.\n const reader = this.outputStream.readable.getReader();\n while (true) {\n const { done, value: text } = await reader.read();\n if (done) {\n break;\n }\n this.textData.forwardedText += text;\n await this.nextInChain.captureText(text);\n }\n reader.releaseLock();\n this.nextInChain.flush();\n }\n\n private async mainTask(): Promise<void> {\n await this.startFuture.await;\n\n if (this.closed && !this.playbackCompleted) {\n return;\n }\n\n if (!this.startWallTime) {\n throw new Error('startWallTime is not set when starting SegmentSynchronizerImpl.mainTask');\n }\n\n for await (const textSegment of this.textData.sentenceStream) {\n const sentence = textSegment.token;\n\n let textCursor = 0;\n if (this.closed && !this.playbackCompleted) {\n return;\n }\n\n for (const [word, _, endPos] of this.options.splitWords(sentence)) {\n if (this.closed && !this.playbackCompleted) {\n return;\n }\n\n if (this.playbackCompleted) {\n this.outputStreamWriter.write(sentence.slice(textCursor, endPos));\n textCursor = endPos;\n continue;\n }\n\n const wordHphens = this.options.hyphenateWord(word).length;\n const elapsedSeconds = (Date.now() - this.startWallTime) / 1000;\n const targetHyphens = elapsedSeconds * this.options.speed;\n const hyphensBehind = Math.max(0, targetHyphens - this.textData.forwardedHyphens);\n let delay = Math.max(0, wordHphens - hyphensBehind) / this.speed;\n\n if (this.playbackCompleted) {\n delay = 0;\n }\n\n await this.sleepIfNotClosed(delay / 2);\n this.outputStreamWriter.write(sentence.slice(textCursor, endPos));\n await this.sleepIfNotClosed(delay / 2);\n\n this.textData.forwardedHyphens += wordHphens;\n textCursor = endPos;\n }\n\n if (textCursor < sentence.length) {\n const remaining = sentence.slice(textCursor);\n this.outputStreamWriter.write(remaining);\n }\n }\n }\n\n private async sleepIfNotClosed(sleepTimeSeconds: number) {\n if (this.closed) {\n return;\n }\n await delay(sleepTimeSeconds * 1000);\n }\n\n async close(): Promise<void> {\n if (this.closed) {\n return;\n }\n this.closedFuture.resolve();\n this.startFuture.resolve(); // avoid deadlock of mainTaskImpl in case it never started\n this.textData.sentenceStream.close();\n await this.captureTask;\n }\n}\n\nexport interface TranscriptionSynchronizerOptions {\n speed: number;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n sentenceTokenizer: SentenceTokenizer;\n}\n\nexport const defaultTextSyncOptions: TranscriptionSynchronizerOptions = {\n speed: 1,\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n sentenceTokenizer: new basic.SentenceTokenizer({\n retainFormat: true,\n }),\n};\n\nexport class TranscriptionSynchronizer {\n readonly audioOutput: SyncedAudioOutput;\n readonly textOutput: SyncedTextOutput;\n\n private options: TextSyncOptions;\n private rotateSegmentTask: Task<void>;\n private _enabled: boolean = true;\n private closed: boolean = false;\n\n /* @internal */\n _impl: SegmentSynchronizerImpl;\n\n private logger = log();\n\n constructor(\n nextInChainAudio: AudioOutput,\n nextInChainText: TextOutput,\n options: TranscriptionSynchronizerOptions = defaultTextSyncOptions,\n ) {\n this.audioOutput = new SyncedAudioOutput(this, nextInChainAudio);\n this.textOutput = new SyncedTextOutput(this, nextInChainText);\n this.options = {\n speed: options.speed,\n hyphenateWord: options.hyphenateWord,\n splitWords: options.splitWords,\n sentenceTokenizer: options.sentenceTokenizer,\n };\n\n // initial segment/first segment, recreated for each new segment\n this._impl = new SegmentSynchronizerImpl(this.options, nextInChainText);\n this.rotateSegmentTask = Task.from((controller) =>\n this.rotateSegmentTaskImpl(controller.signal),\n );\n }\n\n get enabled(): boolean {\n return this._enabled;\n }\n\n set enabled(enabled: boolean) {\n if (this._enabled === enabled) {\n return;\n }\n\n this._enabled = enabled;\n this.rotateSegment();\n }\n\n rotateSegment() {\n if (this.closed) {\n return;\n }\n\n if (!this.rotateSegmentTask.done) {\n this.logger.warn('rotateSegment called while previous segment is still being rotated');\n }\n this.rotateSegmentTask = Task.from((controller) =>\n this.rotateSegmentTaskImpl(controller.signal, this.rotateSegmentTask),\n );\n }\n\n async close(): Promise<void> {\n this.closed = true;\n await this.rotateSegmentTask.cancelAndWait();\n await this._impl.close();\n }\n\n async barrier(): Promise<void> {\n if (this.rotateSegmentTask.done) {\n return;\n }\n await this.rotateSegmentTask.result;\n }\n\n private async rotateSegmentTaskImpl(abort: AbortSignal, oldTask?: Task<void>) {\n if (oldTask) {\n await oldTask.result;\n }\n\n if (abort.aborted) {\n return;\n }\n await this._impl.close();\n this._impl = new SegmentSynchronizerImpl(this.options, this.textOutput.nextInChain);\n }\n}\n\nclass SyncedAudioOutput extends AudioOutput {\n private pushedDuration: number = 0.0;\n\n constructor(\n public synchronizer: TranscriptionSynchronizer,\n private nextInChainAudio: AudioOutput,\n ) {\n super(nextInChainAudio.sampleRate, nextInChainAudio);\n }\n\n async captureFrame(frame: AudioFrame): Promise<void> {\n // using barrier() on capture should be sufficient, flush() must not be called if\n // capture_frame isn't completed\n await this.synchronizer.barrier();\n\n await super.captureFrame(frame);\n await this.nextInChainAudio.captureFrame(frame); // passthrough audio\n\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n this.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n\n if (!this.synchronizer.enabled) {\n return;\n }\n\n if (this.synchronizer._impl.audioInputEnded) {\n this.logger.warn(\n 'SegmentSynchronizerImpl audio marked as ended in capture audio, rotating segment',\n );\n this.synchronizer.rotateSegment();\n await this.synchronizer.barrier();\n }\n this.synchronizer._impl.pushAudio(frame);\n }\n\n flush() {\n super.flush();\n this.nextInChainAudio.flush();\n\n if (!this.synchronizer.enabled) {\n return;\n }\n\n if (!this.pushedDuration) {\n // in case there is no audio after the text was pushed, rotate the segment\n this.synchronizer.rotateSegment();\n return;\n }\n\n this.synchronizer._impl.endAudioInput();\n }\n\n clearBuffer() {\n this.nextInChainAudio.clearBuffer();\n }\n\n // this is going to be automatically called by the next_in_chain\n onPlaybackFinished(ev: PlaybackFinishedEvent) {\n if (!this.synchronizer.enabled) {\n super.onPlaybackFinished(ev);\n return;\n }\n\n this.synchronizer._impl.markPlaybackFinished(ev.playbackPosition, ev.interrupted);\n super.onPlaybackFinished({\n playbackPosition: ev.playbackPosition,\n interrupted: ev.interrupted,\n synchronizedTranscript: this.synchronizer._impl.synchronizedTranscript,\n });\n\n this.synchronizer.rotateSegment();\n this.pushedDuration = 0.0;\n }\n}\n\nclass SyncedTextOutput extends TextOutput {\n private capturing: boolean = false;\n private logger = log();\n\n constructor(\n private readonly synchronizer: TranscriptionSynchronizer,\n public readonly nextInChain: TextOutput,\n ) {\n super(nextInChain);\n }\n\n async captureText(text: string): Promise<void> {\n await this.synchronizer.barrier();\n\n if (!this.synchronizer.enabled) {\n // pass through to the next in chain\n await this.nextInChain.captureText(text);\n return;\n }\n\n this.capturing = true;\n if (this.synchronizer._impl.textInputEnded) {\n this.logger.warn(\n 'SegmentSynchronizerImpl text marked as ended in capture text, rotating segment',\n );\n this.synchronizer.rotateSegment();\n await this.synchronizer.barrier();\n }\n this.synchronizer._impl.pushText(text);\n }\n\n flush() {\n if (!this.synchronizer.enabled) {\n this.nextInChain.flush(); // passthrough text if the synchronizer is disabled\n return;\n }\n\n if (!this.capturing) {\n return;\n }\n\n this.capturing = false;\n this.synchronizer._impl.endTextInput();\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,mBAAsB;AAEtB,iBAAoB;AACpB,gCAAkC;AAElC,sBAAsB;AACtB,mBAA6B;AAC7B,gBAAoE;AAEpE,MAAM,uBAAuB;AAsB7B,MAAM,wBAAwB;AAAA,EAe5B,YACmB,SACA,aACjB;AAFiB;AACA;AAEjB,SAAK,QAAQ,QAAQ,QAAQ;AAC7B,SAAK,WAAW;AAAA,MACd,gBAAgB,QAAQ,kBAAkB,OAAO;AAAA,MACjD,YAAY;AAAA,MACZ,MAAM;AAAA,MACN,kBAAkB;AAAA,MAClB,eAAe;AAAA,IACjB;AACA,SAAK,YAAY;AAAA,MACf,gBAAgB;AAAA,MAChB,MAAM;AAAA,IACR;AACA,SAAK,eAAe,IAAI,4CAAkB;AAC1C,SAAK,qBAAqB,KAAK,aAAa,SAAS,UAAU;AAE/D,SAAK,SAAS,EACX,KAAK,MAAM;AACV,WAAK,mBAAmB,MAAM;AAAA,IAChC,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,WAAK,OAAO,MAAM,EAAE,MAAM,GAAG,kCAAkC;AAAA,IACjE,CAAC;AACH,SAAK,cAAc,KAAK,gBAAgB;AAAA,EAC1C;AAAA,EAzCQ;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA,cAAsB,IAAI,oBAAO;AAAA,EACjC,eAAuB,IAAI,oBAAO;AAAA,EAClC,oBAA6B;AAAA,EAE7B,aAAS,gBAAI;AAAA,EA+BrB,IAAI,SAAS;AACX,WAAO,KAAK,aAAa;AAAA,EAC3B;AAAA,EAEA,IAAI,kBAAkB;AACpB,WAAO,KAAK,UAAU;AAAA,EACxB;AAAA,EAEA,IAAI,iBAAiB;AACnB,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EAEA,IAAI,WAAmC;AACrC,WAAO,KAAK,aAAa;AAAA,EAC3B;AAAA,EAEA,UAAU,OAAmB;AAC3B,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,sDAAsD;AACvE;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM,oBAAoB,MAAM;AAEtD,QAAI,CAAC,KAAK,iBAAiB,gBAAgB,GAAG;AAC5C,WAAK,gBAAgB,KAAK,IAAI;AAC9B,WAAK,YAAY,QAAQ;AAAA,IAC3B;AAEA,SAAK,UAAU,kBAAkB;AAAA,EACnC;AAAA,EAEA,gBAAgB;AACd,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,0DAA0D;AAC3E;AAAA,IACF;AAEA,SAAK,UAAU,OAAO;AAAA,EACxB;AAAA,EAEA,SAAS,MAAc;AACrB,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,qDAAqD;AACtE;AAAA,IACF;AAEA,SAAK,SAAS,eAAe,SAAS,IAAI;AAC1C,SAAK,SAAS,cAAc;AAAA,EAC9B;AAAA,EAEA,eAAe;AACb,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,yDAAyD;AAC1E;AAAA,IACF;AAEA,SAAK,SAAS,OAAO;AACrB,SAAK,SAAS,eAAe,SAAS;AAAA,EACxC;AAAA,EAEA,qBAAqB,mBAA2B,aAAsB;AACpE,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,iEAAiE;AAClF;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,SAAS,QAAQ,CAAC,KAAK,UAAU,MAAM;AAC/C,WAAK,OAAO;AAAA,QACV,EAAE,UAAU,KAAK,SAAS,MAAM,WAAW,KAAK,UAAU,KAAK;AAAA,QAC/D;AAAA,MACF;AACA;AAAA,IACF;AAEA,QAAI,CAAC,aAAa;AAChB,WAAK,oBAAoB;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,IAAI,yBAAiC;AACnC,QAAI,KAAK,mBAAmB;AAC1B,aAAO,KAAK,SAAS;AAAA,IACvB;AACA,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EAEA,MAAc,kBAAkB;AAG9B,UAAM,SAAS,KAAK,aAAa,SAAS,UAAU;AACpD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,OAAO,KAAK,IAAI,MAAM,OAAO,KAAK;AAChD,UAAI,MAAM;AACR;AAAA,MACF;AACA,WAAK,SAAS,iBAAiB;AAC/B,YAAM,KAAK,YAAY,YAAY,IAAI;AAAA,IACzC;AACA,WAAO,YAAY;AACnB,SAAK,YAAY,MAAM;AAAA,EACzB;AAAA,EAEA,MAAc,WAA0B;AACtC,UAAM,KAAK,YAAY;AAEvB,QAAI,KAAK,UAAU,CAAC,KAAK,mBAAmB;AAC1C;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,eAAe;AACvB,YAAM,IAAI,MAAM,yEAAyE;AAAA,IAC3F;AAEA,qBAAiB,eAAe,KAAK,SAAS,gBAAgB;AAC5D,YAAM,WAAW,YAAY;AAE7B,UAAI,aAAa;AACjB,UAAI,KAAK,UAAU,CAAC,KAAK,mBAAmB;AAC1C;AAAA,MACF;AAEA,iBAAW,CAAC,MAAM,GAAG,MAAM,KAAK,KAAK,QAAQ,WAAW,QAAQ,GAAG;AACjE,YAAI,KAAK,UAAU,CAAC,KAAK,mBAAmB;AAC1C;AAAA,QACF;AAEA,YAAI,KAAK,mBAAmB;AAC1B,eAAK,mBAAmB,MAAM,SAAS,MAAM,YAAY,MAAM,CAAC;AAChE,uBAAa;AACb;AAAA,QACF;AAEA,cAAM,aAAa,KAAK,QAAQ,cAAc,IAAI,EAAE;AACpD,cAAM,kBAAkB,KAAK,IAAI,IAAI,KAAK,iBAAiB;AAC3D,cAAM,gBAAgB,iBAAiB,KAAK,QAAQ;AACpD,cAAM,gBAAgB,KAAK,IAAI,GAAG,gBAAgB,KAAK,SAAS,gBAAgB;AAChF,YAAIA,SAAQ,KAAK,IAAI,GAAG,aAAa,aAAa,IAAI,KAAK;AAE3D,YAAI,KAAK,mBAAmB;AAC1B,UAAAA,SAAQ;AAAA,QACV;AAEA,cAAM,KAAK,iBAAiBA,SAAQ,CAAC;AACrC,aAAK,mBAAmB,MAAM,SAAS,MAAM,YAAY,MAAM,CAAC;AAChE,cAAM,KAAK,iBAAiBA,SAAQ,CAAC;AAErC,aAAK,SAAS,oBAAoB;AAClC,qBAAa;AAAA,MACf;AAEA,UAAI,aAAa,SAAS,QAAQ;AAChC,cAAM,YAAY,SAAS,MAAM,UAAU;AAC3C,aAAK,mBAAmB,MAAM,SAAS;AAAA,MACzC;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB,kBAA0B;AACvD,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AACA,cAAM,oBAAM,mBAAmB,GAAI;AAAA,EACrC;AAAA,EAEA,MAAM,QAAuB;AAC3B,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AACA,SAAK,aAAa,QAAQ;AAC1B,SAAK,YAAY,QAAQ;AACzB,SAAK,SAAS,eAAe,MAAM;AACnC,UAAM,KAAK;AAAA,EACb;AACF;AASO,MAAM,yBAA2D;AAAA,EACtE,OAAO;AAAA,EACP,eAAe,sBAAM;AAAA,EACrB,YAAY,sBAAM;AAAA,EAClB,mBAAmB,IAAI,sBAAM,kBAAkB;AAAA,IAC7C,cAAc;AAAA,EAChB,CAAC;AACH;AAEO,MAAM,0BAA0B;AAAA,EAC5B;AAAA,EACA;AAAA,EAED;AAAA,EACA;AAAA,EACA,WAAoB;AAAA,EACpB,SAAkB;AAAA;AAAA,EAG1B;AAAA,EAEQ,aAAS,gBAAI;AAAA,EAErB,YACE,kBACA,iBACA,UAA4C,wBAC5C;AACA,SAAK,cAAc,IAAI,kBAAkB,MAAM,gBAAgB;AAC/D,SAAK,aAAa,IAAI,iBAAiB,MAAM,eAAe;AAC5D,SAAK,UAAU;AAAA,MACb,OAAO,QAAQ;AAAA,MACf,eAAe,QAAQ;AAAA,MACvB,YAAY,QAAQ;AAAA,MACpB,mBAAmB,QAAQ;AAAA,IAC7B;AAGA,SAAK,QAAQ,IAAI,wBAAwB,KAAK,SAAS,eAAe;AACtE,SAAK,oBAAoB,kBAAK;AAAA,MAAK,CAAC,eAClC,KAAK,sBAAsB,WAAW,MAAM;AAAA,IAC9C;AAAA,EACF;AAAA,EAEA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,QAAQ,SAAkB;AAC5B,QAAI,KAAK,aAAa,SAAS;AAC7B;AAAA,IACF;AAEA,SAAK,WAAW;AAChB,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,gBAAgB;AACd,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,kBAAkB,MAAM;AAChC,WAAK,OAAO,KAAK,oEAAoE;AAAA,IACvF;AACA,SAAK,oBAAoB,kBAAK;AAAA,MAAK,CAAC,eAClC,KAAK,sBAAsB,WAAW,QAAQ,KAAK,iBAAiB;AAAA,IACtE;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,SAAS;AACd,UAAM,KAAK,kBAAkB,cAAc;AAC3C,UAAM,KAAK,MAAM,MAAM;AAAA,EACzB;AAAA,EAEA,MAAM,UAAyB;AAC7B,QAAI,KAAK,kBAAkB,MAAM;AAC/B;AAAA,IACF;AACA,UAAM,KAAK,kBAAkB;AAAA,EAC/B;AAAA,EAEA,MAAc,sBAAsB,OAAoB,SAAsB;AAC5E,QAAI,SAAS;AACX,YAAM,QAAQ;AAAA,IAChB;AAEA,QAAI,MAAM,SAAS;AACjB;AAAA,IACF;AACA,UAAM,KAAK,MAAM,MAAM;AACvB,SAAK,QAAQ,IAAI,wBAAwB,KAAK,SAAS,KAAK,WAAW,WAAW;AAAA,EACpF;AACF;AAEA,MAAM,0BAA0B,sBAAY;AAAA,EAG1C,YACS,cACC,kBACR;AACA,UAAM,iBAAiB,YAAY,gBAAgB;AAH5C;AACC;AAAA,EAGV;AAAA,EAPQ,iBAAyB;AAAA,EASjC,MAAM,aAAa,OAAkC;AAGnD,UAAM,KAAK,aAAa,QAAQ;AAEhC,UAAM,MAAM,aAAa,KAAK;AAC9B,UAAM,KAAK,iBAAiB,aAAa,KAAK;AAG9C,SAAK,kBAAkB,MAAM,oBAAoB,MAAM;AAEvD,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B;AAAA,IACF;AAEA,QAAI,KAAK,aAAa,MAAM,iBAAiB;AAC3C,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AACA,WAAK,aAAa,cAAc;AAChC,YAAM,KAAK,aAAa,QAAQ;AAAA,IAClC;AACA,SAAK,aAAa,MAAM,UAAU,KAAK;AAAA,EACzC;AAAA,EAEA,QAAQ;AACN,UAAM,MAAM;AACZ,SAAK,iBAAiB,MAAM;AAE5B,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,gBAAgB;AAExB,WAAK,aAAa,cAAc;AAChC;AAAA,IACF;AAEA,SAAK,aAAa,MAAM,cAAc;AAAA,EACxC;AAAA,EAEA,cAAc;AACZ,SAAK,iBAAiB,YAAY;AAAA,EACpC;AAAA;AAAA,EAGA,mBAAmB,IAA2B;AAC5C,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B,YAAM,mBAAmB,EAAE;AAC3B;AAAA,IACF;AAEA,SAAK,aAAa,MAAM,qBAAqB,GAAG,kBAAkB,GAAG,WAAW;AAChF,UAAM,mBAAmB;AAAA,MACvB,kBAAkB,GAAG;AAAA,MACrB,aAAa,GAAG;AAAA,MAChB,wBAAwB,KAAK,aAAa,MAAM;AAAA,IAClD,CAAC;AAED,SAAK,aAAa,cAAc;AAChC,SAAK,iBAAiB;AAAA,EACxB;AACF;AAEA,MAAM,yBAAyB,qBAAW;AAAA,EAIxC,YACmB,cACD,aAChB;AACA,UAAM,WAAW;AAHA;AACD;AAAA,EAGlB;AAAA,EARQ,YAAqB;AAAA,EACrB,aAAS,gBAAI;AAAA,EASrB,MAAM,YAAY,MAA6B;AAC7C,UAAM,KAAK,aAAa,QAAQ;AAEhC,QAAI,CAAC,KAAK,aAAa,SAAS;AAE9B,YAAM,KAAK,YAAY,YAAY,IAAI;AACvC;AAAA,IACF;AAEA,SAAK,YAAY;AACjB,QAAI,KAAK,aAAa,MAAM,gBAAgB;AAC1C,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AACA,WAAK,aAAa,cAAc;AAChC,YAAM,KAAK,aAAa,QAAQ;AAAA,IAClC;AACA,SAAK,aAAa,MAAM,SAAS,IAAI;AAAA,EACvC;AAAA,EAEA,QAAQ;AACN,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B,WAAK,YAAY,MAAM;AACvB;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,WAAW;AACnB;AAAA,IACF;AAEA,SAAK,YAAY;AACjB,SAAK,aAAa,MAAM,aAAa;AAAA,EACvC;AACF;","names":["delay"]}
|
|
1
|
+
{"version":3,"sources":["../../../src/voice/transcription/synchronizer.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { ReadableStream, WritableStreamDefaultWriter } from 'node:stream/web';\nimport { log } from '../../log.js';\nimport { IdentityTransform } from '../../stream/identity_transform.js';\nimport type { SentenceStream, SentenceTokenizer } from '../../tokenize/index.js';\nimport { basic } from '../../tokenize/index.js';\nimport { Future, Task, delay } from '../../utils.js';\nimport { AudioOutput, type PlaybackFinishedEvent, TextOutput } from '../io.js';\n\nconst STANDARD_SPEECH_RATE = 3.83; // hyphens (syllables) per second\n\ninterface TextSyncOptions {\n speed: number;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n sentenceTokenizer: SentenceTokenizer;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedText: string;\n}\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\nclass SegmentSynchronizerImpl {\n private textData: TextData;\n private audioData: AudioData;\n private speed: number;\n private outputStream: IdentityTransform<string>;\n private outputStreamWriter: WritableStreamDefaultWriter<string>;\n private captureTask: Promise<void>;\n private startWallTime?: number;\n\n private startFuture: Future = new Future();\n private closedFuture: Future = new Future();\n private playbackCompleted: boolean = false;\n\n private logger = log();\n\n constructor(\n private readonly options: TextSyncOptions,\n private readonly nextInChain: TextOutput,\n ) {\n this.speed = options.speed * STANDARD_SPEECH_RATE; // hyphens per second\n this.textData = {\n sentenceStream: options.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedText: '',\n };\n this.audioData = {\n pushedDuration: 0,\n done: false,\n };\n this.outputStream = new IdentityTransform();\n this.outputStreamWriter = this.outputStream.writable.getWriter();\n\n this.mainTask()\n .then(() => {\n this.outputStreamWriter.close();\n })\n .catch((error) => {\n this.logger.error({ error }, 'mainTask SegmentSynchronizerImpl');\n });\n this.captureTask = this.captureTaskImpl();\n }\n\n get closed() {\n return this.closedFuture.done;\n }\n\n get audioInputEnded() {\n return this.audioData.done;\n }\n\n get textInputEnded() {\n return this.textData.done;\n }\n\n get readable(): ReadableStream<string> {\n return this.outputStream.readable;\n }\n\n pushAudio(frame: AudioFrame) {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.pushAudio called after close');\n return;\n }\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n\n if (!this.startWallTime && frameDuration > 0) {\n this.startWallTime = Date.now();\n this.startFuture.resolve();\n }\n\n this.audioData.pushedDuration += frameDuration;\n }\n\n endAudioInput() {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.endAudioInput called after close');\n return;\n }\n\n this.audioData.done = true;\n }\n\n pushText(text: string) {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.pushText called after close');\n return;\n }\n\n this.textData.sentenceStream.pushText(text);\n this.textData.pushedText += text;\n }\n\n endTextInput() {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.endTextInput called after close');\n return;\n }\n\n this.textData.done = true;\n this.textData.sentenceStream.endInput();\n }\n\n markPlaybackFinished(_playbackPosition: number, interrupted: boolean) {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.markPlaybackFinished called after close');\n return;\n }\n\n if (!this.textData.done || !this.audioData.done) {\n this.logger.warn(\n { textDone: this.textData.done, audioDone: this.audioData.done },\n 'SegmentSynchronizerImpl.markPlaybackFinished called before text/audio input is done',\n );\n return;\n }\n\n if (!interrupted) {\n this.playbackCompleted = true;\n }\n }\n\n get synchronizedTranscript(): string {\n if (this.playbackCompleted) {\n return this.textData.pushedText;\n }\n return this.textData.forwardedText;\n }\n\n private async captureTaskImpl() {\n // Don't use a for-await loop here, because exiting the loop will close the writer in the\n // outputStream, which will cause an error in the mainTask.then method.\n const reader = this.outputStream.readable.getReader();\n while (true) {\n const { done, value: text } = await reader.read();\n if (done) {\n break;\n }\n this.textData.forwardedText += text;\n await this.nextInChain.captureText(text);\n }\n reader.releaseLock();\n this.nextInChain.flush();\n }\n\n private async mainTask(): Promise<void> {\n await this.startFuture.await;\n\n if (this.closed && !this.playbackCompleted) {\n return;\n }\n\n if (!this.startWallTime) {\n throw new Error('startWallTime is not set when starting SegmentSynchronizerImpl.mainTask');\n }\n\n for await (const textSegment of this.textData.sentenceStream) {\n const sentence = textSegment.token;\n\n let textCursor = 0;\n if (this.closed && !this.playbackCompleted) {\n return;\n }\n\n for (const [word, _, endPos] of this.options.splitWords(sentence)) {\n if (this.closed && !this.playbackCompleted) {\n return;\n }\n\n if (this.playbackCompleted) {\n this.outputStreamWriter.write(sentence.slice(textCursor, endPos));\n textCursor = endPos;\n continue;\n }\n\n const wordHphens = this.options.hyphenateWord(word).length;\n const elapsedSeconds = (Date.now() - this.startWallTime) / 1000;\n const targetHyphens = elapsedSeconds * this.options.speed;\n const hyphensBehind = Math.max(0, targetHyphens - this.textData.forwardedHyphens);\n let delay = Math.max(0, wordHphens - hyphensBehind) / this.speed;\n\n if (this.playbackCompleted) {\n delay = 0;\n }\n\n await this.sleepIfNotClosed(delay / 2);\n this.outputStreamWriter.write(sentence.slice(textCursor, endPos));\n await this.sleepIfNotClosed(delay / 2);\n\n this.textData.forwardedHyphens += wordHphens;\n textCursor = endPos;\n }\n\n if (textCursor < sentence.length) {\n const remaining = sentence.slice(textCursor);\n this.outputStreamWriter.write(remaining);\n }\n }\n }\n\n private async sleepIfNotClosed(sleepTimeSeconds: number) {\n if (this.closed) {\n return;\n }\n await delay(sleepTimeSeconds * 1000);\n }\n\n async close(): Promise<void> {\n if (this.closed) {\n return;\n }\n this.closedFuture.resolve();\n this.startFuture.resolve(); // avoid deadlock of mainTaskImpl in case it never started\n this.textData.sentenceStream.close();\n await this.captureTask;\n }\n}\n\nexport interface TranscriptionSynchronizerOptions {\n speed: number;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n sentenceTokenizer: SentenceTokenizer;\n}\n\nexport const defaultTextSyncOptions: TranscriptionSynchronizerOptions = {\n speed: 1,\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n sentenceTokenizer: new basic.SentenceTokenizer({\n retainFormat: true,\n }),\n};\n\nexport class TranscriptionSynchronizer {\n readonly audioOutput: SyncedAudioOutput;\n readonly textOutput: SyncedTextOutput;\n\n private options: TextSyncOptions;\n private rotateSegmentTask: Task<void>;\n private _enabled: boolean = true;\n private closed: boolean = false;\n\n /* @internal */\n _impl: SegmentSynchronizerImpl;\n\n private logger = log();\n\n constructor(\n nextInChainAudio: AudioOutput,\n nextInChainText: TextOutput,\n options: TranscriptionSynchronizerOptions = defaultTextSyncOptions,\n ) {\n this.audioOutput = new SyncedAudioOutput(this, nextInChainAudio);\n this.textOutput = new SyncedTextOutput(this, nextInChainText);\n this.options = {\n speed: options.speed,\n hyphenateWord: options.hyphenateWord,\n splitWords: options.splitWords,\n sentenceTokenizer: options.sentenceTokenizer,\n };\n\n // initial segment/first segment, recreated for each new segment\n this._impl = new SegmentSynchronizerImpl(this.options, nextInChainText);\n this.rotateSegmentTask = Task.from((controller) =>\n this.rotateSegmentTaskImpl(controller.signal),\n );\n }\n\n get enabled(): boolean {\n return this._enabled;\n }\n\n set enabled(enabled: boolean) {\n if (this._enabled === enabled) {\n return;\n }\n\n this._enabled = enabled;\n this.rotateSegment();\n }\n\n rotateSegment() {\n if (this.closed) {\n return;\n }\n\n if (!this.rotateSegmentTask.done) {\n this.logger.warn('rotateSegment called while previous segment is still being rotated');\n }\n this.rotateSegmentTask = Task.from((controller) =>\n this.rotateSegmentTaskImpl(controller.signal, this.rotateSegmentTask),\n );\n }\n\n async close(): Promise<void> {\n this.closed = true;\n await this.rotateSegmentTask.cancelAndWait();\n await this._impl.close();\n }\n\n async barrier(): Promise<void> {\n if (this.rotateSegmentTask.done) {\n return;\n }\n await this.rotateSegmentTask.result;\n }\n\n private async rotateSegmentTaskImpl(abort: AbortSignal, oldTask?: Task<void>) {\n if (oldTask) {\n await oldTask.result;\n }\n\n if (abort.aborted) {\n return;\n }\n await this._impl.close();\n this._impl = new SegmentSynchronizerImpl(this.options, this.textOutput.nextInChain);\n }\n}\n\nclass SyncedAudioOutput extends AudioOutput {\n private pushedDuration: number = 0.0;\n\n constructor(\n public synchronizer: TranscriptionSynchronizer,\n private nextInChainAudio: AudioOutput,\n ) {\n super(nextInChainAudio.sampleRate, nextInChainAudio);\n }\n\n async captureFrame(frame: AudioFrame): Promise<void> {\n // using barrier() on capture should be sufficient, flush() must not be called if\n // capture_frame isn't completed\n await this.synchronizer.barrier();\n\n await super.captureFrame(frame);\n await this.nextInChainAudio.captureFrame(frame); // passthrough audio\n\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n this.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n\n if (!this.synchronizer.enabled) {\n return;\n }\n\n if (this.synchronizer._impl.audioInputEnded) {\n this.logger.warn(\n 'SegmentSynchronizerImpl audio marked as ended in capture audio, rotating segment',\n );\n this.synchronizer.rotateSegment();\n await this.synchronizer.barrier();\n }\n this.synchronizer._impl.pushAudio(frame);\n }\n\n flush() {\n super.flush();\n this.nextInChainAudio.flush();\n\n if (!this.synchronizer.enabled) {\n return;\n }\n\n if (!this.pushedDuration) {\n // in case there is no audio after the text was pushed, rotate the segment\n this.synchronizer.rotateSegment();\n return;\n }\n\n this.synchronizer._impl.endAudioInput();\n }\n\n clearBuffer() {\n this.nextInChainAudio.clearBuffer();\n }\n\n // this is going to be automatically called by the next_in_chain\n onPlaybackFinished(ev: PlaybackFinishedEvent) {\n if (!this.synchronizer.enabled) {\n super.onPlaybackFinished(ev);\n return;\n }\n\n this.synchronizer._impl.markPlaybackFinished(ev.playbackPosition, ev.interrupted);\n super.onPlaybackFinished({\n playbackPosition: ev.playbackPosition,\n interrupted: ev.interrupted,\n synchronizedTranscript: this.synchronizer._impl.synchronizedTranscript,\n });\n\n this.synchronizer.rotateSegment();\n this.pushedDuration = 0.0;\n }\n}\n\nclass SyncedTextOutput extends TextOutput {\n private capturing: boolean = false;\n private logger = log();\n\n constructor(\n private readonly synchronizer: TranscriptionSynchronizer,\n public readonly nextInChain: TextOutput,\n ) {\n super(nextInChain);\n }\n\n async captureText(text: string): Promise<void> {\n await this.synchronizer.barrier();\n\n if (!this.synchronizer.enabled) {\n // pass through to the next in chain\n await this.nextInChain.captureText(text);\n return;\n }\n\n this.capturing = true;\n if (this.synchronizer._impl.textInputEnded) {\n this.logger.warn(\n 'SegmentSynchronizerImpl text marked as ended in capture text, rotating segment',\n );\n this.synchronizer.rotateSegment();\n await this.synchronizer.barrier();\n }\n this.synchronizer._impl.pushText(text);\n }\n\n flush() {\n if (!this.synchronizer.enabled) {\n this.nextInChain.flush(); // passthrough text if the synchronizer is disabled\n return;\n }\n\n if (!this.capturing) {\n return;\n }\n\n this.capturing = false;\n this.synchronizer._impl.endTextInput();\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,iBAAoB;AACpB,gCAAkC;AAElC,sBAAsB;AACtB,mBAAoC;AACpC,gBAAoE;AAEpE,MAAM,uBAAuB;AAsB7B,MAAM,wBAAwB;AAAA,EAe5B,YACmB,SACA,aACjB;AAFiB;AACA;AAEjB,SAAK,QAAQ,QAAQ,QAAQ;AAC7B,SAAK,WAAW;AAAA,MACd,gBAAgB,QAAQ,kBAAkB,OAAO;AAAA,MACjD,YAAY;AAAA,MACZ,MAAM;AAAA,MACN,kBAAkB;AAAA,MAClB,eAAe;AAAA,IACjB;AACA,SAAK,YAAY;AAAA,MACf,gBAAgB;AAAA,MAChB,MAAM;AAAA,IACR;AACA,SAAK,eAAe,IAAI,4CAAkB;AAC1C,SAAK,qBAAqB,KAAK,aAAa,SAAS,UAAU;AAE/D,SAAK,SAAS,EACX,KAAK,MAAM;AACV,WAAK,mBAAmB,MAAM;AAAA,IAChC,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,WAAK,OAAO,MAAM,EAAE,MAAM,GAAG,kCAAkC;AAAA,IACjE,CAAC;AACH,SAAK,cAAc,KAAK,gBAAgB;AAAA,EAC1C;AAAA,EAzCQ;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA,cAAsB,IAAI,oBAAO;AAAA,EACjC,eAAuB,IAAI,oBAAO;AAAA,EAClC,oBAA6B;AAAA,EAE7B,aAAS,gBAAI;AAAA,EA+BrB,IAAI,SAAS;AACX,WAAO,KAAK,aAAa;AAAA,EAC3B;AAAA,EAEA,IAAI,kBAAkB;AACpB,WAAO,KAAK,UAAU;AAAA,EACxB;AAAA,EAEA,IAAI,iBAAiB;AACnB,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EAEA,IAAI,WAAmC;AACrC,WAAO,KAAK,aAAa;AAAA,EAC3B;AAAA,EAEA,UAAU,OAAmB;AAC3B,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,sDAAsD;AACvE;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM,oBAAoB,MAAM;AAEtD,QAAI,CAAC,KAAK,iBAAiB,gBAAgB,GAAG;AAC5C,WAAK,gBAAgB,KAAK,IAAI;AAC9B,WAAK,YAAY,QAAQ;AAAA,IAC3B;AAEA,SAAK,UAAU,kBAAkB;AAAA,EACnC;AAAA,EAEA,gBAAgB;AACd,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,0DAA0D;AAC3E;AAAA,IACF;AAEA,SAAK,UAAU,OAAO;AAAA,EACxB;AAAA,EAEA,SAAS,MAAc;AACrB,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,qDAAqD;AACtE;AAAA,IACF;AAEA,SAAK,SAAS,eAAe,SAAS,IAAI;AAC1C,SAAK,SAAS,cAAc;AAAA,EAC9B;AAAA,EAEA,eAAe;AACb,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,yDAAyD;AAC1E;AAAA,IACF;AAEA,SAAK,SAAS,OAAO;AACrB,SAAK,SAAS,eAAe,SAAS;AAAA,EACxC;AAAA,EAEA,qBAAqB,mBAA2B,aAAsB;AACpE,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,iEAAiE;AAClF;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,SAAS,QAAQ,CAAC,KAAK,UAAU,MAAM;AAC/C,WAAK,OAAO;AAAA,QACV,EAAE,UAAU,KAAK,SAAS,MAAM,WAAW,KAAK,UAAU,KAAK;AAAA,QAC/D;AAAA,MACF;AACA;AAAA,IACF;AAEA,QAAI,CAAC,aAAa;AAChB,WAAK,oBAAoB;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,IAAI,yBAAiC;AACnC,QAAI,KAAK,mBAAmB;AAC1B,aAAO,KAAK,SAAS;AAAA,IACvB;AACA,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EAEA,MAAc,kBAAkB;AAG9B,UAAM,SAAS,KAAK,aAAa,SAAS,UAAU;AACpD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,OAAO,KAAK,IAAI,MAAM,OAAO,KAAK;AAChD,UAAI,MAAM;AACR;AAAA,MACF;AACA,WAAK,SAAS,iBAAiB;AAC/B,YAAM,KAAK,YAAY,YAAY,IAAI;AAAA,IACzC;AACA,WAAO,YAAY;AACnB,SAAK,YAAY,MAAM;AAAA,EACzB;AAAA,EAEA,MAAc,WAA0B;AACtC,UAAM,KAAK,YAAY;AAEvB,QAAI,KAAK,UAAU,CAAC,KAAK,mBAAmB;AAC1C;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,eAAe;AACvB,YAAM,IAAI,MAAM,yEAAyE;AAAA,IAC3F;AAEA,qBAAiB,eAAe,KAAK,SAAS,gBAAgB;AAC5D,YAAM,WAAW,YAAY;AAE7B,UAAI,aAAa;AACjB,UAAI,KAAK,UAAU,CAAC,KAAK,mBAAmB;AAC1C;AAAA,MACF;AAEA,iBAAW,CAAC,MAAM,GAAG,MAAM,KAAK,KAAK,QAAQ,WAAW,QAAQ,GAAG;AACjE,YAAI,KAAK,UAAU,CAAC,KAAK,mBAAmB;AAC1C;AAAA,QACF;AAEA,YAAI,KAAK,mBAAmB;AAC1B,eAAK,mBAAmB,MAAM,SAAS,MAAM,YAAY,MAAM,CAAC;AAChE,uBAAa;AACb;AAAA,QACF;AAEA,cAAM,aAAa,KAAK,QAAQ,cAAc,IAAI,EAAE;AACpD,cAAM,kBAAkB,KAAK,IAAI,IAAI,KAAK,iBAAiB;AAC3D,cAAM,gBAAgB,iBAAiB,KAAK,QAAQ;AACpD,cAAM,gBAAgB,KAAK,IAAI,GAAG,gBAAgB,KAAK,SAAS,gBAAgB;AAChF,YAAIA,SAAQ,KAAK,IAAI,GAAG,aAAa,aAAa,IAAI,KAAK;AAE3D,YAAI,KAAK,mBAAmB;AAC1B,UAAAA,SAAQ;AAAA,QACV;AAEA,cAAM,KAAK,iBAAiBA,SAAQ,CAAC;AACrC,aAAK,mBAAmB,MAAM,SAAS,MAAM,YAAY,MAAM,CAAC;AAChE,cAAM,KAAK,iBAAiBA,SAAQ,CAAC;AAErC,aAAK,SAAS,oBAAoB;AAClC,qBAAa;AAAA,MACf;AAEA,UAAI,aAAa,SAAS,QAAQ;AAChC,cAAM,YAAY,SAAS,MAAM,UAAU;AAC3C,aAAK,mBAAmB,MAAM,SAAS;AAAA,MACzC;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB,kBAA0B;AACvD,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AACA,cAAM,oBAAM,mBAAmB,GAAI;AAAA,EACrC;AAAA,EAEA,MAAM,QAAuB;AAC3B,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AACA,SAAK,aAAa,QAAQ;AAC1B,SAAK,YAAY,QAAQ;AACzB,SAAK,SAAS,eAAe,MAAM;AACnC,UAAM,KAAK;AAAA,EACb;AACF;AASO,MAAM,yBAA2D;AAAA,EACtE,OAAO;AAAA,EACP,eAAe,sBAAM;AAAA,EACrB,YAAY,sBAAM;AAAA,EAClB,mBAAmB,IAAI,sBAAM,kBAAkB;AAAA,IAC7C,cAAc;AAAA,EAChB,CAAC;AACH;AAEO,MAAM,0BAA0B;AAAA,EAC5B;AAAA,EACA;AAAA,EAED;AAAA,EACA;AAAA,EACA,WAAoB;AAAA,EACpB,SAAkB;AAAA;AAAA,EAG1B;AAAA,EAEQ,aAAS,gBAAI;AAAA,EAErB,YACE,kBACA,iBACA,UAA4C,wBAC5C;AACA,SAAK,cAAc,IAAI,kBAAkB,MAAM,gBAAgB;AAC/D,SAAK,aAAa,IAAI,iBAAiB,MAAM,eAAe;AAC5D,SAAK,UAAU;AAAA,MACb,OAAO,QAAQ;AAAA,MACf,eAAe,QAAQ;AAAA,MACvB,YAAY,QAAQ;AAAA,MACpB,mBAAmB,QAAQ;AAAA,IAC7B;AAGA,SAAK,QAAQ,IAAI,wBAAwB,KAAK,SAAS,eAAe;AACtE,SAAK,oBAAoB,kBAAK;AAAA,MAAK,CAAC,eAClC,KAAK,sBAAsB,WAAW,MAAM;AAAA,IAC9C;AAAA,EACF;AAAA,EAEA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,QAAQ,SAAkB;AAC5B,QAAI,KAAK,aAAa,SAAS;AAC7B;AAAA,IACF;AAEA,SAAK,WAAW;AAChB,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,gBAAgB;AACd,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,kBAAkB,MAAM;AAChC,WAAK,OAAO,KAAK,oEAAoE;AAAA,IACvF;AACA,SAAK,oBAAoB,kBAAK;AAAA,MAAK,CAAC,eAClC,KAAK,sBAAsB,WAAW,QAAQ,KAAK,iBAAiB;AAAA,IACtE;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,SAAS;AACd,UAAM,KAAK,kBAAkB,cAAc;AAC3C,UAAM,KAAK,MAAM,MAAM;AAAA,EACzB;AAAA,EAEA,MAAM,UAAyB;AAC7B,QAAI,KAAK,kBAAkB,MAAM;AAC/B;AAAA,IACF;AACA,UAAM,KAAK,kBAAkB;AAAA,EAC/B;AAAA,EAEA,MAAc,sBAAsB,OAAoB,SAAsB;AAC5E,QAAI,SAAS;AACX,YAAM,QAAQ;AAAA,IAChB;AAEA,QAAI,MAAM,SAAS;AACjB;AAAA,IACF;AACA,UAAM,KAAK,MAAM,MAAM;AACvB,SAAK,QAAQ,IAAI,wBAAwB,KAAK,SAAS,KAAK,WAAW,WAAW;AAAA,EACpF;AACF;AAEA,MAAM,0BAA0B,sBAAY;AAAA,EAG1C,YACS,cACC,kBACR;AACA,UAAM,iBAAiB,YAAY,gBAAgB;AAH5C;AACC;AAAA,EAGV;AAAA,EAPQ,iBAAyB;AAAA,EASjC,MAAM,aAAa,OAAkC;AAGnD,UAAM,KAAK,aAAa,QAAQ;AAEhC,UAAM,MAAM,aAAa,KAAK;AAC9B,UAAM,KAAK,iBAAiB,aAAa,KAAK;AAG9C,SAAK,kBAAkB,MAAM,oBAAoB,MAAM;AAEvD,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B;AAAA,IACF;AAEA,QAAI,KAAK,aAAa,MAAM,iBAAiB;AAC3C,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AACA,WAAK,aAAa,cAAc;AAChC,YAAM,KAAK,aAAa,QAAQ;AAAA,IAClC;AACA,SAAK,aAAa,MAAM,UAAU,KAAK;AAAA,EACzC;AAAA,EAEA,QAAQ;AACN,UAAM,MAAM;AACZ,SAAK,iBAAiB,MAAM;AAE5B,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,gBAAgB;AAExB,WAAK,aAAa,cAAc;AAChC;AAAA,IACF;AAEA,SAAK,aAAa,MAAM,cAAc;AAAA,EACxC;AAAA,EAEA,cAAc;AACZ,SAAK,iBAAiB,YAAY;AAAA,EACpC;AAAA;AAAA,EAGA,mBAAmB,IAA2B;AAC5C,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B,YAAM,mBAAmB,EAAE;AAC3B;AAAA,IACF;AAEA,SAAK,aAAa,MAAM,qBAAqB,GAAG,kBAAkB,GAAG,WAAW;AAChF,UAAM,mBAAmB;AAAA,MACvB,kBAAkB,GAAG;AAAA,MACrB,aAAa,GAAG;AAAA,MAChB,wBAAwB,KAAK,aAAa,MAAM;AAAA,IAClD,CAAC;AAED,SAAK,aAAa,cAAc;AAChC,SAAK,iBAAiB;AAAA,EACxB;AACF;AAEA,MAAM,yBAAyB,qBAAW;AAAA,EAIxC,YACmB,cACD,aAChB;AACA,UAAM,WAAW;AAHA;AACD;AAAA,EAGlB;AAAA,EARQ,YAAqB;AAAA,EACrB,aAAS,gBAAI;AAAA,EASrB,MAAM,YAAY,MAA6B;AAC7C,UAAM,KAAK,aAAa,QAAQ;AAEhC,QAAI,CAAC,KAAK,aAAa,SAAS;AAE9B,YAAM,KAAK,YAAY,YAAY,IAAI;AACvC;AAAA,IACF;AAEA,SAAK,YAAY;AACjB,QAAI,KAAK,aAAa,MAAM,gBAAgB;AAC1C,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AACA,WAAK,aAAa,cAAc;AAChC,YAAM,KAAK,aAAa,QAAQ;AAAA,IAClC;AACA,SAAK,aAAa,MAAM,SAAS,IAAI;AAAA,EACvC;AAAA,EAEA,QAAQ;AACN,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B,WAAK,YAAY,MAAM;AACvB;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,WAAW;AACnB;AAAA,IACF;AAEA,SAAK,YAAY;AACjB,SAAK,aAAa,MAAM,aAAa;AAAA,EACvC;AACF;","names":["delay"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"synchronizer.d.ts","sourceRoot":"","sources":["../../../src/voice/transcription/synchronizer.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;
|
|
1
|
+
{"version":3,"file":"synchronizer.d.ts","sourceRoot":"","sources":["../../../src/voice/transcription/synchronizer.ts"],"names":[],"mappings":";AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,KAAK,EAAE,cAAc,EAA+B,MAAM,iBAAiB,CAAC;AAGnF,OAAO,KAAK,EAAkB,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAGjF,OAAO,EAAE,WAAW,EAAE,KAAK,qBAAqB,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAI/E,UAAU,eAAe;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;IAC1C,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC1D,iBAAiB,EAAE,iBAAiB,CAAC;CACtC;AAeD,cAAM,uBAAuB;IAgBzB,OAAO,CAAC,QAAQ,CAAC,OAAO;IACxB,OAAO,CAAC,QAAQ,CAAC,WAAW;IAhB9B,OAAO,CAAC,QAAQ,CAAW;IAC3B,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,YAAY,CAA4B;IAChD,OAAO,CAAC,kBAAkB,CAAsC;IAChE,OAAO,CAAC,WAAW,CAAgB;IACnC,OAAO,CAAC,aAAa,CAAC,CAAS;IAE/B,OAAO,CAAC,WAAW,CAAwB;IAC3C,OAAO,CAAC,YAAY,CAAwB;IAC5C,OAAO,CAAC,iBAAiB,CAAkB;IAE3C,OAAO,CAAC,MAAM,CAAS;gBAGJ,OAAO,EAAE,eAAe,EACxB,WAAW,EAAE,UAAU;IA2B1C,IAAI,MAAM,YAET;IAED,IAAI,eAAe,YAElB;IAED,IAAI,cAAc,YAEjB;IAED,IAAI,QAAQ,IAAI,cAAc,CAAC,MAAM,CAAC,CAErC;IAED,SAAS,CAAC,KAAK,EAAE,UAAU;IAgB3B,aAAa;IASb,QAAQ,CAAC,IAAI,EAAE,MAAM;IAUrB,YAAY;IAUZ,oBAAoB,CAAC,iBAAiB,EAAE,MAAM,EAAE,WAAW,EAAE,OAAO;IAmBpE,IAAI,sBAAsB,IAAI,MAAM,CAKnC;YAEa,eAAe;YAgBf,QAAQ;YAuDR,gBAAgB;IAOxB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;CAS7B;AAED,MAAM,WAAW,gCAAgC;IAC/C,KAAK,EAAE,MAAM,CAAC;IACd,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;IAC1C,UAAU,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,CAAC;IAC1D,iBAAiB,EAAE,iBAAiB,CAAC;CACtC;AAED,eAAO,MAAM,sBAAsB,EAAE,gCAOpC,CAAC;AAEF,qBAAa,yBAAyB;IACpC,QAAQ,CAAC,WAAW,EAAE,iBAAiB,CAAC;IACxC,QAAQ,CAAC,UAAU,EAAE,gBAAgB,CAAC;IAEtC,OAAO,CAAC,OAAO,CAAkB;IACjC,OAAO,CAAC,iBAAiB,CAAa;IACtC,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,MAAM,CAAkB;IAGhC,KAAK,EAAE,uBAAuB,CAAC;IAE/B,OAAO,CAAC,MAAM,CAAS;gBAGrB,gBAAgB,EAAE,WAAW,EAC7B,eAAe,EAAE,UAAU,EAC3B,OAAO,GAAE,gCAAyD;IAkBpE,IAAI,OAAO,IAAI,OAAO,CAErB;IAED,IAAI,OAAO,CAAC,OAAO,EAAE,OAAO,EAO3B;IAED,aAAa;IAaP,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAMtB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;YAOhB,qBAAqB;CAWpC;AAED,cAAM,iBAAkB,SAAQ,WAAW;IAIhC,YAAY,EAAE,yBAAyB;IAC9C,OAAO,CAAC,gBAAgB;IAJ1B,OAAO,CAAC,cAAc,CAAe;gBAG5B,YAAY,EAAE,yBAAyB,EACtC,gBAAgB,EAAE,WAAW;IAKjC,YAAY,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC;IAyBpD,KAAK;IAiBL,WAAW;IAKX,kBAAkB,CAAC,EAAE,EAAE,qBAAqB;CAgB7C;AAED,cAAM,gBAAiB,SAAQ,UAAU;IAKrC,OAAO,CAAC,QAAQ,CAAC,YAAY;aACb,WAAW,EAAE,UAAU;IALzC,OAAO,CAAC,SAAS,CAAkB;IACnC,OAAO,CAAC,MAAM,CAAS;gBAGJ,YAAY,EAAE,yBAAyB,EACxC,WAAW,EAAE,UAAU;IAKnC,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAoB9C,KAAK;CAaN"}
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
import { delay } from "@std/async";
|
|
2
1
|
import { log } from "../../log.js";
|
|
3
2
|
import { IdentityTransform } from "../../stream/identity_transform.js";
|
|
4
3
|
import { basic } from "../../tokenize/index.js";
|
|
5
|
-
import { Future, Task } from "../../utils.js";
|
|
4
|
+
import { Future, Task, delay } from "../../utils.js";
|
|
6
5
|
import { AudioOutput, TextOutput } from "../io.js";
|
|
7
6
|
const STANDARD_SPEECH_RATE = 3.83;
|
|
8
7
|
class SegmentSynchronizerImpl {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/voice/transcription/synchronizer.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { delay } from '@std/async';\nimport type { ReadableStream, WritableStreamDefaultWriter } from 'node:stream/web';\nimport { log } from '../../log.js';\nimport { IdentityTransform } from '../../stream/identity_transform.js';\nimport type { SentenceStream, SentenceTokenizer } from '../../tokenize/index.js';\nimport { basic } from '../../tokenize/index.js';\nimport { Future, Task } from '../../utils.js';\nimport { AudioOutput, type PlaybackFinishedEvent, TextOutput } from '../io.js';\n\nconst STANDARD_SPEECH_RATE = 3.83; // hyphens (syllables) per second\n\ninterface TextSyncOptions {\n speed: number;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n sentenceTokenizer: SentenceTokenizer;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedText: string;\n}\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\nclass SegmentSynchronizerImpl {\n private textData: TextData;\n private audioData: AudioData;\n private speed: number;\n private outputStream: IdentityTransform<string>;\n private outputStreamWriter: WritableStreamDefaultWriter<string>;\n private captureTask: Promise<void>;\n private startWallTime?: number;\n\n private startFuture: Future = new Future();\n private closedFuture: Future = new Future();\n private playbackCompleted: boolean = false;\n\n private logger = log();\n\n constructor(\n private readonly options: TextSyncOptions,\n private readonly nextInChain: TextOutput,\n ) {\n this.speed = options.speed * STANDARD_SPEECH_RATE; // hyphens per second\n this.textData = {\n sentenceStream: options.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedText: '',\n };\n this.audioData = {\n pushedDuration: 0,\n done: false,\n };\n this.outputStream = new IdentityTransform();\n this.outputStreamWriter = this.outputStream.writable.getWriter();\n\n this.mainTask()\n .then(() => {\n this.outputStreamWriter.close();\n })\n .catch((error) => {\n this.logger.error({ error }, 'mainTask SegmentSynchronizerImpl');\n });\n this.captureTask = this.captureTaskImpl();\n }\n\n get closed() {\n return this.closedFuture.done;\n }\n\n get audioInputEnded() {\n return this.audioData.done;\n }\n\n get textInputEnded() {\n return this.textData.done;\n }\n\n get readable(): ReadableStream<string> {\n return this.outputStream.readable;\n }\n\n pushAudio(frame: AudioFrame) {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.pushAudio called after close');\n return;\n }\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n\n if (!this.startWallTime && frameDuration > 0) {\n this.startWallTime = Date.now();\n this.startFuture.resolve();\n }\n\n this.audioData.pushedDuration += frameDuration;\n }\n\n endAudioInput() {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.endAudioInput called after close');\n return;\n }\n\n this.audioData.done = true;\n }\n\n pushText(text: string) {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.pushText called after close');\n return;\n }\n\n this.textData.sentenceStream.pushText(text);\n this.textData.pushedText += text;\n }\n\n endTextInput() {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.endTextInput called after close');\n return;\n }\n\n this.textData.done = true;\n this.textData.sentenceStream.endInput();\n }\n\n markPlaybackFinished(_playbackPosition: number, interrupted: boolean) {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.markPlaybackFinished called after close');\n return;\n }\n\n if (!this.textData.done || !this.audioData.done) {\n this.logger.warn(\n { textDone: this.textData.done, audioDone: this.audioData.done },\n 'SegmentSynchronizerImpl.markPlaybackFinished called before text/audio input is done',\n );\n return;\n }\n\n if (!interrupted) {\n this.playbackCompleted = true;\n }\n }\n\n get synchronizedTranscript(): string {\n if (this.playbackCompleted) {\n return this.textData.pushedText;\n }\n return this.textData.forwardedText;\n }\n\n private async captureTaskImpl() {\n // Don't use a for-await loop here, because exiting the loop will close the writer in the\n // outputStream, which will cause an error in the mainTask.then method.\n const reader = this.outputStream.readable.getReader();\n while (true) {\n const { done, value: text } = await reader.read();\n if (done) {\n break;\n }\n this.textData.forwardedText += text;\n await this.nextInChain.captureText(text);\n }\n reader.releaseLock();\n this.nextInChain.flush();\n }\n\n private async mainTask(): Promise<void> {\n await this.startFuture.await;\n\n if (this.closed && !this.playbackCompleted) {\n return;\n }\n\n if (!this.startWallTime) {\n throw new Error('startWallTime is not set when starting SegmentSynchronizerImpl.mainTask');\n }\n\n for await (const textSegment of this.textData.sentenceStream) {\n const sentence = textSegment.token;\n\n let textCursor = 0;\n if (this.closed && !this.playbackCompleted) {\n return;\n }\n\n for (const [word, _, endPos] of this.options.splitWords(sentence)) {\n if (this.closed && !this.playbackCompleted) {\n return;\n }\n\n if (this.playbackCompleted) {\n this.outputStreamWriter.write(sentence.slice(textCursor, endPos));\n textCursor = endPos;\n continue;\n }\n\n const wordHphens = this.options.hyphenateWord(word).length;\n const elapsedSeconds = (Date.now() - this.startWallTime) / 1000;\n const targetHyphens = elapsedSeconds * this.options.speed;\n const hyphensBehind = Math.max(0, targetHyphens - this.textData.forwardedHyphens);\n let delay = Math.max(0, wordHphens - hyphensBehind) / this.speed;\n\n if (this.playbackCompleted) {\n delay = 0;\n }\n\n await this.sleepIfNotClosed(delay / 2);\n this.outputStreamWriter.write(sentence.slice(textCursor, endPos));\n await this.sleepIfNotClosed(delay / 2);\n\n this.textData.forwardedHyphens += wordHphens;\n textCursor = endPos;\n }\n\n if (textCursor < sentence.length) {\n const remaining = sentence.slice(textCursor);\n this.outputStreamWriter.write(remaining);\n }\n }\n }\n\n private async sleepIfNotClosed(sleepTimeSeconds: number) {\n if (this.closed) {\n return;\n }\n await delay(sleepTimeSeconds * 1000);\n }\n\n async close(): Promise<void> {\n if (this.closed) {\n return;\n }\n this.closedFuture.resolve();\n this.startFuture.resolve(); // avoid deadlock of mainTaskImpl in case it never started\n this.textData.sentenceStream.close();\n await this.captureTask;\n }\n}\n\nexport interface TranscriptionSynchronizerOptions {\n speed: number;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n sentenceTokenizer: SentenceTokenizer;\n}\n\nexport const defaultTextSyncOptions: TranscriptionSynchronizerOptions = {\n speed: 1,\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n sentenceTokenizer: new basic.SentenceTokenizer({\n retainFormat: true,\n }),\n};\n\nexport class TranscriptionSynchronizer {\n readonly audioOutput: SyncedAudioOutput;\n readonly textOutput: SyncedTextOutput;\n\n private options: TextSyncOptions;\n private rotateSegmentTask: Task<void>;\n private _enabled: boolean = true;\n private closed: boolean = false;\n\n /* @internal */\n _impl: SegmentSynchronizerImpl;\n\n private logger = log();\n\n constructor(\n nextInChainAudio: AudioOutput,\n nextInChainText: TextOutput,\n options: TranscriptionSynchronizerOptions = defaultTextSyncOptions,\n ) {\n this.audioOutput = new SyncedAudioOutput(this, nextInChainAudio);\n this.textOutput = new SyncedTextOutput(this, nextInChainText);\n this.options = {\n speed: options.speed,\n hyphenateWord: options.hyphenateWord,\n splitWords: options.splitWords,\n sentenceTokenizer: options.sentenceTokenizer,\n };\n\n // initial segment/first segment, recreated for each new segment\n this._impl = new SegmentSynchronizerImpl(this.options, nextInChainText);\n this.rotateSegmentTask = Task.from((controller) =>\n this.rotateSegmentTaskImpl(controller.signal),\n );\n }\n\n get enabled(): boolean {\n return this._enabled;\n }\n\n set enabled(enabled: boolean) {\n if (this._enabled === enabled) {\n return;\n }\n\n this._enabled = enabled;\n this.rotateSegment();\n }\n\n rotateSegment() {\n if (this.closed) {\n return;\n }\n\n if (!this.rotateSegmentTask.done) {\n this.logger.warn('rotateSegment called while previous segment is still being rotated');\n }\n this.rotateSegmentTask = Task.from((controller) =>\n this.rotateSegmentTaskImpl(controller.signal, this.rotateSegmentTask),\n );\n }\n\n async close(): Promise<void> {\n this.closed = true;\n await this.rotateSegmentTask.cancelAndWait();\n await this._impl.close();\n }\n\n async barrier(): Promise<void> {\n if (this.rotateSegmentTask.done) {\n return;\n }\n await this.rotateSegmentTask.result;\n }\n\n private async rotateSegmentTaskImpl(abort: AbortSignal, oldTask?: Task<void>) {\n if (oldTask) {\n await oldTask.result;\n }\n\n if (abort.aborted) {\n return;\n }\n await this._impl.close();\n this._impl = new SegmentSynchronizerImpl(this.options, this.textOutput.nextInChain);\n }\n}\n\nclass SyncedAudioOutput extends AudioOutput {\n private pushedDuration: number = 0.0;\n\n constructor(\n public synchronizer: TranscriptionSynchronizer,\n private nextInChainAudio: AudioOutput,\n ) {\n super(nextInChainAudio.sampleRate, nextInChainAudio);\n }\n\n async captureFrame(frame: AudioFrame): Promise<void> {\n // using barrier() on capture should be sufficient, flush() must not be called if\n // capture_frame isn't completed\n await this.synchronizer.barrier();\n\n await super.captureFrame(frame);\n await this.nextInChainAudio.captureFrame(frame); // passthrough audio\n\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n this.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n\n if (!this.synchronizer.enabled) {\n return;\n }\n\n if (this.synchronizer._impl.audioInputEnded) {\n this.logger.warn(\n 'SegmentSynchronizerImpl audio marked as ended in capture audio, rotating segment',\n );\n this.synchronizer.rotateSegment();\n await this.synchronizer.barrier();\n }\n this.synchronizer._impl.pushAudio(frame);\n }\n\n flush() {\n super.flush();\n this.nextInChainAudio.flush();\n\n if (!this.synchronizer.enabled) {\n return;\n }\n\n if (!this.pushedDuration) {\n // in case there is no audio after the text was pushed, rotate the segment\n this.synchronizer.rotateSegment();\n return;\n }\n\n this.synchronizer._impl.endAudioInput();\n }\n\n clearBuffer() {\n this.nextInChainAudio.clearBuffer();\n }\n\n // this is going to be automatically called by the next_in_chain\n onPlaybackFinished(ev: PlaybackFinishedEvent) {\n if (!this.synchronizer.enabled) {\n super.onPlaybackFinished(ev);\n return;\n }\n\n this.synchronizer._impl.markPlaybackFinished(ev.playbackPosition, ev.interrupted);\n super.onPlaybackFinished({\n playbackPosition: ev.playbackPosition,\n interrupted: ev.interrupted,\n synchronizedTranscript: this.synchronizer._impl.synchronizedTranscript,\n });\n\n this.synchronizer.rotateSegment();\n this.pushedDuration = 0.0;\n }\n}\n\nclass SyncedTextOutput extends TextOutput {\n private capturing: boolean = false;\n private logger = log();\n\n constructor(\n private readonly synchronizer: TranscriptionSynchronizer,\n public readonly nextInChain: TextOutput,\n ) {\n super(nextInChain);\n }\n\n async captureText(text: string): Promise<void> {\n await this.synchronizer.barrier();\n\n if (!this.synchronizer.enabled) {\n // pass through to the next in chain\n await this.nextInChain.captureText(text);\n return;\n }\n\n this.capturing = true;\n if (this.synchronizer._impl.textInputEnded) {\n this.logger.warn(\n 'SegmentSynchronizerImpl text marked as ended in capture text, rotating segment',\n );\n this.synchronizer.rotateSegment();\n await this.synchronizer.barrier();\n }\n this.synchronizer._impl.pushText(text);\n }\n\n flush() {\n if (!this.synchronizer.enabled) {\n this.nextInChain.flush(); // passthrough text if the synchronizer is disabled\n return;\n }\n\n if (!this.capturing) {\n return;\n }\n\n this.capturing = false;\n this.synchronizer._impl.endTextInput();\n }\n}\n"],"mappings":"AAIA,SAAS,aAAa;AAEtB,SAAS,WAAW;AACpB,SAAS,yBAAyB;AAElC,SAAS,aAAa;AACtB,SAAS,QAAQ,YAAY;AAC7B,SAAS,aAAyC,kBAAkB;AAEpE,MAAM,uBAAuB;AAsB7B,MAAM,wBAAwB;AAAA,EAe5B,YACmB,SACA,aACjB;AAFiB;AACA;AAEjB,SAAK,QAAQ,QAAQ,QAAQ;AAC7B,SAAK,WAAW;AAAA,MACd,gBAAgB,QAAQ,kBAAkB,OAAO;AAAA,MACjD,YAAY;AAAA,MACZ,MAAM;AAAA,MACN,kBAAkB;AAAA,MAClB,eAAe;AAAA,IACjB;AACA,SAAK,YAAY;AAAA,MACf,gBAAgB;AAAA,MAChB,MAAM;AAAA,IACR;AACA,SAAK,eAAe,IAAI,kBAAkB;AAC1C,SAAK,qBAAqB,KAAK,aAAa,SAAS,UAAU;AAE/D,SAAK,SAAS,EACX,KAAK,MAAM;AACV,WAAK,mBAAmB,MAAM;AAAA,IAChC,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,WAAK,OAAO,MAAM,EAAE,MAAM,GAAG,kCAAkC;AAAA,IACjE,CAAC;AACH,SAAK,cAAc,KAAK,gBAAgB;AAAA,EAC1C;AAAA,EAzCQ;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA,cAAsB,IAAI,OAAO;AAAA,EACjC,eAAuB,IAAI,OAAO;AAAA,EAClC,oBAA6B;AAAA,EAE7B,SAAS,IAAI;AAAA,EA+BrB,IAAI,SAAS;AACX,WAAO,KAAK,aAAa;AAAA,EAC3B;AAAA,EAEA,IAAI,kBAAkB;AACpB,WAAO,KAAK,UAAU;AAAA,EACxB;AAAA,EAEA,IAAI,iBAAiB;AACnB,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EAEA,IAAI,WAAmC;AACrC,WAAO,KAAK,aAAa;AAAA,EAC3B;AAAA,EAEA,UAAU,OAAmB;AAC3B,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,sDAAsD;AACvE;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM,oBAAoB,MAAM;AAEtD,QAAI,CAAC,KAAK,iBAAiB,gBAAgB,GAAG;AAC5C,WAAK,gBAAgB,KAAK,IAAI;AAC9B,WAAK,YAAY,QAAQ;AAAA,IAC3B;AAEA,SAAK,UAAU,kBAAkB;AAAA,EACnC;AAAA,EAEA,gBAAgB;AACd,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,0DAA0D;AAC3E;AAAA,IACF;AAEA,SAAK,UAAU,OAAO;AAAA,EACxB;AAAA,EAEA,SAAS,MAAc;AACrB,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,qDAAqD;AACtE;AAAA,IACF;AAEA,SAAK,SAAS,eAAe,SAAS,IAAI;AAC1C,SAAK,SAAS,cAAc;AAAA,EAC9B;AAAA,EAEA,eAAe;AACb,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,yDAAyD;AAC1E;AAAA,IACF;AAEA,SAAK,SAAS,OAAO;AACrB,SAAK,SAAS,eAAe,SAAS;AAAA,EACxC;AAAA,EAEA,qBAAqB,mBAA2B,aAAsB;AACpE,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,iEAAiE;AAClF;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,SAAS,QAAQ,CAAC,KAAK,UAAU,MAAM;AAC/C,WAAK,OAAO;AAAA,QACV,EAAE,UAAU,KAAK,SAAS,MAAM,WAAW,KAAK,UAAU,KAAK;AAAA,QAC/D;AAAA,MACF;AACA;AAAA,IACF;AAEA,QAAI,CAAC,aAAa;AAChB,WAAK,oBAAoB;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,IAAI,yBAAiC;AACnC,QAAI,KAAK,mBAAmB;AAC1B,aAAO,KAAK,SAAS;AAAA,IACvB;AACA,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EAEA,MAAc,kBAAkB;AAG9B,UAAM,SAAS,KAAK,aAAa,SAAS,UAAU;AACpD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,OAAO,KAAK,IAAI,MAAM,OAAO,KAAK;AAChD,UAAI,MAAM;AACR;AAAA,MACF;AACA,WAAK,SAAS,iBAAiB;AAC/B,YAAM,KAAK,YAAY,YAAY,IAAI;AAAA,IACzC;AACA,WAAO,YAAY;AACnB,SAAK,YAAY,MAAM;AAAA,EACzB;AAAA,EAEA,MAAc,WAA0B;AACtC,UAAM,KAAK,YAAY;AAEvB,QAAI,KAAK,UAAU,CAAC,KAAK,mBAAmB;AAC1C;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,eAAe;AACvB,YAAM,IAAI,MAAM,yEAAyE;AAAA,IAC3F;AAEA,qBAAiB,eAAe,KAAK,SAAS,gBAAgB;AAC5D,YAAM,WAAW,YAAY;AAE7B,UAAI,aAAa;AACjB,UAAI,KAAK,UAAU,CAAC,KAAK,mBAAmB;AAC1C;AAAA,MACF;AAEA,iBAAW,CAAC,MAAM,GAAG,MAAM,KAAK,KAAK,QAAQ,WAAW,QAAQ,GAAG;AACjE,YAAI,KAAK,UAAU,CAAC,KAAK,mBAAmB;AAC1C;AAAA,QACF;AAEA,YAAI,KAAK,mBAAmB;AAC1B,eAAK,mBAAmB,MAAM,SAAS,MAAM,YAAY,MAAM,CAAC;AAChE,uBAAa;AACb;AAAA,QACF;AAEA,cAAM,aAAa,KAAK,QAAQ,cAAc,IAAI,EAAE;AACpD,cAAM,kBAAkB,KAAK,IAAI,IAAI,KAAK,iBAAiB;AAC3D,cAAM,gBAAgB,iBAAiB,KAAK,QAAQ;AACpD,cAAM,gBAAgB,KAAK,IAAI,GAAG,gBAAgB,KAAK,SAAS,gBAAgB;AAChF,YAAIA,SAAQ,KAAK,IAAI,GAAG,aAAa,aAAa,IAAI,KAAK;AAE3D,YAAI,KAAK,mBAAmB;AAC1B,UAAAA,SAAQ;AAAA,QACV;AAEA,cAAM,KAAK,iBAAiBA,SAAQ,CAAC;AACrC,aAAK,mBAAmB,MAAM,SAAS,MAAM,YAAY,MAAM,CAAC;AAChE,cAAM,KAAK,iBAAiBA,SAAQ,CAAC;AAErC,aAAK,SAAS,oBAAoB;AAClC,qBAAa;AAAA,MACf;AAEA,UAAI,aAAa,SAAS,QAAQ;AAChC,cAAM,YAAY,SAAS,MAAM,UAAU;AAC3C,aAAK,mBAAmB,MAAM,SAAS;AAAA,MACzC;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB,kBAA0B;AACvD,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AACA,UAAM,MAAM,mBAAmB,GAAI;AAAA,EACrC;AAAA,EAEA,MAAM,QAAuB;AAC3B,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AACA,SAAK,aAAa,QAAQ;AAC1B,SAAK,YAAY,QAAQ;AACzB,SAAK,SAAS,eAAe,MAAM;AACnC,UAAM,KAAK;AAAA,EACb;AACF;AASO,MAAM,yBAA2D;AAAA,EACtE,OAAO;AAAA,EACP,eAAe,MAAM;AAAA,EACrB,YAAY,MAAM;AAAA,EAClB,mBAAmB,IAAI,MAAM,kBAAkB;AAAA,IAC7C,cAAc;AAAA,EAChB,CAAC;AACH;AAEO,MAAM,0BAA0B;AAAA,EAC5B;AAAA,EACA;AAAA,EAED;AAAA,EACA;AAAA,EACA,WAAoB;AAAA,EACpB,SAAkB;AAAA;AAAA,EAG1B;AAAA,EAEQ,SAAS,IAAI;AAAA,EAErB,YACE,kBACA,iBACA,UAA4C,wBAC5C;AACA,SAAK,cAAc,IAAI,kBAAkB,MAAM,gBAAgB;AAC/D,SAAK,aAAa,IAAI,iBAAiB,MAAM,eAAe;AAC5D,SAAK,UAAU;AAAA,MACb,OAAO,QAAQ;AAAA,MACf,eAAe,QAAQ;AAAA,MACvB,YAAY,QAAQ;AAAA,MACpB,mBAAmB,QAAQ;AAAA,IAC7B;AAGA,SAAK,QAAQ,IAAI,wBAAwB,KAAK,SAAS,eAAe;AACtE,SAAK,oBAAoB,KAAK;AAAA,MAAK,CAAC,eAClC,KAAK,sBAAsB,WAAW,MAAM;AAAA,IAC9C;AAAA,EACF;AAAA,EAEA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,QAAQ,SAAkB;AAC5B,QAAI,KAAK,aAAa,SAAS;AAC7B;AAAA,IACF;AAEA,SAAK,WAAW;AAChB,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,gBAAgB;AACd,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,kBAAkB,MAAM;AAChC,WAAK,OAAO,KAAK,oEAAoE;AAAA,IACvF;AACA,SAAK,oBAAoB,KAAK;AAAA,MAAK,CAAC,eAClC,KAAK,sBAAsB,WAAW,QAAQ,KAAK,iBAAiB;AAAA,IACtE;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,SAAS;AACd,UAAM,KAAK,kBAAkB,cAAc;AAC3C,UAAM,KAAK,MAAM,MAAM;AAAA,EACzB;AAAA,EAEA,MAAM,UAAyB;AAC7B,QAAI,KAAK,kBAAkB,MAAM;AAC/B;AAAA,IACF;AACA,UAAM,KAAK,kBAAkB;AAAA,EAC/B;AAAA,EAEA,MAAc,sBAAsB,OAAoB,SAAsB;AAC5E,QAAI,SAAS;AACX,YAAM,QAAQ;AAAA,IAChB;AAEA,QAAI,MAAM,SAAS;AACjB;AAAA,IACF;AACA,UAAM,KAAK,MAAM,MAAM;AACvB,SAAK,QAAQ,IAAI,wBAAwB,KAAK,SAAS,KAAK,WAAW,WAAW;AAAA,EACpF;AACF;AAEA,MAAM,0BAA0B,YAAY;AAAA,EAG1C,YACS,cACC,kBACR;AACA,UAAM,iBAAiB,YAAY,gBAAgB;AAH5C;AACC;AAAA,EAGV;AAAA,EAPQ,iBAAyB;AAAA,EASjC,MAAM,aAAa,OAAkC;AAGnD,UAAM,KAAK,aAAa,QAAQ;AAEhC,UAAM,MAAM,aAAa,KAAK;AAC9B,UAAM,KAAK,iBAAiB,aAAa,KAAK;AAG9C,SAAK,kBAAkB,MAAM,oBAAoB,MAAM;AAEvD,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B;AAAA,IACF;AAEA,QAAI,KAAK,aAAa,MAAM,iBAAiB;AAC3C,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AACA,WAAK,aAAa,cAAc;AAChC,YAAM,KAAK,aAAa,QAAQ;AAAA,IAClC;AACA,SAAK,aAAa,MAAM,UAAU,KAAK;AAAA,EACzC;AAAA,EAEA,QAAQ;AACN,UAAM,MAAM;AACZ,SAAK,iBAAiB,MAAM;AAE5B,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,gBAAgB;AAExB,WAAK,aAAa,cAAc;AAChC;AAAA,IACF;AAEA,SAAK,aAAa,MAAM,cAAc;AAAA,EACxC;AAAA,EAEA,cAAc;AACZ,SAAK,iBAAiB,YAAY;AAAA,EACpC;AAAA;AAAA,EAGA,mBAAmB,IAA2B;AAC5C,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B,YAAM,mBAAmB,EAAE;AAC3B;AAAA,IACF;AAEA,SAAK,aAAa,MAAM,qBAAqB,GAAG,kBAAkB,GAAG,WAAW;AAChF,UAAM,mBAAmB;AAAA,MACvB,kBAAkB,GAAG;AAAA,MACrB,aAAa,GAAG;AAAA,MAChB,wBAAwB,KAAK,aAAa,MAAM;AAAA,IAClD,CAAC;AAED,SAAK,aAAa,cAAc;AAChC,SAAK,iBAAiB;AAAA,EACxB;AACF;AAEA,MAAM,yBAAyB,WAAW;AAAA,EAIxC,YACmB,cACD,aAChB;AACA,UAAM,WAAW;AAHA;AACD;AAAA,EAGlB;AAAA,EARQ,YAAqB;AAAA,EACrB,SAAS,IAAI;AAAA,EASrB,MAAM,YAAY,MAA6B;AAC7C,UAAM,KAAK,aAAa,QAAQ;AAEhC,QAAI,CAAC,KAAK,aAAa,SAAS;AAE9B,YAAM,KAAK,YAAY,YAAY,IAAI;AACvC;AAAA,IACF;AAEA,SAAK,YAAY;AACjB,QAAI,KAAK,aAAa,MAAM,gBAAgB;AAC1C,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AACA,WAAK,aAAa,cAAc;AAChC,YAAM,KAAK,aAAa,QAAQ;AAAA,IAClC;AACA,SAAK,aAAa,MAAM,SAAS,IAAI;AAAA,EACvC;AAAA,EAEA,QAAQ;AACN,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B,WAAK,YAAY,MAAM;AACvB;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,WAAW;AACnB;AAAA,IACF;AAEA,SAAK,YAAY;AACjB,SAAK,aAAa,MAAM,aAAa;AAAA,EACvC;AACF;","names":["delay"]}
|
|
1
|
+
{"version":3,"sources":["../../../src/voice/transcription/synchronizer.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { ReadableStream, WritableStreamDefaultWriter } from 'node:stream/web';\nimport { log } from '../../log.js';\nimport { IdentityTransform } from '../../stream/identity_transform.js';\nimport type { SentenceStream, SentenceTokenizer } from '../../tokenize/index.js';\nimport { basic } from '../../tokenize/index.js';\nimport { Future, Task, delay } from '../../utils.js';\nimport { AudioOutput, type PlaybackFinishedEvent, TextOutput } from '../io.js';\n\nconst STANDARD_SPEECH_RATE = 3.83; // hyphens (syllables) per second\n\ninterface TextSyncOptions {\n speed: number;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n sentenceTokenizer: SentenceTokenizer;\n}\n\ninterface TextData {\n sentenceStream: SentenceStream;\n pushedText: string;\n done: boolean;\n forwardedHyphens: number;\n forwardedText: string;\n}\n\ninterface AudioData {\n pushedDuration: number;\n done: boolean;\n}\n\nclass SegmentSynchronizerImpl {\n private textData: TextData;\n private audioData: AudioData;\n private speed: number;\n private outputStream: IdentityTransform<string>;\n private outputStreamWriter: WritableStreamDefaultWriter<string>;\n private captureTask: Promise<void>;\n private startWallTime?: number;\n\n private startFuture: Future = new Future();\n private closedFuture: Future = new Future();\n private playbackCompleted: boolean = false;\n\n private logger = log();\n\n constructor(\n private readonly options: TextSyncOptions,\n private readonly nextInChain: TextOutput,\n ) {\n this.speed = options.speed * STANDARD_SPEECH_RATE; // hyphens per second\n this.textData = {\n sentenceStream: options.sentenceTokenizer.stream(),\n pushedText: '',\n done: false,\n forwardedHyphens: 0,\n forwardedText: '',\n };\n this.audioData = {\n pushedDuration: 0,\n done: false,\n };\n this.outputStream = new IdentityTransform();\n this.outputStreamWriter = this.outputStream.writable.getWriter();\n\n this.mainTask()\n .then(() => {\n this.outputStreamWriter.close();\n })\n .catch((error) => {\n this.logger.error({ error }, 'mainTask SegmentSynchronizerImpl');\n });\n this.captureTask = this.captureTaskImpl();\n }\n\n get closed() {\n return this.closedFuture.done;\n }\n\n get audioInputEnded() {\n return this.audioData.done;\n }\n\n get textInputEnded() {\n return this.textData.done;\n }\n\n get readable(): ReadableStream<string> {\n return this.outputStream.readable;\n }\n\n pushAudio(frame: AudioFrame) {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.pushAudio called after close');\n return;\n }\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n const frameDuration = frame.samplesPerChannel / frame.sampleRate;\n\n if (!this.startWallTime && frameDuration > 0) {\n this.startWallTime = Date.now();\n this.startFuture.resolve();\n }\n\n this.audioData.pushedDuration += frameDuration;\n }\n\n endAudioInput() {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.endAudioInput called after close');\n return;\n }\n\n this.audioData.done = true;\n }\n\n pushText(text: string) {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.pushText called after close');\n return;\n }\n\n this.textData.sentenceStream.pushText(text);\n this.textData.pushedText += text;\n }\n\n endTextInput() {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.endTextInput called after close');\n return;\n }\n\n this.textData.done = true;\n this.textData.sentenceStream.endInput();\n }\n\n markPlaybackFinished(_playbackPosition: number, interrupted: boolean) {\n if (this.closed) {\n this.logger.warn('SegmentSynchronizerImpl.markPlaybackFinished called after close');\n return;\n }\n\n if (!this.textData.done || !this.audioData.done) {\n this.logger.warn(\n { textDone: this.textData.done, audioDone: this.audioData.done },\n 'SegmentSynchronizerImpl.markPlaybackFinished called before text/audio input is done',\n );\n return;\n }\n\n if (!interrupted) {\n this.playbackCompleted = true;\n }\n }\n\n get synchronizedTranscript(): string {\n if (this.playbackCompleted) {\n return this.textData.pushedText;\n }\n return this.textData.forwardedText;\n }\n\n private async captureTaskImpl() {\n // Don't use a for-await loop here, because exiting the loop will close the writer in the\n // outputStream, which will cause an error in the mainTask.then method.\n const reader = this.outputStream.readable.getReader();\n while (true) {\n const { done, value: text } = await reader.read();\n if (done) {\n break;\n }\n this.textData.forwardedText += text;\n await this.nextInChain.captureText(text);\n }\n reader.releaseLock();\n this.nextInChain.flush();\n }\n\n private async mainTask(): Promise<void> {\n await this.startFuture.await;\n\n if (this.closed && !this.playbackCompleted) {\n return;\n }\n\n if (!this.startWallTime) {\n throw new Error('startWallTime is not set when starting SegmentSynchronizerImpl.mainTask');\n }\n\n for await (const textSegment of this.textData.sentenceStream) {\n const sentence = textSegment.token;\n\n let textCursor = 0;\n if (this.closed && !this.playbackCompleted) {\n return;\n }\n\n for (const [word, _, endPos] of this.options.splitWords(sentence)) {\n if (this.closed && !this.playbackCompleted) {\n return;\n }\n\n if (this.playbackCompleted) {\n this.outputStreamWriter.write(sentence.slice(textCursor, endPos));\n textCursor = endPos;\n continue;\n }\n\n const wordHphens = this.options.hyphenateWord(word).length;\n const elapsedSeconds = (Date.now() - this.startWallTime) / 1000;\n const targetHyphens = elapsedSeconds * this.options.speed;\n const hyphensBehind = Math.max(0, targetHyphens - this.textData.forwardedHyphens);\n let delay = Math.max(0, wordHphens - hyphensBehind) / this.speed;\n\n if (this.playbackCompleted) {\n delay = 0;\n }\n\n await this.sleepIfNotClosed(delay / 2);\n this.outputStreamWriter.write(sentence.slice(textCursor, endPos));\n await this.sleepIfNotClosed(delay / 2);\n\n this.textData.forwardedHyphens += wordHphens;\n textCursor = endPos;\n }\n\n if (textCursor < sentence.length) {\n const remaining = sentence.slice(textCursor);\n this.outputStreamWriter.write(remaining);\n }\n }\n }\n\n private async sleepIfNotClosed(sleepTimeSeconds: number) {\n if (this.closed) {\n return;\n }\n await delay(sleepTimeSeconds * 1000);\n }\n\n async close(): Promise<void> {\n if (this.closed) {\n return;\n }\n this.closedFuture.resolve();\n this.startFuture.resolve(); // avoid deadlock of mainTaskImpl in case it never started\n this.textData.sentenceStream.close();\n await this.captureTask;\n }\n}\n\nexport interface TranscriptionSynchronizerOptions {\n speed: number;\n hyphenateWord: (word: string) => string[];\n splitWords: (words: string) => [string, number, number][];\n sentenceTokenizer: SentenceTokenizer;\n}\n\nexport const defaultTextSyncOptions: TranscriptionSynchronizerOptions = {\n speed: 1,\n hyphenateWord: basic.hyphenateWord,\n splitWords: basic.splitWords,\n sentenceTokenizer: new basic.SentenceTokenizer({\n retainFormat: true,\n }),\n};\n\nexport class TranscriptionSynchronizer {\n readonly audioOutput: SyncedAudioOutput;\n readonly textOutput: SyncedTextOutput;\n\n private options: TextSyncOptions;\n private rotateSegmentTask: Task<void>;\n private _enabled: boolean = true;\n private closed: boolean = false;\n\n /* @internal */\n _impl: SegmentSynchronizerImpl;\n\n private logger = log();\n\n constructor(\n nextInChainAudio: AudioOutput,\n nextInChainText: TextOutput,\n options: TranscriptionSynchronizerOptions = defaultTextSyncOptions,\n ) {\n this.audioOutput = new SyncedAudioOutput(this, nextInChainAudio);\n this.textOutput = new SyncedTextOutput(this, nextInChainText);\n this.options = {\n speed: options.speed,\n hyphenateWord: options.hyphenateWord,\n splitWords: options.splitWords,\n sentenceTokenizer: options.sentenceTokenizer,\n };\n\n // initial segment/first segment, recreated for each new segment\n this._impl = new SegmentSynchronizerImpl(this.options, nextInChainText);\n this.rotateSegmentTask = Task.from((controller) =>\n this.rotateSegmentTaskImpl(controller.signal),\n );\n }\n\n get enabled(): boolean {\n return this._enabled;\n }\n\n set enabled(enabled: boolean) {\n if (this._enabled === enabled) {\n return;\n }\n\n this._enabled = enabled;\n this.rotateSegment();\n }\n\n rotateSegment() {\n if (this.closed) {\n return;\n }\n\n if (!this.rotateSegmentTask.done) {\n this.logger.warn('rotateSegment called while previous segment is still being rotated');\n }\n this.rotateSegmentTask = Task.from((controller) =>\n this.rotateSegmentTaskImpl(controller.signal, this.rotateSegmentTask),\n );\n }\n\n async close(): Promise<void> {\n this.closed = true;\n await this.rotateSegmentTask.cancelAndWait();\n await this._impl.close();\n }\n\n async barrier(): Promise<void> {\n if (this.rotateSegmentTask.done) {\n return;\n }\n await this.rotateSegmentTask.result;\n }\n\n private async rotateSegmentTaskImpl(abort: AbortSignal, oldTask?: Task<void>) {\n if (oldTask) {\n await oldTask.result;\n }\n\n if (abort.aborted) {\n return;\n }\n await this._impl.close();\n this._impl = new SegmentSynchronizerImpl(this.options, this.textOutput.nextInChain);\n }\n}\n\nclass SyncedAudioOutput extends AudioOutput {\n private pushedDuration: number = 0.0;\n\n constructor(\n public synchronizer: TranscriptionSynchronizer,\n private nextInChainAudio: AudioOutput,\n ) {\n super(nextInChainAudio.sampleRate, nextInChainAudio);\n }\n\n async captureFrame(frame: AudioFrame): Promise<void> {\n // using barrier() on capture should be sufficient, flush() must not be called if\n // capture_frame isn't completed\n await this.synchronizer.barrier();\n\n await super.captureFrame(frame);\n await this.nextInChainAudio.captureFrame(frame); // passthrough audio\n\n // TODO(AJS-102): use frame.durationMs once available in rtc-node\n this.pushedDuration += frame.samplesPerChannel / frame.sampleRate;\n\n if (!this.synchronizer.enabled) {\n return;\n }\n\n if (this.synchronizer._impl.audioInputEnded) {\n this.logger.warn(\n 'SegmentSynchronizerImpl audio marked as ended in capture audio, rotating segment',\n );\n this.synchronizer.rotateSegment();\n await this.synchronizer.barrier();\n }\n this.synchronizer._impl.pushAudio(frame);\n }\n\n flush() {\n super.flush();\n this.nextInChainAudio.flush();\n\n if (!this.synchronizer.enabled) {\n return;\n }\n\n if (!this.pushedDuration) {\n // in case there is no audio after the text was pushed, rotate the segment\n this.synchronizer.rotateSegment();\n return;\n }\n\n this.synchronizer._impl.endAudioInput();\n }\n\n clearBuffer() {\n this.nextInChainAudio.clearBuffer();\n }\n\n // this is going to be automatically called by the next_in_chain\n onPlaybackFinished(ev: PlaybackFinishedEvent) {\n if (!this.synchronizer.enabled) {\n super.onPlaybackFinished(ev);\n return;\n }\n\n this.synchronizer._impl.markPlaybackFinished(ev.playbackPosition, ev.interrupted);\n super.onPlaybackFinished({\n playbackPosition: ev.playbackPosition,\n interrupted: ev.interrupted,\n synchronizedTranscript: this.synchronizer._impl.synchronizedTranscript,\n });\n\n this.synchronizer.rotateSegment();\n this.pushedDuration = 0.0;\n }\n}\n\nclass SyncedTextOutput extends TextOutput {\n private capturing: boolean = false;\n private logger = log();\n\n constructor(\n private readonly synchronizer: TranscriptionSynchronizer,\n public readonly nextInChain: TextOutput,\n ) {\n super(nextInChain);\n }\n\n async captureText(text: string): Promise<void> {\n await this.synchronizer.barrier();\n\n if (!this.synchronizer.enabled) {\n // pass through to the next in chain\n await this.nextInChain.captureText(text);\n return;\n }\n\n this.capturing = true;\n if (this.synchronizer._impl.textInputEnded) {\n this.logger.warn(\n 'SegmentSynchronizerImpl text marked as ended in capture text, rotating segment',\n );\n this.synchronizer.rotateSegment();\n await this.synchronizer.barrier();\n }\n this.synchronizer._impl.pushText(text);\n }\n\n flush() {\n if (!this.synchronizer.enabled) {\n this.nextInChain.flush(); // passthrough text if the synchronizer is disabled\n return;\n }\n\n if (!this.capturing) {\n return;\n }\n\n this.capturing = false;\n this.synchronizer._impl.endTextInput();\n }\n}\n"],"mappings":"AAKA,SAAS,WAAW;AACpB,SAAS,yBAAyB;AAElC,SAAS,aAAa;AACtB,SAAS,QAAQ,MAAM,aAAa;AACpC,SAAS,aAAyC,kBAAkB;AAEpE,MAAM,uBAAuB;AAsB7B,MAAM,wBAAwB;AAAA,EAe5B,YACmB,SACA,aACjB;AAFiB;AACA;AAEjB,SAAK,QAAQ,QAAQ,QAAQ;AAC7B,SAAK,WAAW;AAAA,MACd,gBAAgB,QAAQ,kBAAkB,OAAO;AAAA,MACjD,YAAY;AAAA,MACZ,MAAM;AAAA,MACN,kBAAkB;AAAA,MAClB,eAAe;AAAA,IACjB;AACA,SAAK,YAAY;AAAA,MACf,gBAAgB;AAAA,MAChB,MAAM;AAAA,IACR;AACA,SAAK,eAAe,IAAI,kBAAkB;AAC1C,SAAK,qBAAqB,KAAK,aAAa,SAAS,UAAU;AAE/D,SAAK,SAAS,EACX,KAAK,MAAM;AACV,WAAK,mBAAmB,MAAM;AAAA,IAChC,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,WAAK,OAAO,MAAM,EAAE,MAAM,GAAG,kCAAkC;AAAA,IACjE,CAAC;AACH,SAAK,cAAc,KAAK,gBAAgB;AAAA,EAC1C;AAAA,EAzCQ;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEA,cAAsB,IAAI,OAAO;AAAA,EACjC,eAAuB,IAAI,OAAO;AAAA,EAClC,oBAA6B;AAAA,EAE7B,SAAS,IAAI;AAAA,EA+BrB,IAAI,SAAS;AACX,WAAO,KAAK,aAAa;AAAA,EAC3B;AAAA,EAEA,IAAI,kBAAkB;AACpB,WAAO,KAAK,UAAU;AAAA,EACxB;AAAA,EAEA,IAAI,iBAAiB;AACnB,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EAEA,IAAI,WAAmC;AACrC,WAAO,KAAK,aAAa;AAAA,EAC3B;AAAA,EAEA,UAAU,OAAmB;AAC3B,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,sDAAsD;AACvE;AAAA,IACF;AAEA,UAAM,gBAAgB,MAAM,oBAAoB,MAAM;AAEtD,QAAI,CAAC,KAAK,iBAAiB,gBAAgB,GAAG;AAC5C,WAAK,gBAAgB,KAAK,IAAI;AAC9B,WAAK,YAAY,QAAQ;AAAA,IAC3B;AAEA,SAAK,UAAU,kBAAkB;AAAA,EACnC;AAAA,EAEA,gBAAgB;AACd,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,0DAA0D;AAC3E;AAAA,IACF;AAEA,SAAK,UAAU,OAAO;AAAA,EACxB;AAAA,EAEA,SAAS,MAAc;AACrB,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,qDAAqD;AACtE;AAAA,IACF;AAEA,SAAK,SAAS,eAAe,SAAS,IAAI;AAC1C,SAAK,SAAS,cAAc;AAAA,EAC9B;AAAA,EAEA,eAAe;AACb,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,yDAAyD;AAC1E;AAAA,IACF;AAEA,SAAK,SAAS,OAAO;AACrB,SAAK,SAAS,eAAe,SAAS;AAAA,EACxC;AAAA,EAEA,qBAAqB,mBAA2B,aAAsB;AACpE,QAAI,KAAK,QAAQ;AACf,WAAK,OAAO,KAAK,iEAAiE;AAClF;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,SAAS,QAAQ,CAAC,KAAK,UAAU,MAAM;AAC/C,WAAK,OAAO;AAAA,QACV,EAAE,UAAU,KAAK,SAAS,MAAM,WAAW,KAAK,UAAU,KAAK;AAAA,QAC/D;AAAA,MACF;AACA;AAAA,IACF;AAEA,QAAI,CAAC,aAAa;AAChB,WAAK,oBAAoB;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,IAAI,yBAAiC;AACnC,QAAI,KAAK,mBAAmB;AAC1B,aAAO,KAAK,SAAS;AAAA,IACvB;AACA,WAAO,KAAK,SAAS;AAAA,EACvB;AAAA,EAEA,MAAc,kBAAkB;AAG9B,UAAM,SAAS,KAAK,aAAa,SAAS,UAAU;AACpD,WAAO,MAAM;AACX,YAAM,EAAE,MAAM,OAAO,KAAK,IAAI,MAAM,OAAO,KAAK;AAChD,UAAI,MAAM;AACR;AAAA,MACF;AACA,WAAK,SAAS,iBAAiB;AAC/B,YAAM,KAAK,YAAY,YAAY,IAAI;AAAA,IACzC;AACA,WAAO,YAAY;AACnB,SAAK,YAAY,MAAM;AAAA,EACzB;AAAA,EAEA,MAAc,WAA0B;AACtC,UAAM,KAAK,YAAY;AAEvB,QAAI,KAAK,UAAU,CAAC,KAAK,mBAAmB;AAC1C;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,eAAe;AACvB,YAAM,IAAI,MAAM,yEAAyE;AAAA,IAC3F;AAEA,qBAAiB,eAAe,KAAK,SAAS,gBAAgB;AAC5D,YAAM,WAAW,YAAY;AAE7B,UAAI,aAAa;AACjB,UAAI,KAAK,UAAU,CAAC,KAAK,mBAAmB;AAC1C;AAAA,MACF;AAEA,iBAAW,CAAC,MAAM,GAAG,MAAM,KAAK,KAAK,QAAQ,WAAW,QAAQ,GAAG;AACjE,YAAI,KAAK,UAAU,CAAC,KAAK,mBAAmB;AAC1C;AAAA,QACF;AAEA,YAAI,KAAK,mBAAmB;AAC1B,eAAK,mBAAmB,MAAM,SAAS,MAAM,YAAY,MAAM,CAAC;AAChE,uBAAa;AACb;AAAA,QACF;AAEA,cAAM,aAAa,KAAK,QAAQ,cAAc,IAAI,EAAE;AACpD,cAAM,kBAAkB,KAAK,IAAI,IAAI,KAAK,iBAAiB;AAC3D,cAAM,gBAAgB,iBAAiB,KAAK,QAAQ;AACpD,cAAM,gBAAgB,KAAK,IAAI,GAAG,gBAAgB,KAAK,SAAS,gBAAgB;AAChF,YAAIA,SAAQ,KAAK,IAAI,GAAG,aAAa,aAAa,IAAI,KAAK;AAE3D,YAAI,KAAK,mBAAmB;AAC1B,UAAAA,SAAQ;AAAA,QACV;AAEA,cAAM,KAAK,iBAAiBA,SAAQ,CAAC;AACrC,aAAK,mBAAmB,MAAM,SAAS,MAAM,YAAY,MAAM,CAAC;AAChE,cAAM,KAAK,iBAAiBA,SAAQ,CAAC;AAErC,aAAK,SAAS,oBAAoB;AAClC,qBAAa;AAAA,MACf;AAEA,UAAI,aAAa,SAAS,QAAQ;AAChC,cAAM,YAAY,SAAS,MAAM,UAAU;AAC3C,aAAK,mBAAmB,MAAM,SAAS;AAAA,MACzC;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,iBAAiB,kBAA0B;AACvD,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AACA,UAAM,MAAM,mBAAmB,GAAI;AAAA,EACrC;AAAA,EAEA,MAAM,QAAuB;AAC3B,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AACA,SAAK,aAAa,QAAQ;AAC1B,SAAK,YAAY,QAAQ;AACzB,SAAK,SAAS,eAAe,MAAM;AACnC,UAAM,KAAK;AAAA,EACb;AACF;AASO,MAAM,yBAA2D;AAAA,EACtE,OAAO;AAAA,EACP,eAAe,MAAM;AAAA,EACrB,YAAY,MAAM;AAAA,EAClB,mBAAmB,IAAI,MAAM,kBAAkB;AAAA,IAC7C,cAAc;AAAA,EAChB,CAAC;AACH;AAEO,MAAM,0BAA0B;AAAA,EAC5B;AAAA,EACA;AAAA,EAED;AAAA,EACA;AAAA,EACA,WAAoB;AAAA,EACpB,SAAkB;AAAA;AAAA,EAG1B;AAAA,EAEQ,SAAS,IAAI;AAAA,EAErB,YACE,kBACA,iBACA,UAA4C,wBAC5C;AACA,SAAK,cAAc,IAAI,kBAAkB,MAAM,gBAAgB;AAC/D,SAAK,aAAa,IAAI,iBAAiB,MAAM,eAAe;AAC5D,SAAK,UAAU;AAAA,MACb,OAAO,QAAQ;AAAA,MACf,eAAe,QAAQ;AAAA,MACvB,YAAY,QAAQ;AAAA,MACpB,mBAAmB,QAAQ;AAAA,IAC7B;AAGA,SAAK,QAAQ,IAAI,wBAAwB,KAAK,SAAS,eAAe;AACtE,SAAK,oBAAoB,KAAK;AAAA,MAAK,CAAC,eAClC,KAAK,sBAAsB,WAAW,MAAM;AAAA,IAC9C;AAAA,EACF;AAAA,EAEA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,QAAQ,SAAkB;AAC5B,QAAI,KAAK,aAAa,SAAS;AAC7B;AAAA,IACF;AAEA,SAAK,WAAW;AAChB,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,gBAAgB;AACd,QAAI,KAAK,QAAQ;AACf;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,kBAAkB,MAAM;AAChC,WAAK,OAAO,KAAK,oEAAoE;AAAA,IACvF;AACA,SAAK,oBAAoB,KAAK;AAAA,MAAK,CAAC,eAClC,KAAK,sBAAsB,WAAW,QAAQ,KAAK,iBAAiB;AAAA,IACtE;AAAA,EACF;AAAA,EAEA,MAAM,QAAuB;AAC3B,SAAK,SAAS;AACd,UAAM,KAAK,kBAAkB,cAAc;AAC3C,UAAM,KAAK,MAAM,MAAM;AAAA,EACzB;AAAA,EAEA,MAAM,UAAyB;AAC7B,QAAI,KAAK,kBAAkB,MAAM;AAC/B;AAAA,IACF;AACA,UAAM,KAAK,kBAAkB;AAAA,EAC/B;AAAA,EAEA,MAAc,sBAAsB,OAAoB,SAAsB;AAC5E,QAAI,SAAS;AACX,YAAM,QAAQ;AAAA,IAChB;AAEA,QAAI,MAAM,SAAS;AACjB;AAAA,IACF;AACA,UAAM,KAAK,MAAM,MAAM;AACvB,SAAK,QAAQ,IAAI,wBAAwB,KAAK,SAAS,KAAK,WAAW,WAAW;AAAA,EACpF;AACF;AAEA,MAAM,0BAA0B,YAAY;AAAA,EAG1C,YACS,cACC,kBACR;AACA,UAAM,iBAAiB,YAAY,gBAAgB;AAH5C;AACC;AAAA,EAGV;AAAA,EAPQ,iBAAyB;AAAA,EASjC,MAAM,aAAa,OAAkC;AAGnD,UAAM,KAAK,aAAa,QAAQ;AAEhC,UAAM,MAAM,aAAa,KAAK;AAC9B,UAAM,KAAK,iBAAiB,aAAa,KAAK;AAG9C,SAAK,kBAAkB,MAAM,oBAAoB,MAAM;AAEvD,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B;AAAA,IACF;AAEA,QAAI,KAAK,aAAa,MAAM,iBAAiB;AAC3C,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AACA,WAAK,aAAa,cAAc;AAChC,YAAM,KAAK,aAAa,QAAQ;AAAA,IAClC;AACA,SAAK,aAAa,MAAM,UAAU,KAAK;AAAA,EACzC;AAAA,EAEA,QAAQ;AACN,UAAM,MAAM;AACZ,SAAK,iBAAiB,MAAM;AAE5B,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,gBAAgB;AAExB,WAAK,aAAa,cAAc;AAChC;AAAA,IACF;AAEA,SAAK,aAAa,MAAM,cAAc;AAAA,EACxC;AAAA,EAEA,cAAc;AACZ,SAAK,iBAAiB,YAAY;AAAA,EACpC;AAAA;AAAA,EAGA,mBAAmB,IAA2B;AAC5C,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B,YAAM,mBAAmB,EAAE;AAC3B;AAAA,IACF;AAEA,SAAK,aAAa,MAAM,qBAAqB,GAAG,kBAAkB,GAAG,WAAW;AAChF,UAAM,mBAAmB;AAAA,MACvB,kBAAkB,GAAG;AAAA,MACrB,aAAa,GAAG;AAAA,MAChB,wBAAwB,KAAK,aAAa,MAAM;AAAA,IAClD,CAAC;AAED,SAAK,aAAa,cAAc;AAChC,SAAK,iBAAiB;AAAA,EACxB;AACF;AAEA,MAAM,yBAAyB,WAAW;AAAA,EAIxC,YACmB,cACD,aAChB;AACA,UAAM,WAAW;AAHA;AACD;AAAA,EAGlB;AAAA,EARQ,YAAqB;AAAA,EACrB,SAAS,IAAI;AAAA,EASrB,MAAM,YAAY,MAA6B;AAC7C,UAAM,KAAK,aAAa,QAAQ;AAEhC,QAAI,CAAC,KAAK,aAAa,SAAS;AAE9B,YAAM,KAAK,YAAY,YAAY,IAAI;AACvC;AAAA,IACF;AAEA,SAAK,YAAY;AACjB,QAAI,KAAK,aAAa,MAAM,gBAAgB;AAC1C,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AACA,WAAK,aAAa,cAAc;AAChC,YAAM,KAAK,aAAa,QAAQ;AAAA,IAClC;AACA,SAAK,aAAa,MAAM,SAAS,IAAI;AAAA,EACvC;AAAA,EAEA,QAAQ;AACN,QAAI,CAAC,KAAK,aAAa,SAAS;AAC9B,WAAK,YAAY,MAAM;AACvB;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,WAAW;AACnB;AAAA,IACF;AAEA,SAAK,YAAY;AACjB,SAAK,aAAa,MAAM,aAAa;AAAA,EACvC;AACF;","names":["delay"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@livekit/agents",
|
|
3
|
-
"version": "1.0.0-next.
|
|
3
|
+
"version": "1.0.0-next.2",
|
|
4
4
|
"description": "LiveKit Agents - Node.js",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"require": "dist/index.cjs",
|
|
@@ -37,7 +37,6 @@
|
|
|
37
37
|
"@livekit/mutex": "^1.1.1",
|
|
38
38
|
"@livekit/protocol": "^1.29.1",
|
|
39
39
|
"@livekit/typed-emitter": "^3.0.0",
|
|
40
|
-
"@std/async": "npm:@jsr/std__async@^1.0.13",
|
|
41
40
|
"commander": "^12.0.0",
|
|
42
41
|
"heap-js": "^2.6.0",
|
|
43
42
|
"json-schema": "^0.4.0",
|
package/src/llm/llm.ts
CHANGED
|
@@ -2,13 +2,12 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
5
|
-
import { delay } from '@std/async';
|
|
6
5
|
import { EventEmitter } from 'node:events';
|
|
7
6
|
import { APIConnectionError, APIError } from '../_exceptions.js';
|
|
8
7
|
import { log } from '../log.js';
|
|
9
8
|
import type { LLMMetrics } from '../metrics/base.js';
|
|
10
9
|
import type { APIConnectOptions } from '../types.js';
|
|
11
|
-
import { AsyncIterableQueue, startSoon, toError } from '../utils.js';
|
|
10
|
+
import { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';
|
|
12
11
|
import { type ChatContext, type ChatRole, type FunctionCall } from './chat_context.js';
|
|
13
12
|
import type { ToolChoice, ToolContext } from './tool_context.js';
|
|
14
13
|
|
package/src/llm/utils.ts
CHANGED
|
@@ -151,10 +151,14 @@ export const createToolOptions = <UserData extends UnknownUserData>(
|
|
|
151
151
|
|
|
152
152
|
/** @internal */
|
|
153
153
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
154
|
-
export const oaiParams = (
|
|
155
|
-
|
|
154
|
+
export const oaiParams = (
|
|
155
|
+
p: ZodObject<any>,
|
|
156
|
+
isOpenai: boolean = true,
|
|
157
|
+
): OpenAIFunctionParameters => {
|
|
158
|
+
// Adapted from https://github.com/vercel/ai/blob/56eb0ee9/packages/provider-utils/src/zod-schema.ts
|
|
156
159
|
const { properties, required, additionalProperties } = zodToJsonSchema(p, {
|
|
157
|
-
|
|
160
|
+
// note: openai mode breaks various gemini conversions
|
|
161
|
+
target: isOpenai ? 'openAi' : 'jsonSchema7',
|
|
158
162
|
}) as OpenAIFunctionParameters;
|
|
159
163
|
|
|
160
164
|
return {
|
|
@@ -316,9 +320,9 @@ export function computeChatCtxDiff(oldCtx: ChatContext, newCtx: ChatContext): Di
|
|
|
316
320
|
};
|
|
317
321
|
}
|
|
318
322
|
|
|
319
|
-
export function toJsonSchema(schema: ToolInputSchema<any
|
|
323
|
+
export function toJsonSchema(schema: ToolInputSchema<any>, isOpenai: boolean = true): JSONSchema7 {
|
|
320
324
|
if (schema instanceof ZodObject) {
|
|
321
|
-
return oaiParams(schema);
|
|
325
|
+
return oaiParams(schema, isOpenai);
|
|
322
326
|
}
|
|
323
327
|
return schema;
|
|
324
328
|
}
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import { delay } from '@std/async/delay';
|
|
5
4
|
import { ReadableStream } from 'node:stream/web';
|
|
6
5
|
import { describe, expect, it } from 'vitest';
|
|
6
|
+
import { delay } from '../utils.js';
|
|
7
7
|
import { DeferredReadableStream } from './deferred_stream.js';
|
|
8
8
|
|
|
9
9
|
describe('DeferredReadableStream', () => {
|
package/src/stt/stt.ts
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { type AudioFrame, AudioResampler } from '@livekit/rtc-node';
|
|
5
5
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
6
|
-
import { delay } from '@std/async/delay';
|
|
7
6
|
import { EventEmitter } from 'node:events';
|
|
8
7
|
import type { ReadableStream } from 'node:stream/web';
|
|
9
8
|
import { APIConnectionError, APIError } from '../_exceptions.js';
|
|
@@ -13,7 +12,7 @@ import type { STTMetrics } from '../metrics/base.js';
|
|
|
13
12
|
import { DeferredReadableStream } from '../stream/deferred_stream.js';
|
|
14
13
|
import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
|
|
15
14
|
import type { AudioBuffer } from '../utils.js';
|
|
16
|
-
import { AsyncIterableQueue, startSoon, toError } from '../utils.js';
|
|
15
|
+
import { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';
|
|
17
16
|
|
|
18
17
|
/** Indicates start/middle/end of speech */
|
|
19
18
|
export enum SpeechEventType {
|
package/src/tts/tts.ts
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import type { AudioFrame } from '@livekit/rtc-node';
|
|
5
5
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
6
|
-
import { delay } from '@std/async';
|
|
7
6
|
import { EventEmitter } from 'node:events';
|
|
8
7
|
import type { ReadableStream } from 'node:stream/web';
|
|
9
8
|
import { APIConnectionError, APIStatusError } from '../_exceptions.js';
|
|
@@ -11,7 +10,7 @@ import { log } from '../log.js';
|
|
|
11
10
|
import type { TTSMetrics } from '../metrics/base.js';
|
|
12
11
|
import { DeferredReadableStream } from '../stream/deferred_stream.js';
|
|
13
12
|
import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
|
|
14
|
-
import { AsyncIterableQueue, mergeFrames, startSoon, toError } from '../utils.js';
|
|
13
|
+
import { AsyncIterableQueue, delay, mergeFrames, startSoon, toError } from '../utils.js';
|
|
15
14
|
|
|
16
15
|
/** SynthesizedAudio is a packet of speech synthesis as returned by the TTS. */
|
|
17
16
|
export interface SynthesizedAudio {
|
package/src/utils.test.ts
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { AudioFrame } from '@livekit/rtc-node';
|
|
5
|
-
import { delay } from '@std/async';
|
|
6
5
|
import { ReadableStream } from 'node:stream/web';
|
|
7
6
|
import { describe, expect, it } from 'vitest';
|
|
8
7
|
import { initializeLogger } from '../src/log.js';
|
|
@@ -11,6 +10,7 @@ import {
|
|
|
11
10
|
TASK_TIMEOUT_ERROR,
|
|
12
11
|
Task,
|
|
13
12
|
TaskResult,
|
|
13
|
+
delay,
|
|
14
14
|
isPending,
|
|
15
15
|
resampleStream,
|
|
16
16
|
} from '../src/utils.js';
|
package/src/utils.ts
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { AudioFrame, AudioResampler } from '@livekit/rtc-node';
|
|
5
|
-
import { delay } from '@std/async';
|
|
6
5
|
import { EventEmitter, once } from 'node:events';
|
|
7
6
|
import type { ReadableStream } from 'node:stream/web';
|
|
8
7
|
import { TransformStream, type TransformStreamDefaultController } from 'node:stream/web';
|
|
@@ -670,3 +669,31 @@ export function toError(error: unknown): Error {
|
|
|
670
669
|
export function startSoon(func: () => void) {
|
|
671
670
|
setTimeout(func, 0);
|
|
672
671
|
}
|
|
672
|
+
|
|
673
|
+
export type DelayOptions = {
|
|
674
|
+
signal?: AbortSignal;
|
|
675
|
+
};
|
|
676
|
+
|
|
677
|
+
/**
|
|
678
|
+
* Delay for a given number of milliseconds.
|
|
679
|
+
*
|
|
680
|
+
* @param ms - The number of milliseconds to delay.
|
|
681
|
+
* @param options - The options for the delay.
|
|
682
|
+
* @returns A promise that resolves after the delay.
|
|
683
|
+
*/
|
|
684
|
+
export function delay(ms: number, options: DelayOptions = {}): Promise<void> {
|
|
685
|
+
const { signal } = options;
|
|
686
|
+
if (signal?.aborted) return Promise.reject(signal.reason);
|
|
687
|
+
return new Promise((resolve, reject) => {
|
|
688
|
+
const abort = () => {
|
|
689
|
+
clearTimeout(i);
|
|
690
|
+
reject(signal?.reason);
|
|
691
|
+
};
|
|
692
|
+
const done = () => {
|
|
693
|
+
signal?.removeEventListener('abort', abort);
|
|
694
|
+
resolve();
|
|
695
|
+
};
|
|
696
|
+
const i = setTimeout(done, ms);
|
|
697
|
+
signal?.addEventListener('abort', abort, { once: true });
|
|
698
|
+
});
|
|
699
|
+
}
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { AudioFrame } from '@livekit/rtc-node';
|
|
5
|
-
import { delay } from '@std/async';
|
|
6
5
|
import type { WritableStreamDefaultWriter } from 'node:stream/web';
|
|
7
6
|
import { ReadableStream } from 'node:stream/web';
|
|
8
7
|
import { type ChatContext } from '../llm/chat_context.js';
|
|
@@ -11,7 +10,7 @@ import { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/de
|
|
|
11
10
|
import { IdentityTransform } from '../stream/identity_transform.js';
|
|
12
11
|
import { mergeReadableStreams } from '../stream/merge_readable_streams.js';
|
|
13
12
|
import { type SpeechEvent, SpeechEventType } from '../stt/stt.js';
|
|
14
|
-
import { Task } from '../utils.js';
|
|
13
|
+
import { Task, delay } from '../utils.js';
|
|
15
14
|
import { type VAD, type VADEvent, VADEventType } from '../vad.js';
|
|
16
15
|
import type { TurnDetectionMode } from './agent_session.js';
|
|
17
16
|
import type { STTNode } from './io.js';
|
|
@@ -2,13 +2,12 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import type { AudioFrame } from '@livekit/rtc-node';
|
|
5
|
-
import { delay } from '@std/async';
|
|
6
5
|
import type { ReadableStream, WritableStreamDefaultWriter } from 'node:stream/web';
|
|
7
6
|
import { log } from '../../log.js';
|
|
8
7
|
import { IdentityTransform } from '../../stream/identity_transform.js';
|
|
9
8
|
import type { SentenceStream, SentenceTokenizer } from '../../tokenize/index.js';
|
|
10
9
|
import { basic } from '../../tokenize/index.js';
|
|
11
|
-
import { Future, Task } from '../../utils.js';
|
|
10
|
+
import { Future, Task, delay } from '../../utils.js';
|
|
12
11
|
import { AudioOutput, type PlaybackFinishedEvent, TextOutput } from '../io.js';
|
|
13
12
|
|
|
14
13
|
const STANDARD_SPEECH_RATE = 3.83; // hyphens (syllables) per second
|