@livekit/agents 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,7 @@ var import_utils = require("../utils.cjs");
28
28
  class SynthesisHandle {
29
29
  static FLUSH_SENTINEL = Symbol("FLUSH_SENTINEL");
30
30
  #speechId;
31
+ text;
31
32
  ttsSource;
32
33
  #agentPlayout;
33
34
  tts;
@@ -111,6 +112,10 @@ class AgentOutput {
111
112
  } finally {
112
113
  if (handle.intFut.done) {
113
114
  (0, import_utils.gracefullyCancel)(task);
115
+ } else {
116
+ task.then((text) => {
117
+ handle.text = text;
118
+ });
114
119
  }
115
120
  }
116
121
  resolve();
@@ -134,11 +139,12 @@ const stringSynthesisTask = (text, handle) => {
134
139
  handle.queue.put(audio.frame);
135
140
  }
136
141
  handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);
137
- resolve();
142
+ resolve(text);
138
143
  });
139
144
  };
140
145
  const streamSynthesisTask = (stream, handle) => {
141
146
  return new import_utils.CancellablePromise(async (resolve, _, onCancel) => {
147
+ let fullText = "";
142
148
  let cancelled = false;
143
149
  onCancel(() => {
144
150
  cancelled = true;
@@ -156,12 +162,13 @@ const streamSynthesisTask = (stream, handle) => {
156
162
  };
157
163
  readGeneratedAudio();
158
164
  for await (const text of stream) {
165
+ fullText += text;
159
166
  if (cancelled) break;
160
167
  ttsStream.pushText(text);
161
168
  }
162
169
  ttsStream.flush();
163
170
  ttsStream.endInput();
164
- resolve();
171
+ resolve(fullText);
165
172
  });
166
173
  };
167
174
  // Annotate the CommonJS export names for ESM import in node:
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/agent_output.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { log } from '../log.js';\nimport { SynthesizeStream, type TTS } from '../tts/index.js';\nimport { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport type { AgentPlayout, PlayoutHandle } from './agent_playout.js';\n\nexport type SpeechSource = AsyncIterable<string> | string | Promise<string>;\n\nexport class SynthesisHandle {\n static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n\n #speechId: string;\n ttsSource: SpeechSource;\n #agentPlayout: AgentPlayout;\n tts: TTS;\n queue = new AsyncIterableQueue<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>();\n #playHandle?: PlayoutHandle;\n intFut = new Future();\n #logger = log();\n\n constructor(speechId: string, ttsSource: SpeechSource, agentPlayout: AgentPlayout, tts: TTS) {\n this.#speechId = speechId;\n this.ttsSource = ttsSource;\n this.#agentPlayout = agentPlayout;\n this.tts = tts;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get validated(): boolean {\n return !!this.#playHandle;\n }\n\n get interrupted(): boolean {\n return this.intFut.done;\n }\n\n get playHandle(): PlayoutHandle | undefined {\n return this.#playHandle;\n }\n\n /** Validate the speech for playout. */\n play(): PlayoutHandle {\n if (this.interrupted) {\n throw new Error('synthesis was interrupted');\n }\n\n this.#playHandle = this.#agentPlayout.play(this.#speechId, this.queue);\n return this.#playHandle;\n }\n\n /** Interrupt the speech. */\n interrupt() {\n if (this.interrupted) {\n return;\n }\n\n this.#logger.child({ speechId: this.#speechId }).debug('interrupting synthesis/playout');\n this.#playHandle?.interrupt();\n this.intFut.resolve();\n }\n}\n\nexport class AgentOutput {\n #agentPlayout: AgentPlayout;\n #tts: TTS;\n #tasks: CancellablePromise<void>[] = [];\n\n constructor(agentPlayout: AgentPlayout, tts: TTS) {\n this.#agentPlayout = agentPlayout;\n this.#tts = tts;\n }\n\n get playout(): AgentPlayout {\n return this.#agentPlayout;\n }\n\n async close() {\n this.#tasks.forEach((task) => task.cancel());\n await Promise.all(this.#tasks);\n }\n\n synthesize(speechId: string, ttsSource: SpeechSource): SynthesisHandle {\n const handle = new SynthesisHandle(speechId, ttsSource, this.#agentPlayout, this.#tts);\n const task = this.#synthesize(handle);\n this.#tasks.push(task);\n task.finally(() => this.#tasks.splice(this.#tasks.indexOf(task)));\n return handle;\n }\n\n #synthesize(handle: SynthesisHandle): CancellablePromise<void> {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const ttsSource = await handle.ttsSource;\n let task: CancellablePromise<void>;\n if (typeof ttsSource === 'string') {\n task = stringSynthesisTask(ttsSource, handle);\n } else {\n task = streamSynthesisTask(ttsSource, handle);\n }\n\n onCancel(() => {\n gracefullyCancel(task);\n });\n\n try {\n await Promise.any([task, handle.intFut.await]);\n } finally {\n if (handle.intFut.done) {\n gracefullyCancel(task);\n }\n }\n\n resolve();\n });\n }\n}\n\nconst stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<void> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n ttsStream.pushText(text);\n ttsStream.flush();\n ttsStream.endInput();\n for await (const audio of ttsStream) {\n if (cancelled || audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n\n resolve();\n });\n};\n\nconst streamSynthesisTask = (\n stream: AsyncIterable<string>,\n handle: SynthesisHandle,\n): CancellablePromise<void> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n const readGeneratedAudio = async () => {\n for await (const audio of ttsStream) {\n if (cancelled) break;\n if (audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n };\n readGeneratedAudio();\n\n for await (const text of stream) {\n if (cancelled) break;\n ttsStream.pushText(text);\n }\n ttsStream.flush();\n ttsStream.endInput();\n\n resolve();\n });\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,iBAAoB;AACpB,iBAA2C;AAC3C,mBAAiF;AAK1E,MAAM,gBAAgB;AAAA,EAC3B,OAAgB,iBAAiB,OAAO,gBAAgB;AAAA,EAExD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAQ,IAAI,gCAAuE;AAAA,EACnF;AAAA,EACA,SAAS,IAAI,oBAAO;AAAA,EACpB,cAAU,gBAAI;AAAA,EAEd,YAAY,UAAkB,WAAyB,cAA4B,KAAU;AAC3F,SAAK,YAAY;AACjB,SAAK,YAAY;AACjB,SAAK,gBAAgB;AACrB,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,CAAC,CAAC,KAAK;AAAA,EAChB;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK,OAAO;AAAA,EACrB;AAAA,EAEA,IAAI,aAAwC;AAC1C,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,OAAsB;AACpB,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AAEA,SAAK,cAAc,KAAK,cAAc,KAAK,KAAK,WAAW,KAAK,KAAK;AACrE,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,YAAY;AAzDd;AA0DI,QAAI,KAAK,aAAa;AACpB;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,UAAU,KAAK,UAAU,CAAC,EAAE,MAAM,gCAAgC;AACvF,eAAK,gBAAL,mBAAkB;AAClB,SAAK,OAAO,QAAQ;AAAA,EACtB;AACF;AAEO,MAAM,YAAY;AAAA,EACvB;AAAA,EACA;AAAA,EACA,SAAqC,CAAC;AAAA,EAEtC,YAAY,cAA4B,KAAU;AAChD,SAAK,gBAAgB;AACrB,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,IAAI,UAAwB;AAC1B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,OAAO,QAAQ,CAAC,SAAS,KAAK,OAAO,CAAC;AAC3C,UAAM,QAAQ,IAAI,KAAK,MAAM;AAAA,EAC/B;AAAA,EAEA,WAAW,UAAkB,WAA0C;AACrE,UAAM,SAAS,IAAI,gBAAgB,UAAU,WAAW,KAAK,eAAe,KAAK,IAAI;AACrF,UAAM,OAAO,KAAK,YAAY,MAAM;AACpC,SAAK,OAAO,KAAK,IAAI;AACrB,SAAK,QAAQ,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,QAAQ,IAAI,CAAC,CAAC;AAChE,WAAO;AAAA,EACT;AAAA,EAEA,YAAY,QAAmD;AAE7D,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,YAAY,MAAM,OAAO;AAC/B,UAAI;AACJ,UAAI,OAAO,cAAc,UAAU;AACjC,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C,OAAO;AACL,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C;AAEA,eAAS,MAAM;AACb,2CAAiB,IAAI;AAAA,MACvB,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,MAAM,OAAO,OAAO,KAAK,CAAC;AAAA,MAC/C,UAAE;AACA,YAAI,OAAO,OAAO,MAAM;AACtB,6CAAiB,IAAI;AAAA,QACvB;AAAA,MACF;AAEA,cAAQ;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEA,MAAM,sBAAsB,CAAC,MAAc,WAAsD;AAE/F,SAAO,IAAI,gCAAyB,OAAO,SAAS,GAAG,aAAa;AAClE,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,cAAU,SAAS,IAAI;AACvB,cAAU,MAAM;AAChB,cAAU,SAAS;AACnB,qBAAiB,SAAS,WAAW;AACnC,UAAI,aAAa,UAAU,4BAAiB,eAAe;AACzD;AAAA,MACF;AACA,aAAO,MAAM,IAAI,MAAM,KAAK;AAAA,IAC9B;AACA,WAAO,MAAM,IAAI,gBAAgB,cAAc;AAE/C,YAAQ;AAAA,EACV,CAAC;AACH;AAEA,MAAM,sBAAsB,CAC1B,QACA,WAC6B;AAE7B,SAAO,IAAI,gCAAyB,OAAO,SAAS,GAAG,aAAa;AAClE,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,UAAM,qBAAqB,YAAY;AACrC,uBAAiB,SAAS,WAAW;AACnC,YAAI,UAAW;AACf,YAAI,UAAU,4BAAiB,eAAe;AAC5C;AAAA,QACF;AACA,eAAO,MAAM,IAAI,MAAM,KAAK;AAAA,MAC9B;AACA,aAAO,MAAM,IAAI,gBAAgB,cAAc;AAAA,IACjD;AACA,uBAAmB;AAEnB,qBAAiB,QAAQ,QAAQ;AAC/B,UAAI,UAAW;AACf,gBAAU,SAAS,IAAI;AAAA,IACzB;AACA,cAAU,MAAM;AAChB,cAAU,SAAS;AAEnB,YAAQ;AAAA,EACV,CAAC;AACH;","names":[]}
1
+ {"version":3,"sources":["../../src/pipeline/agent_output.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { log } from '../log.js';\nimport { SynthesizeStream, type TTS } from '../tts/index.js';\nimport { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport type { AgentPlayout, PlayoutHandle } from './agent_playout.js';\n\nexport type SpeechSource = AsyncIterable<string> | string | Promise<string>;\n\nexport class SynthesisHandle {\n static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n\n #speechId: string;\n text?: string;\n ttsSource: SpeechSource;\n #agentPlayout: AgentPlayout;\n tts: TTS;\n queue = new AsyncIterableQueue<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>();\n #playHandle?: PlayoutHandle;\n intFut = new Future();\n #logger = log();\n\n constructor(speechId: string, ttsSource: SpeechSource, agentPlayout: AgentPlayout, tts: TTS) {\n this.#speechId = speechId;\n this.ttsSource = ttsSource;\n this.#agentPlayout = agentPlayout;\n this.tts = tts;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get validated(): boolean {\n return !!this.#playHandle;\n }\n\n get interrupted(): boolean {\n return this.intFut.done;\n }\n\n get playHandle(): PlayoutHandle | undefined {\n return this.#playHandle;\n }\n\n /** Validate the speech for playout. */\n play(): PlayoutHandle {\n if (this.interrupted) {\n throw new Error('synthesis was interrupted');\n }\n\n this.#playHandle = this.#agentPlayout.play(this.#speechId, this.queue);\n return this.#playHandle;\n }\n\n /** Interrupt the speech. */\n interrupt() {\n if (this.interrupted) {\n return;\n }\n\n this.#logger.child({ speechId: this.#speechId }).debug('interrupting synthesis/playout');\n this.#playHandle?.interrupt();\n this.intFut.resolve();\n }\n}\n\nexport class AgentOutput {\n #agentPlayout: AgentPlayout;\n #tts: TTS;\n #tasks: CancellablePromise<void>[] = [];\n\n constructor(agentPlayout: AgentPlayout, tts: TTS) {\n this.#agentPlayout = agentPlayout;\n this.#tts = tts;\n }\n\n get playout(): AgentPlayout {\n return this.#agentPlayout;\n }\n\n async close() {\n this.#tasks.forEach((task) => task.cancel());\n await Promise.all(this.#tasks);\n }\n\n synthesize(speechId: string, ttsSource: SpeechSource): SynthesisHandle {\n const handle = new SynthesisHandle(speechId, ttsSource, this.#agentPlayout, this.#tts);\n const task = this.#synthesize(handle);\n this.#tasks.push(task);\n task.finally(() => this.#tasks.splice(this.#tasks.indexOf(task)));\n return handle;\n }\n\n #synthesize(handle: SynthesisHandle): CancellablePromise<void> {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const ttsSource = await handle.ttsSource;\n let task: CancellablePromise<string>;\n if (typeof ttsSource === 'string') {\n task = stringSynthesisTask(ttsSource, handle);\n } else {\n task = streamSynthesisTask(ttsSource, handle);\n }\n\n onCancel(() => {\n gracefullyCancel(task);\n });\n\n try {\n await Promise.any([task, handle.intFut.await]);\n } finally {\n if (handle.intFut.done) {\n gracefullyCancel(task);\n } else {\n task.then((text) => {\n handle.text = text;\n });\n }\n }\n\n resolve();\n });\n }\n}\n\nconst stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<string> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n ttsStream.pushText(text);\n ttsStream.flush();\n ttsStream.endInput();\n for await (const audio of ttsStream) {\n if (cancelled || audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n\n resolve(text);\n });\n};\n\nconst streamSynthesisTask = (\n stream: AsyncIterable<string>,\n handle: SynthesisHandle,\n): CancellablePromise<string> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let fullText = '';\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n const readGeneratedAudio = async () => {\n for await (const audio of ttsStream) {\n if (cancelled) break;\n if (audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n };\n readGeneratedAudio();\n\n for await (const text of stream) {\n fullText += text;\n if (cancelled) break;\n ttsStream.pushText(text);\n }\n ttsStream.flush();\n ttsStream.endInput();\n\n resolve(fullText);\n });\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,iBAAoB;AACpB,iBAA2C;AAC3C,mBAAiF;AAK1E,MAAM,gBAAgB;AAAA,EAC3B,OAAgB,iBAAiB,OAAO,gBAAgB;AAAA,EAExD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAQ,IAAI,gCAAuE;AAAA,EACnF;AAAA,EACA,SAAS,IAAI,oBAAO;AAAA,EACpB,cAAU,gBAAI;AAAA,EAEd,YAAY,UAAkB,WAAyB,cAA4B,KAAU;AAC3F,SAAK,YAAY;AACjB,SAAK,YAAY;AACjB,SAAK,gBAAgB;AACrB,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,CAAC,CAAC,KAAK;AAAA,EAChB;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK,OAAO;AAAA,EACrB;AAAA,EAEA,IAAI,aAAwC;AAC1C,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,OAAsB;AACpB,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AAEA,SAAK,cAAc,KAAK,cAAc,KAAK,KAAK,WAAW,KAAK,KAAK;AACrE,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,YAAY;AA1Dd;AA2DI,QAAI,KAAK,aAAa;AACpB;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,UAAU,KAAK,UAAU,CAAC,EAAE,MAAM,gCAAgC;AACvF,eAAK,gBAAL,mBAAkB;AAClB,SAAK,OAAO,QAAQ;AAAA,EACtB;AACF;AAEO,MAAM,YAAY;AAAA,EACvB;AAAA,EACA;AAAA,EACA,SAAqC,CAAC;AAAA,EAEtC,YAAY,cAA4B,KAAU;AAChD,SAAK,gBAAgB;AACrB,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,IAAI,UAAwB;AAC1B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,OAAO,QAAQ,CAAC,SAAS,KAAK,OAAO,CAAC;AAC3C,UAAM,QAAQ,IAAI,KAAK,MAAM;AAAA,EAC/B;AAAA,EAEA,WAAW,UAAkB,WAA0C;AACrE,UAAM,SAAS,IAAI,gBAAgB,UAAU,WAAW,KAAK,eAAe,KAAK,IAAI;AACrF,UAAM,OAAO,KAAK,YAAY,MAAM;AACpC,SAAK,OAAO,KAAK,IAAI;AACrB,SAAK,QAAQ,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,QAAQ,IAAI,CAAC,CAAC;AAChE,WAAO;AAAA,EACT;AAAA,EAEA,YAAY,QAAmD;AAE7D,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,YAAY,MAAM,OAAO;AAC/B,UAAI;AACJ,UAAI,OAAO,cAAc,UAAU;AACjC,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C,OAAO;AACL,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C;AAEA,eAAS,MAAM;AACb,2CAAiB,IAAI;AAAA,MACvB,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,MAAM,OAAO,OAAO,KAAK,CAAC;AAAA,MAC/C,UAAE;AACA,YAAI,OAAO,OAAO,MAAM;AACtB,6CAAiB,IAAI;AAAA,QACvB,OAAO;AACL,eAAK,KAAK,CAAC,SAAS;AAClB,mBAAO,OAAO;AAAA,UAChB,CAAC;AAAA,QACH;AAAA,MACF;AAEA,cAAQ;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEA,MAAM,sBAAsB,CAAC,MAAc,WAAwD;AAEjG,SAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,cAAU,SAAS,IAAI;AACvB,cAAU,MAAM;AAChB,cAAU,SAAS;AACnB,qBAAiB,SAAS,WAAW;AACnC,UAAI,aAAa,UAAU,4BAAiB,eAAe;AACzD;AAAA,MACF;AACA,aAAO,MAAM,IAAI,MAAM,KAAK;AAAA,IAC9B;AACA,WAAO,MAAM,IAAI,gBAAgB,cAAc;AAE/C,YAAQ,IAAI;AAAA,EACd,CAAC;AACH;AAEA,MAAM,sBAAsB,CAC1B,QACA,WAC+B;AAE/B,SAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,QAAI,WAAW;AACf,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,UAAM,qBAAqB,YAAY;AACrC,uBAAiB,SAAS,WAAW;AACnC,YAAI,UAAW;AACf,YAAI,UAAU,4BAAiB,eAAe;AAC5C;AAAA,QACF;AACA,eAAO,MAAM,IAAI,MAAM,KAAK;AAAA,MAC9B;AACA,aAAO,MAAM,IAAI,gBAAgB,cAAc;AAAA,IACjD;AACA,uBAAmB;AAEnB,qBAAiB,QAAQ,QAAQ;AAC/B,kBAAY;AACZ,UAAI,UAAW;AACf,gBAAU,SAAS,IAAI;AAAA,IACzB;AACA,cAAU,MAAM;AAChB,cAAU,SAAS;AAEnB,YAAQ,QAAQ;AAAA,EAClB,CAAC;AACH;","names":[]}
@@ -6,6 +6,7 @@ export type SpeechSource = AsyncIterable<string> | string | Promise<string>;
6
6
  export declare class SynthesisHandle {
7
7
  #private;
8
8
  static readonly FLUSH_SENTINEL: unique symbol;
9
+ text?: string;
9
10
  ttsSource: SpeechSource;
10
11
  tts: TTS;
11
12
  queue: AsyncIterableQueue<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;
@@ -1 +1 @@
1
- {"version":3,"file":"agent_output.d.ts","sourceRoot":"","sources":["../../src/pipeline/agent_output.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,EAAoB,KAAK,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAC/F,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEtE,MAAM,MAAM,YAAY,GAAG,aAAa,CAAC,MAAM,CAAC,GAAG,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;AAE5E,qBAAa,eAAe;;IAC1B,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IAG1D,SAAS,EAAE,YAAY,CAAC;IAExB,GAAG,EAAE,GAAG,CAAC;IACT,KAAK,yEAAgF;IAErF,MAAM,SAAgB;gBAGV,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,YAAY,EAAE,YAAY,EAAE,GAAG,EAAE,GAAG;IAO3F,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED,IAAI,SAAS,IAAI,OAAO,CAEvB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,UAAU,IAAI,aAAa,GAAG,SAAS,CAE1C;IAED,uCAAuC;IACvC,IAAI,IAAI,aAAa;IASrB,4BAA4B;IAC5B,SAAS;CASV;AAED,qBAAa,WAAW;;gBAKV,YAAY,EAAE,YAAY,EAAE,GAAG,EAAE,GAAG;IAKhD,IAAI,OAAO,IAAI,YAAY,CAE1B;IAEK,KAAK;IAKX,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,GAAG,eAAe;CAkCvE"}
1
+ {"version":3,"file":"agent_output.d.ts","sourceRoot":"","sources":["../../src/pipeline/agent_output.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,EAAoB,KAAK,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAC/F,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEtE,MAAM,MAAM,YAAY,GAAG,aAAa,CAAC,MAAM,CAAC,GAAG,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;AAE5E,qBAAa,eAAe;;IAC1B,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;IAG1D,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,YAAY,CAAC;IAExB,GAAG,EAAE,GAAG,CAAC;IACT,KAAK,yEAAgF;IAErF,MAAM,SAAgB;gBAGV,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,YAAY,EAAE,YAAY,EAAE,GAAG,EAAE,GAAG;IAO3F,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED,IAAI,SAAS,IAAI,OAAO,CAEvB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,UAAU,IAAI,aAAa,GAAG,SAAS,CAE1C;IAED,uCAAuC;IACvC,IAAI,IAAI,aAAa;IASrB,4BAA4B;IAC5B,SAAS;CASV;AAED,qBAAa,WAAW;;gBAKV,YAAY,EAAE,YAAY,EAAE,GAAG,EAAE,GAAG;IAKhD,IAAI,OAAO,IAAI,YAAY,CAE1B;IAEK,KAAK;IAKX,UAAU,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,GAAG,eAAe;CAsCvE"}
@@ -4,6 +4,7 @@ import { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from
4
4
  class SynthesisHandle {
5
5
  static FLUSH_SENTINEL = Symbol("FLUSH_SENTINEL");
6
6
  #speechId;
7
+ text;
7
8
  ttsSource;
8
9
  #agentPlayout;
9
10
  tts;
@@ -87,6 +88,10 @@ class AgentOutput {
87
88
  } finally {
88
89
  if (handle.intFut.done) {
89
90
  gracefullyCancel(task);
91
+ } else {
92
+ task.then((text) => {
93
+ handle.text = text;
94
+ });
90
95
  }
91
96
  }
92
97
  resolve();
@@ -110,11 +115,12 @@ const stringSynthesisTask = (text, handle) => {
110
115
  handle.queue.put(audio.frame);
111
116
  }
112
117
  handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);
113
- resolve();
118
+ resolve(text);
114
119
  });
115
120
  };
116
121
  const streamSynthesisTask = (stream, handle) => {
117
122
  return new CancellablePromise(async (resolve, _, onCancel) => {
123
+ let fullText = "";
118
124
  let cancelled = false;
119
125
  onCancel(() => {
120
126
  cancelled = true;
@@ -132,12 +138,13 @@ const streamSynthesisTask = (stream, handle) => {
132
138
  };
133
139
  readGeneratedAudio();
134
140
  for await (const text of stream) {
141
+ fullText += text;
135
142
  if (cancelled) break;
136
143
  ttsStream.pushText(text);
137
144
  }
138
145
  ttsStream.flush();
139
146
  ttsStream.endInput();
140
- resolve();
147
+ resolve(fullText);
141
148
  });
142
149
  };
143
150
  export {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/agent_output.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { log } from '../log.js';\nimport { SynthesizeStream, type TTS } from '../tts/index.js';\nimport { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport type { AgentPlayout, PlayoutHandle } from './agent_playout.js';\n\nexport type SpeechSource = AsyncIterable<string> | string | Promise<string>;\n\nexport class SynthesisHandle {\n static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n\n #speechId: string;\n ttsSource: SpeechSource;\n #agentPlayout: AgentPlayout;\n tts: TTS;\n queue = new AsyncIterableQueue<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>();\n #playHandle?: PlayoutHandle;\n intFut = new Future();\n #logger = log();\n\n constructor(speechId: string, ttsSource: SpeechSource, agentPlayout: AgentPlayout, tts: TTS) {\n this.#speechId = speechId;\n this.ttsSource = ttsSource;\n this.#agentPlayout = agentPlayout;\n this.tts = tts;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get validated(): boolean {\n return !!this.#playHandle;\n }\n\n get interrupted(): boolean {\n return this.intFut.done;\n }\n\n get playHandle(): PlayoutHandle | undefined {\n return this.#playHandle;\n }\n\n /** Validate the speech for playout. */\n play(): PlayoutHandle {\n if (this.interrupted) {\n throw new Error('synthesis was interrupted');\n }\n\n this.#playHandle = this.#agentPlayout.play(this.#speechId, this.queue);\n return this.#playHandle;\n }\n\n /** Interrupt the speech. */\n interrupt() {\n if (this.interrupted) {\n return;\n }\n\n this.#logger.child({ speechId: this.#speechId }).debug('interrupting synthesis/playout');\n this.#playHandle?.interrupt();\n this.intFut.resolve();\n }\n}\n\nexport class AgentOutput {\n #agentPlayout: AgentPlayout;\n #tts: TTS;\n #tasks: CancellablePromise<void>[] = [];\n\n constructor(agentPlayout: AgentPlayout, tts: TTS) {\n this.#agentPlayout = agentPlayout;\n this.#tts = tts;\n }\n\n get playout(): AgentPlayout {\n return this.#agentPlayout;\n }\n\n async close() {\n this.#tasks.forEach((task) => task.cancel());\n await Promise.all(this.#tasks);\n }\n\n synthesize(speechId: string, ttsSource: SpeechSource): SynthesisHandle {\n const handle = new SynthesisHandle(speechId, ttsSource, this.#agentPlayout, this.#tts);\n const task = this.#synthesize(handle);\n this.#tasks.push(task);\n task.finally(() => this.#tasks.splice(this.#tasks.indexOf(task)));\n return handle;\n }\n\n #synthesize(handle: SynthesisHandle): CancellablePromise<void> {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const ttsSource = await handle.ttsSource;\n let task: CancellablePromise<void>;\n if (typeof ttsSource === 'string') {\n task = stringSynthesisTask(ttsSource, handle);\n } else {\n task = streamSynthesisTask(ttsSource, handle);\n }\n\n onCancel(() => {\n gracefullyCancel(task);\n });\n\n try {\n await Promise.any([task, handle.intFut.await]);\n } finally {\n if (handle.intFut.done) {\n gracefullyCancel(task);\n }\n }\n\n resolve();\n });\n }\n}\n\nconst stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<void> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n ttsStream.pushText(text);\n ttsStream.flush();\n ttsStream.endInput();\n for await (const audio of ttsStream) {\n if (cancelled || audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n\n resolve();\n });\n};\n\nconst streamSynthesisTask = (\n stream: AsyncIterable<string>,\n handle: SynthesisHandle,\n): CancellablePromise<void> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n const readGeneratedAudio = async () => {\n for await (const audio of ttsStream) {\n if (cancelled) break;\n if (audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n };\n readGeneratedAudio();\n\n for await (const text of stream) {\n if (cancelled) break;\n ttsStream.pushText(text);\n }\n ttsStream.flush();\n ttsStream.endInput();\n\n resolve();\n });\n};\n"],"mappings":"AAIA,SAAS,WAAW;AACpB,SAAS,wBAAkC;AAC3C,SAAS,oBAAoB,oBAAoB,QAAQ,wBAAwB;AAK1E,MAAM,gBAAgB;AAAA,EAC3B,OAAgB,iBAAiB,OAAO,gBAAgB;AAAA,EAExD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAQ,IAAI,mBAAuE;AAAA,EACnF;AAAA,EACA,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI;AAAA,EAEd,YAAY,UAAkB,WAAyB,cAA4B,KAAU;AAC3F,SAAK,YAAY;AACjB,SAAK,YAAY;AACjB,SAAK,gBAAgB;AACrB,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,CAAC,CAAC,KAAK;AAAA,EAChB;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK,OAAO;AAAA,EACrB;AAAA,EAEA,IAAI,aAAwC;AAC1C,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,OAAsB;AACpB,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AAEA,SAAK,cAAc,KAAK,cAAc,KAAK,KAAK,WAAW,KAAK,KAAK;AACrE,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,YAAY;AAzDd;AA0DI,QAAI,KAAK,aAAa;AACpB;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,UAAU,KAAK,UAAU,CAAC,EAAE,MAAM,gCAAgC;AACvF,eAAK,gBAAL,mBAAkB;AAClB,SAAK,OAAO,QAAQ;AAAA,EACtB;AACF;AAEO,MAAM,YAAY;AAAA,EACvB;AAAA,EACA;AAAA,EACA,SAAqC,CAAC;AAAA,EAEtC,YAAY,cAA4B,KAAU;AAChD,SAAK,gBAAgB;AACrB,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,IAAI,UAAwB;AAC1B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,OAAO,QAAQ,CAAC,SAAS,KAAK,OAAO,CAAC;AAC3C,UAAM,QAAQ,IAAI,KAAK,MAAM;AAAA,EAC/B;AAAA,EAEA,WAAW,UAAkB,WAA0C;AACrE,UAAM,SAAS,IAAI,gBAAgB,UAAU,WAAW,KAAK,eAAe,KAAK,IAAI;AACrF,UAAM,OAAO,KAAK,YAAY,MAAM;AACpC,SAAK,OAAO,KAAK,IAAI;AACrB,SAAK,QAAQ,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,QAAQ,IAAI,CAAC,CAAC;AAChE,WAAO;AAAA,EACT;AAAA,EAEA,YAAY,QAAmD;AAE7D,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,YAAY,MAAM,OAAO;AAC/B,UAAI;AACJ,UAAI,OAAO,cAAc,UAAU;AACjC,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C,OAAO;AACL,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C;AAEA,eAAS,MAAM;AACb,yBAAiB,IAAI;AAAA,MACvB,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,MAAM,OAAO,OAAO,KAAK,CAAC;AAAA,MAC/C,UAAE;AACA,YAAI,OAAO,OAAO,MAAM;AACtB,2BAAiB,IAAI;AAAA,QACvB;AAAA,MACF;AAEA,cAAQ;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEA,MAAM,sBAAsB,CAAC,MAAc,WAAsD;AAE/F,SAAO,IAAI,mBAAyB,OAAO,SAAS,GAAG,aAAa;AAClE,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,cAAU,SAAS,IAAI;AACvB,cAAU,MAAM;AAChB,cAAU,SAAS;AACnB,qBAAiB,SAAS,WAAW;AACnC,UAAI,aAAa,UAAU,iBAAiB,eAAe;AACzD;AAAA,MACF;AACA,aAAO,MAAM,IAAI,MAAM,KAAK;AAAA,IAC9B;AACA,WAAO,MAAM,IAAI,gBAAgB,cAAc;AAE/C,YAAQ;AAAA,EACV,CAAC;AACH;AAEA,MAAM,sBAAsB,CAC1B,QACA,WAC6B;AAE7B,SAAO,IAAI,mBAAyB,OAAO,SAAS,GAAG,aAAa;AAClE,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,UAAM,qBAAqB,YAAY;AACrC,uBAAiB,SAAS,WAAW;AACnC,YAAI,UAAW;AACf,YAAI,UAAU,iBAAiB,eAAe;AAC5C;AAAA,QACF;AACA,eAAO,MAAM,IAAI,MAAM,KAAK;AAAA,MAC9B;AACA,aAAO,MAAM,IAAI,gBAAgB,cAAc;AAAA,IACjD;AACA,uBAAmB;AAEnB,qBAAiB,QAAQ,QAAQ;AAC/B,UAAI,UAAW;AACf,gBAAU,SAAS,IAAI;AAAA,IACzB;AACA,cAAU,MAAM;AAChB,cAAU,SAAS;AAEnB,YAAQ;AAAA,EACV,CAAC;AACH;","names":[]}
1
+ {"version":3,"sources":["../../src/pipeline/agent_output.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { log } from '../log.js';\nimport { SynthesizeStream, type TTS } from '../tts/index.js';\nimport { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport type { AgentPlayout, PlayoutHandle } from './agent_playout.js';\n\nexport type SpeechSource = AsyncIterable<string> | string | Promise<string>;\n\nexport class SynthesisHandle {\n static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n\n #speechId: string;\n text?: string;\n ttsSource: SpeechSource;\n #agentPlayout: AgentPlayout;\n tts: TTS;\n queue = new AsyncIterableQueue<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>();\n #playHandle?: PlayoutHandle;\n intFut = new Future();\n #logger = log();\n\n constructor(speechId: string, ttsSource: SpeechSource, agentPlayout: AgentPlayout, tts: TTS) {\n this.#speechId = speechId;\n this.ttsSource = ttsSource;\n this.#agentPlayout = agentPlayout;\n this.tts = tts;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get validated(): boolean {\n return !!this.#playHandle;\n }\n\n get interrupted(): boolean {\n return this.intFut.done;\n }\n\n get playHandle(): PlayoutHandle | undefined {\n return this.#playHandle;\n }\n\n /** Validate the speech for playout. */\n play(): PlayoutHandle {\n if (this.interrupted) {\n throw new Error('synthesis was interrupted');\n }\n\n this.#playHandle = this.#agentPlayout.play(this.#speechId, this.queue);\n return this.#playHandle;\n }\n\n /** Interrupt the speech. */\n interrupt() {\n if (this.interrupted) {\n return;\n }\n\n this.#logger.child({ speechId: this.#speechId }).debug('interrupting synthesis/playout');\n this.#playHandle?.interrupt();\n this.intFut.resolve();\n }\n}\n\nexport class AgentOutput {\n #agentPlayout: AgentPlayout;\n #tts: TTS;\n #tasks: CancellablePromise<void>[] = [];\n\n constructor(agentPlayout: AgentPlayout, tts: TTS) {\n this.#agentPlayout = agentPlayout;\n this.#tts = tts;\n }\n\n get playout(): AgentPlayout {\n return this.#agentPlayout;\n }\n\n async close() {\n this.#tasks.forEach((task) => task.cancel());\n await Promise.all(this.#tasks);\n }\n\n synthesize(speechId: string, ttsSource: SpeechSource): SynthesisHandle {\n const handle = new SynthesisHandle(speechId, ttsSource, this.#agentPlayout, this.#tts);\n const task = this.#synthesize(handle);\n this.#tasks.push(task);\n task.finally(() => this.#tasks.splice(this.#tasks.indexOf(task)));\n return handle;\n }\n\n #synthesize(handle: SynthesisHandle): CancellablePromise<void> {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const ttsSource = await handle.ttsSource;\n let task: CancellablePromise<string>;\n if (typeof ttsSource === 'string') {\n task = stringSynthesisTask(ttsSource, handle);\n } else {\n task = streamSynthesisTask(ttsSource, handle);\n }\n\n onCancel(() => {\n gracefullyCancel(task);\n });\n\n try {\n await Promise.any([task, handle.intFut.await]);\n } finally {\n if (handle.intFut.done) {\n gracefullyCancel(task);\n } else {\n task.then((text) => {\n handle.text = text;\n });\n }\n }\n\n resolve();\n });\n }\n}\n\nconst stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<string> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n ttsStream.pushText(text);\n ttsStream.flush();\n ttsStream.endInput();\n for await (const audio of ttsStream) {\n if (cancelled || audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n\n resolve(text);\n });\n};\n\nconst streamSynthesisTask = (\n stream: AsyncIterable<string>,\n handle: SynthesisHandle,\n): CancellablePromise<string> => {\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let fullText = '';\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const ttsStream = handle.tts.stream();\n const readGeneratedAudio = async () => {\n for await (const audio of ttsStream) {\n if (cancelled) break;\n if (audio === SynthesizeStream.END_OF_STREAM) {\n break;\n }\n handle.queue.put(audio.frame);\n }\n handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);\n };\n readGeneratedAudio();\n\n for await (const text of stream) {\n fullText += text;\n if (cancelled) break;\n ttsStream.pushText(text);\n }\n ttsStream.flush();\n ttsStream.endInput();\n\n resolve(fullText);\n });\n};\n"],"mappings":"AAIA,SAAS,WAAW;AACpB,SAAS,wBAAkC;AAC3C,SAAS,oBAAoB,oBAAoB,QAAQ,wBAAwB;AAK1E,MAAM,gBAAgB;AAAA,EAC3B,OAAgB,iBAAiB,OAAO,gBAAgB;AAAA,EAExD;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,QAAQ,IAAI,mBAAuE;AAAA,EACnF;AAAA,EACA,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI;AAAA,EAEd,YAAY,UAAkB,WAAyB,cAA4B,KAAU;AAC3F,SAAK,YAAY;AACjB,SAAK,YAAY;AACjB,SAAK,gBAAgB;AACrB,SAAK,MAAM;AAAA,EACb;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,CAAC,CAAC,KAAK;AAAA,EAChB;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK,OAAO;AAAA,EACrB;AAAA,EAEA,IAAI,aAAwC;AAC1C,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,OAAsB;AACpB,QAAI,KAAK,aAAa;AACpB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AAEA,SAAK,cAAc,KAAK,cAAc,KAAK,KAAK,WAAW,KAAK,KAAK;AACrE,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,YAAY;AA1Dd;AA2DI,QAAI,KAAK,aAAa;AACpB;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,UAAU,KAAK,UAAU,CAAC,EAAE,MAAM,gCAAgC;AACvF,eAAK,gBAAL,mBAAkB;AAClB,SAAK,OAAO,QAAQ;AAAA,EACtB;AACF;AAEO,MAAM,YAAY;AAAA,EACvB;AAAA,EACA;AAAA,EACA,SAAqC,CAAC;AAAA,EAEtC,YAAY,cAA4B,KAAU;AAChD,SAAK,gBAAgB;AACrB,SAAK,OAAO;AAAA,EACd;AAAA,EAEA,IAAI,UAAwB;AAC1B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,OAAO,QAAQ,CAAC,SAAS,KAAK,OAAO,CAAC;AAC3C,UAAM,QAAQ,IAAI,KAAK,MAAM;AAAA,EAC/B;AAAA,EAEA,WAAW,UAAkB,WAA0C;AACrE,UAAM,SAAS,IAAI,gBAAgB,UAAU,WAAW,KAAK,eAAe,KAAK,IAAI;AACrF,UAAM,OAAO,KAAK,YAAY,MAAM;AACpC,SAAK,OAAO,KAAK,IAAI;AACrB,SAAK,QAAQ,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,QAAQ,IAAI,CAAC,CAAC;AAChE,WAAO;AAAA,EACT;AAAA,EAEA,YAAY,QAAmD;AAE7D,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,YAAY,MAAM,OAAO;AAC/B,UAAI;AACJ,UAAI,OAAO,cAAc,UAAU;AACjC,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C,OAAO;AACL,eAAO,oBAAoB,WAAW,MAAM;AAAA,MAC9C;AAEA,eAAS,MAAM;AACb,yBAAiB,IAAI;AAAA,MACvB,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,MAAM,OAAO,OAAO,KAAK,CAAC;AAAA,MAC/C,UAAE;AACA,YAAI,OAAO,OAAO,MAAM;AACtB,2BAAiB,IAAI;AAAA,QACvB,OAAO;AACL,eAAK,KAAK,CAAC,SAAS;AAClB,mBAAO,OAAO;AAAA,UAChB,CAAC;AAAA,QACH;AAAA,MACF;AAEA,cAAQ;AAAA,IACV,CAAC;AAAA,EACH;AACF;AAEA,MAAM,sBAAsB,CAAC,MAAc,WAAwD;AAEjG,SAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,cAAU,SAAS,IAAI;AACvB,cAAU,MAAM;AAChB,cAAU,SAAS;AACnB,qBAAiB,SAAS,WAAW;AACnC,UAAI,aAAa,UAAU,iBAAiB,eAAe;AACzD;AAAA,MACF;AACA,aAAO,MAAM,IAAI,MAAM,KAAK;AAAA,IAC9B;AACA,WAAO,MAAM,IAAI,gBAAgB,cAAc;AAE/C,YAAQ,IAAI;AAAA,EACd,CAAC;AACH;AAEA,MAAM,sBAAsB,CAC1B,QACA,WAC+B;AAE/B,SAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,QAAI,WAAW;AACf,QAAI,YAAY;AAChB,aAAS,MAAM;AACb,kBAAY;AAAA,IACd,CAAC;AAED,UAAM,YAAY,OAAO,IAAI,OAAO;AACpC,UAAM,qBAAqB,YAAY;AACrC,uBAAiB,SAAS,WAAW;AACnC,YAAI,UAAW;AACf,YAAI,UAAU,iBAAiB,eAAe;AAC5C;AAAA,QACF;AACA,eAAO,MAAM,IAAI,MAAM,KAAK;AAAA,MAC9B;AACA,aAAO,MAAM,IAAI,gBAAgB,cAAc;AAAA,IACjD;AACA,uBAAmB;AAEnB,qBAAiB,QAAQ,QAAQ;AAC/B,kBAAY;AACZ,UAAI,UAAW;AACf,gBAAU,SAAS,IAAI;AAAA,IACzB;AACA,cAAU,MAAM;AAChB,cAAU,SAAS;AAEnB,YAAQ,QAAQ;AAAA,EAClB,CAAC;AACH;","names":[]}
@@ -344,8 +344,7 @@ class VoicePipelineAgent extends import_node_events.default {
344
344
  if ((!playingSpeech.userQuestion || playingSpeech.userCommitted) && !playingSpeech.speechCommitted) {
345
345
  copiedCtx.messages.push(
346
346
  import_llm2.ChatMessage.create({
347
- // TODO(nbsp): uhhh unsure where to get the played text here
348
- // text: playingSpeech.synthesisHandle.(theres no ttsForwarder here)
347
+ text: playingSpeech.synthesisHandle.text,
349
348
  role: import_llm2.ChatRole.ASSISTANT
350
349
  })
351
350
  );
@@ -413,7 +412,7 @@ class VoicePipelineAgent extends import_node_events.default {
413
412
  if (handle.interrupted) break;
414
413
  }
415
414
  commitUserQuestionIfNeeded();
416
- let collectedText = "";
415
+ const collectedText = handle.synthesisHandle.text;
417
416
  const isUsingTools = handle.source instanceof import_llm.LLMStream && !!handle.source.functionCalls.length;
418
417
  const extraToolsMessages = [];
419
418
  let interrupted = handle.interrupted;
@@ -460,7 +459,6 @@ class VoicePipelineAgent extends import_node_events.default {
460
459
  handle.synthesisHandle = answerSynthesis;
461
460
  const playHandle2 = answerSynthesis.play();
462
461
  await playHandle2.join().await;
463
- collectedText = "";
464
462
  interrupted = answerSynthesis.interrupted;
465
463
  newFunctionCalls = answerLLMStream.functionCalls;
466
464
  this.emit(8 /* FUNCTION_CALLS_FINISHED */, calledFuncs);
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/pipeline_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { LocalTrackPublication, RemoteParticipant, Room } from '@livekit/rtc-node';\nimport {\n AudioSource,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport type {\n CallableFunctionResult,\n FunctionCallInfo,\n FunctionContext,\n LLM,\n} from '../llm/index.js';\nimport { LLMStream } from '../llm/index.js';\nimport { ChatContext, ChatMessage, ChatRole } from '../llm/index.js';\nimport { log } from '../log.js';\nimport { type STT, StreamAdapter as STTStreamAdapter } from '../stt/index.js';\nimport {\n SentenceTokenizer as BasicSentenceTokenizer,\n WordTokenizer as BasicWordTokenizer,\n hyphenateWord,\n} from '../tokenize/basic/index.js';\nimport type { SentenceTokenizer, WordTokenizer } from '../tokenize/tokenizer.js';\nimport type { TTS } from '../tts/index.js';\nimport { StreamAdapter as TTSStreamAdapter } from '../tts/index.js';\nimport { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport type { VAD, VADEvent } from '../vad.js';\nimport type { SpeechSource, SynthesisHandle } from './agent_output.js';\nimport { AgentOutput } from './agent_output.js';\nimport { AgentPlayout, AgentPlayoutEvent } from './agent_playout.js';\nimport { HumanInput, HumanInputEvent } from './human_input.js';\nimport { SpeechHandle } from './speech_handle.js';\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\nexport type BeforeLLMCallback = (\n agent: VoicePipelineAgent,\n chatCtx: ChatContext,\n) => LLMStream | false | void | Promise<LLMStream | false | void>;\n\nexport type BeforeTTSCallback = (\n agent: VoicePipelineAgent,\n source: string | AsyncIterable<string>,\n) => SpeechSource;\n\nexport enum VPAEvent {\n USER_STARTED_SPEAKING,\n USER_STOPPED_SPEAKING,\n AGENT_STARTED_SPEAKING,\n AGENT_STOPPED_SPEAKING,\n USER_SPEECH_COMMITTED,\n AGENT_SPEECH_COMMITTED,\n AGENT_SPEECH_INTERRUPTED,\n FUNCTION_CALLS_COLLECTED,\n FUNCTION_CALLS_FINISHED,\n}\n\nexport type VPACallbacks = {\n [VPAEvent.USER_STARTED_SPEAKING]: () => void;\n [VPAEvent.USER_STOPPED_SPEAKING]: () => void;\n [VPAEvent.AGENT_STARTED_SPEAKING]: () => void;\n [VPAEvent.AGENT_STOPPED_SPEAKING]: () => void;\n [VPAEvent.USER_SPEECH_COMMITTED]: (msg: ChatMessage) => void;\n [VPAEvent.AGENT_SPEECH_COMMITTED]: (msg: ChatMessage) => void;\n [VPAEvent.AGENT_SPEECH_INTERRUPTED]: (msg: ChatMessage) => void;\n [VPAEvent.FUNCTION_CALLS_COLLECTED]: (funcs: FunctionCallInfo[]) => void;\n [VPAEvent.FUNCTION_CALLS_FINISHED]: (funcs: CallableFunctionResult[]) => void;\n};\n\nexport class AgentCallContext {\n #agent: VoicePipelineAgent;\n #llmStream: LLMStream;\n #metadata = new Map<string, any>();\n static #current: AgentCallContext;\n\n constructor(agent: VoicePipelineAgent, llmStream: LLMStream) {\n this.#agent = agent;\n this.#llmStream = llmStream;\n AgentCallContext.#current = this;\n }\n\n static getCurrent(): AgentCallContext {\n return AgentCallContext.#current;\n }\n\n get agent(): VoicePipelineAgent {\n return this.#agent;\n }\n\n storeMetadata(key: string, value: any) {\n this.#metadata.set(key, value);\n }\n\n getMetadata(key: string, orDefault: any = undefined) {\n return this.#metadata.get(key) || orDefault;\n }\n\n get llmStream(): LLMStream {\n return this.#llmStream;\n }\n}\n\nconst defaultBeforeLLMCallback: BeforeLLMCallback = (\n agent: VoicePipelineAgent,\n chatCtx: ChatContext,\n): LLMStream => {\n return agent.llm.chat({ chatCtx, fncCtx: agent.fncCtx });\n};\n\nconst defaultBeforeTTSCallback: BeforeTTSCallback = (\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n _: VoicePipelineAgent,\n text: string | AsyncIterable<string>,\n): string | AsyncIterable<string> => {\n return text;\n};\n\nexport interface AgentTranscriptionOptions {\n /** Whether to forward the user transcription to the client */\n userTranscription: boolean;\n /** Whether to forward the agent transcription to the client */\n agentTranscription: boolean;\n /**\n * The speed at which the agent's speech transcription is forwarded to the client.\n * We try to mimic the agent's speech speed by adjusting the transcription speed.\n */\n agentTranscriptionSpeech: number;\n /**\n * The tokenizer used to split the speech into sentences.\n * This is used to decide when to mark a transcript as final for the agent transcription.\n */\n sentenceTokenizer: SentenceTokenizer;\n /**\n * The tokenizer used to split the speech into words.\n * This is used to simulate the \"interim results\" of the agent transcription.\n */\n wordTokenizer: WordTokenizer;\n /**\n * A function that takes a string (word) as input and returns a list of strings,\n * representing the hyphenated parts of the word.\n */\n hyphenateWord: (word: string) => string[];\n}\n\nconst defaultAgentTranscriptionOptions: AgentTranscriptionOptions = {\n userTranscription: true,\n agentTranscription: true,\n agentTranscriptionSpeech: 1,\n sentenceTokenizer: new BasicSentenceTokenizer(),\n wordTokenizer: new BasicWordTokenizer(false),\n hyphenateWord: hyphenateWord,\n};\n\nexport interface VPAOptions {\n /** Chat context for the assistant. */\n chatCtx?: ChatContext;\n /** Function context for the assistant. */\n fncCtx?: FunctionContext;\n /** Whether to allow the user to interrupt the assistant. */\n allowInterruptions: boolean;\n /** Minimum duration of speech to consider for interruption. */\n interruptSpeechDuration: number;\n /** Minimum number of words to consider for interuption. This may increase latency. */\n interruptMinWords: number;\n /** Delay to wait before considering the user speech done. */\n minEndpointingDelay: number;\n maxRecursiveFncCalls: number;\n /* Whether to preemptively synthesize responses. */\n preemptiveSynthesis: boolean;\n /*\n * Callback called when the assistant is about to synthesize a reply.\n *\n * @remarks\n * Returning void will create a default LLM stream.\n * You can also return your own LLM stream by calling `llm.chat()`.\n * Returning `false` ill cancel the synthesis of the reply.\n */\n beforeLLMCallback: BeforeLLMCallback;\n /*\n * Callback called when the assistant is about to synthesize speech.\n *\n * @remarks\n * This can be used to customize text before synthesis\n * (e.g. editing the pronunciation of a word).\n */\n beforeTTSCallback: BeforeTTSCallback;\n /** Options for assistant transcription. */\n transcription: AgentTranscriptionOptions;\n}\n\nconst defaultVPAOptions: VPAOptions = {\n chatCtx: new ChatContext(),\n allowInterruptions: true,\n interruptSpeechDuration: 50,\n interruptMinWords: 0,\n minEndpointingDelay: 500,\n maxRecursiveFncCalls: 1,\n preemptiveSynthesis: false,\n beforeLLMCallback: defaultBeforeLLMCallback,\n beforeTTSCallback: defaultBeforeTTSCallback,\n transcription: defaultAgentTranscriptionOptions,\n};\n\n/** A pipeline agent (VAD + STT + LLM + TTS) implementation. */\nexport class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<VPACallbacks>) {\n /** Minimum time played for the user speech to be committed to the chat context. */\n readonly MIN_TIME_PLAYED_FOR_COMMIT = 1.5;\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n\n #vad: VAD;\n #stt: STT;\n #llm: LLM;\n #tts: TTS;\n #opts: VPAOptions;\n #humanInput?: HumanInput;\n #agentOutput?: AgentOutput;\n #trackPublishedFut = new Future();\n #pendingAgentReply?: SpeechHandle;\n #agentReplyTask?: CancellablePromise<void>;\n #playingSpeech?: SpeechHandle;\n #transcribedText = '';\n #transcribedInterimText = '';\n #speechQueueOpen = new Future();\n #speechQueue = new AsyncIterableQueue<SpeechHandle | typeof VoicePipelineAgent.FLUSH_SENTINEL>();\n #lastEndOfSpeechTime?: number;\n #updateStateTask?: CancellablePromise<void>;\n #started = false;\n #room?: Room;\n #participant: RemoteParticipant | string | null = null;\n #deferredValidation: DeferredReplyValidation;\n #logger = log();\n #agentPublication?: LocalTrackPublication;\n\n constructor(\n /** Voice Activity Detection instance. */\n vad: VAD,\n /** Speech-to-Text instance. */\n stt: STT,\n /** Large Language Model instance. */\n llm: LLM,\n /** Text-to-Speech instance. */\n tts: TTS,\n /** Additional VoicePipelineAgent options. */\n opts: Partial<VPAOptions> = defaultVPAOptions,\n ) {\n super();\n\n this.#opts = { ...defaultVPAOptions, ...opts };\n\n if (!stt.capabilities.streaming) {\n stt = new STTStreamAdapter(stt, vad);\n }\n\n if (!tts.capabilities.streaming) {\n tts = new TTSStreamAdapter(tts, new BasicSentenceTokenizer());\n }\n\n this.#vad = vad;\n this.#stt = stt;\n this.#llm = llm;\n this.#tts = tts;\n\n this.#deferredValidation = new DeferredReplyValidation(\n this.#validateReplyIfPossible.bind(this),\n this.#opts.minEndpointingDelay,\n );\n }\n\n get fncCtx(): FunctionContext | undefined {\n return this.#opts.fncCtx;\n }\n\n set fncCtx(ctx: FunctionContext) {\n this.#opts.fncCtx = ctx;\n }\n\n get chatCtx(): ChatContext {\n return this.#opts.chatCtx!;\n }\n\n get llm(): LLM {\n return this.#llm;\n }\n\n get tts(): TTS {\n return this.#tts;\n }\n\n get stt(): STT {\n return this.#stt;\n }\n\n get vad(): VAD {\n return this.#vad;\n }\n\n /** Start the voice assistant. */\n start(\n /** The room to connect to. */\n room: Room,\n /**\n * The participant to listen to.\n *\n * @remarks\n * Can be a participant or an identity.\n * If omitted, the first participant in the room will be selected.\n */\n participant: RemoteParticipant | string | null = null,\n ) {\n if (this.#started) {\n throw new Error('voice assistant already started');\n }\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.#participant) {\n return;\n }\n this.#linkParticipant.call(this, participant.identity);\n });\n\n this.#room = room;\n this.#participant = participant;\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity);\n }\n }\n\n this.#run();\n }\n\n /** Play a speech source through the voice assistant. */\n async say(\n source: string | LLMStream | AsyncIterable<string>,\n allowInterruptions = true,\n addToChatCtx = true,\n ) {\n await this.#trackPublishedFut.await;\n const newHandle = SpeechHandle.createAssistantSpeech(allowInterruptions, addToChatCtx);\n const synthesisHandle = this.#synthesizeAgentSpeech(newHandle.id, source);\n newHandle.initialize(source, synthesisHandle);\n this.#addSpeechForPlayout(newHandle);\n }\n\n #updateState(state: AgentState, delay = 0) {\n const runTask = (delay: number): CancellablePromise<void> => {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n await new Promise((resolve) => setTimeout(resolve, delay));\n if (this.#room?.isConnected) {\n if (!cancelled) {\n await this.#room.localParticipant?.setAttributes({ [AGENT_STATE_ATTRIBUTE]: state });\n }\n }\n resolve();\n });\n };\n\n if (this.#updateStateTask) {\n this.#updateStateTask.cancel();\n }\n\n this.#updateStateTask = runTask(delay);\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.#room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.#participant = this.#room.remoteParticipants.get(participantIdentity) || null;\n if (!this.#participant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n this.#humanInput = new HumanInput(this.#room, this.#vad, this.#stt, this.#participant);\n this.#humanInput.on(HumanInputEvent.START_OF_SPEECH, (event) => {\n this.emit(VPAEvent.USER_STARTED_SPEAKING);\n this.#deferredValidation.onHumanStartOfSpeech(event);\n });\n this.#humanInput.on(HumanInputEvent.VAD_INFERENCE_DONE, (event) => {\n if (!this.#trackPublishedFut.done) {\n return;\n }\n if (!this.#agentOutput) {\n throw new Error('agent output is undefined');\n }\n\n let tv = 1;\n if (this.#opts.allowInterruptions) {\n tv = Math.max(0, 1 - event.probability);\n this.#agentOutput.playout.targetVolume = tv;\n }\n\n if (event.speechDuration >= this.#opts.interruptSpeechDuration) {\n this.#interruptIfPossible();\n }\n });\n this.#humanInput.on(HumanInputEvent.END_OF_SPEECH, (event) => {\n this.emit(VPAEvent.USER_STARTED_SPEAKING);\n this.#deferredValidation.onHumanEndOfSpeech(event);\n this.#lastEndOfSpeechTime = Date.now();\n });\n this.#humanInput.on(HumanInputEvent.INTERIM_TRANSCRIPT, (event) => {\n this.#transcribedInterimText = event.alternatives![0].text;\n });\n this.#humanInput.on(HumanInputEvent.FINAL_TRANSCRIPT, (event) => {\n const newTranscript = event.alternatives![0].text;\n if (!newTranscript) return;\n\n this.#logger.child({ userTranscript: newTranscript }).debug('received user transcript');\n this.#transcribedText += (this.#transcribedText ? ' ' : '') + newTranscript;\n\n if (\n this.#opts.preemptiveSynthesis &&\n (!this.#playingSpeech || this.#playingSpeech.allowInterruptions)\n ) {\n this.#synthesizeAgentReply();\n }\n\n this.#deferredValidation.onHumanFinalTranscript(newTranscript);\n\n const words = this.#opts.transcription.wordTokenizer.tokenize(newTranscript);\n if (words.length >= 3) {\n // VAD can sometimes not detect that the human is speaking.\n // to make the interruption more reliable, we also interrupt on the final transcript.\n this.#interruptIfPossible();\n }\n });\n }\n\n async #run() {\n this.#updateState('initializing');\n const audioSource = new AudioSource(this.#tts.sampleRate, this.#tts.numChannels);\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', audioSource);\n this.#agentPublication = await this.#room?.localParticipant?.publishTrack(\n track,\n new TrackPublishOptions({ source: TrackSource.SOURCE_MICROPHONE }),\n );\n\n const agentPlayout = new AgentPlayout(audioSource);\n this.#agentOutput = new AgentOutput(agentPlayout, this.#tts);\n\n agentPlayout.on(AgentPlayoutEvent.PLAYOUT_STARTED, () => {\n this.emit(VPAEvent.AGENT_STARTED_SPEAKING);\n this.#updateState('speaking');\n });\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n agentPlayout.on(AgentPlayoutEvent.PLAYOUT_STOPPED, (_) => {\n this.emit(VPAEvent.AGENT_STOPPED_SPEAKING);\n this.#updateState('listening');\n });\n\n this.#trackPublishedFut.resolve();\n\n while (true) {\n await this.#speechQueueOpen.await;\n for await (const speech of this.#speechQueue) {\n if (speech === VoicePipelineAgent.FLUSH_SENTINEL) break;\n this.#playingSpeech = speech;\n await this.#playSpeech(speech);\n this.#playingSpeech = undefined;\n }\n this.#speechQueueOpen = new Future();\n }\n }\n\n #synthesizeAgentReply() {\n this.#pendingAgentReply?.cancel();\n if (this.#humanInput && this.#humanInput.speaking) {\n this.#updateState('thinking', 200);\n }\n\n this.#pendingAgentReply = SpeechHandle.createAssistantReply(\n this.#opts.allowInterruptions,\n true,\n this.#transcribedText,\n );\n const newHandle = this.#pendingAgentReply;\n this.#agentReplyTask = this.#synthesizeAnswerTask(this.#agentReplyTask, newHandle);\n }\n\n #synthesizeAnswerTask(\n oldTask: CancellablePromise<void> | undefined,\n handle?: SpeechHandle,\n ): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n const copiedCtx = this.chatCtx.copy();\n const playingSpeech = this.#playingSpeech;\n if (playingSpeech && playingSpeech.initialized) {\n if (\n (!playingSpeech.userQuestion || playingSpeech.userCommitted) &&\n !playingSpeech.speechCommitted\n ) {\n // the speech is playing but not committed yet,\n // add it to the chat context for this new reply synthesis\n copiedCtx.messages.push(\n ChatMessage.create({\n // TODO(nbsp): uhhh unsure where to get the played text here\n // text: playingSpeech.synthesisHandle.(theres no ttsForwarder here)\n role: ChatRole.ASSISTANT,\n }),\n );\n }\n }\n\n copiedCtx.messages.push(\n ChatMessage.create({\n text: handle?.userQuestion,\n role: ChatRole.USER,\n }),\n );\n\n if (cancelled) resolve();\n let llmStream = await this.#opts.beforeLLMCallback(this, copiedCtx);\n if (llmStream === false) {\n handle?.cancel();\n return;\n }\n\n if (cancelled) resolve();\n // fallback to default impl if no custom/user stream is returned\n if (!(llmStream instanceof LLMStream)) {\n llmStream = (await defaultBeforeLLMCallback(this, copiedCtx)) as LLMStream;\n }\n\n if (handle!.interrupted) {\n return;\n }\n\n const synthesisHandle = this.#synthesizeAgentSpeech(handle!.id, llmStream);\n handle!.initialize(llmStream, synthesisHandle);\n\n // TODO(theomonnom): find a more reliable way to get the elapsed time from the last EOS\n // (VAD could not have detected any speech — maybe unlikely?)\n const elapsed = !!this.#lastEndOfSpeechTime\n ? Math.round((Date.now() - this.#lastEndOfSpeechTime) * 1000) / 1000\n : -1;\n\n this.#logger.child({ speechId: handle!.id, elapsed }).debug('synthesizing agent reply');\n resolve();\n });\n }\n\n async #playSpeech(handle: SpeechHandle) {\n try {\n await handle.waitForInitialization();\n } catch {\n return;\n }\n await this.#agentPublication!.waitForSubscription();\n const synthesisHandle = handle.synthesisHandle;\n if (synthesisHandle.interrupted) return;\n\n const userQuestion = handle.userQuestion;\n const playHandle = synthesisHandle.play();\n const joinFut = playHandle.join();\n\n const commitUserQuestionIfNeeded = () => {\n if (!userQuestion || synthesisHandle.interrupted || handle.userCommitted) return;\n const isUsingTools =\n handle.source instanceof LLMStream && !!handle.source.functionCalls.length;\n\n // make sure at least some speech was played before committing the user message\n // since we try to validate as fast as possible it is possible the agent gets interrupted\n // really quickly (barely audible), we don't want to mark this question as \"answered\".\n if (\n handle.allowInterruptions &&\n !isUsingTools &&\n playHandle.timePlayed < this.MIN_TIME_PLAYED_FOR_COMMIT &&\n !joinFut.done\n ) {\n return;\n }\n\n this.#logger.child({ userTranscript: userQuestion }).debug('committed user transcript');\n const userMsg = ChatMessage.create({ text: userQuestion, role: ChatRole.USER });\n this.chatCtx.messages.push(userMsg);\n this.emit(VPAEvent.USER_SPEECH_COMMITTED, userMsg);\n\n this.#transcribedText = this.#transcribedText.slice(userQuestion.length);\n handle.markUserCommitted();\n };\n\n // wait for the playHandle to finish and check every 1s if user question should be committed\n commitUserQuestionIfNeeded();\n\n while (!joinFut.done) {\n await new Promise<void>(async (resolve) => {\n setTimeout(resolve, 500);\n await joinFut.await;\n resolve();\n });\n commitUserQuestionIfNeeded();\n if (handle.interrupted) break;\n }\n commitUserQuestionIfNeeded();\n\n // TODO(nbsp): what goes here\n let collectedText = '';\n const isUsingTools = handle.source instanceof LLMStream && !!handle.source.functionCalls.length;\n const extraToolsMessages = []; // additional messages from the functions to add to the context\n let interrupted = handle.interrupted;\n\n // if the answer is using tools, execute the functions and automatically generate\n // a response to the user question from the returned values\n if (isUsingTools && !interrupted) {\n if (!userQuestion || !handle.userCommitted) {\n throw new Error('user speech should have been committed before using tools');\n }\n const llmStream = handle.source;\n let newFunctionCalls = llmStream.functionCalls;\n\n for (let i = 0; i < this.#opts.maxRecursiveFncCalls; i++) {\n this.emit(VPAEvent.FUNCTION_CALLS_COLLECTED, newFunctionCalls);\n const calledFuncs: FunctionCallInfo[] = [];\n for (const func of newFunctionCalls) {\n const task = func.func.execute(func.params).then(\n (result) => ({ name: func.name, toolCallId: func.toolCallId, result }),\n (error) => ({ name: func.name, toolCallId: func.toolCallId, error }),\n );\n calledFuncs.push({ ...func, task });\n this.#logger\n .child({ function: func.name, speechId: handle.id })\n .debug('executing AI function');\n try {\n await task;\n } catch {\n this.#logger\n .child({ function: func.name, speechId: handle.id })\n .error('error executing AI function');\n }\n }\n\n const toolCallsInfo = [];\n const toolCallsResults = [];\n for (const fnc of calledFuncs) {\n // ignore the function calls that return void\n const task = await fnc.task;\n if (!task || task.result === undefined) continue;\n toolCallsInfo.push(fnc);\n toolCallsResults.push(ChatMessage.createToolFromFunctionResult(task));\n }\n\n if (!toolCallsInfo.length) break;\n\n // generate an answer from the tool calls\n extraToolsMessages.push(ChatMessage.createToolCalls(toolCallsInfo, collectedText));\n extraToolsMessages.push(...toolCallsResults);\n\n const chatCtx = handle.source.chatCtx.copy();\n chatCtx.messages.push(...extraToolsMessages);\n\n const answerLLMStream = this.llm.chat({\n chatCtx,\n fncCtx: this.fncCtx,\n });\n const answerSynthesis = this.#synthesizeAgentSpeech(handle.id, answerLLMStream);\n // replace the synthesis handle with the new one to allow interruption\n handle.synthesisHandle = answerSynthesis;\n const playHandle = answerSynthesis.play();\n await playHandle.join().await;\n\n // TODO(nbsp): what text goes here\n collectedText = '';\n interrupted = answerSynthesis.interrupted;\n newFunctionCalls = answerLLMStream.functionCalls;\n\n this.emit(VPAEvent.FUNCTION_CALLS_FINISHED, calledFuncs);\n if (!newFunctionCalls) break;\n }\n }\n\n if (handle.addToChatCtx && (!userQuestion || handle.userCommitted)) {\n this.chatCtx.messages.push(...extraToolsMessages);\n if (interrupted) {\n collectedText + '…';\n }\n\n const msg = ChatMessage.create({ text: collectedText, role: ChatRole.ASSISTANT });\n this.chatCtx.messages.push(msg);\n\n handle.markSpeechCommitted();\n if (interrupted) {\n this.emit(VPAEvent.AGENT_SPEECH_INTERRUPTED, msg);\n } else {\n this.emit(VPAEvent.AGENT_SPEECH_COMMITTED, msg);\n }\n\n this.#logger\n .child({\n agentTranscript: collectedText,\n interrupted,\n speechId: handle.id,\n })\n .debug('committed agent speech');\n }\n }\n\n #synthesizeAgentSpeech(\n speechId: string,\n source: string | LLMStream | AsyncIterable<string>,\n ): SynthesisHandle {\n if (!this.#agentOutput) {\n throw new Error('agent output should be initialized when ready');\n }\n\n if (source instanceof LLMStream) {\n source = llmStreamToStringIterable(speechId, source);\n }\n\n const ogSource = source;\n if (!(typeof source === 'string')) {\n // TODO(nbsp): itertools.tee\n }\n\n const ttsSource = this.#opts.beforeTTSCallback(this, ogSource);\n if (!ttsSource) {\n throw new Error('beforeTTSCallback must return string or AsyncIterable<string>');\n }\n\n return this.#agentOutput.synthesize(speechId, ttsSource);\n }\n\n async #validateReplyIfPossible() {\n if (this.#playingSpeech && !this.#playingSpeech.allowInterruptions) {\n this.#logger\n .child({ speechId: this.#playingSpeech.id })\n .debug('skipping validation, agent is speaking and does not allow interruptions');\n return;\n }\n\n if (!this.#pendingAgentReply) {\n if (this.#opts.preemptiveSynthesis || !this.#transcribedText) {\n return;\n }\n this.#synthesizeAgentReply();\n }\n\n if (!this.#pendingAgentReply) {\n throw new Error('pending agent reply is undefined');\n }\n\n // in some bad timimg, we could end up with two pushed agent replies inside the speech queue.\n // so make sure we directly interrupt every reply when validating a new one\n if (this.#speechQueueOpen.done) {\n for await (const speech of this.#speechQueue) {\n if (speech === VoicePipelineAgent.FLUSH_SENTINEL) break;\n if (!speech.isReply) continue;\n if (speech.allowInterruptions) speech.interrupt();\n }\n }\n\n this.#logger.child({ speechId: this.#pendingAgentReply.id }).debug('validated agent reply');\n\n this.#addSpeechForPlayout(this.#pendingAgentReply);\n this.#pendingAgentReply = undefined;\n this.#transcribedInterimText = '';\n }\n\n #interruptIfPossible() {\n if (\n !this.#playingSpeech ||\n !this.#playingSpeech.allowInterruptions ||\n this.#playingSpeech.interrupted\n ) {\n return;\n }\n\n if (this.#opts.interruptMinWords !== 0) {\n // check the final/interim transcribed text for the minimum word count\n // to interrupt the agent speech\n const interimWords = this.#opts.transcription.wordTokenizer.tokenize(\n this.#transcribedInterimText,\n );\n if (interimWords.length < this.#opts.interruptMinWords) {\n return;\n }\n }\n this.#playingSpeech.interrupt();\n }\n\n #addSpeechForPlayout(handle: SpeechHandle) {\n this.#speechQueue.put(handle);\n this.#speechQueue.put(VoicePipelineAgent.FLUSH_SENTINEL);\n this.#speechQueueOpen.resolve();\n }\n\n /** Close the voice assistant. */\n async close() {\n if (!this.#started) {\n return;\n }\n\n this.#room?.removeAllListeners(RoomEvent.ParticipantConnected);\n // TODO(nbsp): await this.#deferredValidation.close()\n }\n}\n\nasync function* llmStreamToStringIterable(\n speechId: string,\n stream: LLMStream,\n): AsyncIterable<string> {\n const startTime = Date.now();\n let firstFrame = true;\n for await (const chunk of stream) {\n const content = chunk.choices[0]?.delta.content;\n if (!content) continue;\n\n if (firstFrame) {\n firstFrame = false;\n log()\n .child({ speechId, elapsed: Math.round(Date.now() - startTime) })\n .debug('received first LLM token');\n }\n yield content;\n }\n}\n\n/** This class is used to try to find the best time to validate the agent reply. */\nclass DeferredReplyValidation {\n // if the STT gives us punctuation, we can try to validate the reply faster.\n readonly PUNCTUATION = '.!?';\n readonly PUNCTUATION_REDUCE_FACTOR = 0.75;\n readonly LATE_TRANSCRIPT_TOLERANCE = 1.5; // late compared to end of speech\n\n #validateFunc: () => Promise<void>;\n #validatingPromise?: Promise<void>;\n #validatingFuture = new Future();\n #lastFinalTranscript = '';\n #lastRecvEndOfSpeechTime = 0;\n #speaking = false;\n #endOfSpeechDelay: number;\n #finalTranscriptDelay: number;\n\n constructor(validateFunc: () => Promise<void>, minEndpointingDelay: number) {\n this.#validateFunc = validateFunc;\n this.#endOfSpeechDelay = minEndpointingDelay;\n this.#finalTranscriptDelay = minEndpointingDelay;\n }\n\n get validating(): boolean {\n return !this.#validatingFuture.done;\n }\n\n onHumanFinalTranscript(transcript: string) {\n this.#lastFinalTranscript = transcript.trim();\n if (this.#speaking) return;\n\n const hasRecentEndOfSpeech =\n Date.now() - this.#lastRecvEndOfSpeechTime < this.LATE_TRANSCRIPT_TOLERANCE;\n let delay = hasRecentEndOfSpeech ? this.#endOfSpeechDelay : this.#finalTranscriptDelay;\n delay = this.#endWithPunctuation() ? delay * this.PUNCTUATION_REDUCE_FACTOR : 1;\n\n this.#run(delay);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n onHumanStartOfSpeech(_: VADEvent) {\n this.#speaking = true;\n // TODO(nbsp):\n // if (this.validating) {\n // this.#validatingPromise.cancel()\n // }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n onHumanEndOfSpeech(_: VADEvent) {\n this.#speaking = false;\n this.#lastRecvEndOfSpeechTime = Date.now();\n\n if (this.#lastFinalTranscript) {\n const delay = this.#endWithPunctuation()\n ? this.#endOfSpeechDelay * this.PUNCTUATION_REDUCE_FACTOR\n : 1;\n this.#run(delay);\n }\n }\n\n // TODO(nbsp): aclose\n\n #endWithPunctuation(): boolean {\n return (\n this.#lastFinalTranscript.length > 0 &&\n this.PUNCTUATION.includes(this.#lastFinalTranscript[this.#lastFinalTranscript.length - 1]!)\n );\n }\n\n #resetStates() {\n this.#lastFinalTranscript = '';\n this.#lastRecvEndOfSpeechTime = 0;\n }\n\n #run(delay: number) {\n const runTask = async (delay: number) => {\n await new Promise((resolve) => setTimeout(resolve, delay));\n this.#resetStates();\n await this.#validateFunc();\n };\n\n this.#validatingFuture = new Future();\n this.#validatingPromise = runTask(delay);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,sBAMO;AAEP,yBAAyB;AAOzB,iBAA0B;AAC1B,IAAAA,cAAmD;AACnD,iBAAoB;AACpB,iBAA4D;AAC5D,mBAIO;AAGP,iBAAkD;AAClD,mBAAiF;AAGjF,0BAA4B;AAC5B,2BAAgD;AAChD,yBAA4C;AAC5C,2BAA6B;AAGtB,MAAM,wBAAwB;AAY9B,IAAK,WAAL,kBAAKC,cAAL;AACL,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AATU,SAAAA;AAAA,GAAA;AAwBL,MAAM,iBAAiB;AAAA,EAC5B;AAAA,EACA;AAAA,EACA,YAAY,oBAAI,IAAiB;AAAA,EACjC,OAAO;AAAA,EAEP,YAAY,OAA2B,WAAsB;AAC3D,SAAK,SAAS;AACd,SAAK,aAAa;AAClB,qBAAiB,WAAW;AAAA,EAC9B;AAAA,EAEA,OAAO,aAA+B;AACpC,WAAO,iBAAiB;AAAA,EAC1B;AAAA,EAEA,IAAI,QAA4B;AAC9B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,cAAc,KAAa,OAAY;AACrC,SAAK,UAAU,IAAI,KAAK,KAAK;AAAA,EAC/B;AAAA,EAEA,YAAY,KAAa,YAAiB,QAAW;AACnD,WAAO,KAAK,UAAU,IAAI,GAAG,KAAK;AAAA,EACpC;AAAA,EAEA,IAAI,YAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AACF;AAEA,MAAM,2BAA8C,CAClD,OACA,YACc;AACd,SAAO,MAAM,IAAI,KAAK,EAAE,SAAS,QAAQ,MAAM,OAAO,CAAC;AACzD;AAEA,MAAM,2BAA8C,CAElD,GACA,SACmC;AACnC,SAAO;AACT;AA6BA,MAAM,mCAA8D;AAAA,EAClE,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,0BAA0B;AAAA,EAC1B,mBAAmB,IAAI,aAAAC,kBAAuB;AAAA,EAC9C,eAAe,IAAI,aAAAC,cAAmB,KAAK;AAAA,EAC3C,eAAe;AACjB;AAuCA,MAAM,oBAAgC;AAAA,EACpC,SAAS,IAAI,wBAAY;AAAA,EACzB,oBAAoB;AAAA,EACpB,yBAAyB;AAAA,EACzB,mBAAmB;AAAA,EACnB,qBAAqB;AAAA,EACrB,sBAAsB;AAAA,EACtB,qBAAqB;AAAA,EACrB,mBAAmB;AAAA,EACnB,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAGO,MAAM,2BAA4B,mBAAAC,QAAsD;AAAA;AAAA,EAEpF,6BAA6B;AAAA,EACtC,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EAElE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,qBAAqB,IAAI,oBAAO;AAAA,EAChC;AAAA,EACA;AAAA,EACA;AAAA,EACA,mBAAmB;AAAA,EACnB,0BAA0B;AAAA,EAC1B,mBAAmB,IAAI,oBAAO;AAAA,EAC9B,eAAe,IAAI,gCAA4E;AAAA,EAC/F;AAAA,EACA;AAAA,EACA,WAAW;AAAA,EACX;AAAA,EACA,eAAkD;AAAA,EAClD;AAAA,EACA,cAAU,gBAAI;AAAA,EACd;AAAA,EAEA,YAEE,KAEA,KAEA,KAEA,KAEA,OAA4B,mBAC5B;AACA,UAAM;AAEN,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,CAAC,IAAI,aAAa,WAAW;AAC/B,YAAM,IAAI,WAAAC,cAAiB,KAAK,GAAG;AAAA,IACrC;AAEA,QAAI,CAAC,IAAI,aAAa,WAAW;AAC/B,YAAM,IAAI,WAAAC,cAAiB,KAAK,IAAI,aAAAJ,kBAAuB,CAAC;AAAA,IAC9D;AAEA,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,OAAO;AAEZ,SAAK,sBAAsB,IAAI;AAAA,MAC7B,KAAK,yBAAyB,KAAK,IAAI;AAAA,MACvC,KAAK,MAAM;AAAA,IACb;AAAA,EACF;AAAA,EAEA,IAAI,SAAsC;AACxC,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA,EAEA,IAAI,OAAO,KAAsB;AAC/B,SAAK,MAAM,SAAS;AAAA,EACtB;AAAA,EAEA,IAAI,UAAuB;AACzB,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAEE,MAQA,cAAiD,MACjD;AACA,QAAI,KAAK,UAAU;AACjB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AACA,SAAK,GAAG,0BAAU,sBAAsB,CAACK,iBAAmC;AAE1E,UAAI,KAAK,cAAc;AACrB;AAAA,MACF;AACA,WAAK,iBAAiB,KAAK,MAAMA,aAAY,QAAQ;AAAA,IACvD,CAAC;AAED,SAAK,QAAQ;AACb,SAAK,eAAe;AAEpB,QAAI,aAAa;AACf,UAAI,OAAO,gBAAgB,UAAU;AACnC,aAAK,iBAAiB,WAAW;AAAA,MACnC,OAAO;AACL,aAAK,iBAAiB,YAAY,QAAQ;AAAA,MAC5C;AAAA,IACF;AAEA,SAAK,KAAK;AAAA,EACZ;AAAA;AAAA,EAGA,MAAM,IACJ,QACA,qBAAqB,MACrB,eAAe,MACf;AACA,UAAM,KAAK,mBAAmB;AAC9B,UAAM,YAAY,kCAAa,sBAAsB,oBAAoB,YAAY;AACrF,UAAM,kBAAkB,KAAK,uBAAuB,UAAU,IAAI,MAAM;AACxE,cAAU,WAAW,QAAQ,eAAe;AAC5C,SAAK,qBAAqB,SAAS;AAAA,EACrC;AAAA,EAEA,aAAa,OAAmB,QAAQ,GAAG;AACzC,UAAM,UAAU,CAACC,WAA4C;AAC3D,aAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AApWpE;AAqWQ,YAAI,YAAY;AAChB,iBAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AACD,cAAM,IAAI,QAAQ,CAACC,aAAY,WAAWA,UAASD,MAAK,CAAC;AACzD,aAAI,UAAK,UAAL,mBAAY,aAAa;AAC3B,cAAI,CAAC,WAAW;AACd,oBAAM,UAAK,MAAM,qBAAX,mBAA6B,cAAc,EAAE,CAAC,qBAAqB,GAAG,MAAM;AAAA,UACpF;AAAA,QACF;AACA,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH;AAEA,QAAI,KAAK,kBAAkB;AACzB,WAAK,iBAAiB,OAAO;AAAA,IAC/B;AAEA,SAAK,mBAAmB,QAAQ,KAAK;AAAA,EACvC;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,eAAe,KAAK,MAAM,mBAAmB,IAAI,mBAAmB,KAAK;AAC9E,QAAI,CAAC,KAAK,cAAc;AACtB,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,SAAK,cAAc,IAAI,8BAAW,KAAK,OAAO,KAAK,MAAM,KAAK,MAAM,KAAK,YAAY;AACrF,SAAK,YAAY,GAAG,mCAAgB,iBAAiB,CAAC,UAAU;AAC9D,WAAK,KAAK,6BAA8B;AACxC,WAAK,oBAAoB,qBAAqB,KAAK;AAAA,IACrD,CAAC;AACD,SAAK,YAAY,GAAG,mCAAgB,oBAAoB,CAAC,UAAU;AACjE,UAAI,CAAC,KAAK,mBAAmB,MAAM;AACjC;AAAA,MACF;AACA,UAAI,CAAC,KAAK,cAAc;AACtB,cAAM,IAAI,MAAM,2BAA2B;AAAA,MAC7C;AAEA,UAAI,KAAK;AACT,UAAI,KAAK,MAAM,oBAAoB;AACjC,aAAK,KAAK,IAAI,GAAG,IAAI,MAAM,WAAW;AACtC,aAAK,aAAa,QAAQ,eAAe;AAAA,MAC3C;AAEA,UAAI,MAAM,kBAAkB,KAAK,MAAM,yBAAyB;AAC9D,aAAK,qBAAqB;AAAA,MAC5B;AAAA,IACF,CAAC;AACD,SAAK,YAAY,GAAG,mCAAgB,eAAe,CAAC,UAAU;AAC5D,WAAK,KAAK,6BAA8B;AACxC,WAAK,oBAAoB,mBAAmB,KAAK;AACjD,WAAK,uBAAuB,KAAK,IAAI;AAAA,IACvC,CAAC;AACD,SAAK,YAAY,GAAG,mCAAgB,oBAAoB,CAAC,UAAU;AACjE,WAAK,0BAA0B,MAAM,aAAc,CAAC,EAAE;AAAA,IACxD,CAAC;AACD,SAAK,YAAY,GAAG,mCAAgB,kBAAkB,CAAC,UAAU;AAC/D,YAAM,gBAAgB,MAAM,aAAc,CAAC,EAAE;AAC7C,UAAI,CAAC,cAAe;AAEpB,WAAK,QAAQ,MAAM,EAAE,gBAAgB,cAAc,CAAC,EAAE,MAAM,0BAA0B;AACtF,WAAK,qBAAqB,KAAK,mBAAmB,MAAM,MAAM;AAE9D,UACE,KAAK,MAAM,wBACV,CAAC,KAAK,kBAAkB,KAAK,eAAe,qBAC7C;AACA,aAAK,sBAAsB;AAAA,MAC7B;AAEA,WAAK,oBAAoB,uBAAuB,aAAa;AAE7D,YAAM,QAAQ,KAAK,MAAM,cAAc,cAAc,SAAS,aAAa;AAC3E,UAAI,MAAM,UAAU,GAAG;AAGrB,aAAK,qBAAqB;AAAA,MAC5B;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,OAAO;AA9bf;AA+bI,SAAK,aAAa,cAAc;AAChC,UAAM,cAAc,IAAI,4BAAY,KAAK,KAAK,YAAY,KAAK,KAAK,WAAW;AAC/E,UAAM,QAAQ,gCAAgB,iBAAiB,mBAAmB,WAAW;AAC7E,SAAK,oBAAoB,QAAM,gBAAK,UAAL,mBAAY,qBAAZ,mBAA8B;AAAA,MAC3D;AAAA,MACA,IAAI,oCAAoB,EAAE,QAAQ,4BAAY,kBAAkB,CAAC;AAAA;AAGnE,UAAM,eAAe,IAAI,kCAAa,WAAW;AACjD,SAAK,eAAe,IAAI,gCAAY,cAAc,KAAK,IAAI;AAE3D,iBAAa,GAAG,uCAAkB,iBAAiB,MAAM;AACvD,WAAK,KAAK,8BAA+B;AACzC,WAAK,aAAa,UAAU;AAAA,IAC9B,CAAC;AAED,iBAAa,GAAG,uCAAkB,iBAAiB,CAAC,MAAM;AACxD,WAAK,KAAK,8BAA+B;AACzC,WAAK,aAAa,WAAW;AAAA,IAC/B,CAAC;AAED,SAAK,mBAAmB,QAAQ;AAEhC,WAAO,MAAM;AACX,YAAM,KAAK,iBAAiB;AAC5B,uBAAiB,UAAU,KAAK,cAAc;AAC5C,YAAI,WAAW,mBAAmB,eAAgB;AAClD,aAAK,iBAAiB;AACtB,cAAM,KAAK,YAAY,MAAM;AAC7B,aAAK,iBAAiB;AAAA,MACxB;AACA,WAAK,mBAAmB,IAAI,oBAAO;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,wBAAwB;AAle1B;AAmeI,eAAK,uBAAL,mBAAyB;AACzB,QAAI,KAAK,eAAe,KAAK,YAAY,UAAU;AACjD,WAAK,aAAa,YAAY,GAAG;AAAA,IACnC;AAEA,SAAK,qBAAqB,kCAAa;AAAA,MACrC,KAAK,MAAM;AAAA,MACX;AAAA,MACA,KAAK;AAAA,IACP;AACA,UAAM,YAAY,KAAK;AACvB,SAAK,kBAAkB,KAAK,sBAAsB,KAAK,iBAAiB,SAAS;AAAA,EACnF;AAAA,EAEA,sBACE,SACA,QAC0B;AAC1B,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,UAAI,YAAY;AAChB,eAAS,MAAM;AACb,oBAAY;AAAA,MACd,CAAC;AAED,UAAI,SAAS;AACX,kBAAM,+BAAiB,OAAO;AAAA,MAChC;AAEA,YAAM,YAAY,KAAK,QAAQ,KAAK;AACpC,YAAM,gBAAgB,KAAK;AAC3B,UAAI,iBAAiB,cAAc,aAAa;AAC9C,aACG,CAAC,cAAc,gBAAgB,cAAc,kBAC9C,CAAC,cAAc,iBACf;AAGA,oBAAU,SAAS;AAAA,YACjB,wBAAY,OAAO;AAAA;AAAA;AAAA,cAGjB,MAAM,qBAAS;AAAA,YACjB,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AAEA,gBAAU,SAAS;AAAA,QACjB,wBAAY,OAAO;AAAA,UACjB,MAAM,iCAAQ;AAAA,UACd,MAAM,qBAAS;AAAA,QACjB,CAAC;AAAA,MACH;AAEA,UAAI,UAAW,SAAQ;AACvB,UAAI,YAAY,MAAM,KAAK,MAAM,kBAAkB,MAAM,SAAS;AAClE,UAAI,cAAc,OAAO;AACvB,yCAAQ;AACR;AAAA,MACF;AAEA,UAAI,UAAW,SAAQ;AAEvB,UAAI,EAAE,qBAAqB,uBAAY;AACrC,oBAAa,MAAM,yBAAyB,MAAM,SAAS;AAAA,MAC7D;AAEA,UAAI,OAAQ,aAAa;AACvB;AAAA,MACF;AAEA,YAAM,kBAAkB,KAAK,uBAAuB,OAAQ,IAAI,SAAS;AACzE,aAAQ,WAAW,WAAW,eAAe;AAI7C,YAAM,UAAU,CAAC,CAAC,KAAK,uBACnB,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,wBAAwB,GAAI,IAAI,MAC9D;AAEJ,WAAK,QAAQ,MAAM,EAAE,UAAU,OAAQ,IAAI,QAAQ,CAAC,EAAE,MAAM,0BAA0B;AACtF,cAAQ;AAAA,IACV,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,YAAY,QAAsB;AACtC,QAAI;AACF,YAAM,OAAO,sBAAsB;AAAA,IACrC,QAAQ;AACN;AAAA,IACF;AACA,UAAM,KAAK,kBAAmB,oBAAoB;AAClD,UAAM,kBAAkB,OAAO;AAC/B,QAAI,gBAAgB,YAAa;AAEjC,UAAM,eAAe,OAAO;AAC5B,UAAM,aAAa,gBAAgB,KAAK;AACxC,UAAM,UAAU,WAAW,KAAK;AAEhC,UAAM,6BAA6B,MAAM;AACvC,UAAI,CAAC,gBAAgB,gBAAgB,eAAe,OAAO,cAAe;AAC1E,YAAME,gBACJ,OAAO,kBAAkB,wBAAa,CAAC,CAAC,OAAO,OAAO,cAAc;AAKtE,UACE,OAAO,sBACP,CAACA,iBACD,WAAW,aAAa,KAAK,8BAC7B,CAAC,QAAQ,MACT;AACA;AAAA,MACF;AAEA,WAAK,QAAQ,MAAM,EAAE,gBAAgB,aAAa,CAAC,EAAE,MAAM,2BAA2B;AACtF,YAAM,UAAU,wBAAY,OAAO,EAAE,MAAM,cAAc,MAAM,qBAAS,KAAK,CAAC;AAC9E,WAAK,QAAQ,SAAS,KAAK,OAAO;AAClC,WAAK,KAAK,+BAAgC,OAAO;AAEjD,WAAK,mBAAmB,KAAK,iBAAiB,MAAM,aAAa,MAAM;AACvE,aAAO,kBAAkB;AAAA,IAC3B;AAGA,+BAA2B;AAE3B,WAAO,CAAC,QAAQ,MAAM;AACpB,YAAM,IAAI,QAAc,OAAO,YAAY;AACzC,mBAAW,SAAS,GAAG;AACvB,cAAM,QAAQ;AACd,gBAAQ;AAAA,MACV,CAAC;AACD,iCAA2B;AAC3B,UAAI,OAAO,YAAa;AAAA,IAC1B;AACA,+BAA2B;AAG3B,QAAI,gBAAgB;AACpB,UAAM,eAAe,OAAO,kBAAkB,wBAAa,CAAC,CAAC,OAAO,OAAO,cAAc;AACzF,UAAM,qBAAqB,CAAC;AAC5B,QAAI,cAAc,OAAO;AAIzB,QAAI,gBAAgB,CAAC,aAAa;AAChC,UAAI,CAAC,gBAAgB,CAAC,OAAO,eAAe;AAC1C,cAAM,IAAI,MAAM,2DAA2D;AAAA,MAC7E;AACA,YAAM,YAAY,OAAO;AACzB,UAAI,mBAAmB,UAAU;AAEjC,eAAS,IAAI,GAAG,IAAI,KAAK,MAAM,sBAAsB,KAAK;AACxD,aAAK,KAAK,kCAAmC,gBAAgB;AAC7D,cAAM,cAAkC,CAAC;AACzC,mBAAW,QAAQ,kBAAkB;AACnC,gBAAM,OAAO,KAAK,KAAK,QAAQ,KAAK,MAAM,EAAE;AAAA,YAC1C,CAAC,YAAY,EAAE,MAAM,KAAK,MAAM,YAAY,KAAK,YAAY,OAAO;AAAA,YACpE,CAAC,WAAW,EAAE,MAAM,KAAK,MAAM,YAAY,KAAK,YAAY,MAAM;AAAA,UACpE;AACA,sBAAY,KAAK,EAAE,GAAG,MAAM,KAAK,CAAC;AAClC,eAAK,QACF,MAAM,EAAE,UAAU,KAAK,MAAM,UAAU,OAAO,GAAG,CAAC,EAClD,MAAM,uBAAuB;AAChC,cAAI;AACF,kBAAM;AAAA,UACR,QAAQ;AACN,iBAAK,QACF,MAAM,EAAE,UAAU,KAAK,MAAM,UAAU,OAAO,GAAG,CAAC,EAClD,MAAM,6BAA6B;AAAA,UACxC;AAAA,QACF;AAEA,cAAM,gBAAgB,CAAC;AACvB,cAAM,mBAAmB,CAAC;AAC1B,mBAAW,OAAO,aAAa;AAE7B,gBAAM,OAAO,MAAM,IAAI;AACvB,cAAI,CAAC,QAAQ,KAAK,WAAW,OAAW;AACxC,wBAAc,KAAK,GAAG;AACtB,2BAAiB,KAAK,wBAAY,6BAA6B,IAAI,CAAC;AAAA,QACtE;AAEA,YAAI,CAAC,cAAc,OAAQ;AAG3B,2BAAmB,KAAK,wBAAY,gBAAgB,eAAe,aAAa,CAAC;AACjF,2BAAmB,KAAK,GAAG,gBAAgB;AAE3C,cAAM,UAAU,OAAO,OAAO,QAAQ,KAAK;AAC3C,gBAAQ,SAAS,KAAK,GAAG,kBAAkB;AAE3C,cAAM,kBAAkB,KAAK,IAAI,KAAK;AAAA,UACpC;AAAA,UACA,QAAQ,KAAK;AAAA,QACf,CAAC;AACD,cAAM,kBAAkB,KAAK,uBAAuB,OAAO,IAAI,eAAe;AAE9E,eAAO,kBAAkB;AACzB,cAAMC,cAAa,gBAAgB,KAAK;AACxC,cAAMA,YAAW,KAAK,EAAE;AAGxB,wBAAgB;AAChB,sBAAc,gBAAgB;AAC9B,2BAAmB,gBAAgB;AAEnC,aAAK,KAAK,iCAAkC,WAAW;AACvD,YAAI,CAAC,iBAAkB;AAAA,MACzB;AAAA,IACF;AAEA,QAAI,OAAO,iBAAiB,CAAC,gBAAgB,OAAO,gBAAgB;AAClE,WAAK,QAAQ,SAAS,KAAK,GAAG,kBAAkB;AAChD,UAAI,aAAa;AACf,wBAAgB;AAAA,MAClB;AAEA,YAAM,MAAM,wBAAY,OAAO,EAAE,MAAM,eAAe,MAAM,qBAAS,UAAU,CAAC;AAChF,WAAK,QAAQ,SAAS,KAAK,GAAG;AAE9B,aAAO,oBAAoB;AAC3B,UAAI,aAAa;AACf,aAAK,KAAK,kCAAmC,GAAG;AAAA,MAClD,OAAO;AACL,aAAK,KAAK,gCAAiC,GAAG;AAAA,MAChD;AAEA,WAAK,QACF,MAAM;AAAA,QACL,iBAAiB;AAAA,QACjB;AAAA,QACA,UAAU,OAAO;AAAA,MACnB,CAAC,EACA,MAAM,wBAAwB;AAAA,IACnC;AAAA,EACF;AAAA,EAEA,uBACE,UACA,QACiB;AACjB,QAAI,CAAC,KAAK,cAAc;AACtB,YAAM,IAAI,MAAM,+CAA+C;AAAA,IACjE;AAEA,QAAI,kBAAkB,sBAAW;AAC/B,eAAS,0BAA0B,UAAU,MAAM;AAAA,IACrD;AAEA,UAAM,WAAW;AACjB,QAAI,EAAE,OAAO,WAAW,WAAW;AAAA,IAEnC;AAEA,UAAM,YAAY,KAAK,MAAM,kBAAkB,MAAM,QAAQ;AAC7D,QAAI,CAAC,WAAW;AACd,YAAM,IAAI,MAAM,+DAA+D;AAAA,IACjF;AAEA,WAAO,KAAK,aAAa,WAAW,UAAU,SAAS;AAAA,EACzD;AAAA,EAEA,MAAM,2BAA2B;AAC/B,QAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,oBAAoB;AAClE,WAAK,QACF,MAAM,EAAE,UAAU,KAAK,eAAe,GAAG,CAAC,EAC1C,MAAM,yEAAyE;AAClF;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,oBAAoB;AAC5B,UAAI,KAAK,MAAM,uBAAuB,CAAC,KAAK,kBAAkB;AAC5D;AAAA,MACF;AACA,WAAK,sBAAsB;AAAA,IAC7B;AAEA,QAAI,CAAC,KAAK,oBAAoB;AAC5B,YAAM,IAAI,MAAM,kCAAkC;AAAA,IACpD;AAIA,QAAI,KAAK,iBAAiB,MAAM;AAC9B,uBAAiB,UAAU,KAAK,cAAc;AAC5C,YAAI,WAAW,mBAAmB,eAAgB;AAClD,YAAI,CAAC,OAAO,QAAS;AACrB,YAAI,OAAO,mBAAoB,QAAO,UAAU;AAAA,MAClD;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,UAAU,KAAK,mBAAmB,GAAG,CAAC,EAAE,MAAM,uBAAuB;AAE1F,SAAK,qBAAqB,KAAK,kBAAkB;AACjD,SAAK,qBAAqB;AAC1B,SAAK,0BAA0B;AAAA,EACjC;AAAA,EAEA,uBAAuB;AACrB,QACE,CAAC,KAAK,kBACN,CAAC,KAAK,eAAe,sBACrB,KAAK,eAAe,aACpB;AACA;AAAA,IACF;AAEA,QAAI,KAAK,MAAM,sBAAsB,GAAG;AAGtC,YAAM,eAAe,KAAK,MAAM,cAAc,cAAc;AAAA,QAC1D,KAAK;AAAA,MACP;AACA,UAAI,aAAa,SAAS,KAAK,MAAM,mBAAmB;AACtD;AAAA,MACF;AAAA,IACF;AACA,SAAK,eAAe,UAAU;AAAA,EAChC;AAAA,EAEA,qBAAqB,QAAsB;AACzC,SAAK,aAAa,IAAI,MAAM;AAC5B,SAAK,aAAa,IAAI,mBAAmB,cAAc;AACvD,SAAK,iBAAiB,QAAQ;AAAA,EAChC;AAAA;AAAA,EAGA,MAAM,QAAQ;AA7yBhB;AA8yBI,QAAI,CAAC,KAAK,UAAU;AAClB;AAAA,IACF;AAEA,eAAK,UAAL,mBAAY,mBAAmB,0BAAU;AAAA,EAE3C;AACF;AAEA,gBAAgB,0BACd,UACA,QACuB;AA1zBzB;AA2zBE,QAAM,YAAY,KAAK,IAAI;AAC3B,MAAI,aAAa;AACjB,mBAAiB,SAAS,QAAQ;AAChC,UAAM,WAAU,WAAM,QAAQ,CAAC,MAAf,mBAAkB,MAAM;AACxC,QAAI,CAAC,QAAS;AAEd,QAAI,YAAY;AACd,mBAAa;AACb,0BAAI,EACD,MAAM,EAAE,UAAU,SAAS,KAAK,MAAM,KAAK,IAAI,IAAI,SAAS,EAAE,CAAC,EAC/D,MAAM,0BAA0B;AAAA,IACrC;AACA,UAAM;AAAA,EACR;AACF;AAGA,MAAM,wBAAwB;AAAA;AAAA,EAEnB,cAAc;AAAA,EACd,4BAA4B;AAAA,EAC5B,4BAA4B;AAAA;AAAA,EAErC;AAAA,EACA;AAAA,EACA,oBAAoB,IAAI,oBAAO;AAAA,EAC/B,uBAAuB;AAAA,EACvB,2BAA2B;AAAA,EAC3B,YAAY;AAAA,EACZ;AAAA,EACA;AAAA,EAEA,YAAY,cAAmC,qBAA6B;AAC1E,SAAK,gBAAgB;AACrB,SAAK,oBAAoB;AACzB,SAAK,wBAAwB;AAAA,EAC/B;AAAA,EAEA,IAAI,aAAsB;AACxB,WAAO,CAAC,KAAK,kBAAkB;AAAA,EACjC;AAAA,EAEA,uBAAuB,YAAoB;AACzC,SAAK,uBAAuB,WAAW,KAAK;AAC5C,QAAI,KAAK,UAAW;AAEpB,UAAM,uBACJ,KAAK,IAAI,IAAI,KAAK,2BAA2B,KAAK;AACpD,QAAI,QAAQ,uBAAuB,KAAK,oBAAoB,KAAK;AACjE,YAAQ,KAAK,oBAAoB,IAAI,QAAQ,KAAK,4BAA4B;AAE9E,SAAK,KAAK,KAAK;AAAA,EACjB;AAAA;AAAA,EAGA,qBAAqB,GAAa;AAChC,SAAK,YAAY;AAAA,EAKnB;AAAA;AAAA,EAGA,mBAAmB,GAAa;AAC9B,SAAK,YAAY;AACjB,SAAK,2BAA2B,KAAK,IAAI;AAEzC,QAAI,KAAK,sBAAsB;AAC7B,YAAM,QAAQ,KAAK,oBAAoB,IACnC,KAAK,oBAAoB,KAAK,4BAC9B;AACJ,WAAK,KAAK,KAAK;AAAA,IACjB;AAAA,EACF;AAAA;AAAA,EAIA,sBAA+B;AAC7B,WACE,KAAK,qBAAqB,SAAS,KACnC,KAAK,YAAY,SAAS,KAAK,qBAAqB,KAAK,qBAAqB,SAAS,CAAC,CAAE;AAAA,EAE9F;AAAA,EAEA,eAAe;AACb,SAAK,uBAAuB;AAC5B,SAAK,2BAA2B;AAAA,EAClC;AAAA,EAEA,KAAK,OAAe;AAClB,UAAM,UAAU,OAAOH,WAAkB;AACvC,YAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAASA,MAAK,CAAC;AACzD,WAAK,aAAa;AAClB,YAAM,KAAK,cAAc;AAAA,IAC3B;AAEA,SAAK,oBAAoB,IAAI,oBAAO;AACpC,SAAK,qBAAqB,QAAQ,KAAK;AAAA,EACzC;AACF;","names":["import_llm","VPAEvent","BasicSentenceTokenizer","BasicWordTokenizer","EventEmitter","STTStreamAdapter","TTSStreamAdapter","participant","delay","resolve","isUsingTools","playHandle"]}
1
+ {"version":3,"sources":["../../src/pipeline/pipeline_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { LocalTrackPublication, RemoteParticipant, Room } from '@livekit/rtc-node';\nimport {\n AudioSource,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport type {\n CallableFunctionResult,\n FunctionCallInfo,\n FunctionContext,\n LLM,\n} from '../llm/index.js';\nimport { LLMStream } from '../llm/index.js';\nimport { ChatContext, ChatMessage, ChatRole } from '../llm/index.js';\nimport { log } from '../log.js';\nimport { type STT, StreamAdapter as STTStreamAdapter } from '../stt/index.js';\nimport {\n SentenceTokenizer as BasicSentenceTokenizer,\n WordTokenizer as BasicWordTokenizer,\n hyphenateWord,\n} from '../tokenize/basic/index.js';\nimport type { SentenceTokenizer, WordTokenizer } from '../tokenize/tokenizer.js';\nimport type { TTS } from '../tts/index.js';\nimport { StreamAdapter as TTSStreamAdapter } from '../tts/index.js';\nimport { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport type { VAD, VADEvent } from '../vad.js';\nimport type { SpeechSource, SynthesisHandle } from './agent_output.js';\nimport { AgentOutput } from './agent_output.js';\nimport { AgentPlayout, AgentPlayoutEvent } from './agent_playout.js';\nimport { HumanInput, HumanInputEvent } from './human_input.js';\nimport { SpeechHandle } from './speech_handle.js';\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\nexport type BeforeLLMCallback = (\n agent: VoicePipelineAgent,\n chatCtx: ChatContext,\n) => LLMStream | false | void | Promise<LLMStream | false | void>;\n\nexport type BeforeTTSCallback = (\n agent: VoicePipelineAgent,\n source: string | AsyncIterable<string>,\n) => SpeechSource;\n\nexport enum VPAEvent {\n USER_STARTED_SPEAKING,\n USER_STOPPED_SPEAKING,\n AGENT_STARTED_SPEAKING,\n AGENT_STOPPED_SPEAKING,\n USER_SPEECH_COMMITTED,\n AGENT_SPEECH_COMMITTED,\n AGENT_SPEECH_INTERRUPTED,\n FUNCTION_CALLS_COLLECTED,\n FUNCTION_CALLS_FINISHED,\n}\n\nexport type VPACallbacks = {\n [VPAEvent.USER_STARTED_SPEAKING]: () => void;\n [VPAEvent.USER_STOPPED_SPEAKING]: () => void;\n [VPAEvent.AGENT_STARTED_SPEAKING]: () => void;\n [VPAEvent.AGENT_STOPPED_SPEAKING]: () => void;\n [VPAEvent.USER_SPEECH_COMMITTED]: (msg: ChatMessage) => void;\n [VPAEvent.AGENT_SPEECH_COMMITTED]: (msg: ChatMessage) => void;\n [VPAEvent.AGENT_SPEECH_INTERRUPTED]: (msg: ChatMessage) => void;\n [VPAEvent.FUNCTION_CALLS_COLLECTED]: (funcs: FunctionCallInfo[]) => void;\n [VPAEvent.FUNCTION_CALLS_FINISHED]: (funcs: CallableFunctionResult[]) => void;\n};\n\nexport class AgentCallContext {\n #agent: VoicePipelineAgent;\n #llmStream: LLMStream;\n #metadata = new Map<string, any>();\n static #current: AgentCallContext;\n\n constructor(agent: VoicePipelineAgent, llmStream: LLMStream) {\n this.#agent = agent;\n this.#llmStream = llmStream;\n AgentCallContext.#current = this;\n }\n\n static getCurrent(): AgentCallContext {\n return AgentCallContext.#current;\n }\n\n get agent(): VoicePipelineAgent {\n return this.#agent;\n }\n\n storeMetadata(key: string, value: any) {\n this.#metadata.set(key, value);\n }\n\n getMetadata(key: string, orDefault: any = undefined) {\n return this.#metadata.get(key) || orDefault;\n }\n\n get llmStream(): LLMStream {\n return this.#llmStream;\n }\n}\n\nconst defaultBeforeLLMCallback: BeforeLLMCallback = (\n agent: VoicePipelineAgent,\n chatCtx: ChatContext,\n): LLMStream => {\n return agent.llm.chat({ chatCtx, fncCtx: agent.fncCtx });\n};\n\nconst defaultBeforeTTSCallback: BeforeTTSCallback = (\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n _: VoicePipelineAgent,\n text: string | AsyncIterable<string>,\n): string | AsyncIterable<string> => {\n return text;\n};\n\nexport interface AgentTranscriptionOptions {\n /** Whether to forward the user transcription to the client */\n userTranscription: boolean;\n /** Whether to forward the agent transcription to the client */\n agentTranscription: boolean;\n /**\n * The speed at which the agent's speech transcription is forwarded to the client.\n * We try to mimic the agent's speech speed by adjusting the transcription speed.\n */\n agentTranscriptionSpeech: number;\n /**\n * The tokenizer used to split the speech into sentences.\n * This is used to decide when to mark a transcript as final for the agent transcription.\n */\n sentenceTokenizer: SentenceTokenizer;\n /**\n * The tokenizer used to split the speech into words.\n * This is used to simulate the \"interim results\" of the agent transcription.\n */\n wordTokenizer: WordTokenizer;\n /**\n * A function that takes a string (word) as input and returns a list of strings,\n * representing the hyphenated parts of the word.\n */\n hyphenateWord: (word: string) => string[];\n}\n\nconst defaultAgentTranscriptionOptions: AgentTranscriptionOptions = {\n userTranscription: true,\n agentTranscription: true,\n agentTranscriptionSpeech: 1,\n sentenceTokenizer: new BasicSentenceTokenizer(),\n wordTokenizer: new BasicWordTokenizer(false),\n hyphenateWord: hyphenateWord,\n};\n\nexport interface VPAOptions {\n /** Chat context for the assistant. */\n chatCtx?: ChatContext;\n /** Function context for the assistant. */\n fncCtx?: FunctionContext;\n /** Whether to allow the user to interrupt the assistant. */\n allowInterruptions: boolean;\n /** Minimum duration of speech to consider for interruption. */\n interruptSpeechDuration: number;\n /** Minimum number of words to consider for interuption. This may increase latency. */\n interruptMinWords: number;\n /** Delay to wait before considering the user speech done. */\n minEndpointingDelay: number;\n maxRecursiveFncCalls: number;\n /* Whether to preemptively synthesize responses. */\n preemptiveSynthesis: boolean;\n /*\n * Callback called when the assistant is about to synthesize a reply.\n *\n * @remarks\n * Returning void will create a default LLM stream.\n * You can also return your own LLM stream by calling `llm.chat()`.\n * Returning `false` ill cancel the synthesis of the reply.\n */\n beforeLLMCallback: BeforeLLMCallback;\n /*\n * Callback called when the assistant is about to synthesize speech.\n *\n * @remarks\n * This can be used to customize text before synthesis\n * (e.g. editing the pronunciation of a word).\n */\n beforeTTSCallback: BeforeTTSCallback;\n /** Options for assistant transcription. */\n transcription: AgentTranscriptionOptions;\n}\n\nconst defaultVPAOptions: VPAOptions = {\n chatCtx: new ChatContext(),\n allowInterruptions: true,\n interruptSpeechDuration: 50,\n interruptMinWords: 0,\n minEndpointingDelay: 500,\n maxRecursiveFncCalls: 1,\n preemptiveSynthesis: false,\n beforeLLMCallback: defaultBeforeLLMCallback,\n beforeTTSCallback: defaultBeforeTTSCallback,\n transcription: defaultAgentTranscriptionOptions,\n};\n\n/** A pipeline agent (VAD + STT + LLM + TTS) implementation. */\nexport class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<VPACallbacks>) {\n /** Minimum time played for the user speech to be committed to the chat context. */\n readonly MIN_TIME_PLAYED_FOR_COMMIT = 1.5;\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n\n #vad: VAD;\n #stt: STT;\n #llm: LLM;\n #tts: TTS;\n #opts: VPAOptions;\n #humanInput?: HumanInput;\n #agentOutput?: AgentOutput;\n #trackPublishedFut = new Future();\n #pendingAgentReply?: SpeechHandle;\n #agentReplyTask?: CancellablePromise<void>;\n #playingSpeech?: SpeechHandle;\n #transcribedText = '';\n #transcribedInterimText = '';\n #speechQueueOpen = new Future();\n #speechQueue = new AsyncIterableQueue<SpeechHandle | typeof VoicePipelineAgent.FLUSH_SENTINEL>();\n #lastEndOfSpeechTime?: number;\n #updateStateTask?: CancellablePromise<void>;\n #started = false;\n #room?: Room;\n #participant: RemoteParticipant | string | null = null;\n #deferredValidation: DeferredReplyValidation;\n #logger = log();\n #agentPublication?: LocalTrackPublication;\n\n constructor(\n /** Voice Activity Detection instance. */\n vad: VAD,\n /** Speech-to-Text instance. */\n stt: STT,\n /** Large Language Model instance. */\n llm: LLM,\n /** Text-to-Speech instance. */\n tts: TTS,\n /** Additional VoicePipelineAgent options. */\n opts: Partial<VPAOptions> = defaultVPAOptions,\n ) {\n super();\n\n this.#opts = { ...defaultVPAOptions, ...opts };\n\n if (!stt.capabilities.streaming) {\n stt = new STTStreamAdapter(stt, vad);\n }\n\n if (!tts.capabilities.streaming) {\n tts = new TTSStreamAdapter(tts, new BasicSentenceTokenizer());\n }\n\n this.#vad = vad;\n this.#stt = stt;\n this.#llm = llm;\n this.#tts = tts;\n\n this.#deferredValidation = new DeferredReplyValidation(\n this.#validateReplyIfPossible.bind(this),\n this.#opts.minEndpointingDelay,\n );\n }\n\n get fncCtx(): FunctionContext | undefined {\n return this.#opts.fncCtx;\n }\n\n set fncCtx(ctx: FunctionContext) {\n this.#opts.fncCtx = ctx;\n }\n\n get chatCtx(): ChatContext {\n return this.#opts.chatCtx!;\n }\n\n get llm(): LLM {\n return this.#llm;\n }\n\n get tts(): TTS {\n return this.#tts;\n }\n\n get stt(): STT {\n return this.#stt;\n }\n\n get vad(): VAD {\n return this.#vad;\n }\n\n /** Start the voice assistant. */\n start(\n /** The room to connect to. */\n room: Room,\n /**\n * The participant to listen to.\n *\n * @remarks\n * Can be a participant or an identity.\n * If omitted, the first participant in the room will be selected.\n */\n participant: RemoteParticipant | string | null = null,\n ) {\n if (this.#started) {\n throw new Error('voice assistant already started');\n }\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.#participant) {\n return;\n }\n this.#linkParticipant.call(this, participant.identity);\n });\n\n this.#room = room;\n this.#participant = participant;\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity);\n }\n }\n\n this.#run();\n }\n\n /** Play a speech source through the voice assistant. */\n async say(\n source: string | LLMStream | AsyncIterable<string>,\n allowInterruptions = true,\n addToChatCtx = true,\n ) {\n await this.#trackPublishedFut.await;\n const newHandle = SpeechHandle.createAssistantSpeech(allowInterruptions, addToChatCtx);\n const synthesisHandle = this.#synthesizeAgentSpeech(newHandle.id, source);\n newHandle.initialize(source, synthesisHandle);\n this.#addSpeechForPlayout(newHandle);\n }\n\n #updateState(state: AgentState, delay = 0) {\n const runTask = (delay: number): CancellablePromise<void> => {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n await new Promise((resolve) => setTimeout(resolve, delay));\n if (this.#room?.isConnected) {\n if (!cancelled) {\n await this.#room.localParticipant?.setAttributes({ [AGENT_STATE_ATTRIBUTE]: state });\n }\n }\n resolve();\n });\n };\n\n if (this.#updateStateTask) {\n this.#updateStateTask.cancel();\n }\n\n this.#updateStateTask = runTask(delay);\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.#room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.#participant = this.#room.remoteParticipants.get(participantIdentity) || null;\n if (!this.#participant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n this.#humanInput = new HumanInput(this.#room, this.#vad, this.#stt, this.#participant);\n this.#humanInput.on(HumanInputEvent.START_OF_SPEECH, (event) => {\n this.emit(VPAEvent.USER_STARTED_SPEAKING);\n this.#deferredValidation.onHumanStartOfSpeech(event);\n });\n this.#humanInput.on(HumanInputEvent.VAD_INFERENCE_DONE, (event) => {\n if (!this.#trackPublishedFut.done) {\n return;\n }\n if (!this.#agentOutput) {\n throw new Error('agent output is undefined');\n }\n\n let tv = 1;\n if (this.#opts.allowInterruptions) {\n tv = Math.max(0, 1 - event.probability);\n this.#agentOutput.playout.targetVolume = tv;\n }\n\n if (event.speechDuration >= this.#opts.interruptSpeechDuration) {\n this.#interruptIfPossible();\n }\n });\n this.#humanInput.on(HumanInputEvent.END_OF_SPEECH, (event) => {\n this.emit(VPAEvent.USER_STARTED_SPEAKING);\n this.#deferredValidation.onHumanEndOfSpeech(event);\n this.#lastEndOfSpeechTime = Date.now();\n });\n this.#humanInput.on(HumanInputEvent.INTERIM_TRANSCRIPT, (event) => {\n this.#transcribedInterimText = event.alternatives![0].text;\n });\n this.#humanInput.on(HumanInputEvent.FINAL_TRANSCRIPT, (event) => {\n const newTranscript = event.alternatives![0].text;\n if (!newTranscript) return;\n\n this.#logger.child({ userTranscript: newTranscript }).debug('received user transcript');\n this.#transcribedText += (this.#transcribedText ? ' ' : '') + newTranscript;\n\n if (\n this.#opts.preemptiveSynthesis &&\n (!this.#playingSpeech || this.#playingSpeech.allowInterruptions)\n ) {\n this.#synthesizeAgentReply();\n }\n\n this.#deferredValidation.onHumanFinalTranscript(newTranscript);\n\n const words = this.#opts.transcription.wordTokenizer.tokenize(newTranscript);\n if (words.length >= 3) {\n // VAD can sometimes not detect that the human is speaking.\n // to make the interruption more reliable, we also interrupt on the final transcript.\n this.#interruptIfPossible();\n }\n });\n }\n\n async #run() {\n this.#updateState('initializing');\n const audioSource = new AudioSource(this.#tts.sampleRate, this.#tts.numChannels);\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', audioSource);\n this.#agentPublication = await this.#room?.localParticipant?.publishTrack(\n track,\n new TrackPublishOptions({ source: TrackSource.SOURCE_MICROPHONE }),\n );\n\n const agentPlayout = new AgentPlayout(audioSource);\n this.#agentOutput = new AgentOutput(agentPlayout, this.#tts);\n\n agentPlayout.on(AgentPlayoutEvent.PLAYOUT_STARTED, () => {\n this.emit(VPAEvent.AGENT_STARTED_SPEAKING);\n this.#updateState('speaking');\n });\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n agentPlayout.on(AgentPlayoutEvent.PLAYOUT_STOPPED, (_) => {\n this.emit(VPAEvent.AGENT_STOPPED_SPEAKING);\n this.#updateState('listening');\n });\n\n this.#trackPublishedFut.resolve();\n\n while (true) {\n await this.#speechQueueOpen.await;\n for await (const speech of this.#speechQueue) {\n if (speech === VoicePipelineAgent.FLUSH_SENTINEL) break;\n this.#playingSpeech = speech;\n await this.#playSpeech(speech);\n this.#playingSpeech = undefined;\n }\n this.#speechQueueOpen = new Future();\n }\n }\n\n #synthesizeAgentReply() {\n this.#pendingAgentReply?.cancel();\n if (this.#humanInput && this.#humanInput.speaking) {\n this.#updateState('thinking', 200);\n }\n\n this.#pendingAgentReply = SpeechHandle.createAssistantReply(\n this.#opts.allowInterruptions,\n true,\n this.#transcribedText,\n );\n const newHandle = this.#pendingAgentReply;\n this.#agentReplyTask = this.#synthesizeAnswerTask(this.#agentReplyTask, newHandle);\n }\n\n #synthesizeAnswerTask(\n oldTask: CancellablePromise<void> | undefined,\n handle?: SpeechHandle,\n ): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n const copiedCtx = this.chatCtx.copy();\n const playingSpeech = this.#playingSpeech;\n if (playingSpeech && playingSpeech.initialized) {\n if (\n (!playingSpeech.userQuestion || playingSpeech.userCommitted) &&\n !playingSpeech.speechCommitted\n ) {\n // the speech is playing but not committed yet,\n // add it to the chat context for this new reply synthesis\n copiedCtx.messages.push(\n ChatMessage.create({\n text: playingSpeech.synthesisHandle.text,\n role: ChatRole.ASSISTANT,\n }),\n );\n }\n }\n\n copiedCtx.messages.push(\n ChatMessage.create({\n text: handle?.userQuestion,\n role: ChatRole.USER,\n }),\n );\n\n if (cancelled) resolve();\n let llmStream = await this.#opts.beforeLLMCallback(this, copiedCtx);\n if (llmStream === false) {\n handle?.cancel();\n return;\n }\n\n if (cancelled) resolve();\n // fallback to default impl if no custom/user stream is returned\n if (!(llmStream instanceof LLMStream)) {\n llmStream = (await defaultBeforeLLMCallback(this, copiedCtx)) as LLMStream;\n }\n\n if (handle!.interrupted) {\n return;\n }\n\n const synthesisHandle = this.#synthesizeAgentSpeech(handle!.id, llmStream);\n handle!.initialize(llmStream, synthesisHandle);\n\n // TODO(theomonnom): find a more reliable way to get the elapsed time from the last EOS\n // (VAD could not have detected any speech — maybe unlikely?)\n const elapsed = !!this.#lastEndOfSpeechTime\n ? Math.round((Date.now() - this.#lastEndOfSpeechTime) * 1000) / 1000\n : -1;\n\n this.#logger.child({ speechId: handle!.id, elapsed }).debug('synthesizing agent reply');\n resolve();\n });\n }\n\n async #playSpeech(handle: SpeechHandle) {\n try {\n await handle.waitForInitialization();\n } catch {\n return;\n }\n await this.#agentPublication!.waitForSubscription();\n const synthesisHandle = handle.synthesisHandle;\n if (synthesisHandle.interrupted) return;\n\n const userQuestion = handle.userQuestion;\n const playHandle = synthesisHandle.play();\n const joinFut = playHandle.join();\n\n const commitUserQuestionIfNeeded = () => {\n if (!userQuestion || synthesisHandle.interrupted || handle.userCommitted) return;\n const isUsingTools =\n handle.source instanceof LLMStream && !!handle.source.functionCalls.length;\n\n // make sure at least some speech was played before committing the user message\n // since we try to validate as fast as possible it is possible the agent gets interrupted\n // really quickly (barely audible), we don't want to mark this question as \"answered\".\n if (\n handle.allowInterruptions &&\n !isUsingTools &&\n playHandle.timePlayed < this.MIN_TIME_PLAYED_FOR_COMMIT &&\n !joinFut.done\n ) {\n return;\n }\n\n this.#logger.child({ userTranscript: userQuestion }).debug('committed user transcript');\n const userMsg = ChatMessage.create({ text: userQuestion, role: ChatRole.USER });\n this.chatCtx.messages.push(userMsg);\n this.emit(VPAEvent.USER_SPEECH_COMMITTED, userMsg);\n\n this.#transcribedText = this.#transcribedText.slice(userQuestion.length);\n handle.markUserCommitted();\n };\n\n // wait for the playHandle to finish and check every 1s if user question should be committed\n commitUserQuestionIfNeeded();\n\n while (!joinFut.done) {\n await new Promise<void>(async (resolve) => {\n setTimeout(resolve, 500);\n await joinFut.await;\n resolve();\n });\n commitUserQuestionIfNeeded();\n if (handle.interrupted) break;\n }\n commitUserQuestionIfNeeded();\n\n const collectedText = handle.synthesisHandle.text;\n const isUsingTools = handle.source instanceof LLMStream && !!handle.source.functionCalls.length;\n const extraToolsMessages = []; // additional messages from the functions to add to the context\n let interrupted = handle.interrupted;\n\n // if the answer is using tools, execute the functions and automatically generate\n // a response to the user question from the returned values\n if (isUsingTools && !interrupted) {\n if (!userQuestion || !handle.userCommitted) {\n throw new Error('user speech should have been committed before using tools');\n }\n const llmStream = handle.source;\n let newFunctionCalls = llmStream.functionCalls;\n\n for (let i = 0; i < this.#opts.maxRecursiveFncCalls; i++) {\n this.emit(VPAEvent.FUNCTION_CALLS_COLLECTED, newFunctionCalls);\n const calledFuncs: FunctionCallInfo[] = [];\n for (const func of newFunctionCalls) {\n const task = func.func.execute(func.params).then(\n (result) => ({ name: func.name, toolCallId: func.toolCallId, result }),\n (error) => ({ name: func.name, toolCallId: func.toolCallId, error }),\n );\n calledFuncs.push({ ...func, task });\n this.#logger\n .child({ function: func.name, speechId: handle.id })\n .debug('executing AI function');\n try {\n await task;\n } catch {\n this.#logger\n .child({ function: func.name, speechId: handle.id })\n .error('error executing AI function');\n }\n }\n\n const toolCallsInfo = [];\n const toolCallsResults = [];\n for (const fnc of calledFuncs) {\n // ignore the function calls that return void\n const task = await fnc.task;\n if (!task || task.result === undefined) continue;\n toolCallsInfo.push(fnc);\n toolCallsResults.push(ChatMessage.createToolFromFunctionResult(task));\n }\n\n if (!toolCallsInfo.length) break;\n\n // generate an answer from the tool calls\n extraToolsMessages.push(ChatMessage.createToolCalls(toolCallsInfo, collectedText));\n extraToolsMessages.push(...toolCallsResults);\n\n const chatCtx = handle.source.chatCtx.copy();\n chatCtx.messages.push(...extraToolsMessages);\n\n const answerLLMStream = this.llm.chat({\n chatCtx,\n fncCtx: this.fncCtx,\n });\n const answerSynthesis = this.#synthesizeAgentSpeech(handle.id, answerLLMStream);\n // replace the synthesis handle with the new one to allow interruption\n handle.synthesisHandle = answerSynthesis;\n const playHandle = answerSynthesis.play();\n await playHandle.join().await;\n\n interrupted = answerSynthesis.interrupted;\n newFunctionCalls = answerLLMStream.functionCalls;\n\n this.emit(VPAEvent.FUNCTION_CALLS_FINISHED, calledFuncs);\n if (!newFunctionCalls) break;\n }\n }\n\n if (handle.addToChatCtx && (!userQuestion || handle.userCommitted)) {\n this.chatCtx.messages.push(...extraToolsMessages);\n if (interrupted) {\n collectedText + '…';\n }\n\n const msg = ChatMessage.create({ text: collectedText, role: ChatRole.ASSISTANT });\n this.chatCtx.messages.push(msg);\n\n handle.markSpeechCommitted();\n if (interrupted) {\n this.emit(VPAEvent.AGENT_SPEECH_INTERRUPTED, msg);\n } else {\n this.emit(VPAEvent.AGENT_SPEECH_COMMITTED, msg);\n }\n\n this.#logger\n .child({\n agentTranscript: collectedText,\n interrupted,\n speechId: handle.id,\n })\n .debug('committed agent speech');\n }\n }\n\n #synthesizeAgentSpeech(\n speechId: string,\n source: string | LLMStream | AsyncIterable<string>,\n ): SynthesisHandle {\n if (!this.#agentOutput) {\n throw new Error('agent output should be initialized when ready');\n }\n\n if (source instanceof LLMStream) {\n source = llmStreamToStringIterable(speechId, source);\n }\n\n const ogSource = source;\n if (!(typeof source === 'string')) {\n // TODO(nbsp): itertools.tee\n }\n\n const ttsSource = this.#opts.beforeTTSCallback(this, ogSource);\n if (!ttsSource) {\n throw new Error('beforeTTSCallback must return string or AsyncIterable<string>');\n }\n\n return this.#agentOutput.synthesize(speechId, ttsSource);\n }\n\n async #validateReplyIfPossible() {\n if (this.#playingSpeech && !this.#playingSpeech.allowInterruptions) {\n this.#logger\n .child({ speechId: this.#playingSpeech.id })\n .debug('skipping validation, agent is speaking and does not allow interruptions');\n return;\n }\n\n if (!this.#pendingAgentReply) {\n if (this.#opts.preemptiveSynthesis || !this.#transcribedText) {\n return;\n }\n this.#synthesizeAgentReply();\n }\n\n if (!this.#pendingAgentReply) {\n throw new Error('pending agent reply is undefined');\n }\n\n // in some bad timimg, we could end up with two pushed agent replies inside the speech queue.\n // so make sure we directly interrupt every reply when validating a new one\n if (this.#speechQueueOpen.done) {\n for await (const speech of this.#speechQueue) {\n if (speech === VoicePipelineAgent.FLUSH_SENTINEL) break;\n if (!speech.isReply) continue;\n if (speech.allowInterruptions) speech.interrupt();\n }\n }\n\n this.#logger.child({ speechId: this.#pendingAgentReply.id }).debug('validated agent reply');\n\n this.#addSpeechForPlayout(this.#pendingAgentReply);\n this.#pendingAgentReply = undefined;\n this.#transcribedInterimText = '';\n }\n\n #interruptIfPossible() {\n if (\n !this.#playingSpeech ||\n !this.#playingSpeech.allowInterruptions ||\n this.#playingSpeech.interrupted\n ) {\n return;\n }\n\n if (this.#opts.interruptMinWords !== 0) {\n // check the final/interim transcribed text for the minimum word count\n // to interrupt the agent speech\n const interimWords = this.#opts.transcription.wordTokenizer.tokenize(\n this.#transcribedInterimText,\n );\n if (interimWords.length < this.#opts.interruptMinWords) {\n return;\n }\n }\n this.#playingSpeech.interrupt();\n }\n\n #addSpeechForPlayout(handle: SpeechHandle) {\n this.#speechQueue.put(handle);\n this.#speechQueue.put(VoicePipelineAgent.FLUSH_SENTINEL);\n this.#speechQueueOpen.resolve();\n }\n\n /** Close the voice assistant. */\n async close() {\n if (!this.#started) {\n return;\n }\n\n this.#room?.removeAllListeners(RoomEvent.ParticipantConnected);\n // TODO(nbsp): await this.#deferredValidation.close()\n }\n}\n\nasync function* llmStreamToStringIterable(\n speechId: string,\n stream: LLMStream,\n): AsyncIterable<string> {\n const startTime = Date.now();\n let firstFrame = true;\n for await (const chunk of stream) {\n const content = chunk.choices[0]?.delta.content;\n if (!content) continue;\n\n if (firstFrame) {\n firstFrame = false;\n log()\n .child({ speechId, elapsed: Math.round(Date.now() - startTime) })\n .debug('received first LLM token');\n }\n yield content;\n }\n}\n\n/** This class is used to try to find the best time to validate the agent reply. */\nclass DeferredReplyValidation {\n // if the STT gives us punctuation, we can try to validate the reply faster.\n readonly PUNCTUATION = '.!?';\n readonly PUNCTUATION_REDUCE_FACTOR = 0.75;\n readonly LATE_TRANSCRIPT_TOLERANCE = 1.5; // late compared to end of speech\n\n #validateFunc: () => Promise<void>;\n #validatingPromise?: Promise<void>;\n #validatingFuture = new Future();\n #lastFinalTranscript = '';\n #lastRecvEndOfSpeechTime = 0;\n #speaking = false;\n #endOfSpeechDelay: number;\n #finalTranscriptDelay: number;\n\n constructor(validateFunc: () => Promise<void>, minEndpointingDelay: number) {\n this.#validateFunc = validateFunc;\n this.#endOfSpeechDelay = minEndpointingDelay;\n this.#finalTranscriptDelay = minEndpointingDelay;\n }\n\n get validating(): boolean {\n return !this.#validatingFuture.done;\n }\n\n onHumanFinalTranscript(transcript: string) {\n this.#lastFinalTranscript = transcript.trim();\n if (this.#speaking) return;\n\n const hasRecentEndOfSpeech =\n Date.now() - this.#lastRecvEndOfSpeechTime < this.LATE_TRANSCRIPT_TOLERANCE;\n let delay = hasRecentEndOfSpeech ? this.#endOfSpeechDelay : this.#finalTranscriptDelay;\n delay = this.#endWithPunctuation() ? delay * this.PUNCTUATION_REDUCE_FACTOR : 1;\n\n this.#run(delay);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n onHumanStartOfSpeech(_: VADEvent) {\n this.#speaking = true;\n // TODO(nbsp):\n // if (this.validating) {\n // this.#validatingPromise.cancel()\n // }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n onHumanEndOfSpeech(_: VADEvent) {\n this.#speaking = false;\n this.#lastRecvEndOfSpeechTime = Date.now();\n\n if (this.#lastFinalTranscript) {\n const delay = this.#endWithPunctuation()\n ? this.#endOfSpeechDelay * this.PUNCTUATION_REDUCE_FACTOR\n : 1;\n this.#run(delay);\n }\n }\n\n // TODO(nbsp): aclose\n\n #endWithPunctuation(): boolean {\n return (\n this.#lastFinalTranscript.length > 0 &&\n this.PUNCTUATION.includes(this.#lastFinalTranscript[this.#lastFinalTranscript.length - 1]!)\n );\n }\n\n #resetStates() {\n this.#lastFinalTranscript = '';\n this.#lastRecvEndOfSpeechTime = 0;\n }\n\n #run(delay: number) {\n const runTask = async (delay: number) => {\n await new Promise((resolve) => setTimeout(resolve, delay));\n this.#resetStates();\n await this.#validateFunc();\n };\n\n this.#validatingFuture = new Future();\n this.#validatingPromise = runTask(delay);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,sBAMO;AAEP,yBAAyB;AAOzB,iBAA0B;AAC1B,IAAAA,cAAmD;AACnD,iBAAoB;AACpB,iBAA4D;AAC5D,mBAIO;AAGP,iBAAkD;AAClD,mBAAiF;AAGjF,0BAA4B;AAC5B,2BAAgD;AAChD,yBAA4C;AAC5C,2BAA6B;AAGtB,MAAM,wBAAwB;AAY9B,IAAK,WAAL,kBAAKC,cAAL;AACL,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AATU,SAAAA;AAAA,GAAA;AAwBL,MAAM,iBAAiB;AAAA,EAC5B;AAAA,EACA;AAAA,EACA,YAAY,oBAAI,IAAiB;AAAA,EACjC,OAAO;AAAA,EAEP,YAAY,OAA2B,WAAsB;AAC3D,SAAK,SAAS;AACd,SAAK,aAAa;AAClB,qBAAiB,WAAW;AAAA,EAC9B;AAAA,EAEA,OAAO,aAA+B;AACpC,WAAO,iBAAiB;AAAA,EAC1B;AAAA,EAEA,IAAI,QAA4B;AAC9B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,cAAc,KAAa,OAAY;AACrC,SAAK,UAAU,IAAI,KAAK,KAAK;AAAA,EAC/B;AAAA,EAEA,YAAY,KAAa,YAAiB,QAAW;AACnD,WAAO,KAAK,UAAU,IAAI,GAAG,KAAK;AAAA,EACpC;AAAA,EAEA,IAAI,YAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AACF;AAEA,MAAM,2BAA8C,CAClD,OACA,YACc;AACd,SAAO,MAAM,IAAI,KAAK,EAAE,SAAS,QAAQ,MAAM,OAAO,CAAC;AACzD;AAEA,MAAM,2BAA8C,CAElD,GACA,SACmC;AACnC,SAAO;AACT;AA6BA,MAAM,mCAA8D;AAAA,EAClE,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,0BAA0B;AAAA,EAC1B,mBAAmB,IAAI,aAAAC,kBAAuB;AAAA,EAC9C,eAAe,IAAI,aAAAC,cAAmB,KAAK;AAAA,EAC3C,eAAe;AACjB;AAuCA,MAAM,oBAAgC;AAAA,EACpC,SAAS,IAAI,wBAAY;AAAA,EACzB,oBAAoB;AAAA,EACpB,yBAAyB;AAAA,EACzB,mBAAmB;AAAA,EACnB,qBAAqB;AAAA,EACrB,sBAAsB;AAAA,EACtB,qBAAqB;AAAA,EACrB,mBAAmB;AAAA,EACnB,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAGO,MAAM,2BAA4B,mBAAAC,QAAsD;AAAA;AAAA,EAEpF,6BAA6B;AAAA,EACtC,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EAElE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,qBAAqB,IAAI,oBAAO;AAAA,EAChC;AAAA,EACA;AAAA,EACA;AAAA,EACA,mBAAmB;AAAA,EACnB,0BAA0B;AAAA,EAC1B,mBAAmB,IAAI,oBAAO;AAAA,EAC9B,eAAe,IAAI,gCAA4E;AAAA,EAC/F;AAAA,EACA;AAAA,EACA,WAAW;AAAA,EACX;AAAA,EACA,eAAkD;AAAA,EAClD;AAAA,EACA,cAAU,gBAAI;AAAA,EACd;AAAA,EAEA,YAEE,KAEA,KAEA,KAEA,KAEA,OAA4B,mBAC5B;AACA,UAAM;AAEN,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,CAAC,IAAI,aAAa,WAAW;AAC/B,YAAM,IAAI,WAAAC,cAAiB,KAAK,GAAG;AAAA,IACrC;AAEA,QAAI,CAAC,IAAI,aAAa,WAAW;AAC/B,YAAM,IAAI,WAAAC,cAAiB,KAAK,IAAI,aAAAJ,kBAAuB,CAAC;AAAA,IAC9D;AAEA,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,OAAO;AAEZ,SAAK,sBAAsB,IAAI;AAAA,MAC7B,KAAK,yBAAyB,KAAK,IAAI;AAAA,MACvC,KAAK,MAAM;AAAA,IACb;AAAA,EACF;AAAA,EAEA,IAAI,SAAsC;AACxC,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA,EAEA,IAAI,OAAO,KAAsB;AAC/B,SAAK,MAAM,SAAS;AAAA,EACtB;AAAA,EAEA,IAAI,UAAuB;AACzB,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAEE,MAQA,cAAiD,MACjD;AACA,QAAI,KAAK,UAAU;AACjB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AACA,SAAK,GAAG,0BAAU,sBAAsB,CAACK,iBAAmC;AAE1E,UAAI,KAAK,cAAc;AACrB;AAAA,MACF;AACA,WAAK,iBAAiB,KAAK,MAAMA,aAAY,QAAQ;AAAA,IACvD,CAAC;AAED,SAAK,QAAQ;AACb,SAAK,eAAe;AAEpB,QAAI,aAAa;AACf,UAAI,OAAO,gBAAgB,UAAU;AACnC,aAAK,iBAAiB,WAAW;AAAA,MACnC,OAAO;AACL,aAAK,iBAAiB,YAAY,QAAQ;AAAA,MAC5C;AAAA,IACF;AAEA,SAAK,KAAK;AAAA,EACZ;AAAA;AAAA,EAGA,MAAM,IACJ,QACA,qBAAqB,MACrB,eAAe,MACf;AACA,UAAM,KAAK,mBAAmB;AAC9B,UAAM,YAAY,kCAAa,sBAAsB,oBAAoB,YAAY;AACrF,UAAM,kBAAkB,KAAK,uBAAuB,UAAU,IAAI,MAAM;AACxE,cAAU,WAAW,QAAQ,eAAe;AAC5C,SAAK,qBAAqB,SAAS;AAAA,EACrC;AAAA,EAEA,aAAa,OAAmB,QAAQ,GAAG;AACzC,UAAM,UAAU,CAACC,WAA4C;AAC3D,aAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AApWpE;AAqWQ,YAAI,YAAY;AAChB,iBAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AACD,cAAM,IAAI,QAAQ,CAACC,aAAY,WAAWA,UAASD,MAAK,CAAC;AACzD,aAAI,UAAK,UAAL,mBAAY,aAAa;AAC3B,cAAI,CAAC,WAAW;AACd,oBAAM,UAAK,MAAM,qBAAX,mBAA6B,cAAc,EAAE,CAAC,qBAAqB,GAAG,MAAM;AAAA,UACpF;AAAA,QACF;AACA,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH;AAEA,QAAI,KAAK,kBAAkB;AACzB,WAAK,iBAAiB,OAAO;AAAA,IAC/B;AAEA,SAAK,mBAAmB,QAAQ,KAAK;AAAA,EACvC;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,eAAe,KAAK,MAAM,mBAAmB,IAAI,mBAAmB,KAAK;AAC9E,QAAI,CAAC,KAAK,cAAc;AACtB,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,SAAK,cAAc,IAAI,8BAAW,KAAK,OAAO,KAAK,MAAM,KAAK,MAAM,KAAK,YAAY;AACrF,SAAK,YAAY,GAAG,mCAAgB,iBAAiB,CAAC,UAAU;AAC9D,WAAK,KAAK,6BAA8B;AACxC,WAAK,oBAAoB,qBAAqB,KAAK;AAAA,IACrD,CAAC;AACD,SAAK,YAAY,GAAG,mCAAgB,oBAAoB,CAAC,UAAU;AACjE,UAAI,CAAC,KAAK,mBAAmB,MAAM;AACjC;AAAA,MACF;AACA,UAAI,CAAC,KAAK,cAAc;AACtB,cAAM,IAAI,MAAM,2BAA2B;AAAA,MAC7C;AAEA,UAAI,KAAK;AACT,UAAI,KAAK,MAAM,oBAAoB;AACjC,aAAK,KAAK,IAAI,GAAG,IAAI,MAAM,WAAW;AACtC,aAAK,aAAa,QAAQ,eAAe;AAAA,MAC3C;AAEA,UAAI,MAAM,kBAAkB,KAAK,MAAM,yBAAyB;AAC9D,aAAK,qBAAqB;AAAA,MAC5B;AAAA,IACF,CAAC;AACD,SAAK,YAAY,GAAG,mCAAgB,eAAe,CAAC,UAAU;AAC5D,WAAK,KAAK,6BAA8B;AACxC,WAAK,oBAAoB,mBAAmB,KAAK;AACjD,WAAK,uBAAuB,KAAK,IAAI;AAAA,IACvC,CAAC;AACD,SAAK,YAAY,GAAG,mCAAgB,oBAAoB,CAAC,UAAU;AACjE,WAAK,0BAA0B,MAAM,aAAc,CAAC,EAAE;AAAA,IACxD,CAAC;AACD,SAAK,YAAY,GAAG,mCAAgB,kBAAkB,CAAC,UAAU;AAC/D,YAAM,gBAAgB,MAAM,aAAc,CAAC,EAAE;AAC7C,UAAI,CAAC,cAAe;AAEpB,WAAK,QAAQ,MAAM,EAAE,gBAAgB,cAAc,CAAC,EAAE,MAAM,0BAA0B;AACtF,WAAK,qBAAqB,KAAK,mBAAmB,MAAM,MAAM;AAE9D,UACE,KAAK,MAAM,wBACV,CAAC,KAAK,kBAAkB,KAAK,eAAe,qBAC7C;AACA,aAAK,sBAAsB;AAAA,MAC7B;AAEA,WAAK,oBAAoB,uBAAuB,aAAa;AAE7D,YAAM,QAAQ,KAAK,MAAM,cAAc,cAAc,SAAS,aAAa;AAC3E,UAAI,MAAM,UAAU,GAAG;AAGrB,aAAK,qBAAqB;AAAA,MAC5B;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,OAAO;AA9bf;AA+bI,SAAK,aAAa,cAAc;AAChC,UAAM,cAAc,IAAI,4BAAY,KAAK,KAAK,YAAY,KAAK,KAAK,WAAW;AAC/E,UAAM,QAAQ,gCAAgB,iBAAiB,mBAAmB,WAAW;AAC7E,SAAK,oBAAoB,QAAM,gBAAK,UAAL,mBAAY,qBAAZ,mBAA8B;AAAA,MAC3D;AAAA,MACA,IAAI,oCAAoB,EAAE,QAAQ,4BAAY,kBAAkB,CAAC;AAAA;AAGnE,UAAM,eAAe,IAAI,kCAAa,WAAW;AACjD,SAAK,eAAe,IAAI,gCAAY,cAAc,KAAK,IAAI;AAE3D,iBAAa,GAAG,uCAAkB,iBAAiB,MAAM;AACvD,WAAK,KAAK,8BAA+B;AACzC,WAAK,aAAa,UAAU;AAAA,IAC9B,CAAC;AAED,iBAAa,GAAG,uCAAkB,iBAAiB,CAAC,MAAM;AACxD,WAAK,KAAK,8BAA+B;AACzC,WAAK,aAAa,WAAW;AAAA,IAC/B,CAAC;AAED,SAAK,mBAAmB,QAAQ;AAEhC,WAAO,MAAM;AACX,YAAM,KAAK,iBAAiB;AAC5B,uBAAiB,UAAU,KAAK,cAAc;AAC5C,YAAI,WAAW,mBAAmB,eAAgB;AAClD,aAAK,iBAAiB;AACtB,cAAM,KAAK,YAAY,MAAM;AAC7B,aAAK,iBAAiB;AAAA,MACxB;AACA,WAAK,mBAAmB,IAAI,oBAAO;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,wBAAwB;AAle1B;AAmeI,eAAK,uBAAL,mBAAyB;AACzB,QAAI,KAAK,eAAe,KAAK,YAAY,UAAU;AACjD,WAAK,aAAa,YAAY,GAAG;AAAA,IACnC;AAEA,SAAK,qBAAqB,kCAAa;AAAA,MACrC,KAAK,MAAM;AAAA,MACX;AAAA,MACA,KAAK;AAAA,IACP;AACA,UAAM,YAAY,KAAK;AACvB,SAAK,kBAAkB,KAAK,sBAAsB,KAAK,iBAAiB,SAAS;AAAA,EACnF;AAAA,EAEA,sBACE,SACA,QAC0B;AAC1B,WAAO,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,UAAI,YAAY;AAChB,eAAS,MAAM;AACb,oBAAY;AAAA,MACd,CAAC;AAED,UAAI,SAAS;AACX,kBAAM,+BAAiB,OAAO;AAAA,MAChC;AAEA,YAAM,YAAY,KAAK,QAAQ,KAAK;AACpC,YAAM,gBAAgB,KAAK;AAC3B,UAAI,iBAAiB,cAAc,aAAa;AAC9C,aACG,CAAC,cAAc,gBAAgB,cAAc,kBAC9C,CAAC,cAAc,iBACf;AAGA,oBAAU,SAAS;AAAA,YACjB,wBAAY,OAAO;AAAA,cACjB,MAAM,cAAc,gBAAgB;AAAA,cACpC,MAAM,qBAAS;AAAA,YACjB,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AAEA,gBAAU,SAAS;AAAA,QACjB,wBAAY,OAAO;AAAA,UACjB,MAAM,iCAAQ;AAAA,UACd,MAAM,qBAAS;AAAA,QACjB,CAAC;AAAA,MACH;AAEA,UAAI,UAAW,SAAQ;AACvB,UAAI,YAAY,MAAM,KAAK,MAAM,kBAAkB,MAAM,SAAS;AAClE,UAAI,cAAc,OAAO;AACvB,yCAAQ;AACR;AAAA,MACF;AAEA,UAAI,UAAW,SAAQ;AAEvB,UAAI,EAAE,qBAAqB,uBAAY;AACrC,oBAAa,MAAM,yBAAyB,MAAM,SAAS;AAAA,MAC7D;AAEA,UAAI,OAAQ,aAAa;AACvB;AAAA,MACF;AAEA,YAAM,kBAAkB,KAAK,uBAAuB,OAAQ,IAAI,SAAS;AACzE,aAAQ,WAAW,WAAW,eAAe;AAI7C,YAAM,UAAU,CAAC,CAAC,KAAK,uBACnB,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,wBAAwB,GAAI,IAAI,MAC9D;AAEJ,WAAK,QAAQ,MAAM,EAAE,UAAU,OAAQ,IAAI,QAAQ,CAAC,EAAE,MAAM,0BAA0B;AACtF,cAAQ;AAAA,IACV,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,YAAY,QAAsB;AACtC,QAAI;AACF,YAAM,OAAO,sBAAsB;AAAA,IACrC,QAAQ;AACN;AAAA,IACF;AACA,UAAM,KAAK,kBAAmB,oBAAoB;AAClD,UAAM,kBAAkB,OAAO;AAC/B,QAAI,gBAAgB,YAAa;AAEjC,UAAM,eAAe,OAAO;AAC5B,UAAM,aAAa,gBAAgB,KAAK;AACxC,UAAM,UAAU,WAAW,KAAK;AAEhC,UAAM,6BAA6B,MAAM;AACvC,UAAI,CAAC,gBAAgB,gBAAgB,eAAe,OAAO,cAAe;AAC1E,YAAME,gBACJ,OAAO,kBAAkB,wBAAa,CAAC,CAAC,OAAO,OAAO,cAAc;AAKtE,UACE,OAAO,sBACP,CAACA,iBACD,WAAW,aAAa,KAAK,8BAC7B,CAAC,QAAQ,MACT;AACA;AAAA,MACF;AAEA,WAAK,QAAQ,MAAM,EAAE,gBAAgB,aAAa,CAAC,EAAE,MAAM,2BAA2B;AACtF,YAAM,UAAU,wBAAY,OAAO,EAAE,MAAM,cAAc,MAAM,qBAAS,KAAK,CAAC;AAC9E,WAAK,QAAQ,SAAS,KAAK,OAAO;AAClC,WAAK,KAAK,+BAAgC,OAAO;AAEjD,WAAK,mBAAmB,KAAK,iBAAiB,MAAM,aAAa,MAAM;AACvE,aAAO,kBAAkB;AAAA,IAC3B;AAGA,+BAA2B;AAE3B,WAAO,CAAC,QAAQ,MAAM;AACpB,YAAM,IAAI,QAAc,OAAO,YAAY;AACzC,mBAAW,SAAS,GAAG;AACvB,cAAM,QAAQ;AACd,gBAAQ;AAAA,MACV,CAAC;AACD,iCAA2B;AAC3B,UAAI,OAAO,YAAa;AAAA,IAC1B;AACA,+BAA2B;AAE3B,UAAM,gBAAgB,OAAO,gBAAgB;AAC7C,UAAM,eAAe,OAAO,kBAAkB,wBAAa,CAAC,CAAC,OAAO,OAAO,cAAc;AACzF,UAAM,qBAAqB,CAAC;AAC5B,QAAI,cAAc,OAAO;AAIzB,QAAI,gBAAgB,CAAC,aAAa;AAChC,UAAI,CAAC,gBAAgB,CAAC,OAAO,eAAe;AAC1C,cAAM,IAAI,MAAM,2DAA2D;AAAA,MAC7E;AACA,YAAM,YAAY,OAAO;AACzB,UAAI,mBAAmB,UAAU;AAEjC,eAAS,IAAI,GAAG,IAAI,KAAK,MAAM,sBAAsB,KAAK;AACxD,aAAK,KAAK,kCAAmC,gBAAgB;AAC7D,cAAM,cAAkC,CAAC;AACzC,mBAAW,QAAQ,kBAAkB;AACnC,gBAAM,OAAO,KAAK,KAAK,QAAQ,KAAK,MAAM,EAAE;AAAA,YAC1C,CAAC,YAAY,EAAE,MAAM,KAAK,MAAM,YAAY,KAAK,YAAY,OAAO;AAAA,YACpE,CAAC,WAAW,EAAE,MAAM,KAAK,MAAM,YAAY,KAAK,YAAY,MAAM;AAAA,UACpE;AACA,sBAAY,KAAK,EAAE,GAAG,MAAM,KAAK,CAAC;AAClC,eAAK,QACF,MAAM,EAAE,UAAU,KAAK,MAAM,UAAU,OAAO,GAAG,CAAC,EAClD,MAAM,uBAAuB;AAChC,cAAI;AACF,kBAAM;AAAA,UACR,QAAQ;AACN,iBAAK,QACF,MAAM,EAAE,UAAU,KAAK,MAAM,UAAU,OAAO,GAAG,CAAC,EAClD,MAAM,6BAA6B;AAAA,UACxC;AAAA,QACF;AAEA,cAAM,gBAAgB,CAAC;AACvB,cAAM,mBAAmB,CAAC;AAC1B,mBAAW,OAAO,aAAa;AAE7B,gBAAM,OAAO,MAAM,IAAI;AACvB,cAAI,CAAC,QAAQ,KAAK,WAAW,OAAW;AACxC,wBAAc,KAAK,GAAG;AACtB,2BAAiB,KAAK,wBAAY,6BAA6B,IAAI,CAAC;AAAA,QACtE;AAEA,YAAI,CAAC,cAAc,OAAQ;AAG3B,2BAAmB,KAAK,wBAAY,gBAAgB,eAAe,aAAa,CAAC;AACjF,2BAAmB,KAAK,GAAG,gBAAgB;AAE3C,cAAM,UAAU,OAAO,OAAO,QAAQ,KAAK;AAC3C,gBAAQ,SAAS,KAAK,GAAG,kBAAkB;AAE3C,cAAM,kBAAkB,KAAK,IAAI,KAAK;AAAA,UACpC;AAAA,UACA,QAAQ,KAAK;AAAA,QACf,CAAC;AACD,cAAM,kBAAkB,KAAK,uBAAuB,OAAO,IAAI,eAAe;AAE9E,eAAO,kBAAkB;AACzB,cAAMC,cAAa,gBAAgB,KAAK;AACxC,cAAMA,YAAW,KAAK,EAAE;AAExB,sBAAc,gBAAgB;AAC9B,2BAAmB,gBAAgB;AAEnC,aAAK,KAAK,iCAAkC,WAAW;AACvD,YAAI,CAAC,iBAAkB;AAAA,MACzB;AAAA,IACF;AAEA,QAAI,OAAO,iBAAiB,CAAC,gBAAgB,OAAO,gBAAgB;AAClE,WAAK,QAAQ,SAAS,KAAK,GAAG,kBAAkB;AAChD,UAAI,aAAa;AACf,wBAAgB;AAAA,MAClB;AAEA,YAAM,MAAM,wBAAY,OAAO,EAAE,MAAM,eAAe,MAAM,qBAAS,UAAU,CAAC;AAChF,WAAK,QAAQ,SAAS,KAAK,GAAG;AAE9B,aAAO,oBAAoB;AAC3B,UAAI,aAAa;AACf,aAAK,KAAK,kCAAmC,GAAG;AAAA,MAClD,OAAO;AACL,aAAK,KAAK,gCAAiC,GAAG;AAAA,MAChD;AAEA,WAAK,QACF,MAAM;AAAA,QACL,iBAAiB;AAAA,QACjB;AAAA,QACA,UAAU,OAAO;AAAA,MACnB,CAAC,EACA,MAAM,wBAAwB;AAAA,IACnC;AAAA,EACF;AAAA,EAEA,uBACE,UACA,QACiB;AACjB,QAAI,CAAC,KAAK,cAAc;AACtB,YAAM,IAAI,MAAM,+CAA+C;AAAA,IACjE;AAEA,QAAI,kBAAkB,sBAAW;AAC/B,eAAS,0BAA0B,UAAU,MAAM;AAAA,IACrD;AAEA,UAAM,WAAW;AACjB,QAAI,EAAE,OAAO,WAAW,WAAW;AAAA,IAEnC;AAEA,UAAM,YAAY,KAAK,MAAM,kBAAkB,MAAM,QAAQ;AAC7D,QAAI,CAAC,WAAW;AACd,YAAM,IAAI,MAAM,+DAA+D;AAAA,IACjF;AAEA,WAAO,KAAK,aAAa,WAAW,UAAU,SAAS;AAAA,EACzD;AAAA,EAEA,MAAM,2BAA2B;AAC/B,QAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,oBAAoB;AAClE,WAAK,QACF,MAAM,EAAE,UAAU,KAAK,eAAe,GAAG,CAAC,EAC1C,MAAM,yEAAyE;AAClF;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,oBAAoB;AAC5B,UAAI,KAAK,MAAM,uBAAuB,CAAC,KAAK,kBAAkB;AAC5D;AAAA,MACF;AACA,WAAK,sBAAsB;AAAA,IAC7B;AAEA,QAAI,CAAC,KAAK,oBAAoB;AAC5B,YAAM,IAAI,MAAM,kCAAkC;AAAA,IACpD;AAIA,QAAI,KAAK,iBAAiB,MAAM;AAC9B,uBAAiB,UAAU,KAAK,cAAc;AAC5C,YAAI,WAAW,mBAAmB,eAAgB;AAClD,YAAI,CAAC,OAAO,QAAS;AACrB,YAAI,OAAO,mBAAoB,QAAO,UAAU;AAAA,MAClD;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,UAAU,KAAK,mBAAmB,GAAG,CAAC,EAAE,MAAM,uBAAuB;AAE1F,SAAK,qBAAqB,KAAK,kBAAkB;AACjD,SAAK,qBAAqB;AAC1B,SAAK,0BAA0B;AAAA,EACjC;AAAA,EAEA,uBAAuB;AACrB,QACE,CAAC,KAAK,kBACN,CAAC,KAAK,eAAe,sBACrB,KAAK,eAAe,aACpB;AACA;AAAA,IACF;AAEA,QAAI,KAAK,MAAM,sBAAsB,GAAG;AAGtC,YAAM,eAAe,KAAK,MAAM,cAAc,cAAc;AAAA,QAC1D,KAAK;AAAA,MACP;AACA,UAAI,aAAa,SAAS,KAAK,MAAM,mBAAmB;AACtD;AAAA,MACF;AAAA,IACF;AACA,SAAK,eAAe,UAAU;AAAA,EAChC;AAAA,EAEA,qBAAqB,QAAsB;AACzC,SAAK,aAAa,IAAI,MAAM;AAC5B,SAAK,aAAa,IAAI,mBAAmB,cAAc;AACvD,SAAK,iBAAiB,QAAQ;AAAA,EAChC;AAAA;AAAA,EAGA,MAAM,QAAQ;AAzyBhB;AA0yBI,QAAI,CAAC,KAAK,UAAU;AAClB;AAAA,IACF;AAEA,eAAK,UAAL,mBAAY,mBAAmB,0BAAU;AAAA,EAE3C;AACF;AAEA,gBAAgB,0BACd,UACA,QACuB;AAtzBzB;AAuzBE,QAAM,YAAY,KAAK,IAAI;AAC3B,MAAI,aAAa;AACjB,mBAAiB,SAAS,QAAQ;AAChC,UAAM,WAAU,WAAM,QAAQ,CAAC,MAAf,mBAAkB,MAAM;AACxC,QAAI,CAAC,QAAS;AAEd,QAAI,YAAY;AACd,mBAAa;AACb,0BAAI,EACD,MAAM,EAAE,UAAU,SAAS,KAAK,MAAM,KAAK,IAAI,IAAI,SAAS,EAAE,CAAC,EAC/D,MAAM,0BAA0B;AAAA,IACrC;AACA,UAAM;AAAA,EACR;AACF;AAGA,MAAM,wBAAwB;AAAA;AAAA,EAEnB,cAAc;AAAA,EACd,4BAA4B;AAAA,EAC5B,4BAA4B;AAAA;AAAA,EAErC;AAAA,EACA;AAAA,EACA,oBAAoB,IAAI,oBAAO;AAAA,EAC/B,uBAAuB;AAAA,EACvB,2BAA2B;AAAA,EAC3B,YAAY;AAAA,EACZ;AAAA,EACA;AAAA,EAEA,YAAY,cAAmC,qBAA6B;AAC1E,SAAK,gBAAgB;AACrB,SAAK,oBAAoB;AACzB,SAAK,wBAAwB;AAAA,EAC/B;AAAA,EAEA,IAAI,aAAsB;AACxB,WAAO,CAAC,KAAK,kBAAkB;AAAA,EACjC;AAAA,EAEA,uBAAuB,YAAoB;AACzC,SAAK,uBAAuB,WAAW,KAAK;AAC5C,QAAI,KAAK,UAAW;AAEpB,UAAM,uBACJ,KAAK,IAAI,IAAI,KAAK,2BAA2B,KAAK;AACpD,QAAI,QAAQ,uBAAuB,KAAK,oBAAoB,KAAK;AACjE,YAAQ,KAAK,oBAAoB,IAAI,QAAQ,KAAK,4BAA4B;AAE9E,SAAK,KAAK,KAAK;AAAA,EACjB;AAAA;AAAA,EAGA,qBAAqB,GAAa;AAChC,SAAK,YAAY;AAAA,EAKnB;AAAA;AAAA,EAGA,mBAAmB,GAAa;AAC9B,SAAK,YAAY;AACjB,SAAK,2BAA2B,KAAK,IAAI;AAEzC,QAAI,KAAK,sBAAsB;AAC7B,YAAM,QAAQ,KAAK,oBAAoB,IACnC,KAAK,oBAAoB,KAAK,4BAC9B;AACJ,WAAK,KAAK,KAAK;AAAA,IACjB;AAAA,EACF;AAAA;AAAA,EAIA,sBAA+B;AAC7B,WACE,KAAK,qBAAqB,SAAS,KACnC,KAAK,YAAY,SAAS,KAAK,qBAAqB,KAAK,qBAAqB,SAAS,CAAC,CAAE;AAAA,EAE9F;AAAA,EAEA,eAAe;AACb,SAAK,uBAAuB;AAC5B,SAAK,2BAA2B;AAAA,EAClC;AAAA,EAEA,KAAK,OAAe;AAClB,UAAM,UAAU,OAAOH,WAAkB;AACvC,YAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAASA,MAAK,CAAC;AACzD,WAAK,aAAa;AAClB,YAAM,KAAK,cAAc;AAAA,IAC3B;AAEA,SAAK,oBAAoB,IAAI,oBAAO;AACpC,SAAK,qBAAqB,QAAQ,KAAK;AAAA,EACzC;AACF;","names":["import_llm","VPAEvent","BasicSentenceTokenizer","BasicWordTokenizer","EventEmitter","STTStreamAdapter","TTSStreamAdapter","participant","delay","resolve","isUsingTools","playHandle"]}
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline_agent.d.ts","sourceRoot":"","sources":["../../src/pipeline/pipeline_agent.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAyB,iBAAiB,EAAE,IAAI,EAAE,MAAM,mBAAmB,CAAC;AAQxF,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EACV,sBAAsB,EACtB,gBAAgB,EAChB,eAAe,EACf,GAAG,EACJ,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,EAAE,WAAW,EAAE,WAAW,EAAY,MAAM,iBAAiB,CAAC;AAErE,OAAO,EAAE,KAAK,GAAG,EAAqC,MAAM,iBAAiB,CAAC;AAM9E,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACjF,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAG3C,OAAO,KAAK,EAAE,GAAG,EAAY,MAAM,WAAW,CAAC;AAC/C,OAAO,KAAK,EAAE,YAAY,EAAmB,MAAM,mBAAmB,CAAC;AAMvE,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG,UAAU,GAAG,WAAW,GAAG,UAAU,CAAC;AAChF,eAAO,MAAM,qBAAqB,mBAAmB,CAAC;AAEtD,MAAM,MAAM,iBAAiB,GAAG,CAC9B,KAAK,EAAE,kBAAkB,EACzB,OAAO,EAAE,WAAW,KACjB,SAAS,GAAG,KAAK,GAAG,IAAI,GAAG,OAAO,CAAC,SAAS,GAAG,KAAK,GAAG,IAAI,CAAC,CAAC;AAElE,MAAM,MAAM,iBAAiB,GAAG,CAC9B,KAAK,EAAE,kBAAkB,EACzB,MAAM,EAAE,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,KACnC,YAAY,CAAC;AAElB,oBAAY,QAAQ;IAClB,qBAAqB,IAAA;IACrB,qBAAqB,IAAA;IACrB,sBAAsB,IAAA;IACtB,sBAAsB,IAAA;IACtB,qBAAqB,IAAA;IACrB,sBAAsB,IAAA;IACtB,wBAAwB,IAAA;IACxB,wBAAwB,IAAA;IACxB,uBAAuB,IAAA;CACxB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC7C,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC7C,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC9C,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC9C,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IAC7D,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IAC9D,CAAC,QAAQ,CAAC,wBAAwB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IAChE,CAAC,QAAQ,CAAC,wBAAwB,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,EAAE,KAAK,IAAI,CAAC;IACzE,CAAC,QAAQ,CAAC,uBAAuB,CAAC,EAAE,CAAC,KAAK,EAAE,sBAAsB,EAAE,KAAK,IAAI,CAAC;CAC/E,CAAC;AAEF,qBAAa,gBAAgB;;gBAMf,KAAK,EAAE,kBAAkB,EAAE,SAAS,EAAE,SAAS;IAM3D,MAAM,CAAC,UAAU,IAAI,gBAAgB;IAIrC,IAAI,KAAK,IAAI,kBAAkB,CAE9B;IAED,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG;IAIrC,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,GAAE,GAAe;IAInD,IAAI,SAAS,IAAI,SAAS,CAEzB;CACF;AAiBD,MAAM,WAAW,yBAAyB;IACxC,8DAA8D;IAC9D,iBAAiB,EAAE,OAAO,CAAC;IAC3B,+DAA+D;IAC/D,kBAAkB,EAAE,OAAO,CAAC;IAC5B;;;OAGG;IACH,wBAAwB,EAAE,MAAM,CAAC;IACjC;;;OAGG;IACH,iBAAiB,EAAE,iBAAiB,CAAC;IACrC;;;OAGG;IACH,aAAa,EAAE,aAAa,CAAC;IAC7B;;;OAGG;IACH,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;CAC3C;AAWD,MAAM,WAAW,UAAU;IACzB,sCAAsC;IACtC,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,0CAA0C;IAC1C,MAAM,CAAC,EAAE,eAAe,CAAC;IACzB,4DAA4D;IAC5D,kBAAkB,EAAE,OAAO,CAAC;IAC5B,+DAA+D;IAC/D,uBAAuB,EAAE,MAAM,CAAC;IAChC,sFAAsF;IACtF,iBAAiB,EAAE,MAAM,CAAC;IAC1B,6DAA6D;IAC7D,mBAAmB,EAAE,MAAM,CAAC;IAC5B,oBAAoB,EAAE,MAAM,CAAC;IAE7B,mBAAmB,EAAE,OAAO,CAAC;IAS7B,iBAAiB,EAAE,iBAAiB,CAAC;IAQrC,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,2CAA2C;IAC3C,aAAa,EAAE,yBAAyB,CAAC;CAC1C;iDAgBkE,aAAa,YAAY,CAAC;AAD7F,+DAA+D;AAC/D,qBAAa,kBAAmB,SAAQ,uBAAsD;;IAC5F,mFAAmF;IACnF,QAAQ,CAAC,0BAA0B,OAAO;IAC1C,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;;IA2BlE,yCAAyC;IACzC,GAAG,EAAE,GAAG;IACR,+BAA+B;IAC/B,GAAG,EAAE,GAAG;IACR,qCAAqC;IACrC,GAAG,EAAE,GAAG;IACR,+BAA+B;IAC/B,GAAG,EAAE,GAAG;IACR,6CAA6C;IAC7C,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAyB/C,IAAI,MAAM,IAAI,eAAe,GAAG,SAAS,CAExC;IAED,IAAI,MAAM,CAAC,GAAG,EAAE,eAAe,EAE9B;IAED,IAAI,OAAO,IAAI,WAAW,CAEzB;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,iCAAiC;IACjC,KAAK;IACH,8BAA8B;IAC9B,IAAI,EAAE,IAAI;IACV;;;;;;OAMG;IACH,WAAW,GAAE,iBAAiB,GAAG,MAAM,GAAG,IAAW;IA2BvD,wDAAwD;IAClD,GAAG,CACP,MAAM,EAAE,MAAM,GAAG,SAAS,GAAG,aAAa,CAAC,MAAM,CAAC,EAClD,kBAAkB,UAAO,EACzB,YAAY,UAAO;IAmdrB,iCAAiC;IAC3B,KAAK;CAQZ"}
1
+ {"version":3,"file":"pipeline_agent.d.ts","sourceRoot":"","sources":["../../src/pipeline/pipeline_agent.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAyB,iBAAiB,EAAE,IAAI,EAAE,MAAM,mBAAmB,CAAC;AAQxF,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EACV,sBAAsB,EACtB,gBAAgB,EAChB,eAAe,EACf,GAAG,EACJ,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,EAAE,WAAW,EAAE,WAAW,EAAY,MAAM,iBAAiB,CAAC;AAErE,OAAO,EAAE,KAAK,GAAG,EAAqC,MAAM,iBAAiB,CAAC;AAM9E,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACjF,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAG3C,OAAO,KAAK,EAAE,GAAG,EAAY,MAAM,WAAW,CAAC;AAC/C,OAAO,KAAK,EAAE,YAAY,EAAmB,MAAM,mBAAmB,CAAC;AAMvE,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG,UAAU,GAAG,WAAW,GAAG,UAAU,CAAC;AAChF,eAAO,MAAM,qBAAqB,mBAAmB,CAAC;AAEtD,MAAM,MAAM,iBAAiB,GAAG,CAC9B,KAAK,EAAE,kBAAkB,EACzB,OAAO,EAAE,WAAW,KACjB,SAAS,GAAG,KAAK,GAAG,IAAI,GAAG,OAAO,CAAC,SAAS,GAAG,KAAK,GAAG,IAAI,CAAC,CAAC;AAElE,MAAM,MAAM,iBAAiB,GAAG,CAC9B,KAAK,EAAE,kBAAkB,EACzB,MAAM,EAAE,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,KACnC,YAAY,CAAC;AAElB,oBAAY,QAAQ;IAClB,qBAAqB,IAAA;IACrB,qBAAqB,IAAA;IACrB,sBAAsB,IAAA;IACtB,sBAAsB,IAAA;IACtB,qBAAqB,IAAA;IACrB,sBAAsB,IAAA;IACtB,wBAAwB,IAAA;IACxB,wBAAwB,IAAA;IACxB,uBAAuB,IAAA;CACxB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC7C,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC7C,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC9C,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC9C,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IAC7D,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IAC9D,CAAC,QAAQ,CAAC,wBAAwB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IAChE,CAAC,QAAQ,CAAC,wBAAwB,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,EAAE,KAAK,IAAI,CAAC;IACzE,CAAC,QAAQ,CAAC,uBAAuB,CAAC,EAAE,CAAC,KAAK,EAAE,sBAAsB,EAAE,KAAK,IAAI,CAAC;CAC/E,CAAC;AAEF,qBAAa,gBAAgB;;gBAMf,KAAK,EAAE,kBAAkB,EAAE,SAAS,EAAE,SAAS;IAM3D,MAAM,CAAC,UAAU,IAAI,gBAAgB;IAIrC,IAAI,KAAK,IAAI,kBAAkB,CAE9B;IAED,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG;IAIrC,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,GAAE,GAAe;IAInD,IAAI,SAAS,IAAI,SAAS,CAEzB;CACF;AAiBD,MAAM,WAAW,yBAAyB;IACxC,8DAA8D;IAC9D,iBAAiB,EAAE,OAAO,CAAC;IAC3B,+DAA+D;IAC/D,kBAAkB,EAAE,OAAO,CAAC;IAC5B;;;OAGG;IACH,wBAAwB,EAAE,MAAM,CAAC;IACjC;;;OAGG;IACH,iBAAiB,EAAE,iBAAiB,CAAC;IACrC;;;OAGG;IACH,aAAa,EAAE,aAAa,CAAC;IAC7B;;;OAGG;IACH,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;CAC3C;AAWD,MAAM,WAAW,UAAU;IACzB,sCAAsC;IACtC,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,0CAA0C;IAC1C,MAAM,CAAC,EAAE,eAAe,CAAC;IACzB,4DAA4D;IAC5D,kBAAkB,EAAE,OAAO,CAAC;IAC5B,+DAA+D;IAC/D,uBAAuB,EAAE,MAAM,CAAC;IAChC,sFAAsF;IACtF,iBAAiB,EAAE,MAAM,CAAC;IAC1B,6DAA6D;IAC7D,mBAAmB,EAAE,MAAM,CAAC;IAC5B,oBAAoB,EAAE,MAAM,CAAC;IAE7B,mBAAmB,EAAE,OAAO,CAAC;IAS7B,iBAAiB,EAAE,iBAAiB,CAAC;IAQrC,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,2CAA2C;IAC3C,aAAa,EAAE,yBAAyB,CAAC;CAC1C;iDAgBkE,aAAa,YAAY,CAAC;AAD7F,+DAA+D;AAC/D,qBAAa,kBAAmB,SAAQ,uBAAsD;;IAC5F,mFAAmF;IACnF,QAAQ,CAAC,0BAA0B,OAAO;IAC1C,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;;IA2BlE,yCAAyC;IACzC,GAAG,EAAE,GAAG;IACR,+BAA+B;IAC/B,GAAG,EAAE,GAAG;IACR,qCAAqC;IACrC,GAAG,EAAE,GAAG;IACR,+BAA+B;IAC/B,GAAG,EAAE,GAAG;IACR,6CAA6C;IAC7C,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAyB/C,IAAI,MAAM,IAAI,eAAe,GAAG,SAAS,CAExC;IAED,IAAI,MAAM,CAAC,GAAG,EAAE,eAAe,EAE9B;IAED,IAAI,OAAO,IAAI,WAAW,CAEzB;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,iCAAiC;IACjC,KAAK;IACH,8BAA8B;IAC9B,IAAI,EAAE,IAAI;IACV;;;;;;OAMG;IACH,WAAW,GAAE,iBAAiB,GAAG,MAAM,GAAG,IAAW;IA2BvD,wDAAwD;IAClD,GAAG,CACP,MAAM,EAAE,MAAM,GAAG,SAAS,GAAG,aAAa,CAAC,MAAM,CAAC,EAClD,kBAAkB,UAAO,EACzB,YAAY,UAAO;IA+crB,iCAAiC;IAC3B,KAAK;CAQZ"}
@@ -318,8 +318,7 @@ class VoicePipelineAgent extends EventEmitter {
318
318
  if ((!playingSpeech.userQuestion || playingSpeech.userCommitted) && !playingSpeech.speechCommitted) {
319
319
  copiedCtx.messages.push(
320
320
  ChatMessage.create({
321
- // TODO(nbsp): uhhh unsure where to get the played text here
322
- // text: playingSpeech.synthesisHandle.(theres no ttsForwarder here)
321
+ text: playingSpeech.synthesisHandle.text,
323
322
  role: ChatRole.ASSISTANT
324
323
  })
325
324
  );
@@ -387,7 +386,7 @@ class VoicePipelineAgent extends EventEmitter {
387
386
  if (handle.interrupted) break;
388
387
  }
389
388
  commitUserQuestionIfNeeded();
390
- let collectedText = "";
389
+ const collectedText = handle.synthesisHandle.text;
391
390
  const isUsingTools = handle.source instanceof LLMStream && !!handle.source.functionCalls.length;
392
391
  const extraToolsMessages = [];
393
392
  let interrupted = handle.interrupted;
@@ -434,7 +433,6 @@ class VoicePipelineAgent extends EventEmitter {
434
433
  handle.synthesisHandle = answerSynthesis;
435
434
  const playHandle2 = answerSynthesis.play();
436
435
  await playHandle2.join().await;
437
- collectedText = "";
438
436
  interrupted = answerSynthesis.interrupted;
439
437
  newFunctionCalls = answerLLMStream.functionCalls;
440
438
  this.emit(8 /* FUNCTION_CALLS_FINISHED */, calledFuncs);
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/pipeline/pipeline_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { LocalTrackPublication, RemoteParticipant, Room } from '@livekit/rtc-node';\nimport {\n AudioSource,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport type {\n CallableFunctionResult,\n FunctionCallInfo,\n FunctionContext,\n LLM,\n} from '../llm/index.js';\nimport { LLMStream } from '../llm/index.js';\nimport { ChatContext, ChatMessage, ChatRole } from '../llm/index.js';\nimport { log } from '../log.js';\nimport { type STT, StreamAdapter as STTStreamAdapter } from '../stt/index.js';\nimport {\n SentenceTokenizer as BasicSentenceTokenizer,\n WordTokenizer as BasicWordTokenizer,\n hyphenateWord,\n} from '../tokenize/basic/index.js';\nimport type { SentenceTokenizer, WordTokenizer } from '../tokenize/tokenizer.js';\nimport type { TTS } from '../tts/index.js';\nimport { StreamAdapter as TTSStreamAdapter } from '../tts/index.js';\nimport { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport type { VAD, VADEvent } from '../vad.js';\nimport type { SpeechSource, SynthesisHandle } from './agent_output.js';\nimport { AgentOutput } from './agent_output.js';\nimport { AgentPlayout, AgentPlayoutEvent } from './agent_playout.js';\nimport { HumanInput, HumanInputEvent } from './human_input.js';\nimport { SpeechHandle } from './speech_handle.js';\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\nexport type BeforeLLMCallback = (\n agent: VoicePipelineAgent,\n chatCtx: ChatContext,\n) => LLMStream | false | void | Promise<LLMStream | false | void>;\n\nexport type BeforeTTSCallback = (\n agent: VoicePipelineAgent,\n source: string | AsyncIterable<string>,\n) => SpeechSource;\n\nexport enum VPAEvent {\n USER_STARTED_SPEAKING,\n USER_STOPPED_SPEAKING,\n AGENT_STARTED_SPEAKING,\n AGENT_STOPPED_SPEAKING,\n USER_SPEECH_COMMITTED,\n AGENT_SPEECH_COMMITTED,\n AGENT_SPEECH_INTERRUPTED,\n FUNCTION_CALLS_COLLECTED,\n FUNCTION_CALLS_FINISHED,\n}\n\nexport type VPACallbacks = {\n [VPAEvent.USER_STARTED_SPEAKING]: () => void;\n [VPAEvent.USER_STOPPED_SPEAKING]: () => void;\n [VPAEvent.AGENT_STARTED_SPEAKING]: () => void;\n [VPAEvent.AGENT_STOPPED_SPEAKING]: () => void;\n [VPAEvent.USER_SPEECH_COMMITTED]: (msg: ChatMessage) => void;\n [VPAEvent.AGENT_SPEECH_COMMITTED]: (msg: ChatMessage) => void;\n [VPAEvent.AGENT_SPEECH_INTERRUPTED]: (msg: ChatMessage) => void;\n [VPAEvent.FUNCTION_CALLS_COLLECTED]: (funcs: FunctionCallInfo[]) => void;\n [VPAEvent.FUNCTION_CALLS_FINISHED]: (funcs: CallableFunctionResult[]) => void;\n};\n\nexport class AgentCallContext {\n #agent: VoicePipelineAgent;\n #llmStream: LLMStream;\n #metadata = new Map<string, any>();\n static #current: AgentCallContext;\n\n constructor(agent: VoicePipelineAgent, llmStream: LLMStream) {\n this.#agent = agent;\n this.#llmStream = llmStream;\n AgentCallContext.#current = this;\n }\n\n static getCurrent(): AgentCallContext {\n return AgentCallContext.#current;\n }\n\n get agent(): VoicePipelineAgent {\n return this.#agent;\n }\n\n storeMetadata(key: string, value: any) {\n this.#metadata.set(key, value);\n }\n\n getMetadata(key: string, orDefault: any = undefined) {\n return this.#metadata.get(key) || orDefault;\n }\n\n get llmStream(): LLMStream {\n return this.#llmStream;\n }\n}\n\nconst defaultBeforeLLMCallback: BeforeLLMCallback = (\n agent: VoicePipelineAgent,\n chatCtx: ChatContext,\n): LLMStream => {\n return agent.llm.chat({ chatCtx, fncCtx: agent.fncCtx });\n};\n\nconst defaultBeforeTTSCallback: BeforeTTSCallback = (\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n _: VoicePipelineAgent,\n text: string | AsyncIterable<string>,\n): string | AsyncIterable<string> => {\n return text;\n};\n\nexport interface AgentTranscriptionOptions {\n /** Whether to forward the user transcription to the client */\n userTranscription: boolean;\n /** Whether to forward the agent transcription to the client */\n agentTranscription: boolean;\n /**\n * The speed at which the agent's speech transcription is forwarded to the client.\n * We try to mimic the agent's speech speed by adjusting the transcription speed.\n */\n agentTranscriptionSpeech: number;\n /**\n * The tokenizer used to split the speech into sentences.\n * This is used to decide when to mark a transcript as final for the agent transcription.\n */\n sentenceTokenizer: SentenceTokenizer;\n /**\n * The tokenizer used to split the speech into words.\n * This is used to simulate the \"interim results\" of the agent transcription.\n */\n wordTokenizer: WordTokenizer;\n /**\n * A function that takes a string (word) as input and returns a list of strings,\n * representing the hyphenated parts of the word.\n */\n hyphenateWord: (word: string) => string[];\n}\n\nconst defaultAgentTranscriptionOptions: AgentTranscriptionOptions = {\n userTranscription: true,\n agentTranscription: true,\n agentTranscriptionSpeech: 1,\n sentenceTokenizer: new BasicSentenceTokenizer(),\n wordTokenizer: new BasicWordTokenizer(false),\n hyphenateWord: hyphenateWord,\n};\n\nexport interface VPAOptions {\n /** Chat context for the assistant. */\n chatCtx?: ChatContext;\n /** Function context for the assistant. */\n fncCtx?: FunctionContext;\n /** Whether to allow the user to interrupt the assistant. */\n allowInterruptions: boolean;\n /** Minimum duration of speech to consider for interruption. */\n interruptSpeechDuration: number;\n /** Minimum number of words to consider for interuption. This may increase latency. */\n interruptMinWords: number;\n /** Delay to wait before considering the user speech done. */\n minEndpointingDelay: number;\n maxRecursiveFncCalls: number;\n /* Whether to preemptively synthesize responses. */\n preemptiveSynthesis: boolean;\n /*\n * Callback called when the assistant is about to synthesize a reply.\n *\n * @remarks\n * Returning void will create a default LLM stream.\n * You can also return your own LLM stream by calling `llm.chat()`.\n * Returning `false` ill cancel the synthesis of the reply.\n */\n beforeLLMCallback: BeforeLLMCallback;\n /*\n * Callback called when the assistant is about to synthesize speech.\n *\n * @remarks\n * This can be used to customize text before synthesis\n * (e.g. editing the pronunciation of a word).\n */\n beforeTTSCallback: BeforeTTSCallback;\n /** Options for assistant transcription. */\n transcription: AgentTranscriptionOptions;\n}\n\nconst defaultVPAOptions: VPAOptions = {\n chatCtx: new ChatContext(),\n allowInterruptions: true,\n interruptSpeechDuration: 50,\n interruptMinWords: 0,\n minEndpointingDelay: 500,\n maxRecursiveFncCalls: 1,\n preemptiveSynthesis: false,\n beforeLLMCallback: defaultBeforeLLMCallback,\n beforeTTSCallback: defaultBeforeTTSCallback,\n transcription: defaultAgentTranscriptionOptions,\n};\n\n/** A pipeline agent (VAD + STT + LLM + TTS) implementation. */\nexport class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<VPACallbacks>) {\n /** Minimum time played for the user speech to be committed to the chat context. */\n readonly MIN_TIME_PLAYED_FOR_COMMIT = 1.5;\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n\n #vad: VAD;\n #stt: STT;\n #llm: LLM;\n #tts: TTS;\n #opts: VPAOptions;\n #humanInput?: HumanInput;\n #agentOutput?: AgentOutput;\n #trackPublishedFut = new Future();\n #pendingAgentReply?: SpeechHandle;\n #agentReplyTask?: CancellablePromise<void>;\n #playingSpeech?: SpeechHandle;\n #transcribedText = '';\n #transcribedInterimText = '';\n #speechQueueOpen = new Future();\n #speechQueue = new AsyncIterableQueue<SpeechHandle | typeof VoicePipelineAgent.FLUSH_SENTINEL>();\n #lastEndOfSpeechTime?: number;\n #updateStateTask?: CancellablePromise<void>;\n #started = false;\n #room?: Room;\n #participant: RemoteParticipant | string | null = null;\n #deferredValidation: DeferredReplyValidation;\n #logger = log();\n #agentPublication?: LocalTrackPublication;\n\n constructor(\n /** Voice Activity Detection instance. */\n vad: VAD,\n /** Speech-to-Text instance. */\n stt: STT,\n /** Large Language Model instance. */\n llm: LLM,\n /** Text-to-Speech instance. */\n tts: TTS,\n /** Additional VoicePipelineAgent options. */\n opts: Partial<VPAOptions> = defaultVPAOptions,\n ) {\n super();\n\n this.#opts = { ...defaultVPAOptions, ...opts };\n\n if (!stt.capabilities.streaming) {\n stt = new STTStreamAdapter(stt, vad);\n }\n\n if (!tts.capabilities.streaming) {\n tts = new TTSStreamAdapter(tts, new BasicSentenceTokenizer());\n }\n\n this.#vad = vad;\n this.#stt = stt;\n this.#llm = llm;\n this.#tts = tts;\n\n this.#deferredValidation = new DeferredReplyValidation(\n this.#validateReplyIfPossible.bind(this),\n this.#opts.minEndpointingDelay,\n );\n }\n\n get fncCtx(): FunctionContext | undefined {\n return this.#opts.fncCtx;\n }\n\n set fncCtx(ctx: FunctionContext) {\n this.#opts.fncCtx = ctx;\n }\n\n get chatCtx(): ChatContext {\n return this.#opts.chatCtx!;\n }\n\n get llm(): LLM {\n return this.#llm;\n }\n\n get tts(): TTS {\n return this.#tts;\n }\n\n get stt(): STT {\n return this.#stt;\n }\n\n get vad(): VAD {\n return this.#vad;\n }\n\n /** Start the voice assistant. */\n start(\n /** The room to connect to. */\n room: Room,\n /**\n * The participant to listen to.\n *\n * @remarks\n * Can be a participant or an identity.\n * If omitted, the first participant in the room will be selected.\n */\n participant: RemoteParticipant | string | null = null,\n ) {\n if (this.#started) {\n throw new Error('voice assistant already started');\n }\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.#participant) {\n return;\n }\n this.#linkParticipant.call(this, participant.identity);\n });\n\n this.#room = room;\n this.#participant = participant;\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity);\n }\n }\n\n this.#run();\n }\n\n /** Play a speech source through the voice assistant. */\n async say(\n source: string | LLMStream | AsyncIterable<string>,\n allowInterruptions = true,\n addToChatCtx = true,\n ) {\n await this.#trackPublishedFut.await;\n const newHandle = SpeechHandle.createAssistantSpeech(allowInterruptions, addToChatCtx);\n const synthesisHandle = this.#synthesizeAgentSpeech(newHandle.id, source);\n newHandle.initialize(source, synthesisHandle);\n this.#addSpeechForPlayout(newHandle);\n }\n\n #updateState(state: AgentState, delay = 0) {\n const runTask = (delay: number): CancellablePromise<void> => {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n await new Promise((resolve) => setTimeout(resolve, delay));\n if (this.#room?.isConnected) {\n if (!cancelled) {\n await this.#room.localParticipant?.setAttributes({ [AGENT_STATE_ATTRIBUTE]: state });\n }\n }\n resolve();\n });\n };\n\n if (this.#updateStateTask) {\n this.#updateStateTask.cancel();\n }\n\n this.#updateStateTask = runTask(delay);\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.#room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.#participant = this.#room.remoteParticipants.get(participantIdentity) || null;\n if (!this.#participant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n this.#humanInput = new HumanInput(this.#room, this.#vad, this.#stt, this.#participant);\n this.#humanInput.on(HumanInputEvent.START_OF_SPEECH, (event) => {\n this.emit(VPAEvent.USER_STARTED_SPEAKING);\n this.#deferredValidation.onHumanStartOfSpeech(event);\n });\n this.#humanInput.on(HumanInputEvent.VAD_INFERENCE_DONE, (event) => {\n if (!this.#trackPublishedFut.done) {\n return;\n }\n if (!this.#agentOutput) {\n throw new Error('agent output is undefined');\n }\n\n let tv = 1;\n if (this.#opts.allowInterruptions) {\n tv = Math.max(0, 1 - event.probability);\n this.#agentOutput.playout.targetVolume = tv;\n }\n\n if (event.speechDuration >= this.#opts.interruptSpeechDuration) {\n this.#interruptIfPossible();\n }\n });\n this.#humanInput.on(HumanInputEvent.END_OF_SPEECH, (event) => {\n this.emit(VPAEvent.USER_STARTED_SPEAKING);\n this.#deferredValidation.onHumanEndOfSpeech(event);\n this.#lastEndOfSpeechTime = Date.now();\n });\n this.#humanInput.on(HumanInputEvent.INTERIM_TRANSCRIPT, (event) => {\n this.#transcribedInterimText = event.alternatives![0].text;\n });\n this.#humanInput.on(HumanInputEvent.FINAL_TRANSCRIPT, (event) => {\n const newTranscript = event.alternatives![0].text;\n if (!newTranscript) return;\n\n this.#logger.child({ userTranscript: newTranscript }).debug('received user transcript');\n this.#transcribedText += (this.#transcribedText ? ' ' : '') + newTranscript;\n\n if (\n this.#opts.preemptiveSynthesis &&\n (!this.#playingSpeech || this.#playingSpeech.allowInterruptions)\n ) {\n this.#synthesizeAgentReply();\n }\n\n this.#deferredValidation.onHumanFinalTranscript(newTranscript);\n\n const words = this.#opts.transcription.wordTokenizer.tokenize(newTranscript);\n if (words.length >= 3) {\n // VAD can sometimes not detect that the human is speaking.\n // to make the interruption more reliable, we also interrupt on the final transcript.\n this.#interruptIfPossible();\n }\n });\n }\n\n async #run() {\n this.#updateState('initializing');\n const audioSource = new AudioSource(this.#tts.sampleRate, this.#tts.numChannels);\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', audioSource);\n this.#agentPublication = await this.#room?.localParticipant?.publishTrack(\n track,\n new TrackPublishOptions({ source: TrackSource.SOURCE_MICROPHONE }),\n );\n\n const agentPlayout = new AgentPlayout(audioSource);\n this.#agentOutput = new AgentOutput(agentPlayout, this.#tts);\n\n agentPlayout.on(AgentPlayoutEvent.PLAYOUT_STARTED, () => {\n this.emit(VPAEvent.AGENT_STARTED_SPEAKING);\n this.#updateState('speaking');\n });\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n agentPlayout.on(AgentPlayoutEvent.PLAYOUT_STOPPED, (_) => {\n this.emit(VPAEvent.AGENT_STOPPED_SPEAKING);\n this.#updateState('listening');\n });\n\n this.#trackPublishedFut.resolve();\n\n while (true) {\n await this.#speechQueueOpen.await;\n for await (const speech of this.#speechQueue) {\n if (speech === VoicePipelineAgent.FLUSH_SENTINEL) break;\n this.#playingSpeech = speech;\n await this.#playSpeech(speech);\n this.#playingSpeech = undefined;\n }\n this.#speechQueueOpen = new Future();\n }\n }\n\n #synthesizeAgentReply() {\n this.#pendingAgentReply?.cancel();\n if (this.#humanInput && this.#humanInput.speaking) {\n this.#updateState('thinking', 200);\n }\n\n this.#pendingAgentReply = SpeechHandle.createAssistantReply(\n this.#opts.allowInterruptions,\n true,\n this.#transcribedText,\n );\n const newHandle = this.#pendingAgentReply;\n this.#agentReplyTask = this.#synthesizeAnswerTask(this.#agentReplyTask, newHandle);\n }\n\n #synthesizeAnswerTask(\n oldTask: CancellablePromise<void> | undefined,\n handle?: SpeechHandle,\n ): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n const copiedCtx = this.chatCtx.copy();\n const playingSpeech = this.#playingSpeech;\n if (playingSpeech && playingSpeech.initialized) {\n if (\n (!playingSpeech.userQuestion || playingSpeech.userCommitted) &&\n !playingSpeech.speechCommitted\n ) {\n // the speech is playing but not committed yet,\n // add it to the chat context for this new reply synthesis\n copiedCtx.messages.push(\n ChatMessage.create({\n // TODO(nbsp): uhhh unsure where to get the played text here\n // text: playingSpeech.synthesisHandle.(theres no ttsForwarder here)\n role: ChatRole.ASSISTANT,\n }),\n );\n }\n }\n\n copiedCtx.messages.push(\n ChatMessage.create({\n text: handle?.userQuestion,\n role: ChatRole.USER,\n }),\n );\n\n if (cancelled) resolve();\n let llmStream = await this.#opts.beforeLLMCallback(this, copiedCtx);\n if (llmStream === false) {\n handle?.cancel();\n return;\n }\n\n if (cancelled) resolve();\n // fallback to default impl if no custom/user stream is returned\n if (!(llmStream instanceof LLMStream)) {\n llmStream = (await defaultBeforeLLMCallback(this, copiedCtx)) as LLMStream;\n }\n\n if (handle!.interrupted) {\n return;\n }\n\n const synthesisHandle = this.#synthesizeAgentSpeech(handle!.id, llmStream);\n handle!.initialize(llmStream, synthesisHandle);\n\n // TODO(theomonnom): find a more reliable way to get the elapsed time from the last EOS\n // (VAD could not have detected any speech — maybe unlikely?)\n const elapsed = !!this.#lastEndOfSpeechTime\n ? Math.round((Date.now() - this.#lastEndOfSpeechTime) * 1000) / 1000\n : -1;\n\n this.#logger.child({ speechId: handle!.id, elapsed }).debug('synthesizing agent reply');\n resolve();\n });\n }\n\n async #playSpeech(handle: SpeechHandle) {\n try {\n await handle.waitForInitialization();\n } catch {\n return;\n }\n await this.#agentPublication!.waitForSubscription();\n const synthesisHandle = handle.synthesisHandle;\n if (synthesisHandle.interrupted) return;\n\n const userQuestion = handle.userQuestion;\n const playHandle = synthesisHandle.play();\n const joinFut = playHandle.join();\n\n const commitUserQuestionIfNeeded = () => {\n if (!userQuestion || synthesisHandle.interrupted || handle.userCommitted) return;\n const isUsingTools =\n handle.source instanceof LLMStream && !!handle.source.functionCalls.length;\n\n // make sure at least some speech was played before committing the user message\n // since we try to validate as fast as possible it is possible the agent gets interrupted\n // really quickly (barely audible), we don't want to mark this question as \"answered\".\n if (\n handle.allowInterruptions &&\n !isUsingTools &&\n playHandle.timePlayed < this.MIN_TIME_PLAYED_FOR_COMMIT &&\n !joinFut.done\n ) {\n return;\n }\n\n this.#logger.child({ userTranscript: userQuestion }).debug('committed user transcript');\n const userMsg = ChatMessage.create({ text: userQuestion, role: ChatRole.USER });\n this.chatCtx.messages.push(userMsg);\n this.emit(VPAEvent.USER_SPEECH_COMMITTED, userMsg);\n\n this.#transcribedText = this.#transcribedText.slice(userQuestion.length);\n handle.markUserCommitted();\n };\n\n // wait for the playHandle to finish and check every 1s if user question should be committed\n commitUserQuestionIfNeeded();\n\n while (!joinFut.done) {\n await new Promise<void>(async (resolve) => {\n setTimeout(resolve, 500);\n await joinFut.await;\n resolve();\n });\n commitUserQuestionIfNeeded();\n if (handle.interrupted) break;\n }\n commitUserQuestionIfNeeded();\n\n // TODO(nbsp): what goes here\n let collectedText = '';\n const isUsingTools = handle.source instanceof LLMStream && !!handle.source.functionCalls.length;\n const extraToolsMessages = []; // additional messages from the functions to add to the context\n let interrupted = handle.interrupted;\n\n // if the answer is using tools, execute the functions and automatically generate\n // a response to the user question from the returned values\n if (isUsingTools && !interrupted) {\n if (!userQuestion || !handle.userCommitted) {\n throw new Error('user speech should have been committed before using tools');\n }\n const llmStream = handle.source;\n let newFunctionCalls = llmStream.functionCalls;\n\n for (let i = 0; i < this.#opts.maxRecursiveFncCalls; i++) {\n this.emit(VPAEvent.FUNCTION_CALLS_COLLECTED, newFunctionCalls);\n const calledFuncs: FunctionCallInfo[] = [];\n for (const func of newFunctionCalls) {\n const task = func.func.execute(func.params).then(\n (result) => ({ name: func.name, toolCallId: func.toolCallId, result }),\n (error) => ({ name: func.name, toolCallId: func.toolCallId, error }),\n );\n calledFuncs.push({ ...func, task });\n this.#logger\n .child({ function: func.name, speechId: handle.id })\n .debug('executing AI function');\n try {\n await task;\n } catch {\n this.#logger\n .child({ function: func.name, speechId: handle.id })\n .error('error executing AI function');\n }\n }\n\n const toolCallsInfo = [];\n const toolCallsResults = [];\n for (const fnc of calledFuncs) {\n // ignore the function calls that return void\n const task = await fnc.task;\n if (!task || task.result === undefined) continue;\n toolCallsInfo.push(fnc);\n toolCallsResults.push(ChatMessage.createToolFromFunctionResult(task));\n }\n\n if (!toolCallsInfo.length) break;\n\n // generate an answer from the tool calls\n extraToolsMessages.push(ChatMessage.createToolCalls(toolCallsInfo, collectedText));\n extraToolsMessages.push(...toolCallsResults);\n\n const chatCtx = handle.source.chatCtx.copy();\n chatCtx.messages.push(...extraToolsMessages);\n\n const answerLLMStream = this.llm.chat({\n chatCtx,\n fncCtx: this.fncCtx,\n });\n const answerSynthesis = this.#synthesizeAgentSpeech(handle.id, answerLLMStream);\n // replace the synthesis handle with the new one to allow interruption\n handle.synthesisHandle = answerSynthesis;\n const playHandle = answerSynthesis.play();\n await playHandle.join().await;\n\n // TODO(nbsp): what text goes here\n collectedText = '';\n interrupted = answerSynthesis.interrupted;\n newFunctionCalls = answerLLMStream.functionCalls;\n\n this.emit(VPAEvent.FUNCTION_CALLS_FINISHED, calledFuncs);\n if (!newFunctionCalls) break;\n }\n }\n\n if (handle.addToChatCtx && (!userQuestion || handle.userCommitted)) {\n this.chatCtx.messages.push(...extraToolsMessages);\n if (interrupted) {\n collectedText + '…';\n }\n\n const msg = ChatMessage.create({ text: collectedText, role: ChatRole.ASSISTANT });\n this.chatCtx.messages.push(msg);\n\n handle.markSpeechCommitted();\n if (interrupted) {\n this.emit(VPAEvent.AGENT_SPEECH_INTERRUPTED, msg);\n } else {\n this.emit(VPAEvent.AGENT_SPEECH_COMMITTED, msg);\n }\n\n this.#logger\n .child({\n agentTranscript: collectedText,\n interrupted,\n speechId: handle.id,\n })\n .debug('committed agent speech');\n }\n }\n\n #synthesizeAgentSpeech(\n speechId: string,\n source: string | LLMStream | AsyncIterable<string>,\n ): SynthesisHandle {\n if (!this.#agentOutput) {\n throw new Error('agent output should be initialized when ready');\n }\n\n if (source instanceof LLMStream) {\n source = llmStreamToStringIterable(speechId, source);\n }\n\n const ogSource = source;\n if (!(typeof source === 'string')) {\n // TODO(nbsp): itertools.tee\n }\n\n const ttsSource = this.#opts.beforeTTSCallback(this, ogSource);\n if (!ttsSource) {\n throw new Error('beforeTTSCallback must return string or AsyncIterable<string>');\n }\n\n return this.#agentOutput.synthesize(speechId, ttsSource);\n }\n\n async #validateReplyIfPossible() {\n if (this.#playingSpeech && !this.#playingSpeech.allowInterruptions) {\n this.#logger\n .child({ speechId: this.#playingSpeech.id })\n .debug('skipping validation, agent is speaking and does not allow interruptions');\n return;\n }\n\n if (!this.#pendingAgentReply) {\n if (this.#opts.preemptiveSynthesis || !this.#transcribedText) {\n return;\n }\n this.#synthesizeAgentReply();\n }\n\n if (!this.#pendingAgentReply) {\n throw new Error('pending agent reply is undefined');\n }\n\n // in some bad timimg, we could end up with two pushed agent replies inside the speech queue.\n // so make sure we directly interrupt every reply when validating a new one\n if (this.#speechQueueOpen.done) {\n for await (const speech of this.#speechQueue) {\n if (speech === VoicePipelineAgent.FLUSH_SENTINEL) break;\n if (!speech.isReply) continue;\n if (speech.allowInterruptions) speech.interrupt();\n }\n }\n\n this.#logger.child({ speechId: this.#pendingAgentReply.id }).debug('validated agent reply');\n\n this.#addSpeechForPlayout(this.#pendingAgentReply);\n this.#pendingAgentReply = undefined;\n this.#transcribedInterimText = '';\n }\n\n #interruptIfPossible() {\n if (\n !this.#playingSpeech ||\n !this.#playingSpeech.allowInterruptions ||\n this.#playingSpeech.interrupted\n ) {\n return;\n }\n\n if (this.#opts.interruptMinWords !== 0) {\n // check the final/interim transcribed text for the minimum word count\n // to interrupt the agent speech\n const interimWords = this.#opts.transcription.wordTokenizer.tokenize(\n this.#transcribedInterimText,\n );\n if (interimWords.length < this.#opts.interruptMinWords) {\n return;\n }\n }\n this.#playingSpeech.interrupt();\n }\n\n #addSpeechForPlayout(handle: SpeechHandle) {\n this.#speechQueue.put(handle);\n this.#speechQueue.put(VoicePipelineAgent.FLUSH_SENTINEL);\n this.#speechQueueOpen.resolve();\n }\n\n /** Close the voice assistant. */\n async close() {\n if (!this.#started) {\n return;\n }\n\n this.#room?.removeAllListeners(RoomEvent.ParticipantConnected);\n // TODO(nbsp): await this.#deferredValidation.close()\n }\n}\n\nasync function* llmStreamToStringIterable(\n speechId: string,\n stream: LLMStream,\n): AsyncIterable<string> {\n const startTime = Date.now();\n let firstFrame = true;\n for await (const chunk of stream) {\n const content = chunk.choices[0]?.delta.content;\n if (!content) continue;\n\n if (firstFrame) {\n firstFrame = false;\n log()\n .child({ speechId, elapsed: Math.round(Date.now() - startTime) })\n .debug('received first LLM token');\n }\n yield content;\n }\n}\n\n/** This class is used to try to find the best time to validate the agent reply. */\nclass DeferredReplyValidation {\n // if the STT gives us punctuation, we can try to validate the reply faster.\n readonly PUNCTUATION = '.!?';\n readonly PUNCTUATION_REDUCE_FACTOR = 0.75;\n readonly LATE_TRANSCRIPT_TOLERANCE = 1.5; // late compared to end of speech\n\n #validateFunc: () => Promise<void>;\n #validatingPromise?: Promise<void>;\n #validatingFuture = new Future();\n #lastFinalTranscript = '';\n #lastRecvEndOfSpeechTime = 0;\n #speaking = false;\n #endOfSpeechDelay: number;\n #finalTranscriptDelay: number;\n\n constructor(validateFunc: () => Promise<void>, minEndpointingDelay: number) {\n this.#validateFunc = validateFunc;\n this.#endOfSpeechDelay = minEndpointingDelay;\n this.#finalTranscriptDelay = minEndpointingDelay;\n }\n\n get validating(): boolean {\n return !this.#validatingFuture.done;\n }\n\n onHumanFinalTranscript(transcript: string) {\n this.#lastFinalTranscript = transcript.trim();\n if (this.#speaking) return;\n\n const hasRecentEndOfSpeech =\n Date.now() - this.#lastRecvEndOfSpeechTime < this.LATE_TRANSCRIPT_TOLERANCE;\n let delay = hasRecentEndOfSpeech ? this.#endOfSpeechDelay : this.#finalTranscriptDelay;\n delay = this.#endWithPunctuation() ? delay * this.PUNCTUATION_REDUCE_FACTOR : 1;\n\n this.#run(delay);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n onHumanStartOfSpeech(_: VADEvent) {\n this.#speaking = true;\n // TODO(nbsp):\n // if (this.validating) {\n // this.#validatingPromise.cancel()\n // }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n onHumanEndOfSpeech(_: VADEvent) {\n this.#speaking = false;\n this.#lastRecvEndOfSpeechTime = Date.now();\n\n if (this.#lastFinalTranscript) {\n const delay = this.#endWithPunctuation()\n ? this.#endOfSpeechDelay * this.PUNCTUATION_REDUCE_FACTOR\n : 1;\n this.#run(delay);\n }\n }\n\n // TODO(nbsp): aclose\n\n #endWithPunctuation(): boolean {\n return (\n this.#lastFinalTranscript.length > 0 &&\n this.PUNCTUATION.includes(this.#lastFinalTranscript[this.#lastFinalTranscript.length - 1]!)\n );\n }\n\n #resetStates() {\n this.#lastFinalTranscript = '';\n this.#lastRecvEndOfSpeechTime = 0;\n }\n\n #run(delay: number) {\n const runTask = async (delay: number) => {\n await new Promise((resolve) => setTimeout(resolve, delay));\n this.#resetStates();\n await this.#validateFunc();\n };\n\n this.#validatingFuture = new Future();\n this.#validatingPromise = runTask(delay);\n }\n}\n"],"mappings":"AAIA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,OAAO,kBAAkB;AAOzB,SAAS,iBAAiB;AAC1B,SAAS,aAAa,aAAa,gBAAgB;AACnD,SAAS,WAAW;AACpB,SAAmB,iBAAiB,wBAAwB;AAC5D;AAAA,EACE,qBAAqB;AAAA,EACrB,iBAAiB;AAAA,EACjB;AAAA,OACK;AAGP,SAAS,iBAAiB,wBAAwB;AAClD,SAAS,oBAAoB,oBAAoB,QAAQ,wBAAwB;AAGjF,SAAS,mBAAmB;AAC5B,SAAS,cAAc,yBAAyB;AAChD,SAAS,YAAY,uBAAuB;AAC5C,SAAS,oBAAoB;AAGtB,MAAM,wBAAwB;AAY9B,IAAK,WAAL,kBAAKA,cAAL;AACL,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AATU,SAAAA;AAAA,GAAA;AAwBL,MAAM,iBAAiB;AAAA,EAC5B;AAAA,EACA;AAAA,EACA,YAAY,oBAAI,IAAiB;AAAA,EACjC,OAAO;AAAA,EAEP,YAAY,OAA2B,WAAsB;AAC3D,SAAK,SAAS;AACd,SAAK,aAAa;AAClB,qBAAiB,WAAW;AAAA,EAC9B;AAAA,EAEA,OAAO,aAA+B;AACpC,WAAO,iBAAiB;AAAA,EAC1B;AAAA,EAEA,IAAI,QAA4B;AAC9B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,cAAc,KAAa,OAAY;AACrC,SAAK,UAAU,IAAI,KAAK,KAAK;AAAA,EAC/B;AAAA,EAEA,YAAY,KAAa,YAAiB,QAAW;AACnD,WAAO,KAAK,UAAU,IAAI,GAAG,KAAK;AAAA,EACpC;AAAA,EAEA,IAAI,YAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AACF;AAEA,MAAM,2BAA8C,CAClD,OACA,YACc;AACd,SAAO,MAAM,IAAI,KAAK,EAAE,SAAS,QAAQ,MAAM,OAAO,CAAC;AACzD;AAEA,MAAM,2BAA8C,CAElD,GACA,SACmC;AACnC,SAAO;AACT;AA6BA,MAAM,mCAA8D;AAAA,EAClE,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,0BAA0B;AAAA,EAC1B,mBAAmB,IAAI,uBAAuB;AAAA,EAC9C,eAAe,IAAI,mBAAmB,KAAK;AAAA,EAC3C;AACF;AAuCA,MAAM,oBAAgC;AAAA,EACpC,SAAS,IAAI,YAAY;AAAA,EACzB,oBAAoB;AAAA,EACpB,yBAAyB;AAAA,EACzB,mBAAmB;AAAA,EACnB,qBAAqB;AAAA,EACrB,sBAAsB;AAAA,EACtB,qBAAqB;AAAA,EACrB,mBAAmB;AAAA,EACnB,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAGO,MAAM,2BAA4B,aAAsD;AAAA;AAAA,EAEpF,6BAA6B;AAAA,EACtC,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EAElE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,qBAAqB,IAAI,OAAO;AAAA,EAChC;AAAA,EACA;AAAA,EACA;AAAA,EACA,mBAAmB;AAAA,EACnB,0BAA0B;AAAA,EAC1B,mBAAmB,IAAI,OAAO;AAAA,EAC9B,eAAe,IAAI,mBAA4E;AAAA,EAC/F;AAAA,EACA;AAAA,EACA,WAAW;AAAA,EACX;AAAA,EACA,eAAkD;AAAA,EAClD;AAAA,EACA,UAAU,IAAI;AAAA,EACd;AAAA,EAEA,YAEE,KAEA,KAEA,KAEA,KAEA,OAA4B,mBAC5B;AACA,UAAM;AAEN,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,CAAC,IAAI,aAAa,WAAW;AAC/B,YAAM,IAAI,iBAAiB,KAAK,GAAG;AAAA,IACrC;AAEA,QAAI,CAAC,IAAI,aAAa,WAAW;AAC/B,YAAM,IAAI,iBAAiB,KAAK,IAAI,uBAAuB,CAAC;AAAA,IAC9D;AAEA,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,OAAO;AAEZ,SAAK,sBAAsB,IAAI;AAAA,MAC7B,KAAK,yBAAyB,KAAK,IAAI;AAAA,MACvC,KAAK,MAAM;AAAA,IACb;AAAA,EACF;AAAA,EAEA,IAAI,SAAsC;AACxC,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA,EAEA,IAAI,OAAO,KAAsB;AAC/B,SAAK,MAAM,SAAS;AAAA,EACtB;AAAA,EAEA,IAAI,UAAuB;AACzB,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAEE,MAQA,cAAiD,MACjD;AACA,QAAI,KAAK,UAAU;AACjB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AACA,SAAK,GAAG,UAAU,sBAAsB,CAACC,iBAAmC;AAE1E,UAAI,KAAK,cAAc;AACrB;AAAA,MACF;AACA,WAAK,iBAAiB,KAAK,MAAMA,aAAY,QAAQ;AAAA,IACvD,CAAC;AAED,SAAK,QAAQ;AACb,SAAK,eAAe;AAEpB,QAAI,aAAa;AACf,UAAI,OAAO,gBAAgB,UAAU;AACnC,aAAK,iBAAiB,WAAW;AAAA,MACnC,OAAO;AACL,aAAK,iBAAiB,YAAY,QAAQ;AAAA,MAC5C;AAAA,IACF;AAEA,SAAK,KAAK;AAAA,EACZ;AAAA;AAAA,EAGA,MAAM,IACJ,QACA,qBAAqB,MACrB,eAAe,MACf;AACA,UAAM,KAAK,mBAAmB;AAC9B,UAAM,YAAY,aAAa,sBAAsB,oBAAoB,YAAY;AACrF,UAAM,kBAAkB,KAAK,uBAAuB,UAAU,IAAI,MAAM;AACxE,cAAU,WAAW,QAAQ,eAAe;AAC5C,SAAK,qBAAqB,SAAS;AAAA,EACrC;AAAA,EAEA,aAAa,OAAmB,QAAQ,GAAG;AACzC,UAAM,UAAU,CAACC,WAA4C;AAC3D,aAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AApWpE;AAqWQ,YAAI,YAAY;AAChB,iBAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AACD,cAAM,IAAI,QAAQ,CAACC,aAAY,WAAWA,UAASD,MAAK,CAAC;AACzD,aAAI,UAAK,UAAL,mBAAY,aAAa;AAC3B,cAAI,CAAC,WAAW;AACd,oBAAM,UAAK,MAAM,qBAAX,mBAA6B,cAAc,EAAE,CAAC,qBAAqB,GAAG,MAAM;AAAA,UACpF;AAAA,QACF;AACA,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH;AAEA,QAAI,KAAK,kBAAkB;AACzB,WAAK,iBAAiB,OAAO;AAAA,IAC/B;AAEA,SAAK,mBAAmB,QAAQ,KAAK;AAAA,EACvC;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,eAAe,KAAK,MAAM,mBAAmB,IAAI,mBAAmB,KAAK;AAC9E,QAAI,CAAC,KAAK,cAAc;AACtB,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,SAAK,cAAc,IAAI,WAAW,KAAK,OAAO,KAAK,MAAM,KAAK,MAAM,KAAK,YAAY;AACrF,SAAK,YAAY,GAAG,gBAAgB,iBAAiB,CAAC,UAAU;AAC9D,WAAK,KAAK,6BAA8B;AACxC,WAAK,oBAAoB,qBAAqB,KAAK;AAAA,IACrD,CAAC;AACD,SAAK,YAAY,GAAG,gBAAgB,oBAAoB,CAAC,UAAU;AACjE,UAAI,CAAC,KAAK,mBAAmB,MAAM;AACjC;AAAA,MACF;AACA,UAAI,CAAC,KAAK,cAAc;AACtB,cAAM,IAAI,MAAM,2BAA2B;AAAA,MAC7C;AAEA,UAAI,KAAK;AACT,UAAI,KAAK,MAAM,oBAAoB;AACjC,aAAK,KAAK,IAAI,GAAG,IAAI,MAAM,WAAW;AACtC,aAAK,aAAa,QAAQ,eAAe;AAAA,MAC3C;AAEA,UAAI,MAAM,kBAAkB,KAAK,MAAM,yBAAyB;AAC9D,aAAK,qBAAqB;AAAA,MAC5B;AAAA,IACF,CAAC;AACD,SAAK,YAAY,GAAG,gBAAgB,eAAe,CAAC,UAAU;AAC5D,WAAK,KAAK,6BAA8B;AACxC,WAAK,oBAAoB,mBAAmB,KAAK;AACjD,WAAK,uBAAuB,KAAK,IAAI;AAAA,IACvC,CAAC;AACD,SAAK,YAAY,GAAG,gBAAgB,oBAAoB,CAAC,UAAU;AACjE,WAAK,0BAA0B,MAAM,aAAc,CAAC,EAAE;AAAA,IACxD,CAAC;AACD,SAAK,YAAY,GAAG,gBAAgB,kBAAkB,CAAC,UAAU;AAC/D,YAAM,gBAAgB,MAAM,aAAc,CAAC,EAAE;AAC7C,UAAI,CAAC,cAAe;AAEpB,WAAK,QAAQ,MAAM,EAAE,gBAAgB,cAAc,CAAC,EAAE,MAAM,0BAA0B;AACtF,WAAK,qBAAqB,KAAK,mBAAmB,MAAM,MAAM;AAE9D,UACE,KAAK,MAAM,wBACV,CAAC,KAAK,kBAAkB,KAAK,eAAe,qBAC7C;AACA,aAAK,sBAAsB;AAAA,MAC7B;AAEA,WAAK,oBAAoB,uBAAuB,aAAa;AAE7D,YAAM,QAAQ,KAAK,MAAM,cAAc,cAAc,SAAS,aAAa;AAC3E,UAAI,MAAM,UAAU,GAAG;AAGrB,aAAK,qBAAqB;AAAA,MAC5B;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,OAAO;AA9bf;AA+bI,SAAK,aAAa,cAAc;AAChC,UAAM,cAAc,IAAI,YAAY,KAAK,KAAK,YAAY,KAAK,KAAK,WAAW;AAC/E,UAAM,QAAQ,gBAAgB,iBAAiB,mBAAmB,WAAW;AAC7E,SAAK,oBAAoB,QAAM,gBAAK,UAAL,mBAAY,qBAAZ,mBAA8B;AAAA,MAC3D;AAAA,MACA,IAAI,oBAAoB,EAAE,QAAQ,YAAY,kBAAkB,CAAC;AAAA;AAGnE,UAAM,eAAe,IAAI,aAAa,WAAW;AACjD,SAAK,eAAe,IAAI,YAAY,cAAc,KAAK,IAAI;AAE3D,iBAAa,GAAG,kBAAkB,iBAAiB,MAAM;AACvD,WAAK,KAAK,8BAA+B;AACzC,WAAK,aAAa,UAAU;AAAA,IAC9B,CAAC;AAED,iBAAa,GAAG,kBAAkB,iBAAiB,CAAC,MAAM;AACxD,WAAK,KAAK,8BAA+B;AACzC,WAAK,aAAa,WAAW;AAAA,IAC/B,CAAC;AAED,SAAK,mBAAmB,QAAQ;AAEhC,WAAO,MAAM;AACX,YAAM,KAAK,iBAAiB;AAC5B,uBAAiB,UAAU,KAAK,cAAc;AAC5C,YAAI,WAAW,mBAAmB,eAAgB;AAClD,aAAK,iBAAiB;AACtB,cAAM,KAAK,YAAY,MAAM;AAC7B,aAAK,iBAAiB;AAAA,MACxB;AACA,WAAK,mBAAmB,IAAI,OAAO;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,wBAAwB;AAle1B;AAmeI,eAAK,uBAAL,mBAAyB;AACzB,QAAI,KAAK,eAAe,KAAK,YAAY,UAAU;AACjD,WAAK,aAAa,YAAY,GAAG;AAAA,IACnC;AAEA,SAAK,qBAAqB,aAAa;AAAA,MACrC,KAAK,MAAM;AAAA,MACX;AAAA,MACA,KAAK;AAAA,IACP;AACA,UAAM,YAAY,KAAK;AACvB,SAAK,kBAAkB,KAAK,sBAAsB,KAAK,iBAAiB,SAAS;AAAA,EACnF;AAAA,EAEA,sBACE,SACA,QAC0B;AAC1B,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,UAAI,YAAY;AAChB,eAAS,MAAM;AACb,oBAAY;AAAA,MACd,CAAC;AAED,UAAI,SAAS;AACX,cAAM,iBAAiB,OAAO;AAAA,MAChC;AAEA,YAAM,YAAY,KAAK,QAAQ,KAAK;AACpC,YAAM,gBAAgB,KAAK;AAC3B,UAAI,iBAAiB,cAAc,aAAa;AAC9C,aACG,CAAC,cAAc,gBAAgB,cAAc,kBAC9C,CAAC,cAAc,iBACf;AAGA,oBAAU,SAAS;AAAA,YACjB,YAAY,OAAO;AAAA;AAAA;AAAA,cAGjB,MAAM,SAAS;AAAA,YACjB,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AAEA,gBAAU,SAAS;AAAA,QACjB,YAAY,OAAO;AAAA,UACjB,MAAM,iCAAQ;AAAA,UACd,MAAM,SAAS;AAAA,QACjB,CAAC;AAAA,MACH;AAEA,UAAI,UAAW,SAAQ;AACvB,UAAI,YAAY,MAAM,KAAK,MAAM,kBAAkB,MAAM,SAAS;AAClE,UAAI,cAAc,OAAO;AACvB,yCAAQ;AACR;AAAA,MACF;AAEA,UAAI,UAAW,SAAQ;AAEvB,UAAI,EAAE,qBAAqB,YAAY;AACrC,oBAAa,MAAM,yBAAyB,MAAM,SAAS;AAAA,MAC7D;AAEA,UAAI,OAAQ,aAAa;AACvB;AAAA,MACF;AAEA,YAAM,kBAAkB,KAAK,uBAAuB,OAAQ,IAAI,SAAS;AACzE,aAAQ,WAAW,WAAW,eAAe;AAI7C,YAAM,UAAU,CAAC,CAAC,KAAK,uBACnB,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,wBAAwB,GAAI,IAAI,MAC9D;AAEJ,WAAK,QAAQ,MAAM,EAAE,UAAU,OAAQ,IAAI,QAAQ,CAAC,EAAE,MAAM,0BAA0B;AACtF,cAAQ;AAAA,IACV,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,YAAY,QAAsB;AACtC,QAAI;AACF,YAAM,OAAO,sBAAsB;AAAA,IACrC,QAAQ;AACN;AAAA,IACF;AACA,UAAM,KAAK,kBAAmB,oBAAoB;AAClD,UAAM,kBAAkB,OAAO;AAC/B,QAAI,gBAAgB,YAAa;AAEjC,UAAM,eAAe,OAAO;AAC5B,UAAM,aAAa,gBAAgB,KAAK;AACxC,UAAM,UAAU,WAAW,KAAK;AAEhC,UAAM,6BAA6B,MAAM;AACvC,UAAI,CAAC,gBAAgB,gBAAgB,eAAe,OAAO,cAAe;AAC1E,YAAME,gBACJ,OAAO,kBAAkB,aAAa,CAAC,CAAC,OAAO,OAAO,cAAc;AAKtE,UACE,OAAO,sBACP,CAACA,iBACD,WAAW,aAAa,KAAK,8BAC7B,CAAC,QAAQ,MACT;AACA;AAAA,MACF;AAEA,WAAK,QAAQ,MAAM,EAAE,gBAAgB,aAAa,CAAC,EAAE,MAAM,2BAA2B;AACtF,YAAM,UAAU,YAAY,OAAO,EAAE,MAAM,cAAc,MAAM,SAAS,KAAK,CAAC;AAC9E,WAAK,QAAQ,SAAS,KAAK,OAAO;AAClC,WAAK,KAAK,+BAAgC,OAAO;AAEjD,WAAK,mBAAmB,KAAK,iBAAiB,MAAM,aAAa,MAAM;AACvE,aAAO,kBAAkB;AAAA,IAC3B;AAGA,+BAA2B;AAE3B,WAAO,CAAC,QAAQ,MAAM;AACpB,YAAM,IAAI,QAAc,OAAO,YAAY;AACzC,mBAAW,SAAS,GAAG;AACvB,cAAM,QAAQ;AACd,gBAAQ;AAAA,MACV,CAAC;AACD,iCAA2B;AAC3B,UAAI,OAAO,YAAa;AAAA,IAC1B;AACA,+BAA2B;AAG3B,QAAI,gBAAgB;AACpB,UAAM,eAAe,OAAO,kBAAkB,aAAa,CAAC,CAAC,OAAO,OAAO,cAAc;AACzF,UAAM,qBAAqB,CAAC;AAC5B,QAAI,cAAc,OAAO;AAIzB,QAAI,gBAAgB,CAAC,aAAa;AAChC,UAAI,CAAC,gBAAgB,CAAC,OAAO,eAAe;AAC1C,cAAM,IAAI,MAAM,2DAA2D;AAAA,MAC7E;AACA,YAAM,YAAY,OAAO;AACzB,UAAI,mBAAmB,UAAU;AAEjC,eAAS,IAAI,GAAG,IAAI,KAAK,MAAM,sBAAsB,KAAK;AACxD,aAAK,KAAK,kCAAmC,gBAAgB;AAC7D,cAAM,cAAkC,CAAC;AACzC,mBAAW,QAAQ,kBAAkB;AACnC,gBAAM,OAAO,KAAK,KAAK,QAAQ,KAAK,MAAM,EAAE;AAAA,YAC1C,CAAC,YAAY,EAAE,MAAM,KAAK,MAAM,YAAY,KAAK,YAAY,OAAO;AAAA,YACpE,CAAC,WAAW,EAAE,MAAM,KAAK,MAAM,YAAY,KAAK,YAAY,MAAM;AAAA,UACpE;AACA,sBAAY,KAAK,EAAE,GAAG,MAAM,KAAK,CAAC;AAClC,eAAK,QACF,MAAM,EAAE,UAAU,KAAK,MAAM,UAAU,OAAO,GAAG,CAAC,EAClD,MAAM,uBAAuB;AAChC,cAAI;AACF,kBAAM;AAAA,UACR,QAAQ;AACN,iBAAK,QACF,MAAM,EAAE,UAAU,KAAK,MAAM,UAAU,OAAO,GAAG,CAAC,EAClD,MAAM,6BAA6B;AAAA,UACxC;AAAA,QACF;AAEA,cAAM,gBAAgB,CAAC;AACvB,cAAM,mBAAmB,CAAC;AAC1B,mBAAW,OAAO,aAAa;AAE7B,gBAAM,OAAO,MAAM,IAAI;AACvB,cAAI,CAAC,QAAQ,KAAK,WAAW,OAAW;AACxC,wBAAc,KAAK,GAAG;AACtB,2BAAiB,KAAK,YAAY,6BAA6B,IAAI,CAAC;AAAA,QACtE;AAEA,YAAI,CAAC,cAAc,OAAQ;AAG3B,2BAAmB,KAAK,YAAY,gBAAgB,eAAe,aAAa,CAAC;AACjF,2BAAmB,KAAK,GAAG,gBAAgB;AAE3C,cAAM,UAAU,OAAO,OAAO,QAAQ,KAAK;AAC3C,gBAAQ,SAAS,KAAK,GAAG,kBAAkB;AAE3C,cAAM,kBAAkB,KAAK,IAAI,KAAK;AAAA,UACpC;AAAA,UACA,QAAQ,KAAK;AAAA,QACf,CAAC;AACD,cAAM,kBAAkB,KAAK,uBAAuB,OAAO,IAAI,eAAe;AAE9E,eAAO,kBAAkB;AACzB,cAAMC,cAAa,gBAAgB,KAAK;AACxC,cAAMA,YAAW,KAAK,EAAE;AAGxB,wBAAgB;AAChB,sBAAc,gBAAgB;AAC9B,2BAAmB,gBAAgB;AAEnC,aAAK,KAAK,iCAAkC,WAAW;AACvD,YAAI,CAAC,iBAAkB;AAAA,MACzB;AAAA,IACF;AAEA,QAAI,OAAO,iBAAiB,CAAC,gBAAgB,OAAO,gBAAgB;AAClE,WAAK,QAAQ,SAAS,KAAK,GAAG,kBAAkB;AAChD,UAAI,aAAa;AACf,wBAAgB;AAAA,MAClB;AAEA,YAAM,MAAM,YAAY,OAAO,EAAE,MAAM,eAAe,MAAM,SAAS,UAAU,CAAC;AAChF,WAAK,QAAQ,SAAS,KAAK,GAAG;AAE9B,aAAO,oBAAoB;AAC3B,UAAI,aAAa;AACf,aAAK,KAAK,kCAAmC,GAAG;AAAA,MAClD,OAAO;AACL,aAAK,KAAK,gCAAiC,GAAG;AAAA,MAChD;AAEA,WAAK,QACF,MAAM;AAAA,QACL,iBAAiB;AAAA,QACjB;AAAA,QACA,UAAU,OAAO;AAAA,MACnB,CAAC,EACA,MAAM,wBAAwB;AAAA,IACnC;AAAA,EACF;AAAA,EAEA,uBACE,UACA,QACiB;AACjB,QAAI,CAAC,KAAK,cAAc;AACtB,YAAM,IAAI,MAAM,+CAA+C;AAAA,IACjE;AAEA,QAAI,kBAAkB,WAAW;AAC/B,eAAS,0BAA0B,UAAU,MAAM;AAAA,IACrD;AAEA,UAAM,WAAW;AACjB,QAAI,EAAE,OAAO,WAAW,WAAW;AAAA,IAEnC;AAEA,UAAM,YAAY,KAAK,MAAM,kBAAkB,MAAM,QAAQ;AAC7D,QAAI,CAAC,WAAW;AACd,YAAM,IAAI,MAAM,+DAA+D;AAAA,IACjF;AAEA,WAAO,KAAK,aAAa,WAAW,UAAU,SAAS;AAAA,EACzD;AAAA,EAEA,MAAM,2BAA2B;AAC/B,QAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,oBAAoB;AAClE,WAAK,QACF,MAAM,EAAE,UAAU,KAAK,eAAe,GAAG,CAAC,EAC1C,MAAM,yEAAyE;AAClF;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,oBAAoB;AAC5B,UAAI,KAAK,MAAM,uBAAuB,CAAC,KAAK,kBAAkB;AAC5D;AAAA,MACF;AACA,WAAK,sBAAsB;AAAA,IAC7B;AAEA,QAAI,CAAC,KAAK,oBAAoB;AAC5B,YAAM,IAAI,MAAM,kCAAkC;AAAA,IACpD;AAIA,QAAI,KAAK,iBAAiB,MAAM;AAC9B,uBAAiB,UAAU,KAAK,cAAc;AAC5C,YAAI,WAAW,mBAAmB,eAAgB;AAClD,YAAI,CAAC,OAAO,QAAS;AACrB,YAAI,OAAO,mBAAoB,QAAO,UAAU;AAAA,MAClD;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,UAAU,KAAK,mBAAmB,GAAG,CAAC,EAAE,MAAM,uBAAuB;AAE1F,SAAK,qBAAqB,KAAK,kBAAkB;AACjD,SAAK,qBAAqB;AAC1B,SAAK,0BAA0B;AAAA,EACjC;AAAA,EAEA,uBAAuB;AACrB,QACE,CAAC,KAAK,kBACN,CAAC,KAAK,eAAe,sBACrB,KAAK,eAAe,aACpB;AACA;AAAA,IACF;AAEA,QAAI,KAAK,MAAM,sBAAsB,GAAG;AAGtC,YAAM,eAAe,KAAK,MAAM,cAAc,cAAc;AAAA,QAC1D,KAAK;AAAA,MACP;AACA,UAAI,aAAa,SAAS,KAAK,MAAM,mBAAmB;AACtD;AAAA,MACF;AAAA,IACF;AACA,SAAK,eAAe,UAAU;AAAA,EAChC;AAAA,EAEA,qBAAqB,QAAsB;AACzC,SAAK,aAAa,IAAI,MAAM;AAC5B,SAAK,aAAa,IAAI,mBAAmB,cAAc;AACvD,SAAK,iBAAiB,QAAQ;AAAA,EAChC;AAAA;AAAA,EAGA,MAAM,QAAQ;AA7yBhB;AA8yBI,QAAI,CAAC,KAAK,UAAU;AAClB;AAAA,IACF;AAEA,eAAK,UAAL,mBAAY,mBAAmB,UAAU;AAAA,EAE3C;AACF;AAEA,gBAAgB,0BACd,UACA,QACuB;AA1zBzB;AA2zBE,QAAM,YAAY,KAAK,IAAI;AAC3B,MAAI,aAAa;AACjB,mBAAiB,SAAS,QAAQ;AAChC,UAAM,WAAU,WAAM,QAAQ,CAAC,MAAf,mBAAkB,MAAM;AACxC,QAAI,CAAC,QAAS;AAEd,QAAI,YAAY;AACd,mBAAa;AACb,UAAI,EACD,MAAM,EAAE,UAAU,SAAS,KAAK,MAAM,KAAK,IAAI,IAAI,SAAS,EAAE,CAAC,EAC/D,MAAM,0BAA0B;AAAA,IACrC;AACA,UAAM;AAAA,EACR;AACF;AAGA,MAAM,wBAAwB;AAAA;AAAA,EAEnB,cAAc;AAAA,EACd,4BAA4B;AAAA,EAC5B,4BAA4B;AAAA;AAAA,EAErC;AAAA,EACA;AAAA,EACA,oBAAoB,IAAI,OAAO;AAAA,EAC/B,uBAAuB;AAAA,EACvB,2BAA2B;AAAA,EAC3B,YAAY;AAAA,EACZ;AAAA,EACA;AAAA,EAEA,YAAY,cAAmC,qBAA6B;AAC1E,SAAK,gBAAgB;AACrB,SAAK,oBAAoB;AACzB,SAAK,wBAAwB;AAAA,EAC/B;AAAA,EAEA,IAAI,aAAsB;AACxB,WAAO,CAAC,KAAK,kBAAkB;AAAA,EACjC;AAAA,EAEA,uBAAuB,YAAoB;AACzC,SAAK,uBAAuB,WAAW,KAAK;AAC5C,QAAI,KAAK,UAAW;AAEpB,UAAM,uBACJ,KAAK,IAAI,IAAI,KAAK,2BAA2B,KAAK;AACpD,QAAI,QAAQ,uBAAuB,KAAK,oBAAoB,KAAK;AACjE,YAAQ,KAAK,oBAAoB,IAAI,QAAQ,KAAK,4BAA4B;AAE9E,SAAK,KAAK,KAAK;AAAA,EACjB;AAAA;AAAA,EAGA,qBAAqB,GAAa;AAChC,SAAK,YAAY;AAAA,EAKnB;AAAA;AAAA,EAGA,mBAAmB,GAAa;AAC9B,SAAK,YAAY;AACjB,SAAK,2BAA2B,KAAK,IAAI;AAEzC,QAAI,KAAK,sBAAsB;AAC7B,YAAM,QAAQ,KAAK,oBAAoB,IACnC,KAAK,oBAAoB,KAAK,4BAC9B;AACJ,WAAK,KAAK,KAAK;AAAA,IACjB;AAAA,EACF;AAAA;AAAA,EAIA,sBAA+B;AAC7B,WACE,KAAK,qBAAqB,SAAS,KACnC,KAAK,YAAY,SAAS,KAAK,qBAAqB,KAAK,qBAAqB,SAAS,CAAC,CAAE;AAAA,EAE9F;AAAA,EAEA,eAAe;AACb,SAAK,uBAAuB;AAC5B,SAAK,2BAA2B;AAAA,EAClC;AAAA,EAEA,KAAK,OAAe;AAClB,UAAM,UAAU,OAAOH,WAAkB;AACvC,YAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAASA,MAAK,CAAC;AACzD,WAAK,aAAa;AAClB,YAAM,KAAK,cAAc;AAAA,IAC3B;AAEA,SAAK,oBAAoB,IAAI,OAAO;AACpC,SAAK,qBAAqB,QAAQ,KAAK;AAAA,EACzC;AACF;","names":["VPAEvent","participant","delay","resolve","isUsingTools","playHandle"]}
1
+ {"version":3,"sources":["../../src/pipeline/pipeline_agent.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { LocalTrackPublication, RemoteParticipant, Room } from '@livekit/rtc-node';\nimport {\n AudioSource,\n LocalAudioTrack,\n RoomEvent,\n TrackPublishOptions,\n TrackSource,\n} from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport type {\n CallableFunctionResult,\n FunctionCallInfo,\n FunctionContext,\n LLM,\n} from '../llm/index.js';\nimport { LLMStream } from '../llm/index.js';\nimport { ChatContext, ChatMessage, ChatRole } from '../llm/index.js';\nimport { log } from '../log.js';\nimport { type STT, StreamAdapter as STTStreamAdapter } from '../stt/index.js';\nimport {\n SentenceTokenizer as BasicSentenceTokenizer,\n WordTokenizer as BasicWordTokenizer,\n hyphenateWord,\n} from '../tokenize/basic/index.js';\nimport type { SentenceTokenizer, WordTokenizer } from '../tokenize/tokenizer.js';\nimport type { TTS } from '../tts/index.js';\nimport { StreamAdapter as TTSStreamAdapter } from '../tts/index.js';\nimport { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport type { VAD, VADEvent } from '../vad.js';\nimport type { SpeechSource, SynthesisHandle } from './agent_output.js';\nimport { AgentOutput } from './agent_output.js';\nimport { AgentPlayout, AgentPlayoutEvent } from './agent_playout.js';\nimport { HumanInput, HumanInputEvent } from './human_input.js';\nimport { SpeechHandle } from './speech_handle.js';\n\nexport type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';\nexport const AGENT_STATE_ATTRIBUTE = 'lk.agent.state';\n\nexport type BeforeLLMCallback = (\n agent: VoicePipelineAgent,\n chatCtx: ChatContext,\n) => LLMStream | false | void | Promise<LLMStream | false | void>;\n\nexport type BeforeTTSCallback = (\n agent: VoicePipelineAgent,\n source: string | AsyncIterable<string>,\n) => SpeechSource;\n\nexport enum VPAEvent {\n USER_STARTED_SPEAKING,\n USER_STOPPED_SPEAKING,\n AGENT_STARTED_SPEAKING,\n AGENT_STOPPED_SPEAKING,\n USER_SPEECH_COMMITTED,\n AGENT_SPEECH_COMMITTED,\n AGENT_SPEECH_INTERRUPTED,\n FUNCTION_CALLS_COLLECTED,\n FUNCTION_CALLS_FINISHED,\n}\n\nexport type VPACallbacks = {\n [VPAEvent.USER_STARTED_SPEAKING]: () => void;\n [VPAEvent.USER_STOPPED_SPEAKING]: () => void;\n [VPAEvent.AGENT_STARTED_SPEAKING]: () => void;\n [VPAEvent.AGENT_STOPPED_SPEAKING]: () => void;\n [VPAEvent.USER_SPEECH_COMMITTED]: (msg: ChatMessage) => void;\n [VPAEvent.AGENT_SPEECH_COMMITTED]: (msg: ChatMessage) => void;\n [VPAEvent.AGENT_SPEECH_INTERRUPTED]: (msg: ChatMessage) => void;\n [VPAEvent.FUNCTION_CALLS_COLLECTED]: (funcs: FunctionCallInfo[]) => void;\n [VPAEvent.FUNCTION_CALLS_FINISHED]: (funcs: CallableFunctionResult[]) => void;\n};\n\nexport class AgentCallContext {\n #agent: VoicePipelineAgent;\n #llmStream: LLMStream;\n #metadata = new Map<string, any>();\n static #current: AgentCallContext;\n\n constructor(agent: VoicePipelineAgent, llmStream: LLMStream) {\n this.#agent = agent;\n this.#llmStream = llmStream;\n AgentCallContext.#current = this;\n }\n\n static getCurrent(): AgentCallContext {\n return AgentCallContext.#current;\n }\n\n get agent(): VoicePipelineAgent {\n return this.#agent;\n }\n\n storeMetadata(key: string, value: any) {\n this.#metadata.set(key, value);\n }\n\n getMetadata(key: string, orDefault: any = undefined) {\n return this.#metadata.get(key) || orDefault;\n }\n\n get llmStream(): LLMStream {\n return this.#llmStream;\n }\n}\n\nconst defaultBeforeLLMCallback: BeforeLLMCallback = (\n agent: VoicePipelineAgent,\n chatCtx: ChatContext,\n): LLMStream => {\n return agent.llm.chat({ chatCtx, fncCtx: agent.fncCtx });\n};\n\nconst defaultBeforeTTSCallback: BeforeTTSCallback = (\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n _: VoicePipelineAgent,\n text: string | AsyncIterable<string>,\n): string | AsyncIterable<string> => {\n return text;\n};\n\nexport interface AgentTranscriptionOptions {\n /** Whether to forward the user transcription to the client */\n userTranscription: boolean;\n /** Whether to forward the agent transcription to the client */\n agentTranscription: boolean;\n /**\n * The speed at which the agent's speech transcription is forwarded to the client.\n * We try to mimic the agent's speech speed by adjusting the transcription speed.\n */\n agentTranscriptionSpeech: number;\n /**\n * The tokenizer used to split the speech into sentences.\n * This is used to decide when to mark a transcript as final for the agent transcription.\n */\n sentenceTokenizer: SentenceTokenizer;\n /**\n * The tokenizer used to split the speech into words.\n * This is used to simulate the \"interim results\" of the agent transcription.\n */\n wordTokenizer: WordTokenizer;\n /**\n * A function that takes a string (word) as input and returns a list of strings,\n * representing the hyphenated parts of the word.\n */\n hyphenateWord: (word: string) => string[];\n}\n\nconst defaultAgentTranscriptionOptions: AgentTranscriptionOptions = {\n userTranscription: true,\n agentTranscription: true,\n agentTranscriptionSpeech: 1,\n sentenceTokenizer: new BasicSentenceTokenizer(),\n wordTokenizer: new BasicWordTokenizer(false),\n hyphenateWord: hyphenateWord,\n};\n\nexport interface VPAOptions {\n /** Chat context for the assistant. */\n chatCtx?: ChatContext;\n /** Function context for the assistant. */\n fncCtx?: FunctionContext;\n /** Whether to allow the user to interrupt the assistant. */\n allowInterruptions: boolean;\n /** Minimum duration of speech to consider for interruption. */\n interruptSpeechDuration: number;\n /** Minimum number of words to consider for interuption. This may increase latency. */\n interruptMinWords: number;\n /** Delay to wait before considering the user speech done. */\n minEndpointingDelay: number;\n maxRecursiveFncCalls: number;\n /* Whether to preemptively synthesize responses. */\n preemptiveSynthesis: boolean;\n /*\n * Callback called when the assistant is about to synthesize a reply.\n *\n * @remarks\n * Returning void will create a default LLM stream.\n * You can also return your own LLM stream by calling `llm.chat()`.\n * Returning `false` ill cancel the synthesis of the reply.\n */\n beforeLLMCallback: BeforeLLMCallback;\n /*\n * Callback called when the assistant is about to synthesize speech.\n *\n * @remarks\n * This can be used to customize text before synthesis\n * (e.g. editing the pronunciation of a word).\n */\n beforeTTSCallback: BeforeTTSCallback;\n /** Options for assistant transcription. */\n transcription: AgentTranscriptionOptions;\n}\n\nconst defaultVPAOptions: VPAOptions = {\n chatCtx: new ChatContext(),\n allowInterruptions: true,\n interruptSpeechDuration: 50,\n interruptMinWords: 0,\n minEndpointingDelay: 500,\n maxRecursiveFncCalls: 1,\n preemptiveSynthesis: false,\n beforeLLMCallback: defaultBeforeLLMCallback,\n beforeTTSCallback: defaultBeforeTTSCallback,\n transcription: defaultAgentTranscriptionOptions,\n};\n\n/** A pipeline agent (VAD + STT + LLM + TTS) implementation. */\nexport class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<VPACallbacks>) {\n /** Minimum time played for the user speech to be committed to the chat context. */\n readonly MIN_TIME_PLAYED_FOR_COMMIT = 1.5;\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n\n #vad: VAD;\n #stt: STT;\n #llm: LLM;\n #tts: TTS;\n #opts: VPAOptions;\n #humanInput?: HumanInput;\n #agentOutput?: AgentOutput;\n #trackPublishedFut = new Future();\n #pendingAgentReply?: SpeechHandle;\n #agentReplyTask?: CancellablePromise<void>;\n #playingSpeech?: SpeechHandle;\n #transcribedText = '';\n #transcribedInterimText = '';\n #speechQueueOpen = new Future();\n #speechQueue = new AsyncIterableQueue<SpeechHandle | typeof VoicePipelineAgent.FLUSH_SENTINEL>();\n #lastEndOfSpeechTime?: number;\n #updateStateTask?: CancellablePromise<void>;\n #started = false;\n #room?: Room;\n #participant: RemoteParticipant | string | null = null;\n #deferredValidation: DeferredReplyValidation;\n #logger = log();\n #agentPublication?: LocalTrackPublication;\n\n constructor(\n /** Voice Activity Detection instance. */\n vad: VAD,\n /** Speech-to-Text instance. */\n stt: STT,\n /** Large Language Model instance. */\n llm: LLM,\n /** Text-to-Speech instance. */\n tts: TTS,\n /** Additional VoicePipelineAgent options. */\n opts: Partial<VPAOptions> = defaultVPAOptions,\n ) {\n super();\n\n this.#opts = { ...defaultVPAOptions, ...opts };\n\n if (!stt.capabilities.streaming) {\n stt = new STTStreamAdapter(stt, vad);\n }\n\n if (!tts.capabilities.streaming) {\n tts = new TTSStreamAdapter(tts, new BasicSentenceTokenizer());\n }\n\n this.#vad = vad;\n this.#stt = stt;\n this.#llm = llm;\n this.#tts = tts;\n\n this.#deferredValidation = new DeferredReplyValidation(\n this.#validateReplyIfPossible.bind(this),\n this.#opts.minEndpointingDelay,\n );\n }\n\n get fncCtx(): FunctionContext | undefined {\n return this.#opts.fncCtx;\n }\n\n set fncCtx(ctx: FunctionContext) {\n this.#opts.fncCtx = ctx;\n }\n\n get chatCtx(): ChatContext {\n return this.#opts.chatCtx!;\n }\n\n get llm(): LLM {\n return this.#llm;\n }\n\n get tts(): TTS {\n return this.#tts;\n }\n\n get stt(): STT {\n return this.#stt;\n }\n\n get vad(): VAD {\n return this.#vad;\n }\n\n /** Start the voice assistant. */\n start(\n /** The room to connect to. */\n room: Room,\n /**\n * The participant to listen to.\n *\n * @remarks\n * Can be a participant or an identity.\n * If omitted, the first participant in the room will be selected.\n */\n participant: RemoteParticipant | string | null = null,\n ) {\n if (this.#started) {\n throw new Error('voice assistant already started');\n }\n room.on(RoomEvent.ParticipantConnected, (participant: RemoteParticipant) => {\n // automatically link to the first participant that connects, if not already linked\n if (this.#participant) {\n return;\n }\n this.#linkParticipant.call(this, participant.identity);\n });\n\n this.#room = room;\n this.#participant = participant;\n\n if (participant) {\n if (typeof participant === 'string') {\n this.#linkParticipant(participant);\n } else {\n this.#linkParticipant(participant.identity);\n }\n }\n\n this.#run();\n }\n\n /** Play a speech source through the voice assistant. */\n async say(\n source: string | LLMStream | AsyncIterable<string>,\n allowInterruptions = true,\n addToChatCtx = true,\n ) {\n await this.#trackPublishedFut.await;\n const newHandle = SpeechHandle.createAssistantSpeech(allowInterruptions, addToChatCtx);\n const synthesisHandle = this.#synthesizeAgentSpeech(newHandle.id, source);\n newHandle.initialize(source, synthesisHandle);\n this.#addSpeechForPlayout(newHandle);\n }\n\n #updateState(state: AgentState, delay = 0) {\n const runTask = (delay: number): CancellablePromise<void> => {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n await new Promise((resolve) => setTimeout(resolve, delay));\n if (this.#room?.isConnected) {\n if (!cancelled) {\n await this.#room.localParticipant?.setAttributes({ [AGENT_STATE_ATTRIBUTE]: state });\n }\n }\n resolve();\n });\n };\n\n if (this.#updateStateTask) {\n this.#updateStateTask.cancel();\n }\n\n this.#updateStateTask = runTask(delay);\n }\n\n #linkParticipant(participantIdentity: string): void {\n if (!this.#room) {\n this.#logger.error('Room is not set');\n return;\n }\n\n this.#participant = this.#room.remoteParticipants.get(participantIdentity) || null;\n if (!this.#participant) {\n this.#logger.error(`Participant with identity ${participantIdentity} not found`);\n return;\n }\n\n this.#humanInput = new HumanInput(this.#room, this.#vad, this.#stt, this.#participant);\n this.#humanInput.on(HumanInputEvent.START_OF_SPEECH, (event) => {\n this.emit(VPAEvent.USER_STARTED_SPEAKING);\n this.#deferredValidation.onHumanStartOfSpeech(event);\n });\n this.#humanInput.on(HumanInputEvent.VAD_INFERENCE_DONE, (event) => {\n if (!this.#trackPublishedFut.done) {\n return;\n }\n if (!this.#agentOutput) {\n throw new Error('agent output is undefined');\n }\n\n let tv = 1;\n if (this.#opts.allowInterruptions) {\n tv = Math.max(0, 1 - event.probability);\n this.#agentOutput.playout.targetVolume = tv;\n }\n\n if (event.speechDuration >= this.#opts.interruptSpeechDuration) {\n this.#interruptIfPossible();\n }\n });\n this.#humanInput.on(HumanInputEvent.END_OF_SPEECH, (event) => {\n this.emit(VPAEvent.USER_STARTED_SPEAKING);\n this.#deferredValidation.onHumanEndOfSpeech(event);\n this.#lastEndOfSpeechTime = Date.now();\n });\n this.#humanInput.on(HumanInputEvent.INTERIM_TRANSCRIPT, (event) => {\n this.#transcribedInterimText = event.alternatives![0].text;\n });\n this.#humanInput.on(HumanInputEvent.FINAL_TRANSCRIPT, (event) => {\n const newTranscript = event.alternatives![0].text;\n if (!newTranscript) return;\n\n this.#logger.child({ userTranscript: newTranscript }).debug('received user transcript');\n this.#transcribedText += (this.#transcribedText ? ' ' : '') + newTranscript;\n\n if (\n this.#opts.preemptiveSynthesis &&\n (!this.#playingSpeech || this.#playingSpeech.allowInterruptions)\n ) {\n this.#synthesizeAgentReply();\n }\n\n this.#deferredValidation.onHumanFinalTranscript(newTranscript);\n\n const words = this.#opts.transcription.wordTokenizer.tokenize(newTranscript);\n if (words.length >= 3) {\n // VAD can sometimes not detect that the human is speaking.\n // to make the interruption more reliable, we also interrupt on the final transcript.\n this.#interruptIfPossible();\n }\n });\n }\n\n async #run() {\n this.#updateState('initializing');\n const audioSource = new AudioSource(this.#tts.sampleRate, this.#tts.numChannels);\n const track = LocalAudioTrack.createAudioTrack('assistant_voice', audioSource);\n this.#agentPublication = await this.#room?.localParticipant?.publishTrack(\n track,\n new TrackPublishOptions({ source: TrackSource.SOURCE_MICROPHONE }),\n );\n\n const agentPlayout = new AgentPlayout(audioSource);\n this.#agentOutput = new AgentOutput(agentPlayout, this.#tts);\n\n agentPlayout.on(AgentPlayoutEvent.PLAYOUT_STARTED, () => {\n this.emit(VPAEvent.AGENT_STARTED_SPEAKING);\n this.#updateState('speaking');\n });\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n agentPlayout.on(AgentPlayoutEvent.PLAYOUT_STOPPED, (_) => {\n this.emit(VPAEvent.AGENT_STOPPED_SPEAKING);\n this.#updateState('listening');\n });\n\n this.#trackPublishedFut.resolve();\n\n while (true) {\n await this.#speechQueueOpen.await;\n for await (const speech of this.#speechQueue) {\n if (speech === VoicePipelineAgent.FLUSH_SENTINEL) break;\n this.#playingSpeech = speech;\n await this.#playSpeech(speech);\n this.#playingSpeech = undefined;\n }\n this.#speechQueueOpen = new Future();\n }\n }\n\n #synthesizeAgentReply() {\n this.#pendingAgentReply?.cancel();\n if (this.#humanInput && this.#humanInput.speaking) {\n this.#updateState('thinking', 200);\n }\n\n this.#pendingAgentReply = SpeechHandle.createAssistantReply(\n this.#opts.allowInterruptions,\n true,\n this.#transcribedText,\n );\n const newHandle = this.#pendingAgentReply;\n this.#agentReplyTask = this.#synthesizeAnswerTask(this.#agentReplyTask, newHandle);\n }\n\n #synthesizeAnswerTask(\n oldTask: CancellablePromise<void> | undefined,\n handle?: SpeechHandle,\n ): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n const copiedCtx = this.chatCtx.copy();\n const playingSpeech = this.#playingSpeech;\n if (playingSpeech && playingSpeech.initialized) {\n if (\n (!playingSpeech.userQuestion || playingSpeech.userCommitted) &&\n !playingSpeech.speechCommitted\n ) {\n // the speech is playing but not committed yet,\n // add it to the chat context for this new reply synthesis\n copiedCtx.messages.push(\n ChatMessage.create({\n text: playingSpeech.synthesisHandle.text,\n role: ChatRole.ASSISTANT,\n }),\n );\n }\n }\n\n copiedCtx.messages.push(\n ChatMessage.create({\n text: handle?.userQuestion,\n role: ChatRole.USER,\n }),\n );\n\n if (cancelled) resolve();\n let llmStream = await this.#opts.beforeLLMCallback(this, copiedCtx);\n if (llmStream === false) {\n handle?.cancel();\n return;\n }\n\n if (cancelled) resolve();\n // fallback to default impl if no custom/user stream is returned\n if (!(llmStream instanceof LLMStream)) {\n llmStream = (await defaultBeforeLLMCallback(this, copiedCtx)) as LLMStream;\n }\n\n if (handle!.interrupted) {\n return;\n }\n\n const synthesisHandle = this.#synthesizeAgentSpeech(handle!.id, llmStream);\n handle!.initialize(llmStream, synthesisHandle);\n\n // TODO(theomonnom): find a more reliable way to get the elapsed time from the last EOS\n // (VAD could not have detected any speech — maybe unlikely?)\n const elapsed = !!this.#lastEndOfSpeechTime\n ? Math.round((Date.now() - this.#lastEndOfSpeechTime) * 1000) / 1000\n : -1;\n\n this.#logger.child({ speechId: handle!.id, elapsed }).debug('synthesizing agent reply');\n resolve();\n });\n }\n\n async #playSpeech(handle: SpeechHandle) {\n try {\n await handle.waitForInitialization();\n } catch {\n return;\n }\n await this.#agentPublication!.waitForSubscription();\n const synthesisHandle = handle.synthesisHandle;\n if (synthesisHandle.interrupted) return;\n\n const userQuestion = handle.userQuestion;\n const playHandle = synthesisHandle.play();\n const joinFut = playHandle.join();\n\n const commitUserQuestionIfNeeded = () => {\n if (!userQuestion || synthesisHandle.interrupted || handle.userCommitted) return;\n const isUsingTools =\n handle.source instanceof LLMStream && !!handle.source.functionCalls.length;\n\n // make sure at least some speech was played before committing the user message\n // since we try to validate as fast as possible it is possible the agent gets interrupted\n // really quickly (barely audible), we don't want to mark this question as \"answered\".\n if (\n handle.allowInterruptions &&\n !isUsingTools &&\n playHandle.timePlayed < this.MIN_TIME_PLAYED_FOR_COMMIT &&\n !joinFut.done\n ) {\n return;\n }\n\n this.#logger.child({ userTranscript: userQuestion }).debug('committed user transcript');\n const userMsg = ChatMessage.create({ text: userQuestion, role: ChatRole.USER });\n this.chatCtx.messages.push(userMsg);\n this.emit(VPAEvent.USER_SPEECH_COMMITTED, userMsg);\n\n this.#transcribedText = this.#transcribedText.slice(userQuestion.length);\n handle.markUserCommitted();\n };\n\n // wait for the playHandle to finish and check every 1s if user question should be committed\n commitUserQuestionIfNeeded();\n\n while (!joinFut.done) {\n await new Promise<void>(async (resolve) => {\n setTimeout(resolve, 500);\n await joinFut.await;\n resolve();\n });\n commitUserQuestionIfNeeded();\n if (handle.interrupted) break;\n }\n commitUserQuestionIfNeeded();\n\n const collectedText = handle.synthesisHandle.text;\n const isUsingTools = handle.source instanceof LLMStream && !!handle.source.functionCalls.length;\n const extraToolsMessages = []; // additional messages from the functions to add to the context\n let interrupted = handle.interrupted;\n\n // if the answer is using tools, execute the functions and automatically generate\n // a response to the user question from the returned values\n if (isUsingTools && !interrupted) {\n if (!userQuestion || !handle.userCommitted) {\n throw new Error('user speech should have been committed before using tools');\n }\n const llmStream = handle.source;\n let newFunctionCalls = llmStream.functionCalls;\n\n for (let i = 0; i < this.#opts.maxRecursiveFncCalls; i++) {\n this.emit(VPAEvent.FUNCTION_CALLS_COLLECTED, newFunctionCalls);\n const calledFuncs: FunctionCallInfo[] = [];\n for (const func of newFunctionCalls) {\n const task = func.func.execute(func.params).then(\n (result) => ({ name: func.name, toolCallId: func.toolCallId, result }),\n (error) => ({ name: func.name, toolCallId: func.toolCallId, error }),\n );\n calledFuncs.push({ ...func, task });\n this.#logger\n .child({ function: func.name, speechId: handle.id })\n .debug('executing AI function');\n try {\n await task;\n } catch {\n this.#logger\n .child({ function: func.name, speechId: handle.id })\n .error('error executing AI function');\n }\n }\n\n const toolCallsInfo = [];\n const toolCallsResults = [];\n for (const fnc of calledFuncs) {\n // ignore the function calls that return void\n const task = await fnc.task;\n if (!task || task.result === undefined) continue;\n toolCallsInfo.push(fnc);\n toolCallsResults.push(ChatMessage.createToolFromFunctionResult(task));\n }\n\n if (!toolCallsInfo.length) break;\n\n // generate an answer from the tool calls\n extraToolsMessages.push(ChatMessage.createToolCalls(toolCallsInfo, collectedText));\n extraToolsMessages.push(...toolCallsResults);\n\n const chatCtx = handle.source.chatCtx.copy();\n chatCtx.messages.push(...extraToolsMessages);\n\n const answerLLMStream = this.llm.chat({\n chatCtx,\n fncCtx: this.fncCtx,\n });\n const answerSynthesis = this.#synthesizeAgentSpeech(handle.id, answerLLMStream);\n // replace the synthesis handle with the new one to allow interruption\n handle.synthesisHandle = answerSynthesis;\n const playHandle = answerSynthesis.play();\n await playHandle.join().await;\n\n interrupted = answerSynthesis.interrupted;\n newFunctionCalls = answerLLMStream.functionCalls;\n\n this.emit(VPAEvent.FUNCTION_CALLS_FINISHED, calledFuncs);\n if (!newFunctionCalls) break;\n }\n }\n\n if (handle.addToChatCtx && (!userQuestion || handle.userCommitted)) {\n this.chatCtx.messages.push(...extraToolsMessages);\n if (interrupted) {\n collectedText + '…';\n }\n\n const msg = ChatMessage.create({ text: collectedText, role: ChatRole.ASSISTANT });\n this.chatCtx.messages.push(msg);\n\n handle.markSpeechCommitted();\n if (interrupted) {\n this.emit(VPAEvent.AGENT_SPEECH_INTERRUPTED, msg);\n } else {\n this.emit(VPAEvent.AGENT_SPEECH_COMMITTED, msg);\n }\n\n this.#logger\n .child({\n agentTranscript: collectedText,\n interrupted,\n speechId: handle.id,\n })\n .debug('committed agent speech');\n }\n }\n\n #synthesizeAgentSpeech(\n speechId: string,\n source: string | LLMStream | AsyncIterable<string>,\n ): SynthesisHandle {\n if (!this.#agentOutput) {\n throw new Error('agent output should be initialized when ready');\n }\n\n if (source instanceof LLMStream) {\n source = llmStreamToStringIterable(speechId, source);\n }\n\n const ogSource = source;\n if (!(typeof source === 'string')) {\n // TODO(nbsp): itertools.tee\n }\n\n const ttsSource = this.#opts.beforeTTSCallback(this, ogSource);\n if (!ttsSource) {\n throw new Error('beforeTTSCallback must return string or AsyncIterable<string>');\n }\n\n return this.#agentOutput.synthesize(speechId, ttsSource);\n }\n\n async #validateReplyIfPossible() {\n if (this.#playingSpeech && !this.#playingSpeech.allowInterruptions) {\n this.#logger\n .child({ speechId: this.#playingSpeech.id })\n .debug('skipping validation, agent is speaking and does not allow interruptions');\n return;\n }\n\n if (!this.#pendingAgentReply) {\n if (this.#opts.preemptiveSynthesis || !this.#transcribedText) {\n return;\n }\n this.#synthesizeAgentReply();\n }\n\n if (!this.#pendingAgentReply) {\n throw new Error('pending agent reply is undefined');\n }\n\n // in some bad timimg, we could end up with two pushed agent replies inside the speech queue.\n // so make sure we directly interrupt every reply when validating a new one\n if (this.#speechQueueOpen.done) {\n for await (const speech of this.#speechQueue) {\n if (speech === VoicePipelineAgent.FLUSH_SENTINEL) break;\n if (!speech.isReply) continue;\n if (speech.allowInterruptions) speech.interrupt();\n }\n }\n\n this.#logger.child({ speechId: this.#pendingAgentReply.id }).debug('validated agent reply');\n\n this.#addSpeechForPlayout(this.#pendingAgentReply);\n this.#pendingAgentReply = undefined;\n this.#transcribedInterimText = '';\n }\n\n #interruptIfPossible() {\n if (\n !this.#playingSpeech ||\n !this.#playingSpeech.allowInterruptions ||\n this.#playingSpeech.interrupted\n ) {\n return;\n }\n\n if (this.#opts.interruptMinWords !== 0) {\n // check the final/interim transcribed text for the minimum word count\n // to interrupt the agent speech\n const interimWords = this.#opts.transcription.wordTokenizer.tokenize(\n this.#transcribedInterimText,\n );\n if (interimWords.length < this.#opts.interruptMinWords) {\n return;\n }\n }\n this.#playingSpeech.interrupt();\n }\n\n #addSpeechForPlayout(handle: SpeechHandle) {\n this.#speechQueue.put(handle);\n this.#speechQueue.put(VoicePipelineAgent.FLUSH_SENTINEL);\n this.#speechQueueOpen.resolve();\n }\n\n /** Close the voice assistant. */\n async close() {\n if (!this.#started) {\n return;\n }\n\n this.#room?.removeAllListeners(RoomEvent.ParticipantConnected);\n // TODO(nbsp): await this.#deferredValidation.close()\n }\n}\n\nasync function* llmStreamToStringIterable(\n speechId: string,\n stream: LLMStream,\n): AsyncIterable<string> {\n const startTime = Date.now();\n let firstFrame = true;\n for await (const chunk of stream) {\n const content = chunk.choices[0]?.delta.content;\n if (!content) continue;\n\n if (firstFrame) {\n firstFrame = false;\n log()\n .child({ speechId, elapsed: Math.round(Date.now() - startTime) })\n .debug('received first LLM token');\n }\n yield content;\n }\n}\n\n/** This class is used to try to find the best time to validate the agent reply. */\nclass DeferredReplyValidation {\n // if the STT gives us punctuation, we can try to validate the reply faster.\n readonly PUNCTUATION = '.!?';\n readonly PUNCTUATION_REDUCE_FACTOR = 0.75;\n readonly LATE_TRANSCRIPT_TOLERANCE = 1.5; // late compared to end of speech\n\n #validateFunc: () => Promise<void>;\n #validatingPromise?: Promise<void>;\n #validatingFuture = new Future();\n #lastFinalTranscript = '';\n #lastRecvEndOfSpeechTime = 0;\n #speaking = false;\n #endOfSpeechDelay: number;\n #finalTranscriptDelay: number;\n\n constructor(validateFunc: () => Promise<void>, minEndpointingDelay: number) {\n this.#validateFunc = validateFunc;\n this.#endOfSpeechDelay = minEndpointingDelay;\n this.#finalTranscriptDelay = minEndpointingDelay;\n }\n\n get validating(): boolean {\n return !this.#validatingFuture.done;\n }\n\n onHumanFinalTranscript(transcript: string) {\n this.#lastFinalTranscript = transcript.trim();\n if (this.#speaking) return;\n\n const hasRecentEndOfSpeech =\n Date.now() - this.#lastRecvEndOfSpeechTime < this.LATE_TRANSCRIPT_TOLERANCE;\n let delay = hasRecentEndOfSpeech ? this.#endOfSpeechDelay : this.#finalTranscriptDelay;\n delay = this.#endWithPunctuation() ? delay * this.PUNCTUATION_REDUCE_FACTOR : 1;\n\n this.#run(delay);\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n onHumanStartOfSpeech(_: VADEvent) {\n this.#speaking = true;\n // TODO(nbsp):\n // if (this.validating) {\n // this.#validatingPromise.cancel()\n // }\n }\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n onHumanEndOfSpeech(_: VADEvent) {\n this.#speaking = false;\n this.#lastRecvEndOfSpeechTime = Date.now();\n\n if (this.#lastFinalTranscript) {\n const delay = this.#endWithPunctuation()\n ? this.#endOfSpeechDelay * this.PUNCTUATION_REDUCE_FACTOR\n : 1;\n this.#run(delay);\n }\n }\n\n // TODO(nbsp): aclose\n\n #endWithPunctuation(): boolean {\n return (\n this.#lastFinalTranscript.length > 0 &&\n this.PUNCTUATION.includes(this.#lastFinalTranscript[this.#lastFinalTranscript.length - 1]!)\n );\n }\n\n #resetStates() {\n this.#lastFinalTranscript = '';\n this.#lastRecvEndOfSpeechTime = 0;\n }\n\n #run(delay: number) {\n const runTask = async (delay: number) => {\n await new Promise((resolve) => setTimeout(resolve, delay));\n this.#resetStates();\n await this.#validateFunc();\n };\n\n this.#validatingFuture = new Future();\n this.#validatingPromise = runTask(delay);\n }\n}\n"],"mappings":"AAIA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AAEP,OAAO,kBAAkB;AAOzB,SAAS,iBAAiB;AAC1B,SAAS,aAAa,aAAa,gBAAgB;AACnD,SAAS,WAAW;AACpB,SAAmB,iBAAiB,wBAAwB;AAC5D;AAAA,EACE,qBAAqB;AAAA,EACrB,iBAAiB;AAAA,EACjB;AAAA,OACK;AAGP,SAAS,iBAAiB,wBAAwB;AAClD,SAAS,oBAAoB,oBAAoB,QAAQ,wBAAwB;AAGjF,SAAS,mBAAmB;AAC5B,SAAS,cAAc,yBAAyB;AAChD,SAAS,YAAY,uBAAuB;AAC5C,SAAS,oBAAoB;AAGtB,MAAM,wBAAwB;AAY9B,IAAK,WAAL,kBAAKA,cAAL;AACL,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AACA,EAAAA,oBAAA;AATU,SAAAA;AAAA,GAAA;AAwBL,MAAM,iBAAiB;AAAA,EAC5B;AAAA,EACA;AAAA,EACA,YAAY,oBAAI,IAAiB;AAAA,EACjC,OAAO;AAAA,EAEP,YAAY,OAA2B,WAAsB;AAC3D,SAAK,SAAS;AACd,SAAK,aAAa;AAClB,qBAAiB,WAAW;AAAA,EAC9B;AAAA,EAEA,OAAO,aAA+B;AACpC,WAAO,iBAAiB;AAAA,EAC1B;AAAA,EAEA,IAAI,QAA4B;AAC9B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,cAAc,KAAa,OAAY;AACrC,SAAK,UAAU,IAAI,KAAK,KAAK;AAAA,EAC/B;AAAA,EAEA,YAAY,KAAa,YAAiB,QAAW;AACnD,WAAO,KAAK,UAAU,IAAI,GAAG,KAAK;AAAA,EACpC;AAAA,EAEA,IAAI,YAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AACF;AAEA,MAAM,2BAA8C,CAClD,OACA,YACc;AACd,SAAO,MAAM,IAAI,KAAK,EAAE,SAAS,QAAQ,MAAM,OAAO,CAAC;AACzD;AAEA,MAAM,2BAA8C,CAElD,GACA,SACmC;AACnC,SAAO;AACT;AA6BA,MAAM,mCAA8D;AAAA,EAClE,mBAAmB;AAAA,EACnB,oBAAoB;AAAA,EACpB,0BAA0B;AAAA,EAC1B,mBAAmB,IAAI,uBAAuB;AAAA,EAC9C,eAAe,IAAI,mBAAmB,KAAK;AAAA,EAC3C;AACF;AAuCA,MAAM,oBAAgC;AAAA,EACpC,SAAS,IAAI,YAAY;AAAA,EACzB,oBAAoB;AAAA,EACpB,yBAAyB;AAAA,EACzB,mBAAmB;AAAA,EACnB,qBAAqB;AAAA,EACrB,sBAAsB;AAAA,EACtB,qBAAqB;AAAA,EACrB,mBAAmB;AAAA,EACnB,mBAAmB;AAAA,EACnB,eAAe;AACjB;AAGO,MAAM,2BAA4B,aAAsD;AAAA;AAAA,EAEpF,6BAA6B;AAAA,EACtC,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EAElE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,qBAAqB,IAAI,OAAO;AAAA,EAChC;AAAA,EACA;AAAA,EACA;AAAA,EACA,mBAAmB;AAAA,EACnB,0BAA0B;AAAA,EAC1B,mBAAmB,IAAI,OAAO;AAAA,EAC9B,eAAe,IAAI,mBAA4E;AAAA,EAC/F;AAAA,EACA;AAAA,EACA,WAAW;AAAA,EACX;AAAA,EACA,eAAkD;AAAA,EAClD;AAAA,EACA,UAAU,IAAI;AAAA,EACd;AAAA,EAEA,YAEE,KAEA,KAEA,KAEA,KAEA,OAA4B,mBAC5B;AACA,UAAM;AAEN,SAAK,QAAQ,EAAE,GAAG,mBAAmB,GAAG,KAAK;AAE7C,QAAI,CAAC,IAAI,aAAa,WAAW;AAC/B,YAAM,IAAI,iBAAiB,KAAK,GAAG;AAAA,IACrC;AAEA,QAAI,CAAC,IAAI,aAAa,WAAW;AAC/B,YAAM,IAAI,iBAAiB,KAAK,IAAI,uBAAuB,CAAC;AAAA,IAC9D;AAEA,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,OAAO;AAEZ,SAAK,sBAAsB,IAAI;AAAA,MAC7B,KAAK,yBAAyB,KAAK,IAAI;AAAA,MACvC,KAAK,MAAM;AAAA,IACb;AAAA,EACF;AAAA,EAEA,IAAI,SAAsC;AACxC,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA,EAEA,IAAI,OAAO,KAAsB;AAC/B,SAAK,MAAM,SAAS;AAAA,EACtB;AAAA,EAEA,IAAI,UAAuB;AACzB,WAAO,KAAK,MAAM;AAAA,EACpB;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,MAAW;AACb,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAEE,MAQA,cAAiD,MACjD;AACA,QAAI,KAAK,UAAU;AACjB,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AACA,SAAK,GAAG,UAAU,sBAAsB,CAACC,iBAAmC;AAE1E,UAAI,KAAK,cAAc;AACrB;AAAA,MACF;AACA,WAAK,iBAAiB,KAAK,MAAMA,aAAY,QAAQ;AAAA,IACvD,CAAC;AAED,SAAK,QAAQ;AACb,SAAK,eAAe;AAEpB,QAAI,aAAa;AACf,UAAI,OAAO,gBAAgB,UAAU;AACnC,aAAK,iBAAiB,WAAW;AAAA,MACnC,OAAO;AACL,aAAK,iBAAiB,YAAY,QAAQ;AAAA,MAC5C;AAAA,IACF;AAEA,SAAK,KAAK;AAAA,EACZ;AAAA;AAAA,EAGA,MAAM,IACJ,QACA,qBAAqB,MACrB,eAAe,MACf;AACA,UAAM,KAAK,mBAAmB;AAC9B,UAAM,YAAY,aAAa,sBAAsB,oBAAoB,YAAY;AACrF,UAAM,kBAAkB,KAAK,uBAAuB,UAAU,IAAI,MAAM;AACxE,cAAU,WAAW,QAAQ,eAAe;AAC5C,SAAK,qBAAqB,SAAS;AAAA,EACrC;AAAA,EAEA,aAAa,OAAmB,QAAQ,GAAG;AACzC,UAAM,UAAU,CAACC,WAA4C;AAC3D,aAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AApWpE;AAqWQ,YAAI,YAAY;AAChB,iBAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AACD,cAAM,IAAI,QAAQ,CAACC,aAAY,WAAWA,UAASD,MAAK,CAAC;AACzD,aAAI,UAAK,UAAL,mBAAY,aAAa;AAC3B,cAAI,CAAC,WAAW;AACd,oBAAM,UAAK,MAAM,qBAAX,mBAA6B,cAAc,EAAE,CAAC,qBAAqB,GAAG,MAAM;AAAA,UACpF;AAAA,QACF;AACA,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH;AAEA,QAAI,KAAK,kBAAkB;AACzB,WAAK,iBAAiB,OAAO;AAAA,IAC/B;AAEA,SAAK,mBAAmB,QAAQ,KAAK;AAAA,EACvC;AAAA,EAEA,iBAAiB,qBAAmC;AAClD,QAAI,CAAC,KAAK,OAAO;AACf,WAAK,QAAQ,MAAM,iBAAiB;AACpC;AAAA,IACF;AAEA,SAAK,eAAe,KAAK,MAAM,mBAAmB,IAAI,mBAAmB,KAAK;AAC9E,QAAI,CAAC,KAAK,cAAc;AACtB,WAAK,QAAQ,MAAM,6BAA6B,mBAAmB,YAAY;AAC/E;AAAA,IACF;AAEA,SAAK,cAAc,IAAI,WAAW,KAAK,OAAO,KAAK,MAAM,KAAK,MAAM,KAAK,YAAY;AACrF,SAAK,YAAY,GAAG,gBAAgB,iBAAiB,CAAC,UAAU;AAC9D,WAAK,KAAK,6BAA8B;AACxC,WAAK,oBAAoB,qBAAqB,KAAK;AAAA,IACrD,CAAC;AACD,SAAK,YAAY,GAAG,gBAAgB,oBAAoB,CAAC,UAAU;AACjE,UAAI,CAAC,KAAK,mBAAmB,MAAM;AACjC;AAAA,MACF;AACA,UAAI,CAAC,KAAK,cAAc;AACtB,cAAM,IAAI,MAAM,2BAA2B;AAAA,MAC7C;AAEA,UAAI,KAAK;AACT,UAAI,KAAK,MAAM,oBAAoB;AACjC,aAAK,KAAK,IAAI,GAAG,IAAI,MAAM,WAAW;AACtC,aAAK,aAAa,QAAQ,eAAe;AAAA,MAC3C;AAEA,UAAI,MAAM,kBAAkB,KAAK,MAAM,yBAAyB;AAC9D,aAAK,qBAAqB;AAAA,MAC5B;AAAA,IACF,CAAC;AACD,SAAK,YAAY,GAAG,gBAAgB,eAAe,CAAC,UAAU;AAC5D,WAAK,KAAK,6BAA8B;AACxC,WAAK,oBAAoB,mBAAmB,KAAK;AACjD,WAAK,uBAAuB,KAAK,IAAI;AAAA,IACvC,CAAC;AACD,SAAK,YAAY,GAAG,gBAAgB,oBAAoB,CAAC,UAAU;AACjE,WAAK,0BAA0B,MAAM,aAAc,CAAC,EAAE;AAAA,IACxD,CAAC;AACD,SAAK,YAAY,GAAG,gBAAgB,kBAAkB,CAAC,UAAU;AAC/D,YAAM,gBAAgB,MAAM,aAAc,CAAC,EAAE;AAC7C,UAAI,CAAC,cAAe;AAEpB,WAAK,QAAQ,MAAM,EAAE,gBAAgB,cAAc,CAAC,EAAE,MAAM,0BAA0B;AACtF,WAAK,qBAAqB,KAAK,mBAAmB,MAAM,MAAM;AAE9D,UACE,KAAK,MAAM,wBACV,CAAC,KAAK,kBAAkB,KAAK,eAAe,qBAC7C;AACA,aAAK,sBAAsB;AAAA,MAC7B;AAEA,WAAK,oBAAoB,uBAAuB,aAAa;AAE7D,YAAM,QAAQ,KAAK,MAAM,cAAc,cAAc,SAAS,aAAa;AAC3E,UAAI,MAAM,UAAU,GAAG;AAGrB,aAAK,qBAAqB;AAAA,MAC5B;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,OAAO;AA9bf;AA+bI,SAAK,aAAa,cAAc;AAChC,UAAM,cAAc,IAAI,YAAY,KAAK,KAAK,YAAY,KAAK,KAAK,WAAW;AAC/E,UAAM,QAAQ,gBAAgB,iBAAiB,mBAAmB,WAAW;AAC7E,SAAK,oBAAoB,QAAM,gBAAK,UAAL,mBAAY,qBAAZ,mBAA8B;AAAA,MAC3D;AAAA,MACA,IAAI,oBAAoB,EAAE,QAAQ,YAAY,kBAAkB,CAAC;AAAA;AAGnE,UAAM,eAAe,IAAI,aAAa,WAAW;AACjD,SAAK,eAAe,IAAI,YAAY,cAAc,KAAK,IAAI;AAE3D,iBAAa,GAAG,kBAAkB,iBAAiB,MAAM;AACvD,WAAK,KAAK,8BAA+B;AACzC,WAAK,aAAa,UAAU;AAAA,IAC9B,CAAC;AAED,iBAAa,GAAG,kBAAkB,iBAAiB,CAAC,MAAM;AACxD,WAAK,KAAK,8BAA+B;AACzC,WAAK,aAAa,WAAW;AAAA,IAC/B,CAAC;AAED,SAAK,mBAAmB,QAAQ;AAEhC,WAAO,MAAM;AACX,YAAM,KAAK,iBAAiB;AAC5B,uBAAiB,UAAU,KAAK,cAAc;AAC5C,YAAI,WAAW,mBAAmB,eAAgB;AAClD,aAAK,iBAAiB;AACtB,cAAM,KAAK,YAAY,MAAM;AAC7B,aAAK,iBAAiB;AAAA,MACxB;AACA,WAAK,mBAAmB,IAAI,OAAO;AAAA,IACrC;AAAA,EACF;AAAA,EAEA,wBAAwB;AAle1B;AAmeI,eAAK,uBAAL,mBAAyB;AACzB,QAAI,KAAK,eAAe,KAAK,YAAY,UAAU;AACjD,WAAK,aAAa,YAAY,GAAG;AAAA,IACnC;AAEA,SAAK,qBAAqB,aAAa;AAAA,MACrC,KAAK,MAAM;AAAA,MACX;AAAA,MACA,KAAK;AAAA,IACP;AACA,UAAM,YAAY,KAAK;AACvB,SAAK,kBAAkB,KAAK,sBAAsB,KAAK,iBAAiB,SAAS;AAAA,EACnF;AAAA,EAEA,sBACE,SACA,QAC0B;AAC1B,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,UAAI,YAAY;AAChB,eAAS,MAAM;AACb,oBAAY;AAAA,MACd,CAAC;AAED,UAAI,SAAS;AACX,cAAM,iBAAiB,OAAO;AAAA,MAChC;AAEA,YAAM,YAAY,KAAK,QAAQ,KAAK;AACpC,YAAM,gBAAgB,KAAK;AAC3B,UAAI,iBAAiB,cAAc,aAAa;AAC9C,aACG,CAAC,cAAc,gBAAgB,cAAc,kBAC9C,CAAC,cAAc,iBACf;AAGA,oBAAU,SAAS;AAAA,YACjB,YAAY,OAAO;AAAA,cACjB,MAAM,cAAc,gBAAgB;AAAA,cACpC,MAAM,SAAS;AAAA,YACjB,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AAEA,gBAAU,SAAS;AAAA,QACjB,YAAY,OAAO;AAAA,UACjB,MAAM,iCAAQ;AAAA,UACd,MAAM,SAAS;AAAA,QACjB,CAAC;AAAA,MACH;AAEA,UAAI,UAAW,SAAQ;AACvB,UAAI,YAAY,MAAM,KAAK,MAAM,kBAAkB,MAAM,SAAS;AAClE,UAAI,cAAc,OAAO;AACvB,yCAAQ;AACR;AAAA,MACF;AAEA,UAAI,UAAW,SAAQ;AAEvB,UAAI,EAAE,qBAAqB,YAAY;AACrC,oBAAa,MAAM,yBAAyB,MAAM,SAAS;AAAA,MAC7D;AAEA,UAAI,OAAQ,aAAa;AACvB;AAAA,MACF;AAEA,YAAM,kBAAkB,KAAK,uBAAuB,OAAQ,IAAI,SAAS;AACzE,aAAQ,WAAW,WAAW,eAAe;AAI7C,YAAM,UAAU,CAAC,CAAC,KAAK,uBACnB,KAAK,OAAO,KAAK,IAAI,IAAI,KAAK,wBAAwB,GAAI,IAAI,MAC9D;AAEJ,WAAK,QAAQ,MAAM,EAAE,UAAU,OAAQ,IAAI,QAAQ,CAAC,EAAE,MAAM,0BAA0B;AACtF,cAAQ;AAAA,IACV,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,YAAY,QAAsB;AACtC,QAAI;AACF,YAAM,OAAO,sBAAsB;AAAA,IACrC,QAAQ;AACN;AAAA,IACF;AACA,UAAM,KAAK,kBAAmB,oBAAoB;AAClD,UAAM,kBAAkB,OAAO;AAC/B,QAAI,gBAAgB,YAAa;AAEjC,UAAM,eAAe,OAAO;AAC5B,UAAM,aAAa,gBAAgB,KAAK;AACxC,UAAM,UAAU,WAAW,KAAK;AAEhC,UAAM,6BAA6B,MAAM;AACvC,UAAI,CAAC,gBAAgB,gBAAgB,eAAe,OAAO,cAAe;AAC1E,YAAME,gBACJ,OAAO,kBAAkB,aAAa,CAAC,CAAC,OAAO,OAAO,cAAc;AAKtE,UACE,OAAO,sBACP,CAACA,iBACD,WAAW,aAAa,KAAK,8BAC7B,CAAC,QAAQ,MACT;AACA;AAAA,MACF;AAEA,WAAK,QAAQ,MAAM,EAAE,gBAAgB,aAAa,CAAC,EAAE,MAAM,2BAA2B;AACtF,YAAM,UAAU,YAAY,OAAO,EAAE,MAAM,cAAc,MAAM,SAAS,KAAK,CAAC;AAC9E,WAAK,QAAQ,SAAS,KAAK,OAAO;AAClC,WAAK,KAAK,+BAAgC,OAAO;AAEjD,WAAK,mBAAmB,KAAK,iBAAiB,MAAM,aAAa,MAAM;AACvE,aAAO,kBAAkB;AAAA,IAC3B;AAGA,+BAA2B;AAE3B,WAAO,CAAC,QAAQ,MAAM;AACpB,YAAM,IAAI,QAAc,OAAO,YAAY;AACzC,mBAAW,SAAS,GAAG;AACvB,cAAM,QAAQ;AACd,gBAAQ;AAAA,MACV,CAAC;AACD,iCAA2B;AAC3B,UAAI,OAAO,YAAa;AAAA,IAC1B;AACA,+BAA2B;AAE3B,UAAM,gBAAgB,OAAO,gBAAgB;AAC7C,UAAM,eAAe,OAAO,kBAAkB,aAAa,CAAC,CAAC,OAAO,OAAO,cAAc;AACzF,UAAM,qBAAqB,CAAC;AAC5B,QAAI,cAAc,OAAO;AAIzB,QAAI,gBAAgB,CAAC,aAAa;AAChC,UAAI,CAAC,gBAAgB,CAAC,OAAO,eAAe;AAC1C,cAAM,IAAI,MAAM,2DAA2D;AAAA,MAC7E;AACA,YAAM,YAAY,OAAO;AACzB,UAAI,mBAAmB,UAAU;AAEjC,eAAS,IAAI,GAAG,IAAI,KAAK,MAAM,sBAAsB,KAAK;AACxD,aAAK,KAAK,kCAAmC,gBAAgB;AAC7D,cAAM,cAAkC,CAAC;AACzC,mBAAW,QAAQ,kBAAkB;AACnC,gBAAM,OAAO,KAAK,KAAK,QAAQ,KAAK,MAAM,EAAE;AAAA,YAC1C,CAAC,YAAY,EAAE,MAAM,KAAK,MAAM,YAAY,KAAK,YAAY,OAAO;AAAA,YACpE,CAAC,WAAW,EAAE,MAAM,KAAK,MAAM,YAAY,KAAK,YAAY,MAAM;AAAA,UACpE;AACA,sBAAY,KAAK,EAAE,GAAG,MAAM,KAAK,CAAC;AAClC,eAAK,QACF,MAAM,EAAE,UAAU,KAAK,MAAM,UAAU,OAAO,GAAG,CAAC,EAClD,MAAM,uBAAuB;AAChC,cAAI;AACF,kBAAM;AAAA,UACR,QAAQ;AACN,iBAAK,QACF,MAAM,EAAE,UAAU,KAAK,MAAM,UAAU,OAAO,GAAG,CAAC,EAClD,MAAM,6BAA6B;AAAA,UACxC;AAAA,QACF;AAEA,cAAM,gBAAgB,CAAC;AACvB,cAAM,mBAAmB,CAAC;AAC1B,mBAAW,OAAO,aAAa;AAE7B,gBAAM,OAAO,MAAM,IAAI;AACvB,cAAI,CAAC,QAAQ,KAAK,WAAW,OAAW;AACxC,wBAAc,KAAK,GAAG;AACtB,2BAAiB,KAAK,YAAY,6BAA6B,IAAI,CAAC;AAAA,QACtE;AAEA,YAAI,CAAC,cAAc,OAAQ;AAG3B,2BAAmB,KAAK,YAAY,gBAAgB,eAAe,aAAa,CAAC;AACjF,2BAAmB,KAAK,GAAG,gBAAgB;AAE3C,cAAM,UAAU,OAAO,OAAO,QAAQ,KAAK;AAC3C,gBAAQ,SAAS,KAAK,GAAG,kBAAkB;AAE3C,cAAM,kBAAkB,KAAK,IAAI,KAAK;AAAA,UACpC;AAAA,UACA,QAAQ,KAAK;AAAA,QACf,CAAC;AACD,cAAM,kBAAkB,KAAK,uBAAuB,OAAO,IAAI,eAAe;AAE9E,eAAO,kBAAkB;AACzB,cAAMC,cAAa,gBAAgB,KAAK;AACxC,cAAMA,YAAW,KAAK,EAAE;AAExB,sBAAc,gBAAgB;AAC9B,2BAAmB,gBAAgB;AAEnC,aAAK,KAAK,iCAAkC,WAAW;AACvD,YAAI,CAAC,iBAAkB;AAAA,MACzB;AAAA,IACF;AAEA,QAAI,OAAO,iBAAiB,CAAC,gBAAgB,OAAO,gBAAgB;AAClE,WAAK,QAAQ,SAAS,KAAK,GAAG,kBAAkB;AAChD,UAAI,aAAa;AACf,wBAAgB;AAAA,MAClB;AAEA,YAAM,MAAM,YAAY,OAAO,EAAE,MAAM,eAAe,MAAM,SAAS,UAAU,CAAC;AAChF,WAAK,QAAQ,SAAS,KAAK,GAAG;AAE9B,aAAO,oBAAoB;AAC3B,UAAI,aAAa;AACf,aAAK,KAAK,kCAAmC,GAAG;AAAA,MAClD,OAAO;AACL,aAAK,KAAK,gCAAiC,GAAG;AAAA,MAChD;AAEA,WAAK,QACF,MAAM;AAAA,QACL,iBAAiB;AAAA,QACjB;AAAA,QACA,UAAU,OAAO;AAAA,MACnB,CAAC,EACA,MAAM,wBAAwB;AAAA,IACnC;AAAA,EACF;AAAA,EAEA,uBACE,UACA,QACiB;AACjB,QAAI,CAAC,KAAK,cAAc;AACtB,YAAM,IAAI,MAAM,+CAA+C;AAAA,IACjE;AAEA,QAAI,kBAAkB,WAAW;AAC/B,eAAS,0BAA0B,UAAU,MAAM;AAAA,IACrD;AAEA,UAAM,WAAW;AACjB,QAAI,EAAE,OAAO,WAAW,WAAW;AAAA,IAEnC;AAEA,UAAM,YAAY,KAAK,MAAM,kBAAkB,MAAM,QAAQ;AAC7D,QAAI,CAAC,WAAW;AACd,YAAM,IAAI,MAAM,+DAA+D;AAAA,IACjF;AAEA,WAAO,KAAK,aAAa,WAAW,UAAU,SAAS;AAAA,EACzD;AAAA,EAEA,MAAM,2BAA2B;AAC/B,QAAI,KAAK,kBAAkB,CAAC,KAAK,eAAe,oBAAoB;AAClE,WAAK,QACF,MAAM,EAAE,UAAU,KAAK,eAAe,GAAG,CAAC,EAC1C,MAAM,yEAAyE;AAClF;AAAA,IACF;AAEA,QAAI,CAAC,KAAK,oBAAoB;AAC5B,UAAI,KAAK,MAAM,uBAAuB,CAAC,KAAK,kBAAkB;AAC5D;AAAA,MACF;AACA,WAAK,sBAAsB;AAAA,IAC7B;AAEA,QAAI,CAAC,KAAK,oBAAoB;AAC5B,YAAM,IAAI,MAAM,kCAAkC;AAAA,IACpD;AAIA,QAAI,KAAK,iBAAiB,MAAM;AAC9B,uBAAiB,UAAU,KAAK,cAAc;AAC5C,YAAI,WAAW,mBAAmB,eAAgB;AAClD,YAAI,CAAC,OAAO,QAAS;AACrB,YAAI,OAAO,mBAAoB,QAAO,UAAU;AAAA,MAClD;AAAA,IACF;AAEA,SAAK,QAAQ,MAAM,EAAE,UAAU,KAAK,mBAAmB,GAAG,CAAC,EAAE,MAAM,uBAAuB;AAE1F,SAAK,qBAAqB,KAAK,kBAAkB;AACjD,SAAK,qBAAqB;AAC1B,SAAK,0BAA0B;AAAA,EACjC;AAAA,EAEA,uBAAuB;AACrB,QACE,CAAC,KAAK,kBACN,CAAC,KAAK,eAAe,sBACrB,KAAK,eAAe,aACpB;AACA;AAAA,IACF;AAEA,QAAI,KAAK,MAAM,sBAAsB,GAAG;AAGtC,YAAM,eAAe,KAAK,MAAM,cAAc,cAAc;AAAA,QAC1D,KAAK;AAAA,MACP;AACA,UAAI,aAAa,SAAS,KAAK,MAAM,mBAAmB;AACtD;AAAA,MACF;AAAA,IACF;AACA,SAAK,eAAe,UAAU;AAAA,EAChC;AAAA,EAEA,qBAAqB,QAAsB;AACzC,SAAK,aAAa,IAAI,MAAM;AAC5B,SAAK,aAAa,IAAI,mBAAmB,cAAc;AACvD,SAAK,iBAAiB,QAAQ;AAAA,EAChC;AAAA;AAAA,EAGA,MAAM,QAAQ;AAzyBhB;AA0yBI,QAAI,CAAC,KAAK,UAAU;AAClB;AAAA,IACF;AAEA,eAAK,UAAL,mBAAY,mBAAmB,UAAU;AAAA,EAE3C;AACF;AAEA,gBAAgB,0BACd,UACA,QACuB;AAtzBzB;AAuzBE,QAAM,YAAY,KAAK,IAAI;AAC3B,MAAI,aAAa;AACjB,mBAAiB,SAAS,QAAQ;AAChC,UAAM,WAAU,WAAM,QAAQ,CAAC,MAAf,mBAAkB,MAAM;AACxC,QAAI,CAAC,QAAS;AAEd,QAAI,YAAY;AACd,mBAAa;AACb,UAAI,EACD,MAAM,EAAE,UAAU,SAAS,KAAK,MAAM,KAAK,IAAI,IAAI,SAAS,EAAE,CAAC,EAC/D,MAAM,0BAA0B;AAAA,IACrC;AACA,UAAM;AAAA,EACR;AACF;AAGA,MAAM,wBAAwB;AAAA;AAAA,EAEnB,cAAc;AAAA,EACd,4BAA4B;AAAA,EAC5B,4BAA4B;AAAA;AAAA,EAErC;AAAA,EACA;AAAA,EACA,oBAAoB,IAAI,OAAO;AAAA,EAC/B,uBAAuB;AAAA,EACvB,2BAA2B;AAAA,EAC3B,YAAY;AAAA,EACZ;AAAA,EACA;AAAA,EAEA,YAAY,cAAmC,qBAA6B;AAC1E,SAAK,gBAAgB;AACrB,SAAK,oBAAoB;AACzB,SAAK,wBAAwB;AAAA,EAC/B;AAAA,EAEA,IAAI,aAAsB;AACxB,WAAO,CAAC,KAAK,kBAAkB;AAAA,EACjC;AAAA,EAEA,uBAAuB,YAAoB;AACzC,SAAK,uBAAuB,WAAW,KAAK;AAC5C,QAAI,KAAK,UAAW;AAEpB,UAAM,uBACJ,KAAK,IAAI,IAAI,KAAK,2BAA2B,KAAK;AACpD,QAAI,QAAQ,uBAAuB,KAAK,oBAAoB,KAAK;AACjE,YAAQ,KAAK,oBAAoB,IAAI,QAAQ,KAAK,4BAA4B;AAE9E,SAAK,KAAK,KAAK;AAAA,EACjB;AAAA;AAAA,EAGA,qBAAqB,GAAa;AAChC,SAAK,YAAY;AAAA,EAKnB;AAAA;AAAA,EAGA,mBAAmB,GAAa;AAC9B,SAAK,YAAY;AACjB,SAAK,2BAA2B,KAAK,IAAI;AAEzC,QAAI,KAAK,sBAAsB;AAC7B,YAAM,QAAQ,KAAK,oBAAoB,IACnC,KAAK,oBAAoB,KAAK,4BAC9B;AACJ,WAAK,KAAK,KAAK;AAAA,IACjB;AAAA,EACF;AAAA;AAAA,EAIA,sBAA+B;AAC7B,WACE,KAAK,qBAAqB,SAAS,KACnC,KAAK,YAAY,SAAS,KAAK,qBAAqB,KAAK,qBAAqB,SAAS,CAAC,CAAE;AAAA,EAE9F;AAAA,EAEA,eAAe;AACb,SAAK,uBAAuB;AAC5B,SAAK,2BAA2B;AAAA,EAClC;AAAA,EAEA,KAAK,OAAe;AAClB,UAAM,UAAU,OAAOH,WAAkB;AACvC,YAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAASA,MAAK,CAAC;AACzD,WAAK,aAAa;AAClB,YAAM,KAAK,cAAc;AAAA,IAC3B;AAEA,SAAK,oBAAoB,IAAI,OAAO;AACpC,SAAK,qBAAqB,QAAQ,KAAK;AAAA,EACzC;AACF;","names":["VPAEvent","participant","delay","resolve","isUsingTools","playHandle"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@livekit/agents",
3
- "version": "0.5.1",
3
+ "version": "0.5.2",
4
4
  "description": "LiveKit Agents - Node.js",
5
5
  "main": "dist/index.js",
6
6
  "require": "dist/index.cjs",
@@ -13,6 +13,7 @@ export class SynthesisHandle {
13
13
  static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
14
14
 
15
15
  #speechId: string;
16
+ text?: string;
16
17
  ttsSource: SpeechSource;
17
18
  #agentPlayout: AgentPlayout;
18
19
  tts: TTS;
@@ -97,7 +98,7 @@ export class AgentOutput {
97
98
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
98
99
  return new CancellablePromise(async (resolve, _, onCancel) => {
99
100
  const ttsSource = await handle.ttsSource;
100
- let task: CancellablePromise<void>;
101
+ let task: CancellablePromise<string>;
101
102
  if (typeof ttsSource === 'string') {
102
103
  task = stringSynthesisTask(ttsSource, handle);
103
104
  } else {
@@ -113,6 +114,10 @@ export class AgentOutput {
113
114
  } finally {
114
115
  if (handle.intFut.done) {
115
116
  gracefullyCancel(task);
117
+ } else {
118
+ task.then((text) => {
119
+ handle.text = text;
120
+ });
116
121
  }
117
122
  }
118
123
 
@@ -121,9 +126,9 @@ export class AgentOutput {
121
126
  }
122
127
  }
123
128
 
124
- const stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<void> => {
129
+ const stringSynthesisTask = (text: string, handle: SynthesisHandle): CancellablePromise<string> => {
125
130
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
126
- return new CancellablePromise<void>(async (resolve, _, onCancel) => {
131
+ return new CancellablePromise(async (resolve, _, onCancel) => {
127
132
  let cancelled = false;
128
133
  onCancel(() => {
129
134
  cancelled = true;
@@ -141,16 +146,17 @@ const stringSynthesisTask = (text: string, handle: SynthesisHandle): Cancellable
141
146
  }
142
147
  handle.queue.put(SynthesisHandle.FLUSH_SENTINEL);
143
148
 
144
- resolve();
149
+ resolve(text);
145
150
  });
146
151
  };
147
152
 
148
153
  const streamSynthesisTask = (
149
154
  stream: AsyncIterable<string>,
150
155
  handle: SynthesisHandle,
151
- ): CancellablePromise<void> => {
156
+ ): CancellablePromise<string> => {
152
157
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
153
- return new CancellablePromise<void>(async (resolve, _, onCancel) => {
158
+ return new CancellablePromise(async (resolve, _, onCancel) => {
159
+ let fullText = '';
154
160
  let cancelled = false;
155
161
  onCancel(() => {
156
162
  cancelled = true;
@@ -170,12 +176,13 @@ const streamSynthesisTask = (
170
176
  readGeneratedAudio();
171
177
 
172
178
  for await (const text of stream) {
179
+ fullText += text;
173
180
  if (cancelled) break;
174
181
  ttsStream.pushText(text);
175
182
  }
176
183
  ttsStream.flush();
177
184
  ttsStream.endInput();
178
185
 
179
- resolve();
186
+ resolve(fullText);
180
187
  });
181
188
  };
@@ -520,8 +520,7 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
520
520
  // add it to the chat context for this new reply synthesis
521
521
  copiedCtx.messages.push(
522
522
  ChatMessage.create({
523
- // TODO(nbsp): uhhh unsure where to get the played text here
524
- // text: playingSpeech.synthesisHandle.(theres no ttsForwarder here)
523
+ text: playingSpeech.synthesisHandle.text,
525
524
  role: ChatRole.ASSISTANT,
526
525
  }),
527
526
  );
@@ -620,8 +619,7 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
620
619
  }
621
620
  commitUserQuestionIfNeeded();
622
621
 
623
- // TODO(nbsp): what goes here
624
- let collectedText = '';
622
+ const collectedText = handle.synthesisHandle.text;
625
623
  const isUsingTools = handle.source instanceof LLMStream && !!handle.source.functionCalls.length;
626
624
  const extraToolsMessages = []; // additional messages from the functions to add to the context
627
625
  let interrupted = handle.interrupted;
@@ -685,8 +683,6 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter<
685
683
  const playHandle = answerSynthesis.play();
686
684
  await playHandle.join().await;
687
685
 
688
- // TODO(nbsp): what text goes here
689
- collectedText = '';
690
686
  interrupted = answerSynthesis.interrupted;
691
687
  newFunctionCalls = answerLLMStream.functionCalls;
692
688