@livekit/agents 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. package/dist/index.cjs +3 -0
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.ts +2 -1
  4. package/dist/index.d.ts.map +1 -1
  5. package/dist/index.js +2 -0
  6. package/dist/index.js.map +1 -1
  7. package/dist/llm/index.cjs +2 -0
  8. package/dist/llm/index.cjs.map +1 -1
  9. package/dist/llm/index.d.ts +1 -1
  10. package/dist/llm/index.d.ts.map +1 -1
  11. package/dist/llm/index.js +2 -0
  12. package/dist/llm/index.js.map +1 -1
  13. package/dist/llm/llm.cjs +47 -3
  14. package/dist/llm/llm.cjs.map +1 -1
  15. package/dist/llm/llm.d.ts +15 -2
  16. package/dist/llm/llm.d.ts.map +1 -1
  17. package/dist/llm/llm.js +46 -3
  18. package/dist/llm/llm.js.map +1 -1
  19. package/dist/metrics/base.cjs +44 -0
  20. package/dist/metrics/base.cjs.map +1 -0
  21. package/dist/metrics/base.d.ts +96 -0
  22. package/dist/metrics/base.d.ts.map +1 -0
  23. package/dist/metrics/base.js +20 -0
  24. package/dist/metrics/base.js.map +1 -0
  25. package/dist/metrics/index.cjs +35 -0
  26. package/dist/metrics/index.cjs.map +1 -0
  27. package/dist/metrics/index.d.ts +5 -0
  28. package/dist/metrics/index.d.ts.map +1 -0
  29. package/dist/metrics/index.js +9 -0
  30. package/dist/metrics/index.js.map +1 -0
  31. package/dist/metrics/usage_collector.cjs +53 -0
  32. package/dist/metrics/usage_collector.cjs.map +1 -0
  33. package/dist/metrics/usage_collector.d.ts +14 -0
  34. package/dist/metrics/usage_collector.d.ts.map +1 -0
  35. package/dist/metrics/usage_collector.js +29 -0
  36. package/dist/metrics/usage_collector.js.map +1 -0
  37. package/dist/metrics/utils.cjs +104 -0
  38. package/dist/metrics/utils.cjs.map +1 -0
  39. package/dist/metrics/utils.d.ts +10 -0
  40. package/dist/metrics/utils.d.ts.map +1 -0
  41. package/dist/metrics/utils.js +73 -0
  42. package/dist/metrics/utils.js.map +1 -0
  43. package/dist/multimodal/multimodal_agent.cjs +7 -13
  44. package/dist/multimodal/multimodal_agent.cjs.map +1 -1
  45. package/dist/multimodal/multimodal_agent.d.ts +1 -4
  46. package/dist/multimodal/multimodal_agent.d.ts.map +1 -1
  47. package/dist/multimodal/multimodal_agent.js +7 -13
  48. package/dist/multimodal/multimodal_agent.js.map +1 -1
  49. package/dist/pipeline/agent_output.cjs +9 -2
  50. package/dist/pipeline/agent_output.cjs.map +1 -1
  51. package/dist/pipeline/agent_output.d.ts +1 -0
  52. package/dist/pipeline/agent_output.d.ts.map +1 -1
  53. package/dist/pipeline/agent_output.js +9 -2
  54. package/dist/pipeline/agent_output.js.map +1 -1
  55. package/dist/pipeline/index.cjs +2 -0
  56. package/dist/pipeline/index.cjs.map +1 -1
  57. package/dist/pipeline/index.d.ts +1 -1
  58. package/dist/pipeline/index.d.ts.map +1 -1
  59. package/dist/pipeline/index.js +3 -1
  60. package/dist/pipeline/index.js.map +1 -1
  61. package/dist/pipeline/pipeline_agent.cjs +168 -70
  62. package/dist/pipeline/pipeline_agent.cjs.map +1 -1
  63. package/dist/pipeline/pipeline_agent.d.ts +10 -4
  64. package/dist/pipeline/pipeline_agent.d.ts.map +1 -1
  65. package/dist/pipeline/pipeline_agent.js +171 -73
  66. package/dist/pipeline/pipeline_agent.js.map +1 -1
  67. package/dist/pipeline/speech_handle.cjs +49 -1
  68. package/dist/pipeline/speech_handle.cjs.map +1 -1
  69. package/dist/pipeline/speech_handle.d.ts +12 -2
  70. package/dist/pipeline/speech_handle.d.ts.map +1 -1
  71. package/dist/pipeline/speech_handle.js +50 -2
  72. package/dist/pipeline/speech_handle.js.map +1 -1
  73. package/dist/stt/index.cjs.map +1 -1
  74. package/dist/stt/index.d.ts +1 -1
  75. package/dist/stt/index.d.ts.map +1 -1
  76. package/dist/stt/index.js.map +1 -1
  77. package/dist/stt/stream_adapter.cjs +15 -5
  78. package/dist/stt/stream_adapter.cjs.map +1 -1
  79. package/dist/stt/stream_adapter.d.ts +4 -1
  80. package/dist/stt/stream_adapter.d.ts.map +1 -1
  81. package/dist/stt/stream_adapter.js +15 -5
  82. package/dist/stt/stream_adapter.js.map +1 -1
  83. package/dist/stt/stt.cjs +46 -2
  84. package/dist/stt/stt.cjs.map +1 -1
  85. package/dist/stt/stt.d.ts +25 -3
  86. package/dist/stt/stt.d.ts.map +1 -1
  87. package/dist/stt/stt.js +46 -2
  88. package/dist/stt/stt.js.map +1 -1
  89. package/dist/tts/index.cjs +4 -2
  90. package/dist/tts/index.cjs.map +1 -1
  91. package/dist/tts/index.d.ts +1 -1
  92. package/dist/tts/index.d.ts.map +1 -1
  93. package/dist/tts/index.js +3 -1
  94. package/dist/tts/index.js.map +1 -1
  95. package/dist/tts/stream_adapter.cjs +14 -3
  96. package/dist/tts/stream_adapter.cjs.map +1 -1
  97. package/dist/tts/stream_adapter.d.ts +3 -0
  98. package/dist/tts/stream_adapter.d.ts.map +1 -1
  99. package/dist/tts/stream_adapter.js +15 -4
  100. package/dist/tts/stream_adapter.js.map +1 -1
  101. package/dist/tts/tts.cjs +109 -6
  102. package/dist/tts/tts.cjs.map +1 -1
  103. package/dist/tts/tts.d.ts +24 -1
  104. package/dist/tts/tts.d.ts.map +1 -1
  105. package/dist/tts/tts.js +107 -5
  106. package/dist/tts/tts.js.map +1 -1
  107. package/dist/vad.cjs +43 -2
  108. package/dist/vad.cjs.map +1 -1
  109. package/dist/vad.d.ts +21 -4
  110. package/dist/vad.d.ts.map +1 -1
  111. package/dist/vad.js +43 -2
  112. package/dist/vad.js.map +1 -1
  113. package/package.json +1 -1
  114. package/src/index.ts +2 -1
  115. package/src/llm/index.ts +2 -0
  116. package/src/llm/llm.ts +55 -3
  117. package/src/metrics/base.ts +127 -0
  118. package/src/metrics/index.ts +20 -0
  119. package/src/metrics/usage_collector.ts +40 -0
  120. package/src/metrics/utils.ts +100 -0
  121. package/src/multimodal/multimodal_agent.ts +12 -17
  122. package/src/pipeline/agent_output.ts +14 -7
  123. package/src/pipeline/index.ts +1 -1
  124. package/src/pipeline/pipeline_agent.ts +210 -95
  125. package/src/pipeline/speech_handle.ts +67 -2
  126. package/src/stt/index.ts +2 -0
  127. package/src/stt/stream_adapter.ts +17 -5
  128. package/src/stt/stt.ts +67 -3
  129. package/src/tts/index.ts +2 -0
  130. package/src/tts/stream_adapter.ts +17 -4
  131. package/src/tts/tts.ts +127 -4
  132. package/src/vad.ts +61 -4
@@ -3,11 +3,13 @@ import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
3
3
  import type { CallableFunctionResult, FunctionCallInfo, FunctionContext, LLM } from '../llm/index.js';
4
4
  import { LLMStream } from '../llm/index.js';
5
5
  import { ChatContext, ChatMessage } from '../llm/index.js';
6
+ import type { AgentMetrics } from '../metrics/base.js';
6
7
  import { type STT } from '../stt/index.js';
7
8
  import type { SentenceTokenizer, WordTokenizer } from '../tokenize/tokenizer.js';
8
9
  import type { TTS } from '../tts/index.js';
9
- import type { VAD } from '../vad.js';
10
+ import { type VAD } from '../vad.js';
10
11
  import type { SpeechSource } from './agent_output.js';
12
+ import { SpeechHandle } from './speech_handle.js';
11
13
  export type AgentState = 'initializing' | 'thinking' | 'listening' | 'speaking';
12
14
  export declare const AGENT_STATE_ATTRIBUTE = "lk.agent.state";
13
15
  export type BeforeLLMCallback = (agent: VoicePipelineAgent, chatCtx: ChatContext) => LLMStream | false | void | Promise<LLMStream | false | void>;
@@ -21,7 +23,8 @@ export declare enum VPAEvent {
21
23
  AGENT_SPEECH_COMMITTED = 5,
22
24
  AGENT_SPEECH_INTERRUPTED = 6,
23
25
  FUNCTION_CALLS_COLLECTED = 7,
24
- FUNCTION_CALLS_FINISHED = 8
26
+ FUNCTION_CALLS_FINISHED = 8,
27
+ METRICS_COLLECTED = 9
25
28
  }
26
29
  export type VPACallbacks = {
27
30
  [VPAEvent.USER_STARTED_SPEAKING]: () => void;
@@ -33,6 +36,7 @@ export type VPACallbacks = {
33
36
  [VPAEvent.AGENT_SPEECH_INTERRUPTED]: (msg: ChatMessage) => void;
34
37
  [VPAEvent.FUNCTION_CALLS_COLLECTED]: (funcs: FunctionCallInfo[]) => void;
35
38
  [VPAEvent.FUNCTION_CALLS_FINISHED]: (funcs: CallableFunctionResult[]) => void;
39
+ [VPAEvent.METRICS_COLLECTED]: (metrics: AgentMetrics) => void;
36
40
  };
37
41
  export declare class AgentCallContext {
38
42
  #private;
@@ -42,6 +46,8 @@ export declare class AgentCallContext {
42
46
  storeMetadata(key: string, value: any): void;
43
47
  getMetadata(key: string, orDefault?: any): any;
44
48
  get llmStream(): LLMStream;
49
+ get extraChatMessages(): ChatMessage[];
50
+ addExtraChatMessage(message: ChatMessage): void;
45
51
  }
46
52
  export interface AgentTranscriptionOptions {
47
53
  /** Whether to forward the user transcription to the client */
@@ -82,7 +88,7 @@ export interface VPAOptions {
82
88
  interruptMinWords: number;
83
89
  /** Delay to wait before considering the user speech done. */
84
90
  minEndpointingDelay: number;
85
- maxRecursiveFncCalls: number;
91
+ maxNestedFncCalls: number;
86
92
  preemptiveSynthesis: boolean;
87
93
  beforeLLMCallback: BeforeLLMCallback;
88
94
  beforeTTSCallback: BeforeTTSCallback;
@@ -127,7 +133,7 @@ export declare class VoicePipelineAgent extends VoicePipelineAgent_base {
127
133
  */
128
134
  participant?: RemoteParticipant | string | null): void;
129
135
  /** Play a speech source through the voice assistant. */
130
- say(source: string | LLMStream | AsyncIterable<string>, allowInterruptions?: boolean, addToChatCtx?: boolean): Promise<void>;
136
+ say(source: string | LLMStream | AsyncIterable<string>, allowInterruptions?: boolean, addToChatCtx?: boolean): Promise<SpeechHandle>;
131
137
  /** Close the voice assistant. */
132
138
  close(): Promise<void>;
133
139
  }
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline_agent.d.ts","sourceRoot":"","sources":["../../src/pipeline/pipeline_agent.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAyB,iBAAiB,EAAE,IAAI,EAAE,MAAM,mBAAmB,CAAC;AAQxF,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EACV,sBAAsB,EACtB,gBAAgB,EAChB,eAAe,EACf,GAAG,EACJ,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAC5C,OAAO,EAAE,WAAW,EAAE,WAAW,EAAY,MAAM,iBAAiB,CAAC;AAErE,OAAO,EAAE,KAAK,GAAG,EAAqC,MAAM,iBAAiB,CAAC;AAM9E,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACjF,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAG3C,OAAO,KAAK,EAAE,GAAG,EAAY,MAAM,WAAW,CAAC;AAC/C,OAAO,KAAK,EAAE,YAAY,EAAmB,MAAM,mBAAmB,CAAC;AAMvE,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG,UAAU,GAAG,WAAW,GAAG,UAAU,CAAC;AAChF,eAAO,MAAM,qBAAqB,mBAAmB,CAAC;AAEtD,MAAM,MAAM,iBAAiB,GAAG,CAC9B,KAAK,EAAE,kBAAkB,EACzB,OAAO,EAAE,WAAW,KACjB,SAAS,GAAG,KAAK,GAAG,IAAI,GAAG,OAAO,CAAC,SAAS,GAAG,KAAK,GAAG,IAAI,CAAC,CAAC;AAElE,MAAM,MAAM,iBAAiB,GAAG,CAC9B,KAAK,EAAE,kBAAkB,EACzB,MAAM,EAAE,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,KACnC,YAAY,CAAC;AAElB,oBAAY,QAAQ;IAClB,qBAAqB,IAAA;IACrB,qBAAqB,IAAA;IACrB,sBAAsB,IAAA;IACtB,sBAAsB,IAAA;IACtB,qBAAqB,IAAA;IACrB,sBAAsB,IAAA;IACtB,wBAAwB,IAAA;IACxB,wBAAwB,IAAA;IACxB,uBAAuB,IAAA;CACxB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC7C,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC7C,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC9C,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC9C,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IAC7D,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IAC9D,CAAC,QAAQ,CAAC,wBAAwB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IAChE,CAAC,QAAQ,CAAC,wBAAwB,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,EAAE,KAAK,IAAI,CAAC;IACzE,CAAC,QAAQ,CAAC,uBAAuB,CAAC,EAAE,CAAC,KAAK,EAAE,sBAAsB,EAAE,KAAK,IAAI,CAAC;CAC/E,CAAC;AAEF,qBAAa,gBAAgB;;gBAMf,KAAK,EAAE,kBAAkB,EAAE,SAAS,EAAE,SAAS;IAM3D,MAAM,CAAC,UAAU,IAAI,gBAAgB;IAIrC,IAAI,KAAK,IAAI,kBAAkB,CAE9B;IAED,a
AAa,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG;IAIrC,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,GAAE,GAAe;IAInD,IAAI,SAAS,IAAI,SAAS,CAEzB;CACF;AAiBD,MAAM,WAAW,yBAAyB;IACxC,8DAA8D;IAC9D,iBAAiB,EAAE,OAAO,CAAC;IAC3B,+DAA+D;IAC/D,kBAAkB,EAAE,OAAO,CAAC;IAC5B;;;OAGG;IACH,wBAAwB,EAAE,MAAM,CAAC;IACjC;;;OAGG;IACH,iBAAiB,EAAE,iBAAiB,CAAC;IACrC;;;OAGG;IACH,aAAa,EAAE,aAAa,CAAC;IAC7B;;;OAGG;IACH,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;CAC3C;AAWD,MAAM,WAAW,UAAU;IACzB,sCAAsC;IACtC,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,0CAA0C;IAC1C,MAAM,CAAC,EAAE,eAAe,CAAC;IACzB,4DAA4D;IAC5D,kBAAkB,EAAE,OAAO,CAAC;IAC5B,+DAA+D;IAC/D,uBAAuB,EAAE,MAAM,CAAC;IAChC,sFAAsF;IACtF,iBAAiB,EAAE,MAAM,CAAC;IAC1B,6DAA6D;IAC7D,mBAAmB,EAAE,MAAM,CAAC;IAC5B,oBAAoB,EAAE,MAAM,CAAC;IAE7B,mBAAmB,EAAE,OAAO,CAAC;IAS7B,iBAAiB,EAAE,iBAAiB,CAAC;IAQrC,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,2CAA2C;IAC3C,aAAa,EAAE,yBAAyB,CAAC;CAC1C;iDAgBkE,aAAa,YAAY,CAAC;AAD7F,+DAA+D;AAC/D,qBAAa,kBAAmB,SAAQ,uBAAsD;;IAC5F,mFAAmF;IACnF,QAAQ,CAAC,0BAA0B,OAAO;IAC1C,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;;IA2BlE,yCAAyC;IACzC,GAAG,EAAE,GAAG;IACR,+BAA+B;IAC/B,GAAG,EAAE,GAAG;IACR,qCAAqC;IACrC,GAAG,EAAE,GAAG;IACR,+BAA+B;IAC/B,GAAG,EAAE,GAAG;IACR,6CAA6C;IAC7C,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAyB/C,IAAI,MAAM,IAAI,eAAe,GAAG,SAAS,CAExC;IAED,IAAI,MAAM,CAAC,GAAG,EAAE,eAAe,EAE9B;IAED,IAAI,OAAO,IAAI,WAAW,CAEzB;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,iCAAiC;IACjC,KAAK;IACH,8BAA8B;IAC9B,IAAI,EAAE,IAAI;IACV;;;;;;OAMG;IACH,WAAW,GAAE,iBAAiB,GAAG,MAAM,GAAG,IAAW;IA2BvD,wDAAwD;IAClD,GAAG,CACP,MAAM,EAAE,MAAM,GAAG,SAAS,GAAG,aAAa,CAAC,MAAM,CAAC,EAClD,kBAAkB,UAAO,EACzB,YAAY,UAAO;IAmdrB,iCAAiC;IAC3B,KAAK;CAQZ"}
1
+ {"version":3,"file":"pipeline_agent.d.ts","sourceRoot":"","sources":["../../src/pipeline/pipeline_agent.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAyB,iBAAiB,EAAE,IAAI,EAAE,MAAM,mBAAmB,CAAC;AAQxF,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAEhF,OAAO,KAAK,EACV,sBAAsB,EACtB,gBAAgB,EAChB,eAAe,EACf,GAAG,EACJ,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAY,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAE,WAAW,EAAY,MAAM,iBAAiB,CAAC;AAErE,OAAO,KAAK,EAAE,YAAY,EAAsB,MAAM,oBAAoB,CAAC;AAC3E,OAAO,EAAE,KAAK,GAAG,EAAsD,MAAM,iBAAiB,CAAC;AAM/F,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,0BAA0B,CAAC;AACjF,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,iBAAiB,CAAC;AAG3C,OAAO,EAAE,KAAK,GAAG,EAA+B,MAAM,WAAW,CAAC;AAClE,OAAO,KAAK,EAAE,YAAY,EAAmB,MAAM,mBAAmB,CAAC;AAIvE,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD,MAAM,MAAM,UAAU,GAAG,cAAc,GAAG,UAAU,GAAG,WAAW,GAAG,UAAU,CAAC;AAChF,eAAO,MAAM,qBAAqB,mBAAmB,CAAC;AAGtD,MAAM,MAAM,iBAAiB,GAAG,CAC9B,KAAK,EAAE,kBAAkB,EACzB,OAAO,EAAE,WAAW,KACjB,SAAS,GAAG,KAAK,GAAG,IAAI,GAAG,OAAO,CAAC,SAAS,GAAG,KAAK,GAAG,IAAI,CAAC,CAAC;AAElE,MAAM,MAAM,iBAAiB,GAAG,CAC9B,KAAK,EAAE,kBAAkB,EACzB,MAAM,EAAE,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,KACnC,YAAY,CAAC;AAElB,oBAAY,QAAQ;IAClB,qBAAqB,IAAA;IACrB,qBAAqB,IAAA;IACrB,sBAAsB,IAAA;IACtB,sBAAsB,IAAA;IACtB,qBAAqB,IAAA;IACrB,sBAAsB,IAAA;IACtB,wBAAwB,IAAA;IACxB,wBAAwB,IAAA;IACxB,uBAAuB,IAAA;IACvB,iBAAiB,IAAA;CAClB;AAED,MAAM,MAAM,YAAY,GAAG;IACzB,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC7C,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC7C,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC9C,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,MAAM,IAAI,CAAC;IAC9C,CAAC,QAAQ,CAAC,qBAAqB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IAC7D,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IAC9D,CAAC,QAAQ,CAAC,wBAAwB,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,IAAI,CAAC;IAChE,CAAC,QAAQ,CAAC,wBAAwB,CAAC,EAAE,CAAC,KAAK,EAAE,gBAAgB,EAAE,KAAK,IAAI,CAAC;IACzE,CAAC,QAAQ,CAAC,uBAAuB,CAAC,EAAE,CAAC,KAAK,EAAE,sBAAsB,EAAE,KAAK,IAAI,CAAC;IAC9E,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EA
AE,CAAC,OAAO,EAAE,YAAY,KAAK,IAAI,CAAC;CAC/D,CAAC;AAEF,qBAAa,gBAAgB;;gBAOf,KAAK,EAAE,kBAAkB,EAAE,SAAS,EAAE,SAAS;IAM3D,MAAM,CAAC,UAAU,IAAI,gBAAgB;IAIrC,IAAI,KAAK,IAAI,kBAAkB,CAE9B;IAED,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG;IAIrC,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,GAAE,GAAe;IAInD,IAAI,SAAS,IAAI,SAAS,CAEzB;IAED,IAAI,iBAAiB,kBAEpB;IAED,mBAAmB,CAAC,OAAO,EAAE,WAAW;CAGzC;AAiBD,MAAM,WAAW,yBAAyB;IACxC,8DAA8D;IAC9D,iBAAiB,EAAE,OAAO,CAAC;IAC3B,+DAA+D;IAC/D,kBAAkB,EAAE,OAAO,CAAC;IAC5B;;;OAGG;IACH,wBAAwB,EAAE,MAAM,CAAC;IACjC;;;OAGG;IACH,iBAAiB,EAAE,iBAAiB,CAAC;IACrC;;;OAGG;IACH,aAAa,EAAE,aAAa,CAAC;IAC7B;;;OAGG;IACH,aAAa,EAAE,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,EAAE,CAAC;CAC3C;AAWD,MAAM,WAAW,UAAU;IACzB,sCAAsC;IACtC,OAAO,CAAC,EAAE,WAAW,CAAC;IACtB,0CAA0C;IAC1C,MAAM,CAAC,EAAE,eAAe,CAAC;IACzB,4DAA4D;IAC5D,kBAAkB,EAAE,OAAO,CAAC;IAC5B,+DAA+D;IAC/D,uBAAuB,EAAE,MAAM,CAAC;IAChC,sFAAsF;IACtF,iBAAiB,EAAE,MAAM,CAAC;IAC1B,6DAA6D;IAC7D,mBAAmB,EAAE,MAAM,CAAC;IAC5B,iBAAiB,EAAE,MAAM,CAAC;IAE1B,mBAAmB,EAAE,OAAO,CAAC;IAS7B,iBAAiB,EAAE,iBAAiB,CAAC;IAQrC,iBAAiB,EAAE,iBAAiB,CAAC;IACrC,2CAA2C;IAC3C,aAAa,EAAE,yBAAyB,CAAC;CAC1C;iDAgBkE,aAAa,YAAY,CAAC;AAD7F,+DAA+D;AAC/D,qBAAa,kBAAmB,SAAQ,uBAAsD;;IAC5F,mFAAmF;IACnF,QAAQ,CAAC,0BAA0B,OAAO;IAC1C,SAAS,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,gBAA4B;;IA4BlE,yCAAyC;IACzC,GAAG,EAAE,GAAG;IACR,+BAA+B;IAC/B,GAAG,EAAE,GAAG;IACR,qCAAqC;IACrC,GAAG,EAAE,GAAG;IACR,+BAA+B;IAC/B,GAAG,EAAE,GAAG;IACR,6CAA6C;IAC7C,IAAI,GAAE,OAAO,CAAC,UAAU,CAAqB;IAyB/C,IAAI,MAAM,IAAI,eAAe,GAAG,SAAS,CAExC;IAED,IAAI,MAAM,CAAC,GAAG,EAAE,eAAe,EAE9B;IAED,IAAI,OAAO,IAAI,WAAW,CAEzB;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,IAAI,GAAG,IAAI,GAAG,CAEb;IAED,iCAAiC;IACjC,KAAK;IACH,8BAA8B;IAC9B,IAAI,EAAE,IAAI;IACV;;;;;;OAMG;IACH,WAAW,GAAE,iBAAiB,GAAG,MAAM,GAAG,IAAW;IA8CvD,wDAAwD;IAClD,GAAG,CACP,MAAM,EAAE,MAAM,GAAG,SAAS,GAAG,aAAa,CAAC,MAAM,CAAC,EAClD,kBAAkB,UAAO,EACzB,YAAY,UAAO,GAClB,OAAO,CAAC,YAAY,CAAC;IAoiBxB,iCAAiC;IAC3B,KAAK;CAQZ"}
@@ -6,22 +6,24 @@ import {
6
6
  TrackSource
7
7
  } from "@livekit/rtc-node";
8
8
  import EventEmitter from "node:events";
9
- import { LLMStream } from "../llm/index.js";
9
+ import { LLMEvent, LLMStream } from "../llm/index.js";
10
10
  import { ChatContext, ChatMessage, ChatRole } from "../llm/index.js";
11
11
  import { log } from "../log.js";
12
- import { StreamAdapter as STTStreamAdapter } from "../stt/index.js";
12
+ import { StreamAdapter as STTStreamAdapter, SpeechEventType } from "../stt/index.js";
13
13
  import {
14
14
  SentenceTokenizer as BasicSentenceTokenizer,
15
15
  WordTokenizer as BasicWordTokenizer,
16
16
  hyphenateWord
17
17
  } from "../tokenize/basic/index.js";
18
- import { StreamAdapter as TTSStreamAdapter } from "../tts/index.js";
18
+ import { TTSEvent, StreamAdapter as TTSStreamAdapter } from "../tts/index.js";
19
19
  import { AsyncIterableQueue, CancellablePromise, Future, gracefullyCancel } from "../utils.js";
20
+ import { VADEventType } from "../vad.js";
20
21
  import { AgentOutput } from "./agent_output.js";
21
22
  import { AgentPlayout, AgentPlayoutEvent } from "./agent_playout.js";
22
23
  import { HumanInput, HumanInputEvent } from "./human_input.js";
23
24
  import { SpeechHandle } from "./speech_handle.js";
24
25
  const AGENT_STATE_ATTRIBUTE = "lk.agent.state";
26
+ let speechData;
25
27
  var VPAEvent = /* @__PURE__ */ ((VPAEvent2) => {
26
28
  VPAEvent2[VPAEvent2["USER_STARTED_SPEAKING"] = 0] = "USER_STARTED_SPEAKING";
27
29
  VPAEvent2[VPAEvent2["USER_STOPPED_SPEAKING"] = 1] = "USER_STOPPED_SPEAKING";
@@ -32,12 +34,14 @@ var VPAEvent = /* @__PURE__ */ ((VPAEvent2) => {
32
34
  VPAEvent2[VPAEvent2["AGENT_SPEECH_INTERRUPTED"] = 6] = "AGENT_SPEECH_INTERRUPTED";
33
35
  VPAEvent2[VPAEvent2["FUNCTION_CALLS_COLLECTED"] = 7] = "FUNCTION_CALLS_COLLECTED";
34
36
  VPAEvent2[VPAEvent2["FUNCTION_CALLS_FINISHED"] = 8] = "FUNCTION_CALLS_FINISHED";
37
+ VPAEvent2[VPAEvent2["METRICS_COLLECTED"] = 9] = "METRICS_COLLECTED";
35
38
  return VPAEvent2;
36
39
  })(VPAEvent || {});
37
40
  class AgentCallContext {
38
41
  #agent;
39
42
  #llmStream;
40
43
  #metadata = /* @__PURE__ */ new Map();
44
+ #extraChatMessages = [];
41
45
  static #current;
42
46
  constructor(agent, llmStream) {
43
47
  this.#agent = agent;
@@ -59,6 +63,12 @@ class AgentCallContext {
59
63
  get llmStream() {
60
64
  return this.#llmStream;
61
65
  }
66
+ get extraChatMessages() {
67
+ return this.#extraChatMessages;
68
+ }
69
+ addExtraChatMessage(message) {
70
+ this.#extraChatMessages.push(message);
71
+ }
62
72
  }
63
73
  const defaultBeforeLLMCallback = (agent, chatCtx) => {
64
74
  return agent.llm.chat({ chatCtx, fncCtx: agent.fncCtx });
@@ -80,7 +90,7 @@ const defaultVPAOptions = {
80
90
  interruptSpeechDuration: 50,
81
91
  interruptMinWords: 0,
82
92
  minEndpointingDelay: 500,
83
- maxRecursiveFncCalls: 1,
93
+ maxNestedFncCalls: 1,
84
94
  preemptiveSynthesis: false,
85
95
  beforeLLMCallback: defaultBeforeLLMCallback,
86
96
  beforeTTSCallback: defaultBeforeTTSCallback,
@@ -105,7 +115,6 @@ class VoicePipelineAgent extends EventEmitter {
105
115
  #transcribedInterimText = "";
106
116
  #speechQueueOpen = new Future();
107
117
  #speechQueue = new AsyncIterableQueue();
108
- #lastEndOfSpeechTime;
109
118
  #updateStateTask;
110
119
  #started = false;
111
120
  #room;
@@ -113,6 +122,8 @@ class VoicePipelineAgent extends EventEmitter {
113
122
  #deferredValidation;
114
123
  #logger = log();
115
124
  #agentPublication;
125
+ #lastFinalTranscriptTime;
126
+ #lastSpeechTime;
116
127
  constructor(vad, stt, llm, tts, opts = defaultVPAOptions) {
117
128
  super();
118
129
  this.#opts = { ...defaultVPAOptions, ...opts };
@@ -157,6 +168,20 @@ class VoicePipelineAgent extends EventEmitter {
157
168
  if (this.#started) {
158
169
  throw new Error("voice assistant already started");
159
170
  }
171
+ this.#stt.on(SpeechEventType.METRICS_COLLECTED, (metrics) => {
172
+ this.emit(9 /* METRICS_COLLECTED */, metrics);
173
+ });
174
+ this.#tts.on(TTSEvent.METRICS_COLLECTED, (metrics) => {
175
+ if (!speechData) return;
176
+ this.emit(9 /* METRICS_COLLECTED */, { ...metrics, sequenceId: speechData.sequenceId });
177
+ });
178
+ this.#llm.on(LLMEvent.METRICS_COLLECTED, (metrics) => {
179
+ if (!speechData) return;
180
+ this.emit(9 /* METRICS_COLLECTED */, { ...metrics, sequenceId: speechData.sequenceId });
181
+ });
182
+ this.#vad.on(VADEventType.METRICS_COLLECTED, (metrics) => {
183
+ this.emit(9 /* METRICS_COLLECTED */, metrics);
184
+ });
160
185
  room.on(RoomEvent.ParticipantConnected, (participant2) => {
161
186
  if (this.#participant) {
162
187
  return;
@@ -177,10 +202,43 @@ class VoicePipelineAgent extends EventEmitter {
177
202
  /** Play a speech source through the voice assistant. */
178
203
  async say(source, allowInterruptions = true, addToChatCtx = true) {
179
204
  await this.#trackPublishedFut.await;
205
+ let callContext;
206
+ let fncSource;
207
+ if (addToChatCtx) {
208
+ callContext = AgentCallContext.getCurrent();
209
+ if (source instanceof LLMStream) {
210
+ this.#logger.warn("LLMStream will be ignored for function call chat context");
211
+ } else if (typeof source === "string") {
212
+ fncSource = source;
213
+ } else {
214
+ fncSource = source;
215
+ source = new AsyncIterableQueue();
216
+ }
217
+ }
180
218
  const newHandle = SpeechHandle.createAssistantSpeech(allowInterruptions, addToChatCtx);
181
219
  const synthesisHandle = this.#synthesizeAgentSpeech(newHandle.id, source);
182
220
  newHandle.initialize(source, synthesisHandle);
183
- this.#addSpeechForPlayout(newHandle);
221
+ if (this.#playingSpeech && !this.#playingSpeech.nestedSpeechFinished) {
222
+ this.#playingSpeech.addNestedSpeech(newHandle);
223
+ } else {
224
+ this.#addSpeechForPlayout(newHandle);
225
+ }
226
+ if (callContext && fncSource) {
227
+ let text;
228
+ if (typeof source === "string") {
229
+ text = fncSource;
230
+ } else {
231
+ text = "";
232
+ for await (const chunk of fncSource) {
233
+ source.put(chunk);
234
+ text += chunk;
235
+ }
236
+ source.close();
237
+ }
238
+ callContext.addExtraChatMessage(ChatMessage.create({ text, role: ChatRole.ASSISTANT }));
239
+ this.#logger.child({ text }).debug("added speech to function call chat context");
240
+ }
241
+ return newHandle;
184
242
  }
185
243
  #updateState(state, delay = 0) {
186
244
  const runTask = (delay2) => {
@@ -234,11 +292,13 @@ class VoicePipelineAgent extends EventEmitter {
234
292
  if (event.speechDuration >= this.#opts.interruptSpeechDuration) {
235
293
  this.#interruptIfPossible();
236
294
  }
295
+ if (event.rawAccumulatedSpeech > 0) {
296
+ this.#lastSpeechTime = Date.now() - event.rawAccumulatedSilence;
297
+ }
237
298
  });
238
299
  this.#humanInput.on(HumanInputEvent.END_OF_SPEECH, (event) => {
239
300
  this.emit(0 /* USER_STARTED_SPEAKING */);
240
301
  this.#deferredValidation.onHumanEndOfSpeech(event);
241
- this.#lastEndOfSpeechTime = Date.now();
242
302
  });
243
303
  this.#humanInput.on(HumanInputEvent.INTERIM_TRANSCRIPT, (event) => {
244
304
  this.#transcribedInterimText = event.alternatives[0].text;
@@ -246,7 +306,7 @@ class VoicePipelineAgent extends EventEmitter {
246
306
  this.#humanInput.on(HumanInputEvent.FINAL_TRANSCRIPT, (event) => {
247
307
  const newTranscript = event.alternatives[0].text;
248
308
  if (!newTranscript) return;
249
- this.#logger.child({ userTranscript: newTranscript }).debug("received user transcript");
309
+ this.#lastFinalTranscriptTime = Date.now();
250
310
  this.#transcribedText += (this.#transcribedText ? " " : "") + newTranscript;
251
311
  if (this.#opts.preemptiveSynthesis && (!this.#playingSpeech || this.#playingSpeech.allowInterruptions)) {
252
312
  this.#synthesizeAgentReply();
@@ -318,8 +378,7 @@ class VoicePipelineAgent extends EventEmitter {
318
378
  if ((!playingSpeech.userQuestion || playingSpeech.userCommitted) && !playingSpeech.speechCommitted) {
319
379
  copiedCtx.messages.push(
320
380
  ChatMessage.create({
321
- // TODO(nbsp): uhhh unsure where to get the played text here
322
- // text: playingSpeech.synthesisHandle.(theres no ttsForwarder here)
381
+ text: playingSpeech.synthesisHandle.text,
323
382
  role: ChatRole.ASSISTANT
324
383
  })
325
384
  );
@@ -331,23 +390,26 @@ class VoicePipelineAgent extends EventEmitter {
331
390
  role: ChatRole.USER
332
391
  })
333
392
  );
334
- if (cancelled) resolve();
335
- let llmStream = await this.#opts.beforeLLMCallback(this, copiedCtx);
336
- if (llmStream === false) {
337
- handle == null ? void 0 : handle.cancel();
338
- return;
339
- }
340
- if (cancelled) resolve();
341
- if (!(llmStream instanceof LLMStream)) {
342
- llmStream = await defaultBeforeLLMCallback(this, copiedCtx);
343
- }
344
- if (handle.interrupted) {
345
- return;
393
+ speechData = { sequenceId: handle.id };
394
+ try {
395
+ if (cancelled) resolve();
396
+ let llmStream = await this.#opts.beforeLLMCallback(this, copiedCtx);
397
+ if (llmStream === false) {
398
+ handle == null ? void 0 : handle.cancel();
399
+ return;
400
+ }
401
+ if (cancelled) resolve();
402
+ if (!(llmStream instanceof LLMStream)) {
403
+ llmStream = await defaultBeforeLLMCallback(this, copiedCtx);
404
+ }
405
+ if (handle.interrupted) {
406
+ return;
407
+ }
408
+ const synthesisHandle = this.#synthesizeAgentSpeech(handle.id, llmStream);
409
+ handle.initialize(llmStream, synthesisHandle);
410
+ } finally {
411
+ speechData = void 0;
346
412
  }
347
- const synthesisHandle = this.#synthesizeAgentSpeech(handle.id, llmStream);
348
- handle.initialize(llmStream, synthesisHandle);
349
- const elapsed = !!this.#lastEndOfSpeechTime ? Math.round((Date.now() - this.#lastEndOfSpeechTime) * 1e3) / 1e3 : -1;
350
- this.#logger.child({ speechId: handle.id, elapsed }).debug("synthesizing agent reply");
351
413
  resolve();
352
414
  });
353
415
  }
@@ -387,62 +449,83 @@ class VoicePipelineAgent extends EventEmitter {
387
449
  if (handle.interrupted) break;
388
450
  }
389
451
  commitUserQuestionIfNeeded();
390
- let collectedText = "";
452
+ const collectedText = handle.synthesisHandle.text;
391
453
  const isUsingTools = handle.source instanceof LLMStream && !!handle.source.functionCalls.length;
392
- const extraToolsMessages = [];
393
- let interrupted = handle.interrupted;
394
- if (isUsingTools && !interrupted) {
454
+ const interrupted = handle.interrupted;
455
+ const executeFunctionCalls = async () => {
456
+ if (!isUsingTools || interrupted) return;
457
+ if (handle.fncNestedDepth >= this.#opts.maxNestedFncCalls) {
458
+ this.#logger.child({ speechId: handle.id, fncNestedDepth: handle.fncNestedDepth }).warn("max function calls nested depth reached");
459
+ return;
460
+ }
395
461
  if (!userQuestion || !handle.userCommitted) {
396
462
  throw new Error("user speech should have been committed before using tools");
397
463
  }
398
464
  const llmStream = handle.source;
399
- let newFunctionCalls = llmStream.functionCalls;
400
- for (let i = 0; i < this.#opts.maxRecursiveFncCalls; i++) {
401
- this.emit(7 /* FUNCTION_CALLS_COLLECTED */, newFunctionCalls);
402
- const calledFuncs = [];
403
- for (const func of newFunctionCalls) {
404
- const task = func.func.execute(func.params).then(
405
- (result) => ({ name: func.name, toolCallId: func.toolCallId, result }),
406
- (error) => ({ name: func.name, toolCallId: func.toolCallId, error })
407
- );
408
- calledFuncs.push({ ...func, task });
409
- this.#logger.child({ function: func.name, speechId: handle.id }).debug("executing AI function");
410
- try {
411
- await task;
412
- } catch {
413
- this.#logger.child({ function: func.name, speechId: handle.id }).error("error executing AI function");
414
- }
415
- }
416
- const toolCallsInfo = [];
417
- const toolCallsResults = [];
418
- for (const fnc of calledFuncs) {
419
- const task = await fnc.task;
420
- if (!task || task.result === void 0) continue;
421
- toolCallsInfo.push(fnc);
422
- toolCallsResults.push(ChatMessage.createToolFromFunctionResult(task));
465
+ const newFunctionCalls = llmStream.functionCalls;
466
+ new AgentCallContext(this, llmStream);
467
+ this.emit(7 /* FUNCTION_CALLS_COLLECTED */, newFunctionCalls);
468
+ const calledFuncs = [];
469
+ for (const func of newFunctionCalls) {
470
+ const task2 = func.func.execute(func.params).then(
471
+ (result) => ({ name: func.name, toolCallId: func.toolCallId, result }),
472
+ (error) => ({ name: func.name, toolCallId: func.toolCallId, error })
473
+ );
474
+ calledFuncs.push({ ...func, task: task2 });
475
+ this.#logger.child({ function: func.name, speechId: handle.id }).debug("executing AI function");
476
+ try {
477
+ await task2;
478
+ } catch {
479
+ this.#logger.child({ function: func.name, speechId: handle.id }).error("error executing AI function");
423
480
  }
424
- if (!toolCallsInfo.length) break;
425
- extraToolsMessages.push(ChatMessage.createToolCalls(toolCallsInfo, collectedText));
426
- extraToolsMessages.push(...toolCallsResults);
427
- const chatCtx = handle.source.chatCtx.copy();
428
- chatCtx.messages.push(...extraToolsMessages);
429
- const answerLLMStream = this.llm.chat({
430
- chatCtx,
431
- fncCtx: this.fncCtx
432
- });
433
- const answerSynthesis = this.#synthesizeAgentSpeech(handle.id, answerLLMStream);
434
- handle.synthesisHandle = answerSynthesis;
435
- const playHandle2 = answerSynthesis.play();
436
- await playHandle2.join().await;
437
- collectedText = "";
438
- interrupted = answerSynthesis.interrupted;
439
- newFunctionCalls = answerLLMStream.functionCalls;
440
- this.emit(8 /* FUNCTION_CALLS_FINISHED */, calledFuncs);
441
- if (!newFunctionCalls) break;
481
+ }
482
+ const toolCallsInfo = [];
483
+ const toolCallsResults = [];
484
+ for (const fnc of calledFuncs) {
485
+ const task2 = await fnc.task;
486
+ if (!task2 || task2.result === void 0) continue;
487
+ toolCallsInfo.push(fnc);
488
+ toolCallsResults.push(ChatMessage.createToolFromFunctionResult(task2));
489
+ }
490
+ if (!toolCallsInfo.length) return;
491
+ const extraToolsMessages = [ChatMessage.createToolCalls(toolCallsInfo, collectedText)];
492
+ extraToolsMessages.push(...toolCallsResults);
493
+ const newSpeechHandle = SpeechHandle.createToolSpeech(
494
+ handle.allowInterruptions,
495
+ handle.addToChatCtx,
496
+ handle.fncNestedDepth + 1,
497
+ extraToolsMessages
498
+ );
499
+ const chatCtx = handle.source.chatCtx.copy();
500
+ chatCtx.messages.push(...extraToolsMessages);
501
+ chatCtx.messages.push(...AgentCallContext.getCurrent().extraChatMessages);
502
+ const answerLLMStream = this.llm.chat({
503
+ chatCtx,
504
+ fncCtx: this.fncCtx
505
+ });
506
+ const answerSynthesis = this.#synthesizeAgentSpeech(newSpeechHandle.id, answerLLMStream);
507
+ newSpeechHandle.initialize(answerLLMStream, answerSynthesis);
508
+ handle.addNestedSpeech(newSpeechHandle);
509
+ this.emit(8 /* FUNCTION_CALLS_FINISHED */, calledFuncs);
510
+ };
511
+ const task = executeFunctionCalls().then(() => {
512
+ handle.markNestedSpeechFinished();
513
+ });
514
+ while (!handle.nestedSpeechFinished) {
515
+ const changed = handle.nestedSpeechChanged();
516
+ await Promise.race([changed, task]);
517
+ while (handle.nestedSpeechHandles.length) {
518
+ const speech = handle.nestedSpeechHandles[0];
519
+ this.#playingSpeech = speech;
520
+ await this.#playSpeech(speech);
521
+ handle.nestedSpeechHandles.shift();
522
+ this.#playingSpeech = handle;
442
523
  }
443
524
  }
444
525
  if (handle.addToChatCtx && (!userQuestion || handle.userCommitted)) {
445
- this.chatCtx.messages.push(...extraToolsMessages);
526
+ if (handle.extraToolsMessages) {
527
+ this.chatCtx.messages.push(...handle.extraToolsMessages);
528
+ }
446
529
  if (interrupted) {
447
530
  collectedText + "\u2026";
448
531
  }
@@ -459,6 +542,7 @@ class VoicePipelineAgent extends EventEmitter {
459
542
  interrupted,
460
543
  speechId: handle.id
461
544
  }).debug("committed agent speech");
545
+ handle.setDone();
462
546
  }
463
547
  }
464
548
  #synthesizeAgentSpeech(speechId, source) {
@@ -499,6 +583,20 @@ class VoicePipelineAgent extends EventEmitter {
499
583
  }
500
584
  }
501
585
  this.#logger.child({ speechId: this.#pendingAgentReply.id }).debug("validated agent reply");
586
+ if (this.#lastSpeechTime) {
587
+ const timeSinceLastSpeech = Date.now() - this.#lastSpeechTime;
588
+ const transcriptionDelay = Math.max(
589
+ (this.#lastFinalTranscriptTime || 0) - this.#lastSpeechTime,
590
+ 0
591
+ );
592
+ const metrics = {
593
+ timestamp: Date.now(),
594
+ sequenceId: this.#pendingAgentReply.id,
595
+ endOfUtteranceDelay: timeSinceLastSpeech,
596
+ transcriptionDelay
597
+ };
598
+ this.emit(9 /* METRICS_COLLECTED */, metrics);
599
+ }
502
600
  this.#addSpeechForPlayout(this.#pendingAgentReply);
503
601
  this.#pendingAgentReply = void 0;
504
602
  this.#transcribedInterimText = "";