@livekit/agents 1.0.46 → 1.0.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. package/dist/cli.cjs +14 -20
  2. package/dist/cli.cjs.map +1 -1
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +14 -20
  5. package/dist/cli.js.map +1 -1
  6. package/dist/ipc/job_proc_lazy_main.cjs +14 -5
  7. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  8. package/dist/ipc/job_proc_lazy_main.js +14 -5
  9. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  10. package/dist/llm/chat_context.cjs +19 -0
  11. package/dist/llm/chat_context.cjs.map +1 -1
  12. package/dist/llm/chat_context.d.cts +4 -0
  13. package/dist/llm/chat_context.d.ts +4 -0
  14. package/dist/llm/chat_context.d.ts.map +1 -1
  15. package/dist/llm/chat_context.js +19 -0
  16. package/dist/llm/chat_context.js.map +1 -1
  17. package/dist/llm/provider_format/index.cjs +2 -0
  18. package/dist/llm/provider_format/index.cjs.map +1 -1
  19. package/dist/llm/provider_format/index.d.cts +1 -1
  20. package/dist/llm/provider_format/index.d.ts +1 -1
  21. package/dist/llm/provider_format/index.d.ts.map +1 -1
  22. package/dist/llm/provider_format/index.js +6 -1
  23. package/dist/llm/provider_format/index.js.map +1 -1
  24. package/dist/llm/provider_format/openai.cjs +82 -2
  25. package/dist/llm/provider_format/openai.cjs.map +1 -1
  26. package/dist/llm/provider_format/openai.d.cts +1 -0
  27. package/dist/llm/provider_format/openai.d.ts +1 -0
  28. package/dist/llm/provider_format/openai.d.ts.map +1 -1
  29. package/dist/llm/provider_format/openai.js +80 -1
  30. package/dist/llm/provider_format/openai.js.map +1 -1
  31. package/dist/llm/provider_format/openai.test.cjs +326 -0
  32. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  33. package/dist/llm/provider_format/openai.test.js +327 -1
  34. package/dist/llm/provider_format/openai.test.js.map +1 -1
  35. package/dist/llm/provider_format/utils.cjs +4 -3
  36. package/dist/llm/provider_format/utils.cjs.map +1 -1
  37. package/dist/llm/provider_format/utils.d.ts.map +1 -1
  38. package/dist/llm/provider_format/utils.js +4 -3
  39. package/dist/llm/provider_format/utils.js.map +1 -1
  40. package/dist/llm/realtime.cjs.map +1 -1
  41. package/dist/llm/realtime.d.cts +1 -0
  42. package/dist/llm/realtime.d.ts +1 -0
  43. package/dist/llm/realtime.d.ts.map +1 -1
  44. package/dist/llm/realtime.js.map +1 -1
  45. package/dist/log.cjs +5 -2
  46. package/dist/log.cjs.map +1 -1
  47. package/dist/log.d.ts.map +1 -1
  48. package/dist/log.js +5 -2
  49. package/dist/log.js.map +1 -1
  50. package/dist/stream/deferred_stream.cjs +15 -6
  51. package/dist/stream/deferred_stream.cjs.map +1 -1
  52. package/dist/stream/deferred_stream.d.ts.map +1 -1
  53. package/dist/stream/deferred_stream.js +15 -6
  54. package/dist/stream/deferred_stream.js.map +1 -1
  55. package/dist/utils.cjs +31 -2
  56. package/dist/utils.cjs.map +1 -1
  57. package/dist/utils.d.cts +7 -0
  58. package/dist/utils.d.ts +7 -0
  59. package/dist/utils.d.ts.map +1 -1
  60. package/dist/utils.js +31 -2
  61. package/dist/utils.js.map +1 -1
  62. package/dist/utils.test.cjs +71 -0
  63. package/dist/utils.test.cjs.map +1 -1
  64. package/dist/utils.test.js +71 -0
  65. package/dist/utils.test.js.map +1 -1
  66. package/dist/version.cjs +1 -1
  67. package/dist/version.cjs.map +1 -1
  68. package/dist/version.d.cts +1 -1
  69. package/dist/version.d.ts +1 -1
  70. package/dist/version.d.ts.map +1 -1
  71. package/dist/version.js +1 -1
  72. package/dist/version.js.map +1 -1
  73. package/dist/voice/agent.cjs +144 -12
  74. package/dist/voice/agent.cjs.map +1 -1
  75. package/dist/voice/agent.d.cts +29 -4
  76. package/dist/voice/agent.d.ts +29 -4
  77. package/dist/voice/agent.d.ts.map +1 -1
  78. package/dist/voice/agent.js +140 -11
  79. package/dist/voice/agent.js.map +1 -1
  80. package/dist/voice/agent.test.cjs +120 -0
  81. package/dist/voice/agent.test.cjs.map +1 -1
  82. package/dist/voice/agent.test.js +122 -2
  83. package/dist/voice/agent.test.js.map +1 -1
  84. package/dist/voice/agent_activity.cjs +383 -298
  85. package/dist/voice/agent_activity.cjs.map +1 -1
  86. package/dist/voice/agent_activity.d.cts +34 -7
  87. package/dist/voice/agent_activity.d.ts +34 -7
  88. package/dist/voice/agent_activity.d.ts.map +1 -1
  89. package/dist/voice/agent_activity.js +383 -293
  90. package/dist/voice/agent_activity.js.map +1 -1
  91. package/dist/voice/agent_session.cjs +140 -40
  92. package/dist/voice/agent_session.cjs.map +1 -1
  93. package/dist/voice/agent_session.d.cts +19 -7
  94. package/dist/voice/agent_session.d.ts +19 -7
  95. package/dist/voice/agent_session.d.ts.map +1 -1
  96. package/dist/voice/agent_session.js +137 -37
  97. package/dist/voice/agent_session.js.map +1 -1
  98. package/dist/voice/audio_recognition.cjs +4 -0
  99. package/dist/voice/audio_recognition.cjs.map +1 -1
  100. package/dist/voice/audio_recognition.d.ts.map +1 -1
  101. package/dist/voice/audio_recognition.js +4 -0
  102. package/dist/voice/audio_recognition.js.map +1 -1
  103. package/dist/voice/generation.cjs +39 -19
  104. package/dist/voice/generation.cjs.map +1 -1
  105. package/dist/voice/generation.d.ts.map +1 -1
  106. package/dist/voice/generation.js +44 -20
  107. package/dist/voice/generation.js.map +1 -1
  108. package/dist/voice/index.cjs +2 -0
  109. package/dist/voice/index.cjs.map +1 -1
  110. package/dist/voice/index.d.cts +1 -1
  111. package/dist/voice/index.d.ts +1 -1
  112. package/dist/voice/index.d.ts.map +1 -1
  113. package/dist/voice/index.js +2 -1
  114. package/dist/voice/index.js.map +1 -1
  115. package/dist/voice/speech_handle.cjs +7 -1
  116. package/dist/voice/speech_handle.cjs.map +1 -1
  117. package/dist/voice/speech_handle.d.cts +2 -0
  118. package/dist/voice/speech_handle.d.ts +2 -0
  119. package/dist/voice/speech_handle.d.ts.map +1 -1
  120. package/dist/voice/speech_handle.js +8 -2
  121. package/dist/voice/speech_handle.js.map +1 -1
  122. package/dist/voice/testing/run_result.cjs +66 -15
  123. package/dist/voice/testing/run_result.cjs.map +1 -1
  124. package/dist/voice/testing/run_result.d.cts +14 -3
  125. package/dist/voice/testing/run_result.d.ts +14 -3
  126. package/dist/voice/testing/run_result.d.ts.map +1 -1
  127. package/dist/voice/testing/run_result.js +66 -15
  128. package/dist/voice/testing/run_result.js.map +1 -1
  129. package/package.json +1 -1
  130. package/src/cli.ts +20 -33
  131. package/src/ipc/job_proc_lazy_main.ts +16 -5
  132. package/src/llm/chat_context.ts +35 -0
  133. package/src/llm/provider_format/index.ts +7 -2
  134. package/src/llm/provider_format/openai.test.ts +385 -1
  135. package/src/llm/provider_format/openai.ts +103 -0
  136. package/src/llm/provider_format/utils.ts +6 -4
  137. package/src/llm/realtime.ts +1 -0
  138. package/src/log.ts +5 -2
  139. package/src/stream/deferred_stream.ts +17 -6
  140. package/src/utils.test.ts +87 -0
  141. package/src/utils.ts +36 -2
  142. package/src/version.ts +1 -1
  143. package/src/voice/agent.test.ts +140 -2
  144. package/src/voice/agent.ts +189 -10
  145. package/src/voice/agent_activity.ts +427 -289
  146. package/src/voice/agent_session.ts +178 -40
  147. package/src/voice/audio_recognition.ts +4 -0
  148. package/src/voice/generation.ts +52 -23
  149. package/src/voice/index.ts +1 -1
  150. package/src/voice/speech_handle.ts +9 -2
  151. package/src/voice/testing/run_result.ts +81 -23
@@ -10,14 +10,20 @@ import {
10
10
  } from "../llm/index.js";
11
11
  import { isSameToolChoice, isSameToolContext } from "../llm/tool_context.js";
12
12
  import { log } from "../log.js";
13
- import { DeferredReadableStream } from "../stream/deferred_stream.js";
13
+ import { MultiInputStream } from "../stream/multi_input_stream.js";
14
14
  import { STT } from "../stt/stt.js";
15
15
  import { recordRealtimeMetrics, traceTypes, tracer } from "../telemetry/index.js";
16
16
  import { splitWords } from "../tokenize/basic/word.js";
17
17
  import { TTS } from "../tts/tts.js";
18
18
  import { Future, Task, cancelAndWait, waitFor } from "../utils.js";
19
19
  import { VAD } from "../vad.js";
20
- import { StopResponse, asyncLocalStorage } from "./agent.js";
20
+ import {
21
+ StopResponse,
22
+ _getActivityTaskInfo,
23
+ _setActivityTaskInfo,
24
+ functionCallStorage,
25
+ speechHandleStorage
26
+ } from "./agent.js";
21
27
  import {} from "./agent_session.js";
22
28
  import {
23
29
  AudioRecognition
@@ -41,8 +47,10 @@ import {
41
47
  } from "./generation.js";
42
48
  import { SpeechHandle } from "./speech_handle.js";
43
49
  import { setParticipantSpanAttributes } from "./utils.js";
44
- const speechHandleStorage = new AsyncLocalStorage();
50
+ const agentActivityStorage = new AsyncLocalStorage();
45
51
  class AgentActivity {
52
+ agent;
53
+ agentSession;
46
54
  static REPLY_TASK_CANCEL_TIMEOUT = 5e3;
47
55
  started = false;
48
56
  audioRecognition;
@@ -51,22 +59,29 @@ class AgentActivity {
51
59
  // Maps response_id to OTEL span for metrics recording
52
60
  turnDetectionMode;
53
61
  logger = log();
54
- _draining = false;
62
+ _schedulingPaused = true;
63
+ _drainBlockedTasks = [];
55
64
  _currentSpeech;
56
65
  speechQueue;
57
66
  // [priority, timestamp, speechHandle]
58
67
  q_updated;
59
68
  speechTasks = /* @__PURE__ */ new Set();
60
69
  lock = new Mutex();
61
- audioStream = new DeferredReadableStream();
70
+ audioStream = new MultiInputStream();
71
+ audioStreamId;
62
72
  // default to null as None, which maps to the default provider tool choice value
63
73
  toolChoice = null;
64
74
  _preemptiveGeneration;
65
- agent;
66
- agentSession;
67
75
  /** @internal */
68
76
  _mainTask;
77
+ _onEnterTask;
78
+ _onExitTask;
69
79
  _userTurnCompletedTask;
80
+ onRealtimeGenerationCreated = (ev) => this.onGenerationCreated(ev);
81
+ onRealtimeInputSpeechStarted = (ev) => this.onInputSpeechStarted(ev);
82
+ onRealtimeInputSpeechStopped = (ev) => this.onInputSpeechStopped(ev);
83
+ onRealtimeInputAudioTranscriptionCompleted = (ev) => this.onInputAudioTranscriptionCompleted(ev);
84
+ onModelError = (ev) => this.onError(ev);
70
85
  constructor(agent, agentSession) {
71
86
  this.agent = agent;
72
87
  this.agentSession = agentSession;
@@ -77,7 +92,7 @@ class AgentActivity {
77
92
  this.turnDetectionMode = typeof this.turnDetection === "string" ? this.turnDetection : void 0;
78
93
  if (this.turnDetectionMode === "vad" && this.vad === void 0) {
79
94
  this.logger.warn(
80
- 'turnDetection is set to "vad", but no VAD model is provided, ignoring the turnDdetection setting'
95
+ 'turnDetection is set to "vad", but no VAD model is provided, ignoring the turnDetection setting'
81
96
  );
82
97
  this.turnDetectionMode = void 0;
83
98
  }
@@ -127,107 +142,121 @@ class AgentActivity {
127
142
  }
128
143
  }
129
144
  async start() {
130
- var _a;
131
145
  const unlock = await this.lock.lock();
132
146
  try {
133
- const startSpan = tracer.startSpan({
134
- name: "start_agent_activity",
135
- attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
136
- context: ROOT_CONTEXT
137
- });
138
- this.agent._agentActivity = this;
139
- if (this.llm instanceof RealtimeModel) {
140
- this.realtimeSession = this.llm.session();
141
- this.realtimeSpans = /* @__PURE__ */ new Map();
142
- this.realtimeSession.on("generation_created", (ev) => this.onGenerationCreated(ev));
143
- this.realtimeSession.on("input_speech_started", (ev) => this.onInputSpeechStarted(ev));
144
- this.realtimeSession.on("input_speech_stopped", (ev) => this.onInputSpeechStopped(ev));
145
- this.realtimeSession.on(
146
- "input_audio_transcription_completed",
147
- (ev) => this.onInputAudioTranscriptionCompleted(ev)
148
- );
149
- this.realtimeSession.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
150
- this.realtimeSession.on("error", (ev) => this.onError(ev));
151
- removeInstructions(this.agent._chatCtx);
152
- try {
153
- await this.realtimeSession.updateInstructions(this.agent.instructions);
154
- } catch (error) {
155
- this.logger.error(error, "failed to update the instructions");
156
- }
157
- try {
158
- await this.realtimeSession.updateChatCtx(this.agent.chatCtx);
159
- } catch (error) {
160
- this.logger.error(error, "failed to update the chat context");
161
- }
162
- try {
163
- await this.realtimeSession.updateTools(this.tools);
164
- } catch (error) {
165
- this.logger.error(error, "failed to update the tools");
166
- }
167
- if (!this.llm.capabilities.audioOutput && !this.tts && this.agentSession.output.audio) {
168
- this.logger.error(
169
- "audio output is enabled but RealtimeModel has no audio modality and no TTS is set. Either enable audio modality in the RealtimeModel or set a TTS model."
170
- );
171
- }
172
- } else if (this.llm instanceof LLM) {
173
- try {
174
- updateInstructions({
175
- chatCtx: this.agent._chatCtx,
176
- instructions: this.agent.instructions,
177
- addIfMissing: true
178
- });
179
- } catch (error) {
180
- this.logger.error("failed to update the instructions", error);
181
- }
147
+ await this._startSession({ spanName: "start_agent_activity", runOnEnter: true });
148
+ } finally {
149
+ unlock();
150
+ }
151
+ }
152
+ async resume() {
153
+ const unlock = await this.lock.lock();
154
+ try {
155
+ await this._startSession({ spanName: "resume_agent_activity", runOnEnter: false });
156
+ } finally {
157
+ unlock();
158
+ }
159
+ }
160
+ async _startSession(options) {
161
+ var _a;
162
+ const { spanName, runOnEnter } = options;
163
+ const startSpan = tracer.startSpan({
164
+ name: spanName,
165
+ attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
166
+ context: ROOT_CONTEXT
167
+ });
168
+ this.agent._agentActivity = this;
169
+ if (this.llm instanceof RealtimeModel) {
170
+ this.realtimeSession = this.llm.session();
171
+ this.realtimeSpans = /* @__PURE__ */ new Map();
172
+ this.realtimeSession.on("generation_created", this.onRealtimeGenerationCreated);
173
+ this.realtimeSession.on("input_speech_started", this.onRealtimeInputSpeechStarted);
174
+ this.realtimeSession.on("input_speech_stopped", this.onRealtimeInputSpeechStopped);
175
+ this.realtimeSession.on(
176
+ "input_audio_transcription_completed",
177
+ this.onRealtimeInputAudioTranscriptionCompleted
178
+ );
179
+ this.realtimeSession.on("metrics_collected", this.onMetricsCollected);
180
+ this.realtimeSession.on("error", this.onModelError);
181
+ removeInstructions(this.agent._chatCtx);
182
+ try {
183
+ await this.realtimeSession.updateInstructions(this.agent.instructions);
184
+ } catch (error) {
185
+ this.logger.error(error, "failed to update the instructions");
182
186
  }
183
- if (this.llm instanceof LLM) {
184
- this.llm.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
185
- this.llm.on("error", (ev) => this.onError(ev));
187
+ try {
188
+ await this.realtimeSession.updateChatCtx(this.agent.chatCtx);
189
+ } catch (error) {
190
+ this.logger.error(error, "failed to update the chat context");
186
191
  }
187
- if (this.stt instanceof STT) {
188
- this.stt.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
189
- this.stt.on("error", (ev) => this.onError(ev));
192
+ try {
193
+ await this.realtimeSession.updateTools(this.tools);
194
+ } catch (error) {
195
+ this.logger.error(error, "failed to update the tools");
190
196
  }
191
- if (this.tts instanceof TTS) {
192
- this.tts.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
193
- this.tts.on("error", (ev) => this.onError(ev));
197
+ if (!this.llm.capabilities.audioOutput && !this.tts && this.agentSession.output.audio) {
198
+ this.logger.error(
199
+ "audio output is enabled but RealtimeModel has no audio modality and no TTS is set. Either enable audio modality in the RealtimeModel or set a TTS model."
200
+ );
194
201
  }
195
- if (this.vad instanceof VAD) {
196
- this.vad.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
202
+ } else if (this.llm instanceof LLM) {
203
+ try {
204
+ updateInstructions({
205
+ chatCtx: this.agent._chatCtx,
206
+ instructions: this.agent.instructions,
207
+ addIfMissing: true
208
+ });
209
+ } catch (error) {
210
+ this.logger.error("failed to update the instructions", error);
197
211
  }
198
- this.audioRecognition = new AudioRecognition({
199
- recognitionHooks: this,
200
- // Disable stt node if stt is not provided
201
- stt: this.stt ? (...args) => this.agent.sttNode(...args) : void 0,
202
- vad: this.vad,
203
- turnDetector: typeof this.turnDetection === "string" ? void 0 : this.turnDetection,
204
- turnDetectionMode: this.turnDetectionMode,
205
- minEndpointingDelay: this.agentSession.options.minEndpointingDelay,
206
- maxEndpointingDelay: this.agentSession.options.maxEndpointingDelay,
207
- rootSpanContext: this.agentSession.rootSpanContext,
208
- sttModel: (_a = this.stt) == null ? void 0 : _a.label,
209
- sttProvider: this.getSttProvider(),
210
- getLinkedParticipant: () => {
211
- var _a2;
212
- return (_a2 = this.agentSession._roomIO) == null ? void 0 : _a2.linkedParticipant;
213
- }
214
- });
215
- this.audioRecognition.start();
216
- this.started = true;
217
- this._mainTask = Task.from(({ signal }) => this.mainTask(signal));
218
- const onEnterTask = tracer.startActiveSpan(async () => this.agent.onEnter(), {
219
- name: "on_enter",
220
- context: trace.setSpan(ROOT_CONTEXT, startSpan),
221
- attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
222
- });
223
- this.createSpeechTask({
224
- task: Task.from(() => onEnterTask),
212
+ }
213
+ if (this.llm instanceof LLM) {
214
+ this.llm.on("metrics_collected", this.onMetricsCollected);
215
+ this.llm.on("error", this.onModelError);
216
+ }
217
+ if (this.stt instanceof STT) {
218
+ this.stt.on("metrics_collected", this.onMetricsCollected);
219
+ this.stt.on("error", this.onModelError);
220
+ }
221
+ if (this.tts instanceof TTS) {
222
+ this.tts.on("metrics_collected", this.onMetricsCollected);
223
+ this.tts.on("error", this.onModelError);
224
+ }
225
+ if (this.vad instanceof VAD) {
226
+ this.vad.on("metrics_collected", this.onMetricsCollected);
227
+ }
228
+ this.audioRecognition = new AudioRecognition({
229
+ recognitionHooks: this,
230
+ // Disable stt node if stt is not provided
231
+ stt: this.stt ? (...args) => this.agent.sttNode(...args) : void 0,
232
+ vad: this.vad,
233
+ turnDetector: typeof this.turnDetection === "string" ? void 0 : this.turnDetection,
234
+ turnDetectionMode: this.turnDetectionMode,
235
+ minEndpointingDelay: this.agentSession.options.minEndpointingDelay,
236
+ maxEndpointingDelay: this.agentSession.options.maxEndpointingDelay,
237
+ rootSpanContext: this.agentSession.rootSpanContext,
238
+ sttModel: (_a = this.stt) == null ? void 0 : _a.label,
239
+ sttProvider: this.getSttProvider(),
240
+ getLinkedParticipant: () => {
241
+ var _a2;
242
+ return (_a2 = this.agentSession._roomIO) == null ? void 0 : _a2.linkedParticipant;
243
+ }
244
+ });
245
+ this.audioRecognition.start();
246
+ this.started = true;
247
+ this._resumeSchedulingTask();
248
+ if (runOnEnter) {
249
+ this._onEnterTask = this.createSpeechTask({
250
+ taskFn: () => tracer.startActiveSpan(async () => this.agent.onEnter(), {
251
+ name: "on_enter",
252
+ context: trace.setSpan(ROOT_CONTEXT, startSpan),
253
+ attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
254
+ }),
255
+ inlineTask: true,
225
256
  name: "AgentActivity_onEnter"
226
257
  });
227
- startSpan.end();
228
- } finally {
229
- unlock();
230
258
  }
259
+ startSpan.end();
231
260
  }
232
261
  get currentSpeech() {
233
262
  return this._currentSpeech;
@@ -256,8 +285,8 @@ class AgentActivity {
256
285
  get tools() {
257
286
  return this.agent.toolCtx;
258
287
  }
259
- get draining() {
260
- return this._draining;
288
+ get schedulingPaused() {
289
+ return this._schedulingPaused;
261
290
  }
262
291
  get realtimeLLMSession() {
263
292
  return this.realtimeSession;
@@ -297,11 +326,9 @@ class AgentActivity {
297
326
  }
298
327
  }
299
328
  attachAudioInput(audioStream) {
300
- if (this.audioStream.isSourceSet) {
301
- this.logger.debug("detaching existing audio input in agent activity");
302
- this.audioStream.detachSource();
303
- }
304
- this.audioStream.setSource(audioStream);
329
+ void this.audioStream.close();
330
+ this.audioStream = new MultiInputStream();
331
+ this.audioStreamId = this.audioStream.addInputStream(audioStream);
305
332
  const [realtimeAudioStream, recognitionAudioStream] = this.audioStream.stream.tee();
306
333
  if (this.realtimeSession) {
307
334
  this.realtimeSession.setInputAudioStream(realtimeAudioStream);
@@ -311,13 +338,21 @@ class AgentActivity {
311
338
  }
312
339
  }
313
340
  detachAudioInput() {
314
- this.audioStream.detachSource();
341
+ if (this.audioStreamId === void 0) {
342
+ return;
343
+ }
344
+ void this.audioStream.close();
345
+ this.audioStream = new MultiInputStream();
346
+ this.audioStreamId = void 0;
315
347
  }
316
- commitUserTurn() {
348
+ commitUserTurn(options = {}) {
349
+ const { audioDetached = false, throwIfNotReady = true } = options;
317
350
  if (!this.audioRecognition) {
318
- throw new Error("AudioRecognition is not initialized");
351
+ if (throwIfNotReady) {
352
+ throw new Error("AudioRecognition is not initialized");
353
+ }
354
+ return;
319
355
  }
320
- const audioDetached = false;
321
356
  this.audioRecognition.commitUserTurn(audioDetached);
322
357
  }
323
358
  clearUserTurn() {
@@ -353,13 +388,11 @@ class AgentActivity {
353
388
  })
354
389
  );
355
390
  const task = this.createSpeechTask({
356
- task: Task.from(
357
- (abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio)
358
- ),
391
+ taskFn: (abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio),
359
392
  ownedSpeechHandle: handle,
360
393
  name: "AgentActivity.say_tts"
361
394
  });
362
- task.finally(() => this.onPipelineReplyDone());
395
+ task.result.finally(() => this.onPipelineReplyDone());
363
396
  this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
364
397
  return handle;
365
398
  }
@@ -449,8 +482,8 @@ class AgentActivity {
449
482
  if (ev.userInitiated) {
450
483
  return;
451
484
  }
452
- if (this.draining) {
453
- this.logger.warn("skipping new realtime generation, the agent is draining");
485
+ if (this.schedulingPaused) {
486
+ this.logger.warn("skipping new realtime generation, the speech scheduling is not running");
454
487
  return;
455
488
  }
456
489
  const handle = SpeechHandle.create({
@@ -466,9 +499,7 @@ class AgentActivity {
466
499
  );
467
500
  this.logger.info({ speech_id: handle.id }, "Creating speech handle");
468
501
  this.createSpeechTask({
469
- task: Task.from(
470
- (abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController)
471
- ),
502
+ taskFn: (abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController),
472
503
  ownedSpeechHandle: handle,
473
504
  name: "AgentActivity.realtimeGeneration"
474
505
  });
@@ -555,7 +586,7 @@ class AgentActivity {
555
586
  }
556
587
  }
557
588
  onPreemptiveGeneration(info) {
558
- if (!this.agentSession.options.preemptiveGeneration || this.draining || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof LLM)) {
589
+ if (!this.agentSession.options.preemptiveGeneration || this.schedulingPaused || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof LLM)) {
559
590
  return;
560
591
  }
561
592
  this.cancelPreemptiveGeneration();
@@ -593,7 +624,21 @@ class AgentActivity {
593
624
  }
594
625
  }
595
626
  createSpeechTask(options) {
596
- const { task, ownedSpeechHandle } = options;
627
+ const { taskFn, controller, ownedSpeechHandle, inlineTask, name } = options;
628
+ const wrappedFn = (ctrl) => {
629
+ return agentActivityStorage.run(this, () => {
630
+ const currentTask = Task.current();
631
+ if (currentTask) {
632
+ _setActivityTaskInfo(currentTask, { speechHandle: ownedSpeechHandle, inlineTask });
633
+ }
634
+ if (ownedSpeechHandle) {
635
+ return speechHandleStorage.run(ownedSpeechHandle, () => taskFn(ctrl));
636
+ }
637
+ return taskFn(ctrl);
638
+ });
639
+ };
640
+ const task = Task.from(wrappedFn, controller, name);
641
+ _setActivityTaskInfo(task, { speechHandle: ownedSpeechHandle, inlineTask });
597
642
  this.speechTasks.add(task);
598
643
  task.addDoneCallback(() => {
599
644
  this.speechTasks.delete(task);
@@ -609,12 +654,15 @@ class AgentActivity {
609
654
  task.addDoneCallback(() => {
610
655
  this.wakeupMainTask();
611
656
  });
612
- return task.result;
657
+ return task;
613
658
  }
614
659
  async onEndOfTurn(info) {
615
- if (this.draining) {
660
+ if (this.schedulingPaused) {
616
661
  this.cancelPreemptiveGeneration();
617
- this.logger.warn({ user_input: info.newTranscript }, "skipping user input, task is draining");
662
+ this.logger.warn(
663
+ { user_input: info.newTranscript },
664
+ "skipping user input, speech scheduling is paused"
665
+ );
618
666
  return true;
619
667
  }
620
668
  if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0) {
@@ -633,7 +681,7 @@ class AgentActivity {
633
681
  }
634
682
  const oldTask = this._userTurnCompletedTask;
635
683
  this._userTurnCompletedTask = this.createSpeechTask({
636
- task: Task.from(() => this.userTurnCompleted(info, oldTask)),
684
+ taskFn: () => this.userTurnCompleted(info, oldTask),
637
685
  name: "AgentActivity.userTurnCompleted"
638
686
  });
639
687
  return true;
@@ -663,14 +711,41 @@ class AgentActivity {
663
711
  await speechHandle._waitForGeneration();
664
712
  this._currentSpeech = void 0;
665
713
  }
666
- if (this.draining && this.speechTasks.size === 0) {
667
- this.logger.info("mainTask: draining and no more speech tasks");
714
+ const toWait = this.getDrainPendingSpeechTasks();
715
+ if (this._schedulingPaused && toWait.length === 0) {
716
+ this.logger.info("mainTask: scheduling paused and no more speech tasks to wait");
668
717
  break;
669
718
  }
670
719
  this.q_updated = new Future();
671
720
  }
672
721
  this.logger.info("AgentActivity mainTask: exiting");
673
722
  }
723
+ getDrainPendingSpeechTasks() {
724
+ const blockedHandles = [];
725
+ for (const task of this._drainBlockedTasks) {
726
+ const info = _getActivityTaskInfo(task);
727
+ if (!info) {
728
+ this.logger.error("blocked task without activity info; skipping.");
729
+ continue;
730
+ }
731
+ if (!info.speechHandle) {
732
+ continue;
733
+ }
734
+ blockedHandles.push(info.speechHandle);
735
+ }
736
+ const toWait = [];
737
+ for (const task of this.speechTasks) {
738
+ if (this._drainBlockedTasks.includes(task)) {
739
+ continue;
740
+ }
741
+ const info = _getActivityTaskInfo(task);
742
+ if (info && info.speechHandle && blockedHandles.includes(info.speechHandle)) {
743
+ continue;
744
+ }
745
+ toWait.push(task);
746
+ }
747
+ return toWait;
748
+ }
674
749
  wakeupMainTask() {
675
750
  this.q_updated.resolve();
676
751
  }
@@ -696,7 +771,7 @@ class AgentActivity {
696
771
  if (this.llm === void 0) {
697
772
  throw new Error("trying to generate reply without an LLM model");
698
773
  }
699
- const functionCall = (_a = asyncLocalStorage.getStore()) == null ? void 0 : _a.functionCall;
774
+ const functionCall = (_a = functionCallStorage.getStore()) == null ? void 0 : _a.functionCall;
700
775
  if (toolChoice === void 0 && functionCall !== void 0) {
701
776
  toolChoice = "none";
702
777
  }
@@ -714,19 +789,17 @@ class AgentActivity {
714
789
  this.logger.info({ speech_id: handle.id }, "Creating speech handle");
715
790
  if (this.llm instanceof RealtimeModel) {
716
791
  this.createSpeechTask({
717
- task: Task.from(
718
- (abortController) => this.realtimeReplyTask({
719
- speechHandle: handle,
720
- // TODO(brian): support llm.ChatMessage for the realtime model
721
- userInput: userMessage == null ? void 0 : userMessage.textContent,
722
- instructions,
723
- modelSettings: {
724
- // isGiven(toolChoice) = toolChoice !== undefined
725
- toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
726
- },
727
- abortController
728
- })
729
- ),
792
+ taskFn: (abortController) => this.realtimeReplyTask({
793
+ speechHandle: handle,
794
+ // TODO(brian): support llm.ChatMessage for the realtime model
795
+ userInput: userMessage == null ? void 0 : userMessage.textContent,
796
+ instructions,
797
+ modelSettings: {
798
+ // isGiven(toolChoice) = toolChoice !== undefined
799
+ toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
800
+ },
801
+ abortController
802
+ }),
730
803
  ownedSpeechHandle: handle,
731
804
  name: "AgentActivity.realtimeReply"
732
805
  });
@@ -736,36 +809,36 @@ class AgentActivity {
736
809
  ${instructions}`;
737
810
  }
738
811
  const task = this.createSpeechTask({
739
- task: Task.from(
740
- (abortController) => this.pipelineReplyTask(
741
- handle,
742
- chatCtx ?? this.agent.chatCtx,
743
- this.agent.toolCtx,
744
- {
745
- toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
746
- },
747
- abortController,
748
- instructions,
749
- userMessage
750
- )
812
+ taskFn: (abortController) => this.pipelineReplyTask(
813
+ handle,
814
+ chatCtx ?? this.agent.chatCtx,
815
+ this.agent.toolCtx,
816
+ {
817
+ toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
818
+ },
819
+ abortController,
820
+ instructions,
821
+ userMessage
751
822
  ),
752
823
  ownedSpeechHandle: handle,
753
824
  name: "AgentActivity.pipelineReply"
754
825
  });
755
- task.finally(() => this.onPipelineReplyDone());
826
+ task.result.finally(() => this.onPipelineReplyDone());
756
827
  }
757
828
  if (scheduleSpeech) {
758
829
  this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
759
830
  }
760
831
  return handle;
761
832
  }
762
- interrupt() {
833
+ interrupt(options = {}) {
763
834
  var _a;
835
+ const { force = false } = options;
836
+ this.cancelPreemptiveGeneration();
764
837
  const future = new Future();
765
838
  const currentSpeech = this._currentSpeech;
766
- currentSpeech == null ? void 0 : currentSpeech.interrupt();
839
+ currentSpeech == null ? void 0 : currentSpeech.interrupt(force);
767
840
  for (const [_, __, speech] of this.speechQueue) {
768
- speech.interrupt();
841
+ speech.interrupt(force);
769
842
  }
770
843
  (_a = this.realtimeSession) == null ? void 0 : _a.interrupt();
771
844
  if (currentSpeech === void 0) {
@@ -786,7 +859,7 @@ ${instructions}`;
786
859
  async userTurnCompleted(info, oldTask) {
787
860
  var _a, _b;
788
861
  if (oldTask) {
789
- await oldTask;
862
+ await oldTask.result;
790
863
  }
791
864
  if (this.llm instanceof RealtimeModel) {
792
865
  if (this.llm.capabilities.turnDetection) {
@@ -973,7 +1046,7 @@ ${instructions}`;
973
1046
  toolsMessages,
974
1047
  span
975
1048
  }) => {
976
- var _a, _b, _c, _d, _e;
1049
+ var _a, _b;
977
1050
  speechHandle._agentTurnContext = otelContext.active();
978
1051
  span.setAttribute(traceTypes.ATTR_SPEECH_ID, speechHandle.id);
979
1052
  if (instructions) {
@@ -1119,11 +1192,11 @@ ${instructions}`;
1119
1192
  for (const msg of toolsMessages) {
1120
1193
  msg.createdAt = replyStartedAt;
1121
1194
  }
1122
- this.agent._chatCtx.insert(toolsMessages);
1123
1195
  const toolCallOutputs = toolsMessages.filter(
1124
1196
  (m) => m.type === "function_call_output"
1125
1197
  );
1126
1198
  if (toolCallOutputs.length > 0) {
1199
+ this.agent._chatCtx.insert(toolCallOutputs);
1127
1200
  this.agentSession._toolItemsAdded(toolCallOutputs);
1128
1201
  }
1129
1202
  }
@@ -1211,45 +1284,15 @@ ${instructions}`;
1211
1284
  );
1212
1285
  return;
1213
1286
  }
1214
- const functionToolsExecutedEvent = createFunctionToolsExecutedEvent({
1215
- functionCalls: [],
1216
- functionCallOutputs: []
1217
- });
1218
- let shouldGenerateToolReply = false;
1219
- let newAgentTask = null;
1220
- let ignoreTaskSwitch = false;
1221
- for (const sanitizedOut of toolOutput.output) {
1222
- if (sanitizedOut.toolCallOutput !== void 0) {
1223
- functionToolsExecutedEvent.functionCalls.push(sanitizedOut.toolCall);
1224
- functionToolsExecutedEvent.functionCallOutputs.push(sanitizedOut.toolCallOutput);
1225
- if (sanitizedOut.replyRequired) {
1226
- shouldGenerateToolReply = true;
1227
- }
1228
- }
1229
- if (newAgentTask !== null && sanitizedOut.agentTask !== void 0) {
1230
- this.logger.error("expected to receive only one agent task from the tool executions");
1231
- ignoreTaskSwitch = true;
1232
- }
1233
- newAgentTask = sanitizedOut.agentTask ?? null;
1234
- this.logger.debug(
1235
- {
1236
- speechId: speechHandle.id,
1237
- name: (_c = sanitizedOut.toolCall) == null ? void 0 : _c.name,
1238
- args: sanitizedOut.toolCall.args,
1239
- output: (_d = sanitizedOut.toolCallOutput) == null ? void 0 : _d.output,
1240
- isError: (_e = sanitizedOut.toolCallOutput) == null ? void 0 : _e.isError
1241
- },
1242
- "Tool call execution finished"
1243
- );
1244
- }
1287
+ const { functionToolsExecutedEvent, shouldGenerateToolReply, newAgentTask, ignoreTaskSwitch } = this.summarizeToolExecutionOutput(toolOutput, speechHandle);
1245
1288
  this.agentSession.emit(
1246
1289
  AgentSessionEventTypes.FunctionToolsExecuted,
1247
1290
  functionToolsExecutedEvent
1248
1291
  );
1249
- let draining = this.draining;
1292
+ let schedulingPaused = this.schedulingPaused;
1250
1293
  if (!ignoreTaskSwitch && newAgentTask !== null) {
1251
1294
  this.agentSession.updateAgent(newAgentTask);
1252
- draining = true;
1295
+ schedulingPaused = true;
1253
1296
  }
1254
1297
  const toolMessages = [
1255
1298
  ...functionToolsExecutedEvent.functionCalls,
@@ -1258,34 +1301,32 @@ ${instructions}`;
1258
1301
  if (shouldGenerateToolReply) {
1259
1302
  chatCtx.insert(toolMessages);
1260
1303
  speechHandle._numSteps += 1;
1261
- const respondToolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
1304
+ const respondToolChoice = schedulingPaused || modelSettings.toolChoice === "none" ? "none" : "auto";
1262
1305
  const toolResponseTask = this.createSpeechTask({
1263
- task: Task.from(
1264
- () => this.pipelineReplyTask(
1265
- speechHandle,
1266
- chatCtx,
1267
- toolCtx,
1268
- { toolChoice: respondToolChoice },
1269
- replyAbortController,
1270
- instructions,
1271
- void 0,
1272
- toolMessages
1273
- )
1306
+ taskFn: () => this.pipelineReplyTask(
1307
+ speechHandle,
1308
+ chatCtx,
1309
+ toolCtx,
1310
+ { toolChoice: respondToolChoice },
1311
+ replyAbortController,
1312
+ instructions,
1313
+ void 0,
1314
+ toolMessages
1274
1315
  ),
1275
1316
  ownedSpeechHandle: speechHandle,
1276
1317
  name: "AgentActivity.pipelineReply"
1277
1318
  });
1278
- toolResponseTask.finally(() => this.onPipelineReplyDone());
1319
+ toolResponseTask.result.finally(() => this.onPipelineReplyDone());
1279
1320
  this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
1280
1321
  } else if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
1281
1322
  for (const msg of toolMessages) {
1282
1323
  msg.createdAt = replyStartedAt;
1283
1324
  }
1284
- this.agent._chatCtx.insert(toolMessages);
1285
1325
  const toolCallOutputs = toolMessages.filter(
1286
1326
  (m) => m.type === "function_call_output"
1287
1327
  );
1288
1328
  if (toolCallOutputs.length > 0) {
1329
+ this.agent._chatCtx.insert(toolCallOutputs);
1289
1330
  this.agentSession._toolItemsAdded(toolCallOutputs);
1290
1331
  }
1291
1332
  }
@@ -1329,7 +1370,7 @@ ${instructions}`;
1329
1370
  replyAbortController,
1330
1371
  span
1331
1372
  }) {
1332
- var _a, _b, _c, _d;
1373
+ var _a;
1333
1374
  speechHandle._agentTurnContext = otelContext.active();
1334
1375
  span.setAttribute(traceTypes.ATTR_SPEECH_ID, speechHandle.id);
1335
1376
  const localParticipant = (_a = this.agentSession._roomIO) == null ? void 0 : _a.localParticipant;
@@ -1589,44 +1630,15 @@ ${instructions}`;
1589
1630
  );
1590
1631
  return;
1591
1632
  }
1592
- const functionToolsExecutedEvent = createFunctionToolsExecutedEvent({
1593
- functionCalls: [],
1594
- functionCallOutputs: []
1595
- });
1596
- let shouldGenerateToolReply = false;
1597
- let newAgentTask = null;
1598
- let ignoreTaskSwitch = false;
1599
- for (const sanitizedOut of toolOutput.output) {
1600
- if (sanitizedOut.toolCallOutput !== void 0) {
1601
- functionToolsExecutedEvent.functionCallOutputs.push(sanitizedOut.toolCallOutput);
1602
- if (sanitizedOut.replyRequired) {
1603
- shouldGenerateToolReply = true;
1604
- }
1605
- }
1606
- if (newAgentTask !== null && sanitizedOut.agentTask !== void 0) {
1607
- this.logger.error("expected to receive only one agent task from the tool executions");
1608
- ignoreTaskSwitch = true;
1609
- }
1610
- newAgentTask = sanitizedOut.agentTask ?? null;
1611
- this.logger.debug(
1612
- {
1613
- speechId: speechHandle.id,
1614
- name: (_b = sanitizedOut.toolCall) == null ? void 0 : _b.name,
1615
- args: sanitizedOut.toolCall.args,
1616
- output: (_c = sanitizedOut.toolCallOutput) == null ? void 0 : _c.output,
1617
- isError: (_d = sanitizedOut.toolCallOutput) == null ? void 0 : _d.isError
1618
- },
1619
- "Tool call execution finished"
1620
- );
1621
- }
1633
+ const { functionToolsExecutedEvent, shouldGenerateToolReply, newAgentTask, ignoreTaskSwitch } = this.summarizeToolExecutionOutput(toolOutput, speechHandle);
1622
1634
  this.agentSession.emit(
1623
1635
  AgentSessionEventTypes.FunctionToolsExecuted,
1624
1636
  functionToolsExecutedEvent
1625
1637
  );
1626
- let draining = this.draining;
1638
+ let schedulingPaused = this.schedulingPaused;
1627
1639
  if (!ignoreTaskSwitch && newAgentTask !== null) {
1628
1640
  this.agentSession.updateAgent(newAgentTask);
1629
- draining = true;
1641
+ schedulingPaused = true;
1630
1642
  }
1631
1643
  if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
1632
1644
  while (this.currentSpeech || this.speechQueue.size() > 0) {
@@ -1667,20 +1679,58 @@ ${instructions}`;
1667
1679
  speechHandle: replySpeechHandle
1668
1680
  })
1669
1681
  );
1670
- const toolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
1682
+ const toolChoice = schedulingPaused || modelSettings.toolChoice === "none" ? "none" : "auto";
1671
1683
  this.createSpeechTask({
1672
- task: Task.from(
1673
- (abortController) => this.realtimeReplyTask({
1674
- speechHandle: replySpeechHandle,
1675
- modelSettings: { toolChoice },
1676
- abortController
1677
- })
1678
- ),
1684
+ taskFn: (abortController) => this.realtimeReplyTask({
1685
+ speechHandle: replySpeechHandle,
1686
+ modelSettings: { toolChoice },
1687
+ abortController
1688
+ }),
1679
1689
  ownedSpeechHandle: replySpeechHandle,
1680
1690
  name: "AgentActivity.realtime_reply"
1681
1691
  });
1682
1692
  this.scheduleSpeech(replySpeechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
1683
1693
  }
1694
+ summarizeToolExecutionOutput(toolOutput, speechHandle) {
1695
+ var _a, _b, _c;
1696
+ const functionToolsExecutedEvent = createFunctionToolsExecutedEvent({
1697
+ functionCalls: [],
1698
+ functionCallOutputs: []
1699
+ });
1700
+ let shouldGenerateToolReply = false;
1701
+ let newAgentTask = null;
1702
+ let ignoreTaskSwitch = false;
1703
+ for (const sanitizedOut of toolOutput.output) {
1704
+ if (sanitizedOut.toolCallOutput !== void 0) {
1705
+ functionToolsExecutedEvent.functionCalls.push(sanitizedOut.toolCall);
1706
+ functionToolsExecutedEvent.functionCallOutputs.push(sanitizedOut.toolCallOutput);
1707
+ if (sanitizedOut.replyRequired) {
1708
+ shouldGenerateToolReply = true;
1709
+ }
1710
+ }
1711
+ if (newAgentTask !== null && sanitizedOut.agentTask !== void 0) {
1712
+ this.logger.error("expected to receive only one agent task from the tool executions");
1713
+ ignoreTaskSwitch = true;
1714
+ }
1715
+ newAgentTask = sanitizedOut.agentTask ?? null;
1716
+ this.logger.debug(
1717
+ {
1718
+ speechId: speechHandle.id,
1719
+ name: (_a = sanitizedOut.toolCall) == null ? void 0 : _a.name,
1720
+ args: sanitizedOut.toolCall.args,
1721
+ output: (_b = sanitizedOut.toolCallOutput) == null ? void 0 : _b.output,
1722
+ isError: (_c = sanitizedOut.toolCallOutput) == null ? void 0 : _c.isError
1723
+ },
1724
+ "Tool call execution finished"
1725
+ );
1726
+ }
1727
+ return {
1728
+ functionToolsExecutedEvent,
1729
+ shouldGenerateToolReply,
1730
+ newAgentTask,
1731
+ ignoreTaskSwitch
1732
+ };
1733
+ }
1684
1734
  async realtimeReplyTask({
1685
1735
  speechHandle,
1686
1736
  modelSettings: { toolChoice },
@@ -1722,13 +1772,45 @@ ${instructions}`;
1722
1772
  }
1723
1773
  }
1724
1774
  scheduleSpeech(speechHandle, priority, force = false) {
1725
- if (this.draining && !force) {
1726
- throw new Error("cannot schedule new speech, the agent is draining");
1775
+ if (this.schedulingPaused && !force) {
1776
+ throw new Error("cannot schedule new speech, the speech scheduling is draining/pausing");
1727
1777
  }
1728
1778
  this.speechQueue.push([priority, Number(process.hrtime.bigint()), speechHandle]);
1729
1779
  speechHandle._markScheduled();
1730
1780
  this.wakeupMainTask();
1731
1781
  }
1782
+ async _pauseSchedulingTask(blockedTasks) {
1783
+ if (this._schedulingPaused) return;
1784
+ this._schedulingPaused = true;
1785
+ this._drainBlockedTasks = blockedTasks;
1786
+ this.wakeupMainTask();
1787
+ if (this._mainTask) {
1788
+ await this._mainTask.result;
1789
+ }
1790
+ }
1791
+ _resumeSchedulingTask() {
1792
+ if (!this._schedulingPaused) return;
1793
+ this._schedulingPaused = false;
1794
+ this._mainTask = Task.from(({ signal }) => this.mainTask(signal));
1795
+ }
1796
+ async pause(options = {}) {
1797
+ const { blockedTasks = [] } = options;
1798
+ const unlock = await this.lock.lock();
1799
+ try {
1800
+ const span = tracer.startSpan({
1801
+ name: "pause_agent_activity",
1802
+ attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
1803
+ });
1804
+ try {
1805
+ await this._pauseSchedulingTask(blockedTasks);
1806
+ await this._closeSessionResources();
1807
+ } finally {
1808
+ span.end();
1809
+ }
1810
+ } finally {
1811
+ unlock();
1812
+ }
1813
+ }
1732
1814
  async drain() {
1733
1815
  return tracer.startActiveSpan(async (span) => this._drainImpl(span), {
1734
1816
  name: "drain_agent_activity",
@@ -1736,71 +1818,79 @@ ${instructions}`;
1736
1818
  });
1737
1819
  }
1738
1820
  async _drainImpl(span) {
1739
- var _a;
1740
1821
  span.setAttribute(traceTypes.ATTR_AGENT_LABEL, this.agent.id);
1741
1822
  const unlock = await this.lock.lock();
1742
1823
  try {
1743
- if (this._draining) return;
1744
- this.cancelPreemptiveGeneration();
1745
- const onExitTask = tracer.startActiveSpan(async () => this.agent.onExit(), {
1746
- name: "on_exit",
1747
- attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
1748
- });
1749
- this.createSpeechTask({
1750
- task: Task.from(() => onExitTask),
1824
+ if (this._schedulingPaused) return;
1825
+ this._onExitTask = this.createSpeechTask({
1826
+ taskFn: () => tracer.startActiveSpan(async () => this.agent.onExit(), {
1827
+ name: "on_exit",
1828
+ attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
1829
+ }),
1830
+ inlineTask: true,
1751
1831
  name: "AgentActivity_onExit"
1752
1832
  });
1753
- this.wakeupMainTask();
1754
- this._draining = true;
1755
- await ((_a = this._mainTask) == null ? void 0 : _a.result);
1833
+ this.cancelPreemptiveGeneration();
1834
+ await this._onExitTask.result;
1835
+ await this._pauseSchedulingTask([]);
1756
1836
  } finally {
1757
1837
  unlock();
1758
1838
  }
1759
1839
  }
1760
1840
  async close() {
1761
- var _a, _b, _c, _d;
1762
1841
  const unlock = await this.lock.lock();
1763
1842
  try {
1764
- if (!this._draining) {
1765
- this.logger.warn("task closing without draining");
1766
- }
1767
1843
  this.cancelPreemptiveGeneration();
1768
- if (this.llm instanceof LLM) {
1769
- this.llm.off("metrics_collected", this.onMetricsCollected);
1770
- }
1771
- if (this.realtimeSession) {
1772
- this.realtimeSession.off("generation_created", this.onGenerationCreated);
1773
- this.realtimeSession.off("input_speech_started", this.onInputSpeechStarted);
1774
- this.realtimeSession.off("input_speech_stopped", this.onInputSpeechStopped);
1775
- this.realtimeSession.off(
1776
- "input_audio_transcription_completed",
1777
- this.onInputAudioTranscriptionCompleted
1778
- );
1779
- this.realtimeSession.off("metrics_collected", this.onMetricsCollected);
1780
- }
1781
- if (this.stt instanceof STT) {
1782
- this.stt.off("metrics_collected", this.onMetricsCollected);
1844
+ await this._closeSessionResources();
1845
+ if (this._mainTask) {
1846
+ await this._mainTask.cancelAndWait();
1783
1847
  }
1784
- if (this.tts instanceof TTS) {
1785
- this.tts.off("metrics_collected", this.onMetricsCollected);
1786
- }
1787
- if (this.vad instanceof VAD) {
1788
- this.vad.off("metrics_collected", this.onMetricsCollected);
1789
- }
1790
- this.detachAudioInput();
1791
- (_a = this.realtimeSpans) == null ? void 0 : _a.clear();
1792
- await ((_b = this.realtimeSession) == null ? void 0 : _b.close());
1793
- await ((_c = this.audioRecognition) == null ? void 0 : _c.close());
1794
- await ((_d = this._mainTask) == null ? void 0 : _d.cancelAndWait());
1848
+ this.agent._agentActivity = void 0;
1795
1849
  } finally {
1796
1850
  unlock();
1797
1851
  }
1798
1852
  }
1853
+ async _closeSessionResources() {
1854
+ var _a, _b, _c;
1855
+ if (this.llm instanceof LLM) {
1856
+ this.llm.off("metrics_collected", this.onMetricsCollected);
1857
+ this.llm.off("error", this.onModelError);
1858
+ }
1859
+ if (this.realtimeSession) {
1860
+ this.realtimeSession.off("generation_created", this.onRealtimeGenerationCreated);
1861
+ this.realtimeSession.off("input_speech_started", this.onRealtimeInputSpeechStarted);
1862
+ this.realtimeSession.off("input_speech_stopped", this.onRealtimeInputSpeechStopped);
1863
+ this.realtimeSession.off(
1864
+ "input_audio_transcription_completed",
1865
+ this.onRealtimeInputAudioTranscriptionCompleted
1866
+ );
1867
+ this.realtimeSession.off("metrics_collected", this.onMetricsCollected);
1868
+ this.realtimeSession.off("error", this.onModelError);
1869
+ }
1870
+ if (this.stt instanceof STT) {
1871
+ this.stt.off("metrics_collected", this.onMetricsCollected);
1872
+ this.stt.off("error", this.onModelError);
1873
+ }
1874
+ if (this.tts instanceof TTS) {
1875
+ this.tts.off("metrics_collected", this.onMetricsCollected);
1876
+ this.tts.off("error", this.onModelError);
1877
+ }
1878
+ if (this.vad instanceof VAD) {
1879
+ this.vad.off("metrics_collected", this.onMetricsCollected);
1880
+ }
1881
+ this.detachAudioInput();
1882
+ (_a = this.realtimeSpans) == null ? void 0 : _a.clear();
1883
+ await ((_b = this.realtimeSession) == null ? void 0 : _b.close());
1884
+ await ((_c = this.audioRecognition) == null ? void 0 : _c.close());
1885
+ this.realtimeSession = void 0;
1886
+ this.audioRecognition = void 0;
1887
+ }
1799
1888
  }
1800
1889
  function toOaiToolChoice(toolChoice) {
1801
1890
  return toolChoice !== null ? toolChoice : void 0;
1802
1891
  }
1803
1892
  export {
1804
- AgentActivity
1893
+ AgentActivity,
1894
+ agentActivityStorage
1805
1895
  };
1806
1896
  //# sourceMappingURL=agent_activity.js.map