@livekit/agents 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/dist/index.cjs +2 -5
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +2 -3
  4. package/dist/index.d.ts +2 -3
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +1 -3
  7. package/dist/index.js.map +1 -1
  8. package/dist/tokenize/basic/hyphenator.cjs.map +1 -1
  9. package/dist/tokenize/basic/hyphenator.js.map +1 -1
  10. package/dist/utils.cjs +77 -0
  11. package/dist/utils.cjs.map +1 -1
  12. package/dist/utils.d.cts +21 -0
  13. package/dist/utils.d.ts +21 -0
  14. package/dist/utils.d.ts.map +1 -1
  15. package/dist/utils.js +76 -1
  16. package/dist/utils.js.map +1 -1
  17. package/dist/voice/agent_activity.cjs +112 -71
  18. package/dist/voice/agent_activity.cjs.map +1 -1
  19. package/dist/voice/agent_activity.d.ts.map +1 -1
  20. package/dist/voice/agent_activity.js +112 -71
  21. package/dist/voice/agent_activity.js.map +1 -1
  22. package/dist/voice/avatar/datastream_io.cjs +204 -0
  23. package/dist/voice/avatar/datastream_io.cjs.map +1 -0
  24. package/dist/voice/avatar/datastream_io.d.cts +37 -0
  25. package/dist/voice/avatar/datastream_io.d.ts +37 -0
  26. package/dist/voice/avatar/datastream_io.d.ts.map +1 -0
  27. package/dist/voice/avatar/datastream_io.js +188 -0
  28. package/dist/voice/avatar/datastream_io.js.map +1 -0
  29. package/dist/{multimodal → voice/avatar}/index.cjs +4 -4
  30. package/dist/voice/avatar/index.cjs.map +1 -0
  31. package/dist/voice/avatar/index.d.cts +2 -0
  32. package/dist/voice/avatar/index.d.ts +2 -0
  33. package/dist/voice/avatar/index.d.ts.map +1 -0
  34. package/dist/voice/avatar/index.js +2 -0
  35. package/dist/voice/avatar/index.js.map +1 -0
  36. package/dist/voice/index.cjs +2 -0
  37. package/dist/voice/index.cjs.map +1 -1
  38. package/dist/voice/index.d.cts +1 -0
  39. package/dist/voice/index.d.ts +1 -0
  40. package/dist/voice/index.d.ts.map +1 -1
  41. package/dist/voice/index.js +1 -0
  42. package/dist/voice/index.js.map +1 -1
  43. package/dist/voice/io.cjs.map +1 -1
  44. package/dist/voice/io.d.cts +1 -1
  45. package/dist/voice/io.d.ts +1 -1
  46. package/dist/voice/io.d.ts.map +1 -1
  47. package/dist/voice/io.js.map +1 -1
  48. package/dist/voice/room_io/_input.cjs +2 -1
  49. package/dist/voice/room_io/_input.cjs.map +1 -1
  50. package/dist/voice/room_io/_input.d.ts.map +1 -1
  51. package/dist/voice/room_io/_input.js +2 -1
  52. package/dist/voice/room_io/_input.js.map +1 -1
  53. package/dist/voice/run_context.cjs +13 -0
  54. package/dist/voice/run_context.cjs.map +1 -1
  55. package/dist/voice/run_context.d.cts +10 -0
  56. package/dist/voice/run_context.d.ts +10 -0
  57. package/dist/voice/run_context.d.ts.map +1 -1
  58. package/dist/voice/run_context.js +13 -0
  59. package/dist/voice/run_context.js.map +1 -1
  60. package/dist/voice/speech_handle.cjs +152 -30
  61. package/dist/voice/speech_handle.cjs.map +1 -1
  62. package/dist/voice/speech_handle.d.cts +67 -16
  63. package/dist/voice/speech_handle.d.ts +67 -16
  64. package/dist/voice/speech_handle.d.ts.map +1 -1
  65. package/dist/voice/speech_handle.js +153 -31
  66. package/dist/voice/speech_handle.js.map +1 -1
  67. package/dist/worker.cjs +4 -1
  68. package/dist/worker.cjs.map +1 -1
  69. package/dist/worker.d.ts.map +1 -1
  70. package/dist/worker.js +4 -1
  71. package/dist/worker.js.map +1 -1
  72. package/package.json +2 -2
  73. package/src/index.ts +2 -3
  74. package/src/tokenize/basic/hyphenator.ts +1 -1
  75. package/src/utils.ts +121 -1
  76. package/src/voice/agent_activity.ts +128 -78
  77. package/src/voice/avatar/datastream_io.ts +247 -0
  78. package/src/voice/avatar/index.ts +4 -0
  79. package/src/voice/index.ts +2 -0
  80. package/src/voice/io.ts +1 -1
  81. package/src/voice/room_io/_input.ts +8 -3
  82. package/src/voice/run_context.ts +16 -2
  83. package/src/voice/speech_handle.ts +183 -38
  84. package/src/worker.ts +5 -1
  85. package/dist/multimodal/agent_playout.cjs +0 -233
  86. package/dist/multimodal/agent_playout.cjs.map +0 -1
  87. package/dist/multimodal/agent_playout.d.cts +0 -34
  88. package/dist/multimodal/agent_playout.d.ts +0 -34
  89. package/dist/multimodal/agent_playout.d.ts.map +0 -1
  90. package/dist/multimodal/agent_playout.js +0 -207
  91. package/dist/multimodal/agent_playout.js.map +0 -1
  92. package/dist/multimodal/index.cjs.map +0 -1
  93. package/dist/multimodal/index.d.cts +0 -2
  94. package/dist/multimodal/index.d.ts +0 -2
  95. package/dist/multimodal/index.d.ts.map +0 -1
  96. package/dist/multimodal/index.js +0 -2
  97. package/dist/multimodal/index.js.map +0 -1
  98. package/src/multimodal/agent_playout.ts +0 -266
  99. package/src/multimodal/index.ts +0 -4
@@ -0,0 +1,247 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { Mutex } from '@livekit/mutex';
5
+ import {
6
+ type AudioFrame,
7
+ type ByteStreamWriter,
8
+ type Room,
9
+ RoomEvent,
10
+ type RpcInvocationData,
11
+ type TrackKind,
12
+ } from '@livekit/rtc-node';
13
+ import { log } from '../../log.js';
14
+ import {
15
+ Future,
16
+ Task,
17
+ shortuuid,
18
+ waitForParticipant,
19
+ waitForTrackPublication,
20
+ } from '../../utils.js';
21
+ import { AudioOutput, type PlaybackFinishedEvent } from '../io.js';
22
+
23
+ const RPC_CLEAR_BUFFER = 'lk.clear_buffer'; // RPC method invoked on the destination participant by clearBuffer()
24
+ const RPC_PLAYBACK_FINISHED = 'lk.playback_finished'; // RPC method registered on the local participant to receive playback-finished events
25
+ const AUDIO_STREAM_TOPIC = 'lk.audio_stream'; // topic of the byte stream that carries the captured audio frames
26
+
27
+ export interface DataStreamAudioOutputOptions { // constructor options for DataStreamAudioOutput
28
+ room: Room; // room whose local participant opens the byte stream and sends/receives the RPC calls
29
+ destinationIdentity: string; // identity of the remote participant the audio stream and RPCs are addressed to
30
+ sampleRate?: number; // forwarded to the AudioOutput base constructor
31
+ waitRemoteTrack?: TrackKind; // when set, startup additionally calls waitForTrackPublication for this track kind
32
+ }
33
+
34
/**
 * AudioOutput implementation that streams audio to a remote avatar worker using LiveKit DataStream.
 *
 * Frames given to {@link DataStreamAudioOutput.captureFrame} are written to a byte stream on the
 * `lk.audio_stream` topic addressed to `destinationIdentity`. {@link DataStreamAudioOutput.flush}
 * closes the current stream so that the next frame opens a new one, and the remote participant
 * reports completion through the `lk.playback_finished` RPC method.
 */
export class DataStreamAudioOutput extends AudioOutput {
  /**
   * Whether the shared `lk.playback_finished` RPC method has been registered.
   *
   * NOTE(review): this flag is static, so it is shared by every instance regardless of the room
   * it was created for; confirm that only one room per process is expected.
   */
  static _playbackFinishedRpcRegistered: boolean = false;
  /** Handlers for the `lk.playback_finished` RPC, keyed by the expected caller identity. */
  static _playbackFinishedHandlers: Record<string, (data: RpcInvocationData) => string> = {};

  private room: Room;
  private destinationIdentity: string;
  private roomConnectedFuture: Future<void>;
  private waitRemoteTrack?: TrackKind;
  private streamWriter?: ByteStreamWriter;
  // Seconds of audio written to the current stream; reset whenever a new stream is opened.
  private pushedDuration: number = 0;
  private started: boolean = false;
  private lock = new Mutex();
  private startTask?: Task<void>;

  #logger = log();

  /**
   * @param opts - room, destination identity and optional sample rate / remote track kind
   */
  constructor(opts: DataStreamAudioOutputOptions) {
    super(opts.sampleRate, undefined);

    const { room, destinationIdentity, sampleRate, waitRemoteTrack } = opts;
    this.room = room;
    this.destinationIdentity = destinationIdentity;
    this.sampleRate = sampleRate;
    this.waitRemoteTrack = waitRemoteTrack;

    const onRoomConnected = async () => {
      if (this.startTask) return;

      await this.roomConnectedFuture.await;

      // register the rpc method right after the room is connected
      DataStreamAudioOutput.registerPlaybackFinishedRpc({
        room,
        callerIdentity: this.destinationIdentity,
        handler: (data) => this.handlePlaybackFinished(data),
      });

      // captureFrame() may have created the start task while we were waiting for the
      // connection; keep that task instead of replacing it with a second one.
      if (!this.startTask) {
        this.startTask = Task.from(({ signal }) => this._start(signal));
      }
    };

    this.roomConnectedFuture = new Future<void>();

    this.room.on(RoomEvent.ConnectionStateChanged, (_) => {
      if (room.isConnected && !this.roomConnectedFuture.done) {
        this.roomConnectedFuture.resolve(undefined);
      }
    });

    if (this.room.isConnected) {
      this.roomConnectedFuture.resolve(undefined);
    }

    // Fire-and-forget: report failures instead of leaving an unhandled rejection.
    onRoomConnected().catch((error: unknown) => {
      this.#logger.error(
        { error, identity: this.destinationIdentity },
        'failed to initialize the datastream audio output',
      );
    });
  }

  /**
   * Waits, while holding the mutex, for the room connection, for the destination participant and,
   * when `waitRemoteTrack` is set, for its track publication; then marks the output as started.
   * Returns immediately when the output has already been started.
   */
  private async _start(_abortSignal: AbortSignal) {
    const unlock = await this.lock.lock();

    try {
      if (this.started) return;

      await this.roomConnectedFuture.await;

      this.#logger.debug(
        {
          identity: this.destinationIdentity,
        },
        'waiting for the remote participant',
      );

      await waitForParticipant({
        room: this.room,
        identity: this.destinationIdentity,
      });

      if (this.waitRemoteTrack) {
        this.#logger.debug(
          {
            identity: this.destinationIdentity,
            kind: this.waitRemoteTrack,
          },
          'waiting for the remote track',
        );

        await waitForTrackPublication({
          room: this.room,
          identity: this.destinationIdentity,
          kind: this.waitRemoteTrack,
        });
      }

      this.#logger.debug(
        {
          identity: this.destinationIdentity,
        },
        'remote participant ready',
      );

      this.started = true;
    } finally {
      unlock();
    }
  }

  /**
   * Writes one audio frame to the byte stream, opening the stream on first use.
   *
   * Waits for the start task to complete before writing. The stream attributes
   * (`sample_rate`, `num_channels`) are taken from the first frame of each stream.
   *
   * @param frame - audio frame to forward to the destination participant
   */
  async captureFrame(frame: AudioFrame): Promise<void> {
    if (!this.startTask) {
      this.startTask = Task.from(({ signal }) => this._start(signal));
    }

    await this.startTask.result;
    await super.captureFrame(frame);

    // Work on a local reference: flush() may detach `this.streamWriter` while we are awaiting.
    let writer = this.streamWriter;
    if (!writer) {
      writer = await this.room.localParticipant!.streamBytes({
        name: shortuuid('AUDIO_'),
        topic: AUDIO_STREAM_TOPIC,
        destinationIdentities: [this.destinationIdentity],
        attributes: {
          sample_rate: frame.sampleRate.toString(),
          num_channels: frame.channels.toString(),
        },
      });
      this.streamWriter = writer;
      this.pushedDuration = 0;
    }

    // frame.data is a Int16Array, write accepts a Uint8Array. Honour the view's offset and
    // length so that only this frame's bytes are sent when the backing buffer is larger.
    const { buffer, byteOffset, byteLength } = frame.data;
    await writer.write(new Uint8Array(buffer, byteOffset, byteLength));
    this.pushedDuration += frame.samplesPerChannel / frame.sampleRate;
  }

  /**
   * Closes the current byte stream, if any, so that the next captured frame opens a new one.
   * Does nothing beyond the base-class flush when no stream is open or the output is not started.
   */
  flush(): void {
    super.flush();

    const writer = this.streamWriter;
    if (writer === undefined || !this.started) {
      return;
    }

    // Detach the writer before closing it: frames captured while close() is still pending must
    // go to a fresh stream, and a late completion must not wipe a newer writer.
    this.streamWriter = undefined;
    writer.close().catch((error: unknown) => {
      this.#logger.warn(
        { error, identity: this.destinationIdentity },
        'failed to close the audio stream',
      );
    });
  }

  /**
   * Sends the `lk.clear_buffer` RPC to the destination participant.
   * Does nothing when the output has not been started.
   */
  clearBuffer(): void {
    if (!this.started) return;

    this.room
      .localParticipant!.performRpc({
        destinationIdentity: this.destinationIdentity,
        method: RPC_CLEAR_BUFFER,
        payload: '',
      })
      .catch((error: unknown) => {
        // The call is fire-and-forget; log instead of leaving an unhandled rejection.
        this.#logger.warn(
          { error, identity: this.destinationIdentity },
          'failed to send the clear buffer rpc',
        );
      });
  }

  /**
   * Handles a `lk.playback_finished` RPC invocation for this output.
   *
   * @param data - RPC invocation; its payload is parsed as a JSON PlaybackFinishedEvent
   * @returns `'ok'` when the event was forwarded to `onPlaybackFinished`, `'reject'` when the
   *   caller is not the destination participant or the payload is not valid JSON
   */
  private handlePlaybackFinished(data: RpcInvocationData): string {
    if (data.callerIdentity !== this.destinationIdentity) {
      this.#logger.warn(
        {
          callerIdentity: data.callerIdentity,
          destinationIdentity: this.destinationIdentity,
        },
        'playback finished event received from unexpected participant',
      );
      return 'reject';
    }

    this.#logger.info(
      {
        callerIdentity: data.callerIdentity,
      },
      'playback finished event received',
    );

    let playbackFinishedEvent: PlaybackFinishedEvent;
    try {
      playbackFinishedEvent = JSON.parse(data.payload) as PlaybackFinishedEvent;
    } catch (error) {
      // The payload comes from a remote participant; never let malformed JSON escape as a throw.
      this.#logger.warn(
        {
          callerIdentity: data.callerIdentity,
          error,
        },
        'playback finished event has a malformed payload',
      );
      return 'reject';
    }

    this.onPlaybackFinished(playbackFinishedEvent);
    return 'ok';
  }

  /**
   * Stores `handler` for `callerIdentity` and, on first use, registers a single
   * `lk.playback_finished` RPC method that dispatches each invocation to the handler stored for
   * the invocation's caller identity.
   *
   * @param room - room whose local participant receives the RPC
   * @param callerIdentity - identity whose invocations are routed to `handler`
   * @param handler - callback producing the RPC response string
   */
  static registerPlaybackFinishedRpc({
    room,
    callerIdentity,
    handler,
  }: {
    room: Room;
    callerIdentity: string;
    handler: (data: RpcInvocationData) => string;
  }) {
    DataStreamAudioOutput._playbackFinishedHandlers[callerIdentity] = handler;

    if (DataStreamAudioOutput._playbackFinishedRpcRegistered) {
      return;
    }

    const rpcHandler = async (data: RpcInvocationData): Promise<string> => {
      const registered = DataStreamAudioOutput._playbackFinishedHandlers[data.callerIdentity];
      if (!registered) {
        log().warn(
          {
            callerIdentity: data.callerIdentity,
            expectedIdentities: Object.keys(DataStreamAudioOutput._playbackFinishedHandlers),
          },
          'playback finished event received from unexpected participant',
        );

        return 'reject';
      }
      return registered(data);
    };

    const localParticipant = room.localParticipant;
    if (!localParticipant) {
      // Leave the flag unset so that a later call can still register the method.
      log().warn(
        { callerIdentity },
        'cannot register the playback finished rpc: no local participant',
      );
      return;
    }

    localParticipant.registerRpcMethod(RPC_PLAYBACK_FINISHED, rpcHandler);
    DataStreamAudioOutput._playbackFinishedRpcRegistered = true;
  }
}
@@ -0,0 +1,4 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ export * from './datastream_io.js';
@@ -3,5 +3,7 @@
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  export { Agent, StopResponse, type AgentOptions, type ModelSettings } from './agent.js';
5
5
  export { AgentSession, type AgentSessionOptions } from './agent_session.js';
6
+
7
+ export * from './avatar/index.js';
6
8
  export * from './events.js';
7
9
  export { RunContext } from './run_context.js';
package/src/voice/io.ts CHANGED
@@ -55,7 +55,7 @@ export abstract class AudioOutput extends EventEmitter {
55
55
  protected logger = log();
56
56
 
57
57
  constructor(
58
- readonly sampleRate?: number,
58
+ public sampleRate?: number,
59
59
  protected readonly nextInChain?: AudioOutput,
60
60
  ) {
61
61
  super();
@@ -1,6 +1,7 @@
1
1
  // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
+ import type { AudioFrame } from '@livekit/rtc-node';
4
5
  import {
5
6
  AudioStream,
6
7
  type NoiseCancellationOptions,
@@ -11,6 +12,7 @@ import {
11
12
  RoomEvent,
12
13
  TrackSource,
13
14
  } from '@livekit/rtc-node';
15
+ import type { ReadableStream } from 'node:stream/web';
14
16
  import { log } from '../../log.js';
15
17
  import { resampleStream } from '../../utils.js';
16
18
  import { AudioInput } from '../io.js';
@@ -64,8 +66,10 @@ export class ParticipantAudioInputStream extends AudioInput {
64
66
  ? participant
65
67
  : this.room.remoteParticipants.get(participantIdentity);
66
68
 
69
+ // We need to check if the participant has a microphone track and subscribe to it
70
+ // in case we miss the tracksubscribed event
67
71
  if (participantValue) {
68
- for (const publication of Object.values(participantValue.trackPublications)) {
72
+ for (const publication of participantValue.trackPublications.values()) {
69
73
  if (publication.track && publication.source === TrackSource.SOURCE_MICROPHONE) {
70
74
  this.onTrackSubscribed(publication.track, publication, participantValue);
71
75
  break;
@@ -127,12 +131,13 @@ export class ParticipantAudioInputStream extends AudioInput {
127
131
  return true;
128
132
  };
129
133
 
130
- private createStream(track: RemoteTrack) {
134
+ private createStream(track: RemoteTrack): ReadableStream<AudioFrame> {
131
135
  return new AudioStream(track, {
132
136
  sampleRate: this.sampleRate,
133
137
  numChannels: this.numChannels,
134
138
  noiseCancellation: this.noiseCancellation,
135
- });
139
+ // TODO(AJS-269): resolve compatibility issue with node-sdk to remove the forced type casting
140
+ }) as unknown as ReadableStream<AudioFrame>;
136
141
  }
137
142
 
138
143
  async close() {
@@ -8,13 +8,27 @@ import type { SpeechHandle } from './speech_handle.js';
8
8
  export type UnknownUserData = unknown;
9
9
 
10
10
  export class RunContext<UserData = UnknownUserData> {
11
+ private readonly initialStepIdx: number;
11
12
  constructor(
12
13
  public readonly session: AgentSession<UserData>,
13
14
  public readonly speechHandle: SpeechHandle,
14
15
  public readonly functionCall: FunctionCall,
15
- ) {}
16
-
16
+ ) {
17
+ this.initialStepIdx = speechHandle.numSteps - 1;
18
+ }
17
19
  get userData(): UserData {
18
20
  return this.session.userData;
19
21
  }
22
+
23
+ /**
24
+ * Waits for the speech playout corresponding to this function call step.
25
+ *
26
+ * Unlike {@link SpeechHandle.waitForPlayout}, which waits for the full
27
+ * assistant turn to complete (including all function tools),
28
+ * this method only waits for the assistant's spoken response prior to running
29
+ * this tool to finish playing.
30
+ */
31
+ async waitForPlayout() {
32
+ return this.speechHandle._waitForGeneration(this.initialStepIdx);
33
+ }
20
34
  }
@@ -1,8 +1,10 @@
1
1
  // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- import type { ChatMessage } from '../llm/index.js';
5
- import { Future, shortuuid } from '../utils.js';
4
+ import type { ChatItem } from '../llm/index.js';
5
+ import { Event, Future, shortuuid } from '../utils.js';
6
+ import type { Task } from '../utils.js';
7
+ import { asyncLocalStorage } from './agent.js';
6
8
 
7
9
  export class SpeechHandle {
8
10
  /** Priority for messages that should be played after all other messages in the queue */
@@ -12,25 +14,40 @@ export class SpeechHandle {
12
14
  /** Priority for important messages that should be played before others. */
13
15
  static SPEECH_PRIORITY_HIGH = 10;
14
16
 
15
- private interruptFut = new Future();
16
- private authorizeFut = new Future();
17
- private playoutDoneFut = new Future();
17
+ private interruptFut = new Future<void>();
18
+ private authorizedEvent = new Event();
19
+ private scheduledFut = new Future<void>();
20
+ private doneFut = new Future<void>();
18
21
 
19
- private _chatMessage?: ChatMessage;
22
+ private generations: Future<void>[] = [];
23
+ /** @internal */
24
+ _tasks: Task<void>[] = [];
25
+ private _chatItems: ChatItem[] = [];
26
+ private _numSteps = 1;
27
+
28
+ private itemAddedCallbacks: Set<(item: ChatItem) => void> = new Set();
29
+ private doneCallbacks: Set<(sh: SpeechHandle) => void> = new Set();
20
30
 
21
31
  constructor(
22
- readonly id: string,
23
- readonly allowInterruptions: boolean,
24
- readonly stepIndex: number,
32
+ private _id: string,
33
+ private _allowInterruptions: boolean,
34
+ /** @internal */
35
+ public _stepIndex: number,
25
36
  readonly parent?: SpeechHandle,
26
- ) {}
37
+ ) {
38
+ this.doneFut.await.finally(() => {
39
+ for (const callback of this.doneCallbacks) {
40
+ callback(this);
41
+ }
42
+ });
43
+ }
27
44
 
28
- static create(options: {
45
+ static create(options?: {
29
46
  allowInterruptions?: boolean;
30
47
  stepIndex?: number;
31
48
  parent?: SpeechHandle;
32
49
  }) {
33
- const { allowInterruptions = false, stepIndex = 0, parent } = options ?? {};
50
+ const { allowInterruptions = true, stepIndex = 0, parent } = options ?? {};
34
51
 
35
52
  return new SpeechHandle(shortuuid('speech_'), allowInterruptions, stepIndex, parent);
36
53
  }
@@ -39,12 +56,47 @@ export class SpeechHandle {
39
56
  return this.interruptFut.done;
40
57
  }
41
58
 
42
- get done(): boolean {
43
- return this.playoutDoneFut.done;
59
+ get numSteps(): number {
60
+ return this._numSteps;
61
+ }
62
+
63
+ get id(): string {
64
+ return this._id;
44
65
  }
45
66
 
46
- get chatMessage(): ChatMessage | undefined {
47
- return this._chatMessage;
67
+ get scheduled(): boolean {
68
+ return this.scheduledFut.done;
69
+ }
70
+
71
+ get allowInterruptions(): boolean {
72
+ return this._allowInterruptions;
73
+ }
74
+
75
+ /**
76
+ * Allow or disallow interruptions on this SpeechHandle.
77
+ *
78
+ * When set to false, the SpeechHandle will no longer accept any incoming
79
+ * interruption requests until re-enabled. If the handle is already
80
+ * interrupted, clearing interruptions is not allowed.
81
+ *
82
+ * @param value - true to allow interruptions, false to disallow
83
+ * @throws Error If attempting to disable interruptions when already interrupted
84
+ */
85
+ set allowInterruptions(value: boolean) {
86
+ if (this.interrupted && !value) {
87
+ throw new Error(
88
+ 'Cannot set allow_interruptions to False, the SpeechHandle is already interrupted',
89
+ );
90
+ }
91
+ this._allowInterruptions = value;
92
+ }
93
+
94
+ done(): boolean {
95
+ return this.doneFut.done;
96
+ }
97
+
98
+ get chatItems(): ChatItem[] {
99
+ return this._chatItems;
48
100
  }
49
101
 
50
102
  /**
@@ -54,23 +106,33 @@ export class SpeechHandle {
54
106
  *
55
107
  * @returns The same speech handle that was interrupted.
56
108
  */
57
- interrupt(): SpeechHandle {
58
- if (!this.allowInterruptions) {
59
- throw new Error('interruptions are not allowed');
109
+ interrupt(force: boolean = false): SpeechHandle {
110
+ if (!force && !this.allowInterruptions) {
111
+ throw new Error('This generation handle does not allow interruptions');
60
112
  }
61
113
 
62
- if (this.done) return this;
63
-
64
- this.interruptFut.resolve();
114
+ this._cancel();
65
115
  return this;
66
116
  }
67
117
 
68
- then(callback: (sh: SpeechHandle) => void) {
69
- return this.playoutDoneFut.await.finally(() => callback(this));
70
- }
71
-
72
- async waitForPlayout() {
73
- return this.playoutDoneFut.await;
118
/**
 * Waits for the entire assistant turn to complete playback.
 *
 * This method waits until the assistant has fully finished speaking,
 * including any finalization steps beyond initial response generation.
 * This is appropriate to call when you want to ensure the speech output
 * has entirely played out, including any tool calls and response follow-ups.
 *
 * @throws Error when called from inside a function tool, because the turn cannot complete
 *   until that tool returns; use `RunContext.waitForPlayout()` there instead.
 */
async waitForPlayout(): Promise<void> {
  const store = asyncLocalStorage.getStore();
  if (store?.functionCall) {
    throw new Error(
      `Cannot call 'SpeechHandle.waitForPlayout()' from inside the function tool '${store.functionCall.name}'. ` +
        'This creates a circular wait: the speech handle is waiting for the function tool to complete, ' +
        'while the function tool is simultaneously waiting for the speech handle.\n' +
        "To wait for the assistant's spoken response prior to running this tool, use RunContext.waitForPlayout() instead.",
    );
  }

  // Resolve only once the handle has been marked done; without this the method
  // returned immediately and never waited for playout at all.
  await this.doneFut.await;
}
75
137
 
76
138
  async waitIfNotInterrupted(aw: Promise<unknown>[]): Promise<void> {
@@ -79,26 +141,109 @@ export class SpeechHandle {
79
141
  await Promise.race(fs);
80
142
  }
81
143
 
144
+ addDoneCallback(callback: (sh: SpeechHandle) => void) { // registers a callback that the constructor's done-future hook invokes with this handle
145
+ this.doneCallbacks.add(callback); // NOTE(review): the hook iterates the set once when doneFut settles, so a callback added afterwards appears never to run — confirm
146
+ }
147
+
148
+ removeDoneCallback(callback: (sh: SpeechHandle) => void) { // unregisters a callback previously passed to addDoneCallback
149
+ this.doneCallbacks.delete(callback); // no-op when the callback was never registered (Set.delete)
150
+ }
151
+
152
+ /**
+  * Marks the handle as interrupted by resolving the interrupt future.
+  *
+  * Does nothing when the handle is already done or already interrupted, so it is
+  * safe to call repeatedly. Unlike `interrupt()`, it does not check `allowInterruptions`.
+  *
+  * @returns this handle
+  * @internal
+  */
153
+ _cancel(): SpeechHandle {
154
+ if (this.done()) {
155
+ return this;
156
+ }
157
+
158
+ if (!this.interruptFut.done) {
159
+ this.interruptFut.resolve();
160
+ }
161
+
162
+ return this;
163
+ }
164
+
82
165
  /** @internal */
83
- _setChatMessage(chatMessage: ChatMessage) {
84
- if (this.done) {
85
- throw new Error('cannot set chat message after speech has been played');
166
+ _authorizeGeneration(): void {
167
+ const fut = new Future<void>();
168
+ this.generations.push(fut);
169
+ this.authorizedEvent.set();
170
+ }
171
+
172
+ /** @internal */
173
+ _clearAuthorization(): void {
174
+ this.authorizedEvent.clear();
175
+ }
176
+
177
+ /** @internal */
178
+ async _waitForAuthorization(): Promise<void> {
179
+ await this.authorizedEvent.wait();
180
+ }
181
+
182
+ /** @internal */
183
+ async _waitForGeneration(stepIdx: number = -1): Promise<void> {
184
+ if (this.generations.length === 0) {
185
+ throw new Error('cannot use wait_for_generation: no active generation is running.');
86
186
  }
87
- this._chatMessage = chatMessage;
187
+
188
+ const index = stepIdx === -1 ? this.generations.length - 1 : stepIdx;
189
+ const generation = this.generations[index];
190
+ if (!generation) {
191
+ throw new Error(`Generation at index ${index} not found.`);
192
+ }
193
+ return generation.await;
88
194
  }
89
195
 
90
196
  /** @internal */
91
- _authorizePlayout() {
92
- this.authorizeFut.resolve();
197
+ async _waitForScheduled(): Promise<void> {
198
+ return this.scheduledFut.await;
93
199
  }
94
200
 
95
201
  /** @internal */
96
- async _waitForAuthorization() {
97
- return this.authorizeFut.await;
202
+ _markGenerationDone(): void { // resolves the most recently authorized generation future if it is still pending
203
+ if (this.generations.length === 0) { // a generation future only exists after _authorizeGeneration() was called
204
+ throw new Error('cannot use mark_generation_done: no active generation is running.');
205
+ }
206
+
207
+ const lastGeneration = this.generations[this.generations.length - 1];
208
+ if (lastGeneration && !lastGeneration.done) { // skip when already resolved so repeated calls are harmless
209
+ lastGeneration.resolve();
210
+ }
211
+ }
212
+
213
+ /**
+  * Resolves the done future the first time it is called; later calls do nothing.
+  *
+  * When at least one generation has been authorized, the latest generation future is
+  * resolved as well so that nothing keeps waiting on it.
+  *
+  * @internal
+  */
214
+ _markDone(): void {
215
+ if (!this.doneFut.done) {
216
+ this.doneFut.resolve();
217
+ if (this.generations.length > 0) {
218
+ this._markGenerationDone(); // preemptive generation could be cancelled before being scheduled
219
+ }
220
+ }
221
+ }
222
+
223
+ /**
+  * Resolves the scheduled future if it is still pending, which makes the `scheduled`
+  * getter return true and releases `_waitForScheduled()`.
+  *
+  * @internal
+  */
224
+ _markScheduled(): void {
225
+ if (!this.scheduledFut.done) {
226
+ this.scheduledFut.resolve();
227
+ }
228
+ }
229
+
230
+ /** @internal */
231
+ _addItemAddedCallback(callback: (item: ChatItem) => void): void {
232
+ this.itemAddedCallbacks.add(callback);
98
233
  }
99
234
 
100
235
  /** @internal */
101
- _markPlayoutDone() {
102
- this.playoutDoneFut.resolve();
236
+ _removeItemAddedCallback(callback: (item: ChatItem) => void): void {
237
+ this.itemAddedCallbacks.delete(callback);
238
+ }
239
+
240
+ /**
+  * Records chat items on this handle.
+  *
+  * For each item, in order, every registered item-added callback is invoked with the
+  * item and the item is then appended to the list exposed by `chatItems`.
+  *
+  * @param items - chat items to notify about and append
+  * @internal
+  */
241
+ _itemAdded(items: ChatItem[]): void {
242
+ for (const item of items) {
243
+ for (const cb of this.itemAddedCallbacks) {
244
+ cb(item);
245
+ }
246
+ this._chatItems.push(item); // appended only after the callbacks for this item have run
247
+ }
103
248
  }
104
249
  }
package/src/worker.ts CHANGED
@@ -484,7 +484,11 @@ export class Worker {
484
484
  let participant: ParticipantInfo | undefined = undefined;
485
485
  if (participantIdentity) {
486
486
  try {
487
- participant = await client.getParticipant(roomName, participantIdentity);
487
+ // TODO(AJS-269): resolve compatibility issue with node-sdk to remove the forced type casting
488
+ participant = (await client.getParticipant(
489
+ roomName,
490
+ participantIdentity,
491
+ )) as unknown as ParticipantInfo;
488
492
  } catch (e) {
489
493
  this.#logger.fatal(
490
494
  `participant with identity ${participantIdentity} not found in room ${roomName}`,