@livekit/agents 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +2 -5
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -3
- package/dist/index.d.ts +2 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -3
- package/dist/index.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +3 -2
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +4 -3
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +20 -14
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +11 -5
- package/dist/job.d.ts +11 -5
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +17 -12
- package/dist/job.js.map +1 -1
- package/dist/tokenize/basic/hyphenator.cjs.map +1 -1
- package/dist/tokenize/basic/hyphenator.js.map +1 -1
- package/dist/utils.cjs +77 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +21 -0
- package/dist/utils.d.ts +21 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +76 -1
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +112 -71
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +112 -71
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +9 -2
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +9 -2
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/avatar/datastream_io.cjs +204 -0
- package/dist/voice/avatar/datastream_io.cjs.map +1 -0
- package/dist/voice/avatar/datastream_io.d.cts +37 -0
- package/dist/voice/avatar/datastream_io.d.ts +37 -0
- package/dist/voice/avatar/datastream_io.d.ts.map +1 -0
- package/dist/voice/avatar/datastream_io.js +188 -0
- package/dist/voice/avatar/datastream_io.js.map +1 -0
- package/dist/{multimodal → voice/avatar}/index.cjs +4 -4
- package/dist/voice/avatar/index.cjs.map +1 -0
- package/dist/voice/avatar/index.d.cts +2 -0
- package/dist/voice/avatar/index.d.ts +2 -0
- package/dist/voice/avatar/index.d.ts.map +1 -0
- package/dist/voice/avatar/index.js +2 -0
- package/dist/voice/avatar/index.js.map +1 -0
- package/dist/voice/index.cjs +2 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +1 -1
- package/dist/voice/io.d.ts +1 -1
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/room_io/_input.cjs +2 -1
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +2 -1
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/run_context.cjs +13 -0
- package/dist/voice/run_context.cjs.map +1 -1
- package/dist/voice/run_context.d.cts +10 -0
- package/dist/voice/run_context.d.ts +10 -0
- package/dist/voice/run_context.d.ts.map +1 -1
- package/dist/voice/run_context.js +13 -0
- package/dist/voice/run_context.js.map +1 -1
- package/dist/voice/speech_handle.cjs +152 -30
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +67 -16
- package/dist/voice/speech_handle.d.ts +67 -16
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +153 -31
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/worker.cjs +4 -1
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +4 -1
- package/dist/worker.js.map +1 -1
- package/package.json +2 -2
- package/src/index.ts +2 -3
- package/src/ipc/job_proc_lazy_main.ts +6 -3
- package/src/job.ts +27 -12
- package/src/tokenize/basic/hyphenator.ts +1 -1
- package/src/utils.ts +121 -1
- package/src/voice/agent_activity.ts +128 -78
- package/src/voice/agent_session.ts +11 -2
- package/src/voice/avatar/datastream_io.ts +247 -0
- package/src/voice/avatar/index.ts +4 -0
- package/src/voice/index.ts +2 -0
- package/src/voice/io.ts +1 -1
- package/src/voice/room_io/_input.ts +8 -3
- package/src/voice/run_context.ts +16 -2
- package/src/voice/speech_handle.ts +183 -38
- package/src/worker.ts +5 -1
- package/dist/multimodal/agent_playout.cjs +0 -233
- package/dist/multimodal/agent_playout.cjs.map +0 -1
- package/dist/multimodal/agent_playout.d.cts +0 -34
- package/dist/multimodal/agent_playout.d.ts +0 -34
- package/dist/multimodal/agent_playout.d.ts.map +0 -1
- package/dist/multimodal/agent_playout.js +0 -207
- package/dist/multimodal/agent_playout.js.map +0 -1
- package/dist/multimodal/index.cjs.map +0 -1
- package/dist/multimodal/index.d.cts +0 -2
- package/dist/multimodal/index.d.ts +0 -2
- package/dist/multimodal/index.d.ts.map +0 -1
- package/dist/multimodal/index.js +0 -2
- package/dist/multimodal/index.js.map +0 -1
- package/src/multimodal/agent_playout.ts +0 -266
- package/src/multimodal/index.ts +0 -4
package/src/voice/avatar/datastream_io.ts
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { Mutex } from '@livekit/mutex';
|
|
5
|
+
import {
|
|
6
|
+
type AudioFrame,
|
|
7
|
+
type ByteStreamWriter,
|
|
8
|
+
type Room,
|
|
9
|
+
RoomEvent,
|
|
10
|
+
type RpcInvocationData,
|
|
11
|
+
type TrackKind,
|
|
12
|
+
} from '@livekit/rtc-node';
|
|
13
|
+
import { log } from '../../log.js';
|
|
14
|
+
import {
|
|
15
|
+
Future,
|
|
16
|
+
Task,
|
|
17
|
+
shortuuid,
|
|
18
|
+
waitForParticipant,
|
|
19
|
+
waitForTrackPublication,
|
|
20
|
+
} from '../../utils.js';
|
|
21
|
+
import { AudioOutput, type PlaybackFinishedEvent } from '../io.js';
|
|
22
|
+
|
|
23
|
+
/** RPC method performed on the destination participant to ask it to clear its audio buffer. */
const RPC_CLEAR_BUFFER = 'lk.clear_buffer';

/** RPC method registered on the local participant; handles playback-finished notifications. */
const RPC_PLAYBACK_FINISHED = 'lk.playback_finished';

/** Byte-stream topic on which audio frames are sent to the destination participant. */
const AUDIO_STREAM_TOPIC = 'lk.audio_stream';
|
|
26
|
+
|
|
27
|
+
/** Construction options for {@link DataStreamAudioOutput}. */
export interface DataStreamAudioOutputOptions {
  /** Room whose local participant publishes the audio byte stream. */
  room: Room;
  /** Identity of the remote participant that receives the audio stream. */
  destinationIdentity: string;
  /** Sample rate forwarded to the base `AudioOutput`; optional. */
  sampleRate?: number;
  /** When set, startup also waits for the remote participant to publish a track of this kind. */
  waitRemoteTrack?: TrackKind;
}
|
|
33
|
+
|
|
34
|
+
/**
 * AudioOutput implementation that streams audio to a remote avatar worker using LiveKit DataStream.
 *
 * Frames are written to a byte stream on the `lk.audio_stream` topic addressed to
 * `destinationIdentity`. The remote participant reports the end of playback by calling the
 * `lk.playback_finished` RPC method, and is asked to drop audio through `lk.clear_buffer`.
 */
export class DataStreamAudioOutput extends AudioOutput {
  /**
   * True once the shared `lk.playback_finished` RPC method has been registered.
   *
   * NOTE(review): this flag is process-wide, not per room; a second room would reuse the
   * registration made on the first one — confirm this is intended.
   */
  static _playbackFinishedRpcRegistered: boolean = false;
  /** Playback-finished handlers, keyed by the identity of the participant expected to call. */
  static _playbackFinishedHandlers: Record<string, (data: RpcInvocationData) => string> = {};

  private room: Room;
  private destinationIdentity: string;
  private roomConnectedFuture: Future<void>;
  private waitRemoteTrack?: TrackKind;
  private streamWriter?: ByteStreamWriter;
  private pushedDuration: number = 0;
  private started: boolean = false;
  private lock = new Mutex();
  private startTask?: Task<void>;

  #logger = log();

  /**
   * @param opts - room, destination identity and optional sample rate / remote track kind.
   */
  constructor(opts: DataStreamAudioOutputOptions) {
    super(opts.sampleRate, undefined);

    const { room, destinationIdentity, sampleRate, waitRemoteTrack } = opts;
    this.room = room;
    this.destinationIdentity = destinationIdentity;
    this.sampleRate = sampleRate;
    this.waitRemoteTrack = waitRemoteTrack;
    this.roomConnectedFuture = new Future<void>();

    if (room.isConnected) {
      this.roomConnectedFuture.resolve(undefined);
    } else {
      // Resolve the future on the first connected state, then detach so the room does not
      // keep a reference to this output for its whole lifetime.
      const onConnectionStateChanged = (): void => {
        if (!room.isConnected) return;
        room.off(RoomEvent.ConnectionStateChanged, onConnectionStateChanged);
        if (!this.roomConnectedFuture.done) {
          this.roomConnectedFuture.resolve(undefined);
        }
      };
      room.on(RoomEvent.ConnectionStateChanged, onConnectionStateChanged);
    }

    const onRoomConnected = async (): Promise<void> => {
      await this.roomConnectedFuture.await;

      // register the rpc method right after the room is connected
      DataStreamAudioOutput.registerPlaybackFinishedRpc({
        room,
        callerIdentity: this.destinationIdentity,
        handler: (data) => this.handlePlaybackFinished(data),
      });

      // captureFrame() may already have created the task while we were waiting.
      if (!this.startTask) {
        this.startTask = Task.from(({ signal }) => this._start(signal));
      }
    };

    // The constructor cannot await; surface failures in the log instead of as an
    // unhandled rejection.
    onRoomConnected().catch((error: unknown) => {
      this.#logger.warn(
        { error, identity: this.destinationIdentity },
        'failed to initialize the data stream audio output',
      );
    });
  }

  /**
   * Waits for the room connection, the remote participant and (optionally) its track, then
   * marks the output as started. Serialized by `lock`; a second run returns immediately.
   */
  private async _start(_abortSignal: AbortSignal) {
    const unlock = await this.lock.lock();

    try {
      if (this.started) return;

      await this.roomConnectedFuture.await;

      this.#logger.debug(
        {
          identity: this.destinationIdentity,
        },
        'waiting for the remote participant',
      );

      await waitForParticipant({
        room: this.room,
        identity: this.destinationIdentity,
      });

      if (this.waitRemoteTrack) {
        this.#logger.debug(
          {
            identity: this.destinationIdentity,
            kind: this.waitRemoteTrack,
          },
          'waiting for the remote track',
        );

        await waitForTrackPublication({
          room: this.room,
          identity: this.destinationIdentity,
          kind: this.waitRemoteTrack,
        });
      }

      this.#logger.debug(
        {
          identity: this.destinationIdentity,
        },
        'remote participant ready',
      );

      this.started = true;
    } finally {
      unlock();
    }
  }

  /**
   * Sends one audio frame to the destination participant, opening a byte stream first when
   * none is active.
   *
   * @param frame - audio frame whose samples are written to the stream.
   * @throws Error if the room has no local participant to open the stream with.
   */
  async captureFrame(frame: AudioFrame): Promise<void> {
    if (!this.startTask) {
      this.startTask = Task.from(({ signal }) => this._start(signal));
    }

    await this.startTask.result;
    await super.captureFrame(frame);

    let writer = this.streamWriter;
    if (!writer) {
      const localParticipant = this.room.localParticipant;
      if (!localParticipant) {
        throw new Error('cannot stream audio: the room has no local participant');
      }

      writer = await localParticipant.streamBytes({
        name: shortuuid('AUDIO_'),
        topic: AUDIO_STREAM_TOPIC,
        destinationIdentities: [this.destinationIdentity],
        attributes: {
          sample_rate: frame.sampleRate.toString(),
          num_channels: frame.channels.toString(),
        },
      });
      this.streamWriter = writer;
      this.pushedDuration = 0;
    }

    // frame.data is an Int16Array, write accepts a Uint8Array. Honor the view's offset and
    // length so that only this frame's bytes are sent when the samples are a window into a
    // larger ArrayBuffer.
    const { data } = frame;
    await writer.write(new Uint8Array(data.buffer, data.byteOffset, data.byteLength));
    this.pushedDuration += frame.samplesPerChannel / frame.sampleRate;
  }

  /** Ends the current audio segment by closing the active byte stream, if any. */
  flush(): void {
    super.flush();

    const writer = this.streamWriter;
    if (writer === undefined || !this.started) {
      return;
    }

    // Detach the writer before closing it so the next captureFrame() opens a fresh stream
    // instead of writing into one that is being closed.
    this.streamWriter = undefined;
    writer.close().catch((error: unknown) => {
      this.#logger.warn(
        { error, identity: this.destinationIdentity },
        'failed to close the audio stream',
      );
    });
  }

  /** Asks the destination participant to clear its buffer via the `lk.clear_buffer` RPC. */
  clearBuffer(): void {
    if (!this.started) return;

    const localParticipant = this.room.localParticipant;
    if (!localParticipant) {
      this.#logger.warn(
        { identity: this.destinationIdentity },
        'cannot clear the remote audio buffer: the room has no local participant',
      );
      return;
    }

    // Fire-and-forget: report failures in the log rather than as an unhandled rejection.
    localParticipant
      .performRpc({
        destinationIdentity: this.destinationIdentity,
        method: RPC_CLEAR_BUFFER,
        payload: '',
      })
      .catch((error: unknown) => {
        this.#logger.warn(
          { error, identity: this.destinationIdentity },
          'failed to clear the remote audio buffer',
        );
      });
  }

  /**
   * Handles a playback-finished RPC invocation for this output.
   *
   * @param data - RPC invocation; `payload` is parsed as a JSON `PlaybackFinishedEvent`.
   * @returns `'ok'` when the event was accepted, `'reject'` when the caller is not the
   * destination participant or the payload is not valid JSON.
   */
  private handlePlaybackFinished(data: RpcInvocationData): string {
    if (data.callerIdentity !== this.destinationIdentity) {
      this.#logger.warn(
        {
          callerIdentity: data.callerIdentity,
          destinationIdentity: this.destinationIdentity,
        },
        'playback finished event received from unexpected participant',
      );
      return 'reject';
    }

    this.#logger.info(
      {
        callerIdentity: data.callerIdentity,
      },
      'playback finished event received',
    );

    let playbackFinishedEvent: PlaybackFinishedEvent;
    try {
      // NOTE(review): the parsed object is trusted to match PlaybackFinishedEvent; only the
      // JSON syntax is checked here.
      playbackFinishedEvent = JSON.parse(data.payload) as PlaybackFinishedEvent;
    } catch (error: unknown) {
      this.#logger.warn(
        { error, callerIdentity: data.callerIdentity },
        'playback finished event has an invalid payload',
      );
      return 'reject';
    }

    this.onPlaybackFinished(playbackFinishedEvent);
    return 'ok';
  }

  /**
   * Records `handler` for `callerIdentity` and, on first use, registers the shared
   * `lk.playback_finished` RPC method that dispatches to the recorded handlers.
   *
   * @param room - room whose local participant receives the RPC calls.
   * @param callerIdentity - identity of the participant allowed to trigger `handler`.
   * @param handler - invoked with the RPC data; its return value is the RPC response.
   */
  static registerPlaybackFinishedRpc({
    room,
    callerIdentity,
    handler,
  }: {
    room: Room;
    callerIdentity: string;
    handler: (data: RpcInvocationData) => string;
  }) {
    DataStreamAudioOutput._playbackFinishedHandlers[callerIdentity] = handler;

    if (DataStreamAudioOutput._playbackFinishedRpcRegistered) {
      return;
    }

    const rpcHandler = async (data: RpcInvocationData): Promise<string> => {
      const registered = DataStreamAudioOutput._playbackFinishedHandlers[data.callerIdentity];
      if (!registered) {
        log().warn(
          {
            callerIdentity: data.callerIdentity,
            expectedIdentities: Object.keys(DataStreamAudioOutput._playbackFinishedHandlers),
          },
          'playback finished event received from unexpected participant',
        );

        return 'reject';
      }
      return registered(data);
    };

    const localParticipant = room.localParticipant;
    if (!localParticipant) {
      // Leave the flag unset so that a later call can still perform the registration.
      log().warn(
        { callerIdentity },
        'cannot register the playback finished rpc: the room has no local participant',
      );
      return;
    }

    localParticipant.registerRpcMethod(RPC_PLAYBACK_FINISHED, rpcHandler);
    DataStreamAudioOutput._playbackFinishedRpcRegistered = true;
  }
}
|
package/src/voice/index.ts
CHANGED
|
@@ -3,5 +3,7 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
export { Agent, StopResponse, type AgentOptions, type ModelSettings } from './agent.js';
|
|
5
5
|
export { AgentSession, type AgentSessionOptions } from './agent_session.js';
|
|
6
|
+
|
|
7
|
+
export * from './avatar/index.js';
|
|
6
8
|
export * from './events.js';
|
|
7
9
|
export { RunContext } from './run_context.js';
|
package/src/voice/room_io/_input.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import type { AudioFrame } from '@livekit/rtc-node';
|
|
4
5
|
import {
|
|
5
6
|
AudioStream,
|
|
6
7
|
type NoiseCancellationOptions,
|
|
@@ -11,6 +12,7 @@ import {
|
|
|
11
12
|
RoomEvent,
|
|
12
13
|
TrackSource,
|
|
13
14
|
} from '@livekit/rtc-node';
|
|
15
|
+
import type { ReadableStream } from 'node:stream/web';
|
|
14
16
|
import { log } from '../../log.js';
|
|
15
17
|
import { resampleStream } from '../../utils.js';
|
|
16
18
|
import { AudioInput } from '../io.js';
|
|
@@ -64,8 +66,10 @@ export class ParticipantAudioInputStream extends AudioInput {
|
|
|
64
66
|
? participant
|
|
65
67
|
: this.room.remoteParticipants.get(participantIdentity);
|
|
66
68
|
|
|
69
|
+
// We need to check if the participant has a microphone track and subscribe to it
|
|
70
|
+
// in case we miss the tracksubscribed event
|
|
67
71
|
if (participantValue) {
|
|
68
|
-
for (const publication of
|
|
72
|
+
for (const publication of participantValue.trackPublications.values()) {
|
|
69
73
|
if (publication.track && publication.source === TrackSource.SOURCE_MICROPHONE) {
|
|
70
74
|
this.onTrackSubscribed(publication.track, publication, participantValue);
|
|
71
75
|
break;
|
|
@@ -127,12 +131,13 @@ export class ParticipantAudioInputStream extends AudioInput {
|
|
|
127
131
|
return true;
|
|
128
132
|
};
|
|
129
133
|
|
|
130
|
-
private createStream(track: RemoteTrack) {
|
|
134
|
+
// Builds the audio stream for a remote track using this input's sample rate, channel count
// and noise-cancellation settings.
private createStream(track: RemoteTrack): ReadableStream<AudioFrame> {
  const { sampleRate, numChannels, noiseCancellation } = this;
  const audioStream = new AudioStream(track, { sampleRate, numChannels, noiseCancellation });

  // TODO(AJS-269): resolve compatibility issue with node-sdk to remove the forced type casting
  return audioStream as unknown as ReadableStream<AudioFrame>;
}
|
|
137
142
|
|
|
138
143
|
async close() {
|
package/src/voice/run_context.ts
CHANGED
|
@@ -8,13 +8,27 @@ import type { SpeechHandle } from './speech_handle.js';
|
|
|
8
8
|
export type UnknownUserData = unknown;
|
|
9
9
|
|
|
10
10
|
/**
 * Context handed to a function tool: the session, the speech handle of the turn that issued
 * the call, and the function call itself.
 */
export class RunContext<UserData = UnknownUserData> {
  /** Index of the speech-handle step that was current when this context was created. */
  private readonly initialStepIdx: number;

  constructor(
    public readonly session: AgentSession<UserData>,
    public readonly speechHandle: SpeechHandle,
    public readonly functionCall: FunctionCall,
  ) {
    const { numSteps } = speechHandle;
    this.initialStepIdx = numSteps - 1;
  }

  /** User data stored on the session. */
  get userData(): UserData {
    const { userData } = this.session;
    return userData;
  }

  /**
   * Waits for the speech playout corresponding to this function call step.
   *
   * Unlike {@link SpeechHandle.waitForPlayout}, which waits for the full
   * assistant turn to complete (including all function tools),
   * this method only waits for the assistant's spoken response prior to running
   * this tool to finish playing.
   */
  async waitForPlayout() {
    const step = this.initialStepIdx;
    return this.speechHandle._waitForGeneration(step);
  }
}
|
package/src/voice/speech_handle.ts
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import type {
|
|
5
|
-
import { Future, shortuuid } from '../utils.js';
|
|
4
|
+
import type { ChatItem } from '../llm/index.js';
|
|
5
|
+
import { Event, Future, shortuuid } from '../utils.js';
|
|
6
|
+
import type { Task } from '../utils.js';
|
|
7
|
+
import { asyncLocalStorage } from './agent.js';
|
|
6
8
|
|
|
7
9
|
export class SpeechHandle {
|
|
8
10
|
/** Priority for messages that should be played after all other messages in the queue */
|
|
@@ -12,25 +14,40 @@ export class SpeechHandle {
|
|
|
12
14
|
/** Priority for important messages that should be played before others. */
|
|
13
15
|
static SPEECH_PRIORITY_HIGH = 10;
|
|
14
16
|
|
|
15
|
-
private interruptFut = new Future();
|
|
16
|
-
private
|
|
17
|
-
private
|
|
17
|
+
private interruptFut = new Future<void>();
|
|
18
|
+
private authorizedEvent = new Event();
|
|
19
|
+
private scheduledFut = new Future<void>();
|
|
20
|
+
private doneFut = new Future<void>();
|
|
18
21
|
|
|
19
|
-
private
|
|
22
|
+
private generations: Future<void>[] = [];
|
|
23
|
+
/** @internal */
|
|
24
|
+
_tasks: Task<void>[] = [];
|
|
25
|
+
private _chatItems: ChatItem[] = [];
|
|
26
|
+
private _numSteps = 1;
|
|
27
|
+
|
|
28
|
+
private itemAddedCallbacks: Set<(item: ChatItem) => void> = new Set();
|
|
29
|
+
private doneCallbacks: Set<(sh: SpeechHandle) => void> = new Set();
|
|
20
30
|
|
|
21
31
|
/**
 * @param _id - identifier of this handle.
 * @param _allowInterruptions - whether interruption requests are accepted.
 * @param _stepIndex - step index stored on the handle.
 * @param parent - optional parent handle.
 */
constructor(
  private _id: string,
  private _allowInterruptions: boolean,
  /** @internal */
  public _stepIndex: number,
  readonly parent?: SpeechHandle,
) {
  // Once the done future settles, notify every registered done callback with this handle.
  const notifyDone = (): void => {
    this.doneCallbacks.forEach((notify) => {
      notify(this);
    });
  };
  this.doneFut.await.finally(notifyDone);
}
|
|
27
44
|
|
|
28
|
-
static create(options
|
|
45
|
+
// Factory: builds a handle with a fresh `speech_`-prefixed id. Interruptions default to
// allowed and the step index defaults to 0.
static create(options?: {
  allowInterruptions?: boolean;
  stepIndex?: number;
  parent?: SpeechHandle;
}) {
  const settings = options ?? {};
  const { allowInterruptions = true, stepIndex = 0, parent } = settings;
  const handleId = shortuuid('speech_');

  return new SpeechHandle(handleId, allowInterruptions, stepIndex, parent);
}
|
|
@@ -39,12 +56,47 @@ export class SpeechHandle {
|
|
|
39
56
|
return this.interruptFut.done;
|
|
40
57
|
}
|
|
41
58
|
|
|
42
|
-
get
|
|
43
|
-
return this.
|
|
59
|
+
/** Current value of the handle's step counter. */
get numSteps(): number {
  const steps = this._numSteps;
  return steps;
}

/** Identifier of this handle. */
get id(): string {
  const handleId = this._id;
  return handleId;
}

/** True once the scheduled future has been resolved. */
get scheduled(): boolean {
  const { done: isScheduled } = this.scheduledFut;
  return isScheduled;
}
|
|
70
|
+
|
|
71
|
+
/** Whether this handle currently accepts interruption requests. */
get allowInterruptions(): boolean {
  const allowed = this._allowInterruptions;
  return allowed;
}

/**
 * Allow or disallow interruptions on this SpeechHandle.
 *
 * When set to false, the SpeechHandle will no longer accept any incoming
 * interruption requests until re-enabled. If the handle is already
 * interrupted, clearing interruptions is not allowed.
 *
 * @param value - true to allow interruptions, false to disallow
 * @throws Error If attempting to disable interruptions when already interrupted
 */
set allowInterruptions(value: boolean) {
  const disablingAfterInterrupt = !value && this.interrupted;
  if (disablingAfterInterrupt) {
    throw new Error(
      'Cannot set allow_interruptions to False, the SpeechHandle is already interrupted',
    );
  }

  this._allowInterruptions = value;
}
|
|
93
|
+
|
|
94
|
+
/** True once the done future has been resolved. */
done(): boolean {
  const { done: finished } = this.doneFut;
  return finished;
}

/** Chat items recorded on this handle so far (the live array, not a copy). */
get chatItems(): ChatItem[] {
  const items = this._chatItems;
  return items;
}
|
|
49
101
|
|
|
50
102
|
/**
|
|
@@ -54,23 +106,33 @@ export class SpeechHandle {
|
|
|
54
106
|
*
|
|
55
107
|
* @returns The same speech handle that was interrupted.
|
|
56
108
|
*/
|
|
57
|
-
interrupt(): SpeechHandle {
|
|
58
|
-
if (!this.allowInterruptions) {
|
|
59
|
-
throw new Error('
|
|
109
|
+
interrupt(force: boolean = false): SpeechHandle {
  // A forced interrupt bypasses the allowInterruptions setting.
  const permitted = force || this.allowInterruptions;
  if (!permitted) {
    throw new Error('This generation handle does not allow interruptions');
  }

  return this._cancel();
}
|
|
67
117
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
118
|
+
/**
 * Waits for the entire assistant turn to complete playback.
 *
 * This method waits until the assistant has fully finished speaking,
 * including any finalization steps beyond initial response generation.
 * This is appropriate to call when you want to ensure the speech output
 * has entirely played out, including any tool calls and response follow-ups.
 *
 * @throws Error if called from inside a function tool, where waiting on the whole turn
 * would wait on the tool itself.
 */
async waitForPlayout(): Promise<void> {
  const store = asyncLocalStorage.getStore();
  if (store?.functionCall) {
    throw new Error(
      `Cannot call 'SpeechHandle.waitForPlayout()' from inside the function tool '${store.functionCall.name}'. ` +
        'This creates a circular wait: the speech handle is waiting for the function tool to complete, ' +
        'while the function tool is simultaneously waiting for the speech handle.\n' +
        "To wait for the assistant's spoken response prior to running this tool, use RunContext.waitForPlayout() instead.",
    );
  }

  // Wait until the handle is marked done; without this the promise resolves immediately.
  await this.doneFut.await;
}
|
|
75
137
|
|
|
76
138
|
async waitIfNotInterrupted(aw: Promise<unknown>[]): Promise<void> {
|
|
@@ -79,26 +141,109 @@ export class SpeechHandle {
|
|
|
79
141
|
await Promise.race(fs);
|
|
80
142
|
}
|
|
81
143
|
|
|
144
|
+
/** Registers a callback that is invoked with this handle once it is done. */
addDoneCallback(callback: (sh: SpeechHandle) => void) {
  const { doneCallbacks } = this;
  doneCallbacks.add(callback);
}

/** Unregisters a callback previously passed to `addDoneCallback`. */
removeDoneCallback(callback: (sh: SpeechHandle) => void) {
  const { doneCallbacks } = this;
  doneCallbacks.delete(callback);
}
|
|
151
|
+
|
|
152
|
+
/**
 * Resolves the interrupt future unless the handle is already done or already interrupted.
 *
 * @returns this handle, for chaining.
 * @internal
 */
_cancel(): SpeechHandle {
  const canInterrupt = !this.done() && !this.interruptFut.done;
  if (canInterrupt) {
    this.interruptFut.resolve();
  }

  return this;
}
|
|
164
|
+
|
|
82
165
|
/** @internal */
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
166
|
+
_authorizeGeneration(): void {
  // Track a new pending generation, then signal anyone waiting for authorization.
  this.generations.push(new Future<void>());
  this.authorizedEvent.set();
}
|
|
171
|
+
|
|
172
|
+
/**
 * Clears the authorization event.
 *
 * @internal
 */
_clearAuthorization(): void {
  const { authorizedEvent } = this;
  authorizedEvent.clear();
}

/**
 * Waits on the authorization event.
 *
 * @internal
 */
async _waitForAuthorization(): Promise<void> {
  const { authorizedEvent } = this;
  await authorizedEvent.wait();
}
|
|
181
|
+
|
|
182
|
+
/**
 * Waits for the generation at `stepIdx` to be marked done.
 *
 * @param stepIdx - index into the recorded generations; `-1` (the default) selects the
 * most recent one.
 * @throws Error when no generation has been authorized, or none exists at the index.
 * @internal
 */
async _waitForGeneration(stepIdx: number = -1): Promise<void> {
  const total = this.generations.length;
  if (total === 0) {
    throw new Error('cannot use wait_for_generation: no active generation is running.');
  }

  let index = stepIdx;
  if (stepIdx === -1) {
    index = total - 1;
  }

  const generation = this.generations[index];
  if (generation) {
    return generation.await;
  }

  throw new Error(`Generation at index ${index} not found.`);
}
|
|
89
195
|
|
|
90
196
|
/** @internal */
|
|
91
|
-
|
|
92
|
-
this.
|
|
197
|
+
async _waitForScheduled(): Promise<void> {
  // Settles once the scheduled future is resolved.
  const { scheduledFut } = this;
  return scheduledFut.await;
}
|
|
94
200
|
|
|
95
201
|
/** @internal */
|
|
96
|
-
|
|
97
|
-
|
|
202
|
+
_markGenerationDone(): void {
  const total = this.generations.length;
  if (total === 0) {
    throw new Error('cannot use mark_generation_done: no active generation is running.');
  }

  // Resolve only the most recent generation, and only if it is still pending.
  const latest = this.generations[total - 1];
  if (!latest || latest.done) {
    return;
  }
  latest.resolve();
}
|
|
212
|
+
|
|
213
|
+
/**
 * Resolves the done future (first call only) and marks the latest generation done.
 *
 * @internal
 */
_markDone(): void {
  if (this.doneFut.done) {
    return;
  }

  this.doneFut.resolve();

  const hasGenerations = this.generations.length > 0;
  if (hasGenerations) {
    // preemptive generation could be cancelled before being scheduled
    this._markGenerationDone();
  }
}
|
|
222
|
+
|
|
223
|
+
/**
 * Resolves the scheduled future; later calls are no-ops.
 *
 * @internal
 */
_markScheduled(): void {
  const { scheduledFut } = this;
  if (scheduledFut.done) {
    return;
  }
  scheduledFut.resolve();
}
|
|
229
|
+
|
|
230
|
+
/**
 * Registers a callback invoked for each chat item passed to `_itemAdded`.
 *
 * @internal
 */
_addItemAddedCallback(callback: (item: ChatItem) => void): void {
  const { itemAddedCallbacks } = this;
  itemAddedCallbacks.add(callback);
}
|
|
99
234
|
|
|
100
235
|
/** @internal */
|
|
101
|
-
|
|
102
|
-
this.
|
|
236
|
+
_removeItemAddedCallback(callback: (item: ChatItem) => void): void {
  // Unregisters a callback previously added with _addItemAddedCallback.
  const { itemAddedCallbacks } = this;
  itemAddedCallbacks.delete(callback);
}
|
|
239
|
+
|
|
240
|
+
/**
 * Records chat items on this handle. For each item, every item-added callback is invoked
 * first and the item is then appended to the handle's chat items.
 *
 * @param items - chat items to record, processed in order.
 * @internal
 */
_itemAdded(items: ChatItem[]): void {
  items.forEach((item) => {
    this.itemAddedCallbacks.forEach((notify) => {
      notify(item);
    });
    this._chatItems.push(item);
  });
}
|
|
104
249
|
}
|
package/src/worker.ts
CHANGED
|
@@ -484,7 +484,11 @@ export class Worker {
|
|
|
484
484
|
let participant: ParticipantInfo | undefined = undefined;
|
|
485
485
|
if (participantIdentity) {
|
|
486
486
|
try {
|
|
487
|
-
|
|
487
|
+
// TODO(AJS-269): resolve compatibility issue with node-sdk to remove the forced type casting
|
|
488
|
+
participant = (await client.getParticipant(
|
|
489
|
+
roomName,
|
|
490
|
+
participantIdentity,
|
|
491
|
+
)) as unknown as ParticipantInfo;
|
|
488
492
|
} catch (e) {
|
|
489
493
|
this.#logger.fatal(
|
|
490
494
|
`participant with identity ${participantIdentity} not found in room ${roomName}`,
|