@livekit/agents 1.0.46 → 1.0.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +14 -20
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +14 -20
- package/dist/cli.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +14 -5
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +14 -5
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/llm/chat_context.cjs +19 -0
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +4 -0
- package/dist/llm/chat_context.d.ts +4 -0
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +19 -0
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/provider_format/index.cjs +2 -0
- package/dist/llm/provider_format/index.cjs.map +1 -1
- package/dist/llm/provider_format/index.d.cts +1 -1
- package/dist/llm/provider_format/index.d.ts +1 -1
- package/dist/llm/provider_format/index.d.ts.map +1 -1
- package/dist/llm/provider_format/index.js +6 -1
- package/dist/llm/provider_format/index.js.map +1 -1
- package/dist/llm/provider_format/openai.cjs +82 -2
- package/dist/llm/provider_format/openai.cjs.map +1 -1
- package/dist/llm/provider_format/openai.d.cts +1 -0
- package/dist/llm/provider_format/openai.d.ts +1 -0
- package/dist/llm/provider_format/openai.d.ts.map +1 -1
- package/dist/llm/provider_format/openai.js +80 -1
- package/dist/llm/provider_format/openai.js.map +1 -1
- package/dist/llm/provider_format/openai.test.cjs +326 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -1
- package/dist/llm/provider_format/openai.test.js +327 -1
- package/dist/llm/provider_format/openai.test.js.map +1 -1
- package/dist/llm/provider_format/utils.cjs +4 -3
- package/dist/llm/provider_format/utils.cjs.map +1 -1
- package/dist/llm/provider_format/utils.d.ts.map +1 -1
- package/dist/llm/provider_format/utils.js +4 -3
- package/dist/llm/provider_format/utils.js.map +1 -1
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +1 -0
- package/dist/llm/realtime.d.ts +1 -0
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js.map +1 -1
- package/dist/log.cjs +5 -2
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js +5 -2
- package/dist/log.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +15 -6
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +15 -6
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/utils.cjs +31 -2
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +7 -0
- package/dist/utils.d.ts +7 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +31 -2
- package/dist/utils.js.map +1 -1
- package/dist/utils.test.cjs +71 -0
- package/dist/utils.test.cjs.map +1 -1
- package/dist/utils.test.js +71 -0
- package/dist/utils.test.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.cjs.map +1 -1
- package/dist/version.d.cts +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.d.ts.map +1 -1
- package/dist/version.js +1 -1
- package/dist/version.js.map +1 -1
- package/dist/voice/agent.cjs +144 -12
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +29 -4
- package/dist/voice/agent.d.ts +29 -4
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +140 -11
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent.test.cjs +120 -0
- package/dist/voice/agent.test.cjs.map +1 -1
- package/dist/voice/agent.test.js +122 -2
- package/dist/voice/agent.test.js.map +1 -1
- package/dist/voice/agent_activity.cjs +383 -298
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +34 -7
- package/dist/voice/agent_activity.d.ts +34 -7
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +383 -293
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +140 -40
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +19 -7
- package/dist/voice/agent_session.d.ts +19 -7
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +137 -37
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +4 -0
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +4 -0
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/generation.cjs +39 -19
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +44 -20
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/index.cjs +2 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -1
- package/dist/voice/index.d.ts +1 -1
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +2 -1
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/speech_handle.cjs +7 -1
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +2 -0
- package/dist/voice/speech_handle.d.ts +2 -0
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +8 -2
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/voice/testing/run_result.cjs +66 -15
- package/dist/voice/testing/run_result.cjs.map +1 -1
- package/dist/voice/testing/run_result.d.cts +14 -3
- package/dist/voice/testing/run_result.d.ts +14 -3
- package/dist/voice/testing/run_result.d.ts.map +1 -1
- package/dist/voice/testing/run_result.js +66 -15
- package/dist/voice/testing/run_result.js.map +1 -1
- package/package.json +1 -1
- package/src/cli.ts +20 -33
- package/src/ipc/job_proc_lazy_main.ts +16 -5
- package/src/llm/chat_context.ts +35 -0
- package/src/llm/provider_format/index.ts +7 -2
- package/src/llm/provider_format/openai.test.ts +385 -1
- package/src/llm/provider_format/openai.ts +103 -0
- package/src/llm/provider_format/utils.ts +6 -4
- package/src/llm/realtime.ts +1 -0
- package/src/log.ts +5 -2
- package/src/stream/deferred_stream.ts +17 -6
- package/src/utils.test.ts +87 -0
- package/src/utils.ts +36 -2
- package/src/version.ts +1 -1
- package/src/voice/agent.test.ts +140 -2
- package/src/voice/agent.ts +189 -10
- package/src/voice/agent_activity.ts +427 -289
- package/src/voice/agent_session.ts +178 -40
- package/src/voice/audio_recognition.ts +4 -0
- package/src/voice/generation.ts +52 -23
- package/src/voice/index.ts +1 -1
- package/src/voice/speech_handle.ts +9 -2
- package/src/voice/testing/run_result.ts +81 -23
package/src/utils.ts
CHANGED
|
@@ -9,6 +9,7 @@ import type {
|
|
|
9
9
|
TrackKind,
|
|
10
10
|
} from '@livekit/rtc-node';
|
|
11
11
|
import { AudioFrame, AudioResampler, RoomEvent } from '@livekit/rtc-node';
|
|
12
|
+
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
12
13
|
import { EventEmitter, once } from 'node:events';
|
|
13
14
|
import type { ReadableStream } from 'node:stream/web';
|
|
14
15
|
import { TransformStream, type TransformStreamDefaultController } from 'node:stream/web';
|
|
@@ -434,7 +435,9 @@ export enum TaskResult {
|
|
|
434
435
|
* @param T - The type of the task result
|
|
435
436
|
*/
|
|
436
437
|
export class Task<T> {
|
|
438
|
+
private static readonly currentTaskStorage = new AsyncLocalStorage<Task<unknown>>();
|
|
437
439
|
private resultFuture: Future<T>;
|
|
440
|
+
private doneCallbacks: Set<() => void> = new Set();
|
|
438
441
|
|
|
439
442
|
#logger = log();
|
|
440
443
|
|
|
@@ -444,6 +447,21 @@ export class Task<T> {
|
|
|
444
447
|
readonly name?: string,
|
|
445
448
|
) {
|
|
446
449
|
this.resultFuture = new Future();
|
|
450
|
+
void this.resultFuture.await
|
|
451
|
+
.then(
|
|
452
|
+
() => undefined,
|
|
453
|
+
() => undefined,
|
|
454
|
+
)
|
|
455
|
+
.finally(() => {
|
|
456
|
+
for (const callback of this.doneCallbacks) {
|
|
457
|
+
try {
|
|
458
|
+
callback();
|
|
459
|
+
} catch (error) {
|
|
460
|
+
this.#logger.error({ error }, 'Task done callback failed');
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
this.doneCallbacks.clear();
|
|
464
|
+
});
|
|
447
465
|
this.runTask();
|
|
448
466
|
}
|
|
449
467
|
|
|
@@ -463,6 +481,13 @@ export class Task<T> {
|
|
|
463
481
|
return new Task(fn, abortController, name);
|
|
464
482
|
}
|
|
465
483
|
|
|
484
|
+
/**
|
|
485
|
+
* Returns the currently running task in this async context, if available.
|
|
486
|
+
*/
|
|
487
|
+
static current(): Task<unknown> | undefined {
|
|
488
|
+
return Task.currentTaskStorage.getStore();
|
|
489
|
+
}
|
|
490
|
+
|
|
466
491
|
private async runTask() {
|
|
467
492
|
const run = async () => {
|
|
468
493
|
if (this.name) {
|
|
@@ -471,7 +496,8 @@ export class Task<T> {
|
|
|
471
496
|
return await this.fn(this.controller);
|
|
472
497
|
};
|
|
473
498
|
|
|
474
|
-
return run()
|
|
499
|
+
return Task.currentTaskStorage
|
|
500
|
+
.run(this as Task<unknown>, run)
|
|
475
501
|
.then((value) => {
|
|
476
502
|
this.resultFuture.resolve(value);
|
|
477
503
|
return value;
|
|
@@ -543,7 +569,15 @@ export class Task<T> {
|
|
|
543
569
|
}
|
|
544
570
|
|
|
545
571
|
addDoneCallback(callback: () => void) {
|
|
546
|
-
this.resultFuture.await.finally(callback);
|
|
572
|
+
if (this.done) {
|
|
573
|
+
queueMicrotask(callback);
|
|
574
|
+
return;
|
|
575
|
+
}
|
|
576
|
+
this.doneCallbacks.add(callback);
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
removeDoneCallback(callback: () => void) {
|
|
580
|
+
this.doneCallbacks.delete(callback);
|
|
547
581
|
}
|
|
548
582
|
}
|
|
549
583
|
|
package/src/version.ts
CHANGED
package/src/voice/agent.test.ts
CHANGED
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import { describe, expect, it } from 'vitest';
|
|
4
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
5
5
|
import { z } from 'zod';
|
|
6
6
|
import { tool } from '../llm/index.js';
|
|
7
|
-
import {
|
|
7
|
+
import { initializeLogger } from '../log.js';
|
|
8
|
+
import { Task } from '../utils.js';
|
|
9
|
+
import { Agent, AgentTask, _setActivityTaskInfo } from './agent.js';
|
|
10
|
+
import { agentActivityStorage } from './agent_activity.js';
|
|
11
|
+
|
|
12
|
+
initializeLogger({ pretty: false, level: 'error' });
|
|
8
13
|
|
|
9
14
|
describe('Agent', () => {
|
|
10
15
|
it('should create agent with basic instructions', () => {
|
|
@@ -77,4 +82,137 @@ describe('Agent', () => {
|
|
|
77
82
|
expect(tools1).toEqual(tools2);
|
|
78
83
|
expect(tools1).toEqual(tools);
|
|
79
84
|
});
|
|
85
|
+
|
|
86
|
+
it('should require AgentTask to run inside task context', async () => {
|
|
87
|
+
class TestTask extends AgentTask<string> {
|
|
88
|
+
constructor() {
|
|
89
|
+
super({ instructions: 'test task' });
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
const task = new TestTask();
|
|
94
|
+
await expect(task.run()).rejects.toThrow('must be executed inside a Task context');
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
it('should require AgentTask to run inside inline task context', async () => {
|
|
98
|
+
class TestTask extends AgentTask<string> {
|
|
99
|
+
constructor() {
|
|
100
|
+
super({ instructions: 'test task' });
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
const task = new TestTask();
|
|
105
|
+
const wrapper = Task.from(async () => {
|
|
106
|
+
return await task.run();
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
await expect(wrapper.result).rejects.toThrow(
|
|
110
|
+
'should only be awaited inside function tools or the onEnter/onExit methods of an Agent',
|
|
111
|
+
);
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
it('should allow AgentTask run from inline task context', async () => {
|
|
115
|
+
class TestTask extends AgentTask<string> {
|
|
116
|
+
constructor() {
|
|
117
|
+
super({ instructions: 'test task' });
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
const task = new TestTask();
|
|
122
|
+
const oldAgent = new Agent({ instructions: 'old agent' });
|
|
123
|
+
const mockSession = {
|
|
124
|
+
currentAgent: oldAgent,
|
|
125
|
+
_globalRunState: undefined,
|
|
126
|
+
_updateActivity: async (agent: Agent) => {
|
|
127
|
+
if (agent === task) {
|
|
128
|
+
task.complete('ok');
|
|
129
|
+
}
|
|
130
|
+
},
|
|
131
|
+
};
|
|
132
|
+
|
|
133
|
+
const mockActivity = {
|
|
134
|
+
agent: oldAgent,
|
|
135
|
+
agentSession: mockSession,
|
|
136
|
+
_onEnterTask: undefined,
|
|
137
|
+
llm: undefined,
|
|
138
|
+
close: async () => {},
|
|
139
|
+
};
|
|
140
|
+
|
|
141
|
+
const wrapper = Task.from(async () => {
|
|
142
|
+
const currentTask = Task.current();
|
|
143
|
+
if (!currentTask) {
|
|
144
|
+
throw new Error('expected task context');
|
|
145
|
+
}
|
|
146
|
+
_setActivityTaskInfo(currentTask, { inlineTask: true });
|
|
147
|
+
return await agentActivityStorage.run(mockActivity as any, () => task.run());
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
await expect(wrapper.result).resolves.toBe('ok');
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
it('should require AgentTask to run inside AgentActivity context', async () => {
|
|
154
|
+
class TestTask extends AgentTask<string> {
|
|
155
|
+
constructor() {
|
|
156
|
+
super({ instructions: 'test task' });
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
const task = new TestTask();
|
|
161
|
+
const wrapper = Task.from(async () => {
|
|
162
|
+
const currentTask = Task.current();
|
|
163
|
+
if (!currentTask) {
|
|
164
|
+
throw new Error('expected task context');
|
|
165
|
+
}
|
|
166
|
+
_setActivityTaskInfo(currentTask, { inlineTask: true });
|
|
167
|
+
return await task.run();
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
await expect(wrapper.result).rejects.toThrow(
|
|
171
|
+
'must be executed inside an AgentActivity context',
|
|
172
|
+
);
|
|
173
|
+
});
|
|
174
|
+
|
|
175
|
+
it('should close old activity when current agent changes while AgentTask is pending', async () => {
|
|
176
|
+
class TestTask extends AgentTask<string> {
|
|
177
|
+
constructor() {
|
|
178
|
+
super({ instructions: 'test task' });
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
const task = new TestTask();
|
|
183
|
+
const oldAgent = new Agent({ instructions: 'old agent' });
|
|
184
|
+
const switchedAgent = new Agent({ instructions: 'switched agent' });
|
|
185
|
+
const closeOldActivity = vi.fn(async () => {});
|
|
186
|
+
|
|
187
|
+
const mockSession = {
|
|
188
|
+
currentAgent: oldAgent as Agent,
|
|
189
|
+
_globalRunState: undefined,
|
|
190
|
+
_updateActivity: async (agent: Agent) => {
|
|
191
|
+
if (agent === task) {
|
|
192
|
+
mockSession.currentAgent = switchedAgent;
|
|
193
|
+
task.complete('ok');
|
|
194
|
+
}
|
|
195
|
+
},
|
|
196
|
+
};
|
|
197
|
+
|
|
198
|
+
const mockActivity = {
|
|
199
|
+
agent: oldAgent,
|
|
200
|
+
agentSession: mockSession,
|
|
201
|
+
_onEnterTask: undefined,
|
|
202
|
+
llm: undefined,
|
|
203
|
+
close: closeOldActivity,
|
|
204
|
+
};
|
|
205
|
+
|
|
206
|
+
const wrapper = Task.from(async () => {
|
|
207
|
+
const currentTask = Task.current();
|
|
208
|
+
if (!currentTask) {
|
|
209
|
+
throw new Error('expected task context');
|
|
210
|
+
}
|
|
211
|
+
_setActivityTaskInfo(currentTask, { inlineTask: true });
|
|
212
|
+
return await agentActivityStorage.run(mockActivity as any, () => task.run());
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
await expect(wrapper.result).resolves.toBe('ok');
|
|
216
|
+
expect(closeOldActivity).toHaveBeenCalledTimes(1);
|
|
217
|
+
});
|
|
80
218
|
});
|
package/src/voice/agent.ts
CHANGED
|
@@ -13,26 +13,71 @@ import {
|
|
|
13
13
|
type TTSModelString,
|
|
14
14
|
} from '../inference/index.js';
|
|
15
15
|
import { ReadonlyChatContext } from '../llm/chat_context.js';
|
|
16
|
-
import type { ChatMessage, FunctionCall, RealtimeModel } from '../llm/index.js';
|
|
16
|
+
import type { ChatMessage, FunctionCall } from '../llm/index.js';
|
|
17
17
|
import {
|
|
18
18
|
type ChatChunk,
|
|
19
19
|
ChatContext,
|
|
20
20
|
LLM,
|
|
21
|
+
RealtimeModel,
|
|
21
22
|
type ToolChoice,
|
|
22
23
|
type ToolContext,
|
|
23
24
|
} from '../llm/index.js';
|
|
25
|
+
import { log } from '../log.js';
|
|
24
26
|
import type { STT, SpeechEvent } from '../stt/index.js';
|
|
25
27
|
import { StreamAdapter as STTStreamAdapter } from '../stt/index.js';
|
|
26
28
|
import { SentenceTokenizer as BasicSentenceTokenizer } from '../tokenize/basic/index.js';
|
|
27
29
|
import type { TTS } from '../tts/index.js';
|
|
28
30
|
import { SynthesizeStream, StreamAdapter as TTSStreamAdapter } from '../tts/index.js';
|
|
29
31
|
import { USERDATA_TIMED_TRANSCRIPT } from '../types.js';
|
|
32
|
+
import { Future, Task } from '../utils.js';
|
|
30
33
|
import type { VAD } from '../vad.js';
|
|
31
|
-
import type { AgentActivity } from './agent_activity.js';
|
|
34
|
+
import { type AgentActivity, agentActivityStorage } from './agent_activity.js';
|
|
32
35
|
import type { AgentSession, TurnDetectionMode } from './agent_session.js';
|
|
33
36
|
import type { TimedString } from './io.js';
|
|
37
|
+
import type { SpeechHandle } from './speech_handle.js';
|
|
38
|
+
|
|
39
|
+
export const functionCallStorage = new AsyncLocalStorage<{ functionCall?: FunctionCall }>();
|
|
40
|
+
export const speechHandleStorage = new AsyncLocalStorage<SpeechHandle>();
|
|
41
|
+
const activityTaskInfoStorage = new WeakMap<Task<any>, _ActivityTaskInfo>();
|
|
42
|
+
|
|
43
|
+
type _ActivityTaskInfo = {
|
|
44
|
+
functionCall: FunctionCall | null;
|
|
45
|
+
speechHandle: SpeechHandle | null;
|
|
46
|
+
inlineTask: boolean;
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
/** @internal */
|
|
50
|
+
export function _setActivityTaskInfo<T>(
|
|
51
|
+
task: Task<T>,
|
|
52
|
+
options: {
|
|
53
|
+
functionCall?: FunctionCall | null;
|
|
54
|
+
speechHandle?: SpeechHandle | null;
|
|
55
|
+
inlineTask?: boolean;
|
|
56
|
+
},
|
|
57
|
+
): void {
|
|
58
|
+
const info = activityTaskInfoStorage.get(task) ?? {
|
|
59
|
+
functionCall: null,
|
|
60
|
+
speechHandle: null,
|
|
61
|
+
inlineTask: false,
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
if (Object.hasOwn(options, 'functionCall')) {
|
|
65
|
+
info.functionCall = options.functionCall ?? null;
|
|
66
|
+
}
|
|
67
|
+
if (Object.hasOwn(options, 'speechHandle')) {
|
|
68
|
+
info.speechHandle = options.speechHandle ?? null;
|
|
69
|
+
}
|
|
70
|
+
if (Object.hasOwn(options, 'inlineTask')) {
|
|
71
|
+
info.inlineTask = options.inlineTask ?? false;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
activityTaskInfoStorage.set(task, info);
|
|
75
|
+
}
|
|
34
76
|
|
|
35
|
-
|
|
77
|
+
/** @internal */
|
|
78
|
+
export function _getActivityTaskInfo<T>(task: Task<T>): _ActivityTaskInfo | undefined {
|
|
79
|
+
return activityTaskInfoStorage.get(task);
|
|
80
|
+
}
|
|
36
81
|
export const STOP_RESPONSE_SYMBOL = Symbol('StopResponse');
|
|
37
82
|
|
|
38
83
|
export class StopResponse extends Error {
|
|
@@ -268,20 +313,20 @@ export class Agent<UserData = any> {
|
|
|
268
313
|
throw new Error('sttNode called but no STT node is available');
|
|
269
314
|
}
|
|
270
315
|
|
|
271
|
-
let
|
|
316
|
+
let wrappedStt = activity.stt;
|
|
272
317
|
|
|
273
|
-
if (!
|
|
318
|
+
if (!wrappedStt.capabilities.streaming) {
|
|
274
319
|
const vad = agent.vad || activity.vad;
|
|
275
320
|
if (!vad) {
|
|
276
321
|
throw new Error(
|
|
277
322
|
'STT does not support streaming, add a VAD to the AgentTask/VoiceAgent to enable streaming',
|
|
278
323
|
);
|
|
279
324
|
}
|
|
280
|
-
|
|
325
|
+
wrappedStt = new STTStreamAdapter(wrappedStt, vad);
|
|
281
326
|
}
|
|
282
327
|
|
|
283
328
|
const connOptions = activity.agentSession.connOptions.sttConnOptions;
|
|
284
|
-
const stream =
|
|
329
|
+
const stream = wrappedStt.stream({ connOptions });
|
|
285
330
|
|
|
286
331
|
// Set startTimeOffset to provide linear timestamps across reconnections
|
|
287
332
|
const audioInputStartedAt =
|
|
@@ -382,14 +427,14 @@ export class Agent<UserData = any> {
|
|
|
382
427
|
throw new Error('ttsNode called but no TTS node is available');
|
|
383
428
|
}
|
|
384
429
|
|
|
385
|
-
let
|
|
430
|
+
let wrappedTts = activity.tts;
|
|
386
431
|
|
|
387
432
|
if (!activity.tts.capabilities.streaming) {
|
|
388
|
-
|
|
433
|
+
wrappedTts = new TTSStreamAdapter(wrappedTts, new BasicSentenceTokenizer());
|
|
389
434
|
}
|
|
390
435
|
|
|
391
436
|
const connOptions = activity.agentSession.connOptions.ttsConnOptions;
|
|
392
|
-
const stream =
|
|
437
|
+
const stream = wrappedTts.stream({ connOptions });
|
|
393
438
|
stream.updateInputStream(text);
|
|
394
439
|
|
|
395
440
|
let cleaned = false;
|
|
@@ -440,3 +485,137 @@ export class Agent<UserData = any> {
|
|
|
440
485
|
},
|
|
441
486
|
};
|
|
442
487
|
}
|
|
488
|
+
|
|
489
|
+
export class AgentTask<ResultT = unknown, UserData = any> extends Agent<UserData> {
|
|
490
|
+
private started = false;
|
|
491
|
+
private future = new Future<ResultT>();
|
|
492
|
+
|
|
493
|
+
#logger = log();
|
|
494
|
+
|
|
495
|
+
get done(): boolean {
|
|
496
|
+
return this.future.done;
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
complete(result: ResultT | Error): void {
|
|
500
|
+
if (this.future.done) {
|
|
501
|
+
throw new Error(`${this.constructor.name} is already done`);
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
if (result instanceof Error) {
|
|
505
|
+
this.future.reject(result);
|
|
506
|
+
} else {
|
|
507
|
+
this.future.resolve(result);
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
const speechHandle = speechHandleStorage.getStore();
|
|
511
|
+
if (speechHandle) {
|
|
512
|
+
speechHandle._maybeRunFinalOutput = result;
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
async run(): Promise<ResultT> {
|
|
517
|
+
if (this.started) {
|
|
518
|
+
throw new Error(
|
|
519
|
+
`Task ${this.constructor.name} has already started and cannot be awaited multiple times`,
|
|
520
|
+
);
|
|
521
|
+
}
|
|
522
|
+
this.started = true;
|
|
523
|
+
|
|
524
|
+
const currentTask = Task.current();
|
|
525
|
+
if (!currentTask) {
|
|
526
|
+
throw new Error(`${this.constructor.name} must be executed inside a Task context`);
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
const taskInfo = _getActivityTaskInfo(currentTask);
|
|
530
|
+
if (!taskInfo || !taskInfo.inlineTask) {
|
|
531
|
+
throw new Error(
|
|
532
|
+
`${this.constructor.name} should only be awaited inside function tools or the onEnter/onExit methods of an Agent`,
|
|
533
|
+
);
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
const speechHandle = speechHandleStorage.getStore();
|
|
537
|
+
const oldActivity = agentActivityStorage.getStore();
|
|
538
|
+
if (!oldActivity) {
|
|
539
|
+
throw new Error(`${this.constructor.name} must be executed inside an AgentActivity context`);
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
currentTask.addDoneCallback(() => {
|
|
543
|
+
if (this.future.done) return;
|
|
544
|
+
|
|
545
|
+
// If the Task finished before the AgentTask was completed, complete the AgentTask with an error.
|
|
546
|
+
this.#logger.error(`The Task finished before ${this.constructor.name} was completed.`);
|
|
547
|
+
this.complete(new Error(`The Task finished before ${this.constructor.name} was completed.`));
|
|
548
|
+
});
|
|
549
|
+
|
|
550
|
+
const oldAgent = oldActivity.agent;
|
|
551
|
+
const session = oldActivity.agentSession;
|
|
552
|
+
|
|
553
|
+
const blockedTasks: Task<any>[] = [currentTask];
|
|
554
|
+
const onEnterTask = oldActivity._onEnterTask;
|
|
555
|
+
|
|
556
|
+
if (onEnterTask && !onEnterTask.done && onEnterTask !== currentTask) {
|
|
557
|
+
blockedTasks.push(onEnterTask);
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
if (
|
|
561
|
+
taskInfo.functionCall &&
|
|
562
|
+
oldActivity.llm instanceof RealtimeModel &&
|
|
563
|
+
!oldActivity.llm.capabilities.manualFunctionCalls
|
|
564
|
+
) {
|
|
565
|
+
this.#logger.error(
|
|
566
|
+
`Realtime model does not support resuming function calls from chat context, ` +
|
|
567
|
+
`using AgentTask inside a function tool may have unexpected behavior.`,
|
|
568
|
+
);
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
await session._updateActivity(this, {
|
|
572
|
+
previousActivity: 'pause',
|
|
573
|
+
newActivity: 'start',
|
|
574
|
+
blockedTasks,
|
|
575
|
+
});
|
|
576
|
+
|
|
577
|
+
let runState = session._globalRunState;
|
|
578
|
+
if (speechHandle && runState && !runState.done()) {
|
|
579
|
+
// Only unwatch the parent speech handle if there are other handles keeping the run alive.
|
|
580
|
+
// When watchedHandleCount is 1 (only the parent), unwatching would drop it to 0 and
|
|
581
|
+
// mark the run done prematurely — before function_call_output and assistant message arrive.
|
|
582
|
+
if (runState._watchedHandleCount() > 1) {
|
|
583
|
+
runState._unwatchHandle(speechHandle);
|
|
584
|
+
}
|
|
585
|
+
// it is OK to call _markDoneIfNeeded here, the above _updateActivity will call onEnter
|
|
586
|
+
// and newly added handles keep the run alive.
|
|
587
|
+
runState._markDoneIfNeeded();
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
try {
|
|
591
|
+
return await this.future.await;
|
|
592
|
+
} finally {
|
|
593
|
+
// runState could have changed after future resolved
|
|
594
|
+
runState = session._globalRunState;
|
|
595
|
+
|
|
596
|
+
if (session.currentAgent !== this) {
|
|
597
|
+
this.#logger.warn(
|
|
598
|
+
`${this.constructor.name} completed, but the agent has changed in the meantime. ` +
|
|
599
|
+
`Ignoring handoff to the previous agent, likely due to AgentSession.updateAgent being invoked.`,
|
|
600
|
+
);
|
|
601
|
+
await oldActivity.close();
|
|
602
|
+
} else {
|
|
603
|
+
if (speechHandle && runState && !runState.done()) {
|
|
604
|
+
runState._watchHandle(speechHandle);
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
const mergedChatCtx = oldAgent._chatCtx.merge(this._chatCtx, {
|
|
608
|
+
excludeFunctionCall: true,
|
|
609
|
+
excludeInstructions: true,
|
|
610
|
+
});
|
|
611
|
+
oldAgent._chatCtx.items = mergedChatCtx.items;
|
|
612
|
+
|
|
613
|
+
await session._updateActivity(oldAgent, {
|
|
614
|
+
previousActivity: 'close',
|
|
615
|
+
newActivity: 'resume',
|
|
616
|
+
waitOnEnter: false,
|
|
617
|
+
});
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
}
|