@livekit/agents 1.0.47 → 1.0.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/beta/index.cjs +29 -0
- package/dist/beta/index.cjs.map +1 -0
- package/dist/beta/index.d.cts +2 -0
- package/dist/beta/index.d.ts +2 -0
- package/dist/beta/index.d.ts.map +1 -0
- package/dist/beta/index.js +7 -0
- package/dist/beta/index.js.map +1 -0
- package/dist/beta/workflows/index.cjs +29 -0
- package/dist/beta/workflows/index.cjs.map +1 -0
- package/dist/beta/workflows/index.d.cts +2 -0
- package/dist/beta/workflows/index.d.ts +2 -0
- package/dist/beta/workflows/index.d.ts.map +1 -0
- package/dist/beta/workflows/index.js +7 -0
- package/dist/beta/workflows/index.js.map +1 -0
- package/dist/beta/workflows/task_group.cjs +162 -0
- package/dist/beta/workflows/task_group.cjs.map +1 -0
- package/dist/beta/workflows/task_group.d.cts +32 -0
- package/dist/beta/workflows/task_group.d.ts +32 -0
- package/dist/beta/workflows/task_group.d.ts.map +1 -0
- package/dist/beta/workflows/task_group.js +138 -0
- package/dist/beta/workflows/task_group.js.map +1 -0
- package/dist/cpu.cjs +189 -0
- package/dist/cpu.cjs.map +1 -0
- package/dist/cpu.d.cts +24 -0
- package/dist/cpu.d.ts +24 -0
- package/dist/cpu.d.ts.map +1 -0
- package/dist/cpu.js +152 -0
- package/dist/cpu.js.map +1 -0
- package/dist/cpu.test.cjs +227 -0
- package/dist/cpu.test.cjs.map +1 -0
- package/dist/cpu.test.js +204 -0
- package/dist/cpu.test.js.map +1 -0
- package/dist/index.cjs +3 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/inference/api_protos.d.cts +59 -59
- package/dist/inference/api_protos.d.ts +59 -59
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +1 -1
- package/dist/inference/llm.d.ts +1 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +6 -0
- package/dist/inference/tts.d.ts +6 -0
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js.map +1 -1
- package/dist/llm/chat_context.cjs +89 -1
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +10 -1
- package/dist/llm/chat_context.d.ts +10 -1
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +89 -1
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/chat_context.test.cjs +43 -0
- package/dist/llm/chat_context.test.cjs.map +1 -1
- package/dist/llm/chat_context.test.js +43 -0
- package/dist/llm/chat_context.test.js.map +1 -1
- package/dist/llm/index.cjs +2 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +3 -1
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/provider_format/index.d.cts +1 -1
- package/dist/llm/provider_format/index.d.ts +1 -1
- package/dist/llm/tool_context.cjs +7 -0
- package/dist/llm/tool_context.cjs.map +1 -1
- package/dist/llm/tool_context.d.cts +10 -2
- package/dist/llm/tool_context.d.ts +10 -2
- package/dist/llm/tool_context.d.ts.map +1 -1
- package/dist/llm/tool_context.js +6 -0
- package/dist/llm/tool_context.js.map +1 -1
- package/dist/utils.cjs +1 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +1 -0
- package/dist/utils.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent.cjs +9 -0
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +1 -0
- package/dist/voice/agent.d.ts +1 -0
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +9 -0
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +67 -16
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +7 -0
- package/dist/voice/agent_activity.d.ts +7 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +68 -17
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +27 -1
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +6 -0
- package/dist/voice/agent_session.d.ts +6 -0
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +27 -1
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +11 -2
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +12 -3
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/testing/fake_llm.cjs +127 -0
- package/dist/voice/testing/fake_llm.cjs.map +1 -0
- package/dist/voice/testing/fake_llm.d.cts +30 -0
- package/dist/voice/testing/fake_llm.d.ts +30 -0
- package/dist/voice/testing/fake_llm.d.ts.map +1 -0
- package/dist/voice/testing/fake_llm.js +103 -0
- package/dist/voice/testing/fake_llm.js.map +1 -0
- package/dist/voice/testing/index.cjs +3 -0
- package/dist/voice/testing/index.cjs.map +1 -1
- package/dist/voice/testing/index.d.cts +1 -0
- package/dist/voice/testing/index.d.ts +1 -0
- package/dist/voice/testing/index.d.ts.map +1 -1
- package/dist/voice/testing/index.js +2 -0
- package/dist/voice/testing/index.js.map +1 -1
- package/dist/worker.cjs +6 -29
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +6 -19
- package/dist/worker.js.map +1 -1
- package/package.json +1 -1
- package/src/beta/index.ts +9 -0
- package/src/beta/workflows/index.ts +9 -0
- package/src/beta/workflows/task_group.ts +194 -0
- package/src/cpu.test.ts +239 -0
- package/src/cpu.ts +173 -0
- package/src/index.ts +2 -1
- package/src/inference/llm.ts +2 -0
- package/src/inference/tts.ts +8 -1
- package/src/llm/chat_context.test.ts +48 -0
- package/src/llm/chat_context.ts +123 -0
- package/src/llm/index.ts +1 -0
- package/src/llm/tool_context.ts +14 -0
- package/src/utils.ts +5 -0
- package/src/voice/agent.ts +11 -0
- package/src/voice/agent_activity.ts +102 -16
- package/src/voice/agent_session.ts +33 -2
- package/src/voice/room_io/room_io.ts +14 -3
- package/src/voice/testing/fake_llm.ts +138 -0
- package/src/voice/testing/index.ts +2 -0
- package/src/worker.ts +34 -50
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { describe, expect, it } from 'vitest';
|
|
5
|
+
import { initializeLogger } from '../log.js';
|
|
6
|
+
import { FakeLLM } from '../voice/testing/fake_llm.js';
|
|
5
7
|
import {
|
|
6
8
|
type AudioContent,
|
|
7
9
|
ChatContext,
|
|
@@ -13,6 +15,8 @@ import {
|
|
|
13
15
|
ReadonlyChatContext,
|
|
14
16
|
} from './chat_context.js';
|
|
15
17
|
|
|
18
|
+
initializeLogger({ pretty: false, level: 'error' });
|
|
19
|
+
|
|
16
20
|
describe('ChatContext.toJSON', () => {
|
|
17
21
|
it('should match snapshot for empty context', () => {
|
|
18
22
|
const context = new ChatContext();
|
|
@@ -283,6 +287,50 @@ describe('ChatContext.toJSON', () => {
|
|
|
283
287
|
});
|
|
284
288
|
});
|
|
285
289
|
|
|
290
|
+
describe('ChatContext._summarize', () => {
|
|
291
|
+
it('keeps chronological timestamps with summary + tail', async () => {
|
|
292
|
+
const ctx = new ChatContext();
|
|
293
|
+
ctx.addMessage({ role: 'system', content: 'System prompt', createdAt: 0 });
|
|
294
|
+
ctx.addMessage({ role: 'user', content: 'hello', createdAt: 1000 });
|
|
295
|
+
ctx.addMessage({ role: 'assistant', content: 'hi there', createdAt: 2000 });
|
|
296
|
+
ctx.insert(
|
|
297
|
+
new FunctionCallOutput({
|
|
298
|
+
callId: 'call_1',
|
|
299
|
+
name: 'lookup',
|
|
300
|
+
output: '{"ok":true}',
|
|
301
|
+
isError: false,
|
|
302
|
+
createdAt: 3500,
|
|
303
|
+
}),
|
|
304
|
+
);
|
|
305
|
+
ctx.addMessage({ role: 'user', content: 'my color is blue', createdAt: 3000 });
|
|
306
|
+
ctx.addMessage({ role: 'assistant', content: 'noted', createdAt: 4000 });
|
|
307
|
+
|
|
308
|
+
const fake = new FakeLLM([
|
|
309
|
+
{
|
|
310
|
+
input: 'Conversation to summarize:\n\nuser: hello\nassistant: hi there',
|
|
311
|
+
content: 'condensed head',
|
|
312
|
+
},
|
|
313
|
+
]);
|
|
314
|
+
|
|
315
|
+
await ctx._summarize(fake, { keepLastTurns: 1 });
|
|
316
|
+
|
|
317
|
+
const summary = ctx.items.find(
|
|
318
|
+
(item) =>
|
|
319
|
+
item.type === 'message' && item.role === 'assistant' && item.extra?.is_summary === true,
|
|
320
|
+
);
|
|
321
|
+
expect(summary).toBeDefined();
|
|
322
|
+
if (!summary || summary.type !== 'message') {
|
|
323
|
+
throw new Error('summary message is missing');
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
expect(summary.createdAt).toBeCloseTo(2999.999, 6);
|
|
327
|
+
|
|
328
|
+
const createdAts = ctx.items.map((item) => item.createdAt);
|
|
329
|
+
const sorted = [...createdAts].sort((a, b) => a - b);
|
|
330
|
+
expect(createdAts).toEqual(sorted);
|
|
331
|
+
});
|
|
332
|
+
});
|
|
333
|
+
|
|
286
334
|
describe('ReadonlyChatContext with immutable array', () => {
|
|
287
335
|
it('should have readonly property set to true', () => {
|
|
288
336
|
const items: ChatItem[] = [
|
package/src/llm/chat_context.ts
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import type { AudioFrame, VideoFrame } from '@livekit/rtc-node';
|
|
5
5
|
import { createImmutableArray, shortuuid } from '../utils.js';
|
|
6
|
+
import type { LLM } from './llm.js';
|
|
6
7
|
import { type ProviderFormat, toChatCtx } from './provider_format/index.js';
|
|
7
8
|
import type { JSONObject, JSONValue, ToolContext } from './tool_context.js';
|
|
8
9
|
|
|
@@ -95,12 +96,15 @@ export class ChatMessage {
|
|
|
95
96
|
|
|
96
97
|
createdAt: number;
|
|
97
98
|
|
|
99
|
+
extra: Record<string, unknown>;
|
|
100
|
+
|
|
98
101
|
constructor(params: {
|
|
99
102
|
role: ChatRole;
|
|
100
103
|
content: ChatContent[] | string;
|
|
101
104
|
id?: string;
|
|
102
105
|
interrupted?: boolean;
|
|
103
106
|
createdAt?: number;
|
|
107
|
+
extra?: Record<string, unknown>;
|
|
104
108
|
}) {
|
|
105
109
|
const {
|
|
106
110
|
role,
|
|
@@ -108,12 +112,14 @@ export class ChatMessage {
|
|
|
108
112
|
id = shortuuid('item_'),
|
|
109
113
|
interrupted = false,
|
|
110
114
|
createdAt = Date.now(),
|
|
115
|
+
extra = {},
|
|
111
116
|
} = params;
|
|
112
117
|
this.id = id;
|
|
113
118
|
this.role = role;
|
|
114
119
|
this.content = Array.isArray(content) ? content : [content];
|
|
115
120
|
this.interrupted = interrupted;
|
|
116
121
|
this.createdAt = createdAt;
|
|
122
|
+
this.extra = extra;
|
|
117
123
|
}
|
|
118
124
|
|
|
119
125
|
static create(params: {
|
|
@@ -122,6 +128,7 @@ export class ChatMessage {
|
|
|
122
128
|
id?: string;
|
|
123
129
|
interrupted?: boolean;
|
|
124
130
|
createdAt?: number;
|
|
131
|
+
extra?: Record<string, unknown>;
|
|
125
132
|
}) {
|
|
126
133
|
return new ChatMessage(params);
|
|
127
134
|
}
|
|
@@ -401,6 +408,7 @@ export class AgentHandoffItem {
|
|
|
401
408
|
}
|
|
402
409
|
}
|
|
403
410
|
|
|
411
|
+
// TODO(parity): Add AgentConfigUpdate type to ChatItem union
|
|
404
412
|
export type ChatItem = ChatMessage | FunctionCall | FunctionCallOutput | AgentHandoffItem;
|
|
405
413
|
|
|
406
414
|
export class ChatContext {
|
|
@@ -431,6 +439,7 @@ export class ChatContext {
|
|
|
431
439
|
id?: string;
|
|
432
440
|
interrupted?: boolean;
|
|
433
441
|
createdAt?: number;
|
|
442
|
+
extra?: Record<string, unknown>;
|
|
434
443
|
}): ChatMessage {
|
|
435
444
|
const msg = new ChatMessage(params);
|
|
436
445
|
if (params.createdAt !== undefined) {
|
|
@@ -463,11 +472,13 @@ export class ChatContext {
|
|
|
463
472
|
return idx !== -1 ? idx : undefined;
|
|
464
473
|
}
|
|
465
474
|
|
|
475
|
+
// TODO(parity): Add excludeConfigUpdate option when AgentConfigUpdate is ported
|
|
466
476
|
copy(
|
|
467
477
|
options: {
|
|
468
478
|
excludeFunctionCall?: boolean;
|
|
469
479
|
excludeInstructions?: boolean;
|
|
470
480
|
excludeEmptyMessage?: boolean;
|
|
481
|
+
excludeHandoff?: boolean;
|
|
471
482
|
toolCtx?: ToolContext<any>; // eslint-disable-line @typescript-eslint/no-explicit-any
|
|
472
483
|
} = {},
|
|
473
484
|
): ChatContext {
|
|
@@ -475,6 +486,7 @@ export class ChatContext {
|
|
|
475
486
|
excludeFunctionCall = false,
|
|
476
487
|
excludeInstructions = false,
|
|
477
488
|
excludeEmptyMessage = false,
|
|
489
|
+
excludeHandoff = false,
|
|
478
490
|
toolCtx,
|
|
479
491
|
} = options;
|
|
480
492
|
const items: ChatItem[] = [];
|
|
@@ -500,6 +512,10 @@ export class ChatContext {
|
|
|
500
512
|
continue;
|
|
501
513
|
}
|
|
502
514
|
|
|
515
|
+
if (excludeHandoff && item.type === 'agent_handoff') {
|
|
516
|
+
continue;
|
|
517
|
+
}
|
|
518
|
+
|
|
503
519
|
if (toolCtx !== undefined && isToolCallOrOutput(item) && toolCtx[item.name] === undefined) {
|
|
504
520
|
continue;
|
|
505
521
|
}
|
|
@@ -510,6 +526,7 @@ export class ChatContext {
|
|
|
510
526
|
return new ChatContext(items);
|
|
511
527
|
}
|
|
512
528
|
|
|
529
|
+
// TODO(parity): Add excludeConfigUpdate option when AgentConfigUpdate is ported
|
|
513
530
|
merge(
|
|
514
531
|
other: ChatContext,
|
|
515
532
|
options: {
|
|
@@ -762,6 +779,112 @@ export class ChatContext {
|
|
|
762
779
|
return true;
|
|
763
780
|
}
|
|
764
781
|
|
|
782
|
+
async _summarize(llm: LLM, options: { keepLastTurns?: number } = {}): Promise<ChatContext> {
|
|
783
|
+
const { keepLastTurns = 2 } = options;
|
|
784
|
+
|
|
785
|
+
const toSummarize: ChatMessage[] = [];
|
|
786
|
+
for (const item of this._items) {
|
|
787
|
+
if (item.type !== 'message') continue;
|
|
788
|
+
if (item.role !== 'user' && item.role !== 'assistant') continue;
|
|
789
|
+
if (item.extra?.is_summary === true) continue;
|
|
790
|
+
|
|
791
|
+
const text = (item.textContent ?? '').trim();
|
|
792
|
+
if (text) {
|
|
793
|
+
toSummarize.push(item);
|
|
794
|
+
}
|
|
795
|
+
}
|
|
796
|
+
|
|
797
|
+
if (toSummarize.length === 0) {
|
|
798
|
+
return this;
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
const tailN = Math.max(0, Math.min(toSummarize.length, keepLastTurns * 2));
|
|
802
|
+
let head: ChatMessage[];
|
|
803
|
+
let tail: ChatMessage[];
|
|
804
|
+
if (tailN === 0) {
|
|
805
|
+
head = toSummarize;
|
|
806
|
+
tail = [];
|
|
807
|
+
} else {
|
|
808
|
+
head = toSummarize.slice(0, -tailN);
|
|
809
|
+
tail = toSummarize.slice(-tailN);
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
if (head.length === 0) {
|
|
813
|
+
return this;
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
const sourceText = head
|
|
817
|
+
.map((m) => `${m.role}: ${(m.textContent ?? '').trim()}`)
|
|
818
|
+
.join('\n')
|
|
819
|
+
.trim();
|
|
820
|
+
|
|
821
|
+
if (!sourceText) {
|
|
822
|
+
return this;
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
// TODO: refactor this into LLMStream.collect API.
|
|
826
|
+
const promptCtx = new ChatContext();
|
|
827
|
+
promptCtx.addMessage({
|
|
828
|
+
role: 'system',
|
|
829
|
+
content:
|
|
830
|
+
'Compress older chat history into a short, faithful summary.\n' +
|
|
831
|
+
'Focus on user goals, constraints, decisions, key facts/preferences/entities, and pending tasks.\n' +
|
|
832
|
+
'Exclude chit-chat and greetings. Be concise.',
|
|
833
|
+
});
|
|
834
|
+
promptCtx.addMessage({
|
|
835
|
+
role: 'user',
|
|
836
|
+
content: `Conversation to summarize:\n\n${sourceText}`,
|
|
837
|
+
});
|
|
838
|
+
|
|
839
|
+
const chunks: string[] = [];
|
|
840
|
+
for await (const chunk of llm.chat({ chatCtx: promptCtx })) {
|
|
841
|
+
if (chunk.delta?.content) {
|
|
842
|
+
chunks.push(chunk.delta.content);
|
|
843
|
+
}
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
const summary = chunks.join('').trim();
|
|
847
|
+
if (!summary) {
|
|
848
|
+
return this;
|
|
849
|
+
}
|
|
850
|
+
|
|
851
|
+
const tailStartTs = tail.length > 0 ? tail[0]!.createdAt : Infinity;
|
|
852
|
+
|
|
853
|
+
const preserved: ChatItem[] = [];
|
|
854
|
+
for (const it of this._items) {
|
|
855
|
+
if (
|
|
856
|
+
(it.type === 'function_call' || it.type === 'function_call_output') &&
|
|
857
|
+
it.createdAt < tailStartTs
|
|
858
|
+
) {
|
|
859
|
+
continue;
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
if (it.type === 'message' && (it.role === 'user' || it.role === 'assistant')) {
|
|
863
|
+
continue;
|
|
864
|
+
}
|
|
865
|
+
|
|
866
|
+
preserved.push(it);
|
|
867
|
+
}
|
|
868
|
+
|
|
869
|
+
this._items = preserved;
|
|
870
|
+
|
|
871
|
+
const createdAtHint =
|
|
872
|
+
tail.length > 0 ? tail[0]!.createdAt - 1e-3 : head[head.length - 1]!.createdAt + 1e-3;
|
|
873
|
+
|
|
874
|
+
this.addMessage({
|
|
875
|
+
role: 'assistant',
|
|
876
|
+
content: `[history summary]\n${summary}`,
|
|
877
|
+
createdAt: createdAtHint,
|
|
878
|
+
extra: { is_summary: true },
|
|
879
|
+
});
|
|
880
|
+
|
|
881
|
+
for (const msg of tail) {
|
|
882
|
+
this.insert(msg);
|
|
883
|
+
}
|
|
884
|
+
|
|
885
|
+
return this;
|
|
886
|
+
}
|
|
887
|
+
|
|
765
888
|
/**
|
|
766
889
|
* Indicates whether the context is read-only
|
|
767
890
|
*/
|
package/src/llm/index.ts
CHANGED
package/src/llm/tool_context.ts
CHANGED
|
@@ -80,6 +80,13 @@ export class ToolError extends Error {
|
|
|
80
80
|
}
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
+
export const ToolFlag = {
|
|
84
|
+
NONE: 0,
|
|
85
|
+
IGNORE_ON_ENTER: 1 << 0,
|
|
86
|
+
} as const;
|
|
87
|
+
|
|
88
|
+
export type ToolFlag = (typeof ToolFlag)[keyof typeof ToolFlag];
|
|
89
|
+
|
|
83
90
|
export interface AgentHandoff {
|
|
84
91
|
/**
|
|
85
92
|
* The agent to handoff to.
|
|
@@ -178,6 +185,8 @@ export interface FunctionTool<
|
|
|
178
185
|
*/
|
|
179
186
|
execute: ToolExecuteFunction<Parameters, UserData, Result>;
|
|
180
187
|
|
|
188
|
+
flags: number;
|
|
189
|
+
|
|
181
190
|
[FUNCTION_TOOL_SYMBOL]: true;
|
|
182
191
|
}
|
|
183
192
|
|
|
@@ -242,10 +251,12 @@ export function tool<
|
|
|
242
251
|
description,
|
|
243
252
|
parameters,
|
|
244
253
|
execute,
|
|
254
|
+
flags,
|
|
245
255
|
}: {
|
|
246
256
|
description: string;
|
|
247
257
|
parameters: Schema;
|
|
248
258
|
execute: ToolExecuteFunction<InferToolInput<Schema>, UserData, Result>;
|
|
259
|
+
flags?: number;
|
|
249
260
|
}): FunctionTool<InferToolInput<Schema>, UserData, Result>;
|
|
250
261
|
|
|
251
262
|
/**
|
|
@@ -254,10 +265,12 @@ export function tool<
|
|
|
254
265
|
export function tool<UserData = UnknownUserData, Result = unknown>({
|
|
255
266
|
description,
|
|
256
267
|
execute,
|
|
268
|
+
flags,
|
|
257
269
|
}: {
|
|
258
270
|
description: string;
|
|
259
271
|
parameters?: never;
|
|
260
272
|
execute: ToolExecuteFunction<Record<string, never>, UserData, Result>;
|
|
273
|
+
flags?: number;
|
|
261
274
|
}): FunctionTool<Record<string, never>, UserData, Result>;
|
|
262
275
|
|
|
263
276
|
/**
|
|
@@ -295,6 +308,7 @@ export function tool(tool: any): any {
|
|
|
295
308
|
description: tool.description,
|
|
296
309
|
parameters,
|
|
297
310
|
execute: tool.execute,
|
|
311
|
+
flags: tool.flags ?? ToolFlag.NONE,
|
|
298
312
|
[TOOL_SYMBOL]: true,
|
|
299
313
|
[FUNCTION_TOOL_SYMBOL]: true,
|
|
300
314
|
};
|
package/src/utils.ts
CHANGED
|
@@ -173,6 +173,11 @@ export class Future<T = void> {
|
|
|
173
173
|
this.#rejected = true;
|
|
174
174
|
this.#error = error;
|
|
175
175
|
this.#rejectPromise(error);
|
|
176
|
+
// Python calls Future.exception() right after set_exception() to silence
|
|
177
|
+
// "exception was never retrieved" warnings. In JS, consume the rejection
|
|
178
|
+
// immediately so Node does not emit unhandled-rejection noise before a
|
|
179
|
+
// later await/catch observes it.
|
|
180
|
+
void this.#await.catch(() => undefined);
|
|
176
181
|
}
|
|
177
182
|
}
|
|
178
183
|
|
package/src/voice/agent.ts
CHANGED
|
@@ -302,6 +302,17 @@ export class Agent<UserData = any> {
|
|
|
302
302
|
this._agentActivity.updateChatCtx(chatCtx);
|
|
303
303
|
}
|
|
304
304
|
|
|
305
|
+
// TODO(parity): Record an AgentConfigUpdate item in the chat context when tools change, once AgentConfigUpdate is ported to ChatContext.
|
|
306
|
+
async updateTools(tools: ToolContext): Promise<void> {
|
|
307
|
+
if (!this._agentActivity) {
|
|
308
|
+
this._tools = { ...tools };
|
|
309
|
+
this._chatCtx = this._chatCtx.copy({ toolCtx: this._tools });
|
|
310
|
+
return;
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
await this._agentActivity.updateTools(tools);
|
|
314
|
+
}
|
|
315
|
+
|
|
305
316
|
static default = {
|
|
306
317
|
async sttNode(
|
|
307
318
|
agent: Agent,
|
|
@@ -7,7 +7,7 @@ import type { Span } from '@opentelemetry/api';
|
|
|
7
7
|
import { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api';
|
|
8
8
|
import { Heap } from 'heap-js';
|
|
9
9
|
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
10
|
-
import { ReadableStream } from 'node:stream/web';
|
|
10
|
+
import { ReadableStream, TransformStream } from 'node:stream/web';
|
|
11
11
|
import { type ChatContext, ChatMessage } from '../llm/chat_context.js';
|
|
12
12
|
import {
|
|
13
13
|
type ChatItem,
|
|
@@ -23,6 +23,7 @@ import {
|
|
|
23
23
|
type RealtimeSession,
|
|
24
24
|
type ToolChoice,
|
|
25
25
|
type ToolContext,
|
|
26
|
+
ToolFlag,
|
|
26
27
|
} from '../llm/index.js';
|
|
27
28
|
import type { LLMError } from '../llm/llm.js';
|
|
28
29
|
import { isSameToolChoice, isSameToolContext } from '../llm/tool_context.js';
|
|
@@ -83,6 +84,12 @@ import { SpeechHandle } from './speech_handle.js';
|
|
|
83
84
|
import { setParticipantSpanAttributes } from './utils.js';
|
|
84
85
|
|
|
85
86
|
export const agentActivityStorage = new AsyncLocalStorage<AgentActivity>();
|
|
87
|
+
export const onEnterStorage = new AsyncLocalStorage<OnEnterData>();
|
|
88
|
+
|
|
89
|
+
interface OnEnterData {
|
|
90
|
+
session: AgentSession;
|
|
91
|
+
agent: Agent;
|
|
92
|
+
}
|
|
86
93
|
|
|
87
94
|
interface PreemptiveGeneration {
|
|
88
95
|
speechHandle: SpeechHandle;
|
|
@@ -312,6 +319,8 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
312
319
|
}
|
|
313
320
|
}
|
|
314
321
|
|
|
322
|
+
// TODO(parity): Record initial AgentConfigUpdate in chat context
|
|
323
|
+
|
|
315
324
|
// metrics and error handling
|
|
316
325
|
if (this.llm instanceof LLM) {
|
|
317
326
|
this.llm.on('metrics_collected', this.onMetricsCollected);
|
|
@@ -354,11 +363,13 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
354
363
|
if (runOnEnter) {
|
|
355
364
|
this._onEnterTask = this.createSpeechTask({
|
|
356
365
|
taskFn: () =>
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
366
|
+
onEnterStorage.run({ session: this.agentSession, agent: this.agent }, () =>
|
|
367
|
+
tracer.startActiveSpan(async () => this.agent.onEnter(), {
|
|
368
|
+
name: 'on_enter',
|
|
369
|
+
context: trace.setSpan(ROOT_CONTEXT, startSpan),
|
|
370
|
+
attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
|
|
371
|
+
}),
|
|
372
|
+
),
|
|
362
373
|
inlineTask: true,
|
|
363
374
|
name: 'AgentActivity_onEnter',
|
|
364
375
|
});
|
|
@@ -446,6 +457,20 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
446
457
|
}
|
|
447
458
|
}
|
|
448
459
|
|
|
460
|
+
// TODO(parity): Record an AgentConfigUpdate item in the chat context when tools change, once AgentConfigUpdate is ported to ChatContext.
|
|
461
|
+
async updateTools(tools: ToolContext): Promise<void> {
|
|
462
|
+
this.agent._tools = { ...tools };
|
|
463
|
+
|
|
464
|
+
if (this.realtimeSession) {
|
|
465
|
+
await this.realtimeSession.updateTools(tools);
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
if (this.llm instanceof LLM) {
|
|
469
|
+
// for realtime LLM, we assume the server will remove invalid tool messages
|
|
470
|
+
await this.updateChatCtx(this.agent._chatCtx.copy({ toolCtx: tools }));
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
|
|
449
474
|
updateOptions({ toolChoice }: { toolChoice?: ToolChoice | null }): void {
|
|
450
475
|
if (toolChoice !== undefined) {
|
|
451
476
|
this.toolChoice = toolChoice;
|
|
@@ -460,15 +485,36 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
460
485
|
void this.audioStream.close();
|
|
461
486
|
this.audioStream = new MultiInputStream<AudioFrame>();
|
|
462
487
|
|
|
488
|
+
// Filter is applied on this.audioStream.stream (downstream of MultiInputStream) rather
|
|
489
|
+
// than on the source audioStream via pipeThrough. pipeThrough locks its source stream, so
|
|
490
|
+
// if it were applied directly on audioStream, that lock would survive MultiInputStream.close()
|
|
491
|
+
// and make audioStream permanently locked for subsequent attachAudioInput calls (e.g. handoff).
|
|
492
|
+
const aecWarmupAudioFilter = new TransformStream<AudioFrame, AudioFrame>({
|
|
493
|
+
transform: (frame, controller) => {
|
|
494
|
+
const shouldDiscardForAecWarmup =
|
|
495
|
+
this.agentSession.agentState === 'speaking' && this.agentSession._aecWarmupRemaining > 0;
|
|
496
|
+
if (!shouldDiscardForAecWarmup) {
|
|
497
|
+
controller.enqueue(frame);
|
|
498
|
+
}
|
|
499
|
+
},
|
|
500
|
+
});
|
|
501
|
+
|
|
463
502
|
this.audioStreamId = this.audioStream.addInputStream(audioStream);
|
|
464
|
-
const [realtimeAudioStream, recognitionAudioStream] = this.audioStream.stream.tee();
|
|
465
503
|
|
|
466
|
-
if (this.realtimeSession) {
|
|
504
|
+
if (this.realtimeSession && this.audioRecognition) {
|
|
505
|
+
const [realtimeAudioStream, recognitionAudioStream] = this.audioStream.stream
|
|
506
|
+
.pipeThrough(aecWarmupAudioFilter)
|
|
507
|
+
.tee();
|
|
467
508
|
this.realtimeSession.setInputAudioStream(realtimeAudioStream);
|
|
468
|
-
}
|
|
469
|
-
|
|
470
|
-
if (this.audioRecognition) {
|
|
471
509
|
this.audioRecognition.setInputAudioStream(recognitionAudioStream);
|
|
510
|
+
} else if (this.realtimeSession) {
|
|
511
|
+
this.realtimeSession.setInputAudioStream(
|
|
512
|
+
this.audioStream.stream.pipeThrough(aecWarmupAudioFilter),
|
|
513
|
+
);
|
|
514
|
+
} else if (this.audioRecognition) {
|
|
515
|
+
this.audioRecognition.setInputAudioStream(
|
|
516
|
+
this.audioStream.stream.pipeThrough(aecWarmupAudioFilter),
|
|
517
|
+
);
|
|
472
518
|
}
|
|
473
519
|
}
|
|
474
520
|
|
|
@@ -730,6 +776,11 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
730
776
|
}
|
|
731
777
|
|
|
732
778
|
private interruptByAudioActivity(): void {
|
|
779
|
+
if (this.agentSession._aecWarmupRemaining > 0) {
|
|
780
|
+
// Disable interruption from audio activity while AEC warmup is active.
|
|
781
|
+
return;
|
|
782
|
+
}
|
|
783
|
+
|
|
733
784
|
if (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection) {
|
|
734
785
|
// skip speech handle interruption if server side turn detection is enabled
|
|
735
786
|
return;
|
|
@@ -1129,12 +1180,25 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1129
1180
|
instructions = `${this.agent.instructions}\n${instructions}`;
|
|
1130
1181
|
}
|
|
1131
1182
|
|
|
1183
|
+
// Filter out tools with IGNORE_ON_ENTER flag when generateReply is called inside onEnter
|
|
1184
|
+
const onEnterData = onEnterStorage.getStore();
|
|
1185
|
+
const shouldFilterTools =
|
|
1186
|
+
onEnterData?.agent === this.agent && onEnterData?.session === this.agentSession;
|
|
1187
|
+
|
|
1188
|
+
const tools = shouldFilterTools
|
|
1189
|
+
? Object.fromEntries(
|
|
1190
|
+
Object.entries(this.agent.toolCtx).filter(
|
|
1191
|
+
([, fnTool]) => !(fnTool.flags & ToolFlag.IGNORE_ON_ENTER),
|
|
1192
|
+
),
|
|
1193
|
+
)
|
|
1194
|
+
: this.agent.toolCtx;
|
|
1195
|
+
|
|
1132
1196
|
const task = this.createSpeechTask({
|
|
1133
1197
|
taskFn: (abortController: AbortController) =>
|
|
1134
1198
|
this.pipelineReplyTask(
|
|
1135
1199
|
handle,
|
|
1136
1200
|
chatCtx ?? this.agent.chatCtx,
|
|
1137
|
-
|
|
1201
|
+
tools,
|
|
1138
1202
|
{
|
|
1139
1203
|
toolChoice: toOaiToolChoice(toolChoice !== undefined ? toolChoice : this.toolChoice),
|
|
1140
1204
|
},
|
|
@@ -1172,7 +1236,24 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1172
1236
|
|
|
1173
1237
|
this.realtimeSession?.interrupt();
|
|
1174
1238
|
|
|
1175
|
-
if (
|
|
1239
|
+
if (force) {
|
|
1240
|
+
// Force-interrupt (used during shutdown): cancel all speech tasks so they
|
|
1241
|
+
// don't block on I/O that will never complete (e.g. audioOutput.waitForPlayout()
|
|
1242
|
+
// when the room is disconnected). Mark the current speech as done immediately
|
|
1243
|
+
// so the interrupt future resolves without waiting for tasks to finish.
|
|
1244
|
+
// Clear the queue so mainTask doesn't dequeue already-interrupted handles
|
|
1245
|
+
// and hang on _waitForGeneration() (the generation future created by
|
|
1246
|
+
// _authorizeGeneration would never resolve since _markDone is a no-op
|
|
1247
|
+
// once doneFut is already settled).
|
|
1248
|
+
for (const task of this.speechTasks) {
|
|
1249
|
+
task.cancel();
|
|
1250
|
+
}
|
|
1251
|
+
if (currentSpeech && !currentSpeech.done()) {
|
|
1252
|
+
currentSpeech._markDone();
|
|
1253
|
+
}
|
|
1254
|
+
this.speechQueue.clear();
|
|
1255
|
+
future.resolve();
|
|
1256
|
+
} else if (currentSpeech === undefined) {
|
|
1176
1257
|
future.resolve();
|
|
1177
1258
|
} else {
|
|
1178
1259
|
currentSpeech.addDoneCallback(() => {
|
|
@@ -1680,9 +1761,7 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1680
1761
|
}
|
|
1681
1762
|
|
|
1682
1763
|
replyAbortController.abort();
|
|
1683
|
-
await
|
|
1684
|
-
tasks.map((task) => task.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT)),
|
|
1685
|
-
);
|
|
1764
|
+
await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
1686
1765
|
|
|
1687
1766
|
let forwardedText = textOut?.text || '';
|
|
1688
1767
|
|
|
@@ -2511,6 +2590,13 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
2511
2590
|
const unlock = await this.lock.lock();
|
|
2512
2591
|
try {
|
|
2513
2592
|
this.cancelPreemptiveGeneration();
|
|
2593
|
+
|
|
2594
|
+
await cancelAndWait(Array.from(this.speechTasks), AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
|
|
2595
|
+
|
|
2596
|
+
if (this._currentSpeech && !this._currentSpeech.done()) {
|
|
2597
|
+
this._currentSpeech._markDone();
|
|
2598
|
+
}
|
|
2599
|
+
|
|
2514
2600
|
await this._closeSessionResources();
|
|
2515
2601
|
|
|
2516
2602
|
if (this._mainTask) {
|
|
@@ -77,6 +77,7 @@ export interface VoiceOptions {
|
|
|
77
77
|
maxToolSteps: number;
|
|
78
78
|
preemptiveGeneration: boolean;
|
|
79
79
|
userAwayTimeout?: number | null;
|
|
80
|
+
aecWarmupDuration: number | null;
|
|
80
81
|
useTtsAlignedTranscript: boolean;
|
|
81
82
|
}
|
|
82
83
|
|
|
@@ -90,6 +91,7 @@ const defaultVoiceOptions: VoiceOptions = {
|
|
|
90
91
|
maxToolSteps: 3,
|
|
91
92
|
preemptiveGeneration: false,
|
|
92
93
|
userAwayTimeout: 15.0,
|
|
94
|
+
aecWarmupDuration: 3000,
|
|
93
95
|
useTtsAlignedTranscript: true,
|
|
94
96
|
} as const;
|
|
95
97
|
|
|
@@ -158,6 +160,8 @@ export class AgentSession<
|
|
|
158
160
|
private closingTask: Promise<void> | null = null;
|
|
159
161
|
private userAwayTimer: NodeJS.Timeout | null = null;
|
|
160
162
|
|
|
163
|
+
private _aecWarmupTimer: NodeJS.Timeout | null = null;
|
|
164
|
+
|
|
161
165
|
// Connection options for STT, LLM, and TTS
|
|
162
166
|
private _connOptions: ResolvedSessionConnectOptions;
|
|
163
167
|
|
|
@@ -169,6 +173,9 @@ export class AgentSession<
|
|
|
169
173
|
private userSpeakingSpan?: Span;
|
|
170
174
|
private agentSpeakingSpan?: Span;
|
|
171
175
|
|
|
176
|
+
/** @internal */
|
|
177
|
+
_aecWarmupRemaining = 0;
|
|
178
|
+
|
|
172
179
|
/** @internal */
|
|
173
180
|
_recorderIO?: RecorderIO;
|
|
174
181
|
|
|
@@ -241,6 +248,7 @@ export class AgentSession<
|
|
|
241
248
|
// This is the "global" chat context, it holds the entire conversation history
|
|
242
249
|
this._chatCtx = ChatContext.empty();
|
|
243
250
|
this.options = { ...defaultVoiceOptions, ...voiceOptions };
|
|
251
|
+
this._aecWarmupRemaining = this.options.aecWarmupDuration ?? 0;
|
|
244
252
|
|
|
245
253
|
this._onUserInputTranscribed = this._onUserInputTranscribed.bind(this);
|
|
246
254
|
this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
|
|
@@ -774,7 +782,9 @@ export class AgentSession<
|
|
|
774
782
|
if (this.closingTask) {
|
|
775
783
|
return;
|
|
776
784
|
}
|
|
777
|
-
this.closeImpl(reason, error, drain)
|
|
785
|
+
this.closingTask = this.closeImpl(reason, error, drain).finally(() => {
|
|
786
|
+
this.closingTask = null;
|
|
787
|
+
});
|
|
778
788
|
}
|
|
779
789
|
|
|
780
790
|
/** @internal */
|
|
@@ -845,6 +855,14 @@ export class AgentSession<
|
|
|
845
855
|
this.agentSpeakingSpan = undefined;
|
|
846
856
|
}
|
|
847
857
|
|
|
858
|
+
if (state === 'speaking' && this._aecWarmupRemaining > 0 && this._aecWarmupTimer === null) {
|
|
859
|
+
this._aecWarmupTimer = setTimeout(() => this._onAecWarmupExpired(), this._aecWarmupRemaining);
|
|
860
|
+
this.logger.debug(
|
|
861
|
+
{ warmupDurationMs: this._aecWarmupRemaining },
|
|
862
|
+
'aec warmup active, disabling interruptions',
|
|
863
|
+
);
|
|
864
|
+
}
|
|
865
|
+
|
|
848
866
|
const oldState = this._agentState;
|
|
849
867
|
this._agentState = state;
|
|
850
868
|
|
|
@@ -938,6 +956,19 @@ export class AgentSession<
|
|
|
938
956
|
}
|
|
939
957
|
}
|
|
940
958
|
|
|
959
|
+
/** @internal */
|
|
960
|
+
_onAecWarmupExpired(): void {
|
|
961
|
+
if (this._aecWarmupRemaining > 0) {
|
|
962
|
+
this.logger.debug('aec warmup expired, re-enabling interruptions');
|
|
963
|
+
}
|
|
964
|
+
|
|
965
|
+
this._aecWarmupRemaining = 0;
|
|
966
|
+
if (this._aecWarmupTimer !== null) {
|
|
967
|
+
clearTimeout(this._aecWarmupTimer);
|
|
968
|
+
this._aecWarmupTimer = null;
|
|
969
|
+
}
|
|
970
|
+
}
|
|
971
|
+
|
|
941
972
|
private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void {
|
|
942
973
|
if (this.userState === 'away' && ev.isFinal) {
|
|
943
974
|
this.logger.debug('User returned from away state due to speech input');
|
|
@@ -969,6 +1000,7 @@ export class AgentSession<
|
|
|
969
1000
|
}
|
|
970
1001
|
|
|
971
1002
|
this._cancelUserAwayTimer();
|
|
1003
|
+
this._onAecWarmupExpired();
|
|
972
1004
|
this.off(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
|
|
973
1005
|
|
|
974
1006
|
if (this.activity) {
|
|
@@ -976,7 +1008,6 @@ export class AgentSession<
|
|
|
976
1008
|
try {
|
|
977
1009
|
await this.activity.interrupt({ force: true }).await;
|
|
978
1010
|
} catch (error) {
|
|
979
|
-
// Uninterruptible speech can throw during forced interruption.
|
|
980
1011
|
this.logger.warn({ error }, 'Error interrupting activity');
|
|
981
1012
|
}
|
|
982
1013
|
}
|