@livekit/agents 1.0.46 → 1.0.47
This diff shows the contents of publicly available package versions as published to their public registries. It is provided for informational purposes only.
- package/dist/cli.cjs +14 -20
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +14 -20
- package/dist/cli.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +14 -5
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +14 -5
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/llm/chat_context.cjs +19 -0
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +4 -0
- package/dist/llm/chat_context.d.ts +4 -0
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +19 -0
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/provider_format/index.cjs +2 -0
- package/dist/llm/provider_format/index.cjs.map +1 -1
- package/dist/llm/provider_format/index.d.cts +1 -1
- package/dist/llm/provider_format/index.d.ts +1 -1
- package/dist/llm/provider_format/index.d.ts.map +1 -1
- package/dist/llm/provider_format/index.js +6 -1
- package/dist/llm/provider_format/index.js.map +1 -1
- package/dist/llm/provider_format/openai.cjs +82 -2
- package/dist/llm/provider_format/openai.cjs.map +1 -1
- package/dist/llm/provider_format/openai.d.cts +1 -0
- package/dist/llm/provider_format/openai.d.ts +1 -0
- package/dist/llm/provider_format/openai.d.ts.map +1 -1
- package/dist/llm/provider_format/openai.js +80 -1
- package/dist/llm/provider_format/openai.js.map +1 -1
- package/dist/llm/provider_format/openai.test.cjs +326 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -1
- package/dist/llm/provider_format/openai.test.js +327 -1
- package/dist/llm/provider_format/openai.test.js.map +1 -1
- package/dist/llm/provider_format/utils.cjs +4 -3
- package/dist/llm/provider_format/utils.cjs.map +1 -1
- package/dist/llm/provider_format/utils.d.ts.map +1 -1
- package/dist/llm/provider_format/utils.js +4 -3
- package/dist/llm/provider_format/utils.js.map +1 -1
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +1 -0
- package/dist/llm/realtime.d.ts +1 -0
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js.map +1 -1
- package/dist/log.cjs +5 -2
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js +5 -2
- package/dist/log.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +15 -6
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +15 -6
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/utils.cjs +31 -2
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +7 -0
- package/dist/utils.d.ts +7 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +31 -2
- package/dist/utils.js.map +1 -1
- package/dist/utils.test.cjs +71 -0
- package/dist/utils.test.cjs.map +1 -1
- package/dist/utils.test.js +71 -0
- package/dist/utils.test.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.cjs.map +1 -1
- package/dist/version.d.cts +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.d.ts.map +1 -1
- package/dist/version.js +1 -1
- package/dist/version.js.map +1 -1
- package/dist/voice/agent.cjs +144 -12
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +29 -4
- package/dist/voice/agent.d.ts +29 -4
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +140 -11
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent.test.cjs +120 -0
- package/dist/voice/agent.test.cjs.map +1 -1
- package/dist/voice/agent.test.js +122 -2
- package/dist/voice/agent.test.js.map +1 -1
- package/dist/voice/agent_activity.cjs +383 -298
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +34 -7
- package/dist/voice/agent_activity.d.ts +34 -7
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +383 -293
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +140 -40
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +19 -7
- package/dist/voice/agent_session.d.ts +19 -7
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +137 -37
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +4 -0
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +4 -0
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/generation.cjs +39 -19
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +44 -20
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/index.cjs +2 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -1
- package/dist/voice/index.d.ts +1 -1
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +2 -1
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/speech_handle.cjs +7 -1
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +2 -0
- package/dist/voice/speech_handle.d.ts +2 -0
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +8 -2
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/voice/testing/run_result.cjs +66 -15
- package/dist/voice/testing/run_result.cjs.map +1 -1
- package/dist/voice/testing/run_result.d.cts +14 -3
- package/dist/voice/testing/run_result.d.ts +14 -3
- package/dist/voice/testing/run_result.d.ts.map +1 -1
- package/dist/voice/testing/run_result.js +66 -15
- package/dist/voice/testing/run_result.js.map +1 -1
- package/package.json +1 -1
- package/src/cli.ts +20 -33
- package/src/ipc/job_proc_lazy_main.ts +16 -5
- package/src/llm/chat_context.ts +35 -0
- package/src/llm/provider_format/index.ts +7 -2
- package/src/llm/provider_format/openai.test.ts +385 -1
- package/src/llm/provider_format/openai.ts +103 -0
- package/src/llm/provider_format/utils.ts +6 -4
- package/src/llm/realtime.ts +1 -0
- package/src/log.ts +5 -2
- package/src/stream/deferred_stream.ts +17 -6
- package/src/utils.test.ts +87 -0
- package/src/utils.ts +36 -2
- package/src/version.ts +1 -1
- package/src/voice/agent.test.ts +140 -2
- package/src/voice/agent.ts +189 -10
- package/src/voice/agent_activity.ts +427 -289
- package/src/voice/agent_session.ts +178 -40
- package/src/voice/audio_recognition.ts +4 -0
- package/src/voice/generation.ts +52 -23
- package/src/voice/index.ts +1 -1
- package/src/voice/speech_handle.ts +9 -2
- package/src/voice/testing/run_result.ts +81 -23

package/src/voice/agent_session.ts
CHANGED

@@ -1,12 +1,14 @@
 // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
+import { Mutex } from '@livekit/mutex';
 import type { AudioFrame, Room } from '@livekit/rtc-node';
 import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
 import type { Context, Span } from '@opentelemetry/api';
 import { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api';
 import { EventEmitter } from 'node:events';
 import type { ReadableStream } from 'node:stream/web';
+import type { z } from 'zod';
 import {
   LLM as InferenceLLM,
   STT as InferenceSTT,
@@ -31,6 +33,7 @@ import {
   type ResolvedSessionConnectOptions,
   type SessionConnectOptions,
 } from '../types.js';
+import { Task } from '../utils.js';
 import type { VAD } from '../vad.js';
 import type { Agent } from './agent.js';
 import { AgentActivity } from './agent_activity.js';
@@ -115,6 +118,13 @@ export type AgentSessionOptions<UserData = UnknownUserData> = {
   connOptions?: SessionConnectOptions;
 };

+type ActivityTransitionOptions = {
+  previousActivity?: 'close' | 'pause';
+  newActivity?: 'start' | 'resume';
+  blockedTasks?: Task<any>[];
+  waitOnEnter?: boolean;
+};
+
 export class AgentSession<
   UserData = UnknownUserData,
 > extends (EventEmitter as new () => TypedEmitter<AgentSessionCallbacks>) {
@@ -129,8 +139,10 @@
   private agent?: Agent;
   private activity?: AgentActivity;
   private nextActivity?: AgentActivity;
+  private updateActivityTask?: Task<void>;
   private started = false;
   private userState: UserState = 'listening';
+  private readonly activityLock = new Mutex();

   /** @internal */
   _roomIO?: RoomIO;
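The two new fields above introduce a mutex-guarded activity transition. A minimal sketch of the lock/release pattern the later hunks rely on, assuming only what this diff shows about `@livekit/mutex` (namely that `lock()` resolves to a release function); `withActivityLock` is a hypothetical helper, not part of the package:

```ts
import { Mutex } from '@livekit/mutex';

const activityLock = new Mutex();

// Hypothetical helper illustrating the lock() -> unlock() discipline used below.
async function withActivityLock<T>(criticalSection: () => Promise<T>): Promise<T> {
  const unlock = await activityLock.lock(); // waits for the previous holder to release
  try {
    return await criticalSection();
  } finally {
    unlock(); // always release, even if the critical section throws
  }
}
```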
@@ -360,7 +372,8 @@
     }

     // TODO(AJS-265): add shutdown callback to job context
-
+    // Initial start does not wait on onEnter
+    tasks.push(this._updateActivity(this.agent, { waitOnEnter: false }));

     await Promise.allSettled(tasks);

@@ -432,8 +445,34 @@
   updateAgent(agent: Agent): void {
     this.agent = agent;

-    if (this.started) {
-
+    if (!this.started) {
+      return;
+    }
+
+    const _updateActivityTask = async (oldTask: Task<void> | undefined, agent: Agent) => {
+      if (oldTask) {
+        try {
+          await oldTask.result;
+        } catch (error) {
+          this.logger.error(error, 'previous updateAgent transition failed');
+        }
+      }
+
+      await this._updateActivity(agent);
+    };
+
+    const oldTask = this.updateActivityTask;
+    this.updateActivityTask = Task.from(
+      async () => _updateActivityTask(oldTask, agent),
+      undefined,
+      'AgentSession_updateActivityTask',
+    );
+
+    const runState = this._globalRunState;
+    if (runState) {
+      // Don't mark the RunResult as done, if there is currently an agent transition happening.
+      // (used to make sure we're correctly adding the AgentHandoffResult before completion)
+      runState._watchHandle(this.updateActivityTask);
     }
   }

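`updateAgent` now queues transitions instead of switching immediately. A simplified stand-in for that chaining pattern, using plain Promises in place of the package's internal `Task` helper (`switchToAgent` is a hypothetical function):

```ts
// Each update waits for the previous transition to settle before running, so
// agent switches execute strictly in call order and one failure does not block
// the transitions queued after it.
let lastTransition: Promise<void> = Promise.resolve();

function queueTransition(run: () => Promise<void>): Promise<void> {
  const next = lastTransition
    .catch((error) => console.error('previous transition failed', error))
    .then(run);
  lastTransition = next;
  return next;
}

// queueTransition(() => switchToAgent('billing'));
// queueTransition(() => switchToAgent('support')); // runs only after the first settles
```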
@@ -464,24 +503,42 @@
       throw new Error('AgentSession is not running');
     }

-    const doSay = (activity: AgentActivity) => {
+    const doSay = (activity: AgentActivity, nextActivity?: AgentActivity) => {
+      if (activity.schedulingPaused) {
+        if (!nextActivity) {
+          throw new Error('AgentSession is closing, cannot use say()');
+        }
+        return nextActivity.say(text, options);
+      }
       return activity.say(text, options);
     };

+    const runState = this._globalRunState;
+    let handle: SpeechHandle;
+
     // attach to the session span if called outside of the AgentSession
     const activeSpan = trace.getActiveSpan();
     if (!activeSpan && this.rootSpanContext) {
-
+      handle = otelContext.with(this.rootSpanContext, () =>
+        doSay(this.activity!, this.nextActivity),
+      );
+    } else {
+      handle = doSay(this.activity, this.nextActivity);
+    }
+
+    if (runState) {
+      runState._watchHandle(handle);
     }

-    return
+    return handle;
   }

-  interrupt() {
+  interrupt(options?: { force?: boolean }) {
     if (!this.activity) {
       throw new Error('AgentSession is not running');
     }
-
+
+    return this.activity.interrupt(options);
   }

   generateReply(options?: {
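A hedged usage sketch of the updated surface shown above: `say()` now returns its `SpeechHandle` in every code path, and `interrupt()` accepts an options bag. It assumes an already-started `session` and uses only methods referenced elsewhere in this diff:

```ts
import type { AgentSession } from '@livekit/agents';

async function greetThenInterrupt(session: AgentSession) {
  const handle = session.say('Welcome! Here is the full disclaimer…');

  // After two seconds, cut the speech short. Omitting `force` keeps the normal,
  // non-forced behavior; the session itself passes { force: true } while closing.
  await new Promise((resolve) => setTimeout(resolve, 2_000));
  session.interrupt();

  await handle.waitForPlayout();
}
```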
@@ -502,7 +559,7 @@
       : undefined;

     const doGenerateReply = (activity: AgentActivity, nextActivity?: AgentActivity) => {
-      if (activity.
+      if (activity.schedulingPaused) {
        if (!nextActivity) {
          throw new Error('AgentSession is closing, cannot use generateReply()');
        }
@@ -542,53 +599,128 @@
    * result.expect.noMoreEvents();
    * ```
    *
-   * @param options - Run options including user input
+   * @param options - Run options including user input and optional output type
    * @returns A RunResult that resolves when the agent finishes responding
-   *
-   * TODO: Add outputType parameter for typed outputs (parity with Python)
    */
-  run
+  run<T = unknown>({
+    userInput,
+    outputType,
+  }: {
+    userInput: string;
+    outputType?: z.ZodType<T>;
+  }): RunResult<T> {
     if (this._globalRunState && !this._globalRunState.done()) {
       throw new Error('nested runs are not supported');
     }

-    const runState = new RunResult({
+    const runState = new RunResult<T>({
+      userInput,
+      outputType,
+    });
+
     this._globalRunState = runState;
-
+
+    // Defer generateReply through the activityLock to ensure any in-progress
+    // activity transition (e.g. AgentTask started from onEnter) completes first.
+    // TS Task.from starts onEnter synchronously, so the transition may already be
+    // mid-flight by the time run() is called after session.start() resolves.
+    // Acquiring and immediately releasing the lock guarantees FIFO ordering:
+    // the transition's lock section finishes before we route generateReply.
+    (async () => {
+      try {
+        const unlock = await this.activityLock.lock();
+        unlock();
+        this.generateReply({ userInput });
+      } catch (e) {
+        runState._reject(e instanceof Error ? e : new Error(String(e)));
+      }
+    })();

     return runState;
   }

-
+  /** @internal */
+  async _updateActivity(agent: Agent, options: ActivityTransitionOptions = {}): Promise<void> {
+    const { previousActivity = 'close', newActivity = 'start', blockedTasks = [] } = options;
+    const waitOnEnter = options.waitOnEnter ?? newActivity === 'start';
+
     const runWithContext = async () => {
-
-
+      const unlock = await this.activityLock.lock();
+      let onEnterTask: Task<void> | undefined;

-
+      try {
+        this.agent = agent;
+        const prevActivityObj = this.activity;
+
+        if (newActivity === 'start') {
+          const prevAgent = prevActivityObj?.agent;
+          if (
+            agent._agentActivity &&
+            // allow updating the same agent that is running
+            (agent !== prevAgent || previousActivity !== 'close')
+          ) {
+            throw new Error('Cannot start agent: an activity is already running');
+          }
+          this.nextActivity = new AgentActivity(agent, this);
+        } else if (newActivity === 'resume') {
+          if (!agent._agentActivity) {
+            throw new Error('Cannot resume agent: no existing activity to resume');
+          }
+          this.nextActivity = agent._agentActivity;
+        }

-
-
-
-
+        if (prevActivityObj && prevActivityObj !== this.nextActivity) {
+          if (previousActivity === 'pause') {
+            await prevActivityObj.pause({ blockedTasks });
+          } else {
+            await prevActivityObj.drain();
+            await prevActivityObj.close();
+          }
+        }

-
-
+        this.activity = this.nextActivity;
+        this.nextActivity = undefined;

-
-        new AgentHandoffItem({
-          oldAgentId:
+        const runState = this._globalRunState;
+        const handoffItem = new AgentHandoffItem({
+          oldAgentId: prevActivityObj?.agent.id,
           newAgentId: agent.id,
-        })
-      );
-      this.logger.debug(
-        { previousAgentId: previousActivity?.agent.id, newAgentId: agent.id },
-        'Agent handoff inserted into chat context',
-      );
+        });

-
+        if (runState) {
+          runState._agentHandoff({
+            item: handoffItem,
+            oldAgent: prevActivityObj?.agent,
+            newAgent: this.activity!.agent,
+          });
+        }
+
+        this._chatCtx.insert(handoffItem);
+        this.logger.debug(
+          { previousAgentId: prevActivityObj?.agent.id, newAgentId: agent.id },
+          'Agent handoff inserted into chat context',
+        );
+
+        if (newActivity === 'start') {
+          await this.activity!.start();
+        } else {
+          await this.activity!.resume();
+        }
+
+        onEnterTask = this.activity!._onEnterTask;
+
+        if (this._input.audio) {
+          this.activity!.attachAudioInput(this._input.audio.stream);
+        }
+      } finally {
+        unlock();
+      }

-      if (
-
+      if (waitOnEnter) {
+        if (!onEnterTask) {
+          throw new Error('expected onEnter task to be available while waitOnEnter=true');
+        }
+        await onEnterTask.result;
      }
    };

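The `run()` signature above gains a zod-typed `outputType`. A hedged sketch of calling it, based only on that signature; the schema, prompt, and helper name are made up, and `zod` must be installed separately since the package imports only its types:

```ts
import { z } from 'zod';
import type { AgentSession } from '@livekit/agents';

const OrderSummary = z.object({
  item: z.string(),
  quantity: z.number().int().positive(),
});

function runTypedTurn(session: AgentSession) {
  // RunResult is now generic over the schema's inferred output type.
  const result = session.run({
    userInput: 'Add two oat-milk lattes to my order.',
    outputType: OrderSummary,
  });

  // The existing testing helpers still apply, e.g. result.expect.noMoreEvents();
  return result;
}
```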
@@ -842,15 +974,21 @@
     if (this.activity) {
       if (!drain) {
         try {
-          this.activity.interrupt();
+          await this.activity.interrupt({ force: true }).await;
         } catch (error) {
-          //
-
+          // Uninterruptible speech can throw during forced interruption.
+          this.logger.warn({ error }, 'Error interrupting activity');
         }
       }
+
       await this.activity.drain();
       // wait any uninterruptible speech to finish
       await this.activity.currentSpeech?.waitForPlayout();
+
+      if (reason !== CloseReason.ERROR) {
+        this.activity.commitUserTurn({ audioDetached: true, throwIfNotReady: false });
+      }
+
       try {
         this.activity.detachAudioInput();
       } catch (error) {

package/src/voice/audio_recognition.ts
CHANGED

@@ -768,6 +768,10 @@ export class AudioRecognition {
         this.logger.debug('User turn committed');
       })
       .catch((err: unknown) => {
+        if (err instanceof Error && err.name === 'AbortError') {
+          this.logger.debug('User turn commit task cancelled');
+          return;
+        }
         this.logger.error(err, 'Error in user turn commit task:');
       });
   }
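The new branch above treats AbortError rejections as expected cancellation rather than failures. A self-contained illustration of the same pattern using Node's promisified timer, which rejects with an error named 'AbortError' when its signal fires:

```ts
import { setTimeout as sleep } from 'node:timers/promises';

async function commitAfterDelay(signal: AbortSignal): Promise<void> {
  await sleep(500, undefined, { signal }); // rejects with an AbortError on abort
  console.debug('User turn committed');
}

export async function runCommit(signal: AbortSignal): Promise<void> {
  try {
    await commitAfterDelay(signal);
  } catch (err) {
    if (err instanceof Error && err.name === 'AbortError') {
      console.debug('User turn commit task cancelled'); // expected, not an error
      return;
    }
    console.error('Error in user turn commit task:', err);
  }
}
```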
package/src/voice/generation.ts
CHANGED
@@ -26,7 +26,13 @@ import { IdentityTransform } from '../stream/identity_transform.js';
 import { traceTypes, tracer } from '../telemetry/index.js';
 import { USERDATA_TIMED_TRANSCRIPT } from '../types.js';
 import { Future, Task, shortuuid, toError, waitForAbort } from '../utils.js';
-import {
+import {
+  type Agent,
+  type ModelSettings,
+  _setActivityTaskInfo,
+  functionCallStorage,
+  isStopResponse,
+} from './agent.js';
 import type { AgentSession } from './agent_session.js';
 import {
   AudioOutput,
@@ -719,7 +725,7 @@ export interface _AudioOut {

 async function forwardAudio(
   ttsStream: ReadableStream<AudioFrame>,
-
+  audioOutput: AudioOutput,
   out: _AudioOut,
   signal?: AbortSignal,
 ): Promise<void> {
@@ -733,8 +739,8 @@ async function forwardAudio(
   };

   try {
-
-
+    audioOutput.on(AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
+    audioOutput.resume();

     while (true) {
       if (signal?.aborted) {
@@ -748,36 +754,36 @@

       if (
         !out.firstFrameFut.done &&
-
-
+        audioOutput.sampleRate &&
+        audioOutput.sampleRate !== frame.sampleRate &&
         !resampler
       ) {
-        resampler = new AudioResampler(frame.sampleRate,
+        resampler = new AudioResampler(frame.sampleRate, audioOutput.sampleRate, 1);
       }

       if (resampler) {
         for (const f of resampler.push(frame)) {
-          await
+          await audioOutput.captureFrame(f);
         }
       } else {
-        await
+        await audioOutput.captureFrame(frame);
       }
     }

     if (resampler) {
       for (const f of resampler.flush()) {
-        await
+        await audioOutput.captureFrame(f);
       }
     }
   } finally {
-
+    audioOutput.off(AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);

     if (!out.firstFrameFut.done) {
       out.firstFrameFut.reject(new Error('audio forwarding cancelled before playback started'));
     }

     reader?.releaseLock();
-
+    audioOutput.flush();
   }
 }

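The hunk above lazily creates an `AudioResampler` the first time a frame's sample rate differs from the output's, routes every frame through it, and drains it at the end. A standalone sketch of that pattern against `@livekit/rtc-node`; `captureFrame` stands in for the audio output sink and is an assumption, not the package API:

```ts
import { AudioResampler, type AudioFrame } from '@livekit/rtc-node';

async function forwardResampled(
  frames: AsyncIterable<AudioFrame>,
  outputSampleRate: number,
  captureFrame: (frame: AudioFrame) => Promise<void>,
): Promise<void> {
  let resampler: AudioResampler | undefined;

  for await (const frame of frames) {
    // Create the resampler lazily, only once a mismatch is observed.
    if (!resampler && frame.sampleRate !== outputSampleRate) {
      resampler = new AudioResampler(frame.sampleRate, outputSampleRate, 1);
    }

    if (resampler) {
      for (const resampled of resampler.push(frame)) {
        await captureFrame(resampled);
      }
    } else {
      await captureFrame(frame);
    }
  }

  // Drain whatever the resampler is still buffering.
  if (resampler) {
    for (const resampled of resampler.flush()) {
      await captureFrame(resampled);
    }
  }
}
```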
@@ -836,7 +842,7 @@ export function performToolExecutions({
     const signal = controller.signal;
     const reader = toolCallStream.getReader();

-    const tasks:
+    const tasks: Task<void>[] = [];
     while (!signal.aborted) {
       const { done, value: toolCall } = await reader.read();
       if (signal.aborted) break;
@@ -929,14 +935,6 @@
         'Executing LLM tool call',
       );

-      const toolExecution = asyncLocalStorage.run({ functionCall: toolCall }, async () => {
-        return await tool.execute(parsedArgs, {
-          ctx: new RunContext(session, speechHandle, toolCall),
-          toolCallId: toolCall.callId,
-          abortSignal: signal,
-        });
-      });
-
       const _tracableToolExecutionImpl = async (toolExecTask: Promise<unknown>, span: Span) => {
         span.setAttribute(traceTypes.ATTR_FUNCTION_TOOL_NAME, toolCall.name);
         span.setAttribute(traceTypes.ATTR_FUNCTION_TOOL_ARGS, toolCall.args);
@@ -993,11 +991,42 @@
         name: 'function_tool',
       });

+      const toolTask = Task.from(
+        async () => {
+          // Ensure this task is marked inline before user tool code executes.
+          const currentTask = Task.current();
+          if (currentTask) {
+            _setActivityTaskInfo(currentTask, {
+              speechHandle,
+              functionCall: toolCall,
+              inlineTask: true,
+            });
+          }
+
+          const toolExecution = functionCallStorage.run({ functionCall: toolCall }, async () => {
+            return await tool.execute(parsedArgs, {
+              ctx: new RunContext(session, speechHandle, toolCall),
+              toolCallId: toolCall.callId,
+              abortSignal: signal,
+            });
+          });
+
+          await tracableToolExecution(toolExecution);
+        },
+        controller,
+        `performToolExecution:${toolCall.name}`,
+      );
+
+      _setActivityTaskInfo(toolTask, {
+        speechHandle,
+        functionCall: toolCall,
+        inlineTask: true,
+      });
       // wait, not cancelling all tool calling tasks
-      tasks.push(
+      tasks.push(toolTask);
     }

-    await Promise.allSettled(tasks);
+    await Promise.allSettled(tasks.map((task) => task.result));
     if (toolOutput.output.length > 0) {
       logger.debug(
         {
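The final await changed because tool executions are now wrapped in `Task` objects, which are handles with a `.result` promise rather than thenables. A simplified stand-in (`MiniTask` is hypothetical) showing why the settle call must target `task.result`:

```ts
class MiniTask<T> {
  readonly result: Promise<T>;
  constructor(fn: () => Promise<T>, readonly name?: string) {
    this.result = fn();
  }
}

async function runAllTools(jobs: Array<() => Promise<void>>): Promise<void> {
  const tasks = jobs.map((job, i) => new MiniTask(job, `performToolExecution:${i}`));

  // Promise.allSettled(tasks) would resolve immediately, since a MiniTask is not
  // a thenable; awaiting the underlying promises is what actually waits for the tools.
  await Promise.allSettled(tasks.map((task) => task.result));
}
```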
package/src/voice/index.ts
CHANGED
@@ -1,7 +1,7 @@
 // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-export { Agent, StopResponse, type AgentOptions, type ModelSettings } from './agent.js';
+export { Agent, AgentTask, StopResponse, type AgentOptions, type ModelSettings } from './agent.js';
 export { AgentSession, type AgentSessionOptions, type VoiceOptions } from './agent_session.js';
 export * from './avatar/index.js';
 export * from './background_audio.js';

package/src/voice/speech_handle.ts
CHANGED

@@ -5,7 +5,7 @@ import type { Context } from '@opentelemetry/api';
 import type { ChatItem } from '../llm/index.js';
 import type { Task } from '../utils.js';
 import { Event, Future, shortuuid } from '../utils.js';
-import {
+import { functionCallStorage } from './agent.js';

 /** Symbol used to identify SpeechHandle instances */
 const SPEECH_HANDLE_SYMBOL = Symbol.for('livekit.agents.SpeechHandle');
@@ -46,6 +46,9 @@ export class SpeechHandle {
   /** @internal - OpenTelemetry context for the agent turn span */
   _agentTurnContext?: Context;

+  /** @internal - used by AgentTask/RunResult final output plumbing */
+  _maybeRunFinalOutput?: unknown;
+
   private itemAddedCallbacks: Set<(item: ChatItem) => void> = new Set();
   private doneCallbacks: Set<(sh: SpeechHandle) => void> = new Set();

@@ -148,7 +151,7 @@
    * has entirely played out, including any tool calls and response follow-ups.
    */
   async waitForPlayout(): Promise<void> {
-    const store =
+    const store = functionCallStorage.getStore();
     if (store && store?.functionCall) {
       throw new Error(
         `Cannot call 'SpeechHandle.waitForPlayout()' from inside the function tool '${store.functionCall.name}'. ` +
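The guard above reads `functionCallStorage`, which, judging by its `run()`/`getStore()` usage in the generation.ts hunks, behaves like an `AsyncLocalStorage` carrying the function call currently executing. A self-contained sketch of that guard pattern; the store shape is an assumption based on the `store.functionCall` access:

```ts
import { AsyncLocalStorage } from 'node:async_hooks';

const functionCallStorage = new AsyncLocalStorage<{ functionCall?: { name: string } }>();

function assertNotInsideTool(): void {
  const store = functionCallStorage.getStore();
  if (store?.functionCall) {
    throw new Error(
      `Cannot call 'SpeechHandle.waitForPlayout()' from inside the function tool '${store.functionCall.name}'.`,
    );
  }
}

// Inside a tool execution the guard trips; outside it passes.
functionCallStorage.run({ functionCall: { name: 'get_weather' } }, () => {
  try {
    assertNotInsideTool();
  } catch (err) {
    console.warn((err as Error).message);
  }
});
assertNotInsideTool(); // no active store here, so this returns normally
```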
@@ -167,6 +170,10 @@
   }

   addDoneCallback(callback: (sh: SpeechHandle) => void) {
+    if (this.done()) {
+      queueMicrotask(() => callback(this));
+      return;
+    }
     this.doneCallbacks.add(callback);
   }

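The change above makes late subscribers safe: a done-callback registered after the handle has completed now fires on a microtask instead of being dropped. A minimal reproduction with a simplified stand-in class (not the real `SpeechHandle`):

```ts
class DoneNotifier {
  private isDone = false;
  private readonly callbacks = new Set<() => void>();

  markDone(): void {
    this.isDone = true;
    for (const cb of this.callbacks) cb();
  }

  addDoneCallback(cb: () => void): void {
    if (this.isDone) {
      queueMicrotask(cb); // still notify subscribers that arrive after completion
      return;
    }
    this.callbacks.add(cb);
  }
}

const notifier = new DoneNotifier();
notifier.markDone();
notifier.addDoneCallback(() => console.log('still notified after done'));
```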