@livekit/agents 1.0.49 → 1.0.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +12 -10
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +13 -13
- package/dist/index.d.ts +13 -13
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -10
- package/dist/index.js.map +1 -1
- package/dist/inference/api_protos.d.cts +67 -67
- package/dist/inference/api_protos.d.ts +67 -67
- package/dist/inference/llm.cjs +10 -8
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +1 -1
- package/dist/inference/llm.d.ts +1 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +3 -7
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +20 -12
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +3 -2
- package/dist/inference/stt.d.ts +3 -2
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +20 -12
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/stt.test.cjs +14 -0
- package/dist/inference/stt.test.cjs.map +1 -1
- package/dist/inference/stt.test.js +14 -0
- package/dist/inference/stt.test.js.map +1 -1
- package/dist/inference/tts.cjs +13 -4
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +2 -1
- package/dist/inference/tts.d.ts +2 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +13 -4
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/tts.test.cjs +10 -0
- package/dist/inference/tts.test.cjs.map +1 -1
- package/dist/inference/tts.test.js +10 -0
- package/dist/inference/tts.test.js.map +1 -1
- package/dist/inference/utils.cjs +5 -5
- package/dist/inference/utils.cjs.map +1 -1
- package/dist/inference/utils.js +1 -1
- package/dist/inference/utils.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +13 -4
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +13 -4
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/language.cjs +394 -0
- package/dist/language.cjs.map +1 -0
- package/dist/language.d.cts +15 -0
- package/dist/language.d.ts +15 -0
- package/dist/language.d.ts.map +1 -0
- package/dist/language.js +363 -0
- package/dist/language.js.map +1 -0
- package/dist/language.test.cjs +43 -0
- package/dist/language.test.cjs.map +1 -0
- package/dist/language.test.js +49 -0
- package/dist/language.test.js.map +1 -0
- package/dist/stream/deferred_stream.cjs +6 -2
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +6 -2
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +2 -1
- package/dist/stt/stt.d.ts +2 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent_activity.cjs +4 -1
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +4 -1
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_activity.test.cjs +135 -0
- package/dist/voice/agent_activity.test.cjs.map +1 -0
- package/dist/voice/agent_activity.test.js +134 -0
- package/dist/voice/agent_activity.test.js.map +1 -0
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +3 -2
- package/dist/voice/audio_recognition.d.ts +3 -2
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +3 -2
- package/dist/voice/events.d.ts +3 -2
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js.map +1 -1
- package/package.json +1 -1
- package/src/index.ts +13 -15
- package/src/inference/llm.ts +3 -8
- package/src/inference/stt.test.ts +17 -0
- package/src/inference/stt.ts +22 -14
- package/src/inference/tts.test.ts +12 -0
- package/src/inference/tts.ts +14 -5
- package/src/inference/utils.ts +1 -1
- package/src/ipc/job_proc_lazy_main.ts +15 -4
- package/src/language.test.ts +62 -0
- package/src/language.ts +380 -0
- package/src/stream/deferred_stream.ts +5 -1
- package/src/stt/stt.ts +2 -1
- package/src/voice/agent_activity.test.ts +194 -0
- package/src/voice/agent_activity.ts +11 -1
- package/src/voice/audio_recognition.ts +4 -3
- package/src/voice/events.ts +3 -2
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Regression tests for mainTask speech handle processing.
|
|
7
|
+
*
|
|
8
|
+
* When a speech handle is interrupted after _authorizeGeneration() but before the
|
|
9
|
+
* reply task calls _markGenerationDone(), mainTask hangs on _waitForGeneration()
|
|
10
|
+
* indefinitely. All subsequent speech handles queue behind it and the agent becomes
|
|
11
|
+
* unresponsive.
|
|
12
|
+
*
|
|
13
|
+
* Fix: race _waitForGeneration() against the interrupt future via waitIfNotInterrupted().
|
|
14
|
+
*
|
|
15
|
+
* Related: #1124, #1089, #836
|
|
16
|
+
*/
|
|
17
|
+
import { Heap } from 'heap-js';
|
|
18
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
19
|
+
import { Future } from '../utils.js';
|
|
20
|
+
import { AgentActivity } from './agent_activity.js';
|
|
21
|
+
import { SpeechHandle } from './speech_handle.js';
|
|
22
|
+
|
|
23
|
+
// Break circular dependency: agent_activity.ts → agent.js → beta/workflows/task_group.ts
|
|
24
|
+
vi.mock('./agent.js', () => {
|
|
25
|
+
class Agent {}
|
|
26
|
+
class AgentTask extends Agent {}
|
|
27
|
+
class StopResponse {}
|
|
28
|
+
return {
|
|
29
|
+
Agent,
|
|
30
|
+
AgentTask,
|
|
31
|
+
StopResponse,
|
|
32
|
+
_getActivityTaskInfo: () => null,
|
|
33
|
+
_setActivityTaskInfo: () => {},
|
|
34
|
+
functionCallStorage: {
|
|
35
|
+
getStore: () => undefined,
|
|
36
|
+
enterWith: () => {},
|
|
37
|
+
run: (_: unknown, fn: () => unknown) => fn(),
|
|
38
|
+
},
|
|
39
|
+
speechHandleStorage: {
|
|
40
|
+
getStore: () => undefined,
|
|
41
|
+
enterWith: () => {},
|
|
42
|
+
},
|
|
43
|
+
};
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
vi.mock('../version.js', () => ({ version: '0.0.0-test' }));
|
|
47
|
+
|
|
48
|
+
async function raceTimeout(promise: Promise<unknown>, ms: number): Promise<'resolved' | 'timeout'> {
|
|
49
|
+
let timer: ReturnType<typeof setTimeout>;
|
|
50
|
+
const timeout = new Promise<'timeout'>((resolve) => {
|
|
51
|
+
timer = setTimeout(() => resolve('timeout'), ms);
|
|
52
|
+
});
|
|
53
|
+
return Promise.race([promise.then(() => 'resolved' as const), timeout]).finally(() =>
|
|
54
|
+
clearTimeout(timer),
|
|
55
|
+
);
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Build a minimal stand-in with just enough state for mainTask to run.
|
|
60
|
+
*
|
|
61
|
+
* mainTask accesses: q_updated, speechQueue, _currentSpeech, _schedulingPaused,
|
|
62
|
+
* getDrainPendingSpeechTasks(), and logger. We provide stubs for all of these,
|
|
63
|
+
* then bind the real AgentActivity.prototype.mainTask to this object.
|
|
64
|
+
*/
|
|
65
|
+
function buildMainTaskRunner() {
|
|
66
|
+
const q_updated = new Future<void>();
|
|
67
|
+
type HeapItem = [number, number, SpeechHandle];
|
|
68
|
+
const speechQueue = new Heap<HeapItem>((a: HeapItem, b: HeapItem) => b[0] - a[0] || a[1] - b[1]);
|
|
69
|
+
|
|
70
|
+
const fakeActivity = {
|
|
71
|
+
q_updated,
|
|
72
|
+
speechQueue,
|
|
73
|
+
_currentSpeech: undefined as SpeechHandle | undefined,
|
|
74
|
+
_schedulingPaused: false,
|
|
75
|
+
getDrainPendingSpeechTasks: () => [],
|
|
76
|
+
logger: {
|
|
77
|
+
info: () => {},
|
|
78
|
+
debug: () => {},
|
|
79
|
+
warn: () => {},
|
|
80
|
+
error: () => {},
|
|
81
|
+
},
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
const mainTask = (AgentActivity.prototype as Record<string, unknown>).mainTask as (
|
|
85
|
+
signal: AbortSignal,
|
|
86
|
+
) => Promise<void>;
|
|
87
|
+
|
|
88
|
+
return {
|
|
89
|
+
fakeActivity,
|
|
90
|
+
mainTask: mainTask.bind(fakeActivity),
|
|
91
|
+
speechQueue,
|
|
92
|
+
q_updated,
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
describe('AgentActivity - mainTask', () => {
|
|
97
|
+
it('should recover when speech handle is interrupted after authorization', async () => {
|
|
98
|
+
const { fakeActivity, mainTask, speechQueue, q_updated } = buildMainTaskRunner();
|
|
99
|
+
|
|
100
|
+
const handle = SpeechHandle.create({ allowInterruptions: true });
|
|
101
|
+
|
|
102
|
+
speechQueue.push([SpeechHandle.SPEECH_PRIORITY_NORMAL, 1, handle]);
|
|
103
|
+
handle._markScheduled();
|
|
104
|
+
q_updated.resolve();
|
|
105
|
+
|
|
106
|
+
const ac = new AbortController();
|
|
107
|
+
const mainTaskPromise = mainTask(ac.signal);
|
|
108
|
+
|
|
109
|
+
// Give mainTask time to pop the handle and call _authorizeGeneration
|
|
110
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
111
|
+
|
|
112
|
+
// Interrupt while waiting for generation
|
|
113
|
+
handle.interrupt();
|
|
114
|
+
|
|
115
|
+
// Let mainTask react to the interrupt, then signal exit
|
|
116
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
117
|
+
fakeActivity._schedulingPaused = true;
|
|
118
|
+
fakeActivity.q_updated = new Future();
|
|
119
|
+
fakeActivity.q_updated.resolve();
|
|
120
|
+
ac.abort();
|
|
121
|
+
|
|
122
|
+
const result = await raceTimeout(mainTaskPromise, 2000);
|
|
123
|
+
expect(result).toBe('resolved');
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
it('should process next queued handle after an interrupted one', async () => {
|
|
127
|
+
const { fakeActivity, mainTask, speechQueue, q_updated } = buildMainTaskRunner();
|
|
128
|
+
|
|
129
|
+
const handleA = SpeechHandle.create({ allowInterruptions: true });
|
|
130
|
+
const handleB = SpeechHandle.create({ allowInterruptions: true });
|
|
131
|
+
|
|
132
|
+
speechQueue.push([SpeechHandle.SPEECH_PRIORITY_NORMAL, 1, handleA]);
|
|
133
|
+
handleA._markScheduled();
|
|
134
|
+
speechQueue.push([SpeechHandle.SPEECH_PRIORITY_NORMAL, 2, handleB]);
|
|
135
|
+
handleB._markScheduled();
|
|
136
|
+
q_updated.resolve();
|
|
137
|
+
|
|
138
|
+
const ac = new AbortController();
|
|
139
|
+
const mainTaskPromise = mainTask(ac.signal);
|
|
140
|
+
|
|
141
|
+
// Wait for mainTask to pick up handle A
|
|
142
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
143
|
+
|
|
144
|
+
// Interrupt handle A
|
|
145
|
+
handleA.interrupt();
|
|
146
|
+
|
|
147
|
+
// Wait for mainTask to move to handle B and authorize it
|
|
148
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
149
|
+
|
|
150
|
+
// Resolve handle B's generation (simulating normal reply task completion).
|
|
151
|
+
// If mainTask is stuck on handle A (bug), handle B was never authorized and this
|
|
152
|
+
// throws — we catch it and let the timeout assert the real failure.
|
|
153
|
+
try {
|
|
154
|
+
handleB._markGenerationDone();
|
|
155
|
+
} catch {
|
|
156
|
+
// Expected when fix is absent: handle B has no active generation
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// Let mainTask finish
|
|
160
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
161
|
+
fakeActivity._schedulingPaused = true;
|
|
162
|
+
fakeActivity.q_updated = new Future();
|
|
163
|
+
fakeActivity.q_updated.resolve();
|
|
164
|
+
ac.abort();
|
|
165
|
+
|
|
166
|
+
const result = await raceTimeout(mainTaskPromise, 2000);
|
|
167
|
+
expect(result).toBe('resolved');
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
it('should skip handles that were interrupted before being popped', async () => {
|
|
171
|
+
const { fakeActivity, mainTask, speechQueue, q_updated } = buildMainTaskRunner();
|
|
172
|
+
|
|
173
|
+
const handle = SpeechHandle.create({ allowInterruptions: true });
|
|
174
|
+
|
|
175
|
+
// Interrupt before mainTask ever sees it
|
|
176
|
+
handle.interrupt();
|
|
177
|
+
|
|
178
|
+
speechQueue.push([SpeechHandle.SPEECH_PRIORITY_NORMAL, 1, handle]);
|
|
179
|
+
handle._markScheduled();
|
|
180
|
+
q_updated.resolve();
|
|
181
|
+
|
|
182
|
+
const ac = new AbortController();
|
|
183
|
+
const mainTaskPromise = mainTask(ac.signal);
|
|
184
|
+
|
|
185
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
186
|
+
fakeActivity._schedulingPaused = true;
|
|
187
|
+
fakeActivity.q_updated = new Future();
|
|
188
|
+
fakeActivity.q_updated.resolve();
|
|
189
|
+
ac.abort();
|
|
190
|
+
|
|
191
|
+
const result = await raceTimeout(mainTaskPromise, 2000);
|
|
192
|
+
expect(result).toBe('resolved');
|
|
193
|
+
});
|
|
194
|
+
});
|
|
@@ -1040,9 +1040,19 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1040
1040
|
throw new Error('Speech queue is empty');
|
|
1041
1041
|
}
|
|
1042
1042
|
const speechHandle = heapItem[2];
|
|
1043
|
+
|
|
1044
|
+
// Skip speech handles that were already interrupted/done before being
|
|
1045
|
+
// picked up from the queue (e.g. interrupted during shutdown before the
|
|
1046
|
+
// main loop had a chance to process them). Calling _authorizeGeneration
|
|
1047
|
+
// on a done handle would create a generation Future that nobody resolves,
|
|
1048
|
+
// causing the main loop to hang forever.
|
|
1049
|
+
if (speechHandle.interrupted || speechHandle.done()) {
|
|
1050
|
+
continue;
|
|
1051
|
+
}
|
|
1052
|
+
|
|
1043
1053
|
this._currentSpeech = speechHandle;
|
|
1044
1054
|
speechHandle._authorizeGeneration();
|
|
1045
|
-
await speechHandle._waitForGeneration();
|
|
1055
|
+
await speechHandle.waitIfNotInterrupted([speechHandle._waitForGeneration()]);
|
|
1046
1056
|
this._currentSpeech = undefined;
|
|
1047
1057
|
}
|
|
1048
1058
|
|
|
@@ -12,6 +12,7 @@ import {
|
|
|
12
12
|
} from '@opentelemetry/api';
|
|
13
13
|
import type { WritableStreamDefaultWriter } from 'node:stream/web';
|
|
14
14
|
import { ReadableStream } from 'node:stream/web';
|
|
15
|
+
import type { LanguageCode } from '../language.js';
|
|
15
16
|
import { type ChatContext } from '../llm/chat_context.js';
|
|
16
17
|
import { log } from '../log.js';
|
|
17
18
|
import { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';
|
|
@@ -58,8 +59,8 @@ export interface RecognitionHooks {
|
|
|
58
59
|
}
|
|
59
60
|
|
|
60
61
|
export interface _TurnDetector {
|
|
61
|
-
unlikelyThreshold: (language?: string) => Promise<number | undefined>;
|
|
62
|
-
supportsLanguage: (language?: string) => Promise<boolean>;
|
|
62
|
+
unlikelyThreshold: (language?: LanguageCode) => Promise<number | undefined>;
|
|
63
|
+
supportsLanguage: (language?: LanguageCode) => Promise<boolean>;
|
|
63
64
|
predictEndOfTurn(chatCtx: ChatContext): Promise<number>;
|
|
64
65
|
}
|
|
65
66
|
|
|
@@ -106,7 +107,7 @@ export class AudioRecognition {
|
|
|
106
107
|
private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;
|
|
107
108
|
private minEndpointingDelay: number;
|
|
108
109
|
private maxEndpointingDelay: number;
|
|
109
|
-
private lastLanguage?: string;
|
|
110
|
+
private lastLanguage?: LanguageCode;
|
|
110
111
|
private rootSpanContext?: Context;
|
|
111
112
|
private sttModel?: string;
|
|
112
113
|
private sttProvider?: string;
|
package/src/voice/events.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import type { LanguageCode } from '../language.js';
|
|
4
5
|
import type {
|
|
5
6
|
ChatMessage,
|
|
6
7
|
FunctionCall,
|
|
@@ -87,7 +88,7 @@ export type UserInputTranscribedEvent = {
|
|
|
87
88
|
/** Not supported yet. Always null by default. */
|
|
88
89
|
speakerId: string | null;
|
|
89
90
|
createdAt: number;
|
|
90
|
-
language: string | null;
|
|
91
|
+
language: LanguageCode | null;
|
|
91
92
|
};
|
|
92
93
|
|
|
93
94
|
export const createUserInputTranscribedEvent = ({
|
|
@@ -100,7 +101,7 @@ export const createUserInputTranscribedEvent = ({
|
|
|
100
101
|
transcript: string;
|
|
101
102
|
isFinal: boolean;
|
|
102
103
|
speakerId?: string | null;
|
|
103
|
-
language?: string | null;
|
|
104
|
+
language?: LanguageCode | null;
|
|
104
105
|
createdAt?: number;
|
|
105
106
|
}): UserInputTranscribedEvent => ({
|
|
106
107
|
type: 'user_input_transcribed',
|