@livekit/agents 1.1.0-dev.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +2 -0
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +2 -0
- package/dist/cli.js.map +1 -1
- package/dist/constants.cjs +3 -0
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +1 -0
- package/dist/constants.d.ts +1 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +2 -0
- package/dist/constants.js.map +1 -1
- package/dist/cpu.cjs +189 -0
- package/dist/cpu.cjs.map +1 -0
- package/dist/cpu.d.cts +24 -0
- package/dist/cpu.d.ts +24 -0
- package/dist/cpu.d.ts.map +1 -0
- package/dist/cpu.js +152 -0
- package/dist/cpu.js.map +1 -0
- package/dist/cpu.test.cjs +227 -0
- package/dist/cpu.test.cjs.map +1 -0
- package/dist/cpu.test.js +204 -0
- package/dist/cpu.test.js.map +1 -0
- package/dist/index.cjs +12 -10
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +13 -13
- package/dist/index.d.ts +13 -13
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -10
- package/dist/index.js.map +1 -1
- package/dist/inference/interruption/defaults.cjs +1 -1
- package/dist/inference/interruption/defaults.cjs.map +1 -1
- package/dist/inference/interruption/defaults.d.cts +1 -1
- package/dist/inference/interruption/defaults.d.ts +1 -1
- package/dist/inference/interruption/defaults.d.ts.map +1 -1
- package/dist/inference/interruption/defaults.js +1 -1
- package/dist/inference/interruption/defaults.js.map +1 -1
- package/dist/inference/interruption/http_transport.cjs +44 -28
- package/dist/inference/interruption/http_transport.cjs.map +1 -1
- package/dist/inference/interruption/http_transport.d.ts.map +1 -1
- package/dist/inference/interruption/http_transport.js +45 -29
- package/dist/inference/interruption/http_transport.js.map +1 -1
- package/dist/inference/interruption/interruption_detector.cjs +22 -5
- package/dist/inference/interruption/interruption_detector.cjs.map +1 -1
- package/dist/inference/interruption/interruption_detector.d.cts +2 -2
- package/dist/inference/interruption/interruption_detector.d.ts +2 -2
- package/dist/inference/interruption/interruption_detector.d.ts.map +1 -1
- package/dist/inference/interruption/interruption_detector.js +22 -5
- package/dist/inference/interruption/interruption_detector.js.map +1 -1
- package/dist/inference/interruption/interruption_stream.cjs +4 -4
- package/dist/inference/interruption/interruption_stream.cjs.map +1 -1
- package/dist/inference/interruption/interruption_stream.js +4 -4
- package/dist/inference/interruption/interruption_stream.js.map +1 -1
- package/dist/inference/interruption/types.cjs.map +1 -1
- package/dist/inference/interruption/types.d.cts +2 -2
- package/dist/inference/interruption/types.d.ts +2 -2
- package/dist/inference/interruption/types.d.ts.map +1 -1
- package/dist/inference/interruption/ws_transport.cjs +60 -47
- package/dist/inference/interruption/ws_transport.cjs.map +1 -1
- package/dist/inference/interruption/ws_transport.d.ts.map +1 -1
- package/dist/inference/interruption/ws_transport.js +60 -47
- package/dist/inference/interruption/ws_transport.js.map +1 -1
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +1 -1
- package/dist/inference/llm.d.ts +1 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +20 -12
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +3 -2
- package/dist/inference/stt.d.ts +3 -2
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +20 -12
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/stt.test.cjs +14 -0
- package/dist/inference/stt.test.cjs.map +1 -1
- package/dist/inference/stt.test.js +14 -0
- package/dist/inference/stt.test.js.map +1 -1
- package/dist/inference/tts.cjs +13 -4
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +8 -1
- package/dist/inference/tts.d.ts +8 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +13 -4
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/tts.test.cjs +10 -0
- package/dist/inference/tts.test.cjs.map +1 -1
- package/dist/inference/tts.test.js +10 -0
- package/dist/inference/tts.test.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +41 -23
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +41 -23
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +1 -1
- package/dist/job.cjs.map +1 -1
- package/dist/job.js +1 -1
- package/dist/job.js.map +1 -1
- package/dist/language.cjs +394 -0
- package/dist/language.cjs.map +1 -0
- package/dist/language.d.cts +15 -0
- package/dist/language.d.ts +15 -0
- package/dist/language.d.ts.map +1 -0
- package/dist/language.js +363 -0
- package/dist/language.js.map +1 -0
- package/dist/language.test.cjs +43 -0
- package/dist/language.test.cjs.map +1 -0
- package/dist/language.test.js +49 -0
- package/dist/language.test.js.map +1 -0
- package/dist/llm/index.cjs +2 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +2 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +6 -2
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +6 -2
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +2 -1
- package/dist/stt/stt.d.ts +2 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/utils.cjs +15 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +8 -0
- package/dist/utils.d.ts +8 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +13 -0
- package/dist/utils.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent.cjs +14 -17
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +10 -11
- package/dist/voice/agent.d.ts +10 -11
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +15 -18
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent.test.cjs +194 -0
- package/dist/voice/agent.test.cjs.map +1 -1
- package/dist/voice/agent.test.js +195 -1
- package/dist/voice/agent.test.js.map +1 -1
- package/dist/voice/agent_activity.cjs +116 -39
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +2 -0
- package/dist/voice/agent_activity.d.ts +2 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +117 -40
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_activity.test.cjs +135 -0
- package/dist/voice/agent_activity.test.cjs.map +1 -0
- package/dist/voice/agent_activity.test.js +134 -0
- package/dist/voice/agent_activity.test.js.map +1 -0
- package/dist/voice/agent_session.cjs +38 -38
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +65 -56
- package/dist/voice/agent_session.d.ts +65 -56
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +37 -37
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +106 -52
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +4 -2
- package/dist/voice/audio_recognition.d.ts +4 -2
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +106 -52
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/audio_recognition_span.test.cjs +84 -22
- package/dist/voice/audio_recognition_span.test.cjs.map +1 -1
- package/dist/voice/audio_recognition_span.test.js +90 -23
- package/dist/voice/audio_recognition_span.test.js.map +1 -1
- package/dist/voice/events.cjs +1 -1
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +4 -3
- package/dist/voice/events.d.ts +4 -3
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +1 -1
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/index.cjs +9 -1
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -1
- package/dist/voice/index.d.ts +1 -1
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +10 -1
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/remote_session.cjs +922 -0
- package/dist/voice/remote_session.cjs.map +1 -0
- package/dist/voice/remote_session.d.cts +108 -0
- package/dist/voice/remote_session.d.ts +108 -0
- package/dist/voice/remote_session.d.ts.map +1 -0
- package/dist/voice/remote_session.js +887 -0
- package/dist/voice/remote_session.js.map +1 -0
- package/dist/voice/report.cjs +11 -10
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +5 -3
- package/dist/voice/report.d.ts +5 -3
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +11 -10
- package/dist/voice/report.js.map +1 -1
- package/dist/voice/report.test.cjs +15 -0
- package/dist/voice/report.test.cjs.map +1 -1
- package/dist/voice/report.test.js +15 -0
- package/dist/voice/report.test.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +39 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +3 -1
- package/dist/voice/room_io/room_io.d.ts +3 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +40 -1
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/turn_config/interruption.cjs.map +1 -1
- package/dist/voice/turn_config/interruption.d.cts +1 -1
- package/dist/voice/turn_config/interruption.d.ts +1 -1
- package/dist/voice/turn_config/interruption.d.ts.map +1 -1
- package/dist/voice/turn_config/interruption.js.map +1 -1
- package/dist/voice/turn_config/utils.cjs +95 -35
- package/dist/voice/turn_config/utils.cjs.map +1 -1
- package/dist/voice/turn_config/utils.d.cts +17 -5
- package/dist/voice/turn_config/utils.d.ts +17 -5
- package/dist/voice/turn_config/utils.d.ts.map +1 -1
- package/dist/voice/turn_config/utils.js +93 -35
- package/dist/voice/turn_config/utils.js.map +1 -1
- package/dist/voice/turn_config/utils.test.cjs +83 -41
- package/dist/voice/turn_config/utils.test.cjs.map +1 -1
- package/dist/voice/turn_config/utils.test.js +84 -42
- package/dist/voice/turn_config/utils.test.js.map +1 -1
- package/dist/worker.cjs +6 -29
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +6 -19
- package/dist/worker.js.map +1 -1
- package/package.json +3 -2
- package/src/cli.ts +2 -0
- package/src/constants.ts +1 -0
- package/src/cpu.test.ts +239 -0
- package/src/cpu.ts +173 -0
- package/src/index.ts +13 -15
- package/src/inference/interruption/defaults.ts +1 -1
- package/src/inference/interruption/http_transport.ts +49 -30
- package/src/inference/interruption/interruption_detector.ts +22 -6
- package/src/inference/interruption/interruption_stream.ts +4 -4
- package/src/inference/interruption/types.ts +2 -2
- package/src/inference/interruption/ws_transport.ts +63 -59
- package/src/inference/llm.ts +3 -1
- package/src/inference/stt.test.ts +17 -0
- package/src/inference/stt.ts +22 -14
- package/src/inference/tts.test.ts +12 -0
- package/src/inference/tts.ts +22 -6
- package/src/ipc/job_proc_lazy_main.ts +44 -24
- package/src/job.ts +1 -1
- package/src/language.test.ts +62 -0
- package/src/language.ts +380 -0
- package/src/llm/index.ts +2 -0
- package/src/stream/deferred_stream.ts +5 -1
- package/src/stt/stt.ts +2 -1
- package/src/utils.ts +20 -0
- package/src/voice/agent.test.ts +208 -1
- package/src/voice/agent.ts +21 -22
- package/src/voice/agent_activity.test.ts +194 -0
- package/src/voice/agent_activity.ts +161 -43
- package/src/voice/agent_session.ts +103 -92
- package/src/voice/audio_recognition.ts +124 -61
- package/src/voice/audio_recognition_span.test.ts +115 -35
- package/src/voice/events.ts +4 -3
- package/src/voice/index.ts +10 -1
- package/src/voice/remote_session.ts +1083 -0
- package/src/voice/report.test.ts +22 -3
- package/src/voice/report.ts +31 -14
- package/src/voice/room_io/room_io.ts +52 -2
- package/src/voice/turn_config/interruption.ts +1 -1
- package/src/voice/turn_config/utils.test.ts +91 -43
- package/src/voice/turn_config/utils.ts +120 -56
- package/src/worker.ts +34 -50
- package/dist/voice/client_events.cjs +0 -554
- package/dist/voice/client_events.cjs.map +0 -1
- package/dist/voice/client_events.d.cts +0 -195
- package/dist/voice/client_events.d.ts +0 -195
- package/dist/voice/client_events.d.ts.map +0 -1
- package/dist/voice/client_events.js +0 -548
- package/dist/voice/client_events.js.map +0 -1
- package/dist/voice/wire_format.cjs +0 -798
- package/dist/voice/wire_format.cjs.map +0 -1
- package/dist/voice/wire_format.d.cts +0 -5503
- package/dist/voice/wire_format.d.ts +0 -5503
- package/dist/voice/wire_format.d.ts.map +0 -1
- package/dist/voice/wire_format.js +0 -728
- package/dist/voice/wire_format.js.map +0 -1
- package/src/voice/client_events.ts +0 -838
- package/src/voice/wire_format.ts +0 -827
|
@@ -2,14 +2,27 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { ParticipantKind } from '@livekit/rtc-node';
|
|
5
|
-
import {
|
|
5
|
+
import { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api';
|
|
6
|
+
import {
|
|
7
|
+
InMemorySpanExporter,
|
|
8
|
+
type ReadableSpan,
|
|
9
|
+
SimpleSpanProcessor,
|
|
10
|
+
} from '@opentelemetry/sdk-trace-base';
|
|
6
11
|
import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
|
|
12
|
+
import { ReadableStream } from 'node:stream/web';
|
|
7
13
|
import { describe, expect, it, vi } from 'vitest';
|
|
14
|
+
import { ChatContext } from '../llm/chat_context.js';
|
|
8
15
|
import { initializeLogger } from '../log.js';
|
|
9
16
|
import { type SpeechEvent, SpeechEventType } from '../stt/stt.js';
|
|
10
|
-
import { setTracerProvider } from '../telemetry/index.js';
|
|
17
|
+
import { setTracerProvider, tracer } from '../telemetry/index.js';
|
|
11
18
|
import { VAD, type VADEvent, VADEventType, type VADStream } from '../vad.js';
|
|
12
|
-
import {
|
|
19
|
+
import { AgentSession } from './agent_session.js';
|
|
20
|
+
import {
|
|
21
|
+
AudioRecognition,
|
|
22
|
+
type RecognitionHooks,
|
|
23
|
+
type _TurnDetector,
|
|
24
|
+
} from './audio_recognition.js';
|
|
25
|
+
import type { STTNode } from './io.js';
|
|
13
26
|
|
|
14
27
|
function setupInMemoryTracing() {
|
|
15
28
|
const exporter = new InMemorySpanExporter();
|
|
@@ -20,10 +33,25 @@ function setupInMemoryTracing() {
|
|
|
20
33
|
return { exporter };
|
|
21
34
|
}
|
|
22
35
|
|
|
23
|
-
function spanByName(spans:
|
|
36
|
+
function spanByName(spans: ReadableSpan[], name: string) {
|
|
24
37
|
return spans.find((s) => s.name === name);
|
|
25
38
|
}
|
|
26
39
|
|
|
40
|
+
function createFakeSession(rootSpanContext = ROOT_CONTEXT): AgentSession {
|
|
41
|
+
return {
|
|
42
|
+
_agentState: 'listening',
|
|
43
|
+
_roomIO: {
|
|
44
|
+
linkedParticipant: { sid: 'p3', identity: 'charlie', kind: ParticipantKind.AGENT },
|
|
45
|
+
},
|
|
46
|
+
_setUserAwayTimer: vi.fn(),
|
|
47
|
+
_cancelUserAwayTimer: vi.fn(),
|
|
48
|
+
_userSpeakingSpan: undefined,
|
|
49
|
+
_userState: 'listening',
|
|
50
|
+
emit: vi.fn(),
|
|
51
|
+
rootSpanContext,
|
|
52
|
+
} as unknown as AgentSession;
|
|
53
|
+
}
|
|
54
|
+
|
|
27
55
|
class FakeVADStream extends (Object as unknown as { new (): VADStream }) {
|
|
28
56
|
// We intentionally avoid extending the real VADStream (it is not exported as a value in JS output
|
|
29
57
|
// in some bundling contexts). Instead we emulate the async iterator shape used by AudioRecognition.
|
|
@@ -61,6 +89,8 @@ class FakeVAD extends VAD {
|
|
|
61
89
|
}
|
|
62
90
|
|
|
63
91
|
const alwaysTrueTurnDetector: _TurnDetector = {
|
|
92
|
+
model: 'test-turn-detector',
|
|
93
|
+
provider: 'test-provider',
|
|
64
94
|
supportsLanguage: async () => true,
|
|
65
95
|
unlikelyThreshold: async () => undefined,
|
|
66
96
|
predictEndOfTurn: async () => 1.0,
|
|
@@ -72,23 +102,15 @@ describe('AudioRecognition user_turn span parity', () => {
|
|
|
72
102
|
it('creates user_turn and parents eou_detection under it (stt mode)', async () => {
|
|
73
103
|
const { exporter } = setupInMemoryTracing();
|
|
74
104
|
|
|
75
|
-
const hooks = {
|
|
105
|
+
const hooks: RecognitionHooks = {
|
|
106
|
+
onInterruption: vi.fn(),
|
|
76
107
|
onStartOfSpeech: vi.fn(),
|
|
77
108
|
onVADInferenceDone: vi.fn(),
|
|
78
109
|
onEndOfSpeech: vi.fn(),
|
|
79
110
|
onInterimTranscript: vi.fn(),
|
|
80
111
|
onFinalTranscript: vi.fn(),
|
|
81
112
|
onPreemptiveGeneration: vi.fn(),
|
|
82
|
-
retrieveChatCtx: () =>
|
|
83
|
-
({
|
|
84
|
-
copy() {
|
|
85
|
-
return this;
|
|
86
|
-
},
|
|
87
|
-
addMessage() {},
|
|
88
|
-
toJSON() {
|
|
89
|
-
return { items: [] };
|
|
90
|
-
},
|
|
91
|
-
}) as any,
|
|
113
|
+
retrieveChatCtx: () => ChatContext.empty(),
|
|
92
114
|
onEndOfTurn: vi.fn(async () => true),
|
|
93
115
|
};
|
|
94
116
|
|
|
@@ -109,8 +131,8 @@ describe('AudioRecognition user_turn span parity', () => {
|
|
|
109
131
|
{ type: SpeechEventType.END_OF_SPEECH },
|
|
110
132
|
];
|
|
111
133
|
|
|
112
|
-
const sttNode = async () =>
|
|
113
|
-
new ReadableStream<SpeechEvent>({
|
|
134
|
+
const sttNode: STTNode = async () =>
|
|
135
|
+
new ReadableStream<SpeechEvent | string>({
|
|
114
136
|
start(controller) {
|
|
115
137
|
for (const ev of sttEvents) controller.enqueue(ev);
|
|
116
138
|
controller.close();
|
|
@@ -118,8 +140,8 @@ describe('AudioRecognition user_turn span parity', () => {
|
|
|
118
140
|
});
|
|
119
141
|
|
|
120
142
|
const ar = new AudioRecognition({
|
|
121
|
-
recognitionHooks: hooks
|
|
122
|
-
stt: sttNode
|
|
143
|
+
recognitionHooks: hooks,
|
|
144
|
+
stt: sttNode,
|
|
123
145
|
vad: undefined,
|
|
124
146
|
turnDetector: alwaysTrueTurnDetector,
|
|
125
147
|
turnDetectionMode: 'stt',
|
|
@@ -140,6 +162,9 @@ describe('AudioRecognition user_turn span parity', () => {
|
|
|
140
162
|
const eou = spanByName(spans, 'eou_detection');
|
|
141
163
|
expect(userTurn, 'user_turn span missing').toBeTruthy();
|
|
142
164
|
expect(eou, 'eou_detection span missing').toBeTruthy();
|
|
165
|
+
if (!userTurn || !eou) {
|
|
166
|
+
throw new Error('expected user_turn and eou_detection spans');
|
|
167
|
+
}
|
|
143
168
|
|
|
144
169
|
expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);
|
|
145
170
|
|
|
@@ -158,23 +183,15 @@ describe('AudioRecognition user_turn span parity', () => {
|
|
|
158
183
|
it('creates user_turn from VAD startTime (vad mode) and keeps same parenting', async () => {
|
|
159
184
|
const { exporter } = setupInMemoryTracing();
|
|
160
185
|
|
|
161
|
-
const hooks = {
|
|
186
|
+
const hooks: RecognitionHooks = {
|
|
187
|
+
onInterruption: vi.fn(),
|
|
162
188
|
onStartOfSpeech: vi.fn(),
|
|
163
189
|
onVADInferenceDone: vi.fn(),
|
|
164
190
|
onEndOfSpeech: vi.fn(),
|
|
165
191
|
onInterimTranscript: vi.fn(),
|
|
166
192
|
onFinalTranscript: vi.fn(),
|
|
167
193
|
onPreemptiveGeneration: vi.fn(),
|
|
168
|
-
retrieveChatCtx: () =>
|
|
169
|
-
({
|
|
170
|
-
copy() {
|
|
171
|
-
return this;
|
|
172
|
-
},
|
|
173
|
-
addMessage() {},
|
|
174
|
-
toJSON() {
|
|
175
|
-
return { items: [] };
|
|
176
|
-
},
|
|
177
|
-
}) as any,
|
|
194
|
+
retrieveChatCtx: () => ChatContext.empty(),
|
|
178
195
|
onEndOfTurn: vi.fn(async () => true),
|
|
179
196
|
};
|
|
180
197
|
|
|
@@ -223,8 +240,8 @@ describe('AudioRecognition user_turn span parity', () => {
|
|
|
223
240
|
},
|
|
224
241
|
];
|
|
225
242
|
|
|
226
|
-
const sttNode = async () =>
|
|
227
|
-
new ReadableStream<SpeechEvent>({
|
|
243
|
+
const sttNode: STTNode = async () =>
|
|
244
|
+
new ReadableStream<SpeechEvent | string>({
|
|
228
245
|
start(controller) {
|
|
229
246
|
for (const ev of sttEvents) controller.enqueue(ev);
|
|
230
247
|
controller.close();
|
|
@@ -232,9 +249,9 @@ describe('AudioRecognition user_turn span parity', () => {
|
|
|
232
249
|
});
|
|
233
250
|
|
|
234
251
|
const ar = new AudioRecognition({
|
|
235
|
-
recognitionHooks: hooks
|
|
236
|
-
stt: sttNode
|
|
237
|
-
vad: new FakeVAD(vadEvents)
|
|
252
|
+
recognitionHooks: hooks,
|
|
253
|
+
stt: sttNode,
|
|
254
|
+
vad: new FakeVAD(vadEvents),
|
|
238
255
|
turnDetector: alwaysTrueTurnDetector,
|
|
239
256
|
turnDetectionMode: 'vad',
|
|
240
257
|
minEndpointingDelay: 0,
|
|
@@ -253,9 +270,72 @@ describe('AudioRecognition user_turn span parity', () => {
|
|
|
253
270
|
const eou = spanByName(spans, 'eou_detection');
|
|
254
271
|
expect(userTurn).toBeTruthy();
|
|
255
272
|
expect(eou).toBeTruthy();
|
|
273
|
+
if (!userTurn || !eou) {
|
|
274
|
+
throw new Error('expected user_turn and eou_detection spans');
|
|
275
|
+
}
|
|
256
276
|
expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);
|
|
257
277
|
|
|
258
278
|
expect(hooks.onStartOfSpeech).toHaveBeenCalled();
|
|
259
279
|
expect(hooks.onEndOfSpeech).toHaveBeenCalled();
|
|
260
280
|
});
|
|
281
|
+
|
|
282
|
+
it('parents user_speaking under user_turn when an explicit speech context is provided', () => {
|
|
283
|
+
const { exporter } = setupInMemoryTracing();
|
|
284
|
+
const sessionSpan = tracer.startSpan({ name: 'agent_session', context: ROOT_CONTEXT });
|
|
285
|
+
const sessionContext = trace.setSpan(ROOT_CONTEXT, sessionSpan);
|
|
286
|
+
const fakeSession = createFakeSession(sessionContext);
|
|
287
|
+
const userTurn = tracer.startSpan({ name: 'user_turn', context: sessionContext });
|
|
288
|
+
const userTurnContext = trace.setSpan(sessionContext, userTurn);
|
|
289
|
+
const speakingStartedAt = Date.now() - 100;
|
|
290
|
+
const speakingEndedAt = Date.now();
|
|
291
|
+
|
|
292
|
+
otelContext.with(userTurnContext, () => {
|
|
293
|
+
AgentSession.prototype._updateUserState.call(fakeSession, 'speaking', {
|
|
294
|
+
lastSpeakingTime: speakingStartedAt,
|
|
295
|
+
otelContext: otelContext.active(),
|
|
296
|
+
});
|
|
297
|
+
AgentSession.prototype._updateUserState.call(fakeSession, 'listening', {
|
|
298
|
+
lastSpeakingTime: speakingEndedAt,
|
|
299
|
+
otelContext: otelContext.active(),
|
|
300
|
+
});
|
|
301
|
+
});
|
|
302
|
+
|
|
303
|
+
userTurn.end();
|
|
304
|
+
sessionSpan.end();
|
|
305
|
+
|
|
306
|
+
const spans = exporter.getFinishedSpans();
|
|
307
|
+
const userSpeaking = spanByName(spans, 'user_speaking');
|
|
308
|
+
const exportedUserTurn = spanByName(spans, 'user_turn');
|
|
309
|
+
expect(userSpeaking).toBeTruthy();
|
|
310
|
+
expect(exportedUserTurn).toBeTruthy();
|
|
311
|
+
if (!userSpeaking || !exportedUserTurn) {
|
|
312
|
+
throw new Error('expected user_speaking and user_turn spans');
|
|
313
|
+
}
|
|
314
|
+
expect(userSpeaking.parentSpanId).toBe(exportedUserTurn.spanContext().spanId);
|
|
315
|
+
expect(userSpeaking.attributes['lk.participant_id']).toBe('p3');
|
|
316
|
+
});
|
|
317
|
+
|
|
318
|
+
it('keeps user_speaking attached to the session root without an explicit speech context', () => {
|
|
319
|
+
const { exporter } = setupInMemoryTracing();
|
|
320
|
+
const sessionSpan = tracer.startSpan({ name: 'agent_session', context: ROOT_CONTEXT });
|
|
321
|
+
const sessionContext = trace.setSpan(ROOT_CONTEXT, sessionSpan);
|
|
322
|
+
const fakeSession = createFakeSession(sessionContext);
|
|
323
|
+
|
|
324
|
+
AgentSession.prototype._updateUserState.call(fakeSession, 'speaking', {
|
|
325
|
+
lastSpeakingTime: Date.now() - 100,
|
|
326
|
+
});
|
|
327
|
+
AgentSession.prototype._updateUserState.call(fakeSession, 'listening', {
|
|
328
|
+
lastSpeakingTime: Date.now(),
|
|
329
|
+
});
|
|
330
|
+
|
|
331
|
+
sessionSpan.end();
|
|
332
|
+
|
|
333
|
+
const spans = exporter.getFinishedSpans();
|
|
334
|
+
const userSpeaking = spanByName(spans, 'user_speaking');
|
|
335
|
+
expect(userSpeaking).toBeTruthy();
|
|
336
|
+
if (!userSpeaking) {
|
|
337
|
+
throw new Error('expected user_speaking span');
|
|
338
|
+
}
|
|
339
|
+
expect(userSpeaking.parentSpanId).toBe(sessionSpan.spanContext().spanId);
|
|
340
|
+
});
|
|
261
341
|
});
|
package/src/voice/events.ts
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import type { InterruptionDetectionError } from '../inference/interruption/errors.js';
|
|
5
5
|
import type { OverlappingSpeechEvent } from '../inference/interruption/types.js';
|
|
6
|
+
import type { LanguageCode } from '../language.js';
|
|
6
7
|
import type {
|
|
7
8
|
ChatMessage,
|
|
8
9
|
FunctionCall,
|
|
@@ -27,7 +28,7 @@ export enum AgentSessionEventTypes {
|
|
|
27
28
|
FunctionToolsExecuted = 'function_tools_executed',
|
|
28
29
|
MetricsCollected = 'metrics_collected',
|
|
29
30
|
SpeechCreated = 'speech_created',
|
|
30
|
-
|
|
31
|
+
OverlappingSpeech = 'overlapping_speech',
|
|
31
32
|
Error = 'error',
|
|
32
33
|
Close = 'close',
|
|
33
34
|
}
|
|
@@ -90,7 +91,7 @@ export type UserInputTranscribedEvent = {
|
|
|
90
91
|
/** Not supported yet. Always null by default. */
|
|
91
92
|
speakerId: string | null;
|
|
92
93
|
createdAt: number;
|
|
93
|
-
language:
|
|
94
|
+
language: LanguageCode | null;
|
|
94
95
|
};
|
|
95
96
|
|
|
96
97
|
export const createUserInputTranscribedEvent = ({
|
|
@@ -103,7 +104,7 @@ export const createUserInputTranscribedEvent = ({
|
|
|
103
104
|
transcript: string;
|
|
104
105
|
isFinal: boolean;
|
|
105
106
|
speakerId?: string | null;
|
|
106
|
-
language?:
|
|
107
|
+
language?: LanguageCode | null;
|
|
107
108
|
createdAt?: number;
|
|
108
109
|
}): UserInputTranscribedEvent => ({
|
|
109
110
|
type: 'user_input_transcribed',
|
package/src/voice/index.ts
CHANGED
|
@@ -5,7 +5,16 @@ export { Agent, AgentTask, StopResponse, type AgentOptions, type ModelSettings }
|
|
|
5
5
|
export { AgentSession, type AgentSessionOptions, type VoiceOptions } from './agent_session.js';
|
|
6
6
|
export * from './avatar/index.js';
|
|
7
7
|
export * from './background_audio.js';
|
|
8
|
-
export {
|
|
8
|
+
export {
|
|
9
|
+
type TextInputCallback,
|
|
10
|
+
type TextInputEvent,
|
|
11
|
+
RemoteSession,
|
|
12
|
+
type RemoteSessionCallbacks,
|
|
13
|
+
type RemoteSessionEventTypes,
|
|
14
|
+
SessionHost,
|
|
15
|
+
SessionTransport,
|
|
16
|
+
RoomSessionTransport,
|
|
17
|
+
} from './remote_session.js';
|
|
9
18
|
export * from './events.js';
|
|
10
19
|
export { type TimedString } from './io.js';
|
|
11
20
|
export * from './report.js';
|