@livekit/agents 1.0.44 → 1.0.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ipc/supervised_proc.cjs +1 -1
- package/dist/ipc/supervised_proc.cjs.map +1 -1
- package/dist/ipc/supervised_proc.js +1 -1
- package/dist/ipc/supervised_proc.js.map +1 -1
- package/dist/llm/llm.cjs +1 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.js +1 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/log.cjs +13 -9
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.cts +1 -1
- package/dist/log.d.ts +1 -1
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js +13 -9
- package/dist/log.js.map +1 -1
- package/dist/stream/index.cjs +3 -0
- package/dist/stream/index.cjs.map +1 -1
- package/dist/stream/index.d.cts +1 -0
- package/dist/stream/index.d.ts +1 -0
- package/dist/stream/index.d.ts.map +1 -1
- package/dist/stream/index.js +2 -0
- package/dist/stream/index.js.map +1 -1
- package/dist/stream/multi_input_stream.cjs +139 -0
- package/dist/stream/multi_input_stream.cjs.map +1 -0
- package/dist/stream/multi_input_stream.d.cts +55 -0
- package/dist/stream/multi_input_stream.d.ts +55 -0
- package/dist/stream/multi_input_stream.d.ts.map +1 -0
- package/dist/stream/multi_input_stream.js +115 -0
- package/dist/stream/multi_input_stream.js.map +1 -0
- package/dist/stream/multi_input_stream.test.cjs +340 -0
- package/dist/stream/multi_input_stream.test.cjs.map +1 -0
- package/dist/stream/multi_input_stream.test.js +339 -0
- package/dist/stream/multi_input_stream.test.js.map +1 -0
- package/dist/stt/stt.cjs +2 -2
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.js +2 -2
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/trace_types.cjs +42 -0
- package/dist/telemetry/trace_types.cjs.map +1 -1
- package/dist/telemetry/trace_types.d.cts +14 -0
- package/dist/telemetry/trace_types.d.ts +14 -0
- package/dist/telemetry/trace_types.d.ts.map +1 -1
- package/dist/telemetry/trace_types.js +28 -0
- package/dist/telemetry/trace_types.js.map +1 -1
- package/dist/tts/fallback_adapter.cjs +466 -0
- package/dist/tts/fallback_adapter.cjs.map +1 -0
- package/dist/tts/fallback_adapter.d.cts +110 -0
- package/dist/tts/fallback_adapter.d.ts +110 -0
- package/dist/tts/fallback_adapter.d.ts.map +1 -0
- package/dist/tts/fallback_adapter.js +442 -0
- package/dist/tts/fallback_adapter.js.map +1 -0
- package/dist/tts/index.cjs +3 -0
- package/dist/tts/index.cjs.map +1 -1
- package/dist/tts/index.d.cts +1 -0
- package/dist/tts/index.d.ts +1 -0
- package/dist/tts/index.d.ts.map +1 -1
- package/dist/tts/index.js +2 -0
- package/dist/tts/index.js.map +1 -1
- package/dist/tts/tts.cjs +2 -2
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.js +2 -2
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs +13 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +1 -0
- package/dist/utils.d.ts +1 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +13 -0
- package/dist/utils.js.map +1 -1
- package/dist/vad.cjs +11 -10
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.cts +5 -3
- package/dist/vad.d.ts +5 -3
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +11 -10
- package/dist/vad.js.map +1 -1
- package/dist/voice/agent_activity.cjs +35 -10
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +1 -0
- package/dist/voice/agent_activity.d.ts +1 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +35 -10
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +19 -7
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +3 -2
- package/dist/voice/agent_session.d.ts +3 -2
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +19 -7
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +85 -36
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +22 -1
- package/dist/voice/audio_recognition.d.ts +22 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +89 -36
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/audio_recognition_span.test.cjs +233 -0
- package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
- package/dist/voice/audio_recognition_span.test.js +232 -0
- package/dist/voice/audio_recognition_span.test.js.map +1 -0
- package/dist/voice/io.cjs +6 -3
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +3 -2
- package/dist/voice/io.d.ts +3 -2
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js +6 -3
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/recorder_io/recorder_io.cjs +3 -1
- package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
- package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
- package/dist/voice/recorder_io/recorder_io.js +3 -1
- package/dist/voice/recorder_io/recorder_io.js.map +1 -1
- package/dist/voice/room_io/_input.cjs +23 -20
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.cts +2 -2
- package/dist/voice/room_io/_input.d.ts +2 -2
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +13 -9
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +9 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +3 -1
- package/dist/voice/room_io/room_io.d.ts +3 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +9 -0
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/utils.cjs +47 -0
- package/dist/voice/utils.cjs.map +1 -0
- package/dist/voice/utils.d.cts +4 -0
- package/dist/voice/utils.d.ts +4 -0
- package/dist/voice/utils.d.ts.map +1 -0
- package/dist/voice/utils.js +23 -0
- package/dist/voice/utils.js.map +1 -0
- package/package.json +1 -1
- package/src/ipc/supervised_proc.ts +1 -1
- package/src/llm/llm.ts +1 -1
- package/src/log.ts +22 -11
- package/src/stream/index.ts +1 -0
- package/src/stream/multi_input_stream.test.ts +540 -0
- package/src/stream/multi_input_stream.ts +172 -0
- package/src/stt/stt.ts +2 -2
- package/src/telemetry/trace_types.ts +18 -0
- package/src/tts/fallback_adapter.ts +579 -0
- package/src/tts/index.ts +1 -0
- package/src/tts/tts.ts +2 -2
- package/src/utils.ts +16 -0
- package/src/vad.ts +12 -11
- package/src/voice/agent_activity.ts +25 -0
- package/src/voice/agent_session.ts +17 -11
- package/src/voice/audio_recognition.ts +114 -38
- package/src/voice/audio_recognition_span.test.ts +261 -0
- package/src/voice/io.ts +7 -4
- package/src/voice/recorder_io/recorder_io.ts +2 -1
- package/src/voice/room_io/_input.ts +16 -10
- package/src/voice/room_io/room_io.ts +12 -0
- package/src/voice/utils.ts +29 -0
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { ParticipantKind } from '@livekit/rtc-node';
|
|
5
|
+
import { InMemorySpanExporter, SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base';
|
|
6
|
+
import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
|
|
7
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
8
|
+
import { initializeLogger } from '../log.js';
|
|
9
|
+
import { type SpeechEvent, SpeechEventType } from '../stt/stt.js';
|
|
10
|
+
import { setTracerProvider } from '../telemetry/index.js';
|
|
11
|
+
import { VAD, type VADEvent, VADEventType, type VADStream } from '../vad.js';
|
|
12
|
+
import { AudioRecognition, type _TurnDetector } from './audio_recognition.js';
|
|
13
|
+
|
|
14
|
+
/**
 * Wires up an in-memory tracing pipeline for one test case.
 *
 * A new NodeTracerProvider is created, an InMemorySpanExporter is attached to it
 * through a SimpleSpanProcessor, the provider is registered, and it is also handed
 * to `setTracerProvider`.
 *
 * @returns an object exposing the exporter so the test can inspect finished spans.
 */
function setupInMemoryTracing() {
  const exporter = new InMemorySpanExporter();
  const tracerProvider = new NodeTracerProvider();
  const processor = new SimpleSpanProcessor(exporter);

  tracerProvider.addSpanProcessor(processor);
  tracerProvider.register();
  setTracerProvider(tracerProvider);

  return { exporter };
}
|
|
22
|
+
|
|
23
|
+
/**
 * Looks up the first span whose `name` equals the requested name.
 *
 * @param spans - spans to search, in order.
 * @param name - span name to match exactly.
 * @returns the first matching span, or `undefined` when none matches.
 */
function spanByName(spans: any[], name: string) {
  for (const candidate of spans) {
    if (candidate.name === name) {
      return candidate;
    }
  }
  return undefined;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Test double that replays a fixed list of VAD events through the async-iterator protocol.
 *
 * We intentionally avoid extending the real VADStream (it is not exported as a value in JS output
 * in some bundling contexts). Instead we emulate the async iterator shape used by AudioRecognition.
 */
class FakeVADStream extends (Object as unknown as { new (): VADStream }) {
  /** Events handed out by `next()`, in order. */
  private scripted: VADEvent[];
  /** Position of the next event to hand out. */
  private cursor = 0;

  constructor(events: VADEvent[]) {
    super();
    this.scripted = events;
  }

  // The stream-management methods are no-ops: this fake never consumes audio.
  updateInputStream() {}

  detachInputStream() {}

  close() {}

  /** The stream is its own async iterator. */
  [Symbol.asyncIterator]() {
    return this;
  }

  /** Yields the next scripted event, or reports completion once all have been yielded. */
  async next(): Promise<IteratorResult<VADEvent>> {
    const exhausted = this.cursor >= this.scripted.length;
    if (!exhausted) {
      const value = this.scripted[this.cursor]!;
      this.cursor += 1;
      return { done: false, value };
    }
    return { done: true, value: undefined };
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * VAD test double whose `stream()` returns a FakeVADStream replaying the events
 * supplied at construction time.
 */
class FakeVAD extends VAD {
  label = 'fake-vad';

  /** Events passed to every stream created by this VAD. */
  private scriptedEvents: VADEvent[];

  constructor(events: VADEvent[]) {
    super({ updateInterval: 1 });
    this.scriptedEvents = events;
  }

  /** Creates a new fake stream over the scripted events. */
  stream(): any {
    const replay = new FakeVADStream(this.scriptedEvents);
    return replay;
  }
}
|
|
62
|
+
|
|
63
|
+
/**
 * Turn detector stub: reports every language as supported, provides no
 * "unlikely" threshold, and always predicts an end-of-turn value of 1.0.
 */
const alwaysTrueTurnDetector: _TurnDetector = {
  async supportsLanguage() {
    return true;
  },
  async unlikelyThreshold() {
    return undefined;
  },
  async predictEndOfTurn() {
    return 1.0;
  },
};
|
|
68
|
+
|
|
69
|
+
describe('AudioRecognition user_turn span parity', () => {
  initializeLogger({ pretty: false, level: 'silent' });

  /** Builds a fresh set of recognition hooks: vitest spies plus a minimal chat-context stub. */
  const createHooks = () => ({
    onStartOfSpeech: vi.fn(),
    onVADInferenceDone: vi.fn(),
    onEndOfSpeech: vi.fn(),
    onInterimTranscript: vi.fn(),
    onFinalTranscript: vi.fn(),
    onPreemptiveGeneration: vi.fn(),
    retrieveChatCtx: () =>
      ({
        copy() {
          return this;
        },
        addMessage() {},
        toJSON() {
          return { items: [] };
        },
      }) as any,
    onEndOfTurn: vi.fn(async () => true),
  });

  /** Returns an STT node that emits the given events and then closes its stream. */
  const scriptedSttNode = (events: SpeechEvent[]) => async () =>
    new ReadableStream<SpeechEvent>({
      start(controller) {
        for (const ev of events) controller.enqueue(ev);
        controller.close();
      },
    });

  /** Starts the recognizer, waits briefly for background work, then closes it. */
  const runRecognition = async (ar: AudioRecognition) => {
    await ar.start();
    // allow background task to drain
    await new Promise((resolve) => setTimeout(resolve, 20));
    await ar.close();
  };

  it('creates user_turn and parents eou_detection under it (stt mode)', async () => {
    const { exporter } = setupInMemoryTracing();
    const hooks = createHooks();

    const finalTranscript: SpeechEvent = {
      type: SpeechEventType.FINAL_TRANSCRIPT,
      alternatives: [
        {
          language: 'en',
          text: 'hello',
          startTime: 0,
          endTime: 0,
          confidence: 0.9,
        },
      ],
    };
    const sttEvents: SpeechEvent[] = [
      { type: SpeechEventType.START_OF_SPEECH },
      finalTranscript,
      { type: SpeechEventType.END_OF_SPEECH },
    ];

    const ar = new AudioRecognition({
      recognitionHooks: hooks as any,
      stt: scriptedSttNode(sttEvents) as any,
      vad: undefined,
      turnDetector: alwaysTrueTurnDetector,
      turnDetectionMode: 'stt',
      minEndpointingDelay: 0,
      maxEndpointingDelay: 0,
      sttModel: 'deepgram-nova2',
      sttProvider: 'deepgram',
      getLinkedParticipant: () => ({ sid: 'p1', identity: 'bob', kind: ParticipantKind.AGENT }),
    });

    await runRecognition(ar);

    const finished = exporter.getFinishedSpans();
    const userTurn = spanByName(finished, 'user_turn');
    const eou = spanByName(finished, 'eou_detection');
    expect(userTurn, 'user_turn span missing').toBeTruthy();
    expect(eou, 'eou_detection span missing').toBeTruthy();

    // eou_detection must be a direct child of user_turn
    expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);

    // creation-time attributes
    expect(userTurn.attributes['lk.participant_id']).toBe('p1');
    expect(userTurn.attributes['lk.participant_identity']).toBe('bob');
    expect(userTurn.attributes['lk.participant_kind']).toBe('AGENT');
    expect(userTurn.attributes['gen_ai.request.model']).toBe('deepgram-nova2');
    expect(userTurn.attributes['gen_ai.provider.name']).toBe('deepgram');

    // end-of-turn attributes
    expect(userTurn.attributes['lk.user_transcript']).toContain('hello');
    expect(userTurn.attributes['lk.transcript_confidence']).toBeGreaterThan(0);
  });

  it('creates user_turn from VAD startTime (vad mode) and keeps same parenting', async () => {
    const { exporter } = setupInMemoryTracing();
    const hooks = createHooks();

    const now = Date.now();

    /** Builds one scripted VAD event; only the fields that differ between events are parameters. */
    const vadEvent = (
      type: VADEventType,
      timestamp: number,
      silenceDuration: number,
      speaking: boolean,
    ): VADEvent => ({
      type,
      samplesIndex: 0,
      timestamp,
      speechDuration: 100,
      silenceDuration,
      frames: [],
      probability: 0,
      inferenceDuration: 0,
      speaking,
      rawAccumulatedSilence: 0,
      rawAccumulatedSpeech: 0,
    });

    const vadEvents: VADEvent[] = [
      vadEvent(VADEventType.START_OF_SPEECH, now, 0, true),
      vadEvent(VADEventType.END_OF_SPEECH, now + 200, 100, false),
    ];

    const sttEvents: SpeechEvent[] = [
      {
        type: SpeechEventType.FINAL_TRANSCRIPT,
        alternatives: [
          {
            language: 'en',
            text: 'test',
            startTime: 0,
            endTime: 0,
            confidence: 0.8,
          },
        ],
      },
    ];

    const ar = new AudioRecognition({
      recognitionHooks: hooks as any,
      stt: scriptedSttNode(sttEvents) as any,
      vad: new FakeVAD(vadEvents) as any,
      turnDetector: alwaysTrueTurnDetector,
      turnDetectionMode: 'vad',
      minEndpointingDelay: 0,
      maxEndpointingDelay: 0,
      sttModel: 'stt-model',
      sttProvider: 'stt-provider',
      getLinkedParticipant: () => ({ sid: 'p2', identity: 'alice', kind: ParticipantKind.AGENT }),
    });

    await runRecognition(ar);

    const finished = exporter.getFinishedSpans();
    const userTurn = spanByName(finished, 'user_turn');
    const eou = spanByName(finished, 'eou_detection');
    expect(userTurn).toBeTruthy();
    expect(eou).toBeTruthy();
    expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);

    // the scripted VAD events must have reached the speech hooks
    expect(hooks.onStartOfSpeech).toHaveBeenCalled();
    expect(hooks.onEndOfSpeech).toHaveBeenCalled();
  });
});
|
package/src/voice/io.ts
CHANGED
|
@@ -8,7 +8,7 @@ import type { ChatContext } from '../llm/chat_context.js';
|
|
|
8
8
|
import type { ChatChunk } from '../llm/llm.js';
|
|
9
9
|
import type { ToolContext } from '../llm/tool_context.js';
|
|
10
10
|
import { log } from '../log.js';
|
|
11
|
-
import {
|
|
11
|
+
import { MultiInputStream } from '../stream/multi_input_stream.js';
|
|
12
12
|
import type { SpeechEvent } from '../stt/stt.js';
|
|
13
13
|
import { Future } from '../utils.js';
|
|
14
14
|
import type { ModelSettings } from './agent.js';
|
|
@@ -84,11 +84,14 @@ export interface AudioOutputCapabilities {
|
|
|
84
84
|
}
|
|
85
85
|
|
|
86
86
|
export abstract class AudioInput {
|
|
87
|
-
protected
|
|
88
|
-
new DeferredReadableStream<AudioFrame>();
|
|
87
|
+
protected multiStream: MultiInputStream<AudioFrame> = new MultiInputStream<AudioFrame>();
|
|
89
88
|
|
|
90
89
|
get stream(): ReadableStream<AudioFrame> {
|
|
91
|
-
return this.
|
|
90
|
+
return this.multiStream.stream;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
async close(): Promise<void> {
|
|
94
|
+
await this.multiStream.close();
|
|
92
95
|
}
|
|
93
96
|
|
|
94
97
|
onAttached(): void {}
|
|
@@ -105,6 +105,7 @@ export class RecorderIO {
|
|
|
105
105
|
await this.outChan.close();
|
|
106
106
|
await this.closeFuture.await;
|
|
107
107
|
await cancelAndWait([this.forwardTask!, this.encodeTask!]);
|
|
108
|
+
await this.inRecord?.close();
|
|
108
109
|
|
|
109
110
|
this.started = false;
|
|
110
111
|
} finally {
|
|
@@ -378,7 +379,7 @@ class RecorderAudioInput extends AudioInput {
|
|
|
378
379
|
this.source = source;
|
|
379
380
|
|
|
380
381
|
// Set up the intercepting stream
|
|
381
|
-
this.
|
|
382
|
+
this.multiStream.addInputStream(this.createInterceptingStream());
|
|
382
383
|
}
|
|
383
384
|
|
|
384
385
|
/**
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import { type AudioFrame, FrameProcessor } from '@livekit/rtc-node';
|
|
5
4
|
import {
|
|
5
|
+
type AudioFrame,
|
|
6
6
|
AudioStream,
|
|
7
|
+
FrameProcessor,
|
|
7
8
|
type NoiseCancellationOptions,
|
|
8
9
|
RemoteParticipant,
|
|
9
10
|
type RemoteTrack,
|
|
@@ -25,7 +26,9 @@ export class ParticipantAudioInputStream extends AudioInput {
|
|
|
25
26
|
private frameProcessor?: FrameProcessor<AudioFrame>;
|
|
26
27
|
private publication: RemoteTrackPublication | null = null;
|
|
27
28
|
private participantIdentity: string | null = null;
|
|
29
|
+
private currentInputId: string | null = null;
|
|
28
30
|
private logger = log();
|
|
31
|
+
|
|
29
32
|
constructor({
|
|
30
33
|
room,
|
|
31
34
|
sampleRate,
|
|
@@ -60,8 +63,10 @@ export class ParticipantAudioInputStream extends AudioInput {
|
|
|
60
63
|
if (this.participantIdentity === participantIdentity) {
|
|
61
64
|
return;
|
|
62
65
|
}
|
|
66
|
+
if (this.participantIdentity) {
|
|
67
|
+
this.closeStream();
|
|
68
|
+
}
|
|
63
69
|
this.participantIdentity = participantIdentity;
|
|
64
|
-
this.closeStream();
|
|
65
70
|
|
|
66
71
|
if (!participantIdentity) {
|
|
67
72
|
return;
|
|
@@ -119,12 +124,11 @@ export class ParticipantAudioInputStream extends AudioInput {
|
|
|
119
124
|
};
|
|
120
125
|
|
|
121
126
|
private closeStream() {
|
|
122
|
-
if (this.
|
|
123
|
-
this.
|
|
127
|
+
if (this.currentInputId) {
|
|
128
|
+
void this.multiStream.removeInputStream(this.currentInputId);
|
|
129
|
+
this.currentInputId = null;
|
|
124
130
|
}
|
|
125
131
|
|
|
126
|
-
this.frameProcessor?.close();
|
|
127
|
-
|
|
128
132
|
this.publication = null;
|
|
129
133
|
}
|
|
130
134
|
|
|
@@ -143,7 +147,7 @@ export class ParticipantAudioInputStream extends AudioInput {
|
|
|
143
147
|
}
|
|
144
148
|
this.closeStream();
|
|
145
149
|
this.publication = publication;
|
|
146
|
-
this.
|
|
150
|
+
this.currentInputId = this.multiStream.addInputStream(
|
|
147
151
|
resampleStream({
|
|
148
152
|
stream: this.createStream(track),
|
|
149
153
|
outputRate: this.sampleRate,
|
|
@@ -179,12 +183,14 @@ export class ParticipantAudioInputStream extends AudioInput {
|
|
|
179
183
|
}) as unknown as ReadableStream<AudioFrame>;
|
|
180
184
|
}
|
|
181
185
|
|
|
182
|
-
async close() {
|
|
186
|
+
override async close() {
|
|
183
187
|
this.room.off(RoomEvent.TrackSubscribed, this.onTrackSubscribed);
|
|
184
188
|
this.room.off(RoomEvent.TrackUnpublished, this.onTrackUnpublished);
|
|
185
189
|
this.room.off(RoomEvent.TokenRefreshed, this.onTokenRefreshed);
|
|
186
190
|
this.closeStream();
|
|
187
|
-
|
|
188
|
-
|
|
191
|
+
await super.close();
|
|
192
|
+
|
|
193
|
+
this.frameProcessor?.close();
|
|
194
|
+
this.frameProcessor = undefined;
|
|
189
195
|
}
|
|
190
196
|
}
|
|
@@ -376,6 +376,18 @@ export class RoomIO {
|
|
|
376
376
|
return this.participantAvailableFuture.done;
|
|
377
377
|
}
|
|
378
378
|
|
|
379
|
+
/**
 * The participant stored as the result of `participantAvailableFuture`, or
 * `undefined` while `isParticipantAvailable` is false.
 */
get linkedParticipant(): RemoteParticipant | undefined {
  return this.isParticipantAvailable ? this.participantAvailableFuture.result : undefined;
}
|
|
386
|
+
|
|
387
|
+
/** The room's local participant; a nullish value is normalized to `undefined`. */
get localParticipant(): Participant | undefined {
  const local = this.room.localParticipant;
  return local ?? undefined;
}
|
|
390
|
+
|
|
379
391
|
/** Switch to a different participant */
|
|
380
392
|
setParticipant(participantIdentity: string | null) {
|
|
381
393
|
this.logger.debug({ participantIdentity }, 'setting participant');
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import type { Participant, ParticipantKind } from '@livekit/rtc-node';
|
|
5
|
+
import type { Span } from '@opentelemetry/api';
|
|
6
|
+
import { traceTypes } from '../telemetry/index.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Copies participant details onto a span as attributes.
 *
 * The participant id attribute is written only when `sid` is truthy; the identity
 * and kind attributes are always written. The kind is stored as its name, as
 * produced by `participantKindName`.
 *
 * @param span - span that receives the attributes.
 * @param participant - source of the `sid`, `identity` and `kind` values.
 */
export function setParticipantSpanAttributes(
  span: Span,
  participant: Pick<Participant, 'sid' | 'identity' | 'kind'>,
): void {
  const participantId = participant.sid;
  if (participantId) {
    span.setAttribute(traceTypes.ATTR_PARTICIPANT_ID, participantId);
  }

  span.setAttribute(traceTypes.ATTR_PARTICIPANT_IDENTITY, participant.identity);

  const kindLabel = participantKindName(participant.kind);
  span.setAttribute(traceTypes.ATTR_PARTICIPANT_KIND, kindLabel);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Maps a numeric participant kind to its name.
 *
 * @param kind - participant kind value.
 * @returns the name for values 0 through 5, otherwise the value converted with `String`.
 */
function participantKindName(kind: ParticipantKind): string {
  switch (kind as number) {
    case 0:
      return 'STANDARD';
    case 1:
      return 'INGRESS';
    case 2:
      return 'EGRESS';
    case 3:
      return 'SIP';
    case 4:
      return 'AGENT';
    case 5:
      return 'CONNECTOR';
    default:
      // Unknown kinds are reported as their raw value.
      return String(kind);
  }
}
|