@livekit/agents 1.0.44 → 1.0.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ipc/supervised_proc.cjs +1 -1
- package/dist/ipc/supervised_proc.cjs.map +1 -1
- package/dist/ipc/supervised_proc.js +1 -1
- package/dist/ipc/supervised_proc.js.map +1 -1
- package/dist/llm/llm.cjs +1 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.js +1 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/log.cjs +13 -9
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.cts +1 -1
- package/dist/log.d.ts +1 -1
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js +13 -9
- package/dist/log.js.map +1 -1
- package/dist/stream/index.cjs +3 -0
- package/dist/stream/index.cjs.map +1 -1
- package/dist/stream/index.d.cts +1 -0
- package/dist/stream/index.d.ts +1 -0
- package/dist/stream/index.d.ts.map +1 -1
- package/dist/stream/index.js +2 -0
- package/dist/stream/index.js.map +1 -1
- package/dist/stream/multi_input_stream.cjs +139 -0
- package/dist/stream/multi_input_stream.cjs.map +1 -0
- package/dist/stream/multi_input_stream.d.cts +55 -0
- package/dist/stream/multi_input_stream.d.ts +55 -0
- package/dist/stream/multi_input_stream.d.ts.map +1 -0
- package/dist/stream/multi_input_stream.js +115 -0
- package/dist/stream/multi_input_stream.js.map +1 -0
- package/dist/stream/multi_input_stream.test.cjs +340 -0
- package/dist/stream/multi_input_stream.test.cjs.map +1 -0
- package/dist/stream/multi_input_stream.test.js +339 -0
- package/dist/stream/multi_input_stream.test.js.map +1 -0
- package/dist/stt/stt.cjs +2 -2
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.js +2 -2
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/trace_types.cjs +42 -0
- package/dist/telemetry/trace_types.cjs.map +1 -1
- package/dist/telemetry/trace_types.d.cts +14 -0
- package/dist/telemetry/trace_types.d.ts +14 -0
- package/dist/telemetry/trace_types.d.ts.map +1 -1
- package/dist/telemetry/trace_types.js +28 -0
- package/dist/telemetry/trace_types.js.map +1 -1
- package/dist/tts/fallback_adapter.cjs +466 -0
- package/dist/tts/fallback_adapter.cjs.map +1 -0
- package/dist/tts/fallback_adapter.d.cts +110 -0
- package/dist/tts/fallback_adapter.d.ts +110 -0
- package/dist/tts/fallback_adapter.d.ts.map +1 -0
- package/dist/tts/fallback_adapter.js +442 -0
- package/dist/tts/fallback_adapter.js.map +1 -0
- package/dist/tts/index.cjs +3 -0
- package/dist/tts/index.cjs.map +1 -1
- package/dist/tts/index.d.cts +1 -0
- package/dist/tts/index.d.ts +1 -0
- package/dist/tts/index.d.ts.map +1 -1
- package/dist/tts/index.js +2 -0
- package/dist/tts/index.js.map +1 -1
- package/dist/tts/tts.cjs +2 -2
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.js +2 -2
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs +13 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +1 -0
- package/dist/utils.d.ts +1 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +13 -0
- package/dist/utils.js.map +1 -1
- package/dist/vad.cjs +11 -10
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.cts +5 -3
- package/dist/vad.d.ts +5 -3
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +11 -10
- package/dist/vad.js.map +1 -1
- package/dist/voice/agent_activity.cjs +35 -10
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +1 -0
- package/dist/voice/agent_activity.d.ts +1 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +35 -10
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +19 -7
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +3 -2
- package/dist/voice/agent_session.d.ts +3 -2
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +19 -7
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +85 -36
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +22 -1
- package/dist/voice/audio_recognition.d.ts +22 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +89 -36
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/audio_recognition_span.test.cjs +233 -0
- package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
- package/dist/voice/audio_recognition_span.test.js +232 -0
- package/dist/voice/audio_recognition_span.test.js.map +1 -0
- package/dist/voice/io.cjs +6 -3
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +3 -2
- package/dist/voice/io.d.ts +3 -2
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js +6 -3
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/recorder_io/recorder_io.cjs +3 -1
- package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
- package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
- package/dist/voice/recorder_io/recorder_io.js +3 -1
- package/dist/voice/recorder_io/recorder_io.js.map +1 -1
- package/dist/voice/room_io/_input.cjs +23 -20
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.cts +2 -2
- package/dist/voice/room_io/_input.d.ts +2 -2
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +13 -9
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +9 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +3 -1
- package/dist/voice/room_io/room_io.d.ts +3 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +9 -0
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/utils.cjs +47 -0
- package/dist/voice/utils.cjs.map +1 -0
- package/dist/voice/utils.d.cts +4 -0
- package/dist/voice/utils.d.ts +4 -0
- package/dist/voice/utils.d.ts.map +1 -0
- package/dist/voice/utils.js +23 -0
- package/dist/voice/utils.js.map +1 -0
- package/package.json +1 -1
- package/src/ipc/supervised_proc.ts +1 -1
- package/src/llm/llm.ts +1 -1
- package/src/log.ts +22 -11
- package/src/stream/index.ts +1 -0
- package/src/stream/multi_input_stream.test.ts +540 -0
- package/src/stream/multi_input_stream.ts +172 -0
- package/src/stt/stt.ts +2 -2
- package/src/telemetry/trace_types.ts +18 -0
- package/src/tts/fallback_adapter.ts +579 -0
- package/src/tts/index.ts +1 -0
- package/src/tts/tts.ts +2 -2
- package/src/utils.ts +16 -0
- package/src/vad.ts +12 -11
- package/src/voice/agent_activity.ts +25 -0
- package/src/voice/agent_session.ts +17 -11
- package/src/voice/audio_recognition.ts +114 -38
- package/src/voice/audio_recognition_span.test.ts +261 -0
- package/src/voice/io.ts +7 -4
- package/src/voice/recorder_io/recorder_io.ts +2 -1
- package/src/voice/room_io/_input.ts +16 -10
- package/src/voice/room_io/room_io.ts +12 -0
- package/src/voice/utils.ts +29 -0
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
import { ReadableStream } from "node:stream/web";
|
|
2
|
+
import { describe, expect, it } from "vitest";
|
|
3
|
+
import { delay } from "../utils.js";
|
|
4
|
+
import { MultiInputStream } from "./multi_input_stream.js";
|
|
5
|
+
function streamFrom(values) {
|
|
6
|
+
return new ReadableStream({
|
|
7
|
+
start(controller) {
|
|
8
|
+
for (const v of values) controller.enqueue(v);
|
|
9
|
+
controller.close();
|
|
10
|
+
}
|
|
11
|
+
});
|
|
12
|
+
}
|
|
13
|
+
describe("MultiInputStream", () => {
|
|
14
|
+
it("should create a readable output stream", () => {
|
|
15
|
+
const multi = new MultiInputStream();
|
|
16
|
+
expect(multi.stream).toBeInstanceOf(ReadableStream);
|
|
17
|
+
expect(multi.inputCount).toBe(0);
|
|
18
|
+
expect(multi.isClosed).toBe(false);
|
|
19
|
+
});
|
|
20
|
+
it("should read data from a single input stream", async () => {
|
|
21
|
+
const multi = new MultiInputStream();
|
|
22
|
+
const reader = multi.stream.getReader();
|
|
23
|
+
multi.addInputStream(streamFrom(["a", "b", "c"]));
|
|
24
|
+
const results = [];
|
|
25
|
+
for (let i = 0; i < 3; i++) {
|
|
26
|
+
const { value } = await reader.read();
|
|
27
|
+
results.push(value);
|
|
28
|
+
}
|
|
29
|
+
expect(results).toEqual(["a", "b", "c"]);
|
|
30
|
+
reader.releaseLock();
|
|
31
|
+
await multi.close();
|
|
32
|
+
});
|
|
33
|
+
it("should merge data from multiple input streams", async () => {
|
|
34
|
+
const multi = new MultiInputStream();
|
|
35
|
+
const reader = multi.stream.getReader();
|
|
36
|
+
multi.addInputStream(streamFrom([1, 2]));
|
|
37
|
+
multi.addInputStream(streamFrom([3, 4]));
|
|
38
|
+
const results = [];
|
|
39
|
+
for (let i = 0; i < 4; i++) {
|
|
40
|
+
const { value } = await reader.read();
|
|
41
|
+
results.push(value);
|
|
42
|
+
}
|
|
43
|
+
expect(results.sort()).toEqual([1, 2, 3, 4]);
|
|
44
|
+
reader.releaseLock();
|
|
45
|
+
await multi.close();
|
|
46
|
+
});
|
|
47
|
+
it("should allow adding inputs dynamically while reading", async () => {
|
|
48
|
+
const multi = new MultiInputStream();
|
|
49
|
+
const reader = multi.stream.getReader();
|
|
50
|
+
multi.addInputStream(streamFrom(["first"]));
|
|
51
|
+
const r1 = await reader.read();
|
|
52
|
+
expect(r1.value).toBe("first");
|
|
53
|
+
multi.addInputStream(streamFrom(["second"]));
|
|
54
|
+
const r2 = await reader.read();
|
|
55
|
+
expect(r2.value).toBe("second");
|
|
56
|
+
reader.releaseLock();
|
|
57
|
+
await multi.close();
|
|
58
|
+
});
|
|
59
|
+
it("should continue reading from remaining inputs after removing one", async () => {
|
|
60
|
+
const multi = new MultiInputStream();
|
|
61
|
+
const reader = multi.stream.getReader();
|
|
62
|
+
const slowSource = new ReadableStream({
|
|
63
|
+
async start(controller) {
|
|
64
|
+
controller.enqueue("slow-1");
|
|
65
|
+
await delay(50);
|
|
66
|
+
controller.enqueue("slow-2");
|
|
67
|
+
await delay(50);
|
|
68
|
+
controller.enqueue("slow-3");
|
|
69
|
+
controller.close();
|
|
70
|
+
}
|
|
71
|
+
});
|
|
72
|
+
const slowId = multi.addInputStream(slowSource);
|
|
73
|
+
const r1 = await reader.read();
|
|
74
|
+
expect(r1.value).toBe("slow-1");
|
|
75
|
+
await multi.removeInputStream(slowId);
|
|
76
|
+
multi.addInputStream(streamFrom(["fast-1", "fast-2"]));
|
|
77
|
+
const r2 = await reader.read();
|
|
78
|
+
expect(r2.value).toBe("fast-1");
|
|
79
|
+
const r3 = await reader.read();
|
|
80
|
+
expect(r3.value).toBe("fast-2");
|
|
81
|
+
reader.releaseLock();
|
|
82
|
+
await multi.close();
|
|
83
|
+
});
|
|
84
|
+
it("should handle swapping inputs (remove then add)", async () => {
|
|
85
|
+
const multi = new MultiInputStream();
|
|
86
|
+
const reader = multi.stream.getReader();
|
|
87
|
+
const id1 = multi.addInputStream(streamFrom(["from-A"]));
|
|
88
|
+
const r1 = await reader.read();
|
|
89
|
+
expect(r1.value).toBe("from-A");
|
|
90
|
+
await multi.removeInputStream(id1);
|
|
91
|
+
const id2 = multi.addInputStream(streamFrom(["from-B"]));
|
|
92
|
+
const r2 = await reader.read();
|
|
93
|
+
expect(r2.value).toBe("from-B");
|
|
94
|
+
await multi.removeInputStream(id2);
|
|
95
|
+
reader.releaseLock();
|
|
96
|
+
await multi.close();
|
|
97
|
+
});
|
|
98
|
+
it("should keep reader awaiting until an input is added", async () => {
|
|
99
|
+
const multi = new MultiInputStream();
|
|
100
|
+
const reader = multi.stream.getReader();
|
|
101
|
+
let readCompleted = false;
|
|
102
|
+
const readPromise = reader.read().then((result2) => {
|
|
103
|
+
readCompleted = true;
|
|
104
|
+
return result2;
|
|
105
|
+
});
|
|
106
|
+
await delay(50);
|
|
107
|
+
expect(readCompleted).toBe(false);
|
|
108
|
+
multi.addInputStream(streamFrom(["hello"]));
|
|
109
|
+
const result = await readPromise;
|
|
110
|
+
expect(readCompleted).toBe(true);
|
|
111
|
+
expect(result.value).toBe("hello");
|
|
112
|
+
reader.releaseLock();
|
|
113
|
+
await multi.close();
|
|
114
|
+
});
|
|
115
|
+
it("should handle empty input streams without closing the output", async () => {
|
|
116
|
+
const multi = new MultiInputStream();
|
|
117
|
+
const reader = multi.stream.getReader();
|
|
118
|
+
multi.addInputStream(streamFrom([]));
|
|
119
|
+
await delay(20);
|
|
120
|
+
multi.addInputStream(streamFrom(["data"]));
|
|
121
|
+
const result = await reader.read();
|
|
122
|
+
expect(result.value).toBe("data");
|
|
123
|
+
reader.releaseLock();
|
|
124
|
+
await multi.close();
|
|
125
|
+
});
|
|
126
|
+
it("should remove errored input without killing the output", async () => {
|
|
127
|
+
const multi = new MultiInputStream();
|
|
128
|
+
const reader = multi.stream.getReader();
|
|
129
|
+
const errorSource = new ReadableStream({
|
|
130
|
+
async start(controller) {
|
|
131
|
+
controller.enqueue("before-error");
|
|
132
|
+
await delay(20);
|
|
133
|
+
controller.error(new Error("boom"));
|
|
134
|
+
}
|
|
135
|
+
});
|
|
136
|
+
multi.addInputStream(errorSource);
|
|
137
|
+
const r1 = await reader.read();
|
|
138
|
+
expect(r1.value).toBe("before-error");
|
|
139
|
+
await delay(50);
|
|
140
|
+
expect(multi.inputCount).toBe(0);
|
|
141
|
+
multi.addInputStream(streamFrom(["after-error"]));
|
|
142
|
+
const r2 = await reader.read();
|
|
143
|
+
expect(r2.value).toBe("after-error");
|
|
144
|
+
reader.releaseLock();
|
|
145
|
+
await multi.close();
|
|
146
|
+
});
|
|
147
|
+
it("should keep other inputs alive when one errors", async () => {
|
|
148
|
+
const multi = new MultiInputStream();
|
|
149
|
+
const reader = multi.stream.getReader();
|
|
150
|
+
const goodSource = new ReadableStream({
|
|
151
|
+
async start(controller) {
|
|
152
|
+
await delay(60);
|
|
153
|
+
controller.enqueue("good");
|
|
154
|
+
controller.close();
|
|
155
|
+
}
|
|
156
|
+
});
|
|
157
|
+
const badSource = new ReadableStream({
|
|
158
|
+
async start(controller) {
|
|
159
|
+
controller.error(new Error("bad"));
|
|
160
|
+
}
|
|
161
|
+
});
|
|
162
|
+
multi.addInputStream(goodSource);
|
|
163
|
+
multi.addInputStream(badSource);
|
|
164
|
+
await delay(10);
|
|
165
|
+
const result = await reader.read();
|
|
166
|
+
expect(result.value).toBe("good");
|
|
167
|
+
reader.releaseLock();
|
|
168
|
+
await multi.close();
|
|
169
|
+
});
|
|
170
|
+
it("should end the output stream with done:true when close is called", async () => {
|
|
171
|
+
const multi = new MultiInputStream();
|
|
172
|
+
const reader = multi.stream.getReader();
|
|
173
|
+
multi.addInputStream(streamFrom(["data"]));
|
|
174
|
+
const r1 = await reader.read();
|
|
175
|
+
expect(r1.value).toBe("data");
|
|
176
|
+
await multi.close();
|
|
177
|
+
const r2 = await reader.read();
|
|
178
|
+
expect(r2.done).toBe(true);
|
|
179
|
+
expect(r2.value).toBeUndefined();
|
|
180
|
+
reader.releaseLock();
|
|
181
|
+
});
|
|
182
|
+
it("should resolve pending reads as done when close is called", async () => {
|
|
183
|
+
const multi = new MultiInputStream();
|
|
184
|
+
const reader = multi.stream.getReader();
|
|
185
|
+
const readPromise = reader.read();
|
|
186
|
+
await delay(10);
|
|
187
|
+
await multi.close();
|
|
188
|
+
const result = await readPromise;
|
|
189
|
+
expect(result.done).toBe(true);
|
|
190
|
+
expect(result.value).toBeUndefined();
|
|
191
|
+
reader.releaseLock();
|
|
192
|
+
});
|
|
193
|
+
it("should be idempotent for multiple close calls", async () => {
|
|
194
|
+
const multi = new MultiInputStream();
|
|
195
|
+
await multi.close();
|
|
196
|
+
await multi.close();
|
|
197
|
+
expect(multi.isClosed).toBe(true);
|
|
198
|
+
});
|
|
199
|
+
it("should throw when adding input after close", async () => {
|
|
200
|
+
const multi = new MultiInputStream();
|
|
201
|
+
await multi.close();
|
|
202
|
+
expect(() => multi.addInputStream(streamFrom(["x"]))).toThrow("MultiInputStream is closed");
|
|
203
|
+
});
|
|
204
|
+
it("should no-op when removing a non-existent input", async () => {
|
|
205
|
+
const multi = new MultiInputStream();
|
|
206
|
+
await multi.removeInputStream("does-not-exist");
|
|
207
|
+
await multi.close();
|
|
208
|
+
});
|
|
209
|
+
it("should release the source reader lock so the source can be reused", async () => {
|
|
210
|
+
const multi = new MultiInputStream();
|
|
211
|
+
const reader = multi.stream.getReader();
|
|
212
|
+
const source = new ReadableStream({
|
|
213
|
+
async start(controller) {
|
|
214
|
+
controller.enqueue("chunk-0");
|
|
215
|
+
await delay(30);
|
|
216
|
+
controller.enqueue("chunk-1");
|
|
217
|
+
controller.close();
|
|
218
|
+
}
|
|
219
|
+
});
|
|
220
|
+
const id = multi.addInputStream(source);
|
|
221
|
+
const r1 = await reader.read();
|
|
222
|
+
expect(r1.value).toBe("chunk-0");
|
|
223
|
+
await multi.removeInputStream(id);
|
|
224
|
+
const sourceReader = source.getReader();
|
|
225
|
+
const sr = await sourceReader.read();
|
|
226
|
+
expect(sr.value).toBe("chunk-1");
|
|
227
|
+
sourceReader.releaseLock();
|
|
228
|
+
reader.releaseLock();
|
|
229
|
+
await multi.close();
|
|
230
|
+
});
|
|
231
|
+
it("should track inputCount correctly through add / remove / natural end", async () => {
|
|
232
|
+
const multi = new MultiInputStream();
|
|
233
|
+
expect(multi.inputCount).toBe(0);
|
|
234
|
+
const id1 = multi.addInputStream(streamFrom(["a"]));
|
|
235
|
+
const id2 = multi.addInputStream(streamFrom(["b"]));
|
|
236
|
+
expect(multi.inputCount).toBe(2);
|
|
237
|
+
await multi.removeInputStream(id1);
|
|
238
|
+
expect(multi.inputCount).toBeLessThanOrEqual(1);
|
|
239
|
+
await delay(20);
|
|
240
|
+
expect(multi.inputCount).toBe(0);
|
|
241
|
+
await multi.removeInputStream(id2);
|
|
242
|
+
expect(multi.inputCount).toBe(0);
|
|
243
|
+
await multi.close();
|
|
244
|
+
});
|
|
245
|
+
it("should handle concurrent reads and slow writes", async () => {
|
|
246
|
+
const multi = new MultiInputStream();
|
|
247
|
+
const reader = multi.stream.getReader();
|
|
248
|
+
const chunks = ["a", "b", "c", "d", "e"];
|
|
249
|
+
let idx = 0;
|
|
250
|
+
const source = new ReadableStream({
|
|
251
|
+
start(controller) {
|
|
252
|
+
const writeNext = () => {
|
|
253
|
+
if (idx < chunks.length) {
|
|
254
|
+
controller.enqueue(chunks[idx++]);
|
|
255
|
+
setTimeout(writeNext, 5);
|
|
256
|
+
} else {
|
|
257
|
+
controller.close();
|
|
258
|
+
}
|
|
259
|
+
};
|
|
260
|
+
writeNext();
|
|
261
|
+
}
|
|
262
|
+
});
|
|
263
|
+
multi.addInputStream(source);
|
|
264
|
+
const results = [];
|
|
265
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
266
|
+
const { value } = await reader.read();
|
|
267
|
+
results.push(value);
|
|
268
|
+
}
|
|
269
|
+
expect(results).toEqual(chunks);
|
|
270
|
+
reader.releaseLock();
|
|
271
|
+
await multi.close();
|
|
272
|
+
});
|
|
273
|
+
it("should handle backpressure with large data", async () => {
|
|
274
|
+
const multi = new MultiInputStream();
|
|
275
|
+
const largeChunks = Array.from({ length: 1e3 }, (_, i) => `chunk-${i}`);
|
|
276
|
+
multi.addInputStream(streamFrom(largeChunks));
|
|
277
|
+
const reader = multi.stream.getReader();
|
|
278
|
+
const results = [];
|
|
279
|
+
let result = await reader.read();
|
|
280
|
+
while (!result.done) {
|
|
281
|
+
results.push(result.value);
|
|
282
|
+
if (results.length === largeChunks.length) break;
|
|
283
|
+
result = await reader.read();
|
|
284
|
+
}
|
|
285
|
+
expect(results).toEqual(largeChunks);
|
|
286
|
+
reader.releaseLock();
|
|
287
|
+
await multi.close();
|
|
288
|
+
});
|
|
289
|
+
it("should support tee on the output stream", async () => {
|
|
290
|
+
const multi = new MultiInputStream();
|
|
291
|
+
const [s1, s2] = multi.stream.tee();
|
|
292
|
+
const r1 = s1.getReader();
|
|
293
|
+
const r2 = s2.getReader();
|
|
294
|
+
multi.addInputStream(streamFrom([10, 20]));
|
|
295
|
+
const [a1, a2] = await Promise.all([r1.read(), r2.read()]);
|
|
296
|
+
expect(a1.value).toBe(10);
|
|
297
|
+
expect(a2.value).toBe(10);
|
|
298
|
+
const [b1, b2] = await Promise.all([r1.read(), r2.read()]);
|
|
299
|
+
expect(b1.value).toBe(20);
|
|
300
|
+
expect(b2.value).toBe(20);
|
|
301
|
+
r1.releaseLock();
|
|
302
|
+
r2.releaseLock();
|
|
303
|
+
await multi.close();
|
|
304
|
+
});
|
|
305
|
+
it("should return unique IDs from addInputStream", () => {
|
|
306
|
+
const multi = new MultiInputStream();
|
|
307
|
+
const id1 = multi.addInputStream(streamFrom(["a"]));
|
|
308
|
+
const id2 = multi.addInputStream(streamFrom(["b"]));
|
|
309
|
+
const id3 = multi.addInputStream(streamFrom(["c"]));
|
|
310
|
+
expect(id1).not.toBe(id2);
|
|
311
|
+
expect(id2).not.toBe(id3);
|
|
312
|
+
expect(id1).not.toBe(id3);
|
|
313
|
+
});
|
|
314
|
+
it("should cleanly close while pumps are actively writing", async () => {
|
|
315
|
+
const multi = new MultiInputStream();
|
|
316
|
+
const reader = multi.stream.getReader();
|
|
317
|
+
const infiniteSource = new ReadableStream({
|
|
318
|
+
async start(controller) {
|
|
319
|
+
let i = 0;
|
|
320
|
+
while (true) {
|
|
321
|
+
try {
|
|
322
|
+
controller.enqueue(`tick-${i++}`);
|
|
323
|
+
} catch {
|
|
324
|
+
break;
|
|
325
|
+
}
|
|
326
|
+
await delay(5);
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
});
|
|
330
|
+
multi.addInputStream(infiniteSource);
|
|
331
|
+
const r1 = await reader.read();
|
|
332
|
+
expect(r1.done).toBe(false);
|
|
333
|
+
await multi.close();
|
|
334
|
+
const r2 = await reader.read();
|
|
335
|
+
expect(r2.done).toBe(true);
|
|
336
|
+
reader.releaseLock();
|
|
337
|
+
});
|
|
338
|
+
});
|
|
339
|
+
//# sourceMappingURL=multi_input_stream.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/stream/multi_input_stream.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { ReadableStream } from 'node:stream/web';\nimport { describe, expect, it } from 'vitest';\nimport { delay } from '../utils.js';\nimport { MultiInputStream } from './multi_input_stream.js';\n\nfunction streamFrom<T>(values: T[]): ReadableStream<T> {\n return new ReadableStream<T>({\n start(controller) {\n for (const v of values) controller.enqueue(v);\n controller.close();\n },\n });\n}\n\ndescribe('MultiInputStream', () => {\n // ---------------------------------------------------------------------------\n // Basic functionality\n // ---------------------------------------------------------------------------\n\n it('should create a readable output stream', () => {\n const multi = new MultiInputStream<string>();\n expect(multi.stream).toBeInstanceOf(ReadableStream);\n expect(multi.inputCount).toBe(0);\n expect(multi.isClosed).toBe(false);\n });\n\n it('should read data from a single input stream', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n multi.addInputStream(streamFrom(['a', 'b', 'c']));\n\n const results: string[] = [];\n // Read three values then close manually (output stays open after input ends).\n for (let i = 0; i < 3; i++) {\n const { value } = await reader.read();\n results.push(value!);\n }\n\n expect(results).toEqual(['a', 'b', 'c']);\n reader.releaseLock();\n await multi.close();\n });\n\n it('should merge data from multiple input streams', async () => {\n const multi = new MultiInputStream<number>();\n const reader = multi.stream.getReader();\n\n multi.addInputStream(streamFrom([1, 2]));\n multi.addInputStream(streamFrom([3, 4]));\n\n const results: number[] = [];\n for (let i = 0; i < 4; i++) {\n const { value } = await reader.read();\n results.push(value!);\n }\n\n // Order is non-deterministic but all values must arrive.\n expect(results.sort()).toEqual([1, 2, 3, 4]);\n reader.releaseLock();\n await multi.close();\n });\n\n // ---------------------------------------------------------------------------\n // Dynamic add / remove\n // ---------------------------------------------------------------------------\n\n it('should allow adding inputs dynamically while reading', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n multi.addInputStream(streamFrom(['first']));\n\n const r1 = await reader.read();\n expect(r1.value).toBe('first');\n\n // Add a second input after reading from the first.\n multi.addInputStream(streamFrom(['second']));\n\n const r2 = await reader.read();\n expect(r2.value).toBe('second');\n\n reader.releaseLock();\n await multi.close();\n });\n\n it('should continue reading from remaining inputs after removing one', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n // A slow stream that emits over time.\n const slowSource = new ReadableStream<string>({\n async start(controller) {\n controller.enqueue('slow-1');\n await delay(50);\n controller.enqueue('slow-2');\n await delay(50);\n controller.enqueue('slow-3');\n controller.close();\n },\n });\n\n const slowId = multi.addInputStream(slowSource);\n\n // Read first value from slow source.\n const r1 = await reader.read();\n expect(r1.value).toBe('slow-1');\n\n // Remove the slow source and add a fast one.\n await multi.removeInputStream(slowId);\n\n multi.addInputStream(streamFrom(['fast-1', 'fast-2']));\n\n const r2 = await reader.read();\n expect(r2.value).toBe('fast-1');\n\n const r3 = await reader.read();\n expect(r3.value).toBe('fast-2');\n\n reader.releaseLock();\n await multi.close();\n });\n\n it('should handle swapping inputs (remove then add)', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n const id1 = multi.addInputStream(streamFrom(['from-A']));\n\n const r1 = await reader.read();\n expect(r1.value).toBe('from-A');\n\n await multi.removeInputStream(id1);\n\n const id2 = multi.addInputStream(streamFrom(['from-B']));\n\n const r2 = await reader.read();\n expect(r2.value).toBe('from-B');\n\n await multi.removeInputStream(id2);\n reader.releaseLock();\n await multi.close();\n });\n\n // ---------------------------------------------------------------------------\n // Reading before any input is added\n // ---------------------------------------------------------------------------\n\n it('should keep reader awaiting until an input is added', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n let readCompleted = false;\n const readPromise = reader.read().then((result) => {\n readCompleted = true;\n return result;\n });\n\n await delay(50);\n expect(readCompleted).toBe(false);\n\n // Now add an input to unblock the read.\n multi.addInputStream(streamFrom(['hello']));\n\n const result = await readPromise;\n expect(readCompleted).toBe(true);\n expect(result.value).toBe('hello');\n\n reader.releaseLock();\n await multi.close();\n });\n\n // ---------------------------------------------------------------------------\n // Empty input streams\n // ---------------------------------------------------------------------------\n\n it('should handle empty input streams without closing the output', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n // Add an empty stream — it should end immediately without affecting the output.\n multi.addInputStream(streamFrom([]));\n\n await delay(20);\n\n // The output should still be open. Adding a real input should work.\n multi.addInputStream(streamFrom(['data']));\n\n const result = await reader.read();\n expect(result.value).toBe('data');\n\n reader.releaseLock();\n await multi.close();\n });\n\n // ---------------------------------------------------------------------------\n // Error handling\n // ---------------------------------------------------------------------------\n\n it('should remove errored input without killing the output', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n // An input that errors after emitting one value.\n const errorSource = new ReadableStream<string>({\n async start(controller) {\n controller.enqueue('before-error');\n await delay(20);\n controller.error(new Error('boom'));\n },\n });\n\n multi.addInputStream(errorSource);\n\n const r1 = await reader.read();\n expect(r1.value).toBe('before-error');\n\n // Wait for the error to propagate and the input to be removed.\n await delay(50);\n\n expect(multi.inputCount).toBe(0);\n\n // The output is still alive — we can add another input.\n multi.addInputStream(streamFrom(['after-error']));\n\n const r2 = await reader.read();\n expect(r2.value).toBe('after-error');\n\n reader.releaseLock();\n await multi.close();\n });\n\n it('should keep other inputs alive when one errors', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n const goodSource = new ReadableStream<string>({\n async start(controller) {\n await delay(60);\n controller.enqueue('good');\n controller.close();\n },\n });\n\n const badSource = new ReadableStream<string>({\n async start(controller) {\n controller.error(new Error('bad'));\n },\n });\n\n multi.addInputStream(goodSource);\n multi.addInputStream(badSource);\n\n // Wait a bit for the bad source to error and be removed.\n await delay(10);\n\n // The good source should still be pumping.\n const result = await reader.read();\n expect(result.value).toBe('good');\n\n reader.releaseLock();\n await multi.close();\n });\n\n // ---------------------------------------------------------------------------\n // Close semantics\n // ---------------------------------------------------------------------------\n\n it('should end the output stream with done:true when close is called', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n multi.addInputStream(streamFrom(['data']));\n\n const r1 = await reader.read();\n expect(r1.value).toBe('data');\n\n await multi.close();\n\n const r2 = await reader.read();\n expect(r2.done).toBe(true);\n expect(r2.value).toBeUndefined();\n\n reader.releaseLock();\n });\n\n it('should resolve pending reads as done when close is called', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n // No inputs — read will be pending.\n const readPromise = reader.read();\n\n await delay(10);\n await multi.close();\n\n const result = await readPromise;\n expect(result.done).toBe(true);\n expect(result.value).toBeUndefined();\n\n reader.releaseLock();\n });\n\n it('should be idempotent for multiple close calls', async () => {\n const multi = new MultiInputStream<string>();\n\n await multi.close();\n await multi.close();\n\n expect(multi.isClosed).toBe(true);\n });\n\n it('should throw when adding input after close', async () => {\n const multi = new MultiInputStream<string>();\n await multi.close();\n\n expect(() => multi.addInputStream(streamFrom(['x']))).toThrow('MultiInputStream is closed');\n });\n\n // ---------------------------------------------------------------------------\n // removeInputStream edge cases\n // ---------------------------------------------------------------------------\n\n it('should no-op when removing a non-existent input', async () => {\n const multi = new MultiInputStream<string>();\n\n // Should not throw.\n await multi.removeInputStream('does-not-exist');\n\n await multi.close();\n });\n\n it('should release the source reader lock so the source can be reused', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n const source = new ReadableStream<string>({\n async start(controller) {\n controller.enqueue('chunk-0');\n await delay(30);\n controller.enqueue('chunk-1');\n controller.close();\n },\n });\n\n const id = multi.addInputStream(source);\n\n const r1 = await reader.read();\n expect(r1.value).toBe('chunk-0');\n\n await multi.removeInputStream(id);\n\n // The source's reader lock should be released — we can get a new reader.\n const sourceReader = source.getReader();\n const sr = await sourceReader.read();\n expect(sr.value).toBe('chunk-1');\n sourceReader.releaseLock();\n\n reader.releaseLock();\n await multi.close();\n });\n\n // ---------------------------------------------------------------------------\n // Input count tracking\n // ---------------------------------------------------------------------------\n\n it('should track inputCount correctly through add / remove / natural end', async () => {\n const multi = new MultiInputStream<string>();\n\n expect(multi.inputCount).toBe(0);\n\n const id1 = multi.addInputStream(streamFrom(['a']));\n const id2 = multi.addInputStream(streamFrom(['b']));\n\n expect(multi.inputCount).toBe(2);\n\n await multi.removeInputStream(id1);\n expect(multi.inputCount).toBeLessThanOrEqual(1);\n\n // Let the remaining stream finish.\n await delay(20);\n expect(multi.inputCount).toBe(0);\n\n await multi.removeInputStream(id2); // already gone, no-op\n expect(multi.inputCount).toBe(0);\n\n await multi.close();\n });\n\n // ---------------------------------------------------------------------------\n // Concurrent reads and writes\n // ---------------------------------------------------------------------------\n\n it('should handle concurrent reads and slow writes', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n const chunks = ['a', 'b', 'c', 'd', 'e'];\n let idx = 0;\n\n const source = new ReadableStream<string>({\n start(controller) {\n const writeNext = () => {\n if (idx < chunks.length) {\n controller.enqueue(chunks[idx++]);\n setTimeout(writeNext, 5);\n } else {\n controller.close();\n }\n };\n writeNext();\n },\n });\n\n multi.addInputStream(source);\n\n const results: string[] = [];\n for (let i = 0; i < chunks.length; i++) {\n const { value } = await reader.read();\n results.push(value!);\n }\n\n expect(results).toEqual(chunks);\n\n reader.releaseLock();\n await multi.close();\n });\n\n // ---------------------------------------------------------------------------\n // Backpressure\n // ---------------------------------------------------------------------------\n\n it('should handle backpressure with large data', async () => {\n const multi = new MultiInputStream<string>();\n\n const largeChunks = Array.from({ length: 1000 }, (_, i) => `chunk-${i}`);\n multi.addInputStream(streamFrom(largeChunks));\n\n const reader = multi.stream.getReader();\n const results: string[] = [];\n\n let result = await reader.read();\n while (!result.done) {\n results.push(result.value);\n // Check if we've collected all expected values before reading again,\n // to avoid hanging on the output which stays open after input ends.\n if (results.length === largeChunks.length) break;\n result = await reader.read();\n }\n\n expect(results).toEqual(largeChunks);\n\n reader.releaseLock();\n await multi.close();\n });\n\n // ---------------------------------------------------------------------------\n // Multiple tee / concurrent consumers\n // ---------------------------------------------------------------------------\n\n it('should support tee on the output stream', async () => {\n const multi = new MultiInputStream<number>();\n\n const [s1, s2] = multi.stream.tee();\n const r1 = s1.getReader();\n const r2 = s2.getReader();\n\n multi.addInputStream(streamFrom([10, 20]));\n\n const [a1, a2] = await Promise.all([r1.read(), r2.read()]);\n expect(a1.value).toBe(10);\n expect(a2.value).toBe(10);\n\n const [b1, b2] = await Promise.all([r1.read(), r2.read()]);\n expect(b1.value).toBe(20);\n expect(b2.value).toBe(20);\n\n r1.releaseLock();\n r2.releaseLock();\n await multi.close();\n });\n\n // ---------------------------------------------------------------------------\n // Return value of addInputStream\n // ---------------------------------------------------------------------------\n\n it('should return unique IDs from addInputStream', () => {\n const multi = new MultiInputStream<string>();\n\n const id1 = multi.addInputStream(streamFrom(['a']));\n const id2 = multi.addInputStream(streamFrom(['b']));\n const id3 = multi.addInputStream(streamFrom(['c']));\n\n expect(id1).not.toBe(id2);\n expect(id2).not.toBe(id3);\n expect(id1).not.toBe(id3);\n });\n\n // ---------------------------------------------------------------------------\n // close() while pumps are actively writing\n // ---------------------------------------------------------------------------\n\n it('should cleanly close while pumps are actively writing', async () => {\n const multi = new MultiInputStream<string>();\n const reader = multi.stream.getReader();\n\n // A source that never stops on its own.\n const infiniteSource = new ReadableStream<string>({\n async start(controller) {\n let i = 0;\n while (true) {\n try {\n controller.enqueue(`tick-${i++}`);\n } catch {\n // controller.enqueue throws after stream is canceled\n break;\n }\n await delay(5);\n }\n },\n });\n\n multi.addInputStream(infiniteSource);\n\n // Read a couple of values.\n const r1 = await reader.read();\n expect(r1.done).toBe(false);\n\n // Close while the infinite source is still pumping.\n await multi.close();\n\n const r2 = await reader.read();\n expect(r2.done).toBe(true);\n\n reader.releaseLock();\n });\n});\n"],"mappings":"AAGA,SAAS,sBAAsB;AAC/B,SAAS,UAAU,QAAQ,UAAU;AACrC,SAAS,aAAa;AACtB,SAAS,wBAAwB;AAEjC,SAAS,WAAc,QAAgC;AACrD,SAAO,IAAI,eAAkB;AAAA,IAC3B,MAAM,YAAY;AAChB,iBAAW,KAAK,OAAQ,YAAW,QAAQ,CAAC;AAC5C,iBAAW,MAAM;AAAA,IACnB;AAAA,EACF,CAAC;AACH;AAEA,SAAS,oBAAoB,MAAM;AAKjC,KAAG,0CAA0C,MAAM;AACjD,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,WAAO,MAAM,MAAM,EAAE,eAAe,cAAc;AAClD,WAAO,MAAM,UAAU,EAAE,KAAK,CAAC;AAC/B,WAAO,MAAM,QAAQ,EAAE,KAAK,KAAK;AAAA,EACnC,CAAC;AAED,KAAG,+CAA+C,YAAY;AAC5D,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAEtC,UAAM,eAAe,WAAW,CAAC,KAAK,KAAK,GAAG,CAAC,CAAC;AAEhD,UAAM,UAAoB,CAAC;AAE3B,aAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,YAAM,EAAE,MAAM,IAAI,MAAM,OAAO,KAAK;AACpC,cAAQ,KAAK,KAAM;AAAA,IACrB;AAEA,WAAO,OAAO,EAAE,QAAQ,CAAC,KAAK,KAAK,GAAG,CAAC;AACvC,WAAO,YAAY;AACnB,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAED,KAAG,iDAAiD,YAAY;AAC9D,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAEtC,UAAM,eAAe,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;AACvC,UAAM,eAAe,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;AAEvC,UAAM,UAAoB,CAAC;AAC3B,aAAS,IAAI,GAAG,IAAI,GAAG,KAAK;AAC1B,YAAM,EAAE,MAAM,IAAI,MAAM,OAAO,KAAK;AACpC,cAAQ,KAAK,KAAM;AAAA,IACrB;AAGA,WAAO,QAAQ,KAAK,CAAC,EAAE,QAAQ,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;AAC3C,WAAO,YAAY;AACnB,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAMD,KAAG,wDAAwD,YAAY;AACrE,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAEtC,UAAM,eAAe,WAAW,CAAC,OAAO,CAAC,CAAC;AAE1C,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,KAAK,EAAE,KAAK,OAAO;AAG7B,UAAM,eAAe,WAAW,CAAC,QAAQ,CAAC,CAAC;AAE3C,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,KAAK,EAAE,KAAK,QAAQ;AAE9B,WAAO,YAAY;AACnB,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAED,KAAG,oEAAoE,YAAY;AACjF,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAGtC,UAAM,aAAa,IAAI,eAAuB;AAAA,MAC5C,MAAM,MAAM,YAAY;AACtB,mBAAW,QAAQ,QAAQ;AAC3B,cAAM,MAAM,EAAE;AACd,mBAAW,QAAQ,QAAQ;AAC3B,cAAM,MAAM,EAAE;AACd,mBAAW,QAAQ,QAAQ;AAC3B,mBAAW,MAAM;AAAA,MACnB;AAAA,IACF,CAAC;AAED,UAAM,SAAS,MAAM,eAAe,UAAU;AAG9C,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,KAAK,EAAE,KAAK,QAAQ;AAG9B,UAAM,MAAM,kBAAkB,MAAM;AAEpC,UAAM,eAAe,WAAW,CAAC,UAAU,QAAQ,CAAC,CAAC;AAErD,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,KAAK,EAAE,KAAK,QAAQ;AAE9B,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,KAAK,EAAE,KAAK,QAAQ;AAE9B,WAAO,YAAY;AACnB,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAED,KAAG,mDAAmD,YAAY;AAChE,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAEtC,UAAM,MAAM,MAAM,eAAe,WAAW,CAAC,QAAQ,CAAC,CAAC;AAEvD,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,KAAK,EAAE,KAAK,QAAQ;AAE9B,UAAM,MAAM,kBAAkB,GAAG;AAEjC,UAAM,MAAM,MAAM,eAAe,WAAW,CAAC,QAAQ,CAAC,CAAC;AAEvD,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,KAAK,EAAE,KAAK,QAAQ;AAE9B,UAAM,MAAM,kBAAkB,GAAG;AACjC,WAAO,YAAY;AACnB,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAMD,KAAG,uDAAuD,YAAY;AACpE,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAEtC,QAAI,gBAAgB;AACpB,UAAM,cAAc,OAAO,KAAK,EAAE,KAAK,CAACA,YAAW;AACjD,sBAAgB;AAChB,aAAOA;AAAA,IACT,CAAC;AAED,UAAM,MAAM,EAAE;AACd,WAAO,aAAa,EAAE,KAAK,KAAK;AAGhC,UAAM,eAAe,WAAW,CAAC,OAAO,CAAC,CAAC;AAE1C,UAAM,SAAS,MAAM;AACrB,WAAO,aAAa,EAAE,KAAK,IAAI;AAC/B,WAAO,OAAO,KAAK,EAAE,KAAK,OAAO;AAEjC,WAAO,YAAY;AACnB,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAMD,KAAG,gEAAgE,YAAY;AAC7E,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAGtC,UAAM,eAAe,WAAW,CAAC,CAAC,CAAC;AAEnC,UAAM,MAAM,EAAE;AAGd,UAAM,eAAe,WAAW,CAAC,MAAM,CAAC,CAAC;AAEzC,UAAM,SAAS,MAAM,OAAO,KAAK;AACjC,WAAO,OAAO,KAAK,EAAE,KAAK,MAAM;AAEhC,WAAO,YAAY;AACnB,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAMD,KAAG,0DAA0D,YAAY;AACvE,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAGtC,UAAM,cAAc,IAAI,eAAuB;AAAA,MAC7C,MAAM,MAAM,YAAY;AACtB,mBAAW,QAAQ,cAAc;AACjC,cAAM,MAAM,EAAE;AACd,mBAAW,MAAM,IAAI,MAAM,MAAM,CAAC;AAAA,MACpC;AAAA,IACF,CAAC;AAED,UAAM,eAAe,WAAW;AAEhC,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,KAAK,EAAE,KAAK,cAAc;AAGpC,UAAM,MAAM,EAAE;AAEd,WAAO,MAAM,UAAU,EAAE,KAAK,CAAC;AAG/B,UAAM,eAAe,WAAW,CAAC,aAAa,CAAC,CAAC;AAEhD,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,KAAK,EAAE,KAAK,aAAa;AAEnC,WAAO,YAAY;AACnB,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAED,KAAG,kDAAkD,YAAY;AAC/D,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAEtC,UAAM,aAAa,IAAI,eAAuB;AAAA,MAC5C,MAAM,MAAM,YAAY;AACtB,cAAM,MAAM,EAAE;AACd,mBAAW,QAAQ,MAAM;AACzB,mBAAW,MAAM;AAAA,MACnB;AAAA,IACF,CAAC;AAED,UAAM,YAAY,IAAI,eAAuB;AAAA,MAC3C,MAAM,MAAM,YAAY;AACtB,mBAAW,MAAM,IAAI,MAAM,KAAK,CAAC;AAAA,MACnC;AAAA,IACF,CAAC;AAED,UAAM,eAAe,UAAU;AAC/B,UAAM,eAAe,SAAS;AAG9B,UAAM,MAAM,EAAE;AAGd,UAAM,SAAS,MAAM,OAAO,KAAK;AACjC,WAAO,OAAO,KAAK,EAAE,KAAK,MAAM;AAEhC,WAAO,YAAY;AACnB,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAMD,KAAG,oEAAoE,YAAY;AACjF,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAEtC,UAAM,eAAe,WAAW,CAAC,MAAM,CAAC,CAAC;AAEzC,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,KAAK,EAAE,KAAK,MAAM;AAE5B,UAAM,MAAM,MAAM;AAElB,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,IAAI,EAAE,KAAK,IAAI;AACzB,WAAO,GAAG,KAAK,EAAE,cAAc;AAE/B,WAAO,YAAY;AAAA,EACrB,CAAC;AAED,KAAG,6DAA6D,YAAY;AAC1E,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAGtC,UAAM,cAAc,OAAO,KAAK;AAEhC,UAAM,MAAM,EAAE;AACd,UAAM,MAAM,MAAM;AAElB,UAAM,SAAS,MAAM;AACrB,WAAO,OAAO,IAAI,EAAE,KAAK,IAAI;AAC7B,WAAO,OAAO,KAAK,EAAE,cAAc;AAEnC,WAAO,YAAY;AAAA,EACrB,CAAC;AAED,KAAG,iDAAiD,YAAY;AAC9D,UAAM,QAAQ,IAAI,iBAAyB;AAE3C,UAAM,MAAM,MAAM;AAClB,UAAM,MAAM,MAAM;AAElB,WAAO,MAAM,QAAQ,EAAE,KAAK,IAAI;AAAA,EAClC,CAAC;AAED,KAAG,8CAA8C,YAAY;AAC3D,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,MAAM,MAAM;AAElB,WAAO,MAAM,MAAM,eAAe,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,QAAQ,4BAA4B;AAAA,EAC5F,CAAC;AAMD,KAAG,mDAAmD,YAAY;AAChE,UAAM,QAAQ,IAAI,iBAAyB;AAG3C,UAAM,MAAM,kBAAkB,gBAAgB;AAE9C,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAED,KAAG,qEAAqE,YAAY;AAClF,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAEtC,UAAM,SAAS,IAAI,eAAuB;AAAA,MACxC,MAAM,MAAM,YAAY;AACtB,mBAAW,QAAQ,SAAS;AAC5B,cAAM,MAAM,EAAE;AACd,mBAAW,QAAQ,SAAS;AAC5B,mBAAW,MAAM;AAAA,MACnB;AAAA,IACF,CAAC;AAED,UAAM,KAAK,MAAM,eAAe,MAAM;AAEtC,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,KAAK,EAAE,KAAK,SAAS;AAE/B,UAAM,MAAM,kBAAkB,EAAE;AAGhC,UAAM,eAAe,OAAO,UAAU;AACtC,UAAM,KAAK,MAAM,aAAa,KAAK;AACnC,WAAO,GAAG,KAAK,EAAE,KAAK,SAAS;AAC/B,iBAAa,YAAY;AAEzB,WAAO,YAAY;AACnB,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAMD,KAAG,wEAAwE,YAAY;AACrF,UAAM,QAAQ,IAAI,iBAAyB;AAE3C,WAAO,MAAM,UAAU,EAAE,KAAK,CAAC;AAE/B,UAAM,MAAM,MAAM,eAAe,WAAW,CAAC,GAAG,CAAC,CAAC;AAClD,UAAM,MAAM,MAAM,eAAe,WAAW,CAAC,GAAG,CAAC,CAAC;AAElD,WAAO,MAAM,UAAU,EAAE,KAAK,CAAC;AAE/B,UAAM,MAAM,kBAAkB,GAAG;AACjC,WAAO,MAAM,UAAU,EAAE,oBAAoB,CAAC;AAG9C,UAAM,MAAM,EAAE;AACd,WAAO,MAAM,UAAU,EAAE,KAAK,CAAC;AAE/B,UAAM,MAAM,kBAAkB,GAAG;AACjC,WAAO,MAAM,UAAU,EAAE,KAAK,CAAC;AAE/B,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAMD,KAAG,kDAAkD,YAAY;AAC/D,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAEtC,UAAM,SAAS,CAAC,KAAK,KAAK,KAAK,KAAK,GAAG;AACvC,QAAI,MAAM;AAEV,UAAM,SAAS,IAAI,eAAuB;AAAA,MACxC,MAAM,YAAY;AAChB,cAAM,YAAY,MAAM;AACtB,cAAI,MAAM,OAAO,QAAQ;AACvB,uBAAW,QAAQ,OAAO,KAAK,CAAC;AAChC,uBAAW,WAAW,CAAC;AAAA,UACzB,OAAO;AACL,uBAAW,MAAM;AAAA,UACnB;AAAA,QACF;AACA,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAED,UAAM,eAAe,MAAM;AAE3B,UAAM,UAAoB,CAAC;AAC3B,aAAS,IAAI,GAAG,IAAI,OAAO,QAAQ,KAAK;AACtC,YAAM,EAAE,MAAM,IAAI,MAAM,OAAO,KAAK;AACpC,cAAQ,KAAK,KAAM;AAAA,IACrB;AAEA,WAAO,OAAO,EAAE,QAAQ,MAAM;AAE9B,WAAO,YAAY;AACnB,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAMD,KAAG,8CAA8C,YAAY;AAC3D,UAAM,QAAQ,IAAI,iBAAyB;AAE3C,UAAM,cAAc,MAAM,KAAK,EAAE,QAAQ,IAAK,GAAG,CAAC,GAAG,MAAM,SAAS,CAAC,EAAE;AACvE,UAAM,eAAe,WAAW,WAAW,CAAC;AAE5C,UAAM,SAAS,MAAM,OAAO,UAAU;AACtC,UAAM,UAAoB,CAAC;AAE3B,QAAI,SAAS,MAAM,OAAO,KAAK;AAC/B,WAAO,CAAC,OAAO,MAAM;AACnB,cAAQ,KAAK,OAAO,KAAK;AAGzB,UAAI,QAAQ,WAAW,YAAY,OAAQ;AAC3C,eAAS,MAAM,OAAO,KAAK;AAAA,IAC7B;AAEA,WAAO,OAAO,EAAE,QAAQ,WAAW;AAEnC,WAAO,YAAY;AACnB,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAMD,KAAG,2CAA2C,YAAY;AACxD,UAAM,QAAQ,IAAI,iBAAyB;AAE3C,UAAM,CAAC,IAAI,EAAE,IAAI,MAAM,OAAO,IAAI;AAClC,UAAM,KAAK,GAAG,UAAU;AACxB,UAAM,KAAK,GAAG,UAAU;AAExB,UAAM,eAAe,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC;AAEzC,UAAM,CAAC,IAAI,EAAE,IAAI,MAAM,QAAQ,IAAI,CAAC,GAAG,KAAK,GAAG,GAAG,KAAK,CAAC,CAAC;AACzD,WAAO,GAAG,KAAK,EAAE,KAAK,EAAE;AACxB,WAAO,GAAG,KAAK,EAAE,KAAK,EAAE;AAExB,UAAM,CAAC,IAAI,EAAE,IAAI,MAAM,QAAQ,IAAI,CAAC,GAAG,KAAK,GAAG,GAAG,KAAK,CAAC,CAAC;AACzD,WAAO,GAAG,KAAK,EAAE,KAAK,EAAE;AACxB,WAAO,GAAG,KAAK,EAAE,KAAK,EAAE;AAExB,OAAG,YAAY;AACf,OAAG,YAAY;AACf,UAAM,MAAM,MAAM;AAAA,EACpB,CAAC;AAMD,KAAG,gDAAgD,MAAM;AACvD,UAAM,QAAQ,IAAI,iBAAyB;AAE3C,UAAM,MAAM,MAAM,eAAe,WAAW,CAAC,GAAG,CAAC,CAAC;AAClD,UAAM,MAAM,MAAM,eAAe,WAAW,CAAC,GAAG,CAAC,CAAC;AAClD,UAAM,MAAM,MAAM,eAAe,WAAW,CAAC,GAAG,CAAC,CAAC;AAElD,WAAO,GAAG,EAAE,IAAI,KAAK,GAAG;AACxB,WAAO,GAAG,EAAE,IAAI,KAAK,GAAG;AACxB,WAAO,GAAG,EAAE,IAAI,KAAK,GAAG;AAAA,EAC1B,CAAC;AAMD,KAAG,yDAAyD,YAAY;AACtE,UAAM,QAAQ,IAAI,iBAAyB;AAC3C,UAAM,SAAS,MAAM,OAAO,UAAU;AAGtC,UAAM,iBAAiB,IAAI,eAAuB;AAAA,MAChD,MAAM,MAAM,YAAY;AACtB,YAAI,IAAI;AACR,eAAO,MAAM;AACX,cAAI;AACF,uBAAW,QAAQ,QAAQ,GAAG,EAAE;AAAA,UAClC,QAAQ;AAEN;AAAA,UACF;AACA,gBAAM,MAAM,CAAC;AAAA,QACf;AAAA,MACF;AAAA,IACF,CAAC;AAED,UAAM,eAAe,cAAc;AAGnC,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,IAAI,EAAE,KAAK,KAAK;AAG1B,UAAM,MAAM,MAAM;AAElB,UAAM,KAAK,MAAM,OAAO,KAAK;AAC7B,WAAO,GAAG,IAAI,EAAE,KAAK,IAAI;AAEzB,WAAO,YAAY;AAAA,EACrB,CAAC;AACH,CAAC;","names":["result"]}
|
package/dist/stt/stt.cjs
CHANGED
|
@@ -111,8 +111,8 @@ class SpeechStream {
|
|
|
111
111
|
});
|
|
112
112
|
} else {
|
|
113
113
|
this.logger.warn(
|
|
114
|
-
{
|
|
115
|
-
`failed to recognize speech, retrying in ${retryInterval}
|
|
114
|
+
{ stt: this.#stt.label, attempt: i + 1, error },
|
|
115
|
+
`failed to recognize speech, retrying in ${retryInterval}ms`
|
|
116
116
|
);
|
|
117
117
|
}
|
|
118
118
|
if (retryInterval > 0) {
|
package/dist/stt/stt.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, intervalForRetry } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\nimport type { TimedString } from '../voice/index.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n /** Language code of the speech. */\n language: string;\n /** Transcribed text. */\n text: string;\n /** Start time of the speech segment in seconds. */\n startTime: number;\n /** End time of the speech segment in seconds. */\n endTime: number;\n /** Confidence score of the transcription (0-1). */\n confidence: number;\n /** Word-level timing information. */\n words?: TimedString[];\n}\n\nexport interface RecognitionUsage {\n /** Duration of the audio that was recognized in seconds. */\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n /**\n * Whether this STT supports aligned transcripts with word/chunk timestamps.\n * - 'word': Provider returns word-level timestamps\n * - 'chunk': Provider returns chunk-level timestamps (e.g., sentence/phrase boundaries)\n * - false: Provider does not support aligned transcripts\n */\n alignedTranscript?: 'word' | 'chunk' | false;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer, abortSignal?: AbortSignal): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame, abortSignal);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n\n protected abstract _recognize(\n frame: AudioBuffer,\n abortSignal?: AbortSignal,\n ): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n *\n * @param options - Optional configuration including connection options\n */\n abstract stream(options?: { connOptions?: APIConnectOptions }): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n private _startTimeOffset: number = 0;\n\n protected abortController = new AbortController();\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().finally(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = intervalForRetry(this._connOptions, i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n if (!this.output.closed) {\n try {\n this.output.put(event);\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n this.logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n }\n }\n }\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n if (!this.output.closed) {\n this.output.close();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n protected get abortSignal(): AbortSignal {\n return this.abortController.signal;\n }\n\n get startTimeOffset(): number {\n return this._startTimeOffset;\n }\n\n set startTimeOffset(value: number) {\n if (value < 0) {\n throw new Error('startTimeOffset must be non-negative');\n }\n this._startTimeOffset = value;\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (frame.samplesPerChannel === 0) {\n this.input.put(frame);\n return;\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n if (!this.input.closed) this.input.close();\n if (!this.queue.closed) this.queue.close();\n if (!this.output.closed) this.output.close();\n if (!this.abortController.signal.aborted) this.abortController.abort();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgD;AAEhD,yBAA6B;AAE7B,wBAA6C;AAC7C,mBAA8C;AAC9C,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAsF;AAEtF,mBAA8D;AAIvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AAkGL,MAAe,YAAa,gCAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAAoB,aAAiD;AACnF,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,OAAO,WAAW;AACtD,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,UAAM,4CAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EAeA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,SAAS,IAAI,gCAAgC;AAAA,EAC7C,QAAQ,IAAI,gCAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EACb;AAAA,EACA,mBAA2B;AAAA,EAEzB,kBAAkB,IAAI,gBAAgB;AAAA,EAEhD,YACE,KACA,YACA,oBAAuC,0CACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,gCAAU,MAAM,KAAK,SAAS,EAAE,QAAQ,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EACnE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,oBAAgB,+BAAiB,KAAK,cAAc,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,YAAI;AACF,eAAK,OAAO,IAAI,KAAK;AAAA,QACvB,SAAS,GAAG;AACV,cAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAC/D,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,EAAE;AAAA,cACT;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF;AACA,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,QAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAIA,IAAc,cAA2B;AACvC,WAAO,KAAK,gBAAgB;AAAA,EAC9B;AAAA,EAEA,IAAI,kBAA0B;AAC5B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,gBAAgB,OAAe;AACjC,QAAI,QAAQ,GAAG;AACb,YAAM,IAAI,MAAM,sCAAsC;AAAA,IACxD;AACA,SAAK,mBAAmB;AAAA,EAC1B;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,+BAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,MAAM,sBAAsB,GAAG;AACjC,WAAK,MAAM,IAAI,KAAK;AACpB;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,OAAO,OAAQ,MAAK,OAAO,MAAM;AAC3C,QAAI,CAAC,KAAK,gBAAgB,OAAO,QAAS,MAAK,gBAAgB,MAAM;AACrE,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
|
|
1
|
+
{"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, intervalForRetry } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\nimport type { TimedString } from '../voice/index.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n /** Language code of the speech. */\n language: string;\n /** Transcribed text. */\n text: string;\n /** Start time of the speech segment in seconds. */\n startTime: number;\n /** End time of the speech segment in seconds. */\n endTime: number;\n /** Confidence score of the transcription (0-1). */\n confidence: number;\n /** Word-level timing information. */\n words?: TimedString[];\n}\n\nexport interface RecognitionUsage {\n /** Duration of the audio that was recognized in seconds. */\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n /**\n * Whether this STT supports aligned transcripts with word/chunk timestamps.\n * - 'word': Provider returns word-level timestamps\n * - 'chunk': Provider returns chunk-level timestamps (e.g., sentence/phrase boundaries)\n * - false: Provider does not support aligned transcripts\n */\n alignedTranscript?: 'word' | 'chunk' | false;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer, abortSignal?: AbortSignal): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame, abortSignal);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n\n protected abstract _recognize(\n frame: AudioBuffer,\n abortSignal?: AbortSignal,\n ): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n *\n * @param options - Optional configuration including connection options\n */\n abstract stream(options?: { connOptions?: APIConnectOptions }): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n private _startTimeOffset: number = 0;\n\n protected abortController = new AbortController();\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().finally(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = intervalForRetry(this._connOptions, i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { stt: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}ms`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n if (!this.output.closed) {\n try {\n this.output.put(event);\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n this.logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n }\n }\n }\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n if (!this.output.closed) {\n this.output.close();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n protected get abortSignal(): AbortSignal {\n return this.abortController.signal;\n }\n\n get startTimeOffset(): number {\n return this._startTimeOffset;\n }\n\n set startTimeOffset(value: number) {\n if (value < 0) {\n throw new Error('startTimeOffset must be non-negative');\n }\n this._startTimeOffset = value;\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (frame.samplesPerChannel === 0) {\n this.input.put(frame);\n return;\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n if (!this.input.closed) this.input.close();\n if (!this.queue.closed) this.queue.close();\n if (!this.output.closed) this.output.close();\n if (!this.abortController.signal.aborted) this.abortController.abort();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgD;AAEhD,yBAA6B;AAE7B,wBAA6C;AAC7C,mBAA8C;AAC9C,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAsF;AAEtF,mBAA8D;AAIvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AAkGL,MAAe,YAAa,gCAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAAoB,aAAiD;AACnF,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,OAAO,WAAW;AACtD,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,UAAM,4CAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EAeA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,SAAS,IAAI,gCAAgC;AAAA,EAC7C,QAAQ,IAAI,gCAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EACb;AAAA,EACA,mBAA2B;AAAA,EAEzB,kBAAkB,IAAI,gBAAgB;AAAA,EAEhD,YACE,KACA,YACA,oBAAuC,0CACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,gCAAU,MAAM,KAAK,SAAS,EAAE,QAAQ,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EACnE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,oBAAgB,+BAAiB,KAAK,cAAc,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,YAAI;AACF,eAAK,OAAO,IAAI,KAAK;AAAA,QACvB,SAAS,GAAG;AACV,cAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAC/D,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,EAAE;AAAA,cACT;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF;AACA,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,QAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAIA,IAAc,cAA2B;AACvC,WAAO,KAAK,gBAAgB;AAAA,EAC9B;AAAA,EAEA,IAAI,kBAA0B;AAC5B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,gBAAgB,OAAe;AACjC,QAAI,QAAQ,GAAG;AACb,YAAM,IAAI,MAAM,sCAAsC;AAAA,IACxD;AACA,SAAK,mBAAmB;AAAA,EAC1B;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,+BAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,MAAM,sBAAsB,GAAG;AACjC,WAAK,MAAM,IAAI,KAAK;AACpB;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,OAAO,OAAQ,MAAK,OAAO,MAAM;AAC3C,QAAI,CAAC,KAAK,gBAAgB,OAAO,QAAS,MAAK,gBAAgB,MAAM;AACrE,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
|
package/dist/stt/stt.js
CHANGED
|
@@ -86,8 +86,8 @@ class SpeechStream {
|
|
|
86
86
|
});
|
|
87
87
|
} else {
|
|
88
88
|
this.logger.warn(
|
|
89
|
-
{
|
|
90
|
-
`failed to recognize speech, retrying in ${retryInterval}
|
|
89
|
+
{ stt: this.#stt.label, attempt: i + 1, error },
|
|
90
|
+
`failed to recognize speech, retrying in ${retryInterval}ms`
|
|
91
91
|
);
|
|
92
92
|
}
|
|
93
93
|
if (retryInterval > 0) {
|
package/dist/stt/stt.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, intervalForRetry } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\nimport type { TimedString } from '../voice/index.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n /** Language code of the speech. */\n language: string;\n /** Transcribed text. */\n text: string;\n /** Start time of the speech segment in seconds. */\n startTime: number;\n /** End time of the speech segment in seconds. */\n endTime: number;\n /** Confidence score of the transcription (0-1). */\n confidence: number;\n /** Word-level timing information. */\n words?: TimedString[];\n}\n\nexport interface RecognitionUsage {\n /** Duration of the audio that was recognized in seconds. */\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n /**\n * Whether this STT supports aligned transcripts with word/chunk timestamps.\n * - 'word': Provider returns word-level timestamps\n * - 'chunk': Provider returns chunk-level timestamps (e.g., sentence/phrase boundaries)\n * - false: Provider does not support aligned transcripts\n */\n alignedTranscript?: 'word' | 'chunk' | false;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer, abortSignal?: AbortSignal): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame, abortSignal);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n\n protected abstract _recognize(\n frame: AudioBuffer,\n abortSignal?: AbortSignal,\n ): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n *\n * @param options - Optional configuration including connection options\n */\n abstract stream(options?: { connOptions?: APIConnectOptions }): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n private _startTimeOffset: number = 0;\n\n protected abortController = new AbortController();\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().finally(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = intervalForRetry(this._connOptions, i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n if (!this.output.closed) {\n try {\n this.output.put(event);\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n this.logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n }\n }\n }\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n if (!this.output.closed) {\n this.output.close();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n protected get abortSignal(): AbortSignal {\n return this.abortController.signal;\n }\n\n get startTimeOffset(): number {\n return this._startTimeOffset;\n }\n\n set startTimeOffset(value: number) {\n if (value < 0) {\n throw new Error('startTimeOffset must be non-negative');\n }\n this._startTimeOffset = value;\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (frame.samplesPerChannel === 0) {\n this.input.put(frame);\n return;\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n if (!this.input.closed) this.input.close();\n if (!this.queue.closed) this.queue.close();\n if (!this.output.closed) this.output.close();\n if (!this.abortController.signal.aborted) this.abortController.abort();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":"AAGA,SAA0B,sBAAsB;AAEhD,SAAS,oBAAoB;AAE7B,SAAS,oBAAoB,gBAAgB;AAC7C,SAAS,qCAAqC;AAC9C,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAiC,6BAA6B,wBAAwB;AAEtF,SAAS,oBAAoB,OAAO,WAAW,eAAe;AAIvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AAkGL,MAAe,YAAa,aAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAAoB,aAAiD;AACnF,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,OAAO,WAAW;AACtD,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,MAAM,8BAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EAeA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,mBAAoE;AAAA,EAChF,SAAS,IAAI,mBAAgC;AAAA,EAC7C,QAAQ,IAAI,mBAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,SAAS,IAAI;AAAA,EACb;AAAA,EACA,mBAA2B;AAAA,EAEzB,kBAAkB,IAAI,gBAAgB;AAAA,EAEhD,YACE,KACA,YACA,oBAAuC,6BACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,cAAU,MAAM,KAAK,SAAS,EAAE,QAAQ,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EACnE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,UAAU;AAC7B,gBAAM,gBAAgB,iBAAiB,KAAK,cAAc,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,YAAI;AACF,eAAK,OAAO,IAAI,KAAK;AAAA,QACvB,SAAS,GAAG;AACV,cAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAC/D,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,EAAE;AAAA,cACT;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF;AACA,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,QAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAIA,IAAc,cAA2B;AACvC,WAAO,KAAK,gBAAgB;AAAA,EAC9B;AAAA,EAEA,IAAI,kBAA0B;AAC5B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,gBAAgB,OAAe;AACjC,QAAI,QAAQ,GAAG;AACb,YAAM,IAAI,MAAM,sCAAsC;AAAA,IACxD;AACA,SAAK,mBAAmB;AAAA,EAC1B;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,eAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,MAAM,sBAAsB,GAAG;AACjC,WAAK,MAAM,IAAI,KAAK;AACpB;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,OAAO,OAAQ,MAAK,OAAO,MAAM;AAC3C,QAAI,CAAC,KAAK,gBAAgB,OAAO,QAAS,MAAK,gBAAgB,MAAM;AACrE,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
|
|
1
|
+
{"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDurationSeconds } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, intervalForRetry } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';\nimport type { TimedString } from '../voice/index.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n /**\n * Preflight transcript, emitted before final transcript when STT has high confidence\n * but hasn't fully committed yet. Includes all pre-committed transcripts including\n * final transcript from the previous STT run.\n */\n PREFLIGHT_TRANSCRIPT = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n /** Language code of the speech. */\n language: string;\n /** Transcribed text. */\n text: string;\n /** Start time of the speech segment in seconds. */\n startTime: number;\n /** End time of the speech segment in seconds. */\n endTime: number;\n /** Confidence score of the transcription (0-1). */\n confidence: number;\n /** Word-level timing information. */\n words?: TimedString[];\n}\n\nexport interface RecognitionUsage {\n /** Duration of the audio that was recognized in seconds. */\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n /**\n * Whether this STT supports aligned transcripts with word/chunk timestamps.\n * - 'word': Provider returns word-level timestamps\n * - 'chunk': Provider returns chunk-level timestamps (e.g., sentence/phrase boundaries)\n * - false: Provider does not support aligned transcripts\n */\n alignedTranscript?: 'word' | 'chunk' | false;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer, abortSignal?: AbortSignal): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame, abortSignal);\n const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n durationMs,\n label: this.label,\n audioDurationMs: Math.round(calculateAudioDurationSeconds(frame) * 1000),\n streamed: false,\n });\n return event;\n }\n\n protected abstract _recognize(\n frame: AudioBuffer,\n abortSignal?: AbortSignal,\n ): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n *\n * @param options - Optional configuration including connection options\n */\n abstract stream(options?: { connOptions?: APIConnectOptions }): SpeechStream;\n\n async close(): Promise<void> {\n return;\n }\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n private _startTimeOffset: number = 0;\n\n protected abortController = new AbortController();\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().finally(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = intervalForRetry(this._connOptions, i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n // Don't emit error event for recoverable errors during retry loop\n // to avoid ERR_UNHANDLED_ERROR or premature session termination\n this.logger.warn(\n { stt: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}ms`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n if (!this.output.closed) {\n try {\n this.output.put(event);\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n this.logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n }\n }\n }\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n durationMs: 0,\n label: this.#stt.label,\n audioDurationMs: Math.round(event.recognitionUsage!.audioDuration * 1000),\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n if (!this.output.closed) {\n this.output.close();\n }\n }\n\n protected abstract run(): Promise<void>;\n\n protected get abortSignal(): AbortSignal {\n return this.abortController.signal;\n }\n\n get startTimeOffset(): number {\n return this._startTimeOffset;\n }\n\n set startTimeOffset(value: number) {\n if (value < 0) {\n throw new Error('startTimeOffset must be non-negative');\n }\n this._startTimeOffset = value;\n }\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (frame.samplesPerChannel === 0) {\n this.input.put(frame);\n return;\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n if (!this.input.closed) this.input.close();\n if (!this.queue.closed) this.queue.close();\n if (!this.output.closed) this.output.close();\n if (!this.abortController.signal.aborted) this.abortController.abort();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n }\n}\n"],"mappings":"AAGA,SAA0B,sBAAsB;AAEhD,SAAS,oBAAoB;AAE7B,SAAS,oBAAoB,gBAAgB;AAC7C,SAAS,qCAAqC;AAC9C,SAAS,WAAW;AAEpB,SAAS,8BAA8B;AACvC,SAAiC,6BAA6B,wBAAwB;AAEtF,SAAS,oBAAoB,OAAO,WAAW,eAAe;AAIvD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAMA,EAAAA,kCAAA,0BAAuB,KAAvB;AA5BU,SAAAA;AAAA,GAAA;AAkGL,MAAe,YAAa,aAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAAoB,aAAiD;AACnF,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,OAAO,WAAW;AACtD,UAAM,aAAa,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AACjF,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,iBAAiB,KAAK,MAAM,8BAA8B,KAAK,IAAI,GAAI;AAAA,MACvE,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAAA,EAeA,MAAM,QAAuB;AAC3B;AAAA,EACF;AACF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,mBAAoE;AAAA,EAChF,SAAS,IAAI,mBAAgC;AAAA,EAC7C,QAAQ,IAAI,mBAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,SAAS,IAAI;AAAA,EACb;AAAA,EACA,mBAA2B;AAAA,EAEzB,kBAAkB,IAAI,gBAAgB;AAAA,EAEhD,YACE,KACA,YACA,oBAAuC,6BACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,uBAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,cAAU,MAAM,KAAK,SAAS,EAAE,QAAQ,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EACnE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,UAAU;AAC7B,gBAAM,gBAAgB,iBAAiB,KAAK,cAAc,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,mBAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AAGL,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AAAA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,kBAAM,MAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,OAAO,QAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,UAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,YAAI;AACF,eAAK,OAAO,IAAI,KAAK;AAAA,QACvB,SAAS,GAAG;AACV,cAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAC/D,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,EAAE;AAAA,cACT;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF;AACA,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,YAAY;AAAA,QACZ,OAAO,KAAK,KAAK;AAAA,QACjB,iBAAiB,KAAK,MAAM,MAAM,iBAAkB,gBAAgB,GAAI;AAAA,QACxE,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,QAAI,CAAC,KAAK,OAAO,QAAQ;AACvB,WAAK,OAAO,MAAM;AAAA,IACpB;AAAA,EACF;AAAA,EAIA,IAAc,cAA2B;AACvC,WAAO,KAAK,gBAAgB;AAAA,EAC9B;AAAA,EAEA,IAAI,kBAA0B;AAC5B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,gBAAgB,OAAe;AACjC,QAAI,QAAQ,GAAG;AACb,YAAM,IAAI,MAAM,sCAAsC;AAAA,IACxD;AACA,SAAK,mBAAmB;AAAA,EAC1B;AAAA,EAEA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,eAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,MAAM,sBAAsB,GAAG;AACjC,WAAK,MAAM,IAAI,KAAK;AACpB;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,MAAM,OAAQ,MAAK,MAAM,MAAM;AACzC,QAAI,CAAC,KAAK,OAAO,OAAQ,MAAK,OAAO,MAAM;AAC3C,QAAI,CAAC,KAAK,gBAAgB,OAAO,QAAS,MAAK,gBAAgB,MAAM;AACrE,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
|