@livekit/agents 1.0.36 → 1.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs.map +1 -1
- package/dist/inference/api_protos.cjs +68 -0
- package/dist/inference/api_protos.cjs.map +1 -1
- package/dist/inference/api_protos.d.cts +345 -4
- package/dist/inference/api_protos.d.ts +345 -4
- package/dist/inference/api_protos.d.ts.map +1 -1
- package/dist/inference/api_protos.js +60 -0
- package/dist/inference/api_protos.js.map +1 -1
- package/dist/inference/stt.cjs +32 -21
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +34 -21
- package/dist/inference/stt.js.map +1 -1
- package/dist/ipc/inference_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_executor.cjs.map +1 -1
- package/dist/stt/stt.cjs +10 -0
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +12 -0
- package/dist/stt/stt.d.ts +12 -0
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +10 -0
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/traces.cjs +4 -3
- package/dist/telemetry/traces.cjs.map +1 -1
- package/dist/telemetry/traces.d.cts +2 -0
- package/dist/telemetry/traces.d.ts +2 -0
- package/dist/telemetry/traces.d.ts.map +1 -1
- package/dist/telemetry/traces.js +4 -3
- package/dist/telemetry/traces.js.map +1 -1
- package/dist/utils.cjs +6 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +2 -0
- package/dist/utils.d.ts +2 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +6 -0
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent.cjs +5 -0
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +5 -0
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +49 -23
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +1 -1
- package/dist/voice/agent_activity.d.ts +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +50 -24
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +7 -5
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +5 -2
- package/dist/voice/agent_session.d.ts +5 -2
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +7 -5
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +3 -1
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +3 -1
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/avatar/datastream_io.cjs +6 -0
- package/dist/voice/avatar/datastream_io.cjs.map +1 -1
- package/dist/voice/avatar/datastream_io.d.cts +1 -0
- package/dist/voice/avatar/datastream_io.d.ts +1 -0
- package/dist/voice/avatar/datastream_io.d.ts.map +1 -1
- package/dist/voice/avatar/datastream_io.js +6 -0
- package/dist/voice/avatar/datastream_io.js.map +1 -1
- package/dist/voice/background_audio.cjs.map +1 -1
- package/dist/voice/generation.cjs +14 -5
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.cts +3 -2
- package/dist/voice/generation.d.ts +3 -2
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +14 -5
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/io.cjs +12 -0
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +19 -1
- package/dist/voice/io.d.ts +19 -1
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js +12 -0
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/recorder_io/recorder_io.cjs +91 -28
- package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
- package/dist/voice/recorder_io/recorder_io.d.cts +7 -1
- package/dist/voice/recorder_io/recorder_io.d.ts +7 -1
- package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
- package/dist/voice/recorder_io/recorder_io.js +91 -28
- package/dist/voice/recorder_io/recorder_io.js.map +1 -1
- package/dist/voice/room_io/_input.cjs +40 -11
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.cts +4 -1
- package/dist/voice/room_io/_input.d.ts +4 -1
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +31 -2
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/room_io/_output.cjs +6 -0
- package/dist/voice/room_io/_output.cjs.map +1 -1
- package/dist/voice/room_io/_output.d.cts +1 -0
- package/dist/voice/room_io/_output.d.ts +1 -0
- package/dist/voice/room_io/_output.d.ts.map +1 -1
- package/dist/voice/room_io/_output.js +6 -0
- package/dist/voice/room_io/_output.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +2 -2
- package/dist/voice/room_io/room_io.d.ts +2 -2
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/speech_handle.cjs +2 -0
- package/dist/voice/speech_handle.cjs.map +1 -1
- package/dist/voice/speech_handle.d.cts +3 -0
- package/dist/voice/speech_handle.d.ts +3 -0
- package/dist/voice/speech_handle.d.ts.map +1 -1
- package/dist/voice/speech_handle.js +2 -0
- package/dist/voice/speech_handle.js.map +1 -1
- package/dist/voice/testing/index.cjs +2 -0
- package/dist/voice/testing/index.cjs.map +1 -1
- package/dist/voice/testing/index.d.cts +1 -1
- package/dist/voice/testing/index.d.ts +1 -1
- package/dist/voice/testing/index.d.ts.map +1 -1
- package/dist/voice/testing/index.js +2 -0
- package/dist/voice/testing/index.js.map +1 -1
- package/dist/voice/testing/run_result.cjs +294 -5
- package/dist/voice/testing/run_result.cjs.map +1 -1
- package/dist/voice/testing/run_result.d.cts +149 -1
- package/dist/voice/testing/run_result.d.ts +149 -1
- package/dist/voice/testing/run_result.d.ts.map +1 -1
- package/dist/voice/testing/run_result.js +293 -5
- package/dist/voice/testing/run_result.js.map +1 -1
- package/package.json +1 -1
- package/src/inference/api_protos.ts +83 -0
- package/src/inference/stt.ts +39 -22
- package/src/stt/stt.ts +21 -0
- package/src/telemetry/traces.ts +6 -2
- package/src/utils.ts +7 -0
- package/src/voice/agent.ts +9 -0
- package/src/voice/agent_activity.ts +72 -26
- package/src/voice/agent_session.ts +6 -5
- package/src/voice/audio_recognition.ts +2 -0
- package/src/voice/avatar/datastream_io.ts +8 -0
- package/src/voice/generation.ts +24 -12
- package/src/voice/io.ts +27 -5
- package/src/voice/recorder_io/recorder_io.ts +123 -31
- package/src/voice/room_io/_input.ts +32 -4
- package/src/voice/room_io/_output.ts +8 -0
- package/src/voice/room_io/room_io.ts +3 -1
- package/src/voice/speech_handle.ts +4 -0
- package/src/voice/testing/index.ts +1 -0
- package/src/voice/testing/run_result.ts +373 -12
|
@@ -13,18 +13,18 @@ export declare const ttsSessionCreateEventSchema: z.ZodObject<{
|
|
|
13
13
|
extra: Record<string, unknown>;
|
|
14
14
|
sample_rate: string;
|
|
15
15
|
encoding: string;
|
|
16
|
-
language?: string | undefined;
|
|
17
16
|
model?: string | undefined;
|
|
18
17
|
transcript?: string | undefined;
|
|
18
|
+
language?: string | undefined;
|
|
19
19
|
voice?: string | undefined;
|
|
20
20
|
}, {
|
|
21
21
|
type: "session.create";
|
|
22
22
|
extra: Record<string, unknown>;
|
|
23
23
|
sample_rate: string;
|
|
24
24
|
encoding: string;
|
|
25
|
-
language?: string | undefined;
|
|
26
25
|
model?: string | undefined;
|
|
27
26
|
transcript?: string | undefined;
|
|
27
|
+
language?: string | undefined;
|
|
28
28
|
voice?: string | undefined;
|
|
29
29
|
}>;
|
|
30
30
|
export declare const ttsInputTranscriptEventSchema: z.ZodObject<{
|
|
@@ -121,18 +121,18 @@ export declare const ttsClientEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
121
121
|
extra: Record<string, unknown>;
|
|
122
122
|
sample_rate: string;
|
|
123
123
|
encoding: string;
|
|
124
|
-
language?: string | undefined;
|
|
125
124
|
model?: string | undefined;
|
|
126
125
|
transcript?: string | undefined;
|
|
126
|
+
language?: string | undefined;
|
|
127
127
|
voice?: string | undefined;
|
|
128
128
|
}, {
|
|
129
129
|
type: "session.create";
|
|
130
130
|
extra: Record<string, unknown>;
|
|
131
131
|
sample_rate: string;
|
|
132
132
|
encoding: string;
|
|
133
|
-
language?: string | undefined;
|
|
134
133
|
model?: string | undefined;
|
|
135
134
|
transcript?: string | undefined;
|
|
135
|
+
language?: string | undefined;
|
|
136
136
|
voice?: string | undefined;
|
|
137
137
|
}>, z.ZodObject<{
|
|
138
138
|
type: z.ZodLiteral<"input_transcript">;
|
|
@@ -219,4 +219,345 @@ export type TtsSessionClosedEvent = z.infer<typeof ttsSessionClosedEventSchema>;
|
|
|
219
219
|
export type TtsErrorEvent = z.infer<typeof ttsErrorEventSchema>;
|
|
220
220
|
export type TtsClientEvent = z.infer<typeof ttsClientEventSchema>;
|
|
221
221
|
export type TtsServerEvent = z.infer<typeof ttsServerEventSchema>;
|
|
222
|
+
export declare const sttWordSchema: z.ZodObject<{
|
|
223
|
+
word: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
224
|
+
start: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
225
|
+
end: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
226
|
+
confidence: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
227
|
+
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
228
|
+
}, "strip", z.ZodTypeAny, {
|
|
229
|
+
end: number;
|
|
230
|
+
confidence: number;
|
|
231
|
+
start: number;
|
|
232
|
+
word: string;
|
|
233
|
+
extra?: unknown;
|
|
234
|
+
}, {
|
|
235
|
+
end?: number | undefined;
|
|
236
|
+
extra?: unknown;
|
|
237
|
+
confidence?: number | undefined;
|
|
238
|
+
start?: number | undefined;
|
|
239
|
+
word?: string | undefined;
|
|
240
|
+
}>;
|
|
241
|
+
export declare const sttInterimTranscriptEventSchema: z.ZodObject<{
|
|
242
|
+
type: z.ZodLiteral<"interim_transcript">;
|
|
243
|
+
session_id: z.ZodOptional<z.ZodString>;
|
|
244
|
+
transcript: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
245
|
+
language: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
246
|
+
start: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
247
|
+
duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
248
|
+
confidence: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
249
|
+
words: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
250
|
+
word: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
251
|
+
start: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
252
|
+
end: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
253
|
+
confidence: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
254
|
+
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
255
|
+
}, "strip", z.ZodTypeAny, {
|
|
256
|
+
end: number;
|
|
257
|
+
confidence: number;
|
|
258
|
+
start: number;
|
|
259
|
+
word: string;
|
|
260
|
+
extra?: unknown;
|
|
261
|
+
}, {
|
|
262
|
+
end?: number | undefined;
|
|
263
|
+
extra?: unknown;
|
|
264
|
+
confidence?: number | undefined;
|
|
265
|
+
start?: number | undefined;
|
|
266
|
+
word?: string | undefined;
|
|
267
|
+
}>, "many">>>;
|
|
268
|
+
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
269
|
+
}, "strip", z.ZodTypeAny, {
|
|
270
|
+
type: "interim_transcript";
|
|
271
|
+
transcript: string;
|
|
272
|
+
language: string;
|
|
273
|
+
confidence: number;
|
|
274
|
+
start: number;
|
|
275
|
+
duration: number;
|
|
276
|
+
words: {
|
|
277
|
+
end: number;
|
|
278
|
+
confidence: number;
|
|
279
|
+
start: number;
|
|
280
|
+
word: string;
|
|
281
|
+
extra?: unknown;
|
|
282
|
+
}[];
|
|
283
|
+
extra?: unknown;
|
|
284
|
+
session_id?: string | undefined;
|
|
285
|
+
}, {
|
|
286
|
+
type: "interim_transcript";
|
|
287
|
+
extra?: unknown;
|
|
288
|
+
transcript?: string | undefined;
|
|
289
|
+
language?: string | undefined;
|
|
290
|
+
confidence?: number | undefined;
|
|
291
|
+
start?: number | undefined;
|
|
292
|
+
session_id?: string | undefined;
|
|
293
|
+
duration?: number | undefined;
|
|
294
|
+
words?: {
|
|
295
|
+
end?: number | undefined;
|
|
296
|
+
extra?: unknown;
|
|
297
|
+
confidence?: number | undefined;
|
|
298
|
+
start?: number | undefined;
|
|
299
|
+
word?: string | undefined;
|
|
300
|
+
}[] | undefined;
|
|
301
|
+
}>;
|
|
302
|
+
export declare const sttFinalTranscriptEventSchema: z.ZodObject<{
|
|
303
|
+
type: z.ZodLiteral<"final_transcript">;
|
|
304
|
+
session_id: z.ZodOptional<z.ZodString>;
|
|
305
|
+
transcript: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
306
|
+
language: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
307
|
+
start: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
308
|
+
duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
309
|
+
confidence: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
310
|
+
words: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
311
|
+
word: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
312
|
+
start: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
313
|
+
end: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
314
|
+
confidence: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
315
|
+
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
316
|
+
}, "strip", z.ZodTypeAny, {
|
|
317
|
+
end: number;
|
|
318
|
+
confidence: number;
|
|
319
|
+
start: number;
|
|
320
|
+
word: string;
|
|
321
|
+
extra?: unknown;
|
|
322
|
+
}, {
|
|
323
|
+
end?: number | undefined;
|
|
324
|
+
extra?: unknown;
|
|
325
|
+
confidence?: number | undefined;
|
|
326
|
+
start?: number | undefined;
|
|
327
|
+
word?: string | undefined;
|
|
328
|
+
}>, "many">>>;
|
|
329
|
+
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
330
|
+
}, "strip", z.ZodTypeAny, {
|
|
331
|
+
type: "final_transcript";
|
|
332
|
+
transcript: string;
|
|
333
|
+
language: string;
|
|
334
|
+
confidence: number;
|
|
335
|
+
start: number;
|
|
336
|
+
duration: number;
|
|
337
|
+
words: {
|
|
338
|
+
end: number;
|
|
339
|
+
confidence: number;
|
|
340
|
+
start: number;
|
|
341
|
+
word: string;
|
|
342
|
+
extra?: unknown;
|
|
343
|
+
}[];
|
|
344
|
+
extra?: unknown;
|
|
345
|
+
session_id?: string | undefined;
|
|
346
|
+
}, {
|
|
347
|
+
type: "final_transcript";
|
|
348
|
+
extra?: unknown;
|
|
349
|
+
transcript?: string | undefined;
|
|
350
|
+
language?: string | undefined;
|
|
351
|
+
confidence?: number | undefined;
|
|
352
|
+
start?: number | undefined;
|
|
353
|
+
session_id?: string | undefined;
|
|
354
|
+
duration?: number | undefined;
|
|
355
|
+
words?: {
|
|
356
|
+
end?: number | undefined;
|
|
357
|
+
extra?: unknown;
|
|
358
|
+
confidence?: number | undefined;
|
|
359
|
+
start?: number | undefined;
|
|
360
|
+
word?: string | undefined;
|
|
361
|
+
}[] | undefined;
|
|
362
|
+
}>;
|
|
363
|
+
export declare const sttSessionCreatedEventSchema: z.ZodObject<{
|
|
364
|
+
type: z.ZodLiteral<"session.created">;
|
|
365
|
+
session_id: z.ZodOptional<z.ZodString>;
|
|
366
|
+
}, "strip", z.ZodTypeAny, {
|
|
367
|
+
type: "session.created";
|
|
368
|
+
session_id?: string | undefined;
|
|
369
|
+
}, {
|
|
370
|
+
type: "session.created";
|
|
371
|
+
session_id?: string | undefined;
|
|
372
|
+
}>;
|
|
373
|
+
export declare const sttSessionFinalizedEventSchema: z.ZodObject<{
|
|
374
|
+
type: z.ZodLiteral<"session.finalized">;
|
|
375
|
+
}, "strip", z.ZodTypeAny, {
|
|
376
|
+
type: "session.finalized";
|
|
377
|
+
}, {
|
|
378
|
+
type: "session.finalized";
|
|
379
|
+
}>;
|
|
380
|
+
export declare const sttSessionClosedEventSchema: z.ZodObject<{
|
|
381
|
+
type: z.ZodLiteral<"session.closed">;
|
|
382
|
+
}, "strip", z.ZodTypeAny, {
|
|
383
|
+
type: "session.closed";
|
|
384
|
+
}, {
|
|
385
|
+
type: "session.closed";
|
|
386
|
+
}>;
|
|
387
|
+
export declare const sttErrorEventSchema: z.ZodObject<{
|
|
388
|
+
type: z.ZodLiteral<"error">;
|
|
389
|
+
message: z.ZodOptional<z.ZodString>;
|
|
390
|
+
code: z.ZodOptional<z.ZodString>;
|
|
391
|
+
}, "strip", z.ZodTypeAny, {
|
|
392
|
+
type: "error";
|
|
393
|
+
message?: string | undefined;
|
|
394
|
+
code?: string | undefined;
|
|
395
|
+
}, {
|
|
396
|
+
type: "error";
|
|
397
|
+
message?: string | undefined;
|
|
398
|
+
code?: string | undefined;
|
|
399
|
+
}>;
|
|
400
|
+
export declare const sttServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<{
|
|
401
|
+
type: z.ZodLiteral<"session.created">;
|
|
402
|
+
session_id: z.ZodOptional<z.ZodString>;
|
|
403
|
+
}, "strip", z.ZodTypeAny, {
|
|
404
|
+
type: "session.created";
|
|
405
|
+
session_id?: string | undefined;
|
|
406
|
+
}, {
|
|
407
|
+
type: "session.created";
|
|
408
|
+
session_id?: string | undefined;
|
|
409
|
+
}>, z.ZodObject<{
|
|
410
|
+
type: z.ZodLiteral<"session.finalized">;
|
|
411
|
+
}, "strip", z.ZodTypeAny, {
|
|
412
|
+
type: "session.finalized";
|
|
413
|
+
}, {
|
|
414
|
+
type: "session.finalized";
|
|
415
|
+
}>, z.ZodObject<{
|
|
416
|
+
type: z.ZodLiteral<"session.closed">;
|
|
417
|
+
}, "strip", z.ZodTypeAny, {
|
|
418
|
+
type: "session.closed";
|
|
419
|
+
}, {
|
|
420
|
+
type: "session.closed";
|
|
421
|
+
}>, z.ZodObject<{
|
|
422
|
+
type: z.ZodLiteral<"interim_transcript">;
|
|
423
|
+
session_id: z.ZodOptional<z.ZodString>;
|
|
424
|
+
transcript: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
425
|
+
language: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
426
|
+
start: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
427
|
+
duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
428
|
+
confidence: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
429
|
+
words: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
430
|
+
word: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
431
|
+
start: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
432
|
+
end: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
433
|
+
confidence: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
434
|
+
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
435
|
+
}, "strip", z.ZodTypeAny, {
|
|
436
|
+
end: number;
|
|
437
|
+
confidence: number;
|
|
438
|
+
start: number;
|
|
439
|
+
word: string;
|
|
440
|
+
extra?: unknown;
|
|
441
|
+
}, {
|
|
442
|
+
end?: number | undefined;
|
|
443
|
+
extra?: unknown;
|
|
444
|
+
confidence?: number | undefined;
|
|
445
|
+
start?: number | undefined;
|
|
446
|
+
word?: string | undefined;
|
|
447
|
+
}>, "many">>>;
|
|
448
|
+
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
449
|
+
}, "strip", z.ZodTypeAny, {
|
|
450
|
+
type: "interim_transcript";
|
|
451
|
+
transcript: string;
|
|
452
|
+
language: string;
|
|
453
|
+
confidence: number;
|
|
454
|
+
start: number;
|
|
455
|
+
duration: number;
|
|
456
|
+
words: {
|
|
457
|
+
end: number;
|
|
458
|
+
confidence: number;
|
|
459
|
+
start: number;
|
|
460
|
+
word: string;
|
|
461
|
+
extra?: unknown;
|
|
462
|
+
}[];
|
|
463
|
+
extra?: unknown;
|
|
464
|
+
session_id?: string | undefined;
|
|
465
|
+
}, {
|
|
466
|
+
type: "interim_transcript";
|
|
467
|
+
extra?: unknown;
|
|
468
|
+
transcript?: string | undefined;
|
|
469
|
+
language?: string | undefined;
|
|
470
|
+
confidence?: number | undefined;
|
|
471
|
+
start?: number | undefined;
|
|
472
|
+
session_id?: string | undefined;
|
|
473
|
+
duration?: number | undefined;
|
|
474
|
+
words?: {
|
|
475
|
+
end?: number | undefined;
|
|
476
|
+
extra?: unknown;
|
|
477
|
+
confidence?: number | undefined;
|
|
478
|
+
start?: number | undefined;
|
|
479
|
+
word?: string | undefined;
|
|
480
|
+
}[] | undefined;
|
|
481
|
+
}>, z.ZodObject<{
|
|
482
|
+
type: z.ZodLiteral<"final_transcript">;
|
|
483
|
+
session_id: z.ZodOptional<z.ZodString>;
|
|
484
|
+
transcript: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
485
|
+
language: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
486
|
+
start: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
487
|
+
duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
488
|
+
confidence: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
489
|
+
words: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
490
|
+
word: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
491
|
+
start: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
492
|
+
end: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
493
|
+
confidence: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
494
|
+
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
495
|
+
}, "strip", z.ZodTypeAny, {
|
|
496
|
+
end: number;
|
|
497
|
+
confidence: number;
|
|
498
|
+
start: number;
|
|
499
|
+
word: string;
|
|
500
|
+
extra?: unknown;
|
|
501
|
+
}, {
|
|
502
|
+
end?: number | undefined;
|
|
503
|
+
extra?: unknown;
|
|
504
|
+
confidence?: number | undefined;
|
|
505
|
+
start?: number | undefined;
|
|
506
|
+
word?: string | undefined;
|
|
507
|
+
}>, "many">>>;
|
|
508
|
+
extra: z.ZodOptional<z.ZodNullable<z.ZodUnknown>>;
|
|
509
|
+
}, "strip", z.ZodTypeAny, {
|
|
510
|
+
type: "final_transcript";
|
|
511
|
+
transcript: string;
|
|
512
|
+
language: string;
|
|
513
|
+
confidence: number;
|
|
514
|
+
start: number;
|
|
515
|
+
duration: number;
|
|
516
|
+
words: {
|
|
517
|
+
end: number;
|
|
518
|
+
confidence: number;
|
|
519
|
+
start: number;
|
|
520
|
+
word: string;
|
|
521
|
+
extra?: unknown;
|
|
522
|
+
}[];
|
|
523
|
+
extra?: unknown;
|
|
524
|
+
session_id?: string | undefined;
|
|
525
|
+
}, {
|
|
526
|
+
type: "final_transcript";
|
|
527
|
+
extra?: unknown;
|
|
528
|
+
transcript?: string | undefined;
|
|
529
|
+
language?: string | undefined;
|
|
530
|
+
confidence?: number | undefined;
|
|
531
|
+
start?: number | undefined;
|
|
532
|
+
session_id?: string | undefined;
|
|
533
|
+
duration?: number | undefined;
|
|
534
|
+
words?: {
|
|
535
|
+
end?: number | undefined;
|
|
536
|
+
extra?: unknown;
|
|
537
|
+
confidence?: number | undefined;
|
|
538
|
+
start?: number | undefined;
|
|
539
|
+
word?: string | undefined;
|
|
540
|
+
}[] | undefined;
|
|
541
|
+
}>, z.ZodObject<{
|
|
542
|
+
type: z.ZodLiteral<"error">;
|
|
543
|
+
message: z.ZodOptional<z.ZodString>;
|
|
544
|
+
code: z.ZodOptional<z.ZodString>;
|
|
545
|
+
}, "strip", z.ZodTypeAny, {
|
|
546
|
+
type: "error";
|
|
547
|
+
message?: string | undefined;
|
|
548
|
+
code?: string | undefined;
|
|
549
|
+
}, {
|
|
550
|
+
type: "error";
|
|
551
|
+
message?: string | undefined;
|
|
552
|
+
code?: string | undefined;
|
|
553
|
+
}>]>;
|
|
554
|
+
export type SttWord = z.infer<typeof sttWordSchema>;
|
|
555
|
+
export type SttInterimTranscriptEvent = z.infer<typeof sttInterimTranscriptEventSchema>;
|
|
556
|
+
export type SttFinalTranscriptEvent = z.infer<typeof sttFinalTranscriptEventSchema>;
|
|
557
|
+
export type SttTranscriptEvent = SttInterimTranscriptEvent | SttFinalTranscriptEvent;
|
|
558
|
+
export type SttSessionCreatedEvent = z.infer<typeof sttSessionCreatedEventSchema>;
|
|
559
|
+
export type SttSessionFinalizedEvent = z.infer<typeof sttSessionFinalizedEventSchema>;
|
|
560
|
+
export type SttSessionClosedEvent = z.infer<typeof sttSessionClosedEventSchema>;
|
|
561
|
+
export type SttErrorEvent = z.infer<typeof sttErrorEventSchema>;
|
|
562
|
+
export type SttServerEvent = z.infer<typeof sttServerEventSchema>;
|
|
222
563
|
//# sourceMappingURL=api_protos.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"api_protos.d.ts","sourceRoot":"","sources":["../../src/inference/api_protos.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,2BAA2B;;;;;;;;;;;;;;;;;;;;;;;;;;;EAStC,CAAC;AAEH,eAAO,MAAM,6BAA6B;;;;;;;;;EAGxC,CAAC;AAEH,eAAO,MAAM,0BAA0B;;;;;;EAErC,CAAC;AAEH,eAAO,MAAM,0BAA0B;;;;;;EAErC,CAAC;AAEH,eAAO,MAAM,4BAA4B;;;;;;;;;EAGvC,CAAC;AAEH,eAAO,MAAM,yBAAyB;;;;;;;;;;;;EAIpC,CAAC;AAEH,eAAO,MAAM,kBAAkB;;;;;;;;;EAG7B,CAAC;AAEH,eAAO,MAAM,2BAA2B;;;;;;;;;EAGtC,CAAC;AAEH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;EAI9B,CAAC;AAEH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAK/B,CAAC;AAEH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAM/B,CAAC;AAEH,MAAM,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,2BAA2B,CAAC,CAAC;AAChF,MAAM,MAAM,uBAAuB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,6BAA6B,CAAC,CAAC;AACpF,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,0BAA0B,CAAC,CAAC;AAC9E,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,0BAA0B,CAAC,CAAC;AAC9E,MAAM,MAAM,sBAAsB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,4BAA4B,CAAC,CAAC;AAClF,MAAM,MAAM,mBAAmB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,yBAAyB,CAAC,CAAC;AAC5E,MAAM,MAAM,YAAY,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,kBAAkB,CAAC,CAAC;AAC9D,MAAM,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,2BAA2B,CAAC,CAAC;AAChF,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAChE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"file":"api_protos.d.ts","sourceRoot":"","sources":["../../src/inference/api_protos.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,eAAO,MAAM,2BAA2B;;;;;;;;;;;;;;;;;;;;;;;;;;;EAStC,CAAC;AAEH,eAAO,MAAM,6BAA6B;;;;;;;;;EAGxC,CAAC;AAEH,eAAO,MAAM,0BAA0B;;;;;;EAErC,CAAC;AAEH,eAAO,MAAM,0BAA0B;;;;;;EAErC,CAAC;AAEH,eAAO,MAAM,4BAA4B;;;;;;;;;EAGvC,CAAC;AAEH,eAAO,MAAM,yBAAyB;;;;;;;;;;;;EAIpC,CAAC;AAEH,eAAO,MAAM,kBAAkB;;;;;;;;;EAG7B,CAAC;AAEH,eAAO,MAAM,2BAA2B;;;;;;;;;EAGtC,CAAC;AAEH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;EAI9B,CAAC;AAEH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAK/B,CAAC;AAEH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAM/B,CAAC;AAEH,MAAM,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,2BAA2B,CAAC,CAAC;AAChF,MAAM,MAAM,uBAAuB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,6BAA6B,CAAC,CAAC;AACpF,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,0BAA0B,CAAC,CAAC;AAC9E,MAAM,MAAM,oBAAoB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,0BAA0B,CAAC,CAAC;AAC9E,MAAM,MAAM,sBAAsB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,4BAA4B,CAAC,CAAC;AAClF,MAAM,MAAM,mBAAmB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,yBAAyB,CAAC,CAAC;AAC5E,MAAM,MAAM,YAAY,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,kBAAkB,CAAC,CAAC;AAC9D,MAAM,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,2BAA2B,CAAC,CAAC;AAChF,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAChE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAClE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAOlE,eAAO,MAAM,aAAa;;;;;;;;;;;;;;;;;;EAMxB,CAAC;AAGH,eAAO,MAAM,+BAA+B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAU1C,CAAC;AAGH,eAAO,MAAM,6BAA6B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAUxC,CAAC;AAGH,eAAO,MAAM,4BAA4B;;;;;;;;;EAGvC,CAAC;AAGH,eAAO,MAAM,8BAA8B;;;;;;EAEzC,CAAC;AAGH,eAAO,MAAM,2BAA2B;;;;;;EAEtC,CAAC;AAGH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;EAI9B,CAAC;AAGH,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAO/B,CAAC;AAGH,MAAM,MAAM,OAAO,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,aAAa,CAAC,CAAC;AACpD,MAAM,MAAM,yBAAyB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,+BAA+B,CAAC,CAAC;AACxF,MAAM,MAAM,uBAAuB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,6BAA6B,CAAC,CAAC;AACpF,MAAM,MAAM,kBAAkB,GAAG,yBAAyB,GAAG,uBAAuB,CAAC;AACrF,MAAM,MAAM,sBAAsB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,4BAA4B,CAAC,CAAC;AAClF,MAAM,MAAM,wBAAwB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,8BAA8B,CAAC,CAAC;AACtF,MAAM,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,2BAA2B,CAAC,CAAC;AAChF,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAChE,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC"}
|
|
@@ -54,7 +54,67 @@ const ttsServerEventSchema = z.discriminatedUnion("type", [
|
|
|
54
54
|
ttsSessionClosedEventSchema,
|
|
55
55
|
ttsErrorEventSchema
|
|
56
56
|
]);
|
|
57
|
+
const sttWordSchema = z.object({
|
|
58
|
+
word: z.string().optional().default(""),
|
|
59
|
+
start: z.number().optional().default(0),
|
|
60
|
+
end: z.number().optional().default(0),
|
|
61
|
+
confidence: z.number().optional().default(0),
|
|
62
|
+
extra: z.unknown().nullable().optional()
|
|
63
|
+
});
|
|
64
|
+
const sttInterimTranscriptEventSchema = z.object({
|
|
65
|
+
type: z.literal("interim_transcript"),
|
|
66
|
+
session_id: z.string().optional(),
|
|
67
|
+
transcript: z.string().optional().default(""),
|
|
68
|
+
language: z.string().optional().default(""),
|
|
69
|
+
start: z.number().optional().default(0),
|
|
70
|
+
duration: z.number().optional().default(0),
|
|
71
|
+
confidence: z.number().optional().default(1),
|
|
72
|
+
words: z.array(sttWordSchema).optional().default([]),
|
|
73
|
+
extra: z.unknown().nullable().optional()
|
|
74
|
+
});
|
|
75
|
+
const sttFinalTranscriptEventSchema = z.object({
|
|
76
|
+
type: z.literal("final_transcript"),
|
|
77
|
+
session_id: z.string().optional(),
|
|
78
|
+
transcript: z.string().optional().default(""),
|
|
79
|
+
language: z.string().optional().default(""),
|
|
80
|
+
start: z.number().optional().default(0),
|
|
81
|
+
duration: z.number().optional().default(0),
|
|
82
|
+
confidence: z.number().optional().default(1),
|
|
83
|
+
words: z.array(sttWordSchema).optional().default([]),
|
|
84
|
+
extra: z.unknown().nullable().optional()
|
|
85
|
+
});
|
|
86
|
+
const sttSessionCreatedEventSchema = z.object({
|
|
87
|
+
type: z.literal("session.created"),
|
|
88
|
+
session_id: z.string().optional()
|
|
89
|
+
});
|
|
90
|
+
const sttSessionFinalizedEventSchema = z.object({
|
|
91
|
+
type: z.literal("session.finalized")
|
|
92
|
+
});
|
|
93
|
+
const sttSessionClosedEventSchema = z.object({
|
|
94
|
+
type: z.literal("session.closed")
|
|
95
|
+
});
|
|
96
|
+
const sttErrorEventSchema = z.object({
|
|
97
|
+
type: z.literal("error"),
|
|
98
|
+
message: z.string().optional(),
|
|
99
|
+
code: z.string().optional()
|
|
100
|
+
});
|
|
101
|
+
const sttServerEventSchema = z.discriminatedUnion("type", [
|
|
102
|
+
sttSessionCreatedEventSchema,
|
|
103
|
+
sttSessionFinalizedEventSchema,
|
|
104
|
+
sttSessionClosedEventSchema,
|
|
105
|
+
sttInterimTranscriptEventSchema,
|
|
106
|
+
sttFinalTranscriptEventSchema,
|
|
107
|
+
sttErrorEventSchema
|
|
108
|
+
]);
|
|
57
109
|
export {
|
|
110
|
+
sttErrorEventSchema,
|
|
111
|
+
sttFinalTranscriptEventSchema,
|
|
112
|
+
sttInterimTranscriptEventSchema,
|
|
113
|
+
sttServerEventSchema,
|
|
114
|
+
sttSessionClosedEventSchema,
|
|
115
|
+
sttSessionCreatedEventSchema,
|
|
116
|
+
sttSessionFinalizedEventSchema,
|
|
117
|
+
sttWordSchema,
|
|
58
118
|
ttsClientEventSchema,
|
|
59
119
|
ttsDoneEventSchema,
|
|
60
120
|
ttsErrorEventSchema,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/inference/api_protos.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { z } from 'zod';\n\nexport const ttsSessionCreateEventSchema = z.object({\n type: z.literal('session.create'),\n sample_rate: z.string(),\n encoding: z.string(),\n model: z.string().optional(),\n voice: z.string().optional(),\n language: z.string().optional(),\n extra: z.record(z.string(), z.unknown()),\n transcript: z.string().optional(),\n});\n\nexport const ttsInputTranscriptEventSchema = z.object({\n type: z.literal('input_transcript'),\n transcript: z.string(),\n});\n\nexport const ttsSessionFlushEventSchema = z.object({\n type: z.literal('session.flush'),\n});\n\nexport const ttsSessionCloseEventSchema = z.object({\n type: z.literal('session.close'),\n});\n\nexport const ttsSessionCreatedEventSchema = z.object({\n type: z.literal('session.created'),\n session_id: z.string(),\n});\n\nexport const ttsOutputAudioEventSchema = z.object({\n type: z.literal('output_audio'),\n audio: z.string(),\n session_id: z.string(),\n});\n\nexport const ttsDoneEventSchema = z.object({\n type: z.literal('done'),\n session_id: z.string(),\n});\n\nexport const ttsSessionClosedEventSchema = z.object({\n type: z.literal('session.closed'),\n session_id: z.string(),\n});\n\nexport const ttsErrorEventSchema = z.object({\n type: z.literal('error'),\n message: z.string().optional(),\n session_id: z.string().optional(),\n});\n\nexport const ttsClientEventSchema = z.discriminatedUnion('type', [\n ttsSessionCreateEventSchema,\n ttsInputTranscriptEventSchema,\n ttsSessionFlushEventSchema,\n ttsSessionCloseEventSchema,\n]);\n\nexport const ttsServerEventSchema = z.discriminatedUnion('type', [\n ttsSessionCreatedEventSchema,\n ttsOutputAudioEventSchema,\n ttsDoneEventSchema,\n ttsSessionClosedEventSchema,\n ttsErrorEventSchema,\n]);\n\nexport type TtsSessionCreateEvent = z.infer<typeof ttsSessionCreateEventSchema>;\nexport type TtsInputTranscriptEvent = z.infer<typeof ttsInputTranscriptEventSchema>;\nexport type TtsSessionFlushEvent = z.infer<typeof ttsSessionFlushEventSchema>;\nexport type TtsSessionCloseEvent = z.infer<typeof ttsSessionCloseEventSchema>;\nexport type TtsSessionCreatedEvent = z.infer<typeof ttsSessionCreatedEventSchema>;\nexport type TtsOutputAudioEvent = z.infer<typeof ttsOutputAudioEventSchema>;\nexport type TtsDoneEvent = z.infer<typeof ttsDoneEventSchema>;\nexport type TtsSessionClosedEvent = z.infer<typeof ttsSessionClosedEventSchema>;\nexport type TtsErrorEvent = z.infer<typeof ttsErrorEventSchema>;\nexport type TtsClientEvent = z.infer<typeof ttsClientEventSchema>;\nexport type TtsServerEvent = z.infer<typeof ttsServerEventSchema>;\n"],"mappings":"AAGA,SAAS,SAAS;AAEX,MAAM,8BAA8B,EAAE,OAAO;AAAA,EAClD,MAAM,EAAE,QAAQ,gBAAgB;AAAA,EAChC,aAAa,EAAE,OAAO;AAAA,EACtB,UAAU,EAAE,OAAO;AAAA,EACnB,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,OAAO,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC;AAAA,EACvC,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAEM,MAAM,gCAAgC,EAAE,OAAO;AAAA,EACpD,MAAM,EAAE,QAAQ,kBAAkB;AAAA,EAClC,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,6BAA6B,EAAE,OAAO;AAAA,EACjD,MAAM,EAAE,QAAQ,eAAe;AACjC,CAAC;AAEM,MAAM,6BAA6B,EAAE,OAAO;AAAA,EACjD,MAAM,EAAE,QAAQ,eAAe;AACjC,CAAC;AAEM,MAAM,+BAA+B,EAAE,OAAO;AAAA,EACnD,MAAM,EAAE,QAAQ,iBAAiB;AAAA,EACjC,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,MAAM,EAAE,QAAQ,cAAc;AAAA,EAC9B,OAAO,EAAE,OAAO;AAAA,EAChB,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,qBAAqB,EAAE,OAAO;AAAA,EACzC,MAAM,EAAE,QAAQ,MAAM;AAAA,EACtB,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,8BAA8B,EAAE,OAAO;AAAA,EAClD,MAAM,EAAE,QAAQ,gBAAgB;AAAA,EAChC,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,OAAO;AAAA,EACvB,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAEM,MAAM,uBAAuB,EAAE,mBAAmB,QAAQ;AAAA,EAC/D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAEM,MAAM,uBAAuB,EAAE,mBAAmB,QAAQ;AAAA,EAC/D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../../src/inference/api_protos.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { z } from 'zod';\n\nexport const ttsSessionCreateEventSchema = z.object({\n type: z.literal('session.create'),\n sample_rate: z.string(),\n encoding: z.string(),\n model: z.string().optional(),\n voice: z.string().optional(),\n language: z.string().optional(),\n extra: z.record(z.string(), z.unknown()),\n transcript: z.string().optional(),\n});\n\nexport const ttsInputTranscriptEventSchema = z.object({\n type: z.literal('input_transcript'),\n transcript: z.string(),\n});\n\nexport const ttsSessionFlushEventSchema = z.object({\n type: z.literal('session.flush'),\n});\n\nexport const ttsSessionCloseEventSchema = z.object({\n type: z.literal('session.close'),\n});\n\nexport const ttsSessionCreatedEventSchema = z.object({\n type: z.literal('session.created'),\n session_id: z.string(),\n});\n\nexport const ttsOutputAudioEventSchema = z.object({\n type: z.literal('output_audio'),\n audio: z.string(),\n session_id: z.string(),\n});\n\nexport const ttsDoneEventSchema = z.object({\n type: z.literal('done'),\n session_id: z.string(),\n});\n\nexport const ttsSessionClosedEventSchema = z.object({\n type: z.literal('session.closed'),\n session_id: z.string(),\n});\n\nexport const ttsErrorEventSchema = z.object({\n type: z.literal('error'),\n message: z.string().optional(),\n session_id: z.string().optional(),\n});\n\nexport const ttsClientEventSchema = z.discriminatedUnion('type', [\n ttsSessionCreateEventSchema,\n ttsInputTranscriptEventSchema,\n ttsSessionFlushEventSchema,\n ttsSessionCloseEventSchema,\n]);\n\nexport const ttsServerEventSchema = z.discriminatedUnion('type', [\n ttsSessionCreatedEventSchema,\n ttsOutputAudioEventSchema,\n ttsDoneEventSchema,\n ttsSessionClosedEventSchema,\n ttsErrorEventSchema,\n]);\n\nexport type TtsSessionCreateEvent = z.infer<typeof ttsSessionCreateEventSchema>;\nexport type TtsInputTranscriptEvent = z.infer<typeof ttsInputTranscriptEventSchema>;\nexport type TtsSessionFlushEvent = z.infer<typeof ttsSessionFlushEventSchema>;\nexport type TtsSessionCloseEvent = z.infer<typeof ttsSessionCloseEventSchema>;\nexport type TtsSessionCreatedEvent = z.infer<typeof ttsSessionCreatedEventSchema>;\nexport type TtsOutputAudioEvent = z.infer<typeof ttsOutputAudioEventSchema>;\nexport type TtsDoneEvent = z.infer<typeof ttsDoneEventSchema>;\nexport type TtsSessionClosedEvent = z.infer<typeof ttsSessionClosedEventSchema>;\nexport type TtsErrorEvent = z.infer<typeof ttsErrorEventSchema>;\nexport type TtsClientEvent = z.infer<typeof ttsClientEventSchema>;\nexport type TtsServerEvent = z.infer<typeof ttsServerEventSchema>;\n\n// ============================================================================\n// STT Schemas\n// ============================================================================\n\n// Word-level timing data\nexport const sttWordSchema = z.object({\n word: z.string().optional().default(''),\n start: z.number().optional().default(0),\n end: z.number().optional().default(0),\n confidence: z.number().optional().default(0.0),\n extra: z.unknown().nullable().optional(),\n});\n\n// Interim transcript event\nexport const sttInterimTranscriptEventSchema = z.object({\n type: z.literal('interim_transcript'),\n session_id: z.string().optional(),\n transcript: z.string().optional().default(''),\n language: z.string().optional().default(''),\n start: z.number().optional().default(0),\n duration: z.number().optional().default(0),\n confidence: z.number().optional().default(1.0),\n words: z.array(sttWordSchema).optional().default([]),\n extra: z.unknown().nullable().optional(),\n});\n\n// Final transcript event\nexport const sttFinalTranscriptEventSchema = z.object({\n type: z.literal('final_transcript'),\n session_id: z.string().optional(),\n transcript: z.string().optional().default(''),\n language: z.string().optional().default(''),\n start: z.number().optional().default(0),\n duration: z.number().optional().default(0),\n confidence: z.number().optional().default(1.0),\n words: z.array(sttWordSchema).optional().default([]),\n extra: z.unknown().nullable().optional(),\n});\n\n// Session created event\nexport const sttSessionCreatedEventSchema = z.object({\n type: z.literal('session.created'),\n session_id: z.string().optional(),\n});\n\n// Session finalized event\nexport const sttSessionFinalizedEventSchema = z.object({\n type: z.literal('session.finalized'),\n});\n\n// Session closed event\nexport const sttSessionClosedEventSchema = z.object({\n type: z.literal('session.closed'),\n});\n\n// Error event\nexport const sttErrorEventSchema = z.object({\n type: z.literal('error'),\n message: z.string().optional(),\n code: z.string().optional(),\n});\n\n// Discriminated union for all STT server events\nexport const sttServerEventSchema = z.discriminatedUnion('type', [\n sttSessionCreatedEventSchema,\n sttSessionFinalizedEventSchema,\n sttSessionClosedEventSchema,\n sttInterimTranscriptEventSchema,\n sttFinalTranscriptEventSchema,\n sttErrorEventSchema,\n]);\n\n// Type exports for STT\nexport type SttWord = z.infer<typeof sttWordSchema>;\nexport type SttInterimTranscriptEvent = z.infer<typeof sttInterimTranscriptEventSchema>;\nexport type SttFinalTranscriptEvent = z.infer<typeof sttFinalTranscriptEventSchema>;\nexport type SttTranscriptEvent = SttInterimTranscriptEvent | SttFinalTranscriptEvent;\nexport type SttSessionCreatedEvent = z.infer<typeof sttSessionCreatedEventSchema>;\nexport type SttSessionFinalizedEvent = z.infer<typeof sttSessionFinalizedEventSchema>;\nexport type SttSessionClosedEvent = z.infer<typeof sttSessionClosedEventSchema>;\nexport type SttErrorEvent = z.infer<typeof sttErrorEventSchema>;\nexport type SttServerEvent = z.infer<typeof sttServerEventSchema>;\n"],"mappings":"AAGA,SAAS,SAAS;AAEX,MAAM,8BAA8B,EAAE,OAAO;AAAA,EAClD,MAAM,EAAE,QAAQ,gBAAgB;AAAA,EAChC,aAAa,EAAE,OAAO;AAAA,EACtB,UAAU,EAAE,OAAO;AAAA,EACnB,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,OAAO,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC;AAAA,EACvC,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAEM,MAAM,gCAAgC,EAAE,OAAO;AAAA,EACpD,MAAM,EAAE,QAAQ,kBAAkB;AAAA,EAClC,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,6BAA6B,EAAE,OAAO;AAAA,EACjD,MAAM,EAAE,QAAQ,eAAe;AACjC,CAAC;AAEM,MAAM,6BAA6B,EAAE,OAAO;AAAA,EACjD,MAAM,EAAE,QAAQ,eAAe;AACjC,CAAC;AAEM,MAAM,+BAA+B,EAAE,OAAO;AAAA,EACnD,MAAM,EAAE,QAAQ,iBAAiB;AAAA,EACjC,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,MAAM,EAAE,QAAQ,cAAc;AAAA,EAC9B,OAAO,EAAE,OAAO;AAAA,EAChB,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,qBAAqB,EAAE,OAAO;AAAA,EACzC,MAAM,EAAE,QAAQ,MAAM;AAAA,EACtB,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,8BAA8B,EAAE,OAAO;AAAA,EAClD,MAAM,EAAE,QAAQ,gBAAgB;AAAA,EAChC,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,OAAO;AAAA,EACvB,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAEM,MAAM,uBAAuB,EAAE,mBAAmB,QAAQ;AAAA,EAC/D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAEM,MAAM,uBAAuB,EAAE,mBAAmB,QAAQ;AAAA,EAC/D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAmBM,MAAM,gBAAgB,EAAE,OAAO;AAAA,EACpC,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE;AAAA,EACtC,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAC;AAAA,EACtC,KAAK,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAC;AAAA,EACpC,YAAY,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAG;AAAA,EAC7C,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS;AACzC,CAAC;AAGM,MAAM,kCAAkC,EAAE,OAAO;AAAA,EACtD,MAAM,EAAE,QAAQ,oBAAoB;AAAA,EACpC,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,EAChC,YAAY,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE;AAAA,EAC5C,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE;AAAA,EAC1C,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAC;AAAA,EACtC,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAC;AAAA,EACzC,YAAY,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAG;AAAA,EAC7C,OAAO,EAAE,MAAM,aAAa,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;AAAA,EACnD,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS;AACzC,CAAC;AAGM,MAAM,gCAAgC,EAAE,OAAO;AAAA,EACpD,MAAM,EAAE,QAAQ,kBAAkB;AAAA,EAClC,YAAY,EAAE,OAAO,EAAE,SAAS;AAAA,EAChC,YAAY,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE;AAAA,EAC5C,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE;AAAA,EAC1C,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAC;AAAA,EACtC,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAC;AAAA,EACzC,YAAY,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAG;AAAA,EAC7C,OAAO,EAAE,MAAM,aAAa,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;AAAA,EACnD,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS;AACzC,CAAC;AAGM,MAAM,+BAA+B,EAAE,OAAO;AAAA,EACnD,MAAM,EAAE,QAAQ,iBAAiB;AAAA,EACjC,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAGM,MAAM,iCAAiC,EAAE,OAAO;AAAA,EACrD,MAAM,EAAE,QAAQ,mBAAmB;AACrC,CAAC;AAGM,MAAM,8BAA8B,EAAE,OAAO;AAAA,EAClD,MAAM,EAAE,QAAQ,gBAAgB;AAClC,CAAC;AAGM,MAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,OAAO;AAAA,EACvB,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,MAAM,EAAE,OAAO,EAAE,SAAS;AAC5B,CAAC;AAGM,MAAM,uBAAuB,EAAE,mBAAmB,QAAQ;AAAA,EAC/D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;","names":[]}
|
package/dist/inference/stt.cjs
CHANGED
|
@@ -30,6 +30,7 @@ var import_stream_channel = require("../stream/stream_channel.cjs");
|
|
|
30
30
|
var import_stt = require("../stt/index.cjs");
|
|
31
31
|
var import_types = require("../types.cjs");
|
|
32
32
|
var import_utils = require("../utils.cjs");
|
|
33
|
+
var import_api_protos = require("./api_protos.cjs");
|
|
33
34
|
var import_utils2 = require("./utils.cjs");
|
|
34
35
|
const DEFAULT_ENCODING = "pcm_s16le";
|
|
35
36
|
const DEFAULT_SAMPLE_RATE = 16e3;
|
|
@@ -40,7 +41,7 @@ class STT extends import_stt.STT {
|
|
|
40
41
|
streams = /* @__PURE__ */ new Set();
|
|
41
42
|
#logger = (0, import_log.log)();
|
|
42
43
|
constructor(opts) {
|
|
43
|
-
super({ streaming: true, interimResults: true });
|
|
44
|
+
super({ streaming: true, interimResults: true, alignedTranscript: "word" });
|
|
44
45
|
const {
|
|
45
46
|
model,
|
|
46
47
|
language,
|
|
@@ -242,9 +243,16 @@ class SpeechStream extends import_stt.SpeechStream {
|
|
|
242
243
|
const result = await reader.read();
|
|
243
244
|
if (signal.aborted) return;
|
|
244
245
|
if (result.done) return;
|
|
245
|
-
const
|
|
246
|
-
|
|
247
|
-
|
|
246
|
+
const parseResult = await import_api_protos.sttServerEventSchema.safeParseAsync(result.value);
|
|
247
|
+
if (!parseResult.success) {
|
|
248
|
+
this.#logger.warn(
|
|
249
|
+
{ error: parseResult.error, rawData: result.value },
|
|
250
|
+
"Failed to parse STT server event"
|
|
251
|
+
);
|
|
252
|
+
continue;
|
|
253
|
+
}
|
|
254
|
+
const event = parseResult.data;
|
|
255
|
+
switch (event.type) {
|
|
248
256
|
case "session.created":
|
|
249
257
|
case "session.finalized":
|
|
250
258
|
break;
|
|
@@ -253,21 +261,15 @@ class SpeechStream extends import_stt.SpeechStream {
|
|
|
253
261
|
resourceCleanup();
|
|
254
262
|
break;
|
|
255
263
|
case "interim_transcript":
|
|
256
|
-
this.processTranscript(
|
|
264
|
+
this.processTranscript(event, false);
|
|
257
265
|
break;
|
|
258
266
|
case "final_transcript":
|
|
259
|
-
this.processTranscript(
|
|
267
|
+
this.processTranscript(event, true);
|
|
260
268
|
break;
|
|
261
269
|
case "error":
|
|
262
|
-
this.#logger.error({ error:
|
|
270
|
+
this.#logger.error({ error: event }, "Received error from LiveKit STT");
|
|
263
271
|
resourceCleanup();
|
|
264
|
-
throw new import_exceptions.APIError(`LiveKit STT returned error: ${JSON.stringify(
|
|
265
|
-
default:
|
|
266
|
-
this.#logger.warn(
|
|
267
|
-
{ message: json },
|
|
268
|
-
"Received unexpected message from LiveKit STT"
|
|
269
|
-
);
|
|
270
|
-
break;
|
|
272
|
+
throw new import_exceptions.APIError(`LiveKit STT returned error: ${JSON.stringify(event)}`);
|
|
271
273
|
}
|
|
272
274
|
}
|
|
273
275
|
} finally {
|
|
@@ -310,9 +312,9 @@ class SpeechStream extends import_stt.SpeechStream {
|
|
|
310
312
|
}
|
|
311
313
|
processTranscript(data, isFinal) {
|
|
312
314
|
if (this.queue.closed) return;
|
|
313
|
-
const requestId = data.
|
|
314
|
-
const text = data.transcript
|
|
315
|
-
const language = data.language
|
|
315
|
+
const requestId = data.session_id || this.requestId;
|
|
316
|
+
const text = data.transcript;
|
|
317
|
+
const language = data.language || this.opts.language || "en";
|
|
316
318
|
if (!text && !isFinal) return;
|
|
317
319
|
try {
|
|
318
320
|
if (!this.speaking) {
|
|
@@ -321,10 +323,19 @@ class SpeechStream extends import_stt.SpeechStream {
|
|
|
321
323
|
}
|
|
322
324
|
const speechData = {
|
|
323
325
|
language,
|
|
324
|
-
startTime: data.start
|
|
325
|
-
endTime: data.
|
|
326
|
-
confidence: data.confidence
|
|
327
|
-
text
|
|
326
|
+
startTime: this.startTimeOffset + data.start,
|
|
327
|
+
endTime: this.startTimeOffset + data.start + data.duration,
|
|
328
|
+
confidence: data.confidence,
|
|
329
|
+
text,
|
|
330
|
+
words: data.words.map(
|
|
331
|
+
(word) => ({
|
|
332
|
+
text: word.word,
|
|
333
|
+
startTime: word.start + this.startTimeOffset,
|
|
334
|
+
endTime: word.end + this.startTimeOffset,
|
|
335
|
+
startTimeOffset: this.startTimeOffset,
|
|
336
|
+
confidence: word.confidence
|
|
337
|
+
})
|
|
338
|
+
)
|
|
328
339
|
};
|
|
329
340
|
if (isFinal) {
|
|
330
341
|
if (this.speechDuration > 0) {
|