@livekit/agents 1.0.21 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/inference/api_protos.cjs +2 -2
- package/dist/inference/api_protos.cjs.map +1 -1
- package/dist/inference/api_protos.d.cts +16 -16
- package/dist/inference/api_protos.d.ts +16 -16
- package/dist/inference/api_protos.js +2 -2
- package/dist/inference/api_protos.js.map +1 -1
- package/dist/inference/stt.cjs +42 -30
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +42 -30
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/tts.cjs +2 -3
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +2 -3
- package/dist/inference/tts.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +35 -1
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +13 -1
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +52 -6
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +2 -0
- package/dist/job.d.ts +2 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +52 -6
- package/dist/job.js.map +1 -1
- package/dist/llm/llm.cjs +38 -3
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +1 -0
- package/dist/llm/llm.d.ts +1 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +38 -3
- package/dist/llm/llm.js.map +1 -1
- package/dist/log.cjs +34 -10
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.cts +7 -0
- package/dist/log.d.ts +7 -0
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js +34 -11
- package/dist/log.js.map +1 -1
- package/dist/stt/stt.cjs +18 -5
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +18 -5
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/index.cjs +23 -2
- package/dist/telemetry/index.cjs.map +1 -1
- package/dist/telemetry/index.d.cts +4 -1
- package/dist/telemetry/index.d.ts +4 -1
- package/dist/telemetry/index.d.ts.map +1 -1
- package/dist/telemetry/index.js +27 -2
- package/dist/telemetry/index.js.map +1 -1
- package/dist/telemetry/logging.cjs +65 -0
- package/dist/telemetry/logging.cjs.map +1 -0
- package/dist/telemetry/logging.d.cts +21 -0
- package/dist/telemetry/logging.d.ts +21 -0
- package/dist/telemetry/logging.d.ts.map +1 -0
- package/dist/telemetry/logging.js +40 -0
- package/dist/telemetry/logging.js.map +1 -0
- package/dist/telemetry/otel_http_exporter.cjs +144 -0
- package/dist/telemetry/otel_http_exporter.cjs.map +1 -0
- package/dist/telemetry/otel_http_exporter.d.cts +62 -0
- package/dist/telemetry/otel_http_exporter.d.ts +62 -0
- package/dist/telemetry/otel_http_exporter.d.ts.map +1 -0
- package/dist/telemetry/otel_http_exporter.js +120 -0
- package/dist/telemetry/otel_http_exporter.js.map +1 -0
- package/dist/telemetry/pino_otel_transport.cjs +217 -0
- package/dist/telemetry/pino_otel_transport.cjs.map +1 -0
- package/dist/telemetry/pino_otel_transport.d.cts +58 -0
- package/dist/telemetry/pino_otel_transport.d.ts +58 -0
- package/dist/telemetry/pino_otel_transport.d.ts.map +1 -0
- package/dist/telemetry/pino_otel_transport.js +189 -0
- package/dist/telemetry/pino_otel_transport.js.map +1 -0
- package/dist/telemetry/traces.cjs +225 -16
- package/dist/telemetry/traces.cjs.map +1 -1
- package/dist/telemetry/traces.d.cts +17 -0
- package/dist/telemetry/traces.d.ts +17 -0
- package/dist/telemetry/traces.d.ts.map +1 -1
- package/dist/telemetry/traces.js +211 -14
- package/dist/telemetry/traces.js.map +1 -1
- package/dist/tts/tts.cjs +68 -20
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +2 -0
- package/dist/tts/tts.d.ts +2 -0
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +68 -20
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs +6 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +1 -0
- package/dist/utils.d.ts +1 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +5 -0
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +93 -7
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +3 -0
- package/dist/voice/agent_activity.d.ts +3 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +93 -7
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +122 -27
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +15 -0
- package/dist/voice/agent_session.d.ts +15 -0
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +122 -27
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +69 -22
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +5 -0
- package/dist/voice/audio_recognition.d.ts +5 -0
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +69 -22
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/generation.cjs +43 -3
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +43 -3
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/report.cjs +3 -2
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +7 -1
- package/dist/voice/report.d.ts +7 -1
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +3 -2
- package/dist/voice/report.js.map +1 -1
- package/package.json +8 -2
- package/src/inference/api_protos.ts +2 -2
- package/src/inference/stt.ts +48 -33
- package/src/inference/tts.ts +4 -3
- package/src/ipc/job_proc_lazy_main.ts +12 -1
- package/src/job.ts +59 -10
- package/src/llm/llm.ts +48 -5
- package/src/log.ts +52 -15
- package/src/stt/stt.ts +18 -5
- package/src/telemetry/index.ts +22 -4
- package/src/telemetry/logging.ts +55 -0
- package/src/telemetry/otel_http_exporter.ts +191 -0
- package/src/telemetry/pino_otel_transport.ts +265 -0
- package/src/telemetry/traces.ts +320 -20
- package/src/tts/tts.ts +85 -24
- package/src/utils.ts +5 -0
- package/src/voice/agent_activity.ts +140 -22
- package/src/voice/agent_session.ts +174 -34
- package/src/voice/audio_recognition.ts +85 -26
- package/src/voice/generation.ts +59 -7
- package/src/voice/report.ts +10 -4
|
@@ -71,8 +71,8 @@ const ttsSessionClosedEventSchema = import_zod.z.object({
|
|
|
71
71
|
});
|
|
72
72
|
const ttsErrorEventSchema = import_zod.z.object({
|
|
73
73
|
type: import_zod.z.literal("error"),
|
|
74
|
-
message: import_zod.z.string(),
|
|
75
|
-
session_id: import_zod.z.string()
|
|
74
|
+
message: import_zod.z.string().optional(),
|
|
75
|
+
session_id: import_zod.z.string().optional()
|
|
76
76
|
});
|
|
77
77
|
const ttsClientEventSchema = import_zod.z.discriminatedUnion("type", [
|
|
78
78
|
ttsSessionCreateEventSchema,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/inference/api_protos.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { z } from 'zod';\n\nexport const ttsSessionCreateEventSchema = z.object({\n type: z.literal('session.create'),\n sample_rate: z.string(),\n encoding: z.string(),\n model: z.string().optional(),\n voice: z.string().optional(),\n language: z.string().optional(),\n extra: z.record(z.string(), z.unknown()),\n transcript: z.string().optional(),\n});\n\nexport const ttsInputTranscriptEventSchema = z.object({\n type: z.literal('input_transcript'),\n transcript: z.string(),\n});\n\nexport const ttsSessionFlushEventSchema = z.object({\n type: z.literal('session.flush'),\n});\n\nexport const ttsSessionCloseEventSchema = z.object({\n type: z.literal('session.close'),\n});\n\nexport const ttsSessionCreatedEventSchema = z.object({\n type: z.literal('session.created'),\n session_id: z.string(),\n});\n\nexport const ttsOutputAudioEventSchema = z.object({\n type: z.literal('output_audio'),\n audio: z.string(),\n session_id: z.string(),\n});\n\nexport const ttsDoneEventSchema = z.object({\n type: z.literal('done'),\n session_id: z.string(),\n});\n\nexport const ttsSessionClosedEventSchema = z.object({\n type: z.literal('session.closed'),\n session_id: z.string(),\n});\n\nexport const ttsErrorEventSchema = z.object({\n type: z.literal('error'),\n message: z.string(),\n session_id: z.string(),\n});\n\nexport const ttsClientEventSchema = z.discriminatedUnion('type', [\n ttsSessionCreateEventSchema,\n ttsInputTranscriptEventSchema,\n ttsSessionFlushEventSchema,\n ttsSessionCloseEventSchema,\n]);\n\nexport const ttsServerEventSchema = z.discriminatedUnion('type', [\n ttsSessionCreatedEventSchema,\n ttsOutputAudioEventSchema,\n ttsDoneEventSchema,\n ttsSessionClosedEventSchema,\n ttsErrorEventSchema,\n]);\n\nexport type TtsSessionCreateEvent = z.infer<typeof ttsSessionCreateEventSchema>;\nexport type TtsInputTranscriptEvent = z.infer<typeof ttsInputTranscriptEventSchema>;\nexport type TtsSessionFlushEvent = z.infer<typeof ttsSessionFlushEventSchema>;\nexport type TtsSessionCloseEvent = z.infer<typeof ttsSessionCloseEventSchema>;\nexport type TtsSessionCreatedEvent = z.infer<typeof ttsSessionCreatedEventSchema>;\nexport type TtsOutputAudioEvent = z.infer<typeof ttsOutputAudioEventSchema>;\nexport type TtsDoneEvent = z.infer<typeof ttsDoneEventSchema>;\nexport type TtsSessionClosedEvent = z.infer<typeof ttsSessionClosedEventSchema>;\nexport type TtsErrorEvent = z.infer<typeof ttsErrorEventSchema>;\nexport type TtsClientEvent = z.infer<typeof ttsClientEventSchema>;\nexport type TtsServerEvent = z.infer<typeof ttsServerEventSchema>;\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,iBAAkB;AAEX,MAAM,8BAA8B,aAAE,OAAO;AAAA,EAClD,MAAM,aAAE,QAAQ,gBAAgB;AAAA,EAChC,aAAa,aAAE,OAAO;AAAA,EACtB,UAAU,aAAE,OAAO;AAAA,EACnB,OAAO,aAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,OAAO,aAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,UAAU,aAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,OAAO,aAAE,OAAO,aAAE,OAAO,GAAG,aAAE,QAAQ,CAAC;AAAA,EACvC,YAAY,aAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAEM,MAAM,gCAAgC,aAAE,OAAO;AAAA,EACpD,MAAM,aAAE,QAAQ,kBAAkB;AAAA,EAClC,YAAY,aAAE,OAAO;AACvB,CAAC;AAEM,MAAM,6BAA6B,aAAE,OAAO;AAAA,EACjD,MAAM,aAAE,QAAQ,eAAe;AACjC,CAAC;AAEM,MAAM,6BAA6B,aAAE,OAAO;AAAA,EACjD,MAAM,aAAE,QAAQ,eAAe;AACjC,CAAC;AAEM,MAAM,+BAA+B,aAAE,OAAO;AAAA,EACnD,MAAM,aAAE,QAAQ,iBAAiB;AAAA,EACjC,YAAY,aAAE,OAAO;AACvB,CAAC;AAEM,MAAM,4BAA4B,aAAE,OAAO;AAAA,EAChD,MAAM,aAAE,QAAQ,cAAc;AAAA,EAC9B,OAAO,aAAE,OAAO;AAAA,EAChB,YAAY,aAAE,OAAO;AACvB,CAAC;AAEM,MAAM,qBAAqB,aAAE,OAAO;AAAA,EACzC,MAAM,aAAE,QAAQ,MAAM;AAAA,EACtB,YAAY,aAAE,OAAO;AACvB,CAAC;AAEM,MAAM,8BAA8B,aAAE,OAAO;AAAA,EAClD,MAAM,aAAE,QAAQ,gBAAgB;AAAA,EAChC,YAAY,aAAE,OAAO;AACvB,CAAC;AAEM,MAAM,sBAAsB,aAAE,OAAO;AAAA,EAC1C,MAAM,aAAE,QAAQ,OAAO;AAAA,EACvB,SAAS,aAAE,OAAO;AAAA,
|
|
1
|
+
{"version":3,"sources":["../../src/inference/api_protos.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { z } from 'zod';\n\nexport const ttsSessionCreateEventSchema = z.object({\n type: z.literal('session.create'),\n sample_rate: z.string(),\n encoding: z.string(),\n model: z.string().optional(),\n voice: z.string().optional(),\n language: z.string().optional(),\n extra: z.record(z.string(), z.unknown()),\n transcript: z.string().optional(),\n});\n\nexport const ttsInputTranscriptEventSchema = z.object({\n type: z.literal('input_transcript'),\n transcript: z.string(),\n});\n\nexport const ttsSessionFlushEventSchema = z.object({\n type: z.literal('session.flush'),\n});\n\nexport const ttsSessionCloseEventSchema = z.object({\n type: z.literal('session.close'),\n});\n\nexport const ttsSessionCreatedEventSchema = z.object({\n type: z.literal('session.created'),\n session_id: z.string(),\n});\n\nexport const ttsOutputAudioEventSchema = z.object({\n type: z.literal('output_audio'),\n audio: z.string(),\n session_id: z.string(),\n});\n\nexport const ttsDoneEventSchema = z.object({\n type: z.literal('done'),\n session_id: z.string(),\n});\n\nexport const ttsSessionClosedEventSchema = z.object({\n type: z.literal('session.closed'),\n session_id: z.string(),\n});\n\nexport const ttsErrorEventSchema = z.object({\n type: z.literal('error'),\n message: z.string().optional(),\n session_id: z.string().optional(),\n});\n\nexport const ttsClientEventSchema = z.discriminatedUnion('type', [\n ttsSessionCreateEventSchema,\n ttsInputTranscriptEventSchema,\n ttsSessionFlushEventSchema,\n ttsSessionCloseEventSchema,\n]);\n\nexport const ttsServerEventSchema = z.discriminatedUnion('type', [\n ttsSessionCreatedEventSchema,\n ttsOutputAudioEventSchema,\n ttsDoneEventSchema,\n ttsSessionClosedEventSchema,\n ttsErrorEventSchema,\n]);\n\nexport type TtsSessionCreateEvent = z.infer<typeof ttsSessionCreateEventSchema>;\nexport type TtsInputTranscriptEvent = z.infer<typeof ttsInputTranscriptEventSchema>;\nexport type TtsSessionFlushEvent = z.infer<typeof ttsSessionFlushEventSchema>;\nexport type TtsSessionCloseEvent = z.infer<typeof ttsSessionCloseEventSchema>;\nexport type TtsSessionCreatedEvent = z.infer<typeof ttsSessionCreatedEventSchema>;\nexport type TtsOutputAudioEvent = z.infer<typeof ttsOutputAudioEventSchema>;\nexport type TtsDoneEvent = z.infer<typeof ttsDoneEventSchema>;\nexport type TtsSessionClosedEvent = z.infer<typeof ttsSessionClosedEventSchema>;\nexport type TtsErrorEvent = z.infer<typeof ttsErrorEventSchema>;\nexport type TtsClientEvent = z.infer<typeof ttsClientEventSchema>;\nexport type TtsServerEvent = z.infer<typeof ttsServerEventSchema>;\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,iBAAkB;AAEX,MAAM,8BAA8B,aAAE,OAAO;AAAA,EAClD,MAAM,aAAE,QAAQ,gBAAgB;AAAA,EAChC,aAAa,aAAE,OAAO;AAAA,EACtB,UAAU,aAAE,OAAO;AAAA,EACnB,OAAO,aAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,OAAO,aAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,UAAU,aAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,OAAO,aAAE,OAAO,aAAE,OAAO,GAAG,aAAE,QAAQ,CAAC;AAAA,EACvC,YAAY,aAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAEM,MAAM,gCAAgC,aAAE,OAAO;AAAA,EACpD,MAAM,aAAE,QAAQ,kBAAkB;AAAA,EAClC,YAAY,aAAE,OAAO;AACvB,CAAC;AAEM,MAAM,6BAA6B,aAAE,OAAO;AAAA,EACjD,MAAM,aAAE,QAAQ,eAAe;AACjC,CAAC;AAEM,MAAM,6BAA6B,aAAE,OAAO;AAAA,EACjD,MAAM,aAAE,QAAQ,eAAe;AACjC,CAAC;AAEM,MAAM,+BAA+B,aAAE,OAAO;AAAA,EACnD,MAAM,aAAE,QAAQ,iBAAiB;AAAA,EACjC,YAAY,aAAE,OAAO;AACvB,CAAC;AAEM,MAAM,4BAA4B,aAAE,OAAO;AAAA,EAChD,MAAM,aAAE,QAAQ,cAAc;AAAA,EAC9B,OAAO,aAAE,OAAO;AAAA,EAChB,YAAY,aAAE,OAAO;AACvB,CAAC;AAEM,MAAM,qBAAqB,aAAE,OAAO;AAAA,EACzC,MAAM,aAAE,QAAQ,MAAM;AAAA,EACtB,YAAY,aAAE,OAAO;AACvB,CAAC;AAEM,MAAM,8BAA8B,aAAE,OAAO;AAAA,EAClD,MAAM,aAAE,QAAQ,gBAAgB;AAAA,EAChC,YAAY,aAAE,OAAO;AACvB,CAAC;AAEM,MAAM,sBAAsB,aAAE,OAAO;AAAA,EAC1C,MAAM,aAAE,QAAQ,OAAO;AAAA,EACvB,SAAS,aAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,aAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAEM,MAAM,uBAAuB,aAAE,mBAAmB,QAAQ;AAAA,EAC/D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAEM,MAAM,uBAAuB,aAAE,mBAAmB,QAAQ;AAAA,EAC/D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;","names":[]}
|
|
@@ -13,18 +13,18 @@ export declare const ttsSessionCreateEventSchema: z.ZodObject<{
|
|
|
13
13
|
sample_rate: string;
|
|
14
14
|
encoding: string;
|
|
15
15
|
extra: Record<string, unknown>;
|
|
16
|
+
language?: string | undefined;
|
|
16
17
|
model?: string | undefined;
|
|
17
18
|
transcript?: string | undefined;
|
|
18
|
-
language?: string | undefined;
|
|
19
19
|
voice?: string | undefined;
|
|
20
20
|
}, {
|
|
21
21
|
type: "session.create";
|
|
22
22
|
sample_rate: string;
|
|
23
23
|
encoding: string;
|
|
24
24
|
extra: Record<string, unknown>;
|
|
25
|
+
language?: string | undefined;
|
|
25
26
|
model?: string | undefined;
|
|
26
27
|
transcript?: string | undefined;
|
|
27
|
-
language?: string | undefined;
|
|
28
28
|
voice?: string | undefined;
|
|
29
29
|
}>;
|
|
30
30
|
export declare const ttsInputTranscriptEventSchema: z.ZodObject<{
|
|
@@ -96,16 +96,16 @@ export declare const ttsSessionClosedEventSchema: z.ZodObject<{
|
|
|
96
96
|
}>;
|
|
97
97
|
export declare const ttsErrorEventSchema: z.ZodObject<{
|
|
98
98
|
type: z.ZodLiteral<"error">;
|
|
99
|
-
message: z.ZodString
|
|
100
|
-
session_id: z.ZodString
|
|
99
|
+
message: z.ZodOptional<z.ZodString>;
|
|
100
|
+
session_id: z.ZodOptional<z.ZodString>;
|
|
101
101
|
}, "strip", z.ZodTypeAny, {
|
|
102
|
-
message: string;
|
|
103
102
|
type: "error";
|
|
104
|
-
|
|
103
|
+
message?: string | undefined;
|
|
104
|
+
session_id?: string | undefined;
|
|
105
105
|
}, {
|
|
106
|
-
message: string;
|
|
107
106
|
type: "error";
|
|
108
|
-
|
|
107
|
+
message?: string | undefined;
|
|
108
|
+
session_id?: string | undefined;
|
|
109
109
|
}>;
|
|
110
110
|
export declare const ttsClientEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<{
|
|
111
111
|
type: z.ZodLiteral<"session.create">;
|
|
@@ -121,18 +121,18 @@ export declare const ttsClientEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
121
121
|
sample_rate: string;
|
|
122
122
|
encoding: string;
|
|
123
123
|
extra: Record<string, unknown>;
|
|
124
|
+
language?: string | undefined;
|
|
124
125
|
model?: string | undefined;
|
|
125
126
|
transcript?: string | undefined;
|
|
126
|
-
language?: string | undefined;
|
|
127
127
|
voice?: string | undefined;
|
|
128
128
|
}, {
|
|
129
129
|
type: "session.create";
|
|
130
130
|
sample_rate: string;
|
|
131
131
|
encoding: string;
|
|
132
132
|
extra: Record<string, unknown>;
|
|
133
|
+
language?: string | undefined;
|
|
133
134
|
model?: string | undefined;
|
|
134
135
|
transcript?: string | undefined;
|
|
135
|
-
language?: string | undefined;
|
|
136
136
|
voice?: string | undefined;
|
|
137
137
|
}>, z.ZodObject<{
|
|
138
138
|
type: z.ZodLiteral<"input_transcript">;
|
|
@@ -197,16 +197,16 @@ export declare const ttsServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
197
197
|
session_id: string;
|
|
198
198
|
}>, z.ZodObject<{
|
|
199
199
|
type: z.ZodLiteral<"error">;
|
|
200
|
-
message: z.ZodString
|
|
201
|
-
session_id: z.ZodString
|
|
200
|
+
message: z.ZodOptional<z.ZodString>;
|
|
201
|
+
session_id: z.ZodOptional<z.ZodString>;
|
|
202
202
|
}, "strip", z.ZodTypeAny, {
|
|
203
|
-
message: string;
|
|
204
203
|
type: "error";
|
|
205
|
-
|
|
204
|
+
message?: string | undefined;
|
|
205
|
+
session_id?: string | undefined;
|
|
206
206
|
}, {
|
|
207
|
-
message: string;
|
|
208
207
|
type: "error";
|
|
209
|
-
|
|
208
|
+
message?: string | undefined;
|
|
209
|
+
session_id?: string | undefined;
|
|
210
210
|
}>]>;
|
|
211
211
|
export type TtsSessionCreateEvent = z.infer<typeof ttsSessionCreateEventSchema>;
|
|
212
212
|
export type TtsInputTranscriptEvent = z.infer<typeof ttsInputTranscriptEventSchema>;
|
|
@@ -13,18 +13,18 @@ export declare const ttsSessionCreateEventSchema: z.ZodObject<{
|
|
|
13
13
|
sample_rate: string;
|
|
14
14
|
encoding: string;
|
|
15
15
|
extra: Record<string, unknown>;
|
|
16
|
+
language?: string | undefined;
|
|
16
17
|
model?: string | undefined;
|
|
17
18
|
transcript?: string | undefined;
|
|
18
|
-
language?: string | undefined;
|
|
19
19
|
voice?: string | undefined;
|
|
20
20
|
}, {
|
|
21
21
|
type: "session.create";
|
|
22
22
|
sample_rate: string;
|
|
23
23
|
encoding: string;
|
|
24
24
|
extra: Record<string, unknown>;
|
|
25
|
+
language?: string | undefined;
|
|
25
26
|
model?: string | undefined;
|
|
26
27
|
transcript?: string | undefined;
|
|
27
|
-
language?: string | undefined;
|
|
28
28
|
voice?: string | undefined;
|
|
29
29
|
}>;
|
|
30
30
|
export declare const ttsInputTranscriptEventSchema: z.ZodObject<{
|
|
@@ -96,16 +96,16 @@ export declare const ttsSessionClosedEventSchema: z.ZodObject<{
|
|
|
96
96
|
}>;
|
|
97
97
|
export declare const ttsErrorEventSchema: z.ZodObject<{
|
|
98
98
|
type: z.ZodLiteral<"error">;
|
|
99
|
-
message: z.ZodString
|
|
100
|
-
session_id: z.ZodString
|
|
99
|
+
message: z.ZodOptional<z.ZodString>;
|
|
100
|
+
session_id: z.ZodOptional<z.ZodString>;
|
|
101
101
|
}, "strip", z.ZodTypeAny, {
|
|
102
|
-
message: string;
|
|
103
102
|
type: "error";
|
|
104
|
-
|
|
103
|
+
message?: string | undefined;
|
|
104
|
+
session_id?: string | undefined;
|
|
105
105
|
}, {
|
|
106
|
-
message: string;
|
|
107
106
|
type: "error";
|
|
108
|
-
|
|
107
|
+
message?: string | undefined;
|
|
108
|
+
session_id?: string | undefined;
|
|
109
109
|
}>;
|
|
110
110
|
export declare const ttsClientEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<{
|
|
111
111
|
type: z.ZodLiteral<"session.create">;
|
|
@@ -121,18 +121,18 @@ export declare const ttsClientEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
121
121
|
sample_rate: string;
|
|
122
122
|
encoding: string;
|
|
123
123
|
extra: Record<string, unknown>;
|
|
124
|
+
language?: string | undefined;
|
|
124
125
|
model?: string | undefined;
|
|
125
126
|
transcript?: string | undefined;
|
|
126
|
-
language?: string | undefined;
|
|
127
127
|
voice?: string | undefined;
|
|
128
128
|
}, {
|
|
129
129
|
type: "session.create";
|
|
130
130
|
sample_rate: string;
|
|
131
131
|
encoding: string;
|
|
132
132
|
extra: Record<string, unknown>;
|
|
133
|
+
language?: string | undefined;
|
|
133
134
|
model?: string | undefined;
|
|
134
135
|
transcript?: string | undefined;
|
|
135
|
-
language?: string | undefined;
|
|
136
136
|
voice?: string | undefined;
|
|
137
137
|
}>, z.ZodObject<{
|
|
138
138
|
type: z.ZodLiteral<"input_transcript">;
|
|
@@ -197,16 +197,16 @@ export declare const ttsServerEventSchema: z.ZodDiscriminatedUnion<"type", [z.Zo
|
|
|
197
197
|
session_id: string;
|
|
198
198
|
}>, z.ZodObject<{
|
|
199
199
|
type: z.ZodLiteral<"error">;
|
|
200
|
-
message: z.ZodString
|
|
201
|
-
session_id: z.ZodString
|
|
200
|
+
message: z.ZodOptional<z.ZodString>;
|
|
201
|
+
session_id: z.ZodOptional<z.ZodString>;
|
|
202
202
|
}, "strip", z.ZodTypeAny, {
|
|
203
|
-
message: string;
|
|
204
203
|
type: "error";
|
|
205
|
-
|
|
204
|
+
message?: string | undefined;
|
|
205
|
+
session_id?: string | undefined;
|
|
206
206
|
}, {
|
|
207
|
-
message: string;
|
|
208
207
|
type: "error";
|
|
209
|
-
|
|
208
|
+
message?: string | undefined;
|
|
209
|
+
session_id?: string | undefined;
|
|
210
210
|
}>]>;
|
|
211
211
|
export type TtsSessionCreateEvent = z.infer<typeof ttsSessionCreateEventSchema>;
|
|
212
212
|
export type TtsInputTranscriptEvent = z.infer<typeof ttsInputTranscriptEventSchema>;
|
|
@@ -38,8 +38,8 @@ const ttsSessionClosedEventSchema = z.object({
|
|
|
38
38
|
});
|
|
39
39
|
const ttsErrorEventSchema = z.object({
|
|
40
40
|
type: z.literal("error"),
|
|
41
|
-
message: z.string(),
|
|
42
|
-
session_id: z.string()
|
|
41
|
+
message: z.string().optional(),
|
|
42
|
+
session_id: z.string().optional()
|
|
43
43
|
});
|
|
44
44
|
const ttsClientEventSchema = z.discriminatedUnion("type", [
|
|
45
45
|
ttsSessionCreateEventSchema,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/inference/api_protos.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { z } from 'zod';\n\nexport const ttsSessionCreateEventSchema = z.object({\n type: z.literal('session.create'),\n sample_rate: z.string(),\n encoding: z.string(),\n model: z.string().optional(),\n voice: z.string().optional(),\n language: z.string().optional(),\n extra: z.record(z.string(), z.unknown()),\n transcript: z.string().optional(),\n});\n\nexport const ttsInputTranscriptEventSchema = z.object({\n type: z.literal('input_transcript'),\n transcript: z.string(),\n});\n\nexport const ttsSessionFlushEventSchema = z.object({\n type: z.literal('session.flush'),\n});\n\nexport const ttsSessionCloseEventSchema = z.object({\n type: z.literal('session.close'),\n});\n\nexport const ttsSessionCreatedEventSchema = z.object({\n type: z.literal('session.created'),\n session_id: z.string(),\n});\n\nexport const ttsOutputAudioEventSchema = z.object({\n type: z.literal('output_audio'),\n audio: z.string(),\n session_id: z.string(),\n});\n\nexport const ttsDoneEventSchema = z.object({\n type: z.literal('done'),\n session_id: z.string(),\n});\n\nexport const ttsSessionClosedEventSchema = z.object({\n type: z.literal('session.closed'),\n session_id: z.string(),\n});\n\nexport const ttsErrorEventSchema = z.object({\n type: z.literal('error'),\n message: z.string(),\n session_id: z.string(),\n});\n\nexport const ttsClientEventSchema = z.discriminatedUnion('type', [\n ttsSessionCreateEventSchema,\n ttsInputTranscriptEventSchema,\n ttsSessionFlushEventSchema,\n ttsSessionCloseEventSchema,\n]);\n\nexport const ttsServerEventSchema = z.discriminatedUnion('type', [\n ttsSessionCreatedEventSchema,\n ttsOutputAudioEventSchema,\n ttsDoneEventSchema,\n ttsSessionClosedEventSchema,\n ttsErrorEventSchema,\n]);\n\nexport type TtsSessionCreateEvent = z.infer<typeof ttsSessionCreateEventSchema>;\nexport type TtsInputTranscriptEvent = z.infer<typeof ttsInputTranscriptEventSchema>;\nexport type TtsSessionFlushEvent = z.infer<typeof ttsSessionFlushEventSchema>;\nexport type TtsSessionCloseEvent = z.infer<typeof ttsSessionCloseEventSchema>;\nexport type TtsSessionCreatedEvent = z.infer<typeof ttsSessionCreatedEventSchema>;\nexport type TtsOutputAudioEvent = z.infer<typeof ttsOutputAudioEventSchema>;\nexport type TtsDoneEvent = z.infer<typeof ttsDoneEventSchema>;\nexport type TtsSessionClosedEvent = z.infer<typeof ttsSessionClosedEventSchema>;\nexport type TtsErrorEvent = z.infer<typeof ttsErrorEventSchema>;\nexport type TtsClientEvent = z.infer<typeof ttsClientEventSchema>;\nexport type TtsServerEvent = z.infer<typeof ttsServerEventSchema>;\n"],"mappings":"AAGA,SAAS,SAAS;AAEX,MAAM,8BAA8B,EAAE,OAAO;AAAA,EAClD,MAAM,EAAE,QAAQ,gBAAgB;AAAA,EAChC,aAAa,EAAE,OAAO;AAAA,EACtB,UAAU,EAAE,OAAO;AAAA,EACnB,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,OAAO,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC;AAAA,EACvC,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAEM,MAAM,gCAAgC,EAAE,OAAO;AAAA,EACpD,MAAM,EAAE,QAAQ,kBAAkB;AAAA,EAClC,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,6BAA6B,EAAE,OAAO;AAAA,EACjD,MAAM,EAAE,QAAQ,eAAe;AACjC,CAAC;AAEM,MAAM,6BAA6B,EAAE,OAAO;AAAA,EACjD,MAAM,EAAE,QAAQ,eAAe;AACjC,CAAC;AAEM,MAAM,+BAA+B,EAAE,OAAO;AAAA,EACnD,MAAM,EAAE,QAAQ,iBAAiB;AAAA,EACjC,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,MAAM,EAAE,QAAQ,cAAc;AAAA,EAC9B,OAAO,EAAE,OAAO;AAAA,EAChB,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,qBAAqB,EAAE,OAAO;AAAA,EACzC,MAAM,EAAE,QAAQ,MAAM;AAAA,EACtB,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,8BAA8B,EAAE,OAAO;AAAA,EAClD,MAAM,EAAE,QAAQ,gBAAgB;AAAA,EAChC,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,OAAO;AAAA,EACvB,SAAS,EAAE,OAAO;AAAA,
|
|
1
|
+
{"version":3,"sources":["../../src/inference/api_protos.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { z } from 'zod';\n\nexport const ttsSessionCreateEventSchema = z.object({\n type: z.literal('session.create'),\n sample_rate: z.string(),\n encoding: z.string(),\n model: z.string().optional(),\n voice: z.string().optional(),\n language: z.string().optional(),\n extra: z.record(z.string(), z.unknown()),\n transcript: z.string().optional(),\n});\n\nexport const ttsInputTranscriptEventSchema = z.object({\n type: z.literal('input_transcript'),\n transcript: z.string(),\n});\n\nexport const ttsSessionFlushEventSchema = z.object({\n type: z.literal('session.flush'),\n});\n\nexport const ttsSessionCloseEventSchema = z.object({\n type: z.literal('session.close'),\n});\n\nexport const ttsSessionCreatedEventSchema = z.object({\n type: z.literal('session.created'),\n session_id: z.string(),\n});\n\nexport const ttsOutputAudioEventSchema = z.object({\n type: z.literal('output_audio'),\n audio: z.string(),\n session_id: z.string(),\n});\n\nexport const ttsDoneEventSchema = z.object({\n type: z.literal('done'),\n session_id: z.string(),\n});\n\nexport const ttsSessionClosedEventSchema = z.object({\n type: z.literal('session.closed'),\n session_id: z.string(),\n});\n\nexport const ttsErrorEventSchema = z.object({\n type: z.literal('error'),\n message: z.string().optional(),\n session_id: z.string().optional(),\n});\n\nexport const ttsClientEventSchema = z.discriminatedUnion('type', [\n ttsSessionCreateEventSchema,\n ttsInputTranscriptEventSchema,\n ttsSessionFlushEventSchema,\n ttsSessionCloseEventSchema,\n]);\n\nexport const ttsServerEventSchema = z.discriminatedUnion('type', [\n ttsSessionCreatedEventSchema,\n ttsOutputAudioEventSchema,\n ttsDoneEventSchema,\n ttsSessionClosedEventSchema,\n ttsErrorEventSchema,\n]);\n\nexport type TtsSessionCreateEvent = z.infer<typeof ttsSessionCreateEventSchema>;\nexport type TtsInputTranscriptEvent = z.infer<typeof ttsInputTranscriptEventSchema>;\nexport type TtsSessionFlushEvent = z.infer<typeof ttsSessionFlushEventSchema>;\nexport type TtsSessionCloseEvent = z.infer<typeof ttsSessionCloseEventSchema>;\nexport type TtsSessionCreatedEvent = z.infer<typeof ttsSessionCreatedEventSchema>;\nexport type TtsOutputAudioEvent = z.infer<typeof ttsOutputAudioEventSchema>;\nexport type TtsDoneEvent = z.infer<typeof ttsDoneEventSchema>;\nexport type TtsSessionClosedEvent = z.infer<typeof ttsSessionClosedEventSchema>;\nexport type TtsErrorEvent = z.infer<typeof ttsErrorEventSchema>;\nexport type TtsClientEvent = z.infer<typeof ttsClientEventSchema>;\nexport type TtsServerEvent = z.infer<typeof ttsServerEventSchema>;\n"],"mappings":"AAGA,SAAS,SAAS;AAEX,MAAM,8BAA8B,EAAE,OAAO;AAAA,EAClD,MAAM,EAAE,QAAQ,gBAAgB;AAAA,EAChC,aAAa,EAAE,OAAO;AAAA,EACtB,UAAU,EAAE,OAAO;AAAA,EACnB,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,OAAO,EAAE,OAAO,EAAE,SAAS;AAAA,EAC3B,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,OAAO,EAAE,OAAO,EAAE,OAAO,GAAG,EAAE,QAAQ,CAAC;AAAA,EACvC,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAEM,MAAM,gCAAgC,EAAE,OAAO;AAAA,EACpD,MAAM,EAAE,QAAQ,kBAAkB;AAAA,EAClC,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,6BAA6B,EAAE,OAAO;AAAA,EACjD,MAAM,EAAE,QAAQ,eAAe;AACjC,CAAC;AAEM,MAAM,6BAA6B,EAAE,OAAO;AAAA,EACjD,MAAM,EAAE,QAAQ,eAAe;AACjC,CAAC;AAEM,MAAM,+BAA+B,EAAE,OAAO;AAAA,EACnD,MAAM,EAAE,QAAQ,iBAAiB;AAAA,EACjC,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,MAAM,EAAE,QAAQ,cAAc;AAAA,EAC9B,OAAO,EAAE,OAAO;AAAA,EAChB,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,qBAAqB,EAAE,OAAO;AAAA,EACzC,MAAM,EAAE,QAAQ,MAAM;AAAA,EACtB,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,8BAA8B,EAAE,OAAO;AAAA,EAClD,MAAM,EAAE,QAAQ,gBAAgB;AAAA,EAChC,YAAY,EAAE,OAAO;AACvB,CAAC;AAEM,MAAM,sBAAsB,EAAE,OAAO;AAAA,EAC1C,MAAM,EAAE,QAAQ,OAAO;AAAA,EACvB,SAAS,EAAE,OAAO,EAAE,SAAS;AAAA,EAC7B,YAAY,EAAE,OAAO,EAAE,SAAS;AAClC,CAAC;AAEM,MAAM,uBAAuB,EAAE,mBAAmB,QAAQ;AAAA,EAC/D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAEM,MAAM,uBAAuB,EAAE,mBAAmB,QAAQ;AAAA,EAC/D;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;","names":[]}
|
package/dist/inference/stt.cjs
CHANGED
|
@@ -309,45 +309,57 @@ class SpeechStream extends import_stt.SpeechStream {
|
|
|
309
309
|
}
|
|
310
310
|
}
|
|
311
311
|
processTranscript(data, isFinal) {
|
|
312
|
+
if (this.queue.closed) return;
|
|
312
313
|
const requestId = data.request_id ?? this.requestId;
|
|
313
314
|
const text = data.transcript ?? "";
|
|
314
315
|
const language = data.language ?? this.opts.language ?? "en";
|
|
315
316
|
if (!text && !isFinal) return;
|
|
316
|
-
|
|
317
|
-
this.speaking
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
if (
|
|
317
|
+
try {
|
|
318
|
+
if (!this.speaking) {
|
|
319
|
+
this.speaking = true;
|
|
320
|
+
this.queue.put({ type: import_stt.SpeechEventType.START_OF_SPEECH });
|
|
321
|
+
}
|
|
322
|
+
const speechData = {
|
|
323
|
+
language,
|
|
324
|
+
startTime: data.start ?? 0,
|
|
325
|
+
endTime: data.duration ?? 0,
|
|
326
|
+
confidence: data.confidence ?? 1,
|
|
327
|
+
text
|
|
328
|
+
};
|
|
329
|
+
if (isFinal) {
|
|
330
|
+
if (this.speechDuration > 0) {
|
|
331
|
+
this.queue.put({
|
|
332
|
+
type: import_stt.SpeechEventType.RECOGNITION_USAGE,
|
|
333
|
+
requestId,
|
|
334
|
+
recognitionUsage: { audioDuration: this.speechDuration }
|
|
335
|
+
});
|
|
336
|
+
this.speechDuration = 0;
|
|
337
|
+
}
|
|
338
|
+
this.queue.put({
|
|
339
|
+
type: import_stt.SpeechEventType.FINAL_TRANSCRIPT,
|
|
340
|
+
requestId,
|
|
341
|
+
alternatives: [speechData]
|
|
342
|
+
});
|
|
343
|
+
if (this.speaking) {
|
|
344
|
+
this.speaking = false;
|
|
345
|
+
this.queue.put({ type: import_stt.SpeechEventType.END_OF_SPEECH });
|
|
346
|
+
}
|
|
347
|
+
} else {
|
|
329
348
|
this.queue.put({
|
|
330
|
-
type: import_stt.SpeechEventType.
|
|
349
|
+
type: import_stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
|
331
350
|
requestId,
|
|
332
|
-
|
|
351
|
+
alternatives: [speechData]
|
|
333
352
|
});
|
|
334
|
-
this.speechDuration = 0;
|
|
335
353
|
}
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
this.
|
|
354
|
+
} catch (e) {
|
|
355
|
+
if (e instanceof Error && e.message.includes("Queue is closed")) {
|
|
356
|
+
this.#logger.warn(
|
|
357
|
+
{ err: e },
|
|
358
|
+
"Queue closed during transcript processing (expected during disconnect)"
|
|
359
|
+
);
|
|
360
|
+
} else {
|
|
361
|
+
this.#logger.error({ err: e }, "Error putting transcript to queue");
|
|
344
362
|
}
|
|
345
|
-
} else {
|
|
346
|
-
this.queue.put({
|
|
347
|
-
type: import_stt.SpeechEventType.INTERIM_TRANSCRIPT,
|
|
348
|
-
requestId,
|
|
349
|
-
alternatives: [speechData]
|
|
350
|
-
});
|
|
351
363
|
}
|
|
352
364
|
}
|
|
353
365
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/inference/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame } from '@livekit/rtc-node';\nimport type { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport {\n STT as BaseSTT,\n SpeechStream as BaseSpeechStream,\n type SpeechData,\n type SpeechEvent,\n SpeechEventType,\n} from '../stt/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { type AudioBuffer, Event, Task, cancelAndWait, shortuuid, waitForAbort } from '../utils.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type DeepgramModels =\n | 'deepgram'\n | 'deepgram/nova-3'\n | 'deepgram/nova-3-general'\n | 'deepgram/nova-3-medical'\n | 'deepgram/nova-2-conversationalai'\n | 'deepgram/nova-2'\n | 'deepgram/nova-2-general'\n | 'deepgram/nova-2-medical'\n | 'deepgram/nova-2-phonecall';\n\nexport type CartesiaModels = 'cartesia' | 'cartesia/ink-whisper';\n\nexport type AssemblyaiModels = 'assemblyai' | 'assemblyai/universal-streaming';\n\nexport interface CartesiaOptions {\n min_volume?: number; // default: not specified\n max_silence_duration_secs?: number; // default: not specified\n}\n\nexport interface DeepgramOptions {\n filler_words?: boolean; // default: true\n interim_results?: boolean; // default: true\n endpointing?: number; // default: 25 (ms)\n punctuate?: boolean; // default: false\n smart_format?: boolean;\n keywords?: Array<[string, number]>;\n keyterms?: string[];\n profanity_filter?: boolean;\n numerals?: boolean;\n mip_opt_out?: boolean;\n}\n\nexport interface AssemblyAIOptions {\n format_turns?: boolean; // default: false\n end_of_turn_confidence_threshold?: number; // default: 0.01\n min_end_of_turn_silence_when_confident?: number; // default: 0\n max_turn_silence?: number; // default: not specified\n keyterms_prompt?: string[]; // default: not specified\n}\n\nexport type STTLanguages =\n | 'multi'\n | 'en'\n | 'de'\n | 'es'\n | 'fr'\n | 'ja'\n | 'pt'\n | 'zh'\n | 'hi'\n | AnyString;\n\ntype _STTModels = DeepgramModels | CartesiaModels | AssemblyaiModels;\n\nexport type STTModels = _STTModels | 'auto' | AnyString;\n\nexport type ModelWithLanguage = `${_STTModels}:${STTLanguages}` | STTModels;\n\nexport type STTOptions<TModel extends STTModels> = TModel extends DeepgramModels\n ? DeepgramOptions\n : TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends AssemblyaiModels\n ? AssemblyAIOptions\n : Record<string, unknown>;\n\nexport type STTEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: STTEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1';\nconst DEFAULT_CANCEL_TIMEOUT = 5000;\n\nexport interface InferenceSTTOptions<TModel extends STTModels> {\n model?: TModel;\n language?: STTLanguages;\n encoding: STTEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: STTOptions<TModel>;\n}\n\n/**\n * Livekit Cloud Inference STT\n */\nexport class STT<TModel extends STTModels> extends BaseSTT {\n private opts: InferenceSTTOptions<TModel>;\n private streams: Set<SpeechStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts?: {\n model?: TModel;\n language?: STTLanguages;\n baseURL?: string;\n encoding?: STTEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: STTOptions<TModel>;\n }) {\n super({ streaming: true, interimResults: true });\n\n const {\n model,\n language,\n baseURL,\n encoding = DEFAULT_ENCODING,\n sampleRate = DEFAULT_SAMPLE_RATE,\n apiKey,\n apiSecret,\n modelOptions = {} as STTOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n this.opts = {\n model,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n };\n }\n\n get label(): string {\n return 'inference.STT';\n }\n\n static fromModelString(modelString: string): STT<AnyString> {\n if (modelString.includes(':')) {\n const [model, language] = modelString.split(':') as [AnyString, STTLanguages];\n return new STT({ model, language });\n }\n return new STT({ model: modelString });\n }\n\n protected async _recognize(_: AudioBuffer): Promise<SpeechEvent> {\n throw new Error('LiveKit STT does not support batch recognition, use stream() instead');\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = { ...this.opts, ...opts };\n\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n stream(options?: {\n language?: STTLanguages | string;\n connOptions?: APIConnectOptions;\n }): SpeechStream<TModel> {\n const { language, connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const streamOpts = {\n ...this.opts,\n language: language ?? this.opts.language,\n } as InferenceSTTOptions<TModel>;\n\n const stream = new SpeechStream(this, streamOpts, connOptions);\n this.streams.add(stream);\n\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n const params = {\n settings: {\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n },\n } as Record<string, unknown>;\n\n if (this.opts.model && this.opts.model !== 'auto') {\n params.model = this.opts.model;\n }\n\n if (this.opts.language) {\n (params.settings as Record<string, unknown>).language = this.opts.language;\n }\n\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/stt`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const socket = await connectWs(url, headers, timeout);\n const msg = { ...params, type: 'session.create' };\n socket.send(JSON.stringify(msg));\n\n return socket;\n }\n}\n\nexport class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {\n private opts: InferenceSTTOptions<TModel>;\n private requestId = shortuuid('stt_request_');\n private speaking = false;\n private speechDuration = 0;\n private reconnectEvent = new Event();\n private stt: STT<TModel>;\n private connOptions: APIConnectOptions;\n\n #logger = log();\n\n constructor(\n sttImpl: STT<TModel>,\n opts: InferenceSTTOptions<TModel>,\n connOptions: APIConnectOptions,\n ) {\n super(sttImpl, opts.sampleRate, connOptions);\n this.opts = opts;\n this.stt = sttImpl;\n this.connOptions = connOptions;\n }\n\n get label(): string {\n return 'inference.SpeechStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = { ...this.opts, ...opts };\n this.reconnectEvent.set();\n }\n\n protected async run(): Promise<void> {\n while (true) {\n // Create fresh resources for each connection attempt\n let ws: WebSocket | null = null;\n let closing = false;\n let finalReceived = false;\n\n type SttServerEvent = Record<string, any>;\n const eventChannel = createStreamChannel<SttServerEvent>();\n\n const resourceCleanup = () => {\n if (closing) return;\n closing = true;\n eventChannel.close();\n ws?.removeAllListeners();\n ws?.close();\n };\n\n const createWsListener = async (ws: WebSocket, signal: AbortSignal) => {\n return new Promise<void>((resolve, reject) => {\n const onAbort = () => {\n resourceCleanup();\n reject(new Error('WebSocket connection aborted'));\n };\n\n signal.addEventListener('abort', onAbort, { once: true });\n\n ws.on('message', (data) => {\n const json = JSON.parse(data.toString()) as SttServerEvent;\n eventChannel.write(json);\n });\n\n ws.on('error', (e) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n resourceCleanup();\n reject(e);\n });\n\n ws.on('close', (code: number) => {\n resourceCleanup();\n\n if (!closing) return this.#logger.error('WebSocket closed unexpectedly');\n if (finalReceived) return resolve();\n\n reject(\n new APIStatusError({\n message: 'LiveKit STT connection closed unexpectedly',\n options: { statusCode: code },\n }),\n );\n });\n });\n };\n\n const send = async (socket: WebSocket, signal: AbortSignal) => {\n const audioStream = new AudioByteStream(\n this.opts.sampleRate,\n 1,\n Math.floor(this.opts.sampleRate / 20), // 50ms\n );\n\n // Create abort promise once to avoid memory leak\n const abortPromise = new Promise<never>((_, reject) => {\n if (signal.aborted) {\n return reject(new Error('Send aborted'));\n }\n const onAbort = () => reject(new Error('Send aborted'));\n signal.addEventListener('abort', onAbort, { once: true });\n });\n\n // Manual iteration to support cancellation\n const iterator = this.input[Symbol.asyncIterator]();\n try {\n while (true) {\n const result = await Promise.race([iterator.next(), abortPromise]);\n\n if (result.done) break;\n const ev = result.value;\n\n let frames: AudioFrame[];\n if (ev === SpeechStream.FLUSH_SENTINEL) {\n frames = audioStream.flush();\n } else {\n const frame = ev as AudioFrame;\n frames = audioStream.write(new Int16Array(frame.data).buffer);\n }\n\n for (const frame of frames) {\n this.speechDuration += frame.samplesPerChannel / frame.sampleRate;\n const base64 = Buffer.from(frame.data.buffer).toString('base64');\n const msg = { type: 'input_audio', audio: base64 };\n socket.send(JSON.stringify(msg));\n }\n }\n\n closing = true;\n socket.send(JSON.stringify({ type: 'session.finalize' }));\n } catch (e) {\n if ((e as Error).message === 'Send aborted') {\n // Expected abort, don't log\n return;\n }\n throw e;\n }\n };\n\n const recv = async (signal: AbortSignal) => {\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n while (!this.closed && !signal.aborted) {\n const result = await reader.read();\n if (signal.aborted) return;\n if (result.done) return;\n\n const json = result.value;\n const type = json.type as string | undefined;\n\n switch (type) {\n case 'session.created':\n case 'session.finalized':\n break;\n case 'session.closed':\n finalReceived = true;\n resourceCleanup();\n break;\n case 'interim_transcript':\n this.processTranscript(json, false);\n break;\n case 'final_transcript':\n this.processTranscript(json, true);\n break;\n case 'error':\n this.#logger.error({ error: json }, 'Received error from LiveKit STT');\n resourceCleanup();\n throw new APIError(`LiveKit STT returned error: ${JSON.stringify(json)}`);\n default:\n this.#logger.warn(\n { message: json },\n 'Received unexpected message from LiveKit STT',\n );\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n ws = await this.stt.connectWs(this.connOptions.timeoutMs);\n\n // Wrap tasks for proper cancellation support using Task signals\n const controller = new AbortController();\n const sendTask = Task.from(({ signal }) => send(ws!, signal), controller);\n const wsListenerTask = Task.from(({ signal }) => createWsListener(ws!, signal), controller);\n const recvTask = Task.from(({ signal }) => recv(signal), controller);\n const waitReconnectTask = Task.from(\n ({ signal }) => Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]),\n controller,\n );\n\n try {\n await Promise.race([\n Promise.all([sendTask.result, wsListenerTask.result, recvTask.result]),\n waitReconnectTask.result,\n ]);\n\n // If reconnect didn't trigger, tasks finished - exit loop\n if (!waitReconnectTask.done) break;\n\n // Reconnect triggered - clear event and continue loop\n this.reconnectEvent.clear();\n } finally {\n // Cancel all tasks to ensure cleanup\n await cancelAndWait(\n [sendTask, wsListenerTask, recvTask, waitReconnectTask],\n DEFAULT_CANCEL_TIMEOUT,\n );\n resourceCleanup();\n }\n } finally {\n // Ensure cleanup even if connectWs throws\n resourceCleanup();\n }\n }\n }\n\n private processTranscript(data: Record<string, any>, isFinal: boolean) {\n const requestId = data.request_id ?? this.requestId;\n const text = data.transcript ?? '';\n const language = data.language ?? this.opts.language ?? 'en';\n\n if (!text && !isFinal) return;\n\n // We'll have a more accurate way of detecting when speech started when we have VAD\n if (!this.speaking) {\n this.speaking = true;\n this.queue.put({ type: SpeechEventType.START_OF_SPEECH });\n }\n\n const speechData: SpeechData = {\n language,\n startTime: data.start ?? 0,\n endTime: data.duration ?? 0,\n confidence: data.confidence ?? 1.0,\n text,\n };\n\n if (isFinal) {\n if (this.speechDuration > 0) {\n this.queue.put({\n type: SpeechEventType.RECOGNITION_USAGE,\n requestId,\n recognitionUsage: { audioDuration: this.speechDuration },\n });\n this.speechDuration = 0;\n }\n\n this.queue.put({\n type: SpeechEventType.FINAL_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n\n if (this.speaking) {\n this.speaking = false;\n this.queue.put({ type: SpeechEventType.END_OF_SPEECH });\n }\n } else {\n this.queue.put({\n type: SpeechEventType.INTERIM_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgC;AAEhC,wBAAyC;AACzC,mBAAgC;AAChC,iBAAoB;AACpB,4BAAoC;AACpC,iBAMO;AACP,mBAAoE;AACpE,mBAAsF;AACtF,IAAAA,gBAA6D;AAuE7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,yBAAyB;AAgBxB,MAAM,YAAsC,WAAAC,IAAQ;AAAA,EACjD;AAAA,EACA,UAAqC,oBAAI,IAAI;AAAA,EAErD,cAAU,gBAAI;AAAA,EAEd,YAAY,MAST;AACD,UAAM,EAAE,WAAW,MAAM,gBAAgB,KAAK,CAAC;AAE/C,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX,aAAa;AAAA,MACb;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,IAClB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAEA,SAAK,OAAO;AAAA,MACV;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,QAAI,YAAY,SAAS,GAAG,GAAG;AAC7B,YAAM,CAAC,OAAO,QAAQ,IAAI,YAAY,MAAM,GAAG;AAC/C,aAAO,IAAI,IAAI,EAAE,OAAO,SAAS,CAAC;AAAA,IACpC;AACA,WAAO,IAAI,IAAI,EAAE,OAAO,YAAY,CAAC;AAAA,EACvC;AAAA,EAEA,MAAgB,WAAW,GAAsC;AAC/D,UAAM,IAAI,MAAM,sEAAsE;AAAA,EACxF;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAEpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,OAAO,SAGkB;AACvB,UAAM,EAAE,UAAU,cAAc,yCAA4B,IAAI,WAAW,CAAC;AAC5E,UAAM,aAAa;AAAA,MACjB,GAAG,KAAK;AAAA,MACR,UAAU,YAAY,KAAK,KAAK;AAAA,IAClC;AAEA,UAAM,SAAS,IAAI,aAAa,MAAM,YAAY,WAAW;AAC7D,SAAK,QAAQ,IAAI,MAAM;AAEvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AACnD,UAAM,SAAS;AAAA,MACb,UAAU;AAAA,QACR,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,QACxC,UAAU,KAAK,KAAK;AAAA,QACpB,OAAO,KAAK,KAAK;AAAA,MACnB;AAAA,IACF;AAEA,QAAI,KAAK,KAAK,SAAS,KAAK,KAAK,UAAU,QAAQ;AACjD,aAAO,QAAQ,KAAK,KAAK;AAAA,IAC3B;AAEA,QAAI,KAAK,KAAK,UAAU;AACtB,MAAC,OAAO,SAAqC,WAAW,KAAK,KAAK;AAAA,IACpE;AAEA,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,UAAM,iCAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS,UAAM,yBAAU,KAAK,SAAS,OAAO;AACpD,UAAM,MAAM,EAAE,GAAG,QAAQ,MAAM,iBAAiB;AAChD,WAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAE/B,WAAO;AAAA,EACT;AACF;AAEO,MAAM,qBAA+C,WAAAC,aAAiB;AAAA,EACnE;AAAA,EACA,gBAAY,wBAAU,cAAc;AAAA,EACpC,WAAW;AAAA,EACX,iBAAiB;AAAA,EACjB,iBAAiB,IAAI,mBAAM;AAAA,EAC3B;AAAA,EACA;AAAA,EAER,cAAU,gBAAI;AAAA,EAEd,YACE,SACA,MACA,aACA;AACA,UAAM,SAAS,KAAK,YAAY,WAAW;AAC3C,SAAK,OAAO;AACZ,SAAK,MAAM;AACX,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,SAAK,eAAe,IAAI;AAAA,EAC1B;AAAA,EAEA,MAAgB,MAAqB;AACnC,WAAO,MAAM;AAEX,UAAI,KAAuB;AAC3B,UAAI,UAAU;AACd,UAAI,gBAAgB;AAGpB,YAAM,mBAAe,2CAAoC;AAEzD,YAAM,kBAAkB,MAAM;AAC5B,YAAI,QAAS;AACb,kBAAU;AACV,qBAAa,MAAM;AACnB,iCAAI;AACJ,iCAAI;AAAA,MACN;AAEA,YAAM,mBAAmB,OAAOC,KAAe,WAAwB;AACrE,eAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,gBAAM,UAAU,MAAM;AACpB,4BAAgB;AAChB,mBAAO,IAAI,MAAM,8BAA8B,CAAC;AAAA,UAClD;AAEA,iBAAO,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAExD,UAAAA,IAAG,GAAG,WAAW,CAAC,SAAS;AACzB,kBAAM,OAAO,KAAK,MAAM,KAAK,SAAS,CAAC;AACvC,yBAAa,MAAM,IAAI;AAAA,UACzB,CAAC;AAED,UAAAA,IAAG,GAAG,SAAS,CAAC,MAAM;AACpB,iBAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,4BAAgB;AAChB,mBAAO,CAAC;AAAA,UACV,CAAC;AAED,UAAAA,IAAG,GAAG,SAAS,CAAC,SAAiB;AAC/B,4BAAgB;AAEhB,gBAAI,CAAC,QAAS,QAAO,KAAK,QAAQ,MAAM,+BAA+B;AACvE,gBAAI,cAAe,QAAO,QAAQ;AAElC;AAAA,cACE,IAAI,iCAAe;AAAA,gBACjB,SAAS;AAAA,gBACT,SAAS,EAAE,YAAY,KAAK;AAAA,cAC9B,CAAC;AAAA,YACH;AAAA,UACF,CAAC;AAAA,QACH,CAAC;AAAA,MACH;AAEA,YAAM,OAAO,OAAO,QAAmB,WAAwB;AAC7D,cAAM,cAAc,IAAI;AAAA,UACtB,KAAK,KAAK;AAAA,UACV;AAAA,UACA,KAAK,MAAM,KAAK,KAAK,aAAa,EAAE;AAAA;AAAA,QACtC;AAGA,cAAM,eAAe,IAAI,QAAe,CAAC,GAAG,WAAW;AACrD,cAAI,OAAO,SAAS;AAClB,mBAAO,OAAO,IAAI,MAAM,cAAc,CAAC;AAAA,UACzC;AACA,gBAAM,UAAU,MAAM,OAAO,IAAI,MAAM,cAAc,CAAC;AACtD,iBAAO,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAAA,QAC1D,CAAC;AAGD,cAAM,WAAW,KAAK,MAAM,OAAO,aAAa,EAAE;AAClD,YAAI;AACF,iBAAO,MAAM;AACX,kBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,SAAS,KAAK,GAAG,YAAY,CAAC;AAEjE,gBAAI,OAAO,KAAM;AACjB,kBAAM,KAAK,OAAO;AAElB,gBAAI;AACJ,gBAAI,OAAO,aAAa,gBAAgB;AACtC,uBAAS,YAAY,MAAM;AAAA,YAC7B,OAAO;AACL,oBAAM,QAAQ;AACd,uBAAS,YAAY,MAAM,IAAI,WAAW,MAAM,IAAI,EAAE,MAAM;AAAA,YAC9D;AAEA,uBAAW,SAAS,QAAQ;AAC1B,mBAAK,kBAAkB,MAAM,oBAAoB,MAAM;AACvD,oBAAM,SAAS,OAAO,KAAK,MAAM,KAAK,MAAM,EAAE,SAAS,QAAQ;AAC/D,oBAAM,MAAM,EAAE,MAAM,eAAe,OAAO,OAAO;AACjD,qBAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,YACjC;AAAA,UACF;AAEA,oBAAU;AACV,iBAAO,KAAK,KAAK,UAAU,EAAE,MAAM,mBAAmB,CAAC,CAAC;AAAA,QAC1D,SAAS,GAAG;AACV,cAAK,EAAY,YAAY,gBAAgB;AAE3C;AAAA,UACF;AACA,gBAAM;AAAA,QACR;AAAA,MACF;AAEA,YAAM,OAAO,OAAO,WAAwB;AAC1C,cAAM,oBAAoB,aAAa,OAAO;AAC9C,cAAM,SAAS,kBAAkB,UAAU;AAE3C,YAAI;AACF,iBAAO,CAAC,KAAK,UAAU,CAAC,OAAO,SAAS;AACtC,kBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,gBAAI,OAAO,QAAS;AACpB,gBAAI,OAAO,KAAM;AAEjB,kBAAM,OAAO,OAAO;AACpB,kBAAM,OAAO,KAAK;AAElB,oBAAQ,MAAM;AAAA,cACZ,KAAK;AAAA,cACL,KAAK;AACH;AAAA,cACF,KAAK;AACH,gCAAgB;AAChB,gCAAgB;AAChB;AAAA,cACF,KAAK;AACH,qBAAK,kBAAkB,MAAM,KAAK;AAClC;AAAA,cACF,KAAK;AACH,qBAAK,kBAAkB,MAAM,IAAI;AACjC;AAAA,cACF,KAAK;AACH,qBAAK,QAAQ,MAAM,EAAE,OAAO,KAAK,GAAG,iCAAiC;AACrE,gCAAgB;AAChB,sBAAM,IAAI,2BAAS,+BAA+B,KAAK,UAAU,IAAI,CAAC,EAAE;AAAA,cAC1E;AACE,qBAAK,QAAQ;AAAA,kBACX,EAAE,SAAS,KAAK;AAAA,kBAChB;AAAA,gBACF;AACA;AAAA,YACJ;AAAA,UACF;AAAA,QACF,UAAE;AACA,iBAAO,YAAY;AACnB,cAAI;AACF,kBAAM,kBAAkB,OAAO;AAAA,UACjC,SAAS,GAAG;AACV,iBAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,UACxF;AAAA,QACF;AAAA,MACF;AAEA,UAAI;AACF,aAAK,MAAM,KAAK,IAAI,UAAU,KAAK,YAAY,SAAS;AAGxD,cAAM,aAAa,IAAI,gBAAgB;AACvC,cAAM,WAAW,kBAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,IAAK,MAAM,GAAG,UAAU;AACxE,cAAM,iBAAiB,kBAAK,KAAK,CAAC,EAAE,OAAO,MAAM,iBAAiB,IAAK,MAAM,GAAG,UAAU;AAC1F,cAAM,WAAW,kBAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,MAAM,GAAG,UAAU;AACnE,cAAM,oBAAoB,kBAAK;AAAA,UAC7B,CAAC,EAAE,OAAO,MAAM,QAAQ,KAAK,CAAC,KAAK,eAAe,KAAK,OAAG,2BAAa,MAAM,CAAC,CAAC;AAAA,UAC/E;AAAA,QACF;AAEA,YAAI;AACF,gBAAM,QAAQ,KAAK;AAAA,YACjB,QAAQ,IAAI,CAAC,SAAS,QAAQ,eAAe,QAAQ,SAAS,MAAM,CAAC;AAAA,YACrE,kBAAkB;AAAA,UACpB,CAAC;AAGD,cAAI,CAAC,kBAAkB,KAAM;AAG7B,eAAK,eAAe,MAAM;AAAA,QAC5B,UAAE;AAEA,oBAAM;AAAA,YACJ,CAAC,UAAU,gBAAgB,UAAU,iBAAiB;AAAA,YACtD;AAAA,UACF;AACA,0BAAgB;AAAA,QAClB;AAAA,MACF,UAAE;AAEA,wBAAgB;AAAA,MAClB;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,kBAAkB,MAA2B,SAAkB;AACrE,UAAM,YAAY,KAAK,cAAc,KAAK;AAC1C,UAAM,OAAO,KAAK,cAAc;AAChC,UAAM,WAAW,KAAK,YAAY,KAAK,KAAK,YAAY;AAExD,QAAI,CAAC,QAAQ,CAAC,QAAS;AAGvB,QAAI,CAAC,KAAK,UAAU;AAClB,WAAK,WAAW;AAChB,WAAK,MAAM,IAAI,EAAE,MAAM,2BAAgB,gBAAgB,CAAC;AAAA,IAC1D;AAEA,UAAM,aAAyB;AAAA,MAC7B;AAAA,MACA,WAAW,KAAK,SAAS;AAAA,MACzB,SAAS,KAAK,YAAY;AAAA,MAC1B,YAAY,KAAK,cAAc;AAAA,MAC/B;AAAA,IACF;AAEA,QAAI,SAAS;AACX,UAAI,KAAK,iBAAiB,GAAG;AAC3B,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,2BAAgB;AAAA,UACtB;AAAA,UACA,kBAAkB,EAAE,eAAe,KAAK,eAAe;AAAA,QACzD,CAAC;AACD,aAAK,iBAAiB;AAAA,MACxB;AAEA,WAAK,MAAM,IAAI;AAAA,QACb,MAAM,2BAAgB;AAAA,QACtB;AAAA,QACA,cAAc,CAAC,UAAU;AAAA,MAC3B,CAAC;AAED,UAAI,KAAK,UAAU;AACjB,aAAK,WAAW;AAChB,aAAK,MAAM,IAAI,EAAE,MAAM,2BAAgB,cAAc,CAAC;AAAA,MACxD;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI;AAAA,QACb,MAAM,2BAAgB;AAAA,QACtB;AAAA,QACA,cAAc,CAAC,UAAU;AAAA,MAC3B,CAAC;AAAA,IACH;AAAA,EACF;AACF;","names":["import_utils","BaseSTT","BaseSpeechStream","ws"]}
|
|
1
|
+
{"version":3,"sources":["../../src/inference/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame } from '@livekit/rtc-node';\nimport type { WebSocket } from 'ws';\nimport { APIError, APIStatusError } from '../_exceptions.js';\nimport { AudioByteStream } from '../audio.js';\nimport { log } from '../log.js';\nimport { createStreamChannel } from '../stream/stream_channel.js';\nimport {\n STT as BaseSTT,\n SpeechStream as BaseSpeechStream,\n type SpeechData,\n type SpeechEvent,\n SpeechEventType,\n} from '../stt/index.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport { type AudioBuffer, Event, Task, cancelAndWait, shortuuid, waitForAbort } from '../utils.js';\nimport { type AnyString, connectWs, createAccessToken } from './utils.js';\n\nexport type DeepgramModels =\n | 'deepgram'\n | 'deepgram/nova-3'\n | 'deepgram/nova-3-general'\n | 'deepgram/nova-3-medical'\n | 'deepgram/nova-2-conversationalai'\n | 'deepgram/nova-2'\n | 'deepgram/nova-2-general'\n | 'deepgram/nova-2-medical'\n | 'deepgram/nova-2-phonecall';\n\nexport type CartesiaModels = 'cartesia' | 'cartesia/ink-whisper';\n\nexport type AssemblyaiModels = 'assemblyai' | 'assemblyai/universal-streaming';\n\nexport interface CartesiaOptions {\n min_volume?: number; // default: not specified\n max_silence_duration_secs?: number; // default: not specified\n}\n\nexport interface DeepgramOptions {\n filler_words?: boolean; // default: true\n interim_results?: boolean; // default: true\n endpointing?: number; // default: 25 (ms)\n punctuate?: boolean; // default: false\n smart_format?: boolean;\n keywords?: Array<[string, number]>;\n keyterms?: string[];\n profanity_filter?: boolean;\n numerals?: boolean;\n mip_opt_out?: boolean;\n}\n\nexport interface AssemblyAIOptions {\n format_turns?: boolean; // default: false\n end_of_turn_confidence_threshold?: number; // default: 0.01\n min_end_of_turn_silence_when_confident?: number; // default: 0\n max_turn_silence?: number; // default: not specified\n keyterms_prompt?: string[]; // default: not specified\n}\n\nexport type STTLanguages =\n | 'multi'\n | 'en'\n | 'de'\n | 'es'\n | 'fr'\n | 'ja'\n | 'pt'\n | 'zh'\n | 'hi'\n | AnyString;\n\ntype _STTModels = DeepgramModels | CartesiaModels | AssemblyaiModels;\n\nexport type STTModels = _STTModels | 'auto' | AnyString;\n\nexport type ModelWithLanguage = `${_STTModels}:${STTLanguages}` | STTModels;\n\nexport type STTOptions<TModel extends STTModels> = TModel extends DeepgramModels\n ? DeepgramOptions\n : TModel extends CartesiaModels\n ? CartesiaOptions\n : TModel extends AssemblyaiModels\n ? AssemblyAIOptions\n : Record<string, unknown>;\n\nexport type STTEncoding = 'pcm_s16le';\n\nconst DEFAULT_ENCODING: STTEncoding = 'pcm_s16le';\nconst DEFAULT_SAMPLE_RATE = 16000;\nconst DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1';\nconst DEFAULT_CANCEL_TIMEOUT = 5000;\n\nexport interface InferenceSTTOptions<TModel extends STTModels> {\n model?: TModel;\n language?: STTLanguages;\n encoding: STTEncoding;\n sampleRate: number;\n baseURL: string;\n apiKey: string;\n apiSecret: string;\n modelOptions: STTOptions<TModel>;\n}\n\n/**\n * Livekit Cloud Inference STT\n */\nexport class STT<TModel extends STTModels> extends BaseSTT {\n private opts: InferenceSTTOptions<TModel>;\n private streams: Set<SpeechStream<TModel>> = new Set();\n\n #logger = log();\n\n constructor(opts?: {\n model?: TModel;\n language?: STTLanguages;\n baseURL?: string;\n encoding?: STTEncoding;\n sampleRate?: number;\n apiKey?: string;\n apiSecret?: string;\n modelOptions?: STTOptions<TModel>;\n }) {\n super({ streaming: true, interimResults: true });\n\n const {\n model,\n language,\n baseURL,\n encoding = DEFAULT_ENCODING,\n sampleRate = DEFAULT_SAMPLE_RATE,\n apiKey,\n apiSecret,\n modelOptions = {} as STTOptions<TModel>,\n } = opts || {};\n\n const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;\n const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;\n if (!lkApiKey) {\n throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');\n }\n\n const lkApiSecret =\n apiSecret || process.env.LIVEKIT_INFERENCE_API_SECRET || process.env.LIVEKIT_API_SECRET;\n if (!lkApiSecret) {\n throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');\n }\n\n this.opts = {\n model,\n language,\n encoding,\n sampleRate,\n baseURL: lkBaseURL,\n apiKey: lkApiKey,\n apiSecret: lkApiSecret,\n modelOptions,\n };\n }\n\n get label(): string {\n return 'inference.STT';\n }\n\n static fromModelString(modelString: string): STT<AnyString> {\n if (modelString.includes(':')) {\n const [model, language] = modelString.split(':') as [AnyString, STTLanguages];\n return new STT({ model, language });\n }\n return new STT({ model: modelString });\n }\n\n protected async _recognize(_: AudioBuffer): Promise<SpeechEvent> {\n throw new Error('LiveKit STT does not support batch recognition, use stream() instead');\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = { ...this.opts, ...opts };\n\n for (const stream of this.streams) {\n stream.updateOptions(opts);\n }\n }\n\n stream(options?: {\n language?: STTLanguages | string;\n connOptions?: APIConnectOptions;\n }): SpeechStream<TModel> {\n const { language, connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};\n const streamOpts = {\n ...this.opts,\n language: language ?? this.opts.language,\n } as InferenceSTTOptions<TModel>;\n\n const stream = new SpeechStream(this, streamOpts, connOptions);\n this.streams.add(stream);\n\n return stream;\n }\n\n async connectWs(timeout: number): Promise<WebSocket> {\n const params = {\n settings: {\n sample_rate: String(this.opts.sampleRate),\n encoding: this.opts.encoding,\n extra: this.opts.modelOptions,\n },\n } as Record<string, unknown>;\n\n if (this.opts.model && this.opts.model !== 'auto') {\n params.model = this.opts.model;\n }\n\n if (this.opts.language) {\n (params.settings as Record<string, unknown>).language = this.opts.language;\n }\n\n let baseURL = this.opts.baseURL;\n if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {\n baseURL = baseURL.replace('http', 'ws');\n }\n\n const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);\n const url = `${baseURL}/stt`;\n const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;\n\n const socket = await connectWs(url, headers, timeout);\n const msg = { ...params, type: 'session.create' };\n socket.send(JSON.stringify(msg));\n\n return socket;\n }\n}\n\nexport class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {\n private opts: InferenceSTTOptions<TModel>;\n private requestId = shortuuid('stt_request_');\n private speaking = false;\n private speechDuration = 0;\n private reconnectEvent = new Event();\n private stt: STT<TModel>;\n private connOptions: APIConnectOptions;\n\n #logger = log();\n\n constructor(\n sttImpl: STT<TModel>,\n opts: InferenceSTTOptions<TModel>,\n connOptions: APIConnectOptions,\n ) {\n super(sttImpl, opts.sampleRate, connOptions);\n this.opts = opts;\n this.stt = sttImpl;\n this.connOptions = connOptions;\n }\n\n get label(): string {\n return 'inference.SpeechStream';\n }\n\n updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {\n this.opts = { ...this.opts, ...opts };\n this.reconnectEvent.set();\n }\n\n protected async run(): Promise<void> {\n while (true) {\n // Create fresh resources for each connection attempt\n let ws: WebSocket | null = null;\n let closing = false;\n let finalReceived = false;\n\n type SttServerEvent = Record<string, any>;\n const eventChannel = createStreamChannel<SttServerEvent>();\n\n const resourceCleanup = () => {\n if (closing) return;\n closing = true;\n eventChannel.close();\n ws?.removeAllListeners();\n ws?.close();\n };\n\n const createWsListener = async (ws: WebSocket, signal: AbortSignal) => {\n return new Promise<void>((resolve, reject) => {\n const onAbort = () => {\n resourceCleanup();\n reject(new Error('WebSocket connection aborted'));\n };\n\n signal.addEventListener('abort', onAbort, { once: true });\n\n ws.on('message', (data) => {\n const json = JSON.parse(data.toString()) as SttServerEvent;\n eventChannel.write(json);\n });\n\n ws.on('error', (e) => {\n this.#logger.error({ error: e }, 'WebSocket error');\n resourceCleanup();\n reject(e);\n });\n\n ws.on('close', (code: number) => {\n resourceCleanup();\n\n if (!closing) return this.#logger.error('WebSocket closed unexpectedly');\n if (finalReceived) return resolve();\n\n reject(\n new APIStatusError({\n message: 'LiveKit STT connection closed unexpectedly',\n options: { statusCode: code },\n }),\n );\n });\n });\n };\n\n const send = async (socket: WebSocket, signal: AbortSignal) => {\n const audioStream = new AudioByteStream(\n this.opts.sampleRate,\n 1,\n Math.floor(this.opts.sampleRate / 20), // 50ms\n );\n\n // Create abort promise once to avoid memory leak\n const abortPromise = new Promise<never>((_, reject) => {\n if (signal.aborted) {\n return reject(new Error('Send aborted'));\n }\n const onAbort = () => reject(new Error('Send aborted'));\n signal.addEventListener('abort', onAbort, { once: true });\n });\n\n // Manual iteration to support cancellation\n const iterator = this.input[Symbol.asyncIterator]();\n try {\n while (true) {\n const result = await Promise.race([iterator.next(), abortPromise]);\n\n if (result.done) break;\n const ev = result.value;\n\n let frames: AudioFrame[];\n if (ev === SpeechStream.FLUSH_SENTINEL) {\n frames = audioStream.flush();\n } else {\n const frame = ev as AudioFrame;\n frames = audioStream.write(new Int16Array(frame.data).buffer);\n }\n\n for (const frame of frames) {\n this.speechDuration += frame.samplesPerChannel / frame.sampleRate;\n const base64 = Buffer.from(frame.data.buffer).toString('base64');\n const msg = { type: 'input_audio', audio: base64 };\n socket.send(JSON.stringify(msg));\n }\n }\n\n closing = true;\n socket.send(JSON.stringify({ type: 'session.finalize' }));\n } catch (e) {\n if ((e as Error).message === 'Send aborted') {\n // Expected abort, don't log\n return;\n }\n throw e;\n }\n };\n\n const recv = async (signal: AbortSignal) => {\n const serverEventStream = eventChannel.stream();\n const reader = serverEventStream.getReader();\n\n try {\n while (!this.closed && !signal.aborted) {\n const result = await reader.read();\n if (signal.aborted) return;\n if (result.done) return;\n\n const json = result.value;\n const type = json.type as string | undefined;\n\n switch (type) {\n case 'session.created':\n case 'session.finalized':\n break;\n case 'session.closed':\n finalReceived = true;\n resourceCleanup();\n break;\n case 'interim_transcript':\n this.processTranscript(json, false);\n break;\n case 'final_transcript':\n this.processTranscript(json, true);\n break;\n case 'error':\n this.#logger.error({ error: json }, 'Received error from LiveKit STT');\n resourceCleanup();\n throw new APIError(`LiveKit STT returned error: ${JSON.stringify(json)}`);\n default:\n this.#logger.warn(\n { message: json },\n 'Received unexpected message from LiveKit STT',\n );\n break;\n }\n }\n } finally {\n reader.releaseLock();\n try {\n await serverEventStream.cancel();\n } catch (e) {\n this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);\n }\n }\n };\n\n try {\n ws = await this.stt.connectWs(this.connOptions.timeoutMs);\n\n // Wrap tasks for proper cancellation support using Task signals\n const controller = new AbortController();\n const sendTask = Task.from(({ signal }) => send(ws!, signal), controller);\n const wsListenerTask = Task.from(({ signal }) => createWsListener(ws!, signal), controller);\n const recvTask = Task.from(({ signal }) => recv(signal), controller);\n const waitReconnectTask = Task.from(\n ({ signal }) => Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]),\n controller,\n );\n\n try {\n await Promise.race([\n Promise.all([sendTask.result, wsListenerTask.result, recvTask.result]),\n waitReconnectTask.result,\n ]);\n\n // If reconnect didn't trigger, tasks finished - exit loop\n if (!waitReconnectTask.done) break;\n\n // Reconnect triggered - clear event and continue loop\n this.reconnectEvent.clear();\n } finally {\n // Cancel all tasks to ensure cleanup\n await cancelAndWait(\n [sendTask, wsListenerTask, recvTask, waitReconnectTask],\n DEFAULT_CANCEL_TIMEOUT,\n );\n resourceCleanup();\n }\n } finally {\n // Ensure cleanup even if connectWs throws\n resourceCleanup();\n }\n }\n }\n\n private processTranscript(data: Record<string, any>, isFinal: boolean) {\n // Check if queue is closed to avoid race condition during disconnect\n if (this.queue.closed) return;\n\n const requestId = data.request_id ?? this.requestId;\n const text = data.transcript ?? '';\n const language = data.language ?? this.opts.language ?? 'en';\n\n if (!text && !isFinal) return;\n\n try {\n // We'll have a more accurate way of detecting when speech started when we have VAD\n if (!this.speaking) {\n this.speaking = true;\n this.queue.put({ type: SpeechEventType.START_OF_SPEECH });\n }\n\n const speechData: SpeechData = {\n language,\n startTime: data.start ?? 0,\n endTime: data.duration ?? 0,\n confidence: data.confidence ?? 1.0,\n text,\n };\n\n if (isFinal) {\n if (this.speechDuration > 0) {\n this.queue.put({\n type: SpeechEventType.RECOGNITION_USAGE,\n requestId,\n recognitionUsage: { audioDuration: this.speechDuration },\n });\n this.speechDuration = 0;\n }\n\n this.queue.put({\n type: SpeechEventType.FINAL_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n\n if (this.speaking) {\n this.speaking = false;\n this.queue.put({ type: SpeechEventType.END_OF_SPEECH });\n }\n } else {\n this.queue.put({\n type: SpeechEventType.INTERIM_TRANSCRIPT,\n requestId,\n alternatives: [speechData],\n });\n }\n } catch (e) {\n if (e instanceof Error && e.message.includes('Queue is closed')) {\n // Expected behavior on disconnect, log as warning\n this.#logger.warn(\n { err: e },\n 'Queue closed during transcript processing (expected during disconnect)',\n );\n } else {\n this.#logger.error({ err: e }, 'Error putting transcript to queue');\n }\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgC;AAEhC,wBAAyC;AACzC,mBAAgC;AAChC,iBAAoB;AACpB,4BAAoC;AACpC,iBAMO;AACP,mBAAoE;AACpE,mBAAsF;AACtF,IAAAA,gBAA6D;AAuE7D,MAAM,mBAAgC;AACtC,MAAM,sBAAsB;AAC5B,MAAM,mBAAmB;AACzB,MAAM,yBAAyB;AAgBxB,MAAM,YAAsC,WAAAC,IAAQ;AAAA,EACjD;AAAA,EACA,UAAqC,oBAAI,IAAI;AAAA,EAErD,cAAU,gBAAI;AAAA,EAEd,YAAY,MAST;AACD,UAAM,EAAE,WAAW,MAAM,gBAAgB,KAAK,CAAC;AAE/C,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX,aAAa;AAAA,MACb;AAAA,MACA;AAAA,MACA,eAAe,CAAC;AAAA,IAClB,IAAI,QAAQ,CAAC;AAEb,UAAM,YAAY,WAAW,QAAQ,IAAI,yBAAyB;AAClE,UAAM,WAAW,UAAU,QAAQ,IAAI,6BAA6B,QAAQ,IAAI;AAChF,QAAI,CAAC,UAAU;AACb,YAAM,IAAI,MAAM,wDAAwD;AAAA,IAC1E;AAEA,UAAM,cACJ,aAAa,QAAQ,IAAI,gCAAgC,QAAQ,IAAI;AACvE,QAAI,CAAC,aAAa;AAChB,YAAM,IAAI,MAAM,iEAAiE;AAAA,IACnF;AAEA,SAAK,OAAO;AAAA,MACV;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,MACR,WAAW;AAAA,MACX;AAAA,IACF;AAAA,EACF;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,OAAO,gBAAgB,aAAqC;AAC1D,QAAI,YAAY,SAAS,GAAG,GAAG;AAC7B,YAAM,CAAC,OAAO,QAAQ,IAAI,YAAY,MAAM,GAAG;AAC/C,aAAO,IAAI,IAAI,EAAE,OAAO,SAAS,CAAC;AAAA,IACpC;AACA,WAAO,IAAI,IAAI,EAAE,OAAO,YAAY,CAAC;AAAA,EACvC;AAAA,EAEA,MAAgB,WAAW,GAAsC;AAC/D,UAAM,IAAI,MAAM,sEAAsE;AAAA,EACxF;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AAEpC,eAAW,UAAU,KAAK,SAAS;AACjC,aAAO,cAAc,IAAI;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,OAAO,SAGkB;AACvB,UAAM,EAAE,UAAU,cAAc,yCAA4B,IAAI,WAAW,CAAC;AAC5E,UAAM,aAAa;AAAA,MACjB,GAAG,KAAK;AAAA,MACR,UAAU,YAAY,KAAK,KAAK;AAAA,IAClC;AAEA,UAAM,SAAS,IAAI,aAAa,MAAM,YAAY,WAAW;AAC7D,SAAK,QAAQ,IAAI,MAAM;AAEvB,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UAAU,SAAqC;AACnD,UAAM,SAAS;AAAA,MACb,UAAU;AAAA,QACR,aAAa,OAAO,KAAK,KAAK,UAAU;AAAA,QACxC,UAAU,KAAK,KAAK;AAAA,QACpB,OAAO,KAAK,KAAK;AAAA,MACnB;AAAA,IACF;AAEA,QAAI,KAAK,KAAK,SAAS,KAAK,KAAK,UAAU,QAAQ;AACjD,aAAO,QAAQ,KAAK,KAAK;AAAA,IAC3B;AAEA,QAAI,KAAK,KAAK,UAAU;AACtB,MAAC,OAAO,SAAqC,WAAW,KAAK,KAAK;AAAA,IACpE;AAEA,QAAI,UAAU,KAAK,KAAK;AACxB,QAAI,QAAQ,WAAW,SAAS,KAAK,QAAQ,WAAW,UAAU,GAAG;AACnE,gBAAU,QAAQ,QAAQ,QAAQ,IAAI;AAAA,IACxC;AAEA,UAAM,QAAQ,UAAM,iCAAkB,KAAK,KAAK,QAAQ,KAAK,KAAK,SAAS;AAC3E,UAAM,MAAM,GAAG,OAAO;AACtB,UAAM,UAAU,EAAE,eAAe,UAAU,KAAK,GAAG;AAEnD,UAAM,SAAS,UAAM,yBAAU,KAAK,SAAS,OAAO;AACpD,UAAM,MAAM,EAAE,GAAG,QAAQ,MAAM,iBAAiB;AAChD,WAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAE/B,WAAO;AAAA,EACT;AACF;AAEO,MAAM,qBAA+C,WAAAC,aAAiB;AAAA,EACnE;AAAA,EACA,gBAAY,wBAAU,cAAc;AAAA,EACpC,WAAW;AAAA,EACX,iBAAiB;AAAA,EACjB,iBAAiB,IAAI,mBAAM;AAAA,EAC3B;AAAA,EACA;AAAA,EAER,cAAU,gBAAI;AAAA,EAEd,YACE,SACA,MACA,aACA;AACA,UAAM,SAAS,KAAK,YAAY,WAAW;AAC3C,SAAK,OAAO;AACZ,SAAK,MAAM;AACX,SAAK,cAAc;AAAA,EACrB;AAAA,EAEA,IAAI,QAAgB;AAClB,WAAO;AAAA,EACT;AAAA,EAEA,cAAc,MAA8E;AAC1F,SAAK,OAAO,EAAE,GAAG,KAAK,MAAM,GAAG,KAAK;AACpC,SAAK,eAAe,IAAI;AAAA,EAC1B;AAAA,EAEA,MAAgB,MAAqB;AACnC,WAAO,MAAM;AAEX,UAAI,KAAuB;AAC3B,UAAI,UAAU;AACd,UAAI,gBAAgB;AAGpB,YAAM,mBAAe,2CAAoC;AAEzD,YAAM,kBAAkB,MAAM;AAC5B,YAAI,QAAS;AACb,kBAAU;AACV,qBAAa,MAAM;AACnB,iCAAI;AACJ,iCAAI;AAAA,MACN;AAEA,YAAM,mBAAmB,OAAOC,KAAe,WAAwB;AACrE,eAAO,IAAI,QAAc,CAAC,SAAS,WAAW;AAC5C,gBAAM,UAAU,MAAM;AACpB,4BAAgB;AAChB,mBAAO,IAAI,MAAM,8BAA8B,CAAC;AAAA,UAClD;AAEA,iBAAO,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAExD,UAAAA,IAAG,GAAG,WAAW,CAAC,SAAS;AACzB,kBAAM,OAAO,KAAK,MAAM,KAAK,SAAS,CAAC;AACvC,yBAAa,MAAM,IAAI;AAAA,UACzB,CAAC;AAED,UAAAA,IAAG,GAAG,SAAS,CAAC,MAAM;AACpB,iBAAK,QAAQ,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AAClD,4BAAgB;AAChB,mBAAO,CAAC;AAAA,UACV,CAAC;AAED,UAAAA,IAAG,GAAG,SAAS,CAAC,SAAiB;AAC/B,4BAAgB;AAEhB,gBAAI,CAAC,QAAS,QAAO,KAAK,QAAQ,MAAM,+BAA+B;AACvE,gBAAI,cAAe,QAAO,QAAQ;AAElC;AAAA,cACE,IAAI,iCAAe;AAAA,gBACjB,SAAS;AAAA,gBACT,SAAS,EAAE,YAAY,KAAK;AAAA,cAC9B,CAAC;AAAA,YACH;AAAA,UACF,CAAC;AAAA,QACH,CAAC;AAAA,MACH;AAEA,YAAM,OAAO,OAAO,QAAmB,WAAwB;AAC7D,cAAM,cAAc,IAAI;AAAA,UACtB,KAAK,KAAK;AAAA,UACV;AAAA,UACA,KAAK,MAAM,KAAK,KAAK,aAAa,EAAE;AAAA;AAAA,QACtC;AAGA,cAAM,eAAe,IAAI,QAAe,CAAC,GAAG,WAAW;AACrD,cAAI,OAAO,SAAS;AAClB,mBAAO,OAAO,IAAI,MAAM,cAAc,CAAC;AAAA,UACzC;AACA,gBAAM,UAAU,MAAM,OAAO,IAAI,MAAM,cAAc,CAAC;AACtD,iBAAO,iBAAiB,SAAS,SAAS,EAAE,MAAM,KAAK,CAAC;AAAA,QAC1D,CAAC;AAGD,cAAM,WAAW,KAAK,MAAM,OAAO,aAAa,EAAE;AAClD,YAAI;AACF,iBAAO,MAAM;AACX,kBAAM,SAAS,MAAM,QAAQ,KAAK,CAAC,SAAS,KAAK,GAAG,YAAY,CAAC;AAEjE,gBAAI,OAAO,KAAM;AACjB,kBAAM,KAAK,OAAO;AAElB,gBAAI;AACJ,gBAAI,OAAO,aAAa,gBAAgB;AACtC,uBAAS,YAAY,MAAM;AAAA,YAC7B,OAAO;AACL,oBAAM,QAAQ;AACd,uBAAS,YAAY,MAAM,IAAI,WAAW,MAAM,IAAI,EAAE,MAAM;AAAA,YAC9D;AAEA,uBAAW,SAAS,QAAQ;AAC1B,mBAAK,kBAAkB,MAAM,oBAAoB,MAAM;AACvD,oBAAM,SAAS,OAAO,KAAK,MAAM,KAAK,MAAM,EAAE,SAAS,QAAQ;AAC/D,oBAAM,MAAM,EAAE,MAAM,eAAe,OAAO,OAAO;AACjD,qBAAO,KAAK,KAAK,UAAU,GAAG,CAAC;AAAA,YACjC;AAAA,UACF;AAEA,oBAAU;AACV,iBAAO,KAAK,KAAK,UAAU,EAAE,MAAM,mBAAmB,CAAC,CAAC;AAAA,QAC1D,SAAS,GAAG;AACV,cAAK,EAAY,YAAY,gBAAgB;AAE3C;AAAA,UACF;AACA,gBAAM;AAAA,QACR;AAAA,MACF;AAEA,YAAM,OAAO,OAAO,WAAwB;AAC1C,cAAM,oBAAoB,aAAa,OAAO;AAC9C,cAAM,SAAS,kBAAkB,UAAU;AAE3C,YAAI;AACF,iBAAO,CAAC,KAAK,UAAU,CAAC,OAAO,SAAS;AACtC,kBAAM,SAAS,MAAM,OAAO,KAAK;AACjC,gBAAI,OAAO,QAAS;AACpB,gBAAI,OAAO,KAAM;AAEjB,kBAAM,OAAO,OAAO;AACpB,kBAAM,OAAO,KAAK;AAElB,oBAAQ,MAAM;AAAA,cACZ,KAAK;AAAA,cACL,KAAK;AACH;AAAA,cACF,KAAK;AACH,gCAAgB;AAChB,gCAAgB;AAChB;AAAA,cACF,KAAK;AACH,qBAAK,kBAAkB,MAAM,KAAK;AAClC;AAAA,cACF,KAAK;AACH,qBAAK,kBAAkB,MAAM,IAAI;AACjC;AAAA,cACF,KAAK;AACH,qBAAK,QAAQ,MAAM,EAAE,OAAO,KAAK,GAAG,iCAAiC;AACrE,gCAAgB;AAChB,sBAAM,IAAI,2BAAS,+BAA+B,KAAK,UAAU,IAAI,CAAC,EAAE;AAAA,cAC1E;AACE,qBAAK,QAAQ;AAAA,kBACX,EAAE,SAAS,KAAK;AAAA,kBAChB;AAAA,gBACF;AACA;AAAA,YACJ;AAAA,UACF;AAAA,QACF,UAAE;AACA,iBAAO,YAAY;AACnB,cAAI;AACF,kBAAM,kBAAkB,OAAO;AAAA,UACjC,SAAS,GAAG;AACV,iBAAK,QAAQ,MAAM,kEAAkE,CAAC;AAAA,UACxF;AAAA,QACF;AAAA,MACF;AAEA,UAAI;AACF,aAAK,MAAM,KAAK,IAAI,UAAU,KAAK,YAAY,SAAS;AAGxD,cAAM,aAAa,IAAI,gBAAgB;AACvC,cAAM,WAAW,kBAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,IAAK,MAAM,GAAG,UAAU;AACxE,cAAM,iBAAiB,kBAAK,KAAK,CAAC,EAAE,OAAO,MAAM,iBAAiB,IAAK,MAAM,GAAG,UAAU;AAC1F,cAAM,WAAW,kBAAK,KAAK,CAAC,EAAE,OAAO,MAAM,KAAK,MAAM,GAAG,UAAU;AACnE,cAAM,oBAAoB,kBAAK;AAAA,UAC7B,CAAC,EAAE,OAAO,MAAM,QAAQ,KAAK,CAAC,KAAK,eAAe,KAAK,OAAG,2BAAa,MAAM,CAAC,CAAC;AAAA,UAC/E;AAAA,QACF;AAEA,YAAI;AACF,gBAAM,QAAQ,KAAK;AAAA,YACjB,QAAQ,IAAI,CAAC,SAAS,QAAQ,eAAe,QAAQ,SAAS,MAAM,CAAC;AAAA,YACrE,kBAAkB;AAAA,UACpB,CAAC;AAGD,cAAI,CAAC,kBAAkB,KAAM;AAG7B,eAAK,eAAe,MAAM;AAAA,QAC5B,UAAE;AAEA,oBAAM;AAAA,YACJ,CAAC,UAAU,gBAAgB,UAAU,iBAAiB;AAAA,YACtD;AAAA,UACF;AACA,0BAAgB;AAAA,QAClB;AAAA,MACF,UAAE;AAEA,wBAAgB;AAAA,MAClB;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,kBAAkB,MAA2B,SAAkB;AAErE,QAAI,KAAK,MAAM,OAAQ;AAEvB,UAAM,YAAY,KAAK,cAAc,KAAK;AAC1C,UAAM,OAAO,KAAK,cAAc;AAChC,UAAM,WAAW,KAAK,YAAY,KAAK,KAAK,YAAY;AAExD,QAAI,CAAC,QAAQ,CAAC,QAAS;AAEvB,QAAI;AAEF,UAAI,CAAC,KAAK,UAAU;AAClB,aAAK,WAAW;AAChB,aAAK,MAAM,IAAI,EAAE,MAAM,2BAAgB,gBAAgB,CAAC;AAAA,MAC1D;AAEA,YAAM,aAAyB;AAAA,QAC7B;AAAA,QACA,WAAW,KAAK,SAAS;AAAA,QACzB,SAAS,KAAK,YAAY;AAAA,QAC1B,YAAY,KAAK,cAAc;AAAA,QAC/B;AAAA,MACF;AAEA,UAAI,SAAS;AACX,YAAI,KAAK,iBAAiB,GAAG;AAC3B,eAAK,MAAM,IAAI;AAAA,YACb,MAAM,2BAAgB;AAAA,YACtB;AAAA,YACA,kBAAkB,EAAE,eAAe,KAAK,eAAe;AAAA,UACzD,CAAC;AACD,eAAK,iBAAiB;AAAA,QACxB;AAEA,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,2BAAgB;AAAA,UACtB;AAAA,UACA,cAAc,CAAC,UAAU;AAAA,QAC3B,CAAC;AAED,YAAI,KAAK,UAAU;AACjB,eAAK,WAAW;AAChB,eAAK,MAAM,IAAI,EAAE,MAAM,2BAAgB,cAAc,CAAC;AAAA,QACxD;AAAA,MACF,OAAO;AACL,aAAK,MAAM,IAAI;AAAA,UACb,MAAM,2BAAgB;AAAA,UACtB;AAAA,UACA,cAAc,CAAC,UAAU;AAAA,QAC3B,CAAC;AAAA,MACH;AAAA,IACF,SAAS,GAAG;AACV,UAAI,aAAa,SAAS,EAAE,QAAQ,SAAS,iBAAiB,GAAG;AAE/D,aAAK,QAAQ;AAAA,UACX,EAAE,KAAK,EAAE;AAAA,UACT;AAAA,QACF;AAAA,MACF,OAAO;AACL,aAAK,QAAQ,MAAM,EAAE,KAAK,EAAE,GAAG,mCAAmC;AAAA,MACpE;AAAA,IACF;AAAA,EACF;AACF;","names":["import_utils","BaseSTT","BaseSpeechStream","ws"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../src/inference/stt.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAKpC,OAAO,EACL,GAAG,IAAI,OAAO,EACd,YAAY,IAAI,gBAAgB,EAEhC,KAAK,WAAW,EAEjB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,KAAK,iBAAiB,EAA+B,MAAM,aAAa,CAAC;AAClF,OAAO,EAAE,KAAK,WAAW,EAAuD,MAAM,aAAa,CAAC;AACpG,OAAO,EAAE,KAAK,SAAS,EAAgC,MAAM,YAAY,CAAC;AAE1E,MAAM,MAAM,cAAc,GACtB,UAAU,GACV,iBAAiB,GACjB,yBAAyB,GACzB,yBAAyB,GACzB,kCAAkC,GAClC,iBAAiB,GACjB,yBAAyB,GACzB,yBAAyB,GACzB,2BAA2B,CAAC;AAEhC,MAAM,MAAM,cAAc,GAAG,UAAU,GAAG,sBAAsB,CAAC;AAEjE,MAAM,MAAM,gBAAgB,GAAG,YAAY,GAAG,gCAAgC,CAAC;AAE/E,MAAM,WAAW,eAAe;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,yBAAyB,CAAC,EAAE,MAAM,CAAC;CACpC;AAED,MAAM,WAAW,eAAe;IAC9B,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACnC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,MAAM,WAAW,iBAAiB;IAChC,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,gCAAgC,CAAC,EAAE,MAAM,CAAC;IAC1C,sCAAsC,CAAC,EAAE,MAAM,CAAC;IAChD,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC5B;AAED,MAAM,MAAM,YAAY,GACpB,OAAO,GACP,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,SAAS,CAAC;AAEd,KAAK,UAAU,GAAG,cAAc,GAAG,cAAc,GAAG,gBAAgB,CAAC;AAErE,MAAM,MAAM,SAAS,GAAG,UAAU,GAAG,MAAM,GAAG,SAAS,CAAC;AAExD,MAAM,MAAM,iBAAiB,GAAG,GAAG,UAAU,IAAI,YAAY,EAAE,GAAG,SAAS,CAAC;AAE5E,MAAM,MAAM,UAAU,CAAC,MAAM,SAAS,SAAS,IAAI,MAAM,SAAS,cAAc,GAC5E,eAAe,GACf,MAAM,SAAS,cAAc,GAC3B,eAAe,GACf,MAAM,SAAS,gBAAgB,GAC7B,iBAAiB,GACjB,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAEhC,MAAM,MAAM,WAAW,GAAG,WAAW,CAAC;AAOtC,MAAM,WAAW,mBAAmB,CAAC,MAAM,SAAS,SAAS;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,YAAY,CAAC;IACxB,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;CAClC;AAED;;GAEG;AACH,qBAAa,GAAG,CAAC,MAAM,SAAS,SAAS,CAAE,SAAQ,OAAO;;IACxD,OAAO,CAAC,IAAI,CAA8B;IAC1C,OAAO,CAAC,OAAO,CAAwC;gBAI3C,IAAI,CAAC,EAAE;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,YAAY,CAAC;QACxB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,QAAQ,CAAC,EAAE,WAAW,CAAC;QACvB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,YAAY,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;KACnC;IAsCD,IAAI,KAAK,IAAI,MAAM,CAElB;IAED,MAAM,CAAC,eAAe,CAAC,WAAW,EAAE,MAAM,GAAG,GAAG,CAAC,SAAS,CAAC;cAQ3C,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAIhE,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,EAAE,OAAO,GAAG,UAAU,CAAC,CAAC,GAAG,IAAI;IAQ3F,MAAM,CAAC,OAAO,CAAC,EAAE;QACf,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;QACjC,WAAW,CAAC,EAAE,iBAAiB,CAAC;KACjC,GAAG,YAAY,CAAC,MAAM,CAAC;IAalB,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC;CAgCrD;AAED,qBAAa,YAAY,CAAC,MAAM,SAAS,SAAS,CAAE,SAAQ,gBAAgB;;IAC1E,OAAO,CAAC,IAAI,CAA8B;IAC1C,OAAO,CAAC,SAAS,CAA6B;IAC9C,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,cAAc,CAAe;IACrC,OAAO,CAAC,GAAG,CAAc;IACzB,OAAO,CAAC,WAAW,CAAoB;gBAKrC,OAAO,EAAE,GAAG,CAAC,MAAM,CAAC,EACpB,IAAI,EAAE,mBAAmB,CAAC,MAAM,CAAC,EACjC,WAAW,EAAE,iBAAiB;IAQhC,IAAI,KAAK,IAAI,MAAM,CAElB;IAED,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,EAAE,OAAO,GAAG,UAAU,CAAC,CAAC,GAAG,IAAI;cAK3E,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAkMpC,OAAO,CAAC,iBAAiB;
|
|
1
|
+
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../src/inference/stt.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AAKpC,OAAO,EACL,GAAG,IAAI,OAAO,EACd,YAAY,IAAI,gBAAgB,EAEhC,KAAK,WAAW,EAEjB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAE,KAAK,iBAAiB,EAA+B,MAAM,aAAa,CAAC;AAClF,OAAO,EAAE,KAAK,WAAW,EAAuD,MAAM,aAAa,CAAC;AACpG,OAAO,EAAE,KAAK,SAAS,EAAgC,MAAM,YAAY,CAAC;AAE1E,MAAM,MAAM,cAAc,GACtB,UAAU,GACV,iBAAiB,GACjB,yBAAyB,GACzB,yBAAyB,GACzB,kCAAkC,GAClC,iBAAiB,GACjB,yBAAyB,GACzB,yBAAyB,GACzB,2BAA2B,CAAC;AAEhC,MAAM,MAAM,cAAc,GAAG,UAAU,GAAG,sBAAsB,CAAC;AAEjE,MAAM,MAAM,gBAAgB,GAAG,YAAY,GAAG,gCAAgC,CAAC;AAE/E,MAAM,WAAW,eAAe;IAC9B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,yBAAyB,CAAC,EAAE,MAAM,CAAC;CACpC;AAED,MAAM,WAAW,eAAe;IAC9B,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,QAAQ,CAAC,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IACnC,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,gBAAgB,CAAC,EAAE,OAAO,CAAC;IAC3B,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED,MAAM,WAAW,iBAAiB;IAChC,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,gCAAgC,CAAC,EAAE,MAAM,CAAC;IAC1C,sCAAsC,CAAC,EAAE,MAAM,CAAC;IAChD,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;CAC5B;AAED,MAAM,MAAM,YAAY,GACpB,OAAO,GACP,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,IAAI,GACJ,SAAS,CAAC;AAEd,KAAK,UAAU,GAAG,cAAc,GAAG,cAAc,GAAG,gBAAgB,CAAC;AAErE,MAAM,MAAM,SAAS,GAAG,UAAU,GAAG,MAAM,GAAG,SAAS,CAAC;AAExD,MAAM,MAAM,iBAAiB,GAAG,GAAG,UAAU,IAAI,YAAY,EAAE,GAAG,SAAS,CAAC;AAE5E,MAAM,MAAM,UAAU,CAAC,MAAM,SAAS,SAAS,IAAI,MAAM,SAAS,cAAc,GAC5E,eAAe,GACf,MAAM,SAAS,cAAc,GAC3B,eAAe,GACf,MAAM,SAAS,gBAAgB,GAC7B,iBAAiB,GACjB,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAEhC,MAAM,MAAM,WAAW,GAAG,WAAW,CAAC;AAOtC,MAAM,WAAW,mBAAmB,CAAC,MAAM,SAAS,SAAS;IAC3D,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,YAAY,CAAC;IACxB,QAAQ,EAAE,WAAW,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;CAClC;AAED;;GAEG;AACH,qBAAa,GAAG,CAAC,MAAM,SAAS,SAAS,CAAE,SAAQ,OAAO;;IACxD,OAAO,CAAC,IAAI,CAA8B;IAC1C,OAAO,CAAC,OAAO,CAAwC;gBAI3C,IAAI,CAAC,EAAE;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,YAAY,CAAC;QACxB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,QAAQ,CAAC,EAAE,WAAW,CAAC;QACvB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,YAAY,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;KACnC;IAsCD,IAAI,KAAK,IAAI,MAAM,CAElB;IAED,MAAM,CAAC,eAAe,CAAC,WAAW,EAAE,MAAM,GAAG,GAAG,CAAC,SAAS,CAAC;cAQ3C,UAAU,CAAC,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAIhE,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,EAAE,OAAO,GAAG,UAAU,CAAC,CAAC,GAAG,IAAI;IAQ3F,MAAM,CAAC,OAAO,CAAC,EAAE;QACf,QAAQ,CAAC,EAAE,YAAY,GAAG,MAAM,CAAC;QACjC,WAAW,CAAC,EAAE,iBAAiB,CAAC;KACjC,GAAG,YAAY,CAAC,MAAM,CAAC;IAalB,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC;CAgCrD;AAED,qBAAa,YAAY,CAAC,MAAM,SAAS,SAAS,CAAE,SAAQ,gBAAgB;;IAC1E,OAAO,CAAC,IAAI,CAA8B;IAC1C,OAAO,CAAC,SAAS,CAA6B;IAC9C,OAAO,CAAC,QAAQ,CAAS;IACzB,OAAO,CAAC,cAAc,CAAK;IAC3B,OAAO,CAAC,cAAc,CAAe;IACrC,OAAO,CAAC,GAAG,CAAc;IACzB,OAAO,CAAC,WAAW,CAAoB;gBAKrC,OAAO,EAAE,GAAG,CAAC,MAAM,CAAC,EACpB,IAAI,EAAE,mBAAmB,CAAC,MAAM,CAAC,EACjC,WAAW,EAAE,iBAAiB;IAQhC,IAAI,KAAK,IAAI,MAAM,CAElB;IAED,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,EAAE,OAAO,GAAG,UAAU,CAAC,CAAC,GAAG,IAAI;cAK3E,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;IAkMpC,OAAO,CAAC,iBAAiB;CAgE1B"}
|
package/dist/inference/stt.js
CHANGED
|
@@ -289,45 +289,57 @@ class SpeechStream extends BaseSpeechStream {
|
|
|
289
289
|
}
|
|
290
290
|
}
|
|
291
291
|
processTranscript(data, isFinal) {
|
|
292
|
+
if (this.queue.closed) return;
|
|
292
293
|
const requestId = data.request_id ?? this.requestId;
|
|
293
294
|
const text = data.transcript ?? "";
|
|
294
295
|
const language = data.language ?? this.opts.language ?? "en";
|
|
295
296
|
if (!text && !isFinal) return;
|
|
296
|
-
|
|
297
|
-
this.speaking
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
if (
|
|
297
|
+
try {
|
|
298
|
+
if (!this.speaking) {
|
|
299
|
+
this.speaking = true;
|
|
300
|
+
this.queue.put({ type: SpeechEventType.START_OF_SPEECH });
|
|
301
|
+
}
|
|
302
|
+
const speechData = {
|
|
303
|
+
language,
|
|
304
|
+
startTime: data.start ?? 0,
|
|
305
|
+
endTime: data.duration ?? 0,
|
|
306
|
+
confidence: data.confidence ?? 1,
|
|
307
|
+
text
|
|
308
|
+
};
|
|
309
|
+
if (isFinal) {
|
|
310
|
+
if (this.speechDuration > 0) {
|
|
311
|
+
this.queue.put({
|
|
312
|
+
type: SpeechEventType.RECOGNITION_USAGE,
|
|
313
|
+
requestId,
|
|
314
|
+
recognitionUsage: { audioDuration: this.speechDuration }
|
|
315
|
+
});
|
|
316
|
+
this.speechDuration = 0;
|
|
317
|
+
}
|
|
318
|
+
this.queue.put({
|
|
319
|
+
type: SpeechEventType.FINAL_TRANSCRIPT,
|
|
320
|
+
requestId,
|
|
321
|
+
alternatives: [speechData]
|
|
322
|
+
});
|
|
323
|
+
if (this.speaking) {
|
|
324
|
+
this.speaking = false;
|
|
325
|
+
this.queue.put({ type: SpeechEventType.END_OF_SPEECH });
|
|
326
|
+
}
|
|
327
|
+
} else {
|
|
309
328
|
this.queue.put({
|
|
310
|
-
type: SpeechEventType.
|
|
329
|
+
type: SpeechEventType.INTERIM_TRANSCRIPT,
|
|
311
330
|
requestId,
|
|
312
|
-
|
|
331
|
+
alternatives: [speechData]
|
|
313
332
|
});
|
|
314
|
-
this.speechDuration = 0;
|
|
315
333
|
}
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
this.
|
|
334
|
+
} catch (e) {
|
|
335
|
+
if (e instanceof Error && e.message.includes("Queue is closed")) {
|
|
336
|
+
this.#logger.warn(
|
|
337
|
+
{ err: e },
|
|
338
|
+
"Queue closed during transcript processing (expected during disconnect)"
|
|
339
|
+
);
|
|
340
|
+
} else {
|
|
341
|
+
this.#logger.error({ err: e }, "Error putting transcript to queue");
|
|
324
342
|
}
|
|
325
|
-
} else {
|
|
326
|
-
this.queue.put({
|
|
327
|
-
type: SpeechEventType.INTERIM_TRANSCRIPT,
|
|
328
|
-
requestId,
|
|
329
|
-
alternatives: [speechData]
|
|
330
|
-
});
|
|
331
343
|
}
|
|
332
344
|
}
|
|
333
345
|
}
|