@livekit/agents 1.1.0-dev.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +2 -0
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +2 -0
- package/dist/cli.js.map +1 -1
- package/dist/constants.cjs +3 -0
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +1 -0
- package/dist/constants.d.ts +1 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +2 -0
- package/dist/constants.js.map +1 -1
- package/dist/cpu.cjs +189 -0
- package/dist/cpu.cjs.map +1 -0
- package/dist/cpu.d.cts +24 -0
- package/dist/cpu.d.ts +24 -0
- package/dist/cpu.d.ts.map +1 -0
- package/dist/cpu.js +152 -0
- package/dist/cpu.js.map +1 -0
- package/dist/cpu.test.cjs +227 -0
- package/dist/cpu.test.cjs.map +1 -0
- package/dist/cpu.test.js +204 -0
- package/dist/cpu.test.js.map +1 -0
- package/dist/index.cjs +12 -10
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +13 -13
- package/dist/index.d.ts +13 -13
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -10
- package/dist/index.js.map +1 -1
- package/dist/inference/interruption/defaults.cjs +1 -1
- package/dist/inference/interruption/defaults.cjs.map +1 -1
- package/dist/inference/interruption/defaults.d.cts +1 -1
- package/dist/inference/interruption/defaults.d.ts +1 -1
- package/dist/inference/interruption/defaults.d.ts.map +1 -1
- package/dist/inference/interruption/defaults.js +1 -1
- package/dist/inference/interruption/defaults.js.map +1 -1
- package/dist/inference/interruption/http_transport.cjs +44 -28
- package/dist/inference/interruption/http_transport.cjs.map +1 -1
- package/dist/inference/interruption/http_transport.d.ts.map +1 -1
- package/dist/inference/interruption/http_transport.js +45 -29
- package/dist/inference/interruption/http_transport.js.map +1 -1
- package/dist/inference/interruption/interruption_detector.cjs +22 -5
- package/dist/inference/interruption/interruption_detector.cjs.map +1 -1
- package/dist/inference/interruption/interruption_detector.d.cts +2 -2
- package/dist/inference/interruption/interruption_detector.d.ts +2 -2
- package/dist/inference/interruption/interruption_detector.d.ts.map +1 -1
- package/dist/inference/interruption/interruption_detector.js +22 -5
- package/dist/inference/interruption/interruption_detector.js.map +1 -1
- package/dist/inference/interruption/interruption_stream.cjs +4 -4
- package/dist/inference/interruption/interruption_stream.cjs.map +1 -1
- package/dist/inference/interruption/interruption_stream.js +4 -4
- package/dist/inference/interruption/interruption_stream.js.map +1 -1
- package/dist/inference/interruption/types.cjs.map +1 -1
- package/dist/inference/interruption/types.d.cts +2 -2
- package/dist/inference/interruption/types.d.ts +2 -2
- package/dist/inference/interruption/types.d.ts.map +1 -1
- package/dist/inference/interruption/ws_transport.cjs +60 -47
- package/dist/inference/interruption/ws_transport.cjs.map +1 -1
- package/dist/inference/interruption/ws_transport.d.ts.map +1 -1
- package/dist/inference/interruption/ws_transport.js +60 -47
- package/dist/inference/interruption/ws_transport.js.map +1 -1
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +1 -1
- package/dist/inference/llm.d.ts +1 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +20 -12
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +3 -2
- package/dist/inference/stt.d.ts +3 -2
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +20 -12
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/stt.test.cjs +14 -0
- package/dist/inference/stt.test.cjs.map +1 -1
- package/dist/inference/stt.test.js +14 -0
- package/dist/inference/stt.test.js.map +1 -1
- package/dist/inference/tts.cjs +13 -4
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +8 -1
- package/dist/inference/tts.d.ts +8 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +13 -4
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/tts.test.cjs +10 -0
- package/dist/inference/tts.test.cjs.map +1 -1
- package/dist/inference/tts.test.js +10 -0
- package/dist/inference/tts.test.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +41 -23
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +41 -23
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +1 -1
- package/dist/job.cjs.map +1 -1
- package/dist/job.js +1 -1
- package/dist/job.js.map +1 -1
- package/dist/language.cjs +394 -0
- package/dist/language.cjs.map +1 -0
- package/dist/language.d.cts +15 -0
- package/dist/language.d.ts +15 -0
- package/dist/language.d.ts.map +1 -0
- package/dist/language.js +363 -0
- package/dist/language.js.map +1 -0
- package/dist/language.test.cjs +43 -0
- package/dist/language.test.cjs.map +1 -0
- package/dist/language.test.js +49 -0
- package/dist/language.test.js.map +1 -0
- package/dist/llm/index.cjs +2 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +2 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +6 -2
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +6 -2
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +2 -1
- package/dist/stt/stt.d.ts +2 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/utils.cjs +15 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +8 -0
- package/dist/utils.d.ts +8 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +13 -0
- package/dist/utils.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent.cjs +14 -17
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +10 -11
- package/dist/voice/agent.d.ts +10 -11
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +15 -18
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent.test.cjs +194 -0
- package/dist/voice/agent.test.cjs.map +1 -1
- package/dist/voice/agent.test.js +195 -1
- package/dist/voice/agent.test.js.map +1 -1
- package/dist/voice/agent_activity.cjs +116 -39
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +2 -0
- package/dist/voice/agent_activity.d.ts +2 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +117 -40
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_activity.test.cjs +135 -0
- package/dist/voice/agent_activity.test.cjs.map +1 -0
- package/dist/voice/agent_activity.test.js +134 -0
- package/dist/voice/agent_activity.test.js.map +1 -0
- package/dist/voice/agent_session.cjs +38 -38
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +65 -56
- package/dist/voice/agent_session.d.ts +65 -56
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +37 -37
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +106 -52
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +4 -2
- package/dist/voice/audio_recognition.d.ts +4 -2
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +106 -52
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/audio_recognition_span.test.cjs +84 -22
- package/dist/voice/audio_recognition_span.test.cjs.map +1 -1
- package/dist/voice/audio_recognition_span.test.js +90 -23
- package/dist/voice/audio_recognition_span.test.js.map +1 -1
- package/dist/voice/events.cjs +1 -1
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +4 -3
- package/dist/voice/events.d.ts +4 -3
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +1 -1
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/index.cjs +9 -1
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -1
- package/dist/voice/index.d.ts +1 -1
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +10 -1
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/remote_session.cjs +922 -0
- package/dist/voice/remote_session.cjs.map +1 -0
- package/dist/voice/remote_session.d.cts +108 -0
- package/dist/voice/remote_session.d.ts +108 -0
- package/dist/voice/remote_session.d.ts.map +1 -0
- package/dist/voice/remote_session.js +887 -0
- package/dist/voice/remote_session.js.map +1 -0
- package/dist/voice/report.cjs +11 -10
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +5 -3
- package/dist/voice/report.d.ts +5 -3
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +11 -10
- package/dist/voice/report.js.map +1 -1
- package/dist/voice/report.test.cjs +15 -0
- package/dist/voice/report.test.cjs.map +1 -1
- package/dist/voice/report.test.js +15 -0
- package/dist/voice/report.test.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +39 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +3 -1
- package/dist/voice/room_io/room_io.d.ts +3 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +40 -1
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/turn_config/interruption.cjs.map +1 -1
- package/dist/voice/turn_config/interruption.d.cts +1 -1
- package/dist/voice/turn_config/interruption.d.ts +1 -1
- package/dist/voice/turn_config/interruption.d.ts.map +1 -1
- package/dist/voice/turn_config/interruption.js.map +1 -1
- package/dist/voice/turn_config/utils.cjs +95 -35
- package/dist/voice/turn_config/utils.cjs.map +1 -1
- package/dist/voice/turn_config/utils.d.cts +17 -5
- package/dist/voice/turn_config/utils.d.ts +17 -5
- package/dist/voice/turn_config/utils.d.ts.map +1 -1
- package/dist/voice/turn_config/utils.js +93 -35
- package/dist/voice/turn_config/utils.js.map +1 -1
- package/dist/voice/turn_config/utils.test.cjs +83 -41
- package/dist/voice/turn_config/utils.test.cjs.map +1 -1
- package/dist/voice/turn_config/utils.test.js +84 -42
- package/dist/voice/turn_config/utils.test.js.map +1 -1
- package/dist/worker.cjs +6 -29
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +6 -19
- package/dist/worker.js.map +1 -1
- package/package.json +3 -2
- package/src/cli.ts +2 -0
- package/src/constants.ts +1 -0
- package/src/cpu.test.ts +239 -0
- package/src/cpu.ts +173 -0
- package/src/index.ts +13 -15
- package/src/inference/interruption/defaults.ts +1 -1
- package/src/inference/interruption/http_transport.ts +49 -30
- package/src/inference/interruption/interruption_detector.ts +22 -6
- package/src/inference/interruption/interruption_stream.ts +4 -4
- package/src/inference/interruption/types.ts +2 -2
- package/src/inference/interruption/ws_transport.ts +63 -59
- package/src/inference/llm.ts +3 -1
- package/src/inference/stt.test.ts +17 -0
- package/src/inference/stt.ts +22 -14
- package/src/inference/tts.test.ts +12 -0
- package/src/inference/tts.ts +22 -6
- package/src/ipc/job_proc_lazy_main.ts +44 -24
- package/src/job.ts +1 -1
- package/src/language.test.ts +62 -0
- package/src/language.ts +380 -0
- package/src/llm/index.ts +2 -0
- package/src/stream/deferred_stream.ts +5 -1
- package/src/stt/stt.ts +2 -1
- package/src/utils.ts +20 -0
- package/src/voice/agent.test.ts +208 -1
- package/src/voice/agent.ts +21 -22
- package/src/voice/agent_activity.test.ts +194 -0
- package/src/voice/agent_activity.ts +161 -43
- package/src/voice/agent_session.ts +103 -92
- package/src/voice/audio_recognition.ts +124 -61
- package/src/voice/audio_recognition_span.test.ts +115 -35
- package/src/voice/events.ts +4 -3
- package/src/voice/index.ts +10 -1
- package/src/voice/remote_session.ts +1083 -0
- package/src/voice/report.test.ts +22 -3
- package/src/voice/report.ts +31 -14
- package/src/voice/room_io/room_io.ts +52 -2
- package/src/voice/turn_config/interruption.ts +1 -1
- package/src/voice/turn_config/utils.test.ts +91 -43
- package/src/voice/turn_config/utils.ts +120 -56
- package/src/worker.ts +34 -50
- package/dist/voice/client_events.cjs +0 -554
- package/dist/voice/client_events.cjs.map +0 -1
- package/dist/voice/client_events.d.cts +0 -195
- package/dist/voice/client_events.d.ts +0 -195
- package/dist/voice/client_events.d.ts.map +0 -1
- package/dist/voice/client_events.js +0 -548
- package/dist/voice/client_events.js.map +0 -1
- package/dist/voice/wire_format.cjs +0 -798
- package/dist/voice/wire_format.cjs.map +0 -1
- package/dist/voice/wire_format.d.cts +0 -5503
- package/dist/voice/wire_format.d.ts +0 -5503
- package/dist/voice/wire_format.d.ts.map +0 -1
- package/dist/voice/wire_format.js +0 -728
- package/dist/voice/wire_format.js.map +0 -1
- package/src/voice/client_events.ts +0 -838
- package/src/voice/wire_format.ts +0 -827
package/src/index.ts
CHANGED
|
@@ -9,33 +9,31 @@
|
|
|
9
9
|
* @see {@link https://docs.livekit.io/agents/overview | LiveKit Agents documentation}
|
|
10
10
|
* @packageDocumentation
|
|
11
11
|
*/
|
|
12
|
-
import * as beta from './beta/index.js';
|
|
13
|
-
import * as cli from './cli.js';
|
|
14
|
-
import * as inference from './inference/index.js';
|
|
15
|
-
import * as ipc from './ipc/index.js';
|
|
16
|
-
import * as llm from './llm/index.js';
|
|
17
|
-
import * as metrics from './metrics/index.js';
|
|
18
|
-
import * as stream from './stream/index.js';
|
|
19
|
-
import * as stt from './stt/index.js';
|
|
20
|
-
import * as telemetry from './telemetry/index.js';
|
|
21
|
-
import * as tokenize from './tokenize/index.js';
|
|
22
|
-
import * as tts from './tts/index.js';
|
|
23
|
-
import * as voice from './voice/index.js';
|
|
24
|
-
|
|
25
12
|
export * from './_exceptions.js';
|
|
26
13
|
export * from './audio.js';
|
|
14
|
+
export * as beta from './beta/index.js';
|
|
15
|
+
export * as cli from './cli.js';
|
|
27
16
|
export * from './connection_pool.js';
|
|
28
17
|
export * from './generator.js';
|
|
18
|
+
export * as inference from './inference/index.js';
|
|
29
19
|
export * from './inference_runner.js';
|
|
20
|
+
export * as ipc from './ipc/index.js';
|
|
30
21
|
export * from './job.js';
|
|
22
|
+
export * from './language.js';
|
|
23
|
+
export * as llm from './llm/index.js';
|
|
31
24
|
export * from './log.js';
|
|
25
|
+
export * as metrics from './metrics/index.js';
|
|
32
26
|
export * from './plugin.js';
|
|
27
|
+
export * as stream from './stream/index.js';
|
|
28
|
+
export * as stt from './stt/index.js';
|
|
29
|
+
export * as telemetry from './telemetry/index.js';
|
|
30
|
+
export * as tokenize from './tokenize/index.js';
|
|
33
31
|
export * from './transcription.js';
|
|
32
|
+
export * as tts from './tts/index.js';
|
|
34
33
|
export * from './types.js';
|
|
35
34
|
export * from './utils.js';
|
|
36
35
|
export * from './vad.js';
|
|
37
36
|
export * from './version.js';
|
|
37
|
+
export * as voice from './voice/index.js';
|
|
38
38
|
export { createTimedString, isTimedString, type TimedString } from './voice/io.js';
|
|
39
39
|
export * from './worker.js';
|
|
40
|
-
|
|
41
|
-
export { beta, cli, inference, ipc, llm, metrics, stream, stt, telemetry, tokenize, tts, voice };
|
|
@@ -9,7 +9,7 @@ export const THRESHOLD = 0.5;
|
|
|
9
9
|
export const MAX_AUDIO_DURATION_IN_S = 3.0;
|
|
10
10
|
export const AUDIO_PREFIX_DURATION_IN_S = 0.5;
|
|
11
11
|
export const DETECTION_INTERVAL_IN_S = 0.1;
|
|
12
|
-
export const REMOTE_INFERENCE_TIMEOUT_IN_S =
|
|
12
|
+
export const REMOTE_INFERENCE_TIMEOUT_IN_S = 0.7;
|
|
13
13
|
export const SAMPLE_RATE = 16000;
|
|
14
14
|
export const FRAMES_PER_SECOND = 40;
|
|
15
15
|
export const FRAME_DURATION_IN_S = 0.025; // 25ms per frame
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
// SPDX-FileCopyrightText: 2026 LiveKit, Inc.
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
import { ofetch } from 'ofetch';
|
|
4
|
+
import { FetchError, ofetch } from 'ofetch';
|
|
5
5
|
import { TransformStream } from 'stream/web';
|
|
6
6
|
import { z } from 'zod';
|
|
7
|
+
import { APIConnectionError, APIError, APIStatusError, isAPIError } from '../../_exceptions.js';
|
|
7
8
|
import { log } from '../../log.js';
|
|
8
9
|
import { createAccessToken } from '../utils.js';
|
|
9
|
-
import { intervalForRetry } from './defaults.js';
|
|
10
10
|
import { InterruptionCacheEntry } from './interruption_cache_entry.js';
|
|
11
11
|
import type { OverlappingSpeechEvent } from './types.js';
|
|
12
12
|
import type { BoundedCache } from './utils.js';
|
|
@@ -50,31 +50,50 @@ export async function predictHTTP(
|
|
|
50
50
|
url.searchParams.append('min_frames', predictOptions.minFrames.toFixed());
|
|
51
51
|
url.searchParams.append('created_at', createdAt.toFixed());
|
|
52
52
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
53
|
+
try {
|
|
54
|
+
const response = await ofetch(url.toString(), {
|
|
55
|
+
retry: 0,
|
|
56
|
+
headers: {
|
|
57
|
+
'Content-Type': 'application/octet-stream',
|
|
58
|
+
Authorization: `Bearer ${options.token}`,
|
|
59
|
+
},
|
|
60
|
+
signal: options.signal,
|
|
61
|
+
timeout: options.timeout,
|
|
62
|
+
method: 'POST',
|
|
63
|
+
body: data,
|
|
64
|
+
});
|
|
65
|
+
const { created_at, is_bargein, probabilities } = predictEndpointResponseSchema.parse(response);
|
|
66
|
+
|
|
67
|
+
return {
|
|
68
|
+
createdAt: created_at,
|
|
69
|
+
isBargein: is_bargein,
|
|
70
|
+
probabilities,
|
|
71
|
+
predictionDurationInS: (performance.now() - createdAt) / 1000,
|
|
72
|
+
};
|
|
73
|
+
} catch (err) {
|
|
74
|
+
if (isAPIError(err)) throw err;
|
|
75
|
+
if (err instanceof FetchError) {
|
|
76
|
+
if (err.statusCode) {
|
|
77
|
+
throw new APIStatusError({
|
|
78
|
+
message: `error during interruption prediction: ${err.message}`,
|
|
79
|
+
options: { statusCode: err.statusCode, body: err.data },
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
if (
|
|
83
|
+
err.cause instanceof Error &&
|
|
84
|
+
(err.cause.name === 'TimeoutError' || err.cause.name === 'AbortError')
|
|
85
|
+
) {
|
|
86
|
+
throw new APIStatusError({
|
|
87
|
+
message: `interruption inference timeout: ${err.message}`,
|
|
88
|
+
options: { statusCode: 408, retryable: false },
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
throw new APIConnectionError({
|
|
92
|
+
message: `interruption inference connection error: ${err.message}`,
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
throw new APIError(`error during interruption prediction: ${err}`);
|
|
96
|
+
}
|
|
78
97
|
}
|
|
79
98
|
|
|
80
99
|
export interface HttpTransportOptions {
|
|
@@ -154,8 +173,8 @@ export function createHttpTransport(
|
|
|
154
173
|
updateUserSpeakingSpan(entry);
|
|
155
174
|
}
|
|
156
175
|
const event: OverlappingSpeechEvent = {
|
|
157
|
-
type: '
|
|
158
|
-
|
|
176
|
+
type: 'overlapping_speech',
|
|
177
|
+
detectedAt: Date.now(),
|
|
159
178
|
overlapStartedAt: overlapSpeechStartedAt,
|
|
160
179
|
isInterruption: entry.isInterruption,
|
|
161
180
|
speechInput: entry.speechInput,
|
|
@@ -177,7 +196,7 @@ export function createHttpTransport(
|
|
|
177
196
|
controller.enqueue(event);
|
|
178
197
|
}
|
|
179
198
|
} catch (err) {
|
|
180
|
-
|
|
199
|
+
controller.error(err);
|
|
181
200
|
}
|
|
182
201
|
},
|
|
183
202
|
},
|
|
@@ -7,12 +7,12 @@ import { log } from '../../log.js';
|
|
|
7
7
|
import type { InterruptionMetrics } from '../../metrics/base.js';
|
|
8
8
|
import { DEFAULT_INFERENCE_URL, STAGING_INFERENCE_URL, getDefaultInferenceUrl } from '../utils.js';
|
|
9
9
|
import { FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults } from './defaults.js';
|
|
10
|
-
import
|
|
10
|
+
import { InterruptionDetectionError } from './errors.js';
|
|
11
11
|
import { InterruptionStreamBase } from './interruption_stream.js';
|
|
12
12
|
import type { InterruptionOptions, OverlappingSpeechEvent } from './types.js';
|
|
13
13
|
|
|
14
14
|
type InterruptionCallbacks = {
|
|
15
|
-
|
|
15
|
+
overlapping_speech: (event: OverlappingSpeechEvent) => void;
|
|
16
16
|
metrics_collected: (metrics: InterruptionMetrics) => void;
|
|
17
17
|
error: (error: InterruptionDetectionError) => void;
|
|
18
18
|
};
|
|
@@ -76,6 +76,15 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ
|
|
|
76
76
|
} else {
|
|
77
77
|
useProxy = false;
|
|
78
78
|
}
|
|
79
|
+
const transport = useProxy ? 'websocket' : 'http';
|
|
80
|
+
this.logger.debug(
|
|
81
|
+
{
|
|
82
|
+
baseUrl: lkBaseUrl,
|
|
83
|
+
useProxy,
|
|
84
|
+
transport,
|
|
85
|
+
},
|
|
86
|
+
'=== Resolved interruption detector transport configuration',
|
|
87
|
+
);
|
|
79
88
|
|
|
80
89
|
this.options = {
|
|
81
90
|
sampleRate: SAMPLE_RATE,
|
|
@@ -104,8 +113,9 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ
|
|
|
104
113
|
threshold: this.options.threshold,
|
|
105
114
|
inferenceTimeout: this.options.inferenceTimeout,
|
|
106
115
|
useProxy: this.options.useProxy,
|
|
116
|
+
transport,
|
|
107
117
|
},
|
|
108
|
-
'
|
|
118
|
+
'=== Adaptive interruption detector initialized',
|
|
109
119
|
);
|
|
110
120
|
}
|
|
111
121
|
|
|
@@ -150,9 +160,15 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ
|
|
|
150
160
|
* Use this when you need direct access to the stream for pushing frames.
|
|
151
161
|
*/
|
|
152
162
|
createStream(): InterruptionStreamBase {
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
163
|
+
try {
|
|
164
|
+
const streamBase = new InterruptionStreamBase(this, {});
|
|
165
|
+
this.streams.add(streamBase);
|
|
166
|
+
return streamBase;
|
|
167
|
+
} catch (e) {
|
|
168
|
+
const cause = e instanceof Error ? e : new Error(String(e));
|
|
169
|
+
this.emitError(new InterruptionDetectionError(cause.message, Date.now(), this._label, false));
|
|
170
|
+
throw e;
|
|
171
|
+
}
|
|
156
172
|
}
|
|
157
173
|
|
|
158
174
|
/**
|
|
@@ -281,8 +281,8 @@ export class InterruptionStreamBase {
|
|
|
281
281
|
}
|
|
282
282
|
const e = latestEntry ?? InterruptionCacheEntry.default();
|
|
283
283
|
const event: OverlappingSpeechEvent = {
|
|
284
|
-
type: '
|
|
285
|
-
|
|
284
|
+
type: 'overlapping_speech',
|
|
285
|
+
detectedAt: chunk.endedAt,
|
|
286
286
|
isInterruption: false,
|
|
287
287
|
overlapStartedAt: this.overlapSpeechStartedAt,
|
|
288
288
|
speechInput: e.speechInput,
|
|
@@ -334,11 +334,11 @@ export class InterruptionStreamBase {
|
|
|
334
334
|
|
|
335
335
|
const eventEmitter = new TransformStream<OverlappingSpeechEvent, OverlappingSpeechEvent>({
|
|
336
336
|
transform: (chunk, controller) => {
|
|
337
|
-
this.model.emit('
|
|
337
|
+
this.model.emit('overlapping_speech', chunk);
|
|
338
338
|
|
|
339
339
|
const metrics: InterruptionMetrics = {
|
|
340
340
|
type: 'interruption_metrics',
|
|
341
|
-
timestamp: chunk.
|
|
341
|
+
timestamp: chunk.detectedAt,
|
|
342
342
|
totalDuration: chunk.totalDurationInS * 1000,
|
|
343
343
|
predictionDuration: chunk.predictionDurationInS * 1000,
|
|
344
344
|
detectionDelay: chunk.detectionDelayInS * 1000,
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
import type { Span } from '@opentelemetry/api';
|
|
5
5
|
|
|
6
6
|
export interface OverlappingSpeechEvent {
|
|
7
|
-
type: '
|
|
8
|
-
|
|
7
|
+
type: 'overlapping_speech';
|
|
8
|
+
detectedAt: number;
|
|
9
9
|
isInterruption: boolean;
|
|
10
10
|
totalDurationInS: number;
|
|
11
11
|
predictionDurationInS: number;
|
|
@@ -4,9 +4,9 @@
|
|
|
4
4
|
import { TransformStream } from 'stream/web';
|
|
5
5
|
import WebSocket from 'ws';
|
|
6
6
|
import { z } from 'zod';
|
|
7
|
+
import { APIConnectionError, APIStatusError, APITimeoutError } from '../../_exceptions.js';
|
|
7
8
|
import { log } from '../../log.js';
|
|
8
9
|
import { createAccessToken } from '../utils.js';
|
|
9
|
-
import { intervalForRetry } from './defaults.js';
|
|
10
10
|
import { InterruptionCacheEntry } from './interruption_cache_entry.js';
|
|
11
11
|
import type { OverlappingSpeechEvent } from './types.js';
|
|
12
12
|
import type { BoundedCache } from './utils.js';
|
|
@@ -82,16 +82,32 @@ async function connectWebSocket(options: WsTransportOptions): Promise<WebSocket>
|
|
|
82
82
|
await new Promise<void>((resolve, reject) => {
|
|
83
83
|
const timeout = setTimeout(() => {
|
|
84
84
|
ws.terminate();
|
|
85
|
-
reject(
|
|
85
|
+
reject(
|
|
86
|
+
new APITimeoutError({
|
|
87
|
+
message: 'WebSocket connection timeout',
|
|
88
|
+
options: { retryable: false },
|
|
89
|
+
}),
|
|
90
|
+
);
|
|
86
91
|
}, options.timeout);
|
|
87
92
|
ws.once('open', () => {
|
|
88
93
|
clearTimeout(timeout);
|
|
89
94
|
resolve();
|
|
90
95
|
});
|
|
96
|
+
ws.once('unexpected-response', (_req, res) => {
|
|
97
|
+
clearTimeout(timeout);
|
|
98
|
+
ws.terminate();
|
|
99
|
+
const statusCode = res.statusCode ?? -1;
|
|
100
|
+
reject(
|
|
101
|
+
new APIStatusError({
|
|
102
|
+
message: `WebSocket connection rejected with status ${statusCode}`,
|
|
103
|
+
options: { statusCode, retryable: false },
|
|
104
|
+
}),
|
|
105
|
+
);
|
|
106
|
+
});
|
|
91
107
|
ws.once('error', (err: Error) => {
|
|
92
108
|
clearTimeout(timeout);
|
|
93
109
|
ws.terminate();
|
|
94
|
-
reject(err);
|
|
110
|
+
reject(new APIConnectionError({ message: `WebSocket connection error: ${err.message}` }));
|
|
95
111
|
});
|
|
96
112
|
});
|
|
97
113
|
|
|
@@ -133,7 +149,9 @@ export function createWsTransport(
|
|
|
133
149
|
});
|
|
134
150
|
|
|
135
151
|
socket.on('error', (err: Error) => {
|
|
136
|
-
|
|
152
|
+
outputController?.error(
|
|
153
|
+
new APIConnectionError({ message: `WebSocket error: ${err.message}` }),
|
|
154
|
+
);
|
|
137
155
|
});
|
|
138
156
|
|
|
139
157
|
socket.on('close', (code: number, reason: Buffer) => {
|
|
@@ -144,41 +162,20 @@ export function createWsTransport(
|
|
|
144
162
|
async function ensureConnection(): Promise<void> {
|
|
145
163
|
if (ws && ws.readyState === WebSocket.OPEN) return;
|
|
146
164
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
threshold: options.threshold,
|
|
162
|
-
min_frames: options.minFrames,
|
|
163
|
-
encoding: 's16le',
|
|
164
|
-
},
|
|
165
|
-
});
|
|
166
|
-
ws.send(sessionCreateMsg);
|
|
167
|
-
return;
|
|
168
|
-
} catch (err) {
|
|
169
|
-
lastError = err instanceof Error ? err : new Error(String(err));
|
|
170
|
-
if (attempt < maxRetries) {
|
|
171
|
-
const delay = intervalForRetry(attempt);
|
|
172
|
-
logger.debug(
|
|
173
|
-
{ attempt, delay, err: lastError.message },
|
|
174
|
-
'WebSocket connection failed, retrying',
|
|
175
|
-
);
|
|
176
|
-
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
177
|
-
}
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
throw lastError ?? new Error('Failed to connect to WebSocket after retries');
|
|
165
|
+
ws = await connectWebSocket(options);
|
|
166
|
+
setupMessageHandler(ws);
|
|
167
|
+
|
|
168
|
+
const sessionCreateMsg = JSON.stringify({
|
|
169
|
+
type: MSG_SESSION_CREATE,
|
|
170
|
+
settings: {
|
|
171
|
+
sample_rate: options.sampleRate,
|
|
172
|
+
num_channels: 1,
|
|
173
|
+
threshold: options.threshold,
|
|
174
|
+
min_frames: options.minFrames,
|
|
175
|
+
encoding: 's16le',
|
|
176
|
+
},
|
|
177
|
+
});
|
|
178
|
+
ws.send(sessionCreateMsg);
|
|
182
179
|
}
|
|
183
180
|
|
|
184
181
|
function handleMessage(message: WsMessage): void {
|
|
@@ -229,8 +226,8 @@ export function createWsTransport(
|
|
|
229
226
|
);
|
|
230
227
|
|
|
231
228
|
const event: OverlappingSpeechEvent = {
|
|
232
|
-
type: '
|
|
233
|
-
|
|
229
|
+
type: 'overlapping_speech',
|
|
230
|
+
detectedAt: Date.now(),
|
|
234
231
|
isInterruption: true,
|
|
235
232
|
totalDurationInS: entry.totalDurationInS,
|
|
236
233
|
predictionDurationInS: entry.predictionDurationInS,
|
|
@@ -288,11 +285,10 @@ export function createWsTransport(
|
|
|
288
285
|
|
|
289
286
|
case MSG_ERROR:
|
|
290
287
|
outputController?.error(
|
|
291
|
-
new
|
|
292
|
-
`LiveKit Adaptive Interruption error${
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
),
|
|
288
|
+
new APIStatusError({
|
|
289
|
+
message: `LiveKit Adaptive Interruption error: ${message.message}`,
|
|
290
|
+
options: { statusCode: message.code ?? -1 },
|
|
291
|
+
}),
|
|
296
292
|
);
|
|
297
293
|
break;
|
|
298
294
|
}
|
|
@@ -300,15 +296,12 @@ export function createWsTransport(
|
|
|
300
296
|
|
|
301
297
|
function sendAudioData(audioSlice: Int16Array): void {
|
|
302
298
|
if (!ws || ws.readyState !== WebSocket.OPEN) {
|
|
303
|
-
throw new
|
|
299
|
+
throw new APIConnectionError({ message: 'WebSocket not connected' });
|
|
304
300
|
}
|
|
305
301
|
|
|
306
302
|
const state = getState();
|
|
307
|
-
// Use truncated timestamp consistently for both cache key and header
|
|
308
|
-
// This ensures the server's response created_at matches our cache key
|
|
309
303
|
const createdAt = Math.floor(performance.now());
|
|
310
304
|
|
|
311
|
-
// Store the audio data in cache with truncated timestamp
|
|
312
305
|
state.cache.set(
|
|
313
306
|
createdAt,
|
|
314
307
|
new InterruptionCacheEntry({
|
|
@@ -318,13 +311,11 @@ export function createWsTransport(
|
|
|
318
311
|
}),
|
|
319
312
|
);
|
|
320
313
|
|
|
321
|
-
// Create header: 8-byte little-endian uint64 timestamp (milliseconds as integer)
|
|
322
314
|
const header = new ArrayBuffer(8);
|
|
323
315
|
const view = new DataView(header);
|
|
324
316
|
view.setUint32(0, createdAt >>> 0, true);
|
|
325
317
|
view.setUint32(4, Math.floor(createdAt / 0x100000000) >>> 0, true);
|
|
326
318
|
|
|
327
|
-
// Combine header and audio data
|
|
328
319
|
const audioBytes = new Uint8Array(
|
|
329
320
|
audioSlice.buffer,
|
|
330
321
|
audioSlice.byteOffset,
|
|
@@ -334,12 +325,8 @@ export function createWsTransport(
|
|
|
334
325
|
combined.set(new Uint8Array(header), 0);
|
|
335
326
|
combined.set(audioBytes, 8);
|
|
336
327
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
onRequestSent?.();
|
|
340
|
-
} catch (e: unknown) {
|
|
341
|
-
logger.error(e, `failed to send audio via websocket`);
|
|
342
|
-
}
|
|
328
|
+
ws.send(combined);
|
|
329
|
+
onRequestSent?.();
|
|
343
330
|
}
|
|
344
331
|
|
|
345
332
|
function close(): void {
|
|
@@ -383,10 +370,27 @@ export function createWsTransport(
|
|
|
383
370
|
const state = getState();
|
|
384
371
|
if (!state.overlapSpeechStartedAt || !state.overlapSpeechStarted) return;
|
|
385
372
|
|
|
373
|
+
if (options.timeout > 0) {
|
|
374
|
+
const now = performance.now();
|
|
375
|
+
for (const [, entry] of state.cache.entries()) {
|
|
376
|
+
if (entry.totalDurationInS !== 0) continue;
|
|
377
|
+
if (now - entry.createdAt > options.timeout) {
|
|
378
|
+
controller.error(
|
|
379
|
+
new APIStatusError({
|
|
380
|
+
message: `interruption inference timed out after ${((now - entry.createdAt) / 1000).toFixed(1)}s (ws)`,
|
|
381
|
+
options: { statusCode: 408, retryable: false },
|
|
382
|
+
}),
|
|
383
|
+
);
|
|
384
|
+
return;
|
|
385
|
+
}
|
|
386
|
+
break;
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
|
|
386
390
|
try {
|
|
387
391
|
sendAudioData(chunk);
|
|
388
392
|
} catch (err) {
|
|
389
|
-
|
|
393
|
+
controller.error(err);
|
|
390
394
|
}
|
|
391
395
|
},
|
|
392
396
|
|
package/src/inference/llm.ts
CHANGED
|
@@ -4,12 +4,14 @@
|
|
|
4
4
|
import OpenAI from 'openai';
|
|
5
5
|
import { APIConnectionError, APIStatusError, APITimeoutError } from '../_exceptions.js';
|
|
6
6
|
import * as llm from '../llm/index.js';
|
|
7
|
-
import { DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
|
|
8
7
|
import type { APIConnectOptions } from '../types.js';
|
|
8
|
+
import { DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
|
|
9
9
|
import { type Expand, toError } from '../utils.js';
|
|
10
10
|
import { type AnyString, createAccessToken, getDefaultInferenceUrl } from './utils.js';
|
|
11
11
|
|
|
12
12
|
export type OpenAIModels =
|
|
13
|
+
| 'openai/gpt-5.4'
|
|
14
|
+
| 'openai/gpt-5.3-chat-latest'
|
|
13
15
|
| 'openai/gpt-5.2'
|
|
14
16
|
| 'openai/gpt-5.2-chat-latest'
|
|
15
17
|
| 'openai/gpt-5.1'
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { beforeAll, describe, expect, it } from 'vitest';
|
|
5
|
+
import { normalizeLanguage } from '../language.js';
|
|
5
6
|
import { initializeLogger } from '../log.js';
|
|
6
7
|
import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
|
|
7
8
|
import { STT, type STTFallbackModel, normalizeSTTFallback, parseSTTModelString } from './stt.js';
|
|
@@ -34,6 +35,12 @@ describe('parseSTTModelString', () => {
|
|
|
34
35
|
expect(language).toBe('en');
|
|
35
36
|
});
|
|
36
37
|
|
|
38
|
+
it('normalizes language suffixes', () => {
|
|
39
|
+
const [model, language] = parseSTTModelString('deepgram:english');
|
|
40
|
+
expect(model).toBe('deepgram');
|
|
41
|
+
expect(language).toBe('en');
|
|
42
|
+
});
|
|
43
|
+
|
|
37
44
|
it('provider/model format without language', () => {
|
|
38
45
|
const [model, language] = parseSTTModelString('deepgram/nova-3');
|
|
39
46
|
expect(model).toBe('deepgram/nova-3');
|
|
@@ -151,6 +158,16 @@ describe('normalizeSTTFallback', () => {
|
|
|
151
158
|
});
|
|
152
159
|
|
|
153
160
|
describe('STT constructor fallback and connOptions', () => {
|
|
161
|
+
it('normalizes language in constructor and model string', () => {
|
|
162
|
+
const stt = makeStt({ model: 'deepgram/nova-3:english' });
|
|
163
|
+
expect(stt['opts'].language).toBe('en');
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
it('prefers explicit normalized language over model suffix', () => {
|
|
167
|
+
const stt = makeStt({ model: 'deepgram/nova-3:english', language: 'en_US' });
|
|
168
|
+
expect(stt['opts'].language).toBe(normalizeLanguage('en_US'));
|
|
169
|
+
});
|
|
170
|
+
|
|
154
171
|
it('fallback not given defaults to undefined', () => {
|
|
155
172
|
const stt = makeStt();
|
|
156
173
|
expect(stt['opts'].fallback).toBeUndefined();
|
package/src/inference/stt.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { type AudioFrame } from '@livekit/rtc-node';
|
|
|
5
5
|
import type { WebSocket } from 'ws';
|
|
6
6
|
import { APIError, APIStatusError } from '../_exceptions.js';
|
|
7
7
|
import { AudioByteStream } from '../audio.js';
|
|
8
|
+
import { type LanguageCode, areLanguagesEquivalent, normalizeLanguage } from '../language.js';
|
|
8
9
|
import { log } from '../log.js';
|
|
9
10
|
import { createStreamChannel } from '../stream/stream_channel.js';
|
|
10
11
|
import {
|
|
@@ -121,10 +122,10 @@ export interface STTFallbackModel {
|
|
|
121
122
|
export type STTFallbackModelType = STTFallbackModel | string;
|
|
122
123
|
|
|
123
124
|
/** Parse a model string into [model, language]. Language is undefined if not specified. */
|
|
124
|
-
export function parseSTTModelString(model: string): [string,
|
|
125
|
+
export function parseSTTModelString(model: string): [string, LanguageCode | undefined] {
|
|
125
126
|
const idx = model.lastIndexOf(':');
|
|
126
127
|
if (idx !== -1) {
|
|
127
|
-
return [model.slice(0, idx), model.slice(idx + 1)];
|
|
128
|
+
return [model.slice(0, idx), normalizeLanguage(model.slice(idx + 1))];
|
|
128
129
|
}
|
|
129
130
|
return [model, undefined];
|
|
130
131
|
}
|
|
@@ -155,7 +156,7 @@ const DEFAULT_CANCEL_TIMEOUT = 5000;
|
|
|
155
156
|
|
|
156
157
|
export interface InferenceSTTOptions<TModel extends STTModels> {
|
|
157
158
|
model?: TModel;
|
|
158
|
-
language?:
|
|
159
|
+
language?: LanguageCode;
|
|
159
160
|
encoding: STTEncoding;
|
|
160
161
|
sampleRate: number;
|
|
161
162
|
baseURL: string;
|
|
@@ -218,25 +219,24 @@ export class STT<TModel extends STTModels> extends BaseSTT {
|
|
|
218
219
|
let nextModel = model;
|
|
219
220
|
let nextLanguage = language;
|
|
220
221
|
if (typeof nextModel === 'string') {
|
|
221
|
-
const
|
|
222
|
-
if (
|
|
223
|
-
|
|
224
|
-
if (nextLanguage && nextLanguage !== languageFromModel) {
|
|
222
|
+
const [parsedModel, parsedLanguage] = parseSTTModelString(nextModel);
|
|
223
|
+
if (parsedLanguage !== undefined) {
|
|
224
|
+
if (nextLanguage && !areLanguagesEquivalent(nextLanguage, parsedLanguage)) {
|
|
225
225
|
this.#logger.warn(
|
|
226
226
|
'`language` is provided via both argument and model, using the one from the argument',
|
|
227
227
|
{ language: nextLanguage, model: nextModel },
|
|
228
228
|
);
|
|
229
229
|
} else {
|
|
230
|
-
nextLanguage =
|
|
230
|
+
nextLanguage = parsedLanguage as STTLanguages;
|
|
231
231
|
}
|
|
232
|
-
nextModel =
|
|
232
|
+
nextModel = parsedModel as TModel;
|
|
233
233
|
}
|
|
234
234
|
}
|
|
235
235
|
const normalizedFallback = fallback ? normalizeSTTFallback(fallback) : undefined;
|
|
236
236
|
|
|
237
237
|
this.opts = {
|
|
238
238
|
model: nextModel as TModel,
|
|
239
|
-
language: nextLanguage,
|
|
239
|
+
language: nextLanguage ? normalizeLanguage(nextLanguage) : undefined,
|
|
240
240
|
encoding,
|
|
241
241
|
sampleRate,
|
|
242
242
|
baseURL: lkBaseURL,
|
|
@@ -270,7 +270,11 @@ export class STT<TModel extends STTModels> extends BaseSTT {
|
|
|
270
270
|
}
|
|
271
271
|
|
|
272
272
|
updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {
|
|
273
|
-
this.opts = {
|
|
273
|
+
this.opts = {
|
|
274
|
+
...this.opts,
|
|
275
|
+
...opts,
|
|
276
|
+
language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,
|
|
277
|
+
};
|
|
274
278
|
|
|
275
279
|
for (const stream of this.streams) {
|
|
276
280
|
stream.updateOptions(opts);
|
|
@@ -285,7 +289,7 @@ export class STT<TModel extends STTModels> extends BaseSTT {
|
|
|
285
289
|
options || {};
|
|
286
290
|
const streamOpts = {
|
|
287
291
|
...this.opts,
|
|
288
|
-
language: language
|
|
292
|
+
language: language !== undefined ? normalizeLanguage(language) : this.opts.language,
|
|
289
293
|
} as InferenceSTTOptions<TModel>;
|
|
290
294
|
|
|
291
295
|
const stream = new SpeechStream(this, streamOpts, connOptions);
|
|
@@ -371,7 +375,11 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
|
|
|
371
375
|
}
|
|
372
376
|
|
|
373
377
|
updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {
|
|
374
|
-
this.opts = {
|
|
378
|
+
this.opts = {
|
|
379
|
+
...this.opts,
|
|
380
|
+
...opts,
|
|
381
|
+
language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,
|
|
382
|
+
};
|
|
375
383
|
this.reconnectEvent.set();
|
|
376
384
|
}
|
|
377
385
|
|
|
@@ -576,7 +584,7 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
|
|
|
576
584
|
|
|
577
585
|
const requestId = data.session_id || this.requestId;
|
|
578
586
|
const text = data.transcript;
|
|
579
|
-
const language = data.language || this.opts.language || 'en';
|
|
587
|
+
const language = normalizeLanguage(data.language || this.opts.language || 'en');
|
|
580
588
|
|
|
581
589
|
if (!text && !isFinal) return;
|
|
582
590
|
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
//
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { beforeAll, describe, expect, it } from 'vitest';
|
|
5
|
+
import { normalizeLanguage } from '../language.js';
|
|
5
6
|
import { initializeLogger } from '../log.js';
|
|
6
7
|
import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
|
|
7
8
|
import { TTS, type TTSFallbackModel, normalizeTTSFallback, parseTTSModelString } from './tts.js';
|
|
@@ -165,6 +166,17 @@ describe('normalizeTTSFallback', () => {
|
|
|
165
166
|
});
|
|
166
167
|
|
|
167
168
|
describe('TTS constructor fallback and connOptions', () => {
|
|
169
|
+
it('normalizes language in constructor', () => {
|
|
170
|
+
const tts = makeTts({ language: 'english' });
|
|
171
|
+
expect(tts['opts'].language).toBe('en');
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
it('normalizes updated language values', () => {
|
|
175
|
+
const tts = makeTts();
|
|
176
|
+
tts.updateOptions({ language: 'en_US' });
|
|
177
|
+
expect(tts['opts'].language).toBe(normalizeLanguage('en_US'));
|
|
178
|
+
});
|
|
179
|
+
|
|
168
180
|
it('fallback not given defaults to undefined', () => {
|
|
169
181
|
const tts = makeTts();
|
|
170
182
|
expect(tts['opts'].fallback).toBeUndefined();
|