@livekit/agents 1.1.0-dev.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +2 -0
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +2 -0
- package/dist/cli.js.map +1 -1
- package/dist/constants.cjs +3 -0
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +1 -0
- package/dist/constants.d.ts +1 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +2 -0
- package/dist/constants.js.map +1 -1
- package/dist/cpu.cjs +189 -0
- package/dist/cpu.cjs.map +1 -0
- package/dist/cpu.d.cts +24 -0
- package/dist/cpu.d.ts +24 -0
- package/dist/cpu.d.ts.map +1 -0
- package/dist/cpu.js +152 -0
- package/dist/cpu.js.map +1 -0
- package/dist/cpu.test.cjs +227 -0
- package/dist/cpu.test.cjs.map +1 -0
- package/dist/cpu.test.js +204 -0
- package/dist/cpu.test.js.map +1 -0
- package/dist/index.cjs +12 -10
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +13 -13
- package/dist/index.d.ts +13 -13
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -10
- package/dist/index.js.map +1 -1
- package/dist/inference/interruption/defaults.cjs +1 -1
- package/dist/inference/interruption/defaults.cjs.map +1 -1
- package/dist/inference/interruption/defaults.d.cts +1 -1
- package/dist/inference/interruption/defaults.d.ts +1 -1
- package/dist/inference/interruption/defaults.d.ts.map +1 -1
- package/dist/inference/interruption/defaults.js +1 -1
- package/dist/inference/interruption/defaults.js.map +1 -1
- package/dist/inference/interruption/http_transport.cjs +44 -28
- package/dist/inference/interruption/http_transport.cjs.map +1 -1
- package/dist/inference/interruption/http_transport.d.ts.map +1 -1
- package/dist/inference/interruption/http_transport.js +45 -29
- package/dist/inference/interruption/http_transport.js.map +1 -1
- package/dist/inference/interruption/interruption_detector.cjs +22 -5
- package/dist/inference/interruption/interruption_detector.cjs.map +1 -1
- package/dist/inference/interruption/interruption_detector.d.cts +2 -2
- package/dist/inference/interruption/interruption_detector.d.ts +2 -2
- package/dist/inference/interruption/interruption_detector.d.ts.map +1 -1
- package/dist/inference/interruption/interruption_detector.js +22 -5
- package/dist/inference/interruption/interruption_detector.js.map +1 -1
- package/dist/inference/interruption/interruption_stream.cjs +4 -4
- package/dist/inference/interruption/interruption_stream.cjs.map +1 -1
- package/dist/inference/interruption/interruption_stream.js +4 -4
- package/dist/inference/interruption/interruption_stream.js.map +1 -1
- package/dist/inference/interruption/types.cjs.map +1 -1
- package/dist/inference/interruption/types.d.cts +2 -2
- package/dist/inference/interruption/types.d.ts +2 -2
- package/dist/inference/interruption/types.d.ts.map +1 -1
- package/dist/inference/interruption/ws_transport.cjs +60 -47
- package/dist/inference/interruption/ws_transport.cjs.map +1 -1
- package/dist/inference/interruption/ws_transport.d.ts.map +1 -1
- package/dist/inference/interruption/ws_transport.js +60 -47
- package/dist/inference/interruption/ws_transport.js.map +1 -1
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +1 -1
- package/dist/inference/llm.d.ts +1 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +20 -12
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +3 -2
- package/dist/inference/stt.d.ts +3 -2
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +20 -12
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/stt.test.cjs +14 -0
- package/dist/inference/stt.test.cjs.map +1 -1
- package/dist/inference/stt.test.js +14 -0
- package/dist/inference/stt.test.js.map +1 -1
- package/dist/inference/tts.cjs +13 -4
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +8 -1
- package/dist/inference/tts.d.ts +8 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +13 -4
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/tts.test.cjs +10 -0
- package/dist/inference/tts.test.cjs.map +1 -1
- package/dist/inference/tts.test.js +10 -0
- package/dist/inference/tts.test.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +41 -23
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +41 -23
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +1 -1
- package/dist/job.cjs.map +1 -1
- package/dist/job.js +1 -1
- package/dist/job.js.map +1 -1
- package/dist/language.cjs +394 -0
- package/dist/language.cjs.map +1 -0
- package/dist/language.d.cts +15 -0
- package/dist/language.d.ts +15 -0
- package/dist/language.d.ts.map +1 -0
- package/dist/language.js +363 -0
- package/dist/language.js.map +1 -0
- package/dist/language.test.cjs +43 -0
- package/dist/language.test.cjs.map +1 -0
- package/dist/language.test.js +49 -0
- package/dist/language.test.js.map +1 -0
- package/dist/llm/index.cjs +2 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +2 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +6 -2
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +6 -2
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +2 -1
- package/dist/stt/stt.d.ts +2 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/utils.cjs +15 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +8 -0
- package/dist/utils.d.ts +8 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +13 -0
- package/dist/utils.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent.cjs +14 -17
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +10 -11
- package/dist/voice/agent.d.ts +10 -11
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +15 -18
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent.test.cjs +194 -0
- package/dist/voice/agent.test.cjs.map +1 -1
- package/dist/voice/agent.test.js +195 -1
- package/dist/voice/agent.test.js.map +1 -1
- package/dist/voice/agent_activity.cjs +116 -39
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +2 -0
- package/dist/voice/agent_activity.d.ts +2 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +117 -40
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_activity.test.cjs +135 -0
- package/dist/voice/agent_activity.test.cjs.map +1 -0
- package/dist/voice/agent_activity.test.js +134 -0
- package/dist/voice/agent_activity.test.js.map +1 -0
- package/dist/voice/agent_session.cjs +38 -38
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +65 -56
- package/dist/voice/agent_session.d.ts +65 -56
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +37 -37
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +106 -52
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +4 -2
- package/dist/voice/audio_recognition.d.ts +4 -2
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +106 -52
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/audio_recognition_span.test.cjs +84 -22
- package/dist/voice/audio_recognition_span.test.cjs.map +1 -1
- package/dist/voice/audio_recognition_span.test.js +90 -23
- package/dist/voice/audio_recognition_span.test.js.map +1 -1
- package/dist/voice/events.cjs +1 -1
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +4 -3
- package/dist/voice/events.d.ts +4 -3
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +1 -1
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/index.cjs +9 -1
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -1
- package/dist/voice/index.d.ts +1 -1
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +10 -1
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/remote_session.cjs +922 -0
- package/dist/voice/remote_session.cjs.map +1 -0
- package/dist/voice/remote_session.d.cts +108 -0
- package/dist/voice/remote_session.d.ts +108 -0
- package/dist/voice/remote_session.d.ts.map +1 -0
- package/dist/voice/remote_session.js +887 -0
- package/dist/voice/remote_session.js.map +1 -0
- package/dist/voice/report.cjs +11 -10
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +5 -3
- package/dist/voice/report.d.ts +5 -3
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +11 -10
- package/dist/voice/report.js.map +1 -1
- package/dist/voice/report.test.cjs +15 -0
- package/dist/voice/report.test.cjs.map +1 -1
- package/dist/voice/report.test.js +15 -0
- package/dist/voice/report.test.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +39 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +3 -1
- package/dist/voice/room_io/room_io.d.ts +3 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +40 -1
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/turn_config/interruption.cjs.map +1 -1
- package/dist/voice/turn_config/interruption.d.cts +1 -1
- package/dist/voice/turn_config/interruption.d.ts +1 -1
- package/dist/voice/turn_config/interruption.d.ts.map +1 -1
- package/dist/voice/turn_config/interruption.js.map +1 -1
- package/dist/voice/turn_config/utils.cjs +95 -35
- package/dist/voice/turn_config/utils.cjs.map +1 -1
- package/dist/voice/turn_config/utils.d.cts +17 -5
- package/dist/voice/turn_config/utils.d.ts +17 -5
- package/dist/voice/turn_config/utils.d.ts.map +1 -1
- package/dist/voice/turn_config/utils.js +93 -35
- package/dist/voice/turn_config/utils.js.map +1 -1
- package/dist/voice/turn_config/utils.test.cjs +83 -41
- package/dist/voice/turn_config/utils.test.cjs.map +1 -1
- package/dist/voice/turn_config/utils.test.js +84 -42
- package/dist/voice/turn_config/utils.test.js.map +1 -1
- package/dist/worker.cjs +6 -29
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +6 -19
- package/dist/worker.js.map +1 -1
- package/package.json +3 -2
- package/src/cli.ts +2 -0
- package/src/constants.ts +1 -0
- package/src/cpu.test.ts +239 -0
- package/src/cpu.ts +173 -0
- package/src/index.ts +13 -15
- package/src/inference/interruption/defaults.ts +1 -1
- package/src/inference/interruption/http_transport.ts +49 -30
- package/src/inference/interruption/interruption_detector.ts +22 -6
- package/src/inference/interruption/interruption_stream.ts +4 -4
- package/src/inference/interruption/types.ts +2 -2
- package/src/inference/interruption/ws_transport.ts +63 -59
- package/src/inference/llm.ts +3 -1
- package/src/inference/stt.test.ts +17 -0
- package/src/inference/stt.ts +22 -14
- package/src/inference/tts.test.ts +12 -0
- package/src/inference/tts.ts +22 -6
- package/src/ipc/job_proc_lazy_main.ts +44 -24
- package/src/job.ts +1 -1
- package/src/language.test.ts +62 -0
- package/src/language.ts +380 -0
- package/src/llm/index.ts +2 -0
- package/src/stream/deferred_stream.ts +5 -1
- package/src/stt/stt.ts +2 -1
- package/src/utils.ts +20 -0
- package/src/voice/agent.test.ts +208 -1
- package/src/voice/agent.ts +21 -22
- package/src/voice/agent_activity.test.ts +194 -0
- package/src/voice/agent_activity.ts +161 -43
- package/src/voice/agent_session.ts +103 -92
- package/src/voice/audio_recognition.ts +124 -61
- package/src/voice/audio_recognition_span.test.ts +115 -35
- package/src/voice/events.ts +4 -3
- package/src/voice/index.ts +10 -1
- package/src/voice/remote_session.ts +1083 -0
- package/src/voice/report.test.ts +22 -3
- package/src/voice/report.ts +31 -14
- package/src/voice/room_io/room_io.ts +52 -2
- package/src/voice/turn_config/interruption.ts +1 -1
- package/src/voice/turn_config/utils.test.ts +91 -43
- package/src/voice/turn_config/utils.ts +120 -56
- package/src/worker.ts +34 -50
- package/dist/voice/client_events.cjs +0 -554
- package/dist/voice/client_events.cjs.map +0 -1
- package/dist/voice/client_events.d.cts +0 -195
- package/dist/voice/client_events.d.ts +0 -195
- package/dist/voice/client_events.d.ts.map +0 -1
- package/dist/voice/client_events.js +0 -548
- package/dist/voice/client_events.js.map +0 -1
- package/dist/voice/wire_format.cjs +0 -798
- package/dist/voice/wire_format.cjs.map +0 -1
- package/dist/voice/wire_format.d.cts +0 -5503
- package/dist/voice/wire_format.d.ts +0 -5503
- package/dist/voice/wire_format.d.ts.map +0 -1
- package/dist/voice/wire_format.js +0 -728
- package/dist/voice/wire_format.js.map +0 -1
- package/src/voice/client_events.ts +0 -838
- package/src/voice/wire_format.ts +0 -827
|
@@ -41,7 +41,6 @@ import type { VAD } from '../vad.js';
|
|
|
41
41
|
import type { Agent } from './agent.js';
|
|
42
42
|
import { AgentActivity } from './agent_activity.js';
|
|
43
43
|
import type { _TurnDetector } from './audio_recognition.js';
|
|
44
|
-
import { ClientEventsHandler } from './client_events.js';
|
|
45
44
|
import {
|
|
46
45
|
type AgentEvent,
|
|
47
46
|
AgentSessionEventTypes,
|
|
@@ -65,6 +64,7 @@ import {
|
|
|
65
64
|
} from './events.js';
|
|
66
65
|
import { AgentInput, AgentOutput } from './io.js';
|
|
67
66
|
import { RecorderIO } from './recorder_io/index.js';
|
|
67
|
+
import { RoomSessionTransport, SessionHost } from './remote_session.js';
|
|
68
68
|
import {
|
|
69
69
|
DEFAULT_TEXT_INPUT_CALLBACK,
|
|
70
70
|
RoomIO,
|
|
@@ -87,68 +87,40 @@ export interface AgentSessionUsage {
|
|
|
87
87
|
modelUsage: Array<Partial<ModelUsage>>;
|
|
88
88
|
}
|
|
89
89
|
|
|
90
|
-
export interface
|
|
90
|
+
export interface InternalSessionOptions<UserData> extends AgentSessionOptions<UserData> {
|
|
91
|
+
turnHandling: InternalTurnHandlingOptions;
|
|
92
|
+
useTtsAlignedTranscript: boolean;
|
|
91
93
|
maxToolSteps: number;
|
|
92
|
-
/**
|
|
93
|
-
* Whether to speculatively begin LLM and TTS requests before an end-of-turn is detected.
|
|
94
|
-
* When `true`, the agent sends inference calls as soon as a user transcript is received rather
|
|
95
|
-
* than waiting for a definitive turn boundary. This can reduce response latency by overlapping
|
|
96
|
-
* model inference with user audio, but may incur extra compute if the user interrupts or
|
|
97
|
-
* revises mid-utterance.
|
|
98
|
-
* @defaultValue false
|
|
99
|
-
*/
|
|
100
|
-
preemptiveGeneration: boolean;
|
|
101
|
-
|
|
102
|
-
/**
|
|
103
|
-
* If set, set the user state as "away" after this amount of time after user and agent are
|
|
104
|
-
* silent. Set to `null` to disable.
|
|
105
|
-
* @defaultValue 15.0
|
|
106
|
-
*/
|
|
107
94
|
userAwayTimeout: number | null;
|
|
108
|
-
|
|
109
|
-
/**
|
|
110
|
-
* Duration in milliseconds for AEC (Acoustic Echo Cancellation) warmup, during which
|
|
111
|
-
* interruptions from audio activity are suppressed. Set to `null` to disable.
|
|
112
|
-
* @defaultValue 3000
|
|
113
|
-
*/
|
|
114
|
-
aecWarmupDuration: number | null;
|
|
115
|
-
|
|
116
|
-
/**
|
|
117
|
-
* Configuration for turn handling.
|
|
118
|
-
*/
|
|
119
|
-
turnHandling: Partial<TurnHandlingOptions>;
|
|
120
|
-
|
|
121
|
-
useTtsAlignedTranscript: boolean;
|
|
122
|
-
|
|
123
|
-
/** @deprecated Use {@link SessionOptions.turnHandling}.interruption.mode instead. */
|
|
124
|
-
allowInterruptions?: boolean;
|
|
125
|
-
/** @deprecated Use {@link SessionOptions.turnHandling}.interruption.discardAudioIfUninterruptible instead. */
|
|
126
|
-
discardAudioIfUninterruptible?: boolean;
|
|
127
|
-
/** @deprecated Use {@link SessionOptions.turnHandling}.interruption.minDuration instead. */
|
|
128
|
-
minInterruptionDuration?: number;
|
|
129
|
-
/** @deprecated Use {@link SessionOptions.turnHandling}.interruption.minWords instead. */
|
|
130
|
-
minInterruptionWords?: number;
|
|
131
|
-
/** @deprecated Use {@link SessionOptions.turnHandling}.endpointing.minDelay instead. */
|
|
132
|
-
minEndpointingDelay?: number;
|
|
133
|
-
/** @deprecated Use {@link SessionOptions.turnHandling}.endpointing.maxDelay instead. */
|
|
134
|
-
maxEndpointingDelay?: number;
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
export interface InternalSessionOptions extends SessionOptions {
|
|
138
|
-
turnHandling: InternalTurnHandlingOptions;
|
|
139
95
|
}
|
|
140
96
|
|
|
141
|
-
export const
|
|
97
|
+
export const defaultAgentSessionOptions = {
|
|
142
98
|
maxToolSteps: 3,
|
|
143
|
-
preemptiveGeneration:
|
|
99
|
+
preemptiveGeneration: true,
|
|
144
100
|
userAwayTimeout: 15.0,
|
|
145
101
|
aecWarmupDuration: 3000,
|
|
146
102
|
turnHandling: {},
|
|
147
103
|
useTtsAlignedTranscript: true,
|
|
148
|
-
} as const satisfies
|
|
104
|
+
} as const satisfies AgentSessionOptions;
|
|
149
105
|
|
|
150
|
-
/** @deprecated {@link VoiceOptions} has been
|
|
151
|
-
export type VoiceOptions =
|
|
106
|
+
/** @deprecated {@link VoiceOptions} has been flattened onto {@link AgentSessionOptions} */
|
|
107
|
+
export type VoiceOptions = {
|
|
108
|
+
maxToolSteps: number;
|
|
109
|
+
preemptiveGeneration: boolean;
|
|
110
|
+
userAwayTimeout?: number | null;
|
|
111
|
+
/** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.mode instead. */
|
|
112
|
+
allowInterruptions?: boolean;
|
|
113
|
+
/** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.discardAudioIfUninterruptible instead. */
|
|
114
|
+
discardAudioIfUninterruptible?: boolean;
|
|
115
|
+
/** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.minDuration instead. */
|
|
116
|
+
minInterruptionDuration?: number;
|
|
117
|
+
/** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.minWords instead. */
|
|
118
|
+
minInterruptionWords?: number;
|
|
119
|
+
/** @deprecated Use {@link AgentSessionOptions.turnHandling}.endpointing.minDelay instead. */
|
|
120
|
+
minEndpointingDelay?: number;
|
|
121
|
+
/** @deprecated Use {@link AgentSessionOptions.turnHandling}.endpointing.maxDelay instead. */
|
|
122
|
+
maxEndpointingDelay?: number;
|
|
123
|
+
};
|
|
152
124
|
|
|
153
125
|
export type TurnDetectionMode = 'stt' | 'vad' | 'realtime_llm' | 'manual' | _TurnDetector;
|
|
154
126
|
|
|
@@ -162,7 +134,7 @@ export type AgentSessionCallbacks = {
|
|
|
162
134
|
[AgentSessionEventTypes.SpeechCreated]: (ev: SpeechCreatedEvent) => void;
|
|
163
135
|
[AgentSessionEventTypes.Error]: (ev: ErrorEvent) => void;
|
|
164
136
|
[AgentSessionEventTypes.Close]: (ev: CloseEvent) => void;
|
|
165
|
-
[AgentSessionEventTypes.
|
|
137
|
+
[AgentSessionEventTypes.OverlappingSpeech]: (ev: OverlappingSpeechEvent) => void;
|
|
166
138
|
};
|
|
167
139
|
|
|
168
140
|
export type AgentSessionOptions<UserData = UnknownUserData> = {
|
|
@@ -171,13 +143,44 @@ export type AgentSessionOptions<UserData = UnknownUserData> = {
|
|
|
171
143
|
llm?: LLM | RealtimeModel | LLMModels;
|
|
172
144
|
tts?: TTS | TTSModelString;
|
|
173
145
|
userData?: UserData;
|
|
174
|
-
options?: Partial<SessionOptions>;
|
|
175
146
|
connOptions?: SessionConnectOptions;
|
|
176
147
|
|
|
177
|
-
/** @deprecated use
|
|
148
|
+
/** @deprecated use turnHandling.turnDetection instead */
|
|
178
149
|
turnDetection?: TurnDetectionMode;
|
|
179
|
-
/** @deprecated use
|
|
150
|
+
/** @deprecated use top-level AgentSessionOptions fields instead */
|
|
180
151
|
voiceOptions?: Partial<VoiceOptions>;
|
|
152
|
+
|
|
153
|
+
maxToolSteps?: number;
|
|
154
|
+
/**
|
|
155
|
+
* Whether to speculatively begin LLM and TTS requests before an end-of-turn is detected.
|
|
156
|
+
* When `true`, the agent sends inference calls as soon as a user transcript is received rather
|
|
157
|
+
* than waiting for a definitive turn boundary. This can reduce response latency by overlapping
|
|
158
|
+
* model inference with user audio, but may incur extra compute if the user interrupts or
|
|
159
|
+
* revises mid-utterance.
|
|
160
|
+
* @defaultValue true
|
|
161
|
+
*/
|
|
162
|
+
preemptiveGeneration?: boolean;
|
|
163
|
+
|
|
164
|
+
/**
|
|
165
|
+
* If set, set the user state as "away" after this amount of time after user and agent are
|
|
166
|
+
* silent. Set to `null` to disable.
|
|
167
|
+
* @defaultValue 15.0
|
|
168
|
+
*/
|
|
169
|
+
userAwayTimeout?: number | null;
|
|
170
|
+
|
|
171
|
+
/**
|
|
172
|
+
* Duration in milliseconds for AEC (Acoustic Echo Cancellation) warmup, during which
|
|
173
|
+
* interruptions from audio activity are suppressed. Set to `null` to disable.
|
|
174
|
+
* @defaultValue 3000
|
|
175
|
+
*/
|
|
176
|
+
aecWarmupDuration?: number | null;
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Configuration for turn handling.
|
|
180
|
+
*/
|
|
181
|
+
turnHandling?: Partial<TurnHandlingOptions>;
|
|
182
|
+
|
|
183
|
+
useTtsAlignedTranscript?: boolean;
|
|
181
184
|
};
|
|
182
185
|
|
|
183
186
|
type ActivityTransitionOptions = {
|
|
@@ -196,7 +199,11 @@ export class AgentSession<
|
|
|
196
199
|
tts?: TTS;
|
|
197
200
|
turnDetection?: TurnDetectionMode;
|
|
198
201
|
|
|
199
|
-
|
|
202
|
+
/** @deprecated use {@link sessionOptions} instead */
|
|
203
|
+
readonly options: VoiceOptions;
|
|
204
|
+
|
|
205
|
+
readonly sessionOptions: InternalSessionOptions<UserData>;
|
|
206
|
+
|
|
200
207
|
private readonly activityLock = new Mutex();
|
|
201
208
|
|
|
202
209
|
private agent?: Agent;
|
|
@@ -204,7 +211,7 @@ export class AgentSession<
|
|
|
204
211
|
private nextActivity?: AgentActivity;
|
|
205
212
|
private updateActivityTask?: Task<void>;
|
|
206
213
|
private started = false;
|
|
207
|
-
private
|
|
214
|
+
private sessionHost?: SessionHost;
|
|
208
215
|
|
|
209
216
|
private _chatCtx: ChatContext;
|
|
210
217
|
private _userData: UserData | undefined;
|
|
@@ -225,14 +232,14 @@ export class AgentSession<
|
|
|
225
232
|
// Unrecoverable error counts, reset after agent speaking
|
|
226
233
|
private llmErrorCounts = 0;
|
|
227
234
|
private ttsErrorCounts = 0;
|
|
228
|
-
private interruptionDetectionErrorCounts = 0;
|
|
229
235
|
|
|
230
236
|
private sessionSpan?: Span;
|
|
231
237
|
private agentSpeakingSpan?: Span;
|
|
232
238
|
|
|
233
239
|
private _interruptionDetection?: InterruptionOptions['mode'];
|
|
234
240
|
|
|
235
|
-
|
|
241
|
+
/** @internal */
|
|
242
|
+
_usageCollector: ModelUsageCollector = new ModelUsageCollector();
|
|
236
243
|
|
|
237
244
|
/** @internal */
|
|
238
245
|
_roomIO?: RoomIO;
|
|
@@ -266,9 +273,10 @@ export class AgentSession<
|
|
|
266
273
|
constructor(options: AgentSessionOptions<UserData>) {
|
|
267
274
|
super();
|
|
268
275
|
|
|
269
|
-
const opts =
|
|
276
|
+
const { agentSessionOptions: opts, legacyVoiceOptions } =
|
|
277
|
+
migrateLegacyOptions<UserData>(options);
|
|
270
278
|
|
|
271
|
-
const { vad, stt, llm, tts, userData, connOptions,
|
|
279
|
+
const { vad, stt, llm, tts, userData, connOptions, ...resolvedSessionOptions } = opts;
|
|
272
280
|
// Merge user-provided connOptions with defaults
|
|
273
281
|
this._connOptions = {
|
|
274
282
|
sttConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.sttConnOptions },
|
|
@@ -299,8 +307,8 @@ export class AgentSession<
|
|
|
299
307
|
this.tts = tts;
|
|
300
308
|
}
|
|
301
309
|
|
|
302
|
-
this.turnDetection =
|
|
303
|
-
this._interruptionDetection =
|
|
310
|
+
this.turnDetection = resolvedSessionOptions.turnHandling.turnDetection;
|
|
311
|
+
this._interruptionDetection = resolvedSessionOptions.turnHandling.interruption?.mode;
|
|
304
312
|
this._userData = userData;
|
|
305
313
|
|
|
306
314
|
// configurable IO
|
|
@@ -309,8 +317,9 @@ export class AgentSession<
|
|
|
309
317
|
|
|
310
318
|
// This is the "global" chat context, it holds the entire conversation history
|
|
311
319
|
this._chatCtx = ChatContext.empty();
|
|
312
|
-
this.
|
|
313
|
-
this.
|
|
320
|
+
this.sessionOptions = resolvedSessionOptions;
|
|
321
|
+
this.options = legacyVoiceOptions;
|
|
322
|
+
this._aecWarmupRemaining = this.sessionOptions.aecWarmupDuration ?? 0;
|
|
314
323
|
|
|
315
324
|
this._onUserInputTranscribed = this._onUserInputTranscribed.bind(this);
|
|
316
325
|
this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
|
|
@@ -322,9 +331,6 @@ export class AgentSession<
|
|
|
322
331
|
): boolean {
|
|
323
332
|
const eventData = args[0] as AgentEvent;
|
|
324
333
|
this._recordedEvents.push(eventData);
|
|
325
|
-
if (event === AgentSessionEventTypes.MetricsCollected) {
|
|
326
|
-
this._usageCollector.collect((eventData as MetricsCollectedEvent).metrics);
|
|
327
|
-
}
|
|
328
334
|
return super.emit(event, ...args);
|
|
329
335
|
}
|
|
330
336
|
|
|
@@ -366,7 +372,7 @@ export class AgentSession<
|
|
|
366
372
|
}
|
|
367
373
|
|
|
368
374
|
get useTtsAlignedTranscript(): boolean {
|
|
369
|
-
return this.
|
|
375
|
+
return this.sessionOptions.useTtsAlignedTranscript;
|
|
370
376
|
}
|
|
371
377
|
|
|
372
378
|
set userData(value: UserData) {
|
|
@@ -422,9 +428,11 @@ export class AgentSession<
|
|
|
422
428
|
|
|
423
429
|
this._roomIO.start();
|
|
424
430
|
|
|
425
|
-
|
|
431
|
+
const transport = new RoomSessionTransport(room, this._roomIO);
|
|
432
|
+
this.sessionHost = new SessionHost(transport);
|
|
433
|
+
this.sessionHost.registerSession(this);
|
|
426
434
|
if (inputOptions?.textEnabled !== false) {
|
|
427
|
-
this.
|
|
435
|
+
this.sessionHost.registerTextInput(
|
|
428
436
|
inputOptions?.textInputCallback ?? DEFAULT_TEXT_INPUT_CALLBACK,
|
|
429
437
|
);
|
|
430
438
|
}
|
|
@@ -470,8 +478,8 @@ export class AgentSession<
|
|
|
470
478
|
|
|
471
479
|
await Promise.allSettled(tasks);
|
|
472
480
|
|
|
473
|
-
if (this.
|
|
474
|
-
await this.
|
|
481
|
+
if (this.sessionHost) {
|
|
482
|
+
await this.sessionHost.start();
|
|
475
483
|
}
|
|
476
484
|
|
|
477
485
|
// Log used IO configuration
|
|
@@ -877,7 +885,9 @@ export class AgentSession<
|
|
|
877
885
|
if (this.closingTask) {
|
|
878
886
|
return;
|
|
879
887
|
}
|
|
880
|
-
this.closeImpl(reason, error, drain)
|
|
888
|
+
this.closingTask = this.closeImpl(reason, error, drain).finally(() => {
|
|
889
|
+
this.closingTask = null;
|
|
890
|
+
});
|
|
881
891
|
}
|
|
882
892
|
|
|
883
893
|
/** @internal */
|
|
@@ -900,13 +910,11 @@ export class AgentSession<
|
|
|
900
910
|
return;
|
|
901
911
|
}
|
|
902
912
|
} else if (error.type === 'interruption_detection_error') {
|
|
903
|
-
this.
|
|
904
|
-
|
|
905
|
-
return;
|
|
906
|
-
}
|
|
913
|
+
this.logger.error(error.toString());
|
|
914
|
+
return;
|
|
907
915
|
}
|
|
908
916
|
|
|
909
|
-
this.logger.error(error, 'AgentSession is closing due to unrecoverable error');
|
|
917
|
+
this.logger.error(error, 'AgentSession is closing due to an unrecoverable error');
|
|
910
918
|
|
|
911
919
|
this.closingTask = (async () => {
|
|
912
920
|
await this.closeImpl(CloseReason.ERROR, error);
|
|
@@ -935,7 +943,6 @@ export class AgentSession<
|
|
|
935
943
|
if (state === 'speaking') {
|
|
936
944
|
this.llmErrorCounts = 0;
|
|
937
945
|
this.ttsErrorCounts = 0;
|
|
938
|
-
this.interruptionDetectionErrorCounts = 0;
|
|
939
946
|
|
|
940
947
|
if (this.agentSpeakingSpan === undefined) {
|
|
941
948
|
this.agentSpeakingSpan = tracer.startSpan({
|
|
@@ -980,7 +987,10 @@ export class AgentSession<
|
|
|
980
987
|
}
|
|
981
988
|
|
|
982
989
|
/** @internal */
|
|
983
|
-
_updateUserState(
|
|
990
|
+
_updateUserState(
|
|
991
|
+
state: UserState,
|
|
992
|
+
options?: { lastSpeakingTime?: number; otelContext?: Context },
|
|
993
|
+
) {
|
|
984
994
|
if (this._userState === state) {
|
|
985
995
|
return;
|
|
986
996
|
}
|
|
@@ -988,8 +998,8 @@ export class AgentSession<
|
|
|
988
998
|
if (state === 'speaking' && this._userSpeakingSpan === undefined) {
|
|
989
999
|
this._userSpeakingSpan = tracer.startSpan({
|
|
990
1000
|
name: 'user_speaking',
|
|
991
|
-
context: this.rootSpanContext,
|
|
992
|
-
startTime: lastSpeakingTime,
|
|
1001
|
+
context: options?.otelContext ?? this.rootSpanContext,
|
|
1002
|
+
startTime: options?.lastSpeakingTime,
|
|
993
1003
|
});
|
|
994
1004
|
|
|
995
1005
|
const linked = this._roomIO?.linkedParticipant;
|
|
@@ -997,7 +1007,7 @@ export class AgentSession<
|
|
|
997
1007
|
setParticipantSpanAttributes(this._userSpeakingSpan, linked);
|
|
998
1008
|
}
|
|
999
1009
|
} else if (this._userSpeakingSpan !== undefined) {
|
|
1000
|
-
this._userSpeakingSpan.end(lastSpeakingTime);
|
|
1010
|
+
this._userSpeakingSpan.end(options?.lastSpeakingTime);
|
|
1001
1011
|
this._userSpeakingSpan = undefined;
|
|
1002
1012
|
}
|
|
1003
1013
|
|
|
@@ -1035,7 +1045,10 @@ export class AgentSession<
|
|
|
1035
1045
|
private _setUserAwayTimer(): void {
|
|
1036
1046
|
this._cancelUserAwayTimer();
|
|
1037
1047
|
|
|
1038
|
-
if (
|
|
1048
|
+
if (
|
|
1049
|
+
this.sessionOptions.userAwayTimeout === null ||
|
|
1050
|
+
this.sessionOptions.userAwayTimeout === undefined
|
|
1051
|
+
) {
|
|
1039
1052
|
return;
|
|
1040
1053
|
}
|
|
1041
1054
|
|
|
@@ -1046,7 +1059,7 @@ export class AgentSession<
|
|
|
1046
1059
|
this.userAwayTimer = setTimeout(() => {
|
|
1047
1060
|
this.logger.debug('User away timeout triggered');
|
|
1048
1061
|
this._updateUserState('away');
|
|
1049
|
-
}, this.
|
|
1062
|
+
}, this.sessionOptions.userAwayTimeout * 1000);
|
|
1050
1063
|
}
|
|
1051
1064
|
|
|
1052
1065
|
private _cancelUserAwayTimer(): void {
|
|
@@ -1120,7 +1133,6 @@ export class AgentSession<
|
|
|
1120
1133
|
try {
|
|
1121
1134
|
await this.activity.interrupt({ force: true }).await;
|
|
1122
1135
|
} catch (error) {
|
|
1123
|
-
// Uninterruptible speech can throw during forced interruption.
|
|
1124
1136
|
this.logger.warn({ error }, 'Error interrupting activity');
|
|
1125
1137
|
}
|
|
1126
1138
|
}
|
|
@@ -1150,8 +1162,8 @@ export class AgentSession<
|
|
|
1150
1162
|
this.output.audio = null;
|
|
1151
1163
|
this.output.transcription = null;
|
|
1152
1164
|
|
|
1153
|
-
await this.
|
|
1154
|
-
this.
|
|
1165
|
+
await this.sessionHost?.close();
|
|
1166
|
+
this.sessionHost = undefined;
|
|
1155
1167
|
|
|
1156
1168
|
await this._roomIO?.close();
|
|
1157
1169
|
this._roomIO = undefined;
|
|
@@ -1183,7 +1195,6 @@ export class AgentSession<
|
|
|
1183
1195
|
this.rootSpanContext = undefined;
|
|
1184
1196
|
this.llmErrorCounts = 0;
|
|
1185
1197
|
this.ttsErrorCounts = 0;
|
|
1186
|
-
this.interruptionDetectionErrorCounts = 0;
|
|
1187
1198
|
|
|
1188
1199
|
this.logger.info({ reason, error }, 'AgentSession closed');
|
|
1189
1200
|
}
|
|
@@ -12,6 +12,8 @@ import {
|
|
|
12
12
|
} from '@opentelemetry/api';
|
|
13
13
|
import type { WritableStreamDefaultWriter } from 'node:stream/web';
|
|
14
14
|
import { ReadableStream } from 'node:stream/web';
|
|
15
|
+
import { isAPIError } from '../_exceptions.js';
|
|
16
|
+
import { apiConnectDefaults, intervalForRetry } from '../inference/interruption/defaults.js';
|
|
15
17
|
import { InterruptionDetectionError } from '../inference/interruption/errors.js';
|
|
16
18
|
import type { AdaptiveInterruptionDetector } from '../inference/interruption/interruption_detector.js';
|
|
17
19
|
import { InterruptionStreamSentinel } from '../inference/interruption/interruption_stream.js';
|
|
@@ -19,6 +21,7 @@ import {
|
|
|
19
21
|
type InterruptionSentinel,
|
|
20
22
|
type OverlappingSpeechEvent,
|
|
21
23
|
} from '../inference/interruption/types.js';
|
|
24
|
+
import type { LanguageCode } from '../language.js';
|
|
22
25
|
import { type ChatContext } from '../llm/chat_context.js';
|
|
23
26
|
import { log } from '../log.js';
|
|
24
27
|
import { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';
|
|
@@ -71,8 +74,8 @@ export interface _TurnDetector {
|
|
|
71
74
|
readonly model: string;
|
|
72
75
|
/** The provider name for this turn detector. */
|
|
73
76
|
readonly provider: string;
|
|
74
|
-
unlikelyThreshold: (language?:
|
|
75
|
-
supportsLanguage: (language?:
|
|
77
|
+
unlikelyThreshold: (language?: LanguageCode) => Promise<number | undefined>;
|
|
78
|
+
supportsLanguage: (language?: LanguageCode) => Promise<boolean>;
|
|
76
79
|
predictEndOfTurn(chatCtx: ChatContext, timeout?: number): Promise<number>;
|
|
77
80
|
}
|
|
78
81
|
|
|
@@ -121,7 +124,7 @@ export class AudioRecognition {
|
|
|
121
124
|
private turnDetectionMode?: TurnDetectionMode;
|
|
122
125
|
private minEndpointingDelay: number;
|
|
123
126
|
private maxEndpointingDelay: number;
|
|
124
|
-
private lastLanguage?:
|
|
127
|
+
private lastLanguage?: LanguageCode;
|
|
125
128
|
private rootSpanContext?: Context;
|
|
126
129
|
private sttModel?: string;
|
|
127
130
|
private sttProvider?: string;
|
|
@@ -249,6 +252,15 @@ export class AudioRecognition {
|
|
|
249
252
|
await this.interruptionTask?.cancelAndWait();
|
|
250
253
|
}
|
|
251
254
|
|
|
255
|
+
async disableInterruptionDetection(): Promise<void> {
|
|
256
|
+
this.isInterruptionEnabled = false;
|
|
257
|
+
this.interruptionDetection = undefined;
|
|
258
|
+
await this.interruptionTask?.cancelAndWait();
|
|
259
|
+
this.interruptionTask = undefined;
|
|
260
|
+
await this.interruptionStreamChannel?.close();
|
|
261
|
+
this.interruptionStreamChannel = undefined;
|
|
262
|
+
}
|
|
263
|
+
|
|
252
264
|
async onStartOfAgentSpeech() {
|
|
253
265
|
this.isAgentSpeaking = true;
|
|
254
266
|
return this.trySendInterruptionSentinel(InterruptionStreamSentinel.agentSpeechStarted());
|
|
@@ -1000,77 +1012,128 @@ export class AudioRecognition {
|
|
|
1000
1012
|
) {
|
|
1001
1013
|
if (!interruptionDetection || !this.interruptionStreamChannel) return;
|
|
1002
1014
|
|
|
1003
|
-
|
|
1004
|
-
const
|
|
1015
|
+
let numRetries = 0;
|
|
1016
|
+
const maxRetries = apiConnectDefaults.maxRetries;
|
|
1005
1017
|
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1018
|
+
while (!signal.aborted) {
|
|
1019
|
+
const stream = interruptionDetection.createStream();
|
|
1020
|
+
const eventReader = stream.stream().getReader();
|
|
1021
|
+
|
|
1022
|
+
const cleanup = async () => {
|
|
1023
|
+
try {
|
|
1024
|
+
signal.removeEventListener('abort', cleanup);
|
|
1025
|
+
eventReader.releaseLock();
|
|
1026
|
+
await stream.close();
|
|
1027
|
+
} catch (e) {
|
|
1028
|
+
this.logger.debug('createInterruptionTask: error during cleanup:', e);
|
|
1029
|
+
}
|
|
1030
|
+
};
|
|
1031
|
+
|
|
1032
|
+
signal.addEventListener('abort', cleanup, { once: true });
|
|
1033
|
+
|
|
1034
|
+
let forwardTask: Promise<void> | undefined;
|
|
1015
1035
|
|
|
1016
|
-
// Forward input frames/sentinels to the interruption stream
|
|
1017
|
-
const forwardTask = (async () => {
|
|
1018
1036
|
try {
|
|
1037
|
+
// Unlike Python where _agent_speech_started lives on `self` and survives retries,
|
|
1038
|
+
// JS creates a fresh InterruptionStreamBase per retry with agentSpeechStarted = false.
|
|
1039
|
+
// Re-inject the sentinel so the new stream knows the agent is mid-speech.
|
|
1040
|
+
if (numRetries > 0 && this.isAgentSpeaking) {
|
|
1041
|
+
await stream.pushFrame(InterruptionStreamSentinel.agentSpeechStarted());
|
|
1042
|
+
}
|
|
1043
|
+
|
|
1044
|
+
forwardTask = (async () => {
|
|
1045
|
+
const inputReader = this.interruptionStreamChannel!.stream().getReader();
|
|
1046
|
+
const abortPromise = waitForAbort(signal);
|
|
1047
|
+
|
|
1048
|
+
try {
|
|
1049
|
+
while (!signal.aborted) {
|
|
1050
|
+
const res = await Promise.race([inputReader.read(), abortPromise]);
|
|
1051
|
+
if (!res) break;
|
|
1052
|
+
|
|
1053
|
+
const { value, done } = res;
|
|
1054
|
+
if (done) break;
|
|
1055
|
+
|
|
1056
|
+
if (value instanceof AudioFrame) {
|
|
1057
|
+
const frameDurationMs = (value.samplesPerChannel / value.sampleRate) * 1000;
|
|
1058
|
+
this._inputStartedAt ??= Date.now() - frameDurationMs;
|
|
1059
|
+
} else {
|
|
1060
|
+
this._inputStartedAt ??= Date.now();
|
|
1061
|
+
}
|
|
1062
|
+
|
|
1063
|
+
await stream.pushFrame(value);
|
|
1064
|
+
}
|
|
1065
|
+
} finally {
|
|
1066
|
+
inputReader.releaseLock();
|
|
1067
|
+
}
|
|
1068
|
+
})();
|
|
1069
|
+
|
|
1019
1070
|
const abortPromise = waitForAbort(signal);
|
|
1071
|
+
|
|
1020
1072
|
while (!signal.aborted) {
|
|
1021
|
-
const res = await Promise.race([
|
|
1073
|
+
const res = await Promise.race([eventReader.read(), abortPromise]);
|
|
1022
1074
|
if (!res) break;
|
|
1023
|
-
const {
|
|
1075
|
+
const { done, value: ev } = res;
|
|
1024
1076
|
if (done) break;
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1077
|
+
this.onOverlapSpeechEvent(ev);
|
|
1078
|
+
}
|
|
1079
|
+
break;
|
|
1080
|
+
} catch (e) {
|
|
1081
|
+
if (signal.aborted) break;
|
|
1082
|
+
|
|
1083
|
+
if (isAPIError(e)) {
|
|
1084
|
+
if (maxRetries === 0 || !e.retryable) {
|
|
1085
|
+
interruptionDetection.emitError(
|
|
1086
|
+
new InterruptionDetectionError(
|
|
1087
|
+
e.message,
|
|
1088
|
+
Date.now(),
|
|
1089
|
+
interruptionDetection.label,
|
|
1090
|
+
false,
|
|
1091
|
+
),
|
|
1092
|
+
);
|
|
1093
|
+
break;
|
|
1094
|
+
} else if (numRetries >= maxRetries) {
|
|
1095
|
+
interruptionDetection.emitError(
|
|
1096
|
+
new InterruptionDetectionError(
|
|
1097
|
+
`failed to detect interruption after ${numRetries} attempts`,
|
|
1098
|
+
Date.now(),
|
|
1099
|
+
interruptionDetection.label,
|
|
1100
|
+
false,
|
|
1101
|
+
),
|
|
1102
|
+
);
|
|
1103
|
+
break;
|
|
1029
1104
|
} else {
|
|
1030
|
-
|
|
1105
|
+
const retryInterval = intervalForRetry(numRetries);
|
|
1106
|
+
interruptionDetection.emitError(
|
|
1107
|
+
new InterruptionDetectionError(
|
|
1108
|
+
e.message,
|
|
1109
|
+
Date.now(),
|
|
1110
|
+
interruptionDetection.label,
|
|
1111
|
+
true,
|
|
1112
|
+
),
|
|
1113
|
+
);
|
|
1114
|
+
this.logger.warn(
|
|
1115
|
+
{ model: interruptionDetection.label, attempt: numRetries },
|
|
1116
|
+
`failed to detect interruption, retrying in ${retryInterval}ms`,
|
|
1117
|
+
);
|
|
1118
|
+
numRetries++;
|
|
1119
|
+
await delay(retryInterval, { signal });
|
|
1031
1120
|
}
|
|
1032
|
-
|
|
1121
|
+
} else {
|
|
1122
|
+
const msg = e instanceof Error ? e.message : String(e);
|
|
1123
|
+
interruptionDetection.emitError(
|
|
1124
|
+
new InterruptionDetectionError(msg, Date.now(), interruptionDetection.label, false),
|
|
1125
|
+
);
|
|
1126
|
+
this.logger.error(e, 'Error in interruption task');
|
|
1127
|
+
break;
|
|
1033
1128
|
}
|
|
1034
1129
|
} finally {
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
// Read output events from the interruption stream
|
|
1040
|
-
const eventReader = stream.stream().getReader();
|
|
1041
|
-
const abortHandler = async () => {
|
|
1042
|
-
await cleanup();
|
|
1043
|
-
};
|
|
1044
|
-
signal.addEventListener('abort', abortHandler);
|
|
1045
|
-
|
|
1046
|
-
try {
|
|
1047
|
-
const abortPromise = waitForAbort(signal);
|
|
1048
|
-
|
|
1049
|
-
while (!signal.aborted) {
|
|
1050
|
-
const res = await Promise.race([eventReader.read(), abortPromise]);
|
|
1051
|
-
if (!res) break;
|
|
1052
|
-
const { done, value: ev } = res;
|
|
1053
|
-
if (done) break;
|
|
1054
|
-
this.onOverlapSpeechEvent(ev);
|
|
1055
|
-
}
|
|
1056
|
-
} catch (e) {
|
|
1057
|
-
if (!signal.aborted) {
|
|
1058
|
-
const cause = e instanceof Error ? e : new Error(String(e));
|
|
1059
|
-
interruptionDetection.emitError(
|
|
1060
|
-
new InterruptionDetectionError(
|
|
1061
|
-
cause.message,
|
|
1062
|
-
Date.now(),
|
|
1063
|
-
interruptionDetection.label,
|
|
1064
|
-
false,
|
|
1065
|
-
),
|
|
1066
|
-
);
|
|
1067
|
-
this.logger.error(e, 'Error in interruption task');
|
|
1130
|
+
await cleanup();
|
|
1131
|
+
await forwardTask?.catch((e) => {
|
|
1132
|
+
this.logger.debug({ err: e }, 'interruption task exited with error');
|
|
1133
|
+
});
|
|
1068
1134
|
}
|
|
1069
|
-
} finally {
|
|
1070
|
-
await cleanup();
|
|
1071
|
-
await forwardTask;
|
|
1072
|
-
this.logger.debug('Interruption task closed');
|
|
1073
1135
|
}
|
|
1136
|
+
this.logger.debug('Interruption task closed');
|
|
1074
1137
|
}
|
|
1075
1138
|
|
|
1076
1139
|
setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {
|