@livekit/agents 1.0.44 → 1.0.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. package/dist/ipc/supervised_proc.cjs +1 -1
  2. package/dist/ipc/supervised_proc.cjs.map +1 -1
  3. package/dist/ipc/supervised_proc.js +1 -1
  4. package/dist/ipc/supervised_proc.js.map +1 -1
  5. package/dist/llm/llm.cjs +1 -1
  6. package/dist/llm/llm.cjs.map +1 -1
  7. package/dist/llm/llm.js +1 -1
  8. package/dist/llm/llm.js.map +1 -1
  9. package/dist/log.cjs +13 -9
  10. package/dist/log.cjs.map +1 -1
  11. package/dist/log.d.cts +1 -1
  12. package/dist/log.d.ts +1 -1
  13. package/dist/log.d.ts.map +1 -1
  14. package/dist/log.js +13 -9
  15. package/dist/log.js.map +1 -1
  16. package/dist/stream/index.cjs +3 -0
  17. package/dist/stream/index.cjs.map +1 -1
  18. package/dist/stream/index.d.cts +1 -0
  19. package/dist/stream/index.d.ts +1 -0
  20. package/dist/stream/index.d.ts.map +1 -1
  21. package/dist/stream/index.js +2 -0
  22. package/dist/stream/index.js.map +1 -1
  23. package/dist/stream/multi_input_stream.cjs +139 -0
  24. package/dist/stream/multi_input_stream.cjs.map +1 -0
  25. package/dist/stream/multi_input_stream.d.cts +55 -0
  26. package/dist/stream/multi_input_stream.d.ts +55 -0
  27. package/dist/stream/multi_input_stream.d.ts.map +1 -0
  28. package/dist/stream/multi_input_stream.js +115 -0
  29. package/dist/stream/multi_input_stream.js.map +1 -0
  30. package/dist/stream/multi_input_stream.test.cjs +340 -0
  31. package/dist/stream/multi_input_stream.test.cjs.map +1 -0
  32. package/dist/stream/multi_input_stream.test.js +339 -0
  33. package/dist/stream/multi_input_stream.test.js.map +1 -0
  34. package/dist/stt/stt.cjs +2 -2
  35. package/dist/stt/stt.cjs.map +1 -1
  36. package/dist/stt/stt.js +2 -2
  37. package/dist/stt/stt.js.map +1 -1
  38. package/dist/telemetry/trace_types.cjs +42 -0
  39. package/dist/telemetry/trace_types.cjs.map +1 -1
  40. package/dist/telemetry/trace_types.d.cts +14 -0
  41. package/dist/telemetry/trace_types.d.ts +14 -0
  42. package/dist/telemetry/trace_types.d.ts.map +1 -1
  43. package/dist/telemetry/trace_types.js +28 -0
  44. package/dist/telemetry/trace_types.js.map +1 -1
  45. package/dist/tts/fallback_adapter.cjs +466 -0
  46. package/dist/tts/fallback_adapter.cjs.map +1 -0
  47. package/dist/tts/fallback_adapter.d.cts +110 -0
  48. package/dist/tts/fallback_adapter.d.ts +110 -0
  49. package/dist/tts/fallback_adapter.d.ts.map +1 -0
  50. package/dist/tts/fallback_adapter.js +442 -0
  51. package/dist/tts/fallback_adapter.js.map +1 -0
  52. package/dist/tts/index.cjs +3 -0
  53. package/dist/tts/index.cjs.map +1 -1
  54. package/dist/tts/index.d.cts +1 -0
  55. package/dist/tts/index.d.ts +1 -0
  56. package/dist/tts/index.d.ts.map +1 -1
  57. package/dist/tts/index.js +2 -0
  58. package/dist/tts/index.js.map +1 -1
  59. package/dist/tts/tts.cjs +2 -2
  60. package/dist/tts/tts.cjs.map +1 -1
  61. package/dist/tts/tts.js +2 -2
  62. package/dist/tts/tts.js.map +1 -1
  63. package/dist/utils.cjs +13 -0
  64. package/dist/utils.cjs.map +1 -1
  65. package/dist/utils.d.cts +1 -0
  66. package/dist/utils.d.ts +1 -0
  67. package/dist/utils.d.ts.map +1 -1
  68. package/dist/utils.js +13 -0
  69. package/dist/utils.js.map +1 -1
  70. package/dist/vad.cjs +11 -10
  71. package/dist/vad.cjs.map +1 -1
  72. package/dist/vad.d.cts +5 -3
  73. package/dist/vad.d.ts +5 -3
  74. package/dist/vad.d.ts.map +1 -1
  75. package/dist/vad.js +11 -10
  76. package/dist/vad.js.map +1 -1
  77. package/dist/voice/agent_activity.cjs +35 -10
  78. package/dist/voice/agent_activity.cjs.map +1 -1
  79. package/dist/voice/agent_activity.d.cts +1 -0
  80. package/dist/voice/agent_activity.d.ts +1 -0
  81. package/dist/voice/agent_activity.d.ts.map +1 -1
  82. package/dist/voice/agent_activity.js +35 -10
  83. package/dist/voice/agent_activity.js.map +1 -1
  84. package/dist/voice/agent_session.cjs +19 -7
  85. package/dist/voice/agent_session.cjs.map +1 -1
  86. package/dist/voice/agent_session.d.cts +3 -2
  87. package/dist/voice/agent_session.d.ts +3 -2
  88. package/dist/voice/agent_session.d.ts.map +1 -1
  89. package/dist/voice/agent_session.js +19 -7
  90. package/dist/voice/agent_session.js.map +1 -1
  91. package/dist/voice/audio_recognition.cjs +85 -36
  92. package/dist/voice/audio_recognition.cjs.map +1 -1
  93. package/dist/voice/audio_recognition.d.cts +22 -1
  94. package/dist/voice/audio_recognition.d.ts +22 -1
  95. package/dist/voice/audio_recognition.d.ts.map +1 -1
  96. package/dist/voice/audio_recognition.js +89 -36
  97. package/dist/voice/audio_recognition.js.map +1 -1
  98. package/dist/voice/audio_recognition_span.test.cjs +233 -0
  99. package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
  100. package/dist/voice/audio_recognition_span.test.js +232 -0
  101. package/dist/voice/audio_recognition_span.test.js.map +1 -0
  102. package/dist/voice/io.cjs +6 -3
  103. package/dist/voice/io.cjs.map +1 -1
  104. package/dist/voice/io.d.cts +3 -2
  105. package/dist/voice/io.d.ts +3 -2
  106. package/dist/voice/io.d.ts.map +1 -1
  107. package/dist/voice/io.js +6 -3
  108. package/dist/voice/io.js.map +1 -1
  109. package/dist/voice/recorder_io/recorder_io.cjs +3 -1
  110. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
  111. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
  112. package/dist/voice/recorder_io/recorder_io.js +3 -1
  113. package/dist/voice/recorder_io/recorder_io.js.map +1 -1
  114. package/dist/voice/room_io/_input.cjs +23 -20
  115. package/dist/voice/room_io/_input.cjs.map +1 -1
  116. package/dist/voice/room_io/_input.d.cts +2 -2
  117. package/dist/voice/room_io/_input.d.ts +2 -2
  118. package/dist/voice/room_io/_input.d.ts.map +1 -1
  119. package/dist/voice/room_io/_input.js +13 -9
  120. package/dist/voice/room_io/_input.js.map +1 -1
  121. package/dist/voice/room_io/room_io.cjs +9 -0
  122. package/dist/voice/room_io/room_io.cjs.map +1 -1
  123. package/dist/voice/room_io/room_io.d.cts +3 -1
  124. package/dist/voice/room_io/room_io.d.ts +3 -1
  125. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  126. package/dist/voice/room_io/room_io.js +9 -0
  127. package/dist/voice/room_io/room_io.js.map +1 -1
  128. package/dist/voice/utils.cjs +47 -0
  129. package/dist/voice/utils.cjs.map +1 -0
  130. package/dist/voice/utils.d.cts +4 -0
  131. package/dist/voice/utils.d.ts +4 -0
  132. package/dist/voice/utils.d.ts.map +1 -0
  133. package/dist/voice/utils.js +23 -0
  134. package/dist/voice/utils.js.map +1 -0
  135. package/package.json +1 -1
  136. package/src/ipc/supervised_proc.ts +1 -1
  137. package/src/llm/llm.ts +1 -1
  138. package/src/log.ts +22 -11
  139. package/src/stream/index.ts +1 -0
  140. package/src/stream/multi_input_stream.test.ts +540 -0
  141. package/src/stream/multi_input_stream.ts +172 -0
  142. package/src/stt/stt.ts +2 -2
  143. package/src/telemetry/trace_types.ts +18 -0
  144. package/src/tts/fallback_adapter.ts +579 -0
  145. package/src/tts/index.ts +1 -0
  146. package/src/tts/tts.ts +2 -2
  147. package/src/utils.ts +16 -0
  148. package/src/vad.ts +12 -11
  149. package/src/voice/agent_activity.ts +25 -0
  150. package/src/voice/agent_session.ts +17 -11
  151. package/src/voice/audio_recognition.ts +114 -38
  152. package/src/voice/audio_recognition_span.test.ts +261 -0
  153. package/src/voice/io.ts +7 -4
  154. package/src/voice/recorder_io/recorder_io.ts +2 -1
  155. package/src/voice/room_io/_input.ts +16 -10
  156. package/src/voice/room_io/room_io.ts +12 -0
  157. package/src/voice/utils.ts +29 -0
package/src/stt/stt.ts CHANGED
@@ -236,8 +236,8 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
236
236
  // Don't emit error event for recoverable errors during retry loop
237
237
  // to avoid ERR_UNHANDLED_ERROR or premature session termination
238
238
  this.logger.warn(
239
- { tts: this.#stt.label, attempt: i + 1, error },
240
- `failed to recognize speech, retrying in ${retryInterval}s`,
239
+ { stt: this.#stt.label, attempt: i + 1, error },
240
+ `failed to recognize speech, retrying in ${retryInterval}ms`,
241
241
  );
242
242
  }
243
243
 
package/src/telemetry/trace_types.ts CHANGED
@@ -20,6 +20,8 @@ export const ATTR_ROOM_NAME = 'lk.room_name';
20
20
  export const ATTR_SESSION_OPTIONS = 'lk.session_options';
21
21
 
22
22
  // assistant turn
23
+ export const ATTR_AGENT_TURN_ID = 'lk.generation_id';
24
+ export const ATTR_AGENT_PARENT_TURN_ID = 'lk.parent_generation_id';
23
25
  export const ATTR_USER_INPUT = 'lk.user_input';
24
26
  export const ATTR_INSTRUCTIONS = 'lk.instructions';
25
27
  export const ATTR_SPEECH_INTERRUPTED = 'lk.interrupted';
@@ -27,10 +29,14 @@ export const ATTR_SPEECH_INTERRUPTED = 'lk.interrupted';
27
29
  // llm node
28
30
  export const ATTR_CHAT_CTX = 'lk.chat_ctx';
29
31
  export const ATTR_FUNCTION_TOOLS = 'lk.function_tools';
32
+ export const ATTR_PROVIDER_TOOLS = 'lk.provider_tools';
33
+ export const ATTR_TOOL_SETS = 'lk.tool_sets';
30
34
  export const ATTR_RESPONSE_TEXT = 'lk.response.text';
31
35
  export const ATTR_RESPONSE_FUNCTION_CALLS = 'lk.response.function_calls';
36
+ export const ATTR_RESPONSE_TTFT = 'lk.response.ttft';
32
37
 
33
38
  // function tool
39
+ export const ATTR_FUNCTION_TOOL_ID = 'lk.function_tool.id';
34
40
  export const ATTR_FUNCTION_TOOL_NAME = 'lk.function_tool.name';
35
41
  export const ATTR_FUNCTION_TOOL_ARGS = 'lk.function_tool.arguments';
36
42
  export const ATTR_FUNCTION_TOOL_IS_ERROR = 'lk.function_tool.is_error';
@@ -40,6 +46,7 @@ export const ATTR_FUNCTION_TOOL_OUTPUT = 'lk.function_tool.output';
40
46
  export const ATTR_TTS_INPUT_TEXT = 'lk.input_text';
41
47
  export const ATTR_TTS_STREAMING = 'lk.tts.streaming';
42
48
  export const ATTR_TTS_LABEL = 'lk.tts.label';
49
+ export const ATTR_RESPONSE_TTFB = 'lk.response.ttfb';
43
50
 
44
51
  // eou detection
45
52
  export const ATTR_EOU_PROBABILITY = 'lk.eou.probability';
@@ -56,10 +63,14 @@ export const ATTR_LLM_METRICS = 'lk.llm_metrics';
56
63
  export const ATTR_TTS_METRICS = 'lk.tts_metrics';
57
64
  export const ATTR_REALTIME_MODEL_METRICS = 'lk.realtime_model_metrics';
58
65
 
66
+ // latency span attributes
67
+ export const ATTR_E2E_LATENCY = 'lk.e2e_latency';
68
+
59
69
  // OpenTelemetry GenAI attributes
60
70
  // OpenTelemetry specification: https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/
61
71
  export const ATTR_GEN_AI_OPERATION_NAME = 'gen_ai.operation.name';
62
72
  export const ATTR_GEN_AI_REQUEST_MODEL = 'gen_ai.request.model';
73
+ export const ATTR_GEN_AI_PROVIDER_NAME = 'gen_ai.provider.name';
63
74
  export const ATTR_GEN_AI_USAGE_INPUT_TOKENS = 'gen_ai.usage.input_tokens';
64
75
  export const ATTR_GEN_AI_USAGE_OUTPUT_TOKENS = 'gen_ai.usage.output_tokens';
65
76
 
@@ -86,3 +97,10 @@ export const ATTR_EXCEPTION_MESSAGE = 'exception.message';
86
97
 
87
98
  // Platform-specific attributes
88
99
  export const ATTR_LANGFUSE_COMPLETION_START_TIME = 'langfuse.observation.completion_start_time';
100
+
101
+ // Adaptive Interruption attributes
102
+ export const ATTR_IS_INTERRUPTION = 'lk.is_interruption';
103
+ export const ATTR_INTERRUPTION_PROBABILITY = 'lk.interruption.probability';
104
+ export const ATTR_INTERRUPTION_TOTAL_DURATION = 'lk.interruption.total_duration';
105
+ export const ATTR_INTERRUPTION_PREDICTION_DURATION = 'lk.interruption.prediction_duration';
106
+ export const ATTR_INTERRUPTION_DETECTION_DELAY = 'lk.interruption.detection_delay';
package/src/tts/fallback_adapter.ts ADDED
@@ -0,0 +1,579 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { AudioResampler } from '@livekit/rtc-node';
5
+ import { APIConnectionError, APIError } from '../_exceptions.js';
6
+ import { log } from '../log.js';
7
+ import { basic } from '../tokenize/index.js';
8
+ import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
9
+ import { Task, cancelAndWait } from '../utils.js';
10
+ import { StreamAdapter } from './stream_adapter.js';
11
+ import { ChunkedStream, SynthesizeStream, TTS, type TTSCapabilities } from './tts.js';
12
+
13
+ /**
14
+ * Internal status tracking for each TTS instance.
15
+ * @internal
16
+ */
17
+ interface TTSStatus {
18
+ available: boolean;
19
+ recoveringTask: Task<void> | null;
20
+ }
21
+
22
+ /**
23
+ * Options for creating a FallbackAdapter.
24
+ */
25
+ export interface FallbackAdapterOptions {
26
+ /** List of TTS instances to use for fallback (in priority order). At least one is required. */
27
+ ttsInstances: TTS[];
28
+ /** Number of internal retries per TTS instance before moving to the next one. Defaults to 2. */
29
+ maxRetryPerTTS?: number;
30
+ /** Delay in milliseconds before retrying recovery of a failed TTS instance after a recovery attempt fails (the first attempt is triggered without this delay). Defaults to 1000. */
31
+ recoveryDelayMs?: number;
32
+ }
33
+
34
+ /**
35
+ * Event emitted when a TTS instance's availability changes.
36
+ */
37
+ export interface AvailabilityChangedEvent {
38
+ /** The TTS instance whose availability changed. */
39
+ tts: TTS;
40
+ /** Whether the TTS instance is now available. */
41
+ available: boolean;
42
+ }
43
+
44
+ const DEFAULT_FALLBACK_API_CONNECT_OPTIONS: APIConnectOptions = {
45
+ maxRetry: 0,
46
+ timeoutMs: DEFAULT_API_CONNECT_OPTIONS.timeoutMs,
47
+ retryIntervalMs: DEFAULT_API_CONNECT_OPTIONS.retryIntervalMs,
48
+ };
49
+
50
+ const FORWARD_POLL_MS = 10;
51
+
52
+ /**
53
+ * FallbackAdapter is a TTS wrapper that provides automatic failover between multiple TTS providers.
54
+ *
55
+ * When the primary TTS fails, it automatically switches to the next available provider in the list.
56
+ * Failed providers are monitored in the background and restored when they recover.
57
+ *
58
+ * Features:
59
+ * - Automatic failover to backup TTS providers on failure
60
+ * - Background health checks to restore recovered providers
61
+ * - Automatic audio resampling when TTS providers have different sample rates
62
+ * - Support for both streaming and non-streaming TTS providers
63
+ *
64
+ * @example
65
+ * ```typescript
66
+ * import { FallbackAdapter } from '@livekit/agents';
67
+ * import { TTS as OpenAITTS } from '@livekit/agents-plugin-openai';
68
+ * import { TTS as ElevenLabsTTS } from '@livekit/agents-plugin-elevenlabs';
69
+ *
70
+ * const fallbackTTS = new FallbackAdapter({
71
+ * ttsInstances: [
72
+ * new OpenAITTS(), // Primary
73
+ * new ElevenLabsTTS(), // Fallback
74
+ * ],
75
+ * maxRetryPerTTS: 2, // Retry each TTS twice before moving to next
76
+ * recoveryDelayMs: 1000, // Wait 1 second before retrying a failed recovery attempt
77
+ * });
78
+ *
79
+ * ```
80
+ */
81
+ export class FallbackAdapter extends TTS {
82
+ /** The list of TTS instances used for fallback (in priority order). */
83
+ readonly ttsInstances: TTS[];
84
+ /** Number of retries per TTS instance before falling back to the next one. */
85
+ readonly maxRetryPerTTS: number;
86
+ /** Delay in milliseconds before retrying recovery of a failed TTS instance after a recovery attempt fails. */
87
+ readonly recoveryDelayMs: number;
88
+
89
+ private _status: TTSStatus[] = [];
90
+ private _logger = log();
91
+ private _recoveryTimeouts: Map<number, NodeJS.Timeout> = new Map();
92
+
93
+ label: string = `tts.FallbackAdapter`;
94
+
95
+ constructor(opts: FallbackAdapterOptions) {
96
+ if (!opts.ttsInstances || opts.ttsInstances.length < 1) {
97
+ throw new Error('at least one TTS instance must be provided.');
98
+ }
99
+ const numChannels = opts.ttsInstances[0]!.numChannels;
100
+ const allNumChannelsMatch = opts.ttsInstances.every((tts) => tts.numChannels === numChannels);
101
+ if (!allNumChannelsMatch) {
102
+ throw new Error('All TTS instances should have the same number of channels');
103
+ }
104
+ const sampleRate = Math.max(...opts.ttsInstances.map((t) => t.sampleRate));
105
+ const capabilities = FallbackAdapter.aggregateCapabilities(opts.ttsInstances);
106
+ super(sampleRate, numChannels, capabilities);
107
+ this.ttsInstances = opts.ttsInstances;
108
+ this.maxRetryPerTTS = opts.maxRetryPerTTS ?? 2;
109
+ this.recoveryDelayMs = opts.recoveryDelayMs ?? 1000;
110
+ this._status = opts.ttsInstances.map(() => ({
111
+ available: true,
112
+ recoveringTask: null,
113
+ }));
114
+ this.setupEventForwarding();
115
+ }
116
+ private static aggregateCapabilities(instances: TTS[]): TTSCapabilities {
117
+ const streaming = instances.some((tts) => tts.capabilities.streaming);
118
+ const alignedTranscript = instances.every((tts) => tts.capabilities.alignedTranscript === true);
119
+ return { streaming, alignedTranscript };
120
+ }
121
+
122
+ private setupEventForwarding(): void {
123
+ this.ttsInstances.forEach((tts) => {
124
+ tts.on('metrics_collected', (metrics) => {
125
+ this.emit('metrics_collected', metrics);
126
+ });
127
+ tts.on('error', (error) => {
128
+ this.emit('error', error);
129
+ });
130
+ });
131
+ }
132
+
133
+ /**
134
+ * Returns the current status of all TTS instances, including availability and recovery state.
135
+ */
136
+ get status(): TTSStatus[] {
137
+ return this._status;
138
+ }
139
+
140
+ getStreamingInstance(index: number): TTS {
141
+ const tts = this.ttsInstances[index]!;
142
+ if (tts.capabilities.streaming) {
143
+ return tts;
144
+ }
145
+ // Wrap non-streaming TTS with StreamAdapter
146
+ return new StreamAdapter(tts, new basic.SentenceTokenizer());
147
+ }
148
+
149
+ /**
150
+ * Creates a new AudioResampler for the given TTS index if needed.
151
+ * Returns null if the TTS sample rate matches the adapter's output rate.
152
+ * Each stream should create its own resampler to avoid concurrency issues.
153
+ * @internal
154
+ */
155
+ createResamplerForTTS(index: number): AudioResampler | null {
156
+ const tts = this.ttsInstances[index]!;
157
+ if (this.sampleRate !== tts.sampleRate) {
158
+ this._logger.debug(
159
+ `resampling ${tts.label} from ${tts.sampleRate}Hz to ${this.sampleRate}Hz`,
160
+ );
161
+ return new AudioResampler(tts.sampleRate, this.sampleRate, tts.numChannels);
162
+ }
163
+ return null;
164
+ }
165
+
166
+ private emitAvailabilityChanged(tts: TTS, available: boolean): void {
167
+ const event: AvailabilityChangedEvent = { tts, available };
168
+ (this as unknown as { emit: (event: string, data: AvailabilityChangedEvent) => void }).emit(
169
+ 'tts_availability_changed',
170
+ event,
171
+ );
172
+ }
173
+
174
+ private tryRecovery(index: number): void {
175
+ const status = this._status[index]!;
176
+ const tts = this.ttsInstances[index]!;
177
+ if (status.recoveringTask && !status.recoveringTask.done) {
178
+ return;
179
+ }
180
+ status.recoveringTask = Task.from(async (controller) => {
181
+ try {
182
+ const testStream = tts.synthesize(
183
+ 'Hello world, this is a recovery test.',
184
+ {
185
+ maxRetry: 0,
186
+ timeoutMs: 10000,
187
+ retryIntervalMs: 1000,
188
+ },
189
+ controller.signal,
190
+ );
191
+ let audioReceived = false;
192
+ for await (const _ of testStream) {
193
+ audioReceived = true;
194
+ }
195
+ if (!audioReceived) {
196
+ throw new Error('Recovery test completed but no audio was received');
197
+ }
198
+
199
+ status.available = true;
200
+ status.recoveringTask = null;
201
+ this._logger.info({ tts: tts.label }, 'TTS recovered');
202
+ this.emitAvailabilityChanged(tts, true);
203
+ } catch (error) {
204
+ status.recoveringTask = null;
205
+ // Don't schedule retry if we're shutting down
206
+ if (controller.signal.aborted) {
207
+ return;
208
+ }
209
+ this._logger.debug({ tts: tts.label, error }, 'TTS recovery failed, will retry');
210
+ // Retry recovery after delay (matches Python's retry behavior)
211
+ const timeoutId = setTimeout(() => {
212
+ this._recoveryTimeouts.delete(index);
213
+ this.tryRecovery(index);
214
+ }, this.recoveryDelayMs);
215
+ this._recoveryTimeouts.set(index, timeoutId);
216
+ }
217
+ });
218
+ }
219
+
220
+ markUnAvailable(index: number): void {
221
+ const status = this._status[index]!;
222
+ if (status.recoveringTask && !status.recoveringTask.done) {
223
+ return;
224
+ }
225
+ if (status.available) {
226
+ status.available = false;
227
+ this.emitAvailabilityChanged(this.ttsInstances[index]!, false);
228
+ }
229
+ this.tryRecovery(index);
230
+ }
231
+
232
+ /**
233
+ * Receives text and returns synthesis in the form of a {@link ChunkedStream}
234
+ */
235
+ synthesize(
236
+ text: string,
237
+ connOptions?: APIConnectOptions,
238
+ abortSignal?: AbortSignal,
239
+ ): ChunkedStream {
240
+ return new FallbackChunkedStream(
241
+ this,
242
+ text,
243
+ connOptions ?? DEFAULT_FALLBACK_API_CONNECT_OPTIONS,
244
+ abortSignal,
245
+ );
246
+ }
247
+
248
+ /**
249
+ * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data
250
+ *
251
+ * @param options - Optional configuration including connection options
252
+ */
253
+ stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream {
254
+ return new FallbackSynthesizeStream(
255
+ this,
256
+ options?.connOptions ?? DEFAULT_FALLBACK_API_CONNECT_OPTIONS,
257
+ );
258
+ }
259
+
260
+ /**
261
+ * Close the FallbackAdapter and all underlying TTS instances.
262
+ * This cancels any ongoing recovery tasks and cleans up resources.
263
+ */
264
+ async close(): Promise<void> {
265
+ // Clear all pending recovery timeouts so they cannot fire and restart recovery after close
266
+ this._recoveryTimeouts.forEach((timeoutId) => {
267
+ clearTimeout(timeoutId);
268
+ });
269
+ this._recoveryTimeouts.clear();
270
+
271
+ // Cancel all recovery tasks
272
+ const recoveryTasks = this._status
273
+ .map((s) => s.recoveringTask)
274
+ .filter((t): t is Task<void> => t !== null);
275
+
276
+ if (recoveryTasks.length > 0) {
277
+ await cancelAndWait(recoveryTasks, 1000);
278
+ }
279
+
280
+ // Remove event listeners
281
+ for (const tts of this.ttsInstances) {
282
+ tts.removeAllListeners('metrics_collected');
283
+ tts.removeAllListeners('error');
284
+ }
285
+
286
+ // Close all TTS instances
287
+ await Promise.all(this.ttsInstances.map((tts) => tts.close()));
288
+ }
289
+ }
290
+
291
+ class FallbackChunkedStream extends ChunkedStream {
292
+ private adapter: FallbackAdapter;
293
+ private connOptions: APIConnectOptions;
294
+ private _logger = log();
295
+
296
+ label: string = 'tts.FallbackChunkedStream';
297
+
298
+ constructor(
299
+ adapter: FallbackAdapter,
300
+ text: string,
301
+ connOptions: APIConnectOptions,
302
+ abortSignal?: AbortSignal,
303
+ ) {
304
+ super(text, adapter, connOptions, abortSignal);
305
+ this.adapter = adapter;
306
+ this.connOptions = connOptions;
307
+ }
308
+
309
+ protected async run(): Promise<void> {
310
+ const allTTSFailed = this.adapter.status.every((s) => !s.available);
311
+ let lastRequestId: string = '';
312
+ let lastSegmentId: string = '';
313
+ if (allTTSFailed) {
314
+ this._logger.warn('All fallback TTS instances failed, retrying from first...');
315
+ }
316
+ for (let i = 0; i < this.adapter.ttsInstances.length; i++) {
317
+ const tts = this.adapter.ttsInstances[i]!;
318
+ const status = this.adapter.status[i]!;
319
+ if (!status.available && !allTTSFailed) {
320
+ this.adapter.markUnAvailable(i);
321
+ continue;
322
+ }
323
+ try {
324
+ this._logger.debug({ tts: tts.label }, 'attempting TTS synthesis');
325
+ const connOptions: APIConnectOptions = {
326
+ ...this.connOptions,
327
+ maxRetry: this.adapter.maxRetryPerTTS,
328
+ };
329
+ const stream = tts.synthesize(this.inputText, connOptions, this.abortSignal);
330
+ let audioReceived = false;
331
+ const resampler = this.adapter.createResamplerForTTS(i);
332
+ for await (const audio of stream) {
333
+ if (this.abortController.signal.aborted) {
334
+ stream.close();
335
+ return;
336
+ }
337
+
338
+ if (resampler) {
339
+ for (const frame of resampler.push(audio.frame)) {
340
+ this.queue.put({
341
+ ...audio,
342
+ frame,
343
+ });
344
+ audioReceived = true;
345
+ }
346
+ } else {
347
+ this.queue.put(audio);
348
+ audioReceived = true;
349
+ }
350
+ lastRequestId = audio.requestId;
351
+ lastSegmentId = audio.segmentId;
352
+ }
353
+
354
+ // Flush any remaining resampled frames
355
+ if (resampler) {
356
+ for (const frame of resampler.flush()) {
357
+ this.queue.put({
358
+ requestId: lastRequestId || '',
359
+ segmentId: lastSegmentId || '',
360
+ frame,
361
+ final: true,
362
+ });
363
+ audioReceived = true;
364
+ }
365
+ }
366
+
367
+ // Verify audio was actually received - silent failures should trigger fallback
368
+ if (!audioReceived) {
369
+ throw new APIConnectionError({
370
+ message: 'TTS synthesis completed but no audio was received',
371
+ });
372
+ }
373
+
374
+ this._logger.debug({ tts: tts.label }, 'TTS synthesis succeeded');
375
+ return;
376
+ } catch (error) {
377
+ if (error instanceof APIError || error instanceof APIConnectionError) {
378
+ this._logger.warn({ tts: tts.label, error }, 'TTS failed, switching to next instance');
379
+ this.adapter.markUnAvailable(i);
380
+ } else {
381
+ throw error;
382
+ }
383
+ }
384
+ }
385
+ const labels = this.adapter.ttsInstances.map((t) => t.label).join(', ');
386
+ throw new APIConnectionError({
387
+ message: `all TTS instances failed (${labels})`,
388
+ });
389
+ }
390
+ }
391
+
392
+ class FallbackSynthesizeStream extends SynthesizeStream {
393
+ private adapter: FallbackAdapter;
394
+ private tokenBuffer: (
395
+ | string
396
+ | typeof SynthesizeStream.FLUSH_SENTINEL
397
+ | typeof SynthesizeStream.END_OF_STREAM
398
+ )[] = [];
399
+ private audioPushed = false;
400
+ private _logger = log();
401
+
402
+ label: string = 'tts.FallbackSynthesizeStream';
403
+
404
+ constructor(adapter: FallbackAdapter, connOptions: APIConnectOptions) {
405
+ super(adapter, connOptions);
406
+ this.adapter = adapter;
407
+ }
408
+
409
+ protected async run(): Promise<void> {
410
+ const allTTSFailed = this.adapter.status.every((s) => !s.available);
411
+ if (allTTSFailed) {
412
+ this._logger.warn('All fallback TTS instances failed, retrying from first...');
413
+ }
414
+ const readInputLLMStream = (async () => {
415
+ try {
416
+ for await (const input of this.input) {
417
+ if (this.abortController.signal.aborted) break;
418
+ this.tokenBuffer.push(input);
419
+ }
420
+ } catch (error) {
421
+ this._logger.debug({ error }, 'Error reading input LLM stream');
422
+ throw error;
423
+ } finally {
424
+ this.tokenBuffer.push(SynthesizeStream.END_OF_STREAM);
425
+ }
426
+ })();
427
+
428
+ for (let i = 0; i < this.adapter.ttsInstances.length; i++) {
429
+ const tts = this.adapter.getStreamingInstance(i);
430
+ const originalTts = this.adapter.ttsInstances[i]!;
431
+ const status = this.adapter.status[i]!;
432
+ let lastRequestId: string = '';
433
+ let lastSegmentId: string = '';
434
+
435
+ if (!status.available && !allTTSFailed) {
436
+ this.adapter.markUnAvailable(i);
437
+ continue;
438
+ }
439
+
440
+ try {
441
+ this._logger.debug({ tts: originalTts.label }, 'attempting TTS stream');
442
+
443
+ const connOptions: APIConnectOptions = {
444
+ ...this.connOptions,
445
+ maxRetry: this.adapter.maxRetryPerTTS,
446
+ };
447
+
448
+ const stream = tts.stream({ connOptions });
449
+ const resampler = this.adapter.createResamplerForTTS(i);
450
+ let bufferIndex = 0;
451
+ let streamOutputCompleted = false;
452
+ const forwardBufferToTTS = async () => {
453
+ while (true) {
454
+ while (bufferIndex < this.tokenBuffer.length) {
455
+ const token = this.tokenBuffer[bufferIndex++]!;
456
+ if (token === SynthesizeStream.FLUSH_SENTINEL) {
457
+ stream.flush();
458
+ } else if (token === SynthesizeStream.END_OF_STREAM) {
459
+ stream.endInput();
460
+ return;
461
+ } else {
462
+ stream.pushText(token);
463
+ }
464
+ }
465
+ await new Promise((resolve) => setTimeout(resolve, FORWARD_POLL_MS));
466
+ if (this.abortController.signal.aborted || streamOutputCompleted) {
467
+ stream.endInput();
468
+ return;
469
+ }
470
+ }
471
+ };
472
+
473
+ const processOutput = async () => {
474
+ try {
475
+ for await (const audio of stream) {
476
+ if (this.abortController.signal.aborted) {
477
+ stream.close();
478
+ return;
479
+ }
480
+
481
+ if (audio === SynthesizeStream.END_OF_STREAM) {
482
+ // Don't forward END_OF_STREAM yet — only emit after we verify audio
483
+ // was received. Otherwise a silent failure would signal completion
484
+ // to consumers before fallback can try the next TTS.
485
+ continue;
486
+ }
487
+
488
+ if (resampler) {
489
+ for (const frame of resampler.push(audio.frame)) {
490
+ this.queue.put({
491
+ ...audio,
492
+ frame,
493
+ });
494
+ this.audioPushed = true;
495
+ }
496
+ } else {
497
+ this.queue.put(audio);
498
+ this.audioPushed = true;
499
+ }
500
+ lastRequestId = audio.requestId;
501
+ lastSegmentId = audio.segmentId;
502
+ }
503
+
504
+ // Flush resampler
505
+ if (resampler) {
506
+ for (const frame of resampler.flush()) {
507
+ this.queue.put({
508
+ requestId: lastRequestId || '',
509
+ segmentId: lastSegmentId || '',
510
+ frame,
511
+ final: true,
512
+ });
513
+ this.audioPushed = true;
514
+ }
515
+ }
516
+ } finally {
517
+ // processOutput and forwardBufferToTTS run in parallel.
518
+ // forwardBufferToTTS polls tokenBuffer and only exits when it sees END_OF_STREAM.
519
+ // But END_OF_STREAM is only added when the LLM finishes streaming (in the `finally` block of readInputLLMStream above).
520
+ // If the TTS fails while the LLM is still streaming, forwardBufferToTTS would
521
+ // keep polling indefinitely, blocking fallback to the next TTS.
522
+ // This flag tells it to exit early.
523
+ streamOutputCompleted = true;
524
+ }
525
+ };
526
+ const [outputResult, forwardBufferResult] = await Promise.allSettled([
527
+ processOutput(),
528
+ forwardBufferToTTS().catch((err) => {
529
+ stream.close(); // Close stream so processOutput can exit
530
+ throw err;
531
+ }),
532
+ ]);
533
+ if (outputResult.status === 'rejected') {
534
+ stream.close();
535
+ throw outputResult.reason;
536
+ }
537
+ if (forwardBufferResult.status === 'rejected') {
538
+ stream.close();
539
+ throw forwardBufferResult.reason;
540
+ }
541
+
542
+ // Verify audio was actually received - if not, the TTS failed silently
543
+ if (!this.audioPushed) {
544
+ throw new APIConnectionError({
545
+ message: 'TTS stream completed but no audio was received',
546
+ });
547
+ }
548
+
549
+ this.queue.put(SynthesizeStream.END_OF_STREAM);
550
+ this._logger.debug({ tts: originalTts.label }, 'TTS stream succeeded');
551
+ await readInputLLMStream.catch(() => {});
552
+ return;
553
+ } catch (error) {
554
+ if (this.audioPushed) {
555
+ this._logger.error(
556
+ { tts: originalTts.label },
557
+ 'TTS failed after audio pushed, cannot fallback mid-utterance',
558
+ );
559
+ throw error;
560
+ }
561
+
562
+ if (error instanceof APIError || error instanceof APIConnectionError) {
563
+ this._logger.warn(
564
+ { tts: originalTts.label, error },
565
+ 'TTS failed, switching to next instance',
566
+ );
567
+ this.adapter.markUnAvailable(i);
568
+ } else {
569
+ throw error;
570
+ }
571
+ }
572
+ }
573
+ await readInputLLMStream.catch(() => {});
574
+ const labels = this.adapter.ttsInstances.map((t) => t.label).join(', ');
575
+ throw new APIConnectionError({
576
+ message: `all TTS instances failed (${labels})`,
577
+ });
578
+ }
579
+ }
package/src/tts/index.ts CHANGED
@@ -10,3 +10,4 @@ export {
10
10
  ChunkedStream,
11
11
  } from './tts.js';
12
12
  export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js';
13
+ export { FallbackAdapter, type AvailabilityChangedEvent } from './fallback_adapter.js';
package/src/tts/tts.ts CHANGED
@@ -220,7 +220,7 @@ export abstract class SynthesizeStream
220
220
  // to avoid ERR_UNHANDLED_ERROR or premature session termination
221
221
  this.logger.warn(
222
222
  { tts: this.#tts.label, attempt: i + 1, error },
223
- `failed to synthesize speech, retrying in ${retryInterval}s`,
223
+ `failed to synthesize speech, retrying in ${retryInterval}ms`,
224
224
  );
225
225
  }
226
226
 
@@ -499,7 +499,7 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
499
499
  // to avoid ERR_UNHANDLED_ERROR or premature session termination
500
500
  this.logger.warn(
501
501
  { tts: this.#tts.label, attempt: i + 1, error },
502
- `failed to generate TTS completion, retrying in ${retryInterval}s`,
502
+ `failed to generate TTS completion, retrying in ${retryInterval}ms`,
503
503
  );
504
504
  }
505
505