@livekit/agents 1.0.24 → 1.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/dist/inference/llm.cjs +1 -2
  2. package/dist/inference/llm.cjs.map +1 -1
  3. package/dist/inference/llm.d.ts.map +1 -1
  4. package/dist/inference/llm.js +1 -2
  5. package/dist/inference/llm.js.map +1 -1
  6. package/dist/inference/stt.cjs +1 -1
  7. package/dist/inference/stt.cjs.map +1 -1
  8. package/dist/inference/stt.d.ts.map +1 -1
  9. package/dist/inference/stt.js +1 -1
  10. package/dist/inference/stt.js.map +1 -1
  11. package/dist/inference/tts.cjs +4 -4
  12. package/dist/inference/tts.cjs.map +1 -1
  13. package/dist/inference/tts.d.cts +0 -1
  14. package/dist/inference/tts.d.ts +0 -1
  15. package/dist/inference/tts.d.ts.map +1 -1
  16. package/dist/inference/tts.js +4 -4
  17. package/dist/inference/tts.js.map +1 -1
  18. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  19. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  20. package/dist/ipc/job_proc_lazy_main.js +1 -1
  21. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  22. package/dist/job.cjs +29 -2
  23. package/dist/job.cjs.map +1 -1
  24. package/dist/job.d.cts +6 -0
  25. package/dist/job.d.ts +6 -0
  26. package/dist/job.d.ts.map +1 -1
  27. package/dist/job.js +19 -2
  28. package/dist/job.js.map +1 -1
  29. package/dist/llm/llm.cjs +2 -1
  30. package/dist/llm/llm.cjs.map +1 -1
  31. package/dist/llm/llm.d.cts +1 -1
  32. package/dist/llm/llm.d.ts +1 -1
  33. package/dist/llm/llm.d.ts.map +1 -1
  34. package/dist/llm/llm.js +2 -1
  35. package/dist/llm/llm.js.map +1 -1
  36. package/dist/stream/deferred_stream.cjs +12 -4
  37. package/dist/stream/deferred_stream.cjs.map +1 -1
  38. package/dist/stream/deferred_stream.d.cts +6 -1
  39. package/dist/stream/deferred_stream.d.ts +6 -1
  40. package/dist/stream/deferred_stream.d.ts.map +1 -1
  41. package/dist/stream/deferred_stream.js +12 -4
  42. package/dist/stream/deferred_stream.js.map +1 -1
  43. package/dist/stream/deferred_stream.test.cjs +2 -2
  44. package/dist/stream/deferred_stream.test.cjs.map +1 -1
  45. package/dist/stream/deferred_stream.test.js +2 -2
  46. package/dist/stream/deferred_stream.test.js.map +1 -1
  47. package/dist/stt/stream_adapter.cjs +15 -8
  48. package/dist/stt/stream_adapter.cjs.map +1 -1
  49. package/dist/stt/stream_adapter.d.cts +7 -3
  50. package/dist/stt/stream_adapter.d.ts +7 -3
  51. package/dist/stt/stream_adapter.d.ts.map +1 -1
  52. package/dist/stt/stream_adapter.js +15 -8
  53. package/dist/stt/stream_adapter.js.map +1 -1
  54. package/dist/stt/stt.cjs +8 -3
  55. package/dist/stt/stt.cjs.map +1 -1
  56. package/dist/stt/stt.d.cts +9 -3
  57. package/dist/stt/stt.d.ts +9 -3
  58. package/dist/stt/stt.d.ts.map +1 -1
  59. package/dist/stt/stt.js +9 -4
  60. package/dist/stt/stt.js.map +1 -1
  61. package/dist/telemetry/traces.cjs +23 -2
  62. package/dist/telemetry/traces.cjs.map +1 -1
  63. package/dist/telemetry/traces.d.ts.map +1 -1
  64. package/dist/telemetry/traces.js +23 -2
  65. package/dist/telemetry/traces.js.map +1 -1
  66. package/dist/tts/stream_adapter.cjs +10 -7
  67. package/dist/tts/stream_adapter.cjs.map +1 -1
  68. package/dist/tts/stream_adapter.d.cts +6 -3
  69. package/dist/tts/stream_adapter.d.ts +6 -3
  70. package/dist/tts/stream_adapter.d.ts.map +1 -1
  71. package/dist/tts/stream_adapter.js +10 -7
  72. package/dist/tts/stream_adapter.js.map +1 -1
  73. package/dist/tts/tts.cjs +27 -16
  74. package/dist/tts/tts.cjs.map +1 -1
  75. package/dist/tts/tts.d.cts +12 -5
  76. package/dist/tts/tts.d.ts +12 -5
  77. package/dist/tts/tts.d.ts.map +1 -1
  78. package/dist/tts/tts.js +28 -17
  79. package/dist/tts/tts.js.map +1 -1
  80. package/dist/types.cjs +21 -32
  81. package/dist/types.cjs.map +1 -1
  82. package/dist/types.d.cts +41 -10
  83. package/dist/types.d.ts +41 -10
  84. package/dist/types.d.ts.map +1 -1
  85. package/dist/types.js +18 -30
  86. package/dist/types.js.map +1 -1
  87. package/dist/voice/agent.cjs +54 -19
  88. package/dist/voice/agent.cjs.map +1 -1
  89. package/dist/voice/agent.d.ts.map +1 -1
  90. package/dist/voice/agent.js +54 -19
  91. package/dist/voice/agent.js.map +1 -1
  92. package/dist/voice/agent_activity.cjs +0 -3
  93. package/dist/voice/agent_activity.cjs.map +1 -1
  94. package/dist/voice/agent_activity.d.ts.map +1 -1
  95. package/dist/voice/agent_activity.js +0 -3
  96. package/dist/voice/agent_activity.js.map +1 -1
  97. package/dist/voice/agent_session.cjs +107 -27
  98. package/dist/voice/agent_session.cjs.map +1 -1
  99. package/dist/voice/agent_session.d.cts +16 -2
  100. package/dist/voice/agent_session.d.ts +16 -2
  101. package/dist/voice/agent_session.d.ts.map +1 -1
  102. package/dist/voice/agent_session.js +110 -27
  103. package/dist/voice/agent_session.js.map +1 -1
  104. package/dist/voice/events.cjs.map +1 -1
  105. package/dist/voice/events.d.cts +4 -4
  106. package/dist/voice/events.d.ts +4 -4
  107. package/dist/voice/events.d.ts.map +1 -1
  108. package/dist/voice/events.js.map +1 -1
  109. package/dist/voice/generation.cjs +6 -7
  110. package/dist/voice/generation.cjs.map +1 -1
  111. package/dist/voice/generation.d.ts.map +1 -1
  112. package/dist/voice/generation.js +7 -8
  113. package/dist/voice/generation.js.map +1 -1
  114. package/dist/voice/io.cjs +16 -0
  115. package/dist/voice/io.cjs.map +1 -1
  116. package/dist/voice/io.d.cts +8 -0
  117. package/dist/voice/io.d.ts +8 -0
  118. package/dist/voice/io.d.ts.map +1 -1
  119. package/dist/voice/io.js +16 -0
  120. package/dist/voice/io.js.map +1 -1
  121. package/dist/voice/recorder_io/index.cjs +23 -0
  122. package/dist/voice/recorder_io/index.cjs.map +1 -0
  123. package/dist/voice/recorder_io/index.d.cts +2 -0
  124. package/dist/voice/recorder_io/index.d.ts +2 -0
  125. package/dist/voice/recorder_io/index.d.ts.map +1 -0
  126. package/dist/voice/recorder_io/index.js +2 -0
  127. package/dist/voice/recorder_io/index.js.map +1 -0
  128. package/dist/voice/recorder_io/recorder_io.cjs +542 -0
  129. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
  130. package/dist/voice/recorder_io/recorder_io.d.cts +100 -0
  131. package/dist/voice/recorder_io/recorder_io.d.ts +100 -0
  132. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
  133. package/dist/voice/recorder_io/recorder_io.js +508 -0
  134. package/dist/voice/recorder_io/recorder_io.js.map +1 -0
  135. package/dist/voice/report.cjs +7 -2
  136. package/dist/voice/report.cjs.map +1 -1
  137. package/dist/voice/report.d.cts +11 -1
  138. package/dist/voice/report.d.ts +11 -1
  139. package/dist/voice/report.d.ts.map +1 -1
  140. package/dist/voice/report.js +7 -2
  141. package/dist/voice/report.js.map +1 -1
  142. package/dist/voice/room_io/_input.cjs +2 -1
  143. package/dist/voice/room_io/_input.cjs.map +1 -1
  144. package/dist/voice/room_io/_input.d.ts.map +1 -1
  145. package/dist/voice/room_io/_input.js +2 -1
  146. package/dist/voice/room_io/_input.js.map +1 -1
  147. package/dist/voice/room_io/_output.cjs +8 -7
  148. package/dist/voice/room_io/_output.cjs.map +1 -1
  149. package/dist/voice/room_io/_output.d.cts +2 -1
  150. package/dist/voice/room_io/_output.d.ts +2 -1
  151. package/dist/voice/room_io/_output.d.ts.map +1 -1
  152. package/dist/voice/room_io/_output.js +8 -7
  153. package/dist/voice/room_io/_output.js.map +1 -1
  154. package/dist/worker.cjs +4 -3
  155. package/dist/worker.cjs.map +1 -1
  156. package/dist/worker.js +4 -3
  157. package/dist/worker.js.map +1 -1
  158. package/package.json +1 -1
  159. package/src/inference/llm.ts +0 -1
  160. package/src/inference/stt.ts +1 -2
  161. package/src/inference/tts.ts +5 -4
  162. package/src/ipc/job_proc_lazy_main.ts +1 -1
  163. package/src/job.ts +21 -2
  164. package/src/llm/llm.ts +2 -2
  165. package/src/stream/deferred_stream.test.ts +3 -3
  166. package/src/stream/deferred_stream.ts +22 -5
  167. package/src/stt/stream_adapter.ts +18 -8
  168. package/src/stt/stt.ts +19 -6
  169. package/src/telemetry/traces.ts +25 -3
  170. package/src/tts/stream_adapter.ts +15 -7
  171. package/src/tts/tts.ts +46 -21
  172. package/src/types.ts +57 -33
  173. package/src/voice/agent.ts +59 -19
  174. package/src/voice/agent_activity.ts +0 -3
  175. package/src/voice/agent_session.ts +142 -35
  176. package/src/voice/events.ts +6 -3
  177. package/src/voice/generation.ts +10 -8
  178. package/src/voice/io.ts +19 -0
  179. package/src/voice/recorder_io/index.ts +4 -0
  180. package/src/voice/recorder_io/recorder_io.ts +690 -0
  181. package/src/voice/report.ts +20 -3
  182. package/src/voice/room_io/_input.ts +2 -1
  183. package/src/voice/room_io/_output.ts +10 -7
  184. package/src/worker.ts +1 -1
@@ -9,15 +9,22 @@ import type {
9
9
  import { IdentityTransform } from './identity_transform.js';
10
10
 
11
11
  /**
12
- * Check if error is related to reader.read after release lock
12
+ * Check if error is related to stream cleanup operations.
13
+ *
14
+ * These errors are expected when calling reader.read() after releaseLock()
15
+ * or when writing to already closed streams during cleanup:
13
16
  *
14
17
  * Invalid state: Releasing reader
15
18
  * Invalid state: The reader is not attached to a stream
19
+ * Invalid state: Controller is already closed
20
+ * Invalid state: WritableStream is closed
16
21
  */
17
22
  export function isStreamReaderReleaseError(e: unknown) {
18
23
  const allowedMessages = [
19
24
  'Invalid state: Releasing reader',
20
25
  'Invalid state: The reader is not attached to a stream',
26
+ 'Controller is already closed',
27
+ 'WritableStream is closed',
21
28
  ];
22
29
 
23
30
  if (e instanceof TypeError) {
@@ -66,18 +73,27 @@ export class DeferredReadableStream<T> {
66
73
  await this.writer.write(value);
67
74
  }
68
75
  } catch (e) {
69
- // skip source detach related errors
76
+ // skip stream cleanup related errors
70
77
  if (isStreamReaderReleaseError(e)) return;
78
+
71
79
  sourceError = e;
72
80
  } finally {
73
81
  // any other error from source will be propagated to the consumer
74
82
  if (sourceError) {
75
- this.writer.abort(sourceError);
83
+ try {
84
+ this.writer.abort(sourceError);
85
+ } catch (e) {
86
+ // ignore if writer is already closed
87
+ }
76
88
  return;
77
89
  }
78
90
 
79
91
  // release lock so this.stream.getReader().read() will terminate with done: true
80
- this.writer.releaseLock();
92
+ try {
93
+ this.writer.releaseLock();
94
+ } catch (e) {
95
+ // ignore if writer lock is already released
96
+ }
81
97
 
82
98
  // we only close the writable stream after done
83
99
  try {
@@ -98,7 +114,8 @@ export class DeferredReadableStream<T> {
98
114
  */
99
115
  async detachSource() {
100
116
  if (!this.isSourceSet) {
101
- throw new Error('Source not set');
117
+ // No-op if source was never set - this is a common case during cleanup
118
+ return;
102
119
  }
103
120
 
104
121
  // release lock will make any pending read() throw TypeError
@@ -3,6 +3,7 @@
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { AudioFrame } from '@livekit/rtc-node';
5
5
  import { log } from '../log.js';
6
+ import type { APIConnectOptions } from '../types.js';
6
7
  import type { VAD, VADStream } from '../vad.js';
7
8
  import { VADEventType } from '../vad.js';
8
9
  import type { SpeechEvent } from './stt.js';
@@ -22,14 +23,18 @@ export class StreamAdapter extends STT {
22
23
  this.#stt.on('metrics_collected', (metrics) => {
23
24
  this.emit('metrics_collected', metrics);
24
25
  });
26
+
27
+ this.#stt.on('error', (error) => {
28
+ this.emit('error', error);
29
+ });
25
30
  }
26
31
 
27
- _recognize(frame: AudioFrame): Promise<SpeechEvent> {
28
- return this.#stt.recognize(frame);
32
+ _recognize(frame: AudioFrame, abortSignal?: AbortSignal): Promise<SpeechEvent> {
33
+ return this.#stt.recognize(frame, abortSignal);
29
34
  }
30
35
 
31
- stream(): StreamAdapterWrapper {
32
- return new StreamAdapterWrapper(this.#stt, this.#vad);
36
+ stream(options?: { connOptions?: APIConnectOptions }): StreamAdapterWrapper {
37
+ return new StreamAdapterWrapper(this.#stt, this.#vad, options?.connOptions);
33
38
  }
34
39
  }
35
40
 
@@ -38,13 +43,18 @@ export class StreamAdapterWrapper extends SpeechStream {
38
43
  #vadStream: VADStream;
39
44
  label: string;
40
45
 
41
- constructor(stt: STT, vad: VAD) {
42
- super(stt);
46
+ constructor(stt: STT, vad: VAD, connOptions?: APIConnectOptions) {
47
+ super(stt, undefined, connOptions);
43
48
  this.#stt = stt;
44
49
  this.#vadStream = vad.stream();
45
50
  this.label = `stt.StreamAdapterWrapper<${this.#stt.label}>`;
46
51
  }
47
52
 
53
+ close() {
54
+ super.close();
55
+ this.#vadStream.close();
56
+ }
57
+
48
58
  async monitorMetrics() {
49
59
  return; // do nothing
50
60
  }
@@ -71,7 +81,7 @@ export class StreamAdapterWrapper extends SpeechStream {
71
81
  this.output.put({ type: SpeechEventType.END_OF_SPEECH });
72
82
 
73
83
  try {
74
- const event = await this.#stt.recognize(ev.frames);
84
+ const event = await this.#stt.recognize(ev.frames, this.abortSignal);
75
85
  if (!event.alternatives![0].text) {
76
86
  continue;
77
87
  }
@@ -92,6 +102,6 @@ export class StreamAdapterWrapper extends SpeechStream {
92
102
  }
93
103
  };
94
104
 
95
- Promise.all([forwardInput(), recognize()]);
105
+ await Promise.all([forwardInput(), recognize()]);
96
106
  }
97
107
  }
package/src/stt/stt.ts CHANGED
@@ -10,7 +10,7 @@ import { calculateAudioDurationSeconds } from '../audio.js';
10
10
  import { log } from '../log.js';
11
11
  import type { STTMetrics } from '../metrics/base.js';
12
12
  import { DeferredReadableStream } from '../stream/deferred_stream.js';
13
- import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
13
+ import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, intervalForRetry } from '../types.js';
14
14
  import type { AudioBuffer } from '../utils.js';
15
15
  import { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';
16
16
 
@@ -113,9 +113,9 @@ export abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCal
113
113
  }
114
114
 
115
115
  /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */
116
- async recognize(frame: AudioBuffer): Promise<SpeechEvent> {
116
+ async recognize(frame: AudioBuffer, abortSignal?: AbortSignal): Promise<SpeechEvent> {
117
117
  const startTime = process.hrtime.bigint();
118
- const event = await this._recognize(frame);
118
+ const event = await this._recognize(frame, abortSignal);
119
119
  const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));
120
120
  this.emit('metrics_collected', {
121
121
  type: 'stt_metrics',
@@ -128,13 +128,19 @@ export abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCal
128
128
  });
129
129
  return event;
130
130
  }
131
- protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;
131
+
132
+ protected abstract _recognize(
133
+ frame: AudioBuffer,
134
+ abortSignal?: AbortSignal,
135
+ ): Promise<SpeechEvent>;
132
136
 
133
137
  /**
134
138
  * Returns a {@link SpeechStream} that can be used to push audio frames and receive
135
139
  * transcriptions
140
+ *
141
+ * @param options - Optional configuration including connection options
136
142
  */
137
- abstract stream(): SpeechStream;
143
+ abstract stream(options?: { connOptions?: APIConnectOptions }): SpeechStream;
138
144
 
139
145
  async close(): Promise<void> {
140
146
  return;
@@ -171,6 +177,8 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
171
177
  private logger = log();
172
178
  private _connOptions: APIConnectOptions;
173
179
 
180
+ protected abortController = new AbortController();
181
+
174
182
  constructor(
175
183
  stt: STT,
176
184
  sampleRate?: number,
@@ -196,7 +204,7 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
196
204
  return await this.run();
197
205
  } catch (error) {
198
206
  if (error instanceof APIError) {
199
- const retryInterval = this._connOptions._intervalForRetry(i);
207
+ const retryInterval = intervalForRetry(this._connOptions, i);
200
208
 
201
209
  if (this._connOptions.maxRetry === 0 || !error.retryable) {
202
210
  this.emitError({ error, recoverable: false });
@@ -288,6 +296,10 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
288
296
 
289
297
  protected abstract run(): Promise<void>;
290
298
 
299
+ protected get abortSignal(): AbortSignal {
300
+ return this.abortController.signal;
301
+ }
302
+
291
303
  updateInputStream(audioStream: ReadableStream<AudioFrame>) {
292
304
  this.deferredInputStream.setSource(audioStream);
293
305
  }
@@ -352,6 +364,7 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
352
364
  if (!this.input.closed) this.input.close();
353
365
  if (!this.queue.closed) this.queue.close();
354
366
  if (!this.output.closed) this.output.close();
367
+ if (!this.abortController.signal.aborted) this.abortController.abort();
355
368
  this.closed = true;
356
369
  }
357
370
 
@@ -21,6 +21,7 @@ import { BatchSpanProcessor, NodeTracerProvider } from '@opentelemetry/sdk-trace
21
21
  import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions';
22
22
  import FormData from 'form-data';
23
23
  import { AccessToken } from 'livekit-server-sdk';
24
+ import fs from 'node:fs/promises';
24
25
  import type { ChatContent, ChatItem } from '../llm/index.js';
25
26
  import { enableOtelLogging } from '../log.js';
26
27
  import type { SessionReport } from '../voice/report.js';
@@ -497,12 +498,13 @@ export async function uploadSessionReport(options: {
497
498
  const formData = new FormData();
498
499
 
499
500
  // Add header (protobuf MetricsRecordingHeader)
501
+ const audioStartTime = report.audioRecordingStartedAt ?? 0;
500
502
  const headerMsg = new MetricsRecordingHeader({
501
503
  roomId: report.roomId,
502
504
  duration: BigInt(0), // TODO: Calculate actual duration from report
503
505
  startTime: {
504
- seconds: BigInt(Math.floor(report.timestamp / 1000)),
505
- nanos: Math.floor((report.timestamp % 1000) * 1e6),
506
+ seconds: BigInt(Math.floor(audioStartTime / 1000)),
507
+ nanos: Math.floor((audioStartTime % 1000) * 1e6),
506
508
  },
507
509
  });
508
510
 
@@ -530,7 +532,27 @@ export async function uploadSessionReport(options: {
530
532
  },
531
533
  });
532
534
 
533
- // TODO(brian): Add audio recording file when recorder IO is implemented
535
+ // Add audio recording file if available
536
+ if (report.audioRecordingPath && report.audioRecordingStartedAt) {
537
+ let audioBytes: Buffer;
538
+ try {
539
+ audioBytes = await fs.readFile(report.audioRecordingPath);
540
+ } catch {
541
+ audioBytes = Buffer.alloc(0);
542
+ }
543
+
544
+ if (audioBytes.length > 0) {
545
+ formData.append('audio', audioBytes, {
546
+ filename: 'recording.ogg',
547
+ contentType: 'audio/ogg',
548
+ knownLength: audioBytes.length,
549
+ header: {
550
+ 'Content-Type': 'audio/ogg',
551
+ 'Content-Length': audioBytes.length.toString(),
552
+ },
553
+ });
554
+ }
555
+ }
534
556
 
535
557
  // Upload to LiveKit Cloud using form-data's submit method
536
558
  // This properly streams the multipart form with all headers including Content-Length
@@ -2,6 +2,7 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';
5
+ import type { APIConnectOptions } from '../types.js';
5
6
  import { Task } from '../utils.js';
6
7
  import type { ChunkedStream } from './tts.js';
7
8
  import { SynthesizeStream, TTS } from './tts.js';
@@ -21,14 +22,21 @@ export class StreamAdapter extends TTS {
21
22
  this.#tts.on('metrics_collected', (metrics) => {
22
23
  this.emit('metrics_collected', metrics);
23
24
  });
25
+ this.#tts.on('error', (error) => {
26
+ this.emit('error', error);
27
+ });
24
28
  }
25
29
 
26
- synthesize(text: string): ChunkedStream {
27
- return this.#tts.synthesize(text);
30
+ synthesize(
31
+ text: string,
32
+ connOptions?: APIConnectOptions,
33
+ abortSignal?: AbortSignal,
34
+ ): ChunkedStream {
35
+ return this.#tts.synthesize(text, connOptions, abortSignal);
28
36
  }
29
37
 
30
- stream(): StreamAdapterWrapper {
31
- return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer);
38
+ stream(options?: { connOptions?: APIConnectOptions }): StreamAdapterWrapper {
39
+ return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options?.connOptions);
32
40
  }
33
41
  }
34
42
 
@@ -37,8 +45,8 @@ export class StreamAdapterWrapper extends SynthesizeStream {
37
45
  #sentenceStream: SentenceStream;
38
46
  label: string;
39
47
 
40
- constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {
41
- super(tts);
48
+ constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer, connOptions?: APIConnectOptions) {
49
+ super(tts, connOptions);
42
50
  this.#tts = tts;
43
51
  this.#sentenceStream = sentenceTokenizer.stream();
44
52
  this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;
@@ -84,7 +92,7 @@ export class StreamAdapterWrapper extends SynthesizeStream {
84
92
  prevTask: Task<void> | undefined,
85
93
  controller: AbortController,
86
94
  ) => {
87
- const audioStream = this.#tts.synthesize(token);
95
+ const audioStream = this.#tts.synthesize(token, this.connOptions, this.abortSignal);
88
96
 
89
97
  // wait for previous audio transcription to complete before starting
90
98
  // to queuing audio frames of the current token
package/src/tts/tts.ts CHANGED
@@ -11,7 +11,7 @@ import { log } from '../log.js';
11
11
  import type { TTSMetrics } from '../metrics/base.js';
12
12
  import { DeferredReadableStream } from '../stream/deferred_stream.js';
13
13
  import { recordException, traceTypes, tracer } from '../telemetry/index.js';
14
- import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
14
+ import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, intervalForRetry } from '../types.js';
15
15
  import { AsyncIterableQueue, delay, mergeFrames, startSoon, toError } from '../utils.js';
16
16
 
17
17
  /** SynthesizedAudio is a packet of speech synthesis as returned by the TTS. */
@@ -90,12 +90,18 @@ export abstract class TTS extends (EventEmitter as new () => TypedEmitter<TTSCal
90
90
  /**
91
91
  * Receives text and returns synthesis in the form of a {@link ChunkedStream}
92
92
  */
93
- abstract synthesize(text: string): ChunkedStream;
93
+ abstract synthesize(
94
+ text: string,
95
+ connOptions?: APIConnectOptions,
96
+ abortSignal?: AbortSignal,
97
+ ): ChunkedStream;
94
98
 
95
99
  /**
96
100
  * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data
101
+ *
102
+ * @param options - Optional configuration including connection options
97
103
  */
98
- abstract stream(): SynthesizeStream;
104
+ abstract stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream;
99
105
 
100
106
  async close(): Promise<void> {
101
107
  return;
@@ -129,30 +135,33 @@ export abstract class SynthesizeStream
129
135
  SynthesizedAudio | typeof SynthesizeStream.END_OF_STREAM
130
136
  >();
131
137
  protected closed = false;
132
- abstract label: string;
133
- #tts: TTS;
134
- #metricsPendingTexts: string[] = [];
135
- #metricsText = '';
136
- #monitorMetricsTask?: Promise<void>;
137
- private _connOptions: APIConnectOptions;
138
+ protected connOptions: APIConnectOptions;
138
139
  protected abortController = new AbortController();
139
- #ttsRequestSpan?: Span;
140
140
 
141
141
  private deferredInputStream: DeferredReadableStream<
142
142
  string | typeof SynthesizeStream.FLUSH_SENTINEL
143
143
  >;
144
144
  private logger = log();
145
145
 
146
+ abstract label: string;
147
+
148
+ #tts: TTS;
149
+ #metricsPendingTexts: string[] = [];
150
+ #metricsText = '';
151
+ #monitorMetricsTask?: Promise<void>;
152
+ #ttsRequestSpan?: Span;
153
+
146
154
  constructor(tts: TTS, connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS) {
147
155
  this.#tts = tts;
148
- this._connOptions = connOptions;
156
+ this.connOptions = connOptions;
149
157
  this.deferredInputStream = new DeferredReadableStream();
150
158
  this.pumpInput();
159
+
151
160
  this.abortController.signal.addEventListener('abort', () => {
152
161
  this.deferredInputStream.detachSource();
153
162
  // TODO (AJS-36) clean this up when we refactor with streams
154
- this.input.close();
155
- this.output.close();
163
+ if (!this.input.closed) this.input.close();
164
+ if (!this.output.closed) this.output.close();
156
165
  this.closed = true;
157
166
  });
158
167
 
@@ -170,7 +179,7 @@ export abstract class SynthesizeStream
170
179
  [traceTypes.ATTR_TTS_LABEL]: this.#tts.label,
171
180
  });
172
181
 
173
- for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {
182
+ for (let i = 0; i < this.connOptions.maxRetry + 1; i++) {
174
183
  try {
175
184
  return await tracer.startActiveSpan(
176
185
  async (attemptSpan) => {
@@ -186,15 +195,15 @@ export abstract class SynthesizeStream
186
195
  );
187
196
  } catch (error) {
188
197
  if (error instanceof APIError) {
189
- const retryInterval = this._connOptions._intervalForRetry(i);
198
+ const retryInterval = intervalForRetry(this.connOptions, i);
190
199
 
191
- if (this._connOptions.maxRetry === 0 || !error.retryable) {
200
+ if (this.connOptions.maxRetry === 0 || !error.retryable) {
192
201
  this.emitError({ error, recoverable: false });
193
202
  throw error;
194
- } else if (i === this._connOptions.maxRetry) {
203
+ } else if (i === this.connOptions.maxRetry) {
195
204
  this.emitError({ error, recoverable: false });
196
205
  throw new APIConnectionError({
197
- message: `failed to generate TTS completion after ${this._connOptions.maxRetry + 1} attempts`,
206
+ message: `failed to generate TTS completion after ${this.connOptions.maxRetry + 1} attempts`,
198
207
  options: { retryable: false },
199
208
  });
200
209
  } else {
@@ -378,6 +387,10 @@ export abstract class SynthesizeStream
378
387
  return this.output.next();
379
388
  }
380
389
 
390
+ get abortSignal(): AbortSignal {
391
+ return this.abortController.signal;
392
+ }
393
+
381
394
  /** Close both the input and output of the TTS stream */
382
395
  close() {
383
396
  this.abortController.abort();
@@ -413,15 +426,22 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
413
426
  private _connOptions: APIConnectOptions;
414
427
  private logger = log();
415
428
 
429
+ protected abortController = new AbortController();
430
+
416
431
  constructor(
417
432
  text: string,
418
433
  tts: TTS,
419
434
  connOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
435
+ abortSignal?: AbortSignal,
420
436
  ) {
421
437
  this.#text = text;
422
438
  this.#tts = tts;
423
439
  this._connOptions = connOptions;
424
440
 
441
+ if (abortSignal) {
442
+ abortSignal.addEventListener('abort', () => this.abortController.abort(), { once: true });
443
+ }
444
+
425
445
  this.monitorMetrics();
426
446
 
427
447
  // this is a hack to immitate asyncio.create_task so that mainTask
@@ -454,7 +474,7 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
454
474
  );
455
475
  } catch (error) {
456
476
  if (error instanceof APIError) {
457
- const retryInterval = this._connOptions._intervalForRetry(i);
477
+ const retryInterval = intervalForRetry(this._connOptions, i);
458
478
 
459
479
  if (this._connOptions.maxRetry === 0 || !error.retryable) {
460
480
  this.emitError({ error, recoverable: false });
@@ -508,6 +528,10 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
508
528
  return this.#text;
509
529
  }
510
530
 
531
+ get abortSignal(): AbortSignal {
532
+ return this.abortController.signal;
533
+ }
534
+
511
535
  protected async monitorMetrics() {
512
536
  const startTime = process.hrtime.bigint();
513
537
  let audioDurationMs = 0;
@@ -562,8 +586,9 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
562
586
 
563
587
  /** Close both the input and output of the TTS stream */
564
588
  close() {
565
- this.queue.close();
566
- this.output.close();
589
+ if (!this.queue.closed) this.queue.close();
590
+ if (!this.output.closed) this.output.close();
591
+ if (!this.abortController.signal.aborted) this.abortController.abort();
567
592
  this.closed = true;
568
593
  }
569
594
 
package/src/types.ts CHANGED
@@ -1,42 +1,66 @@
1
1
  // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- export class APIConnectOptions {
5
- /** Maximum number of retries to connect to the API. */
6
- readonly maxRetry: number;
7
- /** Interval between retries to connect to the API in milliseconds. */
8
- readonly retryIntervalMs: number;
9
- /** Timeout for connecting to the API in milliseconds. */
10
- readonly timeoutMs: number;
11
4
 
12
- constructor(options: Partial<APIConnectOptions> = {}) {
13
- this.maxRetry = options.maxRetry ?? 3;
14
- this.retryIntervalMs = options.retryIntervalMs ?? 2000;
15
- this.timeoutMs = options.timeoutMs ?? 10000;
5
+ /**
6
+ * Connection options for API calls, controlling retry and timeout behavior.
7
+ */
8
+ export interface APIConnectOptions {
9
+ /** Maximum number of retries to connect to the API. Default: 3 */
10
+ maxRetry: number;
11
+ /** Interval between retries to connect to the API in milliseconds. Default: 2000 */
12
+ retryIntervalMs: number;
13
+ /** Timeout for connecting to the API in milliseconds. Default: 10000 */
14
+ timeoutMs: number;
15
+ }
16
16
 
17
- if (this.maxRetry < 0) {
18
- throw new Error('maxRetry must be greater than or equal to 0');
19
- }
20
- if (this.retryIntervalMs < 0) {
21
- throw new Error('retryIntervalMs must be greater than or equal to 0');
22
- }
23
- if (this.timeoutMs < 0) {
24
- throw new Error('timeoutMs must be greater than or equal to 0');
25
- }
26
- }
17
+ export const DEFAULT_API_CONNECT_OPTIONS: APIConnectOptions = {
18
+ maxRetry: 3,
19
+ retryIntervalMs: 2000,
20
+ timeoutMs: 10000,
21
+ };
27
22
 
28
- /** @internal */
29
- _intervalForRetry(numRetries: number): number {
30
- /**
31
- * Return the interval for the given number of retries.
32
- *
33
- * The first retry is immediate, and then uses specified retryIntervalMs
34
- */
35
- if (numRetries === 0) {
36
- return 0.1;
37
- }
38
- return this.retryIntervalMs;
23
+ /**
24
+ * Return the interval for the given number of retries.
25
+ * The first retry is immediate, and then uses specified retryIntervalMs.
26
+ * @internal
27
+ */
28
+ export function intervalForRetry(connOptions: APIConnectOptions, numRetries: number): number {
29
+ if (numRetries === 0) {
30
+ return 0.1;
39
31
  }
32
+ return connOptions.retryIntervalMs;
33
+ }
34
+
35
+ /**
36
+ * Connection options for the agent session, controlling retry and timeout behavior
37
+ * for STT, LLM, and TTS connections.
38
+ */
39
+ export interface SessionConnectOptions {
40
+ /** Connection options for speech-to-text. */
41
+ sttConnOptions?: Partial<APIConnectOptions>;
42
+ /** Connection options for the language model. */
43
+ llmConnOptions?: Partial<APIConnectOptions>;
44
+ /** Connection options for text-to-speech. */
45
+ ttsConnOptions?: Partial<APIConnectOptions>;
46
+ /** Maximum number of consecutive unrecoverable errors from LLM or TTS before closing the session. Default: 3 */
47
+ maxUnrecoverableErrors?: number;
48
+ }
49
+
50
+ /**
51
+ * Resolved session connect options with all values populated.
52
+ * @internal
53
+ */
54
+ export interface ResolvedSessionConnectOptions {
55
+ sttConnOptions: APIConnectOptions;
56
+ llmConnOptions: APIConnectOptions;
57
+ ttsConnOptions: APIConnectOptions;
58
+ maxUnrecoverableErrors: number;
40
59
  }
41
60
 
42
- export const DEFAULT_API_CONNECT_OPTIONS = new APIConnectOptions();
61
+ export const DEFAULT_SESSION_CONNECT_OPTIONS: ResolvedSessionConnectOptions = {
62
+ sttConnOptions: DEFAULT_API_CONNECT_OPTIONS,
63
+ llmConnOptions: DEFAULT_API_CONNECT_OPTIONS,
64
+ ttsConnOptions: DEFAULT_API_CONNECT_OPTIONS,
65
+ maxUnrecoverableErrors: 3,
66
+ };