@livekit/agents 1.0.23 → 1.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. package/dist/inference/llm.cjs +1 -2
  2. package/dist/inference/llm.cjs.map +1 -1
  3. package/dist/inference/llm.d.ts.map +1 -1
  4. package/dist/inference/llm.js +1 -2
  5. package/dist/inference/llm.js.map +1 -1
  6. package/dist/inference/stt.cjs +1 -1
  7. package/dist/inference/stt.cjs.map +1 -1
  8. package/dist/inference/stt.d.ts.map +1 -1
  9. package/dist/inference/stt.js +1 -1
  10. package/dist/inference/stt.js.map +1 -1
  11. package/dist/inference/tts.cjs +4 -2
  12. package/dist/inference/tts.cjs.map +1 -1
  13. package/dist/inference/tts.d.ts.map +1 -1
  14. package/dist/inference/tts.js +4 -2
  15. package/dist/inference/tts.js.map +1 -1
  16. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  17. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  18. package/dist/ipc/job_proc_lazy_main.js +1 -1
  19. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  20. package/dist/job.cjs +29 -2
  21. package/dist/job.cjs.map +1 -1
  22. package/dist/job.d.cts +6 -0
  23. package/dist/job.d.ts +6 -0
  24. package/dist/job.d.ts.map +1 -1
  25. package/dist/job.js +19 -2
  26. package/dist/job.js.map +1 -1
  27. package/dist/llm/llm.cjs +2 -1
  28. package/dist/llm/llm.cjs.map +1 -1
  29. package/dist/llm/llm.d.cts +1 -1
  30. package/dist/llm/llm.d.ts +1 -1
  31. package/dist/llm/llm.d.ts.map +1 -1
  32. package/dist/llm/llm.js +2 -1
  33. package/dist/llm/llm.js.map +1 -1
  34. package/dist/stream/deferred_stream.cjs +12 -4
  35. package/dist/stream/deferred_stream.cjs.map +1 -1
  36. package/dist/stream/deferred_stream.d.cts +6 -1
  37. package/dist/stream/deferred_stream.d.ts +6 -1
  38. package/dist/stream/deferred_stream.d.ts.map +1 -1
  39. package/dist/stream/deferred_stream.js +12 -4
  40. package/dist/stream/deferred_stream.js.map +1 -1
  41. package/dist/stream/deferred_stream.test.cjs +2 -2
  42. package/dist/stream/deferred_stream.test.cjs.map +1 -1
  43. package/dist/stream/deferred_stream.test.js +2 -2
  44. package/dist/stream/deferred_stream.test.js.map +1 -1
  45. package/dist/stt/stream_adapter.cjs +15 -8
  46. package/dist/stt/stream_adapter.cjs.map +1 -1
  47. package/dist/stt/stream_adapter.d.cts +7 -3
  48. package/dist/stt/stream_adapter.d.ts +7 -3
  49. package/dist/stt/stream_adapter.d.ts.map +1 -1
  50. package/dist/stt/stream_adapter.js +15 -8
  51. package/dist/stt/stream_adapter.js.map +1 -1
  52. package/dist/stt/stt.cjs +8 -3
  53. package/dist/stt/stt.cjs.map +1 -1
  54. package/dist/stt/stt.d.cts +9 -3
  55. package/dist/stt/stt.d.ts +9 -3
  56. package/dist/stt/stt.d.ts.map +1 -1
  57. package/dist/stt/stt.js +9 -4
  58. package/dist/stt/stt.js.map +1 -1
  59. package/dist/telemetry/traces.cjs +23 -2
  60. package/dist/telemetry/traces.cjs.map +1 -1
  61. package/dist/telemetry/traces.d.ts.map +1 -1
  62. package/dist/telemetry/traces.js +23 -2
  63. package/dist/telemetry/traces.js.map +1 -1
  64. package/dist/tts/stream_adapter.cjs +4 -4
  65. package/dist/tts/stream_adapter.cjs.map +1 -1
  66. package/dist/tts/stream_adapter.d.cts +5 -2
  67. package/dist/tts/stream_adapter.d.ts +5 -2
  68. package/dist/tts/stream_adapter.d.ts.map +1 -1
  69. package/dist/tts/stream_adapter.js +4 -4
  70. package/dist/tts/stream_adapter.js.map +1 -1
  71. package/dist/tts/tts.cjs +2 -2
  72. package/dist/tts/tts.cjs.map +1 -1
  73. package/dist/tts/tts.d.cts +5 -1
  74. package/dist/tts/tts.d.ts +5 -1
  75. package/dist/tts/tts.d.ts.map +1 -1
  76. package/dist/tts/tts.js +3 -3
  77. package/dist/tts/tts.js.map +1 -1
  78. package/dist/types.cjs +21 -32
  79. package/dist/types.cjs.map +1 -1
  80. package/dist/types.d.cts +41 -10
  81. package/dist/types.d.ts +41 -10
  82. package/dist/types.d.ts.map +1 -1
  83. package/dist/types.js +18 -30
  84. package/dist/types.js.map +1 -1
  85. package/dist/voice/agent.cjs +54 -19
  86. package/dist/voice/agent.cjs.map +1 -1
  87. package/dist/voice/agent.d.ts.map +1 -1
  88. package/dist/voice/agent.js +54 -19
  89. package/dist/voice/agent.js.map +1 -1
  90. package/dist/voice/agent_activity.cjs +0 -3
  91. package/dist/voice/agent_activity.cjs.map +1 -1
  92. package/dist/voice/agent_activity.d.ts.map +1 -1
  93. package/dist/voice/agent_activity.js +0 -3
  94. package/dist/voice/agent_activity.js.map +1 -1
  95. package/dist/voice/agent_session.cjs +106 -28
  96. package/dist/voice/agent_session.cjs.map +1 -1
  97. package/dist/voice/agent_session.d.cts +16 -2
  98. package/dist/voice/agent_session.d.ts +16 -2
  99. package/dist/voice/agent_session.d.ts.map +1 -1
  100. package/dist/voice/agent_session.js +109 -28
  101. package/dist/voice/agent_session.js.map +1 -1
  102. package/dist/voice/events.cjs.map +1 -1
  103. package/dist/voice/events.d.cts +4 -4
  104. package/dist/voice/events.d.ts +4 -4
  105. package/dist/voice/events.d.ts.map +1 -1
  106. package/dist/voice/events.js.map +1 -1
  107. package/dist/voice/generation.cjs +6 -7
  108. package/dist/voice/generation.cjs.map +1 -1
  109. package/dist/voice/generation.d.ts.map +1 -1
  110. package/dist/voice/generation.js +7 -8
  111. package/dist/voice/generation.js.map +1 -1
  112. package/dist/voice/io.cjs +16 -0
  113. package/dist/voice/io.cjs.map +1 -1
  114. package/dist/voice/io.d.cts +8 -0
  115. package/dist/voice/io.d.ts +8 -0
  116. package/dist/voice/io.d.ts.map +1 -1
  117. package/dist/voice/io.js +16 -0
  118. package/dist/voice/io.js.map +1 -1
  119. package/dist/voice/recorder_io/index.cjs +23 -0
  120. package/dist/voice/recorder_io/index.cjs.map +1 -0
  121. package/dist/voice/recorder_io/index.d.cts +2 -0
  122. package/dist/voice/recorder_io/index.d.ts +2 -0
  123. package/dist/voice/recorder_io/index.d.ts.map +1 -0
  124. package/dist/voice/recorder_io/index.js +2 -0
  125. package/dist/voice/recorder_io/index.js.map +1 -0
  126. package/dist/voice/recorder_io/recorder_io.cjs +542 -0
  127. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
  128. package/dist/voice/recorder_io/recorder_io.d.cts +100 -0
  129. package/dist/voice/recorder_io/recorder_io.d.ts +100 -0
  130. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
  131. package/dist/voice/recorder_io/recorder_io.js +508 -0
  132. package/dist/voice/recorder_io/recorder_io.js.map +1 -0
  133. package/dist/voice/report.cjs +7 -2
  134. package/dist/voice/report.cjs.map +1 -1
  135. package/dist/voice/report.d.cts +11 -1
  136. package/dist/voice/report.d.ts +11 -1
  137. package/dist/voice/report.d.ts.map +1 -1
  138. package/dist/voice/report.js +7 -2
  139. package/dist/voice/report.js.map +1 -1
  140. package/dist/voice/room_io/_input.cjs +2 -1
  141. package/dist/voice/room_io/_input.cjs.map +1 -1
  142. package/dist/voice/room_io/_input.d.ts.map +1 -1
  143. package/dist/voice/room_io/_input.js +2 -1
  144. package/dist/voice/room_io/_input.js.map +1 -1
  145. package/dist/voice/room_io/_output.cjs +8 -7
  146. package/dist/voice/room_io/_output.cjs.map +1 -1
  147. package/dist/voice/room_io/_output.d.cts +2 -1
  148. package/dist/voice/room_io/_output.d.ts +2 -1
  149. package/dist/voice/room_io/_output.d.ts.map +1 -1
  150. package/dist/voice/room_io/_output.js +8 -7
  151. package/dist/voice/room_io/_output.js.map +1 -1
  152. package/dist/worker.cjs +4 -3
  153. package/dist/worker.cjs.map +1 -1
  154. package/dist/worker.js +4 -3
  155. package/dist/worker.js.map +1 -1
  156. package/package.json +1 -1
  157. package/src/inference/llm.ts +0 -1
  158. package/src/inference/stt.ts +1 -2
  159. package/src/inference/tts.ts +5 -2
  160. package/src/ipc/job_proc_lazy_main.ts +1 -1
  161. package/src/job.ts +21 -2
  162. package/src/llm/llm.ts +2 -2
  163. package/src/stream/deferred_stream.test.ts +3 -3
  164. package/src/stream/deferred_stream.ts +22 -5
  165. package/src/stt/stream_adapter.ts +18 -8
  166. package/src/stt/stt.ts +19 -6
  167. package/src/telemetry/traces.ts +25 -3
  168. package/src/tts/stream_adapter.ts +5 -4
  169. package/src/tts/tts.ts +6 -4
  170. package/src/types.ts +57 -33
  171. package/src/voice/agent.ts +59 -19
  172. package/src/voice/agent_activity.ts +0 -3
  173. package/src/voice/agent_session.ts +141 -36
  174. package/src/voice/events.ts +6 -3
  175. package/src/voice/generation.ts +10 -8
  176. package/src/voice/io.ts +19 -0
  177. package/src/voice/recorder_io/index.ts +4 -0
  178. package/src/voice/recorder_io/recorder_io.ts +690 -0
  179. package/src/voice/report.ts +20 -3
  180. package/src/voice/room_io/_input.ts +2 -1
  181. package/src/voice/room_io/_output.ts +10 -7
  182. package/src/worker.ts +1 -1
@@ -9,15 +9,22 @@ import type {
9
9
  import { IdentityTransform } from './identity_transform.js';
10
10
 
11
11
  /**
12
- * Check if error is related to reader.read after release lock
12
+ * Check if error is related to stream cleanup operations.
13
+ *
14
+ * These errors are expected when calling reader.read() after releaseLock()
15
+ * or when writing to already closed streams during cleanup:
13
16
  *
14
17
  * Invalid state: Releasing reader
15
18
  * Invalid state: The reader is not attached to a stream
19
+ * Invalid state: Controller is already closed
20
+ * Invalid state: WritableStream is closed
16
21
  */
17
22
  export function isStreamReaderReleaseError(e: unknown) {
18
23
  const allowedMessages = [
19
24
  'Invalid state: Releasing reader',
20
25
  'Invalid state: The reader is not attached to a stream',
26
+ 'Controller is already closed',
27
+ 'WritableStream is closed',
21
28
  ];
22
29
 
23
30
  if (e instanceof TypeError) {
@@ -66,18 +73,27 @@ export class DeferredReadableStream<T> {
66
73
  await this.writer.write(value);
67
74
  }
68
75
  } catch (e) {
69
- // skip source detach related errors
76
+ // skip stream cleanup related errors
70
77
  if (isStreamReaderReleaseError(e)) return;
78
+
71
79
  sourceError = e;
72
80
  } finally {
73
81
  // any other error from source will be propagated to the consumer
74
82
  if (sourceError) {
75
- this.writer.abort(sourceError);
83
+ try {
84
+ this.writer.abort(sourceError);
85
+ } catch (e) {
86
+ // ignore if writer is already closed
87
+ }
76
88
  return;
77
89
  }
78
90
 
79
91
  // release lock so this.stream.getReader().read() will terminate with done: true
80
- this.writer.releaseLock();
92
+ try {
93
+ this.writer.releaseLock();
94
+ } catch (e) {
95
+ // ignore if writer lock is already released
96
+ }
81
97
 
82
98
  // we only close the writable stream after done
83
99
  try {
@@ -98,7 +114,8 @@ export class DeferredReadableStream<T> {
98
114
  */
99
115
  async detachSource() {
100
116
  if (!this.isSourceSet) {
101
- throw new Error('Source not set');
117
+ // No-op if source was never set - this is a common case during cleanup
118
+ return;
102
119
  }
103
120
 
104
121
  // release lock will make any pending read() throw TypeError
@@ -3,6 +3,7 @@
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { AudioFrame } from '@livekit/rtc-node';
5
5
  import { log } from '../log.js';
6
+ import type { APIConnectOptions } from '../types.js';
6
7
  import type { VAD, VADStream } from '../vad.js';
7
8
  import { VADEventType } from '../vad.js';
8
9
  import type { SpeechEvent } from './stt.js';
@@ -22,14 +23,18 @@ export class StreamAdapter extends STT {
22
23
  this.#stt.on('metrics_collected', (metrics) => {
23
24
  this.emit('metrics_collected', metrics);
24
25
  });
26
+
27
+ this.#stt.on('error', (error) => {
28
+ this.emit('error', error);
29
+ });
25
30
  }
26
31
 
27
- _recognize(frame: AudioFrame): Promise<SpeechEvent> {
28
- return this.#stt.recognize(frame);
32
+ _recognize(frame: AudioFrame, abortSignal?: AbortSignal): Promise<SpeechEvent> {
33
+ return this.#stt.recognize(frame, abortSignal);
29
34
  }
30
35
 
31
- stream(): StreamAdapterWrapper {
32
- return new StreamAdapterWrapper(this.#stt, this.#vad);
36
+ stream(options?: { connOptions?: APIConnectOptions }): StreamAdapterWrapper {
37
+ return new StreamAdapterWrapper(this.#stt, this.#vad, options?.connOptions);
33
38
  }
34
39
  }
35
40
 
@@ -38,13 +43,18 @@ export class StreamAdapterWrapper extends SpeechStream {
38
43
  #vadStream: VADStream;
39
44
  label: string;
40
45
 
41
- constructor(stt: STT, vad: VAD) {
42
- super(stt);
46
+ constructor(stt: STT, vad: VAD, connOptions?: APIConnectOptions) {
47
+ super(stt, undefined, connOptions);
43
48
  this.#stt = stt;
44
49
  this.#vadStream = vad.stream();
45
50
  this.label = `stt.StreamAdapterWrapper<${this.#stt.label}>`;
46
51
  }
47
52
 
53
+ close() {
54
+ super.close();
55
+ this.#vadStream.close();
56
+ }
57
+
48
58
  async monitorMetrics() {
49
59
  return; // do nothing
50
60
  }
@@ -71,7 +81,7 @@ export class StreamAdapterWrapper extends SpeechStream {
71
81
  this.output.put({ type: SpeechEventType.END_OF_SPEECH });
72
82
 
73
83
  try {
74
- const event = await this.#stt.recognize(ev.frames);
84
+ const event = await this.#stt.recognize(ev.frames, this.abortSignal);
75
85
  if (!event.alternatives![0].text) {
76
86
  continue;
77
87
  }
@@ -92,6 +102,6 @@ export class StreamAdapterWrapper extends SpeechStream {
92
102
  }
93
103
  };
94
104
 
95
- Promise.all([forwardInput(), recognize()]);
105
+ await Promise.all([forwardInput(), recognize()]);
96
106
  }
97
107
  }
package/src/stt/stt.ts CHANGED
@@ -10,7 +10,7 @@ import { calculateAudioDurationSeconds } from '../audio.js';
10
10
  import { log } from '../log.js';
11
11
  import type { STTMetrics } from '../metrics/base.js';
12
12
  import { DeferredReadableStream } from '../stream/deferred_stream.js';
13
- import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
13
+ import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, intervalForRetry } from '../types.js';
14
14
  import type { AudioBuffer } from '../utils.js';
15
15
  import { AsyncIterableQueue, delay, startSoon, toError } from '../utils.js';
16
16
 
@@ -113,9 +113,9 @@ export abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCal
113
113
  }
114
114
 
115
115
  /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */
116
- async recognize(frame: AudioBuffer): Promise<SpeechEvent> {
116
+ async recognize(frame: AudioBuffer, abortSignal?: AbortSignal): Promise<SpeechEvent> {
117
117
  const startTime = process.hrtime.bigint();
118
- const event = await this._recognize(frame);
118
+ const event = await this._recognize(frame, abortSignal);
119
119
  const durationMs = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));
120
120
  this.emit('metrics_collected', {
121
121
  type: 'stt_metrics',
@@ -128,13 +128,19 @@ export abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCal
128
128
  });
129
129
  return event;
130
130
  }
131
- protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;
131
+
132
+ protected abstract _recognize(
133
+ frame: AudioBuffer,
134
+ abortSignal?: AbortSignal,
135
+ ): Promise<SpeechEvent>;
132
136
 
133
137
  /**
134
138
  * Returns a {@link SpeechStream} that can be used to push audio frames and receive
135
139
  * transcriptions
140
+ *
141
+ * @param options - Optional configuration including connection options
136
142
  */
137
- abstract stream(): SpeechStream;
143
+ abstract stream(options?: { connOptions?: APIConnectOptions }): SpeechStream;
138
144
 
139
145
  async close(): Promise<void> {
140
146
  return;
@@ -171,6 +177,8 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
171
177
  private logger = log();
172
178
  private _connOptions: APIConnectOptions;
173
179
 
180
+ protected abortController = new AbortController();
181
+
174
182
  constructor(
175
183
  stt: STT,
176
184
  sampleRate?: number,
@@ -196,7 +204,7 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
196
204
  return await this.run();
197
205
  } catch (error) {
198
206
  if (error instanceof APIError) {
199
- const retryInterval = this._connOptions._intervalForRetry(i);
207
+ const retryInterval = intervalForRetry(this._connOptions, i);
200
208
 
201
209
  if (this._connOptions.maxRetry === 0 || !error.retryable) {
202
210
  this.emitError({ error, recoverable: false });
@@ -288,6 +296,10 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
288
296
 
289
297
  protected abstract run(): Promise<void>;
290
298
 
299
+ protected get abortSignal(): AbortSignal {
300
+ return this.abortController.signal;
301
+ }
302
+
291
303
  updateInputStream(audioStream: ReadableStream<AudioFrame>) {
292
304
  this.deferredInputStream.setSource(audioStream);
293
305
  }
@@ -352,6 +364,7 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
352
364
  if (!this.input.closed) this.input.close();
353
365
  if (!this.queue.closed) this.queue.close();
354
366
  if (!this.output.closed) this.output.close();
367
+ if (!this.abortController.signal.aborted) this.abortController.abort();
355
368
  this.closed = true;
356
369
  }
357
370
 
@@ -21,6 +21,7 @@ import { BatchSpanProcessor, NodeTracerProvider } from '@opentelemetry/sdk-trace
21
21
  import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions';
22
22
  import FormData from 'form-data';
23
23
  import { AccessToken } from 'livekit-server-sdk';
24
+ import fs from 'node:fs/promises';
24
25
  import type { ChatContent, ChatItem } from '../llm/index.js';
25
26
  import { enableOtelLogging } from '../log.js';
26
27
  import type { SessionReport } from '../voice/report.js';
@@ -497,12 +498,13 @@ export async function uploadSessionReport(options: {
497
498
  const formData = new FormData();
498
499
 
499
500
  // Add header (protobuf MetricsRecordingHeader)
501
+ const audioStartTime = report.audioRecordingStartedAt ?? 0;
500
502
  const headerMsg = new MetricsRecordingHeader({
501
503
  roomId: report.roomId,
502
504
  duration: BigInt(0), // TODO: Calculate actual duration from report
503
505
  startTime: {
504
- seconds: BigInt(Math.floor(report.timestamp / 1000)),
505
- nanos: Math.floor((report.timestamp % 1000) * 1e6),
506
+ seconds: BigInt(Math.floor(audioStartTime / 1000)),
507
+ nanos: Math.floor((audioStartTime % 1000) * 1e6),
506
508
  },
507
509
  });
508
510
 
@@ -530,7 +532,27 @@ export async function uploadSessionReport(options: {
530
532
  },
531
533
  });
532
534
 
533
- // TODO(brian): Add audio recording file when recorder IO is implemented
535
+ // Add audio recording file if available
536
+ if (report.audioRecordingPath && report.audioRecordingStartedAt) {
537
+ let audioBytes: Buffer;
538
+ try {
539
+ audioBytes = await fs.readFile(report.audioRecordingPath);
540
+ } catch {
541
+ audioBytes = Buffer.alloc(0);
542
+ }
543
+
544
+ if (audioBytes.length > 0) {
545
+ formData.append('audio', audioBytes, {
546
+ filename: 'recording.ogg',
547
+ contentType: 'audio/ogg',
548
+ knownLength: audioBytes.length,
549
+ header: {
550
+ 'Content-Type': 'audio/ogg',
551
+ 'Content-Length': audioBytes.length.toString(),
552
+ },
553
+ });
554
+ }
555
+ }
534
556
 
535
557
  // Upload to LiveKit Cloud using form-data's submit method
536
558
  // This properly streams the multipart form with all headers including Content-Length
@@ -2,6 +2,7 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';
5
+ import type { APIConnectOptions } from '../types.js';
5
6
  import { Task } from '../utils.js';
6
7
  import type { ChunkedStream } from './tts.js';
7
8
  import { SynthesizeStream, TTS } from './tts.js';
@@ -27,8 +28,8 @@ export class StreamAdapter extends TTS {
27
28
  return this.#tts.synthesize(text);
28
29
  }
29
30
 
30
- stream(): StreamAdapterWrapper {
31
- return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer);
31
+ stream(options?: { connOptions?: APIConnectOptions }): StreamAdapterWrapper {
32
+ return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer, options?.connOptions);
32
33
  }
33
34
  }
34
35
 
@@ -37,8 +38,8 @@ export class StreamAdapterWrapper extends SynthesizeStream {
37
38
  #sentenceStream: SentenceStream;
38
39
  label: string;
39
40
 
40
- constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {
41
- super(tts);
41
+ constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer, connOptions?: APIConnectOptions) {
42
+ super(tts, connOptions);
42
43
  this.#tts = tts;
43
44
  this.#sentenceStream = sentenceTokenizer.stream();
44
45
  this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;
package/src/tts/tts.ts CHANGED
@@ -11,7 +11,7 @@ import { log } from '../log.js';
11
11
  import type { TTSMetrics } from '../metrics/base.js';
12
12
  import { DeferredReadableStream } from '../stream/deferred_stream.js';
13
13
  import { recordException, traceTypes, tracer } from '../telemetry/index.js';
14
- import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
14
+ import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS, intervalForRetry } from '../types.js';
15
15
  import { AsyncIterableQueue, delay, mergeFrames, startSoon, toError } from '../utils.js';
16
16
 
17
17
  /** SynthesizedAudio is a packet of speech synthesis as returned by the TTS. */
@@ -94,8 +94,10 @@ export abstract class TTS extends (EventEmitter as new () => TypedEmitter<TTSCal
94
94
 
95
95
  /**
96
96
  * Returns a {@link SynthesizeStream} that can be used to push text and receive audio data
97
+ *
98
+ * @param options - Optional configuration including connection options
97
99
  */
98
- abstract stream(): SynthesizeStream;
100
+ abstract stream(options?: { connOptions?: APIConnectOptions }): SynthesizeStream;
99
101
 
100
102
  async close(): Promise<void> {
101
103
  return;
@@ -186,7 +188,7 @@ export abstract class SynthesizeStream
186
188
  );
187
189
  } catch (error) {
188
190
  if (error instanceof APIError) {
189
- const retryInterval = this._connOptions._intervalForRetry(i);
191
+ const retryInterval = intervalForRetry(this._connOptions, i);
190
192
 
191
193
  if (this._connOptions.maxRetry === 0 || !error.retryable) {
192
194
  this.emitError({ error, recoverable: false });
@@ -454,7 +456,7 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
454
456
  );
455
457
  } catch (error) {
456
458
  if (error instanceof APIError) {
457
- const retryInterval = this._connOptions._intervalForRetry(i);
459
+ const retryInterval = intervalForRetry(this._connOptions, i);
458
460
 
459
461
  if (this._connOptions.maxRetry === 0 || !error.retryable) {
460
462
  this.emitError({ error, recoverable: false });
package/src/types.ts CHANGED
@@ -1,42 +1,66 @@
1
1
  // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- export class APIConnectOptions {
5
- /** Maximum number of retries to connect to the API. */
6
- readonly maxRetry: number;
7
- /** Interval between retries to connect to the API in milliseconds. */
8
- readonly retryIntervalMs: number;
9
- /** Timeout for connecting to the API in milliseconds. */
10
- readonly timeoutMs: number;
11
4
 
12
- constructor(options: Partial<APIConnectOptions> = {}) {
13
- this.maxRetry = options.maxRetry ?? 3;
14
- this.retryIntervalMs = options.retryIntervalMs ?? 2000;
15
- this.timeoutMs = options.timeoutMs ?? 10000;
5
+ /**
6
+ * Connection options for API calls, controlling retry and timeout behavior.
7
+ */
8
+ export interface APIConnectOptions {
9
+ /** Maximum number of retries to connect to the API. Default: 3 */
10
+ maxRetry: number;
11
+ /** Interval between retries to connect to the API in milliseconds. Default: 2000 */
12
+ retryIntervalMs: number;
13
+ /** Timeout for connecting to the API in milliseconds. Default: 10000 */
14
+ timeoutMs: number;
15
+ }
16
16
 
17
- if (this.maxRetry < 0) {
18
- throw new Error('maxRetry must be greater than or equal to 0');
19
- }
20
- if (this.retryIntervalMs < 0) {
21
- throw new Error('retryIntervalMs must be greater than or equal to 0');
22
- }
23
- if (this.timeoutMs < 0) {
24
- throw new Error('timeoutMs must be greater than or equal to 0');
25
- }
26
- }
17
+ export const DEFAULT_API_CONNECT_OPTIONS: APIConnectOptions = {
18
+ maxRetry: 3,
19
+ retryIntervalMs: 2000,
20
+ timeoutMs: 10000,
21
+ };
27
22
 
28
- /** @internal */
29
- _intervalForRetry(numRetries: number): number {
30
- /**
31
- * Return the interval for the given number of retries.
32
- *
33
- * The first retry is immediate, and then uses specified retryIntervalMs
34
- */
35
- if (numRetries === 0) {
36
- return 0.1;
37
- }
38
- return this.retryIntervalMs;
23
+ /**
24
+ * Return the interval for the given number of retries.
25
+ * The first retry is immediate, and then uses specified retryIntervalMs.
26
+ * @internal
27
+ */
28
+ export function intervalForRetry(connOptions: APIConnectOptions, numRetries: number): number {
29
+ if (numRetries === 0) {
30
+ return 0.1;
39
31
  }
32
+ return connOptions.retryIntervalMs;
33
+ }
34
+
35
+ /**
36
+ * Connection options for the agent session, controlling retry and timeout behavior
37
+ * for STT, LLM, and TTS connections.
38
+ */
39
+ export interface SessionConnectOptions {
40
+ /** Connection options for speech-to-text. */
41
+ sttConnOptions?: Partial<APIConnectOptions>;
42
+ /** Connection options for the language model. */
43
+ llmConnOptions?: Partial<APIConnectOptions>;
44
+ /** Connection options for text-to-speech. */
45
+ ttsConnOptions?: Partial<APIConnectOptions>;
46
+ /** Maximum number of consecutive unrecoverable errors from LLM or TTS before closing the session. Default: 3 */
47
+ maxUnrecoverableErrors?: number;
48
+ }
49
+
50
+ /**
51
+ * Resolved session connect options with all values populated.
52
+ * @internal
53
+ */
54
+ export interface ResolvedSessionConnectOptions {
55
+ sttConnOptions: APIConnectOptions;
56
+ llmConnOptions: APIConnectOptions;
57
+ ttsConnOptions: APIConnectOptions;
58
+ maxUnrecoverableErrors: number;
40
59
  }
41
60
 
42
- export const DEFAULT_API_CONNECT_OPTIONS = new APIConnectOptions();
61
+ export const DEFAULT_SESSION_CONNECT_OPTIONS: ResolvedSessionConnectOptions = {
62
+ sttConnOptions: DEFAULT_API_CONNECT_OPTIONS,
63
+ llmConnOptions: DEFAULT_API_CONNECT_OPTIONS,
64
+ ttsConnOptions: DEFAULT_API_CONNECT_OPTIONS,
65
+ maxUnrecoverableErrors: 3,
66
+ };
@@ -260,27 +260,41 @@ export class Agent<UserData = any> {
260
260
  let wrapped_stt = activity.stt;
261
261
 
262
262
  if (!wrapped_stt.capabilities.streaming) {
263
- if (!agent.vad) {
263
+ const vad = agent.vad || activity.vad;
264
+ if (!vad) {
264
265
  throw new Error(
265
266
  'STT does not support streaming, add a VAD to the AgentTask/VoiceAgent to enable streaming',
266
267
  );
267
268
  }
268
- wrapped_stt = new STTStreamAdapter(wrapped_stt, agent.vad);
269
+ wrapped_stt = new STTStreamAdapter(wrapped_stt, vad);
269
270
  }
270
271
 
271
- const stream = wrapped_stt.stream();
272
+ const connOptions = activity.agentSession.connOptions.sttConnOptions;
273
+ const stream = wrapped_stt.stream({ connOptions });
272
274
  stream.updateInputStream(audio);
273
275
 
276
+ let cleaned = false;
277
+ const cleanup = () => {
278
+ if (cleaned) return;
279
+ cleaned = true;
280
+ stream.detachInputStream();
281
+ stream.close();
282
+ };
283
+
274
284
  return new ReadableStream({
275
285
  async start(controller) {
276
- for await (const event of stream) {
277
- controller.enqueue(event);
286
+ try {
287
+ for await (const event of stream) {
288
+ controller.enqueue(event);
289
+ }
290
+ controller.close();
291
+ } finally {
292
+ // Always clean up the STT stream, whether it ends naturally or is cancelled
293
+ cleanup();
278
294
  }
279
- controller.close();
280
295
  },
281
296
  cancel() {
282
- stream.detachInputStream();
283
- stream.close();
297
+ cleanup();
284
298
  },
285
299
  });
286
300
  },
@@ -304,22 +318,36 @@ export class Agent<UserData = any> {
304
318
 
305
319
  // TODO(brian): make parallelToolCalls configurable
306
320
  const { toolChoice } = modelSettings;
321
+ const connOptions = activity.agentSession.connOptions.llmConnOptions;
307
322
 
308
323
  const stream = activity.llm.chat({
309
324
  chatCtx,
310
325
  toolCtx,
311
326
  toolChoice,
327
+ connOptions,
312
328
  parallelToolCalls: true,
313
329
  });
330
+
331
+ let cleaned = false;
332
+ const cleanup = () => {
333
+ if (cleaned) return;
334
+ cleaned = true;
335
+ stream.close();
336
+ };
337
+
314
338
  return new ReadableStream({
315
339
  async start(controller) {
316
- for await (const chunk of stream) {
317
- controller.enqueue(chunk);
340
+ try {
341
+ for await (const chunk of stream) {
342
+ controller.enqueue(chunk);
343
+ }
344
+ controller.close();
345
+ } finally {
346
+ cleanup();
318
347
  }
319
- controller.close();
320
348
  },
321
349
  cancel() {
322
- stream.close();
350
+ cleanup();
323
351
  },
324
352
  });
325
353
  },
@@ -340,21 +368,33 @@ export class Agent<UserData = any> {
340
368
  wrapped_tts = new TTSStreamAdapter(wrapped_tts, new BasicSentenceTokenizer());
341
369
  }
342
370
 
343
- const stream = wrapped_tts.stream();
371
+ const connOptions = activity.agentSession.connOptions.ttsConnOptions;
372
+ const stream = wrapped_tts.stream({ connOptions });
344
373
  stream.updateInputStream(text);
345
374
 
375
+ let cleaned = false;
376
+ const cleanup = () => {
377
+ if (cleaned) return;
378
+ cleaned = true;
379
+ stream.close();
380
+ };
381
+
346
382
  return new ReadableStream({
347
383
  async start(controller) {
348
- for await (const chunk of stream) {
349
- if (chunk === SynthesizeStream.END_OF_STREAM) {
350
- break;
384
+ try {
385
+ for await (const chunk of stream) {
386
+ if (chunk === SynthesizeStream.END_OF_STREAM) {
387
+ break;
388
+ }
389
+ controller.enqueue(chunk.frame);
351
390
  }
352
- controller.enqueue(chunk.frame);
391
+ controller.close();
392
+ } finally {
393
+ cleanup();
353
394
  }
354
- controller.close();
355
395
  },
356
396
  cancel() {
357
- stream.close();
397
+ cleanup();
358
398
  },
359
399
  });
360
400
  },
@@ -2259,15 +2259,12 @@ export class AgentActivity implements RecognitionHooks {
2259
2259
  }
2260
2260
  if (this.stt instanceof STT) {
2261
2261
  this.stt.off('metrics_collected', this.onMetricsCollected);
2262
- await this.stt.close();
2263
2262
  }
2264
2263
  if (this.tts instanceof TTS) {
2265
2264
  this.tts.off('metrics_collected', this.onMetricsCollected);
2266
- await this.tts.close();
2267
2265
  }
2268
2266
  if (this.vad instanceof VAD) {
2269
2267
  this.vad.off('metrics_collected', this.onMetricsCollected);
2270
- await this.vad.close();
2271
2268
  }
2272
2269
 
2273
2270
  this.detachAudioInput();