@livekit/agents 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205) hide show
  1. package/dist/_exceptions.cjs.map +1 -1
  2. package/dist/_exceptions.d.ts.map +1 -1
  3. package/dist/_exceptions.js.map +1 -1
  4. package/dist/audio.cjs +10 -0
  5. package/dist/audio.cjs.map +1 -1
  6. package/dist/audio.d.cts +1 -1
  7. package/dist/audio.d.ts +1 -1
  8. package/dist/audio.d.ts.map +1 -1
  9. package/dist/audio.js +10 -0
  10. package/dist/audio.js.map +1 -1
  11. package/dist/beta/workflows/task_group.cjs +7 -4
  12. package/dist/beta/workflows/task_group.cjs.map +1 -1
  13. package/dist/beta/workflows/task_group.d.ts.map +1 -1
  14. package/dist/beta/workflows/task_group.js +7 -4
  15. package/dist/beta/workflows/task_group.js.map +1 -1
  16. package/dist/inference/api_protos.d.cts +26 -26
  17. package/dist/inference/api_protos.d.ts +26 -26
  18. package/dist/inference/interruption/http_transport.cjs.map +1 -1
  19. package/dist/inference/interruption/http_transport.d.cts +3 -1
  20. package/dist/inference/interruption/http_transport.d.ts +3 -1
  21. package/dist/inference/interruption/http_transport.d.ts.map +1 -1
  22. package/dist/inference/interruption/http_transport.js.map +1 -1
  23. package/dist/inference/interruption/ws_transport.cjs +37 -32
  24. package/dist/inference/interruption/ws_transport.cjs.map +1 -1
  25. package/dist/inference/interruption/ws_transport.d.ts.map +1 -1
  26. package/dist/inference/interruption/ws_transport.js +37 -32
  27. package/dist/inference/interruption/ws_transport.js.map +1 -1
  28. package/dist/inference/tts.cjs +14 -1
  29. package/dist/inference/tts.cjs.map +1 -1
  30. package/dist/inference/tts.d.cts +42 -4
  31. package/dist/inference/tts.d.ts +42 -4
  32. package/dist/inference/tts.d.ts.map +1 -1
  33. package/dist/inference/tts.js +24 -3
  34. package/dist/inference/tts.js.map +1 -1
  35. package/dist/inference/tts.test.cjs +72 -0
  36. package/dist/inference/tts.test.cjs.map +1 -1
  37. package/dist/inference/tts.test.js +72 -0
  38. package/dist/inference/tts.test.js.map +1 -1
  39. package/dist/ipc/job_proc_lazy_main.cjs +7 -2
  40. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  41. package/dist/ipc/job_proc_lazy_main.js +7 -2
  42. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  43. package/dist/ipc/supervised_proc.cjs +4 -1
  44. package/dist/ipc/supervised_proc.cjs.map +1 -1
  45. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  46. package/dist/ipc/supervised_proc.js +4 -1
  47. package/dist/ipc/supervised_proc.js.map +1 -1
  48. package/dist/ipc/supervised_proc.test.cjs +82 -0
  49. package/dist/ipc/supervised_proc.test.cjs.map +1 -1
  50. package/dist/ipc/supervised_proc.test.js +82 -0
  51. package/dist/ipc/supervised_proc.test.js.map +1 -1
  52. package/dist/job.cjs +2 -1
  53. package/dist/job.cjs.map +1 -1
  54. package/dist/job.d.ts.map +1 -1
  55. package/dist/job.js +2 -1
  56. package/dist/job.js.map +1 -1
  57. package/dist/llm/chat_context.cjs +102 -31
  58. package/dist/llm/chat_context.cjs.map +1 -1
  59. package/dist/llm/chat_context.d.ts.map +1 -1
  60. package/dist/llm/chat_context.js +102 -31
  61. package/dist/llm/chat_context.js.map +1 -1
  62. package/dist/llm/chat_context.test.cjs +123 -5
  63. package/dist/llm/chat_context.test.cjs.map +1 -1
  64. package/dist/llm/chat_context.test.js +123 -5
  65. package/dist/llm/chat_context.test.js.map +1 -1
  66. package/dist/llm/fallback_adapter.cjs +2 -0
  67. package/dist/llm/fallback_adapter.cjs.map +1 -1
  68. package/dist/llm/fallback_adapter.d.ts.map +1 -1
  69. package/dist/llm/fallback_adapter.js +2 -0
  70. package/dist/llm/fallback_adapter.js.map +1 -1
  71. package/dist/llm/index.cjs +2 -0
  72. package/dist/llm/index.cjs.map +1 -1
  73. package/dist/llm/index.d.cts +1 -1
  74. package/dist/llm/index.d.ts +1 -1
  75. package/dist/llm/index.d.ts.map +1 -1
  76. package/dist/llm/index.js +2 -0
  77. package/dist/llm/index.js.map +1 -1
  78. package/dist/llm/utils.cjs +89 -0
  79. package/dist/llm/utils.cjs.map +1 -1
  80. package/dist/llm/utils.d.cts +8 -0
  81. package/dist/llm/utils.d.ts +8 -0
  82. package/dist/llm/utils.d.ts.map +1 -1
  83. package/dist/llm/utils.js +88 -0
  84. package/dist/llm/utils.js.map +1 -1
  85. package/dist/llm/utils.test.cjs +90 -0
  86. package/dist/llm/utils.test.cjs.map +1 -1
  87. package/dist/llm/utils.test.js +98 -2
  88. package/dist/llm/utils.test.js.map +1 -1
  89. package/dist/stt/stt.cjs +8 -0
  90. package/dist/stt/stt.cjs.map +1 -1
  91. package/dist/stt/stt.d.cts +8 -0
  92. package/dist/stt/stt.d.ts +8 -0
  93. package/dist/stt/stt.d.ts.map +1 -1
  94. package/dist/stt/stt.js +8 -0
  95. package/dist/stt/stt.js.map +1 -1
  96. package/dist/tts/fallback_adapter.cjs +6 -0
  97. package/dist/tts/fallback_adapter.cjs.map +1 -1
  98. package/dist/tts/fallback_adapter.d.ts.map +1 -1
  99. package/dist/tts/fallback_adapter.js +6 -0
  100. package/dist/tts/fallback_adapter.js.map +1 -1
  101. package/dist/typed_promise.cjs +48 -0
  102. package/dist/typed_promise.cjs.map +1 -0
  103. package/dist/typed_promise.d.cts +24 -0
  104. package/dist/typed_promise.d.ts +24 -0
  105. package/dist/typed_promise.d.ts.map +1 -0
  106. package/dist/typed_promise.js +28 -0
  107. package/dist/typed_promise.js.map +1 -0
  108. package/dist/utils.cjs +30 -2
  109. package/dist/utils.cjs.map +1 -1
  110. package/dist/utils.d.cts +18 -0
  111. package/dist/utils.d.ts +18 -0
  112. package/dist/utils.d.ts.map +1 -1
  113. package/dist/utils.js +27 -2
  114. package/dist/utils.js.map +1 -1
  115. package/dist/version.cjs +1 -1
  116. package/dist/version.js +1 -1
  117. package/dist/voice/agent_activity.cjs +10 -0
  118. package/dist/voice/agent_activity.cjs.map +1 -1
  119. package/dist/voice/agent_activity.d.ts.map +1 -1
  120. package/dist/voice/agent_activity.js +11 -0
  121. package/dist/voice/agent_activity.js.map +1 -1
  122. package/dist/voice/agent_session.cjs +1 -1
  123. package/dist/voice/agent_session.cjs.map +1 -1
  124. package/dist/voice/agent_session.d.cts +4 -2
  125. package/dist/voice/agent_session.d.ts +4 -2
  126. package/dist/voice/agent_session.d.ts.map +1 -1
  127. package/dist/voice/agent_session.js +1 -1
  128. package/dist/voice/agent_session.js.map +1 -1
  129. package/dist/voice/events.cjs +11 -0
  130. package/dist/voice/events.cjs.map +1 -1
  131. package/dist/voice/events.d.cts +12 -1
  132. package/dist/voice/events.d.ts +12 -1
  133. package/dist/voice/events.d.ts.map +1 -1
  134. package/dist/voice/events.js +10 -0
  135. package/dist/voice/events.js.map +1 -1
  136. package/dist/voice/generation.cjs +23 -4
  137. package/dist/voice/generation.cjs.map +1 -1
  138. package/dist/voice/generation.d.ts.map +1 -1
  139. package/dist/voice/generation.js +32 -5
  140. package/dist/voice/generation.js.map +1 -1
  141. package/dist/voice/generation_tts_timeout.test.cjs +85 -0
  142. package/dist/voice/generation_tts_timeout.test.cjs.map +1 -0
  143. package/dist/voice/generation_tts_timeout.test.js +84 -0
  144. package/dist/voice/generation_tts_timeout.test.js.map +1 -0
  145. package/dist/voice/index.cjs.map +1 -1
  146. package/dist/voice/index.d.cts +1 -1
  147. package/dist/voice/index.d.ts +1 -1
  148. package/dist/voice/index.d.ts.map +1 -1
  149. package/dist/voice/index.js +3 -1
  150. package/dist/voice/index.js.map +1 -1
  151. package/dist/voice/recorder_io/recorder_io.cjs +1 -2
  152. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
  153. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
  154. package/dist/voice/recorder_io/recorder_io.js +2 -3
  155. package/dist/voice/recorder_io/recorder_io.js.map +1 -1
  156. package/dist/voice/report.cjs +1 -1
  157. package/dist/voice/report.cjs.map +1 -1
  158. package/dist/voice/report.js +1 -1
  159. package/dist/voice/report.js.map +1 -1
  160. package/dist/voice/report.test.cjs +70 -0
  161. package/dist/voice/report.test.cjs.map +1 -1
  162. package/dist/voice/report.test.js +70 -0
  163. package/dist/voice/report.test.js.map +1 -1
  164. package/dist/voice/room_io/room_io.cjs +5 -1
  165. package/dist/voice/room_io/room_io.cjs.map +1 -1
  166. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  167. package/dist/voice/room_io/room_io.js +5 -1
  168. package/dist/voice/room_io/room_io.js.map +1 -1
  169. package/dist/voice/room_io/room_io.test.cjs +18 -0
  170. package/dist/voice/room_io/room_io.test.cjs.map +1 -0
  171. package/dist/voice/room_io/room_io.test.js +17 -0
  172. package/dist/voice/room_io/room_io.test.js.map +1 -0
  173. package/package.json +4 -2
  174. package/src/_exceptions.ts +5 -0
  175. package/src/audio.ts +12 -1
  176. package/src/beta/workflows/task_group.ts +14 -5
  177. package/src/inference/interruption/http_transport.ts +2 -1
  178. package/src/inference/interruption/ws_transport.ts +44 -34
  179. package/src/inference/tts.test.ts +87 -0
  180. package/src/inference/tts.ts +71 -9
  181. package/src/ipc/job_proc_lazy_main.ts +7 -2
  182. package/src/ipc/supervised_proc.test.ts +96 -0
  183. package/src/ipc/supervised_proc.ts +8 -1
  184. package/src/job.ts +1 -0
  185. package/src/llm/chat_context.test.ts +137 -5
  186. package/src/llm/chat_context.ts +119 -38
  187. package/src/llm/fallback_adapter.ts +5 -2
  188. package/src/llm/index.ts +2 -0
  189. package/src/llm/utils.test.ts +103 -2
  190. package/src/llm/utils.ts +128 -0
  191. package/src/stt/stt.ts +9 -1
  192. package/src/tts/fallback_adapter.ts +9 -2
  193. package/src/typed_promise.ts +67 -0
  194. package/src/utils.ts +45 -2
  195. package/src/voice/agent_activity.ts +11 -0
  196. package/src/voice/agent_session.ts +13 -7
  197. package/src/voice/events.ts +21 -0
  198. package/src/voice/generation.ts +35 -8
  199. package/src/voice/generation_tts_timeout.test.ts +112 -0
  200. package/src/voice/index.ts +6 -1
  201. package/src/voice/recorder_io/recorder_io.ts +2 -7
  202. package/src/voice/report.test.ts +78 -0
  203. package/src/voice/report.ts +1 -1
  204. package/src/voice/room_io/room_io.test.ts +38 -0
  205. package/src/voice/room_io/room_io.ts +7 -2
@@ -1,11 +1,13 @@
1
1
  // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
+ import type { Throws } from '@livekit/throws-transformer/throws';
4
5
  import { TransformStream } from 'stream/web';
5
6
  import WebSocket from 'ws';
6
7
  import { z } from 'zod';
7
8
  import { APIConnectionError, APIStatusError, APITimeoutError } from '../../_exceptions.js';
8
9
  import { log } from '../../log.js';
10
+ import TypedPromise from '../../typed_promise.js';
9
11
  import { createAccessToken } from '../utils.js';
10
12
  import { InterruptionCacheEntry } from './interruption_cache_entry.js';
11
13
  import type { OverlappingSpeechEvent } from './types.js';
@@ -70,7 +72,9 @@ type WsMessage = z.infer<typeof wsMessageSchema>;
70
72
  /**
71
73
  * Creates a WebSocket connection and waits for it to open.
72
74
  */
73
- async function connectWebSocket(options: WsTransportOptions): Promise<WebSocket> {
75
+ async function connectWebSocket(
76
+ options: WsTransportOptions,
77
+ ): Promise<Throws<WebSocket, APIStatusError | APITimeoutError | APIConnectionError>> {
74
78
  const baseUrl = options.baseUrl.replace(/^http/, 'ws');
75
79
  const token = await createAccessToken(options.apiKey, options.apiSecret);
76
80
  const url = `${baseUrl}/bargein`;
@@ -79,37 +83,39 @@ async function connectWebSocket(options: WsTransportOptions): Promise<WebSocket>
79
83
  headers: { Authorization: `Bearer ${token}` },
80
84
  });
81
85
 
82
- await new Promise<void>((resolve, reject) => {
83
- const timeout = setTimeout(() => {
84
- ws.terminate();
85
- reject(
86
- new APITimeoutError({
87
- message: 'WebSocket connection timeout',
88
- options: { retryable: false },
89
- }),
90
- );
91
- }, options.timeout);
92
- ws.once('open', () => {
93
- clearTimeout(timeout);
94
- resolve();
95
- });
96
- ws.once('unexpected-response', (_req, res) => {
97
- clearTimeout(timeout);
98
- ws.terminate();
99
- const statusCode = res.statusCode ?? -1;
100
- reject(
101
- new APIStatusError({
102
- message: `WebSocket connection rejected with status ${statusCode}`,
103
- options: { statusCode, retryable: false },
104
- }),
105
- );
106
- });
107
- ws.once('error', (err: Error) => {
108
- clearTimeout(timeout);
109
- ws.terminate();
110
- reject(new APIConnectionError({ message: `WebSocket connection error: ${err.message}` }));
111
- });
112
- });
86
+ await new TypedPromise<void, APIStatusError | APITimeoutError | APIConnectionError>(
87
+ (resolve, reject) => {
88
+ const timeout = setTimeout(() => {
89
+ ws.terminate();
90
+ reject(
91
+ new APITimeoutError({
92
+ message: 'WebSocket connection timeout',
93
+ options: { retryable: false },
94
+ }),
95
+ );
96
+ }, options.timeout);
97
+ ws.once('open', () => {
98
+ clearTimeout(timeout);
99
+ resolve();
100
+ });
101
+ ws.once('unexpected-response', (_req, res) => {
102
+ clearTimeout(timeout);
103
+ ws.terminate();
104
+ const statusCode = res.statusCode ?? -1;
105
+ reject(
106
+ new APIStatusError({
107
+ message: `WebSocket connection rejected with status ${statusCode}`,
108
+ options: { statusCode, retryable: false },
109
+ }),
110
+ );
111
+ });
112
+ ws.once('error', (err: Error) => {
113
+ clearTimeout(timeout);
114
+ ws.terminate();
115
+ reject(new APIConnectionError({ message: `WebSocket connection error: ${err.message}` }));
116
+ });
117
+ },
118
+ );
113
119
 
114
120
  return ws;
115
121
  }
@@ -159,7 +165,9 @@ export function createWsTransport(
159
165
  });
160
166
  }
161
167
 
162
- async function ensureConnection(): Promise<void> {
168
+ async function ensureConnection(): Promise<
169
+ Throws<void, APIStatusError | APITimeoutError | APIConnectionError>
170
+ > {
163
171
  if (ws && ws.readyState === WebSocket.OPEN) return;
164
172
 
165
173
  ws = await connectWebSocket(options);
@@ -357,7 +365,9 @@ export function createWsTransport(
357
365
  {
358
366
  async start(controller) {
359
367
  outputController = controller;
360
- await ensureConnection();
368
+ await ensureConnection().catch((e) => {
369
+ controller.error(e);
370
+ });
361
371
  },
362
372
 
363
373
  transform(chunk, controller) {
@@ -265,3 +265,90 @@ describe('TTS constructor fallback and connOptions', () => {
265
265
  expect(tts['opts'].connOptions!.retryIntervalMs).toBe(2000);
266
266
  });
267
267
  });
268
+
269
+ describe('TTS provider modelOptions parity', () => {
270
+ it('preserves ElevenLabs inference model options', () => {
271
+ const modelOptions = {
272
+ speed: 1.2,
273
+ stability: 0.5,
274
+ similarity_boost: 0.8,
275
+ enable_logging: false,
276
+ };
277
+
278
+ const tts = new TTS({
279
+ model: 'elevenlabs/eleven_flash_v2_5' as const,
280
+ apiKey: 'test-key',
281
+ apiSecret: 'test-secret',
282
+ baseURL: 'https://example.livekit.cloud',
283
+ modelOptions,
284
+ });
285
+
286
+ expect(tts['opts'].modelOptions).toEqual(modelOptions);
287
+ });
288
+
289
+ it('accepts expanded Cartesia inference model options', () => {
290
+ const modelOptions = {
291
+ speed: 1.15,
292
+ emotion: 'curious',
293
+ add_timestamps: true,
294
+ };
295
+
296
+ const tts = new TTS({
297
+ model: 'cartesia/sonic' as const,
298
+ apiKey: 'test-key',
299
+ apiSecret: 'test-secret',
300
+ baseURL: 'https://example.livekit.cloud',
301
+ modelOptions,
302
+ });
303
+
304
+ expect(tts['opts'].modelOptions).toEqual(modelOptions);
305
+ });
306
+
307
+ it('accepts Deepgram inference model options', () => {
308
+ const modelOptions = { mip_opt_out: true };
309
+
310
+ const tts = new TTS({
311
+ model: 'deepgram/aura-2' as const,
312
+ apiKey: 'test-key',
313
+ apiSecret: 'test-secret',
314
+ baseURL: 'https://example.livekit.cloud',
315
+ modelOptions,
316
+ });
317
+
318
+ expect(tts['opts'].modelOptions).toEqual(modelOptions);
319
+ });
320
+
321
+ it('accepts Rime inference model options', () => {
322
+ const modelOptions = {
323
+ speed_alpha: 0.9,
324
+ pause_between_brackets: true,
325
+ };
326
+
327
+ const tts = new TTS({
328
+ model: 'rime/mistv2' as const,
329
+ apiKey: 'test-key',
330
+ apiSecret: 'test-secret',
331
+ baseURL: 'https://example.livekit.cloud',
332
+ modelOptions,
333
+ });
334
+
335
+ expect(tts['opts'].modelOptions).toEqual(modelOptions);
336
+ });
337
+
338
+ it('accepts Inworld inference model options', () => {
339
+ const modelOptions = {
340
+ timestamp_type: 'WORD' as const,
341
+ apply_text_normalization: 'ON' as const,
342
+ };
343
+
344
+ const tts = new TTS({
345
+ model: 'inworld/inworld-tts-1' as const,
346
+ apiKey: 'test-key',
347
+ apiSecret: 'test-secret',
348
+ baseURL: 'https://example.livekit.cloud',
349
+ modelOptions,
350
+ });
351
+
352
+ expect(tts['opts'].modelOptions).toEqual(modelOptions);
353
+ });
354
+ });
@@ -3,7 +3,7 @@
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { AudioFrame } from '@livekit/rtc-node';
5
5
  import { WebSocket } from 'ws';
6
- import { APIError, APIStatusError } from '../_exceptions.js';
6
+ import { APIError, APIStatusError, APITimeoutError } from '../_exceptions.js';
7
7
  import { AudioByteStream } from '../audio.js';
8
8
  import { ConnectionPool } from '../connection_pool.js';
9
9
  import { type LanguageCode, normalizeLanguage } from '../language.js';
@@ -13,7 +13,15 @@ import { basic as tokenizeBasic } from '../tokenize/index.js';
13
13
  import type { ChunkedStream } from '../tts/index.js';
14
14
  import { SynthesizeStream as BaseSynthesizeStream, TTS as BaseTTS } from '../tts/index.js';
15
15
  import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
16
- import { Event, Future, Task, cancelAndWait, combineSignals, shortuuid } from '../utils.js';
16
+ import {
17
+ Event,
18
+ Future,
19
+ Task,
20
+ cancelAndWait,
21
+ combineSignals,
22
+ shortuuid,
23
+ waitUntilTimeout,
24
+ } from '../utils.js';
17
25
  import {
18
26
  type TtsClientEvent,
19
27
  type TtsServerEvent,
@@ -46,10 +54,16 @@ export type InworldModels =
46
54
  export type RimeModels = 'rime/arcana' | 'rime/mistv2';
47
55
 
48
56
  export interface CartesiaOptions {
57
+ emotion?: string;
49
58
  /** Maximum duration of audio in seconds. */
50
59
  duration?: number;
51
60
  /** Speech speed. Default: not specified. */
52
- speed?: 'slow' | 'normal' | 'fast';
61
+ speed?: 'slow' | 'normal' | 'fast' | number;
62
+ volume?: number;
63
+ max_buffer_delay_ms?: number;
64
+ add_timestamps?: boolean;
65
+ add_phoneme_timestamps?: boolean;
66
+ use_normalized_timestamps?: boolean;
53
67
  }
54
68
 
55
69
  export interface ElevenlabsOptions {
@@ -57,18 +71,50 @@ export interface ElevenlabsOptions {
57
71
  inactivity_timeout?: number;
58
72
  /** Text normalization mode. Default: "auto". */
59
73
  apply_text_normalization?: 'auto' | 'off' | 'on';
74
+ auto_mode?: boolean;
75
+ enable_logging?: boolean;
76
+ enable_ssml_parsing?: boolean;
77
+ sync_alignment?: boolean;
78
+ language_code?: string;
79
+ /** Voice stability tuning, typically in the range [0, 1]. */
80
+ stability?: number;
81
+ /** Voice similarity tuning, typically in the range [0, 1]. */
82
+ similarity_boost?: number;
83
+ /** Style exaggeration tuning, typically in the range [0, 1]. */
84
+ style?: number;
85
+ /** Speech speed multiplier. */
86
+ speed?: number;
87
+ use_speaker_boost?: boolean;
88
+ chunk_length_schedule?: number[];
89
+ preferred_alignment?: string;
60
90
  }
61
91
 
62
- export interface DeepgramTTSOptions {}
92
+ export interface DeepgramTTSOptions {
93
+ /** Default: false. */
94
+ mip_opt_out?: boolean;
95
+ }
63
96
 
64
- export interface RimeOptions {}
97
+ export interface RimeOptions {
98
+ /** Default 1.0, <1 = faster, >1 = slower. */
99
+ speed_alpha?: number;
100
+ /** Default false. */
101
+ pause_between_brackets?: boolean;
102
+ /** Default false. */
103
+ phonemize_between_brackets?: boolean;
104
+ /** Comma-separated speed factors for [bracketed] words. */
105
+ inline_speed_alpha?: string;
106
+ /** Default false. */
107
+ no_text_normalization?: boolean;
108
+ }
65
109
 
66
110
  export interface InworldOptions {
67
- /** Controls how fast the voice speaks. 1.0 is normal speed, 0.5 is half, 1.5 is 1.5x. Default: 1.0. */
111
+ /** Range >0.5, <=1.5. */
68
112
  speaking_rate?: number;
69
- /** Controls randomness in the output. Recommended between 0.6 and 1.1. Default: 1.1. */
113
+ /** Range 0-2. */
70
114
  temperature?: number;
71
- /** Controls text normalization. "ON" expands numbers, dates, abbreviations. "OFF" reads text as written. Default: "ON". */
115
+ timestamp_type?: 'TIMESTAMP_TYPE_UNSPECIFIED' | 'WORD' | 'CHARACTER';
116
+ apply_text_normalization?: 'APPLY_TEXT_NORMALIZATION_UNSPECIFIED' | 'ON' | 'OFF';
117
+ /** @deprecated Backward-compatible alias. Use `apply_text_normalization`. */
72
118
  text_normalization?: 'ON' | 'OFF';
73
119
  }
74
120
 
@@ -156,6 +202,7 @@ export interface InferenceTTSOptions<TModel extends TTSModels> {
156
202
  baseURL: string;
157
203
  apiKey: string;
158
204
  apiSecret: string;
205
+ /** Flat provider-specific inference options forwarded as the `extra` payload field. */
159
206
  modelOptions: TTSOptions<TModel>;
160
207
  fallback?: TTSFallbackModel[];
161
208
  connOptions?: APIConnectOptions;
@@ -180,6 +227,7 @@ export class TTS<TModel extends TTSModels> extends BaseTTS {
180
227
  sampleRate?: number;
181
228
  apiKey?: string;
182
229
  apiSecret?: string;
230
+ /** Flat provider-specific inference options forwarded as the `extra` payload field. */
183
231
  modelOptions?: TTSOptions<TModel>;
184
232
  fallback?: TTSFallbackModelType | TTSFallbackModelType[];
185
233
  connOptions?: APIConnectOptions;
@@ -538,6 +586,7 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
538
586
 
539
587
  const createRecvTask = async (signal: AbortSignal) => {
540
588
  let currentSessionId: string | null = null;
589
+ const recvTimeoutMs = this.connOptions.timeoutMs;
541
590
 
542
591
  const bstream = new AudioByteStream(this.opts.sampleRate, NUM_CHANNELS);
543
592
  const serverEventStream = eventChannel.stream();
@@ -547,7 +596,12 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
547
596
  await inputSentEvent.wait();
548
597
 
549
598
  while (!this.closed && !signal.aborted) {
550
- const result = await reader.read();
599
+ const result = await waitUntilTimeout(
600
+ reader.read(),
601
+ recvTimeoutMs,
602
+ () => new APITimeoutError({ message: 'TTS recv idle timeout' }),
603
+ );
604
+
551
605
  if (signal.aborted) return;
552
606
  if (result.done) return;
553
607
 
@@ -592,6 +646,14 @@ export class SynthesizeStream<TModel extends TTSModels> extends BaseSynthesizeSt
592
646
  break;
593
647
  }
594
648
  }
649
+ } catch (e) {
650
+ if (e instanceof APITimeoutError) {
651
+ this.#logger.warn('TTS recv task timed out waiting for server message');
652
+ await resourceCleanup();
653
+ completionFuture.reject(e);
654
+ return;
655
+ }
656
+ throw e;
595
657
  } finally {
596
658
  reader.releaseLock();
597
659
  try {
@@ -50,6 +50,7 @@ class PendingInference {
50
50
 
51
51
  class InfClient implements InferenceExecutor {
52
52
  #requests: { [id: string]: PendingInference } = {};
53
+ #logger = log();
53
54
 
54
55
  constructor() {
55
56
  process.on('message', (msg: IPCMessage) => {
@@ -58,7 +59,7 @@ class InfClient implements InferenceExecutor {
58
59
  const fut = this.#requests[msg.value.requestId];
59
60
  delete this.#requests[msg.value.requestId];
60
61
  if (!fut) {
61
- log().child({ resp: msg.value }).warn('received unexpected inference response');
62
+ this.#logger.child({ resp: msg.value }).warn('received unexpected inference response');
62
63
  return;
63
64
  }
64
65
  fut.resolve(msg.value);
@@ -70,7 +71,11 @@ class InfClient implements InferenceExecutor {
70
71
  async doInference(method: string, data: unknown): Promise<unknown> {
71
72
  const requestId = shortuuid('inference_job_');
72
73
  if (!safeSend({ case: 'inferenceRequest', value: { requestId, method, data } })) {
73
- throw new Error('IPC channel closed');
74
+ this.#logger.debug(
75
+ { method, requestId },
76
+ 'IPC channel closed during inference, aborting gracefully',
77
+ );
78
+ throw new Error(`Inference ${method} aborted: IPC channel closed (expected during shutdown)`);
74
79
  }
75
80
 
76
81
  this.#requests[requestId] = new PendingInference();
@@ -125,6 +125,102 @@ describe('IPC send on dead process', () => {
125
125
  });
126
126
  });
127
127
 
128
+ describe('init timeout rejection handling', () => {
129
+ it('does not produce unhandled rejection when init times out', async () => {
130
+ // Regression test: before the fix, run() was called without await in start().
131
+ // When init timed out, the rejection in run()'s `await this.init.await` escaped
132
+ // as an unhandled rejection — crashing the Node.js process.
133
+ const unhandled: unknown[] = [];
134
+ const handler = (reason: unknown) => unhandled.push(reason);
135
+ process.on('unhandledRejection', handler);
136
+
137
+ // Child that responds AFTER the timeout — simulates slow init under CPU pressure.
138
+ // Timeout fires at 50ms (init.reject), child responds at 200ms (once() resolves).
139
+ // Before the fix, init.reject caused an unhandled rejection in run().
140
+ const slowScript = join(tmpdir(), 'test_slow_init_child.mjs');
141
+ writeFileSync(
142
+ slowScript,
143
+ `process.on('message', () => {
144
+ setTimeout(() => process.send({ case: 'initializeResponse' }), 200);
145
+ });
146
+ setInterval(() => {}, 1000);`,
147
+ );
148
+
149
+ const { SupervisedProc } = await import('./supervised_proc.js');
150
+ class TestProc extends SupervisedProc {
151
+ createProcess() {
152
+ return fork(slowScript, [], { stdio: ['pipe', 'pipe', 'pipe', 'ipc'] });
153
+ }
154
+ async mainTask() {}
155
+ }
156
+
157
+ const proc = new TestProc(
158
+ 50, // initializeTimeout — fires before child responds at 200ms
159
+ 1000, // closeTimeout
160
+ 0, // memoryWarnMB
161
+ 0, // memoryLimitMB
162
+ 5000, // pingInterval
163
+ 60000, // pingTimeout
164
+ 2500, // highPingThreshold
165
+ );
166
+
167
+ await proc.start();
168
+ // initialize() returns normally: child responds at 200ms, once() resolves,
169
+ // but init was already rejected at 50ms — run() gets the rejection.
170
+ await proc.initialize();
171
+
172
+ // Give the event loop a tick for any unhandled rejection to surface
173
+ await new Promise((r) => setTimeout(r, 100));
174
+
175
+ process.off('unhandledRejection', handler);
176
+ proc.proc?.kill();
177
+ try {
178
+ unlinkSync(slowScript);
179
+ } catch {}
180
+
181
+ expect(unhandled).toEqual([]);
182
+ });
183
+
184
+ it('join() resolves after init timeout instead of hanging forever', async () => {
185
+ // When run() fails early (before registering proc event handlers),
186
+ // #join must still resolve so that join() and close() don't hang.
187
+ const slowScript = join(tmpdir(), 'test_slow_init_child_join.mjs');
188
+ writeFileSync(
189
+ slowScript,
190
+ `process.on('message', () => {
191
+ setTimeout(() => process.send({ case: 'initializeResponse' }), 200);
192
+ });
193
+ setInterval(() => {}, 1000);`,
194
+ );
195
+
196
+ const { SupervisedProc } = await import('./supervised_proc.js');
197
+ class TestProc extends SupervisedProc {
198
+ createProcess() {
199
+ return fork(slowScript, [], { stdio: ['pipe', 'pipe', 'pipe', 'ipc'] });
200
+ }
201
+ async mainTask() {}
202
+ }
203
+
204
+ const proc = new TestProc(50, 1000, 0, 0, 5000, 60000, 2500);
205
+
206
+ await proc.start();
207
+ await proc.initialize();
208
+
209
+ // join() must resolve within a reasonable time, not hang forever
210
+ const result = await Promise.race([
211
+ proc.join().then(() => 'resolved'),
212
+ new Promise((r) => setTimeout(() => r('timeout'), 2000)),
213
+ ]);
214
+
215
+ proc.proc?.kill();
216
+ try {
217
+ unlinkSync(slowScript);
218
+ } catch {}
219
+
220
+ expect(result).toBe('resolved');
221
+ });
222
+ });
223
+
128
224
  describe('timer cleanup', () => {
129
225
  it('clearInterval stops the interval', async () => {
130
226
  let count = 0;
@@ -84,7 +84,14 @@ export abstract class SupervisedProc {
84
84
  this.proc = this.createProcess();
85
85
 
86
86
  this.#started = true;
87
- this.run();
87
+ this.run().catch((err) => {
88
+ this.#logger.child({ err }).warn('supervised process run failed');
89
+ // Note: we intentionally do NOT kill the child process here. Killing it
90
+ // would race with initialize()'s `once(proc, 'message')`, causing
91
+ // initialize() to hang forever and deadlocking the caller (proc_pool).
92
+ // The child process is cleaned up when the pool shuts down.
93
+ this.#join.resolve();
94
+ });
88
95
  }
89
96
 
90
97
  async run() {
package/src/job.ts CHANGED
@@ -283,6 +283,7 @@ export class JobContext {
283
283
  startedAt: targetSession._startedAt,
284
284
  audioRecordingPath: recorderIO?.outputPath,
285
285
  audioRecordingStartedAt: recorderIO?.recordingStartedAt,
286
+ modelUsage: targetSession._usageCollector.flatten(),
286
287
  });
287
288
  }
288
289