assemblyai 4.34.6 → 4.35.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ export type AudioData = ArrayBufferLike;
6
6
  * The encoding of the audio data
7
7
  * @defaultValue "pcm_s16le"
8
8
  */
9
- export type AudioEncoding = "pcm_s16le" | "pcm_mulaw";
9
+ export type AudioEncoding = "pcm_s16le" | "pcm_mulaw" | "opus" | "ogg_opus";
10
10
  /**
11
11
  * Configure the threshold for how long to wait before ending an utterance. Default is 700ms.
12
12
  */
@@ -71,6 +71,22 @@ export type StreamingTranscriberParams = {
71
71
  websocketBaseUrl?: string;
72
72
  apiKey?: string;
73
73
  token?: string;
74
+ /**
75
+ * Milliseconds to wait for the streaming handshake (socket open + server
76
+ * `Begin`) before treating the attempt as failed. Defaults to 1000.
77
+ */
78
+ connectTimeout?: number;
79
+ /**
80
+ * Number of additional connection attempts after the first one fails on a
81
+ * transient error (timeout, network drop, unexpected close). 0 disables
82
+ * retries. Permanent failures (auth, insufficient funds, malformed config)
83
+ * are never retried. Defaults to 2.
84
+ */
85
+ maxConnectionRetries?: number;
86
+ /**
87
+ * Milliseconds to wait between connection attempts. Defaults to 500.
88
+ */
89
+ connectionRetryDelay?: number;
74
90
  sampleRate: number;
75
91
  encoding?: AudioEncoding;
76
92
  endOfTurnConfidenceThreshold?: number;
@@ -88,7 +104,19 @@ export type StreamingTranscriberParams = {
88
104
  prompt?: string;
89
105
  agentContext?: string;
90
106
  speechModel?: StreamingSpeechModel;
107
+ /**
108
+ * @deprecated Use `languageCodes` instead (pass a single-element array, e.g. `["es"]`,
109
+ * for the same behavior). Still supported for backward compatibility.
110
+ */
91
111
  languageCode?: string;
112
+ /**
113
+ * Recommended way to select languages. Steers transcription toward a set of
114
+ * languages by biasing output toward them on a per-token basis while still
115
+ * allowing native code-switching among them. Pass the languages you expect
116
+ * (e.g. `["en", "es"]`), or a single-element array (e.g. `["es"]`) for a
117
+ * monolingual session. Universal-3.5 Pro Streaming only.
118
+ */
119
+ languageCodes?: string[];
92
120
  languageDetection?: boolean;
93
121
  domain?: StreamingDomain;
94
122
  inactivityTimeout?: number;
@@ -239,10 +267,19 @@ export type StreamingUpdateConfiguration = {
239
267
  filter_profanity?: boolean;
240
268
  interruption_delay?: number;
241
269
  turn_left_pad_ms?: number;
270
+ /**
271
+ * Steer transcription toward a set of languages mid-stream. Pass an empty
272
+ * array (`[]`) to clear steering and restore the model's default
273
+ * multilingual code-switching. Universal-3.5 Pro Streaming only.
274
+ */
275
+ language_codes?: string[];
242
276
  };
243
277
  export type StreamingForceEndpoint = {
244
278
  type: "ForceEndpoint";
245
279
  };
280
+ export type StreamingKeepAlive = {
281
+ type: "KeepAlive";
282
+ };
246
283
  export type ErrorEvent = {
247
284
  type: "Error";
248
285
  error_code?: number;
@@ -282,4 +319,4 @@ export type SpeakerRevisionEvent = {
282
319
  revisions: SpeakerRevisionItem[];
283
320
  };
284
321
  export type StreamingEventMessage = BeginEvent | TurnEvent | SpeechStartedEvent | TerminationEvent | LLMGatewayResponseEvent | SpeakerRevisionEvent | ErrorEvent | WarningEvent;
285
- export type StreamingOperationMessage = StreamingUpdateConfiguration | StreamingForceEndpoint | StreamingTerminateSession;
322
+ export type StreamingOperationMessage = StreamingUpdateConfiguration | StreamingForceEndpoint | StreamingKeepAlive | StreamingTerminateSession;
package/dist/workerd.mjs CHANGED
@@ -28,7 +28,7 @@ if (typeof navigator !== "undefined" && navigator.userAgent) {
28
28
  defaultUserAgentString += navigator.userAgent;
29
29
  }
30
30
  const defaultUserAgent = {
31
- sdk: { name: "JavaScript", version: "4.34.6" },
31
+ sdk: { name: "JavaScript", version: "4.35.3" },
32
32
  };
33
33
  if (typeof process !== "undefined") {
34
34
  if (process.versions.node && defaultUserAgentString.indexOf("Node") === -1) {
@@ -983,6 +983,24 @@ function toInt16View(audio) {
983
983
  }
984
984
  const defaultStreamingUrl$1 = "wss://streaming.assemblyai.com/v3/ws";
985
985
  const terminateSessionMessage = `{"type":"Terminate"}`;
986
+ const DEFAULT_CONNECT_TIMEOUT_MS = 1000;
987
+ const DEFAULT_MAX_CONNECTION_RETRIES = 2;
988
+ const DEFAULT_CONNECTION_RETRY_DELAY_MS = 500;
989
+ /**
990
+ * Close/error codes that signal a permanent client-side problem (auth,
991
+ * billing, malformed config). A retry would hit the same failure, so the
992
+ * connection is never retried on these.
993
+ */
994
+ const NON_RETRYABLE_CLOSE_CODES = new Set([
995
+ StreamingErrorType.BadSampleRate,
996
+ StreamingErrorType.AuthFailed,
997
+ StreamingErrorType.InsufficientFunds,
998
+ StreamingErrorType.FreeTierUser,
999
+ StreamingErrorType.BadSchema,
1000
+ ]);
1001
+ function isRetryableCloseCode(code) {
1002
+ return code !== 1000 && !NON_RETRYABLE_CLOSE_CODES.has(code);
1003
+ }
986
1004
  /**
987
1005
  * Per-send chunk cap in milliseconds for the dual-channel mixer. The streaming
988
1006
  * server rejects audio messages longer than 1000 ms (`Input Duration Error`).
@@ -1116,8 +1134,12 @@ class StreamingTranscriber {
1116
1134
  searchParams.set("speech_model", this.params.speechModel.toString());
1117
1135
  }
1118
1136
  if (this.params.languageCode !== undefined) {
1137
+ console.warn("[Deprecation Warning] `languageCode` is deprecated and will be removed in a future release. Please use `languageCodes` instead.");
1119
1138
  searchParams.set("language_code", this.params.languageCode);
1120
1139
  }
1140
+ if (this.params.languageCodes !== undefined) {
1141
+ searchParams.set("language_codes", JSON.stringify(this.params.languageCodes));
1142
+ }
1121
1143
  if (this.params.languageDetection !== undefined) {
1122
1144
  searchParams.set("language_detection", this.params.languageDetection.toString());
1123
1145
  }
@@ -1191,12 +1213,81 @@ class StreamingTranscriber {
1191
1213
  on(event, listener) {
1192
1214
  this.listeners[event] = listener;
1193
1215
  }
1194
- connect() {
1195
- return new Promise((resolve) => {
1196
- if (this.socket) {
1197
- throw new Error("Already connected");
1216
+ /**
1217
+ * Open the streaming session.
1218
+ *
1219
+ * Resolves with the server's `Begin` event once the handshake completes. A
1220
+ * single attempt is bounded by `connectTimeout` (default 1000ms); transient
1221
+ * failures (timeout, network drop, unexpected close) are retried up to
1222
+ * `maxConnectionRetries` times (default 2), waiting `connectionRetryDelay`
1223
+ * (default 500ms) between attempts. Permanent failures (auth, insufficient
1224
+ * funds, malformed config) are not retried.
1225
+ *
1226
+ * Unlike previously, a failed connection now rejects this promise rather
1227
+ * than only invoking the `error` listener — necessary for the caller (and
1228
+ * the retry loop) to observe the failure.
1229
+ */
1230
+ async connect() {
1231
+ if (this.socket) {
1232
+ throw new Error("Already connected");
1233
+ }
1234
+ const maxRetries = this.params.maxConnectionRetries ?? DEFAULT_MAX_CONNECTION_RETRIES;
1235
+ const retryDelay = this.params.connectionRetryDelay ?? DEFAULT_CONNECTION_RETRY_DELAY_MS;
1236
+ let lastError;
1237
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
1238
+ try {
1239
+ return await this.connectOnce();
1240
+ }
1241
+ catch (err) {
1242
+ lastError = err;
1243
+ const retryable = err.retryable === true;
1244
+ if (!retryable || attempt === maxRetries) {
1245
+ throw err;
1246
+ }
1247
+ console.warn(`Streaming connect attempt ${attempt + 1}/${maxRetries + 1} failed (${err.message}); retrying`);
1248
+ if (retryDelay > 0) {
1249
+ await new Promise((resolve) => setTimeout(resolve, retryDelay));
1250
+ }
1198
1251
  }
1252
+ }
1253
+ // The loop above always returns or throws; this only satisfies the type
1254
+ // checker that a value is produced on every path.
1255
+ throw lastError ?? new Error("Failed to connect to streaming server");
1256
+ }
1257
+ connectOnce() {
1258
+ return new Promise((resolve, reject) => {
1199
1259
  const url = this.connectionUrl();
1260
+ const timeoutMs = this.params.connectTimeout ?? DEFAULT_CONNECT_TIMEOUT_MS;
1261
+ // `settled` flips once this attempt has resolved (`Begin`) or rejected
1262
+ // (timeout / pre-`Begin` close / error). Before it flips the socket
1263
+ // handlers drive this promise; after it flips they revert to normal
1264
+ // runtime dispatch (close / error / message listeners).
1265
+ let settled = false;
1266
+ let timer;
1267
+ const failAttempt = (error) => {
1268
+ if (settled)
1269
+ return;
1270
+ settled = true;
1271
+ if (timer)
1272
+ clearTimeout(timer);
1273
+ this.discardPendingSocket();
1274
+ reject(error);
1275
+ };
1276
+ const succeed = (begin) => {
1277
+ if (settled)
1278
+ return;
1279
+ settled = true;
1280
+ if (timer)
1281
+ clearTimeout(timer);
1282
+ resolve(begin);
1283
+ };
1284
+ if (timeoutMs > 0) {
1285
+ timer = setTimeout(() => {
1286
+ const err = new StreamingError(`Streaming connection timed out after ${timeoutMs}ms`);
1287
+ err.retryable = true;
1288
+ failAttempt(err);
1289
+ }, timeoutMs);
1290
+ }
1200
1291
  if (this.token) {
1201
1292
  this.socket = factory(url.toString());
1202
1293
  }
@@ -1213,6 +1304,15 @@ class StreamingTranscriber {
1213
1304
  reason = StreamingErrorMessages[code];
1214
1305
  }
1215
1306
  }
1307
+ // A close before `Begin` is a failed connection attempt — reject so
1308
+ // connect() can retry (or surface a permanent failure).
1309
+ if (!settled) {
1310
+ const err = new StreamingError(reason || `Streaming connection closed (code=${code})`);
1311
+ err.code = code;
1312
+ err.retryable = isRetryableCloseCode(code);
1313
+ failAttempt(err);
1314
+ return;
1315
+ }
1216
1316
  // Stop the flush timer when the socket is gone (server-initiated close,
1217
1317
  // network drop, etc.) — otherwise subsequent ticks call send() on a
1218
1318
  // closed socket and spam the error listener.
@@ -1223,25 +1323,37 @@ class StreamingTranscriber {
1223
1323
  this.listeners.close?.(code, reason);
1224
1324
  };
1225
1325
  this.socket.onerror = (event) => {
1226
- if (event.error)
1227
- this.listeners.error?.(event.error);
1228
- else
1229
- this.listeners.error?.(new Error(event.message));
1326
+ const error = event.error ?? new Error(event.message);
1327
+ // A socket error before `Begin` is a failed attempt → reject/retry.
1328
+ if (!settled) {
1329
+ error.retryable = true;
1330
+ failAttempt(error);
1331
+ return;
1332
+ }
1333
+ this.listeners.error?.(error);
1230
1334
  };
1231
1335
  this.socket.onmessage = ({ data }) => {
1232
1336
  const message = JSON.parse(data.toString());
1233
1337
  if ("error" in message) {
1234
1338
  const err = new StreamingError(message.error);
1235
1339
  if ("error_code" in message) {
1236
- err.code =
1237
- message.error_code;
1340
+ err.code = message.error_code;
1341
+ }
1342
+ // A server error frame before `Begin` fails the attempt; the code
1343
+ // decides whether a retry is worthwhile.
1344
+ if (!settled) {
1345
+ const attemptErr = err;
1346
+ attemptErr.retryable =
1347
+ err.code === undefined ? true : isRetryableCloseCode(err.code);
1348
+ failAttempt(attemptErr);
1349
+ return;
1238
1350
  }
1239
1351
  this.listeners.error?.(err);
1240
1352
  return;
1241
1353
  }
1242
1354
  switch (message.type) {
1243
1355
  case "Begin": {
1244
- resolve(message);
1356
+ succeed(message);
1245
1357
  this.listeners.open?.(message);
1246
1358
  break;
1247
1359
  }
@@ -1288,6 +1400,20 @@ class StreamingTranscriber {
1288
1400
  };
1289
1401
  });
1290
1402
  }
1403
+ /** Tear down a half-open socket from a failed connection attempt. */
1404
+ discardPendingSocket() {
1405
+ if (!this.socket)
1406
+ return;
1407
+ try {
1408
+ if (this.socket.removeAllListeners)
1409
+ this.socket.removeAllListeners();
1410
+ this.socket.close();
1411
+ }
1412
+ catch {
1413
+ // Best-effort cleanup; a half-open socket may throw on close.
1414
+ }
1415
+ this.socket = undefined;
1416
+ }
1291
1417
  /**
1292
1418
  * Returns a WritableStream that pumps PCM chunks into `sendAudio`. Single-channel
1293
1419
  * only — in dual-channel mode use `sendAudio(pcm, { channel })` directly, since
@@ -1561,6 +1687,16 @@ class StreamingTranscriber {
1561
1687
  };
1562
1688
  this.send(JSON.stringify(message));
1563
1689
  }
1690
+ /**
1691
+ * Reset the server's inactivity timer. Only needed when the session was
1692
+ * created with `inactivityTimeout` and no audio is being sent.
1693
+ */
1694
+ keepAlive() {
1695
+ const message = {
1696
+ type: "KeepAlive",
1697
+ };
1698
+ this.send(JSON.stringify(message));
1699
+ }
1564
1700
  send(data) {
1565
1701
  if (!this.socket || this.socket.readyState !== this.socket.OPEN) {
1566
1702
  throw new Error("Socket is not open for communication");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "assemblyai",
3
- "version": "4.34.6",
3
+ "version": "4.35.3",
4
4
  "description": "The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.",
5
5
  "engines": {
6
6
  "node": ">=18"
@@ -1,7 +1,7 @@
1
1
  export * from "../types/asyncapi.generated";
2
2
  export * from "../types/realtime";
3
3
  export * from "../types/helpers";
4
- export * from "../types/streaming/dual-channel";
4
+ export * from "../types/streaming";
5
5
  export * from "../services/realtime/service";
6
6
  export * from "../services/streaming/service";
7
7
  export * from "../services/streaming/factory";
@@ -18,12 +18,17 @@ import {
18
18
  SpeakerRevisionEvent,
19
19
  StreamingUpdateConfiguration,
20
20
  StreamingForceEndpoint,
21
+ StreamingKeepAlive,
21
22
  WarningEvent,
22
23
  } from "../..";
23
24
  import type { VadDetector, VadFrame } from "../../types/streaming/dual-channel";
24
25
  import { EnergyVad } from "./energy-vad";
25
26
  import { attributeTurn, rollUpTurnChannel, VadTimeline } from "./label-mapper";
26
- import { StreamingError, StreamingErrorMessages } from "../../utils/errors";
27
+ import {
28
+ StreamingError,
29
+ StreamingErrorMessages,
30
+ StreamingErrorType,
31
+ } from "../../utils/errors";
27
32
  import { StreamingErrorTypeCodes } from "../../utils/errors/streaming";
28
33
 
29
34
  /**
@@ -58,6 +63,30 @@ function toInt16View(audio: AudioData): Int16Array {
58
63
  const defaultStreamingUrl = "wss://streaming.assemblyai.com/v3/ws";
59
64
  const terminateSessionMessage = `{"type":"Terminate"}`;
60
65
 
66
+ const DEFAULT_CONNECT_TIMEOUT_MS = 1000;
67
+ const DEFAULT_MAX_CONNECTION_RETRIES = 2;
68
+ const DEFAULT_CONNECTION_RETRY_DELAY_MS = 500;
69
+
70
+ /**
71
+ * Close/error codes that signal a permanent client-side problem (auth,
72
+ * billing, malformed config). A retry would hit the same failure, so the
73
+ * connection is never retried on these.
74
+ */
75
+ const NON_RETRYABLE_CLOSE_CODES = new Set<number>([
76
+ StreamingErrorType.BadSampleRate,
77
+ StreamingErrorType.AuthFailed,
78
+ StreamingErrorType.InsufficientFunds,
79
+ StreamingErrorType.FreeTierUser,
80
+ StreamingErrorType.BadSchema,
81
+ ]);
82
+
83
+ /** Error from a single connection attempt, tagged for retry handling. */
84
+ type ConnectionAttemptError = Error & { code?: number; retryable: boolean };
85
+
86
+ function isRetryableCloseCode(code: number): boolean {
87
+ return code !== 1000 && !NON_RETRYABLE_CLOSE_CODES.has(code);
88
+ }
89
+
61
90
  /**
62
91
  * Per-send chunk cap in milliseconds for the dual-channel mixer. The streaming
63
92
  * server rejects audio messages longer than 1000 ms (`Input Duration Error`).
@@ -284,9 +313,19 @@ export class StreamingTranscriber {
284
313
  }
285
314
 
286
315
  if (this.params.languageCode !== undefined) {
316
+ console.warn(
317
+ "[Deprecation Warning] `languageCode` is deprecated and will be removed in a future release. Please use `languageCodes` instead.",
318
+ );
287
319
  searchParams.set("language_code", this.params.languageCode);
288
320
  }
289
321
 
322
+ if (this.params.languageCodes !== undefined) {
323
+ searchParams.set(
324
+ "language_codes",
325
+ JSON.stringify(this.params.languageCodes),
326
+ );
327
+ }
328
+
290
329
  if (this.params.languageDetection !== undefined) {
291
330
  searchParams.set(
292
331
  "language_detection",
@@ -432,13 +471,90 @@ export class StreamingTranscriber {
432
471
  this.listeners[event] = listener;
433
472
  }
434
473
 
435
- connect() {
436
- return new Promise<BeginEvent>((resolve) => {
437
- if (this.socket) {
438
- throw new Error("Already connected");
474
+ /**
475
+ * Open the streaming session.
476
+ *
477
+ * Resolves with the server's `Begin` event once the handshake completes. A
478
+ * single attempt is bounded by `connectTimeout` (default 1000ms); transient
479
+ * failures (timeout, network drop, unexpected close) are retried up to
480
+ * `maxConnectionRetries` times (default 2), waiting `connectionRetryDelay`
481
+ * (default 500ms) between attempts. Permanent failures (auth, insufficient
482
+ * funds, malformed config) are not retried.
483
+ *
484
+ * Unlike previously, a failed connection now rejects this promise rather
485
+ * than only invoking the `error` listener — necessary for the caller (and
486
+ * the retry loop) to observe the failure.
487
+ */
488
+ async connect(): Promise<BeginEvent> {
489
+ if (this.socket) {
490
+ throw new Error("Already connected");
491
+ }
492
+
493
+ const maxRetries =
494
+ this.params.maxConnectionRetries ?? DEFAULT_MAX_CONNECTION_RETRIES;
495
+ const retryDelay =
496
+ this.params.connectionRetryDelay ?? DEFAULT_CONNECTION_RETRY_DELAY_MS;
497
+
498
+ let lastError: Error | undefined;
499
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
500
+ try {
501
+ return await this.connectOnce();
502
+ } catch (err) {
503
+ lastError = err as Error;
504
+ const retryable = (err as ConnectionAttemptError).retryable === true;
505
+ if (!retryable || attempt === maxRetries) {
506
+ throw err;
507
+ }
508
+ console.warn(
509
+ `Streaming connect attempt ${attempt + 1}/${maxRetries + 1} failed (${(err as Error).message}); retrying`,
510
+ );
511
+ if (retryDelay > 0) {
512
+ await new Promise((resolve) => setTimeout(resolve, retryDelay));
513
+ }
439
514
  }
515
+ }
516
+ // The loop above always returns or throws; this only satisfies the type
517
+ // checker that a value is produced on every path.
518
+ throw lastError ?? new Error("Failed to connect to streaming server");
519
+ }
440
520
 
521
+ private connectOnce(): Promise<BeginEvent> {
522
+ return new Promise<BeginEvent>((resolve, reject) => {
441
523
  const url = this.connectionUrl();
524
+ const timeoutMs =
525
+ this.params.connectTimeout ?? DEFAULT_CONNECT_TIMEOUT_MS;
526
+
527
+ // `settled` flips once this attempt has resolved (`Begin`) or rejected
528
+ // (timeout / pre-`Begin` close / error). Before it flips the socket
529
+ // handlers drive this promise; after it flips they revert to normal
530
+ // runtime dispatch (close / error / message listeners).
531
+ let settled = false;
532
+ let timer: ReturnType<typeof setTimeout> | undefined;
533
+
534
+ const failAttempt = (error: ConnectionAttemptError) => {
535
+ if (settled) return;
536
+ settled = true;
537
+ if (timer) clearTimeout(timer);
538
+ this.discardPendingSocket();
539
+ reject(error);
540
+ };
541
+
542
+ const succeed = (begin: BeginEvent) => {
543
+ if (settled) return;
544
+ settled = true;
545
+ if (timer) clearTimeout(timer);
546
+ resolve(begin);
547
+ };
548
+
549
+ if (timeoutMs > 0) {
550
+ timer = setTimeout(() => {
551
+ const err = new StreamingError(
552
+ `Streaming connection timed out after ${timeoutMs}ms`,
553
+ ) as ConnectionAttemptError;
554
+ err.retryable = true;
555
+ failAttempt(err);
556
+ }, timeoutMs);
557
+ }
442
558
 
443
559
  if (this.token) {
444
560
  this.socket = polyfillWebSocketFactory(url.toString());
@@ -465,6 +581,17 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
465
581
  reason = StreamingErrorMessages[code as StreamingErrorTypeCodes];
466
582
  }
467
583
  }
584
+ // A close before `Begin` is a failed connection attempt — reject so
585
+ // connect() can retry (or surface a permanent failure).
586
+ if (!settled) {
587
+ const err = new StreamingError(
588
+ reason || `Streaming connection closed (code=${code})`,
589
+ ) as ConnectionAttemptError;
590
+ err.code = code;
591
+ err.retryable = isRetryableCloseCode(code);
592
+ failAttempt(err);
593
+ return;
594
+ }
468
595
  // Stop the flush timer when the socket is gone (server-initiated close,
469
596
  // network drop, etc.) — otherwise subsequent ticks call send() on a
470
597
  // closed socket and spam the error listener.
@@ -476,18 +603,34 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
476
603
  };
477
604
 
478
605
  this.socket.onerror = (event: ErrorEvent) => {
479
- if (event.error) this.listeners.error?.(event.error as Error);
480
- else this.listeners.error?.(new Error(event.message));
606
+ const error = (event.error as Error) ?? new Error(event.message);
607
+ // A socket error before `Begin` is a failed attempt → reject/retry.
608
+ if (!settled) {
609
+ (error as ConnectionAttemptError).retryable = true;
610
+ failAttempt(error as ConnectionAttemptError);
611
+ return;
612
+ }
613
+ this.listeners.error?.(error);
481
614
  };
482
615
 
483
616
  this.socket.onmessage = ({ data }: MessageEvent) => {
484
617
  const message = JSON.parse(data.toString()) as StreamingEventMessage;
485
618
 
486
619
  if ("error" in message) {
487
- const err = new StreamingError(message.error);
620
+ const err = new StreamingError(message.error) as StreamingError & {
621
+ code?: number;
622
+ };
488
623
  if ("error_code" in message) {
489
- (err as StreamingError & { code?: number }).code =
490
- message.error_code;
624
+ err.code = message.error_code;
625
+ }
626
+ // A server error frame before `Begin` fails the attempt; the code
627
+ // decides whether a retry is worthwhile.
628
+ if (!settled) {
629
+ const attemptErr = err as ConnectionAttemptError;
630
+ attemptErr.retryable =
631
+ err.code === undefined ? true : isRetryableCloseCode(err.code);
632
+ failAttempt(attemptErr);
633
+ return;
491
634
  }
492
635
  this.listeners.error?.(err);
493
636
  return;
@@ -495,7 +638,7 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
495
638
 
496
639
  switch (message.type) {
497
640
  case "Begin": {
498
- resolve(message);
641
+ succeed(message);
499
642
  this.listeners.open?.(message);
500
643
  break;
501
644
  }
@@ -548,6 +691,18 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
548
691
  });
549
692
  }
550
693
 
694
+ /** Tear down a half-open socket from a failed connection attempt. */
695
+ private discardPendingSocket(): void {
696
+ if (!this.socket) return;
697
+ try {
698
+ if (this.socket.removeAllListeners) this.socket.removeAllListeners();
699
+ this.socket.close();
700
+ } catch {
701
+ // Best-effort cleanup; a half-open socket may throw on close.
702
+ }
703
+ this.socket = undefined;
704
+ }
705
+
551
706
  /**
552
707
  * Returns a WritableStream that pumps PCM chunks into `sendAudio`. Single-channel
553
708
  * only — in dual-channel mode use `sendAudio(pcm, { channel })` directly, since
@@ -829,6 +984,17 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
829
984
  this.send(JSON.stringify(message));
830
985
  }
831
986
 
987
+ /**
988
+ * Reset the server's inactivity timer. Only needed when the session was
989
+ * created with `inactivityTimeout` and no audio is being sent.
990
+ */
991
+ keepAlive() {
992
+ const message: StreamingKeepAlive = {
993
+ type: "KeepAlive",
994
+ };
995
+ this.send(JSON.stringify(message));
996
+ }
997
+
832
998
  private send(data: BufferLike) {
833
999
  if (!this.socket || this.socket.readyState !== this.socket.OPEN) {
834
1000
  throw new Error("Socket is not open for communication");
@@ -25,7 +25,7 @@ export type AudioData = ArrayBufferLike;
25
25
  * The encoding of the audio data
26
26
  * @defaultValue "pcm_s16le"
27
27
  */
28
- export type AudioEncoding = "pcm_s16le" | "pcm_mulaw";
28
+ export type AudioEncoding = "pcm_s16le" | "pcm_mulaw" | "opus" | "ogg_opus";
29
29
 
30
30
  /**
31
31
  * Configure the threshold for how long to wait before ending an utterance. Default is 700ms.
@@ -76,6 +76,22 @@ export type StreamingTranscriberParams = {
76
76
  websocketBaseUrl?: string;
77
77
  apiKey?: string;
78
78
  token?: string;
79
+ /**
80
+ * Milliseconds to wait for the streaming handshake (socket open + server
81
+ * `Begin`) before treating the attempt as failed. Defaults to 1000.
82
+ */
83
+ connectTimeout?: number;
84
+ /**
85
+ * Number of additional connection attempts after the first one fails on a
86
+ * transient error (timeout, network drop, unexpected close). 0 disables
87
+ * retries. Permanent failures (auth, insufficient funds, malformed config)
88
+ * are never retried. Defaults to 2.
89
+ */
90
+ maxConnectionRetries?: number;
91
+ /**
92
+ * Milliseconds to wait between connection attempts. Defaults to 500.
93
+ */
94
+ connectionRetryDelay?: number;
79
95
  sampleRate: number;
80
96
  encoding?: AudioEncoding;
81
97
  endOfTurnConfidenceThreshold?: number;
@@ -93,7 +109,19 @@ export type StreamingTranscriberParams = {
93
109
  prompt?: string;
94
110
  agentContext?: string;
95
111
  speechModel?: StreamingSpeechModel;
112
+ /**
113
+ * @deprecated Use `languageCodes` instead (pass a single-element array, e.g. `["es"]`,
114
+ * for the same behavior). Still supported for backward compatibility.
115
+ */
96
116
  languageCode?: string;
117
+ /**
118
+ * Recommended way to select languages. Steers transcription toward a set of
119
+ * languages by biasing output toward them on a per-token basis while still
120
+ * allowing native code-switching among them. Pass the languages you expect
121
+ * (e.g. `["en", "es"]`), or a single-element array (e.g. `["es"]`) for a
122
+ * monolingual session. Universal-3.5 Pro Streaming only.
123
+ */
124
+ languageCodes?: string[];
97
125
  languageDetection?: boolean;
98
126
  domain?: StreamingDomain;
99
127
  inactivityTimeout?: number;
@@ -343,12 +371,22 @@ export type StreamingUpdateConfiguration = {
343
371
  filter_profanity?: boolean;
344
372
  interruption_delay?: number;
345
373
  turn_left_pad_ms?: number;
374
+ /**
375
+ * Steer transcription toward a set of languages mid-stream. Pass an empty
376
+ * array (`[]`) to clear steering and restore the model's default
377
+ * multilingual code-switching. Universal-3.5 Pro Streaming only.
378
+ */
379
+ language_codes?: string[];
346
380
  };
347
381
 
348
382
  export type StreamingForceEndpoint = {
349
383
  type: "ForceEndpoint";
350
384
  };
351
385
 
386
+ export type StreamingKeepAlive = {
387
+ type: "KeepAlive";
388
+ };
389
+
352
390
  export type ErrorEvent = {
353
391
  type: "Error";
354
392
  error_code?: number;
@@ -405,4 +443,5 @@ export type StreamingEventMessage =
405
443
  export type StreamingOperationMessage =
406
444
  | StreamingUpdateConfiguration
407
445
  | StreamingForceEndpoint
446
+ | StreamingKeepAlive
408
447
  | StreamingTerminateSession;