assemblyai 4.34.6 → 4.35.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.md +7 -8
- package/dist/assemblyai.streaming.umd.js +150 -10
- package/dist/assemblyai.streaming.umd.min.js +1 -1
- package/dist/assemblyai.umd.js +150 -10
- package/dist/assemblyai.umd.min.js +1 -1
- package/dist/browser.mjs +148 -12
- package/dist/bun.mjs +148 -12
- package/dist/deno.mjs +148 -12
- package/dist/exports/streaming.d.ts +1 -1
- package/dist/index.cjs +150 -10
- package/dist/index.mjs +150 -10
- package/dist/node.cjs +148 -12
- package/dist/node.mjs +148 -12
- package/dist/services/streaming/service.d.ts +22 -0
- package/dist/streaming.browser.mjs +148 -12
- package/dist/streaming.cjs +149 -9
- package/dist/streaming.mjs +149 -9
- package/dist/types/asyncapi.generated.d.ts +1 -1
- package/dist/types/streaming/index.d.ts +38 -1
- package/dist/workerd.mjs +148 -12
- package/package.json +1 -1
- package/src/exports/streaming.ts +1 -1
- package/src/services/streaming/service.ts +177 -11
- package/src/types/asyncapi.generated.ts +1 -1
- package/src/types/streaming/index.ts +39 -0
|
@@ -6,7 +6,7 @@ export type AudioData = ArrayBufferLike;
|
|
|
6
6
|
* The encoding of the audio data
|
|
7
7
|
* @defaultValue "pcm_s16le"
|
|
8
8
|
*/
|
|
9
|
-
export type AudioEncoding = "pcm_s16le" | "pcm_mulaw";
|
|
9
|
+
export type AudioEncoding = "pcm_s16le" | "pcm_mulaw" | "opus" | "ogg_opus";
|
|
10
10
|
/**
|
|
11
11
|
* Configure the threshold for how long to wait before ending an utterance. Default is 700ms.
|
|
12
12
|
*/
|
|
@@ -71,6 +71,22 @@ export type StreamingTranscriberParams = {
|
|
|
71
71
|
websocketBaseUrl?: string;
|
|
72
72
|
apiKey?: string;
|
|
73
73
|
token?: string;
|
|
74
|
+
/**
|
|
75
|
+
* Milliseconds to wait for the streaming handshake (socket open + server
|
|
76
|
+
* `Begin`) before treating the attempt as failed. Defaults to 1000.
|
|
77
|
+
*/
|
|
78
|
+
connectTimeout?: number;
|
|
79
|
+
/**
|
|
80
|
+
* Number of additional connection attempts after the first one fails on a
|
|
81
|
+
* transient error (timeout, network drop, unexpected close). 0 disables
|
|
82
|
+
* retries. Permanent failures (auth, insufficient funds, malformed config)
|
|
83
|
+
* are never retried. Defaults to 2.
|
|
84
|
+
*/
|
|
85
|
+
maxConnectionRetries?: number;
|
|
86
|
+
/**
|
|
87
|
+
* Milliseconds to wait between connection attempts. Defaults to 500.
|
|
88
|
+
*/
|
|
89
|
+
connectionRetryDelay?: number;
|
|
74
90
|
sampleRate: number;
|
|
75
91
|
encoding?: AudioEncoding;
|
|
76
92
|
endOfTurnConfidenceThreshold?: number;
|
|
@@ -88,7 +104,19 @@ export type StreamingTranscriberParams = {
|
|
|
88
104
|
prompt?: string;
|
|
89
105
|
agentContext?: string;
|
|
90
106
|
speechModel?: StreamingSpeechModel;
|
|
107
|
+
/**
|
|
108
|
+
* @deprecated Use `languageCodes` instead (pass a single-element array, e.g. `["es"]`,
|
|
109
|
+
* for the same behavior). Still supported for backward compatibility.
|
|
110
|
+
*/
|
|
91
111
|
languageCode?: string;
|
|
112
|
+
/**
|
|
113
|
+
* Recommended way to select languages. Steers transcription toward a set of
|
|
114
|
+
* languages by biasing output toward them on a per-token basis while still
|
|
115
|
+
* allowing native code-switching among them. Pass the languages you expect
|
|
116
|
+
* (e.g. `["en", "es"]`), or a single-element array (e.g. `["es"]`) for a
|
|
117
|
+
* monolingual session. Universal-3.5 Pro Streaming only.
|
|
118
|
+
*/
|
|
119
|
+
languageCodes?: string[];
|
|
92
120
|
languageDetection?: boolean;
|
|
93
121
|
domain?: StreamingDomain;
|
|
94
122
|
inactivityTimeout?: number;
|
|
@@ -239,10 +267,19 @@ export type StreamingUpdateConfiguration = {
|
|
|
239
267
|
filter_profanity?: boolean;
|
|
240
268
|
interruption_delay?: number;
|
|
241
269
|
turn_left_pad_ms?: number;
|
|
270
|
+
/**
|
|
271
|
+
* Steer transcription toward a set of languages mid-stream. Pass an empty
|
|
272
|
+
* array (`[]`) to clear steering and restore the model's default
|
|
273
|
+
* multilingual code-switching. Universal-3.5 Pro Streaming only.
|
|
274
|
+
*/
|
|
275
|
+
language_codes?: string[];
|
|
242
276
|
};
|
|
243
277
|
export type StreamingForceEndpoint = {
|
|
244
278
|
type: "ForceEndpoint";
|
|
245
279
|
};
|
|
280
|
+
export type StreamingKeepAlive = {
|
|
281
|
+
type: "KeepAlive";
|
|
282
|
+
};
|
|
246
283
|
export type ErrorEvent = {
|
|
247
284
|
type: "Error";
|
|
248
285
|
error_code?: number;
|
|
@@ -282,4 +319,4 @@ export type SpeakerRevisionEvent = {
|
|
|
282
319
|
revisions: SpeakerRevisionItem[];
|
|
283
320
|
};
|
|
284
321
|
export type StreamingEventMessage = BeginEvent | TurnEvent | SpeechStartedEvent | TerminationEvent | LLMGatewayResponseEvent | SpeakerRevisionEvent | ErrorEvent | WarningEvent;
|
|
285
|
-
export type StreamingOperationMessage = StreamingUpdateConfiguration | StreamingForceEndpoint | StreamingTerminateSession;
|
|
322
|
+
export type StreamingOperationMessage = StreamingUpdateConfiguration | StreamingForceEndpoint | StreamingKeepAlive | StreamingTerminateSession;
|
package/dist/workerd.mjs
CHANGED
|
@@ -28,7 +28,7 @@ if (typeof navigator !== "undefined" && navigator.userAgent) {
|
|
|
28
28
|
defaultUserAgentString += navigator.userAgent;
|
|
29
29
|
}
|
|
30
30
|
const defaultUserAgent = {
|
|
31
|
-
sdk: { name: "JavaScript", version: "4.34.6" },
|
|
31
|
+
sdk: { name: "JavaScript", version: "4.35.3" },
|
|
32
32
|
};
|
|
33
33
|
if (typeof process !== "undefined") {
|
|
34
34
|
if (process.versions.node && defaultUserAgentString.indexOf("Node") === -1) {
|
|
@@ -983,6 +983,24 @@ function toInt16View(audio) {
|
|
|
983
983
|
}
|
|
984
984
|
const defaultStreamingUrl$1 = "wss://streaming.assemblyai.com/v3/ws";
|
|
985
985
|
const terminateSessionMessage = `{"type":"Terminate"}`;
|
|
986
|
+
const DEFAULT_CONNECT_TIMEOUT_MS = 1000;
|
|
987
|
+
const DEFAULT_MAX_CONNECTION_RETRIES = 2;
|
|
988
|
+
const DEFAULT_CONNECTION_RETRY_DELAY_MS = 500;
|
|
989
|
+
/**
|
|
990
|
+
* Close/error codes that signal a permanent client-side problem (auth,
|
|
991
|
+
* billing, malformed config). A retry would hit the same failure, so the
|
|
992
|
+
* connection is never retried on these.
|
|
993
|
+
*/
|
|
994
|
+
const NON_RETRYABLE_CLOSE_CODES = new Set([
|
|
995
|
+
StreamingErrorType.BadSampleRate,
|
|
996
|
+
StreamingErrorType.AuthFailed,
|
|
997
|
+
StreamingErrorType.InsufficientFunds,
|
|
998
|
+
StreamingErrorType.FreeTierUser,
|
|
999
|
+
StreamingErrorType.BadSchema,
|
|
1000
|
+
]);
|
|
1001
|
+
function isRetryableCloseCode(code) {
|
|
1002
|
+
return code !== 1000 && !NON_RETRYABLE_CLOSE_CODES.has(code);
|
|
1003
|
+
}
|
|
986
1004
|
/**
|
|
987
1005
|
* Per-send chunk cap in milliseconds for the dual-channel mixer. The streaming
|
|
988
1006
|
* server rejects audio messages longer than 1000 ms (`Input Duration Error`).
|
|
@@ -1116,8 +1134,12 @@ class StreamingTranscriber {
|
|
|
1116
1134
|
searchParams.set("speech_model", this.params.speechModel.toString());
|
|
1117
1135
|
}
|
|
1118
1136
|
if (this.params.languageCode !== undefined) {
|
|
1137
|
+
console.warn("[Deprecation Warning] `languageCode` is deprecated and will be removed in a future release. Please use `languageCodes` instead.");
|
|
1119
1138
|
searchParams.set("language_code", this.params.languageCode);
|
|
1120
1139
|
}
|
|
1140
|
+
if (this.params.languageCodes !== undefined) {
|
|
1141
|
+
searchParams.set("language_codes", JSON.stringify(this.params.languageCodes));
|
|
1142
|
+
}
|
|
1121
1143
|
if (this.params.languageDetection !== undefined) {
|
|
1122
1144
|
searchParams.set("language_detection", this.params.languageDetection.toString());
|
|
1123
1145
|
}
|
|
@@ -1191,12 +1213,81 @@ class StreamingTranscriber {
|
|
|
1191
1213
|
on(event, listener) {
|
|
1192
1214
|
this.listeners[event] = listener;
|
|
1193
1215
|
}
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1216
|
+
/**
|
|
1217
|
+
* Open the streaming session.
|
|
1218
|
+
*
|
|
1219
|
+
* Resolves with the server's `Begin` event once the handshake completes. A
|
|
1220
|
+
* single attempt is bounded by `connectTimeout` (default 1000ms); transient
|
|
1221
|
+
* failures (timeout, network drop, unexpected close) are retried up to
|
|
1222
|
+
* `maxConnectionRetries` times (default 2), waiting `connectionRetryDelay`
|
|
1223
|
+
* (default 500ms) between attempts. Permanent failures (auth, insufficient
|
|
1224
|
+
* funds, malformed config) are not retried.
|
|
1225
|
+
*
|
|
1226
|
+
* Unlike previously, a failed connection now rejects this promise rather
|
|
1227
|
+
* than only invoking the `error` listener — necessary for the caller (and
|
|
1228
|
+
* the retry loop) to observe the failure.
|
|
1229
|
+
*/
|
|
1230
|
+
async connect() {
|
|
1231
|
+
if (this.socket) {
|
|
1232
|
+
throw new Error("Already connected");
|
|
1233
|
+
}
|
|
1234
|
+
const maxRetries = this.params.maxConnectionRetries ?? DEFAULT_MAX_CONNECTION_RETRIES;
|
|
1235
|
+
const retryDelay = this.params.connectionRetryDelay ?? DEFAULT_CONNECTION_RETRY_DELAY_MS;
|
|
1236
|
+
let lastError;
|
|
1237
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
1238
|
+
try {
|
|
1239
|
+
return await this.connectOnce();
|
|
1240
|
+
}
|
|
1241
|
+
catch (err) {
|
|
1242
|
+
lastError = err;
|
|
1243
|
+
const retryable = err.retryable === true;
|
|
1244
|
+
if (!retryable || attempt === maxRetries) {
|
|
1245
|
+
throw err;
|
|
1246
|
+
}
|
|
1247
|
+
console.warn(`Streaming connect attempt ${attempt + 1}/${maxRetries + 1} failed (${err.message}); retrying`);
|
|
1248
|
+
if (retryDelay > 0) {
|
|
1249
|
+
await new Promise((resolve) => setTimeout(resolve, retryDelay));
|
|
1250
|
+
}
|
|
1198
1251
|
}
|
|
1252
|
+
}
|
|
1253
|
+
// The loop above always returns or throws; this only satisfies the type
|
|
1254
|
+
// checker that a value is produced on every path.
|
|
1255
|
+
throw lastError ?? new Error("Failed to connect to streaming server");
|
|
1256
|
+
}
|
|
1257
|
+
connectOnce() {
|
|
1258
|
+
return new Promise((resolve, reject) => {
|
|
1199
1259
|
const url = this.connectionUrl();
|
|
1260
|
+
const timeoutMs = this.params.connectTimeout ?? DEFAULT_CONNECT_TIMEOUT_MS;
|
|
1261
|
+
// `settled` flips once this attempt has resolved (`Begin`) or rejected
|
|
1262
|
+
// (timeout / pre-`Begin` close / error). Before it flips the socket
|
|
1263
|
+
// handlers drive this promise; after it flips they revert to normal
|
|
1264
|
+
// runtime dispatch (close / error / message listeners).
|
|
1265
|
+
let settled = false;
|
|
1266
|
+
let timer;
|
|
1267
|
+
const failAttempt = (error) => {
|
|
1268
|
+
if (settled)
|
|
1269
|
+
return;
|
|
1270
|
+
settled = true;
|
|
1271
|
+
if (timer)
|
|
1272
|
+
clearTimeout(timer);
|
|
1273
|
+
this.discardPendingSocket();
|
|
1274
|
+
reject(error);
|
|
1275
|
+
};
|
|
1276
|
+
const succeed = (begin) => {
|
|
1277
|
+
if (settled)
|
|
1278
|
+
return;
|
|
1279
|
+
settled = true;
|
|
1280
|
+
if (timer)
|
|
1281
|
+
clearTimeout(timer);
|
|
1282
|
+
resolve(begin);
|
|
1283
|
+
};
|
|
1284
|
+
if (timeoutMs > 0) {
|
|
1285
|
+
timer = setTimeout(() => {
|
|
1286
|
+
const err = new StreamingError(`Streaming connection timed out after ${timeoutMs}ms`);
|
|
1287
|
+
err.retryable = true;
|
|
1288
|
+
failAttempt(err);
|
|
1289
|
+
}, timeoutMs);
|
|
1290
|
+
}
|
|
1200
1291
|
if (this.token) {
|
|
1201
1292
|
this.socket = factory(url.toString());
|
|
1202
1293
|
}
|
|
@@ -1213,6 +1304,15 @@ class StreamingTranscriber {
|
|
|
1213
1304
|
reason = StreamingErrorMessages[code];
|
|
1214
1305
|
}
|
|
1215
1306
|
}
|
|
1307
|
+
// A close before `Begin` is a failed connection attempt — reject so
|
|
1308
|
+
// connect() can retry (or surface a permanent failure).
|
|
1309
|
+
if (!settled) {
|
|
1310
|
+
const err = new StreamingError(reason || `Streaming connection closed (code=${code})`);
|
|
1311
|
+
err.code = code;
|
|
1312
|
+
err.retryable = isRetryableCloseCode(code);
|
|
1313
|
+
failAttempt(err);
|
|
1314
|
+
return;
|
|
1315
|
+
}
|
|
1216
1316
|
// Stop the flush timer when the socket is gone (server-initiated close,
|
|
1217
1317
|
// network drop, etc.) — otherwise subsequent ticks call send() on a
|
|
1218
1318
|
// closed socket and spam the error listener.
|
|
@@ -1223,25 +1323,37 @@ class StreamingTranscriber {
|
|
|
1223
1323
|
this.listeners.close?.(code, reason);
|
|
1224
1324
|
};
|
|
1225
1325
|
this.socket.onerror = (event) => {
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1326
|
+
const error = event.error ?? new Error(event.message);
|
|
1327
|
+
// A socket error before `Begin` is a failed attempt → reject/retry.
|
|
1328
|
+
if (!settled) {
|
|
1329
|
+
error.retryable = true;
|
|
1330
|
+
failAttempt(error);
|
|
1331
|
+
return;
|
|
1332
|
+
}
|
|
1333
|
+
this.listeners.error?.(error);
|
|
1230
1334
|
};
|
|
1231
1335
|
this.socket.onmessage = ({ data }) => {
|
|
1232
1336
|
const message = JSON.parse(data.toString());
|
|
1233
1337
|
if ("error" in message) {
|
|
1234
1338
|
const err = new StreamingError(message.error);
|
|
1235
1339
|
if ("error_code" in message) {
|
|
1236
|
-
err.code =
|
|
1237
|
-
|
|
1340
|
+
err.code = message.error_code;
|
|
1341
|
+
}
|
|
1342
|
+
// A server error frame before `Begin` fails the attempt; the code
|
|
1343
|
+
// decides whether a retry is worthwhile.
|
|
1344
|
+
if (!settled) {
|
|
1345
|
+
const attemptErr = err;
|
|
1346
|
+
attemptErr.retryable =
|
|
1347
|
+
err.code === undefined ? true : isRetryableCloseCode(err.code);
|
|
1348
|
+
failAttempt(attemptErr);
|
|
1349
|
+
return;
|
|
1238
1350
|
}
|
|
1239
1351
|
this.listeners.error?.(err);
|
|
1240
1352
|
return;
|
|
1241
1353
|
}
|
|
1242
1354
|
switch (message.type) {
|
|
1243
1355
|
case "Begin": {
|
|
1244
|
-
|
|
1356
|
+
succeed(message);
|
|
1245
1357
|
this.listeners.open?.(message);
|
|
1246
1358
|
break;
|
|
1247
1359
|
}
|
|
@@ -1288,6 +1400,20 @@ class StreamingTranscriber {
|
|
|
1288
1400
|
};
|
|
1289
1401
|
});
|
|
1290
1402
|
}
|
|
1403
|
+
/** Tear down a half-open socket from a failed connection attempt. */
|
|
1404
|
+
discardPendingSocket() {
|
|
1405
|
+
if (!this.socket)
|
|
1406
|
+
return;
|
|
1407
|
+
try {
|
|
1408
|
+
if (this.socket.removeAllListeners)
|
|
1409
|
+
this.socket.removeAllListeners();
|
|
1410
|
+
this.socket.close();
|
|
1411
|
+
}
|
|
1412
|
+
catch {
|
|
1413
|
+
// Best-effort cleanup; a half-open socket may throw on close.
|
|
1414
|
+
}
|
|
1415
|
+
this.socket = undefined;
|
|
1416
|
+
}
|
|
1291
1417
|
/**
|
|
1292
1418
|
* Returns a WritableStream that pumps PCM chunks into `sendAudio`. Single-channel
|
|
1293
1419
|
* only — in dual-channel mode use `sendAudio(pcm, { channel })` directly, since
|
|
@@ -1561,6 +1687,16 @@ class StreamingTranscriber {
|
|
|
1561
1687
|
};
|
|
1562
1688
|
this.send(JSON.stringify(message));
|
|
1563
1689
|
}
|
|
1690
|
+
/**
|
|
1691
|
+
* Reset the server's inactivity timer. Only needed when the session was
|
|
1692
|
+
* created with `inactivityTimeout` and no audio is being sent.
|
|
1693
|
+
*/
|
|
1694
|
+
keepAlive() {
|
|
1695
|
+
const message = {
|
|
1696
|
+
type: "KeepAlive",
|
|
1697
|
+
};
|
|
1698
|
+
this.send(JSON.stringify(message));
|
|
1699
|
+
}
|
|
1564
1700
|
send(data) {
|
|
1565
1701
|
if (!this.socket || this.socket.readyState !== this.socket.OPEN) {
|
|
1566
1702
|
throw new Error("Socket is not open for communication");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "assemblyai",
|
|
3
|
-
"version": "4.34.6",
|
|
3
|
+
"version": "4.35.3",
|
|
4
4
|
"description": "The AssemblyAI JavaScript SDK provides an easy-to-use interface for interacting with the AssemblyAI API, which supports async and real-time transcription, as well as the latest LeMUR models.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=18"
|
package/src/exports/streaming.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export * from "../types/asyncapi.generated";
|
|
2
2
|
export * from "../types/realtime";
|
|
3
3
|
export * from "../types/helpers";
|
|
4
|
-
export * from "../types/streaming
|
|
4
|
+
export * from "../types/streaming";
|
|
5
5
|
export * from "../services/realtime/service";
|
|
6
6
|
export * from "../services/streaming/service";
|
|
7
7
|
export * from "../services/streaming/factory";
|
|
@@ -18,12 +18,17 @@ import {
|
|
|
18
18
|
SpeakerRevisionEvent,
|
|
19
19
|
StreamingUpdateConfiguration,
|
|
20
20
|
StreamingForceEndpoint,
|
|
21
|
+
StreamingKeepAlive,
|
|
21
22
|
WarningEvent,
|
|
22
23
|
} from "../..";
|
|
23
24
|
import type { VadDetector, VadFrame } from "../../types/streaming/dual-channel";
|
|
24
25
|
import { EnergyVad } from "./energy-vad";
|
|
25
26
|
import { attributeTurn, rollUpTurnChannel, VadTimeline } from "./label-mapper";
|
|
26
|
-
import {
|
|
27
|
+
import {
|
|
28
|
+
StreamingError,
|
|
29
|
+
StreamingErrorMessages,
|
|
30
|
+
StreamingErrorType,
|
|
31
|
+
} from "../../utils/errors";
|
|
27
32
|
import { StreamingErrorTypeCodes } from "../../utils/errors/streaming";
|
|
28
33
|
|
|
29
34
|
/**
|
|
@@ -58,6 +63,30 @@ function toInt16View(audio: AudioData): Int16Array {
|
|
|
58
63
|
const defaultStreamingUrl = "wss://streaming.assemblyai.com/v3/ws";
|
|
59
64
|
const terminateSessionMessage = `{"type":"Terminate"}`;
|
|
60
65
|
|
|
66
|
+
const DEFAULT_CONNECT_TIMEOUT_MS = 1000;
|
|
67
|
+
const DEFAULT_MAX_CONNECTION_RETRIES = 2;
|
|
68
|
+
const DEFAULT_CONNECTION_RETRY_DELAY_MS = 500;
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* Close/error codes that signal a permanent client-side problem (auth,
|
|
72
|
+
* billing, malformed config). A retry would hit the same failure, so the
|
|
73
|
+
* connection is never retried on these.
|
|
74
|
+
*/
|
|
75
|
+
const NON_RETRYABLE_CLOSE_CODES = new Set<number>([
|
|
76
|
+
StreamingErrorType.BadSampleRate,
|
|
77
|
+
StreamingErrorType.AuthFailed,
|
|
78
|
+
StreamingErrorType.InsufficientFunds,
|
|
79
|
+
StreamingErrorType.FreeTierUser,
|
|
80
|
+
StreamingErrorType.BadSchema,
|
|
81
|
+
]);
|
|
82
|
+
|
|
83
|
+
/** Error from a single connection attempt, tagged for retry handling. */
|
|
84
|
+
type ConnectionAttemptError = Error & { code?: number; retryable: boolean };
|
|
85
|
+
|
|
86
|
+
function isRetryableCloseCode(code: number): boolean {
|
|
87
|
+
return code !== 1000 && !NON_RETRYABLE_CLOSE_CODES.has(code);
|
|
88
|
+
}
|
|
89
|
+
|
|
61
90
|
/**
|
|
62
91
|
* Per-send chunk cap in milliseconds for the dual-channel mixer. The streaming
|
|
63
92
|
* server rejects audio messages longer than 1000 ms (`Input Duration Error`).
|
|
@@ -284,9 +313,19 @@ export class StreamingTranscriber {
|
|
|
284
313
|
}
|
|
285
314
|
|
|
286
315
|
if (this.params.languageCode !== undefined) {
|
|
316
|
+
console.warn(
|
|
317
|
+
"[Deprecation Warning] `languageCode` is deprecated and will be removed in a future release. Please use `languageCodes` instead.",
|
|
318
|
+
);
|
|
287
319
|
searchParams.set("language_code", this.params.languageCode);
|
|
288
320
|
}
|
|
289
321
|
|
|
322
|
+
if (this.params.languageCodes !== undefined) {
|
|
323
|
+
searchParams.set(
|
|
324
|
+
"language_codes",
|
|
325
|
+
JSON.stringify(this.params.languageCodes),
|
|
326
|
+
);
|
|
327
|
+
}
|
|
328
|
+
|
|
290
329
|
if (this.params.languageDetection !== undefined) {
|
|
291
330
|
searchParams.set(
|
|
292
331
|
"language_detection",
|
|
@@ -432,13 +471,90 @@ export class StreamingTranscriber {
|
|
|
432
471
|
this.listeners[event] = listener;
|
|
433
472
|
}
|
|
434
473
|
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
474
|
+
/**
|
|
475
|
+
* Open the streaming session.
|
|
476
|
+
*
|
|
477
|
+
* Resolves with the server's `Begin` event once the handshake completes. A
|
|
478
|
+
* single attempt is bounded by `connectTimeout` (default 1000ms); transient
|
|
479
|
+
* failures (timeout, network drop, unexpected close) are retried up to
|
|
480
|
+
* `maxConnectionRetries` times (default 2), waiting `connectionRetryDelay`
|
|
481
|
+
* (default 500ms) between attempts. Permanent failures (auth, insufficient
|
|
482
|
+
* funds, malformed config) are not retried.
|
|
483
|
+
*
|
|
484
|
+
* Unlike previously, a failed connection now rejects this promise rather
|
|
485
|
+
* than only invoking the `error` listener — necessary for the caller (and
|
|
486
|
+
* the retry loop) to observe the failure.
|
|
487
|
+
*/
|
|
488
|
+
async connect(): Promise<BeginEvent> {
|
|
489
|
+
if (this.socket) {
|
|
490
|
+
throw new Error("Already connected");
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
const maxRetries =
|
|
494
|
+
this.params.maxConnectionRetries ?? DEFAULT_MAX_CONNECTION_RETRIES;
|
|
495
|
+
const retryDelay =
|
|
496
|
+
this.params.connectionRetryDelay ?? DEFAULT_CONNECTION_RETRY_DELAY_MS;
|
|
497
|
+
|
|
498
|
+
let lastError: Error | undefined;
|
|
499
|
+
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
500
|
+
try {
|
|
501
|
+
return await this.connectOnce();
|
|
502
|
+
} catch (err) {
|
|
503
|
+
lastError = err as Error;
|
|
504
|
+
const retryable = (err as ConnectionAttemptError).retryable === true;
|
|
505
|
+
if (!retryable || attempt === maxRetries) {
|
|
506
|
+
throw err;
|
|
507
|
+
}
|
|
508
|
+
console.warn(
|
|
509
|
+
`Streaming connect attempt ${attempt + 1}/${maxRetries + 1} failed (${(err as Error).message}); retrying`,
|
|
510
|
+
);
|
|
511
|
+
if (retryDelay > 0) {
|
|
512
|
+
await new Promise((resolve) => setTimeout(resolve, retryDelay));
|
|
513
|
+
}
|
|
439
514
|
}
|
|
515
|
+
}
|
|
516
|
+
// The loop above always returns or throws; this only satisfies the type
|
|
517
|
+
// checker that a value is produced on every path.
|
|
518
|
+
throw lastError ?? new Error("Failed to connect to streaming server");
|
|
519
|
+
}
|
|
440
520
|
|
|
521
|
+
private connectOnce(): Promise<BeginEvent> {
|
|
522
|
+
return new Promise<BeginEvent>((resolve, reject) => {
|
|
441
523
|
const url = this.connectionUrl();
|
|
524
|
+
const timeoutMs =
|
|
525
|
+
this.params.connectTimeout ?? DEFAULT_CONNECT_TIMEOUT_MS;
|
|
526
|
+
|
|
527
|
+
// `settled` flips once this attempt has resolved (`Begin`) or rejected
|
|
528
|
+
// (timeout / pre-`Begin` close / error). Before it flips the socket
|
|
529
|
+
// handlers drive this promise; after it flips they revert to normal
|
|
530
|
+
// runtime dispatch (close / error / message listeners).
|
|
531
|
+
let settled = false;
|
|
532
|
+
let timer: ReturnType<typeof setTimeout> | undefined;
|
|
533
|
+
|
|
534
|
+
const failAttempt = (error: ConnectionAttemptError) => {
|
|
535
|
+
if (settled) return;
|
|
536
|
+
settled = true;
|
|
537
|
+
if (timer) clearTimeout(timer);
|
|
538
|
+
this.discardPendingSocket();
|
|
539
|
+
reject(error);
|
|
540
|
+
};
|
|
541
|
+
|
|
542
|
+
const succeed = (begin: BeginEvent) => {
|
|
543
|
+
if (settled) return;
|
|
544
|
+
settled = true;
|
|
545
|
+
if (timer) clearTimeout(timer);
|
|
546
|
+
resolve(begin);
|
|
547
|
+
};
|
|
548
|
+
|
|
549
|
+
if (timeoutMs > 0) {
|
|
550
|
+
timer = setTimeout(() => {
|
|
551
|
+
const err = new StreamingError(
|
|
552
|
+
`Streaming connection timed out after ${timeoutMs}ms`,
|
|
553
|
+
) as ConnectionAttemptError;
|
|
554
|
+
err.retryable = true;
|
|
555
|
+
failAttempt(err);
|
|
556
|
+
}, timeoutMs);
|
|
557
|
+
}
|
|
442
558
|
|
|
443
559
|
if (this.token) {
|
|
444
560
|
this.socket = polyfillWebSocketFactory(url.toString());
|
|
@@ -465,6 +581,17 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
|
|
|
465
581
|
reason = StreamingErrorMessages[code as StreamingErrorTypeCodes];
|
|
466
582
|
}
|
|
467
583
|
}
|
|
584
|
+
// A close before `Begin` is a failed connection attempt — reject so
|
|
585
|
+
// connect() can retry (or surface a permanent failure).
|
|
586
|
+
if (!settled) {
|
|
587
|
+
const err = new StreamingError(
|
|
588
|
+
reason || `Streaming connection closed (code=${code})`,
|
|
589
|
+
) as ConnectionAttemptError;
|
|
590
|
+
err.code = code;
|
|
591
|
+
err.retryable = isRetryableCloseCode(code);
|
|
592
|
+
failAttempt(err);
|
|
593
|
+
return;
|
|
594
|
+
}
|
|
468
595
|
// Stop the flush timer when the socket is gone (server-initiated close,
|
|
469
596
|
// network drop, etc.) — otherwise subsequent ticks call send() on a
|
|
470
597
|
// closed socket and spam the error listener.
|
|
@@ -476,18 +603,34 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
|
|
|
476
603
|
};
|
|
477
604
|
|
|
478
605
|
this.socket.onerror = (event: ErrorEvent) => {
|
|
479
|
-
|
|
480
|
-
|
|
606
|
+
const error = (event.error as Error) ?? new Error(event.message);
|
|
607
|
+
// A socket error before `Begin` is a failed attempt → reject/retry.
|
|
608
|
+
if (!settled) {
|
|
609
|
+
(error as ConnectionAttemptError).retryable = true;
|
|
610
|
+
failAttempt(error as ConnectionAttemptError);
|
|
611
|
+
return;
|
|
612
|
+
}
|
|
613
|
+
this.listeners.error?.(error);
|
|
481
614
|
};
|
|
482
615
|
|
|
483
616
|
this.socket.onmessage = ({ data }: MessageEvent) => {
|
|
484
617
|
const message = JSON.parse(data.toString()) as StreamingEventMessage;
|
|
485
618
|
|
|
486
619
|
if ("error" in message) {
|
|
487
|
-
const err = new StreamingError(message.error)
|
|
620
|
+
const err = new StreamingError(message.error) as StreamingError & {
|
|
621
|
+
code?: number;
|
|
622
|
+
};
|
|
488
623
|
if ("error_code" in message) {
|
|
489
|
-
|
|
490
|
-
|
|
624
|
+
err.code = message.error_code;
|
|
625
|
+
}
|
|
626
|
+
// A server error frame before `Begin` fails the attempt; the code
|
|
627
|
+
// decides whether a retry is worthwhile.
|
|
628
|
+
if (!settled) {
|
|
629
|
+
const attemptErr = err as ConnectionAttemptError;
|
|
630
|
+
attemptErr.retryable =
|
|
631
|
+
err.code === undefined ? true : isRetryableCloseCode(err.code);
|
|
632
|
+
failAttempt(attemptErr);
|
|
633
|
+
return;
|
|
491
634
|
}
|
|
492
635
|
this.listeners.error?.(err);
|
|
493
636
|
return;
|
|
@@ -495,7 +638,7 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
|
|
|
495
638
|
|
|
496
639
|
switch (message.type) {
|
|
497
640
|
case "Begin": {
|
|
498
|
-
|
|
641
|
+
succeed(message);
|
|
499
642
|
this.listeners.open?.(message);
|
|
500
643
|
break;
|
|
501
644
|
}
|
|
@@ -548,6 +691,18 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
|
|
|
548
691
|
});
|
|
549
692
|
}
|
|
550
693
|
|
|
694
|
+
/** Tear down a half-open socket from a failed connection attempt. */
|
|
695
|
+
private discardPendingSocket(): void {
|
|
696
|
+
if (!this.socket) return;
|
|
697
|
+
try {
|
|
698
|
+
if (this.socket.removeAllListeners) this.socket.removeAllListeners();
|
|
699
|
+
this.socket.close();
|
|
700
|
+
} catch {
|
|
701
|
+
// Best-effort cleanup; a half-open socket may throw on close.
|
|
702
|
+
}
|
|
703
|
+
this.socket = undefined;
|
|
704
|
+
}
|
|
705
|
+
|
|
551
706
|
/**
|
|
552
707
|
* Returns a WritableStream that pumps PCM chunks into `sendAudio`. Single-channel
|
|
553
708
|
* only — in dual-channel mode use `sendAudio(pcm, { channel })` directly, since
|
|
@@ -829,6 +984,17 @@ Learn more at https://github.com/AssemblyAI/assemblyai-node-sdk/blob/main/docs/c
|
|
|
829
984
|
this.send(JSON.stringify(message));
|
|
830
985
|
}
|
|
831
986
|
|
|
987
|
+
/**
|
|
988
|
+
* Reset the server's inactivity timer. Only needed when the session was
|
|
989
|
+
* created with `inactivityTimeout` and no audio is being sent.
|
|
990
|
+
*/
|
|
991
|
+
keepAlive() {
|
|
992
|
+
const message: StreamingKeepAlive = {
|
|
993
|
+
type: "KeepAlive",
|
|
994
|
+
};
|
|
995
|
+
this.send(JSON.stringify(message));
|
|
996
|
+
}
|
|
997
|
+
|
|
832
998
|
private send(data: BufferLike) {
|
|
833
999
|
if (!this.socket || this.socket.readyState !== this.socket.OPEN) {
|
|
834
1000
|
throw new Error("Socket is not open for communication");
|
|
@@ -25,7 +25,7 @@ export type AudioData = ArrayBufferLike;
|
|
|
25
25
|
* The encoding of the audio data
|
|
26
26
|
* @defaultValue "pcm_s16le"
|
|
27
27
|
*/
|
|
28
|
-
export type AudioEncoding = "pcm_s16le" | "pcm_mulaw";
|
|
28
|
+
export type AudioEncoding = "pcm_s16le" | "pcm_mulaw" | "opus" | "ogg_opus";
|
|
29
29
|
|
|
30
30
|
/**
|
|
31
31
|
* Configure the threshold for how long to wait before ending an utterance. Default is 700ms.
|
|
@@ -76,6 +76,22 @@ export type StreamingTranscriberParams = {
|
|
|
76
76
|
websocketBaseUrl?: string;
|
|
77
77
|
apiKey?: string;
|
|
78
78
|
token?: string;
|
|
79
|
+
/**
|
|
80
|
+
* Milliseconds to wait for the streaming handshake (socket open + server
|
|
81
|
+
* `Begin`) before treating the attempt as failed. Defaults to 1000.
|
|
82
|
+
*/
|
|
83
|
+
connectTimeout?: number;
|
|
84
|
+
/**
|
|
85
|
+
* Number of additional connection attempts after the first one fails on a
|
|
86
|
+
* transient error (timeout, network drop, unexpected close). 0 disables
|
|
87
|
+
* retries. Permanent failures (auth, insufficient funds, malformed config)
|
|
88
|
+
* are never retried. Defaults to 2.
|
|
89
|
+
*/
|
|
90
|
+
maxConnectionRetries?: number;
|
|
91
|
+
/**
|
|
92
|
+
* Milliseconds to wait between connection attempts. Defaults to 500.
|
|
93
|
+
*/
|
|
94
|
+
connectionRetryDelay?: number;
|
|
79
95
|
sampleRate: number;
|
|
80
96
|
encoding?: AudioEncoding;
|
|
81
97
|
endOfTurnConfidenceThreshold?: number;
|
|
@@ -93,7 +109,19 @@ export type StreamingTranscriberParams = {
|
|
|
93
109
|
prompt?: string;
|
|
94
110
|
agentContext?: string;
|
|
95
111
|
speechModel?: StreamingSpeechModel;
|
|
112
|
+
/**
|
|
113
|
+
* @deprecated Use `languageCodes` instead (pass a single-element array, e.g. `["es"]`,
|
|
114
|
+
* for the same behavior). Still supported for backward compatibility.
|
|
115
|
+
*/
|
|
96
116
|
languageCode?: string;
|
|
117
|
+
/**
|
|
118
|
+
* Recommended way to select languages. Steers transcription toward a set of
|
|
119
|
+
* languages by biasing output toward them on a per-token basis while still
|
|
120
|
+
* allowing native code-switching among them. Pass the languages you expect
|
|
121
|
+
* (e.g. `["en", "es"]`), or a single-element array (e.g. `["es"]`) for a
|
|
122
|
+
* monolingual session. Universal-3.5 Pro Streaming only.
|
|
123
|
+
*/
|
|
124
|
+
languageCodes?: string[];
|
|
97
125
|
languageDetection?: boolean;
|
|
98
126
|
domain?: StreamingDomain;
|
|
99
127
|
inactivityTimeout?: number;
|
|
@@ -343,12 +371,22 @@ export type StreamingUpdateConfiguration = {
|
|
|
343
371
|
filter_profanity?: boolean;
|
|
344
372
|
interruption_delay?: number;
|
|
345
373
|
turn_left_pad_ms?: number;
|
|
374
|
+
/**
|
|
375
|
+
* Steer transcription toward a set of languages mid-stream. Pass an empty
|
|
376
|
+
* array (`[]`) to clear steering and restore the model's default
|
|
377
|
+
* multilingual code-switching. Universal-3.5 Pro Streaming only.
|
|
378
|
+
*/
|
|
379
|
+
language_codes?: string[];
|
|
346
380
|
};
|
|
347
381
|
|
|
348
382
|
export type StreamingForceEndpoint = {
|
|
349
383
|
type: "ForceEndpoint";
|
|
350
384
|
};
|
|
351
385
|
|
|
386
|
+
export type StreamingKeepAlive = {
|
|
387
|
+
type: "KeepAlive";
|
|
388
|
+
};
|
|
389
|
+
|
|
352
390
|
export type ErrorEvent = {
|
|
353
391
|
type: "Error";
|
|
354
392
|
error_code?: number;
|
|
@@ -405,4 +443,5 @@ export type StreamingEventMessage =
|
|
|
405
443
|
export type StreamingOperationMessage =
|
|
406
444
|
| StreamingUpdateConfiguration
|
|
407
445
|
| StreamingForceEndpoint
|
|
446
|
+
| StreamingKeepAlive
|
|
408
447
|
| StreamingTerminateSession;
|