kugelaudio 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/index.d.mts +46 -2
- package/dist/index.d.ts +46 -2
- package/dist/index.js +89 -8
- package/dist/index.mjs +89 -8
- package/package.json +1 -1
- package/src/client.test.ts +139 -0
- package/src/client.ts +108 -5
- package/src/errors.ts +16 -4
- package/src/types.ts +7 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,15 @@
|
|
|
1
|
+
## [kugelaudio-v0.6.1](https://github.com/Kugelaudio/KugelAudio/compare/js-sdk-v0.6.0...js-sdk-v0.6.1) (2026-06-04)
|
|
2
|
+
|
|
3
|
+
### Bug Fixes
|
|
4
|
+
|
|
5
|
+
* **python-sdk:** propagate ingress errors through SDK integrations ([#1313](https://github.com/Kugelaudio/KugelAudio/issues/1313)) ([3ae2e03](https://github.com/Kugelaudio/KugelAudio/commit/3ae2e03745b49cca0712c20d9a658c160f4b6f38))
|
|
6
|
+
|
|
7
|
+
## [kugelaudio-v0.6.0](https://github.com/Kugelaudio/KugelAudio/compare/js-sdk-v0.5.0...js-sdk-v0.6.0) (2026-06-01)
|
|
8
|
+
|
|
9
|
+
### Features
|
|
10
|
+
|
|
11
|
+
* streaming barge-in (cancelCurrent) across server + JS/Python/Java SDKs ([#1210](https://github.com/Kugelaudio/KugelAudio/issues/1210)) ([341e54f](https://github.com/Kugelaudio/KugelAudio/commit/341e54f169b4dd9242272b249fca30f005bfc3b8))
|
|
12
|
+
|
|
1
13
|
## [kugelaudio-v0.5.0](https://github.com/Kugelaudio/KugelAudio/compare/js-sdk-v0.4.0...js-sdk-v0.5.0) (2026-05-21)
|
|
2
14
|
|
|
3
15
|
### Features
|
package/dist/index.d.mts
CHANGED
|
@@ -383,6 +383,13 @@ interface StreamingSessionCallbacks {
|
|
|
383
383
|
onGenerationStarted?: (chunkId: number, text: string) => void;
|
|
384
384
|
/** Called when word-level timestamps arrive (requires `wordTimestamps: true`). */
|
|
385
385
|
onWordTimestamps?: (timestamps: WordTimestamp[]) => void;
|
|
386
|
+
/**
|
|
387
|
+
* Called when the server acknowledges a barge-in
|
|
388
|
+
* ({@link StreamingSession.cancelCurrent}). After this fires, no further
|
|
389
|
+
* audio chunks from the cancelled turn will arrive and the session is
|
|
390
|
+
* ready for the next `send()`.
|
|
391
|
+
*/
|
|
392
|
+
onInterrupted?: () => void;
|
|
386
393
|
/** Called on any error. */
|
|
387
394
|
onError?: (error: Error) => void;
|
|
388
395
|
}
|
|
@@ -938,8 +945,14 @@ declare class MultiContextSession {
|
|
|
938
945
|
flush(contextId: string): void;
|
|
939
946
|
/**
|
|
940
947
|
* Close a specific context.
|
|
948
|
+
*
|
|
949
|
+
* @param contextId - The context to close.
|
|
950
|
+
* @param immediate - When `true`, **barge-in**: the server cancels the
|
|
951
|
+
* context's in-flight generation immediately and discards any buffered or
|
|
952
|
+
* queued text instead of draining it. Use this when the end user speaks
|
|
953
|
+
* over the agent. When `false` (default), queued sentences finish first.
|
|
941
954
|
*/
|
|
942
|
-
closeContext(contextId: string): void;
|
|
955
|
+
closeContext(contextId: string, immediate?: boolean): void;
|
|
943
956
|
/**
|
|
944
957
|
* Send keep-alive to reset a context's inactivity timeout.
|
|
945
958
|
*/
|
|
@@ -1016,6 +1029,34 @@ declare class StreamingSession {
|
|
|
1016
1029
|
* handle chunking via `chunkLengthSchedule` / `autoMode` instead.
|
|
1017
1030
|
*/
|
|
1018
1031
|
send(text: string, flush?: boolean): void;
|
|
1032
|
+
/**
|
|
1033
|
+
* Interrupt (barge-in) the current generation without closing the socket.
|
|
1034
|
+
*
|
|
1035
|
+
* Use this when the end user starts speaking over the agent: it tells the
|
|
1036
|
+
* server to **stop generating audio for the current turn immediately** and
|
|
1037
|
+
* drop any text that was buffered or queued but not yet spoken. Unlike
|
|
1038
|
+
* {@link endSession}, no remaining text is flushed — the turn is abandoned.
|
|
1039
|
+
*
|
|
1040
|
+
* The WebSocket stays open and a fresh session is ready, so you can call
|
|
1041
|
+
* {@link send} for the next user turn right away (config is re-sent
|
|
1042
|
+
* automatically on that first `send`).
|
|
1043
|
+
*
|
|
1044
|
+
* The returned promise resolves once the server acknowledges with an
|
|
1045
|
+
* `interrupted` frame (which also fires
|
|
1046
|
+
* {@link StreamingSessionCallbacks.onInterrupted}), or after a 5 s **quiet**
|
|
1047
|
+
* timeout — i.e. 5 s elapse without any server message arriving. The timer
|
|
1048
|
+
* resets on every incoming frame, so a few in-flight audio chunks still
|
|
1049
|
+
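* draining at the moment of cancellation do not trip it prematurely. The promise also resolves immediately when the socket is not open, and as soon as the socket closes while waiting.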
|
|
1050
|
+
*
|
|
1051
|
+
* @example
|
|
1052
|
+
* ```typescript
|
|
1053
|
+
* // VAD detected the user speaking over the agent:
|
|
1054
|
+
* await session.cancelCurrent();
|
|
1055
|
+
* // Socket is still open — start the next turn immediately:
|
|
1056
|
+
* session.send(nextLlmToken);
|
|
1057
|
+
* ```
|
|
1058
|
+
*/
|
|
1059
|
+
cancelCurrent(): Promise<void>;
|
|
1019
1060
|
/**
|
|
1020
1061
|
* End the current session but keep the WebSocket connection open.
|
|
1021
1062
|
*
|
|
@@ -1183,6 +1224,8 @@ declare const ErrorCodes: {
|
|
|
1183
1224
|
readonly VALIDATION: "VALIDATION_ERROR";
|
|
1184
1225
|
readonly INTERNAL: "INTERNAL_ERROR";
|
|
1185
1226
|
readonly NOT_FOUND: "NOT_FOUND";
|
|
1227
|
+
readonly MISSING_VOICE_ID: "MISSING_VOICE_ID";
|
|
1228
|
+
readonly TOO_MANY_CONTEXTS: "TOO_MANY_CONTEXTS";
|
|
1186
1229
|
};
|
|
1187
1230
|
type ErrorCode = typeof ErrorCodes[keyof typeof ErrorCodes];
|
|
1188
1231
|
declare const WsCloseCodes: {
|
|
@@ -1264,11 +1307,12 @@ interface HttpResponseLike {
|
|
|
1264
1307
|
declare function classifyHttpError(status: number, bodyText: string, headers: HttpResponseLike['headers']): KugelAudioError;
|
|
1265
1308
|
/**
|
|
1266
1309
|
* Build a `KugelAudioError` from a server-sent WebSocket error frame
|
|
1267
|
-
* (`{error, error_code,
|
|
1310
|
+
* (`{error, error_code, code, retry_after}`).
|
|
1268
1311
|
*/
|
|
1269
1312
|
declare function classifyWsFrame(data: {
|
|
1270
1313
|
error?: string;
|
|
1271
1314
|
error_code?: string;
|
|
1315
|
+
code?: number;
|
|
1272
1316
|
retry_after?: number;
|
|
1273
1317
|
}): KugelAudioError;
|
|
1274
1318
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -383,6 +383,13 @@ interface StreamingSessionCallbacks {
|
|
|
383
383
|
onGenerationStarted?: (chunkId: number, text: string) => void;
|
|
384
384
|
/** Called when word-level timestamps arrive (requires `wordTimestamps: true`). */
|
|
385
385
|
onWordTimestamps?: (timestamps: WordTimestamp[]) => void;
|
|
386
|
+
/**
|
|
387
|
+
* Called when the server acknowledges a barge-in
|
|
388
|
+
* ({@link StreamingSession.cancelCurrent}). After this fires, no further
|
|
389
|
+
* audio chunks from the cancelled turn will arrive and the session is
|
|
390
|
+
* ready for the next `send()`.
|
|
391
|
+
*/
|
|
392
|
+
onInterrupted?: () => void;
|
|
386
393
|
/** Called on any error. */
|
|
387
394
|
onError?: (error: Error) => void;
|
|
388
395
|
}
|
|
@@ -938,8 +945,14 @@ declare class MultiContextSession {
|
|
|
938
945
|
flush(contextId: string): void;
|
|
939
946
|
/**
|
|
940
947
|
* Close a specific context.
|
|
948
|
+
*
|
|
949
|
+
* @param contextId - The context to close.
|
|
950
|
+
* @param immediate - When `true`, **barge-in**: the server cancels the
|
|
951
|
+
* context's in-flight generation immediately and discards any buffered or
|
|
952
|
+
* queued text instead of draining it. Use this when the end user speaks
|
|
953
|
+
* over the agent. When `false` (default), queued sentences finish first.
|
|
941
954
|
*/
|
|
942
|
-
closeContext(contextId: string): void;
|
|
955
|
+
closeContext(contextId: string, immediate?: boolean): void;
|
|
943
956
|
/**
|
|
944
957
|
* Send keep-alive to reset a context's inactivity timeout.
|
|
945
958
|
*/
|
|
@@ -1016,6 +1029,34 @@ declare class StreamingSession {
|
|
|
1016
1029
|
* handle chunking via `chunkLengthSchedule` / `autoMode` instead.
|
|
1017
1030
|
*/
|
|
1018
1031
|
send(text: string, flush?: boolean): void;
|
|
1032
|
+
/**
|
|
1033
|
+
* Interrupt (barge-in) the current generation without closing the socket.
|
|
1034
|
+
*
|
|
1035
|
+
* Use this when the end user starts speaking over the agent: it tells the
|
|
1036
|
+
* server to **stop generating audio for the current turn immediately** and
|
|
1037
|
+
* drop any text that was buffered or queued but not yet spoken. Unlike
|
|
1038
|
+
* {@link endSession}, no remaining text is flushed — the turn is abandoned.
|
|
1039
|
+
*
|
|
1040
|
+
* The WebSocket stays open and a fresh session is ready, so you can call
|
|
1041
|
+
* {@link send} for the next user turn right away (config is re-sent
|
|
1042
|
+
* automatically on that first `send`).
|
|
1043
|
+
*
|
|
1044
|
+
* The returned promise resolves once the server acknowledges with an
|
|
1045
|
+
* `interrupted` frame (which also fires
|
|
1046
|
+
* {@link StreamingSessionCallbacks.onInterrupted}), or after a 5 s **quiet**
|
|
1047
|
+
* timeout — i.e. 5 s elapse without any server message arriving. The timer
|
|
1048
|
+
* resets on every incoming frame, so a few in-flight audio chunks still
|
|
1049
|
+
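* draining at the moment of cancellation do not trip it prematurely. The promise also resolves immediately when the socket is not open, and as soon as the socket closes while waiting.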
|
|
1050
|
+
*
|
|
1051
|
+
* @example
|
|
1052
|
+
* ```typescript
|
|
1053
|
+
* // VAD detected the user speaking over the agent:
|
|
1054
|
+
* await session.cancelCurrent();
|
|
1055
|
+
* // Socket is still open — start the next turn immediately:
|
|
1056
|
+
* session.send(nextLlmToken);
|
|
1057
|
+
* ```
|
|
1058
|
+
*/
|
|
1059
|
+
cancelCurrent(): Promise<void>;
|
|
1019
1060
|
/**
|
|
1020
1061
|
* End the current session but keep the WebSocket connection open.
|
|
1021
1062
|
*
|
|
@@ -1183,6 +1224,8 @@ declare const ErrorCodes: {
|
|
|
1183
1224
|
readonly VALIDATION: "VALIDATION_ERROR";
|
|
1184
1225
|
readonly INTERNAL: "INTERNAL_ERROR";
|
|
1185
1226
|
readonly NOT_FOUND: "NOT_FOUND";
|
|
1227
|
+
readonly MISSING_VOICE_ID: "MISSING_VOICE_ID";
|
|
1228
|
+
readonly TOO_MANY_CONTEXTS: "TOO_MANY_CONTEXTS";
|
|
1186
1229
|
};
|
|
1187
1230
|
type ErrorCode = typeof ErrorCodes[keyof typeof ErrorCodes];
|
|
1188
1231
|
declare const WsCloseCodes: {
|
|
@@ -1264,11 +1307,12 @@ interface HttpResponseLike {
|
|
|
1264
1307
|
declare function classifyHttpError(status: number, bodyText: string, headers: HttpResponseLike['headers']): KugelAudioError;
|
|
1265
1308
|
/**
|
|
1266
1309
|
* Build a `KugelAudioError` from a server-sent WebSocket error frame
|
|
1267
|
-
* (`{error, error_code,
|
|
1310
|
+
* (`{error, error_code, code, retry_after}`).
|
|
1268
1311
|
*/
|
|
1269
1312
|
declare function classifyWsFrame(data: {
|
|
1270
1313
|
error?: string;
|
|
1271
1314
|
error_code?: string;
|
|
1315
|
+
code?: number;
|
|
1272
1316
|
retry_after?: number;
|
|
1273
1317
|
}): KugelAudioError;
|
|
1274
1318
|
/**
|
package/dist/index.js
CHANGED
|
@@ -233,7 +233,9 @@ var ErrorCodes = {
|
|
|
233
233
|
EMPTY_AUDIO: "EMPTY_AUDIO",
|
|
234
234
|
VALIDATION: "VALIDATION_ERROR",
|
|
235
235
|
INTERNAL: "INTERNAL_ERROR",
|
|
236
|
-
NOT_FOUND: "NOT_FOUND"
|
|
236
|
+
NOT_FOUND: "NOT_FOUND",
|
|
237
|
+
MISSING_VOICE_ID: "MISSING_VOICE_ID",
|
|
238
|
+
TOO_MANY_CONTEXTS: "TOO_MANY_CONTEXTS"
|
|
237
239
|
};
|
|
238
240
|
var WsCloseCodes = {
|
|
239
241
|
UNAUTHORIZED: 4001,
|
|
@@ -317,10 +319,10 @@ function build(status, errorCode, message, opts = {}) {
|
|
|
317
319
|
if (errorCode === ErrorCodes.INSUFFICIENT_CREDITS || status === 402) {
|
|
318
320
|
return new InsufficientCreditsError(message || void 0, common);
|
|
319
321
|
}
|
|
320
|
-
if (errorCode === ErrorCodes.RATE_LIMITED || status === 429) {
|
|
322
|
+
if (errorCode === ErrorCodes.RATE_LIMITED || errorCode === ErrorCodes.TOO_MANY_CONTEXTS || status === 429) {
|
|
321
323
|
return new RateLimitError(message || void 0, common);
|
|
322
324
|
}
|
|
323
|
-
if (errorCode === ErrorCodes.VALIDATION || status === 400) {
|
|
325
|
+
if (errorCode === ErrorCodes.VALIDATION || errorCode === ErrorCodes.MISSING_VOICE_ID || status === 400) {
|
|
324
326
|
return new ValidationError(message || "Request validation failed.", common);
|
|
325
327
|
}
|
|
326
328
|
if (errorCode === ErrorCodes.MODEL_UNAVAILABLE || status === 503) {
|
|
@@ -380,8 +382,9 @@ function classifyHttpError(status, bodyText, headers) {
|
|
|
380
382
|
function classifyWsFrame(data) {
|
|
381
383
|
const errorCode = data.error_code;
|
|
382
384
|
const message = data.error ?? "Server reported an error.";
|
|
385
|
+
const status = typeof data.code === "number" ? data.code : void 0;
|
|
383
386
|
const retryAfter = typeof data.retry_after === "number" ? data.retry_after : void 0;
|
|
384
|
-
return build(
|
|
387
|
+
return build(status, errorCode, message, { retryAfter });
|
|
385
388
|
}
|
|
386
389
|
function classifyWsClose(code, reason) {
|
|
387
390
|
const reasonTxt = (reason ?? "").trim();
|
|
@@ -1303,7 +1306,7 @@ var MultiContextSession = class {
|
|
|
1303
1306
|
const data = JSON.parse(messageData);
|
|
1304
1307
|
if (data.error) {
|
|
1305
1308
|
this.callbacks.onError?.(
|
|
1306
|
-
|
|
1309
|
+
classifyWsFrame(data),
|
|
1307
1310
|
data.context_id
|
|
1308
1311
|
);
|
|
1309
1312
|
return;
|
|
@@ -1438,13 +1441,21 @@ var MultiContextSession = class {
|
|
|
1438
1441
|
}
|
|
1439
1442
|
/**
|
|
1440
1443
|
* Close a specific context.
|
|
1444
|
+
*
|
|
1445
|
+
* @param contextId - The context to close.
|
|
1446
|
+
* @param immediate - When `true`, **barge-in**: the server cancels the
|
|
1447
|
+
* context's in-flight generation immediately and discards any buffered or
|
|
1448
|
+
* queued text instead of draining it. Use this when the end user speaks
|
|
1449
|
+
* over the agent. When `false` (default), queued sentences finish first.
|
|
1441
1450
|
*/
|
|
1442
|
-
closeContext(contextId) {
|
|
1451
|
+
closeContext(contextId, immediate = false) {
|
|
1443
1452
|
if (!this.ws || this.ws.readyState !== WS_OPEN) return;
|
|
1444
|
-
|
|
1453
|
+
const msg = {
|
|
1445
1454
|
close_context: true,
|
|
1446
1455
|
context_id: contextId
|
|
1447
|
-
}
|
|
1456
|
+
};
|
|
1457
|
+
if (immediate) msg.immediate = true;
|
|
1458
|
+
this.ws.send(JSON.stringify(msg));
|
|
1448
1459
|
}
|
|
1449
1460
|
/**
|
|
1450
1461
|
* Send keep-alive to reset a context's inactivity timeout.
|
|
@@ -1549,6 +1560,9 @@ var StreamingSession = class {
|
|
|
1549
1560
|
if (data.generation_started) {
|
|
1550
1561
|
this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? "");
|
|
1551
1562
|
}
|
|
1563
|
+
if (data.interrupted) {
|
|
1564
|
+
this.callbacks.onInterrupted?.();
|
|
1565
|
+
}
|
|
1552
1566
|
if (data.session_closed) {
|
|
1553
1567
|
this.callbacks.onSessionClosed?.(
|
|
1554
1568
|
data.total_audio_seconds ?? 0,
|
|
@@ -1629,6 +1643,73 @@ var StreamingSession = class {
|
|
|
1629
1643
|
}
|
|
1630
1644
|
this.ws.send(JSON.stringify(msg));
|
|
1631
1645
|
}
|
|
1646
|
+
/**
|
|
1647
|
+
* Interrupt (barge-in) the current generation without closing the socket.
|
|
1648
|
+
*
|
|
1649
|
+
* Use this when the end user starts speaking over the agent: it tells the
|
|
1650
|
+
* server to **stop generating audio for the current turn immediately** and
|
|
1651
|
+
* drop any text that was buffered or queued but not yet spoken. Unlike
|
|
1652
|
+
* {@link endSession}, no remaining text is flushed — the turn is abandoned.
|
|
1653
|
+
*
|
|
1654
|
+
* The WebSocket stays open and a fresh session is ready, so you can call
|
|
1655
|
+
* {@link send} for the next user turn right away (config is re-sent
|
|
1656
|
+
* automatically on that first `send`).
|
|
1657
|
+
*
|
|
1658
|
+
* The returned promise resolves once the server acknowledges with an
|
|
1659
|
+
* `interrupted` frame (which also fires
|
|
1660
|
+
* {@link StreamingSessionCallbacks.onInterrupted}), or after a 5 s **quiet**
|
|
1661
|
+
* timeout — i.e. 5 s elapse without any server message arriving. The timer
|
|
1662
|
+
* resets on every incoming frame, so a few in-flight audio chunks still
|
|
1663
|
+
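* draining at the moment of cancellation do not trip it prematurely. The promise also resolves immediately when the socket is not open, and as soon as the socket closes while waiting.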
|
|
1664
|
+
*
|
|
1665
|
+
* @example
|
|
1666
|
+
* ```typescript
|
|
1667
|
+
* // VAD detected the user speaking over the agent:
|
|
1668
|
+
* await session.cancelCurrent();
|
|
1669
|
+
* // Socket is still open — start the next turn immediately:
|
|
1670
|
+
* session.send(nextLlmToken);
|
|
1671
|
+
* ```
|
|
1672
|
+
*/
|
|
1673
|
+
cancelCurrent() {
|
|
1674
|
+
if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
|
|
1675
|
+
const ws = this.ws;
|
|
1676
|
+
const QUIET_TIMEOUT_MS = 5e3;
|
|
1677
|
+
return new Promise((resolve) => {
|
|
1678
|
+
let settled = false;
|
|
1679
|
+
let timer;
|
|
1680
|
+
const prevMessage = ws.onmessage;
|
|
1681
|
+
const prevClose = ws.onclose;
|
|
1682
|
+
const done = () => {
|
|
1683
|
+
if (settled) return;
|
|
1684
|
+
settled = true;
|
|
1685
|
+
clearTimeout(timer);
|
|
1686
|
+
ws.onmessage = prevMessage;
|
|
1687
|
+
ws.onclose = prevClose;
|
|
1688
|
+
this.configSent = false;
|
|
1689
|
+
resolve();
|
|
1690
|
+
};
|
|
1691
|
+
const armQuietTimer = () => {
|
|
1692
|
+
clearTimeout(timer);
|
|
1693
|
+
timer = setTimeout(done, QUIET_TIMEOUT_MS);
|
|
1694
|
+
};
|
|
1695
|
+
armQuietTimer();
|
|
1696
|
+
ws.onmessage = (event) => {
|
|
1697
|
+
armQuietTimer();
|
|
1698
|
+
if (prevMessage) prevMessage.call(ws, event);
|
|
1699
|
+
try {
|
|
1700
|
+
const raw = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
1701
|
+
if (JSON.parse(raw).interrupted) done();
|
|
1702
|
+
} catch {
|
|
1703
|
+
}
|
|
1704
|
+
};
|
|
1705
|
+
ws.onclose = (event) => {
|
|
1706
|
+
this.ws = null;
|
|
1707
|
+
if (prevClose) prevClose.call(ws, event);
|
|
1708
|
+
done();
|
|
1709
|
+
};
|
|
1710
|
+
ws.send(JSON.stringify({ cancel: true }));
|
|
1711
|
+
});
|
|
1712
|
+
}
|
|
1632
1713
|
/**
|
|
1633
1714
|
* End the current session but keep the WebSocket connection open.
|
|
1634
1715
|
*
|
package/dist/index.mjs
CHANGED
|
@@ -195,7 +195,9 @@ var ErrorCodes = {
|
|
|
195
195
|
EMPTY_AUDIO: "EMPTY_AUDIO",
|
|
196
196
|
VALIDATION: "VALIDATION_ERROR",
|
|
197
197
|
INTERNAL: "INTERNAL_ERROR",
|
|
198
|
-
NOT_FOUND: "NOT_FOUND"
|
|
198
|
+
NOT_FOUND: "NOT_FOUND",
|
|
199
|
+
MISSING_VOICE_ID: "MISSING_VOICE_ID",
|
|
200
|
+
TOO_MANY_CONTEXTS: "TOO_MANY_CONTEXTS"
|
|
199
201
|
};
|
|
200
202
|
var WsCloseCodes = {
|
|
201
203
|
UNAUTHORIZED: 4001,
|
|
@@ -279,10 +281,10 @@ function build(status, errorCode, message, opts = {}) {
|
|
|
279
281
|
if (errorCode === ErrorCodes.INSUFFICIENT_CREDITS || status === 402) {
|
|
280
282
|
return new InsufficientCreditsError(message || void 0, common);
|
|
281
283
|
}
|
|
282
|
-
if (errorCode === ErrorCodes.RATE_LIMITED || status === 429) {
|
|
284
|
+
if (errorCode === ErrorCodes.RATE_LIMITED || errorCode === ErrorCodes.TOO_MANY_CONTEXTS || status === 429) {
|
|
283
285
|
return new RateLimitError(message || void 0, common);
|
|
284
286
|
}
|
|
285
|
-
if (errorCode === ErrorCodes.VALIDATION || status === 400) {
|
|
287
|
+
if (errorCode === ErrorCodes.VALIDATION || errorCode === ErrorCodes.MISSING_VOICE_ID || status === 400) {
|
|
286
288
|
return new ValidationError(message || "Request validation failed.", common);
|
|
287
289
|
}
|
|
288
290
|
if (errorCode === ErrorCodes.MODEL_UNAVAILABLE || status === 503) {
|
|
@@ -342,8 +344,9 @@ function classifyHttpError(status, bodyText, headers) {
|
|
|
342
344
|
function classifyWsFrame(data) {
|
|
343
345
|
const errorCode = data.error_code;
|
|
344
346
|
const message = data.error ?? "Server reported an error.";
|
|
347
|
+
const status = typeof data.code === "number" ? data.code : void 0;
|
|
345
348
|
const retryAfter = typeof data.retry_after === "number" ? data.retry_after : void 0;
|
|
346
|
-
return build(
|
|
349
|
+
return build(status, errorCode, message, { retryAfter });
|
|
347
350
|
}
|
|
348
351
|
function classifyWsClose(code, reason) {
|
|
349
352
|
const reasonTxt = (reason ?? "").trim();
|
|
@@ -1265,7 +1268,7 @@ var MultiContextSession = class {
|
|
|
1265
1268
|
const data = JSON.parse(messageData);
|
|
1266
1269
|
if (data.error) {
|
|
1267
1270
|
this.callbacks.onError?.(
|
|
1268
|
-
|
|
1271
|
+
classifyWsFrame(data),
|
|
1269
1272
|
data.context_id
|
|
1270
1273
|
);
|
|
1271
1274
|
return;
|
|
@@ -1400,13 +1403,21 @@ var MultiContextSession = class {
|
|
|
1400
1403
|
}
|
|
1401
1404
|
/**
|
|
1402
1405
|
* Close a specific context.
|
|
1406
|
+
*
|
|
1407
|
+
* @param contextId - The context to close.
|
|
1408
|
+
* @param immediate - When `true`, **barge-in**: the server cancels the
|
|
1409
|
+
* context's in-flight generation immediately and discards any buffered or
|
|
1410
|
+
* queued text instead of draining it. Use this when the end user speaks
|
|
1411
|
+
* over the agent. When `false` (default), queued sentences finish first.
|
|
1403
1412
|
*/
|
|
1404
|
-
closeContext(contextId) {
|
|
1413
|
+
closeContext(contextId, immediate = false) {
|
|
1405
1414
|
if (!this.ws || this.ws.readyState !== WS_OPEN) return;
|
|
1406
|
-
|
|
1415
|
+
const msg = {
|
|
1407
1416
|
close_context: true,
|
|
1408
1417
|
context_id: contextId
|
|
1409
|
-
}
|
|
1418
|
+
};
|
|
1419
|
+
if (immediate) msg.immediate = true;
|
|
1420
|
+
this.ws.send(JSON.stringify(msg));
|
|
1410
1421
|
}
|
|
1411
1422
|
/**
|
|
1412
1423
|
* Send keep-alive to reset a context's inactivity timeout.
|
|
@@ -1511,6 +1522,9 @@ var StreamingSession = class {
|
|
|
1511
1522
|
if (data.generation_started) {
|
|
1512
1523
|
this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? "");
|
|
1513
1524
|
}
|
|
1525
|
+
if (data.interrupted) {
|
|
1526
|
+
this.callbacks.onInterrupted?.();
|
|
1527
|
+
}
|
|
1514
1528
|
if (data.session_closed) {
|
|
1515
1529
|
this.callbacks.onSessionClosed?.(
|
|
1516
1530
|
data.total_audio_seconds ?? 0,
|
|
@@ -1591,6 +1605,73 @@ var StreamingSession = class {
|
|
|
1591
1605
|
}
|
|
1592
1606
|
this.ws.send(JSON.stringify(msg));
|
|
1593
1607
|
}
|
|
1608
|
+
/**
|
|
1609
|
+
* Interrupt (barge-in) the current generation without closing the socket.
|
|
1610
|
+
*
|
|
1611
|
+
* Use this when the end user starts speaking over the agent: it tells the
|
|
1612
|
+
* server to **stop generating audio for the current turn immediately** and
|
|
1613
|
+
* drop any text that was buffered or queued but not yet spoken. Unlike
|
|
1614
|
+
* {@link endSession}, no remaining text is flushed — the turn is abandoned.
|
|
1615
|
+
*
|
|
1616
|
+
* The WebSocket stays open and a fresh session is ready, so you can call
|
|
1617
|
+
* {@link send} for the next user turn right away (config is re-sent
|
|
1618
|
+
* automatically on that first `send`).
|
|
1619
|
+
*
|
|
1620
|
+
* The returned promise resolves once the server acknowledges with an
|
|
1621
|
+
* `interrupted` frame (which also fires
|
|
1622
|
+
* {@link StreamingSessionCallbacks.onInterrupted}), or after a 5 s **quiet**
|
|
1623
|
+
* timeout — i.e. 5 s elapse without any server message arriving. The timer
|
|
1624
|
+
* resets on every incoming frame, so a few in-flight audio chunks still
|
|
1625
|
+
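* draining at the moment of cancellation do not trip it prematurely. The promise also resolves immediately when the socket is not open, and as soon as the socket closes while waiting.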
|
|
1626
|
+
*
|
|
1627
|
+
* @example
|
|
1628
|
+
* ```typescript
|
|
1629
|
+
* // VAD detected the user speaking over the agent:
|
|
1630
|
+
* await session.cancelCurrent();
|
|
1631
|
+
* // Socket is still open — start the next turn immediately:
|
|
1632
|
+
* session.send(nextLlmToken);
|
|
1633
|
+
* ```
|
|
1634
|
+
*/
|
|
1635
|
+
cancelCurrent() {
|
|
1636
|
+
if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
|
|
1637
|
+
const ws = this.ws;
|
|
1638
|
+
const QUIET_TIMEOUT_MS = 5e3;
|
|
1639
|
+
return new Promise((resolve) => {
|
|
1640
|
+
let settled = false;
|
|
1641
|
+
let timer;
|
|
1642
|
+
const prevMessage = ws.onmessage;
|
|
1643
|
+
const prevClose = ws.onclose;
|
|
1644
|
+
const done = () => {
|
|
1645
|
+
if (settled) return;
|
|
1646
|
+
settled = true;
|
|
1647
|
+
clearTimeout(timer);
|
|
1648
|
+
ws.onmessage = prevMessage;
|
|
1649
|
+
ws.onclose = prevClose;
|
|
1650
|
+
this.configSent = false;
|
|
1651
|
+
resolve();
|
|
1652
|
+
};
|
|
1653
|
+
const armQuietTimer = () => {
|
|
1654
|
+
clearTimeout(timer);
|
|
1655
|
+
timer = setTimeout(done, QUIET_TIMEOUT_MS);
|
|
1656
|
+
};
|
|
1657
|
+
armQuietTimer();
|
|
1658
|
+
ws.onmessage = (event) => {
|
|
1659
|
+
armQuietTimer();
|
|
1660
|
+
if (prevMessage) prevMessage.call(ws, event);
|
|
1661
|
+
try {
|
|
1662
|
+
const raw = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
1663
|
+
if (JSON.parse(raw).interrupted) done();
|
|
1664
|
+
} catch {
|
|
1665
|
+
}
|
|
1666
|
+
};
|
|
1667
|
+
ws.onclose = (event) => {
|
|
1668
|
+
this.ws = null;
|
|
1669
|
+
if (prevClose) prevClose.call(ws, event);
|
|
1670
|
+
done();
|
|
1671
|
+
};
|
|
1672
|
+
ws.send(JSON.stringify({ cancel: true }));
|
|
1673
|
+
});
|
|
1674
|
+
}
|
|
1594
1675
|
/**
|
|
1595
1676
|
* End the current session but keep the WebSocket connection open.
|
|
1596
1677
|
*
|
package/package.json
CHANGED
package/src/client.test.ts
CHANGED
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
10
10
|
import { KugelAudio } from './client';
|
|
11
|
+
import { RateLimitError } from './errors';
|
|
11
12
|
|
|
12
13
|
// ---------------------------------------------------------------------------
|
|
13
14
|
// Minimal WebSocket mock
|
|
@@ -333,6 +334,10 @@ function makeGenerationStartedMsg(chunkId: number, text: string): string {
|
|
|
333
334
|
});
|
|
334
335
|
}
|
|
335
336
|
|
|
337
|
+
function makeInterruptedMsg(): string {
|
|
338
|
+
return JSON.stringify({ interrupted: true });
|
|
339
|
+
}
|
|
340
|
+
|
|
336
341
|
describe('StreamingSession', () => {
|
|
337
342
|
let client: KugelAudio;
|
|
338
343
|
|
|
@@ -545,4 +550,138 @@ describe('StreamingSession', () => {
|
|
|
545
550
|
expect(session.isConnected).toBe(true);
|
|
546
551
|
expect(() => session.send('Hello.', true)).not.toThrow();
|
|
547
552
|
});
|
|
553
|
+
|
|
554
|
+
// -------------------------------------------------------------------------
|
|
555
|
+
// cancelCurrent() — barge-in (KUG-1050)
|
|
556
|
+
// -------------------------------------------------------------------------
|
|
557
|
+
|
|
558
|
+
it('cancelCurrent() sends {cancel:true}, fires onInterrupted, keeps socket open', async () => {
|
|
559
|
+
const interruptedCalls: number[] = [];
|
|
560
|
+
|
|
561
|
+
const session = client.tts.streamingSession(
|
|
562
|
+
{ voiceId: 1 },
|
|
563
|
+
{ onInterrupted: () => interruptedCalls.push(1) },
|
|
564
|
+
);
|
|
565
|
+
|
|
566
|
+
session.connect();
|
|
567
|
+
await new Promise<void>((r) => setTimeout(r, 10));
|
|
568
|
+
|
|
569
|
+
session.send('A very long sentence the user is about to talk over.');
|
|
570
|
+
mockWs.onmessage?.({ data: makeAudioMsg(0, 100) });
|
|
571
|
+
|
|
572
|
+
const cancelPromise = session.cancelCurrent();
|
|
573
|
+
|
|
574
|
+
// The barge-in frame was sent to the server.
|
|
575
|
+
const lastSent = JSON.parse(mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string);
|
|
576
|
+
expect(lastSent.cancel).toBe(true);
|
|
577
|
+
|
|
578
|
+
// Server acks the barge-in.
|
|
579
|
+
mockWs.onmessage?.({ data: makeInterruptedMsg() });
|
|
580
|
+
await cancelPromise;
|
|
581
|
+
|
|
582
|
+
// onInterrupted fired and the socket stayed open for the next turn.
|
|
583
|
+
expect(interruptedCalls).toHaveLength(1);
|
|
584
|
+
expect(session.isConnected).toBe(true);
|
|
585
|
+
expect(mockWs.close).not.toHaveBeenCalled();
|
|
586
|
+
});
|
|
587
|
+
|
|
588
|
+
it('cancelCurrent() re-sends config on the next send (fresh server session)', async () => {
|
|
589
|
+
const session = client.tts.streamingSession({ voiceId: 42 }, {});
|
|
590
|
+
|
|
591
|
+
session.connect();
|
|
592
|
+
await new Promise<void>((r) => setTimeout(r, 10));
|
|
593
|
+
|
|
594
|
+
// First send carries config (voice_id).
|
|
595
|
+
session.send('Hello.');
|
|
596
|
+
expect(JSON.parse(mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string).voice_id).toBe(42);
|
|
597
|
+
|
|
598
|
+
const cancelPromise = session.cancelCurrent();
|
|
599
|
+
mockWs.onmessage?.({ data: makeInterruptedMsg() });
|
|
600
|
+
await cancelPromise;
|
|
601
|
+
|
|
602
|
+
// The server started a fresh session, so the next send must re-send config.
|
|
603
|
+
session.send('Next turn.');
|
|
604
|
+
expect(JSON.parse(mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string).voice_id).toBe(42);
|
|
605
|
+
});
|
|
606
|
+
|
|
607
|
+
it('cancelCurrent() resolves on quiet timeout if server never acks', async () => {
|
|
608
|
+
const session = client.tts.streamingSession({ voiceId: 1 }, {});
|
|
609
|
+
|
|
610
|
+
session.connect();
|
|
611
|
+
await new Promise<void>((r) => setTimeout(r, 10));
|
|
612
|
+
session.send('Hello.');
|
|
613
|
+
|
|
614
|
+
vi.useFakeTimers();
|
|
615
|
+
const cancelPromise = session.cancelCurrent();
|
|
616
|
+
|
|
617
|
+
// No interrupted ack — the 5 s quiet timeout resolves it.
|
|
618
|
+
await vi.advanceTimersByTimeAsync(6_000);
|
|
619
|
+
await cancelPromise;
|
|
620
|
+
|
|
621
|
+
vi.useRealTimers();
|
|
622
|
+
// Socket was never closed; still reusable.
|
|
623
|
+
expect(session.isConnected).toBe(true);
|
|
624
|
+
});
|
|
625
|
+
});
|
|
626
|
+
|
|
627
|
+
// ---------------------------------------------------------------------------
|
|
628
|
+
// MultiContextSession barge-in — closeContext immediate (KUG-1050)
|
|
629
|
+
// ---------------------------------------------------------------------------
|
|
630
|
+
|
|
631
|
+
describe('MultiContextSession closeContext', () => {
|
|
632
|
+
let client: KugelAudio;
|
|
633
|
+
|
|
634
|
+
beforeEach(() => {
|
|
635
|
+
client = new KugelAudio({ apiKey: 'test-key-xxx' });
|
|
636
|
+
});
|
|
637
|
+
|
|
638
|
+
it('closeContext(id, true) sends the immediate barge-in flag', async () => {
|
|
639
|
+
const session = client.tts.createMultiContextSession({ defaultVoiceId: 1 });
|
|
640
|
+
await session.connect({});
|
|
641
|
+
|
|
642
|
+
session.closeContext('ctx1', true);
|
|
643
|
+
|
|
644
|
+
const sent = JSON.parse(
|
|
645
|
+
mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
|
|
646
|
+
);
|
|
647
|
+
expect(sent.close_context).toBe(true);
|
|
648
|
+
expect(sent.context_id).toBe('ctx1');
|
|
649
|
+
expect(sent.immediate).toBe(true);
|
|
650
|
+
});
|
|
651
|
+
|
|
652
|
+
it('closeContext(id) omits immediate (graceful drain)', async () => {
|
|
653
|
+
const session = client.tts.createMultiContextSession({ defaultVoiceId: 1 });
|
|
654
|
+
await session.connect({});
|
|
655
|
+
|
|
656
|
+
session.closeContext('ctx1');
|
|
657
|
+
|
|
658
|
+
const sent = JSON.parse(
|
|
659
|
+
mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
|
|
660
|
+
);
|
|
661
|
+
expect(sent.close_context).toBe(true);
|
|
662
|
+
expect(sent.immediate).toBeUndefined();
|
|
663
|
+
});
|
|
664
|
+
|
|
665
|
+
it('maps ingress context-cap errors to typed callback errors', async () => {
|
|
666
|
+
const errors: Array<{ contextId?: string; error: Error }> = [];
|
|
667
|
+
const session = client.tts.createMultiContextSession({ defaultVoiceId: 1 });
|
|
668
|
+
await session.connect({
|
|
669
|
+
onError: (error, contextId) => errors.push({ contextId, error }),
|
|
670
|
+
});
|
|
671
|
+
|
|
672
|
+
mockWs.onmessage?.({
|
|
673
|
+
data: JSON.stringify({
|
|
674
|
+
error: 'Too many concurrent contexts',
|
|
675
|
+
error_code: 'TOO_MANY_CONTEXTS',
|
|
676
|
+
code: 429,
|
|
677
|
+
context_id: 'ctx1',
|
|
678
|
+
}),
|
|
679
|
+
});
|
|
680
|
+
|
|
681
|
+
expect(errors).toHaveLength(1);
|
|
682
|
+
expect(errors[0].contextId).toBe('ctx1');
|
|
683
|
+
expect(errors[0].error).toBeInstanceOf(RateLimitError);
|
|
684
|
+
expect((errors[0].error as RateLimitError).statusCode).toBe(429);
|
|
685
|
+
expect((errors[0].error as RateLimitError).errorCode).toBe('TOO_MANY_CONTEXTS');
|
|
686
|
+
});
|
|
548
687
|
});
|
package/src/client.ts
CHANGED
|
@@ -873,7 +873,7 @@ class TTSResource {
|
|
|
873
873
|
}
|
|
874
874
|
}
|
|
875
875
|
|
|
876
|
-
private parseError(data: { error?: string; error_code?: string; retry_after?: number }): Error {
|
|
876
|
+
private parseError(data: { error?: string; error_code?: string; code?: number; retry_after?: number }): Error {
|
|
877
877
|
return classifyWsFrame(data);
|
|
878
878
|
}
|
|
879
879
|
|
|
@@ -1019,7 +1019,7 @@ class MultiContextSession {
|
|
|
1019
1019
|
|
|
1020
1020
|
if (data.error) {
|
|
1021
1021
|
this.callbacks.onError?.(
|
|
1022
|
-
|
|
1022
|
+
classifyWsFrame(data),
|
|
1023
1023
|
data.context_id
|
|
1024
1024
|
);
|
|
1025
1025
|
return;
|
|
@@ -1195,14 +1195,22 @@ class MultiContextSession {
|
|
|
1195
1195
|
|
|
1196
1196
|
/**
|
|
1197
1197
|
* Close a specific context.
|
|
1198
|
+
*
|
|
1199
|
+
* @param contextId - The context to close.
|
|
1200
|
+
* @param immediate - When `true`, **barge-in**: the server cancels the
|
|
1201
|
+
* context's in-flight generation immediately and discards any buffered or
|
|
1202
|
+
* queued text instead of draining it. Use this when the end user speaks
|
|
1203
|
+
* over the agent. When `false` (default), queued sentences finish first.
|
|
1198
1204
|
*/
|
|
1199
|
-
closeContext(contextId: string): void {
|
|
1205
|
+
closeContext(contextId: string, immediate = false): void {
|
|
1200
1206
|
if (!this.ws || this.ws.readyState !== WS_OPEN) return;
|
|
1201
1207
|
|
|
1202
|
-
|
|
1208
|
+
const msg: Record<string, unknown> = {
|
|
1203
1209
|
close_context: true,
|
|
1204
1210
|
context_id: contextId,
|
|
1205
|
-
}
|
|
1211
|
+
};
|
|
1212
|
+
if (immediate) msg.immediate = true;
|
|
1213
|
+
this.ws.send(JSON.stringify(msg));
|
|
1206
1214
|
}
|
|
1207
1215
|
|
|
1208
1216
|
/**
|
|
@@ -1362,6 +1370,10 @@ class StreamingSession {
|
|
|
1362
1370
|
this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? '');
|
|
1363
1371
|
}
|
|
1364
1372
|
|
|
1373
|
+
if (data.interrupted) {
|
|
1374
|
+
this.callbacks.onInterrupted?.();
|
|
1375
|
+
}
|
|
1376
|
+
|
|
1365
1377
|
if (data.session_closed) {
|
|
1366
1378
|
this.callbacks.onSessionClosed?.(
|
|
1367
1379
|
data.total_audio_seconds ?? 0,
|
|
@@ -1461,6 +1473,97 @@ class StreamingSession {
|
|
|
1461
1473
|
this.ws.send(JSON.stringify(msg));
|
|
1462
1474
|
}
|
|
1463
1475
|
|
|
1476
|
+
/**
|
|
1477
|
+
* Interrupt (barge-in) the current generation without closing the socket.
|
|
1478
|
+
*
|
|
1479
|
+
* Use this when the end user starts speaking over the agent: it tells the
|
|
1480
|
+
* server to **stop generating audio for the current turn immediately** and
|
|
1481
|
+
* drop any text that was buffered or queued but not yet spoken. Unlike
|
|
1482
|
+
* {@link endSession}, no remaining text is flushed — the turn is abandoned.
|
|
1483
|
+
*
|
|
1484
|
+
* The WebSocket stays open and a fresh session is ready, so you can call
|
|
1485
|
+
* {@link send} for the next user turn right away (config is re-sent
|
|
1486
|
+
* automatically on that first `send`).
|
|
1487
|
+
*
|
|
1488
|
+
* The returned promise resolves once the server acknowledges with an
|
|
1489
|
+
* `interrupted` frame (which also fires
|
|
1490
|
+
* {@link StreamingSessionCallbacks.onInterrupted}), or after a 5 s **quiet**
|
|
1491
|
+
* timeout — i.e. 5 s elapse without any server message arriving. The timer
|
|
1492
|
+
* resets on every incoming frame, so a few in-flight audio chunks still
|
|
1493
|
+
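* draining at the moment of cancellation do not trip it prematurely. The promise also resolves immediately when the socket is not open, and as soon as the socket closes while the cancel is pending.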
|
|
1494
|
+
*
|
|
1495
|
+
* @example
|
|
1496
|
+
* ```typescript
|
|
1497
|
+
* // VAD detected the user speaking over the agent:
|
|
1498
|
+
* await session.cancelCurrent();
|
|
1499
|
+
* // Socket is still open — start the next turn immediately:
|
|
1500
|
+
* session.send(nextLlmToken);
|
|
1501
|
+
* ```
|
|
1502
|
+
*/
|
|
1503
|
+
cancelCurrent(): Promise<void> {
|
|
1504
|
+
if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
|
|
1505
|
+
|
|
1506
|
+
const ws = this.ws;
|
|
1507
|
+
// Quiet timeout: resets on every incoming server message. Trips only
|
|
1508
|
+
// when the server has been silent for this long. A short window is fine
|
|
1509
|
+
// here because the server cancels in-flight generation promptly; we only
|
|
1510
|
+
// need to outlast a handful of already-emitted audio frames in transit.
|
|
1511
|
+
const QUIET_TIMEOUT_MS = 5_000;
|
|
1512
|
+
|
|
1513
|
+
return new Promise<void>((resolve) => {
|
|
1514
|
+
let settled = false;
|
|
1515
|
+
let timer: ReturnType<typeof setTimeout>;
|
|
1516
|
+
|
|
1517
|
+
const prevMessage = ws.onmessage;
|
|
1518
|
+
const prevClose = ws.onclose;
|
|
1519
|
+
|
|
1520
|
+
const done = () => {
|
|
1521
|
+
if (settled) return;
|
|
1522
|
+
settled = true;
|
|
1523
|
+
clearTimeout(timer);
|
|
1524
|
+
// Restore the original handlers so subsequent calls don't stack
|
|
1525
|
+
// wrappers and the typed-error onclose installed by connect() stays
|
|
1526
|
+
// in effect for the next turn.
|
|
1527
|
+
ws.onmessage = prevMessage;
|
|
1528
|
+
ws.onclose = prevClose;
|
|
1529
|
+
// The server starts a fresh session after a cancel, so the next
|
|
1530
|
+
// send() must re-send config.
|
|
1531
|
+
this.configSent = false;
|
|
1532
|
+
resolve();
|
|
1533
|
+
};
|
|
1534
|
+
|
|
1535
|
+
const armQuietTimer = () => {
|
|
1536
|
+
clearTimeout(timer);
|
|
1537
|
+
timer = setTimeout(done, QUIET_TIMEOUT_MS);
|
|
1538
|
+
};
|
|
1539
|
+
|
|
1540
|
+
armQuietTimer();
|
|
1541
|
+
|
|
1542
|
+
ws.onmessage = (event: MessageEvent) => {
|
|
1543
|
+
// Reset the quiet timer on EVERY incoming frame — late audio chunks
|
|
1544
|
+
// from the cancelled turn count as liveness, not just the ack.
|
|
1545
|
+
armQuietTimer();
|
|
1546
|
+
if (prevMessage) prevMessage.call(ws, event);
|
|
1547
|
+
try {
|
|
1548
|
+
const raw = typeof event.data === 'string'
|
|
1549
|
+
? event.data
|
|
1550
|
+
: event.data instanceof Buffer
|
|
1551
|
+
? event.data.toString()
|
|
1552
|
+
: String(event.data);
|
|
1553
|
+
if (JSON.parse(raw).interrupted) done();
|
|
1554
|
+
} catch { /* ignore parse errors */ }
|
|
1555
|
+
};
|
|
1556
|
+
|
|
1557
|
+
ws.onclose = (event: CloseEvent) => {
|
|
1558
|
+
this.ws = null;
|
|
1559
|
+
if (prevClose) prevClose.call(ws, event);
|
|
1560
|
+
done();
|
|
1561
|
+
};
|
|
1562
|
+
|
|
1563
|
+
ws.send(JSON.stringify({ cancel: true }));
|
|
1564
|
+
});
|
|
1565
|
+
}
|
|
1566
|
+
|
|
1464
1567
|
/**
|
|
1465
1568
|
* End the current session but keep the WebSocket connection open.
|
|
1466
1569
|
*
|
package/src/errors.ts
CHANGED
|
@@ -17,6 +17,8 @@ export const ErrorCodes = {
|
|
|
17
17
|
VALIDATION: 'VALIDATION_ERROR',
|
|
18
18
|
INTERNAL: 'INTERNAL_ERROR',
|
|
19
19
|
NOT_FOUND: 'NOT_FOUND',
|
|
20
|
+
MISSING_VOICE_ID: 'MISSING_VOICE_ID',
|
|
21
|
+
TOO_MANY_CONTEXTS: 'TOO_MANY_CONTEXTS',
|
|
20
22
|
} as const;
|
|
21
23
|
export type ErrorCode = typeof ErrorCodes[keyof typeof ErrorCodes];
|
|
22
24
|
|
|
@@ -175,10 +177,18 @@ function build(
|
|
|
175
177
|
if (errorCode === ErrorCodes.INSUFFICIENT_CREDITS || status === 402) {
|
|
176
178
|
return new InsufficientCreditsError(message || undefined, common);
|
|
177
179
|
}
|
|
178
|
-
if (
|
|
180
|
+
if (
|
|
181
|
+
errorCode === ErrorCodes.RATE_LIMITED ||
|
|
182
|
+
errorCode === ErrorCodes.TOO_MANY_CONTEXTS ||
|
|
183
|
+
status === 429
|
|
184
|
+
) {
|
|
179
185
|
return new RateLimitError(message || undefined, common);
|
|
180
186
|
}
|
|
181
|
-
if (
|
|
187
|
+
if (
|
|
188
|
+
errorCode === ErrorCodes.VALIDATION ||
|
|
189
|
+
errorCode === ErrorCodes.MISSING_VOICE_ID ||
|
|
190
|
+
status === 400
|
|
191
|
+
) {
|
|
182
192
|
return new ValidationError(message || 'Request validation failed.', common);
|
|
183
193
|
}
|
|
184
194
|
if (errorCode === ErrorCodes.MODEL_UNAVAILABLE || status === 503) {
|
|
@@ -263,17 +273,19 @@ export function classifyHttpError(
|
|
|
263
273
|
|
|
264
274
|
/**
|
|
265
275
|
* Build a `KugelAudioError` from a server-sent WebSocket error frame
|
|
266
|
-
* (`{error, error_code,
|
|
276
|
+
* (`{error, error_code, code, retry_after}`).
|
|
267
277
|
*/
|
|
268
278
|
export function classifyWsFrame(data: {
|
|
269
279
|
error?: string;
|
|
270
280
|
error_code?: string;
|
|
281
|
+
code?: number;
|
|
271
282
|
retry_after?: number;
|
|
272
283
|
}): KugelAudioError {
|
|
273
284
|
const errorCode = data.error_code;
|
|
274
285
|
const message = data.error ?? 'Server reported an error.';
|
|
286
|
+
const status = typeof data.code === 'number' ? data.code : undefined;
|
|
275
287
|
const retryAfter = typeof data.retry_after === 'number' ? data.retry_after : undefined;
|
|
276
|
-
return build(
|
|
288
|
+
return build(status, errorCode, message, { retryAfter });
|
|
277
289
|
}
|
|
278
290
|
|
|
279
291
|
/**
|
package/src/types.ts
CHANGED
|
@@ -408,6 +408,13 @@ export interface StreamingSessionCallbacks {
|
|
|
408
408
|
onGenerationStarted?: (chunkId: number, text: string) => void;
|
|
409
409
|
/** Called when word-level timestamps arrive (requires `wordTimestamps: true`). */
|
|
410
410
|
onWordTimestamps?: (timestamps: WordTimestamp[]) => void;
|
|
411
|
+
/**
|
|
412
|
+
* Called when the server acknowledges a barge-in
|
|
413
|
+
* ({@link StreamingSession.cancelCurrent}). After this fires, no further
|
|
414
|
+
* audio chunks from the cancelled turn will arrive and the session is
|
|
415
|
+
* ready for the next `send()`.
|
|
416
|
+
*/
|
|
417
|
+
onInterrupted?: () => void;
|
|
411
418
|
/** Called on any error. */
|
|
412
419
|
onError?: (error: Error) => void;
|
|
413
420
|
}
|