kugelaudio 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ## [kugelaudio-v0.6.1](https://github.com/Kugelaudio/KugelAudio/compare/js-sdk-v0.6.0...js-sdk-v0.6.1) (2026-06-04)
2
+
3
+ ### Bug Fixes
4
+
5
+ * **python-sdk:** propagate ingress errors through SDK integrations ([#1313](https://github.com/Kugelaudio/KugelAudio/issues/1313)) ([3ae2e03](https://github.com/Kugelaudio/KugelAudio/commit/3ae2e03745b49cca0712c20d9a658c160f4b6f38))
6
+
7
+ ## [kugelaudio-v0.6.0](https://github.com/Kugelaudio/KugelAudio/compare/js-sdk-v0.5.0...js-sdk-v0.6.0) (2026-06-01)
8
+
9
+ ### Features
10
+
11
+ * streaming barge-in (cancelCurrent) across server + JS/Python/Java SDKs ([#1210](https://github.com/Kugelaudio/KugelAudio/issues/1210)) ([341e54f](https://github.com/Kugelaudio/KugelAudio/commit/341e54f169b4dd9242272b249fca30f005bfc3b8))
12
+
1
13
  ## [kugelaudio-v0.5.0](https://github.com/Kugelaudio/KugelAudio/compare/js-sdk-v0.4.0...js-sdk-v0.5.0) (2026-05-21)
2
14
 
3
15
  ### Features
package/dist/index.d.mts CHANGED
@@ -383,6 +383,13 @@ interface StreamingSessionCallbacks {
383
383
  onGenerationStarted?: (chunkId: number, text: string) => void;
384
384
  /** Called when word-level timestamps arrive (requires `wordTimestamps: true`). */
385
385
  onWordTimestamps?: (timestamps: WordTimestamp[]) => void;
386
+ /**
387
+ * Called when the server acknowledges a barge-in
388
+ * ({@link StreamingSession.cancelCurrent}). After this fires, no further
389
+ * audio chunks from the cancelled turn will arrive and the session is
390
+ * ready for the next `send()`.
391
+ */
392
+ onInterrupted?: () => void;
386
393
  /** Called on any error. */
387
394
  onError?: (error: Error) => void;
388
395
  }
@@ -938,8 +945,14 @@ declare class MultiContextSession {
938
945
  flush(contextId: string): void;
939
946
  /**
940
947
  * Close a specific context.
948
+ *
949
+ * @param contextId - The context to close.
950
+ * @param immediate - When `true`, **barge-in**: the server cancels the
951
+ * context's in-flight generation immediately and discards any buffered or
952
+ * queued text instead of draining it. Use this when the end user speaks
953
+ * over the agent. When `false` (default), queued sentences finish first.
941
954
  */
942
- closeContext(contextId: string): void;
955
+ closeContext(contextId: string, immediate?: boolean): void;
943
956
  /**
944
957
  * Send keep-alive to reset a context's inactivity timeout.
945
958
  */
@@ -1016,6 +1029,34 @@ declare class StreamingSession {
1016
1029
  * handle chunking via `chunkLengthSchedule` / `autoMode` instead.
1017
1030
  */
1018
1031
  send(text: string, flush?: boolean): void;
1032
+ /**
1033
+ * Interrupt (barge-in) the current generation without closing the socket.
1034
+ *
1035
+ * Use this when the end user starts speaking over the agent: it tells the
1036
+ * server to **stop generating audio for the current turn immediately** and
1037
+ * drop any text that was buffered or queued but not yet spoken. Unlike
1038
+ * {@link endSession}, no remaining text is flushed — the turn is abandoned.
1039
+ *
1040
+ * The WebSocket stays open and a fresh session is ready, so you can call
1041
+ * {@link send} for the next user turn right away (config is re-sent
1042
+ * automatically on that first `send`).
1043
+ *
1044
+ * The returned promise resolves once the server acknowledges with an
1045
+ * `interrupted` frame (which also fires
1046
+ * {@link StreamingSessionCallbacks.onInterrupted}), or after a 5 s **quiet**
1047
+ * timeout — i.e. 5 s elapse without any server message arriving. The timer
1048
+ * resets on every incoming frame, so a few in-flight audio chunks still
1049
+ * draining at the moment of cancellation do not trip it prematurely.
1050
+ *
1051
+ * @example
1052
+ * ```typescript
1053
+ * // VAD detected the user speaking over the agent:
1054
+ * await session.cancelCurrent();
1055
+ * // Socket is still open — start the next turn immediately:
1056
+ * session.send(nextLlmToken);
1057
+ * ```
1058
+ */
1059
+ cancelCurrent(): Promise<void>;
1019
1060
  /**
1020
1061
  * End the current session but keep the WebSocket connection open.
1021
1062
  *
@@ -1183,6 +1224,8 @@ declare const ErrorCodes: {
1183
1224
  readonly VALIDATION: "VALIDATION_ERROR";
1184
1225
  readonly INTERNAL: "INTERNAL_ERROR";
1185
1226
  readonly NOT_FOUND: "NOT_FOUND";
1227
+ readonly MISSING_VOICE_ID: "MISSING_VOICE_ID";
1228
+ readonly TOO_MANY_CONTEXTS: "TOO_MANY_CONTEXTS";
1186
1229
  };
1187
1230
  type ErrorCode = typeof ErrorCodes[keyof typeof ErrorCodes];
1188
1231
  declare const WsCloseCodes: {
@@ -1264,11 +1307,12 @@ interface HttpResponseLike {
1264
1307
  declare function classifyHttpError(status: number, bodyText: string, headers: HttpResponseLike['headers']): KugelAudioError;
1265
1308
  /**
1266
1309
  * Build a `KugelAudioError` from a server-sent WebSocket error frame
1267
- * (`{error, error_code, retry_after}`).
1310
+ * (`{error, error_code, code, retry_after}`).
1268
1311
  */
1269
1312
  declare function classifyWsFrame(data: {
1270
1313
  error?: string;
1271
1314
  error_code?: string;
1315
+ code?: number;
1272
1316
  retry_after?: number;
1273
1317
  }): KugelAudioError;
1274
1318
  /**
package/dist/index.d.ts CHANGED
@@ -383,6 +383,13 @@ interface StreamingSessionCallbacks {
383
383
  onGenerationStarted?: (chunkId: number, text: string) => void;
384
384
  /** Called when word-level timestamps arrive (requires `wordTimestamps: true`). */
385
385
  onWordTimestamps?: (timestamps: WordTimestamp[]) => void;
386
+ /**
387
+ * Called when the server acknowledges a barge-in
388
+ * ({@link StreamingSession.cancelCurrent}). After this fires, no further
389
+ * audio chunks from the cancelled turn will arrive and the session is
390
+ * ready for the next `send()`.
391
+ */
392
+ onInterrupted?: () => void;
386
393
  /** Called on any error. */
387
394
  onError?: (error: Error) => void;
388
395
  }
@@ -938,8 +945,14 @@ declare class MultiContextSession {
938
945
  flush(contextId: string): void;
939
946
  /**
940
947
  * Close a specific context.
948
+ *
949
+ * @param contextId - The context to close.
950
+ * @param immediate - When `true`, **barge-in**: the server cancels the
951
+ * context's in-flight generation immediately and discards any buffered or
952
+ * queued text instead of draining it. Use this when the end user speaks
953
+ * over the agent. When `false` (default), queued sentences finish first.
941
954
  */
942
- closeContext(contextId: string): void;
955
+ closeContext(contextId: string, immediate?: boolean): void;
943
956
  /**
944
957
  * Send keep-alive to reset a context's inactivity timeout.
945
958
  */
@@ -1016,6 +1029,34 @@ declare class StreamingSession {
1016
1029
  * handle chunking via `chunkLengthSchedule` / `autoMode` instead.
1017
1030
  */
1018
1031
  send(text: string, flush?: boolean): void;
1032
+ /**
1033
+ * Interrupt (barge-in) the current generation without closing the socket.
1034
+ *
1035
+ * Use this when the end user starts speaking over the agent: it tells the
1036
+ * server to **stop generating audio for the current turn immediately** and
1037
+ * drop any text that was buffered or queued but not yet spoken. Unlike
1038
+ * {@link endSession}, no remaining text is flushed — the turn is abandoned.
1039
+ *
1040
+ * The WebSocket stays open and a fresh session is ready, so you can call
1041
+ * {@link send} for the next user turn right away (config is re-sent
1042
+ * automatically on that first `send`).
1043
+ *
1044
+ * The returned promise resolves once the server acknowledges with an
1045
+ * `interrupted` frame (which also fires
1046
+ * {@link StreamingSessionCallbacks.onInterrupted}), or after a 5 s **quiet**
1047
+ * timeout — i.e. 5 s elapse without any server message arriving. The timer
1048
+ * resets on every incoming frame, so a few in-flight audio chunks still
1049
+ * draining at the moment of cancellation do not trip it prematurely.
1050
+ *
1051
+ * @example
1052
+ * ```typescript
1053
+ * // VAD detected the user speaking over the agent:
1054
+ * await session.cancelCurrent();
1055
+ * // Socket is still open — start the next turn immediately:
1056
+ * session.send(nextLlmToken);
1057
+ * ```
1058
+ */
1059
+ cancelCurrent(): Promise<void>;
1019
1060
  /**
1020
1061
  * End the current session but keep the WebSocket connection open.
1021
1062
  *
@@ -1183,6 +1224,8 @@ declare const ErrorCodes: {
1183
1224
  readonly VALIDATION: "VALIDATION_ERROR";
1184
1225
  readonly INTERNAL: "INTERNAL_ERROR";
1185
1226
  readonly NOT_FOUND: "NOT_FOUND";
1227
+ readonly MISSING_VOICE_ID: "MISSING_VOICE_ID";
1228
+ readonly TOO_MANY_CONTEXTS: "TOO_MANY_CONTEXTS";
1186
1229
  };
1187
1230
  type ErrorCode = typeof ErrorCodes[keyof typeof ErrorCodes];
1188
1231
  declare const WsCloseCodes: {
@@ -1264,11 +1307,12 @@ interface HttpResponseLike {
1264
1307
  declare function classifyHttpError(status: number, bodyText: string, headers: HttpResponseLike['headers']): KugelAudioError;
1265
1308
  /**
1266
1309
  * Build a `KugelAudioError` from a server-sent WebSocket error frame
1267
- * (`{error, error_code, retry_after}`).
1310
+ * (`{error, error_code, code, retry_after}`).
1268
1311
  */
1269
1312
  declare function classifyWsFrame(data: {
1270
1313
  error?: string;
1271
1314
  error_code?: string;
1315
+ code?: number;
1272
1316
  retry_after?: number;
1273
1317
  }): KugelAudioError;
1274
1318
  /**
package/dist/index.js CHANGED
@@ -233,7 +233,9 @@ var ErrorCodes = {
233
233
  EMPTY_AUDIO: "EMPTY_AUDIO",
234
234
  VALIDATION: "VALIDATION_ERROR",
235
235
  INTERNAL: "INTERNAL_ERROR",
236
- NOT_FOUND: "NOT_FOUND"
236
+ NOT_FOUND: "NOT_FOUND",
237
+ MISSING_VOICE_ID: "MISSING_VOICE_ID",
238
+ TOO_MANY_CONTEXTS: "TOO_MANY_CONTEXTS"
237
239
  };
238
240
  var WsCloseCodes = {
239
241
  UNAUTHORIZED: 4001,
@@ -317,10 +319,10 @@ function build(status, errorCode, message, opts = {}) {
317
319
  if (errorCode === ErrorCodes.INSUFFICIENT_CREDITS || status === 402) {
318
320
  return new InsufficientCreditsError(message || void 0, common);
319
321
  }
320
- if (errorCode === ErrorCodes.RATE_LIMITED || status === 429) {
322
+ if (errorCode === ErrorCodes.RATE_LIMITED || errorCode === ErrorCodes.TOO_MANY_CONTEXTS || status === 429) {
321
323
  return new RateLimitError(message || void 0, common);
322
324
  }
323
- if (errorCode === ErrorCodes.VALIDATION || status === 400) {
325
+ if (errorCode === ErrorCodes.VALIDATION || errorCode === ErrorCodes.MISSING_VOICE_ID || status === 400) {
324
326
  return new ValidationError(message || "Request validation failed.", common);
325
327
  }
326
328
  if (errorCode === ErrorCodes.MODEL_UNAVAILABLE || status === 503) {
@@ -380,8 +382,9 @@ function classifyHttpError(status, bodyText, headers) {
380
382
  function classifyWsFrame(data) {
381
383
  const errorCode = data.error_code;
382
384
  const message = data.error ?? "Server reported an error.";
385
+ const status = typeof data.code === "number" ? data.code : void 0;
383
386
  const retryAfter = typeof data.retry_after === "number" ? data.retry_after : void 0;
384
- return build(void 0, errorCode, message, { retryAfter });
387
+ return build(status, errorCode, message, { retryAfter });
385
388
  }
386
389
  function classifyWsClose(code, reason) {
387
390
  const reasonTxt = (reason ?? "").trim();
@@ -1303,7 +1306,7 @@ var MultiContextSession = class {
1303
1306
  const data = JSON.parse(messageData);
1304
1307
  if (data.error) {
1305
1308
  this.callbacks.onError?.(
1306
- new KugelAudioError(data.error),
1309
+ classifyWsFrame(data),
1307
1310
  data.context_id
1308
1311
  );
1309
1312
  return;
@@ -1438,13 +1441,21 @@ var MultiContextSession = class {
1438
1441
  }
1439
1442
  /**
1440
1443
  * Close a specific context.
1444
+ *
1445
+ * @param contextId - The context to close.
1446
+ * @param immediate - When `true`, **barge-in**: the server cancels the
1447
+ * context's in-flight generation immediately and discards any buffered or
1448
+ * queued text instead of draining it. Use this when the end user speaks
1449
+ * over the agent. When `false` (default), queued sentences finish first.
1441
1450
  */
1442
- closeContext(contextId) {
1451
+ closeContext(contextId, immediate = false) {
1443
1452
  if (!this.ws || this.ws.readyState !== WS_OPEN) return;
1444
- this.ws.send(JSON.stringify({
1453
+ const msg = {
1445
1454
  close_context: true,
1446
1455
  context_id: contextId
1447
- }));
1456
+ };
1457
+ if (immediate) msg.immediate = true;
1458
+ this.ws.send(JSON.stringify(msg));
1448
1459
  }
1449
1460
  /**
1450
1461
  * Send keep-alive to reset a context's inactivity timeout.
@@ -1549,6 +1560,9 @@ var StreamingSession = class {
1549
1560
  if (data.generation_started) {
1550
1561
  this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? "");
1551
1562
  }
1563
+ if (data.interrupted) {
1564
+ this.callbacks.onInterrupted?.();
1565
+ }
1552
1566
  if (data.session_closed) {
1553
1567
  this.callbacks.onSessionClosed?.(
1554
1568
  data.total_audio_seconds ?? 0,
@@ -1629,6 +1643,73 @@ var StreamingSession = class {
1629
1643
  }
1630
1644
  this.ws.send(JSON.stringify(msg));
1631
1645
  }
1646
+ /**
1647
+ * Interrupt (barge-in) the current generation without closing the socket.
1648
+ *
1649
+ * Use this when the end user starts speaking over the agent: it tells the
1650
+ * server to **stop generating audio for the current turn immediately** and
1651
+ * drop any text that was buffered or queued but not yet spoken. Unlike
1652
+ * {@link endSession}, no remaining text is flushed — the turn is abandoned.
1653
+ *
1654
+ * The WebSocket stays open and a fresh session is ready, so you can call
1655
+ * {@link send} for the next user turn right away (config is re-sent
1656
+ * automatically on that first `send`).
1657
+ *
1658
+ * The returned promise resolves once the server acknowledges with an
1659
+ * `interrupted` frame (which also fires
1660
+ * {@link StreamingSessionCallbacks.onInterrupted}), or after a 5 s **quiet**
1661
+ * timeout — i.e. 5 s elapse without any server message arriving. The timer
1662
+ * resets on every incoming frame, so a few in-flight audio chunks still
1663
+ * draining at the moment of cancellation do not trip it prematurely.
1664
+ *
1665
+ * @example
1666
+ * ```typescript
1667
+ * // VAD detected the user speaking over the agent:
1668
+ * await session.cancelCurrent();
1669
+ * // Socket is still open — start the next turn immediately:
1670
+ * session.send(nextLlmToken);
1671
+ * ```
1672
+ */
1673
+ cancelCurrent() {
1674
+ if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
1675
+ const ws = this.ws;
1676
+ const QUIET_TIMEOUT_MS = 5e3;
1677
+ return new Promise((resolve) => {
1678
+ let settled = false;
1679
+ let timer;
1680
+ const prevMessage = ws.onmessage;
1681
+ const prevClose = ws.onclose;
1682
+ const done = () => {
1683
+ if (settled) return;
1684
+ settled = true;
1685
+ clearTimeout(timer);
1686
+ ws.onmessage = prevMessage;
1687
+ ws.onclose = prevClose;
1688
+ this.configSent = false;
1689
+ resolve();
1690
+ };
1691
+ const armQuietTimer = () => {
1692
+ clearTimeout(timer);
1693
+ timer = setTimeout(done, QUIET_TIMEOUT_MS);
1694
+ };
1695
+ armQuietTimer();
1696
+ ws.onmessage = (event) => {
1697
+ armQuietTimer();
1698
+ if (prevMessage) prevMessage.call(ws, event);
1699
+ try {
1700
+ const raw = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
1701
+ if (JSON.parse(raw).interrupted) done();
1702
+ } catch {
1703
+ }
1704
+ };
1705
+ ws.onclose = (event) => {
1706
+ this.ws = null;
1707
+ if (prevClose) prevClose.call(ws, event);
1708
+ done();
1709
+ };
1710
+ ws.send(JSON.stringify({ cancel: true }));
1711
+ });
1712
+ }
1632
1713
  /**
1633
1714
  * End the current session but keep the WebSocket connection open.
1634
1715
  *
package/dist/index.mjs CHANGED
@@ -195,7 +195,9 @@ var ErrorCodes = {
195
195
  EMPTY_AUDIO: "EMPTY_AUDIO",
196
196
  VALIDATION: "VALIDATION_ERROR",
197
197
  INTERNAL: "INTERNAL_ERROR",
198
- NOT_FOUND: "NOT_FOUND"
198
+ NOT_FOUND: "NOT_FOUND",
199
+ MISSING_VOICE_ID: "MISSING_VOICE_ID",
200
+ TOO_MANY_CONTEXTS: "TOO_MANY_CONTEXTS"
199
201
  };
200
202
  var WsCloseCodes = {
201
203
  UNAUTHORIZED: 4001,
@@ -279,10 +281,10 @@ function build(status, errorCode, message, opts = {}) {
279
281
  if (errorCode === ErrorCodes.INSUFFICIENT_CREDITS || status === 402) {
280
282
  return new InsufficientCreditsError(message || void 0, common);
281
283
  }
282
- if (errorCode === ErrorCodes.RATE_LIMITED || status === 429) {
284
+ if (errorCode === ErrorCodes.RATE_LIMITED || errorCode === ErrorCodes.TOO_MANY_CONTEXTS || status === 429) {
283
285
  return new RateLimitError(message || void 0, common);
284
286
  }
285
- if (errorCode === ErrorCodes.VALIDATION || status === 400) {
287
+ if (errorCode === ErrorCodes.VALIDATION || errorCode === ErrorCodes.MISSING_VOICE_ID || status === 400) {
286
288
  return new ValidationError(message || "Request validation failed.", common);
287
289
  }
288
290
  if (errorCode === ErrorCodes.MODEL_UNAVAILABLE || status === 503) {
@@ -342,8 +344,9 @@ function classifyHttpError(status, bodyText, headers) {
342
344
  function classifyWsFrame(data) {
343
345
  const errorCode = data.error_code;
344
346
  const message = data.error ?? "Server reported an error.";
347
+ const status = typeof data.code === "number" ? data.code : void 0;
345
348
  const retryAfter = typeof data.retry_after === "number" ? data.retry_after : void 0;
346
- return build(void 0, errorCode, message, { retryAfter });
349
+ return build(status, errorCode, message, { retryAfter });
347
350
  }
348
351
  function classifyWsClose(code, reason) {
349
352
  const reasonTxt = (reason ?? "").trim();
@@ -1265,7 +1268,7 @@ var MultiContextSession = class {
1265
1268
  const data = JSON.parse(messageData);
1266
1269
  if (data.error) {
1267
1270
  this.callbacks.onError?.(
1268
- new KugelAudioError(data.error),
1271
+ classifyWsFrame(data),
1269
1272
  data.context_id
1270
1273
  );
1271
1274
  return;
@@ -1400,13 +1403,21 @@ var MultiContextSession = class {
1400
1403
  }
1401
1404
  /**
1402
1405
  * Close a specific context.
1406
+ *
1407
+ * @param contextId - The context to close.
1408
+ * @param immediate - When `true`, **barge-in**: the server cancels the
1409
+ * context's in-flight generation immediately and discards any buffered or
1410
+ * queued text instead of draining it. Use this when the end user speaks
1411
+ * over the agent. When `false` (default), queued sentences finish first.
1403
1412
  */
1404
- closeContext(contextId) {
1413
+ closeContext(contextId, immediate = false) {
1405
1414
  if (!this.ws || this.ws.readyState !== WS_OPEN) return;
1406
- this.ws.send(JSON.stringify({
1415
+ const msg = {
1407
1416
  close_context: true,
1408
1417
  context_id: contextId
1409
- }));
1418
+ };
1419
+ if (immediate) msg.immediate = true;
1420
+ this.ws.send(JSON.stringify(msg));
1410
1421
  }
1411
1422
  /**
1412
1423
  * Send keep-alive to reset a context's inactivity timeout.
@@ -1511,6 +1522,9 @@ var StreamingSession = class {
1511
1522
  if (data.generation_started) {
1512
1523
  this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? "");
1513
1524
  }
1525
+ if (data.interrupted) {
1526
+ this.callbacks.onInterrupted?.();
1527
+ }
1514
1528
  if (data.session_closed) {
1515
1529
  this.callbacks.onSessionClosed?.(
1516
1530
  data.total_audio_seconds ?? 0,
@@ -1591,6 +1605,73 @@ var StreamingSession = class {
1591
1605
  }
1592
1606
  this.ws.send(JSON.stringify(msg));
1593
1607
  }
1608
+ /**
1609
+ * Interrupt (barge-in) the current generation without closing the socket.
1610
+ *
1611
+ * Use this when the end user starts speaking over the agent: it tells the
1612
+ * server to **stop generating audio for the current turn immediately** and
1613
+ * drop any text that was buffered or queued but not yet spoken. Unlike
1614
+ * {@link endSession}, no remaining text is flushed — the turn is abandoned.
1615
+ *
1616
+ * The WebSocket stays open and a fresh session is ready, so you can call
1617
+ * {@link send} for the next user turn right away (config is re-sent
1618
+ * automatically on that first `send`).
1619
+ *
1620
+ * The returned promise resolves once the server acknowledges with an
1621
+ * `interrupted` frame (which also fires
1622
+ * {@link StreamingSessionCallbacks.onInterrupted}), or after a 5 s **quiet**
1623
+ * timeout — i.e. 5 s elapse without any server message arriving. The timer
1624
+ * resets on every incoming frame, so a few in-flight audio chunks still
1625
+ * draining at the moment of cancellation do not trip it prematurely.
1626
+ *
1627
+ * @example
1628
+ * ```typescript
1629
+ * // VAD detected the user speaking over the agent:
1630
+ * await session.cancelCurrent();
1631
+ * // Socket is still open — start the next turn immediately:
1632
+ * session.send(nextLlmToken);
1633
+ * ```
1634
+ */
1635
+ cancelCurrent() {
1636
+ if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
1637
+ const ws = this.ws;
1638
+ const QUIET_TIMEOUT_MS = 5e3;
1639
+ return new Promise((resolve) => {
1640
+ let settled = false;
1641
+ let timer;
1642
+ const prevMessage = ws.onmessage;
1643
+ const prevClose = ws.onclose;
1644
+ const done = () => {
1645
+ if (settled) return;
1646
+ settled = true;
1647
+ clearTimeout(timer);
1648
+ ws.onmessage = prevMessage;
1649
+ ws.onclose = prevClose;
1650
+ this.configSent = false;
1651
+ resolve();
1652
+ };
1653
+ const armQuietTimer = () => {
1654
+ clearTimeout(timer);
1655
+ timer = setTimeout(done, QUIET_TIMEOUT_MS);
1656
+ };
1657
+ armQuietTimer();
1658
+ ws.onmessage = (event) => {
1659
+ armQuietTimer();
1660
+ if (prevMessage) prevMessage.call(ws, event);
1661
+ try {
1662
+ const raw = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
1663
+ if (JSON.parse(raw).interrupted) done();
1664
+ } catch {
1665
+ }
1666
+ };
1667
+ ws.onclose = (event) => {
1668
+ this.ws = null;
1669
+ if (prevClose) prevClose.call(ws, event);
1670
+ done();
1671
+ };
1672
+ ws.send(JSON.stringify({ cancel: true }));
1673
+ });
1674
+ }
1594
1675
  /**
1595
1676
  * End the current session but keep the WebSocket connection open.
1596
1677
  *
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kugelaudio",
3
- "version": "0.5.0",
3
+ "version": "0.6.1",
4
4
  "description": "Official JavaScript/TypeScript SDK for KugelAudio TTS API",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",
@@ -8,6 +8,7 @@
8
8
 
9
9
  import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
10
10
  import { KugelAudio } from './client';
11
+ import { RateLimitError } from './errors';
11
12
 
12
13
  // ---------------------------------------------------------------------------
13
14
  // Minimal WebSocket mock
@@ -333,6 +334,10 @@ function makeGenerationStartedMsg(chunkId: number, text: string): string {
333
334
  });
334
335
  }
335
336
 
337
+ function makeInterruptedMsg(): string {
338
+ return JSON.stringify({ interrupted: true });
339
+ }
340
+
336
341
  describe('StreamingSession', () => {
337
342
  let client: KugelAudio;
338
343
 
@@ -545,4 +550,138 @@ describe('StreamingSession', () => {
545
550
  expect(session.isConnected).toBe(true);
546
551
  expect(() => session.send('Hello.', true)).not.toThrow();
547
552
  });
553
+
554
+ // -------------------------------------------------------------------------
555
+ // cancelCurrent() — barge-in (KUG-1050)
556
+ // -------------------------------------------------------------------------
557
+
558
+ it('cancelCurrent() sends {cancel:true}, fires onInterrupted, keeps socket open', async () => {
559
+ const interruptedCalls: number[] = [];
560
+
561
+ const session = client.tts.streamingSession(
562
+ { voiceId: 1 },
563
+ { onInterrupted: () => interruptedCalls.push(1) },
564
+ );
565
+
566
+ session.connect();
567
+ await new Promise<void>((r) => setTimeout(r, 10));
568
+
569
+ session.send('A very long sentence the user is about to talk over.');
570
+ mockWs.onmessage?.({ data: makeAudioMsg(0, 100) });
571
+
572
+ const cancelPromise = session.cancelCurrent();
573
+
574
+ // The barge-in frame was sent to the server.
575
+ const lastSent = JSON.parse(mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string);
576
+ expect(lastSent.cancel).toBe(true);
577
+
578
+ // Server acks the barge-in.
579
+ mockWs.onmessage?.({ data: makeInterruptedMsg() });
580
+ await cancelPromise;
581
+
582
+ // onInterrupted fired and the socket stayed open for the next turn.
583
+ expect(interruptedCalls).toHaveLength(1);
584
+ expect(session.isConnected).toBe(true);
585
+ expect(mockWs.close).not.toHaveBeenCalled();
586
+ });
587
+
588
+ it('cancelCurrent() re-sends config on the next send (fresh server session)', async () => {
589
+ const session = client.tts.streamingSession({ voiceId: 42 }, {});
590
+
591
+ session.connect();
592
+ await new Promise<void>((r) => setTimeout(r, 10));
593
+
594
+ // First send carries config (voice_id).
595
+ session.send('Hello.');
596
+ expect(JSON.parse(mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string).voice_id).toBe(42);
597
+
598
+ const cancelPromise = session.cancelCurrent();
599
+ mockWs.onmessage?.({ data: makeInterruptedMsg() });
600
+ await cancelPromise;
601
+
602
+ // The server started a fresh session, so the next send must re-send config.
603
+ session.send('Next turn.');
604
+ expect(JSON.parse(mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string).voice_id).toBe(42);
605
+ });
606
+
607
+ it('cancelCurrent() resolves on quiet timeout if server never acks', async () => {
608
+ const session = client.tts.streamingSession({ voiceId: 1 }, {});
609
+
610
+ session.connect();
611
+ await new Promise<void>((r) => setTimeout(r, 10));
612
+ session.send('Hello.');
613
+
614
+ vi.useFakeTimers();
615
+ const cancelPromise = session.cancelCurrent();
616
+
617
+ // No interrupted ack — the 5 s quiet timeout resolves it.
618
+ await vi.advanceTimersByTimeAsync(6_000);
619
+ await cancelPromise;
620
+
621
+ vi.useRealTimers();
622
+ // Socket was never closed; still reusable.
623
+ expect(session.isConnected).toBe(true);
624
+ });
625
+ });
626
+
627
+ // ---------------------------------------------------------------------------
628
+ // MultiContextSession barge-in — closeContext immediate (KUG-1050)
629
+ // ---------------------------------------------------------------------------
630
+
631
+ describe('MultiContextSession closeContext', () => {
632
+ let client: KugelAudio;
633
+
634
+ beforeEach(() => {
635
+ client = new KugelAudio({ apiKey: 'test-key-xxx' });
636
+ });
637
+
638
+ it('closeContext(id, true) sends the immediate barge-in flag', async () => {
639
+ const session = client.tts.createMultiContextSession({ defaultVoiceId: 1 });
640
+ await session.connect({});
641
+
642
+ session.closeContext('ctx1', true);
643
+
644
+ const sent = JSON.parse(
645
+ mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
646
+ );
647
+ expect(sent.close_context).toBe(true);
648
+ expect(sent.context_id).toBe('ctx1');
649
+ expect(sent.immediate).toBe(true);
650
+ });
651
+
652
+ it('closeContext(id) omits immediate (graceful drain)', async () => {
653
+ const session = client.tts.createMultiContextSession({ defaultVoiceId: 1 });
654
+ await session.connect({});
655
+
656
+ session.closeContext('ctx1');
657
+
658
+ const sent = JSON.parse(
659
+ mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
660
+ );
661
+ expect(sent.close_context).toBe(true);
662
+ expect(sent.immediate).toBeUndefined();
663
+ });
664
+
665
+ it('maps ingress context-cap errors to typed callback errors', async () => {
666
+ const errors: Array<{ contextId?: string; error: Error }> = [];
667
+ const session = client.tts.createMultiContextSession({ defaultVoiceId: 1 });
668
+ await session.connect({
669
+ onError: (error, contextId) => errors.push({ contextId, error }),
670
+ });
671
+
672
+ mockWs.onmessage?.({
673
+ data: JSON.stringify({
674
+ error: 'Too many concurrent contexts',
675
+ error_code: 'TOO_MANY_CONTEXTS',
676
+ code: 429,
677
+ context_id: 'ctx1',
678
+ }),
679
+ });
680
+
681
+ expect(errors).toHaveLength(1);
682
+ expect(errors[0].contextId).toBe('ctx1');
683
+ expect(errors[0].error).toBeInstanceOf(RateLimitError);
684
+ expect((errors[0].error as RateLimitError).statusCode).toBe(429);
685
+ expect((errors[0].error as RateLimitError).errorCode).toBe('TOO_MANY_CONTEXTS');
686
+ });
548
687
  });
package/src/client.ts CHANGED
@@ -873,7 +873,7 @@ class TTSResource {
873
873
  }
874
874
  }
875
875
 
876
- private parseError(data: { error?: string; error_code?: string; retry_after?: number }): Error {
876
+ private parseError(data: { error?: string; error_code?: string; code?: number; retry_after?: number }): Error {
877
877
  return classifyWsFrame(data);
878
878
  }
879
879
 
@@ -1019,7 +1019,7 @@ class MultiContextSession {
1019
1019
 
1020
1020
  if (data.error) {
1021
1021
  this.callbacks.onError?.(
1022
- new KugelAudioError(data.error),
1022
+ classifyWsFrame(data),
1023
1023
  data.context_id
1024
1024
  );
1025
1025
  return;
@@ -1195,14 +1195,22 @@ class MultiContextSession {
1195
1195
 
1196
1196
  /**
1197
1197
  * Close a specific context.
1198
+ *
1199
+ * @param contextId - The context to close.
1200
+ * @param immediate - When `true`, **barge-in**: the server cancels the
1201
+ * context's in-flight generation immediately and discards any buffered or
1202
+ * queued text instead of draining it. Use this when the end user speaks
1203
+ * over the agent. When `false` (default), queued sentences finish first.
1198
1204
  */
1199
- closeContext(contextId: string): void {
1205
+ closeContext(contextId: string, immediate = false): void {
1200
1206
  if (!this.ws || this.ws.readyState !== WS_OPEN) return;
1201
1207
 
1202
- this.ws.send(JSON.stringify({
1208
+ const msg: Record<string, unknown> = {
1203
1209
  close_context: true,
1204
1210
  context_id: contextId,
1205
- }));
1211
+ };
1212
+ if (immediate) msg.immediate = true;
1213
+ this.ws.send(JSON.stringify(msg));
1206
1214
  }
1207
1215
 
1208
1216
  /**
@@ -1362,6 +1370,10 @@ class StreamingSession {
1362
1370
  this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? '');
1363
1371
  }
1364
1372
 
1373
+ if (data.interrupted) {
1374
+ this.callbacks.onInterrupted?.();
1375
+ }
1376
+
1365
1377
  if (data.session_closed) {
1366
1378
  this.callbacks.onSessionClosed?.(
1367
1379
  data.total_audio_seconds ?? 0,
@@ -1461,6 +1473,97 @@ class StreamingSession {
1461
1473
  this.ws.send(JSON.stringify(msg));
1462
1474
  }
1463
1475
 
1476
+ /**
1477
+ * Interrupt (barge-in) the current generation without closing the socket.
1478
+ *
1479
+ * Use this when the end user starts speaking over the agent: it tells the
1480
+ * server to **stop generating audio for the current turn immediately** and
1481
+ * drop any text that was buffered or queued but not yet spoken. Unlike
1482
+ * {@link endSession}, no remaining text is flushed — the turn is abandoned.
1483
+ *
1484
+ * The WebSocket stays open and a fresh session is ready, so you can call
1485
+ * {@link send} for the next user turn right away (config is re-sent
1486
+ * automatically on that first `send`).
1487
+ *
1488
+ * The returned promise resolves once the server acknowledges with an
1489
+ * `interrupted` frame (which also fires
1490
+ * {@link StreamingSessionCallbacks.onInterrupted}), or after a 5 s **quiet**
1491
+ * timeout — i.e. 5 s elapse without any server message arriving. The timer
1492
+ * resets on every incoming frame, so a few in-flight audio chunks still
1493
+ * draining at the moment of cancellation do not trip it prematurely.
1494
+ *
1495
+ * @example
1496
+ * ```typescript
1497
+ * // VAD detected the user speaking over the agent:
1498
+ * await session.cancelCurrent();
1499
+ * // Socket is still open — start the next turn immediately:
1500
+ * session.send(nextLlmToken);
1501
+ * ```
1502
+ */
1503
+ cancelCurrent(): Promise<void> {
1504
+ if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
1505
+
1506
+ const ws = this.ws;
1507
+ // Quiet timeout: resets on every incoming server message. Trips only
1508
+ // when the server has been silent for this long. A short window is fine
1509
+ // here because the server cancels in-flight generation promptly; we only
1510
+ // need to outlast a handful of already-emitted audio frames in transit.
1511
+ const QUIET_TIMEOUT_MS = 5_000;
1512
+
1513
+ return new Promise<void>((resolve) => {
1514
+ let settled = false;
1515
+ let timer: ReturnType<typeof setTimeout>;
1516
+
1517
+ const prevMessage = ws.onmessage;
1518
+ const prevClose = ws.onclose;
1519
+
1520
+ const done = () => {
1521
+ if (settled) return;
1522
+ settled = true;
1523
+ clearTimeout(timer);
1524
+ // Restore the original handlers so subsequent calls don't stack
1525
+ // wrappers and the typed-error onclose installed by connect() stays
1526
+ // in effect for the next turn.
1527
+ ws.onmessage = prevMessage;
1528
+ ws.onclose = prevClose;
1529
+ // The server starts a fresh session after a cancel, so the next
1530
+ // send() must re-send config.
1531
+ this.configSent = false;
1532
+ resolve();
1533
+ };
1534
+
1535
+ const armQuietTimer = () => {
1536
+ clearTimeout(timer);
1537
+ timer = setTimeout(done, QUIET_TIMEOUT_MS);
1538
+ };
1539
+
1540
+ armQuietTimer();
1541
+
1542
+ ws.onmessage = (event: MessageEvent) => {
1543
+ // Reset the quiet timer on EVERY incoming frame — late audio chunks
1544
+ // from the cancelled turn count as liveness, not just the ack.
1545
+ armQuietTimer();
1546
+ if (prevMessage) prevMessage.call(ws, event);
1547
+ try {
1548
+ const raw = typeof event.data === 'string'
1549
+ ? event.data
1550
+ : event.data instanceof Buffer
1551
+ ? event.data.toString()
1552
+ : String(event.data);
1553
+ if (JSON.parse(raw).interrupted) done();
1554
+ } catch { /* ignore parse errors */ }
1555
+ };
1556
+
1557
+ ws.onclose = (event: CloseEvent) => {
1558
+ this.ws = null;
1559
+ if (prevClose) prevClose.call(ws, event);
1560
+ done();
1561
+ };
1562
+
1563
+ ws.send(JSON.stringify({ cancel: true }));
1564
+ });
1565
+ }
1566
+
1464
1567
  /**
1465
1568
  * End the current session but keep the WebSocket connection open.
1466
1569
  *
package/src/errors.ts CHANGED
@@ -17,6 +17,8 @@ export const ErrorCodes = {
17
17
  VALIDATION: 'VALIDATION_ERROR',
18
18
  INTERNAL: 'INTERNAL_ERROR',
19
19
  NOT_FOUND: 'NOT_FOUND',
20
+ MISSING_VOICE_ID: 'MISSING_VOICE_ID',
21
+ TOO_MANY_CONTEXTS: 'TOO_MANY_CONTEXTS',
20
22
  } as const;
21
23
  export type ErrorCode = typeof ErrorCodes[keyof typeof ErrorCodes];
22
24
 
@@ -175,10 +177,18 @@ function build(
175
177
  if (errorCode === ErrorCodes.INSUFFICIENT_CREDITS || status === 402) {
176
178
  return new InsufficientCreditsError(message || undefined, common);
177
179
  }
178
- if (errorCode === ErrorCodes.RATE_LIMITED || status === 429) {
180
+ if (
181
+ errorCode === ErrorCodes.RATE_LIMITED ||
182
+ errorCode === ErrorCodes.TOO_MANY_CONTEXTS ||
183
+ status === 429
184
+ ) {
179
185
  return new RateLimitError(message || undefined, common);
180
186
  }
181
- if (errorCode === ErrorCodes.VALIDATION || status === 400) {
187
+ if (
188
+ errorCode === ErrorCodes.VALIDATION ||
189
+ errorCode === ErrorCodes.MISSING_VOICE_ID ||
190
+ status === 400
191
+ ) {
182
192
  return new ValidationError(message || 'Request validation failed.', common);
183
193
  }
184
194
  if (errorCode === ErrorCodes.MODEL_UNAVAILABLE || status === 503) {
@@ -263,17 +273,19 @@ export function classifyHttpError(
263
273
 
264
274
  /**
265
275
  * Build a `KugelAudioError` from a server-sent WebSocket error frame
266
- * (`{error, error_code, retry_after}`).
276
+ * (`{error, error_code, code}`).
267
277
  */
268
278
  export function classifyWsFrame(data: {
269
279
  error?: string;
270
280
  error_code?: string;
281
+ code?: number;
271
282
  retry_after?: number;
272
283
  }): KugelAudioError {
273
284
  const errorCode = data.error_code;
274
285
  const message = data.error ?? 'Server reported an error.';
286
+ const status = typeof data.code === 'number' ? data.code : undefined;
275
287
  const retryAfter = typeof data.retry_after === 'number' ? data.retry_after : undefined;
276
- return build(undefined, errorCode, message, { retryAfter });
288
+ return build(status, errorCode, message, { retryAfter });
277
289
  }
278
290
 
279
291
  /**
package/src/types.ts CHANGED
@@ -408,6 +408,13 @@ export interface StreamingSessionCallbacks {
408
408
  onGenerationStarted?: (chunkId: number, text: string) => void;
409
409
  /** Called when word-level timestamps arrive (requires `wordTimestamps: true`). */
410
410
  onWordTimestamps?: (timestamps: WordTimestamp[]) => void;
411
+ /**
412
+ * Called when the server acknowledges a barge-in
413
+ * ({@link StreamingSession.cancelCurrent}). After this fires, no further
414
+ * audio chunks from the cancelled turn will arrive and the session is
415
+ * ready for the next `send()`.
416
+ */
417
+ onInterrupted?: () => void;
411
418
  /** Called on any error. */
412
419
  onError?: (error: Error) => void;
413
420
  }