kugelaudio 0.4.0 → 0.6.0

This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -333,6 +333,10 @@ function makeGenerationStartedMsg(chunkId: number, text: string): string {
333
333
  });
334
334
  }
335
335
 
336
+ function makeInterruptedMsg(): string {
337
+ return JSON.stringify({ interrupted: true });
338
+ }
339
+
336
340
  describe('StreamingSession', () => {
337
341
  let client: KugelAudio;
338
342
 
@@ -545,4 +549,115 @@ describe('StreamingSession', () => {
545
549
  expect(session.isConnected).toBe(true);
546
550
  expect(() => session.send('Hello.', true)).not.toThrow();
547
551
  });
552
+
553
+ // -------------------------------------------------------------------------
554
+ // cancelCurrent() — barge-in (KUG-1050)
555
+ // -------------------------------------------------------------------------
556
+
557
+ it('cancelCurrent() sends {cancel:true}, fires onInterrupted, keeps socket open', async () => {
558
+ const interruptedCalls: number[] = [];
559
+
560
+ const session = client.tts.streamingSession(
561
+ { voiceId: 1 },
562
+ { onInterrupted: () => interruptedCalls.push(1) },
563
+ );
564
+
565
+ session.connect();
566
+ await new Promise<void>((r) => setTimeout(r, 10));
567
+
568
+ session.send('A very long sentence the user is about to talk over.');
569
+ mockWs.onmessage?.({ data: makeAudioMsg(0, 100) });
570
+
571
+ const cancelPromise = session.cancelCurrent();
572
+
573
+ // The barge-in frame was sent to the server.
574
+ const lastSent = JSON.parse(mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string);
575
+ expect(lastSent.cancel).toBe(true);
576
+
577
+ // Server acks the barge-in.
578
+ mockWs.onmessage?.({ data: makeInterruptedMsg() });
579
+ await cancelPromise;
580
+
581
+ // onInterrupted fired and the socket stayed open for the next turn.
582
+ expect(interruptedCalls).toHaveLength(1);
583
+ expect(session.isConnected).toBe(true);
584
+ expect(mockWs.close).not.toHaveBeenCalled();
585
+ });
586
+
587
+ it('cancelCurrent() re-sends config on the next send (fresh server session)', async () => {
588
+ const session = client.tts.streamingSession({ voiceId: 42 }, {});
589
+
590
+ session.connect();
591
+ await new Promise<void>((r) => setTimeout(r, 10));
592
+
593
+ // First send carries config (voice_id).
594
+ session.send('Hello.');
595
+ expect(JSON.parse(mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string).voice_id).toBe(42);
596
+
597
+ const cancelPromise = session.cancelCurrent();
598
+ mockWs.onmessage?.({ data: makeInterruptedMsg() });
599
+ await cancelPromise;
600
+
601
+ // The server started a fresh session, so the next send must re-send config.
602
+ session.send('Next turn.');
603
+ expect(JSON.parse(mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string).voice_id).toBe(42);
604
+ });
605
+
606
+ it('cancelCurrent() resolves on quiet timeout if server never acks', async () => {
607
+ const session = client.tts.streamingSession({ voiceId: 1 }, {});
608
+
609
+ session.connect();
610
+ await new Promise<void>((r) => setTimeout(r, 10));
611
+ session.send('Hello.');
612
+
613
+ vi.useFakeTimers();
614
+ const cancelPromise = session.cancelCurrent();
615
+
616
+ // No interrupted ack — the 5 s quiet timeout resolves it.
617
+ await vi.advanceTimersByTimeAsync(6_000);
618
+ await cancelPromise;
619
+
620
+ vi.useRealTimers();
621
+ // Socket was never closed; still reusable.
622
+ expect(session.isConnected).toBe(true);
623
+ });
624
+ });
625
+
626
+ // ---------------------------------------------------------------------------
627
+ // MultiContextSession barge-in — closeContext immediate (KUG-1050)
628
+ // ---------------------------------------------------------------------------
629
+
630
+ describe('MultiContextSession closeContext', () => {
631
+ let client: KugelAudio;
632
+
633
+ beforeEach(() => {
634
+ client = new KugelAudio({ apiKey: 'test-key-xxx' });
635
+ });
636
+
637
+ it('closeContext(id, true) sends the immediate barge-in flag', async () => {
638
+ const session = client.tts.createMultiContextSession({ defaultVoiceId: 1 });
639
+ await session.connect({});
640
+
641
+ session.closeContext('ctx1', true);
642
+
643
+ const sent = JSON.parse(
644
+ mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
645
+ );
646
+ expect(sent.close_context).toBe(true);
647
+ expect(sent.context_id).toBe('ctx1');
648
+ expect(sent.immediate).toBe(true);
649
+ });
650
+
651
+ it('closeContext(id) omits immediate (graceful drain)', async () => {
652
+ const session = client.tts.createMultiContextSession({ defaultVoiceId: 1 });
653
+ await session.connect({});
654
+
655
+ session.closeContext('ctx1');
656
+
657
+ const sent = JSON.parse(
658
+ mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
659
+ );
660
+ expect(sent.close_context).toBe(true);
661
+ expect(sent.immediate).toBeUndefined();
662
+ });
548
663
  });
package/src/client.ts CHANGED
@@ -2,6 +2,7 @@
2
2
  * KugelAudio API Client.
3
3
  */
4
4
 
5
+ import { DictionariesResource } from './dictionaries';
5
6
  import {
6
7
  ConnectionError,
7
8
  KugelAudioError,
@@ -1194,14 +1195,22 @@ class MultiContextSession {
1194
1195
 
1195
1196
  /**
1196
1197
  * Close a specific context.
1198
+ *
1199
+ * @param contextId - The context to close.
1200
+ * @param immediate - When `true`, **barge-in**: the server cancels the
1201
+ * context's in-flight generation immediately and discards any buffered or
1202
+ * queued text instead of draining it. Use this when the end user speaks
1203
+ * over the agent. When `false` (default), queued sentences finish first.
1197
1204
  */
1198
- closeContext(contextId: string): void {
1205
+ closeContext(contextId: string, immediate = false): void {
1199
1206
  if (!this.ws || this.ws.readyState !== WS_OPEN) return;
1200
1207
 
1201
- this.ws.send(JSON.stringify({
1208
+ const msg: Record<string, unknown> = {
1202
1209
  close_context: true,
1203
1210
  context_id: contextId,
1204
- }));
1211
+ };
1212
+ if (immediate) msg.immediate = true;
1213
+ this.ws.send(JSON.stringify(msg));
1205
1214
  }
1206
1215
 
1207
1216
  /**
@@ -1361,6 +1370,10 @@ class StreamingSession {
1361
1370
  this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? '');
1362
1371
  }
1363
1372
 
1373
+ if (data.interrupted) {
1374
+ this.callbacks.onInterrupted?.();
1375
+ }
1376
+
1364
1377
  if (data.session_closed) {
1365
1378
  this.callbacks.onSessionClosed?.(
1366
1379
  data.total_audio_seconds ?? 0,
@@ -1460,6 +1473,97 @@ class StreamingSession {
1460
1473
  this.ws.send(JSON.stringify(msg));
1461
1474
  }
1462
1475
 
1476
+ /**
1477
+ * Interrupt (barge-in) the current generation without closing the socket.
1478
+ *
1479
+ * Use this when the end user starts speaking over the agent: it tells the
1480
+ * server to **stop generating audio for the current turn immediately** and
1481
+ * drop any text that was buffered or queued but not yet spoken. Unlike
1482
+ * {@link endSession}, no remaining text is flushed — the turn is abandoned.
1483
+ *
1484
+ * The WebSocket stays open and a fresh session is ready, so you can call
1485
+ * {@link send} for the next user turn right away (config is re-sent
1486
+ * automatically on that first `send`).
1487
+ *
1488
+ * The returned promise resolves once the server acknowledges with an
1489
+ * `interrupted` frame (which also fires
1490
+ * {@link StreamingSessionCallbacks.onInterrupted}), or after a 5 s **quiet**
1491
+ * timeout — i.e. 5 s elapse without any server message arriving. The timer
1492
+ * resets on every incoming frame, so a few in-flight audio chunks still
1493
+ * draining at the moment of cancellation do not trip it prematurely.
1494
+ *
1495
+ * @example
1496
+ * ```typescript
1497
+ * // VAD detected the user speaking over the agent:
1498
+ * await session.cancelCurrent();
1499
+ * // Socket is still open — start the next turn immediately:
1500
+ * session.send(nextLlmToken);
1501
+ * ```
1502
+ */
1503
+ cancelCurrent(): Promise<void> {
1504
+ if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
1505
+
1506
+ const ws = this.ws;
1507
+ // Quiet timeout: resets on every incoming server message. Trips only
1508
+ // when the server has been silent for this long. A short window is fine
1509
+ // here because the server cancels in-flight generation promptly; we only
1510
+ // need to outlast a handful of already-emitted audio frames in transit.
1511
+ const QUIET_TIMEOUT_MS = 5_000;
1512
+
1513
+ return new Promise<void>((resolve) => {
1514
+ let settled = false;
1515
+ let timer: ReturnType<typeof setTimeout>;
1516
+
1517
+ const prevMessage = ws.onmessage;
1518
+ const prevClose = ws.onclose;
1519
+
1520
+ const done = () => {
1521
+ if (settled) return;
1522
+ settled = true;
1523
+ clearTimeout(timer);
1524
+ // Restore the original handlers so subsequent calls don't stack
1525
+ // wrappers and the typed-error onclose installed by connect() stays
1526
+ // in effect for the next turn.
1527
+ ws.onmessage = prevMessage;
1528
+ ws.onclose = prevClose;
1529
+ // The server starts a fresh session after a cancel, so the next
1530
+ // send() must re-send config.
1531
+ this.configSent = false;
1532
+ resolve();
1533
+ };
1534
+
1535
+ const armQuietTimer = () => {
1536
+ clearTimeout(timer);
1537
+ timer = setTimeout(done, QUIET_TIMEOUT_MS);
1538
+ };
1539
+
1540
+ armQuietTimer();
1541
+
1542
+ ws.onmessage = (event: MessageEvent) => {
1543
+ // Reset the quiet timer on EVERY incoming frame — late audio chunks
1544
+ // from the cancelled turn count as liveness, not just the ack.
1545
+ armQuietTimer();
1546
+ if (prevMessage) prevMessage.call(ws, event);
1547
+ try {
1548
+ const raw = typeof event.data === 'string'
1549
+ ? event.data
1550
+ : event.data instanceof Buffer
1551
+ ? event.data.toString()
1552
+ : String(event.data);
1553
+ if (JSON.parse(raw).interrupted) done();
1554
+ } catch { /* ignore parse errors */ }
1555
+ };
1556
+
1557
+ ws.onclose = (event: CloseEvent) => {
1558
+ this.ws = null;
1559
+ if (prevClose) prevClose.call(ws, event);
1560
+ done();
1561
+ };
1562
+
1563
+ ws.send(JSON.stringify({ cancel: true }));
1564
+ });
1565
+ }
1566
+
1463
1567
  /**
1464
1568
  * End the current session but keep the WebSocket connection open.
1465
1569
  *
@@ -1614,6 +1718,8 @@ export class KugelAudio {
1614
1718
  public readonly models: ModelsResource;
1615
1719
  /** Voices resource */
1616
1720
  public readonly voices: VoicesResource;
1721
+ /** Custom dictionaries resource */
1722
+ public readonly dictionaries: DictionariesResource;
1617
1723
  /** TTS resource */
1618
1724
  public readonly tts: TTSResource;
1619
1725
 
@@ -1656,6 +1762,7 @@ export class KugelAudio {
1656
1762
 
1657
1763
  this.models = new ModelsResource(this);
1658
1764
  this.voices = new VoicesResource(this);
1765
+ this.dictionaries = new DictionariesResource(this);
1659
1766
  this.tts = new TTSResource(this);
1660
1767
  }
1661
1768
 
@@ -0,0 +1,212 @@
1
+ /**
2
+ * Tests for the dictionaries resource.
3
+ *
4
+ * Stubs the global `fetch` to verify the SDK sends the right method,
5
+ * URL, params, and JSON body for each CRUD path, and that snake_case
6
+ * server responses get mapped to camelCase SDK types.
7
+ */
8
+
9
+ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
10
+ import { KugelAudio } from './client';
11
+
12
+ function jsonResponse(body: unknown, status = 200): Response {
13
+ return new Response(JSON.stringify(body), {
14
+ status,
15
+ headers: { 'Content-Type': 'application/json' },
16
+ });
17
+ }
18
+
19
+ function dictRow(overrides: Record<string, unknown> = {}) {
20
+ return {
21
+ id: 1,
22
+ project_id: 42,
23
+ name: 'Brand names',
24
+ description: null,
25
+ language: null,
26
+ is_active: true,
27
+ created_at: '2026-01-01T00:00:00+00:00',
28
+ updated_at: '2026-01-01T00:00:00+00:00',
29
+ ...overrides,
30
+ };
31
+ }
32
+
33
+ function entryRow(overrides: Record<string, unknown> = {}) {
34
+ return {
35
+ id: 11,
36
+ dictionary_id: 1,
37
+ word: 'Kubernetes',
38
+ replacement: 'koo-ber-net-eez',
39
+ ipa: null,
40
+ case_sensitive: false,
41
+ created_at: '2026-01-01T00:00:00+00:00',
42
+ updated_at: '2026-01-01T00:00:00+00:00',
43
+ ...overrides,
44
+ };
45
+ }
46
+
47
+ let fetchMock: ReturnType<typeof vi.fn>;
48
+
49
+ beforeEach(() => {
50
+ fetchMock = vi.fn();
51
+ vi.stubGlobal('fetch', fetchMock);
52
+ });
53
+
54
+ afterEach(() => {
55
+ vi.unstubAllGlobals();
56
+ });
57
+
58
+ function makeClient() {
59
+ return new KugelAudio({ apiKey: 'test-key', apiUrl: 'https://api.example.com' });
60
+ }
61
+
62
+ describe('Dictionaries CRUD', () => {
63
+ it('lists dictionaries', async () => {
64
+ fetchMock.mockResolvedValueOnce(
65
+ jsonResponse({ dictionaries: [dictRow(), dictRow({ id: 2, name: 'Other' })] }),
66
+ );
67
+ const client = makeClient();
68
+ const result = await client.dictionaries.list();
69
+ expect(result).toHaveLength(2);
70
+ expect(result[0]).toMatchObject({
71
+ id: 1,
72
+ projectId: 42,
73
+ name: 'Brand names',
74
+ isActive: true,
75
+ });
76
+ const [url, init] = fetchMock.mock.calls[0];
77
+ expect(url).toBe('https://api.example.com/v1/dictionaries');
78
+ expect(init.method).toBe('GET');
79
+ });
80
+
81
+ it('passes project_id as a query param when supplied', async () => {
82
+ fetchMock.mockResolvedValueOnce(jsonResponse({ dictionaries: [] }));
83
+ const client = makeClient();
84
+ await client.dictionaries.list({ projectId: 99 });
85
+ expect(fetchMock.mock.calls[0][0]).toBe(
86
+ 'https://api.example.com/v1/dictionaries?project_id=99',
87
+ );
88
+ });
89
+
90
+ it('creates a dictionary', async () => {
91
+ fetchMock.mockResolvedValueOnce(jsonResponse(dictRow({ id: 7, name: 'Glossary' })));
92
+ const client = makeClient();
93
+ const d = await client.dictionaries.create({
94
+ name: 'Glossary',
95
+ description: 'hi',
96
+ language: 'en',
97
+ });
98
+ expect(d.id).toBe(7);
99
+ const [url, init] = fetchMock.mock.calls[0];
100
+ expect(url).toBe('https://api.example.com/v1/dictionaries');
101
+ expect(init.method).toBe('POST');
102
+ expect(JSON.parse(init.body as string)).toEqual({
103
+ name: 'Glossary',
104
+ description: 'hi',
105
+ language: 'en',
106
+ });
107
+ });
108
+
109
+ it('updates only provided fields', async () => {
110
+ fetchMock.mockResolvedValueOnce(jsonResponse(dictRow({ is_active: false })));
111
+ const client = makeClient();
112
+ await client.dictionaries.update(1, { isActive: false });
113
+ const [url, init] = fetchMock.mock.calls[0];
114
+ expect(url).toBe('https://api.example.com/v1/dictionaries/1');
115
+ expect(init.method).toBe('PATCH');
116
+ expect(JSON.parse(init.body as string)).toEqual({ is_active: false });
117
+ });
118
+
119
+ it('deletes a dictionary', async () => {
120
+ fetchMock.mockResolvedValueOnce(jsonResponse({ deleted: true }));
121
+ const client = makeClient();
122
+ await client.dictionaries.delete(1);
123
+ const [url, init] = fetchMock.mock.calls[0];
124
+ expect(url).toBe('https://api.example.com/v1/dictionaries/1');
125
+ expect(init.method).toBe('DELETE');
126
+ });
127
+ });
128
+
129
+ describe('Dictionary entries CRUD', () => {
130
+ it('lists entries with search + pagination', async () => {
131
+ fetchMock.mockResolvedValueOnce(
132
+ jsonResponse({
133
+ entries: [entryRow()],
134
+ total: 1,
135
+ limit: 25,
136
+ offset: 0,
137
+ }),
138
+ );
139
+ const client = makeClient();
140
+ const res = await client.dictionaries.entries.list(1, {
141
+ search: 'kub',
142
+ limit: 25,
143
+ });
144
+ expect(res.total).toBe(1);
145
+ expect(res.entries[0]).toMatchObject({
146
+ id: 11,
147
+ dictionaryId: 1,
148
+ word: 'Kubernetes',
149
+ caseSensitive: false,
150
+ });
151
+ expect(fetchMock.mock.calls[0][0]).toBe(
152
+ 'https://api.example.com/v1/dictionaries/1/entries?search=kub&limit=25',
153
+ );
154
+ });
155
+
156
+ it('adds a single entry and maps camelCase fields', async () => {
157
+ fetchMock.mockResolvedValueOnce(
158
+ jsonResponse(entryRow({ word: 'Postgres', replacement: 'post-gres' })),
159
+ );
160
+ const client = makeClient();
161
+ const e = await client.dictionaries.entries.add(1, {
162
+ word: 'Postgres',
163
+ replacement: 'post-gres',
164
+ caseSensitive: true,
165
+ });
166
+ expect(e.word).toBe('Postgres');
167
+ const [, init] = fetchMock.mock.calls[0];
168
+ expect(JSON.parse(init.body as string)).toEqual({
169
+ word: 'Postgres',
170
+ replacement: 'post-gres',
171
+ case_sensitive: true,
172
+ });
173
+ });
174
+
175
+ it('updates a single entry', async () => {
176
+ fetchMock.mockResolvedValueOnce(jsonResponse(entryRow({ replacement: 'new' })));
177
+ const client = makeClient();
178
+ await client.dictionaries.entries.update(1, 11, { replacement: 'new' });
179
+ const [url, init] = fetchMock.mock.calls[0];
180
+ expect(url).toBe('https://api.example.com/v1/dictionaries/1/entries/11');
181
+ expect(init.method).toBe('PATCH');
182
+ expect(JSON.parse(init.body as string)).toEqual({ replacement: 'new' });
183
+ });
184
+
185
+ it('deletes a single entry', async () => {
186
+ fetchMock.mockResolvedValueOnce(jsonResponse({ deleted: true }));
187
+ const client = makeClient();
188
+ await client.dictionaries.entries.delete(1, 11);
189
+ expect(fetchMock.mock.calls[0][1].method).toBe('DELETE');
190
+ });
191
+
192
+ it('bulk replaces entries', async () => {
193
+ fetchMock.mockResolvedValueOnce(
194
+ jsonResponse({ upserted: 2, deleted: 3, total: 2 }),
195
+ );
196
+ const client = makeClient();
197
+ const result = await client.dictionaries.entries.replaceAll(1, [
198
+ { word: 'Postgres', replacement: 'post-gres' },
199
+ { word: 'K8s', replacement: 'kubernetes', caseSensitive: true },
200
+ ]);
201
+ expect(result).toEqual({ upserted: 2, deleted: 3, total: 2 });
202
+ const [url, init] = fetchMock.mock.calls[0];
203
+ expect(url).toBe('https://api.example.com/v1/dictionaries/1/entries');
204
+ expect(init.method).toBe('PUT');
205
+ expect(JSON.parse(init.body as string)).toEqual({
206
+ entries: [
207
+ { word: 'Postgres', replacement: 'post-gres' },
208
+ { word: 'K8s', replacement: 'kubernetes', case_sensitive: true },
209
+ ],
210
+ });
211
+ });
212
+ });