kugelaudio 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,8 +7,12 @@
7
7
  */
8
8
 
9
9
  import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
10
+ import packageJson from '../package.json';
10
11
  import { KugelAudio } from './client';
11
12
  import { RateLimitError } from './errors';
13
+ import { parseSessionUsage } from './types';
14
+
15
+ const SDK_VERSION = packageJson.version;
12
16
 
13
17
  // ---------------------------------------------------------------------------
14
18
  // Minimal WebSocket mock
@@ -18,6 +22,7 @@ type WsListener = (event: { data: string }) => void;
18
22
  type WsCloseListener = (event: { code: number }) => void;
19
23
 
20
24
  interface MockWs {
25
+ url: string;
21
26
  readyState: number;
22
27
  onopen: (() => void) | null;
23
28
  onmessage: WsListener | null;
@@ -33,6 +38,7 @@ let mockWs: MockWs;
33
38
  vi.mock('./websocket', () => ({
34
39
  getWebSocket: () => {
35
40
  return class MockWebSocket {
41
+ url: string;
36
42
  readyState = 0; // CONNECTING
37
43
  onopen: (() => void) | null = null;
38
44
  onmessage: WsListener | null = null;
@@ -42,7 +48,8 @@ vi.mock('./websocket', () => ({
42
48
  close = vi.fn();
43
49
  ping = vi.fn();
44
50
 
45
- constructor() {
51
+ constructor(url: string) {
52
+ this.url = url;
46
53
  mockWs = this as unknown as MockWs;
47
54
  // Simulate async open
48
55
  setTimeout(() => {
@@ -96,6 +103,43 @@ function collectStream(stream: NodeJS.ReadableStream): Promise<Buffer> {
96
103
  // Tests
97
104
  // ---------------------------------------------------------------------------
98
105
 
106
+ describe('parseSessionUsage (/ws/tts final + session_closed)', () => {
107
+ it('parses the usage block from a /ws/tts final frame', () => {
108
+ const usage = parseSessionUsage({
109
+ final: true,
110
+ chunks: 3,
111
+ total_samples: 1000,
112
+ dur_ms: 5400,
113
+ gen_ms: 900,
114
+ rtf: 0.17,
115
+ usage: {
116
+ audio_seconds: 5.4,
117
+ characters: 142,
118
+ cost_cents: 0.49,
119
+ currency: 'eur',
120
+ model_id: 'kugel-3',
121
+ },
122
+ });
123
+ expect(usage).not.toBeNull();
124
+ expect(usage?.audioSeconds).toBe(5.4);
125
+ expect(usage?.costCents).toBe(0.49);
126
+ expect(usage?.costAvailable).toBe(true);
127
+ });
128
+
129
+ it('reports cost null (not zero) when unavailable', () => {
130
+ const usage = parseSessionUsage({
131
+ final: true,
132
+ usage: { audio_seconds: 2.0, cost_cents: null, cost_unavailable: true },
133
+ });
134
+ expect(usage?.costCents).toBeNull();
135
+ expect(usage?.costAvailable).toBe(false);
136
+ });
137
+
138
+ it('returns null when there is no usage info', () => {
139
+ expect(parseSessionUsage({ final: true, chunks: 1 })).toBeNull();
140
+ });
141
+ });
142
+
99
143
  describe('TTSResource.toReadable()', () => {
100
144
  let client: KugelAudio;
101
145
 
@@ -247,6 +291,43 @@ describe('KugelAudio multi-region', () => {
247
291
  });
248
292
  });
249
293
 
294
+ describe('KugelAudio SDK metadata', () => {
295
+ it('adds SDK metadata headers to HTTP requests', async () => {
296
+ const originalFetch = globalThis.fetch;
297
+ const fetchMock = vi.fn<typeof fetch>(async () => ({
298
+ ok: true,
299
+ json: async () => ({ models: [] }),
300
+ } as Response));
301
+ globalThis.fetch = fetchMock;
302
+
303
+ try {
304
+ const client = new KugelAudio({ apiKey: 'ka_test123' });
305
+ await client.models.list();
306
+ } finally {
307
+ globalThis.fetch = originalFetch;
308
+ }
309
+
310
+ expect(fetchMock).toHaveBeenCalledTimes(1);
311
+ const [, init] = fetchMock.mock.calls[0];
312
+ expect(init).toMatchObject({
313
+ headers: {
314
+ 'X-KugelAudio-SDK': 'js',
315
+ 'X-KugelAudio-SDK-Version': SDK_VERSION,
316
+ },
317
+ });
318
+ });
319
+
320
+ it('adds SDK metadata query params to WebSocket URLs', async () => {
321
+ const client = new KugelAudio({ apiKey: 'ka_test123' });
322
+
323
+ client.tts.toReadable({ text: 'metadata test' });
324
+ await new Promise<void>((r) => setTimeout(r, 10));
325
+
326
+ expect(mockWs.url).toContain('sdk=js');
327
+ expect(mockWs.url).toContain(`sdk_version=${SDK_VERSION}`);
328
+ });
329
+ });
330
+
250
331
  // ---------------------------------------------------------------------------
251
332
  // Keepalive ping tests
252
333
  // ---------------------------------------------------------------------------
@@ -411,6 +492,111 @@ describe('StreamingSession', () => {
411
492
  expect(sessionClosedCalls[0].totalAudioChunks).toBe(4);
412
493
  });
413
494
 
495
+ it('fires onFinal (end-of-audio) before onSessionClosed on turn end (KUG-1238)', async () => {
496
+ const order: string[] = [];
497
+ let finalStats: { totalAudioSeconds: number; totalTextChunks: number; totalAudioChunks: number } | null = null;
498
+
499
+ const session = client.tts.streamingSession(
500
+ { voiceId: 1 },
501
+ {
502
+ onFinal: (totalAudioSeconds, totalTextChunks, totalAudioChunks) => {
503
+ order.push('final');
504
+ finalStats = { totalAudioSeconds, totalTextChunks, totalAudioChunks };
505
+ },
506
+ onSessionClosed: () => order.push('session_closed'),
507
+ },
508
+ );
509
+
510
+ session.connect();
511
+ await new Promise<void>((r) => setTimeout(r, 10));
512
+ session.send('Hello.', true);
513
+
514
+ mockWs.onmessage?.({ data: makeAudioMsg(0, 100) });
515
+ mockWs.onmessage?.({ data: makeChunkCompleteMsg(0, 1.0, 100) });
516
+ mockWs.onmessage?.({
517
+ data: JSON.stringify({
518
+ final: true,
519
+ total_audio_seconds: 1.0,
520
+ total_text_chunks: 1,
521
+ total_audio_chunks: 1,
522
+ }),
523
+ });
524
+ mockWs.onmessage?.({ data: makeSessionClosedMsg(1.0, 1, 1) });
525
+
526
+ expect(order).toEqual(['final', 'session_closed']);
527
+ expect(finalStats!.totalAudioSeconds).toBe(1.0);
528
+ expect(finalStats!.totalTextChunks).toBe(1);
529
+ expect(finalStats!.totalAudioChunks).toBe(1);
530
+ });
531
+
532
+ it('exposes typed per-session usage (cost charged) on lastUsage', async () => {
533
+ const session = client.tts.streamingSession({ voiceId: 1 }, {});
534
+ session.connect();
535
+ await new Promise<void>((r) => setTimeout(r, 10));
536
+ session.send('Hello.');
537
+
538
+ expect(session.lastUsage).toBeNull();
539
+
540
+ mockWs.onmessage?.({
541
+ data: JSON.stringify({
542
+ session_closed: true,
543
+ total_audio_seconds: 5.4,
544
+ usage: {
545
+ audio_seconds: 5.4,
546
+ characters: 142,
547
+ cost_cents: 0.49,
548
+ currency: 'eur',
549
+ model_id: 'kugel-3',
550
+ },
551
+ }),
552
+ });
553
+
554
+ expect(session.lastUsage).not.toBeNull();
555
+ expect(session.lastUsage?.audioSeconds).toBe(5.4);
556
+ expect(session.lastUsage?.characters).toBe(142);
557
+ expect(session.lastUsage?.costCents).toBe(0.49);
558
+ expect(session.lastUsage?.currency).toBe('eur');
559
+ expect(session.lastUsage?.modelId).toBe('kugel-3');
560
+ expect(session.lastUsage?.costAvailable).toBe(true);
561
+ });
562
+
563
+ it('reports cost as null (not zero) when the charge is unavailable', async () => {
564
+ const session = client.tts.streamingSession({ voiceId: 1 }, {});
565
+ session.connect();
566
+ await new Promise<void>((r) => setTimeout(r, 10));
567
+ session.send('Hi.');
568
+
569
+ mockWs.onmessage?.({
570
+ data: JSON.stringify({
571
+ session_closed: true,
572
+ total_audio_seconds: 2.0,
573
+ usage: {
574
+ audio_seconds: 2.0,
575
+ cost_cents: null,
576
+ cost_unavailable: true,
577
+ model_id: 'kugel-3',
578
+ },
579
+ }),
580
+ });
581
+
582
+ expect(session.lastUsage?.costCents).toBeNull();
583
+ expect(session.lastUsage?.costAvailable).toBe(false);
584
+ expect(session.lastUsage?.audioSeconds).toBe(2.0);
585
+ });
586
+
587
+ it('falls back to total_audio_seconds for a legacy server with no usage block', async () => {
588
+ const session = client.tts.streamingSession({ voiceId: 1 }, {});
589
+ session.connect();
590
+ await new Promise<void>((r) => setTimeout(r, 10));
591
+ session.send('Hi.');
592
+
593
+ mockWs.onmessage?.({ data: makeSessionClosedMsg(3.0, 1, 2) });
594
+
595
+ expect(session.lastUsage?.audioSeconds).toBe(3.0);
596
+ expect(session.lastUsage?.costCents).toBeNull();
597
+ expect(session.lastUsage?.costAvailable).toBe(false);
598
+ });
599
+
414
600
  it('resolves close() even if server never sends session_closed (quiet timeout)', async () => {
415
601
  const session = client.tts.streamingSession(
416
602
  { voiceId: 1 },
@@ -604,6 +790,55 @@ describe('StreamingSession', () => {
604
790
  expect(JSON.parse(mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string).voice_id).toBe(42);
605
791
  });
606
792
 
793
+ // -------------------------------------------------------------------------
794
+ // dictionaryIds — per-request dictionary selection (KUG-1094)
795
+ // -------------------------------------------------------------------------
796
+
797
+ it('first send carries dictionary_ids when configured', async () => {
798
+ const session = client.tts.streamingSession(
799
+ { voiceId: 1, dictionaryIds: [7, 9] },
800
+ {},
801
+ );
802
+
803
+ session.connect();
804
+ await new Promise<void>((r) => setTimeout(r, 10));
805
+
806
+ session.send('Hello.');
807
+ const sent = JSON.parse(
808
+ mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
809
+ );
810
+ expect(sent.dictionary_ids).toEqual([7, 9]);
811
+ });
812
+
813
+ it('first send carries dictionary_ids: [] (explicit opt-out)', async () => {
814
+ const session = client.tts.streamingSession(
815
+ { voiceId: 1, dictionaryIds: [] },
816
+ {},
817
+ );
818
+
819
+ session.connect();
820
+ await new Promise<void>((r) => setTimeout(r, 10));
821
+
822
+ session.send('Hello.');
823
+ const sent = JSON.parse(
824
+ mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
825
+ );
826
+ expect(sent.dictionary_ids).toEqual([]);
827
+ });
828
+
829
+ it('omits dictionary_ids when not configured', async () => {
830
+ const session = client.tts.streamingSession({ voiceId: 1 }, {});
831
+
832
+ session.connect();
833
+ await new Promise<void>((r) => setTimeout(r, 10));
834
+
835
+ session.send('Hello.');
836
+ const sent = JSON.parse(
837
+ mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
838
+ );
839
+ expect(sent.dictionary_ids).toBeUndefined();
840
+ });
841
+
607
842
  it('cancelCurrent() resolves on quiet timeout if server never acks', async () => {
608
843
  const session = client.tts.streamingSession({ voiceId: 1 }, {});
609
844
 
@@ -684,4 +919,163 @@ describe('MultiContextSession closeContext', () => {
684
919
  expect((errors[0].error as RateLimitError).statusCode).toBe(429);
685
920
  expect((errors[0].error as RateLimitError).errorCode).toBe('TOO_MANY_CONTEXTS');
686
921
  });
922
+
923
+ it('fires onFinal per context on flush completion and graceful close (KUG-1238)', async () => {
924
+ const finals: string[] = [];
925
+ const closed: string[] = [];
926
+ const session = client.tts.createMultiContextSession({ defaultVoiceId: 1 });
927
+ await session.connect({
928
+ onFinal: (contextId) => finals.push(contextId),
929
+ onContextClosed: (contextId) => closed.push(contextId),
930
+ });
931
+
932
+ // Flush boundary: all audio admitted before the flush has been sent.
933
+ mockWs.onmessage?.({
934
+ data: JSON.stringify({ final: true, context_id: 'a' }),
935
+ });
936
+ expect(finals).toEqual(['a']);
937
+ expect(closed).toEqual([]);
938
+
939
+ // Graceful close: final precedes context_closed.
940
+ mockWs.onmessage?.({
941
+ data: JSON.stringify({ final: true, context_id: 'a' }),
942
+ });
943
+ mockWs.onmessage?.({
944
+ data: JSON.stringify({ context_closed: true, context_id: 'a' }),
945
+ });
946
+ expect(finals).toEqual(['a', 'a']);
947
+ expect(closed).toEqual(['a']);
948
+ });
949
+
950
+ it('exposes per-context usage on context_closed (per conversation)', async () => {
951
+ const closed: Array<{ id: string; usage: unknown }> = [];
952
+ const session = client.tts.createMultiContextSession({ defaultVoiceId: 1 });
953
+ await session.connect({
954
+ onContextClosed: (contextId, usage) => closed.push({ id: contextId, usage }),
955
+ });
956
+
957
+ mockWs.onmessage?.({
958
+ data: JSON.stringify({
959
+ context_closed: true,
960
+ context_id: 'narrator',
961
+ usage: { audio_seconds: 4.1, cost_cents: 0.37, currency: 'eur', model_id: 'kugel-3' },
962
+ }),
963
+ });
964
+
965
+ // Available both via the callback arg and the per-context accessor
966
+ expect(closed).toHaveLength(1);
967
+ expect(closed[0].id).toBe('narrator');
968
+ expect((closed[0].usage as { costCents: number }).costCents).toBe(0.37);
969
+
970
+ const u = session.usageFor('narrator');
971
+ expect(u?.audioSeconds).toBe(4.1);
972
+ expect(u?.costCents).toBe(0.37);
973
+ expect(u?.costAvailable).toBe(true);
974
+ expect(session.usageFor('missing')).toBeNull();
975
+ });
976
+ });
977
+
978
+ // ---------------------------------------------------------------------------
979
+ // MultiContextSession createContext wire format (KUG-1233)
980
+ //
981
+ // The server binds a context's voice ONLY from voice_settings.voice_id at
982
+ // context creation. A top-level voice_id updates session config and leaves
983
+ // the context voiceless → MISSING_VOICE_ID on the first text. These tests
984
+ // pin the wire format so it cannot silently regress.
985
+ // ---------------------------------------------------------------------------
986
+
987
+ describe('MultiContextSession createContext wire format (KUG-1233)', () => {
988
+ let client: KugelAudio;
989
+
990
+ beforeEach(() => {
991
+ client = new KugelAudio({ apiKey: 'test-key-xxx' });
992
+ });
993
+
994
+ it('puts voice_id inside voice_settings, never top-level', async () => {
995
+ const session = client.tts.createMultiContextSession({ defaultVoiceId: 42 });
996
+ await session.connect({});
997
+
998
+ session.createContext('narrator', { voiceId: 123 });
999
+
1000
+ const sent = JSON.parse(
1001
+ mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
1002
+ );
1003
+ expect(sent.context_id).toBe('narrator');
1004
+ expect(sent.voice_id).toBeUndefined();
1005
+ expect(sent.voice_settings).toBeDefined();
1006
+ expect(sent.voice_settings.voice_id).toBe(123);
1007
+ });
1008
+
1009
+ it('falls back to defaultVoiceId inside voice_settings', async () => {
1010
+ const session = client.tts.createMultiContextSession({ defaultVoiceId: 42 });
1011
+ await session.connect({});
1012
+
1013
+ session.createContext('narrator');
1014
+
1015
+ const sent = JSON.parse(
1016
+ mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
1017
+ );
1018
+ expect(sent.voice_id).toBeUndefined();
1019
+ expect(sent.voice_settings.voice_id).toBe(42);
1020
+ });
1021
+
1022
+ it('send() to an unknown context auto-creates it with the default voice, even after session start', async () => {
1023
+ const session = client.tts.createMultiContextSession({ defaultVoiceId: 42 });
1024
+ await session.connect({});
1025
+
1026
+ // Simulate a started session (first context confirmed by the server).
1027
+ session.createContext('first');
1028
+ mockWs.onmessage?.({
1029
+ data: JSON.stringify({ session_started: true, session_id: 's1' }),
1030
+ });
1031
+ mockWs.onmessage?.({
1032
+ data: JSON.stringify({ context_created: true, context_id: 'first' }),
1033
+ });
1034
+
1035
+ const callsBefore = mockWs.send.mock.calls.length;
1036
+ session.send('second', 'hello there', true);
1037
+ const frames = mockWs.send.mock.calls
1038
+ .slice(callsBefore)
1039
+ .map((c) => JSON.parse(c[0] as string));
1040
+
1041
+ // First frame: the auto-create with voice_settings.voice_id; then the text.
1042
+ expect(frames).toHaveLength(2);
1043
+ expect(frames[0].context_id).toBe('second');
1044
+ expect(frames[0].voice_settings.voice_id).toBe(42);
1045
+ expect(frames[1].text).toBe('hello there');
1046
+ expect(frames[1].flush).toBe(true);
1047
+ });
1048
+
1049
+ it('does not duplicate the create frame across repeated sends', async () => {
1050
+ const session = client.tts.createMultiContextSession({ defaultVoiceId: 42 });
1051
+ await session.connect({});
1052
+
1053
+ session.send('ctx', 'one');
1054
+ session.send('ctx', 'two');
1055
+
1056
+ const frames = mockWs.send.mock.calls.map((c) => JSON.parse(c[0] as string));
1057
+ const creates = frames.filter((f) => f.voice_settings?.voice_id === 42);
1058
+ expect(creates).toHaveLength(1);
1059
+ });
1060
+
1061
+ it('allows re-creating a context after the server closed it', async () => {
1062
+ const session = client.tts.createMultiContextSession({ defaultVoiceId: 42 });
1063
+ await session.connect({});
1064
+
1065
+ session.send('ctx', 'one');
1066
+ mockWs.onmessage?.({
1067
+ data: JSON.stringify({ context_created: true, context_id: 'ctx' }),
1068
+ });
1069
+ mockWs.onmessage?.({
1070
+ data: JSON.stringify({ context_closed: true, context_id: 'ctx' }),
1071
+ });
1072
+
1073
+ const callsBefore = mockWs.send.mock.calls.length;
1074
+ session.send('ctx', 'again');
1075
+ const frames = mockWs.send.mock.calls
1076
+ .slice(callsBefore)
1077
+ .map((c) => JSON.parse(c[0] as string));
1078
+ expect(frames[0].voice_settings.voice_id).toBe(42);
1079
+ expect(frames[1].text).toBe('again');
1080
+ });
687
1081
  });