npm - kugelaudio - Versions diffs - 0.6.1 → 0.8.0 - Mend

kugelaudio 0.6.1 → 0.8.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (11)

package/src/client.test.ts CHANGED

@@ -7,8 +7,12 @@
  */
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import packageJson from '../package.json';
 import { KugelAudio } from './client';
 import { RateLimitError } from './errors';
+import { parseSessionUsage } from './types';
+const SDK_VERSION = packageJson.version;
 // ---------------------------------------------------------------------------
 // Minimal WebSocket mock
@@ -18,6 +22,7 @@ type WsListener = (event: { data: string }) => void;
 type WsCloseListener = (event: { code: number }) => void;
 interface MockWs {
+  url: string;
   readyState: number;
   onopen: (() => void) | null;
   onmessage: WsListener | null;
@@ -33,6 +38,7 @@ let mockWs: MockWs;
 vi.mock('./websocket', () => ({
   getWebSocket: () => {
     return class MockWebSocket {
+      url: string;
       readyState = 0; // CONNECTING
       onopen: (() => void) | null = null;
       onmessage: WsListener | null = null;
@@ -42,7 +48,8 @@ vi.mock('./websocket', () => ({
       close = vi.fn();
       ping = vi.fn();
-      constructor() {
+      constructor(url: string) {
+        this.url = url;
         mockWs = this as unknown as MockWs;
         // Simulate async open
         setTimeout(() => {
@@ -96,6 +103,43 @@ function collectStream(stream: NodeJS.ReadableStream): Promise<Buffer> {
 // Tests
 // ---------------------------------------------------------------------------
+describe('parseSessionUsage (/ws/tts final + session_closed)', () => {
+  it('parses the usage block from a /ws/tts final frame', () => {
+    const usage = parseSessionUsage({
+      final: true,
+      chunks: 3,
+      total_samples: 1000,
+      dur_ms: 5400,
+      gen_ms: 900,
+      rtf: 0.17,
+      usage: {
+        audio_seconds: 5.4,
+        characters: 142,
+        cost_cents: 0.49,
+        currency: 'eur',
+        model_id: 'kugel-3',
+      },
+    });
+    expect(usage).not.toBeNull();
+    expect(usage?.audioSeconds).toBe(5.4);
+    expect(usage?.costCents).toBe(0.49);
+    expect(usage?.costAvailable).toBe(true);
+  });
+  it('reports cost null (not zero) when unavailable', () => {
+    const usage = parseSessionUsage({
+      final: true,
+      usage: { audio_seconds: 2.0, cost_cents: null, cost_unavailable: true },
+    });
+    expect(usage?.costCents).toBeNull();
+    expect(usage?.costAvailable).toBe(false);
+  });
+  it('returns null when there is no usage info', () => {
+    expect(parseSessionUsage({ final: true, chunks: 1 })).toBeNull();
+  });
+});
 describe('TTSResource.toReadable()', () => {
   let client: KugelAudio;
@@ -247,6 +291,43 @@ describe('KugelAudio multi-region', () => {
   });
 });
+describe('KugelAudio SDK metadata', () => {
+  it('adds SDK metadata headers to HTTP requests', async () => {
+    const originalFetch = globalThis.fetch;
+    const fetchMock = vi.fn<typeof fetch>(async () => ({
+      ok: true,
+      json: async () => ({ models: [] }),
+    } as Response));
+    globalThis.fetch = fetchMock;
+    try {
+      const client = new KugelAudio({ apiKey: 'ka_test123' });
+      await client.models.list();
+    } finally {
+      globalThis.fetch = originalFetch;
+    }
+    expect(fetchMock).toHaveBeenCalledTimes(1);
+    const [, init] = fetchMock.mock.calls[0];
+    expect(init).toMatchObject({
+      headers: {
+        'X-KugelAudio-SDK': 'js',
+        'X-KugelAudio-SDK-Version': SDK_VERSION,
+      },
+    });
+  });
+  it('adds SDK metadata query params to WebSocket URLs', async () => {
+    const client = new KugelAudio({ apiKey: 'ka_test123' });
+    client.tts.toReadable({ text: 'metadata test' });
+    await new Promise<void>((r) => setTimeout(r, 10));
+    expect(mockWs.url).toContain('sdk=js');
+    expect(mockWs.url).toContain(`sdk_version=${SDK_VERSION}`);
+  });
+});
 // ---------------------------------------------------------------------------
 // Keepalive ping tests
 // ---------------------------------------------------------------------------
@@ -411,6 +492,111 @@ describe('StreamingSession', () => {
     expect(sessionClosedCalls[0].totalAudioChunks).toBe(4);
   });
+  it('fires onFinal (end-of-audio) before onSessionClosed on turn end (KUG-1238)', async () => {
+    const order: string[] = [];
+    let finalStats: { totalAudioSeconds: number; totalTextChunks: number; totalAudioChunks: number } | null = null;
+    const session = client.tts.streamingSession(
+      { voiceId: 1 },
+      {
+        onFinal: (totalAudioSeconds, totalTextChunks, totalAudioChunks) => {
+          order.push('final');
+          finalStats = { totalAudioSeconds, totalTextChunks, totalAudioChunks };
+        },
+        onSessionClosed: () => order.push('session_closed'),
+      },
+    );
+    session.connect();
+    await new Promise<void>((r) => setTimeout(r, 10));
+    session.send('Hello.', true);
+    mockWs.onmessage?.({ data: makeAudioMsg(0, 100) });
+    mockWs.onmessage?.({ data: makeChunkCompleteMsg(0, 1.0, 100) });
+    mockWs.onmessage?.({
+      data: JSON.stringify({
+        final: true,
+        total_audio_seconds: 1.0,
+        total_text_chunks: 1,
+        total_audio_chunks: 1,
+      }),
+    });
+    mockWs.onmessage?.({ data: makeSessionClosedMsg(1.0, 1, 1) });
+    expect(order).toEqual(['final', 'session_closed']);
+    expect(finalStats!.totalAudioSeconds).toBe(1.0);
+    expect(finalStats!.totalTextChunks).toBe(1);
+    expect(finalStats!.totalAudioChunks).toBe(1);
+  });
+  it('exposes typed per-session usage (cost charged) on lastUsage', async () => {
+    const session = client.tts.streamingSession({ voiceId: 1 }, {});
+    session.connect();
+    await new Promise<void>((r) => setTimeout(r, 10));
+    session.send('Hello.');
+    expect(session.lastUsage).toBeNull();
+    mockWs.onmessage?.({
+      data: JSON.stringify({
+        session_closed: true,
+        total_audio_seconds: 5.4,
+        usage: {
+          audio_seconds: 5.4,
+          characters: 142,
+          cost_cents: 0.49,
+          currency: 'eur',
+          model_id: 'kugel-3',
+        },
+      }),
+    });
+    expect(session.lastUsage).not.toBeNull();
+    expect(session.lastUsage?.audioSeconds).toBe(5.4);
+    expect(session.lastUsage?.characters).toBe(142);
+    expect(session.lastUsage?.costCents).toBe(0.49);
+    expect(session.lastUsage?.currency).toBe('eur');
+    expect(session.lastUsage?.modelId).toBe('kugel-3');
+    expect(session.lastUsage?.costAvailable).toBe(true);
+  });
+  it('reports cost as null (not zero) when the charge is unavailable', async () => {
+    const session = client.tts.streamingSession({ voiceId: 1 }, {});
+    session.connect();
+    await new Promise<void>((r) => setTimeout(r, 10));
+    session.send('Hi.');
+    mockWs.onmessage?.({
+      data: JSON.stringify({
+        session_closed: true,
+        total_audio_seconds: 2.0,
+        usage: {
+          audio_seconds: 2.0,
+          cost_cents: null,
+          cost_unavailable: true,
+          model_id: 'kugel-3',
+        },
+      }),
+    });
+    expect(session.lastUsage?.costCents).toBeNull();
+    expect(session.lastUsage?.costAvailable).toBe(false);
+    expect(session.lastUsage?.audioSeconds).toBe(2.0);
+  });
+  it('falls back to total_audio_seconds for a legacy server with no usage block', async () => {
+    const session = client.tts.streamingSession({ voiceId: 1 }, {});
+    session.connect();
+    await new Promise<void>((r) => setTimeout(r, 10));
+    session.send('Hi.');
+    mockWs.onmessage?.({ data: makeSessionClosedMsg(3.0, 1, 2) });
+    expect(session.lastUsage?.audioSeconds).toBe(3.0);
+    expect(session.lastUsage?.costCents).toBeNull();
+    expect(session.lastUsage?.costAvailable).toBe(false);
+  });
   it('resolves close() even if server never sends session_closed (quiet timeout)', async () => {
     const session = client.tts.streamingSession(
       { voiceId: 1 },
@@ -604,6 +790,55 @@ describe('StreamingSession', () => {
     expect(JSON.parse(mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string).voice_id).toBe(42);
   });
+  // -------------------------------------------------------------------------
+  // dictionaryIds — per-request dictionary selection (KUG-1094)
+  // -------------------------------------------------------------------------
+  it('first send carries dictionary_ids when configured', async () => {
+    const session = client.tts.streamingSession(
+      { voiceId: 1, dictionaryIds: [7, 9] },
+      {},
+    );
+    session.connect();
+    await new Promise<void>((r) => setTimeout(r, 10));
+    session.send('Hello.');
+    const sent = JSON.parse(
+      mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
+    );
+    expect(sent.dictionary_ids).toEqual([7, 9]);
+  });
+  it('first send carries dictionary_ids: [] (explicit opt-out)', async () => {
+    const session = client.tts.streamingSession(
+      { voiceId: 1, dictionaryIds: [] },
+      {},
+    );
+    session.connect();
+    await new Promise<void>((r) => setTimeout(r, 10));
+    session.send('Hello.');
+    const sent = JSON.parse(
+      mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
+    );
+    expect(sent.dictionary_ids).toEqual([]);
+  });
+  it('omits dictionary_ids when not configured', async () => {
+    const session = client.tts.streamingSession({ voiceId: 1 }, {});
+    session.connect();
+    await new Promise<void>((r) => setTimeout(r, 10));
+    session.send('Hello.');
+    const sent = JSON.parse(
+      mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
+    );
+    expect(sent.dictionary_ids).toBeUndefined();
+  });
   it('cancelCurrent() resolves on quiet timeout if server never acks', async () => {
     const session = client.tts.streamingSession({ voiceId: 1 }, {});
@@ -684,4 +919,163 @@ describe('MultiContextSession closeContext', () => {
     expect((errors[0].error as RateLimitError).statusCode).toBe(429);
     expect((errors[0].error as RateLimitError).errorCode).toBe('TOO_MANY_CONTEXTS');
   });
+  it('fires onFinal per context on flush completion and graceful close (KUG-1238)', async () => {
+    const finals: string[] = [];
+    const closed: string[] = [];
+    const session = client.tts.createMultiContextSession({ defaultVoiceId: 1 });
+    await session.connect({
+      onFinal: (contextId) => finals.push(contextId),
+      onContextClosed: (contextId) => closed.push(contextId),
+    });
+    // Flush boundary: all audio admitted before the flush has been sent.
+    mockWs.onmessage?.({
+      data: JSON.stringify({ final: true, context_id: 'a' }),
+    });
+    expect(finals).toEqual(['a']);
+    expect(closed).toEqual([]);
+    // Graceful close: final precedes context_closed.
+    mockWs.onmessage?.({
+      data: JSON.stringify({ final: true, context_id: 'a' }),
+    });
+    mockWs.onmessage?.({
+      data: JSON.stringify({ context_closed: true, context_id: 'a' }),
+    });
+    expect(finals).toEqual(['a', 'a']);
+    expect(closed).toEqual(['a']);
+  });
+  it('exposes per-context usage on context_closed (per conversation)', async () => {
+    const closed: Array<{ id: string; usage: unknown }> = [];
+    const session = client.tts.createMultiContextSession({ defaultVoiceId: 1 });
+    await session.connect({
+      onContextClosed: (contextId, usage) => closed.push({ id: contextId, usage }),
+    });
+    mockWs.onmessage?.({
+      data: JSON.stringify({
+        context_closed: true,
+        context_id: 'narrator',
+        usage: { audio_seconds: 4.1, cost_cents: 0.37, currency: 'eur', model_id: 'kugel-3' },
+      }),
+    });
+    // Available both via the callback arg and the per-context accessor
+    expect(closed).toHaveLength(1);
+    expect(closed[0].id).toBe('narrator');
+    expect((closed[0].usage as { costCents: number }).costCents).toBe(0.37);
+    const u = session.usageFor('narrator');
+    expect(u?.audioSeconds).toBe(4.1);
+    expect(u?.costCents).toBe(0.37);
+    expect(u?.costAvailable).toBe(true);
+    expect(session.usageFor('missing')).toBeNull();
+  });
+});
+// ---------------------------------------------------------------------------
+// MultiContextSession createContext wire format (KUG-1233)
+//
+// The server binds a context's voice ONLY from voice_settings.voice_id at
+// context creation. A top-level voice_id updates session config and leaves
+// the context voiceless → MISSING_VOICE_ID on the first text. These tests
+// pin the wire format so it cannot silently regress.
+// ---------------------------------------------------------------------------
+describe('MultiContextSession createContext wire format (KUG-1233)', () => {
+  let client: KugelAudio;
+  beforeEach(() => {
+    client = new KugelAudio({ apiKey: 'test-key-xxx' });
+  });
+  it('puts voice_id inside voice_settings, never top-level', async () => {
+    const session = client.tts.createMultiContextSession({ defaultVoiceId: 42 });
+    await session.connect({});
+    session.createContext('narrator', { voiceId: 123 });
+    const sent = JSON.parse(
+      mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
+    );
+    expect(sent.context_id).toBe('narrator');
+    expect(sent.voice_id).toBeUndefined();
+    expect(sent.voice_settings).toBeDefined();
+    expect(sent.voice_settings.voice_id).toBe(123);
+  });
+  it('falls back to defaultVoiceId inside voice_settings', async () => {
+    const session = client.tts.createMultiContextSession({ defaultVoiceId: 42 });
+    await session.connect({});
+    session.createContext('narrator');
+    const sent = JSON.parse(
+      mockWs.send.mock.calls[mockWs.send.mock.calls.length - 1][0] as string
+    );
+    expect(sent.voice_id).toBeUndefined();
+    expect(sent.voice_settings.voice_id).toBe(42);
+  });
+  it('send() to an unknown context auto-creates it with the default voice, even after session start', async () => {
+    const session = client.tts.createMultiContextSession({ defaultVoiceId: 42 });
+    await session.connect({});
+    // Simulate a started session (first context confirmed by the server).
+    session.createContext('first');
+    mockWs.onmessage?.({
+      data: JSON.stringify({ session_started: true, session_id: 's1' }),
+    });
+    mockWs.onmessage?.({
+      data: JSON.stringify({ context_created: true, context_id: 'first' }),
+    });
+    const callsBefore = mockWs.send.mock.calls.length;
+    session.send('second', 'hello there', true);
+    const frames = mockWs.send.mock.calls
+      .slice(callsBefore)
+      .map((c) => JSON.parse(c[0] as string));
+    // First frame: the auto-create with voice_settings.voice_id; then the text.
+    expect(frames).toHaveLength(2);
+    expect(frames[0].context_id).toBe('second');
+    expect(frames[0].voice_settings.voice_id).toBe(42);
+    expect(frames[1].text).toBe('hello there');
+    expect(frames[1].flush).toBe(true);
+  });
+  it('does not duplicate the create frame across repeated sends', async () => {
+    const session = client.tts.createMultiContextSession({ defaultVoiceId: 42 });
+    await session.connect({});
+    session.send('ctx', 'one');
+    session.send('ctx', 'two');
+    const frames = mockWs.send.mock.calls.map((c) => JSON.parse(c[0] as string));
+    const creates = frames.filter((f) => f.voice_settings?.voice_id === 42);
+    expect(creates).toHaveLength(1);
+  });
+  it('allows re-creating a context after the server closed it', async () => {
+    const session = client.tts.createMultiContextSession({ defaultVoiceId: 42 });
+    await session.connect({});
+    session.send('ctx', 'one');
+    mockWs.onmessage?.({
+      data: JSON.stringify({ context_created: true, context_id: 'ctx' }),
+    });
+    mockWs.onmessage?.({
+      data: JSON.stringify({ context_closed: true, context_id: 'ctx' }),
+    });
+    const callsBefore = mockWs.send.mock.calls.length;
+    session.send('ctx', 'again');
+    const frames = mockWs.send.mock.calls
+      .slice(callsBefore)
+      .map((c) => JSON.parse(c[0] as string));
+    expect(frames[0].voice_settings.voice_id).toBe(42);
+    expect(frames[1].text).toBe('again');
+  });
 });