npm - playkit-sdk - Versions diffs - 1.4.0-beta.1 → 1.4.0-beta.3 - Mend

playkit-sdk 1.4.0-beta.1 → 1.4.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md +31 -0
package/dist/playkit-sdk.cjs.js +192 -88
package/dist/playkit-sdk.cjs.js.map +1 -1
package/dist/playkit-sdk.d.ts +125 -56
package/dist/playkit-sdk.esm.js +192 -88
package/dist/playkit-sdk.esm.js.map +1 -1
package/dist/playkit-sdk.umd.js +192 -88
package/dist/playkit-sdk.umd.js.map +1 -1
package/package.json +1 -1

package/README.md CHANGED

@@ -68,6 +68,33 @@ await chat.chatStream(
 );
 ```
+### Reasoning (Thinking)
+Reasoning-capable models can think before answering. Enable it with the
+`thinking` option (set the `effort` level) and read the model's reasoning
+separately from its answer.
+```typescript
+// Non-streaming: reasoning is returned on `result.reasoning`
+const result = await chat.textGeneration({
+  messages: [{ role: 'user', content: 'Solve: 17 * 24, show your work.' }],
+  thinking: { effort: 'high' },
+});
+console.log('Answer:', result.content);
+console.log('Reasoning:', result.reasoning);
+// Streaming: reasoning arrives via the `onReasoning` callback,
+// kept separate from the answer text in `onChunk`
+await chat.textGenerationStream({
+  messages: [{ role: 'user', content: 'Solve: 17 * 24, show your work.' }],
+  thinking: { effort: 'high' },
+  onReasoning: (chunk) => process.stdout.write(`[thinking] ${chunk}`),
+  onChunk: (chunk) => process.stdout.write(chunk),
+  onComplete: (fullText) => console.log('\nComplete:', fullText),
+});
+```
 ### Image Generation
 ```typescript
@@ -256,6 +283,10 @@ This SDK is proprietary software owned by Agentland Lab. Use of this SDK is subj
 ## Changelog
+### 1.4.0-beta.3
+- Added `thinking` reasoning-effort option on chat (`thinking: { effort: 'high' }`)
+- Surface model reasoning: `result.reasoning` (non-streaming) and the `onReasoning` callback (streaming)
 ### 1.0.0-beta.1
 - Initial public beta release
 - AI chat support (text generation)

package/dist/playkit-sdk.cjs.js CHANGED

@@ -1,5 +1,5 @@
 /**
- * playkit-sdk v1.4.0-beta.1
+ * playkit-sdk v1.4.0-beta.3
  * PlayKit SDK for JavaScript
  * @license SEE LICENSE IN LICENSE
  */
@@ -830,7 +830,7 @@ class TokenStorage {
 }
 const SDK_TYPE = 'Javascript';
-const SDK_VERSION = '"1.4.0-beta.1"';
+const SDK_VERSION = '"1.4.0-beta.3"';
 function getSDKHeaders() {
     return {
         'X-SDK-Type': SDK_TYPE,
@@ -3930,6 +3930,9 @@ class ChatProvider {
             stop: chatConfig.stop || null,
             top_p: chatConfig.topP || null,
         };
+        if (chatConfig.thinking) {
+            requestBody.thinking = chatConfig.thinking;
+        }
         try {
             const response = await fetch(`${this.baseURL}${endpoint}`, {
                 method: 'POST',
@@ -3987,6 +3990,9 @@ class ChatProvider {
             stop: chatConfig.stop || null,
             top_p: chatConfig.topP || null,
         };
+        if (chatConfig.thinking) {
+            requestBody.thinking = chatConfig.thinking;
+        }
         try {
             const response = await fetch(`${this.baseURL}${endpoint}`, {
                 method: 'POST',
@@ -4051,6 +4057,9 @@ class ChatProvider {
         if (chatConfig.tool_choice) {
             requestBody.tool_choice = chatConfig.tool_choice;
         }
+        if (chatConfig.thinking) {
+            requestBody.thinking = chatConfig.thinking;
+        }
         try {
             const response = await fetch(`${this.baseURL}${endpoint}`, {
                 method: 'POST',
@@ -4109,6 +4118,9 @@ class ChatProvider {
         if (chatConfig.tool_choice) {
             requestBody.tool_choice = chatConfig.tool_choice;
         }
+        if (chatConfig.thinking) {
+            requestBody.thinking = chatConfig.thinking;
+        }
         try {
             const response = await fetch(`${this.baseURL}${endpoint}`, {
                 method: 'POST',
@@ -4418,6 +4430,19 @@ class TranscriptionProvider {
  */
 // @ts-ignore - replaced at build time
 const DEFAULT_BASE_URL$1 = "https://api.playkit.ai";
+/** Decode a base64 string to an ArrayBuffer (browser + Node). */
+function base64ToArrayBuffer(b64) {
+    if (typeof atob === 'function') {
+        const bin = atob(b64);
+        const bytes = new Uint8Array(bin.length);
+        for (let i = 0; i < bin.length; i++)
+            bytes[i] = bin.charCodeAt(i);
+        return bytes.buffer;
+    }
+    // Node fallback
+    const buf = globalThis.Buffer.from(b64, 'base64');
+    return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
+}
 class TTSProvider {
     constructor(authManager, config) {
         this.authManager = authManager;
@@ -4430,69 +4455,69 @@ class TTSProvider {
     setPlayerClient(playerClient) {
         this.playerClient = playerClient;
     }
-    /**
-     * Synthesize text into speech audio
-     */
-    async synthesize(ttsConfig) {
-        // Ensure token is valid, auto-refresh if needed (browser mode only)
+    /** Build the shared request body from a TTS config (new fields + legacy). */
+    buildRequestBody(ttsConfig) {
+        const model = ttsConfig.model || this.config.defaultTTSModel || 'default-tts-model';
+        const body = { model, text: ttsConfig.text };
+        if (ttsConfig.voice !== undefined)
+            body.voice = ttsConfig.voice;
+        if (ttsConfig.voiceMix !== undefined)
+            body.voice_mix = ttsConfig.voiceMix;
+        if (ttsConfig.voiceSettings !== undefined) {
+            body.voice_settings = ttsConfig.voiceSettings;
+        }
+        if (ttsConfig.outputFormat !== undefined) {
+            body.output_format = ttsConfig.outputFormat;
+        }
+        if (ttsConfig.language !== undefined)
+            body.language = ttsConfig.language;
+        if (ttsConfig.providerOptions !== undefined) {
+            body.provider_options = ttsConfig.providerOptions;
+        }
+        return body;
+    }
+    /** POST to a TTS endpoint; throws a PlayKitError on a non-ok response. */
+    async post(endpoint, body) {
         await this.authManager.ensureValidToken();
         const token = this.authManager.getToken();
         if (!token) {
             throw new PlayKitError('Not authenticated', 'NOT_AUTHENTICATED');
         }
-        const model = ttsConfig.model || this.config.defaultTTSModel || 'default-tts-model';
-        const endpoint = `/ai/${this.config.gameId}/v2/audio/speech`;
-        const requestBody = {
-            model,
-            text: ttsConfig.text,
-        };
-        // Add optional parameters (only when defined)
-        if (ttsConfig.voice !== undefined) {
-            requestBody.voice = ttsConfig.voice;
-        }
-        if (ttsConfig.speed !== undefined) {
-            requestBody.speed = ttsConfig.speed;
-        }
-        if (ttsConfig.vol !== undefined) {
-            requestBody.vol = ttsConfig.vol;
-        }
-        if (ttsConfig.pitch !== undefined) {
-            requestBody.pitch = ttsConfig.pitch;
-        }
-        if (ttsConfig.emotion !== undefined) {
-            requestBody.emotion = ttsConfig.emotion;
-        }
-        if (ttsConfig.languageBoost !== undefined) {
-            requestBody.language_boost = ttsConfig.languageBoost;
-        }
-        if (ttsConfig.format !== undefined) {
-            requestBody.response_format = ttsConfig.format;
-        }
-        if (ttsConfig.voiceSetting !== undefined) {
-            requestBody.voice_setting = ttsConfig.voiceSetting;
+        const response = await fetch(`${this.baseURL}${endpoint}`, {
+            method: 'POST',
+            headers: Object.assign({ Authorization: `Bearer ${token}`, 'Content-Type': 'application/json' }, getSDKHeaders()),
+            body: JSON.stringify(body),
+        });
+        if (!response.ok) {
+            const error = await response
+                .json()
+                .catch(() => ({ message: 'Speech synthesis failed' }));
+            const playKitError = new PlayKitError(error.message || 'Speech synthesis failed', error.code, response.status);
+            if (error.code === 'INSUFFICIENT_CREDITS' ||
+                error.code === 'PLAYER_INSUFFICIENT_CREDIT' ||
+                response.status === 402) {
+                if (this.playerClient) {
+                    await this.playerClient.handleInsufficientCredits(playKitError);
+                }
+            }
+            throw playKitError;
         }
-        if (ttsConfig.audioSetting !== undefined) {
-            requestBody.audio_setting = ttsConfig.audioSetting;
+        return response;
+    }
+    checkBalanceAfter() {
+        if (this.playerClient) {
+            this.playerClient.checkBalanceAfterApiCall().catch(() => {
+                /* silently fail */
+            });
         }
+    }
+    /**
+     * Synthesize text into speech audio (raw bytes).
+     */
+    async synthesize(ttsConfig) {
+        const endpoint = `/ai/${this.config.gameId}/v2/audio/speech`;
         try {
-            const response = await fetch(`${this.baseURL}${endpoint}`, {
-                method: 'POST',
-                headers: Object.assign({ Authorization: `Bearer ${token}`, 'Content-Type': 'application/json' }, getSDKHeaders()),
-                body: JSON.stringify(requestBody),
-            });
-            if (!response.ok) {
-                const error = await response.json().catch(() => ({ message: 'Speech synthesis failed' }));
-                const playKitError = new PlayKitError(error.message || 'Speech synthesis failed', error.code, response.status);
-                // Check for insufficient credits error
-                if (error.code === 'INSUFFICIENT_CREDITS' ||
-                    error.code === 'PLAYER_INSUFFICIENT_CREDIT' ||
-                    response.status === 402) {
-                    if (this.playerClient) {
-                        await this.playerClient.handleInsufficientCredits(playKitError);
-                    }
-                }
-                throw playKitError;
-            }
+            const response = await this.post(endpoint, this.buildRequestBody(ttsConfig));
             // SUCCESS: response is raw audio bytes, NOT JSON.
             const audio = await response.arrayBuffer();
             const contentType = response.headers.get('Content-Type');
@@ -4500,24 +4525,66 @@ class TTSProvider {
             const audioLengthHeader = response.headers.get('X-Audio-Length-Ms');
             const result = {
                 audio,
-                format: contentType || ttsConfig.format || 'mp3',
+                format: contentType || 'mp3',
                 usageCharacters: Number(usageHeader) || 0,
             };
             if (audioLengthHeader !== null) {
                 result.audioLengthMs = Number(audioLengthHeader) || 0;
             }
-            // Check balance after successful API call
-            if (this.playerClient) {
-                this.playerClient.checkBalanceAfterApiCall().catch(() => {
-                    // Silently fail
-                });
-            }
+            this.checkBalanceAfter();
             return result;
         }
         catch (error) {
-            if (error instanceof PlayKitError) {
+            if (error instanceof PlayKitError)
                 throw error;
+            throw new PlayKitError(error instanceof Error ? error.message : 'Unknown error', 'TTS_ERROR');
+        }
+    }
+    /**
+     * Synthesize text into speech AND return timestamp alignment. Hits the
+     * `speech-with-timestamps` variant, whose success response is a JSON envelope
+     * (base64 audio + alignment), so it is parsed as JSON — not raw bytes.
+     */
+    async synthesizeWithTimestamps(ttsConfig) {
+        const endpoint = `/ai/${this.config.gameId}/v2/audio/speech-with-timestamps`;
+        const body = this.buildRequestBody(ttsConfig);
+        if (ttsConfig.granularity !== undefined) {
+            body.subtitle_type = ttsConfig.granularity;
+        }
+        try {
+            const response = await this.post(endpoint, body);
+            const json = (await response.json());
+            let alignment = null;
+            if (json.alignment && Array.isArray(json.alignment.items)) {
+                alignment = {
+                    granularity: json.alignment.granularity || 'word',
+                    items: json.alignment.items.map((it) => {
+                        var _a, _b, _c;
+                        return ({
+                            text: (_a = it.text) !== null && _a !== void 0 ? _a : '',
+                            startMs: (_b = it.start_ms) !== null && _b !== void 0 ? _b : 0,
+                            endMs: (_c = it.end_ms) !== null && _c !== void 0 ? _c : 0,
+                            textStart: it.text_start,
+                            textEnd: it.text_end,
+                        });
+                    }),
+                };
+            }
+            const result = {
+                audio: base64ToArrayBuffer(json.audio_base64),
+                format: json.format || 'mp3',
+                usageCharacters: Number(json.usage_characters) || 0,
+                alignment,
+            };
+            if (json.audio_length_ms != null) {
+                result.audioLengthMs = Number(json.audio_length_ms) || 0;
             }
+            this.checkBalanceAfter();
+            return result;
+        }
+        catch (error) {
+            if (error instanceof PlayKitError)
+                throw error;
             throw new PlayKitError(error instanceof Error ? error.message : 'Unknown error', 'TTS_ERROR');
         }
     }
@@ -4601,6 +4668,7 @@ function createDecoder() {
 class StreamParser {
     /**
      * Parse SSE stream using ReadableStream
+     * Yields typed parts so callers can separate text from reasoning.
      */
     static parseStream(reader) {
         return __asyncGenerator(this, arguments, function* parseStream_1() {
@@ -4631,9 +4699,9 @@ class StreamParser {
                             const data = trimmed.substring(6); // Remove 'data: ' prefix
                             try {
                                 const parsed = JSON.parse(data);
-                                const text = this.extractTextFromChunk(parsed);
-                                if (text) {
-                                    yield yield __await(text);
+                                const part = this.extractPartFromChunk(parsed);
+                                if (part) {
+                                    yield yield __await(part);
                                 }
                                 // Stream termination events
                                 if (parsed.type === 'done' || parsed.type === 'finish' || parsed.finish_reason) {
@@ -4650,7 +4718,7 @@ class StreamParser {
                             }
                             catch (error) {
                                 // If JSON parse fails, treat as plain text
-                                yield yield __await(data);
+                                yield yield __await({ kind: 'text', delta: data });
                             }
                         }
                     }
@@ -4662,22 +4730,33 @@ class StreamParser {
         });
     }
     /**
-     * Extract text from a stream chunk
-     * Supports multiple formats (UI Message Stream and OpenAI)
+     * Extract a typed part (text or reasoning) from a stream chunk
+     * Supports multiple formats (UI Message Stream and OpenAI).
+     * Reasoning is detected before the generic text fallback so thinking
+     * deltas never leak into the text stream.
      */
-    static extractTextFromChunk(chunk) {
-        var _a, _b;
-        // UI Message Stream format: { type: "text-delta", delta: "..." }
+    static extractPartFromChunk(chunk) {
+        var _a, _b, _c, _d;
+        // UI Message Stream reasoning: { type: "reasoning-delta", delta: "..." }
+        if (chunk.type === 'reasoning-delta' && chunk.delta) {
+            return { kind: 'reasoning', delta: chunk.delta };
+        }
+        // UI Message Stream text: { type: "text-delta", delta: "..." }
         if (chunk.type === 'text-delta' && chunk.delta) {
-            return chunk.delta;
+            return { kind: 'text', delta: chunk.delta };
+        }
+        // OpenAI reasoning (defensive): { choices: [{ delta: { reasoning_content: "..." } }] }
+        if (chunk.choices && ((_b = (_a = chunk.choices[0]) === null || _a === void 0 ? void 0 : _a.delta) === null || _b === void 0 ? void 0 : _b.reasoning_content)) {
+            return { kind: 'reasoning', delta: chunk.choices[0].delta.reasoning_content };
         }
-        // OpenAI format: { choices: [{ delta: { content: "..." } }] }
-        if (chunk.choices && ((_b = (_a = chunk.choices[0]) === null || _a === void 0 ? void 0 : _a.delta) === null || _b === void 0 ? void 0 : _b.content)) {
-            return chunk.choices[0].delta.content;
+        // OpenAI text: { choices: [{ delta: { content: "..." } }] }
+        if (chunk.choices && ((_d = (_c = chunk.choices[0]) === null || _c === void 0 ? void 0 : _c.delta) === null || _d === void 0 ? void 0 : _d.content)) {
+            return { kind: 'text', delta: chunk.choices[0].delta.content };
         }
-        // Direct delta format
+        // Direct delta format (text)
         if (chunk.delta) {
-            return typeof chunk.delta === 'string' ? chunk.delta : chunk.delta.content || null;
+            const text = typeof chunk.delta === 'string' ? chunk.delta : chunk.delta.content || null;
+            return text ? { kind: 'text', delta: text } : null;
         }
         return null;
     }
@@ -4691,8 +4770,10 @@ class StreamParser {
             for (var _d = true, _e = __asyncValues(this.parseStream(reader)), _f; _f = await _e.next(), _a = _f.done, !_a; _d = true) {
                 _c = _f.value;
                 _d = false;
-                const chunk = _c;
-                fullText += chunk;
+                const part = _c;
+                if (part.kind === 'text') {
+                    fullText += part.delta;
+                }
             }
         }
         catch (e_1_1) { e_1 = { error: e_1_1 }; }
@@ -4706,8 +4787,9 @@ class StreamParser {
     }
     /**
      * Stream with callbacks
+     * Text deltas go to onChunk; reasoning (thinking) deltas go to onReasoning.
      */
-    static async streamWithCallbacks(reader, onChunk, onComplete, onError) {
+    static async streamWithCallbacks(reader, onChunk, onComplete, onError, onReasoning) {
         var _a, e_2, _b, _c;
         let fullText = '';
         try {
@@ -4715,9 +4797,15 @@ class StreamParser {
                 for (var _d = true, _e = __asyncValues(this.parseStream(reader)), _f; _f = await _e.next(), _a = _f.done, !_a; _d = true) {
                     _c = _f.value;
                     _d = false;
-                    const chunk = _c;
-                    fullText += chunk;
-                    onChunk(chunk);
+                    const part = _c;
+                    if (part.kind === 'reasoning') {
+                        if (onReasoning) {
+                            onReasoning(part.delta);
+                        }
+                        continue;
+                    }
+                    fullText += part.delta;
+                    onChunk(part.delta);
                 }
             }
             catch (e_2_1) { e_2 = { error: e_2_1 }; }
@@ -4818,6 +4906,7 @@ class ChatClient {
                 : undefined,
             id: response.id,
             created: response.created,
+            reasoning: choice.message.reasoning_content,
         };
     }
     /**
@@ -4826,7 +4915,7 @@ class ChatClient {
     async textGenerationStream(config) {
         const chatConfig = Object.assign(Object.assign({}, config), { model: config.model || this.model });
         const reader = await this.provider.chatCompletionStream(chatConfig);
-        await StreamParser.streamWithCallbacks(reader, config.onChunk, config.onComplete, config.onError);
+        await StreamParser.streamWithCallbacks(reader, config.onChunk, config.onComplete, config.onError, config.onReasoning);
     }
     // ===== Structured Output Generation =====
     /**
@@ -4984,6 +5073,7 @@ class ChatClient {
             id: response.id,
             created: response.created,
             tool_calls: choice.message.tool_calls,
+            reasoning: choice.message.reasoning_content,
         };
     }
     /**
@@ -4994,6 +5084,7 @@ class ChatClient {
         const chatConfig = Object.assign(Object.assign({}, config), { model: config.model || this.model });
         const reader = await this.provider.chatCompletionWithToolsStream(chatConfig);
         let fullContent = '';
+        let fullReasoning = '';
         let toolCalls = [];
         await StreamParser.streamWithCallbacks(reader, (chunk) => {
             fullContent += chunk;
@@ -5006,9 +5097,14 @@ class ChatClient {
                     model: chatConfig.model || this.model,
                     finishReason: toolCalls.length > 0 ? 'tool_calls' : 'stop',
                     tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
+                    reasoning: fullReasoning || undefined,
                 });
             }
-        }, config.onError);
+        }, config.onError, (chunk) => {
+            var _a;
+            fullReasoning += chunk;
+            (_a = config.onReasoning) === null || _a === void 0 ? void 0 : _a.call(config, chunk);
+        });
     }
 }
@@ -5236,6 +5332,14 @@ class TTSClient {
     async synthesize(config) {
         return this.provider.synthesize(Object.assign(Object.assign({}, config), { model: config.model || this.model }));
     }
+    /**
+     * Synthesize text into speech AND return timestamp alignment (word/sentence
+     * timings). Returns the audio bytes plus an `alignment` object.
+     * @param config - TTS configuration; `granularity` defaults to 'word'.
+     */
+    async synthesizeWithTimestamps(config) {
+        return this.provider.synthesizeWithTimestamps(Object.assign(Object.assign({}, config), { model: config.model || this.model }));
+    }
     /**
      * Synthesize text into speech and return it as a Blob (browser-friendly)
      * @param config - Full TTS configuration