playkit-sdk 1.3.0 → 1.4.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -413,6 +413,8 @@ interface SDKConfig {
413
413
  defaultImageModel?: string;
414
414
  /** Default transcription model to use */
415
415
  defaultTranscriptionModel?: string;
416
+ /** Default text-to-speech model to use */
417
+ defaultTTSModel?: string;
416
418
  /**
417
419
  * Enable debug logging
418
420
  * @deprecated Use `logging.level` instead. Will be removed in v2.0.
@@ -876,6 +878,112 @@ interface TranscriptionResponse {
876
878
  segments?: TranscriptionSegment[];
877
879
  }
878
880
 
881
+ /**
882
+ * Text-to-speech (TTS) type definitions
883
+ */
884
+ /** One voice in a `voiceMix` blend; the request builder forwards entries unchanged under `voice_mix`. */
885
+ interface VoiceMixEntry {
886
+ /** Voice id to include in the blend. */
887
+ voice: string;
888
+ /** Relative weight, integer 1–100 (documented range; the SDK's request builder does not check it). */
889
+ weight: number;
890
+ }
891
+ /** Neutral voice tuning knobs (ElevenLabs-style nested object), sent unchanged as `voice_settings`. */
892
+ interface VoiceSettings {
893
+ /** Playback speed multiplier (0.5 to 2). */
894
+ speed?: number;
895
+ /** Volume (0 to 10). */
896
+ volume?: number;
897
+ /** Pitch adjustment (-12 to 12). */
898
+ pitch?: number;
899
+ /** Emotion as a free-form string, e.g. 'happy', 'sad' or 'calm'. */
900
+ emotion?: string;
901
+ }
902
+ /**
903
+ * Configuration for text-to-speech requests.
904
+ *
905
+ * Inline markup in `text` is supported: pause via `[pause 1.5s]` or
906
+ * `<break time="1.5s"/>`, and interjections via `[laughs]` / `[breath]` (on
907
+ * models that support them). These are translated server-side.
+ *
+ * The SDK's request builder copies each defined field into the request body
+ * (camelCase names become snake_case keys) and omits undefined ones; it does
+ * not validate the documented limits below.
908
+ */
909
+ interface TTSConfig {
910
+ /** Text to synthesize into speech (max 10000 characters; the limit is not checked by the SDK's request builder). */
911
+ text: string;
912
+ /**
913
+ * Model to use for synthesis.
914
+ * When omitted, the provider falls back to `SDKConfig.defaultTTSModel` and
+ * then to 'default-tts-model' (alias resolved by the backend).
915
+ */
916
+ model?: string;
917
+ /** Voice id to use (e.g., 'male-qn-qingse'). Mutually exclusive with `voiceMix` (if both are set the SDK sends both). */
918
+ voice?: string;
919
+ /**
920
+ * Blend multiple voices (1–4 entries, weights 1–100). Mutually exclusive with
921
+ * `voice`. Sent as `voice_mix`.
922
+ */
923
+ voiceMix?: VoiceMixEntry[];
924
+ /** Neutral voice tuning (speed/volume/pitch/emotion). Sent as `voice_settings`. */
925
+ voiceSettings?: VoiceSettings;
926
+ /**
927
+ * Output audio format, ElevenLabs-style: `{codec}_{sampleRate}_{bitrateKbps}`.
928
+ * Examples: 'mp3', 'mp3_44100_128', 'pcm_24000', 'wav', 'flac_44100', 'opus'.
+ * Sent as `output_format`.
929
+ */
930
+ outputFormat?: string;
931
+ /** Language hint to improve pronunciation, e.g. 'Chinese', 'English' or 'auto'. */
932
+ language?: string;
933
+ /**
934
+ * Escape hatch for advanced provider-specific fields not modeled above
935
+ * (e.g. voice effects, pronunciation dictionaries). Sent as `provider_options`.
936
+ */
937
+ providerOptions?: Record<string, unknown>;
938
+ }
939
+ /**
940
+ * Options for simplified text-to-speech methods (everything except the text).
+ * NOTE(review): none of the TTSClient methods declared in this section take
+ * TTSOptions (they all accept a full TTSConfig); confirm the intended consumer.
941
+ */
942
+ type TTSOptions = Omit<TTSConfig, 'text'>;
943
+ /** Config for `synthesizeWithTimestamps`: a TTSConfig plus the subtitle granularity. */
944
+ interface TTSTimestampsConfig extends TTSConfig {
945
+ /** Timestamp granularity, sent as `subtitle_type` only when set; the 'word' default is presumably applied by the backend (confirm). */
946
+ granularity?: 'sentence' | 'word';
947
+ }
948
+ /**
949
+ * Result of a text-to-speech request.
+ * For `synthesize` the metadata is read from response headers; for
+ * `synthesizeWithTimestamps` it is read from the JSON envelope.
950
+ */
951
+ interface TTSResult {
952
+ /** Raw audio bytes. */
953
+ audio: ArrayBuffer;
954
+ /** Audio format / content type as reported by the server (e.g., 'audio/mpeg' or 'mp3'); 'mp3' when nothing is reported. */
955
+ format: string;
956
+ /** Number of characters billed for this request; 0 when the server reports no usable number. */
957
+ usageCharacters: number;
958
+ /** Length of the generated audio in milliseconds (if reported). */
959
+ audioLengthMs?: number;
960
+ }
961
+ /** One timed unit (word or sentence) in an {@link Alignment}, mapped from the response's snake_case item. */
962
+ interface AlignmentItem {
963
+ /** The spoken text of this unit ('' when the response omits it). */
964
+ text: string;
965
+ /** Start time in milliseconds (0 when the response omits it). */
966
+ startMs: number;
967
+ /** End time in milliseconds (0 when the response omits it). */
968
+ endMs: number;
969
+ /** Character offset of this unit's start in the input text (if reported). */
970
+ textStart?: number;
971
+ /** Character offset of this unit's end in the input text (if reported). */
972
+ textEnd?: number;
973
+ }
974
+ /** Timestamp alignment for synthesized speech. */
975
+ interface Alignment {
976
+ /** 'word' | 'sentence' — the granularity of `items`; 'word' when the response does not state one. */
977
+ granularity: string;
978
+ /** Timed units in order. */
979
+ items: AlignmentItem[];
980
+ }
981
+ /** Result of `synthesizeWithTimestamps`: audio plus timestamp alignment. */
982
+ interface TTSTimestampsResult extends TTSResult {
983
+ /** Word/sentence timings, or null when the response carries no `alignment.items` array. */
984
+ alignment: Alignment | null;
985
+ }
986
+
879
987
  /**
880
988
  * Device Authorization Flow Manager
881
989
  * Manages Device Auth polling flow for desktop/CLI/Unity applications
@@ -1911,6 +2019,76 @@ declare class TranscriptionClient {
1911
2019
  transcribeFile(file: File, options?: TranscriptionOptions): Promise<TranscriptionResult>;
1912
2020
  }
1913
2021
 
2022
+ /**
2023
+ * TTS provider for HTTP communication with the text-to-speech API.
+ * It is not in the package export list shown in this diff; PlayKitSDK creates
+ * the instance and hands it to TTSClient.
2024
+ */
2025
+
2026
+ declare class TTSProvider {
2027
+ private authManager;
2028
+ private config;
2029
+ private baseURL;
2030
+ private playerClient?;
2031
+ constructor(authManager: AuthManager, config: SDKConfig);
2032
+ /**
2033
+ * Set player client for balance checking
2034
+ */
2035
+ setPlayerClient(playerClient: PlayerClient): void;
2036
+ /** Build the shared request body from a TTS config: defined fields are copied under snake_case keys, undefined ones are omitted. */
2037
+ private buildRequestBody;
2038
+ /** POST to a TTS endpoint; throws a PlayKitError when no token is available or on a non-ok response. */
2039
+ private post;
2040
+ private checkBalanceAfter;
2041
+ /**
2042
+ * Synthesize text into speech audio (raw bytes).
+ * The format, billed characters and audio length are read from the
+ * `Content-Type`, `X-Usage-Characters` and `X-Audio-Length-Ms` response headers.
2043
+ */
2044
+ synthesize(ttsConfig: TTSConfig): Promise<TTSResult>;
2045
+ /**
2046
+ * Synthesize text into speech AND return timestamp alignment. Hits the
2047
+ * `speech-with-timestamps` variant, whose success response is a JSON envelope
2048
+ * (base64 audio + alignment), so it is parsed as JSON — not raw bytes.
2049
+ */
2050
+ synthesizeWithTimestamps(ttsConfig: TTSTimestampsConfig): Promise<TTSTimestampsResult>;
2051
+ }
2052
+
2053
+ /**
2054
+ * High-level client for text-to-speech synthesis.
+ * Wraps a TTSProvider and supplies a default model for requests that do not
+ * name one.
2055
+ */
2056
+
2057
+ declare class TTSClient {
2058
+ private provider;
2059
+ private model;
2060
+ constructor(provider: TTSProvider, model?: string);
2061
+ /**
2062
+ * Get the current model name (the constructor argument, or
+ * 'default-tts-model' when none was given)
2063
+ */
2064
+ get modelName(): string;
2065
+ /**
2066
+ * Synthesize text into speech audio
2067
+ * @param config - Full TTS configuration; `config.model` takes precedence over the client's model
2068
+ * @returns TTS result containing raw audio bytes and usage metadata
2069
+ */
2070
+ synthesize(config: TTSConfig): Promise<TTSResult>;
2071
+ /**
2072
+ * Synthesize text into speech AND return timestamp alignment (word/sentence
2073
+ * timings). Returns the audio bytes plus an `alignment` object.
2074
+ * @param config - TTS configuration; `granularity` defaults to 'word'.
2075
+ */
2076
+ synthesizeWithTimestamps(config: TTSTimestampsConfig): Promise<TTSTimestampsResult>;
2077
+ /**
2078
+ * Synthesize text into speech and return it as a Blob (browser-friendly)
2079
+ * @param config - Full TTS configuration
2080
+ * @returns Audio Blob whose MIME type is derived from the result's `format`
2081
+ */
2082
+ synthesizeToBlob(config: TTSConfig): Promise<Blob>;
2083
+ /**
2084
+ * Synthesize text into speech and return an object URL (browser only).
+ * Availability of URL.createObjectURL is checked before any request is made.
+ * Release the URL with URL.revokeObjectURL once it is no longer needed.
2085
+ * @param config - Full TTS configuration
2086
+ * @returns An object URL that can be assigned to an <audio> element
2087
+ * @throws PlayKitError if URL.createObjectURL is unavailable (e.g. Node.js)
2088
+ */
2089
+ synthesizeToObjectURL(config: TTSConfig): Promise<string>;
2090
+ }
2091
+
1914
2092
  /**
1915
2093
  * NPC Client for simplified conversation management
1916
2094
  * Automatically handles conversation history
@@ -2359,6 +2537,7 @@ declare class PlayKitSDK extends EventEmitter {
2359
2537
  private chatProvider;
2360
2538
  private imageProvider;
2361
2539
  private transcriptionProvider;
2540
+ private ttsProvider;
2362
2541
  private contextManager;
2363
2542
  private schemaLibrary;
2364
2543
  private initialized;
@@ -2428,6 +2607,11 @@ declare class PlayKitSDK extends EventEmitter {
2428
2607
  * @param model - Transcription model to use (default: 'whisper-large')
2429
2608
  */
2430
2609
  createTranscriptionClient(model?: string): TranscriptionClient;
2610
+ /**
2611
+ * Create a TTS client for text-to-speech
2612
+ * @param model - TTS model to use (default: 'default-tts-model')
2613
+ */
2614
+ createTTSClient(model?: string): TTSClient;
2431
2615
  /**
2432
2616
  * Create an NPC client
2433
2617
  * Automatically registers with AIContextManager
@@ -2957,5 +3141,5 @@ declare global {
2957
3141
  }
2958
3142
  }
2959
3143
 
2960
- export { AIContextManager, AuthFlowManager, AuthManager, BrowserStorage, BufferLogHandler, CallbackLogHandler, ChatClient, DeviceAuthFlowManager, ImageClient, LogLevel, Logger, MemoryStorage, NPCClient, PlayKitError, PlayKitSDK, PlayerClient, RechargeManager, SchemaLibrary, StreamParser, TokenStorage, TokenValidator, TranscriptionClient, createMultimodalMessage, createStorage, createTextMessage, PlayKitSDK as default, defaultContextManager, defaultSchemaLibrary, defaultTokenValidator, isLocalStorageAvailable };
2961
- export type { AIContextManagerConfig, AIContextManagerEvents, APIResult, AudioContentPart, AuthState, ChatCompletionResponse, ChatConfig, ChatResult, ChatStreamConfig, ChatWithToolsConfig, ChatWithToolsStreamConfig, ConversationSaveData, DeveloperTokenFallbackConfig, DeviceAuthFlowOptions, DeviceAuthInitResult, DeviceAuthResult, GameInfo, GeneratedImage, IStorage, ImageContentPart, ImageGenerationConfig, ImageGenerationResponse, ImageInput, ImageSize, LogConfig, LogEntry, LogHandler, MemoryEntry, Message, MessageContent, MessageContentPart, MessageRole, NPCConfig, PlayerInfo, RechargeConfig, RechargeEvents, RechargeModalOptions, SDKConfig, SDKMode, SchemaEntry, SetNicknameRequest, SetNicknameResponse, StreamChunk, StructuredGenerationConfig, StructuredOutputConfig, StructuredResult, TextContentPart, TokenRefreshResult, TokenScope, TokenStorageOptions, TokenValidatorOptions, TokenVerificationResult, TranscriptionConfig, TranscriptionOptions, TranscriptionResult, TranscriptionSegment, ValidatedPlayerInfo };
3144
+ export { AIContextManager, AuthFlowManager, AuthManager, BrowserStorage, BufferLogHandler, CallbackLogHandler, ChatClient, DeviceAuthFlowManager, ImageClient, LogLevel, Logger, MemoryStorage, NPCClient, PlayKitError, PlayKitSDK, PlayerClient, RechargeManager, SchemaLibrary, StreamParser, TTSClient, TokenStorage, TokenValidator, TranscriptionClient, createMultimodalMessage, createStorage, createTextMessage, PlayKitSDK as default, defaultContextManager, defaultSchemaLibrary, defaultTokenValidator, isLocalStorageAvailable };
3145
+ export type { AIContextManagerConfig, AIContextManagerEvents, APIResult, Alignment, AlignmentItem, AudioContentPart, AuthState, ChatCompletionResponse, ChatConfig, ChatResult, ChatStreamConfig, ChatWithToolsConfig, ChatWithToolsStreamConfig, ConversationSaveData, DeveloperTokenFallbackConfig, DeviceAuthFlowOptions, DeviceAuthInitResult, DeviceAuthResult, GameInfo, GeneratedImage, IStorage, ImageContentPart, ImageGenerationConfig, ImageGenerationResponse, ImageInput, ImageSize, LogConfig, LogEntry, LogHandler, MemoryEntry, Message, MessageContent, MessageContentPart, MessageRole, NPCConfig, PlayerInfo, RechargeConfig, RechargeEvents, RechargeModalOptions, SDKConfig, SDKMode, SchemaEntry, SetNicknameRequest, SetNicknameResponse, StreamChunk, StructuredGenerationConfig, StructuredOutputConfig, StructuredResult, TTSConfig, TTSOptions, TTSResult, TTSTimestampsConfig, TTSTimestampsResult, TextContentPart, TokenRefreshResult, TokenScope, TokenStorageOptions, TokenValidatorOptions, TokenVerificationResult, TranscriptionConfig, TranscriptionOptions, TranscriptionResult, TranscriptionSegment, ValidatedPlayerInfo, VoiceMixEntry, VoiceSettings };
@@ -1,5 +1,5 @@
1
1
  /**
2
- * playkit-sdk v1.3.0
2
+ * playkit-sdk v1.4.0-beta.2
3
3
  * PlayKit SDK for JavaScript
4
4
  * @license SEE LICENSE IN LICENSE
5
5
  */
@@ -826,7 +826,7 @@ class TokenStorage {
826
826
  }
827
827
 
828
828
  const SDK_TYPE = 'Javascript';
829
- const SDK_VERSION = '"1.3.0"';
829
+ const SDK_VERSION = '"1.4.0-beta.2"';
830
830
  function getSDKHeaders() {
831
831
  return {
832
832
  'X-SDK-Type': SDK_TYPE,
@@ -2422,7 +2422,7 @@ DeviceAuthFlowManager.activeInstance = null;
2422
2422
  * Handles JWT exchange and token management
2423
2423
  */
2424
2424
  // @ts-ignore - replaced at build time
2425
- const DEFAULT_BASE_URL$5 = "https://api.playkit.ai";
2425
+ const DEFAULT_BASE_URL$6 = "https://api.playkit.ai";
2426
2426
  const JWT_EXCHANGE_ENDPOINT = '/api/external/exchange-jwt';
2427
2427
  const TOKEN_REFRESH_ENDPOINT = '/api/auth/refresh';
2428
2428
  class AuthManager extends EventEmitter {
@@ -2440,7 +2440,7 @@ class AuthManager extends EventEmitter {
2440
2440
  this.storage = new TokenStorage({
2441
2441
  mode: config.mode === 'server' ? 'server' : 'browser',
2442
2442
  });
2443
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$5;
2443
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$6;
2444
2444
  this.authState = {
2445
2445
  isAuthenticated: false,
2446
2446
  };
@@ -3529,7 +3529,7 @@ class RechargeManager extends EventEmitter {
3529
3529
  * Player client for managing player information and credits
3530
3530
  */
3531
3531
  // @ts-ignore - replaced at build time
3532
- const DEFAULT_BASE_URL$4 = "https://api.playkit.ai";
3532
+ const DEFAULT_BASE_URL$5 = "https://api.playkit.ai";
3533
3533
  const PLAYER_INFO_ENDPOINT = '/api/external/player-info';
3534
3534
  const SET_NICKNAME_ENDPOINT = '/api/external/set-game-player-nickname';
3535
3535
  class PlayerClient extends EventEmitter {
@@ -3541,7 +3541,7 @@ class PlayerClient extends EventEmitter {
3541
3541
  this.balanceCheckInterval = null;
3542
3542
  this.logger = Logger.getLogger('PlayerClient');
3543
3543
  this.authManager = authManager;
3544
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$4;
3544
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$5;
3545
3545
  this.gameId = config.gameId;
3546
3546
  this.rechargeConfig = {
3547
3547
  autoShowBalanceModal: (_a = rechargeConfig.autoShowBalanceModal) !== null && _a !== void 0 ? _a : true,
@@ -3889,12 +3889,12 @@ function contentToString$1(content) {
3889
3889
  return textParts.map(part => part.text).join('');
3890
3890
  }
3891
3891
  // @ts-ignore - replaced at build time
3892
- const DEFAULT_BASE_URL$3 = "https://api.playkit.ai";
3892
+ const DEFAULT_BASE_URL$4 = "https://api.playkit.ai";
3893
3893
  class ChatProvider {
3894
3894
  constructor(authManager, config) {
3895
3895
  this.authManager = authManager;
3896
3896
  this.config = config;
3897
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$3;
3897
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$4;
3898
3898
  }
3899
3899
  /**
3900
3900
  * Set player client for balance checking
@@ -4221,12 +4221,12 @@ class ChatProvider {
4221
4221
  * Image generation provider for HTTP communication with image API
4222
4222
  */
4223
4223
  // @ts-ignore - replaced at build time
4224
- const DEFAULT_BASE_URL$2 = "https://api.playkit.ai";
4224
+ const DEFAULT_BASE_URL$3 = "https://api.playkit.ai";
4225
4225
  class ImageProvider {
4226
4226
  constructor(authManager, config) {
4227
4227
  this.authManager = authManager;
4228
4228
  this.config = config;
4229
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$2;
4229
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$3;
4230
4230
  }
4231
4231
  /**
4232
4232
  * Set player client for balance checking
@@ -4310,12 +4310,12 @@ class ImageProvider {
4310
4310
  * Transcription provider for HTTP communication with audio transcription API
4311
4311
  */
4312
4312
  // @ts-ignore - replaced at build time
4313
- const DEFAULT_BASE_URL$1 = "https://api.playkit.ai";
4313
+ const DEFAULT_BASE_URL$2 = "https://api.playkit.ai";
4314
4314
  class TranscriptionProvider {
4315
4315
  constructor(authManager, config) {
4316
4316
  this.authManager = authManager;
4317
4317
  this.config = config;
4318
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$1;
4318
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$2;
4319
4319
  }
4320
4320
  /**
4321
4321
  * Set player client for balance checking
@@ -4409,6 +4409,171 @@ class TranscriptionProvider {
4409
4409
  }
4410
4410
  }
4411
4411
 
4412
+ /**
4413
+ * TTS provider for HTTP communication with the text-to-speech API
4414
+ */
4415
+ // @ts-ignore - replaced at build time
4416
+ const DEFAULT_BASE_URL$1 = "https://api.playkit.ai";
4417
/**
 * Decode a base64 string into an ArrayBuffer (browser + Node).
 *
 * Uses `atob` when the runtime provides it and falls back to Node's `Buffer`
 * otherwise.
 *
 * @param b64 - Base64-encoded payload.
 * @returns A standalone ArrayBuffer holding exactly the decoded bytes.
 * @throws TypeError if `b64` is not a string, or if the runtime offers neither
 *   `atob` nor `Buffer`.
 */
function base64ToArrayBuffer(b64) {
    // atob() stringifies its argument, so a missing payload (null) would be
    // decoded as the text "null" and come back as three garbage bytes.
    if (typeof b64 !== 'string') {
        throw new TypeError(`Expected a base64 string but received ${b64 === null ? 'null' : typeof b64}`);
    }
    if (typeof atob === 'function') {
        const bin = atob(b64);
        const bytes = new Uint8Array(bin.length);
        for (let i = 0; i < bin.length; i++) {
            bytes[i] = bin.charCodeAt(i);
        }
        return bytes.buffer;
    }
    // Node fallback
    const NodeBuffer = globalThis.Buffer;
    if (!NodeBuffer || typeof NodeBuffer.from !== 'function') {
        throw new TypeError('No base64 decoder is available (neither atob nor Buffer)');
    }
    const buf = NodeBuffer.from(b64, 'base64');
    // A Buffer may be a view onto a larger pooled ArrayBuffer; copy out only
    // the bytes that belong to this payload.
    return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
}
4430
+ class TTSProvider {
4431
+ constructor(authManager, config) {
4432
+ this.authManager = authManager;
4433
+ this.config = config;
4434
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$1;
4435
+ }
4436
+ /**
4437
+ * Set player client for balance checking
4438
+ */
4439
+ setPlayerClient(playerClient) {
4440
+ this.playerClient = playerClient;
4441
+ }
4442
+ /** Build the shared request body from a TTS config (new fields + legacy). */
4443
+ buildRequestBody(ttsConfig) {
4444
+ const model = ttsConfig.model || this.config.defaultTTSModel || 'default-tts-model';
4445
+ const body = { model, text: ttsConfig.text };
4446
+ if (ttsConfig.voice !== undefined)
4447
+ body.voice = ttsConfig.voice;
4448
+ if (ttsConfig.voiceMix !== undefined)
4449
+ body.voice_mix = ttsConfig.voiceMix;
4450
+ if (ttsConfig.voiceSettings !== undefined) {
4451
+ body.voice_settings = ttsConfig.voiceSettings;
4452
+ }
4453
+ if (ttsConfig.outputFormat !== undefined) {
4454
+ body.output_format = ttsConfig.outputFormat;
4455
+ }
4456
+ if (ttsConfig.language !== undefined)
4457
+ body.language = ttsConfig.language;
4458
+ if (ttsConfig.providerOptions !== undefined) {
4459
+ body.provider_options = ttsConfig.providerOptions;
4460
+ }
4461
+ return body;
4462
+ }
4463
+ /** POST to a TTS endpoint; throws a PlayKitError on a non-ok response. */
4464
+ async post(endpoint, body) {
4465
+ await this.authManager.ensureValidToken();
4466
+ const token = this.authManager.getToken();
4467
+ if (!token) {
4468
+ throw new PlayKitError('Not authenticated', 'NOT_AUTHENTICATED');
4469
+ }
4470
+ const response = await fetch(`${this.baseURL}${endpoint}`, {
4471
+ method: 'POST',
4472
+ headers: Object.assign({ Authorization: `Bearer ${token}`, 'Content-Type': 'application/json' }, getSDKHeaders()),
4473
+ body: JSON.stringify(body),
4474
+ });
4475
+ if (!response.ok) {
4476
+ const error = await response
4477
+ .json()
4478
+ .catch(() => ({ message: 'Speech synthesis failed' }));
4479
+ const playKitError = new PlayKitError(error.message || 'Speech synthesis failed', error.code, response.status);
4480
+ if (error.code === 'INSUFFICIENT_CREDITS' ||
4481
+ error.code === 'PLAYER_INSUFFICIENT_CREDIT' ||
4482
+ response.status === 402) {
4483
+ if (this.playerClient) {
4484
+ await this.playerClient.handleInsufficientCredits(playKitError);
4485
+ }
4486
+ }
4487
+ throw playKitError;
4488
+ }
4489
+ return response;
4490
+ }
4491
+ checkBalanceAfter() {
4492
+ if (this.playerClient) {
4493
+ this.playerClient.checkBalanceAfterApiCall().catch(() => {
4494
+ /* silently fail */
4495
+ });
4496
+ }
4497
+ }
4498
+ /**
4499
+ * Synthesize text into speech audio (raw bytes).
4500
+ */
4501
+ async synthesize(ttsConfig) {
4502
+ const endpoint = `/ai/${this.config.gameId}/v2/audio/speech`;
4503
+ try {
4504
+ const response = await this.post(endpoint, this.buildRequestBody(ttsConfig));
4505
+ // SUCCESS: response is raw audio bytes, NOT JSON.
4506
+ const audio = await response.arrayBuffer();
4507
+ const contentType = response.headers.get('Content-Type');
4508
+ const usageHeader = response.headers.get('X-Usage-Characters');
4509
+ const audioLengthHeader = response.headers.get('X-Audio-Length-Ms');
4510
+ const result = {
4511
+ audio,
4512
+ format: contentType || 'mp3',
4513
+ usageCharacters: Number(usageHeader) || 0,
4514
+ };
4515
+ if (audioLengthHeader !== null) {
4516
+ result.audioLengthMs = Number(audioLengthHeader) || 0;
4517
+ }
4518
+ this.checkBalanceAfter();
4519
+ return result;
4520
+ }
4521
+ catch (error) {
4522
+ if (error instanceof PlayKitError)
4523
+ throw error;
4524
+ throw new PlayKitError(error instanceof Error ? error.message : 'Unknown error', 'TTS_ERROR');
4525
+ }
4526
+ }
4527
+ /**
4528
+ * Synthesize text into speech AND return timestamp alignment. Hits the
4529
+ * `speech-with-timestamps` variant, whose success response is a JSON envelope
4530
+ * (base64 audio + alignment), so it is parsed as JSON — not raw bytes.
4531
+ */
4532
+ async synthesizeWithTimestamps(ttsConfig) {
4533
+ const endpoint = `/ai/${this.config.gameId}/v2/audio/speech-with-timestamps`;
4534
+ const body = this.buildRequestBody(ttsConfig);
4535
+ if (ttsConfig.granularity !== undefined) {
4536
+ body.subtitle_type = ttsConfig.granularity;
4537
+ }
4538
+ try {
4539
+ const response = await this.post(endpoint, body);
4540
+ const json = (await response.json());
4541
+ let alignment = null;
4542
+ if (json.alignment && Array.isArray(json.alignment.items)) {
4543
+ alignment = {
4544
+ granularity: json.alignment.granularity || 'word',
4545
+ items: json.alignment.items.map((it) => {
4546
+ var _a, _b, _c;
4547
+ return ({
4548
+ text: (_a = it.text) !== null && _a !== void 0 ? _a : '',
4549
+ startMs: (_b = it.start_ms) !== null && _b !== void 0 ? _b : 0,
4550
+ endMs: (_c = it.end_ms) !== null && _c !== void 0 ? _c : 0,
4551
+ textStart: it.text_start,
4552
+ textEnd: it.text_end,
4553
+ });
4554
+ }),
4555
+ };
4556
+ }
4557
+ const result = {
4558
+ audio: base64ToArrayBuffer(json.audio_base64),
4559
+ format: json.format || 'mp3',
4560
+ usageCharacters: Number(json.usage_characters) || 0,
4561
+ alignment,
4562
+ };
4563
+ if (json.audio_length_ms != null) {
4564
+ result.audioLengthMs = Number(json.audio_length_ms) || 0;
4565
+ }
4566
+ this.checkBalanceAfter();
4567
+ return result;
4568
+ }
4569
+ catch (error) {
4570
+ if (error instanceof PlayKitError)
4571
+ throw error;
4572
+ throw new PlayKitError(error instanceof Error ? error.message : 'Unknown error', 'TTS_ERROR');
4573
+ }
4574
+ }
4575
+ }
4576
+
4412
4577
  /******************************************************************************
4413
4578
  Copyright (c) Microsoft Corporation.
4414
4579
 
@@ -5073,6 +5238,88 @@ class TranscriptionClient {
5073
5238
  }
5074
5239
  }
5075
5240
 
5241
+ /**
5242
+ * High-level client for text-to-speech synthesis
5243
+ */
5244
/**
 * Resolve an audio format/content-type string into a MIME type.
 *
 * Accepted inputs:
 *  - a full MIME string (anything containing '/'), returned unchanged;
 *  - a bare codec token such as 'mp3' or 'wav';
 *  - an output-format token of the form `{codec}_{sampleRate}_{bitrateKbps}`
 *    (e.g. 'flac_44100', 'mp3_44100_128'), of which only the codec is used.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Unknown
 * codecs map to 'audio/mpeg'.
 *
 * @param format - Format token or MIME type reported for the audio.
 * @returns A MIME type suitable for a Blob's `type`.
 */
function contentTypeFor(format) {
    if (format.includes('/')) {
        return format;
    }
    // Drop the optional "_sampleRate_bitrate" suffix so that e.g. 'flac_44100'
    // resolves like 'flac' instead of falling through to the default.
    const codec = format.trim().toLowerCase().split('_')[0];
    switch (codec) {
        case 'wav':
            return 'audio/wav';
        case 'ogg':
        case 'opus':
            // Assumes Opus is delivered in an Ogg container.
            return 'audio/ogg';
        case 'flac':
            return 'audio/flac';
        case 'aac':
            return 'audio/aac';
        case 'pcm':
            return 'audio/pcm';
        case 'mp3':
        default:
            return 'audio/mpeg';
    }
}
5271
+ class TTSClient {
5272
+ constructor(provider, model) {
5273
+ this.provider = provider;
5274
+ this.model = model || 'default-tts-model';
5275
+ }
5276
+ /**
5277
+ * Get the current model name
5278
+ */
5279
+ get modelName() {
5280
+ return this.model;
5281
+ }
5282
+ /**
5283
+ * Synthesize text into speech audio
5284
+ * @param config - Full TTS configuration
5285
+ * @returns TTS result containing raw audio bytes and usage metadata
5286
+ */
5287
+ async synthesize(config) {
5288
+ return this.provider.synthesize(Object.assign(Object.assign({}, config), { model: config.model || this.model }));
5289
+ }
5290
+ /**
5291
+ * Synthesize text into speech AND return timestamp alignment (word/sentence
5292
+ * timings). Returns the audio bytes plus an `alignment` object.
5293
+ * @param config - TTS configuration; `granularity` defaults to 'word'.
5294
+ */
5295
+ async synthesizeWithTimestamps(config) {
5296
+ return this.provider.synthesizeWithTimestamps(Object.assign(Object.assign({}, config), { model: config.model || this.model }));
5297
+ }
5298
+ /**
5299
+ * Synthesize text into speech and return it as a Blob (browser-friendly)
5300
+ * @param config - Full TTS configuration
5301
+ * @returns Audio Blob with the appropriate MIME type
5302
+ */
5303
+ async synthesizeToBlob(config) {
5304
+ const result = await this.synthesize(config);
5305
+ return new Blob([result.audio], { type: contentTypeFor(result.format) });
5306
+ }
5307
+ /**
5308
+ * Synthesize text into speech and return an object URL (browser only)
5309
+ * @param config - Full TTS configuration
5310
+ * @returns An object URL that can be assigned to an <audio> element
5311
+ * @throws PlayKitError if URL.createObjectURL is unavailable (e.g. Node.js)
5312
+ */
5313
+ async synthesizeToObjectURL(config) {
5314
+ if (typeof URL === 'undefined' || typeof URL.createObjectURL !== 'function') {
5315
+ throw new PlayKitError('URL.createObjectURL is not available in this environment. ' +
5316
+ 'Use synthesize() to access the raw audio bytes instead.', 'TTS_OBJECT_URL_UNAVAILABLE');
5317
+ }
5318
+ const blob = await this.synthesizeToBlob(config);
5319
+ return URL.createObjectURL(blob);
5320
+ }
5321
+ }
5322
+
5076
5323
  /**
5077
5324
  * Global AI Context Manager for managing NPC conversations and player context.
5078
5325
  *
@@ -6396,10 +6643,12 @@ class PlayKitSDK extends EventEmitter {
6396
6643
  this.chatProvider = new ChatProvider(this.authManager, this.config);
6397
6644
  this.imageProvider = new ImageProvider(this.authManager, this.config);
6398
6645
  this.transcriptionProvider = new TranscriptionProvider(this.authManager, this.config);
6646
+ this.ttsProvider = new TTSProvider(this.authManager, this.config);
6399
6647
  // Connect providers to player client for balance checking
6400
6648
  this.chatProvider.setPlayerClient(this.playerClient);
6401
6649
  this.imageProvider.setPlayerClient(this.playerClient);
6402
6650
  this.transcriptionProvider.setPlayerClient(this.playerClient);
6651
+ this.ttsProvider.setPlayerClient(this.playerClient);
6403
6652
  // Initialize AI context manager
6404
6653
  this.contextManager = new AIContextManager(this.config.aiContext);
6405
6654
  // Set chat client factory for compaction
@@ -6648,6 +6897,14 @@ class PlayKitSDK extends EventEmitter {
6648
6897
  this.ensureInitialized();
6649
6898
  return new TranscriptionClient(this.transcriptionProvider, model || this.config.defaultTranscriptionModel);
6650
6899
  }
6900
+ /**
6901
+ * Create a TTS client for text-to-speech
6902
+ * @param model - TTS model to use (default: 'default-tts-model')
6903
+ */
6904
+ createTTSClient(model) {
6905
+ this.ensureInitialized();
6906
+ return new TTSClient(this.ttsProvider, model || this.config.defaultTTSModel);
6907
+ }
6651
6908
  /**
6652
6909
  * Create an NPC client
6653
6910
  * Automatically registers with AIContextManager
@@ -7038,5 +7295,5 @@ class TokenValidator {
7038
7295
  */
7039
7296
  const defaultTokenValidator = new TokenValidator();
7040
7297
 
7041
- export { AIContextManager, AuthFlowManager, AuthManager, BrowserStorage, BufferLogHandler, CallbackLogHandler, ChatClient, DeviceAuthFlowManager, ImageClient, LogLevel, Logger, MemoryStorage, NPCClient, PlayKitSDK, PlayerClient, RechargeManager, SchemaLibrary, StreamParser, TokenStorage, TokenValidator, TranscriptionClient, createMultimodalMessage, createStorage, createTextMessage, PlayKitSDK as default, defaultContextManager, defaultSchemaLibrary, defaultTokenValidator, isLocalStorageAvailable };
7298
+ export { AIContextManager, AuthFlowManager, AuthManager, BrowserStorage, BufferLogHandler, CallbackLogHandler, ChatClient, DeviceAuthFlowManager, ImageClient, LogLevel, Logger, MemoryStorage, NPCClient, PlayKitSDK, PlayerClient, RechargeManager, SchemaLibrary, StreamParser, TTSClient, TokenStorage, TokenValidator, TranscriptionClient, createMultimodalMessage, createStorage, createTextMessage, PlayKitSDK as default, defaultContextManager, defaultSchemaLibrary, defaultTokenValidator, isLocalStorageAvailable };
7042
7299
  //# sourceMappingURL=playkit-sdk.esm.js.map