playkit-sdk 1.3.0 → 1.4.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -83,6 +83,26 @@ const imgElement = await image.toHTMLImage();
83
83
  document.body.appendChild(imgElement);
84
84
  ```
85
85
 
86
+ ### Text-to-Speech (TTS)
87
+
88
+ ```typescript
89
+ const tts = sdk.createTTSClient(); // defaults to 'default-tts-model'
90
+
91
+ // Get raw audio bytes plus usage metadata
92
+ const result = await tts.synthesize({
93
+ text: 'Welcome to the game, brave adventurer!',
94
+ voice: 'male-qn-qingse',
95
+ format: 'mp3',
96
+ });
97
+ console.log('Characters billed:', result.usageCharacters);
98
+ console.log('Audio length (ms):', result.audioLengthMs);
99
+
100
+ // Or get a playable object URL directly (browser)
101
+ const url = await tts.synthesizeToObjectURL({ text: 'Hello there!' });
102
+ const audio = new Audio(url);
103
+ audio.play();
104
+ ```
105
+
86
106
  ### NPC Conversations
87
107
 
88
108
  ```typescript
@@ -1,5 +1,5 @@
1
1
  /**
2
- * playkit-sdk v1.3.0
2
+ * playkit-sdk v1.4.0-beta.2
3
3
  * PlayKit SDK for JavaScript
4
4
  * @license SEE LICENSE IN LICENSE
5
5
  */
@@ -830,7 +830,7 @@ class TokenStorage {
830
830
  }
831
831
 
832
832
  const SDK_TYPE = 'Javascript';
833
- const SDK_VERSION = '"1.3.0"';
833
+ const SDK_VERSION = '"1.4.0-beta.2"';
834
834
  function getSDKHeaders() {
835
835
  return {
836
836
  'X-SDK-Type': SDK_TYPE,
@@ -2426,7 +2426,7 @@ DeviceAuthFlowManager.activeInstance = null;
2426
2426
  * Handles JWT exchange and token management
2427
2427
  */
2428
2428
  // @ts-ignore - replaced at build time
2429
- const DEFAULT_BASE_URL$5 = "https://api.playkit.ai";
2429
+ const DEFAULT_BASE_URL$6 = "https://api.playkit.ai";
2430
2430
  const JWT_EXCHANGE_ENDPOINT = '/api/external/exchange-jwt';
2431
2431
  const TOKEN_REFRESH_ENDPOINT = '/api/auth/refresh';
2432
2432
  class AuthManager extends EventEmitter {
@@ -2444,7 +2444,7 @@ class AuthManager extends EventEmitter {
2444
2444
  this.storage = new TokenStorage({
2445
2445
  mode: config.mode === 'server' ? 'server' : 'browser',
2446
2446
  });
2447
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$5;
2447
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$6;
2448
2448
  this.authState = {
2449
2449
  isAuthenticated: false,
2450
2450
  };
@@ -3533,7 +3533,7 @@ class RechargeManager extends EventEmitter {
3533
3533
  * Player client for managing player information and credits
3534
3534
  */
3535
3535
  // @ts-ignore - replaced at build time
3536
- const DEFAULT_BASE_URL$4 = "https://api.playkit.ai";
3536
+ const DEFAULT_BASE_URL$5 = "https://api.playkit.ai";
3537
3537
  const PLAYER_INFO_ENDPOINT = '/api/external/player-info';
3538
3538
  const SET_NICKNAME_ENDPOINT = '/api/external/set-game-player-nickname';
3539
3539
  class PlayerClient extends EventEmitter {
@@ -3545,7 +3545,7 @@ class PlayerClient extends EventEmitter {
3545
3545
  this.balanceCheckInterval = null;
3546
3546
  this.logger = Logger.getLogger('PlayerClient');
3547
3547
  this.authManager = authManager;
3548
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$4;
3548
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$5;
3549
3549
  this.gameId = config.gameId;
3550
3550
  this.rechargeConfig = {
3551
3551
  autoShowBalanceModal: (_a = rechargeConfig.autoShowBalanceModal) !== null && _a !== void 0 ? _a : true,
@@ -3893,12 +3893,12 @@ function contentToString$1(content) {
3893
3893
  return textParts.map(part => part.text).join('');
3894
3894
  }
3895
3895
  // @ts-ignore - replaced at build time
3896
- const DEFAULT_BASE_URL$3 = "https://api.playkit.ai";
3896
+ const DEFAULT_BASE_URL$4 = "https://api.playkit.ai";
3897
3897
  class ChatProvider {
3898
3898
  constructor(authManager, config) {
3899
3899
  this.authManager = authManager;
3900
3900
  this.config = config;
3901
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$3;
3901
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$4;
3902
3902
  }
3903
3903
  /**
3904
3904
  * Set player client for balance checking
@@ -4225,12 +4225,12 @@ class ChatProvider {
4225
4225
  * Image generation provider for HTTP communication with image API
4226
4226
  */
4227
4227
  // @ts-ignore - replaced at build time
4228
- const DEFAULT_BASE_URL$2 = "https://api.playkit.ai";
4228
+ const DEFAULT_BASE_URL$3 = "https://api.playkit.ai";
4229
4229
  class ImageProvider {
4230
4230
  constructor(authManager, config) {
4231
4231
  this.authManager = authManager;
4232
4232
  this.config = config;
4233
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$2;
4233
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$3;
4234
4234
  }
4235
4235
  /**
4236
4236
  * Set player client for balance checking
@@ -4314,12 +4314,12 @@ class ImageProvider {
4314
4314
  * Transcription provider for HTTP communication with audio transcription API
4315
4315
  */
4316
4316
  // @ts-ignore - replaced at build time
4317
- const DEFAULT_BASE_URL$1 = "https://api.playkit.ai";
4317
+ const DEFAULT_BASE_URL$2 = "https://api.playkit.ai";
4318
4318
  class TranscriptionProvider {
4319
4319
  constructor(authManager, config) {
4320
4320
  this.authManager = authManager;
4321
4321
  this.config = config;
4322
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$1;
4322
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$2;
4323
4323
  }
4324
4324
  /**
4325
4325
  * Set player client for balance checking
@@ -4413,6 +4413,171 @@ class TranscriptionProvider {
4413
4413
  }
4414
4414
  }
4415
4415
 
4416
+ /**
4417
+ * TTS provider for HTTP communication with the text-to-speech API
4418
+ */
4419
+ // @ts-ignore - replaced at build time
4420
+ const DEFAULT_BASE_URL$1 = "https://api.playkit.ai";
4421
/**
 * Decode a base64 string into an ArrayBuffer (browser + Node).
 *
 * Uses the global `atob` when it exists and falls back to the global
 * `Buffer` otherwise.
 *
 * @param b64 - Base64-encoded text.
 * @returns An ArrayBuffer holding the decoded bytes.
 * @throws TypeError if `b64` is not a string.
 * @throws Error if neither `atob` nor `Buffer` is available.
 */
function base64ToArrayBuffer(b64) {
    // atob() coerces its argument to a string, so `null` would be decoded as
    // the (valid) base64 text "null" and yield garbage bytes. Reject early.
    if (typeof b64 !== 'string') {
        const received = b64 === null ? 'null' : typeof b64;
        throw new TypeError(`Expected a base64 string but received ${received}`);
    }
    if (typeof atob === 'function') {
        const bin = atob(b64);
        const bytes = new Uint8Array(bin.length);
        for (let i = 0; i < bin.length; i++) {
            bytes[i] = bin.charCodeAt(i);
        }
        return bytes.buffer;
    }
    // Node fallback
    const NodeBuffer = globalThis.Buffer;
    if (typeof NodeBuffer !== 'function' || typeof NodeBuffer.from !== 'function') {
        throw new Error('No base64 decoder (atob or Buffer) is available in this environment');
    }
    const buf = NodeBuffer.from(b64, 'base64');
    // Slice by byteOffset/byteLength so only this Buffer's own bytes are
    // returned, not the whole underlying ArrayBuffer.
    return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
}
4434
/**
 * Low-level text-to-speech provider: builds request bodies, performs the
 * authenticated HTTP calls and converts responses into result objects.
 */
class TTSProvider {
    /**
     * @param authManager - Used for `ensureValidToken()` and `getToken()`.
     * @param config - SDK config; `baseURL`, `gameId` and `defaultTTSModel` are read.
     */
    constructor(authManager, config) {
        this.authManager = authManager;
        this.config = config;
        this.baseURL = config.baseURL || DEFAULT_BASE_URL$1;
    }
    /**
     * Set player client for balance checking
     */
    setPlayerClient(playerClient) {
        this.playerClient = playerClient;
    }
    /**
     * Build the shared request body from a TTS config.
     *
     * `model` falls back to `config.defaultTTSModel`, then to
     * 'default-tts-model'. Optional fields are copied only when they are not
     * `undefined`, using the snake_case names listed below.
     *
     * NOTE(review): the README example passes `format`, which is not among
     * the forwarded fields — confirm whether it should map to `output_format`.
     *
     * @param ttsConfig - Caller-supplied TTS options.
     * @returns The JSON-serialisable request body.
     */
    buildRequestBody(ttsConfig) {
        const model = ttsConfig.model || this.config.defaultTTSModel || 'default-tts-model';
        const body = { model, text: ttsConfig.text };
        // [config property, request field] — order matches the emitted key order.
        const optionalFields = [
            ['voice', 'voice'],
            ['voiceMix', 'voice_mix'],
            ['voiceSettings', 'voice_settings'],
            ['outputFormat', 'output_format'],
            ['language', 'language'],
            ['providerOptions', 'provider_options'],
        ];
        for (const [source, target] of optionalFields) {
            if (ttsConfig[source] !== undefined) {
                body[target] = ttsConfig[source];
            }
        }
        return body;
    }
    /**
     * POST to a TTS endpoint; throws a PlayKitError on a non-ok response.
     *
     * @param endpoint - Path appended to `baseURL`.
     * @param body - Request body, sent as JSON.
     * @returns The fetch Response when `response.ok` is true.
     * @throws PlayKitError 'NOT_AUTHENTICATED' when no token is available, or
     *   an error built from the response's `message`/`code` and HTTP status.
     */
    async post(endpoint, body) {
        await this.authManager.ensureValidToken();
        const token = this.authManager.getToken();
        if (!token) {
            throw new PlayKitError('Not authenticated', 'NOT_AUTHENTICATED');
        }
        const response = await fetch(`${this.baseURL}${endpoint}`, {
            method: 'POST',
            headers: Object.assign({ Authorization: `Bearer ${token}`, 'Content-Type': 'application/json' }, getSDKHeaders()),
            body: JSON.stringify(body),
        });
        if (response.ok) {
            return response;
        }
        // The error body may be unparsable, or parse to null / a primitive.
        // Normalise to an object so the HTTP status is never lost to a
        // TypeError while reading `message`/`code`.
        const parsed = await response.json().catch(() => null);
        const error = parsed !== null && typeof parsed === 'object' ? parsed : {};
        const playKitError = new PlayKitError(error.message || 'Speech synthesis failed', error.code, response.status);
        const outOfCredits = error.code === 'INSUFFICIENT_CREDITS' ||
            error.code === 'PLAYER_INSUFFICIENT_CREDIT' ||
            response.status === 402;
        if (outOfCredits && this.playerClient) {
            await this.playerClient.handleInsufficientCredits(playKitError);
        }
        throw playKitError;
    }
    /**
     * Ask the player client (if set) to re-check the balance. The call is not
     * awaited and its rejection is deliberately ignored.
     */
    checkBalanceAfter() {
        if (this.playerClient) {
            this.playerClient.checkBalanceAfterApiCall().catch(() => {
                /* silently fail */
            });
        }
    }
    /**
     * Normalise any thrown value into a PlayKitError.
     * PlayKitError instances pass through untouched; anything else is wrapped
     * with code 'TTS_ERROR' and kept on `cause` for debugging.
     */
    toPlayKitError(error) {
        if (error instanceof PlayKitError) {
            return error;
        }
        const wrapped = new PlayKitError(error instanceof Error ? error.message : 'Unknown error', 'TTS_ERROR');
        wrapped.cause = error;
        return wrapped;
    }
    /**
     * Synthesize text into speech audio (raw bytes).
     *
     * @param ttsConfig - TTS options, see `buildRequestBody`.
     * @returns `{ audio, format, usageCharacters }` plus `audioLengthMs` when
     *   the `X-Audio-Length-Ms` header is present. `format` is the response
     *   Content-Type, or 'mp3' when that header is absent.
     * @throws PlayKitError
     */
    async synthesize(ttsConfig) {
        const endpoint = `/ai/${this.config.gameId}/v2/audio/speech`;
        try {
            const response = await this.post(endpoint, this.buildRequestBody(ttsConfig));
            // SUCCESS: response is raw audio bytes, NOT JSON.
            const audio = await response.arrayBuffer();
            const headers = response.headers;
            const result = {
                audio,
                format: headers.get('Content-Type') || 'mp3',
                usageCharacters: Number(headers.get('X-Usage-Characters')) || 0,
            };
            const audioLengthHeader = headers.get('X-Audio-Length-Ms');
            if (audioLengthHeader !== null) {
                result.audioLengthMs = Number(audioLengthHeader) || 0;
            }
            this.checkBalanceAfter();
            return result;
        }
        catch (error) {
            throw this.toPlayKitError(error);
        }
    }
    /**
     * Synthesize text into speech AND return timestamp alignment. Hits the
     * `speech-with-timestamps` variant, whose success response is a JSON envelope
     * (base64 audio + alignment), so it is parsed as JSON — not raw bytes.
     *
     * @param ttsConfig - TTS options; `granularity`, when defined, is sent as
     *   `subtitle_type`.
     * @returns `{ audio, format, usageCharacters, alignment }` plus
     *   `audioLengthMs` when the envelope carries `audio_length_ms`.
     *   `alignment` is null unless the envelope has an `alignment.items` array.
     * @throws PlayKitError (code 'TTS_ERROR' when the envelope has no audio).
     */
    async synthesizeWithTimestamps(ttsConfig) {
        const endpoint = `/ai/${this.config.gameId}/v2/audio/speech-with-timestamps`;
        const body = this.buildRequestBody(ttsConfig);
        if (ttsConfig.granularity !== undefined) {
            body.subtitle_type = ttsConfig.granularity;
        }
        try {
            const response = await this.post(endpoint, body);
            const json = await response.json();
            // Fail with a clear message instead of handing a missing/null
            // payload to the base64 decoder.
            if (json === null || typeof json !== 'object' || typeof json.audio_base64 !== 'string') {
                throw new PlayKitError('Speech synthesis response did not contain audio data', 'TTS_ERROR');
            }
            let alignment = null;
            if (json.alignment && Array.isArray(json.alignment.items)) {
                alignment = {
                    granularity: json.alignment.granularity || 'word',
                    items: json.alignment.items.map((it) => ({
                        text: it.text != null ? it.text : '',
                        startMs: it.start_ms != null ? it.start_ms : 0,
                        endMs: it.end_ms != null ? it.end_ms : 0,
                        textStart: it.text_start,
                        textEnd: it.text_end,
                    })),
                };
            }
            const result = {
                audio: base64ToArrayBuffer(json.audio_base64),
                format: json.format || 'mp3',
                usageCharacters: Number(json.usage_characters) || 0,
                alignment,
            };
            if (json.audio_length_ms != null) {
                result.audioLengthMs = Number(json.audio_length_ms) || 0;
            }
            this.checkBalanceAfter();
            return result;
        }
        catch (error) {
            throw this.toPlayKitError(error);
        }
    }
}
4580
+
4416
4581
  /******************************************************************************
4417
4582
  Copyright (c) Microsoft Corporation.
4418
4583
 
@@ -5077,6 +5242,88 @@ class TranscriptionClient {
5077
5242
  }
5078
5243
  }
5079
5244
 
5245
+ /**
5246
+ * High-level client for text-to-speech synthesis
5247
+ */
5248
/**
 * Resolve an audio format/content-type string into a MIME type.
 *
 * The provider's `result.format` may be a full MIME (e.g. 'audio/mpeg' from
 * the Content-Type header) or a bare token (e.g. 'mp3' from the fallback).
 * Values containing '/' pass through; bare tokens are mapped
 * case-insensitively. Surrounding whitespace is ignored.
 *
 * @param format - MIME string or bare format token.
 * @returns The MIME type; 'audio/mpeg' for unknown tokens, empty strings and
 *   non-string input.
 */
function contentTypeFor(format) {
    const fallback = 'audio/mpeg';
    // A missing format is treated like an unknown token rather than crashing.
    if (typeof format !== 'string') {
        return fallback;
    }
    const value = format.trim();
    if (value.includes('/')) {
        return value;
    }
    switch (value.toLowerCase()) {
        case 'mp3':
            return 'audio/mpeg';
        case 'wav':
            return 'audio/wav';
        case 'ogg':
            return 'audio/ogg';
        case 'flac':
            return 'audio/flac';
        case 'aac':
            return 'audio/aac';
        case 'pcm':
            return 'audio/pcm';
        default:
            return fallback;
    }
}
5275
/**
 * High-level client for text-to-speech synthesis.
 * Wraps a TTS provider and fills in this client's model when a call does not
 * name one.
 */
class TTSClient {
    /**
     * @param provider - Object exposing `synthesize` and `synthesizeWithTimestamps`.
     * @param model - Model name; 'default-tts-model' when omitted or falsy.
     */
    constructor(provider, model) {
        this.provider = provider;
        this.model = model || 'default-tts-model';
    }
    /**
     * Get the current model name
     */
    get modelName() {
        return this.model;
    }
    /**
     * Copy `config` and set `model` to `config.model`, or to this client's
     * model when `config.model` is falsy. The caller's object is not mutated.
     */
    resolveConfig(config) {
        return Object.assign({}, config, { model: config.model || this.model });
    }
    /**
     * Synthesize text into speech audio
     * @param config - Full TTS configuration
     * @returns TTS result containing raw audio bytes and usage metadata
     */
    async synthesize(config) {
        return this.provider.synthesize(this.resolveConfig(config));
    }
    /**
     * Synthesize text into speech AND return timestamp alignment (word/sentence
     * timings). Returns whatever the provider's `synthesizeWithTimestamps`
     * resolves to.
     * @param config - TTS configuration. NOTE(review): no default `granularity`
     *   is applied here; the config is forwarded as given apart from `model`.
     */
    async synthesizeWithTimestamps(config) {
        return this.provider.synthesizeWithTimestamps(this.resolveConfig(config));
    }
    /**
     * Synthesize text into speech and return it as a Blob (browser-friendly)
     * @param config - Full TTS configuration
     * @returns Audio Blob with the appropriate MIME type
     * @throws PlayKitError if Blob is unavailable in this environment
     */
    async synthesizeToBlob(config) {
        // Check before synthesizing so no request is made (and nothing is
        // billed) when the result could not be wrapped anyway.
        if (typeof Blob !== 'function') {
            throw new PlayKitError('Blob is not available in this environment. ' +
                'Use synthesize() to access the raw audio bytes instead.', 'TTS_BLOB_UNAVAILABLE');
        }
        const result = await this.synthesize(config);
        return new Blob([result.audio], { type: contentTypeFor(result.format) });
    }
    /**
     * Synthesize text into speech and return an object URL (browser only).
     * The caller owns the URL and should release it with
     * `URL.revokeObjectURL(url)` once the audio is no longer needed.
     * @param config - Full TTS configuration
     * @returns An object URL that can be assigned to an <audio> element
     * @throws PlayKitError if URL.createObjectURL is unavailable (e.g. Node.js)
     */
    async synthesizeToObjectURL(config) {
        if (typeof URL === 'undefined' || typeof URL.createObjectURL !== 'function') {
            throw new PlayKitError('URL.createObjectURL is not available in this environment. ' +
                'Use synthesize() to access the raw audio bytes instead.', 'TTS_OBJECT_URL_UNAVAILABLE');
        }
        const blob = await this.synthesizeToBlob(config);
        return URL.createObjectURL(blob);
    }
}
5326
+
5080
5327
  /**
5081
5328
  * Global AI Context Manager for managing NPC conversations and player context.
5082
5329
  *
@@ -6400,10 +6647,12 @@ class PlayKitSDK extends EventEmitter {
6400
6647
  this.chatProvider = new ChatProvider(this.authManager, this.config);
6401
6648
  this.imageProvider = new ImageProvider(this.authManager, this.config);
6402
6649
  this.transcriptionProvider = new TranscriptionProvider(this.authManager, this.config);
6650
+ this.ttsProvider = new TTSProvider(this.authManager, this.config);
6403
6651
  // Connect providers to player client for balance checking
6404
6652
  this.chatProvider.setPlayerClient(this.playerClient);
6405
6653
  this.imageProvider.setPlayerClient(this.playerClient);
6406
6654
  this.transcriptionProvider.setPlayerClient(this.playerClient);
6655
+ this.ttsProvider.setPlayerClient(this.playerClient);
6407
6656
  // Initialize AI context manager
6408
6657
  this.contextManager = new AIContextManager(this.config.aiContext);
6409
6658
  // Set chat client factory for compaction
@@ -6652,6 +6901,14 @@ class PlayKitSDK extends EventEmitter {
6652
6901
  this.ensureInitialized();
6653
6902
  return new TranscriptionClient(this.transcriptionProvider, model || this.config.defaultTranscriptionModel);
6654
6903
  }
6904
+ /**
6905
+ * Create a TTS client for text-to-speech
6906
+ * @param model - TTS model to use (default: 'default-tts-model')
6907
+ */
6908
+ createTTSClient(model) {
6909
+ this.ensureInitialized();
6910
+ return new TTSClient(this.ttsProvider, model || this.config.defaultTTSModel);
6911
+ }
6655
6912
  /**
6656
6913
  * Create an NPC client
6657
6914
  * Automatically registers with AIContextManager
@@ -7059,6 +7316,7 @@ exports.PlayerClient = PlayerClient;
7059
7316
  exports.RechargeManager = RechargeManager;
7060
7317
  exports.SchemaLibrary = SchemaLibrary;
7061
7318
  exports.StreamParser = StreamParser;
7319
+ exports.TTSClient = TTSClient;
7062
7320
  exports.TokenStorage = TokenStorage;
7063
7321
  exports.TokenValidator = TokenValidator;
7064
7322
  exports.TranscriptionClient = TranscriptionClient;