playkit-sdk 1.4.0-beta.1 → 1.4.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  /**
2
- * playkit-sdk v1.4.0-beta.1
2
+ * playkit-sdk v1.4.0-beta.2
3
3
  * PlayKit SDK for JavaScript
4
4
  * @license SEE LICENSE IN LICENSE
5
5
  */
@@ -830,7 +830,7 @@ class TokenStorage {
830
830
  }
831
831
 
832
832
  const SDK_TYPE = 'Javascript';
833
- const SDK_VERSION = '"1.4.0-beta.1"';
833
+ const SDK_VERSION = '"1.4.0-beta.2"';
834
834
  function getSDKHeaders() {
835
835
  return {
836
836
  'X-SDK-Type': SDK_TYPE,
@@ -4418,6 +4418,19 @@ class TranscriptionProvider {
4418
4418
  */
4419
4419
  // @ts-ignore - replaced at build time
4420
4420
  const DEFAULT_BASE_URL$1 = "https://api.playkit.ai";
4421
+ /** Decode a base64 string to an ArrayBuffer (browser + Node). */
4422
+ function base64ToArrayBuffer(b64) {
4423
+ if (typeof atob === 'function') {
4424
+ const bin = atob(b64);
4425
+ const bytes = new Uint8Array(bin.length);
4426
+ for (let i = 0; i < bin.length; i++)
4427
+ bytes[i] = bin.charCodeAt(i);
4428
+ return bytes.buffer;
4429
+ }
4430
+ // Node fallback
4431
+ const buf = globalThis.Buffer.from(b64, 'base64');
4432
+ return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
4433
+ }
4421
4434
  class TTSProvider {
4422
4435
  constructor(authManager, config) {
4423
4436
  this.authManager = authManager;
@@ -4430,69 +4443,69 @@ class TTSProvider {
4430
4443
  setPlayerClient(playerClient) {
4431
4444
  this.playerClient = playerClient;
4432
4445
  }
4433
- /**
4434
- * Synthesize text into speech audio
4435
- */
4436
- async synthesize(ttsConfig) {
4437
- // Ensure token is valid, auto-refresh if needed (browser mode only)
4446
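+ /** Build the shared request body from a TTS config: always model and text, plus voice, voiceMix, voiceSettings, outputFormat, language and providerOptions when set. Older options (speed, vol, pitch, emotion, languageBoost, format, voiceSetting, audioSetting) are not forwarded. */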
4447
+ buildRequestBody(ttsConfig) {
4448
+ const model = ttsConfig.model || this.config.defaultTTSModel || 'default-tts-model';
4449
+ const body = { model, text: ttsConfig.text };
4450
+ if (ttsConfig.voice !== undefined)
4451
+ body.voice = ttsConfig.voice;
4452
+ if (ttsConfig.voiceMix !== undefined)
4453
+ body.voice_mix = ttsConfig.voiceMix;
4454
+ if (ttsConfig.voiceSettings !== undefined) {
4455
+ body.voice_settings = ttsConfig.voiceSettings;
4456
+ }
4457
+ if (ttsConfig.outputFormat !== undefined) {
4458
+ body.output_format = ttsConfig.outputFormat;
4459
+ }
4460
+ if (ttsConfig.language !== undefined)
4461
+ body.language = ttsConfig.language;
4462
+ if (ttsConfig.providerOptions !== undefined) {
4463
+ body.provider_options = ttsConfig.providerOptions;
4464
+ }
4465
+ return body;
4466
+ }
4467
+ /** POST to a TTS endpoint; throws a PlayKitError when no auth token is available or on a non-ok response. */
4468
+ async post(endpoint, body) {
4438
4469
  await this.authManager.ensureValidToken();
4439
4470
  const token = this.authManager.getToken();
4440
4471
  if (!token) {
4441
4472
  throw new PlayKitError('Not authenticated', 'NOT_AUTHENTICATED');
4442
4473
  }
4443
- const model = ttsConfig.model || this.config.defaultTTSModel || 'default-tts-model';
4444
- const endpoint = `/ai/${this.config.gameId}/v2/audio/speech`;
4445
- const requestBody = {
4446
- model,
4447
- text: ttsConfig.text,
4448
- };
4449
- // Add optional parameters (only when defined)
4450
- if (ttsConfig.voice !== undefined) {
4451
- requestBody.voice = ttsConfig.voice;
4452
- }
4453
- if (ttsConfig.speed !== undefined) {
4454
- requestBody.speed = ttsConfig.speed;
4455
- }
4456
- if (ttsConfig.vol !== undefined) {
4457
- requestBody.vol = ttsConfig.vol;
4458
- }
4459
- if (ttsConfig.pitch !== undefined) {
4460
- requestBody.pitch = ttsConfig.pitch;
4461
- }
4462
- if (ttsConfig.emotion !== undefined) {
4463
- requestBody.emotion = ttsConfig.emotion;
4464
- }
4465
- if (ttsConfig.languageBoost !== undefined) {
4466
- requestBody.language_boost = ttsConfig.languageBoost;
4467
- }
4468
- if (ttsConfig.format !== undefined) {
4469
- requestBody.response_format = ttsConfig.format;
4470
- }
4471
- if (ttsConfig.voiceSetting !== undefined) {
4472
- requestBody.voice_setting = ttsConfig.voiceSetting;
4474
+ const response = await fetch(`${this.baseURL}${endpoint}`, {
4475
+ method: 'POST',
4476
+ headers: Object.assign({ Authorization: `Bearer ${token}`, 'Content-Type': 'application/json' }, getSDKHeaders()),
4477
+ body: JSON.stringify(body),
4478
+ });
4479
+ if (!response.ok) {
4480
+ const error = await response
4481
+ .json()
4482
+ .catch(() => ({ message: 'Speech synthesis failed' }));
4483
+ const playKitError = new PlayKitError(error.message || 'Speech synthesis failed', error.code, response.status);
4484
+ if (error.code === 'INSUFFICIENT_CREDITS' ||
4485
+ error.code === 'PLAYER_INSUFFICIENT_CREDIT' ||
4486
+ response.status === 402) {
4487
+ if (this.playerClient) {
4488
+ await this.playerClient.handleInsufficientCredits(playKitError);
4489
+ }
4490
+ }
4491
+ throw playKitError;
4473
4492
  }
4474
- if (ttsConfig.audioSetting !== undefined) {
4475
- requestBody.audio_setting = ttsConfig.audioSetting;
4493
+ return response;
4494
+ }
4495
+ checkBalanceAfter() {
4496
+ if (this.playerClient) {
4497
+ this.playerClient.checkBalanceAfterApiCall().catch(() => {
4498
+ /* silently fail */
4499
+ });
4476
4500
  }
4501
+ }
4502
+ /**
4503
+ * Synthesize text into speech audio (raw bytes).
4504
+ */
4505
+ async synthesize(ttsConfig) {
4506
+ const endpoint = `/ai/${this.config.gameId}/v2/audio/speech`;
4477
4507
  try {
4478
- const response = await fetch(`${this.baseURL}${endpoint}`, {
4479
- method: 'POST',
4480
- headers: Object.assign({ Authorization: `Bearer ${token}`, 'Content-Type': 'application/json' }, getSDKHeaders()),
4481
- body: JSON.stringify(requestBody),
4482
- });
4483
- if (!response.ok) {
4484
- const error = await response.json().catch(() => ({ message: 'Speech synthesis failed' }));
4485
- const playKitError = new PlayKitError(error.message || 'Speech synthesis failed', error.code, response.status);
4486
- // Check for insufficient credits error
4487
- if (error.code === 'INSUFFICIENT_CREDITS' ||
4488
- error.code === 'PLAYER_INSUFFICIENT_CREDIT' ||
4489
- response.status === 402) {
4490
- if (this.playerClient) {
4491
- await this.playerClient.handleInsufficientCredits(playKitError);
4492
- }
4493
- }
4494
- throw playKitError;
4495
- }
4508
+ const response = await this.post(endpoint, this.buildRequestBody(ttsConfig));
4496
4509
  // SUCCESS: response is raw audio bytes, NOT JSON.
4497
4510
  const audio = await response.arrayBuffer();
4498
4511
  const contentType = response.headers.get('Content-Type');
@@ -4500,24 +4513,66 @@ class TTSProvider {
4500
4513
  const audioLengthHeader = response.headers.get('X-Audio-Length-Ms');
4501
4514
  const result = {
4502
4515
  audio,
4503
- format: contentType || ttsConfig.format || 'mp3',
4516
+ format: contentType || 'mp3',
4504
4517
  usageCharacters: Number(usageHeader) || 0,
4505
4518
  };
4506
4519
  if (audioLengthHeader !== null) {
4507
4520
  result.audioLengthMs = Number(audioLengthHeader) || 0;
4508
4521
  }
4509
- // Check balance after successful API call
4510
- if (this.playerClient) {
4511
- this.playerClient.checkBalanceAfterApiCall().catch(() => {
4512
- // Silently fail
4513
- });
4514
- }
4522
+ this.checkBalanceAfter();
4515
4523
  return result;
4516
4524
  }
4517
4525
  catch (error) {
4518
- if (error instanceof PlayKitError) {
4526
+ if (error instanceof PlayKitError)
4519
4527
  throw error;
4528
+ throw new PlayKitError(error instanceof Error ? error.message : 'Unknown error', 'TTS_ERROR');
4529
+ }
4530
+ }
4531
+ /**
4532
+ * Synthesize text into speech AND return timestamp alignment. Hits the
4533
+ * `speech-with-timestamps` variant, whose success response is a JSON envelope
4534
+ * (base64 audio + alignment), so it is parsed as JSON — not raw bytes.
4535
+ */
4536
+ async synthesizeWithTimestamps(ttsConfig) {
4537
+ const endpoint = `/ai/${this.config.gameId}/v2/audio/speech-with-timestamps`;
4538
+ const body = this.buildRequestBody(ttsConfig);
4539
+ if (ttsConfig.granularity !== undefined) {
4540
+ body.subtitle_type = ttsConfig.granularity;
4541
+ }
4542
+ try {
4543
+ const response = await this.post(endpoint, body);
4544
+ const json = (await response.json());
4545
+ let alignment = null;
4546
+ if (json.alignment && Array.isArray(json.alignment.items)) {
4547
+ alignment = {
4548
+ granularity: json.alignment.granularity || 'word',
4549
+ items: json.alignment.items.map((it) => {
4550
+ var _a, _b, _c;
4551
+ return ({
4552
+ text: (_a = it.text) !== null && _a !== void 0 ? _a : '',
4553
+ startMs: (_b = it.start_ms) !== null && _b !== void 0 ? _b : 0,
4554
+ endMs: (_c = it.end_ms) !== null && _c !== void 0 ? _c : 0,
4555
+ textStart: it.text_start,
4556
+ textEnd: it.text_end,
4557
+ });
4558
+ }),
4559
+ };
4560
+ }
4561
+ const result = {
4562
+ audio: base64ToArrayBuffer(json.audio_base64),
4563
+ format: json.format || 'mp3',
4564
+ usageCharacters: Number(json.usage_characters) || 0,
4565
+ alignment,
4566
+ };
4567
+ if (json.audio_length_ms != null) {
4568
+ result.audioLengthMs = Number(json.audio_length_ms) || 0;
4520
4569
  }
4570
+ this.checkBalanceAfter();
4571
+ return result;
4572
+ }
4573
+ catch (error) {
4574
+ if (error instanceof PlayKitError)
4575
+ throw error;
4521
4576
  throw new PlayKitError(error instanceof Error ? error.message : 'Unknown error', 'TTS_ERROR');
4522
4577
  }
4523
4578
  }
@@ -5236,6 +5291,14 @@ class TTSClient {
5236
5291
  async synthesize(config) {
5237
5292
  return this.provider.synthesize(Object.assign(Object.assign({}, config), { model: config.model || this.model }));
5238
5293
  }
5294
+ /**
5295
+ * Synthesize text into speech AND return timestamp alignment (word/sentence
5296
+ * timings). Returns the audio bytes plus an `alignment` object.
5297
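+ * @param config - TTS configuration; `granularity` is optional and is sent (as `subtitle_type`) only when set. The returned alignment reports 'word' when the response names no granularity.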
5298
+ */
5299
+ async synthesizeWithTimestamps(config) {
5300
+ return this.provider.synthesizeWithTimestamps(Object.assign(Object.assign({}, config), { model: config.model || this.model }));
5301
+ }
5239
5302
  /**
5240
5303
  * Synthesize text into speech and return it as a Blob (browser-friendly)
5241
5304
  * @param config - Full TTS configuration