playkit-sdk 1.3.0 → 1.4.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  /**
2
- * playkit-sdk v1.3.0
2
+ * playkit-sdk v1.4.0-beta.2
3
3
  * PlayKit SDK for JavaScript
4
4
  * @license SEE LICENSE IN LICENSE
5
5
  */
@@ -1177,7 +1177,7 @@
1177
1177
  }
1178
1178
 
1179
1179
  const SDK_TYPE = 'Javascript';
1180
- const SDK_VERSION = '"1.3.0"';
1180
+ const SDK_VERSION = '"1.4.0-beta.2"';
1181
1181
  function getSDKHeaders() {
1182
1182
  return {
1183
1183
  'X-SDK-Type': SDK_TYPE,
@@ -2773,7 +2773,7 @@
2773
2773
  * Handles JWT exchange and token management
2774
2774
  */
2775
2775
  // @ts-ignore - replaced at build time
2776
- const DEFAULT_BASE_URL$5 = "https://api.playkit.ai";
2776
+ const DEFAULT_BASE_URL$6 = "https://api.playkit.ai";
2777
2777
  const JWT_EXCHANGE_ENDPOINT = '/api/external/exchange-jwt';
2778
2778
  const TOKEN_REFRESH_ENDPOINT = '/api/auth/refresh';
2779
2779
  class AuthManager extends EventEmitter {
@@ -2791,7 +2791,7 @@
2791
2791
  this.storage = new TokenStorage({
2792
2792
  mode: config.mode === 'server' ? 'server' : 'browser',
2793
2793
  });
2794
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$5;
2794
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$6;
2795
2795
  this.authState = {
2796
2796
  isAuthenticated: false,
2797
2797
  };
@@ -3880,7 +3880,7 @@
3880
3880
  * Player client for managing player information and credits
3881
3881
  */
3882
3882
  // @ts-ignore - replaced at build time
3883
- const DEFAULT_BASE_URL$4 = "https://api.playkit.ai";
3883
+ const DEFAULT_BASE_URL$5 = "https://api.playkit.ai";
3884
3884
  const PLAYER_INFO_ENDPOINT = '/api/external/player-info';
3885
3885
  const SET_NICKNAME_ENDPOINT = '/api/external/set-game-player-nickname';
3886
3886
  class PlayerClient extends EventEmitter {
@@ -3892,7 +3892,7 @@
3892
3892
  this.balanceCheckInterval = null;
3893
3893
  this.logger = Logger.getLogger('PlayerClient');
3894
3894
  this.authManager = authManager;
3895
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$4;
3895
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$5;
3896
3896
  this.gameId = config.gameId;
3897
3897
  this.rechargeConfig = {
3898
3898
  autoShowBalanceModal: (_a = rechargeConfig.autoShowBalanceModal) !== null && _a !== void 0 ? _a : true,
@@ -4240,12 +4240,12 @@
4240
4240
  return textParts.map(part => part.text).join('');
4241
4241
  }
4242
4242
  // @ts-ignore - replaced at build time
4243
- const DEFAULT_BASE_URL$3 = "https://api.playkit.ai";
4243
+ const DEFAULT_BASE_URL$4 = "https://api.playkit.ai";
4244
4244
  class ChatProvider {
4245
4245
  constructor(authManager, config) {
4246
4246
  this.authManager = authManager;
4247
4247
  this.config = config;
4248
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$3;
4248
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$4;
4249
4249
  }
4250
4250
  /**
4251
4251
  * Set player client for balance checking
@@ -4572,12 +4572,12 @@
4572
4572
  * Image generation provider for HTTP communication with image API
4573
4573
  */
4574
4574
  // @ts-ignore - replaced at build time
4575
- const DEFAULT_BASE_URL$2 = "https://api.playkit.ai";
4575
+ const DEFAULT_BASE_URL$3 = "https://api.playkit.ai";
4576
4576
  class ImageProvider {
4577
4577
  constructor(authManager, config) {
4578
4578
  this.authManager = authManager;
4579
4579
  this.config = config;
4580
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$2;
4580
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$3;
4581
4581
  }
4582
4582
  /**
4583
4583
  * Set player client for balance checking
@@ -4661,12 +4661,12 @@
4661
4661
  * Transcription provider for HTTP communication with audio transcription API
4662
4662
  */
4663
4663
  // @ts-ignore - replaced at build time
4664
- const DEFAULT_BASE_URL$1 = "https://api.playkit.ai";
4664
+ const DEFAULT_BASE_URL$2 = "https://api.playkit.ai";
4665
4665
  class TranscriptionProvider {
4666
4666
  constructor(authManager, config) {
4667
4667
  this.authManager = authManager;
4668
4668
  this.config = config;
4669
- this.baseURL = config.baseURL || DEFAULT_BASE_URL$1;
4669
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$2;
4670
4670
  }
4671
4671
  /**
4672
4672
  * Set player client for balance checking
@@ -4760,6 +4760,171 @@
4760
4760
  }
4761
4761
  }
4762
4762
 
4763
+ /**
4764
+ * TTS provider for HTTP communication with the text-to-speech API
4765
+ */
4766
+ // @ts-ignore - replaced at build time
4767
+ const DEFAULT_BASE_URL$1 = "https://api.playkit.ai";
4768
+ /** Decode a base64 string to an ArrayBuffer (browser + Node). */
4769
+ function base64ToArrayBuffer(b64) {
4770
+ if (typeof atob === 'function') {
4771
+ const bin = atob(b64);
4772
+ const bytes = new Uint8Array(bin.length);
4773
+ for (let i = 0; i < bin.length; i++)
4774
+ bytes[i] = bin.charCodeAt(i);
4775
+ return bytes.buffer;
4776
+ }
4777
+ // Node fallback
4778
+ const buf = globalThis.Buffer.from(b64, 'base64');
4779
+ return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
4780
+ }
4781
+ class TTSProvider {
4782
+ constructor(authManager, config) {
4783
+ this.authManager = authManager;
4784
+ this.config = config;
4785
+ this.baseURL = config.baseURL || DEFAULT_BASE_URL$1;
4786
+ }
4787
+ /**
4788
+ * Set player client for balance checking
4789
+ */
4790
+ setPlayerClient(playerClient) {
4791
+ this.playerClient = playerClient;
4792
+ }
4793
+ /** Build the shared request body from a TTS config (new fields + legacy). */
4794
+ buildRequestBody(ttsConfig) {
4795
+ const model = ttsConfig.model || this.config.defaultTTSModel || 'default-tts-model';
4796
+ const body = { model, text: ttsConfig.text };
4797
+ if (ttsConfig.voice !== undefined)
4798
+ body.voice = ttsConfig.voice;
4799
+ if (ttsConfig.voiceMix !== undefined)
4800
+ body.voice_mix = ttsConfig.voiceMix;
4801
+ if (ttsConfig.voiceSettings !== undefined) {
4802
+ body.voice_settings = ttsConfig.voiceSettings;
4803
+ }
4804
+ if (ttsConfig.outputFormat !== undefined) {
4805
+ body.output_format = ttsConfig.outputFormat;
4806
+ }
4807
+ if (ttsConfig.language !== undefined)
4808
+ body.language = ttsConfig.language;
4809
+ if (ttsConfig.providerOptions !== undefined) {
4810
+ body.provider_options = ttsConfig.providerOptions;
4811
+ }
4812
+ return body;
4813
+ }
4814
+ /** POST to a TTS endpoint; throws a PlayKitError on a non-ok response. */
4815
+ async post(endpoint, body) {
4816
+ await this.authManager.ensureValidToken();
4817
+ const token = this.authManager.getToken();
4818
+ if (!token) {
4819
+ throw new PlayKitError('Not authenticated', 'NOT_AUTHENTICATED');
4820
+ }
4821
+ const response = await fetch(`${this.baseURL}${endpoint}`, {
4822
+ method: 'POST',
4823
+ headers: Object.assign({ Authorization: `Bearer ${token}`, 'Content-Type': 'application/json' }, getSDKHeaders()),
4824
+ body: JSON.stringify(body),
4825
+ });
4826
+ if (!response.ok) {
4827
+ const error = await response
4828
+ .json()
4829
+ .catch(() => ({ message: 'Speech synthesis failed' }));
4830
+ const playKitError = new PlayKitError(error.message || 'Speech synthesis failed', error.code, response.status);
4831
+ if (error.code === 'INSUFFICIENT_CREDITS' ||
4832
+ error.code === 'PLAYER_INSUFFICIENT_CREDIT' ||
4833
+ response.status === 402) {
4834
+ if (this.playerClient) {
4835
+ await this.playerClient.handleInsufficientCredits(playKitError);
4836
+ }
4837
+ }
4838
+ throw playKitError;
4839
+ }
4840
+ return response;
4841
+ }
4842
+ checkBalanceAfter() {
4843
+ if (this.playerClient) {
4844
+ this.playerClient.checkBalanceAfterApiCall().catch(() => {
4845
+ /* silently fail */
4846
+ });
4847
+ }
4848
+ }
4849
+ /**
4850
+ * Synthesize text into speech audio (raw bytes).
4851
+ */
4852
+ async synthesize(ttsConfig) {
4853
+ const endpoint = `/ai/${this.config.gameId}/v2/audio/speech`;
4854
+ try {
4855
+ const response = await this.post(endpoint, this.buildRequestBody(ttsConfig));
4856
+ // SUCCESS: response is raw audio bytes, NOT JSON.
4857
+ const audio = await response.arrayBuffer();
4858
+ const contentType = response.headers.get('Content-Type');
4859
+ const usageHeader = response.headers.get('X-Usage-Characters');
4860
+ const audioLengthHeader = response.headers.get('X-Audio-Length-Ms');
4861
+ const result = {
4862
+ audio,
4863
+ format: contentType || 'mp3',
4864
+ usageCharacters: Number(usageHeader) || 0,
4865
+ };
4866
+ if (audioLengthHeader !== null) {
4867
+ result.audioLengthMs = Number(audioLengthHeader) || 0;
4868
+ }
4869
+ this.checkBalanceAfter();
4870
+ return result;
4871
+ }
4872
+ catch (error) {
4873
+ if (error instanceof PlayKitError)
4874
+ throw error;
4875
+ throw new PlayKitError(error instanceof Error ? error.message : 'Unknown error', 'TTS_ERROR');
4876
+ }
4877
+ }
4878
+ /**
4879
+ * Synthesize text into speech AND return timestamp alignment. Hits the
4880
+ * `speech-with-timestamps` variant, whose success response is a JSON envelope
4881
+ * (base64 audio + alignment), so it is parsed as JSON — not raw bytes.
4882
+ */
4883
+ async synthesizeWithTimestamps(ttsConfig) {
4884
+ const endpoint = `/ai/${this.config.gameId}/v2/audio/speech-with-timestamps`;
4885
+ const body = this.buildRequestBody(ttsConfig);
4886
+ if (ttsConfig.granularity !== undefined) {
4887
+ body.subtitle_type = ttsConfig.granularity;
4888
+ }
4889
+ try {
4890
+ const response = await this.post(endpoint, body);
4891
+ const json = (await response.json());
4892
+ let alignment = null;
4893
+ if (json.alignment && Array.isArray(json.alignment.items)) {
4894
+ alignment = {
4895
+ granularity: json.alignment.granularity || 'word',
4896
+ items: json.alignment.items.map((it) => {
4897
+ var _a, _b, _c;
4898
+ return ({
4899
+ text: (_a = it.text) !== null && _a !== void 0 ? _a : '',
4900
+ startMs: (_b = it.start_ms) !== null && _b !== void 0 ? _b : 0,
4901
+ endMs: (_c = it.end_ms) !== null && _c !== void 0 ? _c : 0,
4902
+ textStart: it.text_start,
4903
+ textEnd: it.text_end,
4904
+ });
4905
+ }),
4906
+ };
4907
+ }
4908
+ const result = {
4909
+ audio: base64ToArrayBuffer(json.audio_base64),
4910
+ format: json.format || 'mp3',
4911
+ usageCharacters: Number(json.usage_characters) || 0,
4912
+ alignment,
4913
+ };
4914
+ if (json.audio_length_ms != null) {
4915
+ result.audioLengthMs = Number(json.audio_length_ms) || 0;
4916
+ }
4917
+ this.checkBalanceAfter();
4918
+ return result;
4919
+ }
4920
+ catch (error) {
4921
+ if (error instanceof PlayKitError)
4922
+ throw error;
4923
+ throw new PlayKitError(error instanceof Error ? error.message : 'Unknown error', 'TTS_ERROR');
4924
+ }
4925
+ }
4926
+ }
4927
+
4763
4928
  /******************************************************************************
4764
4929
  Copyright (c) Microsoft Corporation.
4765
4930
 
@@ -5424,6 +5589,88 @@
5424
5589
  }
5425
5590
  }
5426
5591
 
5592
+ /**
5593
+ * High-level client for text-to-speech synthesis
5594
+ */
5595
+ /**
5596
+ * Resolve an audio format/content-type string into a valid MIME type.
5597
+ * The provider's `result.format` may be a full MIME (e.g. 'audio/mpeg' from
5598
+ * the Content-Type header) or a bare token (e.g. 'mp3' from the fallback).
5599
+ * Full MIME strings pass through; bare tokens are mapped.
5600
+ */
5601
+ function contentTypeFor(format) {
5602
+ if (format.includes('/')) {
5603
+ return format;
5604
+ }
5605
+ switch (format.toLowerCase()) {
5606
+ case 'mp3':
5607
+ return 'audio/mpeg';
5608
+ case 'wav':
5609
+ return 'audio/wav';
5610
+ case 'ogg':
5611
+ return 'audio/ogg';
5612
+ case 'flac':
5613
+ return 'audio/flac';
5614
+ case 'aac':
5615
+ return 'audio/aac';
5616
+ case 'pcm':
5617
+ return 'audio/pcm';
5618
+ default:
5619
+ return 'audio/mpeg';
5620
+ }
5621
+ }
5622
+ class TTSClient {
5623
+ constructor(provider, model) {
5624
+ this.provider = provider;
5625
+ this.model = model || 'default-tts-model';
5626
+ }
5627
+ /**
5628
+ * Get the current model name
5629
+ */
5630
+ get modelName() {
5631
+ return this.model;
5632
+ }
5633
+ /**
5634
+ * Synthesize text into speech audio
5635
+ * @param config - Full TTS configuration
5636
+ * @returns TTS result containing raw audio bytes and usage metadata
5637
+ */
5638
+ async synthesize(config) {
5639
+ return this.provider.synthesize(Object.assign(Object.assign({}, config), { model: config.model || this.model }));
5640
+ }
5641
+ /**
5642
+ * Synthesize text into speech AND return timestamp alignment (word/sentence
5643
+ * timings). Returns the audio bytes plus an `alignment` object.
5644
+ * @param config - TTS configuration; `granularity` defaults to 'word'.
5645
+ */
5646
+ async synthesizeWithTimestamps(config) {
5647
+ return this.provider.synthesizeWithTimestamps(Object.assign(Object.assign({}, config), { model: config.model || this.model }));
5648
+ }
5649
+ /**
5650
+ * Synthesize text into speech and return it as a Blob (browser-friendly)
5651
+ * @param config - Full TTS configuration
5652
+ * @returns Audio Blob with the appropriate MIME type
5653
+ */
5654
+ async synthesizeToBlob(config) {
5655
+ const result = await this.synthesize(config);
5656
+ return new Blob([result.audio], { type: contentTypeFor(result.format) });
5657
+ }
5658
+ /**
5659
+ * Synthesize text into speech and return an object URL (browser only)
5660
+ * @param config - Full TTS configuration
5661
+ * @returns An object URL that can be assigned to an <audio> element
5662
+ * @throws PlayKitError if URL.createObjectURL is unavailable (e.g. Node.js)
5663
+ */
5664
+ async synthesizeToObjectURL(config) {
5665
+ if (typeof URL === 'undefined' || typeof URL.createObjectURL !== 'function') {
5666
+ throw new PlayKitError('URL.createObjectURL is not available in this environment. ' +
5667
+ 'Use synthesize() to access the raw audio bytes instead.', 'TTS_OBJECT_URL_UNAVAILABLE');
5668
+ }
5669
+ const blob = await this.synthesizeToBlob(config);
5670
+ return URL.createObjectURL(blob);
5671
+ }
5672
+ }
5673
+
5427
5674
  /**
5428
5675
  * Global AI Context Manager for managing NPC conversations and player context.
5429
5676
  *
@@ -6747,10 +6994,12 @@ Output ONLY a JSON array of ${predictionNum} strings, nothing else:
6747
6994
  this.chatProvider = new ChatProvider(this.authManager, this.config);
6748
6995
  this.imageProvider = new ImageProvider(this.authManager, this.config);
6749
6996
  this.transcriptionProvider = new TranscriptionProvider(this.authManager, this.config);
6997
+ this.ttsProvider = new TTSProvider(this.authManager, this.config);
6750
6998
  // Connect providers to player client for balance checking
6751
6999
  this.chatProvider.setPlayerClient(this.playerClient);
6752
7000
  this.imageProvider.setPlayerClient(this.playerClient);
6753
7001
  this.transcriptionProvider.setPlayerClient(this.playerClient);
7002
+ this.ttsProvider.setPlayerClient(this.playerClient);
6754
7003
  // Initialize AI context manager
6755
7004
  this.contextManager = new AIContextManager(this.config.aiContext);
6756
7005
  // Set chat client factory for compaction
@@ -6999,6 +7248,14 @@ Output ONLY a JSON array of ${predictionNum} strings, nothing else:
6999
7248
  this.ensureInitialized();
7000
7249
  return new TranscriptionClient(this.transcriptionProvider, model || this.config.defaultTranscriptionModel);
7001
7250
  }
7251
+ /**
7252
+ * Create a TTS client for text-to-speech
7253
+ * @param model - TTS model to use (default: 'default-tts-model')
7254
+ */
7255
+ createTTSClient(model) {
7256
+ this.ensureInitialized();
7257
+ return new TTSClient(this.ttsProvider, model || this.config.defaultTTSModel);
7258
+ }
7002
7259
  /**
7003
7260
  * Create an NPC client
7004
7261
  * Automatically registers with AIContextManager
@@ -7415,6 +7672,7 @@ Output ONLY a JSON array of ${predictionNum} strings, nothing else:
7415
7672
  RechargeManager: RechargeManager,
7416
7673
  SchemaLibrary: SchemaLibrary,
7417
7674
  StreamParser: StreamParser,
7675
+ TTSClient: TTSClient,
7418
7676
  TokenStorage: TokenStorage,
7419
7677
  TokenValidator: TokenValidator,
7420
7678
  TranscriptionClient: TranscriptionClient,