playkit-sdk 1.3.0 → 1.4.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -0
- package/dist/playkit-sdk.cjs.js +270 -12
- package/dist/playkit-sdk.cjs.js.map +1 -1
- package/dist/playkit-sdk.d.ts +186 -2
- package/dist/playkit-sdk.esm.js +270 -13
- package/dist/playkit-sdk.esm.js.map +1 -1
- package/dist/playkit-sdk.umd.js +270 -12
- package/dist/playkit-sdk.umd.js.map +1 -1
- package/package.json +3 -1
package/dist/playkit-sdk.d.ts
CHANGED
|
@@ -413,6 +413,8 @@ interface SDKConfig {
|
|
|
413
413
|
defaultImageModel?: string;
|
|
414
414
|
/** Default transcription model to use */
|
|
415
415
|
defaultTranscriptionModel?: string;
|
|
416
|
+
/** Default text-to-speech model to use */
|
|
417
|
+
defaultTTSModel?: string;
|
|
416
418
|
/**
|
|
417
419
|
* Enable debug logging
|
|
418
420
|
* @deprecated Use `logging.level` instead. Will be removed in v2.0.
|
|
@@ -876,6 +878,112 @@ interface TranscriptionResponse {
|
|
|
876
878
|
segments?: TranscriptionSegment[];
|
|
877
879
|
}
|
|
878
880
|
|
|
881
|
+
/**
|
|
882
|
+
* Text-to-speech (TTS) type definitions
|
|
883
|
+
*/
|
|
884
|
+
/** One voice in a `voiceMix` blend. */
|
|
885
|
+
interface VoiceMixEntry {
|
|
886
|
+
/** Voice id to include in the blend. */
|
|
887
|
+
voice: string;
|
|
888
|
+
/** Relative weight, integer 1–100. */
|
|
889
|
+
weight: number;
|
|
890
|
+
}
|
|
891
|
+
/** Neutral voice tuning knobs (ElevenLabs-style nested object). */
|
|
892
|
+
interface VoiceSettings {
|
|
893
|
+
/** Playback speed multiplier (0.5–2). */
|
|
894
|
+
speed?: number;
|
|
895
|
+
/** Volume (0–10). */
|
|
896
|
+
volume?: number;
|
|
897
|
+
/** Pitch adjustment (-12–12). */
|
|
898
|
+
pitch?: number;
|
|
899
|
+
/** Emotion, e.g. 'happy' | 'sad' | 'calm'. */
|
|
900
|
+
emotion?: string;
|
|
901
|
+
}
|
|
902
|
+
/**
|
|
903
|
+
* Configuration for text-to-speech requests.
|
|
904
|
+
*
|
|
905
|
+
* Inline markup in `text` is supported: pause via `[pause 1.5s]` or
|
|
906
|
+
* `<break time="1.5s"/>`, and interjections via `[laughs]` / `[breath]` (on
|
|
907
|
+
* models that support them). These are translated server-side.
|
|
908
|
+
*/
|
|
909
|
+
interface TTSConfig {
|
|
910
|
+
/** Text to synthesize into speech (max 10000 characters). */
|
|
911
|
+
text: string;
|
|
912
|
+
/**
|
|
913
|
+
* Model to use for synthesis.
|
|
914
|
+
* When omitted, the model given to `createTTSClient` is used, then `SDKConfig.defaultTTSModel`, then the 'default-tts-model' alias (resolved by the backend).
|
|
915
|
+
*/
|
|
916
|
+
model?: string;
|
|
917
|
+
/** Voice id to use (e.g., 'male-qn-qingse'). Mutually exclusive with `voiceMix`; the SDK does not enforce this itself and forwards both fields if both are set. */
|
|
918
|
+
voice?: string;
|
|
919
|
+
/**
|
|
920
|
+
* Blend multiple voices (1–4 entries, weights 1–100). Mutually exclusive with
|
|
921
|
+
* `voice`.
|
|
922
|
+
*/
|
|
923
|
+
voiceMix?: VoiceMixEntry[];
|
|
924
|
+
/** Neutral voice tuning (speed/volume/pitch/emotion). */
|
|
925
|
+
voiceSettings?: VoiceSettings;
|
|
926
|
+
/**
|
|
927
|
+
* Output audio format, ElevenLabs-style: `{codec}_{sampleRate}_{bitrateKbps}`.
|
|
928
|
+
* Examples: 'mp3', 'mp3_44100_128', 'pcm_24000', 'wav', 'flac_44100', 'opus'.
|
|
929
|
+
*/
|
|
930
|
+
outputFormat?: string;
|
|
931
|
+
/** Language hint to improve pronunciation, e.g. 'Chinese' | 'English' | 'auto'. */
|
|
932
|
+
language?: string;
|
|
933
|
+
/**
|
|
934
|
+
* Escape hatch for advanced provider-specific fields not modeled above
|
|
935
|
+
* (e.g. voice effects, pronunciation dictionaries).
|
|
936
|
+
*/
|
|
937
|
+
providerOptions?: Record<string, unknown>;
|
|
938
|
+
}
|
|
939
|
+
/**
|
|
940
|
+
* Options for simplified text-to-speech methods (everything except the text)
|
|
941
|
+
*/
|
|
942
|
+
type TTSOptions = Omit<TTSConfig, 'text'>;
|
|
943
|
+
/** Config for `synthesizeWithTimestamps`: adds subtitle granularity. */
|
|
944
|
+
interface TTSTimestampsConfig extends TTSConfig {
|
|
945
|
+
/** Timestamp granularity, sent as `subtitle_type` only when set; when omitted the backend default applies (documented as 'word'). */
|
|
946
|
+
granularity?: 'sentence' | 'word';
|
|
947
|
+
}
|
|
948
|
+
/**
|
|
949
|
+
* Result of a text-to-speech request
|
|
950
|
+
*/
|
|
951
|
+
interface TTSResult {
|
|
952
|
+
/** Raw audio bytes. */
|
|
953
|
+
audio: ArrayBuffer;
|
|
954
|
+
/** Audio format / content type (e.g., 'audio/mpeg' or 'mp3'). */
|
|
955
|
+
format: string;
|
|
956
|
+
/** Number of characters billed for this request (0 when the backend does not report a usable value). */
|
|
957
|
+
usageCharacters: number;
|
|
958
|
+
/** Length of the generated audio in milliseconds (if reported). */
|
|
959
|
+
audioLengthMs?: number;
|
|
960
|
+
}
|
|
961
|
+
/** One timed unit (word or sentence) in an {@link Alignment}. */
|
|
962
|
+
interface AlignmentItem {
|
|
963
|
+
/** The spoken text of this unit. */
|
|
964
|
+
text: string;
|
|
965
|
+
/** Start time in milliseconds. */
|
|
966
|
+
startMs: number;
|
|
967
|
+
/** End time in milliseconds. */
|
|
968
|
+
endMs: number;
|
|
969
|
+
/** Character offset of this unit's start in the input text (if reported). */
|
|
970
|
+
textStart?: number;
|
|
971
|
+
/** Character offset of this unit's end in the input text (if reported). */
|
|
972
|
+
textEnd?: number;
|
|
973
|
+
}
|
|
974
|
+
/** Timestamp alignment for synthesized speech. */
|
|
975
|
+
interface Alignment {
|
|
976
|
+
/** 'word' | 'sentence' — the granularity of `items`. */
|
|
977
|
+
granularity: string;
|
|
978
|
+
/** Timed units in order. */
|
|
979
|
+
items: AlignmentItem[];
|
|
980
|
+
}
|
|
981
|
+
/** Result of `synthesizeWithTimestamps`: audio plus timestamp alignment. */
|
|
982
|
+
interface TTSTimestampsResult extends TTSResult {
|
|
983
|
+
/** Word/sentence timings, or null if unavailable. */
|
|
984
|
+
alignment: Alignment | null;
|
|
985
|
+
}
|
|
986
|
+
|
|
879
987
|
/**
|
|
880
988
|
* Device Authorization Flow Manager
|
|
881
989
|
* Manages Device Auth polling flow for desktop/CLI/Unity applications
|
|
@@ -1911,6 +2019,76 @@ declare class TranscriptionClient {
|
|
|
1911
2019
|
transcribeFile(file: File, options?: TranscriptionOptions): Promise<TranscriptionResult>;
|
|
1912
2020
|
}
|
|
1913
2021
|
|
|
2022
|
+
/**
|
|
2023
|
+
* TTS provider for HTTP communication with the text-to-speech API
|
|
2024
|
+
*/
|
|
2025
|
+
|
|
2026
|
+
declare class TTSProvider {
|
|
2027
|
+
private authManager;
|
|
2028
|
+
private config;
|
|
2029
|
+
private baseURL;
|
|
2030
|
+
private playerClient?;
|
|
2031
|
+
constructor(authManager: AuthManager, config: SDKConfig);
|
|
2032
|
+
/**
|
|
2033
|
+
* Set player client for balance checking
|
|
2034
|
+
*/
|
|
2035
|
+
setPlayerClient(playerClient: PlayerClient): void;
|
|
2036
|
+
/** Build the JSON request body shared by both TTS endpoints: `model` and `text`, plus each optional field that was set, renamed to its snake_case wire name. */
|
|
2037
|
+
private buildRequestBody;
|
|
2038
|
+
/** POST to a TTS endpoint; throws a PlayKitError on a non-ok response. */
|
|
2039
|
+
private post;
|
|
2040
|
+
private checkBalanceAfter;
|
|
2041
|
+
/**
|
|
2042
|
+
* Synthesize text into speech audio (raw bytes).
|
|
2043
|
+
*/
|
|
2044
|
+
synthesize(ttsConfig: TTSConfig): Promise<TTSResult>;
|
|
2045
|
+
/**
|
|
2046
|
+
* Synthesize text into speech AND return timestamp alignment. Hits the
|
|
2047
|
+
* `speech-with-timestamps` variant, whose success response is a JSON envelope
|
|
2048
|
+
* (base64 audio + alignment), so it is parsed as JSON — not raw bytes.
|
|
2049
|
+
*/
|
|
2050
|
+
synthesizeWithTimestamps(ttsConfig: TTSTimestampsConfig): Promise<TTSTimestampsResult>;
|
|
2051
|
+
}
|
|
2052
|
+
|
|
2053
|
+
/**
|
|
2054
|
+
* High-level client for text-to-speech synthesis
|
|
2055
|
+
*/
|
|
2056
|
+
|
|
2057
|
+
declare class TTSClient {
|
|
2058
|
+
private provider;
|
|
2059
|
+
private model;
|
|
2060
|
+
constructor(provider: TTSProvider, model?: string);
|
|
2061
|
+
/**
|
|
2062
|
+
* Get the current model name
|
|
2063
|
+
*/
|
|
2064
|
+
get modelName(): string;
|
|
2065
|
+
/**
|
|
2066
|
+
* Synthesize text into speech audio
|
|
2067
|
+
* @param config - Full TTS configuration
|
|
2068
|
+
* @returns TTS result containing raw audio bytes and usage metadata
|
|
2069
|
+
*/
|
|
2070
|
+
synthesize(config: TTSConfig): Promise<TTSResult>;
|
|
2071
|
+
/**
|
|
2072
|
+
* Synthesize text into speech AND return timestamp alignment (word/sentence
|
|
2073
|
+
* timings). Returns the audio bytes plus an `alignment` object.
|
|
2074
|
+
* @param config - TTS configuration; `granularity` defaults to 'word'.
|
|
2075
|
+
*/
|
|
2076
|
+
synthesizeWithTimestamps(config: TTSTimestampsConfig): Promise<TTSTimestampsResult>;
|
|
2077
|
+
/**
|
|
2078
|
+
* Synthesize text into speech and return it as a Blob (browser-friendly)
|
|
2079
|
+
* @param config - Full TTS configuration
|
|
2080
|
+
* @returns Audio Blob with the appropriate MIME type
|
|
2081
|
+
*/
|
|
2082
|
+
synthesizeToBlob(config: TTSConfig): Promise<Blob>;
|
|
2083
|
+
/**
|
|
2084
|
+
* Synthesize text into speech and return an object URL (browser only)
|
|
2085
|
+
* @param config - Full TTS configuration
|
|
2086
|
+
* @returns An object URL that can be assigned to an <audio> element
|
|
2087
|
+
* @throws PlayKitError if URL.createObjectURL is unavailable (e.g. Node.js)
|
|
2088
|
+
*/
|
|
2089
|
+
synthesizeToObjectURL(config: TTSConfig): Promise<string>;
|
|
2090
|
+
}
|
|
2091
|
+
|
|
1914
2092
|
/**
|
|
1915
2093
|
* NPC Client for simplified conversation management
|
|
1916
2094
|
* Automatically handles conversation history
|
|
@@ -2359,6 +2537,7 @@ declare class PlayKitSDK extends EventEmitter {
|
|
|
2359
2537
|
private chatProvider;
|
|
2360
2538
|
private imageProvider;
|
|
2361
2539
|
private transcriptionProvider;
|
|
2540
|
+
private ttsProvider;
|
|
2362
2541
|
private contextManager;
|
|
2363
2542
|
private schemaLibrary;
|
|
2364
2543
|
private initialized;
|
|
@@ -2428,6 +2607,11 @@ declare class PlayKitSDK extends EventEmitter {
|
|
|
2428
2607
|
* @param model - Transcription model to use (default: 'whisper-large')
|
|
2429
2608
|
*/
|
|
2430
2609
|
createTranscriptionClient(model?: string): TranscriptionClient;
|
|
2610
|
+
/**
|
|
2611
|
+
* Create a TTS client for text-to-speech
|
|
2612
|
+
* @param model - TTS model to use (default: `SDKConfig.defaultTTSModel`, falling back to 'default-tts-model')
|
|
2613
|
+
*/
|
|
2614
|
+
createTTSClient(model?: string): TTSClient;
|
|
2431
2615
|
/**
|
|
2432
2616
|
* Create an NPC client
|
|
2433
2617
|
* Automatically registers with AIContextManager
|
|
@@ -2957,5 +3141,5 @@ declare global {
|
|
|
2957
3141
|
}
|
|
2958
3142
|
}
|
|
2959
3143
|
|
|
2960
|
-
export { AIContextManager, AuthFlowManager, AuthManager, BrowserStorage, BufferLogHandler, CallbackLogHandler, ChatClient, DeviceAuthFlowManager, ImageClient, LogLevel, Logger, MemoryStorage, NPCClient, PlayKitError, PlayKitSDK, PlayerClient, RechargeManager, SchemaLibrary, StreamParser, TokenStorage, TokenValidator, TranscriptionClient, createMultimodalMessage, createStorage, createTextMessage, PlayKitSDK as default, defaultContextManager, defaultSchemaLibrary, defaultTokenValidator, isLocalStorageAvailable };
|
|
2961
|
-
export type { AIContextManagerConfig, AIContextManagerEvents, APIResult, AudioContentPart, AuthState, ChatCompletionResponse, ChatConfig, ChatResult, ChatStreamConfig, ChatWithToolsConfig, ChatWithToolsStreamConfig, ConversationSaveData, DeveloperTokenFallbackConfig, DeviceAuthFlowOptions, DeviceAuthInitResult, DeviceAuthResult, GameInfo, GeneratedImage, IStorage, ImageContentPart, ImageGenerationConfig, ImageGenerationResponse, ImageInput, ImageSize, LogConfig, LogEntry, LogHandler, MemoryEntry, Message, MessageContent, MessageContentPart, MessageRole, NPCConfig, PlayerInfo, RechargeConfig, RechargeEvents, RechargeModalOptions, SDKConfig, SDKMode, SchemaEntry, SetNicknameRequest, SetNicknameResponse, StreamChunk, StructuredGenerationConfig, StructuredOutputConfig, StructuredResult, TextContentPart, TokenRefreshResult, TokenScope, TokenStorageOptions, TokenValidatorOptions, TokenVerificationResult, TranscriptionConfig, TranscriptionOptions, TranscriptionResult, TranscriptionSegment, ValidatedPlayerInfo };
|
|
3144
|
+
export { AIContextManager, AuthFlowManager, AuthManager, BrowserStorage, BufferLogHandler, CallbackLogHandler, ChatClient, DeviceAuthFlowManager, ImageClient, LogLevel, Logger, MemoryStorage, NPCClient, PlayKitError, PlayKitSDK, PlayerClient, RechargeManager, SchemaLibrary, StreamParser, TTSClient, TokenStorage, TokenValidator, TranscriptionClient, createMultimodalMessage, createStorage, createTextMessage, PlayKitSDK as default, defaultContextManager, defaultSchemaLibrary, defaultTokenValidator, isLocalStorageAvailable };
|
|
3145
|
+
export type { AIContextManagerConfig, AIContextManagerEvents, APIResult, Alignment, AlignmentItem, AudioContentPart, AuthState, ChatCompletionResponse, ChatConfig, ChatResult, ChatStreamConfig, ChatWithToolsConfig, ChatWithToolsStreamConfig, ConversationSaveData, DeveloperTokenFallbackConfig, DeviceAuthFlowOptions, DeviceAuthInitResult, DeviceAuthResult, GameInfo, GeneratedImage, IStorage, ImageContentPart, ImageGenerationConfig, ImageGenerationResponse, ImageInput, ImageSize, LogConfig, LogEntry, LogHandler, MemoryEntry, Message, MessageContent, MessageContentPart, MessageRole, NPCConfig, PlayerInfo, RechargeConfig, RechargeEvents, RechargeModalOptions, SDKConfig, SDKMode, SchemaEntry, SetNicknameRequest, SetNicknameResponse, StreamChunk, StructuredGenerationConfig, StructuredOutputConfig, StructuredResult, TTSConfig, TTSOptions, TTSResult, TTSTimestampsConfig, TTSTimestampsResult, TextContentPart, TokenRefreshResult, TokenScope, TokenStorageOptions, TokenValidatorOptions, TokenVerificationResult, TranscriptionConfig, TranscriptionOptions, TranscriptionResult, TranscriptionSegment, ValidatedPlayerInfo, VoiceMixEntry, VoiceSettings };
|
package/dist/playkit-sdk.esm.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* playkit-sdk v1.3.0
|
|
2
|
+
* playkit-sdk v1.4.0-beta.2
|
|
3
3
|
* PlayKit SDK for JavaScript
|
|
4
4
|
* @license SEE LICENSE IN LICENSE
|
|
5
5
|
*/
|
|
@@ -826,7 +826,7 @@ class TokenStorage {
|
|
|
826
826
|
}
|
|
827
827
|
|
|
828
828
|
const SDK_TYPE = 'Javascript';
|
|
829
|
-
const SDK_VERSION = '"1.
|
|
829
|
+
const SDK_VERSION = '"1.4.0-beta.2"';
|
|
830
830
|
function getSDKHeaders() {
|
|
831
831
|
return {
|
|
832
832
|
'X-SDK-Type': SDK_TYPE,
|
|
@@ -2422,7 +2422,7 @@ DeviceAuthFlowManager.activeInstance = null;
|
|
|
2422
2422
|
* Handles JWT exchange and token management
|
|
2423
2423
|
*/
|
|
2424
2424
|
// @ts-ignore - replaced at build time
|
|
2425
|
-
const DEFAULT_BASE_URL$
|
|
2425
|
+
const DEFAULT_BASE_URL$6 = "https://api.playkit.ai";
|
|
2426
2426
|
const JWT_EXCHANGE_ENDPOINT = '/api/external/exchange-jwt';
|
|
2427
2427
|
const TOKEN_REFRESH_ENDPOINT = '/api/auth/refresh';
|
|
2428
2428
|
class AuthManager extends EventEmitter {
|
|
@@ -2440,7 +2440,7 @@ class AuthManager extends EventEmitter {
|
|
|
2440
2440
|
this.storage = new TokenStorage({
|
|
2441
2441
|
mode: config.mode === 'server' ? 'server' : 'browser',
|
|
2442
2442
|
});
|
|
2443
|
-
this.baseURL = config.baseURL || DEFAULT_BASE_URL$
|
|
2443
|
+
this.baseURL = config.baseURL || DEFAULT_BASE_URL$6;
|
|
2444
2444
|
this.authState = {
|
|
2445
2445
|
isAuthenticated: false,
|
|
2446
2446
|
};
|
|
@@ -3529,7 +3529,7 @@ class RechargeManager extends EventEmitter {
|
|
|
3529
3529
|
* Player client for managing player information and credits
|
|
3530
3530
|
*/
|
|
3531
3531
|
// @ts-ignore - replaced at build time
|
|
3532
|
-
const DEFAULT_BASE_URL$
|
|
3532
|
+
const DEFAULT_BASE_URL$5 = "https://api.playkit.ai";
|
|
3533
3533
|
const PLAYER_INFO_ENDPOINT = '/api/external/player-info';
|
|
3534
3534
|
const SET_NICKNAME_ENDPOINT = '/api/external/set-game-player-nickname';
|
|
3535
3535
|
class PlayerClient extends EventEmitter {
|
|
@@ -3541,7 +3541,7 @@ class PlayerClient extends EventEmitter {
|
|
|
3541
3541
|
this.balanceCheckInterval = null;
|
|
3542
3542
|
this.logger = Logger.getLogger('PlayerClient');
|
|
3543
3543
|
this.authManager = authManager;
|
|
3544
|
-
this.baseURL = config.baseURL || DEFAULT_BASE_URL$
|
|
3544
|
+
this.baseURL = config.baseURL || DEFAULT_BASE_URL$5;
|
|
3545
3545
|
this.gameId = config.gameId;
|
|
3546
3546
|
this.rechargeConfig = {
|
|
3547
3547
|
autoShowBalanceModal: (_a = rechargeConfig.autoShowBalanceModal) !== null && _a !== void 0 ? _a : true,
|
|
@@ -3889,12 +3889,12 @@ function contentToString$1(content) {
|
|
|
3889
3889
|
return textParts.map(part => part.text).join('');
|
|
3890
3890
|
}
|
|
3891
3891
|
// @ts-ignore - replaced at build time
|
|
3892
|
-
const DEFAULT_BASE_URL$
|
|
3892
|
+
const DEFAULT_BASE_URL$4 = "https://api.playkit.ai";
|
|
3893
3893
|
class ChatProvider {
|
|
3894
3894
|
constructor(authManager, config) {
|
|
3895
3895
|
this.authManager = authManager;
|
|
3896
3896
|
this.config = config;
|
|
3897
|
-
this.baseURL = config.baseURL || DEFAULT_BASE_URL$
|
|
3897
|
+
this.baseURL = config.baseURL || DEFAULT_BASE_URL$4;
|
|
3898
3898
|
}
|
|
3899
3899
|
/**
|
|
3900
3900
|
* Set player client for balance checking
|
|
@@ -4221,12 +4221,12 @@ class ChatProvider {
|
|
|
4221
4221
|
* Image generation provider for HTTP communication with image API
|
|
4222
4222
|
*/
|
|
4223
4223
|
// @ts-ignore - replaced at build time
|
|
4224
|
-
const DEFAULT_BASE_URL$
|
|
4224
|
+
const DEFAULT_BASE_URL$3 = "https://api.playkit.ai";
|
|
4225
4225
|
class ImageProvider {
|
|
4226
4226
|
constructor(authManager, config) {
|
|
4227
4227
|
this.authManager = authManager;
|
|
4228
4228
|
this.config = config;
|
|
4229
|
-
this.baseURL = config.baseURL || DEFAULT_BASE_URL$
|
|
4229
|
+
this.baseURL = config.baseURL || DEFAULT_BASE_URL$3;
|
|
4230
4230
|
}
|
|
4231
4231
|
/**
|
|
4232
4232
|
* Set player client for balance checking
|
|
@@ -4310,12 +4310,12 @@ class ImageProvider {
|
|
|
4310
4310
|
* Transcription provider for HTTP communication with audio transcription API
|
|
4311
4311
|
*/
|
|
4312
4312
|
// @ts-ignore - replaced at build time
|
|
4313
|
-
const DEFAULT_BASE_URL$
|
|
4313
|
+
const DEFAULT_BASE_URL$2 = "https://api.playkit.ai";
|
|
4314
4314
|
class TranscriptionProvider {
|
|
4315
4315
|
constructor(authManager, config) {
|
|
4316
4316
|
this.authManager = authManager;
|
|
4317
4317
|
this.config = config;
|
|
4318
|
-
this.baseURL = config.baseURL || DEFAULT_BASE_URL$
|
|
4318
|
+
this.baseURL = config.baseURL || DEFAULT_BASE_URL$2;
|
|
4319
4319
|
}
|
|
4320
4320
|
/**
|
|
4321
4321
|
* Set player client for balance checking
|
|
@@ -4409,6 +4409,171 @@ class TranscriptionProvider {
|
|
|
4409
4409
|
}
|
|
4410
4410
|
}
|
|
4411
4411
|
|
|
4412
|
+
/**
|
|
4413
|
+
* TTS provider for HTTP communication with the text-to-speech API
|
|
4414
|
+
*/
|
|
4415
|
+
// @ts-ignore - replaced at build time
|
|
4416
|
+
const DEFAULT_BASE_URL$1 = "https://api.playkit.ai";
|
|
4417
|
+
/**
 * Decode a base64 string into an ArrayBuffer (browser + Node).
 *
 * Uses the global `atob` when it exists and falls back to `globalThis.Buffer`
 * otherwise.
 *
 * @param b64 - Base64-encoded payload.
 * @returns An ArrayBuffer holding exactly the decoded bytes.
 * @throws TypeError if `b64` is not a string (e.g. the field was missing from
 *   a response), or if the runtime offers neither `atob` nor `Buffer`.
 */
function base64ToArrayBuffer(b64) {
    // Reject non-strings up front: atob() would stringify `undefined` and then
    // fail with a decoding error that hides the real problem (missing data).
    if (typeof b64 !== 'string') {
        throw new TypeError('base64ToArrayBuffer: expected a base64 string, got ' + typeof b64);
    }
    if (typeof atob === 'function') {
        const bin = atob(b64);
        const bytes = new Uint8Array(bin.length);
        for (let i = 0; i < bin.length; i++) {
            bytes[i] = bin.charCodeAt(i);
        }
        return bytes.buffer;
    }
    // Node fallback
    const NodeBuffer = globalThis.Buffer;
    if (!NodeBuffer || typeof NodeBuffer.from !== 'function') {
        throw new TypeError('base64ToArrayBuffer: no base64 decoder (atob or Buffer) is available');
    }
    const buf = NodeBuffer.from(b64, 'base64');
    // Slice so the result covers only this view's bytes, not the whole backing buffer.
    return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
}
|
|
4430
|
+
class TTSProvider {
    /**
     * @param authManager - Used to refresh and read the bearer token before each request.
     * @param config - SDK configuration; `baseURL`, `gameId` and `defaultTTSModel` are read.
     */
    constructor(authManager, config) {
        this.authManager = authManager;
        this.config = config;
        this.baseURL = config.baseURL || DEFAULT_BASE_URL$1;
    }
    /**
     * Set player client for balance checking
     */
    setPlayerClient(playerClient) {
        this.playerClient = playerClient;
    }
    /**
     * Build the JSON request body shared by both TTS endpoints.
     *
     * The model is `ttsConfig.model`, else `config.defaultTTSModel`, else
     * 'default-tts-model'. Optional fields are copied only when they are not
     * `undefined` and are renamed to their snake_case wire names. No validation
     * happens here (e.g. `voice` and `voiceMix` are both forwarded if both are set).
     *
     * @param ttsConfig - TTS request configuration.
     * @returns A plain object ready for `JSON.stringify`.
     */
    buildRequestBody(ttsConfig) {
        const model = ttsConfig.model || this.config.defaultTTSModel || 'default-tts-model';
        const body = { model, text: ttsConfig.text };
        // [config key, wire key] pairs, listed in serialization order.
        const optionalFields = [
            ['voice', 'voice'],
            ['voiceMix', 'voice_mix'],
            ['voiceSettings', 'voice_settings'],
            ['outputFormat', 'output_format'],
            ['language', 'language'],
            ['providerOptions', 'provider_options'],
        ];
        for (const [configKey, wireKey] of optionalFields) {
            if (ttsConfig[configKey] !== undefined) {
                body[wireKey] = ttsConfig[configKey];
            }
        }
        return body;
    }
    /**
     * POST a JSON body to a TTS endpoint with the bearer token and SDK headers.
     *
     * @param endpoint - Path appended to the base URL.
     * @param body - Request payload; serialized with `JSON.stringify`.
     * @returns The fetch Response when `response.ok` is true.
     * @throws PlayKitError 'NOT_AUTHENTICATED' when no token is available, or an
     *   error built from the response payload and HTTP status on a non-ok response.
     */
    async post(endpoint, body) {
        await this.authManager.ensureValidToken();
        const token = this.authManager.getToken();
        if (!token) {
            throw new PlayKitError('Not authenticated', 'NOT_AUTHENTICATED');
        }
        const response = await fetch(`${this.baseURL}${endpoint}`, {
            method: 'POST',
            headers: Object.assign({ Authorization: `Bearer ${token}`, 'Content-Type': 'application/json' }, getSDKHeaders()),
            body: JSON.stringify(body),
        });
        if (response.ok) {
            return response;
        }
        // The error body may be missing, non-JSON, or valid JSON that is not an
        // object (e.g. `null`). Normalize to an object so the HTTP status is
        // still reported instead of failing on a property read.
        const payload = await response.json().catch(() => null);
        const error = payload !== null && typeof payload === 'object' ? payload : {};
        const playKitError = new PlayKitError(error.message || 'Speech synthesis failed', error.code, response.status);
        const outOfCredits = error.code === 'INSUFFICIENT_CREDITS' ||
            error.code === 'PLAYER_INSUFFICIENT_CREDIT' ||
            response.status === 402;
        if (outOfCredits && this.playerClient) {
            await this.playerClient.handleInsufficientCredits(playKitError);
        }
        throw playKitError;
    }
    /**
     * Kick off a balance check after a successful call. Deliberately
     * fire-and-forget: a failure here must not fail the synthesis result.
     */
    checkBalanceAfter() {
        if (this.playerClient) {
            this.playerClient.checkBalanceAfterApiCall().catch(() => {
                /* silently fail */
            });
        }
    }
    /**
     * Synthesize text into speech audio (raw bytes).
     *
     * @param ttsConfig - TTS request configuration.
     * @returns The audio bytes plus metadata read from response headers:
     *   `format` from Content-Type ('mp3' if absent), `usageCharacters` from
     *   X-Usage-Characters (0 if absent or non-numeric), and `audioLengthMs`
     *   from X-Audio-Length-Ms (only set when the header is present).
     * @throws PlayKitError; non-PlayKitError failures are wrapped with code 'TTS_ERROR'.
     */
    async synthesize(ttsConfig) {
        const endpoint = `/ai/${this.config.gameId}/v2/audio/speech`;
        try {
            const response = await this.post(endpoint, this.buildRequestBody(ttsConfig));
            // SUCCESS: response is raw audio bytes, NOT JSON.
            const audio = await response.arrayBuffer();
            const contentType = response.headers.get('Content-Type');
            const usageHeader = response.headers.get('X-Usage-Characters');
            const audioLengthHeader = response.headers.get('X-Audio-Length-Ms');
            const result = {
                audio,
                format: contentType || 'mp3',
                usageCharacters: Number(usageHeader) || 0,
            };
            if (audioLengthHeader !== null) {
                result.audioLengthMs = Number(audioLengthHeader) || 0;
            }
            this.checkBalanceAfter();
            return result;
        }
        catch (error) {
            if (error instanceof PlayKitError)
                throw error;
            throw new PlayKitError(error instanceof Error ? error.message : 'Unknown error', 'TTS_ERROR');
        }
    }
    /**
     * Synthesize text into speech AND return timestamp alignment. Hits the
     * `speech-with-timestamps` variant, whose success response is a JSON envelope
     * (base64 audio + alignment), so it is parsed as JSON — not raw bytes.
     *
     * @param ttsConfig - TTS request configuration; `granularity` is sent as
     *   `subtitle_type` only when it is set.
     * @returns Decoded audio, metadata from the envelope, and `alignment`
     *   (null when the envelope has no `alignment.items` array).
     * @throws PlayKitError; a response without a string `audio_base64` and any
     *   non-PlayKitError failure are reported with code 'TTS_ERROR'.
     */
    async synthesizeWithTimestamps(ttsConfig) {
        const endpoint = `/ai/${this.config.gameId}/v2/audio/speech-with-timestamps`;
        const body = this.buildRequestBody(ttsConfig);
        if (ttsConfig.granularity !== undefined) {
            body.subtitle_type = ttsConfig.granularity;
        }
        try {
            const response = await this.post(endpoint, body);
            const json = await response.json();
            // Fail with a clear message rather than a base64 decoding error
            // when the envelope carries no audio.
            if (!json || typeof json.audio_base64 !== 'string') {
                throw new PlayKitError('Speech synthesis response did not contain audio data', 'TTS_ERROR');
            }
            let alignment = null;
            if (json.alignment && Array.isArray(json.alignment.items)) {
                alignment = {
                    granularity: json.alignment.granularity || 'word',
                    // Map snake_case wire fields to the camelCase result shape.
                    items: json.alignment.items.map((it) => ({
                        text: it.text != null ? it.text : '',
                        startMs: it.start_ms != null ? it.start_ms : 0,
                        endMs: it.end_ms != null ? it.end_ms : 0,
                        textStart: it.text_start,
                        textEnd: it.text_end,
                    })),
                };
            }
            const result = {
                audio: base64ToArrayBuffer(json.audio_base64),
                format: json.format || 'mp3',
                usageCharacters: Number(json.usage_characters) || 0,
                alignment,
            };
            if (json.audio_length_ms != null) {
                result.audioLengthMs = Number(json.audio_length_ms) || 0;
            }
            this.checkBalanceAfter();
            return result;
        }
        catch (error) {
            if (error instanceof PlayKitError)
                throw error;
            throw new PlayKitError(error instanceof Error ? error.message : 'Unknown error', 'TTS_ERROR');
        }
    }
}
|
|
4576
|
+
|
|
4412
4577
|
/******************************************************************************
|
|
4413
4578
|
Copyright (c) Microsoft Corporation.
|
|
4414
4579
|
|
|
@@ -5073,6 +5238,88 @@ class TranscriptionClient {
|
|
|
5073
5238
|
}
|
|
5074
5239
|
}
|
|
5075
5240
|
|
|
5241
|
+
/**
|
|
5242
|
+
* High-level client for text-to-speech synthesis
|
|
5243
|
+
*/
|
|
5244
|
+
/**
 * Resolve an audio format/content-type string into a MIME type.
 *
 * The provider's `result.format` may be a full MIME (e.g. 'audio/mpeg' from
 * the Content-Type header), a bare codec token (e.g. 'mp3' from the fallback),
 * or an output-format token of the form `{codec}_{sampleRate}_{bitrateKbps}`
 * (e.g. 'mp3_44100_128', 'flac_44100', 'pcm_24000').
 *
 * @param format - MIME string or format token.
 * @returns The input unchanged when it contains '/', otherwise the MIME type
 *   for the token's codec; 'audio/mpeg' for unknown, empty or non-string input.
 */
function contentTypeFor(format) {
    if (typeof format !== 'string' || format === '') {
        return 'audio/mpeg';
    }
    // Full MIME strings pass through untouched.
    if (format.includes('/')) {
        return format;
    }
    // Only the codec decides the MIME type; drop any `_sampleRate_bitrate` suffix.
    const codec = format.trim().toLowerCase().split('_')[0];
    switch (codec) {
        case 'mp3':
            return 'audio/mpeg';
        case 'wav':
            return 'audio/wav';
        case 'ogg':
            return 'audio/ogg';
        case 'opus':
            // NOTE(review): assumes Opus is delivered in an Ogg container — confirm with the backend.
            return 'audio/ogg';
        case 'flac':
            return 'audio/flac';
        case 'aac':
            return 'audio/aac';
        case 'pcm':
            return 'audio/pcm';
        default:
            return 'audio/mpeg';
    }
}
|
|
5271
|
+
class TTSClient {
    /**
     * @param provider - TTS provider that performs the HTTP requests.
     * @param model - Model used when a request does not name one; falls back
     *   to 'default-tts-model' when omitted or empty.
     */
    constructor(provider, model) {
        this.provider = provider;
        this.model = model || 'default-tts-model';
    }
    /**
     * Get the current model name
     */
    get modelName() {
        return this.model;
    }
    /**
     * Synthesize text into speech audio
     * @param config - Full TTS configuration; `config.model` overrides the client's model
     * @returns TTS result containing raw audio bytes and usage metadata
     */
    async synthesize(config) {
        return this.provider.synthesize(Object.assign({}, config, { model: config.model || this.model }));
    }
    /**
     * Synthesize text into speech AND return timestamp alignment (word/sentence
     * timings). Returns the audio bytes plus an `alignment` object.
     * @param config - TTS configuration; `config.model` overrides the client's model
     */
    async synthesizeWithTimestamps(config) {
        return this.provider.synthesizeWithTimestamps(Object.assign({}, config, { model: config.model || this.model }));
    }
    /**
     * Synthesize text into speech and return it as a Blob (browser-friendly)
     * @param config - Full TTS configuration
     * @returns Audio Blob typed via `contentTypeFor(result.format)`
     * @throws PlayKitError if `Blob` is unavailable in this environment
     */
    async synthesizeToBlob(config) {
        // Check before synthesizing, like synthesizeToObjectURL does, so an
        // unsupported runtime fails before the request is made instead of
        // after it has completed.
        if (typeof Blob === 'undefined') {
            throw new PlayKitError('Blob is not available in this environment. ' +
                'Use synthesize() to access the raw audio bytes instead.', 'TTS_BLOB_UNAVAILABLE');
        }
        const result = await this.synthesize(config);
        return new Blob([result.audio], { type: contentTypeFor(result.format) });
    }
    /**
     * Synthesize text into speech and return an object URL (browser only).
     * The caller owns the URL and should release it with
     * `URL.revokeObjectURL()` once it is no longer needed.
     * @param config - Full TTS configuration
     * @returns An object URL that can be assigned to an <audio> element
     * @throws PlayKitError if URL.createObjectURL is unavailable (e.g. Node.js)
     */
    async synthesizeToObjectURL(config) {
        if (typeof URL === 'undefined' || typeof URL.createObjectURL !== 'function') {
            throw new PlayKitError('URL.createObjectURL is not available in this environment. ' +
                'Use synthesize() to access the raw audio bytes instead.', 'TTS_OBJECT_URL_UNAVAILABLE');
        }
        const blob = await this.synthesizeToBlob(config);
        return URL.createObjectURL(blob);
    }
}
|
|
5322
|
+
|
|
5076
5323
|
/**
|
|
5077
5324
|
* Global AI Context Manager for managing NPC conversations and player context.
|
|
5078
5325
|
*
|
|
@@ -6396,10 +6643,12 @@ class PlayKitSDK extends EventEmitter {
|
|
|
6396
6643
|
this.chatProvider = new ChatProvider(this.authManager, this.config);
|
|
6397
6644
|
this.imageProvider = new ImageProvider(this.authManager, this.config);
|
|
6398
6645
|
this.transcriptionProvider = new TranscriptionProvider(this.authManager, this.config);
|
|
6646
|
+
this.ttsProvider = new TTSProvider(this.authManager, this.config);
|
|
6399
6647
|
// Connect providers to player client for balance checking
|
|
6400
6648
|
this.chatProvider.setPlayerClient(this.playerClient);
|
|
6401
6649
|
this.imageProvider.setPlayerClient(this.playerClient);
|
|
6402
6650
|
this.transcriptionProvider.setPlayerClient(this.playerClient);
|
|
6651
|
+
this.ttsProvider.setPlayerClient(this.playerClient);
|
|
6403
6652
|
// Initialize AI context manager
|
|
6404
6653
|
this.contextManager = new AIContextManager(this.config.aiContext);
|
|
6405
6654
|
// Set chat client factory for compaction
|
|
@@ -6648,6 +6897,14 @@ class PlayKitSDK extends EventEmitter {
|
|
|
6648
6897
|
this.ensureInitialized();
|
|
6649
6898
|
return new TranscriptionClient(this.transcriptionProvider, model || this.config.defaultTranscriptionModel);
|
|
6650
6899
|
}
|
|
6900
|
+
/**
|
|
6901
|
+
* Create a TTS client for text-to-speech
|
|
6902
|
+
* @param model - TTS model to use (default: 'default-tts-model')
|
|
6903
|
+
*/
|
|
6904
|
+
createTTSClient(model) {
|
|
6905
|
+
this.ensureInitialized();
|
|
6906
|
+
return new TTSClient(this.ttsProvider, model || this.config.defaultTTSModel);
|
|
6907
|
+
}
|
|
6651
6908
|
/**
|
|
6652
6909
|
* Create an NPC client
|
|
6653
6910
|
* Automatically registers with AIContextManager
|
|
@@ -7038,5 +7295,5 @@ class TokenValidator {
|
|
|
7038
7295
|
*/
|
|
7039
7296
|
const defaultTokenValidator = new TokenValidator();
|
|
7040
7297
|
|
|
7041
|
-
export { AIContextManager, AuthFlowManager, AuthManager, BrowserStorage, BufferLogHandler, CallbackLogHandler, ChatClient, DeviceAuthFlowManager, ImageClient, LogLevel, Logger, MemoryStorage, NPCClient, PlayKitSDK, PlayerClient, RechargeManager, SchemaLibrary, StreamParser, TokenStorage, TokenValidator, TranscriptionClient, createMultimodalMessage, createStorage, createTextMessage, PlayKitSDK as default, defaultContextManager, defaultSchemaLibrary, defaultTokenValidator, isLocalStorageAvailable };
|
|
7298
|
+
export { AIContextManager, AuthFlowManager, AuthManager, BrowserStorage, BufferLogHandler, CallbackLogHandler, ChatClient, DeviceAuthFlowManager, ImageClient, LogLevel, Logger, MemoryStorage, NPCClient, PlayKitSDK, PlayerClient, RechargeManager, SchemaLibrary, StreamParser, TTSClient, TokenStorage, TokenValidator, TranscriptionClient, createMultimodalMessage, createStorage, createTextMessage, PlayKitSDK as default, defaultContextManager, defaultSchemaLibrary, defaultTokenValidator, isLocalStorageAvailable };
|
|
7042
7299
|
//# sourceMappingURL=playkit-sdk.esm.js.map
|