@supertone/supertone 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +4 -4
  2. package/custom_test/realtime_tts_player.ts +120 -16
  3. package/custom_test/test_pronunciation_dictionary.ts +227 -0
  4. package/custom_test/test_real_api.ts +580 -0
  5. package/custom_test/test_text_utils_chunk_text_punctuation.ts +55 -0
  6. package/dist/commonjs/lib/config.d.ts +1 -1
  7. package/dist/commonjs/lib/config.d.ts.map +1 -1
  8. package/dist/commonjs/lib/config.js +1 -1
  9. package/dist/commonjs/lib/config.js.map +1 -1
  10. package/dist/commonjs/lib/custom_utils/index.d.ts +1 -0
  11. package/dist/commonjs/lib/custom_utils/index.d.ts.map +1 -1
  12. package/dist/commonjs/lib/custom_utils/index.js +5 -1
  13. package/dist/commonjs/lib/custom_utils/index.js.map +1 -1
  14. package/dist/commonjs/lib/custom_utils/pronunciation_utils.d.ts +24 -0
  15. package/dist/commonjs/lib/custom_utils/pronunciation_utils.d.ts.map +1 -0
  16. package/dist/commonjs/lib/custom_utils/pronunciation_utils.js +145 -0
  17. package/dist/commonjs/lib/custom_utils/pronunciation_utils.js.map +1 -0
  18. package/dist/commonjs/lib/custom_utils/text_utils.d.ts +1 -1
  19. package/dist/commonjs/lib/custom_utils/text_utils.d.ts.map +1 -1
  20. package/dist/commonjs/lib/custom_utils/text_utils.js +21 -4
  21. package/dist/commonjs/lib/custom_utils/text_utils.js.map +1 -1
  22. package/dist/commonjs/sdk/texttospeech.d.ts +17 -6
  23. package/dist/commonjs/sdk/texttospeech.d.ts.map +1 -1
  24. package/dist/commonjs/sdk/texttospeech.js +48 -25
  25. package/dist/commonjs/sdk/texttospeech.js.map +1 -1
  26. package/dist/esm/lib/config.d.ts +1 -1
  27. package/dist/esm/lib/config.d.ts.map +1 -1
  28. package/dist/esm/lib/config.js +1 -1
  29. package/dist/esm/lib/config.js.map +1 -1
  30. package/dist/esm/lib/custom_utils/index.d.ts +1 -0
  31. package/dist/esm/lib/custom_utils/index.d.ts.map +1 -1
  32. package/dist/esm/lib/custom_utils/index.js +2 -0
  33. package/dist/esm/lib/custom_utils/index.js.map +1 -1
  34. package/dist/esm/lib/custom_utils/pronunciation_utils.d.ts +24 -0
  35. package/dist/esm/lib/custom_utils/pronunciation_utils.d.ts.map +1 -0
  36. package/dist/esm/lib/custom_utils/pronunciation_utils.js +140 -0
  37. package/dist/esm/lib/custom_utils/pronunciation_utils.js.map +1 -0
  38. package/dist/esm/lib/custom_utils/text_utils.d.ts +1 -1
  39. package/dist/esm/lib/custom_utils/text_utils.d.ts.map +1 -1
  40. package/dist/esm/lib/custom_utils/text_utils.js +21 -4
  41. package/dist/esm/lib/custom_utils/text_utils.js.map +1 -1
  42. package/dist/esm/sdk/texttospeech.d.ts +17 -6
  43. package/dist/esm/sdk/texttospeech.d.ts.map +1 -1
  44. package/dist/esm/sdk/texttospeech.js +49 -26
  45. package/dist/esm/sdk/texttospeech.js.map +1 -1
  46. package/jsr.json +1 -1
  47. package/package.json +1 -1
  48. package/src/lib/config.ts +41 -41
  49. package/src/lib/custom_utils/index.ts +7 -0
  50. package/src/lib/custom_utils/pronunciation_utils.ts +193 -0
  51. package/src/lib/custom_utils/text_utils.ts +25 -4
  52. package/src/sdk/texttospeech.ts +99 -68
package/src/lib/config.ts CHANGED
@@ -11,58 +11,58 @@ import { Params, pathToFunc } from "./url.js";
11
11
  * Contains the list of servers available to the SDK
12
12
  */
13
13
  export const ServerList = [
14
- /**
15
- * Production
16
- */
17
- "https://supertoneapi.com",
14
+ /**
15
+ * Production
16
+ */
17
+ "https://supertoneapi.com",
18
18
  ] as const;
19
19
 
20
20
  export type SDKOptions = {
21
- apiKey?: string | (() => Promise<string>) | undefined;
21
+ apiKey?: string | (() => Promise<string>) | undefined;
22
22
 
23
- httpClient?: HTTPClient;
24
- /**
25
- * Allows overriding the default server used by the SDK
26
- */
27
- serverIdx?: number | undefined;
28
- /**
29
- * Allows overriding the default server URL used by the SDK
30
- */
31
- serverURL?: string | undefined;
32
- /**
33
- * Allows overriding the default user agent used by the SDK
34
- */
35
- userAgent?: string | undefined;
36
- /**
37
- * Allows overriding the default retry config used by the SDK
38
- */
39
- retryConfig?: RetryConfig;
40
- timeoutMs?: number;
41
- debugLogger?: Logger;
23
+ httpClient?: HTTPClient;
24
+ /**
25
+ * Allows overriding the default server used by the SDK
26
+ */
27
+ serverIdx?: number | undefined;
28
+ /**
29
+ * Allows overriding the default server URL used by the SDK
30
+ */
31
+ serverURL?: string | undefined;
32
+ /**
33
+ * Allows overriding the default user agent used by the SDK
34
+ */
35
+ userAgent?: string | undefined;
36
+ /**
37
+ * Allows overriding the default retry config used by the SDK
38
+ */
39
+ retryConfig?: RetryConfig;
40
+ timeoutMs?: number;
41
+ debugLogger?: Logger;
42
42
  };
43
43
 
44
44
  export function serverURLFromOptions(options: SDKOptions): URL | null {
45
- let serverURL = options.serverURL;
45
+ let serverURL = options.serverURL;
46
46
 
47
- const params: Params = {};
47
+ const params: Params = {};
48
48
 
49
- if (!serverURL) {
50
- const serverIdx = options.serverIdx ?? 0;
51
- if (serverIdx < 0 || serverIdx >= ServerList.length) {
52
- throw new Error(`Invalid server index ${serverIdx}`);
53
- }
54
- serverURL = ServerList[serverIdx] || "";
55
- }
49
+ if (!serverURL) {
50
+ const serverIdx = options.serverIdx ?? 0;
51
+ if (serverIdx < 0 || serverIdx >= ServerList.length) {
52
+ throw new Error(`Invalid server index ${serverIdx}`);
53
+ }
54
+ serverURL = ServerList[serverIdx] || "";
55
+ }
56
56
 
57
- const u = pathToFunc(serverURL)(params);
58
- return new URL(u);
57
+ const u = pathToFunc(serverURL)(params);
58
+ return new URL(u);
59
59
  }
60
60
 
61
61
  export const SDK_METADATA = {
62
- language: "typescript",
63
- openapiDocVersion: "0.8.69",
64
- sdkVersion: "0.1.2",
65
- genVersion: "2.686.7",
66
- userAgent:
67
- "speakeasy-sdk/typescript 0.1.2 2.686.7 0.8.69 @supertone/supertone",
62
+ language: "typescript",
63
+ openapiDocVersion: "0.8.69",
64
+ sdkVersion: "0.1.3",
65
+ genVersion: "2.686.7",
66
+ userAgent:
67
+ "speakeasy-sdk/typescript 0.1.2 2.686.7 0.8.69 @supertone/supertone",
68
68
  } as const;
@@ -11,6 +11,13 @@ export * from "./constants.js";
11
11
  // Export text utilities
12
12
  export { chunkText, extractAudioFromNdjson } from "./text_utils.js";
13
13
 
14
+ // Export pronunciation utilities
15
+ export {
16
+ applyPronunciationDictionary,
17
+ PronunciationDictionaryValidationError,
18
+ type PronunciationDictionaryEntry,
19
+ } from "./pronunciation_utils.js";
20
+
14
21
  // Export audio utilities
15
22
  export {
16
23
  mergeWavBinary,
@@ -0,0 +1,193 @@
1
/**
 * Pronunciation dictionary substitution utilities.
 *
 * Mirrors the Python implementation policy:
 * - Apply rules in input order
 * - partial_match=false: word-boundary exact matches only
 * - partial_match=true: substring matches (no boundaries)
 * - No re-substitution: the text is tracked as a list of segments, and a
 *   segment produced by one rule is never scanned by later rules
 *
 * Validation:
 * - pronunciation_dictionary omitted/undefined/null/empty array -> return original text
 * - pronunciation_dictionary must be an array of objects
 * - each object must have: text (string, non-empty), pronunciation (string, non-empty), partial_match (boolean)
 */

/** Thrown when `text` or the pronunciation dictionary fails validation. */
export class PronunciationDictionaryValidationError extends Error {
  constructor(message: string) {
    super(message);
    this.name = "PronunciationDictionaryValidationError";
  }
}

/** One substitution rule of a pronunciation dictionary. */
export type PronunciationDictionaryEntry = {
  /** Source text to look for (must be non-empty). */
  text: string;
  /** Replacement written in place of each match (must be non-empty). */
  pronunciation: string;
  /** true: match anywhere as a substring; false: match only between word boundaries. */
  partial_match: boolean;
};

/**
 * Characters treated as "word characters" for exact matching: letters,
 * numbers and underscore. JS `\w` is ASCII-only, so Unicode property escapes
 * are used instead. Must not include surrounding [] because it is embedded
 * into other character classes.
 */
const WORD_CHARS = "\\p{L}\\p{N}_";

/** Field names every dictionary entry must carry, in reporting order. */
const REQUIRED_FIELDS = ["text", "pronunciation", "partial_match"] as const;

/** A run of text; `replaced` marks output of an earlier rule (never rescanned). */
type Segment = { value: string; replaced: boolean };

/**
 * Apply a pronunciation dictionary to `text`.
 *
 * Rules are applied in array order. Text inserted by a rule is kept in its
 * own segment and skipped by all later rules, so a later rule can neither
 * re-substitute nor corrupt an earlier replacement. The previous
 * placeholder-token approach let later rules such as "PD" or "0" match
 * inside the placeholder itself and leak private-use characters into the
 * output.
 *
 * For exact matching, the edge of a replaced segment counts as a word
 * boundary, exactly as the non-word placeholder characters did before.
 *
 * @param text Input text.
 * @param pronunciation_dictionary Array of {@link PronunciationDictionaryEntry};
 *   null, undefined or an empty array returns `text` unchanged.
 * @returns The text with all substitutions applied.
 * @throws PronunciationDictionaryValidationError when `text` is not a string,
 *   the dictionary is not an array, or an entry is malformed.
 */
export function applyPronunciationDictionary(
  text: string,
  pronunciation_dictionary?: unknown
): string {
  // Match Python behavior: return early for null, undefined, or empty array
  if (
    pronunciation_dictionary == null ||
    (Array.isArray(pronunciation_dictionary) &&
      pronunciation_dictionary.length === 0)
  ) {
    return text;
  }

  if (typeof text !== "string") {
    throw new PronunciationDictionaryValidationError(
      `\`text\` must be string, got ${typeof text}`
    );
  }

  if (!Array.isArray(pronunciation_dictionary)) {
    throw new PronunciationDictionaryValidationError(
      "`pronunciation_dictionary` must be an array of objects"
    );
  }

  let segments: Segment[] = [{ value: text, replaced: false }];

  for (let idx = 0; idx < pronunciation_dictionary.length; idx++) {
    const entry = validateEntry(pronunciation_dictionary[idx], idx);
    const pattern = buildRulePattern(entry);

    const next: Segment[] = [];
    for (const segment of segments) {
      if (segment.replaced) {
        // Output of an earlier rule: carry over untouched.
        next.push(segment);
      } else {
        substituteInto(next, segment.value, pattern, entry.pronunciation);
      }
    }
    segments = next;
  }

  return segments.map((segment) => segment.value).join("");
}

/**
 * Build the global regex for one rule. Group 1 is the consumed left boundary
 * (always empty for partial matches), group 2 is the matched source text.
 *
 * Lookbehind is avoided for broader runtime compatibility; the left boundary
 * is captured instead and handed back to the literal text by the caller.
 */
function buildRulePattern(entry: PronunciationDictionaryEntry): RegExp {
  const source = escapeRegExp(entry.text);
  if (entry.partial_match) {
    return new RegExp(`()(${source})`, "g");
  }
  return new RegExp(
    `(^|[^${WORD_CHARS}])(${source})(?=[^${WORD_CHARS}]|$)`,
    "gu"
  );
}

/**
 * Scan one literal run with `pattern`, appending literal and replaced
 * segments to `out` in order. Empty literal runs are dropped.
 */
function substituteInto(
  out: Segment[],
  value: string,
  pattern: RegExp,
  pronunciation: string
): void {
  pattern.lastIndex = 0;
  let cursor = 0;
  let match: RegExpExecArray | null;

  while ((match = pattern.exec(value)) !== null) {
    // The captured boundary character stays part of the literal text.
    const literalEnd = match.index + (match[1] ?? "").length;
    if (literalEnd > cursor) {
      out.push({ value: value.slice(cursor, literalEnd), replaced: false });
    }
    out.push({ value: pronunciation, replaced: true });
    // Source text is non-empty, so lastIndex always advances past the match.
    cursor = pattern.lastIndex;
  }

  if (cursor < value.length) {
    out.push({ value: value.slice(cursor), replaced: false });
  }
}

/**
 * Validate one raw dictionary entry and return it as a typed entry.
 *
 * @param raw Untrusted value taken from the dictionary array.
 * @param idx Position of the entry, used in error messages.
 * @throws PronunciationDictionaryValidationError on any shape or type violation.
 */
function validateEntry(raw: unknown, idx: number): PronunciationDictionaryEntry {
  if (raw == null || typeof raw !== "object" || Array.isArray(raw)) {
    const got =
      raw === null ? "null" : Array.isArray(raw) ? "array" : typeof raw;
    throw new PronunciationDictionaryValidationError(
      `pronunciation_dictionary[${idx}] must be an object, got ${got}`
    );
  }

  const obj = raw as Record<string, unknown>;
  const missing = REQUIRED_FIELDS.filter((field) => !(field in obj));
  if (missing.length) {
    throw new PronunciationDictionaryValidationError(
      `pronunciation_dictionary[${idx}] missing required field(s): ${missing.join(", ")}`
    );
  }

  const src = obj["text"];
  const dst = obj["pronunciation"];
  const partial = obj["partial_match"];

  if (typeof src !== "string") {
    throw new PronunciationDictionaryValidationError(
      `pronunciation_dictionary[${idx}].text must be string, got ${typeof src}`
    );
  }
  if (typeof dst !== "string") {
    throw new PronunciationDictionaryValidationError(
      `pronunciation_dictionary[${idx}].pronunciation must be string, got ${typeof dst}`
    );
  }
  if (typeof partial !== "boolean") {
    throw new PronunciationDictionaryValidationError(
      `pronunciation_dictionary[${idx}].partial_match must be boolean, got ${typeof partial}`
    );
  }

  if (src === "") {
    throw new PronunciationDictionaryValidationError(
      `pronunciation_dictionary[${idx}].text must not be empty`
    );
  }
  if (dst === "") {
    throw new PronunciationDictionaryValidationError(
      `pronunciation_dictionary[${idx}].pronunciation must not be empty`
    );
  }

  return { text: src, pronunciation: dst, partial_match: partial };
}

/** Escape regex syntax characters so `s` matches literally inside a pattern. */
function escapeRegExp(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
192
+
193
+
@@ -7,6 +7,28 @@
7
7
 
8
8
  import { DEFAULT_MAX_TEXT_LENGTH } from "./constants.js";
9
9
 
10
/**
 * Sentence-ending punctuation pattern for multilingual support.
 *
 * Supported languages: English, Korean, Japanese, Bulgarian, Czech, Danish,
 * Greek, Spanish, Estonian, Finnish, Hungarian, Italian, Dutch, Polish,
 * Portuguese, Romanian, Arabic, German, French, Hindi, Indonesian, Russian,
 * Vietnamese, Chinese, Thai, and more.
 *
 * Every non-ASCII character is written as a \u escape. Fullwidth forms are
 * visually close to their ASCII counterparts and are folded into them by
 * Unicode compatibility normalization, which would silently drop them from
 * the set if they were typed literally.
 */
const SENTENCE_PUNCTUATION =
  ".!?;:" + // ASCII basics
  "\u2026\u2025" + // ellipsis (U+2026), two-dot leader (U+2025)
  "\u3002\u3001" + // CJK ideographic full stop and ideographic comma
  "\uFF01\uFF1F\uFF1B\uFF1A" + // CJK fullwidth ! ? ; :
  "\uFF61\uFF64" + // halfwidth ideographic full stop and comma
  "\u061F\u061B\u06D4\u060C" + // Arabic/Urdu question mark, semicolon, full stop, comma
  "\u0964\u0965" + // Devanagari danda and double danda
  "\u037E"; // Greek question mark

/**
 * Matches one run of sentence punctuation plus trailing whitespace. The
 * capture group makes `String.prototype.split` keep the delimiters in its
 * result.
 */
const SENTENCE_SPLIT_PATTERN = new RegExp(
  `([${SENTENCE_PUNCTUATION}]+\\s*)`,
  "u"
);
31
+
10
32
  /**
11
33
  * Check if text contains spaces (to determine if word-based splitting is possible)
12
34
  *
@@ -104,7 +126,7 @@ function splitOversizedChunk(chunk: string, maxLength: number): string[] {
104
126
  * word/character boundaries when necessary.
105
127
  *
106
128
  * Chunking Strategy:
107
- * 1. First, split by sentence boundaries (punctuation: .!?;:)
129
+ * 1. First, split by sentence boundaries (multilingual punctuation)
108
130
  * 2. Merge sentences into chunks up to maxLength
109
131
  * 3. If a sentence exceeds maxLength:
110
132
  * - For text with spaces: split by words
@@ -122,9 +144,8 @@ export function chunkText(
122
144
  return [text];
123
145
  }
124
146
 
125
- // Step 1: Split by sentence boundaries (including various punctuation marks)
126
- // Includes Western punctuation (.!?;:) and CJK punctuation (。!?;:)
127
- const sentences = text.split(/([.!?;:。!?;:]+\s*)/);
147
+ // Step 1: Split by sentence boundaries (multilingual punctuation)
148
+ const sentences = text.split(SENTENCE_SPLIT_PATTERN);
128
149
 
129
150
  const preliminaryChunks: string[] = [];
130
151
  let currentChunk = "";
@@ -21,9 +21,23 @@ import {
21
21
  detectAudioFormat,
22
22
  mergeMp3Binary,
23
23
  mergeWavBinary,
24
+ applyPronunciationDictionary,
25
+ type PronunciationDictionaryEntry,
24
26
  removeMp3Header,
25
27
  removeWavHeader,
26
28
  } from "../lib/custom_utils/index.js";
29
+
30
/** Options shared by the chunking-aware createSpeech and streamSpeech paths. */
type SpeechChunkingOptions = {
  /** Text length above which the text is split into chunks (falls back to DEFAULT_MAX_TEXT_LENGTH). */
  maxTextLength?: number;
  /** Optional pronunciation rules applied to the text before chunking. */
  pronunciationDictionary?: PronunciationDictionaryEntry[];
};

/** Request options for createSpeech. */
type CreateSpeechOptions = RequestOptions &
  SpeechChunkingOptions & {
    acceptHeaderOverride?: CreateSpeechAcceptEnum;
  };

/** Request options for streamSpeech. */
type StreamSpeechOptions = RequestOptions &
  SpeechChunkingOptions & {
    acceptHeaderOverride?: StreamSpeechAcceptEnum;
  };
27
41
  // #endregion imports
28
42
 
29
43
  export { CreateSpeechAcceptEnum } from "../funcs/textToSpeechCreateSpeech.js";
@@ -53,11 +67,22 @@ export class TextToSpeech extends ClientSDK {
53
67
  return text.length > maxLength;
54
68
  }
55
69
 
70
/**
 * Opt-in pronunciation substitution, applied to the text before chunking.
 *
 * Returns `text` untouched when no dictionary is supplied; otherwise
 * delegates to the shared `applyPronunciationDictionary` utility.
 */
private applyPronunciationDictionary(
  text: string,
  pronunciationDictionary?: PronunciationDictionaryEntry[]
): string {
  return pronunciationDictionary
    ? applyPronunciationDictionary(text, pronunciationDictionary)
    : text;
}
80
+
56
81
  /**
57
82
  * Extract audio data from response
58
83
  */
59
84
  private async extractAudioFromResponse(
60
- response: operations.CreateSpeechResponse | operations.StreamSpeechResponse,
85
+ response: operations.CreateSpeechResponse | operations.StreamSpeechResponse
61
86
  ): Promise<Uint8Array> {
62
87
  const result = response.result;
63
88
 
@@ -74,9 +99,9 @@ export class TextToSpeech extends ClientSDK {
74
99
  }
75
100
 
76
101
  if (
77
- typeof result === "object"
78
- && result !== null
79
- && "getReader" in result
102
+ typeof result === "object" &&
103
+ result !== null &&
104
+ "getReader" in result
80
105
  ) {
81
106
  // ReadableStream
82
107
  const reader = (result as ReadableStream<Uint8Array>).getReader();
@@ -131,14 +156,15 @@ export class TextToSpeech extends ClientSDK {
131
156
  // Enhanced error message with object inspection
132
157
  const resultType = typeof result;
133
158
  const resultConstructor = result?.constructor?.name || "unknown";
134
- const resultKeys = result && typeof result === "object"
135
- ? Object.keys(result).join(", ")
136
- : "N/A";
159
+ const resultKeys =
160
+ result && typeof result === "object"
161
+ ? Object.keys(result).join(", ")
162
+ : "N/A";
137
163
 
138
164
  throw new Error(
139
- `Unsupported result type: ${resultType}, `
140
- + `constructor: ${resultConstructor}, `
141
- + `keys: [${resultKeys}]`,
165
+ `Unsupported result type: ${resultType}, ` +
166
+ `constructor: ${resultConstructor}, ` +
167
+ `keys: [${resultKeys}]`
142
168
  );
143
169
  }
144
170
 
@@ -146,7 +172,7 @@ export class TextToSpeech extends ClientSDK {
146
172
  * Merge multiple audio responses into one
147
173
  */
148
174
  private async mergeAudioResponses(
149
- responses: operations.CreateSpeechResponse[],
175
+ responses: operations.CreateSpeechResponse[]
150
176
  ): Promise<operations.CreateSpeechResponse> {
151
177
  if (responses.length === 0) {
152
178
  throw new Error("No responses to merge");
@@ -163,7 +189,7 @@ export class TextToSpeech extends ClientSDK {
163
189
 
164
190
  // Extract audio data from all responses
165
191
  const audioChunks: Uint8Array[] = await Promise.all(
166
- responses.map((r) => this.extractAudioFromResponse(r)),
192
+ responses.map((r) => this.extractAudioFromResponse(r))
167
193
  );
168
194
 
169
195
  const firstChunk = audioChunks[0];
@@ -208,14 +234,14 @@ export class TextToSpeech extends ClientSDK {
208
234
  originalRequest: operations.StreamSpeechRequest,
209
235
  options?: RequestOptions & {
210
236
  acceptHeaderOverride?: StreamSpeechAcceptEnum;
211
- },
237
+ }
212
238
  ): operations.StreamSpeechResponse {
213
239
  let audioFormat: "wav" | "mp3" | null = null;
214
240
  let isFirstAudioChunk = true;
215
241
 
216
242
  // Use arrow function to preserve 'this' context
217
243
  const processStream = async (
218
- controller: ReadableStreamDefaultController<Uint8Array>,
244
+ controller: ReadableStreamDefaultController<Uint8Array>
219
245
  ) => {
220
246
  try {
221
247
  // Stream first response (first text chunk)
@@ -263,7 +289,7 @@ export class TextToSpeech extends ClientSDK {
263
289
  }
264
290
  const chunkResponse = await this._streamSpeechOriginal(
265
291
  chunkRequest,
266
- options,
292
+ options
267
293
  );
268
294
 
269
295
  // Stream this text chunk's audio
@@ -323,40 +349,51 @@ export class TextToSpeech extends ClientSDK {
323
349
  */
324
350
  private async createSpeechWithChunking(
325
351
  request: operations.CreateSpeechRequest,
326
- options?: RequestOptions & {
327
- acceptHeaderOverride?: CreateSpeechAcceptEnum;
328
- maxTextLength?: number;
329
- },
352
+ options?: CreateSpeechOptions
330
353
  ): Promise<operations.CreateSpeechResponse> {
354
+ const { pronunciationDictionary, ...restOptions } = options ?? {};
331
355
  const maxLength = options?.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH;
332
- const text = request.apiConvertTextToSpeechUsingCharacterRequest?.text
333
- ?? "";
356
+ const text =
357
+ request.apiConvertTextToSpeechUsingCharacterRequest?.text ?? "";
358
+ const normalizedText = this.applyPronunciationDictionary(
359
+ text,
360
+ pronunciationDictionary
361
+ );
362
+
363
+ const baseRequest: operations.CreateSpeechRequest = {
364
+ ...request,
365
+ apiConvertTextToSpeechUsingCharacterRequest: {
366
+ ...request.apiConvertTextToSpeechUsingCharacterRequest,
367
+ text: normalizedText,
368
+ },
369
+ };
334
370
 
335
371
  // Short text: call original method directly
336
- if (!this.shouldChunkText(text, maxLength)) {
372
+ if (!this.shouldChunkText(normalizedText, maxLength)) {
337
373
  if (!this._createSpeechOriginal) {
338
374
  throw new Error("Original createSpeech method not found");
339
375
  }
340
- return this._createSpeechOriginal(request, options);
376
+ return this._createSpeechOriginal(baseRequest, restOptions);
341
377
  }
342
378
 
343
379
  // Long text: chunk, process sequentially (to avoid schema parsing issues), and merge
344
- const textChunks = chunkText(text, maxLength);
380
+ const textChunks = chunkText(normalizedText, maxLength);
345
381
 
346
382
  // Determine Accept header based on output format
347
- const outputFormat = request.apiConvertTextToSpeechUsingCharacterRequest
348
- ?.outputFormat;
349
- const acceptHeader: CreateSpeechAcceptEnum = outputFormat === "mp3"
350
- ? CreateSpeechAcceptEnum.audioMpeg
351
- : CreateSpeechAcceptEnum.audioWav;
383
+ const outputFormat =
384
+ baseRequest.apiConvertTextToSpeechUsingCharacterRequest?.outputFormat;
385
+ const acceptHeader: CreateSpeechAcceptEnum =
386
+ outputFormat === "mp3"
387
+ ? CreateSpeechAcceptEnum.audioMpeg
388
+ : CreateSpeechAcceptEnum.audioWav;
352
389
 
353
390
  // Process chunks sequentially to avoid race conditions in schema parsing
354
391
  const responses: operations.CreateSpeechResponse[] = [];
355
392
  for (const chunk of textChunks) {
356
393
  const chunkRequest: operations.CreateSpeechRequest = {
357
- ...request,
394
+ ...baseRequest,
358
395
  apiConvertTextToSpeechUsingCharacterRequest: {
359
- ...request.apiConvertTextToSpeechUsingCharacterRequest,
396
+ ...baseRequest.apiConvertTextToSpeechUsingCharacterRequest,
360
397
  text: chunk,
361
398
  },
362
399
  };
@@ -364,7 +401,7 @@ export class TextToSpeech extends ClientSDK {
364
401
  throw new Error("Original createSpeech method not found");
365
402
  }
366
403
  const response = await this._createSpeechOriginal(chunkRequest, {
367
- ...options,
404
+ ...restOptions,
368
405
  acceptHeaderOverride: acceptHeader,
369
406
  });
370
407
  responses.push(response);
@@ -378,25 +415,35 @@ export class TextToSpeech extends ClientSDK {
378
415
  */
379
416
  private async streamSpeechWithChunking(
380
417
  request: operations.StreamSpeechRequest,
381
- options?: RequestOptions & {
382
- acceptHeaderOverride?: StreamSpeechAcceptEnum;
383
- maxTextLength?: number;
384
- },
418
+ options?: StreamSpeechOptions
385
419
  ): Promise<operations.StreamSpeechResponse> {
420
+ const { pronunciationDictionary, ...restOptions } = options ?? {};
386
421
  const maxLength = options?.maxTextLength ?? DEFAULT_MAX_TEXT_LENGTH;
387
- const text = request.apiConvertTextToSpeechUsingCharacterRequest?.text
388
- ?? "";
422
+ const text =
423
+ request.apiConvertTextToSpeechUsingCharacterRequest?.text ?? "";
424
+ const normalizedText = this.applyPronunciationDictionary(
425
+ text,
426
+ pronunciationDictionary
427
+ );
428
+
429
+ const baseRequest: operations.StreamSpeechRequest = {
430
+ ...request,
431
+ apiConvertTextToSpeechUsingCharacterRequest: {
432
+ ...request.apiConvertTextToSpeechUsingCharacterRequest,
433
+ text: normalizedText,
434
+ },
435
+ };
389
436
 
390
437
  // Short text: call original method directly
391
- if (!this.shouldChunkText(text, maxLength)) {
438
+ if (!this.shouldChunkText(normalizedText, maxLength)) {
392
439
  if (!this._streamSpeechOriginal) {
393
440
  throw new Error("Original streamSpeech method not found");
394
441
  }
395
- return this._streamSpeechOriginal(request, options);
442
+ return this._streamSpeechOriginal(baseRequest, restOptions);
396
443
  }
397
444
 
398
445
  // Long text: chunk and stream sequentially
399
- const textChunks = chunkText(text, maxLength);
446
+ const textChunks = chunkText(normalizedText, maxLength);
400
447
 
401
448
  if (textChunks.length === 0) {
402
449
  throw new Error("No text chunks to process");
@@ -409,9 +456,9 @@ export class TextToSpeech extends ClientSDK {
409
456
 
410
457
  // Get first response to start streaming
411
458
  const firstChunkRequest: operations.StreamSpeechRequest = {
412
- ...request,
459
+ ...baseRequest,
413
460
  apiConvertTextToSpeechUsingCharacterRequest: {
414
- ...request.apiConvertTextToSpeechUsingCharacterRequest,
461
+ ...baseRequest.apiConvertTextToSpeechUsingCharacterRequest,
415
462
  text: firstChunk,
416
463
  },
417
464
  };
@@ -421,7 +468,7 @@ export class TextToSpeech extends ClientSDK {
421
468
  }
422
469
  const firstResponse = await this._streamSpeechOriginal(
423
470
  firstChunkRequest,
424
- options,
471
+ restOptions
425
472
  );
426
473
 
427
474
  // Single chunk: return as-is
@@ -434,8 +481,8 @@ export class TextToSpeech extends ClientSDK {
434
481
  return this.createExtendedStreamingResponse(
435
482
  firstResponse,
436
483
  remainingChunks,
437
- request,
438
- options,
484
+ baseRequest,
485
+ restOptions
439
486
  );
440
487
  }
441
488
  // #endregion sdk-class-body
@@ -448,15 +495,9 @@ export class TextToSpeech extends ClientSDK {
448
495
  */
449
496
  async createSpeech(
450
497
  request: operations.CreateSpeechRequest,
451
- options?: RequestOptions & {
452
- acceptHeaderOverride?: CreateSpeechAcceptEnum;
453
- },
498
+ options?: CreateSpeechOptions
454
499
  ): Promise<operations.CreateSpeechResponse> {
455
- return unwrapAsync(textToSpeechCreateSpeech(
456
- this,
457
- request,
458
- options,
459
- ));
500
+ return unwrapAsync(textToSpeechCreateSpeech(this, request, options));
460
501
  }
461
502
 
462
503
  /**
@@ -467,15 +508,9 @@ export class TextToSpeech extends ClientSDK {
467
508
  */
468
509
  async streamSpeech(
469
510
  request: operations.StreamSpeechRequest,
470
- options?: RequestOptions & {
471
- acceptHeaderOverride?: StreamSpeechAcceptEnum;
472
- },
511
+ options?: StreamSpeechOptions
473
512
  ): Promise<operations.StreamSpeechResponse> {
474
- return unwrapAsync(textToSpeechStreamSpeech(
475
- this,
476
- request,
477
- options,
478
- ));
513
+ return unwrapAsync(textToSpeechStreamSpeech(this, request, options));
479
514
  }
480
515
 
481
516
  /**
@@ -486,12 +521,8 @@ export class TextToSpeech extends ClientSDK {
486
521
  */
487
522
  async predictDuration(
488
523
  request: operations.PredictDurationRequest,
489
- options?: RequestOptions,
524
+ options?: RequestOptions
490
525
  ): Promise<operations.PredictDurationResponse> {
491
- return unwrapAsync(textToSpeechPredictDuration(
492
- this,
493
- request,
494
- options,
495
- ));
526
+ return unwrapAsync(textToSpeechPredictDuration(this, request, options));
496
527
  }
497
528
  }