@supertone/supertone 0.1.1 ā 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -73
- package/custom_test/realtime_tts_player.ts +64 -3
- package/custom_test/test_real_api.ts +1040 -105
- package/dist/commonjs/lib/config.d.ts +2 -2
- package/dist/commonjs/lib/config.js +2 -2
- package/dist/commonjs/lib/custom_utils/text_utils.d.ts +8 -1
- package/dist/commonjs/lib/custom_utils/text_utils.d.ts.map +1 -1
- package/dist/commonjs/lib/custom_utils/text_utils.js +108 -7
- package/dist/commonjs/lib/custom_utils/text_utils.js.map +1 -1
- package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.d.ts +92 -1
- package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.d.ts.map +1 -1
- package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.js +48 -3
- package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.js.map +1 -1
- package/dist/commonjs/models/predictttsdurationusingcharacterrequest.d.ts +92 -1
- package/dist/commonjs/models/predictttsdurationusingcharacterrequest.d.ts.map +1 -1
- package/dist/commonjs/models/predictttsdurationusingcharacterrequest.js +46 -3
- package/dist/commonjs/models/predictttsdurationusingcharacterrequest.js.map +1 -1
- package/dist/esm/lib/config.d.ts +2 -2
- package/dist/esm/lib/config.js +2 -2
- package/dist/esm/lib/custom_utils/text_utils.d.ts +8 -1
- package/dist/esm/lib/custom_utils/text_utils.d.ts.map +1 -1
- package/dist/esm/lib/custom_utils/text_utils.js +108 -7
- package/dist/esm/lib/custom_utils/text_utils.js.map +1 -1
- package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.d.ts +92 -1
- package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.d.ts.map +1 -1
- package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.js +47 -2
- package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.js.map +1 -1
- package/dist/esm/models/predictttsdurationusingcharacterrequest.d.ts +92 -1
- package/dist/esm/models/predictttsdurationusingcharacterrequest.d.ts.map +1 -1
- package/dist/esm/models/predictttsdurationusingcharacterrequest.js +45 -2
- package/dist/esm/models/predictttsdurationusingcharacterrequest.js.map +1 -1
- package/examples/custom_voices/create_cloned_voice.ts +4 -3
- package/examples/custom_voices/delete_custom_voice.ts +2 -7
- package/examples/custom_voices/edit_custom_voice.ts +2 -6
- package/examples/custom_voices/get_custom_voice.ts +2 -7
- package/examples/custom_voices/list_custom_voices.ts +2 -7
- package/examples/custom_voices/search_custom_voices.ts +2 -6
- package/examples/text_to_speech/create_speech.ts +3 -8
- package/examples/text_to_speech/create_speech_long_text.ts +3 -7
- package/examples/text_to_speech/create_speech_with_phonemes.ts +3 -7
- package/examples/text_to_speech/create_speech_with_voice_settings.ts +3 -8
- package/examples/text_to_speech/predict_duration.ts +3 -7
- package/examples/text_to_speech/stream_speech.ts +3 -7
- package/examples/text_to_speech/stream_speech_long_text.ts +3 -7
- package/examples/text_to_speech/stream_speech_with_phonemes.ts +3 -7
- package/examples/text_to_speech/stream_speech_with_voice_settings.ts +3 -7
- package/examples/usage/get_credit_balance.ts +2 -6
- package/examples/usage/get_usage.ts +2 -6
- package/examples/usage/get_voice_usage.ts +2 -7
- package/examples/voices/get_voice.ts +2 -6
- package/examples/voices/list_voices.ts +2 -6
- package/examples/voices/search_voices.ts +2 -7
- package/jsr.json +1 -1
- package/openapi.json +101 -9
- package/package.json +1 -1
- package/src/lib/config.ts +2 -2
- package/src/lib/custom_utils/text_utils.ts +117 -7
- package/src/models/apiconverttexttospeechusingcharacterrequest.ts +62 -3
- package/src/models/predictttsdurationusingcharacterrequest.ts +64 -3
|
@@ -127,19 +127,25 @@ async function extractAudioData(response: any): Promise<Uint8Array> {
|
|
|
127
127
|
console.log(` š Debug - has audioBase64: ${"audioBase64" in result}`);
|
|
128
128
|
console.log(` š Debug - has getReader: ${"getReader" in result}`);
|
|
129
129
|
}
|
|
130
|
-
|
|
130
|
+
|
|
131
131
|
// Check for capital-case Result (SDK internal structure)
|
|
132
|
-
if (
|
|
132
|
+
if (
|
|
133
|
+
!result ||
|
|
134
|
+
(typeof result === "object" && Object.keys(result).length === 0)
|
|
135
|
+
) {
|
|
133
136
|
console.log(` š” Checking SDK internal Result field...`);
|
|
134
137
|
if ((response as any).Result) {
|
|
135
138
|
result = (response as any).Result;
|
|
136
139
|
console.log(` ā
Found Result (capital R) - using that instead`);
|
|
137
140
|
}
|
|
138
141
|
}
|
|
139
|
-
|
|
142
|
+
|
|
140
143
|
// Debug response headers
|
|
141
144
|
if (response.headers) {
|
|
142
|
-
console.log(
|
|
145
|
+
console.log(
|
|
146
|
+
` š Debug - response headers:`,
|
|
147
|
+
JSON.stringify(response.headers, null, 2)
|
|
148
|
+
);
|
|
143
149
|
}
|
|
144
150
|
|
|
145
151
|
if (result instanceof Uint8Array) {
|
|
@@ -198,7 +204,7 @@ async function extractAudioData(response: any): Promise<Uint8Array> {
|
|
|
198
204
|
return bytes;
|
|
199
205
|
}
|
|
200
206
|
}
|
|
201
|
-
|
|
207
|
+
|
|
202
208
|
// Handle empty object case - this might happen when the SDK doesn't properly parse audio responses
|
|
203
209
|
if (
|
|
204
210
|
typeof result === "object" &&
|
|
@@ -207,22 +213,25 @@ async function extractAudioData(response: any): Promise<Uint8Array> {
|
|
|
207
213
|
) {
|
|
208
214
|
console.log(` ā ļø Warning: Empty result object detected`);
|
|
209
215
|
console.log(` š” This might be a parsing issue with the SDK`);
|
|
210
|
-
console.log(
|
|
211
|
-
|
|
216
|
+
console.log(
|
|
217
|
+
` š” Check if the response was actually a stream but got parsed as an empty object`
|
|
218
|
+
);
|
|
219
|
+
|
|
212
220
|
throw new Error(
|
|
213
221
|
`Empty result object - SDK may have failed to parse audio stream response. ` +
|
|
214
|
-
|
|
222
|
+
`This usually happens when audio/* content-type responses are not properly handled.`
|
|
215
223
|
);
|
|
216
224
|
}
|
|
217
225
|
|
|
218
226
|
// Enhanced error message with debug info
|
|
219
|
-
const errorDetails =
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
227
|
+
const errorDetails =
|
|
228
|
+
typeof result === "object" && result !== null
|
|
229
|
+
? `constructor: ${result.constructor.name}, keys: [${Object.keys(
|
|
230
|
+
result
|
|
231
|
+
).join(", ")}]`
|
|
232
|
+
: `value: ${result}`;
|
|
233
|
+
|
|
234
|
+
throw new Error(`Unsupported result type: ${typeof result}, ${errorDetails}`);
|
|
226
235
|
}
|
|
227
236
|
|
|
228
237
|
/**
|
|
@@ -927,6 +936,154 @@ async function testCreateSpeechLongText(
|
|
|
927
936
|
}
|
|
928
937
|
}
|
|
929
938
|
|
|
939
|
+
/**
|
|
940
|
+
* Test TTS with long text WITHOUT punctuation (word-based chunking)
|
|
941
|
+
* This tests the word-based splitting fallback when sentences exceed 300 chars
|
|
942
|
+
*/
|
|
943
|
+
async function testCreateSpeechLongSentenceNoPunctuation(
|
|
944
|
+
voiceId: string | null
|
|
945
|
+
): Promise<[boolean, any]> {
|
|
946
|
+
console.log(
|
|
947
|
+
"š Long Sentence WITHOUT Punctuation Test (Word-based chunking)"
|
|
948
|
+
);
|
|
949
|
+
|
|
950
|
+
if (!voiceId) {
|
|
951
|
+
console.log(" ā ļø No voice ID available");
|
|
952
|
+
return [false, null];
|
|
953
|
+
}
|
|
954
|
+
|
|
955
|
+
try {
|
|
956
|
+
const { Supertone } = await import("../src/index.js");
|
|
957
|
+
const models = await import("../src/models/index.js");
|
|
958
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
959
|
+
|
|
960
|
+
// Long text without punctuation - forces word-based splitting
|
|
961
|
+
// This is a single continuous sentence with no periods or other punctuation marks
|
|
962
|
+
const longSentenceNoPunctuation =
|
|
963
|
+
"This is a very long sentence without any punctuation marks that is designed to test the word based chunking feature of the SDK when a sentence exceeds the maximum character limit of three hundred characters the system should automatically split this text by word boundaries rather than sentence boundaries to ensure proper processing and this behavior is critical for handling user generated content that may not follow standard punctuation conventions such as chat messages or informal text inputs that users commonly provide in real world applications where grammatically correct sentences are not always guaranteed";
|
|
964
|
+
|
|
965
|
+
const actualLength = longSentenceNoPunctuation.length;
|
|
966
|
+
console.log(
|
|
967
|
+
` š Text length: ${actualLength} characters (single sentence, no punctuation)`
|
|
968
|
+
);
|
|
969
|
+
console.log(` š§ Expected behavior: Word-based chunking`);
|
|
970
|
+
console.log(" ā ļø This test consumes credits!");
|
|
971
|
+
|
|
972
|
+
const response = await client.textToSpeech.createSpeech({
|
|
973
|
+
voiceId,
|
|
974
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
975
|
+
text: longSentenceNoPunctuation,
|
|
976
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
977
|
+
outputFormat:
|
|
978
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
979
|
+
style: "neutral",
|
|
980
|
+
model: "sona_speech_1",
|
|
981
|
+
},
|
|
982
|
+
});
|
|
983
|
+
|
|
984
|
+
if (response.result) {
|
|
985
|
+
const audioData = await extractAudioData(response);
|
|
986
|
+
|
|
987
|
+
console.log(
|
|
988
|
+
` ā
Word-based chunking TTS success: ${audioData.length} bytes`
|
|
989
|
+
);
|
|
990
|
+
console.log(
|
|
991
|
+
` šÆ Long sentence without punctuation processed correctly!`
|
|
992
|
+
);
|
|
993
|
+
|
|
994
|
+
const outputFile = "test_word_chunking_speech_output.wav";
|
|
995
|
+
fs.writeFileSync(outputFile, audioData);
|
|
996
|
+
console.log(` š¾ Audio saved: ${outputFile}`);
|
|
997
|
+
|
|
998
|
+
const estimatedChunks = Math.ceil(actualLength / 300);
|
|
999
|
+
console.log(` š Estimated chunks: ${estimatedChunks}`);
|
|
1000
|
+
}
|
|
1001
|
+
|
|
1002
|
+
return [true, response];
|
|
1003
|
+
} catch (e: any) {
|
|
1004
|
+
logDetailedError(e, "Long sentence word-based chunking");
|
|
1005
|
+
return [false, e];
|
|
1006
|
+
}
|
|
1007
|
+
}
|
|
1008
|
+
|
|
1009
|
+
/**
|
|
1010
|
+
* Test TTS with Japanese text (character-based chunking)
|
|
1011
|
+
* Japanese doesn't use spaces, AND this test uses NO punctuation marks (ćļ¼ļ¼etc)
|
|
1012
|
+
* to ensure the SDK uses character-based splitting
|
|
1013
|
+
*/
|
|
1014
|
+
async function testCreateSpeechJapaneseNoSpaces(
|
|
1015
|
+
voiceId: string | null
|
|
1016
|
+
): Promise<[boolean, any]> {
|
|
1017
|
+
console.log("šÆšµ Japanese Text Test (Character-based chunking)");
|
|
1018
|
+
|
|
1019
|
+
if (!voiceId) {
|
|
1020
|
+
console.log(" ā ļø No voice ID available");
|
|
1021
|
+
return [false, null];
|
|
1022
|
+
}
|
|
1023
|
+
|
|
1024
|
+
try {
|
|
1025
|
+
const { Supertone } = await import("../src/index.js");
|
|
1026
|
+
const models = await import("../src/models/index.js");
|
|
1027
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1028
|
+
|
|
1029
|
+
// Long Japanese text WITHOUT spaces AND WITHOUT punctuation - forces character-based splitting
|
|
1030
|
+
// This text intentionally has NO punctuation marks (ćļ¼ļ¼etc) to test pure character-based chunking
|
|
1031
|
+
// Text length: ~450 characters (exceeds 300 char limit)
|
|
1032
|
+
const longJapaneseText =
|
|
1033
|
+
"ę„ę¬čŖć®ććć¹ććÆéåøøć¹ćć¼ć¹ćå«ć¾ćŖćććē¹å„ćŖå¦ēćåæ
č¦ć§ć" +
|
|
1034
|
+
"ćć®ćć¹ććÆäøē¾ęåćč¶
ććé·ćę„ę¬čŖććć¹ććę£ććå¦ēćććććØć確čŖćć¾ć" +
|
|
1035
|
+
"čŖē¶čØčŖå¦ēęč”ć®ēŗå±ć«ććé³å£°åęć®åč³ŖćÆå¤§å¹
ć«åäøćć¾ćć" +
|
|
1036
|
+
"ē¹ć«ćć£ć¼ćć©ć¼ćć³ć°ćę“»ēØććęę°ć®ććć¹ćé³å£°å¤ęć·ć¹ćć ćÆäŗŗéć®ēŗč©±ć«éåøøć«čæćčŖē¶ćŖé³å£°ćēęć§ćć¾ć" +
|
|
1037
|
+
"ć¹ćć¼ć¹ććŖćčØčŖć§ćÆęååä½ć§ć®åå²ćåæ
č¦ć§ćććć®SDKćÆćć®ćććŖē¶ę³ćčŖåēć«ę¤åŗćć¦é©åć«å¦ēćć¾ć" +
|
|
1038
|
+
"ććć«ććę„ę¬čŖäøå½čŖéå½čŖćŖć©ć®ć¢ćøć¢čØčŖć§ćåé”ćŖćé·ćććć¹ććé³å£°ć«å¤ęććććØćć§ćć¾ć" +
|
|
1039
|
+
"é³å£°åęęč”ćÆč¦č¦é害č
ć®ććć®ć¢ćÆć»ć·ććŖćć£ćć¼ć«ćć対話åAIć¢ć·ć¹ćæć³ćć¾ć§å¹
åŗćēØéć§ę“»ēØććć¦ćć¾ć" +
|
|
1040
|
+
"ććć«ćŖć¢ć«ćæć¤ć ć¹ććŖć¼ćć³ć°ęč”ćØēµćæåćććććØć§å¾
ć”ęéć大å¹
ć«ēēø®ćåŖććć¦ć¼ć¶ć¼ä½éØćęä¾ććććØćć§ćć¾ć" +
|
|
1041
|
+
"ęę°ć®é³å£°åęęč”ćÆęę
ćęęćčŖē¶ć«č”Øē¾ć§ććććć«ćŖćć¾ćć";
|
|
1042
|
+
|
|
1043
|
+
const actualLength = longJapaneseText.length;
|
|
1044
|
+
console.log(
|
|
1045
|
+
` š Text length: ${actualLength} characters (Japanese, no spaces, no punctuation)`
|
|
1046
|
+
);
|
|
1047
|
+
console.log(
|
|
1048
|
+
` š§ Expected behavior: Character-based chunking (300 chars per chunk)`
|
|
1049
|
+
);
|
|
1050
|
+
console.log(" ā ļø This test consumes credits!");
|
|
1051
|
+
|
|
1052
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1053
|
+
voiceId,
|
|
1054
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1055
|
+
text: longJapaneseText,
|
|
1056
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.Ja,
|
|
1057
|
+
outputFormat:
|
|
1058
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1059
|
+
style: "neutral",
|
|
1060
|
+
model: "sona_speech_1",
|
|
1061
|
+
},
|
|
1062
|
+
});
|
|
1063
|
+
|
|
1064
|
+
if (response.result) {
|
|
1065
|
+
const audioData = await extractAudioData(response);
|
|
1066
|
+
|
|
1067
|
+
console.log(
|
|
1068
|
+
` ā
Character-based chunking TTS success: ${audioData.length} bytes`
|
|
1069
|
+
);
|
|
1070
|
+
console.log(` šÆ Japanese text without spaces processed correctly!`);
|
|
1071
|
+
|
|
1072
|
+
const outputFile = "test_japanese_char_chunking_speech_output.wav";
|
|
1073
|
+
fs.writeFileSync(outputFile, audioData);
|
|
1074
|
+
console.log(` š¾ Audio saved: ${outputFile}`);
|
|
1075
|
+
|
|
1076
|
+
const estimatedChunks = Math.ceil(actualLength / 300);
|
|
1077
|
+
console.log(` š Estimated chunks: ${estimatedChunks}`);
|
|
1078
|
+
}
|
|
1079
|
+
|
|
1080
|
+
return [true, response];
|
|
1081
|
+
} catch (e: any) {
|
|
1082
|
+
logDetailedError(e, "Japanese character-based chunking");
|
|
1083
|
+
return [false, e];
|
|
1084
|
+
}
|
|
1085
|
+
}
|
|
1086
|
+
|
|
930
1087
|
/**
|
|
931
1088
|
* Test TTS streaming with long text
|
|
932
1089
|
*/
|
|
@@ -1168,13 +1325,53 @@ async function testStreamSpeechWithPhonemes(
|
|
|
1168
1325
|
}
|
|
1169
1326
|
}
|
|
1170
1327
|
|
|
1328
|
+
// =============================================================================
|
|
1329
|
+
// Model & Language Compatibility Tests
|
|
1330
|
+
// =============================================================================
|
|
1331
|
+
|
|
1171
1332
|
/**
|
|
1172
|
-
*
|
|
1333
|
+
* Model-Language compatibility matrix
|
|
1334
|
+
* - sona_speech_1: ko, en, ja
|
|
1335
|
+
* - sona_speech_2: all languages (23 languages)
|
|
1336
|
+
* - supertonic_api_1: ko, en, ja, es, pt
|
|
1173
1337
|
*/
|
|
1174
|
-
|
|
1338
|
+
const MODEL_LANGUAGE_MATRIX = {
|
|
1339
|
+
sona_speech_1: ["ko", "en", "ja"],
|
|
1340
|
+
sona_speech_2: [
|
|
1341
|
+
"en",
|
|
1342
|
+
"ko",
|
|
1343
|
+
"ja",
|
|
1344
|
+
"bg",
|
|
1345
|
+
"cs",
|
|
1346
|
+
"da",
|
|
1347
|
+
"el",
|
|
1348
|
+
"es",
|
|
1349
|
+
"et",
|
|
1350
|
+
"fi",
|
|
1351
|
+
"hu",
|
|
1352
|
+
"it",
|
|
1353
|
+
"nl",
|
|
1354
|
+
"pl",
|
|
1355
|
+
"pt",
|
|
1356
|
+
"ro",
|
|
1357
|
+
"ar",
|
|
1358
|
+
"de",
|
|
1359
|
+
"fr",
|
|
1360
|
+
"hi",
|
|
1361
|
+
"id",
|
|
1362
|
+
"ru",
|
|
1363
|
+
"vi",
|
|
1364
|
+
],
|
|
1365
|
+
supertonic_api_1: ["ko", "en", "ja", "es", "pt"],
|
|
1366
|
+
} as const;
|
|
1367
|
+
|
|
1368
|
+
/**
|
|
1369
|
+
* Test TTS with sona_speech_2 model
|
|
1370
|
+
*/
|
|
1371
|
+
async function testCreateSpeechWithSonaSpeech2(
|
|
1175
1372
|
voiceId: string | null
|
|
1176
1373
|
): Promise<[boolean, any]> {
|
|
1177
|
-
console.log("
|
|
1374
|
+
console.log("š¤ TTS with sona_speech_2 Model Test");
|
|
1178
1375
|
|
|
1179
1376
|
if (!voiceId) {
|
|
1180
1377
|
console.log(" ā ļø No voice ID available");
|
|
@@ -1186,40 +1383,50 @@ async function testPredictDurationWithVoiceSettings(
|
|
|
1186
1383
|
const models = await import("../src/models/index.js");
|
|
1187
1384
|
const client = new Supertone({ apiKey: API_KEY });
|
|
1188
1385
|
|
|
1189
|
-
const
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
console.log(
|
|
1194
|
-
|
|
1195
|
-
);
|
|
1196
|
-
console.log(` Settings: speed=${voiceSettings.speed}`);
|
|
1386
|
+
const testText =
|
|
1387
|
+
"Hello! Testing sona_speech_2 model for text-to-speech conversion.";
|
|
1388
|
+
console.log(` š Creating speech with sona_speech_2 model`);
|
|
1389
|
+
console.log(` Voice ID: ${voiceId}`);
|
|
1390
|
+
console.log(` Model: sona_speech_2`);
|
|
1391
|
+
console.log(" ā ļø This test consumes credits!");
|
|
1197
1392
|
|
|
1198
|
-
const response = await client.textToSpeech.
|
|
1393
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1199
1394
|
voiceId,
|
|
1200
|
-
|
|
1201
|
-
text:
|
|
1202
|
-
language: models.
|
|
1203
|
-
|
|
1395
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1396
|
+
text: testText,
|
|
1397
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
1398
|
+
outputFormat:
|
|
1399
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1400
|
+
model:
|
|
1401
|
+
models.APIConvertTextToSpeechUsingCharacterRequestModel.SonaSpeech2,
|
|
1204
1402
|
},
|
|
1205
1403
|
});
|
|
1206
1404
|
|
|
1207
|
-
console.log(` ā
|
|
1405
|
+
console.log(` ā
sona_speech_2 TTS success`);
|
|
1406
|
+
|
|
1407
|
+
if (response.result) {
|
|
1408
|
+
const audioData = await extractAudioData(response);
|
|
1409
|
+
const outputFile = "test_sona_speech_2_output.wav";
|
|
1410
|
+
fs.writeFileSync(outputFile, audioData);
|
|
1411
|
+
console.log(
|
|
1412
|
+
` š¾ Audio saved: ${outputFile} (${audioData.length} bytes)`
|
|
1413
|
+
);
|
|
1414
|
+
}
|
|
1208
1415
|
|
|
1209
1416
|
return [true, response];
|
|
1210
1417
|
} catch (e: any) {
|
|
1211
|
-
|
|
1418
|
+
logDetailedError(e, "sona_speech_2 TTS");
|
|
1212
1419
|
return [false, e];
|
|
1213
1420
|
}
|
|
1214
1421
|
}
|
|
1215
1422
|
|
|
1216
1423
|
/**
|
|
1217
|
-
* Test TTS
|
|
1424
|
+
* Test TTS with supertonic_api_1 model
|
|
1218
1425
|
*/
|
|
1219
|
-
async function
|
|
1426
|
+
async function testCreateSpeechWithSupertonicApi1(
|
|
1220
1427
|
voiceId: string | null
|
|
1221
1428
|
): Promise<[boolean, any]> {
|
|
1222
|
-
console.log("
|
|
1429
|
+
console.log("š¤ TTS with supertonic_api_1 Model Test");
|
|
1223
1430
|
|
|
1224
1431
|
if (!voiceId) {
|
|
1225
1432
|
console.log(" ā ļø No voice ID available");
|
|
@@ -1231,46 +1438,51 @@ async function testStreamSpeechWithVoiceSettings(
|
|
|
1231
1438
|
const models = await import("../src/models/index.js");
|
|
1232
1439
|
const client = new Supertone({ apiKey: API_KEY });
|
|
1233
1440
|
|
|
1234
|
-
const
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
};
|
|
1238
|
-
|
|
1239
|
-
console.log(
|
|
1240
|
-
` š Streaming speech with voice settings for voice '${voiceId}'...`
|
|
1241
|
-
);
|
|
1242
|
-
console.log(
|
|
1243
|
-
` Settings: pitchShift=${voiceSettings.pitchShift}, speed=${voiceSettings.speed}`
|
|
1244
|
-
);
|
|
1441
|
+
const testText =
|
|
1442
|
+
"Hello! Testing supertonic_api_1 model for text-to-speech conversion.";
|
|
1443
|
+
console.log(` š Creating speech with supertonic_api_1 model`);
|
|
1444
|
+
console.log(` Voice ID: ${voiceId}`);
|
|
1445
|
+
console.log(` Model: supertonic_api_1`);
|
|
1245
1446
|
console.log(" ā ļø This test consumes credits!");
|
|
1246
1447
|
|
|
1247
|
-
const response = await client.textToSpeech.
|
|
1448
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1248
1449
|
voiceId,
|
|
1249
1450
|
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1250
|
-
text:
|
|
1451
|
+
text: testText,
|
|
1251
1452
|
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
1252
1453
|
outputFormat:
|
|
1253
1454
|
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1254
|
-
|
|
1455
|
+
model:
|
|
1456
|
+
models.APIConvertTextToSpeechUsingCharacterRequestModel
|
|
1457
|
+
.SupertonicApi1,
|
|
1255
1458
|
},
|
|
1256
1459
|
});
|
|
1257
1460
|
|
|
1258
|
-
console.log(` ā
|
|
1461
|
+
console.log(` ā
supertonic_api_1 TTS success`);
|
|
1462
|
+
|
|
1463
|
+
if (response.result) {
|
|
1464
|
+
const audioData = await extractAudioData(response);
|
|
1465
|
+
const outputFile = "test_supertonic_api_1_output.wav";
|
|
1466
|
+
fs.writeFileSync(outputFile, audioData);
|
|
1467
|
+
console.log(
|
|
1468
|
+
` š¾ Audio saved: ${outputFile} (${audioData.length} bytes)`
|
|
1469
|
+
);
|
|
1470
|
+
}
|
|
1259
1471
|
|
|
1260
1472
|
return [true, response];
|
|
1261
1473
|
} catch (e: any) {
|
|
1262
|
-
|
|
1474
|
+
logDetailedError(e, "supertonic_api_1 TTS");
|
|
1263
1475
|
return [false, e];
|
|
1264
1476
|
}
|
|
1265
1477
|
}
|
|
1266
1478
|
|
|
1267
1479
|
/**
|
|
1268
|
-
* Test
|
|
1480
|
+
* Test TTS with unsupported model (should fail with validation error)
|
|
1269
1481
|
*/
|
|
1270
|
-
async function
|
|
1482
|
+
async function testCreateSpeechWithUnsupportedModel(
|
|
1271
1483
|
voiceId: string | null
|
|
1272
1484
|
): Promise<[boolean, any]> {
|
|
1273
|
-
console.log("
|
|
1485
|
+
console.log("š« TTS with Unsupported Model Test (Expected to Fail)");
|
|
1274
1486
|
|
|
1275
1487
|
if (!voiceId) {
|
|
1276
1488
|
console.log(" ā ļø No voice ID available");
|
|
@@ -1282,62 +1494,82 @@ async function testCreateSpeechMp3(
|
|
|
1282
1494
|
const models = await import("../src/models/index.js");
|
|
1283
1495
|
const client = new Supertone({ apiKey: API_KEY });
|
|
1284
1496
|
|
|
1285
|
-
|
|
1286
|
-
console.log(
|
|
1497
|
+
const testText = "This should fail with unsupported model.";
|
|
1498
|
+
console.log(
|
|
1499
|
+
` š Attempting TTS with unsupported model: 'invalid_model_xyz'`
|
|
1500
|
+
);
|
|
1287
1501
|
|
|
1502
|
+
// Using type assertion to bypass TypeScript validation for testing
|
|
1288
1503
|
const response = await client.textToSpeech.createSpeech({
|
|
1289
1504
|
voiceId,
|
|
1290
1505
|
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1291
|
-
text:
|
|
1506
|
+
text: testText,
|
|
1292
1507
|
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
1293
1508
|
outputFormat:
|
|
1294
|
-
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.
|
|
1295
|
-
|
|
1296
|
-
model: "sona_speech_1",
|
|
1509
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1510
|
+
model: "invalid_model_xyz" as any, // Intentionally invalid model
|
|
1297
1511
|
},
|
|
1298
1512
|
});
|
|
1299
1513
|
|
|
1300
|
-
|
|
1514
|
+
// If we reach here, the test failed (should have thrown an error)
|
|
1515
|
+
console.log(` ā Expected error but got success - this is unexpected!`);
|
|
1516
|
+
return [false, response];
|
|
1517
|
+
} catch (e: any) {
|
|
1518
|
+
// Expected to fail - this is the success case for this test
|
|
1519
|
+
console.log(` ā
Correctly rejected unsupported model`);
|
|
1520
|
+
console.log(` š Error type: ${e.constructor?.name || typeof e}`);
|
|
1521
|
+
console.log(` š Error message: ${e.message?.substring(0, 100) || e}`);
|
|
1522
|
+
return [true, e];
|
|
1523
|
+
}
|
|
1524
|
+
}
|
|
1301
1525
|
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1526
|
+
/**
|
|
1527
|
+
* Test prediction with sona_speech_2 model
|
|
1528
|
+
*/
|
|
1529
|
+
async function testPredictDurationWithSonaSpeech2(
|
|
1530
|
+
voiceId: string | null
|
|
1531
|
+
): Promise<[boolean, any]> {
|
|
1532
|
+
console.log("ā±ļø Duration Prediction with sona_speech_2 Model Test");
|
|
1305
1533
|
|
|
1306
|
-
|
|
1307
|
-
|
|
1534
|
+
if (!voiceId) {
|
|
1535
|
+
console.log(" ā ļø No voice ID available");
|
|
1536
|
+
return [false, null];
|
|
1537
|
+
}
|
|
1308
1538
|
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
}
|
|
1539
|
+
try {
|
|
1540
|
+
const { Supertone } = await import("../src/index.js");
|
|
1541
|
+
const models = await import("../src/models/index.js");
|
|
1542
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1543
|
+
|
|
1544
|
+
const testText = "Testing duration prediction with sona_speech_2 model.";
|
|
1545
|
+
console.log(` š Predicting duration with sona_speech_2 model`);
|
|
1546
|
+
|
|
1547
|
+
const response = await client.textToSpeech.predictDuration({
|
|
1548
|
+
voiceId,
|
|
1549
|
+
predictTTSDurationUsingCharacterRequest: {
|
|
1550
|
+
text: testText,
|
|
1551
|
+
language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
|
|
1552
|
+
model: models.PredictTTSDurationUsingCharacterRequestModel.SonaSpeech2,
|
|
1553
|
+
},
|
|
1554
|
+
});
|
|
1326
1555
|
|
|
1556
|
+
console.log(
|
|
1557
|
+
` ā
sona_speech_2 duration prediction: ${response.duration}s`
|
|
1558
|
+
);
|
|
1327
1559
|
return [true, response];
|
|
1328
1560
|
} catch (e: any) {
|
|
1329
|
-
|
|
1561
|
+
logDetailedError(e, "sona_speech_2 duration prediction");
|
|
1330
1562
|
return [false, e];
|
|
1331
1563
|
}
|
|
1332
1564
|
}
|
|
1333
1565
|
|
|
1334
1566
|
/**
|
|
1335
|
-
* Test
|
|
1567
|
+
* Test prediction with supertonic_api_1 model
|
|
1336
1568
|
*/
|
|
1337
|
-
async function
|
|
1569
|
+
async function testPredictDurationWithSupertonicApi1(
|
|
1338
1570
|
voiceId: string | null
|
|
1339
1571
|
): Promise<[boolean, any]> {
|
|
1340
|
-
console.log("
|
|
1572
|
+
console.log("ā±ļø Duration Prediction with supertonic_api_1 Model Test");
|
|
1341
1573
|
|
|
1342
1574
|
if (!voiceId) {
|
|
1343
1575
|
console.log(" ā ļø No voice ID available");
|
|
@@ -1349,20 +1581,639 @@ async function testCreateSpeechLongTextMp3(
|
|
|
1349
1581
|
const models = await import("../src/models/index.js");
|
|
1350
1582
|
const client = new Supertone({ apiKey: API_KEY });
|
|
1351
1583
|
|
|
1352
|
-
const
|
|
1353
|
-
|
|
1354
|
-
The newly implemented SDK automatically divides long text into multiple chunks for processing.
|
|
1355
|
-
Real-time streaming text-to-speech technology plays a crucial role in modern AI applications.
|
|
1356
|
-
It is an indispensable technology especially in conversational services, live broadcasting, and real-time translation services.
|
|
1357
|
-
Through the auto-chunking feature, long texts are naturally divided into multiple small segments for processing.
|
|
1358
|
-
Each segment is intelligently segmented considering sentence and word boundaries, enabling natural speech generation.
|
|
1359
|
-
Now users don't need to worry about text length or output format, as the SDK automatically handles everything in MP3 format too.
|
|
1360
|
-
`.trim();
|
|
1584
|
+
const testText = "Testing duration prediction with supertonic_api_1 model.";
|
|
1585
|
+
console.log(` š Predicting duration with supertonic_api_1 model`);
|
|
1361
1586
|
|
|
1362
|
-
const
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1587
|
+
const response = await client.textToSpeech.predictDuration({
|
|
1588
|
+
voiceId,
|
|
1589
|
+
predictTTSDurationUsingCharacterRequest: {
|
|
1590
|
+
text: testText,
|
|
1591
|
+
language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
|
|
1592
|
+
model:
|
|
1593
|
+
models.PredictTTSDurationUsingCharacterRequestModel.SupertonicApi1,
|
|
1594
|
+
},
|
|
1595
|
+
});
|
|
1596
|
+
|
|
1597
|
+
console.log(
|
|
1598
|
+
` ā
supertonic_api_1 duration prediction: ${response.duration}s`
|
|
1599
|
+
);
|
|
1600
|
+
return [true, response];
|
|
1601
|
+
} catch (e: any) {
|
|
1602
|
+
logDetailedError(e, "supertonic_api_1 duration prediction");
|
|
1603
|
+
return [false, e];
|
|
1604
|
+
}
|
|
1605
|
+
}
|
|
1606
|
+
|
|
1607
|
+
/**
|
|
1608
|
+
* Test prediction with unsupported model (should fail with validation error)
|
|
1609
|
+
*/
|
|
1610
|
+
async function testPredictDurationWithUnsupportedModel(
|
|
1611
|
+
voiceId: string | null
|
|
1612
|
+
): Promise<[boolean, any]> {
|
|
1613
|
+
console.log(
|
|
1614
|
+
"š« Duration Prediction with Unsupported Model Test (Expected to Fail)"
|
|
1615
|
+
);
|
|
1616
|
+
|
|
1617
|
+
if (!voiceId) {
|
|
1618
|
+
console.log(" ā ļø No voice ID available");
|
|
1619
|
+
return [false, null];
|
|
1620
|
+
}
|
|
1621
|
+
|
|
1622
|
+
try {
|
|
1623
|
+
const { Supertone } = await import("../src/index.js");
|
|
1624
|
+
const models = await import("../src/models/index.js");
|
|
1625
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1626
|
+
|
|
1627
|
+
const testText = "This should fail with unsupported model.";
|
|
1628
|
+
console.log(
|
|
1629
|
+
` š Attempting prediction with unsupported model: 'invalid_model_xyz'`
|
|
1630
|
+
);
|
|
1631
|
+
|
|
1632
|
+
const response = await client.textToSpeech.predictDuration({
|
|
1633
|
+
voiceId,
|
|
1634
|
+
predictTTSDurationUsingCharacterRequest: {
|
|
1635
|
+
text: testText,
|
|
1636
|
+
language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
|
|
1637
|
+
model: "invalid_model_xyz" as any, // Intentionally invalid model
|
|
1638
|
+
},
|
|
1639
|
+
});
|
|
1640
|
+
|
|
1641
|
+
console.log(` ā Expected error but got success - this is unexpected!`);
|
|
1642
|
+
return [false, response];
|
|
1643
|
+
} catch (e: any) {
|
|
1644
|
+
console.log(` ā
Correctly rejected unsupported model`);
|
|
1645
|
+
console.log(` š Error type: ${e.constructor?.name || typeof e}`);
|
|
1646
|
+
console.log(` š Error message: ${e.message?.substring(0, 100) || e}`);
|
|
1647
|
+
return [true, e];
|
|
1648
|
+
}
|
|
1649
|
+
}
|
|
1650
|
+
|
|
1651
|
+
// =============================================================================
|
|
1652
|
+
// Multilingual Tests per Model
|
|
1653
|
+
// =============================================================================
|
|
1654
|
+
|
|
1655
|
+
/**
|
|
1656
|
+
* Test TTS multilingual support with sona_speech_1 (supports: ko, en, ja)
|
|
1657
|
+
*/
|
|
1658
|
+
async function testMultilingualSonaSpeech1(
|
|
1659
|
+
voiceId: string | null
|
|
1660
|
+
): Promise<[boolean, any]> {
|
|
1661
|
+
console.log("š Multilingual Test - sona_speech_1 (ko, en, ja)");
|
|
1662
|
+
|
|
1663
|
+
if (!voiceId) {
|
|
1664
|
+
console.log(" ā ļø No voice ID available");
|
|
1665
|
+
return [false, null];
|
|
1666
|
+
}
|
|
1667
|
+
|
|
1668
|
+
const testCases = [
|
|
1669
|
+
{
|
|
1670
|
+
lang: "ko" as const,
|
|
1671
|
+
text: "ģė
ķģøģ, ģė ģ¤ķ¼ģ¹ ģ ėŖØėøģ
ėė¤.",
|
|
1672
|
+
label: "Korean",
|
|
1673
|
+
},
|
|
1674
|
+
{
|
|
1675
|
+
lang: "en" as const,
|
|
1676
|
+
text: "Hello, this is sona_speech_1 model.",
|
|
1677
|
+
label: "English",
|
|
1678
|
+
},
|
|
1679
|
+
{
|
|
1680
|
+
lang: "ja" as const,
|
|
1681
|
+
text: "ććć«ć”ćÆćć½ćć¹ćć¼ććÆć³ć¢ćć«ć§ćć",
|
|
1682
|
+
label: "Japanese",
|
|
1683
|
+
},
|
|
1684
|
+
];
|
|
1685
|
+
|
|
1686
|
+
try {
|
|
1687
|
+
const { Supertone } = await import("../src/index.js");
|
|
1688
|
+
const models = await import("../src/models/index.js");
|
|
1689
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1690
|
+
|
|
1691
|
+
let allPassed = true;
|
|
1692
|
+
const results: any[] = [];
|
|
1693
|
+
|
|
1694
|
+
for (const tc of testCases) {
|
|
1695
|
+
console.log(` š Testing ${tc.label} (${tc.lang})...`);
|
|
1696
|
+
|
|
1697
|
+
try {
|
|
1698
|
+
const langEnum =
|
|
1699
|
+
models.APIConvertTextToSpeechUsingCharacterRequestLanguage[
|
|
1700
|
+
(tc.lang.charAt(0).toUpperCase() +
|
|
1701
|
+
tc.lang.slice(
|
|
1702
|
+
1
|
|
1703
|
+
)) as keyof typeof models.APIConvertTextToSpeechUsingCharacterRequestLanguage
|
|
1704
|
+
];
|
|
1705
|
+
|
|
1706
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1707
|
+
voiceId,
|
|
1708
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1709
|
+
text: tc.text,
|
|
1710
|
+
language: langEnum,
|
|
1711
|
+
outputFormat:
|
|
1712
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat
|
|
1713
|
+
.Wav,
|
|
1714
|
+
model:
|
|
1715
|
+
models.APIConvertTextToSpeechUsingCharacterRequestModel
|
|
1716
|
+
.SonaSpeech1,
|
|
1717
|
+
},
|
|
1718
|
+
});
|
|
1719
|
+
|
|
1720
|
+
console.log(` ā
${tc.label} success`);
|
|
1721
|
+
results.push({ lang: tc.lang, success: true });
|
|
1722
|
+
} catch (e: any) {
|
|
1723
|
+
console.log(
|
|
1724
|
+
` ā ${tc.label} failed: ${e.message?.substring(0, 50)}`
|
|
1725
|
+
);
|
|
1726
|
+
results.push({ lang: tc.lang, success: false, error: e.message });
|
|
1727
|
+
allPassed = false;
|
|
1728
|
+
}
|
|
1729
|
+
}
|
|
1730
|
+
|
|
1731
|
+
console.log(
|
|
1732
|
+
` š Result: ${results.filter((r) => r.success).length}/${
|
|
1733
|
+
testCases.length
|
|
1734
|
+
} languages passed`
|
|
1735
|
+
);
|
|
1736
|
+
return [allPassed, results];
|
|
1737
|
+
} catch (e: any) {
|
|
1738
|
+
logDetailedError(e, "sona_speech_1 multilingual");
|
|
1739
|
+
return [false, e];
|
|
1740
|
+
}
|
|
1741
|
+
}
|
|
1742
|
+
|
|
1743
|
+
/**
 * Test TTS multilingual support with sona_speech_2 (supports all languages).
 * Exercises a diverse sample of languages rather than the full set.
 */
async function testMultilingualSonaSpeech2(
  voiceId: string | null
): Promise<[boolean, any]> {
  console.log("š Multilingual Test - sona_speech_2 (all languages sample)");

  if (!voiceId) {
    console.log("  ā ļø No voice ID available");
    return [false, null];
  }

  // Test a diverse subset of languages
  const samples = [
    { lang: "Ko" as const, text: "ģė
ķģøģ.", label: "Korean" },
    { lang: "En" as const, text: "Hello.", label: "English" },
    { lang: "Ja" as const, text: "ććć«ć”ćÆć", label: "Japanese" },
    { lang: "Es" as const, text: "Hola.", label: "Spanish" },
    { lang: "Fr" as const, text: "Bonjour.", label: "French" },
    { lang: "De" as const, text: "Hallo.", label: "German" },
    { lang: "Ar" as const, text: "Ł
Ų±ŲŲØŲ§.", label: "Arabic" },
    { lang: "Hi" as const, text: "ą¤Øą¤®ą¤øą„ą¤¤ą„ą„¤", label: "Hindi" },
  ];

  try {
    const { Supertone } = await import("../src/index.js");
    const models = await import("../src/models/index.js");
    const client = new Supertone({ apiKey: API_KEY });

    const outcomes: any[] = [];
    let everyLanguagePassed = true;

    for (const sample of samples) {
      console.log(`  š Testing ${sample.label} (${sample.lang})...`);

      try {
        // Enum member names match the sample's `lang` key directly.
        await client.textToSpeech.createSpeech({
          voiceId,
          apiConvertTextToSpeechUsingCharacterRequest: {
            text: sample.text,
            language:
              models.APIConvertTextToSpeechUsingCharacterRequestLanguage[
                sample.lang
              ],
            outputFormat:
              models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat
                .Wav,
            model:
              models.APIConvertTextToSpeechUsingCharacterRequestModel
                .SonaSpeech2,
          },
        });

        console.log(`  ā
 ${sample.label} success`);
        outcomes.push({ lang: sample.lang, success: true });
      } catch (e: any) {
        console.log(
          `  ā ${sample.label} failed: ${e.message?.substring(0, 50)}`
        );
        outcomes.push({ lang: sample.lang, success: false, error: e.message });
        everyLanguagePassed = false;
      }
    }

    const passCount = outcomes.filter((r) => r.success).length;
    console.log(
      `  š Result: ${passCount}/${samples.length} languages passed`
    );
    return [everyLanguagePassed, outcomes];
  } catch (e: any) {
    logDetailedError(e, "sona_speech_2 multilingual");
    return [false, e];
  }
}
|
|
1819
|
+
|
|
1820
|
+
/**
|
|
1821
|
+
* Test TTS multilingual support with supertonic_api_1 (supports: ko, en, ja, es, pt)
|
|
1822
|
+
*/
|
|
1823
|
+
async function testMultilingualSupertonicApi1(
|
|
1824
|
+
voiceId: string | null
|
|
1825
|
+
): Promise<[boolean, any]> {
|
|
1826
|
+
console.log("š Multilingual Test - supertonic_api_1 (ko, en, ja, es, pt)");
|
|
1827
|
+
|
|
1828
|
+
if (!voiceId) {
|
|
1829
|
+
console.log(" ā ļø No voice ID available");
|
|
1830
|
+
return [false, null];
|
|
1831
|
+
}
|
|
1832
|
+
|
|
1833
|
+
const testCases = [
|
|
1834
|
+
{
|
|
1835
|
+
lang: "Ko" as const,
|
|
1836
|
+
text: "ģė
ķģøģ, ģķ¼ķ ė API ģ ėŖØėøģ
ėė¤.",
|
|
1837
|
+
label: "Korean",
|
|
1838
|
+
},
|
|
1839
|
+
{
|
|
1840
|
+
lang: "En" as const,
|
|
1841
|
+
text: "Hello, this is supertonic_api_1 model.",
|
|
1842
|
+
label: "English",
|
|
1843
|
+
},
|
|
1844
|
+
{
|
|
1845
|
+
lang: "Ja" as const,
|
|
1846
|
+
text: "ććć«ć”ćÆćć¹ć¼ćć¼ććććÆAPIćÆć³ć§ćć",
|
|
1847
|
+
label: "Japanese",
|
|
1848
|
+
},
|
|
1849
|
+
{
|
|
1850
|
+
lang: "Es" as const,
|
|
1851
|
+
text: "Hola, este es el modelo supertonic_api_1.",
|
|
1852
|
+
label: "Spanish",
|
|
1853
|
+
},
|
|
1854
|
+
{
|
|
1855
|
+
lang: "Pt" as const,
|
|
1856
|
+
text: "OlĆ”, este Ć© o modelo supertonic_api_1.",
|
|
1857
|
+
label: "Portuguese",
|
|
1858
|
+
},
|
|
1859
|
+
];
|
|
1860
|
+
|
|
1861
|
+
try {
|
|
1862
|
+
const { Supertone } = await import("../src/index.js");
|
|
1863
|
+
const models = await import("../src/models/index.js");
|
|
1864
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1865
|
+
|
|
1866
|
+
let allPassed = true;
|
|
1867
|
+
const results: any[] = [];
|
|
1868
|
+
|
|
1869
|
+
for (const tc of testCases) {
|
|
1870
|
+
console.log(` š Testing ${tc.label} (${tc.lang})...`);
|
|
1871
|
+
|
|
1872
|
+
try {
|
|
1873
|
+
const langEnum =
|
|
1874
|
+
models.APIConvertTextToSpeechUsingCharacterRequestLanguage[tc.lang];
|
|
1875
|
+
|
|
1876
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1877
|
+
voiceId,
|
|
1878
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1879
|
+
text: tc.text,
|
|
1880
|
+
language: langEnum,
|
|
1881
|
+
outputFormat:
|
|
1882
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat
|
|
1883
|
+
.Wav,
|
|
1884
|
+
model:
|
|
1885
|
+
models.APIConvertTextToSpeechUsingCharacterRequestModel
|
|
1886
|
+
.SupertonicApi1,
|
|
1887
|
+
},
|
|
1888
|
+
});
|
|
1889
|
+
|
|
1890
|
+
console.log(` ā
${tc.label} success`);
|
|
1891
|
+
results.push({ lang: tc.lang, success: true });
|
|
1892
|
+
} catch (e: any) {
|
|
1893
|
+
console.log(
|
|
1894
|
+
` ā ${tc.label} failed: ${e.message?.substring(0, 50)}`
|
|
1895
|
+
);
|
|
1896
|
+
results.push({ lang: tc.lang, success: false, error: e.message });
|
|
1897
|
+
allPassed = false;
|
|
1898
|
+
}
|
|
1899
|
+
}
|
|
1900
|
+
|
|
1901
|
+
console.log(
|
|
1902
|
+
` š Result: ${results.filter((r) => r.success).length}/${
|
|
1903
|
+
testCases.length
|
|
1904
|
+
} languages passed`
|
|
1905
|
+
);
|
|
1906
|
+
return [allPassed, results];
|
|
1907
|
+
} catch (e: any) {
|
|
1908
|
+
logDetailedError(e, "supertonic_api_1 multilingual");
|
|
1909
|
+
return [false, e];
|
|
1910
|
+
}
|
|
1911
|
+
}
|
|
1912
|
+
|
|
1913
|
+
/**
|
|
1914
|
+
* Test unsupported language for sona_speech_1 (should fail with French)
|
|
1915
|
+
*/
|
|
1916
|
+
async function testUnsupportedLanguageSonaSpeech1(
|
|
1917
|
+
voiceId: string | null
|
|
1918
|
+
): Promise<[boolean, any]> {
|
|
1919
|
+
console.log(
|
|
1920
|
+
"š« Unsupported Language Test - sona_speech_1 with French (Expected to Fail)"
|
|
1921
|
+
);
|
|
1922
|
+
|
|
1923
|
+
if (!voiceId) {
|
|
1924
|
+
console.log(" ā ļø No voice ID available");
|
|
1925
|
+
return [false, null];
|
|
1926
|
+
}
|
|
1927
|
+
|
|
1928
|
+
try {
|
|
1929
|
+
const { Supertone } = await import("../src/index.js");
|
|
1930
|
+
const models = await import("../src/models/index.js");
|
|
1931
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1932
|
+
|
|
1933
|
+
console.log(` š Attempting sona_speech_1 with French (unsupported)`);
|
|
1934
|
+
|
|
1935
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1936
|
+
voiceId,
|
|
1937
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1938
|
+
text: "Bonjour, ceci est un test.",
|
|
1939
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.Fr, // French - not supported by sona_speech_1
|
|
1940
|
+
outputFormat:
|
|
1941
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1942
|
+
model:
|
|
1943
|
+
models.APIConvertTextToSpeechUsingCharacterRequestModel.SonaSpeech1,
|
|
1944
|
+
},
|
|
1945
|
+
});
|
|
1946
|
+
|
|
1947
|
+
// If we reach here, the API didn't reject - may need server-side validation
|
|
1948
|
+
console.log(
|
|
1949
|
+
` ā ļø API accepted the request - server-side validation may not enforce language restriction`
|
|
1950
|
+
);
|
|
1951
|
+
console.log(
|
|
1952
|
+
` š Note: Language restriction may be enforced at API level, not SDK level`
|
|
1953
|
+
);
|
|
1954
|
+
return [
|
|
1955
|
+
true,
|
|
1956
|
+
{ note: "API accepted - language restriction may be server-side" },
|
|
1957
|
+
];
|
|
1958
|
+
} catch (e: any) {
|
|
1959
|
+
console.log(
|
|
1960
|
+
` ā
Correctly rejected unsupported language for sona_speech_1`
|
|
1961
|
+
);
|
|
1962
|
+
console.log(` š Error: ${e.message?.substring(0, 100)}`);
|
|
1963
|
+
return [true, e];
|
|
1964
|
+
}
|
|
1965
|
+
}
|
|
1966
|
+
|
|
1967
|
+
/**
|
|
1968
|
+
* Test unsupported language for supertonic_api_1 (should fail with German)
|
|
1969
|
+
*/
|
|
1970
|
+
async function testUnsupportedLanguageSupertonicApi1(
|
|
1971
|
+
voiceId: string | null
|
|
1972
|
+
): Promise<[boolean, any]> {
|
|
1973
|
+
console.log(
|
|
1974
|
+
"š« Unsupported Language Test - supertonic_api_1 with German (Expected to Fail)"
|
|
1975
|
+
);
|
|
1976
|
+
|
|
1977
|
+
if (!voiceId) {
|
|
1978
|
+
console.log(" ā ļø No voice ID available");
|
|
1979
|
+
return [false, null];
|
|
1980
|
+
}
|
|
1981
|
+
|
|
1982
|
+
try {
|
|
1983
|
+
const { Supertone } = await import("../src/index.js");
|
|
1984
|
+
const models = await import("../src/models/index.js");
|
|
1985
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1986
|
+
|
|
1987
|
+
console.log(` š Attempting supertonic_api_1 with German (unsupported)`);
|
|
1988
|
+
|
|
1989
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1990
|
+
voiceId,
|
|
1991
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1992
|
+
text: "Hallo, das ist ein Test.",
|
|
1993
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.De, // German - not supported by supertonic_api_1
|
|
1994
|
+
outputFormat:
|
|
1995
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1996
|
+
model:
|
|
1997
|
+
models.APIConvertTextToSpeechUsingCharacterRequestModel
|
|
1998
|
+
.SupertonicApi1,
|
|
1999
|
+
},
|
|
2000
|
+
});
|
|
2001
|
+
|
|
2002
|
+
// If we reach here, the API didn't reject - may need server-side validation
|
|
2003
|
+
console.log(
|
|
2004
|
+
` ā ļø API accepted the request - server-side validation may not enforce language restriction`
|
|
2005
|
+
);
|
|
2006
|
+
console.log(
|
|
2007
|
+
` š Note: Language restriction may be enforced at API level, not SDK level`
|
|
2008
|
+
);
|
|
2009
|
+
return [
|
|
2010
|
+
true,
|
|
2011
|
+
{ note: "API accepted - language restriction may be server-side" },
|
|
2012
|
+
];
|
|
2013
|
+
} catch (e: any) {
|
|
2014
|
+
console.log(
|
|
2015
|
+
` ā
Correctly rejected unsupported language for supertonic_api_1`
|
|
2016
|
+
);
|
|
2017
|
+
console.log(` š Error: ${e.message?.substring(0, 100)}`);
|
|
2018
|
+
return [true, e];
|
|
2019
|
+
}
|
|
2020
|
+
}
|
|
2021
|
+
|
|
2022
|
+
/**
|
|
2023
|
+
* Test duration prediction with voice settings
|
|
2024
|
+
*/
|
|
2025
|
+
async function testPredictDurationWithVoiceSettings(
|
|
2026
|
+
voiceId: string | null
|
|
2027
|
+
): Promise<[boolean, any]> {
|
|
2028
|
+
console.log("ā±ļø Duration Prediction with Voice Settings Test");
|
|
2029
|
+
|
|
2030
|
+
if (!voiceId) {
|
|
2031
|
+
console.log(" ā ļø No voice ID available");
|
|
2032
|
+
return [false, null];
|
|
2033
|
+
}
|
|
2034
|
+
|
|
2035
|
+
try {
|
|
2036
|
+
const { Supertone } = await import("../src/index.js");
|
|
2037
|
+
const models = await import("../src/models/index.js");
|
|
2038
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
2039
|
+
|
|
2040
|
+
const voiceSettings = {
|
|
2041
|
+
speed: 0.8,
|
|
2042
|
+
};
|
|
2043
|
+
|
|
2044
|
+
console.log(
|
|
2045
|
+
` š Predicting duration with voice settings for voice '${voiceId}'...`
|
|
2046
|
+
);
|
|
2047
|
+
console.log(` Settings: speed=${voiceSettings.speed}`);
|
|
2048
|
+
|
|
2049
|
+
const response = await client.textToSpeech.predictDuration({
|
|
2050
|
+
voiceId,
|
|
2051
|
+
predictTTSDurationUsingCharacterRequest: {
|
|
2052
|
+
text: "This is a duration test with adjusted speed.",
|
|
2053
|
+
language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
|
|
2054
|
+
voiceSettings,
|
|
2055
|
+
},
|
|
2056
|
+
});
|
|
2057
|
+
|
|
2058
|
+
console.log(` ā
Predicted duration: ${response.duration}s`);
|
|
2059
|
+
|
|
2060
|
+
return [true, response];
|
|
2061
|
+
} catch (e: any) {
|
|
2062
|
+
console.error(` ā Error: ${e.message || e}`);
|
|
2063
|
+
return [false, e];
|
|
2064
|
+
}
|
|
2065
|
+
}
|
|
2066
|
+
|
|
2067
|
+
/**
|
|
2068
|
+
* Test TTS streaming with voice settings
|
|
2069
|
+
*/
|
|
2070
|
+
async function testStreamSpeechWithVoiceSettings(
|
|
2071
|
+
voiceId: string | null
|
|
2072
|
+
): Promise<[boolean, any]> {
|
|
2073
|
+
console.log("š” TTS Streaming with Voice Settings Test");
|
|
2074
|
+
|
|
2075
|
+
if (!voiceId) {
|
|
2076
|
+
console.log(" ā ļø No voice ID available");
|
|
2077
|
+
return [false, null];
|
|
2078
|
+
}
|
|
2079
|
+
|
|
2080
|
+
try {
|
|
2081
|
+
const { Supertone } = await import("../src/index.js");
|
|
2082
|
+
const models = await import("../src/models/index.js");
|
|
2083
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
2084
|
+
|
|
2085
|
+
const voiceSettings = {
|
|
2086
|
+
pitchShift: 1.05,
|
|
2087
|
+
speed: 1.1,
|
|
2088
|
+
};
|
|
2089
|
+
|
|
2090
|
+
console.log(
|
|
2091
|
+
` š Streaming speech with voice settings for voice '${voiceId}'...`
|
|
2092
|
+
);
|
|
2093
|
+
console.log(
|
|
2094
|
+
` Settings: pitchShift=${voiceSettings.pitchShift}, speed=${voiceSettings.speed}`
|
|
2095
|
+
);
|
|
2096
|
+
console.log(" ā ļø This test consumes credits!");
|
|
2097
|
+
|
|
2098
|
+
const response = await client.textToSpeech.streamSpeech({
|
|
2099
|
+
voiceId,
|
|
2100
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
2101
|
+
text: "Streaming with adjusted voice settings.",
|
|
2102
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
2103
|
+
outputFormat:
|
|
2104
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
2105
|
+
voiceSettings,
|
|
2106
|
+
},
|
|
2107
|
+
});
|
|
2108
|
+
|
|
2109
|
+
console.log(` ā
Stream with voice settings started successfully`);
|
|
2110
|
+
|
|
2111
|
+
return [true, response];
|
|
2112
|
+
} catch (e: any) {
|
|
2113
|
+
console.error(` ā Error: ${e.message || e}`);
|
|
2114
|
+
return [false, e];
|
|
2115
|
+
}
|
|
2116
|
+
}
|
|
2117
|
+
|
|
2118
|
+
/**
|
|
2119
|
+
* Test MP3 format TTS
|
|
2120
|
+
*/
|
|
2121
|
+
async function testCreateSpeechMp3(
|
|
2122
|
+
voiceId: string | null
|
|
2123
|
+
): Promise<[boolean, any]> {
|
|
2124
|
+
console.log("š¤ MP3 Format TTS Test");
|
|
2125
|
+
|
|
2126
|
+
if (!voiceId) {
|
|
2127
|
+
console.log(" ā ļø No voice ID available");
|
|
2128
|
+
return [false, null];
|
|
2129
|
+
}
|
|
2130
|
+
|
|
2131
|
+
try {
|
|
2132
|
+
const { Supertone } = await import("../src/index.js");
|
|
2133
|
+
const models = await import("../src/models/index.js");
|
|
2134
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
2135
|
+
|
|
2136
|
+
console.log(` š MP3 TTS conversion with voice '${voiceId}'...`);
|
|
2137
|
+
console.log(" ā ļø This test consumes credits!");
|
|
2138
|
+
|
|
2139
|
+
const response = await client.textToSpeech.createSpeech({
|
|
2140
|
+
voiceId,
|
|
2141
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
2142
|
+
text: "Hello! This is an MP3 format SDK test. Let's verify if it works correctly.",
|
|
2143
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
2144
|
+
outputFormat:
|
|
2145
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Mp3,
|
|
2146
|
+
style: "neutral",
|
|
2147
|
+
model: "sona_speech_1",
|
|
2148
|
+
},
|
|
2149
|
+
});
|
|
2150
|
+
|
|
2151
|
+
console.log(` ā
MP3 TTS conversion success`);
|
|
2152
|
+
|
|
2153
|
+
if (response.result) {
|
|
2154
|
+
const outputFile = "test_create_speech_output.mp3";
|
|
2155
|
+
const audioData = await extractAudioData(response);
|
|
2156
|
+
|
|
2157
|
+
fs.writeFileSync(outputFile, audioData);
|
|
2158
|
+
console.log(` š¾ MP3 audio file saved: ${outputFile}`);
|
|
2159
|
+
|
|
2160
|
+
// Verify MP3 header
|
|
2161
|
+
const header = audioData.slice(0, 10);
|
|
2162
|
+
if (header[0] === 0x49 && header[1] === 0x44 && header[2] === 0x33) {
|
|
2163
|
+
console.log(` ā
Valid MP3 file generated (ID3 tag)`);
|
|
2164
|
+
} else if (
|
|
2165
|
+
(header[0] === 0xff && header[1] === 0xfb) ||
|
|
2166
|
+
(header[0] === 0xff && header[1] === 0xfa)
|
|
2167
|
+
) {
|
|
2168
|
+
console.log(` ā
Valid MP3 file generated (MPEG frame)`);
|
|
2169
|
+
} else {
|
|
2170
|
+
console.log(
|
|
2171
|
+
` š MP3 header: ${Array.from(header.slice(0, 10))
|
|
2172
|
+
.map((b) => b.toString(16).padStart(2, "0"))
|
|
2173
|
+
.join(" ")} (needs verification)`
|
|
2174
|
+
);
|
|
2175
|
+
}
|
|
2176
|
+
}
|
|
2177
|
+
|
|
2178
|
+
return [true, response];
|
|
2179
|
+
} catch (e: any) {
|
|
2180
|
+
console.error(` ā Error: ${e.message || e}`);
|
|
2181
|
+
return [false, e];
|
|
2182
|
+
}
|
|
2183
|
+
}
|
|
2184
|
+
|
|
2185
|
+
/**
|
|
2186
|
+
* Test MP3 format with long text
|
|
2187
|
+
*/
|
|
2188
|
+
async function testCreateSpeechLongTextMp3(
|
|
2189
|
+
voiceId: string | null
|
|
2190
|
+
): Promise<[boolean, any]> {
|
|
2191
|
+
console.log("š Long Text MP3 Auto-Chunking TTS Test (300+ chars)");
|
|
2192
|
+
|
|
2193
|
+
if (!voiceId) {
|
|
2194
|
+
console.log(" ā ļø No voice ID available");
|
|
2195
|
+
return [false, null];
|
|
2196
|
+
}
|
|
2197
|
+
|
|
2198
|
+
try {
|
|
2199
|
+
const { Supertone } = await import("../src/index.js");
|
|
2200
|
+
const models = await import("../src/models/index.js");
|
|
2201
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
2202
|
+
|
|
2203
|
+
const longText = `
|
|
2204
|
+
Hello! This is a very long text MP3 auto-chunking TTS test exceeding 300 characters.
|
|
2205
|
+
The newly implemented SDK automatically divides long text into multiple chunks for processing.
|
|
2206
|
+
Real-time streaming text-to-speech technology plays a crucial role in modern AI applications.
|
|
2207
|
+
It is an indispensable technology especially in conversational services, live broadcasting, and real-time translation services.
|
|
2208
|
+
Through the auto-chunking feature, long texts are naturally divided into multiple small segments for processing.
|
|
2209
|
+
Each segment is intelligently segmented considering sentence and word boundaries, enabling natural speech generation.
|
|
2210
|
+
Now users don't need to worry about text length or output format, as the SDK automatically handles everything in MP3 format too.
|
|
2211
|
+
`.trim();
|
|
2212
|
+
|
|
2213
|
+
const actualLength = longText.length;
|
|
2214
|
+
console.log(
|
|
2215
|
+
` š Test text length: ${actualLength} characters (exceeds 300)`
|
|
2216
|
+
);
|
|
1366
2217
|
console.log(` š§ Auto-chunking enabled for MP3 format`);
|
|
1367
2218
|
|
|
1368
2219
|
console.log(` š Converting long text to MP3 with voice '${voiceId}'...`);
|
|
@@ -1549,7 +2400,7 @@ async function main(): Promise<boolean> {
|
|
|
1549
2400
|
console.log("");
|
|
1550
2401
|
|
|
1551
2402
|
const testResults: TestResult = {};
|
|
1552
|
-
|
|
2403
|
+
const voiceIdForTTS: string = "91992bbd4758bdcf9c9b01";
|
|
1553
2404
|
let customVoiceId: string | null = null;
|
|
1554
2405
|
let createdCustomVoiceId: string | null = null;
|
|
1555
2406
|
|
|
@@ -1572,9 +2423,6 @@ async function main(): Promise<boolean> {
|
|
|
1572
2423
|
|
|
1573
2424
|
[success, result] = await testListVoices();
|
|
1574
2425
|
testResults["list_voices"] = success;
|
|
1575
|
-
if (success && result.voiceId) {
|
|
1576
|
-
voiceIdForTTS = result.voiceId;
|
|
1577
|
-
}
|
|
1578
2426
|
|
|
1579
2427
|
[success, result] = await testSearchVoices();
|
|
1580
2428
|
testResults["search_voices"] = success;
|
|
@@ -1643,6 +2491,67 @@ async function main(): Promise<boolean> {
|
|
|
1643
2491
|
[success, result] = await testStreamSpeech(voiceIdForTTS);
|
|
1644
2492
|
testResults["stream_speech"] = success;
|
|
1645
2493
|
|
|
2494
|
+
// 5.5 New Model Tests (sona_speech_2, supertonic_api_1)
|
|
2495
|
+
console.log("\nš¤ New Model Tests (sona_speech_2, supertonic_api_1)");
|
|
2496
|
+
console.log("-".repeat(60));
|
|
2497
|
+
console.log("ā ļø These tests consume credits!");
|
|
2498
|
+
console.log("");
|
|
2499
|
+
|
|
2500
|
+
[success, result] = await testCreateSpeechWithSonaSpeech2(voiceIdForTTS);
|
|
2501
|
+
testResults["create_speech_sona_speech_2"] = success;
|
|
2502
|
+
|
|
2503
|
+
[success, result] = await testCreateSpeechWithSupertonicApi1(voiceIdForTTS);
|
|
2504
|
+
testResults["create_speech_supertonic_api_1"] = success;
|
|
2505
|
+
|
|
2506
|
+
[success, result] = await testCreateSpeechWithUnsupportedModel(
|
|
2507
|
+
voiceIdForTTS
|
|
2508
|
+
);
|
|
2509
|
+
testResults["create_speech_unsupported_model"] = success;
|
|
2510
|
+
|
|
2511
|
+
[success, result] = await testPredictDurationWithSonaSpeech2(voiceIdForTTS);
|
|
2512
|
+
testResults["predict_duration_sona_speech_2"] = success;
|
|
2513
|
+
|
|
2514
|
+
[success, result] = await testPredictDurationWithSupertonicApi1(
|
|
2515
|
+
voiceIdForTTS
|
|
2516
|
+
);
|
|
2517
|
+
testResults["predict_duration_supertonic_api_1"] = success;
|
|
2518
|
+
|
|
2519
|
+
[success, result] = await testPredictDurationWithUnsupportedModel(
|
|
2520
|
+
voiceIdForTTS
|
|
2521
|
+
);
|
|
2522
|
+
testResults["predict_duration_unsupported_model"] = success;
|
|
2523
|
+
|
|
2524
|
+
// 5.6 Multilingual Tests per Model
|
|
2525
|
+
console.log("\nš Multilingual Tests per Model");
|
|
2526
|
+
console.log("-".repeat(60));
|
|
2527
|
+
console.log("ā ļø These tests consume credits!");
|
|
2528
|
+
console.log("");
|
|
2529
|
+
|
|
2530
|
+
[success, result] = await testMultilingualSonaSpeech1(voiceIdForTTS);
|
|
2531
|
+
testResults["multilingual_sona_speech_1"] = success;
|
|
2532
|
+
|
|
2533
|
+
[success, result] = await testMultilingualSonaSpeech2(voiceIdForTTS);
|
|
2534
|
+
testResults["multilingual_sona_speech_2"] = success;
|
|
2535
|
+
|
|
2536
|
+
[success, result] = await testMultilingualSupertonicApi1(voiceIdForTTS);
|
|
2537
|
+
testResults["multilingual_supertonic_api_1"] = success;
|
|
2538
|
+
|
|
2539
|
+
// 5.7 Unsupported Language Tests
|
|
2540
|
+
console.log("\nš« Unsupported Language Tests");
|
|
2541
|
+
console.log("-".repeat(60));
|
|
2542
|
+
console.log(
|
|
2543
|
+
"ā ļø These tests verify error handling for unsupported model-language combinations!"
|
|
2544
|
+
);
|
|
2545
|
+
console.log("");
|
|
2546
|
+
|
|
2547
|
+
[success, result] = await testUnsupportedLanguageSonaSpeech1(voiceIdForTTS);
|
|
2548
|
+
testResults["unsupported_lang_sona_speech_1"] = success;
|
|
2549
|
+
|
|
2550
|
+
[success, result] = await testUnsupportedLanguageSupertonicApi1(
|
|
2551
|
+
voiceIdForTTS
|
|
2552
|
+
);
|
|
2553
|
+
testResults["unsupported_lang_supertonic_api_1"] = success;
|
|
2554
|
+
|
|
1646
2555
|
// 6. TTS Long Text Tests
|
|
1647
2556
|
console.log("\nš Text-to-Speech Long Text Tests");
|
|
1648
2557
|
console.log("-".repeat(60));
|
|
@@ -1652,6 +2561,14 @@ async function main(): Promise<boolean> {
|
|
|
1652
2561
|
[success, result] = await testCreateSpeechLongText(voiceIdForTTS);
|
|
1653
2562
|
testResults["create_speech_long_text"] = success;
|
|
1654
2563
|
|
|
2564
|
+
[success, result] = await testCreateSpeechLongSentenceNoPunctuation(
|
|
2565
|
+
voiceIdForTTS
|
|
2566
|
+
);
|
|
2567
|
+
testResults["create_speech_long_sentence_no_punctuation"] = success;
|
|
2568
|
+
|
|
2569
|
+
[success, result] = await testCreateSpeechJapaneseNoSpaces(voiceIdForTTS);
|
|
2570
|
+
testResults["create_speech_japanese_no_spaces"] = success;
|
|
2571
|
+
|
|
1655
2572
|
[success, result] = await testStreamSpeechLongText(voiceIdForTTS);
|
|
1656
2573
|
testResults["stream_speech_long_text"] = success;
|
|
1657
2574
|
|
|
@@ -1761,6 +2678,9 @@ async function main(): Promise<boolean> {
|
|
|
1761
2678
|
" ⢠Text-to-Speech: predictDuration, createSpeech, streamSpeech"
|
|
1762
2679
|
);
|
|
1763
2680
|
console.log(" ⢠TTS Long Text: createSpeechLongText, streamSpeechLongText");
|
|
2681
|
+
console.log(
|
|
2682
|
+
" ⢠TTS Chunking Strategies: Word-based (no punctuation), Character-based (Japanese)"
|
|
2683
|
+
);
|
|
1764
2684
|
console.log(
|
|
1765
2685
|
" ⢠TTS with Voice Settings: createSpeechWithVoiceSettings, predictDurationWithVoiceSettings, streamSpeechWithVoiceSettings"
|
|
1766
2686
|
);
|
|
@@ -1773,6 +2693,21 @@ async function main(): Promise<boolean> {
|
|
|
1773
2693
|
console.log(
|
|
1774
2694
|
" ⢠Custom Features: Auto-chunking in createSpeech/streamSpeech (transparent)"
|
|
1775
2695
|
);
|
|
2696
|
+
console.log("");
|
|
2697
|
+
console.log("š¤ New Model & Language Tests:");
|
|
2698
|
+
console.log(
|
|
2699
|
+
" ⢠New Models: sona_speech_2, supertonic_api_1 (createSpeech & predictDuration)"
|
|
2700
|
+
);
|
|
2701
|
+
console.log(
|
|
2702
|
+
" ⢠Unsupported Model Validation: Error handling for invalid model names"
|
|
2703
|
+
);
|
|
2704
|
+
console.log(" ⢠Multilingual per Model:");
|
|
2705
|
+
console.log(" - sona_speech_1: ko, en, ja");
|
|
2706
|
+
console.log(" - sona_speech_2: all 23 languages");
|
|
2707
|
+
console.log(" - supertonic_api_1: ko, en, ja, es, pt");
|
|
2708
|
+
console.log(
|
|
2709
|
+
" ⢠Unsupported Language Validation: Error handling for invalid model-language combinations"
|
|
2710
|
+
);
|
|
1776
2711
|
|
|
1777
2712
|
if (customVoiceId) {
|
|
1778
2713
|
console.log("");
|