@supertone/supertone 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +119 -69
- package/custom_test/realtime_tts_player.ts +177 -12
- package/custom_test/test_pronunciation_dictionary.ts +227 -0
- package/custom_test/test_real_api.ts +1677 -162
- package/custom_test/test_text_utils_chunk_text_punctuation.ts +55 -0
- package/dist/commonjs/lib/config.d.ts +2 -2
- package/dist/commonjs/lib/config.d.ts.map +1 -1
- package/dist/commonjs/lib/config.js +2 -2
- package/dist/commonjs/lib/config.js.map +1 -1
- package/dist/commonjs/lib/custom_utils/index.d.ts +1 -0
- package/dist/commonjs/lib/custom_utils/index.d.ts.map +1 -1
- package/dist/commonjs/lib/custom_utils/index.js +5 -1
- package/dist/commonjs/lib/custom_utils/index.js.map +1 -1
- package/dist/commonjs/lib/custom_utils/pronunciation_utils.d.ts +24 -0
- package/dist/commonjs/lib/custom_utils/pronunciation_utils.d.ts.map +1 -0
- package/dist/commonjs/lib/custom_utils/pronunciation_utils.js +145 -0
- package/dist/commonjs/lib/custom_utils/pronunciation_utils.js.map +1 -0
- package/dist/commonjs/lib/custom_utils/text_utils.d.ts +8 -1
- package/dist/commonjs/lib/custom_utils/text_utils.d.ts.map +1 -1
- package/dist/commonjs/lib/custom_utils/text_utils.js +125 -7
- package/dist/commonjs/lib/custom_utils/text_utils.js.map +1 -1
- package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.d.ts +92 -1
- package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.d.ts.map +1 -1
- package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.js +48 -3
- package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.js.map +1 -1
- package/dist/commonjs/models/predictttsdurationusingcharacterrequest.d.ts +92 -1
- package/dist/commonjs/models/predictttsdurationusingcharacterrequest.d.ts.map +1 -1
- package/dist/commonjs/models/predictttsdurationusingcharacterrequest.js +46 -3
- package/dist/commonjs/models/predictttsdurationusingcharacterrequest.js.map +1 -1
- package/dist/commonjs/sdk/texttospeech.d.ts +17 -6
- package/dist/commonjs/sdk/texttospeech.d.ts.map +1 -1
- package/dist/commonjs/sdk/texttospeech.js +48 -25
- package/dist/commonjs/sdk/texttospeech.js.map +1 -1
- package/dist/esm/lib/config.d.ts +2 -2
- package/dist/esm/lib/config.d.ts.map +1 -1
- package/dist/esm/lib/config.js +2 -2
- package/dist/esm/lib/config.js.map +1 -1
- package/dist/esm/lib/custom_utils/index.d.ts +1 -0
- package/dist/esm/lib/custom_utils/index.d.ts.map +1 -1
- package/dist/esm/lib/custom_utils/index.js +2 -0
- package/dist/esm/lib/custom_utils/index.js.map +1 -1
- package/dist/esm/lib/custom_utils/pronunciation_utils.d.ts +24 -0
- package/dist/esm/lib/custom_utils/pronunciation_utils.d.ts.map +1 -0
- package/dist/esm/lib/custom_utils/pronunciation_utils.js +140 -0
- package/dist/esm/lib/custom_utils/pronunciation_utils.js.map +1 -0
- package/dist/esm/lib/custom_utils/text_utils.d.ts +8 -1
- package/dist/esm/lib/custom_utils/text_utils.d.ts.map +1 -1
- package/dist/esm/lib/custom_utils/text_utils.js +125 -7
- package/dist/esm/lib/custom_utils/text_utils.js.map +1 -1
- package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.d.ts +92 -1
- package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.d.ts.map +1 -1
- package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.js +47 -2
- package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.js.map +1 -1
- package/dist/esm/models/predictttsdurationusingcharacterrequest.d.ts +92 -1
- package/dist/esm/models/predictttsdurationusingcharacterrequest.d.ts.map +1 -1
- package/dist/esm/models/predictttsdurationusingcharacterrequest.js +45 -2
- package/dist/esm/models/predictttsdurationusingcharacterrequest.js.map +1 -1
- package/dist/esm/sdk/texttospeech.d.ts +17 -6
- package/dist/esm/sdk/texttospeech.d.ts.map +1 -1
- package/dist/esm/sdk/texttospeech.js +49 -26
- package/dist/esm/sdk/texttospeech.js.map +1 -1
- package/examples/custom_voices/create_cloned_voice.ts +4 -3
- package/examples/custom_voices/delete_custom_voice.ts +2 -7
- package/examples/custom_voices/edit_custom_voice.ts +2 -6
- package/examples/custom_voices/get_custom_voice.ts +2 -7
- package/examples/custom_voices/list_custom_voices.ts +2 -7
- package/examples/custom_voices/search_custom_voices.ts +2 -6
- package/examples/text_to_speech/create_speech.ts +3 -8
- package/examples/text_to_speech/create_speech_long_text.ts +3 -7
- package/examples/text_to_speech/create_speech_with_phonemes.ts +3 -7
- package/examples/text_to_speech/create_speech_with_voice_settings.ts +3 -8
- package/examples/text_to_speech/predict_duration.ts +3 -7
- package/examples/text_to_speech/stream_speech.ts +3 -7
- package/examples/text_to_speech/stream_speech_long_text.ts +3 -7
- package/examples/text_to_speech/stream_speech_with_phonemes.ts +3 -7
- package/examples/text_to_speech/stream_speech_with_voice_settings.ts +3 -7
- package/examples/usage/get_credit_balance.ts +2 -6
- package/examples/usage/get_usage.ts +2 -6
- package/examples/usage/get_voice_usage.ts +2 -7
- package/examples/voices/get_voice.ts +2 -6
- package/examples/voices/list_voices.ts +2 -6
- package/examples/voices/search_voices.ts +2 -7
- package/jsr.json +1 -1
- package/openapi.json +101 -9
- package/package.json +1 -1
- package/src/lib/config.ts +41 -41
- package/src/lib/custom_utils/index.ts +7 -0
- package/src/lib/custom_utils/pronunciation_utils.ts +193 -0
- package/src/lib/custom_utils/text_utils.ts +138 -7
- package/src/models/apiconverttexttospeechusingcharacterrequest.ts +62 -3
- package/src/models/predictttsdurationusingcharacterrequest.ts +64 -3
- package/src/sdk/texttospeech.ts +99 -68
|
@@ -127,19 +127,25 @@ async function extractAudioData(response: any): Promise<Uint8Array> {
|
|
|
127
127
|
console.log(` š Debug - has audioBase64: ${"audioBase64" in result}`);
|
|
128
128
|
console.log(` š Debug - has getReader: ${"getReader" in result}`);
|
|
129
129
|
}
|
|
130
|
-
|
|
130
|
+
|
|
131
131
|
// Check for capital-case Result (SDK internal structure)
|
|
132
|
-
if (
|
|
132
|
+
if (
|
|
133
|
+
!result ||
|
|
134
|
+
(typeof result === "object" && Object.keys(result).length === 0)
|
|
135
|
+
) {
|
|
133
136
|
console.log(` š” Checking SDK internal Result field...`);
|
|
134
137
|
if ((response as any).Result) {
|
|
135
138
|
result = (response as any).Result;
|
|
136
139
|
console.log(` ā
Found Result (capital R) - using that instead`);
|
|
137
140
|
}
|
|
138
141
|
}
|
|
139
|
-
|
|
142
|
+
|
|
140
143
|
// Debug response headers
|
|
141
144
|
if (response.headers) {
|
|
142
|
-
console.log(
|
|
145
|
+
console.log(
|
|
146
|
+
` š Debug - response headers:`,
|
|
147
|
+
JSON.stringify(response.headers, null, 2)
|
|
148
|
+
);
|
|
143
149
|
}
|
|
144
150
|
|
|
145
151
|
if (result instanceof Uint8Array) {
|
|
@@ -198,7 +204,7 @@ async function extractAudioData(response: any): Promise<Uint8Array> {
|
|
|
198
204
|
return bytes;
|
|
199
205
|
}
|
|
200
206
|
}
|
|
201
|
-
|
|
207
|
+
|
|
202
208
|
// Handle empty object case - this might happen when the SDK doesn't properly parse audio responses
|
|
203
209
|
if (
|
|
204
210
|
typeof result === "object" &&
|
|
@@ -207,22 +213,25 @@ async function extractAudioData(response: any): Promise<Uint8Array> {
|
|
|
207
213
|
) {
|
|
208
214
|
console.log(` ā ļø Warning: Empty result object detected`);
|
|
209
215
|
console.log(` š” This might be a parsing issue with the SDK`);
|
|
210
|
-
console.log(
|
|
211
|
-
|
|
216
|
+
console.log(
|
|
217
|
+
` š” Check if the response was actually a stream but got parsed as an empty object`
|
|
218
|
+
);
|
|
219
|
+
|
|
212
220
|
throw new Error(
|
|
213
221
|
`Empty result object - SDK may have failed to parse audio stream response. ` +
|
|
214
|
-
|
|
222
|
+
`This usually happens when audio/* content-type responses are not properly handled.`
|
|
215
223
|
);
|
|
216
224
|
}
|
|
217
225
|
|
|
218
226
|
// Enhanced error message with debug info
|
|
219
|
-
const errorDetails =
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
227
|
+
const errorDetails =
|
|
228
|
+
typeof result === "object" && result !== null
|
|
229
|
+
? `constructor: ${result.constructor.name}, keys: [${Object.keys(
|
|
230
|
+
result
|
|
231
|
+
).join(", ")}]`
|
|
232
|
+
: `value: ${result}`;
|
|
233
|
+
|
|
234
|
+
throw new Error(`Unsupported result type: ${typeof result}, ${errorDetails}`);
|
|
226
235
|
}
|
|
227
236
|
|
|
228
237
|
/**
|
|
@@ -928,12 +937,15 @@ async function testCreateSpeechLongText(
|
|
|
928
937
|
}
|
|
929
938
|
|
|
930
939
|
/**
|
|
931
|
-
* Test TTS
|
|
940
|
+
* Test TTS with long text WITHOUT punctuation (word-based chunking)
|
|
941
|
+
* This tests the word-based splitting fallback when sentences exceed 300 chars
|
|
932
942
|
*/
|
|
933
|
-
async function
|
|
943
|
+
async function testCreateSpeechLongSentenceNoPunctuation(
|
|
934
944
|
voiceId: string | null
|
|
935
945
|
): Promise<[boolean, any]> {
|
|
936
|
-
console.log(
|
|
946
|
+
console.log(
|
|
947
|
+
"š Long Sentence WITHOUT Punctuation Test (Word-based chunking)"
|
|
948
|
+
);
|
|
937
949
|
|
|
938
950
|
if (!voiceId) {
|
|
939
951
|
console.log(" ā ļø No voice ID available");
|
|
@@ -945,44 +957,64 @@ async function testStreamSpeechLongText(
|
|
|
945
957
|
const models = await import("../src/models/index.js");
|
|
946
958
|
const client = new Supertone({ apiKey: API_KEY });
|
|
947
959
|
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
`
|
|
953
|
-
.trim()
|
|
954
|
-
.repeat(3);
|
|
960
|
+
// Long text without punctuation - forces word-based splitting
|
|
961
|
+
// This is a single continuous sentence with no periods or other punctuation marks
|
|
962
|
+
const longSentenceNoPunctuation =
|
|
963
|
+
"This is a very long sentence without any punctuation marks that is designed to test the word based chunking feature of the SDK when a sentence exceeds the maximum character limit of three hundred characters the system should automatically split this text by word boundaries rather than sentence boundaries to ensure proper processing and this behavior is critical for handling user generated content that may not follow standard punctuation conventions such as chat messages or informal text inputs that users commonly provide in real world applications where grammatically correct sentences are not always guaranteed";
|
|
955
964
|
|
|
956
|
-
|
|
957
|
-
console.log(
|
|
965
|
+
const actualLength = longSentenceNoPunctuation.length;
|
|
966
|
+
console.log(
|
|
967
|
+
` š Text length: ${actualLength} characters (single sentence, no punctuation)`
|
|
968
|
+
);
|
|
969
|
+
console.log(` š§ Expected behavior: Word-based chunking`);
|
|
958
970
|
console.log(" ā ļø This test consumes credits!");
|
|
959
971
|
|
|
960
|
-
const response = await client.textToSpeech.
|
|
972
|
+
const response = await client.textToSpeech.createSpeech({
|
|
961
973
|
voiceId,
|
|
962
974
|
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
963
|
-
text:
|
|
975
|
+
text: longSentenceNoPunctuation,
|
|
964
976
|
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
965
977
|
outputFormat:
|
|
966
978
|
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
979
|
+
style: "neutral",
|
|
980
|
+
model: "sona_speech_1",
|
|
967
981
|
},
|
|
968
982
|
});
|
|
969
983
|
|
|
970
|
-
|
|
984
|
+
if (response.result) {
|
|
985
|
+
const audioData = await extractAudioData(response);
|
|
986
|
+
|
|
987
|
+
console.log(
|
|
988
|
+
` ā
Word-based chunking TTS success: ${audioData.length} bytes`
|
|
989
|
+
);
|
|
990
|
+
console.log(
|
|
991
|
+
` šÆ Long sentence without punctuation processed correctly!`
|
|
992
|
+
);
|
|
993
|
+
|
|
994
|
+
const outputFile = "test_word_chunking_speech_output.wav";
|
|
995
|
+
fs.writeFileSync(outputFile, audioData);
|
|
996
|
+
console.log(` š¾ Audio saved: ${outputFile}`);
|
|
997
|
+
|
|
998
|
+
const estimatedChunks = Math.ceil(actualLength / 300);
|
|
999
|
+
console.log(` š Estimated chunks: ${estimatedChunks}`);
|
|
1000
|
+
}
|
|
971
1001
|
|
|
972
1002
|
return [true, response];
|
|
973
1003
|
} catch (e: any) {
|
|
974
|
-
|
|
1004
|
+
logDetailedError(e, "Long sentence word-based chunking");
|
|
975
1005
|
return [false, e];
|
|
976
1006
|
}
|
|
977
1007
|
}
|
|
978
1008
|
|
|
979
1009
|
/**
|
|
980
|
-
* Test TTS with
|
|
1010
|
+
* Test TTS with Japanese text (character-based chunking)
|
|
1011
|
+
* Japanese doesn't use spaces, AND this test uses NO punctuation marks (ćļ¼ļ¼etc)
|
|
1012
|
+
* to ensure the SDK uses character-based splitting
|
|
981
1013
|
*/
|
|
982
|
-
async function
|
|
1014
|
+
async function testCreateSpeechJapaneseNoSpaces(
|
|
983
1015
|
voiceId: string | null
|
|
984
1016
|
): Promise<[boolean, any]> {
|
|
985
|
-
console.log("
|
|
1017
|
+
console.log("šÆšµ Japanese Text Test (Character-based chunking)");
|
|
986
1018
|
|
|
987
1019
|
if (!voiceId) {
|
|
988
1020
|
console.log(" ā ļø No voice ID available");
|
|
@@ -994,58 +1026,72 @@ async function testCreateSpeechWithVoiceSettings(
|
|
|
994
1026
|
const models = await import("../src/models/index.js");
|
|
995
1027
|
const client = new Supertone({ apiKey: API_KEY });
|
|
996
1028
|
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1029
|
+
// Long Japanese text WITHOUT spaces AND WITHOUT punctuation - forces character-based splitting
|
|
1030
|
+
// This text intentionally has NO punctuation marks (ćļ¼ļ¼etc) to test pure character-based chunking
|
|
1031
|
+
// Text length: ~450 characters (exceeds 300 char limit)
|
|
1032
|
+
const longJapaneseText =
|
|
1033
|
+
"ę„ę¬čŖć®ććć¹ććÆéåøøć¹ćć¼ć¹ćå«ć¾ćŖćććē¹å„ćŖå¦ēćåæ
č¦ć§ć" +
|
|
1034
|
+
"ćć®ćć¹ććÆäøē¾ęåćč¶
ććé·ćę„ę¬čŖććć¹ććę£ććå¦ēćććććØć確čŖćć¾ć" +
|
|
1035
|
+
"čŖē¶čØčŖå¦ēęč”ć®ēŗå±ć«ććé³å£°åęć®åč³ŖćÆå¤§å¹
ć«åäøćć¾ćć" +
|
|
1036
|
+
"ē¹ć«ćć£ć¼ćć©ć¼ćć³ć°ćę“»ēØććęę°ć®ććć¹ćé³å£°å¤ęć·ć¹ćć ćÆäŗŗéć®ēŗč©±ć«éåøøć«čæćčŖē¶ćŖé³å£°ćēęć§ćć¾ć" +
|
|
1037
|
+
"ć¹ćć¼ć¹ććŖćčØčŖć§ćÆęååä½ć§ć®åå²ćåæ
č¦ć§ćććć®SDKćÆćć®ćććŖē¶ę³ćčŖåēć«ę¤åŗćć¦é©åć«å¦ēćć¾ć" +
|
|
1038
|
+
"ććć«ććę„ę¬čŖäøå½čŖéå½čŖćŖć©ć®ć¢ćøć¢čØčŖć§ćåé”ćŖćé·ćććć¹ććé³å£°ć«å¤ęććććØćć§ćć¾ć" +
|
|
1039
|
+
"é³å£°åęęč”ćÆč¦č¦é害č
ć®ććć®ć¢ćÆć»ć·ććŖćć£ćć¼ć«ćć対話åAIć¢ć·ć¹ćæć³ćć¾ć§å¹
åŗćēØéć§ę“»ēØććć¦ćć¾ć" +
|
|
1040
|
+
"ććć«ćŖć¢ć«ćæć¤ć ć¹ććŖć¼ćć³ć°ęč”ćØēµćæåćććććØć§å¾
ć”ęéć大å¹
ć«ēēø®ćåŖććć¦ć¼ć¶ć¼ä½éØćęä¾ććććØćć§ćć¾ć" +
|
|
1041
|
+
"ęę°ć®é³å£°åęęč”ćÆęę
ćęęćčŖē¶ć«č”Øē¾ć§ććććć«ćŖćć¾ćć";
|
|
1042
|
+
|
|
1043
|
+
const actualLength = longJapaneseText.length;
|
|
1003
1044
|
console.log(
|
|
1004
|
-
`
|
|
1045
|
+
` š Text length: ${actualLength} characters (Japanese, no spaces, no punctuation)`
|
|
1005
1046
|
);
|
|
1006
1047
|
console.log(
|
|
1007
|
-
`
|
|
1048
|
+
` š§ Expected behavior: Character-based chunking (300 chars per chunk)`
|
|
1008
1049
|
);
|
|
1009
1050
|
console.log(" ā ļø This test consumes credits!");
|
|
1010
1051
|
|
|
1011
1052
|
const response = await client.textToSpeech.createSpeech({
|
|
1012
1053
|
voiceId,
|
|
1013
1054
|
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1014
|
-
text:
|
|
1015
|
-
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.
|
|
1055
|
+
text: longJapaneseText,
|
|
1056
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.Ja,
|
|
1016
1057
|
outputFormat:
|
|
1017
1058
|
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1018
1059
|
style: "neutral",
|
|
1019
1060
|
model: "sona_speech_1",
|
|
1020
|
-
voiceSettings,
|
|
1021
|
-
includePhonemes: false,
|
|
1022
1061
|
},
|
|
1023
1062
|
});
|
|
1024
1063
|
|
|
1025
|
-
console.log(` ā
TTS with voice settings success`);
|
|
1026
|
-
|
|
1027
1064
|
if (response.result) {
|
|
1028
|
-
const outputFile = "test_voice_settings_speech_output.wav";
|
|
1029
1065
|
const audioData = await extractAudioData(response);
|
|
1030
1066
|
|
|
1067
|
+
console.log(
|
|
1068
|
+
` ā
Character-based chunking TTS success: ${audioData.length} bytes`
|
|
1069
|
+
);
|
|
1070
|
+
console.log(` šÆ Japanese text without spaces processed correctly!`);
|
|
1071
|
+
|
|
1072
|
+
const outputFile = "test_japanese_char_chunking_speech_output.wav";
|
|
1031
1073
|
fs.writeFileSync(outputFile, audioData);
|
|
1032
|
-
console.log(` š¾
|
|
1074
|
+
console.log(` š¾ Audio saved: ${outputFile}`);
|
|
1075
|
+
|
|
1076
|
+
const estimatedChunks = Math.ceil(actualLength / 300);
|
|
1077
|
+
console.log(` š Estimated chunks: ${estimatedChunks}`);
|
|
1033
1078
|
}
|
|
1034
1079
|
|
|
1035
1080
|
return [true, response];
|
|
1036
1081
|
} catch (e: any) {
|
|
1037
|
-
|
|
1082
|
+
logDetailedError(e, "Japanese character-based chunking");
|
|
1038
1083
|
return [false, e];
|
|
1039
1084
|
}
|
|
1040
1085
|
}
|
|
1041
1086
|
|
|
1042
1087
|
/**
|
|
1043
|
-
* Test TTS with
|
|
1088
|
+
* Test TTS with Arabic text and Arabic punctuation marks (Ų Ų Ū)
|
|
1089
|
+
* This tests multilingual sentence punctuation support added in fix/text_utils
|
|
1044
1090
|
*/
|
|
1045
|
-
async function
|
|
1091
|
+
async function testCreateSpeechArabicPunctuation(
|
|
1046
1092
|
voiceId: string | null
|
|
1047
1093
|
): Promise<[boolean, any]> {
|
|
1048
|
-
console.log("
|
|
1094
|
+
console.log("šøš¦ Arabic Text with Arabic Punctuation Test");
|
|
1049
1095
|
|
|
1050
1096
|
if (!voiceId) {
|
|
1051
1097
|
console.log(" ā ļø No voice ID available");
|
|
@@ -1057,81 +1103,71 @@ async function testCreateSpeechWithPhonemes(
|
|
|
1057
1103
|
const models = await import("../src/models/index.js");
|
|
1058
1104
|
const client = new Supertone({ apiKey: API_KEY });
|
|
1059
1105
|
|
|
1106
|
+
// Arabic text with Arabic punctuation marks (Ų Ų Ū Ų)
|
|
1107
|
+
// Text length: ~350 characters (exceeds 300 char limit)
|
|
1108
|
+
const arabicText =
|
|
1109
|
+
"Ł
Ų±ŲŲØŲ§ ŲØŁŁ
ŁŁ Ų§Ų®ŲŖŲØŲ§Ų± ŲŖŁŁŁŲ© ŲŖŲŁŁŁ Ų§ŁŁŲµ Ų„ŁŁ ŁŁŲ§Ł
Ų " +
|
|
1110
|
+
"ŁŲ°Ų§ Ų§ŁŁŲøŲ§Ł
ŁŲÆŲ¹Ł
Ų§ŁŁŲŗŲ© Ų§ŁŲ¹Ų±ŲØŁŲ© ŲØŲ“ŁŁ ŁŲ§Ł
ŁŲ " +
|
|
1111
|
+
"ŁŁ
ŁŁŁ Ų§ŁŲŖŲ¹Ų±Ł Ų¹ŁŁ Ų¹ŁŲ§Ł
Ų§ŲŖ Ų§ŁŲŖŲ±ŁŁŁ
Ų§ŁŲ¹Ų±ŲØŁŲ© Ł
Ų«Ł Ų¹ŁŲ§Ł
Ų© Ų§ŁŲ§Ų³ŲŖŁŁŲ§Ł
ŁŲ¹ŁŲ§Ł
Ų© Ų§ŁŁŲ§ŲµŁŲ© Ų§ŁŁ
ŁŁŁŲ·Ų©Ū " +
|
|
1112
|
+
"ŲŖŁŁŁŲ© Ų§ŁŲ°ŁŲ§Ų” Ų§ŁŲ§ŲµŲ·ŁŲ§Ų¹Ł ŲŖŲŖŲ·ŁŲ± ŲØŲ³Ų±Ų¹Ų© ŁŲØŁŲ±Ų©Ų " +
|
|
1113
|
+
"ŁŲ§ŁŲ¢Ł ŁŁ
ŁŁŁŲ§ ŲŖŲŁŁŁ Ų§ŁŁŲµŁŲµ Ų§ŁŲ·ŁŁŁŲ© Ų„ŁŁ ŁŁŲ§Ł
Ų·ŲØŁŲ¹ŁŲ " +
|
|
1114
|
+
"ŁŲ°Ų§ Ų§ŁŲ§Ų®ŲŖŲØŲ§Ų± ŁŲŖŲŁŁ Ł
Ł Ų£Ł Ų§ŁŁŲøŲ§Ł
ŁŁŲ³Ł
Ų§ŁŁŲµ ŲØŲ“ŁŁ ŲµŲŁŲ Ų¹ŁŲÆ Ų¹ŁŲ§Ł
Ų§ŲŖ Ų§ŁŲŖŲ±ŁŁŁ
Ų§ŁŲ¹Ų±ŲØŁŲ©Ų " +
|
|
1115
|
+
"ŁŲ£Ł
٠أ٠ŁŲ¹Ł
Ł ŁŁ Ų“ŁŲ” ŲØŲ“ŁŁ Ł
Ų«Ų§ŁŁŪ";
|
|
1116
|
+
|
|
1117
|
+
const actualLength = arabicText.length;
|
|
1060
1118
|
console.log(
|
|
1061
|
-
`
|
|
1119
|
+
` š Text length: ${actualLength} characters (Arabic with Arabic punctuation)`
|
|
1062
1120
|
);
|
|
1121
|
+
console.log(` š§ Expected behavior: Sentence-based chunking with Arabic punctuation (Ų Ų Ū)`);
|
|
1063
1122
|
console.log(" ā ļø This test consumes credits!");
|
|
1064
1123
|
|
|
1124
|
+
if (actualLength <= 300) {
|
|
1125
|
+
console.log(` ā Text length ${actualLength} is <= 300, test may not trigger chunking`);
|
|
1126
|
+
}
|
|
1127
|
+
|
|
1065
1128
|
const response = await client.textToSpeech.createSpeech({
|
|
1066
1129
|
voiceId,
|
|
1067
1130
|
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1068
|
-
text:
|
|
1069
|
-
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.
|
|
1131
|
+
text: arabicText,
|
|
1132
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.Ar,
|
|
1070
1133
|
outputFormat:
|
|
1071
1134
|
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1072
1135
|
style: "neutral",
|
|
1073
|
-
model:
|
|
1074
|
-
includePhonemes: true,
|
|
1136
|
+
model: models.APIConvertTextToSpeechUsingCharacterRequestModel.SonaSpeech2,
|
|
1075
1137
|
},
|
|
1076
1138
|
});
|
|
1077
1139
|
|
|
1078
|
-
console.log(` ā
TTS with phonemes success`);
|
|
1079
|
-
|
|
1080
1140
|
if (response.result) {
|
|
1081
|
-
const
|
|
1141
|
+
const audioData = await extractAudioData(response);
|
|
1082
1142
|
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
) {
|
|
1088
|
-
const audioData = await extractAudioData(response);
|
|
1089
|
-
fs.writeFileSync(outputFile, audioData);
|
|
1090
|
-
console.log(` š¾ Phoneme audio file saved: ${outputFile}`);
|
|
1143
|
+
console.log(
|
|
1144
|
+
` ā
Arabic punctuation chunking TTS success: ${audioData.length} bytes`
|
|
1145
|
+
);
|
|
1146
|
+
console.log(` šÆ Arabic text with Arabic punctuation processed correctly!`);
|
|
1091
1147
|
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
console.log(` Symbols count: ${phonemes.symbols?.length || 0}`);
|
|
1099
|
-
console.log(
|
|
1100
|
-
` Durations count: ${phonemes.durations_seconds?.length || 0}`
|
|
1101
|
-
);
|
|
1102
|
-
console.log(
|
|
1103
|
-
` Start times count: ${
|
|
1104
|
-
phonemes.start_times_seconds?.length || 0
|
|
1105
|
-
}`
|
|
1106
|
-
);
|
|
1107
|
-
if (phonemes.symbols && phonemes.symbols.length > 0) {
|
|
1108
|
-
console.log(
|
|
1109
|
-
` First 5 symbols: ${phonemes.symbols.slice(0, 5).join(", ")}`
|
|
1110
|
-
);
|
|
1111
|
-
}
|
|
1112
|
-
}
|
|
1113
|
-
} else {
|
|
1114
|
-
// Binary audio without phonemes
|
|
1115
|
-
const audioData = await extractAudioData(response);
|
|
1116
|
-
fs.writeFileSync(outputFile, audioData);
|
|
1117
|
-
console.log(` š¾ Phoneme audio file saved: ${outputFile}`);
|
|
1118
|
-
}
|
|
1148
|
+
const outputFile = "test_arabic_punctuation_speech_output.wav";
|
|
1149
|
+
fs.writeFileSync(outputFile, audioData);
|
|
1150
|
+
console.log(` š¾ Audio saved: ${outputFile}`);
|
|
1151
|
+
|
|
1152
|
+
const estimatedChunks = Math.ceil(actualLength / 300);
|
|
1153
|
+
console.log(` š Estimated chunks: ${estimatedChunks}`);
|
|
1119
1154
|
}
|
|
1120
1155
|
|
|
1121
1156
|
return [true, response];
|
|
1122
1157
|
} catch (e: any) {
|
|
1123
|
-
|
|
1158
|
+
logDetailedError(e, "Arabic punctuation chunking");
|
|
1124
1159
|
return [false, e];
|
|
1125
1160
|
}
|
|
1126
1161
|
}
|
|
1127
1162
|
|
|
1128
1163
|
/**
|
|
1129
|
-
* Test TTS
|
|
1164
|
+
* Test TTS with Hindi text and Devanagari punctuation marks (ą„¤ ą„„)
|
|
1165
|
+
* This tests multilingual sentence punctuation support added in fix/text_utils
|
|
1130
1166
|
*/
|
|
1131
|
-
async function
|
|
1167
|
+
async function testCreateSpeechHindiPunctuation(
|
|
1132
1168
|
voiceId: string | null
|
|
1133
1169
|
): Promise<[boolean, any]> {
|
|
1134
|
-
console.log("
|
|
1170
|
+
console.log("š®š³ Hindi Text with Devanagari Punctuation Test");
|
|
1135
1171
|
|
|
1136
1172
|
if (!voiceId) {
|
|
1137
1173
|
console.log(" ā ļø No voice ID available");
|
|
@@ -1143,38 +1179,71 @@ async function testStreamSpeechWithPhonemes(
|
|
|
1143
1179
|
const models = await import("../src/models/index.js");
|
|
1144
1180
|
const client = new Supertone({ apiKey: API_KEY });
|
|
1145
1181
|
|
|
1182
|
+
// Hindi text with Devanagari punctuation marks (ą„¤ ą„„)
|
|
1183
|
+
// Text length: ~380 characters (exceeds 300 char limit)
|
|
1184
|
+
const hindiText =
|
|
1185
|
+
"ą¤Øą¤®ą¤øą„ą¤¤ą„ ą¤ą¤° ą¤øą„ą¤µą¤¾ą¤ą¤¤ ą¤¹ą„ ą¤ą¤Ŗą¤ą¤¾ ą¤ą¤ø ą¤Ŗą¤°ą„ą¤ą„षण ą¤®ą„ą¤ą„¤ " +
|
|
1186
|
+
"यह ą¤Ŗą„ą¤°ą¤£ą¤¾ą¤²ą„ ą¤¹ą¤æą¤ą¤¦ą„ ą¤ą¤¾ą¤·ą¤¾ ą¤ą¤¾ ą¤Ŗą„ą¤°ą„ण ą¤øą¤®ą¤°ą„ą¤„न ą¤ą¤°ą¤¤ą„ ą¤¹ą„ą„¤ " +
|
|
1187
|
+
"ą¤¦ą„ą¤µą¤Øą¤¾ą¤ą¤°ą„ लिपि ą¤®ą„ą¤ ą¤Ŗą„ą¤°ą„ण विराम ą¤ą¤° ą¤¦ą„ą¤¹ą¤°ą¤¾ ą¤¦ą¤ą¤” ą¤ą„ą¤øą„ ą¤µą¤æą¤°ą¤¾ą¤® ą¤ą¤æą¤¹ą„न ą¤¹ą„ą¤¤ą„ ą¤¹ą„ą¤ą„„ " +
|
|
1188
|
+
"ą¤ą„ą¤¤ą„ą¤°ą¤æą¤® ą¤¬ą„ą¤¦ą„ą¤§ą¤æą¤®ą¤¤ą„ą¤¤ą¤¾ ą¤ą„ ą¤¤ą¤ą¤Øą„ą¤ ą¤¬ą¤¹ą„ą¤¤ ą¤¤ą„ą¤ą„ ą¤øą„ ą¤µą¤æą¤ą¤øą¤æą¤¤ ą¤¹ą„ ą¤°ą¤¹ą„ ą¤¹ą„ą„¤ " +
|
|
1189
|
+
"ą¤
ब हम ą¤²ą¤ą¤¬ą„ ą¤Ŗą¤¾ą¤ ą„ą¤ ą¤ą„ ą¤øą„ą¤µą¤¾ą¤ą¤¾ą¤µą¤æą¤ ą¤µą¤¾ą¤£ą„ ą¤®ą„ą¤ बदल ą¤øą¤ą¤¤ą„ ą¤¹ą„ą¤ą„¤ " +
|
|
1190
|
+
"यह ą¤Ŗą¤°ą„ą¤ą„षण ą¤ą¤¾ą¤ą¤ą¤¤ą¤¾ ą¤¹ą„ ą¤ą¤æ ą¤øą¤æą¤øą„ą¤ą¤® ą¤¹ą¤æą¤ą¤¦ą„ विराम ą¤ą¤æą¤¹ą„ą¤Øą„ą¤ पर ą¤øą¤¹ą„ ą¤¢ą¤ą¤ ą¤øą„ ą¤Ŗą¤¾ą¤ ą¤ą„ ą¤µą¤æą¤ą¤¾ą¤ą¤æą¤¤ ą¤ą¤°ą¤¤ą¤¾ ą¤¹ą„ą„¤ " +
|
|
1191
|
+
"ą¤¹ą¤®ą„ą¤ ą¤ą¤¶ą¤¾ ą¤¹ą„ ą¤ą¤æ सब ą¤ą„ą¤ ą¤ ą„ą¤ ą¤øą„ ą¤ą¤¾ą¤® ą¤ą¤°ą„ą¤ą¤¾ą„„";
|
|
1192
|
+
|
|
1193
|
+
const actualLength = hindiText.length;
|
|
1146
1194
|
console.log(
|
|
1147
|
-
`
|
|
1195
|
+
` š Text length: ${actualLength} characters (Hindi with Devanagari punctuation)`
|
|
1148
1196
|
);
|
|
1197
|
+
console.log(` š§ Expected behavior: Sentence-based chunking with Devanagari punctuation (ą„¤ ą„„)`);
|
|
1149
1198
|
console.log(" ā ļø This test consumes credits!");
|
|
1150
1199
|
|
|
1151
|
-
|
|
1200
|
+
if (actualLength <= 300) {
|
|
1201
|
+
console.log(` ā Text length ${actualLength} is <= 300, test may not trigger chunking`);
|
|
1202
|
+
}
|
|
1203
|
+
|
|
1204
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1152
1205
|
voiceId,
|
|
1153
1206
|
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1154
|
-
text:
|
|
1155
|
-
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.
|
|
1207
|
+
text: hindiText,
|
|
1208
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.Hi,
|
|
1156
1209
|
outputFormat:
|
|
1157
1210
|
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1158
|
-
|
|
1211
|
+
style: "neutral",
|
|
1212
|
+
model: models.APIConvertTextToSpeechUsingCharacterRequestModel.SonaSpeech2,
|
|
1159
1213
|
},
|
|
1160
1214
|
});
|
|
1161
1215
|
|
|
1162
|
-
|
|
1216
|
+
if (response.result) {
|
|
1217
|
+
const audioData = await extractAudioData(response);
|
|
1218
|
+
|
|
1219
|
+
console.log(
|
|
1220
|
+
` ā
Hindi punctuation chunking TTS success: ${audioData.length} bytes`
|
|
1221
|
+
);
|
|
1222
|
+
console.log(` šÆ Hindi text with Devanagari punctuation processed correctly!`);
|
|
1223
|
+
|
|
1224
|
+
const outputFile = "test_hindi_punctuation_speech_output.wav";
|
|
1225
|
+
fs.writeFileSync(outputFile, audioData);
|
|
1226
|
+
console.log(` š¾ Audio saved: ${outputFile}`);
|
|
1227
|
+
|
|
1228
|
+
const estimatedChunks = Math.ceil(actualLength / 300);
|
|
1229
|
+
console.log(` š Estimated chunks: ${estimatedChunks}`);
|
|
1230
|
+
}
|
|
1163
1231
|
|
|
1164
1232
|
return [true, response];
|
|
1165
1233
|
} catch (e: any) {
|
|
1166
|
-
|
|
1234
|
+
logDetailedError(e, "Hindi punctuation chunking");
|
|
1167
1235
|
return [false, e];
|
|
1168
1236
|
}
|
|
1169
1237
|
}
|
|
1170
1238
|
|
|
1171
1239
|
/**
|
|
1172
|
-
* Test
|
|
1240
|
+
* Test TTS with ellipsis punctuation marks (⦠ā„)
|
|
1241
|
+
* This tests multilingual sentence punctuation support added in fix/text_utils
|
|
1173
1242
|
*/
|
|
1174
|
-
async function
|
|
1243
|
+
async function testCreateSpeechEllipsisPunctuation(
|
|
1175
1244
|
voiceId: string | null
|
|
1176
1245
|
): Promise<[boolean, any]> {
|
|
1177
|
-
console.log("
|
|
1246
|
+
console.log("ā³ Text with Ellipsis Punctuation Test (⦠ā„)");
|
|
1178
1247
|
|
|
1179
1248
|
if (!voiceId) {
|
|
1180
1249
|
console.log(" ā ļø No voice ID available");
|
|
@@ -1186,40 +1255,71 @@ async function testPredictDurationWithVoiceSettings(
|
|
|
1186
1255
|
const models = await import("../src/models/index.js");
|
|
1187
1256
|
const client = new Supertone({ apiKey: API_KEY });
|
|
1188
1257
|
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1258
|
+
// Text with ellipsis punctuation marks (⦠ā„)
|
|
1259
|
+
// Text length: ~380 characters (exceeds 300 char limit)
|
|
1260
|
+
const ellipsisText =
|
|
1261
|
+
"Sometimes we need to pause and think⦠" +
|
|
1262
|
+
"The ellipsis character is used to indicate a trailing thought or a pause in speech⦠" +
|
|
1263
|
+
"This test verifies that the text chunking system correctly handles Unicode ellipsis charactersā„ " +
|
|
1264
|
+
"There are actually multiple types of ellipsis in Unicode⦠" +
|
|
1265
|
+
"The horizontal ellipsis U+2026 and the two dot leader U+2025 are both supportedā„ " +
|
|
1266
|
+
"When processing long texts the SDK should split at these punctuation marks⦠" +
|
|
1267
|
+
"This ensures natural pauses in the generated speech outputā„ " +
|
|
1268
|
+
"Let us verify that everything works correctlyā¦";
|
|
1269
|
+
|
|
1270
|
+
const actualLength = ellipsisText.length;
|
|
1193
1271
|
console.log(
|
|
1194
|
-
`
|
|
1272
|
+
` š Text length: ${actualLength} characters (with ellipsis punctuation)`
|
|
1195
1273
|
);
|
|
1196
|
-
console.log(`
|
|
1274
|
+
console.log(` š§ Expected behavior: Sentence-based chunking with ellipsis (⦠ā„)`);
|
|
1275
|
+
console.log(" ā ļø This test consumes credits!");
|
|
1197
1276
|
|
|
1198
|
-
|
|
1277
|
+
if (actualLength <= 300) {
|
|
1278
|
+
console.log(` ā Text length ${actualLength} is <= 300, test may not trigger chunking`);
|
|
1279
|
+
}
|
|
1280
|
+
|
|
1281
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1199
1282
|
voiceId,
|
|
1200
|
-
|
|
1201
|
-
text:
|
|
1202
|
-
language: models.
|
|
1203
|
-
|
|
1283
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1284
|
+
text: ellipsisText,
|
|
1285
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
1286
|
+
outputFormat:
|
|
1287
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1288
|
+
style: "neutral",
|
|
1289
|
+
model: models.APIConvertTextToSpeechUsingCharacterRequestModel.SonaSpeech1,
|
|
1204
1290
|
},
|
|
1205
1291
|
});
|
|
1206
1292
|
|
|
1207
|
-
|
|
1293
|
+
if (response.result) {
|
|
1294
|
+
const audioData = await extractAudioData(response);
|
|
1295
|
+
|
|
1296
|
+
console.log(
|
|
1297
|
+
` ā
Ellipsis punctuation chunking TTS success: ${audioData.length} bytes`
|
|
1298
|
+
);
|
|
1299
|
+
console.log(` šÆ Text with ellipsis punctuation processed correctly!`);
|
|
1300
|
+
|
|
1301
|
+
const outputFile = "test_ellipsis_punctuation_speech_output.wav";
|
|
1302
|
+
fs.writeFileSync(outputFile, audioData);
|
|
1303
|
+
console.log(` š¾ Audio saved: ${outputFile}`);
|
|
1304
|
+
|
|
1305
|
+
const estimatedChunks = Math.ceil(actualLength / 300);
|
|
1306
|
+
console.log(` š Estimated chunks: ${estimatedChunks}`);
|
|
1307
|
+
}
|
|
1208
1308
|
|
|
1209
1309
|
return [true, response];
|
|
1210
1310
|
} catch (e: any) {
|
|
1211
|
-
|
|
1311
|
+
logDetailedError(e, "Ellipsis punctuation chunking");
|
|
1212
1312
|
return [false, e];
|
|
1213
1313
|
}
|
|
1214
1314
|
}
|
|
1215
1315
|
|
|
1216
1316
|
/**
|
|
1217
|
-
* Test TTS streaming with
|
|
1317
|
+
* Test TTS streaming with long text
|
|
1218
1318
|
*/
|
|
1219
|
-
async function
|
|
1319
|
+
async function testStreamSpeechLongText(
|
|
1220
1320
|
voiceId: string | null
|
|
1221
1321
|
): Promise<[boolean, any]> {
|
|
1222
|
-
console.log("š”
|
|
1322
|
+
console.log("š” Long Text Streaming TTS Test");
|
|
1223
1323
|
|
|
1224
1324
|
if (!voiceId) {
|
|
1225
1325
|
console.log(" ā ļø No voice ID available");
|
|
@@ -1231,31 +1331,29 @@ async function testStreamSpeechWithVoiceSettings(
|
|
|
1231
1331
|
const models = await import("../src/models/index.js");
|
|
1232
1332
|
const client = new Supertone({ apiKey: API_KEY });
|
|
1233
1333
|
|
|
1234
|
-
const
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1334
|
+
const longText = `
|
|
1335
|
+
Hello! This is a long text streaming test.
|
|
1336
|
+
The SDK automatically chunks and streams the audio in real-time.
|
|
1337
|
+
This enables efficient processing of longer content without waiting for complete generation.
|
|
1338
|
+
`
|
|
1339
|
+
.trim()
|
|
1340
|
+
.repeat(3);
|
|
1238
1341
|
|
|
1239
|
-
console.log(
|
|
1240
|
-
|
|
1241
|
-
);
|
|
1242
|
-
console.log(
|
|
1243
|
-
` Settings: pitchShift=${voiceSettings.pitchShift}, speed=${voiceSettings.speed}`
|
|
1244
|
-
);
|
|
1342
|
+
console.log(` š Streaming long text with voice '${voiceId}'...`);
|
|
1343
|
+
console.log(` Text length: ${longText.length} characters`);
|
|
1245
1344
|
console.log(" ā ļø This test consumes credits!");
|
|
1246
1345
|
|
|
1247
1346
|
const response = await client.textToSpeech.streamSpeech({
|
|
1248
1347
|
voiceId,
|
|
1249
1348
|
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1250
|
-
text:
|
|
1349
|
+
text: longText,
|
|
1251
1350
|
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
1252
1351
|
outputFormat:
|
|
1253
1352
|
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1254
|
-
voiceSettings,
|
|
1255
1353
|
},
|
|
1256
1354
|
});
|
|
1257
1355
|
|
|
1258
|
-
console.log(` ā
Stream
|
|
1356
|
+
console.log(` ā
Stream started successfully`);
|
|
1259
1357
|
|
|
1260
1358
|
return [true, response];
|
|
1261
1359
|
} catch (e: any) {
|
|
@@ -1265,12 +1363,12 @@ async function testStreamSpeechWithVoiceSettings(
|
|
|
1265
1363
|
}
|
|
1266
1364
|
|
|
1267
1365
|
/**
|
|
1268
|
-
* Test
|
|
1366
|
+
* Test TTS with voice settings
|
|
1269
1367
|
*/
|
|
1270
|
-
async function
|
|
1368
|
+
async function testCreateSpeechWithVoiceSettings(
|
|
1271
1369
|
voiceId: string | null
|
|
1272
1370
|
): Promise<[boolean, any]> {
|
|
1273
|
-
console.log("
|
|
1371
|
+
console.log("šļø TTS with Voice Settings Test");
|
|
1274
1372
|
|
|
1275
1373
|
if (!voiceId) {
|
|
1276
1374
|
console.log(" ā ļø No voice ID available");
|
|
@@ -1282,39 +1380,1021 @@ async function testCreateSpeechMp3(
|
|
|
1282
1380
|
const models = await import("../src/models/index.js");
|
|
1283
1381
|
const client = new Supertone({ apiKey: API_KEY });
|
|
1284
1382
|
|
|
1285
|
-
|
|
1383
|
+
const voiceSettings = {
|
|
1384
|
+
pitchShift: 0.95,
|
|
1385
|
+
pitchVariance: 1.1,
|
|
1386
|
+
speed: 0.9,
|
|
1387
|
+
};
|
|
1388
|
+
|
|
1389
|
+
console.log(
|
|
1390
|
+
` š TTS conversion with voice settings using voice '${voiceId}'...`
|
|
1391
|
+
);
|
|
1392
|
+
console.log(
|
|
1393
|
+
` Settings: pitchShift=${voiceSettings.pitchShift}, speed=${voiceSettings.speed}`
|
|
1394
|
+
);
|
|
1286
1395
|
console.log(" ā ļø This test consumes credits!");
|
|
1287
1396
|
|
|
1288
1397
|
const response = await client.textToSpeech.createSpeech({
|
|
1289
1398
|
voiceId,
|
|
1290
1399
|
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1291
|
-
text: "Hello! This is
|
|
1400
|
+
text: "Hello world! This is a voice settings test. You can hear the adjusted pitch and speed.",
|
|
1292
1401
|
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
1293
1402
|
outputFormat:
|
|
1294
|
-
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.
|
|
1403
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1295
1404
|
style: "neutral",
|
|
1296
1405
|
model: "sona_speech_1",
|
|
1406
|
+
voiceSettings,
|
|
1407
|
+
includePhonemes: false,
|
|
1297
1408
|
},
|
|
1298
1409
|
});
|
|
1299
1410
|
|
|
1300
|
-
console.log(` ā
|
|
1411
|
+
console.log(` ā
TTS with voice settings success`);
|
|
1301
1412
|
|
|
1302
1413
|
if (response.result) {
|
|
1303
|
-
const outputFile = "
|
|
1414
|
+
const outputFile = "test_voice_settings_speech_output.wav";
|
|
1304
1415
|
const audioData = await extractAudioData(response);
|
|
1305
1416
|
|
|
1306
1417
|
fs.writeFileSync(outputFile, audioData);
|
|
1307
|
-
console.log(` š¾
|
|
1418
|
+
console.log(` š¾ Voice settings audio file saved: ${outputFile}`);
|
|
1419
|
+
}
|
|
1308
1420
|
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1421
|
+
return [true, response];
|
|
1422
|
+
} catch (e: any) {
|
|
1423
|
+
console.error(` ā Error: ${e.message || e}`);
|
|
1424
|
+
return [false, e];
|
|
1425
|
+
}
|
|
1426
|
+
}
|
|
1427
|
+
|
|
1428
|
+
/**
|
|
1429
|
+
* Test TTS with phoneme information
|
|
1430
|
+
*/
|
|
1431
|
+
async function testCreateSpeechWithPhonemes(
|
|
1432
|
+
voiceId: string | null
|
|
1433
|
+
): Promise<[boolean, any]> {
|
|
1434
|
+
console.log("š¤ TTS with Phoneme Information Test");
|
|
1435
|
+
|
|
1436
|
+
if (!voiceId) {
|
|
1437
|
+
console.log(" ā ļø No voice ID available");
|
|
1438
|
+
return [false, null];
|
|
1439
|
+
}
|
|
1440
|
+
|
|
1441
|
+
try {
|
|
1442
|
+
const { Supertone } = await import("../src/index.js");
|
|
1443
|
+
const models = await import("../src/models/index.js");
|
|
1444
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1445
|
+
|
|
1446
|
+
console.log(
|
|
1447
|
+
` š TTS conversion with phonemes using voice '${voiceId}'...`
|
|
1448
|
+
);
|
|
1449
|
+
console.log(" ā ļø This test consumes credits!");
|
|
1450
|
+
|
|
1451
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1452
|
+
voiceId,
|
|
1453
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1454
|
+
text: "Hello world! This is a phoneme timing test.",
|
|
1455
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
1456
|
+
outputFormat:
|
|
1457
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1458
|
+
style: "neutral",
|
|
1459
|
+
model: "sona_speech_1",
|
|
1460
|
+
includePhonemes: true,
|
|
1461
|
+
},
|
|
1462
|
+
});
|
|
1463
|
+
|
|
1464
|
+
console.log(` ā
TTS with phonemes success`);
|
|
1465
|
+
|
|
1466
|
+
if (response.result) {
|
|
1467
|
+
const outputFile = "test_phoneme_speech_output.wav";
|
|
1468
|
+
|
|
1469
|
+
// Check if response is JSON with phonemes data
|
|
1470
|
+
if (
|
|
1471
|
+
typeof response.result === "object" &&
|
|
1472
|
+
"audioBase64" in response.result
|
|
1473
|
+
) {
|
|
1474
|
+
const audioData = await extractAudioData(response);
|
|
1475
|
+
fs.writeFileSync(outputFile, audioData);
|
|
1476
|
+
console.log(` š¾ Phoneme audio file saved: ${outputFile}`);
|
|
1477
|
+
|
|
1478
|
+
// Display phoneme information as JSON
|
|
1479
|
+
const phonemes = (response.result as any).phonemes;
|
|
1480
|
+
if (phonemes) {
|
|
1481
|
+
console.log(` š Phoneme data (JSON):`);
|
|
1482
|
+
console.log(JSON.stringify(phonemes, null, 2));
|
|
1483
|
+
console.log(` š Summary:`);
|
|
1484
|
+
console.log(` Symbols count: ${phonemes.symbols?.length || 0}`);
|
|
1485
|
+
console.log(
|
|
1486
|
+
` Durations count: ${phonemes.durations_seconds?.length || 0}`
|
|
1487
|
+
);
|
|
1488
|
+
console.log(
|
|
1489
|
+
` Start times count: ${
|
|
1490
|
+
phonemes.start_times_seconds?.length || 0
|
|
1491
|
+
}`
|
|
1492
|
+
);
|
|
1493
|
+
if (phonemes.symbols && phonemes.symbols.length > 0) {
|
|
1494
|
+
console.log(
|
|
1495
|
+
` First 5 symbols: ${phonemes.symbols.slice(0, 5).join(", ")}`
|
|
1496
|
+
);
|
|
1497
|
+
}
|
|
1498
|
+
}
|
|
1499
|
+
} else {
|
|
1500
|
+
// Binary audio without phonemes
|
|
1501
|
+
const audioData = await extractAudioData(response);
|
|
1502
|
+
fs.writeFileSync(outputFile, audioData);
|
|
1503
|
+
console.log(` š¾ Phoneme audio file saved: ${outputFile}`);
|
|
1504
|
+
}
|
|
1505
|
+
}
|
|
1506
|
+
|
|
1507
|
+
return [true, response];
|
|
1508
|
+
} catch (e: any) {
|
|
1509
|
+
console.error(` ā Error: ${e.message || e}`);
|
|
1510
|
+
return [false, e];
|
|
1511
|
+
}
|
|
1512
|
+
}
|
|
1513
|
+
|
|
1514
|
+
/**
|
|
1515
|
+
* Test TTS streaming with phonemes
|
|
1516
|
+
*/
|
|
1517
|
+
async function testStreamSpeechWithPhonemes(
|
|
1518
|
+
voiceId: string | null
|
|
1519
|
+
): Promise<[boolean, any]> {
|
|
1520
|
+
console.log("š” TTS Streaming with Phonemes Test");
|
|
1521
|
+
|
|
1522
|
+
if (!voiceId) {
|
|
1523
|
+
console.log(" ā ļø No voice ID available");
|
|
1524
|
+
return [false, null];
|
|
1525
|
+
}
|
|
1526
|
+
|
|
1527
|
+
try {
|
|
1528
|
+
const { Supertone } = await import("../src/index.js");
|
|
1529
|
+
const models = await import("../src/models/index.js");
|
|
1530
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1531
|
+
|
|
1532
|
+
console.log(
|
|
1533
|
+
` š Streaming speech with phonemes for voice '${voiceId}'...`
|
|
1534
|
+
);
|
|
1535
|
+
console.log(" ā ļø This test consumes credits!");
|
|
1536
|
+
|
|
1537
|
+
const response = await client.textToSpeech.streamSpeech({
|
|
1538
|
+
voiceId,
|
|
1539
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1540
|
+
text: "Streaming with phoneme timing information.",
|
|
1541
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
1542
|
+
outputFormat:
|
|
1543
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1544
|
+
includePhonemes: true,
|
|
1545
|
+
},
|
|
1546
|
+
});
|
|
1547
|
+
|
|
1548
|
+
console.log(` ā
Stream with phonemes started successfully`);
|
|
1549
|
+
|
|
1550
|
+
return [true, response];
|
|
1551
|
+
} catch (e: any) {
|
|
1552
|
+
console.error(` ā Error: ${e.message || e}`);
|
|
1553
|
+
return [false, e];
|
|
1554
|
+
}
|
|
1555
|
+
}
|
|
1556
|
+
|
|
1557
|
+
// =============================================================================
|
|
1558
|
+
// Model & Language Compatibility Tests
|
|
1559
|
+
// =============================================================================
|
|
1560
|
+
|
|
1561
|
+
/**
|
|
1562
|
+
* Model-Language compatibility matrix
|
|
1563
|
+
* - sona_speech_1: ko, en, ja
|
|
1564
|
+
* - sona_speech_2: all languages (23 languages)
|
|
1565
|
+
* - supertonic_api_1: ko, en, ja, es, pt
|
|
1566
|
+
*/
|
|
1567
|
+
const MODEL_LANGUAGE_MATRIX = {
|
|
1568
|
+
sona_speech_1: ["ko", "en", "ja"],
|
|
1569
|
+
sona_speech_2: [
|
|
1570
|
+
"en",
|
|
1571
|
+
"ko",
|
|
1572
|
+
"ja",
|
|
1573
|
+
"bg",
|
|
1574
|
+
"cs",
|
|
1575
|
+
"da",
|
|
1576
|
+
"el",
|
|
1577
|
+
"es",
|
|
1578
|
+
"et",
|
|
1579
|
+
"fi",
|
|
1580
|
+
"hu",
|
|
1581
|
+
"it",
|
|
1582
|
+
"nl",
|
|
1583
|
+
"pl",
|
|
1584
|
+
"pt",
|
|
1585
|
+
"ro",
|
|
1586
|
+
"ar",
|
|
1587
|
+
"de",
|
|
1588
|
+
"fr",
|
|
1589
|
+
"hi",
|
|
1590
|
+
"id",
|
|
1591
|
+
"ru",
|
|
1592
|
+
"vi",
|
|
1593
|
+
],
|
|
1594
|
+
supertonic_api_1: ["ko", "en", "ja", "es", "pt"],
|
|
1595
|
+
} as const;
|
|
1596
|
+
|
|
1597
|
+
/**
|
|
1598
|
+
* Test TTS with sona_speech_2 model
|
|
1599
|
+
*/
|
|
1600
|
+
async function testCreateSpeechWithSonaSpeech2(
|
|
1601
|
+
voiceId: string | null
|
|
1602
|
+
): Promise<[boolean, any]> {
|
|
1603
|
+
console.log("š¤ TTS with sona_speech_2 Model Test");
|
|
1604
|
+
|
|
1605
|
+
if (!voiceId) {
|
|
1606
|
+
console.log(" ā ļø No voice ID available");
|
|
1607
|
+
return [false, null];
|
|
1608
|
+
}
|
|
1609
|
+
|
|
1610
|
+
try {
|
|
1611
|
+
const { Supertone } = await import("../src/index.js");
|
|
1612
|
+
const models = await import("../src/models/index.js");
|
|
1613
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1614
|
+
|
|
1615
|
+
const testText =
|
|
1616
|
+
"Hello! Testing sona_speech_2 model for text-to-speech conversion.";
|
|
1617
|
+
console.log(` š Creating speech with sona_speech_2 model`);
|
|
1618
|
+
console.log(` Voice ID: ${voiceId}`);
|
|
1619
|
+
console.log(` Model: sona_speech_2`);
|
|
1620
|
+
console.log(" ā ļø This test consumes credits!");
|
|
1621
|
+
|
|
1622
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1623
|
+
voiceId,
|
|
1624
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1625
|
+
text: testText,
|
|
1626
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
1627
|
+
outputFormat:
|
|
1628
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1629
|
+
model:
|
|
1630
|
+
models.APIConvertTextToSpeechUsingCharacterRequestModel.SonaSpeech2,
|
|
1631
|
+
},
|
|
1632
|
+
});
|
|
1633
|
+
|
|
1634
|
+
console.log(` ā
sona_speech_2 TTS success`);
|
|
1635
|
+
|
|
1636
|
+
if (response.result) {
|
|
1637
|
+
const audioData = await extractAudioData(response);
|
|
1638
|
+
const outputFile = "test_sona_speech_2_output.wav";
|
|
1639
|
+
fs.writeFileSync(outputFile, audioData);
|
|
1640
|
+
console.log(
|
|
1641
|
+
` š¾ Audio saved: ${outputFile} (${audioData.length} bytes)`
|
|
1642
|
+
);
|
|
1643
|
+
}
|
|
1644
|
+
|
|
1645
|
+
return [true, response];
|
|
1646
|
+
} catch (e: any) {
|
|
1647
|
+
logDetailedError(e, "sona_speech_2 TTS");
|
|
1648
|
+
return [false, e];
|
|
1649
|
+
}
|
|
1650
|
+
}
|
|
1651
|
+
|
|
1652
|
+
/**
|
|
1653
|
+
* Test TTS with supertonic_api_1 model
|
|
1654
|
+
*/
|
|
1655
|
+
async function testCreateSpeechWithSupertonicApi1(
|
|
1656
|
+
voiceId: string | null
|
|
1657
|
+
): Promise<[boolean, any]> {
|
|
1658
|
+
console.log("š¤ TTS with supertonic_api_1 Model Test");
|
|
1659
|
+
|
|
1660
|
+
if (!voiceId) {
|
|
1661
|
+
console.log(" ā ļø No voice ID available");
|
|
1662
|
+
return [false, null];
|
|
1663
|
+
}
|
|
1664
|
+
|
|
1665
|
+
try {
|
|
1666
|
+
const { Supertone } = await import("../src/index.js");
|
|
1667
|
+
const models = await import("../src/models/index.js");
|
|
1668
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1669
|
+
|
|
1670
|
+
const testText =
|
|
1671
|
+
"Hello! Testing supertonic_api_1 model for text-to-speech conversion.";
|
|
1672
|
+
console.log(` š Creating speech with supertonic_api_1 model`);
|
|
1673
|
+
console.log(` Voice ID: ${voiceId}`);
|
|
1674
|
+
console.log(` Model: supertonic_api_1`);
|
|
1675
|
+
console.log(" ā ļø This test consumes credits!");
|
|
1676
|
+
|
|
1677
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1678
|
+
voiceId,
|
|
1679
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1680
|
+
text: testText,
|
|
1681
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
1682
|
+
outputFormat:
|
|
1683
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1684
|
+
model:
|
|
1685
|
+
models.APIConvertTextToSpeechUsingCharacterRequestModel
|
|
1686
|
+
.SupertonicApi1,
|
|
1687
|
+
},
|
|
1688
|
+
});
|
|
1689
|
+
|
|
1690
|
+
console.log(` ā
supertonic_api_1 TTS success`);
|
|
1691
|
+
|
|
1692
|
+
if (response.result) {
|
|
1693
|
+
const audioData = await extractAudioData(response);
|
|
1694
|
+
const outputFile = "test_supertonic_api_1_output.wav";
|
|
1695
|
+
fs.writeFileSync(outputFile, audioData);
|
|
1696
|
+
console.log(
|
|
1697
|
+
` š¾ Audio saved: ${outputFile} (${audioData.length} bytes)`
|
|
1698
|
+
);
|
|
1699
|
+
}
|
|
1700
|
+
|
|
1701
|
+
return [true, response];
|
|
1702
|
+
} catch (e: any) {
|
|
1703
|
+
logDetailedError(e, "supertonic_api_1 TTS");
|
|
1704
|
+
return [false, e];
|
|
1705
|
+
}
|
|
1706
|
+
}
|
|
1707
|
+
|
|
1708
|
+
/**
|
|
1709
|
+
* Test TTS with unsupported model (should fail with validation error)
|
|
1710
|
+
*/
|
|
1711
|
+
async function testCreateSpeechWithUnsupportedModel(
|
|
1712
|
+
voiceId: string | null
|
|
1713
|
+
): Promise<[boolean, any]> {
|
|
1714
|
+
console.log("š« TTS with Unsupported Model Test (Expected to Fail)");
|
|
1715
|
+
|
|
1716
|
+
if (!voiceId) {
|
|
1717
|
+
console.log(" ā ļø No voice ID available");
|
|
1718
|
+
return [false, null];
|
|
1719
|
+
}
|
|
1720
|
+
|
|
1721
|
+
try {
|
|
1722
|
+
const { Supertone } = await import("../src/index.js");
|
|
1723
|
+
const models = await import("../src/models/index.js");
|
|
1724
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1725
|
+
|
|
1726
|
+
const testText = "This should fail with unsupported model.";
|
|
1727
|
+
console.log(
|
|
1728
|
+
` š Attempting TTS with unsupported model: 'invalid_model_xyz'`
|
|
1729
|
+
);
|
|
1730
|
+
|
|
1731
|
+
// Using type assertion to bypass TypeScript validation for testing
|
|
1732
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1733
|
+
voiceId,
|
|
1734
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1735
|
+
text: testText,
|
|
1736
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
1737
|
+
outputFormat:
|
|
1738
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
|
|
1739
|
+
model: "invalid_model_xyz" as any, // Intentionally invalid model
|
|
1740
|
+
},
|
|
1741
|
+
});
|
|
1742
|
+
|
|
1743
|
+
// If we reach here, the test failed (should have thrown an error)
|
|
1744
|
+
console.log(` ā Expected error but got success - this is unexpected!`);
|
|
1745
|
+
return [false, response];
|
|
1746
|
+
} catch (e: any) {
|
|
1747
|
+
// Expected to fail - this is the success case for this test
|
|
1748
|
+
console.log(` ā
Correctly rejected unsupported model`);
|
|
1749
|
+
console.log(` š Error type: ${e.constructor?.name || typeof e}`);
|
|
1750
|
+
console.log(` š Error message: ${e.message?.substring(0, 100) || e}`);
|
|
1751
|
+
return [true, e];
|
|
1752
|
+
}
|
|
1753
|
+
}
|
|
1754
|
+
|
|
1755
|
+
/**
|
|
1756
|
+
* Test prediction with sona_speech_2 model
|
|
1757
|
+
*/
|
|
1758
|
+
async function testPredictDurationWithSonaSpeech2(
|
|
1759
|
+
voiceId: string | null
|
|
1760
|
+
): Promise<[boolean, any]> {
|
|
1761
|
+
console.log("ā±ļø Duration Prediction with sona_speech_2 Model Test");
|
|
1762
|
+
|
|
1763
|
+
if (!voiceId) {
|
|
1764
|
+
console.log(" ā ļø No voice ID available");
|
|
1765
|
+
return [false, null];
|
|
1766
|
+
}
|
|
1767
|
+
|
|
1768
|
+
try {
|
|
1769
|
+
const { Supertone } = await import("../src/index.js");
|
|
1770
|
+
const models = await import("../src/models/index.js");
|
|
1771
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1772
|
+
|
|
1773
|
+
const testText = "Testing duration prediction with sona_speech_2 model.";
|
|
1774
|
+
console.log(` š Predicting duration with sona_speech_2 model`);
|
|
1775
|
+
|
|
1776
|
+
const response = await client.textToSpeech.predictDuration({
|
|
1777
|
+
voiceId,
|
|
1778
|
+
predictTTSDurationUsingCharacterRequest: {
|
|
1779
|
+
text: testText,
|
|
1780
|
+
language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
|
|
1781
|
+
model: models.PredictTTSDurationUsingCharacterRequestModel.SonaSpeech2,
|
|
1782
|
+
},
|
|
1783
|
+
});
|
|
1784
|
+
|
|
1785
|
+
console.log(
|
|
1786
|
+
` ā
sona_speech_2 duration prediction: ${response.duration}s`
|
|
1787
|
+
);
|
|
1788
|
+
return [true, response];
|
|
1789
|
+
} catch (e: any) {
|
|
1790
|
+
logDetailedError(e, "sona_speech_2 duration prediction");
|
|
1791
|
+
return [false, e];
|
|
1792
|
+
}
|
|
1793
|
+
}
|
|
1794
|
+
|
|
1795
|
+
/**
|
|
1796
|
+
* Test prediction with supertonic_api_1 model
|
|
1797
|
+
*/
|
|
1798
|
+
async function testPredictDurationWithSupertonicApi1(
|
|
1799
|
+
voiceId: string | null
|
|
1800
|
+
): Promise<[boolean, any]> {
|
|
1801
|
+
console.log("ā±ļø Duration Prediction with supertonic_api_1 Model Test");
|
|
1802
|
+
|
|
1803
|
+
if (!voiceId) {
|
|
1804
|
+
console.log(" ā ļø No voice ID available");
|
|
1805
|
+
return [false, null];
|
|
1806
|
+
}
|
|
1807
|
+
|
|
1808
|
+
try {
|
|
1809
|
+
const { Supertone } = await import("../src/index.js");
|
|
1810
|
+
const models = await import("../src/models/index.js");
|
|
1811
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1812
|
+
|
|
1813
|
+
const testText = "Testing duration prediction with supertonic_api_1 model.";
|
|
1814
|
+
console.log(` š Predicting duration with supertonic_api_1 model`);
|
|
1815
|
+
|
|
1816
|
+
const response = await client.textToSpeech.predictDuration({
|
|
1817
|
+
voiceId,
|
|
1818
|
+
predictTTSDurationUsingCharacterRequest: {
|
|
1819
|
+
text: testText,
|
|
1820
|
+
language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
|
|
1821
|
+
model:
|
|
1822
|
+
models.PredictTTSDurationUsingCharacterRequestModel.SupertonicApi1,
|
|
1823
|
+
},
|
|
1824
|
+
});
|
|
1825
|
+
|
|
1826
|
+
console.log(
|
|
1827
|
+
` ā
supertonic_api_1 duration prediction: ${response.duration}s`
|
|
1828
|
+
);
|
|
1829
|
+
return [true, response];
|
|
1830
|
+
} catch (e: any) {
|
|
1831
|
+
logDetailedError(e, "supertonic_api_1 duration prediction");
|
|
1832
|
+
return [false, e];
|
|
1833
|
+
}
|
|
1834
|
+
}
|
|
1835
|
+
|
|
1836
|
+
/**
|
|
1837
|
+
* Test prediction with unsupported model (should fail with validation error)
|
|
1838
|
+
*/
|
|
1839
|
+
async function testPredictDurationWithUnsupportedModel(
|
|
1840
|
+
voiceId: string | null
|
|
1841
|
+
): Promise<[boolean, any]> {
|
|
1842
|
+
console.log(
|
|
1843
|
+
"š« Duration Prediction with Unsupported Model Test (Expected to Fail)"
|
|
1844
|
+
);
|
|
1845
|
+
|
|
1846
|
+
if (!voiceId) {
|
|
1847
|
+
console.log(" ā ļø No voice ID available");
|
|
1848
|
+
return [false, null];
|
|
1849
|
+
}
|
|
1850
|
+
|
|
1851
|
+
try {
|
|
1852
|
+
const { Supertone } = await import("../src/index.js");
|
|
1853
|
+
const models = await import("../src/models/index.js");
|
|
1854
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1855
|
+
|
|
1856
|
+
const testText = "This should fail with unsupported model.";
|
|
1857
|
+
console.log(
|
|
1858
|
+
` š Attempting prediction with unsupported model: 'invalid_model_xyz'`
|
|
1859
|
+
);
|
|
1860
|
+
|
|
1861
|
+
const response = await client.textToSpeech.predictDuration({
|
|
1862
|
+
voiceId,
|
|
1863
|
+
predictTTSDurationUsingCharacterRequest: {
|
|
1864
|
+
text: testText,
|
|
1865
|
+
language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
|
|
1866
|
+
model: "invalid_model_xyz" as any, // Intentionally invalid model
|
|
1867
|
+
},
|
|
1868
|
+
});
|
|
1869
|
+
|
|
1870
|
+
console.log(` ā Expected error but got success - this is unexpected!`);
|
|
1871
|
+
return [false, response];
|
|
1872
|
+
} catch (e: any) {
|
|
1873
|
+
console.log(` ā
Correctly rejected unsupported model`);
|
|
1874
|
+
console.log(` š Error type: ${e.constructor?.name || typeof e}`);
|
|
1875
|
+
console.log(` š Error message: ${e.message?.substring(0, 100) || e}`);
|
|
1876
|
+
return [true, e];
|
|
1877
|
+
}
|
|
1878
|
+
}
|
|
1879
|
+
|
|
1880
|
+
// =============================================================================
|
|
1881
|
+
// Multilingual Tests per Model
|
|
1882
|
+
// =============================================================================
|
|
1883
|
+
|
|
1884
|
+
/**
|
|
1885
|
+
* Test TTS multilingual support with sona_speech_1 (supports: ko, en, ja)
|
|
1886
|
+
*/
|
|
1887
|
+
async function testMultilingualSonaSpeech1(
|
|
1888
|
+
voiceId: string | null
|
|
1889
|
+
): Promise<[boolean, any]> {
|
|
1890
|
+
console.log("š Multilingual Test - sona_speech_1 (ko, en, ja)");
|
|
1891
|
+
|
|
1892
|
+
if (!voiceId) {
|
|
1893
|
+
console.log(" ā ļø No voice ID available");
|
|
1894
|
+
return [false, null];
|
|
1895
|
+
}
|
|
1896
|
+
|
|
1897
|
+
const testCases = [
|
|
1898
|
+
{
|
|
1899
|
+
lang: "ko" as const,
|
|
1900
|
+
text: "ģė
ķģøģ, ģė ģ¤ķ¼ģ¹ ģ ėŖØėøģ
ėė¤.",
|
|
1901
|
+
label: "Korean",
|
|
1902
|
+
},
|
|
1903
|
+
{
|
|
1904
|
+
lang: "en" as const,
|
|
1905
|
+
text: "Hello, this is sona_speech_1 model.",
|
|
1906
|
+
label: "English",
|
|
1907
|
+
},
|
|
1908
|
+
{
|
|
1909
|
+
lang: "ja" as const,
|
|
1910
|
+
text: "ććć«ć”ćÆćć½ćć¹ćć¼ććÆć³ć¢ćć«ć§ćć",
|
|
1911
|
+
label: "Japanese",
|
|
1912
|
+
},
|
|
1913
|
+
];
|
|
1914
|
+
|
|
1915
|
+
try {
|
|
1916
|
+
const { Supertone } = await import("../src/index.js");
|
|
1917
|
+
const models = await import("../src/models/index.js");
|
|
1918
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
1919
|
+
|
|
1920
|
+
let allPassed = true;
|
|
1921
|
+
const results: any[] = [];
|
|
1922
|
+
|
|
1923
|
+
for (const tc of testCases) {
|
|
1924
|
+
console.log(` š Testing ${tc.label} (${tc.lang})...`);
|
|
1925
|
+
|
|
1926
|
+
try {
|
|
1927
|
+
const langEnum =
|
|
1928
|
+
models.APIConvertTextToSpeechUsingCharacterRequestLanguage[
|
|
1929
|
+
(tc.lang.charAt(0).toUpperCase() +
|
|
1930
|
+
tc.lang.slice(
|
|
1931
|
+
1
|
|
1932
|
+
)) as keyof typeof models.APIConvertTextToSpeechUsingCharacterRequestLanguage
|
|
1933
|
+
];
|
|
1934
|
+
|
|
1935
|
+
const response = await client.textToSpeech.createSpeech({
|
|
1936
|
+
voiceId,
|
|
1937
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
1938
|
+
text: tc.text,
|
|
1939
|
+
language: langEnum,
|
|
1940
|
+
outputFormat:
|
|
1941
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat
|
|
1942
|
+
.Wav,
|
|
1943
|
+
model:
|
|
1944
|
+
models.APIConvertTextToSpeechUsingCharacterRequestModel
|
|
1945
|
+
.SonaSpeech1,
|
|
1946
|
+
},
|
|
1947
|
+
});
|
|
1948
|
+
|
|
1949
|
+
console.log(` ā
${tc.label} success`);
|
|
1950
|
+
results.push({ lang: tc.lang, success: true });
|
|
1951
|
+
} catch (e: any) {
|
|
1952
|
+
console.log(
|
|
1953
|
+
` ā ${tc.label} failed: ${e.message?.substring(0, 50)}`
|
|
1954
|
+
);
|
|
1955
|
+
results.push({ lang: tc.lang, success: false, error: e.message });
|
|
1956
|
+
allPassed = false;
|
|
1957
|
+
}
|
|
1958
|
+
}
|
|
1959
|
+
|
|
1960
|
+
console.log(
|
|
1961
|
+
` š Result: ${results.filter((r) => r.success).length}/${
|
|
1962
|
+
testCases.length
|
|
1963
|
+
} languages passed`
|
|
1964
|
+
);
|
|
1965
|
+
return [allPassed, results];
|
|
1966
|
+
} catch (e: any) {
|
|
1967
|
+
logDetailedError(e, "sona_speech_1 multilingual");
|
|
1968
|
+
return [false, e];
|
|
1969
|
+
}
|
|
1970
|
+
}
|
|
1971
|
+
|
|
1972
|
+
/**
|
|
1973
|
+
* Test TTS multilingual support with sona_speech_2 (supports all languages)
|
|
1974
|
+
*/
|
|
1975
|
+
async function testMultilingualSonaSpeech2(
|
|
1976
|
+
voiceId: string | null
|
|
1977
|
+
): Promise<[boolean, any]> {
|
|
1978
|
+
console.log("š Multilingual Test - sona_speech_2 (all languages sample)");
|
|
1979
|
+
|
|
1980
|
+
if (!voiceId) {
|
|
1981
|
+
console.log(" ā ļø No voice ID available");
|
|
1982
|
+
return [false, null];
|
|
1983
|
+
}
|
|
1984
|
+
|
|
1985
|
+
// Test a diverse subset of languages
|
|
1986
|
+
const testCases = [
|
|
1987
|
+
{ lang: "Ko" as const, text: "ģė
ķģøģ.", label: "Korean" },
|
|
1988
|
+
{ lang: "En" as const, text: "Hello.", label: "English" },
|
|
1989
|
+
{ lang: "Ja" as const, text: "ććć«ć”ćÆć", label: "Japanese" },
|
|
1990
|
+
{ lang: "Es" as const, text: "Hola.", label: "Spanish" },
|
|
1991
|
+
{ lang: "Fr" as const, text: "Bonjour.", label: "French" },
|
|
1992
|
+
{ lang: "De" as const, text: "Hallo.", label: "German" },
|
|
1993
|
+
{ lang: "Ar" as const, text: "Ł
Ų±ŲŲØŲ§.", label: "Arabic" },
|
|
1994
|
+
{ lang: "Hi" as const, text: "ą¤Øą¤®ą¤øą„ą¤¤ą„ą„¤", label: "Hindi" },
|
|
1995
|
+
];
|
|
1996
|
+
|
|
1997
|
+
try {
|
|
1998
|
+
const { Supertone } = await import("../src/index.js");
|
|
1999
|
+
const models = await import("../src/models/index.js");
|
|
2000
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
2001
|
+
|
|
2002
|
+
let allPassed = true;
|
|
2003
|
+
const results: any[] = [];
|
|
2004
|
+
|
|
2005
|
+
for (const tc of testCases) {
|
|
2006
|
+
console.log(` š Testing ${tc.label} (${tc.lang})...`);
|
|
2007
|
+
|
|
2008
|
+
try {
|
|
2009
|
+
const langEnum =
|
|
2010
|
+
models.APIConvertTextToSpeechUsingCharacterRequestLanguage[tc.lang];
|
|
2011
|
+
|
|
2012
|
+
const response = await client.textToSpeech.createSpeech({
|
|
2013
|
+
voiceId,
|
|
2014
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
2015
|
+
text: tc.text,
|
|
2016
|
+
language: langEnum,
|
|
2017
|
+
outputFormat:
|
|
2018
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat
|
|
2019
|
+
.Wav,
|
|
2020
|
+
model:
|
|
2021
|
+
models.APIConvertTextToSpeechUsingCharacterRequestModel
|
|
2022
|
+
.SonaSpeech2,
|
|
2023
|
+
},
|
|
2024
|
+
});
|
|
2025
|
+
|
|
2026
|
+
console.log(` ā
${tc.label} success`);
|
|
2027
|
+
results.push({ lang: tc.lang, success: true });
|
|
2028
|
+
} catch (e: any) {
|
|
2029
|
+
console.log(
|
|
2030
|
+
` ā ${tc.label} failed: ${e.message?.substring(0, 50)}`
|
|
2031
|
+
);
|
|
2032
|
+
results.push({ lang: tc.lang, success: false, error: e.message });
|
|
2033
|
+
allPassed = false;
|
|
2034
|
+
}
|
|
2035
|
+
}
|
|
2036
|
+
|
|
2037
|
+
console.log(
|
|
2038
|
+
` š Result: ${results.filter((r) => r.success).length}/${
|
|
2039
|
+
testCases.length
|
|
2040
|
+
} languages passed`
|
|
2041
|
+
);
|
|
2042
|
+
return [allPassed, results];
|
|
2043
|
+
} catch (e: any) {
|
|
2044
|
+
logDetailedError(e, "sona_speech_2 multilingual");
|
|
2045
|
+
return [false, e];
|
|
2046
|
+
}
|
|
2047
|
+
}
|
|
2048
|
+
|
|
2049
|
+
/**
|
|
2050
|
+
* Test TTS multilingual support with supertonic_api_1 (supports: ko, en, ja, es, pt)
|
|
2051
|
+
*/
|
|
2052
|
+
async function testMultilingualSupertonicApi1(
|
|
2053
|
+
voiceId: string | null
|
|
2054
|
+
): Promise<[boolean, any]> {
|
|
2055
|
+
console.log("š Multilingual Test - supertonic_api_1 (ko, en, ja, es, pt)");
|
|
2056
|
+
|
|
2057
|
+
if (!voiceId) {
|
|
2058
|
+
console.log(" ā ļø No voice ID available");
|
|
2059
|
+
return [false, null];
|
|
2060
|
+
}
|
|
2061
|
+
|
|
2062
|
+
const testCases = [
|
|
2063
|
+
{
|
|
2064
|
+
lang: "Ko" as const,
|
|
2065
|
+
text: "ģė
ķģøģ, ģķ¼ķ ė API ģ ėŖØėøģ
ėė¤.",
|
|
2066
|
+
label: "Korean",
|
|
2067
|
+
},
|
|
2068
|
+
{
|
|
2069
|
+
lang: "En" as const,
|
|
2070
|
+
text: "Hello, this is supertonic_api_1 model.",
|
|
2071
|
+
label: "English",
|
|
2072
|
+
},
|
|
2073
|
+
{
|
|
2074
|
+
lang: "Ja" as const,
|
|
2075
|
+
text: "ććć«ć”ćÆćć¹ć¼ćć¼ććććÆAPIćÆć³ć§ćć",
|
|
2076
|
+
label: "Japanese",
|
|
2077
|
+
},
|
|
2078
|
+
{
|
|
2079
|
+
lang: "Es" as const,
|
|
2080
|
+
text: "Hola, este es el modelo supertonic_api_1.",
|
|
2081
|
+
label: "Spanish",
|
|
2082
|
+
},
|
|
2083
|
+
{
|
|
2084
|
+
lang: "Pt" as const,
|
|
2085
|
+
text: "OlĆ”, este Ć© o modelo supertonic_api_1.",
|
|
2086
|
+
label: "Portuguese",
|
|
2087
|
+
},
|
|
2088
|
+
];
|
|
2089
|
+
|
|
2090
|
+
try {
|
|
2091
|
+
const { Supertone } = await import("../src/index.js");
|
|
2092
|
+
const models = await import("../src/models/index.js");
|
|
2093
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
2094
|
+
|
|
2095
|
+
let allPassed = true;
|
|
2096
|
+
const results: any[] = [];
|
|
2097
|
+
|
|
2098
|
+
for (const tc of testCases) {
|
|
2099
|
+
console.log(` š Testing ${tc.label} (${tc.lang})...`);
|
|
2100
|
+
|
|
2101
|
+
try {
|
|
2102
|
+
const langEnum =
|
|
2103
|
+
models.APIConvertTextToSpeechUsingCharacterRequestLanguage[tc.lang];
|
|
2104
|
+
|
|
2105
|
+
const response = await client.textToSpeech.createSpeech({
|
|
2106
|
+
voiceId,
|
|
2107
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
2108
|
+
text: tc.text,
|
|
2109
|
+
language: langEnum,
|
|
2110
|
+
outputFormat:
|
|
2111
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat
|
|
2112
|
+
.Wav,
|
|
2113
|
+
model:
|
|
2114
|
+
models.APIConvertTextToSpeechUsingCharacterRequestModel
|
|
2115
|
+
.SupertonicApi1,
|
|
2116
|
+
},
|
|
2117
|
+
});
|
|
2118
|
+
|
|
2119
|
+
console.log(` ā
${tc.label} success`);
|
|
2120
|
+
results.push({ lang: tc.lang, success: true });
|
|
2121
|
+
} catch (e: any) {
|
|
2122
|
+
console.log(
|
|
2123
|
+
` ā ${tc.label} failed: ${e.message?.substring(0, 50)}`
|
|
2124
|
+
);
|
|
2125
|
+
results.push({ lang: tc.lang, success: false, error: e.message });
|
|
2126
|
+
allPassed = false;
|
|
2127
|
+
}
|
|
2128
|
+
}
|
|
2129
|
+
|
|
2130
|
+
console.log(
|
|
2131
|
+
` š Result: ${results.filter((r) => r.success).length}/${
|
|
2132
|
+
testCases.length
|
|
2133
|
+
} languages passed`
|
|
2134
|
+
);
|
|
2135
|
+
return [allPassed, results];
|
|
2136
|
+
} catch (e: any) {
|
|
2137
|
+
logDetailedError(e, "supertonic_api_1 multilingual");
|
|
2138
|
+
return [false, e];
|
|
2139
|
+
}
|
|
2140
|
+
}
|
|
2141
|
+
|
|
2142
|
+
/**
 * Probe how the service reacts to an unsupported model/language pair:
 * sona_speech_1 asked to synthesize French.
 *
 * The probe is informational. It reports success both when the request is
 * rejected (the call throws) and when it is accepted (a note is returned
 * instead), so the only failing outcome is a missing voice ID.
 *
 * @param voiceId - Voice to synthesize with; a falsy value short-circuits the test.
 * @returns `[false, null]` without a voice ID, `[true, note]` when the API
 *   accepts the request, `[true, error]` when the call throws.
 */
async function testUnsupportedLanguageSonaSpeech1(
  voiceId: string | null
): Promise<[boolean, any]> {
  console.log(
    "š« Unsupported Language Test - sona_speech_1 with French (Expected to Fail)"
  );

  if (!voiceId) {
    console.log(" ā ļø No voice ID available");
    return [false, null];
  }

  try {
    const { Supertone } = await import("../src/index.js");
    const models = await import("../src/models/index.js");
    const client = new Supertone({ apiKey: API_KEY });

    console.log(` š Attempting sona_speech_1 with French (unsupported)`);

    // Only whether the call throws matters here, so the response is discarded.
    await client.textToSpeech.createSpeech({
      voiceId,
      apiConvertTextToSpeechUsingCharacterRequest: {
        text: "Bonjour, ceci est un test.",
        language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.Fr, // French - not supported by sona_speech_1
        outputFormat:
          models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
        model:
          models.APIConvertTextToSpeechUsingCharacterRequestModel.SonaSpeech1,
      },
    });

    // If we reach here, the API didn't reject - may need server-side validation
    console.log(
      ` ā ļø API accepted the request - server-side validation may not enforce language restriction`
    );
    console.log(
      ` š Note: Language restriction may be enforced at API level, not SDK level`
    );
    return [
      true,
      { note: "API accepted - language restriction may be server-side" },
    ];
  } catch (e: unknown) {
    // NOTE(review): every thrown error is counted as a correct rejection,
    // including ones unrelated to the language - confirm this is intended.
    console.log(
      ` ā
Correctly rejected unsupported language for sona_speech_1`
    );
    // Narrow before reading `.message` so a non-Error throw cannot raise a
    // second exception inside this handler.
    const message = e instanceof Error ? e.message : String(e);
    console.log(` š Error: ${message.substring(0, 100)}`);
    return [true, e];
  }
}
|
|
2195
|
+
|
|
2196
|
+
/**
 * Probe how the service reacts to an unsupported model/language pair:
 * supertonic_api_1 asked to synthesize German.
 *
 * The probe is informational. It reports success both when the request is
 * rejected (the call throws) and when it is accepted (a note is returned
 * instead), so the only failing outcome is a missing voice ID.
 *
 * @param voiceId - Voice to synthesize with; a falsy value short-circuits the test.
 * @returns `[false, null]` without a voice ID, `[true, note]` when the API
 *   accepts the request, `[true, error]` when the call throws.
 */
async function testUnsupportedLanguageSupertonicApi1(
  voiceId: string | null
): Promise<[boolean, any]> {
  console.log(
    "š« Unsupported Language Test - supertonic_api_1 with German (Expected to Fail)"
  );

  if (!voiceId) {
    console.log(" ā ļø No voice ID available");
    return [false, null];
  }

  try {
    const { Supertone } = await import("../src/index.js");
    const models = await import("../src/models/index.js");
    const client = new Supertone({ apiKey: API_KEY });

    console.log(` š Attempting supertonic_api_1 with German (unsupported)`);

    // Only whether the call throws matters here, so the response is discarded.
    await client.textToSpeech.createSpeech({
      voiceId,
      apiConvertTextToSpeechUsingCharacterRequest: {
        text: "Hallo, das ist ein Test.",
        language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.De, // German - not supported by supertonic_api_1
        outputFormat:
          models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
        model:
          models.APIConvertTextToSpeechUsingCharacterRequestModel
            .SupertonicApi1,
      },
    });

    // If we reach here, the API didn't reject - may need server-side validation
    console.log(
      ` ā ļø API accepted the request - server-side validation may not enforce language restriction`
    );
    console.log(
      ` š Note: Language restriction may be enforced at API level, not SDK level`
    );
    return [
      true,
      { note: "API accepted - language restriction may be server-side" },
    ];
  } catch (e: unknown) {
    // NOTE(review): every thrown error is counted as a correct rejection,
    // including ones unrelated to the language - confirm this is intended.
    console.log(
      ` ā
Correctly rejected unsupported language for supertonic_api_1`
    );
    // Narrow before reading `.message` so a non-Error throw cannot raise a
    // second exception inside this handler.
    const message = e instanceof Error ? e.message : String(e);
    console.log(` š Error: ${message.substring(0, 100)}`);
    return [true, e];
  }
}
|
|
2250
|
+
|
|
2251
|
+
/**
 * Call predictDuration with a voice-settings payload (speed only) and log
 * the predicted duration.
 *
 * @param voiceId - Voice to predict for; a falsy value fails the test
 *   before any request is made.
 * @returns `[true, response]` on success, `[false, error]` when anything
 *   throws, `[false, null]` without a voice ID.
 */
async function testPredictDurationWithVoiceSettings(
  voiceId: string | null
): Promise<[boolean, any]> {
  console.log("ā±ļø Duration Prediction with Voice Settings Test");

  if (!voiceId) {
    console.log(" ā ļø No voice ID available");
    return [false, null];
  }

  try {
    const sdk = await import("../src/index.js");
    const models = await import("../src/models/index.js");
    const client = new sdk.Supertone({ apiKey: API_KEY });

    const settings = { speed: 0.8 };

    console.log(
      ` š Predicting duration with voice settings for voice '${voiceId}'...`
    );
    console.log(` Settings: speed=${settings.speed}`);

    const request = {
      voiceId,
      predictTTSDurationUsingCharacterRequest: {
        text: "This is a duration test with adjusted speed.",
        language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
        voiceSettings: settings,
      },
    };
    const prediction = await client.textToSpeech.predictDuration(request);

    console.log(` ā
Predicted duration: ${prediction.duration}s`);

    return [true, prediction];
  } catch (err: any) {
    console.error(` ā Error: ${err.message || err}`);
    return [false, err];
  }
}
|
|
2295
|
+
|
|
2296
|
+
/**
 * Start a streamSpeech request that carries pitchShift and speed voice
 * settings. The test passes as soon as the call resolves; the returned
 * response is handed back to the caller without being read here.
 *
 * @param voiceId - Voice to stream with; a falsy value fails the test
 *   before any request is made.
 * @returns `[true, response]` on success, `[false, error]` when anything
 *   throws, `[false, null]` without a voice ID.
 */
async function testStreamSpeechWithVoiceSettings(
  voiceId: string | null
): Promise<[boolean, any]> {
  console.log("š” TTS Streaming with Voice Settings Test");

  if (!voiceId) {
    console.log(" ā ļø No voice ID available");
    return [false, null];
  }

  try {
    const sdk = await import("../src/index.js");
    const models = await import("../src/models/index.js");
    const client = new sdk.Supertone({ apiKey: API_KEY });

    const settings = { pitchShift: 1.05, speed: 1.1 };
    const { pitchShift, speed } = settings;

    console.log(
      ` š Streaming speech with voice settings for voice '${voiceId}'...`
    );
    console.log(` Settings: pitchShift=${pitchShift}, speed=${speed}`);
    console.log(" ā ļø This test consumes credits!");

    const request = {
      voiceId,
      apiConvertTextToSpeechUsingCharacterRequest: {
        text: "Streaming with adjusted voice settings.",
        language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
        outputFormat:
          models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
        voiceSettings: settings,
      },
    };
    const stream = await client.textToSpeech.streamSpeech(request);

    console.log(` ā
Stream with voice settings started successfully`);

    return [true, stream];
  } catch (err: any) {
    console.error(` ā Error: ${err.message || err}`);
    return [false, err];
  }
}
|
|
2346
|
+
|
|
2347
|
+
/**
|
|
2348
|
+
* Test MP3 format TTS
|
|
2349
|
+
*/
|
|
2350
|
+
async function testCreateSpeechMp3(
|
|
2351
|
+
voiceId: string | null
|
|
2352
|
+
): Promise<[boolean, any]> {
|
|
2353
|
+
console.log("š¤ MP3 Format TTS Test");
|
|
2354
|
+
|
|
2355
|
+
if (!voiceId) {
|
|
2356
|
+
console.log(" ā ļø No voice ID available");
|
|
2357
|
+
return [false, null];
|
|
2358
|
+
}
|
|
2359
|
+
|
|
2360
|
+
try {
|
|
2361
|
+
const { Supertone } = await import("../src/index.js");
|
|
2362
|
+
const models = await import("../src/models/index.js");
|
|
2363
|
+
const client = new Supertone({ apiKey: API_KEY });
|
|
2364
|
+
|
|
2365
|
+
console.log(` š MP3 TTS conversion with voice '${voiceId}'...`);
|
|
2366
|
+
console.log(" ā ļø This test consumes credits!");
|
|
2367
|
+
|
|
2368
|
+
const response = await client.textToSpeech.createSpeech({
|
|
2369
|
+
voiceId,
|
|
2370
|
+
apiConvertTextToSpeechUsingCharacterRequest: {
|
|
2371
|
+
text: "Hello! This is an MP3 format SDK test. Let's verify if it works correctly.",
|
|
2372
|
+
language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
|
|
2373
|
+
outputFormat:
|
|
2374
|
+
models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Mp3,
|
|
2375
|
+
style: "neutral",
|
|
2376
|
+
model: "sona_speech_1",
|
|
2377
|
+
},
|
|
2378
|
+
});
|
|
2379
|
+
|
|
2380
|
+
console.log(` ā
MP3 TTS conversion success`);
|
|
2381
|
+
|
|
2382
|
+
if (response.result) {
|
|
2383
|
+
const outputFile = "test_create_speech_output.mp3";
|
|
2384
|
+
const audioData = await extractAudioData(response);
|
|
2385
|
+
|
|
2386
|
+
fs.writeFileSync(outputFile, audioData);
|
|
2387
|
+
console.log(` š¾ MP3 audio file saved: ${outputFile}`);
|
|
2388
|
+
|
|
2389
|
+
// Verify MP3 header
|
|
2390
|
+
const header = audioData.slice(0, 10);
|
|
2391
|
+
if (header[0] === 0x49 && header[1] === 0x44 && header[2] === 0x33) {
|
|
2392
|
+
console.log(` ā
Valid MP3 file generated (ID3 tag)`);
|
|
2393
|
+
} else if (
|
|
2394
|
+
(header[0] === 0xff && header[1] === 0xfb) ||
|
|
2395
|
+
(header[0] === 0xff && header[1] === 0xfa)
|
|
2396
|
+
) {
|
|
2397
|
+
console.log(` ā
Valid MP3 file generated (MPEG frame)`);
|
|
1318
2398
|
} else {
|
|
1319
2399
|
console.log(
|
|
1320
2400
|
` š MP3 header: ${Array.from(header.slice(0, 10))
|
|
@@ -1536,6 +2616,304 @@ async function testCreateSpeechWithChunking(
|
|
|
1536
2616
|
}
|
|
1537
2617
|
}
|
|
1538
2618
|
|
|
2619
|
+
// =============================================================================
|
|
2620
|
+
// Pronunciation Dictionary Tests
|
|
2621
|
+
// =============================================================================
|
|
2622
|
+
|
|
2623
|
+
/**
 * Run createSpeech with a pronunciation dictionary that mixes
 * partial_match=false and partial_match=true entries, then save the audio
 * to "test_pronunciation_dictionary_output.wav" when the response has a result.
 *
 * @param voiceId - Voice to synthesize with; a falsy value fails the test
 *   before any request is made.
 * @returns `[true, response]` on success, `[false, error]` when anything
 *   throws, `[false, null]` without a voice ID.
 */
async function testCreateSpeechWithPronunciationDictionary(
  voiceId: string | null
): Promise<[boolean, any]> {
  console.log("š TTS with Pronunciation Dictionary Test");

  if (!voiceId) {
    console.log(" ā ļø No voice ID available");
    return [false, null];
  }

  try {
    const sdk = await import("../src/index.js");
    const models = await import("../src/models/index.js");
    const client = new sdk.Supertone({ apiKey: API_KEY });

    // Sentence containing abbreviations and special terms.
    const testText =
      "The CEO of OpenAI announced that GPT models are improving. Dr. Smith from MIT said AI research is accelerating.";

    // Entries flagged partial_match=false (logged below as word boundary match).
    const wholeWordEntries = [
      { text: "CEO", pronunciation: "Chief Executive Officer", partial_match: false },
      { text: "MIT", pronunciation: "Massachusetts Institute of Technology", partial_match: false },
      { text: "AI", pronunciation: "Artificial Intelligence", partial_match: false },
    ];
    // Entries flagged partial_match=true (logged below as substring match).
    const substringEntries = [
      { text: "GPT", pronunciation: "Generative Pre-trained Transformer", partial_match: true },
      { text: "Dr.", pronunciation: "Doctor", partial_match: true },
    ];
    const dictionary = [...wholeWordEntries, ...substringEntries];

    console.log(` š Original text: "${testText}"`);
    console.log(` š Pronunciation dictionary entries: ${dictionary.length}`);
    console.log(` - partial_match=false: CEO, MIT, AI (word boundary match)`);
    console.log(` - partial_match=true: GPT, Dr. (substring match)`);
    console.log(" ā ļø This test consumes credits!");

    const request = {
      voiceId,
      apiConvertTextToSpeechUsingCharacterRequest: {
        text: testText,
        language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
        outputFormat:
          models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
        style: "neutral",
        model: "sona_speech_1",
      },
    };
    // The dictionary is passed as a call option, not inside the request body.
    const speech = await client.textToSpeech.createSpeech(request, {
      pronunciationDictionary: dictionary,
    });

    console.log(` ā
TTS with pronunciation dictionary success`);

    if (speech.result) {
      const audioBytes = await extractAudioData(speech);
      const wavPath = "test_pronunciation_dictionary_output.wav";
      fs.writeFileSync(wavPath, audioBytes);
      console.log(` š¾ Audio saved: ${wavPath} (${audioBytes.length} bytes)`);
    }

    return [true, speech];
  } catch (err: any) {
    logDetailedError(err, "Pronunciation dictionary TTS");
    return [false, err];
  }
}
|
|
2694
|
+
|
|
2695
|
+
/**
 * Run createSpeech with a short input and a dictionary whose replacements
 * are much longer than the abbreviations they replace. The test's log
 * messages state the expectation that the expanded text passes 300
 * characters and triggers auto-chunking; the function does not verify that
 * itself. Audio is saved to "test_pronunciation_dictionary_long_text_output.wav"
 * when the response has a result.
 *
 * @param voiceId - Voice to synthesize with; a falsy value fails the test
 *   before any request is made.
 * @returns `[true, response]` on success, `[false, error]` when anything
 *   throws, `[false, null]` without a voice ID.
 */
async function testCreateSpeechWithPronunciationDictionaryLongText(
  voiceId: string | null
): Promise<[boolean, any]> {
  console.log("š TTS with Pronunciation Dictionary + Long Text Chunking Test");

  if (!voiceId) {
    console.log(" ā ļø No voice ID available");
    return [false, null];
  }

  try {
    const sdk = await import("../src/index.js");
    const models = await import("../src/models/index.js");
    const client = new sdk.Supertone({ apiKey: API_KEY });

    // Short source text, built from two sentences groups joined without a separator.
    const testText = [
      "AI and ML are revolutionizing tech. The CEO of OpenAI discussed GPT advancements. ",
      "Dr. Kim from MIT explained how NLP and CV work together. AWS and GCP provide cloud AI services.",
    ].join("");

    // Entries flagged partial_match=false.
    const wholeWordEntries = [
      { text: "AI", pronunciation: "Artificial Intelligence", partial_match: false },
      { text: "ML", pronunciation: "Machine Learning", partial_match: false },
      { text: "CEO", pronunciation: "Chief Executive Officer", partial_match: false },
      { text: "MIT", pronunciation: "Massachusetts Institute of Technology", partial_match: false },
      { text: "NLP", pronunciation: "Natural Language Processing", partial_match: false },
      { text: "CV", pronunciation: "Computer Vision", partial_match: false },
      { text: "AWS", pronunciation: "Amazon Web Services", partial_match: false },
      { text: "GCP", pronunciation: "Google Cloud Platform", partial_match: false },
    ];
    // Entries flagged partial_match=true.
    const substringEntries = [
      { text: "GPT", pronunciation: "Generative Pre-trained Transformer", partial_match: true },
      { text: "Dr.", pronunciation: "Doctor", partial_match: true },
      { text: "tech", pronunciation: "technology", partial_match: true },
    ];
    const dictionary = [...wholeWordEntries, ...substringEntries];

    console.log(` š Original text length: ${testText.length} characters (under 300)`);
    console.log(` š Pronunciation dictionary entries: ${dictionary.length}`);
    console.log(` - partial_match=false: AI, ML, CEO, MIT, NLP, CV, AWS, GCP`);
    console.log(` - partial_match=true: GPT, Dr., tech`);
    console.log(` š§ Expected: Text will expand to 300+ chars, triggering auto-chunking`);
    console.log(" ā ļø This test consumes credits!");

    const request = {
      voiceId,
      apiConvertTextToSpeechUsingCharacterRequest: {
        text: testText,
        language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
        outputFormat:
          models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
        style: "neutral",
        model: "sona_speech_1",
      },
    };
    // The dictionary is passed as a call option, not inside the request body.
    const speech = await client.textToSpeech.createSpeech(request, {
      pronunciationDictionary: dictionary,
    });

    console.log(` ā
TTS with pronunciation dictionary + long text chunking success`);
    console.log(` šÆ Auto-chunking was triggered after pronunciation expansion!`);

    if (speech.result) {
      const audioBytes = await extractAudioData(speech);
      const wavPath = "test_pronunciation_dictionary_long_text_output.wav";
      fs.writeFileSync(wavPath, audioBytes);
      console.log(` š¾ Audio saved: ${wavPath} (${audioBytes.length} bytes)`);
    }

    return [true, speech];
  } catch (err: any) {
    logDetailedError(err, "Pronunciation dictionary long text TTS");
    return [false, err];
  }
}
|
|
2777
|
+
|
|
2778
|
+
/**
 * Run streamSpeech with a pronunciation dictionary, then read the audio
 * through extractAudioData and save it to
 * "test_pronunciation_dictionary_stream_output.wav" when the response has a result.
 *
 * @param voiceId - Voice to stream with; a falsy value fails the test
 *   before any request is made.
 * @returns `[true, response]` on success, `[false, error]` when anything
 *   throws, `[false, null]` without a voice ID.
 */
async function testStreamSpeechWithPronunciationDictionary(
  voiceId: string | null
): Promise<[boolean, any]> {
  console.log("š” TTS Streaming with Pronunciation Dictionary Test");

  if (!voiceId) {
    console.log(" ā ļø No voice ID available");
    return [false, null];
  }

  try {
    const sdk = await import("../src/index.js");
    const models = await import("../src/models/index.js");
    const client = new sdk.Supertone({ apiKey: API_KEY });

    const testText = [
      "The API documentation explains how to use the SDK. ",
      "Dr. Lee from NASA discussed the new AI system.",
    ].join("");

    // Four partial_match=false entries followed by one partial_match=true entry.
    const dictionary = [
      { text: "API", pronunciation: "Application Programming Interface", partial_match: false },
      { text: "SDK", pronunciation: "Software Development Kit", partial_match: false },
      { text: "NASA", pronunciation: "National Aeronautics and Space Administration", partial_match: false },
      { text: "AI", pronunciation: "Artificial Intelligence", partial_match: false },
      { text: "Dr.", pronunciation: "Doctor", partial_match: true },
    ];

    console.log(` š Original text: "${testText}"`);
    console.log(` š Pronunciation dictionary entries: ${dictionary.length}`);
    console.log(" ā ļø This test consumes credits!");

    const request = {
      voiceId,
      apiConvertTextToSpeechUsingCharacterRequest: {
        text: testText,
        language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
        outputFormat:
          models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
      },
    };
    // The dictionary is passed as a call option, not inside the request body.
    const stream = await client.textToSpeech.streamSpeech(request, {
      pronunciationDictionary: dictionary,
    });

    console.log(` ā
Stream with pronunciation dictionary started successfully`);

    // Read the streamed audio and write it to disk.
    if (stream.result) {
      const audioBytes = await extractAudioData(stream);
      const wavPath = "test_pronunciation_dictionary_stream_output.wav";
      fs.writeFileSync(wavPath, audioBytes);
      console.log(` š¾ Audio saved: ${wavPath} (${audioBytes.length} bytes)`);
    }

    return [true, stream];
  } catch (err: any) {
    logDetailedError(err, "Pronunciation dictionary streaming TTS");
    return [false, err];
  }
}
|
|
2843
|
+
|
|
2844
|
+
/**
 * Run streamSpeech with a short input and a dictionary whose replacements
 * are much longer than the abbreviations they replace. The test's log
 * messages state the expectation that the expanded text passes 300
 * characters and triggers stream chunking; the function does not verify
 * that itself. Audio is saved to
 * "test_pronunciation_dictionary_stream_long_text_output.wav" when the
 * response has a result.
 *
 * @param voiceId - Voice to stream with; a falsy value fails the test
 *   before any request is made.
 * @returns `[true, response]` on success, `[false, error]` when anything
 *   throws, `[false, null]` without a voice ID.
 */
async function testStreamSpeechWithPronunciationDictionaryLongText(
  voiceId: string | null
): Promise<[boolean, any]> {
  console.log("š” TTS Streaming with Pronunciation Dictionary + Long Text Test");

  if (!voiceId) {
    console.log(" ā ļø No voice ID available");
    return [false, null];
  }

  try {
    const sdk = await import("../src/index.js");
    const models = await import("../src/models/index.js");
    const client = new sdk.Supertone({ apiKey: API_KEY });

    // Short source text; the dictionary below lengthens it considerably.
    const testText = [
      "AI is everywhere. ML powers many apps. The CEO spoke about GPT. ",
      "Dr. Smith from MIT and UCLA collaborated on NLP research. AWS and GCP offer AI services.",
    ].join("");

    // Entries flagged partial_match=false.
    const wholeWordEntries = [
      { text: "AI", pronunciation: "Artificial Intelligence", partial_match: false },
      { text: "ML", pronunciation: "Machine Learning", partial_match: false },
      { text: "CEO", pronunciation: "Chief Executive Officer", partial_match: false },
      { text: "MIT", pronunciation: "Massachusetts Institute of Technology", partial_match: false },
      { text: "UCLA", pronunciation: "University of California Los Angeles", partial_match: false },
      { text: "NLP", pronunciation: "Natural Language Processing", partial_match: false },
      { text: "AWS", pronunciation: "Amazon Web Services", partial_match: false },
      { text: "GCP", pronunciation: "Google Cloud Platform", partial_match: false },
    ];
    // Entries flagged partial_match=true.
    const substringEntries = [
      { text: "GPT", pronunciation: "Generative Pre-trained Transformer", partial_match: true },
      { text: "Dr.", pronunciation: "Doctor", partial_match: true },
    ];
    const dictionary = [...wholeWordEntries, ...substringEntries];

    console.log(` š Original text length: ${testText.length} characters`);
    console.log(` š Pronunciation dictionary entries: ${dictionary.length}`);
    console.log(` š§ Expected: Text will expand to 300+ chars, triggering stream chunking`);
    console.log(" ā ļø This test consumes credits!");

    const request = {
      voiceId,
      apiConvertTextToSpeechUsingCharacterRequest: {
        text: testText,
        language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
        outputFormat:
          models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
      },
    };
    // The dictionary is passed as a call option, not inside the request body.
    const stream = await client.textToSpeech.streamSpeech(request, {
      pronunciationDictionary: dictionary,
    });

    console.log(` ā
Stream with pronunciation dictionary + long text started successfully`);
    console.log(` šÆ Stream chunking was triggered after pronunciation expansion!`);

    if (stream.result) {
      const audioBytes = await extractAudioData(stream);
      const wavPath = "test_pronunciation_dictionary_stream_long_text_output.wav";
      fs.writeFileSync(wavPath, audioBytes);
      console.log(` š¾ Audio saved: ${wavPath} (${audioBytes.length} bytes)`);
    }

    return [true, stream];
  } catch (err: any) {
    logDetailedError(err, "Pronunciation dictionary streaming long text TTS");
    return [false, err];
  }
}
|
|
2916
|
+
|
|
1539
2917
|
/**
|
|
1540
2918
|
* Main test execution
|
|
1541
2919
|
*/
|
|
@@ -1549,7 +2927,7 @@ async function main(): Promise<boolean> {
|
|
|
1549
2927
|
console.log("");
|
|
1550
2928
|
|
|
1551
2929
|
const testResults: TestResult = {};
|
|
1552
|
-
|
|
2930
|
+
const voiceIdForTTS: string = "91992bbd4758bdcf9c9b01";
|
|
1553
2931
|
let customVoiceId: string | null = null;
|
|
1554
2932
|
let createdCustomVoiceId: string | null = null;
|
|
1555
2933
|
|
|
@@ -1572,9 +2950,6 @@ async function main(): Promise<boolean> {
|
|
|
1572
2950
|
|
|
1573
2951
|
[success, result] = await testListVoices();
|
|
1574
2952
|
testResults["list_voices"] = success;
|
|
1575
|
-
if (success && result.voiceId) {
|
|
1576
|
-
voiceIdForTTS = result.voiceId;
|
|
1577
|
-
}
|
|
1578
2953
|
|
|
1579
2954
|
[success, result] = await testSearchVoices();
|
|
1580
2955
|
testResults["search_voices"] = success;
|
|
@@ -1643,6 +3018,67 @@ async function main(): Promise<boolean> {
|
|
|
1643
3018
|
[success, result] = await testStreamSpeech(voiceIdForTTS);
|
|
1644
3019
|
testResults["stream_speech"] = success;
|
|
1645
3020
|
|
|
3021
|
+
// 5.5 New Model Tests (sona_speech_2, supertonic_api_1)
|
|
3022
|
+
console.log("\nš¤ New Model Tests (sona_speech_2, supertonic_api_1)");
|
|
3023
|
+
console.log("-".repeat(60));
|
|
3024
|
+
console.log("ā ļø These tests consume credits!");
|
|
3025
|
+
console.log("");
|
|
3026
|
+
|
|
3027
|
+
[success, result] = await testCreateSpeechWithSonaSpeech2(voiceIdForTTS);
|
|
3028
|
+
testResults["create_speech_sona_speech_2"] = success;
|
|
3029
|
+
|
|
3030
|
+
[success, result] = await testCreateSpeechWithSupertonicApi1(voiceIdForTTS);
|
|
3031
|
+
testResults["create_speech_supertonic_api_1"] = success;
|
|
3032
|
+
|
|
3033
|
+
[success, result] = await testCreateSpeechWithUnsupportedModel(
|
|
3034
|
+
voiceIdForTTS
|
|
3035
|
+
);
|
|
3036
|
+
testResults["create_speech_unsupported_model"] = success;
|
|
3037
|
+
|
|
3038
|
+
[success, result] = await testPredictDurationWithSonaSpeech2(voiceIdForTTS);
|
|
3039
|
+
testResults["predict_duration_sona_speech_2"] = success;
|
|
3040
|
+
|
|
3041
|
+
[success, result] = await testPredictDurationWithSupertonicApi1(
|
|
3042
|
+
voiceIdForTTS
|
|
3043
|
+
);
|
|
3044
|
+
testResults["predict_duration_supertonic_api_1"] = success;
|
|
3045
|
+
|
|
3046
|
+
[success, result] = await testPredictDurationWithUnsupportedModel(
|
|
3047
|
+
voiceIdForTTS
|
|
3048
|
+
);
|
|
3049
|
+
testResults["predict_duration_unsupported_model"] = success;
|
|
3050
|
+
|
|
3051
|
+
// 5.6 Multilingual Tests per Model
|
|
3052
|
+
console.log("\nš Multilingual Tests per Model");
|
|
3053
|
+
console.log("-".repeat(60));
|
|
3054
|
+
console.log("ā ļø These tests consume credits!");
|
|
3055
|
+
console.log("");
|
|
3056
|
+
|
|
3057
|
+
[success, result] = await testMultilingualSonaSpeech1(voiceIdForTTS);
|
|
3058
|
+
testResults["multilingual_sona_speech_1"] = success;
|
|
3059
|
+
|
|
3060
|
+
[success, result] = await testMultilingualSonaSpeech2(voiceIdForTTS);
|
|
3061
|
+
testResults["multilingual_sona_speech_2"] = success;
|
|
3062
|
+
|
|
3063
|
+
[success, result] = await testMultilingualSupertonicApi1(voiceIdForTTS);
|
|
3064
|
+
testResults["multilingual_supertonic_api_1"] = success;
|
|
3065
|
+
|
|
3066
|
+
// 5.7 Unsupported Language Tests
|
|
3067
|
+
console.log("\nš« Unsupported Language Tests");
|
|
3068
|
+
console.log("-".repeat(60));
|
|
3069
|
+
console.log(
|
|
3070
|
+
"ā ļø These tests verify error handling for unsupported model-language combinations!"
|
|
3071
|
+
);
|
|
3072
|
+
console.log("");
|
|
3073
|
+
|
|
3074
|
+
[success, result] = await testUnsupportedLanguageSonaSpeech1(voiceIdForTTS);
|
|
3075
|
+
testResults["unsupported_lang_sona_speech_1"] = success;
|
|
3076
|
+
|
|
3077
|
+
[success, result] = await testUnsupportedLanguageSupertonicApi1(
|
|
3078
|
+
voiceIdForTTS
|
|
3079
|
+
);
|
|
3080
|
+
testResults["unsupported_lang_supertonic_api_1"] = success;
|
|
3081
|
+
|
|
1646
3082
|
// 6. TTS Long Text Tests
|
|
1647
3083
|
console.log("\nš Text-to-Speech Long Text Tests");
|
|
1648
3084
|
console.log("-".repeat(60));
|
|
@@ -1652,6 +3088,29 @@ async function main(): Promise<boolean> {
|
|
|
1652
3088
|
[success, result] = await testCreateSpeechLongText(voiceIdForTTS);
|
|
1653
3089
|
testResults["create_speech_long_text"] = success;
|
|
1654
3090
|
|
|
3091
|
+
[success, result] = await testCreateSpeechLongSentenceNoPunctuation(
|
|
3092
|
+
voiceIdForTTS
|
|
3093
|
+
);
|
|
3094
|
+
testResults["create_speech_long_sentence_no_punctuation"] = success;
|
|
3095
|
+
|
|
3096
|
+
[success, result] = await testCreateSpeechJapaneseNoSpaces(voiceIdForTTS);
|
|
3097
|
+
testResults["create_speech_japanese_no_spaces"] = success;
|
|
3098
|
+
|
|
3099
|
+
// 6.5 Multilingual Punctuation Tests (fix/text_utils)
|
|
3100
|
+
console.log("\nš Multilingual Punctuation Chunking Tests");
|
|
3101
|
+
console.log("-".repeat(60));
|
|
3102
|
+
console.log("ā ļø These tests verify multilingual sentence punctuation support!");
|
|
3103
|
+
console.log("");
|
|
3104
|
+
|
|
3105
|
+
[success, result] = await testCreateSpeechArabicPunctuation(voiceIdForTTS);
|
|
3106
|
+
testResults["create_speech_arabic_punctuation"] = success;
|
|
3107
|
+
|
|
3108
|
+
[success, result] = await testCreateSpeechHindiPunctuation(voiceIdForTTS);
|
|
3109
|
+
testResults["create_speech_hindi_punctuation"] = success;
|
|
3110
|
+
|
|
3111
|
+
[success, result] = await testCreateSpeechEllipsisPunctuation(voiceIdForTTS);
|
|
3112
|
+
testResults["create_speech_ellipsis_punctuation"] = success;
|
|
3113
|
+
|
|
1655
3114
|
[success, result] = await testStreamSpeechLongText(voiceIdForTTS);
|
|
1656
3115
|
testResults["stream_speech_long_text"] = success;
|
|
1657
3116
|
|
|
@@ -1704,6 +3163,32 @@ async function main(): Promise<boolean> {
|
|
|
1704
3163
|
|
|
1705
3164
|
[success, result] = await testStreamSpeechLongTextMp3(voiceIdForTTS);
|
|
1706
3165
|
testResults["stream_speech_long_text_mp3"] = success;
|
|
3166
|
+
|
|
3167
|
+
// 10. Pronunciation Dictionary Tests
|
|
3168
|
+
console.log("\nš Pronunciation Dictionary Tests");
|
|
3169
|
+
console.log("-".repeat(60));
|
|
3170
|
+
console.log("ā ļø These tests consume credits!");
|
|
3171
|
+
console.log("");
|
|
3172
|
+
|
|
3173
|
+
[success, result] = await testCreateSpeechWithPronunciationDictionary(
|
|
3174
|
+
voiceIdForTTS
|
|
3175
|
+
);
|
|
3176
|
+
testResults["create_speech_pronunciation_dictionary"] = success;
|
|
3177
|
+
|
|
3178
|
+
[success, result] = await testCreateSpeechWithPronunciationDictionaryLongText(
|
|
3179
|
+
voiceIdForTTS
|
|
3180
|
+
);
|
|
3181
|
+
testResults["create_speech_pronunciation_dictionary_long_text"] = success;
|
|
3182
|
+
|
|
3183
|
+
[success, result] = await testStreamSpeechWithPronunciationDictionary(
|
|
3184
|
+
voiceIdForTTS
|
|
3185
|
+
);
|
|
3186
|
+
testResults["stream_speech_pronunciation_dictionary"] = success;
|
|
3187
|
+
|
|
3188
|
+
[success, result] = await testStreamSpeechWithPronunciationDictionaryLongText(
|
|
3189
|
+
voiceIdForTTS
|
|
3190
|
+
);
|
|
3191
|
+
testResults["stream_speech_pronunciation_dictionary_long_text"] = success;
|
|
1707
3192
|
}
|
|
1708
3193
|
|
|
1709
3194
|
// Results Summary
|
|
@@ -1761,6 +3246,12 @@ async function main(): Promise<boolean> {
|
|
|
1761
3246
|
" • Text-to-Speech: predictDuration, createSpeech, streamSpeech"
|
|
1762
3247
|
);
|
|
1763
3248
|
console.log(" • TTS Long Text: createSpeechLongText, streamSpeechLongText");
|
|
3249
|
+
console.log(
|
|
3250
|
+
" • TTS Chunking Strategies: Word-based (no punctuation), Character-based (Japanese)"
|
|
3251
|
+
);
|
|
3252
|
+
console.log(
|
|
3253
|
+
" • Multilingual Punctuation: Arabic (؟ ؛ ۔), Hindi (। ॥), Ellipsis (… ‥)"
|
|
3254
|
+
);
|
|
1764
3255
|
console.log(
|
|
1765
3256
|
" • TTS with Voice Settings: createSpeechWithVoiceSettings, predictDurationWithVoiceSettings, streamSpeechWithVoiceSettings"
|
|
1766
3257
|
);
|
|
@@ -1773,6 +3264,30 @@ async function main(): Promise<boolean> {
|
|
|
1773
3264
|
console.log(
|
|
1774
3265
|
" • Custom Features: Auto-chunking in createSpeech/streamSpeech (transparent)"
|
|
1775
3266
|
);
|
|
3267
|
+
console.log(
|
|
3268
|
+
" • Pronunciation Dictionary: createSpeech/streamSpeech with pronunciationDictionary option"
|
|
3269
|
+
);
|
|
3270
|
+
console.log(
|
|
3271
|
+
" - partial_match=false (word boundary) and partial_match=true (substring)"
|
|
3272
|
+
);
|
|
3273
|
+
console.log(
|
|
3274
|
+
" - Long text chunking after pronunciation expansion"
|
|
3275
|
+
);
|
|
3276
|
+
console.log("");
|
|
3277
|
+
console.log("🤖 New Model & Language Tests:");
|
|
3278
|
+
console.log(
|
|
3279
|
+
" • New Models: sona_speech_2, supertonic_api_1 (createSpeech & predictDuration)"
|
|
3280
|
+
);
|
|
3281
|
+
console.log(
|
|
3282
|
+
" • Unsupported Model Validation: Error handling for invalid model names"
|
|
3283
|
+
);
|
|
3284
|
+
console.log(" • Multilingual per Model:");
|
|
3285
|
+
console.log(" - sona_speech_1: ko, en, ja");
|
|
3286
|
+
console.log(" - sona_speech_2: all 23 languages");
|
|
3287
|
+
console.log(" - supertonic_api_1: ko, en, ja, es, pt");
|
|
3288
|
+
console.log(
|
|
3289
|
+
" • Unsupported Language Validation: Error handling for invalid model-language combinations"
|
|
3290
|
+
);
|
|
1776
3291
|
|
|
1777
3292
|
if (customVoiceId) {
|
|
1778
3293
|
console.log("");
|