@supertone/supertone 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +4 -4
  2. package/custom_test/realtime_tts_player.ts +120 -16
  3. package/custom_test/test_pronunciation_dictionary.ts +227 -0
  4. package/custom_test/test_real_api.ts +580 -0
  5. package/custom_test/test_text_utils_chunk_text_punctuation.ts +55 -0
  6. package/dist/commonjs/lib/config.d.ts +2 -2
  7. package/dist/commonjs/lib/config.d.ts.map +1 -1
  8. package/dist/commonjs/lib/config.js +2 -2
  9. package/dist/commonjs/lib/config.js.map +1 -1
  10. package/dist/commonjs/lib/custom_utils/index.d.ts +1 -0
  11. package/dist/commonjs/lib/custom_utils/index.d.ts.map +1 -1
  12. package/dist/commonjs/lib/custom_utils/index.js +5 -1
  13. package/dist/commonjs/lib/custom_utils/index.js.map +1 -1
  14. package/dist/commonjs/lib/custom_utils/pronunciation_utils.d.ts +24 -0
  15. package/dist/commonjs/lib/custom_utils/pronunciation_utils.d.ts.map +1 -0
  16. package/dist/commonjs/lib/custom_utils/pronunciation_utils.js +145 -0
  17. package/dist/commonjs/lib/custom_utils/pronunciation_utils.js.map +1 -0
  18. package/dist/commonjs/lib/custom_utils/text_utils.d.ts +1 -1
  19. package/dist/commonjs/lib/custom_utils/text_utils.d.ts.map +1 -1
  20. package/dist/commonjs/lib/custom_utils/text_utils.js +21 -4
  21. package/dist/commonjs/lib/custom_utils/text_utils.js.map +1 -1
  22. package/dist/commonjs/sdk/texttospeech.d.ts +17 -6
  23. package/dist/commonjs/sdk/texttospeech.d.ts.map +1 -1
  24. package/dist/commonjs/sdk/texttospeech.js +48 -25
  25. package/dist/commonjs/sdk/texttospeech.js.map +1 -1
  26. package/dist/esm/lib/config.d.ts +2 -2
  27. package/dist/esm/lib/config.d.ts.map +1 -1
  28. package/dist/esm/lib/config.js +2 -2
  29. package/dist/esm/lib/config.js.map +1 -1
  30. package/dist/esm/lib/custom_utils/index.d.ts +1 -0
  31. package/dist/esm/lib/custom_utils/index.d.ts.map +1 -1
  32. package/dist/esm/lib/custom_utils/index.js +2 -0
  33. package/dist/esm/lib/custom_utils/index.js.map +1 -1
  34. package/dist/esm/lib/custom_utils/pronunciation_utils.d.ts +24 -0
  35. package/dist/esm/lib/custom_utils/pronunciation_utils.d.ts.map +1 -0
  36. package/dist/esm/lib/custom_utils/pronunciation_utils.js +140 -0
  37. package/dist/esm/lib/custom_utils/pronunciation_utils.js.map +1 -0
  38. package/dist/esm/lib/custom_utils/text_utils.d.ts +1 -1
  39. package/dist/esm/lib/custom_utils/text_utils.d.ts.map +1 -1
  40. package/dist/esm/lib/custom_utils/text_utils.js +21 -4
  41. package/dist/esm/lib/custom_utils/text_utils.js.map +1 -1
  42. package/dist/esm/sdk/texttospeech.d.ts +17 -6
  43. package/dist/esm/sdk/texttospeech.d.ts.map +1 -1
  44. package/dist/esm/sdk/texttospeech.js +49 -26
  45. package/dist/esm/sdk/texttospeech.js.map +1 -1
  46. package/jsr.json +1 -1
  47. package/package.json +1 -1
  48. package/src/lib/config.ts +41 -41
  49. package/src/lib/custom_utils/index.ts +7 -0
  50. package/src/lib/custom_utils/pronunciation_utils.ts +193 -0
  51. package/src/lib/custom_utils/text_utils.ts +25 -4
  52. package/src/sdk/texttospeech.ts +99 -68
@@ -1084,6 +1084,235 @@ async function testCreateSpeechJapaneseNoSpaces(
1084
1084
  }
1085
1085
  }
1086
1086
 
1087
+ /**
1088
+ * Test TTS with Arabic text and Arabic punctuation marks (؟ ؛ ۔)
1089
+ * This tests multilingual sentence punctuation support added in fix/text_utils
1090
+ */
1091
+ async function testCreateSpeechArabicPunctuation(
1092
+ voiceId: string | null
1093
+ ): Promise<[boolean, any]> {
1094
+ console.log("🇸🇦 Arabic Text with Arabic Punctuation Test");
1095
+
1096
+ if (!voiceId) {
1097
+ console.log(" ⚠️ No voice ID available");
1098
+ return [false, null];
1099
+ }
1100
+
1101
+ try {
1102
+ const { Supertone } = await import("../src/index.js");
1103
+ const models = await import("../src/models/index.js");
1104
+ const client = new Supertone({ apiKey: API_KEY });
1105
+
1106
+ // Arabic text with Arabic punctuation marks (؟ ؛ ۔ ،)
1107
+ // Text length: ~350 characters (exceeds 300 char limit)
1108
+ const arabicText =
1109
+ "مرحبا بكم في اختبار تقنية تحويل النص إلى كلام؟ " +
1110
+ "هذا النظام يدعم اللغة العربية بشكل كامل؛ " +
1111
+ "يمكنه التعرف على علامات الترقيم العربية مثل علامة الاستفهام وعلامة الفاصلة المنقوطة۔ " +
1112
+ "تقنية الذكاء الاصطناعي تتطور بسرعة كبيرة، " +
1113
+ "والآن يمكننا تحويل النصوص الطويلة إلى كلام طبيعي؟ " +
1114
+ "هذا الاختبار يتحقق من أن النظام يقسم النص بشكل صحيح عند علامات الترقيم العربية؛ " +
1115
+ "نأمل أن يعمل كل شيء بشكل مثالي۔";
1116
+
1117
+ const actualLength = arabicText.length;
1118
+ console.log(
1119
+ ` 📏 Text length: ${actualLength} characters (Arabic with Arabic punctuation)`
1120
+ );
1121
+ console.log(` 🔧 Expected behavior: Sentence-based chunking with Arabic punctuation (؟ ؛ ۔)`);
1122
+ console.log(" ⚠️ This test consumes credits!");
1123
+
1124
+ if (actualLength <= 300) {
1125
+ console.log(` ❌ Text length ${actualLength} is <= 300, test may not trigger chunking`);
1126
+ }
1127
+
1128
+ const response = await client.textToSpeech.createSpeech({
1129
+ voiceId,
1130
+ apiConvertTextToSpeechUsingCharacterRequest: {
1131
+ text: arabicText,
1132
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.Ar,
1133
+ outputFormat:
1134
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
1135
+ style: "neutral",
1136
+ model: models.APIConvertTextToSpeechUsingCharacterRequestModel.SonaSpeech2,
1137
+ },
1138
+ });
1139
+
1140
+ if (response.result) {
1141
+ const audioData = await extractAudioData(response);
1142
+
1143
+ console.log(
1144
+ ` ✅ Arabic punctuation chunking TTS success: ${audioData.length} bytes`
1145
+ );
1146
+ console.log(` 🎯 Arabic text with Arabic punctuation processed correctly!`);
1147
+
1148
+ const outputFile = "test_arabic_punctuation_speech_output.wav";
1149
+ fs.writeFileSync(outputFile, audioData);
1150
+ console.log(` 💾 Audio saved: ${outputFile}`);
1151
+
1152
+ const estimatedChunks = Math.ceil(actualLength / 300);
1153
+ console.log(` 📊 Estimated chunks: ${estimatedChunks}`);
1154
+ }
1155
+
1156
+ return [true, response];
1157
+ } catch (e: any) {
1158
+ logDetailedError(e, "Arabic punctuation chunking");
1159
+ return [false, e];
1160
+ }
1161
+ }
1162
+
1163
+ /**
1164
+ * Test TTS with Hindi text and Devanagari punctuation marks (। ॥)
1165
+ * This tests multilingual sentence punctuation support added in fix/text_utils
1166
+ */
1167
+ async function testCreateSpeechHindiPunctuation(
1168
+ voiceId: string | null
1169
+ ): Promise<[boolean, any]> {
1170
+ console.log("🇮🇳 Hindi Text with Devanagari Punctuation Test");
1171
+
1172
+ if (!voiceId) {
1173
+ console.log(" ⚠️ No voice ID available");
1174
+ return [false, null];
1175
+ }
1176
+
1177
+ try {
1178
+ const { Supertone } = await import("../src/index.js");
1179
+ const models = await import("../src/models/index.js");
1180
+ const client = new Supertone({ apiKey: API_KEY });
1181
+
1182
+ // Hindi text with Devanagari punctuation marks (। ॥)
1183
+ // Text length: ~380 characters (exceeds 300 char limit)
1184
+ const hindiText =
1185
+ "नमस्ते और स्वागत है आपका इस परीक्षण में। " +
1186
+ "यह प्रणाली हिंदी भाषा का पूर्ण समर्थन करती है। " +
1187
+ "देवनागरी लिपि में पूर्ण विराम और दोहरा दंड जैसे विराम चिह्न होते हैं॥ " +
1188
+ "कृत्रिम बुद्धिमत्ता की तकनीक बहुत तेजी से विकसित हो रही है। " +
1189
+ "अब हम लंबे पाठों को स्वाभाविक वाणी में बदल सकते हैं। " +
1190
+ "यह परीक्षण जांचता है कि सिस्टम हिंदी विराम चिह्नों पर सही ढंग से पाठ को विभाजित करता है। " +
1191
+ "हमें आशा है कि सब कुछ ठीक से काम करेगा॥";
1192
+
1193
+ const actualLength = hindiText.length;
1194
+ console.log(
1195
+ ` 📏 Text length: ${actualLength} characters (Hindi with Devanagari punctuation)`
1196
+ );
1197
+ console.log(` 🔧 Expected behavior: Sentence-based chunking with Devanagari punctuation (। ॥)`);
1198
+ console.log(" ⚠️ This test consumes credits!");
1199
+
1200
+ if (actualLength <= 300) {
1201
+ console.log(` ❌ Text length ${actualLength} is <= 300, test may not trigger chunking`);
1202
+ }
1203
+
1204
+ const response = await client.textToSpeech.createSpeech({
1205
+ voiceId,
1206
+ apiConvertTextToSpeechUsingCharacterRequest: {
1207
+ text: hindiText,
1208
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.Hi,
1209
+ outputFormat:
1210
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
1211
+ style: "neutral",
1212
+ model: models.APIConvertTextToSpeechUsingCharacterRequestModel.SonaSpeech2,
1213
+ },
1214
+ });
1215
+
1216
+ if (response.result) {
1217
+ const audioData = await extractAudioData(response);
1218
+
1219
+ console.log(
1220
+ ` ✅ Hindi punctuation chunking TTS success: ${audioData.length} bytes`
1221
+ );
1222
+ console.log(` 🎯 Hindi text with Devanagari punctuation processed correctly!`);
1223
+
1224
+ const outputFile = "test_hindi_punctuation_speech_output.wav";
1225
+ fs.writeFileSync(outputFile, audioData);
1226
+ console.log(` 💾 Audio saved: ${outputFile}`);
1227
+
1228
+ const estimatedChunks = Math.ceil(actualLength / 300);
1229
+ console.log(` 📊 Estimated chunks: ${estimatedChunks}`);
1230
+ }
1231
+
1232
+ return [true, response];
1233
+ } catch (e: any) {
1234
+ logDetailedError(e, "Hindi punctuation chunking");
1235
+ return [false, e];
1236
+ }
1237
+ }
1238
+
1239
+ /**
1240
+ * Test TTS with ellipsis punctuation marks (… ‥)
1241
+ * This tests multilingual sentence punctuation support added in fix/text_utils
1242
+ */
1243
+ async function testCreateSpeechEllipsisPunctuation(
1244
+ voiceId: string | null
1245
+ ): Promise<[boolean, any]> {
1246
+ console.log("⏳ Text with Ellipsis Punctuation Test (… ‥)");
1247
+
1248
+ if (!voiceId) {
1249
+ console.log(" ⚠️ No voice ID available");
1250
+ return [false, null];
1251
+ }
1252
+
1253
+ try {
1254
+ const { Supertone } = await import("../src/index.js");
1255
+ const models = await import("../src/models/index.js");
1256
+ const client = new Supertone({ apiKey: API_KEY });
1257
+
1258
+ // Text with ellipsis punctuation marks (… ‥)
1259
+ // Text length: ~540 characters (exceeds 300 char limit)
1260
+ const ellipsisText =
1261
+ "Sometimes we need to pause and think… " +
1262
+ "The ellipsis character is used to indicate a trailing thought or a pause in speech… " +
1263
+ "This test verifies that the text chunking system correctly handles Unicode ellipsis characters‥ " +
1264
+ "There are actually multiple types of ellipsis in Unicode… " +
1265
+ "The horizontal ellipsis U+2026 and the two dot leader U+2025 are both supported‥ " +
1266
+ "When processing long texts the SDK should split at these punctuation marks… " +
1267
+ "This ensures natural pauses in the generated speech output‥ " +
1268
+ "Let us verify that everything works correctly…";
1269
+
1270
+ const actualLength = ellipsisText.length;
1271
+ console.log(
1272
+ ` 📏 Text length: ${actualLength} characters (with ellipsis punctuation)`
1273
+ );
1274
+ console.log(` 🔧 Expected behavior: Sentence-based chunking with ellipsis (… ‥)`);
1275
+ console.log(" ⚠️ This test consumes credits!");
1276
+
1277
+ if (actualLength <= 300) {
1278
+ console.log(` ❌ Text length ${actualLength} is <= 300, test may not trigger chunking`);
1279
+ }
1280
+
1281
+ const response = await client.textToSpeech.createSpeech({
1282
+ voiceId,
1283
+ apiConvertTextToSpeechUsingCharacterRequest: {
1284
+ text: ellipsisText,
1285
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
1286
+ outputFormat:
1287
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
1288
+ style: "neutral",
1289
+ model: models.APIConvertTextToSpeechUsingCharacterRequestModel.SonaSpeech1,
1290
+ },
1291
+ });
1292
+
1293
+ if (response.result) {
1294
+ const audioData = await extractAudioData(response);
1295
+
1296
+ console.log(
1297
+ ` ✅ Ellipsis punctuation chunking TTS success: ${audioData.length} bytes`
1298
+ );
1299
+ console.log(` 🎯 Text with ellipsis punctuation processed correctly!`);
1300
+
1301
+ const outputFile = "test_ellipsis_punctuation_speech_output.wav";
1302
+ fs.writeFileSync(outputFile, audioData);
1303
+ console.log(` 💾 Audio saved: ${outputFile}`);
1304
+
1305
+ const estimatedChunks = Math.ceil(actualLength / 300);
1306
+ console.log(` 📊 Estimated chunks: ${estimatedChunks}`);
1307
+ }
1308
+
1309
+ return [true, response];
1310
+ } catch (e: any) {
1311
+ logDetailedError(e, "Ellipsis punctuation chunking");
1312
+ return [false, e];
1313
+ }
1314
+ }
1315
+
1087
1316
  /**
1088
1317
  * Test TTS streaming with long text
1089
1318
  */
@@ -2387,6 +2616,304 @@ async function testCreateSpeechWithChunking(
2387
2616
  }
2388
2617
  }
2389
2618
 
2619
+ // =============================================================================
2620
+ // Pronunciation Dictionary Tests
2621
+ // =============================================================================
2622
+
2623
+ /**
2624
+ * Test TTS with pronunciation dictionary (basic test with partial_match=true/false)
2625
+ */
2626
+ async function testCreateSpeechWithPronunciationDictionary(
2627
+ voiceId: string | null
2628
+ ): Promise<[boolean, any]> {
2629
+ console.log("📖 TTS with Pronunciation Dictionary Test");
2630
+
2631
+ if (!voiceId) {
2632
+ console.log(" ⚠️ No voice ID available");
2633
+ return [false, null];
2634
+ }
2635
+
2636
+ try {
2637
+ const { Supertone } = await import("../src/index.js");
2638
+ const models = await import("../src/models/index.js");
2639
+ const client = new Supertone({ apiKey: API_KEY });
2640
+
2641
+ // Test text with abbreviations and special terms
2642
+ const testText =
2643
+ "The CEO of OpenAI announced that GPT models are improving. Dr. Smith from MIT said AI research is accelerating.";
2644
+
2645
+ // Pronunciation dictionary with both partial_match=true and partial_match=false cases
2646
+ const pronunciationDictionary = [
2647
+ // partial_match=false: exact word boundary match
2648
+ { text: "CEO", pronunciation: "Chief Executive Officer", partial_match: false },
2649
+ { text: "MIT", pronunciation: "Massachusetts Institute of Technology", partial_match: false },
2650
+ { text: "AI", pronunciation: "Artificial Intelligence", partial_match: false },
2651
+ // partial_match=true: substring match (e.g. an "AI" entry with partial_match=true would turn "OpenAI" into "OpenArtificial Intelligence"; here "AI" is partial_match=false)
2652
+ { text: "GPT", pronunciation: "Generative Pre-trained Transformer", partial_match: true },
2653
+ { text: "Dr.", pronunciation: "Doctor", partial_match: true },
2654
+ ];
2655
+
2656
+ console.log(` 🔍 Original text: "${testText}"`);
2657
+ console.log(` 📖 Pronunciation dictionary entries: ${pronunciationDictionary.length}`);
2658
+ console.log(` - partial_match=false: CEO, MIT, AI (word boundary match)`);
2659
+ console.log(` - partial_match=true: GPT, Dr. (substring match)`);
2660
+ console.log(" ⚠️ This test consumes credits!");
2661
+
2662
+ const response = await client.textToSpeech.createSpeech(
2663
+ {
2664
+ voiceId,
2665
+ apiConvertTextToSpeechUsingCharacterRequest: {
2666
+ text: testText,
2667
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
2668
+ outputFormat:
2669
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
2670
+ style: "neutral",
2671
+ model: "sona_speech_1",
2672
+ },
2673
+ },
2674
+ {
2675
+ pronunciationDictionary,
2676
+ }
2677
+ );
2678
+
2679
+ console.log(` ✅ TTS with pronunciation dictionary success`);
2680
+
2681
+ if (response.result) {
2682
+ const audioData = await extractAudioData(response);
2683
+ const outputFile = "test_pronunciation_dictionary_output.wav";
2684
+ fs.writeFileSync(outputFile, audioData);
2685
+ console.log(` 💾 Audio saved: ${outputFile} (${audioData.length} bytes)`);
2686
+ }
2687
+
2688
+ return [true, response];
2689
+ } catch (e: any) {
2690
+ logDetailedError(e, "Pronunciation dictionary TTS");
2691
+ return [false, e];
2692
+ }
2693
+ }
2694
+
2695
+ /**
2696
+ * Test TTS with pronunciation dictionary causing text to exceed 300 chars (triggers chunking)
2697
+ */
2698
+ async function testCreateSpeechWithPronunciationDictionaryLongText(
2699
+ voiceId: string | null
2700
+ ): Promise<[boolean, any]> {
2701
+ console.log("📖 TTS with Pronunciation Dictionary + Long Text Chunking Test");
2702
+
2703
+ if (!voiceId) {
2704
+ console.log(" ⚠️ No voice ID available");
2705
+ return [false, null];
2706
+ }
2707
+
2708
+ try {
2709
+ const { Supertone } = await import("../src/index.js");
2710
+ const models = await import("../src/models/index.js");
2711
+ const client = new Supertone({ apiKey: API_KEY });
2712
+
2713
+ // Short original text (~200 chars) that will exceed 300 chars after pronunciation dictionary expansion
2714
+ const testText =
2715
+ "AI and ML are revolutionizing tech. The CEO of OpenAI discussed GPT advancements. " +
2716
+ "Dr. Kim from MIT explained how NLP and CV work together. AWS and GCP provide cloud AI services.";
2717
+
2718
+ // Pronunciation dictionary that expands abbreviations significantly
2719
+ const pronunciationDictionary = [
2720
+ // partial_match=false: exact word boundary matches
2721
+ { text: "AI", pronunciation: "Artificial Intelligence", partial_match: false },
2722
+ { text: "ML", pronunciation: "Machine Learning", partial_match: false },
2723
+ { text: "CEO", pronunciation: "Chief Executive Officer", partial_match: false },
2724
+ { text: "MIT", pronunciation: "Massachusetts Institute of Technology", partial_match: false },
2725
+ { text: "NLP", pronunciation: "Natural Language Processing", partial_match: false },
2726
+ { text: "CV", pronunciation: "Computer Vision", partial_match: false },
2727
+ { text: "AWS", pronunciation: "Amazon Web Services", partial_match: false },
2728
+ { text: "GCP", pronunciation: "Google Cloud Platform", partial_match: false },
2729
+ // partial_match=true: substring matches
2730
+ { text: "GPT", pronunciation: "Generative Pre-trained Transformer", partial_match: true },
2731
+ { text: "Dr.", pronunciation: "Doctor", partial_match: true },
2732
+ { text: "tech", pronunciation: "technology", partial_match: true },
2733
+ ];
2734
+
2735
+ const originalLength = testText.length;
2736
+
2737
+ console.log(` 🔍 Original text length: ${originalLength} characters (under 300)`);
2738
+ console.log(` 📖 Pronunciation dictionary entries: ${pronunciationDictionary.length}`);
2739
+ console.log(` - partial_match=false: AI, ML, CEO, MIT, NLP, CV, AWS, GCP`);
2740
+ console.log(` - partial_match=true: GPT, Dr., tech`);
2741
+ console.log(` 🔧 Expected: Text will expand to 300+ chars, triggering auto-chunking`);
2742
+ console.log(" ⚠️ This test consumes credits!");
2743
+
2744
+ const response = await client.textToSpeech.createSpeech(
2745
+ {
2746
+ voiceId,
2747
+ apiConvertTextToSpeechUsingCharacterRequest: {
2748
+ text: testText,
2749
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
2750
+ outputFormat:
2751
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
2752
+ style: "neutral",
2753
+ model: "sona_speech_1",
2754
+ },
2755
+ },
2756
+ {
2757
+ pronunciationDictionary,
2758
+ }
2759
+ );
2760
+
2761
+ console.log(` ✅ TTS with pronunciation dictionary + long text chunking success`);
2762
+ console.log(` 🎯 Auto-chunking was triggered after pronunciation expansion!`);
2763
+
2764
+ if (response.result) {
2765
+ const audioData = await extractAudioData(response);
2766
+ const outputFile = "test_pronunciation_dictionary_long_text_output.wav";
2767
+ fs.writeFileSync(outputFile, audioData);
2768
+ console.log(` 💾 Audio saved: ${outputFile} (${audioData.length} bytes)`);
2769
+ }
2770
+
2771
+ return [true, response];
2772
+ } catch (e: any) {
2773
+ logDetailedError(e, "Pronunciation dictionary long text TTS");
2774
+ return [false, e];
2775
+ }
2776
+ }
2777
+
2778
+ /**
2779
+ * Test TTS streaming with pronunciation dictionary
2780
+ */
2781
+ async function testStreamSpeechWithPronunciationDictionary(
2782
+ voiceId: string | null
2783
+ ): Promise<[boolean, any]> {
2784
+ console.log("📡 TTS Streaming with Pronunciation Dictionary Test");
2785
+
2786
+ if (!voiceId) {
2787
+ console.log(" ⚠️ No voice ID available");
2788
+ return [false, null];
2789
+ }
2790
+
2791
+ try {
2792
+ const { Supertone } = await import("../src/index.js");
2793
+ const models = await import("../src/models/index.js");
2794
+ const client = new Supertone({ apiKey: API_KEY });
2795
+
2796
+ const testText =
2797
+ "The API documentation explains how to use the SDK. " +
2798
+ "Dr. Lee from NASA discussed the new AI system.";
2799
+
2800
+ const pronunciationDictionary = [
2801
+ { text: "API", pronunciation: "Application Programming Interface", partial_match: false },
2802
+ { text: "SDK", pronunciation: "Software Development Kit", partial_match: false },
2803
+ { text: "NASA", pronunciation: "National Aeronautics and Space Administration", partial_match: false },
2804
+ { text: "AI", pronunciation: "Artificial Intelligence", partial_match: false },
2805
+ { text: "Dr.", pronunciation: "Doctor", partial_match: true },
2806
+ ];
2807
+
2808
+ console.log(` 🔍 Original text: "${testText}"`);
2809
+ console.log(` 📖 Pronunciation dictionary entries: ${pronunciationDictionary.length}`);
2810
+ console.log(" ⚠️ This test consumes credits!");
2811
+
2812
+ const response = await client.textToSpeech.streamSpeech(
2813
+ {
2814
+ voiceId,
2815
+ apiConvertTextToSpeechUsingCharacterRequest: {
2816
+ text: testText,
2817
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
2818
+ outputFormat:
2819
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
2820
+ },
2821
+ },
2822
+ {
2823
+ pronunciationDictionary,
2824
+ }
2825
+ );
2826
+
2827
+ console.log(` ✅ Stream with pronunciation dictionary started successfully`);
2828
+
2829
+ // Consume the stream and save to file
2830
+ if (response.result) {
2831
+ const audioData = await extractAudioData(response);
2832
+ const outputFile = "test_pronunciation_dictionary_stream_output.wav";
2833
+ fs.writeFileSync(outputFile, audioData);
2834
+ console.log(` 💾 Audio saved: ${outputFile} (${audioData.length} bytes)`);
2835
+ }
2836
+
2837
+ return [true, response];
2838
+ } catch (e: any) {
2839
+ logDetailedError(e, "Pronunciation dictionary streaming TTS");
2840
+ return [false, e];
2841
+ }
2842
+ }
2843
+
2844
+ /**
2845
+ * Test TTS streaming with pronunciation dictionary + long text (triggers chunking)
2846
+ */
2847
+ async function testStreamSpeechWithPronunciationDictionaryLongText(
2848
+ voiceId: string | null
2849
+ ): Promise<[boolean, any]> {
2850
+ console.log("📡 TTS Streaming with Pronunciation Dictionary + Long Text Test");
2851
+
2852
+ if (!voiceId) {
2853
+ console.log(" ⚠️ No voice ID available");
2854
+ return [false, null];
2855
+ }
2856
+
2857
+ try {
2858
+ const { Supertone } = await import("../src/index.js");
2859
+ const models = await import("../src/models/index.js");
2860
+ const client = new Supertone({ apiKey: API_KEY });
2861
+
2862
+ // Short text that will expand after pronunciation dictionary
2863
+ const testText =
2864
+ "AI is everywhere. ML powers many apps. The CEO spoke about GPT. " +
2865
+ "Dr. Smith from MIT and UCLA collaborated on NLP research. AWS and GCP offer AI services.";
2866
+
2867
+ const pronunciationDictionary = [
2868
+ { text: "AI", pronunciation: "Artificial Intelligence", partial_match: false },
2869
+ { text: "ML", pronunciation: "Machine Learning", partial_match: false },
2870
+ { text: "CEO", pronunciation: "Chief Executive Officer", partial_match: false },
2871
+ { text: "MIT", pronunciation: "Massachusetts Institute of Technology", partial_match: false },
2872
+ { text: "UCLA", pronunciation: "University of California Los Angeles", partial_match: false },
2873
+ { text: "NLP", pronunciation: "Natural Language Processing", partial_match: false },
2874
+ { text: "AWS", pronunciation: "Amazon Web Services", partial_match: false },
2875
+ { text: "GCP", pronunciation: "Google Cloud Platform", partial_match: false },
2876
+ { text: "GPT", pronunciation: "Generative Pre-trained Transformer", partial_match: true },
2877
+ { text: "Dr.", pronunciation: "Doctor", partial_match: true },
2878
+ ];
2879
+
2880
+ console.log(` 🔍 Original text length: ${testText.length} characters`);
2881
+ console.log(` 📖 Pronunciation dictionary entries: ${pronunciationDictionary.length}`);
2882
+ console.log(` 🔧 Expected: Text will expand to 300+ chars, triggering stream chunking`);
2883
+ console.log(" ⚠️ This test consumes credits!");
2884
+
2885
+ const response = await client.textToSpeech.streamSpeech(
2886
+ {
2887
+ voiceId,
2888
+ apiConvertTextToSpeechUsingCharacterRequest: {
2889
+ text: testText,
2890
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
2891
+ outputFormat:
2892
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
2893
+ },
2894
+ },
2895
+ {
2896
+ pronunciationDictionary,
2897
+ }
2898
+ );
2899
+
2900
+ console.log(` ✅ Stream with pronunciation dictionary + long text started successfully`);
2901
+ console.log(` 🎯 Stream chunking was triggered after pronunciation expansion!`);
2902
+
2903
+ if (response.result) {
2904
+ const audioData = await extractAudioData(response);
2905
+ const outputFile = "test_pronunciation_dictionary_stream_long_text_output.wav";
2906
+ fs.writeFileSync(outputFile, audioData);
2907
+ console.log(` 💾 Audio saved: ${outputFile} (${audioData.length} bytes)`);
2908
+ }
2909
+
2910
+ return [true, response];
2911
+ } catch (e: any) {
2912
+ logDetailedError(e, "Pronunciation dictionary streaming long text TTS");
2913
+ return [false, e];
2914
+ }
2915
+ }
2916
+
2390
2917
  /**
2391
2918
  * Main test execution
2392
2919
  */
@@ -2569,6 +3096,21 @@ async function main(): Promise<boolean> {
2569
3096
  [success, result] = await testCreateSpeechJapaneseNoSpaces(voiceIdForTTS);
2570
3097
  testResults["create_speech_japanese_no_spaces"] = success;
2571
3098
 
3099
+ // 6.5 Multilingual Punctuation Tests (fix/text_utils)
3100
+ console.log("\n🌍 Multilingual Punctuation Chunking Tests");
3101
+ console.log("-".repeat(60));
3102
+ console.log("⚠️ These tests verify multilingual sentence punctuation support!");
3103
+ console.log("");
3104
+
3105
+ [success, result] = await testCreateSpeechArabicPunctuation(voiceIdForTTS);
3106
+ testResults["create_speech_arabic_punctuation"] = success;
3107
+
3108
+ [success, result] = await testCreateSpeechHindiPunctuation(voiceIdForTTS);
3109
+ testResults["create_speech_hindi_punctuation"] = success;
3110
+
3111
+ [success, result] = await testCreateSpeechEllipsisPunctuation(voiceIdForTTS);
3112
+ testResults["create_speech_ellipsis_punctuation"] = success;
3113
+
2572
3114
  [success, result] = await testStreamSpeechLongText(voiceIdForTTS);
2573
3115
  testResults["stream_speech_long_text"] = success;
2574
3116
 
@@ -2621,6 +3163,32 @@ async function main(): Promise<boolean> {
2621
3163
 
2622
3164
  [success, result] = await testStreamSpeechLongTextMp3(voiceIdForTTS);
2623
3165
  testResults["stream_speech_long_text_mp3"] = success;
3166
+
3167
+ // 10. Pronunciation Dictionary Tests
3168
+ console.log("\n📖 Pronunciation Dictionary Tests");
3169
+ console.log("-".repeat(60));
3170
+ console.log("⚠️ These tests consume credits!");
3171
+ console.log("");
3172
+
3173
+ [success, result] = await testCreateSpeechWithPronunciationDictionary(
3174
+ voiceIdForTTS
3175
+ );
3176
+ testResults["create_speech_pronunciation_dictionary"] = success;
3177
+
3178
+ [success, result] = await testCreateSpeechWithPronunciationDictionaryLongText(
3179
+ voiceIdForTTS
3180
+ );
3181
+ testResults["create_speech_pronunciation_dictionary_long_text"] = success;
3182
+
3183
+ [success, result] = await testStreamSpeechWithPronunciationDictionary(
3184
+ voiceIdForTTS
3185
+ );
3186
+ testResults["stream_speech_pronunciation_dictionary"] = success;
3187
+
3188
+ [success, result] = await testStreamSpeechWithPronunciationDictionaryLongText(
3189
+ voiceIdForTTS
3190
+ );
3191
+ testResults["stream_speech_pronunciation_dictionary_long_text"] = success;
2624
3192
  }
2625
3193
 
2626
3194
  // Results Summary
@@ -2681,6 +3249,9 @@ async function main(): Promise<boolean> {
2681
3249
  console.log(
2682
3250
  " • TTS Chunking Strategies: Word-based (no punctuation), Character-based (Japanese)"
2683
3251
  );
3252
+ console.log(
3253
+ " • Multilingual Punctuation: Arabic (؟ ؛ ۔), Hindi (। ॥), Ellipsis (… ‥)"
3254
+ );
2684
3255
  console.log(
2685
3256
  " • TTS with Voice Settings: createSpeechWithVoiceSettings, predictDurationWithVoiceSettings, streamSpeechWithVoiceSettings"
2686
3257
  );
@@ -2693,6 +3264,15 @@ async function main(): Promise<boolean> {
2693
3264
  console.log(
2694
3265
  " • Custom Features: Auto-chunking in createSpeech/streamSpeech (transparent)"
2695
3266
  );
3267
+ console.log(
3268
+ " • Pronunciation Dictionary: createSpeech/streamSpeech with pronunciationDictionary option"
3269
+ );
3270
+ console.log(
3271
+ " - partial_match=false (word boundary) and partial_match=true (substring)"
3272
+ );
3273
+ console.log(
3274
+ " - Long text chunking after pronunciation expansion"
3275
+ );
2696
3276
  console.log("");
2697
3277
  console.log("🤖 New Model & Language Tests:");
2698
3278
  console.log(
@@ -0,0 +1,55 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Smoke test for multilingual sentence punctuation splitting in chunkText().
4
+ *
5
+ * Run:
6
+ * npx ts-node custom_test/test_text_utils_chunk_text_punctuation.ts
7
+ * # or after build:
8
+ * node dist/custom_test/test_text_utils_chunk_text_punctuation.js
9
+ */
10
+
11
+ import { chunkText } from "../src/lib/custom_utils/text_utils.js";
12
+
13
+ function assertSplits(
14
+ text: string,
15
+ expectedChunks: string[],
16
+ maxLength: number
17
+ ): void {
18
+ const got = chunkText(text, maxLength);
19
+ const passed = JSON.stringify(got) === JSON.stringify(expectedChunks);
20
+
21
+ if (!passed) {
22
+ throw new Error(
23
+ `\ntext=${JSON.stringify(text)}\nexpected=${JSON.stringify(expectedChunks)}\ngot=${JSON.stringify(got)}`
24
+ );
25
+ }
26
+ }
27
+
28
+ function main(): void {
29
+ // English / many EU languages
30
+ assertSplits("Hello. World!", ["Hello. ", "World!"], 8);
31
+
32
+ // Korean (mostly ASCII punctuation in practice, plus ellipsis)
33
+ assertSplits("안...반가… 네.", ["안...", "반가… ", "네."], 4);
34
+
35
+ // Japanese
36
+ assertSplits(
37
+ "こんにちは。元気ですか?はい!",
38
+ ["こんにちは。", "元気ですか?", "はい!"],
39
+ 6
40
+ );
41
+
42
+ // Arabic (short samples to avoid max_length merge issues)
43
+ assertSplits("مر؟ نعم۔", ["مر؟ ", "نعم۔"], 5);
44
+
45
+ // Hindi
46
+ assertSplits("हाँ। नहीं॥", ["हाँ। ", "नहीं॥"], 6);
47
+
48
+ // Greek question mark (U+037E)
49
+ assertSplits("Γεια;Καλά.", ["Γεια;", "Καλά."], 5);
50
+
51
+ console.log("OK: chunkText punctuation smoke test passed");
52
+ }
53
+
54
+ main();
55
+
@@ -31,8 +31,8 @@ export declare function serverURLFromOptions(options: SDKOptions): URL | null;
31
31
  export declare const SDK_METADATA: {
32
32
  readonly language: "typescript";
33
33
  readonly openapiDocVersion: "0.8.69";
34
- readonly sdkVersion: "0.1.2";
34
+ readonly sdkVersion: "0.1.4";
35
35
  readonly genVersion: "2.686.7";
36
- readonly userAgent: "speakeasy-sdk/typescript 0.1.2 2.686.7 0.8.69 @supertone/supertone";
36
+ readonly userAgent: "speakeasy-sdk/typescript 0.1.4 2.686.7 0.8.69 @supertone/supertone";
37
37
  };
38
38
  //# sourceMappingURL=config.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../../src/lib/config.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAG3C;;GAEG;AACH,eAAO,MAAM,UAAU,uCAKb,CAAC;AAEX,MAAM,MAAM,UAAU,GAAG;IACvB,MAAM,CAAC,EAAE,MAAM,GAAG,CAAC,MAAM,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,SAAS,CAAC;IAEtD,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;OAEG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB,CAAC;AAEF,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,UAAU,GAAG,GAAG,GAAG,IAAI,CAepE;AAED,eAAO,MAAM,YAAY;;;;;;CAOf,CAAC"}
1
+ {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../../src/lib/config.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,UAAU,EAAE,MAAM,WAAW,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAG3C;;GAEG;AACH,eAAO,MAAM,UAAU,uCAKb,CAAC;AAEX,MAAM,MAAM,UAAU,GAAG;IACxB,MAAM,CAAC,EAAE,MAAM,GAAG,CAAC,MAAM,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,SAAS,CAAC;IAEtD,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC/B;;OAEG;IACH,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACrB,CAAC;AAEF,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,UAAU,GAAG,GAAG,GAAG,IAAI,CAepE;AAED,eAAO,MAAM,YAAY;;;;;;CAOf,CAAC"}