@supertone/supertone 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/FUNCTIONS.md +2 -2
  2. package/README.md +108 -44
  3. package/custom_test/realtime_tts_player.ts +64 -3
  4. package/custom_test/test_real_api.ts +1040 -105
  5. package/dist/commonjs/lib/config.d.ts +2 -2
  6. package/dist/commonjs/lib/config.d.ts.map +1 -1
  7. package/dist/commonjs/lib/config.js +2 -2
  8. package/dist/commonjs/lib/config.js.map +1 -1
  9. package/dist/commonjs/lib/custom_utils/text_utils.d.ts +8 -1
  10. package/dist/commonjs/lib/custom_utils/text_utils.d.ts.map +1 -1
  11. package/dist/commonjs/lib/custom_utils/text_utils.js +108 -7
  12. package/dist/commonjs/lib/custom_utils/text_utils.js.map +1 -1
  13. package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.d.ts +92 -1
  14. package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.d.ts.map +1 -1
  15. package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.js +48 -3
  16. package/dist/commonjs/models/apiconverttexttospeechusingcharacterrequest.js.map +1 -1
  17. package/dist/commonjs/models/predictttsdurationusingcharacterrequest.d.ts +92 -1
  18. package/dist/commonjs/models/predictttsdurationusingcharacterrequest.d.ts.map +1 -1
  19. package/dist/commonjs/models/predictttsdurationusingcharacterrequest.js +46 -3
  20. package/dist/commonjs/models/predictttsdurationusingcharacterrequest.js.map +1 -1
  21. package/dist/commonjs/sdk/texttospeech.d.ts.map +1 -1
  22. package/dist/commonjs/sdk/texttospeech.js +12 -9
  23. package/dist/commonjs/sdk/texttospeech.js.map +1 -1
  24. package/dist/esm/lib/config.d.ts +2 -2
  25. package/dist/esm/lib/config.d.ts.map +1 -1
  26. package/dist/esm/lib/config.js +2 -2
  27. package/dist/esm/lib/config.js.map +1 -1
  28. package/dist/esm/lib/custom_utils/text_utils.d.ts +8 -1
  29. package/dist/esm/lib/custom_utils/text_utils.d.ts.map +1 -1
  30. package/dist/esm/lib/custom_utils/text_utils.js +108 -7
  31. package/dist/esm/lib/custom_utils/text_utils.js.map +1 -1
  32. package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.d.ts +92 -1
  33. package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.d.ts.map +1 -1
  34. package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.js +47 -2
  35. package/dist/esm/models/apiconverttexttospeechusingcharacterrequest.js.map +1 -1
  36. package/dist/esm/models/predictttsdurationusingcharacterrequest.d.ts +92 -1
  37. package/dist/esm/models/predictttsdurationusingcharacterrequest.d.ts.map +1 -1
  38. package/dist/esm/models/predictttsdurationusingcharacterrequest.js +45 -2
  39. package/dist/esm/models/predictttsdurationusingcharacterrequest.js.map +1 -1
  40. package/dist/esm/sdk/texttospeech.d.ts.map +1 -1
  41. package/dist/esm/sdk/texttospeech.js +12 -9
  42. package/dist/esm/sdk/texttospeech.js.map +1 -1
  43. package/examples/custom_voices/create_cloned_voice.ts +4 -3
  44. package/examples/custom_voices/delete_custom_voice.ts +2 -7
  45. package/examples/custom_voices/edit_custom_voice.ts +2 -6
  46. package/examples/custom_voices/get_custom_voice.ts +2 -7
  47. package/examples/custom_voices/list_custom_voices.ts +2 -7
  48. package/examples/custom_voices/search_custom_voices.ts +2 -6
  49. package/examples/package.json +2 -2
  50. package/examples/textToSpeechCreateSpeech.example.ts +2 -2
  51. package/examples/text_to_speech/create_speech.ts +3 -8
  52. package/examples/text_to_speech/create_speech_long_text.ts +3 -7
  53. package/examples/text_to_speech/create_speech_with_phonemes.ts +3 -7
  54. package/examples/text_to_speech/create_speech_with_voice_settings.ts +3 -8
  55. package/examples/text_to_speech/predict_duration.ts +3 -7
  56. package/examples/text_to_speech/stream_speech.ts +3 -7
  57. package/examples/text_to_speech/stream_speech_long_text.ts +3 -7
  58. package/examples/text_to_speech/stream_speech_with_phonemes.ts +3 -7
  59. package/examples/text_to_speech/stream_speech_with_voice_settings.ts +3 -7
  60. package/examples/usage/get_credit_balance.ts +2 -6
  61. package/examples/usage/get_usage.ts +2 -6
  62. package/examples/usage/get_voice_usage.ts +2 -7
  63. package/examples/voices/get_voice.ts +2 -6
  64. package/examples/voices/list_voices.ts +2 -6
  65. package/examples/voices/search_voices.ts +2 -7
  66. package/jsr.json +2 -2
  67. package/openapi.json +101 -9
  68. package/package.json +26 -10
  69. package/src/lib/config.ts +3 -2
  70. package/src/lib/custom_utils/text_utils.ts +117 -7
  71. package/src/models/apiconverttexttospeechusingcharacterrequest.ts +62 -3
  72. package/src/models/predictttsdurationusingcharacterrequest.ts +64 -3
  73. package/src/sdk/texttospeech.ts +474 -465
@@ -127,19 +127,25 @@ async function extractAudioData(response: any): Promise<Uint8Array> {
127
127
  console.log(` šŸ” Debug - has audioBase64: ${"audioBase64" in result}`);
128
128
  console.log(` šŸ” Debug - has getReader: ${"getReader" in result}`);
129
129
  }
130
-
130
+
131
131
  // Check for capital-case Result (SDK internal structure)
132
- if (!result || (typeof result === "object" && Object.keys(result).length === 0)) {
132
+ if (
133
+ !result ||
134
+ (typeof result === "object" && Object.keys(result).length === 0)
135
+ ) {
133
136
  console.log(` šŸ’” Checking SDK internal Result field...`);
134
137
  if ((response as any).Result) {
135
138
  result = (response as any).Result;
136
139
  console.log(` āœ… Found Result (capital R) - using that instead`);
137
140
  }
138
141
  }
139
-
142
+
140
143
  // Debug response headers
141
144
  if (response.headers) {
142
- console.log(` šŸ” Debug - response headers:`, JSON.stringify(response.headers, null, 2));
145
+ console.log(
146
+ ` šŸ” Debug - response headers:`,
147
+ JSON.stringify(response.headers, null, 2)
148
+ );
143
149
  }
144
150
 
145
151
  if (result instanceof Uint8Array) {
@@ -198,7 +204,7 @@ async function extractAudioData(response: any): Promise<Uint8Array> {
198
204
  return bytes;
199
205
  }
200
206
  }
201
-
207
+
202
208
  // Handle empty object case - this might happen when the SDK doesn't properly parse audio responses
203
209
  if (
204
210
  typeof result === "object" &&
@@ -207,22 +213,25 @@ async function extractAudioData(response: any): Promise<Uint8Array> {
207
213
  ) {
208
214
  console.log(` āš ļø Warning: Empty result object detected`);
209
215
  console.log(` šŸ’” This might be a parsing issue with the SDK`);
210
- console.log(` šŸ’” Check if the response was actually a stream but got parsed as an empty object`);
211
-
216
+ console.log(
217
+ ` šŸ’” Check if the response was actually a stream but got parsed as an empty object`
218
+ );
219
+
212
220
  throw new Error(
213
221
  `Empty result object - SDK may have failed to parse audio stream response. ` +
214
- `This usually happens when audio/* content-type responses are not properly handled.`
222
+ `This usually happens when audio/* content-type responses are not properly handled.`
215
223
  );
216
224
  }
217
225
 
218
226
  // Enhanced error message with debug info
219
- const errorDetails = typeof result === "object" && result !== null
220
- ? `constructor: ${result.constructor.name}, keys: [${Object.keys(result).join(", ")}]`
221
- : `value: ${result}`;
222
-
223
- throw new Error(
224
- `Unsupported result type: ${typeof result}, ${errorDetails}`
225
- );
227
+ const errorDetails =
228
+ typeof result === "object" && result !== null
229
+ ? `constructor: ${result.constructor.name}, keys: [${Object.keys(
230
+ result
231
+ ).join(", ")}]`
232
+ : `value: ${result}`;
233
+
234
+ throw new Error(`Unsupported result type: ${typeof result}, ${errorDetails}`);
226
235
  }
227
236
 
228
237
  /**
@@ -927,6 +936,154 @@ async function testCreateSpeechLongText(
927
936
  }
928
937
  }
929
938
 
939
+ /**
940
+ * Test TTS with long text WITHOUT punctuation (word-based chunking)
941
+ * This tests the word-based splitting fallback when sentences exceed 300 chars
942
+ */
943
+ async function testCreateSpeechLongSentenceNoPunctuation(
944
+ voiceId: string | null
945
+ ): Promise<[boolean, any]> {
946
+ console.log(
947
+ "šŸ“œ Long Sentence WITHOUT Punctuation Test (Word-based chunking)"
948
+ );
949
+
950
+ if (!voiceId) {
951
+ console.log(" āš ļø No voice ID available");
952
+ return [false, null];
953
+ }
954
+
955
+ try {
956
+ const { Supertone } = await import("../src/index.js");
957
+ const models = await import("../src/models/index.js");
958
+ const client = new Supertone({ apiKey: API_KEY });
959
+
960
+ // Long text without punctuation - forces word-based splitting
961
+ // This is a single continuous sentence with no periods or other punctuation marks
962
+ const longSentenceNoPunctuation =
963
+ "This is a very long sentence without any punctuation marks that is designed to test the word based chunking feature of the SDK when a sentence exceeds the maximum character limit of three hundred characters the system should automatically split this text by word boundaries rather than sentence boundaries to ensure proper processing and this behavior is critical for handling user generated content that may not follow standard punctuation conventions such as chat messages or informal text inputs that users commonly provide in real world applications where grammatically correct sentences are not always guaranteed";
964
+
965
+ const actualLength = longSentenceNoPunctuation.length;
966
+ console.log(
967
+ ` šŸ“ Text length: ${actualLength} characters (single sentence, no punctuation)`
968
+ );
969
+ console.log(` šŸ”§ Expected behavior: Word-based chunking`);
970
+ console.log(" āš ļø This test consumes credits!");
971
+
972
+ const response = await client.textToSpeech.createSpeech({
973
+ voiceId,
974
+ apiConvertTextToSpeechUsingCharacterRequest: {
975
+ text: longSentenceNoPunctuation,
976
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
977
+ outputFormat:
978
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
979
+ style: "neutral",
980
+ model: "sona_speech_1",
981
+ },
982
+ });
983
+
984
+ if (response.result) {
985
+ const audioData = await extractAudioData(response);
986
+
987
+ console.log(
988
+ ` āœ… Word-based chunking TTS success: ${audioData.length} bytes`
989
+ );
990
+ console.log(
991
+ ` šŸŽÆ Long sentence without punctuation processed correctly!`
992
+ );
993
+
994
+ const outputFile = "test_word_chunking_speech_output.wav";
995
+ fs.writeFileSync(outputFile, audioData);
996
+ console.log(` šŸ’¾ Audio saved: ${outputFile}`);
997
+
998
+ const estimatedChunks = Math.ceil(actualLength / 300);
999
+ console.log(` šŸ“Š Estimated chunks: ${estimatedChunks}`);
1000
+ }
1001
+
1002
+ return [true, response];
1003
+ } catch (e: any) {
1004
+ logDetailedError(e, "Long sentence word-based chunking");
1005
+ return [false, e];
1006
+ }
1007
+ }
1008
+
1009
+ /**
1010
+ * Test TTS with Japanese text (character-based chunking)
1011
+ * Japanese doesn't use spaces, AND this test uses NO punctuation marks (ć€‚ļ¼ļ¼Ÿetc)
1012
+ * to ensure the SDK uses character-based splitting
1013
+ */
1014
+ async function testCreateSpeechJapaneseNoSpaces(
1015
+ voiceId: string | null
1016
+ ): Promise<[boolean, any]> {
1017
+ console.log("šŸ‡ÆšŸ‡µ Japanese Text Test (Character-based chunking)");
1018
+
1019
+ if (!voiceId) {
1020
+ console.log(" āš ļø No voice ID available");
1021
+ return [false, null];
1022
+ }
1023
+
1024
+ try {
1025
+ const { Supertone } = await import("../src/index.js");
1026
+ const models = await import("../src/models/index.js");
1027
+ const client = new Supertone({ apiKey: API_KEY });
1028
+
1029
+ // Long Japanese text WITHOUT spaces AND WITHOUT punctuation - forces character-based splitting
1030
+ // This text intentionally has NO punctuation marks (ć€‚ļ¼ļ¼Ÿetc) to test pure character-based chunking
1031
+ // Text length: ~450 characters (exceeds 300 char limit)
1032
+ const longJapaneseText =
1033
+ "ę—„ęœ¬čŖžć®ćƒ†ć‚­ć‚¹ćƒˆćÆé€šåøøć‚¹ćƒšćƒ¼ć‚¹ć‚’å«ć¾ćŖć„ćŸć‚ē‰¹åˆ„ćŖå‡¦ē†ćŒåæ…č¦ć§ć™" +
1034
+ "ć“ć®ćƒ†ć‚¹ćƒˆćÆäø‰ē™¾ę–‡å­—ć‚’č¶…ćˆć‚‹é•·ć„ę—„ęœ¬čŖžćƒ†ć‚­ć‚¹ćƒˆćŒę­£ć—ćå‡¦ē†ć•ć‚Œć‚‹ć“ćØć‚’ē¢ŗčŖć—ć¾ć™" +
1035
+ "č‡Ŗē„¶čØ€čŖžå‡¦ē†ęŠ€č”“ć®ē™ŗå±•ć«ć‚ˆć‚ŠéŸ³å£°åˆęˆć®å“č³ŖćÆå¤§å¹…ć«å‘äøŠć—ć¾ć—ćŸ" +
1036
+ "ē‰¹ć«ćƒ‡ć‚£ćƒ¼ćƒ—ćƒ©ćƒ¼ćƒ‹ćƒ³ć‚°ć‚’ę“»ē”Øć—ćŸęœ€ę–°ć®ćƒ†ć‚­ć‚¹ćƒˆéŸ³å£°å¤‰ę›ć‚·ć‚¹ćƒ†ćƒ ćÆäŗŗé–“ć®ē™ŗč©±ć«éžåøøć«čæ‘ć„č‡Ŗē„¶ćŖéŸ³å£°ć‚’ē”Ÿęˆć§ćć¾ć™" +
1037
+ "ć‚¹ćƒšćƒ¼ć‚¹ćŒćŖć„čØ€čŖžć§ćÆę–‡å­—å˜ä½ć§ć®åˆ†å‰²ćŒåæ…č¦ć§ć‚ć‚Šć“ć®SDKćÆćć®ć‚ˆć†ćŖēŠ¶ę³ć‚’č‡Ŗå‹•ēš„ć«ę¤œå‡ŗć—ć¦é©åˆ‡ć«å‡¦ē†ć—ć¾ć™" +
1038
+ "ć“ć‚Œć«ć‚ˆć‚Šę—„ęœ¬čŖžäø­å›½čŖžéŸ“å›½čŖžćŖć©ć®ć‚¢ć‚øć‚¢čØ€čŖžć§ć‚‚å•é”ŒćŖćé•·ć„ćƒ†ć‚­ć‚¹ćƒˆć‚’éŸ³å£°ć«å¤‰ę›ć™ć‚‹ć“ćØćŒć§ćć¾ć™" +
1039
+ "éŸ³å£°åˆęˆęŠ€č”“ćÆč¦–č¦šéšœå®³č€…ć®ćŸć‚ć®ć‚¢ć‚Æć‚»ć‚·ćƒ“ćƒŖćƒ†ć‚£ćƒ„ćƒ¼ćƒ«ć‹ć‚‰åÆ¾č©±åž‹AIć‚¢ć‚·ć‚¹ć‚æćƒ³ćƒˆć¾ć§å¹…åŗƒć„ē”Øé€”ć§ę“»ē”Øć•ć‚Œć¦ć„ć¾ć™" +
1040
+ "ć•ć‚‰ć«ćƒŖć‚¢ćƒ«ć‚æć‚¤ćƒ ć‚¹ćƒˆćƒŖćƒ¼ćƒŸćƒ³ć‚°ęŠ€č”“ćØēµ„ćæåˆć‚ć›ć‚‹ć“ćØć§å¾…ć”ę™‚é–“ć‚’å¤§å¹…ć«ēŸ­ēø®ć—å„Ŗć‚ŒćŸćƒ¦ćƒ¼ć‚¶ćƒ¼ä½“éØ“ć‚’ęä¾›ć™ć‚‹ć“ćØćŒć§ćć¾ć™" +
1041
+ "ęœ€ę–°ć®éŸ³å£°åˆęˆęŠ€č”“ćÆę„Ÿęƒ…ć‚„ęŠ‘ęšć‚‚č‡Ŗē„¶ć«č”Øē¾ć§ćć‚‹ć‚ˆć†ć«ćŖć‚Šć¾ć—ćŸ";
1042
+
1043
+ const actualLength = longJapaneseText.length;
1044
+ console.log(
1045
+ ` šŸ“ Text length: ${actualLength} characters (Japanese, no spaces, no punctuation)`
1046
+ );
1047
+ console.log(
1048
+ ` šŸ”§ Expected behavior: Character-based chunking (300 chars per chunk)`
1049
+ );
1050
+ console.log(" āš ļø This test consumes credits!");
1051
+
1052
+ const response = await client.textToSpeech.createSpeech({
1053
+ voiceId,
1054
+ apiConvertTextToSpeechUsingCharacterRequest: {
1055
+ text: longJapaneseText,
1056
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.Ja,
1057
+ outputFormat:
1058
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
1059
+ style: "neutral",
1060
+ model: "sona_speech_1",
1061
+ },
1062
+ });
1063
+
1064
+ if (response.result) {
1065
+ const audioData = await extractAudioData(response);
1066
+
1067
+ console.log(
1068
+ ` āœ… Character-based chunking TTS success: ${audioData.length} bytes`
1069
+ );
1070
+ console.log(` šŸŽÆ Japanese text without spaces processed correctly!`);
1071
+
1072
+ const outputFile = "test_japanese_char_chunking_speech_output.wav";
1073
+ fs.writeFileSync(outputFile, audioData);
1074
+ console.log(` šŸ’¾ Audio saved: ${outputFile}`);
1075
+
1076
+ const estimatedChunks = Math.ceil(actualLength / 300);
1077
+ console.log(` šŸ“Š Estimated chunks: ${estimatedChunks}`);
1078
+ }
1079
+
1080
+ return [true, response];
1081
+ } catch (e: any) {
1082
+ logDetailedError(e, "Japanese character-based chunking");
1083
+ return [false, e];
1084
+ }
1085
+ }
1086
+
930
1087
  /**
931
1088
  * Test TTS streaming with long text
932
1089
  */
@@ -1168,13 +1325,53 @@ async function testStreamSpeechWithPhonemes(
1168
1325
  }
1169
1326
  }
1170
1327
 
1328
+ // =============================================================================
1329
+ // Model & Language Compatibility Tests
1330
+ // =============================================================================
1331
+
1171
1332
  /**
1172
- * Test duration prediction with voice settings
1333
+ * Model-Language compatibility matrix
1334
+ * - sona_speech_1: ko, en, ja
1335
+ * - sona_speech_2: all languages (23 languages)
1336
+ * - supertonic_api_1: ko, en, ja, es, pt
1173
1337
  */
1174
- async function testPredictDurationWithVoiceSettings(
1338
+ const MODEL_LANGUAGE_MATRIX = {
1339
+ sona_speech_1: ["ko", "en", "ja"],
1340
+ sona_speech_2: [
1341
+ "en",
1342
+ "ko",
1343
+ "ja",
1344
+ "bg",
1345
+ "cs",
1346
+ "da",
1347
+ "el",
1348
+ "es",
1349
+ "et",
1350
+ "fi",
1351
+ "hu",
1352
+ "it",
1353
+ "nl",
1354
+ "pl",
1355
+ "pt",
1356
+ "ro",
1357
+ "ar",
1358
+ "de",
1359
+ "fr",
1360
+ "hi",
1361
+ "id",
1362
+ "ru",
1363
+ "vi",
1364
+ ],
1365
+ supertonic_api_1: ["ko", "en", "ja", "es", "pt"],
1366
+ } as const;
1367
+
1368
+ /**
1369
+ * Test TTS with sona_speech_2 model
1370
+ */
1371
+ async function testCreateSpeechWithSonaSpeech2(
1175
1372
  voiceId: string | null
1176
1373
  ): Promise<[boolean, any]> {
1177
- console.log("ā±ļø Duration Prediction with Voice Settings Test");
1374
+ console.log("šŸ¤– TTS with sona_speech_2 Model Test");
1178
1375
 
1179
1376
  if (!voiceId) {
1180
1377
  console.log(" āš ļø No voice ID available");
@@ -1186,40 +1383,50 @@ async function testPredictDurationWithVoiceSettings(
1186
1383
  const models = await import("../src/models/index.js");
1187
1384
  const client = new Supertone({ apiKey: API_KEY });
1188
1385
 
1189
- const voiceSettings = {
1190
- speed: 0.8,
1191
- };
1192
-
1193
- console.log(
1194
- ` šŸ” Predicting duration with voice settings for voice '${voiceId}'...`
1195
- );
1196
- console.log(` Settings: speed=${voiceSettings.speed}`);
1386
+ const testText =
1387
+ "Hello! Testing sona_speech_2 model for text-to-speech conversion.";
1388
+ console.log(` šŸ” Creating speech with sona_speech_2 model`);
1389
+ console.log(` Voice ID: ${voiceId}`);
1390
+ console.log(` Model: sona_speech_2`);
1391
+ console.log(" āš ļø This test consumes credits!");
1197
1392
 
1198
- const response = await client.textToSpeech.predictDuration({
1393
+ const response = await client.textToSpeech.createSpeech({
1199
1394
  voiceId,
1200
- predictTTSDurationUsingCharacterRequest: {
1201
- text: "This is a duration test with adjusted speed.",
1202
- language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
1203
- voiceSettings,
1395
+ apiConvertTextToSpeechUsingCharacterRequest: {
1396
+ text: testText,
1397
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
1398
+ outputFormat:
1399
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
1400
+ model:
1401
+ models.APIConvertTextToSpeechUsingCharacterRequestModel.SonaSpeech2,
1204
1402
  },
1205
1403
  });
1206
1404
 
1207
- console.log(` āœ… Predicted duration: ${response.duration}s`);
1405
+ console.log(` āœ… sona_speech_2 TTS success`);
1406
+
1407
+ if (response.result) {
1408
+ const audioData = await extractAudioData(response);
1409
+ const outputFile = "test_sona_speech_2_output.wav";
1410
+ fs.writeFileSync(outputFile, audioData);
1411
+ console.log(
1412
+ ` šŸ’¾ Audio saved: ${outputFile} (${audioData.length} bytes)`
1413
+ );
1414
+ }
1208
1415
 
1209
1416
  return [true, response];
1210
1417
  } catch (e: any) {
1211
- console.error(` āŒ Error: ${e.message || e}`);
1418
+ logDetailedError(e, "sona_speech_2 TTS");
1212
1419
  return [false, e];
1213
1420
  }
1214
1421
  }
1215
1422
 
1216
1423
  /**
1217
- * Test TTS streaming with voice settings
1424
+ * Test TTS with supertonic_api_1 model
1218
1425
  */
1219
- async function testStreamSpeechWithVoiceSettings(
1426
+ async function testCreateSpeechWithSupertonicApi1(
1220
1427
  voiceId: string | null
1221
1428
  ): Promise<[boolean, any]> {
1222
- console.log("šŸ“” TTS Streaming with Voice Settings Test");
1429
+ console.log("šŸ¤– TTS with supertonic_api_1 Model Test");
1223
1430
 
1224
1431
  if (!voiceId) {
1225
1432
  console.log(" āš ļø No voice ID available");
@@ -1231,46 +1438,51 @@ async function testStreamSpeechWithVoiceSettings(
1231
1438
  const models = await import("../src/models/index.js");
1232
1439
  const client = new Supertone({ apiKey: API_KEY });
1233
1440
 
1234
- const voiceSettings = {
1235
- pitchShift: 1.05,
1236
- speed: 1.1,
1237
- };
1238
-
1239
- console.log(
1240
- ` šŸ” Streaming speech with voice settings for voice '${voiceId}'...`
1241
- );
1242
- console.log(
1243
- ` Settings: pitchShift=${voiceSettings.pitchShift}, speed=${voiceSettings.speed}`
1244
- );
1441
+ const testText =
1442
+ "Hello! Testing supertonic_api_1 model for text-to-speech conversion.";
1443
+ console.log(` šŸ” Creating speech with supertonic_api_1 model`);
1444
+ console.log(` Voice ID: ${voiceId}`);
1445
+ console.log(` Model: supertonic_api_1`);
1245
1446
  console.log(" āš ļø This test consumes credits!");
1246
1447
 
1247
- const response = await client.textToSpeech.streamSpeech({
1448
+ const response = await client.textToSpeech.createSpeech({
1248
1449
  voiceId,
1249
1450
  apiConvertTextToSpeechUsingCharacterRequest: {
1250
- text: "Streaming with adjusted voice settings.",
1451
+ text: testText,
1251
1452
  language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
1252
1453
  outputFormat:
1253
1454
  models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
1254
- voiceSettings,
1455
+ model:
1456
+ models.APIConvertTextToSpeechUsingCharacterRequestModel
1457
+ .SupertonicApi1,
1255
1458
  },
1256
1459
  });
1257
1460
 
1258
- console.log(` āœ… Stream with voice settings started successfully`);
1461
+ console.log(` āœ… supertonic_api_1 TTS success`);
1462
+
1463
+ if (response.result) {
1464
+ const audioData = await extractAudioData(response);
1465
+ const outputFile = "test_supertonic_api_1_output.wav";
1466
+ fs.writeFileSync(outputFile, audioData);
1467
+ console.log(
1468
+ ` šŸ’¾ Audio saved: ${outputFile} (${audioData.length} bytes)`
1469
+ );
1470
+ }
1259
1471
 
1260
1472
  return [true, response];
1261
1473
  } catch (e: any) {
1262
- console.error(` āŒ Error: ${e.message || e}`);
1474
+ logDetailedError(e, "supertonic_api_1 TTS");
1263
1475
  return [false, e];
1264
1476
  }
1265
1477
  }
1266
1478
 
1267
1479
  /**
1268
- * Test MP3 format TTS
1480
+ * Test TTS with unsupported model (should fail with validation error)
1269
1481
  */
1270
- async function testCreateSpeechMp3(
1482
+ async function testCreateSpeechWithUnsupportedModel(
1271
1483
  voiceId: string | null
1272
1484
  ): Promise<[boolean, any]> {
1273
- console.log("šŸŽ¤ MP3 Format TTS Test");
1485
+ console.log("🚫 TTS with Unsupported Model Test (Expected to Fail)");
1274
1486
 
1275
1487
  if (!voiceId) {
1276
1488
  console.log(" āš ļø No voice ID available");
@@ -1282,62 +1494,82 @@ async function testCreateSpeechMp3(
1282
1494
  const models = await import("../src/models/index.js");
1283
1495
  const client = new Supertone({ apiKey: API_KEY });
1284
1496
 
1285
- console.log(` šŸ” MP3 TTS conversion with voice '${voiceId}'...`);
1286
- console.log(" āš ļø This test consumes credits!");
1497
+ const testText = "This should fail with unsupported model.";
1498
+ console.log(
1499
+ ` šŸ” Attempting TTS with unsupported model: 'invalid_model_xyz'`
1500
+ );
1287
1501
 
1502
+ // Using type assertion to bypass TypeScript validation for testing
1288
1503
  const response = await client.textToSpeech.createSpeech({
1289
1504
  voiceId,
1290
1505
  apiConvertTextToSpeechUsingCharacterRequest: {
1291
- text: "Hello! This is an MP3 format SDK test. Let's verify if it works correctly.",
1506
+ text: testText,
1292
1507
  language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
1293
1508
  outputFormat:
1294
- models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Mp3,
1295
- style: "neutral",
1296
- model: "sona_speech_1",
1509
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
1510
+ model: "invalid_model_xyz" as any, // Intentionally invalid model
1297
1511
  },
1298
1512
  });
1299
1513
 
1300
- console.log(` āœ… MP3 TTS conversion success`);
1514
+ // If we reach here, the test failed (should have thrown an error)
1515
+ console.log(` āŒ Expected error but got success - this is unexpected!`);
1516
+ return [false, response];
1517
+ } catch (e: any) {
1518
+ // Expected to fail - this is the success case for this test
1519
+ console.log(` āœ… Correctly rejected unsupported model`);
1520
+ console.log(` šŸ“‹ Error type: ${e.constructor?.name || typeof e}`);
1521
+ console.log(` šŸ“‹ Error message: ${e.message?.substring(0, 100) || e}`);
1522
+ return [true, e];
1523
+ }
1524
+ }
1301
1525
 
1302
- if (response.result) {
1303
- const outputFile = "test_create_speech_output.mp3";
1304
- const audioData = await extractAudioData(response);
1526
+ /**
1527
+ * Test prediction with sona_speech_2 model
1528
+ */
1529
+ async function testPredictDurationWithSonaSpeech2(
1530
+ voiceId: string | null
1531
+ ): Promise<[boolean, any]> {
1532
+ console.log("ā±ļø Duration Prediction with sona_speech_2 Model Test");
1305
1533
 
1306
- fs.writeFileSync(outputFile, audioData);
1307
- console.log(` šŸ’¾ MP3 audio file saved: ${outputFile}`);
1534
+ if (!voiceId) {
1535
+ console.log(" āš ļø No voice ID available");
1536
+ return [false, null];
1537
+ }
1308
1538
 
1309
- // Verify MP3 header
1310
- const header = audioData.slice(0, 10);
1311
- if (header[0] === 0x49 && header[1] === 0x44 && header[2] === 0x33) {
1312
- console.log(` āœ… Valid MP3 file generated (ID3 tag)`);
1313
- } else if (
1314
- (header[0] === 0xff && header[1] === 0xfb) ||
1315
- (header[0] === 0xff && header[1] === 0xfa)
1316
- ) {
1317
- console.log(` āœ… Valid MP3 file generated (MPEG frame)`);
1318
- } else {
1319
- console.log(
1320
- ` šŸ“„ MP3 header: ${Array.from(header.slice(0, 10))
1321
- .map((b) => b.toString(16).padStart(2, "0"))
1322
- .join(" ")} (needs verification)`
1323
- );
1324
- }
1325
- }
1539
+ try {
1540
+ const { Supertone } = await import("../src/index.js");
1541
+ const models = await import("../src/models/index.js");
1542
+ const client = new Supertone({ apiKey: API_KEY });
1543
+
1544
+ const testText = "Testing duration prediction with sona_speech_2 model.";
1545
+ console.log(` šŸ” Predicting duration with sona_speech_2 model`);
1546
+
1547
+ const response = await client.textToSpeech.predictDuration({
1548
+ voiceId,
1549
+ predictTTSDurationUsingCharacterRequest: {
1550
+ text: testText,
1551
+ language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
1552
+ model: models.PredictTTSDurationUsingCharacterRequestModel.SonaSpeech2,
1553
+ },
1554
+ });
1326
1555
 
1556
+ console.log(
1557
+ ` āœ… sona_speech_2 duration prediction: ${response.duration}s`
1558
+ );
1327
1559
  return [true, response];
1328
1560
  } catch (e: any) {
1329
- console.error(` āŒ Error: ${e.message || e}`);
1561
+ logDetailedError(e, "sona_speech_2 duration prediction");
1330
1562
  return [false, e];
1331
1563
  }
1332
1564
  }
1333
1565
 
1334
1566
  /**
1335
- * Test MP3 format with long text
1567
+ * Test prediction with supertonic_api_1 model
1336
1568
  */
1337
- async function testCreateSpeechLongTextMp3(
1569
+ async function testPredictDurationWithSupertonicApi1(
1338
1570
  voiceId: string | null
1339
1571
  ): Promise<[boolean, any]> {
1340
- console.log("šŸ“œ Long Text MP3 Auto-Chunking TTS Test (300+ chars)");
1572
+ console.log("ā±ļø Duration Prediction with supertonic_api_1 Model Test");
1341
1573
 
1342
1574
  if (!voiceId) {
1343
1575
  console.log(" āš ļø No voice ID available");
@@ -1349,20 +1581,639 @@ async function testCreateSpeechLongTextMp3(
1349
1581
  const models = await import("../src/models/index.js");
1350
1582
  const client = new Supertone({ apiKey: API_KEY });
1351
1583
 
1352
- const longText = `
1353
- Hello! This is a very long text MP3 auto-chunking TTS test exceeding 300 characters.
1354
- The newly implemented SDK automatically divides long text into multiple chunks for processing.
1355
- Real-time streaming text-to-speech technology plays a crucial role in modern AI applications.
1356
- It is an indispensable technology especially in conversational services, live broadcasting, and real-time translation services.
1357
- Through the auto-chunking feature, long texts are naturally divided into multiple small segments for processing.
1358
- Each segment is intelligently segmented considering sentence and word boundaries, enabling natural speech generation.
1359
- Now users don't need to worry about text length or output format, as the SDK automatically handles everything in MP3 format too.
1360
- `.trim();
1584
+ const testText = "Testing duration prediction with supertonic_api_1 model.";
1585
+ console.log(` šŸ” Predicting duration with supertonic_api_1 model`);
1361
1586
 
1362
- const actualLength = longText.length;
1363
- console.log(
1364
- ` šŸ“ Test text length: ${actualLength} characters (exceeds 300)`
1365
- );
1587
+ const response = await client.textToSpeech.predictDuration({
1588
+ voiceId,
1589
+ predictTTSDurationUsingCharacterRequest: {
1590
+ text: testText,
1591
+ language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
1592
+ model:
1593
+ models.PredictTTSDurationUsingCharacterRequestModel.SupertonicApi1,
1594
+ },
1595
+ });
1596
+
1597
+ console.log(
1598
+ ` āœ… supertonic_api_1 duration prediction: ${response.duration}s`
1599
+ );
1600
+ return [true, response];
1601
+ } catch (e: any) {
1602
+ logDetailedError(e, "supertonic_api_1 duration prediction");
1603
+ return [false, e];
1604
+ }
1605
+ }
1606
+
1607
+ /**
1608
+ * Test prediction with unsupported model (should fail with validation error)
1609
+ */
1610
+ async function testPredictDurationWithUnsupportedModel(
1611
+ voiceId: string | null
1612
+ ): Promise<[boolean, any]> {
1613
+ console.log(
1614
+ "🚫 Duration Prediction with Unsupported Model Test (Expected to Fail)"
1615
+ );
1616
+
1617
+ if (!voiceId) {
1618
+ console.log(" āš ļø No voice ID available");
1619
+ return [false, null];
1620
+ }
1621
+
1622
+ try {
1623
+ const { Supertone } = await import("../src/index.js");
1624
+ const models = await import("../src/models/index.js");
1625
+ const client = new Supertone({ apiKey: API_KEY });
1626
+
1627
+ const testText = "This should fail with unsupported model.";
1628
+ console.log(
1629
+ ` šŸ” Attempting prediction with unsupported model: 'invalid_model_xyz'`
1630
+ );
1631
+
1632
+ const response = await client.textToSpeech.predictDuration({
1633
+ voiceId,
1634
+ predictTTSDurationUsingCharacterRequest: {
1635
+ text: testText,
1636
+ language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
1637
+ model: "invalid_model_xyz" as any, // Intentionally invalid model
1638
+ },
1639
+ });
1640
+
1641
+ console.log(` āŒ Expected error but got success - this is unexpected!`);
1642
+ return [false, response];
1643
+ } catch (e: any) {
1644
+ console.log(` āœ… Correctly rejected unsupported model`);
1645
+ console.log(` šŸ“‹ Error type: ${e.constructor?.name || typeof e}`);
1646
+ console.log(` šŸ“‹ Error message: ${e.message?.substring(0, 100) || e}`);
1647
+ return [true, e];
1648
+ }
1649
+ }
1650
+
1651
+ // =============================================================================
1652
+ // Multilingual Tests per Model
1653
+ // =============================================================================
1654
+
1655
+ /**
1656
+ * Test TTS multilingual support with sona_speech_1 (supports: ko, en, ja)
1657
+ */
1658
+ async function testMultilingualSonaSpeech1(
1659
+ voiceId: string | null
1660
+ ): Promise<[boolean, any]> {
1661
+ console.log("šŸŒ Multilingual Test - sona_speech_1 (ko, en, ja)");
1662
+
1663
+ if (!voiceId) {
1664
+ console.log(" āš ļø No voice ID available");
1665
+ return [false, null];
1666
+ }
1667
+
1668
+ const testCases = [
1669
+ {
1670
+ lang: "ko" as const,
1671
+ text: "ģ•ˆė…•ķ•˜ģ„øģš”, ģ†Œė‚˜ ģŠ¤ķ”¼ģ¹˜ 원 ėŖØėøģž…ė‹ˆė‹¤.",
1672
+ label: "Korean",
1673
+ },
1674
+ {
1675
+ lang: "en" as const,
1676
+ text: "Hello, this is sona_speech_1 model.",
1677
+ label: "English",
1678
+ },
1679
+ {
1680
+ lang: "ja" as const,
1681
+ text: "ć“ć‚“ć«ć”ćÆć€ć‚½ćƒŠć‚¹ćƒ”ćƒ¼ćƒćƒÆćƒ³ćƒ¢ćƒ‡ćƒ«ć§ć™ć€‚",
1682
+ label: "Japanese",
1683
+ },
1684
+ ];
1685
+
1686
+ try {
1687
+ const { Supertone } = await import("../src/index.js");
1688
+ const models = await import("../src/models/index.js");
1689
+ const client = new Supertone({ apiKey: API_KEY });
1690
+
1691
+ let allPassed = true;
1692
+ const results: any[] = [];
1693
+
1694
+ for (const tc of testCases) {
1695
+ console.log(` šŸ” Testing ${tc.label} (${tc.lang})...`);
1696
+
1697
+ try {
1698
+ const langEnum =
1699
+ models.APIConvertTextToSpeechUsingCharacterRequestLanguage[
1700
+ (tc.lang.charAt(0).toUpperCase() +
1701
+ tc.lang.slice(
1702
+ 1
1703
+ )) as keyof typeof models.APIConvertTextToSpeechUsingCharacterRequestLanguage
1704
+ ];
1705
+
1706
+ const response = await client.textToSpeech.createSpeech({
1707
+ voiceId,
1708
+ apiConvertTextToSpeechUsingCharacterRequest: {
1709
+ text: tc.text,
1710
+ language: langEnum,
1711
+ outputFormat:
1712
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat
1713
+ .Wav,
1714
+ model:
1715
+ models.APIConvertTextToSpeechUsingCharacterRequestModel
1716
+ .SonaSpeech1,
1717
+ },
1718
+ });
1719
+
1720
+ console.log(` āœ… ${tc.label} success`);
1721
+ results.push({ lang: tc.lang, success: true });
1722
+ } catch (e: any) {
1723
+ console.log(
1724
+ ` āŒ ${tc.label} failed: ${e.message?.substring(0, 50)}`
1725
+ );
1726
+ results.push({ lang: tc.lang, success: false, error: e.message });
1727
+ allPassed = false;
1728
+ }
1729
+ }
1730
+
1731
+ console.log(
1732
+ ` šŸ“Š Result: ${results.filter((r) => r.success).length}/${
1733
+ testCases.length
1734
+ } languages passed`
1735
+ );
1736
+ return [allPassed, results];
1737
+ } catch (e: any) {
1738
+ logDetailedError(e, "sona_speech_1 multilingual");
1739
+ return [false, e];
1740
+ }
1741
+ }
1742
+
1743
+ /**
1744
+ * Test TTS multilingual support with sona_speech_2 (supports all languages)
1745
+ */
1746
+ async function testMultilingualSonaSpeech2(
1747
+ voiceId: string | null
1748
+ ): Promise<[boolean, any]> {
1749
+ console.log("šŸŒ Multilingual Test - sona_speech_2 (all languages sample)");
1750
+
1751
+ if (!voiceId) {
1752
+ console.log(" āš ļø No voice ID available");
1753
+ return [false, null];
1754
+ }
1755
+
1756
+ // Test a diverse subset of languages
1757
+ const testCases = [
1758
+ { lang: "Ko" as const, text: "ģ•ˆė…•ķ•˜ģ„øģš”.", label: "Korean" },
1759
+ { lang: "En" as const, text: "Hello.", label: "English" },
1760
+ { lang: "Ja" as const, text: "こんにごは。", label: "Japanese" },
1761
+ { lang: "Es" as const, text: "Hola.", label: "Spanish" },
1762
+ { lang: "Fr" as const, text: "Bonjour.", label: "French" },
1763
+ { lang: "De" as const, text: "Hallo.", label: "German" },
1764
+ { lang: "Ar" as const, text: "Ł…Ų±Ų­ŲØŲ§.", label: "Arabic" },
1765
+ { lang: "Hi" as const, text: "ą¤Øą¤®ą¤øą„ą¤¤ą„‡ą„¤", label: "Hindi" },
1766
+ ];
1767
+
1768
+ try {
1769
+ const { Supertone } = await import("../src/index.js");
1770
+ const models = await import("../src/models/index.js");
1771
+ const client = new Supertone({ apiKey: API_KEY });
1772
+
1773
+ let allPassed = true;
1774
+ const results: any[] = [];
1775
+
1776
+ for (const tc of testCases) {
1777
+ console.log(` šŸ” Testing ${tc.label} (${tc.lang})...`);
1778
+
1779
+ try {
1780
+ const langEnum =
1781
+ models.APIConvertTextToSpeechUsingCharacterRequestLanguage[tc.lang];
1782
+
1783
+ const response = await client.textToSpeech.createSpeech({
1784
+ voiceId,
1785
+ apiConvertTextToSpeechUsingCharacterRequest: {
1786
+ text: tc.text,
1787
+ language: langEnum,
1788
+ outputFormat:
1789
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat
1790
+ .Wav,
1791
+ model:
1792
+ models.APIConvertTextToSpeechUsingCharacterRequestModel
1793
+ .SonaSpeech2,
1794
+ },
1795
+ });
1796
+
1797
+ console.log(` āœ… ${tc.label} success`);
1798
+ results.push({ lang: tc.lang, success: true });
1799
+ } catch (e: any) {
1800
+ console.log(
1801
+ ` āŒ ${tc.label} failed: ${e.message?.substring(0, 50)}`
1802
+ );
1803
+ results.push({ lang: tc.lang, success: false, error: e.message });
1804
+ allPassed = false;
1805
+ }
1806
+ }
1807
+
1808
+ console.log(
1809
+ ` šŸ“Š Result: ${results.filter((r) => r.success).length}/${
1810
+ testCases.length
1811
+ } languages passed`
1812
+ );
1813
+ return [allPassed, results];
1814
+ } catch (e: any) {
1815
+ logDetailedError(e, "sona_speech_2 multilingual");
1816
+ return [false, e];
1817
+ }
1818
+ }
1819
+
1820
+ /**
1821
+ * Test TTS multilingual support with supertonic_api_1 (supports: ko, en, ja, es, pt)
1822
+ */
1823
+ async function testMultilingualSupertonicApi1(
1824
+ voiceId: string | null
1825
+ ): Promise<[boolean, any]> {
1826
+ console.log("šŸŒ Multilingual Test - supertonic_api_1 (ko, en, ja, es, pt)");
1827
+
1828
+ if (!voiceId) {
1829
+ console.log(" āš ļø No voice ID available");
1830
+ return [false, null];
1831
+ }
1832
+
1833
+ const testCases = [
1834
+ {
1835
+ lang: "Ko" as const,
1836
+ text: "ģ•ˆė…•ķ•˜ģ„øģš”, ģŠˆķ¼ķ† ė‹‰ API 원 ėŖØėøģž…ė‹ˆė‹¤.",
1837
+ label: "Korean",
1838
+ },
1839
+ {
1840
+ lang: "En" as const,
1841
+ text: "Hello, this is supertonic_api_1 model.",
1842
+ label: "English",
1843
+ },
1844
+ {
1845
+ lang: "Ja" as const,
1846
+ text: "ć“ć‚“ć«ć”ćÆć€ć‚¹ćƒ¼ćƒ‘ćƒ¼ćƒˆćƒ‹ćƒƒć‚ÆAPIćƒÆćƒ³ć§ć™ć€‚",
1847
+ label: "Japanese",
1848
+ },
1849
+ {
1850
+ lang: "Es" as const,
1851
+ text: "Hola, este es el modelo supertonic_api_1.",
1852
+ label: "Spanish",
1853
+ },
1854
+ {
1855
+ lang: "Pt" as const,
1856
+ text: "OlĆ”, este Ć© o modelo supertonic_api_1.",
1857
+ label: "Portuguese",
1858
+ },
1859
+ ];
1860
+
1861
+ try {
1862
+ const { Supertone } = await import("../src/index.js");
1863
+ const models = await import("../src/models/index.js");
1864
+ const client = new Supertone({ apiKey: API_KEY });
1865
+
1866
+ let allPassed = true;
1867
+ const results: any[] = [];
1868
+
1869
+ for (const tc of testCases) {
1870
+ console.log(` šŸ” Testing ${tc.label} (${tc.lang})...`);
1871
+
1872
+ try {
1873
+ const langEnum =
1874
+ models.APIConvertTextToSpeechUsingCharacterRequestLanguage[tc.lang];
1875
+
1876
+ const response = await client.textToSpeech.createSpeech({
1877
+ voiceId,
1878
+ apiConvertTextToSpeechUsingCharacterRequest: {
1879
+ text: tc.text,
1880
+ language: langEnum,
1881
+ outputFormat:
1882
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat
1883
+ .Wav,
1884
+ model:
1885
+ models.APIConvertTextToSpeechUsingCharacterRequestModel
1886
+ .SupertonicApi1,
1887
+ },
1888
+ });
1889
+
1890
+ console.log(` āœ… ${tc.label} success`);
1891
+ results.push({ lang: tc.lang, success: true });
1892
+ } catch (e: any) {
1893
+ console.log(
1894
+ ` āŒ ${tc.label} failed: ${e.message?.substring(0, 50)}`
1895
+ );
1896
+ results.push({ lang: tc.lang, success: false, error: e.message });
1897
+ allPassed = false;
1898
+ }
1899
+ }
1900
+
1901
+ console.log(
1902
+ ` šŸ“Š Result: ${results.filter((r) => r.success).length}/${
1903
+ testCases.length
1904
+ } languages passed`
1905
+ );
1906
+ return [allPassed, results];
1907
+ } catch (e: any) {
1908
+ logDetailedError(e, "supertonic_api_1 multilingual");
1909
+ return [false, e];
1910
+ }
1911
+ }
1912
+
1913
+ /**
1914
+ * Test unsupported language for sona_speech_1 (should fail with French)
1915
+ */
1916
+ async function testUnsupportedLanguageSonaSpeech1(
1917
+ voiceId: string | null
1918
+ ): Promise<[boolean, any]> {
1919
+ console.log(
1920
+ "🚫 Unsupported Language Test - sona_speech_1 with French (Expected to Fail)"
1921
+ );
1922
+
1923
+ if (!voiceId) {
1924
+ console.log(" āš ļø No voice ID available");
1925
+ return [false, null];
1926
+ }
1927
+
1928
+ try {
1929
+ const { Supertone } = await import("../src/index.js");
1930
+ const models = await import("../src/models/index.js");
1931
+ const client = new Supertone({ apiKey: API_KEY });
1932
+
1933
+ console.log(` šŸ” Attempting sona_speech_1 with French (unsupported)`);
1934
+
1935
+ const response = await client.textToSpeech.createSpeech({
1936
+ voiceId,
1937
+ apiConvertTextToSpeechUsingCharacterRequest: {
1938
+ text: "Bonjour, ceci est un test.",
1939
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.Fr, // French - not supported by sona_speech_1
1940
+ outputFormat:
1941
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
1942
+ model:
1943
+ models.APIConvertTextToSpeechUsingCharacterRequestModel.SonaSpeech1,
1944
+ },
1945
+ });
1946
+
1947
+ // If we reach here, the API didn't reject - may need server-side validation
1948
+ console.log(
1949
+ ` āš ļø API accepted the request - server-side validation may not enforce language restriction`
1950
+ );
1951
+ console.log(
1952
+ ` šŸ“‹ Note: Language restriction may be enforced at API level, not SDK level`
1953
+ );
1954
+ return [
1955
+ true,
1956
+ { note: "API accepted - language restriction may be server-side" },
1957
+ ];
1958
+ } catch (e: any) {
1959
+ console.log(
1960
+ ` āœ… Correctly rejected unsupported language for sona_speech_1`
1961
+ );
1962
+ console.log(` šŸ“‹ Error: ${e.message?.substring(0, 100)}`);
1963
+ return [true, e];
1964
+ }
1965
+ }
1966
+
1967
+ /**
1968
+ * Test unsupported language for supertonic_api_1 (should fail with German)
1969
+ */
1970
+ async function testUnsupportedLanguageSupertonicApi1(
1971
+ voiceId: string | null
1972
+ ): Promise<[boolean, any]> {
1973
+ console.log(
1974
+ "🚫 Unsupported Language Test - supertonic_api_1 with German (Expected to Fail)"
1975
+ );
1976
+
1977
+ if (!voiceId) {
1978
+ console.log(" āš ļø No voice ID available");
1979
+ return [false, null];
1980
+ }
1981
+
1982
+ try {
1983
+ const { Supertone } = await import("../src/index.js");
1984
+ const models = await import("../src/models/index.js");
1985
+ const client = new Supertone({ apiKey: API_KEY });
1986
+
1987
+ console.log(` šŸ” Attempting supertonic_api_1 with German (unsupported)`);
1988
+
1989
+ const response = await client.textToSpeech.createSpeech({
1990
+ voiceId,
1991
+ apiConvertTextToSpeechUsingCharacterRequest: {
1992
+ text: "Hallo, das ist ein Test.",
1993
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.De, // German - not supported by supertonic_api_1
1994
+ outputFormat:
1995
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
1996
+ model:
1997
+ models.APIConvertTextToSpeechUsingCharacterRequestModel
1998
+ .SupertonicApi1,
1999
+ },
2000
+ });
2001
+
2002
+ // If we reach here, the API didn't reject - may need server-side validation
2003
+ console.log(
2004
+ ` āš ļø API accepted the request - server-side validation may not enforce language restriction`
2005
+ );
2006
+ console.log(
2007
+ ` šŸ“‹ Note: Language restriction may be enforced at API level, not SDK level`
2008
+ );
2009
+ return [
2010
+ true,
2011
+ { note: "API accepted - language restriction may be server-side" },
2012
+ ];
2013
+ } catch (e: any) {
2014
+ console.log(
2015
+ ` āœ… Correctly rejected unsupported language for supertonic_api_1`
2016
+ );
2017
+ console.log(` šŸ“‹ Error: ${e.message?.substring(0, 100)}`);
2018
+ return [true, e];
2019
+ }
2020
+ }
2021
+
2022
+ /**
2023
+ * Test duration prediction with voice settings
2024
+ */
2025
+ async function testPredictDurationWithVoiceSettings(
2026
+ voiceId: string | null
2027
+ ): Promise<[boolean, any]> {
2028
+ console.log("ā±ļø Duration Prediction with Voice Settings Test");
2029
+
2030
+ if (!voiceId) {
2031
+ console.log(" āš ļø No voice ID available");
2032
+ return [false, null];
2033
+ }
2034
+
2035
+ try {
2036
+ const { Supertone } = await import("../src/index.js");
2037
+ const models = await import("../src/models/index.js");
2038
+ const client = new Supertone({ apiKey: API_KEY });
2039
+
2040
+ const voiceSettings = {
2041
+ speed: 0.8,
2042
+ };
2043
+
2044
+ console.log(
2045
+ ` šŸ” Predicting duration with voice settings for voice '${voiceId}'...`
2046
+ );
2047
+ console.log(` Settings: speed=${voiceSettings.speed}`);
2048
+
2049
+ const response = await client.textToSpeech.predictDuration({
2050
+ voiceId,
2051
+ predictTTSDurationUsingCharacterRequest: {
2052
+ text: "This is a duration test with adjusted speed.",
2053
+ language: models.PredictTTSDurationUsingCharacterRequestLanguage.En,
2054
+ voiceSettings,
2055
+ },
2056
+ });
2057
+
2058
+ console.log(` āœ… Predicted duration: ${response.duration}s`);
2059
+
2060
+ return [true, response];
2061
+ } catch (e: any) {
2062
+ console.error(` āŒ Error: ${e.message || e}`);
2063
+ return [false, e];
2064
+ }
2065
+ }
2066
+
2067
+ /**
2068
+ * Test TTS streaming with voice settings
2069
+ */
2070
+ async function testStreamSpeechWithVoiceSettings(
2071
+ voiceId: string | null
2072
+ ): Promise<[boolean, any]> {
2073
+ console.log("šŸ“” TTS Streaming with Voice Settings Test");
2074
+
2075
+ if (!voiceId) {
2076
+ console.log(" āš ļø No voice ID available");
2077
+ return [false, null];
2078
+ }
2079
+
2080
+ try {
2081
+ const { Supertone } = await import("../src/index.js");
2082
+ const models = await import("../src/models/index.js");
2083
+ const client = new Supertone({ apiKey: API_KEY });
2084
+
2085
+ const voiceSettings = {
2086
+ pitchShift: 1.05,
2087
+ speed: 1.1,
2088
+ };
2089
+
2090
+ console.log(
2091
+ ` šŸ” Streaming speech with voice settings for voice '${voiceId}'...`
2092
+ );
2093
+ console.log(
2094
+ ` Settings: pitchShift=${voiceSettings.pitchShift}, speed=${voiceSettings.speed}`
2095
+ );
2096
+ console.log(" āš ļø This test consumes credits!");
2097
+
2098
+ const response = await client.textToSpeech.streamSpeech({
2099
+ voiceId,
2100
+ apiConvertTextToSpeechUsingCharacterRequest: {
2101
+ text: "Streaming with adjusted voice settings.",
2102
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
2103
+ outputFormat:
2104
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Wav,
2105
+ voiceSettings,
2106
+ },
2107
+ });
2108
+
2109
+ console.log(` āœ… Stream with voice settings started successfully`);
2110
+
2111
+ return [true, response];
2112
+ } catch (e: any) {
2113
+ console.error(` āŒ Error: ${e.message || e}`);
2114
+ return [false, e];
2115
+ }
2116
+ }
2117
+
2118
+ /**
2119
+ * Test MP3 format TTS
2120
+ */
2121
+ async function testCreateSpeechMp3(
2122
+ voiceId: string | null
2123
+ ): Promise<[boolean, any]> {
2124
+ console.log("šŸŽ¤ MP3 Format TTS Test");
2125
+
2126
+ if (!voiceId) {
2127
+ console.log(" āš ļø No voice ID available");
2128
+ return [false, null];
2129
+ }
2130
+
2131
+ try {
2132
+ const { Supertone } = await import("../src/index.js");
2133
+ const models = await import("../src/models/index.js");
2134
+ const client = new Supertone({ apiKey: API_KEY });
2135
+
2136
+ console.log(` šŸ” MP3 TTS conversion with voice '${voiceId}'...`);
2137
+ console.log(" āš ļø This test consumes credits!");
2138
+
2139
+ const response = await client.textToSpeech.createSpeech({
2140
+ voiceId,
2141
+ apiConvertTextToSpeechUsingCharacterRequest: {
2142
+ text: "Hello! This is an MP3 format SDK test. Let's verify if it works correctly.",
2143
+ language: models.APIConvertTextToSpeechUsingCharacterRequestLanguage.En,
2144
+ outputFormat:
2145
+ models.APIConvertTextToSpeechUsingCharacterRequestOutputFormat.Mp3,
2146
+ style: "neutral",
2147
+ model: "sona_speech_1",
2148
+ },
2149
+ });
2150
+
2151
+ console.log(` āœ… MP3 TTS conversion success`);
2152
+
2153
+ if (response.result) {
2154
+ const outputFile = "test_create_speech_output.mp3";
2155
+ const audioData = await extractAudioData(response);
2156
+
2157
+ fs.writeFileSync(outputFile, audioData);
2158
+ console.log(` šŸ’¾ MP3 audio file saved: ${outputFile}`);
2159
+
2160
+ // Verify MP3 header
2161
+ const header = audioData.slice(0, 10);
2162
+ if (header[0] === 0x49 && header[1] === 0x44 && header[2] === 0x33) {
2163
+ console.log(` āœ… Valid MP3 file generated (ID3 tag)`);
2164
+ } else if (
2165
+ (header[0] === 0xff && header[1] === 0xfb) ||
2166
+ (header[0] === 0xff && header[1] === 0xfa)
2167
+ ) {
2168
+ console.log(` āœ… Valid MP3 file generated (MPEG frame)`);
2169
+ } else {
2170
+ console.log(
2171
+ ` šŸ“„ MP3 header: ${Array.from(header.slice(0, 10))
2172
+ .map((b) => b.toString(16).padStart(2, "0"))
2173
+ .join(" ")} (needs verification)`
2174
+ );
2175
+ }
2176
+ }
2177
+
2178
+ return [true, response];
2179
+ } catch (e: any) {
2180
+ console.error(` āŒ Error: ${e.message || e}`);
2181
+ return [false, e];
2182
+ }
2183
+ }
2184
+
2185
+ /**
2186
+ * Test MP3 format with long text
2187
+ */
2188
+ async function testCreateSpeechLongTextMp3(
2189
+ voiceId: string | null
2190
+ ): Promise<[boolean, any]> {
2191
+ console.log("šŸ“œ Long Text MP3 Auto-Chunking TTS Test (300+ chars)");
2192
+
2193
+ if (!voiceId) {
2194
+ console.log(" āš ļø No voice ID available");
2195
+ return [false, null];
2196
+ }
2197
+
2198
+ try {
2199
+ const { Supertone } = await import("../src/index.js");
2200
+ const models = await import("../src/models/index.js");
2201
+ const client = new Supertone({ apiKey: API_KEY });
2202
+
2203
+ const longText = `
2204
+ Hello! This is a very long text MP3 auto-chunking TTS test exceeding 300 characters.
2205
+ The newly implemented SDK automatically divides long text into multiple chunks for processing.
2206
+ Real-time streaming text-to-speech technology plays a crucial role in modern AI applications.
2207
+ It is an indispensable technology especially in conversational services, live broadcasting, and real-time translation services.
2208
+ Through the auto-chunking feature, long texts are naturally divided into multiple small segments for processing.
2209
+ Each segment is intelligently segmented considering sentence and word boundaries, enabling natural speech generation.
2210
+ Now users don't need to worry about text length or output format, as the SDK automatically handles everything in MP3 format too.
2211
+ `.trim();
2212
+
2213
+ const actualLength = longText.length;
2214
+ console.log(
2215
+ ` šŸ“ Test text length: ${actualLength} characters (exceeds 300)`
2216
+ );
1366
2217
  console.log(` šŸ”§ Auto-chunking enabled for MP3 format`);
1367
2218
 
1368
2219
  console.log(` šŸ” Converting long text to MP3 with voice '${voiceId}'...`);
@@ -1549,7 +2400,7 @@ async function main(): Promise<boolean> {
1549
2400
  console.log("");
1550
2401
 
1551
2402
  const testResults: TestResult = {};
1552
- let voiceIdForTTS: string | null = null;
2403
+ const voiceIdForTTS: string = "91992bbd4758bdcf9c9b01";
1553
2404
  let customVoiceId: string | null = null;
1554
2405
  let createdCustomVoiceId: string | null = null;
1555
2406
 
@@ -1572,9 +2423,6 @@ async function main(): Promise<boolean> {
1572
2423
 
1573
2424
  [success, result] = await testListVoices();
1574
2425
  testResults["list_voices"] = success;
1575
- if (success && result.voiceId) {
1576
- voiceIdForTTS = result.voiceId;
1577
- }
1578
2426
 
1579
2427
  [success, result] = await testSearchVoices();
1580
2428
  testResults["search_voices"] = success;
@@ -1643,6 +2491,67 @@ async function main(): Promise<boolean> {
1643
2491
  [success, result] = await testStreamSpeech(voiceIdForTTS);
1644
2492
  testResults["stream_speech"] = success;
1645
2493
 
2494
+ // 5.5 New Model Tests (sona_speech_2, supertonic_api_1)
2495
+ console.log("\nšŸ¤– New Model Tests (sona_speech_2, supertonic_api_1)");
2496
+ console.log("-".repeat(60));
2497
+ console.log("āš ļø These tests consume credits!");
2498
+ console.log("");
2499
+
2500
+ [success, result] = await testCreateSpeechWithSonaSpeech2(voiceIdForTTS);
2501
+ testResults["create_speech_sona_speech_2"] = success;
2502
+
2503
+ [success, result] = await testCreateSpeechWithSupertonicApi1(voiceIdForTTS);
2504
+ testResults["create_speech_supertonic_api_1"] = success;
2505
+
2506
+ [success, result] = await testCreateSpeechWithUnsupportedModel(
2507
+ voiceIdForTTS
2508
+ );
2509
+ testResults["create_speech_unsupported_model"] = success;
2510
+
2511
+ [success, result] = await testPredictDurationWithSonaSpeech2(voiceIdForTTS);
2512
+ testResults["predict_duration_sona_speech_2"] = success;
2513
+
2514
+ [success, result] = await testPredictDurationWithSupertonicApi1(
2515
+ voiceIdForTTS
2516
+ );
2517
+ testResults["predict_duration_supertonic_api_1"] = success;
2518
+
2519
+ [success, result] = await testPredictDurationWithUnsupportedModel(
2520
+ voiceIdForTTS
2521
+ );
2522
+ testResults["predict_duration_unsupported_model"] = success;
2523
+
2524
+ // 5.6 Multilingual Tests per Model
2525
+ console.log("\nšŸŒ Multilingual Tests per Model");
2526
+ console.log("-".repeat(60));
2527
+ console.log("āš ļø These tests consume credits!");
2528
+ console.log("");
2529
+
2530
+ [success, result] = await testMultilingualSonaSpeech1(voiceIdForTTS);
2531
+ testResults["multilingual_sona_speech_1"] = success;
2532
+
2533
+ [success, result] = await testMultilingualSonaSpeech2(voiceIdForTTS);
2534
+ testResults["multilingual_sona_speech_2"] = success;
2535
+
2536
+ [success, result] = await testMultilingualSupertonicApi1(voiceIdForTTS);
2537
+ testResults["multilingual_supertonic_api_1"] = success;
2538
+
2539
+ // 5.7 Unsupported Language Tests
2540
+ console.log("\n🚫 Unsupported Language Tests");
2541
+ console.log("-".repeat(60));
2542
+ console.log(
2543
+ "āš ļø These tests verify error handling for unsupported model-language combinations!"
2544
+ );
2545
+ console.log("");
2546
+
2547
+ [success, result] = await testUnsupportedLanguageSonaSpeech1(voiceIdForTTS);
2548
+ testResults["unsupported_lang_sona_speech_1"] = success;
2549
+
2550
+ [success, result] = await testUnsupportedLanguageSupertonicApi1(
2551
+ voiceIdForTTS
2552
+ );
2553
+ testResults["unsupported_lang_supertonic_api_1"] = success;
2554
+
1646
2555
  // 6. TTS Long Text Tests
1647
2556
  console.log("\nšŸ“œ Text-to-Speech Long Text Tests");
1648
2557
  console.log("-".repeat(60));
@@ -1652,6 +2561,14 @@ async function main(): Promise<boolean> {
1652
2561
  [success, result] = await testCreateSpeechLongText(voiceIdForTTS);
1653
2562
  testResults["create_speech_long_text"] = success;
1654
2563
 
2564
+ [success, result] = await testCreateSpeechLongSentenceNoPunctuation(
2565
+ voiceIdForTTS
2566
+ );
2567
+ testResults["create_speech_long_sentence_no_punctuation"] = success;
2568
+
2569
+ [success, result] = await testCreateSpeechJapaneseNoSpaces(voiceIdForTTS);
2570
+ testResults["create_speech_japanese_no_spaces"] = success;
2571
+
1655
2572
  [success, result] = await testStreamSpeechLongText(voiceIdForTTS);
1656
2573
  testResults["stream_speech_long_text"] = success;
1657
2574
 
@@ -1761,6 +2678,9 @@ async function main(): Promise<boolean> {
1761
2678
  " • Text-to-Speech: predictDuration, createSpeech, streamSpeech"
1762
2679
  );
1763
2680
  console.log(" • TTS Long Text: createSpeechLongText, streamSpeechLongText");
2681
+ console.log(
2682
+ " • TTS Chunking Strategies: Word-based (no punctuation), Character-based (Japanese)"
2683
+ );
1764
2684
  console.log(
1765
2685
  " • TTS with Voice Settings: createSpeechWithVoiceSettings, predictDurationWithVoiceSettings, streamSpeechWithVoiceSettings"
1766
2686
  );
@@ -1773,6 +2693,21 @@ async function main(): Promise<boolean> {
1773
2693
  console.log(
1774
2694
  " • Custom Features: Auto-chunking in createSpeech/streamSpeech (transparent)"
1775
2695
  );
2696
+ console.log("");
2697
+ console.log("šŸ¤– New Model & Language Tests:");
2698
+ console.log(
2699
+ " • New Models: sona_speech_2, supertonic_api_1 (createSpeech & predictDuration)"
2700
+ );
2701
+ console.log(
2702
+ " • Unsupported Model Validation: Error handling for invalid model names"
2703
+ );
2704
+ console.log(" • Multilingual per Model:");
2705
+ console.log(" - sona_speech_1: ko, en, ja");
2706
+ console.log(" - sona_speech_2: all 23 languages");
2707
+ console.log(" - supertonic_api_1: ko, en, ja, es, pt");
2708
+ console.log(
2709
+ " • Unsupported Language Validation: Error handling for invalid model-language combinations"
2710
+ );
1776
2711
 
1777
2712
  if (customVoiceId) {
1778
2713
  console.log("");