npm - @supertone/supertone - Versions diffs - 0.1.0 → 0.1.2 - Mend

@supertone/supertone 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73)

package/src/lib/custom_utils/text_utils.ts CHANGED

@@ -7,6 +7,94 @@
 import { DEFAULT_MAX_TEXT_LENGTH } from "./constants.js";
+/**
+ * Check if text contains spaces (to determine if word-based splitting is possible)
+ *
+ * @param text - Text to check
+ * @returns true if text contains spaces
+ */
+function hasSpaces(text: string): boolean {
+	return /\s/.test(text);
+}
+/**
+ * Split text by words, ensuring each chunk is under maxLength.
+ * Used for languages with spaces (English, Korean, etc.)
+ *
+ * @param text - Text to split
+ * @param maxLength - Maximum length of each chunk
+ * @returns Array of text chunks
+ */
+function splitByWords(text: string, maxLength: number): string[] {
+	const words = text.split(/(\s+)/);
+	const chunks: string[] = [];
+	let currentChunk = "";
+	for (const word of words) {
+		if (currentChunk.length + word.length <= maxLength) {
+			currentChunk += word;
+		} else {
+			if (currentChunk.trim()) {
+				chunks.push(currentChunk.trim());
+			}
+			// If a single word exceeds maxLength, split by characters
+			if (word.trim().length > maxLength) {
+				const charChunks = splitByCharacters(word.trim(), maxLength);
+				chunks.push(...charChunks);
+				currentChunk = "";
+			} else {
+				currentChunk = word;
+			}
+		}
+	}
+	if (currentChunk.trim()) {
+		chunks.push(currentChunk.trim());
+	}
+	return chunks;
+}
+/**
+ * Split text by characters, ensuring each chunk is under maxLength.
+ * Used for languages without spaces (Japanese, Chinese, etc.)
+ *
+ * @param text - Text to split
+ * @param maxLength - Maximum length of each chunk
+ * @returns Array of text chunks
+ */
+function splitByCharacters(text: string, maxLength: number): string[] {
+	const chunks: string[] = [];
+	for (let i = 0; i < text.length; i += maxLength) {
+		chunks.push(text.slice(i, i + maxLength));
+	}
+	return chunks;
+}
+/**
+ * Split a single chunk that exceeds maxLength into smaller chunks.
+ * Uses word-based splitting for texts with spaces, character-based for texts without.
+ *
+ * @param chunk - Text chunk to split
+ * @param maxLength - Maximum length of each chunk
+ * @returns Array of text chunks, all under maxLength
+ */
+function splitOversizedChunk(chunk: string, maxLength: number): string[] {
+	if (chunk.length <= maxLength) {
+		return [chunk];
+	}
+	// Check if text has spaces (word-based splitting possible)
+	if (hasSpaces(chunk)) {
+		return splitByWords(chunk, maxLength);
+	}
+	// No spaces: use character-based splitting (Japanese, Chinese, etc.)
+	return splitByCharacters(chunk, maxLength);
+}
 /**
  * Split input text into sentence chunks suitable for TTS processing.
  *
@@ -15,9 +103,16 @@ import { DEFAULT_MAX_TEXT_LENGTH } from "./constants.js";
  * It handles various punctuation patterns and provides graceful fallback to
  * word/character boundaries when necessary.
  *
+ * Chunking Strategy:
+ * 1. First, split by sentence boundaries (punctuation: .!?;:)
+ * 2. Merge sentences into chunks up to maxLength
+ * 3. If a sentence exceeds maxLength:
+ *    - For text with spaces: split by words
+ *    - For text without spaces (Japanese, etc.): split by characters
+ *
  * @param text - Input text to be segmented
  * @param maxLength - Maximum length of each chunk
- * @returns Array of text chunks
+ * @returns Array of text chunks, each guaranteed to be <= maxLength
  */
 export function chunkText(
 	text: string,
@@ -27,28 +122,43 @@ export function chunkText(
 		return [text];
 	}
-	// Split by sentence boundaries
-	const sentences = text.split(/([.!?;:]+\s*)/);
+	// Step 1: Split by sentence boundaries (including various punctuation marks)
+	// Includes Western punctuation (.!?;:) and CJK punctuation (。！？；：)
+	const sentences = text.split(/([.!?;:。！？；：]+\s*)/);
-	const chunks: string[] = [];
+	const preliminaryChunks: string[] = [];
 	let currentChunk = "";
+	// Step 2: Merge sentences into chunks up to maxLength
 	for (const sentence of sentences) {
 		if (currentChunk.length + sentence.length <= maxLength) {
 			currentChunk += sentence;
 		} else {
 			if (currentChunk) {
-				chunks.push(currentChunk);
+				preliminaryChunks.push(currentChunk);
 			}
 			currentChunk = sentence;
 		}
 	}
 	if (currentChunk) {
-		chunks.push(currentChunk);
+		preliminaryChunks.push(currentChunk);
 	}
-	return chunks;
+	// Step 3: Handle oversized chunks (split by words or characters)
+	const finalChunks: string[] = [];
+	for (const chunk of preliminaryChunks) {
+		if (chunk.length <= maxLength) {
+			finalChunks.push(chunk);
+		} else {
+			// Chunk exceeds maxLength, need to split further
+			const subChunks = splitOversizedChunk(chunk, maxLength);
+			finalChunks.push(...subChunks);
+		}
+	}
+	// Filter out empty chunks
+	return finalChunks.filter((chunk) => chunk.length > 0);
 }
 /**

package/src/models/apiconverttexttospeechusingcharacterrequest.ts CHANGED

@@ -22,8 +22,26 @@ export const APIConvertTextToSpeechUsingCharacterRequestLanguage = {
   En: "en",
   Ko: "ko",
   Ja: "ja",
+  Bg: "bg",
+  Cs: "cs",
+  Da: "da",
+  El: "el",
   Es: "es",
+  Et: "et",
+  Fi: "fi",
+  Hu: "hu",
+  It: "it",
+  Nl: "nl",
+  Pl: "pl",
   Pt: "pt",
+  Ro: "ro",
+  Ar: "ar",
+  De: "de",
+  Fr: "fr",
+  Hi: "hi",
+  Id: "id",
+  Ru: "ru",
+  Vi: "vi",
 } as const;
 /**
  * The language code of the text
@@ -32,6 +50,22 @@ export type APIConvertTextToSpeechUsingCharacterRequestLanguage = ClosedEnum<
   typeof APIConvertTextToSpeechUsingCharacterRequestLanguage
 >;
+/**
+ * The model type to use for the text-to-speech conversion
+ */
+export const APIConvertTextToSpeechUsingCharacterRequestModel = {
+  SonaSpeech1: "sona_speech_1",
+  SonaSpeech2: "sona_speech_2",
+  SonaSpeech2t: "sona_speech_2t",
+  SupertonicApi1: "supertonic_api_1",
+} as const;
+/**
+ * The model type to use for the text-to-speech conversion
+ */
+export type APIConvertTextToSpeechUsingCharacterRequestModel = ClosedEnum<
+  typeof APIConvertTextToSpeechUsingCharacterRequestModel
+>;
 /**
  * The desired output format of the audio file (wav, mp3). Default is wav.
  */
@@ -61,7 +95,7 @@ export type APIConvertTextToSpeechUsingCharacterRequest = {
   /**
    * The model type to use for the text-to-speech conversion
    */
-  model?: string | undefined;
+  model?: APIConvertTextToSpeechUsingCharacterRequestModel | undefined;
   /**
    * The desired output format of the audio file (wav, mp3). Default is wav.
    */
@@ -98,6 +132,29 @@ export namespace APIConvertTextToSpeechUsingCharacterRequestLanguage$ {
     APIConvertTextToSpeechUsingCharacterRequestLanguage$outboundSchema;
 }
+/** @internal */
+export const APIConvertTextToSpeechUsingCharacterRequestModel$inboundSchema:
+  z.ZodNativeEnum<typeof APIConvertTextToSpeechUsingCharacterRequestModel> = z
+    .nativeEnum(APIConvertTextToSpeechUsingCharacterRequestModel);
+/** @internal */
+export const APIConvertTextToSpeechUsingCharacterRequestModel$outboundSchema:
+  z.ZodNativeEnum<typeof APIConvertTextToSpeechUsingCharacterRequestModel> =
+    APIConvertTextToSpeechUsingCharacterRequestModel$inboundSchema;
+/**
+ * @internal
+ * @deprecated This namespace will be removed in future versions. Use schemas and types that are exported directly from this module.
+ */
+export namespace APIConvertTextToSpeechUsingCharacterRequestModel$ {
+  /** @deprecated use `APIConvertTextToSpeechUsingCharacterRequestModel$inboundSchema` instead. */
+  export const inboundSchema =
+    APIConvertTextToSpeechUsingCharacterRequestModel$inboundSchema;
+  /** @deprecated use `APIConvertTextToSpeechUsingCharacterRequestModel$outboundSchema` instead. */
+  export const outboundSchema =
+    APIConvertTextToSpeechUsingCharacterRequestModel$outboundSchema;
+}
 /** @internal */
 export const APIConvertTextToSpeechUsingCharacterRequestOutputFormat$inboundSchema:
   z.ZodNativeEnum<
@@ -133,7 +190,8 @@ export const APIConvertTextToSpeechUsingCharacterRequest$inboundSchema:
     text: z.string(),
     language: APIConvertTextToSpeechUsingCharacterRequestLanguage$inboundSchema,
     style: z.string().optional(),
-    model: z.string().default("sona_speech_1"),
+    model: APIConvertTextToSpeechUsingCharacterRequestModel$inboundSchema
+      .default("sona_speech_1"),
     output_format:
       APIConvertTextToSpeechUsingCharacterRequestOutputFormat$inboundSchema
         .default("wav"),
@@ -169,7 +227,8 @@ export const APIConvertTextToSpeechUsingCharacterRequest$outboundSchema:
     language:
       APIConvertTextToSpeechUsingCharacterRequestLanguage$outboundSchema,
     style: z.string().optional(),
-    model: z.string().default("sona_speech_1"),
+    model: APIConvertTextToSpeechUsingCharacterRequestModel$outboundSchema
+      .default("sona_speech_1"),
     outputFormat:
       APIConvertTextToSpeechUsingCharacterRequestOutputFormat$outboundSchema
         .default("wav"),

package/src/models/predictttsdurationusingcharacterrequest.ts CHANGED

@@ -22,8 +22,26 @@ export const PredictTTSDurationUsingCharacterRequestLanguage = {
   En: "en",
   Ko: "ko",
   Ja: "ja",
+  Bg: "bg",
+  Cs: "cs",
+  Da: "da",
+  El: "el",
   Es: "es",
+  Et: "et",
+  Fi: "fi",
+  Hu: "hu",
+  It: "it",
+  Nl: "nl",
+  Pl: "pl",
   Pt: "pt",
+  Ro: "ro",
+  Ar: "ar",
+  De: "de",
+  Fr: "fr",
+  Hi: "hi",
+  Id: "id",
+  Ru: "ru",
+  Vi: "vi",
 } as const;
 /**
  * Language code of the voice
@@ -32,6 +50,22 @@ export type PredictTTSDurationUsingCharacterRequestLanguage = ClosedEnum<
   typeof PredictTTSDurationUsingCharacterRequestLanguage
 >;
+/**
+ * The model type to use for the text-to-speech conversion
+ */
+export const PredictTTSDurationUsingCharacterRequestModel = {
+  SonaSpeech1: "sona_speech_1",
+  SonaSpeech2: "sona_speech_2",
+  SonaSpeech2t: "sona_speech_2t",
+  SupertonicApi1: "supertonic_api_1",
+} as const;
+/**
+ * The model type to use for the text-to-speech conversion
+ */
+export type PredictTTSDurationUsingCharacterRequestModel = ClosedEnum<
+  typeof PredictTTSDurationUsingCharacterRequestModel
+>;
 /**
  * The desired output format of the audio file (wav, mp3). Default is wav.
  */
@@ -62,7 +96,7 @@ export type PredictTTSDurationUsingCharacterRequest = {
   /**
    * The model type to use for the text-to-speech conversion
    */
-  model?: string | undefined;
+  model?: PredictTTSDurationUsingCharacterRequestModel | undefined;
   /**
    * The desired output format of the audio file (wav, mp3). Default is wav.
    */
@@ -95,6 +129,29 @@ export namespace PredictTTSDurationUsingCharacterRequestLanguage$ {
     PredictTTSDurationUsingCharacterRequestLanguage$outboundSchema;
 }
+/** @internal */
+export const PredictTTSDurationUsingCharacterRequestModel$inboundSchema:
+  z.ZodNativeEnum<typeof PredictTTSDurationUsingCharacterRequestModel> = z
+    .nativeEnum(PredictTTSDurationUsingCharacterRequestModel);
+/** @internal */
+export const PredictTTSDurationUsingCharacterRequestModel$outboundSchema:
+  z.ZodNativeEnum<typeof PredictTTSDurationUsingCharacterRequestModel> =
+    PredictTTSDurationUsingCharacterRequestModel$inboundSchema;
+/**
+ * @internal
+ * @deprecated This namespace will be removed in future versions. Use schemas and types that are exported directly from this module.
+ */
+export namespace PredictTTSDurationUsingCharacterRequestModel$ {
+  /** @deprecated use `PredictTTSDurationUsingCharacterRequestModel$inboundSchema` instead. */
+  export const inboundSchema =
+    PredictTTSDurationUsingCharacterRequestModel$inboundSchema;
+  /** @deprecated use `PredictTTSDurationUsingCharacterRequestModel$outboundSchema` instead. */
+  export const outboundSchema =
+    PredictTTSDurationUsingCharacterRequestModel$outboundSchema;
+}
 /** @internal */
 export const PredictTTSDurationUsingCharacterRequestOutputFormat$inboundSchema:
   z.ZodNativeEnum<typeof PredictTTSDurationUsingCharacterRequestOutputFormat> =
@@ -127,7 +184,9 @@ export const PredictTTSDurationUsingCharacterRequest$inboundSchema: z.ZodType<
   text: z.string(),
   language: PredictTTSDurationUsingCharacterRequestLanguage$inboundSchema,
   style: z.string().optional(),
-  model: z.string().default("sona_speech_1"),
+  model: PredictTTSDurationUsingCharacterRequestModel$inboundSchema.default(
+    "sona_speech_1",
+  ),
   output_format:
     PredictTTSDurationUsingCharacterRequestOutputFormat$inboundSchema.default(
       "wav",
@@ -159,7 +218,9 @@ export const PredictTTSDurationUsingCharacterRequest$outboundSchema: z.ZodType<
   text: z.string(),
   language: PredictTTSDurationUsingCharacterRequestLanguage$outboundSchema,
   style: z.string().optional(),
-  model: z.string().default("sona_speech_1"),
+  model: PredictTTSDurationUsingCharacterRequestModel$outboundSchema.default(
+    "sona_speech_1",
+  ),
   outputFormat:
     PredictTTSDurationUsingCharacterRequestOutputFormat$outboundSchema.default(
       "wav",