npm - mulmocast - Versions diffs - 2.1.9 → 2.1.10 - Mend

mulmocast 2.1.9 → 2.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/lib/agents/tts_gemini_agent.js +9 -2
package/lib/types/agent.d.ts +1 -0
package/package.json +1 -1
package/scripts/test/test_audio_gemini.json +9 -1
package/scripts/test/test_media.json +1 -1

package/lib/agents/tts_gemini_agent.js CHANGED

@@ -3,9 +3,16 @@ import { GoogleGenAI } from "@google/genai";
 import { provider2TTSAgent } from "../utils/provider2agent.js";
 import { agentIncorrectAPIKeyError, apiKeyMissingError, agentGenerationError, audioAction, audioFileTarget, getGenAIErrorReason, } from "../utils/error_cause.js";
 import { pcmToMp3 } from "../utils/ffmpeg_utils.js";
+const getPrompt = (text, instructions) => {
+    // https://ai.google.dev/gemini-api/docs/speech-generation?hl=ja#controllable
+    if (instructions) {
+        return `### DIRECTOR'S NOTES\n${instructions}\n\n#### TRANSCRIPT\n${text}`;
+    }
+    return text;
+};
 export const ttsGeminiAgent = async ({ namedInputs, params, config, }) => {
     const { text } = namedInputs;
-    const { model, voice, suppressError } = params;
+    const { model, voice, suppressError, instructions } = params;
     const apiKey = config?.apiKey;
     if (!apiKey) {
         throw new Error("Google GenAI API key is required (GEMINI_API_KEY)", {
@@ -16,7 +23,7 @@ export const ttsGeminiAgent = async ({ namedInputs, params, config, }) => {
         const ai = new GoogleGenAI({ apiKey });
         const response = await ai.models.generateContent({
             model: model ?? provider2TTSAgent.gemini.defaultModel,
-            contents: [{ parts: [{ text }] }],
+            contents: [{ parts: [{ text: getPrompt(text, instructions) }] }],
             config: {
                 responseModalities: ["AUDIO"],
                 speechConfig: {

package/lib/types/agent.d.ts CHANGED

@@ -122,6 +122,7 @@ export type KotodamaTTSAgentParams = TTSAgentParams & {
 export type GoogleTTSAgentParams = TTSAgentParams & {
     speed: number;
     model: string;
+    instructions: string;
 };
 export type ElevenlabsTTSAgentParams = TTSAgentParams & {
     model: string;

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "mulmocast",
-  "version": "2.1.9",
+  "version": "2.1.10",
   "description": "",
   "type": "module",
   "main": "lib/index.node.js",

package/scripts/test/test_audio_gemini.json CHANGED

@@ -14,6 +14,14 @@
         "provider": "gemini",
         "model": "gemini-2.5-pro-preview-tts",
         "voiceId": "Puck"
+      },
+      "Presenter2": {
+        "provider": "gemini",
+        "model": "gemini-2.5-pro-preview-tts",
+        "voiceId": "leda",
+        "speechOptions": {
+          "instruction": "Role: You are a classic Tsundere character. Tone: Sharp, impatient, and defensive, but with underlying hesitation that suggests you actually care deeply. Voice Quality: Higher pitch, energetic, and slightly haughty."
+        }
       }
     }
   },
@@ -52,7 +60,7 @@
       }
     },
     {
-      "speaker": "Presenter",
+      "speaker": "Presenter2",
       "text": "Hello, I'm a presenter. I have a whisper instruction.",
       "speechOptions": {
         "instruction": "Whisper softly, like a pillow talk."

package/scripts/test/test_media.json CHANGED

@@ -60,7 +60,7 @@
       "speaker": "Presenter",
       "text": "",
       "duration": 0.5,
-      "id": "{1A57B3F5-B6CB-4948-96BB-6F018DCCBBD4}",
+      "id": "1A57B3F5-B6CB-4948-96BB-6F018DCCBBD4",
       "image": {
         "type": "textSlide",
         "slide": {