mulmocast 2.1.9 → 2.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -127,7 +127,7 @@ export const mulmoViewerBundle = async (context) => {
127
127
  });
128
128
  // BGM
129
129
  const bgmFileName = await processBgm(context.studio?.script.audioParams?.bgm, dir, zipper);
130
- const bundleData = { beats: resultJson, bgmSource: bgmFileName };
130
+ const bundleData = { beats: resultJson, bgmSource: bgmFileName, title: context.studio.script.title };
131
131
  fs.writeFileSync(path.resolve(dir, viewJsonFileName), JSON.stringify(bundleData, null, 2));
132
132
  zipper.addFile(path.resolve(dir, viewJsonFileName));
133
133
  if (isZip) {
@@ -3,9 +3,16 @@ import { GoogleGenAI } from "@google/genai";
3
3
  import { provider2TTSAgent } from "../utils/provider2agent.js";
4
4
  import { agentIncorrectAPIKeyError, apiKeyMissingError, agentGenerationError, audioAction, audioFileTarget, getGenAIErrorReason, } from "../utils/error_cause.js";
5
5
  import { pcmToMp3 } from "../utils/ffmpeg_utils.js";
6
+ const getPrompt = (text, instructions) => {
7
+ // Builds the prompt text passed to the Gemini TTS request. When instructions is truthy it is placed under a director-notes heading ahead of the transcript; otherwise the text is returned unchanged. Prompt layout reference: https://ai.google.dev/gemini-api/docs/speech-generation?hl=ja#controllable
8
+ if (instructions) {
9
+ return `### DIRECTOR'S NOTES\n${instructions}\n\n#### TRANSCRIPT\n${text}`;
10
+ }
11
+ return text;
12
+ };
6
13
  export const ttsGeminiAgent = async ({ namedInputs, params, config, }) => {
7
14
  const { text } = namedInputs;
8
- const { model, voice, suppressError } = params;
15
+ const { model, voice, suppressError, instructions } = params;
9
16
  const apiKey = config?.apiKey;
10
17
  if (!apiKey) {
11
18
  throw new Error("Google GenAI API key is required (GEMINI_API_KEY)", {
@@ -16,7 +23,7 @@ export const ttsGeminiAgent = async ({ namedInputs, params, config, }) => {
16
23
  const ai = new GoogleGenAI({ apiKey });
17
24
  const response = await ai.models.generateContent({
18
25
  model: model ?? provider2TTSAgent.gemini.defaultModel,
19
- contents: [{ parts: [{ text }] }],
26
+ contents: [{ parts: [{ text: getPrompt(text, instructions) }] }],
20
27
  config: {
21
28
  responseModalities: ["AUDIO"],
22
29
  speechConfig: {
@@ -122,6 +122,7 @@ export type KotodamaTTSAgentParams = TTSAgentParams & {
122
122
  export type GoogleTTSAgentParams = TTSAgentParams & {
123
123
  speed: number;
124
124
  model: string;
125
+ instructions: string; // NOTE(review): free-text speaking-style directions; declared required here, while the Gemini TTS agent treats a missing value as valid - confirm whether this should be optional
125
126
  };
126
127
  export type ElevenlabsTTSAgentParams = TTSAgentParams & {
127
128
  model: string;
@@ -154,4 +154,5 @@ export type MulmoViewerBeat = {
154
154
  export type MulmoViewerData = {
155
155
  beats: MulmoViewerBeat[];
156
156
  bgmSource?: string;
157
+ title?: string; // NOTE(review): presumably the script title written into the viewer bundle JSON - confirm against the bundle writer
157
158
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast",
3
- "version": "2.1.9",
3
+ "version": "2.1.11",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "lib/index.node.js",
@@ -84,13 +84,13 @@
84
84
  "@graphai/stream_agent_filter": "^2.0.2",
85
85
  "@graphai/vanilla": "^2.0.12",
86
86
  "@graphai/vanilla_node_agents": "^2.0.4",
87
- "@inquirer/input": "^4.3.0",
88
- "@inquirer/select": "^4.4.1",
87
+ "@inquirer/input": "^5.0.3",
88
+ "@inquirer/select": "^5.0.3",
89
89
  "@modelcontextprotocol/sdk": "^1.25.1",
90
90
  "@mozilla/readability": "^0.6.0",
91
91
  "@tavily/core": "^0.5.11",
92
92
  "archiver": "^7.0.1",
93
- "clipboardy": "^4.0.0",
93
+ "clipboardy": "^5.0.2",
94
94
  "dotenv": "^17.2.3",
95
95
  "fluent-ffmpeg": "^2.1.3",
96
96
  "graphai": "^2.0.16",
@@ -98,7 +98,7 @@
98
98
  "marked": "^17.0.1",
99
99
  "mulmocast-vision": "^1.0.8",
100
100
  "ora": "^9.0.0",
101
- "puppeteer": "^24.33.0",
101
+ "puppeteer": "^24.34.0",
102
102
  "replicate": "^1.4.0",
103
103
  "yaml": "^2.8.2",
104
104
  "yargs": "^18.0.0",
@@ -117,7 +117,7 @@
117
117
  "prettier": "^3.7.4",
118
118
  "tsx": "^4.21.0",
119
119
  "typescript": "^5.9.3",
120
- "typescript-eslint": "^8.50.0"
120
+ "typescript-eslint": "^8.50.1"
121
121
  },
122
122
  "engines": {
123
123
  "node": ">=20.0.0"
@@ -14,6 +14,14 @@
14
14
  "provider": "gemini",
15
15
  "model": "gemini-2.5-pro-preview-tts",
16
16
  "voiceId": "Puck"
17
+ },
18
+ "Presenter2": {
19
+ "provider": "gemini",
20
+ "model": "gemini-2.5-pro-preview-tts",
21
+ "voiceId": "leda",
22
+ "speechOptions": {
23
+ "instruction": "Role: You are a classic Tsundere character. Tone: Sharp, impatient, and defensive, but with underlying hesitation that suggests you actually care deeply. Voice Quality: Higher pitch, energetic, and slightly haughty."
24
+ }
17
25
  }
18
26
  }
19
27
  },
@@ -52,7 +60,7 @@
52
60
  }
53
61
  },
54
62
  {
55
- "speaker": "Presenter",
63
+ "speaker": "Presenter2",
56
64
  "text": "Hello, I'm a presenter. I have a whisper instruction.",
57
65
  "speechOptions": {
58
66
  "instruction": "Whisper softly, like a pillow talk."
@@ -60,7 +60,7 @@
60
60
  "speaker": "Presenter",
61
61
  "text": "",
62
62
  "duration": 0.5,
63
- "id": "{1A57B3F5-B6CB-4948-96BB-6F018DCCBBD4}",
63
+ "id": "1A57B3F5-B6CB-4948-96BB-6F018DCCBBD4",
64
64
  "image": {
65
65
  "type": "textSlide",
66
66
  "slide": {