mulmocast 2.1.8 → 2.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/bundle.js +11 -4
- package/lib/agents/tts_gemini_agent.js +9 -2
- package/lib/types/agent.d.ts +1 -0
- package/lib/utils/context.d.ts +1 -0
- package/lib/utils/context.js +2 -1
- package/package.json +5 -5
- package/scripts/test/test_audio_gemini.json +9 -1
- package/scripts/test/test_media.json +1 -1
package/lib/actions/bundle.js
CHANGED
|
@@ -6,6 +6,7 @@ import { mkdir } from "../utils/file.js";
|
|
|
6
6
|
import { ZipBuilder } from "../utils/zip.js";
|
|
7
7
|
import { bundleTargetLang } from "../utils/const.js";
|
|
8
8
|
import { createSilentAudio } from "../utils/ffmpeg_utils.js";
|
|
9
|
+
import { silentMp3 } from "../utils/context.js";
|
|
9
10
|
const downloadFile = async (url, destPath) => {
|
|
10
11
|
const response = await fetch(url);
|
|
11
12
|
if (!response.ok) {
|
|
@@ -53,7 +54,7 @@ const imageSourceMappings = [
|
|
|
53
54
|
];
|
|
54
55
|
export const mulmoViewerBundle = async (context) => {
|
|
55
56
|
const isZip = true;
|
|
56
|
-
const dir =
|
|
57
|
+
const dir = context.fileDirs.outDirPath;
|
|
57
58
|
mkdir(dir);
|
|
58
59
|
const zipper = new ZipBuilder(path.resolve(dir, zipFileName));
|
|
59
60
|
// text
|
|
@@ -67,18 +68,24 @@ export const mulmoViewerBundle = async (context) => {
|
|
|
67
68
|
// audio
|
|
68
69
|
for (const lang of bundleTargetLang) {
|
|
69
70
|
const audios = listLocalizedAudioPaths({ ...context, lang });
|
|
70
|
-
audios.
|
|
71
|
+
await Promise.all(audios.map(async (audio, index) => {
|
|
71
72
|
if (audio) {
|
|
72
73
|
const fileName = path.basename(audio ?? "");
|
|
73
74
|
if (resultJson[index] && resultJson[index].audioSources) {
|
|
74
75
|
resultJson[index].audioSources[lang] = fileName;
|
|
75
76
|
}
|
|
76
|
-
if (
|
|
77
|
+
if (fileName === "silent300.mp3") {
|
|
78
|
+
// Download from GitHub URL
|
|
79
|
+
const destPath = path.resolve(dir, fileName);
|
|
80
|
+
await downloadFile(silentMp3, destPath);
|
|
81
|
+
zipper.addFile(destPath, fileName);
|
|
82
|
+
}
|
|
83
|
+
else if (fs.existsSync(audio)) {
|
|
77
84
|
fs.copyFileSync(audio, path.resolve(dir, fileName));
|
|
78
85
|
zipper.addFile(audio, fileName);
|
|
79
86
|
}
|
|
80
87
|
}
|
|
81
|
-
});
|
|
88
|
+
}));
|
|
82
89
|
}
|
|
83
90
|
// image, movie
|
|
84
91
|
context.studio.beats.forEach((image, index) => {
|
package/lib/agents/tts_gemini_agent.js
CHANGED
|
@@ -3,9 +3,16 @@ import { GoogleGenAI } from "@google/genai";
|
|
|
3
3
|
import { provider2TTSAgent } from "../utils/provider2agent.js";
|
|
4
4
|
import { agentIncorrectAPIKeyError, apiKeyMissingError, agentGenerationError, audioAction, audioFileTarget, getGenAIErrorReason, } from "../utils/error_cause.js";
|
|
5
5
|
import { pcmToMp3 } from "../utils/ffmpeg_utils.js";
|
|
6
|
+
const getPrompt = (text, instructions) => {
|
|
7
|
+
// https://ai.google.dev/gemini-api/docs/speech-generation?hl=ja#controllable
|
|
8
|
+
if (instructions) {
|
|
9
|
+
return `### DIRECTOR'S NOTES\n${instructions}\n\n#### TRANSCRIPT\n${text}`;
|
|
10
|
+
}
|
|
11
|
+
return text;
|
|
12
|
+
};
|
|
6
13
|
export const ttsGeminiAgent = async ({ namedInputs, params, config, }) => {
|
|
7
14
|
const { text } = namedInputs;
|
|
8
|
-
const { model, voice, suppressError } = params;
|
|
15
|
+
const { model, voice, suppressError, instructions } = params;
|
|
9
16
|
const apiKey = config?.apiKey;
|
|
10
17
|
if (!apiKey) {
|
|
11
18
|
throw new Error("Google GenAI API key is required (GEMINI_API_KEY)", {
|
|
@@ -16,7 +23,7 @@ export const ttsGeminiAgent = async ({ namedInputs, params, config, }) => {
|
|
|
16
23
|
const ai = new GoogleGenAI({ apiKey });
|
|
17
24
|
const response = await ai.models.generateContent({
|
|
18
25
|
model: model ?? provider2TTSAgent.gemini.defaultModel,
|
|
19
|
-
contents: [{ parts: [{ text }] }],
|
|
26
|
+
contents: [{ parts: [{ text: getPrompt(text, instructions) }] }],
|
|
20
27
|
config: {
|
|
21
28
|
responseModalities: ["AUDIO"],
|
|
22
29
|
speechConfig: {
|
package/lib/types/agent.d.ts
CHANGED
|
@@ -122,6 +122,7 @@ export type KotodamaTTSAgentParams = TTSAgentParams & {
|
|
|
122
122
|
export type GoogleTTSAgentParams = TTSAgentParams & {
|
|
123
123
|
speed: number;
|
|
124
124
|
model: string;
|
|
125
|
+
instructions: string;
|
|
125
126
|
};
|
|
126
127
|
export type ElevenlabsTTSAgentParams = TTSAgentParams & {
|
|
127
128
|
model: string;
|
package/lib/utils/context.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { MulmoStudioBeat, MulmoScript, MulmoPresentationStyle, MulmoStudioMultiLingual, FileObject } from "../types/type.js";
|
|
2
|
+
export declare const silentMp3 = "https://github.com/receptron/mulmocast-cli/raw/refs/heads/main/assets/audio/silent300.mp3";
|
|
2
3
|
export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: string, videoCaptionLang?: string, presentationStyle?: MulmoPresentationStyle | null) => {
|
|
3
4
|
script: {
|
|
4
5
|
$mulmocast: {
|
package/lib/utils/context.js
CHANGED
|
@@ -3,6 +3,7 @@ import { readMulmoScriptFile, fetchMulmoScriptFile, isFile } from "./file.js";
|
|
|
3
3
|
import { beatId, multiLingualObjectToArray } from "./utils.js";
|
|
4
4
|
import { mulmoStudioSchema, mulmoCaptionParamsSchema, mulmoPresentationStyleSchema } from "../types/schema.js";
|
|
5
5
|
import { MulmoPresentationStyleMethods, MulmoScriptMethods, MulmoStudioMultiLingualMethod } from "../methods/index.js";
|
|
6
|
+
export const silentMp3 = "https://github.com/receptron/mulmocast-cli/raw/refs/heads/main/assets/audio/silent300.mp3";
|
|
6
7
|
const mulmoCredit = (speaker) => {
|
|
7
8
|
return {
|
|
8
9
|
id: "mulmo_credit",
|
|
@@ -19,7 +20,7 @@ const mulmoCredit = (speaker) => {
|
|
|
19
20
|
type: "audio",
|
|
20
21
|
source: {
|
|
21
22
|
kind: "url",
|
|
22
|
-
url:
|
|
23
|
+
url: silentMp3,
|
|
23
24
|
},
|
|
24
25
|
},
|
|
25
26
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mulmocast",
|
|
3
|
-
"version": "2.1.8",
|
|
3
|
+
"version": "2.1.10",
|
|
4
4
|
"description": "",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "lib/index.node.js",
|
|
@@ -74,7 +74,7 @@
|
|
|
74
74
|
"homepage": "https://github.com/receptron/mulmocast-cli#readme",
|
|
75
75
|
"dependencies": {
|
|
76
76
|
"@google-cloud/text-to-speech": "^6.4.0",
|
|
77
|
-
"@google/genai": "^1.
|
|
77
|
+
"@google/genai": "^1.34.0",
|
|
78
78
|
"@graphai/anthropic_agent": "^2.0.12",
|
|
79
79
|
"@graphai/browserless_agent": "^2.0.1",
|
|
80
80
|
"@graphai/gemini_agent": "^2.0.1",
|
|
@@ -86,7 +86,7 @@
|
|
|
86
86
|
"@graphai/vanilla_node_agents": "^2.0.4",
|
|
87
87
|
"@inquirer/input": "^4.3.0",
|
|
88
88
|
"@inquirer/select": "^4.4.1",
|
|
89
|
-
"@modelcontextprotocol/sdk": "^1.
|
|
89
|
+
"@modelcontextprotocol/sdk": "^1.25.1",
|
|
90
90
|
"@mozilla/readability": "^0.6.0",
|
|
91
91
|
"@tavily/core": "^0.5.11",
|
|
92
92
|
"archiver": "^7.0.1",
|
|
@@ -102,7 +102,7 @@
|
|
|
102
102
|
"replicate": "^1.4.0",
|
|
103
103
|
"yaml": "^2.8.2",
|
|
104
104
|
"yargs": "^18.0.0",
|
|
105
|
-
"zod": "^4.1
|
|
105
|
+
"zod": "^4.2.1"
|
|
106
106
|
},
|
|
107
107
|
"devDependencies": {
|
|
108
108
|
"@receptron/test_utils": "^2.0.3",
|
|
@@ -117,7 +117,7 @@
|
|
|
117
117
|
"prettier": "^3.7.4",
|
|
118
118
|
"tsx": "^4.21.0",
|
|
119
119
|
"typescript": "^5.9.3",
|
|
120
|
-
"typescript-eslint": "^8.
|
|
120
|
+
"typescript-eslint": "^8.50.0"
|
|
121
121
|
},
|
|
122
122
|
"engines": {
|
|
123
123
|
"node": ">=20.0.0"
|
package/scripts/test/test_audio_gemini.json
CHANGED
|
@@ -14,6 +14,14 @@
|
|
|
14
14
|
"provider": "gemini",
|
|
15
15
|
"model": "gemini-2.5-pro-preview-tts",
|
|
16
16
|
"voiceId": "Puck"
|
|
17
|
+
},
|
|
18
|
+
"Presenter2": {
|
|
19
|
+
"provider": "gemini",
|
|
20
|
+
"model": "gemini-2.5-pro-preview-tts",
|
|
21
|
+
"voiceId": "leda",
|
|
22
|
+
"speechOptions": {
|
|
23
|
+
"instruction": "Role: You are a classic Tsundere character. Tone: Sharp, impatient, and defensive, but with underlying hesitation that suggests you actually care deeply. Voice Quality: Higher pitch, energetic, and slightly haughty."
|
|
24
|
+
}
|
|
17
25
|
}
|
|
18
26
|
}
|
|
19
27
|
},
|
|
@@ -52,7 +60,7 @@
|
|
|
52
60
|
}
|
|
53
61
|
},
|
|
54
62
|
{
|
|
55
|
-
"speaker": "
|
|
63
|
+
"speaker": "Presenter2",
|
|
56
64
|
"text": "Hello, I'm a presenter. I have a whisper instruction.",
|
|
57
65
|
"speechOptions": {
|
|
58
66
|
"instruction": "Whisper softly, like a pillow talk."
|