mulmocast 2.1.24 → 2.1.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -128,9 +128,6 @@ REPLICATE_API_TOKEN=your_replicate_api_key
128
128
 
129
129
  #### (Optional) For TTS models
130
130
  ```bash
131
- # For Nijivoice TTS
132
- NIJIVOICE_API_KEY=your_nijivoice_api_key
133
-
134
131
  # For ElevenLabs TTS
135
132
  ELEVENLABS_API_KEY=your_elevenlabs_api_key
136
133
  ```
@@ -140,6 +137,76 @@ ELEVENLABS_API_KEY=your_elevenlabs_api_key
140
137
  BROWSERLESS_API_TOKEN=your_browserless_api_token # to access web in mulmo tool
141
138
  ```
142
139
 
140
+ ### Google Vertex AI
141
+
142
+ For enterprise/production environments or to access models like Imagen 4, use Vertex AI with Application Default Credentials (ADC):
143
+
144
+ ```bash
145
+ # Install gcloud CLI and authenticate
146
+ gcloud auth application-default login
147
+ ```
148
+
149
+ Configure in MulmoScript:
150
+ ```json
151
+ {
152
+ "imageParams": {
153
+ "provider": "google",
154
+ "model": "imagen-4.0-generate-001",
155
+ "vertexai_project": "your-project-id",
156
+ "vertexai_location": "us-central1"
157
+ }
158
+ }
159
+ ```
160
+
161
+ | Parameter | Description | Default |
162
+ |-----------|-------------|---------|
163
+ | `vertexai_project` | Google Cloud Project ID | None (enables Vertex AI mode when set) |
164
+ | `vertexai_location` | Region | `us-central1` |
165
+
166
+ For detailed setup instructions, see [Vertex AI Setup Guide](./docs/vertexai_en.md).
167
+
168
+ ### Azure OpenAI
169
+
170
+ To use Azure OpenAI instead of OpenAI API:
171
+
172
+ ```bash
173
+ # For image generation
174
+ IMAGE_OPENAI_API_KEY=<your-azure-openai-api-key>
175
+ IMAGE_OPENAI_BASE_URL=https://<resource-name>.openai.azure.com/
176
+
177
+ # For TTS (Text-to-Speech)
178
+ TTS_OPENAI_API_KEY=<your-azure-openai-api-key>
179
+ TTS_OPENAI_BASE_URL=https://<resource-name>.openai.azure.com/
180
+
181
+ # For LLM (translate, scripting)
182
+ LLM_OPENAI_API_KEY=<your-azure-openai-api-key>
183
+ LLM_OPENAI_BASE_URL=https://<resource-name>.openai.azure.com/
184
+ LLM_OPENAI_API_VERSION=2025-04-01-preview # optional
185
+ ```
186
+
187
+ MulmoScript configuration (same as OpenAI):
188
+ ```json
189
+ {
190
+ "imageParams": {
191
+ "provider": "openai",
192
+ "model": "gpt-image-1.5"
193
+ },
194
+ "speechParams": {
195
+ "speakers": {
196
+ "Presenter": {
197
+ "provider": "openai",
198
+ "voiceId": "alloy",
199
+ "model": "tts"
200
+ }
201
+ }
202
+ }
203
+ }
204
+ ```
205
+
206
+ **Important**: Azure deployment names must match model names exactly (e.g., deployment name `gpt-image-1.5` for model `gpt-image-1.5`).
207
+
208
+ For detailed setup and region availability, see [Azure OpenAI Usage Guide](./docs/azure_openai_usage.md).
209
+
143
210
  ## Workflow
144
211
 
145
212
  1. Create a MulmoScript JSON file with `mulmo tool scripting`
@@ -217,8 +284,7 @@ mulmo tool scripting --input-file story.txt
217
284
  mulmo tool scripting -i
218
285
  ```
219
286
 
220
- Note:
221
- - When using the `⁠sensei_and_taro` template, a Nijivoice API key is required
287
+ Note:
222
288
  - When -i is specified, --input-file value will be ignored
223
289
  - When --input-file is specified, -u value will be ignored
224
290
 
@@ -58,21 +58,27 @@ const calculateCumulativeRatios = (ratios) => {
58
58
  };
59
59
  // Generate caption files for a single beat
60
60
  const generateBeatCaptions = async (beat, context, index) => {
61
- const captionParams = mulmoCaptionParamsSchema.parse({ ...context.studio.script.captionParams, ...beat.captionParams });
61
+ const globalCaptionParamsRaw = context.studio.script.captionParams ?? {};
62
+ const beatCaptionParamsRaw = beat.captionParams ?? {};
63
+ const mergedCaptionParams = mulmoCaptionParamsSchema.parse({
64
+ ...globalCaptionParamsRaw,
65
+ ...beatCaptionParamsRaw,
66
+ styles: Object.hasOwn(beatCaptionParamsRaw, "styles") ? beatCaptionParamsRaw.styles : globalCaptionParamsRaw.styles,
67
+ });
62
68
  const canvasSize = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
63
69
  const template = getHTMLFile("caption");
64
- if (captionParams.lang && !context.multiLingual?.[index]?.multiLingualTexts?.[captionParams.lang]) {
65
- GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${captionParams.lang}`);
70
+ if (mergedCaptionParams.lang && !context.multiLingual?.[index]?.multiLingualTexts?.[mergedCaptionParams.lang]) {
71
+ GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${mergedCaptionParams.lang}`);
66
72
  }
67
- const text = localizedText(beat, context.multiLingual?.[index], captionParams.lang, context.studio.script.lang);
73
+ const text = localizedText(beat, context.multiLingual?.[index], mergedCaptionParams.lang, context.studio.script.lang);
68
74
  // Get beat timing info
69
75
  const studioBeat = context.studio.beats[index];
70
76
  const beatStartAt = studioBeat.startAt ?? 0;
71
77
  const beatDuration = studioBeat.duration ?? 0;
72
78
  const introPadding = MulmoStudioContextMethods.getIntroPadding(context);
73
79
  // Determine split texts based on captionSplit setting
74
- const captionSplit = captionParams.captionSplit ?? "none";
75
- const splitTexts = captionSplit === "estimate" ? getSplitTexts(text, beat.texts, captionParams.textSplit) : [text];
80
+ const captionSplit = mergedCaptionParams.captionSplit ?? "none";
81
+ const splitTexts = captionSplit === "estimate" ? getSplitTexts(text, beat.texts, mergedCaptionParams.textSplit) : [text];
76
82
  // Calculate timing
77
83
  const cumulativeRatios = calculateCumulativeRatios(calculateTimingRatios(splitTexts));
78
84
  // Generate caption images with absolute timing
@@ -82,7 +88,7 @@ const generateBeatCaptions = async (beat, context, index) => {
82
88
  caption: processLineBreaks(segmentText),
83
89
  width: `${canvasSize.width}`,
84
90
  height: `${canvasSize.height}`,
85
- styles: captionParams.styles.join(";\n"),
91
+ styles: (mergedCaptionParams.styles ?? []).join(";\n"),
86
92
  });
87
93
  await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
88
94
  return {
@@ -0,0 +1,8 @@
1
+ import { Argv } from "yargs";
2
+ export declare const builder: (yargs: Argv) => Argv<{
3
+ o: string | undefined;
4
+ } & {
5
+ t: string | undefined;
6
+ } & {
7
+ file: string;
8
+ }>;
@@ -0,0 +1,23 @@
1
+ import { getAvailablePromptTemplates } from "../../../../utils/file.js";
2
+ const availableTemplateNames = getAvailablePromptTemplates().map((template) => template.filename);
3
/**
 * Declare the command-line arguments accepted by the `complete` tool command.
 *
 * Registers two optional string options (`-o`/`--output` and `-t`/`--template`)
 * and the required positional `file` argument on the given yargs instance.
 *
 * @param yargs - The yargs instance to configure.
 * @returns The value returned by the final chained yargs call.
 */
export const builder = (yargs) => {
    // Where the completed script is written; optional.
    const outputOption = {
        alias: "output",
        description: "Output file path (default: <file>_completed.json)",
        demandOption: false,
        type: "string",
    };
    // Optional template/style; restricted to the template names collected at module load.
    const templateOption = {
        alias: "template",
        description: "Template/style name to apply",
        demandOption: false,
        choices: availableTemplateNames,
        type: "string",
    };
    // The beats file to complete; mandatory.
    const fileArgument = {
        description: "Input beats file path (JSON)",
        type: "string",
        demandOption: true,
    };
    const withOutput = yargs.option("o", outputOption);
    const withTemplate = withOutput.option("t", templateOption);
    return withTemplate.positional("file", fileArgument);
};
@@ -0,0 +1,8 @@
1
+ type CompleteHandlerArgs = {
2
+ file: string;
3
+ o?: string;
4
+ t?: string;
5
+ v?: boolean;
6
+ };
7
+ export declare const handler: (argv: CompleteHandlerArgs) => Promise<void>;
8
+ export {};
@@ -0,0 +1,40 @@
1
+ import { readFileSync, writeFileSync } from "fs";
2
+ import path from "path";
3
+ import { GraphAILogger } from "graphai";
4
+ import { completeScript, templateExists } from "../../../../tools/complete_script.js";
5
/**
 * Derive the default output path used when no explicit output path is given.
 *
 * A trailing `.json` extension (matched case-insensitively) is replaced by
 * `_completed.json`. For any other path the suffix is appended instead, so the
 * result can never be the input path itself and the input file is never
 * overwritten by accident.
 *
 * @param {string} inputPath - Path of the input file.
 * @returns {string} Path the completed script should be written to.
 */
const defaultOutputPath = (inputPath) => {
    const jsonExtension = /\.json$/i;
    return jsonExtension.test(inputPath) ? inputPath.replace(jsonExtension, "_completed.json") : `${inputPath}_completed.json`;
};
/**
 * Handler of the `complete` tool command.
 *
 * Reads the input file as JSON, passes the parsed data and the optional
 * template name to `completeScript`, and writes `result.data` as
 * pretty-printed JSON to the output path.
 *
 * The process exits with status 1 when the file argument is missing, when the
 * input cannot be read or parsed, or when `completeScript` reports failure
 * (each reported issue is logged first). An unknown template name only
 * produces a warning; processing continues.
 *
 * @param argv - Parsed CLI arguments: `file` (input path), `o` (optional
 *   output path), `t` (optional template name), `v` (verbose flag).
 * @returns {Promise<void>}
 */
export const handler = async (argv) => {
    const { file, o: outputPath, t: templateName, v: verbose } = argv;
    if (!file) {
        GraphAILogger.error("Error: Input file is required");
        process.exit(1);
    }
    const inputPath = path.resolve(file);
    const inputData = (() => {
        try {
            const content = readFileSync(inputPath, "utf-8");
            return JSON.parse(content);
        }
        catch (error) {
            // Covers both I/O failures and malformed JSON.
            GraphAILogger.error(`Error reading file: ${inputPath}`);
            GraphAILogger.error(error.message);
            process.exit(1);
        }
    })();
    if (templateName && !templateExists(templateName)) {
        GraphAILogger.warn(`Warning: Template '${templateName}' not found`);
    }
    const result = completeScript(inputData, templateName);
    if (!result.success) {
        GraphAILogger.error("Validation errors:");
        result.error.issues.forEach((issue) => {
            GraphAILogger.error(`  - ${issue.path.join(".")}: ${issue.message}`);
        });
        process.exit(1);
    }
    if (verbose && templateName) {
        GraphAILogger.info(`Applied template: ${templateName}`);
    }
    // Previously a non-".json" input made the default output path equal to the
    // input path, silently overwriting the source file.
    const outputFilePath = outputPath ? path.resolve(outputPath) : defaultOutputPath(inputPath);
    writeFileSync(outputFilePath, JSON.stringify(result.data, null, 2));
    GraphAILogger.info(`Completed script written to: ${outputFilePath}`);
};
@@ -0,0 +1,4 @@
1
+ export declare const command = "complete <file>";
2
+ export declare const desc = "Complete MulmoScript with schema defaults and optional style";
3
+ export { builder } from "./builder.js";
4
+ export { handler } from "./handler.js";
@@ -0,0 +1,4 @@
1
+ export const command = "complete <file>";
2
+ export const desc = "Complete MulmoScript with schema defaults and optional style";
3
+ export { builder } from "./builder.js";
4
+ export { handler } from "./handler.js";
@@ -3,7 +3,8 @@ import * as promptCmd from "./prompt/index.js";
3
3
  import * as schemaCmd from "./schema/index.js";
4
4
  import * as storyToScriptCmd from "./story_to_script/index.js";
5
5
  import * as whisperCmd from "./whisper/index.js";
6
+ import * as completeCmd from "./complete/index.js";
6
7
  export const command = "tool <command>";
7
8
  export const desc = "Generate Mulmo script and other tools";
8
- export const builder = (y) => y.command(scriptingCmd).command(promptCmd).command(schemaCmd).command(storyToScriptCmd).command(whisperCmd).demandCommand().strict();
9
+ export const builder = (y) => y.command(scriptingCmd).command(promptCmd).command(schemaCmd).command(storyToScriptCmd).command(whisperCmd).command(completeCmd).demandCommand().strict();
9
10
  export const handler = (__argv) => { };
@@ -0,0 +1,255 @@
1
+ export declare const styles: ({
2
+ filename: string;
3
+ presentationStyle: {
4
+ $mulmocast: {
5
+ credit: string;
6
+ version: string;
7
+ };
8
+ canvasSize: {
9
+ height: number;
10
+ width: number;
11
+ };
12
+ imageParams: {
13
+ images: {
14
+ girl: {
15
+ source: {
16
+ kind: string;
17
+ url: string;
18
+ };
19
+ type: string;
20
+ };
21
+ ani?: undefined;
22
+ presenter?: undefined;
23
+ optimus?: undefined;
24
+ };
25
+ style: string;
26
+ model?: undefined;
27
+ provider?: undefined;
28
+ };
29
+ audioParams?: undefined;
30
+ movieParams?: undefined;
31
+ speechParams?: undefined;
32
+ };
33
+ } | {
34
+ filename: string;
35
+ presentationStyle: {
36
+ $mulmocast: {
37
+ credit: string;
38
+ version: string;
39
+ };
40
+ audioParams: {
41
+ bgm: {
42
+ kind: string;
43
+ url: string;
44
+ };
45
+ };
46
+ canvasSize: {
47
+ height: number;
48
+ width: number;
49
+ };
50
+ imageParams: {
51
+ images: {
52
+ ani: {
53
+ source: {
54
+ kind: string;
55
+ url: string;
56
+ };
57
+ type: string;
58
+ };
59
+ girl?: undefined;
60
+ presenter?: undefined;
61
+ optimus?: undefined;
62
+ };
63
+ model: string;
64
+ provider: string;
65
+ style: string;
66
+ };
67
+ movieParams: {
68
+ model: string;
69
+ provider: string;
70
+ };
71
+ speechParams: {
72
+ speakers: {
73
+ Presenter: {
74
+ provider: string;
75
+ speechOptions: {
76
+ instruction: string;
77
+ };
78
+ voiceId: string;
79
+ };
80
+ };
81
+ provider?: undefined;
82
+ };
83
+ };
84
+ } | {
85
+ filename: string;
86
+ presentationStyle: {
87
+ $mulmocast: {
88
+ credit: string;
89
+ version: string;
90
+ };
91
+ canvasSize: {
92
+ height: number;
93
+ width: number;
94
+ };
95
+ imageParams: {
96
+ style: string;
97
+ images?: undefined;
98
+ model?: undefined;
99
+ provider?: undefined;
100
+ };
101
+ audioParams?: undefined;
102
+ movieParams?: undefined;
103
+ speechParams?: undefined;
104
+ };
105
+ } | {
106
+ filename: string;
107
+ presentationStyle: {
108
+ $mulmocast: {
109
+ credit: string;
110
+ version: string;
111
+ };
112
+ canvasSize: {
113
+ height: number;
114
+ width: number;
115
+ };
116
+ imageParams: {
117
+ images: {
118
+ presenter: {
119
+ source: {
120
+ kind: string;
121
+ url: string;
122
+ };
123
+ type: string;
124
+ };
125
+ girl?: undefined;
126
+ ani?: undefined;
127
+ optimus?: undefined;
128
+ };
129
+ style: string;
130
+ model?: undefined;
131
+ provider?: undefined;
132
+ };
133
+ audioParams?: undefined;
134
+ movieParams?: undefined;
135
+ speechParams?: undefined;
136
+ };
137
+ } | {
138
+ filename: string;
139
+ presentationStyle: {
140
+ $mulmocast: {
141
+ credit: string;
142
+ version: string;
143
+ };
144
+ canvasSize: {
145
+ height: number;
146
+ width: number;
147
+ };
148
+ imageParams: {
149
+ images: {
150
+ presenter: {
151
+ source: {
152
+ kind: string;
153
+ url: string;
154
+ };
155
+ type: string;
156
+ };
157
+ girl?: undefined;
158
+ ani?: undefined;
159
+ optimus?: undefined;
160
+ };
161
+ style: string;
162
+ model?: undefined;
163
+ provider?: undefined;
164
+ };
165
+ speechParams: {
166
+ provider: string;
167
+ speakers: {
168
+ Presenter: {
169
+ speechOptions: {
170
+ instruction: string;
171
+ };
172
+ voiceId: string;
173
+ provider?: undefined;
174
+ };
175
+ };
176
+ };
177
+ audioParams?: undefined;
178
+ movieParams?: undefined;
179
+ };
180
+ } | {
181
+ filename: string;
182
+ presentationStyle: {
183
+ $mulmocast: {
184
+ credit: string;
185
+ version: string;
186
+ };
187
+ canvasSize: {
188
+ height: number;
189
+ width: number;
190
+ };
191
+ imageParams: {
192
+ images: {
193
+ optimus: {
194
+ source: {
195
+ kind: string;
196
+ url: string;
197
+ };
198
+ type: string;
199
+ };
200
+ presenter: {
201
+ source: {
202
+ kind: string;
203
+ url: string;
204
+ };
205
+ type: string;
206
+ };
207
+ girl?: undefined;
208
+ ani?: undefined;
209
+ };
210
+ style: string;
211
+ model?: undefined;
212
+ provider?: undefined;
213
+ };
214
+ audioParams?: undefined;
215
+ movieParams?: undefined;
216
+ speechParams?: undefined;
217
+ };
218
+ } | {
219
+ filename: string;
220
+ presentationStyle: {
221
+ $mulmocast: {
222
+ credit: string;
223
+ version: string;
224
+ };
225
+ audioParams: {
226
+ bgm: {
227
+ kind: string;
228
+ url: string;
229
+ };
230
+ };
231
+ canvasSize: {
232
+ height: number;
233
+ width: number;
234
+ };
235
+ imageParams: {
236
+ model: string;
237
+ provider: string;
238
+ images?: undefined;
239
+ style?: undefined;
240
+ };
241
+ speechParams: {
242
+ speakers: {
243
+ Presenter: {
244
+ provider: string;
245
+ speechOptions: {
246
+ instruction: string;
247
+ };
248
+ voiceId: string;
249
+ };
250
+ };
251
+ provider?: undefined;
252
+ };
253
+ movieParams?: undefined;
254
+ };
255
+ })[];