mulmocast 1.2.23 → 1.2.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
Binary file
Binary file
@@ -1,5 +1,5 @@
  {
- "title": "Ghibli-style comic strips with nano banana.",
+ "title": "Ghibli-style comic strips",
  "description": "Ghibli-style comic strips with nano banana.",
  "systemPrompt": "Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  "presentationStyle": {
@@ -0,0 +1,6 @@
+ {
+ "title": "Business Analysis",
+ "description": "Template for business analysis presentation.",
+ "systemPrompt": "First, determine a set of slides (=beats) to present, and choose an appropriate style for each beat (from the JSON template blow) and add required data for it. For each beat, put an appropriate text to the text field for the presenter to read for that slide in details. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+ "scriptName": "vision.json"
+ }
@@ -1,7 +1,9 @@
  import "dotenv/config";
+ import type { GraphData } from "graphai";
  import { MulmoStudioContext, MulmoBeat, PublicAPIArgs } from "../types/index.js";
  export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
  export declare const listLocalizedAudioPaths: (context: MulmoStudioContext) => (string | undefined)[];
+ export declare const audio_graph_data: GraphData;
  export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, args?: PublicAPIArgs & {
  langs: string[];
  }) => Promise<void>;
@@ -141,7 +141,7 @@ const graph_tts_map = {
  },
  },
  };
- const graph_data = {
+ export const audio_graph_data = {
  version: 0.5,
  concurrency: 8,
  nodes: {
@@ -276,7 +276,7 @@ export const audio = async (context, args) => {
  mkdir(audioSegmentDirPath);
  const config = settings2GraphAIConfig(settings, process.env);
  const taskManager = new TaskManager(getConcurrency(context));
- const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager, config });
+ const graph = new GraphAI(audio_graph_data, audioAgents, { agentFilters, taskManager, config });
  graph.injectValue("context", context);
  graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
  graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
@@ -1,2 +1,4 @@
  import { MulmoStudioContext, PublicAPIArgs } from "../types/index.js";
+ import type { GraphData } from "graphai";
+ export declare const caption_graph_data: GraphData;
  export declare const captions: (context: MulmoStudioContext, args?: PublicAPIArgs) => Promise<MulmoStudioContext>;
@@ -7,7 +7,7 @@ import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
  import { MulmoStudioContextMethods, MulmoPresentationStyleMethods } from "../methods/index.js";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  const vanillaAgents = agents.default ?? agents;
- const graph_data = {
+ export const caption_graph_data = {
  version: 0.5,
  nodes: {
  context: {},
@@ -74,7 +74,7 @@ export const captions = async (context, args) => {
  if (MulmoStudioContextMethods.getCaption(context)) {
  try {
  MulmoStudioContextMethods.setSessionState(context, "caption", true);
- const graph = new GraphAI(graph_data, { ...vanillaAgents, fileWriteAgent });
+ const graph = new GraphAI(caption_graph_data, { ...vanillaAgents, fileWriteAgent });
  const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
  const fileName = MulmoStudioContextMethods.getFileName(context);
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
@@ -1,5 +1,396 @@
- import type { GraphOptions } from "graphai";
- import { MulmoStudioContext, PublicAPIArgs } from "../types/index.js";
+ import type { GraphOptions, GraphData } from "graphai";
+ import { MulmoStudioContext, MulmoImageParams, PublicAPIArgs } from "../types/index.js";
+ export declare const beat_graph_data: {
+ version: number;
+ concurrency: number;
+ nodes: {
+ context: {};
+ htmlImageAgentInfo: {};
+ imageRefs: {};
+ beat: {};
+ __mapIndex: {};
+ forceMovie: {
+ value: boolean;
+ };
+ forceImage: {
+ value: boolean;
+ };
+ forceLipSync: {
+ value: boolean;
+ };
+ forceSoundEffect: {
+ value: boolean;
+ };
+ preprocessor: {
+ agent: (namedInputs: {
+ context: MulmoStudioContext;
+ beat: import("../types/type.js").MulmoBeat;
+ index: number;
+ imageRefs: Record<string, string>;
+ }) => Promise<{
+ imagePath: string;
+ htmlPrompt: string | undefined;
+ htmlImageFile: string;
+ htmlPath: string;
+ htmlImageSystemPrompt: string;
+ } | {
+ imagePath: string | undefined;
+ referenceImageForMovie: string | undefined;
+ imageParams: MulmoImageParams;
+ movieFile: string | undefined;
+ soundEffectFile?: string;
+ soundEffectPrompt?: string;
+ soundEffectModel?: string;
+ soundEffectAgentInfo?: {
+ agentName: string;
+ defaultModel: string;
+ };
+ lipSyncFile?: string;
+ lipSyncModel?: string;
+ lipSyncAgentName?: string;
+ lipSyncTrimAudio?: boolean;
+ bgmFile?: string | null;
+ startAt?: number;
+ duration?: number;
+ audioFile?: string;
+ beatDuration?: number;
+ htmlPrompt?: undefined;
+ htmlImageFile?: undefined;
+ htmlPath?: undefined;
+ htmlImageSystemPrompt?: undefined;
+ } | {
+ imagePath: string;
+ imageFromMovie: boolean;
+ movieAgentInfo: {
+ agent: string;
+ movieParams: {
+ speed?: number | undefined;
+ provider?: string | undefined;
+ model?: string | undefined;
+ fillOption?: {
+ style: "aspectFit" | "aspectFill";
+ } | undefined;
+ transition?: {
+ type: "fade" | "slideout_left";
+ duration: number;
+ } | undefined;
+ };
+ };
+ imageParams: MulmoImageParams;
+ movieFile: string | undefined;
+ soundEffectFile?: string;
+ soundEffectPrompt?: string;
+ soundEffectModel?: string;
+ soundEffectAgentInfo?: {
+ agentName: string;
+ defaultModel: string;
+ };
+ lipSyncFile?: string;
+ lipSyncModel?: string;
+ lipSyncAgentName?: string;
+ lipSyncTrimAudio?: boolean;
+ bgmFile?: string | null;
+ startAt?: number;
+ duration?: number;
+ audioFile?: string;
+ beatDuration?: number;
+ htmlPrompt?: undefined;
+ htmlImageFile?: undefined;
+ htmlPath?: undefined;
+ htmlImageSystemPrompt?: undefined;
+ } | {
+ imagePath: string;
+ referenceImageForMovie: string;
+ imageAgentInfo: import("../types/type.js").Text2ImageAgentInfo;
+ prompt: string;
+ referenceImages: string[];
+ movieAgentInfo: {
+ agent: string;
+ movieParams: {
+ speed?: number | undefined;
+ provider?: string | undefined;
+ model?: string | undefined;
+ fillOption?: {
+ style: "aspectFit" | "aspectFill";
+ } | undefined;
+ transition?: {
+ type: "fade" | "slideout_left";
+ duration: number;
+ } | undefined;
+ };
+ };
+ imageParams: MulmoImageParams;
+ movieFile: string | undefined;
+ soundEffectFile?: string;
+ soundEffectPrompt?: string;
+ soundEffectModel?: string;
+ soundEffectAgentInfo?: {
+ agentName: string;
+ defaultModel: string;
+ };
+ lipSyncFile?: string;
+ lipSyncModel?: string;
+ lipSyncAgentName?: string;
+ lipSyncTrimAudio?: boolean;
+ bgmFile?: string | null;
+ startAt?: number;
+ duration?: number;
+ audioFile?: string;
+ beatDuration?: number;
+ htmlPrompt?: undefined;
+ htmlImageFile?: undefined;
+ htmlPath?: undefined;
+ htmlImageSystemPrompt?: undefined;
+ }>;
+ inputs: {
+ context: string;
+ beat: string;
+ index: string;
+ imageRefs: string;
+ };
+ };
+ imagePlugin: {
+ if: string;
+ defaultValue: {};
+ agent: (namedInputs: {
+ context: MulmoStudioContext;
+ beat: import("../types/type.js").MulmoBeat;
+ index: number;
+ }) => Promise<void>;
+ inputs: {
+ context: string;
+ beat: string;
+ index: string;
+ onComplete: string[];
+ };
+ };
+ htmlImageAgent: {
+ if: string;
+ defaultValue: {};
+ agent: string;
+ inputs: {
+ media: string;
+ prompt: string;
+ system: string;
+ params: {
+ model: string;
+ max_tokens: string;
+ };
+ cache: {
+ force: string[];
+ file: string;
+ index: string;
+ id: string;
+ mulmoContext: string;
+ sessionType: string;
+ };
+ };
+ };
+ htmlReader: {
+ if: string;
+ agent: (namedInputs: {
+ htmlPath: string;
+ }) => Promise<{
+ html: string;
+ }>;
+ inputs: {
+ onComplete: string[];
+ htmlPath: string;
+ };
+ output: {
+ htmlText: string;
+ };
+ defaultValue: {};
+ };
+ htmlImageGenerator: {
+ if: string;
+ defaultValue: {};
+ agent: (namedInputs: {
+ file: string;
+ canvasSize: import("../types/type.js").MulmoCanvasDimension;
+ htmlText: string;
+ }) => Promise<void>;
+ inputs: {
+ htmlText: string;
+ canvasSize: string;
+ file: string;
+ };
+ };
+ imageGenerator: {
+ if: string;
+ agent: string;
+ retry: number;
+ inputs: {
+ media: string;
+ prompt: string;
+ referenceImages: string;
+ cache: {
+ force: string[];
+ file: string;
+ index: string;
+ id: string;
+ mulmoContext: string;
+ sessionType: string;
+ };
+ params: {
+ model: string;
+ moderation: string;
+ canvasSize: string;
+ quality: string;
+ };
+ };
+ defaultValue: {};
+ };
+ movieGenerator: {
+ if: string;
+ agent: string;
+ inputs: {
+ media: string;
+ onComplete: string[];
+ prompt: string;
+ imagePath: string;
+ movieFile: string;
+ cache: {
+ force: string[];
+ file: string;
+ index: string;
+ id: string;
+ sessionType: string;
+ mulmoContext: string;
+ };
+ params: {
+ model: string;
+ duration: string;
+ canvasSize: string;
+ };
+ };
+ defaultValue: {};
+ };
+ imageFromMovie: {
+ if: string;
+ agent: (namedInputs: {
+ movieFile: string;
+ imageFile: string;
+ }) => Promise<object>;
+ inputs: {
+ onComplete: string[];
+ imageFile: string;
+ movieFile: string;
+ };
+ defaultValue: {};
+ };
+ audioChecker: {
+ agent: (namedInputs: {
+ movieFile: string;
+ imageFile: string;
+ soundEffectFile: string;
+ }) => Promise<{
+ hasMovieAudio: boolean;
+ }>;
+ inputs: {
+ onComplete: string[];
+ movieFile: string;
+ imageFile: string;
+ soundEffectFile: string;
+ };
+ };
+ soundEffectGenerator: {
+ if: string;
+ agent: string;
+ inputs: {
+ onComplete: string[];
+ prompt: string;
+ movieFile: string;
+ soundEffectFile: string;
+ params: {
+ model: string;
+ duration: string;
+ };
+ cache: {
+ force: string[];
+ file: string;
+ index: string;
+ id: string;
+ sessionType: string;
+ mulmoContext: string;
+ };
+ };
+ defaultValue: {};
+ };
+ AudioTrimmer: {
+ if: string;
+ agent: (namedInputs: {
+ audioFile: string;
+ bgmFile: string;
+ startAt: number;
+ duration: number;
+ }) => Promise<{
+ buffer: Buffer<ArrayBufferLike>;
+ }>;
+ inputs: {
+ onComplete: string[];
+ audioFile: string;
+ bgmFile: string;
+ startAt: string;
+ duration: string;
+ cache: {
+ force: string[];
+ file: string;
+ index: string;
+ id: string;
+ mulmoContext: string;
+ };
+ };
+ defaultValue: {};
+ };
+ lipSyncGenerator: {
+ if: string;
+ agent: string;
+ inputs: {
+ onComplete: string[];
+ movieFile: string;
+ imageFile: string;
+ audioFile: string;
+ lipSyncFile: string;
+ params: {
+ model: string;
+ duration: string;
+ };
+ cache: {
+ force: string[];
+ file: string;
+ index: string;
+ id: string;
+ sessionType: string;
+ mulmoContext: string;
+ };
+ };
+ defaultValue: {};
+ };
+ output: {
+ agent: string;
+ inputs: {
+ onComplete: string[];
+ imageFile: string;
+ movieFile: string;
+ soundEffectFile: string;
+ lipSyncFile: string;
+ hasMovieAudio: string;
+ htmlImageFile: string;
+ };
+ output: {
+ imageFile: string;
+ movieFile: string;
+ soundEffectFile: string;
+ lipSyncFile: string;
+ hasMovieAudio: string;
+ htmlImageFile: string;
+ };
+ isResult: boolean;
+ };
+ };
+ };
+ export declare const images_graph_data: GraphData;
  export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
  type ImageOptions = {
  imageAgents: Record<string, unknown>;
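The declaration above spells out beat_graph_data as a literal object type, exposing every node of the per-beat pipeline, and adds images_graph_data as a plain GraphData. A hedged sketch of what the new exports enable (the import path is assumed, not confirmed by the diff):

// Hypothetical: enumerate the beat graph's nodes via the new typed export.
import { beat_graph_data, images_graph_data } from "mulmocast";

// The node names come straight from the declaration above.
const beatNodes = Object.keys(beat_graph_data.nodes);
// ["context", "htmlImageAgentInfo", "imageRefs", "beat", "__mapIndex", ...]
console.log(beatNodes, images_graph_data.version);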
@@ -41,7 +41,7 @@ const defaultAgents = {
  anthropicAgent,
  };
  dotenv.config();
- const beat_graph_data = {
+ export const beat_graph_data = {
  version: 0.5,
  concurrency: 4,
  nodes: {
@@ -297,7 +297,7 @@ const beat_graph_data = {
  },
  },
  };
- const graph_data = {
+ export const images_graph_data = {
  version: 0.5,
  concurrency: 4,
  nodes: {
@@ -403,7 +403,7 @@ const generateImages = async (context, args) => {
  ...defaultAgents,
  ...optionImageAgents,
  };
- const graph = new GraphAI(graph_data, graphaiAgent, await graphOption(context, settings));
+ const graph = new GraphAI(images_graph_data, graphaiAgent, await graphOption(context, settings));
  Object.keys(injections).forEach((key) => {
  graph.injectValue(key, injections[key]);
  });
@@ -1,8 +1,8 @@
  export * from "./audio.js";
  export * from "./images.js";
+ export * from "./captions.js";
  export * from "./image_references.js";
  export * from "./image_agents.js";
  export * from "./movie.js";
  export * from "./pdf.js";
  export * from "./translate.js";
- export * from "./captions.js";
@@ -1,8 +1,8 @@
  export * from "./audio.js";
  export * from "./images.js";
+ export * from "./captions.js";
  export * from "./image_references.js";
  export * from "./image_agents.js";
  export * from "./movie.js";
  export * from "./pdf.js";
  export * from "./translate.js";
- export * from "./captions.js";
@@ -1,4 +1,5 @@
  import "dotenv/config";
+ import type { GraphData } from "graphai";
  import type { LANG, MulmoStudioContext, PublicAPIArgs } from "../types/index.js";
  export declare const translateTextGraph: {
  version: number;
@@ -45,6 +46,7 @@ export declare const translateTextGraph: {
  };
  };
  };
+ export declare const translate_graph_data: GraphData;
  export declare const getOutputMultilingualFilePathAndMkdir: (context: MulmoStudioContext) => {
  outputMultilingualFilePath: string;
  outDirPath: string;
@@ -132,7 +132,7 @@ const beatGraph = {
  },
  },
  };
- const translateGraph = {
+ export const translate_graph_data = {
  version: 0.5,
  nodes: {
  context: {},
@@ -276,7 +276,7 @@ export const translate = async (context, args) => {
  : [...new Set([context.lang, context.studio.script.captionParams?.lang].filter((x) => !isNull(x)))];
  const config = settings2GraphAIConfig(settings, process.env);
  assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
- const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
+ const graph = new GraphAI(translate_graph_data, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
  graph.injectValue("context", context);
  graph.injectValue("targetLangs", targetLangs);
  graph.injectValue("outDirPath", outDirPath);
@@ -1,8 +1,19 @@
  import fs from "fs";
  import { GraphAILogger } from "graphai";
- import { getAspectRatio } from "./movie_google_agent.js";
  import { provider2ImageAgent } from "../utils/provider2agent.js";
  import { GoogleGenAI, PersonGeneration } from "@google/genai";
+ import { blankImagePath, blankSquareImagePath, blankVerticalImagePath } from "../utils/file.js";
+ const getAspectRatio = (canvasSize) => {
+ if (canvasSize.width > canvasSize.height) {
+ return "16:9";
+ }
+ else if (canvasSize.width < canvasSize.height) {
+ return "9:16";
+ }
+ else {
+ return "1:1";
+ }
+ };
  export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
  const { prompt, referenceImages } = namedInputs;
  const aspectRatio = getAspectRatio(params.canvasSize);
@@ -15,12 +26,22 @@ export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
  const ai = new GoogleGenAI({ apiKey });
  if (model === "gemini-2.5-flash-image-preview") {
  const contents = [{ text: prompt }];
- referenceImages?.forEach((imagePath) => {
+ const images = [...(referenceImages ?? [])];
+ // NOTE: There is no way to explicitly specify the aspect ratio for Gemini. This is just a hint.
+ if (aspectRatio === "9:16") {
+ images.push(blankVerticalImagePath());
+ }
+ else if (aspectRatio === "1:1") {
+ images.push(blankSquareImagePath());
+ }
+ else {
+ images.push(blankImagePath());
+ }
+ images.forEach((imagePath) => {
  const imageData = fs.readFileSync(imagePath);
  const base64Image = imageData.toString("base64");
  contents.push({ inlineData: { mimeType: "image/png", data: base64Image } });
  });
- // NOTE: There is no way to specify the aspect ratio for Gemini.
  const response = await ai.models.generateContent({ model, contents });
  if (!response.candidates?.[0]?.content?.parts) {
  throw new Error("ERROR: generateContent returned no candidates");
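Two things happen in the two hunks above: getAspectRatio is inlined (it was previously imported from movie_google_agent.js), and the agent now appends a blank image with the target proportions to the reference images, since Gemini's image model takes no aspect-ratio parameter and a reference image can only hint at the desired shape. A standalone sketch of the heuristic (the CanvasSize shape is assumed from the surrounding code):

// Map canvas dimensions to the coarse aspect ratio used to pick a blank hint image.
type CanvasSize = { width: number; height: number };

const getAspectRatio = (canvasSize: CanvasSize): "16:9" | "9:16" | "1:1" => {
  if (canvasSize.width > canvasSize.height) return "16:9";
  if (canvasSize.width < canvasSize.height) return "9:16";
  return "1:1";
};

console.log(getAspectRatio({ width: 1080, height: 1920 })); // "9:16" → blankVerticalImagePath()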
@@ -33,6 +33,9 @@ export const movieGenAIAgent = async ({ namedInputs, params, config, }) => {
  },
  image: undefined,
  };
+ if (model === "veo-3.0-generate-preview") {
+ payload.config.durationSeconds = undefined;
+ }
  if (imagePath) {
  const buffer = readFileSync(imagePath);
  const imageBytes = buffer.toString("base64");
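The new guard appears to exist because the veo-3.0-generate-preview model does not accept an explicit clip duration; clearing durationSeconds lets the model fall back to its default length (the diff does not state the reason). A minimal sketch of the same defensive pattern (the payload shape is inferred from the surrounding hunk):

// Strip request fields that a specific model variant rejects.
const model: string = "veo-3.0-generate-preview"; // assumed to come from params
const payload: { config: { durationSeconds?: number } } = {
  config: { durationSeconds: 8 },
};
if (model === "veo-3.0-generate-preview") {
  payload.config.durationSeconds = undefined; // model chooses its own duration
}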
@@ -430,7 +430,7 @@ export const promptTemplates = [
  },
  scriptName: "text_only_template.json",
  systemPrompt: "Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
- title: "Ghibli-style comic strips with nano banana.",
+ title: "Ghibli-style comic strips",
  },
  {
  description: "Template for Ghost in the shell style comic presentation.",
@@ -908,4 +908,11 @@ export const promptTemplates = [
  systemPrompt: "This script is for a movie trailer. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
  title: "Movie Trailer template",
  },
+ {
+ description: "Template for business analysis presentation.",
+ filename: "vision",
+ scriptName: "vision.json",
+ systemPrompt: "First, determine a set of slides (=beats) to present, and choose an appropriate style for each beat (from the JSON template blow) and add required data for it. For each beat, put an appropriate text to the text field for the presenter to read for that slide in details. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+ title: "Business Analysis",
+ },
  ];