mulmocast 2.0.2 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +22 -0
  2. package/assets/templates/ghibli_comic_strips.json +1 -1
  3. package/lib/actions/audio.js +2 -1
  4. package/lib/actions/bundle.js +5 -2
  5. package/lib/agents/image_genai_agent.js +1 -1
  6. package/lib/agents/image_openai_agent.js +2 -2
  7. package/lib/agents/index.d.ts +2 -1
  8. package/lib/agents/index.js +2 -1
  9. package/lib/agents/movie_genai_agent.js +1 -1
  10. package/lib/agents/tts_gemini_agent.d.ts +5 -0
  11. package/lib/agents/tts_gemini_agent.js +64 -0
  12. package/lib/cli/commands/tool/scripting/builder.d.ts +1 -1
  13. package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
  14. package/lib/data/promptTemplates.d.ts +40 -10
  15. package/lib/data/promptTemplates.js +55 -1
  16. package/lib/data/templateDataSet.js +1 -1
  17. package/lib/methods/mulmo_presentation_style.js +1 -1
  18. package/lib/types/schema.d.ts +30 -30
  19. package/lib/types/schema.js +4 -2
  20. package/lib/types/type.d.ts +2 -0
  21. package/lib/utils/context.d.ts +33 -33
  22. package/lib/utils/ffmpeg_utils.d.ts +1 -0
  23. package/lib/utils/ffmpeg_utils.js +23 -0
  24. package/lib/utils/provider2agent.d.ts +10 -1
  25. package/lib/utils/provider2agent.js +17 -8
  26. package/lib/utils/utils.js +3 -0
  27. package/package.json +7 -7
  28. package/scripts/test/gpt.json +6 -0
  29. package/scripts/test/image-2.png +0 -0
  30. package/scripts/test/test_audio_gemini.json +67 -0
  31. package/scripts/test/test_audio_gemini.json~ +67 -0
  32. package/scripts/test/test_genai.json +17 -9
  33. package/scripts/test/test_image_refs.json +1 -1
  34. package/scripts/test/test_lipsync2.json +66 -0
  35. package/scripts/test/test_lipsync2.json~ +24 -0
  36. package/scripts/test/test_replicate.json +6 -6
package/README.md CHANGED
@@ -440,6 +440,28 @@ Options:
440
440
  --image_width Image width (e.g., 400px, 50%, auto) [string]
441
441
  ```
442
442
 
443
+ ```
444
+ mulmo bundle <file>
445
+
446
+ Generate bundle files
447
+
448
+ Positionals:
449
+ file Mulmo Script File [string] [required]
450
+
451
+ Options:
452
+ --version Show version number [boolean]
453
+ -v, --verbose verbose log [boolean] [required] [default: false]
454
+ -h, --help Show help [boolean]
455
+ -o, --outdir output dir [string]
456
+ -b, --basedir base dir [string]
457
+ -l, --lang target language
458
+ [string] [choices: "en", "ja", "fr", "es", "de", "zh-CN", "zh-TW", "ko", "it",
459
+ "pt", "ar", "hi"]
460
+ -f, --force Force regenerate [boolean] [default: false]
461
+ --backup create backup media file [boolean] [default: false]
462
+ -p, --presentationStyle Presentation Style [string]
463
+ ```
464
+
443
465
  ```
444
466
  mulmo tool <command>
445
467
 
@@ -13,7 +13,7 @@
13
13
  },
14
14
  "imageParams": {
15
15
  "provider": "google",
16
- "model": "gemini-2.5-flash-image-preview",
16
+ "model": "gemini-2.5-flash-image",
17
17
  "style": "<style>Ghibli style multi-panel comic strips in landscape mode. Use speech bubbles with short, natural dialogue (1–6 words). Keep text minimal, like real comics. Let the art convey the story and emotions. Use the input image as the presenter.</style>",
18
18
  "images": {
19
19
  "presenter": {
@@ -2,7 +2,7 @@ import dotenv from "dotenv";
2
2
  import { GraphAI, TaskManager, GraphAILogger } from "graphai";
3
3
  import * as agents from "@graphai/vanilla";
4
4
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
5
- import { ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsElevenlabsAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent } from "../agents/index.js";
5
+ import { ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsElevenlabsAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent, } from "../agents/index.js";
6
6
  import { text2SpeechProviderSchema } from "../types/index.js";
7
7
  import { fileCacheAgentFilter, nijovoiceTextAgentFilter } from "../utils/filters.js";
8
8
  import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
@@ -221,6 +221,7 @@ const audioAgents = {
221
221
  ttsOpenaiAgent,
222
222
  ttsNijivoiceAgent,
223
223
  ttsGoogleAgent,
224
+ ttsGeminiAgent,
224
225
  ttsElevenlabsAgent,
225
226
  mediaMockAgent,
226
227
  addBGMAgent,
@@ -58,8 +58,11 @@ export const mulmoViewerBundle = async (context) => {
58
58
  const zipper = new ZipBuilder(path.resolve(dir, zipFileName));
59
59
  // text
60
60
  const resultJson = [];
61
- context.studio.script.beats.forEach((beat) => {
62
- resultJson.push({ text: beat.text, duration: beat.duration, audioSources: {}, multiLinguals: {} });
61
+ context.studio.script.beats.forEach((beat, index) => {
62
+ const sudioBeats = context.studio.beats[index];
63
+ const { duration, startAt } = sudioBeats;
64
+ // console.log(context.studio.beats[index]);
65
+ resultJson.push({ text: beat.text, duration, startTime: startAt, endTime: (startAt ?? 0) + (duration ?? 0), audioSources: {}, multiLinguals: {} });
63
66
  });
64
67
  // audio
65
68
  for (const lang of bundleTargetLang) {
@@ -71,7 +71,7 @@ export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
71
71
  }
72
72
  try {
73
73
  const ai = new GoogleGenAI({ apiKey });
74
- if (model === "gemini-2.5-flash-image-preview") {
74
+ if (model === "gemini-2.5-flash-image" || model === "gemini-3-pro-image-preview") {
75
75
  const contents = getGeminiContents(prompt, aspectRatio, referenceImages);
76
76
  const response = await ai.models.generateContent({ model, contents });
77
77
  return geminiFlashResult(response);
@@ -17,7 +17,7 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
17
17
  const model = params.model ?? provider2ImageAgent["openai"].defaultModel;
18
18
  const openai = new OpenAI({ apiKey, baseURL });
19
19
  const size = (() => {
20
- if (model === "gpt-image-1") {
20
+ if (model === "gpt-image-1" || model === "gpt-image-1-mini") {
21
21
  if (canvasSize.width > canvasSize.height) {
22
22
  return "1536x1024";
23
23
  }
@@ -46,7 +46,7 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
46
46
  n: 1,
47
47
  size,
48
48
  };
49
- if (model === "gpt-image-1") {
49
+ if (model === "gpt-image-1" || model === "gpt-image-1-mini") {
50
50
  imageOptions.moderation = moderation || "auto";
51
51
  imageOptions.background = "opaque";
52
52
  if (quality) {
@@ -11,6 +11,7 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
11
11
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
12
12
  import ttsOpenaiAgent from "./tts_openai_agent.js";
13
13
  import ttsGoogleAgent from "./tts_google_agent.js";
14
+ import ttsGeminiAgent from "./tts_gemini_agent.js";
14
15
  import validateSchemaAgent from "./validate_schema_agent.js";
15
16
  import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
16
17
  import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
@@ -19,4 +20,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
19
20
  import { textInputAgent } from "@graphai/input_agents";
20
21
  import { openAIAgent } from "@graphai/openai_agent";
21
22
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
22
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
23
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
@@ -11,6 +11,7 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
11
11
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
12
12
  import ttsOpenaiAgent from "./tts_openai_agent.js";
13
13
  import ttsGoogleAgent from "./tts_google_agent.js";
14
+ import ttsGeminiAgent from "./tts_gemini_agent.js";
14
15
  import validateSchemaAgent from "./validate_schema_agent.js";
15
16
  import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
16
17
  import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
@@ -20,4 +21,4 @@ import { textInputAgent } from "@graphai/input_agents";
20
21
  import { openAIAgent } from "@graphai/openai_agent";
21
22
  // import * as vanilla from "@graphai/vanilla";
22
23
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
23
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
24
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
@@ -43,7 +43,7 @@ export const movieGenAIAgent = async ({ namedInputs, params, config, }) => {
43
43
  },
44
44
  image: undefined,
45
45
  };
46
- if (model === "veo-3.0-generate-preview") {
46
+ if (model === "veo-3.0-generate-001" || model === "veo-3.1-generate-preview") {
47
47
  payload.config.durationSeconds = undefined;
48
48
  }
49
49
  if (imagePath) {
@@ -0,0 +1,5 @@
1
+ import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
+ import type { GoogleTTSAgentParams, AgentBufferResult, AgentTextInputs, AgentErrorResult } from "../types/agent.js";
3
+ export declare const ttsGeminiAgent: AgentFunction<GoogleTTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs>;
4
+ declare const ttsGeminiAgentInfo: AgentFunctionInfo;
5
+ export default ttsGeminiAgentInfo;
@@ -0,0 +1,64 @@
1
+ import { GraphAILogger } from "graphai";
2
+ import { GoogleGenAI } from "@google/genai";
3
+ import { provider2TTSAgent } from "../utils/provider2agent.js";
4
+ import { apiKeyMissingError, agentGenerationError, audioAction, audioFileTarget } from "../utils/error_cause.js";
5
+ import { pcmToMp3 } from "../utils/ffmpeg_utils.js";
6
+ export const ttsGeminiAgent = async ({ namedInputs, params, config, }) => {
7
+ const { text } = namedInputs;
8
+ const { voice, suppressError } = params;
9
+ const apiKey = config?.apiKey;
10
+ if (!apiKey) {
11
+ throw new Error("Google GenAI API key is required (GEMINI_API_KEY)", {
12
+ cause: apiKeyMissingError("ttsGeminiAgent", audioAction, "GEMINI_API_KEY"),
13
+ });
14
+ }
15
+ try {
16
+ const ai = new GoogleGenAI({ apiKey });
17
+ const response = await ai.models.generateContent({
18
+ model: "gemini-2.5-flash-preview-tts",
19
+ contents: [{ parts: [{ text }] }],
20
+ config: {
21
+ responseModalities: ["AUDIO"],
22
+ speechConfig: {
23
+ voiceConfig: {
24
+ prebuiltVoiceConfig: { voiceName: voice ?? provider2TTSAgent.gemini.defaultVoice },
25
+ },
26
+ },
27
+ },
28
+ });
29
+ const inlineData = response.candidates?.[0]?.content?.parts?.[0]?.inlineData;
30
+ const pcmBase64 = inlineData?.data;
31
+ const mimeType = inlineData?.mimeType;
32
+ if (!pcmBase64)
33
+ throw new Error("No audio data returned");
34
+ // Extract sample rate from mimeType (e.g., "audio/L16;codec=pcm;rate=24000")
35
+ const rateMatch = mimeType?.match(/rate=(\d+)/);
36
+ const sampleRate = rateMatch ? parseInt(rateMatch[1]) : 24000;
37
+ const rawPcm = Buffer.from(pcmBase64, "base64");
38
+ return { buffer: await pcmToMp3(rawPcm, sampleRate) };
39
+ }
40
+ catch (e) {
41
+ if (suppressError) {
42
+ return {
43
+ error: e,
44
+ };
45
+ }
46
+ GraphAILogger.info(e);
47
+ throw new Error("TTS Gemini Error", {
48
+ cause: agentGenerationError("ttsGeminiAgent", audioAction, audioFileTarget),
49
+ });
50
+ }
51
+ };
52
+ const ttsGeminiAgentInfo = {
53
+ name: "ttsGeminiAgent",
54
+ agent: ttsGeminiAgent,
55
+ mock: ttsGeminiAgent,
56
+ samples: [],
57
+ description: "Google Gemini TTS agent",
58
+ category: ["tts"],
59
+ author: "Receptron Team",
60
+ repository: "https://github.com/receptron/mulmocast-cli/",
61
+ license: "MIT",
62
+ environmentVariables: ["GEMINI_API_KEY"],
63
+ };
64
+ export default ttsGeminiAgentInfo;
@@ -16,7 +16,7 @@ export declare const builder: (yargs: Argv) => Argv<{
16
16
  } & {
17
17
  s: string;
18
18
  } & {
19
- llm: "mock" | "openai" | "anthropic" | "gemini" | "groq" | undefined;
19
+ llm: "mock" | "openai" | "gemini" | "anthropic" | "groq" | undefined;
20
20
  } & {
21
21
  llm_model: string | undefined;
22
22
  }>;
@@ -10,7 +10,7 @@ export declare const builder: (yargs: Argv) => Argv<{
10
10
  } & {
11
11
  beats_per_scene: number;
12
12
  } & {
13
- llm: "mock" | "openai" | "anthropic" | "gemini" | "groq" | undefined;
13
+ llm: "mock" | "openai" | "gemini" | "anthropic" | "groq" | undefined;
14
14
  } & {
15
15
  llm_model: string | undefined;
16
16
  } & {
@@ -39,6 +39,10 @@ export declare const promptTemplates: ({
39
39
  provider?: undefined;
40
40
  model?: undefined;
41
41
  };
42
+ movieParams: {
43
+ provider: string;
44
+ model?: undefined;
45
+ };
42
46
  soundEffectParams: {
43
47
  provider: string;
44
48
  };
@@ -58,7 +62,6 @@ export declare const promptTemplates: ({
58
62
  Teacher?: undefined;
59
63
  };
60
64
  };
61
- movieParams?: undefined;
62
65
  };
63
66
  scriptName: string;
64
67
  systemPrompt: string;
@@ -181,6 +184,10 @@ export declare const promptTemplates: ({
181
184
  style?: undefined;
182
185
  model?: undefined;
183
186
  };
187
+ movieParams: {
188
+ provider: string;
189
+ model?: undefined;
190
+ };
184
191
  soundEffectParams: {
185
192
  provider: string;
186
193
  };
@@ -200,7 +207,6 @@ export declare const promptTemplates: ({
200
207
  Teacher?: undefined;
201
208
  };
202
209
  };
203
- movieParams?: undefined;
204
210
  };
205
211
  scriptName: string;
206
212
  systemPrompt: string;
@@ -233,6 +239,10 @@ export declare const promptTemplates: ({
233
239
  provider?: undefined;
234
240
  model?: undefined;
235
241
  };
242
+ movieParams: {
243
+ provider: string;
244
+ model?: undefined;
245
+ };
236
246
  soundEffectParams: {
237
247
  provider: string;
238
248
  };
@@ -252,7 +262,6 @@ export declare const promptTemplates: ({
252
262
  Teacher?: undefined;
253
263
  };
254
264
  };
255
- movieParams?: undefined;
256
265
  };
257
266
  scriptName: string;
258
267
  systemPrompt: string;
@@ -298,6 +307,10 @@ export declare const promptTemplates: ({
298
307
  provider?: undefined;
299
308
  model?: undefined;
300
309
  };
310
+ movieParams: {
311
+ provider: string;
312
+ model?: undefined;
313
+ };
301
314
  soundEffectParams: {
302
315
  provider: string;
303
316
  };
@@ -317,7 +330,6 @@ export declare const promptTemplates: ({
317
330
  Teacher?: undefined;
318
331
  };
319
332
  };
320
- movieParams?: undefined;
321
333
  };
322
334
  scriptName: string;
323
335
  systemPrompt: string;
@@ -363,6 +375,10 @@ export declare const promptTemplates: ({
363
375
  provider: string;
364
376
  style: string;
365
377
  };
378
+ movieParams: {
379
+ provider: string;
380
+ model?: undefined;
381
+ };
366
382
  soundEffectParams: {
367
383
  provider: string;
368
384
  };
@@ -382,7 +398,6 @@ export declare const promptTemplates: ({
382
398
  Teacher?: undefined;
383
399
  };
384
400
  };
385
- movieParams?: undefined;
386
401
  };
387
402
  scriptName: string;
388
403
  systemPrompt: string;
@@ -434,6 +449,10 @@ export declare const promptTemplates: ({
434
449
  provider?: undefined;
435
450
  model?: undefined;
436
451
  };
452
+ movieParams: {
453
+ provider: string;
454
+ model?: undefined;
455
+ };
437
456
  soundEffectParams: {
438
457
  provider: string;
439
458
  };
@@ -453,7 +472,6 @@ export declare const promptTemplates: ({
453
472
  Teacher?: undefined;
454
473
  };
455
474
  };
456
- movieParams?: undefined;
457
475
  };
458
476
  scriptName: string;
459
477
  systemPrompt: string;
@@ -486,6 +504,10 @@ export declare const promptTemplates: ({
486
504
  provider?: undefined;
487
505
  model?: undefined;
488
506
  };
507
+ movieParams: {
508
+ provider: string;
509
+ model?: undefined;
510
+ };
489
511
  soundEffectParams: {
490
512
  provider: string;
491
513
  };
@@ -515,7 +537,6 @@ export declare const promptTemplates: ({
515
537
  Presenter?: undefined;
516
538
  };
517
539
  };
518
- movieParams?: undefined;
519
540
  };
520
541
  scriptName: string;
521
542
  systemPrompt: string;
@@ -548,6 +569,10 @@ export declare const promptTemplates: ({
548
569
  provider?: undefined;
549
570
  model?: undefined;
550
571
  };
572
+ movieParams: {
573
+ provider: string;
574
+ model?: undefined;
575
+ };
551
576
  soundEffectParams: {
552
577
  provider: string;
553
578
  };
@@ -567,7 +592,6 @@ export declare const promptTemplates: ({
567
592
  Teacher?: undefined;
568
593
  };
569
594
  };
570
- movieParams?: undefined;
571
595
  };
572
596
  scriptName: string;
573
597
  systemPrompt: string;
@@ -613,6 +637,10 @@ export declare const promptTemplates: ({
613
637
  provider?: undefined;
614
638
  model?: undefined;
615
639
  };
640
+ movieParams: {
641
+ provider: string;
642
+ model?: undefined;
643
+ };
616
644
  soundEffectParams: {
617
645
  provider: string;
618
646
  };
@@ -632,7 +660,6 @@ export declare const promptTemplates: ({
632
660
  Teacher?: undefined;
633
661
  };
634
662
  };
635
- movieParams?: undefined;
636
663
  };
637
664
  scriptName: string;
638
665
  systemPrompt: string;
@@ -668,6 +695,10 @@ export declare const promptTemplates: ({
668
695
  provider?: undefined;
669
696
  model?: undefined;
670
697
  };
698
+ movieParams: {
699
+ provider: string;
700
+ model?: undefined;
701
+ };
671
702
  soundEffectParams: {
672
703
  provider: string;
673
704
  };
@@ -687,7 +718,6 @@ export declare const promptTemplates: ({
687
718
  Teacher?: undefined;
688
719
  };
689
720
  };
690
- movieParams?: undefined;
691
721
  };
692
722
  scriptName: string;
693
723
  systemPrompt: string;
@@ -32,6 +32,9 @@ export const promptTemplates = [
32
32
  },
33
33
  style: "<style>AKIRA aesthetic.</style>",
34
34
  },
35
+ movieParams: {
36
+ provider: "replicate",
37
+ },
35
38
  soundEffectParams: {
36
39
  provider: "replicate",
37
40
  },
@@ -149,6 +152,9 @@ export const promptTemplates = [
149
152
  images: {},
150
153
  provider: "openai",
151
154
  },
155
+ movieParams: {
156
+ provider: "replicate",
157
+ },
152
158
  soundEffectParams: {
153
159
  provider: "replicate",
154
160
  },
@@ -192,6 +198,9 @@ export const promptTemplates = [
192
198
  imageParams: {
193
199
  style: "A hand-drawn style illustration with a warm, nostalgic atmosphere. The background is rich with natural scenery—lush forests, cloudy skies, and traditional Japanese architecture. Characters have expressive eyes, soft facial features, and are portrayed with gentle lighting and subtle shading. The color palette is muted yet vivid, using earthy tones and watercolor-like textures. The overall scene feels magical and peaceful, with a sense of quiet wonder and emotional depth, reminiscent of classic 1980s and 1990s Japanese animation.",
194
200
  },
201
+ movieParams: {
202
+ provider: "replicate",
203
+ },
195
204
  soundEffectParams: {
196
205
  provider: "replicate",
197
206
  },
@@ -242,6 +251,9 @@ export const promptTemplates = [
242
251
  imageParams: {
243
252
  style: "<style>A multi panel comic strips. 1990s American workplace humor. Clean, minimalist line art with muted colors. One character is a nerdy office worker with glasses</style>",
244
253
  },
254
+ movieParams: {
255
+ provider: "replicate",
256
+ },
245
257
  soundEffectParams: {
246
258
  provider: "replicate",
247
259
  },
@@ -285,6 +297,9 @@ export const promptTemplates = [
285
297
  imageParams: {
286
298
  style: "<style>Photo realistic and cinematic. Let the art convey the story and emotions without text. Use the image for the aspect ratio</style>",
287
299
  },
300
+ movieParams: {
301
+ provider: "replicate",
302
+ },
288
303
  soundEffectParams: {
289
304
  provider: "replicate",
290
305
  },
@@ -337,6 +352,9 @@ export const promptTemplates = [
337
352
  },
338
353
  style: "<style>Dragon Ball/Dr. Slump aesthetic.</style>",
339
354
  },
355
+ movieParams: {
356
+ provider: "replicate",
357
+ },
340
358
  soundEffectParams: {
341
359
  provider: "replicate",
342
360
  },
@@ -389,6 +407,9 @@ export const promptTemplates = [
389
407
  },
390
408
  style: "<style>Ghibli style</style>",
391
409
  },
410
+ movieParams: {
411
+ provider: "replicate",
412
+ },
392
413
  soundEffectParams: {
393
414
  provider: "replicate",
394
415
  },
@@ -439,10 +460,13 @@ export const promptTemplates = [
439
460
  type: "image",
440
461
  },
441
462
  },
442
- model: "gemini-2.5-flash-image-preview",
463
+ model: "gemini-2.5-flash-image",
443
464
  provider: "google",
444
465
  style: "<style>Ghibli style multi-panel comic strips in landscape mode. Use speech bubbles with short, natural dialogue (1–6 words). Keep text minimal, like real comics. Let the art convey the story and emotions. Use the input image as the presenter.</style>",
445
466
  },
467
+ movieParams: {
468
+ provider: "replicate",
469
+ },
446
470
  soundEffectParams: {
447
471
  provider: "replicate",
448
472
  },
@@ -502,6 +526,9 @@ export const promptTemplates = [
502
526
  },
503
527
  style: "<style>Ghost in the shell aesthetic.</style>",
504
528
  },
529
+ movieParams: {
530
+ provider: "replicate",
531
+ },
505
532
  soundEffectParams: {
506
533
  provider: "replicate",
507
534
  },
@@ -546,6 +573,9 @@ export const promptTemplates = [
546
573
  images: {},
547
574
  provider: "openai",
548
575
  },
576
+ movieParams: {
577
+ provider: "replicate",
578
+ },
549
579
  soundEffectParams: {
550
580
  provider: "replicate",
551
581
  },
@@ -590,6 +620,9 @@ export const promptTemplates = [
590
620
  images: {},
591
621
  provider: "openai",
592
622
  },
623
+ movieParams: {
624
+ provider: "replicate",
625
+ },
593
626
  soundEffectParams: {
594
627
  provider: "replicate",
595
628
  },
@@ -642,6 +675,9 @@ export const promptTemplates = [
642
675
  },
643
676
  style: "<style>One Piece aesthetic.</style>",
644
677
  },
678
+ movieParams: {
679
+ provider: "replicate",
680
+ },
645
681
  soundEffectParams: {
646
682
  provider: "replicate",
647
683
  },
@@ -694,6 +730,9 @@ export const promptTemplates = [
694
730
  },
695
731
  style: "<style>Photo realistic, cinematic.</style>",
696
732
  },
733
+ movieParams: {
734
+ provider: "replicate",
735
+ },
697
736
  soundEffectParams: {
698
737
  provider: "replicate",
699
738
  },
@@ -746,6 +785,9 @@ export const promptTemplates = [
746
785
  },
747
786
  style: "<style>Photo realistic, cinematic.</style>",
748
787
  },
788
+ movieParams: {
789
+ provider: "replicate",
790
+ },
749
791
  soundEffectParams: {
750
792
  provider: "replicate",
751
793
  },
@@ -789,6 +831,9 @@ export const promptTemplates = [
789
831
  imageParams: {
790
832
  style: "<style>Ghibli style. Student (Taro) is a young teenager with a dark short hair with glasses. Teacher is a middle-aged man with grey hair and moustache.</style>",
791
833
  },
834
+ movieParams: {
835
+ provider: "replicate",
836
+ },
792
837
  soundEffectParams: {
793
838
  provider: "replicate",
794
839
  },
@@ -845,6 +890,9 @@ export const promptTemplates = [
845
890
  imageParams: {
846
891
  style: "<style>Photo realistic, cinematic.</style>",
847
892
  },
893
+ movieParams: {
894
+ provider: "replicate",
895
+ },
848
896
  soundEffectParams: {
849
897
  provider: "replicate",
850
898
  },
@@ -898,6 +946,9 @@ export const promptTemplates = [
898
946
  },
899
947
  style: "<style>A dreamy, hyper-detailed anime style that blends photorealistic backgrounds with vibrant, saturated colors. The skies are often filled with luminous clouds, dazzling sunsets, or star-filled nights, rendered with a glowing, almost ethereal quality. Urban landscapes and rural scenery are meticulously illustrated, with attention to tiny details like reflections in puddles, neon lights, or the texture of grass swaying in the wind. Characters are drawn with soft, expressive features, standing out against the breathtaking environments, creating a sense of emotional depth and lyrical atmosphere. The overall mood is cinematic, romantic, and filled with a sense of fleeting beauty and longing.</style>",
900
948
  },
949
+ movieParams: {
950
+ provider: "replicate",
951
+ },
901
952
  soundEffectParams: {
902
953
  provider: "replicate",
903
954
  },
@@ -944,6 +995,9 @@ export const promptTemplates = [
944
995
  imageParams: {
945
996
  style: "<style>Photo realistic, cinematic.</style>",
946
997
  },
998
+ movieParams: {
999
+ provider: "replicate",
1000
+ },
947
1001
  soundEffectParams: {
948
1002
  provider: "replicate",
949
1003
  },
@@ -41,7 +41,7 @@ export const templateDataSet = {
41
41
  "```",
42
42
  ghibli_comic_strips: "Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.\n" +
43
43
  "```JSON\n" +
44
- `{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","lang":"en","references":[{"url":"[SOURCE_URL: URL of the source material]","title":"[SOURCE_TITLE: Title of the referenced article, or paper]","type":"[SOURCE_TYPE: article, paper]"}],"beats":[{"text":"[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]"},{"text":"[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]"},{"text":"[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]"},{"text":"[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]"},{"text":"[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]"},{"text":"[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]"}],"canvasSize":{"width":1536,"height":1024},"imageParams":{"provider":"google","model":"gemini-2.5-flash-image-preview","style":"<style>Ghibli style multi-panel comic strips in landscape mode. Use speech bubbles with short, natural dialogue (1–6 words). Keep text minimal, like real comics. Let the art convey the story and emotions. Use the input image as the presenter.</style>","images":{"presenter":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"}}}}}\n` +
44
+ `{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","lang":"en","references":[{"url":"[SOURCE_URL: URL of the source material]","title":"[SOURCE_TITLE: Title of the referenced article, or paper]","type":"[SOURCE_TYPE: article, paper]"}],"beats":[{"text":"[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]"},{"text":"[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]"},{"text":"[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]"},{"text":"[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]"},{"text":"[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]"},{"text":"[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]"}],"canvasSize":{"width":1536,"height":1024},"imageParams":{"provider":"google","model":"gemini-2.5-flash-image","style":"<style>Ghibli style multi-panel comic strips in landscape mode. Use speech bubbles with short, natural dialogue (1–6 words). Keep text minimal, like real comics. Let the art convey the story and emotions. Use the input image as the presenter.</style>","images":{"presenter":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"}}}}}\n` +
45
45
  "```",
46
46
  ghost_comic: "Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.\n" +
47
47
  "```JSON\n" +
@@ -91,7 +91,7 @@ export const MulmoPresentationStyleMethods = {
91
91
  },
92
92
  getMovieAgentInfo(presentationStyle, beat) {
93
93
  const movieParams = { ...presentationStyle.movieParams, ...beat?.movieParams };
94
- const movieProvider = text2MovieProviderSchema.parse(movieParams?.provider);
94
+ const movieProvider = text2MovieProviderSchema.parse(movieParams?.provider ?? defaultProviders.text2movie);
95
95
  const agentInfo = provider2MovieAgent[movieProvider];
96
96
  return {
97
97
  agent: agentInfo.agentName,