mulmocast 2.0.2 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/README.md +22 -0
  2. package/assets/templates/ghibli_comic_strips.json +1 -1
  3. package/lib/actions/audio.js +2 -1
  4. package/lib/actions/bundle.js +5 -2
  5. package/lib/agents/image_genai_agent.js +1 -1
  6. package/lib/agents/image_openai_agent.js +2 -2
  7. package/lib/agents/index.d.ts +2 -1
  8. package/lib/agents/index.js +2 -1
  9. package/lib/agents/movie_genai_agent.js +1 -1
  10. package/lib/agents/tts_gemini_agent.d.ts +5 -0
  11. package/lib/agents/tts_gemini_agent.js +64 -0
  12. package/lib/cli/commands/tool/scripting/builder.d.ts +1 -1
  13. package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
  14. package/lib/data/promptTemplates.d.ts +40 -10
  15. package/lib/data/promptTemplates.js +55 -1
  16. package/lib/data/templateDataSet.js +1 -1
  17. package/lib/methods/mulmo_presentation_style.js +1 -1
  18. package/lib/types/schema.d.ts +30 -30
  19. package/lib/types/schema.js +4 -2
  20. package/lib/types/type.d.ts +2 -0
  21. package/lib/utils/context.d.ts +33 -33
  22. package/lib/utils/ffmpeg_utils.d.ts +1 -0
  23. package/lib/utils/ffmpeg_utils.js +23 -0
  24. package/lib/utils/provider2agent.d.ts +10 -1
  25. package/lib/utils/provider2agent.js +17 -8
  26. package/lib/utils/utils.js +3 -0
  27. package/package.json +7 -7
  28. package/scripts/test/gpt.json +6 -0
  29. package/scripts/test/image-2.png +0 -0
  30. package/scripts/test/test_audio_gemini.json +67 -0
  31. package/scripts/test/test_audio_gemini.json~ +67 -0
  32. package/scripts/test/test_genai.json +17 -9
  33. package/scripts/test/test_image_refs.json +1 -1
  34. package/scripts/test/test_lipsync2.json +66 -0
  35. package/scripts/test/test_lipsync2.json~ +24 -0
  36. package/scripts/test/test_replicate.json +6 -6
@@ -453,9 +453,9 @@ export declare const htmlPromptParamsSchema: z.ZodObject<{
453
453
  data: z.ZodOptional<z.ZodAny>;
454
454
  images: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
455
455
  }, z.core.$strict>;
456
- export declare const text2MovieProviderSchema: z.ZodDefault<z.ZodEnum<{
456
+ export declare const text2MovieProviderSchema: z.ZodEnum<{
457
457
  [x: string]: string;
458
- }>>;
458
+ }>;
459
459
  export declare const text2SoundEffectProviderSchema: z.ZodDefault<z.ZodEnum<{
460
460
  [x: string]: string;
461
461
  }>>;
@@ -601,9 +601,9 @@ export declare const mulmoBeatSchema: z.ZodObject<{
601
601
  movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
602
602
  }, z.core.$strict>>;
603
603
  movieParams: z.ZodOptional<z.ZodObject<{
604
- provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
604
+ provider: z.ZodOptional<z.ZodEnum<{
605
605
  [x: string]: string;
606
- }>>>;
606
+ }>>;
607
607
  model: z.ZodOptional<z.ZodString>;
608
608
  fillOption: z.ZodOptional<z.ZodObject<{
609
609
  style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
@@ -681,9 +681,9 @@ export declare const mulmoTransitionSchema: z.ZodObject<{
681
681
  duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
682
682
  }, z.core.$strip>;
683
683
  export declare const mulmoMovieParamsSchema: z.ZodObject<{
684
- provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
684
+ provider: z.ZodOptional<z.ZodEnum<{
685
685
  [x: string]: string;
686
- }>>>;
686
+ }>>;
687
687
  model: z.ZodOptional<z.ZodString>;
688
688
  fillOption: z.ZodOptional<z.ZodObject<{
689
689
  style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
@@ -761,10 +761,10 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
761
761
  prompt: z.ZodString;
762
762
  }, z.core.$strict>]>>>;
763
763
  }, z.core.$strict>>>;
764
- movieParams: z.ZodOptional<z.ZodObject<{
765
- provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
764
+ movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
765
+ provider: z.ZodOptional<z.ZodEnum<{
766
766
  [x: string]: string;
767
- }>>>;
767
+ }>>;
768
768
  model: z.ZodOptional<z.ZodString>;
769
769
  fillOption: z.ZodOptional<z.ZodObject<{
770
770
  style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
@@ -779,7 +779,7 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
779
779
  }>;
780
780
  duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
781
781
  }, z.core.$strip>>;
782
- }, z.core.$strict>>;
782
+ }, z.core.$strict>>>;
783
783
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
784
784
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
785
785
  [x: string]: string;
@@ -897,10 +897,10 @@ export declare const mulmoScriptSchema: z.ZodObject<{
897
897
  prompt: z.ZodString;
898
898
  }, z.core.$strict>]>>>;
899
899
  }, z.core.$strict>>>;
900
- movieParams: z.ZodOptional<z.ZodObject<{
901
- provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
900
+ movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
901
+ provider: z.ZodOptional<z.ZodEnum<{
902
902
  [x: string]: string;
903
- }>>>;
903
+ }>>;
904
904
  model: z.ZodOptional<z.ZodString>;
905
905
  fillOption: z.ZodOptional<z.ZodObject<{
906
906
  style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
@@ -915,7 +915,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
915
915
  }>;
916
916
  duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
917
917
  }, z.core.$strip>>;
918
- }, z.core.$strict>>;
918
+ }, z.core.$strict>>>;
919
919
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
920
920
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
921
921
  [x: string]: string;
@@ -1105,9 +1105,9 @@ export declare const mulmoScriptSchema: z.ZodObject<{
1105
1105
  movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
1106
1106
  }, z.core.$strict>>;
1107
1107
  movieParams: z.ZodOptional<z.ZodObject<{
1108
- provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1108
+ provider: z.ZodOptional<z.ZodEnum<{
1109
1109
  [x: string]: string;
1110
- }>>>;
1110
+ }>>;
1111
1111
  model: z.ZodOptional<z.ZodString>;
1112
1112
  fillOption: z.ZodOptional<z.ZodObject<{
1113
1113
  style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
@@ -1309,10 +1309,10 @@ export declare const mulmoStudioSchema: z.ZodObject<{
1309
1309
  prompt: z.ZodString;
1310
1310
  }, z.core.$strict>]>>>;
1311
1311
  }, z.core.$strict>>>;
1312
- movieParams: z.ZodOptional<z.ZodObject<{
1313
- provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1312
+ movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
1313
+ provider: z.ZodOptional<z.ZodEnum<{
1314
1314
  [x: string]: string;
1315
- }>>>;
1315
+ }>>;
1316
1316
  model: z.ZodOptional<z.ZodString>;
1317
1317
  fillOption: z.ZodOptional<z.ZodObject<{
1318
1318
  style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
@@ -1327,7 +1327,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
1327
1327
  }>;
1328
1328
  duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
1329
1329
  }, z.core.$strip>>;
1330
- }, z.core.$strict>>;
1330
+ }, z.core.$strict>>>;
1331
1331
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
1332
1332
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1333
1333
  [x: string]: string;
@@ -1517,9 +1517,9 @@ export declare const mulmoStudioSchema: z.ZodObject<{
1517
1517
  movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
1518
1518
  }, z.core.$strict>>;
1519
1519
  movieParams: z.ZodOptional<z.ZodObject<{
1520
- provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1520
+ provider: z.ZodOptional<z.ZodEnum<{
1521
1521
  [x: string]: string;
1522
- }>>>;
1522
+ }>>;
1523
1523
  model: z.ZodOptional<z.ZodString>;
1524
1524
  fillOption: z.ZodOptional<z.ZodObject<{
1525
1525
  style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
@@ -1657,10 +1657,10 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
1657
1657
  prompt: z.ZodString;
1658
1658
  }, z.core.$strict>]>>>;
1659
1659
  }, z.core.$strict>>>;
1660
- movieParams: z.ZodOptional<z.ZodObject<{
1661
- provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1660
+ movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
1661
+ provider: z.ZodOptional<z.ZodEnum<{
1662
1662
  [x: string]: string;
1663
- }>>>;
1663
+ }>>;
1664
1664
  model: z.ZodOptional<z.ZodString>;
1665
1665
  fillOption: z.ZodOptional<z.ZodObject<{
1666
1666
  style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
@@ -1675,7 +1675,7 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
1675
1675
  }>;
1676
1676
  duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
1677
1677
  }, z.core.$strip>>;
1678
- }, z.core.$strict>>;
1678
+ }, z.core.$strict>>>;
1679
1679
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
1680
1680
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1681
1681
  [x: string]: string;
@@ -1787,10 +1787,10 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
1787
1787
  prompt: z.ZodString;
1788
1788
  }, z.core.$strict>]>>>;
1789
1789
  }, z.core.$strict>>>;
1790
- movieParams: z.ZodOptional<z.ZodObject<{
1791
- provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1790
+ movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
1791
+ provider: z.ZodOptional<z.ZodEnum<{
1792
1792
  [x: string]: string;
1793
- }>>>;
1793
+ }>>;
1794
1794
  model: z.ZodOptional<z.ZodString>;
1795
1795
  fillOption: z.ZodOptional<z.ZodObject<{
1796
1796
  style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
@@ -1805,7 +1805,7 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
1805
1805
  }>;
1806
1806
  duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
1807
1807
  }, z.core.$strip>>;
1808
- }, z.core.$strict>>;
1808
+ }, z.core.$strict>>>;
1809
1809
  soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
1810
1810
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1811
1811
  [x: string]: string;
@@ -268,7 +268,7 @@ export const htmlPromptParamsSchema = z
268
268
  images: z.record(z.string(), z.any()).optional(),
269
269
  })
270
270
  .strict();
271
- export const text2MovieProviderSchema = z.enum(Object.keys(provider2MovieAgent)).default(defaultProviders.text2movie);
271
+ export const text2MovieProviderSchema = z.enum(Object.keys(provider2MovieAgent));
272
272
  export const text2SoundEffectProviderSchema = z.enum(Object.keys(provider2SoundEffectAgent)).default(defaultProviders.soundEffect);
273
273
  export const mulmoSoundEffectParamsSchema = z.object({
274
274
  provider: text2SoundEffectProviderSchema.optional(),
@@ -359,7 +359,9 @@ export const mulmoPresentationStyleSchema = z.object({
359
359
  provider: defaultProviders.text2image,
360
360
  images: {},
361
361
  }),
362
- movieParams: mulmoMovieParamsSchema.optional(),
362
+ movieParams: mulmoMovieParamsSchema.optional().default({
363
+ provider: defaultProviders.text2movie,
364
+ }),
363
365
  soundEffectParams: mulmoSoundEffectParamsSchema.optional().default({
364
366
  provider: defaultProviders.soundEffect,
365
367
  }),
@@ -139,6 +139,8 @@ export type ImageType = "image" | "movie";
139
139
  export type MulmoViewerBeat = {
140
140
  text?: string;
141
141
  duration?: number;
142
+ startTime?: number;
143
+ endTime?: number;
142
144
  multiLinguals?: Record<string, string>;
143
145
  audioSources?: Record<string, string>;
144
146
  imageSource?: string;
@@ -56,6 +56,17 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
56
56
  prompt: string;
57
57
  }> | undefined;
58
58
  };
59
+ movieParams: {
60
+ provider?: string | undefined;
61
+ model?: string | undefined;
62
+ fillOption?: {
63
+ style: "aspectFit" | "aspectFill";
64
+ } | undefined;
65
+ transition?: {
66
+ type: "fade" | "slideout_left";
67
+ duration: number;
68
+ } | undefined;
69
+ };
59
70
  soundEffectParams: {
60
71
  provider?: string | undefined;
61
72
  model?: string | undefined;
@@ -252,17 +263,6 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
252
263
  enableLipSync?: boolean | undefined;
253
264
  hidden?: boolean | undefined;
254
265
  }[];
255
- movieParams?: {
256
- provider?: string | undefined;
257
- model?: string | undefined;
258
- fillOption?: {
259
- style: "aspectFit" | "aspectFill";
260
- } | undefined;
261
- transition?: {
262
- type: "fade" | "slideout_left";
263
- duration: number;
264
- } | undefined;
265
- } | undefined;
266
266
  lipSyncParams?: {
267
267
  provider?: string | undefined;
268
268
  model?: string | undefined;
@@ -371,6 +371,17 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
371
371
  prompt: string;
372
372
  }> | undefined;
373
373
  };
374
+ movieParams: {
375
+ provider?: string | undefined;
376
+ model?: string | undefined;
377
+ fillOption?: {
378
+ style: "aspectFit" | "aspectFill";
379
+ } | undefined;
380
+ transition?: {
381
+ type: "fade" | "slideout_left";
382
+ duration: number;
383
+ } | undefined;
384
+ };
374
385
  soundEffectParams: {
375
386
  provider?: string | undefined;
376
387
  model?: string | undefined;
@@ -567,17 +578,6 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
567
578
  enableLipSync?: boolean | undefined;
568
579
  hidden?: boolean | undefined;
569
580
  }[];
570
- movieParams?: {
571
- provider?: string | undefined;
572
- model?: string | undefined;
573
- fillOption?: {
574
- style: "aspectFit" | "aspectFill";
575
- } | undefined;
576
- transition?: {
577
- type: "fade" | "slideout_left";
578
- duration: number;
579
- } | undefined;
580
- } | undefined;
581
581
  lipSyncParams?: {
582
582
  provider?: string | undefined;
583
583
  model?: string | undefined;
@@ -693,6 +693,17 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
693
693
  prompt: string;
694
694
  }> | undefined;
695
695
  };
696
+ movieParams: {
697
+ provider?: string | undefined;
698
+ model?: string | undefined;
699
+ fillOption?: {
700
+ style: "aspectFit" | "aspectFill";
701
+ } | undefined;
702
+ transition?: {
703
+ type: "fade" | "slideout_left";
704
+ duration: number;
705
+ } | undefined;
706
+ };
696
707
  soundEffectParams: {
697
708
  provider?: string | undefined;
698
709
  model?: string | undefined;
@@ -716,17 +727,6 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
716
727
  path: string;
717
728
  } | undefined;
718
729
  };
719
- movieParams?: {
720
- provider?: string | undefined;
721
- model?: string | undefined;
722
- fillOption?: {
723
- style: "aspectFit" | "aspectFill";
724
- } | undefined;
725
- transition?: {
726
- type: "fade" | "slideout_left";
727
- duration: number;
728
- } | undefined;
729
- } | undefined;
730
730
  lipSyncParams?: {
731
731
  provider?: string | undefined;
732
732
  model?: string | undefined;
@@ -18,3 +18,4 @@ export declare const ffmpegGetMediaDuration: (filePath: string) => Promise<{
18
18
  export declare const extractImageFromMovie: (movieFile: string, imagePath: string) => Promise<object>;
19
19
  export declare const trimMusic: (inputFile: string, startTime: number, duration: number) => Promise<Buffer>;
20
20
  export declare const createSilentAudio: (filePath: string, durationSec: number) => Promise<void>;
21
+ export declare const pcmToMp3: (rawPcm: Buffer, sampleRate?: number) => Promise<Buffer>;
@@ -2,6 +2,7 @@ import ffmpeg from "fluent-ffmpeg";
2
2
  import { GraphAILogger } from "graphai";
3
3
  import { isFile } from "./file.js";
4
4
  import fs from "fs";
5
+ import { Readable, PassThrough } from "node:stream";
5
6
  export const setFfmpegPath = (ffmpegPath) => {
6
7
  ffmpeg.setFfmpegPath(ffmpegPath);
7
8
  };
@@ -144,3 +145,25 @@ export const createSilentAudio = (filePath, durationSec) => {
144
145
  .run();
145
146
  });
146
147
  };
148
+ export const pcmToMp3 = (rawPcm, sampleRate = 24000) => {
149
+ return new Promise((resolve, reject) => {
150
+ const inputStream = new Readable({
151
+ read() {
152
+ this.push(rawPcm);
153
+ this.push(null);
154
+ },
155
+ });
156
+ const outputChunks = [];
157
+ const outputStream = new PassThrough();
158
+ outputStream.on("data", (chunk) => outputChunks.push(chunk));
159
+ outputStream.on("end", () => resolve(Buffer.concat(outputChunks)));
160
+ outputStream.on("error", reject);
161
+ ffmpeg(inputStream)
162
+ .inputFormat("s16le")
163
+ .inputOptions([`-ar ${sampleRate}`, "-ac 1"])
164
+ .audioCodec("libmp3lame")
165
+ .format("mp3")
166
+ .on("error", reject)
167
+ .pipe(outputStream);
168
+ });
169
+ };
@@ -16,6 +16,12 @@ export declare const provider2TTSAgent: {
16
16
  hasLimitedConcurrency: boolean;
17
17
  keyName: string;
18
18
  };
19
+ gemini: {
20
+ agentName: string;
21
+ hasLimitedConcurrency: boolean;
22
+ defaultVoice: string;
23
+ keyName: string;
24
+ };
19
25
  elevenlabs: {
20
26
  agentName: string;
21
27
  hasLimitedConcurrency: boolean;
@@ -76,7 +82,10 @@ export declare const provider2MovieAgent: {
76
82
  models: string[];
77
83
  keyName: string;
78
84
  modelParams: {
79
- "veo-3.0-generate-preview": {
85
+ "veo-3.1-generate-preview": {
86
+ durations: number[];
87
+ };
88
+ "veo-3.0-generate-001": {
80
89
  durations: number[];
81
90
  };
82
91
  "veo-2.0-generate-001": {
@@ -17,6 +17,12 @@ export const provider2TTSAgent = {
17
17
  hasLimitedConcurrency: false,
18
18
  keyName: "GEMINI_API_KEY",
19
19
  },
20
+ gemini: {
21
+ agentName: "ttsGeminiAgent",
22
+ hasLimitedConcurrency: false,
23
+ defaultVoice: "Kore",
24
+ keyName: "GEMINI_API_KEY",
25
+ },
20
26
  elevenlabs: {
21
27
  agentName: "ttsElevenlabsAgent",
22
28
  hasLimitedConcurrency: true,
@@ -42,8 +48,8 @@ export const provider2ImageAgent = {
42
48
  },
43
49
  google: {
44
50
  agentName: "imageGenAIAgent",
45
- defaultModel: "gemini-2.5-flash-image-preview",
46
- models: ["imagen-3.0-generate-002", "imagen-4.0-generate-preview-06-06", "imagen-4.0-ultra-generate-preview-06-06", "gemini-2.5-flash-image-preview"],
51
+ defaultModel: "gemini-2.5-flash-image",
52
+ models: ["imagen-4.0-generate-preview-06-06", "imagen-4.0-ultra-generate-preview-06-06", "gemini-2.5-flash-image", "gemini-3-pro-image-preview"],
47
53
  keyName: "GEMINI_API_KEY",
48
54
  },
49
55
  replicate: {
@@ -77,8 +83,8 @@ export const provider2MovieAgent = {
77
83
  "minimax/hailuo-02",
78
84
  "minimax/hailuo-02-fast",
79
85
  "pixverse/pixverse-v4.5",
80
- "wan-video/wan-2.2-i2v-480p-fast",
81
- "wan-video/wan-2.2-t2v-480p-fast",
86
+ "wan-video/wan-2.2-i2v-fast",
87
+ "wan-video/wan-2.2-t2v-fast",
82
88
  ],
83
89
  modelParams: {
84
90
  "bytedance/seedance-1-lite": {
@@ -144,12 +150,12 @@ export const provider2MovieAgent = {
144
150
  last_image: "last_frame_image",
145
151
  price_per_sec: 0.12,
146
152
  },
147
- "wan-video/wan-2.2-i2v-480p-fast": {
153
+ "wan-video/wan-2.2-i2v-fast": {
148
154
  durations: [5],
149
155
  start_image: "image",
150
156
  price_per_sec: 0.012,
151
157
  },
152
- "wan-video/wan-2.2-t2v-480p-fast": {
158
+ "wan-video/wan-2.2-t2v-fast": {
153
159
  durations: [5],
154
160
  start_image: undefined,
155
161
  price_per_sec: 0.012,
@@ -159,10 +165,13 @@ export const provider2MovieAgent = {
159
165
  google: {
160
166
  agentName: "movieGenAIAgent",
161
167
  defaultModel: "veo-2.0-generate-001",
162
- models: ["veo-2.0-generate-001", "veo-3.0-generate-preview"],
168
+ models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview"],
163
169
  keyName: "GEMINI_API_KEY",
164
170
  modelParams: {
165
- "veo-3.0-generate-preview": {
171
+ "veo-3.1-generate-preview": {
172
+ durations: [4, 6, 8],
173
+ },
174
+ "veo-3.0-generate-001": {
166
175
  durations: [4, 6, 8],
167
176
  },
168
177
  "veo-2.0-generate-001": {
@@ -73,6 +73,9 @@ export const settings2GraphAIConfig = (settings, env) => {
73
73
  ttsNijivoiceAgent: {
74
74
  apiKey: getKey("TTS", "NIJIVOICE_API_KEY"),
75
75
  },
76
+ ttsGeminiAgent: {
77
+ apiKey: getKey("TTS", "GEMINI_API_KEY"),
78
+ },
76
79
  ttsElevenlabsAgent: {
77
80
  apiKey: getKey("TTS", "ELEVENLABS_API_KEY"),
78
81
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast",
3
- "version": "2.0.2",
3
+ "version": "2.0.4",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "lib/index.node.js",
@@ -74,7 +74,7 @@
74
74
  "homepage": "https://github.com/receptron/mulmocast-cli#readme",
75
75
  "dependencies": {
76
76
  "@google-cloud/text-to-speech": "^6.4.0",
77
- "@google/genai": "^1.29.1",
77
+ "@google/genai": "^1.30.0",
78
78
  "@graphai/anthropic_agent": "^2.0.11",
79
79
  "@graphai/browserless_agent": "^2.0.1",
80
80
  "@graphai/gemini_agent": "^2.0.1",
@@ -95,14 +95,14 @@
95
95
  "fluent-ffmpeg": "^2.1.3",
96
96
  "graphai": "^2.0.16",
97
97
  "jsdom": "^27.2.0",
98
- "marked": "^17.0.0",
98
+ "marked": "^17.0.1",
99
99
  "mulmocast-vision": "^1.0.4",
100
100
  "ora": "^9.0.0",
101
- "puppeteer": "^24.30.0",
102
- "replicate": "^1.3.1",
101
+ "puppeteer": "^24.31.0",
102
+ "replicate": "^1.4.0",
103
103
  "yaml": "^2.8.1",
104
104
  "yargs": "^18.0.0",
105
- "zod": "^4.1.12"
105
+ "zod": "^4.1.13"
106
106
  },
107
107
  "devDependencies": {
108
108
  "@receptron/test_utils": "^2.0.3",
@@ -117,7 +117,7 @@
117
117
  "prettier": "^3.6.2",
118
118
  "tsx": "^4.20.6",
119
119
  "typescript": "^5.9.3",
120
- "typescript-eslint": "^8.46.4"
120
+ "typescript-eslint": "^8.48.0"
121
121
  },
122
122
  "engines": {
123
123
  "node": ">=20.0.0"
@@ -21,11 +21,17 @@
21
21
  }
22
22
  },
23
23
  "beats": [
24
+ {
25
+ "speaker": "Host",
26
+ "text": "How are you?",
27
+ "imagePrompt": "A witch in Harajuku"
28
+ },
24
29
  {
25
30
  "speaker": "Host",
26
31
  "text": "How are you?",
27
32
  "imagePrompt": "A witch in Harajuku",
28
33
  "imageParams": {
34
+ "model": "gpt-image-1-mini",
29
35
  "style": "Ukiyoe-style"
30
36
  }
31
37
  }
Binary file
@@ -0,0 +1,67 @@
1
+ {
2
+ "$mulmocast": {
3
+ "version": "1.1"
4
+ },
5
+ "lang": "en",
6
+ "title": "Audio Instructions Test",
7
+ "speechParams": {
8
+ "speakers": {
9
+ "Presenter": {
10
+ "provider": "gemini",
11
+ "voiceId": "Kore"
12
+ },
13
+ "Presenter2": {
14
+ "provider": "gemini",
15
+ "voiceId": "Puck"
16
+ }
17
+ }
18
+ },
19
+ "beats": [
20
+ {
21
+ "speaker": "Presenter",
22
+ "text": "Hello, I'm a presenter. I have no instructions.",
23
+ "image": {
24
+ "type": "textSlide",
25
+ "slide": {
26
+ "title": "Presenter"
27
+ }
28
+ }
29
+ },
30
+ {
31
+ "speaker": "Presenter2",
32
+ "text": "Hello, I'm a presenter 2. My instructions are 'Speak in a cheerful and positive tone'.",
33
+ "image": {
34
+ "type": "textSlide",
35
+ "slide": {
36
+ "title": "Presenter 2"
37
+ }
38
+ }
39
+ },
40
+ {
41
+ "speaker": "Presenter",
42
+ "text": "Hello, I'm a presenter. I have a British English instruction.",
43
+ "speechOptions": {
44
+ "instruction": "Speak in British English."
45
+ },
46
+ "image": {
47
+ "type": "textSlide",
48
+ "slide": {
49
+ "title": "Presenter with British English instruction"
50
+ }
51
+ }
52
+ },
53
+ {
54
+ "speaker": "Presenter",
55
+ "text": "Hello, I'm a presenter. I have a whisper instruction.",
56
+ "speechOptions": {
57
+ "instruction": "Whisper softly, like a pillow talk."
58
+ },
59
+ "image": {
60
+ "type": "textSlide",
61
+ "slide": {
62
+ "title": "Presenter with whisper instruction"
63
+ }
64
+ }
65
+ }
66
+ ]
67
+ }
@@ -0,0 +1,67 @@
1
+ {
2
+ "$mulmocast": {
3
+ "version": "1.1"
4
+ },
5
+ "lang": "en",
6
+ "title": "Audio Instructions Test",
7
+ "speechParams": {
8
+ "speakers": {
9
+ "Presenter": {
10
+ "provider": "google",
11
+ "voiceId": "ja-JP-Standard-A"
12
+ },
13
+ "Presenter2": {
14
+ "provider": "google",
15
+ "voiceId": "ja-JP-Standard-B"
16
+ }
17
+ }
18
+ },
19
+ "beats": [
20
+ {
21
+ "speaker": "Presenter",
22
+ "text": "Hello, I'm a presenter. I have no instructions.",
23
+ "image": {
24
+ "type": "textSlide",
25
+ "slide": {
26
+ "title": "Presenter"
27
+ }
28
+ }
29
+ },
30
+ {
31
+ "speaker": "Presenter2",
32
+ "text": "Hello, I'm a presenter 2. My instructions are 'Speak in a cheerful and positive tone'.",
33
+ "image": {
34
+ "type": "textSlide",
35
+ "slide": {
36
+ "title": "Presenter 2"
37
+ }
38
+ }
39
+ },
40
+ {
41
+ "speaker": "Presenter",
42
+ "text": "Hello, I'm a presenter. I have a British English instruction.",
43
+ "speechOptions": {
44
+ "instruction": "Speak in British English."
45
+ },
46
+ "image": {
47
+ "type": "textSlide",
48
+ "slide": {
49
+ "title": "Presenter with British English instruction"
50
+ }
51
+ }
52
+ },
53
+ {
54
+ "speaker": "Presenter",
55
+ "text": "Hello, I'm a presenter. I have a whisper instruction.",
56
+ "speechOptions": {
57
+ "instruction": "Whisper softly, like a pillow talk."
58
+ },
59
+ "image": {
60
+ "type": "textSlide",
61
+ "slide": {
62
+ "title": "Presenter with whisper instruction"
63
+ }
64
+ }
65
+ }
66
+ ]
67
+ }