mulmocast 2.6.1 → 2.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -234,6 +234,16 @@ MulmoAnimation.prototype._applyCoverBaseStyle = function (el, iw, ih) {
234
234
  el.style.height = ih + "px";
235
235
  };
236
236
 
237
/**
 * Render all animations at their final state (last frame).
 * Used for generating static images (PDF, thumbnails) from animated content.
 *
 * The last frame index is derived from window.__MULMO.totalFrames. A missing
 * (or otherwise falsy) totalFrames is treated as 0, because
 * Math.max(0, undefined - 1) evaluates to NaN rather than 0 and would hand
 * update() a NaN frame number.
 * @param {number} fps - frames per second
 */
MulmoAnimation.prototype.renderFinal = function (fps) {
  const totalFrames = window.__MULMO.totalFrames || 0;
  const lastFrame = Math.max(0, totalFrames - 1);
  this.update(lastFrame, fps);
};
246
+
237
247
  /**
238
248
  * Update all registered animations for the given frame.
239
249
  * @param {number} frame - current frame number
@@ -25,6 +25,20 @@ if (typeof window.render === "function") {
25
25
  }
26
26
  }
27
27
 
28
/**
 * Jump the animation to its last frame so all content is in its end state.
 * Used by Puppeteer before capturing a static image for PDF/thumbnail output.
 *
 * Records the last frame index on window.__MULMO.frame, then delegates to
 * window.render when that is a function and returns its result (which may be
 * a Promise). Returns undefined when window.render is not defined.
 */
window.renderFinal = function () {
  const state = window.__MULMO;
  const frameCount = state.totalFrames || 0;
  // Clamp so a zero/unknown frame count still renders frame 0.
  const finalFrame = Math.max(0, frameCount - 1);
  state.frame = finalFrame;
  if (typeof window.render !== "function") {
    return;
  }
  return window.render(finalFrame, state.totalFrames, state.fps);
};
41
+
28
42
  /**
29
43
  * Play animation in real-time using requestAnimationFrame.
30
44
  * Returns a Promise that resolves when all frames have been rendered.
@@ -80,12 +80,12 @@ export const imagePreprocessAgent = async (namedInputs) => {
80
80
  if (isAnimatedHtml) {
81
81
  const animatedVideoPath = getBeatAnimatedVideoPath(context, index);
82
82
  // ImagePluginPreprocessAgentResponse
83
+ // imageFromMovie is false: the plugin generates both the .mp4 video AND
84
+ // a high-quality final-frame PNG directly from HTML (better than extracting from compressed video).
83
85
  return {
84
86
  ...returnValue,
85
- imagePath, // for thumbnail extraction
87
+ imagePath, // static final-frame PNG (generated by the plugin)
86
88
  movieFile: animatedVideoPath, // .mp4 path for the pipeline
87
- imageFromMovie: true, // triggers extractImageFromMovie
88
- useLastFrame: true, // extract last frame for PDF/static (animation complete state)
89
89
  referenceImageForMovie: pluginPath,
90
90
  markdown,
91
91
  html,
@@ -1,4 +1,4 @@
1
- import { MulmoStudioContext, MulmoImagePromptMedia } from "../types/index.js";
1
+ import { MulmoStudioContext, MulmoBeat, MulmoImagePromptMedia } from "../types/index.js";
2
2
  export declare const generateReferenceImage: (inputs: {
3
3
  context: MulmoStudioContext;
4
4
  key: string;
@@ -11,5 +11,12 @@ export type MediaRefs = {
11
11
  movieRefs: Record<string, string>;
12
12
  };
13
13
  export declare const getMediaRefs: (context: MulmoStudioContext) => Promise<MediaRefs>;
14
+ export declare const resolveBeatLocalRefs: (namedInputs: {
15
+ context: MulmoStudioContext;
16
+ beat: MulmoBeat;
17
+ index: number;
18
+ imageRefs: Record<string, string>;
19
+ movieRefs: Record<string, string>;
20
+ }) => Promise<MediaRefs>;
14
21
  /** @deprecated Use getMediaRefs instead */
15
22
  export declare const getImageRefs: (context: MulmoStudioContext) => Promise<Record<string, string>>;
@@ -2,8 +2,8 @@ import { GraphAI, GraphAILogger } from "graphai";
2
2
  import { getReferenceImagePath } from "../utils/file.js";
3
3
  import { graphOption } from "./images.js";
4
4
  import { MulmoPresentationStyleMethods, MulmoMediaSourceMethods } from "../methods/index.js";
5
- import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent, imageReplicateAgent } from "../agents/index.js";
6
- import { agentGenerationError, imageReferenceAction, imageFileTarget } from "../utils/error_cause.js";
5
+ import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent, imageReplicateAgent, movieGenAIAgent, movieReplicateAgent } from "../agents/index.js";
6
+ import { agentGenerationError, imageReferenceAction, imageFileTarget, movieFileTarget } from "../utils/error_cause.js";
7
7
  // public api
8
8
  // Application may call this function directly to generate reference image.
9
9
  export const generateReferenceImage = async (inputs) => {
@@ -77,6 +77,105 @@ export const getMediaRefs = async (context) => {
77
77
  const resolveMovieReference = async (movie, context, key) => {
78
78
  return MulmoMediaSourceMethods.imageReference(movie.source, context, key);
79
79
  };
80
/**
 * Generate a reference movie (.mp4) for a single `moviePrompt` entry.
 *
 * Builds a one-node GraphAI graph around the movie agent selected by
 * MulmoPresentationStyleMethods.getMovieAgentInfo and runs it. The output path
 * is passed to the agent both as `movieFile` and as the cache file.
 *
 * @param inputs.context - Mulmo studio context
 * @param inputs.key - reference key; used for the output path and the cache id
 * @param inputs.index - index forwarded in the cache settings
 * @param inputs.moviePrompt - entry whose `prompt` is sent to the agent
 * @param inputs.imagePath - optional image path; `null` is sent when it is null/undefined
 * @returns the path of the movie file
 * @throws Error (with an agentGenerationError cause) when building the graph
 *         options or running the graph fails; the original error is logged first
 */
const generateReferenceMovie = async (inputs) => {
    const { context, key, index, moviePrompt, imagePath } = inputs;
    const outputPath = getReferenceImagePath(context, key, "mp4");
    const agentInfo = MulmoPresentationStyleMethods.getMovieAgentInfo(context.presentationStyle);
    GraphAILogger.info(`Generating reference movie for ${key}: ${moviePrompt.prompt}`);

    const generatorInputs = {
        media: "movie",
        prompt: moviePrompt.prompt,
        imagePath: imagePath ?? null,
        movieFile: outputPath,
        cache: {
            force: [context.force],
            file: outputPath,
            index,
            id: key,
            mulmoContext: context,
            sessionType: "imageReference",
        },
    };
    const generatorParams = {
        model: agentInfo.movieParams.model,
        canvasSize: context.presentationStyle.canvasSize,
    };
    const graphData = {
        version: 0.5,
        nodes: {
            movieGenerator: {
                agent: agentInfo.agent,
                inputs: generatorInputs,
                params: generatorParams,
            },
        },
    };

    try {
        const options = await graphOption(context);
        const agents = { movieGenAIAgent, movieReplicateAgent, mediaMockAgent };
        await new GraphAI(graphData, agents, options).run();
        return outputPath;
    }
    catch (error) {
        GraphAILogger.error(error);
        const cause = agentGenerationError(agentInfo.agent, imageReferenceAction, movieFileTarget);
        throw new Error(`generateReferenceMovie: generate error: key=${key}`, { cause });
    }
};
124
/**
 * Resolve the beat-local media references declared in `images`.
 *
 * Works in two passes over the keys in default `sort()` order:
 *   1. "imagePrompt", "image" and "movie" entries are resolved concurrently.
 *   2. "moviePrompt" entries are generated concurrently afterwards, since an
 *      entry's `imageName` is looked up among the global image refs merged with
 *      the images resolved in pass 1 (local keys override global ones).
 *
 * Each generated entry gets the index `beatIndex * 100 + <position of its key
 * in the sorted key list>`.
 *
 * @param context - Mulmo studio context
 * @param images - beat-local media entries keyed by reference name
 * @param beatIndex - index of the beat that owns the entries
 * @param globalImageRefs - already resolved global image refs (key -> path)
 * @returns `{ localImageRefs, localMovieRefs }` holding the resolved values
 */
const resolveLocalRefs = async (context, images, beatIndex, globalImageRefs) => {
    const localImageRefs = {};
    const localMovieRefs = {};
    const orderedKeys = Object.keys(images).sort();
    const indexFor = (position) => beatIndex * 100 + position;

    // Pass 1: everything that does not depend on another local reference.
    const resolveStaticEntry = async (key, position) => {
        const entry = images[key];
        switch (entry.type) {
            case "imagePrompt":
                localImageRefs[key] = await generateReferenceImage({
                    context,
                    key,
                    index: indexFor(position),
                    image: entry,
                });
                break;
            case "image":
                localImageRefs[key] = await MulmoMediaSourceMethods.imageReference(entry.source, context, key);
                break;
            case "movie":
                localMovieRefs[key] = await resolveMovieReference(entry, context, key);
                break;
            default:
                // "moviePrompt" entries are handled in pass 2.
                break;
        }
    };
    await Promise.all(orderedKeys.map(resolveStaticEntry));

    // Pass 2: moviePrompt entries; imageName is looked up in image refs only.
    const imageLookup = { ...globalImageRefs, ...localImageRefs };
    const generateMovieEntry = async (key, position) => {
        const entry = images[key];
        if (entry.type !== "moviePrompt") {
            return;
        }
        const referenceImage = entry.imageName ? imageLookup[entry.imageName] : undefined;
        localMovieRefs[key] = await generateReferenceMovie({
            context,
            key,
            index: indexFor(position),
            moviePrompt: entry,
            imagePath: referenceImage,
        });
    };
    await Promise.all(orderedKeys.map(generateMovieEntry));

    return { localImageRefs, localMovieRefs };
};
167
+ export const resolveBeatLocalRefs = async (namedInputs) => {
168
+ const { context, beat, index, imageRefs, movieRefs } = namedInputs;
169
+ const images = beat.images;
170
+ if (!images) {
171
+ return { imageRefs, movieRefs };
172
+ }
173
+ const { localImageRefs, localMovieRefs } = await resolveLocalRefs(context, images, index, imageRefs);
174
+ return {
175
+ imageRefs: { ...imageRefs, ...localImageRefs },
176
+ movieRefs: { ...movieRefs, ...localMovieRefs },
177
+ };
178
+ };
80
179
  /** @deprecated Use getMediaRefs instead */
81
180
  export const getImageRefs = async (context) => {
82
181
  const { imageRefs } = await getMediaRefs(context);
@@ -25,6 +25,22 @@ export declare const beat_graph_data: {
25
25
  withBackup: {
26
26
  value: boolean;
27
27
  };
28
+ localRefs: {
29
+ agent: (namedInputs: {
30
+ context: MulmoStudioContext;
31
+ beat: import("../types/type.js").MulmoBeat;
32
+ index: number;
33
+ imageRefs: Record<string, string>;
34
+ movieRefs: Record<string, string>;
35
+ }) => Promise<import("./image_references.js").MediaRefs>;
36
+ inputs: {
37
+ context: string;
38
+ beat: string;
39
+ index: string;
40
+ imageRefs: string;
41
+ movieRefs: string;
42
+ };
43
+ };
28
44
  preprocessor: {
29
45
  agent: (namedInputs: {
30
46
  context: MulmoStudioContext;
@@ -14,7 +14,7 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
14
14
  import { settings2GraphAIConfig } from "../utils/utils.js";
15
15
  import { audioCheckerError } from "../utils/error_cause.js";
16
16
  import { extractImageFromMovie, ffmpegGetMediaDuration, trimMusic } from "../utils/ffmpeg_utils.js";
17
- import { getMediaRefs } from "./image_references.js";
17
+ import { getMediaRefs, resolveBeatLocalRefs } from "./image_references.js";
18
18
  import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
19
19
  const vanillaAgents = vanilla.default ?? vanilla;
20
20
  const imageAgents = {
@@ -60,8 +60,8 @@ export const beat_graph_data = {
60
60
  forceLipSync: { value: false },
61
61
  forceSoundEffect: { value: false },
62
62
  withBackup: { value: false },
63
- preprocessor: {
64
- agent: imagePreprocessAgent,
63
+ localRefs: {
64
+ agent: resolveBeatLocalRefs,
65
65
  inputs: {
66
66
  context: ":context",
67
67
  beat: ":beat",
@@ -70,6 +70,16 @@ export const beat_graph_data = {
70
70
  movieRefs: ":movieRefs",
71
71
  },
72
72
  },
73
+ preprocessor: {
74
+ agent: imagePreprocessAgent,
75
+ inputs: {
76
+ context: ":context",
77
+ beat: ":beat",
78
+ index: ":__mapIndex",
79
+ imageRefs: ":localRefs.imageRefs",
80
+ movieRefs: ":localRefs.movieRefs",
81
+ },
82
+ },
73
83
  imagePlugin: {
74
84
  if: ":beat.image",
75
85
  defaultValue: {},
@@ -78,8 +88,8 @@ export const beat_graph_data = {
78
88
  context: ":context",
79
89
  beat: ":beat",
80
90
  index: ":__mapIndex",
81
- imageRefs: ":imageRefs",
82
- movieRefs: ":movieRefs",
91
+ imageRefs: ":localRefs.imageRefs",
92
+ movieRefs: ":localRefs.movieRefs",
83
93
  onComplete: [":preprocessor"],
84
94
  },
85
95
  },
@@ -44,10 +44,11 @@ const getMediaDurationsOfAllBeats = (context) => {
44
44
  const beat = context.studio.script.beats[index];
45
45
  const { duration: movieDuration, hasAudio: hasMovieAudio } = await getMovieDuration(context, beat);
46
46
  const audioDuration = studioBeat.audioFile ? (await ffmpegGetMediaDuration(studioBeat.audioFile)).duration : 0;
47
+ const hasMoviePrompt = Boolean(beat.moviePrompt);
47
48
  return {
48
49
  movieDuration,
49
50
  audioDuration,
50
- hasMedia: movieDuration + audioDuration > 0,
51
+ hasMedia: movieDuration + audioDuration > 0 || hasMoviePrompt,
51
52
  silenceDuration: 0,
52
53
  hasMovieAudio,
53
54
  };
@@ -289,6 +289,11 @@ export declare const mulmoMovieMediaSchema: z.ZodObject<{
289
289
  path: z.ZodString;
290
290
  }, z.core.$strict>], "kind">;
291
291
  }, z.core.$strict>;
292
+ export declare const mulmoMoviePromptMediaSchema: z.ZodObject<{
293
+ type: z.ZodLiteral<"moviePrompt">;
294
+ prompt: z.ZodString;
295
+ imageName: z.ZodOptional<z.ZodString>;
296
+ }, z.core.$strict>;
292
297
  export declare const mulmoTextSlideMediaSchema: z.ZodObject<{
293
298
  type: z.ZodLiteral<"textSlide">;
294
299
  slide: z.ZodObject<{
@@ -3009,6 +3014,10 @@ export declare const mulmoImageParamsImagesValueSchema: z.ZodUnion<readonly [z.Z
3009
3014
  kind: z.ZodLiteral<"path">;
3010
3015
  path: z.ZodString;
3011
3016
  }, z.core.$strict>], "kind">;
3017
+ }, z.core.$strict>, z.ZodObject<{
3018
+ type: z.ZodLiteral<"moviePrompt">;
3019
+ prompt: z.ZodString;
3020
+ imageName: z.ZodOptional<z.ZodString>;
3012
3021
  }, z.core.$strict>]>;
3013
3022
  export declare const mulmoImageParamsImagesSchema: z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodObject<{
3014
3023
  type: z.ZodLiteral<"image">;
@@ -3041,6 +3050,10 @@ export declare const mulmoImageParamsImagesSchema: z.ZodRecord<z.ZodString, z.Zo
3041
3050
  kind: z.ZodLiteral<"path">;
3042
3051
  path: z.ZodString;
3043
3052
  }, z.core.$strict>], "kind">;
3053
+ }, z.core.$strict>, z.ZodObject<{
3054
+ type: z.ZodLiteral<"moviePrompt">;
3055
+ prompt: z.ZodString;
3056
+ imageName: z.ZodOptional<z.ZodString>;
3044
3057
  }, z.core.$strict>]>>;
3045
3058
  export declare const mulmoFillOptionSchema: z.ZodObject<{
3046
3059
  style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
@@ -3125,6 +3138,10 @@ export declare const mulmoImageParamsSchema: z.ZodObject<{
3125
3138
  kind: z.ZodLiteral<"path">;
3126
3139
  path: z.ZodString;
3127
3140
  }, z.core.$strict>], "kind">;
3141
+ }, z.core.$strict>, z.ZodObject<{
3142
+ type: z.ZodLiteral<"moviePrompt">;
3143
+ prompt: z.ZodString;
3144
+ imageName: z.ZodOptional<z.ZodString>;
3128
3145
  }, z.core.$strict>]>>>;
3129
3146
  backgroundImage: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
3130
3147
  source: z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -6364,6 +6381,42 @@ export declare const mulmoBeatSchema: z.ZodObject<{
6364
6381
  }, z.core.$strip>], "type">>;
6365
6382
  bottomOffset: z.ZodOptional<z.ZodNumber>;
6366
6383
  }, z.core.$strict>>;
6384
+ images: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodObject<{
6385
+ type: z.ZodLiteral<"image">;
6386
+ source: z.ZodDiscriminatedUnion<[z.ZodObject<{
6387
+ kind: z.ZodLiteral<"url">;
6388
+ url: z.ZodURL;
6389
+ }, z.core.$strict>, z.ZodObject<{
6390
+ kind: z.ZodLiteral<"base64">;
6391
+ data: z.ZodString;
6392
+ }, z.core.$strict>, z.ZodObject<{
6393
+ kind: z.ZodLiteral<"path">;
6394
+ path: z.ZodString;
6395
+ }, z.core.$strict>], "kind">;
6396
+ }, z.core.$strict>, z.ZodObject<{
6397
+ type: z.ZodLiteral<"imagePrompt">;
6398
+ prompt: z.ZodString;
6399
+ canvasSize: z.ZodOptional<z.ZodObject<{
6400
+ width: z.ZodNumber;
6401
+ height: z.ZodNumber;
6402
+ }, z.core.$strict>>;
6403
+ }, z.core.$strict>, z.ZodObject<{
6404
+ type: z.ZodLiteral<"movie">;
6405
+ source: z.ZodDiscriminatedUnion<[z.ZodObject<{
6406
+ kind: z.ZodLiteral<"url">;
6407
+ url: z.ZodURL;
6408
+ }, z.core.$strict>, z.ZodObject<{
6409
+ kind: z.ZodLiteral<"base64">;
6410
+ data: z.ZodString;
6411
+ }, z.core.$strict>, z.ZodObject<{
6412
+ kind: z.ZodLiteral<"path">;
6413
+ path: z.ZodString;
6414
+ }, z.core.$strict>], "kind">;
6415
+ }, z.core.$strict>, z.ZodObject<{
6416
+ type: z.ZodLiteral<"moviePrompt">;
6417
+ prompt: z.ZodString;
6418
+ imageName: z.ZodOptional<z.ZodString>;
6419
+ }, z.core.$strict>]>>>;
6367
6420
  imageNames: z.ZodOptional<z.ZodArray<z.ZodString>>;
6368
6421
  imagePrompt: z.ZodOptional<z.ZodString>;
6369
6422
  moviePrompt: z.ZodOptional<z.ZodString>;
@@ -6490,6 +6543,10 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
6490
6543
  kind: z.ZodLiteral<"path">;
6491
6544
  path: z.ZodString;
6492
6545
  }, z.core.$strict>], "kind">;
6546
+ }, z.core.$strict>, z.ZodObject<{
6547
+ type: z.ZodLiteral<"moviePrompt">;
6548
+ prompt: z.ZodString;
6549
+ imageName: z.ZodOptional<z.ZodString>;
6493
6550
  }, z.core.$strict>]>>>;
6494
6551
  backgroundImage: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
6495
6552
  source: z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -6962,6 +7019,10 @@ export declare const mulmoScriptSchema: z.ZodObject<{
6962
7019
  kind: z.ZodLiteral<"path">;
6963
7020
  path: z.ZodString;
6964
7021
  }, z.core.$strict>], "kind">;
7022
+ }, z.core.$strict>, z.ZodObject<{
7023
+ type: z.ZodLiteral<"moviePrompt">;
7024
+ prompt: z.ZodString;
7025
+ imageName: z.ZodOptional<z.ZodString>;
6965
7026
  }, z.core.$strict>]>>>;
6966
7027
  backgroundImage: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
6967
7028
  source: z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -10196,6 +10257,42 @@ export declare const mulmoScriptSchema: z.ZodObject<{
10196
10257
  }, z.core.$strip>], "type">>;
10197
10258
  bottomOffset: z.ZodOptional<z.ZodNumber>;
10198
10259
  }, z.core.$strict>>;
10260
+ images: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodObject<{
10261
+ type: z.ZodLiteral<"image">;
10262
+ source: z.ZodDiscriminatedUnion<[z.ZodObject<{
10263
+ kind: z.ZodLiteral<"url">;
10264
+ url: z.ZodURL;
10265
+ }, z.core.$strict>, z.ZodObject<{
10266
+ kind: z.ZodLiteral<"base64">;
10267
+ data: z.ZodString;
10268
+ }, z.core.$strict>, z.ZodObject<{
10269
+ kind: z.ZodLiteral<"path">;
10270
+ path: z.ZodString;
10271
+ }, z.core.$strict>], "kind">;
10272
+ }, z.core.$strict>, z.ZodObject<{
10273
+ type: z.ZodLiteral<"imagePrompt">;
10274
+ prompt: z.ZodString;
10275
+ canvasSize: z.ZodOptional<z.ZodObject<{
10276
+ width: z.ZodNumber;
10277
+ height: z.ZodNumber;
10278
+ }, z.core.$strict>>;
10279
+ }, z.core.$strict>, z.ZodObject<{
10280
+ type: z.ZodLiteral<"movie">;
10281
+ source: z.ZodDiscriminatedUnion<[z.ZodObject<{
10282
+ kind: z.ZodLiteral<"url">;
10283
+ url: z.ZodURL;
10284
+ }, z.core.$strict>, z.ZodObject<{
10285
+ kind: z.ZodLiteral<"base64">;
10286
+ data: z.ZodString;
10287
+ }, z.core.$strict>, z.ZodObject<{
10288
+ kind: z.ZodLiteral<"path">;
10289
+ path: z.ZodString;
10290
+ }, z.core.$strict>], "kind">;
10291
+ }, z.core.$strict>, z.ZodObject<{
10292
+ type: z.ZodLiteral<"moviePrompt">;
10293
+ prompt: z.ZodString;
10294
+ imageName: z.ZodOptional<z.ZodString>;
10295
+ }, z.core.$strict>]>>>;
10199
10296
  imageNames: z.ZodOptional<z.ZodArray<z.ZodString>>;
10200
10297
  imagePrompt: z.ZodOptional<z.ZodString>;
10201
10298
  moviePrompt: z.ZodOptional<z.ZodString>;
@@ -10397,6 +10494,10 @@ export declare const mulmoStudioSchema: z.ZodObject<{
10397
10494
  kind: z.ZodLiteral<"path">;
10398
10495
  path: z.ZodString;
10399
10496
  }, z.core.$strict>], "kind">;
10497
+ }, z.core.$strict>, z.ZodObject<{
10498
+ type: z.ZodLiteral<"moviePrompt">;
10499
+ prompt: z.ZodString;
10500
+ imageName: z.ZodOptional<z.ZodString>;
10400
10501
  }, z.core.$strict>]>>>;
10401
10502
  backgroundImage: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
10402
10503
  source: z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -13631,6 +13732,42 @@ export declare const mulmoStudioSchema: z.ZodObject<{
13631
13732
  }, z.core.$strip>], "type">>;
13632
13733
  bottomOffset: z.ZodOptional<z.ZodNumber>;
13633
13734
  }, z.core.$strict>>;
13735
+ images: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodObject<{
13736
+ type: z.ZodLiteral<"image">;
13737
+ source: z.ZodDiscriminatedUnion<[z.ZodObject<{
13738
+ kind: z.ZodLiteral<"url">;
13739
+ url: z.ZodURL;
13740
+ }, z.core.$strict>, z.ZodObject<{
13741
+ kind: z.ZodLiteral<"base64">;
13742
+ data: z.ZodString;
13743
+ }, z.core.$strict>, z.ZodObject<{
13744
+ kind: z.ZodLiteral<"path">;
13745
+ path: z.ZodString;
13746
+ }, z.core.$strict>], "kind">;
13747
+ }, z.core.$strict>, z.ZodObject<{
13748
+ type: z.ZodLiteral<"imagePrompt">;
13749
+ prompt: z.ZodString;
13750
+ canvasSize: z.ZodOptional<z.ZodObject<{
13751
+ width: z.ZodNumber;
13752
+ height: z.ZodNumber;
13753
+ }, z.core.$strict>>;
13754
+ }, z.core.$strict>, z.ZodObject<{
13755
+ type: z.ZodLiteral<"movie">;
13756
+ source: z.ZodDiscriminatedUnion<[z.ZodObject<{
13757
+ kind: z.ZodLiteral<"url">;
13758
+ url: z.ZodURL;
13759
+ }, z.core.$strict>, z.ZodObject<{
13760
+ kind: z.ZodLiteral<"base64">;
13761
+ data: z.ZodString;
13762
+ }, z.core.$strict>, z.ZodObject<{
13763
+ kind: z.ZodLiteral<"path">;
13764
+ path: z.ZodString;
13765
+ }, z.core.$strict>], "kind">;
13766
+ }, z.core.$strict>, z.ZodObject<{
13767
+ type: z.ZodLiteral<"moviePrompt">;
13768
+ prompt: z.ZodString;
13769
+ imageName: z.ZodOptional<z.ZodString>;
13770
+ }, z.core.$strict>]>>>;
13634
13771
  imageNames: z.ZodOptional<z.ZodArray<z.ZodString>>;
13635
13772
  imagePrompt: z.ZodOptional<z.ZodString>;
13636
13773
  moviePrompt: z.ZodOptional<z.ZodString>;
@@ -13768,6 +13905,10 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
13768
13905
  kind: z.ZodLiteral<"path">;
13769
13906
  path: z.ZodString;
13770
13907
  }, z.core.$strict>], "kind">;
13908
+ }, z.core.$strict>, z.ZodObject<{
13909
+ type: z.ZodLiteral<"moviePrompt">;
13910
+ prompt: z.ZodString;
13911
+ imageName: z.ZodOptional<z.ZodString>;
13771
13912
  }, z.core.$strict>]>>>;
13772
13913
  backgroundImage: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
13773
13914
  source: z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -14234,6 +14375,10 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
14234
14375
  kind: z.ZodLiteral<"path">;
14235
14376
  path: z.ZodString;
14236
14377
  }, z.core.$strict>], "kind">;
14378
+ }, z.core.$strict>, z.ZodObject<{
14379
+ type: z.ZodLiteral<"moviePrompt">;
14380
+ prompt: z.ZodString;
14381
+ imageName: z.ZodOptional<z.ZodString>;
14237
14382
  }, z.core.$strict>]>>>;
14238
14383
  backgroundImage: z.ZodOptional<z.ZodNullable<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
14239
14384
  source: z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -148,6 +148,13 @@ export const mulmoMovieMediaSchema = z
148
148
  source: mediaSourceSchema,
149
149
  })
150
150
  .strict();
151
+ export const mulmoMoviePromptMediaSchema = z
152
+ .object({
153
+ type: z.literal("moviePrompt"),
154
+ prompt: z.string().min(1),
155
+ imageName: z.string().optional().describe("Reference an imageRefs key to use as image-to-video input"),
156
+ })
157
+ .strict();
151
158
  export const mulmoTextSlideMediaSchema = z
152
159
  .object({
153
160
  type: z.literal("textSlide"),
@@ -337,7 +344,12 @@ export const mulmoImagePromptMediaSchema = z
337
344
  canvasSize: z.object({ width: z.number(), height: z.number() }).strict().optional(),
338
345
  })
339
346
  .strict();
340
- export const mulmoImageParamsImagesValueSchema = z.union([mulmoImageMediaSchema, mulmoImagePromptMediaSchema, mulmoMovieMediaSchema]);
347
+ export const mulmoImageParamsImagesValueSchema = z.union([
348
+ mulmoImageMediaSchema,
349
+ mulmoImagePromptMediaSchema,
350
+ mulmoMovieMediaSchema,
351
+ mulmoMoviePromptMediaSchema,
352
+ ]);
341
353
  export const mulmoImageParamsImagesSchema = z.record(imageIdSchema, mulmoImageParamsImagesValueSchema);
342
354
  export const mulmoFillOptionSchema = z
343
355
  .object({
@@ -486,6 +498,9 @@ export const mulmoBeatSchema = z
486
498
  speechOptions: speechOptionsSchema.optional(),
487
499
  textSlideParams: textSlideParamsSchema.optional(),
488
500
  captionParams: mulmoCaptionParamsSchema.optional(),
501
+ images: mulmoImageParamsImagesSchema
502
+ .optional()
503
+ .describe("Beat-local media references. Same schema as imageParams.images. Merged with global refs (local takes precedence)."),
489
504
  imageNames: z.array(imageIdSchema).optional(), // list of image names to use for image generation. The default is all images in the imageParams.images.
490
505
  imagePrompt: z.string().optional(),
491
506
  moviePrompt: z.string().optional(),
@@ -1,5 +1,5 @@
1
1
  import { type CallbackFunction } from "graphai";
2
- import { langSchema, localizedTextSchema, mulmoBeatSchema, mulmoScriptSchema, mulmoStudioSchema, mulmoStudioBeatSchema, mulmoStoryboardSchema, mulmoStoryboardSceneSchema, mulmoStudioMultiLingualSchema, mulmoStudioMultiLingualArraySchema, mulmoStudioMultiLingualDataSchema, mulmoStudioMultiLingualFileSchema, speakerDictionarySchema, speakerSchema, mulmoSpeechParamsSchema, mulmoImageParamsSchema, mulmoImageParamsImagesValueSchema, mulmoImageParamsImagesSchema, mulmoFillOptionSchema, mulmoTransitionSchema, mulmoVideoFilterSchema, mulmoMovieParamsSchema, mulmoSoundEffectParamsSchema, mulmoLipSyncParamsSchema, textSlideParamsSchema, speechOptionsSchema, speakerDataSchema, mulmoCanvasDimensionSchema, mulmoPromptTemplateSchema, mulmoPromptTemplateFileSchema, text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoPresentationStyleSchema, multiLingualTextsSchema, mulmoImageAssetSchema, mulmoMermaidMediaSchema, mulmoTextSlideMediaSchema, mulmoMarkdownMediaSchema, mulmoImageMediaSchema, mulmoChartMediaSchema, mediaSourceSchema, mediaSourceMermaidSchema, backgroundImageSchema, backgroundImageSourceSchema, mulmoSessionStateSchema, mulmoOpenAIImageModelSchema, mulmoGoogleImageModelSchema, mulmoGoogleMovieModelSchema, mulmoReplicateMovieModelSchema, mulmoImagePromptMediaSchema, mulmoMovieMediaSchema, markdownLayoutSchema, row2Schema, grid2x2Schema } from "./schema.js";
2
+ import { langSchema, localizedTextSchema, mulmoBeatSchema, mulmoScriptSchema, mulmoStudioSchema, mulmoStudioBeatSchema, mulmoStoryboardSchema, mulmoStoryboardSceneSchema, mulmoStudioMultiLingualSchema, mulmoStudioMultiLingualArraySchema, mulmoStudioMultiLingualDataSchema, mulmoStudioMultiLingualFileSchema, speakerDictionarySchema, speakerSchema, mulmoSpeechParamsSchema, mulmoImageParamsSchema, mulmoImageParamsImagesValueSchema, mulmoImageParamsImagesSchema, mulmoFillOptionSchema, mulmoTransitionSchema, mulmoVideoFilterSchema, mulmoMovieParamsSchema, mulmoSoundEffectParamsSchema, mulmoLipSyncParamsSchema, textSlideParamsSchema, speechOptionsSchema, speakerDataSchema, mulmoCanvasDimensionSchema, mulmoPromptTemplateSchema, mulmoPromptTemplateFileSchema, text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoPresentationStyleSchema, multiLingualTextsSchema, mulmoImageAssetSchema, mulmoMermaidMediaSchema, mulmoTextSlideMediaSchema, mulmoMarkdownMediaSchema, mulmoImageMediaSchema, mulmoChartMediaSchema, mediaSourceSchema, mediaSourceMermaidSchema, backgroundImageSchema, backgroundImageSourceSchema, mulmoSessionStateSchema, mulmoOpenAIImageModelSchema, mulmoGoogleImageModelSchema, mulmoGoogleMovieModelSchema, mulmoReplicateMovieModelSchema, mulmoImagePromptMediaSchema, mulmoMovieMediaSchema, mulmoMoviePromptMediaSchema, markdownLayoutSchema, row2Schema, grid2x2Schema } from "./schema.js";
3
3
  import { pdf_modes, pdf_sizes, storyToScriptGenerateMode } from "./const.js";
4
4
  import type { LLM } from "./provider2agent.js";
5
5
  import { z } from "zod";
@@ -48,6 +48,7 @@ export type MulmoGoogleImageModel = z.infer<typeof mulmoGoogleImageModelSchema>;
48
48
  export type MulmoGoogleMovieModel = z.infer<typeof mulmoGoogleMovieModelSchema>;
49
49
  export type MulmoReplicateMovieModel = z.infer<typeof mulmoReplicateMovieModelSchema>;
50
50
  export type MulmoImagePromptMedia = z.infer<typeof mulmoImagePromptMediaSchema>;
51
+ export type MulmoMoviePromptMedia = z.infer<typeof mulmoMoviePromptMediaSchema>;
51
52
  export type MulmoMarkdownLayout = z.infer<typeof markdownLayoutSchema>;
52
53
  export type MulmoRow2 = z.infer<typeof row2Schema>;
53
54
  export type MulmoGrid2x2 = z.infer<typeof grid2x2Schema>;
@@ -78,6 +78,10 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
78
78
  kind: "path";
79
79
  path: string;
80
80
  };
81
+ } | {
82
+ type: "moviePrompt";
83
+ prompt: string;
84
+ imageName?: string | undefined;
81
85
  } | {
82
86
  type: "imagePrompt";
83
87
  prompt: string;
@@ -1980,6 +1984,42 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
1980
1984
  } | undefined;
1981
1985
  bottomOffset?: number | undefined;
1982
1986
  } | undefined;
1987
+ images?: Record<string, {
1988
+ type: "image";
1989
+ source: {
1990
+ kind: "url";
1991
+ url: string;
1992
+ } | {
1993
+ kind: "base64";
1994
+ data: string;
1995
+ } | {
1996
+ kind: "path";
1997
+ path: string;
1998
+ };
1999
+ } | {
2000
+ type: "movie";
2001
+ source: {
2002
+ kind: "url";
2003
+ url: string;
2004
+ } | {
2005
+ kind: "base64";
2006
+ data: string;
2007
+ } | {
2008
+ kind: "path";
2009
+ path: string;
2010
+ };
2011
+ } | {
2012
+ type: "moviePrompt";
2013
+ prompt: string;
2014
+ imageName?: string | undefined;
2015
+ } | {
2016
+ type: "imagePrompt";
2017
+ prompt: string;
2018
+ canvasSize?: {
2019
+ width: number;
2020
+ height: number;
2021
+ } | undefined;
2022
+ }> | undefined;
1983
2023
  imageNames?: string[] | undefined;
1984
2024
  imagePrompt?: string | undefined;
1985
2025
  moviePrompt?: string | undefined;
@@ -2190,6 +2230,10 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
2190
2230
  kind: "path";
2191
2231
  path: string;
2192
2232
  };
2233
+ } | {
2234
+ type: "moviePrompt";
2235
+ prompt: string;
2236
+ imageName?: string | undefined;
2193
2237
  } | {
2194
2238
  type: "imagePrompt";
2195
2239
  prompt: string;
@@ -4092,6 +4136,42 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
4092
4136
  } | undefined;
4093
4137
  bottomOffset?: number | undefined;
4094
4138
  } | undefined;
4139
+ images?: Record<string, {
4140
+ type: "image";
4141
+ source: {
4142
+ kind: "url";
4143
+ url: string;
4144
+ } | {
4145
+ kind: "base64";
4146
+ data: string;
4147
+ } | {
4148
+ kind: "path";
4149
+ path: string;
4150
+ };
4151
+ } | {
4152
+ type: "movie";
4153
+ source: {
4154
+ kind: "url";
4155
+ url: string;
4156
+ } | {
4157
+ kind: "base64";
4158
+ data: string;
4159
+ } | {
4160
+ kind: "path";
4161
+ path: string;
4162
+ };
4163
+ } | {
4164
+ type: "moviePrompt";
4165
+ prompt: string;
4166
+ imageName?: string | undefined;
4167
+ } | {
4168
+ type: "imagePrompt";
4169
+ prompt: string;
4170
+ canvasSize?: {
4171
+ width: number;
4172
+ height: number;
4173
+ } | undefined;
4174
+ }> | undefined;
4095
4175
  imageNames?: string[] | undefined;
4096
4176
  imagePrompt?: string | undefined;
4097
4177
  moviePrompt?: string | undefined;
@@ -4309,6 +4389,10 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
4309
4389
  kind: "path";
4310
4390
  path: string;
4311
4391
  };
4392
+ } | {
4393
+ type: "moviePrompt";
4394
+ prompt: string;
4395
+ imageName?: string | undefined;
4312
4396
  } | {
4313
4397
  type: "imagePrompt";
4314
4398
  prompt: string;
@@ -16,5 +16,11 @@ export declare const renderHTMLToFrames: (html: string, outputDir: string, width
16
16
  * page.screencast() captures frames directly to an mp4 file.
17
17
  */
18
18
  export declare const renderHTMLToVideo: (html: string, videoPath: string, width: number, height: number, totalFrames: number, fps: number) => Promise<void>;
19
+ /**
20
+ * Render the final frame of an animated HTML page as a static image.
21
+ * Loads the animated HTML, calls window.renderFinal() to set all animations
22
+ * to their end state, then takes a screenshot. Used for PDF/thumbnail generation.
23
+ */
24
+ export declare const renderHTMLToFinalFrame: (html: string, outputPath: string, width: number, height: number) => Promise<void>;
19
25
  export declare const renderMarkdownToImage: (markdown: string, style: string, outputPath: string, width: number, height: number) => Promise<void>;
20
26
  export declare const interpolate: (template: string, data: Record<string, string>) => string;
@@ -233,6 +233,35 @@ export const renderHTMLToVideo = async (html, videoPath, width, height, totalFra
233
233
  await browser.close();
234
234
  }
235
235
  };
236
+ /**
237
+ * Render the final frame of an animated HTML page as a static image.
238
+ * Loads the animated HTML, calls window.renderFinal() to set all animations
239
+ * to their end state, then takes a screenshot. Used for PDF/thumbnail generation.
240
+ */
241
+ export const renderHTMLToFinalFrame = async (html, outputPath, width, height) => {
242
+ const browser = await puppeteer.launch({
243
+ args: isCI ? ["--no-sandbox", "--allow-file-access-from-files"] : ["--allow-file-access-from-files"],
244
+ });
245
+ try {
246
+ const page = await browser.newPage();
247
+ await loadHtmlIntoPage(page, html, 30000);
248
+ await page.setViewport({ width, height });
249
+ await page.addStyleTag({ content: "html{height:100%;margin:0;padding:0;overflow:hidden}" });
250
+ await scaleContentToFit(page, width, height);
251
+ await waitForVideosReady(page);
252
+ // Render the final frame (all animations at end state)
253
+ await page.evaluate(async () => {
254
+ const w = window;
255
+ if (typeof w.renderFinal === "function") {
256
+ await Promise.resolve(w.renderFinal());
257
+ }
258
+ });
259
+ await page.screenshot({ path: outputPath });
260
+ }
261
+ finally {
262
+ await browser.close();
263
+ }
264
+ };
236
265
  export const renderMarkdownToImage = async (markdown, style, outputPath, width, height) => {
237
266
  const header = `<head><style>${style}</style></head>`;
238
267
  const body = await marked(markdown);
@@ -2,7 +2,7 @@ import fs from "node:fs";
2
2
  import nodePath from "node:path";
3
3
  import { MulmoBeatMethods } from "../../methods/mulmo_beat.js";
4
4
  import { getHTMLFile, getJSFile } from "../file.js";
5
- import { renderHTMLToImage, interpolate, renderHTMLToFrames, renderHTMLToVideo } from "../html_render.js";
5
+ import { renderHTMLToImage, interpolate, renderHTMLToFrames, renderHTMLToVideo, renderHTMLToFinalFrame } from "../html_render.js";
6
6
  import { framesToVideo } from "../ffmpeg_utils.js";
7
7
  import { parrotingImagePath } from "./utils.js";
8
8
  import { swipeElementsToHtml, swipeElementsToScript } from "../swipe_to_html.js";
@@ -84,22 +84,13 @@ const getAnimationConfig = (params) => {
84
84
  const movie = MulmoBeatMethods.isMovieMode(animation);
85
85
  return { fps, movie };
86
86
  };
87
- const processHtmlTailwindAnimated = async (params) => {
88
- const { beat, imagePath, canvasSize, context } = params;
89
- if (!beat.image || beat.image.type !== imageType)
90
- return;
91
- const animConfig = getAnimationConfig(params);
92
- if (!animConfig)
93
- return;
94
- const duration = params.beatDuration ?? beat.duration;
95
- if (duration === undefined) {
96
- throw new Error("html_tailwind animation requires beat.duration or audio-derived duration. Set duration in the beat or ensure audio is generated first.");
97
- }
98
- const fps = animConfig.fps;
99
- const totalFrames = Math.floor(duration * fps);
100
- if (totalFrames <= 0) {
101
- throw new Error(`html_tailwind animation: totalFrames is ${totalFrames} (duration=${duration}, fps=${fps}). Increase duration or fps.`);
102
- }
87
+ /** Large frame count to ensure all animations reach their end state when exact duration is unknown */
88
+ const FINAL_FRAME_TOTAL = 9000;
89
+ /**
90
+ * Build the animated HTML string from beat data and template.
91
+ */
92
+ const buildAnimatedHtml = (params, totalFrames, fps) => {
93
+ const { beat, context } = params;
103
94
  const imageData = beat.image;
104
95
  const { html, script } = resolveHtmlAndScript(imageData);
105
96
  const template = getHTMLFile("tailwind_animated");
@@ -115,26 +106,49 @@ const processHtmlTailwindAnimated = async (params) => {
115
106
  });
116
107
  const resolvedImageRefs = resolveImageRefs(rawHtmlData, params.imageRefs ?? {});
117
108
  const resolvedAllRefs = resolveMovieRefs(resolvedImageRefs, params.movieRefs ?? {});
118
- const htmlData = resolveRelativeImagePaths(resolvedAllRefs, context.fileDirs.mulmoFileDirPath);
119
- // imagePath is set to the .mp4 path by imagePluginAgent for animated beats
120
- const videoPath = imagePath;
121
- if (animConfig.movie) {
122
- // CDP screencast: real-time recording (experimental, faster)
123
- await renderHTMLToVideo(htmlData, videoPath, canvasSize.width, canvasSize.height, totalFrames, fps);
124
- }
125
- else {
126
- // Frame-by-frame screenshot (deterministic, slower)
127
- const framesDir = videoPath.replace(/\.[^/.]+$/, "_frames");
128
- fs.mkdirSync(framesDir, { recursive: true });
129
- try {
130
- await renderHTMLToFrames(htmlData, framesDir, canvasSize.width, canvasSize.height, totalFrames, fps);
131
- await framesToVideo(framesDir, videoPath, fps, canvasSize.width, canvasSize.height);
109
+ return resolveRelativeImagePaths(resolvedAllRefs, context.fileDirs.mulmoFileDirPath);
110
+ };
111
+ const processHtmlTailwindAnimated = async (params) => {
112
+ const { beat, imagePath, canvasSize } = params;
113
+ if (!beat.image || beat.image.type !== imageType)
114
+ return;
115
+ const animConfig = getAnimationConfig(params);
116
+ if (!animConfig)
117
+ return;
118
+ const duration = params.beatDuration ?? beat.duration;
119
+ const fps = animConfig.fps;
120
+ // Generate video if duration is available
121
+ if (duration !== undefined) {
122
+ const totalFrames = Math.floor(duration * fps);
123
+ if (totalFrames <= 0) {
124
+ throw new Error(`html_tailwind animation: totalFrames is ${totalFrames} (duration=${duration}, fps=${fps}). Increase duration or fps.`);
125
+ }
126
+ const htmlData = buildAnimatedHtml(params, totalFrames, fps);
127
+ // imagePath is set to the .mp4 path by imagePluginAgent for animated beats
128
+ const videoPath = imagePath;
129
+ if (animConfig.movie) {
130
+ await renderHTMLToVideo(htmlData, videoPath, canvasSize.width, canvasSize.height, totalFrames, fps);
132
131
  }
133
- finally {
134
- fs.rmSync(framesDir, { recursive: true, force: true });
132
+ else {
133
+ const framesDir = videoPath.replace(/\.[^/.]+$/, "_frames");
134
+ fs.mkdirSync(framesDir, { recursive: true });
135
+ try {
136
+ await renderHTMLToFrames(htmlData, framesDir, canvasSize.width, canvasSize.height, totalFrames, fps);
137
+ await framesToVideo(framesDir, videoPath, fps, canvasSize.width, canvasSize.height);
138
+ }
139
+ finally {
140
+ fs.rmSync(framesDir, { recursive: true, force: true });
141
+ }
135
142
  }
136
143
  }
137
- return videoPath;
144
+ // Generate a high-quality static image of the final frame for PDF/thumbnail use.
145
+ // Uses a large totalFrames so all animations are guaranteed to reach their end state,
146
+ // even when exact duration is unknown (e.g., PDF generation without audio).
147
+ const finalFramePath = imagePath.replace(/_animated\.mp4$/, ".png");
148
+ const finalHtml = buildAnimatedHtml(params, FINAL_FRAME_TOTAL, fps);
149
+ await renderHTMLToFinalFrame(finalHtml, finalFramePath, canvasSize.width, canvasSize.height);
150
+ // Return video path when video was generated, otherwise return the static PNG path
151
+ return duration !== undefined ? imagePath : finalFramePath;
138
152
  };
139
153
  const processHtmlTailwindStatic = async (params) => {
140
154
  const { beat, imagePath, canvasSize, context } = params;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast",
3
- "version": "2.6.1",
3
+ "version": "2.6.3",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "lib/index.node.js",
@@ -88,7 +88,7 @@
88
88
  "homepage": "https://github.com/receptron/mulmocast-cli#readme",
89
89
  "dependencies": {
90
90
  "@google-cloud/text-to-speech": "^6.4.0",
91
- "@google/genai": "^1.44.0",
91
+ "@google/genai": "^1.45.0",
92
92
  "@graphai/anthropic_agent": "^2.0.12",
93
93
  "@graphai/browserless_agent": "^2.0.2",
94
94
  "@graphai/gemini_agent": "^2.0.5",
@@ -98,8 +98,8 @@
98
98
  "@graphai/stream_agent_filter": "^2.0.3",
99
99
  "@graphai/vanilla": "^2.0.12",
100
100
  "@graphai/vanilla_node_agents": "^2.0.4",
101
- "@inquirer/input": "^5.0.8",
102
- "@inquirer/select": "^5.1.0",
101
+ "@inquirer/input": "^5.0.10",
102
+ "@inquirer/select": "^5.1.2",
103
103
  "@modelcontextprotocol/sdk": "^1.27.1",
104
104
  "@mozilla/readability": "^0.6.0",
105
105
  "@tavily/core": "^0.5.11",
@@ -108,7 +108,7 @@
108
108
  "dotenv": "^17.3.1",
109
109
  "fluent-ffmpeg": "^2.1.3",
110
110
  "graphai": "^2.0.16",
111
- "jsdom": "^28.1.0",
111
+ "jsdom": "^29.0.0",
112
112
  "marked": "^17.0.4",
113
113
  "mulmocast-vision": "^1.0.9",
114
114
  "ora": "^9.3.0",
@@ -133,8 +133,8 @@
133
133
  "globals": "^17.4.0",
134
134
  "prettier": "^3.8.1",
135
135
  "tsx": "^4.21.0",
136
- "typescript": "6.0.0-beta",
137
- "typescript-eslint": "^8.57.0"
136
+ "typescript": "6.0.1-rc",
137
+ "typescript-eslint": "^8.57.1"
138
138
  },
139
139
  "engines": {
140
140
  "node": ">=22.0.0"
@@ -0,0 +1,211 @@
1
+ {
2
+ "$mulmocast": { "version": "1.1" },
3
+ "lang": "en",
4
+ "canvasSize": { "width": 1080, "height": 1920 },
5
+ "title": "Beat-local media references test",
6
+ "speechParams": {
7
+ "provider": "openai",
8
+ "speakers": {
9
+ "Presenter": { "provider": "openai", "voiceId": "alloy" }
10
+ }
11
+ },
12
+ "imageParams": {
13
+ "provider": "google",
14
+ "model": "gemini-2.5-flash-image",
15
+ "images": {
16
+ "global_bg": {
17
+ "type": "image",
18
+ "source": {
19
+ "kind": "url",
20
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-cli/refs/heads/main/assets/images/mulmocast_credit.png"
21
+ }
22
+ },
23
+ "character": {
24
+ "type": "imagePrompt",
25
+ "prompt": "A friendly cartoon robot mascot with round blue body, big expressive eyes, small antenna on head, waving hand, white background, simple clean illustration style"
26
+ },
27
+ "global_movie": {
28
+ "type": "movie",
29
+ "source": {
30
+ "kind": "path",
31
+ "path": "../../test/assets/hello.mp4"
32
+ }
33
+ }
34
+ }
35
+ },
36
+ "beats": [
37
+ {
38
+ "text": "Beat1. Beat-local image referenced via image:name in html_tailwind.",
39
+ "speaker": "Presenter",
40
+ "images": {
41
+ "local_bg": {
42
+ "type": "image",
43
+ "source": {
44
+ "kind": "url",
45
+ "url": "https://raw.githubusercontent.com/receptron/mulmocast-cli/refs/heads/main/assets/images/mulmocast_credit.png"
46
+ }
47
+ }
48
+ },
49
+ "image": {
50
+ "type": "html_tailwind",
51
+ "html": [
52
+ "<div class='h-full w-full overflow-hidden relative bg-black'>",
53
+ " <div style='position:absolute;inset:0;overflow:hidden'>",
54
+ " <img src='image:local_bg' style='width:100%;height:100%;object-fit:cover;filter:brightness(0.8)' />",
55
+ " </div>",
56
+ " <div style='position:absolute;top:50%;left:40px;right:40px;transform:translateY(-50%);text-align:center'>",
57
+ " <div style='display:inline-block;background:rgba(59,130,246,0.85);padding:12px 32px;border-radius:12px'>",
58
+ " <span style='color:white;font-size:72px;font-weight:900'>Beat 1</span>",
59
+ " </div>",
60
+ " <div style='color:white;font-size:44px;font-weight:900;margin-top:20px;text-shadow:0 4px 16px rgba(0,0,0,0.9)'>Local image → image:name</div>",
61
+ " </div>",
62
+ "</div>"
63
+ ]
64
+ }
65
+ },
66
+ {
67
+ "text": "Beat2. Local key overrides the global one with the same name. This beat sees the local version of global_bg.",
68
+ "speaker": "Presenter",
69
+ "images": {
70
+ "global_bg": {
71
+ "type": "image",
72
+ "source": {
73
+ "kind": "path",
74
+ "path": "../../assets/images/mulmocast_credit.png"
75
+ }
76
+ }
77
+ },
78
+ "image": {
79
+ "type": "html_tailwind",
80
+ "html": [
81
+ "<div class='h-full w-full overflow-hidden relative bg-black'>",
82
+ " <div style='position:absolute;inset:0;overflow:hidden'>",
83
+ " <img src='image:global_bg' style='width:100%;height:100%;object-fit:cover;filter:brightness(0.8)' />",
84
+ " </div>",
85
+ " <div style='position:absolute;top:50%;left:40px;right:40px;transform:translateY(-50%);text-align:center'>",
86
+ " <div style='display:inline-block;background:rgba(239,68,68,0.85);padding:12px 32px;border-radius:12px'>",
87
+ " <span style='color:white;font-size:72px;font-weight:900'>Beat 2</span>",
88
+ " </div>",
89
+ " <div style='color:white;font-size:44px;font-weight:900;margin-top:20px;text-shadow:0 4px 16px rgba(0,0,0,0.9)'>Local overrides global_bg</div>",
90
+ " </div>",
91
+ "</div>"
92
+ ]
93
+ }
94
+ },
95
+ {
96
+ "text": "Beat3. No beat.images. Uses global refs only. Both image:global_bg and movie:global_movie work.",
97
+ "speaker": "Presenter",
98
+ "image": {
99
+ "type": "html_tailwind",
100
+ "html": [
101
+ "<div class='h-full w-full overflow-hidden relative bg-black'>",
102
+ " <div style='position:absolute;inset:0;overflow:hidden'>",
103
+ " <video src='movie:global_movie' autoplay muted loop style='width:100%;height:100%;object-fit:cover;filter:brightness(0.7)'></video>",
104
+ " </div>",
105
+ " <div style='position:absolute;top:50%;left:40px;right:40px;transform:translateY(-50%);text-align:center'>",
106
+ " <div style='display:inline-block;background:rgba(34,197,94,0.85);padding:12px 32px;border-radius:12px'>",
107
+ " <span style='color:white;font-size:72px;font-weight:900'>Beat 3</span>",
108
+ " </div>",
109
+ " <div style='color:white;font-size:44px;font-weight:900;margin-top:20px;text-shadow:0 4px 16px rgba(0,0,0,0.9)'>Global refs only (no beat.images)</div>",
110
+ " </div>",
111
+ "</div>",
112
+ ""
113
+ ],
114
+ "animation": { "movie": true }
115
+ }
116
+ },
117
+ {
118
+ "text": "Beat4. Beat-local movie ref used in html_tailwind via movie:name.",
119
+ "speaker": "Presenter",
120
+ "images": {
121
+ "local_movie": {
122
+ "type": "movie",
123
+ "source": {
124
+ "kind": "path",
125
+ "path": "../../test/assets/hello.mp4"
126
+ }
127
+ }
128
+ },
129
+ "image": {
130
+ "type": "html_tailwind",
131
+ "html": [
132
+ "<div class='h-full w-full overflow-hidden relative bg-black'>",
133
+ " <div style='position:absolute;inset:0;overflow:hidden'>",
134
+ " <video src='movie:local_movie' autoplay muted loop style='width:100%;height:100%;object-fit:cover'></video>",
135
+ " </div>",
136
+ " <div style='position:absolute;top:50%;left:40px;right:40px;transform:translateY(-50%);text-align:center'>",
137
+ " <div style='display:inline-block;background:rgba(168,85,247,0.85);padding:12px 32px;border-radius:12px'>",
138
+ " <span style='color:white;font-size:72px;font-weight:900'>Beat 4</span>",
139
+ " </div>",
140
+ " <div style='color:white;font-size:44px;font-weight:900;margin-top:20px;text-shadow:0 4px 16px rgba(0,0,0,0.9)'>Local movie → movie:name</div>",
141
+ " </div>",
142
+ "</div>"
143
+ ],
144
+ "animation": { "movie": true }
145
+ }
146
+ },
147
+ {
148
+ "text": "Beat5. This beat generates a new image using the global character reference for consistency. The same robot appears in a different scene.",
149
+ "speaker": "Presenter",
150
+ "images": {
151
+ "generated": {
152
+ "type": "imagePrompt",
153
+ "prompt": "The friendly cartoon robot mascot exploring a futuristic neon city at night, cyberpunk style, BRIGHT WELL-LIT SCENE"
154
+ }
155
+ },
156
+ "imageNames": ["character"],
157
+ "image": {
158
+ "type": "html_tailwind",
159
+ "html": [
160
+ "<div class='h-full w-full flex flex-col bg-gray-900'>",
161
+ " <div style='flex:1;display:flex;align-items:center;justify-content:center;padding:40px'>",
162
+ " <img src='image:generated' style='max-width:80%;max-height:100%;object-fit:contain;border-radius:24px;box-shadow:0 8px 32px rgba(0,0,0,0.5)' />",
163
+ " </div>",
164
+ " <div style='flex:1;display:flex;align-items:center;justify-content:center;padding:40px'>",
165
+ " <div style='text-align:center'>",
166
+ " <div style='display:inline-block;background:rgba(59,130,246,0.85);padding:8px 24px;border-radius:8px;margin-bottom:16px'>",
167
+ " <span style='color:white;font-size:36px;font-weight:700'>Beat 5 — Global Ref → Image Gen</span>",
168
+ " </div>",
169
+ " <div style='color:white;font-size:40px;font-weight:900;line-height:1.4;margin-top:16px'>imageNames references character</div>",
170
+ " <div style='color:rgba(255,255,255,0.7);font-size:32px;margin-top:12px;line-height:1.4'>Beat-level imagePrompt generates a new image<br>using the global reference for consistency</div>",
171
+ " </div>",
172
+ " </div>",
173
+ "</div>"
174
+ ]
175
+ }
176
+ },
177
+ {
178
+ "text": "Beat6. This beat generates a video from the global character image. The moviePrompt uses the character as the starting frame for image-to-video generation.",
179
+ "speaker": "Presenter",
180
+ "images": {
181
+ "generated_movie": {
182
+ "type": "moviePrompt",
183
+ "prompt": "The robot mascot waves and dances happily, smooth animation",
184
+ "imageName": "character"
185
+ }
186
+ },
187
+ "image": {
188
+ "type": "html_tailwind",
189
+ "html": [
190
+ "<div class='h-full w-full flex flex-col bg-gray-900'>",
191
+ " <div style='flex:1;display:flex;align-items:center;justify-content:center;padding:40px'>",
192
+ " <div style='max-width:80%;max-height:100%;border-radius:24px;overflow:hidden;box-shadow:0 8px 32px rgba(0,0,0,0.5)'>",
193
+ " <video src='movie:generated_movie' autoplay muted loop style='width:100%;height:100%;object-fit:contain'></video>",
194
+ " </div>",
195
+ " </div>",
196
+ " <div style='flex:1;display:flex;align-items:center;justify-content:center;padding:40px'>",
197
+ " <div style='text-align:center'>",
198
+ " <div style='display:inline-block;background:rgba(168,85,247,0.85);padding:8px 24px;border-radius:8px;margin-bottom:16px'>",
199
+ " <span style='color:white;font-size:36px;font-weight:700'>Beat 6 — Global Ref → Movie Gen</span>",
200
+ " </div>",
201
+ " <div style='color:white;font-size:40px;font-weight:900;line-height:1.4;margin-top:16px'>moviePrompt.imageName = character</div>",
202
+ " <div style='color:rgba(255,255,255,0.7);font-size:32px;margin-top:12px;line-height:1.4'>Image-to-video: global character image<br>becomes the starting frame for video</div>",
203
+ " </div>",
204
+ " </div>",
205
+ "</div>"
206
+ ],
207
+ "animation": { "movie": true }
208
+ }
209
+ }
210
+ ]
211
+ }