mulmocast 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,7 +20,7 @@ export const speechOptionsSchema = z
20
20
  })
21
21
  .strict();
22
22
  const speakerIdSchema = z.string();
23
- export const text2SpeechProviderSchema = z.union([z.literal("openai"), z.literal("nijivoice"), z.literal("google"), z.literal("elevenlabs")]).default("openai");
23
+ export const text2SpeechProviderSchema = z.enum(["openai", "nijivoice", "google", "elevenlabs"]).default("openai");
24
24
  export const speakerDataSchema = z
25
25
  .object({
26
26
  displayName: z.record(langSchema, z.string()).optional(),
@@ -159,19 +159,19 @@ export const mulmoFillOptionSchema = z
159
159
  style: z.enum(["aspectFit", "aspectFill"]).default("aspectFit"),
160
160
  })
161
161
  .describe("How to handle aspect ratio differences between image and canvas");
162
- export const text2ImageProviderSchema = z.union([z.literal("openai"), z.literal("google")]).default("openai");
162
+ export const text2ImageProviderSchema = z.enum(["openai", "google"]).default("openai");
163
163
  // NOTE: This is for UI only. (until we figure out how to use it in mulmoImageParamsSchema)
164
164
  export const mulmoOpenAIImageModelSchema = z
165
165
  .object({
166
166
  provider: z.literal("openai"),
167
- model: z.union([z.literal("dall-e-3"), z.literal("gpt-image-1")]).optional(),
167
+ model: z.enum(["dall-e-3", "gpt-image-1"]).optional(),
168
168
  })
169
169
  .strict();
170
170
  // NOTE: This is for UI only. (until we figure out how to use it in mulmoImageParamsSchema)
171
171
  export const mulmoGoogleImageModelSchema = z
172
172
  .object({
173
173
  provider: z.literal("google"),
174
- model: z.union([z.literal("imagen-3.0-fast-generate-001"), z.literal("imagen-3.0-generate-002"), z.literal("imagen-3.0-capability-001")]).optional(),
174
+ model: z.enum(["imagen-3.0-fast-generate-001", "imagen-3.0-generate-002", "imagen-3.0-capability-001"]).optional(),
175
175
  })
176
176
  .strict();
177
177
  export const mulmoImageParamsSchema = z
@@ -265,8 +265,22 @@ export const mulmoSpeechParamsSchema = z
265
265
  speakers: speakerDictionarySchema,
266
266
  })
267
267
  .strict();
268
- export const text2HtmlImageProviderSchema = z.union([z.literal("openai"), z.literal("anthropic")]).default("openai");
269
- export const text2MovieProviderSchema = z.union([z.literal("openai"), z.literal("google"), z.literal("replicate")]).default("google");
268
+ export const text2HtmlImageProviderSchema = z.enum(["openai", "anthropic"]).default("openai");
269
+ export const text2MovieProviderSchema = z.enum(["google", "replicate"]).default("google");
270
+ // NOTE: This is UI only. (until we figure out how to use it in mulmoMovieParamsSchema)
271
+ export const mulmoGoogleMovieModelSchema = z
272
+ .object({
273
+ provider: z.literal("google"),
274
+ model: z.enum(["veo-2.0-generate-001"]).optional(),
275
+ })
276
+ .strict();
277
+ // NOTE: This is UI only. (until we figure out how to use it in mulmoMovieParamsSchema)
278
+ export const mulmoReplicateMovieModelSchema = z
279
+ .object({
280
+ provider: z.literal("replicate"),
281
+ model: z.enum(["bytedance/seedance-1-lite", "kwaivgi/kling-v2.1", "google/veo-3"]).optional(),
282
+ })
283
+ .strict();
270
284
  export const mulmoTransitionSchema = z.object({
271
285
  type: z.enum(["fade", "slideout_left"]),
272
286
  duration: z.number().min(0).max(2).default(0.3), // transition duration in seconds
@@ -315,7 +329,7 @@ export const mulmoReferenceSchema = z.object({
315
329
  url: URLStringSchema,
316
330
  title: z.string().optional(),
317
331
  description: z.string().optional(),
318
- type: z.union([z.literal("article"), z.literal("paper"), z.literal("image"), z.literal("video"), z.literal("audio")]).default("article"),
332
+ type: z.enum(["article", "paper", "image", "video", "audio"]).default("article"),
319
333
  });
320
334
  export const mulmoScriptSchema = mulmoPresentationStyleSchema
321
335
  .extend({
@@ -363,6 +377,7 @@ export const mulmoSessionStateSchema = z.object({
363
377
  movie: z.record(z.number().int(), z.boolean()),
364
378
  multiLingual: z.record(z.number().int(), z.boolean()),
365
379
  caption: z.record(z.number().int(), z.boolean()),
380
+ html: z.record(z.number().int(), z.boolean()),
366
381
  }),
367
382
  });
368
383
  export const mulmoStudioSchema = z
@@ -1,4 +1,4 @@
1
- import { langSchema, localizedTextSchema, mulmoBeatSchema, mulmoScriptSchema, mulmoStudioSchema, mulmoStudioBeatSchema, mulmoStoryboardSchema, mulmoStoryboardSceneSchema, mulmoStudioMultiLingualSchema, mulmoStudioMultiLingualDataSchema, speakerDictionarySchema, mulmoImageParamsSchema, mulmoImageParamsImagesSchema, mulmoFillOptionSchema, mulmoMovieParamsSchema, mulmoSpeechParamsSchema, textSlideParamsSchema, speechOptionsSchema, speakerDataSchema, mulmoCanvasDimensionSchema, mulmoScriptTemplateSchema, mulmoScriptTemplateFileSchema, text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoPresentationStyleSchema, multiLingualTextsSchema, mulmoMermaidMediaSchema, mulmoTextSlideMediaSchema, mulmoMarkdownMediaSchema, mulmoImageMediaSchema, mulmoChartMediaSchema, mediaSourceSchema, mulmoSessionStateSchema, mulmoOpenAIImageModelSchema, mulmoGoogleImageModelSchema } from "./schema.js";
1
+ import { langSchema, localizedTextSchema, mulmoBeatSchema, mulmoScriptSchema, mulmoStudioSchema, mulmoStudioBeatSchema, mulmoStoryboardSchema, mulmoStoryboardSceneSchema, mulmoStudioMultiLingualSchema, mulmoStudioMultiLingualDataSchema, speakerDictionarySchema, mulmoImageParamsSchema, mulmoImageParamsImagesSchema, mulmoFillOptionSchema, mulmoMovieParamsSchema, mulmoSpeechParamsSchema, textSlideParamsSchema, speechOptionsSchema, speakerDataSchema, mulmoCanvasDimensionSchema, mulmoScriptTemplateSchema, mulmoScriptTemplateFileSchema, text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoPresentationStyleSchema, multiLingualTextsSchema, mulmoMermaidMediaSchema, mulmoTextSlideMediaSchema, mulmoMarkdownMediaSchema, mulmoImageMediaSchema, mulmoChartMediaSchema, mediaSourceSchema, mulmoSessionStateSchema, mulmoOpenAIImageModelSchema, mulmoGoogleImageModelSchema, mulmoGoogleMovieModelSchema, mulmoReplicateMovieModelSchema } from "./schema.js";
2
2
  import { pdf_modes, pdf_sizes, storyToScriptGenerateMode } from "../utils/const.js";
3
3
  import { LLM } from "../utils/utils.js";
4
4
  import { z } from "zod";
@@ -33,6 +33,8 @@ export type MultiLingualTexts = z.infer<typeof multiLingualTextsSchema>;
33
33
  export type MulmoMovieParams = z.infer<typeof mulmoMovieParamsSchema>;
34
34
  export type MulmoOpenAIImageModel = z.infer<typeof mulmoOpenAIImageModelSchema>;
35
35
  export type MulmoGoogleImageModel = z.infer<typeof mulmoGoogleImageModelSchema>;
36
+ export type MulmoGoogleMovieModel = z.infer<typeof mulmoGoogleMovieModelSchema>;
37
+ export type MulmoReplicateMovieModel = z.infer<typeof mulmoReplicateMovieModelSchema>;
36
38
  export type MulmoTextSlideMedia = z.infer<typeof mulmoTextSlideMediaSchema>;
37
39
  export type MulmoMarkdownMedia = z.infer<typeof mulmoMarkdownMediaSchema>;
38
40
  export type MulmoImageMedia = z.infer<typeof mulmoImageMediaSchema>;
@@ -88,7 +90,7 @@ export type Text2HtmlAgentInfo = {
88
90
  export type BeatMediaType = "movie" | "image";
89
91
  export type StoryToScriptGenerateMode = (typeof storyToScriptGenerateMode)[keyof typeof storyToScriptGenerateMode];
90
92
  export type SessionType = "audio" | "image" | "video" | "multiLingual" | "caption" | "pdf";
91
- export type BeatSessionType = "audio" | "image" | "multiLingual" | "caption" | "movie";
93
+ export type BeatSessionType = "audio" | "image" | "multiLingual" | "caption" | "movie" | "html";
92
94
  export type SessionProgressEvent = {
93
95
  kind: "session";
94
96
  sessionType: SessionType;
@@ -271,7 +271,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
271
271
  }> | undefined;
272
272
  } | undefined;
273
273
  movieParams?: {
274
- provider?: "openai" | "google" | "replicate" | undefined;
274
+ provider?: "google" | "replicate" | undefined;
275
275
  model?: string | undefined;
276
276
  fillOption?: {
277
277
  style: "aspectFit" | "aspectFill";
@@ -321,6 +321,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
321
321
  movie: {};
322
322
  multiLingual: {};
323
323
  caption: {};
324
+ html: {};
324
325
  };
325
326
  };
326
327
  presentationStyle: {
@@ -389,7 +390,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
389
390
  }> | undefined;
390
391
  } | undefined;
391
392
  movieParams?: {
392
- provider?: "openai" | "google" | "replicate" | undefined;
393
+ provider?: "google" | "replicate" | undefined;
393
394
  model?: string | undefined;
394
395
  fillOption?: {
395
396
  style: "aspectFit" | "aspectFill";
@@ -56,6 +56,7 @@ const initSessionState = () => {
56
56
  movie: {},
57
57
  multiLingual: {},
58
58
  caption: {},
59
+ html: {},
59
60
  },
60
61
  };
61
62
  };
package/lib/utils/file.js CHANGED
@@ -81,10 +81,18 @@ export const getOutputVideoFilePath = (outDirPath, fileName, lang, caption) => {
81
81
  export const imageSuffix = "p";
82
82
  export const getBeatPngImagePath = (context, index) => {
83
83
  const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
84
+ const beat = context.studio.script.beats[index]; // beat could be undefined only in a test case.
85
+ if (beat?.id) {
86
+ return `${imageProjectDirPath}/${beat.id}.png`;
87
+ }
84
88
  return `${imageProjectDirPath}/${index}${imageSuffix}.png`;
85
89
  };
86
90
  export const getBeatMoviePath = (context, index) => {
87
91
  const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
92
+ const beat = context.studio.script.beats[index]; // beat could be undefined only in a test case.
93
+ if (beat?.id) {
94
+ return `${imageProjectDirPath}/${beat.id}.mov`;
95
+ }
88
96
  return `${imageProjectDirPath}/${index}.mov`;
89
97
  };
90
98
  export const getReferenceImagePath = (context, key, extension) => {
@@ -22,18 +22,23 @@ export const fileCacheAgentFilter = async (context, next) => {
22
22
  }
23
23
  };
24
24
  if (await shouldUseCache()) {
25
- GraphAILogger.debug("cache");
25
+ GraphAILogger.debug(`cache: ${path.basename(file)}`);
26
26
  return true;
27
27
  }
28
28
  try {
29
29
  MulmoStudioContextMethods.setBeatSessionState(mulmoContext, sessionType, index, true);
30
- const output = (await next(context));
31
- const buffer = output ? output["buffer"] : undefined;
30
+ const output = (await next(context)) || undefined;
31
+ const { buffer, text } = output ?? {};
32
32
  if (buffer) {
33
33
  writingMessage(file);
34
34
  await fsPromise.writeFile(file, buffer);
35
35
  return true;
36
36
  }
37
+ else if (text) {
38
+ writingMessage(file);
39
+ await fsPromise.writeFile(file, text, "utf-8");
40
+ return true;
41
+ }
37
42
  GraphAILogger.log("no cache, no buffer: " + file);
38
43
  return false;
39
44
  }
@@ -266,7 +266,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
266
266
  }> | undefined;
267
267
  } | undefined;
268
268
  movieParams?: {
269
- provider?: "openai" | "google" | "replicate" | undefined;
269
+ provider?: "google" | "replicate" | undefined;
270
270
  model?: string | undefined;
271
271
  fillOption?: {
272
272
  style: "aspectFit" | "aspectFill";
@@ -19,3 +19,4 @@ export declare const localizedText: (beat: MulmoBeat, multiLingualData?: MulmoSt
19
19
  export declare const sleep: (milliseconds: number) => Promise<unknown>;
20
20
  export declare function userAssert(condition: boolean, message: string): asserts condition;
21
21
  export declare const settings2GraphAIConfig: (settings?: Record<string, string>) => ConfigDataDictionary<DefaultConfigData>;
22
+ export declare const getExtention: (contentType: string | null, url: string) => string;
@@ -102,3 +102,17 @@ export const settings2GraphAIConfig = (settings) => {
102
102
  }
103
103
  return config;
104
104
  };
105
/**
 * Decide which image file extension ("jpg" or "png") to use for fetched content.
 *
 * The Content-Type value is consulted first. If it names neither JPEG nor PNG,
 * the extension found at the end of the URL (ignoring any query string or
 * fragment) is used instead. Anything unrecognized falls back to "png".
 *
 * @param {string | null} contentType - Content-Type value of the response, if any.
 * @param {string} url - URL the content was fetched from.
 * @returns {string} "jpg" or "png".
 */
export const getExtention = (contentType, url) => {
  // Media types are case-insensitive, so normalize before matching.
  const mediaType = contentType?.toLowerCase() ?? "";
  if (mediaType.includes("jpeg") || mediaType.includes("jpg")) {
    return "jpg";
  }
  if (mediaType.includes("png")) {
    return "png";
  }
  // Fall back to the URL extension. Strip the query string and fragment first so
  // that "photo.jpg?size=large" is still recognized as a JPEG.
  const [pathPart] = (url ?? "").split(/[?#]/);
  const urlExtension = pathPart.split(".").pop().toLowerCase();
  if (urlExtension === "jpg" || urlExtension === "jpeg") {
    return "jpg";
  }
  if (urlExtension === "png") {
    return "png";
  }
  return "png"; // default
};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "lib/index.js",
@@ -60,47 +60,47 @@
60
60
  "homepage": "https://github.com/receptron/mulmocast-cli#readme",
61
61
  "dependencies": {
62
62
  "@google-cloud/text-to-speech": "^6.1.0",
63
- "@graphai/anthropic_agent": "^2.0.2",
63
+ "@graphai/anthropic_agent": "^2.0.5",
64
64
  "@graphai/browserless_agent": "^2.0.1",
65
65
  "@graphai/gemini_agent": "^2.0.0",
66
66
  "@graphai/groq_agent": "^2.0.0",
67
67
  "@graphai/input_agents": "^1.0.1",
68
68
  "@graphai/openai_agent": "^2.0.3",
69
69
  "@graphai/stream_agent_filter": "^2.0.2",
70
- "@graphai/vanilla": "^2.0.4",
70
+ "@graphai/vanilla": "^2.0.5",
71
71
  "@graphai/vanilla_node_agents": "^2.0.1",
72
72
  "@modelcontextprotocol/sdk": "^1.13.1",
73
- "@tavily/core": "^0.5.8",
73
+ "@tavily/core": "^0.5.9",
74
74
  "canvas": "^3.1.2",
75
75
  "clipboardy": "^4.0.0",
76
- "dotenv": "^17.0.0",
76
+ "dotenv": "^17.1.0",
77
77
  "fluent-ffmpeg": "^2.1.3",
78
78
  "google-auth-library": "^9.15.1",
79
- "graphai": "^2.0.9",
80
- "inquirer": "^12.6.3",
79
+ "graphai": "^2.0.12",
80
+ "inquirer": "^12.7.0",
81
81
  "marked": "^16.0.0",
82
82
  "ora": "^8.2.0",
83
- "puppeteer": "^24.11.1",
83
+ "puppeteer": "^24.12.0",
84
84
  "replicate": "^1.0.1",
85
85
  "yaml": "^2.8.0",
86
86
  "yargs": "^18.0.0",
87
- "zod": "^3.25.67",
87
+ "zod": "^3.25.76",
88
88
  "zod-to-json-schema": "^3.24.6"
89
89
  },
90
90
  "devDependencies": {
91
91
  "@anatine/zod-mock": "^3.14.0",
92
- "@faker-js/faker": "^9.8.0",
92
+ "@faker-js/faker": "^9.9.0",
93
93
  "@receptron/test_utils": "^2.0.0",
94
94
  "@types/fluent-ffmpeg": "^2.1.26",
95
95
  "@types/yargs": "^17.0.33",
96
- "eslint": "^9.30.0",
96
+ "eslint": "^9.30.1",
97
97
  "eslint-config-prettier": "^10.1.5",
98
98
  "eslint-plugin-prettier": "^5.5.1",
99
99
  "prettier": "^3.6.2",
100
100
  "ts-node": "^10.9.2",
101
101
  "tsx": "^4.20.3",
102
102
  "typescript": "^5.7.3",
103
- "typescript-eslint": "^8.35.0"
103
+ "typescript-eslint": "^8.36.0"
104
104
  },
105
105
  "engines": {
106
106
  "node": ">=18.0.0"
@@ -0,0 +1,60 @@
1
+ {
2
+ "$mulmocast": {
3
+ "version": "1.0"
4
+ },
5
+ "title": "Voice Over Test",
6
+ "captionParams": {
7
+ "lang": "en"
8
+ },
9
+ "canvasSize": {
10
+ "width": 1552,
11
+ "height": 2064
12
+ },
13
+ "beats": [
14
+ {
15
+ "text": "Description of this section of the movie",
16
+ "image": {
17
+ "type": "movie",
18
+ "source": {
19
+ "kind": "url",
20
+ "url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/movies/actions.mp4"
21
+ }
22
+ }
23
+ },
24
+ {
25
+ "text": "Description of this section of the movie starting at 8 seconds",
26
+ "image": {
27
+ "type": "voice_over",
28
+ "startAt": 8.0
29
+ }
30
+ },
31
+ {
32
+ "text": "Description of this section of the movie starting at 14.5 seconds",
33
+ "image": {
34
+ "type": "voice_over",
35
+ "startAt": 14.5
36
+ }
37
+ },
38
+ {
39
+ "text": "Description of this section of the movie starting at 21 seconds",
40
+ "image": {
41
+ "type": "voice_over",
42
+ "startAt": 21.0
43
+ }
44
+ },
45
+ {
46
+ "text": "Description of this section of the movie starting at 25 seconds",
47
+ "image": {
48
+ "type": "voice_over",
49
+ "startAt": 25.0
50
+ }
51
+ },
52
+ {
53
+ "text": "Description of this section of the movie starting at 30 seconds",
54
+ "image": {
55
+ "type": "voice_over",
56
+ "startAt": 30.0
57
+ }
58
+ }
59
+ ]
60
+ }