mulmocast 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/assets/templates/characters.json +16 -0
  2. package/assets/templates/html.json +6 -0
  3. package/lib/actions/audio.js +13 -19
  4. package/lib/actions/image_agents.d.ts +145 -0
  5. package/lib/actions/image_agents.js +59 -0
  6. package/lib/actions/image_references.d.ts +9 -0
  7. package/lib/actions/image_references.js +79 -0
  8. package/lib/actions/images.d.ts +17 -109
  9. package/lib/actions/images.js +83 -188
  10. package/lib/actions/index.d.ts +2 -0
  11. package/lib/actions/index.js +2 -0
  12. package/lib/actions/movie.js +3 -1
  13. package/lib/actions/pdf.js +5 -2
  14. package/lib/agents/image_google_agent.d.ts +2 -15
  15. package/lib/agents/image_google_agent.js +5 -5
  16. package/lib/agents/image_openai_agent.d.ts +2 -17
  17. package/lib/agents/image_openai_agent.js +9 -9
  18. package/lib/agents/movie_google_agent.d.ts +2 -17
  19. package/lib/agents/movie_google_agent.js +7 -7
  20. package/lib/agents/movie_replicate_agent.d.ts +2 -16
  21. package/lib/agents/movie_replicate_agent.js +4 -4
  22. package/lib/agents/tts_google_agent.d.ts +9 -1
  23. package/lib/agents/tts_google_agent.js +2 -2
  24. package/lib/agents/tts_nijivoice_agent.js +1 -1
  25. package/lib/agents/tts_openai_agent.d.ts +13 -1
  26. package/lib/agents/tts_openai_agent.js +2 -2
  27. package/lib/cli/helpers.js +7 -7
  28. package/lib/index.d.ts +1 -0
  29. package/lib/index.js +1 -0
  30. package/lib/methods/index.d.ts +1 -0
  31. package/lib/methods/index.js +1 -0
  32. package/lib/methods/mulmo_beat.d.ts +6 -0
  33. package/lib/methods/mulmo_beat.js +21 -0
  34. package/lib/methods/mulmo_presentation_style.d.ts +3 -1
  35. package/lib/methods/mulmo_presentation_style.js +31 -7
  36. package/lib/methods/mulmo_studio_context.js +3 -0
  37. package/lib/tools/story_to_script.js +2 -2
  38. package/lib/types/agent.d.ts +55 -0
  39. package/lib/types/agent.js +3 -0
  40. package/lib/types/schema.d.ts +560 -296
  41. package/lib/types/schema.js +19 -10
  42. package/lib/types/type.d.ts +3 -2
  43. package/lib/utils/const.d.ts +0 -1
  44. package/lib/utils/const.js +0 -1
  45. package/lib/utils/context.d.ts +24 -13
  46. package/lib/utils/context.js +1 -0
  47. package/lib/utils/ffmpeg_utils.d.ts +1 -1
  48. package/lib/utils/ffmpeg_utils.js +1 -1
  49. package/lib/utils/file.js +4 -4
  50. package/lib/utils/filters.js +3 -4
  51. package/lib/utils/markdown.js +1 -1
  52. package/lib/utils/preprocess.d.ts +15 -8
  53. package/lib/utils/provider2agent.d.ts +72 -0
  54. package/lib/utils/provider2agent.js +81 -0
  55. package/lib/utils/string.js +5 -5
  56. package/lib/utils/utils.d.ts +13 -11
  57. package/lib/utils/utils.js +56 -62
  58. package/package.json +7 -6
  59. package/scripts/templates/html.json +42 -0
  60. package/scripts/templates/image_refs.json +35 -0
@@ -1,4 +1,5 @@
  import { z } from "zod";
+ import { htmlLLMProvider, provider2TTSAgent, provider2ImageAgent, provider2MovieAgent, defaultProviders } from "../utils/provider2agent.js";
  export const langSchema = z.string();
  const URLStringSchema = z.string().url();
  export const localizedTextSchema = z
@@ -20,7 +21,7 @@ export const speechOptionsSchema = z
  })
  .strict();
  const speakerIdSchema = z.string();
- export const text2SpeechProviderSchema = z.enum(["openai", "nijivoice", "google", "elevenlabs"]).default("openai");
+ export const text2SpeechProviderSchema = z.enum(Object.keys(provider2TTSAgent)).default(defaultProviders.tts);
  export const speakerDataSchema = z
  .object({
  displayName: z.record(langSchema, z.string()).optional(),
@@ -153,25 +154,31 @@ const mulmoMidiMediaSchema = z
  .strict();
  export const mulmoAudioAssetSchema = z.union([mulmoAudioMediaSchema, mulmoMidiMediaSchema]);
  const imageIdSchema = z.string();
- export const mulmoImageParamsImagesSchema = z.record(imageIdSchema, mulmoImageMediaSchema);
+ export const mulmoImagePromptMediaSchema = z
+ .object({
+ type: z.literal("imagePrompt"),
+ prompt: z.string(),
+ })
+ .strict();
+ export const mulmoImageParamsImagesSchema = z.record(imageIdSchema, z.union([mulmoImageMediaSchema, mulmoImagePromptMediaSchema]));
  export const mulmoFillOptionSchema = z
  .object({
  style: z.enum(["aspectFit", "aspectFill"]).default("aspectFit"),
  })
  .describe("How to handle aspect ratio differences between image and canvas");
- export const text2ImageProviderSchema = z.enum(["openai", "google"]).default("openai");
+ export const text2ImageProviderSchema = z.enum(Object.keys(provider2ImageAgent)).default(defaultProviders.text2image);
  // NOTE: This is for UI only. (until we figure out how to use it in mulmoImageParamsSchema)
  export const mulmoOpenAIImageModelSchema = z
  .object({
  provider: z.literal("openai"),
- model: z.enum(["dall-e-3", "gpt-image-1"]).optional(),
+ model: z.enum(provider2ImageAgent["openai"].models).optional(),
  })
  .strict();
  // NOTE: This is for UI only. (until we figure out how to use it in mulmoImageParamsSchema)
  export const mulmoGoogleImageModelSchema = z
  .object({
  provider: z.literal("google"),
- model: z.enum(["imagen-3.0-fast-generate-001", "imagen-3.0-generate-002", "imagen-3.0-capability-001"]).optional(),
+ model: z.enum(provider2ImageAgent["google"].models).optional(),
  })
  .strict();
  export const mulmoImageParamsSchema = z
@@ -232,6 +239,7 @@ export const mulmoBeatSchema = z
  audioParams: beatAudioParamsSchema.optional(), // beat specific parameters
  movieParams: z
  .object({
+ model: z.string().optional(),
  fillOption: mulmoFillOptionSchema.optional(),
  speed: z.number().optional().describe("Speed of the video. 1.0 is normal speed. 0.5 is half speed. 2.0 is double speed."),
  })
@@ -265,20 +273,20 @@ export const mulmoSpeechParamsSchema = z
  speakers: speakerDictionarySchema,
  })
  .strict();
- export const text2HtmlImageProviderSchema = z.enum(["openai", "anthropic"]).default("openai");
- export const text2MovieProviderSchema = z.enum(["google", "replicate"]).default("google");
+ export const text2HtmlImageProviderSchema = z.enum(htmlLLMProvider).default(defaultProviders.text2Html);
+ export const text2MovieProviderSchema = z.enum(Object.keys(provider2MovieAgent)).default(defaultProviders.text2movie);
  // NOTE: This is UI only. (until we figure out how to use it in mulmoMovieParamsSchema)
  export const mulmoGoogleMovieModelSchema = z
  .object({
  provider: z.literal("google"),
- model: z.enum(["veo-2.0-generate-001"]).optional(),
+ model: z.enum(provider2MovieAgent.google.models).optional(),
  })
  .strict();
  // NOTE: This is UI only. (until we figure out how to use it in mulmoMovieParamsSchema)
  export const mulmoReplicateMovieModelSchema = z
  .object({
  provider: z.literal("replicate"),
- model: z.enum(["bytedance/seedance-1-lite", "kwaivgi/kling-v2.1", "google/veo-3"]).optional(),
+ model: z.enum(provider2MovieAgent.replicate.models).optional(),
  })
  .strict();
  export const mulmoTransitionSchema = z.object({
@@ -329,7 +337,7 @@ export const mulmoReferenceSchema = z.object({
  url: URLStringSchema,
  title: z.string().optional(),
  description: z.string().optional(),
- type: z.enum(["article", "paper", "image", "video", "audio"]).default("article"),
+ type: z.union([z.enum(["article", "paper", "image", "video", "audio"]), z.string()]).default("article"),
  });
  export const mulmoScriptSchema = mulmoPresentationStyleSchema
  .extend({
@@ -378,6 +386,7 @@ export const mulmoSessionStateSchema = z.object({
  multiLingual: z.record(z.number().int(), z.boolean()),
  caption: z.record(z.number().int(), z.boolean()),
  html: z.record(z.number().int(), z.boolean()),
+ imageReference: z.record(z.number().int(), z.boolean()),
  }),
  });
  export const mulmoStudioSchema = z
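
Aside on the schema hunks above: the provider enums are no longer hard-coded string lists; they are derived from the lookup tables in utils/provider2agent.js, so registering a provider in one place updates every schema that depends on it. One visible consequence in the regenerated .d.ts hunks later in this diff is that the inferred provider types widen from literal unions such as "openai" | "nijivoice" | "google" | "elevenlabs" to plain string. A minimal, self-contained TypeScript sketch of the pattern (the map below is a two-entry placeholder, not the package's actual table):

    import { z } from "zod";

    // Placeholder stand-in for ../utils/provider2agent.js (illustrative only).
    const provider2TTSAgent = {
      openai: { agentName: "ttsOpenaiAgent" },
      google: { agentName: "ttsGoogleAgent" },
    } as const;
    const defaultProviders = { tts: "openai" } as const;

    // z.enum() expects a non-empty string tuple, so Object.keys() is cast accordingly;
    // the resulting schema accepts any key of the map and defaults to the configured provider.
    const providerKeys = Object.keys(provider2TTSAgent) as [string, ...string[]];
    export const text2SpeechProviderSchema = z.enum(providerKeys).default(defaultProviders.tts);

    console.log(text2SpeechProviderSchema.parse(undefined)); // "openai" (the default)
    console.log(text2SpeechProviderSchema.safeParse("nijivoice").success); // false here, because this sketch's map omits it
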
@@ -1,4 +1,4 @@
- import { langSchema, localizedTextSchema, mulmoBeatSchema, mulmoScriptSchema, mulmoStudioSchema, mulmoStudioBeatSchema, mulmoStoryboardSchema, mulmoStoryboardSceneSchema, mulmoStudioMultiLingualSchema, mulmoStudioMultiLingualDataSchema, speakerDictionarySchema, mulmoImageParamsSchema, mulmoImageParamsImagesSchema, mulmoFillOptionSchema, mulmoMovieParamsSchema, mulmoSpeechParamsSchema, textSlideParamsSchema, speechOptionsSchema, speakerDataSchema, mulmoCanvasDimensionSchema, mulmoScriptTemplateSchema, mulmoScriptTemplateFileSchema, text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoPresentationStyleSchema, multiLingualTextsSchema, mulmoMermaidMediaSchema, mulmoTextSlideMediaSchema, mulmoMarkdownMediaSchema, mulmoImageMediaSchema, mulmoChartMediaSchema, mediaSourceSchema, mulmoSessionStateSchema, mulmoOpenAIImageModelSchema, mulmoGoogleImageModelSchema, mulmoGoogleMovieModelSchema, mulmoReplicateMovieModelSchema } from "./schema.js";
+ import { langSchema, localizedTextSchema, mulmoBeatSchema, mulmoScriptSchema, mulmoStudioSchema, mulmoStudioBeatSchema, mulmoStoryboardSchema, mulmoStoryboardSceneSchema, mulmoStudioMultiLingualSchema, mulmoStudioMultiLingualDataSchema, speakerDictionarySchema, mulmoImageParamsSchema, mulmoImageParamsImagesSchema, mulmoFillOptionSchema, mulmoMovieParamsSchema, mulmoSpeechParamsSchema, textSlideParamsSchema, speechOptionsSchema, speakerDataSchema, mulmoCanvasDimensionSchema, mulmoScriptTemplateSchema, mulmoScriptTemplateFileSchema, text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoPresentationStyleSchema, multiLingualTextsSchema, mulmoMermaidMediaSchema, mulmoTextSlideMediaSchema, mulmoMarkdownMediaSchema, mulmoImageMediaSchema, mulmoChartMediaSchema, mediaSourceSchema, mulmoSessionStateSchema, mulmoOpenAIImageModelSchema, mulmoGoogleImageModelSchema, mulmoGoogleMovieModelSchema, mulmoReplicateMovieModelSchema, mulmoImagePromptMediaSchema } from "./schema.js";
  import { pdf_modes, pdf_sizes, storyToScriptGenerateMode } from "../utils/const.js";
  import { LLM } from "../utils/utils.js";
  import { z } from "zod";
@@ -35,6 +35,7 @@ export type MulmoOpenAIImageModel = z.infer<typeof mulmoOpenAIImageModelSchema>;
  export type MulmoGoogleImageModel = z.infer<typeof mulmoGoogleImageModelSchema>;
  export type MulmoGoogleMovieModel = z.infer<typeof mulmoGoogleMovieModelSchema>;
  export type MulmoReplicateMovieModel = z.infer<typeof mulmoReplicateMovieModelSchema>;
+ export type MulmoImagePromptMedia = z.infer<typeof mulmoImagePromptMediaSchema>;
  export type MulmoTextSlideMedia = z.infer<typeof mulmoTextSlideMediaSchema>;
  export type MulmoMarkdownMedia = z.infer<typeof mulmoMarkdownMediaSchema>;
  export type MulmoImageMedia = z.infer<typeof mulmoImageMediaSchema>;
@@ -90,7 +91,7 @@ export type Text2HtmlAgentInfo = {
  export type BeatMediaType = "movie" | "image";
  export type StoryToScriptGenerateMode = (typeof storyToScriptGenerateMode)[keyof typeof storyToScriptGenerateMode];
  export type SessionType = "audio" | "image" | "video" | "multiLingual" | "caption" | "pdf";
- export type BeatSessionType = "audio" | "image" | "multiLingual" | "caption" | "movie" | "html";
+ export type BeatSessionType = "audio" | "image" | "multiLingual" | "caption" | "movie" | "html" | "imageReference";
  export type SessionProgressEvent = {
  kind: "session";
  sessionType: SessionType;
@@ -9,4 +9,3 @@ export declare const storyToScriptGenerateMode: {
  stepWise: string;
  oneStep: string;
  };
- export declare const defaultOpenAIImageModel = "dall-e-3";
@@ -9,4 +9,3 @@ export const storyToScriptGenerateMode = {
  stepWise: "step_wise",
  oneStep: "one_step",
  };
- export const defaultOpenAIImageModel = "dall-e-3";
@@ -49,7 +49,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  height: number;
  };
  speechParams: {
- provider: "openai" | "nijivoice" | "google" | "elevenlabs";
+ provider: string;
  speakers: Record<string, {
  voiceId: string;
  displayName?: Record<string, string> | undefined;
@@ -57,7 +57,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  speed?: number | undefined;
  instruction?: string | undefined;
  } | undefined;
- provider?: "openai" | "nijivoice" | "google" | "elevenlabs" | undefined;
+ provider?: string | undefined;
  }>;
  };
  beats: {
@@ -193,9 +193,10 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  type: "midi";
  source: string;
  } | undefined;
+ imagePrompt?: string | undefined;
  description?: string | undefined;
  imageParams?: {
- provider: "openai" | "google";
+ provider: string;
  style?: string | undefined;
  model?: string | undefined;
  moderation?: string | undefined;
@@ -214,6 +215,9 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  path: string;
  kind: "path";
  };
+ } | {
+ type: "imagePrompt";
+ prompt: string;
  }> | undefined;
  } | undefined;
  audioParams?: {
@@ -221,6 +225,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  } | undefined;
  movieParams?: {
  speed?: number | undefined;
+ model?: string | undefined;
  fillOption?: {
  style: "aspectFit" | "aspectFill";
  } | undefined;
@@ -236,7 +241,6 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  lang?: string | undefined;
  } | undefined;
  imageNames?: string[] | undefined;
- imagePrompt?: string | undefined;
  moviePrompt?: string | undefined;
  htmlPrompt?: {
  prompt: string;
@@ -249,7 +253,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  title?: string | undefined;
  description?: string | undefined;
  imageParams?: {
- provider: "openai" | "google";
+ provider: string;
  style?: string | undefined;
  model?: string | undefined;
  moderation?: string | undefined;
@@ -268,10 +272,13 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  path: string;
  kind: "path";
  };
+ } | {
+ type: "imagePrompt";
+ prompt: string;
  }> | undefined;
  } | undefined;
  movieParams?: {
- provider?: "google" | "replicate" | undefined;
+ provider?: string | undefined;
  model?: string | undefined;
  fillOption?: {
  style: "aspectFit" | "aspectFill";
@@ -282,7 +289,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  } | undefined;
  } | undefined;
  htmlImageParams?: {
- provider: "openai" | "anthropic";
+ provider: string;
  model?: string | undefined;
  } | undefined;
  textSlideParams?: {
@@ -293,7 +300,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  lang?: string | undefined;
  } | undefined;
  references?: {
- type: "image" | "audio" | "article" | "paper" | "video";
+ type: string;
  url: string;
  title?: string | undefined;
  description?: string | undefined;
@@ -322,6 +329,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  multiLingual: {};
  caption: {};
  html: {};
+ imageReference: {};
  };
  };
  presentationStyle: {
@@ -356,7 +364,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  height: number;
  };
  speechParams: {
- provider: "openai" | "nijivoice" | "google" | "elevenlabs";
+ provider: string;
  speakers: Record<string, {
  voiceId: string;
  displayName?: Record<string, string> | undefined;
@@ -364,11 +372,11 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  speed?: number | undefined;
  instruction?: string | undefined;
  } | undefined;
- provider?: "openai" | "nijivoice" | "google" | "elevenlabs" | undefined;
+ provider?: string | undefined;
  }>;
  };
  imageParams?: {
- provider: "openai" | "google";
+ provider: string;
  style?: string | undefined;
  model?: string | undefined;
  moderation?: string | undefined;
@@ -387,10 +395,13 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  path: string;
  kind: "path";
  };
+ } | {
+ type: "imagePrompt";
+ prompt: string;
  }> | undefined;
  } | undefined;
  movieParams?: {
- provider?: "google" | "replicate" | undefined;
+ provider?: string | undefined;
  model?: string | undefined;
  fillOption?: {
  style: "aspectFit" | "aspectFill";
@@ -401,7 +412,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
  } | undefined;
  } | undefined;
  htmlImageParams?: {
- provider: "openai" | "anthropic";
+ provider: string;
  model?: string | undefined;
  } | undefined;
  textSlideParams?: {
@@ -57,6 +57,7 @@ const initSessionState = () => {
  multiLingual: {},
  caption: {},
  html: {},
+ imageReference: {},
  },
  };
  };
@@ -12,4 +12,4 @@ export declare const FfmpegContextPushFormattedAudio: (context: FfmpegContext, s
  export declare const FfmpegContextInputFormattedAudio: (context: FfmpegContext, input: string, duration?: number | undefined, inputOptions?: string[]) => string;
  export declare const FfmpegContextGenerateOutput: (context: FfmpegContext, output: string, options?: string[]) => Promise<number>;
  export declare const ffmpegGetMediaDuration: (filePath: string) => Promise<number>;
- export declare const extractImageFromMovie: (movieFile: string, imagePath: string) => Promise<void>;
+ export declare const extractImageFromMovie: (movieFile: string, imagePath: string) => Promise<object>;
@@ -77,7 +77,7 @@ export const extractImageFromMovie = (movieFile, imagePath) => {
  ffmpeg(movieFile)
  .outputOptions(["-frames:v 1"])
  .output(imagePath)
- .on("end", () => resolve())
+ .on("end", () => resolve({}))
  .on("error", (err) => reject(err))
  .run();
  });
package/lib/utils/file.js CHANGED
@@ -29,9 +29,9 @@ export function readMulmoScriptFile(arg2, errorMessage) {
  fileName: parsedPath.name,
  };
  }
- catch (__error) {
+ catch (error) {
  if (errorMessage) {
- GraphAILogger.info("read file format is broken.");
+ GraphAILogger.info("read file format is broken.", error);
  }
  return null;
  }
@@ -159,8 +159,8 @@ export const readTemplatePrompt = (templateName) => {
  const template = JSON.parse(templateData);
  const script = (() => {
  if (template.scriptName) {
- const script = readScriptTemplateFile(template.scriptName);
- return { ...script, ...(template.presentationStyle ?? {}) };
+ const scriptData = readScriptTemplateFile(template.scriptName);
+ return { ...scriptData, ...(template.presentationStyle ?? {}) };
  }
  return undefined;
  })();
@@ -7,17 +7,16 @@ import { writingMessage } from "./file.js";
  import { text2hash } from "./utils.js";
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  export const fileCacheAgentFilter = async (context, next) => {
- const { namedInputs } = context;
- const { file, force, mulmoContext, index, sessionType } = namedInputs;
+ const { force, file, index, mulmoContext, sessionType } = context.namedInputs.cache;
  const shouldUseCache = async () => {
- if (force) {
+ if (force && force.some((element) => element)) {
  return false;
  }
  try {
  await fsPromise.access(file);
  return true;
  }
- catch (__e) {
+ catch {
  return false;
  }
  };
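
The cache filter above now reads its inputs from context.namedInputs.cache, and force arrives as an array of flags rather than a single boolean; the cache is bypassed only when at least one flag is set. A standalone sketch of that decision logic (hypothetical helper, not the package's GraphAI wiring):

    import { promises as fsPromise } from "fs";

    // Mirrors the shouldUseCache logic above: any truthy force flag disables the cache;
    // otherwise the cache is used when the target file already exists.
    const shouldUseCache = async (file: string, force?: boolean[]): Promise<boolean> => {
      if (force && force.some((element) => element)) {
        return false;
      }
      try {
        await fsPromise.access(file);
        return true;
      } catch {
        return false;
      }
    };

    // Example: a single set flag forces regeneration even if the file exists (file path is made up).
    shouldUseCache("./output/images/beat_0.png", [false, true]).then((hit) => console.log(hit)); // false
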
@@ -18,7 +18,7 @@ export const renderHTMLToImage = async (html, outputPath, width, height, isMerma
  }, { timeout: 20000 });
  }
  // Step 3: Capture screenshot of the page (which contains the Markdown-rendered HTML)
- await page.screenshot({ path: outputPath, omitBackground: omitBackground });
+ await page.screenshot({ path: outputPath, omitBackground });
  await browser.close();
  };
  export const renderMarkdownToImage = async (markdown, style, outputPath, width, height) => {
@@ -44,7 +44,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
  height: number;
  };
  speechParams: {
- provider: "openai" | "nijivoice" | "google" | "elevenlabs";
+ provider: string;
  speakers: Record<string, {
  voiceId: string;
  displayName?: Record<string, string> | undefined;
@@ -52,7 +52,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
  speed?: number | undefined;
  instruction?: string | undefined;
  } | undefined;
- provider?: "openai" | "nijivoice" | "google" | "elevenlabs" | undefined;
+ provider?: string | undefined;
  }>;
  };
  beats: {
@@ -188,9 +188,10 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
  type: "midi";
  source: string;
  } | undefined;
+ imagePrompt?: string | undefined;
  description?: string | undefined;
  imageParams?: {
- provider: "openai" | "google";
+ provider: string;
  style?: string | undefined;
  model?: string | undefined;
  moderation?: string | undefined;
@@ -209,6 +210,9 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
  path: string;
  kind: "path";
  };
+ } | {
+ type: "imagePrompt";
+ prompt: string;
  }> | undefined;
  } | undefined;
  audioParams?: {
@@ -216,6 +220,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
  } | undefined;
  movieParams?: {
  speed?: number | undefined;
+ model?: string | undefined;
  fillOption?: {
  style: "aspectFit" | "aspectFill";
  } | undefined;
@@ -231,7 +236,6 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
  lang?: string | undefined;
  } | undefined;
  imageNames?: string[] | undefined;
- imagePrompt?: string | undefined;
  moviePrompt?: string | undefined;
  htmlPrompt?: {
  prompt: string;
@@ -244,7 +248,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
  title?: string | undefined;
  description?: string | undefined;
  imageParams?: {
- provider: "openai" | "google";
+ provider: string;
  style?: string | undefined;
  model?: string | undefined;
  moderation?: string | undefined;
@@ -263,10 +267,13 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
  path: string;
  kind: "path";
  };
+ } | {
+ type: "imagePrompt";
+ prompt: string;
  }> | undefined;
  } | undefined;
  movieParams?: {
- provider?: "google" | "replicate" | undefined;
+ provider?: string | undefined;
  model?: string | undefined;
  fillOption?: {
  style: "aspectFit" | "aspectFill";
@@ -277,7 +284,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
  } | undefined;
  } | undefined;
  htmlImageParams?: {
- provider: "openai" | "anthropic";
+ provider: string;
  model?: string | undefined;
  } | undefined;
  textSlideParams?: {
@@ -288,7 +295,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
  lang?: string | undefined;
  } | undefined;
  references?: {
- type: "image" | "audio" | "article" | "paper" | "video";
+ type: string;
  url: string;
  title?: string | undefined;
  description?: string | undefined;
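
To make the regenerated declarations above concrete: entries in imageParams.images may now be inline prompts ({ type: "imagePrompt", prompt }) alongside the existing media form, and a beat's movieParams gains an optional model. A hand-written illustration (all values are invented; the shape of the "image" entry is inferred from the kind/path fields visible in the declarations and may differ in detail):

    // Illustrative fragments only, not excerpts from the package.
    const imageParamsFragment = {
      images: {
        office: { type: "image", source: { kind: "path", path: "./assets/office.png" } }, // existing media reference (assumed shape)
        presenter: { type: "imagePrompt", prompt: "A friendly robot presenter, flat illustration" }, // new inline prompt entry
      },
    };

    const beatMovieParamsFragment = {
      model: "veo-2.0-generate-001", // new optional per-beat override; presumably falls back to the presentation-level movieParams when omitted
      speed: 1.0,
    };
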
@@ -0,0 +1,72 @@
+ export declare const defaultProviders: {
+ tts: string;
+ text2image: string;
+ text2movie: string;
+ text2Html: string;
+ llm: string;
+ };
+ export declare const provider2TTSAgent: {
+ nijivoice: {
+ agentName: string;
+ hasLimitedConcurrency: boolean;
+ };
+ openai: {
+ agentName: string;
+ hasLimitedConcurrency: boolean;
+ };
+ google: {
+ agentName: string;
+ hasLimitedConcurrency: boolean;
+ };
+ elevenlabs: {
+ agentName: string;
+ hasLimitedConcurrency: boolean;
+ };
+ };
+ export declare const provider2ImageAgent: {
+ openai: {
+ agentName: string;
+ defaultModel: string;
+ models: string[];
+ };
+ google: {
+ agentName: string;
+ defaultModel: string;
+ models: string[];
+ };
+ };
+ export declare const provider2MovieAgent: {
+ replicate: {
+ agentName: string;
+ models: string[];
+ };
+ google: {
+ agentName: string;
+ models: string[];
+ };
+ };
+ export declare const provider2LLMAgent: {
+ readonly openai: {
+ readonly agentName: "openAIAgent";
+ readonly defaultModel: "gpt-4o";
+ readonly max_tokens: 8192;
+ };
+ readonly anthropic: {
+ readonly agentName: "anthropicAgent";
+ readonly defaultModel: "claude-3-7-sonnet-20250219";
+ readonly max_tokens: 8192;
+ };
+ readonly gemini: {
+ readonly agentName: "geminiAgent";
+ readonly defaultModel: "gemini-1.5-flash";
+ readonly max_tokens: 8192;
+ };
+ readonly groq: {
+ readonly agentName: "groqAgent";
+ readonly defaultModel: "llama3-8b-8192";
+ readonly max_tokens: 4096;
+ };
+ };
+ export declare const llm: (keyof typeof provider2LLMAgent)[];
+ export type LLM = keyof typeof provider2LLMAgent;
+ export declare const htmlLLMProvider: string[];
@@ -0,0 +1,81 @@
+ export const defaultProviders = {
+ tts: "openai",
+ text2image: "openai",
+ text2movie: "google",
+ text2Html: "openai",
+ llm: "openai",
+ };
+ export const provider2TTSAgent = {
+ nijivoice: {
+ agentName: "ttsNijivoiceAgent",
+ hasLimitedConcurrency: true,
+ },
+ openai: {
+ agentName: "ttsOpenaiAgent",
+ hasLimitedConcurrency: false,
+ },
+ google: {
+ agentName: "ttsGoogleAgent",
+ hasLimitedConcurrency: false,
+ },
+ elevenlabs: {
+ agentName: "ttsElevenlabsAgent",
+ hasLimitedConcurrency: true,
+ },
+ };
+ export const provider2ImageAgent = {
+ openai: {
+ agentName: "imageOpenaiAgent",
+ defaultModel: "gpt-image-1",
+ models: ["dall-e-3", "gpt-image-1"],
+ },
+ google: {
+ agentName: "imageGoogleAgent",
+ defaultModel: "imagen-3.0-fast-generate-001",
+ models: ["imagen-3.0-fast-generate-001", "imagen-3.0-generate-002", "imagen-3.0-capability-001"],
+ },
+ };
+ export const provider2MovieAgent = {
+ replicate: {
+ agentName: "movieReplicateAgent",
+ models: [
+ "bytedance/seedance-1-lite",
+ "bytedance/seedance-1-pro",
+ "kwaivgi/kling-v1.6-pro",
+ "kwaivgi/kling-v2.1",
+ "google/veo-2",
+ "google/veo-3",
+ "google/veo-3-fast",
+ "minimax/video-01",
+ ],
+ },
+ google: {
+ agentName: "movieGoogleAgent",
+ models: ["veo-2.0-generate-001"],
+ },
+ };
+ // : Record<LLM, { agent: string; defaultModel: string; max_tokens: number }>
+ export const provider2LLMAgent = {
+ openai: {
+ agentName: "openAIAgent",
+ defaultModel: "gpt-4o",
+ max_tokens: 8192,
+ },
+ anthropic: {
+ agentName: "anthropicAgent",
+ defaultModel: "claude-3-7-sonnet-20250219",
+ max_tokens: 8192,
+ },
+ gemini: {
+ agentName: "geminiAgent",
+ defaultModel: "gemini-1.5-flash",
+ max_tokens: 8192,
+ },
+ groq: {
+ agentName: "groqAgent",
+ defaultModel: "llama3-8b-8192",
+ max_tokens: 4096,
+ },
+ };
+ export const llm = Object.keys(provider2LLMAgent);
+ export const htmlLLMProvider = ["openai", "anthropic"];
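
The new provider2agent module above centralizes the provider-to-agent mapping and default models that previously lived as hard-coded lists in the schema and as constants like the removed defaultOpenAIImageModel. A sketch of how a caller might resolve an agent from it (the helper is illustrative, not a package export; the import path shown is the module's location inside the published lib and may need adjusting for your setup):

    import { provider2ImageAgent, defaultProviders } from "./lib/utils/provider2agent.js";

    // Hypothetical helper: choose the agent name and model for a text2image request.
    // Assumes the provider string was already validated by text2ImageProviderSchema.
    const resolveImageAgent = (provider?: string, model?: string) => {
      const key = (provider ?? defaultProviders.text2image) as keyof typeof provider2ImageAgent;
      const entry = provider2ImageAgent[key];
      return { agentName: entry.agentName, model: model ?? entry.defaultModel };
    };

    console.log(resolveImageAgent()); // { agentName: "imageOpenaiAgent", model: "gpt-image-1" }
    console.log(resolveImageAgent("google")); // { agentName: "imageGoogleAgent", model: "imagen-3.0-fast-generate-001" }
    console.log(resolveImageAgent("openai", "dall-e-3")); // an explicit model wins over the default
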
@@ -20,18 +20,18 @@ export const recursiveSplitJa = (text) => {
  const delimiters = ["。", "?", "!", "、"];
  return delimiters
  .reduce((textData, delimiter) => {
- return textData.map((text) => splitIntoSentencesJa(text, delimiter, 7)).flat(1);
+ return textData.map((textInner) => splitIntoSentencesJa(textInner, delimiter, 7)).flat(1);
  }, [text])
  .flat(1);
  };
  export function replacePairsJa(str, replacements) {
- replacements.forEach(({ from, to }) => {
+ return replacements.reduce((tmp, current) => {
+ const { from, to } = current;
  // Escape any special regex characters in the 'from' string.
  const escapedFrom = from.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const regex = new RegExp(escapedFrom, "g");
- str = str.replace(regex, to);
- });
- return str;
+ return tmp.replace(regex, to);
+ }, str);
  }
  export const replacementsJa = [
  { from: "Anthropic", to: "アンスロピック" },
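
For reference, replacePairsJa above now builds its result with reduce instead of reassigning the str parameter; behavior is unchanged. A short usage sketch (the pair below is one example in the spirit of replacementsJa, not the full table):

    // Equivalent to the reduce-based implementation in the hunk above.
    const replacePairsJa = (str: string, replacements: { from: string; to: string }[]): string =>
      replacements.reduce((tmp, { from, to }) => {
        const escapedFrom = from.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); // escape regex metacharacters in the search string
        return tmp.replace(new RegExp(escapedFrom, "g"), to);
      }, str);

    console.log(replacePairsJa("Anthropic announced a new model.", [{ from: "Anthropic", to: "アンスロピック" }]));
    // => "アンスロピック announced a new model."
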