mulmocast 2.1.15 → 2.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/lib/actions/audio.js +1 -1
  2. package/lib/actions/bundle.d.ts +4 -1
  3. package/lib/actions/bundle.js +37 -21
  4. package/lib/actions/translate.js +1 -1
  5. package/lib/agents/image_genai_agent.js +2 -2
  6. package/lib/agents/image_openai_agent.js +1 -1
  7. package/lib/agents/image_replicate_agent.js +1 -1
  8. package/lib/agents/lipsync_replicate_agent.js +1 -1
  9. package/lib/agents/movie_genai_agent.js +2 -2
  10. package/lib/agents/movie_replicate_agent.js +1 -1
  11. package/lib/agents/sound_effect_replicate_agent.js +1 -1
  12. package/lib/agents/tts_elevenlabs_agent.js +1 -1
  13. package/lib/agents/tts_gemini_agent.js +1 -1
  14. package/lib/agents/tts_kotodama_agent.js +1 -1
  15. package/lib/agents/tts_openai_agent.js +5 -2
  16. package/lib/cli/commands/bundle/handler.js +1 -1
  17. package/lib/cli/commands/movie/builder.js +1 -1
  18. package/lib/cli/commands/pdf/builder.js +1 -1
  19. package/lib/cli/commands/tool/scripting/builder.js +1 -1
  20. package/lib/cli/commands/tool/scripting/handler.d.ts +1 -1
  21. package/lib/cli/commands/tool/scripting/handler.js +1 -1
  22. package/lib/cli/commands/tool/story_to_script/builder.js +2 -2
  23. package/lib/cli/commands/tool/story_to_script/handler.d.ts +1 -1
  24. package/lib/cli/commands/tool/story_to_script/handler.js +1 -1
  25. package/lib/cli/common.js +1 -1
  26. package/lib/cli/helpers.js +1 -1
  27. package/lib/data/scriptTemplates.js +2 -2
  28. package/lib/data/templateDataSet.js +1 -1
  29. package/lib/index.common.d.ts +2 -2
  30. package/lib/index.common.js +2 -2
  31. package/lib/mcp/server.js +1 -1
  32. package/lib/methods/mulmo_presentation_style.d.ts +6 -6
  33. package/lib/methods/mulmo_presentation_style.js +1 -1
  34. package/lib/methods/mulmo_studio_context.d.ts +1 -1
  35. package/lib/tools/story_to_script.d.ts +1 -1
  36. package/lib/tools/story_to_script.js +1 -1
  37. package/lib/types/agent.d.ts +1 -0
  38. package/lib/types/const.d.ts +15 -0
  39. package/lib/types/const.js +15 -0
  40. package/lib/types/provider2agent.d.ts +191 -0
  41. package/lib/types/provider2agent.js +326 -0
  42. package/lib/types/schema.js +2 -2
  43. package/lib/types/type.d.ts +2 -2
  44. package/lib/utils/utils.d.ts +1 -1
  45. package/lib/utils/utils.js +1 -1
  46. package/package.json +5 -5
  47. package/scripts/templates/html.json +1 -1
  48. package/scripts/templates/presentation.json +1 -1
@@ -8,7 +8,7 @@ import { fileCacheAgentFilter, nijovoiceTextAgentFilter } from "../utils/filters
8
8
  import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
9
9
  import { localizedText, settings2GraphAIConfig } from "../utils/utils.js";
10
10
  import { text2hash } from "../utils/utils_node.js";
11
- import { provider2TTSAgent } from "../utils/provider2agent.js";
11
+ import { provider2TTSAgent } from "../types/provider2agent.js";
12
12
  import { invalidAudioSourceError } from "../utils/error_cause.js";
13
13
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
14
14
  import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
@@ -1,2 +1,5 @@
1
1
  import { type MulmoStudioContext } from "../types/index.js";
2
- export declare const mulmoViewerBundle: (context: MulmoStudioContext) => Promise<void>;
2
+ export type MulmoViewerBundleOptions = {
3
+ skipZip?: boolean;
4
+ };
5
+ export declare const mulmoViewerBundle: (context: MulmoStudioContext, options?: MulmoViewerBundleOptions) => Promise<void>;
@@ -4,7 +4,7 @@ import { GraphAILogger } from "graphai";
4
4
  import { listLocalizedAudioPaths } from "./audio.js";
5
5
  import { mkdir } from "../utils/file.js";
6
6
  import { ZipBuilder } from "../utils/zip.js";
7
- import { bundleTargetLang } from "../utils/const.js";
7
+ import { bundleTargetLang } from "../types/const.js";
8
8
  import { createSilentAudio } from "../utils/ffmpeg_utils.js";
9
9
  import { silentMp3 } from "../utils/context.js";
10
10
  const downloadFile = async (url, destPath) => {
@@ -27,9 +27,12 @@ const processBgm = async (bgm, outDir, baseDir, zipper) => {
27
27
  return undefined;
28
28
  }
29
29
  const fileName = path.basename(bgm.path);
30
- const destPath = path.resolve(outDir, fileName);
31
- fs.copyFileSync(sourcePath, destPath);
32
- zipper.addFile(sourcePath, fileName);
30
+ if (zipper) {
31
+ zipper.addFile(sourcePath, fileName);
32
+ }
33
+ else {
34
+ fs.copyFileSync(sourcePath, path.resolve(outDir, fileName));
35
+ }
33
36
  return fileName;
34
37
  }
35
38
  else if (bgm.kind === "url") {
@@ -37,7 +40,7 @@ const processBgm = async (bgm, outDir, baseDir, zipper) => {
37
40
  const fileName = path.basename(new URL(bgm.url).pathname) || "bgm.mp3";
38
41
  const destPath = path.resolve(outDir, fileName);
39
42
  await downloadFile(bgm.url, destPath);
40
- zipper.addFile(destPath);
43
+ zipper?.addFile(destPath);
41
44
  return fileName;
42
45
  }
43
46
  // base64 or other formats are not supported
@@ -52,12 +55,16 @@ const imageSourceMappings = [
52
55
  ["lipSyncFile", "videoWithAudioSource"],
53
56
  ["htmlImageFile", "htmlImageSource"],
54
57
  ];
55
- export const mulmoViewerBundle = async (context) => {
56
- const isZip = true;
58
+ export const mulmoViewerBundle = async (context, options = {}) => {
59
+ const { skipZip = false } = options;
57
60
  const outDir = context.fileDirs.outDirPath;
58
61
  const baseDir = context.fileDirs.baseDirPath;
62
+ const filename = context.studio.filename;
59
63
  mkdir(outDir);
60
- const zipper = new ZipBuilder(path.resolve(outDir, zipFileName));
64
+ // Bundle directory: output/<script_name>/
65
+ const bundleDir = path.resolve(outDir, filename);
66
+ mkdir(bundleDir);
67
+ const zipper = skipZip ? undefined : new ZipBuilder(path.resolve(bundleDir, zipFileName));
61
68
  // text
62
69
  const resultJson = [];
63
70
  context.studio.script.beats.forEach((beat, index) => {
@@ -77,13 +84,17 @@ export const mulmoViewerBundle = async (context) => {
77
84
  }
78
85
  if (fileName === "silent300.mp3") {
79
86
  // Download from GitHub URL
80
- const destPath = path.resolve(outDir, fileName);
87
+ const destPath = path.resolve(bundleDir, fileName);
81
88
  await downloadFile(silentMp3, destPath);
82
- zipper.addFile(destPath, fileName);
89
+ zipper?.addFile(destPath, fileName);
83
90
  }
84
91
  else if (fs.existsSync(audio)) {
85
- fs.copyFileSync(audio, path.resolve(outDir, fileName));
86
- zipper.addFile(audio, fileName);
92
+ if (zipper) {
93
+ zipper.addFile(audio, fileName);
94
+ }
95
+ else {
96
+ fs.copyFileSync(audio, path.resolve(bundleDir, fileName));
97
+ }
87
98
  }
88
99
  }
89
100
  }));
@@ -96,13 +107,17 @@ export const mulmoViewerBundle = async (context) => {
96
107
  if (typeof value === "string") {
97
108
  data[source] = path.basename(value);
98
109
  if (fs.existsSync(value)) {
99
- fs.copyFileSync(value, path.resolve(outDir, path.basename(value)));
100
- zipper.addFile(value);
110
+ if (zipper) {
111
+ zipper.addFile(value);
112
+ }
113
+ else {
114
+ fs.copyFileSync(value, path.resolve(bundleDir, path.basename(value)));
115
+ }
101
116
  }
102
117
  }
103
118
  });
104
119
  });
105
- // silent
120
+ // silent - generated files always go to bundleDir
106
121
  await Promise.all(context.studio.script.beats.map(async (__, index) => {
107
122
  const data = resultJson[index];
108
123
  if (data.audioSources &&
@@ -111,9 +126,9 @@ export const mulmoViewerBundle = async (context) => {
111
126
  data.videoWithAudioSource === undefined &&
112
127
  data.duration) {
113
128
  const file = `silent_${index}.mp3`;
114
- const audioFile = path.resolve(outDir, file);
129
+ const audioFile = path.resolve(bundleDir, file);
115
130
  await createSilentAudio(audioFile, data.duration);
116
- zipper.addFile(audioFile);
131
+ zipper?.addFile(audioFile);
117
132
  data.audioSources.ja = file;
118
133
  data.audioSources.en = file;
119
134
  }
@@ -127,11 +142,12 @@ export const mulmoViewerBundle = async (context) => {
127
142
  });
128
143
  });
129
144
  // BGM
130
- const bgmFileName = await processBgm(context.studio?.script.audioParams?.bgm, outDir, baseDir, zipper);
145
+ const bgmFileName = await processBgm(context.studio?.script.audioParams?.bgm, bundleDir, baseDir, zipper);
131
146
  const bundleData = { beats: resultJson, bgmSource: bgmFileName, title: context.studio.script.title };
132
- fs.writeFileSync(path.resolve(outDir, viewJsonFileName), JSON.stringify(bundleData, null, 2));
133
- zipper.addFile(path.resolve(outDir, viewJsonFileName));
134
- if (isZip) {
147
+ const viewJsonPath = path.resolve(bundleDir, viewJsonFileName);
148
+ fs.writeFileSync(viewJsonPath, JSON.stringify(bundleData, null, 2));
149
+ zipper?.addFile(viewJsonPath);
150
+ if (zipper) {
135
151
  await zipper.finalize();
136
152
  }
137
153
  };
@@ -7,7 +7,7 @@ import { fileWriteAgent } from "@graphai/vanilla_node_agents";
7
7
  import { splitText } from "../utils/string.js";
8
8
  import { settings2GraphAIConfig, beatId, multiLingualObjectToArray } from "../utils/utils.js";
9
9
  import { getMultiLingual } from "../utils/context.js";
10
- import { currentMulmoScriptVersion } from "../utils/const.js";
10
+ import { currentMulmoScriptVersion } from "../types/const.js";
11
11
  import { translateApiKeyMissingError, hasCause, agentGenerationError, translateAction, multiLingualFileTarget } from "../utils/error_cause.js";
12
12
  import { getOutputMultilingualFilePath, mkdir, writingMessage, hashSHA256 } from "../utils/file.js";
13
13
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
@@ -1,9 +1,9 @@
1
1
  import fs from "fs";
2
2
  import { GraphAILogger } from "graphai";
3
- import { provider2ImageAgent } from "../utils/provider2agent.js";
3
+ import { provider2ImageAgent } from "../types/provider2agent.js";
4
4
  import { apiKeyMissingError, agentIncorrectAPIKeyError, agentGenerationError, agentInvalidResponseError, imageAction, imageFileTarget, hasCause, getGenAIErrorReason, resultify, } from "../utils/error_cause.js";
5
5
  import { getAspectRatio } from "../utils/utils.js";
6
- import { ASPECT_RATIOS, PRO_ASPECT_RATIOS } from "../utils/const.js";
6
+ import { ASPECT_RATIOS, PRO_ASPECT_RATIOS } from "../types/const.js";
7
7
  import { GoogleGenAI, PersonGeneration } from "@google/genai";
8
8
  const getGeminiContents = (prompt, referenceImages) => {
9
9
  const contents = [{ text: prompt }];
@@ -2,7 +2,7 @@ import fs from "fs";
2
2
  import path from "path";
3
3
  import { GraphAILogger } from "graphai";
4
4
  import OpenAI, { toFile, AuthenticationError, RateLimitError, APIError } from "openai";
5
- import { provider2ImageAgent, gptImages } from "../utils/provider2agent.js";
5
+ import { provider2ImageAgent, gptImages } from "../types/provider2agent.js";
6
6
  import { apiKeyMissingError, agentGenerationError, openAIAgentGenerationError, agentIncorrectAPIKeyError, agentAPIRateLimitError, agentInvalidResponseError, imageAction, imageFileTarget, } from "../utils/error_cause.js";
7
7
  // https://platform.openai.com/docs/guides/image-generation
8
8
  export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
@@ -3,7 +3,7 @@ import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
4
  import { getAspectRatio } from "./movie_replicate_agent.js";
5
5
  import { apiKeyMissingError, agentIncorrectAPIKeyError, agentGenerationError, agentInvalidResponseError, imageAction, imageFileTarget, hasCause, } from "../utils/error_cause.js";
6
- import { provider2ImageAgent } from "../utils/provider2agent.js";
6
+ import { provider2ImageAgent } from "../types/provider2agent.js";
7
7
  export const imageReplicateAgent = async ({ namedInputs, params, config, }) => {
8
8
  const { prompt, referenceImages } = namedInputs;
9
9
  const { canvasSize } = params;
@@ -1,7 +1,7 @@
1
1
  import { readFileSync, existsSync } from "fs";
2
2
  import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
- import { provider2LipSyncAgent } from "../utils/provider2agent.js";
4
+ import { provider2LipSyncAgent } from "../types/provider2agent.js";
5
5
  import { apiKeyMissingError, agentGenerationError, agentFileNotExistError, imageAction, movieFileTarget, audioFileTarget, hasCause, } from "../utils/error_cause.js";
6
6
  export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) => {
7
7
  const { movieFile, audioFile, imageFile } = namedInputs;
@@ -3,8 +3,8 @@ import { GraphAILogger, sleep } from "graphai";
3
3
  import { GoogleGenAI, PersonGeneration } from "@google/genai";
4
4
  import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, hasCause, } from "../utils/error_cause.js";
5
5
  import { getAspectRatio } from "../utils/utils.js";
6
- import { ASPECT_RATIOS } from "../utils/const.js";
7
- import { getModelDuration, provider2MovieAgent } from "../utils/provider2agent.js";
6
+ import { ASPECT_RATIOS } from "../types/const.js";
7
+ import { getModelDuration, provider2MovieAgent } from "../types/provider2agent.js";
8
8
  const pollUntilDone = async (ai, operation) => {
9
9
  const response = { operation };
10
10
  while (!response.operation.done) {
@@ -2,7 +2,7 @@ import { readFileSync } from "fs";
2
2
  import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
4
  import { apiKeyMissingError, agentGenerationError, agentInvalidResponseError, imageAction, movieFileTarget, videoDurationTarget, unsupportedModelTarget, } from "../utils/error_cause.js";
5
- import { provider2MovieAgent, getModelDuration } from "../utils/provider2agent.js";
5
+ import { provider2MovieAgent, getModelDuration } from "../types/provider2agent.js";
6
6
  async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration) {
7
7
  const replicate = new Replicate({
8
8
  auth: apiKey,
@@ -1,7 +1,7 @@
1
1
  import { readFileSync } from "fs";
2
2
  import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
- import { provider2SoundEffectAgent } from "../utils/provider2agent.js";
4
+ import { provider2SoundEffectAgent } from "../types/provider2agent.js";
5
5
  import { apiKeyMissingError, agentGenerationError, imageAction, movieFileTarget, hasCause } from "../utils/error_cause.js";
6
6
  export const soundEffectReplicateAgent = async ({ namedInputs, params, config }) => {
7
7
  const { prompt, movieFile } = namedInputs;
@@ -1,5 +1,5 @@
1
1
  import { GraphAILogger } from "graphai";
2
- import { provider2TTSAgent } from "../utils/provider2agent.js";
2
+ import { provider2TTSAgent } from "../types/provider2agent.js";
3
3
  import { apiKeyMissingError, agentVoiceLimitReachedError, agentIncorrectAPIKeyError, agentGenerationError, audioAction, audioFileTarget, } from "../utils/error_cause.js";
4
4
  export const ttsElevenlabsAgent = async ({ namedInputs, params, config, }) => {
5
5
  const { text } = namedInputs;
@@ -1,6 +1,6 @@
1
1
  import { GraphAILogger } from "graphai";
2
2
  import { GoogleGenAI } from "@google/genai";
3
- import { provider2TTSAgent } from "../utils/provider2agent.js";
3
+ import { provider2TTSAgent } from "../types/provider2agent.js";
4
4
  import { agentIncorrectAPIKeyError, apiKeyMissingError, agentGenerationError, audioAction, audioFileTarget, getGenAIErrorReason, } from "../utils/error_cause.js";
5
5
  import { pcmToMp3 } from "../utils/ffmpeg_utils.js";
6
6
  const getPrompt = (text, instructions) => {
@@ -1,5 +1,5 @@
1
1
  import { GraphAILogger } from "graphai";
2
- import { provider2TTSAgent } from "../utils/provider2agent.js";
2
+ import { provider2TTSAgent } from "../types/provider2agent.js";
3
3
  import { apiKeyMissingError, agentIncorrectAPIKeyError, agentGenerationError, audioAction, audioFileTarget } from "../utils/error_cause.js";
4
4
  export const ttsKotodamaAgent = async ({ namedInputs, params, config, }) => {
5
5
  const { text } = namedInputs;
@@ -1,10 +1,10 @@
1
1
  import { GraphAILogger } from "graphai";
2
2
  import OpenAI, { AuthenticationError, RateLimitError } from "openai";
3
- import { provider2TTSAgent } from "../utils/provider2agent.js";
3
+ import { provider2TTSAgent } from "../types/provider2agent.js";
4
4
  import { apiKeyMissingError, agentIncorrectAPIKeyError, agentAPIRateLimitError, agentGenerationError, audioAction, audioFileTarget, } from "../utils/error_cause.js";
5
5
  export const ttsOpenaiAgent = async ({ namedInputs, params, config, }) => {
6
6
  const { text } = namedInputs;
7
- const { model, voice, suppressError, instructions } = params;
7
+ const { model, voice, suppressError, instructions, speed } = params;
8
8
  const { apiKey, baseURL } = config ?? {};
9
9
  if (!apiKey) {
10
10
  throw new Error("OpenAI API key is required (OPENAI_API_KEY)", {
@@ -21,6 +21,9 @@ export const ttsOpenaiAgent = async ({ namedInputs, params, config, }) => {
21
21
  if (instructions) {
22
22
  tts_options["instructions"] = instructions;
23
23
  }
24
+ if (speed) {
25
+ tts_options["speed"] = speed;
26
+ }
24
27
  GraphAILogger.log("ttsOptions", tts_options);
25
28
  const response = await openai.audio.speech.create(tts_options);
26
29
  const buffer = Buffer.from(await response.arrayBuffer());
@@ -1,6 +1,6 @@
1
1
  import { mulmoViewerBundle, audio, images, translate } from "../../../actions/index.js";
2
2
  import { initializeContext } from "../../helpers.js";
3
- import { bundleTargetLang } from "../../../utils/const.js";
3
+ import { bundleTargetLang } from "../../../types/const.js";
4
4
  export const handler = async (argv) => {
5
5
  const context = await initializeContext(argv);
6
6
  if (!context) {
@@ -1,5 +1,5 @@
1
1
  import { commonOptions } from "../../common.js";
2
- import { languages } from "../../../utils/const.js";
2
+ import { languages } from "../../../types/const.js";
3
3
  export const builder = (yargs) => commonOptions(yargs)
4
4
  .option("a", {
5
5
  alias: "audiodir",
@@ -1,5 +1,5 @@
1
1
  import { commonOptions } from "../../common.js";
2
- import { pdf_modes, pdf_sizes } from "../../../utils/const.js";
2
+ import { pdf_modes, pdf_sizes } from "../../../types/const.js";
3
3
  export const builder = (yargs) => commonOptions(yargs)
4
4
  .option("i", {
5
5
  alias: "imagedir",
@@ -1,4 +1,4 @@
1
- import { llm } from "../../../../utils/provider2agent.js";
1
+ import { llm } from "../../../../types/provider2agent.js";
2
2
  import { getAvailablePromptTemplates } from "../../../../utils/file.js";
3
3
  const availableTemplateNames = getAvailablePromptTemplates().map((template) => template.filename);
4
4
  export const builder = (yargs) => {
@@ -1,5 +1,5 @@
1
1
  import { ToolCliArgs } from "../../../../types/cli_types.js";
2
- import type { LLM } from "../../../../utils/provider2agent.js";
2
+ import type { LLM } from "../../../../types/provider2agent.js";
3
3
  export declare const handler: (argv: ToolCliArgs<{
4
4
  o?: string;
5
5
  b?: string;
@@ -1,5 +1,5 @@
1
1
  import { getBaseDirPath, getFullPath } from "../../../../utils/file.js";
2
- import { outDirName, cacheDirName } from "../../../../utils/const.js";
2
+ import { outDirName, cacheDirName } from "../../../../types/const.js";
3
3
  import { getUrlsIfNeeded, selectTemplate } from "../../../../utils/inquirer.js";
4
4
  import { createMulmoScriptFromUrl, createMulmoScriptFromFile } from "../../../../tools/create_mulmo_script_from_url.js";
5
5
  import { createMulmoScriptInteractively } from "../../../../tools/create_mulmo_script_interactively.js";
@@ -1,6 +1,6 @@
1
1
  import { getAvailablePromptTemplates } from "../../../../utils/file.js";
2
- import { llm } from "../../../../utils/provider2agent.js";
3
- import { storyToScriptGenerateMode } from "../../../../utils/const.js";
2
+ import { llm } from "../../../../types/provider2agent.js";
3
+ import { storyToScriptGenerateMode } from "../../../../types/const.js";
4
4
  const availableTemplateNames = getAvailablePromptTemplates().map((template) => template.filename);
5
5
  export const builder = (yargs) => {
6
6
  return yargs
@@ -1,5 +1,5 @@
1
1
  import { ToolCliArgs } from "../../../../types/cli_types.js";
2
- import type { LLM } from "../../../../utils/provider2agent.js";
2
+ import type { LLM } from "../../../../types/provider2agent.js";
3
3
  export declare const handler: (argv: ToolCliArgs<{
4
4
  o?: string;
5
5
  b?: string;
@@ -3,7 +3,7 @@ import { setGraphAILogger } from "../../../../cli/helpers.js";
3
3
  import { storyToScript } from "../../../../tools/story_to_script.js";
4
4
  import { mulmoStoryboardSchema } from "../../../../types/schema.js";
5
5
  import { getBaseDirPath, getFullPath, readAndParseJson } from "../../../../utils/file.js";
6
- import { outDirName } from "../../../../utils/const.js";
6
+ import { outDirName } from "../../../../types/const.js";
7
7
  export const handler = async (argv) => {
8
8
  const { v: verbose, s: filename, file, o: outdir, b: basedir, beats_per_scene, llm, llm_model, mode } = argv;
9
9
  let { t: template } = argv;
package/lib/cli/common.js CHANGED
@@ -1,4 +1,4 @@
1
- import { languages } from "../utils/const.js";
1
+ import { languages } from "../types/const.js";
2
2
  export const commonOptions = (yargs) => {
3
3
  return yargs
4
4
  .option("o", {
@@ -4,7 +4,7 @@ import path from "path";
4
4
  import clipboardy from "clipboardy";
5
5
  import { getBaseDirPath, getFullPath, getOutputStudioFilePath, resolveDirPath, mkdir, getOutputMultilingualFilePath, generateTimestampedFileName, } from "../utils/file.js";
6
6
  import { isHttp } from "../utils/utils.js";
7
- import { outDirName, imageDirName, audioDirName } from "../utils/const.js";
7
+ import { outDirName, imageDirName, audioDirName } from "../types/const.js";
8
8
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
9
9
  import { translate } from "../actions/translate.js";
10
10
  import { initializeContextFromFiles } from "../utils/context.js";
@@ -484,7 +484,7 @@ export const scriptTemplates = [
484
484
  ],
485
485
  filename: "html",
486
486
  htmlImageParams: {
487
- model: "claude-3-7-sonnet-20250219",
487
+ model: "claude-sonnet-4-5-20250929",
488
488
  provider: "anthropic",
489
489
  },
490
490
  lang: "en",
@@ -987,7 +987,7 @@ export const scriptTemplates = [
987
987
  ],
988
988
  filename: "presentation",
989
989
  htmlImageParams: {
990
- model: "claude-3-7-sonnet-20250219",
990
+ model: "claude-sonnet-4-5-20250929",
991
991
  provider: "anthropic",
992
992
  },
993
993
  lang: "en",
@@ -49,7 +49,7 @@ export const templateDataSet = {
49
49
  "```",
50
50
  html: "Another LLM will generate actual slides from the prompt and data for each beat. Adding optional data would help it to generate more compelling slide. Mention the reference in one of beats, if it exists. The valid type of reference is 'article', 'paper', 'image', 'video', 'audio'. Use the JSON below as a template.\n" +
51
51
  "```JSON\n" +
52
- '{"$mulmocast":{"version":"1.1","credit":"closing"},"references":[{"url":"https://www.somegreatwebsite.com/article/123","title":"Title of the article we are referencing","type":"[TYPE OF ARTICLE: article, paper, image, video, audio]"}],"title":"[TITLE: Brief, engaging title for the topic]","htmlImageParams":{"provider":"anthropic","model":"claude-3-7-sonnet-20250219"},"lang":"en","beats":[{"text":"[NARRATION: Narration for the beat.]","htmlPrompt":{"prompt":"[PROMPT to create appropriate HTML page for the beat.]"}},{"text":"[NARRATION: Narration for the beat.]","htmlPrompt":{"prompt":"[PROMPT to create appropriate HTML page for the beat with the data.]","data":{"description":"DATA TO BE PRESENTED IN THIS BEAT (in any format)]","net_income":{"Q2 FY2024":320,"Q3 FY2024":333,"Q4 FY2024":350},"unit":"USD (Million)"}}}],"canvasSize":{"width":1536,"height":1024}}\n' +
52
+ '{"$mulmocast":{"version":"1.1","credit":"closing"},"references":[{"url":"https://www.somegreatwebsite.com/article/123","title":"Title of the article we are referencing","type":"[TYPE OF ARTICLE: article, paper, image, video, audio]"}],"title":"[TITLE: Brief, engaging title for the topic]","htmlImageParams":{"provider":"anthropic","model":"claude-sonnet-4-5-20250929"},"lang":"en","beats":[{"text":"[NARRATION: Narration for the beat.]","htmlPrompt":{"prompt":"[PROMPT to create appropriate HTML page for the beat.]"}},{"text":"[NARRATION: Narration for the beat.]","htmlPrompt":{"prompt":"[PROMPT to create appropriate HTML page for the beat with the data.]","data":{"description":"DATA TO BE PRESENTED IN THIS BEAT (in any format)]","net_income":{"Q2 FY2024":320,"Q3 FY2024":333,"Q4 FY2024":350},"unit":"USD (Million)"}}}],"canvasSize":{"width":1536,"height":1024}}\n' +
53
53
  "```",
54
54
  image_prompt: "Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.\n" +
55
55
  "```JSON\n" +
@@ -1,6 +1,6 @@
1
1
  export * from "./types/index.js";
2
- export * from "./utils/provider2agent.js";
3
- export * from "./utils/const.js";
2
+ export * from "./types/provider2agent.js";
3
+ export * from "./types/const.js";
4
4
  export * from "./utils/string.js";
5
5
  export * from "./utils/utils.js";
6
6
  export * from "./utils/prompt.js";
@@ -1,7 +1,7 @@
1
1
  // Entry point for universal code
2
2
  export * from "./types/index.js";
3
- export * from "./utils/provider2agent.js";
4
- export * from "./utils/const.js";
3
+ export * from "./types/provider2agent.js";
4
+ export * from "./types/const.js";
5
5
  export * from "./utils/string.js";
6
6
  export * from "./utils/utils.js";
7
7
  export * from "./utils/prompt.js";
package/lib/mcp/server.js CHANGED
@@ -9,7 +9,7 @@ import { fileURLToPath } from "url";
9
9
  import { GraphAILogger } from "graphai";
10
10
  import { audio, images, movie, captions, pdf } from "../actions/index.js";
11
11
  import { initializeContext, runTranslateIfNeeded } from "../cli/helpers.js";
12
- import { outDirName } from "../utils/const.js";
12
+ import { outDirName } from "../types/const.js";
13
13
  import { resolveDirPath, mkdir, generateTimestampedFileName } from "../utils/file.js";
14
14
  import { MulmoScriptMethods } from "../methods/index.js";
15
15
  dotenv.config({ quiet: true });
@@ -177,19 +177,19 @@ export declare const MulmoPresentationStyleMethods: {
177
177
  };
178
178
  getSoundEffectAgentInfo(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): {
179
179
  agentName: string;
180
- defaultModel: import("../utils/provider2agent.js").ReplicateModel;
180
+ defaultModel: import("../types/provider2agent.js").ReplicateModel;
181
181
  keyName: string;
182
- models: import("../utils/provider2agent.js").ReplicateModel[];
183
- modelParams: Record<import("../utils/provider2agent.js").ReplicateModel, {
182
+ models: import("../types/provider2agent.js").ReplicateModel[];
183
+ modelParams: Record<import("../types/provider2agent.js").ReplicateModel, {
184
184
  identifier?: `${string}/${string}:${string}`;
185
185
  }>;
186
186
  };
187
187
  getLipSyncAgentInfo(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): {
188
188
  agentName: string;
189
- defaultModel: import("../utils/provider2agent.js").ReplicateModel;
189
+ defaultModel: import("../types/provider2agent.js").ReplicateModel;
190
190
  keyName: string;
191
- models: import("../utils/provider2agent.js").ReplicateModel[];
192
- modelParams: Record<import("../utils/provider2agent.js").ReplicateModel, {
191
+ models: import("../types/provider2agent.js").ReplicateModel[];
192
+ modelParams: Record<import("../types/provider2agent.js").ReplicateModel, {
193
193
  identifier?: `${string}/${string}:${string}` | `${string}/${string}`;
194
194
  video?: string;
195
195
  audio: string;
@@ -6,7 +6,7 @@
6
6
  import { isNull } from "graphai";
7
7
  import { userAssert } from "../utils/utils.js";
8
8
  import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, mulmoTransitionSchema, } from "../types/schema.js";
9
- import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../utils/provider2agent.js";
9
+ import { provider2ImageAgent, provider2MovieAgent, provider2LLMAgent, provider2SoundEffectAgent, provider2LipSyncAgent, defaultProviders, } from "../types/provider2agent.js";
10
10
  const defaultTextSlideStyles = [
11
11
  '*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
12
12
  "body { margin: 60px; margin-top: 40px; color:#333; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
@@ -4,7 +4,7 @@
4
4
  * Works in both Node.js and modern browsers.
5
5
  */
6
6
  import { BeatSessionType, MulmoStudioContext, SessionProgressCallback, SessionType, MulmoBeat, SpeechOptions } from "../types/index.js";
7
- import { provider2TTSAgent } from "../utils/provider2agent.js";
7
+ import { provider2TTSAgent } from "../types/provider2agent.js";
8
8
  export declare const addSessionProgressCallback: (cb: SessionProgressCallback) => void;
9
9
  export declare const removeSessionProgressCallback: (cb: SessionProgressCallback) => void;
10
10
  export declare const MulmoStudioContextMethods: {
@@ -1,5 +1,5 @@
1
1
  import { MulmoStoryboard, StoryToScriptGenerateMode } from "../types/index.js";
2
- import type { LLM } from "../utils/provider2agent.js";
2
+ import type { LLM } from "../types/provider2agent.js";
3
3
  export declare const storyToScript: ({ story, beatsPerScene, templateName, outdir, fileName, llm, llmModel, generateMode, }: {
4
4
  story: MulmoStoryboard;
5
5
  beatsPerScene: number;
@@ -10,7 +10,7 @@ import { graphDataScriptGeneratePrompt, sceneToBeatsPrompt, storyToScriptInfoPro
10
10
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
11
11
  import validateSchemaAgent from "../agents/validate_schema_agent.js";
12
12
  import { llmPair } from "../utils/utils.js";
13
- import { storyToScriptGenerateMode } from "../utils/const.js";
13
+ import { storyToScriptGenerateMode } from "../types/const.js";
14
14
  import { cliLoadingPlugin } from "../utils/plugins.js";
15
15
  const vanillaAgents = agents.default ?? agents;
16
16
  const createValidatedScriptGraphData = ({ systemPrompt, prompt, schema, llmAgent, llmModel, maxTokens, }) => {
@@ -111,6 +111,7 @@ export type TTSAgentParams = {
111
111
  export type OpenAITTSAgentParams = TTSAgentParams & {
112
112
  instructions: string;
113
113
  model: string;
114
+ speed: number;
114
115
  };
115
116
  export type NijivoiceTTSAgentParams = TTSAgentParams & {
116
117
  speed: number;
@@ -0,0 +1,15 @@
1
+ export declare const currentMulmoScriptVersion = "1.1";
2
+ export declare const outDirName = "output";
3
+ export declare const audioDirName = "audio";
4
+ export declare const imageDirName = "images";
5
+ export declare const cacheDirName = "cache";
6
+ export declare const pdf_modes: string[];
7
+ export declare const pdf_sizes: string[];
8
+ export declare const languages: string[];
9
+ export declare const storyToScriptGenerateMode: {
10
+ stepWise: string;
11
+ oneStep: string;
12
+ };
13
+ export declare const bundleTargetLang: string[];
14
+ export declare const ASPECT_RATIOS: string[];
15
+ export declare const PRO_ASPECT_RATIOS: string[];
@@ -0,0 +1,15 @@
1
+ export const currentMulmoScriptVersion = "1.1";
2
+ export const outDirName = "output";
3
+ export const audioDirName = "audio";
4
+ export const imageDirName = "images";
5
+ export const cacheDirName = "cache";
6
+ export const pdf_modes = ["slide", "talk", "handout"];
7
+ export const pdf_sizes = ["letter", "a4"];
8
+ export const languages = ["en", "ja", "fr", "es", "de", "zh-CN", "zh-TW", "ko", "it", "pt", "ar", "hi"];
9
+ export const storyToScriptGenerateMode = {
10
+ stepWise: "step_wise",
11
+ oneStep: "one_step",
12
+ };
13
+ export const bundleTargetLang = ["ja", "en"];
14
+ export const ASPECT_RATIOS = ["1:1", "9:16", "16:9"];
15
+ export const PRO_ASPECT_RATIOS = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"];
@@ -0,0 +1,191 @@
1
+ export declare const provider2TTSAgent: {
2
+ nijivoice: {
3
+ agentName: string;
4
+ hasLimitedConcurrency: boolean;
5
+ keyName: string;
6
+ };
7
+ openai: {
8
+ agentName: string;
9
+ hasLimitedConcurrency: boolean;
10
+ defaultModel: string;
11
+ defaultVoice: string;
12
+ keyName: string;
13
+ baseURLKeyName: string;
14
+ };
15
+ google: {
16
+ agentName: string;
17
+ hasLimitedConcurrency: boolean;
18
+ keyName: string;
19
+ };
20
+ gemini: {
21
+ agentName: string;
22
+ hasLimitedConcurrency: boolean;
23
+ defaultModel: string;
24
+ defaultVoice: string;
25
+ models: string[];
26
+ keyName: string;
27
+ };
28
+ elevenlabs: {
29
+ agentName: string;
30
+ hasLimitedConcurrency: boolean;
31
+ defaultModel: string;
32
+ models: string[];
33
+ keyName: string;
34
+ };
35
+ kotodama: {
36
+ agentName: string;
37
+ hasLimitedConcurrency: boolean;
38
+ defaultVoice: string;
39
+ defaultDecoration: string;
40
+ keyName: string;
41
+ };
42
+ mock: {
43
+ agentName: string;
44
+ hasLimitedConcurrency: boolean;
45
+ defaultModel: string;
46
+ models: string[];
47
+ };
48
+ };
49
+ export declare const gptImages: string[];
50
+ export declare const provider2ImageAgent: {
51
+ openai: {
52
+ agentName: string;
53
+ defaultModel: string;
54
+ models: string[];
55
+ keyName: string;
56
+ baseURLKeyName: string;
57
+ };
58
+ google: {
59
+ agentName: string;
60
+ defaultModel: string;
61
+ models: string[];
62
+ keyName: string;
63
+ };
64
+ replicate: {
65
+ agentName: string;
66
+ defaultModel: string;
67
+ models: string[];
68
+ keyName: string;
69
+ };
70
+ mock: {
71
+ agentName: string;
72
+ defaultModel: string;
73
+ models: string[];
74
+ keyName: string;
75
+ };
76
+ };
77
+ export type ReplicateModel = `${string}/${string}`;
78
+ export declare const provider2MovieAgent: {
79
+ replicate: {
80
+ agentName: string;
81
+ defaultModel: ReplicateModel;
82
+ keyName: string;
83
+ models: string[];
84
+ modelParams: Record<ReplicateModel, {
85
+ durations: number[];
86
+ start_image: string | undefined;
87
+ last_image?: string;
88
+ price_per_sec: number;
89
+ }>;
90
+ };
91
+ google: {
92
+ agentName: string;
93
+ defaultModel: string;
94
+ models: string[];
95
+ keyName: string;
96
+ modelParams: {
97
+ "veo-3.1-generate-preview": {
98
+ durations: number[];
99
+ };
100
+ "veo-3.0-generate-001": {
101
+ durations: number[];
102
+ };
103
+ "veo-2.0-generate-001": {
104
+ durations: number[];
105
+ };
106
+ };
107
+ };
108
+ mock: {
109
+ agentName: string;
110
+ defaultModel: string;
111
+ models: string[];
112
+ keyName: string;
113
+ modelParams: {};
114
+ };
115
+ };
116
+ export declare const provider2SoundEffectAgent: {
117
+ replicate: {
118
+ agentName: string;
119
+ defaultModel: ReplicateModel;
120
+ keyName: string;
121
+ models: ReplicateModel[];
122
+ modelParams: Record<ReplicateModel, {
123
+ identifier?: `${string}/${string}:${string}`;
124
+ }>;
125
+ };
126
+ };
127
+ export declare const provider2LipSyncAgent: {
128
+ replicate: {
129
+ agentName: string;
130
+ defaultModel: ReplicateModel;
131
+ keyName: string;
132
+ models: ReplicateModel[];
133
+ modelParams: Record<ReplicateModel, {
134
+ identifier?: `${string}/${string}:${string}` | `${string}/${string}`;
135
+ video?: string;
136
+ audio: string;
137
+ image?: string;
138
+ }>;
139
+ };
140
+ };
141
+ export declare const provider2LLMAgent: {
142
+ readonly openai: {
143
+ readonly agentName: "openAIAgent";
144
+ readonly defaultModel: "gpt-5";
145
+ readonly keyName: "OPENAI_API_KEY";
146
+ readonly baseURLKeyName: "OPENAI_BASE_URL";
147
+ readonly max_tokens: 8192;
148
+ readonly models: readonly ["gpt-5", "gpt-5-nano", "gpt-5-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3", "o3-mini", "o3-pro", "o1", "o1-pro", "gpt-4o", "gpt-4o-mini"];
149
+ };
150
+ readonly anthropic: {
151
+ readonly agentName: "anthropicAgent";
152
+ readonly defaultModel: "claude-sonnet-4-5-20250929";
153
+ readonly max_tokens: 8192;
154
+ readonly models: readonly ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-sonnet-4-5-20250929", "claude-haiku-4-5-20251001"];
155
+ readonly keyName: "ANTHROPIC_API_KEY";
156
+ readonly apiKeyNameOverride: "ANTHROPIC_API_TOKEN";
157
+ };
158
+ readonly gemini: {
159
+ readonly agentName: "geminiAgent";
160
+ readonly defaultModel: "gemini-2.5-flash";
161
+ readonly max_tokens: 8192;
162
+ readonly models: readonly ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash"];
163
+ readonly keyName: "GEMINI_API_KEY";
164
+ };
165
+ readonly groq: {
166
+ readonly agentName: "groqAgent";
167
+ readonly defaultModel: "llama-3.1-8b-instant";
168
+ readonly keyName: "GROQ_API_KEY";
169
+ readonly max_tokens: 4096;
170
+ readonly models: readonly ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "deepseek-r1-distill-llama-70b", "openai/gpt-oss-120b", "openai/gpt-oss-20b"];
171
+ };
172
+ readonly mock: {
173
+ readonly agentName: "mediaMockAgent";
174
+ readonly defaultModel: "mock";
175
+ readonly max_tokens: 4096;
176
+ readonly models: readonly ["mock"];
177
+ };
178
+ };
179
+ export declare const defaultProviders: {
180
+ tts: keyof typeof provider2TTSAgent;
181
+ text2image: keyof typeof provider2ImageAgent;
182
+ text2movie: keyof typeof provider2MovieAgent;
183
+ text2Html: keyof typeof provider2LLMAgent;
184
+ llm: keyof typeof provider2LLMAgent;
185
+ soundEffect: keyof typeof provider2SoundEffectAgent;
186
+ lipSync: keyof typeof provider2LipSyncAgent;
187
+ };
188
+ export declare const llm: (keyof typeof provider2LLMAgent)[];
189
+ export type LLM = keyof typeof provider2LLMAgent;
190
+ export declare const htmlLLMProvider: string[];
191
+ export declare const getModelDuration: (provider: keyof typeof provider2MovieAgent, model: string, movieDuration?: number) => number | undefined;
@@ -0,0 +1,326 @@
1
+ // node & browser
2
+ export const provider2TTSAgent = {
3
+ nijivoice: {
4
+ agentName: "ttsNijivoiceAgent",
5
+ hasLimitedConcurrency: true,
6
+ keyName: "NIJIVOICE_API_KEY",
7
+ },
8
+ openai: {
9
+ agentName: "ttsOpenaiAgent",
10
+ hasLimitedConcurrency: false,
11
+ defaultModel: "gpt-4o-mini-tts",
12
+ defaultVoice: "shimmer",
13
+ keyName: "OPENAI_API_KEY",
14
+ baseURLKeyName: "OPENAI_BASE_URL",
15
+ },
16
+ google: {
17
+ agentName: "ttsGoogleAgent",
18
+ hasLimitedConcurrency: false,
19
+ keyName: "GEMINI_API_KEY",
20
+ },
21
+ gemini: {
22
+ agentName: "ttsGeminiAgent",
23
+ hasLimitedConcurrency: false,
24
+ defaultModel: "gemini-2.5-flash-preview-tts",
25
+ defaultVoice: "Kore",
26
+ models: ["gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"],
27
+ keyName: "GEMINI_API_KEY",
28
+ },
29
+ elevenlabs: {
30
+ agentName: "ttsElevenlabsAgent",
31
+ hasLimitedConcurrency: true,
32
+ defaultModel: "eleven_multilingual_v2",
33
+ // Models | ElevenLabs Documentation
34
+ // https://elevenlabs.io/docs/models
35
+ models: ["eleven_multilingual_v2", "eleven_turbo_v2_5", "eleven_turbo_v2", "eleven_flash_v2_5", "eleven_flash_v2"],
36
+ keyName: "ELEVENLABS_API_KEY",
37
+ },
38
+ kotodama: {
39
+ agentName: "ttsKotodamaAgent",
40
+ hasLimitedConcurrency: true,
41
+ defaultVoice: "Atla",
42
+ defaultDecoration: "neutral",
43
+ keyName: "KOTODAMA_API_KEY",
44
+ },
45
+ mock: {
46
+ agentName: "mediaMockAgent",
47
+ hasLimitedConcurrency: true,
48
+ defaultModel: "mock-model",
49
+ models: ["mock-model"],
50
+ },
51
+ };
52
+ export const gptImages = ["gpt-image-1.5", "gpt-image-1", "gpt-image-1-mini"];
53
+ export const provider2ImageAgent = {
54
+ openai: {
55
+ agentName: "imageOpenaiAgent",
56
+ defaultModel: "gpt-image-1",
57
+ models: ["dall-e-3", ...gptImages],
58
+ keyName: "OPENAI_API_KEY",
59
+ baseURLKeyName: "OPENAI_BASE_URL",
60
+ },
61
+ google: {
62
+ agentName: "imageGenAIAgent",
63
+ defaultModel: "gemini-2.5-flash-image",
64
+ models: ["imagen-4.0-generate-preview-06-06", "imagen-4.0-ultra-generate-preview-06-06", "gemini-2.5-flash-image", "gemini-3-pro-image-preview"],
65
+ keyName: "GEMINI_API_KEY",
66
+ },
67
+ replicate: {
68
+ agentName: "imageReplicateAgent",
69
+ defaultModel: "bytedance/seedream-4",
70
+ models: ["bytedance/seedream-4", "qwen/qwen-image"],
71
+ keyName: "REPLICATE_API_TOKEN",
72
+ },
73
+ mock: {
74
+ agentName: "mediaMockAgent",
75
+ defaultModel: "mock-model",
76
+ models: ["mock-model"],
77
+ keyName: "",
78
+ },
79
+ };
80
+ export const provider2MovieAgent = {
81
+ replicate: {
82
+ agentName: "movieReplicateAgent",
83
+ defaultModel: "bytedance/seedance-1-lite",
84
+ keyName: "REPLICATE_API_TOKEN",
85
+ models: [
86
+ "bytedance/seedance-1-lite",
87
+ "bytedance/seedance-1-pro",
88
+ "kwaivgi/kling-v1.6-pro",
89
+ "kwaivgi/kling-v2.1",
90
+ "kwaivgi/kling-v2.1-master",
91
+ "google/veo-2",
92
+ "google/veo-3",
93
+ "google/veo-3-fast",
94
+ "minimax/video-01",
95
+ "minimax/hailuo-02",
96
+ "minimax/hailuo-02-fast",
97
+ "pixverse/pixverse-v4.5",
98
+ "wan-video/wan-2.2-i2v-fast",
99
+ "wan-video/wan-2.2-t2v-fast",
100
+ ],
101
+ modelParams: {
102
+ "bytedance/seedance-1-lite": {
103
+ durations: [5, 10],
104
+ start_image: "image",
105
+ last_image: "last_frame_image",
106
+ price_per_sec: 0.036, // in USD
107
+ },
108
+ "bytedance/seedance-1-pro": {
109
+ durations: [5, 10],
110
+ start_image: "image",
111
+ last_image: "last_frame_image",
112
+ price_per_sec: 0.15,
113
+ },
114
+ "kwaivgi/kling-v1.6-pro": {
115
+ durations: [5, 10],
116
+ start_image: "start_image",
117
+ price_per_sec: 0.095,
118
+ },
119
+ "kwaivgi/kling-v2.1": {
120
+ durations: [5, 10],
121
+ start_image: "start_image",
122
+ price_per_sec: 0.05,
123
+ },
124
+ "kwaivgi/kling-v2.1-master": {
125
+ durations: [5, 10],
126
+ start_image: "start_image",
127
+ price_per_sec: 0.28,
128
+ },
129
+ "google/veo-2": {
130
+ durations: [5, 6, 7, 8],
131
+ start_image: "image",
132
+ price_per_sec: 0.5,
133
+ },
134
+ "google/veo-3": {
135
+ durations: [8],
136
+ start_image: "image",
137
+ price_per_sec: 0.75,
138
+ },
139
+ "google/veo-3-fast": {
140
+ durations: [8],
141
+ start_image: "image",
142
+ price_per_sec: 0.4,
143
+ },
144
+ "minimax/video-01": {
145
+ durations: [6],
146
+ start_image: "first_frame_image",
147
+ price_per_sec: 0.5,
148
+ },
149
+ "minimax/hailuo-02": {
150
+ durations: [6], // NOTE: 10 for only 720p
151
+ start_image: "first_frame_image",
152
+ price_per_sec: 0.08,
153
+ },
154
+ "minimax/hailuo-02-fast": {
155
+ durations: [6, 10], // NOTE: 512P
156
+ start_image: "first_frame_image",
157
+ price_per_sec: 0.0166,
158
+ },
159
+ "pixverse/pixverse-v4.5": {
160
+ durations: [5, 8],
161
+ start_image: "image",
162
+ last_image: "last_frame_image",
163
+ price_per_sec: 0.12,
164
+ },
165
+ "wan-video/wan-2.2-i2v-fast": {
166
+ durations: [5],
167
+ start_image: "image",
168
+ price_per_sec: 0.012,
169
+ },
170
+ "wan-video/wan-2.2-t2v-fast": {
171
+ durations: [5],
172
+ start_image: undefined,
173
+ price_per_sec: 0.012,
174
+ },
175
+ },
176
+ },
177
+ google: {
178
+ agentName: "movieGenAIAgent",
179
+ defaultModel: "veo-2.0-generate-001",
180
+ models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview"],
181
+ keyName: "GEMINI_API_KEY",
182
+ modelParams: {
183
+ "veo-3.1-generate-preview": {
184
+ durations: [4, 6, 8],
185
+ },
186
+ "veo-3.0-generate-001": {
187
+ durations: [4, 6, 8],
188
+ },
189
+ "veo-2.0-generate-001": {
190
+ durations: [5, 6, 7, 8],
191
+ },
192
+ },
193
+ },
194
+ mock: {
195
+ agentName: "mediaMockAgent",
196
+ defaultModel: "mock-model",
197
+ models: ["mock-model"],
198
+ keyName: "",
199
+ modelParams: {},
200
+ },
201
+ };
202
+ export const provider2SoundEffectAgent = {
203
+ replicate: {
204
+ agentName: "soundEffectReplicateAgent",
205
+ defaultModel: "zsxkib/mmaudio",
206
+ keyName: "REPLICATE_API_TOKEN",
207
+ models: ["zsxkib/mmaudio"],
208
+ modelParams: {
209
+ "zsxkib/mmaudio": {
210
+ identifier: "zsxkib/mmaudio:62871fb59889b2d7c13777f08deb3b36bdff88f7e1d53a50ad7694548a41b484",
211
+ },
212
+ },
213
+ },
214
+ };
215
+ export const provider2LipSyncAgent = {
216
+ replicate: {
217
+ agentName: "lipSyncReplicateAgent",
218
+ defaultModel: "bytedance/omni-human",
219
+ keyName: "REPLICATE_API_TOKEN",
220
+ models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human"],
221
+ modelParams: {
222
+ "bytedance/latentsync": {
223
+ identifier: "bytedance/latentsync:637ce1919f807ca20da3a448ddc2743535d2853649574cd52a933120e9b9e293",
224
+ video: "video",
225
+ audio: "audio",
226
+ },
227
+ "tmappdev/lipsync": {
228
+ identifier: "tmappdev/lipsync:c54ce2fe673ea59b857b91250b3d71a2cd304a78f2370687632805c8405fbf4c",
229
+ video: "video_input",
230
+ audio: "audio_input",
231
+ },
232
+ "bytedance/omni-human": {
233
+ identifier: "bytedance/omni-human",
234
+ image: "image",
235
+ audio: "audio",
236
+ price_per_sec: 0.14,
237
+ },
238
+ /* NOTE: This model does not work with large base64 urls.
239
+ "sync/lipsync-2": {
240
+ video: "video",
241
+ audio: "audio",
242
+ },
243
+ */
244
+ /* NOTE: This model does not work well for some unknown reason.
245
+ "kwaivgi/kling-lip-sync": {
246
+ video: "video_url",
247
+ audio: "audio_file",
248
+ },
249
+ */
250
+ },
251
+ },
252
+ };
253
+ // : Record<LLM, { agent: string; defaultModel: string; max_tokens: number }>
254
+ export const provider2LLMAgent = {
255
+ openai: {
256
+ agentName: "openAIAgent",
257
+ defaultModel: "gpt-5",
258
+ keyName: "OPENAI_API_KEY",
259
+ baseURLKeyName: "OPENAI_BASE_URL",
260
+ max_tokens: 8192,
261
+ models: [
262
+ "gpt-5",
263
+ "gpt-5-nano",
264
+ "gpt-5-mini",
265
+ "gpt-4.1",
266
+ "gpt-4.1-mini",
267
+ "gpt-4.1-nano",
268
+ "o3",
269
+ "o3-mini",
270
+ "o3-pro",
271
+ "o1",
272
+ "o1-pro",
273
+ "gpt-4o",
274
+ "gpt-4o-mini",
275
+ ],
276
+ },
277
+ anthropic: {
278
+ agentName: "anthropicAgent",
279
+ defaultModel: "claude-sonnet-4-5-20250929",
280
+ max_tokens: 8192,
281
+ models: ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-sonnet-4-5-20250929", "claude-haiku-4-5-20251001"],
282
+ keyName: "ANTHROPIC_API_KEY",
283
+ apiKeyNameOverride: "ANTHROPIC_API_TOKEN",
284
+ // GraphAI is currently using ANTHROPIC_API_KEY, but the official name is ANTHROPIC_API_TOKEN.
285
+ },
286
+ gemini: {
287
+ agentName: "geminiAgent",
288
+ defaultModel: "gemini-2.5-flash",
289
+ max_tokens: 8192,
290
+ models: ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash"],
291
+ keyName: "GEMINI_API_KEY",
292
+ },
293
+ groq: {
294
+ agentName: "groqAgent",
295
+ defaultModel: "llama-3.1-8b-instant",
296
+ keyName: "GROQ_API_KEY",
297
+ max_tokens: 4096,
298
+ models: ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "deepseek-r1-distill-llama-70b", "openai/gpt-oss-120b", "openai/gpt-oss-20b"],
299
+ },
300
+ mock: {
301
+ agentName: "mediaMockAgent",
302
+ defaultModel: "mock",
303
+ max_tokens: 4096,
304
+ models: ["mock"],
305
+ },
306
+ };
307
+ export const defaultProviders = {
308
+ tts: "openai",
309
+ text2image: "openai",
310
+ text2movie: "replicate",
311
+ text2Html: "openai",
312
+ llm: "openai",
313
+ soundEffect: "replicate",
314
+ lipSync: "replicate",
315
+ };
316
+ export const llm = Object.keys(provider2LLMAgent);
317
+ export const htmlLLMProvider = ["openai", "anthropic", "mock"];
318
+ export const getModelDuration = (provider, model, movieDuration) => {
319
+ const modelParams = provider2MovieAgent[provider]?.modelParams;
320
+ const { durations } = modelParams[model];
321
+ if (durations && movieDuration) {
322
+ const largerDurations = durations.filter((d) => d >= movieDuration);
323
+ return largerDurations.length > 0 ? largerDurations[0] : durations[durations.length - 1];
324
+ }
325
+ return durations?.[0];
326
+ };
@@ -1,6 +1,6 @@
1
1
  import { z } from "zod";
2
- import { htmlLLMProvider, provider2TTSAgent, provider2ImageAgent, provider2MovieAgent, defaultProviders, provider2SoundEffectAgent, } from "../utils/provider2agent.js";
3
- import { currentMulmoScriptVersion } from "../utils/const.js";
2
+ import { htmlLLMProvider, provider2TTSAgent, provider2ImageAgent, provider2MovieAgent, defaultProviders, provider2SoundEffectAgent } from "./provider2agent.js";
3
+ import { currentMulmoScriptVersion } from "./const.js";
4
4
  import { mulmoVideoFilterSchema } from "./schema_video_filter.js";
5
5
  // Re-export video filter schema
6
6
  export { mulmoVideoFilterSchema } from "./schema_video_filter.js";
@@ -1,7 +1,7 @@
1
1
  import { type CallbackFunction } from "graphai";
2
2
  import { langSchema, localizedTextSchema, mulmoBeatSchema, mulmoScriptSchema, mulmoStudioSchema, mulmoStudioBeatSchema, mulmoStoryboardSchema, mulmoStoryboardSceneSchema, mulmoStudioMultiLingualSchema, mulmoStudioMultiLingualArraySchema, mulmoStudioMultiLingualDataSchema, mulmoStudioMultiLingualFileSchema, speakerDictionarySchema, speakerSchema, mulmoSpeechParamsSchema, mulmoImageParamsSchema, mulmoImageParamsImagesValueSchema, mulmoImageParamsImagesSchema, mulmoFillOptionSchema, mulmoTransitionSchema, mulmoVideoFilterSchema, mulmoMovieParamsSchema, mulmoSoundEffectParamsSchema, mulmoLipSyncParamsSchema, textSlideParamsSchema, speechOptionsSchema, speakerDataSchema, mulmoCanvasDimensionSchema, mulmoPromptTemplateSchema, mulmoPromptTemplateFileSchema, text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoPresentationStyleSchema, multiLingualTextsSchema, mulmoImageAssetSchema, mulmoMermaidMediaSchema, mulmoTextSlideMediaSchema, mulmoMarkdownMediaSchema, mulmoImageMediaSchema, mulmoChartMediaSchema, mediaSourceSchema, mediaSourceMermaidSchema, mulmoSessionStateSchema, mulmoOpenAIImageModelSchema, mulmoGoogleImageModelSchema, mulmoGoogleMovieModelSchema, mulmoReplicateMovieModelSchema, mulmoImagePromptMediaSchema } from "./schema.js";
3
- import { pdf_modes, pdf_sizes, storyToScriptGenerateMode } from "../utils/const.js";
4
- import type { LLM } from "../utils/provider2agent.js";
3
+ import { pdf_modes, pdf_sizes, storyToScriptGenerateMode } from "./const.js";
4
+ import type { LLM } from "./provider2agent.js";
5
5
  import { z } from "zod";
6
6
  export type LANG = z.infer<typeof langSchema>;
7
7
  export type MulmoBeat = z.infer<typeof mulmoBeatSchema>;
@@ -5,7 +5,7 @@
5
5
  */
6
6
  import type { ConfigDataDictionary, DefaultConfigData } from "graphai";
7
7
  import { MulmoBeat, MulmoStudioBeat, MulmoStudioMultiLingual, MulmoStudioMultiLingualData } from "../types/index.js";
8
- import { type LLM } from "./provider2agent.js";
8
+ import { type LLM } from "../types/provider2agent.js";
9
9
  export declare const llmPair: (_llm?: LLM, _model?: string) => {
10
10
  agent: "mediaMockAgent" | "openAIAgent" | "anthropicAgent" | "geminiAgent" | "groqAgent";
11
11
  model: string;
@@ -3,7 +3,7 @@
3
3
  * (No Node.js built-ins like fs, path, dotenv, etc.)
4
4
  * Works in both Node.js and modern browsers.
5
5
  */
6
- import { provider2LLMAgent, provider2TTSAgent, provider2ImageAgent, provider2MovieAgent, provider2SoundEffectAgent, provider2LipSyncAgent, } from "./provider2agent.js";
6
+ import { provider2LLMAgent, provider2TTSAgent, provider2ImageAgent, provider2MovieAgent, provider2SoundEffectAgent, provider2LipSyncAgent, } from "../types/provider2agent.js";
7
7
  export const llmPair = (_llm, _model) => {
8
8
  const llmKey = _llm ?? "openai";
9
9
  const agent = provider2LLMAgent[llmKey]?.agentName ?? provider2LLMAgent.openai.agentName;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast",
3
- "version": "2.1.15",
3
+ "version": "2.1.16",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "lib/index.node.js",
@@ -77,7 +77,7 @@
77
77
  "@google/genai": "^1.34.0",
78
78
  "@graphai/anthropic_agent": "^2.0.12",
79
79
  "@graphai/browserless_agent": "^2.0.1",
80
- "@graphai/gemini_agent": "^2.0.1",
80
+ "@graphai/gemini_agent": "^2.0.2",
81
81
  "@graphai/groq_agent": "^2.0.2",
82
82
  "@graphai/input_agents": "^1.0.2",
83
83
  "@graphai/openai_agent": "^2.0.8",
@@ -94,7 +94,7 @@
94
94
  "dotenv": "^17.2.3",
95
95
  "fluent-ffmpeg": "^2.1.3",
96
96
  "graphai": "^2.0.16",
97
- "jsdom": "^27.3.0",
97
+ "jsdom": "^27.4.0",
98
98
  "marked": "^17.0.1",
99
99
  "mulmocast-vision": "^1.0.8",
100
100
  "ora": "^9.0.0",
@@ -102,7 +102,7 @@
102
102
  "replicate": "^1.4.0",
103
103
  "yaml": "^2.8.2",
104
104
  "yargs": "^18.0.0",
105
- "zod": "^4.2.1"
105
+ "zod": "^4.3.5"
106
106
  },
107
107
  "devDependencies": {
108
108
  "@receptron/test_utils": "^2.0.3",
@@ -117,7 +117,7 @@
117
117
  "prettier": "^3.7.4",
118
118
  "tsx": "^4.21.0",
119
119
  "typescript": "^5.9.3",
120
- "typescript-eslint": "^8.50.1"
120
+ "typescript-eslint": "^8.52.0"
121
121
  },
122
122
  "engines": {
123
123
  "node": ">=20.0.0"
@@ -13,7 +13,7 @@
13
13
  "title": "[TITLE: Brief, engaging title for the topic]",
14
14
  "htmlImageParams": {
15
15
  "provider": "anthropic",
16
- "model": "claude-3-7-sonnet-20250219"
16
+ "model": "claude-sonnet-4-5-20250929"
17
17
  },
18
18
  "lang": "en",
19
19
  "beats": [
@@ -5,7 +5,7 @@
5
5
  },
6
6
  "htmlImageParams": {
7
7
  "provider": "anthropic",
8
- "model": "claude-3-7-sonnet-20250219"
8
+ "model": "claude-sonnet-4-5-20250929"
9
9
  },
10
10
  "title": "Sample Title",
11
11
  "references": [