mulmocast 0.0.21 → 0.0.23

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (34):
  1. package/README.md +5 -0
  2. package/lib/actions/captions.js +1 -1
  3. package/lib/actions/images.d.ts +13 -3
  4. package/lib/actions/images.js +91 -9
  5. package/lib/actions/movie.d.ts +2 -2
  6. package/lib/actions/movie.js +21 -6
  7. package/lib/agents/add_bgm_agent.js +1 -1
  8. package/lib/agents/combine_audio_files_agent.js +9 -5
  9. package/lib/agents/index.d.ts +2 -1
  10. package/lib/agents/index.js +2 -1
  11. package/lib/agents/movie_replicate_agent.d.ts +23 -0
  12. package/lib/agents/movie_replicate_agent.js +93 -0
  13. package/lib/cli/commands/tool/scripting/builder.d.ts +3 -1
  14. package/lib/cli/commands/tool/scripting/builder.js +5 -0
  15. package/lib/cli/commands/tool/scripting/handler.d.ts +1 -0
  16. package/lib/cli/commands/tool/scripting/handler.js +13 -4
  17. package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
  18. package/lib/cli/helpers.js +8 -3
  19. package/lib/methods/mulmo_presentation_style.d.ts +2 -1
  20. package/lib/methods/mulmo_presentation_style.js +21 -2
  21. package/lib/methods/mulmo_studio_context.js +1 -1
  22. package/lib/tools/create_mulmo_script_from_url.d.ts +1 -0
  23. package/lib/tools/create_mulmo_script_from_url.js +129 -43
  24. package/lib/types/schema.d.ts +1261 -165
  25. package/lib/types/schema.js +47 -1
  26. package/lib/types/type.d.ts +9 -2
  27. package/lib/utils/ffmpeg_utils.d.ts +2 -2
  28. package/lib/utils/ffmpeg_utils.js +9 -4
  29. package/lib/utils/preprocess.d.ts +47 -6
  30. package/lib/utils/utils.d.ts +1 -0
  31. package/lib/utils/utils.js +5 -0
  32. package/package.json +3 -2
  33. package/scripts/templates/presentation.json +123 -0
  34. package/scripts/templates/presentation.json~ +119 -0
@@ -10,7 +10,7 @@ export declare const builder: (yargs: Argv) => Argv<{
10
10
  } & {
11
11
  beats_per_scene: number;
12
12
  } & {
13
- llm: "openAI" | "anthropic" | "gemini" | "groq" | undefined;
13
+ llm: "anthropic" | "openAI" | "gemini" | "groq" | undefined;
14
14
  } & {
15
15
  llm_model: string | undefined;
16
16
  } & {
@@ -7,7 +7,7 @@ import { isHttp } from "../utils/utils.js";
7
7
  import { createOrUpdateStudioData } from "../utils/preprocess.js";
8
8
  import { outDirName, imageDirName, audioDirName } from "../utils/const.js";
9
9
  import { translate } from "../actions/translate.js";
10
- import { mulmoPresentationStyleSchema, mulmoStudioMultiLingualSchema } from "../types/schema.js";
10
+ import { mulmoCaptionParamsSchema, mulmoPresentationStyleSchema, mulmoStudioMultiLingualSchema } from "../types/schema.js";
11
11
  export const setGraphAILogger = (verbose, logValues) => {
12
12
  if (verbose) {
13
13
  if (logValues) {
@@ -126,13 +126,18 @@ export const initializeContext = async (argv) => {
126
126
  // validate mulmoStudioSchema. skip if __test_invalid__ is true
127
127
  const studio = createOrUpdateStudioData(mulmoScript, currentStudio?.mulmoData, fileName);
128
128
  const multiLingual = getMultiLingual(outputMultilingualFilePath, studio.beats.length);
129
+ if (argv.c) {
130
+ studio.script.captionParams = mulmoCaptionParamsSchema.parse({
131
+ ...(studio.script.captionParams ?? {}),
132
+ lang: argv.c,
133
+ });
134
+ }
129
135
  return {
130
136
  studio,
131
137
  fileDirs: files,
132
138
  force: Boolean(argv.f),
133
139
  dryRun: Boolean(argv.dryRun),
134
140
  lang: argv.l,
135
- caption: argv.c,
136
141
  sessionState: {
137
142
  inSession: {
138
143
  audio: false,
@@ -160,7 +165,7 @@ export const initializeContext = async (argv) => {
160
165
  }
161
166
  };
162
167
  export const runTranslateIfNeeded = async (context, argv) => {
163
- if (argv.l || argv.c) {
168
+ if (argv.l || context.studio.script.captionParams?.lang) {
164
169
  GraphAILogger.log("run translate");
165
170
  await translate(context);
166
171
  }
@@ -1,5 +1,5 @@
1
1
  import "dotenv/config";
2
- import { MulmoCanvasDimension, MulmoBeat, SpeechOptions, Text2SpeechProvider, Text2ImageAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData } from "../types/index.js";
2
+ import { MulmoCanvasDimension, MulmoBeat, SpeechOptions, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData } from "../types/index.js";
3
3
  export declare const MulmoPresentationStyleMethods: {
4
4
  getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;
5
5
  getSpeechProvider(presentationStyle: MulmoPresentationStyle): Text2SpeechProvider;
@@ -10,5 +10,6 @@ export declare const MulmoPresentationStyleMethods: {
10
10
  getProvider(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): Text2SpeechProvider;
11
11
  getVoiceId(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
12
12
  getImageAgentInfo(presentationStyle: MulmoPresentationStyle, dryRun?: boolean): Text2ImageAgentInfo;
13
+ getHtmlImageAgentInfo(presentationStyle: MulmoPresentationStyle): Text2HtmlAgentInfo;
13
14
  getImageType(_: MulmoPresentationStyle, beat: MulmoBeat): BeatMediaType;
14
15
  };
@@ -1,5 +1,6 @@
1
1
  import "dotenv/config";
2
- import { text2ImageProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema } from "../types/schema.js";
2
+ import { userAssert } from "../utils/utils.js";
3
+ import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema } from "../types/schema.js";
3
4
  import { defaultOpenAIImageModel } from "../utils/const.js";
4
5
  const defaultTextSlideStyles = [
5
6
  '*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
@@ -42,7 +43,11 @@ export const MulmoPresentationStyleMethods = {
42
43
  return { ...presentationStyle.speechParams.speakers[beat.speaker].speechOptions, ...beat.speechOptions };
43
44
  },
44
45
  getSpeaker(presentationStyle, beat) {
45
- return presentationStyle.speechParams.speakers[beat.speaker];
46
+ userAssert(!!presentationStyle?.speechParams?.speakers, "presentationStyle.speechParams.speakers is not set!!");
47
+ userAssert(!!beat?.speaker, "beat.speaker is not set");
48
+ const speaker = presentationStyle.speechParams.speakers[beat.speaker];
49
+ userAssert(!!speaker, `speaker is not set: speaker "${beat.speaker}"`);
50
+ return speaker;
46
51
  },
47
52
  getProvider(presentationStyle, beat) {
48
53
  const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
@@ -65,6 +70,20 @@ export const MulmoPresentationStyleMethods = {
65
70
  imageParams: { ...defaultImageParams, ...presentationStyle.imageParams },
66
71
  };
67
72
  },
73
+ getHtmlImageAgentInfo(presentationStyle) {
74
+ const provider = text2HtmlImageProviderSchema.parse(presentationStyle.htmlImageParams?.provider);
75
+ const agent = provider === "anthropic" ? "anthropicAgent" : "openAIAgent";
76
+ const model = presentationStyle.htmlImageParams?.model
77
+ ? presentationStyle.htmlImageParams?.model
78
+ : provider === "anthropic"
79
+ ? "claude-3-7-sonnet-20250219"
80
+ : "gpt-4o-mini";
81
+ return {
82
+ provider,
83
+ agent,
84
+ model,
85
+ };
86
+ },
68
87
  getImageType(_, beat) {
69
88
  return beat.image?.type == "movie" ? "movie" : "image";
70
89
  },
@@ -44,7 +44,7 @@ export const MulmoStudioContextMethods = {
44
44
  return context.studio.filename;
45
45
  },
46
46
  getCaption(context) {
47
- return context.caption;
47
+ return context.studio.script.captionParams?.lang;
48
48
  },
49
49
  setSessionState(context, sessionType, value) {
50
50
  context.sessionState.inSession[sessionType] = value;
@@ -1,3 +1,4 @@
1
1
  import "dotenv/config";
2
2
  import { ScriptingParams } from "../types/index.js";
3
3
  export declare const createMulmoScriptFromUrl: ({ urls, templateName, outDirPath, filename, cacheDirPath, llm, llm_model }: ScriptingParams) => Promise<void>;
4
+ export declare const createMulmoScriptFromFile: (fileName: string, { templateName, outDirPath, filename, cacheDirPath, llm, llm_model, verbose }: ScriptingParams) => Promise<void>;
@@ -1,4 +1,5 @@
1
1
  import "dotenv/config";
2
+ import path from "path";
2
3
  import { GraphAI } from "graphai";
3
4
  import { openAIAgent } from "@graphai/openai_agent";
4
5
  import { anthropicAgent } from "@graphai/anthropic_agent";
@@ -14,7 +15,56 @@ import { mulmoScriptSchema, urlsSchema } from "../types/schema.js";
14
15
  import { cliLoadingPlugin } from "../utils/plugins.js";
15
16
  import { graphDataScriptFromUrlPrompt } from "../utils/prompt.js";
16
17
  import { llmPair } from "../utils/utils.js";
18
+ import { readFileSync } from "fs";
17
19
  const vanillaAgents = agents.default ?? agents;
20
+ const graphMulmoScript = {
21
+ version: 0.5,
22
+ loop: {
23
+ // If the script is not valid and the counter is less than 3, continue the loop
24
+ while: ":continue",
25
+ },
26
+ nodes: {
27
+ sourceText: {},
28
+ systemPrompt: {},
29
+ llmAgent: {},
30
+ llmModel: {},
31
+ maxTokens: {},
32
+ counter: {
33
+ value: 0,
34
+ update: ":counter.add(1)",
35
+ },
36
+ llm: {
37
+ agent: ":llmAgent",
38
+ // console: { before: true },
39
+ inputs: {
40
+ system: ":systemPrompt",
41
+ prompt: graphDataScriptFromUrlPrompt("${:sourceText.text}"),
42
+ params: {
43
+ model: ":llmModel",
44
+ system: ":systemPrompt",
45
+ max_tokens: ":maxTokens",
46
+ },
47
+ },
48
+ },
49
+ validateSchemaAgent: {
50
+ agent: "validateSchemaAgent",
51
+ inputs: {
52
+ text: ":llm.text.codeBlock()",
53
+ schema: mulmoScriptSchema,
54
+ },
55
+ isResult: true,
56
+ },
57
+ continue: {
58
+ agent: ({ isValid, counter }) => {
59
+ return !isValid && counter < 3;
60
+ },
61
+ inputs: {
62
+ isValid: ":validateSchemaAgent.isValid",
63
+ counter: ":counter",
64
+ },
65
+ },
66
+ },
67
+ };
18
68
  const graphData = {
19
69
  version: 0.5,
20
70
  // Execute sequentially because the free version of browserless API doesn't support concurrent execution.
@@ -23,7 +73,7 @@ const graphData = {
23
73
  urls: {
24
74
  value: [],
25
75
  },
26
- prompt: {
76
+ systemPrompt: {
27
77
  value: "",
28
78
  },
29
79
  outdir: {
@@ -87,52 +137,60 @@ const graphData = {
87
137
  agent: "nestedAgent",
88
138
  inputs: {
89
139
  sourceText: ":sourceText",
90
- prompt: ":prompt",
140
+ systemPrompt: ":systemPrompt",
91
141
  llmAgent: ":llmAgent",
92
142
  llmModel: ":llmModel",
93
143
  maxTokens: ":maxTokens",
94
144
  },
95
- graph: {
96
- loop: {
97
- // If the script is not valid and the counter is less than 3, continue the loop
98
- while: ":continue",
99
- },
100
- nodes: {
101
- counter: {
102
- value: 0,
103
- update: ":counter.add(1)",
104
- },
105
- llm: {
106
- agent: ":llmAgent",
107
- inputs: {
108
- system: ":prompt",
109
- prompt: graphDataScriptFromUrlPrompt("${:sourceText.text}"),
110
- params: {
111
- model: ":llmModel",
112
- system: ":prompt",
113
- max_tokens: ":maxTokens",
114
- },
115
- },
116
- },
117
- validateSchemaAgent: {
118
- agent: "validateSchemaAgent",
119
- inputs: {
120
- text: ":llm.text.codeBlock()",
121
- schema: mulmoScriptSchema,
122
- },
123
- isResult: true,
124
- },
125
- continue: {
126
- agent: ({ isValid, counter }) => {
127
- return !isValid && counter < 3;
128
- },
129
- inputs: {
130
- isValid: ":validateSchemaAgent.isValid",
131
- counter: ":counter",
132
- },
133
- },
134
- },
145
+ graph: graphMulmoScript,
146
+ },
147
+ writeJSON: {
148
+ if: ":mulmoScript.validateSchemaAgent.isValid",
149
+ agent: "fileWriteAgent",
150
+ inputs: {
151
+ file: "${:outdir}/${:fileName}-${@now}.json",
152
+ text: ":mulmoScript.validateSchemaAgent.data.toJSON()",
153
+ },
154
+ isResult: true,
155
+ },
156
+ },
157
+ };
158
+ const graphDataText = {
159
+ version: 0.5,
160
+ // Execute sequentially because the free version of browserless API doesn't support concurrent execution.
161
+ concurrency: 1,
162
+ nodes: {
163
+ systemPrompt: {
164
+ value: "",
165
+ },
166
+ outdir: {
167
+ value: "",
168
+ },
169
+ fileName: {
170
+ value: "",
171
+ },
172
+ llmAgent: {
173
+ value: "",
174
+ },
175
+ llmModel: {
176
+ value: "",
177
+ },
178
+ maxTokens: {
179
+ value: 0,
180
+ },
181
+ sourceText: {},
182
+ // generate the mulmo script
183
+ mulmoScript: {
184
+ agent: "nestedAgent",
185
+ // console: { before: true },
186
+ inputs: {
187
+ sourceText: ":sourceText",
188
+ systemPrompt: ":systemPrompt",
189
+ llmAgent: ":llmAgent",
190
+ llmModel: ":llmModel",
191
+ maxTokens: ":maxTokens",
135
192
  },
193
+ graph: graphMulmoScript,
136
194
  },
137
195
  writeJSON: {
138
196
  if: ":mulmoScript.validateSchemaAgent.isValid",
@@ -169,7 +227,7 @@ export const createMulmoScriptFromUrl = async ({ urls, templateName, outDirPath,
169
227
  fileWriteAgent,
170
228
  }, { agentFilters });
171
229
  graph.injectValue("urls", parsedUrls);
172
- graph.injectValue("prompt", readTemplatePrompt(templateName));
230
+ graph.injectValue("systemPrompt", readTemplatePrompt(templateName));
173
231
  graph.injectValue("outdir", outDirPath);
174
232
  graph.injectValue("fileName", filename);
175
233
  graph.injectValue("llmAgent", agent);
@@ -179,3 +237,31 @@ export const createMulmoScriptFromUrl = async ({ urls, templateName, outDirPath,
179
237
  const result = await graph.run();
180
238
  writingMessage(result?.writeJSON?.path ?? "");
181
239
  };
240
+ export const createMulmoScriptFromFile = async (fileName, { templateName, outDirPath, filename, cacheDirPath, llm, llm_model, verbose }) => {
241
+ mkdir(outDirPath);
242
+ mkdir(cacheDirPath);
243
+ const filePath = path.resolve(process.cwd(), fileName);
244
+ const text = readFileSync(filePath, "utf-8");
245
+ const { agent, model, max_tokens } = llmPair(llm, llm_model);
246
+ const graph = new GraphAI(graphDataText, {
247
+ ...vanillaAgents,
248
+ openAIAgent,
249
+ anthropicAgent,
250
+ geminiAgent,
251
+ groqAgent,
252
+ validateSchemaAgent,
253
+ fileWriteAgent,
254
+ });
255
+ graph.injectValue("sourceText", { text });
256
+ graph.injectValue("systemPrompt", readTemplatePrompt(templateName));
257
+ graph.injectValue("outdir", outDirPath);
258
+ graph.injectValue("fileName", filename);
259
+ graph.injectValue("llmAgent", agent);
260
+ graph.injectValue("llmModel", model);
261
+ graph.injectValue("maxTokens", max_tokens);
262
+ if (!verbose) {
263
+ graph.registerCallback(cliLoadingPlugin({ nodeId: "mulmoScript", message: "Generating script..." }));
264
+ }
265
+ const result = await graph.run();
266
+ writingMessage(result?.writeJSON?.path ?? "");
267
+ };