mulmocast 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/assets/templates/akira_comic.json +28 -0
  2. package/assets/templates/children_book.json +13 -0
  3. package/assets/templates/comic_strips.json +14 -1
  4. package/assets/templates/drslump_comic.json +28 -0
  5. package/assets/templates/ghibli_comic.json +28 -0
  6. package/assets/templates/ghost_comic.json +35 -0
  7. package/assets/templates/onepiece_comic.json +28 -0
  8. package/assets/templates/sensei_and_taro.json +21 -0
  9. package/lib/actions/audio.js +2 -2
  10. package/lib/actions/captions.js +2 -2
  11. package/lib/actions/images.js +48 -6
  12. package/lib/actions/movie.d.ts +1 -1
  13. package/lib/actions/movie.js +13 -11
  14. package/lib/actions/pdf.js +6 -4
  15. package/lib/actions/translate.js +2 -2
  16. package/lib/agents/image_openai_agent.d.ts +1 -0
  17. package/lib/agents/image_openai_agent.js +15 -3
  18. package/lib/cli/bin.js +7 -0
  19. package/lib/cli/helpers.js +2 -1
  20. package/lib/tools/create_mulmo_script_from_url.js +2 -2
  21. package/lib/tools/create_mulmo_script_interactively.js +2 -2
  22. package/lib/tools/story_to_script.js +2 -2
  23. package/lib/types/schema.d.ts +1738 -228
  24. package/lib/types/schema.js +8 -2
  25. package/lib/utils/file.js +20 -9
  26. package/lib/utils/pdf.d.ts +1 -0
  27. package/lib/utils/pdf.js +5 -3
  28. package/lib/utils/preprocess.d.ts +50 -16
  29. package/package.json +10 -9
  30. package/scripts/templates/business.json +201 -0
  31. package/scripts/templates/children_book.json +90 -0
  32. package/scripts/templates/coding.json +130 -0
  33. package/scripts/templates/image_prompts_template.json +41 -0
  34. package/scripts/templates/sensei_and_taro.json +116 -0
  35. package/scripts/templates/text_only_template.json +35 -0
  36. package/assets/templates/ghibli_strips.json +0 -6
package/assets/templates/akira_comic.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "title": "Dr. Slump Style Comic Strips",
+   "description": "Template for Dr. Slump-style comic.",
+   "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1536,
+       "height": 1024
+     },
+     "imageParams": {
+       "style": "<style>AKIRA aesthetic.</style>",
+       "images": {
+         "girl": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/akira_presenter.png"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }
package/assets/templates/children_book.json CHANGED
@@ -2,5 +2,18 @@
    "title": "Children Book",
    "description": "Template for children book.",
    "systemPrompt": "Please generate a script for a children book on the topic provided by the user. Each page (=beat) must haven an image prompt appropriate for the text.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1536,
+       "height": 1024
+     },
+     "imageParams": {
+       "style": "A hand-drawn style illustration with a warm, nostalgic atmosphere. The background is rich with natural scenery—lush forests, cloudy skies, and traditional Japanese architecture. Characters have expressive eyes, soft facial features, and are portrayed with gentle lighting and subtle shading. The color palette is muted yet vivid, using earthy tones and watercolor-like textures. The overall scene feels magical and peaceful, with a sense of quiet wonder and emotional depth, reminiscent of classic 1980s and 1990s Japanese animation."
+     }
+   },
    "scriptName": "children_book.json"
  }
package/assets/templates/comic_strips.json CHANGED
@@ -2,5 +2,18 @@
    "title": "American Comic Strips",
    "description": "Template for Dilbert-style comic strips.",
    "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
-   "scriptName": "comic_strips.json"
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1536,
+       "height": 1024
+     },
+     "imageParams": {
+       "style": "<style>A multi panel comic strips. 1990s American workplace humor. Clean, minimalist line art with muted colors. One character is a nerdy office worker with glasses</style>"
+     }
+   },
+   "scriptName": "text_only_template.json"
  }
package/assets/templates/drslump_comic.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "title": "Dr. Slump Style Comic Strips",
+   "description": "Template for Dr. Slump-style comic.",
+   "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1536,
+       "height": 1024
+     },
+     "imageParams": {
+       "style": "<style>Dragon Ball/Dr. Slump aesthetic.</style>",
+       "images": {
+         "girl": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/slump_presenter.png"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }
package/assets/templates/ghibli_comic.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "title": "American Comic Strips",
+   "description": "Template for Dilbert-style comic strips.",
+   "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1536,
+       "height": 1024
+     },
+     "imageParams": {
+       "style": "<style>Ghibli style</style>",
+       "images": {
+         "presenter": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }
package/assets/templates/ghost_comic.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "title": "Dr. Slump Style Comic Strips",
+   "description": "Template for Dr. Slump-style comic.",
+   "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1536,
+       "height": 1024
+     },
+     "imageParams": {
+       "style": "<style>Ghost in the shell aesthetic.</style>",
+       "images": {
+         "presenter": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghost_presenter.png"
+           }
+         },
+         "optimus": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/optimus.png"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }
package/assets/templates/onepiece_comic.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "title": "Dr. Slump Style Comic Strips",
+   "description": "Template for Dr. Slump-style comic.",
+   "systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1536,
+       "height": 1024
+     },
+     "imageParams": {
+       "style": "<style>One Piece aesthetic.</style>",
+       "images": {
+         "presenter": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/onepiece_presenter.png"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }
package/assets/templates/sensei_and_taro.json CHANGED
@@ -2,5 +2,26 @@
    "title": "Student and Teacher",
    "description": "Interactive discussion between a student and teacher",
    "systemPrompt": "この件について、内容全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本をArtifactとして作って。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "canvasSize": {
+       "width": 1536,
+       "height": 1024
+     },
+     "imageParams": {
+       "style": "<style>Ghibli style. Student (Taro) is a young teenager with a dark short hair with glasses. Teacher is a middle-aged man with grey hair and moustache.</style>"
+     },
+     "speechParams": {
+       "provider": "nijivoice",
+       "speakers": {
+         "Announcer": { "displayName": { "ja": "アナウンサー" }, "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62" },
+         "Student": { "displayName": { "ja": "太郎" }, "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f" },
+         "Teacher": { "displayName": { "ja": "先生" }, "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae" }
+       }
+     }
+   },
    "scriptName": "sensei_and_taro.json"
  }
package/lib/actions/audio.js CHANGED
@@ -1,6 +1,6 @@
  import "dotenv/config";
  import { GraphAI } from "graphai";
- import * as agents from "@graphai/vanilla";
+ import vanillaAgents from "@graphai/vanilla";
  import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
  import addBGMAgent from "../agents/add_bgm_agent.js";
  import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
@@ -12,7 +12,7 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
  import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
  import { text2hash, localizedText } from "../utils/utils.js";
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
- const { default: __, ...vanillaAgents } = agents;
+ // const { default: __, ...vanillaAgents } = agents;
  // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
  // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
  const provider_to_agent = {
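
The @graphai/vanilla import change above repeats in captions.js, images.js, translate.js, and the tools below. A minimal before/after sketch, assuming the package's default export is the agent map itself:

  // 0.0.7: import the namespace, then strip the synthetic `default` key
  import * as agents from "@graphai/vanilla";
  const { default: __, ...vanillaAgents } = agents;

  // 0.0.9: use the default export directly as the agent map
  import vanillaAgents from "@graphai/vanilla";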
package/lib/actions/captions.js CHANGED
@@ -1,9 +1,9 @@
  import { GraphAI, GraphAILogger } from "graphai";
- import * as agents from "@graphai/vanilla";
+ import vanillaAgents from "@graphai/vanilla";
  import { getHTMLFile } from "../utils/file.js";
  import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
- const { default: __, ...vanillaAgents } = agents;
+ // const { default: __, ...vanillaAgents } = agents;
  const graph_data = {
    version: 0.5,
    nodes: {
package/lib/actions/images.js CHANGED
@@ -1,15 +1,16 @@
  import dotenv from "dotenv";
+ import fs from "fs";
  import { GraphAI, GraphAILogger } from "graphai";
- import * as agents from "@graphai/vanilla";
+ import vanillaAgents from "@graphai/vanilla";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
  import imageGoogleAgent from "../agents/image_google_agent.js";
  import imageOpenaiAgent from "../agents/image_openai_agent.js";
- import { MulmoScriptMethods } from "../methods/index.js";
+ import { MulmoScriptMethods, MulmoStudioContextMethods } from "../methods/index.js";
  import { imagePlugins } from "../utils/image_plugins/index.js";
  import { imagePrompt } from "../utils/prompt.js";
- const { default: __, ...vanillaAgents } = agents;
+ // const { default: __, ...vanillaAgents } = agents;
  dotenv.config();
  // const openai = new OpenAI();
  import { GoogleAuth } from "google-auth-library";
@@ -21,7 +22,7 @@ const htmlStyle = (script, beat) => {
    };
  };
  const imagePreprocessAgent = async (namedInputs) => {
-   const { context, beat, index, suffix, imageDirPath, imageAgentInfo } = namedInputs;
+   const { context, beat, index, suffix, imageDirPath, imageAgentInfo, imageRefs } = namedInputs;
    const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
    const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
    const returnValue = {
@@ -44,7 +45,12 @@ const imagePreprocessAgent = async (namedInputs) => {
      }
    }
    const prompt = imagePrompt(beat, imageParams.style);
-   return { path: imagePath, prompt, ...returnValue };
+   const images = (() => {
+     const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
+     const sources = imageNames.map((name) => imageRefs[name]);
+     return sources.filter((source) => source !== undefined);
+   })();
+   return { path: imagePath, prompt, ...returnValue, images };
  };
  const graph_data = {
    version: 0.5,
@@ -54,9 +60,16 @@ const graph_data = {
      imageDirPath: {},
      imageAgentInfo: {},
      outputStudioFilePath: {},
+     imageRefs: {},
      map: {
        agent: "mapAgent",
-       inputs: { rows: ":context.studio.script.beats", context: ":context", imageAgentInfo: ":imageAgentInfo", imageDirPath: ":imageDirPath" },
+       inputs: {
+         rows: ":context.studio.script.beats",
+         context: ":context",
+         imageAgentInfo: ":imageAgentInfo",
+         imageDirPath: ":imageDirPath",
+         imageRefs: ":imageRefs",
+       },
        isResult: true,
        params: {
          rowKey: "beat",
@@ -73,6 +86,7 @@ const graph_data = {
            suffix: "p",
            imageDirPath: ":imageDirPath",
            imageAgentInfo: ":imageAgentInfo",
+           imageRefs: ":imageRefs",
          },
        },
        imageGenerator: {
@@ -92,6 +106,7 @@ const graph_data = {
            size: ":preprocessor.imageParams.size",
            moderation: ":preprocessor.imageParams.moderation",
            aspectRatio: ":preprocessor.aspectRatio",
+           images: ":preprocessor.images",
          },
        },
        defaultValue: {},
@@ -170,12 +185,39 @@ const generateImages = async (context) => {
      },
    };
  }
+ if (imageAgentInfo.provider === "openai") {
+   // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
+   // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
+   // gpt-image-1:3,000,000 TPM、150 images per minute
+   graph_data.concurrency = imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
+ }
+ const imageRefs = {};
+ const images = studio.script.imageParams?.images;
+ if (images) {
+   await Promise.all(Object.keys(images).map(async (key) => {
+     const image = images[key];
+     if (image.source.kind === "path") {
+       imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
+     }
+     else if (image.source.kind === "url") {
+       const response = await fetch(image.source.url);
+       if (!response.ok) {
+         throw new Error(`Failed to download image: ${image.source.url}`);
+       }
+       const buffer = Buffer.from(await response.arrayBuffer());
+       const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.png`;
+       await fs.promises.writeFile(imagePath, buffer);
+       imageRefs[key] = imagePath;
+     }
+   }));
+ }
  GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
  const injections = {
    context,
    imageAgentInfo,
    outputStudioFilePath: getOutputStudioFilePath(outDirPath, studio.filename),
    imageDirPath,
+   imageRefs,
  };
  const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
  Object.keys(injections).forEach((key) => {
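
With imageRefs in place, a script can declare named reference images once under imageParams.images (as in the templates above) and each beat can opt in via imageNames; when imageNames is omitted, all references are passed along, per imagePreprocessAgent. A hypothetical script fragment showing the shape — the beat fields other than imageNames are illustrative, not taken from this diff:

  {
    "imageParams": {
      "style": "<style>Ghibli style</style>",
      "images": {
        "presenter": {
          "type": "image",
          "source": { "kind": "url", "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png" }
        }
      }
    },
    "beats": [{ "imageNames": ["presenter"], "imagePrompt": "The presenter introduces the topic." }]
  }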
package/lib/actions/movie.d.ts CHANGED
@@ -3,7 +3,7 @@ export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType
    videoId: string;
    videoPart: string;
  };
- export declare const getAudioPart: (inputIndex: number, duration: number, delay: number) => {
+ export declare const getAudioPart: (inputIndex: number, duration: number, delay: number, mixAudio: number) => {
    audioId: string;
    audioPart: string;
  };
package/lib/actions/movie.js CHANGED
@@ -26,13 +26,14 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
      videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
    };
  };
- export const getAudioPart = (inputIndex, duration, delay) => {
+ export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
    const audioId = `a${inputIndex}`;
    return {
      audioId,
      audioPart: `[${inputIndex}:a]` +
        `atrim=duration=${duration},` + // Trim to beat duration
        `adelay=${delay * 1000}|${delay * 1000},` +
+       `volume=${mixAudio},` + // 👈 add this line
        `aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo` +
        `[${audioId}]`,
    };
@@ -68,12 +69,13 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
    // Add each image input
    const filterComplexVideoIds = [];
    const filterComplexAudioIds = [];
-   studio.beats.reduce((timestamp, beat, index) => {
-     if (!beat.imageFile || !beat.duration) {
-       throw new Error(`beat.imageFile or beat.duration is not set: index=${index}`);
+   studio.beats.reduce((timestamp, studioBeat, index) => {
+     const beat = studio.script.beats[index];
+     if (!studioBeat.imageFile || !studioBeat.duration) {
+       throw new Error(`studioBeat.imageFile or studioBeat.duration is not set: index=${index}`);
      }
-     const inputIndex = FfmpegContextAddInput(ffmpegContext, beat.imageFile);
-     const mediaType = MulmoScriptMethods.getImageType(studio.script, studio.script.beats[index]);
+     const inputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.imageFile);
+     const mediaType = MulmoScriptMethods.getImageType(studio.script, beat);
      const extraPadding = (() => {
        // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
        if (index === 0) {
@@ -84,11 +86,11 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
        }
        return 0;
      })();
-     const duration = beat.duration + extraPadding;
+     const duration = studioBeat.duration + extraPadding;
      const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
      ffmpegContext.filterComplex.push(videoPart);
-     if (caption && beat.captionFile) {
-       const captionInputIndex = FfmpegContextAddInput(ffmpegContext, beat.captionFile);
+     if (caption && studioBeat.captionFile) {
+       const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
        const compositeVideoId = `c${index}`;
        ffmpegContext.filterComplex.push(`[${videoId}][${captionInputIndex}:v]overlay=format=auto[${compositeVideoId}]`);
        filterComplexVideoIds.push(compositeVideoId);
@@ -96,8 +98,8 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
      else {
        filterComplexVideoIds.push(videoId);
      }
-     if (mediaType === "movie") {
-       const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp);
+     if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0) {
+       const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
        filterComplexAudioIds.push(audioId);
        ffmpegContext.filterComplex.push(audioPart);
      }
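
The mixAudio value lands in the filter chain as a volume filter between adelay and aformat. For example, a movie beat at ffmpeg input index 2, lasting 5 seconds and starting at the 3-second mark, with mixAudio 0.5, would make getAudioPart emit (illustrative values):

  [2:a]atrim=duration=5,adelay=3000|3000,volume=0.5,aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo[a2]

Note that movie beats with mixAudio of 0 no longer contribute an audio branch at all, since the new guard in createVideo skips getAudioPart entirely.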
package/lib/actions/pdf.js CHANGED
@@ -102,19 +102,20 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
    const pos = (() => {
      if (isLandscapeImage) {
        const cellHeight = pageHeight / imagesPerPage - offset;
-       const { drawWidth, drawHeight } = drawSize(fitWidth, (pageWidth - offset) * handoutImageRatio, cellHeight - offset, origWidth, origHeight);
-       const x = offset;
+       const { drawWidth, drawHeight, containerWidth } = drawSize(fitWidth, (pageWidth - offset) * handoutImageRatio, cellHeight - offset, origWidth, origHeight);
+       const x = offset + (containerWidth - drawWidth) / 2;
        const y = pageHeight - (i + 1) * cellHeight + (cellHeight - drawHeight) * handoutImageRatio;
        return {
          x,
          y,
          width: drawWidth,
          height: drawHeight,
+         containerWidth,
        };
      }
      else {
        const cellWidth = pageWidth / imagesPerPage;
-       const { drawWidth, drawHeight } = drawSize(fitWidth, cellWidth - offset, (pageHeight - offset) * handoutImageRatio, origWidth, origHeight);
+       const { drawWidth, drawHeight, containerWidth } = drawSize(fitWidth, cellWidth - offset, (pageHeight - offset) * handoutImageRatio, origWidth, origHeight);
        const x = pageWidth - (imagesPerPage - i) * cellWidth + (cellWidth - drawWidth) * handoutImageRatio;
        const y = pageHeight - drawHeight - offset;
        return {
@@ -122,6 +123,7 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
          y,
          width: drawWidth,
          height: drawHeight,
+         containerWidth,
        };
      }
    })();
@@ -136,7 +138,7 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
    for (const [index, line] of lines.entries()) {
      page.drawText(line, {
        ...pos,
-       x: pos.x + pos.width + textMargin,
+       x: offset + pos.containerWidth + textMargin,
        y: pos.y + pos.height - fontSize - (fontSize + 2) * index,
        size: fontSize,
        font,
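
drawSize now evidently also returns containerWidth, the width reserved for the image cell (its implementation is not in this diff). That fixes two things at once: the drawn image is centered inside its cell, and the handout text column starts at a fixed x for every image instead of hugging each image's right edge. A small worked sketch with hypothetical numbers:

  // hypothetical values; drawSize itself is not shown in this diff
  const offset = 20, textMargin = 10;
  const { drawWidth, containerWidth } = { drawWidth: 300, containerWidth: 400 };
  const x = offset + (containerWidth - drawWidth) / 2; // 70: image centered in its cell
  const textX = offset + containerWidth + textMargin;  // 430: same text column for every image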
package/lib/actions/translate.js CHANGED
@@ -1,13 +1,13 @@
  import "dotenv/config";
  import { GraphAI, assert } from "graphai";
- import * as agents from "@graphai/vanilla";
+ import vanillaAgents from "@graphai/vanilla";
  import { openAIAgent } from "@graphai/openai_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
  import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
- const { default: __, ...vanillaAgents } = agents;
+ // const { default: __, ...vanillaAgents } = agents;
  const translateGraph = {
    version: 0.5,
    nodes: {
package/lib/agents/image_openai_agent.d.ts CHANGED
@@ -6,6 +6,7 @@ export declare const imageOpenaiAgent: AgentFunction<{
    model: string;
    size: OpenAIImageSize | null | undefined;
    moderation: OpenAIModeration | null | undefined;
+   images: string[] | null | undefined;
  }, {
    buffer: Buffer;
  }, {
package/lib/agents/image_openai_agent.js CHANGED
@@ -1,8 +1,9 @@
- import OpenAI from "openai";
+ import fs from "fs";
+ import OpenAI, { toFile } from "openai";
  // https://platform.openai.com/docs/guides/image-generation
  export const imageOpenaiAgent = async ({ namedInputs, params }) => {
    const { prompt } = namedInputs;
-   const { apiKey, model, size, moderation } = params;
+   const { apiKey, model, size, moderation, images } = params;
    const openai = new OpenAI({ apiKey });
    const imageOptions = {
      model: model ?? "dall-e-3",
@@ -13,7 +14,18 @@ export const imageOpenaiAgent = async ({ namedInputs, params }) => {
    if (model === "gpt-image-1") {
      imageOptions.moderation = moderation || "auto";
    }
-   const response = await openai.images.generate(imageOptions);
+   const response = await (async () => {
+     const targetSize = imageOptions.size;
+     if ((images ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
+       const imagelist = await Promise.all((images ?? []).map(async (file) => await toFile(fs.createReadStream(file), null, {
+         type: "image/png", // TODO: Support JPEG as well
+       })));
+       return await openai.images.edit({ ...imageOptions, size: targetSize, image: imagelist });
+     }
+     else {
+       return await openai.images.generate(imageOptions);
+     }
+   })();
    if (!response.data) {
      throw new Error(`response.data is undefined: ${response}`);
    }
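
The edit path only engages when reference images exist and the requested size is one of the three listed values, presumably because the images.edit endpoint supports gpt-image-1 but not dall-e-3. A standalone sketch of that call, assuming PNG reference files on disk (paths, prompt, and model choice here are illustrative):

  import fs from "fs";
  import OpenAI, { toFile } from "openai";

  const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  // reference images are streamed in as PNG uploads
  const image = await Promise.all(
    ["./presenter.png"].map((file) => toFile(fs.createReadStream(file), null, { type: "image/png" })),
  );
  const response = await openai.images.edit({ model: "gpt-image-1", prompt: "The presenter waves.", size: "1024x1024", image });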
package/lib/cli/bin.js CHANGED
@@ -2,6 +2,9 @@
  import "dotenv/config";
  import yargs from "yargs/yargs";
  import { hideBin } from "yargs/helpers";
+ import { readFileSync } from "fs";
+ import { fileURLToPath } from "url";
+ import { dirname, join } from "path";
  import * as translateCmd from "./commands/translate/index.js";
  import * as audioCmd from "./commands/audio/index.js";
  import * as imagesCmd from "./commands/image/index.js";
@@ -9,9 +12,13 @@ import * as movieCmd from "./commands/movie/index.js";
  import * as pdfCmd from "./commands/pdf/index.js";
  import * as toolCmd from "./commands/tool/index.js";
  import { GraphAILogger } from "graphai";
+ const __filename = fileURLToPath(import.meta.url);
+ const __dirname = dirname(__filename);
+ const packageJson = JSON.parse(readFileSync(join(__dirname, "../../package.json"), "utf8"));
  export const main = async () => {
    const cli = yargs(hideBin(process.argv))
      .scriptName("mulmo")
+     .version(packageJson.version)
      .usage("$0 <command> [options]")
      .option("v", {
        alias: "verbose",
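
ESM code has no __dirname, so bin.js rebuilds it from import.meta.url before reading package.json, then wires the result into yargs' .version(). The CLI should therefore report the packaged version:

  $ mulmo --version
  0.0.9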
package/lib/cli/helpers.js CHANGED
@@ -2,7 +2,7 @@ import { GraphAILogger } from "graphai";
  import fs from "fs";
  import path from "path";
  import clipboardy from "clipboardy";
- import { getBaseDirPath, getFullPath, readMulmoScriptFile, fetchMulmoScriptFile, getOutputStudioFilePath, resolveDirPath } from "../utils/file.js";
+ import { getBaseDirPath, getFullPath, readMulmoScriptFile, fetchMulmoScriptFile, getOutputStudioFilePath, resolveDirPath, mkdir } from "../utils/file.js";
  import { isHttp } from "../utils/utils.js";
  import { createOrUpdateStudioData } from "../utils/preprocess.js";
  import { outDirName, imageDirName, audioDirName } from "../utils/const.js";
@@ -33,6 +33,7 @@ export const getFileObject = (args) => {
    const fileName = `script_${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}_${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
    const clipboardText = clipboardy.readSync();
    const fileOrUrl = resolveDirPath(outDirPath, `${fileName}.json`);
+   mkdir(outDirPath);
    fs.writeFileSync(fileOrUrl, clipboardText, "utf8");
    return { fileOrUrl, fileName };
  }
package/lib/tools/create_mulmo_script_from_url.js CHANGED
@@ -4,7 +4,7 @@ import { openAIAgent } from "@graphai/openai_agent";
  import { anthropicAgent } from "@graphai/anthropic_agent";
  import { geminiAgent } from "@graphai/gemini_agent";
  import { groqAgent } from "@graphai/groq_agent";
- import * as agents from "@graphai/vanilla";
+ import vanillaAgents from "@graphai/vanilla";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { browserlessAgent } from "@graphai/browserless_agent";
  import validateSchemaAgent from "../agents/validate_schema_agent.js";
@@ -14,7 +14,7 @@ import { mulmoScriptSchema, urlsSchema } from "../types/schema.js";
  import { cliLoadingPlugin } from "../utils/plugins.js";
  import { graphDataScriptFromUrlPrompt } from "../utils/prompt.js";
  import { llmPair } from "../utils/utils.js";
- const { default: __, ...vanillaAgents } = agents;
+ // const { default: __, ...vanillaAgents } = agents;
  const graphData = {
    version: 0.5,
    // Execute sequentially because the free version of browserless API doesn't support concurrent execution.
package/lib/tools/create_mulmo_script_interactively.js CHANGED
@@ -6,7 +6,7 @@ import { openAIAgent } from "@graphai/openai_agent";
  import { anthropicAgent } from "@graphai/anthropic_agent";
  import { geminiAgent } from "@graphai/gemini_agent";
  import { groqAgent } from "@graphai/groq_agent";
- import * as agents from "@graphai/vanilla";
+ import vanillaAgents from "@graphai/vanilla";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { readTemplatePrompt, mkdir } from "../utils/file.js";
  import { browserlessCacheGenerator } from "../utils/filters.js";
@@ -16,7 +16,7 @@ import validateSchemaAgent from "../agents/validate_schema_agent.js";
  import { llmPair } from "../utils/utils.js";
  import { interactiveClarificationPrompt, prefixPrompt } from "../utils/prompt.js";
  // import { cliLoadingPlugin } from "../utils/plugins.js";
- const { default: __, ...vanillaAgents } = agents;
+ // const { default: __, ...vanillaAgents } = agents;
  const agentHeader = "\x1b[34m● \x1b[0m\x1b[1mAgent\x1b[0m:\x1b[0m";
  const graphDataForScraping = {
    version: 0.5,
package/lib/tools/story_to_script.js CHANGED
@@ -5,14 +5,14 @@ import { openAIAgent } from "@graphai/openai_agent";
  import { anthropicAgent } from "@graphai/anthropic_agent";
  import { geminiAgent } from "@graphai/gemini_agent";
  import { groqAgent } from "@graphai/groq_agent";
- import * as agents from "@graphai/vanilla";
+ import vanillaAgents from "@graphai/vanilla";
  import { graphDataScriptGeneratePrompt, sceneToBeatsPrompt, storyToScriptInfoPrompt, storyToScriptPrompt } from "../utils/prompt.js";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import validateSchemaAgent from "../agents/validate_schema_agent.js";
  import { llmPair } from "../utils/utils.js";
  import { storyToScriptGenerateMode } from "../utils/const.js";
  import { cliLoadingPlugin } from "../utils/plugins.js";
- const { default: __, ...vanillaAgents } = agents;
+ // const { default: __, ...vanillaAgents } = agents;
  const createValidatedScriptGraphData = ({ systemPrompt, prompt, schema, llmAgent, llmModel, maxTokens, }) => {
    return {
      loop: {