mulmocast 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/templates/akira_comic.json +28 -0
- package/assets/templates/children_book.json +13 -0
- package/assets/templates/comic_strips.json +14 -1
- package/assets/templates/drslump_comic.json +28 -0
- package/assets/templates/ghibli_comic.json +28 -0
- package/assets/templates/ghost_comic.json +35 -0
- package/assets/templates/onepiece_comic.json +28 -0
- package/assets/templates/sensei_and_taro.json +21 -0
- package/lib/actions/audio.js +2 -2
- package/lib/actions/captions.js +2 -2
- package/lib/actions/images.js +48 -6
- package/lib/actions/movie.d.ts +1 -1
- package/lib/actions/movie.js +13 -11
- package/lib/actions/pdf.js +6 -4
- package/lib/actions/translate.js +2 -2
- package/lib/agents/image_openai_agent.d.ts +1 -0
- package/lib/agents/image_openai_agent.js +15 -3
- package/lib/cli/bin.js +7 -0
- package/lib/cli/helpers.js +2 -1
- package/lib/tools/create_mulmo_script_from_url.js +2 -2
- package/lib/tools/create_mulmo_script_interactively.js +2 -2
- package/lib/tools/story_to_script.js +2 -2
- package/lib/types/schema.d.ts +1738 -228
- package/lib/types/schema.js +8 -2
- package/lib/utils/file.js +20 -9
- package/lib/utils/pdf.d.ts +1 -0
- package/lib/utils/pdf.js +5 -3
- package/lib/utils/preprocess.d.ts +50 -16
- package/package.json +9 -9
- package/scripts/templates/children_book.json +0 -7
- package/scripts/templates/image_prompts_template.json +41 -0
- package/scripts/templates/sensei_and_taro.json +0 -11
- package/scripts/templates/text_only_template.json +35 -0
- package/assets/templates/ghibli_strips.json +0 -6
- package/scripts/templates/comic_strips.json +0 -30
- package/scripts/templates/ghibli_strips.json +0 -30
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Dr. Slump Style Comic Strips",
|
|
3
|
+
"description": "Template for Dr. Slump-style comic.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>AKIRA aesthetic.</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"girl": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/akira_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"scriptName": "image_prompts_template.json"
|
|
28
|
+
}
|
|
@@ -2,5 +2,18 @@
|
|
|
2
2
|
"title": "Children Book",
|
|
3
3
|
"description": "Template for children book.",
|
|
4
4
|
"systemPrompt": "Please generate a script for a children book on the topic provided by the user. Each page (=beat) must haven an image prompt appropriate for the text.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "A hand-drawn style illustration with a warm, nostalgic atmosphere. The background is rich with natural scenery—lush forests, cloudy skies, and traditional Japanese architecture. Characters have expressive eyes, soft facial features, and are portrayed with gentle lighting and subtle shading. The color palette is muted yet vivid, using earthy tones and watercolor-like textures. The overall scene feels magical and peaceful, with a sense of quiet wonder and emotional depth, reminiscent of classic 1980s and 1990s Japanese animation."
|
|
16
|
+
}
|
|
17
|
+
},
|
|
5
18
|
"scriptName": "children_book.json"
|
|
6
19
|
}
|
|
@@ -2,5 +2,18 @@
|
|
|
2
2
|
"title": "American Comic Strips",
|
|
3
3
|
"description": "Template for Dilbert-style comic strips.",
|
|
4
4
|
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
-
"
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>A multi panel comic strips. 1990s American workplace humor. Clean, minimalist line art with muted colors. One character is a nerdy office worker with glasses</style>"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"scriptName": "text_only_template.json"
|
|
6
19
|
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Dr. Slump Style Comic Strips",
|
|
3
|
+
"description": "Template for Dr. Slump-style comic.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>Dragon Ball/Dr. Slump aesthetic.</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"girl": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/slump_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"scriptName": "image_prompts_template.json"
|
|
28
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "American Comic Strips",
|
|
3
|
+
"description": "Template for Dilbert-style comic strips.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>Ghibli style</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"presenter": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"scriptName": "image_prompts_template.json"
|
|
28
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Dr. Slump Style Comic Strips",
|
|
3
|
+
"description": "Template for Dr. Slump-style comic.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>Ghost in the shell aesthetic.</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"presenter": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghost_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
"optimus": {
|
|
25
|
+
"type": "image",
|
|
26
|
+
"source": {
|
|
27
|
+
"kind": "url",
|
|
28
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/optimus.png"
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
"scriptName": "image_prompts_template.json"
|
|
35
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "Dr. Slump Style Comic Strips",
|
|
3
|
+
"description": "Template for Dr. Slump-style comic.",
|
|
4
|
+
"systemPrompt": "Generate a script for a presentation of the given topic. Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>One Piece aesthetic.</style>",
|
|
16
|
+
"images": {
|
|
17
|
+
"presenter": {
|
|
18
|
+
"type": "image",
|
|
19
|
+
"source": {
|
|
20
|
+
"kind": "url",
|
|
21
|
+
"url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/onepiece_presenter.png"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"scriptName": "image_prompts_template.json"
|
|
28
|
+
}
|
|
@@ -2,5 +2,26 @@
|
|
|
2
2
|
"title": "Student and Teacher",
|
|
3
3
|
"description": "Interactive discussion between a student and teacher",
|
|
4
4
|
"systemPrompt": "この件について、内容全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本をArtifactとして作って。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.0",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"canvasSize": {
|
|
11
|
+
"width": 1536,
|
|
12
|
+
"height": 1024
|
|
13
|
+
},
|
|
14
|
+
"imageParams": {
|
|
15
|
+
"style": "<style>Ghibli style. Student (Taro) is a young teenager with a dark short hair with glasses. Teacher is a middle-aged man with grey hair and moustache.</style>"
|
|
16
|
+
},
|
|
17
|
+
"speechParams": {
|
|
18
|
+
"provider": "nijivoice",
|
|
19
|
+
"speakers": {
|
|
20
|
+
"Announcer": { "displayName": { "ja": "アナウンサー" }, "voiceId": "afd7df65-0fdc-4d31-ae8b-a29f0f5eed62" },
|
|
21
|
+
"Student": { "displayName": { "ja": "太郎" }, "voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f" },
|
|
22
|
+
"Teacher": { "displayName": { "ja": "先生" }, "voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae" }
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
},
|
|
5
26
|
"scriptName": "sensei_and_taro.json"
|
|
6
27
|
}
|
package/lib/actions/audio.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
2
|
import { GraphAI } from "graphai";
|
|
3
|
-
import
|
|
3
|
+
import vanillaAgents from "@graphai/vanilla";
|
|
4
4
|
import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
|
|
5
5
|
import addBGMAgent from "../agents/add_bgm_agent.js";
|
|
6
6
|
import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
|
|
@@ -12,7 +12,7 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
|
12
12
|
import { getAudioArtifactFilePath, getAudioSegmentDirPath, getAudioCombinedFilePath, getOutputStudioFilePath, defaultBGMPath, mkdir, writingMessage, getAudioSegmentFilePath, resolveMediaSource, } from "../utils/file.js";
|
|
13
13
|
import { text2hash, localizedText } from "../utils/utils.js";
|
|
14
14
|
import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
|
|
15
|
-
const { default: __, ...vanillaAgents } = agents;
|
|
15
|
+
// const { default: __, ...vanillaAgents } = agents;
|
|
16
16
|
// const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
|
|
17
17
|
// const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
|
|
18
18
|
const provider_to_agent = {
|
package/lib/actions/captions.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { GraphAI, GraphAILogger } from "graphai";
|
|
2
|
-
import
|
|
2
|
+
import vanillaAgents from "@graphai/vanilla";
|
|
3
3
|
import { getHTMLFile } from "../utils/file.js";
|
|
4
4
|
import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
|
|
5
5
|
import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
|
|
6
|
-
const { default: __, ...vanillaAgents } = agents;
|
|
6
|
+
// const { default: __, ...vanillaAgents } = agents;
|
|
7
7
|
const graph_data = {
|
|
8
8
|
version: 0.5,
|
|
9
9
|
nodes: {
|
package/lib/actions/images.js
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
|
+
import fs from "fs";
|
|
2
3
|
import { GraphAI, GraphAILogger } from "graphai";
|
|
3
|
-
import
|
|
4
|
+
import vanillaAgents from "@graphai/vanilla";
|
|
4
5
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
5
6
|
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
6
7
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
7
8
|
import imageGoogleAgent from "../agents/image_google_agent.js";
|
|
8
9
|
import imageOpenaiAgent from "../agents/image_openai_agent.js";
|
|
9
|
-
import { MulmoScriptMethods } from "../methods/index.js";
|
|
10
|
+
import { MulmoScriptMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
10
11
|
import { imagePlugins } from "../utils/image_plugins/index.js";
|
|
11
12
|
import { imagePrompt } from "../utils/prompt.js";
|
|
12
|
-
const { default: __, ...vanillaAgents } = agents;
|
|
13
|
+
// const { default: __, ...vanillaAgents } = agents;
|
|
13
14
|
dotenv.config();
|
|
14
15
|
// const openai = new OpenAI();
|
|
15
16
|
import { GoogleAuth } from "google-auth-library";
|
|
@@ -21,7 +22,7 @@ const htmlStyle = (script, beat) => {
|
|
|
21
22
|
};
|
|
22
23
|
};
|
|
23
24
|
const imagePreprocessAgent = async (namedInputs) => {
|
|
24
|
-
const { context, beat, index, suffix, imageDirPath, imageAgentInfo } = namedInputs;
|
|
25
|
+
const { context, beat, index, suffix, imageDirPath, imageAgentInfo, imageRefs } = namedInputs;
|
|
25
26
|
const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
|
|
26
27
|
const imagePath = `${imageDirPath}/${context.studio.filename}/${index}${suffix}.png`;
|
|
27
28
|
const returnValue = {
|
|
@@ -44,7 +45,12 @@ const imagePreprocessAgent = async (namedInputs) => {
|
|
|
44
45
|
}
|
|
45
46
|
}
|
|
46
47
|
const prompt = imagePrompt(beat, imageParams.style);
|
|
47
|
-
|
|
48
|
+
const images = (() => {
|
|
49
|
+
const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
|
|
50
|
+
const sources = imageNames.map((name) => imageRefs[name]);
|
|
51
|
+
return sources.filter((source) => source !== undefined);
|
|
52
|
+
})();
|
|
53
|
+
return { path: imagePath, prompt, ...returnValue, images };
|
|
48
54
|
};
|
|
49
55
|
const graph_data = {
|
|
50
56
|
version: 0.5,
|
|
@@ -54,9 +60,16 @@ const graph_data = {
|
|
|
54
60
|
imageDirPath: {},
|
|
55
61
|
imageAgentInfo: {},
|
|
56
62
|
outputStudioFilePath: {},
|
|
63
|
+
imageRefs: {},
|
|
57
64
|
map: {
|
|
58
65
|
agent: "mapAgent",
|
|
59
|
-
inputs: {
|
|
66
|
+
inputs: {
|
|
67
|
+
rows: ":context.studio.script.beats",
|
|
68
|
+
context: ":context",
|
|
69
|
+
imageAgentInfo: ":imageAgentInfo",
|
|
70
|
+
imageDirPath: ":imageDirPath",
|
|
71
|
+
imageRefs: ":imageRefs",
|
|
72
|
+
},
|
|
60
73
|
isResult: true,
|
|
61
74
|
params: {
|
|
62
75
|
rowKey: "beat",
|
|
@@ -73,6 +86,7 @@ const graph_data = {
|
|
|
73
86
|
suffix: "p",
|
|
74
87
|
imageDirPath: ":imageDirPath",
|
|
75
88
|
imageAgentInfo: ":imageAgentInfo",
|
|
89
|
+
imageRefs: ":imageRefs",
|
|
76
90
|
},
|
|
77
91
|
},
|
|
78
92
|
imageGenerator: {
|
|
@@ -92,6 +106,7 @@ const graph_data = {
|
|
|
92
106
|
size: ":preprocessor.imageParams.size",
|
|
93
107
|
moderation: ":preprocessor.imageParams.moderation",
|
|
94
108
|
aspectRatio: ":preprocessor.aspectRatio",
|
|
109
|
+
images: ":preprocessor.images",
|
|
95
110
|
},
|
|
96
111
|
},
|
|
97
112
|
defaultValue: {},
|
|
@@ -170,12 +185,39 @@ const generateImages = async (context) => {
|
|
|
170
185
|
},
|
|
171
186
|
};
|
|
172
187
|
}
|
|
188
|
+
if (imageAgentInfo.provider === "openai") {
|
|
189
|
+
// NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
|
|
190
|
+
// dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
|
|
191
|
+
// gpt-image-1:3,000,000 TPM、150 images per minute
|
|
192
|
+
graph_data.concurrency = imageAgentInfo.imageParams.model === "dall-e-3" ? 4 : 16;
|
|
193
|
+
}
|
|
194
|
+
const imageRefs = {};
|
|
195
|
+
const images = studio.script.imageParams?.images;
|
|
196
|
+
if (images) {
|
|
197
|
+
await Promise.all(Object.keys(images).map(async (key) => {
|
|
198
|
+
const image = images[key];
|
|
199
|
+
if (image.source.kind === "path") {
|
|
200
|
+
imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
|
|
201
|
+
}
|
|
202
|
+
else if (image.source.kind === "url") {
|
|
203
|
+
const response = await fetch(image.source.url);
|
|
204
|
+
if (!response.ok) {
|
|
205
|
+
throw new Error(`Failed to download image: ${image.source.url}`);
|
|
206
|
+
}
|
|
207
|
+
const buffer = Buffer.from(await response.arrayBuffer());
|
|
208
|
+
const imagePath = `${imageDirPath}/${context.studio.filename}/${key}.png`;
|
|
209
|
+
await fs.promises.writeFile(imagePath, buffer);
|
|
210
|
+
imageRefs[key] = imagePath;
|
|
211
|
+
}
|
|
212
|
+
}));
|
|
213
|
+
}
|
|
173
214
|
GraphAILogger.info(`text2image: provider=${imageAgentInfo.provider} model=${imageAgentInfo.imageParams.model}`);
|
|
174
215
|
const injections = {
|
|
175
216
|
context,
|
|
176
217
|
imageAgentInfo,
|
|
177
218
|
outputStudioFilePath: getOutputStudioFilePath(outDirPath, studio.filename),
|
|
178
219
|
imageDirPath,
|
|
220
|
+
imageRefs,
|
|
179
221
|
};
|
|
180
222
|
const graph = new GraphAI(graph_data, { ...vanillaAgents, imageGoogleAgent, imageOpenaiAgent, fileWriteAgent }, options);
|
|
181
223
|
Object.keys(injections).forEach((key) => {
|
package/lib/actions/movie.d.ts
CHANGED
|
@@ -3,7 +3,7 @@ export declare const getVideoPart: (inputIndex: number, mediaType: BeatMediaType
|
|
|
3
3
|
videoId: string;
|
|
4
4
|
videoPart: string;
|
|
5
5
|
};
|
|
6
|
-
export declare const getAudioPart: (inputIndex: number, duration: number, delay: number) => {
|
|
6
|
+
export declare const getAudioPart: (inputIndex: number, duration: number, delay: number, mixAudio: number) => {
|
|
7
7
|
audioId: string;
|
|
8
8
|
audioPart: string;
|
|
9
9
|
};
|
package/lib/actions/movie.js
CHANGED
|
@@ -26,13 +26,14 @@ export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
|
|
|
26
26
|
videoPart: `[${inputIndex}:v]` + videoFilters.filter((a) => a).join(",") + `[${videoId}]`,
|
|
27
27
|
};
|
|
28
28
|
};
|
|
29
|
-
export const getAudioPart = (inputIndex, duration, delay) => {
|
|
29
|
+
export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
|
|
30
30
|
const audioId = `a${inputIndex}`;
|
|
31
31
|
return {
|
|
32
32
|
audioId,
|
|
33
33
|
audioPart: `[${inputIndex}:a]` +
|
|
34
34
|
`atrim=duration=${duration},` + // Trim to beat duration
|
|
35
35
|
`adelay=${delay * 1000}|${delay * 1000},` +
|
|
36
|
+
`volume=${mixAudio},` + // 👈 add this line
|
|
36
37
|
`aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo` +
|
|
37
38
|
`[${audioId}]`,
|
|
38
39
|
};
|
|
@@ -68,12 +69,13 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
|
|
|
68
69
|
// Add each image input
|
|
69
70
|
const filterComplexVideoIds = [];
|
|
70
71
|
const filterComplexAudioIds = [];
|
|
71
|
-
studio.beats.reduce((timestamp,
|
|
72
|
-
|
|
73
|
-
|
|
72
|
+
studio.beats.reduce((timestamp, studioBeat, index) => {
|
|
73
|
+
const beat = studio.script.beats[index];
|
|
74
|
+
if (!studioBeat.imageFile || !studioBeat.duration) {
|
|
75
|
+
throw new Error(`studioBeat.imageFile or studioBeat.duration is not set: index=${index}`);
|
|
74
76
|
}
|
|
75
|
-
const inputIndex = FfmpegContextAddInput(ffmpegContext,
|
|
76
|
-
const mediaType = MulmoScriptMethods.getImageType(studio.script,
|
|
77
|
+
const inputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.imageFile);
|
|
78
|
+
const mediaType = MulmoScriptMethods.getImageType(studio.script, beat);
|
|
77
79
|
const extraPadding = (() => {
|
|
78
80
|
// We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
|
|
79
81
|
if (index === 0) {
|
|
@@ -84,11 +86,11 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
|
|
|
84
86
|
}
|
|
85
87
|
return 0;
|
|
86
88
|
})();
|
|
87
|
-
const duration =
|
|
89
|
+
const duration = studioBeat.duration + extraPadding;
|
|
88
90
|
const { videoId, videoPart } = getVideoPart(inputIndex, mediaType, duration, canvasInfo);
|
|
89
91
|
ffmpegContext.filterComplex.push(videoPart);
|
|
90
|
-
if (caption &&
|
|
91
|
-
const captionInputIndex = FfmpegContextAddInput(ffmpegContext,
|
|
92
|
+
if (caption && studioBeat.captionFile) {
|
|
93
|
+
const captionInputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.captionFile);
|
|
92
94
|
const compositeVideoId = `c${index}`;
|
|
93
95
|
ffmpegContext.filterComplex.push(`[${videoId}][${captionInputIndex}:v]overlay=format=auto[${compositeVideoId}]`);
|
|
94
96
|
filterComplexVideoIds.push(compositeVideoId);
|
|
@@ -96,8 +98,8 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
|
|
|
96
98
|
else {
|
|
97
99
|
filterComplexVideoIds.push(videoId);
|
|
98
100
|
}
|
|
99
|
-
if (
|
|
100
|
-
const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp);
|
|
101
|
+
if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0) {
|
|
102
|
+
const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
|
|
101
103
|
filterComplexAudioIds.push(audioId);
|
|
102
104
|
ffmpegContext.filterComplex.push(audioPart);
|
|
103
105
|
}
|
package/lib/actions/pdf.js
CHANGED
|
@@ -102,19 +102,20 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
|
|
|
102
102
|
const pos = (() => {
|
|
103
103
|
if (isLandscapeImage) {
|
|
104
104
|
const cellHeight = pageHeight / imagesPerPage - offset;
|
|
105
|
-
const { drawWidth, drawHeight } = drawSize(fitWidth, (pageWidth - offset) * handoutImageRatio, cellHeight - offset, origWidth, origHeight);
|
|
106
|
-
const x = offset;
|
|
105
|
+
const { drawWidth, drawHeight, containerWidth } = drawSize(fitWidth, (pageWidth - offset) * handoutImageRatio, cellHeight - offset, origWidth, origHeight);
|
|
106
|
+
const x = offset + (containerWidth - drawWidth) / 2;
|
|
107
107
|
const y = pageHeight - (i + 1) * cellHeight + (cellHeight - drawHeight) * handoutImageRatio;
|
|
108
108
|
return {
|
|
109
109
|
x,
|
|
110
110
|
y,
|
|
111
111
|
width: drawWidth,
|
|
112
112
|
height: drawHeight,
|
|
113
|
+
containerWidth,
|
|
113
114
|
};
|
|
114
115
|
}
|
|
115
116
|
else {
|
|
116
117
|
const cellWidth = pageWidth / imagesPerPage;
|
|
117
|
-
const { drawWidth, drawHeight } = drawSize(fitWidth, cellWidth - offset, (pageHeight - offset) * handoutImageRatio, origWidth, origHeight);
|
|
118
|
+
const { drawWidth, drawHeight, containerWidth } = drawSize(fitWidth, cellWidth - offset, (pageHeight - offset) * handoutImageRatio, origWidth, origHeight);
|
|
118
119
|
const x = pageWidth - (imagesPerPage - i) * cellWidth + (cellWidth - drawWidth) * handoutImageRatio;
|
|
119
120
|
const y = pageHeight - drawHeight - offset;
|
|
120
121
|
return {
|
|
@@ -122,6 +123,7 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
|
|
|
122
123
|
y,
|
|
123
124
|
width: drawWidth,
|
|
124
125
|
height: drawHeight,
|
|
126
|
+
containerWidth,
|
|
125
127
|
};
|
|
126
128
|
}
|
|
127
129
|
})();
|
|
@@ -136,7 +138,7 @@ const pdfHandout = async (pageWidth, pageHeight, imagePaths, texts, pdfDoc, font
|
|
|
136
138
|
for (const [index, line] of lines.entries()) {
|
|
137
139
|
page.drawText(line, {
|
|
138
140
|
...pos,
|
|
139
|
-
x:
|
|
141
|
+
x: offset + pos.containerWidth + textMargin,
|
|
140
142
|
y: pos.y + pos.height - fontSize - (fontSize + 2) * index,
|
|
141
143
|
size: fontSize,
|
|
142
144
|
font,
|
package/lib/actions/translate.js
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
2
|
import { GraphAI, assert } from "graphai";
|
|
3
|
-
import
|
|
3
|
+
import vanillaAgents from "@graphai/vanilla";
|
|
4
4
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
5
5
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
6
6
|
import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
|
|
7
7
|
import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
|
|
8
8
|
import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
|
|
9
9
|
import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
|
|
10
|
-
const { default: __, ...vanillaAgents } = agents;
|
|
10
|
+
// const { default: __, ...vanillaAgents } = agents;
|
|
11
11
|
const translateGraph = {
|
|
12
12
|
version: 0.5,
|
|
13
13
|
nodes: {
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
import
|
|
1
|
+
import fs from "fs";
|
|
2
|
+
import OpenAI, { toFile } from "openai";
|
|
2
3
|
// https://platform.openai.com/docs/guides/image-generation
|
|
3
4
|
export const imageOpenaiAgent = async ({ namedInputs, params }) => {
|
|
4
5
|
const { prompt } = namedInputs;
|
|
5
|
-
const { apiKey, model, size, moderation } = params;
|
|
6
|
+
const { apiKey, model, size, moderation, images } = params;
|
|
6
7
|
const openai = new OpenAI({ apiKey });
|
|
7
8
|
const imageOptions = {
|
|
8
9
|
model: model ?? "dall-e-3",
|
|
@@ -13,7 +14,18 @@ export const imageOpenaiAgent = async ({ namedInputs, params }) => {
|
|
|
13
14
|
if (model === "gpt-image-1") {
|
|
14
15
|
imageOptions.moderation = moderation || "auto";
|
|
15
16
|
}
|
|
16
|
-
const response = await
|
|
17
|
+
const response = await (async () => {
|
|
18
|
+
const targetSize = imageOptions.size;
|
|
19
|
+
if ((images ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
|
|
20
|
+
const imagelist = await Promise.all((images ?? []).map(async (file) => await toFile(fs.createReadStream(file), null, {
|
|
21
|
+
type: "image/png", // TODO: Support JPEG as well
|
|
22
|
+
})));
|
|
23
|
+
return await openai.images.edit({ ...imageOptions, size: targetSize, image: imagelist });
|
|
24
|
+
}
|
|
25
|
+
else {
|
|
26
|
+
return await openai.images.generate(imageOptions);
|
|
27
|
+
}
|
|
28
|
+
})();
|
|
17
29
|
if (!response.data) {
|
|
18
30
|
throw new Error(`response.data is undefined: ${response}`);
|
|
19
31
|
}
|
package/lib/cli/bin.js
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
import "dotenv/config";
|
|
3
3
|
import yargs from "yargs/yargs";
|
|
4
4
|
import { hideBin } from "yargs/helpers";
|
|
5
|
+
import { readFileSync } from "fs";
|
|
6
|
+
import { fileURLToPath } from "url";
|
|
7
|
+
import { dirname, join } from "path";
|
|
5
8
|
import * as translateCmd from "./commands/translate/index.js";
|
|
6
9
|
import * as audioCmd from "./commands/audio/index.js";
|
|
7
10
|
import * as imagesCmd from "./commands/image/index.js";
|
|
@@ -9,9 +12,13 @@ import * as movieCmd from "./commands/movie/index.js";
|
|
|
9
12
|
import * as pdfCmd from "./commands/pdf/index.js";
|
|
10
13
|
import * as toolCmd from "./commands/tool/index.js";
|
|
11
14
|
import { GraphAILogger } from "graphai";
|
|
15
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
16
|
+
const __dirname = dirname(__filename);
|
|
17
|
+
const packageJson = JSON.parse(readFileSync(join(__dirname, "../../package.json"), "utf8"));
|
|
12
18
|
export const main = async () => {
|
|
13
19
|
const cli = yargs(hideBin(process.argv))
|
|
14
20
|
.scriptName("mulmo")
|
|
21
|
+
.version(packageJson.version)
|
|
15
22
|
.usage("$0 <command> [options]")
|
|
16
23
|
.option("v", {
|
|
17
24
|
alias: "verbose",
|
package/lib/cli/helpers.js
CHANGED
|
@@ -2,7 +2,7 @@ import { GraphAILogger } from "graphai";
|
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import path from "path";
|
|
4
4
|
import clipboardy from "clipboardy";
|
|
5
|
-
import { getBaseDirPath, getFullPath, readMulmoScriptFile, fetchMulmoScriptFile, getOutputStudioFilePath, resolveDirPath } from "../utils/file.js";
|
|
5
|
+
import { getBaseDirPath, getFullPath, readMulmoScriptFile, fetchMulmoScriptFile, getOutputStudioFilePath, resolveDirPath, mkdir } from "../utils/file.js";
|
|
6
6
|
import { isHttp } from "../utils/utils.js";
|
|
7
7
|
import { createOrUpdateStudioData } from "../utils/preprocess.js";
|
|
8
8
|
import { outDirName, imageDirName, audioDirName } from "../utils/const.js";
|
|
@@ -33,6 +33,7 @@ export const getFileObject = (args) => {
|
|
|
33
33
|
const fileName = `script_${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}_${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
|
|
34
34
|
const clipboardText = clipboardy.readSync();
|
|
35
35
|
const fileOrUrl = resolveDirPath(outDirPath, `${fileName}.json`);
|
|
36
|
+
mkdir(outDirPath);
|
|
36
37
|
fs.writeFileSync(fileOrUrl, clipboardText, "utf8");
|
|
37
38
|
return { fileOrUrl, fileName };
|
|
38
39
|
}
|
|
@@ -4,7 +4,7 @@ import { openAIAgent } from "@graphai/openai_agent";
|
|
|
4
4
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
5
5
|
import { geminiAgent } from "@graphai/gemini_agent";
|
|
6
6
|
import { groqAgent } from "@graphai/groq_agent";
|
|
7
|
-
import
|
|
7
|
+
import vanillaAgents from "@graphai/vanilla";
|
|
8
8
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
9
9
|
import { browserlessAgent } from "@graphai/browserless_agent";
|
|
10
10
|
import validateSchemaAgent from "../agents/validate_schema_agent.js";
|
|
@@ -14,7 +14,7 @@ import { mulmoScriptSchema, urlsSchema } from "../types/schema.js";
|
|
|
14
14
|
import { cliLoadingPlugin } from "../utils/plugins.js";
|
|
15
15
|
import { graphDataScriptFromUrlPrompt } from "../utils/prompt.js";
|
|
16
16
|
import { llmPair } from "../utils/utils.js";
|
|
17
|
-
const { default: __, ...vanillaAgents } = agents;
|
|
17
|
+
// const { default: __, ...vanillaAgents } = agents;
|
|
18
18
|
const graphData = {
|
|
19
19
|
version: 0.5,
|
|
20
20
|
// Execute sequentially because the free version of browserless API doesn't support concurrent execution.
|
|
@@ -6,7 +6,7 @@ import { openAIAgent } from "@graphai/openai_agent";
|
|
|
6
6
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
7
7
|
import { geminiAgent } from "@graphai/gemini_agent";
|
|
8
8
|
import { groqAgent } from "@graphai/groq_agent";
|
|
9
|
-
import
|
|
9
|
+
import vanillaAgents from "@graphai/vanilla";
|
|
10
10
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
11
11
|
import { readTemplatePrompt, mkdir } from "../utils/file.js";
|
|
12
12
|
import { browserlessCacheGenerator } from "../utils/filters.js";
|
|
@@ -16,7 +16,7 @@ import validateSchemaAgent from "../agents/validate_schema_agent.js";
|
|
|
16
16
|
import { llmPair } from "../utils/utils.js";
|
|
17
17
|
import { interactiveClarificationPrompt, prefixPrompt } from "../utils/prompt.js";
|
|
18
18
|
// import { cliLoadingPlugin } from "../utils/plugins.js";
|
|
19
|
-
const { default: __, ...vanillaAgents } = agents;
|
|
19
|
+
// const { default: __, ...vanillaAgents } = agents;
|
|
20
20
|
const agentHeader = "\x1b[34m● \x1b[0m\x1b[1mAgent\x1b[0m:\x1b[0m";
|
|
21
21
|
const graphDataForScraping = {
|
|
22
22
|
version: 0.5,
|
|
@@ -5,14 +5,14 @@ import { openAIAgent } from "@graphai/openai_agent";
|
|
|
5
5
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
6
6
|
import { geminiAgent } from "@graphai/gemini_agent";
|
|
7
7
|
import { groqAgent } from "@graphai/groq_agent";
|
|
8
|
-
import
|
|
8
|
+
import vanillaAgents from "@graphai/vanilla";
|
|
9
9
|
import { graphDataScriptGeneratePrompt, sceneToBeatsPrompt, storyToScriptInfoPrompt, storyToScriptPrompt } from "../utils/prompt.js";
|
|
10
10
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
11
11
|
import validateSchemaAgent from "../agents/validate_schema_agent.js";
|
|
12
12
|
import { llmPair } from "../utils/utils.js";
|
|
13
13
|
import { storyToScriptGenerateMode } from "../utils/const.js";
|
|
14
14
|
import { cliLoadingPlugin } from "../utils/plugins.js";
|
|
15
|
-
const { default: __, ...vanillaAgents } = agents;
|
|
15
|
+
// const { default: __, ...vanillaAgents } = agents;
|
|
16
16
|
const createValidatedScriptGraphData = ({ systemPrompt, prompt, schema, llmAgent, llmModel, maxTokens, }) => {
|
|
17
17
|
return {
|
|
18
18
|
loop: {
|