mulmocast 1.1.11 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/templates/akira_comic.json +1 -1
- package/assets/templates/ani.json +1 -1
- package/assets/templates/business.json +1 -1
- package/assets/templates/characters.json +1 -1
- package/assets/templates/children_book.json +1 -1
- package/assets/templates/coding.json +1 -1
- package/assets/templates/comic_strips.json +1 -1
- package/assets/templates/drslump_comic.json +1 -1
- package/assets/templates/ghibli_comic.json +1 -1
- package/assets/templates/ghibli_image_only.json +1 -1
- package/assets/templates/ghibli_shorts.json +1 -1
- package/assets/templates/ghost_comic.json +1 -1
- package/assets/templates/html.json +12 -1
- package/assets/templates/onepiece_comic.json +1 -1
- package/assets/templates/portrait_movie.json +1 -1
- package/assets/templates/realistic_movie.json +1 -1
- package/assets/templates/sensei_and_taro.json +1 -1
- package/assets/templates/shorts.json +1 -1
- package/assets/templates/trailer.json +1 -1
- package/lib/actions/image_agents.d.ts +1 -1
- package/lib/actions/image_references.js +2 -2
- package/lib/actions/images.js +6 -30
- package/lib/actions/movie.js +2 -2
- package/lib/agents/add_bgm_agent.js +2 -1
- package/lib/agents/image_genai_agent.d.ts +5 -0
- package/lib/agents/image_genai_agent.js +52 -0
- package/lib/agents/image_openai_agent.js +1 -0
- package/lib/agents/index.d.ts +3 -3
- package/lib/agents/index.js +3 -3
- package/lib/agents/movie_genai_agent.d.ts +9 -0
- package/lib/agents/movie_genai_agent.js +86 -0
- package/lib/cli/commands/tool/prompt/handler.js +1 -1
- package/lib/cli/helpers.js +3 -1
- package/lib/methods/mulmo_beat.js +1 -1
- package/lib/methods/mulmo_presentation_style.js +1 -1
- package/lib/methods/mulmo_studio_context.d.ts +1 -0
- package/lib/methods/mulmo_studio_context.js +7 -0
- package/lib/tools/dump_prompt.d.ts +2 -1
- package/lib/tools/dump_prompt.js +3 -2
- package/lib/types/agent.d.ts +8 -1
- package/lib/types/schema.d.ts +33 -33
- package/lib/types/schema.js +2 -2
- package/lib/utils/context.d.ts +5 -5
- package/lib/utils/filters.js +7 -1
- package/lib/utils/inquirer.js +15 -22
- package/lib/utils/prompt.d.ts +1 -1
- package/lib/utils/prompt.js +1 -1
- package/lib/utils/provider2agent.d.ts +1 -1
- package/lib/utils/provider2agent.js +5 -5
- package/lib/utils/utils.d.ts +0 -1
- package/lib/utils/utils.js +4 -7
- package/package.json +9 -8
- package/scripts/test/test_genai.json +47 -0
- package/assets/templates/ani_ja.json +0 -44
- package/assets/templates/podcast_standard.json +0 -5
- package/assets/templates/text_and_image.json +0 -6
- package/assets/templates/text_only.json +0 -6
- package/scripts/test/test_hello_bgm_0.json +0 -21
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Akira style",
|
|
3
3
|
"description": "Template for Akira style comic presentation.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Presentation with Ani",
|
|
3
3
|
"description": "Template for presentation with Ani.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "言葉づかいは思いっきりツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Business presentation",
|
|
3
3
|
"description": "Template for business presentation.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "Use textSlides, markdown, mermaid, or chart to show slides. Extract image links in the article (from <img> tag) to reuse them in the presentation. Mention the reference in one of beats, if it exists. Use the JSON below as a template. chartData is the data for Chart.js",
|
|
5
5
|
"scriptName": "business.json"
|
|
6
6
|
}
|
|
@@ -11,6 +11,6 @@
|
|
|
11
11
|
"height": 1024
|
|
12
12
|
}
|
|
13
13
|
},
|
|
14
|
-
"systemPrompt": "
|
|
14
|
+
"systemPrompt": "Use multiple characters. Generate image prompts for each character, and make references to them in the beats. Use the JSON below as a template.",
|
|
15
15
|
"scriptName": "image_refs.json"
|
|
16
16
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Children Book",
|
|
3
3
|
"description": "Template for children book.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "This script is for a children book. Each page (=beat) must haven an image prompt appropriate for the text.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Coding presentation",
|
|
3
3
|
"description": "Template for software and coding presentation.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "Use markdown with a code block to show some code on a slide. Avoid long coding examples, which may not fit in a single slide. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"scriptName": "coding.json"
|
|
6
6
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "American Comic Strips",
|
|
3
3
|
"description": "Template for Dilbert-style comic strips.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Dr. Slump Style",
|
|
3
3
|
"description": "Template for Dr. Slump style comic presentation.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Ghibli comic style",
|
|
3
3
|
"description": "Template for Ghibli-style comic presentation.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Ghibli comic image-only",
|
|
3
3
|
"description": "Template for Ghibli-style image-only comic presentation.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "Another AI will generate an image for each beat based on the text description of that beat. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Ghibli style for YouTube Shorts",
|
|
3
3
|
"description": "Template for Ghibli-style comic presentation.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "This script is for YouTube shorts. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Ghost in the shell style",
|
|
3
3
|
"description": "Template for Ghost in the shell style comic presentation.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,6 +1,17 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Business presentation in HTML",
|
|
3
3
|
"description": "Template for business presentation in HTML.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "Another LLM will generate actual slides from the prompt and data for each beat. Adding optional data would help it to generate more compelling slide. Mention the reference in one of beats, if it exists. The valid type of reference is 'article', 'paper', 'image', 'video', 'audio'. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.1",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"lang": "en",
|
|
11
|
+
"canvasSize": {
|
|
12
|
+
"width": 1536,
|
|
13
|
+
"height": 1024
|
|
14
|
+
}
|
|
15
|
+
},
|
|
5
16
|
"scriptName": "html.json"
|
|
6
17
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "One Piece style",
|
|
3
3
|
"description": "Template for One Piece style comic presentation.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Photo realistic movie (portrait)",
|
|
3
3
|
"description": "Template for photo realistic movie in portrait mode.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Photo realistic movie template",
|
|
3
3
|
"description": "Template for photo realistic movie.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Student and Teacher",
|
|
3
3
|
"description": "Interactive discussion between a student and teacher",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "全てを高校生にも分かるように、太郎くん(Student)と先生(Teacher)の会話、という形の台本にして。ただし要点はしっかりと押さえて。以下に別のトピックに関するサンプルを貼り付けます。このJSONフォーマットに従って。",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Short movie template",
|
|
3
3
|
"description": "Template for Youtube shorts.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "This script is for YouTube shorts. The first beat should be a hook, which describes the topic. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"title": "Movie Trailer template",
|
|
3
3
|
"description": "Template for A Movie Trailer.",
|
|
4
|
-
"systemPrompt": "
|
|
4
|
+
"systemPrompt": "This script is for a movie trailer. Another AI will generate images for each beat based on the image prompt of that beat. Movie prompts must be written in English.",
|
|
5
5
|
"presentationStyle": {
|
|
6
6
|
"$mulmocast": {
|
|
7
7
|
"version": "1.1"
|
|
@@ -8,7 +8,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
8
8
|
imagePath: string;
|
|
9
9
|
htmlPrompt: string | undefined;
|
|
10
10
|
htmlPath: string;
|
|
11
|
-
htmlImageSystemPrompt: string
|
|
11
|
+
htmlImageSystemPrompt: string;
|
|
12
12
|
} | {
|
|
13
13
|
imagePath: string | undefined;
|
|
14
14
|
referenceImageForMovie: string | undefined;
|
|
@@ -4,7 +4,7 @@ import { getReferenceImagePath } from "../utils/file.js";
|
|
|
4
4
|
import { getExtention } from "../utils/utils.js";
|
|
5
5
|
import { graphOption } from "./images.js";
|
|
6
6
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
7
|
-
import {
|
|
7
|
+
import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent } from "../agents/index.js";
|
|
8
8
|
// public api
|
|
9
9
|
// Application may call this function directly to generate reference image.
|
|
10
10
|
export const generateReferenceImage = async (inputs) => {
|
|
@@ -39,7 +39,7 @@ export const generateReferenceImage = async (inputs) => {
|
|
|
39
39
|
},
|
|
40
40
|
};
|
|
41
41
|
const options = await graphOption(context);
|
|
42
|
-
const graph = new GraphAI(image_graph_data, {
|
|
42
|
+
const graph = new GraphAI(image_graph_data, { imageGenAIAgent, imageOpenaiAgent, mediaMockAgent }, options);
|
|
43
43
|
await graph.run();
|
|
44
44
|
return imagePath;
|
|
45
45
|
};
|
package/lib/actions/images.js
CHANGED
|
@@ -1,26 +1,25 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import { GraphAI, GraphAILogger, TaskManager } from "graphai";
|
|
4
|
-
import { GoogleAuth } from "google-auth-library";
|
|
5
4
|
import * as vanilla from "@graphai/vanilla";
|
|
6
5
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
7
6
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
8
7
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
9
|
-
import {
|
|
8
|
+
import { imageGenAIAgent, imageOpenaiAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, } from "../agents/index.js";
|
|
10
9
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
11
10
|
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
12
11
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
13
|
-
import {
|
|
12
|
+
import { settings2GraphAIConfig } from "../utils/utils.js";
|
|
14
13
|
import { extractImageFromMovie, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
|
|
15
14
|
import { getImageRefs } from "./image_references.js";
|
|
16
15
|
import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
|
|
17
16
|
const vanillaAgents = vanilla.default ?? vanilla;
|
|
18
17
|
const imageAgents = {
|
|
19
|
-
|
|
18
|
+
imageGenAIAgent,
|
|
20
19
|
imageOpenaiAgent,
|
|
21
20
|
};
|
|
22
21
|
const movieAgents = {
|
|
23
|
-
|
|
22
|
+
movieGenAIAgent,
|
|
24
23
|
movieReplicateAgent,
|
|
25
24
|
mediaMockAgent,
|
|
26
25
|
};
|
|
@@ -151,6 +150,7 @@ const beat_graph_data = {
|
|
|
151
150
|
onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
|
|
152
151
|
prompt: ":beat.moviePrompt",
|
|
153
152
|
imagePath: ":preprocessor.referenceImageForMovie",
|
|
153
|
+
movieFile: ":preprocessor.movieFile", // for google genai agent
|
|
154
154
|
cache: {
|
|
155
155
|
force: [":context.force", ":forceMovie"],
|
|
156
156
|
file: ":preprocessor.movieFile",
|
|
@@ -331,20 +331,6 @@ const graph_data = {
|
|
|
331
331
|
},
|
|
332
332
|
},
|
|
333
333
|
};
|
|
334
|
-
const googleAuth = async () => {
|
|
335
|
-
try {
|
|
336
|
-
const auth = new GoogleAuth({
|
|
337
|
-
scopes: ["https://www.googleapis.com/auth/cloud-platform"],
|
|
338
|
-
});
|
|
339
|
-
const client = await auth.getClient();
|
|
340
|
-
const accessToken = await client.getAccessToken();
|
|
341
|
-
return accessToken.token;
|
|
342
|
-
}
|
|
343
|
-
catch (error) {
|
|
344
|
-
GraphAILogger.info("install gcloud and run 'gcloud auth application-default login'");
|
|
345
|
-
throw error;
|
|
346
|
-
}
|
|
347
|
-
};
|
|
348
334
|
export const graphOption = async (context, settings) => {
|
|
349
335
|
const options = {
|
|
350
336
|
agentFilters: [
|
|
@@ -356,17 +342,7 @@ export const graphOption = async (context, settings) => {
|
|
|
356
342
|
],
|
|
357
343
|
taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
|
|
358
344
|
};
|
|
359
|
-
|
|
360
|
-
const config = settings2GraphAIConfig(settings, process.env);
|
|
361
|
-
// We need to get google's auth token only if the google is the text2image provider.
|
|
362
|
-
if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
|
|
363
|
-
userAssert(!!config.movieGoogleAgent || !!config.imageGoogleAgent, "GOOGLE_PROJECT_ID is not set");
|
|
364
|
-
GraphAILogger.log("google was specified as text2image engine");
|
|
365
|
-
const token = await googleAuth();
|
|
366
|
-
config["imageGoogleAgent"].token = token;
|
|
367
|
-
config["movieGoogleAgent"].token = token;
|
|
368
|
-
}
|
|
369
|
-
options.config = config;
|
|
345
|
+
options.config = settings2GraphAIConfig(settings, process.env);
|
|
370
346
|
return options;
|
|
371
347
|
};
|
|
372
348
|
const prepareGenerateImages = async (context) => {
|
package/lib/actions/movie.js
CHANGED
|
@@ -80,7 +80,7 @@ const getOutputOption = (audioId, videoId) => {
|
|
|
80
80
|
const addCaptions = (ffmpegContext, concatVideoId, context, caption) => {
|
|
81
81
|
const beatsWithCaptions = context.studio.beats.filter(({ captionFile }) => captionFile);
|
|
82
82
|
if (caption && beatsWithCaptions.length > 0) {
|
|
83
|
-
const introPadding = context
|
|
83
|
+
const introPadding = MulmoStudioContextMethods.getIntroPadding(context);
|
|
84
84
|
return beatsWithCaptions.reduce((acc, beat, index) => {
|
|
85
85
|
const { startAt, duration, captionFile } = beat;
|
|
86
86
|
if (startAt !== undefined && duration !== undefined && captionFile !== undefined) {
|
|
@@ -168,7 +168,7 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
|
|
|
168
168
|
const extraPadding = (() => {
|
|
169
169
|
// We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
|
|
170
170
|
if (index === 0) {
|
|
171
|
-
return context
|
|
171
|
+
return MulmoStudioContextMethods.getIntroPadding(context);
|
|
172
172
|
}
|
|
173
173
|
else if (index === context.studio.beats.length - 1) {
|
|
174
174
|
return context.presentationStyle.audioParams.outroPadding;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import fs from "fs";
|
|
2
2
|
import { GraphAILogger } from "graphai";
|
|
3
3
|
import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextGenerateOutput, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
|
|
4
|
+
import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
|
|
4
5
|
const addBGMAgent = async ({ namedInputs, params, }) => {
|
|
5
6
|
const { voiceFile, outputFile, context } = namedInputs;
|
|
6
7
|
const { musicFile } = params;
|
|
@@ -11,7 +12,7 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
|
|
|
11
12
|
throw new Error(`AddBGMAgent musicFile not exist: ${musicFile}`);
|
|
12
13
|
}
|
|
13
14
|
const { duration: speechDuration } = await ffmpegGetMediaDuration(voiceFile);
|
|
14
|
-
const introPadding = context
|
|
15
|
+
const introPadding = MulmoStudioContextMethods.getIntroPadding(context);
|
|
15
16
|
const outroPadding = context.presentationStyle.audioParams.outroPadding;
|
|
16
17
|
const totalDuration = speechDuration + introPadding + outroPadding;
|
|
17
18
|
GraphAILogger.log("totalDucation:", speechDuration, totalDuration);
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
+
import type { AgentBufferResult, ImageAgentInputs, ImageAgentParams, GenAIImageAgentConfig } from "../types/agent.js";
|
|
3
|
+
export declare const imageGenAIAgent: AgentFunction<ImageAgentParams, AgentBufferResult, ImageAgentInputs, GenAIImageAgentConfig>;
|
|
4
|
+
declare const imageGenAIAgentInfo: AgentFunctionInfo;
|
|
5
|
+
export default imageGenAIAgentInfo;
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { GraphAILogger } from "graphai";
|
|
2
|
+
import { getAspectRatio } from "./movie_google_agent.js";
|
|
3
|
+
import { provider2ImageAgent } from "../utils/provider2agent.js";
|
|
4
|
+
import { GoogleGenAI, PersonGeneration } from "@google/genai";
|
|
5
|
+
export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
|
|
6
|
+
const { prompt } = namedInputs;
|
|
7
|
+
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
8
|
+
const model = params.model ?? provider2ImageAgent["google"].defaultModel;
|
|
9
|
+
const apiKey = config?.apiKey;
|
|
10
|
+
if (!apiKey) {
|
|
11
|
+
throw new Error("API key is required for Google GenAI agent");
|
|
12
|
+
}
|
|
13
|
+
try {
|
|
14
|
+
const ai = new GoogleGenAI({ apiKey });
|
|
15
|
+
const response = await ai.models.generateImages({
|
|
16
|
+
model,
|
|
17
|
+
prompt,
|
|
18
|
+
config: {
|
|
19
|
+
numberOfImages: 1, // default is 4!
|
|
20
|
+
aspectRatio,
|
|
21
|
+
personGeneration: PersonGeneration.ALLOW_ALL,
|
|
22
|
+
// safetyFilterLevel: SafetyFilterLevel.BLOCK_ONLY_HIGH,
|
|
23
|
+
},
|
|
24
|
+
});
|
|
25
|
+
if (!response.generatedImages || response.generatedImages.length === 0) {
|
|
26
|
+
throw new Error("ERROR: generateImage returned no generated images");
|
|
27
|
+
}
|
|
28
|
+
const image = response.generatedImages[0].image;
|
|
29
|
+
if (image && image.imageBytes) {
|
|
30
|
+
return { buffer: Buffer.from(image.imageBytes, "base64") };
|
|
31
|
+
}
|
|
32
|
+
throw new Error("ERROR: generateImage returned no image bytes");
|
|
33
|
+
}
|
|
34
|
+
catch (error) {
|
|
35
|
+
GraphAILogger.info("Failed to generate image:", error);
|
|
36
|
+
throw error;
|
|
37
|
+
}
|
|
38
|
+
};
|
|
39
|
+
const imageGenAIAgentInfo = {
|
|
40
|
+
name: "imageGenAIAgent",
|
|
41
|
+
agent: imageGenAIAgent,
|
|
42
|
+
mock: imageGenAIAgent,
|
|
43
|
+
samples: [],
|
|
44
|
+
description: "Google Image agent",
|
|
45
|
+
category: ["image"],
|
|
46
|
+
author: "Receptron Team",
|
|
47
|
+
repository: "https://github.com/receptron/mulmocast-cli/",
|
|
48
|
+
// source: "https://github.com/receptron/mulmocast-cli/blob/main/src/agents/image_google_agent.ts",
|
|
49
|
+
license: "MIT",
|
|
50
|
+
environmentVariables: [],
|
|
51
|
+
};
|
|
52
|
+
export default imageGenAIAgentInfo;
|
package/lib/agents/index.d.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import addBGMAgent from "./add_bgm_agent.js";
|
|
2
2
|
import combineAudioFilesAgent from "./combine_audio_files_agent.js";
|
|
3
|
-
import
|
|
3
|
+
import imageGenAIAgent from "./image_genai_agent.js";
|
|
4
4
|
import imageOpenaiAgent from "./image_openai_agent.js";
|
|
5
5
|
import tavilySearchAgent from "./tavily_agent.js";
|
|
6
|
-
import
|
|
6
|
+
import movieGenAIAgent from "./movie_genai_agent.js";
|
|
7
7
|
import movieReplicateAgent from "./movie_replicate_agent.js";
|
|
8
8
|
import mediaMockAgent from "./media_mock_agent.js";
|
|
9
9
|
import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
|
|
@@ -17,4 +17,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
|
|
|
17
17
|
import { textInputAgent } from "@graphai/input_agents";
|
|
18
18
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
19
19
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
20
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent,
|
|
20
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, };
|
package/lib/agents/index.js
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import addBGMAgent from "./add_bgm_agent.js";
|
|
2
2
|
import combineAudioFilesAgent from "./combine_audio_files_agent.js";
|
|
3
|
-
import
|
|
3
|
+
import imageGenAIAgent from "./image_genai_agent.js";
|
|
4
4
|
import imageOpenaiAgent from "./image_openai_agent.js";
|
|
5
5
|
import tavilySearchAgent from "./tavily_agent.js";
|
|
6
|
-
import
|
|
6
|
+
import movieGenAIAgent from "./movie_genai_agent.js";
|
|
7
7
|
import movieReplicateAgent from "./movie_replicate_agent.js";
|
|
8
8
|
import mediaMockAgent from "./media_mock_agent.js";
|
|
9
9
|
import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
|
|
@@ -18,4 +18,4 @@ import { textInputAgent } from "@graphai/input_agents";
|
|
|
18
18
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
19
19
|
// import * as vanilla from "@graphai/vanilla";
|
|
20
20
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
21
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent,
|
|
21
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, };
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
+
import type { AgentBufferResult, GenAIImageAgentConfig, GoogleMovieAgentParams, MovieAgentInputs } from "../types/agent.js";
|
|
3
|
+
export declare const getAspectRatio: (canvasSize: {
|
|
4
|
+
width: number;
|
|
5
|
+
height: number;
|
|
6
|
+
}) => string;
|
|
7
|
+
export declare const movieGenAIAgent: AgentFunction<GoogleMovieAgentParams, AgentBufferResult, MovieAgentInputs, GenAIImageAgentConfig>;
|
|
8
|
+
declare const movieGenAIAgentInfo: AgentFunctionInfo;
|
|
9
|
+
export default movieGenAIAgentInfo;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { readFileSync } from "fs";
|
|
2
|
+
import { GraphAILogger, sleep } from "graphai";
|
|
3
|
+
import { GoogleGenAI, PersonGeneration } from "@google/genai";
|
|
4
|
+
export const getAspectRatio = (canvasSize) => {
|
|
5
|
+
if (canvasSize.width > canvasSize.height) {
|
|
6
|
+
return "16:9";
|
|
7
|
+
}
|
|
8
|
+
else if (canvasSize.width < canvasSize.height) {
|
|
9
|
+
return "9:16";
|
|
10
|
+
}
|
|
11
|
+
else {
|
|
12
|
+
return "1:1";
|
|
13
|
+
}
|
|
14
|
+
};
|
|
15
|
+
export const movieGenAIAgent = async ({ namedInputs, params, config, }) => {
|
|
16
|
+
const { prompt, imagePath, movieFile } = namedInputs;
|
|
17
|
+
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
18
|
+
const model = params.model ?? "veo-2.0-generate-001"; // "veo-3.0-generate-preview";
|
|
19
|
+
const duration = params.duration ?? 8;
|
|
20
|
+
const apiKey = config?.apiKey;
|
|
21
|
+
if (!apiKey) {
|
|
22
|
+
throw new Error("API key is required for Google GenAI agent");
|
|
23
|
+
}
|
|
24
|
+
try {
|
|
25
|
+
const ai = new GoogleGenAI({ apiKey });
|
|
26
|
+
const payload = {
|
|
27
|
+
model,
|
|
28
|
+
prompt,
|
|
29
|
+
config: {
|
|
30
|
+
durationSeconds: duration,
|
|
31
|
+
aspectRatio,
|
|
32
|
+
personGeneration: undefined,
|
|
33
|
+
},
|
|
34
|
+
image: undefined,
|
|
35
|
+
};
|
|
36
|
+
if (imagePath) {
|
|
37
|
+
const buffer = readFileSync(imagePath);
|
|
38
|
+
const imageBytes = buffer.toString("base64");
|
|
39
|
+
payload.image = {
|
|
40
|
+
imageBytes,
|
|
41
|
+
mimeType: "image/png",
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
else {
|
|
45
|
+
payload.config.personGeneration = PersonGeneration.ALLOW_ALL;
|
|
46
|
+
}
|
|
47
|
+
const operation = await ai.models.generateVideos(payload);
|
|
48
|
+
const response = { operation };
|
|
49
|
+
// Poll the operation status until the video is ready.
|
|
50
|
+
while (!response.operation.done) {
|
|
51
|
+
await sleep(5000);
|
|
52
|
+
response.operation = await ai.operations.getVideosOperation(response);
|
|
53
|
+
}
|
|
54
|
+
if (!response.operation.response?.generatedVideos) {
|
|
55
|
+
throw new Error(`No video: ${JSON.stringify(response.operation, null, 2)}`);
|
|
56
|
+
}
|
|
57
|
+
const video = response.operation.response.generatedVideos[0].video;
|
|
58
|
+
if (!video) {
|
|
59
|
+
throw new Error(`No video: ${JSON.stringify(response.operation, null, 2)}`);
|
|
60
|
+
}
|
|
61
|
+
await ai.files.download({
|
|
62
|
+
file: video,
|
|
63
|
+
downloadPath: movieFile,
|
|
64
|
+
});
|
|
65
|
+
await sleep(5000); // HACK: Without this, the file is not ready yet.
|
|
66
|
+
return { saved: movieFile };
|
|
67
|
+
}
|
|
68
|
+
catch (error) {
|
|
69
|
+
GraphAILogger.info("Failed to generate movie:", error.message);
|
|
70
|
+
throw error;
|
|
71
|
+
}
|
|
72
|
+
};
|
|
73
|
+
const movieGenAIAgentInfo = {
|
|
74
|
+
name: "movieGenAIAgent",
|
|
75
|
+
agent: movieGenAIAgent,
|
|
76
|
+
mock: movieGenAIAgent,
|
|
77
|
+
samples: [],
|
|
78
|
+
description: "Google Movie agent",
|
|
79
|
+
category: ["movie"],
|
|
80
|
+
author: "Receptron Team",
|
|
81
|
+
repository: "https://github.com/receptron/mulmocast-cli/",
|
|
82
|
+
// source: "https://github.com/receptron/mulmocast-cli/blob/main/src/agents/image_google_agent.ts",
|
|
83
|
+
license: "MIT",
|
|
84
|
+
environmentVariables: [],
|
|
85
|
+
};
|
|
86
|
+
export default movieGenAIAgentInfo;
|
package/lib/cli/helpers.js
CHANGED
|
@@ -37,9 +37,11 @@ export const getFileObject = (args) => {
|
|
|
37
37
|
// We generate a new unique script file from clipboard text in the output directory
|
|
38
38
|
const generatedFileName = generateTimestampedFileName("script");
|
|
39
39
|
const clipboardText = clipboardy.readSync();
|
|
40
|
+
const json = JSON.parse(clipboardText);
|
|
41
|
+
const formattedText = JSON.stringify(json, null, 2);
|
|
40
42
|
const resolvedFilePath = resolveDirPath(outDirPath, `${generatedFileName}.json`);
|
|
41
43
|
mkdir(outDirPath);
|
|
42
|
-
fs.writeFileSync(resolvedFilePath,
|
|
44
|
+
fs.writeFileSync(resolvedFilePath, formattedText, "utf8");
|
|
43
45
|
return { fileOrUrl: resolvedFilePath, fileName: generatedFileName };
|
|
44
46
|
}
|
|
45
47
|
const resolvedFileOrUrl = file ?? "";
|
|
@@ -2,7 +2,7 @@ import { findImagePlugin } from "../utils/image_plugins/index.js";
|
|
|
2
2
|
export const MulmoBeatMethods = {
|
|
3
3
|
getHtmlPrompt(beat) {
|
|
4
4
|
if (beat?.htmlPrompt?.data) {
|
|
5
|
-
return beat.htmlPrompt.prompt + "\n\n
|
|
5
|
+
return beat.htmlPrompt.prompt + "\n\n[data]\n" + JSON.stringify(beat.htmlPrompt.data, null, 2);
|
|
6
6
|
}
|
|
7
7
|
return beat?.htmlPrompt?.prompt;
|
|
8
8
|
},
|
|
@@ -71,7 +71,7 @@ export const MulmoPresentationStyleMethods = {
|
|
|
71
71
|
// Notice that we copy imageParams from presentationStyle and update
|
|
72
72
|
// provider and model appropriately.
|
|
73
73
|
const imageParams = { ...presentationStyle.imageParams, ...beat?.imageParams };
|
|
74
|
-
const provider = MulmoPresentationStyleMethods.getText2ImageProvider(imageParams?.provider);
|
|
74
|
+
const provider = MulmoPresentationStyleMethods.getText2ImageProvider(imageParams?.provider) ?? defaultProviders.text2image;
|
|
75
75
|
const agentInfo = provider2ImageAgent[provider];
|
|
76
76
|
// The default text2image model is gpt-image-1 from OpenAI, and to use it you must have an OpenAI account and have verified your identity. If this is not possible, please specify dall-e-3 as the model.
|
|
77
77
|
const defaultImageParams = {
|
|
@@ -12,4 +12,5 @@ export declare const MulmoStudioContextMethods: {
|
|
|
12
12
|
setSessionState(context: MulmoStudioContext, sessionType: SessionType, value: boolean): void;
|
|
13
13
|
setBeatSessionState(context: MulmoStudioContext, sessionType: BeatSessionType, index: number, value: boolean): void;
|
|
14
14
|
needTranslate(context: MulmoStudioContext, includeCaption?: boolean): boolean | "" | undefined;
|
|
15
|
+
getIntroPadding(context: MulmoStudioContext): number;
|
|
15
16
|
};
|
|
@@ -71,4 +71,11 @@ export const MulmoStudioContextMethods = {
|
|
|
71
71
|
}
|
|
72
72
|
return context.studio.script.lang !== context.lang;
|
|
73
73
|
},
|
|
74
|
+
getIntroPadding(context) {
|
|
75
|
+
if (context.studio.script.beats[0].enableLipSync) {
|
|
76
|
+
// NOTE: We must set introPadding to 0 when enableLipSync is true. Otherwise, the lipsync will be out of sync.
|
|
77
|
+
return 0;
|
|
78
|
+
}
|
|
79
|
+
return context.presentationStyle.audioParams.introPadding;
|
|
80
|
+
},
|
|
74
81
|
};
|