mulmocast 1.1.10 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/assets/templates/html.json +11 -0
- package/lib/actions/audio.js +4 -20
- package/lib/actions/image_agents.d.ts +1 -1
- package/lib/actions/image_references.js +3 -2
- package/lib/actions/images.js +10 -30
- package/lib/agents/image_genai_agent.d.ts +5 -0
- package/lib/agents/image_genai_agent.js +52 -0
- package/lib/agents/index.d.ts +4 -3
- package/lib/agents/index.js +4 -3
- package/lib/agents/media_mock_agent.js +24 -1
- package/lib/agents/movie_genai_agent.d.ts +9 -0
- package/lib/agents/movie_genai_agent.js +86 -0
- package/lib/cli/commands/tool/scripting/builder.d.ts +1 -1
- package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
- package/lib/cli/helpers.js +3 -1
- package/lib/methods/mulmo_beat.js +1 -1
- package/lib/types/agent.d.ts +7 -1
- package/lib/types/schema.d.ts +33 -33
- package/lib/types/schema.js +2 -2
- package/lib/utils/context.d.ts +5 -5
- package/lib/utils/file.d.ts +1 -0
- package/lib/utils/file.js +1 -0
- package/lib/utils/filters.js +7 -1
- package/lib/utils/inquirer.js +15 -22
- package/lib/utils/prompt.d.ts +1 -1
- package/lib/utils/prompt.js +1 -1
- package/lib/utils/provider2agent.d.ts +22 -1
- package/lib/utils/provider2agent.js +35 -8
- package/lib/utils/string.js +1 -0
- package/lib/utils/utils.d.ts +1 -1
- package/lib/utils/utils.js +4 -4
- package/package.json +8 -6
- package/scripts/test/test_genai.json +47 -0
- package/scripts/test/test_hello.json +0 -3
- package/scripts/test/test_hello_bgm_0.json +21 -0
- package/scripts/test/test_hello_caption.json +38 -0
- package/scripts/test/test_hello_caption.json~ +21 -0
- package/scripts/test/test_hello_image.json +42 -0
- package/scripts/test/test_hello_image.json~ +18 -0
- package/scripts/test/test_hello_nobgm.json +21 -0
- package/scripts/test/test_media_mock.json +245 -0
- package/scripts/test/test_replicate.json +21 -1
|
@@ -2,5 +2,16 @@
|
|
|
2
2
|
"title": "Business presentation in HTML",
|
|
3
3
|
"description": "Template for business presentation in HTML.",
|
|
4
4
|
"systemPrompt": "Generate a script for a business presentation of the given topic. Another LLM will generate actual slides from the prompt and data for each beat. Adding optional data would help it to generate more compelling slide. Mention the reference in one of beats, if it exists. The valid type of reference is 'article', 'paper', 'image', 'video', 'audio'. Use the JSON below as a template.",
|
|
5
|
+
"presentationStyle": {
|
|
6
|
+
"$mulmocast": {
|
|
7
|
+
"version": "1.1",
|
|
8
|
+
"credit": "closing"
|
|
9
|
+
},
|
|
10
|
+
"lang": "en",
|
|
11
|
+
"canvasSize": {
|
|
12
|
+
"width": 1536,
|
|
13
|
+
"height": 1024
|
|
14
|
+
}
|
|
15
|
+
},
|
|
5
16
|
"scriptName": "html.json"
|
|
6
17
|
}
|
package/lib/actions/audio.js
CHANGED
|
@@ -1,19 +1,14 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
2
|
import { GraphAI, TaskManager } from "graphai";
|
|
3
3
|
import * as agents from "@graphai/vanilla";
|
|
4
|
-
import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
|
|
5
|
-
import addBGMAgent from "../agents/add_bgm_agent.js";
|
|
6
|
-
import combineAudioFilesAgent from "../agents/combine_audio_files_agent.js";
|
|
7
|
-
import ttsOpenaiAgent from "../agents/tts_openai_agent.js";
|
|
8
|
-
import ttsGoogleAgent from "../agents/tts_google_agent.js";
|
|
9
|
-
import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
|
|
10
4
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
11
|
-
import {
|
|
5
|
+
import { ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsElevenlabsAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent } from "../agents/index.js";
|
|
12
6
|
import { text2SpeechProviderSchema } from "../types/index.js";
|
|
13
7
|
import { fileCacheAgentFilter, nijovoiceTextAgentFilter } from "../utils/filters.js";
|
|
14
8
|
import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
|
|
15
9
|
import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
|
|
16
10
|
import { provider2TTSAgent } from "../utils/provider2agent.js";
|
|
11
|
+
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
17
12
|
import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
|
|
18
13
|
import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
|
|
19
14
|
const vanillaAgents = agents.default ?? agents;
|
|
@@ -85,6 +80,7 @@ const graph_tts = {
|
|
|
85
80
|
if: ":preprocessor.needsTTS",
|
|
86
81
|
agent: ":preprocessor.ttsAgent",
|
|
87
82
|
inputs: {
|
|
83
|
+
media: "audio",
|
|
88
84
|
text: ":preprocessor.text",
|
|
89
85
|
provider: ":preprocessor.provider",
|
|
90
86
|
lang: ":preprocessor.lang",
|
|
@@ -146,7 +142,6 @@ const graph_data = {
|
|
|
146
142
|
},
|
|
147
143
|
addBGM: {
|
|
148
144
|
agent: "addBGMAgent",
|
|
149
|
-
unless: ":context.presentationStyle.audioParams.bgmVolume.equal(0)",
|
|
150
145
|
inputs: {
|
|
151
146
|
wait: ":combineFiles",
|
|
152
147
|
voiceFile: ":audioCombinedFilePath",
|
|
@@ -156,18 +151,6 @@ const graph_data = {
|
|
|
156
151
|
musicFile: ":musicFile",
|
|
157
152
|
},
|
|
158
153
|
},
|
|
159
|
-
isResult: true,
|
|
160
|
-
defaultValue: {},
|
|
161
|
-
},
|
|
162
|
-
title: {
|
|
163
|
-
agent: "copyAgent",
|
|
164
|
-
params: {
|
|
165
|
-
namedKey: "title",
|
|
166
|
-
},
|
|
167
|
-
inputs: {
|
|
168
|
-
title: "\n${:context.studio.script.title}\n\n${:context.studio.script.description}\nReference: ${:context.studio.script.reference}\n",
|
|
169
|
-
waitFor: ":addBGM",
|
|
170
|
-
},
|
|
171
154
|
},
|
|
172
155
|
},
|
|
173
156
|
};
|
|
@@ -198,6 +181,7 @@ const audioAgents = {
|
|
|
198
181
|
ttsNijivoiceAgent,
|
|
199
182
|
ttsGoogleAgent,
|
|
200
183
|
ttsElevenlabsAgent,
|
|
184
|
+
mediaMockAgent,
|
|
201
185
|
addBGMAgent,
|
|
202
186
|
combineAudioFilesAgent,
|
|
203
187
|
};
|
|
@@ -8,7 +8,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
8
8
|
imagePath: string;
|
|
9
9
|
htmlPrompt: string | undefined;
|
|
10
10
|
htmlPath: string;
|
|
11
|
-
htmlImageSystemPrompt: string
|
|
11
|
+
htmlImageSystemPrompt: string;
|
|
12
12
|
} | {
|
|
13
13
|
imagePath: string | undefined;
|
|
14
14
|
referenceImageForMovie: string | undefined;
|
|
@@ -4,7 +4,7 @@ import { getReferenceImagePath } from "../utils/file.js";
|
|
|
4
4
|
import { getExtention } from "../utils/utils.js";
|
|
5
5
|
import { graphOption } from "./images.js";
|
|
6
6
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
7
|
-
import {
|
|
7
|
+
import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent } from "../agents/index.js";
|
|
8
8
|
// public api
|
|
9
9
|
// Application may call this function directly to generate reference image.
|
|
10
10
|
export const generateReferenceImage = async (inputs) => {
|
|
@@ -21,6 +21,7 @@ export const generateReferenceImage = async (inputs) => {
|
|
|
21
21
|
agent: imageAgentInfo.agent,
|
|
22
22
|
retry: 2,
|
|
23
23
|
inputs: {
|
|
24
|
+
media: "image",
|
|
24
25
|
prompt,
|
|
25
26
|
cache: {
|
|
26
27
|
force: [context.force, force ?? false],
|
|
@@ -38,7 +39,7 @@ export const generateReferenceImage = async (inputs) => {
|
|
|
38
39
|
},
|
|
39
40
|
};
|
|
40
41
|
const options = await graphOption(context);
|
|
41
|
-
const graph = new GraphAI(image_graph_data, {
|
|
42
|
+
const graph = new GraphAI(image_graph_data, { imageGenAIAgent, imageOpenaiAgent, mediaMockAgent }, options);
|
|
42
43
|
await graph.run();
|
|
43
44
|
return imagePath;
|
|
44
45
|
};
|
package/lib/actions/images.js
CHANGED
|
@@ -1,27 +1,27 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import { GraphAI, GraphAILogger, TaskManager } from "graphai";
|
|
4
|
-
import { GoogleAuth } from "google-auth-library";
|
|
5
4
|
import * as vanilla from "@graphai/vanilla";
|
|
6
5
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
7
6
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
8
7
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
9
|
-
import {
|
|
8
|
+
import { imageGenAIAgent, imageOpenaiAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, } from "../agents/index.js";
|
|
10
9
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
11
10
|
import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
|
|
12
11
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
13
|
-
import {
|
|
12
|
+
import { settings2GraphAIConfig } from "../utils/utils.js";
|
|
14
13
|
import { extractImageFromMovie, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
|
|
15
14
|
import { getImageRefs } from "./image_references.js";
|
|
16
15
|
import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
|
|
17
16
|
const vanillaAgents = vanilla.default ?? vanilla;
|
|
18
17
|
const imageAgents = {
|
|
19
|
-
|
|
18
|
+
imageGenAIAgent,
|
|
20
19
|
imageOpenaiAgent,
|
|
21
20
|
};
|
|
22
21
|
const movieAgents = {
|
|
23
|
-
|
|
22
|
+
movieGenAIAgent,
|
|
24
23
|
movieReplicateAgent,
|
|
24
|
+
mediaMockAgent,
|
|
25
25
|
};
|
|
26
26
|
const soundEffectAgents = {
|
|
27
27
|
soundEffectReplicateAgent,
|
|
@@ -77,6 +77,7 @@ const beat_graph_data = {
|
|
|
77
77
|
defaultValue: {},
|
|
78
78
|
agent: ":htmlImageAgentInfo.agent",
|
|
79
79
|
inputs: {
|
|
80
|
+
media: "html",
|
|
80
81
|
prompt: ":preprocessor.htmlPrompt",
|
|
81
82
|
system: ":preprocessor.htmlImageSystemPrompt",
|
|
82
83
|
params: {
|
|
@@ -122,6 +123,7 @@ const beat_graph_data = {
|
|
|
122
123
|
agent: ":preprocessor.imageAgentInfo.agent",
|
|
123
124
|
retry: 2,
|
|
124
125
|
inputs: {
|
|
126
|
+
media: "image",
|
|
125
127
|
prompt: ":preprocessor.prompt",
|
|
126
128
|
referenceImages: ":preprocessor.referenceImages",
|
|
127
129
|
cache: {
|
|
@@ -144,9 +146,11 @@ const beat_graph_data = {
|
|
|
144
146
|
if: ":preprocessor.movieFile",
|
|
145
147
|
agent: ":preprocessor.movieAgentInfo.agent",
|
|
146
148
|
inputs: {
|
|
149
|
+
media: "movie",
|
|
147
150
|
onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
|
|
148
151
|
prompt: ":beat.moviePrompt",
|
|
149
152
|
imagePath: ":preprocessor.referenceImageForMovie",
|
|
153
|
+
movieFile: ":preprocessor.movieFile", // for google genai agent
|
|
150
154
|
cache: {
|
|
151
155
|
force: [":context.force", ":forceMovie"],
|
|
152
156
|
file: ":preprocessor.movieFile",
|
|
@@ -327,20 +331,6 @@ const graph_data = {
|
|
|
327
331
|
},
|
|
328
332
|
},
|
|
329
333
|
};
|
|
330
|
-
const googleAuth = async () => {
|
|
331
|
-
try {
|
|
332
|
-
const auth = new GoogleAuth({
|
|
333
|
-
scopes: ["https://www.googleapis.com/auth/cloud-platform"],
|
|
334
|
-
});
|
|
335
|
-
const client = await auth.getClient();
|
|
336
|
-
const accessToken = await client.getAccessToken();
|
|
337
|
-
return accessToken.token;
|
|
338
|
-
}
|
|
339
|
-
catch (error) {
|
|
340
|
-
GraphAILogger.info("install gcloud and run 'gcloud auth application-default login'");
|
|
341
|
-
throw error;
|
|
342
|
-
}
|
|
343
|
-
};
|
|
344
334
|
export const graphOption = async (context, settings) => {
|
|
345
335
|
const options = {
|
|
346
336
|
agentFilters: [
|
|
@@ -352,17 +342,7 @@ export const graphOption = async (context, settings) => {
|
|
|
352
342
|
],
|
|
353
343
|
taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
|
|
354
344
|
};
|
|
355
|
-
|
|
356
|
-
const config = settings2GraphAIConfig(settings, process.env);
|
|
357
|
-
// We need to get google's auth token only if the google is the text2image provider.
|
|
358
|
-
if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
|
|
359
|
-
userAssert(!!config.movieGoogleAgent || !!config.imageGoogleAgent, "GOOGLE_PROJECT_ID is not set");
|
|
360
|
-
GraphAILogger.log("google was specified as text2image engine");
|
|
361
|
-
const token = await googleAuth();
|
|
362
|
-
config["imageGoogleAgent"].token = token;
|
|
363
|
-
config["movieGoogleAgent"].token = token;
|
|
364
|
-
}
|
|
365
|
-
options.config = config;
|
|
345
|
+
options.config = settings2GraphAIConfig(settings, process.env);
|
|
366
346
|
return options;
|
|
367
347
|
};
|
|
368
348
|
const prepareGenerateImages = async (context) => {
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
+
import type { AgentBufferResult, ImageAgentInputs, ImageAgentParams, GenAIImageAgentConfig } from "../types/agent.js";
|
|
3
|
+
export declare const imageGenAIAgent: AgentFunction<ImageAgentParams, AgentBufferResult, ImageAgentInputs, GenAIImageAgentConfig>;
|
|
4
|
+
declare const imageGenAIAgentInfo: AgentFunctionInfo;
|
|
5
|
+
export default imageGenAIAgentInfo;
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { GraphAILogger } from "graphai";
|
|
2
|
+
import { getAspectRatio } from "./movie_google_agent.js";
|
|
3
|
+
import { provider2ImageAgent } from "../utils/provider2agent.js";
|
|
4
|
+
import { GoogleGenAI, PersonGeneration } from "@google/genai";
|
|
5
|
+
export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
|
|
6
|
+
const { prompt } = namedInputs;
|
|
7
|
+
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
8
|
+
const model = params.model ?? provider2ImageAgent["google"].defaultModel;
|
|
9
|
+
const apiKey = config?.apiKey;
|
|
10
|
+
if (!apiKey) {
|
|
11
|
+
throw new Error("API key is required for Google GenAI agent");
|
|
12
|
+
}
|
|
13
|
+
try {
|
|
14
|
+
const ai = new GoogleGenAI({ apiKey });
|
|
15
|
+
const response = await ai.models.generateImages({
|
|
16
|
+
model,
|
|
17
|
+
prompt,
|
|
18
|
+
config: {
|
|
19
|
+
numberOfImages: 1, // default is 4!
|
|
20
|
+
aspectRatio,
|
|
21
|
+
personGeneration: PersonGeneration.ALLOW_ALL,
|
|
22
|
+
// safetyFilterLevel: SafetyFilterLevel.BLOCK_ONLY_HIGH,
|
|
23
|
+
},
|
|
24
|
+
});
|
|
25
|
+
if (!response.generatedImages || response.generatedImages.length === 0) {
|
|
26
|
+
throw new Error("ERROR: generateImage returned no generated images");
|
|
27
|
+
}
|
|
28
|
+
const image = response.generatedImages[0].image;
|
|
29
|
+
if (image && image.imageBytes) {
|
|
30
|
+
return { buffer: Buffer.from(image.imageBytes, "base64") };
|
|
31
|
+
}
|
|
32
|
+
throw new Error("ERROR: generateImage returned no image bytes");
|
|
33
|
+
}
|
|
34
|
+
catch (error) {
|
|
35
|
+
GraphAILogger.info("Failed to generate image:", error);
|
|
36
|
+
throw error;
|
|
37
|
+
}
|
|
38
|
+
};
|
|
39
|
+
const imageGenAIAgentInfo = {
|
|
40
|
+
name: "imageGenAIAgent",
|
|
41
|
+
agent: imageGenAIAgent,
|
|
42
|
+
mock: imageGenAIAgent,
|
|
43
|
+
samples: [],
|
|
44
|
+
description: "Google Image agent",
|
|
45
|
+
category: ["image"],
|
|
46
|
+
author: "Receptron Team",
|
|
47
|
+
repository: "https://github.com/receptron/mulmocast-cli/",
|
|
48
|
+
// source: "https://github.com/receptron/mulmocast-cli/blob/main/src/agents/image_google_agent.ts",
|
|
49
|
+
license: "MIT",
|
|
50
|
+
environmentVariables: [],
|
|
51
|
+
};
|
|
52
|
+
export default imageGenAIAgentInfo;
|
package/lib/agents/index.d.ts
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import addBGMAgent from "./add_bgm_agent.js";
|
|
2
2
|
import combineAudioFilesAgent from "./combine_audio_files_agent.js";
|
|
3
|
-
import
|
|
3
|
+
import imageGenAIAgent from "./image_genai_agent.js";
|
|
4
4
|
import imageOpenaiAgent from "./image_openai_agent.js";
|
|
5
5
|
import tavilySearchAgent from "./tavily_agent.js";
|
|
6
|
-
import
|
|
6
|
+
import movieGenAIAgent from "./movie_genai_agent.js";
|
|
7
7
|
import movieReplicateAgent from "./movie_replicate_agent.js";
|
|
8
8
|
import mediaMockAgent from "./media_mock_agent.js";
|
|
9
9
|
import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
|
|
10
10
|
import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
|
|
11
11
|
import ttsOpenaiAgent from "./tts_openai_agent.js";
|
|
12
|
+
import ttsGoogleAgent from "./tts_google_agent.js";
|
|
12
13
|
import validateSchemaAgent from "./validate_schema_agent.js";
|
|
13
14
|
import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
|
|
14
15
|
import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
|
|
@@ -16,4 +17,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
|
|
|
16
17
|
import { textInputAgent } from "@graphai/input_agents";
|
|
17
18
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
18
19
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
19
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent,
|
|
20
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, };
|
package/lib/agents/index.js
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import addBGMAgent from "./add_bgm_agent.js";
|
|
2
2
|
import combineAudioFilesAgent from "./combine_audio_files_agent.js";
|
|
3
|
-
import
|
|
3
|
+
import imageGenAIAgent from "./image_genai_agent.js";
|
|
4
4
|
import imageOpenaiAgent from "./image_openai_agent.js";
|
|
5
5
|
import tavilySearchAgent from "./tavily_agent.js";
|
|
6
|
-
import
|
|
6
|
+
import movieGenAIAgent from "./movie_genai_agent.js";
|
|
7
7
|
import movieReplicateAgent from "./movie_replicate_agent.js";
|
|
8
8
|
import mediaMockAgent from "./media_mock_agent.js";
|
|
9
9
|
import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
|
|
10
10
|
import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
|
|
11
11
|
import ttsOpenaiAgent from "./tts_openai_agent.js";
|
|
12
|
+
import ttsGoogleAgent from "./tts_google_agent.js";
|
|
12
13
|
import validateSchemaAgent from "./validate_schema_agent.js";
|
|
13
14
|
import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
|
|
14
15
|
import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
|
|
@@ -17,4 +18,4 @@ import { textInputAgent } from "@graphai/input_agents";
|
|
|
17
18
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
18
19
|
// import * as vanilla from "@graphai/vanilla";
|
|
19
20
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
20
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent,
|
|
21
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, };
|
|
@@ -1,5 +1,28 @@
|
|
|
1
1
|
import { GraphAILogger } from "graphai";
|
|
2
|
-
|
|
2
|
+
import fs from "fs";
|
|
3
|
+
import { silent60secPath, mulmoCreditPath } from "../utils/file.js";
|
|
4
|
+
export const mediaMockAgent = async ({ namedInputs }) => {
|
|
5
|
+
if (namedInputs.media === "audio") {
|
|
6
|
+
const buffer = fs.readFileSync(silent60secPath());
|
|
7
|
+
return { buffer };
|
|
8
|
+
}
|
|
9
|
+
if (namedInputs.media === "image") {
|
|
10
|
+
const buffer = fs.readFileSync(mulmoCreditPath());
|
|
11
|
+
return { buffer };
|
|
12
|
+
}
|
|
13
|
+
if (namedInputs.media === "movie") {
|
|
14
|
+
const url = "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/test/pingpong.mov";
|
|
15
|
+
const res = await fetch(url);
|
|
16
|
+
if (!res.ok) {
|
|
17
|
+
throw new Error(`Failed to fetch: ${res.status} ${res.statusText}`);
|
|
18
|
+
}
|
|
19
|
+
const arrayBuffer = await res.arrayBuffer();
|
|
20
|
+
const buffer = Buffer.from(arrayBuffer);
|
|
21
|
+
return { buffer };
|
|
22
|
+
}
|
|
23
|
+
if (namedInputs.media === "html") {
|
|
24
|
+
return { text: "<html><title>test</title><body>test</body></html>" };
|
|
25
|
+
}
|
|
3
26
|
GraphAILogger.debug("agent dryRun");
|
|
4
27
|
return { buffer: Buffer.from([]) };
|
|
5
28
|
};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
+
import type { AgentBufferResult, GenAIImageAgentConfig, GoogleMovieAgentParams, MovieAgentInputs } from "../types/agent.js";
|
|
3
|
+
export declare const getAspectRatio: (canvasSize: {
|
|
4
|
+
width: number;
|
|
5
|
+
height: number;
|
|
6
|
+
}) => string;
|
|
7
|
+
export declare const movieGenAIAgent: AgentFunction<GoogleMovieAgentParams, AgentBufferResult, MovieAgentInputs, GenAIImageAgentConfig>;
|
|
8
|
+
declare const movieGenAIAgentInfo: AgentFunctionInfo;
|
|
9
|
+
export default movieGenAIAgentInfo;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import { readFileSync } from "fs";
|
|
2
|
+
import { GraphAILogger, sleep } from "graphai";
|
|
3
|
+
import { GoogleGenAI, PersonGeneration } from "@google/genai";
|
|
4
|
+
export const getAspectRatio = (canvasSize) => {
|
|
5
|
+
if (canvasSize.width > canvasSize.height) {
|
|
6
|
+
return "16:9";
|
|
7
|
+
}
|
|
8
|
+
else if (canvasSize.width < canvasSize.height) {
|
|
9
|
+
return "9:16";
|
|
10
|
+
}
|
|
11
|
+
else {
|
|
12
|
+
return "1:1";
|
|
13
|
+
}
|
|
14
|
+
};
|
|
15
|
+
export const movieGenAIAgent = async ({ namedInputs, params, config, }) => {
|
|
16
|
+
const { prompt, imagePath, movieFile } = namedInputs;
|
|
17
|
+
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
18
|
+
const model = params.model ?? "veo-2.0-generate-001"; // "veo-3.0-generate-preview";
|
|
19
|
+
const duration = params.duration ?? 8;
|
|
20
|
+
const apiKey = config?.apiKey;
|
|
21
|
+
if (!apiKey) {
|
|
22
|
+
throw new Error("API key is required for Google GenAI agent");
|
|
23
|
+
}
|
|
24
|
+
try {
|
|
25
|
+
const ai = new GoogleGenAI({ apiKey });
|
|
26
|
+
const payload = {
|
|
27
|
+
model,
|
|
28
|
+
prompt,
|
|
29
|
+
config: {
|
|
30
|
+
durationSeconds: duration,
|
|
31
|
+
aspectRatio,
|
|
32
|
+
personGeneration: undefined,
|
|
33
|
+
},
|
|
34
|
+
image: undefined,
|
|
35
|
+
};
|
|
36
|
+
if (imagePath) {
|
|
37
|
+
const buffer = readFileSync(imagePath);
|
|
38
|
+
const imageBytes = buffer.toString("base64");
|
|
39
|
+
payload.image = {
|
|
40
|
+
imageBytes,
|
|
41
|
+
mimeType: "image/png",
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
else {
|
|
45
|
+
payload.config.personGeneration = PersonGeneration.ALLOW_ALL;
|
|
46
|
+
}
|
|
47
|
+
const operation = await ai.models.generateVideos(payload);
|
|
48
|
+
const response = { operation };
|
|
49
|
+
// Poll the operation status until the video is ready.
|
|
50
|
+
while (!response.operation.done) {
|
|
51
|
+
await sleep(5000);
|
|
52
|
+
response.operation = await ai.operations.getVideosOperation(response);
|
|
53
|
+
}
|
|
54
|
+
if (!response.operation.response?.generatedVideos) {
|
|
55
|
+
throw new Error(`No video: ${JSON.stringify(response.operation, null, 2)}`);
|
|
56
|
+
}
|
|
57
|
+
const video = response.operation.response.generatedVideos[0].video;
|
|
58
|
+
if (!video) {
|
|
59
|
+
throw new Error(`No video: ${JSON.stringify(response.operation, null, 2)}`);
|
|
60
|
+
}
|
|
61
|
+
await ai.files.download({
|
|
62
|
+
file: video,
|
|
63
|
+
downloadPath: movieFile,
|
|
64
|
+
});
|
|
65
|
+
await sleep(5000); // HACK: Without this, the file is not ready yet.
|
|
66
|
+
return { saved: movieFile };
|
|
67
|
+
}
|
|
68
|
+
catch (error) {
|
|
69
|
+
GraphAILogger.info("Failed to generate movie:", error.message);
|
|
70
|
+
throw error;
|
|
71
|
+
}
|
|
72
|
+
};
|
|
73
|
+
const movieGenAIAgentInfo = {
|
|
74
|
+
name: "movieGenAIAgent",
|
|
75
|
+
agent: movieGenAIAgent,
|
|
76
|
+
mock: movieGenAIAgent,
|
|
77
|
+
samples: [],
|
|
78
|
+
description: "Google Movie agent",
|
|
79
|
+
category: ["movie"],
|
|
80
|
+
author: "Receptron Team",
|
|
81
|
+
repository: "https://github.com/receptron/mulmocast-cli/",
|
|
82
|
+
// source: "https://github.com/receptron/mulmocast-cli/blob/main/src/agents/image_google_agent.ts",
|
|
83
|
+
license: "MIT",
|
|
84
|
+
environmentVariables: [],
|
|
85
|
+
};
|
|
86
|
+
export default movieGenAIAgentInfo;
|
|
@@ -16,7 +16,7 @@ export declare const builder: (yargs: Argv) => Argv<{
|
|
|
16
16
|
} & {
|
|
17
17
|
s: string;
|
|
18
18
|
} & {
|
|
19
|
-
llm: "openai" | "anthropic" | "gemini" | "groq" | undefined;
|
|
19
|
+
llm: "mock" | "openai" | "anthropic" | "gemini" | "groq" | undefined;
|
|
20
20
|
} & {
|
|
21
21
|
llm_model: string | undefined;
|
|
22
22
|
}>;
|
|
@@ -10,7 +10,7 @@ export declare const builder: (yargs: Argv) => Argv<{
|
|
|
10
10
|
} & {
|
|
11
11
|
beats_per_scene: number;
|
|
12
12
|
} & {
|
|
13
|
-
llm: "openai" | "anthropic" | "gemini" | "groq" | undefined;
|
|
13
|
+
llm: "mock" | "openai" | "anthropic" | "gemini" | "groq" | undefined;
|
|
14
14
|
} & {
|
|
15
15
|
llm_model: string | undefined;
|
|
16
16
|
} & {
|
package/lib/cli/helpers.js
CHANGED
|
@@ -37,9 +37,11 @@ export const getFileObject = (args) => {
|
|
|
37
37
|
// We generate a new unique script file from clipboard text in the output directory
|
|
38
38
|
const generatedFileName = generateTimestampedFileName("script");
|
|
39
39
|
const clipboardText = clipboardy.readSync();
|
|
40
|
+
const json = JSON.parse(clipboardText);
|
|
41
|
+
const formattedText = JSON.stringify(json, null, 2);
|
|
40
42
|
const resolvedFilePath = resolveDirPath(outDirPath, `${generatedFileName}.json`);
|
|
41
43
|
mkdir(outDirPath);
|
|
42
|
-
fs.writeFileSync(resolvedFilePath,
|
|
44
|
+
fs.writeFileSync(resolvedFilePath, formattedText, "utf8");
|
|
43
45
|
return { fileOrUrl: resolvedFilePath, fileName: generatedFileName };
|
|
44
46
|
}
|
|
45
47
|
const resolvedFileOrUrl = file ?? "";
|
|
@@ -2,7 +2,7 @@ import { findImagePlugin } from "../utils/image_plugins/index.js";
|
|
|
2
2
|
export const MulmoBeatMethods = {
|
|
3
3
|
getHtmlPrompt(beat) {
|
|
4
4
|
if (beat?.htmlPrompt?.data) {
|
|
5
|
-
return beat.htmlPrompt.prompt + "\n\n
|
|
5
|
+
return beat.htmlPrompt.prompt + "\n\n[data]\n" + JSON.stringify(beat.htmlPrompt.data, null, 2);
|
|
6
6
|
}
|
|
7
7
|
return beat?.htmlPrompt?.prompt;
|
|
8
8
|
},
|
package/lib/types/agent.d.ts
CHANGED
|
@@ -10,7 +10,9 @@ export type OpenAIImageOptions = {
|
|
|
10
10
|
quality?: OpenAIImageQuality;
|
|
11
11
|
};
|
|
12
12
|
export type AgentBufferResult = {
|
|
13
|
-
buffer
|
|
13
|
+
buffer?: Buffer;
|
|
14
|
+
saved?: string;
|
|
15
|
+
text?: string;
|
|
14
16
|
};
|
|
15
17
|
export type AgentPromptInputs = {
|
|
16
18
|
prompt: string;
|
|
@@ -47,8 +49,12 @@ export type GoogleImageAgentConfig = {
|
|
|
47
49
|
projectId?: string;
|
|
48
50
|
token?: string;
|
|
49
51
|
};
|
|
52
|
+
export type GenAIImageAgentConfig = {
|
|
53
|
+
apiKey?: string;
|
|
54
|
+
};
|
|
50
55
|
export type MovieAgentInputs = AgentPromptInputs & {
|
|
51
56
|
imagePath?: string;
|
|
57
|
+
movieFile: string;
|
|
52
58
|
};
|
|
53
59
|
export type GoogleMovieAgentParams = ImageAgentParams & {
|
|
54
60
|
duration?: number;
|