mulmocast 0.0.10 → 0.0.12
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- package/README.md +18 -3
- package/assets/templates/ghibli_shorts.json +34 -0
- package/assets/templates/shorts.json +18 -0
- package/assets/templates/trailer.json +25 -0
- package/lib/actions/audio.d.ts +2 -1
- package/lib/actions/audio.js +35 -17
- package/lib/actions/captions.js +5 -5
- package/lib/actions/images.d.ts +2 -1
- package/lib/actions/images.js +90 -58
- package/lib/actions/movie.js +53 -16
- package/lib/actions/pdf.js +3 -3
- package/lib/actions/translate.d.ts +2 -1
- package/lib/actions/translate.js +21 -16
- package/lib/agents/combine_audio_files_agent.js +4 -0
- package/lib/agents/image_google_agent.d.ts +4 -1
- package/lib/agents/image_google_agent.js +3 -2
- package/lib/agents/image_openai_agent.d.ts +5 -3
- package/lib/agents/image_openai_agent.js +35 -7
- package/lib/agents/index.d.ts +2 -1
- package/lib/agents/index.js +2 -1
- package/lib/agents/movie_google_agent.d.ts +9 -2
- package/lib/agents/movie_google_agent.js +24 -16
- package/lib/agents/tts_elevenlabs_agent.d.ts +4 -0
- package/lib/agents/tts_elevenlabs_agent.js +60 -0
- package/lib/agents/tts_google_agent.js +1 -1
- package/lib/agents/tts_nijivoice_agent.js +3 -2
- package/lib/agents/tts_openai_agent.js +1 -1
- package/lib/cli/commands/audio/handler.js +4 -1
- package/lib/cli/commands/image/handler.js +4 -1
- package/lib/cli/commands/movie/handler.js +4 -1
- package/lib/cli/commands/pdf/handler.js +4 -1
- package/lib/cli/commands/translate/handler.js +4 -1
- package/lib/cli/helpers.d.ts +3 -3
- package/lib/cli/helpers.js +38 -20
- package/lib/index.d.ts +5 -0
- package/lib/index.js +5 -0
- package/lib/methods/mulmo_media_source.d.ts +1 -0
- package/lib/methods/mulmo_media_source.js +12 -0
- package/lib/methods/mulmo_script.d.ts +1 -1
- package/lib/methods/mulmo_script.js +9 -5
- package/lib/methods/mulmo_studio_context.d.ts +5 -0
- package/lib/methods/mulmo_studio_context.js +23 -0
- package/lib/types/index.d.ts +1 -0
- package/lib/types/index.js +1 -0
- package/lib/types/schema.d.ts +1513 -290
- package/lib/types/schema.js +26 -35
- package/lib/types/type.d.ts +4 -1
- package/lib/utils/file.d.ts +5 -15
- package/lib/utils/file.js +14 -21
- package/lib/utils/filters.js +4 -4
- package/lib/utils/image_plugins/beat.d.ts +4 -0
- package/lib/utils/image_plugins/beat.js +7 -0
- package/lib/utils/image_plugins/image.d.ts +1 -1
- package/lib/utils/image_plugins/index.d.ts +2 -1
- package/lib/utils/image_plugins/index.js +2 -1
- package/lib/utils/image_plugins/movie.d.ts +1 -1
- package/lib/utils/image_plugins/source.js +2 -2
- package/lib/utils/preprocess.d.ts +26 -23
- package/lib/utils/preprocess.js +4 -0
- package/package.json +8 -8
- package/scripts/templates/movie_prompts_no_text_template.json +50 -0
- package/scripts/templates/shorts_template.json +52 -0
package/lib/actions/movie.js
CHANGED
@@ -1,8 +1,9 @@
-import { GraphAILogger } from "graphai";
+import { GraphAILogger, assert } from "graphai";
+import { mulmoTransitionSchema } from "../types/index.js";
 import { MulmoScriptMethods } from "../methods/index.js";
 import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
 import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
-import {
+import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 // const isMac = process.platform === "darwin";
 const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
 export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
@@ -38,10 +39,10 @@ export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
     `[${audioId}]`,
   };
 };
-const getOutputOption = (audioId) => {
+const getOutputOption = (audioId, videoId) => {
   return [
     "-preset medium", // Changed from veryfast to medium for better compression
-
+    `-map [${videoId}]`, // Map the video stream
     `-map ${audioId}`, // Map the audio stream
     `-c:v ${videoCodec}`, // Set video codec
     ...(videoCodec === "libx264" ? ["-crf", "26"] : []), // Add CRF for libx264
@@ -61,20 +62,27 @@ const getOutputOption = (audioId) => {
 const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
   const start = performance.now();
   const ffmpegContext = FfmpegContextInit();
-
-
-
+  const missingIndex = studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
+  if (missingIndex !== -1) {
+    GraphAILogger.info(`ERROR: beat.imageFile or beat.movieFile is not set on beat ${missingIndex}.`);
+    return false;
   }
   const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
   // Add each image input
   const filterComplexVideoIds = [];
   const filterComplexAudioIds = [];
+  const transitionVideoIds = [];
+  const beatTimestamps = [];
   studio.beats.reduce((timestamp, studioBeat, index) => {
     const beat = studio.script.beats[index];
-
-
+    const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
+    if (!sourceFile) {
+      throw new Error(`studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
     }
-
+    if (!studioBeat.duration) {
+      throw new Error(`studioBeat.duration is not set: index=${index}`);
+    }
+    const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
     const mediaType = studioBeat.movieFile ? "movie" : MulmoScriptMethods.getImageType(studio.script, beat);
     const extraPadding = (() => {
       // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
@@ -98,16 +106,43 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
     else {
       filterComplexVideoIds.push(videoId);
     }
+    if (studio.script.movieParams?.transition && index < studio.beats.length - 1) {
+      const sourceId = filterComplexVideoIds.pop();
+      ffmpegContext.filterComplex.push(`[${sourceId}]split=2[${sourceId}_0][${sourceId}_1]`);
+      filterComplexVideoIds.push(`${sourceId}_0`);
+      transitionVideoIds.push(`${sourceId}_1`);
+    }
     if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0) {
       const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
       filterComplexAudioIds.push(audioId);
       ffmpegContext.filterComplex.push(audioPart);
     }
+    beatTimestamps.push(timestamp);
     return timestamp + duration;
   }, 0);
+  assert(filterComplexVideoIds.length === studio.beats.length, "videoIds.length !== studio.beats.length");
+  assert(beatTimestamps.length === studio.beats.length, "beatTimestamps.length !== studio.beats.length");
   // console.log("*** images", images.audioIds);
   // Concatenate the trimmed images
-
+  const concatVideoId = "concat_video";
+  ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[${concatVideoId}]`);
+  // Add tranditions if needed
+  const mixedVideoId = (() => {
+    if (studio.script.movieParams?.transition && transitionVideoIds.length > 1) {
+      const transition = mulmoTransitionSchema.parse(studio.script.movieParams.transition);
+      return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
+        const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
+        const processedVideoId = `${transitionVideoId}_f`;
+        // TODO: This mechanism does not work for video beats yet. It works only with image beats.
+        // If we can to add other transition types than fade, we need to add them here.
+        ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`);
+        const outputId = `${transitionVideoId}_o`;
+        ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+        return outputId;
+      }, concatVideoId);
+    }
+    return concatVideoId;
+  })();
   const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
   const artifactAudioId = `${audioIndex}:a`;
   const ffmpegContextAudioId = (() => {
@@ -121,23 +156,25 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
     }
     return artifactAudioId;
   })();
-  await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId));
+  await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
   const end = performance.now();
   GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
   GraphAILogger.info(studio.script.title);
   GraphAILogger.info((studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
+  return true;
 };
 export const movie = async (context) => {
-
+  MulmoStudioContextMethods.setSessionState(context, "video", true);
   try {
     const { studio, fileDirs, caption } = context;
     const { outDirPath } = fileDirs;
     const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
     const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
-    await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption)
-
+    if (await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption)) {
+      writingMessage(outputVideoPath);
+    }
   }
   finally {
-
+    MulmoStudioContextMethods.setSessionState(context, "video", false);
   }
 };
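Note: the transition pass above is driven by `movieParams.transition` in the MulmoScript. A minimal sketch of enabling it, written as a JavaScript object for illustration; only `transition.duration` is visible in this diff, so the `type: "fade"` field is an assumption about mulmoTransitionSchema, not something this diff confirms:

  // Hypothetical script fragment; field names beyond "duration" are assumptions.
  const script = {
    movieParams: {
      transition: { type: "fade", duration: 0.5 }, // seconds of fade overlap between adjacent beats
    },
    beats: [ /* image beats; the TODO above notes the fade does not work for video beats yet */ ],
  };

With a transition configured, each beat's video stream is split, the extra copy is faded out and overlaid on the concatenated stream starting at the next beat's timestamp (minus 0.05 s to avoid flickering).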
package/lib/actions/pdf.js
CHANGED
@@ -6,7 +6,7 @@ import { chunkArray, isHttp, localizedText } from "../utils/utils.js";
 import { getOutputPdfFilePath, writingMessage } from "../utils/file.js";
 import { MulmoScriptMethods } from "../methods/index.js";
 import { fontSize, textMargin, drawSize, wrapText } from "../utils/pdf.js";
-import {
+import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 const imagesPerPage = 4;
 const offset = 10;
 const handoutImageRatio = 0.5;
@@ -224,10 +224,10 @@ const generatePdf = async (context, pdfMode, pdfSize) => {
 };
 export const pdf = async (context, pdfMode, pdfSize) => {
   try {
-
+    MulmoStudioContextMethods.setSessionState(context, "pdf", true);
     await generatePdf(context, pdfMode, pdfSize);
   }
   finally {
-
+    MulmoStudioContextMethods.setSessionState(context, "pdf", false);
   }
 };
package/lib/actions/translate.d.ts
CHANGED
@@ -1,3 +1,4 @@
 import "dotenv/config";
+import type { CallbackFunction } from "graphai";
 import { MulmoStudioContext } from "../types/index.js";
-export declare const translate: (context: MulmoStudioContext) => Promise<void>;
+export declare const translate: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
package/lib/actions/translate.js
CHANGED
@@ -6,19 +6,19 @@ import { fileWriteAgent } from "@graphai/vanilla_node_agents";
 import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
 import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
 import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
-import {
+import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 const vanillaAgents = agents.default ?? agents;
 const translateGraph = {
   version: 0.5,
   nodes: {
-
+    context: {},
     defaultLang: {},
     outDirPath: {},
     outputStudioFilePath: {},
     lang: {
       agent: "stringUpdateTextAgent",
       inputs: {
-        newText: ":studio.script.lang",
+        newText: ":context.studio.script.lang",
         oldText: ":defaultLang",
       },
     },
@@ -27,15 +27,15 @@ const translateGraph = {
     isResult: true,
     agent: "mergeObjectAgent",
     inputs: {
-      items: [":studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
+      items: [":context.studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
     },
   },
   beatsMap: {
     agent: "mapAgent",
     inputs: {
       targetLangs: ":targetLangs",
-
-      rows: ":studio.script.beats",
+      context: ":context",
+      rows: ":context.studio.script.beats",
       lang: ":lang",
     },
     params: {
@@ -52,7 +52,7 @@ const translateGraph = {
     },
     inputs: {
       index: ":__mapIndex",
-      rows: ":studio.multiLingual",
+      rows: ":context.studio.multiLingual",
     },
   },
   preprocessMultiLingual: {
@@ -62,7 +62,7 @@ const translateGraph = {
       multiLingual: ":multiLingual",
       rows: ":targetLangs",
       lang: ":lang.text",
-
+      context: ":context",
       beatIndex: ":__mapIndex",
     },
     params: {
@@ -79,7 +79,7 @@ const translateGraph = {
       multiLingual: ":multiLingual", // for cache
       lang: ":lang", // for cache
       beatIndex: ":beatIndex", // for cache
-
+      mulmoContext: ":context", // for cache
       system: translateSystemPrompt,
       prompt: translatePrompts,
     },
@@ -175,7 +175,7 @@ const translateGraph = {
 };
 const localizedTextCacheAgentFilter = async (context, next) => {
   const { namedInputs } = context;
-  const {
+  const { mulmoContext, targetLang, beat, beatIndex, lang, multiLingual } = namedInputs;
   if (!beat.text) {
     return { text: "" };
   }
@@ -192,11 +192,11 @@ const localizedTextCacheAgentFilter = async (context, next) => {
     return { text: beat.text };
   }
   try {
-
+    MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, true);
     return await next(context);
   }
   finally {
-
+    MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, false);
   }
 };
 const agentFilters = [
@@ -208,20 +208,25 @@ const agentFilters = [
 ];
 const defaultLang = "en";
 const targetLangs = ["ja", "en"];
-export const translate = async (context) => {
+export const translate = async (context, callbacks) => {
   try {
-
+    MulmoStudioContextMethods.setSessionState(context, "multiLingual", true);
     const { studio, fileDirs } = context;
     const { outDirPath } = fileDirs;
     const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
     mkdir(outDirPath);
     assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
     const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
-    graph.injectValue("
+    graph.injectValue("context", context);
     graph.injectValue("defaultLang", defaultLang);
     graph.injectValue("targetLangs", targetLangs);
     graph.injectValue("outDirPath", outDirPath);
     graph.injectValue("outputStudioFilePath", outputStudioFilePath);
+    if (callbacks) {
+      callbacks.forEach((callback) => {
+        graph.registerCallback(callback);
+      });
+    }
     const results = await graph.run();
     writingMessage(outputStudioFilePath);
     if (results.mergeStudioResult) {
@@ -229,6 +234,6 @@ export const translate = async (context) => {
   }
   }
   finally {
-
+    MulmoStudioContextMethods.setSessionState(context, "multiLingual", false);
   }
 };
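Note: the new optional `callbacks` argument is simply forwarded to `graph.registerCallback`, so callers can observe GraphAI node progress during translation. A minimal sketch, assuming `translate` is imported from this package (the callback payload shape comes from GraphAI's `CallbackFunction`, not from this diff):

  import { translate } from "mulmocast"; // or from lib/actions/translate.js, depending on how you consume the package

  // Hypothetical progress hook; it receives whatever GraphAI passes to registered callbacks.
  const logProgress = (event) => console.log("translate:", event);

  await translate(context, [logProgress]); // the second argument is optional; omit it for the previous behavior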
package/lib/agents/combine_audio_files_agent.js
CHANGED
@@ -26,11 +26,15 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
     const totalPadding = await (async () => {
       if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
         const pathOrUrl = beat.image.source.kind === "url" ? beat.image.source.url : beat.image.source.path;
+        // NOTE: We respect the duration of the movie, only if the movie is specified as a madia source, NOT generated.
         const movieDuration = await ffmpegGetMediaDuration(pathOrUrl);
         if (movieDuration > audioDuration) {
           return padding + (movieDuration - audioDuration);
         }
       }
+      else if (beat.duration && beat.duration > audioDuration) {
+        return padding + (beat.duration - audioDuration);
+      }
       return padding;
     })();
     studioBeat.duration = audioDuration + totalPadding;
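Worked example of the new `beat.duration` branch: with padding = 0.3 s, audioDuration = 4 s, and beat.duration = 10 s, totalPadding = 0.3 + (10 - 4) = 6.3 s, so studioBeat.duration = 4 + 6.3 = 10.3 s. Without an explicit beat.duration (and with no source movie longer than the audio), the beat would stay at 4.3 s.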
package/lib/agents/image_google_agent.js
CHANGED
@@ -1,4 +1,5 @@
 import { GraphAILogger } from "graphai";
+import { getAspectRatio } from "./movie_google_agent.js";
 async function generateImage(projectId, model, token, prompt, aspectRatio) {
   const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}:predict`;
   try {
@@ -50,9 +51,9 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
     throw error;
   }
 }
-export const imageGoogleAgent = async ({ namedInputs, params, config
+export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
   const { prompt } = namedInputs;
-  const aspectRatio = params.
+  const aspectRatio = getAspectRatio(params.canvasSize);
   const model = params.model ?? "imagen-3.0-fast-generate-001";
   //const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
   const projectId = config?.projectId;
package/lib/agents/image_openai_agent.d.ts
CHANGED
@@ -1,16 +1,18 @@
 import { AgentFunction, AgentFunctionInfo } from "graphai";
-type OpenAIImageSize = "1792x1024" | "auto" | "1024x1024" | "1536x1024" | "1024x1536" | "256x256";
 type OpenAIModeration = "low" | "auto";
 export declare const imageOpenaiAgent: AgentFunction<{
     apiKey: string;
     model: string;
-    size: OpenAIImageSize | null | undefined;
     moderation: OpenAIModeration | null | undefined;
-
+    canvasSize: {
+        width: number;
+        height: number;
+    };
 }, {
     buffer: Buffer;
 }, {
     prompt: string;
+    images: string[] | null | undefined;
 }>;
 declare const imageOpenaiAgentInfo: AgentFunctionInfo;
 export default imageOpenaiAgentInfo;
package/lib/agents/image_openai_agent.js
CHANGED
@@ -1,15 +1,41 @@
 import fs from "fs";
+import path from "path";
 import OpenAI, { toFile } from "openai";
 // https://platform.openai.com/docs/guides/image-generation
 export const imageOpenaiAgent = async ({ namedInputs, params }) => {
-  const { prompt } = namedInputs;
-  const { apiKey,
+  const { prompt, images } = namedInputs;
+  const { apiKey, moderation, canvasSize } = params;
+  const model = params.model ?? "dall-e-3";
   const openai = new OpenAI({ apiKey });
+  const size = (() => {
+    if (model === "gpt-image-1") {
+      if (canvasSize.width > canvasSize.height) {
+        return "1536x1024";
+      }
+      else if (canvasSize.width < canvasSize.height) {
+        return "1024x1536";
+      }
+      else {
+        return "1024x1024";
+      }
+    }
+    else {
+      if (canvasSize.width > canvasSize.height) {
+        return "1792x1024";
+      }
+      else if (canvasSize.width < canvasSize.height) {
+        return "1024x1792";
+      }
+      else {
+        return "1024x1024";
+      }
+    }
+  })();
   const imageOptions = {
-    model
+    model,
     prompt,
     n: 1,
-    size
+    size,
   };
   if (model === "gpt-image-1") {
     imageOptions.moderation = moderation || "auto";
@@ -17,9 +43,11 @@ export const imageOpenaiAgent = async ({ namedInputs, params }) => {
   const response = await (async () => {
     const targetSize = imageOptions.size;
     if ((images ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
-      const imagelist = await Promise.all((images ?? []).map(async (file) =>
-
-
+      const imagelist = await Promise.all((images ?? []).map(async (file) => {
+        const ext = path.extname(file).toLowerCase();
+        const type = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" : "image/png";
+        return await toFile(fs.createReadStream(file), null, { type });
+      }));
       return await openai.images.edit({ ...imageOptions, size: targetSize, image: imagelist });
     }
     else {
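Note: with the new `canvasSize` and `images` parameters, the agent picks the size string from the canvas orientation and, when reference images are supplied (and the size is one gpt-image-1 supports), routes the request through `openai.images.edit` instead of a plain generation. A minimal direct-call sketch; the import path is illustrative, and in normal use the agent is wired through GraphAI rather than called by hand:

  import { imageOpenaiAgent } from "./lib/agents/image_openai_agent.js"; // illustrative path

  const { buffer } = await imageOpenaiAgent({
    namedInputs: {
      prompt: "a quiet harbor at dawn",
      images: ["./refs/style.png"], // local files; .jpg/.jpeg are sent as image/jpeg, everything else as image/png
    },
    params: {
      apiKey: process.env.OPENAI_API_KEY,
      model: "gpt-image-1", // the landscape canvas below maps to "1536x1024", which allows the edit path
      canvasSize: { width: 1536, height: 1024 },
    },
  });
  // buffer holds the generated image, per the agent's declared output type.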
package/lib/agents/index.d.ts
CHANGED
@@ -2,6 +2,7 @@ import addBGMAgent from "./add_bgm_agent.js";
 import combineAudioFilesAgent from "./combine_audio_files_agent.js";
 import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
+import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
 import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
 import ttsOpenaiAgent from "./tts_openai_agent.js";
 import validateSchemaAgent from "./validate_schema_agent.js";
@@ -9,4 +10,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
 import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/index.js
CHANGED
@@ -2,6 +2,7 @@ import addBGMAgent from "./add_bgm_agent.js";
 import combineAudioFilesAgent from "./combine_audio_files_agent.js";
 import imageGoogleAgent from "./image_google_agent.js";
 import imageOpenaiAgent from "./image_openai_agent.js";
+import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
 import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
 import ttsOpenaiAgent from "./tts_openai_agent.js";
 import validateSchemaAgent from "./validate_schema_agent.js";
@@ -10,4 +11,4 @@ import { textInputAgent } from "@graphai/input_agents";
 import { openAIAgent } from "@graphai/openai_agent";
 // import * as vanilla from "@graphai/vanilla";
 import { fileWriteAgent } from "@graphai/vanilla_node_agents";
-export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
package/lib/agents/movie_google_agent.d.ts
CHANGED
@@ -3,15 +3,22 @@ export type MovieGoogleConfig = {
     projectId?: string;
     token?: string;
 };
+export declare const getAspectRatio: (canvasSize: {
+    width: number;
+    height: number;
+}) => string;
 export declare const movieGoogleAgent: AgentFunction<{
     model: string;
-
+    canvasSize: {
+        width: number;
+        height: number;
+    };
     duration?: number;
 }, {
     buffer: Buffer;
 }, {
     prompt: string;
-    imagePath
+    imagePath?: string;
 }, MovieGoogleConfig>;
 declare const movieGoogleAgentInfo: AgentFunctionInfo;
 export default movieGoogleAgentInfo;
package/lib/agents/movie_google_agent.js
CHANGED
@@ -2,26 +2,29 @@ import { readFileSync } from "fs";
 import { GraphAILogger, sleep } from "graphai";
 async function generateMovie(projectId, model, token, prompt, imagePath, aspectRatio, duration) {
   const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}`;
-  // Prepare the payload for the API request
-  const buffer = readFileSync(imagePath);
-  const bytesBase64Encoded = buffer.toString("base64");
   const payload = {
     instances: [
       {
         prompt: prompt,
-        image:
-          bytesBase64Encoded,
-          mimeType: "image/png",
-        },
+        image: undefined,
       },
     ],
     parameters: {
       sampleCount: 1,
       aspectRatio: aspectRatio,
-
+      safetySetting: "block_only_high",
+      personGeneration: "allow_all",
       durationSeconds: duration,
     },
   };
+  if (imagePath) {
+    const buffer = readFileSync(imagePath);
+    const bytesBase64Encoded = buffer.toString("base64");
+    payload.instances[0].image = {
+      bytesBase64Encoded,
+      mimeType: "image/png",
+    };
+  }
   // Make the API call using fetch
   const response = await fetch(`${GOOGLE_IMAGEN_ENDPOINT}:predictLongRunning`, {
     method: "POST",
@@ -32,6 +35,7 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectRatio, duration) {
     body: JSON.stringify(payload),
   });
   if (!response.ok) {
+    GraphAILogger.info("create project on google cloud console and setup the project. More details see readme.");
     throw new Error(`Error: ${response.status} - ${response.statusText}`);
   }
   const initialResponse = await response.json();
@@ -72,18 +76,22 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectRatio, duration) {
   }
   return undefined;
 }
+export const getAspectRatio = (canvasSize) => {
+  if (canvasSize.width > canvasSize.height) {
+    return "16:9";
+  }
+  else if (canvasSize.width < canvasSize.height) {
+    return "9:16";
+  }
+  else {
+    return "1:1";
+  }
+};
 export const movieGoogleAgent = async ({ namedInputs, params, config }) => {
   const { prompt, imagePath } = namedInputs;
-
-  if (prompt) {
-    const buffer = Buffer.from(prompt);
-    return { buffer };
-  }
-  */
-  const aspectRatio = params.aspectRatio ?? "16:9";
+  const aspectRatio = getAspectRatio(params.canvasSize);
   const model = params.model ?? "veo-2.0-generate-001"; // "veo-3.0-generate-preview";
   const duration = params.duration ?? 8;
-  //const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
   const projectId = config?.projectId;
   const token = config?.token;
   try {
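Note: `getAspectRatio` is now the single place that maps the script's canvas size onto Google's aspect-ratio strings, and it is reused by imageGoogleAgent earlier in this diff. A quick sketch of its behavior as shown above (import path illustrative):

  import { getAspectRatio } from "./lib/agents/movie_google_agent.js"; // illustrative path

  getAspectRatio({ width: 1920, height: 1080 }); // "16:9" (landscape)
  getAspectRatio({ width: 1080, height: 1920 }); // "9:16" (portrait)
  getAspectRatio({ width: 1024, height: 1024 }); // "1:1"  (square)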
package/lib/agents/tts_elevenlabs_agent.js
ADDED
@@ -0,0 +1,60 @@
+import { GraphAILogger } from "graphai";
+export const ttsElevenlabsAgent = async ({ namedInputs, params }) => {
+  const { text } = namedInputs;
+  const { voice, model, stability, similarityBoost, suppressError } = params;
+  const apiKey = process.env.ELEVENLABS_API_KEY;
+  if (!apiKey) {
+    throw new Error("ELEVENLABS_API_KEY environment variable is required");
+  }
+  if (!voice) {
+    throw new Error("Voice ID is required");
+  }
+  try {
+    const requestBody = {
+      text,
+      model_id: model ?? "eleven_monolingual_v1",
+      voice_settings: {
+        stability: stability ?? 0.5,
+        similarity_boost: similarityBoost ?? 0.75,
+      },
+    };
+    GraphAILogger.log("ElevenLabs TTS options", requestBody);
+    const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voice}`, {
+      method: "POST",
+      headers: {
+        Accept: "audio/mpeg",
+        "Content-Type": "application/json",
+        "xi-api-key": apiKey,
+      },
+      body: JSON.stringify(requestBody),
+    });
+    if (!response.ok) {
+      throw new Error(`Eleven Labs API error: ${response.status} ${response.statusText}`);
+    }
+    const arrayBuffer = await response.arrayBuffer();
+    const buffer = Buffer.from(arrayBuffer);
+    return { buffer };
+  }
+  catch (e) {
+    if (suppressError) {
+      return {
+        error: e,
+      };
+    }
+    GraphAILogger.info(e);
+    throw new Error("TTS Eleven Labs Error");
+  }
+};
+const ttsElevenlabsAgentInfo = {
+  name: "ttsElevenlabsAgent",
+  agent: ttsElevenlabsAgent,
+  mock: ttsElevenlabsAgent,
+  samples: [],
+  description: "Eleven Labs TTS agent",
+  category: ["tts"],
+  author: "Receptron Team",
+  repository: "https://github.com/receptron/mulmocast-cli/",
+  license: "MIT",
+  environmentVariables: ["ELEVENLABS_API_KEY"],
+};
+export default ttsElevenlabsAgentInfo;
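Note: the new ElevenLabs agent is a plain fetch wrapper around the text-to-speech endpoint that returns an MP3 buffer. A minimal direct-call sketch; the voice ID is a placeholder and the import path is illustrative (in normal use the agent is selected through the agents index):

  import { ttsElevenlabsAgent } from "./lib/agents/tts_elevenlabs_agent.js"; // illustrative path

  // Requires ELEVENLABS_API_KEY in the environment; params.voice is an ElevenLabs voice ID.
  const { buffer } = await ttsElevenlabsAgent({
    namedInputs: { text: "Hello from MulmoCast." },
    params: { voice: "YOUR_VOICE_ID", stability: 0.5, similarityBoost: 0.75 }, // model defaults to "eleven_monolingual_v1"
  });
  // buffer holds audio/mpeg data; write it out with fs.writeFileSync("out.mp3", buffer).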
package/lib/agents/tts_google_agent.js
CHANGED
@@ -44,7 +44,7 @@ const ttsGoogleAgentInfo = {
   description: "Google TTS agent",
   category: ["tts"],
   author: "Receptron Team",
-  repository: "https://github.com/receptron/
+  repository: "https://github.com/receptron/mulmocast-cli/",
   license: "MIT",
   environmentVariables: ["OPENAI_API_KEY"],
 };
package/lib/agents/tts_nijivoice_agent.js
CHANGED
@@ -57,8 +57,9 @@ const ttsNijivoiceAgentInfo = {
   samples: [],
   description: "TTS nijivoice agent",
   category: ["tts"],
-  author: "
-  repository: "https://github.com/receptron/
+  author: "Receptron Team",
+  repository: "https://github.com/receptron/mulmocast-cli/",
   license: "MIT",
+  environmentVariables: ["NIJIVOICE_API_KEY"],
 };
 export default ttsNijivoiceAgentInfo;