mulmocast 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/README.md +18 -3
  2. package/assets/templates/ghibli_shorts.json +34 -0
  3. package/assets/templates/shorts.json +18 -0
  4. package/assets/templates/trailer.json +25 -0
  5. package/lib/actions/audio.d.ts +2 -1
  6. package/lib/actions/audio.js +35 -17
  7. package/lib/actions/captions.js +5 -5
  8. package/lib/actions/images.d.ts +2 -1
  9. package/lib/actions/images.js +90 -58
  10. package/lib/actions/movie.js +53 -16
  11. package/lib/actions/pdf.js +3 -3
  12. package/lib/actions/translate.d.ts +2 -1
  13. package/lib/actions/translate.js +21 -16
  14. package/lib/agents/combine_audio_files_agent.js +4 -0
  15. package/lib/agents/image_google_agent.d.ts +4 -1
  16. package/lib/agents/image_google_agent.js +3 -2
  17. package/lib/agents/image_openai_agent.d.ts +5 -3
  18. package/lib/agents/image_openai_agent.js +35 -7
  19. package/lib/agents/index.d.ts +2 -1
  20. package/lib/agents/index.js +2 -1
  21. package/lib/agents/movie_google_agent.d.ts +9 -2
  22. package/lib/agents/movie_google_agent.js +24 -16
  23. package/lib/agents/tts_elevenlabs_agent.d.ts +4 -0
  24. package/lib/agents/tts_elevenlabs_agent.js +60 -0
  25. package/lib/agents/tts_google_agent.js +1 -1
  26. package/lib/agents/tts_nijivoice_agent.js +3 -2
  27. package/lib/agents/tts_openai_agent.js +1 -1
  28. package/lib/cli/commands/audio/handler.js +4 -1
  29. package/lib/cli/commands/image/handler.js +4 -1
  30. package/lib/cli/commands/movie/handler.js +4 -1
  31. package/lib/cli/commands/pdf/handler.js +4 -1
  32. package/lib/cli/commands/translate/handler.js +4 -1
  33. package/lib/cli/helpers.d.ts +3 -3
  34. package/lib/cli/helpers.js +38 -20
  35. package/lib/index.d.ts +5 -0
  36. package/lib/index.js +5 -0
  37. package/lib/methods/mulmo_media_source.d.ts +1 -0
  38. package/lib/methods/mulmo_media_source.js +12 -0
  39. package/lib/methods/mulmo_script.d.ts +1 -1
  40. package/lib/methods/mulmo_script.js +9 -5
  41. package/lib/methods/mulmo_studio_context.d.ts +5 -0
  42. package/lib/methods/mulmo_studio_context.js +23 -0
  43. package/lib/types/index.d.ts +1 -0
  44. package/lib/types/index.js +1 -0
  45. package/lib/types/schema.d.ts +1513 -290
  46. package/lib/types/schema.js +26 -35
  47. package/lib/types/type.d.ts +4 -1
  48. package/lib/utils/file.d.ts +5 -15
  49. package/lib/utils/file.js +14 -21
  50. package/lib/utils/filters.js +4 -4
  51. package/lib/utils/image_plugins/beat.d.ts +4 -0
  52. package/lib/utils/image_plugins/beat.js +7 -0
  53. package/lib/utils/image_plugins/image.d.ts +1 -1
  54. package/lib/utils/image_plugins/index.d.ts +2 -1
  55. package/lib/utils/image_plugins/index.js +2 -1
  56. package/lib/utils/image_plugins/movie.d.ts +1 -1
  57. package/lib/utils/image_plugins/source.js +2 -2
  58. package/lib/utils/preprocess.d.ts +26 -23
  59. package/lib/utils/preprocess.js +4 -0
  60. package/package.json +8 -8
  61. package/scripts/templates/movie_prompts_no_text_template.json +50 -0
  62. package/scripts/templates/shorts_template.json +52 -0

package/lib/actions/movie.js
@@ -1,8 +1,9 @@
- import { GraphAILogger } from "graphai";
+ import { GraphAILogger, assert } from "graphai";
+ import { mulmoTransitionSchema } from "../types/index.js";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { getAudioArtifactFilePath, getOutputVideoFilePath, writingMessage } from "../utils/file.js";
  import { FfmpegContextAddInput, FfmpegContextInit, FfmpegContextPushFormattedAudio, FfmpegContextGenerateOutput } from "../utils/ffmpeg_utils.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  // const isMac = process.platform === "darwin";
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
  export const getVideoPart = (inputIndex, mediaType, duration, canvasInfo) => {
@@ -38,10 +39,10 @@ export const getAudioPart = (inputIndex, duration, delay, mixAudio) => {
  `[${audioId}]`,
  };
  };
- const getOutputOption = (audioId) => {
+ const getOutputOption = (audioId, videoId) => {
  return [
  "-preset medium", // Changed from veryfast to medium for better compression
- "-map [v]", // Map the video stream
+ `-map [${videoId}]`, // Map the video stream
  `-map ${audioId}`, // Map the audio stream
  `-c:v ${videoCodec}`, // Set video codec
  ...(videoCodec === "libx264" ? ["-crf", "26"] : []), // Add CRF for libx264
@@ -61,20 +62,27 @@ const getOutputOption = (audioId) => {
  const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, caption) => {
  const start = performance.now();
  const ffmpegContext = FfmpegContextInit();
- if (studio.beats.some((beat) => !beat.imageFile)) {
- GraphAILogger.info("beat.imageFile is not set. Please run `yarn run images ${file}` ");
- return;
+ const missingIndex = studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
+ if (missingIndex !== -1) {
+ GraphAILogger.info(`ERROR: beat.imageFile or beat.movieFile is not set on beat ${missingIndex}.`);
+ return false;
  }
  const canvasInfo = MulmoScriptMethods.getCanvasSize(studio.script);
  // Add each image input
  const filterComplexVideoIds = [];
  const filterComplexAudioIds = [];
+ const transitionVideoIds = [];
+ const beatTimestamps = [];
  studio.beats.reduce((timestamp, studioBeat, index) => {
  const beat = studio.script.beats[index];
- if (!studioBeat.imageFile || !studioBeat.duration) {
- throw new Error(`studioBeat.imageFile or studioBeat.duration is not set: index=${index}`);
+ const sourceFile = studioBeat.movieFile ?? studioBeat.imageFile;
+ if (!sourceFile) {
+ throw new Error(`studioBeat.imageFile or studioBeat.movieFile is not set: index=${index}`);
  }
- const inputIndex = FfmpegContextAddInput(ffmpegContext, studioBeat.movieFile ?? studioBeat.imageFile);
+ if (!studioBeat.duration) {
+ throw new Error(`studioBeat.duration is not set: index=${index}`);
+ }
+ const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
  const mediaType = studioBeat.movieFile ? "movie" : MulmoScriptMethods.getImageType(studio.script, beat);
  const extraPadding = (() => {
  // We need to consider only intro and outro padding because the other paddings were already added to the beat.duration
@@ -98,16 +106,43 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
  else {
  filterComplexVideoIds.push(videoId);
  }
+ if (studio.script.movieParams?.transition && index < studio.beats.length - 1) {
+ const sourceId = filterComplexVideoIds.pop();
+ ffmpegContext.filterComplex.push(`[${sourceId}]split=2[${sourceId}_0][${sourceId}_1]`);
+ filterComplexVideoIds.push(`${sourceId}_0`);
+ transitionVideoIds.push(`${sourceId}_1`);
+ }
  if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0) {
  const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
  filterComplexAudioIds.push(audioId);
  ffmpegContext.filterComplex.push(audioPart);
  }
+ beatTimestamps.push(timestamp);
  return timestamp + duration;
  }, 0);
+ assert(filterComplexVideoIds.length === studio.beats.length, "videoIds.length !== studio.beats.length");
+ assert(beatTimestamps.length === studio.beats.length, "beatTimestamps.length !== studio.beats.length");
  // console.log("*** images", images.audioIds);
  // Concatenate the trimmed images
- ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[v]`);
+ const concatVideoId = "concat_video";
+ ffmpegContext.filterComplex.push(`${filterComplexVideoIds.map((id) => `[${id}]`).join("")}concat=n=${studio.beats.length}:v=1:a=0[${concatVideoId}]`);
+ // Add tranditions if needed
+ const mixedVideoId = (() => {
+ if (studio.script.movieParams?.transition && transitionVideoIds.length > 1) {
+ const transition = mulmoTransitionSchema.parse(studio.script.movieParams.transition);
+ return transitionVideoIds.reduce((acc, transitionVideoId, index) => {
+ const transitionStartTime = beatTimestamps[index + 1] - 0.05; // 0.05 is to avoid flickering
+ const processedVideoId = `${transitionVideoId}_f`;
+ // TODO: This mechanism does not work for video beats yet. It works only with image beats.
+ // If we can to add other transition types than fade, we need to add them here.
+ ffmpegContext.filterComplex.push(`[${transitionVideoId}]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[${processedVideoId}]`);
+ const outputId = `${transitionVideoId}_o`;
+ ffmpegContext.filterComplex.push(`[${acc}][${processedVideoId}]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[${outputId}]`);
+ return outputId;
+ }, concatVideoId);
+ }
+ return concatVideoId;
+ })();
  const audioIndex = FfmpegContextAddInput(ffmpegContext, audioArtifactFilePath); // Add audio input
  const artifactAudioId = `${audioIndex}:a`;
  const ffmpegContextAudioId = (() => {
@@ -121,23 +156,25 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, studio, capti
  }
  return artifactAudioId;
  })();
- await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId));
+ await FfmpegContextGenerateOutput(ffmpegContext, outputVideoPath, getOutputOption(ffmpegContextAudioId, mixedVideoId));
  const end = performance.now();
  GraphAILogger.info(`Video created successfully! ${Math.round(end - start) / 1000} sec`);
  GraphAILogger.info(studio.script.title);
  GraphAILogger.info((studio.script.references ?? []).map((reference) => `${reference.title} (${reference.url})`).join("\n"));
+ return true;
  };
  export const movie = async (context) => {
- MulmoStudioMethods.setSessionState(context.studio, "video", true);
+ MulmoStudioContextMethods.setSessionState(context, "video", true);
  try {
  const { studio, fileDirs, caption } = context;
  const { outDirPath } = fileDirs;
  const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, studio.filename);
  const outputVideoPath = getOutputVideoFilePath(outDirPath, studio.filename, context.lang, caption);
- await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption);
- writingMessage(outputVideoPath);
+ if (await createVideo(audioArtifactFilePath, outputVideoPath, studio, caption)) {
+ writingMessage(outputVideoPath);
+ }
  }
  finally {
- MulmoStudioMethods.setSessionState(context.studio, "video", false);
+ MulmoStudioContextMethods.setSessionState(context, "video", false);
  }
  };
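
The transition mechanism introduced here works by splitting each beat's video stream in ffmpeg: one copy feeds the concat chain, the other is faded out (with an alpha channel) and overlaid on top of the concatenated video around the next beat's start time. A minimal sketch of the filter_complex strings this path appears to emit, assuming two image beats, a fade duration of 0.3 s, and the second beat starting at t = 5.0 s; the stream IDs and timings are illustrative, not taken from the package:

```typescript
// Illustrative only: mirrors the string construction in the hunk above for a 2-beat script.
const transition = { type: "fade", duration: 0.3 }; // assumed shape accepted by mulmoTransitionSchema
const transitionStartTime = 5.0 - 0.05; // next beat start minus 0.05 s, as in the diff, to avoid flickering
const filterComplex = [
  "[v0]split=2[v0_0][v0_1]", // keep one copy for concat, one for the overlay
  "[v0_0][v1]concat=n=2:v=1:a=0[concat_video]", // concatenate the per-beat streams
  `[v0_1]format=yuva420p,fade=t=out:d=${transition.duration}:alpha=1,setpts=PTS-STARTPTS+${transitionStartTime}/TB[v0_1_f]`, // fade the copy out
  `[concat_video][v0_1_f]overlay=enable='between(t,${transitionStartTime},${transitionStartTime + transition.duration})'[v0_1_o]`, // blend it over the concat output
];
console.log(filterComplex.join(";\n")); // "-map [v0_1_o]" then becomes the videoId passed to getOutputOption()
```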

package/lib/actions/pdf.js
@@ -6,7 +6,7 @@ import { chunkArray, isHttp, localizedText } from "../utils/utils.js";
  import { getOutputPdfFilePath, writingMessage } from "../utils/file.js";
  import { MulmoScriptMethods } from "../methods/index.js";
  import { fontSize, textMargin, drawSize, wrapText } from "../utils/pdf.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  const imagesPerPage = 4;
  const offset = 10;
  const handoutImageRatio = 0.5;
@@ -224,10 +224,10 @@ const generatePdf = async (context, pdfMode, pdfSize) => {
  };
  export const pdf = async (context, pdfMode, pdfSize) => {
  try {
- MulmoStudioMethods.setSessionState(context.studio, "pdf", true);
+ MulmoStudioContextMethods.setSessionState(context, "pdf", true);
  await generatePdf(context, pdfMode, pdfSize);
  }
  finally {
- MulmoStudioMethods.setSessionState(context.studio, "pdf", false);
+ MulmoStudioContextMethods.setSessionState(context, "pdf", false);
  }
  };

package/lib/actions/translate.d.ts
@@ -1,3 +1,4 @@
  import "dotenv/config";
+ import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const translate: (context: MulmoStudioContext) => Promise<void>;
+ export declare const translate: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;

package/lib/actions/translate.js
@@ -6,19 +6,19 @@ import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
  import { getOutputStudioFilePath, mkdir, writingMessage } from "../utils/file.js";
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
- import { MulmoStudioMethods } from "../methods/mulmo_studio.js";
+ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  const vanillaAgents = agents.default ?? agents;
  const translateGraph = {
  version: 0.5,
  nodes: {
- studio: {},
+ context: {},
  defaultLang: {},
  outDirPath: {},
  outputStudioFilePath: {},
  lang: {
  agent: "stringUpdateTextAgent",
  inputs: {
- newText: ":studio.script.lang",
+ newText: ":context.studio.script.lang",
  oldText: ":defaultLang",
  },
  },
@@ -27,15 +27,15 @@ const translateGraph = {
  isResult: true,
  agent: "mergeObjectAgent",
  inputs: {
- items: [":studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
+ items: [":context.studio", { multiLingual: ":beatsMap.mergeMultiLingualData" }],
  },
  },
  beatsMap: {
  agent: "mapAgent",
  inputs: {
  targetLangs: ":targetLangs",
- studio: ":studio",
- rows: ":studio.script.beats",
+ context: ":context",
+ rows: ":context.studio.script.beats",
  lang: ":lang",
  },
  params: {
@@ -52,7 +52,7 @@ const translateGraph = {
  },
  inputs: {
  index: ":__mapIndex",
- rows: ":studio.multiLingual",
+ rows: ":context.studio.multiLingual",
  },
  },
  preprocessMultiLingual: {
@@ -62,7 +62,7 @@ const translateGraph = {
  multiLingual: ":multiLingual",
  rows: ":targetLangs",
  lang: ":lang.text",
- studio: ":studio",
+ context: ":context",
  beatIndex: ":__mapIndex",
  },
  params: {
@@ -79,7 +79,7 @@ const translateGraph = {
  multiLingual: ":multiLingual", // for cache
  lang: ":lang", // for cache
  beatIndex: ":beatIndex", // for cache
- studio: ":studio", // for cache
+ mulmoContext: ":context", // for cache
  system: translateSystemPrompt,
  prompt: translatePrompts,
  },
@@ -175,7 +175,7 @@ const translateGraph = {
  };
  const localizedTextCacheAgentFilter = async (context, next) => {
  const { namedInputs } = context;
- const { studio, targetLang, beat, beatIndex, lang, multiLingual } = namedInputs;
+ const { mulmoContext, targetLang, beat, beatIndex, lang, multiLingual } = namedInputs;
  if (!beat.text) {
  return { text: "" };
  }
@@ -192,11 +192,11 @@ const localizedTextCacheAgentFilter = async (context, next) => {
  return { text: beat.text };
  }
  try {
- MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, true);
+ MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, true);
  return await next(context);
  }
  finally {
- MulmoStudioMethods.setBeatSessionState(studio, "multiLingual", beatIndex, false);
+ MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, false);
  }
  };
  const agentFilters = [
@@ -208,20 +208,25 @@ const agentFilters = [
  ];
  const defaultLang = "en";
  const targetLangs = ["ja", "en"];
- export const translate = async (context) => {
+ export const translate = async (context, callbacks) => {
  try {
- MulmoStudioMethods.setSessionState(context.studio, "multiLingual", true);
+ MulmoStudioContextMethods.setSessionState(context, "multiLingual", true);
  const { studio, fileDirs } = context;
  const { outDirPath } = fileDirs;
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, studio.filename);
  mkdir(outDirPath);
  assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
  const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
- graph.injectValue("studio", studio);
+ graph.injectValue("context", context);
  graph.injectValue("defaultLang", defaultLang);
  graph.injectValue("targetLangs", targetLangs);
  graph.injectValue("outDirPath", outDirPath);
  graph.injectValue("outputStudioFilePath", outputStudioFilePath);
+ if (callbacks) {
+ callbacks.forEach((callback) => {
+ graph.registerCallback(callback);
+ });
+ }
  const results = await graph.run();
  writingMessage(outputStudioFilePath);
  if (results.mergeStudioResult) {
@@ -229,6 +234,6 @@ export const translate = async (context) => {
  }
  }
  finally {
- MulmoStudioMethods.setSessionState(context.studio, "multiLingual", false);
+ MulmoStudioContextMethods.setSessionState(context, "multiLingual", false);
  }
  };
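
With the optional callbacks parameter added here, callers can observe the translation graph while it runs; each callback is passed to graph.registerCallback before graph.run. A hypothetical invocation, assuming the deep import paths shown in this diff resolve in your setup (the callback body and the context preparation are illustrative, not from the package):

```typescript
import type { CallbackFunction } from "graphai";
import { translate } from "mulmocast/lib/actions/translate.js"; // path per this diff; adjust to your resolver
import type { MulmoStudioContext } from "mulmocast/lib/types/index.js";

declare const context: MulmoStudioContext; // prepared elsewhere (studio, fileDirs, lang, ...)

// Hypothetical progress observer; the payload is whatever GraphAI hands to registered callbacks.
const onUpdate: CallbackFunction = (log) => {
  console.log("translate progress:", log);
};

await translate(context, [onUpdate]);
```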

package/lib/agents/combine_audio_files_agent.js
@@ -26,11 +26,15 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
  const totalPadding = await (async () => {
  if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
  const pathOrUrl = beat.image.source.kind === "url" ? beat.image.source.url : beat.image.source.path;
+ // NOTE: We respect the duration of the movie, only if the movie is specified as a madia source, NOT generated.
  const movieDuration = await ffmpegGetMediaDuration(pathOrUrl);
  if (movieDuration > audioDuration) {
  return padding + (movieDuration - audioDuration);
  }
  }
+ else if (beat.duration && beat.duration > audioDuration) {
+ return padding + (beat.duration - audioDuration);
+ }
  return padding;
  })();
  studioBeat.duration = audioDuration + totalPadding;
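
The rule this hunk adds can be read as: a beat's final duration is the narration length plus padding, and the padding grows when either a referenced movie file (url/path source) or an explicit beat.duration outlasts the audio. A small worked example with illustrative numbers:

```typescript
// Illustrative numbers only: 4.0 s of narration, 0.3 s base padding.
const audioDuration = 4.0;
const padding = 0.3;

// Case 1: the beat references a 6.5 s movie file, so the movie length wins.
const movieDuration = 6.5;
const withMovie = audioDuration + padding + (movieDuration - audioDuration); // studioBeat.duration = 6.8

// Case 2: no movie source, but the script sets beat.duration = 5.0, so that wins.
const beatDuration = 5.0;
const withBeatDuration = audioDuration + padding + (beatDuration - audioDuration); // studioBeat.duration = 5.3

console.log(withMovie, withBeatDuration); // 6.8 5.3
```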

package/lib/agents/image_google_agent.d.ts
@@ -5,7 +5,10 @@ export type ImageGoogleConfig = {
  };
  export declare const imageGoogleAgent: AgentFunction<{
  model: string;
- aspectRatio: string;
+ canvasSize: {
+ width: number;
+ height: number;
+ };
  }, {
  buffer: Buffer;
  }, {

package/lib/agents/image_google_agent.js
@@ -1,4 +1,5 @@
  import { GraphAILogger } from "graphai";
+ import { getAspectRatio } from "./movie_google_agent.js";
  async function generateImage(projectId, model, token, prompt, aspectRatio) {
  const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}:predict`;
  try {
@@ -50,9 +51,9 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
  throw error;
  }
  }
- export const imageGoogleAgent = async ({ namedInputs, params, config, }) => {
+ export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
  const { prompt } = namedInputs;
- const aspectRatio = params.aspectRatio ?? "16:9";
+ const aspectRatio = getAspectRatio(params.canvasSize);
  const model = params.model ?? "imagen-3.0-fast-generate-001";
  //const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
  const projectId = config?.projectId;

package/lib/agents/image_openai_agent.d.ts
@@ -1,16 +1,18 @@
  import { AgentFunction, AgentFunctionInfo } from "graphai";
- type OpenAIImageSize = "1792x1024" | "auto" | "1024x1024" | "1536x1024" | "1024x1536" | "256x256";
  type OpenAIModeration = "low" | "auto";
  export declare const imageOpenaiAgent: AgentFunction<{
  apiKey: string;
  model: string;
- size: OpenAIImageSize | null | undefined;
  moderation: OpenAIModeration | null | undefined;
- images: string[] | null | undefined;
+ canvasSize: {
+ width: number;
+ height: number;
+ };
  }, {
  buffer: Buffer;
  }, {
  prompt: string;
+ images: string[] | null | undefined;
  }>;
  declare const imageOpenaiAgentInfo: AgentFunctionInfo;
  export default imageOpenaiAgentInfo;

package/lib/agents/image_openai_agent.js
@@ -1,15 +1,41 @@
  import fs from "fs";
+ import path from "path";
  import OpenAI, { toFile } from "openai";
  // https://platform.openai.com/docs/guides/image-generation
  export const imageOpenaiAgent = async ({ namedInputs, params }) => {
- const { prompt } = namedInputs;
- const { apiKey, model, size, moderation, images } = params;
+ const { prompt, images } = namedInputs;
+ const { apiKey, moderation, canvasSize } = params;
+ const model = params.model ?? "dall-e-3";
  const openai = new OpenAI({ apiKey });
+ const size = (() => {
+ if (model === "gpt-image-1") {
+ if (canvasSize.width > canvasSize.height) {
+ return "1536x1024";
+ }
+ else if (canvasSize.width < canvasSize.height) {
+ return "1024x1536";
+ }
+ else {
+ return "1024x1024";
+ }
+ }
+ else {
+ if (canvasSize.width > canvasSize.height) {
+ return "1792x1024";
+ }
+ else if (canvasSize.width < canvasSize.height) {
+ return "1024x1792";
+ }
+ else {
+ return "1024x1024";
+ }
+ }
+ })();
  const imageOptions = {
- model: model ?? "dall-e-3",
+ model,
  prompt,
  n: 1,
- size: size ?? (model === "gpt-image-1" ? "1536x1024" : "1792x1024"),
+ size,
  };
  if (model === "gpt-image-1") {
  imageOptions.moderation = moderation || "auto";
@@ -17,9 +43,11 @@ export const imageOpenaiAgent = async ({ namedInputs, params }) => {
  const response = await (async () => {
  const targetSize = imageOptions.size;
  if ((images ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
- const imagelist = await Promise.all((images ?? []).map(async (file) => await toFile(fs.createReadStream(file), null, {
- type: "image/png", // TODO: Support JPEG as well
- })));
+ const imagelist = await Promise.all((images ?? []).map(async (file) => {
+ const ext = path.extname(file).toLowerCase();
+ const type = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" : "image/png";
+ return await toFile(fs.createReadStream(file), null, { type });
+ }));
  return await openai.images.edit({ ...imageOptions, size: targetSize, image: imagelist });
  }
  else {
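
The image size is now derived from the script's canvas dimensions instead of being passed in: landscape, portrait, and square canvases map to each model's supported resolutions (1536x1024 / 1024x1536 / 1024x1024 for gpt-image-1, 1792x1024 / 1024x1792 / 1024x1024 otherwise). The same mapping expressed as a standalone helper, for illustration only (not part of the package API):

```typescript
type CanvasSize = { width: number; height: number };

// Mirrors the size-selection logic in the hunk above.
const pickOpenAIImageSize = (model: string, canvasSize: CanvasSize): string => {
  const landscape = canvasSize.width > canvasSize.height;
  const portrait = canvasSize.width < canvasSize.height;
  if (model === "gpt-image-1") {
    return landscape ? "1536x1024" : portrait ? "1024x1536" : "1024x1024";
  }
  return landscape ? "1792x1024" : portrait ? "1024x1792" : "1024x1024";
};

console.log(pickOpenAIImageSize("gpt-image-1", { width: 1080, height: 1920 })); // "1024x1536"
console.log(pickOpenAIImageSize("dall-e-3", { width: 1280, height: 720 })); // "1792x1024"
```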

package/lib/agents/index.d.ts
@@ -2,6 +2,7 @@ import addBGMAgent from "./add_bgm_agent.js";
  import combineAudioFilesAgent from "./combine_audio_files_agent.js";
  import imageGoogleAgent from "./image_google_agent.js";
  import imageOpenaiAgent from "./image_openai_agent.js";
+ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
  import ttsOpenaiAgent from "./tts_openai_agent.js";
  import validateSchemaAgent from "./validate_schema_agent.js";
@@ -9,4 +10,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
  import { textInputAgent } from "@graphai/input_agents";
  import { openAIAgent } from "@graphai/openai_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };

package/lib/agents/index.js
@@ -2,6 +2,7 @@ import addBGMAgent from "./add_bgm_agent.js";
  import combineAudioFilesAgent from "./combine_audio_files_agent.js";
  import imageGoogleAgent from "./image_google_agent.js";
  import imageOpenaiAgent from "./image_openai_agent.js";
+ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
  import ttsOpenaiAgent from "./tts_openai_agent.js";
  import validateSchemaAgent from "./validate_schema_agent.js";
@@ -10,4 +11,4 @@ import { textInputAgent } from "@graphai/input_agents";
  import { openAIAgent } from "@graphai/openai_agent";
  // import * as vanilla from "@graphai/vanilla";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };

package/lib/agents/movie_google_agent.d.ts
@@ -3,15 +3,22 @@ export type MovieGoogleConfig = {
  projectId?: string;
  token?: string;
  };
+ export declare const getAspectRatio: (canvasSize: {
+ width: number;
+ height: number;
+ }) => string;
  export declare const movieGoogleAgent: AgentFunction<{
  model: string;
- aspectRatio: string;
+ canvasSize: {
+ width: number;
+ height: number;
+ };
  duration?: number;
  }, {
  buffer: Buffer;
  }, {
  prompt: string;
- imagePath: string;
+ imagePath?: string;
  }, MovieGoogleConfig>;
  declare const movieGoogleAgentInfo: AgentFunctionInfo;
  export default movieGoogleAgentInfo;

package/lib/agents/movie_google_agent.js
@@ -2,26 +2,29 @@ import { readFileSync } from "fs";
  import { GraphAILogger, sleep } from "graphai";
  async function generateMovie(projectId, model, token, prompt, imagePath, aspectRatio, duration) {
  const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}`;
- // Prepare the payload for the API request
- const buffer = readFileSync(imagePath);
- const bytesBase64Encoded = buffer.toString("base64");
  const payload = {
  instances: [
  {
  prompt: prompt,
- image: {
- bytesBase64Encoded,
- mimeType: "image/png",
- },
+ image: undefined,
  },
  ],
  parameters: {
  sampleCount: 1,
  aspectRatio: aspectRatio,
- //safetySetting: "block_only_high",
+ safetySetting: "block_only_high",
+ personGeneration: "allow_all",
  durationSeconds: duration,
  },
  };
+ if (imagePath) {
+ const buffer = readFileSync(imagePath);
+ const bytesBase64Encoded = buffer.toString("base64");
+ payload.instances[0].image = {
+ bytesBase64Encoded,
+ mimeType: "image/png",
+ };
+ }
  // Make the API call using fetch
  const response = await fetch(`${GOOGLE_IMAGEN_ENDPOINT}:predictLongRunning`, {
  method: "POST",
@@ -32,6 +35,7 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
  body: JSON.stringify(payload),
  });
  if (!response.ok) {
+ GraphAILogger.info("create project on google cloud console and setup the project. More details see readme.");
  throw new Error(`Error: ${response.status} - ${response.statusText}`);
  }
  const initialResponse = await response.json();
@@ -72,18 +76,22 @@ async function generateMovie(projectId, model, token, prompt, imagePath, aspectR
  }
  return undefined;
  }
+ export const getAspectRatio = (canvasSize) => {
+ if (canvasSize.width > canvasSize.height) {
+ return "16:9";
+ }
+ else if (canvasSize.width < canvasSize.height) {
+ return "9:16";
+ }
+ else {
+ return "1:1";
+ }
+ };
  export const movieGoogleAgent = async ({ namedInputs, params, config }) => {
  const { prompt, imagePath } = namedInputs;
- /*
- if (prompt) {
- const buffer = Buffer.from(prompt);
- return { buffer };
- }
- */
- const aspectRatio = params.aspectRatio ?? "16:9";
+ const aspectRatio = getAspectRatio(params.canvasSize);
  const model = params.model ?? "veo-2.0-generate-001"; // "veo-3.0-generate-preview";
  const duration = params.duration ?? 8;
- //const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
  const projectId = config?.projectId;
  const token = config?.token;
  try {
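
Both Google agents now derive the aspect ratio from the script's canvas size through the newly exported getAspectRatio helper, and imagePath is optional so the Veo agent can run text-to-video (no source frame) as well as image-to-video. A short usage sketch of the helper, assuming the deep import path in this diff resolves in your setup:

```typescript
// Path taken from this diff; adjust to how your project resolves the package.
import { getAspectRatio } from "mulmocast/lib/agents/movie_google_agent.js";

console.log(getAspectRatio({ width: 1280, height: 720 })); // "16:9" (landscape canvas)
console.log(getAspectRatio({ width: 1080, height: 1920 })); // "9:16" (portrait canvas)
console.log(getAspectRatio({ width: 1024, height: 1024 })); // "1:1" (square canvas)
```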

package/lib/agents/tts_elevenlabs_agent.d.ts
@@ -0,0 +1,4 @@
+ import type { AgentFunction, AgentFunctionInfo } from "graphai";
+ export declare const ttsElevenlabsAgent: AgentFunction;
+ declare const ttsElevenlabsAgentInfo: AgentFunctionInfo;
+ export default ttsElevenlabsAgentInfo;

package/lib/agents/tts_elevenlabs_agent.js
@@ -0,0 +1,60 @@
+ import { GraphAILogger } from "graphai";
+ export const ttsElevenlabsAgent = async ({ namedInputs, params }) => {
+ const { text } = namedInputs;
+ const { voice, model, stability, similarityBoost, suppressError } = params;
+ const apiKey = process.env.ELEVENLABS_API_KEY;
+ if (!apiKey) {
+ throw new Error("ELEVENLABS_API_KEY environment variable is required");
+ }
+ if (!voice) {
+ throw new Error("Voice ID is required");
+ }
+ try {
+ const requestBody = {
+ text,
+ model_id: model ?? "eleven_monolingual_v1",
+ voice_settings: {
+ stability: stability ?? 0.5,
+ similarity_boost: similarityBoost ?? 0.75,
+ },
+ };
+ GraphAILogger.log("ElevenLabs TTS options", requestBody);
+ const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voice}`, {
+ method: "POST",
+ headers: {
+ Accept: "audio/mpeg",
+ "Content-Type": "application/json",
+ "xi-api-key": apiKey,
+ },
+ body: JSON.stringify(requestBody),
+ });
+ if (!response.ok) {
+ throw new Error(`Eleven Labs API error: ${response.status} ${response.statusText}`);
+ }
+ const arrayBuffer = await response.arrayBuffer();
+ const buffer = Buffer.from(arrayBuffer);
+ return { buffer };
+ }
+ catch (e) {
+ if (suppressError) {
+ return {
+ error: e,
+ };
+ }
+ GraphAILogger.info(e);
+ throw new Error("TTS Eleven Labs Error");
+ }
+ };
+ const ttsElevenlabsAgentInfo = {
+ name: "ttsElevenlabsAgent",
+ agent: ttsElevenlabsAgent,
+ mock: ttsElevenlabsAgent,
+ samples: [],
+ description: "Eleven Labs TTS agent",
+ category: ["tts"],
+ author: "Receptron Team",
+ repository: "https://github.com/receptron/mulmocast-cli/",
+ license: "MIT",
+ environmentVariables: ["ELEVENLABS_API_KEY"],
+ };
+ export default ttsElevenlabsAgentInfo;
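
The new agent follows the same contract as the existing TTS agents: namedInputs.text in, { buffer } (MP3 audio) out, with voice, model, stability, and similarityBoost supplied via params. A hypothetical direct call outside a GraphAI graph, assuming the deep import path resolves; "your-voice-id" is a placeholder ElevenLabs voice ID and ELEVENLABS_API_KEY must be set:

```typescript
import fs from "node:fs";
// Path taken from this diff; adjust to how your project resolves the package.
import { ttsElevenlabsAgent } from "mulmocast/lib/agents/tts_elevenlabs_agent.js";

// The cast skips the rest of GraphAI's AgentFunctionContext plumbing, which the agent body
// above does not use; inside the CLI this agent is normally invoked by the audio graph.
const result = (await ttsElevenlabsAgent({
  namedInputs: { text: "Hello from mulmocast." },
  params: { voice: "your-voice-id", stability: 0.5, similarityBoost: 0.75 },
} as never)) as { buffer: Buffer };

fs.writeFileSync("hello.mp3", result.buffer); // the endpoint returns MP3 (Accept: audio/mpeg)
```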

package/lib/agents/tts_google_agent.js
@@ -44,7 +44,7 @@ const ttsGoogleAgentInfo = {
  description: "Google TTS agent",
  category: ["tts"],
  author: "Receptron Team",
- repository: "https://github.com/receptron/graphai-agents/tree/main/tts/tts-openai-agent",
+ repository: "https://github.com/receptron/mulmocast-cli/",
  license: "MIT",
  environmentVariables: ["OPENAI_API_KEY"],
  };

package/lib/agents/tts_nijivoice_agent.js
@@ -57,8 +57,9 @@ const ttsNijivoiceAgentInfo = {
  samples: [],
  description: "TTS nijivoice agent",
  category: ["tts"],
- author: "isamu arimoto",
- repository: "https://github.com/receptron/graphai/",
+ author: "Receptron Team",
+ repository: "https://github.com/receptron/mulmocast-cli/",
  license: "MIT",
+ environmentVariables: ["NIJIVOICE_API_KEY"],
  };
  export default ttsNijivoiceAgentInfo;