mulmocast 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/assets/templates/characters.json +16 -0
  2. package/assets/templates/html.json +6 -0
  3. package/lib/actions/audio.js +13 -19
  4. package/lib/actions/image_agents.d.ts +145 -0
  5. package/lib/actions/image_agents.js +59 -0
  6. package/lib/actions/image_references.d.ts +9 -0
  7. package/lib/actions/image_references.js +79 -0
  8. package/lib/actions/images.d.ts +17 -109
  9. package/lib/actions/images.js +83 -188
  10. package/lib/actions/index.d.ts +2 -0
  11. package/lib/actions/index.js +2 -0
  12. package/lib/actions/movie.js +3 -1
  13. package/lib/actions/pdf.js +5 -2
  14. package/lib/agents/image_google_agent.d.ts +2 -15
  15. package/lib/agents/image_google_agent.js +5 -5
  16. package/lib/agents/image_openai_agent.d.ts +2 -17
  17. package/lib/agents/image_openai_agent.js +9 -9
  18. package/lib/agents/movie_google_agent.d.ts +2 -17
  19. package/lib/agents/movie_google_agent.js +7 -7
  20. package/lib/agents/movie_replicate_agent.d.ts +2 -16
  21. package/lib/agents/movie_replicate_agent.js +4 -4
  22. package/lib/agents/tts_google_agent.d.ts +9 -1
  23. package/lib/agents/tts_google_agent.js +2 -2
  24. package/lib/agents/tts_nijivoice_agent.js +1 -1
  25. package/lib/agents/tts_openai_agent.d.ts +13 -1
  26. package/lib/agents/tts_openai_agent.js +2 -2
  27. package/lib/cli/helpers.js +7 -7
  28. package/lib/index.d.ts +1 -0
  29. package/lib/index.js +1 -0
  30. package/lib/methods/index.d.ts +1 -0
  31. package/lib/methods/index.js +1 -0
  32. package/lib/methods/mulmo_beat.d.ts +6 -0
  33. package/lib/methods/mulmo_beat.js +21 -0
  34. package/lib/methods/mulmo_presentation_style.d.ts +3 -1
  35. package/lib/methods/mulmo_presentation_style.js +31 -7
  36. package/lib/methods/mulmo_studio_context.js +3 -0
  37. package/lib/tools/story_to_script.js +2 -2
  38. package/lib/types/agent.d.ts +55 -0
  39. package/lib/types/agent.js +3 -0
  40. package/lib/types/schema.d.ts +560 -296
  41. package/lib/types/schema.js +19 -10
  42. package/lib/types/type.d.ts +3 -2
  43. package/lib/utils/const.d.ts +0 -1
  44. package/lib/utils/const.js +0 -1
  45. package/lib/utils/context.d.ts +24 -13
  46. package/lib/utils/context.js +1 -0
  47. package/lib/utils/ffmpeg_utils.d.ts +1 -1
  48. package/lib/utils/ffmpeg_utils.js +1 -1
  49. package/lib/utils/file.js +4 -4
  50. package/lib/utils/filters.js +3 -4
  51. package/lib/utils/markdown.js +1 -1
  52. package/lib/utils/preprocess.d.ts +15 -8
  53. package/lib/utils/provider2agent.d.ts +72 -0
  54. package/lib/utils/provider2agent.js +81 -0
  55. package/lib/utils/string.js +5 -5
  56. package/lib/utils/utils.d.ts +13 -11
  57. package/lib/utils/utils.js +56 -62
  58. package/package.json +7 -6
  59. package/scripts/templates/html.json +42 -0
  60. package/scripts/templates/image_refs.json +35 -0
@@ -1,86 +1,38 @@
  import dotenv from "dotenv";
  import fs from "fs";
  import { GraphAI, GraphAILogger, TaskManager } from "graphai";
- import * as agents from "@graphai/vanilla";
+ import { GoogleAuth } from "google-auth-library";
+ import * as vanilla from "@graphai/vanilla";
  import { openAIAgent } from "@graphai/openai_agent";
  import { anthropicAgent } from "@graphai/anthropic_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
- import { getOutputStudioFilePath, getBeatPngImagePath, getBeatMoviePath, getReferenceImagePath, mkdir } from "../utils/file.js";
- import { fileCacheAgentFilter } from "../utils/filters.js";
  import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
  import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
- import { findImagePlugin } from "../utils/image_plugins/index.js";
- import { userAssert, settings2GraphAIConfig, getExtention } from "../utils/utils.js";
- import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
- import { defaultOpenAIImageModel } from "../utils/const.js";
- import { renderHTMLToImage } from "../utils/markdown.js";
- const vanillaAgents = agents.default ?? agents;
- dotenv.config();
- import { GoogleAuth } from "google-auth-library";
+ import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
+ import { fileCacheAgentFilter } from "../utils/filters.js";
+ import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
  import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
- const htmlStyle = (context, beat) => {
- return {
- canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
- textSlideStyle: MulmoPresentationStyleMethods.getTextSlideStyle(context.presentationStyle, beat),
- };
+ import { getImageRefs } from "./image_references.js";
+ import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
+ const vanillaAgents = vanilla.default ?? vanilla;
+ const imageAgents = {
+ imageGoogleAgent,
+ imageOpenaiAgent,
  };
- export const imagePreprocessAgent = async (namedInputs) => {
- const { context, beat, index, imageRefs } = namedInputs;
- const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, beat);
- // const imageParams = { ...imageAgentInfo.imageParams, ...beat.imageParams };
- const imagePath = getBeatPngImagePath(context, index);
- const returnValue = {
- imageParams: imageAgentInfo.imageParams,
- movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
- };
- if (beat.image) {
- const plugin = findImagePlugin(beat?.image?.type);
- if (!plugin) {
- throw new Error(`invalid beat image type: ${beat.image}`);
- }
- const path = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
- // undefined prompt indicates that image generation is not needed
- return { imagePath: path, referenceImage: path, ...returnValue };
- }
- if (beat.htmlPrompt) {
- const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
- const htmlPath = imagePath.replace(/\.[^/.]+$/, ".html");
- return { imagePath, htmlPrompt, htmlPath, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
- }
- // images for "edit_image"
- const images = (() => {
- const imageNames = beat.imageNames ?? Object.keys(imageRefs); // use all images if imageNames is not specified
- const sources = imageNames.map((name) => imageRefs[name]);
- return sources.filter((source) => source !== undefined);
- })();
- if (beat.moviePrompt && !beat.imagePrompt) {
- return { ...returnValue, imagePath, images, imageFromMovie: true }; // no image prompt, only movie prompt
- }
- const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
- return { imageAgentInfo, imagePath, referenceImage: imagePath, prompt, ...returnValue, images };
- };
- export const imagePluginAgent = async (namedInputs) => {
- const { context, beat, index } = namedInputs;
- const imagePath = getBeatPngImagePath(context, index);
- const plugin = findImagePlugin(beat?.image?.type);
- if (!plugin) {
- throw new Error(`invalid beat image type: ${beat.image}`);
- }
- try {
- MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
- const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
- await plugin.process(processorParams);
- MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
- }
- catch (error) {
- MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
- throw error;
- }
+ const movieAgents = {
+ movieGoogleAgent,
+ movieReplicateAgent,
  };
- const htmlImageGeneratorAgent = async (namedInputs) => {
- const { file, canvasSize, htmlText } = namedInputs;
- await renderHTMLToImage(htmlText, file, canvasSize.width, canvasSize.height);
+ const defaultAgents = {
+ ...vanillaAgents,
+ ...imageAgents,
+ ...movieAgents,
+ mediaMockAgent,
+ fileWriteAgent,
+ openAIAgent,
+ anthropicAgent,
  };
+ dotenv.config();
  const beat_graph_data = {
  version: 0.5,
  concurrency: 4,
@@ -91,6 +43,8 @@ const beat_graph_data = {
  imageRefs: {},
  beat: {},
  __mapIndex: {},
+ forceMovie: { value: false },
+ forceImage: { value: false },
  preprocessor: {
  agent: imagePreprocessAgent,
  inputs: {
@@ -108,7 +62,7 @@ const beat_graph_data = {
  context: ":context",
  beat: ":beat",
  index: ":__mapIndex",
- onComplete: ":preprocessor",
+ onComplete: [":preprocessor"],
  },
  },
  htmlImageAgent: {
@@ -122,10 +76,13 @@ const beat_graph_data = {
  model: ":htmlImageAgentInfo.model",
  max_tokens: ":htmlImageAgentInfo.max_tokens",
  },
- file: ":preprocessor.htmlPath", // only for fileCacheAgentFilter
- mulmoContext: ":context", // for fileCacheAgentFilter
- index: ":__mapIndex", // for fileCacheAgentFilter
- sessionType: "html", // for fileCacheAgentFilter
+ cache: {
+ force: [":context.force", ":forceImage"],
+ file: ":preprocessor.htmlPath",
+ index: ":__mapIndex",
+ mulmoContext: ":context",
+ sessionType: "html",
+ },
  },
  },
  htmlReader: {
@@ -135,7 +92,7 @@ const beat_graph_data = {
  return { html };
  },
  inputs: {
- onComplete: ":htmlImageAgent", // to wait for htmlImageAgent to finish
+ onComplete: [":htmlImageAgent"], // to wait for htmlImageAgent to finish
  htmlPath: ":preprocessor.htmlPath",
  },
  output: {
@@ -150,10 +107,7 @@ const beat_graph_data = {
  inputs: {
  htmlText: ":htmlReader.htmlText",
  canvasSize: ":context.presentationStyle.canvasSize",
- file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
- mulmoContext: ":context", // for fileCacheAgentFilter
- index: ":__mapIndex", // for fileCacheAgentFilter
- sessionType: "image", // for fileCacheAgentFilter
+ file: ":preprocessor.imagePath",
  },
  },
  imageGenerator: {
@@ -162,12 +116,14 @@ const beat_graph_data = {
  retry: 2,
  inputs: {
  prompt: ":preprocessor.prompt",
- images: ":preprocessor.images",
- file: ":preprocessor.imagePath", // only for fileCacheAgentFilter
- force: ":context.force", // only for fileCacheAgentFilter
- mulmoContext: ":context", // for fileCacheAgentFilter
- index: ":__mapIndex", // for fileCacheAgentFilter
- sessionType: "image", // for fileCacheAgentFilter
+ referenceImages: ":preprocessor.referenceImages",
+ cache: {
+ force: [":context.force", ":forceImage"],
+ file: ":preprocessor.imagePath",
+ index: ":__mapIndex",
+ mulmoContext: ":context",
+ sessionType: "image",
+ },
  params: {
  model: ":preprocessor.imageParams.model",
  moderation: ":preprocessor.imageParams.moderation",
@@ -182,14 +138,16 @@ const beat_graph_data = {
  inputs: {
  onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
  prompt: ":beat.moviePrompt",
- imagePath: ":preprocessor.referenceImage",
- file: ":preprocessor.movieFile",
- studio: ":context.studio", // for cache
- mulmoContext: ":context", // for fileCacheAgentFilter
- index: ":__mapIndex", // for cache
- sessionType: "movie", // for cache
+ imagePath: ":preprocessor.referenceImageForMovie",
+ cache: {
+ force: [":context.force", ":forceMovie"],
+ file: ":preprocessor.movieFile",
+ index: ":__mapIndex",
+ sessionType: "movie",
+ mulmoContext: ":context",
+ },
  params: {
- model: ":context.presentationStyle.movieParams.model",
+ model: ":preprocessor.movieParams.model",
  duration: ":beat.duration",
  canvasSize: ":context.presentationStyle.canvasSize",
  },
@@ -199,15 +157,14 @@ const beat_graph_data = {
  imageFromMovie: {
  if: ":preprocessor.imageFromMovie",
  agent: async (namedInputs) => {
- await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
- return { generatedImage: true };
+ return await extractImageFromMovie(namedInputs.movieFile, namedInputs.imageFile);
  },
  inputs: {
- onComplete: ":movieGenerator", // to wait for movieGenerator to finish
+ onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
  imageFile: ":preprocessor.imagePath",
  movieFile: ":preprocessor.movieFile",
  },
- defaultValue: { generatedImage: false },
+ defaultValue: {},
  },
  output: {
  agent: "copyAgent",
@@ -307,21 +264,19 @@ const googleAuth = async () => {
  throw error;
  }
  };
- const graphOption = async (context, settings) => {
- const agentFilters = [
- {
- name: "fileCacheAgentFilter",
- agent: fileCacheAgentFilter,
- nodeIds: ["imageGenerator", "movieGenerator", "htmlImageGenerator", "htmlImageAgent"],
- },
- ];
- const taskManager = new TaskManager(getConcurrency(context));
+ export const graphOption = async (context, settings) => {
  const options = {
- agentFilters,
- taskManager,
+ agentFilters: [
+ {
+ name: "fileCacheAgentFilter",
+ agent: fileCacheAgentFilter,
+ nodeIds: ["imageGenerator", "movieGenerator", "htmlImageAgent"],
+ },
+ ],
+ taskManager: new TaskManager(MulmoPresentationStyleMethods.getConcurrency(context.presentationStyle)),
  };
  const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
- const config = settings2GraphAIConfig(settings);
+ const config = settings2GraphAIConfig(settings, process.env);
  // We need to get google's auth token only if the google is the text2image provider.
  if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
  userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
@@ -339,32 +294,6 @@ const graphOption = async (context, settings) => {
  options.config = config;
  return options;
  };
- // TODO: unit test
- export const getImageRefs = async (context) => {
- const imageRefs = {};
- const images = context.presentationStyle.imageParams?.images;
- if (images) {
- await Promise.all(Object.keys(images).map(async (key) => {
- const image = images[key];
- if (image.source.kind === "path") {
- imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
- }
- else if (image.source.kind === "url") {
- const response = await fetch(image.source.url);
- if (!response.ok) {
- throw new Error(`Failed to download image: ${image.source.url}`);
- }
- const buffer = Buffer.from(await response.arrayBuffer());
- // Detect file extension from Content-Type header or URL
- const extension = getExtention(response.headers.get("content-type"), image.source.url);
- const imagePath = getReferenceImagePath(context, key, extension);
- await fs.promises.writeFile(imagePath, buffer);
- imageRefs[key] = imagePath;
- }
- }));
- }
- return imageRefs;
- };
  const prepareGenerateImages = async (context) => {
  const fileName = MulmoStudioContextMethods.getFileName(context);
  const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
@@ -373,56 +302,26 @@ const prepareGenerateImages = async (context) => {
  const provider = MulmoPresentationStyleMethods.getText2ImageProvider(context.presentationStyle.imageParams?.provider);
  const htmlImageAgentInfo = MulmoPresentationStyleMethods.getHtmlImageAgentInfo(context.presentationStyle);
  const imageRefs = await getImageRefs(context);
- // Determine movie agent based on provider
- const getMovieAgent = () => {
- const provider = context.presentationStyle.movieParams?.provider ?? "google";
- switch (provider) {
- case "replicate":
- return "movieReplicateAgent";
- case "google":
- default:
- return "movieGoogleAgent";
- }
- };
  GraphAILogger.info(`text2image: provider=${provider} model=${context.presentationStyle.imageParams?.model}`);
  const injections = {
  context,
  htmlImageAgentInfo,
  movieAgentInfo: {
- agent: getMovieAgent(),
+ agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
  },
  outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
  imageRefs,
  };
  return injections;
  };
- const getConcurrency = (context) => {
- if (context.presentationStyle.movieParams?.provider === "replicate") {
- return 4;
- }
- const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
- if (imageAgentInfo.imageParams.provider === "openai") {
- // NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
- // dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
- // gpt-image-1:3,000,000 TPM、150 images per minute
- return imageAgentInfo.imageParams.model === defaultOpenAIImageModel ? 4 : 16;
- }
- return 4;
- };
- const generateImages = async (context, settings, callbacks) => {
- const options = await graphOption(context, settings);
+ const generateImages = async (context, settings, callbacks, options) => {
+ const optionImageAgents = options?.imageAgents ?? {};
  const injections = await prepareGenerateImages(context);
- const graph = new GraphAI(graph_data, {
- ...vanillaAgents,
- imageGoogleAgent,
- movieGoogleAgent,
- movieReplicateAgent,
- imageOpenaiAgent,
- mediaMockAgent,
- fileWriteAgent,
- openAIAgent,
- anthropicAgent,
- }, options);
+ const graphaiAgent = {
+ ...defaultAgents,
+ ...optionImageAgents,
+ };
+ const graph = new GraphAI(graph_data, graphaiAgent, await graphOption(context, settings));
  Object.keys(injections).forEach((key) => {
  graph.injectValue(key, injections[key]);
  });
@@ -434,10 +333,12 @@ const generateImages = async (context, settings, callbacks) => {
  const res = await graph.run();
  return res.mergeResult;
  };
- export const images = async (context, settings, callbacks) => {
+ // public api
+ export const images = async (context, args) => {
+ const { settings, callbacks, options } = args ?? {};
  try {
  MulmoStudioContextMethods.setSessionState(context, "image", true);
- const newContext = await generateImages(context, settings, callbacks);
+ const newContext = await generateImages(context, settings, callbacks, options);
  MulmoStudioContextMethods.setSessionState(context, "image", false);
  return newContext;
  }
@@ -446,20 +347,12 @@ export const images = async (context, settings, callbacks) => {
  throw error;
  }
  };
- export const generateBeatImage = async (index, context, settings, callbacks) => {
+ // public api
+ export const generateBeatImage = async (inputs) => {
+ const { index, context, settings, callbacks, forceMovie, forceImage } = inputs;
  const options = await graphOption(context, settings);
  const injections = await prepareGenerateImages(context);
- const graph = new GraphAI(beat_graph_data, {
- ...vanillaAgents,
- imageGoogleAgent,
- movieGoogleAgent,
- movieReplicateAgent,
- imageOpenaiAgent,
- mediaMockAgent,
- fileWriteAgent,
- openAIAgent,
- anthropicAgent,
- }, options);
+ const graph = new GraphAI(beat_graph_data, defaultAgents, options);
  Object.keys(injections).forEach((key) => {
  if ("outputStudioFilePath" !== key) {
  graph.injectValue(key, injections[key]);
@@ -467,6 +360,8 @@ export const generateBeatImage = async (index, context, settings, callbacks) =>
  });
  graph.injectValue("__mapIndex", index);
  graph.injectValue("beat", context.studio.script.beats[index]);
+ graph.injectValue("forceMovie", forceMovie ?? false);
+ graph.injectValue("forceImage", forceImage ?? false);
  if (callbacks) {
  callbacks.forEach((callback) => {
  graph.registerCallback(callback);
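
The two public entry points above now take a single object argument instead of positional parameters. A minimal sketch of how a caller adapts, assuming the functions are re-exported from the package root; the import path, prompt values, and index are illustrative, only the argument shapes come from this diff.

import { images, generateBeatImage } from "mulmocast"; // assumed export path

const regenerate = async (context: any) => {
  // 0.1.2: images(context, settings, callbacks)
  // 0.1.4: images(context, { settings, callbacks, options }); every field is optional
  await images(context, {
    callbacks: [],
    options: { imageAgents: {} }, // optional per-call agent overrides merged over defaultAgents
  });

  // 0.1.2: generateBeatImage(index, context, settings, callbacks)
  // 0.1.4: one inputs object; forceImage/forceMovie are injected into the beat graph as cache overrides
  await generateBeatImage({ index: 0, context, forceImage: true, forceMovie: false });
};
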
@@ -1,5 +1,7 @@
  export * from "./audio.js";
  export * from "./images.js";
+ export * from "./image_references.js";
+ export * from "./image_agents.js";
  export * from "./movie.js";
  export * from "./pdf.js";
  export * from "./translate.js";
@@ -1,5 +1,7 @@
  export * from "./audio.js";
  export * from "./images.js";
+ export * from "./image_references.js";
+ export * from "./image_agents.js";
  export * from "./movie.js";
  export * from "./pdf.js";
  export * from "./translate.js";
@@ -218,7 +218,9 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
  // Concatenate the trimmed images
  const concatVideoId = "concat_video";
  const videoIds = videoIdsForBeats.filter((id) => id !== undefined); // filter out voice-over beats
- ffmpegContext.filterComplex.push(`${videoIds.map((id) => `[${id}]`).join("")}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`);
+ const inputs = videoIds.map((id) => `[${id}]`).join("");
+ const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
+ ffmpegContext.filterComplex.push(filter);
  const captionedVideoId = addCaptions(ffmpegContext, concatVideoId, context, caption);
  const mixedVideoId = addTransitionEffects(ffmpegContext, captionedVideoId, context, transitionVideoIds, beatTimestamps);
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
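
The refactor above only splits the concat filter construction into named intermediates; the resulting ffmpeg filter string is unchanged. A small sketch of what it produces, with made-up per-beat stream labels (only the concat syntax comes from the code above):

const videoIds = ["v0", "v1", "v2"]; // hypothetical per-beat video stream labels
const concatVideoId = "concat_video";
const inputs = videoIds.map((id) => `[${id}]`).join("");
const filter = `${inputs}concat=n=${videoIds.length}:v=1:a=0[${concatVideoId}]`;
// filter === "[v0][v1][v2]concat=n=3:v=1:a=0[concat_video]"
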
@@ -1,6 +1,7 @@
  import fs from "fs";
  import path from "path";
  import puppeteer from "puppeteer";
+ import { GraphAILogger } from "graphai";
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
  import { localizedText, isHttp } from "../utils/utils.js";
  import { getOutputPdfFilePath, writingMessage, getHTMLFile } from "../utils/file.js";
@@ -17,7 +18,8 @@ const loadImage = async (imagePath) => {
  const mimeType = ext === "jpg" ? "jpeg" : ext;
  return `data:image/${mimeType};base64,${imageData.toString("base64")}`;
  }
- catch (__error) {
+ catch (error) {
+ GraphAILogger.info("loadImage failed", error);
  const placeholderData = fs.readFileSync("assets/images/mulmocast_credit.png");
  return `data:image/png;base64,${placeholderData.toString("base64")}`;
  }
@@ -101,7 +103,8 @@ const generatePDFHTML = async (context, pdfMode, pdfSize) => {
  const imagePaths = studio.beats.map((beat) => beat.imageFile);
  const texts = studio.script.beats.map((beat, index) => localizedText(beat, multiLingual?.[index], lang));
  const imageDataUrls = await Promise.all(imagePaths.map(loadImage));
- const pageSize = pdfMode === "handout" ? `${getPdfSize(pdfSize)} portrait` : `${getPdfSize(pdfSize)} ${isLandscapeImage ? "landscape" : "portrait"}`;
+ const defaultPageSize = `${getPdfSize(pdfSize)} ${isLandscapeImage ? "landscape" : "portrait"}`;
+ const pageSize = pdfMode === "handout" ? `${getPdfSize(pdfSize)} portrait` : defaultPageSize;
  const pagesHTML = generatePagesHTML(pdfMode, imageDataUrls, texts);
  const template = getHTMLFile(`pdf_${pdfMode}`);
  const baseTemplateData = {
@@ -1,18 +1,5 @@
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
- export type ImageGoogleConfig = {
- projectId?: string;
- token?: string;
- };
- export declare const imageGoogleAgent: AgentFunction<{
- model: string;
- canvasSize: {
- width: number;
- height: number;
- };
- }, {
- buffer: Buffer;
- }, {
- prompt: string;
- }, ImageGoogleConfig>;
+ import type { AgentBufferResult, ImageAgentInputs, ImageAgentParams, GoogleImageAgentConfig } from "../types/agent.js";
+ export declare const imageGoogleAgent: AgentFunction<ImageAgentParams, AgentBufferResult, ImageAgentInputs, GoogleImageAgentConfig>;
  declare const imageGoogleAgentInfo: AgentFunctionInfo;
  export default imageGoogleAgentInfo;
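
The inline declarations removed above (and in the other agent .d.ts hunks below) are consolidated into the new lib/types/agent.d.ts (+55 lines in the file list). A sketch of what those shared types plausibly contain, reconstructed from the removed inline declarations; the type names match the new imports, but the exact field lists are inferred, not taken from the new file.

// hypothetical reconstruction of part of lib/types/agent.d.ts
export type AgentBufferResult = {
  buffer: Buffer;
};
export type GoogleImageAgentConfig = {
  projectId?: string;
  token?: string;
};
export type ImageAgentParams = {
  model: string;
  canvasSize: { width: number; height: number };
};
export type ImageAgentInputs = {
  prompt: string;
};
export type MovieAgentInputs = {
  prompt: string;
  imagePath?: string;
};
export type GoogleMovieAgentParams = ImageAgentParams & {
  duration?: number;
};
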
@@ -1,5 +1,6 @@
  import { GraphAILogger } from "graphai";
  import { getAspectRatio } from "./movie_google_agent.js";
+ import { provider2ImageAgent } from "../utils/provider2agent.js";
  async function generateImage(projectId, model, token, prompt, aspectRatio) {
  const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}:predict`;
  try {
@@ -7,12 +8,12 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
  const payload = {
  instances: [
  {
- prompt: prompt,
+ prompt,
  },
  ],
  parameters: {
  sampleCount: 1,
- aspectRatio: aspectRatio,
+ aspectRatio,
  safetySetting: "block_only_high",
  },
  };
@@ -51,11 +52,10 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
  throw error;
  }
  }
- export const imageGoogleAgent = async ({ namedInputs, params, config }) => {
+ export const imageGoogleAgent = async ({ namedInputs, params, config, }) => {
  const { prompt } = namedInputs;
  const aspectRatio = getAspectRatio(params.canvasSize);
- const model = params.model ?? "imagen-3.0-fast-generate-001";
- //const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
+ const model = params.model ?? provider2ImageAgent["google"].defaultModel;
  const projectId = config?.projectId;
  const token = config?.token;
  try {
@@ -1,20 +1,5 @@
  import { AgentFunction, AgentFunctionInfo } from "graphai";
- type OpenAIModeration = "low" | "auto";
- export declare const imageOpenaiAgent: AgentFunction<{
- apiKey: string;
- model: string;
- moderation: OpenAIModeration | null | undefined;
- canvasSize: {
- width: number;
- height: number;
- };
- }, {
- buffer: Buffer;
- }, {
- prompt: string;
- images: string[] | null | undefined;
- }, {
- apiKey?: string;
- }>;
+ import type { AgentBufferResult, OpenAIImageAgentParams, OpenAIImageAgentInputs, OpenAIImageAgentConfig } from "../types/agent.js";
+ export declare const imageOpenaiAgent: AgentFunction<OpenAIImageAgentParams, AgentBufferResult, OpenAIImageAgentInputs, OpenAIImageAgentConfig>;
  declare const imageOpenaiAgentInfo: AgentFunctionInfo;
  export default imageOpenaiAgentInfo;
@@ -2,14 +2,14 @@ import fs from "fs";
  import path from "path";
  import { GraphAILogger } from "graphai";
  import OpenAI, { toFile } from "openai";
- import { defaultOpenAIImageModel } from "../utils/const.js";
+ import { provider2ImageAgent } from "../utils/provider2agent.js";
  // https://platform.openai.com/docs/guides/image-generation
- export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
- const { prompt, images } = namedInputs;
+ export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
+ const { prompt, referenceImages } = namedInputs;
  const { moderation, canvasSize } = params;
- const { apiKey } = { ...config };
- const model = params.model ?? defaultOpenAIImageModel;
- const openai = new OpenAI({ apiKey });
+ const { apiKey, baseURL } = { ...config };
+ const model = params.model ?? provider2ImageAgent["openai"].defaultModel;
+ const openai = new OpenAI({ apiKey, baseURL });
  const size = (() => {
  if (model === "gpt-image-1") {
  if (canvasSize.width > canvasSize.height) {
@@ -46,13 +46,13 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
  const response = await (async () => {
  try {
  const targetSize = imageOptions.size;
- if ((images ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
- const imagelist = await Promise.all((images ?? []).map(async (file) => {
+ if ((referenceImages ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
+ const referenceImageFiles = await Promise.all((referenceImages ?? []).map(async (file) => {
  const ext = path.extname(file).toLowerCase();
  const type = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" : "image/png";
  return await toFile(fs.createReadStream(file), null, { type });
  }));
- return await openai.images.edit({ ...imageOptions, size: targetSize, image: imagelist });
+ return await openai.images.edit({ ...imageOptions, size: targetSize, image: referenceImageFiles });
  }
  else {
  return await openai.images.generate(imageOptions);
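
In the hunks above, the namedInputs field `images` is renamed to `referenceImages`, and the config gains an optional baseURL. A minimal sketch of calling the agent implementation directly with the new field names (inside mulmocast this runs as a GraphAI node); the import path, prompt, and file path are made up for illustration, only the field names come from this diff.

import { imageOpenaiAgent } from "mulmocast"; // assumed export path

const result = await imageOpenaiAgent({
  namedInputs: {
    prompt: "a watercolor lighthouse at dusk",
    referenceImages: ["./output/images/style_ref.png"], // was `images` in 0.1.2
  },
  params: { model: "gpt-image-1", moderation: "low", canvasSize: { width: 1536, height: 1024 } },
  config: { apiKey: process.env.OPENAI_API_KEY }, // baseURL may also be supplied now
});
// result.buffer holds the generated image bytes (AgentBufferResult)
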
@@ -1,24 +1,9 @@
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
- export type MovieGoogleConfig = {
- projectId?: string;
- token?: string;
- };
+ import type { AgentBufferResult, GoogleImageAgentConfig, GoogleMovieAgentParams, MovieAgentInputs } from "../types/agent.js";
  export declare const getAspectRatio: (canvasSize: {
  width: number;
  height: number;
  }) => string;
- export declare const movieGoogleAgent: AgentFunction<{
- model: string;
- canvasSize: {
- width: number;
- height: number;
- };
- duration?: number;
- }, {
- buffer: Buffer;
- }, {
- prompt: string;
- imagePath?: string;
- }, MovieGoogleConfig>;
+ export declare const movieGoogleAgent: AgentFunction<GoogleMovieAgentParams, AgentBufferResult, MovieAgentInputs, GoogleImageAgentConfig>;
  declare const movieGoogleAgentInfo: AgentFunctionInfo;