mulmocast 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/lib/actions/audio.js +13 -18
  2. package/lib/actions/image_agents.d.ts +30 -6
  3. package/lib/actions/image_agents.js +5 -2
  4. package/lib/actions/image_references.js +2 -1
  5. package/lib/actions/images.d.ts +9 -1
  6. package/lib/actions/images.js +38 -13
  7. package/lib/actions/movie.js +3 -2
  8. package/lib/agents/add_bgm_agent.js +1 -1
  9. package/lib/agents/combine_audio_files_agent.js +10 -7
  10. package/lib/agents/image_google_agent.js +2 -2
  11. package/lib/agents/image_openai_agent.js +2 -2
  12. package/lib/agents/movie_replicate_agent.js +1 -1
  13. package/lib/agents/tts_elevenlabs_agent.d.ts +2 -1
  14. package/lib/agents/tts_elevenlabs_agent.js +4 -3
  15. package/lib/agents/tts_google_agent.d.ts +2 -9
  16. package/lib/agents/tts_nijivoice_agent.d.ts +2 -1
  17. package/lib/agents/tts_nijivoice_agent.js +3 -3
  18. package/lib/agents/tts_openai_agent.d.ts +2 -13
  19. package/lib/agents/tts_openai_agent.js +4 -3
  20. package/lib/index.browser.d.ts +1 -0
  21. package/lib/index.browser.js +1 -0
  22. package/lib/index.d.ts +1 -0
  23. package/lib/index.js +2 -0
  24. package/lib/methods/mulmo_presentation_style.d.ts +2 -1
  25. package/lib/methods/mulmo_presentation_style.js +21 -17
  26. package/lib/types/agent.d.ts +29 -2
  27. package/lib/types/agent.js +0 -1
  28. package/lib/types/schema.d.ts +596 -485
  29. package/lib/types/schema.js +15 -11
  30. package/lib/utils/const.d.ts +0 -1
  31. package/lib/utils/const.js +0 -1
  32. package/lib/utils/context.d.ts +36 -30
  33. package/lib/utils/ffmpeg_utils.d.ts +4 -1
  34. package/lib/utils/ffmpeg_utils.js +2 -1
  35. package/lib/utils/preprocess.d.ts +28 -24
  36. package/lib/utils/provider2agent.d.ts +76 -0
  37. package/lib/utils/provider2agent.js +87 -0
  38. package/lib/utils/utils.d.ts +6 -11
  39. package/lib/utils/utils.js +5 -26
  40. package/package.json +2 -2
package/lib/actions/audio.js CHANGED
@@ -12,18 +12,10 @@ import { MulmoPresentationStyleMethods } from "../methods/index.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
  import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
  import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
+ import { provider2TTSAgent } from "../utils/provider2agent.js";
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
  import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
  const vanillaAgents = agents.default ?? agents;
- // const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
- // const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
- const provider_to_agent = {
-     nijivoice: "ttsNijivoiceAgent",
-     openai: "ttsOpenaiAgent",
-     google: "ttsGoogleAgent",
-     elevenlabs: "ttsElevenlabsAgent",
-     mock: "mediaMockAgent",
- };
  const getAudioPath = (context, beat, audioFile) => {
      if (beat.audio?.type === "audio") {
          const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
@@ -40,14 +32,15 @@ const getAudioPath = (context, beat, audioFile) => {
  const getAudioParam = (presentationStyle, beat) => {
      const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
      // Use speaker-specific provider if available, otherwise fall back to script-level provider
-     const provider = MulmoPresentationStyleMethods.getProvider(presentationStyle, beat);
+     const provider = MulmoPresentationStyleMethods.getTTSProvider(presentationStyle, beat);
      const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
-     return { voiceId, provider, speechOptions };
+     const model = MulmoPresentationStyleMethods.getTTSModel(presentationStyle, beat);
+     return { voiceId, provider, speechOptions, model };
  };
  export const getBeatAudioPath = (text, context, beat, lang) => {
      const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
-     const { voiceId, provider, speechOptions } = getAudioParam(context.presentationStyle, beat);
-     const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider].join(":");
+     const { voiceId, provider, speechOptions, model } = getAudioParam(context.presentationStyle, beat);
+     const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider, model ?? ""].join(":");
      const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
      const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
      return getAudioPath(context, beat, audioFile);
@@ -56,15 +49,16 @@ const preprocessor = (namedInputs) => {
      const { beat, studioBeat, multiLingual, context } = namedInputs;
      const { lang, presentationStyle } = context;
      const text = localizedText(beat, multiLingual, lang);
-     const { voiceId, provider, speechOptions } = getAudioParam(presentationStyle, beat);
+     const { voiceId, provider, speechOptions, model } = getAudioParam(presentationStyle, beat);
      const audioPath = getBeatAudioPath(text, context, beat, lang);
-     studioBeat.audioFile = audioPath; // TODO
+     studioBeat.audioFile = audioPath; // TODO: Passing by reference is difficult to maintain, so pass it using graphai inputs
      const needsTTS = !beat.audio && audioPath !== undefined;
      return {
-         ttsAgent: provider_to_agent[provider],
+         ttsAgent: provider2TTSAgent[provider].agentName,
          text,
          voiceId,
          speechOptions,
+         model,
          audioPath,
          studioBeat,
          needsTTS,
@@ -102,6 +96,7 @@ const graph_tts = {
                  voice: ":preprocessor.voiceId",
                  speed: ":preprocessor.speechOptions.speed",
                  instructions: ":preprocessor.speechOptions.instruction",
+                 model: ":preprocessor.model",
              },
          },
      },
@@ -186,8 +181,8 @@ export const audioFilePath = (context) => {
  const getConcurrency = (context) => {
      // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
      const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
-         const provider = speaker.provider ?? context.presentationStyle.speechParams.provider;
-         return provider === "nijivoice" || provider === "elevenlabs";
+         const provider = (speaker.provider ?? context.presentationStyle.speechParams.provider);
+         return provider2TTSAgent[provider].hasLimitedConcurrency;
      });
      return hasLimitedConcurrencyProvider ? 1 : 8;
  };
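Note: the inlined `provider_to_agent` map removed above is superseded by the shared `provider2TTSAgent` table in the new `utils/provider2agent.js` (files 36–37). That module itself is not shown in this diff; judging from the fields accessed here and in the TTS agents below (`agentName`, `hasLimitedConcurrency`, `defaultModel`, `defaultVoice`), it plausibly looks like the following sketch, where anything not directly observable in this diff is an assumption:

```ts
// Hypothetical reconstruction of provider2TTSAgent (utils/provider2agent.ts).
// Grounded: the agent names (from the removed provider_to_agent map), the
// limited-concurrency providers (getConcurrency comment), and the literal
// defaults this diff replaces (gpt-4o-mini-tts / shimmer / eleven_monolingual_v1).
// Everything else is an illustrative assumption.
export const provider2TTSAgent: Record<
  string,
  { agentName: string; hasLimitedConcurrency: boolean; defaultModel?: string; defaultVoice?: string }
> = {
  openai: { agentName: "ttsOpenaiAgent", hasLimitedConcurrency: false, defaultModel: "gpt-4o-mini-tts", defaultVoice: "shimmer" },
  nijivoice: { agentName: "ttsNijivoiceAgent", hasLimitedConcurrency: true },
  google: { agentName: "ttsGoogleAgent", hasLimitedConcurrency: false },
  elevenlabs: { agentName: "ttsElevenlabsAgent", hasLimitedConcurrency: true, defaultModel: "eleven_monolingual_v1" },
  mock: { agentName: "mediaMockAgent", hasLimitedConcurrency: false },
};
```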
package/lib/actions/image_agents.d.ts CHANGED
@@ -13,9 +13,9 @@ export declare const imagePreprocessAgent: (namedInputs: {
      imagePath: string | undefined;
      referenceImageForMovie: string | undefined;
      imageParams: {
-         provider: "openai" | "google";
-         style?: string | undefined;
+         provider: string;
          model?: string | undefined;
+         style?: string | undefined;
          moderation?: string | undefined;
          images?: Record<string, {
              type: "image";
@@ -44,10 +44,22 @@ export declare const imagePreprocessAgent: (namedInputs: {
  } | {
      imagePath: string;
      imageFromMovie: boolean;
+     movieParams: {
+         speed?: number | undefined;
+         model?: string | undefined;
+         fillOption?: {
+             style: "aspectFit" | "aspectFill";
+         } | undefined;
+         provider?: string | undefined;
+         transition?: {
+             type: "fade" | "slideout_left";
+             duration: number;
+         } | undefined;
+     };
      imageParams: {
-         provider: "openai" | "google";
-         style?: string | undefined;
+         provider: string;
          model?: string | undefined;
+         style?: string | undefined;
          moderation?: string | undefined;
          images?: Record<string, {
              type: "image";
@@ -79,10 +91,22 @@ export declare const imagePreprocessAgent: (namedInputs: {
      imageAgentInfo: import("../types/type.js").Text2ImageAgentInfo;
      prompt: string;
      referenceImages: string[];
+     movieParams: {
+         speed?: number | undefined;
+         model?: string | undefined;
+         fillOption?: {
+             style: "aspectFit" | "aspectFill";
+         } | undefined;
+         provider?: string | undefined;
+         transition?: {
+             type: "fade" | "slideout_left";
+             duration: number;
+         } | undefined;
+     };
      imageParams: {
-         provider: "openai" | "google";
-         style?: string | undefined;
+         provider: string;
          model?: string | undefined;
+         style?: string | undefined;
          moderation?: string | undefined;
          images?: Record<string, {
              type: "image";
package/lib/actions/image_agents.js CHANGED
@@ -2,6 +2,7 @@ import { MulmoPresentationStyleMethods, MulmoStudioContextMethods, MulmoBeatMeth
  import { getBeatPngImagePath, getBeatMoviePath } from "../utils/file.js";
  import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
  import { renderHTMLToImage } from "../utils/markdown.js";
+ import { GraphAILogger } from "graphai";
  const htmlStyle = (context, beat) => {
      return {
          canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
@@ -27,13 +28,15 @@ export const imagePreprocessAgent = async (namedInputs) => {
          // undefined prompt indicates that image generation is not needed
          return { ...returnValue, imagePath: pluginPath, referenceImageForMovie: pluginPath };
      }
+     const movieParams = { ...context.presentationStyle.movieParams, ...beat.movieParams };
+     GraphAILogger.log(`movieParams: ${index}`, movieParams, beat.moviePrompt);
      if (beat.moviePrompt && !beat.imagePrompt) {
-         return { ...returnValue, imagePath, imageFromMovie: true }; // no image prompt, only movie prompt
+         return { ...returnValue, imagePath, imageFromMovie: true, movieParams }; // no image prompt, only movie prompt
      }
      // referenceImages for "edit_image", openai agent.
      const referenceImages = MulmoBeatMethods.getImageReferenceForImageGenerator(beat, imageRefs);
      const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
-     return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages };
+     return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages, movieParams };
  };
  export const imagePluginAgent = async (namedInputs) => {
      const { context, beat, index } = namedInputs;
package/lib/actions/image_references.js CHANGED
@@ -1,5 +1,5 @@
  import fs from "fs";
- import { GraphAI } from "graphai";
+ import { GraphAI, GraphAILogger } from "graphai";
  import { getReferenceImagePath } from "../utils/file.js";
  import { getExtention } from "../utils/utils.js";
  import { graphOption } from "./images.js";
@@ -13,6 +13,7 @@ export const generateReferenceImage = async (inputs) => {
      // generate image
      const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
      const prompt = `${image.prompt}\n${imageAgentInfo.imageParams.style || ""}`;
+     GraphAILogger.info(`Generating reference image for ${key}: ${prompt}`);
      const image_graph_data = {
          version: 0.5,
          nodes: {
package/lib/actions/images.d.ts CHANGED
@@ -1,7 +1,14 @@
  import type { GraphOptions, CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
  export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
- export declare const images: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
+ type ImageOptions = {
+     imageAgents: Record<string, unknown>;
+ };
+ export declare const images: (context: MulmoStudioContext, args?: {
+     settings?: Record<string, string>;
+     callbacks?: CallbackFunction[];
+     options?: ImageOptions;
+ }) => Promise<MulmoStudioContext>;
  export declare const generateBeatImage: (inputs: {
      index: number;
      context: MulmoStudioContext;
@@ -10,3 +17,4 @@ export declare const generateBeatImage: (inputs: {
      forceMovie?: boolean;
      forceImage?: boolean;
  }) => Promise<void>;
+ export {};
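The `images` entry point changes from positional parameters to a single optional `args` object, and the new `options.imageAgents` lets callers override the default GraphAI agents (see the `generateImages` change in `images.js` below). A migration sketch, assuming a prepared `MulmoStudioContext` and a hypothetical custom agent:

```ts
import { images } from "mulmocast";
import type { MulmoStudioContext } from "mulmocast";

declare const context: MulmoStudioContext; // prepared elsewhere
declare const myCustomImageAgent: unknown; // hypothetical GraphAI agent override

// 0.1.3: await images(context, settings, callbacks);
// 0.1.5: one optional args object; imageAgents entries are merged over the defaults.
const newContext = await images(context, {
  settings: { OPENAI_API_KEY: process.env.OPENAI_API_KEY ?? "" }, // illustrative setting key
  options: { imageAgents: { imageOpenaiAgent: myCustomImageAgent } },
});
```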
package/lib/actions/images.js CHANGED
@@ -2,7 +2,7 @@ import dotenv from "dotenv";
  import fs from "fs";
  import { GraphAI, GraphAILogger, TaskManager } from "graphai";
  import { GoogleAuth } from "google-auth-library";
- import * as agents from "@graphai/vanilla";
+ import * as vanilla from "@graphai/vanilla";
  import { openAIAgent } from "@graphai/openai_agent";
  import { anthropicAgent } from "@graphai/anthropic_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
@@ -11,16 +11,22 @@ import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../met
  import { getOutputStudioFilePath, mkdir } from "../utils/file.js";
  import { fileCacheAgentFilter } from "../utils/filters.js";
  import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
- import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
+ import { extractImageFromMovie, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
  import { getImageRefs } from "./image_references.js";
  import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
- const vanillaAgents = agents.default ?? agents;
+ const vanillaAgents = vanilla.default ?? vanilla;
  const imageAgents = {
-     ...vanillaAgents,
      imageGoogleAgent,
+     imageOpenaiAgent,
+ };
+ const movieAgents = {
      movieGoogleAgent,
      movieReplicateAgent,
-     imageOpenaiAgent,
+ };
+ const defaultAgents = {
+     ...vanillaAgents,
+     ...imageAgents,
+     ...movieAgents,
      mediaMockAgent,
      fileWriteAgent,
      openAIAgent,
@@ -141,7 +147,7 @@ const beat_graph_data = {
                  mulmoContext: ":context",
              },
              params: {
-                 model: ":context.presentationStyle.movieParams.model",
+                 model: ":preprocessor.movieParams.model",
                  duration: ":beat.duration",
                  canvasSize: ":context.presentationStyle.canvasSize",
             },
@@ -160,16 +166,30 @@ const beat_graph_data = {
              },
              defaultValue: {},
          },
+         audioChecker: {
+             if: ":preprocessor.movieFile",
+             agent: async (namedInputs) => {
+                 const { hasAudio } = await ffmpegGetMediaDuration(namedInputs.movieFile);
+                 return { hasMovieAudio: hasAudio };
+             },
+             inputs: {
+                 onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
+                 movieFile: ":preprocessor.movieFile",
+             },
+             defaultValue: {},
+         },
          output: {
              agent: "copyAgent",
              inputs: {
-                 onComplete: [":imageFromMovie", ":htmlImageGenerator"], // to wait for imageFromMovie to finish
+                 onComplete: [":imageFromMovie", ":htmlImageGenerator", ":audioChecker"], // to wait for imageFromMovie to finish
                  imageFile: ":preprocessor.imagePath",
                  movieFile: ":preprocessor.movieFile",
+                 hasMovieAudio: ":audioChecker.hasMovieAudio",
             },
              output: {
                  imageFile: ".imageFile",
                  movieFile: ".movieFile",
+                 hasMovieAudio: ".hasMovieAudio",
             },
              isResult: true,
         },
@@ -308,10 +328,14 @@ const prepareGenerateImages = async (context) => {
      };
      return injections;
  };
- const generateImages = async (context, settings, callbacks) => {
-     const options = await graphOption(context, settings);
+ const generateImages = async (context, settings, callbacks, options) => {
+     const optionImageAgents = options?.imageAgents ?? {};
      const injections = await prepareGenerateImages(context);
-     const graph = new GraphAI(graph_data, imageAgents, options);
+     const graphaiAgent = {
+         ...defaultAgents,
+         ...optionImageAgents,
+     };
+     const graph = new GraphAI(graph_data, graphaiAgent, await graphOption(context, settings));
      Object.keys(injections).forEach((key) => {
          graph.injectValue(key, injections[key]);
      });
@@ -324,10 +348,11 @@ const generateImages = async (context, settings, callbacks) => {
      return res.mergeResult;
  };
  // public api
- export const images = async (context, settings, callbacks) => {
+ export const images = async (context, args) => {
+     const { settings, callbacks, options } = args ?? {};
      try {
          MulmoStudioContextMethods.setSessionState(context, "image", true);
-         const newContext = await generateImages(context, settings, callbacks);
+         const newContext = await generateImages(context, settings, callbacks, options);
          MulmoStudioContextMethods.setSessionState(context, "image", false);
          return newContext;
      }
@@ -341,7 +366,7 @@ export const generateBeatImage = async (inputs) => {
      const { index, context, settings, callbacks, forceMovie, forceImage } = inputs;
      const options = await graphOption(context, settings);
      const injections = await prepareGenerateImages(context);
-     const graph = new GraphAI(beat_graph_data, imageAgents, options);
+     const graph = new GraphAI(beat_graph_data, defaultAgents, options);
      Object.keys(injections).forEach((key) => {
          if ("outputStudioFilePath" !== key) {
              graph.injectValue(key, injections[key]);
package/lib/actions/movie.js CHANGED
@@ -204,8 +204,9 @@ const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
          videoIdsForBeats.push(videoId);
      }
      // NOTE: We don't support audio if the speed is not 1.0.
-     if (beat.image?.type == "movie" && beat.image.mixAudio > 0.0 && speed === 1.0) {
-         const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, beat.image.mixAudio);
+     const movieVolume = beat.audioParams?.movieVolume ?? 1.0;
+     if (studioBeat.hasMovieAudio && movieVolume > 0.0 && speed === 1.0) {
+         const { audioId, audioPart } = getAudioPart(inputIndex, duration, timestamp, movieVolume);
          audioIdsFromMovieBeats.push(audioId);
          ffmpegContext.filterComplex.push(audioPart);
      }
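The `mixAudio` flag on the image is replaced by `audioParams.movieVolume` on the beat, gated by the `hasMovieAudio` flag that the new `audioChecker` node records on the studio beat. A beat opting into half-volume movie audio would presumably look like this hedged sketch (only `moviePrompt` and `audioParams.movieVolume` are grounded in this diff; the prompt text is illustrative):

```ts
// Hypothetical beat snippet: movieVolume scales the movie's own soundtrack.
// 0.0 mutes it; omitting audioParams defaults to 1.0. Per the NOTE above,
// the mix is only applied when the playback speed is 1.0.
const beat = {
  moviePrompt: "a drone shot over the coastline", // illustrative
  audioParams: {
    movieVolume: 0.5,
  },
};
```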
package/lib/agents/add_bgm_agent.js CHANGED
@@ -10,7 +10,7 @@ const addBGMAgent = async ({ namedInputs, params, }) => {
      if (!musicFile.match(/^http/) && !fs.existsSync(musicFile)) {
          throw new Error(`AddBGMAgent musicFile not exist: ${musicFile}`);
      }
-     const speechDuration = await ffmpegGetMediaDuration(voiceFile);
+     const { duration: speechDuration } = await ffmpegGetMediaDuration(voiceFile);
      const introPadding = context.presentationStyle.audioParams.introPadding;
      const outroPadding = context.presentationStyle.audioParams.outroPadding;
      const totalDuration = speechDuration + introPadding + outroPadding;
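`ffmpegGetMediaDuration` now resolves to an object instead of a bare number, so every caller destructures the result; the same one-line change applies to any external caller (the file path below is illustrative):

```ts
import { ffmpegGetMediaDuration } from "mulmocast";

// Before (0.1.3): const duration = await ffmpegGetMediaDuration(file);
// After (0.1.5): the result also reports whether the media has an audio stream.
const { duration, hasAudio } = await ffmpegGetMediaDuration("media/beat1.mov");
console.log(duration, hasAudio ? "has an audio stream" : "video only");
```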
package/lib/agents/combine_audio_files_agent.js CHANGED
@@ -2,13 +2,14 @@ import { assert, GraphAILogger } from "graphai";
  import { silent60secPath } from "../utils/file.js";
  import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
  import { userAssert } from "../utils/utils.js";
- const getMovieDulation = async (beat) => {
+ const getMovieDuration = async (beat) => {
      if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
          const pathOrUrl = beat.image.source.kind === "url" ? beat.image.source.url : beat.image.source.path;
          const speed = beat.movieParams?.speed ?? 1.0;
-         return (await ffmpegGetMediaDuration(pathOrUrl)) / speed;
+         const { duration, hasAudio } = await ffmpegGetMediaDuration(pathOrUrl);
+         return { duration: duration / speed, hasAudio };
      }
-     return 0;
+     return { duration: 0, hasAudio: false };
  };
  const getPadding = (context, beat, index) => {
      if (beat.audioParams?.padding !== undefined) {
@@ -29,16 +30,17 @@ const getTotalPadding = (padding, movieDuration, audioDuration, duration) => {
      }
      return padding;
  };
- const getMediaDurations = (context) => {
+ const getMediaDurationsOfAllBeats = (context) => {
      return Promise.all(context.studio.beats.map(async (studioBeat, index) => {
          const beat = context.studio.script.beats[index];
-         const movieDuration = await getMovieDulation(beat);
-         const audioDuration = studioBeat.audioFile ? await ffmpegGetMediaDuration(studioBeat.audioFile) : 0;
+         const { duration: movieDuration, hasAudio: hasMovieAudio } = await getMovieDuration(beat);
+         const audioDuration = studioBeat.audioFile ? (await ffmpegGetMediaDuration(studioBeat.audioFile)).duration : 0;
          return {
              movieDuration,
              audioDuration,
              hasMedia: movieDuration + audioDuration > 0,
              silenceDuration: 0,
+             hasMovieAudio,
          };
      }));
  };
@@ -64,7 +66,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
      const { context, combinedFileName } = namedInputs;
      const ffmpegContext = FfmpegContextInit();
      // First, get the audio durations of all beats, taking advantage of multi-threading capability of ffmpeg.
-     const mediaDurations = await getMediaDurations(context);
+     const mediaDurations = await getMediaDurationsOfAllBeats(context);
      const beatDurations = [];
      context.studio.script.beats.forEach((beat, index) => {
          if (beatDurations.length > index) {
@@ -196,6 +198,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
                  audioDuration: mediaDurations[index].audioDuration,
                  movieDuration: mediaDurations[index].movieDuration,
                  silenceDuration: mediaDurations[index].silenceDuration,
+                 hasMovieAudio: mediaDurations[index].hasMovieAudio,
              })),
          },
      };
package/lib/agents/image_google_agent.js CHANGED
@@ -1,5 +1,6 @@
  import { GraphAILogger } from "graphai";
  import { getAspectRatio } from "./movie_google_agent.js";
+ import { provider2ImageAgent } from "../utils/provider2agent.js";
  async function generateImage(projectId, model, token, prompt, aspectRatio) {
      const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}:predict`;
      try {
@@ -54,8 +55,7 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
  export const imageGoogleAgent = async ({ namedInputs, params, config, }) => {
      const { prompt } = namedInputs;
      const aspectRatio = getAspectRatio(params.canvasSize);
-     const model = params.model ?? "imagen-3.0-fast-generate-001";
-     //const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
+     const model = params.model ?? provider2ImageAgent["google"].defaultModel;
      const projectId = config?.projectId;
      const token = config?.token;
      try {
package/lib/agents/image_openai_agent.js CHANGED
@@ -2,13 +2,13 @@ import fs from "fs";
  import path from "path";
  import { GraphAILogger } from "graphai";
  import OpenAI, { toFile } from "openai";
- import { defaultOpenAIImageModel } from "../utils/const.js";
+ import { provider2ImageAgent } from "../utils/provider2agent.js";
  // https://platform.openai.com/docs/guides/image-generation
  export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
      const { prompt, referenceImages } = namedInputs;
      const { moderation, canvasSize } = params;
      const { apiKey, baseURL } = { ...config };
-     const model = params.model ?? defaultOpenAIImageModel;
+     const model = params.model ?? provider2ImageAgent["openai"].defaultModel;
      const openai = new OpenAI({ apiKey, baseURL });
      const size = (() => {
          if (model === "gpt-image-1") {
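Default image models likewise move out of `const.js` into a shared `provider2ImageAgent` table. Its shape is not shown in this diff; from the two lookups above it presumably maps each provider to at least a `defaultModel`, roughly as in this hedged sketch (the Google value matches the literal this diff removes; the OpenAI value and the `agentName` field are assumptions, the latter by analogy with `provider2TTSAgent`):

```ts
// Hypothetical reconstruction of provider2ImageAgent (utils/provider2agent.ts).
export const provider2ImageAgent: Record<string, { agentName: string; defaultModel: string }> = {
  google: { agentName: "imageGoogleAgent", defaultModel: "imagen-3.0-fast-generate-001" },
  openai: { agentName: "imageOpenaiAgent", defaultModel: "gpt-image-1" }, // assumed default
};
```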
package/lib/agents/movie_replicate_agent.js CHANGED
@@ -21,7 +21,7 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
      if (imagePath) {
          const buffer = readFileSync(imagePath);
          const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
-         if (model === "kwaivgi/kling-v2.1") {
+         if (model === "kwaivgi/kling-v2.1" || model === "kwaivgi/kling-v1.6-pro") {
              input.start_image = base64Image;
          }
          else {
package/lib/agents/tts_elevenlabs_agent.d.ts CHANGED
@@ -1,4 +1,5 @@
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
- export declare const ttsElevenlabsAgent: AgentFunction;
+ import type { ElevenlabsTTSAgentParams, AgentBufferResult, AgentTextInputs, AgentErrorResult, AgentConfig } from "../types/agent.js";
+ export declare const ttsElevenlabsAgent: AgentFunction<ElevenlabsTTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs, AgentConfig>;
  declare const ttsElevenlabsAgentInfo: AgentFunctionInfo;
  export default ttsElevenlabsAgentInfo;
package/lib/agents/tts_elevenlabs_agent.js CHANGED
@@ -1,5 +1,6 @@
  import { GraphAILogger } from "graphai";
- export const ttsElevenlabsAgent = async ({ namedInputs, params, config }) => {
+ import { provider2TTSAgent } from "../utils/provider2agent.js";
+ export const ttsElevenlabsAgent = async ({ namedInputs, params, config, }) => {
      const { text } = namedInputs;
      const { voice, model, stability, similarityBoost, suppressError } = params;
      const apiKey = config?.apiKey ?? process.env.ELEVENLABS_API_KEY;
@@ -7,12 +8,12 @@ export const ttsElevenlabsAgent = async ({ namedInputs, params, config }) => {
          throw new Error("ELEVENLABS_API_KEY environment variable is required");
      }
      if (!voice) {
-         throw new Error("Voice ID is required");
+         throw new Error("ELEVENLABS Voice ID is required");
      }
      try {
          const requestBody = {
              text,
-             model_id: model ?? "eleven_monolingual_v1",
+             model_id: model ?? provider2TTSAgent.elevenlabs.defaultModel,
              voice_settings: {
                  stability: stability ?? 0.5,
                  similarity_boost: similarityBoost ?? 0.75,
package/lib/agents/tts_google_agent.d.ts CHANGED
@@ -1,12 +1,5 @@
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
- export declare const ttsGoogleAgent: AgentFunction<{
-     voice: string;
-     speed: number;
-     suppressError: boolean;
- }, {
-     buffer?: Buffer | null;
- }, {
-     text: string;
- }>;
+ import type { GoogleTTSAgentParams, AgentBufferResult, AgentTextInputs, AgentErrorResult } from "../types/agent.js";
+ export declare const ttsGoogleAgent: AgentFunction<GoogleTTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs>;
  declare const ttsGoogleAgentInfo: AgentFunctionInfo;
  export default ttsGoogleAgentInfo;
package/lib/agents/tts_nijivoice_agent.d.ts CHANGED
@@ -1,4 +1,5 @@
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
- export declare const ttsNijivoiceAgent: AgentFunction;
+ import type { NijivoiceTTSAgentParams, AgentBufferResult, AgentTextInputs, AgentErrorResult, AgentConfig } from "../types/agent.js";
+ export declare const ttsNijivoiceAgent: AgentFunction<NijivoiceTTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs, AgentConfig>;
  declare const ttsNijivoiceAgentInfo: AgentFunctionInfo;
  export default ttsNijivoiceAgentInfo;
package/lib/agents/tts_nijivoice_agent.js CHANGED
@@ -6,11 +6,11 @@ const errorMessage = [
      "1. Obtain an API key from Niji Voice (https://platform.nijivoice.com/) and set it as the NIJIVOICE_API_KEY environment variable.",
      '2. Use OpenAI\'s TTS instead of Niji Voice by changing speechParams.provider from "nijivoice" to "openai".',
  ].join("\n");
- export const ttsNijivoiceAgent = async ({ params, namedInputs, config }) => {
+ export const ttsNijivoiceAgent = async ({ params, namedInputs, config, }) => {
      const { suppressError, voice, speed, speed_global } = params;
      const { apiKey } = config ?? {};
      const { text } = namedInputs;
-     assert(apiKey ?? nijovoiceApiKey, errorMessage);
+     assert(!!(apiKey ?? nijovoiceApiKey), errorMessage);
      const url = `https://api.nijivoice.com/api/platform/v1/voice-actors/${voice}/generate-voice`;
      const options = {
          method: "POST",
@@ -31,7 +31,7 @@ export const ttsNijivoiceAgent = async ({ params, namedInputs, config }) => {
      if (voiceJson && voiceJson.generatedVoice && voiceJson.generatedVoice.audioFileDownloadUrl) {
          const audioRes = await fetch(voiceJson.generatedVoice.audioFileDownloadUrl);
          const buffer = Buffer.from(await audioRes.arrayBuffer());
-         return { buffer, generatedVoice: voiceJson.generatedVoice };
+         return { buffer };
      }
      if (suppressError) {
          return {
package/lib/agents/tts_openai_agent.d.ts CHANGED
@@ -1,16 +1,5 @@
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
- export declare const ttsOpenaiAgent: AgentFunction<{
-     model: string;
-     voice: string;
-     instructions: string;
-     suppressError: boolean;
- }, {
-     buffer?: Buffer;
- }, {
-     text: string;
- }, {
-     baseURL?: string;
-     apiKey?: string;
- }>;
+ import type { OpenAITTSAgentParams, AgentBufferResult, AgentTextInputs, AgentErrorResult, OpenAIImageAgentConfig } from "../types/agent.js";
+ export declare const ttsOpenaiAgent: AgentFunction<OpenAITTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs, OpenAIImageAgentConfig>;
  declare const ttsOpenaiAgentInfo: AgentFunctionInfo;
  export default ttsOpenaiAgentInfo;
package/lib/agents/tts_openai_agent.js CHANGED
@@ -1,14 +1,15 @@
  import { GraphAILogger } from "graphai";
  import OpenAI from "openai";
- export const ttsOpenaiAgent = async ({ namedInputs, params, config }) => {
+ import { provider2TTSAgent } from "../utils/provider2agent.js";
+ export const ttsOpenaiAgent = async ({ namedInputs, params, config, }) => {
      const { text } = namedInputs;
      const { model, voice, suppressError, instructions } = params;
      const { apiKey, baseURL } = config ?? {};
      const openai = new OpenAI({ apiKey, baseURL });
      try {
          const tts_options = {
-             model: model ?? "gpt-4o-mini-tts", // "tts-1",
-             voice: voice ?? "shimmer",
+             model: model ?? provider2TTSAgent.openai.defaultModel,
+             voice: voice ?? provider2TTSAgent.openai.defaultVoice,
              input: text,
          };
          if (instructions) {
package/lib/index.browser.d.ts CHANGED
@@ -1,3 +1,4 @@
  export * from "./types/type.js";
  export * from "./types/schema.js";
+ export * from "./utils/provider2agent.js";
  export * from "./agents/validate_schema_agent.js";
package/lib/index.browser.js CHANGED
@@ -1,4 +1,5 @@
  // Entry point that exposes only APIs available for use in the browser
  export * from "./types/type.js";
  export * from "./types/schema.js";
+ export * from "./utils/provider2agent.js";
  export * from "./agents/validate_schema_agent.js";
package/lib/index.d.ts CHANGED
@@ -2,6 +2,7 @@ export * from "./actions/index.js";
  export * from "./cli/helpers.js";
  export * from "./utils/file.js";
  export * from "./utils/ffmpeg_utils.js";
+ export * from "./utils/provider2agent.js";
  export * from "./methods/index.js";
  export * from "./agents/index.js";
  export * from "./types/index.js";
package/lib/index.js CHANGED
@@ -1,7 +1,9 @@
+ // NOTE: If you want to support usage in the browser codebase, also add to src/index.browser.ts
  export * from "./actions/index.js";
  export * from "./cli/helpers.js";
  export * from "./utils/file.js";
  export * from "./utils/ffmpeg_utils.js";
+ export * from "./utils/provider2agent.js";
  export * from "./methods/index.js";
  export * from "./agents/index.js";
  export * from "./types/index.js";
package/lib/methods/mulmo_presentation_style.d.ts CHANGED
@@ -7,7 +7,8 @@ export declare const MulmoPresentationStyleMethods: {
      getTextSlideStyle(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
      getSpeechOptions(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeechOptions | undefined;
      getSpeaker(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeakerData;
-     getProvider(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): Text2SpeechProvider;
+     getTTSProvider(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): Text2SpeechProvider;
+     getTTSModel(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string | undefined;
      getVoiceId(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
      getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
      getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
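The new `getTTSModel` is what feeds the per-beat TTS model into the agent params and the audio cache hash (see the `audio.js` hunks above). Its body is not in this diff; by analogy with the speaker-then-script fallback documented for the provider, it plausibly reads something like this hedged sketch (the field names `speaker.model` and `speechParams.model` are assumptions):

```ts
import { MulmoPresentationStyleMethods } from "mulmocast";
import type { MulmoPresentationStyle, MulmoBeat } from "mulmocast";

// Hedged sketch only; the real implementation is not shown in this diff.
const getTTSModel = (presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string | undefined => {
  const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
  // Assumed: speaker-level model wins, script-level speechParams model is the fallback.
  return speaker.model ?? presentationStyle.speechParams.model;
};
```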