mulmocast 1.1.4 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/lib/actions/audio.d.ts +0 -1
  2. package/lib/actions/audio.js +8 -12
  3. package/lib/actions/images.js +1 -0
  4. package/lib/actions/movie.js +1 -3
  5. package/lib/agents/image_openai_agent.js +4 -1
  6. package/lib/methods/mulmo_presentation_style.d.ts +2 -3
  7. package/lib/methods/mulmo_presentation_style.js +14 -8
  8. package/lib/types/agent.d.ts +3 -0
  9. package/lib/types/schema.d.ts +704 -0
  10. package/lib/types/schema.js +5 -1
  11. package/lib/utils/context.d.ts +25 -0
  12. package/lib/utils/file.d.ts +1 -1
  13. package/lib/utils/file.js +5 -2
  14. package/lib/utils/preprocess.d.ts +13 -0
  15. package/package.json +2 -1
  16. package/scripts/templates/image_prompt_only_template.ts +95 -0
  17. package/scripts/test/gpt.json +32 -0
  18. package/scripts/test/mulmo_story.json +11 -0
  19. package/scripts/test/test.json +64 -0
  20. package/scripts/test/test1.json +40 -0
  21. package/scripts/test/test2.json +66 -0
  22. package/scripts/test/test_audio.json +151 -0
  23. package/scripts/test/test_audio_instructions.json +69 -0
  24. package/scripts/test/test_beats.json +58 -0
  25. package/scripts/test/test_captions.json +52 -0
  26. package/scripts/test/test_elevenlabs_models.json +193 -0
  27. package/scripts/test/test_en.json +29 -0
  28. package/scripts/test/test_hello.json +17 -0
  29. package/scripts/test/test_hello_google.json +25 -0
  30. package/scripts/test/test_html.json +66 -0
  31. package/scripts/test/test_image_refs.json +49 -0
  32. package/scripts/test/test_images.json +48 -0
  33. package/scripts/test/test_lang.json +31 -0
  34. package/scripts/test/test_layout.json +152 -0
  35. package/scripts/test/test_lipsync.json +53 -0
  36. package/scripts/test/test_loop.json +34 -0
  37. package/scripts/test/test_media.json +244 -0
  38. package/scripts/test/test_mixed_providers.json +91 -0
  39. package/scripts/test/test_movie.json +39 -0
  40. package/scripts/test/test_no_audio.json +252 -0
  41. package/scripts/test/test_no_audio_with_credit.json +253 -0
  42. package/scripts/test/test_order.json +68 -0
  43. package/scripts/test/test_order_portrait.json +72 -0
  44. package/scripts/test/test_replicate.json +126 -0
  45. package/scripts/test/test_slideout_left_no_audio.json +45 -0
  46. package/scripts/test/test_sound_effect.json +41 -0
  47. package/scripts/test/test_spillover.json +116 -0
  48. package/scripts/test/test_transition.json +55 -0
  49. package/scripts/test/test_transition_no_audio.json +45 -0
  50. package/scripts/test/test_video_speed.json +80 -0
  51. package/scripts/test/test_voice_over.json +104 -0
  52. package/scripts/test/test_voices.json +54 -0
@@ -2,6 +2,5 @@ import "dotenv/config";
2
2
  import type { CallbackFunction } from "graphai";
3
3
  import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
4
4
  export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
5
- export declare const audioFilePath: (context: MulmoStudioContext) => string;
6
5
  export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<void>;
7
6
  export declare const audio: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
@@ -9,7 +9,7 @@ import ttsGoogleAgent from "../agents/tts_google_agent.js";
9
9
  import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
10
10
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
11
11
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
12
- import { text2SpeechProviderSchema, } from "../types/index.js";
12
+ import { text2SpeechProviderSchema } from "../types/index.js";
13
13
  import { fileCacheAgentFilter } from "../utils/filters.js";
14
14
  import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
15
15
  import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
@@ -30,15 +30,15 @@ const getAudioPath = (context, beat, audioFile) => {
30
30
  }
31
31
  return audioFile;
32
32
  };
33
- const getAudioParam = (presentationStyle, beat) => {
34
- const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
33
+ const getAudioParam = (context, beat) => {
34
+ const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat);
35
35
  const speechOptions = { ...speaker.speechOptions, ...beat.speechOptions };
36
36
  const provider = text2SpeechProviderSchema.parse(speaker.provider);
37
37
  return { voiceId: speaker.voiceId, provider, speechOptions, model: speaker.model };
38
38
  };
39
39
  export const getBeatAudioPath = (text, context, beat, lang) => {
40
40
  const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
41
- const { voiceId, provider, speechOptions, model } = getAudioParam(context.presentationStyle, beat);
41
+ const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat);
42
42
  const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider, model ?? ""].join(":");
43
43
  const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
44
44
  const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
@@ -46,9 +46,9 @@ export const getBeatAudioPath = (text, context, beat, lang) => {
46
46
  };
47
47
  const preprocessor = (namedInputs) => {
48
48
  const { beat, studioBeat, multiLingual, context } = namedInputs;
49
- const { lang, presentationStyle } = context;
49
+ const { lang } = context;
50
50
  const text = localizedText(beat, multiLingual, lang);
51
- const { voiceId, provider, speechOptions, model } = getAudioParam(presentationStyle, beat);
51
+ const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat);
52
52
  const audioPath = getBeatAudioPath(text, context, beat, lang);
53
53
  studioBeat.audioFile = audioPath; // TODO: Passing by reference is difficult to maintain, so pass it using graphai inputs
54
54
  const needsTTS = !beat.audio && audioPath !== undefined;
@@ -174,11 +174,6 @@ const agentFilters = [
174
174
  nodeIds: ["tts"],
175
175
  },
176
176
  ];
177
- export const audioFilePath = (context) => {
178
- const fileName = MulmoStudioContextMethods.getFileName(context);
179
- const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
180
- return getAudioArtifactFilePath(outDirPath, fileName);
181
- };
182
177
  const getConcurrency = (context) => {
183
178
  // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
184
179
  const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
@@ -231,7 +226,7 @@ export const audio = async (context, settings, callbacks) => {
231
226
  const fileName = MulmoStudioContextMethods.getFileName(context);
232
227
  const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
233
228
  const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
234
- const audioArtifactFilePath = audioFilePath(context);
229
+ const audioArtifactFilePath = getAudioArtifactFilePath(context);
235
230
  const audioSegmentDirPath = resolveDirPath(audioDirPath, fileName);
236
231
  const audioCombinedFilePath = getAudioFilePath(audioDirPath, fileName, fileName, context.lang);
237
232
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
@@ -253,6 +248,7 @@ export const audio = async (context, settings, callbacks) => {
253
248
  const result = await graph.run();
254
249
  writingMessage(audioCombinedFilePath);
255
250
  MulmoStudioContextMethods.setSessionState(context, "audio", false);
251
+ writingMessage(audioArtifactFilePath);
256
252
  return result.combineFiles;
257
253
  }
258
254
  catch (__error) {
@@ -135,6 +135,7 @@ const beat_graph_data = {
135
135
  model: ":preprocessor.imageParams.model",
136
136
  moderation: ":preprocessor.imageParams.moderation",
137
137
  canvasSize: ":context.presentationStyle.canvasSize",
138
+ quality: ":preprocessor.imageParams.quality",
138
139
  },
139
140
  },
140
141
  defaultValue: {},
@@ -246,9 +246,7 @@ export const movieFilePath = (context) => {
246
246
  export const movie = async (context) => {
247
247
  MulmoStudioContextMethods.setSessionState(context, "video", true);
248
248
  try {
249
- const fileName = MulmoStudioContextMethods.getFileName(context);
250
- const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
251
- const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, fileName);
249
+ const audioArtifactFilePath = getAudioArtifactFilePath(context);
252
250
  const outputVideoPath = movieFilePath(context);
253
251
  if (await createVideo(audioArtifactFilePath, outputVideoPath, context)) {
254
252
  writingMessage(outputVideoPath);
@@ -6,7 +6,7 @@ import { provider2ImageAgent } from "../utils/provider2agent.js";
6
6
  // https://platform.openai.com/docs/guides/image-generation
7
7
  export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
8
8
  const { prompt, referenceImages } = namedInputs;
9
- const { moderation, canvasSize } = params;
9
+ const { moderation, canvasSize, quality } = params;
10
10
  const { apiKey, baseURL } = { ...config };
11
11
  const model = params.model ?? provider2ImageAgent["openai"].defaultModel;
12
12
  const openai = new OpenAI({ apiKey, baseURL });
@@ -42,6 +42,9 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
42
42
  };
43
43
  if (model === "gpt-image-1") {
44
44
  imageOptions.moderation = moderation || "auto";
45
+ if (quality) {
46
+ imageOptions.quality = quality;
47
+ }
45
48
  }
46
49
  const response = await (async () => {
47
50
  try {
@@ -1,12 +1,11 @@
1
1
  import "dotenv/config";
2
- import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider } from "../types/index.js";
2
+ import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider, MulmoStudioContext } from "../types/index.js";
3
3
  export declare const MulmoPresentationStyleMethods: {
4
4
  getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;
5
5
  getAllSpeechProviders(presentationStyle: MulmoPresentationStyle): Set<Text2SpeechProvider>;
6
6
  getTextSlideStyle(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
7
7
  getDefaultSpeaker(presentationStyle: MulmoPresentationStyle): string;
8
- getSpeaker(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeakerData;
9
- getTTSModel(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string | undefined;
8
+ getSpeaker(context: MulmoStudioContext, beat: MulmoBeat): SpeakerData;
10
9
  getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
11
10
  getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
12
11
  getMovieAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): {
@@ -46,18 +46,24 @@ export const MulmoPresentationStyleMethods = {
46
46
  }
47
47
  return keys[0];
48
48
  },
49
- getSpeaker(presentationStyle, beat) {
50
- userAssert(!!presentationStyle?.speechParams?.speakers, "presentationStyle.speechParams.speakers is not set!!");
51
- const speakerId = beat?.speaker ?? MulmoPresentationStyleMethods.getDefaultSpeaker(presentationStyle);
52
- userAssert(!!speakerId, "beat.speaker and default speaker is not set");
53
- const speaker = presentationStyle.speechParams.speakers[speakerId];
49
+ getSpeaker(context, beat) {
50
+ userAssert(!!context.presentationStyle?.speechParams?.speakers, "presentationStyle.speechParams.speakers is not set!!");
51
+ const speakerId = beat?.speaker ?? MulmoPresentationStyleMethods.getDefaultSpeaker(context.presentationStyle);
52
+ const speaker = context.presentationStyle.speechParams.speakers[speakerId];
54
53
  userAssert(!!speaker, `speaker is not set: speaker "${speakerId}"`);
54
+ // Check if the speaker has a language-specific version
55
+ const lang = context.lang ?? context.studio.script.lang;
56
+ if (speaker.lang && lang && speaker.lang[lang]) {
57
+ return speaker.lang[lang];
58
+ }
55
59
  return speaker;
56
60
  },
57
- getTTSModel(presentationStyle, beat) {
58
- const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
59
- return speaker.model;
61
+ /* NOTE: This method is not used.
62
+ getTTSModel(context: MulmoStudioContext, beat: MulmoBeat): string | undefined {
63
+ const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat);
64
+ return speaker.model;
60
65
  },
66
+ */
61
67
  getText2ImageProvider(provider) {
62
68
  return text2ImageProviderSchema.parse(provider);
63
69
  },
@@ -1,11 +1,13 @@
1
1
  export type OpenAIImageSize = "1792x1024" | "1024x1792" | "1024x1024" | "1536x1024" | "1024x1536";
2
2
  export type OpenAIImageModeration = "low" | "auto";
3
+ export type OpenAIImageQuality = "low" | "medium" | "high" | "auto";
3
4
  export type OpenAIImageOptions = {
4
5
  model: string;
5
6
  prompt: string;
6
7
  n: number;
7
8
  size: OpenAIImageSize;
8
9
  moderation?: OpenAIImageModeration;
10
+ quality?: OpenAIImageQuality;
9
11
  };
10
12
  export type AgentBufferResult = {
11
13
  buffer: Buffer;
@@ -35,6 +37,7 @@ export type ImageAgentParams = {
35
37
  };
36
38
  export type OpenAIImageAgentParams = ImageAgentParams & {
37
39
  moderation: OpenAIImageModeration | null | undefined;
40
+ quality?: OpenAIImageQuality;
38
41
  };
39
42
  export type OpenAIImageAgentConfig = {
40
43
  baseURL?: string;