npm - mulmocast - Versions diffs - 1.2.33 → 1.2.35 - Mend

mulmocast 1.2.33 → 1.2.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +2 -4
package/lib/actions/audio.js +2 -9
package/lib/agents/tts_google_agent.js +1 -1
package/lib/methods/mulmo_presentation_style.d.ts +3 -0
package/lib/methods/mulmo_presentation_style.js +2 -0
package/lib/methods/mulmo_studio_context.d.ts +10 -1
package/lib/methods/mulmo_studio_context.js +8 -0
package/lib/types/type.d.ts +1 -0
package/lib/utils/image_plugins/vision.js +3 -2
package/lib/utils/provider2agent.d.ts +17 -0
package/lib/utils/provider2agent.js +17 -0
package/package.json +4 -4

package/README.md CHANGED Viewed

@@ -106,13 +106,11 @@ OPENAI_API_KEY=your_openai_api_key
 DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
 ```
-#### (Optional) For Google's image generation model
+#### (Optional) For Google's image and TTS.
 ```bash
-GOOGLE_PROJECT_ID=your_google_project_id
+GEMINI_API_KEY=your_google_gemini_api_key
 ```
-See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
 #### (Optional) For AI providers
 ```bash
 # For Anthropic Claude (htmlPrompt feature)

package/lib/actions/audio.js CHANGED Viewed

@@ -9,7 +9,6 @@ import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, re
 import { localizedText, settings2GraphAIConfig } from "../utils/utils.js";
 import { text2hash } from "../utils/utils_node.js";
 import { provider2TTSAgent } from "../utils/provider2agent.js";
-import { MulmoPresentationStyleMethods } from "../methods/index.js";
 import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
 import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
 dotenv.config({ quiet: true });
@@ -27,15 +26,9 @@ const getAudioPath = (context, beat, audioFile) => {
     }
     return audioFile;
 };
-const getAudioParam = (context, beat, lang) => {
-    const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat, lang);
-    const speechOptions = { ...speaker.speechOptions, ...beat.speechOptions };
-    const provider = text2SpeechProviderSchema.parse(speaker.provider);
-    return { voiceId: speaker.voiceId, provider, speechOptions, model: speaker.model };
-};
 export const getBeatAudioPath = (text, context, beat, lang) => {
     const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
-    const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat, lang);
+    const { voiceId, provider, speechOptions, model } = MulmoStudioContextMethods.getAudioParam(context, beat, lang);
     const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider, model ?? ""].join(":");
     GraphAILogger.log(`getBeatAudioPath [${hash_string}]`);
     const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
@@ -54,7 +47,7 @@ const preprocessorAgent = (namedInputs) => {
     const { beat, studioBeat, multiLingual, context, lang } = namedInputs;
     // const { lang } = context;
     const text = localizedText(beat, multiLingual, lang);
-    const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat, lang);
+    const { voiceId, provider, speechOptions, model } = MulmoStudioContextMethods.getAudioParam(context, beat, lang);
     const audioPath = getBeatAudioPath(text, context, beat, lang);
     studioBeat.audioFile = audioPath; // TODO: Passing by reference is difficult to maintain, so pass it using graphai inputs
     const needsTTS = !beat.audio && audioPath !== undefined;

package/lib/agents/tts_google_agent.js CHANGED Viewed

@@ -46,6 +46,6 @@ const ttsGoogleAgentInfo = {
     author: "Receptron Team",
     repository: "https://github.com/receptron/mulmocast-cli/",
     license: "MIT",
-    environmentVariables: ["GOOGLE_GENAI_API_KEY"],
+    environmentVariables: ["GEMINI_API_KEY"],
 };
 export default ttsGoogleAgentInfo;

package/lib/methods/mulmo_presentation_style.d.ts CHANGED Viewed

@@ -26,10 +26,12 @@ export declare const MulmoPresentationStyleMethods: {
                 duration: number;
             } | undefined;
         };
+        keyName: string;
     };
     getSoundEffectAgentInfo(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): {
         agentName: string;
         defaultModel: import("../utils/provider2agent.js").ReplicateModel;
+        keyName: string;
         models: import("../utils/provider2agent.js").ReplicateModel[];
         modelParams: Record<import("../utils/provider2agent.js").ReplicateModel, {
             identifier?: `${string}/${string}:${string}`;
@@ -38,6 +40,7 @@ export declare const MulmoPresentationStyleMethods: {
     getLipSyncAgentInfo(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): {
         agentName: string;
         defaultModel: import("../utils/provider2agent.js").ReplicateModel;
+        keyName: string;
         models: import("../utils/provider2agent.js").ReplicateModel[];
         modelParams: Record<import("../utils/provider2agent.js").ReplicateModel, {
             identifier?: `${string}/${string}:${string}` | `${string}/${string}`;

package/lib/methods/mulmo_presentation_style.js CHANGED Viewed

@@ -86,6 +86,7 @@ export const MulmoPresentationStyleMethods = {
         return {
             agent: agentInfo.agentName,
             imageParams: { ...defaultImageParams, ...imageParams },
+            keyName: agentInfo.keyName,
         };
     },
     getMovieAgentInfo(presentationStyle, beat) {
@@ -95,6 +96,7 @@ export const MulmoPresentationStyleMethods = {
         return {
             agent: agentInfo.agentName,
             movieParams,
+            keyName: agentInfo.keyName,
         };
     },
     getSoundEffectAgentInfo(presentationStyle, beat) {

package/lib/methods/mulmo_studio_context.d.ts CHANGED Viewed

@@ -3,7 +3,7 @@
  * (No Node.js built-ins like fs, path, dotenv, etc.)
  * Works in both Node.js and modern browsers.
  */
-import { BeatSessionType, MulmoStudioContext, SessionProgressCallback, SessionType } from "../types/index.js";
+import { BeatSessionType, MulmoStudioContext, SessionProgressCallback, SessionType, MulmoBeat } from "../types/index.js";
 export declare const addSessionProgressCallback: (cb: SessionProgressCallback) => void;
 export declare const removeSessionProgressCallback: (cb: SessionProgressCallback) => void;
 export declare const MulmoStudioContextMethods: {
@@ -17,4 +17,13 @@ export declare const MulmoStudioContextMethods: {
     setBeatSessionState(context: MulmoStudioContext, sessionType: BeatSessionType | undefined, index: number, id: string | undefined, value: boolean): void;
     needTranslate(context: MulmoStudioContext, includeCaption?: boolean): boolean | "" | undefined;
     getIntroPadding(context: MulmoStudioContext): number;
+    getAudioParam(context: MulmoStudioContext, beat: MulmoBeat, lang?: string): {
+        voiceId: string;
+        provider: "google" | "mock" | "nijivoice" | "openai" | "elevenlabs";
+        speechOptions: {
+            speed?: number | undefined;
+            instruction?: string | undefined;
+        };
+        model: string | undefined;
+    };
 };

package/lib/methods/mulmo_studio_context.js CHANGED Viewed

@@ -3,8 +3,10 @@
  * (No Node.js built-ins like fs, path, dotenv, etc.)
  * Works in both Node.js and modern browsers.
  */
+import { text2SpeechProviderSchema } from "../types/index.js";
 import { beatId } from "../utils/utils.js";
 import { GraphAILogger } from "graphai";
+import { MulmoPresentationStyleMethods } from "./mulmo_presentation_style.js";
 const sessionProgressCallbacks = new Set();
 export const addSessionProgressCallback = (cb) => {
     sessionProgressCallbacks.add(cb);
@@ -84,4 +86,10 @@ export const MulmoStudioContextMethods = {
         }
         return context.presentationStyle.audioParams.introPadding;
     },
+    getAudioParam(context, beat, lang) {
+        const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat, lang);
+        const speechOptions = { ...speaker.speechOptions, ...beat.speechOptions };
+        const provider = text2SpeechProviderSchema.parse(speaker.provider);
+        return { voiceId: speaker.voiceId, provider, speechOptions, model: speaker.model };
+    },
 };

package/lib/types/type.d.ts CHANGED Viewed

@@ -76,6 +76,7 @@ export type PDFSize = (typeof pdf_sizes)[number];
 export type Text2ImageAgentInfo = {
     agent: string;
     imageParams: MulmoImageParams;
+    keyName?: string;
 };
 export type Text2HtmlAgentInfo = {
     provider: Text2HtmlImageProvider;

package/lib/utils/image_plugins/vision.js CHANGED Viewed

@@ -16,10 +16,11 @@ const processVision = async (params) => {
     return imagePath;
 };
 const dumpHtml = async (params) => {
-    const { beat } = params;
+    const { beat, context } = params;
+    const rootDir = context.fileDirs.nodeModuleRootPath ? resolvePath(context.fileDirs.nodeModuleRootPath, "mulmocast-vision") : undefined;
     if (!beat.image || beat.image.type !== imageType)
         return;
-    const handler = new htmlPlugin({});
+    const handler = new htmlPlugin({ rootDir });
     return handler.getHtml(templateNameTofunctionName(beat.image.style), beat.image.data);
 };
 export const process = processVision;

package/lib/utils/provider2agent.d.ts CHANGED Viewed

@@ -2,22 +2,26 @@ export declare const provider2TTSAgent: {
     nijivoice: {
         agentName: string;
         hasLimitedConcurrency: boolean;
+        keyName: string;
     };
     openai: {
         agentName: string;
         hasLimitedConcurrency: boolean;
         defaultModel: string;
         defaultVoice: string;
+        keyName: string;
     };
     google: {
         agentName: string;
         hasLimitedConcurrency: boolean;
+        keyName: string;
     };
     elevenlabs: {
         agentName: string;
         hasLimitedConcurrency: boolean;
         defaultModel: string;
         models: string[];
+        keyName: string;
     };
     mock: {
         agentName: string;
@@ -31,21 +35,25 @@ export declare const provider2ImageAgent: {
         agentName: string;
         defaultModel: string;
         models: string[];
+        keyName: string;
     };
     google: {
         agentName: string;
         defaultModel: string;
         models: string[];
+        keyName: string;
     };
     replicate: {
         agentName: string;
         defaultModel: string;
         models: string[];
+        keyName: string;
     };
     mock: {
         agentName: string;
         defaultModel: string;
         models: string[];
+        keyName: string;
     };
 };
 export type ReplicateModel = `${string}/${string}`;
@@ -53,6 +61,7 @@ export declare const provider2MovieAgent: {
     replicate: {
         agentName: string;
         defaultModel: ReplicateModel;
+        keyName: string;
         models: string[];
         modelParams: Record<ReplicateModel, {
             durations: number[];
@@ -65,17 +74,20 @@ export declare const provider2MovieAgent: {
         agentName: string;
         defaultModel: string;
         models: string[];
+        keyName: string;
     };
     mock: {
         agentName: string;
         defaultModel: string;
         models: string[];
+        keyName: string;
     };
 };
 export declare const provider2SoundEffectAgent: {
     replicate: {
         agentName: string;
         defaultModel: ReplicateModel;
+        keyName: string;
         models: ReplicateModel[];
         modelParams: Record<ReplicateModel, {
             identifier?: `${string}/${string}:${string}`;
@@ -86,6 +98,7 @@ export declare const provider2LipSyncAgent: {
     replicate: {
         agentName: string;
         defaultModel: ReplicateModel;
+        keyName: string;
         models: ReplicateModel[];
         modelParams: Record<ReplicateModel, {
             identifier?: `${string}/${string}:${string}` | `${string}/${string}`;
@@ -99,6 +112,7 @@ export declare const provider2LLMAgent: {
     readonly openai: {
         readonly agentName: "openAIAgent";
         readonly defaultModel: "gpt-5";
+        readonly keyName: "OPENAI_API_KEY";
         readonly max_tokens: 8192;
         readonly models: readonly ["gpt-5", "gpt-5-nano", "gpt-5-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3", "o3-mini", "o3-pro", "o1", "o1-pro", "gpt-4o", "gpt-4o-mini"];
     };
@@ -107,16 +121,19 @@ export declare const provider2LLMAgent: {
         readonly defaultModel: "claude-3-7-sonnet-20250219";
         readonly max_tokens: 8192;
         readonly models: readonly ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-haiku-20240307"];
+        readonly keyName: "ANTHROPIC_API_KEY";
     };
     readonly gemini: {
         readonly agentName: "geminiAgent";
         readonly defaultModel: "gemini-2.5-flash";
         readonly max_tokens: 8192;
         readonly models: readonly ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash"];
+        readonly keyName: "GEMINI_API_KEY";
     };
     readonly groq: {
         readonly agentName: "groqAgent";
         readonly defaultModel: "llama-3.1-8b-instant";
+        readonly keyName: "GROQ_API_KEY";
         readonly max_tokens: 4096;
         readonly models: readonly ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "deepseek-r1-distill-llama-70b", "openai/gpt-oss-120b", "openai/gpt-oss-20b"];
     };

package/lib/utils/provider2agent.js CHANGED Viewed

@@ -3,16 +3,19 @@ export const provider2TTSAgent = {
     nijivoice: {
         agentName: "ttsNijivoiceAgent",
         hasLimitedConcurrency: true,
+        keyName: "NIJIVOICE_API_KEY",
     },
     openai: {
         agentName: "ttsOpenaiAgent",
         hasLimitedConcurrency: false,
         defaultModel: "gpt-4o-mini-tts",
         defaultVoice: "shimmer",
+        keyName: "OPENAI_API_KEY",
     },
     google: {
         agentName: "ttsGoogleAgent",
         hasLimitedConcurrency: false,
+        keyName: "GEMINI_API_KEY",
     },
     elevenlabs: {
         agentName: "ttsElevenlabsAgent",
@@ -21,6 +24,7 @@ export const provider2TTSAgent = {
         // Models | ElevenLabs Documentation
         // https://elevenlabs.io/docs/models
         models: ["eleven_multilingual_v2", "eleven_turbo_v2_5", "eleven_turbo_v2", "eleven_flash_v2_5", "eleven_flash_v2"],
+        keyName: "ELEVENLABS_API_KEY",
     },
     mock: {
         agentName: "mediaMockAgent",
@@ -34,27 +38,32 @@ export const provider2ImageAgent = {
         agentName: "imageOpenaiAgent",
         defaultModel: "gpt-image-1",
         models: ["dall-e-3", "gpt-image-1"],
+        keyName: "OPENAI_API_KEY",
     },
     google: {
         agentName: "imageGenAIAgent",
         defaultModel: "gemini-2.5-flash-image-preview",
         models: ["imagen-3.0-generate-002", "imagen-4.0-generate-preview-06-06", "imagen-4.0-ultra-generate-preview-06-06", "gemini-2.5-flash-image-preview"],
+        keyName: "GEMINI_API_KEY",
     },
     replicate: {
         agentName: "imageReplicateAgent",
         defaultModel: "bytedance/seedream-4",
         models: ["bytedance/seedream-4", "qwen/qwen-image"],
+        keyName: "REPLICATE_API_TOKEN",
     },
     mock: {
         agentName: "mediaMockAgent",
         defaultModel: "mock-model",
         models: ["mock-model"],
+        keyName: "",
     },
 };
 export const provider2MovieAgent = {
     replicate: {
         agentName: "movieReplicateAgent",
         defaultModel: "bytedance/seedance-1-lite",
+        keyName: "REPLICATE_API_TOKEN",
         models: [
             "bytedance/seedance-1-lite",
             "bytedance/seedance-1-pro",
@@ -151,17 +160,20 @@ export const provider2MovieAgent = {
         agentName: "movieGenAIAgent",
         defaultModel: "veo-2.0-generate-001",
         models: ["veo-2.0-generate-001", "veo-3.0-generate-preview"],
+        keyName: "GEMINI_API_KEY",
     },
     mock: {
         agentName: "mediaMockAgent",
         defaultModel: "mock-model",
         models: ["mock-model"],
+        keyName: "",
     },
 };
 export const provider2SoundEffectAgent = {
     replicate: {
         agentName: "soundEffectReplicateAgent",
         defaultModel: "zsxkib/mmaudio",
+        keyName: "REPLICATE_API_TOKEN",
         models: ["zsxkib/mmaudio"],
         modelParams: {
             "zsxkib/mmaudio": {
@@ -174,6 +186,7 @@ export const provider2LipSyncAgent = {
     replicate: {
         agentName: "lipSyncReplicateAgent",
         defaultModel: "bytedance/omni-human",
+        keyName: "REPLICATE_API_TOKEN",
         models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human"],
         modelParams: {
             "bytedance/latentsync": {
@@ -212,6 +225,7 @@ export const provider2LLMAgent = {
     openai: {
         agentName: "openAIAgent",
         defaultModel: "gpt-5",
+        keyName: "OPENAI_API_KEY",
         max_tokens: 8192,
         models: [
             "gpt-5",
@@ -234,16 +248,19 @@ export const provider2LLMAgent = {
         defaultModel: "claude-3-7-sonnet-20250219",
         max_tokens: 8192,
         models: ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-haiku-20240307"],
+        keyName: "ANTHROPIC_API_KEY",
     },
     gemini: {
         agentName: "geminiAgent",
         defaultModel: "gemini-2.5-flash",
         max_tokens: 8192,
         models: ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash"],
+        keyName: "GEMINI_API_KEY",
     },
     groq: {
         agentName: "groqAgent",
         defaultModel: "llama-3.1-8b-instant",
+        keyName: "GROQ_API_KEY",
         max_tokens: 4096,
         models: ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "deepseek-r1-distill-llama-70b", "openai/gpt-oss-120b", "openai/gpt-oss-20b"],
     },

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mulmocast",
-  "version": "1.2.33",
+  "version": "1.2.35",
   "description": "",
   "type": "module",
   "main": "lib/index.node.js",
@@ -69,7 +69,7 @@
   "homepage": "https://github.com/receptron/mulmocast-cli#readme",
   "dependencies": {
     "@google-cloud/text-to-speech": "^6.3.0",
-    "@google/genai": "^1.19.0",
+    "@google/genai": "^1.20.0",
     "@graphai/anthropic_agent": "^2.0.11",
     "@graphai/browserless_agent": "^2.0.1",
     "@graphai/gemini_agent": "^2.0.1",
@@ -90,7 +90,7 @@
     "graphai": "^2.0.15",
     "jsdom": "^27.0.0",
     "marked": "^16.3.0",
-    "mulmocast-vision": "^1.0.3",
+    "mulmocast-vision": "^1.0.4",
     "ora": "^8.2.0",
     "puppeteer": "^24.20.0",
     "replicate": "^1.1.0",
@@ -111,7 +111,7 @@
     "prettier": "^3.6.2",
     "tsx": "^4.20.5",
     "typescript": "^5.9.2",
-    "typescript-eslint": "^8.43.0"
+    "typescript-eslint": "^8.44.0"
   },
   "engines": {
     "node": ">=18.0.0"