mulmocast 1.2.33 → 1.2.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -106,13 +106,11 @@ OPENAI_API_KEY=your_openai_api_key
106
106
  DEFAULT_OPENAI_IMAGE_MODEL=gpt-image-1 # for the advanced image generation model
107
107
  ```
108
108
 
109
- #### (Optional) For Google's image generation model
109
+ #### (Optional) For Google's image generation and TTS models
110
110
  ```bash
111
- GOOGLE_PROJECT_ID=your_google_project_id
111
+ GEMINI_API_KEY=your_google_gemini_api_key
112
112
  ```
113
113
 
114
- See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
115
-
116
114
  #### (Optional) For AI providers
117
115
  ```bash
118
116
  # For Anthropic Claude (htmlPrompt feature)
@@ -9,7 +9,6 @@ import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, re
9
9
  import { localizedText, settings2GraphAIConfig } from "../utils/utils.js";
10
10
  import { text2hash } from "../utils/utils_node.js";
11
11
  import { provider2TTSAgent } from "../utils/provider2agent.js";
12
- import { MulmoPresentationStyleMethods } from "../methods/index.js";
13
12
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
14
13
  import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
15
14
  dotenv.config({ quiet: true });
@@ -27,15 +26,9 @@ const getAudioPath = (context, beat, audioFile) => {
27
26
  }
28
27
  return audioFile;
29
28
  };
30
- const getAudioParam = (context, beat, lang) => {
31
- const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat, lang);
32
- const speechOptions = { ...speaker.speechOptions, ...beat.speechOptions };
33
- const provider = text2SpeechProviderSchema.parse(speaker.provider);
34
- return { voiceId: speaker.voiceId, provider, speechOptions, model: speaker.model };
35
- };
36
29
  export const getBeatAudioPath = (text, context, beat, lang) => {
37
30
  const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
38
- const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat, lang);
31
+ const { voiceId, provider, speechOptions, model } = MulmoStudioContextMethods.getAudioParam(context, beat, lang);
39
32
  const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider, model ?? ""].join(":");
40
33
  GraphAILogger.log(`getBeatAudioPath [${hash_string}]`);
41
34
  const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
@@ -54,7 +47,7 @@ const preprocessorAgent = (namedInputs) => {
54
47
  const { beat, studioBeat, multiLingual, context, lang } = namedInputs;
55
48
  // const { lang } = context;
56
49
  const text = localizedText(beat, multiLingual, lang);
57
- const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat, lang);
50
+ const { voiceId, provider, speechOptions, model } = MulmoStudioContextMethods.getAudioParam(context, beat, lang);
58
51
  const audioPath = getBeatAudioPath(text, context, beat, lang);
59
52
  studioBeat.audioFile = audioPath; // TODO: Passing by reference is difficult to maintain, so pass it using graphai inputs
60
53
  const needsTTS = !beat.audio && audioPath !== undefined;
@@ -46,6 +46,6 @@ const ttsGoogleAgentInfo = {
46
46
  author: "Receptron Team",
47
47
  repository: "https://github.com/receptron/mulmocast-cli/",
48
48
  license: "MIT",
49
- environmentVariables: ["GOOGLE_GENAI_API_KEY"],
49
+ environmentVariables: ["GEMINI_API_KEY"],
50
50
  };
51
51
  export default ttsGoogleAgentInfo;
@@ -26,10 +26,12 @@ export declare const MulmoPresentationStyleMethods: {
26
26
  duration: number;
27
27
  } | undefined;
28
28
  };
29
+ keyName: string;
29
30
  };
30
31
  getSoundEffectAgentInfo(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): {
31
32
  agentName: string;
32
33
  defaultModel: import("../utils/provider2agent.js").ReplicateModel;
34
+ keyName: string;
33
35
  models: import("../utils/provider2agent.js").ReplicateModel[];
34
36
  modelParams: Record<import("../utils/provider2agent.js").ReplicateModel, {
35
37
  identifier?: `${string}/${string}:${string}`;
@@ -38,6 +40,7 @@ export declare const MulmoPresentationStyleMethods: {
38
40
  getLipSyncAgentInfo(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): {
39
41
  agentName: string;
40
42
  defaultModel: import("../utils/provider2agent.js").ReplicateModel;
43
+ keyName: string;
41
44
  models: import("../utils/provider2agent.js").ReplicateModel[];
42
45
  modelParams: Record<import("../utils/provider2agent.js").ReplicateModel, {
43
46
  identifier?: `${string}/${string}:${string}` | `${string}/${string}`;
@@ -86,6 +86,7 @@ export const MulmoPresentationStyleMethods = {
86
86
  return {
87
87
  agent: agentInfo.agentName,
88
88
  imageParams: { ...defaultImageParams, ...imageParams },
89
+ keyName: agentInfo.keyName,
89
90
  };
90
91
  },
91
92
  getMovieAgentInfo(presentationStyle, beat) {
@@ -95,6 +96,7 @@ export const MulmoPresentationStyleMethods = {
95
96
  return {
96
97
  agent: agentInfo.agentName,
97
98
  movieParams,
99
+ keyName: agentInfo.keyName,
98
100
  };
99
101
  },
100
102
  getSoundEffectAgentInfo(presentationStyle, beat) {
@@ -3,7 +3,7 @@
3
3
  * (No Node.js built-ins like fs, path, dotenv, etc.)
4
4
  * Works in both Node.js and modern browsers.
5
5
  */
6
- import { BeatSessionType, MulmoStudioContext, SessionProgressCallback, SessionType } from "../types/index.js";
6
+ import { BeatSessionType, MulmoStudioContext, SessionProgressCallback, SessionType, MulmoBeat } from "../types/index.js";
7
7
  export declare const addSessionProgressCallback: (cb: SessionProgressCallback) => void;
8
8
  export declare const removeSessionProgressCallback: (cb: SessionProgressCallback) => void;
9
9
  export declare const MulmoStudioContextMethods: {
@@ -17,4 +17,13 @@ export declare const MulmoStudioContextMethods: {
17
17
  setBeatSessionState(context: MulmoStudioContext, sessionType: BeatSessionType | undefined, index: number, id: string | undefined, value: boolean): void;
18
18
  needTranslate(context: MulmoStudioContext, includeCaption?: boolean): boolean | "" | undefined;
19
19
  getIntroPadding(context: MulmoStudioContext): number;
20
+ getAudioParam(context: MulmoStudioContext, beat: MulmoBeat, lang?: string): {
21
+ voiceId: string;
22
+ provider: "google" | "mock" | "nijivoice" | "openai" | "elevenlabs";
23
+ speechOptions: {
24
+ speed?: number | undefined;
25
+ instruction?: string | undefined;
26
+ };
27
+ model: string | undefined;
28
+ };
20
29
  };
@@ -3,8 +3,10 @@
3
3
  * (No Node.js built-ins like fs, path, dotenv, etc.)
4
4
  * Works in both Node.js and modern browsers.
5
5
  */
6
+ import { text2SpeechProviderSchema } from "../types/index.js";
6
7
  import { beatId } from "../utils/utils.js";
7
8
  import { GraphAILogger } from "graphai";
9
+ import { MulmoPresentationStyleMethods } from "./mulmo_presentation_style.js";
8
10
  const sessionProgressCallbacks = new Set();
9
11
  export const addSessionProgressCallback = (cb) => {
10
12
  sessionProgressCallbacks.add(cb);
@@ -84,4 +86,10 @@ export const MulmoStudioContextMethods = {
84
86
  }
85
87
  return context.presentationStyle.audioParams.introPadding;
86
88
  },
89
+ getAudioParam(context, beat, lang) {
90
+ const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat, lang);
91
+ const speechOptions = { ...speaker.speechOptions, ...beat.speechOptions };
92
+ const provider = text2SpeechProviderSchema.parse(speaker.provider);
93
+ return { voiceId: speaker.voiceId, provider, speechOptions, model: speaker.model };
94
+ },
87
95
  };
@@ -76,6 +76,7 @@ export type PDFSize = (typeof pdf_sizes)[number];
76
76
  export type Text2ImageAgentInfo = {
77
77
  agent: string;
78
78
  imageParams: MulmoImageParams;
79
+ keyName?: string;
79
80
  };
80
81
  export type Text2HtmlAgentInfo = {
81
82
  provider: Text2HtmlImageProvider;
@@ -16,10 +16,11 @@ const processVision = async (params) => {
16
16
  return imagePath;
17
17
  };
18
18
  const dumpHtml = async (params) => {
19
- const { beat } = params;
19
+ const { beat, context } = params;
20
+ const rootDir = context.fileDirs.nodeModuleRootPath ? resolvePath(context.fileDirs.nodeModuleRootPath, "mulmocast-vision") : undefined;
20
21
  if (!beat.image || beat.image.type !== imageType)
21
22
  return;
22
- const handler = new htmlPlugin({});
23
+ const handler = new htmlPlugin({ rootDir });
23
24
  return handler.getHtml(templateNameTofunctionName(beat.image.style), beat.image.data);
24
25
  };
25
26
  export const process = processVision;
@@ -2,22 +2,26 @@ export declare const provider2TTSAgent: {
2
2
  nijivoice: {
3
3
  agentName: string;
4
4
  hasLimitedConcurrency: boolean;
5
+ keyName: string;
5
6
  };
6
7
  openai: {
7
8
  agentName: string;
8
9
  hasLimitedConcurrency: boolean;
9
10
  defaultModel: string;
10
11
  defaultVoice: string;
12
+ keyName: string;
11
13
  };
12
14
  google: {
13
15
  agentName: string;
14
16
  hasLimitedConcurrency: boolean;
17
+ keyName: string;
15
18
  };
16
19
  elevenlabs: {
17
20
  agentName: string;
18
21
  hasLimitedConcurrency: boolean;
19
22
  defaultModel: string;
20
23
  models: string[];
24
+ keyName: string;
21
25
  };
22
26
  mock: {
23
27
  agentName: string;
@@ -31,21 +35,25 @@ export declare const provider2ImageAgent: {
31
35
  agentName: string;
32
36
  defaultModel: string;
33
37
  models: string[];
38
+ keyName: string;
34
39
  };
35
40
  google: {
36
41
  agentName: string;
37
42
  defaultModel: string;
38
43
  models: string[];
44
+ keyName: string;
39
45
  };
40
46
  replicate: {
41
47
  agentName: string;
42
48
  defaultModel: string;
43
49
  models: string[];
50
+ keyName: string;
44
51
  };
45
52
  mock: {
46
53
  agentName: string;
47
54
  defaultModel: string;
48
55
  models: string[];
56
+ keyName: string;
49
57
  };
50
58
  };
51
59
  export type ReplicateModel = `${string}/${string}`;
@@ -53,6 +61,7 @@ export declare const provider2MovieAgent: {
53
61
  replicate: {
54
62
  agentName: string;
55
63
  defaultModel: ReplicateModel;
64
+ keyName: string;
56
65
  models: string[];
57
66
  modelParams: Record<ReplicateModel, {
58
67
  durations: number[];
@@ -65,17 +74,20 @@ export declare const provider2MovieAgent: {
65
74
  agentName: string;
66
75
  defaultModel: string;
67
76
  models: string[];
77
+ keyName: string;
68
78
  };
69
79
  mock: {
70
80
  agentName: string;
71
81
  defaultModel: string;
72
82
  models: string[];
83
+ keyName: string;
73
84
  };
74
85
  };
75
86
  export declare const provider2SoundEffectAgent: {
76
87
  replicate: {
77
88
  agentName: string;
78
89
  defaultModel: ReplicateModel;
90
+ keyName: string;
79
91
  models: ReplicateModel[];
80
92
  modelParams: Record<ReplicateModel, {
81
93
  identifier?: `${string}/${string}:${string}`;
@@ -86,6 +98,7 @@ export declare const provider2LipSyncAgent: {
86
98
  replicate: {
87
99
  agentName: string;
88
100
  defaultModel: ReplicateModel;
101
+ keyName: string;
89
102
  models: ReplicateModel[];
90
103
  modelParams: Record<ReplicateModel, {
91
104
  identifier?: `${string}/${string}:${string}` | `${string}/${string}`;
@@ -99,6 +112,7 @@ export declare const provider2LLMAgent: {
99
112
  readonly openai: {
100
113
  readonly agentName: "openAIAgent";
101
114
  readonly defaultModel: "gpt-5";
115
+ readonly keyName: "OPENAI_API_KEY";
102
116
  readonly max_tokens: 8192;
103
117
  readonly models: readonly ["gpt-5", "gpt-5-nano", "gpt-5-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3", "o3-mini", "o3-pro", "o1", "o1-pro", "gpt-4o", "gpt-4o-mini"];
104
118
  };
@@ -107,16 +121,19 @@ export declare const provider2LLMAgent: {
107
121
  readonly defaultModel: "claude-3-7-sonnet-20250219";
108
122
  readonly max_tokens: 8192;
109
123
  readonly models: readonly ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-haiku-20240307"];
124
+ readonly keyName: "ANTHROPIC_API_KEY";
110
125
  };
111
126
  readonly gemini: {
112
127
  readonly agentName: "geminiAgent";
113
128
  readonly defaultModel: "gemini-2.5-flash";
114
129
  readonly max_tokens: 8192;
115
130
  readonly models: readonly ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash"];
131
+ readonly keyName: "GEMINI_API_KEY";
116
132
  };
117
133
  readonly groq: {
118
134
  readonly agentName: "groqAgent";
119
135
  readonly defaultModel: "llama-3.1-8b-instant";
136
+ readonly keyName: "GROQ_API_KEY";
120
137
  readonly max_tokens: 4096;
121
138
  readonly models: readonly ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "deepseek-r1-distill-llama-70b", "openai/gpt-oss-120b", "openai/gpt-oss-20b"];
122
139
  };
@@ -3,16 +3,19 @@ export const provider2TTSAgent = {
3
3
  nijivoice: {
4
4
  agentName: "ttsNijivoiceAgent",
5
5
  hasLimitedConcurrency: true,
6
+ keyName: "NIJIVOICE_API_KEY",
6
7
  },
7
8
  openai: {
8
9
  agentName: "ttsOpenaiAgent",
9
10
  hasLimitedConcurrency: false,
10
11
  defaultModel: "gpt-4o-mini-tts",
11
12
  defaultVoice: "shimmer",
13
+ keyName: "OPENAI_API_KEY",
12
14
  },
13
15
  google: {
14
16
  agentName: "ttsGoogleAgent",
15
17
  hasLimitedConcurrency: false,
18
+ keyName: "GEMINI_API_KEY",
16
19
  },
17
20
  elevenlabs: {
18
21
  agentName: "ttsElevenlabsAgent",
@@ -21,6 +24,7 @@ export const provider2TTSAgent = {
21
24
  // Models | ElevenLabs Documentation
22
25
  // https://elevenlabs.io/docs/models
23
26
  models: ["eleven_multilingual_v2", "eleven_turbo_v2_5", "eleven_turbo_v2", "eleven_flash_v2_5", "eleven_flash_v2"],
27
+ keyName: "ELEVENLABS_API_KEY",
24
28
  },
25
29
  mock: {
26
30
  agentName: "mediaMockAgent",
@@ -34,27 +38,32 @@ export const provider2ImageAgent = {
34
38
  agentName: "imageOpenaiAgent",
35
39
  defaultModel: "gpt-image-1",
36
40
  models: ["dall-e-3", "gpt-image-1"],
41
+ keyName: "OPENAI_API_KEY",
37
42
  },
38
43
  google: {
39
44
  agentName: "imageGenAIAgent",
40
45
  defaultModel: "gemini-2.5-flash-image-preview",
41
46
  models: ["imagen-3.0-generate-002", "imagen-4.0-generate-preview-06-06", "imagen-4.0-ultra-generate-preview-06-06", "gemini-2.5-flash-image-preview"],
47
+ keyName: "GEMINI_API_KEY",
42
48
  },
43
49
  replicate: {
44
50
  agentName: "imageReplicateAgent",
45
51
  defaultModel: "bytedance/seedream-4",
46
52
  models: ["bytedance/seedream-4", "qwen/qwen-image"],
53
+ keyName: "REPLICATE_API_TOKEN",
47
54
  },
48
55
  mock: {
49
56
  agentName: "mediaMockAgent",
50
57
  defaultModel: "mock-model",
51
58
  models: ["mock-model"],
59
+ keyName: "",
52
60
  },
53
61
  };
54
62
  export const provider2MovieAgent = {
55
63
  replicate: {
56
64
  agentName: "movieReplicateAgent",
57
65
  defaultModel: "bytedance/seedance-1-lite",
66
+ keyName: "REPLICATE_API_TOKEN",
58
67
  models: [
59
68
  "bytedance/seedance-1-lite",
60
69
  "bytedance/seedance-1-pro",
@@ -151,17 +160,20 @@ export const provider2MovieAgent = {
151
160
  agentName: "movieGenAIAgent",
152
161
  defaultModel: "veo-2.0-generate-001",
153
162
  models: ["veo-2.0-generate-001", "veo-3.0-generate-preview"],
163
+ keyName: "GEMINI_API_KEY",
154
164
  },
155
165
  mock: {
156
166
  agentName: "mediaMockAgent",
157
167
  defaultModel: "mock-model",
158
168
  models: ["mock-model"],
169
+ keyName: "",
159
170
  },
160
171
  };
161
172
  export const provider2SoundEffectAgent = {
162
173
  replicate: {
163
174
  agentName: "soundEffectReplicateAgent",
164
175
  defaultModel: "zsxkib/mmaudio",
176
+ keyName: "REPLICATE_API_TOKEN",
165
177
  models: ["zsxkib/mmaudio"],
166
178
  modelParams: {
167
179
  "zsxkib/mmaudio": {
@@ -174,6 +186,7 @@ export const provider2LipSyncAgent = {
174
186
  replicate: {
175
187
  agentName: "lipSyncReplicateAgent",
176
188
  defaultModel: "bytedance/omni-human",
189
+ keyName: "REPLICATE_API_TOKEN",
177
190
  models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human"],
178
191
  modelParams: {
179
192
  "bytedance/latentsync": {
@@ -212,6 +225,7 @@ export const provider2LLMAgent = {
212
225
  openai: {
213
226
  agentName: "openAIAgent",
214
227
  defaultModel: "gpt-5",
228
+ keyName: "OPENAI_API_KEY",
215
229
  max_tokens: 8192,
216
230
  models: [
217
231
  "gpt-5",
@@ -234,16 +248,19 @@ export const provider2LLMAgent = {
234
248
  defaultModel: "claude-3-7-sonnet-20250219",
235
249
  max_tokens: 8192,
236
250
  models: ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-haiku-20240307"],
251
+ keyName: "ANTHROPIC_API_KEY",
237
252
  },
238
253
  gemini: {
239
254
  agentName: "geminiAgent",
240
255
  defaultModel: "gemini-2.5-flash",
241
256
  max_tokens: 8192,
242
257
  models: ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash"],
258
+ keyName: "GEMINI_API_KEY",
243
259
  },
244
260
  groq: {
245
261
  agentName: "groqAgent",
246
262
  defaultModel: "llama-3.1-8b-instant",
263
+ keyName: "GROQ_API_KEY",
247
264
  max_tokens: 4096,
248
265
  models: ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "deepseek-r1-distill-llama-70b", "openai/gpt-oss-120b", "openai/gpt-oss-20b"],
249
266
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast",
3
- "version": "1.2.33",
3
+ "version": "1.2.35",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "lib/index.node.js",
@@ -69,7 +69,7 @@
69
69
  "homepage": "https://github.com/receptron/mulmocast-cli#readme",
70
70
  "dependencies": {
71
71
  "@google-cloud/text-to-speech": "^6.3.0",
72
- "@google/genai": "^1.19.0",
72
+ "@google/genai": "^1.20.0",
73
73
  "@graphai/anthropic_agent": "^2.0.11",
74
74
  "@graphai/browserless_agent": "^2.0.1",
75
75
  "@graphai/gemini_agent": "^2.0.1",
@@ -90,7 +90,7 @@
90
90
  "graphai": "^2.0.15",
91
91
  "jsdom": "^27.0.0",
92
92
  "marked": "^16.3.0",
93
- "mulmocast-vision": "^1.0.3",
93
+ "mulmocast-vision": "^1.0.4",
94
94
  "ora": "^8.2.0",
95
95
  "puppeteer": "^24.20.0",
96
96
  "replicate": "^1.1.0",
@@ -111,7 +111,7 @@
111
111
  "prettier": "^3.6.2",
112
112
  "tsx": "^4.20.5",
113
113
  "typescript": "^5.9.2",
114
- "typescript-eslint": "^8.43.0"
114
+ "typescript-eslint": "^8.44.0"
115
115
  },
116
116
  "engines": {
117
117
  "node": ">=18.0.0"