mulmocast 1.2.11 → 1.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  import "dotenv/config";
2
2
  import { MulmoStudioContext, MulmoBeat, PublicAPIArgs } from "../types/index.js";
3
3
  export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
4
- export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, args?: PublicAPIArgs) => Promise<void>;
4
+ export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, args?: PublicAPIArgs & {
5
+ langs: string[];
6
+ }) => Promise<void>;
5
7
  export declare const audio: (context: MulmoStudioContext, args?: PublicAPIArgs) => Promise<MulmoStudioContext>;
@@ -40,9 +40,9 @@ export const getBeatAudioPath = (text, context, beat, lang) => {
40
40
  const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
41
41
  return getAudioPath(context, beat, audioFile);
42
42
  };
43
- const preprocessor = (namedInputs) => {
44
- const { beat, studioBeat, multiLingual, context } = namedInputs;
45
- const { lang } = context;
43
+ const preprocessorAgent = (namedInputs) => {
44
+ const { beat, studioBeat, multiLingual, context, lang } = namedInputs;
45
+ // const { lang } = context;
46
46
  const text = localizedText(beat, multiLingual, lang);
47
47
  const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat);
48
48
  const audioPath = getBeatAudioPath(text, context, beat, lang);
@@ -68,13 +68,15 @@ const graph_tts = {
68
68
  multiLingual: {},
69
69
  context: {},
70
70
  __mapIndex: {},
71
+ lang: {},
71
72
  preprocessor: {
72
- agent: preprocessor,
73
+ agent: preprocessorAgent,
73
74
  inputs: {
74
75
  beat: ":beat",
75
76
  studioBeat: ":studioBeat",
76
77
  multiLingual: ":multiLingual",
77
78
  context: ":context",
79
+ lang: ":lang",
78
80
  },
79
81
  },
80
82
  tts: {
@@ -103,6 +105,33 @@ const graph_tts = {
103
105
  },
104
106
  },
105
107
  };
108
+ const graph_tts_map = {
109
+ version: 0.5,
110
+ concurrency: 8,
111
+ nodes: {
112
+ beat: {},
113
+ studioBeat: {},
114
+ multiLingual: {},
115
+ context: {},
116
+ __mapIndex: {},
117
+ langs: {},
118
+ map: {
119
+ agent: "mapAgent",
120
+ inputs: {
121
+ rows: ":langs",
122
+ beat: ":beat",
123
+ studioBeat: ":studioBeat",
124
+ multiLingual: ":multiLingual",
125
+ context: ":context",
126
+ __mapIndex: ":__mapIndex",
127
+ },
128
+ params: {
129
+ rowKey: "lang",
130
+ },
131
+ graph: graph_tts,
132
+ },
133
+ },
134
+ };
106
135
  const graph_data = {
107
136
  version: 0.5,
108
137
  concurrency: 8,
@@ -119,6 +148,7 @@ const graph_data = {
119
148
  studioBeat: ":context.studio.beats",
120
149
  multiLingual: ":context.multiLingual",
121
150
  context: ":context",
151
+ lang: ":context.lang",
122
152
  },
123
153
  params: {
124
154
  rowKey: "beat",
@@ -188,7 +218,7 @@ const audioAgents = {
188
218
  combineAudioFilesAgent,
189
219
  };
190
220
  export const generateBeatAudio = async (index, context, args) => {
191
- const { settings, callbacks } = args ?? {};
221
+ const { settings, callbacks, langs } = args ?? {};
192
222
  try {
193
223
  MulmoStudioContextMethods.setSessionState(context, "audio", true);
194
224
  const fileName = MulmoStudioContextMethods.getFileName(context);
@@ -199,12 +229,18 @@ export const generateBeatAudio = async (index, context, args) => {
199
229
  mkdir(audioSegmentDirPath);
200
230
  const config = settings2GraphAIConfig(settings);
201
231
  const taskManager = new TaskManager(getConcurrency(context));
202
- const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager, config });
232
+ const graph = new GraphAI(langs ? graph_tts_map : graph_tts, audioAgents, { agentFilters, taskManager, config });
203
233
  graph.injectValue("__mapIndex", index);
204
234
  graph.injectValue("beat", context.studio.script.beats[index]);
205
235
  graph.injectValue("studioBeat", context.studio.beats[index]);
206
- graph.injectValue("multiLingual", context.multiLingual);
236
+ graph.injectValue("multiLingual", context.multiLingual[index]);
207
237
  graph.injectValue("context", context);
238
+ if (langs) {
239
+ graph.injectValue("langs", langs);
240
+ }
241
+ else {
242
+ graph.injectValue("lang", context.lang);
243
+ }
208
244
  if (callbacks) {
209
245
  callbacks.forEach((callback) => {
210
246
  graph.registerCallback(callback);
@@ -1,9 +1,9 @@
1
1
  import fs from "fs";
2
2
  import { GraphAI, GraphAILogger } from "graphai";
3
- import { getReferenceImagePath } from "../utils/file.js";
3
+ import { getReferenceImagePath, resolveAssetPath } from "../utils/file.js";
4
4
  import { getExtention } from "../utils/utils.js";
5
5
  import { graphOption } from "./images.js";
6
- import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
6
+ import { MulmoPresentationStyleMethods } from "../methods/index.js";
7
7
  import { imageOpenaiAgent, mediaMockAgent, imageGenAIAgent } from "../agents/index.js";
8
8
  // public api
9
9
  // Application may call this function directly to generate reference image.
@@ -70,7 +70,7 @@ export const getImageRefs = async (context) => {
70
70
  }
71
71
  else if (image.type === "image") {
72
72
  if (image.source.kind === "path") {
73
- imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
73
+ imageRefs[key] = resolveAssetPath(context, image.source.path);
74
74
  }
75
75
  else if (image.source.kind === "url") {
76
76
  imageRefs[key] = await downLoadImage(context, key, image.source.url);
@@ -144,10 +144,18 @@ const translateGraph = {
144
144
  mergeStudioResult: {
145
145
  isResult: true,
146
146
  agent: (namedInputs) => {
147
- const { multiLingual, beats } = namedInputs;
147
+ const { multiLingual, beats, originalMultiLingual } = namedInputs;
148
148
  const multiLingualObject = beats.reduce((tmp, beat, beatIndex) => {
149
149
  const key = beatId(beat?.id, beatIndex);
150
- tmp[key] = multiLingual[beatIndex];
150
+ const originalData = originalMultiLingual[beatIndex]?.multiLingualTexts ?? {};
151
+ const { multiLingualTexts, cacheKey } = multiLingual[beatIndex];
152
+ tmp[key] = {
153
+ cacheKey,
154
+ multiLingualTexts: {
155
+ ...originalData,
156
+ ...multiLingualTexts,
157
+ },
158
+ };
151
159
  return tmp;
152
160
  }, {});
153
161
  return {
@@ -156,7 +164,8 @@ const translateGraph = {
156
164
  };
157
165
  },
158
166
  inputs: {
159
- multiLingual: ":beatsMap.mergeMultiLingualData",
167
+ originalMultiLingual: ":context.multiLingual", // original
168
+ multiLingual: ":beatsMap.mergeMultiLingualData", // update
160
169
  beats: ":context.studio.script.beats",
161
170
  },
162
171
  },
@@ -1,9 +1,10 @@
1
+ import fs from "fs";
1
2
  import { GraphAILogger } from "graphai";
2
3
  import { getAspectRatio } from "./movie_google_agent.js";
3
4
  import { provider2ImageAgent } from "../utils/provider2agent.js";
4
5
  import { GoogleGenAI, PersonGeneration } from "@google/genai";
5
6
  export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
6
- const { prompt } = namedInputs;
7
+ const { prompt, referenceImages } = namedInputs;
7
8
  const aspectRatio = getAspectRatio(params.canvasSize);
8
9
  const model = params.model ?? provider2ImageAgent["google"].defaultModel;
9
10
  const apiKey = config?.apiKey;
@@ -12,24 +13,53 @@ export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
12
13
  }
13
14
  try {
14
15
  const ai = new GoogleGenAI({ apiKey });
15
- const response = await ai.models.generateImages({
16
- model,
17
- prompt,
18
- config: {
19
- numberOfImages: 1, // default is 4!
20
- aspectRatio,
21
- personGeneration: PersonGeneration.ALLOW_ALL,
22
- // safetyFilterLevel: SafetyFilterLevel.BLOCK_ONLY_HIGH,
23
- },
24
- });
25
- if (!response.generatedImages || response.generatedImages.length === 0) {
26
- throw new Error("ERROR: generateImage returned no generated images");
16
+ if (model === "gemini-2.5-flash-image-preview") {
17
+ const contents = [{ text: prompt }];
18
+ referenceImages?.forEach((imagePath) => {
19
+ const imageData = fs.readFileSync(imagePath);
20
+ const base64Image = imageData.toString("base64");
21
+ contents.push({ inlineData: { mimeType: "image/png", data: base64Image } });
22
+ });
23
+ // NOTE: There is no way to specify the aspect ratio for Gemini.
24
+ const response = await ai.models.generateContent({ model, contents });
25
+ if (!response.candidates?.[0]?.content?.parts) {
26
+ throw new Error("ERROR: generateContent returned no candidates");
27
+ }
28
+ for (const part of response.candidates[0].content.parts) {
29
+ if (part.text) {
30
+ GraphAILogger.info("Gemini image generation response:", part.text);
31
+ }
32
+ else if (part.inlineData) {
33
+ const imageData = part.inlineData.data;
34
+ if (!imageData) {
35
+ throw new Error("ERROR: generateContent returned no image data");
36
+ }
37
+ const buffer = Buffer.from(imageData, "base64");
38
+ return { buffer };
39
+ }
40
+ }
41
+ throw new Error("ERROR: generateContent returned no image data");
27
42
  }
28
- const image = response.generatedImages[0].image;
29
- if (image && image.imageBytes) {
30
- return { buffer: Buffer.from(image.imageBytes, "base64") };
43
+ else {
44
+ const response = await ai.models.generateImages({
45
+ model,
46
+ prompt,
47
+ config: {
48
+ numberOfImages: 1, // default is 4!
49
+ aspectRatio,
50
+ personGeneration: PersonGeneration.ALLOW_ALL,
51
+ // safetyFilterLevel: SafetyFilterLevel.BLOCK_ONLY_HIGH,
52
+ },
53
+ });
54
+ if (!response.generatedImages || response.generatedImages.length === 0) {
55
+ throw new Error("ERROR: generateImage returned no generated images");
56
+ }
57
+ const image = response.generatedImages[0].image;
58
+ if (image && image.imageBytes) {
59
+ return { buffer: Buffer.from(image.imageBytes, "base64") };
60
+ }
61
+ throw new Error("ERROR: generateImage returned no image bytes");
31
62
  }
32
- throw new Error("ERROR: generateImage returned no image bytes");
33
63
  }
34
64
  catch (error) {
35
65
  GraphAILogger.info("Failed to generate image:", error);
@@ -3,5 +3,7 @@ export * from "./utils/provider2agent.js";
3
3
  export * from "./utils/const.js";
4
4
  export * from "./utils/string.js";
5
5
  export * from "./utils/utils.js";
6
+ export * from "./utils/prompt.js";
6
7
  export * from "./methods/mulmo_presentation_style.js";
7
8
  export * from "./methods/mulmo_script.js";
9
+ export * from "./methods/mulmo_studio_context.js";
@@ -4,5 +4,7 @@ export * from "./utils/provider2agent.js";
4
4
  export * from "./utils/const.js";
5
5
  export * from "./utils/string.js";
6
6
  export * from "./utils/utils.js";
7
+ export * from "./utils/prompt.js";
7
8
  export * from "./methods/mulmo_presentation_style.js";
8
9
  export * from "./methods/mulmo_script.js";
10
+ export * from "./methods/mulmo_studio_context.js";
@@ -1,6 +1,5 @@
1
1
  import fs from "fs";
2
- import { getFullPath } from "../utils/file.js";
3
- import { MulmoStudioContextMethods } from "../methods/index.js";
2
+ import { getFullPath, resolveAssetPath } from "../utils/file.js";
4
3
  export const MulmoMediaSourceMethods = {
5
4
  async getText(mediaSource, context) {
6
5
  if (mediaSource.kind === "text") {
@@ -23,7 +22,7 @@ export const MulmoMediaSourceMethods = {
23
22
  if (!mediaSource)
24
23
  return null;
25
24
  if (mediaSource.kind === "path") {
26
- return MulmoStudioContextMethods.resolveAssetPath(context, mediaSource.path);
25
+ return resolveAssetPath(context, mediaSource.path);
27
26
  }
28
27
  if (mediaSource.kind === "url") {
29
28
  return mediaSource.url;
@@ -1,3 +1,8 @@
1
+ /**
2
+ * Browser-friendly packages only.
3
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
4
+ * Works in both Node.js and modern browsers.
5
+ */
1
6
  import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider, MulmoStudioContext } from "../types/index.js";
2
7
  export declare const MulmoPresentationStyleMethods: {
3
8
  getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;
@@ -1,4 +1,8 @@
1
- // node & browser
1
+ /**
2
+ * Browser-friendly packages only.
3
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
4
+ * Works in both Node.js and modern browsers.
5
+ */
2
6
  import { isNull } from "graphai";
3
7
  import { userAssert } from "../utils/utils.js";
4
8
  import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2MovieProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema, } from "../types/schema.js";
@@ -1,3 +1,8 @@
1
+ /**
2
+ * Browser-friendly packages only.
3
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
4
+ * Works in both Node.js and modern browsers.
5
+ */
1
6
  import { type MulmoStudioBeat, type MulmoScript, type MulmoStudioMultiLingual } from "../types/index.js";
2
7
  export declare const MulmoScriptMethods: {
3
8
  validate(script: any): MulmoScript;
@@ -1,4 +1,8 @@
1
- // node & browser
1
+ /**
2
+ * Browser-friendly packages only.
3
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
4
+ * Works in both Node.js and modern browsers.
5
+ */
2
6
  import { GraphAILogger } from "graphai";
3
7
  import { mulmoScriptSchema, mulmoStudioMultiLingualFileSchema } from "../types/index.js";
4
8
  import { beatId } from "../utils/utils.js";
@@ -1,8 +1,12 @@
1
+ /**
2
+ * Browser-friendly packages only.
3
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
4
+ * Works in both Node.js and modern browsers.
5
+ */
1
6
  import { BeatSessionType, MulmoStudioContext, SessionProgressCallback, SessionType } from "../types/index.js";
2
7
  export declare const addSessionProgressCallback: (cb: SessionProgressCallback) => void;
3
8
  export declare const removeSessionProgressCallback: (cb: SessionProgressCallback) => void;
4
9
  export declare const MulmoStudioContextMethods: {
5
- resolveAssetPath(context: MulmoStudioContext, relativePath: string): string;
6
10
  getAudioDirPath(context: MulmoStudioContext): string;
7
11
  getImageDirPath(context: MulmoStudioContext): string;
8
12
  getImageProjectDirPath(context: MulmoStudioContext): string;
@@ -1,4 +1,8 @@
1
- import path from "path";
1
+ /**
2
+ * Browser-friendly packages only.
3
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
4
+ * Works in both Node.js and modern browsers.
5
+ */
2
6
  import { beatId } from "../utils/utils.js";
3
7
  import { GraphAILogger } from "graphai";
4
8
  const sessionProgressCallbacks = new Set();
@@ -25,9 +29,6 @@ const notifyBeatStateChange = (context, sessionType, id) => {
25
29
  }
26
30
  };
27
31
  export const MulmoStudioContextMethods = {
28
- resolveAssetPath(context, relativePath) {
29
- return path.resolve(context.fileDirs.mulmoFileDirPath, relativePath);
30
- },
31
32
  getAudioDirPath(context) {
32
33
  return context.fileDirs.audioDirPath;
33
34
  },
@@ -27,7 +27,9 @@ export type AgentErrorResult = {
27
27
  export type AgentConfig = {
28
28
  apiKey?: string;
29
29
  };
30
- export type ImageAgentInputs = AgentPromptInputs;
30
+ export type ImageAgentInputs = AgentPromptInputs & {
31
+ referenceImages: string[] | null | undefined;
32
+ };
31
33
  export type OpenAIImageAgentInputs = AgentPromptInputs & {
32
34
  referenceImages: string[] | null | undefined;
33
35
  };
@@ -38,6 +38,7 @@ export declare const getCaptionImagePath: (context: MulmoStudioContext, index: n
38
38
  export declare const getOutputPdfFilePath: (outDirPath: string, fileName: string, pdfMode: PDFMode, lang?: string) => string;
39
39
  export declare const getPromptTemplateFilePath: (promptTemplateName: string) => string;
40
40
  export declare const mkdir: (dirPath: string) => void;
41
+ export declare const resolveAssetPath: (context: MulmoStudioContext, relativePath: string) => string;
41
42
  export declare const silent60secPath: () => string;
42
43
  export declare const defaultBGMPath: () => string;
43
44
  export declare const mulmoCreditPath: () => string;
package/lib/utils/file.js CHANGED
@@ -128,6 +128,10 @@ export const mkdir = (dirPath) => {
128
128
  fs.mkdirSync(dirPath, { recursive: true });
129
129
  }
130
130
  };
131
+ // asset path
132
+ export const resolveAssetPath = (context, relativePath) => {
133
+ return path.resolve(context.fileDirs.mulmoFileDirPath, relativePath);
134
+ };
131
135
  // export const silentPath = path.resolve(npmRoot, "./assets/audio/silent300.mp3");
132
136
  // export const silentLastPath = path.resolve(npmRoot, "./assets/audio/silent800.mp3");
133
137
  export const silent60secPath = () => path.resolve(npmRoot, "./assets/audio/silent60sec.mp3");
@@ -38,7 +38,7 @@ export const provider2ImageAgent = {
38
38
  google: {
39
39
  agentName: "imageGenAIAgent",
40
40
  defaultModel: "imagen-4.0-generate-preview-06-06",
41
- models: ["imagen-3.0-generate-002", "imagen-4.0-generate-preview-06-06", "imagen-4.0-ultra-generate-preview-06-06"],
41
+ models: ["imagen-3.0-generate-002", "imagen-4.0-generate-preview-06-06", "imagen-4.0-ultra-generate-preview-06-06", "gemini-2.5-flash-image-preview"],
42
42
  },
43
43
  mock: {
44
44
  agentName: "mediaMockAgent",
@@ -1,3 +1,8 @@
1
+ /**
2
+ * Browser-friendly packages only.
3
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
4
+ * Works in both Node.js and modern browsers.
5
+ */
1
6
  import type { ConfigDataDictionary, DefaultConfigData } from "graphai";
2
7
  import { MulmoBeat, MulmoStudioBeat, MulmoStudioMultiLingual, MulmoStudioMultiLingualData } from "../types/index.js";
3
8
  import { type LLM } from "./provider2agent.js";
@@ -1,4 +1,8 @@
1
- // node & browser
1
+ /**
2
+ * Browser-friendly packages only.
3
+ * (No Node.js built-ins like fs, path, dotenv, etc.)
4
+ * Works in both Node.js and modern browsers.
5
+ */
2
6
  import { provider2LLMAgent } from "./provider2agent.js";
3
7
  export const llmPair = (_llm, _model) => {
4
8
  const llmKey = _llm ?? "openai";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast",
3
- "version": "1.2.11",
3
+ "version": "1.2.12",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "lib/index.node.js",
@@ -55,7 +55,6 @@
55
55
  "format": "prettier --write '{src,scripts,assets/templates,assets/styles,draft,ideason,scripts_mag2,proto,test,batch,graphai,output,docs/scripts}/**/*.{ts,json,yaml}'",
56
56
  "deep_research": "npx tsx ./src/tools/deep_research.ts",
57
57
  "template": "npx tsx batch/template2tsobject.ts && yarn run format",
58
- "fake_data": "npx tsx test/fake/sample.ts",
59
58
  "mcp_server": "npx tsx ./src/mcp/server.ts"
60
59
  },
61
60
  "repository": "git+ssh://git@github.com/receptron/mulmocast-cli.git",
@@ -81,7 +80,6 @@
81
80
  "@inquirer/select": "^4.3.2",
82
81
  "@modelcontextprotocol/sdk": "^1.17.4",
83
82
  "@tavily/core": "^0.5.11",
84
- "canvas": "^3.2.0",
85
83
  "clipboardy": "^4.0.0",
86
84
  "dotenv": "^17.2.1",
87
85
  "fluent-ffmpeg": "^2.1.3",
@@ -96,8 +94,6 @@
96
94
  "zod-to-json-schema": "^3.24.6"
97
95
  },
98
96
  "devDependencies": {
99
- "@anatine/zod-mock": "^3.14.0",
100
- "@faker-js/faker": "^9.9.0",
101
97
  "@receptron/test_utils": "^2.0.3",
102
98
  "@types/fluent-ffmpeg": "^2.1.26",
103
99
  "@types/yargs": "^17.0.33",
@@ -106,7 +102,6 @@
106
102
  "eslint-plugin-prettier": "^5.5.4",
107
103
  "eslint-plugin-sonarjs": "^3.0.5",
108
104
  "prettier": "^3.6.2",
109
- "ts-node": "^10.9.2",
110
105
  "tsx": "^4.20.5",
111
106
  "typescript": "^5.9.2",
112
107
  "typescript-eslint": "^8.41.0"
@@ -9,6 +9,14 @@
9
9
  },
10
10
  "lang": "en",
11
11
  "beats": [
12
+ {
13
+ "id": "gemini_2_5_flash_image_preview",
14
+ "text": "image generated by gemini-2.5-flash-image-preview",
15
+ "imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
16
+ "imageParams": {
17
+ "model": "gemini-2.5-flash-image-preview"
18
+ }
19
+ },
12
20
  {
13
21
  "id": "imagen_3",
14
22
  "text": "image generated by imagen-3",
@@ -45,6 +45,16 @@
45
45
  "text": "Hello World with no reference image",
46
46
  "imagePrompt": "Saying hello to the world",
47
47
  "imageNames": []
48
+ },
49
+ {
50
+ "id": "gemini_2_5_flash_image_preview",
51
+ "text": "Hello World with a witch and a broom with Gemini",
52
+ "imagePrompt": "Saying hello to the world",
53
+ "imageNames": ["witch", "broom"],
54
+ "imageParams": {
55
+ "provider": "google",
56
+ "model": "gemini-2.5-flash-image-preview"
57
+ }
48
58
  }
49
59
  ]
50
60
  }