mulmocast 1.2.65 → 1.2.67

This diff compares the published contents of two versions of this package as they appear in their public registry. It is provided for informational purposes only.
@@ -1,39 +1,30 @@
 import path from "path";
 import fs from "fs";
-import { GraphAILogger } from "graphai";
 import { listLocalizedAudioPaths } from "./audio.js";
-import { imagePreprocessAgent } from "./image_agents.js";
 import { mkdir } from "../utils/file.js";
 import { ZipBuilder } from "../utils/zip.js";
 import { bundleTargetLang } from "../utils/const.js";
-const beatImage = (context) => {
-  return async (beat, index) => {
-    try {
-      const res = await imagePreprocessAgent({ context, beat, index, imageRefs: {} });
-      if ("htmlPrompt" in res) {
-        return { htmlImageSource: res.htmlImageFile, imageSource: res.imagePath };
-      }
-      const { imagePath, movieFile, lipSyncFile } = res;
-      return { imageSource: imagePath, videoSource: movieFile, videoWithAudioSource: lipSyncFile };
-    }
-    catch (e) {
-      GraphAILogger.log(e);
-      return {};
-    }
-  };
-};
-// TODO reference
+import { createSilentAudio } from "../utils/ffmpeg_utils.js";
 const viewJsonFileName = "mulmo_view.json";
 const zipFileName = "mulmo.zip";
+const imageSourceMappings = [
+  ["imageFile", "imageSource"],
+  ["movieFile", "videoSource"],
+  ["soundEffectFile", "soundEffectSource"],
+  ["lipSyncFile", "videoWithAudioSource"],
+  ["htmlImageFile", "htmlImageSource"],
+];
 export const mulmoViewerBundle = async (context) => {
   const isZip = true;
   const dir = path.resolve(context.fileDirs.fileName);
   mkdir(dir);
   const zipper = new ZipBuilder(path.resolve(dir, zipFileName));
+  // text
   const resultJson = [];
   context.studio.script.beats.forEach((beat) => {
     resultJson.push({ text: beat.text, duration: beat.duration, audioSources: {}, multiLinguals: {} });
   });
+  // audio
   for (const lang of bundleTargetLang) {
     const audios = listLocalizedAudioPaths({ ...context, lang });
     audios.forEach((audio, index) => {
@@ -49,14 +40,13 @@ export const mulmoViewerBundle = async (context) => {
       }
     });
   }
-  const images = await Promise.all(context.studio.script.beats.map(beatImage(context)));
-  images.forEach((image, index) => {
+  // image, movie
+  context.studio.beats.forEach((image, index) => {
     const data = resultJson[index];
-    const keys = ["htmlImageSource", "imageSource", "videoSource", "videoWithAudioSource"];
-    keys.forEach((key) => {
+    imageSourceMappings.forEach(([key, source]) => {
       const value = image[key];
-      if (value) {
-        data[key] = path.basename(value);
+      if (typeof value === "string") {
+        data[source] = path.basename(value);
         if (fs.existsSync(value)) {
           fs.copyFileSync(value, path.resolve(dir, path.basename(value)));
           zipper.addFile(value);
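
Note: the bundler no longer re-runs `imagePreprocessAgent` per beat; it reads the media files already recorded on `context.studio.beats` and translates them through the `imageSourceMappings` table. A minimal sketch of that table-driven copy, with assumed (simplified) types — the real studio beats carry more fields than listed here:

```ts
// Sketch of the mapping-table pattern above; types are assumptions for illustration.
type StudioBeat = Partial<Record<"imageFile" | "movieFile" | "soundEffectFile" | "lipSyncFile" | "htmlImageFile", string>>;
type ViewerSources = Record<string, string>;

const imageSourceMappings = [
  ["imageFile", "imageSource"],
  ["movieFile", "videoSource"],
  ["soundEffectFile", "soundEffectSource"],
  ["lipSyncFile", "videoWithAudioSource"],
  ["htmlImageFile", "htmlImageSource"],
] as const;

const toViewerSources = (beat: StudioBeat): ViewerSources => {
  const sources: ViewerSources = {};
  for (const [key, source] of imageSourceMappings) {
    const value = beat[key];
    // Only copy fields that were actually produced for this beat.
    if (typeof value === "string") {
      sources[source] = value; // the bundler additionally strips directories via path.basename
    }
  }
  return sources;
};
```
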
@@ -64,6 +54,23 @@ export const mulmoViewerBundle = async (context) => {
       }
     });
   });
+  // silent
+  await Promise.all(context.studio.script.beats.map(async (__, index) => {
+    const data = resultJson[index];
+    if (data.audioSources &&
+      Object.keys(data.audioSources).length === 0 &&
+      data.videoSource === undefined &&
+      data.videoWithAudioSource === undefined &&
+      data.duration) {
+      const file = `silent_${index}.mp3`;
+      const audioFile = path.resolve(dir, file);
+      await createSilentAudio(audioFile, data.duration);
+      zipper.addFile(audioFile);
+      data.audioSources.ja = file;
+      data.audioSources.en = file;
+    }
+  }));
+  // multiLinguals
 context.multiLingual.forEach((beat, index) => {
   bundleTargetLang.forEach((lang) => {
     if (resultJson[index] && resultJson[index].multiLinguals) {
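
Note: the new `// silent` pass fills beats that have a duration but no audio or video source with a generated silent MP3, so viewer playback timing stays consistent. The trigger condition, restated as a plain predicate (a sketch; field names follow the `resultJson` entries assembled above):

```ts
// Sketch: a beat needs a generated silent track when it has a duration
// but no audio source and no video source of either kind.
type ViewerEntry = {
  duration?: number;
  audioSources: Record<string, string>;
  videoSource?: string;
  videoWithAudioSource?: string;
};

const needsSilentAudio = (entry: ViewerEntry): boolean =>
  Object.keys(entry.audioSources).length === 0 &&
  entry.videoSource === undefined &&
  entry.videoWithAudioSource === undefined &&
  Boolean(entry.duration);
```
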
@@ -1,3 +1,4 @@
+import { GraphAILogger } from "graphai";
 import { MulmoPresentationStyleMethods, MulmoStudioContextMethods, MulmoBeatMethods, MulmoMediaSourceMethods } from "../methods/index.js";
 import { getBeatPngImagePath, getBeatMoviePaths, getAudioFilePath } from "../utils/file.js";
 import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
@@ -28,14 +29,17 @@ export const imagePreprocessAgent = async (namedInputs) => {
     beatDuration: beat.duration ?? studioBeat?.duration,
   };
   const isMovie = Boolean(beat.moviePrompt || beat?.image?.type === "movie");
-  if (isMovie) {
-    if (beat.soundEffectPrompt) {
+  if (beat.soundEffectPrompt) {
+    if (isMovie) {
       returnValue.soundEffectAgentInfo = MulmoPresentationStyleMethods.getSoundEffectAgentInfo(context.presentationStyle, beat);
       returnValue.soundEffectModel =
         beat.soundEffectParams?.model ?? context.presentationStyle.soundEffectParams?.model ?? returnValue.soundEffectAgentInfo.defaultModel;
       returnValue.soundEffectFile = moviePaths.soundEffectFile;
       returnValue.soundEffectPrompt = beat.soundEffectPrompt;
     }
+    else {
+      GraphAILogger.warn(`soundEffectPrompt is set, but there is no video. beat: ${index}`);
+    }
   }
   if (beat.enableLipSync) {
     const lipSyncAgentInfo = MulmoPresentationStyleMethods.getLipSyncAgentInfo(context.presentationStyle, beat);
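
Note: swapping the nesting changes behavior in exactly one case — a `soundEffectPrompt` on a non-movie beat used to be silently ignored and now produces a warning. A condensed restatement of the new control flow (a sketch, not the module's actual signature):

```ts
// Sketch of the inverted nesting: a sound effect still requires a movie beat,
// but a dangling soundEffectPrompt is now reported instead of dropped.
const handleSoundEffect = (isMovie: boolean, soundEffectPrompt: string | undefined, index: number): void => {
  if (!soundEffectPrompt) {
    return; // nothing requested
  }
  if (isMovie) {
    // populate returnValue.soundEffect* as in the diff above
  } else {
    console.warn(`soundEffectPrompt is set, but there is no video. beat: ${index}`);
  }
};
```
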
@@ -354,11 +354,33 @@ export const images_graph_data = {
       studio.beats.forEach((studioBeat, index) => {
         const beat = studio.script.beats[index];
         if (beat.image?.type === "beat") {
-          if (beat.image.id && beatIndexMap[beat.image.id] !== undefined) {
-            studioBeat.imageFile = studio.beats[beatIndexMap[beat.image.id]].imageFile;
+          // reference Beat by plugin
+          const referenceBeat = (() => {
+            if (beat.image.id) {
+              if (beatIndexMap[beat.image.id] !== undefined) {
+                return studio.beats[beatIndexMap[beat.image.id]];
+              }
+              else {
+                GraphAILogger.info(`reference beat not exist: id=${beat.image.id}`);
+              }
+            }
+            else if (index > 0) {
+              return studio.beats[index - 1];
+            }
+          })();
+          if (referenceBeat === undefined) {
+            // error?
+            GraphAILogger.info(`reference beat not exist: index=${index}`);
           }
-          else if (index > 0) {
-            studioBeat.imageFile = studio.beats[index - 1].imageFile;
+          else {
+            studioBeat.imageFile = referenceBeat.imageFile;
+            studioBeat.movieFile = referenceBeat.movieFile;
+            studioBeat.soundEffectFile = referenceBeat.soundEffectFile;
+            studioBeat.lipSyncFile = referenceBeat.lipSyncFile;
+            studioBeat.hasMovieAudio = referenceBeat.hasMovieAudio;
+            studioBeat.htmlImageFile = referenceBeat.htmlImageFile;
+            studioBeat.markdown = referenceBeat.markdown;
+            studioBeat.html = referenceBeat.html;
           }
         }
       });
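
Note: beats with `image.type === "beat"` now inherit every derived artifact from the referenced beat — movie, sound effect, lip sync, HTML image, markdown, html — not just `imageFile`. Resolution order: an explicit `beat.image.id` wins; otherwise the previous beat is used; a missing target is logged rather than thrown. A compact sketch of the field copy (the field list is taken from the diff; beat shapes are assumed):

```ts
// Sketch: the reference beat donates all derived artifacts, not only the image.
const derivedFields = [
  "imageFile",
  "movieFile",
  "soundEffectFile",
  "lipSyncFile",
  "hasMovieAudio",
  "htmlImageFile",
  "markdown",
  "html",
] as const;

const copyDerivedFields = (target: Record<string, unknown>, reference: Record<string, unknown>): void => {
  for (const field of derivedFields) {
    target[field] = reference[field];
  }
};
```
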
@@ -1,5 +1,6 @@
 import type { AgentFunction, AgentFunctionInfo } from "graphai";
 import type { AgentBufferResult, ImageAgentInputs, ImageAgentParams, GenAIImageAgentConfig } from "../types/agent.js";
+export declare const ratio2BlankPath: (aspectRatio: string) => string;
 export declare const imageGenAIAgent: AgentFunction<ImageAgentParams, AgentBufferResult, ImageAgentInputs, GenAIImageAgentConfig>;
 declare const imageGenAIAgentInfo: AgentFunctionInfo;
 export default imageGenAIAgentInfo;
@@ -11,9 +11,53 @@ const getAspectRatio = (canvasSize) => {
   else if (canvasSize.width < canvasSize.height) {
     return "9:16";
   }
-  else {
-    return "1:1";
+  return "1:1";
+};
+export const ratio2BlankPath = (aspectRatio) => {
+  if (aspectRatio === "9:16") {
+    return blankVerticalImagePath();
+  }
+  else if (aspectRatio === "1:1") {
+    return blankSquareImagePath();
+  }
+  return blankImagePath();
+};
+const getGeminiContents = (prompt, aspectRatio, referenceImages) => {
+  const contents = [{ text: prompt }];
+  const images = [...(referenceImages ?? [])];
+  // NOTE: There is no way to explicitly specify the aspect ratio for Gemini. This is just a hint.
+  images.push(ratio2BlankPath(aspectRatio));
+  images.forEach((imagePath) => {
+    const imageData = fs.readFileSync(imagePath);
+    const base64Image = imageData.toString("base64");
+    contents.push({ inlineData: { mimeType: "image/png", data: base64Image } });
+  });
+  return contents;
+};
+const geminiFlashResult = (response) => {
+  if (!response.candidates?.[0]?.content?.parts) {
+    throw new Error("ERROR: generateContent returned no candidates", {
+      cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
+    });
+  }
+  for (const part of response.candidates[0].content.parts) {
+    if (part.text) {
+      GraphAILogger.info("Gemini image generation response:", part.text);
+    }
+    else if (part.inlineData) {
+      const imageData = part.inlineData.data;
+      if (!imageData) {
+        throw new Error("ERROR: generateContent returned no image data", {
+          cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
+        });
+      }
+      const buffer = Buffer.from(imageData, "base64");
+      return { buffer };
+    }
   }
+  throw new Error("ERROR: generateContent returned no image data", {
+    cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
+  });
 };
 export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
   const { prompt, referenceImages } = namedInputs;
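
Note: the Gemini branch was factored into three helpers — `ratio2BlankPath` (now exported) picks a blank PNG matching the requested aspect ratio, `getGeminiContents` appends it to the reference images as a hint (this Gemini model takes no explicit aspect-ratio parameter), and `geminiFlashResult` extracts the first inline image from the response. A self-contained sketch of the hint trick; the asset paths below are hypothetical stand-ins for the module's blank-image helpers:

```ts
import fs from "fs";

// Sketch: since the model has no aspect-ratio parameter, a blank PNG of the
// desired shape is appended as one more image part. Paths are assumptions.
const ratio2BlankPath = (aspectRatio: string): string => {
  if (aspectRatio === "9:16") return "./assets/blank_vertical.png"; // hypothetical path
  if (aspectRatio === "1:1") return "./assets/blank_square.png"; // hypothetical path
  return "./assets/blank.png"; // hypothetical path
};

const toInlinePart = (imagePath: string) => ({
  inlineData: { mimeType: "image/png", data: fs.readFileSync(imagePath).toString("base64") },
});

const buildContents = (prompt: string, aspectRatio: string, referenceImages: string[] = []) => [
  { text: prompt },
  ...referenceImages.map(toInlinePart),
  toInlinePart(ratio2BlankPath(aspectRatio)), // the hint, not a guarantee
];
```
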
@@ -28,47 +72,9 @@ export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
   try {
     const ai = new GoogleGenAI({ apiKey });
     if (model === "gemini-2.5-flash-image-preview") {
-      const contents = [{ text: prompt }];
-      const images = [...(referenceImages ?? [])];
-      // NOTE: There is no way to explicitly specify the aspect ratio for Gemini. This is just a hint.
-      if (aspectRatio === "9:16") {
-        images.push(blankVerticalImagePath());
-      }
-      else if (aspectRatio === "1:1") {
-        images.push(blankSquareImagePath());
-      }
-      else {
-        images.push(blankImagePath());
-      }
-      images.forEach((imagePath) => {
-        const imageData = fs.readFileSync(imagePath);
-        const base64Image = imageData.toString("base64");
-        contents.push({ inlineData: { mimeType: "image/png", data: base64Image } });
-      });
+      const contents = getGeminiContents(prompt, aspectRatio, referenceImages);
       const response = await ai.models.generateContent({ model, contents });
-      if (!response.candidates?.[0]?.content?.parts) {
-        throw new Error("ERROR: generateContent returned no candidates", {
-          cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
-        });
-      }
-      for (const part of response.candidates[0].content.parts) {
-        if (part.text) {
-          GraphAILogger.info("Gemini image generation response:", part.text);
-        }
-        else if (part.inlineData) {
-          const imageData = part.inlineData.data;
-          if (!imageData) {
-            throw new Error("ERROR: generateContent returned no image data", {
-              cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
-            });
-          }
-          const buffer = Buffer.from(imageData, "base64");
-          return { buffer };
-        }
-      }
-      throw new Error("ERROR: generateContent returned no image data", {
-        cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
-      });
+      return geminiFlashResult(response);
     }
     else {
       const response = await ai.models.generateImages({
@@ -11,6 +11,7 @@ const waitStable = async (page, ms = 1200, step = 200) => {
   let last = -1;
   let stable = 0;
   while (stable < ms) {
+    // eslint-disable-next-line no-undef
    const len = await page.evaluate(() => document.body?.innerText?.length || 0);
    stable = len === last ? stable + step : 0;
    last = len;
@@ -38,6 +39,7 @@ const fetchArticle = async (url) => {
   let finalText = text;
   if (finalText.length < 100) {
     const raw = await page.evaluate(() => {
+      // eslint-disable-next-line no-undef
       const el = document.querySelector("article, main, [role=main], .article, .post") || document.body;
       return el?.textContent || "";
     });
@@ -17,3 +17,4 @@ export declare const ffmpegGetMediaDuration: (filePath: string) => Promise<{
 }>;
 export declare const extractImageFromMovie: (movieFile: string, imagePath: string) => Promise<object>;
 export declare const trimMusic: (inputFile: string, startTime: number, duration: number) => Promise<Buffer>;
+export declare const createSilentAudio: (filePath: string, durationSec: number) => Promise<void>;
@@ -132,3 +132,15 @@ export const trimMusic = (inputFile, startTime, duration) => {
     });
   });
 };
+export const createSilentAudio = (filePath, durationSec) => {
+  const filter = `anullsrc=r=44100:cl=stereo,atrim=duration=${durationSec},aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo[a]`;
+  return new Promise((resolve, reject) => {
+    ffmpeg()
+      .complexFilter([filter])
+      .outputOptions(["-map", "[a]"])
+      .output(filePath)
+      .on("end", () => resolve())
+      .on("error", (err) => reject(err))
+      .run();
+  });
+};
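
Note: `createSilentAudio` builds an MP3 of silence with fluent-ffmpeg — `anullsrc` generates a silent 44.1 kHz stereo source, `atrim` cuts it to the requested duration, and the labeled stream `[a]` is mapped to the output file. A hedged usage sketch; the deep import path is an assumption, since the package's public entry point may not re-export this internal module:

```ts
import path from "path";
import { createSilentAudio } from "mulmocast/lib/utils/ffmpeg_utils.js"; // hypothetical import path

// Generate 2.5 seconds of silence for a beat that has a duration but no audio.
const target = path.resolve("output", "silent_0.mp3");
await createSilentAudio(target, 2.5);
```
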
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "mulmocast",
-  "version": "1.2.65",
+  "version": "1.2.67",
   "description": "",
   "type": "module",
   "main": "lib/index.node.js",
@@ -92,7 +92,7 @@
     "dotenv": "^17.2.3",
     "fluent-ffmpeg": "^2.1.3",
     "graphai": "^2.0.16",
-    "jsdom": "^27.0.0",
+    "jsdom": "^27.0.1",
     "marked": "^16.4.1",
     "mulmocast-vision": "^1.0.4",
     "ora": "^9.0.0",
@@ -116,7 +116,7 @@
     "prettier": "^3.6.2",
     "tsx": "^4.20.6",
     "typescript": "^5.9.3",
-    "typescript-eslint": "^8.46.1"
+    "typescript-eslint": "^8.46.2"
   },
   "engines": {
     "node": ">=20.0.0"
@@ -49,7 +49,7 @@
     },
     {
       "speaker": "Presenter",
-      "text": "This is a reference beat.",
+      "text": "This is a image reference beat.",
       "duration": 0.5,
       "image": {
         "type": "beat",
@@ -81,6 +81,7 @@
       }
     },
     {
+      "id": "textSlide",
       "speaker": "Presenter",
       "text": "",
       "duration": 2,
@@ -94,6 +95,7 @@
     },
     {
       "speaker": "Presenter",
+      "id": "pingpongmov",
       "text": "This is a local movie with audio.",
       "image": {
         "type": "movie",
@@ -253,6 +255,24 @@
           "</footer>"
         ]
       }
+    },
+    {
+      "speaker": "Presenter",
+      "text": "This is a text slide reference beat.",
+      "duration": 0.5,
+      "image": {
+        "type": "beat",
+        "id": "textSlide"
+      }
+    },
+    {
+      "speaker": "Presenter",
+      "text": "This is a movie reference beat.",
+      "duration": 0.5,
+      "image": {
+        "type": "beat",
+        "id": "pingpongmov"
+      }
     }
   ]
 }