mulmocast 1.2.66 → 1.2.68

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.

@@ -2,38 +2,66 @@ import path from "path";
  import fs from "fs";
  import { GraphAILogger } from "graphai";
  import { listLocalizedAudioPaths } from "./audio.js";
- import { imagePreprocessAgent } from "./image_agents.js";
  import { mkdir } from "../utils/file.js";
  import { ZipBuilder } from "../utils/zip.js";
  import { bundleTargetLang } from "../utils/const.js";
- const beatImage = (context) => {
-     return async (beat, index) => {
-         try {
-             const res = await imagePreprocessAgent({ context, beat, index, imageRefs: {} });
-             if ("htmlPrompt" in res) {
-                 return { htmlImageSource: res.htmlImageFile, imageSource: res.imagePath };
-             }
-             const { imagePath, movieFile, lipSyncFile } = res;
-             return { imageSource: imagePath, videoSource: movieFile, videoWithAudioSource: lipSyncFile };
-         }
-         catch (e) {
-             GraphAILogger.log(e);
-             return {};
+ import { createSilentAudio } from "../utils/ffmpeg_utils.js";
+ const downloadFile = async (url, destPath) => {
+     const response = await fetch(url);
+     if (!response.ok) {
+         throw new Error(`Failed to download file from ${url}: ${response.statusText}`);
+     }
+     const buffer = await response.arrayBuffer();
+     fs.writeFileSync(destPath, Buffer.from(buffer));
+ };
+ const processBgm = async (bgm, dir, zipper) => {
+     if (!bgm) {
+         return undefined;
+     }
+     if (bgm.kind === "path") {
+         // Local file path
+         const sourcePath = path.resolve(bgm.path);
+         if (!fs.existsSync(sourcePath)) {
+             GraphAILogger.log(`BGM file not found: ${sourcePath}`);
+             return undefined;
          }
-     };
+         const fileName = path.basename(bgm.path);
+         const destPath = path.resolve(dir, fileName);
+         fs.copyFileSync(sourcePath, destPath);
+         zipper.addFile(sourcePath, fileName);
+         return fileName;
+     }
+     else if (bgm.kind === "url") {
+         // URL download
+         const fileName = path.basename(new URL(bgm.url).pathname) || "bgm.mp3";
+         const destPath = path.resolve(dir, fileName);
+         await downloadFile(bgm.url, destPath);
+         zipper.addFile(destPath);
+         return fileName;
+     }
+     // base64 or other formats are not supported
+     return undefined;
  };
- // TODO reference
  const viewJsonFileName = "mulmo_view.json";
  const zipFileName = "mulmo.zip";
+ const imageSourceMappings = [
+     ["imageFile", "imageSource"],
+     ["movieFile", "videoSource"],
+     ["soundEffectFile", "soundEffectSource"],
+     ["lipSyncFile", "videoWithAudioSource"],
+     ["htmlImageFile", "htmlImageSource"],
+ ];
  export const mulmoViewerBundle = async (context) => {
      const isZip = true;
      const dir = path.resolve(context.fileDirs.fileName);
      mkdir(dir);
      const zipper = new ZipBuilder(path.resolve(dir, zipFileName));
+     // text
      const resultJson = [];
      context.studio.script.beats.forEach((beat) => {
          resultJson.push({ text: beat.text, duration: beat.duration, audioSources: {}, multiLinguals: {} });
      });
+     // audio
      for (const lang of bundleTargetLang) {
          const audios = listLocalizedAudioPaths({ ...context, lang });
          audios.forEach((audio, index) => {
@@ -49,14 +77,13 @@ export const mulmoViewerBundle = async (context) => {
              }
          });
      }
-     const images = await Promise.all(context.studio.script.beats.map(beatImage(context)));
-     images.forEach((image, index) => {
+     // image, movie
+     context.studio.beats.forEach((image, index) => {
          const data = resultJson[index];
-         const keys = ["htmlImageSource", "imageSource", "videoSource", "videoWithAudioSource"];
-         keys.forEach((key) => {
+         imageSourceMappings.forEach(([key, source]) => {
              const value = image[key];
-             if (value) {
-                 data[key] = path.basename(value);
+             if (typeof value === "string") {
+                 data[source] = path.basename(value);
                  if (fs.existsSync(value)) {
                      fs.copyFileSync(value, path.resolve(dir, path.basename(value)));
                      zipper.addFile(value);
@@ -64,6 +91,23 @@ export const mulmoViewerBundle = async (context) => {
              }
          });
      });
+     // silent
+     await Promise.all(context.studio.script.beats.map(async (__, index) => {
+         const data = resultJson[index];
+         if (data.audioSources &&
+             Object.keys(data.audioSources).length === 0 &&
+             data.videoSource === undefined &&
+             data.videoWithAudioSource === undefined &&
+             data.duration) {
+             const file = `silent_${index}.mp3`;
+             const audioFile = path.resolve(dir, file);
+             await createSilentAudio(audioFile, data.duration);
+             zipper.addFile(audioFile);
+             data.audioSources.ja = file;
+             data.audioSources.en = file;
+         }
+     }));
+     // multiLinguals
      context.multiLingual.forEach((beat, index) => {
          bundleTargetLang.forEach((lang) => {
              if (resultJson[index] && resultJson[index].multiLinguals) {
@@ -71,7 +115,10 @@ export const mulmoViewerBundle = async (context) => {
              }
          });
      });
-     fs.writeFileSync(path.resolve(dir, viewJsonFileName), JSON.stringify({ beats: resultJson, bgmSource: context.studio?.script.audioParams?.bgm }, null, 2));
+     // BGM
+     const bgmFileName = await processBgm(context.studio?.script.audioParams?.bgm, dir, zipper);
+     const bundleData = { beats: resultJson, bgmSource: bgmFileName };
+     fs.writeFileSync(path.resolve(dir, viewJsonFileName), JSON.stringify(bundleData, null, 2));
      zipper.addFile(path.resolve(dir, viewJsonFileName));
      if (isZip) {
          await zipper.finalize();
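
For orientation, the mulmo_view.json that mulmoViewerBundle writes now has roughly the shape below. This is an illustrative sketch only: the field names follow the MulmoViewerBeat / MulmoViewerData types added later in this diff, while the file names and values are hypothetical and the type import path is an assumption, not something taken from this release.

// Hypothetical example of a bundled mulmo_view.json, typed with the new viewer types.
// Import path is an assumption; the types may not be re-exported from the package root.
import type { MulmoViewerData } from "mulmocast";

const example: MulmoViewerData = {
  beats: [
    {
      text: "Welcome to the show.",
      duration: 4.2, // seconds; used for the silent placeholder track when the beat has no audio
      audioSources: { ja: "audio_1_ja.mp3", en: "audio_1_en.mp3" },
      multiLinguals: { ja: "番組へようこそ。", en: "Welcome to the show." },
      imageSource: "image_1.png", // basename of the file copied next to the JSON and zipped
    },
  ],
  bgmSource: "bgm.mp3", // file name returned by processBgm, or undefined when there is no BGM
};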

@@ -1,5 +1,6 @@
  import type { AgentFunction, AgentFunctionInfo } from "graphai";
  import type { AgentBufferResult, ImageAgentInputs, ImageAgentParams, GenAIImageAgentConfig } from "../types/agent.js";
+ export declare const ratio2BlankPath: (aspectRatio: string) => string;
  export declare const imageGenAIAgent: AgentFunction<ImageAgentParams, AgentBufferResult, ImageAgentInputs, GenAIImageAgentConfig>;
  declare const imageGenAIAgentInfo: AgentFunctionInfo;
  export default imageGenAIAgentInfo;

@@ -11,9 +11,53 @@ const getAspectRatio = (canvasSize) => {
      else if (canvasSize.width < canvasSize.height) {
          return "9:16";
      }
-     else {
-         return "1:1";
+     return "1:1";
+ };
+ export const ratio2BlankPath = (aspectRatio) => {
+     if (aspectRatio === "9:16") {
+         return blankVerticalImagePath();
+     }
+     else if (aspectRatio === "1:1") {
+         return blankSquareImagePath();
+     }
+     return blankImagePath();
+ };
+ const getGeminiContents = (prompt, aspectRatio, referenceImages) => {
+     const contents = [{ text: prompt }];
+     const images = [...(referenceImages ?? [])];
+     // NOTE: There is no way to explicitly specify the aspect ratio for Gemini. This is just a hint.
+     images.push(ratio2BlankPath(aspectRatio));
+     images.forEach((imagePath) => {
+         const imageData = fs.readFileSync(imagePath);
+         const base64Image = imageData.toString("base64");
+         contents.push({ inlineData: { mimeType: "image/png", data: base64Image } });
+     });
+     return contents;
+ };
+ const geminiFlashResult = (response) => {
+     if (!response.candidates?.[0]?.content?.parts) {
+         throw new Error("ERROR: generateContent returned no candidates", {
+             cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
+         });
+     }
+     for (const part of response.candidates[0].content.parts) {
+         if (part.text) {
+             GraphAILogger.info("Gemini image generation response:", part.text);
+         }
+         else if (part.inlineData) {
+             const imageData = part.inlineData.data;
+             if (!imageData) {
+                 throw new Error("ERROR: generateContent returned no image data", {
+                     cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
+                 });
+             }
+             const buffer = Buffer.from(imageData, "base64");
+             return { buffer };
+         }
      }
+     throw new Error("ERROR: generateContent returned no image data", {
+         cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
+     });
  };
  export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
      const { prompt, referenceImages } = namedInputs;
@@ -28,47 +72,9 @@ export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
      try {
          const ai = new GoogleGenAI({ apiKey });
          if (model === "gemini-2.5-flash-image-preview") {
-             const contents = [{ text: prompt }];
-             const images = [...(referenceImages ?? [])];
-             // NOTE: There is no way to explicitly specify the aspect ratio for Gemini. This is just a hint.
-             if (aspectRatio === "9:16") {
-                 images.push(blankVerticalImagePath());
-             }
-             else if (aspectRatio === "1:1") {
-                 images.push(blankSquareImagePath());
-             }
-             else {
-                 images.push(blankImagePath());
-             }
-             images.forEach((imagePath) => {
-                 const imageData = fs.readFileSync(imagePath);
-                 const base64Image = imageData.toString("base64");
-                 contents.push({ inlineData: { mimeType: "image/png", data: base64Image } });
-             });
+             const contents = getGeminiContents(prompt, aspectRatio, referenceImages);
              const response = await ai.models.generateContent({ model, contents });
-             if (!response.candidates?.[0]?.content?.parts) {
-                 throw new Error("ERROR: generateContent returned no candidates", {
-                     cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
-                 });
-             }
-             for (const part of response.candidates[0].content.parts) {
-                 if (part.text) {
-                     GraphAILogger.info("Gemini image generation response:", part.text);
-                 }
-                 else if (part.inlineData) {
-                     const imageData = part.inlineData.data;
-                     if (!imageData) {
-                         throw new Error("ERROR: generateContent returned no image data", {
-                             cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
-                         });
-                     }
-                     const buffer = Buffer.from(imageData, "base64");
-                     return { buffer };
-                 }
-             }
-             throw new Error("ERROR: generateContent returned no image data", {
-                 cause: agentInvalidResponseError("imageGenAIAgent", imageAction, imageFileTarget),
-             });
+             return geminiFlashResult(response);
          }
          else {
              const response = await ai.models.generateImages({

@@ -11,6 +11,7 @@ const waitStable = async (page, ms = 1200, step = 200) => {
      let last = -1;
      let stable = 0;
      while (stable < ms) {
+         // eslint-disable-next-line no-undef
          const len = await page.evaluate(() => document.body?.innerText?.length || 0);
          stable = len === last ? stable + step : 0;
          last = len;
@@ -38,6 +39,7 @@ const fetchArticle = async (url) => {
      let finalText = text;
      if (finalText.length < 100) {
          const raw = await page.evaluate(() => {
+             // eslint-disable-next-line no-undef
              const el = document.querySelector("article, main, [role=main], .article, .post") || document.body;
              return el?.textContent || "";
          });

@@ -134,3 +134,18 @@ export type PublicAPIArgs = {
      callbacks?: CallbackFunction[];
  };
  export type ImageType = "image" | "movie";
+ export type MulmoViewerBeat = {
+     text?: string;
+     duration?: number;
+     multiLinguals?: Record<string, string>;
+     audioSources?: Record<string, string>;
+     imageSource?: string;
+     videoSource?: string;
+     videoWithAudioSource?: string;
+     htmlImageSource?: string;
+     soundEffectSource?: string;
+ };
+ export type MulmoViewerData = {
+     beats: MulmoViewerBeat[];
+     bgmSource?: string;
+ };

@@ -17,3 +17,4 @@ export declare const ffmpegGetMediaDuration: (filePath: string) => Promise<{
  }>;
  export declare const extractImageFromMovie: (movieFile: string, imagePath: string) => Promise<object>;
  export declare const trimMusic: (inputFile: string, startTime: number, duration: number) => Promise<Buffer>;
+ export declare const createSilentAudio: (filePath: string, durationSec: number) => Promise<void>;

@@ -132,3 +132,15 @@ export const trimMusic = (inputFile, startTime, duration) => {
          });
      });
  };
+ export const createSilentAudio = (filePath, durationSec) => {
+     const filter = `anullsrc=r=44100:cl=stereo,atrim=duration=${durationSec},aformat=sample_fmts=fltp:sample_rates=44100:channel_layouts=stereo[a]`;
+     return new Promise((resolve, reject) => {
+         ffmpeg()
+             .complexFilter([filter])
+             .outputOptions(["-map", "[a]"])
+             .output(filePath)
+             .on("end", () => resolve())
+             .on("error", (err) => reject(err))
+             .run();
+     });
+ };
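
createSilentAudio is a thin fluent-ffmpeg wrapper around the anullsrc source filter; the bundle step above calls it to give text-only beats (no audio sources, no video) a silent placeholder track of the beat's duration. A minimal usage sketch follows, assuming the helper is reachable via a subpath import of the compiled lib; the import path and output location are assumptions, not part of this diff.

// Sketch: write 3 seconds of silent 44.1 kHz stereo audio to an mp3 file.
// The subpath import is an assumption; inside the package the module is "../utils/ffmpeg_utils.js".
import { createSilentAudio } from "mulmocast/lib/utils/ffmpeg_utils.js";

await createSilentAudio("/tmp/silent_3s.mp3", 3); // resolves once ffmpeg finishes writing the file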

package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "mulmocast",
-   "version": "1.2.66",
+   "version": "1.2.68",
    "description": "",
    "type": "module",
    "main": "lib/index.node.js",
@@ -92,7 +92,7 @@
      "dotenv": "^17.2.3",
      "fluent-ffmpeg": "^2.1.3",
      "graphai": "^2.0.16",
-     "jsdom": "^27.0.0",
+     "jsdom": "^27.0.1",
      "marked": "^16.4.1",
      "mulmocast-vision": "^1.0.4",
      "ora": "^9.0.0",
@@ -116,7 +116,7 @@
      "prettier": "^3.6.2",
      "tsx": "^4.20.6",
      "typescript": "^5.9.3",
-     "typescript-eslint": "^8.46.1"
+     "typescript-eslint": "^8.46.2"
    },
    "engines": {
      "node": ">=20.0.0"