mulmocast 0.1.5 → 0.1.6

This diff compares the published contents of mulmocast 0.1.5 and 0.1.6 as they appear in the npm registry.
@@ -293,17 +293,11 @@ export const graphOption = async (context, settings) => {
      const config = settings2GraphAIConfig(settings, process.env);
      // We need to get google's auth token only if the google is the text2image provider.
      if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
-         userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
+         userAssert(!!config.movieGoogleAgent || !!config.imageGoogleAgent, "GOOGLE_PROJECT_ID is not set");
          GraphAILogger.log("google was specified as text2image engine");
          const token = await googleAuth();
-         config["imageGoogleAgent"] = {
-             projectId: process.env.GOOGLE_PROJECT_ID,
-             token,
-         };
-         config["movieGoogleAgent"] = {
-             projectId: process.env.GOOGLE_PROJECT_ID,
-             token,
-         };
+         config["imageGoogleAgent"].token = token;
+         config["movieGoogleAgent"].token = token;
      }
      options.config = config;
      return options;
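
In the hunk above, `GOOGLE_PROJECT_ID` resolution moves out of `graphOption` and into `settings2GraphAIConfig` (see the hunk near the end, which now emits `imageGoogleAgent.projectId` and `movieGoogleAgent.projectId`); `graphOption` only checks that a Google agent config exists and attaches the OAuth token. The assert message still reads "GOOGLE_PROJECT_ID is not set", which stays accurate because that key is the only thing feeding those configs. A minimal sketch of the new flow, assuming the caller passes settings explicitly and that the settings key mirrors the env var name:

```ts
// Sketch only: `context` and the settings key name are assumptions;
// the config shape comes from this diff.
const options = await graphOption(context, { GOOGLE_PROJECT_ID: "my-gcp-project" });
// options.config.imageGoogleAgent → { projectId: "my-gcp-project", token: <OAuth token> }
// options.config.movieGoogleAgent → { projectId: "my-gcp-project", token: <OAuth token> }
```
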
@@ -1,4 +1,7 @@
  import "dotenv/config";
  import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const translate: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
+ export declare const translate: (context: MulmoStudioContext, args?: {
+     callbacks?: CallbackFunction[];
+     settings?: Record<string, string>;
+ }) => Promise<void>;
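
The public `translate` signature changes from a positional callbacks array to an options object, making room for per-call settings. A sketch of the call-site migration (the settings key name is an assumption, mirroring the env var):

```ts
// Before (0.1.5)
await translate(context, callbacks);
// After (0.1.6)
await translate(context, { callbacks, settings: { OPENAI_API_KEY: "sk-..." } });
```
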
@@ -4,6 +4,7 @@ import * as agents from "@graphai/vanilla";
  import { openAIAgent } from "@graphai/openai_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
+ import { settings2GraphAIConfig } from "../utils/utils.js";
  import { getOutputMultilingualFilePath, mkdir, writingMessage } from "../utils/file.js";
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
@@ -208,15 +209,17 @@ const agentFilters = [
  ];
  const defaultLang = "en";
  const targetLangs = ["ja", "en"];
- export const translate = async (context, callbacks) => {
+ export const translate = async (context, args) => {
+     const { settings, callbacks } = args ?? {};
      try {
          MulmoStudioContextMethods.setSessionState(context, "multiLingual", true);
          const fileName = MulmoStudioContextMethods.getFileName(context);
          const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
          const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
          mkdir(outDirPath);
-         assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
-         const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
+         const config = settings2GraphAIConfig(settings, process.env);
+         assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
+         const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
          graph.injectValue("context", context);
          graph.injectValue("defaultLang", defaultLang);
          graph.injectValue("targetLangs", targetLangs);
@@ -1,6 +1,6 @@
  import { assert, GraphAILogger } from "graphai";
  import { silent60secPath } from "../utils/file.js";
- import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
+ import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration, } from "../utils/ffmpeg_utils.js";
  import { userAssert } from "../utils/utils.js";
  const getMovieDuration = async (beat) => {
      if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
@@ -62,6 +62,93 @@ const getGroupBeatDurations = (context, group, audioDuration) => {
      });
      return durations;
  };
+ const getInputIds = (context, mediaDurations, ffmpegContext, silentIds) => {
+     const inputIds = [];
+     context.studio.beats.forEach((studioBeat, index) => {
+         const { silenceDuration } = mediaDurations[index];
+         const paddingId = `[padding_${index}]`;
+         if (studioBeat.audioFile) {
+             const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
+             inputIds.push(audioId);
+         }
+         if (silenceDuration > 0) {
+             const silentId = silentIds.pop();
+             ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${silenceDuration}${paddingId}`);
+             inputIds.push(paddingId);
+         }
+     });
+     return inputIds;
+ };
+ const voiceOverProcess = (context, mediaDurations, movieDuration, beatDurations, groupLength) => {
+     return (remaining, idx, iGroup) => {
+         const subBeatDurations = mediaDurations[idx];
+         userAssert(subBeatDurations.audioDuration <= remaining, `Duration Overflow: At index(${idx}) audioDuration(${subBeatDurations.audioDuration}) > remaining(${remaining})`);
+         if (iGroup === groupLength - 1) {
+             beatDurations.push(remaining);
+             subBeatDurations.silenceDuration = remaining - subBeatDurations.audioDuration;
+             return 0;
+         }
+         const nextBeat = context.studio.script.beats[idx + 1];
+         assert(nextBeat.image?.type === "voice_over", "nextBeat.image.type !== voice_over");
+         const voiceStartAt = nextBeat.image?.startAt;
+         if (voiceStartAt) {
+             const remainingDuration = movieDuration - voiceStartAt;
+             const duration = remaining - remainingDuration;
+             userAssert(duration >= 0, `Invalid startAt: At index(${idx}), avaiable duration(${duration}) < 0`);
+             beatDurations.push(duration);
+             subBeatDurations.silenceDuration = duration - subBeatDurations.audioDuration;
+             userAssert(subBeatDurations.silenceDuration >= 0, `Duration Overwrap: At index(${idx}), silenceDuration(${subBeatDurations.silenceDuration}) < 0`);
+             return remainingDuration;
+         }
+         beatDurations.push(subBeatDurations.audioDuration);
+         return remaining - subBeatDurations.audioDuration;
+     };
+ };
+ const getVoiceOverGroup = (context, index) => {
+     const group = [index];
+     for (let i = index + 1; i < context.studio.beats.length && context.studio.script.beats[i].image?.type === "voice_over"; i++) {
+         group.push(i);
+     }
+     return group;
+ };
+ const getSpillOverGroup = (context, mediaDurations, index) => {
+     const group = [index];
+     for (let i = index + 1; i < context.studio.beats.length && !mediaDurations[i].hasMedia; i++) {
+         group.push(i);
+     }
+     return group;
+ };
+ const spilledOverAudio = (context, group, audioDuration, beatDurations, mediaDurations) => {
+     const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
+     // Yes, the current beat has spilled over audio.
+     const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
+     if (beatsTotalDuration > audioDuration + 0.01) {
+         // 0.01 is a tolerance to avoid floating point precision issues
+         group.reduce((remaining, idx, iGroup) => {
+             if (remaining >= groupBeatsDurations[iGroup]) {
+                 return remaining - groupBeatsDurations[iGroup];
+             }
+             mediaDurations[idx].silenceDuration = groupBeatsDurations[iGroup] - remaining;
+             return 0;
+         }, audioDuration);
+     }
+     else if (audioDuration > beatsTotalDuration) {
+         // Last beat gets the rest of the audio.
+         groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+     }
+     beatDurations.push(...groupBeatsDurations);
+ };
+ const noSpilledOverAudio = (context, beat, index, movieDuration, audioDuration, beatDurations, mediaDurations) => {
+     // padding is the amount of audio padding specified in the script.
+     const padding = getPadding(context, beat, index);
+     // totalPadding is the amount of audio padding to be added to the audio file.
+     const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
+     const beatDuration = audioDuration + totalPadding;
+     beatDurations.push(beatDuration);
+     if (totalPadding > 0) {
+         mediaDurations[index].silenceDuration = totalPadding;
+     }
+ };
  const combineAudioFilesAgent = async ({ namedInputs, }) => {
      const { context, combinedFileName } = namedInputs;
      const ffmpegContext = FfmpegContextInit();
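
These helpers are extracted verbatim from the inline loops in `combineAudioFilesAgent` (next hunk); the refactor is behavior-preserving. The trickiest piece is the voice-over reducer, which threads the unclaimed movie time through `reduce`. A worked example with made-up numbers, following `voiceOverProcess` step by step:

```ts
// Hypothetical input: movieDuration = 10 s, group = [0, 1].
// Beat 0: audioDuration 3 s. Beat 1: voice_over with startAt 6 s, audioDuration 2 s.
//
// reduce starts with remaining = 10:
//   idx 0: next beat starts at 6 → remainingDuration = 10 - 6 = 4,
//          duration = 10 - 4 = 6 → beatDurations [6], silence 6 - 3 = 3 s, return 4.
//   idx 1 (last in group): beatDurations [6, 4], silence 4 - 2 = 2 s, return 0.
// The beat durations sum to the movie duration: 6 + 4 = 10 s.
```
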
@@ -77,91 +164,37 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
          const { audioDuration, movieDuration } = mediaDurations[index];
          // Check if we are processing a voice-over beat.
          if (movieDuration > 0) {
-             const group = [index];
-             for (let i = index + 1; i < context.studio.beats.length && context.studio.script.beats[i].image?.type === "voice_over"; i++) {
-                 group.push(i);
-             }
+             const group = getVoiceOverGroup(context, index);
              if (group.length > 1) {
-                 group.reduce((remaining, idx, iGroup) => {
-                     const subBeatDurations = mediaDurations[idx];
-                     userAssert(subBeatDurations.audioDuration <= remaining, `Duration Overflow: At index(${idx}) audioDuration(${subBeatDurations.audioDuration}) > remaining(${remaining})`);
-                     if (iGroup === group.length - 1) {
-                         beatDurations.push(remaining);
-                         subBeatDurations.silenceDuration = remaining - subBeatDurations.audioDuration;
-                         return 0;
-                     }
-                     const nextBeat = context.studio.script.beats[idx + 1];
-                     assert(nextBeat.image?.type === "voice_over", "nextBeat.image.type !== voice_over");
-                     const voiceStartAt = nextBeat.image?.startAt;
-                     if (voiceStartAt) {
-                         const remainingDuration = movieDuration - voiceStartAt;
-                         const duration = remaining - remainingDuration;
-                         userAssert(duration >= 0, `Invalid startAt: At index(${idx}), avaiable duration(${duration}) < 0`);
-                         beatDurations.push(duration);
-                         subBeatDurations.silenceDuration = duration - subBeatDurations.audioDuration;
-                         userAssert(subBeatDurations.silenceDuration >= 0, `Duration Overwrap: At index(${idx}), silenceDuration(${subBeatDurations.silenceDuration}) < 0`);
-                         return remainingDuration;
-                     }
-                     beatDurations.push(subBeatDurations.audioDuration);
-                     return remaining - subBeatDurations.audioDuration;
-                 }, movieDuration);
+                 GraphAILogger.log(`Voice over group: ${group.length}`);
+                 group.reduce(voiceOverProcess(context, mediaDurations, movieDuration, beatDurations, group.length), movieDuration);
                  return;
              }
          }
          // Check if the current beat has media and the next beat does not have media.
          if (audioDuration > 0) {
              // Check if the current beat has spilled over audio.
-             const group = [index];
-             for (let i = index + 1; i < context.studio.beats.length && !mediaDurations[i].hasMedia; i++) {
-                 group.push(i);
-             }
+             const group = getSpillOverGroup(context, mediaDurations, index);
              if (group.length > 1) {
-                 const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
-                 // Yes, the current beat has spilled over audio.
-                 const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
-                 if (beatsTotalDuration > audioDuration + 0.01) {
-                     // 0.01 is a tolerance to avoid floating point precision issues
-                     group.reduce((remaining, idx, iGroup) => {
-                         if (remaining >= groupBeatsDurations[iGroup]) {
-                             return remaining - groupBeatsDurations[iGroup];
-                         }
-                         mediaDurations[idx].silenceDuration = groupBeatsDurations[iGroup] - remaining;
-                         return 0;
-                     }, audioDuration);
-                 }
-                 else {
-                     // Last beat gets the rest of the audio.
-                     if (audioDuration > beatsTotalDuration) {
-                         groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
-                     }
-                 }
-                 beatDurations.push(...groupBeatsDurations);
-             }
-             else {
-                 // No spilled over audio.
-                 assert(beatDurations.length === index, "beatDurations.length !== index");
-                 // padding is the amount of audio padding specified in the script.
-                 const padding = getPadding(context, beat, index);
-                 // totalPadding is the amount of audio padding to be added to the audio file.
-                 const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
-                 const beatDuration = audioDuration + totalPadding;
-                 beatDurations.push(beatDuration);
-                 if (totalPadding > 0) {
-                     mediaDurations[index].silenceDuration = totalPadding;
-                 }
+                 GraphAILogger.log(`Spill over group: ${group.length}`);
+                 spilledOverAudio(context, group, audioDuration, beatDurations, mediaDurations);
+                 return;
              }
+             // No spilled over audio.
+             assert(beatDurations.length === index, "beatDurations.length !== index");
+             noSpilledOverAudio(context, beat, index, movieDuration, audioDuration, beatDurations, mediaDurations);
+             return;
          }
-         else if (movieDuration > 0) {
+         if (movieDuration > 0) {
              // This beat has only a movie, not audio.
              beatDurations.push(movieDuration);
              mediaDurations[index].silenceDuration = movieDuration;
+             return;
          }
-         else {
-             // The current beat has no audio, nor no spilled over audio
-             const beatDuration = beat.duration ?? (movieDuration > 0 ? movieDuration : 1.0);
-             beatDurations.push(beatDuration);
-             mediaDurations[index].silenceDuration = beatDuration;
-         }
+         // The current beat has no audio, nor no spilled over audio
+         const beatDuration = beat.duration ?? (movieDuration > 0 ? movieDuration : 1.0);
+         beatDurations.push(beatDuration);
+         mediaDurations[index].silenceDuration = beatDuration;
      });
      assert(beatDurations.length === context.studio.beats.length, "beatDurations.length !== studio.beats.length");
      // We cannot reuse longSilentId. We need to explicitly split it for each beat.
@@ -170,20 +203,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
          const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath(), undefined, ["-stream_loop", "-1"]);
          ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
      }
-     const inputIds = [];
-     context.studio.beats.forEach((studioBeat, index) => {
-         const { silenceDuration } = mediaDurations[index];
-         const paddingId = `[padding_${index}]`;
-         if (studioBeat.audioFile) {
-             const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
-             inputIds.push(audioId);
-         }
-         if (silenceDuration > 0) {
-             const silentId = silentIds.pop();
-             ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${silenceDuration}${paddingId}`);
-             inputIds.push(paddingId);
-         }
-     });
+     const inputIds = getInputIds(context, mediaDurations, ffmpegContext, silentIds);
      assert(silentIds.length === 0, "silentIds.length !== 0");
      GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
      // Finally, combine all audio files.
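
For orientation, the `asplit`/`atrim` templates above generate filtergraph lines roughly like the following (stream labels are illustrative; how `silentIds` is named lies outside this diff):

```ts
// One asplit fans the looped 60 s silence out to every padded beat,
// then each beat trims its own slice of padding:
ffmpegContext.filterComplex.push("[silent60]asplit=2[s0][s1]");
ffmpegContext.filterComplex.push("[s1]atrim=start=0:end=1.5[padding_3]"); // 1.5 s after beat 3
```
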
@@ -62,7 +62,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
      const { prompt, imagePath } = namedInputs;
      const aspectRatio = getAspectRatio(params.canvasSize);
      const duration = params.duration ?? 5;
-     const apiKey = config?.apiKey ?? process.env.REPLICATE_API_TOKEN;
+     const apiKey = config?.apiKey;
      if (!apiKey) {
          throw new Error("REPLICATE_API_TOKEN environment variable is required");
      }
@@ -3,7 +3,7 @@ import { provider2TTSAgent } from "../utils/provider2agent.js";
  export const ttsElevenlabsAgent = async ({ namedInputs, params, config, }) => {
      const { text } = namedInputs;
      const { voice, model, stability, similarityBoost, suppressError } = params;
-     const apiKey = config?.apiKey ?? process.env.ELEVENLABS_API_KEY;
+     const apiKey = config?.apiKey;
      if (!apiKey) {
          throw new Error("ELEVENLABS_API_KEY environment variable is required");
      }
@@ -1,5 +1,4 @@
  import { GraphAILogger, assert } from "graphai";
- const nijovoiceApiKey = process.env.NIJIVOICE_API_KEY ?? "";
  const errorMessage = [
      "TTS NijiVoice: No API key. ",
      "You have the following options:",
@@ -10,12 +9,12 @@ export const ttsNijivoiceAgent = async ({ params, namedInputs, config, }) => {
      const { suppressError, voice, speed, speed_global } = params;
      const { apiKey } = config ?? {};
      const { text } = namedInputs;
-     assert(!!(apiKey ?? nijovoiceApiKey), errorMessage);
+     assert(!!apiKey, errorMessage);
      const url = `https://api.nijivoice.com/api/platform/v1/voice-actors/${voice}/generate-voice`;
      const options = {
          method: "POST",
          headers: {
-             "x-api-key": apiKey ?? nijovoiceApiKey,
+             "x-api-key": apiKey,
              accept: "application/json",
              "content-type": "application/json",
          },
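
The Replicate, ElevenLabs, and NijiVoice hunks share one theme: agents no longer fall back to `process.env` themselves; the key must arrive via the GraphAI `config`. Environment variables still work, but only through the single `settings2GraphAIConfig` mapping step. A hedged sketch (the settings key name assumes it mirrors the env var, per the `getKey` calls below):

```ts
// 0.1.6 credential flow — the agent reads config.apiKey only.
const config = settings2GraphAIConfig({ NIJIVOICE_API_KEY: "nv-..." }, process.env);
// config.ttsNijivoiceAgent.apiKey === "nv-..." → sent as the x-api-key header,
// instead of the agent consulting process.env.NIJIVOICE_API_KEY itself.
```

Note the thrown error messages still reference the environment variables (e.g. "REPLICATE_API_TOKEN environment variable is required"), which is slightly misleading now that the env var is only one possible source.
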
@@ -1,4 +1,2 @@
- export * from "./types/type.js";
- export * from "./types/schema.js";
- export * from "./utils/provider2agent.js";
+ export * from "./index.common.js";
  export * from "./agents/validate_schema_agent.js";
@@ -1,5 +1,3 @@
- // Entry point that exposes only APIs available for use in the browser
- export * from "./types/type.js";
- export * from "./types/schema.js";
- export * from "./utils/provider2agent.js";
+ // Entry point for browser code
+ export * from "./index.common.js";
  export * from "./agents/validate_schema_agent.js";
@@ -0,0 +1,2 @@
+ export * from "./types/index.js";
+ export * from "./utils/provider2agent.js";
@@ -0,0 +1,3 @@
+ // Entry point for universal code
+ export * from "./types/index.js";
+ export * from "./utils/provider2agent.js";
@@ -0,0 +1,7 @@
+ export * from "./index.common.js";
+ export * from "./actions/index.js";
+ export * from "./cli/helpers.js";
+ export * from "./utils/file.js";
+ export * from "./utils/ffmpeg_utils.js";
+ export * from "./methods/index.js";
+ export * from "./agents/index.js";
@@ -0,0 +1,8 @@
+ // Entry point for node.js code
+ export * from "./index.common.js";
+ export * from "./actions/index.js";
+ export * from "./cli/helpers.js";
+ export * from "./utils/file.js";
+ export * from "./utils/ffmpeg_utils.js";
+ export * from "./methods/index.js";
+ export * from "./agents/index.js";
@@ -1,3 +1,3 @@
  export declare const imageType = "image";
- export declare const process: (params: import("../../index.browser.js").ImageProcessorParams) => string | undefined;
- export declare const path: (params: import("../../index.browser.js").ImageProcessorParams) => string | undefined;
+ export declare const process: (params: import("../../index.common.js").ImageProcessorParams) => string | undefined;
+ export declare const path: (params: import("../../index.common.js").ImageProcessorParams) => string | undefined;
@@ -1,3 +1,3 @@
  export declare const imageType = "movie";
- export declare const process: (params: import("../../index.browser.js").ImageProcessorParams) => string | undefined;
- export declare const path: (params: import("../../index.browser.js").ImageProcessorParams) => string | undefined;
+ export declare const process: (params: import("../../index.common.js").ImageProcessorParams) => string | undefined;
+ export declare const path: (params: import("../../index.common.js").ImageProcessorParams) => string | undefined;
@@ -56,12 +56,18 @@ export const settings2GraphAIConfig = (settings, env) => {
          apiKey: getKey("IMAGE", "OPENAI_API_KEY"),
          baseURL: getKey("IMAGE", "OPENAI_BASE_URL"),
      },
+     imageGoogleAgent: {
+         projectId: getKey("IMAGE", "GOOGLE_PROJECT_ID"),
+     },
      anthropicAgent: {
          apiKey: getKey("LLM", "ANTHROPIC_API_TOKEN"),
      },
      movieReplicateAgent: {
          apiKey: getKey("MOVIE", "REPLICATE_API_TOKEN"),
      },
+     movieGoogleAgent: {
+         projectId: getKey("MOVIE", "GOOGLE_PROJECT_ID"),
+     },
      ttsNijivoiceAgent: {
          apiKey: getKey("TTS", "NIJIVOICE_API_KEY"),
      },
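
The two new entries complete the Google flow: `settings2GraphAIConfig` now owns `projectId`, and `graphOption` (first hunk) merely adds the token. `getKey`'s body is outside this diff; the sketch below assumes it checks `settings` first and then falls back to `env`, possibly under a prefixed name:

```ts
// Assumption: getKey(prefix, name) ≈ settings[name] ?? env[name] (perhaps also a
// prefixed variant like `${prefix}_${name}`); the exact precedence is not in this diff.
const config = settings2GraphAIConfig({ GOOGLE_PROJECT_ID: "my-project" }, process.env);
// config.imageGoogleAgent → { projectId: "my-project" }
// config.movieGoogleAgent → { projectId: "my-project" }
```
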
package/package.json CHANGED
@@ -1,13 +1,13 @@
  {
      "name": "mulmocast",
-     "version": "0.1.5",
+     "version": "0.1.6",
      "description": "",
      "type": "module",
-     "main": "lib/index.js",
+     "main": "lib/index.node.js",
      "exports": {
          ".": {
-             "types": "./lib/index.d.ts",
-             "default": "./lib/index.js"
+             "types": "./lib/index.node.d.ts",
+             "default": "./lib/index.node.js"
          },
          "./browser": {
              "types": "./lib/index.browser.d.ts",
@@ -75,7 +75,7 @@
      "clipboardy": "^4.0.0",
      "dotenv": "^17.2.0",
      "fluent-ffmpeg": "^2.1.3",
-     "google-auth-library": "^9.15.1",
+     "google-auth-library": "^10.1.0",
      "graphai": "^2.0.12",
      "inquirer": "^12.7.0",
      "marked": "^16.0.0",