mulmocast 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/assets/templates/ani.json +48 -0
  2. package/assets/templates/ani_ja.json +45 -0
  3. package/lib/actions/audio.js +2 -0
  4. package/lib/actions/image_agents.d.ts +28 -22
  5. package/lib/actions/image_agents.js +4 -4
  6. package/lib/actions/images.js +12 -21
  7. package/lib/actions/translate.d.ts +4 -1
  8. package/lib/actions/translate.js +6 -3
  9. package/lib/agents/combine_audio_files_agent.js +106 -86
  10. package/lib/agents/movie_replicate_agent.js +4 -3
  11. package/lib/agents/tts_elevenlabs_agent.js +1 -1
  12. package/lib/agents/tts_nijivoice_agent.js +2 -3
  13. package/lib/cli/commands/tool/scripting/builder.js +1 -1
  14. package/lib/cli/commands/tool/scripting/handler.d.ts +1 -1
  15. package/lib/cli/commands/tool/story_to_script/builder.js +1 -1
  16. package/lib/cli/commands/tool/story_to_script/handler.d.ts +1 -1
  17. package/lib/index.browser.d.ts +1 -3
  18. package/lib/index.browser.js +2 -4
  19. package/lib/index.common.d.ts +2 -0
  20. package/lib/index.common.js +3 -0
  21. package/lib/index.node.d.ts +7 -0
  22. package/lib/index.node.js +8 -0
  23. package/lib/methods/mulmo_presentation_style.d.ts +15 -1
  24. package/lib/methods/mulmo_presentation_style.js +10 -11
  25. package/lib/tools/story_to_script.d.ts +1 -1
  26. package/lib/types/schema.d.ts +343 -322
  27. package/lib/types/schema.js +21 -14
  28. package/lib/types/type.d.ts +3 -2
  29. package/lib/utils/context.d.ts +73 -72
  30. package/lib/utils/ffmpeg_utils.js +6 -0
  31. package/lib/utils/image_plugins/image.d.ts +2 -2
  32. package/lib/utils/image_plugins/movie.d.ts +2 -2
  33. package/lib/utils/preprocess.d.ts +37 -36
  34. package/lib/utils/provider2agent.d.ts +9 -7
  35. package/lib/utils/provider2agent.js +12 -7
  36. package/lib/utils/utils.d.ts +1 -2
  37. package/lib/utils/utils.js +7 -2
  38. package/package.json +11 -11
  39. package/scripts/templates/presentation.json~ +0 -119

package/assets/templates/ani.json
@@ -0,0 +1,48 @@
+ {
+   "title": "Presentation with Ani in Japanese",
+   "description": "Template for presentation with Ani in Japanese.",
+   "systemPrompt": "Generate a script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "movieParams": {
+       "provider": "replicate",
+       "model": "bytedance/seedance-1-lite"
+     },
+     "speechParams": {
+       "provider": "openai",
+       "speakers": {
+         "Presenter": {
+           "voiceId": "shimmer",
+           "speechOptions": { "instruction": "Speak in a slightly high-pitched, curt tone with sudden flustered shifts—like a tsundere anime girl." }
+         }
+       }
+     },
+     "audioParams": {
+       "bgm": {
+         "kind": "url",
+         "url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/bgms/morning001.mp3"
+       }
+     },
+     "lang": "en",
+     "canvasSize": {
+       "width": 1024,
+       "height": 1536
+     },
+     "imageParams": {
+       "style": "<style>A highly polished 2D digital illustration in anime and manga style, featuring clean linework, soft shading, vivid colors, and expressive facial detailing. The composition emphasizes clarity and visual impact with a minimalistic background and a strong character focus. The lighting is even and bright, giving the image a crisp and energetic feel, reminiscent of high-quality character art used in Japanese visual novels or mobile games.</style>",
+       "images": {
+         "ani": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ani.png"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }

package/assets/templates/ani_ja.json
@@ -0,0 +1,45 @@
+ {
+   "title": "Presentation with Ani",
+   "description": "Template for presentation with Ani.",
+   "systemPrompt": "Generate a Japanese script for a presentation of the given topic. 言葉づかいは少しツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
+   "presentationStyle": {
+     "$mulmocast": {
+       "version": "1.0",
+       "credit": "closing"
+     },
+     "movieParams": {
+       "provider": "replicate",
+       "model": "bytedance/seedance-1-lite"
+     },
+     "audioParams": {
+       "bgm": {
+         "kind": "url",
+         "url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/bgms/morning001.mp3"
+       }
+     },
+     "lang": "ja",
+     "canvasSize": {
+       "width": 1024,
+       "height": 1536
+     },
+     "speechParams": {
+       "provider": "nijivoice",
+       "speakers": {
+         "Presenter": { "voiceId": "9d9ed276-49ee-443a-bc19-26e6136d05f0" }
+       }
+     },
+     "imageParams": {
+       "style": "<style>A highly polished 2D digital illustration in anime and manga style, featuring clean linework, soft shading, vivid colors, and expressive facial detailing. The composition emphasizes clarity and visual impact with a minimalistic background and a strong character focus. The lighting is even and bright, giving the image a crisp and energetic feel, reminiscent of high-quality character art used in Japanese visual novels or mobile games.</style>",
+       "images": {
+         "ani": {
+           "type": "image",
+           "source": {
+             "kind": "url",
+             "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ani.png"
+           }
+         }
+       }
+     }
+   },
+   "scriptName": "image_prompts_template.json"
+ }

package/lib/actions/audio.js
@@ -143,6 +143,7 @@ const graph_data = {
    },
    addBGM: {
      agent: "addBGMAgent",
+     unless: ":context.presentationStyle.audioParams.bgmVolume.equal(0)",
      inputs: {
        wait: ":combineFiles",
        voiceFile: ":audioCombinedFilePath",
@@ -153,6 +154,7 @@
        },
      },
      isResult: true,
+     defaultValue: {},
    },
    title: {
      agent: "copyAgent",

package/lib/actions/image_agents.d.ts
@@ -44,17 +44,20 @@ export declare const imagePreprocessAgent: (namedInputs: {
  } | {
    imagePath: string;
    imageFromMovie: boolean;
-   movieParams: {
-     speed?: number | undefined;
-     model?: string | undefined;
-     fillOption?: {
-       style: "aspectFit" | "aspectFill";
-     } | undefined;
-     provider?: string | undefined;
-     transition?: {
-       type: "fade" | "slideout_left";
-       duration: number;
-     } | undefined;
+   movieAgentInfo: {
+     agent: string;
+     movieParams: {
+       speed?: number | undefined;
+       provider?: string | undefined;
+       model?: string | undefined;
+       fillOption?: {
+         style: "aspectFit" | "aspectFill";
+       } | undefined;
+       transition?: {
+         type: "fade" | "slideout_left";
+         duration: number;
+       } | undefined;
+     };
    };
    imageParams: {
      provider: string;
@@ -91,17 +94,20 @@ export declare const imagePreprocessAgent: (namedInputs: {
    imageAgentInfo: import("../types/type.js").Text2ImageAgentInfo;
    prompt: string;
    referenceImages: string[];
-   movieParams: {
-     speed?: number | undefined;
-     model?: string | undefined;
-     fillOption?: {
-       style: "aspectFit" | "aspectFill";
-     } | undefined;
-     provider?: string | undefined;
-     transition?: {
-       type: "fade" | "slideout_left";
-       duration: number;
-     } | undefined;
+   movieAgentInfo: {
+     agent: string;
+     movieParams: {
+       speed?: number | undefined;
+       provider?: string | undefined;
+       model?: string | undefined;
+       fillOption?: {
+         style: "aspectFit" | "aspectFill";
+       } | undefined;
+       transition?: {
+         type: "fade" | "slideout_left";
+         duration: number;
+       } | undefined;
+     };
    };
    imageParams: {
      provider: string;

package/lib/actions/image_agents.js
@@ -28,15 +28,15 @@ export const imagePreprocessAgent = async (namedInputs) => {
      // undefined prompt indicates that image generation is not needed
      return { ...returnValue, imagePath: pluginPath, referenceImageForMovie: pluginPath };
    }
-   const movieParams = { ...context.presentationStyle.movieParams, ...beat.movieParams };
-   GraphAILogger.log(`movieParams: ${index}`, movieParams, beat.moviePrompt);
+   const movieAgentInfo = MulmoPresentationStyleMethods.getMovieAgentInfo(context.presentationStyle, beat);
+   GraphAILogger.log(`movieParams: ${index}`, movieAgentInfo.movieParams, beat.moviePrompt);
    if (beat.moviePrompt && !beat.imagePrompt) {
-     return { ...returnValue, imagePath, imageFromMovie: true, movieParams }; // no image prompt, only movie prompt
+     return { ...returnValue, imagePath, imageFromMovie: true, movieAgentInfo }; // no image prompt, only movie prompt
    }
    // referenceImages for "edit_image", openai agent.
    const referenceImages = MulmoBeatMethods.getImageReferenceForImageGenerator(beat, imageRefs);
    const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
-   return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages, movieParams };
+   return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages, movieAgentInfo };
  };
  export const imagePluginAgent = async (namedInputs) => {
    const { context, beat, index } = namedInputs;

package/lib/actions/images.js
@@ -39,7 +39,6 @@ const beat_graph_data = {
  nodes: {
    context: {},
    htmlImageAgentInfo: {},
-   movieAgentInfo: {},
    imageRefs: {},
    beat: {},
    __mapIndex: {},
@@ -134,7 +133,7 @@
    },
    movieGenerator: {
      if: ":preprocessor.movieFile",
-     agent: ":movieAgentInfo.agent",
+     agent: ":preprocessor.movieAgentInfo.agent",
      inputs: {
        onComplete: [":imageGenerator", ":imagePlugin"], // to wait for imageGenerator to finish
        prompt: ":beat.moviePrompt",
@@ -147,7 +146,7 @@
        mulmoContext: ":context",
      },
      params: {
-       model: ":preprocessor.movieParams.model",
+       model: ":preprocessor.movieAgentInfo.movieParams.model",
        duration: ":beat.duration",
        canvasSize: ":context.presentationStyle.canvasSize",
      },
@@ -167,16 +166,19 @@
      defaultValue: {},
    },
    audioChecker: {
-     if: ":preprocessor.movieFile",
      agent: async (namedInputs) => {
-       const { hasAudio } = await ffmpegGetMediaDuration(namedInputs.movieFile);
+       const sourceFile = namedInputs.movieFile || namedInputs.imageFile;
+       if (!sourceFile) {
+         return { hasMovieAudio: false };
+       }
+       const { hasAudio } = await ffmpegGetMediaDuration(sourceFile);
        return { hasMovieAudio: hasAudio };
      },
      inputs: {
-       onComplete: [":movieGenerator"], // to wait for movieGenerator to finish
+       onComplete: [":movieGenerator", ":htmlImageGenerator"], // to wait for movieGenerator and htmlImageGenerator to finish
        movieFile: ":preprocessor.movieFile",
+       imageFile: ":preprocessor.imagePath",
      },
-     defaultValue: {},
    },
    output: {
      agent: "copyAgent",
@@ -201,7 +203,6 @@ const graph_data = {
  nodes: {
    context: {},
    htmlImageAgentInfo: {},
-   movieAgentInfo: {},
    outputStudioFilePath: {},
    imageRefs: {},
    map: {
@@ -210,7 +211,6 @@ const graph_data = {
      rows: ":context.studio.script.beats",
      context: ":context",
      htmlImageAgentInfo: ":htmlImageAgentInfo",
-     movieAgentInfo: ":movieAgentInfo",
      imageRefs: ":imageRefs",
    },
    isResult: true,
@@ -293,17 +293,11 @@ export const graphOption = async (context, settings) => {
    const config = settings2GraphAIConfig(settings, process.env);
    // We need to get google's auth token only if the google is the text2image provider.
    if (provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
-     userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
+     userAssert(!!config.movieGoogleAgent || !!config.imageGoogleAgent, "GOOGLE_PROJECT_ID is not set");
      GraphAILogger.log("google was specified as text2image engine");
      const token = await googleAuth();
-     config["imageGoogleAgent"] = {
-       projectId: process.env.GOOGLE_PROJECT_ID,
-       token,
-     };
-     config["movieGoogleAgent"] = {
-       projectId: process.env.GOOGLE_PROJECT_ID,
-       token,
-     };
+     config["imageGoogleAgent"].token = token;
+     config["movieGoogleAgent"].token = token;
    }
    options.config = config;
    return options;
@@ -320,9 +314,6 @@ const prepareGenerateImages = async (context) => {
    const injections = {
      context,
      htmlImageAgentInfo,
-     movieAgentInfo: {
-       agent: MulmoPresentationStyleMethods.getMovieAgent(context.presentationStyle),
-     },
      outputStudioFilePath: getOutputStudioFilePath(outDirPath, fileName),
      imageRefs,
    };

package/lib/actions/translate.d.ts
@@ -1,4 +1,7 @@
  import "dotenv/config";
  import type { CallbackFunction } from "graphai";
  import { MulmoStudioContext } from "../types/index.js";
- export declare const translate: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
+ export declare const translate: (context: MulmoStudioContext, args?: {
+   callbacks?: CallbackFunction[];
+   settings?: Record<string, string>;
+ }) => Promise<void>;
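
The second parameter of `translate` changes from a bare callback array to an options object. A sketch of the new call shape, derived from the declaration above (the import path is an assumption; see the index.node.js entries in the file list):

```typescript
import type { CallbackFunction } from "graphai";
import { translate } from "mulmocast"; // assumed re-export from the package root

declare const context: never; // a MulmoStudioContext (placeholder)
declare const callbacks: CallbackFunction[];

// 0.1.5: translate(context, callbacks)
// 0.1.7: one optional object; both fields optional.
await translate(context, {
  callbacks,
  settings: { OPENAI_API_KEY: "sk-..." }, // forwarded to settings2GraphAIConfig
});
```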

package/lib/actions/translate.js
@@ -4,6 +4,7 @@ import * as agents from "@graphai/vanilla";
  import { openAIAgent } from "@graphai/openai_agent";
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
  import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
+ import { settings2GraphAIConfig } from "../utils/utils.js";
  import { getOutputMultilingualFilePath, mkdir, writingMessage } from "../utils/file.js";
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
@@ -208,15 +209,17 @@ const agentFilters = [
  ];
  const defaultLang = "en";
  const targetLangs = ["ja", "en"];
- export const translate = async (context, callbacks) => {
+ export const translate = async (context, args) => {
+   const { settings, callbacks } = args ?? {};
    try {
      MulmoStudioContextMethods.setSessionState(context, "multiLingual", true);
      const fileName = MulmoStudioContextMethods.getFileName(context);
      const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
      const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
      mkdir(outDirPath);
-     assert(!!process.env.OPENAI_API_KEY, "The OPENAI_API_KEY environment variable is missing or empty");
-     const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters });
+     const config = settings2GraphAIConfig(settings, process.env);
+     assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
+     const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
      graph.injectValue("context", context);
      graph.injectValue("defaultLang", defaultLang);
      graph.injectValue("targetLangs", targetLangs);

package/lib/agents/combine_audio_files_agent.js
@@ -1,6 +1,6 @@
  import { assert, GraphAILogger } from "graphai";
  import { silent60secPath } from "../utils/file.js";
- import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration } from "../utils/ffmpeg_utils.js";
+ import { FfmpegContextInit, FfmpegContextGenerateOutput, FfmpegContextInputFormattedAudio, ffmpegGetMediaDuration, } from "../utils/ffmpeg_utils.js";
  import { userAssert } from "../utils/utils.js";
  const getMovieDuration = async (beat) => {
    if (beat.image?.type === "movie" && (beat.image.source.kind === "url" || beat.image.source.kind === "path")) {
@@ -62,6 +62,93 @@ const getGroupBeatDurations = (context, group, audioDuration) => {
    });
    return durations;
  };
+ const getInputIds = (context, mediaDurations, ffmpegContext, silentIds) => {
+   const inputIds = [];
+   context.studio.beats.forEach((studioBeat, index) => {
+     const { silenceDuration } = mediaDurations[index];
+     const paddingId = `[padding_${index}]`;
+     if (studioBeat.audioFile) {
+       const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
+       inputIds.push(audioId);
+     }
+     if (silenceDuration > 0) {
+       const silentId = silentIds.pop();
+       ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${silenceDuration}${paddingId}`);
+       inputIds.push(paddingId);
+     }
+   });
+   return inputIds;
+ };
+ const voiceOverProcess = (context, mediaDurations, movieDuration, beatDurations, groupLength) => {
+   return (remaining, idx, iGroup) => {
+     const subBeatDurations = mediaDurations[idx];
+     userAssert(subBeatDurations.audioDuration <= remaining, `Duration Overflow: At index(${idx}) audioDuration(${subBeatDurations.audioDuration}) > remaining(${remaining})`);
+     if (iGroup === groupLength - 1) {
+       beatDurations.push(remaining);
+       subBeatDurations.silenceDuration = remaining - subBeatDurations.audioDuration;
+       return 0;
+     }
+     const nextBeat = context.studio.script.beats[idx + 1];
+     assert(nextBeat.image?.type === "voice_over", "nextBeat.image.type !== voice_over");
+     const voiceStartAt = nextBeat.image?.startAt;
+     if (voiceStartAt) {
+       const remainingDuration = movieDuration - voiceStartAt;
+       const duration = remaining - remainingDuration;
+       userAssert(duration >= 0, `Invalid startAt: At index(${idx}), avaiable duration(${duration}) < 0`);
+       beatDurations.push(duration);
+       subBeatDurations.silenceDuration = duration - subBeatDurations.audioDuration;
+       userAssert(subBeatDurations.silenceDuration >= 0, `Duration Overwrap: At index(${idx}), silenceDuration(${subBeatDurations.silenceDuration}) < 0`);
+       return remainingDuration;
+     }
+     beatDurations.push(subBeatDurations.audioDuration);
+     return remaining - subBeatDurations.audioDuration;
+   };
+ };
+ const getVoiceOverGroup = (context, index) => {
+   const group = [index];
+   for (let i = index + 1; i < context.studio.beats.length && context.studio.script.beats[i].image?.type === "voice_over"; i++) {
+     group.push(i);
+   }
+   return group;
+ };
+ const getSpillOverGroup = (context, mediaDurations, index) => {
+   const group = [index];
+   for (let i = index + 1; i < context.studio.beats.length && !mediaDurations[i].hasMedia; i++) {
+     group.push(i);
+   }
+   return group;
+ };
+ const spilledOverAudio = (context, group, audioDuration, beatDurations, mediaDurations) => {
+   const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
+   // Yes, the current beat has spilled over audio.
+   const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
+   if (beatsTotalDuration > audioDuration + 0.01) {
+     // 0.01 is a tolerance to avoid floating point precision issues
+     group.reduce((remaining, idx, iGroup) => {
+       if (remaining >= groupBeatsDurations[iGroup]) {
+         return remaining - groupBeatsDurations[iGroup];
+       }
+       mediaDurations[idx].silenceDuration = groupBeatsDurations[iGroup] - remaining;
+       return 0;
+     }, audioDuration);
+   }
+   else if (audioDuration > beatsTotalDuration) {
+     // Last beat gets the rest of the audio.
+     groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
+   }
+   beatDurations.push(...groupBeatsDurations);
+ };
+ const noSpilledOverAudio = (context, beat, index, movieDuration, audioDuration, beatDurations, mediaDurations) => {
+   // padding is the amount of audio padding specified in the script.
+   const padding = getPadding(context, beat, index);
+   // totalPadding is the amount of audio padding to be added to the audio file.
+   const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
+   const beatDuration = audioDuration + totalPadding;
+   beatDurations.push(beatDuration);
+   if (totalPadding > 0) {
+     mediaDurations[index].silenceDuration = totalPadding;
+   }
+ };
  const combineAudioFilesAgent = async ({ namedInputs, }) => {
    const { context, combinedFileName } = namedInputs;
    const ffmpegContext = FfmpegContextInit();
@@ -77,91 +164,37 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
    const { audioDuration, movieDuration } = mediaDurations[index];
    // Check if we are processing a voice-over beat.
    if (movieDuration > 0) {
-     const group = [index];
-     for (let i = index + 1; i < context.studio.beats.length && context.studio.script.beats[i].image?.type === "voice_over"; i++) {
-       group.push(i);
-     }
+     const group = getVoiceOverGroup(context, index);
      if (group.length > 1) {
-       group.reduce((remaining, idx, iGroup) => {
-         const subBeatDurations = mediaDurations[idx];
-         userAssert(subBeatDurations.audioDuration <= remaining, `Duration Overflow: At index(${idx}) audioDuration(${subBeatDurations.audioDuration}) > remaining(${remaining})`);
-         if (iGroup === group.length - 1) {
-           beatDurations.push(remaining);
-           subBeatDurations.silenceDuration = remaining - subBeatDurations.audioDuration;
-           return 0;
-         }
-         const nextBeat = context.studio.script.beats[idx + 1];
-         assert(nextBeat.image?.type === "voice_over", "nextBeat.image.type !== voice_over");
-         const voiceStartAt = nextBeat.image?.startAt;
-         if (voiceStartAt) {
-           const remainingDuration = movieDuration - voiceStartAt;
-           const duration = remaining - remainingDuration;
-           userAssert(duration >= 0, `Invalid startAt: At index(${idx}), avaiable duration(${duration}) < 0`);
-           beatDurations.push(duration);
-           subBeatDurations.silenceDuration = duration - subBeatDurations.audioDuration;
-           userAssert(subBeatDurations.silenceDuration >= 0, `Duration Overwrap: At index(${idx}), silenceDuration(${subBeatDurations.silenceDuration}) < 0`);
-           return remainingDuration;
-         }
-         beatDurations.push(subBeatDurations.audioDuration);
-         return remaining - subBeatDurations.audioDuration;
-       }, movieDuration);
+       GraphAILogger.log(`Voice over group: ${group.length}`);
+       group.reduce(voiceOverProcess(context, mediaDurations, movieDuration, beatDurations, group.length), movieDuration);
        return;
      }
    }
    // Check if the current beat has media and the next beat does not have media.
    if (audioDuration > 0) {
      // Check if the current beat has spilled over audio.
-     const group = [index];
-     for (let i = index + 1; i < context.studio.beats.length && !mediaDurations[i].hasMedia; i++) {
-       group.push(i);
-     }
+     const group = getSpillOverGroup(context, mediaDurations, index);
      if (group.length > 1) {
-       const groupBeatsDurations = getGroupBeatDurations(context, group, audioDuration);
-       // Yes, the current beat has spilled over audio.
-       const beatsTotalDuration = groupBeatsDurations.reduce((a, b) => a + b, 0);
-       if (beatsTotalDuration > audioDuration + 0.01) {
-         // 0.01 is a tolerance to avoid floating point precision issues
-         group.reduce((remaining, idx, iGroup) => {
-           if (remaining >= groupBeatsDurations[iGroup]) {
-             return remaining - groupBeatsDurations[iGroup];
-           }
-           mediaDurations[idx].silenceDuration = groupBeatsDurations[iGroup] - remaining;
-           return 0;
-         }, audioDuration);
-       }
-       else {
-         // Last beat gets the rest of the audio.
-         if (audioDuration > beatsTotalDuration) {
-           groupBeatsDurations[groupBeatsDurations.length - 1] += audioDuration - beatsTotalDuration;
-         }
-       }
-       beatDurations.push(...groupBeatsDurations);
-     }
-     else {
-       // No spilled over audio.
-       assert(beatDurations.length === index, "beatDurations.length !== index");
-       // padding is the amount of audio padding specified in the script.
-       const padding = getPadding(context, beat, index);
-       // totalPadding is the amount of audio padding to be added to the audio file.
-       const totalPadding = Math.round(getTotalPadding(padding, movieDuration, audioDuration, beat.duration) * 100) / 100;
-       const beatDuration = audioDuration + totalPadding;
-       beatDurations.push(beatDuration);
-       if (totalPadding > 0) {
-         mediaDurations[index].silenceDuration = totalPadding;
-       }
+       GraphAILogger.log(`Spill over group: ${group.length}`);
+       spilledOverAudio(context, group, audioDuration, beatDurations, mediaDurations);
+       return;
      }
+     // No spilled over audio.
+     assert(beatDurations.length === index, "beatDurations.length !== index");
+     noSpilledOverAudio(context, beat, index, movieDuration, audioDuration, beatDurations, mediaDurations);
+     return;
    }
-   else if (movieDuration > 0) {
+   if (movieDuration > 0) {
      // This beat has only a movie, not audio.
      beatDurations.push(movieDuration);
      mediaDurations[index].silenceDuration = movieDuration;
+     return;
    }
-   else {
-     // The current beat has no audio, nor no spilled over audio
-     const beatDuration = beat.duration ?? (movieDuration > 0 ? movieDuration : 1.0);
-     beatDurations.push(beatDuration);
-     mediaDurations[index].silenceDuration = beatDuration;
-   }
+   // The current beat has no audio, nor no spilled over audio
+   const beatDuration = beat.duration ?? (movieDuration > 0 ? movieDuration : 1.0);
+   beatDurations.push(beatDuration);
+   mediaDurations[index].silenceDuration = beatDuration;
  });
  assert(beatDurations.length === context.studio.beats.length, "beatDurations.length !== studio.beats.length");
  // We cannot reuse longSilentId. We need to explicitly split it for each beat.
@@ -170,20 +203,7 @@ const combineAudioFilesAgent = async ({ namedInputs, }) => {
    const longSilentId = FfmpegContextInputFormattedAudio(ffmpegContext, silent60secPath(), undefined, ["-stream_loop", "-1"]);
    ffmpegContext.filterComplex.push(`${longSilentId}asplit=${silentIds.length}${silentIds.join("")}`);
  }
- const inputIds = [];
- context.studio.beats.forEach((studioBeat, index) => {
-   const { silenceDuration } = mediaDurations[index];
-   const paddingId = `[padding_${index}]`;
-   if (studioBeat.audioFile) {
-     const audioId = FfmpegContextInputFormattedAudio(ffmpegContext, studioBeat.audioFile);
-     inputIds.push(audioId);
-   }
-   if (silenceDuration > 0) {
-     const silentId = silentIds.pop();
-     ffmpegContext.filterComplex.push(`${silentId}atrim=start=0:end=${silenceDuration}${paddingId}`);
-     inputIds.push(paddingId);
-   }
- });
+ const inputIds = getInputIds(context, mediaDurations, ffmpegContext, silentIds);
  assert(silentIds.length === 0, "silentIds.length !== 0");
  GraphAILogger.log("filterComplex:", ffmpegContext.filterComplex.join("\n"));
  // Finally, combine all audio files.
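
The extracted `voiceOverProcess` reducer keeps the arithmetic of the inline code it replaces. A worked trace with hypothetical numbers: a 10-second movie carrying two voice-over beats, where the second beat's `startAt` is 6 seconds:

```typescript
// Hypothetical trace of voiceOverProcess over a two-beat group.
const movieDuration = 10;      // seconds of movie on the first beat
const audioDurations = [3, 4]; // narration length of each beat
const startAt = 6;             // nextBeat.image.startAt, seen while on beat 0

// Beat 0 (not last): remainingDuration = 10 - 6 = 4,
// so beat 0 occupies duration = 10 - 4 = 6 seconds on screen.
const beat0Duration = movieDuration - (movieDuration - startAt); // 6
const beat0Silence = beat0Duration - audioDurations[0];          // 3 s of padding
// Beat 1 (last in group) receives whatever remains.
const beat1Duration = movieDuration - startAt;                   // 4
const beat1Silence = beat1Duration - audioDurations[1];          // 0
```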

package/lib/agents/movie_replicate_agent.js
@@ -1,6 +1,7 @@
  import { readFileSync } from "fs";
  import { GraphAILogger } from "graphai";
  import Replicate from "replicate";
+ import { provider2MovieAgent } from "../utils/provider2agent.js";
  async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, duration) {
    const replicate = new Replicate({
      auth: apiKey,
@@ -21,7 +22,7 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
    if (imagePath) {
      const buffer = readFileSync(imagePath);
      const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
-     if (model === "kwaivgi/kling-v2.1" || model === "kwaivgi/kling-v1.6-pro") {
+     if (model === "kwaivgi/kling-v2.1" || model === "kwaivgi/kling-v1.6-pro" || model === "minimax/hailuo-02") {
        input.start_image = base64Image;
      }
      else {
@@ -29,7 +30,7 @@
      }
    }
    try {
-     const output = await replicate.run(model ?? "bytedance/seedance-1-lite", { input });
+     const output = await replicate.run(model ?? provider2MovieAgent.replicate.defaultModel, { input });
      // Download the generated video
      if (output && typeof output === "object" && "url" in output) {
        const videoUrl = output.url();
@@ -62,7 +63,7 @@ export const movieReplicateAgent = async ({ namedInputs, params, config, }) => {
    const { prompt, imagePath } = namedInputs;
    const aspectRatio = getAspectRatio(params.canvasSize);
    const duration = params.duration ?? 5;
-   const apiKey = config?.apiKey ?? process.env.REPLICATE_API_TOKEN;
+   const apiKey = config?.apiKey;
    if (!apiKey) {
      throw new Error("REPLICATE_API_TOKEN environment variable is required");
    }
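
The hard-coded Replicate fallback model now comes from the shared provider table. A sketch of the lookup; provider2agent.js appears in the file list but its body is not shown here, so the concrete value is an assumption based on the literal removed above and on the new templates:

```typescript
import { provider2MovieAgent } from "../utils/provider2agent.js";

// Assumption: defaultModel still resolves to "bytedance/seedance-1-lite",
// the literal previously inlined in generateMovie.
const resolveModel = (model?: string) => model ?? provider2MovieAgent.replicate.defaultModel;
```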

package/lib/agents/tts_elevenlabs_agent.js
@@ -3,7 +3,7 @@ import { provider2TTSAgent } from "../utils/provider2agent.js";
  export const ttsElevenlabsAgent = async ({ namedInputs, params, config, }) => {
    const { text } = namedInputs;
    const { voice, model, stability, similarityBoost, suppressError } = params;
-   const apiKey = config?.apiKey ?? process.env.ELEVENLABS_API_KEY;
+   const apiKey = config?.apiKey;
    if (!apiKey) {
      throw new Error("ELEVENLABS_API_KEY environment variable is required");
    }

package/lib/agents/tts_nijivoice_agent.js
@@ -1,5 +1,4 @@
  import { GraphAILogger, assert } from "graphai";
- const nijovoiceApiKey = process.env.NIJIVOICE_API_KEY ?? "";
  const errorMessage = [
    "TTS NijiVoice: No API key. ",
    "You have the following options:",
@@ -10,12 +9,12 @@ export const ttsNijivoiceAgent = async ({ params, namedInputs, config, }) => {
    const { suppressError, voice, speed, speed_global } = params;
    const { apiKey } = config ?? {};
    const { text } = namedInputs;
-   assert(!!(apiKey ?? nijovoiceApiKey), errorMessage);
+   assert(!!apiKey, errorMessage);
    const url = `https://api.nijivoice.com/api/platform/v1/voice-actors/${voice}/generate-voice`;
    const options = {
      method: "POST",
      headers: {
-       "x-api-key": apiKey ?? nijovoiceApiKey,
+       "x-api-key": apiKey,
        accept: "application/json",
        "content-type": "application/json",
      },
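
Across the Replicate, ElevenLabs, and NijiVoice hunks the pattern is the same: agents no longer fall back to process.env themselves, so every API key must arrive through the GraphAI config. A sketch of the intended flow, assuming settings2GraphAIConfig maps env-style keys onto per-agent config slots (the translate.js hunk shows it producing config.openAIAgent.apiKey; the slot names for the TTS agents are assumptions):

```typescript
import { GraphAI } from "graphai";
import { settings2GraphAIConfig } from "../utils/utils.js";

declare const graphData: never; // your graph definition (placeholder)
declare const agents: never;    // your agent map (placeholder)

// Env-style settings in, per-agent config out; process.env is the fallback source.
const config = settings2GraphAIConfig({ NIJIVOICE_API_KEY: "nv-..." }, process.env);
// Each agent then reads its own config.apiKey slot; e.g. the NijiVoice agent now
// asserts !!apiKey instead of consulting process.env.NIJIVOICE_API_KEY at import time.
const graph = new GraphAI(graphData, agents, { config });
```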

package/lib/cli/commands/tool/scripting/builder.js
@@ -1,4 +1,4 @@
- import { llm } from "../../../../utils/utils.js";
+ import { llm } from "../../../../utils/provider2agent.js";
  import { getAvailableTemplates } from "../../../../utils/file.js";
  const availableTemplateNames = getAvailableTemplates().map((template) => template.filename);
  export const builder = (yargs) => {

package/lib/cli/commands/tool/scripting/handler.d.ts
@@ -1,5 +1,5 @@
  import { ToolCliArgs } from "../../../../types/cli_types.js";
- import { LLM } from "../../../../utils/utils.js";
+ import type { LLM } from "../../../../utils/provider2agent.js";
  export declare const handler: (argv: ToolCliArgs<{
    o?: string;
    b?: string;