mulmocast 0.0.23 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -0
- package/assets/html/caption.html +2 -0
- package/lib/actions/audio.d.ts +2 -2
- package/lib/actions/audio.js +8 -7
- package/lib/actions/captions.js +6 -4
- package/lib/actions/images.d.ts +6 -6
- package/lib/actions/images.js +29 -32
- package/lib/actions/movie.js +3 -3
- package/lib/agents/image_openai_agent.d.ts +2 -0
- package/lib/agents/image_openai_agent.js +3 -2
- package/lib/agents/tts_elevenlabs_agent.js +2 -2
- package/lib/agents/tts_nijivoice_agent.js +3 -2
- package/lib/agents/tts_openai_agent.js +3 -2
- package/lib/cli/commands/tool/scripting/builder.d.ts +1 -1
- package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
- package/lib/methods/mulmo_presentation_style.js +5 -8
- package/lib/types/schema.d.ts +55 -385
- package/lib/types/schema.js +1 -7
- package/lib/types/type.d.ts +1 -0
- package/lib/utils/preprocess.d.ts +2 -14
- package/lib/utils/prompt.d.ts +2 -1
- package/lib/utils/prompt.js +10 -0
- package/lib/utils/utils.d.ts +3 -1
- package/lib/utils/utils.js +48 -6
- package/package.json +11 -11
package/README.md
CHANGED
|
@@ -103,6 +103,14 @@ GOOGLE_PROJECT_ID=your_google_project_id
|
|
|
103
103
|
|
|
104
104
|
See also [pre-requisites for Google's image generation model](./docs/pre-requisites-google.md)
|
|
105
105
|
|
|
106
|
+
#### (Optional) For AI providers
|
|
107
|
+
```bash
|
|
108
|
+
# For Anthropic Claude (htmlPrompt feature)
|
|
109
|
+
ANTHROPIC_API_TOKEN=your_anthropic_api_token
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
For htmlPrompt configuration, see [docs/image.md](./docs/image.md#2-htmlprompt).
|
|
113
|
+
|
|
106
114
|
#### (Optional) For Movie models
|
|
107
115
|
```bash
|
|
108
116
|
REPLICATE_API_TOKEN=your_replicate_api_key
|
package/assets/html/caption.html
CHANGED
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
}
|
|
20
20
|
.caption {
|
|
21
21
|
/* Text positioned at the bottom */
|
|
22
|
+
width: 80%;
|
|
22
23
|
position: absolute;
|
|
23
24
|
bottom: 0px;
|
|
24
25
|
/* Enable text wrapping */
|
|
@@ -34,6 +35,7 @@
|
|
|
34
35
|
padding-right: 10%;
|
|
35
36
|
padding-top: 4px;
|
|
36
37
|
background: rgba(0, 0, 0, 0.4);
|
|
38
|
+
${styles}
|
|
37
39
|
}
|
|
38
40
|
</style>
|
|
39
41
|
</head>
|
package/lib/actions/audio.d.ts
CHANGED
|
@@ -3,5 +3,5 @@ import type { CallbackFunction } from "graphai";
|
|
|
3
3
|
import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
|
|
4
4
|
export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
|
|
5
5
|
export declare const audioFilePath: (context: MulmoStudioContext) => string;
|
|
6
|
-
export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
|
|
7
|
-
export declare const audio: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
|
|
6
|
+
export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<void>;
|
|
7
|
+
export declare const audio: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
|
package/lib/actions/audio.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
-
import { GraphAI } from "graphai";
|
|
3
|
-
import { TaskManager } from "graphai/lib/task_manager.js";
|
|
2
|
+
import { GraphAI, TaskManager } from "graphai";
|
|
4
3
|
import * as agents from "@graphai/vanilla";
|
|
5
4
|
import ttsNijivoiceAgent from "../agents/tts_nijivoice_agent.js";
|
|
6
5
|
import addBGMAgent from "../agents/add_bgm_agent.js";
|
|
@@ -12,7 +11,7 @@ import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
|
12
11
|
import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
13
12
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
14
13
|
import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
|
|
15
|
-
import { text2hash, localizedText } from "../utils/utils.js";
|
|
14
|
+
import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
|
|
16
15
|
import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
|
|
17
16
|
import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
|
|
18
17
|
const vanillaAgents = agents.default ?? agents;
|
|
@@ -200,7 +199,7 @@ const audioAgents = {
|
|
|
200
199
|
addBGMAgent,
|
|
201
200
|
combineAudioFilesAgent,
|
|
202
201
|
};
|
|
203
|
-
export const generateBeatAudio = async (index, context, callbacks) => {
|
|
202
|
+
export const generateBeatAudio = async (index, context, settings, callbacks) => {
|
|
204
203
|
try {
|
|
205
204
|
MulmoStudioContextMethods.setSessionState(context, "audio", true);
|
|
206
205
|
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
@@ -209,8 +208,9 @@ export const generateBeatAudio = async (index, context, callbacks) => {
|
|
|
209
208
|
const audioSegmentDirPath = resolveDirPath(audioDirPath, fileName);
|
|
210
209
|
mkdir(outDirPath);
|
|
211
210
|
mkdir(audioSegmentDirPath);
|
|
211
|
+
const config = settings2GraphAIConfig(settings);
|
|
212
212
|
const taskManager = new TaskManager(getConcurrency(context));
|
|
213
|
-
const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager });
|
|
213
|
+
const graph = new GraphAI(graph_tts, audioAgents, { agentFilters, taskManager, config });
|
|
214
214
|
graph.injectValue("__mapIndex", index);
|
|
215
215
|
graph.injectValue("beat", context.studio.script.beats[index]);
|
|
216
216
|
graph.injectValue("studioBeat", context.studio.beats[index]);
|
|
@@ -227,7 +227,7 @@ export const generateBeatAudio = async (index, context, callbacks) => {
|
|
|
227
227
|
MulmoStudioContextMethods.setSessionState(context, "audio", false);
|
|
228
228
|
}
|
|
229
229
|
};
|
|
230
|
-
export const audio = async (context, callbacks) => {
|
|
230
|
+
export const audio = async (context, settings, callbacks) => {
|
|
231
231
|
try {
|
|
232
232
|
MulmoStudioContextMethods.setSessionState(context, "audio", true);
|
|
233
233
|
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
@@ -239,8 +239,9 @@ export const audio = async (context, callbacks) => {
|
|
|
239
239
|
const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
|
|
240
240
|
mkdir(outDirPath);
|
|
241
241
|
mkdir(audioSegmentDirPath);
|
|
242
|
+
const config = settings2GraphAIConfig(settings);
|
|
242
243
|
const taskManager = new TaskManager(getConcurrency(context));
|
|
243
|
-
const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager });
|
|
244
|
+
const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager, config });
|
|
244
245
|
graph.injectValue("context", context);
|
|
245
246
|
graph.injectValue("audioArtifactFilePath", audioArtifactFilePath);
|
|
246
247
|
graph.injectValue("audioCombinedFilePath", audioCombinedFilePath);
|
package/lib/actions/captions.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { mulmoCaptionParamsSchema } from "../types/index.js";
|
|
1
2
|
import { GraphAI, GraphAILogger } from "graphai";
|
|
2
3
|
import * as agents from "@graphai/vanilla";
|
|
3
4
|
import { getHTMLFile, getCaptionImagePath } from "../utils/file.js";
|
|
@@ -23,22 +24,23 @@ const graph_data = {
|
|
|
23
24
|
const { beat, context, index } = namedInputs;
|
|
24
25
|
try {
|
|
25
26
|
MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, true);
|
|
26
|
-
const
|
|
27
|
+
const captionParams = mulmoCaptionParamsSchema.parse({ ...context.studio.script.captionParams, ...beat.captionParams });
|
|
27
28
|
const canvasSize = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
|
|
28
29
|
const imagePath = getCaptionImagePath(context, index);
|
|
29
30
|
const template = getHTMLFile("caption");
|
|
30
31
|
const text = (() => {
|
|
31
32
|
const multiLingual = context.multiLingual;
|
|
32
|
-
if (
|
|
33
|
-
return multiLingual[index].multiLingualTexts[
|
|
33
|
+
if (captionParams.lang && multiLingual) {
|
|
34
|
+
return multiLingual[index].multiLingualTexts[captionParams.lang].text;
|
|
34
35
|
}
|
|
35
|
-
GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${
|
|
36
|
+
GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${captionParams.lang}`);
|
|
36
37
|
return beat.text;
|
|
37
38
|
})();
|
|
38
39
|
const htmlData = interpolate(template, {
|
|
39
40
|
caption: text,
|
|
40
41
|
width: `${canvasSize.width}`,
|
|
41
42
|
height: `${canvasSize.height}`,
|
|
43
|
+
styles: captionParams.styles.join(";\n"),
|
|
42
44
|
});
|
|
43
45
|
await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
|
|
44
46
|
context.studio.beats[index].captionFile = imagePath;
|
package/lib/actions/images.d.ts
CHANGED
|
@@ -32,11 +32,11 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
32
32
|
imagePath: string | undefined;
|
|
33
33
|
referenceImage: string | undefined;
|
|
34
34
|
htmlPrompt?: undefined;
|
|
35
|
-
|
|
35
|
+
htmlImageSystemPrompt?: undefined;
|
|
36
36
|
} | {
|
|
37
37
|
imagePath: string;
|
|
38
38
|
htmlPrompt: string;
|
|
39
|
-
|
|
39
|
+
htmlImageSystemPrompt: string[];
|
|
40
40
|
} | {
|
|
41
41
|
imagePath: string;
|
|
42
42
|
images: string[];
|
|
@@ -64,7 +64,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
64
64
|
};
|
|
65
65
|
movieFile: string | undefined;
|
|
66
66
|
htmlPrompt?: undefined;
|
|
67
|
-
|
|
67
|
+
htmlImageSystemPrompt?: undefined;
|
|
68
68
|
} | {
|
|
69
69
|
images: string[];
|
|
70
70
|
imageParams: {
|
|
@@ -93,7 +93,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
93
93
|
referenceImage: string;
|
|
94
94
|
prompt: string;
|
|
95
95
|
htmlPrompt?: undefined;
|
|
96
|
-
|
|
96
|
+
htmlImageSystemPrompt?: undefined;
|
|
97
97
|
}>;
|
|
98
98
|
export declare const imagePluginAgent: (namedInputs: {
|
|
99
99
|
context: MulmoStudioContext;
|
|
@@ -101,5 +101,5 @@ export declare const imagePluginAgent: (namedInputs: {
|
|
|
101
101
|
index: number;
|
|
102
102
|
}) => Promise<void>;
|
|
103
103
|
export declare const getImageRefs: (context: MulmoStudioContext) => Promise<Record<string, string>>;
|
|
104
|
-
export declare const images: (context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
|
|
105
|
-
export declare const generateBeatImage: (index: number, context: MulmoStudioContext, callbacks?: CallbackFunction[]) => Promise<void>;
|
|
104
|
+
export declare const images: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
|
|
105
|
+
export declare const generateBeatImage: (index: number, context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<void>;
|
package/lib/actions/images.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import dotenv from "dotenv";
|
|
2
2
|
import fs from "fs";
|
|
3
|
-
import { GraphAI, GraphAILogger } from "graphai";
|
|
4
|
-
import { TaskManager } from "graphai/lib/task_manager.js";
|
|
3
|
+
import { GraphAI, GraphAILogger, TaskManager } from "graphai";
|
|
5
4
|
import * as agents from "@graphai/vanilla";
|
|
6
5
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
7
6
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
@@ -11,7 +10,8 @@ import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
|
11
10
|
import { imageGoogleAgent, imageOpenaiAgent, movieGoogleAgent, movieReplicateAgent, mediaMockAgent } from "../agents/index.js";
|
|
12
11
|
import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
|
|
13
12
|
import { findImagePlugin } from "../utils/image_plugins/index.js";
|
|
14
|
-
import {
|
|
13
|
+
import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
|
|
14
|
+
import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
|
|
15
15
|
import { defaultOpenAIImageModel } from "../utils/const.js";
|
|
16
16
|
import { renderHTMLToImage } from "../utils/markdown.js";
|
|
17
17
|
const vanillaAgents = agents.default ?? agents;
|
|
@@ -43,15 +43,7 @@ export const imagePreprocessAgent = async (namedInputs) => {
|
|
|
43
43
|
}
|
|
44
44
|
if (beat.htmlPrompt) {
|
|
45
45
|
const htmlPrompt = beat.htmlPrompt.prompt + (beat.htmlPrompt.data ? "\n\n data\n" + JSON.stringify(beat.htmlPrompt.data, null, 2) : "");
|
|
46
|
-
|
|
47
|
-
"Based on the provided information, create a single slide HTML page using Tailwind CSS.",
|
|
48
|
-
`The view port size is ${context.presentationStyle.canvasSize.width}x${context.presentationStyle.canvasSize.height}. Make sure the HTML fits within the view port.`,
|
|
49
|
-
"If charts are needed, use Chart.js to present them in a clean and visually appealing way.",
|
|
50
|
-
"Include a balanced mix of comments, graphs, and illustrations to enhance visual impact.",
|
|
51
|
-
"Output only the HTML code. Do not include any comments, explanations, or additional information outside the HTML.",
|
|
52
|
-
"If data is provided, use it effectively to populate the slide.",
|
|
53
|
-
];
|
|
54
|
-
return { imagePath, htmlPrompt, htmlSystemPrompt };
|
|
46
|
+
return { imagePath, htmlPrompt, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
|
|
55
47
|
}
|
|
56
48
|
// images for "edit_image"
|
|
57
49
|
const images = (() => {
|
|
@@ -85,6 +77,9 @@ export const imagePluginAgent = async (namedInputs) => {
|
|
|
85
77
|
};
|
|
86
78
|
const htmlImageGeneratorAgent = async (namedInputs) => {
|
|
87
79
|
const { html, file, canvasSize } = namedInputs;
|
|
80
|
+
// Save HTML file
|
|
81
|
+
const htmlFile = file.replace(/\.[^/.]+$/, ".html");
|
|
82
|
+
await fs.promises.writeFile(htmlFile, html, "utf8");
|
|
88
83
|
await renderHTMLToImage(html, file, canvasSize.width, canvasSize.height);
|
|
89
84
|
};
|
|
90
85
|
const beat_graph_data = {
|
|
@@ -123,12 +118,13 @@ const beat_graph_data = {
|
|
|
123
118
|
if: ":preprocessor.htmlPrompt",
|
|
124
119
|
defaultValue: {},
|
|
125
120
|
agent: ":htmlImageAgentInfo.agent",
|
|
126
|
-
params: {
|
|
127
|
-
mode: ":htmlImageAgentInfo.model",
|
|
128
|
-
},
|
|
129
121
|
inputs: {
|
|
130
122
|
prompt: ":preprocessor.htmlPrompt",
|
|
131
|
-
system: ":preprocessor.
|
|
123
|
+
system: ":preprocessor.htmlImageSystemPrompt",
|
|
124
|
+
params: {
|
|
125
|
+
model: ":htmlImageAgentInfo.model",
|
|
126
|
+
max_tokens: ":htmlImageAgentInfo.max_tokens",
|
|
127
|
+
},
|
|
132
128
|
},
|
|
133
129
|
},
|
|
134
130
|
htmlImageGenerator: {
|
|
@@ -297,7 +293,7 @@ const googleAuth = async () => {
|
|
|
297
293
|
throw error;
|
|
298
294
|
}
|
|
299
295
|
};
|
|
300
|
-
const graphOption = async (context) => {
|
|
296
|
+
const graphOption = async (context, settings) => {
|
|
301
297
|
const agentFilters = [
|
|
302
298
|
{
|
|
303
299
|
name: "fileCacheAgentFilter",
|
|
@@ -311,21 +307,22 @@ const graphOption = async (context) => {
|
|
|
311
307
|
taskManager,
|
|
312
308
|
};
|
|
313
309
|
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
|
|
310
|
+
const config = settings2GraphAIConfig(settings);
|
|
314
311
|
// We need to get google's auth token only if the google is the text2image provider.
|
|
315
312
|
if (imageAgentInfo.provider === "google" || context.presentationStyle.movieParams?.provider === "google") {
|
|
313
|
+
userAssert(!!process.env.GOOGLE_PROJECT_ID, "GOOGLE_PROJECT_ID is not set");
|
|
316
314
|
GraphAILogger.log("google was specified as text2image engine");
|
|
317
315
|
const token = await googleAuth();
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
token,
|
|
326
|
-
},
|
|
316
|
+
config["imageGoogleAgent"] = {
|
|
317
|
+
projectId: process.env.GOOGLE_PROJECT_ID,
|
|
318
|
+
token,
|
|
319
|
+
};
|
|
320
|
+
config["movieGoogleAgent"] = {
|
|
321
|
+
projectId: process.env.GOOGLE_PROJECT_ID,
|
|
322
|
+
token,
|
|
327
323
|
};
|
|
328
324
|
}
|
|
325
|
+
options.config = config;
|
|
329
326
|
return options;
|
|
330
327
|
};
|
|
331
328
|
// TODO: unit test
|
|
@@ -417,8 +414,8 @@ const getConcurrency = (context) => {
|
|
|
417
414
|
}
|
|
418
415
|
return 4;
|
|
419
416
|
};
|
|
420
|
-
const generateImages = async (context, callbacks) => {
|
|
421
|
-
const options = await graphOption(context);
|
|
417
|
+
const generateImages = async (context, settings, callbacks) => {
|
|
418
|
+
const options = await graphOption(context, settings);
|
|
422
419
|
const injections = await prepareGenerateImages(context);
|
|
423
420
|
const graph = new GraphAI(graph_data, {
|
|
424
421
|
...vanillaAgents,
|
|
@@ -442,10 +439,10 @@ const generateImages = async (context, callbacks) => {
|
|
|
442
439
|
const res = await graph.run();
|
|
443
440
|
return res.mergeResult;
|
|
444
441
|
};
|
|
445
|
-
export const images = async (context, callbacks) => {
|
|
442
|
+
export const images = async (context, settings, callbacks) => {
|
|
446
443
|
try {
|
|
447
444
|
MulmoStudioContextMethods.setSessionState(context, "image", true);
|
|
448
|
-
const newContext = await generateImages(context, callbacks);
|
|
445
|
+
const newContext = await generateImages(context, settings, callbacks);
|
|
449
446
|
MulmoStudioContextMethods.setSessionState(context, "image", false);
|
|
450
447
|
return newContext;
|
|
451
448
|
}
|
|
@@ -454,8 +451,8 @@ export const images = async (context, callbacks) => {
|
|
|
454
451
|
throw error;
|
|
455
452
|
}
|
|
456
453
|
};
|
|
457
|
-
export const generateBeatImage = async (index, context, callbacks) => {
|
|
458
|
-
const options = await graphOption(context);
|
|
454
|
+
export const generateBeatImage = async (index, context, settings, callbacks) => {
|
|
455
|
+
const options = await graphOption(context, settings);
|
|
459
456
|
const injections = await prepareGenerateImages(context);
|
|
460
457
|
const graph = new GraphAI(beat_graph_data, {
|
|
461
458
|
...vanillaAgents,
|
package/lib/actions/movie.js
CHANGED
|
@@ -69,7 +69,8 @@ const getOutputOption = (audioId, videoId) => {
|
|
|
69
69
|
"-b:a 128k", // Audio bitrate
|
|
70
70
|
];
|
|
71
71
|
};
|
|
72
|
-
const createVideo = async (audioArtifactFilePath, outputVideoPath, context
|
|
72
|
+
const createVideo = async (audioArtifactFilePath, outputVideoPath, context) => {
|
|
73
|
+
const caption = MulmoStudioContextMethods.getCaption(context);
|
|
73
74
|
const start = performance.now();
|
|
74
75
|
const ffmpegContext = FfmpegContextInit();
|
|
75
76
|
const missingIndex = context.studio.beats.findIndex((beat) => !beat.imageFile && !beat.movieFile);
|
|
@@ -208,12 +209,11 @@ export const movieFilePath = (context) => {
|
|
|
208
209
|
export const movie = async (context) => {
|
|
209
210
|
MulmoStudioContextMethods.setSessionState(context, "video", true);
|
|
210
211
|
try {
|
|
211
|
-
const caption = MulmoStudioContextMethods.getCaption(context);
|
|
212
212
|
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
213
213
|
const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
|
|
214
214
|
const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, fileName);
|
|
215
215
|
const outputVideoPath = movieFilePath(context);
|
|
216
|
-
if (await createVideo(audioArtifactFilePath, outputVideoPath, context
|
|
216
|
+
if (await createVideo(audioArtifactFilePath, outputVideoPath, context)) {
|
|
217
217
|
writingMessage(outputVideoPath);
|
|
218
218
|
}
|
|
219
219
|
}
|
|
@@ -4,9 +4,10 @@ import { GraphAILogger } from "graphai";
|
|
|
4
4
|
import OpenAI, { toFile } from "openai";
|
|
5
5
|
import { defaultOpenAIImageModel } from "../utils/const.js";
|
|
6
6
|
// https://platform.openai.com/docs/guides/image-generation
|
|
7
|
-
export const imageOpenaiAgent = async ({ namedInputs, params }) => {
|
|
7
|
+
export const imageOpenaiAgent = async ({ namedInputs, params, config }) => {
|
|
8
8
|
const { prompt, images } = namedInputs;
|
|
9
|
-
const {
|
|
9
|
+
const { moderation, canvasSize } = params;
|
|
10
|
+
const { apiKey } = { ...config };
|
|
10
11
|
const model = params.model ?? defaultOpenAIImageModel;
|
|
11
12
|
const openai = new OpenAI({ apiKey });
|
|
12
13
|
const size = (() => {
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { GraphAILogger } from "graphai";
|
|
2
|
-
export const ttsElevenlabsAgent = async ({ namedInputs, params }) => {
|
|
2
|
+
export const ttsElevenlabsAgent = async ({ namedInputs, params, config }) => {
|
|
3
3
|
const { text } = namedInputs;
|
|
4
4
|
const { voice, model, stability, similarityBoost, suppressError } = params;
|
|
5
|
-
const apiKey = process.env.ELEVENLABS_API_KEY;
|
|
5
|
+
const apiKey = config?.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
6
6
|
if (!apiKey) {
|
|
7
7
|
throw new Error("ELEVENLABS_API_KEY environment variable is required");
|
|
8
8
|
}
|
|
@@ -6,8 +6,9 @@ const errorMessage = [
|
|
|
6
6
|
"1. Obtain an API key from Niji Voice (https://platform.nijivoice.com/) and set it as the NIJIVOICE_API_KEY environment variable.",
|
|
7
7
|
'2. Use OpenAI\'s TTS instead of Niji Voice by changing speechParams.provider from "nijivoice" to "openai".',
|
|
8
8
|
].join("\n");
|
|
9
|
-
export const ttsNijivoiceAgent = async ({ params, namedInputs }) => {
|
|
10
|
-
const {
|
|
9
|
+
export const ttsNijivoiceAgent = async ({ params, namedInputs, config }) => {
|
|
10
|
+
const { suppressError, voice, speed, speed_global } = params;
|
|
11
|
+
const { apiKey } = config ?? {};
|
|
11
12
|
const { text } = namedInputs;
|
|
12
13
|
assert(apiKey ?? nijovoiceApiKey, errorMessage);
|
|
13
14
|
const url = `https://api.nijivoice.com/api/platform/v1/voice-actors/${voice}/generate-voice`;
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { GraphAILogger } from "graphai";
|
|
2
2
|
import OpenAI from "openai";
|
|
3
|
-
export const ttsOpenaiAgent = async ({ namedInputs, params }) => {
|
|
3
|
+
export const ttsOpenaiAgent = async ({ namedInputs, params, config }) => {
|
|
4
4
|
const { text } = namedInputs;
|
|
5
|
-
const {
|
|
5
|
+
const { model, voice, suppressError, instructions } = params;
|
|
6
|
+
const { apiKey } = config ?? {};
|
|
6
7
|
const openai = new OpenAI({ apiKey });
|
|
7
8
|
try {
|
|
8
9
|
const tts_options = {
|
|
@@ -16,7 +16,7 @@ export declare const builder: (yargs: Argv) => Argv<{
|
|
|
16
16
|
} & {
|
|
17
17
|
s: string;
|
|
18
18
|
} & {
|
|
19
|
-
llm: "
|
|
19
|
+
llm: "openai" | "anthropic" | "gemini" | "groq" | undefined;
|
|
20
20
|
} & {
|
|
21
21
|
llm_model: string | undefined;
|
|
22
22
|
}>;
|
|
@@ -10,7 +10,7 @@ export declare const builder: (yargs: Argv) => Argv<{
|
|
|
10
10
|
} & {
|
|
11
11
|
beats_per_scene: number;
|
|
12
12
|
} & {
|
|
13
|
-
llm: "
|
|
13
|
+
llm: "openai" | "anthropic" | "gemini" | "groq" | undefined;
|
|
14
14
|
} & {
|
|
15
15
|
llm_model: string | undefined;
|
|
16
16
|
} & {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
-
import { userAssert } from "../utils/utils.js";
|
|
2
|
+
import { userAssert, llmConfig } from "../utils/utils.js";
|
|
3
3
|
import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema } from "../types/schema.js";
|
|
4
4
|
import { defaultOpenAIImageModel } from "../utils/const.js";
|
|
5
5
|
const defaultTextSlideStyles = [
|
|
@@ -72,16 +72,13 @@ export const MulmoPresentationStyleMethods = {
|
|
|
72
72
|
},
|
|
73
73
|
getHtmlImageAgentInfo(presentationStyle) {
|
|
74
74
|
const provider = text2HtmlImageProviderSchema.parse(presentationStyle.htmlImageParams?.provider);
|
|
75
|
-
const
|
|
76
|
-
const model = presentationStyle.htmlImageParams?.model
|
|
77
|
-
? presentationStyle.htmlImageParams?.model
|
|
78
|
-
: provider === "anthropic"
|
|
79
|
-
? "claude-3-7-sonnet-20250219"
|
|
80
|
-
: "gpt-4o-mini";
|
|
75
|
+
const defaultConfig = llmConfig[provider];
|
|
76
|
+
const model = presentationStyle.htmlImageParams?.model ? presentationStyle.htmlImageParams?.model : defaultConfig.defaultModel;
|
|
81
77
|
return {
|
|
82
78
|
provider,
|
|
83
|
-
agent,
|
|
79
|
+
agent: defaultConfig.agent,
|
|
84
80
|
model,
|
|
81
|
+
max_tokens: defaultConfig.max_tokens,
|
|
85
82
|
};
|
|
86
83
|
},
|
|
87
84
|
getImageType(_, beat) {
|