mulmocast 0.1.3 → 0.1.4
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- package/lib/actions/audio.js +5 -13
- package/lib/actions/image_agents.d.ts +27 -3
- package/lib/actions/image_agents.js +5 -2
- package/lib/actions/images.d.ts +9 -1
- package/lib/actions/images.js +22 -11
- package/lib/agents/image_google_agent.js +2 -2
- package/lib/agents/image_openai_agent.js +2 -2
- package/lib/agents/movie_replicate_agent.js +1 -1
- package/lib/index.d.ts +1 -0
- package/lib/index.js +1 -0
- package/lib/methods/mulmo_presentation_style.d.ts +1 -1
- package/lib/methods/mulmo_presentation_style.js +17 -17
- package/lib/types/schema.d.ts +243 -222
- package/lib/types/schema.js +10 -8
- package/lib/utils/const.d.ts +0 -1
- package/lib/utils/const.js +0 -1
- package/lib/utils/context.d.ts +12 -11
- package/lib/utils/preprocess.d.ts +7 -6
- package/lib/utils/provider2agent.d.ts +72 -0
- package/lib/utils/provider2agent.js +81 -0
- package/lib/utils/utils.d.ts +6 -11
- package/lib/utils/utils.js +5 -26
- package/package.json +1 -1
package/lib/actions/audio.js
CHANGED
|
@@ -12,18 +12,10 @@ import { MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
|
12
12
|
import { fileCacheAgentFilter } from "../utils/filters.js";
|
|
13
13
|
import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
|
|
14
14
|
import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
|
|
15
|
+
import { provider2TTSAgent } from "../utils/provider2agent.js";
|
|
15
16
|
import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
|
|
16
17
|
import { MulmoMediaSourceMethods } from "../methods/mulmo_media_source.js";
|
|
17
18
|
const vanillaAgents = agents.default ?? agents;
|
|
18
|
-
// const rion_takanashi_voice = "b9277ce3-ba1c-4f6f-9a65-c05ca102ded0"; // たかなし りおん
|
|
19
|
-
// const ben_carter_voice = "bc06c63f-fef6-43b6-92f7-67f919bd5dae"; // ベン・カーター
|
|
20
|
-
const provider_to_agent = {
|
|
21
|
-
nijivoice: "ttsNijivoiceAgent",
|
|
22
|
-
openai: "ttsOpenaiAgent",
|
|
23
|
-
google: "ttsGoogleAgent",
|
|
24
|
-
elevenlabs: "ttsElevenlabsAgent",
|
|
25
|
-
mock: "mediaMockAgent",
|
|
26
|
-
};
|
|
27
19
|
const getAudioPath = (context, beat, audioFile) => {
|
|
28
20
|
if (beat.audio?.type === "audio") {
|
|
29
21
|
const path = MulmoMediaSourceMethods.resolve(beat.audio.source, context);
|
|
@@ -40,7 +32,7 @@ const getAudioPath = (context, beat, audioFile) => {
|
|
|
40
32
|
const getAudioParam = (presentationStyle, beat) => {
|
|
41
33
|
const voiceId = MulmoPresentationStyleMethods.getVoiceId(presentationStyle, beat);
|
|
42
34
|
// Use speaker-specific provider if available, otherwise fall back to script-level provider
|
|
43
|
-
const provider = MulmoPresentationStyleMethods.
|
|
35
|
+
const provider = MulmoPresentationStyleMethods.getTTSProvider(presentationStyle, beat);
|
|
44
36
|
const speechOptions = MulmoPresentationStyleMethods.getSpeechOptions(presentationStyle, beat);
|
|
45
37
|
return { voiceId, provider, speechOptions };
|
|
46
38
|
};
|
|
@@ -61,7 +53,7 @@ const preprocessor = (namedInputs) => {
|
|
|
61
53
|
studioBeat.audioFile = audioPath; // TODO
|
|
62
54
|
const needsTTS = !beat.audio && audioPath !== undefined;
|
|
63
55
|
return {
|
|
64
|
-
ttsAgent:
|
|
56
|
+
ttsAgent: provider2TTSAgent[provider].agentName,
|
|
65
57
|
text,
|
|
66
58
|
voiceId,
|
|
67
59
|
speechOptions,
|
|
@@ -186,8 +178,8 @@ export const audioFilePath = (context) => {
|
|
|
186
178
|
const getConcurrency = (context) => {
|
|
187
179
|
// Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
|
|
188
180
|
const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
|
|
189
|
-
const provider = speaker.provider ?? context.presentationStyle.speechParams.provider;
|
|
190
|
-
return provider
|
|
181
|
+
const provider = (speaker.provider ?? context.presentationStyle.speechParams.provider);
|
|
182
|
+
return provider2TTSAgent[provider].hasLimitedConcurrency;
|
|
191
183
|
});
|
|
192
184
|
return hasLimitedConcurrencyProvider ? 1 : 8;
|
|
193
185
|
};
|
|
@@ -13,7 +13,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
13
13
|
imagePath: string | undefined;
|
|
14
14
|
referenceImageForMovie: string | undefined;
|
|
15
15
|
imageParams: {
|
|
16
|
-
provider:
|
|
16
|
+
provider: string;
|
|
17
17
|
style?: string | undefined;
|
|
18
18
|
model?: string | undefined;
|
|
19
19
|
moderation?: string | undefined;
|
|
@@ -44,8 +44,20 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
44
44
|
} | {
|
|
45
45
|
imagePath: string;
|
|
46
46
|
imageFromMovie: boolean;
|
|
47
|
+
movieParams: {
|
|
48
|
+
speed?: number | undefined;
|
|
49
|
+
model?: string | undefined;
|
|
50
|
+
fillOption?: {
|
|
51
|
+
style: "aspectFit" | "aspectFill";
|
|
52
|
+
} | undefined;
|
|
53
|
+
provider?: string | undefined;
|
|
54
|
+
transition?: {
|
|
55
|
+
type: "fade" | "slideout_left";
|
|
56
|
+
duration: number;
|
|
57
|
+
} | undefined;
|
|
58
|
+
};
|
|
47
59
|
imageParams: {
|
|
48
|
-
provider:
|
|
60
|
+
provider: string;
|
|
49
61
|
style?: string | undefined;
|
|
50
62
|
model?: string | undefined;
|
|
51
63
|
moderation?: string | undefined;
|
|
@@ -79,8 +91,20 @@ export declare const imagePreprocessAgent: (namedInputs: {
|
|
|
79
91
|
imageAgentInfo: import("../types/type.js").Text2ImageAgentInfo;
|
|
80
92
|
prompt: string;
|
|
81
93
|
referenceImages: string[];
|
|
94
|
+
movieParams: {
|
|
95
|
+
speed?: number | undefined;
|
|
96
|
+
model?: string | undefined;
|
|
97
|
+
fillOption?: {
|
|
98
|
+
style: "aspectFit" | "aspectFill";
|
|
99
|
+
} | undefined;
|
|
100
|
+
provider?: string | undefined;
|
|
101
|
+
transition?: {
|
|
102
|
+
type: "fade" | "slideout_left";
|
|
103
|
+
duration: number;
|
|
104
|
+
} | undefined;
|
|
105
|
+
};
|
|
82
106
|
imageParams: {
|
|
83
|
-
provider:
|
|
107
|
+
provider: string;
|
|
84
108
|
style?: string | undefined;
|
|
85
109
|
model?: string | undefined;
|
|
86
110
|
moderation?: string | undefined;
|
|
@@ -2,6 +2,7 @@ import { MulmoPresentationStyleMethods, MulmoStudioContextMethods, MulmoBeatMeth
|
|
|
2
2
|
import { getBeatPngImagePath, getBeatMoviePath } from "../utils/file.js";
|
|
3
3
|
import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
|
|
4
4
|
import { renderHTMLToImage } from "../utils/markdown.js";
|
|
5
|
+
import { GraphAILogger } from "graphai";
|
|
5
6
|
const htmlStyle = (context, beat) => {
|
|
6
7
|
return {
|
|
7
8
|
canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
|
|
@@ -27,13 +28,15 @@ export const imagePreprocessAgent = async (namedInputs) => {
|
|
|
27
28
|
// undefined prompt indicates that image generation is not needed
|
|
28
29
|
return { ...returnValue, imagePath: pluginPath, referenceImageForMovie: pluginPath };
|
|
29
30
|
}
|
|
31
|
+
const movieParams = { ...context.presentationStyle.movieParams, ...beat.movieParams };
|
|
32
|
+
GraphAILogger.log(`movieParams: ${index}`, movieParams, beat.moviePrompt);
|
|
30
33
|
if (beat.moviePrompt && !beat.imagePrompt) {
|
|
31
|
-
return { ...returnValue, imagePath, imageFromMovie: true }; // no image prompt, only movie prompt
|
|
34
|
+
return { ...returnValue, imagePath, imageFromMovie: true, movieParams }; // no image prompt, only movie prompt
|
|
32
35
|
}
|
|
33
36
|
// referenceImages for "edit_image", openai agent.
|
|
34
37
|
const referenceImages = MulmoBeatMethods.getImageReferenceForImageGenerator(beat, imageRefs);
|
|
35
38
|
const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
|
|
36
|
-
return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages };
|
|
39
|
+
return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages, movieParams };
|
|
37
40
|
};
|
|
38
41
|
export const imagePluginAgent = async (namedInputs) => {
|
|
39
42
|
const { context, beat, index } = namedInputs;
|
package/lib/actions/images.d.ts
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
import type { GraphOptions, CallbackFunction } from "graphai";
|
|
2
2
|
import { MulmoStudioContext } from "../types/index.js";
|
|
3
3
|
export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
|
|
4
|
-
|
|
4
|
+
type ImageOptions = {
|
|
5
|
+
imageAgents: Record<string, unknown>;
|
|
6
|
+
};
|
|
7
|
+
export declare const images: (context: MulmoStudioContext, args?: {
|
|
8
|
+
settings?: Record<string, string>;
|
|
9
|
+
callbacks?: CallbackFunction[];
|
|
10
|
+
options?: ImageOptions;
|
|
11
|
+
}) => Promise<MulmoStudioContext>;
|
|
5
12
|
export declare const generateBeatImage: (inputs: {
|
|
6
13
|
index: number;
|
|
7
14
|
context: MulmoStudioContext;
|
|
@@ -10,3 +17,4 @@ export declare const generateBeatImage: (inputs: {
|
|
|
10
17
|
forceMovie?: boolean;
|
|
11
18
|
forceImage?: boolean;
|
|
12
19
|
}) => Promise<void>;
|
|
20
|
+
export {};
|
package/lib/actions/images.js
CHANGED
|
@@ -2,7 +2,7 @@ import dotenv from "dotenv";
|
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import { GraphAI, GraphAILogger, TaskManager } from "graphai";
|
|
4
4
|
import { GoogleAuth } from "google-auth-library";
|
|
5
|
-
import * as
|
|
5
|
+
import * as vanilla from "@graphai/vanilla";
|
|
6
6
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
7
7
|
import { anthropicAgent } from "@graphai/anthropic_agent";
|
|
8
8
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
@@ -14,13 +14,19 @@ import { userAssert, settings2GraphAIConfig } from "../utils/utils.js";
|
|
|
14
14
|
import { extractImageFromMovie } from "../utils/ffmpeg_utils.js";
|
|
15
15
|
import { getImageRefs } from "./image_references.js";
|
|
16
16
|
import { imagePreprocessAgent, imagePluginAgent, htmlImageGeneratorAgent } from "./image_agents.js";
|
|
17
|
-
const vanillaAgents =
|
|
17
|
+
const vanillaAgents = vanilla.default ?? vanilla;
|
|
18
18
|
const imageAgents = {
|
|
19
|
-
...vanillaAgents,
|
|
20
19
|
imageGoogleAgent,
|
|
20
|
+
imageOpenaiAgent,
|
|
21
|
+
};
|
|
22
|
+
const movieAgents = {
|
|
21
23
|
movieGoogleAgent,
|
|
22
24
|
movieReplicateAgent,
|
|
23
|
-
|
|
25
|
+
};
|
|
26
|
+
const defaultAgents = {
|
|
27
|
+
...vanillaAgents,
|
|
28
|
+
...imageAgents,
|
|
29
|
+
...movieAgents,
|
|
24
30
|
mediaMockAgent,
|
|
25
31
|
fileWriteAgent,
|
|
26
32
|
openAIAgent,
|
|
@@ -141,7 +147,7 @@ const beat_graph_data = {
|
|
|
141
147
|
mulmoContext: ":context",
|
|
142
148
|
},
|
|
143
149
|
params: {
|
|
144
|
-
model: ":
|
|
150
|
+
model: ":preprocessor.movieParams.model",
|
|
145
151
|
duration: ":beat.duration",
|
|
146
152
|
canvasSize: ":context.presentationStyle.canvasSize",
|
|
147
153
|
},
|
|
@@ -308,10 +314,14 @@ const prepareGenerateImages = async (context) => {
|
|
|
308
314
|
};
|
|
309
315
|
return injections;
|
|
310
316
|
};
|
|
311
|
-
const generateImages = async (context, settings, callbacks) => {
|
|
312
|
-
const
|
|
317
|
+
const generateImages = async (context, settings, callbacks, options) => {
|
|
318
|
+
const optionImageAgents = options?.imageAgents ?? {};
|
|
313
319
|
const injections = await prepareGenerateImages(context);
|
|
314
|
-
const
|
|
320
|
+
const graphaiAgent = {
|
|
321
|
+
...defaultAgents,
|
|
322
|
+
...optionImageAgents,
|
|
323
|
+
};
|
|
324
|
+
const graph = new GraphAI(graph_data, graphaiAgent, await graphOption(context, settings));
|
|
315
325
|
Object.keys(injections).forEach((key) => {
|
|
316
326
|
graph.injectValue(key, injections[key]);
|
|
317
327
|
});
|
|
@@ -324,10 +334,11 @@ const generateImages = async (context, settings, callbacks) => {
|
|
|
324
334
|
return res.mergeResult;
|
|
325
335
|
};
|
|
326
336
|
// public api
|
|
327
|
-
export const images = async (context,
|
|
337
|
+
export const images = async (context, args) => {
|
|
338
|
+
const { settings, callbacks, options } = args ?? {};
|
|
328
339
|
try {
|
|
329
340
|
MulmoStudioContextMethods.setSessionState(context, "image", true);
|
|
330
|
-
const newContext = await generateImages(context, settings, callbacks);
|
|
341
|
+
const newContext = await generateImages(context, settings, callbacks, options);
|
|
331
342
|
MulmoStudioContextMethods.setSessionState(context, "image", false);
|
|
332
343
|
return newContext;
|
|
333
344
|
}
|
|
@@ -341,7 +352,7 @@ export const generateBeatImage = async (inputs) => {
|
|
|
341
352
|
const { index, context, settings, callbacks, forceMovie, forceImage } = inputs;
|
|
342
353
|
const options = await graphOption(context, settings);
|
|
343
354
|
const injections = await prepareGenerateImages(context);
|
|
344
|
-
const graph = new GraphAI(beat_graph_data,
|
|
355
|
+
const graph = new GraphAI(beat_graph_data, defaultAgents, options);
|
|
345
356
|
Object.keys(injections).forEach((key) => {
|
|
346
357
|
if ("outputStudioFilePath" !== key) {
|
|
347
358
|
graph.injectValue(key, injections[key]);
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { GraphAILogger } from "graphai";
|
|
2
2
|
import { getAspectRatio } from "./movie_google_agent.js";
|
|
3
|
+
import { provider2ImageAgent } from "../utils/provider2agent.js";
|
|
3
4
|
async function generateImage(projectId, model, token, prompt, aspectRatio) {
|
|
4
5
|
const GOOGLE_IMAGEN_ENDPOINT = `https://us-central1-aiplatform.googleapis.com/v1/projects/${projectId}/locations/us-central1/publishers/google/models/${model}:predict`;
|
|
5
6
|
try {
|
|
@@ -54,8 +55,7 @@ async function generateImage(projectId, model, token, prompt, aspectRatio) {
|
|
|
54
55
|
export const imageGoogleAgent = async ({ namedInputs, params, config, }) => {
|
|
55
56
|
const { prompt } = namedInputs;
|
|
56
57
|
const aspectRatio = getAspectRatio(params.canvasSize);
|
|
57
|
-
const model = params.model ?? "
|
|
58
|
-
//const projectId = process.env.GOOGLE_PROJECT_ID; // Your Google Cloud Project ID
|
|
58
|
+
const model = params.model ?? provider2ImageAgent["google"].defaultModel;
|
|
59
59
|
const projectId = config?.projectId;
|
|
60
60
|
const token = config?.token;
|
|
61
61
|
try {
|
|
@@ -2,13 +2,13 @@ import fs from "fs";
|
|
|
2
2
|
import path from "path";
|
|
3
3
|
import { GraphAILogger } from "graphai";
|
|
4
4
|
import OpenAI, { toFile } from "openai";
|
|
5
|
-
import {
|
|
5
|
+
import { provider2ImageAgent } from "../utils/provider2agent.js";
|
|
6
6
|
// https://platform.openai.com/docs/guides/image-generation
|
|
7
7
|
export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
|
|
8
8
|
const { prompt, referenceImages } = namedInputs;
|
|
9
9
|
const { moderation, canvasSize } = params;
|
|
10
10
|
const { apiKey, baseURL } = { ...config };
|
|
11
|
-
const model = params.model ??
|
|
11
|
+
const model = params.model ?? provider2ImageAgent["openai"].defaultModel;
|
|
12
12
|
const openai = new OpenAI({ apiKey, baseURL });
|
|
13
13
|
const size = (() => {
|
|
14
14
|
if (model === "gpt-image-1") {
|
|
@@ -21,7 +21,7 @@ async function generateMovie(model, apiKey, prompt, imagePath, aspectRatio, dura
|
|
|
21
21
|
if (imagePath) {
|
|
22
22
|
const buffer = readFileSync(imagePath);
|
|
23
23
|
const base64Image = `data:image/png;base64,${buffer.toString("base64")}`;
|
|
24
|
-
if (model === "kwaivgi/kling-v2.1") {
|
|
24
|
+
if (model === "kwaivgi/kling-v2.1" || model === "kwaivgi/kling-v1.6-pro") {
|
|
25
25
|
input.start_image = base64Image;
|
|
26
26
|
}
|
|
27
27
|
else {
|
package/lib/index.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ export * from "./actions/index.js";
|
|
|
2
2
|
export * from "./cli/helpers.js";
|
|
3
3
|
export * from "./utils/file.js";
|
|
4
4
|
export * from "./utils/ffmpeg_utils.js";
|
|
5
|
+
export * from "./utils/provider2agent.js";
|
|
5
6
|
export * from "./methods/index.js";
|
|
6
7
|
export * from "./agents/index.js";
|
|
7
8
|
export * from "./types/index.js";
|
package/lib/index.js
CHANGED
|
@@ -2,6 +2,7 @@ export * from "./actions/index.js";
|
|
|
2
2
|
export * from "./cli/helpers.js";
|
|
3
3
|
export * from "./utils/file.js";
|
|
4
4
|
export * from "./utils/ffmpeg_utils.js";
|
|
5
|
+
export * from "./utils/provider2agent.js";
|
|
5
6
|
export * from "./methods/index.js";
|
|
6
7
|
export * from "./agents/index.js";
|
|
7
8
|
export * from "./types/index.js";
|
|
@@ -7,7 +7,7 @@ export declare const MulmoPresentationStyleMethods: {
|
|
|
7
7
|
getTextSlideStyle(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
|
|
8
8
|
getSpeechOptions(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeechOptions | undefined;
|
|
9
9
|
getSpeaker(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeakerData;
|
|
10
|
-
|
|
10
|
+
getTTSProvider(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): Text2SpeechProvider;
|
|
11
11
|
getVoiceId(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
|
|
12
12
|
getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
|
|
13
13
|
getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
|
-
import { userAssert
|
|
2
|
+
import { userAssert } from "../utils/utils.js";
|
|
3
3
|
import { text2ImageProviderSchema, text2HtmlImageProviderSchema, text2SpeechProviderSchema, mulmoCanvasDimensionSchema } from "../types/schema.js";
|
|
4
|
-
import {
|
|
4
|
+
import { defaultProviders, provider2ImageAgent, provider2MovieAgent, provider2LLMAgent } from "../utils/provider2agent.js";
|
|
5
5
|
const defaultTextSlideStyles = [
|
|
6
6
|
'*,*::before,*::after{box-sizing:border-box}body,h1,h2,h3,h4,p,figure,blockquote,dl,dd{margin:0}ul[role="list"],ol[role="list"]{list-style:none}html:focus-within{scroll-behavior:smooth}body{min-height:100vh;text-rendering:optimizeSpeed;line-height:1.5}a:not([class]){text-decoration-skip-ink:auto}img,picture{max-width:100%;display:block}input,button,textarea,select{font:inherit}@media(prefers-reduced-motion:reduce){html:focus-within{scroll-behavior:auto}*,*::before,*::after{animation-duration:.01ms !important;animation-iteration-count:1 !important;transition-duration:.01ms !important;scroll-behavior:auto !important}}',
|
|
7
7
|
"body { margin: 60px; margin-top: 40px; color:#333; font-size: 30px; font-family: Arial, sans-serif; box-sizing: border-box; height: 100vh }",
|
|
@@ -49,7 +49,7 @@ export const MulmoPresentationStyleMethods = {
|
|
|
49
49
|
userAssert(!!speaker, `speaker is not set: speaker "${beat.speaker}"`);
|
|
50
50
|
return speaker;
|
|
51
51
|
},
|
|
52
|
-
|
|
52
|
+
getTTSProvider(presentationStyle, beat) {
|
|
53
53
|
const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
|
|
54
54
|
return speaker.provider ?? presentationStyle.speechParams.provider;
|
|
55
55
|
},
|
|
@@ -65,46 +65,46 @@ export const MulmoPresentationStyleMethods = {
|
|
|
65
65
|
// provider and model appropriately.
|
|
66
66
|
const imageParams = { ...presentationStyle.imageParams, ...beat?.imageParams };
|
|
67
67
|
const provider = MulmoPresentationStyleMethods.getText2ImageProvider(imageParams?.provider);
|
|
68
|
+
const agentInfo = provider2ImageAgent[provider];
|
|
69
|
+
// The default text2image model is gpt-image-1 from OpenAI, and to use it you must have an OpenAI account and have verified your identity. If this is not possible, please specify dall-e-3 as the model.
|
|
68
70
|
const defaultImageParams = {
|
|
69
71
|
provider,
|
|
70
|
-
model:
|
|
72
|
+
model: agentInfo.defaultModel,
|
|
71
73
|
};
|
|
72
74
|
return {
|
|
73
|
-
agent:
|
|
75
|
+
agent: agentInfo.agentName,
|
|
74
76
|
imageParams: { ...defaultImageParams, ...imageParams },
|
|
75
77
|
};
|
|
76
78
|
},
|
|
77
79
|
// Determine movie agent based on provider
|
|
78
80
|
getMovieAgent(presentationStyle) {
|
|
79
|
-
const movieProvider = presentationStyle.movieParams?.provider ??
|
|
80
|
-
|
|
81
|
-
case "replicate":
|
|
82
|
-
return "movieReplicateAgent";
|
|
83
|
-
case "google":
|
|
84
|
-
default:
|
|
85
|
-
return "movieGoogleAgent";
|
|
86
|
-
}
|
|
81
|
+
const movieProvider = (presentationStyle.movieParams?.provider ?? defaultProviders.text2movie);
|
|
82
|
+
return provider2MovieAgent[movieProvider].agentName;
|
|
87
83
|
},
|
|
88
84
|
getConcurrency(presentationStyle) {
|
|
85
|
+
/*
|
|
89
86
|
if (presentationStyle.movieParams?.provider === "replicate") {
|
|
90
|
-
|
|
87
|
+
return 4;
|
|
91
88
|
}
|
|
89
|
+
*/
|
|
92
90
|
const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(presentationStyle);
|
|
93
91
|
if (imageAgentInfo.imageParams.provider === "openai") {
|
|
94
92
|
// NOTE: Here are the rate limits of OpenAI's text2image API (1token = 32x32 patch).
|
|
95
93
|
// dall-e-3: 7,500 RPM、15 images per minute (4 images for max resolution)
|
|
96
94
|
// gpt-image-1:3,000,000 TPM、150 images per minute
|
|
97
|
-
|
|
95
|
+
if (imageAgentInfo.imageParams.model === provider2ImageAgent.openai.defaultModel) {
|
|
96
|
+
return 16;
|
|
97
|
+
}
|
|
98
98
|
}
|
|
99
99
|
return 4;
|
|
100
100
|
},
|
|
101
101
|
getHtmlImageAgentInfo(presentationStyle) {
|
|
102
102
|
const provider = text2HtmlImageProviderSchema.parse(presentationStyle.htmlImageParams?.provider);
|
|
103
|
-
const defaultConfig =
|
|
103
|
+
const defaultConfig = provider2LLMAgent[provider];
|
|
104
104
|
const model = presentationStyle.htmlImageParams?.model ? presentationStyle.htmlImageParams?.model : defaultConfig.defaultModel;
|
|
105
105
|
return {
|
|
106
106
|
provider,
|
|
107
|
-
agent: defaultConfig.
|
|
107
|
+
agent: defaultConfig.agentName,
|
|
108
108
|
model,
|
|
109
109
|
max_tokens: defaultConfig.max_tokens,
|
|
110
110
|
};
|