mulmocast 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/lib/actions/audio.js +13 -18
  2. package/lib/actions/image_agents.d.ts +30 -6
  3. package/lib/actions/image_agents.js +5 -2
  4. package/lib/actions/image_references.js +2 -1
  5. package/lib/actions/images.d.ts +9 -1
  6. package/lib/actions/images.js +38 -13
  7. package/lib/actions/movie.js +3 -2
  8. package/lib/agents/add_bgm_agent.js +1 -1
  9. package/lib/agents/combine_audio_files_agent.js +10 -7
  10. package/lib/agents/image_google_agent.js +2 -2
  11. package/lib/agents/image_openai_agent.js +2 -2
  12. package/lib/agents/movie_replicate_agent.js +1 -1
  13. package/lib/agents/tts_elevenlabs_agent.d.ts +2 -1
  14. package/lib/agents/tts_elevenlabs_agent.js +4 -3
  15. package/lib/agents/tts_google_agent.d.ts +2 -9
  16. package/lib/agents/tts_nijivoice_agent.d.ts +2 -1
  17. package/lib/agents/tts_nijivoice_agent.js +3 -3
  18. package/lib/agents/tts_openai_agent.d.ts +2 -13
  19. package/lib/agents/tts_openai_agent.js +4 -3
  20. package/lib/index.browser.d.ts +1 -0
  21. package/lib/index.browser.js +1 -0
  22. package/lib/index.d.ts +1 -0
  23. package/lib/index.js +2 -0
  24. package/lib/methods/mulmo_presentation_style.d.ts +2 -1
  25. package/lib/methods/mulmo_presentation_style.js +21 -17
  26. package/lib/types/agent.d.ts +29 -2
  27. package/lib/types/agent.js +0 -1
  28. package/lib/types/schema.d.ts +596 -485
  29. package/lib/types/schema.js +15 -11
  30. package/lib/utils/const.d.ts +0 -1
  31. package/lib/utils/const.js +0 -1
  32. package/lib/utils/context.d.ts +36 -30
  33. package/lib/utils/ffmpeg_utils.d.ts +4 -1
  34. package/lib/utils/ffmpeg_utils.js +2 -1
  35. package/lib/utils/preprocess.d.ts +28 -24
  36. package/lib/utils/provider2agent.d.ts +76 -0
  37. package/lib/utils/provider2agent.js +87 -0
  38. package/lib/utils/utils.d.ts +6 -11
  39. package/lib/utils/utils.js +5 -26
  40. package/package.json +2 -2
@@ -1,4 +1,5 @@
1
1
  import { z } from "zod";
2
+ import { htmlLLMProvider, provider2TTSAgent, provider2ImageAgent, provider2MovieAgent, defaultProviders } from "../utils/provider2agent.js";
2
3
  export const langSchema = z.string();
3
4
  const URLStringSchema = z.string().url();
4
5
  export const localizedTextSchema = z
@@ -20,13 +21,14 @@ export const speechOptionsSchema = z
20
21
  })
21
22
  .strict();
22
23
  const speakerIdSchema = z.string();
23
- export const text2SpeechProviderSchema = z.enum(["openai", "nijivoice", "google", "elevenlabs"]).default("openai");
24
+ export const text2SpeechProviderSchema = z.enum(Object.keys(provider2TTSAgent)).default(defaultProviders.tts);
24
25
  export const speakerDataSchema = z
25
26
  .object({
26
27
  displayName: z.record(langSchema, z.string()).optional(),
27
28
  voiceId: z.string(),
28
29
  speechOptions: speechOptionsSchema.optional(),
29
30
  provider: text2SpeechProviderSchema.optional(),
31
+ model: z.string().optional().describe("TTS model to use for this speaker"),
30
32
  })
31
33
  .strict();
32
34
  export const speakerDictionarySchema = z.record(speakerIdSchema, speakerDataSchema);
@@ -129,9 +131,7 @@ export const mulmoImageAssetSchema = z.union([
129
131
  mulmoPdfMediaSchema,
130
132
  mulmoImageMediaSchema,
131
133
  mulmoSvgMediaSchema,
132
- mulmoMovieMediaSchema.extend({
133
- mixAudio: z.number().default(1.0),
134
- }),
134
+ mulmoMovieMediaSchema,
135
135
  mulmoTextSlideMediaSchema,
136
136
  mulmoChartMediaSchema,
137
137
  mulmoMermaidMediaSchema,
@@ -165,19 +165,19 @@ export const mulmoFillOptionSchema = z
165
165
  style: z.enum(["aspectFit", "aspectFill"]).default("aspectFit"),
166
166
  })
167
167
  .describe("How to handle aspect ratio differences between image and canvas");
168
- export const text2ImageProviderSchema = z.enum(["openai", "google"]).default("openai");
168
+ export const text2ImageProviderSchema = z.enum(Object.keys(provider2ImageAgent)).default(defaultProviders.text2image);
169
169
  // NOTE: This is for UI only. (until we figure out how to use it in mulmoImageParamsSchema)
170
170
  export const mulmoOpenAIImageModelSchema = z
171
171
  .object({
172
172
  provider: z.literal("openai"),
173
- model: z.enum(["dall-e-3", "gpt-image-1"]).optional(),
173
+ model: z.enum(provider2ImageAgent["openai"].models).optional(),
174
174
  })
175
175
  .strict();
176
176
  // NOTE: This is for UI only. (until we figure out how to use it in mulmoImageParamsSchema)
177
177
  export const mulmoGoogleImageModelSchema = z
178
178
  .object({
179
179
  provider: z.literal("google"),
180
- model: z.enum(["imagen-3.0-fast-generate-001", "imagen-3.0-generate-002", "imagen-3.0-capability-001"]).optional(),
180
+ model: z.enum(provider2ImageAgent["google"].models).optional(),
181
181
  })
182
182
  .strict();
183
183
  export const mulmoImageParamsSchema = z
@@ -197,6 +197,7 @@ export const textSlideParamsSchema = z
197
197
  export const beatAudioParamsSchema = z
198
198
  .object({
199
199
  padding: z.number().optional().describe("Padding between beats"), // seconds
200
+ movieVolume: z.number().default(1.0).describe("Audio volume of the imported or generated movie"),
200
201
  })
201
202
  .strict();
202
203
  export const mulmoHtmlImageParamsSchema = z
@@ -238,6 +239,7 @@ export const mulmoBeatSchema = z
238
239
  audioParams: beatAudioParamsSchema.optional(), // beat specific parameters
239
240
  movieParams: z
240
241
  .object({
242
+ model: z.string().optional(),
241
243
  fillOption: mulmoFillOptionSchema.optional(),
242
244
  speed: z.number().optional().describe("Speed of the video. 1.0 is normal speed. 0.5 is half speed. 2.0 is double speed."),
243
245
  })
@@ -269,22 +271,23 @@ export const mulmoSpeechParamsSchema = z
269
271
  .object({
270
272
  provider: text2SpeechProviderSchema, // has default value
271
273
  speakers: speakerDictionarySchema,
274
+ model: z.string().optional().describe("Default TTS model to use"),
272
275
  })
273
276
  .strict();
274
- export const text2HtmlImageProviderSchema = z.enum(["openai", "anthropic"]).default("openai");
275
- export const text2MovieProviderSchema = z.enum(["google", "replicate"]).default("google");
277
+ export const text2HtmlImageProviderSchema = z.enum(htmlLLMProvider).default(defaultProviders.text2Html);
278
+ export const text2MovieProviderSchema = z.enum(Object.keys(provider2MovieAgent)).default(defaultProviders.text2movie);
276
279
  // NOTE: This is UI only. (until we figure out how to use it in mulmoMovieParamsSchema)
277
280
  export const mulmoGoogleMovieModelSchema = z
278
281
  .object({
279
282
  provider: z.literal("google"),
280
- model: z.enum(["veo-2.0-generate-001"]).optional(),
283
+ model: z.enum(provider2MovieAgent.google.models).optional(),
281
284
  })
282
285
  .strict();
283
286
  // NOTE: This is UI only. (until we figure out how to use it in mulmoMovieParamsSchema)
284
287
  export const mulmoReplicateMovieModelSchema = z
285
288
  .object({
286
289
  provider: z.literal("replicate"),
287
- model: z.enum(["bytedance/seedance-1-lite", "kwaivgi/kling-v2.1", "google/veo-3"]).optional(),
290
+ model: z.enum(provider2MovieAgent.replicate.models).optional(),
288
291
  })
289
292
  .strict();
290
293
  export const mulmoTransitionSchema = z.object({
@@ -358,6 +361,7 @@ export const mulmoStudioBeatSchema = z
358
361
  audioDuration: z.number().optional(),
359
362
  movieDuration: z.number().optional(),
360
363
  silenceDuration: z.number().optional(),
364
+ hasMovieAudio: z.boolean().optional(),
361
365
  audioFile: z.string().optional(),
362
366
  imageFile: z.string().optional(), // path to the image
363
367
  movieFile: z.string().optional(), // path to the movie file
@@ -9,4 +9,3 @@ export declare const storyToScriptGenerateMode: {
9
9
  stepWise: string;
10
10
  oneStep: string;
11
11
  };
12
- export declare const defaultOpenAIImageModel = "dall-e-3";
@@ -9,4 +9,3 @@ export const storyToScriptGenerateMode = {
9
9
  stepWise: "step_wise",
10
10
  oneStep: "one_step",
11
11
  };
12
- export const defaultOpenAIImageModel = "dall-e-3";
@@ -12,6 +12,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
12
12
  audioDuration?: number | undefined;
13
13
  movieDuration?: number | undefined;
14
14
  silenceDuration?: number | undefined;
15
+ hasMovieAudio?: boolean | undefined;
15
16
  audioFile?: string | undefined;
16
17
  imageFile?: string | undefined;
17
18
  movieFile?: string | undefined;
@@ -49,7 +50,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
49
50
  height: number;
50
51
  };
51
52
  speechParams: {
52
- provider: "openai" | "nijivoice" | "google" | "elevenlabs";
53
+ provider: string;
53
54
  speakers: Record<string, {
54
55
  voiceId: string;
55
56
  displayName?: Record<string, string> | undefined;
@@ -57,8 +58,10 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
57
58
  speed?: number | undefined;
58
59
  instruction?: string | undefined;
59
60
  } | undefined;
60
- provider?: "openai" | "nijivoice" | "google" | "elevenlabs" | undefined;
61
+ provider?: string | undefined;
62
+ model?: string | undefined;
61
63
  }>;
64
+ model?: string | undefined;
62
65
  };
63
66
  beats: {
64
67
  text: string;
@@ -119,6 +122,21 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
119
122
  path: string;
120
123
  kind: "path";
121
124
  };
125
+ } | {
126
+ type: "movie";
127
+ source: {
128
+ url: string;
129
+ kind: "url";
130
+ } | {
131
+ kind: "base64";
132
+ data: string;
133
+ } | {
134
+ text: string;
135
+ kind: "text";
136
+ } | {
137
+ path: string;
138
+ kind: "path";
139
+ };
122
140
  } | {
123
141
  type: "textSlide";
124
142
  slide: {
@@ -156,22 +174,6 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
156
174
  } | {
157
175
  type: "voice_over";
158
176
  startAt?: number | undefined;
159
- } | {
160
- type: "movie";
161
- source: {
162
- url: string;
163
- kind: "url";
164
- } | {
165
- kind: "base64";
166
- data: string;
167
- } | {
168
- text: string;
169
- kind: "text";
170
- } | {
171
- path: string;
172
- kind: "path";
173
- };
174
- mixAudio: number;
175
177
  } | undefined;
176
178
  id?: string | undefined;
177
179
  audio?: {
@@ -196,9 +198,9 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
196
198
  imagePrompt?: string | undefined;
197
199
  description?: string | undefined;
198
200
  imageParams?: {
199
- provider: "openai" | "google";
200
- style?: string | undefined;
201
+ provider: string;
201
202
  model?: string | undefined;
203
+ style?: string | undefined;
202
204
  moderation?: string | undefined;
203
205
  images?: Record<string, {
204
206
  type: "image";
@@ -221,10 +223,12 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
221
223
  }> | undefined;
222
224
  } | undefined;
223
225
  audioParams?: {
226
+ movieVolume: number;
224
227
  padding?: number | undefined;
225
228
  } | undefined;
226
229
  movieParams?: {
227
230
  speed?: number | undefined;
231
+ model?: string | undefined;
228
232
  fillOption?: {
229
233
  style: "aspectFit" | "aspectFill";
230
234
  } | undefined;
@@ -252,9 +256,9 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
252
256
  title?: string | undefined;
253
257
  description?: string | undefined;
254
258
  imageParams?: {
255
- provider: "openai" | "google";
256
- style?: string | undefined;
259
+ provider: string;
257
260
  model?: string | undefined;
261
+ style?: string | undefined;
258
262
  moderation?: string | undefined;
259
263
  images?: Record<string, {
260
264
  type: "image";
@@ -277,7 +281,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
277
281
  }> | undefined;
278
282
  } | undefined;
279
283
  movieParams?: {
280
- provider?: "google" | "replicate" | undefined;
284
+ provider?: string | undefined;
281
285
  model?: string | undefined;
282
286
  fillOption?: {
283
287
  style: "aspectFit" | "aspectFill";
@@ -288,7 +292,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
288
292
  } | undefined;
289
293
  } | undefined;
290
294
  htmlImageParams?: {
291
- provider: "openai" | "anthropic";
295
+ provider: string;
292
296
  model?: string | undefined;
293
297
  } | undefined;
294
298
  textSlideParams?: {
@@ -363,7 +367,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
363
367
  height: number;
364
368
  };
365
369
  speechParams: {
366
- provider: "openai" | "nijivoice" | "google" | "elevenlabs";
370
+ provider: string;
367
371
  speakers: Record<string, {
368
372
  voiceId: string;
369
373
  displayName?: Record<string, string> | undefined;
@@ -371,13 +375,15 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
371
375
  speed?: number | undefined;
372
376
  instruction?: string | undefined;
373
377
  } | undefined;
374
- provider?: "openai" | "nijivoice" | "google" | "elevenlabs" | undefined;
378
+ provider?: string | undefined;
379
+ model?: string | undefined;
375
380
  }>;
381
+ model?: string | undefined;
376
382
  };
377
383
  imageParams?: {
378
- provider: "openai" | "google";
379
- style?: string | undefined;
384
+ provider: string;
380
385
  model?: string | undefined;
386
+ style?: string | undefined;
381
387
  moderation?: string | undefined;
382
388
  images?: Record<string, {
383
389
  type: "image";
@@ -400,7 +406,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
400
406
  }> | undefined;
401
407
  } | undefined;
402
408
  movieParams?: {
403
- provider?: "google" | "replicate" | undefined;
409
+ provider?: string | undefined;
404
410
  model?: string | undefined;
405
411
  fillOption?: {
406
412
  style: "aspectFit" | "aspectFill";
@@ -411,7 +417,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
411
417
  } | undefined;
412
418
  } | undefined;
413
419
  htmlImageParams?: {
414
- provider: "openai" | "anthropic";
420
+ provider: string;
415
421
  model?: string | undefined;
416
422
  } | undefined;
417
423
  textSlideParams?: {
@@ -11,5 +11,8 @@ export declare const FfmpegContextAddInput: (context: FfmpegContext, input: stri
11
11
  export declare const FfmpegContextPushFormattedAudio: (context: FfmpegContext, sourceId: string, outputId: string, duration?: number | undefined) => void;
12
12
  export declare const FfmpegContextInputFormattedAudio: (context: FfmpegContext, input: string, duration?: number | undefined, inputOptions?: string[]) => string;
13
13
  export declare const FfmpegContextGenerateOutput: (context: FfmpegContext, output: string, options?: string[]) => Promise<number>;
14
- export declare const ffmpegGetMediaDuration: (filePath: string) => Promise<number>;
14
+ export declare const ffmpegGetMediaDuration: (filePath: string) => Promise<{
15
+ duration: number;
16
+ hasAudio: boolean;
17
+ }>;
15
18
  export declare const extractImageFromMovie: (movieFile: string, imagePath: string) => Promise<object>;
@@ -67,7 +67,8 @@ export const ffmpegGetMediaDuration = (filePath) => {
67
67
  reject(err);
68
68
  }
69
69
  else {
70
- resolve(metadata.format.duration);
70
+ const hasAudio = metadata.streams?.some((stream) => stream.codec_type === "audio") ?? false;
71
+ resolve({ duration: metadata.format.duration, hasAudio });
71
72
  }
72
73
  });
73
74
  });
@@ -7,6 +7,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
7
7
  audioDuration?: number | undefined;
8
8
  movieDuration?: number | undefined;
9
9
  silenceDuration?: number | undefined;
10
+ hasMovieAudio?: boolean | undefined;
10
11
  audioFile?: string | undefined;
11
12
  imageFile?: string | undefined;
12
13
  movieFile?: string | undefined;
@@ -44,7 +45,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
44
45
  height: number;
45
46
  };
46
47
  speechParams: {
47
- provider: "openai" | "nijivoice" | "google" | "elevenlabs";
48
+ provider: string;
48
49
  speakers: Record<string, {
49
50
  voiceId: string;
50
51
  displayName?: Record<string, string> | undefined;
@@ -52,8 +53,10 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
52
53
  speed?: number | undefined;
53
54
  instruction?: string | undefined;
54
55
  } | undefined;
55
- provider?: "openai" | "nijivoice" | "google" | "elevenlabs" | undefined;
56
+ provider?: string | undefined;
57
+ model?: string | undefined;
56
58
  }>;
59
+ model?: string | undefined;
57
60
  };
58
61
  beats: {
59
62
  text: string;
@@ -114,6 +117,21 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
114
117
  path: string;
115
118
  kind: "path";
116
119
  };
120
+ } | {
121
+ type: "movie";
122
+ source: {
123
+ url: string;
124
+ kind: "url";
125
+ } | {
126
+ kind: "base64";
127
+ data: string;
128
+ } | {
129
+ text: string;
130
+ kind: "text";
131
+ } | {
132
+ path: string;
133
+ kind: "path";
134
+ };
117
135
  } | {
118
136
  type: "textSlide";
119
137
  slide: {
@@ -151,22 +169,6 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
151
169
  } | {
152
170
  type: "voice_over";
153
171
  startAt?: number | undefined;
154
- } | {
155
- type: "movie";
156
- source: {
157
- url: string;
158
- kind: "url";
159
- } | {
160
- kind: "base64";
161
- data: string;
162
- } | {
163
- text: string;
164
- kind: "text";
165
- } | {
166
- path: string;
167
- kind: "path";
168
- };
169
- mixAudio: number;
170
172
  } | undefined;
171
173
  id?: string | undefined;
172
174
  audio?: {
@@ -191,9 +193,9 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
191
193
  imagePrompt?: string | undefined;
192
194
  description?: string | undefined;
193
195
  imageParams?: {
194
- provider: "openai" | "google";
195
- style?: string | undefined;
196
+ provider: string;
196
197
  model?: string | undefined;
198
+ style?: string | undefined;
197
199
  moderation?: string | undefined;
198
200
  images?: Record<string, {
199
201
  type: "image";
@@ -216,10 +218,12 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
216
218
  }> | undefined;
217
219
  } | undefined;
218
220
  audioParams?: {
221
+ movieVolume: number;
219
222
  padding?: number | undefined;
220
223
  } | undefined;
221
224
  movieParams?: {
222
225
  speed?: number | undefined;
226
+ model?: string | undefined;
223
227
  fillOption?: {
224
228
  style: "aspectFit" | "aspectFill";
225
229
  } | undefined;
@@ -247,9 +251,9 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
247
251
  title?: string | undefined;
248
252
  description?: string | undefined;
249
253
  imageParams?: {
250
- provider: "openai" | "google";
251
- style?: string | undefined;
254
+ provider: string;
252
255
  model?: string | undefined;
256
+ style?: string | undefined;
253
257
  moderation?: string | undefined;
254
258
  images?: Record<string, {
255
259
  type: "image";
@@ -272,7 +276,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
272
276
  }> | undefined;
273
277
  } | undefined;
274
278
  movieParams?: {
275
- provider?: "google" | "replicate" | undefined;
279
+ provider?: string | undefined;
276
280
  model?: string | undefined;
277
281
  fillOption?: {
278
282
  style: "aspectFit" | "aspectFill";
@@ -283,7 +287,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
283
287
  } | undefined;
284
288
  } | undefined;
285
289
  htmlImageParams?: {
286
- provider: "openai" | "anthropic";
290
+ provider: string;
287
291
  model?: string | undefined;
288
292
  } | undefined;
289
293
  textSlideParams?: {
@@ -0,0 +1,76 @@
1
+ export declare const defaultProviders: {
2
+ tts: string;
3
+ text2image: string;
4
+ text2movie: string;
5
+ text2Html: string;
6
+ llm: string;
7
+ };
8
+ export declare const provider2TTSAgent: {
9
+ nijivoice: {
10
+ agentName: string;
11
+ hasLimitedConcurrency: boolean;
12
+ };
13
+ openai: {
14
+ agentName: string;
15
+ hasLimitedConcurrency: boolean;
16
+ defaultModel: string;
17
+ defaultVoice: string;
18
+ };
19
+ google: {
20
+ agentName: string;
21
+ hasLimitedConcurrency: boolean;
22
+ };
23
+ elevenlabs: {
24
+ agentName: string;
25
+ hasLimitedConcurrency: boolean;
26
+ defaultModel: string;
27
+ models: string[];
28
+ };
29
+ };
30
+ export declare const provider2ImageAgent: {
31
+ openai: {
32
+ agentName: string;
33
+ defaultModel: string;
34
+ models: string[];
35
+ };
36
+ google: {
37
+ agentName: string;
38
+ defaultModel: string;
39
+ models: string[];
40
+ };
41
+ };
42
+ export declare const provider2MovieAgent: {
43
+ replicate: {
44
+ agentName: string;
45
+ models: string[];
46
+ };
47
+ google: {
48
+ agentName: string;
49
+ models: string[];
50
+ };
51
+ };
52
+ export declare const provider2LLMAgent: {
53
+ readonly openai: {
54
+ readonly agentName: "openAIAgent";
55
+ readonly defaultModel: "gpt-4o";
56
+ readonly max_tokens: 8192;
57
+ };
58
+ readonly anthropic: {
59
+ readonly agentName: "anthropicAgent";
60
+ readonly defaultModel: "claude-3-7-sonnet-20250219";
61
+ readonly max_tokens: 8192;
62
+ };
63
+ readonly gemini: {
64
+ readonly agentName: "geminiAgent";
65
+ readonly defaultModel: "gemini-1.5-flash";
66
+ readonly max_tokens: 8192;
67
+ };
68
+ readonly groq: {
69
+ readonly agentName: "groqAgent";
70
+ readonly defaultModel: "llama3-8b-8192";
71
+ readonly max_tokens: 4096;
72
+ };
73
+ };
74
+ export declare const llm: (keyof typeof provider2LLMAgent)[];
75
+ export type LLM = keyof typeof provider2LLMAgent;
76
+ export declare const htmlLLMProvider: string[];
@@ -0,0 +1,87 @@
1
+ export const defaultProviders = {
2
+ tts: "openai",
3
+ text2image: "openai",
4
+ text2movie: "google",
5
+ text2Html: "openai",
6
+ llm: "openai",
7
+ };
8
+ export const provider2TTSAgent = {
9
+ nijivoice: {
10
+ agentName: "ttsNijivoiceAgent",
11
+ hasLimitedConcurrency: true,
12
+ },
13
+ openai: {
14
+ agentName: "ttsOpenaiAgent",
15
+ hasLimitedConcurrency: false,
16
+ defaultModel: "gpt-4o-mini-tts",
17
+ defaultVoice: "shimmer",
18
+ },
19
+ google: {
20
+ agentName: "ttsGoogleAgent",
21
+ hasLimitedConcurrency: false,
22
+ },
23
+ elevenlabs: {
24
+ agentName: "ttsElevenlabsAgent",
25
+ hasLimitedConcurrency: true,
26
+ defaultModel: "eleven_multilingual_v2",
27
+ // Models | ElevenLabs Documentation
28
+ // https://elevenlabs.io/docs/models
29
+ models: ["eleven_multilingual_v2", "eleven_turbo_v2_5", "eleven_turbo_v2", "eleven_flash_v2_5", "eleven_flash_v2"],
30
+ },
31
+ };
32
+ export const provider2ImageAgent = {
33
+ openai: {
34
+ agentName: "imageOpenaiAgent",
35
+ defaultModel: "gpt-image-1",
36
+ models: ["dall-e-3", "gpt-image-1"],
37
+ },
38
+ google: {
39
+ agentName: "imageGoogleAgent",
40
+ defaultModel: "imagen-3.0-fast-generate-001",
41
+ models: ["imagen-3.0-fast-generate-001", "imagen-3.0-generate-002", "imagen-3.0-capability-001"],
42
+ },
43
+ };
44
+ export const provider2MovieAgent = {
45
+ replicate: {
46
+ agentName: "movieReplicateAgent",
47
+ models: [
48
+ "bytedance/seedance-1-lite",
49
+ "bytedance/seedance-1-pro",
50
+ "kwaivgi/kling-v1.6-pro",
51
+ "kwaivgi/kling-v2.1",
52
+ "google/veo-2",
53
+ "google/veo-3",
54
+ "google/veo-3-fast",
55
+ "minimax/video-01",
56
+ ],
57
+ },
58
+ google: {
59
+ agentName: "movieGoogleAgent",
60
+ models: ["veo-2.0-generate-001"],
61
+ },
62
+ };
63
+ // : Record<LLM, { agent: string; defaultModel: string; max_tokens: number }>
64
+ export const provider2LLMAgent = {
65
+ openai: {
66
+ agentName: "openAIAgent",
67
+ defaultModel: "gpt-4o",
68
+ max_tokens: 8192,
69
+ },
70
+ anthropic: {
71
+ agentName: "anthropicAgent",
72
+ defaultModel: "claude-3-7-sonnet-20250219",
73
+ max_tokens: 8192,
74
+ },
75
+ gemini: {
76
+ agentName: "geminiAgent",
77
+ defaultModel: "gemini-1.5-flash",
78
+ max_tokens: 8192,
79
+ },
80
+ groq: {
81
+ agentName: "groqAgent",
82
+ defaultModel: "llama3-8b-8192",
83
+ max_tokens: 4096,
84
+ },
85
+ };
86
+ export const llm = Object.keys(provider2LLMAgent);
87
+ export const htmlLLMProvider = ["openai", "anthropic"];
@@ -1,16 +1,12 @@
1
- import { MulmoBeat, MulmoStudioMultiLingualData } from "../types/index.js";
2
1
  import type { ConfigDataDictionary, DefaultConfigData } from "graphai";
3
- export declare const llm: readonly ["openai", "anthropic", "gemini", "groq"];
4
- export type LLM = (typeof llm)[number];
5
- export declare const llmConfig: Record<LLM, {
6
- agent: string;
7
- defaultModel: string;
8
- max_tokens: number;
9
- }>;
2
+ import { MulmoBeat, MulmoStudioMultiLingualData } from "../types/index.js";
3
+ import { llm } from "./provider2agent.js";
4
+ import type { LLM } from "./provider2agent.js";
5
+ export { LLM, llm };
10
6
  export declare const llmPair: (_llm?: LLM, _model?: string) => {
11
- agent: string;
7
+ agent: "openAIAgent" | "anthropicAgent" | "geminiAgent" | "groqAgent";
12
8
  model: string;
13
- max_tokens: number;
9
+ max_tokens: 8192 | 4096;
14
10
  };
15
11
  export declare const chunkArray: <T>(array: T[], size?: number) => T[][];
16
12
  export declare const isHttp: (fileOrUrl: string) => boolean;
@@ -26,4 +22,3 @@ type CleanableObject = {
26
22
  [key: string]: CleanableValue;
27
23
  };
28
24
  export declare const deepClean: <T extends CleanableValue>(input: T) => T | undefined;
29
- export {};
@@ -1,32 +1,11 @@
1
1
  import * as crypto from "crypto";
2
- export const llm = ["openai", "anthropic", "gemini", "groq"];
3
- export const llmConfig = {
4
- openai: {
5
- agent: "openAIAgent",
6
- defaultModel: "gpt-4o",
7
- max_tokens: 8192,
8
- },
9
- anthropic: {
10
- agent: "anthropicAgent",
11
- defaultModel: "claude-3-7-sonnet-20250219",
12
- max_tokens: 8192,
13
- },
14
- gemini: {
15
- agent: "geminiAgent",
16
- defaultModel: "gemini-1.5-flash",
17
- max_tokens: 8192,
18
- },
19
- groq: {
20
- agent: "groqAgent",
21
- defaultModel: "llama3-8b-8192",
22
- max_tokens: 4096,
23
- },
24
- };
2
+ import { provider2LLMAgent, llm } from "./provider2agent.js";
3
+ export { llm };
25
4
  export const llmPair = (_llm, _model) => {
26
5
  const llmKey = _llm ?? "openai";
27
- const agent = llmConfig[llmKey]?.agent ?? llmConfig.openai.agent;
28
- const model = _model ?? llmConfig[llmKey]?.defaultModel ?? llmConfig.openai.defaultModel;
29
- const max_tokens = llmConfig[llmKey]?.max_tokens ?? llmConfig.openai.max_tokens;
6
+ const agent = provider2LLMAgent[llmKey]?.agentName ?? provider2LLMAgent.openai.agentName;
7
+ const model = _model ?? provider2LLMAgent[llmKey]?.defaultModel ?? provider2LLMAgent.openai.defaultModel;
8
+ const max_tokens = provider2LLMAgent[llmKey]?.max_tokens ?? provider2LLMAgent.openai.max_tokens;
30
9
  return { agent, model, max_tokens };
31
10
  };
32
11
  export const chunkArray = (array, size = 3) => {