mulmocast 2.1.21 → 2.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,22 @@
  import fs from "fs";
  import path from "path";
  import { GraphAILogger } from "graphai";
- import OpenAI, { toFile, AuthenticationError, RateLimitError, APIError } from "openai";
+ import { toFile, AuthenticationError, RateLimitError, APIError } from "openai";
+ import { createOpenAIClient } from "../utils/openai_client.js";
  import { provider2ImageAgent, gptImages } from "../types/provider2agent.js";
  import { apiKeyMissingError, agentGenerationError, openAIAgentGenerationError, agentIncorrectAPIKeyError, agentAPIRateLimitError, agentInvalidResponseError, imageAction, imageFileTarget, } from "../utils/error_cause.js";
  // https://platform.openai.com/docs/guides/image-generation
  export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
  const { prompt, referenceImages } = namedInputs;
  const { moderation, canvasSize, quality } = params;
- const { apiKey, baseURL } = { ...config };
+ const { apiKey, baseURL, apiVersion } = { ...config };
  if (!apiKey) {
  throw new Error("OpenAI API key is required (OPENAI_API_KEY)", {
  cause: apiKeyMissingError("imageOpenaiAgent", imageAction, "OPENAI_API_KEY"),
  });
  }
  const model = params.model ?? provider2ImageAgent["openai"].defaultModel;
- const openai = new OpenAI({ apiKey, baseURL });
+ const openai = createOpenAIClient({ apiKey, baseURL, apiVersion });
  const size = (() => {
  if (gptImages.includes(model)) {
  if (canvasSize.width > canvasSize.height) {
@@ -1,17 +1,18 @@
  import { GraphAILogger } from "graphai";
- import OpenAI, { AuthenticationError, RateLimitError } from "openai";
+ import { AuthenticationError, RateLimitError } from "openai";
  import { provider2TTSAgent } from "../types/provider2agent.js";
+ import { createOpenAIClient } from "../utils/openai_client.js";
  import { apiKeyMissingError, agentIncorrectAPIKeyError, agentAPIRateLimitError, agentGenerationError, audioAction, audioFileTarget, } from "../utils/error_cause.js";
  export const ttsOpenaiAgent = async ({ namedInputs, params, config, }) => {
  const { text } = namedInputs;
  const { model, voice, suppressError, instructions, speed } = params;
- const { apiKey, baseURL } = config ?? {};
+ const { apiKey, baseURL, apiVersion } = config ?? {};
  if (!apiKey) {
  throw new Error("OpenAI API key is required (OPENAI_API_KEY)", {
  cause: apiKeyMissingError("ttsOpenaiAgent", audioAction, "OPENAI_API_KEY"),
  });
  }
- const openai = new OpenAI({ apiKey, baseURL });
+ const openai = createOpenAIClient({ apiKey, baseURL, apiVersion });
  try {
  const tts_options = {
  model: model ?? provider2TTSAgent.openai.defaultModel,
@@ -58,6 +58,7 @@ export type ReplicateImageAgentParams = {
58
58
  export type OpenAIImageAgentConfig = {
59
59
  baseURL?: string;
60
60
  apiKey?: string;
61
+ apiVersion?: string;
61
62
  };
62
63
  export type GoogleImageAgentConfig = {
63
64
  projectId?: string;
@@ -139,6 +139,7 @@ export declare const provider2LLMAgent: {
139
139
  readonly defaultModel: "gpt-5";
140
140
  readonly keyName: "OPENAI_API_KEY";
141
141
  readonly baseURLKeyName: "OPENAI_BASE_URL";
142
+ readonly apiVersionKeyName: "OPENAI_API_VERSION";
142
143
  readonly max_tokens: 8192;
143
144
  readonly models: readonly ["gpt-5", "gpt-5-nano", "gpt-5-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3", "o3-mini", "o3-pro", "o1", "o1-pro", "gpt-4o", "gpt-4o-mini"];
144
145
  };
@@ -252,6 +252,7 @@ export const provider2LLMAgent = {
252
252
  defaultModel: "gpt-5",
253
253
  keyName: "OPENAI_API_KEY",
254
254
  baseURLKeyName: "OPENAI_BASE_URL",
255
+ apiVersionKeyName: "OPENAI_API_VERSION",
255
256
  max_tokens: 8192,
256
257
  models: [
257
258
  "gpt-5",
@@ -43,6 +43,8 @@ export declare const speakerDataSchema: z.ZodObject<{
43
43
  [x: string]: string;
44
44
  }>>>;
45
45
  model: z.ZodOptional<z.ZodString>;
46
+ baseURL: z.ZodOptional<z.ZodString>;
47
+ apiVersion: z.ZodOptional<z.ZodString>;
46
48
  }, z.core.$strict>;
47
49
  export declare const speakerSchema: z.ZodObject<{
48
50
  displayName: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
@@ -59,6 +61,8 @@ export declare const speakerSchema: z.ZodObject<{
59
61
  [x: string]: string;
60
62
  }>>>;
61
63
  model: z.ZodOptional<z.ZodString>;
64
+ baseURL: z.ZodOptional<z.ZodString>;
65
+ apiVersion: z.ZodOptional<z.ZodString>;
62
66
  lang: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
63
67
  displayName: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
64
68
  voiceId: z.ZodString;
@@ -74,6 +78,8 @@ export declare const speakerSchema: z.ZodObject<{
74
78
  [x: string]: string;
75
79
  }>>>;
76
80
  model: z.ZodOptional<z.ZodString>;
81
+ baseURL: z.ZodOptional<z.ZodString>;
82
+ apiVersion: z.ZodOptional<z.ZodString>;
77
83
  }, z.core.$strict>>>;
78
84
  }, z.core.$strict>;
79
85
  export declare const speakerDictionarySchema: z.ZodRecord<z.ZodString, z.ZodObject<{
@@ -91,6 +97,8 @@ export declare const speakerDictionarySchema: z.ZodRecord<z.ZodString, z.ZodObje
91
97
  [x: string]: string;
92
98
  }>>>;
93
99
  model: z.ZodOptional<z.ZodString>;
100
+ baseURL: z.ZodOptional<z.ZodString>;
101
+ apiVersion: z.ZodOptional<z.ZodString>;
94
102
  lang: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
95
103
  displayName: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
96
104
  voiceId: z.ZodString;
@@ -106,6 +114,8 @@ export declare const speakerDictionarySchema: z.ZodRecord<z.ZodString, z.ZodObje
106
114
  [x: string]: string;
107
115
  }>>>;
108
116
  model: z.ZodOptional<z.ZodString>;
117
+ baseURL: z.ZodOptional<z.ZodString>;
118
+ apiVersion: z.ZodOptional<z.ZodString>;
109
119
  }, z.core.$strict>>>;
110
120
  }, z.core.$strict>>;
111
121
  export declare const mulmoSpeechParamsSchema: z.ZodDefault<z.ZodObject<{
@@ -124,6 +134,8 @@ export declare const mulmoSpeechParamsSchema: z.ZodDefault<z.ZodObject<{
124
134
  [x: string]: string;
125
135
  }>>>;
126
136
  model: z.ZodOptional<z.ZodString>;
137
+ baseURL: z.ZodOptional<z.ZodString>;
138
+ apiVersion: z.ZodOptional<z.ZodString>;
127
139
  lang: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
128
140
  displayName: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
129
141
  voiceId: z.ZodString;
@@ -139,6 +151,8 @@ export declare const mulmoSpeechParamsSchema: z.ZodDefault<z.ZodObject<{
139
151
  [x: string]: string;
140
152
  }>>>;
141
153
  model: z.ZodOptional<z.ZodString>;
154
+ baseURL: z.ZodOptional<z.ZodString>;
155
+ apiVersion: z.ZodOptional<z.ZodString>;
142
156
  }, z.core.$strict>>>;
143
157
  }, z.core.$strict>>;
144
158
  }, z.core.$strip>>;
@@ -437,6 +451,8 @@ export declare const mulmoBeatImageParamsSchema: z.ZodObject<{
437
451
  quality: z.ZodOptional<z.ZodString>;
438
452
  style: z.ZodOptional<z.ZodString>;
439
453
  moderation: z.ZodOptional<z.ZodString>;
454
+ baseURL: z.ZodOptional<z.ZodString>;
455
+ apiVersion: z.ZodOptional<z.ZodString>;
440
456
  }, z.core.$strict>;
441
457
  export declare const mulmoImageParamsSchema: z.ZodObject<{
442
458
  provider: z.ZodOptional<z.ZodEnum<{
@@ -446,6 +462,8 @@ export declare const mulmoImageParamsSchema: z.ZodObject<{
446
462
  quality: z.ZodOptional<z.ZodString>;
447
463
  style: z.ZodOptional<z.ZodString>;
448
464
  moderation: z.ZodOptional<z.ZodString>;
465
+ baseURL: z.ZodOptional<z.ZodString>;
466
+ apiVersion: z.ZodOptional<z.ZodString>;
449
467
  images: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodObject<{
450
468
  type: z.ZodLiteral<"image">;
451
469
  source: z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -891,6 +909,8 @@ export declare const mulmoBeatSchema: z.ZodObject<{
891
909
  quality: z.ZodOptional<z.ZodString>;
892
910
  style: z.ZodOptional<z.ZodString>;
893
911
  moderation: z.ZodOptional<z.ZodString>;
912
+ baseURL: z.ZodOptional<z.ZodString>;
913
+ apiVersion: z.ZodOptional<z.ZodString>;
894
914
  }, z.core.$strict>>;
895
915
  audioParams: z.ZodOptional<z.ZodObject<{
896
916
  padding: z.ZodOptional<z.ZodNumber>;
@@ -1223,6 +1243,8 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
1223
1243
  [x: string]: string;
1224
1244
  }>>>;
1225
1245
  model: z.ZodOptional<z.ZodString>;
1246
+ baseURL: z.ZodOptional<z.ZodString>;
1247
+ apiVersion: z.ZodOptional<z.ZodString>;
1226
1248
  lang: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
1227
1249
  displayName: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
1228
1250
  voiceId: z.ZodString;
@@ -1238,6 +1260,8 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
1238
1260
  [x: string]: string;
1239
1261
  }>>>;
1240
1262
  model: z.ZodOptional<z.ZodString>;
1263
+ baseURL: z.ZodOptional<z.ZodString>;
1264
+ apiVersion: z.ZodOptional<z.ZodString>;
1241
1265
  }, z.core.$strict>>>;
1242
1266
  }, z.core.$strict>>;
1243
1267
  }, z.core.$strip>>;
@@ -1249,6 +1273,8 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
1249
1273
  quality: z.ZodOptional<z.ZodString>;
1250
1274
  style: z.ZodOptional<z.ZodString>;
1251
1275
  moderation: z.ZodOptional<z.ZodString>;
1276
+ baseURL: z.ZodOptional<z.ZodString>;
1277
+ apiVersion: z.ZodOptional<z.ZodString>;
1252
1278
  images: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodObject<{
1253
1279
  type: z.ZodLiteral<"image">;
1254
1280
  source: z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -1584,6 +1610,8 @@ export declare const mulmoScriptSchema: z.ZodObject<{
1584
1610
  [x: string]: string;
1585
1611
  }>>>;
1586
1612
  model: z.ZodOptional<z.ZodString>;
1613
+ baseURL: z.ZodOptional<z.ZodString>;
1614
+ apiVersion: z.ZodOptional<z.ZodString>;
1587
1615
  lang: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
1588
1616
  displayName: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
1589
1617
  voiceId: z.ZodString;
@@ -1599,6 +1627,8 @@ export declare const mulmoScriptSchema: z.ZodObject<{
1599
1627
  [x: string]: string;
1600
1628
  }>>>;
1601
1629
  model: z.ZodOptional<z.ZodString>;
1630
+ baseURL: z.ZodOptional<z.ZodString>;
1631
+ apiVersion: z.ZodOptional<z.ZodString>;
1602
1632
  }, z.core.$strict>>>;
1603
1633
  }, z.core.$strict>>;
1604
1634
  }, z.core.$strip>>;
@@ -1610,6 +1640,8 @@ export declare const mulmoScriptSchema: z.ZodObject<{
1610
1640
  quality: z.ZodOptional<z.ZodString>;
1611
1641
  style: z.ZodOptional<z.ZodString>;
1612
1642
  moderation: z.ZodOptional<z.ZodString>;
1643
+ baseURL: z.ZodOptional<z.ZodString>;
1644
+ apiVersion: z.ZodOptional<z.ZodString>;
1613
1645
  images: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodObject<{
1614
1646
  type: z.ZodLiteral<"image">;
1615
1647
  source: z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -2049,6 +2081,8 @@ export declare const mulmoScriptSchema: z.ZodObject<{
2049
2081
  quality: z.ZodOptional<z.ZodString>;
2050
2082
  style: z.ZodOptional<z.ZodString>;
2051
2083
  moderation: z.ZodOptional<z.ZodString>;
2084
+ baseURL: z.ZodOptional<z.ZodString>;
2085
+ apiVersion: z.ZodOptional<z.ZodString>;
2052
2086
  }, z.core.$strict>>;
2053
2087
  audioParams: z.ZodOptional<z.ZodObject<{
2054
2088
  padding: z.ZodOptional<z.ZodNumber>;
@@ -2456,6 +2490,8 @@ export declare const mulmoStudioSchema: z.ZodObject<{
2456
2490
  [x: string]: string;
2457
2491
  }>>>;
2458
2492
  model: z.ZodOptional<z.ZodString>;
2493
+ baseURL: z.ZodOptional<z.ZodString>;
2494
+ apiVersion: z.ZodOptional<z.ZodString>;
2459
2495
  lang: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
2460
2496
  displayName: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
2461
2497
  voiceId: z.ZodString;
@@ -2471,6 +2507,8 @@ export declare const mulmoStudioSchema: z.ZodObject<{
2471
2507
  [x: string]: string;
2472
2508
  }>>>;
2473
2509
  model: z.ZodOptional<z.ZodString>;
2510
+ baseURL: z.ZodOptional<z.ZodString>;
2511
+ apiVersion: z.ZodOptional<z.ZodString>;
2474
2512
  }, z.core.$strict>>>;
2475
2513
  }, z.core.$strict>>;
2476
2514
  }, z.core.$strip>>;
@@ -2482,6 +2520,8 @@ export declare const mulmoStudioSchema: z.ZodObject<{
2482
2520
  quality: z.ZodOptional<z.ZodString>;
2483
2521
  style: z.ZodOptional<z.ZodString>;
2484
2522
  moderation: z.ZodOptional<z.ZodString>;
2523
+ baseURL: z.ZodOptional<z.ZodString>;
2524
+ apiVersion: z.ZodOptional<z.ZodString>;
2485
2525
  images: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodObject<{
2486
2526
  type: z.ZodLiteral<"image">;
2487
2527
  source: z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -2921,6 +2961,8 @@ export declare const mulmoStudioSchema: z.ZodObject<{
2921
2961
  quality: z.ZodOptional<z.ZodString>;
2922
2962
  style: z.ZodOptional<z.ZodString>;
2923
2963
  moderation: z.ZodOptional<z.ZodString>;
2964
+ baseURL: z.ZodOptional<z.ZodString>;
2965
+ apiVersion: z.ZodOptional<z.ZodString>;
2924
2966
  }, z.core.$strict>>;
2925
2967
  audioParams: z.ZodOptional<z.ZodObject<{
2926
2968
  padding: z.ZodOptional<z.ZodNumber>;
@@ -3264,6 +3306,8 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
3264
3306
  [x: string]: string;
3265
3307
  }>>>;
3266
3308
  model: z.ZodOptional<z.ZodString>;
3309
+ baseURL: z.ZodOptional<z.ZodString>;
3310
+ apiVersion: z.ZodOptional<z.ZodString>;
3267
3311
  lang: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
3268
3312
  displayName: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
3269
3313
  voiceId: z.ZodString;
@@ -3279,6 +3323,8 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
3279
3323
  [x: string]: string;
3280
3324
  }>>>;
3281
3325
  model: z.ZodOptional<z.ZodString>;
3326
+ baseURL: z.ZodOptional<z.ZodString>;
3327
+ apiVersion: z.ZodOptional<z.ZodString>;
3282
3328
  }, z.core.$strict>>>;
3283
3329
  }, z.core.$strict>>;
3284
3330
  }, z.core.$strip>>;
@@ -3290,6 +3336,8 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
3290
3336
  quality: z.ZodOptional<z.ZodString>;
3291
3337
  style: z.ZodOptional<z.ZodString>;
3292
3338
  moderation: z.ZodOptional<z.ZodString>;
3339
+ baseURL: z.ZodOptional<z.ZodString>;
3340
+ apiVersion: z.ZodOptional<z.ZodString>;
3293
3341
  images: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodObject<{
3294
3342
  type: z.ZodLiteral<"image">;
3295
3343
  source: z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -3619,6 +3667,8 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
3619
3667
  [x: string]: string;
3620
3668
  }>>>;
3621
3669
  model: z.ZodOptional<z.ZodString>;
3670
+ baseURL: z.ZodOptional<z.ZodString>;
3671
+ apiVersion: z.ZodOptional<z.ZodString>;
3622
3672
  lang: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
3623
3673
  displayName: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
3624
3674
  voiceId: z.ZodString;
@@ -3634,6 +3684,8 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
3634
3684
  [x: string]: string;
3635
3685
  }>>>;
3636
3686
  model: z.ZodOptional<z.ZodString>;
3687
+ baseURL: z.ZodOptional<z.ZodString>;
3688
+ apiVersion: z.ZodOptional<z.ZodString>;
3637
3689
  }, z.core.$strict>>>;
3638
3690
  }, z.core.$strict>>;
3639
3691
  }, z.core.$strip>>;
@@ -3645,6 +3697,8 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
3645
3697
  quality: z.ZodOptional<z.ZodString>;
3646
3698
  style: z.ZodOptional<z.ZodString>;
3647
3699
  moderation: z.ZodOptional<z.ZodString>;
3700
+ baseURL: z.ZodOptional<z.ZodString>;
3701
+ apiVersion: z.ZodOptional<z.ZodString>;
3648
3702
  images: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnion<readonly [z.ZodObject<{
3649
3703
  type: z.ZodLiteral<"image">;
3650
3704
  source: z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -39,6 +39,8 @@ export const speakerDataSchema = z
39
39
  speechOptions: speechOptionsSchema.optional(),
40
40
  provider: text2SpeechProviderSchema.optional(),
41
41
  model: z.string().optional().describe("TTS model to use for this speaker"),
42
+ baseURL: z.string().optional(), // Azure/custom endpoint URL
43
+ apiVersion: z.string().optional(), // Azure API version (e.g., "2025-04-01-preview")
42
44
  })
43
45
  .strict();
44
46
  export const speakerSchema = speakerDataSchema.extend({
@@ -240,6 +242,8 @@ export const mulmoBeatImageParamsSchema = z
240
242
  quality: z.string().optional(), // optional image quality (model specific)
241
243
  style: z.string().optional(), // optional image style
242
244
  moderation: z.string().optional(), // optional image style
245
+ baseURL: z.string().optional(), // Azure/custom endpoint URL
246
+ apiVersion: z.string().optional(), // Azure API version (e.g., "2025-04-01-preview")
243
247
  })
244
248
  .strict();
245
249
  export const mulmoImageParamsSchema = mulmoBeatImageParamsSchema
@@ -24,6 +24,8 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
24
24
  } | undefined;
25
25
  provider?: string | undefined;
26
26
  model?: string | undefined;
27
+ baseURL?: string | undefined;
28
+ apiVersion?: string | undefined;
27
29
  lang?: Record<string, {
28
30
  voiceId: string;
29
31
  displayName?: Record<string, string> | undefined;
@@ -37,6 +39,8 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
37
39
  } | undefined;
38
40
  provider?: string | undefined;
39
41
  model?: string | undefined;
42
+ baseURL?: string | undefined;
43
+ apiVersion?: string | undefined;
40
44
  }> | undefined;
41
45
  }>;
42
46
  };
@@ -46,6 +50,8 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
46
50
  quality?: string | undefined;
47
51
  style?: string | undefined;
48
52
  moderation?: string | undefined;
53
+ baseURL?: string | undefined;
54
+ apiVersion?: string | undefined;
49
55
  images?: Record<string, {
50
56
  type: "image";
51
57
  source: {
@@ -369,6 +375,8 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
369
375
  quality?: string | undefined;
370
376
  style?: string | undefined;
371
377
  moderation?: string | undefined;
378
+ baseURL?: string | undefined;
379
+ apiVersion?: string | undefined;
372
380
  } | undefined;
373
381
  audioParams?: {
374
382
  movieVolume: number;
@@ -664,6 +672,8 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
664
672
  } | undefined;
665
673
  provider?: string | undefined;
666
674
  model?: string | undefined;
675
+ baseURL?: string | undefined;
676
+ apiVersion?: string | undefined;
667
677
  lang?: Record<string, {
668
678
  voiceId: string;
669
679
  displayName?: Record<string, string> | undefined;
@@ -677,6 +687,8 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
677
687
  } | undefined;
678
688
  provider?: string | undefined;
679
689
  model?: string | undefined;
690
+ baseURL?: string | undefined;
691
+ apiVersion?: string | undefined;
680
692
  }> | undefined;
681
693
  }>;
682
694
  };
@@ -686,6 +698,8 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
686
698
  quality?: string | undefined;
687
699
  style?: string | undefined;
688
700
  moderation?: string | undefined;
701
+ baseURL?: string | undefined;
702
+ apiVersion?: string | undefined;
689
703
  images?: Record<string, {
690
704
  type: "image";
691
705
  source: {
@@ -1009,6 +1023,8 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
1009
1023
  quality?: string | undefined;
1010
1024
  style?: string | undefined;
1011
1025
  moderation?: string | undefined;
1026
+ baseURL?: string | undefined;
1027
+ apiVersion?: string | undefined;
1012
1028
  } | undefined;
1013
1029
  audioParams?: {
1014
1030
  movieVolume: number;
@@ -1311,6 +1327,8 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
1311
1327
  } | undefined;
1312
1328
  provider?: string | undefined;
1313
1329
  model?: string | undefined;
1330
+ baseURL?: string | undefined;
1331
+ apiVersion?: string | undefined;
1314
1332
  lang?: Record<string, {
1315
1333
  voiceId: string;
1316
1334
  displayName?: Record<string, string> | undefined;
@@ -1324,6 +1342,8 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
1324
1342
  } | undefined;
1325
1343
  provider?: string | undefined;
1326
1344
  model?: string | undefined;
1345
+ baseURL?: string | undefined;
1346
+ apiVersion?: string | undefined;
1327
1347
  }> | undefined;
1328
1348
  }>;
1329
1349
  };
@@ -1333,6 +1353,8 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
1333
1353
  quality?: string | undefined;
1334
1354
  style?: string | undefined;
1335
1355
  moderation?: string | undefined;
1356
+ baseURL?: string | undefined;
1357
+ apiVersion?: string | undefined;
1336
1358
  images?: Record<string, {
1337
1359
  type: "image";
1338
1360
  source: {
@@ -0,0 +1,17 @@
+ import OpenAI from "openai";
+ export interface OpenAIClientOptions {
+ apiKey?: string;
+ baseURL?: string;
+ apiVersion?: string;
+ }
+ /**
+ * Detects if the given URL is an Azure OpenAI endpoint
+ * Safely parses the URL and checks if the hostname ends with ".openai.azure.com"
+ */
+ export declare const isAzureEndpoint: (baseURL: string | undefined) => boolean;
+ /**
+ * Creates an OpenAI or AzureOpenAI client based on the baseURL
+ * - If baseURL contains ".openai.azure.com", returns AzureOpenAI client
+ * - Otherwise, returns standard OpenAI client
+ */
+ export declare const createOpenAIClient: (options: OpenAIClientOptions) => OpenAI;
@@ -0,0 +1,35 @@
+ import OpenAI, { AzureOpenAI } from "openai";
+ /**
+ * Detects if the given URL is an Azure OpenAI endpoint
+ * Safely parses the URL and checks if the hostname ends with ".openai.azure.com"
+ */
+ export const isAzureEndpoint = (baseURL) => {
+ if (!baseURL)
+ return false;
+ try {
+ const url = new URL(baseURL);
+ return url.hostname.endsWith(".openai.azure.com");
+ }
+ catch {
+ return false;
+ }
+ };
+ /**
+ * Creates an OpenAI or AzureOpenAI client based on the baseURL
+ * - If baseURL contains ".openai.azure.com", returns AzureOpenAI client
+ * - Otherwise, returns standard OpenAI client
+ */
+ export const createOpenAIClient = (options) => {
+ const { apiKey, baseURL, apiVersion } = options;
+ if (isAzureEndpoint(baseURL)) {
+ return new AzureOpenAI({
+ apiKey,
+ endpoint: baseURL,
+ apiVersion: apiVersion ?? "2025-04-01-preview",
+ });
+ }
+ return new OpenAI({
+ apiKey,
+ baseURL,
+ });
+ };
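The new utils/openai_client module above carries the Azure OpenAI support in this release: it inspects the hostname of baseURL and returns an AzureOpenAI client (with apiVersion defaulting to "2025-04-01-preview") for *.openai.azure.com endpoints, and a plain OpenAI client otherwise. A minimal usage sketch; the import path and the Azure resource URL are illustrative, not taken from the package:

import { createOpenAIClient, isAzureEndpoint } from "./utils/openai_client.js"; // path is an assumption

// Hostname check only; malformed URLs are caught and treated as non-Azure.
isAzureEndpoint("https://my-resource.openai.azure.com"); // true (hypothetical resource)
isAzureEndpoint("https://api.openai.com/v1");            // false

// Plain OpenAI client: baseURL is undefined or not an Azure hostname.
const openai = createOpenAIClient({ apiKey: process.env.OPENAI_API_KEY });

// AzureOpenAI client: endpoint taken from baseURL, apiVersion falls back to "2025-04-01-preview".
const azure = createOpenAIClient({
  apiKey: process.env.OPENAI_API_KEY,
  baseURL: "https://my-resource.openai.azure.com", // hypothetical Azure endpoint
  apiVersion: "2025-04-01-preview",
});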
@@ -53,6 +53,9 @@ export const settings2GraphAIConfig = (settings, env) => {
  if (info.baseURLKeyName) {
  config[info.agentName].baseURL = getKey(prefix, info.baseURLKeyName);
  }
+ if (info.apiVersionKeyName) {
+ config[info.agentName].apiVersion = getKey(prefix, info.apiVersionKeyName);
+ }
  });
  };
  const config = {};
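With the apiVersionKeyName entries added to provider2agent, the same plumbing that already resolves OPENAI_API_KEY and OPENAI_BASE_URL now also picks up OPENAI_API_VERSION. A rough sketch of the resulting per-agent config, assuming the unprefixed environment variable names are consumed as declared (the exact key-resolution rules of getKey are not shown in this diff):

// Assumed environment (names come from keyName / baseURLKeyName / apiVersionKeyName):
//   OPENAI_API_KEY      - API key
//   OPENAI_BASE_URL     - e.g. a hypothetical https://my-resource.openai.azure.com
//   OPENAI_API_VERSION  - e.g. 2025-04-01-preview
//
// settings2GraphAIConfig then yields, roughly:
const config = {
  openAIAgent: {
    apiKey: process.env.OPENAI_API_KEY,
    baseURL: process.env.OPENAI_BASE_URL,
    apiVersion: process.env.OPENAI_API_VERSION,
  },
};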
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "mulmocast",
- "version": "2.1.21",
+ "version": "2.1.23",
  "description": "",
  "type": "module",
  "main": "lib/index.node.js",
@@ -83,7 +83,7 @@
  "@graphai/gemini_agent": "^2.0.4",
  "@graphai/groq_agent": "^2.0.2",
  "@graphai/input_agents": "^1.0.2",
- "@graphai/openai_agent": "^2.0.8",
+ "@graphai/openai_agent": "^2.0.9",
  "@graphai/stream_agent_filter": "^2.0.2",
  "@graphai/vanilla": "^2.0.12",
  "@graphai/vanilla_node_agents": "^2.0.4",
@@ -101,7 +101,7 @@
  "marked": "^17.0.1",
  "mulmocast-vision": "^1.0.8",
  "ora": "^9.1.0",
- "puppeteer": "^24.36.0",
+ "puppeteer": "^24.36.1",
  "replicate": "^1.4.0",
  "yaml": "^2.8.2",
  "yargs": "^18.0.0",
@@ -116,11 +116,11 @@
  "eslint": "^9.39.2",
  "eslint-config-prettier": "^10.1.8",
  "eslint-plugin-prettier": "^5.5.5",
- "eslint-plugin-sonarjs": "^3.0.5",
+ "eslint-plugin-sonarjs": "^3.0.6",
  "prettier": "^3.8.1",
  "tsx": "^4.21.0",
  "typescript": "^5.9.3",
- "typescript-eslint": "^8.53.1"
+ "typescript-eslint": "^8.54.0"
  },
  "engines": {
  "node": ">=20.0.0"
@@ -0,0 +1,24 @@
+ {
+ "$mulmocast": {
+ "version": "1.1"
+ },
+ "lang": "en",
+ "title": "Test Audio with Azure OpenAI TTS",
+ "speechParams": {
+ "speakers": {
+ "Presenter": {
+ "provider": "openai",
+ "voiceId": "alloy",
+ "model": "tts"
+ }
+ }
+ },
+ "beats": [
+ {
+ "text": "Hello, this is a test of Azure OpenAI text to speech."
+ },
+ {
+ "text": "MulmoCast now supports Azure OpenAI for both image generation and speech synthesis."
+ }
+ ]
+ }
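The test script above leaves the endpoint to the environment. The schema changes in this release (speakerDataSchema gains optional baseURL and apiVersion fields) also allow those values to be pinned per speaker; an illustrative variant with placeholder values:

"speakers": {
  "Presenter": {
    "provider": "openai",
    "voiceId": "alloy",
    "model": "tts",
    "baseURL": "https://my-resource.openai.azure.com",
    "apiVersion": "2025-04-01-preview"
  }
}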
@@ -0,0 +1,26 @@
+ {
+ "$mulmocast": {
+ "version": "1.1"
+ },
+ "lang": "en",
+ "title": "Test Images with Azure OpenAI",
+ "imageParams": {
+ "provider": "openai",
+ "model": "gpt-image-1.5"
+ },
+ "audioParams": {
+ "suppressSpeech": true
+ },
+ "beats": [
+ {
+ "text": "",
+ "imagePrompt": "A beautiful sunset over mountains, photorealistic style",
+ "duration": 3
+ },
+ {
+ "text": "",
+ "imagePrompt": "A cute robot waving hello, digital art style",
+ "duration": 3
+ }
+ ]
+ }
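Likewise, mulmoImageParamsSchema now accepts the same two optional fields, so the image test script could carry the endpoint inline; values are placeholders:

"imageParams": {
  "provider": "openai",
  "model": "gpt-image-1.5",
  "baseURL": "https://my-resource.openai.azure.com",
  "apiVersion": "2025-04-01-preview"
}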
@@ -1,5 +0,0 @@
1
- import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
- import type { NijivoiceTTSAgentParams, AgentBufferResult, AgentTextInputs, AgentErrorResult, AgentConfig } from "../types/agent.js";
3
- export declare const ttsNijivoiceAgent: AgentFunction<NijivoiceTTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs, AgentConfig>;
4
- declare const ttsNijivoiceAgentInfo: AgentFunctionInfo;
5
- export default ttsNijivoiceAgentInfo;
@@ -1,76 +0,0 @@
1
- import { GraphAILogger } from "graphai";
2
- import { apiKeyMissingError, agentGenerationError, audioAction, audioFileTarget } from "../utils/error_cause.js";
3
- /*
4
- const errorMessage = [
5
- "TTS NijiVoice: No API key. ",
6
- "You have the following options:",
7
- "1. Obtain an API key from Niji Voice (https://platform.nijivoice.com/) and set it as the NIJIVOICE_API_KEY environment variable.",
8
- '2. Use OpenAI\'s TTS instead of Niji Voice by changing speechParams.provider from "nijivoice" to "openai".',
9
- ].join("\n");
10
- */
11
- export const ttsNijivoiceAgent = async ({ params, namedInputs, config, }) => {
12
- const { suppressError, voice, speed, speed_global } = params;
13
- const { apiKey } = config ?? {};
14
- const { text } = namedInputs;
15
- if (!apiKey) {
16
- throw new Error("NijiVoice API key is required (NIJIVOICE_API_KEY)", {
17
- cause: apiKeyMissingError("ttsNijivoiceAgent", audioAction, "NIJIVOICE_API_KEY"),
18
- });
19
- }
20
- const url = `https://api.nijivoice.com/api/platform/v1/voice-actors/${voice}/generate-voice`;
21
- const options = {
22
- method: "POST",
23
- headers: {
24
- "x-api-key": apiKey,
25
- accept: "application/json",
26
- "content-type": "application/json",
27
- },
28
- body: JSON.stringify({
29
- format: "mp3",
30
- speed: String(speed ?? speed_global ?? "1.0"),
31
- script: text,
32
- }),
33
- };
34
- try {
35
- const voiceRes = await fetch(url, options);
36
- const voiceJson = await voiceRes.json();
37
- if (voiceJson?.generatedVoice?.audioFileDownloadUrl) {
38
- const audioRes = await fetch(voiceJson.generatedVoice.audioFileDownloadUrl);
39
- const buffer = Buffer.from(await audioRes.arrayBuffer());
40
- return { buffer };
41
- }
42
- if (suppressError) {
43
- return {
44
- error: voiceJson,
45
- };
46
- }
47
- GraphAILogger.info(voiceJson);
48
- throw new Error("TTS Nijivoice Error", {
49
- cause: agentGenerationError("ttsNijivoiceAgent", audioAction, audioFileTarget),
50
- });
51
- }
52
- catch (e) {
53
- if (suppressError) {
54
- return {
55
- error: e,
56
- };
57
- }
58
- GraphAILogger.info(e);
59
- throw new Error("TTS Nijivoice Error", {
60
- cause: agentGenerationError("ttsNijivoiceAgent", audioAction, audioFileTarget),
61
- });
62
- }
63
- };
64
- const ttsNijivoiceAgentInfo = {
65
- name: "ttsNijivoiceAgent",
66
- agent: ttsNijivoiceAgent,
67
- mock: ttsNijivoiceAgent,
68
- samples: [],
69
- description: "TTS nijivoice agent",
70
- category: ["tts"],
71
- author: "Receptron Team",
72
- repository: "https://github.com/receptron/mulmocast-cli/",
73
- license: "MIT",
74
- environmentVariables: ["NIJIVOICE_API_KEY"],
75
- };
76
- export default ttsNijivoiceAgentInfo;
@@ -1 +0,0 @@
1
- export {};
@@ -1 +0,0 @@
1
- export {};
@@ -1,15 +0,0 @@
1
- export declare const currentMulmoScriptVersion = "1.1";
2
- export declare const outDirName = "output";
3
- export declare const audioDirName = "audio";
4
- export declare const imageDirName = "images";
5
- export declare const cacheDirName = "cache";
6
- export declare const pdf_modes: string[];
7
- export declare const pdf_sizes: string[];
8
- export declare const languages: string[];
9
- export declare const storyToScriptGenerateMode: {
10
- stepWise: string;
11
- oneStep: string;
12
- };
13
- export declare const bundleTargetLang: string[];
14
- export declare const ASPECT_RATIOS: string[];
15
- export declare const PRO_ASPECT_RATIOS: string[];
@@ -1,15 +0,0 @@
1
- export const currentMulmoScriptVersion = "1.1";
2
- export const outDirName = "output";
3
- export const audioDirName = "audio";
4
- export const imageDirName = "images";
5
- export const cacheDirName = "cache";
6
- export const pdf_modes = ["slide", "talk", "handout"];
7
- export const pdf_sizes = ["letter", "a4"];
8
- export const languages = ["en", "ja", "fr", "es", "de", "zh-CN", "zh-TW", "ko", "it", "pt", "ar", "hi"];
9
- export const storyToScriptGenerateMode = {
10
- stepWise: "step_wise",
11
- oneStep: "one_step",
12
- };
13
- export const bundleTargetLang = ["ja", "en"];
14
- export const ASPECT_RATIOS = ["1:1", "9:16", "16:9"];
15
- export const PRO_ASPECT_RATIOS = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"];
@@ -1,191 +0,0 @@
1
- export declare const provider2TTSAgent: {
2
- nijivoice: {
3
- agentName: string;
4
- hasLimitedConcurrency: boolean;
5
- keyName: string;
6
- };
7
- openai: {
8
- agentName: string;
9
- hasLimitedConcurrency: boolean;
10
- defaultModel: string;
11
- defaultVoice: string;
12
- keyName: string;
13
- baseURLKeyName: string;
14
- };
15
- google: {
16
- agentName: string;
17
- hasLimitedConcurrency: boolean;
18
- keyName: string;
19
- };
20
- gemini: {
21
- agentName: string;
22
- hasLimitedConcurrency: boolean;
23
- defaultModel: string;
24
- defaultVoice: string;
25
- models: string[];
26
- keyName: string;
27
- };
28
- elevenlabs: {
29
- agentName: string;
30
- hasLimitedConcurrency: boolean;
31
- defaultModel: string;
32
- models: string[];
33
- keyName: string;
34
- };
35
- kotodama: {
36
- agentName: string;
37
- hasLimitedConcurrency: boolean;
38
- defaultVoice: string;
39
- defaultDecoration: string;
40
- keyName: string;
41
- };
42
- mock: {
43
- agentName: string;
44
- hasLimitedConcurrency: boolean;
45
- defaultModel: string;
46
- models: string[];
47
- };
48
- };
49
- export declare const gptImages: string[];
50
- export declare const provider2ImageAgent: {
51
- openai: {
52
- agentName: string;
53
- defaultModel: string;
54
- models: string[];
55
- keyName: string;
56
- baseURLKeyName: string;
57
- };
58
- google: {
59
- agentName: string;
60
- defaultModel: string;
61
- models: string[];
62
- keyName: string;
63
- };
64
- replicate: {
65
- agentName: string;
66
- defaultModel: string;
67
- models: string[];
68
- keyName: string;
69
- };
70
- mock: {
71
- agentName: string;
72
- defaultModel: string;
73
- models: string[];
74
- keyName: string;
75
- };
76
- };
77
- export type ReplicateModel = `${string}/${string}`;
78
- export declare const provider2MovieAgent: {
79
- replicate: {
80
- agentName: string;
81
- defaultModel: ReplicateModel;
82
- keyName: string;
83
- models: string[];
84
- modelParams: Record<ReplicateModel, {
85
- durations: number[];
86
- start_image: string | undefined;
87
- last_image?: string;
88
- price_per_sec: number;
89
- }>;
90
- };
91
- google: {
92
- agentName: string;
93
- defaultModel: string;
94
- models: string[];
95
- keyName: string;
96
- modelParams: {
97
- "veo-3.1-generate-preview": {
98
- durations: number[];
99
- };
100
- "veo-3.0-generate-001": {
101
- durations: number[];
102
- };
103
- "veo-2.0-generate-001": {
104
- durations: number[];
105
- };
106
- };
107
- };
108
- mock: {
109
- agentName: string;
110
- defaultModel: string;
111
- models: string[];
112
- keyName: string;
113
- modelParams: {};
114
- };
115
- };
116
- export declare const provider2SoundEffectAgent: {
117
- replicate: {
118
- agentName: string;
119
- defaultModel: ReplicateModel;
120
- keyName: string;
121
- models: ReplicateModel[];
122
- modelParams: Record<ReplicateModel, {
123
- identifier?: `${string}/${string}:${string}`;
124
- }>;
125
- };
126
- };
127
- export declare const provider2LipSyncAgent: {
128
- replicate: {
129
- agentName: string;
130
- defaultModel: ReplicateModel;
131
- keyName: string;
132
- models: ReplicateModel[];
133
- modelParams: Record<ReplicateModel, {
134
- identifier?: `${string}/${string}:${string}` | `${string}/${string}`;
135
- video?: string;
136
- audio: string;
137
- image?: string;
138
- }>;
139
- };
140
- };
141
- export declare const provider2LLMAgent: {
142
- readonly openai: {
143
- readonly agentName: "openAIAgent";
144
- readonly defaultModel: "gpt-5";
145
- readonly keyName: "OPENAI_API_KEY";
146
- readonly baseURLKeyName: "OPENAI_BASE_URL";
147
- readonly max_tokens: 8192;
148
- readonly models: readonly ["gpt-5", "gpt-5-nano", "gpt-5-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3", "o3-mini", "o3-pro", "o1", "o1-pro", "gpt-4o", "gpt-4o-mini"];
149
- };
150
- readonly anthropic: {
151
- readonly agentName: "anthropicAgent";
152
- readonly defaultModel: "claude-3-7-sonnet-20250219";
153
- readonly max_tokens: 8192;
154
- readonly models: readonly ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-haiku-20240307"];
155
- readonly keyName: "ANTHROPIC_API_KEY";
156
- readonly apiKeyNameOverride: "ANTHROPIC_API_TOKEN";
157
- };
158
- readonly gemini: {
159
- readonly agentName: "geminiAgent";
160
- readonly defaultModel: "gemini-2.5-flash";
161
- readonly max_tokens: 8192;
162
- readonly models: readonly ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash"];
163
- readonly keyName: "GEMINI_API_KEY";
164
- };
165
- readonly groq: {
166
- readonly agentName: "groqAgent";
167
- readonly defaultModel: "llama-3.1-8b-instant";
168
- readonly keyName: "GROQ_API_KEY";
169
- readonly max_tokens: 4096;
170
- readonly models: readonly ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "deepseek-r1-distill-llama-70b", "openai/gpt-oss-120b", "openai/gpt-oss-20b"];
171
- };
172
- readonly mock: {
173
- readonly agentName: "mediaMockAgent";
174
- readonly defaultModel: "mock";
175
- readonly max_tokens: 4096;
176
- readonly models: readonly ["mock"];
177
- };
178
- };
179
- export declare const defaultProviders: {
180
- tts: keyof typeof provider2TTSAgent;
181
- text2image: keyof typeof provider2ImageAgent;
182
- text2movie: keyof typeof provider2MovieAgent;
183
- text2Html: keyof typeof provider2LLMAgent;
184
- llm: keyof typeof provider2LLMAgent;
185
- soundEffect: keyof typeof provider2SoundEffectAgent;
186
- lipSync: keyof typeof provider2LipSyncAgent;
187
- };
188
- export declare const llm: (keyof typeof provider2LLMAgent)[];
189
- export type LLM = keyof typeof provider2LLMAgent;
190
- export declare const htmlLLMProvider: string[];
191
- export declare const getModelDuration: (provider: keyof typeof provider2MovieAgent, model: string, movieDuration?: number) => number | undefined;
@@ -1,326 +0,0 @@
1
- // node & browser
2
- export const provider2TTSAgent = {
3
- nijivoice: {
4
- agentName: "ttsNijivoiceAgent",
5
- hasLimitedConcurrency: true,
6
- keyName: "NIJIVOICE_API_KEY",
7
- },
8
- openai: {
9
- agentName: "ttsOpenaiAgent",
10
- hasLimitedConcurrency: false,
11
- defaultModel: "gpt-4o-mini-tts",
12
- defaultVoice: "shimmer",
13
- keyName: "OPENAI_API_KEY",
14
- baseURLKeyName: "OPENAI_BASE_URL",
15
- },
16
- google: {
17
- agentName: "ttsGoogleAgent",
18
- hasLimitedConcurrency: false,
19
- keyName: "GEMINI_API_KEY",
20
- },
21
- gemini: {
22
- agentName: "ttsGeminiAgent",
23
- hasLimitedConcurrency: false,
24
- defaultModel: "gemini-2.5-flash-preview-tts",
25
- defaultVoice: "Kore",
26
- models: ["gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"],
27
- keyName: "GEMINI_API_KEY",
28
- },
29
- elevenlabs: {
30
- agentName: "ttsElevenlabsAgent",
31
- hasLimitedConcurrency: true,
32
- defaultModel: "eleven_multilingual_v2",
33
- // Models | ElevenLabs Documentation
34
- // https://elevenlabs.io/docs/models
35
- models: ["eleven_multilingual_v2", "eleven_turbo_v2_5", "eleven_turbo_v2", "eleven_flash_v2_5", "eleven_flash_v2"],
36
- keyName: "ELEVENLABS_API_KEY",
37
- },
38
- kotodama: {
39
- agentName: "ttsKotodamaAgent",
40
- hasLimitedConcurrency: true,
41
- defaultVoice: "Atla",
42
- defaultDecoration: "neutral",
43
- keyName: "KOTODAMA_API_KEY",
44
- },
45
- mock: {
46
- agentName: "mediaMockAgent",
47
- hasLimitedConcurrency: true,
48
- defaultModel: "mock-model",
49
- models: ["mock-model"],
50
- },
51
- };
52
- export const gptImages = ["gpt-image-1.5", "gpt-image-1", "gpt-image-1-mini"];
53
- export const provider2ImageAgent = {
54
- openai: {
55
- agentName: "imageOpenaiAgent",
56
- defaultModel: "gpt-image-1",
57
- models: ["dall-e-3", ...gptImages],
58
- keyName: "OPENAI_API_KEY",
59
- baseURLKeyName: "OPENAI_BASE_URL",
60
- },
61
- google: {
62
- agentName: "imageGenAIAgent",
63
- defaultModel: "gemini-2.5-flash-image",
64
- models: ["imagen-4.0-generate-preview-06-06", "imagen-4.0-ultra-generate-preview-06-06", "gemini-2.5-flash-image", "gemini-3-pro-image-preview"],
65
- keyName: "GEMINI_API_KEY",
66
- },
67
- replicate: {
68
- agentName: "imageReplicateAgent",
69
- defaultModel: "bytedance/seedream-4",
70
- models: ["bytedance/seedream-4", "qwen/qwen-image"],
71
- keyName: "REPLICATE_API_TOKEN",
72
- },
73
- mock: {
74
- agentName: "mediaMockAgent",
75
- defaultModel: "mock-model",
76
- models: ["mock-model"],
77
- keyName: "",
78
- },
79
- };
80
- export const provider2MovieAgent = {
81
- replicate: {
82
- agentName: "movieReplicateAgent",
83
- defaultModel: "bytedance/seedance-1-lite",
84
- keyName: "REPLICATE_API_TOKEN",
85
- models: [
86
- "bytedance/seedance-1-lite",
87
- "bytedance/seedance-1-pro",
88
- "kwaivgi/kling-v1.6-pro",
89
- "kwaivgi/kling-v2.1",
90
- "kwaivgi/kling-v2.1-master",
91
- "google/veo-2",
92
- "google/veo-3",
93
- "google/veo-3-fast",
94
- "minimax/video-01",
95
- "minimax/hailuo-02",
96
- "minimax/hailuo-02-fast",
97
- "pixverse/pixverse-v4.5",
98
- "wan-video/wan-2.2-i2v-fast",
99
- "wan-video/wan-2.2-t2v-fast",
100
- ],
101
- modelParams: {
102
- "bytedance/seedance-1-lite": {
103
- durations: [5, 10],
104
- start_image: "image",
105
- last_image: "last_frame_image",
106
- price_per_sec: 0.036, // in USD
107
- },
108
- "bytedance/seedance-1-pro": {
109
- durations: [5, 10],
110
- start_image: "image",
111
- last_image: "last_frame_image",
112
- price_per_sec: 0.15,
113
- },
114
- "kwaivgi/kling-v1.6-pro": {
115
- durations: [5, 10],
116
- start_image: "start_image",
117
- price_per_sec: 0.095,
118
- },
119
- "kwaivgi/kling-v2.1": {
120
- durations: [5, 10],
121
- start_image: "start_image",
122
- price_per_sec: 0.05,
123
- },
124
- "kwaivgi/kling-v2.1-master": {
125
- durations: [5, 10],
126
- start_image: "start_image",
127
- price_per_sec: 0.28,
128
- },
129
- "google/veo-2": {
130
- durations: [5, 6, 7, 8],
131
- start_image: "image",
132
- price_per_sec: 0.5,
133
- },
134
- "google/veo-3": {
135
- durations: [8],
136
- start_image: "image",
137
- price_per_sec: 0.75,
138
- },
139
- "google/veo-3-fast": {
140
- durations: [8],
141
- start_image: "image",
142
- price_per_sec: 0.4,
143
- },
144
- "minimax/video-01": {
145
- durations: [6],
146
- start_image: "first_frame_image",
147
- price_per_sec: 0.5,
148
- },
149
- "minimax/hailuo-02": {
150
- durations: [6], // NOTE: 10 for only 720p
151
- start_image: "first_frame_image",
152
- price_per_sec: 0.08,
153
- },
154
- "minimax/hailuo-02-fast": {
155
- durations: [6, 10], // NOTE: 512P
156
- start_image: "first_frame_image",
157
- price_per_sec: 0.0166,
158
- },
159
- "pixverse/pixverse-v4.5": {
160
- durations: [5, 8],
161
- start_image: "image",
162
- last_image: "last_frame_image",
163
- price_per_sec: 0.12,
164
- },
165
- "wan-video/wan-2.2-i2v-fast": {
166
- durations: [5],
167
- start_image: "image",
168
- price_per_sec: 0.012,
169
- },
170
- "wan-video/wan-2.2-t2v-fast": {
171
- durations: [5],
172
- start_image: undefined,
173
- price_per_sec: 0.012,
174
- },
175
- },
176
- },
177
- google: {
178
- agentName: "movieGenAIAgent",
179
- defaultModel: "veo-2.0-generate-001",
180
- models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview"],
181
- keyName: "GEMINI_API_KEY",
182
- modelParams: {
183
- "veo-3.1-generate-preview": {
184
- durations: [4, 6, 8],
185
- },
186
- "veo-3.0-generate-001": {
187
- durations: [4, 6, 8],
188
- },
189
- "veo-2.0-generate-001": {
190
- durations: [5, 6, 7, 8],
191
- },
192
- },
193
- },
194
- mock: {
195
- agentName: "mediaMockAgent",
196
- defaultModel: "mock-model",
197
- models: ["mock-model"],
198
- keyName: "",
199
- modelParams: {},
200
- },
201
- };
202
- export const provider2SoundEffectAgent = {
203
- replicate: {
204
- agentName: "soundEffectReplicateAgent",
205
- defaultModel: "zsxkib/mmaudio",
206
- keyName: "REPLICATE_API_TOKEN",
207
- models: ["zsxkib/mmaudio"],
208
- modelParams: {
209
- "zsxkib/mmaudio": {
210
- identifier: "zsxkib/mmaudio:62871fb59889b2d7c13777f08deb3b36bdff88f7e1d53a50ad7694548a41b484",
211
- },
212
- },
213
- },
214
- };
215
- export const provider2LipSyncAgent = {
216
- replicate: {
217
- agentName: "lipSyncReplicateAgent",
218
- defaultModel: "bytedance/omni-human",
219
- keyName: "REPLICATE_API_TOKEN",
220
- models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human"],
221
- modelParams: {
222
- "bytedance/latentsync": {
223
- identifier: "bytedance/latentsync:637ce1919f807ca20da3a448ddc2743535d2853649574cd52a933120e9b9e293",
224
- video: "video",
225
- audio: "audio",
226
- },
227
- "tmappdev/lipsync": {
228
- identifier: "tmappdev/lipsync:c54ce2fe673ea59b857b91250b3d71a2cd304a78f2370687632805c8405fbf4c",
229
- video: "video_input",
230
- audio: "audio_input",
231
- },
232
- "bytedance/omni-human": {
233
- identifier: "bytedance/omni-human",
234
- image: "image",
235
- audio: "audio",
236
- price_per_sec: 0.14,
237
- },
238
- /* NOTE: This model does not work with large base64 urls.
239
- "sync/lipsync-2": {
240
- video: "video",
241
- audio: "audio",
242
- },
243
- */
244
- /* NOTE: This model does not work well for some unknown reason.
245
- "kwaivgi/kling-lip-sync": {
246
- video: "video_url",
247
- audio: "audio_file",
248
- },
249
- */
250
- },
251
- },
252
- };
253
- // : Record<LLM, { agent: string; defaultModel: string; max_tokens: number }>
254
- export const provider2LLMAgent = {
255
- openai: {
256
- agentName: "openAIAgent",
257
- defaultModel: "gpt-5",
258
- keyName: "OPENAI_API_KEY",
259
- baseURLKeyName: "OPENAI_BASE_URL",
260
- max_tokens: 8192,
261
- models: [
262
- "gpt-5",
263
- "gpt-5-nano",
264
- "gpt-5-mini",
265
- "gpt-4.1",
266
- "gpt-4.1-mini",
267
- "gpt-4.1-nano",
268
- "o3",
269
- "o3-mini",
270
- "o3-pro",
271
- "o1",
272
- "o1-pro",
273
- "gpt-4o",
274
- "gpt-4o-mini",
275
- ],
276
- },
277
- anthropic: {
278
- agentName: "anthropicAgent",
279
- defaultModel: "claude-3-7-sonnet-20250219",
280
- max_tokens: 8192,
281
- models: ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-haiku-20240307"],
282
- keyName: "ANTHROPIC_API_KEY",
283
- apiKeyNameOverride: "ANTHROPIC_API_TOKEN",
284
- // GraphAI is currently using ANTHROPIC_API_KEY, but the official name is ANTHROPIC_API_TOKEN.
285
- },
286
- gemini: {
287
- agentName: "geminiAgent",
288
- defaultModel: "gemini-2.5-flash",
289
- max_tokens: 8192,
290
- models: ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash"],
291
- keyName: "GEMINI_API_KEY",
292
- },
293
- groq: {
294
- agentName: "groqAgent",
295
- defaultModel: "llama-3.1-8b-instant",
296
- keyName: "GROQ_API_KEY",
297
- max_tokens: 4096,
298
- models: ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "deepseek-r1-distill-llama-70b", "openai/gpt-oss-120b", "openai/gpt-oss-20b"],
299
- },
300
- mock: {
301
- agentName: "mediaMockAgent",
302
- defaultModel: "mock",
303
- max_tokens: 4096,
304
- models: ["mock"],
305
- },
306
- };
307
- export const defaultProviders = {
308
- tts: "openai",
309
- text2image: "openai",
310
- text2movie: "replicate",
311
- text2Html: "openai",
312
- llm: "openai",
313
- soundEffect: "replicate",
314
- lipSync: "replicate",
315
- };
316
- export const llm = Object.keys(provider2LLMAgent);
317
- export const htmlLLMProvider = ["openai", "anthropic", "mock"];
318
- export const getModelDuration = (provider, model, movieDuration) => {
319
- const modelParams = provider2MovieAgent[provider]?.modelParams;
320
- const { durations } = modelParams[model];
321
- if (durations && movieDuration) {
322
- const largerDurations = durations.filter((d) => d >= movieDuration);
323
- return largerDurations.length > 0 ? largerDurations[0] : durations[durations.length - 1];
324
- }
325
- return durations?.[0];
326
- };