mulmocast 2.0.7 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import dotenv from "dotenv";
2
2
  import { GraphAI, TaskManager, GraphAILogger } from "graphai";
3
3
  import * as agents from "@graphai/vanilla";
4
4
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
5
- import { ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsElevenlabsAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent, } from "../agents/index.js";
5
+ import { ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsElevenlabsAgent, ttsKotodamaAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent, } from "../agents/index.js";
6
6
  import { text2SpeechProviderSchema } from "../types/index.js";
7
7
  import { fileCacheAgentFilter, nijovoiceTextAgentFilter } from "../utils/filters.js";
8
8
  import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
@@ -108,6 +108,7 @@ const graph_tts = {
108
108
  voice: ":preprocessor.voiceId",
109
109
  speed: ":preprocessor.speechOptions.speed",
110
110
  instructions: ":preprocessor.speechOptions.instruction",
111
+ decoration: ":preprocessor.speechOptions.decoration",
111
112
  model: ":preprocessor.model",
112
113
  },
113
114
  },
@@ -222,6 +223,7 @@ const audioAgents = {
222
223
  ttsNijivoiceAgent,
223
224
  ttsGoogleAgent,
224
225
  ttsGeminiAgent,
226
+ ttsKotodamaAgent,
225
227
  ttsElevenlabsAgent,
226
228
  mediaMockAgent,
227
229
  addBGMAgent,
@@ -12,6 +12,7 @@ import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
12
12
  import ttsOpenaiAgent from "./tts_openai_agent.js";
13
13
  import ttsGoogleAgent from "./tts_google_agent.js";
14
14
  import ttsGeminiAgent from "./tts_gemini_agent.js";
15
+ import ttsKotodamaAgent from "./tts_kotodama_agent.js";
15
16
  import validateSchemaAgent from "./validate_schema_agent.js";
16
17
  import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
17
18
  import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
@@ -20,4 +21,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
20
21
  import { textInputAgent } from "@graphai/input_agents";
21
22
  import { openAIAgent } from "@graphai/openai_agent";
22
23
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
23
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
24
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsKotodamaAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
@@ -12,6 +12,7 @@ import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
12
12
  import ttsOpenaiAgent from "./tts_openai_agent.js";
13
13
  import ttsGoogleAgent from "./tts_google_agent.js";
14
14
  import ttsGeminiAgent from "./tts_gemini_agent.js";
15
+ import ttsKotodamaAgent from "./tts_kotodama_agent.js";
15
16
  import validateSchemaAgent from "./validate_schema_agent.js";
16
17
  import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
17
18
  import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
@@ -21,4 +22,4 @@ import { textInputAgent } from "@graphai/input_agents";
21
22
  import { openAIAgent } from "@graphai/openai_agent";
22
23
  // import * as vanilla from "@graphai/vanilla";
23
24
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
24
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
25
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsKotodamaAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
@@ -116,6 +116,9 @@ export type NijivoiceTTSAgentParams = TTSAgentParams & {
116
116
  speed: number;
117
117
  speed_global: number;
118
118
  };
119
+ export type KotodamaTTSAgentParams = TTSAgentParams & {
120
+ decoration: string;
121
+ };
119
122
  export type GoogleTTSAgentParams = TTSAgentParams & {
120
123
  speed: number;
121
124
  };
@@ -19,6 +19,7 @@ export declare const multiLingualTextsSchema: z.ZodRecord<z.ZodString, z.ZodObje
19
19
  export declare const speechOptionsSchema: z.ZodObject<{
20
20
  speed: z.ZodOptional<z.ZodNumber>;
21
21
  instruction: z.ZodOptional<z.ZodString>;
22
+ decoration: z.ZodOptional<z.ZodString>;
22
23
  }, z.core.$strict>;
23
24
  export declare const defaultSpeaker = "Presenter";
24
25
  export declare const text2SpeechProviderSchema: z.ZodDefault<z.ZodEnum<{
@@ -31,6 +32,7 @@ export declare const speakerDataSchema: z.ZodObject<{
31
32
  speechOptions: z.ZodOptional<z.ZodObject<{
32
33
  speed: z.ZodOptional<z.ZodNumber>;
33
34
  instruction: z.ZodOptional<z.ZodString>;
35
+ decoration: z.ZodOptional<z.ZodString>;
34
36
  }, z.core.$strict>>;
35
37
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
36
38
  [x: string]: string;
@@ -44,6 +46,7 @@ export declare const speakerSchema: z.ZodObject<{
44
46
  speechOptions: z.ZodOptional<z.ZodObject<{
45
47
  speed: z.ZodOptional<z.ZodNumber>;
46
48
  instruction: z.ZodOptional<z.ZodString>;
49
+ decoration: z.ZodOptional<z.ZodString>;
47
50
  }, z.core.$strict>>;
48
51
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
49
52
  [x: string]: string;
@@ -56,6 +59,7 @@ export declare const speakerSchema: z.ZodObject<{
56
59
  speechOptions: z.ZodOptional<z.ZodObject<{
57
60
  speed: z.ZodOptional<z.ZodNumber>;
58
61
  instruction: z.ZodOptional<z.ZodString>;
62
+ decoration: z.ZodOptional<z.ZodString>;
59
63
  }, z.core.$strict>>;
60
64
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
61
65
  [x: string]: string;
@@ -70,6 +74,7 @@ export declare const speakerDictionarySchema: z.ZodRecord<z.ZodString, z.ZodObje
70
74
  speechOptions: z.ZodOptional<z.ZodObject<{
71
75
  speed: z.ZodOptional<z.ZodNumber>;
72
76
  instruction: z.ZodOptional<z.ZodString>;
77
+ decoration: z.ZodOptional<z.ZodString>;
73
78
  }, z.core.$strict>>;
74
79
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
75
80
  [x: string]: string;
@@ -82,6 +87,7 @@ export declare const speakerDictionarySchema: z.ZodRecord<z.ZodString, z.ZodObje
82
87
  speechOptions: z.ZodOptional<z.ZodObject<{
83
88
  speed: z.ZodOptional<z.ZodNumber>;
84
89
  instruction: z.ZodOptional<z.ZodString>;
90
+ decoration: z.ZodOptional<z.ZodString>;
85
91
  }, z.core.$strict>>;
86
92
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
87
93
  [x: string]: string;
@@ -97,6 +103,7 @@ export declare const mulmoSpeechParamsSchema: z.ZodDefault<z.ZodObject<{
97
103
  speechOptions: z.ZodOptional<z.ZodObject<{
98
104
  speed: z.ZodOptional<z.ZodNumber>;
99
105
  instruction: z.ZodOptional<z.ZodString>;
106
+ decoration: z.ZodOptional<z.ZodString>;
100
107
  }, z.core.$strict>>;
101
108
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
102
109
  [x: string]: string;
@@ -109,6 +116,7 @@ export declare const mulmoSpeechParamsSchema: z.ZodDefault<z.ZodObject<{
109
116
  speechOptions: z.ZodOptional<z.ZodObject<{
110
117
  speed: z.ZodOptional<z.ZodNumber>;
111
118
  instruction: z.ZodOptional<z.ZodString>;
119
+ decoration: z.ZodOptional<z.ZodString>;
112
120
  }, z.core.$strict>>;
113
121
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
114
122
  [x: string]: string;
@@ -629,6 +637,7 @@ export declare const mulmoBeatSchema: z.ZodObject<{
629
637
  speechOptions: z.ZodOptional<z.ZodObject<{
630
638
  speed: z.ZodOptional<z.ZodNumber>;
631
639
  instruction: z.ZodOptional<z.ZodString>;
640
+ decoration: z.ZodOptional<z.ZodString>;
632
641
  }, z.core.$strict>>;
633
642
  textSlideParams: z.ZodOptional<z.ZodObject<{
634
643
  cssStyles: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
@@ -716,6 +725,7 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
716
725
  speechOptions: z.ZodOptional<z.ZodObject<{
717
726
  speed: z.ZodOptional<z.ZodNumber>;
718
727
  instruction: z.ZodOptional<z.ZodString>;
728
+ decoration: z.ZodOptional<z.ZodString>;
719
729
  }, z.core.$strict>>;
720
730
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
721
731
  [x: string]: string;
@@ -728,6 +738,7 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
728
738
  speechOptions: z.ZodOptional<z.ZodObject<{
729
739
  speed: z.ZodOptional<z.ZodNumber>;
730
740
  instruction: z.ZodOptional<z.ZodString>;
741
+ decoration: z.ZodOptional<z.ZodString>;
731
742
  }, z.core.$strict>>;
732
743
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
733
744
  [x: string]: string;
@@ -852,6 +863,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
852
863
  speechOptions: z.ZodOptional<z.ZodObject<{
853
864
  speed: z.ZodOptional<z.ZodNumber>;
854
865
  instruction: z.ZodOptional<z.ZodString>;
866
+ decoration: z.ZodOptional<z.ZodString>;
855
867
  }, z.core.$strict>>;
856
868
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
857
869
  [x: string]: string;
@@ -864,6 +876,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
864
876
  speechOptions: z.ZodOptional<z.ZodObject<{
865
877
  speed: z.ZodOptional<z.ZodNumber>;
866
878
  instruction: z.ZodOptional<z.ZodString>;
879
+ decoration: z.ZodOptional<z.ZodString>;
867
880
  }, z.core.$strict>>;
868
881
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
869
882
  [x: string]: string;
@@ -1133,6 +1146,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
1133
1146
  speechOptions: z.ZodOptional<z.ZodObject<{
1134
1147
  speed: z.ZodOptional<z.ZodNumber>;
1135
1148
  instruction: z.ZodOptional<z.ZodString>;
1149
+ decoration: z.ZodOptional<z.ZodString>;
1136
1150
  }, z.core.$strict>>;
1137
1151
  textSlideParams: z.ZodOptional<z.ZodObject<{
1138
1152
  cssStyles: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
@@ -1264,6 +1278,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
1264
1278
  speechOptions: z.ZodOptional<z.ZodObject<{
1265
1279
  speed: z.ZodOptional<z.ZodNumber>;
1266
1280
  instruction: z.ZodOptional<z.ZodString>;
1281
+ decoration: z.ZodOptional<z.ZodString>;
1267
1282
  }, z.core.$strict>>;
1268
1283
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1269
1284
  [x: string]: string;
@@ -1276,6 +1291,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
1276
1291
  speechOptions: z.ZodOptional<z.ZodObject<{
1277
1292
  speed: z.ZodOptional<z.ZodNumber>;
1278
1293
  instruction: z.ZodOptional<z.ZodString>;
1294
+ decoration: z.ZodOptional<z.ZodString>;
1279
1295
  }, z.core.$strict>>;
1280
1296
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1281
1297
  [x: string]: string;
@@ -1545,6 +1561,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
1545
1561
  speechOptions: z.ZodOptional<z.ZodObject<{
1546
1562
  speed: z.ZodOptional<z.ZodNumber>;
1547
1563
  instruction: z.ZodOptional<z.ZodString>;
1564
+ decoration: z.ZodOptional<z.ZodString>;
1548
1565
  }, z.core.$strict>>;
1549
1566
  textSlideParams: z.ZodOptional<z.ZodObject<{
1550
1567
  cssStyles: z.ZodUnion<readonly [z.ZodString, z.ZodArray<z.ZodString>]>;
@@ -1612,6 +1629,7 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
1612
1629
  speechOptions: z.ZodOptional<z.ZodObject<{
1613
1630
  speed: z.ZodOptional<z.ZodNumber>;
1614
1631
  instruction: z.ZodOptional<z.ZodString>;
1632
+ decoration: z.ZodOptional<z.ZodString>;
1615
1633
  }, z.core.$strict>>;
1616
1634
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1617
1635
  [x: string]: string;
@@ -1624,6 +1642,7 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
1624
1642
  speechOptions: z.ZodOptional<z.ZodObject<{
1625
1643
  speed: z.ZodOptional<z.ZodNumber>;
1626
1644
  instruction: z.ZodOptional<z.ZodString>;
1645
+ decoration: z.ZodOptional<z.ZodString>;
1627
1646
  }, z.core.$strict>>;
1628
1647
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1629
1648
  [x: string]: string;
@@ -1742,6 +1761,7 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
1742
1761
  speechOptions: z.ZodOptional<z.ZodObject<{
1743
1762
  speed: z.ZodOptional<z.ZodNumber>;
1744
1763
  instruction: z.ZodOptional<z.ZodString>;
1764
+ decoration: z.ZodOptional<z.ZodString>;
1745
1765
  }, z.core.$strict>>;
1746
1766
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1747
1767
  [x: string]: string;
@@ -1754,6 +1774,7 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
1754
1774
  speechOptions: z.ZodOptional<z.ZodObject<{
1755
1775
  speed: z.ZodOptional<z.ZodNumber>;
1756
1776
  instruction: z.ZodOptional<z.ZodString>;
1777
+ decoration: z.ZodOptional<z.ZodString>;
1757
1778
  }, z.core.$strict>>;
1758
1779
  provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
1759
1780
  [x: string]: string;
@@ -18,8 +18,9 @@ export const localizedTextSchema = z
18
18
  export const multiLingualTextsSchema = z.record(langSchema, localizedTextSchema);
19
19
  export const speechOptionsSchema = z
20
20
  .object({
21
- speed: z.number().optional(), // default: 1.0
22
- instruction: z.string().optional(),
21
+ speed: z.number().optional(), // default: 1.0 for google and niji voice
22
+ instruction: z.string().optional(), // for tts openai
23
+ decoration: z.string().optional(), // for kotodama. default: neutral
23
24
  })
24
25
  .strict();
25
26
  const speakerIdSchema = z.string();
@@ -17,6 +17,7 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
17
17
  speechOptions?: {
18
18
  speed?: number | undefined;
19
19
  instruction?: string | undefined;
20
+ decoration?: string | undefined;
20
21
  } | undefined;
21
22
  provider?: string | undefined;
22
23
  model?: string | undefined;
@@ -27,6 +28,7 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
27
28
  speechOptions?: {
28
29
  speed?: number | undefined;
29
30
  instruction?: string | undefined;
31
+ decoration?: string | undefined;
30
32
  } | undefined;
31
33
  provider?: string | undefined;
32
34
  model?: string | undefined;
@@ -242,6 +244,7 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
242
244
  speechOptions?: {
243
245
  speed?: number | undefined;
244
246
  instruction?: string | undefined;
247
+ decoration?: string | undefined;
245
248
  } | undefined;
246
249
  textSlideParams?: {
247
250
  cssStyles: string | string[];
@@ -332,6 +335,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
332
335
  speechOptions?: {
333
336
  speed?: number | undefined;
334
337
  instruction?: string | undefined;
338
+ decoration?: string | undefined;
335
339
  } | undefined;
336
340
  provider?: string | undefined;
337
341
  model?: string | undefined;
@@ -342,6 +346,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
342
346
  speechOptions?: {
343
347
  speed?: number | undefined;
344
348
  instruction?: string | undefined;
349
+ decoration?: string | undefined;
345
350
  } | undefined;
346
351
  provider?: string | undefined;
347
352
  model?: string | undefined;
@@ -557,6 +562,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
557
562
  speechOptions?: {
558
563
  speed?: number | undefined;
559
564
  instruction?: string | undefined;
565
+ decoration?: string | undefined;
560
566
  } | undefined;
561
567
  textSlideParams?: {
562
568
  cssStyles: string | string[];
@@ -654,6 +660,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
654
660
  speechOptions?: {
655
661
  speed?: number | undefined;
656
662
  instruction?: string | undefined;
663
+ decoration?: string | undefined;
657
664
  } | undefined;
658
665
  provider?: string | undefined;
659
666
  model?: string | undefined;
@@ -664,6 +671,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
664
671
  speechOptions?: {
665
672
  speed?: number | undefined;
666
673
  instruction?: string | undefined;
674
+ decoration?: string | undefined;
667
675
  } | undefined;
668
676
  provider?: string | undefined;
669
677
  model?: string | undefined;
@@ -10,6 +10,7 @@ export declare const provider2TTSAgent: {
10
10
  defaultModel: string;
11
11
  defaultVoice: string;
12
12
  keyName: string;
13
+ baseURLKeyName: string;
13
14
  };
14
15
  google: {
15
16
  agentName: string;
@@ -29,6 +30,13 @@ export declare const provider2TTSAgent: {
29
30
  models: string[];
30
31
  keyName: string;
31
32
  };
33
+ kotodama: {
34
+ agentName: string;
35
+ hasLimitedConcurrency: boolean;
36
+ defaultVoice: string;
37
+ defaultDecoration: string;
38
+ keyName: string;
39
+ };
32
40
  mock: {
33
41
  agentName: string;
34
42
  hasLimitedConcurrency: boolean;
@@ -42,6 +50,7 @@ export declare const provider2ImageAgent: {
42
50
  defaultModel: string;
43
51
  models: string[];
44
52
  keyName: string;
53
+ baseURLKeyName: string;
45
54
  };
46
55
  google: {
47
56
  agentName: string;
@@ -131,6 +140,7 @@ export declare const provider2LLMAgent: {
131
140
  readonly agentName: "openAIAgent";
132
141
  readonly defaultModel: "gpt-5";
133
142
  readonly keyName: "OPENAI_API_KEY";
143
+ readonly baseURLKeyName: "OPENAI_BASE_URL";
134
144
  readonly max_tokens: 8192;
135
145
  readonly models: readonly ["gpt-5", "gpt-5-nano", "gpt-5-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3", "o3-mini", "o3-pro", "o1", "o1-pro", "gpt-4o", "gpt-4o-mini"];
136
146
  };
@@ -140,6 +150,7 @@ export declare const provider2LLMAgent: {
140
150
  readonly max_tokens: 8192;
141
151
  readonly models: readonly ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-haiku-20240307"];
142
152
  readonly keyName: "ANTHROPIC_API_KEY";
153
+ readonly apiKeyNameOverride: "ANTHROPIC_API_TOKEN";
143
154
  };
144
155
  readonly gemini: {
145
156
  readonly agentName: "geminiAgent";
@@ -11,6 +11,7 @@ export const provider2TTSAgent = {
11
11
  defaultModel: "gpt-4o-mini-tts",
12
12
  defaultVoice: "shimmer",
13
13
  keyName: "OPENAI_API_KEY",
14
+ baseURLKeyName: "OPENAI_BASE_URL",
14
15
  },
15
16
  google: {
16
17
  agentName: "ttsGoogleAgent",
@@ -32,6 +33,13 @@ export const provider2TTSAgent = {
32
33
  models: ["eleven_multilingual_v2", "eleven_turbo_v2_5", "eleven_turbo_v2", "eleven_flash_v2_5", "eleven_flash_v2"],
33
34
  keyName: "ELEVENLABS_API_KEY",
34
35
  },
36
+ kotodama: {
37
+ agentName: "ttsKotodamaAgent",
38
+ hasLimitedConcurrency: true,
39
+ defaultVoice: "Atla",
40
+ defaultDecoration: "neutral",
41
+ keyName: "KOTODAMA_API_KEY",
42
+ },
35
43
  mock: {
36
44
  agentName: "mediaMockAgent",
37
45
  hasLimitedConcurrency: true,
@@ -45,6 +53,7 @@ export const provider2ImageAgent = {
45
53
  defaultModel: "gpt-image-1",
46
54
  models: ["dall-e-3", "gpt-image-1"],
47
55
  keyName: "OPENAI_API_KEY",
56
+ baseURLKeyName: "OPENAI_BASE_URL",
48
57
  },
49
58
  google: {
50
59
  agentName: "imageGenAIAgent",
@@ -244,6 +253,7 @@ export const provider2LLMAgent = {
244
253
  agentName: "openAIAgent",
245
254
  defaultModel: "gpt-5",
246
255
  keyName: "OPENAI_API_KEY",
256
+ baseURLKeyName: "OPENAI_BASE_URL",
247
257
  max_tokens: 8192,
248
258
  models: [
249
259
  "gpt-5",
@@ -267,6 +277,8 @@ export const provider2LLMAgent = {
267
277
  max_tokens: 8192,
268
278
  models: ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-haiku-20240307"],
269
279
  keyName: "ANTHROPIC_API_KEY",
280
+ apiKeyNameOverride: "ANTHROPIC_API_TOKEN",
281
+ // GraphAI is currently using ANTHROPIC_API_KEY, but the official name is ANTHROPIC_API_TOKEN.
270
282
  },
271
283
  gemini: {
272
284
  agentName: "geminiAgent",
@@ -3,7 +3,7 @@
3
3
  * (No Node.js built-ins like fs, path, dotenv, etc.)
4
4
  * Works in both Node.js and modern browsers.
5
5
  */
6
- import { provider2LLMAgent } from "./provider2agent.js";
6
+ import { provider2LLMAgent, provider2TTSAgent, provider2ImageAgent, provider2MovieAgent, provider2SoundEffectAgent, provider2LipSyncAgent, } from "./provider2agent.js";
7
7
  export const llmPair = (_llm, _model) => {
8
8
  const llmKey = _llm ?? "openai";
9
9
  const agent = provider2LLMAgent[llmKey]?.agentName ?? provider2LLMAgent.openai.agentName;
@@ -42,58 +42,30 @@ export const settings2GraphAIConfig = (settings, env) => {
42
42
  const getKey = (prefix, key) => {
43
43
  return settings?.[`${prefix}_${key}`] ?? settings?.[key] ?? env?.[`${prefix}_${key}`] ?? env?.[key];
44
44
  };
45
- const config = {
46
- openAIAgent: {
47
- apiKey: getKey("LLM", "OPENAI_API_KEY"),
48
- baseURL: getKey("LLM", "OPENAI_BASE_URL"),
49
- },
50
- anthropicAgent: {
51
- apiKey: getKey("LLM", "ANTHROPIC_API_TOKEN"),
52
- },
53
- imageOpenaiAgent: {
54
- apiKey: getKey("IMAGE", "OPENAI_API_KEY"),
55
- baseURL: getKey("IMAGE", "OPENAI_BASE_URL"),
56
- },
57
- imageReplicateAgent: {
58
- apiKey: getKey("IMAGE", "REPLICATE_API_TOKEN"),
59
- },
60
- imageGenAIAgent: {
61
- apiKey: getKey("IMAGE", "GEMINI_API_KEY"),
62
- },
63
- movieReplicateAgent: {
64
- apiKey: getKey("MOVIE", "REPLICATE_API_TOKEN"),
65
- },
66
- movieGenAIAgent: {
67
- apiKey: getKey("MOVIE", "GEMINI_API_KEY"),
68
- },
69
- ttsOpenaiAgent: {
70
- apiKey: getKey("TTS", "OPENAI_API_KEY"),
71
- baseURL: getKey("TTS", "OPENAI_BASE_URL"),
72
- },
73
- ttsNijivoiceAgent: {
74
- apiKey: getKey("TTS", "NIJIVOICE_API_KEY"),
75
- },
76
- ttsGoogleAgent: {
77
- apiKey: getKey("TTS", "GEMINI_API_KEY"),
78
- },
79
- ttsGeminiAgent: {
80
- apiKey: getKey("TTS", "GEMINI_API_KEY"),
81
- },
82
- ttsElevenlabsAgent: {
83
- apiKey: getKey("TTS", "ELEVENLABS_API_KEY"),
84
- },
85
- soundEffectReplicateAgent: {
86
- apiKey: getKey("SOUND_EFFECT", "REPLICATE_API_TOKEN"),
87
- },
88
- lipSyncReplicateAgent: {
89
- apiKey: getKey("LIPSYNC", "REPLICATE_API_TOKEN"),
90
- },
91
- // TODO
92
- // browserlessAgent
93
- // ttsGoogleAgent
94
- // geminiAgent, groqAgent for tool
95
- // TAVILY_API_KEY ( for deep research)
45
+ const addProviderConfigs = (config, providers, prefix) => {
46
+ Object.entries(providers).forEach(([__provider, info]) => {
47
+ if (info.agentName === "mediaMockAgent" || !info.keyName)
48
+ return;
49
+ const apiKeyName = info.apiKeyNameOverride || info.keyName;
50
+ config[info.agentName] = {
51
+ apiKey: getKey(prefix, apiKeyName),
52
+ };
53
+ if (info.baseURLKeyName) {
54
+ config[info.agentName].baseURL = getKey(prefix, info.baseURLKeyName);
55
+ }
56
+ });
96
57
  };
58
+ const config = {};
59
+ addProviderConfigs(config, provider2LLMAgent, "LLM");
60
+ addProviderConfigs(config, provider2TTSAgent, "TTS");
61
+ addProviderConfigs(config, provider2ImageAgent, "IMAGE");
62
+ addProviderConfigs(config, provider2MovieAgent, "MOVIE");
63
+ addProviderConfigs(config, provider2SoundEffectAgent, "SOUND_EFFECT");
64
+ addProviderConfigs(config, provider2LipSyncAgent, "LIPSYNC");
65
+ // TODO
66
+ // browserlessAgent
67
+ // geminiAgent, groqAgent for tool
68
+ // TAVILY_API_KEY ( for deep research)
97
69
  return deepClean(config) ?? {};
98
70
  };
99
71
  export const deepClean = (input) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast",
3
- "version": "2.0.7",
3
+ "version": "2.0.9",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "lib/index.node.js",
@@ -96,11 +96,11 @@
96
96
  "graphai": "^2.0.16",
97
97
  "jsdom": "^27.2.0",
98
98
  "marked": "^17.0.1",
99
- "mulmocast-vision": "^1.0.4",
99
+ "mulmocast-vision": "^1.0.8",
100
100
  "ora": "^9.0.0",
101
- "puppeteer": "^24.31.0",
101
+ "puppeteer": "^24.32.0",
102
102
  "replicate": "^1.4.0",
103
- "yaml": "^2.8.1",
103
+ "yaml": "^2.8.2",
104
104
  "yargs": "^18.0.0",
105
105
  "zod": "^4.1.13"
106
106
  },
@@ -114,10 +114,10 @@
114
114
  "eslint-config-prettier": "^10.1.8",
115
115
  "eslint-plugin-prettier": "^5.5.4",
116
116
  "eslint-plugin-sonarjs": "^3.0.5",
117
- "prettier": "^3.7.1",
118
- "tsx": "^4.20.6",
117
+ "prettier": "^3.7.4",
118
+ "tsx": "^4.21.0",
119
119
  "typescript": "^5.9.3",
120
- "typescript-eslint": "^8.48.0"
120
+ "typescript-eslint": "^8.48.1"
121
121
  },
122
122
  "engines": {
123
123
  "node": ">=20.0.0"
@@ -12,10 +12,10 @@ This directory contains MulmoScript samples for testing MulmoCast features.
12
12
 
13
13
  Simple test scripts for basic functionality verification
14
14
 
15
- - **test_hello.json** - 最もシンプルなHello Worldテスト / Simplest Hello World test
16
- - **test.json** - 基本的な動作テスト / Basic functionality test
17
- - **test1.json**, **test2.json** - 追加の基本テスト / Additional basic tests
18
- - **test_beats.json** - Beatの基本機能テスト / Beat basic features test
15
+ - [**test_hello.json**](./test_hello.json) - 最もシンプルなHello Worldテスト / Simplest Hello World test
16
+ - [**test.json**](./test.json) - 基本的な動作テスト / Basic functionality test
17
+ - [**test1.json**](./test1.json), [**test2.json**](./test2.json) - 追加の基本テスト / Additional basic tests
18
+ - [**test_beats.json**](./test_beats.json) - Beatの基本機能テスト / Beat basic features test
19
19
 
20
20
  ### 🎤 TTS(音声合成)テスト / TTS (Text-to-Speech) Tests
21
21
 
@@ -23,13 +23,13 @@ Simple test scripts for basic functionality verification
23
23
 
24
24
  Tests for various TTS providers
25
25
 
26
- - **test_all_tts.json** - 全TTSプロバイダーのテスト(OpenAI, Gemini, Google, ElevenLabs, Nijivoice) / All TTS providers test
27
- - **test_audio.json** - 音声パラメータのテスト(padding, duration, movieVolumeなど) / Audio parameters test
28
- - **test_audio_gemini.json** - Gemini TTSの個別テスト / Gemini TTS specific test
29
- - **test_audio_instructions.json** - OpenAI TTS instructionsのテスト / OpenAI TTS instructions test
30
- - **test_elevenlabs_models.json** - ElevenLabsの複数モデルテスト / ElevenLabs multiple models test
31
- - **test_voices.json** - 複数の音声設定テスト / Multiple voice settings test
32
- - **test_mixed_providers.json** - 複数のTTSプロバイダー混在テスト / Mixed TTS providers test
26
+ - [**test_all_tts.json**](./test_all_tts.json) - 全TTSプロバイダーのテスト(OpenAI, Gemini, Google, ElevenLabs, Nijivoice) / All TTS providers test
27
+ - [**test_audio.json**](./test_audio.json) - 音声パラメータのテスト(padding, duration, movieVolumeなど) / Audio parameters test
28
+ - [**test_audio_gemini.json**](./test_audio_gemini.json) - Gemini TTSの個別テスト / Gemini TTS specific test
29
+ - [**test_audio_instructions.json**](./test_audio_instructions.json) - OpenAI TTS instructionsのテスト / OpenAI TTS instructions test
30
+ - [**test_elevenlabs_models.json**](./test_elevenlabs_models.json) - ElevenLabsの複数モデルテスト / ElevenLabs multiple models test
31
+ - [**test_voices.json**](./test_voices.json) - 複数の音声設定テスト / Multiple voice settings test
32
+ - [**test_mixed_providers.json**](./test_mixed_providers.json) - 複数のTTSプロバイダー混在テスト / Mixed TTS providers test
33
33
 
34
34
  ### 🖼️ 画像生成テスト / Image Generation Tests
35
35
 
@@ -37,13 +37,13 @@ Tests for various TTS providers
37
37
 
38
38
  Image generation feature tests
39
39
 
40
- - **test_images.json** - 画像生成の基本テスト / Basic image generation test
41
- - **test_hello_image.json** - Hello World画像テスト / Hello World image test
42
- - **test_image_refs.json** - 参照画像を使った生成テスト / Image generation with references
43
- - **test_markdown.json** - Markdown形式の画像テスト / Markdown format image test
44
- - **test_html.json** - HTMLから画像生成テスト / HTML to image test
45
- - **test_vision.json** - Vision APIを使った画像生成テスト / Vision API image test
46
- - **test_layout.json** - レイアウト機能のテスト / Layout features test
40
+ - [**test_images.json**](./test_images.json) - 画像生成の基本テスト / Basic image generation test
41
+ - [**test_hello_image.json**](./test_hello_image.json) - Hello World画像テスト / Hello World image test
42
+ - [**test_image_refs.json**](./test_image_refs.json) - 参照画像を使った生成テスト / Image generation with references
43
+ - [**test_markdown.json**](./test_markdown.json) - Markdown形式の画像テスト / Markdown format image test
44
+ - [**test_html.json**](./test_html.json) - HTMLから画像生成テスト / HTML to image test
45
+ - [**test_vision.json**](./test_vision.json) - Vision APIを使った画像生成テスト / Vision API image test
46
+ - [**test_layout.json**](./test_layout.json) - レイアウト機能のテスト / Layout features test
47
47
 
48
48
  ### 🎬 動画生成テスト / Video Generation Tests
49
49
 
@@ -51,12 +51,12 @@ Image generation feature tests
51
51
 
52
52
  Video generation feature tests
53
53
 
54
- - **test_movie.json** - 動画生成の基本テスト(imagePrompt + moviePrompt) / Basic video generation test
55
- - **test_movie2.json** - 動画生成の追加テスト / Additional video generation test
56
- - **test_genai_movie.json** - GenAI動画生成テスト / GenAI video generation test
57
- - **test_genai.json** - GenAI機能テスト / GenAI features test
58
- - **test_replicate.json** - Replicate動画生成テスト / Replicate video generation test
59
- - **test_mv.json** - ミュージックビデオ形式のテスト / Music video format test
54
+ - [**test_movie.json**](./test_movie.json) - 動画生成の基本テスト(imagePrompt + moviePrompt) / Basic video generation test
55
+ - [**test_movie2.json**](./test_movie2.json) - 動画生成の追加テスト / Additional video generation test
56
+ - [**test_genai_movie.json**](./test_genai_movie.json) - GenAI動画生成テスト / GenAI video generation test
57
+ - [**test_genai.json**](./test_genai.json) - GenAI機能テスト / GenAI features test
58
+ - [**test_replicate.json**](./test_replicate.json) - Replicate動画生成テスト / Replicate video generation test
59
+ - [**test_mv.json**](./test_mv.json) - ミュージックビデオ形式のテスト / Music video format test
60
60
 
61
61
  ### 🎭 高度な機能テスト / Advanced Feature Tests
62
62
 
@@ -64,17 +64,17 @@ Video generation feature tests
64
64
 
65
65
  Special features and complex scenario tests
66
66
 
67
- - **test_spillover.json** - 音声スピルオーバー機能テスト / Audio spillover feature test
68
- - **test_lipsync.json** - リップシンク機能テスト / Lip-sync feature test
69
- - **test_transition.json** - トランジション効果テスト / Transition effects test
70
- - **test_transition_no_audio.json** - 音声なしトランジションテスト / Transition without audio test
71
- - **test_slideout_left_no_audio.json** - スライドアウト効果テスト / Slide-out effect test
72
- - **test_sound_effect.json** - サウンドエフェクトテスト / Sound effect test
73
- - **test_voice_over.json** - ボイスオーバー機能テスト / Voice-over feature test
74
- - **test_captions.json** - 字幕機能テスト / Caption feature test
75
- - **test_hello_caption.json** - Hello World字幕テスト / Hello World caption test
76
- - **test_loop.json** - ループ再生テスト / Loop playback test
77
- - **test_video_speed.json** - 動画速度調整テスト / Video speed adjustment test
67
+ - [**test_spillover.json**](./test_spillover.json) - 音声スピルオーバー機能テスト / Audio spillover feature test
68
+ - [**test_lipsync.json**](./test_lipsync.json) - リップシンク機能テスト / Lip-sync feature test
69
+ - [**test_transition.json**](./test_transition.json) - トランジション効果テスト / Transition effects test
70
+ - [**test_transition_no_audio.json**](./test_transition_no_audio.json) - 音声なしトランジションテスト / Transition without audio test
71
+ - [**test_slideout_left_no_audio.json**](./test_slideout_left_no_audio.json) - スライドアウト効果テスト / Slide-out effect test
72
+ - [**test_sound_effect.json**](./test_sound_effect.json) - サウンドエフェクトテスト / Sound effect test
73
+ - [**test_voice_over.json**](./test_voice_over.json) - ボイスオーバー機能テスト / Voice-over feature test
74
+ - [**test_captions.json**](./test_captions.json) - 字幕機能テスト / Caption feature test
75
+ - [**test_hello_caption.json**](./test_hello_caption.json) - Hello World字幕テスト / Hello World caption test
76
+ - [**test_loop.json**](./test_loop.json) - ループ再生テスト / Loop playback test
77
+ - [**test_video_speed.json**](./test_video_speed.json) - 動画速度調整テスト / Video speed adjustment test
78
78
 
79
79
  ### 🔧 特殊条件テスト / Special Condition Tests
80
80
 
@@ -82,13 +82,13 @@ Special features and complex scenario tests
82
82
 
83
83
  Edge cases and special condition tests
84
84
 
85
- - **test_no_audio.json** - 音声なし動画テスト / Video without audio test
86
- - **test_no_audio_with_credit.json** - クレジット付き音声なしテスト / No audio with credits test
87
- - **test_hello_nobgm.json** - BGMなしテスト / Test without BGM
88
- - **test_size_error.json** - サイズエラーテスト / Size error test
89
- - **test_media.json** - メディアファイル処理テスト / Media file processing test
90
- - **test_order.json** - 順序処理テスト / Order processing test
91
- - **test_order_portrait.json** - 縦向き順序テスト / Portrait order test
85
+ - [**test_no_audio.json**](./test_no_audio.json) - 音声なし動画テスト / Video without audio test
86
+ - [**test_no_audio_with_credit.json**](./test_no_audio_with_credit.json) - クレジット付き音声なしテスト / No audio with credits test
87
+ - [**test_hello_nobgm.json**](./test_hello_nobgm.json) - BGMなしテスト / Test without BGM
88
+ - [**test_size_error.json**](./test_size_error.json) - サイズエラーテスト / Size error test
89
+ - [**test_media.json**](./test_media.json) - メディアファイル処理テスト / Media file processing test
90
+ - [**test_order.json**](./test_order.json) - 順序処理テスト / Order processing test
91
+ - [**test_order_portrait.json**](./test_order_portrait.json) - 縦向き順序テスト / Portrait order test
92
92
 
93
93
  ### 🌍 多言語テスト / Multi-language Tests
94
94
 
@@ -96,8 +96,8 @@ Edge cases and special condition tests
96
96
 
97
97
  Language setting tests
98
98
 
99
- - **test_lang.json** - 多言語サポートテスト / Multi-language support test
100
- - **test_en.json** - 英語専用テスト / English-only test
99
+ - [**test_lang.json**](./test_lang.json) - 多言語サポートテスト / Multi-language support test
100
+ - [**test_en.json**](./test_en.json) - 英語専用テスト / English-only test
101
101
 
102
102
  ### 🎯 プロバイダー別テスト / Provider-Specific Tests
103
103
 
@@ -105,10 +105,10 @@ Language setting tests
105
105
 
106
106
  Provider-specific feature tests
107
107
 
108
- - **test_hello_google.json** - Google TTS専用テスト / Google TTS specific test
109
- - **gpt.json** - GPTモデルテスト / GPT model test
110
- - **mulmo_story.json** - ストーリー形式テスト / Story format test
111
- - **nano_banana.json** - カスタムサンプル / Custom sample
108
+ - [**test_hello_google.json**](./test_hello_google.json) - Google TTS専用テスト / Google TTS specific test
109
+ - [**gpt.json**](./gpt.json) - GPTモデルテスト / GPT model test
110
+ - [**mulmo_story.json**](./mulmo_story.json) - ストーリー形式テスト / Story format test
111
+ - [**nano_banana.json**](./nano_banana.json) - カスタムサンプル / Custom sample
112
112
 
113
113
  ## 🚀 使い方 / Usage
114
114
 
@@ -0,0 +1,57 @@
1
+ {
2
+ "$mulmocast": {
3
+ "version": "1.1"
4
+ },
5
+ "lang": "en",
6
+ "title": "Kotodama TTS Test",
7
+ "speechParams": {
8
+ "speakers": {
9
+ "Presenter": {
10
+ "provider": "kotodama",
11
+ "voiceId": "Poporo"
12
+ },
13
+ "Shion": {
14
+ "provider": "kotodama",
15
+ "speechOptions": {
16
+ "decoration": "laughing"
17
+ },
18
+ "voiceId": "Shion"
19
+ }
20
+ }
21
+ },
22
+ "beats": [
23
+ {
24
+ "speaker": "Presenter",
25
+ "text": "こんにちは",
26
+ "image": {
27
+ "type": "textSlide",
28
+ "slide": {
29
+ "title": "Hello, kotodama"
30
+ }
31
+ }
32
+ },
33
+ {
34
+ "speaker": "Shion",
35
+ "text": "こんにちは",
36
+ "image": {
37
+ "type": "textSlide",
38
+ "slide": {
39
+ "title": "Hello, kotodama"
40
+ }
41
+ }
42
+ },
43
+ {
44
+ "speaker": "Presenter",
45
+ "text": "ハッピーだね",
46
+ "speechOptions": {
47
+ "decoration": "happy"
48
+ },
49
+ "image": {
50
+ "type": "textSlide",
51
+ "slide": {
52
+ "title": "Hello, kotodama"
53
+ }
54
+ }
55
+ }
56
+ ]
57
+ }
@@ -1,45 +0,0 @@
1
- {
2
- "$mulmocast": {
3
- "version": "1.1"
4
- },
5
- "lang": "ja",
6
- "title": "All Image Providers Test",
7
- "imageParams": {
8
- "canvasSize": {
9
- "width": 1024,
10
- "height": 1024
11
- }
12
- },
13
- "beats": [
14
- {
15
- "speaker": "Presenter",
16
- "text": "こんにちは、テストです。",
17
- "image": {
18
- "type": "generated",
19
- "prompt": "美しい日本庭園",
20
- "provider": "openai",
21
- "model": "gpt-image-1"
22
- }
23
- },
24
- {
25
- "speaker": "Presenter",
26
- "text": "こんにちは、テストです。",
27
- "image": {
28
- "type": "generated",
29
- "prompt": "美しい日本庭園",
30
- "provider": "google",
31
- "model": "gemini-2.5-flash-image"
32
- }
33
- },
34
- {
35
- "speaker": "Presenter",
36
- "text": "こんにちは、テストです。",
37
- "image": {
38
- "type": "generated",
39
- "prompt": "美しい日本庭園",
40
- "provider": "replicate",
41
- "model": "bytedance/seedream-4"
42
- }
43
- }
44
- ]
45
- }
@@ -1,37 +0,0 @@
1
- {
2
- "$mulmocast": {
3
- "version": "1.1"
4
- },
5
- "lang": "ja",
6
- "title": "All Movie Providers Test",
7
- "imageParams": {
8
- "canvasSize": {
9
- "width": 1280,
10
- "height": 720
11
- }
12
- },
13
- "beats": [
14
- {
15
- "speaker": "Presenter",
16
- "text": "こんにちは、テストです。",
17
- "image": {
18
- "type": "movie",
19
- "prompt": "美しい日本庭園を散歩するカメラワーク",
20
- "provider": "google",
21
- "model": "veo-2.0-generate-001",
22
- "duration": 5
23
- }
24
- },
25
- {
26
- "speaker": "Presenter",
27
- "text": "こんにちは、テストです。",
28
- "image": {
29
- "type": "movie",
30
- "prompt": "美しい日本庭園を散歩するカメラワーク",
31
- "provider": "replicate",
32
- "model": "bytedance/seedance-1-lite",
33
- "duration": 5
34
- }
35
- }
36
- ]
37
- }
@@ -1,83 +0,0 @@
1
- {
2
- "$mulmocast": {
3
- "version": "1.1"
4
- },
5
- "lang": "ja",
6
- "title": "All TTS Providers Test",
7
- "speechParams": {
8
- "speakers": {
9
- "OpenAI": {
10
- "provider": "openai",
11
- "voiceId": "shimmer"
12
- },
13
- "Gemini": {
14
- "provider": "gemini",
15
- "voiceId": "Kore"
16
- },
17
- "Google": {
18
- "provider": "google",
19
- "voiceId": "ja-JP-Standard-A"
20
- },
21
- "ElevenLabs": {
22
- "provider": "elevenlabs",
23
- "voiceId": "3JDquces8E8bkmvbh6Bc"
24
- },
25
- "Nijivoice": {
26
- "provider": "nijivoice",
27
- "voiceId": "231e0170-0ece-4155-be44-231423062f41"
28
- }
29
- }
30
- },
31
- "beats": [
32
- {
33
- "speaker": "OpenAI",
34
- "text": "こんにちは、テストです。",
35
- "image": {
36
- "type": "textSlide",
37
- "slide": {
38
- "title": "OpenAI TTS"
39
- }
40
- }
41
- },
42
- {
43
- "speaker": "Gemini",
44
- "text": "こんにちは、テストです。",
45
- "image": {
46
- "type": "textSlide",
47
- "slide": {
48
- "title": "Gemini TTS"
49
- }
50
- }
51
- },
52
- {
53
- "speaker": "Google",
54
- "text": "こんにちは、テストです。",
55
- "image": {
56
- "type": "textSlide",
57
- "slide": {
58
- "title": "Google TTS"
59
- }
60
- }
61
- },
62
- {
63
- "speaker": "ElevenLabs",
64
- "text": "こんにちは、テストです。",
65
- "image": {
66
- "type": "textSlide",
67
- "slide": {
68
- "title": "ElevenLabs TTS"
69
- }
70
- }
71
- },
72
- {
73
- "speaker": "Nijivoice",
74
- "text": "こんにちは、テストです。",
75
- "image": {
76
- "type": "textSlide",
77
- "slide": {
78
- "title": "Nijivoice TTS"
79
- }
80
- }
81
- }
82
- ]
83
- }
@@ -1,67 +0,0 @@
1
- {
2
- "$mulmocast": {
3
- "version": "1.1"
4
- },
5
- "lang": "en",
6
- "title": "Audio Instructions Test",
7
- "speechParams": {
8
- "speakers": {
9
- "Presenter": {
10
- "provider": "google",
11
- "voiceId": "ja-JP-Standard-A"
12
- },
13
- "Presenter2": {
14
- "provider": "google",
15
- "voiceId": "ja-JP-Standard-B"
16
- }
17
- }
18
- },
19
- "beats": [
20
- {
21
- "speaker": "Presenter",
22
- "text": "Hello, I'm a presenter. I have no instructions.",
23
- "image": {
24
- "type": "textSlide",
25
- "slide": {
26
- "title": "Presenter"
27
- }
28
- }
29
- },
30
- {
31
- "speaker": "Presenter2",
32
- "text": "Hello, I'm a presenter 2. My instructions are 'Speak in a cheerful and positive tone'.",
33
- "image": {
34
- "type": "textSlide",
35
- "slide": {
36
- "title": "Presenter 2"
37
- }
38
- }
39
- },
40
- {
41
- "speaker": "Presenter",
42
- "text": "Hello, I'm a presenter. I have a British English instruction.",
43
- "speechOptions": {
44
- "instruction": "Speak in British English."
45
- },
46
- "image": {
47
- "type": "textSlide",
48
- "slide": {
49
- "title": "Presenter with British English instruction"
50
- }
51
- }
52
- },
53
- {
54
- "speaker": "Presenter",
55
- "text": "Hello, I'm a presenter. I have a whisper instruction.",
56
- "speechOptions": {
57
- "instruction": "Whisper softly, like a pillow talk."
58
- },
59
- "image": {
60
- "type": "textSlide",
61
- "slide": {
62
- "title": "Presenter with whisper instruction"
63
- }
64
- }
65
- }
66
- ]
67
- }
@@ -1,84 +0,0 @@
1
- {
2
- "$mulmocast": { "version": "1.1" },
3
- "imageParams": {
4
- "provider": "google",
5
- "style": "<style>Photo realistic.</style>"
6
- },
7
- "movieParams": {
8
- "provider": "google"
9
- },
10
- "lang": "en",
11
- "beats": [
12
- {
13
- "id": "gemini_3_pro_image_preview",
14
- "text": "image generated by gemini-3-pro-image-preview",
15
- "imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
16
- "imageParams": {
17
- "model": "gemini-3-pro-image-preview"
18
- }
19
- },
20
- {
21
- "id": "gemini_2_5_flash_image",
22
- "text": "image generated by gemini-2.5-flash-image",
23
- "imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
24
- "imageParams": {
25
- "model": "gemini-2.5-flash-image"
26
- }
27
- },
28
- {
29
- "id": "imagen_4",
30
- "text": "image generated by imagen-4",
31
- "imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses"
32
- },
33
- {
34
- "id": "imagen_4_ultra",
35
- "text": "image generated by imagen-4-ultra",
36
- "imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
37
- "imageParams": {
38
- "model": "imagen-4.0-ultra-generate-preview-06-06"
39
- }
40
- },
41
- {
42
- "id": "genai_veo2",
43
- "text": "movie generated by veo2",
44
- "duration": 5,
45
- "moviePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses"
46
- },
47
- {
48
- "id": "genai_veo2_image",
49
- "text": "movie generated by veo2 with image",
50
- "duration": 5,
51
- "imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
52
- "moviePrompt": "a woman takes a selfie with her phone"
53
- },
54
- {
55
- "id": "genai_veo3",
56
- "text": "movie generated by veo3",
57
- "moviePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
58
- "movieParams": {
59
- "model": "veo-3.0-generate-001"
60
- }
61
- },
62
- {
63
- "id": "genai_veo3_1",
64
- "text": "movie generated by veo3_1",
65
- "moviePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
66
- "movieParams": {
67
- "model": "veo-3.1-generate-preview"
68
- }
69
- },
70
- {
71
- "id": "genai_veo3_image",
72
- "text": "movie generated by veo3",
73
- "duration": 5,
74
- "imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
75
- "imageParams": {
76
- "model": "gemini-2.5-flash-image"
77
- },
78
- "moviePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
79
- "movieParams": {
80
- "model": "veo-3.0-generate-001"
81
- }
82
- }
83
- ]
84
- }
@@ -1,22 +0,0 @@
1
- {
2
- "$mulmocast": { "version": "1.1" },
3
- "imageParams": {
4
- "provider": "google",
5
- "style": "<style>Photo realistic.</style>"
6
- },
7
- "canvasSize": {
8
- "width": 720,
9
- "height": 1280
10
- },
11
- "lang": "en",
12
- "movieParams": {
13
- "provider": "google",
14
- "model": "veo-3.1-generate-preview"
15
- },
16
- "beats": [
17
- {
18
- "moviePrompt": "A butterfly flying in slow motion",
19
- "duration": 20
20
- }
21
- ]
22
- }
File without changes
@@ -1,24 +0,0 @@
1
- {
2
- "$mulmocast": {
3
- "version": "1.1"
4
- },
5
- "lang": "en",
6
- "movieParams": {
7
- "provider": "google"
8
- },
9
- "lipSyncParams": {
10
- "provider": "replicate",
11
- "model": "bytedance/latentsync"
12
- },
13
- "canvasSize": {
14
- "width": 1280,
15
- "height": 720
16
- },
17
- "beats": [
18
- {
19
- "text": "Hello, I'm macoro. Hello, I'm macoro. Hello, I'm macoro.",
20
- "moviePrompt": "macoro is speaking",
21
- "enableLipSync": true
22
- }
23
- ]
24
- }
@@ -1,40 +0,0 @@
1
- {
2
- "$mulmocast": {
3
- "version": "1.1"
4
- },
5
- "lang": "en",
6
- "movieParams": {
7
- "provider": "google"
8
- },
9
- "canvasSize": {
10
- "width": 720,
11
- "height": 1280
12
- },
13
- "imageParams": {
14
- "provider": "openai",
15
- "model": "dall-e-3",
16
- "style": "Photo realistic, cinematic style.",
17
- "images": {
18
- "optimus": {
19
- "type": "image",
20
- "source": {
21
- "kind": "url",
22
- "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/optimus.png"
23
- }
24
- }
25
- }
26
- },
27
- "beats": [
28
- {
29
- "text": "Image with both image and movie prompt",
30
- "imagePrompt": "巨大なロケット「Starship」が発射台から打ち上がろうとしている様子。空には星と火星が浮かんでいる。",
31
- "moviePrompt": "Starship lifts off.",
32
- "duration": 5
33
- },
34
- {
35
- "text": "Image with only movie prompt",
36
- "moviePrompt": "Starship lifts off.",
37
- "duration": 5
38
- }
39
- ]
40
- }
@@ -1,65 +0,0 @@
1
- {
2
- "$mulmocast": {
3
- "version": "1.1",
4
- "credit": "closing"
5
- },
6
- "canvasSize": {
7
- "width": 1536,
8
- "height": 1024
9
- },
10
- "speechParams": {
11
- "speakers": {
12
- "Presenter": {
13
- "displayName": {
14
- "en": "Presenter"
15
- },
16
- "voiceId": "shimmer"
17
- }
18
- }
19
- },
20
- "imageParams": {
21
- "provider": "openai",
22
- "style": "<style>Vibrant 3D animation style inspired by K-pop aesthetics, with glossy, stylized characters. The overall visual style combines elements of modern animation, game cinematics, and fashion-forward character design, with sleek outlines, glowing effects, and a polished, cinematic finish.</style>",
23
- "images": {
24
- "min": {
25
- "type": "image",
26
- "source": {
27
- "kind": "url",
28
- "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/min_anime.png"
29
- }
30
- }
31
- }
32
- },
33
- "movieParams": {
34
- "provider": "replicate"
35
- },
36
- "soundEffectParams": {
37
- "provider": "replicate"
38
- },
39
- "captionParams": {
40
- "lang": "en",
41
- "styles": ["font-size: 64px", "width: 90%", "padding-left: 5%", "padding-right: 5%"]
42
- },
43
- "audioParams": {
44
- "padding": 0,
45
- "introPadding": 0,
46
- "closingPadding": 0,
47
- "outroPadding": 0,
48
- "bgm": {
49
- "kind": "url",
50
- "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/music/finetuning_with_you.mp3"
51
- },
52
- "bgmVolume": 1,
53
- "audioVolume": 0,
54
- "suppressSpeech": true
55
- },
56
- "title": "Music Video",
57
- "lang": "en",
58
- "beats": [
59
- {
60
- "text": "Finetuning with you",
61
- "moviePrompt": "The singer preparing to sing a song.",
62
- "playGenMovieToEnd": true
63
- }
64
- ]
65
- }