mulmocast 2.0.3 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/audio.js +2 -1
- package/lib/agents/image_genai_agent.js +20 -4
- package/lib/agents/index.d.ts +2 -1
- package/lib/agents/index.js +2 -1
- package/lib/agents/tts_gemini_agent.d.ts +5 -0
- package/lib/agents/tts_gemini_agent.js +64 -0
- package/lib/cli/commands/tool/scripting/builder.d.ts +1 -1
- package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
- package/lib/utils/ffmpeg_utils.d.ts +1 -0
- package/lib/utils/ffmpeg_utils.js +23 -0
- package/lib/utils/provider2agent.d.ts +6 -0
- package/lib/utils/provider2agent.js +6 -0
- package/lib/utils/utils.js +3 -0
- package/package.json +3 -3
- package/scripts/test/test_audio_gemini.json +67 -0
- package/scripts/test/test_audio_gemini.json~ +67 -0
- package/scripts/test/test_genai2.json +25 -0
- package/scripts/test/test_genai2.json~ +84 -0
package/lib/actions/audio.js
CHANGED
|
@@ -2,7 +2,7 @@ import dotenv from "dotenv";
|
|
|
2
2
|
import { GraphAI, TaskManager, GraphAILogger } from "graphai";
|
|
3
3
|
import * as agents from "@graphai/vanilla";
|
|
4
4
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
5
|
-
import { ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsElevenlabsAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent } from "../agents/index.js";
|
|
5
|
+
import { ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, ttsElevenlabsAgent, addBGMAgent, combineAudioFilesAgent, mediaMockAgent, } from "../agents/index.js";
|
|
6
6
|
import { text2SpeechProviderSchema } from "../types/index.js";
|
|
7
7
|
import { fileCacheAgentFilter, nijovoiceTextAgentFilter } from "../utils/filters.js";
|
|
8
8
|
import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
|
|
@@ -221,6 +221,7 @@ const audioAgents = {
|
|
|
221
221
|
ttsOpenaiAgent,
|
|
222
222
|
ttsNijivoiceAgent,
|
|
223
223
|
ttsGoogleAgent,
|
|
224
|
+
ttsGeminiAgent,
|
|
224
225
|
ttsElevenlabsAgent,
|
|
225
226
|
mediaMockAgent,
|
|
226
227
|
addBGMAgent,
|
|
package/lib/agents/image_genai_agent.js
CHANGED
|
@@ -22,11 +22,13 @@ export const ratio2BlankPath = (aspectRatio) => {
|
|
|
22
22
|
}
|
|
23
23
|
return blankImagePath();
|
|
24
24
|
};
|
|
25
|
-
const getGeminiContents = (prompt,
|
|
25
|
+
const getGeminiContents = (prompt, referenceImages, aspectRatio) => {
|
|
26
26
|
const contents = [{ text: prompt }];
|
|
27
27
|
const images = [...(referenceImages ?? [])];
|
|
28
28
|
// NOTE: There is no way to explicitly specify the aspect ratio for Gemini. This is just a hint.
|
|
29
|
-
|
|
29
|
+
if (aspectRatio) {
|
|
30
|
+
images.push(ratio2BlankPath(aspectRatio));
|
|
31
|
+
}
|
|
30
32
|
images.forEach((imagePath) => {
|
|
31
33
|
const imageData = fs.readFileSync(imagePath);
|
|
32
34
|
const base64Image = imageData.toString("base64");
|
|
@@ -71,11 +73,25 @@ export const imageGenAIAgent = async ({ namedInputs, params, config, }) => {
|
|
|
71
73
|
}
|
|
72
74
|
try {
|
|
73
75
|
const ai = new GoogleGenAI({ apiKey });
|
|
74
|
-
if (model === "gemini-2.5-flash-image"
|
|
75
|
-
const contents = getGeminiContents(prompt,
|
|
76
|
+
if (model === "gemini-2.5-flash-image") {
|
|
77
|
+
const contents = getGeminiContents(prompt, referenceImages, aspectRatio);
|
|
76
78
|
const response = await ai.models.generateContent({ model, contents });
|
|
77
79
|
return geminiFlashResult(response);
|
|
78
80
|
}
|
|
81
|
+
else if (model === "gemini-3-pro-image-preview") {
|
|
82
|
+
const contents = getGeminiContents(prompt, referenceImages);
|
|
83
|
+
const response = await ai.models.generateContent({
|
|
84
|
+
model,
|
|
85
|
+
contents,
|
|
86
|
+
config: {
|
|
87
|
+
imageConfig: {
|
|
88
|
+
// '1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', or '21:9'.
|
|
89
|
+
aspectRatio,
|
|
90
|
+
},
|
|
91
|
+
},
|
|
92
|
+
});
|
|
93
|
+
return geminiFlashResult(response);
|
|
94
|
+
}
|
|
79
95
|
else {
|
|
80
96
|
const response = await ai.models.generateImages({
|
|
81
97
|
model,
|
package/lib/agents/index.d.ts
CHANGED
|
@@ -11,6 +11,7 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
|
|
|
11
11
|
import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
|
|
12
12
|
import ttsOpenaiAgent from "./tts_openai_agent.js";
|
|
13
13
|
import ttsGoogleAgent from "./tts_google_agent.js";
|
|
14
|
+
import ttsGeminiAgent from "./tts_gemini_agent.js";
|
|
14
15
|
import validateSchemaAgent from "./validate_schema_agent.js";
|
|
15
16
|
import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
|
|
16
17
|
import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
|
|
@@ -19,4 +20,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
|
|
|
19
20
|
import { textInputAgent } from "@graphai/input_agents";
|
|
20
21
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
21
22
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
22
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
|
|
23
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
|
package/lib/agents/index.js
CHANGED
|
@@ -11,6 +11,7 @@ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
|
|
|
11
11
|
import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
|
|
12
12
|
import ttsOpenaiAgent from "./tts_openai_agent.js";
|
|
13
13
|
import ttsGoogleAgent from "./tts_google_agent.js";
|
|
14
|
+
import ttsGeminiAgent from "./tts_gemini_agent.js";
|
|
14
15
|
import validateSchemaAgent from "./validate_schema_agent.js";
|
|
15
16
|
import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
|
|
16
17
|
import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
|
|
@@ -20,4 +21,4 @@ import { textInputAgent } from "@graphai/input_agents";
|
|
|
20
21
|
import { openAIAgent } from "@graphai/openai_agent";
|
|
21
22
|
// import * as vanilla from "@graphai/vanilla";
|
|
22
23
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
23
|
-
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
|
|
24
|
+
export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, imageReplicateAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, ttsGeminiAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
|
|
package/lib/agents/tts_gemini_agent.d.ts
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { AgentFunction, AgentFunctionInfo } from "graphai";
|
|
2
|
+
import type { GoogleTTSAgentParams, AgentBufferResult, AgentTextInputs, AgentErrorResult } from "../types/agent.js";
|
|
3
|
+
export declare const ttsGeminiAgent: AgentFunction<GoogleTTSAgentParams, AgentBufferResult | AgentErrorResult, AgentTextInputs>;
|
|
4
|
+
declare const ttsGeminiAgentInfo: AgentFunctionInfo;
|
|
5
|
+
export default ttsGeminiAgentInfo;
|
|
package/lib/agents/tts_gemini_agent.js
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { GraphAILogger } from "graphai";
|
|
2
|
+
import { GoogleGenAI } from "@google/genai";
|
|
3
|
+
import { provider2TTSAgent } from "../utils/provider2agent.js";
|
|
4
|
+
import { apiKeyMissingError, agentGenerationError, audioAction, audioFileTarget } from "../utils/error_cause.js";
|
|
5
|
+
import { pcmToMp3 } from "../utils/ffmpeg_utils.js";
|
|
6
|
+
export const ttsGeminiAgent = async ({ namedInputs, params, config, }) => {
|
|
7
|
+
const { text } = namedInputs;
|
|
8
|
+
const { voice, suppressError } = params;
|
|
9
|
+
const apiKey = config?.apiKey;
|
|
10
|
+
if (!apiKey) {
|
|
11
|
+
throw new Error("Google GenAI API key is required (GEMINI_API_KEY)", {
|
|
12
|
+
cause: apiKeyMissingError("ttsGeminiAgent", audioAction, "GEMINI_API_KEY"),
|
|
13
|
+
});
|
|
14
|
+
}
|
|
15
|
+
try {
|
|
16
|
+
const ai = new GoogleGenAI({ apiKey });
|
|
17
|
+
const response = await ai.models.generateContent({
|
|
18
|
+
model: "gemini-2.5-flash-preview-tts",
|
|
19
|
+
contents: [{ parts: [{ text }] }],
|
|
20
|
+
config: {
|
|
21
|
+
responseModalities: ["AUDIO"],
|
|
22
|
+
speechConfig: {
|
|
23
|
+
voiceConfig: {
|
|
24
|
+
prebuiltVoiceConfig: { voiceName: voice ?? provider2TTSAgent.gemini.defaultVoice },
|
|
25
|
+
},
|
|
26
|
+
},
|
|
27
|
+
},
|
|
28
|
+
});
|
|
29
|
+
const inlineData = response.candidates?.[0]?.content?.parts?.[0]?.inlineData;
|
|
30
|
+
const pcmBase64 = inlineData?.data;
|
|
31
|
+
const mimeType = inlineData?.mimeType;
|
|
32
|
+
if (!pcmBase64)
|
|
33
|
+
throw new Error("No audio data returned");
|
|
34
|
+
// Extract sample rate from mimeType (e.g., "audio/L16;codec=pcm;rate=24000")
|
|
35
|
+
const rateMatch = mimeType?.match(/rate=(\d+)/);
|
|
36
|
+
const sampleRate = rateMatch ? parseInt(rateMatch[1]) : 24000;
|
|
37
|
+
const rawPcm = Buffer.from(pcmBase64, "base64");
|
|
38
|
+
return { buffer: await pcmToMp3(rawPcm, sampleRate) };
|
|
39
|
+
}
|
|
40
|
+
catch (e) {
|
|
41
|
+
if (suppressError) {
|
|
42
|
+
return {
|
|
43
|
+
error: e,
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
GraphAILogger.info(e);
|
|
47
|
+
throw new Error("TTS Gemini Error", {
|
|
48
|
+
cause: agentGenerationError("ttsGeminiAgent", audioAction, audioFileTarget),
|
|
49
|
+
});
|
|
50
|
+
}
|
|
51
|
+
};
|
|
52
|
+
const ttsGeminiAgentInfo = {
|
|
53
|
+
name: "ttsGeminiAgent",
|
|
54
|
+
agent: ttsGeminiAgent,
|
|
55
|
+
mock: ttsGeminiAgent,
|
|
56
|
+
samples: [],
|
|
57
|
+
description: "Google Gemini TTS agent",
|
|
58
|
+
category: ["tts"],
|
|
59
|
+
author: "Receptron Team",
|
|
60
|
+
repository: "https://github.com/receptron/mulmocast-cli/",
|
|
61
|
+
license: "MIT",
|
|
62
|
+
environmentVariables: ["GEMINI_API_KEY"],
|
|
63
|
+
};
|
|
64
|
+
export default ttsGeminiAgentInfo;
|
|
package/lib/cli/commands/tool/scripting/builder.d.ts
CHANGED
|
@@ -16,7 +16,7 @@ export declare const builder: (yargs: Argv) => Argv<{
|
|
|
16
16
|
} & {
|
|
17
17
|
s: string;
|
|
18
18
|
} & {
|
|
19
|
-
llm: "mock" | "openai" | "
|
|
19
|
+
llm: "mock" | "openai" | "gemini" | "anthropic" | "groq" | undefined;
|
|
20
20
|
} & {
|
|
21
21
|
llm_model: string | undefined;
|
|
22
22
|
}>;
|
|
package/lib/cli/commands/tool/story_to_script/builder.d.ts
CHANGED
|
@@ -10,7 +10,7 @@ export declare const builder: (yargs: Argv) => Argv<{
|
|
|
10
10
|
} & {
|
|
11
11
|
beats_per_scene: number;
|
|
12
12
|
} & {
|
|
13
|
-
llm: "mock" | "openai" | "
|
|
13
|
+
llm: "mock" | "openai" | "gemini" | "anthropic" | "groq" | undefined;
|
|
14
14
|
} & {
|
|
15
15
|
llm_model: string | undefined;
|
|
16
16
|
} & {
|
|
package/lib/utils/ffmpeg_utils.d.ts
CHANGED
|
@@ -18,3 +18,4 @@ export declare const ffmpegGetMediaDuration: (filePath: string) => Promise<{
|
|
|
18
18
|
export declare const extractImageFromMovie: (movieFile: string, imagePath: string) => Promise<object>;
|
|
19
19
|
export declare const trimMusic: (inputFile: string, startTime: number, duration: number) => Promise<Buffer>;
|
|
20
20
|
export declare const createSilentAudio: (filePath: string, durationSec: number) => Promise<void>;
|
|
21
|
+
export declare const pcmToMp3: (rawPcm: Buffer, sampleRate?: number) => Promise<Buffer>;
|
|
package/lib/utils/ffmpeg_utils.js
CHANGED
|
@@ -2,6 +2,7 @@ import ffmpeg from "fluent-ffmpeg";
|
|
|
2
2
|
import { GraphAILogger } from "graphai";
|
|
3
3
|
import { isFile } from "./file.js";
|
|
4
4
|
import fs from "fs";
|
|
5
|
+
import { Readable, PassThrough } from "node:stream";
|
|
5
6
|
export const setFfmpegPath = (ffmpegPath) => {
|
|
6
7
|
ffmpeg.setFfmpegPath(ffmpegPath);
|
|
7
8
|
};
|
|
@@ -144,3 +145,25 @@ export const createSilentAudio = (filePath, durationSec) => {
|
|
|
144
145
|
.run();
|
|
145
146
|
});
|
|
146
147
|
};
|
|
148
|
+
export const pcmToMp3 = (rawPcm, sampleRate = 24000) => {
|
|
149
|
+
return new Promise((resolve, reject) => {
|
|
150
|
+
const inputStream = new Readable({
|
|
151
|
+
read() {
|
|
152
|
+
this.push(rawPcm);
|
|
153
|
+
this.push(null);
|
|
154
|
+
},
|
|
155
|
+
});
|
|
156
|
+
const outputChunks = [];
|
|
157
|
+
const outputStream = new PassThrough();
|
|
158
|
+
outputStream.on("data", (chunk) => outputChunks.push(chunk));
|
|
159
|
+
outputStream.on("end", () => resolve(Buffer.concat(outputChunks)));
|
|
160
|
+
outputStream.on("error", reject);
|
|
161
|
+
ffmpeg(inputStream)
|
|
162
|
+
.inputFormat("s16le")
|
|
163
|
+
.inputOptions([`-ar ${sampleRate}`, "-ac 1"])
|
|
164
|
+
.audioCodec("libmp3lame")
|
|
165
|
+
.format("mp3")
|
|
166
|
+
.on("error", reject)
|
|
167
|
+
.pipe(outputStream);
|
|
168
|
+
});
|
|
169
|
+
};
|
|
package/lib/utils/provider2agent.d.ts
CHANGED
|
@@ -16,6 +16,12 @@ export declare const provider2TTSAgent: {
|
|
|
16
16
|
hasLimitedConcurrency: boolean;
|
|
17
17
|
keyName: string;
|
|
18
18
|
};
|
|
19
|
+
gemini: {
|
|
20
|
+
agentName: string;
|
|
21
|
+
hasLimitedConcurrency: boolean;
|
|
22
|
+
defaultVoice: string;
|
|
23
|
+
keyName: string;
|
|
24
|
+
};
|
|
19
25
|
elevenlabs: {
|
|
20
26
|
agentName: string;
|
|
21
27
|
hasLimitedConcurrency: boolean;
|
|
package/lib/utils/provider2agent.js
CHANGED
|
@@ -17,6 +17,12 @@ export const provider2TTSAgent = {
|
|
|
17
17
|
hasLimitedConcurrency: false,
|
|
18
18
|
keyName: "GEMINI_API_KEY",
|
|
19
19
|
},
|
|
20
|
+
gemini: {
|
|
21
|
+
agentName: "ttsGeminiAgent",
|
|
22
|
+
hasLimitedConcurrency: false,
|
|
23
|
+
defaultVoice: "Kore",
|
|
24
|
+
keyName: "GEMINI_API_KEY",
|
|
25
|
+
},
|
|
20
26
|
elevenlabs: {
|
|
21
27
|
agentName: "ttsElevenlabsAgent",
|
|
22
28
|
hasLimitedConcurrency: true,
|
package/lib/utils/utils.js
CHANGED
|
@@ -73,6 +73,9 @@ export const settings2GraphAIConfig = (settings, env) => {
|
|
|
73
73
|
ttsNijivoiceAgent: {
|
|
74
74
|
apiKey: getKey("TTS", "NIJIVOICE_API_KEY"),
|
|
75
75
|
},
|
|
76
|
+
ttsGeminiAgent: {
|
|
77
|
+
apiKey: getKey("TTS", "GEMINI_API_KEY"),
|
|
78
|
+
},
|
|
76
79
|
ttsElevenlabsAgent: {
|
|
77
80
|
apiKey: getKey("TTS", "ELEVENLABS_API_KEY"),
|
|
78
81
|
},
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mulmocast",
|
|
3
|
-
"version": "2.0.3",
|
|
3
|
+
"version": "2.0.5",
|
|
4
4
|
"description": "",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "lib/index.node.js",
|
|
@@ -102,7 +102,7 @@
|
|
|
102
102
|
"replicate": "^1.4.0",
|
|
103
103
|
"yaml": "^2.8.1",
|
|
104
104
|
"yargs": "^18.0.0",
|
|
105
|
-
"zod": "^4.1.
|
|
105
|
+
"zod": "^4.1.13"
|
|
106
106
|
},
|
|
107
107
|
"devDependencies": {
|
|
108
108
|
"@receptron/test_utils": "^2.0.3",
|
|
@@ -117,7 +117,7 @@
|
|
|
117
117
|
"prettier": "^3.6.2",
|
|
118
118
|
"tsx": "^4.20.6",
|
|
119
119
|
"typescript": "^5.9.3",
|
|
120
|
-
"typescript-eslint": "^8.
|
|
120
|
+
"typescript-eslint": "^8.48.0"
|
|
121
121
|
},
|
|
122
122
|
"engines": {
|
|
123
123
|
"node": ">=20.0.0"
|
|
package/scripts/test/test_audio_gemini.json
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1"
|
|
4
|
+
},
|
|
5
|
+
"lang": "en",
|
|
6
|
+
"title": "Audio Instructions Test",
|
|
7
|
+
"speechParams": {
|
|
8
|
+
"speakers": {
|
|
9
|
+
"Presenter": {
|
|
10
|
+
"provider": "gemini",
|
|
11
|
+
"voiceId": "Kore"
|
|
12
|
+
},
|
|
13
|
+
"Presenter2": {
|
|
14
|
+
"provider": "gemini",
|
|
15
|
+
"voiceId": "Puck"
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"beats": [
|
|
20
|
+
{
|
|
21
|
+
"speaker": "Presenter",
|
|
22
|
+
"text": "Hello, I'm a presenter. I have no instructions.",
|
|
23
|
+
"image": {
|
|
24
|
+
"type": "textSlide",
|
|
25
|
+
"slide": {
|
|
26
|
+
"title": "Presenter"
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"speaker": "Presenter2",
|
|
32
|
+
"text": "Hello, I'm a presenter 2. My instructions are 'Speak in a cheerful and positive tone'.",
|
|
33
|
+
"image": {
|
|
34
|
+
"type": "textSlide",
|
|
35
|
+
"slide": {
|
|
36
|
+
"title": "Presenter 2"
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"speaker": "Presenter",
|
|
42
|
+
"text": "Hello, I'm a presenter. I have a British English instruction.",
|
|
43
|
+
"speechOptions": {
|
|
44
|
+
"instruction": "Speak in British English."
|
|
45
|
+
},
|
|
46
|
+
"image": {
|
|
47
|
+
"type": "textSlide",
|
|
48
|
+
"slide": {
|
|
49
|
+
"title": "Presenter with British English instruction"
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"speaker": "Presenter",
|
|
55
|
+
"text": "Hello, I'm a presenter. I have a whisper instruction.",
|
|
56
|
+
"speechOptions": {
|
|
57
|
+
"instruction": "Whisper softly, like a pillow talk."
|
|
58
|
+
},
|
|
59
|
+
"image": {
|
|
60
|
+
"type": "textSlide",
|
|
61
|
+
"slide": {
|
|
62
|
+
"title": "Presenter with whisper instruction"
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
]
|
|
67
|
+
}
|
|
package/scripts/test/test_audio_gemini.json~
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1"
|
|
4
|
+
},
|
|
5
|
+
"lang": "en",
|
|
6
|
+
"title": "Audio Instructions Test",
|
|
7
|
+
"speechParams": {
|
|
8
|
+
"speakers": {
|
|
9
|
+
"Presenter": {
|
|
10
|
+
"provider": "google",
|
|
11
|
+
"voiceId": "ja-JP-Standard-A"
|
|
12
|
+
},
|
|
13
|
+
"Presenter2": {
|
|
14
|
+
"provider": "google",
|
|
15
|
+
"voiceId": "ja-JP-Standard-B"
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"beats": [
|
|
20
|
+
{
|
|
21
|
+
"speaker": "Presenter",
|
|
22
|
+
"text": "Hello, I'm a presenter. I have no instructions.",
|
|
23
|
+
"image": {
|
|
24
|
+
"type": "textSlide",
|
|
25
|
+
"slide": {
|
|
26
|
+
"title": "Presenter"
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"speaker": "Presenter2",
|
|
32
|
+
"text": "Hello, I'm a presenter 2. My instructions are 'Speak in a cheerful and positive tone'.",
|
|
33
|
+
"image": {
|
|
34
|
+
"type": "textSlide",
|
|
35
|
+
"slide": {
|
|
36
|
+
"title": "Presenter 2"
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"speaker": "Presenter",
|
|
42
|
+
"text": "Hello, I'm a presenter. I have a British English instruction.",
|
|
43
|
+
"speechOptions": {
|
|
44
|
+
"instruction": "Speak in British English."
|
|
45
|
+
},
|
|
46
|
+
"image": {
|
|
47
|
+
"type": "textSlide",
|
|
48
|
+
"slide": {
|
|
49
|
+
"title": "Presenter with British English instruction"
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"speaker": "Presenter",
|
|
55
|
+
"text": "Hello, I'm a presenter. I have a whisper instruction.",
|
|
56
|
+
"speechOptions": {
|
|
57
|
+
"instruction": "Whisper softly, like a pillow talk."
|
|
58
|
+
},
|
|
59
|
+
"image": {
|
|
60
|
+
"type": "textSlide",
|
|
61
|
+
"slide": {
|
|
62
|
+
"title": "Presenter with whisper instruction"
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
]
|
|
67
|
+
}
|
|
package/scripts/test/test_genai2.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": { "version": "1.1" },
|
|
3
|
+
"imageParams": {
|
|
4
|
+
"provider": "google",
|
|
5
|
+
"style": "<style>Photo realistic.</style>"
|
|
6
|
+
},
|
|
7
|
+
"canvasSize": {
|
|
8
|
+
"width": 720,
|
|
9
|
+
"height": 1280
|
|
10
|
+
},
|
|
11
|
+
"movieParams": {
|
|
12
|
+
"provider": "google"
|
|
13
|
+
},
|
|
14
|
+
"lang": "en",
|
|
15
|
+
"beats": [
|
|
16
|
+
{
|
|
17
|
+
"id": "gemini_3_pro_image_preview",
|
|
18
|
+
"text": "image generated by gemini-3-pro-image-preview",
|
|
19
|
+
"imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
|
|
20
|
+
"imageParams": {
|
|
21
|
+
"model": "gemini-3-pro-image-preview"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
]
|
|
25
|
+
}
|
|
package/scripts/test/test_genai2.json~
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": { "version": "1.1" },
|
|
3
|
+
"imageParams": {
|
|
4
|
+
"provider": "google",
|
|
5
|
+
"style": "<style>Photo realistic.</style>"
|
|
6
|
+
},
|
|
7
|
+
"movieParams": {
|
|
8
|
+
"provider": "google"
|
|
9
|
+
},
|
|
10
|
+
"lang": "en",
|
|
11
|
+
"beats": [
|
|
12
|
+
{
|
|
13
|
+
"id": "gemini_3_pro_image_preview",
|
|
14
|
+
"text": "image generated by gemini-3-pro-image-preview",
|
|
15
|
+
"imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
|
|
16
|
+
"imageParams": {
|
|
17
|
+
"model": "gemini-3-pro-image-preview"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "gemini_2_5_flash_image",
|
|
22
|
+
"text": "image generated by gemini-2.5-flash-image",
|
|
23
|
+
"imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
|
|
24
|
+
"imageParams": {
|
|
25
|
+
"model": "gemini-2.5-flash-image"
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"id": "imagen_4",
|
|
30
|
+
"text": "image generated by imagen-4",
|
|
31
|
+
"imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses"
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"id": "imagen_4_ultra",
|
|
35
|
+
"text": "image generated by imagen-4",
|
|
36
|
+
"imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
|
|
37
|
+
"imageParams": {
|
|
38
|
+
"model": "imagen-4.0-ultra-generate-preview-06-06"
|
|
39
|
+
}
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"id": "genai_veo2",
|
|
43
|
+
"text": "movie generated by veo2",
|
|
44
|
+
"duration": 5,
|
|
45
|
+
"moviePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses"
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"id": "genai_veo2_image",
|
|
49
|
+
"text": "movie generated by veo2 with image",
|
|
50
|
+
"duration": 5,
|
|
51
|
+
"imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
|
|
52
|
+
"moviePrompt": "a woman takes a selfie with her phone"
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"id": "genai_veo3",
|
|
56
|
+
"text": "movie generated by veo3",
|
|
57
|
+
"moviePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
|
|
58
|
+
"movieParams": {
|
|
59
|
+
"model": "veo-3.0-generate-001"
|
|
60
|
+
}
|
|
61
|
+
},
|
|
62
|
+
{
|
|
63
|
+
"id": "genai_veo3_1",
|
|
64
|
+
"text": "movie generated by veo3_1",
|
|
65
|
+
"moviePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
|
|
66
|
+
"movieParams": {
|
|
67
|
+
"model": "veo-3.1-generate-preview"
|
|
68
|
+
}
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
"id": "genai_veo3_image",
|
|
72
|
+
"text": "movie generated by veo3",
|
|
73
|
+
"duration": 5,
|
|
74
|
+
"imagePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
|
|
75
|
+
"imageParams": {
|
|
76
|
+
"model": "gemini-2.5-flash-image"
|
|
77
|
+
},
|
|
78
|
+
"moviePrompt": "a woman is walking through a busy Tokyo street at night, she is wearing dark sunglasses",
|
|
79
|
+
"movieParams": {
|
|
80
|
+
"model": "veo-3.0-generate-001"
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
]
|
|
84
|
+
}
|