mulmocast 2.1.20 → 2.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/lib/actions/captions.js +101 -31
- package/lib/actions/movie.d.ts +4 -0
- package/lib/actions/movie.js +24 -24
- package/lib/types/schema.d.ts +113 -0
- package/lib/types/schema.js +19 -1
- package/lib/utils/context.d.ts +47 -0
- package/lib/utils/file.d.ts +1 -1
- package/lib/utils/file.js +4 -1
- package/package.json +1 -1
- package/scripts/test/test.json +3 -3
- package/scripts/test/test2.json +6 -4
- package/scripts/test/test_all_tts.json +7 -7
- package/scripts/test/test_captions.json +6 -1
- package/scripts/test/test_lang.json +2 -2
- package/scripts/test/test_mixed_providers.json +3 -3
package/README.md
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# MulmoCast: A Multi-Modal Presentation Tool for the AI-Native Era
|
|
2
2
|
|
|
3
|
+
[](https://www.npmjs.com/package/mulmocast)
|
|
4
|
+
|
|
3
5
|
## Quick Start Guide
|
|
4
6
|
|
|
5
7
|
If you want to try our beta version, follow the instruction in the release note below.
|
package/lib/actions/captions.js
CHANGED
|
@@ -7,6 +7,105 @@ import { renderHTMLToImage, interpolate } from "../utils/markdown.js";
|
|
|
7
7
|
import { MulmoStudioContextMethods, MulmoPresentationStyleMethods } from "../methods/index.js";
|
|
8
8
|
import { fileWriteAgent } from "@graphai/vanilla_node_agents";
|
|
9
9
|
const vanillaAgents = agents.default ?? agents;
|
|
10
|
+
const defaultDelimiters = ["。", "?", "!", ".", "?", "!"];
|
|
11
|
+
// Split text by delimiters while keeping delimiters attached to the preceding text
|
|
12
|
+
const splitTextByDelimiters = (text, delimiters) => {
|
|
13
|
+
if (!text || delimiters.length === 0) {
|
|
14
|
+
return [text];
|
|
15
|
+
}
|
|
16
|
+
const { segments, current } = [...text].reduce((acc, char) => {
|
|
17
|
+
const newCurrent = acc.current + char;
|
|
18
|
+
if (delimiters.includes(char)) {
|
|
19
|
+
const trimmed = newCurrent.trim();
|
|
20
|
+
return {
|
|
21
|
+
segments: trimmed ? [...acc.segments, trimmed] : acc.segments,
|
|
22
|
+
current: "",
|
|
23
|
+
};
|
|
24
|
+
}
|
|
25
|
+
return { ...acc, current: newCurrent };
|
|
26
|
+
}, { segments: [], current: "" });
|
|
27
|
+
const finalSegments = current.trim() ? [...segments, current.trim()] : segments;
|
|
28
|
+
return finalSegments.length > 0 ? finalSegments : [text];
|
|
29
|
+
};
|
|
30
|
+
// Get split texts based on settings
|
|
31
|
+
const getSplitTexts = (text, texts, textSplit) => {
|
|
32
|
+
// Manual split takes precedence
|
|
33
|
+
if (texts && texts.length > 0) {
|
|
34
|
+
return texts;
|
|
35
|
+
}
|
|
36
|
+
// No splitting or undefined
|
|
37
|
+
if (!textSplit || textSplit.type === "none") {
|
|
38
|
+
return [text];
|
|
39
|
+
}
|
|
40
|
+
// Split by delimiters
|
|
41
|
+
if (textSplit.type === "delimiters") {
|
|
42
|
+
const delimiters = textSplit.delimiters ?? defaultDelimiters;
|
|
43
|
+
return splitTextByDelimiters(text, delimiters);
|
|
44
|
+
}
|
|
45
|
+
return [text];
|
|
46
|
+
};
|
|
47
|
+
// Calculate timing ratios based on text length
|
|
48
|
+
const calculateTimingRatios = (splitTexts) => {
|
|
49
|
+
const totalLength = splitTexts.reduce((sum, t) => sum + t.length, 0);
|
|
50
|
+
if (totalLength === 0) {
|
|
51
|
+
return splitTexts.map(() => 1 / splitTexts.length);
|
|
52
|
+
}
|
|
53
|
+
return splitTexts.map((t) => t.length / totalLength);
|
|
54
|
+
};
|
|
55
|
+
// Convert ratios to cumulative ratios: [0.3, 0.5, 0.2] -> [0, 0.3, 0.8, 1.0]
|
|
56
|
+
const calculateCumulativeRatios = (ratios) => {
|
|
57
|
+
return ratios.reduce((acc, ratio) => [...acc, acc[acc.length - 1] + ratio], [0]);
|
|
58
|
+
};
|
|
59
|
+
// Generate caption files for a single beat
|
|
60
|
+
const generateBeatCaptions = async (beat, context, index) => {
|
|
61
|
+
const captionParams = mulmoCaptionParamsSchema.parse({ ...context.studio.script.captionParams, ...beat.captionParams });
|
|
62
|
+
const canvasSize = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
|
|
63
|
+
const template = getHTMLFile("caption");
|
|
64
|
+
if (captionParams.lang && !context.multiLingual?.[index]?.multiLingualTexts?.[captionParams.lang]) {
|
|
65
|
+
GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${captionParams.lang}`);
|
|
66
|
+
}
|
|
67
|
+
const text = localizedText(beat, context.multiLingual?.[index], captionParams.lang, context.studio.script.lang);
|
|
68
|
+
// Get beat timing info
|
|
69
|
+
const studioBeat = context.studio.beats[index];
|
|
70
|
+
const beatStartAt = studioBeat.startAt ?? 0;
|
|
71
|
+
const beatDuration = studioBeat.duration ?? 0;
|
|
72
|
+
const introPadding = MulmoStudioContextMethods.getIntroPadding(context);
|
|
73
|
+
// Determine split texts based on captionSplit setting
|
|
74
|
+
const captionSplit = captionParams.captionSplit ?? "none";
|
|
75
|
+
const splitTexts = captionSplit === "estimate" ? getSplitTexts(text, beat.texts, captionParams.textSplit) : [text];
|
|
76
|
+
// Calculate timing
|
|
77
|
+
const cumulativeRatios = calculateCumulativeRatios(calculateTimingRatios(splitTexts));
|
|
78
|
+
// Generate caption images with absolute timing
|
|
79
|
+
const captionFiles = await Promise.all(splitTexts.map(async (segmentText, subIndex) => {
|
|
80
|
+
const imagePath = getCaptionImagePath(context, index, subIndex);
|
|
81
|
+
const htmlData = interpolate(template, {
|
|
82
|
+
caption: processLineBreaks(segmentText),
|
|
83
|
+
width: `${canvasSize.width}`,
|
|
84
|
+
height: `${canvasSize.height}`,
|
|
85
|
+
styles: captionParams.styles.join(";\n"),
|
|
86
|
+
});
|
|
87
|
+
await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
|
|
88
|
+
return {
|
|
89
|
+
file: imagePath,
|
|
90
|
+
startAt: beatStartAt + introPadding + beatDuration * cumulativeRatios[subIndex],
|
|
91
|
+
endAt: beatStartAt + introPadding + beatDuration * cumulativeRatios[subIndex + 1],
|
|
92
|
+
};
|
|
93
|
+
}));
|
|
94
|
+
return captionFiles;
|
|
95
|
+
};
|
|
96
|
+
// GraphAI agent for caption generation
|
|
97
|
+
const captionGenerationAgent = async (namedInputs) => {
|
|
98
|
+
const { beat, context, index } = namedInputs;
|
|
99
|
+
try {
|
|
100
|
+
MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, beat.id, true);
|
|
101
|
+
const captionFiles = await generateBeatCaptions(beat, context, index);
|
|
102
|
+
context.studio.beats[index].captionFiles = captionFiles;
|
|
103
|
+
return captionFiles;
|
|
104
|
+
}
|
|
105
|
+
finally {
|
|
106
|
+
MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, beat.id, false);
|
|
107
|
+
}
|
|
108
|
+
};
|
|
10
109
|
export const caption_graph_data = {
|
|
11
110
|
version: 0.5,
|
|
12
111
|
nodes: {
|
|
@@ -23,37 +122,8 @@ export const caption_graph_data = {
|
|
|
23
122
|
graph: {
|
|
24
123
|
nodes: {
|
|
25
124
|
generateCaption: {
|
|
26
|
-
agent:
|
|
27
|
-
|
|
28
|
-
try {
|
|
29
|
-
MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, beat.id, true);
|
|
30
|
-
const captionParams = mulmoCaptionParamsSchema.parse({ ...context.studio.script.captionParams, ...beat.captionParams });
|
|
31
|
-
const canvasSize = MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle);
|
|
32
|
-
const imagePath = getCaptionImagePath(context, index);
|
|
33
|
-
const template = getHTMLFile("caption");
|
|
34
|
-
if (captionParams.lang && !context.multiLingual?.[index]?.multiLingualTexts?.[captionParams.lang]) {
|
|
35
|
-
GraphAILogger.warn(`No multiLingual caption found for beat ${index}, lang: ${captionParams.lang}`);
|
|
36
|
-
}
|
|
37
|
-
const text = localizedText(beat, context.multiLingual?.[index], captionParams.lang, context.studio.script.lang);
|
|
38
|
-
const htmlData = interpolate(template, {
|
|
39
|
-
caption: processLineBreaks(text),
|
|
40
|
-
width: `${canvasSize.width}`,
|
|
41
|
-
height: `${canvasSize.height}`,
|
|
42
|
-
styles: captionParams.styles.join(";\n"),
|
|
43
|
-
});
|
|
44
|
-
await renderHTMLToImage(htmlData, imagePath, canvasSize.width, canvasSize.height, false, true);
|
|
45
|
-
context.studio.beats[index].captionFile = imagePath;
|
|
46
|
-
return imagePath;
|
|
47
|
-
}
|
|
48
|
-
finally {
|
|
49
|
-
MulmoStudioContextMethods.setBeatSessionState(context, "caption", index, beat.id, false);
|
|
50
|
-
}
|
|
51
|
-
},
|
|
52
|
-
inputs: {
|
|
53
|
-
beat: ":beat",
|
|
54
|
-
context: ":context",
|
|
55
|
-
index: ":__mapIndex",
|
|
56
|
-
},
|
|
125
|
+
agent: captionGenerationAgent,
|
|
126
|
+
inputs: { beat: ":beat", context: ":context", index: ":__mapIndex" },
|
|
57
127
|
isResult: true,
|
|
58
128
|
},
|
|
59
129
|
},
|
package/lib/actions/movie.d.ts
CHANGED
|
@@ -27,6 +27,10 @@ export declare const getTransitionVideoId: (transition: MulmoTransition, videoId
|
|
|
27
27
|
beatIndex: number;
|
|
28
28
|
};
|
|
29
29
|
export declare const getConcatVideoFilter: (concatVideoId: string, videoIdsForBeats: VideoId[]) => string;
|
|
30
|
+
export declare const getTransitionFrameDurations: (context: MulmoStudioContext, index: number) => {
|
|
31
|
+
firstDuration: number;
|
|
32
|
+
lastDuration: number;
|
|
33
|
+
};
|
|
30
34
|
export declare const validateBeatSource: (studioBeat: MulmoStudioContext["studio"]["beats"][number], index: number) => string;
|
|
31
35
|
export declare const addSplitAndExtractFrames: (ffmpegContext: FfmpegContext, videoId: string, firstDuration: number, lastDuration: number, isMovie: boolean, needFirst: boolean, needLast: boolean, canvasInfo: {
|
|
32
36
|
width: number;
|
package/lib/actions/movie.js
CHANGED
|
@@ -86,19 +86,22 @@ const getOutputOption = (audioId, videoId) => {
|
|
|
86
86
|
];
|
|
87
87
|
};
|
|
88
88
|
const addCaptions = (ffmpegContext, concatVideoId, context, caption) => {
|
|
89
|
-
const beatsWithCaptions = context.studio.beats.filter(({
|
|
89
|
+
const beatsWithCaptions = context.studio.beats.filter(({ captionFiles }) => captionFiles && captionFiles.length > 0);
|
|
90
90
|
if (caption && beatsWithCaptions.length > 0) {
|
|
91
|
-
const
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
const captionInputIndex = FfmpegContextAddInput(ffmpegContext, captionFile);
|
|
96
|
-
const compositeVideoId = `oc${index}`;
|
|
97
|
-
ffmpegContext.filterComplex.push(`[${prevVideoId}][${captionInputIndex}:v]overlay=format=auto:enable='between(t,${startAt + introPadding},${startAt + duration + introPadding})'[${compositeVideoId}]`);
|
|
98
|
-
return compositeVideoId;
|
|
91
|
+
const { videoId } = beatsWithCaptions.reduce((acc, beat) => {
|
|
92
|
+
const { captionFiles } = beat;
|
|
93
|
+
if (!captionFiles) {
|
|
94
|
+
return acc;
|
|
99
95
|
}
|
|
100
|
-
return
|
|
101
|
-
|
|
96
|
+
return captionFiles.reduce((innerAcc, captionData) => {
|
|
97
|
+
const { file, startAt, endAt } = captionData;
|
|
98
|
+
const captionInputIndex = FfmpegContextAddInput(ffmpegContext, file);
|
|
99
|
+
const compositeVideoId = `oc${innerAcc.captionIndex}`;
|
|
100
|
+
ffmpegContext.filterComplex.push(`[${innerAcc.videoId}][${captionInputIndex}:v]overlay=format=auto:enable='between(t,${startAt},${endAt})'[${compositeVideoId}]`);
|
|
101
|
+
return { videoId: compositeVideoId, captionIndex: innerAcc.captionIndex + 1 };
|
|
102
|
+
}, acc);
|
|
103
|
+
}, { videoId: concatVideoId, captionIndex: 0 });
|
|
104
|
+
return videoId;
|
|
102
105
|
}
|
|
103
106
|
return concatVideoId;
|
|
104
107
|
};
|
|
@@ -280,24 +283,21 @@ const getClampedTransitionDuration = (transitionDuration, prevBeatDuration, curr
|
|
|
280
283
|
const maxDuration = Math.min(prevBeatDuration, currentBeatDuration) * 0.9; // Use 90% to leave some margin
|
|
281
284
|
return Math.min(transitionDuration, maxDuration);
|
|
282
285
|
};
|
|
283
|
-
const getTransitionFrameDurations = (context, index) => {
|
|
286
|
+
export const getTransitionFrameDurations = (context, index) => {
|
|
284
287
|
const minFrame = 1 / 30; // 30fpsを想定。最小1フレーム
|
|
285
288
|
const beats = context.studio.beats;
|
|
286
289
|
const scriptBeats = context.studio.script.beats;
|
|
290
|
+
const getTransitionDuration = (transition, prevBeatIndex, currentBeatIndex) => {
|
|
291
|
+
if (!transition || prevBeatIndex < 0 || currentBeatIndex >= beats.length)
|
|
292
|
+
return 0;
|
|
293
|
+
const prevBeatDuration = beats[prevBeatIndex].duration ?? 1;
|
|
294
|
+
const currentBeatDuration = beats[currentBeatIndex].duration ?? 1;
|
|
295
|
+
return getClampedTransitionDuration(transition.duration, prevBeatDuration, currentBeatDuration);
|
|
296
|
+
};
|
|
287
297
|
const currentTransition = MulmoPresentationStyleMethods.getMovieTransition(context, scriptBeats[index]);
|
|
288
|
-
|
|
289
|
-
if (currentTransition && index > 0) {
|
|
290
|
-
const prevBeatDuration = beats[index - 1].duration ?? 1;
|
|
291
|
-
const currentBeatDuration = beats[index].duration ?? 1;
|
|
292
|
-
firstDuration = getClampedTransitionDuration(currentTransition.duration, prevBeatDuration, currentBeatDuration);
|
|
293
|
-
}
|
|
298
|
+
const firstDuration = index > 0 ? getTransitionDuration(currentTransition, index - 1, index) : 0;
|
|
294
299
|
const nextTransition = index < scriptBeats.length - 1 ? MulmoPresentationStyleMethods.getMovieTransition(context, scriptBeats[index + 1]) : null;
|
|
295
|
-
|
|
296
|
-
if (nextTransition) {
|
|
297
|
-
const prevBeatDuration = beats[index].duration ?? 1;
|
|
298
|
-
const currentBeatDuration = beats[index + 1].duration ?? 1;
|
|
299
|
-
lastDuration = getClampedTransitionDuration(nextTransition.duration, prevBeatDuration, currentBeatDuration);
|
|
300
|
-
}
|
|
300
|
+
const lastDuration = getTransitionDuration(nextTransition, index, index + 1);
|
|
301
301
|
return {
|
|
302
302
|
firstDuration: Math.max(firstDuration, minFrame),
|
|
303
303
|
lastDuration: Math.max(lastDuration, minFrame),
|
package/lib/types/schema.d.ts
CHANGED
|
@@ -190,9 +190,29 @@ export declare const mulmoTextSlideMediaSchema: z.ZodObject<{
|
|
|
190
190
|
bullets: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
191
191
|
}, z.core.$strip>;
|
|
192
192
|
}, z.core.$strict>;
|
|
193
|
+
export declare const captionSplitSchema: z.ZodDefault<z.ZodEnum<{
|
|
194
|
+
none: "none";
|
|
195
|
+
estimate: "estimate";
|
|
196
|
+
}>>;
|
|
197
|
+
export declare const textSplitSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
198
|
+
type: z.ZodLiteral<"none">;
|
|
199
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
200
|
+
type: z.ZodLiteral<"delimiters">;
|
|
201
|
+
delimiters: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
202
|
+
}, z.core.$strip>], "type">;
|
|
193
203
|
export declare const mulmoCaptionParamsSchema: z.ZodObject<{
|
|
194
204
|
lang: z.ZodOptional<z.ZodString>;
|
|
195
205
|
styles: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString>>>;
|
|
206
|
+
captionSplit: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
207
|
+
none: "none";
|
|
208
|
+
estimate: "estimate";
|
|
209
|
+
}>>>;
|
|
210
|
+
textSplit: z.ZodOptional<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
211
|
+
type: z.ZodLiteral<"none">;
|
|
212
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
213
|
+
type: z.ZodLiteral<"delimiters">;
|
|
214
|
+
delimiters: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
215
|
+
}, z.core.$strip>], "type">>;
|
|
196
216
|
}, z.core.$strict>;
|
|
197
217
|
export declare const mulmoChartMediaSchema: z.ZodObject<{
|
|
198
218
|
type: z.ZodLiteral<"chart">;
|
|
@@ -747,6 +767,7 @@ export declare const mulmoMovieParamsSchema: z.ZodObject<{
|
|
|
747
767
|
export declare const mulmoBeatSchema: z.ZodObject<{
|
|
748
768
|
speaker: z.ZodOptional<z.ZodString>;
|
|
749
769
|
text: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
770
|
+
texts: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
750
771
|
id: z.ZodOptional<z.ZodString>;
|
|
751
772
|
description: z.ZodOptional<z.ZodString>;
|
|
752
773
|
image: z.ZodOptional<z.ZodUnion<readonly [z.ZodObject<{
|
|
@@ -1130,6 +1151,16 @@ export declare const mulmoBeatSchema: z.ZodObject<{
|
|
|
1130
1151
|
captionParams: z.ZodOptional<z.ZodObject<{
|
|
1131
1152
|
lang: z.ZodOptional<z.ZodString>;
|
|
1132
1153
|
styles: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString>>>;
|
|
1154
|
+
captionSplit: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
1155
|
+
none: "none";
|
|
1156
|
+
estimate: "estimate";
|
|
1157
|
+
}>>>;
|
|
1158
|
+
textSplit: z.ZodOptional<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
1159
|
+
type: z.ZodLiteral<"none">;
|
|
1160
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
1161
|
+
type: z.ZodLiteral<"delimiters">;
|
|
1162
|
+
delimiters: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
1163
|
+
}, z.core.$strip>], "type">>;
|
|
1133
1164
|
}, z.core.$strict>>;
|
|
1134
1165
|
imageNames: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
1135
1166
|
imagePrompt: z.ZodOptional<z.ZodString>;
|
|
@@ -1485,6 +1516,16 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
|
|
|
1485
1516
|
captionParams: z.ZodOptional<z.ZodObject<{
|
|
1486
1517
|
lang: z.ZodOptional<z.ZodString>;
|
|
1487
1518
|
styles: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString>>>;
|
|
1519
|
+
captionSplit: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
1520
|
+
none: "none";
|
|
1521
|
+
estimate: "estimate";
|
|
1522
|
+
}>>>;
|
|
1523
|
+
textSplit: z.ZodOptional<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
1524
|
+
type: z.ZodLiteral<"none">;
|
|
1525
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
1526
|
+
type: z.ZodLiteral<"delimiters">;
|
|
1527
|
+
delimiters: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
1528
|
+
}, z.core.$strip>], "type">>;
|
|
1488
1529
|
}, z.core.$strict>>;
|
|
1489
1530
|
audioParams: z.ZodDefault<z.ZodObject<{
|
|
1490
1531
|
padding: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
@@ -1836,6 +1877,16 @@ export declare const mulmoScriptSchema: z.ZodObject<{
|
|
|
1836
1877
|
captionParams: z.ZodOptional<z.ZodObject<{
|
|
1837
1878
|
lang: z.ZodOptional<z.ZodString>;
|
|
1838
1879
|
styles: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString>>>;
|
|
1880
|
+
captionSplit: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
1881
|
+
none: "none";
|
|
1882
|
+
estimate: "estimate";
|
|
1883
|
+
}>>>;
|
|
1884
|
+
textSplit: z.ZodOptional<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
1885
|
+
type: z.ZodLiteral<"none">;
|
|
1886
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
1887
|
+
type: z.ZodLiteral<"delimiters">;
|
|
1888
|
+
delimiters: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
1889
|
+
}, z.core.$strip>], "type">>;
|
|
1839
1890
|
}, z.core.$strict>>;
|
|
1840
1891
|
audioParams: z.ZodDefault<z.ZodObject<{
|
|
1841
1892
|
padding: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
@@ -1874,6 +1925,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
|
|
|
1874
1925
|
beats: z.ZodArray<z.ZodObject<{
|
|
1875
1926
|
speaker: z.ZodOptional<z.ZodString>;
|
|
1876
1927
|
text: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
1928
|
+
texts: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
1877
1929
|
id: z.ZodOptional<z.ZodString>;
|
|
1878
1930
|
description: z.ZodOptional<z.ZodString>;
|
|
1879
1931
|
image: z.ZodOptional<z.ZodUnion<readonly [z.ZodObject<{
|
|
@@ -2257,6 +2309,16 @@ export declare const mulmoScriptSchema: z.ZodObject<{
|
|
|
2257
2309
|
captionParams: z.ZodOptional<z.ZodObject<{
|
|
2258
2310
|
lang: z.ZodOptional<z.ZodString>;
|
|
2259
2311
|
styles: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString>>>;
|
|
2312
|
+
captionSplit: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
2313
|
+
none: "none";
|
|
2314
|
+
estimate: "estimate";
|
|
2315
|
+
}>>>;
|
|
2316
|
+
textSplit: z.ZodOptional<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
2317
|
+
type: z.ZodLiteral<"none">;
|
|
2318
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
2319
|
+
type: z.ZodLiteral<"delimiters">;
|
|
2320
|
+
delimiters: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2321
|
+
}, z.core.$strip>], "type">>;
|
|
2260
2322
|
}, z.core.$strict>>;
|
|
2261
2323
|
imageNames: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2262
2324
|
imagePrompt: z.ZodOptional<z.ZodString>;
|
|
@@ -2289,6 +2351,11 @@ export declare const mulmoStudioBeatSchema: z.ZodObject<{
|
|
|
2289
2351
|
soundEffectFile: z.ZodOptional<z.ZodString>;
|
|
2290
2352
|
lipSyncFile: z.ZodOptional<z.ZodString>;
|
|
2291
2353
|
captionFile: z.ZodOptional<z.ZodString>;
|
|
2354
|
+
captionFiles: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
2355
|
+
file: z.ZodString;
|
|
2356
|
+
startAt: z.ZodNumber;
|
|
2357
|
+
endAt: z.ZodNumber;
|
|
2358
|
+
}, z.core.$strip>>>;
|
|
2292
2359
|
htmlImageFile: z.ZodOptional<z.ZodString>;
|
|
2293
2360
|
markdown: z.ZodOptional<z.ZodString>;
|
|
2294
2361
|
html: z.ZodOptional<z.ZodString>;
|
|
@@ -2682,6 +2749,16 @@ export declare const mulmoStudioSchema: z.ZodObject<{
|
|
|
2682
2749
|
captionParams: z.ZodOptional<z.ZodObject<{
|
|
2683
2750
|
lang: z.ZodOptional<z.ZodString>;
|
|
2684
2751
|
styles: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString>>>;
|
|
2752
|
+
captionSplit: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
2753
|
+
none: "none";
|
|
2754
|
+
estimate: "estimate";
|
|
2755
|
+
}>>>;
|
|
2756
|
+
textSplit: z.ZodOptional<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
2757
|
+
type: z.ZodLiteral<"none">;
|
|
2758
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
2759
|
+
type: z.ZodLiteral<"delimiters">;
|
|
2760
|
+
delimiters: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2761
|
+
}, z.core.$strip>], "type">>;
|
|
2685
2762
|
}, z.core.$strict>>;
|
|
2686
2763
|
audioParams: z.ZodDefault<z.ZodObject<{
|
|
2687
2764
|
padding: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
@@ -2720,6 +2797,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
|
|
|
2720
2797
|
beats: z.ZodArray<z.ZodObject<{
|
|
2721
2798
|
speaker: z.ZodOptional<z.ZodString>;
|
|
2722
2799
|
text: z.ZodDefault<z.ZodOptional<z.ZodString>>;
|
|
2800
|
+
texts: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2723
2801
|
id: z.ZodOptional<z.ZodString>;
|
|
2724
2802
|
description: z.ZodOptional<z.ZodString>;
|
|
2725
2803
|
image: z.ZodOptional<z.ZodUnion<readonly [z.ZodObject<{
|
|
@@ -3103,6 +3181,16 @@ export declare const mulmoStudioSchema: z.ZodObject<{
|
|
|
3103
3181
|
captionParams: z.ZodOptional<z.ZodObject<{
|
|
3104
3182
|
lang: z.ZodOptional<z.ZodString>;
|
|
3105
3183
|
styles: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString>>>;
|
|
3184
|
+
captionSplit: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
3185
|
+
none: "none";
|
|
3186
|
+
estimate: "estimate";
|
|
3187
|
+
}>>>;
|
|
3188
|
+
textSplit: z.ZodOptional<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
3189
|
+
type: z.ZodLiteral<"none">;
|
|
3190
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
3191
|
+
type: z.ZodLiteral<"delimiters">;
|
|
3192
|
+
delimiters: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
3193
|
+
}, z.core.$strip>], "type">>;
|
|
3106
3194
|
}, z.core.$strict>>;
|
|
3107
3195
|
imageNames: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
3108
3196
|
imagePrompt: z.ZodOptional<z.ZodString>;
|
|
@@ -3136,6 +3224,11 @@ export declare const mulmoStudioSchema: z.ZodObject<{
|
|
|
3136
3224
|
soundEffectFile: z.ZodOptional<z.ZodString>;
|
|
3137
3225
|
lipSyncFile: z.ZodOptional<z.ZodString>;
|
|
3138
3226
|
captionFile: z.ZodOptional<z.ZodString>;
|
|
3227
|
+
captionFiles: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
3228
|
+
file: z.ZodString;
|
|
3229
|
+
startAt: z.ZodNumber;
|
|
3230
|
+
endAt: z.ZodNumber;
|
|
3231
|
+
}, z.core.$strip>>>;
|
|
3139
3232
|
htmlImageFile: z.ZodOptional<z.ZodString>;
|
|
3140
3233
|
markdown: z.ZodOptional<z.ZodString>;
|
|
3141
3234
|
html: z.ZodOptional<z.ZodString>;
|
|
@@ -3464,6 +3557,16 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
|
|
|
3464
3557
|
captionParams: z.ZodOptional<z.ZodObject<{
|
|
3465
3558
|
lang: z.ZodOptional<z.ZodString>;
|
|
3466
3559
|
styles: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString>>>;
|
|
3560
|
+
captionSplit: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
3561
|
+
none: "none";
|
|
3562
|
+
estimate: "estimate";
|
|
3563
|
+
}>>>;
|
|
3564
|
+
textSplit: z.ZodOptional<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
3565
|
+
type: z.ZodLiteral<"none">;
|
|
3566
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
3567
|
+
type: z.ZodLiteral<"delimiters">;
|
|
3568
|
+
delimiters: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
3569
|
+
}, z.core.$strip>], "type">>;
|
|
3467
3570
|
}, z.core.$strict>>;
|
|
3468
3571
|
audioParams: z.ZodDefault<z.ZodObject<{
|
|
3469
3572
|
padding: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
@@ -3809,6 +3912,16 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
|
|
|
3809
3912
|
captionParams: z.ZodOptional<z.ZodObject<{
|
|
3810
3913
|
lang: z.ZodOptional<z.ZodString>;
|
|
3811
3914
|
styles: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString>>>;
|
|
3915
|
+
captionSplit: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
3916
|
+
none: "none";
|
|
3917
|
+
estimate: "estimate";
|
|
3918
|
+
}>>>;
|
|
3919
|
+
textSplit: z.ZodOptional<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
3920
|
+
type: z.ZodLiteral<"none">;
|
|
3921
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
3922
|
+
type: z.ZodLiteral<"delimiters">;
|
|
3923
|
+
delimiters: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
3924
|
+
}, z.core.$strip>], "type">>;
|
|
3812
3925
|
}, z.core.$strict>>;
|
|
3813
3926
|
audioParams: z.ZodDefault<z.ZodObject<{
|
|
3814
3927
|
padding: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
package/lib/types/schema.js
CHANGED
|
@@ -119,10 +119,20 @@ export const mulmoTextSlideMediaSchema = z
|
|
|
119
119
|
}),
|
|
120
120
|
})
|
|
121
121
|
.strict();
|
|
122
|
+
export const captionSplitSchema = z.enum(["none", "estimate"]).default("none");
|
|
123
|
+
export const textSplitSchema = z.discriminatedUnion("type", [
|
|
124
|
+
z.object({ type: z.literal("none") }),
|
|
125
|
+
z.object({
|
|
126
|
+
type: z.literal("delimiters"),
|
|
127
|
+
delimiters: z.array(z.string()).optional(), // default: ["。", "?", "!", ".", "?", "!"]
|
|
128
|
+
}),
|
|
129
|
+
]);
|
|
122
130
|
export const mulmoCaptionParamsSchema = z
|
|
123
131
|
.object({
|
|
124
132
|
lang: langSchema.optional(),
|
|
125
133
|
styles: z.array(z.string()).optional().default([]), // css styles
|
|
134
|
+
captionSplit: captionSplitSchema.optional(), // how to determine caption timing
|
|
135
|
+
textSplit: textSplitSchema.optional(), // how to split text into segments (default: none)
|
|
126
136
|
})
|
|
127
137
|
.strict();
|
|
128
138
|
export const mulmoChartMediaSchema = z
|
|
@@ -317,6 +327,7 @@ export const mulmoBeatSchema = z
|
|
|
317
327
|
.object({
|
|
318
328
|
speaker: speakerIdSchema.optional(),
|
|
319
329
|
text: z.string().optional().default("").describe("Text to be spoken. If empty, the audio is not generated."),
|
|
330
|
+
texts: z.array(z.string()).optional().describe("Manually split texts for captions. Takes precedence over text for caption display."),
|
|
320
331
|
id: z.string().optional().describe("Unique identifier for the beat."),
|
|
321
332
|
description: z.string().optional(),
|
|
322
333
|
image: mulmoImageAssetSchema.optional(),
|
|
@@ -442,7 +453,14 @@ export const mulmoStudioBeatSchema = z
|
|
|
442
453
|
movieFile: z.string().optional(), // path to the movie file
|
|
443
454
|
soundEffectFile: z.string().optional(), // path to the sound effect file
|
|
444
455
|
lipSyncFile: z.string().optional(), // path to the lip sync file
|
|
445
|
-
captionFile: z.string().optional(), // path to the caption image
|
|
456
|
+
captionFile: z.string().optional(), // path to the caption image (deprecated, use captionFiles)
|
|
457
|
+
captionFiles: z
|
|
458
|
+
.array(z.object({
|
|
459
|
+
file: z.string(),
|
|
460
|
+
startAt: z.number(), // absolute start time in seconds
|
|
461
|
+
endAt: z.number(), // absolute end time in seconds
|
|
462
|
+
}))
|
|
463
|
+
.optional(), // split caption images with timing
|
|
446
464
|
htmlImageFile: z.string().optional(), // path to the html image
|
|
447
465
|
markdown: z.string().optional(), // markdown string (alternative to image)
|
|
448
466
|
html: z.string().optional(), // html string (alternative to image)
|
package/lib/utils/context.d.ts
CHANGED
|
@@ -247,6 +247,7 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
|
|
|
247
247
|
beats: {
|
|
248
248
|
text: string;
|
|
249
249
|
speaker?: string | undefined;
|
|
250
|
+
texts?: string[] | undefined;
|
|
250
251
|
id?: string | undefined;
|
|
251
252
|
description?: string | undefined;
|
|
252
253
|
image?: {
|
|
@@ -555,6 +556,13 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
|
|
|
555
556
|
captionParams?: {
|
|
556
557
|
styles: string[];
|
|
557
558
|
lang?: string | undefined;
|
|
559
|
+
captionSplit?: "none" | "estimate" | undefined;
|
|
560
|
+
textSplit?: {
|
|
561
|
+
type: "none";
|
|
562
|
+
} | {
|
|
563
|
+
type: "delimiters";
|
|
564
|
+
delimiters?: string[] | undefined;
|
|
565
|
+
} | undefined;
|
|
558
566
|
} | undefined;
|
|
559
567
|
imageNames?: string[] | undefined;
|
|
560
568
|
imagePrompt?: string | undefined;
|
|
@@ -583,6 +591,13 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
|
|
|
583
591
|
captionParams?: {
|
|
584
592
|
styles: string[];
|
|
585
593
|
lang?: string | undefined;
|
|
594
|
+
captionSplit?: "none" | "estimate" | undefined;
|
|
595
|
+
textSplit?: {
|
|
596
|
+
type: "none";
|
|
597
|
+
} | {
|
|
598
|
+
type: "delimiters";
|
|
599
|
+
delimiters?: string[] | undefined;
|
|
600
|
+
} | undefined;
|
|
586
601
|
} | undefined;
|
|
587
602
|
title?: string | undefined;
|
|
588
603
|
description?: string | undefined;
|
|
@@ -611,6 +626,11 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
|
|
|
611
626
|
soundEffectFile?: string | undefined;
|
|
612
627
|
lipSyncFile?: string | undefined;
|
|
613
628
|
captionFile?: string | undefined;
|
|
629
|
+
captionFiles?: {
|
|
630
|
+
file: string;
|
|
631
|
+
startAt: number;
|
|
632
|
+
endAt: number;
|
|
633
|
+
}[] | undefined;
|
|
614
634
|
htmlImageFile?: string | undefined;
|
|
615
635
|
markdown?: string | undefined;
|
|
616
636
|
html?: string | undefined;
|
|
@@ -867,6 +887,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
867
887
|
beats: {
|
|
868
888
|
text: string;
|
|
869
889
|
speaker?: string | undefined;
|
|
890
|
+
texts?: string[] | undefined;
|
|
870
891
|
id?: string | undefined;
|
|
871
892
|
description?: string | undefined;
|
|
872
893
|
image?: {
|
|
@@ -1175,6 +1196,13 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
1175
1196
|
captionParams?: {
|
|
1176
1197
|
styles: string[];
|
|
1177
1198
|
lang?: string | undefined;
|
|
1199
|
+
captionSplit?: "none" | "estimate" | undefined;
|
|
1200
|
+
textSplit?: {
|
|
1201
|
+
type: "none";
|
|
1202
|
+
} | {
|
|
1203
|
+
type: "delimiters";
|
|
1204
|
+
delimiters?: string[] | undefined;
|
|
1205
|
+
} | undefined;
|
|
1178
1206
|
} | undefined;
|
|
1179
1207
|
imageNames?: string[] | undefined;
|
|
1180
1208
|
imagePrompt?: string | undefined;
|
|
@@ -1203,6 +1231,13 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
1203
1231
|
captionParams?: {
|
|
1204
1232
|
styles: string[];
|
|
1205
1233
|
lang?: string | undefined;
|
|
1234
|
+
captionSplit?: "none" | "estimate" | undefined;
|
|
1235
|
+
textSplit?: {
|
|
1236
|
+
type: "none";
|
|
1237
|
+
} | {
|
|
1238
|
+
type: "delimiters";
|
|
1239
|
+
delimiters?: string[] | undefined;
|
|
1240
|
+
} | undefined;
|
|
1206
1241
|
} | undefined;
|
|
1207
1242
|
title?: string | undefined;
|
|
1208
1243
|
description?: string | undefined;
|
|
@@ -1231,6 +1266,11 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
1231
1266
|
soundEffectFile?: string | undefined;
|
|
1232
1267
|
lipSyncFile?: string | undefined;
|
|
1233
1268
|
captionFile?: string | undefined;
|
|
1269
|
+
captionFiles?: {
|
|
1270
|
+
file: string;
|
|
1271
|
+
startAt: number;
|
|
1272
|
+
endAt: number;
|
|
1273
|
+
}[] | undefined;
|
|
1234
1274
|
htmlImageFile?: string | undefined;
|
|
1235
1275
|
markdown?: string | undefined;
|
|
1236
1276
|
html?: string | undefined;
|
|
@@ -1504,6 +1544,13 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
1504
1544
|
captionParams?: {
|
|
1505
1545
|
styles: string[];
|
|
1506
1546
|
lang?: string | undefined;
|
|
1547
|
+
captionSplit?: "none" | "estimate" | undefined;
|
|
1548
|
+
textSplit?: {
|
|
1549
|
+
type: "none";
|
|
1550
|
+
} | {
|
|
1551
|
+
type: "delimiters";
|
|
1552
|
+
delimiters?: string[] | undefined;
|
|
1553
|
+
} | undefined;
|
|
1507
1554
|
} | undefined;
|
|
1508
1555
|
};
|
|
1509
1556
|
sessionState: {
|
package/lib/utils/file.d.ts
CHANGED
|
@@ -34,7 +34,7 @@ export declare const getBeatMoviePaths: (context: MulmoStudioContext, index: num
|
|
|
34
34
|
lipSyncFile: string;
|
|
35
35
|
};
|
|
36
36
|
export declare const getReferenceImagePath: (context: MulmoStudioContext, key: string, extension: string) => string;
|
|
37
|
-
export declare const getCaptionImagePath: (context: MulmoStudioContext, index: number) => string;
|
|
37
|
+
export declare const getCaptionImagePath: (context: MulmoStudioContext, index: number, subIndex?: number) => string;
|
|
38
38
|
export declare const getOutputPdfFilePath: (outDirPath: string, fileName: string, pdfMode: PDFMode, lang?: string) => string;
|
|
39
39
|
export declare const getPromptTemplateFilePath: (promptTemplateName: string) => string;
|
|
40
40
|
export declare const mkdir: (dirPath: string) => void;
|
package/lib/utils/file.js
CHANGED
|
@@ -109,8 +109,11 @@ export const getReferenceImagePath = (context, key, extension) => {
|
|
|
109
109
|
const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
|
|
110
110
|
return `${imageProjectDirPath}/${key}.${extension}`;
|
|
111
111
|
};
|
|
112
|
-
export const getCaptionImagePath = (context, index) => {
|
|
112
|
+
export const getCaptionImagePath = (context, index, subIndex) => {
|
|
113
113
|
const imageProjectDirPath = MulmoStudioContextMethods.getImageProjectDirPath(context);
|
|
114
|
+
if (subIndex !== undefined) {
|
|
115
|
+
return `${imageProjectDirPath}/${index}_caption_${subIndex}.png`;
|
|
116
|
+
}
|
|
114
117
|
return `${imageProjectDirPath}/${index}_caption.png`;
|
|
115
118
|
};
|
|
116
119
|
// pdf
|
package/package.json
CHANGED
package/scripts/test/test.json
CHANGED
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
"displayName": {
|
|
24
24
|
"ja": "アナウンサー"
|
|
25
25
|
},
|
|
26
|
-
"voiceId": "
|
|
26
|
+
"voiceId": "Aoede",
|
|
27
27
|
"speechOptions": {
|
|
28
28
|
"speed": 1.666
|
|
29
29
|
}
|
|
@@ -33,14 +33,14 @@
|
|
|
33
33
|
"displayName": {
|
|
34
34
|
"ja": "生徒"
|
|
35
35
|
},
|
|
36
|
-
"voiceId": "
|
|
36
|
+
"voiceId": "Puck"
|
|
37
37
|
},
|
|
38
38
|
"Teacher": {
|
|
39
39
|
"provider": "gemini",
|
|
40
40
|
"displayName": {
|
|
41
41
|
"ja": "先生"
|
|
42
42
|
},
|
|
43
|
-
"voiceId": "
|
|
43
|
+
"voiceId": "Charon"
|
|
44
44
|
}
|
|
45
45
|
}
|
|
46
46
|
},
|
package/scripts/test/test2.json
CHANGED
|
@@ -17,25 +17,27 @@
|
|
|
17
17
|
"style": "<style>monochrome"
|
|
18
18
|
},
|
|
19
19
|
"speechParams": {
|
|
20
|
-
"provider": "gemini",
|
|
21
20
|
"speakers": {
|
|
22
21
|
"Announcer": {
|
|
23
22
|
"displayName": {
|
|
24
23
|
"ja": "千草朋香"
|
|
25
24
|
},
|
|
26
|
-
"
|
|
25
|
+
"provider": "gemini",
|
|
26
|
+
"voiceId": "Aoede"
|
|
27
27
|
},
|
|
28
28
|
"Student": {
|
|
29
29
|
"displayName": {
|
|
30
30
|
"ja": "太郎"
|
|
31
31
|
},
|
|
32
|
-
"
|
|
32
|
+
"provider": "gemini",
|
|
33
|
+
"voiceId": "Puck"
|
|
33
34
|
},
|
|
34
35
|
"Teacher": {
|
|
35
36
|
"displayName": {
|
|
36
37
|
"ja": "山田先生"
|
|
37
38
|
},
|
|
38
|
-
"
|
|
39
|
+
"provider": "gemini",
|
|
40
|
+
"voiceId": "Charon"
|
|
39
41
|
}
|
|
40
42
|
}
|
|
41
43
|
},
|
|
@@ -22,9 +22,9 @@
|
|
|
22
22
|
"provider": "elevenlabs",
|
|
23
23
|
"voiceId": "3JDquces8E8bkmvbh6Bc"
|
|
24
24
|
},
|
|
25
|
-
"
|
|
26
|
-
"provider": "
|
|
27
|
-
"voiceId": "
|
|
25
|
+
"Kotodama": {
|
|
26
|
+
"provider": "kotodama",
|
|
27
|
+
"voiceId": "Poporo"
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
30
|
},
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
},
|
|
42
42
|
{
|
|
43
43
|
"speaker": "Gemini",
|
|
44
|
-
"text": "
|
|
44
|
+
"text": "こんにちは、テストです。ジェミニです。",
|
|
45
45
|
"image": {
|
|
46
46
|
"type": "textSlide",
|
|
47
47
|
"slide": {
|
|
@@ -70,12 +70,12 @@
|
|
|
70
70
|
}
|
|
71
71
|
},
|
|
72
72
|
{
|
|
73
|
-
"speaker": "
|
|
74
|
-
"text": "
|
|
73
|
+
"speaker": "Kotodama",
|
|
74
|
+
"text": "こんにちは、テストです。コトダマ",
|
|
75
75
|
"image": {
|
|
76
76
|
"type": "textSlide",
|
|
77
77
|
"slide": {
|
|
78
|
-
"title": "
|
|
78
|
+
"title": "Kotodama TTS"
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
81
|
}
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
"lang": {
|
|
12
12
|
"ja": {
|
|
13
13
|
"provider": "gemini",
|
|
14
|
-
"voiceId": "
|
|
14
|
+
"voiceId": "Leda"
|
|
15
15
|
}
|
|
16
16
|
}
|
|
17
17
|
}
|
|
@@ -70,7 +70,7 @@
|
|
|
70
70
|
"image": {
|
|
71
71
|
"type": "textSlide",
|
|
72
72
|
"slide": {
|
|
73
|
-
"title": "Text replacement test for
|
|
73
|
+
"title": "Text replacement test for Gemini"
|
|
74
74
|
}
|
|
75
75
|
}
|
|
76
76
|
},
|
|
@@ -29,7 +29,7 @@
|
|
|
29
29
|
}
|
|
30
30
|
},
|
|
31
31
|
"Host": {
|
|
32
|
-
"voiceId": "
|
|
32
|
+
"voiceId": "Kore",
|
|
33
33
|
"provider": "gemini",
|
|
34
34
|
"displayName": {
|
|
35
35
|
"en": "Japanese Host"
|
|
@@ -79,12 +79,12 @@
|
|
|
79
79
|
},
|
|
80
80
|
{
|
|
81
81
|
"speaker": "Host",
|
|
82
|
-
"text": "
|
|
82
|
+
"text": "そして私は、Gemini TTS です。Google's TTS とは別の方法を利用しています。",
|
|
83
83
|
"image": {
|
|
84
84
|
"type": "textSlide",
|
|
85
85
|
"slide": {
|
|
86
86
|
"title": "Mixed Provider Demo",
|
|
87
|
-
"subtitle": "
|
|
87
|
+
"subtitle": "Gemini Speaker (Japanese)"
|
|
88
88
|
}
|
|
89
89
|
}
|
|
90
90
|
}
|