mulmocast 1.1.4 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/audio.d.ts +0 -1
- package/lib/actions/audio.js +8 -12
- package/lib/actions/images.js +1 -0
- package/lib/actions/movie.js +1 -3
- package/lib/agents/image_openai_agent.js +4 -1
- package/lib/methods/mulmo_presentation_style.d.ts +2 -3
- package/lib/methods/mulmo_presentation_style.js +14 -8
- package/lib/types/agent.d.ts +3 -0
- package/lib/types/schema.d.ts +704 -0
- package/lib/types/schema.js +5 -1
- package/lib/utils/context.d.ts +25 -0
- package/lib/utils/file.d.ts +1 -1
- package/lib/utils/file.js +5 -2
- package/lib/utils/preprocess.d.ts +13 -0
- package/package.json +2 -1
- package/scripts/templates/image_prompt_only_template.ts +95 -0
- package/scripts/test/gpt.json +32 -0
- package/scripts/test/mulmo_story.json +11 -0
- package/scripts/test/test.json +64 -0
- package/scripts/test/test1.json +40 -0
- package/scripts/test/test2.json +66 -0
- package/scripts/test/test_audio.json +151 -0
- package/scripts/test/test_audio_instructions.json +69 -0
- package/scripts/test/test_beats.json +58 -0
- package/scripts/test/test_captions.json +52 -0
- package/scripts/test/test_elevenlabs_models.json +193 -0
- package/scripts/test/test_en.json +29 -0
- package/scripts/test/test_hello.json +17 -0
- package/scripts/test/test_hello_google.json +25 -0
- package/scripts/test/test_html.json +66 -0
- package/scripts/test/test_image_refs.json +49 -0
- package/scripts/test/test_images.json +48 -0
- package/scripts/test/test_lang.json +31 -0
- package/scripts/test/test_layout.json +152 -0
- package/scripts/test/test_lipsync.json +53 -0
- package/scripts/test/test_loop.json +34 -0
- package/scripts/test/test_media.json +244 -0
- package/scripts/test/test_mixed_providers.json +91 -0
- package/scripts/test/test_movie.json +39 -0
- package/scripts/test/test_no_audio.json +252 -0
- package/scripts/test/test_no_audio_with_credit.json +253 -0
- package/scripts/test/test_order.json +68 -0
- package/scripts/test/test_order_portrait.json +72 -0
- package/scripts/test/test_replicate.json +126 -0
- package/scripts/test/test_slideout_left_no_audio.json +45 -0
- package/scripts/test/test_sound_effect.json +41 -0
- package/scripts/test/test_spillover.json +116 -0
- package/scripts/test/test_transition.json +55 -0
- package/scripts/test/test_transition_no_audio.json +45 -0
- package/scripts/test/test_video_speed.json +80 -0
- package/scripts/test/test_voice_over.json +104 -0
- package/scripts/test/test_voices.json +54 -0
package/lib/types/schema.js
CHANGED
|
@@ -32,7 +32,9 @@ export const speakerDataSchema = z
|
|
|
32
32
|
model: z.string().optional().describe("TTS model to use for this speaker"),
|
|
33
33
|
})
|
|
34
34
|
.strict();
|
|
35
|
-
export const speakerDictionarySchema = z.record(speakerIdSchema, speakerDataSchema);
|
|
35
|
+
export const speakerDictionarySchema = z.record(speakerIdSchema, speakerDataSchema.extend({
|
|
36
|
+
lang: z.record(langSchema, speakerDataSchema).optional(),
|
|
37
|
+
}));
|
|
36
38
|
export const mediaSourceSchema = z.discriminatedUnion("kind", [
|
|
37
39
|
z.object({ kind: z.literal("url"), url: URLStringSchema }).strict(), // https://example.com/foo.pdf
|
|
38
40
|
z.object({ kind: z.literal("base64"), data: z.string() }).strict(), // base64
|
|
@@ -172,6 +174,7 @@ export const mulmoOpenAIImageModelSchema = z
|
|
|
172
174
|
.object({
|
|
173
175
|
provider: z.literal("openai"),
|
|
174
176
|
model: z.enum(provider2ImageAgent["openai"].models).optional(),
|
|
177
|
+
quality: z.enum(["low", "medium", "high", "auto"]).optional(),
|
|
175
178
|
})
|
|
176
179
|
.strict();
|
|
177
180
|
// NOTE: This is for UI only. (until we figure out how to use it in mulmoImageParamsSchema)
|
|
@@ -185,6 +188,7 @@ export const mulmoImageParamsSchema = z
|
|
|
185
188
|
.object({
|
|
186
189
|
provider: text2ImageProviderSchema, // has default value
|
|
187
190
|
model: z.string().optional(), // default: provider specific
|
|
191
|
+
quality: z.string().optional(), // optional image quality (model specific)
|
|
188
192
|
style: z.string().optional(), // optional image style
|
|
189
193
|
moderation: z.string().optional(), // optional image style
|
|
190
194
|
images: mulmoImageParamsImagesSchema.optional(),
|
package/lib/utils/context.d.ts
CHANGED
|
@@ -25,6 +25,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
25
25
|
provider: string;
|
|
26
26
|
model?: string | undefined;
|
|
27
27
|
style?: string | undefined;
|
|
28
|
+
quality?: string | undefined;
|
|
28
29
|
moderation?: string | undefined;
|
|
29
30
|
images?: Record<string, {
|
|
30
31
|
type: "image";
|
|
@@ -94,6 +95,17 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
94
95
|
speechParams: {
|
|
95
96
|
speakers: Record<string, {
|
|
96
97
|
voiceId: string;
|
|
98
|
+
lang?: Record<string, {
|
|
99
|
+
voiceId: string;
|
|
100
|
+
displayName?: Record<string, string> | undefined;
|
|
101
|
+
isDefault?: boolean | undefined;
|
|
102
|
+
speechOptions?: {
|
|
103
|
+
speed?: number | undefined;
|
|
104
|
+
instruction?: string | undefined;
|
|
105
|
+
} | undefined;
|
|
106
|
+
provider?: string | undefined;
|
|
107
|
+
model?: string | undefined;
|
|
108
|
+
}> | undefined;
|
|
97
109
|
displayName?: Record<string, string> | undefined;
|
|
98
110
|
isDefault?: boolean | undefined;
|
|
99
111
|
speechOptions?: {
|
|
@@ -242,6 +254,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
242
254
|
provider: string;
|
|
243
255
|
model?: string | undefined;
|
|
244
256
|
style?: string | undefined;
|
|
257
|
+
quality?: string | undefined;
|
|
245
258
|
moderation?: string | undefined;
|
|
246
259
|
images?: Record<string, {
|
|
247
260
|
type: "image";
|
|
@@ -362,6 +375,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
362
375
|
provider: string;
|
|
363
376
|
model?: string | undefined;
|
|
364
377
|
style?: string | undefined;
|
|
378
|
+
quality?: string | undefined;
|
|
365
379
|
moderation?: string | undefined;
|
|
366
380
|
images?: Record<string, {
|
|
367
381
|
type: "image";
|
|
@@ -431,6 +445,17 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
431
445
|
speechParams: {
|
|
432
446
|
speakers: Record<string, {
|
|
433
447
|
voiceId: string;
|
|
448
|
+
lang?: Record<string, {
|
|
449
|
+
voiceId: string;
|
|
450
|
+
displayName?: Record<string, string> | undefined;
|
|
451
|
+
isDefault?: boolean | undefined;
|
|
452
|
+
speechOptions?: {
|
|
453
|
+
speed?: number | undefined;
|
|
454
|
+
instruction?: string | undefined;
|
|
455
|
+
} | undefined;
|
|
456
|
+
provider?: string | undefined;
|
|
457
|
+
model?: string | undefined;
|
|
458
|
+
}> | undefined;
|
|
434
459
|
displayName?: Record<string, string> | undefined;
|
|
435
460
|
isDefault?: boolean | undefined;
|
|
436
461
|
speechOptions?: {
|
package/lib/utils/file.d.ts
CHANGED
|
@@ -21,7 +21,7 @@ export declare const getOutputStudioFilePath: (outDirPath: string, fileName: str
|
|
|
21
21
|
export declare const getOutputMultilingualFilePath: (outDirPath: string, fileName: string) => string;
|
|
22
22
|
export declare const resolveDirPath: (dirPath: string, studioFileName: string) => string;
|
|
23
23
|
export declare const getAudioFilePath: (audioDirPath: string, dirName: string, fileName: string, lang?: string) => string;
|
|
24
|
-
export declare const getAudioArtifactFilePath: (
|
|
24
|
+
export declare const getAudioArtifactFilePath: (context: MulmoStudioContext) => string;
|
|
25
25
|
export declare const getOutputVideoFilePath: (outDirPath: string, fileName: string, lang?: string, caption?: string) => string;
|
|
26
26
|
export declare const imageSuffix = "p";
|
|
27
27
|
export declare const getBeatPngImagePath: (context: MulmoStudioContext, index: number) => string;
|
package/lib/utils/file.js
CHANGED
|
@@ -72,8 +72,11 @@ export const getAudioFilePath = (audioDirPath, dirName, fileName, lang) => {
|
|
|
72
72
|
}
|
|
73
73
|
return path.resolve(audioDirPath, dirName, fileName + ".mp3");
|
|
74
74
|
};
|
|
75
|
-
export const getAudioArtifactFilePath = (
|
|
76
|
-
|
|
75
|
+
export const getAudioArtifactFilePath = (context) => {
|
|
76
|
+
const suffix = context.lang ? `_${context.lang}` : "";
|
|
77
|
+
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
78
|
+
const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
|
|
79
|
+
return path.resolve(outDirPath, fileName + suffix + ".mp3");
|
|
77
80
|
};
|
|
78
81
|
export const getOutputVideoFilePath = (outDirPath, fileName, lang, caption) => {
|
|
79
82
|
const suffix = lang ? `_${lang}` : "";
|
|
@@ -20,6 +20,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
|
|
|
20
20
|
provider: string;
|
|
21
21
|
model?: string | undefined;
|
|
22
22
|
style?: string | undefined;
|
|
23
|
+
quality?: string | undefined;
|
|
23
24
|
moderation?: string | undefined;
|
|
24
25
|
images?: Record<string, {
|
|
25
26
|
type: "image";
|
|
@@ -89,6 +90,17 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
|
|
|
89
90
|
speechParams: {
|
|
90
91
|
speakers: Record<string, {
|
|
91
92
|
voiceId: string;
|
|
93
|
+
lang?: Record<string, {
|
|
94
|
+
voiceId: string;
|
|
95
|
+
displayName?: Record<string, string> | undefined;
|
|
96
|
+
isDefault?: boolean | undefined;
|
|
97
|
+
speechOptions?: {
|
|
98
|
+
speed?: number | undefined;
|
|
99
|
+
instruction?: string | undefined;
|
|
100
|
+
} | undefined;
|
|
101
|
+
provider?: string | undefined;
|
|
102
|
+
model?: string | undefined;
|
|
103
|
+
}> | undefined;
|
|
92
104
|
displayName?: Record<string, string> | undefined;
|
|
93
105
|
isDefault?: boolean | undefined;
|
|
94
106
|
speechOptions?: {
|
|
@@ -237,6 +249,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
|
|
|
237
249
|
provider: string;
|
|
238
250
|
model?: string | undefined;
|
|
239
251
|
style?: string | undefined;
|
|
252
|
+
quality?: string | undefined;
|
|
240
253
|
moderation?: string | undefined;
|
|
241
254
|
images?: Record<string, {
|
|
242
255
|
type: "image";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mulmocast",
|
|
3
|
-
"version": "1.1.4",
|
|
3
|
+
"version": "1.1.5",
|
|
4
4
|
"description": "",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "lib/index.node.js",
|
|
@@ -23,6 +23,7 @@
|
|
|
23
23
|
"files": [
|
|
24
24
|
"./lib",
|
|
25
25
|
"./scripts/templates",
|
|
26
|
+
"./scripts/test",
|
|
26
27
|
"./assets/audio/silent60sec.mp3",
|
|
27
28
|
"./assets/html/",
|
|
28
29
|
"./assets/templates/"
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import {
|
|
2
|
+
mulmoCastCreditSchema,
|
|
3
|
+
langSchema,
|
|
4
|
+
}
|
|
5
|
+
export const mulmoPresentationStyleSchema = z.object({
|
|
6
|
+
$mulmocast: mulmoCastCreditSchema,
|
|
7
|
+
"lang": "en",
|
|
8
|
+
title: z.string().default("[TITLE: Brief, engaging title for the topic]")
|
|
9
|
+
lang: z.string().default("en"),
|
|
10
|
+
"references": z.array(z.object({
|
|
11
|
+
"url": "[SOURCE_URL: URL of the source material]",
|
|
12
|
+
"title": "[SOURCE_TITLE: Title of the referenced article, or paper]",
|
|
13
|
+
"type": "[SOURCE_TYPE: article, paper]"
|
|
14
|
+
}
|
|
15
|
+
],
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
description: z.string().optional(),
|
|
19
|
+
references: z.array(mulmoReferenceSchema).optional(),
|
|
20
|
+
beats: z.array(mulmoBeatSchema).min(1),
|
|
21
|
+
|
|
22
|
+
// TODO: Delete it later
|
|
23
|
+
imagePath: z.string().optional(), // for keynote images movie ??
|
|
24
|
+
canvasSize: mulmoCanvasDimensionSchema, // has default value
|
|
25
|
+
speechParams: z
|
|
26
|
+
.object({
|
|
27
|
+
speakers: speakerDictionarySchema,
|
|
28
|
+
})
|
|
29
|
+
.default({
|
|
30
|
+
speakers: {
|
|
31
|
+
[defaultSpeaker]: {
|
|
32
|
+
voiceId: "shimmer",
|
|
33
|
+
displayName: {
|
|
34
|
+
en: defaultSpeaker,
|
|
35
|
+
},
|
|
36
|
+
},
|
|
37
|
+
},
|
|
38
|
+
}),
|
|
39
|
+
imageParams: mulmoImageParamsSchema.optional().default({
|
|
40
|
+
provider: defaultProviders.text2image,
|
|
41
|
+
images: {},
|
|
42
|
+
}),
|
|
43
|
+
movieParams: mulmoMovieParamsSchema.optional().default({
|
|
44
|
+
provider: defaultProviders.text2movie,
|
|
45
|
+
}),
|
|
46
|
+
soundEffectParams: mulmoSoundEffectParamsSchema.optional().default({
|
|
47
|
+
provider: defaultProviders.soundEffect,
|
|
48
|
+
}),
|
|
49
|
+
lipSyncParams: mulmoLipSyncParamsSchema.optional(),
|
|
50
|
+
htmlImageParams: mulmoHtmlImageParamsSchema
|
|
51
|
+
.extend({
|
|
52
|
+
provider: text2HtmlImageProviderSchema,
|
|
53
|
+
})
|
|
54
|
+
.optional(),
|
|
55
|
+
// for textSlides
|
|
56
|
+
textSlideParams: textSlideParamsSchema.optional(),
|
|
57
|
+
captionParams: mulmoCaptionParamsSchema.optional(),
|
|
58
|
+
audioParams: audioParamsSchema.default({
|
|
59
|
+
introPadding: 1.0,
|
|
60
|
+
padding: 0.3,
|
|
61
|
+
closingPadding: 0.8,
|
|
62
|
+
outroPadding: 1.0,
|
|
63
|
+
bgmVolume: 0.2,
|
|
64
|
+
audioVolume: 1.0,
|
|
65
|
+
}),
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
{
|
|
69
|
+
"beats": [
|
|
70
|
+
{
|
|
71
|
+
"text": "[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]",
|
|
72
|
+
"imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
"text": "[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]",
|
|
76
|
+
"imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
|
|
77
|
+
},
|
|
78
|
+
{
|
|
79
|
+
"text": "[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]",
|
|
80
|
+
"imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
"text": "[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]",
|
|
84
|
+
"imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
"text": "[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]",
|
|
88
|
+
"imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
"text": "[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]",
|
|
92
|
+
"imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
|
|
93
|
+
}
|
|
94
|
+
]
|
|
95
|
+
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1",
|
|
4
|
+
"credit": "closing"
|
|
5
|
+
},
|
|
6
|
+
"title": "Testing OpenAI's new Image Generation",
|
|
7
|
+
"description": "Hello",
|
|
8
|
+
"imageParams": {
|
|
9
|
+
"model": "gpt-image-1",
|
|
10
|
+
"style": "<style>Japanese animation with soft watercolor backgrounds, characters with simple rounded faces, large expressive eyes, small nose and mouth, soft jawlines, minimalist facial features, pastel color palette, detailed natural environments, whimsical magical elements, hand-drawn aesthetic, gentle lighting, flowing movement in hair and clothing, nostalgic countryside scenery with fantasy elements."
|
|
11
|
+
},
|
|
12
|
+
"speechParams": {
|
|
13
|
+
"speakers": {
|
|
14
|
+
"Host": {
|
|
15
|
+
"voiceId": "shimmer",
|
|
16
|
+
"displayName": {
|
|
17
|
+
"en": "Host"
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
},
|
|
22
|
+
"beats": [
|
|
23
|
+
{
|
|
24
|
+
"speaker": "Host",
|
|
25
|
+
"text": "How are you?",
|
|
26
|
+
"imagePrompt": "A witch in Harajuku",
|
|
27
|
+
"imageParams": {
|
|
28
|
+
"style": "Ukiyoe-style"
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
]
|
|
32
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "MulmoScript",
|
|
3
|
+
"scenes": [
|
|
4
|
+
{
|
|
5
|
+
"description": "MulmoCast is a multi-modal presentation platform built for the generative AI era. Traditional tools like PowerPoint and Keynote were designed decades ago for human authors. Today, however, large language models (LLMs) are generating content—and they need a native environment optimized for their capabilities. MulmoCast is that environment. It empowers AI to automatically create and deliver rich, multi-modal presentations—including slides, videos, podcasts, documents, and comics—using our open presentation language: MulmoScript."
|
|
6
|
+
},
|
|
7
|
+
{
|
|
8
|
+
"description": " MulmoScript is a JSON-based language that enables LLMs to describe structured, machine-readable presentations. It supports a wide range of elements: bullet points, charts, graphs, images, voiceovers, and videos. Just as HTML unlocked the web, MulmoScript enables interoperability, customization, and ecosystem growth for AI-generated content. MulmoCast renders this content into any format, giving end users complete flexibility in how they consume it—whether as a slideshow, podcast, video, or document. It also supports multilingual output for both reading and listening."
|
|
9
|
+
}
|
|
10
|
+
]
|
|
11
|
+
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1",
|
|
4
|
+
"credit": "closing"
|
|
5
|
+
},
|
|
6
|
+
"title": "MASAI: A Modular Future for Software Engineering AI",
|
|
7
|
+
"description": "Exploring MASAI, a modular approach for AI agents in software engineering that revolutionizes how complex coding issues are tackled.",
|
|
8
|
+
"references": [
|
|
9
|
+
{
|
|
10
|
+
"url": "https://arxiv.org/abs/2406.11638",
|
|
11
|
+
"title": "MASAI: A Modular Future for Software Engineering AI",
|
|
12
|
+
"description": "An article on MASAI, a modular approach for AI agents in software engineering."
|
|
13
|
+
}
|
|
14
|
+
],
|
|
15
|
+
"lang": "ja",
|
|
16
|
+
"imageParams": {
|
|
17
|
+
"style": "<style>monochrome"
|
|
18
|
+
},
|
|
19
|
+
"speechParams": {
|
|
20
|
+
"speakers": {
|
|
21
|
+
"Announcer": {
|
|
22
|
+
"provider": "nijivoice",
|
|
23
|
+
"displayName": {
|
|
24
|
+
"ja": "アナウンサー"
|
|
25
|
+
},
|
|
26
|
+
"voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c",
|
|
27
|
+
"speechOptions": {
|
|
28
|
+
"speed": 1.666
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"Student": {
|
|
32
|
+
"provider": "nijivoice",
|
|
33
|
+
"displayName": {
|
|
34
|
+
"ja": "生徒"
|
|
35
|
+
},
|
|
36
|
+
"voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f"
|
|
37
|
+
},
|
|
38
|
+
"Teacher": {
|
|
39
|
+
"provider": "nijivoice",
|
|
40
|
+
"displayName": {
|
|
41
|
+
"ja": "先生"
|
|
42
|
+
},
|
|
43
|
+
"voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae"
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"beats": [
|
|
48
|
+
{
|
|
49
|
+
"speaker": "Announcer",
|
|
50
|
+
"text": "こんにちは。米国で活躍するエンジニアが新しい技術やビジネスを分かりやすく解説する、中島聡のLife is beautiful。"
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"speaker": "Announcer",
|
|
54
|
+
"text": "今日は、アメリカで発表された「スターゲート・プロジェクト」に、ついて解説します。",
|
|
55
|
+
"imagePrompt": "Blue sky, a flock of birds",
|
|
56
|
+
"imageParams": {
|
|
57
|
+
"style": "<style>sumie-style"
|
|
58
|
+
},
|
|
59
|
+
"speechOptions": {
|
|
60
|
+
"speed": 0.8
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
]
|
|
64
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1",
|
|
4
|
+
"credit": "closing"
|
|
5
|
+
},
|
|
6
|
+
"title": "The Honey Trap vs. The Frontier: Engineering at Tesla and SpaceX",
|
|
7
|
+
"description": "We dive into Elon Musk's philosophy on building environments where engineers can truly flourish, contrasting the comfort-focused 'honey trap' with the high-expectation culture at Tesla and SpaceX.",
|
|
8
|
+
"speechParams": {
|
|
9
|
+
"speakers": {
|
|
10
|
+
"Host": {
|
|
11
|
+
"displayName": {
|
|
12
|
+
"ja": "司会"
|
|
13
|
+
},
|
|
14
|
+
"voiceId": "sage"
|
|
15
|
+
},
|
|
16
|
+
"Guest": {
|
|
17
|
+
"displayName": {
|
|
18
|
+
"ja": "ゲスト"
|
|
19
|
+
},
|
|
20
|
+
"voiceId": "shimmer"
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
},
|
|
24
|
+
"beats": [
|
|
25
|
+
{
|
|
26
|
+
"speaker": "Host",
|
|
27
|
+
"text": "Hello and welcome to another episode of 'life is artificial', where we explore the cutting edge of technology, innovation, and what the future could look like.",
|
|
28
|
+
"speechOptions": {
|
|
29
|
+
"instruction": "Voice: Deep and rugged, with a hearty, boisterous quality, like a seasoned sea captain who's seen many voyages.\nTone: Friendly and spirited, with a sense of adventure and enthusiasm, making every detail feel like part of a grand journey.\nDialect: Classic pirate speech with old-timey nautical phrases, dropped 'g's, and exaggerated 'Arrrs' to stay in character."
|
|
30
|
+
}
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"speaker": "Guest",
|
|
34
|
+
"text": "こんにちは、ポッドキャスト版、ライフ・イズ・ビューティフルへようこそ。新しいテクノロジーについて分かりやすく語ります",
|
|
35
|
+
"speechOptions": {
|
|
36
|
+
"instruction": "voice: 赤ちゃん\nVoice: 高い声で、舌ったらず。甘えっぽく。語尾に’んちゃ’と付ける"
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
]
|
|
40
|
+
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1",
|
|
4
|
+
"credit": "closing"
|
|
5
|
+
},
|
|
6
|
+
"title": "AIの進化と私たちの未来 - サム・アルトマン氏の3つの観察",
|
|
7
|
+
"description": "OpenAIのサム・アルトマン氏による、AIの進化と社会への影響に関する洞察を、高校生向けに分かりやすく解説する対話形式のスクリプト。",
|
|
8
|
+
"references": [
|
|
9
|
+
{
|
|
10
|
+
"url": "https://blog.samaltman.com/three-observations",
|
|
11
|
+
"title": "Three Observations",
|
|
12
|
+
"description": "An article on AI's evolution and its impact on society by Sam Altman."
|
|
13
|
+
}
|
|
14
|
+
],
|
|
15
|
+
"lang": "ja",
|
|
16
|
+
"imageParams": {
|
|
17
|
+
"style": "<style>monochrome"
|
|
18
|
+
},
|
|
19
|
+
"speechParams": {
|
|
20
|
+
"provider": "nijivoice",
|
|
21
|
+
"speakers": {
|
|
22
|
+
"Announcer": {
|
|
23
|
+
"displayName": {
|
|
24
|
+
"ja": "千草朋香"
|
|
25
|
+
},
|
|
26
|
+
"voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c"
|
|
27
|
+
},
|
|
28
|
+
"Student": {
|
|
29
|
+
"displayName": {
|
|
30
|
+
"ja": "太郎"
|
|
31
|
+
},
|
|
32
|
+
"voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f"
|
|
33
|
+
},
|
|
34
|
+
"Teacher": {
|
|
35
|
+
"displayName": {
|
|
36
|
+
"ja": "山田先生"
|
|
37
|
+
},
|
|
38
|
+
"voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae"
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
"beats": [
|
|
43
|
+
{
|
|
44
|
+
"speaker": "Announcer",
|
|
45
|
+
"text": "私たちの目の前で、人工知能の革命が静かに、",
|
|
46
|
+
"speechOptions": {
|
|
47
|
+
"speed": 1.5
|
|
48
|
+
},
|
|
49
|
+
"imagePrompt": "A futuristic scene depicting the quiet but certain advancement of AI, with digital interfaces and abstract representations of AI technology shaping the future."
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
"speaker": "Announcer",
|
|
53
|
+
"text": "しかし確実に進んでいます。",
|
|
54
|
+
"speechOptions": {
|
|
55
|
+
"speed": 1.5
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"speaker": "Announcer",
|
|
60
|
+
"text": "オープンエーアイのサム・アルトマン氏が語る「3つの重要な観察」とは何か?",
|
|
61
|
+
"speechOptions": {
|
|
62
|
+
"speed": 1.5
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
]
|
|
66
|
+
}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1"
|
|
4
|
+
},
|
|
5
|
+
"title": "Media Test",
|
|
6
|
+
"audioParams": {
|
|
7
|
+
"introPadding": 0,
|
|
8
|
+
"padding": 1.0,
|
|
9
|
+
"closingPadding": 5.0,
|
|
10
|
+
"outroPadding": 0
|
|
11
|
+
},
|
|
12
|
+
"beats": [
|
|
13
|
+
{
|
|
14
|
+
"speaker": "Presenter",
|
|
15
|
+
"text": "This is an opening beat.",
|
|
16
|
+
"image": {
|
|
17
|
+
"type": "textSlide",
|
|
18
|
+
"slide": {
|
|
19
|
+
"title": "Opening Beat"
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"speaker": "Presenter",
|
|
25
|
+
"text": "",
|
|
26
|
+
"image": {
|
|
27
|
+
"type": "textSlide",
|
|
28
|
+
"slide": {
|
|
29
|
+
"title": "No Audio with Duration 1.0 seconds (default)"
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"speaker": "Presenter",
|
|
35
|
+
"text": "This is the third beat.",
|
|
36
|
+
"image": {
|
|
37
|
+
"type": "textSlide",
|
|
38
|
+
"slide": {
|
|
39
|
+
"title": "Third Beat"
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
"speaker": "Presenter",
|
|
45
|
+
"text": "This beat has a custom audio padding of 0.0 seconds.",
|
|
46
|
+
"audioParams": {
|
|
47
|
+
"padding": 0.0
|
|
48
|
+
},
|
|
49
|
+
"image": {
|
|
50
|
+
"type": "textSlide",
|
|
51
|
+
"slide": {
|
|
52
|
+
"title": "Custom Audio Padding 0.0 seconds"
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
"speaker": "Presenter",
|
|
58
|
+
"text": "This beat has a custom audio padding of 3.0 seconds.",
|
|
59
|
+
"audioParams": {
|
|
60
|
+
"padding": 3.0
|
|
61
|
+
},
|
|
62
|
+
"image": {
|
|
63
|
+
"type": "textSlide",
|
|
64
|
+
"slide": {
|
|
65
|
+
"title": "Custom Audio Padding 3.0 seconds"
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
"speaker": "Presenter",
|
|
71
|
+
"text": "",
|
|
72
|
+
"duration": 2,
|
|
73
|
+
"image": {
|
|
74
|
+
"type": "textSlide",
|
|
75
|
+
"slide": {
|
|
76
|
+
"title": "No Audio with Duration 2 seconds"
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
"speaker": "Presenter",
|
|
82
|
+
"text": "This is a local movie with audio.",
|
|
83
|
+
"image": {
|
|
84
|
+
"type": "movie",
|
|
85
|
+
"source": {
|
|
86
|
+
"kind": "url",
|
|
87
|
+
"url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/test/pingpong.mov"
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
"speaker": "Presenter",
|
|
93
|
+
"text": "This is a local movie with 20% audio.",
|
|
94
|
+
"image": {
|
|
95
|
+
"type": "movie",
|
|
96
|
+
"source": {
|
|
97
|
+
"kind": "url",
|
|
98
|
+
"url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/test/pingpong.mov"
|
|
99
|
+
}
|
|
100
|
+
},
|
|
101
|
+
"audioParams": {
|
|
102
|
+
"movieVolume": 0.2
|
|
103
|
+
}
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
"speaker": "Presenter",
|
|
107
|
+
"text": "This is a local movie with no audio.",
|
|
108
|
+
"image": {
|
|
109
|
+
"type": "movie",
|
|
110
|
+
"source": {
|
|
111
|
+
"kind": "url",
|
|
112
|
+
"url": "https://github.com/receptron/mulmocast-media/raw/refs/heads/main/test/pingpong.mov"
|
|
113
|
+
}
|
|
114
|
+
},
|
|
115
|
+
"audioParams": {
|
|
116
|
+
"movieVolume": 0.0
|
|
117
|
+
}
|
|
118
|
+
},
|
|
119
|
+
{
|
|
120
|
+
"speaker": "Presenter",
|
|
121
|
+
"text": "This section has longer duration than the audio.",
|
|
122
|
+
"duration": 10,
|
|
123
|
+
"image": {
|
|
124
|
+
"type": "textSlide",
|
|
125
|
+
"slide": {
|
|
126
|
+
"title": "Duration 10 seconds"
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
},
|
|
130
|
+
{
|
|
131
|
+
"speaker": "Presenter",
|
|
132
|
+
"text": "This is the beat before the closing slide.",
|
|
133
|
+
"image": {
|
|
134
|
+
"type": "textSlide",
|
|
135
|
+
"slide": {
|
|
136
|
+
"title": "Before Closing Slide"
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"speaker": "Presenter",
|
|
142
|
+
"text": "This is the closing slide.",
|
|
143
|
+
"image": {
|
|
144
|
+
"type": "textSlide",
|
|
145
|
+
"slide": {
|
|
146
|
+
"title": "Closing Slide"
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
]
|
|
151
|
+
}
|