mulmocast 1.1.4 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/actions/audio.d.ts +0 -1
- package/lib/actions/audio.js +18 -13
- package/lib/actions/image_agents.d.ts +3 -12
- package/lib/actions/image_agents.js +12 -8
- package/lib/actions/images.js +3 -1
- package/lib/actions/movie.js +1 -3
- package/lib/actions/translate.js +13 -31
- package/lib/agents/image_openai_agent.js +4 -1
- package/lib/agents/lipsync_replicate_agent.js +10 -3
- package/lib/cli/commands/audio/handler.js +1 -1
- package/lib/cli/commands/image/handler.js +1 -1
- package/lib/cli/commands/movie/handler.js +1 -1
- package/lib/cli/commands/pdf/handler.js +1 -1
- package/lib/cli/helpers.d.ts +1 -4
- package/lib/cli/helpers.js +3 -2
- package/lib/mcp/server.js +1 -1
- package/lib/methods/mulmo_presentation_style.d.ts +5 -5
- package/lib/methods/mulmo_presentation_style.js +14 -8
- package/lib/methods/mulmo_script.js +4 -1
- package/lib/methods/mulmo_studio_context.d.ts +1 -0
- package/lib/methods/mulmo_studio_context.js +8 -0
- package/lib/types/agent.d.ts +4 -0
- package/lib/types/schema.d.ts +712 -8
- package/lib/types/schema.js +6 -2
- package/lib/types/type.d.ts +1 -1
- package/lib/utils/const.js +1 -1
- package/lib/utils/context.d.ts +401 -34
- package/lib/utils/context.js +95 -56
- package/lib/utils/file.d.ts +1 -1
- package/lib/utils/file.js +5 -2
- package/lib/utils/filters.d.ts +1 -0
- package/lib/utils/filters.js +8 -0
- package/lib/utils/preprocess.d.ts +15 -2
- package/lib/utils/preprocess.js +3 -3
- package/lib/utils/provider2agent.d.ts +3 -2
- package/lib/utils/provider2agent.js +20 -2
- package/lib/utils/string.d.ts +1 -1
- package/lib/utils/string.js +11 -8
- package/package.json +2 -1
- package/scripts/templates/image_refs.json +1 -0
- package/scripts/templates/voice_over.json +1 -0
- package/scripts/test/gpt.json +33 -0
- package/scripts/test/mulmo_story.json +11 -0
- package/scripts/test/test.json +64 -0
- package/scripts/test/test1.json +41 -0
- package/scripts/test/test2.json +66 -0
- package/scripts/test/test_audio.json +152 -0
- package/scripts/test/test_audio_instructions.json +70 -0
- package/scripts/test/test_beats.json +59 -0
- package/scripts/test/test_captions.json +53 -0
- package/scripts/test/test_elevenlabs_models.json +194 -0
- package/scripts/test/test_en.json +29 -0
- package/scripts/test/test_hello.json +18 -0
- package/scripts/test/test_hello_google.json +26 -0
- package/scripts/test/test_html.json +67 -0
- package/scripts/test/test_image_refs.json +50 -0
- package/scripts/test/test_images.json +49 -0
- package/scripts/test/test_lang.json +87 -0
- package/scripts/test/test_layout.json +153 -0
- package/scripts/test/test_lipsync.json +62 -0
- package/scripts/test/test_loop.json +35 -0
- package/scripts/test/test_media.json +245 -0
- package/scripts/test/test_mixed_providers.json +92 -0
- package/scripts/test/test_movie.json +40 -0
- package/scripts/test/test_no_audio.json +253 -0
- package/scripts/test/test_no_audio_with_credit.json +254 -0
- package/scripts/test/test_order.json +69 -0
- package/scripts/test/test_order_portrait.json +73 -0
- package/scripts/test/test_replicate.json +145 -0
- package/scripts/test/test_slideout_left_no_audio.json +46 -0
- package/scripts/test/test_sound_effect.json +41 -0
- package/scripts/test/test_spillover.json +117 -0
- package/scripts/test/test_transition.json +56 -0
- package/scripts/test/test_transition_no_audio.json +46 -0
- package/scripts/test/test_video_speed.json +81 -0
- package/scripts/test/test_voice_over.json +105 -0
- package/scripts/test/test_voices.json +55 -0
package/lib/utils/context.js
CHANGED
|
@@ -1,44 +1,28 @@
|
|
|
1
1
|
import { GraphAILogger } from "graphai";
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import { readMulmoScriptFile, fetchMulmoScriptFile } from "./file.js";
|
|
4
|
-
import {
|
|
4
|
+
import { mulmoStudioSchema, mulmoCaptionParamsSchema } from "../types/index.js";
|
|
5
|
+
import { MulmoPresentationStyleMethods, MulmoScriptMethods } from "../methods/index.js";
|
|
5
6
|
import { mulmoPresentationStyleSchema, mulmoStudioMultiLingualSchema } from "../types/index.js";
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
while (dataSet.length < studioBeatsLength) {
|
|
26
|
-
dataSet.push({ multiLingualTexts: {} });
|
|
27
|
-
}
|
|
28
|
-
dataSet.length = studioBeatsLength;
|
|
29
|
-
return dataSet;
|
|
30
|
-
}
|
|
31
|
-
return [...Array(studioBeatsLength)].map(() => ({ multiLingualTexts: {} }));
|
|
32
|
-
};
|
|
33
|
-
export const getPresentationStyle = (presentationStylePath) => {
|
|
34
|
-
if (presentationStylePath) {
|
|
35
|
-
if (!fs.existsSync(presentationStylePath)) {
|
|
36
|
-
throw new Error(`ERROR: File not exists ${presentationStylePath}`);
|
|
37
|
-
}
|
|
38
|
-
const jsonData = readMulmoScriptFile(presentationStylePath, "ERROR: File does not exist " + presentationStylePath)?.mulmoData ?? null;
|
|
39
|
-
return mulmoPresentationStyleSchema.parse(jsonData);
|
|
40
|
-
}
|
|
41
|
-
return null;
|
|
7
|
+
const mulmoCredit = (speaker) => {
|
|
8
|
+
return {
|
|
9
|
+
speaker,
|
|
10
|
+
text: "",
|
|
11
|
+
image: {
|
|
12
|
+
type: "image",
|
|
13
|
+
source: {
|
|
14
|
+
kind: "url",
|
|
15
|
+
url: "https://github.com/receptron/mulmocast-cli/raw/refs/heads/main/assets/images/mulmocast_credit.png",
|
|
16
|
+
},
|
|
17
|
+
},
|
|
18
|
+
audio: {
|
|
19
|
+
type: "audio",
|
|
20
|
+
source: {
|
|
21
|
+
kind: "url",
|
|
22
|
+
url: "https://github.com/receptron/mulmocast-cli/raw/refs/heads/main/assets/audio/silent300.mp3",
|
|
23
|
+
},
|
|
24
|
+
},
|
|
25
|
+
};
|
|
42
26
|
};
|
|
43
27
|
const initSessionState = () => {
|
|
44
28
|
return {
|
|
@@ -63,32 +47,87 @@ const initSessionState = () => {
|
|
|
63
47
|
},
|
|
64
48
|
};
|
|
65
49
|
};
|
|
66
|
-
const
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
50
|
+
export const createStudioData = (_mulmoScript, fileName, videoCaptionLang, presentationStyle) => {
|
|
51
|
+
// validate and insert default value
|
|
52
|
+
const mulmoScript = _mulmoScript.__test_invalid__ ? _mulmoScript : MulmoScriptMethods.validate(_mulmoScript);
|
|
53
|
+
// We need to parse it to fill default values
|
|
54
|
+
const studio = mulmoStudioSchema.parse({
|
|
55
|
+
script: mulmoScript,
|
|
56
|
+
filename: fileName,
|
|
57
|
+
beats: [...Array(mulmoScript.beats.length)].map(() => ({})),
|
|
58
|
+
});
|
|
59
|
+
// TODO: Move this code out of this function later
|
|
60
|
+
// Addition cloing credit
|
|
61
|
+
if (mulmoScript.$mulmocast.credit === "closing") {
|
|
62
|
+
const defaultSpeaker = MulmoPresentationStyleMethods.getDefaultSpeaker(presentationStyle ?? studio.script);
|
|
63
|
+
mulmoScript.beats.push(mulmoCredit(mulmoScript.beats[0].speaker ?? defaultSpeaker)); // First speaker
|
|
64
|
+
}
|
|
65
|
+
studio.script = MulmoScriptMethods.validate(mulmoScript); // update the script
|
|
66
|
+
studio.beats = studio.script.beats.map((_, index) => studio.beats[index] ?? {});
|
|
67
|
+
if (videoCaptionLang) {
|
|
68
|
+
studio.script.captionParams = mulmoCaptionParamsSchema.parse({
|
|
69
|
+
...(studio.script.captionParams ?? {}),
|
|
70
|
+
lang: videoCaptionLang,
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
return studio;
|
|
74
|
+
};
|
|
75
|
+
export const fetchScript = async (isHttpPath, mulmoFilePath, fileOrUrl) => {
|
|
76
|
+
if (isHttpPath) {
|
|
77
|
+
const res = await fetchMulmoScriptFile(fileOrUrl);
|
|
78
|
+
if (!res.result || !res.script) {
|
|
79
|
+
GraphAILogger.info(`ERROR: HTTP error! ${res.status} ${fileOrUrl}`);
|
|
80
|
+
return null;
|
|
81
|
+
}
|
|
82
|
+
return res.script;
|
|
83
|
+
}
|
|
84
|
+
if (!fs.existsSync(mulmoFilePath)) {
|
|
85
|
+
GraphAILogger.info(`ERROR: File not exists ${mulmoFilePath}`);
|
|
86
|
+
return null;
|
|
87
|
+
}
|
|
88
|
+
return readMulmoScriptFile(mulmoFilePath, "ERROR: File does not exist " + mulmoFilePath)?.mulmoData ?? null;
|
|
89
|
+
};
|
|
90
|
+
export const getMultiLingual = (multilingualFilePath, studioBeatsLength) => {
|
|
91
|
+
if (!fs.existsSync(multilingualFilePath)) {
|
|
92
|
+
return [...Array(studioBeatsLength)].map(() => ({ multiLingualTexts: {} }));
|
|
93
|
+
}
|
|
94
|
+
const jsonData = readMulmoScriptFile(multilingualFilePath, "ERROR: File does not exist " + multilingualFilePath)?.mulmoData ?? null;
|
|
95
|
+
const dataSet = mulmoStudioMultiLingualSchema.parse(jsonData);
|
|
96
|
+
while (dataSet.length < studioBeatsLength) {
|
|
97
|
+
dataSet.push({ multiLingualTexts: {} });
|
|
98
|
+
}
|
|
99
|
+
dataSet.length = studioBeatsLength;
|
|
100
|
+
return dataSet;
|
|
101
|
+
};
|
|
102
|
+
export const getPresentationStyle = (presentationStylePath) => {
|
|
103
|
+
if (!presentationStylePath) {
|
|
104
|
+
return null;
|
|
105
|
+
}
|
|
106
|
+
if (!fs.existsSync(presentationStylePath)) {
|
|
107
|
+
throw new Error(`ERROR: File not exists ${presentationStylePath}`);
|
|
108
|
+
}
|
|
109
|
+
const jsonData = readMulmoScriptFile(presentationStylePath, "ERROR: File does not exist " + presentationStylePath)?.mulmoData ?? null;
|
|
110
|
+
return mulmoPresentationStyleSchema.parse(jsonData);
|
|
76
111
|
};
|
|
77
|
-
export const initializeContextFromFiles = async (files, raiseError, force,
|
|
78
|
-
const { fileName, isHttpPath, fileOrUrl, mulmoFilePath,
|
|
79
|
-
// read mulmoScript, presentationStyle, currentStudio from files
|
|
112
|
+
export const initializeContextFromFiles = async (files, raiseError, force, captionLang, targetLang) => {
|
|
113
|
+
const { fileName, isHttpPath, fileOrUrl, mulmoFilePath, presentationStylePath, outputMultilingualFilePath } = files;
|
|
80
114
|
const mulmoScript = await fetchScript(isHttpPath, mulmoFilePath, fileOrUrl);
|
|
81
115
|
if (!mulmoScript) {
|
|
82
116
|
return null;
|
|
83
117
|
}
|
|
84
|
-
const presentationStyle = getPresentationStyle(presentationStylePath);
|
|
85
|
-
// Create or update MulmoStudio file with MulmoScript
|
|
86
|
-
const currentStudio = readMulmoScriptFile(outputStudioFilePath);
|
|
87
118
|
try {
|
|
88
|
-
|
|
89
|
-
const studio =
|
|
119
|
+
const presentationStyle = getPresentationStyle(presentationStylePath);
|
|
120
|
+
const studio = createStudioData(mulmoScript, fileName, captionLang, presentationStyle);
|
|
90
121
|
const multiLingual = getMultiLingual(outputMultilingualFilePath, studio.beats.length);
|
|
91
|
-
return
|
|
122
|
+
return {
|
|
123
|
+
studio,
|
|
124
|
+
multiLingual,
|
|
125
|
+
fileDirs: files,
|
|
126
|
+
presentationStyle: presentationStyle ?? studio.script,
|
|
127
|
+
sessionState: initSessionState(),
|
|
128
|
+
force: Boolean(force),
|
|
129
|
+
lang: targetLang ?? studio.script.lang, // This lang is target Language. studio.lang is default Language
|
|
130
|
+
};
|
|
92
131
|
}
|
|
93
132
|
catch (error) {
|
|
94
133
|
GraphAILogger.info(`Error: invalid MulmoScript Schema: ${isHttpPath ? fileOrUrl : mulmoFilePath} \n ${error}`);
|
package/lib/utils/file.d.ts
CHANGED
|
@@ -21,7 +21,7 @@ export declare const getOutputStudioFilePath: (outDirPath: string, fileName: str
|
|
|
21
21
|
export declare const getOutputMultilingualFilePath: (outDirPath: string, fileName: string) => string;
|
|
22
22
|
export declare const resolveDirPath: (dirPath: string, studioFileName: string) => string;
|
|
23
23
|
export declare const getAudioFilePath: (audioDirPath: string, dirName: string, fileName: string, lang?: string) => string;
|
|
24
|
-
export declare const getAudioArtifactFilePath: (
|
|
24
|
+
export declare const getAudioArtifactFilePath: (context: MulmoStudioContext) => string;
|
|
25
25
|
export declare const getOutputVideoFilePath: (outDirPath: string, fileName: string, lang?: string, caption?: string) => string;
|
|
26
26
|
export declare const imageSuffix = "p";
|
|
27
27
|
export declare const getBeatPngImagePath: (context: MulmoStudioContext, index: number) => string;
|
package/lib/utils/file.js
CHANGED
|
@@ -72,8 +72,11 @@ export const getAudioFilePath = (audioDirPath, dirName, fileName, lang) => {
|
|
|
72
72
|
}
|
|
73
73
|
return path.resolve(audioDirPath, dirName, fileName + ".mp3");
|
|
74
74
|
};
|
|
75
|
-
export const getAudioArtifactFilePath = (
|
|
76
|
-
|
|
75
|
+
export const getAudioArtifactFilePath = (context) => {
|
|
76
|
+
const suffix = context.lang ? `_${context.lang}` : "";
|
|
77
|
+
const fileName = MulmoStudioContextMethods.getFileName(context);
|
|
78
|
+
const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
|
|
79
|
+
return path.resolve(outDirPath, fileName + suffix + ".mp3");
|
|
77
80
|
};
|
|
78
81
|
export const getOutputVideoFilePath = (outDirPath, fileName, lang, caption) => {
|
|
79
82
|
const suffix = lang ? `_${lang}` : "";
|
package/lib/utils/filters.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import "dotenv/config";
|
|
2
2
|
import type { AgentFilterFunction } from "graphai";
|
|
3
|
+
export declare const nijovoiceTextAgentFilter: AgentFilterFunction;
|
|
3
4
|
export declare const fileCacheAgentFilter: AgentFilterFunction;
|
|
4
5
|
export declare const browserlessCacheGenerator: (cacheDir: string) => AgentFilterFunction;
|
package/lib/utils/filters.js
CHANGED
|
@@ -6,6 +6,14 @@ import { GraphAILogger } from "graphai";
|
|
|
6
6
|
import { writingMessage } from "./file.js";
|
|
7
7
|
import { text2hash } from "./utils.js";
|
|
8
8
|
import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
|
|
9
|
+
import { replacementsJa, replacePairsJa } from "../utils/string.js";
|
|
10
|
+
export const nijovoiceTextAgentFilter = async (context, next) => {
|
|
11
|
+
const { text, provider, lang } = context.namedInputs;
|
|
12
|
+
if (provider === "nijivoice" && lang === "ja") {
|
|
13
|
+
context.namedInputs.text = replacePairsJa(replacementsJa)(text);
|
|
14
|
+
}
|
|
15
|
+
return next(context);
|
|
16
|
+
};
|
|
9
17
|
export const fileCacheAgentFilter = async (context, next) => {
|
|
10
18
|
const { force, file, index, mulmoContext, sessionType } = context.namedInputs.cache;
|
|
11
19
|
const shouldUseCache = async () => {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { MulmoStudio, MulmoScript, MulmoPresentationStyle } from "../types/index.js";
|
|
2
|
-
export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, currentStudio: MulmoStudio | undefined, fileName: string,
|
|
2
|
+
export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, currentStudio: MulmoStudio | undefined, fileName: string, videoCaptionLang?: string, presentationStyle?: MulmoPresentationStyle | null) => {
|
|
3
3
|
beats: {
|
|
4
4
|
duration?: number | undefined;
|
|
5
5
|
startAt?: number | undefined;
|
|
@@ -16,10 +16,12 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
|
|
|
16
16
|
captionFile?: string | undefined;
|
|
17
17
|
}[];
|
|
18
18
|
script: {
|
|
19
|
+
lang: string;
|
|
19
20
|
imageParams: {
|
|
20
21
|
provider: string;
|
|
21
22
|
model?: string | undefined;
|
|
22
23
|
style?: string | undefined;
|
|
24
|
+
quality?: string | undefined;
|
|
23
25
|
moderation?: string | undefined;
|
|
24
26
|
images?: Record<string, {
|
|
25
27
|
type: "image";
|
|
@@ -89,6 +91,17 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
|
|
|
89
91
|
speechParams: {
|
|
90
92
|
speakers: Record<string, {
|
|
91
93
|
voiceId: string;
|
|
94
|
+
lang?: Record<string, {
|
|
95
|
+
voiceId: string;
|
|
96
|
+
displayName?: Record<string, string> | undefined;
|
|
97
|
+
isDefault?: boolean | undefined;
|
|
98
|
+
speechOptions?: {
|
|
99
|
+
speed?: number | undefined;
|
|
100
|
+
instruction?: string | undefined;
|
|
101
|
+
} | undefined;
|
|
102
|
+
provider?: string | undefined;
|
|
103
|
+
model?: string | undefined;
|
|
104
|
+
}> | undefined;
|
|
92
105
|
displayName?: Record<string, string> | undefined;
|
|
93
106
|
isDefault?: boolean | undefined;
|
|
94
107
|
speechOptions?: {
|
|
@@ -237,6 +250,7 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
|
|
|
237
250
|
provider: string;
|
|
238
251
|
model?: string | undefined;
|
|
239
252
|
style?: string | undefined;
|
|
253
|
+
quality?: string | undefined;
|
|
240
254
|
moderation?: string | undefined;
|
|
241
255
|
images?: Record<string, {
|
|
242
256
|
type: "image";
|
|
@@ -299,7 +313,6 @@ export declare const createOrUpdateStudioData: (_mulmoScript: MulmoScript, curre
|
|
|
299
313
|
} | undefined;
|
|
300
314
|
enableLipSync?: boolean | undefined;
|
|
301
315
|
}[];
|
|
302
|
-
lang?: string | undefined;
|
|
303
316
|
title?: string | undefined;
|
|
304
317
|
description?: string | undefined;
|
|
305
318
|
lipSyncParams?: {
|
package/lib/utils/preprocess.js
CHANGED
|
@@ -37,7 +37,7 @@ const mulmoCredit = (speaker) => {
|
|
|
37
37
|
},
|
|
38
38
|
};
|
|
39
39
|
};
|
|
40
|
-
export const createOrUpdateStudioData = (_mulmoScript, currentStudio, fileName,
|
|
40
|
+
export const createOrUpdateStudioData = (_mulmoScript, currentStudio, fileName, videoCaptionLang, presentationStyle) => {
|
|
41
41
|
const mulmoScript = _mulmoScript.__test_invalid__ ? _mulmoScript : MulmoScriptMethods.validate(_mulmoScript); // validate and insert default value
|
|
42
42
|
const studio = rebuildStudio(currentStudio, mulmoScript, fileName);
|
|
43
43
|
// TODO: Move this code out of this function later
|
|
@@ -48,10 +48,10 @@ export const createOrUpdateStudioData = (_mulmoScript, currentStudio, fileName,
|
|
|
48
48
|
}
|
|
49
49
|
studio.script = MulmoScriptMethods.validate(mulmoScript); // update the script
|
|
50
50
|
studio.beats = studio.script.beats.map((_, index) => studio.beats[index] ?? {});
|
|
51
|
-
if (
|
|
51
|
+
if (videoCaptionLang) {
|
|
52
52
|
studio.script.captionParams = mulmoCaptionParamsSchema.parse({
|
|
53
53
|
...(studio.script.captionParams ?? {}),
|
|
54
|
-
lang:
|
|
54
|
+
lang: videoCaptionLang,
|
|
55
55
|
});
|
|
56
56
|
}
|
|
57
57
|
return studio;
|
|
@@ -67,9 +67,10 @@ export declare const provider2LipSyncAgent: {
|
|
|
67
67
|
defaultModel: ReplicateModel;
|
|
68
68
|
models: ReplicateModel[];
|
|
69
69
|
modelParams: Record<ReplicateModel, {
|
|
70
|
-
identifier?: `${string}/${string}:${string}`;
|
|
71
|
-
video
|
|
70
|
+
identifier?: `${string}/${string}:${string}` | `${string}/${string}`;
|
|
71
|
+
video?: string;
|
|
72
72
|
audio: string;
|
|
73
|
+
image?: string;
|
|
73
74
|
}>;
|
|
74
75
|
};
|
|
75
76
|
};
|
|
@@ -50,6 +50,8 @@ export const provider2MovieAgent = {
|
|
|
50
50
|
"minimax/video-01",
|
|
51
51
|
"minimax/hailuo-02",
|
|
52
52
|
"pixverse/pixverse-v4.5",
|
|
53
|
+
"wan-video/wan-2.2-i2v-480p-fast",
|
|
54
|
+
"wan-video/wan-2.2-t2v-480p-fast",
|
|
53
55
|
],
|
|
54
56
|
modelParams: {
|
|
55
57
|
"bytedance/seedance-1-lite": {
|
|
@@ -110,6 +112,16 @@ export const provider2MovieAgent = {
|
|
|
110
112
|
last_image: "last_frame_image",
|
|
111
113
|
price_per_sec: 0.12,
|
|
112
114
|
},
|
|
115
|
+
"wan-video/wan-2.2-i2v-480p-fast": {
|
|
116
|
+
durations: [5],
|
|
117
|
+
start_image: "image",
|
|
118
|
+
price_per_sec: 0.012,
|
|
119
|
+
},
|
|
120
|
+
"wan-video/wan-2.2-t2v-480p-fast": {
|
|
121
|
+
durations: [5],
|
|
122
|
+
start_image: undefined,
|
|
123
|
+
price_per_sec: 0.012,
|
|
124
|
+
},
|
|
113
125
|
},
|
|
114
126
|
},
|
|
115
127
|
google: {
|
|
@@ -133,8 +145,8 @@ export const provider2SoundEffectAgent = {
|
|
|
133
145
|
export const provider2LipSyncAgent = {
|
|
134
146
|
replicate: {
|
|
135
147
|
agentName: "lipSyncReplicateAgent",
|
|
136
|
-
defaultModel: "bytedance/
|
|
137
|
-
models: ["bytedance/latentsync", "tmappdev/lipsync"],
|
|
148
|
+
defaultModel: "bytedance/omni-human",
|
|
149
|
+
models: ["bytedance/latentsync", "tmappdev/lipsync", "bytedance/omni-human"],
|
|
138
150
|
modelParams: {
|
|
139
151
|
"bytedance/latentsync": {
|
|
140
152
|
identifier: "bytedance/latentsync:637ce1919f807ca20da3a448ddc2743535d2853649574cd52a933120e9b9e293",
|
|
@@ -146,6 +158,12 @@ export const provider2LipSyncAgent = {
|
|
|
146
158
|
video: "video_input",
|
|
147
159
|
audio: "audio_input",
|
|
148
160
|
},
|
|
161
|
+
"bytedance/omni-human": {
|
|
162
|
+
identifier: "bytedance/omni-human",
|
|
163
|
+
image: "image",
|
|
164
|
+
audio: "audio",
|
|
165
|
+
price_per_sec: 0.14,
|
|
166
|
+
},
|
|
149
167
|
/* NOTE: This model does not work with large base64 urls.
|
|
150
168
|
"sync/lipsync-2": {
|
|
151
169
|
video: "video",
|
package/lib/utils/string.d.ts
CHANGED
|
@@ -4,6 +4,6 @@ interface Replacement {
|
|
|
4
4
|
from: string;
|
|
5
5
|
to: string;
|
|
6
6
|
}
|
|
7
|
-
export declare function replacePairsJa(
|
|
7
|
+
export declare function replacePairsJa(replacements: Replacement[]): (str: string) => string;
|
|
8
8
|
export declare const replacementsJa: Replacement[];
|
|
9
9
|
export {};
|
package/lib/utils/string.js
CHANGED
|
@@ -24,14 +24,16 @@ export const recursiveSplitJa = (text) => {
|
|
|
24
24
|
}, [text])
|
|
25
25
|
.flat(1);
|
|
26
26
|
};
|
|
27
|
-
export function replacePairsJa(
|
|
28
|
-
return
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
27
|
+
export function replacePairsJa(replacements) {
|
|
28
|
+
return (str) => {
|
|
29
|
+
return replacements.reduce((tmp, current) => {
|
|
30
|
+
const { from, to } = current;
|
|
31
|
+
// Escape any special regex characters in the 'from' string.
|
|
32
|
+
const escapedFrom = from.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
33
|
+
const regex = new RegExp(escapedFrom, "g");
|
|
34
|
+
return tmp.replace(regex, to);
|
|
35
|
+
}, str);
|
|
36
|
+
};
|
|
35
37
|
}
|
|
36
38
|
export const replacementsJa = [
|
|
37
39
|
{ from: "Anthropic", to: "アンスロピック" },
|
|
@@ -51,4 +53,5 @@ export const replacementsJa = [
|
|
|
51
53
|
{ from: "5つ", to: "いつつ" },
|
|
52
54
|
{ from: "危険な面", to: "危険なめん" },
|
|
53
55
|
{ from: "その通り!", to: "その通り。" },
|
|
56
|
+
{ from: "%", to: "パーセント" },
|
|
54
57
|
];
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mulmocast",
|
|
3
|
-
"version": "1.1.
|
|
3
|
+
"version": "1.1.6",
|
|
4
4
|
"description": "",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "lib/index.node.js",
|
|
@@ -23,6 +23,7 @@
|
|
|
23
23
|
"files": [
|
|
24
24
|
"./lib",
|
|
25
25
|
"./scripts/templates",
|
|
26
|
+
"./scripts/test",
|
|
26
27
|
"./assets/audio/silent60sec.mp3",
|
|
27
28
|
"./assets/html/",
|
|
28
29
|
"./assets/templates/"
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1",
|
|
4
|
+
"credit": "closing"
|
|
5
|
+
},
|
|
6
|
+
"lang": "en",
|
|
7
|
+
"title": "Testing OpenAI's new Image Generation",
|
|
8
|
+
"description": "Hello",
|
|
9
|
+
"imageParams": {
|
|
10
|
+
"model": "gpt-image-1",
|
|
11
|
+
"style": "<style>Japanese animation with soft watercolor backgrounds, characters with simple rounded faces, large expressive eyes, small nose and mouth, soft jawlines, minimalist facial features, pastel color palette, detailed natural environments, whimsical magical elements, hand-drawn aesthetic, gentle lighting, flowing movement in hair and clothing, nostalgic countryside scenery with fantasy elements."
|
|
12
|
+
},
|
|
13
|
+
"speechParams": {
|
|
14
|
+
"speakers": {
|
|
15
|
+
"Host": {
|
|
16
|
+
"voiceId": "shimmer",
|
|
17
|
+
"displayName": {
|
|
18
|
+
"en": "Host"
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
},
|
|
23
|
+
"beats": [
|
|
24
|
+
{
|
|
25
|
+
"speaker": "Host",
|
|
26
|
+
"text": "How are you?",
|
|
27
|
+
"imagePrompt": "A witch in Harajuku",
|
|
28
|
+
"imageParams": {
|
|
29
|
+
"style": "Ukiyoe-style"
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
]
|
|
33
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "MulmoScript",
|
|
3
|
+
"scenes": [
|
|
4
|
+
{
|
|
5
|
+
"description": "MulmoCast is a multi-modal presentation platform built for the generative AI era. Traditional tools like PowerPoint and Keynote were designed decades ago for human authors. Today, however, large language models (LLMs) are generating content—and they need a native environment optimized for their capabilities. MulmoCast is that environment. It empowers AI to automatically create and deliver rich, multi-modal presentations—including slides, videos, podcasts, documents, and comics—using our open presentation language: MulmoScript."
|
|
6
|
+
},
|
|
7
|
+
{
|
|
8
|
+
"description": " MulmoScript is a JSON-based language that enables LLMs to describe structured, machine-readable presentations. It supports a wide range of elements: bullet points, charts, graphs, images, voiceovers, and videos. Just as HTML unlocked the web, MulmoScript enables interoperability, customization, and ecosystem growth for AI-generated content. MulmoCast renders this content into any format, giving end users complete flexibility in how they consume it—whether as a slideshow, podcast, video, or document. It also supports multilingual output for both reading and listening."
|
|
9
|
+
}
|
|
10
|
+
]
|
|
11
|
+
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1",
|
|
4
|
+
"credit": "closing"
|
|
5
|
+
},
|
|
6
|
+
"title": "MASAI: A Modular Future for Software Engineering AI",
|
|
7
|
+
"description": "Exploring MASAI, a modular approach for AI agents in software engineering that revolutionizes how complex coding issues are tackled.",
|
|
8
|
+
"references": [
|
|
9
|
+
{
|
|
10
|
+
"url": "https://arxiv.org/abs/2406.11638",
|
|
11
|
+
"title": "MASAI: A Modular Future for Software Engineering AI",
|
|
12
|
+
"description": "An article on MASAI, a modular approach for AI agents in software engineering."
|
|
13
|
+
}
|
|
14
|
+
],
|
|
15
|
+
"lang": "ja",
|
|
16
|
+
"imageParams": {
|
|
17
|
+
"style": "<style>monochrome"
|
|
18
|
+
},
|
|
19
|
+
"speechParams": {
|
|
20
|
+
"speakers": {
|
|
21
|
+
"Announcer": {
|
|
22
|
+
"provider": "nijivoice",
|
|
23
|
+
"displayName": {
|
|
24
|
+
"ja": "アナウンサー"
|
|
25
|
+
},
|
|
26
|
+
"voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c",
|
|
27
|
+
"speechOptions": {
|
|
28
|
+
"speed": 1.666
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"Student": {
|
|
32
|
+
"provider": "nijivoice",
|
|
33
|
+
"displayName": {
|
|
34
|
+
"ja": "生徒"
|
|
35
|
+
},
|
|
36
|
+
"voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f"
|
|
37
|
+
},
|
|
38
|
+
"Teacher": {
|
|
39
|
+
"provider": "nijivoice",
|
|
40
|
+
"displayName": {
|
|
41
|
+
"ja": "先生"
|
|
42
|
+
},
|
|
43
|
+
"voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae"
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
},
|
|
47
|
+
"beats": [
|
|
48
|
+
{
|
|
49
|
+
"speaker": "Announcer",
|
|
50
|
+
"text": "こんにちは。米国で活躍するエンジニアが新しい技術やビジネスを分かりやすく解説する、中島聡のLife is beautiful。"
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"speaker": "Announcer",
|
|
54
|
+
"text": "今日は、アメリカで発表された「スターゲート・プロジェクト」に、ついて解説します。",
|
|
55
|
+
"imagePrompt": "Blue sky, a flock of birds",
|
|
56
|
+
"imageParams": {
|
|
57
|
+
"style": "<style>sumie-style"
|
|
58
|
+
},
|
|
59
|
+
"speechOptions": {
|
|
60
|
+
"speed": 0.8
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
]
|
|
64
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1",
|
|
4
|
+
"credit": "closing"
|
|
5
|
+
},
|
|
6
|
+
"lang": "en",
|
|
7
|
+
"title": "The Honey Trap vs. The Frontier: Engineering at Tesla and SpaceX",
|
|
8
|
+
"description": "We dive into Elon Musk's philosophy on building environments where engineers can truly flourish, contrasting the comfort-focused 'honey trap' with the high-expectation culture at Tesla and SpaceX.",
|
|
9
|
+
"speechParams": {
|
|
10
|
+
"speakers": {
|
|
11
|
+
"Host": {
|
|
12
|
+
"displayName": {
|
|
13
|
+
"ja": "司会"
|
|
14
|
+
},
|
|
15
|
+
"voiceId": "sage"
|
|
16
|
+
},
|
|
17
|
+
"Guest": {
|
|
18
|
+
"displayName": {
|
|
19
|
+
"ja": "ゲスト"
|
|
20
|
+
},
|
|
21
|
+
"voiceId": "shimmer"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"beats": [
|
|
26
|
+
{
|
|
27
|
+
"speaker": "Host",
|
|
28
|
+
"text": "Hello and welcome to another episode of 'life is artificial', where we explore the cutting edge of technology, innovation, and what the future could look like.",
|
|
29
|
+
"speechOptions": {
|
|
30
|
+
"instruction": "Voice: Deep and rugged, with a hearty, boisterous quality, like a seasoned sea captain who's seen many voyages.\nTone: Friendly and spirited, with a sense of adventure and enthusiasm, making every detail feel like part of a grand journey.\nDialect: Classic pirate speech with old-timey nautical phrases, dropped 'g's, and exaggerated 'Arrrs' to stay in character."
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"speaker": "Guest",
|
|
35
|
+
"text": "こんにちは、ポッドキャスト版、ライフ・イズ・ビューティフルへようこそ。新しいテクノロジーについて分かりやすく語ります",
|
|
36
|
+
"speechOptions": {
|
|
37
|
+
"instruction": "voice: 赤ちゃん\nVoice: 高い声で、舌ったらず。甘えっぽく。語尾に’んちゃ’と付ける"
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
]
|
|
41
|
+
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1",
|
|
4
|
+
"credit": "closing"
|
|
5
|
+
},
|
|
6
|
+
"title": "AIの進化と私たちの未来 - サム・アルトマン氏の3つの観察",
|
|
7
|
+
"description": "OpenAIのサム・アルトマン氏による、AIの進化と社会への影響に関する洞察を、高校生向けに分かりやすく解説する対話形式のスクリプト。",
|
|
8
|
+
"references": [
|
|
9
|
+
{
|
|
10
|
+
"url": "https://blog.samaltman.com/three-observations",
|
|
11
|
+
"title": "Three Observations",
|
|
12
|
+
"description": "An article on AI's evolution and its impact on society by Sam Altman."
|
|
13
|
+
}
|
|
14
|
+
],
|
|
15
|
+
"lang": "ja",
|
|
16
|
+
"imageParams": {
|
|
17
|
+
"style": "<style>monochrome"
|
|
18
|
+
},
|
|
19
|
+
"speechParams": {
|
|
20
|
+
"provider": "nijivoice",
|
|
21
|
+
"speakers": {
|
|
22
|
+
"Announcer": {
|
|
23
|
+
"displayName": {
|
|
24
|
+
"ja": "千草朋香"
|
|
25
|
+
},
|
|
26
|
+
"voiceId": "3708ad43-cace-486c-a4ca-8fe41186e20c"
|
|
27
|
+
},
|
|
28
|
+
"Student": {
|
|
29
|
+
"displayName": {
|
|
30
|
+
"ja": "太郎"
|
|
31
|
+
},
|
|
32
|
+
"voiceId": "a7619e48-bf6a-4f9f-843f-40485651257f"
|
|
33
|
+
},
|
|
34
|
+
"Teacher": {
|
|
35
|
+
"displayName": {
|
|
36
|
+
"ja": "山田先生"
|
|
37
|
+
},
|
|
38
|
+
"voiceId": "bc06c63f-fef6-43b6-92f7-67f919bd5dae"
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
"beats": [
|
|
43
|
+
{
|
|
44
|
+
"speaker": "Announcer",
|
|
45
|
+
"text": "私たちの目の前で、人工知能の革命が静かに、",
|
|
46
|
+
"speechOptions": {
|
|
47
|
+
"speed": 1.5
|
|
48
|
+
},
|
|
49
|
+
"imagePrompt": "A futuristic scene depicting the quiet but certain advancement of AI, with digital interfaces and abstract representations of AI technology shaping the future."
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
"speaker": "Announcer",
|
|
53
|
+
"text": "しかし確実に進んでいます。",
|
|
54
|
+
"speechOptions": {
|
|
55
|
+
"speed": 1.5
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"speaker": "Announcer",
|
|
60
|
+
"text": "オープンエーアイのサム・アルトマン氏が語る「3つの重要な観察」とは何か?",
|
|
61
|
+
"speechOptions": {
|
|
62
|
+
"speed": 1.5
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
]
|
|
66
|
+
}
|