mulmocast 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +1 -3
  2. package/assets/templates/ghibli_shorts.json +34 -0
  3. package/assets/templates/trailer.json +25 -0
  4. package/lib/actions/audio.js +29 -16
  5. package/lib/actions/captions.js +5 -5
  6. package/lib/actions/images.js +51 -12
  7. package/lib/actions/movie.js +46 -13
  8. package/lib/actions/pdf.js +3 -3
  9. package/lib/actions/translate.js +15 -15
  10. package/lib/agents/image_openai_agent.js +6 -3
  11. package/lib/agents/index.d.ts +2 -1
  12. package/lib/agents/index.js +2 -1
  13. package/lib/agents/tts_elevenlabs_agent.d.ts +4 -0
  14. package/lib/agents/tts_elevenlabs_agent.js +60 -0
  15. package/lib/agents/tts_google_agent.js +1 -1
  16. package/lib/agents/tts_nijivoice_agent.js +3 -2
  17. package/lib/agents/tts_openai_agent.js +1 -1
  18. package/lib/cli/commands/audio/handler.js +4 -1
  19. package/lib/cli/commands/image/handler.js +4 -1
  20. package/lib/cli/commands/movie/handler.js +4 -1
  21. package/lib/cli/commands/pdf/handler.js +4 -1
  22. package/lib/cli/commands/translate/handler.js +4 -1
  23. package/lib/cli/helpers.d.ts +3 -3
  24. package/lib/cli/helpers.js +38 -20
  25. package/lib/methods/mulmo_media_source.d.ts +1 -0
  26. package/lib/methods/mulmo_media_source.js +12 -0
  27. package/lib/methods/mulmo_script.d.ts +1 -0
  28. package/lib/methods/mulmo_script.js +9 -0
  29. package/lib/methods/mulmo_studio_context.d.ts +5 -0
  30. package/lib/methods/mulmo_studio_context.js +23 -0
  31. package/lib/types/schema.d.ts +1498 -242
  32. package/lib/types/schema.js +25 -34
  33. package/lib/types/type.d.ts +4 -1
  34. package/lib/utils/file.d.ts +4 -15
  35. package/lib/utils/file.js +2 -13
  36. package/lib/utils/filters.js +4 -4
  37. package/lib/utils/image_plugins/beat.d.ts +4 -0
  38. package/lib/utils/image_plugins/beat.js +7 -0
  39. package/lib/utils/image_plugins/index.d.ts +2 -1
  40. package/lib/utils/image_plugins/index.js +2 -1
  41. package/lib/utils/image_plugins/source.js +2 -2
  42. package/lib/utils/preprocess.d.ts +24 -20
  43. package/lib/utils/preprocess.js +4 -0
  44. package/package.json +1 -1
  45. package/scripts/templates/movie_prompts_no_text_template.json +50 -0
@@ -1,4 +1,5 @@
1
1
  import fs from "fs";
2
+ import path from "path";
2
3
  import OpenAI, { toFile } from "openai";
3
4
  // https://platform.openai.com/docs/guides/image-generation
4
5
  export const imageOpenaiAgent = async ({ namedInputs, params }) => {
@@ -42,9 +43,11 @@ export const imageOpenaiAgent = async ({ namedInputs, params }) => {
42
43
  const response = await (async () => {
43
44
  const targetSize = imageOptions.size;
44
45
  if ((images ?? []).length > 0 && (targetSize === "1536x1024" || targetSize === "1024x1536" || targetSize === "1024x1024")) {
45
- const imagelist = await Promise.all((images ?? []).map(async (file) => await toFile(fs.createReadStream(file), null, {
46
- type: "image/png", // TODO: Support JPEG as well
47
- })));
46
+ const imagelist = await Promise.all((images ?? []).map(async (file) => {
47
+ const ext = path.extname(file).toLowerCase();
48
+ const type = ext === ".jpg" || ext === ".jpeg" ? "image/jpeg" : "image/png";
49
+ return await toFile(fs.createReadStream(file), null, { type });
50
+ }));
48
51
  return await openai.images.edit({ ...imageOptions, size: targetSize, image: imagelist });
49
52
  }
50
53
  else {
@@ -2,6 +2,7 @@ import addBGMAgent from "./add_bgm_agent.js";
2
2
  import combineAudioFilesAgent from "./combine_audio_files_agent.js";
3
3
  import imageGoogleAgent from "./image_google_agent.js";
4
4
  import imageOpenaiAgent from "./image_openai_agent.js";
5
+ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
5
6
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
6
7
  import ttsOpenaiAgent from "./tts_openai_agent.js";
7
8
  import validateSchemaAgent from "./validate_schema_agent.js";
@@ -9,4 +10,4 @@ import { browserlessAgent } from "@graphai/browserless_agent";
9
10
  import { textInputAgent } from "@graphai/input_agents";
10
11
  import { openAIAgent } from "@graphai/openai_agent";
11
12
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
12
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
13
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
@@ -2,6 +2,7 @@ import addBGMAgent from "./add_bgm_agent.js";
2
2
  import combineAudioFilesAgent from "./combine_audio_files_agent.js";
3
3
  import imageGoogleAgent from "./image_google_agent.js";
4
4
  import imageOpenaiAgent from "./image_openai_agent.js";
5
+ import ttsElevenlabsAgent from "./tts_elevenlabs_agent.js";
5
6
  import ttsNijivoiceAgent from "./tts_nijivoice_agent.js";
6
7
  import ttsOpenaiAgent from "./tts_openai_agent.js";
7
8
  import validateSchemaAgent from "./validate_schema_agent.js";
@@ -10,4 +11,4 @@ import { textInputAgent } from "@graphai/input_agents";
10
11
  import { openAIAgent } from "@graphai/openai_agent";
11
12
  // import * as vanilla from "@graphai/vanilla";
12
13
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
13
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
14
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGoogleAgent, imageOpenaiAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, validateSchemaAgent, };
@@ -0,0 +1,4 @@
1
+ import type { AgentFunction, AgentFunctionInfo } from "graphai";
2
+ export declare const ttsElevenlabsAgent: AgentFunction;
3
+ declare const ttsElevenlabsAgentInfo: AgentFunctionInfo;
4
+ export default ttsElevenlabsAgentInfo;
@@ -0,0 +1,60 @@
1
+ import { GraphAILogger } from "graphai";
2
+ export const ttsElevenlabsAgent = async ({ namedInputs, params }) => {
3
+ const { text } = namedInputs;
4
+ const { voice, model, stability, similarityBoost, suppressError } = params;
5
+ const apiKey = process.env.ELEVENLABS_API_KEY;
6
+ if (!apiKey) {
7
+ throw new Error("ELEVENLABS_API_KEY environment variable is required");
8
+ }
9
+ if (!voice) {
10
+ throw new Error("Voice ID is required");
11
+ }
12
+ try {
13
+ const requestBody = {
14
+ text,
15
+ model_id: model ?? "eleven_monolingual_v1",
16
+ voice_settings: {
17
+ stability: stability ?? 0.5,
18
+ similarity_boost: similarityBoost ?? 0.75,
19
+ },
20
+ };
21
+ GraphAILogger.log("ElevenLabs TTS options", requestBody);
22
+ const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voice}`, {
23
+ method: "POST",
24
+ headers: {
25
+ Accept: "audio/mpeg",
26
+ "Content-Type": "application/json",
27
+ "xi-api-key": apiKey,
28
+ },
29
+ body: JSON.stringify(requestBody),
30
+ });
31
+ if (!response.ok) {
32
+ throw new Error(`Eleven Labs API error: ${response.status} ${response.statusText}`);
33
+ }
34
+ const arrayBuffer = await response.arrayBuffer();
35
+ const buffer = Buffer.from(arrayBuffer);
36
+ return { buffer };
37
+ }
38
+ catch (e) {
39
+ if (suppressError) {
40
+ return {
41
+ error: e,
42
+ };
43
+ }
44
+ GraphAILogger.info(e);
45
+ throw new Error("TTS Eleven Labs Error");
46
+ }
47
+ };
48
+ const ttsElevenlabsAgentInfo = {
49
+ name: "ttsElevenlabsAgent",
50
+ agent: ttsElevenlabsAgent,
51
+ mock: ttsElevenlabsAgent,
52
+ samples: [],
53
+ description: "Eleven Labs TTS agent",
54
+ category: ["tts"],
55
+ author: "Receptron Team",
56
+ repository: "https://github.com/receptron/mulmocast-cli/",
57
+ license: "MIT",
58
+ environmentVariables: ["ELEVENLABS_API_KEY"],
59
+ };
60
+ export default ttsElevenlabsAgentInfo;
@@ -44,7 +44,7 @@ const ttsGoogleAgentInfo = {
44
44
  description: "Google TTS agent",
45
45
  category: ["tts"],
46
46
  author: "Receptron Team",
47
- repository: "https://github.com/receptron/graphai-agents/tree/main/tts/tts-openai-agent",
47
+ repository: "https://github.com/receptron/mulmocast-cli/",
48
48
  license: "MIT",
49
49
  environmentVariables: ["OPENAI_API_KEY"],
50
50
  };
@@ -57,8 +57,9 @@ const ttsNijivoiceAgentInfo = {
57
57
  samples: [],
58
58
  description: "TTS nijivoice agent",
59
59
  category: ["tts"],
60
- author: "isamu arimoto",
61
- repository: "https://github.com/receptron/graphai/",
60
+ author: "Receptron Team",
61
+ repository: "https://github.com/receptron/mulmocast-cli/",
62
62
  license: "MIT",
63
+ environmentVariables: ["NIJIVOICE_API_KEY"],
63
64
  };
64
65
  export default ttsNijivoiceAgentInfo;
@@ -36,7 +36,7 @@ const ttsOpenaiAgentInfo = {
36
36
  description: "OpenAI TTS agent",
37
37
  category: ["tts"],
38
38
  author: "Receptron Team",
39
- repository: "https://github.com/receptron/graphai-agents/tree/main/tts/tts-openai-agent",
39
+ repository: "https://github.com/receptron/mulmocast-cli/",
40
40
  license: "MIT",
41
41
  environmentVariables: ["OPENAI_API_KEY"],
42
42
  };
@@ -1,7 +1,10 @@
1
1
  import { audio } from "../../../actions/index.js";
2
- import { initializeContext, runTranslateIfNeeded } from "../../../cli/helpers.js";
2
+ import { initializeContext, runTranslateIfNeeded } from "../../helpers.js";
3
3
  export const handler = async (argv) => {
4
4
  const context = await initializeContext(argv);
5
+ if (!context) {
6
+ process.exit(1);
7
+ }
5
8
  await runTranslateIfNeeded(context, argv);
6
9
  await audio(context);
7
10
  };
@@ -1,7 +1,10 @@
1
1
  import { images } from "../../../actions/index.js";
2
- import { initializeContext, runTranslateIfNeeded } from "../../../cli/helpers.js";
2
+ import { initializeContext, runTranslateIfNeeded } from "../../helpers.js";
3
3
  export const handler = async (argv) => {
4
4
  const context = await initializeContext(argv);
5
+ if (!context) {
6
+ process.exit(1);
7
+ }
5
8
  await runTranslateIfNeeded(context, argv);
6
9
  await images(context);
7
10
  };
@@ -1,7 +1,10 @@
1
1
  import { audio, images, movie, captions } from "../../../actions/index.js";
2
- import { initializeContext, runTranslateIfNeeded } from "../../../cli/helpers.js";
2
+ import { initializeContext, runTranslateIfNeeded } from "../../helpers.js";
3
3
  export const handler = async (argv) => {
4
4
  const context = await initializeContext(argv);
5
+ if (!context) {
6
+ process.exit(1);
7
+ }
5
8
  await runTranslateIfNeeded(context, argv);
6
9
  await audio(context);
7
10
  await images(context);
@@ -1,7 +1,10 @@
1
1
  import { images, pdf } from "../../../actions/index.js";
2
- import { initializeContext, runTranslateIfNeeded } from "../../../cli/helpers.js";
2
+ import { initializeContext, runTranslateIfNeeded } from "../../helpers.js";
3
3
  export const handler = async (argv) => {
4
4
  const context = await initializeContext(argv);
5
+ if (!context) {
6
+ process.exit(1);
7
+ }
5
8
  await runTranslateIfNeeded(context, argv);
6
9
  await images(context);
7
10
  await pdf(context, argv.pdf_mode, argv.pdf_size);
@@ -1,6 +1,9 @@
1
1
  import { translate } from "../../../actions/index.js";
2
- import { initializeContext } from "../../../cli/helpers.js";
2
+ import { initializeContext } from "../../helpers.js";
3
3
  export const handler = async (argv) => {
4
4
  const context = await initializeContext(argv);
5
+ if (!context) {
6
+ process.exit(1);
7
+ }
5
8
  await translate(context);
6
9
  };
@@ -1,4 +1,4 @@
1
- import type { MulmoStudioContext } from "../types/type.js";
1
+ import type { MulmoScript, MulmoStudioContext } from "../types/type.js";
2
2
  import type { CliArgs } from "../types/cli_types.js";
3
3
  export declare const setGraphAILogger: (verbose: boolean | undefined, logValues?: Record<string, unknown>) => void;
4
4
  export interface FileObject {
@@ -20,7 +20,7 @@ export declare const getFileObject: (args: {
20
20
  audiodir?: string;
21
21
  file: string;
22
22
  }) => FileObject;
23
- export declare const fetchScript: (isHttpPath: boolean, mulmoFilePath: string, fileOrUrl: string) => Promise<any>;
23
+ export declare const fetchScript: (isHttpPath: boolean, mulmoFilePath: string, fileOrUrl: string) => Promise<MulmoScript | null>;
24
24
  type InitOptions = {
25
25
  b?: string;
26
26
  o?: string;
@@ -30,7 +30,7 @@ type InitOptions = {
30
30
  l?: string;
31
31
  c?: string;
32
32
  };
33
- export declare const initializeContext: (argv: CliArgs<InitOptions>) => Promise<MulmoStudioContext>;
33
+ export declare const initializeContext: (argv: CliArgs<InitOptions>) => Promise<MulmoStudioContext | null>;
34
34
  export declare const runTranslateIfNeeded: (context: MulmoStudioContext, argv: {
35
35
  l?: string;
36
36
  c?: string;
@@ -65,15 +65,15 @@ export const fetchScript = async (isHttpPath, mulmoFilePath, fileOrUrl) => {
65
65
  const res = await fetchMulmoScriptFile(fileOrUrl);
66
66
  if (!res.result || !res.script) {
67
67
  GraphAILogger.info(`ERROR: HTTP error! ${res.status} ${fileOrUrl}`);
68
- process.exit(1);
68
+ return null;
69
69
  }
70
70
  return res.script;
71
71
  }
72
72
  if (!fs.existsSync(mulmoFilePath)) {
73
73
  GraphAILogger.info(`ERROR: File not exists ${mulmoFilePath}`);
74
- process.exit(1);
74
+ return null;
75
75
  }
76
- return readMulmoScriptFile(mulmoFilePath, "ERROR: File does not exist " + mulmoFilePath).mulmoData;
76
+ return readMulmoScriptFile(mulmoFilePath, "ERROR: File does not exist " + mulmoFilePath)?.mulmoData ?? null;
77
77
  };
78
78
  export const initializeContext = async (argv) => {
79
79
  const files = getFileObject({
@@ -88,25 +88,43 @@ export const initializeContext = async (argv) => {
88
88
  files,
89
89
  });
90
90
  const mulmoScript = await fetchScript(isHttpPath, mulmoFilePath, fileOrUrl);
91
+ if (!mulmoScript) {
92
+ return null;
93
+ }
91
94
  // Create or update MulmoStudio file with MulmoScript
92
95
  const currentStudio = readMulmoScriptFile(outputStudioFilePath);
93
- const studio = (() => {
94
- try {
95
- // validate mulmoStudioSchema. skip if __test_invalid__ is true
96
- return createOrUpdateStudioData(mulmoScript, currentStudio?.mulmoData, fileName);
97
- }
98
- catch (error) {
99
- GraphAILogger.info(`Error: invalid MulmoScript Schema: ${isHttpPath ? fileOrUrl : mulmoFilePath} \n ${error}`);
100
- process.exit(1);
101
- }
102
- })();
103
- return {
104
- studio,
105
- fileDirs: files,
106
- force: Boolean(argv.f),
107
- lang: argv.l,
108
- caption: argv.c,
109
- };
96
+ try {
97
+ // validate mulmoStudioSchema. skip if __test_invalid__ is true
98
+ const studio = createOrUpdateStudioData(mulmoScript, currentStudio?.mulmoData, fileName);
99
+ return {
100
+ studio,
101
+ fileDirs: files,
102
+ force: Boolean(argv.f),
103
+ lang: argv.l,
104
+ caption: argv.c,
105
+ sessionState: {
106
+ inSession: {
107
+ audio: false,
108
+ image: false,
109
+ video: false,
110
+ multiLingual: false,
111
+ caption: false,
112
+ pdf: false,
113
+ },
114
+ inBeatSession: {
115
+ audio: {},
116
+ image: {},
117
+ movie: {},
118
+ multiLingual: {},
119
+ caption: {},
120
+ },
121
+ },
122
+ };
123
+ }
124
+ catch (error) {
125
+ GraphAILogger.info(`Error: invalid MulmoScript Schema: ${isHttpPath ? fileOrUrl : mulmoFilePath} \n ${error}`);
126
+ return null;
127
+ }
110
128
  };
111
129
  export const runTranslateIfNeeded = async (context, argv) => {
112
130
  if (argv.l || argv.c) {
@@ -1,4 +1,5 @@
1
1
  import { MulmoMediaSource, MulmoStudioContext } from "../types/index.js";
2
2
  export declare const MulmoMediaSourceMethods: {
3
3
  getText(mediaSource: MulmoMediaSource, context: MulmoStudioContext): Promise<string | null>;
4
+ resolve(mediaSource: MulmoMediaSource | undefined, context: MulmoStudioContext): string | null;
4
5
  };
@@ -1,5 +1,6 @@
1
1
  import fs from "fs";
2
2
  import { getFullPath } from "../utils/file.js";
3
+ import { MulmoStudioContextMethods } from "../methods/index.js";
3
4
  export const MulmoMediaSourceMethods = {
4
5
  async getText(mediaSource, context) {
5
6
  if (mediaSource.kind === "text") {
@@ -18,4 +19,15 @@ export const MulmoMediaSourceMethods = {
18
19
  }
19
20
  return null;
20
21
  },
22
+ resolve(mediaSource, context) {
23
+ if (!mediaSource)
24
+ return null;
25
+ if (mediaSource.kind === "path") {
26
+ return MulmoStudioContextMethods.resolveAssetPath(context, mediaSource.path);
27
+ }
28
+ if (mediaSource.kind === "url") {
29
+ return mediaSource.url;
30
+ }
31
+ return null;
32
+ },
21
33
  };
@@ -3,6 +3,7 @@ import { MulmoCanvasDimension, MulmoScript, MulmoBeat, SpeechOptions, Text2Speec
3
3
  export declare const MulmoScriptMethods: {
4
4
  getCanvasSize(script: MulmoScript): MulmoCanvasDimension;
5
5
  getSpeechProvider(script: MulmoScript): Text2SpeechProvider;
6
+ getAllSpeechProviders(script: MulmoScript): Set<Text2SpeechProvider>;
6
7
  getTextSlideStyle(script: MulmoScript, beat: MulmoBeat): string;
7
8
  getSpeechOptions(script: MulmoScript, beat: MulmoBeat): SpeechOptions | undefined;
8
9
  getImageAgentInfo(script: MulmoScript): Text2ImageAgentInfo;
@@ -21,6 +21,15 @@ export const MulmoScriptMethods = {
21
21
  getSpeechProvider(script) {
22
22
  return text2SpeechProviderSchema.parse(script.speechParams?.provider);
23
23
  },
24
+ getAllSpeechProviders(script) {
25
+ const providers = new Set();
26
+ const defaultProvider = this.getSpeechProvider(script);
27
+ Object.values(script.speechParams.speakers).forEach((speaker) => {
28
+ const provider = speaker.provider ?? defaultProvider;
29
+ providers.add(provider);
30
+ });
31
+ return providers;
32
+ },
24
33
  getTextSlideStyle(script, beat) {
25
34
  const styles = script.textSlideParams?.cssStyles ?? [];
26
35
  // NOTES: Taking advantage of CSS override rule (you can redefine it to override)
@@ -1,4 +1,9 @@
1
1
  import { MulmoStudioContext } from "../types/index.js";
2
+ type SessionType = "audio" | "image" | "video" | "multiLingual" | "caption" | "pdf";
3
+ type BeatSessionType = "audio" | "image" | "multiLingual" | "caption" | "movie";
2
4
  export declare const MulmoStudioContextMethods: {
3
5
  resolveAssetPath(context: MulmoStudioContext, relativePath: string): string;
6
+ setSessionState(context: MulmoStudioContext, sessionType: SessionType, value: boolean): void;
7
+ setBeatSessionState(context: MulmoStudioContext, sessionType: BeatSessionType, index: number, value: boolean): void;
4
8
  };
9
+ export {};
@@ -1,6 +1,29 @@
1
1
  import path from "path";
2
+ import { GraphAILogger } from "graphai";
3
+ const notifyStateChange = (context, sessionType) => {
4
+ const prefix = context.sessionState.inSession[sessionType] ? "<" : " >";
5
+ GraphAILogger.info(`${prefix} ${sessionType}`);
6
+ };
7
+ const notifyBeatStateChange = (context, sessionType, index) => {
8
+ const prefix = context.sessionState.inBeatSession[sessionType][index] ? "{" : " }";
9
+ GraphAILogger.info(`${prefix} ${sessionType} ${index}`);
10
+ };
2
11
  export const MulmoStudioContextMethods = {
3
12
  resolveAssetPath(context, relativePath) {
4
13
  return path.resolve(context.fileDirs.mulmoFileDirPath, relativePath);
5
14
  },
15
+ setSessionState(context, sessionType, value) {
16
+ context.sessionState.inSession[sessionType] = value;
17
+ notifyStateChange(context, sessionType);
18
+ },
19
+ setBeatSessionState(context, sessionType, index, value) {
20
+ if (value) {
21
+ context.sessionState.inBeatSession[sessionType][index] = true;
22
+ }
23
+ else {
24
+ // NOTE: Setting to false causes the parse error in rebuildStudio in preprocess.ts
25
+ delete context.sessionState.inBeatSession[sessionType][index];
26
+ }
27
+ notifyBeatStateChange(context, sessionType, index);
28
+ },
6
29
  };