mulmocast 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/assets/templates/characters.json +16 -0
  2. package/assets/templates/html.json +6 -0
  3. package/lib/actions/audio.js +8 -6
  4. package/lib/actions/image_agents.d.ts +121 -0
  5. package/lib/actions/image_agents.js +56 -0
  6. package/lib/actions/image_references.d.ts +9 -0
  7. package/lib/actions/image_references.js +79 -0
  8. package/lib/actions/images.d.ts +9 -105
  9. package/lib/actions/images.js +83 -182
  10. package/lib/actions/index.d.ts +2 -0
  11. package/lib/actions/index.js +2 -0
  12. package/lib/actions/movie.js +3 -1
  13. package/lib/actions/pdf.js +5 -2
  14. package/lib/agents/image_google_agent.d.ts +2 -15
  15. package/lib/agents/image_google_agent.js +3 -3
  16. package/lib/agents/image_openai_agent.d.ts +2 -17
  17. package/lib/agents/image_openai_agent.js +7 -7
  18. package/lib/agents/movie_google_agent.d.ts +2 -17
  19. package/lib/agents/movie_google_agent.js +7 -7
  20. package/lib/agents/movie_replicate_agent.d.ts +2 -16
  21. package/lib/agents/movie_replicate_agent.js +3 -3
  22. package/lib/agents/tts_google_agent.d.ts +9 -1
  23. package/lib/agents/tts_google_agent.js +2 -2
  24. package/lib/agents/tts_nijivoice_agent.js +1 -1
  25. package/lib/agents/tts_openai_agent.d.ts +13 -1
  26. package/lib/agents/tts_openai_agent.js +2 -2
  27. package/lib/cli/helpers.js +7 -7
  28. package/lib/methods/index.d.ts +1 -0
  29. package/lib/methods/index.js +1 -0
  30. package/lib/methods/mulmo_beat.d.ts +6 -0
  31. package/lib/methods/mulmo_beat.js +21 -0
  32. package/lib/methods/mulmo_presentation_style.d.ts +2 -0
  33. package/lib/methods/mulmo_presentation_style.js +24 -0
  34. package/lib/methods/mulmo_studio_context.js +3 -0
  35. package/lib/tools/story_to_script.js +2 -2
  36. package/lib/types/agent.d.ts +55 -0
  37. package/lib/types/agent.js +3 -0
  38. package/lib/types/schema.d.ts +322 -74
  39. package/lib/types/schema.js +10 -2
  40. package/lib/types/type.d.ts +3 -2
  41. package/lib/utils/context.d.ts +13 -2
  42. package/lib/utils/context.js +2 -0
  43. package/lib/utils/ffmpeg_utils.d.ts +1 -1
  44. package/lib/utils/ffmpeg_utils.js +1 -1
  45. package/lib/utils/file.js +4 -4
  46. package/lib/utils/filters.js +11 -7
  47. package/lib/utils/markdown.js +1 -1
  48. package/lib/utils/preprocess.d.ts +8 -2
  49. package/lib/utils/string.js +5 -5
  50. package/lib/utils/utils.d.ts +8 -1
  51. package/lib/utils/utils.js +51 -36
  52. package/package.json +10 -9
  53. package/scripts/templates/html.json +42 -0
  54. package/scripts/templates/image_refs.json +35 -0
@@ -0,0 +1,16 @@
1
+ {
2
+ "title": "Story with multiple characters",
3
+ "description": "Template for story with multiple characters.",
4
+ "presentationStyle": {
5
+ "$mulmocast": {
6
+ "version": "1.0",
7
+ "credit": "closing"
8
+ },
9
+ "canvasSize": {
10
+ "width": 1536,
11
+ "height": 1024
12
+ }
13
+ },
14
+ "systemPrompt": "Generate a script for a the given story with multiple characters. Generate image prompts for each character, and make references to them in the beats. Use the JSON below as a template.",
15
+ "scriptName": "image_refs.json"
16
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "title": "Business presentation in HTML",
3
+ "description": "Template for business presentation in HTML.",
4
+ "systemPrompt": "Generate a script for a business presentation of the given topic. Another LLM will generate actual slides from the prompt and data for each beat. Adding optional data would help it to generate more compelling slide. Mention the reference in one of beats, if it exists. The valid type of reference is 'article', 'paper', 'image', 'video', 'audio'. Use the JSON below as a template.",
5
+ "scriptName": "html.json"
6
+ }
@@ -91,11 +91,13 @@ const graph_tts = {
91
91
  agent: ":preprocessor.ttsAgent",
92
92
  inputs: {
93
93
  text: ":preprocessor.text",
94
- file: ":preprocessor.audioPath",
95
- force: ":context.force",
96
- mulmoContext: ":context", // for cache
97
- index: ":__mapIndex", // for cache
98
- sessionType: "audio", // for cache
94
+ cache: {
95
+ force: [":context.force"],
96
+ file: ":preprocessor.audioPath",
97
+ index: ":__mapIndex",
98
+ mulmoContext: ":context",
99
+ sessionType: "audio",
100
+ },
99
101
  params: {
100
102
  voice: ":preprocessor.voiceId",
101
103
  speed: ":preprocessor.speechOptions.speed",
@@ -239,7 +241,7 @@ export const audio = async (context, settings, callbacks) => {
239
241
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
240
242
  mkdir(outDirPath);
241
243
  mkdir(audioSegmentDirPath);
242
- const config = settings2GraphAIConfig(settings);
244
+ const config = settings2GraphAIConfig(settings, process.env);
243
245
  const taskManager = new TaskManager(getConcurrency(context));
244
246
  const graph = new GraphAI(graph_data, audioAgents, { agentFilters, taskManager, config });
245
247
  graph.injectValue("context", context);
@@ -0,0 +1,121 @@
1
+ import { MulmoStudioContext, MulmoBeat, MulmoCanvasDimension } from "../types/index.js";
2
+ export declare const imagePreprocessAgent: (namedInputs: {
3
+ context: MulmoStudioContext;
4
+ beat: MulmoBeat;
5
+ index: number;
6
+ imageRefs: Record<string, string>;
7
+ }) => Promise<{
8
+ imagePath: string;
9
+ htmlPrompt: string | undefined;
10
+ htmlPath: string;
11
+ htmlImageSystemPrompt: string[];
12
+ } | {
13
+ imagePath: string | undefined;
14
+ referenceImageForMovie: string | undefined;
15
+ imageParams: {
16
+ provider: "openai" | "google";
17
+ style?: string | undefined;
18
+ model?: string | undefined;
19
+ moderation?: string | undefined;
20
+ images?: Record<string, {
21
+ type: "image";
22
+ source: {
23
+ url: string;
24
+ kind: "url";
25
+ } | {
26
+ kind: "base64";
27
+ data: string;
28
+ } | {
29
+ text: string;
30
+ kind: "text";
31
+ } | {
32
+ path: string;
33
+ kind: "path";
34
+ };
35
+ } | {
36
+ type: "imagePrompt";
37
+ prompt: string;
38
+ }> | undefined;
39
+ };
40
+ movieFile: string | undefined;
41
+ htmlPrompt?: undefined;
42
+ htmlPath?: undefined;
43
+ htmlImageSystemPrompt?: undefined;
44
+ } | {
45
+ imagePath: string;
46
+ imageFromMovie: boolean;
47
+ imageParams: {
48
+ provider: "openai" | "google";
49
+ style?: string | undefined;
50
+ model?: string | undefined;
51
+ moderation?: string | undefined;
52
+ images?: Record<string, {
53
+ type: "image";
54
+ source: {
55
+ url: string;
56
+ kind: "url";
57
+ } | {
58
+ kind: "base64";
59
+ data: string;
60
+ } | {
61
+ text: string;
62
+ kind: "text";
63
+ } | {
64
+ path: string;
65
+ kind: "path";
66
+ };
67
+ } | {
68
+ type: "imagePrompt";
69
+ prompt: string;
70
+ }> | undefined;
71
+ };
72
+ movieFile: string | undefined;
73
+ htmlPrompt?: undefined;
74
+ htmlPath?: undefined;
75
+ htmlImageSystemPrompt?: undefined;
76
+ } | {
77
+ imagePath: string;
78
+ referenceImageForMovie: string;
79
+ imageAgentInfo: import("../types/type.js").Text2ImageAgentInfo;
80
+ prompt: string;
81
+ referenceImages: string[];
82
+ imageParams: {
83
+ provider: "openai" | "google";
84
+ style?: string | undefined;
85
+ model?: string | undefined;
86
+ moderation?: string | undefined;
87
+ images?: Record<string, {
88
+ type: "image";
89
+ source: {
90
+ url: string;
91
+ kind: "url";
92
+ } | {
93
+ kind: "base64";
94
+ data: string;
95
+ } | {
96
+ text: string;
97
+ kind: "text";
98
+ } | {
99
+ path: string;
100
+ kind: "path";
101
+ };
102
+ } | {
103
+ type: "imagePrompt";
104
+ prompt: string;
105
+ }> | undefined;
106
+ };
107
+ movieFile: string | undefined;
108
+ htmlPrompt?: undefined;
109
+ htmlPath?: undefined;
110
+ htmlImageSystemPrompt?: undefined;
111
+ }>;
112
+ export declare const imagePluginAgent: (namedInputs: {
113
+ context: MulmoStudioContext;
114
+ beat: MulmoBeat;
115
+ index: number;
116
+ }) => Promise<void>;
117
+ export declare const htmlImageGeneratorAgent: (namedInputs: {
118
+ file: string;
119
+ canvasSize: MulmoCanvasDimension;
120
+ htmlText: string;
121
+ }) => Promise<void>;
@@ -0,0 +1,56 @@
1
+ import { MulmoPresentationStyleMethods, MulmoStudioContextMethods, MulmoBeatMethods } from "../methods/index.js";
2
+ import { getBeatPngImagePath, getBeatMoviePath } from "../utils/file.js";
3
+ import { imagePrompt, htmlImageSystemPrompt } from "../utils/prompt.js";
4
+ import { renderHTMLToImage } from "../utils/markdown.js";
5
+ const htmlStyle = (context, beat) => {
6
+ return {
7
+ canvasSize: MulmoPresentationStyleMethods.getCanvasSize(context.presentationStyle),
8
+ textSlideStyle: MulmoPresentationStyleMethods.getTextSlideStyle(context.presentationStyle, beat),
9
+ };
10
+ };
11
+ export const imagePreprocessAgent = async (namedInputs) => {
12
+ const { context, beat, index, imageRefs } = namedInputs;
13
+ const imagePath = getBeatPngImagePath(context, index);
14
+ if (beat.htmlPrompt) {
15
+ const htmlPrompt = MulmoBeatMethods.getHtmlPrompt(beat);
16
+ const htmlPath = imagePath.replace(/\.[^/.]+$/, ".html");
17
+ return { imagePath, htmlPrompt, htmlPath, htmlImageSystemPrompt: htmlImageSystemPrompt(context.presentationStyle.canvasSize) };
18
+ }
19
+ const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle, beat);
20
+ const returnValue = {
21
+ imageParams: imageAgentInfo.imageParams,
22
+ movieFile: beat.moviePrompt ? getBeatMoviePath(context, index) : undefined,
23
+ };
24
+ if (beat.image) {
25
+ const plugin = MulmoBeatMethods.getPlugin(beat);
26
+ const pluginPath = plugin.path({ beat, context, imagePath, ...htmlStyle(context, beat) });
27
+ // undefined prompt indicates that image generation is not needed
28
+ return { ...returnValue, imagePath: pluginPath, referenceImageForMovie: pluginPath };
29
+ }
30
+ if (beat.moviePrompt && !beat.imagePrompt) {
31
+ return { ...returnValue, imagePath, imageFromMovie: true }; // no image prompt, only movie prompt
32
+ }
33
+ // referenceImages for "edit_image", openai agent.
34
+ const referenceImages = MulmoBeatMethods.getImageReferenceForImageGenerator(beat, imageRefs);
35
+ const prompt = imagePrompt(beat, imageAgentInfo.imageParams.style);
36
+ return { ...returnValue, imagePath, referenceImageForMovie: imagePath, imageAgentInfo, prompt, referenceImages };
37
+ };
38
+ export const imagePluginAgent = async (namedInputs) => {
39
+ const { context, beat, index } = namedInputs;
40
+ const imagePath = getBeatPngImagePath(context, index);
41
+ const plugin = MulmoBeatMethods.getPlugin(beat);
42
+ try {
43
+ MulmoStudioContextMethods.setBeatSessionState(context, "image", index, true);
44
+ const processorParams = { beat, context, imagePath, ...htmlStyle(context, beat) };
45
+ await plugin.process(processorParams);
46
+ MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
47
+ }
48
+ catch (error) {
49
+ MulmoStudioContextMethods.setBeatSessionState(context, "image", index, false);
50
+ throw error;
51
+ }
52
+ };
53
+ export const htmlImageGeneratorAgent = async (namedInputs) => {
54
+ const { file, canvasSize, htmlText } = namedInputs;
55
+ await renderHTMLToImage(htmlText, file, canvasSize.width, canvasSize.height);
56
+ };
@@ -0,0 +1,9 @@
1
+ import { MulmoStudioContext, MulmoImagePromptMedia } from "../types/index.js";
2
+ export declare const generateReferenceImage: (inputs: {
3
+ context: MulmoStudioContext;
4
+ key: string;
5
+ index: number;
6
+ image: MulmoImagePromptMedia;
7
+ force?: boolean;
8
+ }) => Promise<string>;
9
+ export declare const getImageRefs: (context: MulmoStudioContext) => Promise<Record<string, string>>;
@@ -0,0 +1,79 @@
1
+ import fs from "fs";
2
+ import { GraphAI } from "graphai";
3
+ import { getReferenceImagePath } from "../utils/file.js";
4
+ import { getExtention } from "../utils/utils.js";
5
+ import { graphOption } from "./images.js";
6
+ import { MulmoPresentationStyleMethods, MulmoStudioContextMethods } from "../methods/index.js";
7
+ import { imageGoogleAgent, imageOpenaiAgent } from "../agents/index.js";
8
+ // public api
9
+ // Application may call this function directly to generate reference image.
10
+ export const generateReferenceImage = async (inputs) => {
11
+ const { context, key, index, image, force } = inputs;
12
+ const imagePath = getReferenceImagePath(context, key, "png");
13
+ // generate image
14
+ const imageAgentInfo = MulmoPresentationStyleMethods.getImageAgentInfo(context.presentationStyle);
15
+ const prompt = `${image.prompt}\n${imageAgentInfo.imageParams.style || ""}`;
16
+ const image_graph_data = {
17
+ version: 0.5,
18
+ nodes: {
19
+ imageGenerator: {
20
+ agent: imageAgentInfo.agent,
21
+ retry: 2,
22
+ inputs: {
23
+ prompt,
24
+ cache: {
25
+ force: [context.force, force ?? false],
26
+ file: imagePath,
27
+ index,
28
+ mulmoContext: context,
29
+ sessionType: "imageReference",
30
+ },
31
+ },
32
+ params: {
33
+ model: imageAgentInfo.imageParams.model,
34
+ canvasSize: context.presentationStyle.canvasSize,
35
+ },
36
+ },
37
+ },
38
+ };
39
+ const options = await graphOption(context);
40
+ const graph = new GraphAI(image_graph_data, { imageGoogleAgent, imageOpenaiAgent }, options);
41
+ await graph.run();
42
+ return imagePath;
43
+ };
44
+ const downLoadImage = async (context, key, url) => {
45
+ const response = await fetch(url);
46
+ if (!response.ok) {
47
+ throw new Error(`Failed to download image: ${url}`);
48
+ }
49
+ const buffer = Buffer.from(await response.arrayBuffer());
50
+ // Detect file extension from Content-Type header or URL
51
+ const extension = getExtention(response.headers.get("content-type"), url);
52
+ const imagePath = getReferenceImagePath(context, key, extension);
53
+ await fs.promises.writeFile(imagePath, buffer);
54
+ return imagePath;
55
+ };
56
+ export const getImageRefs = async (context) => {
57
+ const images = context.presentationStyle.imageParams?.images;
58
+ if (!images) {
59
+ return {};
60
+ }
61
+ const imageRefs = {};
62
+ await Promise.all(Object.keys(images)
63
+ .sort()
64
+ .map(async (key, index) => {
65
+ const image = images[key];
66
+ if (image.type === "imagePrompt") {
67
+ imageRefs[key] = await generateReferenceImage({ context, key, index, image, force: false });
68
+ }
69
+ else if (image.type === "image") {
70
+ if (image.source.kind === "path") {
71
+ imageRefs[key] = MulmoStudioContextMethods.resolveAssetPath(context, image.source.path);
72
+ }
73
+ else if (image.source.kind === "url") {
74
+ imageRefs[key] = await downLoadImage(context, key, image.source.url);
75
+ }
76
+ }
77
+ }));
78
+ return imageRefs;
79
+ };
@@ -1,108 +1,12 @@
1
- import type { CallbackFunction } from "graphai";
2
- import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
3
- export declare const imagePreprocessAgent: (namedInputs: {
4
- context: MulmoStudioContext;
5
- beat: MulmoBeat;
1
+ import type { GraphOptions, CallbackFunction } from "graphai";
2
+ import { MulmoStudioContext } from "../types/index.js";
3
+ export declare const graphOption: (context: MulmoStudioContext, settings?: Record<string, string>) => Promise<GraphOptions>;
4
+ export declare const images: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
5
+ export declare const generateBeatImage: (inputs: {
6
6
  index: number;
7
- imageRefs: Record<string, string>;
8
- }) => Promise<{
9
- imageParams: {
10
- provider: "openai" | "google";
11
- style?: string | undefined;
12
- model?: string | undefined;
13
- moderation?: string | undefined;
14
- images?: Record<string, {
15
- type: "image";
16
- source: {
17
- url: string;
18
- kind: "url";
19
- } | {
20
- kind: "base64";
21
- data: string;
22
- } | {
23
- text: string;
24
- kind: "text";
25
- } | {
26
- path: string;
27
- kind: "path";
28
- };
29
- }> | undefined;
30
- };
31
- movieFile: string | undefined;
32
- imagePath: string | undefined;
33
- referenceImage: string | undefined;
34
- htmlPrompt?: undefined;
35
- htmlImageSystemPrompt?: undefined;
36
- } | {
37
- imagePath: string;
38
- htmlPrompt: string;
39
- htmlImageSystemPrompt: string[];
40
- } | {
41
- imagePath: string;
42
- images: string[];
43
- imageFromMovie: boolean;
44
- imageParams: {
45
- provider: "openai" | "google";
46
- style?: string | undefined;
47
- model?: string | undefined;
48
- moderation?: string | undefined;
49
- images?: Record<string, {
50
- type: "image";
51
- source: {
52
- url: string;
53
- kind: "url";
54
- } | {
55
- kind: "base64";
56
- data: string;
57
- } | {
58
- text: string;
59
- kind: "text";
60
- } | {
61
- path: string;
62
- kind: "path";
63
- };
64
- }> | undefined;
65
- };
66
- movieFile: string | undefined;
67
- htmlPrompt?: undefined;
68
- htmlImageSystemPrompt?: undefined;
69
- } | {
70
- images: string[];
71
- imageParams: {
72
- provider: "openai" | "google";
73
- style?: string | undefined;
74
- model?: string | undefined;
75
- moderation?: string | undefined;
76
- images?: Record<string, {
77
- type: "image";
78
- source: {
79
- url: string;
80
- kind: "url";
81
- } | {
82
- kind: "base64";
83
- data: string;
84
- } | {
85
- text: string;
86
- kind: "text";
87
- } | {
88
- path: string;
89
- kind: "path";
90
- };
91
- }> | undefined;
92
- };
93
- movieFile: string | undefined;
94
- imageAgentInfo: import("../types/type.js").Text2ImageAgentInfo;
95
- imagePath: string;
96
- referenceImage: string;
97
- prompt: string;
98
- htmlPrompt?: undefined;
99
- htmlImageSystemPrompt?: undefined;
100
- }>;
101
- export declare const imagePluginAgent: (namedInputs: {
102
7
  context: MulmoStudioContext;
103
- beat: MulmoBeat;
104
- index: number;
8
+ settings?: Record<string, string>;
9
+ callbacks?: CallbackFunction[];
10
+ forceMovie?: boolean;
11
+ forceImage?: boolean;
105
12
  }) => Promise<void>;
106
- export declare const getImageRefs: (context: MulmoStudioContext) => Promise<Record<string, string>>;
107
- export declare const images: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
108
- export declare const generateBeatImage: (index: number, context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<void>;