mulmocast 1.1.4 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/lib/actions/audio.d.ts +0 -1
  2. package/lib/actions/audio.js +8 -12
  3. package/lib/actions/images.js +1 -0
  4. package/lib/actions/movie.js +1 -3
  5. package/lib/agents/image_openai_agent.js +4 -1
  6. package/lib/methods/mulmo_presentation_style.d.ts +2 -3
  7. package/lib/methods/mulmo_presentation_style.js +14 -8
  8. package/lib/types/agent.d.ts +3 -0
  9. package/lib/types/schema.d.ts +704 -0
  10. package/lib/types/schema.js +5 -1
  11. package/lib/utils/context.d.ts +25 -0
  12. package/lib/utils/file.d.ts +1 -1
  13. package/lib/utils/file.js +5 -2
  14. package/lib/utils/preprocess.d.ts +13 -0
  15. package/package.json +2 -1
  16. package/scripts/templates/image_prompt_only_template.ts +95 -0
  17. package/scripts/test/gpt.json +32 -0
  18. package/scripts/test/mulmo_story.json +11 -0
  19. package/scripts/test/test.json +64 -0
  20. package/scripts/test/test1.json +40 -0
  21. package/scripts/test/test2.json +66 -0
  22. package/scripts/test/test_audio.json +151 -0
  23. package/scripts/test/test_audio_instructions.json +69 -0
  24. package/scripts/test/test_beats.json +58 -0
  25. package/scripts/test/test_captions.json +52 -0
  26. package/scripts/test/test_elevenlabs_models.json +193 -0
  27. package/scripts/test/test_en.json +29 -0
  28. package/scripts/test/test_hello.json +17 -0
  29. package/scripts/test/test_hello_google.json +25 -0
  30. package/scripts/test/test_html.json +66 -0
  31. package/scripts/test/test_image_refs.json +49 -0
  32. package/scripts/test/test_images.json +48 -0
  33. package/scripts/test/test_lang.json +31 -0
  34. package/scripts/test/test_layout.json +152 -0
  35. package/scripts/test/test_lipsync.json +53 -0
  36. package/scripts/test/test_loop.json +34 -0
  37. package/scripts/test/test_media.json +244 -0
  38. package/scripts/test/test_mixed_providers.json +91 -0
  39. package/scripts/test/test_movie.json +39 -0
  40. package/scripts/test/test_no_audio.json +252 -0
  41. package/scripts/test/test_no_audio_with_credit.json +253 -0
  42. package/scripts/test/test_order.json +68 -0
  43. package/scripts/test/test_order_portrait.json +72 -0
  44. package/scripts/test/test_replicate.json +126 -0
  45. package/scripts/test/test_slideout_left_no_audio.json +45 -0
  46. package/scripts/test/test_sound_effect.json +41 -0
  47. package/scripts/test/test_spillover.json +116 -0
  48. package/scripts/test/test_transition.json +55 -0
  49. package/scripts/test/test_transition_no_audio.json +45 -0
  50. package/scripts/test/test_video_speed.json +80 -0
  51. package/scripts/test/test_voice_over.json +104 -0
  52. package/scripts/test/test_voices.json +54 -0
@@ -2,6 +2,5 @@ import "dotenv/config";
2
2
  import type { CallbackFunction } from "graphai";
3
3
  import { MulmoStudioContext, MulmoBeat } from "../types/index.js";
4
4
  export declare const getBeatAudioPath: (text: string, context: MulmoStudioContext, beat: MulmoBeat, lang?: string) => string | undefined;
5
- export declare const audioFilePath: (context: MulmoStudioContext) => string;
6
5
  export declare const generateBeatAudio: (index: number, context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<void>;
7
6
  export declare const audio: (context: MulmoStudioContext, settings?: Record<string, string>, callbacks?: CallbackFunction[]) => Promise<MulmoStudioContext>;
@@ -9,7 +9,7 @@ import ttsGoogleAgent from "../agents/tts_google_agent.js";
9
9
  import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
10
10
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
11
11
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
12
- import { text2SpeechProviderSchema, } from "../types/index.js";
12
+ import { text2SpeechProviderSchema } from "../types/index.js";
13
13
  import { fileCacheAgentFilter } from "../utils/filters.js";
14
14
  import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
15
15
  import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
@@ -30,15 +30,15 @@ const getAudioPath = (context, beat, audioFile) => {
30
30
  }
31
31
  return audioFile;
32
32
  };
33
- const getAudioParam = (presentationStyle, beat) => {
34
- const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
33
+ const getAudioParam = (context, beat) => {
34
+ const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat);
35
35
  const speechOptions = { ...speaker.speechOptions, ...beat.speechOptions };
36
36
  const provider = text2SpeechProviderSchema.parse(speaker.provider);
37
37
  return { voiceId: speaker.voiceId, provider, speechOptions, model: speaker.model };
38
38
  };
39
39
  export const getBeatAudioPath = (text, context, beat, lang) => {
40
40
  const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
41
- const { voiceId, provider, speechOptions, model } = getAudioParam(context.presentationStyle, beat);
41
+ const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat);
42
42
  const hash_string = [text, voiceId, speechOptions?.instruction ?? "", speechOptions?.speed ?? 1.0, provider, model ?? ""].join(":");
43
43
  const audioFileName = `${context.studio.filename}_${text2hash(hash_string)}`;
44
44
  const audioFile = getAudioFilePath(audioDirPath, context.studio.filename, audioFileName, lang);
@@ -46,9 +46,9 @@ export const getBeatAudioPath = (text, context, beat, lang) => {
46
46
  };
47
47
  const preprocessor = (namedInputs) => {
48
48
  const { beat, studioBeat, multiLingual, context } = namedInputs;
49
- const { lang, presentationStyle } = context;
49
+ const { lang } = context;
50
50
  const text = localizedText(beat, multiLingual, lang);
51
- const { voiceId, provider, speechOptions, model } = getAudioParam(presentationStyle, beat);
51
+ const { voiceId, provider, speechOptions, model } = getAudioParam(context, beat);
52
52
  const audioPath = getBeatAudioPath(text, context, beat, lang);
53
53
  studioBeat.audioFile = audioPath; // TODO: Passing by reference is difficult to maintain, so pass it using graphai inputs
54
54
  const needsTTS = !beat.audio && audioPath !== undefined;
@@ -174,11 +174,6 @@ const agentFilters = [
174
174
  nodeIds: ["tts"],
175
175
  },
176
176
  ];
177
- export const audioFilePath = (context) => {
178
- const fileName = MulmoStudioContextMethods.getFileName(context);
179
- const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
180
- return getAudioArtifactFilePath(outDirPath, fileName);
181
- };
182
177
  const getConcurrency = (context) => {
183
178
  // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
184
179
  const hasLimitedConcurrencyProvider = Object.values(context.presentationStyle.speechParams.speakers).some((speaker) => {
@@ -231,7 +226,7 @@ export const audio = async (context, settings, callbacks) => {
231
226
  const fileName = MulmoStudioContextMethods.getFileName(context);
232
227
  const audioDirPath = MulmoStudioContextMethods.getAudioDirPath(context);
233
228
  const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
234
- const audioArtifactFilePath = audioFilePath(context);
229
+ const audioArtifactFilePath = getAudioArtifactFilePath(context);
235
230
  const audioSegmentDirPath = resolveDirPath(audioDirPath, fileName);
236
231
  const audioCombinedFilePath = getAudioFilePath(audioDirPath, fileName, fileName, context.lang);
237
232
  const outputStudioFilePath = getOutputStudioFilePath(outDirPath, fileName);
@@ -253,6 +248,7 @@ export const audio = async (context, settings, callbacks) => {
253
248
  const result = await graph.run();
254
249
  writingMessage(audioCombinedFilePath);
255
250
  MulmoStudioContextMethods.setSessionState(context, "audio", false);
251
+ writingMessage(audioArtifactFilePath);
256
252
  return result.combineFiles;
257
253
  }
258
254
  catch (__error) {
@@ -135,6 +135,7 @@ const beat_graph_data = {
135
135
  model: ":preprocessor.imageParams.model",
136
136
  moderation: ":preprocessor.imageParams.moderation",
137
137
  canvasSize: ":context.presentationStyle.canvasSize",
138
+ quality: ":preprocessor.imageParams.quality",
138
139
  },
139
140
  },
140
141
  defaultValue: {},
@@ -246,9 +246,7 @@ export const movieFilePath = (context) => {
246
246
  export const movie = async (context) => {
247
247
  MulmoStudioContextMethods.setSessionState(context, "video", true);
248
248
  try {
249
- const fileName = MulmoStudioContextMethods.getFileName(context);
250
- const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
251
- const audioArtifactFilePath = getAudioArtifactFilePath(outDirPath, fileName);
249
+ const audioArtifactFilePath = getAudioArtifactFilePath(context);
252
250
  const outputVideoPath = movieFilePath(context);
253
251
  if (await createVideo(audioArtifactFilePath, outputVideoPath, context)) {
254
252
  writingMessage(outputVideoPath);
@@ -6,7 +6,7 @@ import { provider2ImageAgent } from "../utils/provider2agent.js";
6
6
  // https://platform.openai.com/docs/guides/image-generation
7
7
  export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
8
8
  const { prompt, referenceImages } = namedInputs;
9
- const { moderation, canvasSize } = params;
9
+ const { moderation, canvasSize, quality } = params;
10
10
  const { apiKey, baseURL } = { ...config };
11
11
  const model = params.model ?? provider2ImageAgent["openai"].defaultModel;
12
12
  const openai = new OpenAI({ apiKey, baseURL });
@@ -42,6 +42,9 @@ export const imageOpenaiAgent = async ({ namedInputs, params, config, }) => {
42
42
  };
43
43
  if (model === "gpt-image-1") {
44
44
  imageOptions.moderation = moderation || "auto";
45
+ if (quality) {
46
+ imageOptions.quality = quality;
47
+ }
45
48
  }
46
49
  const response = await (async () => {
47
50
  try {
@@ -1,12 +1,11 @@
1
1
  import "dotenv/config";
2
- import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider } from "../types/index.js";
2
+ import { MulmoCanvasDimension, MulmoBeat, Text2SpeechProvider, Text2ImageAgentInfo, Text2HtmlAgentInfo, BeatMediaType, MulmoPresentationStyle, SpeakerData, Text2ImageProvider, MulmoStudioContext } from "../types/index.js";
3
3
  export declare const MulmoPresentationStyleMethods: {
4
4
  getCanvasSize(presentationStyle: MulmoPresentationStyle): MulmoCanvasDimension;
5
5
  getAllSpeechProviders(presentationStyle: MulmoPresentationStyle): Set<Text2SpeechProvider>;
6
6
  getTextSlideStyle(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string;
7
7
  getDefaultSpeaker(presentationStyle: MulmoPresentationStyle): string;
8
- getSpeaker(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): SpeakerData;
9
- getTTSModel(presentationStyle: MulmoPresentationStyle, beat: MulmoBeat): string | undefined;
8
+ getSpeaker(context: MulmoStudioContext, beat: MulmoBeat): SpeakerData;
10
9
  getText2ImageProvider(provider: Text2ImageProvider | undefined): Text2ImageProvider;
11
10
  getImageAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): Text2ImageAgentInfo;
12
11
  getMovieAgentInfo(presentationStyle: MulmoPresentationStyle, beat?: MulmoBeat): {
@@ -46,18 +46,24 @@ export const MulmoPresentationStyleMethods = {
46
46
  }
47
47
  return keys[0];
48
48
  },
49
- getSpeaker(presentationStyle, beat) {
50
- userAssert(!!presentationStyle?.speechParams?.speakers, "presentationStyle.speechParams.speakers is not set!!");
51
- const speakerId = beat?.speaker ?? MulmoPresentationStyleMethods.getDefaultSpeaker(presentationStyle);
52
- userAssert(!!speakerId, "beat.speaker and default speaker is not set");
53
- const speaker = presentationStyle.speechParams.speakers[speakerId];
49
+ getSpeaker(context, beat) {
50
+ userAssert(!!context.presentationStyle?.speechParams?.speakers, "presentationStyle.speechParams.speakers is not set!!");
51
+ const speakerId = beat?.speaker ?? MulmoPresentationStyleMethods.getDefaultSpeaker(context.presentationStyle);
52
+ const speaker = context.presentationStyle.speechParams.speakers[speakerId];
54
53
  userAssert(!!speaker, `speaker is not set: speaker "${speakerId}"`);
54
+ // Check if the speaker has a language-specific version
55
+ const lang = context.lang ?? context.studio.script.lang;
56
+ if (speaker.lang && lang && speaker.lang[lang]) {
57
+ return speaker.lang[lang];
58
+ }
55
59
  return speaker;
56
60
  },
57
- getTTSModel(presentationStyle, beat) {
58
- const speaker = MulmoPresentationStyleMethods.getSpeaker(presentationStyle, beat);
59
- return speaker.model;
61
+ /* NOTE: This method is not used.
62
+ getTTSModel(context: MulmoStudioContext, beat: MulmoBeat): string | undefined {
63
+ const speaker = MulmoPresentationStyleMethods.getSpeaker(context, beat);
64
+ return speaker.model;
60
65
  },
66
+ */
61
67
  getText2ImageProvider(provider) {
62
68
  return text2ImageProviderSchema.parse(provider);
63
69
  },
@@ -1,11 +1,13 @@
1
1
  export type OpenAIImageSize = "1792x1024" | "1024x1792" | "1024x1024" | "1536x1024" | "1024x1536";
2
2
  export type OpenAIImageModeration = "low" | "auto";
3
+ export type OpenAIImageQuality = "low" | "medium" | "high" | "auto";
3
4
  export type OpenAIImageOptions = {
4
5
  model: string;
5
6
  prompt: string;
6
7
  n: number;
7
8
  size: OpenAIImageSize;
8
9
  moderation?: OpenAIImageModeration;
10
+ quality?: OpenAIImageQuality;
9
11
  };
10
12
  export type AgentBufferResult = {
11
13
  buffer: Buffer;
@@ -35,6 +37,7 @@ export type ImageAgentParams = {
35
37
  };
36
38
  export type OpenAIImageAgentParams = ImageAgentParams & {
37
39
  moderation: OpenAIImageModeration | null | undefined;
40
+ quality?: OpenAIImageQuality;
38
41
  };
39
42
  export type OpenAIImageAgentConfig = {
40
43
  baseURL?: string;