mulmocast 1.2.17 → 1.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -190,7 +190,8 @@ const beat_graph_data = {
190
190
  return { hasMovieAudio: true };
191
191
  }
192
192
  const sourceFile = namedInputs.movieFile || namedInputs.imageFile;
193
- if (!sourceFile || !fs.existsSync(sourceFile)) {
193
+ if (!sourceFile) {
194
+ // No need to check whether the file exists here; ffmpegGetMediaDuration checks it when the source is a local file.
194
195
  return { hasMovieAudio: false };
195
196
  }
196
197
  const { hasAudio } = await ffmpegGetMediaDuration(sourceFile);
@@ -68,10 +68,9 @@ const beatGraph = {
68
68
  // for cache
69
69
  multiLingual: {
70
70
  agent: (namedInputs) => {
71
- const { multiLinguals, beatIndex, text, beat } = namedInputs;
72
- const key = beatId(beat?.id, beatIndex);
71
+ const { multiLinguals, beatIndex, text } = namedInputs;
73
72
  const cacheKey = hashSHA256(text ?? "");
74
- const multiLingual = multiLinguals?.[key];
73
+ const multiLingual = multiLinguals?.[beatIndex];
75
74
  if (!multiLingual) {
76
75
  return { cacheKey, multiLingualTexts: {} };
77
76
  }
@@ -87,7 +86,6 @@ const beatGraph = {
87
86
  },
88
87
  inputs: {
89
88
  text: ":beat.text",
90
- beat: ":beat",
91
89
  beatIndex: ":__mapIndex",
92
90
  multiLinguals: ":context.multiLingual",
93
91
  },
@@ -199,14 +197,17 @@ const localizedTextCacheAgentFilter = async (context, next) => {
199
197
  }
200
198
  // same language
201
199
  if (targetLang === lang) {
200
+ GraphAILogger.log(`translate: ${beatIndex} same lang`);
202
201
  return { text: beat.text };
203
202
  }
204
203
  // The original text is unchanged and the target language text is present
205
204
  if (multiLingual.cacheKey === multiLingual.multiLingualTexts[targetLang]?.cacheKey) {
205
+ GraphAILogger.log(`translate: ${beatIndex} cache hit`);
206
206
  return { text: multiLingual.multiLingualTexts[targetLang].text };
207
207
  }
208
208
  try {
209
209
  MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, beat.id, true);
210
+ GraphAILogger.log(`translate: ${beatIndex} run`);
210
211
  return await next(context);
211
212
  }
212
213
  finally {
@@ -13,8 +13,9 @@ import ttsGoogleAgent from "./tts_google_agent.js";
13
13
  import validateSchemaAgent from "./validate_schema_agent.js";
14
14
  import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
15
15
  import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
16
+ import puppeteerCrawlerAgent from "./puppeteer_crawler_agent.js";
16
17
  import { browserlessAgent } from "@graphai/browserless_agent";
17
18
  import { textInputAgent } from "@graphai/input_agents";
18
19
  import { openAIAgent } from "@graphai/openai_agent";
19
20
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
20
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, };
21
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
@@ -13,9 +13,10 @@ import ttsGoogleAgent from "./tts_google_agent.js";
13
13
  import validateSchemaAgent from "./validate_schema_agent.js";
14
14
  import soundEffectReplicateAgent from "./sound_effect_replicate_agent.js";
15
15
  import lipSyncReplicateAgent from "./lipsync_replicate_agent.js";
16
+ import puppeteerCrawlerAgent from "./puppeteer_crawler_agent.js";
16
17
  import { browserlessAgent } from "@graphai/browserless_agent";
17
18
  import { textInputAgent } from "@graphai/input_agents";
18
19
  import { openAIAgent } from "@graphai/openai_agent";
19
20
  // import * as vanilla from "@graphai/vanilla";
20
21
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
21
- export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, };
22
+ export { openAIAgent, fileWriteAgent, browserlessAgent, textInputAgent, addBGMAgent, combineAudioFilesAgent, imageGenAIAgent, imageOpenaiAgent, tavilySearchAgent, movieGenAIAgent, movieReplicateAgent, mediaMockAgent, ttsElevenlabsAgent, ttsNijivoiceAgent, ttsOpenaiAgent, ttsGoogleAgent, validateSchemaAgent, soundEffectReplicateAgent, lipSyncReplicateAgent, puppeteerCrawlerAgent, };
@@ -1,6 +1,6 @@
1
1
  import { MulmoBeat } from "../types/index.js";
2
2
  export declare const MulmoBeatMethods: {
3
3
  getHtmlPrompt(beat: MulmoBeat): string | undefined;
4
- getPlugin(beat: MulmoBeat): typeof import("../utils/image_plugins/text_slide.js") | typeof import("../utils/image_plugins/markdown.js") | typeof import("../utils/image_plugins/chart.js") | typeof import("../utils/image_plugins/mermaid.js") | typeof import("../utils/image_plugins/html_tailwind.js") | typeof import("../utils/image_plugins/image.js") | typeof import("../utils/image_plugins/movie.js") | typeof import("../utils/image_plugins/beat.js") | typeof import("../utils/image_plugins/voice_over.js");
4
+ getPlugin(beat: MulmoBeat): typeof import("../utils/image_plugins/text_slide.js") | typeof import("../utils/image_plugins/markdown.js") | typeof import("../utils/image_plugins/chart.js") | typeof import("../utils/image_plugins/mermaid.js") | typeof import("../utils/image_plugins/html_tailwind.js") | typeof import("../utils/image_plugins/image.js") | typeof import("../utils/image_plugins/movie.js") | typeof import("../utils/image_plugins/beat.js") | typeof import("../utils/image_plugins/voice_over.js") | typeof import("../utils/image_plugins/vision.js");
5
5
  getImageReferenceForImageGenerator(beat: MulmoBeat, imageRefs: Record<string, string>): string[];
6
6
  };
@@ -465,6 +465,19 @@ export declare const mulmoVoiceOverMediaSchema: z.ZodObject<{
465
465
  type: "voice_over";
466
466
  startAt?: number | undefined;
467
467
  }>;
468
+ export declare const mulmoVisionMediaSchema: z.ZodObject<{
469
+ type: z.ZodLiteral<"vision">;
470
+ name: z.ZodString;
471
+ data: z.ZodRecord<z.ZodString, z.ZodAny>;
472
+ }, "strict", z.ZodTypeAny, {
473
+ type: "vision";
474
+ data: Record<string, any>;
475
+ name: string;
476
+ }, {
477
+ type: "vision";
478
+ data: Record<string, any>;
479
+ name: string;
480
+ }>;
468
481
  export declare const mulmoImageAssetSchema: z.ZodUnion<[z.ZodObject<{
469
482
  type: z.ZodLiteral<"markdown">;
470
483
  markdown: z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString, "many">]>;
@@ -902,6 +915,18 @@ export declare const mulmoImageAssetSchema: z.ZodUnion<[z.ZodObject<{
902
915
  }, {
903
916
  type: "voice_over";
904
917
  startAt?: number | undefined;
918
+ }>, z.ZodObject<{
919
+ type: z.ZodLiteral<"vision">;
920
+ name: z.ZodString;
921
+ data: z.ZodRecord<z.ZodString, z.ZodAny>;
922
+ }, "strict", z.ZodTypeAny, {
923
+ type: "vision";
924
+ data: Record<string, any>;
925
+ name: string;
926
+ }, {
927
+ type: "vision";
928
+ data: Record<string, any>;
929
+ name: string;
905
930
  }>]>;
906
931
  export declare const mulmoAudioAssetSchema: z.ZodUnion<[z.ZodObject<{
907
932
  type: z.ZodLiteral<"audio">;
@@ -1830,6 +1855,18 @@ export declare const mulmoBeatSchema: z.ZodObject<{
1830
1855
  }, {
1831
1856
  type: "voice_over";
1832
1857
  startAt?: number | undefined;
1858
+ }>, z.ZodObject<{
1859
+ type: z.ZodLiteral<"vision">;
1860
+ name: z.ZodString;
1861
+ data: z.ZodRecord<z.ZodString, z.ZodAny>;
1862
+ }, "strict", z.ZodTypeAny, {
1863
+ type: "vision";
1864
+ data: Record<string, any>;
1865
+ name: string;
1866
+ }, {
1867
+ type: "vision";
1868
+ data: Record<string, any>;
1869
+ name: string;
1833
1870
  }>]>>;
1834
1871
  audio: z.ZodOptional<z.ZodUnion<[z.ZodObject<{
1835
1872
  type: z.ZodLiteral<"audio">;
@@ -2263,6 +2300,10 @@ export declare const mulmoBeatSchema: z.ZodObject<{
2263
2300
  } | {
2264
2301
  type: "voice_over";
2265
2302
  startAt?: number | undefined;
2303
+ } | {
2304
+ type: "vision";
2305
+ data: Record<string, any>;
2306
+ name: string;
2266
2307
  } | undefined;
2267
2308
  audio?: {
2268
2309
  type: "audio";
@@ -2462,6 +2503,10 @@ export declare const mulmoBeatSchema: z.ZodObject<{
2462
2503
  } | {
2463
2504
  type: "voice_over";
2464
2505
  startAt?: number | undefined;
2506
+ } | {
2507
+ type: "vision";
2508
+ data: Record<string, any>;
2509
+ name: string;
2465
2510
  } | undefined;
2466
2511
  audio?: {
2467
2512
  type: "audio";
@@ -4310,6 +4355,18 @@ export declare const mulmoScriptSchema: z.ZodObject<{
4310
4355
  }, {
4311
4356
  type: "voice_over";
4312
4357
  startAt?: number | undefined;
4358
+ }>, z.ZodObject<{
4359
+ type: z.ZodLiteral<"vision">;
4360
+ name: z.ZodString;
4361
+ data: z.ZodRecord<z.ZodString, z.ZodAny>;
4362
+ }, "strict", z.ZodTypeAny, {
4363
+ type: "vision";
4364
+ data: Record<string, any>;
4365
+ name: string;
4366
+ }, {
4367
+ type: "vision";
4368
+ data: Record<string, any>;
4369
+ name: string;
4313
4370
  }>]>>;
4314
4371
  audio: z.ZodOptional<z.ZodUnion<[z.ZodObject<{
4315
4372
  type: z.ZodLiteral<"audio">;
@@ -4743,6 +4800,10 @@ export declare const mulmoScriptSchema: z.ZodObject<{
4743
4800
  } | {
4744
4801
  type: "voice_over";
4745
4802
  startAt?: number | undefined;
4803
+ } | {
4804
+ type: "vision";
4805
+ data: Record<string, any>;
4806
+ name: string;
4746
4807
  } | undefined;
4747
4808
  audio?: {
4748
4809
  type: "audio";
@@ -4942,6 +5003,10 @@ export declare const mulmoScriptSchema: z.ZodObject<{
4942
5003
  } | {
4943
5004
  type: "voice_over";
4944
5005
  startAt?: number | undefined;
5006
+ } | {
5007
+ type: "vision";
5008
+ data: Record<string, any>;
5009
+ name: string;
4945
5010
  } | undefined;
4946
5011
  audio?: {
4947
5012
  type: "audio";
@@ -5243,6 +5308,10 @@ export declare const mulmoScriptSchema: z.ZodObject<{
5243
5308
  } | {
5244
5309
  type: "voice_over";
5245
5310
  startAt?: number | undefined;
5311
+ } | {
5312
+ type: "vision";
5313
+ data: Record<string, any>;
5314
+ name: string;
5246
5315
  } | undefined;
5247
5316
  audio?: {
5248
5317
  type: "audio";
@@ -5474,6 +5543,10 @@ export declare const mulmoScriptSchema: z.ZodObject<{
5474
5543
  } | {
5475
5544
  type: "voice_over";
5476
5545
  startAt?: number | undefined;
5546
+ } | {
5547
+ type: "vision";
5548
+ data: Record<string, any>;
5549
+ name: string;
5477
5550
  } | undefined;
5478
5551
  audio?: {
5479
5552
  type: "audio";
@@ -6983,6 +7056,18 @@ export declare const mulmoStudioSchema: z.ZodObject<{
6983
7056
  }, {
6984
7057
  type: "voice_over";
6985
7058
  startAt?: number | undefined;
7059
+ }>, z.ZodObject<{
7060
+ type: z.ZodLiteral<"vision">;
7061
+ name: z.ZodString;
7062
+ data: z.ZodRecord<z.ZodString, z.ZodAny>;
7063
+ }, "strict", z.ZodTypeAny, {
7064
+ type: "vision";
7065
+ data: Record<string, any>;
7066
+ name: string;
7067
+ }, {
7068
+ type: "vision";
7069
+ data: Record<string, any>;
7070
+ name: string;
6986
7071
  }>]>>;
6987
7072
  audio: z.ZodOptional<z.ZodUnion<[z.ZodObject<{
6988
7073
  type: z.ZodLiteral<"audio">;
@@ -7416,6 +7501,10 @@ export declare const mulmoStudioSchema: z.ZodObject<{
7416
7501
  } | {
7417
7502
  type: "voice_over";
7418
7503
  startAt?: number | undefined;
7504
+ } | {
7505
+ type: "vision";
7506
+ data: Record<string, any>;
7507
+ name: string;
7419
7508
  } | undefined;
7420
7509
  audio?: {
7421
7510
  type: "audio";
@@ -7615,6 +7704,10 @@ export declare const mulmoStudioSchema: z.ZodObject<{
7615
7704
  } | {
7616
7705
  type: "voice_over";
7617
7706
  startAt?: number | undefined;
7707
+ } | {
7708
+ type: "vision";
7709
+ data: Record<string, any>;
7710
+ name: string;
7618
7711
  } | undefined;
7619
7712
  audio?: {
7620
7713
  type: "audio";
@@ -7916,6 +8009,10 @@ export declare const mulmoStudioSchema: z.ZodObject<{
7916
8009
  } | {
7917
8010
  type: "voice_over";
7918
8011
  startAt?: number | undefined;
8012
+ } | {
8013
+ type: "vision";
8014
+ data: Record<string, any>;
8015
+ name: string;
7919
8016
  } | undefined;
7920
8017
  audio?: {
7921
8018
  type: "audio";
@@ -8147,6 +8244,10 @@ export declare const mulmoStudioSchema: z.ZodObject<{
8147
8244
  } | {
8148
8245
  type: "voice_over";
8149
8246
  startAt?: number | undefined;
8247
+ } | {
8248
+ type: "vision";
8249
+ data: Record<string, any>;
8250
+ name: string;
8150
8251
  } | undefined;
8151
8252
  audio?: {
8152
8253
  type: "audio";
@@ -8631,6 +8732,10 @@ export declare const mulmoStudioSchema: z.ZodObject<{
8631
8732
  } | {
8632
8733
  type: "voice_over";
8633
8734
  startAt?: number | undefined;
8735
+ } | {
8736
+ type: "vision";
8737
+ data: Record<string, any>;
8738
+ name: string;
8634
8739
  } | undefined;
8635
8740
  audio?: {
8636
8741
  type: "audio";
@@ -8882,6 +8987,10 @@ export declare const mulmoStudioSchema: z.ZodObject<{
8882
8987
  } | {
8883
8988
  type: "voice_over";
8884
8989
  startAt?: number | undefined;
8990
+ } | {
8991
+ type: "vision";
8992
+ data: Record<string, any>;
8993
+ name: string;
8885
8994
  } | undefined;
8886
8995
  audio?: {
8887
8996
  type: "audio";
@@ -130,6 +130,13 @@ export const mulmoVoiceOverMediaSchema = z
130
130
  startAt: z.number().optional().describe("The time to start the voice over the video in seconds."),
131
131
  })
132
132
  .strict();
133
// NOTE(review): New Zod schema for an image asset whose `type` is the literal "vision".
// `name` is a plain string and `data` is a record of string keys to arbitrary (z.any) values,
// so the contents of `data` are not validated at this layer.
// `.strict()` makes the object reject keys other than `type`, `name`, and `data`.
+ export const mulmoVisionMediaSchema = z
134
+ .object({
135
+ type: z.literal("vision"),
136
+ name: z.string(),
137
+ data: z.record(z.string(), z.any()),
138
+ })
139
+ .strict();
133
140
  export const mulmoImageAssetSchema = z.union([
134
141
  mulmoMarkdownMediaSchema,
135
142
  mulmoWebMediaSchema,
@@ -143,6 +150,7 @@ export const mulmoImageAssetSchema = z.union([
143
150
  mulmoHtmlTailwindMediaSchema,
144
151
  mulmoBeatReferenceMediaSchema,
145
152
  mulmoVoiceOverMediaSchema,
153
+ mulmoVisionMediaSchema,
146
154
  ]);
147
155
  const mulmoAudioMediaSchema = z
148
156
  .object({
@@ -219,6 +219,10 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
219
219
  } | {
220
220
  type: "voice_over";
221
221
  startAt?: number | undefined;
222
+ } | {
223
+ type: "vision";
224
+ data: Record<string, any>;
225
+ name: string;
222
226
  } | undefined;
223
227
  audio?: {
224
228
  type: "audio";
@@ -567,6 +571,10 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
567
571
  } | {
568
572
  type: "voice_over";
569
573
  startAt?: number | undefined;
574
+ } | {
575
+ type: "vision";
576
+ data: Record<string, any>;
577
+ name: string;
570
578
  } | undefined;
571
579
  audio?: {
572
580
  type: "audio";
@@ -64,7 +64,9 @@ export const ffmpegGetMediaDuration = (filePath) => {
64
64
  return new Promise((resolve, reject) => {
65
65
  // Only check file existence for local paths, not URLs
66
66
  if (!filePath.startsWith("http://") && !filePath.startsWith("https://") && !fs.existsSync(filePath)) {
67
- reject(new Error(`File not found: ${filePath}`));
67
+ // NOTE: We don't reject here, for the sake of scripts/test/test_hello_image.json, which uses the mock image agent.
68
+ // reject(new Error(`File not found: ${filePath}`));
69
+ resolve({ duration: 0, hasAudio: false });
68
70
  return;
69
71
  }
70
72
  ffmpeg.ffprobe(filePath, (err, metadata) => {
@@ -7,4 +7,5 @@ import * as pluginImage from "./image.js";
7
7
  import * as pluginMovie from "./movie.js";
8
8
  import * as pluginBeat from "./beat.js";
9
9
  import * as pluginVoiceOver from "./voice_over.js";
10
- export declare const findImagePlugin: (imageType?: string) => typeof pluginTextSlide | typeof pluginMarkdown | typeof pluginChart | typeof pluginMermaid | typeof pluginHtmlTailwind | typeof pluginImage | typeof pluginMovie | typeof pluginBeat | typeof pluginVoiceOver | undefined;
10
+ import * as pluginVision from "./vision.js";
11
+ export declare const findImagePlugin: (imageType?: string) => typeof pluginTextSlide | typeof pluginMarkdown | typeof pluginChart | typeof pluginMermaid | typeof pluginHtmlTailwind | typeof pluginImage | typeof pluginMovie | typeof pluginBeat | typeof pluginVoiceOver | typeof pluginVision | undefined;
@@ -7,7 +7,19 @@ import * as pluginImage from "./image.js";
7
7
  import * as pluginMovie from "./movie.js";
8
8
  import * as pluginBeat from "./beat.js";
9
9
  import * as pluginVoiceOver from "./voice_over.js";
10
- const imagePlugins = [pluginTextSlide, pluginMarkdown, pluginImage, pluginChart, pluginMermaid, pluginMovie, pluginHtmlTailwind, pluginBeat, pluginVoiceOver];
10
+ import * as pluginVision from "./vision.js";
11
// NOTE(review): List of image plugin modules that findImagePlugin searches by `imageType`.
// Compared with the removed single-line array, the only new entry is pluginVision (appended last);
// the order of the existing entries is unchanged.
+ const imagePlugins = [
12
+ pluginTextSlide,
13
+ pluginMarkdown,
14
+ pluginImage,
15
+ pluginChart,
16
+ pluginMermaid,
17
+ pluginMovie,
18
+ pluginHtmlTailwind,
19
+ pluginBeat,
20
+ pluginVoiceOver,
21
+ pluginVision,
22
+ ];
11
23
  export const findImagePlugin = (imageType) => {
12
24
  return imagePlugins.find((plugin) => plugin.imageType === imageType);
13
25
  };
@@ -0,0 +1,4 @@
1
+ import { ImageProcessorParams } from "../../types/index.js";
2
// NOTE(review): Type declarations for the new "vision" image plugin module.
// `imageType` is the tag that findImagePlugin compares against.
// `process` resolves to a string or undefined; `path` returns a string.
+ export declare const imageType = "vision";
3
+ export declare const process: (params: ImageProcessorParams) => Promise<string | undefined>;
4
+ export declare const path: (params: ImageProcessorParams) => string;
@@ -0,0 +1,20 @@
1
+ import { parrotingImagePath } from "./utils.js";
2
+ import { htmlPlugin } from "mulmocast-vision";
3
+ export const imageType = "vision";
4
// NOTE(review): Builds a method name by prefixing "create" and upper-casing the first character
// of `str`, e.g. "sectionDividerPage" -> "createSectionDividerPage".
// An empty string yields just "create".
+ const toCreateName = (str) => {
5
+ return "create" + str.charAt(0).toUpperCase() + str.slice(1);
6
+ };
7
// NOTE(review): Image processor for beats whose image type is "vision".
// Returns undefined (doing nothing) when the beat has no image or the image is of another type;
// otherwise awaits the handler call and returns `imagePath`.
+ const processVision = async (params) => {
8
+ const { beat, imagePath } = params;
9
+ if (!beat?.image || beat.image.type !== imageType)
10
+ return;
11
+ const handler = new htmlPlugin({ outputDir: "" });
12
// NOTE(review): The method is looked up dynamically from the script-supplied `beat.image.name`.
// There is no visible check that `handler` has such a method, so an unknown name would fail
// with a TypeError at the call site rather than a descriptive error — consider validating first.
+ await handler[toCreateName(beat.image.name)](beat.image.data, {
13
+ name: beat.image.name,
14
+ imageFilePath: imagePath,
15
// NOTE(review): The regex only rewrites a trailing ".png". For any other extension the replace is
// a no-op and htmlFilePath would equal imageFilePath — presumably imagePath always ends in .png; confirm.
+ htmlFilePath: imagePath.replace(/\.png$/, ".html"),
16
+ });
17
+ return imagePath;
18
+ };
19
// NOTE(review): Public plugin surface: `process` is processVision, and `path` is the
// parrotingImagePath helper imported from ./utils.js.
+ export const process = processVision;
20
+ export const path = parrotingImagePath;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast",
3
- "version": "1.2.17",
3
+ "version": "1.2.19",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "lib/index.node.js",
@@ -87,6 +87,7 @@
87
87
  "graphai": "^2.0.14",
88
88
  "jsdom": "^26.1.0",
89
89
  "marked": "^16.2.0",
90
+ "mulmocast-vision": "^0.0.1",
90
91
  "ora": "^8.2.0",
91
92
  "puppeteer": "^24.17.0",
92
93
  "replicate": "^1.1.0",
@@ -0,0 +1,22 @@
1
+ {
2
+ "$mulmocast": {
3
+ "version": "1.1"
4
+ },
5
+ "lang": "en",
6
+ "title": "Media Test",
7
+ "beats": [
8
+ {
9
+ "id": "first",
10
+ "speaker": "Presenter",
11
+ "text": "This is a local image.",
12
+ "image": {
13
+ "type": "vision",
14
+ "name": "sectionDividerPage",
15
+ "data": {
16
+ "heading": "How AI Is Reshaping Referencing",
17
+ "subheading": "From sources to systems: reliability, traceability, and credit in the age of models"
18
+ }
19
+ }
20
+ }
21
+ ]
22
+ }
File without changes