mulmocast 1.1.4 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/lib/actions/audio.d.ts +0 -1
  2. package/lib/actions/audio.js +18 -13
  3. package/lib/actions/image_agents.d.ts +3 -12
  4. package/lib/actions/image_agents.js +12 -8
  5. package/lib/actions/images.js +3 -1
  6. package/lib/actions/movie.js +1 -3
  7. package/lib/actions/translate.js +13 -31
  8. package/lib/agents/image_openai_agent.js +4 -1
  9. package/lib/agents/lipsync_replicate_agent.js +10 -3
  10. package/lib/cli/commands/audio/handler.js +1 -1
  11. package/lib/cli/commands/image/handler.js +1 -1
  12. package/lib/cli/commands/movie/handler.js +1 -1
  13. package/lib/cli/commands/pdf/handler.js +1 -1
  14. package/lib/cli/helpers.d.ts +1 -4
  15. package/lib/cli/helpers.js +3 -2
  16. package/lib/mcp/server.js +1 -1
  17. package/lib/methods/mulmo_presentation_style.d.ts +5 -5
  18. package/lib/methods/mulmo_presentation_style.js +14 -8
  19. package/lib/methods/mulmo_script.js +4 -1
  20. package/lib/methods/mulmo_studio_context.d.ts +1 -0
  21. package/lib/methods/mulmo_studio_context.js +8 -0
  22. package/lib/types/agent.d.ts +4 -0
  23. package/lib/types/schema.d.ts +712 -8
  24. package/lib/types/schema.js +6 -2
  25. package/lib/types/type.d.ts +1 -1
  26. package/lib/utils/const.js +1 -1
  27. package/lib/utils/context.d.ts +401 -34
  28. package/lib/utils/context.js +95 -56
  29. package/lib/utils/file.d.ts +1 -1
  30. package/lib/utils/file.js +5 -2
  31. package/lib/utils/filters.d.ts +1 -0
  32. package/lib/utils/filters.js +8 -0
  33. package/lib/utils/preprocess.d.ts +15 -2
  34. package/lib/utils/preprocess.js +3 -3
  35. package/lib/utils/provider2agent.d.ts +3 -2
  36. package/lib/utils/provider2agent.js +20 -2
  37. package/lib/utils/string.d.ts +1 -1
  38. package/lib/utils/string.js +11 -8
  39. package/package.json +2 -1
  40. package/scripts/templates/image_refs.json +1 -0
  41. package/scripts/templates/voice_over.json +1 -0
  42. package/scripts/test/gpt.json +33 -0
  43. package/scripts/test/mulmo_story.json +11 -0
  44. package/scripts/test/test.json +64 -0
  45. package/scripts/test/test1.json +41 -0
  46. package/scripts/test/test2.json +66 -0
  47. package/scripts/test/test_audio.json +152 -0
  48. package/scripts/test/test_audio_instructions.json +70 -0
  49. package/scripts/test/test_beats.json +59 -0
  50. package/scripts/test/test_captions.json +53 -0
  51. package/scripts/test/test_elevenlabs_models.json +194 -0
  52. package/scripts/test/test_en.json +29 -0
  53. package/scripts/test/test_hello.json +18 -0
  54. package/scripts/test/test_hello_google.json +26 -0
  55. package/scripts/test/test_html.json +67 -0
  56. package/scripts/test/test_image_refs.json +50 -0
  57. package/scripts/test/test_images.json +49 -0
  58. package/scripts/test/test_lang.json +87 -0
  59. package/scripts/test/test_layout.json +153 -0
  60. package/scripts/test/test_lipsync.json +62 -0
  61. package/scripts/test/test_loop.json +35 -0
  62. package/scripts/test/test_media.json +245 -0
  63. package/scripts/test/test_mixed_providers.json +92 -0
  64. package/scripts/test/test_movie.json +40 -0
  65. package/scripts/test/test_no_audio.json +253 -0
  66. package/scripts/test/test_no_audio_with_credit.json +254 -0
  67. package/scripts/test/test_order.json +69 -0
  68. package/scripts/test/test_order_portrait.json +73 -0
  69. package/scripts/test/test_replicate.json +145 -0
  70. package/scripts/test/test_slideout_left_no_audio.json +46 -0
  71. package/scripts/test/test_sound_effect.json +41 -0
  72. package/scripts/test/test_spillover.json +117 -0
  73. package/scripts/test/test_transition.json +56 -0
  74. package/scripts/test/test_transition_no_audio.json +46 -0
  75. package/scripts/test/test_video_speed.json +81 -0
  76. package/scripts/test/test_voice_over.json +105 -0
  77. package/scripts/test/test_voices.json +55 -0
@@ -32,7 +32,9 @@ export const speakerDataSchema = z
32
32
  model: z.string().optional().describe("TTS model to use for this speaker"),
33
33
  })
34
34
  .strict();
35
- export const speakerDictionarySchema = z.record(speakerIdSchema, speakerDataSchema);
35
+ export const speakerDictionarySchema = z.record(speakerIdSchema, speakerDataSchema.extend({
36
+ lang: z.record(langSchema, speakerDataSchema).optional(),
37
+ }));
36
38
  export const mediaSourceSchema = z.discriminatedUnion("kind", [
37
39
  z.object({ kind: z.literal("url"), url: URLStringSchema }).strict(), // https://example.com/foo.pdf
38
40
  z.object({ kind: z.literal("base64"), data: z.string() }).strict(), // base64
@@ -172,6 +174,7 @@ export const mulmoOpenAIImageModelSchema = z
172
174
  .object({
173
175
  provider: z.literal("openai"),
174
176
  model: z.enum(provider2ImageAgent["openai"].models).optional(),
177
+ quality: z.enum(["low", "medium", "high", "auto"]).optional(),
175
178
  })
176
179
  .strict();
177
180
  // NOTE: This is for UI only. (until we figure out how to use it in mulmoImageParamsSchema)
@@ -185,6 +188,7 @@ export const mulmoImageParamsSchema = z
185
188
  .object({
186
189
  provider: text2ImageProviderSchema, // has default value
187
190
  model: z.string().optional(), // default: provider specific
191
+ quality: z.string().optional(), // optional image quality (model specific)
188
192
  style: z.string().optional(), // optional image style
189
193
  moderation: z.string().optional(), // optional image style
190
194
  images: mulmoImageParamsImagesSchema.optional(),
@@ -367,7 +371,7 @@ export const mulmoScriptSchema = mulmoPresentationStyleSchema
367
371
  title: z.string().optional(),
368
372
  description: z.string().optional(),
369
373
  references: z.array(mulmoReferenceSchema).optional(),
370
- lang: langSchema.optional(), // default "en"
374
+ lang: langSchema, // required (default WAS "en")
371
375
  beats: z.array(mulmoBeatSchema).min(1),
372
376
  // TODO: Delete it later
373
377
  imagePath: z.string().optional(), // for keynote images movie ??
@@ -53,7 +53,7 @@ export type FileDirs = {
53
53
  export type MulmoStudioContext = {
54
54
  fileDirs: FileDirs;
55
55
  studio: MulmoStudio;
56
- lang?: string;
56
+ lang: string;
57
57
  force: boolean;
58
58
  sessionState: MulmoSessionState;
59
59
  presentationStyle: MulmoPresentationStyle;
@@ -4,7 +4,7 @@ export const imageDirName = "images";
4
4
  export const cacheDirName = "cache";
5
5
  export const pdf_modes = ["slide", "talk", "handout"];
6
6
  export const pdf_sizes = ["letter", "a4"];
7
- export const languages = ["en", "ja"];
7
+ export const languages = ["en", "ja", "fr", "es"];
8
8
  export const storyToScriptGenerateMode = {
9
9
  stepWise: "step_wise",
10
10
  oneStep: "one_step",
@@ -1,9 +1,351 @@
1
1
  import type { MulmoScript, MulmoPresentationStyle, MulmoStudioMultiLingual } from "../types/type.js";
2
2
  import { FileObject } from "../types/index.js";
3
+ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: string, videoCaptionLang?: string, presentationStyle?: MulmoPresentationStyle | null) => {
4
+ beats: {
5
+ duration?: number | undefined;
6
+ startAt?: number | undefined;
7
+ hash?: string | undefined;
8
+ audioDuration?: number | undefined;
9
+ movieDuration?: number | undefined;
10
+ silenceDuration?: number | undefined;
11
+ hasMovieAudio?: boolean | undefined;
12
+ audioFile?: string | undefined;
13
+ imageFile?: string | undefined;
14
+ movieFile?: string | undefined;
15
+ soundEffectFile?: string | undefined;
16
+ lipSyncFile?: string | undefined;
17
+ captionFile?: string | undefined;
18
+ }[];
19
+ script: {
20
+ lang: string;
21
+ imageParams: {
22
+ provider: string;
23
+ model?: string | undefined;
24
+ style?: string | undefined;
25
+ quality?: string | undefined;
26
+ moderation?: string | undefined;
27
+ images?: Record<string, {
28
+ type: "image";
29
+ source: {
30
+ url: string;
31
+ kind: "url";
32
+ } | {
33
+ kind: "base64";
34
+ data: string;
35
+ } | {
36
+ text: string;
37
+ kind: "text";
38
+ } | {
39
+ path: string;
40
+ kind: "path";
41
+ };
42
+ } | {
43
+ type: "imagePrompt";
44
+ prompt: string;
45
+ }> | undefined;
46
+ };
47
+ audioParams: {
48
+ padding: number;
49
+ introPadding: number;
50
+ closingPadding: number;
51
+ outroPadding: number;
52
+ bgmVolume: number;
53
+ audioVolume: number;
54
+ suppressSpeech: boolean;
55
+ bgm?: {
56
+ url: string;
57
+ kind: "url";
58
+ } | {
59
+ kind: "base64";
60
+ data: string;
61
+ } | {
62
+ text: string;
63
+ kind: "text";
64
+ } | {
65
+ path: string;
66
+ kind: "path";
67
+ } | undefined;
68
+ };
69
+ movieParams: {
70
+ provider?: string | undefined;
71
+ model?: string | undefined;
72
+ fillOption?: {
73
+ style: "aspectFit" | "aspectFill";
74
+ } | undefined;
75
+ transition?: {
76
+ type: "fade" | "slideout_left";
77
+ duration: number;
78
+ } | undefined;
79
+ };
80
+ soundEffectParams: {
81
+ provider?: string | undefined;
82
+ model?: string | undefined;
83
+ };
84
+ $mulmocast: {
85
+ version: "1.1";
86
+ credit?: "closing" | undefined;
87
+ };
88
+ canvasSize: {
89
+ width: number;
90
+ height: number;
91
+ };
92
+ speechParams: {
93
+ speakers: Record<string, {
94
+ voiceId: string;
95
+ lang?: Record<string, {
96
+ voiceId: string;
97
+ displayName?: Record<string, string> | undefined;
98
+ isDefault?: boolean | undefined;
99
+ speechOptions?: {
100
+ speed?: number | undefined;
101
+ instruction?: string | undefined;
102
+ } | undefined;
103
+ provider?: string | undefined;
104
+ model?: string | undefined;
105
+ }> | undefined;
106
+ displayName?: Record<string, string> | undefined;
107
+ isDefault?: boolean | undefined;
108
+ speechOptions?: {
109
+ speed?: number | undefined;
110
+ instruction?: string | undefined;
111
+ } | undefined;
112
+ provider?: string | undefined;
113
+ model?: string | undefined;
114
+ }>;
115
+ };
116
+ beats: {
117
+ text: string;
118
+ image?: {
119
+ type: "markdown";
120
+ markdown: string | string[];
121
+ } | {
122
+ type: "web";
123
+ url: string;
124
+ } | {
125
+ type: "pdf";
126
+ source: {
127
+ url: string;
128
+ kind: "url";
129
+ } | {
130
+ kind: "base64";
131
+ data: string;
132
+ } | {
133
+ text: string;
134
+ kind: "text";
135
+ } | {
136
+ path: string;
137
+ kind: "path";
138
+ };
139
+ } | {
140
+ type: "image";
141
+ source: {
142
+ url: string;
143
+ kind: "url";
144
+ } | {
145
+ kind: "base64";
146
+ data: string;
147
+ } | {
148
+ text: string;
149
+ kind: "text";
150
+ } | {
151
+ path: string;
152
+ kind: "path";
153
+ };
154
+ } | {
155
+ type: "svg";
156
+ source: {
157
+ url: string;
158
+ kind: "url";
159
+ } | {
160
+ kind: "base64";
161
+ data: string;
162
+ } | {
163
+ text: string;
164
+ kind: "text";
165
+ } | {
166
+ path: string;
167
+ kind: "path";
168
+ };
169
+ } | {
170
+ type: "movie";
171
+ source: {
172
+ url: string;
173
+ kind: "url";
174
+ } | {
175
+ kind: "base64";
176
+ data: string;
177
+ } | {
178
+ text: string;
179
+ kind: "text";
180
+ } | {
181
+ path: string;
182
+ kind: "path";
183
+ };
184
+ } | {
185
+ type: "textSlide";
186
+ slide: {
187
+ title: string;
188
+ subtitle?: string | undefined;
189
+ bullets?: string[] | undefined;
190
+ };
191
+ } | {
192
+ type: "chart";
193
+ title: string;
194
+ chartData: Record<string, any>;
195
+ } | {
196
+ code: {
197
+ url: string;
198
+ kind: "url";
199
+ } | {
200
+ kind: "base64";
201
+ data: string;
202
+ } | {
203
+ text: string;
204
+ kind: "text";
205
+ } | {
206
+ path: string;
207
+ kind: "path";
208
+ };
209
+ type: "mermaid";
210
+ title: string;
211
+ appendix?: string[] | undefined;
212
+ } | {
213
+ type: "html_tailwind";
214
+ html: string | string[];
215
+ } | {
216
+ type: "beat";
217
+ id?: string | undefined;
218
+ } | {
219
+ type: "voice_over";
220
+ startAt?: number | undefined;
221
+ } | undefined;
222
+ audio?: {
223
+ type: "audio";
224
+ source: {
225
+ url: string;
226
+ kind: "url";
227
+ } | {
228
+ kind: "base64";
229
+ data: string;
230
+ } | {
231
+ text: string;
232
+ kind: "text";
233
+ } | {
234
+ path: string;
235
+ kind: "path";
236
+ };
237
+ } | {
238
+ type: "midi";
239
+ source: string;
240
+ } | undefined;
241
+ duration?: number | undefined;
242
+ speechOptions?: {
243
+ speed?: number | undefined;
244
+ instruction?: string | undefined;
245
+ } | undefined;
246
+ id?: string | undefined;
247
+ imagePrompt?: string | undefined;
248
+ speaker?: string | undefined;
249
+ description?: string | undefined;
250
+ imageParams?: {
251
+ provider: string;
252
+ model?: string | undefined;
253
+ style?: string | undefined;
254
+ quality?: string | undefined;
255
+ moderation?: string | undefined;
256
+ images?: Record<string, {
257
+ type: "image";
258
+ source: {
259
+ url: string;
260
+ kind: "url";
261
+ } | {
262
+ kind: "base64";
263
+ data: string;
264
+ } | {
265
+ text: string;
266
+ kind: "text";
267
+ } | {
268
+ path: string;
269
+ kind: "path";
270
+ };
271
+ } | {
272
+ type: "imagePrompt";
273
+ prompt: string;
274
+ }> | undefined;
275
+ } | undefined;
276
+ audioParams?: {
277
+ movieVolume: number;
278
+ padding?: number | undefined;
279
+ } | undefined;
280
+ movieParams?: {
281
+ speed?: number | undefined;
282
+ provider?: string | undefined;
283
+ model?: string | undefined;
284
+ fillOption?: {
285
+ style: "aspectFit" | "aspectFill";
286
+ } | undefined;
287
+ } | undefined;
288
+ soundEffectParams?: {
289
+ provider?: string | undefined;
290
+ model?: string | undefined;
291
+ } | undefined;
292
+ lipSyncParams?: {
293
+ provider?: string | undefined;
294
+ model?: string | undefined;
295
+ } | undefined;
296
+ htmlImageParams?: {
297
+ model?: string | undefined;
298
+ } | undefined;
299
+ textSlideParams?: {
300
+ cssStyles: string | string[];
301
+ } | undefined;
302
+ captionParams?: {
303
+ styles: string[];
304
+ lang?: string | undefined;
305
+ } | undefined;
306
+ imageNames?: string[] | undefined;
307
+ moviePrompt?: string | undefined;
308
+ soundEffectPrompt?: string | undefined;
309
+ htmlPrompt?: {
310
+ prompt: string;
311
+ data?: any;
312
+ images?: Record<string, any> | undefined;
313
+ systemPrompt?: string | undefined;
314
+ } | undefined;
315
+ enableLipSync?: boolean | undefined;
316
+ }[];
317
+ title?: string | undefined;
318
+ description?: string | undefined;
319
+ lipSyncParams?: {
320
+ provider?: string | undefined;
321
+ model?: string | undefined;
322
+ } | undefined;
323
+ htmlImageParams?: {
324
+ provider: string;
325
+ model?: string | undefined;
326
+ } | undefined;
327
+ textSlideParams?: {
328
+ cssStyles: string | string[];
329
+ } | undefined;
330
+ captionParams?: {
331
+ styles: string[];
332
+ lang?: string | undefined;
333
+ } | undefined;
334
+ references?: {
335
+ type: string;
336
+ url: string;
337
+ title?: string | undefined;
338
+ description?: string | undefined;
339
+ }[] | undefined;
340
+ imagePath?: string | undefined;
341
+ __test_invalid__?: boolean | undefined;
342
+ };
343
+ filename: string;
344
+ };
3
345
  export declare const fetchScript: (isHttpPath: boolean, mulmoFilePath: string, fileOrUrl: string) => Promise<MulmoScript | null>;
4
346
  export declare const getMultiLingual: (multilingualFilePath: string, studioBeatsLength: number) => MulmoStudioMultiLingual;
5
347
  export declare const getPresentationStyle: (presentationStylePath: string | undefined) => MulmoPresentationStyle | null;
6
- export declare const initializeContextFromFiles: (files: FileObject, raiseError: boolean, force?: boolean, caption?: string, lang?: string) => Promise<{
348
+ export declare const initializeContextFromFiles: (files: FileObject, raiseError: boolean, force?: boolean, captionLang?: string, targetLang?: string) => Promise<{
7
349
  studio: {
8
350
  beats: {
9
351
  duration?: number | undefined;
@@ -21,10 +363,12 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
21
363
  captionFile?: string | undefined;
22
364
  }[];
23
365
  script: {
366
+ lang: string;
24
367
  imageParams: {
25
368
  provider: string;
26
369
  model?: string | undefined;
27
370
  style?: string | undefined;
371
+ quality?: string | undefined;
28
372
  moderation?: string | undefined;
29
373
  images?: Record<string, {
30
374
  type: "image";
@@ -94,6 +438,17 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
94
438
  speechParams: {
95
439
  speakers: Record<string, {
96
440
  voiceId: string;
441
+ lang?: Record<string, {
442
+ voiceId: string;
443
+ displayName?: Record<string, string> | undefined;
444
+ isDefault?: boolean | undefined;
445
+ speechOptions?: {
446
+ speed?: number | undefined;
447
+ instruction?: string | undefined;
448
+ } | undefined;
449
+ provider?: string | undefined;
450
+ model?: string | undefined;
451
+ }> | undefined;
97
452
  displayName?: Record<string, string> | undefined;
98
453
  isDefault?: boolean | undefined;
99
454
  speechOptions?: {
@@ -242,6 +597,7 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
242
597
  provider: string;
243
598
  model?: string | undefined;
244
599
  style?: string | undefined;
600
+ quality?: string | undefined;
245
601
  moderation?: string | undefined;
246
602
  images?: Record<string, {
247
603
  type: "image";
@@ -304,7 +660,6 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
304
660
  } | undefined;
305
661
  enableLipSync?: boolean | undefined;
306
662
  }[];
307
- lang?: string | undefined;
308
663
  title?: string | undefined;
309
664
  description?: string | undefined;
310
665
  lipSyncParams?: {
@@ -333,35 +688,22 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
333
688
  };
334
689
  filename: string;
335
690
  };
691
+ multiLingual: {
692
+ multiLingualTexts: Record<string, {
693
+ text: string;
694
+ lang: string;
695
+ texts?: string[] | undefined;
696
+ ttsTexts?: string[] | undefined;
697
+ duration?: number | undefined;
698
+ }>;
699
+ }[];
336
700
  fileDirs: FileObject;
337
- force: boolean;
338
- lang: string | undefined;
339
- sessionState: {
340
- inSession: {
341
- audio: boolean;
342
- image: boolean;
343
- video: boolean;
344
- multiLingual: boolean;
345
- caption: boolean;
346
- pdf: boolean;
347
- };
348
- inBeatSession: {
349
- audio: {};
350
- image: {};
351
- movie: {};
352
- multiLingual: {};
353
- caption: {};
354
- html: {};
355
- imageReference: {};
356
- soundEffect: {};
357
- lipSync: {};
358
- };
359
- };
360
701
  presentationStyle: {
361
702
  imageParams: {
362
703
  provider: string;
363
704
  model?: string | undefined;
364
705
  style?: string | undefined;
706
+ quality?: string | undefined;
365
707
  moderation?: string | undefined;
366
708
  images?: Record<string, {
367
709
  type: "image";
@@ -431,6 +773,17 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
431
773
  speechParams: {
432
774
  speakers: Record<string, {
433
775
  voiceId: string;
776
+ lang?: Record<string, {
777
+ voiceId: string;
778
+ displayName?: Record<string, string> | undefined;
779
+ isDefault?: boolean | undefined;
780
+ speechOptions?: {
781
+ speed?: number | undefined;
782
+ instruction?: string | undefined;
783
+ } | undefined;
784
+ provider?: string | undefined;
785
+ model?: string | undefined;
786
+ }> | undefined;
434
787
  displayName?: Record<string, string> | undefined;
435
788
  isDefault?: boolean | undefined;
436
789
  speechOptions?: {
@@ -457,13 +810,27 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
457
810
  lang?: string | undefined;
458
811
  } | undefined;
459
812
  };
460
- multiLingual: {
461
- multiLingualTexts: Record<string, {
462
- text: string;
463
- lang: string;
464
- texts?: string[] | undefined;
465
- ttsTexts?: string[] | undefined;
466
- duration?: number | undefined;
467
- }>;
468
- }[];
813
+ sessionState: {
814
+ inSession: {
815
+ audio: boolean;
816
+ image: boolean;
817
+ video: boolean;
818
+ multiLingual: boolean;
819
+ caption: boolean;
820
+ pdf: boolean;
821
+ };
822
+ inBeatSession: {
823
+ audio: {};
824
+ image: {};
825
+ movie: {};
826
+ multiLingual: {};
827
+ caption: {};
828
+ html: {};
829
+ imageReference: {};
830
+ soundEffect: {};
831
+ lipSync: {};
832
+ };
833
+ };
834
+ force: boolean;
835
+ lang: string;
469
836
  } | null>;