mulmocast 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/assets/templates/ani.json +8 -3
  2. package/assets/templates/html.json +0 -1
  3. package/lib/actions/audio.js +1 -0
  4. package/lib/actions/captions.js +2 -2
  5. package/lib/actions/image_agents.js +3 -3
  6. package/lib/actions/images.js +5 -0
  7. package/lib/actions/translate.js +2 -2
  8. package/lib/agents/image_genai_agent.js +1 -1
  9. package/lib/agents/image_openai_agent.js +3 -0
  10. package/lib/agents/lipsync_replicate_agent.js +1 -1
  11. package/lib/agents/movie_genai_agent.js +1 -1
  12. package/lib/agents/movie_replicate_agent.js +1 -1
  13. package/lib/agents/sound_effect_replicate_agent.js +1 -1
  14. package/lib/agents/tts_elevenlabs_agent.js +1 -1
  15. package/lib/agents/tts_nijivoice_agent.js +10 -6
  16. package/lib/agents/tts_openai_agent.js +3 -0
  17. package/lib/data/bgmAssets.d.ts +18 -0
  18. package/lib/data/bgmAssets.js +101 -0
  19. package/lib/data/index.d.ts +1 -0
  20. package/lib/data/index.js +1 -0
  21. package/lib/data/promptTemplates.d.ts +13 -74
  22. package/lib/data/promptTemplates.js +7 -110
  23. package/lib/data/scriptTemplates.d.ts +1 -1
  24. package/lib/data/scriptTemplates.js +1 -0
  25. package/lib/data/templateDataSet.d.ts +0 -2
  26. package/lib/data/templateDataSet.js +1 -9
  27. package/lib/methods/mulmo_studio_context.d.ts +1 -1
  28. package/lib/methods/mulmo_studio_context.js +9 -8
  29. package/lib/types/schema.d.ts +45 -45
  30. package/lib/types/schema.js +9 -9
  31. package/lib/types/type.d.ts +1 -1
  32. package/lib/utils/filters.js +3 -3
  33. package/lib/utils/provider2agent.d.ts +7 -2
  34. package/lib/utils/provider2agent.js +21 -2
  35. package/package.json +11 -11
  36. package/scripts/templates/image_prompt_only_template.json +1 -0
  37. package/assets/templates/ghibli_image_only.json +0 -28
  38. package/assets/templates/ghibli_shorts.json +0 -33
  39. package/scripts/test/test_hello_caption.json~ +0 -21
  40. package/scripts/test/test_hello_image.json~ +0 -18
package/lib/data/promptTemplates.js CHANGED
@@ -100,6 +100,12 @@ export const promptTemplates = [
  speechParams: {
  speakers: {
  Presenter: {
+ lang: {
+ ja: {
+ provider: "nijivoice",
+ voiceId: "9d9ed276-49ee-443a-bc19-26e6136d05f0",
+ },
+ },
  speechOptions: {
  instruction: "Speak in a slightly high-pitched, curt tone with sudden flustered shifts—like a tsundere anime girl.",
  },
@@ -110,7 +116,7 @@ export const promptTemplates = [
  },
  scriptName: "image_prompts_template.json",
  systemPrompt: "言葉づかいは思いっきりツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
- title: "Presentation with Ani",
+ title: "Presentation by Ani",
  },
  {
  description: "Template for business presentation.",
@@ -370,115 +376,6 @@ export const promptTemplates = [
  systemPrompt: "Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  title: "Ghibli comic style",
  },
- {
- description: "Template for Ghibli-style image-only comic presentation.",
- filename: "ghibli_image_only",
- presentationStyle: {
- $mulmocast: {
- credit: "closing",
- version: "1.1",
- },
- audioParams: {
- audioVolume: 1,
- bgmVolume: 0.2,
- closingPadding: 0.8,
- introPadding: 1,
- outroPadding: 1,
- padding: 0.3,
- suppressSpeech: false,
- },
- canvasSize: {
- height: 1024,
- width: 1536,
- },
- imageParams: {
- images: {
- presenter: {
- source: {
- kind: "url",
- url: "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png",
- },
- type: "image",
- },
- },
- style: "<style>Ghibli style</style>",
- },
- movieParams: {
- provider: "replicate",
- },
- soundEffectParams: {
- provider: "replicate",
- },
- speechParams: {
- speakers: {
- Presenter: {
- displayName: {
- en: "Presenter",
- },
- voiceId: "shimmer",
- },
- },
- },
- },
- scriptName: "image_prompt_only_template.json",
- systemPrompt: "Another AI will generate an image for each beat based on the text description of that beat. Use the JSON below as a template.",
- title: "Ghibli comic image-only",
- },
- {
- description: "Template for Ghibli-style comic presentation.",
- filename: "ghibli_shorts",
- presentationStyle: {
- $mulmocast: {
- credit: "closing",
- version: "1.1",
- },
- audioParams: {
- audioVolume: 1,
- bgmVolume: 0.2,
- closingPadding: 0.8,
- introPadding: 1,
- outroPadding: 1,
- padding: 0.3,
- suppressSpeech: false,
- },
- canvasSize: {
- height: 1536,
- width: 1024,
- },
- imageParams: {
- images: {
- presenter: {
- source: {
- kind: "url",
- url: "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.jpg",
- },
- type: "image",
- },
- },
- style: "<style>Ghibli style</style>",
- },
- movieParams: {
- provider: "replicate",
- },
- soundEffectParams: {
- provider: "replicate",
- },
- speechParams: {
- speakers: {
- Presenter: {
- provider: "nijivoice",
- speechOptions: {
- speed: 1.5,
- },
- voiceId: "3708ad43-cace-486c-a4ca-8fe41186e20c",
- },
- },
- },
- },
- scriptName: "image_prompts_template.json",
- systemPrompt: "This script is for YouTube shorts. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
- title: "Ghibli style for YouTube Shorts",
- },
  {
  description: "Template for Ghost in the shell style comic presentation.",
  filename: "ghost_comic",
package/lib/data/scriptTemplates.d.ts CHANGED
@@ -390,8 +390,8 @@ export declare const scriptTemplates: ({
  imagePrompt: string;
  }[];
  filename: string;
+ lang: string;
  title: string;
- lang?: undefined;
  references?: undefined;
  htmlImageParams?: undefined;
  imageParams?: undefined;
package/lib/data/scriptTemplates.js CHANGED
@@ -523,6 +523,7 @@ export const scriptTemplates = [
  },
  ],
  filename: "image_prompt_only_template",
+ lang: "en",
  title: "[TITLE: Brief, engaging title for the topic]",
  },
  {
package/lib/data/templateDataSet.d.ts CHANGED
@@ -8,8 +8,6 @@ export declare const templateDataSet: {
  comic_strips: string;
  drslump_comic: string;
  ghibli_comic: string;
- ghibli_image_only: string;
- ghibli_shorts: string;
  ghost_comic: string;
  html: string;
  onepiece_comic: string;
package/lib/data/templateDataSet.js CHANGED
@@ -5,7 +5,7 @@ export const templateDataSet = {
  "```",
  ani: "言葉づかいは思いっきりツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.\n" +
  "```JSON\n" +
- `{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","lang":"en","references":[{"url":"[SOURCE_URL: URL of the source material]","title":"[SOURCE_TITLE: Title of the referenced article, or paper]","type":"[SOURCE_TYPE: article, paper]"}],"beats":[{"text":"[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"}],"movieParams":{"provider":"replicate","model":"bytedance/seedance-1-lite"},"speechParams":{"provider":"openai","speakers":{"Presenter":{"voiceId":"shimmer","speechOptions":{"instruction":"Speak in a slightly high-pitched, curt tone with sudden flustered shifts—like a tsundere anime girl."}}}},"audioParams":{"bgm":{"kind":"url","url":"https://github.com/receptron/mulmocast-media/raw/refs/heads/main/bgms/morning001.mp3"}},"canvasSize":{"width":1024,"height":1536},"imageParams":{"style":"<style>A highly polished 2D digital illustration in anime and manga style, featuring clean linework, soft shading, vivid colors, and expressive facial detailing. The composition emphasizes clarity and visual impact with a minimalistic background and a strong character focus. The lighting is even and bright, giving the image a crisp and energetic feel, reminiscent of high-quality character art used in Japanese visual novels or mobile games.</style>","images":{"ani":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ani.png"}}}}}\n` +
+ `{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","lang":"en","references":[{"url":"[SOURCE_URL: URL of the source material]","title":"[SOURCE_TITLE: Title of the referenced article, or paper]","type":"[SOURCE_TYPE: article, paper]"}],"beats":[{"text":"[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"}],"movieParams":{"provider":"replicate","model":"bytedance/seedance-1-lite"},"speechParams":{"provider":"openai","speakers":{"Presenter":{"voiceId":"shimmer","speechOptions":{"instruction":"Speak in a slightly high-pitched, curt tone with sudden flustered shifts—like a tsundere anime girl."},"lang":{"ja":{"provider":"nijivoice","voiceId":"9d9ed276-49ee-443a-bc19-26e6136d05f0"}}}}},"audioParams":{"bgm":{"kind":"url","url":"https://github.com/receptron/mulmocast-media/raw/refs/heads/main/bgms/morning001.mp3"}},"canvasSize":{"width":1024,"height":1536},"imageParams":{"style":"<style>A highly polished 2D digital illustration in anime and manga style, featuring clean linework, soft shading, vivid colors, and expressive facial detailing. The composition emphasizes clarity and visual impact with a minimalistic background and a strong character focus. The lighting is even and bright, giving the image a crisp and energetic feel, reminiscent of high-quality character art used in Japanese visual novels or mobile games.</style>","images":{"ani":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ani.png"}}}}}\n` +
  "```",
  business: "Use textSlides, markdown, mermaid, or chart to show slides. Extract image links in the article (from <img> tag) to reuse them in the presentation. Mention the reference in one of beats, if it exists. Use the JSON below as a template. chartData is the data for Chart.js\n" +
  "```JSON\n" +
@@ -35,14 +35,6 @@ export const templateDataSet = {
  "```JSON\n" +
  `{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","lang":"en","references":[{"url":"[SOURCE_URL: URL of the source material]","title":"[SOURCE_TITLE: Title of the referenced article, or paper]","type":"[SOURCE_TYPE: article, paper]"}],"beats":[{"text":"[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"}],"canvasSize":{"width":1536,"height":1024},"imageParams":{"style":"<style>Ghibli style</style>","images":{"presenter":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"}}}}}\n` +
  "```",
- ghibli_image_only: "Another AI will generate an image for each beat based on the text description of that beat. Use the JSON below as a template.\n" +
- "```JSON\n" +
- '{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","beats":[{"imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"}],"canvasSize":{"width":1536,"height":1024},"imageParams":{"style":"<style>Ghibli style</style>","images":{"presenter":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"}}}}}\n' +
- "```",
- ghibli_shorts: "This script is for YouTube shorts. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.\n" +
- "```JSON\n" +
- `{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","lang":"en","references":[{"url":"[SOURCE_URL: URL of the source material]","title":"[SOURCE_TITLE: Title of the referenced article, or paper]","type":"[SOURCE_TYPE: article, paper]"}],"beats":[{"text":"[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"}],"canvasSize":{"width":1024,"height":1536},"speechParams":{"speakers":{"Presenter":{"provider":"nijivoice","voiceId":"3708ad43-cace-486c-a4ca-8fe41186e20c","speechOptions":{"speed":1.5}}}},"imageParams":{"style":"<style>Ghibli style</style>","images":{"presenter":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.jpg"}}}}}\n` +
- "```",
  ghost_comic: "Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.\n" +
  "```JSON\n" +
  `{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","lang":"en","references":[{"url":"[SOURCE_URL: URL of the source material]","title":"[SOURCE_TITLE: Title of the referenced article, or paper]","type":"[SOURCE_TYPE: article, paper]"}],"beats":[{"text":"[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"}],"canvasSize":{"width":1536,"height":1024},"imageParams":{"style":"<style>Ghost in the shell aesthetic.</style>","images":{"presenter":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghost_presenter.png"}},"optimus":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/optimus.png"}}}}}\n` +
package/lib/methods/mulmo_studio_context.d.ts CHANGED
@@ -10,7 +10,7 @@ export declare const MulmoStudioContextMethods: {
  getFileName(context: MulmoStudioContext): string;
  getCaption(context: MulmoStudioContext): string | undefined;
  setSessionState(context: MulmoStudioContext, sessionType: SessionType, value: boolean): void;
- setBeatSessionState(context: MulmoStudioContext, sessionType: BeatSessionType, index: number, value: boolean): void;
+ setBeatSessionState(context: MulmoStudioContext, sessionType: BeatSessionType, index: number, id: string | undefined, value: boolean): void;
  needTranslate(context: MulmoStudioContext, includeCaption?: boolean): boolean | "" | undefined;
  getIntroPadding(context: MulmoStudioContext): number;
  };
package/lib/methods/mulmo_studio_context.js CHANGED
@@ -15,12 +15,12 @@ const notifyStateChange = (context, sessionType) => {
  callback({ kind: "session", sessionType, inSession });
  }
  };
- const notifyBeatStateChange = (context, sessionType, index) => {
- const inSession = context.sessionState.inBeatSession[sessionType][index] ?? false;
+ const notifyBeatStateChange = (context, sessionType, id) => {
+ const inSession = context.sessionState.inBeatSession[sessionType][id] ?? false;
  const prefix = inSession ? "{" : " }";
- GraphAILogger.info(`${prefix} ${sessionType} ${index}`);
+ GraphAILogger.info(`${prefix} ${sessionType} ${id}`);
  for (const callback of sessionProgressCallbacks) {
- callback({ kind: "beat", sessionType, index, inSession });
+ callback({ kind: "beat", sessionType, id, inSession });
  }
  };
  export const MulmoStudioContextMethods = {
@@ -50,18 +50,19 @@ export const MulmoStudioContextMethods = {
  context.sessionState.inSession[sessionType] = value;
  notifyStateChange(context, sessionType);
  },
- setBeatSessionState(context, sessionType, index, value) {
+ setBeatSessionState(context, sessionType, index, id, value) {
+ const key = id ?? `__index__${index}`;
  if (value) {
  if (!context.sessionState.inBeatSession[sessionType]) {
  context.sessionState.inBeatSession[sessionType] = {};
  }
- context.sessionState.inBeatSession[sessionType][index] = true;
+ context.sessionState.inBeatSession[sessionType][key] = true;
  }
  else {
  // NOTE: Setting to false causes the parse error in rebuildStudio in preprocess.ts
- delete context.sessionState.inBeatSession[sessionType][index];
+ delete context.sessionState.inBeatSession[sessionType][key];
  }
- notifyBeatStateChange(context, sessionType, index);
+ notifyBeatStateChange(context, sessionType, key);
  },
  needTranslate(context, includeCaption = false) {
  // context.studio.script.lang = defaultLang, context.lang = targetLanguage.
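Beat session keys are now strings: a beat with a stable id keeps the same key even when beats are inserted or reordered, while beats without an id fall back to a synthetic positional key. A standalone sketch of the derivation used above:

// Mirrors the key derivation in setBeatSessionState.
const beatKey = (index: number, id?: string): string => id ?? `__index__${index}`;

beatKey(0, "intro");   // "intro" (stable across reordering)
beatKey(3, undefined); // "__index__3" (positional fallback)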
package/lib/types/schema.d.ts CHANGED
@@ -5919,35 +5919,35 @@ export declare const mulmoSessionStateSchema: z.ZodObject<{
  caption: boolean;
  }>;
  inBeatSession: z.ZodObject<{
- audio: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- image: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- movie: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- multiLingual: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- caption: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- html: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- imageReference: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- soundEffect: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- lipSync: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
+ audio: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ image: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ movie: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ multiLingual: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ caption: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ html: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ imageReference: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ soundEffect: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ lipSync: z.ZodRecord<z.ZodString, z.ZodBoolean>;
  }, "strip", z.ZodTypeAny, {
- image: Record<number, boolean>;
- audio: Record<number, boolean>;
- movie: Record<number, boolean>;
- html: Record<number, boolean>;
- multiLingual: Record<number, boolean>;
- caption: Record<number, boolean>;
- imageReference: Record<number, boolean>;
- soundEffect: Record<number, boolean>;
- lipSync: Record<number, boolean>;
+ image: Record<string, boolean>;
+ audio: Record<string, boolean>;
+ movie: Record<string, boolean>;
+ html: Record<string, boolean>;
+ multiLingual: Record<string, boolean>;
+ caption: Record<string, boolean>;
+ imageReference: Record<string, boolean>;
+ soundEffect: Record<string, boolean>;
+ lipSync: Record<string, boolean>;
  }, {
- image: Record<number, boolean>;
- audio: Record<number, boolean>;
- movie: Record<number, boolean>;
- html: Record<number, boolean>;
- multiLingual: Record<number, boolean>;
- caption: Record<number, boolean>;
- imageReference: Record<number, boolean>;
- soundEffect: Record<number, boolean>;
- lipSync: Record<number, boolean>;
+ image: Record<string, boolean>;
+ audio: Record<string, boolean>;
+ movie: Record<string, boolean>;
+ html: Record<string, boolean>;
+ multiLingual: Record<string, boolean>;
+ caption: Record<string, boolean>;
+ imageReference: Record<string, boolean>;
+ soundEffect: Record<string, boolean>;
+ lipSync: Record<string, boolean>;
  }>;
  }, "strip", z.ZodTypeAny, {
  inSession: {
@@ -5959,15 +5959,15 @@ export declare const mulmoSessionStateSchema: z.ZodObject<{
  caption: boolean;
  };
  inBeatSession: {
- image: Record<number, boolean>;
- audio: Record<number, boolean>;
- movie: Record<number, boolean>;
- html: Record<number, boolean>;
- multiLingual: Record<number, boolean>;
- caption: Record<number, boolean>;
- imageReference: Record<number, boolean>;
- soundEffect: Record<number, boolean>;
- lipSync: Record<number, boolean>;
+ image: Record<string, boolean>;
+ audio: Record<string, boolean>;
+ movie: Record<string, boolean>;
+ html: Record<string, boolean>;
+ multiLingual: Record<string, boolean>;
+ caption: Record<string, boolean>;
+ imageReference: Record<string, boolean>;
+ soundEffect: Record<string, boolean>;
+ lipSync: Record<string, boolean>;
  };
  }, {
  inSession: {
@@ -5979,15 +5979,15 @@ export declare const mulmoSessionStateSchema: z.ZodObject<{
  caption: boolean;
  };
  inBeatSession: {
- image: Record<number, boolean>;
- audio: Record<number, boolean>;
- movie: Record<number, boolean>;
- html: Record<number, boolean>;
- multiLingual: Record<number, boolean>;
- caption: Record<number, boolean>;
- imageReference: Record<number, boolean>;
- soundEffect: Record<number, boolean>;
- lipSync: Record<number, boolean>;
+ image: Record<string, boolean>;
+ audio: Record<string, boolean>;
+ movie: Record<string, boolean>;
+ html: Record<string, boolean>;
+ multiLingual: Record<string, boolean>;
+ caption: Record<string, boolean>;
+ imageReference: Record<string, boolean>;
+ soundEffect: Record<string, boolean>;
+ lipSync: Record<string, boolean>;
  };
  }>;
  export declare const mulmoStudioSchema: z.ZodObject<{
package/lib/types/schema.js CHANGED
@@ -417,15 +417,15 @@ export const mulmoSessionStateSchema = z.object({
  pdf: z.boolean(),
  }),
  inBeatSession: z.object({
- audio: z.record(z.number().int(), z.boolean()),
- image: z.record(z.number().int(), z.boolean()),
- movie: z.record(z.number().int(), z.boolean()),
- multiLingual: z.record(z.number().int(), z.boolean()),
- caption: z.record(z.number().int(), z.boolean()),
- html: z.record(z.number().int(), z.boolean()),
- imageReference: z.record(z.number().int(), z.boolean()),
- soundEffect: z.record(z.number().int(), z.boolean()),
- lipSync: z.record(z.number().int(), z.boolean()),
+ audio: z.record(z.string(), z.boolean()),
+ image: z.record(z.string(), z.boolean()),
+ movie: z.record(z.string(), z.boolean()),
+ multiLingual: z.record(z.string(), z.boolean()),
+ caption: z.record(z.string(), z.boolean()),
+ html: z.record(z.string(), z.boolean()),
+ imageReference: z.record(z.string(), z.boolean()),
+ soundEffect: z.record(z.string(), z.boolean()),
+ lipSync: z.record(z.string(), z.boolean()),
  }),
  });
  export const mulmoStudioSchema = z
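String record keys line up with how the session state is actually stored: JavaScript object keys are strings at runtime, and the new keys mix explicit beat ids with the __index__N fallback. A quick check with zod directly (a sketch, not project code):

import { z } from "zod";

// Beat flags keyed by beat id or by the "__index__N" fallback.
const beatFlags = z.record(z.string(), z.boolean());

beatFlags.parse({ intro: true, __index__3: true }); // ok: both key styles validate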
package/lib/types/type.d.ts CHANGED
@@ -100,7 +100,7 @@ export type SessionProgressEvent = {
  } | {
  kind: "beat";
  sessionType: BeatSessionType;
- index: number;
+ id: string;
  inSession: boolean;
  };
  export type SessionProgressCallback = (change: SessionProgressEvent) => void;
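Progress callbacks that previously received a numeric index now receive the string key. A sketch of a consumer adapting to the new shape (the __index__ prefix check mirrors the fallback key format shown above; the import path is an assumption):

import type { SessionProgressEvent } from "mulmocast"; // import path is an assumption

// Sketch: handling the beat id that replaced the numeric index.
const onProgress = (event: SessionProgressEvent): void => {
  if (event.kind === "beat") {
    const label = event.id.startsWith("__index__")
      ? `beat #${event.id.slice("__index__".length)}` // positional fallback key
      : event.id;                                      // explicit beat id
    console.log(`${event.inSession ? "start" : "done"} ${event.sessionType} ${label}`);
  }
};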
package/lib/utils/filters.js CHANGED
@@ -15,7 +15,7 @@ export const nijovoiceTextAgentFilter = async (context, next) => {
  return next(context);
  };
  export const fileCacheAgentFilter = async (context, next) => {
- const { force, file, index, mulmoContext, sessionType } = context.namedInputs.cache;
+ const { force, file, index, mulmoContext, sessionType, id } = context.namedInputs.cache;
  const shouldUseCache = async () => {
  if (force && force.some((element) => element)) {
  return false;
@@ -33,7 +33,7 @@ export const fileCacheAgentFilter = async (context, next) => {
  return true;
  }
  try {
- MulmoStudioContextMethods.setBeatSessionState(mulmoContext, sessionType, index, true);
+ MulmoStudioContextMethods.setBeatSessionState(mulmoContext, sessionType, index, id, true);
  const output = (await next(context)) || undefined;
  const { buffer, text, saved } = output ?? {};
  if (saved) {
@@ -56,7 +56,7 @@ export const fileCacheAgentFilter = async (context, next) => {
  return false;
  }
  finally {
- MulmoStudioContextMethods.setBeatSessionState(mulmoContext, sessionType, index, false);
+ MulmoStudioContextMethods.setBeatSessionState(mulmoContext, sessionType, index, id, false);
  }
  };
  export const browserlessCacheGenerator = (cacheDir) => {
package/lib/utils/provider2agent.d.ts CHANGED
@@ -95,26 +95,31 @@ export declare const provider2LLMAgent: {
  readonly agentName: "openAIAgent";
  readonly defaultModel: "gpt-5";
  readonly max_tokens: 8192;
+ readonly models: readonly ["gpt-5", "gpt-5-nano", "gpt-5-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3", "o3-mini", "o3-pro", "o1", "o1-pro", "gpt-4o", "gpt-4o-mini"];
  };
  readonly anthropic: {
  readonly agentName: "anthropicAgent";
  readonly defaultModel: "claude-3-7-sonnet-20250219";
  readonly max_tokens: 8192;
+ readonly models: readonly ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-haiku-20240307"];
  };
  readonly gemini: {
  readonly agentName: "geminiAgent";
- readonly defaultModel: "gemini-1.5-flash";
+ readonly defaultModel: "gemini-2.5-flash";
  readonly max_tokens: 8192;
+ readonly models: readonly ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash"];
  };
  readonly groq: {
  readonly agentName: "groqAgent";
- readonly defaultModel: "llama3-8b-8192";
+ readonly defaultModel: "llama-3.1-8b-instant";
  readonly max_tokens: 4096;
+ readonly models: readonly ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "deepseek-r1-distill-llama-70b", "openai/gpt-oss-120b", "openai/gpt-oss-20b"];
  };
  readonly mock: {
  readonly agentName: "mediaMockAgent";
  readonly defaultModel: "mock";
  readonly max_tokens: 4096;
+ readonly models: readonly ["mock"];
  };
  };
  export declare const defaultProviders: {
package/lib/utils/provider2agent.js CHANGED
@@ -208,26 +208,45 @@ export const provider2LLMAgent = {
  agentName: "openAIAgent",
  defaultModel: "gpt-5",
  max_tokens: 8192,
+ models: [
+ "gpt-5",
+ "gpt-5-nano",
+ "gpt-5-mini",
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "o3",
+ "o3-mini",
+ "o3-pro",
+ "o1",
+ "o1-pro",
+ "gpt-4o",
+ "gpt-4o-mini",
+ ],
  },
  anthropic: {
  agentName: "anthropicAgent",
  defaultModel: "claude-3-7-sonnet-20250219",
  max_tokens: 8192,
+ models: ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-haiku-20240307"],
  },
  gemini: {
  agentName: "geminiAgent",
- defaultModel: "gemini-1.5-flash",
+ defaultModel: "gemini-2.5-flash",
  max_tokens: 8192,
+ models: ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash"],
  },
  groq: {
  agentName: "groqAgent",
- defaultModel: "llama3-8b-8192",
+ defaultModel: "llama-3.1-8b-instant",
  max_tokens: 4096,
+ models: ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "deepseek-r1-distill-llama-70b", "openai/gpt-oss-120b", "openai/gpt-oss-20b"],
  },
  mock: {
  agentName: "mediaMockAgent",
  defaultModel: "mock",
  max_tokens: 4096,
+ models: ["mock"],
  },
  };
  export const defaultProviders = {
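The new models arrays make the supported model list for each provider enumerable, alongside the bumped defaults (gemini-2.5-flash, llama-3.1-8b-instant). A minimal sketch of how a caller might validate a user-supplied model against them (the validateModel helper and the import path are assumptions for illustration, not part of mulmocast's API):

// Sketch only: assumed helper, not shipped with the package.
import { provider2LLMAgent } from "mulmocast"; // import path is an assumption

type LLMProvider = keyof typeof provider2LLMAgent;

const validateModel = (provider: LLMProvider, model?: string): string => {
  const { models, defaultModel } = provider2LLMAgent[provider];
  if (!model) return defaultModel;
  if (!(models as readonly string[]).includes(model)) {
    throw new Error(`Unknown ${provider} model "${model}". Known models: ${models.join(", ")}`);
  }
  return model;
};

validateModel("gemini");                          // "gemini-2.5-flash"
validateModel("groq", "llama-3.3-70b-versatile"); // ok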
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "mulmocast",
- "version": "1.2.2",
+ "version": "1.2.4",
  "description": "",
  "type": "module",
  "main": "lib/index.node.js",
@@ -67,14 +67,14 @@
  "dependencies": {
  "@google-cloud/text-to-speech": "^6.2.0",
  "@google/genai": "^1.13.0",
- "@graphai/anthropic_agent": "^2.0.5",
+ "@graphai/anthropic_agent": "^2.0.9",
  "@graphai/browserless_agent": "^2.0.1",
- "@graphai/gemini_agent": "^2.0.0",
- "@graphai/groq_agent": "^2.0.0",
+ "@graphai/gemini_agent": "^2.0.1",
+ "@graphai/groq_agent": "^2.0.2",
  "@graphai/input_agents": "^1.0.2",
- "@graphai/openai_agent": "^2.0.3",
+ "@graphai/openai_agent": "^2.0.4",
  "@graphai/stream_agent_filter": "^2.0.2",
- "@graphai/vanilla": "^2.0.6",
+ "@graphai/vanilla": "^2.0.10",
  "@graphai/vanilla_node_agents": "^2.0.1",
  "@inquirer/input": "^4.2.1",
  "@inquirer/select": "^4.3.1",
@@ -84,10 +84,10 @@
  "clipboardy": "^4.0.0",
  "dotenv": "^17.2.1",
  "fluent-ffmpeg": "^2.1.3",
- "graphai": "^2.0.13",
+ "graphai": "^2.0.14",
  "marked": "^16.1.2",
  "ora": "^8.2.0",
- "puppeteer": "^24.16.0",
+ "puppeteer": "^24.16.2",
  "replicate": "^1.0.1",
  "yaml": "^2.8.1",
  "yargs": "^18.0.0",
@@ -97,7 +97,7 @@
  "devDependencies": {
  "@anatine/zod-mock": "^3.14.0",
  "@faker-js/faker": "^9.9.0",
- "@receptron/test_utils": "^2.0.0",
+ "@receptron/test_utils": "^2.0.1",
  "@types/fluent-ffmpeg": "^2.1.26",
  "@types/yargs": "^17.0.33",
  "eslint": "^9.33.0",
@@ -106,9 +106,9 @@
  "eslint-plugin-sonarjs": "^3.0.4",
  "prettier": "^3.6.2",
  "ts-node": "^10.9.2",
- "tsx": "^4.20.3",
+ "tsx": "^4.20.4",
  "typescript": "^5.9.2",
- "typescript-eslint": "^8.39.0"
+ "typescript-eslint": "^8.39.1"
  },
  "engines": {
  "node": ">=18.0.0"
package/scripts/templates/image_prompt_only_template.json CHANGED
@@ -4,6 +4,7 @@
  "credit": "closing"
  },
  "title": "[TITLE: Brief, engaging title for the topic]",
+ "lang": "en",
  "beats": [
  {
  "imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
package/assets/templates/ghibli_image_only.json DELETED
@@ -1,28 +0,0 @@
- {
- "title": "Ghibli comic image-only",
- "description": "Template for Ghibli-style image-only comic presentation.",
- "systemPrompt": "Another AI will generate an image for each beat based on the text description of that beat. Use the JSON below as a template.",
- "presentationStyle": {
- "$mulmocast": {
- "version": "1.1",
- "credit": "closing"
- },
- "canvasSize": {
- "width": 1536,
- "height": 1024
- },
- "imageParams": {
- "style": "<style>Ghibli style</style>",
- "images": {
- "presenter": {
- "type": "image",
- "source": {
- "kind": "url",
- "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"
- }
- }
- }
- }
- },
- "scriptName": "image_prompt_only_template.json"
- }