mulmocast 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/assets/templates/ani.json +8 -3
  2. package/assets/templates/html.json +0 -1
  3. package/lib/actions/audio.js +1 -0
  4. package/lib/actions/captions.js +2 -2
  5. package/lib/actions/image_agents.js +3 -3
  6. package/lib/actions/images.js +5 -0
  7. package/lib/actions/translate.js +2 -2
  8. package/lib/agents/image_genai_agent.js +1 -1
  9. package/lib/agents/image_openai_agent.js +3 -0
  10. package/lib/agents/lipsync_replicate_agent.js +1 -1
  11. package/lib/agents/movie_genai_agent.js +1 -1
  12. package/lib/agents/movie_replicate_agent.js +1 -1
  13. package/lib/agents/sound_effect_replicate_agent.js +1 -1
  14. package/lib/agents/tts_elevenlabs_agent.js +1 -1
  15. package/lib/agents/tts_nijivoice_agent.js +10 -6
  16. package/lib/agents/tts_openai_agent.js +3 -0
  17. package/lib/data/bgmAssets.d.ts +18 -0
  18. package/lib/data/bgmAssets.js +101 -0
  19. package/lib/data/index.d.ts +1 -0
  20. package/lib/data/index.js +1 -0
  21. package/lib/data/promptTemplates.d.ts +13 -74
  22. package/lib/data/promptTemplates.js +7 -110
  23. package/lib/data/scriptTemplates.d.ts +1 -1
  24. package/lib/data/scriptTemplates.js +1 -0
  25. package/lib/data/templateDataSet.d.ts +0 -2
  26. package/lib/data/templateDataSet.js +1 -9
  27. package/lib/methods/mulmo_studio_context.d.ts +1 -1
  28. package/lib/methods/mulmo_studio_context.js +9 -8
  29. package/lib/types/schema.d.ts +45 -45
  30. package/lib/types/schema.js +9 -9
  31. package/lib/types/type.d.ts +1 -1
  32. package/lib/utils/filters.js +3 -3
  33. package/lib/utils/provider2agent.d.ts +7 -2
  34. package/lib/utils/provider2agent.js +21 -2
  35. package/package.json +11 -11
  36. package/scripts/templates/image_prompt_only_template.json +1 -0
  37. package/assets/templates/ghibli_image_only.json +0 -28
  38. package/assets/templates/ghibli_shorts.json +0 -33
  39. package/scripts/test/test_hello_caption.json~ +0 -21
  40. package/scripts/test/test_hello_image.json~ +0 -18
package/lib/data/promptTemplates.js CHANGED
@@ -100,6 +100,12 @@ export const promptTemplates = [
  speechParams: {
  speakers: {
  Presenter: {
+ lang: {
+ ja: {
+ provider: "nijivoice",
+ voiceId: "9d9ed276-49ee-443a-bc19-26e6136d05f0",
+ },
+ },
  speechOptions: {
  instruction: "Speak in a slightly high-pitched, curt tone with sudden flustered shifts—like a tsundere anime girl.",
  },
@@ -110,7 +116,7 @@ export const promptTemplates = [
  },
  scriptName: "image_prompts_template.json",
  systemPrompt: "言葉づかいは思いっきりツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.",
- title: "Presentation with Ani",
+ title: "Presentation by Ani",
  },
  {
  description: "Template for business presentation.",
@@ -370,115 +376,6 @@ export const promptTemplates = [
  systemPrompt: "Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
  title: "Ghibli comic style",
  },
- {
- description: "Template for Ghibli-style image-only comic presentation.",
- filename: "ghibli_image_only",
- presentationStyle: {
- $mulmocast: {
- credit: "closing",
- version: "1.1",
- },
- audioParams: {
- audioVolume: 1,
- bgmVolume: 0.2,
- closingPadding: 0.8,
- introPadding: 1,
- outroPadding: 1,
- padding: 0.3,
- suppressSpeech: false,
- },
- canvasSize: {
- height: 1024,
- width: 1536,
- },
- imageParams: {
- images: {
- presenter: {
- source: {
- kind: "url",
- url: "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png",
- },
- type: "image",
- },
- },
- style: "<style>Ghibli style</style>",
- },
- movieParams: {
- provider: "replicate",
- },
- soundEffectParams: {
- provider: "replicate",
- },
- speechParams: {
- speakers: {
- Presenter: {
- displayName: {
- en: "Presenter",
- },
- voiceId: "shimmer",
- },
- },
- },
- },
- scriptName: "image_prompt_only_template.json",
- systemPrompt: "Another AI will generate an image for each beat based on the text description of that beat. Use the JSON below as a template.",
- title: "Ghibli comic image-only",
- },
- {
- description: "Template for Ghibli-style comic presentation.",
- filename: "ghibli_shorts",
- presentationStyle: {
- $mulmocast: {
- credit: "closing",
- version: "1.1",
- },
- audioParams: {
- audioVolume: 1,
- bgmVolume: 0.2,
- closingPadding: 0.8,
- introPadding: 1,
- outroPadding: 1,
- padding: 0.3,
- suppressSpeech: false,
- },
- canvasSize: {
- height: 1536,
- width: 1024,
- },
- imageParams: {
- images: {
- presenter: {
- source: {
- kind: "url",
- url: "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.jpg",
- },
- type: "image",
- },
- },
- style: "<style>Ghibli style</style>",
- },
- movieParams: {
- provider: "replicate",
- },
- soundEffectParams: {
- provider: "replicate",
- },
- speechParams: {
- speakers: {
- Presenter: {
- provider: "nijivoice",
- speechOptions: {
- speed: 1.5,
- },
- voiceId: "3708ad43-cace-486c-a4ca-8fe41186e20c",
- },
- },
- },
- },
- scriptName: "image_prompts_template.json",
- systemPrompt: "This script is for YouTube shorts. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.",
- title: "Ghibli style for YouTube Shorts",
- },
  {
  description: "Template for Ghost in the shell style comic presentation.",
  filename: "ghost_comic",
package/lib/data/scriptTemplates.d.ts CHANGED
@@ -390,8 +390,8 @@ export declare const scriptTemplates: ({
  imagePrompt: string;
  }[];
  filename: string;
+ lang: string;
  title: string;
- lang?: undefined;
  references?: undefined;
  htmlImageParams?: undefined;
  imageParams?: undefined;
package/lib/data/scriptTemplates.js CHANGED
@@ -523,6 +523,7 @@ export const scriptTemplates = [
  },
  ],
  filename: "image_prompt_only_template",
+ lang: "en",
  title: "[TITLE: Brief, engaging title for the topic]",
  },
  {
package/lib/data/templateDataSet.d.ts CHANGED
@@ -8,8 +8,6 @@ export declare const templateDataSet: {
  comic_strips: string;
  drslump_comic: string;
  ghibli_comic: string;
- ghibli_image_only: string;
- ghibli_shorts: string;
  ghost_comic: string;
  html: string;
  onepiece_comic: string;
package/lib/data/templateDataSet.js CHANGED
@@ -5,7 +5,7 @@ export const templateDataSet = {
  "```",
  ani: "言葉づかいは思いっきりツンデレにして。Another AI will generate comic for each beat based on the image prompt of that beat. You don't need to specify the style of the image, just describe the scene. Mention the reference in one of beats, if it exists. Use the JSON below as a template. Create appropriate amount of beats, and make sure the beats are coherent and flow well.\n" +
  "```JSON\n" +
- `{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","lang":"en","references":[{"url":"[SOURCE_URL: URL of the source material]","title":"[SOURCE_TITLE: Title of the referenced article, or paper]","type":"[SOURCE_TYPE: article, paper]"}],"beats":[{"text":"[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"}],"movieParams":{"provider":"replicate","model":"bytedance/seedance-1-lite"},"speechParams":{"provider":"openai","speakers":{"Presenter":{"voiceId":"shimmer","speechOptions":{"instruction":"Speak in a slightly high-pitched, curt tone with sudden flustered shifts—like a tsundere anime girl."}}}},"audioParams":{"bgm":{"kind":"url","url":"https://github.com/receptron/mulmocast-media/raw/refs/heads/main/bgms/morning001.mp3"}},"canvasSize":{"width":1024,"height":1536},"imageParams":{"style":"<style>A highly polished 2D digital illustration in anime and manga style, featuring clean linework, soft shading, vivid colors, and expressive facial detailing. The composition emphasizes clarity and visual impact with a minimalistic background and a strong character focus. The lighting is even and bright, giving the image a crisp and energetic feel, reminiscent of high-quality character art used in Japanese visual novels or mobile games.</style>","images":{"ani":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ani.png"}}}}}\n` +
+ `{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","lang":"en","references":[{"url":"[SOURCE_URL: URL of the source material]","title":"[SOURCE_TITLE: Title of the referenced article, or paper]","type":"[SOURCE_TYPE: article, paper]"}],"beats":[{"text":"[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"}],"movieParams":{"provider":"replicate","model":"bytedance/seedance-1-lite"},"speechParams":{"provider":"openai","speakers":{"Presenter":{"voiceId":"shimmer","speechOptions":{"instruction":"Speak in a slightly high-pitched, curt tone with sudden flustered shifts—like a tsundere anime girl."},"lang":{"ja":{"provider":"nijivoice","voiceId":"9d9ed276-49ee-443a-bc19-26e6136d05f0"}}}}},"audioParams":{"bgm":{"kind":"url","url":"https://github.com/receptron/mulmocast-media/raw/refs/heads/main/bgms/morning001.mp3"}},"canvasSize":{"width":1024,"height":1536},"imageParams":{"style":"<style>A highly polished 2D digital illustration in anime and manga style, featuring clean linework, soft shading, vivid colors, and expressive facial detailing. The composition emphasizes clarity and visual impact with a minimalistic background and a strong character focus. The lighting is even and bright, giving the image a crisp and energetic feel, reminiscent of high-quality character art used in Japanese visual novels or mobile games.</style>","images":{"ani":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ani.png"}}}}}\n` +
  "```",
  business: "Use textSlides, markdown, mermaid, or chart to show slides. Extract image links in the article (from <img> tag) to reuse them in the presentation. Mention the reference in one of beats, if it exists. Use the JSON below as a template. chartData is the data for Chart.js\n" +
  "```JSON\n" +
@@ -35,14 +35,6 @@ export const templateDataSet = {
  "```JSON\n" +
  `{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","lang":"en","references":[{"url":"[SOURCE_URL: URL of the source material]","title":"[SOURCE_TITLE: Title of the referenced article, or paper]","type":"[SOURCE_TYPE: article, paper]"}],"beats":[{"text":"[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"}],"canvasSize":{"width":1536,"height":1024},"imageParams":{"style":"<style>Ghibli style</style>","images":{"presenter":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"}}}}}\n` +
  "```",
- ghibli_image_only: "Another AI will generate an image for each beat based on the text description of that beat. Use the JSON below as a template.\n" +
- "```JSON\n" +
- '{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","beats":[{"imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"}],"canvasSize":{"width":1536,"height":1024},"imageParams":{"style":"<style>Ghibli style</style>","images":{"presenter":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"}}}}}\n' +
- "```",
- ghibli_shorts: "This script is for YouTube shorts. Another AI will generate comic strips for each beat based on the text description of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.\n" +
- "```JSON\n" +
- `{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","lang":"en","references":[{"url":"[SOURCE_URL: URL of the source material]","title":"[SOURCE_TITLE: Title of the referenced article, or paper]","type":"[SOURCE_TYPE: article, paper]"}],"beats":[{"text":"[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"}],"canvasSize":{"width":1024,"height":1536},"speechParams":{"speakers":{"Presenter":{"provider":"nijivoice","voiceId":"3708ad43-cace-486c-a4ca-8fe41186e20c","speechOptions":{"speed":1.5}}}},"imageParams":{"style":"<style>Ghibli style</style>","images":{"presenter":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.jpg"}}}}}\n` +
- "```",
  ghost_comic: "Another AI will generate images for each beat based on the image prompt of that beat. Mention the reference in one of beats, if it exists. Use the JSON below as a template.\n" +
  "```JSON\n" +
  `{"$mulmocast":{"version":"1.1","credit":"closing"},"title":"[TITLE: Brief, engaging title for the topic]","lang":"en","references":[{"url":"[SOURCE_URL: URL of the source material]","title":"[SOURCE_TITLE: Title of the referenced article, or paper]","type":"[SOURCE_TYPE: article, paper]"}],"beats":[{"text":"[OPENING_BEAT: Introduce the topic with a hook. Reference the source material and set up why this topic matters. Usually 2-3 sentences that grab attention and provide context.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[MAIN_CONCEPT: Define or explain the core concept/idea. This should be the central focus of your narrative. Keep it clear and accessible.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_1: Additional context, examples, or elaboration that helps illustrate the main concept. This could include how it works, why it's important, or real-world applications.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[SUPPORTING_DETAIL_2: Continue with more examples, deeper explanation, or different aspects of the topic if needed.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[ADDITIONAL_BEATS: Add more beats as necessary to fully explore the topic. Complex topics may require 6-10+ beats to cover adequately. Each beat should advance the narrative or provide valuable information.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"},{"text":"[CONCLUSION/IMPACT: Wrap up with the significance, implications, or key takeaway. Help the audience understand why this matters to them.]","imagePrompt":"[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"}],"canvasSize":{"width":1536,"height":1024},"imageParams":{"style":"<style>Ghost in the shell aesthetic.</style>","images":{"presenter":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghost_presenter.png"}},"optimus":{"type":"image","source":{"kind":"url","url":"https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/optimus.png"}}}}}\n` +
package/lib/methods/mulmo_studio_context.d.ts CHANGED
@@ -10,7 +10,7 @@ export declare const MulmoStudioContextMethods: {
  getFileName(context: MulmoStudioContext): string;
  getCaption(context: MulmoStudioContext): string | undefined;
  setSessionState(context: MulmoStudioContext, sessionType: SessionType, value: boolean): void;
- setBeatSessionState(context: MulmoStudioContext, sessionType: BeatSessionType, index: number, value: boolean): void;
+ setBeatSessionState(context: MulmoStudioContext, sessionType: BeatSessionType, index: number, id: string | undefined, value: boolean): void;
  needTranslate(context: MulmoStudioContext, includeCaption?: boolean): boolean | "" | undefined;
  getIntroPadding(context: MulmoStudioContext): number;
  };
package/lib/methods/mulmo_studio_context.js CHANGED
@@ -15,12 +15,12 @@ const notifyStateChange = (context, sessionType) => {
  callback({ kind: "session", sessionType, inSession });
  }
  };
- const notifyBeatStateChange = (context, sessionType, index) => {
- const inSession = context.sessionState.inBeatSession[sessionType][index] ?? false;
+ const notifyBeatStateChange = (context, sessionType, id) => {
+ const inSession = context.sessionState.inBeatSession[sessionType][id] ?? false;
  const prefix = inSession ? "{" : " }";
- GraphAILogger.info(`${prefix} ${sessionType} ${index}`);
+ GraphAILogger.info(`${prefix} ${sessionType} ${id}`);
  for (const callback of sessionProgressCallbacks) {
- callback({ kind: "beat", sessionType, index, inSession });
+ callback({ kind: "beat", sessionType, id, inSession });
  }
  };
  export const MulmoStudioContextMethods = {
@@ -50,18 +50,19 @@ export const MulmoStudioContextMethods = {
  context.sessionState.inSession[sessionType] = value;
  notifyStateChange(context, sessionType);
  },
- setBeatSessionState(context, sessionType, index, value) {
+ setBeatSessionState(context, sessionType, index, id, value) {
+ const key = id ?? `__index__${index}`;
  if (value) {
  if (!context.sessionState.inBeatSession[sessionType]) {
  context.sessionState.inBeatSession[sessionType] = {};
  }
- context.sessionState.inBeatSession[sessionType][index] = true;
+ context.sessionState.inBeatSession[sessionType][key] = true;
  }
  else {
  // NOTE: Setting to false causes the parse error in rebuildStudio in preprocess.ts
- delete context.sessionState.inBeatSession[sessionType][index];
+ delete context.sessionState.inBeatSession[sessionType][key];
  }
- notifyBeatStateChange(context, sessionType, index);
+ notifyBeatStateChange(context, sessionType, key);
  },
  needTranslate(context, includeCaption = false) {
  // context.studio.script.lang = defaultLang, context.lang = targetLanguage.
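Beat session keys are now strings: a beat with a stable id keeps the same key even when beats are inserted or reordered, while beats without an id fall back to a synthetic positional key. A standalone sketch of the derivation used above:

// Mirrors the key derivation in setBeatSessionState.
const beatKey = (index: number, id?: string): string => id ?? `__index__${index}`;

beatKey(0, "intro");   // "intro" (stable across reordering)
beatKey(3, undefined); // "__index__3" (positional fallback)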
package/lib/types/schema.d.ts CHANGED
@@ -5919,35 +5919,35 @@ export declare const mulmoSessionStateSchema: z.ZodObject<{
  caption: boolean;
  }>;
  inBeatSession: z.ZodObject<{
- audio: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- image: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- movie: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- multiLingual: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- caption: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- html: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- imageReference: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- soundEffect: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
- lipSync: z.ZodRecord<z.ZodNumber, z.ZodBoolean>;
+ audio: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ image: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ movie: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ multiLingual: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ caption: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ html: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ imageReference: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ soundEffect: z.ZodRecord<z.ZodString, z.ZodBoolean>;
+ lipSync: z.ZodRecord<z.ZodString, z.ZodBoolean>;
  }, "strip", z.ZodTypeAny, {
- image: Record<number, boolean>;
- audio: Record<number, boolean>;
- movie: Record<number, boolean>;
- html: Record<number, boolean>;
- multiLingual: Record<number, boolean>;
- caption: Record<number, boolean>;
- imageReference: Record<number, boolean>;
- soundEffect: Record<number, boolean>;
- lipSync: Record<number, boolean>;
+ image: Record<string, boolean>;
+ audio: Record<string, boolean>;
+ movie: Record<string, boolean>;
+ html: Record<string, boolean>;
+ multiLingual: Record<string, boolean>;
+ caption: Record<string, boolean>;
+ imageReference: Record<string, boolean>;
+ soundEffect: Record<string, boolean>;
+ lipSync: Record<string, boolean>;
  }, {
- image: Record<number, boolean>;
- audio: Record<number, boolean>;
- movie: Record<number, boolean>;
- html: Record<number, boolean>;
- multiLingual: Record<number, boolean>;
- caption: Record<number, boolean>;
- imageReference: Record<number, boolean>;
- soundEffect: Record<number, boolean>;
- lipSync: Record<number, boolean>;
+ image: Record<string, boolean>;
+ audio: Record<string, boolean>;
+ movie: Record<string, boolean>;
+ html: Record<string, boolean>;
+ multiLingual: Record<string, boolean>;
+ caption: Record<string, boolean>;
+ imageReference: Record<string, boolean>;
+ soundEffect: Record<string, boolean>;
+ lipSync: Record<string, boolean>;
  }>;
  }, "strip", z.ZodTypeAny, {
  inSession: {
@@ -5959,15 +5959,15 @@ export declare const mulmoSessionStateSchema: z.ZodObject<{
  caption: boolean;
  };
  inBeatSession: {
- image: Record<number, boolean>;
- audio: Record<number, boolean>;
- movie: Record<number, boolean>;
- html: Record<number, boolean>;
- multiLingual: Record<number, boolean>;
- caption: Record<number, boolean>;
- imageReference: Record<number, boolean>;
- soundEffect: Record<number, boolean>;
- lipSync: Record<number, boolean>;
+ image: Record<string, boolean>;
+ audio: Record<string, boolean>;
+ movie: Record<string, boolean>;
+ html: Record<string, boolean>;
+ multiLingual: Record<string, boolean>;
+ caption: Record<string, boolean>;
+ imageReference: Record<string, boolean>;
+ soundEffect: Record<string, boolean>;
+ lipSync: Record<string, boolean>;
  };
  }, {
  inSession: {
@@ -5979,15 +5979,15 @@ export declare const mulmoSessionStateSchema: z.ZodObject<{
  caption: boolean;
  };
  inBeatSession: {
- image: Record<number, boolean>;
- audio: Record<number, boolean>;
- movie: Record<number, boolean>;
- html: Record<number, boolean>;
- multiLingual: Record<number, boolean>;
- caption: Record<number, boolean>;
- imageReference: Record<number, boolean>;
- soundEffect: Record<number, boolean>;
- lipSync: Record<number, boolean>;
+ image: Record<string, boolean>;
+ audio: Record<string, boolean>;
+ movie: Record<string, boolean>;
+ html: Record<string, boolean>;
+ multiLingual: Record<string, boolean>;
+ caption: Record<string, boolean>;
+ imageReference: Record<string, boolean>;
+ soundEffect: Record<string, boolean>;
+ lipSync: Record<string, boolean>;
  };
  }>;
  export declare const mulmoStudioSchema: z.ZodObject<{
package/lib/types/schema.js CHANGED
@@ -417,15 +417,15 @@ export const mulmoSessionStateSchema = z.object({
  pdf: z.boolean(),
  }),
  inBeatSession: z.object({
- audio: z.record(z.number().int(), z.boolean()),
- image: z.record(z.number().int(), z.boolean()),
- movie: z.record(z.number().int(), z.boolean()),
- multiLingual: z.record(z.number().int(), z.boolean()),
- caption: z.record(z.number().int(), z.boolean()),
- html: z.record(z.number().int(), z.boolean()),
- imageReference: z.record(z.number().int(), z.boolean()),
- soundEffect: z.record(z.number().int(), z.boolean()),
- lipSync: z.record(z.number().int(), z.boolean()),
+ audio: z.record(z.string(), z.boolean()),
+ image: z.record(z.string(), z.boolean()),
+ movie: z.record(z.string(), z.boolean()),
+ multiLingual: z.record(z.string(), z.boolean()),
+ caption: z.record(z.string(), z.boolean()),
+ html: z.record(z.string(), z.boolean()),
+ imageReference: z.record(z.string(), z.boolean()),
+ soundEffect: z.record(z.string(), z.boolean()),
+ lipSync: z.record(z.string(), z.boolean()),
  }),
  });
  export const mulmoStudioSchema = z
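String record keys line up with how the session state is actually stored: JavaScript object keys are strings at runtime, and the new keys mix explicit beat ids with the __index__N fallback. A quick check with zod directly (a sketch, not project code):

import { z } from "zod";

// Beat flags keyed by beat id or by the "__index__N" fallback.
const beatFlags = z.record(z.string(), z.boolean());

beatFlags.parse({ intro: true, __index__3: true }); // ok: both key styles validate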
package/lib/types/type.d.ts CHANGED
@@ -100,7 +100,7 @@ export type SessionProgressEvent = {
  } | {
  kind: "beat";
  sessionType: BeatSessionType;
- index: number;
+ id: string;
  inSession: boolean;
  };
  export type SessionProgressCallback = (change: SessionProgressEvent) => void;
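Progress callbacks that previously received a numeric index now receive the string key. A sketch of a consumer adapting to the new shape (the __index__ prefix check mirrors the fallback key format shown above; the import path is an assumption):

import type { SessionProgressEvent } from "mulmocast"; // import path is an assumption

// Sketch: handling the beat id that replaced the numeric index.
const onProgress = (event: SessionProgressEvent): void => {
  if (event.kind === "beat") {
    const label = event.id.startsWith("__index__")
      ? `beat #${event.id.slice("__index__".length)}` // positional fallback key
      : event.id;                                      // explicit beat id
    console.log(`${event.inSession ? "start" : "done"} ${event.sessionType} ${label}`);
  }
};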
package/lib/utils/filters.js CHANGED
@@ -15,7 +15,7 @@ export const nijovoiceTextAgentFilter = async (context, next) => {
  return next(context);
  };
  export const fileCacheAgentFilter = async (context, next) => {
- const { force, file, index, mulmoContext, sessionType } = context.namedInputs.cache;
+ const { force, file, index, mulmoContext, sessionType, id } = context.namedInputs.cache;
  const shouldUseCache = async () => {
  if (force && force.some((element) => element)) {
  return false;
@@ -33,7 +33,7 @@ export const fileCacheAgentFilter = async (context, next) => {
  return true;
  }
  try {
- MulmoStudioContextMethods.setBeatSessionState(mulmoContext, sessionType, index, true);
+ MulmoStudioContextMethods.setBeatSessionState(mulmoContext, sessionType, index, id, true);
  const output = (await next(context)) || undefined;
  const { buffer, text, saved } = output ?? {};
  if (saved) {
@@ -56,7 +56,7 @@ export const fileCacheAgentFilter = async (context, next) => {
  return false;
  }
  finally {
- MulmoStudioContextMethods.setBeatSessionState(mulmoContext, sessionType, index, false);
+ MulmoStudioContextMethods.setBeatSessionState(mulmoContext, sessionType, index, id, false);
  }
  };
  export const browserlessCacheGenerator = (cacheDir) => {
package/lib/utils/provider2agent.d.ts CHANGED
@@ -95,26 +95,31 @@ export declare const provider2LLMAgent: {
  readonly agentName: "openAIAgent";
  readonly defaultModel: "gpt-5";
  readonly max_tokens: 8192;
+ readonly models: readonly ["gpt-5", "gpt-5-nano", "gpt-5-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano", "o3", "o3-mini", "o3-pro", "o1", "o1-pro", "gpt-4o", "gpt-4o-mini"];
  };
  readonly anthropic: {
  readonly agentName: "anthropicAgent";
  readonly defaultModel: "claude-3-7-sonnet-20250219";
  readonly max_tokens: 8192;
+ readonly models: readonly ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-haiku-20240307"];
  };
  readonly gemini: {
  readonly agentName: "geminiAgent";
- readonly defaultModel: "gemini-1.5-flash";
+ readonly defaultModel: "gemini-2.5-flash";
  readonly max_tokens: 8192;
+ readonly models: readonly ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash"];
  };
  readonly groq: {
  readonly agentName: "groqAgent";
- readonly defaultModel: "llama3-8b-8192";
+ readonly defaultModel: "llama-3.1-8b-instant";
  readonly max_tokens: 4096;
+ readonly models: readonly ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "deepseek-r1-distill-llama-70b", "openai/gpt-oss-120b", "openai/gpt-oss-20b"];
  };
  readonly mock: {
  readonly agentName: "mediaMockAgent";
  readonly defaultModel: "mock";
  readonly max_tokens: 4096;
+ readonly models: readonly ["mock"];
  };
  };
  export declare const defaultProviders: {
package/lib/utils/provider2agent.js CHANGED
@@ -208,26 +208,45 @@ export const provider2LLMAgent = {
  agentName: "openAIAgent",
  defaultModel: "gpt-5",
  max_tokens: 8192,
+ models: [
+ "gpt-5",
+ "gpt-5-nano",
+ "gpt-5-mini",
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "o3",
+ "o3-mini",
+ "o3-pro",
+ "o1",
+ "o1-pro",
+ "gpt-4o",
+ "gpt-4o-mini",
+ ],
  },
  anthropic: {
  agentName: "anthropicAgent",
  defaultModel: "claude-3-7-sonnet-20250219",
  max_tokens: 8192,
+ models: ["claude-opus-4-1-20250805", "claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-3-7-sonnet-20250219", "claude-3-haiku-20240307"],
  },
  gemini: {
  agentName: "geminiAgent",
- defaultModel: "gemini-1.5-flash",
+ defaultModel: "gemini-2.5-flash",
  max_tokens: 8192,
+ models: ["gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.0-flash"],
  },
  groq: {
  agentName: "groqAgent",
- defaultModel: "llama3-8b-8192",
+ defaultModel: "llama-3.1-8b-instant",
  max_tokens: 4096,
+ models: ["llama-3.1-8b-instant", "llama-3.3-70b-versatile", "deepseek-r1-distill-llama-70b", "openai/gpt-oss-120b", "openai/gpt-oss-20b"],
  },
  mock: {
  agentName: "mediaMockAgent",
  defaultModel: "mock",
  max_tokens: 4096,
+ models: ["mock"],
  },
  };
  export const defaultProviders = {
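The new models arrays make the supported model list for each provider enumerable, alongside the bumped defaults (gemini-2.5-flash, llama-3.1-8b-instant). A minimal sketch of how a caller might validate a user-supplied model against them (the validateModel helper and the import path are assumptions for illustration, not part of mulmocast's API):

// Sketch only: assumed helper, not shipped with the package.
import { provider2LLMAgent } from "mulmocast"; // import path is an assumption

type LLMProvider = keyof typeof provider2LLMAgent;

const validateModel = (provider: LLMProvider, model?: string): string => {
  const { models, defaultModel } = provider2LLMAgent[provider];
  if (!model) return defaultModel;
  if (!(models as readonly string[]).includes(model)) {
    throw new Error(`Unknown ${provider} model "${model}". Known models: ${models.join(", ")}`);
  }
  return model;
};

validateModel("gemini");                          // "gemini-2.5-flash"
validateModel("groq", "llama-3.3-70b-versatile"); // ok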
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "mulmocast",
- "version": "1.2.2",
+ "version": "1.2.4",
  "description": "",
  "type": "module",
  "main": "lib/index.node.js",
@@ -67,14 +67,14 @@
  "dependencies": {
  "@google-cloud/text-to-speech": "^6.2.0",
  "@google/genai": "^1.13.0",
- "@graphai/anthropic_agent": "^2.0.5",
+ "@graphai/anthropic_agent": "^2.0.9",
  "@graphai/browserless_agent": "^2.0.1",
- "@graphai/gemini_agent": "^2.0.0",
- "@graphai/groq_agent": "^2.0.0",
+ "@graphai/gemini_agent": "^2.0.1",
+ "@graphai/groq_agent": "^2.0.2",
  "@graphai/input_agents": "^1.0.2",
- "@graphai/openai_agent": "^2.0.3",
+ "@graphai/openai_agent": "^2.0.4",
  "@graphai/stream_agent_filter": "^2.0.2",
- "@graphai/vanilla": "^2.0.6",
+ "@graphai/vanilla": "^2.0.10",
  "@graphai/vanilla_node_agents": "^2.0.1",
  "@inquirer/input": "^4.2.1",
  "@inquirer/select": "^4.3.1",
@@ -84,10 +84,10 @@
  "clipboardy": "^4.0.0",
  "dotenv": "^17.2.1",
  "fluent-ffmpeg": "^2.1.3",
- "graphai": "^2.0.13",
+ "graphai": "^2.0.14",
  "marked": "^16.1.2",
  "ora": "^8.2.0",
- "puppeteer": "^24.16.0",
+ "puppeteer": "^24.16.2",
  "replicate": "^1.0.1",
  "yaml": "^2.8.1",
  "yargs": "^18.0.0",
@@ -97,7 +97,7 @@
  "devDependencies": {
  "@anatine/zod-mock": "^3.14.0",
  "@faker-js/faker": "^9.9.0",
- "@receptron/test_utils": "^2.0.0",
+ "@receptron/test_utils": "^2.0.1",
  "@types/fluent-ffmpeg": "^2.1.26",
  "@types/yargs": "^17.0.33",
  "eslint": "^9.33.0",
@@ -106,9 +106,9 @@
  "eslint-plugin-sonarjs": "^3.0.4",
  "prettier": "^3.6.2",
  "ts-node": "^10.9.2",
- "tsx": "^4.20.3",
+ "tsx": "^4.20.4",
  "typescript": "^5.9.2",
- "typescript-eslint": "^8.39.0"
+ "typescript-eslint": "^8.39.1"
  },
  "engines": {
  "node": ">=18.0.0"
package/scripts/templates/image_prompt_only_template.json CHANGED
@@ -4,6 +4,7 @@
  "credit": "closing"
  },
  "title": "[TITLE: Brief, engaging title for the topic]",
+ "lang": "en",
  "beats": [
  {
  "imagePrompt": "[IMAGE_PROMPT: A prompt for the image to be generated for this beat.]"
package/assets/templates/ghibli_image_only.json DELETED
@@ -1,28 +0,0 @@
- {
- "title": "Ghibli comic image-only",
- "description": "Template for Ghibli-style image-only comic presentation.",
- "systemPrompt": "Another AI will generate an image for each beat based on the text description of that beat. Use the JSON below as a template.",
- "presentationStyle": {
- "$mulmocast": {
- "version": "1.1",
- "credit": "closing"
- },
- "canvasSize": {
- "width": 1536,
- "height": 1024
- },
- "imageParams": {
- "style": "<style>Ghibli style</style>",
- "images": {
- "presenter": {
- "type": "image",
- "source": {
- "kind": "url",
- "url": "https://raw.githubusercontent.com/receptron/mulmocast-media/refs/heads/main/characters/ghibli_presenter.png"
- }
- }
- }
- }
- },
- "scriptName": "image_prompt_only_template.json"
- }