mulmocast 2.0.2 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -0
- package/assets/templates/ghibli_comic_strips.json +1 -1
- package/lib/actions/audio.js +2 -1
- package/lib/actions/bundle.js +5 -2
- package/lib/agents/image_genai_agent.js +1 -1
- package/lib/agents/image_openai_agent.js +2 -2
- package/lib/agents/index.d.ts +2 -1
- package/lib/agents/index.js +2 -1
- package/lib/agents/movie_genai_agent.js +1 -1
- package/lib/agents/tts_gemini_agent.d.ts +5 -0
- package/lib/agents/tts_gemini_agent.js +64 -0
- package/lib/cli/commands/tool/scripting/builder.d.ts +1 -1
- package/lib/cli/commands/tool/story_to_script/builder.d.ts +1 -1
- package/lib/data/promptTemplates.d.ts +40 -10
- package/lib/data/promptTemplates.js +55 -1
- package/lib/data/templateDataSet.js +1 -1
- package/lib/methods/mulmo_presentation_style.js +1 -1
- package/lib/types/schema.d.ts +30 -30
- package/lib/types/schema.js +4 -2
- package/lib/types/type.d.ts +2 -0
- package/lib/utils/context.d.ts +33 -33
- package/lib/utils/ffmpeg_utils.d.ts +1 -0
- package/lib/utils/ffmpeg_utils.js +23 -0
- package/lib/utils/provider2agent.d.ts +10 -1
- package/lib/utils/provider2agent.js +17 -8
- package/lib/utils/utils.js +3 -0
- package/package.json +7 -7
- package/scripts/test/gpt.json +6 -0
- package/scripts/test/image-2.png +0 -0
- package/scripts/test/test_audio_gemini.json +67 -0
- package/scripts/test/test_audio_gemini.json~ +67 -0
- package/scripts/test/test_genai.json +17 -9
- package/scripts/test/test_image_refs.json +1 -1
- package/scripts/test/test_lipsync2.json +66 -0
- package/scripts/test/test_lipsync2.json~ +24 -0
- package/scripts/test/test_replicate.json +6 -6
package/lib/types/schema.d.ts
CHANGED
|
@@ -453,9 +453,9 @@ export declare const htmlPromptParamsSchema: z.ZodObject<{
|
|
|
453
453
|
data: z.ZodOptional<z.ZodAny>;
|
|
454
454
|
images: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
|
|
455
455
|
}, z.core.$strict>;
|
|
456
|
-
export declare const text2MovieProviderSchema: z.
|
|
456
|
+
export declare const text2MovieProviderSchema: z.ZodEnum<{
|
|
457
457
|
[x: string]: string;
|
|
458
|
-
}
|
|
458
|
+
}>;
|
|
459
459
|
export declare const text2SoundEffectProviderSchema: z.ZodDefault<z.ZodEnum<{
|
|
460
460
|
[x: string]: string;
|
|
461
461
|
}>>;
|
|
@@ -601,9 +601,9 @@ export declare const mulmoBeatSchema: z.ZodObject<{
|
|
|
601
601
|
movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
602
602
|
}, z.core.$strict>>;
|
|
603
603
|
movieParams: z.ZodOptional<z.ZodObject<{
|
|
604
|
-
provider: z.ZodOptional<z.
|
|
604
|
+
provider: z.ZodOptional<z.ZodEnum<{
|
|
605
605
|
[x: string]: string;
|
|
606
|
-
}
|
|
606
|
+
}>>;
|
|
607
607
|
model: z.ZodOptional<z.ZodString>;
|
|
608
608
|
fillOption: z.ZodOptional<z.ZodObject<{
|
|
609
609
|
style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
@@ -681,9 +681,9 @@ export declare const mulmoTransitionSchema: z.ZodObject<{
|
|
|
681
681
|
duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
682
682
|
}, z.core.$strip>;
|
|
683
683
|
export declare const mulmoMovieParamsSchema: z.ZodObject<{
|
|
684
|
-
provider: z.ZodOptional<z.
|
|
684
|
+
provider: z.ZodOptional<z.ZodEnum<{
|
|
685
685
|
[x: string]: string;
|
|
686
|
-
}
|
|
686
|
+
}>>;
|
|
687
687
|
model: z.ZodOptional<z.ZodString>;
|
|
688
688
|
fillOption: z.ZodOptional<z.ZodObject<{
|
|
689
689
|
style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
@@ -761,10 +761,10 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
|
|
|
761
761
|
prompt: z.ZodString;
|
|
762
762
|
}, z.core.$strict>]>>>;
|
|
763
763
|
}, z.core.$strict>>>;
|
|
764
|
-
movieParams: z.ZodOptional<z.ZodObject<{
|
|
765
|
-
provider: z.ZodOptional<z.
|
|
764
|
+
movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
|
|
765
|
+
provider: z.ZodOptional<z.ZodEnum<{
|
|
766
766
|
[x: string]: string;
|
|
767
|
-
}
|
|
767
|
+
}>>;
|
|
768
768
|
model: z.ZodOptional<z.ZodString>;
|
|
769
769
|
fillOption: z.ZodOptional<z.ZodObject<{
|
|
770
770
|
style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
@@ -779,7 +779,7 @@ export declare const mulmoPresentationStyleSchema: z.ZodObject<{
|
|
|
779
779
|
}>;
|
|
780
780
|
duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
781
781
|
}, z.core.$strip>>;
|
|
782
|
-
}, z.core.$strict
|
|
782
|
+
}, z.core.$strict>>>;
|
|
783
783
|
soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
|
|
784
784
|
provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
785
785
|
[x: string]: string;
|
|
@@ -897,10 +897,10 @@ export declare const mulmoScriptSchema: z.ZodObject<{
|
|
|
897
897
|
prompt: z.ZodString;
|
|
898
898
|
}, z.core.$strict>]>>>;
|
|
899
899
|
}, z.core.$strict>>>;
|
|
900
|
-
movieParams: z.ZodOptional<z.ZodObject<{
|
|
901
|
-
provider: z.ZodOptional<z.
|
|
900
|
+
movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
|
|
901
|
+
provider: z.ZodOptional<z.ZodEnum<{
|
|
902
902
|
[x: string]: string;
|
|
903
|
-
}
|
|
903
|
+
}>>;
|
|
904
904
|
model: z.ZodOptional<z.ZodString>;
|
|
905
905
|
fillOption: z.ZodOptional<z.ZodObject<{
|
|
906
906
|
style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
@@ -915,7 +915,7 @@ export declare const mulmoScriptSchema: z.ZodObject<{
|
|
|
915
915
|
}>;
|
|
916
916
|
duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
917
917
|
}, z.core.$strip>>;
|
|
918
|
-
}, z.core.$strict
|
|
918
|
+
}, z.core.$strict>>>;
|
|
919
919
|
soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
|
|
920
920
|
provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
921
921
|
[x: string]: string;
|
|
@@ -1105,9 +1105,9 @@ export declare const mulmoScriptSchema: z.ZodObject<{
|
|
|
1105
1105
|
movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
1106
1106
|
}, z.core.$strict>>;
|
|
1107
1107
|
movieParams: z.ZodOptional<z.ZodObject<{
|
|
1108
|
-
provider: z.ZodOptional<z.
|
|
1108
|
+
provider: z.ZodOptional<z.ZodEnum<{
|
|
1109
1109
|
[x: string]: string;
|
|
1110
|
-
}
|
|
1110
|
+
}>>;
|
|
1111
1111
|
model: z.ZodOptional<z.ZodString>;
|
|
1112
1112
|
fillOption: z.ZodOptional<z.ZodObject<{
|
|
1113
1113
|
style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
@@ -1309,10 +1309,10 @@ export declare const mulmoStudioSchema: z.ZodObject<{
|
|
|
1309
1309
|
prompt: z.ZodString;
|
|
1310
1310
|
}, z.core.$strict>]>>>;
|
|
1311
1311
|
}, z.core.$strict>>>;
|
|
1312
|
-
movieParams: z.ZodOptional<z.ZodObject<{
|
|
1313
|
-
provider: z.ZodOptional<z.
|
|
1312
|
+
movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
|
|
1313
|
+
provider: z.ZodOptional<z.ZodEnum<{
|
|
1314
1314
|
[x: string]: string;
|
|
1315
|
-
}
|
|
1315
|
+
}>>;
|
|
1316
1316
|
model: z.ZodOptional<z.ZodString>;
|
|
1317
1317
|
fillOption: z.ZodOptional<z.ZodObject<{
|
|
1318
1318
|
style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
@@ -1327,7 +1327,7 @@ export declare const mulmoStudioSchema: z.ZodObject<{
|
|
|
1327
1327
|
}>;
|
|
1328
1328
|
duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
1329
1329
|
}, z.core.$strip>>;
|
|
1330
|
-
}, z.core.$strict
|
|
1330
|
+
}, z.core.$strict>>>;
|
|
1331
1331
|
soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
|
|
1332
1332
|
provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
1333
1333
|
[x: string]: string;
|
|
@@ -1517,9 +1517,9 @@ export declare const mulmoStudioSchema: z.ZodObject<{
|
|
|
1517
1517
|
movieVolume: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
1518
1518
|
}, z.core.$strict>>;
|
|
1519
1519
|
movieParams: z.ZodOptional<z.ZodObject<{
|
|
1520
|
-
provider: z.ZodOptional<z.
|
|
1520
|
+
provider: z.ZodOptional<z.ZodEnum<{
|
|
1521
1521
|
[x: string]: string;
|
|
1522
|
-
}
|
|
1522
|
+
}>>;
|
|
1523
1523
|
model: z.ZodOptional<z.ZodString>;
|
|
1524
1524
|
fillOption: z.ZodOptional<z.ZodObject<{
|
|
1525
1525
|
style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
@@ -1657,10 +1657,10 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
|
|
|
1657
1657
|
prompt: z.ZodString;
|
|
1658
1658
|
}, z.core.$strict>]>>>;
|
|
1659
1659
|
}, z.core.$strict>>>;
|
|
1660
|
-
movieParams: z.ZodOptional<z.ZodObject<{
|
|
1661
|
-
provider: z.ZodOptional<z.
|
|
1660
|
+
movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
|
|
1661
|
+
provider: z.ZodOptional<z.ZodEnum<{
|
|
1662
1662
|
[x: string]: string;
|
|
1663
|
-
}
|
|
1663
|
+
}>>;
|
|
1664
1664
|
model: z.ZodOptional<z.ZodString>;
|
|
1665
1665
|
fillOption: z.ZodOptional<z.ZodObject<{
|
|
1666
1666
|
style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
@@ -1675,7 +1675,7 @@ export declare const mulmoPromptTemplateSchema: z.ZodObject<{
|
|
|
1675
1675
|
}>;
|
|
1676
1676
|
duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
1677
1677
|
}, z.core.$strip>>;
|
|
1678
|
-
}, z.core.$strict
|
|
1678
|
+
}, z.core.$strict>>>;
|
|
1679
1679
|
soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
|
|
1680
1680
|
provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
1681
1681
|
[x: string]: string;
|
|
@@ -1787,10 +1787,10 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
|
|
|
1787
1787
|
prompt: z.ZodString;
|
|
1788
1788
|
}, z.core.$strict>]>>>;
|
|
1789
1789
|
}, z.core.$strict>>>;
|
|
1790
|
-
movieParams: z.ZodOptional<z.ZodObject<{
|
|
1791
|
-
provider: z.ZodOptional<z.
|
|
1790
|
+
movieParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
|
|
1791
|
+
provider: z.ZodOptional<z.ZodEnum<{
|
|
1792
1792
|
[x: string]: string;
|
|
1793
|
-
}
|
|
1793
|
+
}>>;
|
|
1794
1794
|
model: z.ZodOptional<z.ZodString>;
|
|
1795
1795
|
fillOption: z.ZodOptional<z.ZodObject<{
|
|
1796
1796
|
style: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
|
|
@@ -1805,7 +1805,7 @@ export declare const mulmoPromptTemplateFileSchema: z.ZodObject<{
|
|
|
1805
1805
|
}>;
|
|
1806
1806
|
duration: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
1807
1807
|
}, z.core.$strip>>;
|
|
1808
|
-
}, z.core.$strict
|
|
1808
|
+
}, z.core.$strict>>>;
|
|
1809
1809
|
soundEffectParams: z.ZodDefault<z.ZodOptional<z.ZodObject<{
|
|
1810
1810
|
provider: z.ZodOptional<z.ZodDefault<z.ZodEnum<{
|
|
1811
1811
|
[x: string]: string;
|
package/lib/types/schema.js
CHANGED
|
@@ -268,7 +268,7 @@ export const htmlPromptParamsSchema = z
|
|
|
268
268
|
images: z.record(z.string(), z.any()).optional(),
|
|
269
269
|
})
|
|
270
270
|
.strict();
|
|
271
|
-
export const text2MovieProviderSchema = z.enum(Object.keys(provider2MovieAgent))
|
|
271
|
+
export const text2MovieProviderSchema = z.enum(Object.keys(provider2MovieAgent));
|
|
272
272
|
export const text2SoundEffectProviderSchema = z.enum(Object.keys(provider2SoundEffectAgent)).default(defaultProviders.soundEffect);
|
|
273
273
|
export const mulmoSoundEffectParamsSchema = z.object({
|
|
274
274
|
provider: text2SoundEffectProviderSchema.optional(),
|
|
@@ -359,7 +359,9 @@ export const mulmoPresentationStyleSchema = z.object({
|
|
|
359
359
|
provider: defaultProviders.text2image,
|
|
360
360
|
images: {},
|
|
361
361
|
}),
|
|
362
|
-
movieParams: mulmoMovieParamsSchema.optional()
|
|
362
|
+
movieParams: mulmoMovieParamsSchema.optional().default({
|
|
363
|
+
provider: defaultProviders.text2movie,
|
|
364
|
+
}),
|
|
363
365
|
soundEffectParams: mulmoSoundEffectParamsSchema.optional().default({
|
|
364
366
|
provider: defaultProviders.soundEffect,
|
|
365
367
|
}),
|
package/lib/types/type.d.ts
CHANGED
|
@@ -139,6 +139,8 @@ export type ImageType = "image" | "movie";
|
|
|
139
139
|
export type MulmoViewerBeat = {
|
|
140
140
|
text?: string;
|
|
141
141
|
duration?: number;
|
|
142
|
+
startTime?: number;
|
|
143
|
+
endTime?: number;
|
|
142
144
|
multiLinguals?: Record<string, string>;
|
|
143
145
|
audioSources?: Record<string, string>;
|
|
144
146
|
imageSource?: string;
|
package/lib/utils/context.d.ts
CHANGED
|
@@ -56,6 +56,17 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
|
|
|
56
56
|
prompt: string;
|
|
57
57
|
}> | undefined;
|
|
58
58
|
};
|
|
59
|
+
movieParams: {
|
|
60
|
+
provider?: string | undefined;
|
|
61
|
+
model?: string | undefined;
|
|
62
|
+
fillOption?: {
|
|
63
|
+
style: "aspectFit" | "aspectFill";
|
|
64
|
+
} | undefined;
|
|
65
|
+
transition?: {
|
|
66
|
+
type: "fade" | "slideout_left";
|
|
67
|
+
duration: number;
|
|
68
|
+
} | undefined;
|
|
69
|
+
};
|
|
59
70
|
soundEffectParams: {
|
|
60
71
|
provider?: string | undefined;
|
|
61
72
|
model?: string | undefined;
|
|
@@ -252,17 +263,6 @@ export declare const createStudioData: (_mulmoScript: MulmoScript, fileName: str
|
|
|
252
263
|
enableLipSync?: boolean | undefined;
|
|
253
264
|
hidden?: boolean | undefined;
|
|
254
265
|
}[];
|
|
255
|
-
movieParams?: {
|
|
256
|
-
provider?: string | undefined;
|
|
257
|
-
model?: string | undefined;
|
|
258
|
-
fillOption?: {
|
|
259
|
-
style: "aspectFit" | "aspectFill";
|
|
260
|
-
} | undefined;
|
|
261
|
-
transition?: {
|
|
262
|
-
type: "fade" | "slideout_left";
|
|
263
|
-
duration: number;
|
|
264
|
-
} | undefined;
|
|
265
|
-
} | undefined;
|
|
266
266
|
lipSyncParams?: {
|
|
267
267
|
provider?: string | undefined;
|
|
268
268
|
model?: string | undefined;
|
|
@@ -371,6 +371,17 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
371
371
|
prompt: string;
|
|
372
372
|
}> | undefined;
|
|
373
373
|
};
|
|
374
|
+
movieParams: {
|
|
375
|
+
provider?: string | undefined;
|
|
376
|
+
model?: string | undefined;
|
|
377
|
+
fillOption?: {
|
|
378
|
+
style: "aspectFit" | "aspectFill";
|
|
379
|
+
} | undefined;
|
|
380
|
+
transition?: {
|
|
381
|
+
type: "fade" | "slideout_left";
|
|
382
|
+
duration: number;
|
|
383
|
+
} | undefined;
|
|
384
|
+
};
|
|
374
385
|
soundEffectParams: {
|
|
375
386
|
provider?: string | undefined;
|
|
376
387
|
model?: string | undefined;
|
|
@@ -567,17 +578,6 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
567
578
|
enableLipSync?: boolean | undefined;
|
|
568
579
|
hidden?: boolean | undefined;
|
|
569
580
|
}[];
|
|
570
|
-
movieParams?: {
|
|
571
|
-
provider?: string | undefined;
|
|
572
|
-
model?: string | undefined;
|
|
573
|
-
fillOption?: {
|
|
574
|
-
style: "aspectFit" | "aspectFill";
|
|
575
|
-
} | undefined;
|
|
576
|
-
transition?: {
|
|
577
|
-
type: "fade" | "slideout_left";
|
|
578
|
-
duration: number;
|
|
579
|
-
} | undefined;
|
|
580
|
-
} | undefined;
|
|
581
581
|
lipSyncParams?: {
|
|
582
582
|
provider?: string | undefined;
|
|
583
583
|
model?: string | undefined;
|
|
@@ -693,6 +693,17 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
693
693
|
prompt: string;
|
|
694
694
|
}> | undefined;
|
|
695
695
|
};
|
|
696
|
+
movieParams: {
|
|
697
|
+
provider?: string | undefined;
|
|
698
|
+
model?: string | undefined;
|
|
699
|
+
fillOption?: {
|
|
700
|
+
style: "aspectFit" | "aspectFill";
|
|
701
|
+
} | undefined;
|
|
702
|
+
transition?: {
|
|
703
|
+
type: "fade" | "slideout_left";
|
|
704
|
+
duration: number;
|
|
705
|
+
} | undefined;
|
|
706
|
+
};
|
|
696
707
|
soundEffectParams: {
|
|
697
708
|
provider?: string | undefined;
|
|
698
709
|
model?: string | undefined;
|
|
@@ -716,17 +727,6 @@ export declare const initializeContextFromFiles: (files: FileObject, raiseError:
|
|
|
716
727
|
path: string;
|
|
717
728
|
} | undefined;
|
|
718
729
|
};
|
|
719
|
-
movieParams?: {
|
|
720
|
-
provider?: string | undefined;
|
|
721
|
-
model?: string | undefined;
|
|
722
|
-
fillOption?: {
|
|
723
|
-
style: "aspectFit" | "aspectFill";
|
|
724
|
-
} | undefined;
|
|
725
|
-
transition?: {
|
|
726
|
-
type: "fade" | "slideout_left";
|
|
727
|
-
duration: number;
|
|
728
|
-
} | undefined;
|
|
729
|
-
} | undefined;
|
|
730
730
|
lipSyncParams?: {
|
|
731
731
|
provider?: string | undefined;
|
|
732
732
|
model?: string | undefined;
|
|
@@ -18,3 +18,4 @@ export declare const ffmpegGetMediaDuration: (filePath: string) => Promise<{
|
|
|
18
18
|
export declare const extractImageFromMovie: (movieFile: string, imagePath: string) => Promise<object>;
|
|
19
19
|
export declare const trimMusic: (inputFile: string, startTime: number, duration: number) => Promise<Buffer>;
|
|
20
20
|
export declare const createSilentAudio: (filePath: string, durationSec: number) => Promise<void>;
|
|
21
|
+
export declare const pcmToMp3: (rawPcm: Buffer, sampleRate?: number) => Promise<Buffer>;
|
|
@@ -2,6 +2,7 @@ import ffmpeg from "fluent-ffmpeg";
|
|
|
2
2
|
import { GraphAILogger } from "graphai";
|
|
3
3
|
import { isFile } from "./file.js";
|
|
4
4
|
import fs from "fs";
|
|
5
|
+
import { Readable, PassThrough } from "node:stream";
|
|
5
6
|
export const setFfmpegPath = (ffmpegPath) => {
|
|
6
7
|
ffmpeg.setFfmpegPath(ffmpegPath);
|
|
7
8
|
};
|
|
@@ -144,3 +145,25 @@ export const createSilentAudio = (filePath, durationSec) => {
|
|
|
144
145
|
.run();
|
|
145
146
|
});
|
|
146
147
|
};
|
|
148
|
+
export const pcmToMp3 = (rawPcm, sampleRate = 24000) => {
|
|
149
|
+
return new Promise((resolve, reject) => {
|
|
150
|
+
const inputStream = new Readable({
|
|
151
|
+
read() {
|
|
152
|
+
this.push(rawPcm);
|
|
153
|
+
this.push(null);
|
|
154
|
+
},
|
|
155
|
+
});
|
|
156
|
+
const outputChunks = [];
|
|
157
|
+
const outputStream = new PassThrough();
|
|
158
|
+
outputStream.on("data", (chunk) => outputChunks.push(chunk));
|
|
159
|
+
outputStream.on("end", () => resolve(Buffer.concat(outputChunks)));
|
|
160
|
+
outputStream.on("error", reject);
|
|
161
|
+
ffmpeg(inputStream)
|
|
162
|
+
.inputFormat("s16le")
|
|
163
|
+
.inputOptions([`-ar ${sampleRate}`, "-ac 1"])
|
|
164
|
+
.audioCodec("libmp3lame")
|
|
165
|
+
.format("mp3")
|
|
166
|
+
.on("error", reject)
|
|
167
|
+
.pipe(outputStream);
|
|
168
|
+
});
|
|
169
|
+
};
|
|
@@ -16,6 +16,12 @@ export declare const provider2TTSAgent: {
|
|
|
16
16
|
hasLimitedConcurrency: boolean;
|
|
17
17
|
keyName: string;
|
|
18
18
|
};
|
|
19
|
+
gemini: {
|
|
20
|
+
agentName: string;
|
|
21
|
+
hasLimitedConcurrency: boolean;
|
|
22
|
+
defaultVoice: string;
|
|
23
|
+
keyName: string;
|
|
24
|
+
};
|
|
19
25
|
elevenlabs: {
|
|
20
26
|
agentName: string;
|
|
21
27
|
hasLimitedConcurrency: boolean;
|
|
@@ -76,7 +82,10 @@ export declare const provider2MovieAgent: {
|
|
|
76
82
|
models: string[];
|
|
77
83
|
keyName: string;
|
|
78
84
|
modelParams: {
|
|
79
|
-
"veo-3.
|
|
85
|
+
"veo-3.1-generate-preview": {
|
|
86
|
+
durations: number[];
|
|
87
|
+
};
|
|
88
|
+
"veo-3.0-generate-001": {
|
|
80
89
|
durations: number[];
|
|
81
90
|
};
|
|
82
91
|
"veo-2.0-generate-001": {
|
|
@@ -17,6 +17,12 @@ export const provider2TTSAgent = {
|
|
|
17
17
|
hasLimitedConcurrency: false,
|
|
18
18
|
keyName: "GEMINI_API_KEY",
|
|
19
19
|
},
|
|
20
|
+
gemini: {
|
|
21
|
+
agentName: "ttsGeminiAgent",
|
|
22
|
+
hasLimitedConcurrency: false,
|
|
23
|
+
defaultVoice: "Kore",
|
|
24
|
+
keyName: "GEMINI_API_KEY",
|
|
25
|
+
},
|
|
20
26
|
elevenlabs: {
|
|
21
27
|
agentName: "ttsElevenlabsAgent",
|
|
22
28
|
hasLimitedConcurrency: true,
|
|
@@ -42,8 +48,8 @@ export const provider2ImageAgent = {
|
|
|
42
48
|
},
|
|
43
49
|
google: {
|
|
44
50
|
agentName: "imageGenAIAgent",
|
|
45
|
-
defaultModel: "gemini-2.5-flash-image
|
|
46
|
-
models: ["imagen-
|
|
51
|
+
defaultModel: "gemini-2.5-flash-image",
|
|
52
|
+
models: ["imagen-4.0-generate-preview-06-06", "imagen-4.0-ultra-generate-preview-06-06", "gemini-2.5-flash-image", "gemini-3-pro-image-preview"],
|
|
47
53
|
keyName: "GEMINI_API_KEY",
|
|
48
54
|
},
|
|
49
55
|
replicate: {
|
|
@@ -77,8 +83,8 @@ export const provider2MovieAgent = {
|
|
|
77
83
|
"minimax/hailuo-02",
|
|
78
84
|
"minimax/hailuo-02-fast",
|
|
79
85
|
"pixverse/pixverse-v4.5",
|
|
80
|
-
"wan-video/wan-2.2-i2v-
|
|
81
|
-
"wan-video/wan-2.2-t2v-
|
|
86
|
+
"wan-video/wan-2.2-i2v-fast",
|
|
87
|
+
"wan-video/wan-2.2-t2v-fast",
|
|
82
88
|
],
|
|
83
89
|
modelParams: {
|
|
84
90
|
"bytedance/seedance-1-lite": {
|
|
@@ -144,12 +150,12 @@ export const provider2MovieAgent = {
|
|
|
144
150
|
last_image: "last_frame_image",
|
|
145
151
|
price_per_sec: 0.12,
|
|
146
152
|
},
|
|
147
|
-
"wan-video/wan-2.2-i2v-
|
|
153
|
+
"wan-video/wan-2.2-i2v-fast": {
|
|
148
154
|
durations: [5],
|
|
149
155
|
start_image: "image",
|
|
150
156
|
price_per_sec: 0.012,
|
|
151
157
|
},
|
|
152
|
-
"wan-video/wan-2.2-t2v-
|
|
158
|
+
"wan-video/wan-2.2-t2v-fast": {
|
|
153
159
|
durations: [5],
|
|
154
160
|
start_image: undefined,
|
|
155
161
|
price_per_sec: 0.012,
|
|
@@ -159,10 +165,13 @@ export const provider2MovieAgent = {
|
|
|
159
165
|
google: {
|
|
160
166
|
agentName: "movieGenAIAgent",
|
|
161
167
|
defaultModel: "veo-2.0-generate-001",
|
|
162
|
-
models: ["veo-2.0-generate-001", "veo-3.0-generate-preview"],
|
|
168
|
+
models: ["veo-2.0-generate-001", "veo-3.0-generate-001", "veo-3.1-generate-preview"],
|
|
163
169
|
keyName: "GEMINI_API_KEY",
|
|
164
170
|
modelParams: {
|
|
165
|
-
"veo-3.
|
|
171
|
+
"veo-3.1-generate-preview": {
|
|
172
|
+
durations: [4, 6, 8],
|
|
173
|
+
},
|
|
174
|
+
"veo-3.0-generate-001": {
|
|
166
175
|
durations: [4, 6, 8],
|
|
167
176
|
},
|
|
168
177
|
"veo-2.0-generate-001": {
|
package/lib/utils/utils.js
CHANGED
|
@@ -73,6 +73,9 @@ export const settings2GraphAIConfig = (settings, env) => {
|
|
|
73
73
|
ttsNijivoiceAgent: {
|
|
74
74
|
apiKey: getKey("TTS", "NIJIVOICE_API_KEY"),
|
|
75
75
|
},
|
|
76
|
+
ttsGeminiAgent: {
|
|
77
|
+
apiKey: getKey("TTS", "GEMINI_API_KEY"),
|
|
78
|
+
},
|
|
76
79
|
ttsElevenlabsAgent: {
|
|
77
80
|
apiKey: getKey("TTS", "ELEVENLABS_API_KEY"),
|
|
78
81
|
},
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mulmocast",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.4",
|
|
4
4
|
"description": "",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "lib/index.node.js",
|
|
@@ -74,7 +74,7 @@
|
|
|
74
74
|
"homepage": "https://github.com/receptron/mulmocast-cli#readme",
|
|
75
75
|
"dependencies": {
|
|
76
76
|
"@google-cloud/text-to-speech": "^6.4.0",
|
|
77
|
-
"@google/genai": "^1.
|
|
77
|
+
"@google/genai": "^1.30.0",
|
|
78
78
|
"@graphai/anthropic_agent": "^2.0.11",
|
|
79
79
|
"@graphai/browserless_agent": "^2.0.1",
|
|
80
80
|
"@graphai/gemini_agent": "^2.0.1",
|
|
@@ -95,14 +95,14 @@
|
|
|
95
95
|
"fluent-ffmpeg": "^2.1.3",
|
|
96
96
|
"graphai": "^2.0.16",
|
|
97
97
|
"jsdom": "^27.2.0",
|
|
98
|
-
"marked": "^17.0.
|
|
98
|
+
"marked": "^17.0.1",
|
|
99
99
|
"mulmocast-vision": "^1.0.4",
|
|
100
100
|
"ora": "^9.0.0",
|
|
101
|
-
"puppeteer": "^24.
|
|
102
|
-
"replicate": "^1.
|
|
101
|
+
"puppeteer": "^24.31.0",
|
|
102
|
+
"replicate": "^1.4.0",
|
|
103
103
|
"yaml": "^2.8.1",
|
|
104
104
|
"yargs": "^18.0.0",
|
|
105
|
-
"zod": "^4.1.
|
|
105
|
+
"zod": "^4.1.13"
|
|
106
106
|
},
|
|
107
107
|
"devDependencies": {
|
|
108
108
|
"@receptron/test_utils": "^2.0.3",
|
|
@@ -117,7 +117,7 @@
|
|
|
117
117
|
"prettier": "^3.6.2",
|
|
118
118
|
"tsx": "^4.20.6",
|
|
119
119
|
"typescript": "^5.9.3",
|
|
120
|
-
"typescript-eslint": "^8.
|
|
120
|
+
"typescript-eslint": "^8.48.0"
|
|
121
121
|
},
|
|
122
122
|
"engines": {
|
|
123
123
|
"node": ">=20.0.0"
|
package/scripts/test/gpt.json
CHANGED
|
@@ -21,11 +21,17 @@
|
|
|
21
21
|
}
|
|
22
22
|
},
|
|
23
23
|
"beats": [
|
|
24
|
+
{
|
|
25
|
+
"speaker": "Host",
|
|
26
|
+
"text": "How are you?",
|
|
27
|
+
"imagePrompt": "A witch in Harajuku"
|
|
28
|
+
},
|
|
24
29
|
{
|
|
25
30
|
"speaker": "Host",
|
|
26
31
|
"text": "How are you?",
|
|
27
32
|
"imagePrompt": "A witch in Harajuku",
|
|
28
33
|
"imageParams": {
|
|
34
|
+
"model": "gpt-image-1-mini",
|
|
29
35
|
"style": "Ukiyoe-style"
|
|
30
36
|
}
|
|
31
37
|
}
|
|
Binary file
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1"
|
|
4
|
+
},
|
|
5
|
+
"lang": "en",
|
|
6
|
+
"title": "Audio Instructions Test",
|
|
7
|
+
"speechParams": {
|
|
8
|
+
"speakers": {
|
|
9
|
+
"Presenter": {
|
|
10
|
+
"provider": "gemini",
|
|
11
|
+
"voiceId": "Kore"
|
|
12
|
+
},
|
|
13
|
+
"Presenter2": {
|
|
14
|
+
"provider": "gemini",
|
|
15
|
+
"voiceId": "Puck"
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"beats": [
|
|
20
|
+
{
|
|
21
|
+
"speaker": "Presenter",
|
|
22
|
+
"text": "Hello, I'm a presenter. I have no instructions.",
|
|
23
|
+
"image": {
|
|
24
|
+
"type": "textSlide",
|
|
25
|
+
"slide": {
|
|
26
|
+
"title": "Presenter"
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"speaker": "Presenter2",
|
|
32
|
+
"text": "Hello, I'm a presenter 2. My instructions are 'Speak in a cheerful and positive tone'.",
|
|
33
|
+
"image": {
|
|
34
|
+
"type": "textSlide",
|
|
35
|
+
"slide": {
|
|
36
|
+
"title": "Presenter 2"
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"speaker": "Presenter",
|
|
42
|
+
"text": "Hello, I'm a presenter. I have a British English instruction.",
|
|
43
|
+
"speechOptions": {
|
|
44
|
+
"instruction": "Speak in British English."
|
|
45
|
+
},
|
|
46
|
+
"image": {
|
|
47
|
+
"type": "textSlide",
|
|
48
|
+
"slide": {
|
|
49
|
+
"title": "Presenter with British English instruction"
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"speaker": "Presenter",
|
|
55
|
+
"text": "Hello, I'm a presenter. I have a whisper instruction.",
|
|
56
|
+
"speechOptions": {
|
|
57
|
+
"instruction": "Whisper softly, like a pillow talk."
|
|
58
|
+
},
|
|
59
|
+
"image": {
|
|
60
|
+
"type": "textSlide",
|
|
61
|
+
"slide": {
|
|
62
|
+
"title": "Presenter with whisper instruction"
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
]
|
|
67
|
+
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$mulmocast": {
|
|
3
|
+
"version": "1.1"
|
|
4
|
+
},
|
|
5
|
+
"lang": "en",
|
|
6
|
+
"title": "Audio Instructions Test",
|
|
7
|
+
"speechParams": {
|
|
8
|
+
"speakers": {
|
|
9
|
+
"Presenter": {
|
|
10
|
+
"provider": "google",
|
|
11
|
+
"voiceId": "ja-JP-Standard-A"
|
|
12
|
+
},
|
|
13
|
+
"Presenter2": {
|
|
14
|
+
"provider": "google",
|
|
15
|
+
"voiceId": "ja-JP-Standard-B"
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"beats": [
|
|
20
|
+
{
|
|
21
|
+
"speaker": "Presenter",
|
|
22
|
+
"text": "Hello, I'm a presenter. I have no instructions.",
|
|
23
|
+
"image": {
|
|
24
|
+
"type": "textSlide",
|
|
25
|
+
"slide": {
|
|
26
|
+
"title": "Presenter"
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"speaker": "Presenter2",
|
|
32
|
+
"text": "Hello, I'm a presenter 2. My instructions are 'Speak in a cheerful and positive tone'.",
|
|
33
|
+
"image": {
|
|
34
|
+
"type": "textSlide",
|
|
35
|
+
"slide": {
|
|
36
|
+
"title": "Presenter 2"
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"speaker": "Presenter",
|
|
42
|
+
"text": "Hello, I'm a presenter. I have a British English instruction.",
|
|
43
|
+
"speechOptions": {
|
|
44
|
+
"instruction": "Speak in British English."
|
|
45
|
+
},
|
|
46
|
+
"image": {
|
|
47
|
+
"type": "textSlide",
|
|
48
|
+
"slide": {
|
|
49
|
+
"title": "Presenter with British English instruction"
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"speaker": "Presenter",
|
|
55
|
+
"text": "Hello, I'm a presenter. I have a whisper instruction.",
|
|
56
|
+
"speechOptions": {
|
|
57
|
+
"instruction": "Whisper softly, like a pillow talk."
|
|
58
|
+
},
|
|
59
|
+
"image": {
|
|
60
|
+
"type": "textSlide",
|
|
61
|
+
"slide": {
|
|
62
|
+
"title": "Presenter with whisper instruction"
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
]
|
|
67
|
+
}
|