mulmocast 1.1.5 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/lib/actions/audio.js +10 -1
  2. package/lib/actions/image_agents.d.ts +3 -12
  3. package/lib/actions/image_agents.js +12 -8
  4. package/lib/actions/images.js +2 -1
  5. package/lib/actions/translate.d.ts +51 -2
  6. package/lib/actions/translate.js +193 -148
  7. package/lib/agents/combine_audio_files_agent.js +1 -1
  8. package/lib/agents/lipsync_replicate_agent.js +10 -3
  9. package/lib/agents/tts_nijivoice_agent.js +1 -1
  10. package/lib/cli/commands/audio/handler.js +1 -1
  11. package/lib/cli/commands/image/handler.js +1 -1
  12. package/lib/cli/commands/movie/handler.js +1 -1
  13. package/lib/cli/commands/pdf/handler.js +1 -1
  14. package/lib/cli/helpers.d.ts +1 -4
  15. package/lib/cli/helpers.js +3 -2
  16. package/lib/index.common.d.ts +1 -0
  17. package/lib/index.common.js +1 -0
  18. package/lib/mcp/server.js +1 -1
  19. package/lib/methods/mulmo_presentation_style.d.ts +3 -2
  20. package/lib/methods/mulmo_script.d.ts +4 -1
  21. package/lib/methods/mulmo_script.js +18 -2
  22. package/lib/methods/mulmo_studio_context.d.ts +1 -0
  23. package/lib/methods/mulmo_studio_context.js +8 -0
  24. package/lib/types/agent.d.ts +1 -0
  25. package/lib/types/schema.d.ts +326 -230
  26. package/lib/types/schema.js +10 -3
  27. package/lib/types/type.d.ts +3 -2
  28. package/lib/utils/const.d.ts +1 -0
  29. package/lib/utils/const.js +2 -1
  30. package/lib/utils/context.d.ts +393 -50
  31. package/lib/utils/context.js +90 -57
  32. package/lib/utils/filters.d.ts +1 -0
  33. package/lib/utils/filters.js +8 -0
  34. package/lib/utils/image_plugins/mermaid.js +1 -1
  35. package/lib/utils/image_plugins/source.js +1 -1
  36. package/lib/utils/preprocess.d.ts +2 -2
  37. package/lib/utils/preprocess.js +3 -3
  38. package/lib/utils/provider2agent.d.ts +3 -2
  39. package/lib/utils/provider2agent.js +20 -2
  40. package/lib/utils/string.d.ts +1 -1
  41. package/lib/utils/string.js +12 -8
  42. package/lib/utils/utils.js +2 -6
  43. package/package.json +2 -2
  44. package/scripts/templates/image_refs.json +1 -0
  45. package/scripts/templates/voice_over.json +1 -0
  46. package/scripts/test/gpt.json +1 -0
  47. package/scripts/test/test1.json +1 -0
  48. package/scripts/test/test_audio.json +1 -0
  49. package/scripts/test/test_audio_instructions.json +1 -0
  50. package/scripts/test/test_beats.json +1 -0
  51. package/scripts/test/test_captions.json +1 -0
  52. package/scripts/test/test_elevenlabs_models.json +1 -0
  53. package/scripts/test/test_hello.json +1 -0
  54. package/scripts/test/test_hello_google.json +1 -0
  55. package/scripts/test/test_html.json +1 -0
  56. package/scripts/test/test_image_refs.json +1 -0
  57. package/scripts/test/test_images.json +1 -0
  58. package/scripts/test/test_lang.json +58 -2
  59. package/scripts/test/test_layout.json +1 -0
  60. package/scripts/test/test_lipsync.json +9 -0
  61. package/scripts/test/test_loop.json +1 -0
  62. package/scripts/test/test_media.json +1 -0
  63. package/scripts/test/test_mixed_providers.json +1 -0
  64. package/scripts/test/test_movie.json +1 -0
  65. package/scripts/test/test_no_audio.json +1 -0
  66. package/scripts/test/test_no_audio_with_credit.json +1 -0
  67. package/scripts/test/test_order.json +1 -0
  68. package/scripts/test/test_order_portrait.json +1 -0
  69. package/scripts/test/test_replicate.json +19 -0
  70. package/scripts/test/test_slideout_left_no_audio.json +1 -0
  71. package/scripts/test/test_spillover.json +1 -0
  72. package/scripts/test/test_transition.json +1 -0
  73. package/scripts/test/test_transition_no_audio.json +1 -0
  74. package/scripts/test/test_video_speed.json +1 -0
  75. package/scripts/test/test_voice_over.json +1 -0
  76. package/scripts/test/test_voices.json +1 -0
  77. package/scripts/templates/image_prompt_only_template.ts +0 -95
@@ -10,7 +10,7 @@ import ttsElevenlabsAgent from "../agents/tts_elevenlabs_agent.js";
10
10
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
11
11
  import { MulmoPresentationStyleMethods } from "../methods/index.js";
12
12
  import { text2SpeechProviderSchema } from "../types/index.js";
13
- import { fileCacheAgentFilter } from "../utils/filters.js";
13
+ import { fileCacheAgentFilter, nijovoiceTextAgentFilter } from "../utils/filters.js";
14
14
  import { getAudioArtifactFilePath, getAudioFilePath, getOutputStudioFilePath, resolveDirPath, defaultBGMPath, mkdir, writingMessage } from "../utils/file.js";
15
15
  import { text2hash, localizedText, settings2GraphAIConfig } from "../utils/utils.js";
16
16
  import { provider2TTSAgent } from "../utils/provider2agent.js";
@@ -58,6 +58,8 @@ const preprocessor = (namedInputs) => {
58
58
  voiceId,
59
59
  speechOptions,
60
60
  model,
61
+ provider,
62
+ lang,
61
63
  audioPath,
62
64
  studioBeat,
63
65
  needsTTS,
@@ -84,6 +86,8 @@ const graph_tts = {
84
86
  agent: ":preprocessor.ttsAgent",
85
87
  inputs: {
86
88
  text: ":preprocessor.text",
89
+ provider: ":preprocessor.provider",
90
+ lang: ":preprocessor.lang",
87
91
  cache: {
88
92
  force: [":context.force"],
89
93
  file: ":preprocessor.audioPath",
@@ -173,6 +177,11 @@ const agentFilters = [
173
177
  agent: fileCacheAgentFilter,
174
178
  nodeIds: ["tts"],
175
179
  },
180
+ {
181
+ name: "nijovoiceTextAgentFilter",
182
+ agent: nijovoiceTextAgentFilter,
183
+ nodeIds: ["tts"],
184
+ },
176
185
  ];
177
186
  const getConcurrency = (context) => {
178
187
  // Check if any speaker uses nijivoice or elevenlabs (providers that require concurrency = 1)
@@ -23,10 +23,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
23
23
  };
24
24
  lipSyncFile?: string;
25
25
  lipSyncModel?: string;
26
- lipSyncAgentInfo?: {
27
- agentName: string;
28
- defaultModel: string;
29
- };
26
+ lipSyncAgentName?: string;
30
27
  audioFile?: string;
31
28
  beatDuration?: number;
32
29
  htmlPrompt?: undefined;
@@ -61,10 +58,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
61
58
  };
62
59
  lipSyncFile?: string;
63
60
  lipSyncModel?: string;
64
- lipSyncAgentInfo?: {
65
- agentName: string;
66
- defaultModel: string;
67
- };
61
+ lipSyncAgentName?: string;
68
62
  audioFile?: string;
69
63
  beatDuration?: number;
70
64
  htmlPrompt?: undefined;
@@ -102,10 +96,7 @@ export declare const imagePreprocessAgent: (namedInputs: {
102
96
  };
103
97
  lipSyncFile?: string;
104
98
  lipSyncModel?: string;
105
- lipSyncAgentInfo?: {
106
- agentName: string;
107
- defaultModel: string;
108
- };
99
+ lipSyncAgentName?: string;
109
100
  audioFile?: string;
110
101
  beatDuration?: number;
111
102
  htmlPrompt?: undefined;
@@ -25,16 +25,20 @@ export const imagePreprocessAgent = async (namedInputs) => {
25
25
  movieFile: beat.moviePrompt ? moviePaths.movieFile : undefined,
26
26
  beatDuration: beat.duration ?? studioBeat?.duration,
27
27
  };
28
- if (beat.soundEffectPrompt) {
29
- returnValue.soundEffectAgentInfo = MulmoPresentationStyleMethods.getSoundEffectAgentInfo(context.presentationStyle, beat);
30
- returnValue.soundEffectModel =
31
- beat.soundEffectParams?.model ?? context.presentationStyle.soundEffectParams?.model ?? returnValue.soundEffectAgentInfo.defaultModel;
32
- returnValue.soundEffectFile = moviePaths.soundEffectFile;
33
- returnValue.soundEffectPrompt = beat.soundEffectPrompt;
28
+ const isMovie = Boolean(beat.moviePrompt || beat?.image?.type === "movie");
29
+ if (isMovie) {
30
+ if (beat.soundEffectPrompt) {
31
+ returnValue.soundEffectAgentInfo = MulmoPresentationStyleMethods.getSoundEffectAgentInfo(context.presentationStyle, beat);
32
+ returnValue.soundEffectModel =
33
+ beat.soundEffectParams?.model ?? context.presentationStyle.soundEffectParams?.model ?? returnValue.soundEffectAgentInfo.defaultModel;
34
+ returnValue.soundEffectFile = moviePaths.soundEffectFile;
35
+ returnValue.soundEffectPrompt = beat.soundEffectPrompt;
36
+ }
34
37
  }
35
38
  if (beat.enableLipSync) {
36
- returnValue.lipSyncAgentInfo = MulmoPresentationStyleMethods.getLipSyncAgentInfo(context.presentationStyle, beat);
37
- returnValue.lipSyncModel = beat.lipSyncParams?.model ?? context.presentationStyle.lipSyncParams?.model ?? returnValue.lipSyncAgentInfo.defaultModel;
39
+ const lipSyncAgentInfo = MulmoPresentationStyleMethods.getLipSyncAgentInfo(context.presentationStyle, beat);
40
+ returnValue.lipSyncAgentName = lipSyncAgentInfo.agentName;
41
+ returnValue.lipSyncModel = beat.lipSyncParams?.model ?? context.presentationStyle.lipSyncParams?.model ?? lipSyncAgentInfo.defaultModel;
38
42
  returnValue.lipSyncFile = moviePaths.lipSyncFile;
39
43
  // Audio file will be set from the beat's audio file when available
40
44
  returnValue.audioFile = studioBeat?.audioFile;
@@ -218,10 +218,11 @@ const beat_graph_data = {
218
218
  },
219
219
  lipSyncGenerator: {
220
220
  if: ":beat.enableLipSync",
221
- agent: ":preprocessor.lipSyncAgentInfo.agentName",
221
+ agent: ":preprocessor.lipSyncAgentName",
222
222
  inputs: {
223
223
  onComplete: [":soundEffectGenerator"], // to wait for soundEffectGenerator to finish
224
224
  movieFile: ":preprocessor.movieFile",
225
+ imageFile: ":preprocessor.referenceImageForMovie",
225
226
  audioFile: ":preprocessor.audioFile",
226
227
  lipSyncFile: ":preprocessor.lipSyncFile",
227
228
  params: {
@@ -1,7 +1,56 @@
1
1
  import "dotenv/config";
2
2
  import type { CallbackFunction } from "graphai";
3
- import { MulmoStudioContext } from "../types/index.js";
3
+ import { LANG, LocalizedText, MulmoStudioContext } from "../types/index.js";
4
+ export declare const translateTextGraph: {
5
+ version: number;
6
+ nodes: {
7
+ localizedText: {
8
+ inputs: {
9
+ targetLang: string;
10
+ beat: string;
11
+ multiLingual: string;
12
+ lang: string;
13
+ beatIndex: string;
14
+ mulmoContext: string;
15
+ system: string;
16
+ prompt: string[];
17
+ };
18
+ passThrough: {
19
+ lang: string;
20
+ };
21
+ output: {
22
+ text: string;
23
+ };
24
+ agent: string;
25
+ };
26
+ splitText: {
27
+ agent: (namedInputs: {
28
+ localizedText: LocalizedText;
29
+ targetLang: LANG;
30
+ }) => string[];
31
+ inputs: {
32
+ targetLang: string;
33
+ localizedText: string;
34
+ };
35
+ };
36
+ textTranslateResult: {
37
+ isResult: boolean;
38
+ agent: string;
39
+ inputs: {
40
+ lang: string;
41
+ text: string;
42
+ texts: string;
43
+ ttsTexts: string;
44
+ cacheKey: string;
45
+ };
46
+ };
47
+ };
48
+ };
49
+ export declare const translateBeat: (index: number, context: MulmoStudioContext, targetLangs: string[], args?: {
50
+ settings?: Record<string, string>;
51
+ callbacks?: CallbackFunction[];
52
+ }) => Promise<void>;
4
53
  export declare const translate: (context: MulmoStudioContext, args?: {
5
54
  callbacks?: CallbackFunction[];
6
55
  settings?: Record<string, string>;
7
- }) => Promise<void>;
56
+ }) => Promise<MulmoStudioContext>;
@@ -1,34 +1,165 @@
1
1
  import "dotenv/config";
2
- import { GraphAI, assert } from "graphai";
2
+ import { createHash } from "crypto";
3
+ import fs from "fs";
4
+ import { GraphAI, assert, isNull, GraphAILogger } from "graphai";
3
5
  import * as agents from "@graphai/vanilla";
4
6
  import { openAIAgent } from "@graphai/openai_agent";
5
7
  import { fileWriteAgent } from "@graphai/vanilla_node_agents";
6
- import { recursiveSplitJa, replacementsJa, replacePairsJa } from "../utils/string.js";
8
+ import { recursiveSplitJa } from "../utils/string.js";
7
9
  import { settings2GraphAIConfig } from "../utils/utils.js";
10
+ import { getMultiLingual } from "../utils/context.js";
11
+ import { currentMulmoScriptVersion } from "../utils/const.js";
8
12
  import { getOutputMultilingualFilePath, mkdir, writingMessage } from "../utils/file.js";
9
13
  import { translateSystemPrompt, translatePrompts } from "../utils/prompt.js";
10
14
  import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
11
15
  const vanillaAgents = agents.default ?? agents;
12
- const translateGraph = {
16
+ const hashSHA256 = (text) => {
17
+ return createHash("sha256").update(text, "utf8").digest("hex");
18
+ };
19
+ // 1. translateGraph / map each beats.
20
+ // 2. beatGraph / map each target lang.
21
+ // 3. translateTextGraph / translate text.
22
+ export const translateTextGraph = {
13
23
  version: 0.5,
14
24
  nodes: {
25
+ localizedText: {
26
+ inputs: {
27
+ targetLang: ":targetLang", // for cache
28
+ beat: ":beat", // for cache
29
+ multiLingual: ":multiLingual", // for cache
30
+ lang: ":lang", // for cache
31
+ beatIndex: ":beatIndex", // for cache (state)
32
+ mulmoContext: ":context", // for cache (state)
33
+ system: translateSystemPrompt,
34
+ prompt: translatePrompts,
35
+ },
36
+ passThrough: {
37
+ lang: ":targetLang",
38
+ },
39
+ output: {
40
+ text: ".text",
41
+ },
42
+ // return { lang, text } <- localizedText
43
+ agent: "openAIAgent",
44
+ },
45
+ splitText: {
46
+ agent: (namedInputs) => {
47
+ const { localizedText, targetLang } = namedInputs;
48
+ // Cache
49
+ if (localizedText.texts) {
50
+ return localizedText.texts;
51
+ }
52
+ if (targetLang === "ja") {
53
+ return recursiveSplitJa(localizedText.text);
54
+ }
55
+ // not split
56
+ return [localizedText.text];
57
+ },
58
+ inputs: {
59
+ targetLang: ":targetLang",
60
+ localizedText: ":localizedText",
61
+ },
62
+ },
63
+ textTranslateResult: {
64
+ isResult: true,
65
+ agent: "copyAgent",
66
+ inputs: {
67
+ lang: ":targetLang",
68
+ text: ":localizedText.text",
69
+ texts: ":splitText",
70
+ ttsTexts: ":splitText",
71
+ cacheKey: ":multiLingual.cacheKey",
72
+ },
73
+ },
74
+ },
75
+ };
76
+ const beatGraph = {
77
+ version: 0.5,
78
+ nodes: {
79
+ targetLangs: {},
15
80
  context: {},
16
- defaultLang: {},
17
- outDirPath: {},
18
- outputMultilingualFilePath: {},
19
- lang: {
20
- agent: "stringUpdateTextAgent",
81
+ beat: {},
82
+ __mapIndex: {},
83
+ // for cache
84
+ multiLingual: {
85
+ agent: (namedInputs) => {
86
+ const { multiLinguals, beatIndex, text } = namedInputs;
87
+ const cacheKey = hashSHA256(text ?? "");
88
+ const multiLingual = multiLinguals?.[beatIndex];
89
+ if (!multiLingual) {
90
+ return { cacheKey, multiLingualTexts: {} };
91
+ }
92
+ return {
93
+ multiLingualTexts: Object.keys(multiLingual.multiLingualTexts).reduce((tmp, lang) => {
94
+ if (multiLingual.multiLingualTexts[lang].cacheKey === cacheKey) {
95
+ tmp[lang] = multiLingual.multiLingualTexts[lang];
96
+ }
97
+ return tmp;
98
+ }, {}),
99
+ cacheKey,
100
+ };
101
+ },
21
102
  inputs: {
22
- newText: ":context.studio.script.lang",
23
- oldText: ":defaultLang",
103
+ text: ":beat.text",
104
+ beatIndex: ":__mapIndex",
105
+ multiLinguals: ":context.multiLingual",
24
106
  },
25
107
  },
26
- targetLangs: {}, // TODO
27
- mergeStudioResult: {
108
+ preprocessMultiLingual: {
109
+ agent: "mapAgent",
110
+ inputs: {
111
+ beat: ":beat",
112
+ multiLingual: ":multiLingual",
113
+ rows: ":targetLangs",
114
+ lang: ":context.studio.script.lang",
115
+ context: ":context",
116
+ beatIndex: ":__mapIndex",
117
+ },
118
+ params: {
119
+ compositeResult: true,
120
+ rowKey: "targetLang",
121
+ },
122
+ graph: translateTextGraph,
123
+ },
124
+ mergeLocalizedText: {
125
+ // console: { after: true},
126
+ agent: "arrayToObjectAgent",
127
+ inputs: {
128
+ items: ":preprocessMultiLingual.textTranslateResult",
129
+ },
130
+ params: {
131
+ key: "lang",
132
+ },
133
+ },
134
+ multiLingualTexts: {
135
+ agent: "mergeObjectAgent",
136
+ inputs: {
137
+ items: [":multiLingual.multiLingualTexts", ":mergeLocalizedText"],
138
+ },
139
+ },
140
+ mergeMultiLingualData: {
28
141
  isResult: true,
142
+ // console: { after: true},
29
143
  agent: "mergeObjectAgent",
30
144
  inputs: {
31
- items: [{ multiLingual: ":beatsMap.mergeMultiLingualData" }],
145
+ items: [":multiLingual", { multiLingualTexts: ":multiLingualTexts" }],
146
+ },
147
+ },
148
+ },
149
+ };
150
+ const translateGraph = {
151
+ version: 0.5,
152
+ nodes: {
153
+ context: {},
154
+ outDirPath: {},
155
+ outputMultilingualFilePath: {},
156
+ targetLangs: {},
157
+ mergeStudioResult: {
158
+ isResult: true,
159
+ agent: "copyAgent",
160
+ inputs: {
161
+ version: "1.1",
162
+ multiLingual: ":beatsMap.mergeMultiLingualData",
32
163
  },
33
164
  },
34
165
  beatsMap: {
@@ -37,139 +168,18 @@ const translateGraph = {
37
168
  targetLangs: ":targetLangs",
38
169
  context: ":context",
39
170
  rows: ":context.studio.script.beats",
40
- lang: ":lang",
41
171
  },
42
172
  params: {
43
173
  rowKey: "beat",
44
174
  compositeResult: true,
45
175
  },
46
- graph: {
47
- version: 0.5,
48
- nodes: {
49
- // for cache
50
- multiLingual: {
51
- agent: (namedInputs) => {
52
- return (namedInputs.rows && namedInputs.rows[namedInputs.index]) || {};
53
- },
54
- inputs: {
55
- index: ":__mapIndex",
56
- rows: ":context.multiLingual",
57
- },
58
- },
59
- preprocessMultiLingual: {
60
- agent: "mapAgent",
61
- inputs: {
62
- beat: ":beat",
63
- multiLingual: ":multiLingual",
64
- rows: ":targetLangs",
65
- lang: ":lang.text",
66
- context: ":context",
67
- beatIndex: ":__mapIndex",
68
- },
69
- params: {
70
- compositeResult: true,
71
- rowKey: "targetLang",
72
- },
73
- graph: {
74
- version: 0.5,
75
- nodes: {
76
- localizedTexts: {
77
- inputs: {
78
- targetLang: ":targetLang", // for cache
79
- beat: ":beat", // for cache
80
- multiLingual: ":multiLingual", // for cache
81
- lang: ":lang", // for cache
82
- beatIndex: ":beatIndex", // for cache
83
- mulmoContext: ":context", // for cache
84
- system: translateSystemPrompt,
85
- prompt: translatePrompts,
86
- },
87
- passThrough: {
88
- lang: ":targetLang",
89
- },
90
- output: {
91
- text: ".text",
92
- },
93
- // return { lang, text } <- localizedText
94
- agent: "openAIAgent",
95
- },
96
- splitText: {
97
- agent: (namedInputs) => {
98
- const { localizedText, targetLang } = namedInputs;
99
- // Cache
100
- if (localizedText.texts) {
101
- return localizedText;
102
- }
103
- if (targetLang === "ja") {
104
- return {
105
- ...localizedText,
106
- texts: recursiveSplitJa(localizedText.text),
107
- };
108
- }
109
- // not split
110
- return {
111
- ...localizedText,
112
- texts: [localizedText.text],
113
- };
114
- // return { lang, text, texts }
115
- },
116
- inputs: {
117
- targetLang: ":targetLang",
118
- localizedText: ":localizedTexts",
119
- },
120
- },
121
- ttsTexts: {
122
- agent: (namedInputs) => {
123
- const { localizedText, targetLang } = namedInputs;
124
- // cache
125
- if (localizedText.ttsTexts) {
126
- return localizedText;
127
- }
128
- if (targetLang === "ja") {
129
- return {
130
- ...localizedText,
131
- ttsTexts: localizedText?.texts?.map((text) => replacePairsJa(text, replacementsJa)),
132
- };
133
- }
134
- return {
135
- ...localizedText,
136
- ttsTexts: localizedText.texts,
137
- };
138
- },
139
- inputs: {
140
- targetLang: ":targetLang",
141
- localizedText: ":splitText",
142
- },
143
- isResult: true,
144
- },
145
- },
146
- },
147
- },
148
- mergeLocalizedText: {
149
- agent: "arrayToObjectAgent",
150
- inputs: {
151
- items: ":preprocessMultiLingual.ttsTexts",
152
- },
153
- params: {
154
- key: "lang",
155
- },
156
- },
157
- mergeMultiLingualData: {
158
- isResult: true,
159
- agent: "mergeObjectAgent",
160
- inputs: {
161
- items: [":multiLingual", { multiLingualTexts: ":mergeLocalizedText" }],
162
- },
163
- },
164
- },
165
- },
176
+ graph: beatGraph,
166
177
  },
167
178
  writeOutput: {
168
- // console: { before: true },
169
179
  agent: "fileWriteAgent",
170
180
  inputs: {
171
181
  file: ":outputMultilingualFilePath",
172
- text: ":mergeStudioResult.multiLingual.toJSON()",
182
+ text: ":mergeStudioResult.toJSON()",
173
183
  },
174
184
  },
175
185
  },
@@ -180,18 +190,14 @@ const localizedTextCacheAgentFilter = async (context, next) => {
180
190
  if (!beat.text) {
181
191
  return { text: "" };
182
192
  }
183
- // The original text is unchanged and the target language text is present
184
- if (multiLingual.multiLingualTexts &&
185
- multiLingual.multiLingualTexts[lang] &&
186
- multiLingual.multiLingualTexts[lang].text === beat.text &&
187
- multiLingual.multiLingualTexts[targetLang] &&
188
- multiLingual.multiLingualTexts[targetLang].text) {
189
- return { text: multiLingual.multiLingualTexts[targetLang].text };
190
- }
191
193
  // same language
192
194
  if (targetLang === lang) {
193
195
  return { text: beat.text };
194
196
  }
197
+ // The original text is unchanged and the target language text is present
198
+ if (multiLingual.cacheKey === multiLingual.multiLingualTexts[targetLang]?.cacheKey) {
199
+ return { text: multiLingual.multiLingualTexts[targetLang].text };
200
+ }
195
201
  try {
196
202
  MulmoStudioContextMethods.setBeatSessionState(mulmoContext, "multiLingual", beatIndex, true);
197
203
  return await next(context);
@@ -204,11 +210,49 @@ const agentFilters = [
204
210
  {
205
211
  name: "localizedTextCacheAgentFilter",
206
212
  agent: localizedTextCacheAgentFilter,
207
- nodeIds: ["localizedTexts"],
213
+ nodeIds: ["localizedText"],
208
214
  },
209
215
  ];
210
- const defaultLang = "en";
211
- const targetLangs = ["ja", "en"];
216
+ export const translateBeat = async (index, context, targetLangs, args) => {
217
+ const { settings, callbacks } = args ?? {};
218
+ // Validate inputs
219
+ if (index < 0 || index >= context.studio.script.beats.length) {
220
+ throw new Error(`Invalid beat index: ${index}. Must be between 0 and ${context.studio.script.beats.length - 1}`);
221
+ }
222
+ if (!targetLangs || targetLangs.length === 0) {
223
+ throw new Error("targetLangs must be a non-empty array");
224
+ }
225
+ try {
226
+ const fileName = MulmoStudioContextMethods.getFileName(context);
227
+ const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
228
+ const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
229
+ mkdir(outDirPath);
230
+ const config = settings2GraphAIConfig(settings, process.env);
231
+ assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
232
+ const graph = new GraphAI(beatGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
233
+ graph.injectValue("context", context);
234
+ graph.injectValue("targetLangs", targetLangs);
235
+ graph.injectValue("beat", context.studio.script.beats[index]);
236
+ graph.injectValue("__mapIndex", index);
237
+ if (callbacks) {
238
+ callbacks.forEach((callback) => {
239
+ graph.registerCallback(callback);
240
+ });
241
+ }
242
+ const results = await graph.run();
243
+ const multiLingual = getMultiLingual(outputMultilingualFilePath, context.studio.beats.length);
244
+ multiLingual[index] = results.mergeMultiLingualData;
245
+ const data = {
246
+ version: currentMulmoScriptVersion,
247
+ multiLingual,
248
+ };
249
+ fs.writeFileSync(outputMultilingualFilePath, JSON.stringify(data, null, 2), "utf8");
250
+ writingMessage(outputMultilingualFilePath);
251
+ }
252
+ catch (error) {
253
+ GraphAILogger.log(error);
254
+ }
255
+ };
212
256
  export const translate = async (context, args) => {
213
257
  const { settings, callbacks } = args ?? {};
214
258
  try {
@@ -217,11 +261,11 @@ export const translate = async (context, args) => {
217
261
  const outDirPath = MulmoStudioContextMethods.getOutDirPath(context);
218
262
  const outputMultilingualFilePath = getOutputMultilingualFilePath(outDirPath, fileName);
219
263
  mkdir(outDirPath);
264
+ const targetLangs = [...new Set([context.lang, context.studio.script.captionParams?.lang].filter((x) => !isNull(x)))];
220
265
  const config = settings2GraphAIConfig(settings, process.env);
221
266
  assert(!!config?.openAIAgent?.apiKey, "The OPENAI_API_KEY environment variable is missing or empty");
222
267
  const graph = new GraphAI(translateGraph, { ...vanillaAgents, fileWriteAgent, openAIAgent }, { agentFilters, config });
223
268
  graph.injectValue("context", context);
224
- graph.injectValue("defaultLang", defaultLang);
225
269
  graph.injectValue("targetLangs", targetLangs);
226
270
  graph.injectValue("outDirPath", outDirPath);
227
271
  graph.injectValue("outputMultilingualFilePath", outputMultilingualFilePath);
@@ -239,4 +283,5 @@ export const translate = async (context, args) => {
239
283
  finally {
240
284
  MulmoStudioContextMethods.setSessionState(context, "multiLingual", false);
241
285
  }
286
+ return context;
242
287
  };
@@ -94,7 +94,7 @@ const voiceOverProcess = (context, mediaDurations, movieDuration, beatDurations,
94
94
  if (voiceStartAt) {
95
95
  const remainingDuration = movieDuration - voiceStartAt;
96
96
  const duration = remaining - remainingDuration;
97
- userAssert(duration >= 0, `Invalid startAt: At index(${idx}), avaiable duration(${duration}) < 0`);
97
+ userAssert(duration >= 0, `Invalid startAt: At index(${idx}), available duration(${duration}) < 0`);
98
98
  beatDurations.push(duration);
99
99
  subBeatDurations.silenceDuration = duration - subBeatDurations.audioDuration;
100
100
  userAssert(subBeatDurations.silenceDuration >= 0, `Duration Overwrap: At index(${idx}), silenceDuration(${subBeatDurations.silenceDuration}) < 0`);
@@ -3,7 +3,7 @@ import { GraphAILogger } from "graphai";
3
3
  import Replicate from "replicate";
4
4
  import { provider2LipSyncAgent } from "../utils/provider2agent.js";
5
5
  export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) => {
6
- const { movieFile, audioFile } = namedInputs;
6
+ const { movieFile, audioFile, imageFile } = namedInputs;
7
7
  const apiKey = config?.apiKey;
8
8
  const model = params.model ?? provider2LipSyncAgent.replicate.defaultModel;
9
9
  if (!apiKey) {
@@ -12,10 +12,12 @@ export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) =>
12
12
  const replicate = new Replicate({
13
13
  auth: apiKey,
14
14
  });
15
- const videoBuffer = readFileSync(movieFile);
15
+ const videoBuffer = movieFile ? readFileSync(movieFile) : undefined;
16
16
  const audioBuffer = readFileSync(audioFile);
17
- const videoUri = `data:video/quicktime;base64,${videoBuffer.toString("base64")}`;
17
+ const imageBuffer = imageFile ? readFileSync(imageFile) : undefined;
18
+ const videoUri = videoBuffer ? `data:video/quicktime;base64,${videoBuffer.toString("base64")}` : undefined;
18
19
  const audioUri = `data:audio/wav;base64,${audioBuffer.toString("base64")}`;
20
+ const imageUri = imageBuffer ? `data:image/png;base64,${imageBuffer.toString("base64")}` : undefined;
19
21
  const input = {
20
22
  video: undefined,
21
23
  video_input: undefined,
@@ -23,6 +25,7 @@ export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) =>
23
25
  audio: undefined,
24
26
  audio_input: undefined,
25
27
  audio_file: undefined,
28
+ image: undefined,
26
29
  };
27
30
  const modelParams = provider2LipSyncAgent.replicate.modelParams[model];
28
31
  if (!modelParams) {
@@ -30,12 +33,16 @@ export const lipSyncReplicateAgent = async ({ namedInputs, params, config, }) =>
30
33
  }
31
34
  const videoParam = modelParams.video;
32
35
  const audioParam = modelParams.audio;
36
+ const imageParam = modelParams.image;
33
37
  if (videoParam === "video" || videoParam === "video_input" || videoParam === "video_url") {
34
38
  input[videoParam] = videoUri;
35
39
  }
36
40
  if (audioParam === "audio" || audioParam === "audio_input" || audioParam === "audio_file") {
37
41
  input[audioParam] = audioUri;
38
42
  }
43
+ if (imageParam === "image") {
44
+ input[imageParam] = imageUri;
45
+ }
39
46
  const model_identifier = provider2LipSyncAgent.replicate.modelParams[model]?.identifier ?? model;
40
47
  try {
41
48
  const output = await replicate.run(model_identifier, {
@@ -27,7 +27,7 @@ export const ttsNijivoiceAgent = async ({ params, namedInputs, config, }) => {
27
27
  try {
28
28
  const voiceRes = await fetch(url, options);
29
29
  const voiceJson = await voiceRes.json();
30
- if (voiceJson && voiceJson.generatedVoice && voiceJson.generatedVoice.audioFileDownloadUrl) {
30
+ if (voiceJson?.generatedVoice?.audioFileDownloadUrl) {
31
31
  const audioRes = await fetch(voiceJson.generatedVoice.audioFileDownloadUrl);
32
32
  const buffer = Buffer.from(await audioRes.arrayBuffer());
33
33
  return { buffer };
@@ -5,6 +5,6 @@ export const handler = async (argv) => {
5
5
  if (!context) {
6
6
  process.exit(1);
7
7
  }
8
- await runTranslateIfNeeded(context, argv);
8
+ await runTranslateIfNeeded(context);
9
9
  await audio(context);
10
10
  };