mulmocast 2.4.2 → 2.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  import { MulmoStudioContext, MulmoBeat, MulmoTransition, MulmoCanvasDimension, MulmoFillOption, MulmoVideoFilter } from "../types/index.js";
2
2
  import { FfmpegContext } from "../utils/ffmpeg_utils.js";
3
3
  type VideoId = string | undefined;
4
- export declare const getVideoPart: (inputIndex: number, isMovie: boolean, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption, speed: number, filters?: MulmoVideoFilter[]) => {
4
+ export declare const getVideoPart: (inputIndex: number, isMovie: boolean, duration: number, canvasInfo: MulmoCanvasDimension, fillOption: MulmoFillOption, speed: number, filters?: MulmoVideoFilter[], frameCount?: number) => {
5
5
  videoId: string;
6
6
  videoPart: string;
7
7
  };
@@ -8,7 +8,8 @@ import { MulmoStudioContextMethods } from "../methods/mulmo_studio_context.js";
8
8
  import { convertVideoFilterToFFmpeg } from "../utils/video_filter.js";
9
9
  // const isMac = process.platform === "darwin";
10
10
  const videoCodec = "libx264"; // "h264_videotoolbox" (macOS only) is too noisy
11
- export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed, filters) => {
11
+ const VIDEO_FPS = 30;
12
+ export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOption, speed, filters, frameCount) => {
12
13
  const videoId = `v${inputIndex}`;
13
14
  const videoFilters = [];
14
15
  // Handle different media types
@@ -21,8 +22,20 @@ export const getVideoPart = (inputIndex, isMovie, duration, canvasInfo, fillOpti
21
22
  else {
22
23
  videoFilters.push("loop=loop=-1:size=1:start=0");
23
24
  }
24
- // Common filters for all media types
25
- videoFilters.push(`trim=duration=${originalDuration}`, "fps=30");
25
+ // Normalize framerate first so trim=end_frame counts frames at VIDEO_FPS,
26
+ // regardless of the input's native framerate.
27
+ videoFilters.push(`fps=${VIDEO_FPS}`);
28
+ // Use frame-exact trimming when frameCount is provided to prevent cumulative drift
29
+ // between video and audio tracks. trim=duration=X rounds up to next frame boundary,
30
+ // causing ~0.03s extra per beat that accumulates over many beats.
31
+ if (frameCount !== undefined && frameCount > 0) {
32
+ // Account for speed: setpts compresses timestamps, so we need more input frames
33
+ const inputFrameCount = Math.max(1, Math.round(frameCount * speed));
34
+ videoFilters.push(`trim=end_frame=${inputFrameCount}`);
35
+ }
36
+ else {
37
+ videoFilters.push(`trim=duration=${originalDuration}`);
38
+ }
26
39
  // Apply speed if specified
27
40
  if (speed === 1.0) {
28
41
  videoFilters.push("setpts=PTS-STARTPTS");
@@ -79,7 +92,7 @@ const getOutputOption = (audioId, videoId) => {
79
92
  "4M", // Reduced buffer size
80
93
  "-maxrate",
81
94
  "3M", // Reduced from 7M to 3M
82
- "-r 30", // Set frame rate
95
+ `-r ${VIDEO_FPS}`, // Set frame rate
83
96
  "-pix_fmt yuv420p", // Set pixel format for better compatibility
84
97
  "-c:a aac", // Audio codec
85
98
  "-b:a 128k", // Audio bitrate
@@ -284,7 +297,7 @@ const getClampedTransitionDuration = (transitionDuration, prevBeatDuration, curr
284
297
  return Math.min(transitionDuration, maxDuration);
285
298
  };
286
299
  export const getTransitionFrameDurations = (context, index) => {
287
- const minFrame = 1 / 30; // 30fpsを想定。最小1フレーム
300
+ const minFrame = 1 / VIDEO_FPS;
288
301
  const beats = context.studio.beats;
289
302
  const scriptBeats = context.studio.script.beats;
290
303
  const getTransitionDuration = (transition, prevBeatIndex, currentBeatIndex) => {
@@ -320,22 +333,22 @@ export const addSplitAndExtractFrames = (ffmpegContext, videoId, firstDuration,
320
333
  if (needFirst) {
321
334
  // Create static frame using nullsrc as base for proper framerate/timebase
322
335
  // Note: setpts must NOT be used here as it loses framerate metadata needed by xfade
323
- ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${firstDuration}:rate=30[${videoId}_first_null]`);
336
+ ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${firstDuration}:rate=${VIDEO_FPS}[${videoId}_first_null]`);
324
337
  ffmpegContext.filterComplex.push(`[${videoId}_first_src]select='eq(n,0)',scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_first_frame]`);
325
- ffmpegContext.filterComplex.push(`[${videoId}_first_null][${videoId}_first_frame]overlay=format=auto,fps=30[${videoId}_first]`);
338
+ ffmpegContext.filterComplex.push(`[${videoId}_first_null][${videoId}_first_frame]overlay=format=auto,fps=${VIDEO_FPS}[${videoId}_first]`);
326
339
  }
327
340
  if (needLast) {
328
341
  if (isMovie) {
329
342
  // Movie beats: extract actual last frame
330
- ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${lastDuration}:rate=30[${videoId}_last_null]`);
343
+ ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${lastDuration}:rate=${VIDEO_FPS}[${videoId}_last_null]`);
331
344
  ffmpegContext.filterComplex.push(`[${videoId}_last_src]reverse,select='eq(n,0)',reverse,scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_last_frame]`);
332
- ffmpegContext.filterComplex.push(`[${videoId}_last_null][${videoId}_last_frame]overlay=format=auto,fps=30[${videoId}_last]`);
345
+ ffmpegContext.filterComplex.push(`[${videoId}_last_null][${videoId}_last_frame]overlay=format=auto,fps=${VIDEO_FPS}[${videoId}_last]`);
333
346
  }
334
347
  else {
335
348
  // Image beats: all frames are identical, so just select one
336
- ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${lastDuration}:rate=30[${videoId}_last_null]`);
349
+ ffmpegContext.filterComplex.push(`nullsrc=size=${canvasInfo.width}x${canvasInfo.height}:duration=${lastDuration}:rate=${VIDEO_FPS}[${videoId}_last_null]`);
337
350
  ffmpegContext.filterComplex.push(`[${videoId}_last_src]select='eq(n,0)',scale=${canvasInfo.width}:${canvasInfo.height}[${videoId}_last_frame]`);
338
- ffmpegContext.filterComplex.push(`[${videoId}_last_null][${videoId}_last_frame]overlay=format=auto,fps=30[${videoId}_last]`);
351
+ ffmpegContext.filterComplex.push(`[${videoId}_last_null][${videoId}_last_frame]overlay=format=auto,fps=${VIDEO_FPS}[${videoId}_last]`);
339
352
  }
340
353
  }
341
354
  };
@@ -367,6 +380,7 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
367
380
  const needsFirstFrame = getNeedFirstFrame(context);
368
381
  // Check which beats need _last (for any transition on next beat - they all need previous beat's last frame)
369
382
  const needsLastFrame = getNeedLastFrame(context);
383
+ let cumulativeFrames = 0;
370
384
  context.studio.beats.reduce((timestamp, studioBeat, index) => {
371
385
  const beat = context.studio.script.beats[index];
372
386
  if (beat.image?.type === "voice_over") {
@@ -377,13 +391,19 @@ export const createVideo = async (audioArtifactFilePath, outputVideoPath, contex
377
391
  const sourceFile = isTest ? "/test/dummy.mp4" : validateBeatSource(studioBeat, index);
378
392
  // The movie duration is bigger in case of voice-over.
379
393
  const duration = Math.max(studioBeat.duration + getExtraPadding(context, index), studioBeat.movieDuration ?? 0);
394
+ // Use cumulative frame tracking to prevent audio-video drift from frame quantization.
395
+ // trim=duration=X rounds up to the next frame boundary (~0.03s per beat at 30fps),
396
+ // causing cumulative drift. Instead, compute exact frame counts per beat.
397
+ const targetEndFrame = Math.round((timestamp + duration) * VIDEO_FPS);
398
+ const frameCount = targetEndFrame - cumulativeFrames;
399
+ cumulativeFrames = targetEndFrame;
380
400
  const inputIndex = FfmpegContextAddInput(ffmpegContext, sourceFile);
381
401
  const isMovie = !!(studioBeat.lipSyncFile ||
382
402
  studioBeat.movieFile ||
383
403
  MulmoPresentationStyleMethods.getImageType(context.presentationStyle, beat) === "movie");
384
404
  const speed = beat.movieParams?.speed ?? 1.0;
385
405
  const filters = beat.movieParams?.filters;
386
- const { videoId, videoPart } = getVideoPart(inputIndex, isMovie, duration, canvasInfo, getFillOption(context, beat), speed, filters);
406
+ const { videoId, videoPart } = getVideoPart(inputIndex, isMovie, duration, canvasInfo, getFillOption(context, beat), speed, filters, frameCount);
387
407
  ffmpegContext.filterComplex.push(videoPart);
388
408
  // for transition
389
409
  const needFirst = needsFirstFrame[index]; // This beat has slidein
@@ -155,7 +155,7 @@ export declare const provider2LLMAgent: {
155
155
  readonly agentName: "geminiAgent";
156
156
  readonly defaultModel: "gemini-2.5-flash";
157
157
  readonly max_tokens: 8192;
158
- readonly models: readonly ["gemini-3-pro-preview", "gemini-3-flash-preview", "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite"];
158
+ readonly models: readonly ["gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite"];
159
159
  readonly keyName: "GEMINI_API_KEY";
160
160
  };
161
161
  readonly groq: {
@@ -289,7 +289,7 @@ export const provider2LLMAgent = {
289
289
  agentName: "geminiAgent",
290
290
  defaultModel: "gemini-2.5-flash",
291
291
  max_tokens: 8192,
292
- models: ["gemini-3-pro-preview", "gemini-3-flash-preview", "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite"],
292
+ models: ["gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite"],
293
293
  keyName: "GEMINI_API_KEY",
294
294
  },
295
295
  groq: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mulmocast",
3
- "version": "2.4.2",
3
+ "version": "2.4.4",
4
4
  "description": "",
5
5
  "type": "module",
6
6
  "main": "lib/index.node.js",