vidpipe 1.3.3 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -291,10 +291,12 @@ function initConfig(cli = {}) {
291
291
  SKIP_MEDIUM_CLIPS: cli.mediumClips === false,
292
292
  SKIP_SOCIAL: cli.social === false,
293
293
  SKIP_CAPTIONS: cli.captions === false,
294
+ SKIP_VISUAL_ENHANCEMENT: cli.visualEnhancement === false,
294
295
  LATE_API_KEY: cli.lateApiKey || process.env.LATE_API_KEY || "",
295
296
  LATE_PROFILE_ID: cli.lateProfileId || process.env.LATE_PROFILE_ID || "",
296
297
  SKIP_SOCIAL_PUBLISH: cli.socialPublish === false,
297
- GEMINI_API_KEY: process.env.GEMINI_API_KEY || ""
298
+ GEMINI_API_KEY: process.env.GEMINI_API_KEY || "",
299
+ GEMINI_MODEL: process.env.GEMINI_MODEL || "gemini-2.5-pro"
298
300
  };
299
301
  return config;
300
302
  }
@@ -724,10 +726,12 @@ async function getVideoResolution(videoPath) {
724
726
  }
725
727
  async function extractSampleFrames(videoPath, tempDir) {
726
728
  const duration = await getVideoDuration(videoPath);
727
- const interval = Math.max(1, Math.floor(duration / (SAMPLE_FRAMES + 1)));
729
+ const effectiveSamples = Math.min(SAMPLE_FRAMES, Math.max(1, Math.floor(duration) - 1));
730
+ const interval = Math.max(1, Math.floor(duration / (effectiveSamples + 1)));
728
731
  const timestamps = [];
729
- for (let i = 1; i <= SAMPLE_FRAMES; i++) {
730
- timestamps.push(i * interval);
732
+ for (let i = 1; i <= effectiveSamples; i++) {
733
+ const ts = i * interval;
734
+ if (ts < duration) timestamps.push(ts);
731
735
  }
732
736
  const framePaths = [];
733
737
  for (let i = 0; i < timestamps.length; i++) {
@@ -871,7 +875,7 @@ function findPeakDiff(means, searchFrom, searchTo, minDiff) {
871
875
  }
872
876
  return maxDiff >= minDiff ? { index: maxIdx, magnitude: maxDiff } : { index: -1, magnitude: maxDiff };
873
877
  }
874
- async function refineBoundingBox(framePaths, position) {
878
+ async function refineBoundingBox(framePaths, position, minEdgeDiff = REFINE_MIN_EDGE_DIFF) {
875
879
  if (framePaths.length === 0) return null;
876
880
  const isRight = position.includes("right");
877
881
  const isBottom = position.includes("bottom");
@@ -893,10 +897,10 @@ async function refineBoundingBox(framePaths, position) {
893
897
  const avgRows = averageFloat64Arrays(rowMeansAll);
894
898
  const xFrom = isRight ? Math.floor(fw * 0.35) : Math.floor(fw * 0.05);
895
899
  const xTo = isRight ? Math.floor(fw * 0.95) : Math.floor(fw * 0.65);
896
- const xEdge = findPeakDiff(avgCols, xFrom, xTo, REFINE_MIN_EDGE_DIFF);
900
+ const xEdge = findPeakDiff(avgCols, xFrom, xTo, minEdgeDiff);
897
901
  const yFrom = isBottom ? Math.floor(fh * 0.35) : Math.floor(fh * 0.05);
898
902
  const yTo = isBottom ? Math.floor(fh * 0.95) : Math.floor(fh * 0.65);
899
- const yEdge = findPeakDiff(avgRows, yFrom, yTo, REFINE_MIN_EDGE_DIFF);
903
+ const yEdge = findPeakDiff(avgRows, yFrom, yTo, minEdgeDiff);
900
904
  if (xEdge.index < 0 || yEdge.index < 0) {
901
905
  logger_default.info(
902
906
  `[FaceDetection] Edge refinement: no strong edges (xDiff=${xEdge.magnitude.toFixed(1)}, yDiff=${yEdge.magnitude.toFixed(1)})`
@@ -986,25 +990,43 @@ async function detectWebcamRegion(videoPath) {
986
990
  y2: boxes.reduce((s, b) => s + b.y2, 0) / boxes.length,
987
991
  confidence: bestConfidence
988
992
  };
989
- const refined = await refineBoundingBox(framePaths, bestPosition);
993
+ let refined = null;
994
+ refined = await refineBoundingBox(framePaths, bestPosition, REFINE_MIN_EDGE_DIFF);
995
+ if (!refined) {
996
+ for (const threshold of REFINE_RETRY_THRESHOLDS) {
997
+ logger_default.info(`[FaceDetection] Retrying edge refinement with threshold=${threshold}`);
998
+ refined = await refineBoundingBox(framePaths, bestPosition, threshold);
999
+ if (refined) break;
1000
+ }
1001
+ }
990
1002
  const scaleX = resolution.width / MODEL_WIDTH;
991
1003
  const scaleY = resolution.height / MODEL_HEIGHT;
992
- let origX, origY, origW, origH;
1004
+ let origX = 0, origY = 0, origW = 0, origH = 0;
993
1005
  if (refined) {
994
1006
  origX = Math.round(refined.x * scaleX);
995
1007
  origY = Math.round(refined.y * scaleY);
996
1008
  origW = Math.round(refined.width * scaleX);
997
1009
  origH = Math.round(refined.height * scaleY);
998
- } else {
999
- const expandFactor = 1.4;
1000
- const faceCx = (avgBox.x1 + avgBox.x2) / 2;
1001
- const faceCy = (avgBox.y1 + avgBox.y2) / 2;
1002
- const faceW = (avgBox.x2 - avgBox.x1) * expandFactor;
1003
- const faceH = (avgBox.y2 - avgBox.y1) * expandFactor;
1004
- origX = Math.max(0, Math.round((faceCx - faceW / 2) * resolution.width));
1005
- origY = Math.max(0, Math.round((faceCy - faceH / 2) * resolution.height));
1006
- origW = Math.min(resolution.width - origX, Math.round(faceW * resolution.width));
1007
- origH = Math.min(resolution.height - origY, Math.round(faceH * resolution.height));
1010
+ const refinedAR = origW / origH;
1011
+ if (origW < MIN_WEBCAM_WIDTH_PX || origH < MIN_WEBCAM_HEIGHT_PX || refinedAR > MAX_WEBCAM_ASPECT_RATIO) {
1012
+ logger_default.info(
1013
+ `[FaceDetection] Refined region implausible (${origW}x${origH}px, AR=${refinedAR.toFixed(1)}), using proportional fallback`
1014
+ );
1015
+ refined = null;
1016
+ }
1017
+ }
1018
+ if (!refined) {
1019
+ const webcamWidthFrac = 0.33;
1020
+ const webcamHeightFrac = 0.28;
1021
+ origW = Math.round(resolution.width * webcamWidthFrac);
1022
+ origH = Math.round(resolution.height * webcamHeightFrac);
1023
+ const isRight = bestPosition.includes("right");
1024
+ const isBottom = bestPosition.includes("bottom");
1025
+ origX = isRight ? resolution.width - origW : 0;
1026
+ origY = isBottom ? resolution.height - origH : 0;
1027
+ logger_default.info(
1028
+ `[FaceDetection] Using proportional fallback: (${origX},${origY}) ${origW}x${origH}`
1029
+ );
1008
1030
  }
1009
1031
  const region = {
1010
1032
  x: origX,
@@ -1028,7 +1050,7 @@ async function detectWebcamRegion(videoPath) {
1028
1050
  });
1029
1051
  }
1030
1052
  }
1031
- var ffmpegPath, ffprobePath, MODEL_PATH, cachedSession, SAMPLE_FRAMES, MODEL_WIDTH, MODEL_HEIGHT, MIN_FACE_CONFIDENCE, MIN_DETECTION_CONFIDENCE, REFINE_MIN_EDGE_DIFF, REFINE_MIN_SIZE_FRAC, REFINE_MAX_SIZE_FRAC;
1053
+ var ffmpegPath, ffprobePath, MODEL_PATH, cachedSession, SAMPLE_FRAMES, MODEL_WIDTH, MODEL_HEIGHT, MIN_FACE_CONFIDENCE, MIN_DETECTION_CONFIDENCE, REFINE_MIN_EDGE_DIFF, REFINE_RETRY_THRESHOLDS, REFINE_MIN_SIZE_FRAC, REFINE_MAX_SIZE_FRAC, MIN_WEBCAM_WIDTH_PX, MIN_WEBCAM_HEIGHT_PX, MAX_WEBCAM_ASPECT_RATIO;
1032
1054
  var init_faceDetection = __esm({
1033
1055
  "src/tools/ffmpeg/faceDetection.ts"() {
1034
1056
  "use strict";
@@ -1042,14 +1064,18 @@ var init_faceDetection = __esm({
1042
1064
  ffprobePath = getFFprobePath();
1043
1065
  MODEL_PATH = join(modelsDir(), "ultraface-320.onnx");
1044
1066
  cachedSession = null;
1045
- SAMPLE_FRAMES = 5;
1067
+ SAMPLE_FRAMES = 15;
1046
1068
  MODEL_WIDTH = 320;
1047
1069
  MODEL_HEIGHT = 240;
1048
1070
  MIN_FACE_CONFIDENCE = 0.5;
1049
1071
  MIN_DETECTION_CONFIDENCE = 0.3;
1050
1072
  REFINE_MIN_EDGE_DIFF = 3;
1073
+ REFINE_RETRY_THRESHOLDS = [2, 1];
1051
1074
  REFINE_MIN_SIZE_FRAC = 0.05;
1052
1075
  REFINE_MAX_SIZE_FRAC = 0.55;
1076
+ MIN_WEBCAM_WIDTH_PX = 300;
1077
+ MIN_WEBCAM_HEIGHT_PX = 200;
1078
+ MAX_WEBCAM_ASPECT_RATIO = 3;
1053
1079
  }
1054
1080
  });
1055
1081
 
@@ -1462,16 +1488,31 @@ async function transcribeAudio(audioPath) {
1462
1488
  const openai = new default4({ apiKey: config2.OPENAI_API_KEY });
1463
1489
  try {
1464
1490
  const prompt = getWhisperPrompt();
1465
- const response = await openai.audio.transcriptions.create({
1466
- model: "whisper-1",
1467
- file: openReadStream(audioPath),
1468
- response_format: "verbose_json",
1469
- timestamp_granularities: ["word", "segment"],
1470
- ...prompt && { prompt }
1471
- });
1491
+ let response;
1492
+ for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
1493
+ try {
1494
+ response = await openai.audio.transcriptions.create({
1495
+ model: "whisper-1",
1496
+ file: openReadStream(audioPath),
1497
+ response_format: "verbose_json",
1498
+ timestamp_granularities: ["word", "segment"],
1499
+ ...prompt && { prompt }
1500
+ });
1501
+ break;
1502
+ } catch (retryError) {
1503
+ const status = typeof retryError === "object" && retryError !== null && "status" in retryError ? retryError.status : void 0;
1504
+ if (status === 401 || status === 400 || status === 429) throw retryError;
1505
+ if (attempt === MAX_RETRIES) throw retryError;
1506
+ const msg = retryError instanceof Error ? retryError.message : String(retryError);
1507
+ logger_default.warn(`Whisper attempt ${attempt}/${MAX_RETRIES} failed: ${msg} \u2014 retrying in ${RETRY_DELAY_MS / 1e3}s`);
1508
+ await new Promise((resolve3) => setTimeout(resolve3, RETRY_DELAY_MS));
1509
+ }
1510
+ }
1511
+ if (!response) throw new Error("Whisper transcription failed after all retries");
1472
1512
  const verboseResponse = response;
1473
1513
  const rawSegments = verboseResponse.segments ?? [];
1474
1514
  const rawWords = verboseResponse.words ?? [];
1515
+ const typedResponse = response;
1475
1516
  const words = rawWords.map((w) => ({
1476
1517
  word: w.word,
1477
1518
  start: w.start,
@@ -1485,20 +1526,20 @@ async function transcribeAudio(audioPath) {
1485
1526
  words: rawWords.filter((w) => w.start >= s.start && w.end <= s.end).map((w) => ({ word: w.word, start: w.start, end: w.end }))
1486
1527
  }));
1487
1528
  logger_default.info(
1488
- `Transcription complete \u2014 ${segments.length} segments, ${words.length} words, language=${response.language}`
1529
+ `Transcription complete \u2014 ${segments.length} segments, ${words.length} words, language=${typedResponse.language}`
1489
1530
  );
1490
- const durationMinutes = (response.duration ?? 0) / 60;
1531
+ const durationMinutes = (typedResponse.duration ?? 0) / 60;
1491
1532
  costTracker.recordServiceUsage("whisper", durationMinutes * WHISPER_COST_PER_MINUTE, {
1492
1533
  model: "whisper-1",
1493
- durationSeconds: response.duration ?? 0,
1534
+ durationSeconds: typedResponse.duration ?? 0,
1494
1535
  audioFile: audioPath
1495
1536
  });
1496
1537
  return {
1497
- text: response.text,
1538
+ text: typedResponse.text,
1498
1539
  segments,
1499
1540
  words,
1500
- language: response.language ?? "unknown",
1501
- duration: response.duration ?? 0
1541
+ language: typedResponse.language ?? "unknown",
1542
+ duration: typedResponse.duration ?? 0
1502
1543
  };
1503
1544
  } catch (error) {
1504
1545
  const message = error instanceof Error ? error.message : String(error);
@@ -1513,7 +1554,7 @@ async function transcribeAudio(audioPath) {
1513
1554
  throw new Error(`Whisper transcription failed: ${message}`);
1514
1555
  }
1515
1556
  }
1516
- var MAX_FILE_SIZE_MB, WHISPER_COST_PER_MINUTE, WARN_FILE_SIZE_MB;
1557
+ var MAX_FILE_SIZE_MB, WHISPER_COST_PER_MINUTE, WARN_FILE_SIZE_MB, MAX_RETRIES, RETRY_DELAY_MS;
1517
1558
  var init_whisperClient = __esm({
1518
1559
  "src/tools/whisper/whisperClient.ts"() {
1519
1560
  "use strict";
@@ -1526,6 +1567,8 @@ var init_whisperClient = __esm({
1526
1567
  MAX_FILE_SIZE_MB = 25;
1527
1568
  WHISPER_COST_PER_MINUTE = 6e-3;
1528
1569
  WARN_FILE_SIZE_MB = 20;
1570
+ MAX_RETRIES = 3;
1571
+ RETRY_DELAY_MS = 5e3;
1529
1572
  }
1530
1573
  });
1531
1574
 
@@ -2989,6 +3032,8 @@ async function extractCompositeClipWithTransitions(videoPath, segments, outputPa
2989
3032
  "[aout]",
2990
3033
  "-c:v",
2991
3034
  "libx264",
3035
+ "-pix_fmt",
3036
+ "yuv420p",
2992
3037
  "-preset",
2993
3038
  "ultrafast",
2994
3039
  "-crf",
@@ -3087,23 +3132,24 @@ async function convertAspectRatio(inputPath, outputPath, targetRatio, options =
3087
3132
  });
3088
3133
  });
3089
3134
  }
3090
- async function convertWithSmartLayout(inputPath, outputPath, config2) {
3135
+ async function convertWithSmartLayout(inputPath, outputPath, config2, webcamOverride) {
3091
3136
  const { label, targetW, screenH, camH, fallbackRatio } = config2;
3092
3137
  const outputDir = dirname(outputPath);
3093
3138
  await ensureDirectory(outputDir);
3094
- const webcam = await detectWebcamRegion(inputPath);
3139
+ const webcam = webcamOverride !== void 0 ? webcamOverride : await detectWebcamRegion(inputPath);
3095
3140
  if (!webcam) {
3096
3141
  logger_default.info(`[${label}] No webcam found, falling back to center-crop`);
3097
3142
  return convertAspectRatio(inputPath, outputPath, fallbackRatio);
3098
3143
  }
3099
3144
  const resolution = await getVideoResolution(inputPath);
3145
+ const margin = Math.round(resolution.width * 0.02);
3100
3146
  let screenCropX;
3101
3147
  let screenCropW;
3102
3148
  if (webcam.position === "top-right" || webcam.position === "bottom-right") {
3103
3149
  screenCropX = 0;
3104
- screenCropW = webcam.x;
3150
+ screenCropW = Math.max(0, webcam.x - margin);
3105
3151
  } else {
3106
- screenCropX = webcam.x + webcam.width;
3152
+ screenCropX = webcam.x + webcam.width + margin;
3107
3153
  screenCropW = Math.max(0, resolution.width - screenCropX);
3108
3154
  }
3109
3155
  const targetAR = targetW / camH;
@@ -3162,32 +3208,32 @@ async function convertWithSmartLayout(inputPath, outputPath, config2) {
3162
3208
  });
3163
3209
  });
3164
3210
  }
3165
- async function convertToPortraitSmart(inputPath, outputPath) {
3211
+ async function convertToPortraitSmart(inputPath, outputPath, webcamOverride) {
3166
3212
  return convertWithSmartLayout(inputPath, outputPath, {
3167
3213
  label: "SmartPortrait",
3168
3214
  targetW: 1080,
3169
3215
  screenH: 1248,
3170
3216
  camH: 672,
3171
3217
  fallbackRatio: "9:16"
3172
- });
3218
+ }, webcamOverride);
3173
3219
  }
3174
- async function convertToSquareSmart(inputPath, outputPath) {
3220
+ async function convertToSquareSmart(inputPath, outputPath, webcamOverride) {
3175
3221
  return convertWithSmartLayout(inputPath, outputPath, {
3176
3222
  label: "SmartSquare",
3177
3223
  targetW: 1080,
3178
3224
  screenH: 700,
3179
3225
  camH: 380,
3180
3226
  fallbackRatio: "1:1"
3181
- });
3227
+ }, webcamOverride);
3182
3228
  }
3183
- async function convertToFeedSmart(inputPath, outputPath) {
3229
+ async function convertToFeedSmart(inputPath, outputPath, webcamOverride) {
3184
3230
  return convertWithSmartLayout(inputPath, outputPath, {
3185
3231
  label: "SmartFeed",
3186
3232
  targetW: 1080,
3187
3233
  screenH: 878,
3188
3234
  camH: 472,
3189
3235
  fallbackRatio: "4:5"
3190
- });
3236
+ }, webcamOverride);
3191
3237
  }
3192
3238
  async function generatePlatformVariants(inputPath, outputDir, slug, platforms = ["tiktok", "linkedin"], options = {}) {
3193
3239
  await ensureDirectory(outputDir);
@@ -3208,11 +3254,11 @@ async function generatePlatformVariants(inputPath, outputDir, slug, platforms =
3208
3254
  if (options.useAgent) {
3209
3255
  logger_default.warn(`[generatePlatformVariants] LayoutAgent is disabled, falling back to ONNX pipeline`);
3210
3256
  }
3211
- await convertToPortraitSmart(inputPath, outPath);
3257
+ await convertToPortraitSmart(inputPath, outPath, options.webcamOverride);
3212
3258
  } else if (ratio === "1:1") {
3213
- await convertToSquareSmart(inputPath, outPath);
3259
+ await convertToSquareSmart(inputPath, outPath, options.webcamOverride);
3214
3260
  } else if (ratio === "4:5") {
3215
- await convertToFeedSmart(inputPath, outPath);
3261
+ await convertToFeedSmart(inputPath, outPath, options.webcamOverride);
3216
3262
  } else {
3217
3263
  await convertAspectRatio(inputPath, outPath, ratio);
3218
3264
  }
@@ -3276,7 +3322,7 @@ var ShortsAgent_exports = {};
3276
3322
  __export(ShortsAgent_exports, {
3277
3323
  generateShorts: () => generateShorts
3278
3324
  });
3279
- async function generateShorts(video, transcript, model, clipDirection) {
3325
+ async function generateShorts(video, transcript, model, clipDirection, webcamOverride) {
3280
3326
  const agent = new ShortsAgent(model);
3281
3327
  const transcriptLines = transcript.segments.map((seg) => {
3282
3328
  const words = seg.words.map((w) => `[${w.start.toFixed(2)}-${w.end.toFixed(2)}] ${w.word}`).join(" ");
@@ -3287,7 +3333,8 @@ Words: ${words}`;
3287
3333
  `Analyze the following transcript (${transcript.duration.toFixed(0)}s total) and plan shorts.
3288
3334
  `,
3289
3335
  `Video: ${video.filename}`,
3290
- `Duration: ${transcript.duration.toFixed(1)}s
3336
+ `Duration: ${transcript.duration.toFixed(1)}s`,
3337
+ `Target: ~${Math.max(3, Math.round(transcript.duration / 150))}\u2013${Math.max(5, Math.round(transcript.duration / 120))} shorts (scale by content richness)
3291
3338
  `,
3292
3339
  "--- TRANSCRIPT ---\n",
3293
3340
  transcriptLines.join("\n\n"),
@@ -3329,7 +3376,7 @@ Words: ${words}`;
3329
3376
  let variants;
3330
3377
  try {
3331
3378
  const defaultPlatforms = ["tiktok", "youtube-shorts", "instagram-reels", "instagram-feed", "linkedin"];
3332
- const results = await generatePlatformVariants(outputPath, shortsDir, shortSlug, defaultPlatforms);
3379
+ const results = await generatePlatformVariants(outputPath, shortsDir, shortSlug, defaultPlatforms, { webcamOverride });
3333
3380
  if (results.length > 0) {
3334
3381
  variants = results.map((v) => ({
3335
3382
  path: v.path,
@@ -3428,7 +3475,7 @@ Words: ${words}`;
3428
3475
  await agent.destroy();
3429
3476
  }
3430
3477
  }
3431
- var SYSTEM_PROMPT2, PLAN_SHORTS_SCHEMA, ShortsAgent;
3478
+ var SYSTEM_PROMPT2, ADD_SHORTS_SCHEMA, ShortsAgent;
3432
3479
  var init_ShortsAgent = __esm({
3433
3480
  "src/agents/ShortsAgent.ts"() {
3434
3481
  "use strict";
@@ -3442,7 +3489,23 @@ var init_ShortsAgent = __esm({
3442
3489
  init_fileSystem();
3443
3490
  init_paths();
3444
3491
  init_logger2();
3445
- SYSTEM_PROMPT2 = `You are a short-form video content strategist. Your job is to analyze a video transcript with word-level timestamps and identify the most compelling moments to extract as shorts (15\u201360 seconds each).
3492
+ SYSTEM_PROMPT2 = `You are a short-form video content strategist. Your job is to **exhaustively** analyze a video transcript with word-level timestamps and extract every compelling moment as a short (15\u201360 seconds each).
3493
+
3494
+ ## Your workflow
3495
+ 1. Read the transcript and note the total duration.
3496
+ 2. Work through the transcript **section by section** (roughly 3\u20135 minute chunks). For each chunk, identify every possible short.
3497
+ 3. Call **add_shorts** for each batch of shorts you find. You can call it as many times as needed.
3498
+ 4. After your first pass, call **review_shorts** to see everything you've planned so far.
3499
+ 5. Review for gaps: are there sections of the transcript with no shorts? Could any moments be combined into composites? Did you miss any humor, insights, or quotable moments?
3500
+ 6. Add any additional shorts you find.
3501
+ 7. When you are confident you've exhausted all opportunities, call **finalize_shorts**.
3502
+
3503
+ ## Target quantity
3504
+ Scale your output by video duration:
3505
+ - **~1 short per 2\u20133 minutes** of video content.
3506
+ - A 10-minute video \u2192 4\u20136 shorts. A 30-minute video \u2192 12\u201318 shorts. A 60-minute video \u2192 20\u201330 shorts.
3507
+ - These are guidelines, not hard caps \u2014 if the content is rich, find more. If it's sparse, find fewer.
3508
+ - **Never stop at 3\u20138 shorts for a long video.** Your job is to be thorough.
3446
3509
 
3447
3510
  ## What to look for
3448
3511
  - **Key insights** \u2014 concise, quotable takeaways
@@ -3450,34 +3513,34 @@ var init_ShortsAgent = __esm({
3450
3513
  - **Controversial takes** \u2014 bold opinions that spark discussion
3451
3514
  - **Educational nuggets** \u2014 clear explanations of complex topics
3452
3515
  - **Emotional peaks** \u2014 passion, vulnerability, excitement
3453
- - **Topic compilations** \u2014 multiple brief mentions of one theme that can be stitched together
3516
+ - **Audience hooks** \u2014 moments that would make someone stop scrolling
3517
+ - **Before/after reveals** \u2014 showing a transformation or result
3518
+ - **Mistakes & corrections** \u2014 relatable "oops" moments that humanize the speaker
3454
3519
 
3455
3520
  ## Short types
3456
3521
  - **Single segment** \u2014 one contiguous section of the video
3457
- - **Composite** \u2014 multiple non-contiguous segments combined into one short (great for topic compilations or building a narrative arc)
3522
+ - **Composite** \u2014 multiple non-contiguous segments combined into one short (great for topic compilations, building narrative arcs, or "every time X happens" montages). **Actively look for composite opportunities** \u2014 they often make the best shorts.
3458
3523
 
3459
3524
  ## Rules
3460
3525
  1. Each short must be 15\u201360 seconds total duration.
3461
3526
  2. Timestamps must align to word boundaries from the transcript.
3462
3527
  3. Prefer natural sentence boundaries for clean cuts.
3463
- 4. Aim for 3\u20138 shorts per video, depending on length and richness.
3464
- 5. Every short needs a catchy, descriptive title (5\u201310 words).
3465
- 6. Tags should be lowercase, no hashes, 3\u20136 per short.
3466
- 7. A 1-second buffer is automatically added before and after each segment boundary during extraction, so plan segments based on content timestamps without worrying about clipping words at the edges.
3467
-
3468
- When you have identified the shorts, call the **plan_shorts** tool with your complete plan.
3528
+ 4. Every short needs a catchy, descriptive title (5\u201310 words).
3529
+ 5. Tags should be lowercase, no hashes, 3\u20136 per short.
3530
+ 6. A 1-second buffer is automatically added before and after each segment boundary during extraction, so plan segments based on content timestamps without worrying about clipping words at the edges.
3531
+ 7. Avoid significant timestamp overlap between shorts \u2014 each short should bring unique content. Small overlaps (a few seconds of shared context) are OK.
3469
3532
 
3470
3533
  ## Using Clip Direction
3471
3534
  You may receive AI-generated clip direction with suggested shorts. Use these as a starting point but make your own decisions:
3472
3535
  - The suggestions are based on visual + audio analysis and may identify moments you'd miss from transcript alone
3473
3536
  - Feel free to adjust timestamps, combine suggestions, or ignore ones that don't work
3474
3537
  - You may also find good shorts NOT in the suggestions \u2014 always analyze the full transcript`;
3475
- PLAN_SHORTS_SCHEMA = {
3538
+ ADD_SHORTS_SCHEMA = {
3476
3539
  type: "object",
3477
3540
  properties: {
3478
3541
  shorts: {
3479
3542
  type: "array",
3480
- description: "Array of planned short clips",
3543
+ description: "Array of short clips to add to the plan",
3481
3544
  items: {
3482
3545
  type: "object",
3483
3546
  properties: {
@@ -3510,32 +3573,77 @@ You may receive AI-generated clip direction with suggested shorts. Use these as
3510
3573
  };
3511
3574
  ShortsAgent = class extends BaseAgent {
3512
3575
  plannedShorts = [];
3576
+ isFinalized = false;
3513
3577
  constructor(model) {
3514
3578
  super("ShortsAgent", SYSTEM_PROMPT2, void 0, model);
3515
3579
  }
3516
3580
  getTools() {
3517
3581
  return [
3518
3582
  {
3519
- name: "plan_shorts",
3520
- description: "Submit the planned shorts as a structured JSON array. Call this once with all planned shorts.",
3521
- parameters: PLAN_SHORTS_SCHEMA,
3583
+ name: "add_shorts",
3584
+ description: "Add one or more shorts to your plan. You can call this multiple times to build your list incrementally as you analyze each section of the transcript.",
3585
+ parameters: ADD_SHORTS_SCHEMA,
3522
3586
  handler: async (args) => {
3523
- return this.handleToolCall("plan_shorts", args);
3587
+ return this.handleToolCall("add_shorts", args);
3588
+ }
3589
+ },
3590
+ {
3591
+ name: "review_shorts",
3592
+ description: "Review all shorts planned so far. Returns a summary of every short in your current plan. Use this to check for gaps, overlaps, or missed opportunities before finalizing.",
3593
+ parameters: { type: "object", properties: {} },
3594
+ handler: async () => {
3595
+ return this.handleToolCall("review_shorts", {});
3596
+ }
3597
+ },
3598
+ {
3599
+ name: "finalize_shorts",
3600
+ description: "Finalize your short clip plan and trigger extraction. Call this ONCE after you have added all shorts and reviewed them for completeness.",
3601
+ parameters: { type: "object", properties: {} },
3602
+ handler: async () => {
3603
+ return this.handleToolCall("finalize_shorts", {});
3524
3604
  }
3525
3605
  }
3526
3606
  ];
3527
3607
  }
3528
3608
  async handleToolCall(toolName, args) {
3529
- if (toolName === "plan_shorts") {
3530
- this.plannedShorts = args.shorts;
3531
- logger_default.info(`[ShortsAgent] Planned ${this.plannedShorts.length} shorts`);
3532
- return { success: true, count: this.plannedShorts.length };
3609
+ switch (toolName) {
3610
+ case "add_shorts": {
3611
+ const newShorts = args.shorts;
3612
+ this.plannedShorts.push(...newShorts);
3613
+ logger_default.info(`[ShortsAgent] Added ${newShorts.length} shorts (total: ${this.plannedShorts.length})`);
3614
+ return `Added ${newShorts.length} shorts. Total planned: ${this.plannedShorts.length}. Call add_shorts for more, review_shorts to check your plan, or finalize_shorts when done.`;
3615
+ }
3616
+ case "review_shorts": {
3617
+ if (this.plannedShorts.length === 0) {
3618
+ return "No shorts planned yet. Analyze the transcript and call add_shorts to start planning.";
3619
+ }
3620
+ const summary = this.plannedShorts.map((s, i) => {
3621
+ const totalDur = s.segments.reduce((sum, seg) => sum + (seg.end - seg.start), 0);
3622
+ const timeRanges = s.segments.map((seg) => `${seg.start.toFixed(1)}s\u2013${seg.end.toFixed(1)}s`).join(", ");
3623
+ const type = s.segments.length > 1 ? "composite" : "single";
3624
+ return `${i + 1}. "${s.title}" (${totalDur.toFixed(1)}s, ${type}) [${timeRanges}] \u2014 ${s.description}`;
3625
+ }).join("\n");
3626
+ return `## Planned shorts (${this.plannedShorts.length} total)
3627
+
3628
+ ${summary}
3629
+
3630
+ Look for gaps in transcript coverage, missed composite opportunities, and any additional compelling moments.`;
3631
+ }
3632
+ case "finalize_shorts": {
3633
+ this.isFinalized = true;
3634
+ logger_default.info(`[ShortsAgent] Finalized ${this.plannedShorts.length} shorts`);
3635
+ return `Finalized ${this.plannedShorts.length} shorts. Extraction will begin.`;
3636
+ }
3637
+ default:
3638
+ throw new Error(`Unknown tool: ${toolName}`);
3533
3639
  }
3534
- throw new Error(`Unknown tool: ${toolName}`);
3535
3640
  }
3536
3641
  getPlannedShorts() {
3537
3642
  return this.plannedShorts;
3538
3643
  }
3644
+ getIsFinalized() {
3645
+ return this.isFinalized;
3646
+ }
3539
3647
  };
3540
3648
  }
3541
3649
  });
@@ -3556,7 +3664,8 @@ Words: ${words}`;
3556
3664
  `Analyze the following transcript (${transcript.duration.toFixed(0)}s total) and plan medium-length clips (1\u20133 minutes each).
3557
3665
  `,
3558
3666
  `Video: ${video.filename}`,
3559
- `Duration: ${transcript.duration.toFixed(1)}s
3667
+ `Duration: ${transcript.duration.toFixed(1)}s`,
3668
+ `Target: ~${Math.max(1, Math.round(transcript.duration / 480))}\u2013${Math.max(2, Math.round(transcript.duration / 300))} medium clips (scale by content richness)
3560
3669
  `,
3561
3670
  "--- TRANSCRIPT ---\n",
3562
3671
  transcriptLines.join("\n\n"),
@@ -3649,7 +3758,7 @@ Words: ${words}`;
3649
3758
  await agent.destroy();
3650
3759
  }
3651
3760
  }
3652
- var SYSTEM_PROMPT3, PLAN_MEDIUM_CLIPS_SCHEMA, MediumVideoAgent;
3761
+ var SYSTEM_PROMPT3, ADD_MEDIUM_CLIPS_SCHEMA, MediumVideoAgent;
3653
3762
  var init_MediumVideoAgent = __esm({
3654
3763
  "src/agents/MediumVideoAgent.ts"() {
3655
3764
  "use strict";
@@ -3662,7 +3771,23 @@ var init_MediumVideoAgent = __esm({
3662
3771
  init_fileSystem();
3663
3772
  init_paths();
3664
3773
  init_logger2();
3665
- SYSTEM_PROMPT3 = `You are a medium-form video content strategist. Your job is to analyze a video transcript with word-level timestamps and identify the best 1\u20133 minute segments to extract as standalone medium-form clips.
3774
+ SYSTEM_PROMPT3 = `You are a medium-form video content strategist. Your job is to **exhaustively** analyze a video transcript with word-level timestamps and extract every viable 1\u20133 minute segment as a standalone medium-form clip.
3775
+
3776
+ ## Your workflow
3777
+ 1. Read the transcript and note the total duration.
3778
+ 2. Work through the transcript **section by section** (roughly 5\u20138 minute chunks). For each chunk, identify every complete topic or narrative arc.
3779
+ 3. Call **add_medium_clips** for each batch of clips you find. You can call it as many times as needed.
3780
+ 4. After your first pass, call **review_medium_clips** to see everything you've planned so far.
3781
+ 5. Review for gaps: are there complete topics you missed? Could non-contiguous mentions of the same theme be compiled? Is there a tutorial segment that stands alone?
3782
+ 6. Add any additional clips you find.
3783
+ 7. When you are confident you've exhausted all opportunities, call **finalize_medium_clips**.
3784
+
3785
+ ## Target quantity
3786
+ Scale your output by video duration:
3787
+ - **~1 medium clip per 5\u20138 minutes** of video content.
3788
+ - A 10-minute video \u2192 1\u20132 clips. A 30-minute video \u2192 4\u20136 clips. A 60-minute video \u2192 8\u201312 clips.
3789
+ - These are guidelines, not hard caps \u2014 if the content is rich, find more.
3790
+ - **Never stop at 2\u20134 clips for a long video.** Your job is to be thorough.
3666
3791
 
3667
3792
  ## What to look for
3668
3793
 
@@ -3671,7 +3796,7 @@ var init_MediumVideoAgent = __esm({
3671
3796
  - **Educational deep dives** \u2014 clear, thorough explanations of complex topics
3672
3797
  - **Compelling stories** \u2014 anecdotes with setup, tension, and resolution
3673
3798
  - **Strong arguments** \u2014 claim \u2192 evidence \u2192 implication sequences
3674
- - **Topic compilations** \u2014 multiple brief mentions of one theme across the video that can be compiled into a cohesive 1\u20133 minute segment
3799
+ - **Topic compilations** \u2014 multiple brief mentions of one theme across the video that can be compiled into a cohesive 1\u20133 minute segment. **Actively look for these** \u2014 they often make excellent content.
3675
3800
 
3676
3801
  ## Clip types
3677
3802
 
@@ -3684,12 +3809,12 @@ var init_MediumVideoAgent = __esm({
3684
3809
  2. Timestamps must align to word boundaries from the transcript.
3685
3810
  3. Prefer natural sentence and paragraph boundaries for clean entry/exit points.
3686
3811
  4. Each clip must be self-contained \u2014 a viewer with no other context should understand and get value from the clip.
3687
- 5. Aim for 2\u20134 medium clips per video, depending on length and richness.
3688
- 6. Every clip needs a descriptive title (5\u201312 words) and a topic label.
3689
- 7. For compilations, specify segments in the order they should appear in the final clip (which may differ from chronological order).
3690
- 8. Tags should be lowercase, no hashes, 3\u20136 per clip.
3691
- 9. A 1-second buffer is automatically added around each segment boundary.
3692
- 10. Each clip needs a hook \u2014 the opening line or concept that draws viewers in.
3812
+ 5. Every clip needs a descriptive title (5\u201312 words) and a topic label.
3813
+ 6. For compilations, specify segments in the order they should appear in the final clip (which may differ from chronological order).
3814
+ 7. Tags should be lowercase, no hashes, 3\u20136 per clip.
3815
+ 8. A 1-second buffer is automatically added around each segment boundary.
3816
+ 9. Each clip needs a hook \u2014 the opening line or concept that draws viewers in.
3817
+ 10. Avoid significant overlap with content that would work better as a short (punchy, viral, single-moment).
3693
3818
 
3694
3819
  ## Differences from shorts
3695
3820
 
@@ -3697,9 +3822,6 @@ var init_MediumVideoAgent = __esm({
3697
3822
  - Don't just find the most exciting 60 seconds \u2014 find where a topic starts and where it naturally concludes.
3698
3823
  - It's OK if a medium clip has slower pacing \u2014 depth and coherence matter more than constant high energy.
3699
3824
  - Look for segments that work as standalone mini-tutorials or explanations.
3700
- - Avoid overlap with content that would work better as a short (punchy, viral, single-moment).
3701
-
3702
- When you have identified the clips, call the **plan_medium_clips** tool with your complete plan.
3703
3825
 
3704
3826
  ## Using Clip Direction
3705
3827
  You may receive AI-generated clip direction with suggested medium clips. Use these as a starting point but make your own decisions:
@@ -3707,12 +3829,12 @@ You may receive AI-generated clip direction with suggested medium clips. Use the
3707
3829
  - Feel free to adjust timestamps, combine suggestions, or ignore ones that don't work
3708
3830
  - You may also find good clips NOT in the suggestions \u2014 always analyze the full transcript
3709
3831
  - Pay special attention to suggested hooks and topic arcs \u2014 they come from multimodal analysis`;
3710
- PLAN_MEDIUM_CLIPS_SCHEMA = {
3832
+ ADD_MEDIUM_CLIPS_SCHEMA = {
3711
3833
  type: "object",
3712
3834
  properties: {
3713
3835
  clips: {
3714
3836
  type: "array",
3715
- description: "Array of planned medium-length clips",
3837
+ description: "Array of medium-length clips to add to the plan",
3716
3838
  items: {
3717
3839
  type: "object",
3718
3840
  properties: {
@@ -3748,32 +3870,79 @@ You may receive AI-generated clip direction with suggested medium clips. Use the
3748
3870
  };
3749
3871
  MediumVideoAgent = class extends BaseAgent {
3750
3872
  plannedClips = [];
3873
+ isFinalized = false;
3751
3874
  constructor(model) {
3752
3875
  super("MediumVideoAgent", SYSTEM_PROMPT3, void 0, model);
3753
3876
  }
3754
3877
  getTools() {
3755
3878
  return [
3756
3879
  {
3757
- name: "plan_medium_clips",
3758
- description: "Submit the planned medium-length clips as a structured JSON array. Call this once with all planned clips.",
3759
- parameters: PLAN_MEDIUM_CLIPS_SCHEMA,
3880
+ name: "add_medium_clips",
3881
+ description: "Add one or more medium clips to your plan. You can call this multiple times to build your list incrementally as you analyze each section of the transcript.",
3882
+ parameters: ADD_MEDIUM_CLIPS_SCHEMA,
3760
3883
  handler: async (args) => {
3761
- return this.handleToolCall("plan_medium_clips", args);
3884
+ return this.handleToolCall("add_medium_clips", args);
3885
+ }
3886
+ },
3887
+ {
3888
+ name: "review_medium_clips",
3889
+ description: "Review all medium clips planned so far. Returns a summary of every clip in your current plan. Use this to check for gaps, overlaps, or missed opportunities before finalizing.",
3890
+ parameters: { type: "object", properties: {} },
3891
+ handler: async () => {
3892
+ return this.handleToolCall("review_medium_clips", {});
3893
+ }
3894
+ },
3895
+ {
3896
+ name: "finalize_medium_clips",
3897
+ description: "Finalize your medium clip plan and trigger extraction. Call this ONCE after you have added all clips and reviewed them for completeness.",
3898
+ parameters: { type: "object", properties: {} },
3899
+ handler: async () => {
3900
+ return this.handleToolCall("finalize_medium_clips", {});
3762
3901
  }
3763
3902
  }
3764
3903
  ];
3765
3904
  }
3766
3905
  async handleToolCall(toolName, args) {
3767
- if (toolName === "plan_medium_clips") {
3768
- this.plannedClips = args.clips;
3769
- logger_default.info(`[MediumVideoAgent] Planned ${this.plannedClips.length} medium clips`);
3770
- return { success: true, count: this.plannedClips.length };
3906
+ switch (toolName) {
3907
+ case "add_medium_clips": {
3908
+ const newClips = args.clips;
3909
+ this.plannedClips.push(...newClips);
3910
+ logger_default.info(`[MediumVideoAgent] Added ${newClips.length} clips (total: ${this.plannedClips.length})`);
3911
+ return `Added ${newClips.length} clips. Total planned: ${this.plannedClips.length}. Call add_medium_clips for more, review_medium_clips to check your plan, or finalize_medium_clips when done.`;
3912
+ }
3913
+ case "review_medium_clips": {
3914
+ if (this.plannedClips.length === 0) {
3915
+ return "No medium clips planned yet. Analyze the transcript and call add_medium_clips to start planning.";
3916
+ }
3917
+ const summary = this.plannedClips.map((c, i) => {
3918
+ const totalDur = c.segments.reduce((sum, seg) => sum + (seg.end - seg.start), 0);
3919
+ const timeRanges = c.segments.map((seg) => `${seg.start.toFixed(1)}s\u2013${seg.end.toFixed(1)}s`).join(", ");
3920
+ const type = c.segments.length > 1 ? "compilation" : "deep dive";
3921
+ return `${i + 1}. "${c.title}" (${totalDur.toFixed(1)}s, ${type}) [${timeRanges}]
3922
+ Topic: ${c.topic} | Hook: ${c.hook}
3923
+ ${c.description}`;
3924
+ }).join("\n");
3925
+ return `## Planned medium clips (${this.plannedClips.length} total)
3926
+
3927
+ ${summary}
3928
+
3929
+ Look for gaps in transcript coverage, missed compilation opportunities, and complete topic arcs you may have overlooked.`;
3930
+ }
3931
+ case "finalize_medium_clips": {
3932
+ this.isFinalized = true;
3933
+ logger_default.info(`[MediumVideoAgent] Finalized ${this.plannedClips.length} medium clips`);
3934
+ return `Finalized ${this.plannedClips.length} medium clips. Extraction will begin.`;
3935
+ }
3936
+ default:
3937
+ throw new Error(`Unknown tool: ${toolName}`);
3771
3938
  }
3772
- throw new Error(`Unknown tool: ${toolName}`);
3773
3939
  }
3774
3940
  getPlannedClips() {
3775
3941
  return this.plannedClips;
3776
3942
  }
3943
+ getIsFinalized() {
3944
+ return this.isFinalized;
3945
+ }
3777
3946
  };
3778
3947
  }
3779
3948
  });
@@ -4307,57 +4476,70 @@ var ProducerAgent_exports = {};
4307
4476
  __export(ProducerAgent_exports, {
4308
4477
  ProducerAgent: () => ProducerAgent
4309
4478
  });
4310
- var SYSTEM_PROMPT4, PLAN_CUTS_SCHEMA, ProducerAgent;
4479
+ function mergeRemovals(removals) {
4480
+ if (removals.length <= 1) return removals;
4481
+ const sorted = [...removals].sort((a, b) => a.start - b.start);
4482
+ const merged = [{ ...sorted[0] }];
4483
+ for (let i = 1; i < sorted.length; i++) {
4484
+ const prev = merged[merged.length - 1];
4485
+ const curr = sorted[i];
4486
+ if (curr.start <= prev.end + 2) {
4487
+ prev.end = Math.max(prev.end, curr.end);
4488
+ prev.reason = `${prev.reason}; ${curr.reason}`;
4489
+ } else {
4490
+ merged.push({ ...curr });
4491
+ }
4492
+ }
4493
+ return merged;
4494
+ }
4495
+ var SYSTEM_PROMPT4, ADD_CUTS_SCHEMA, ProducerAgent;
4311
4496
  var init_ProducerAgent = __esm({
4312
4497
  "src/agents/ProducerAgent.ts"() {
4313
4498
  "use strict";
4314
4499
  init_BaseAgent();
4315
4500
  init_singlePassEdit();
4316
4501
  init_logger2();
4317
- SYSTEM_PROMPT4 = `You are a professional video cleaner. Your job is to analyze videos and identify regions that should be removed for a tighter, cleaner edit.
4318
-
4319
- ## CONTEXT TOOLS (use these first to understand the video)
4320
- - **get_video_info**: Get video dimensions, duration, and frame rate
4321
- - **get_transcript**: Read what's being said (with optional time range filtering)
4322
- - **get_editorial_direction**: Get AI-generated editorial guidance (cut points, pacing notes) from Gemini video analysis. Use this to inform your cleaning decisions.
4323
-
4324
- ## WHAT TO REMOVE
4325
- - **Dead air**: Long silences with no meaningful content
4326
- - **Filler words**: Excessive "um", "uh", "like", "you know" clusters
4327
- - **Bad takes**: False starts, stumbles, repeated sentences where the speaker restarts
4328
- - **Long pauses**: Extended gaps between sentences (>3 seconds) that don't serve a purpose
4329
- - **Redundant content**: Sections where the same point is repeated without adding value
4330
-
4331
- ## WHAT TO PRESERVE
4332
- - **Intentional pauses**: Dramatic pauses, thinking pauses before important points
4333
- - **Demonstrations**: Silence during live coding, UI interaction, or waiting for results
4334
- - **Meaningful silence**: Pauses that give the viewer time to absorb information
4335
- - **All substantive content**: When in doubt, keep it
4336
-
4337
- ## WORKFLOW
4338
-
4339
- 1. Call get_video_info to know the video duration
4340
- 2. Call get_editorial_direction to get AI-powered editorial guidance (cut points, pacing issues)
4341
- 3. Call get_transcript (in sections if long) to understand what's being said and find removable regions
4342
- 4. When ready, call **plan_cuts** with your list of regions to remove
4343
-
4344
- ## GUIDELINES
4345
- - Be conservative: aim for 10-20% removal at most
4346
- - Each removal should have a clear reason
4347
- - Don't remove short pauses (<1 second) \u2014 they sound natural
4348
- - Focus on making the video tighter, not shorter for its own sake
4349
- - Use editorial direction from Gemini to identify problematic regions`;
4350
- PLAN_CUTS_SCHEMA = {
4502
+ SYSTEM_PROMPT4 = `You are a professional video editor preparing raw footage for visual enhancement. Your goal is to produce a clean, tight edit that's ready for graphics overlays, captions, and social media distribution.
4503
+
4504
+ ## INFORMATION HIERARCHY
4505
+
4506
+ You have three sources of information:
4507
+ 1. **Editorial direction** (from Gemini video AI) \u2014 provides editorial judgment: what to cut, pacing issues, hook advice. It watched the actual video and can see visual cues the transcript cannot.
4508
+ 2. **Transcript** \u2014 the ground truth for **what was said and when**. Timestamps in the transcript are accurate. Use it to verify that editorial direction timestamps actually match the spoken content.
4509
+ 3. **Your own judgment** \u2014 use this to resolve conflicts and make final decisions.
4510
+
4511
+ ## CONFLICT RESOLUTION
4512
+
4513
+ - **Timestamps**: The transcript's timestamps are authoritative. Gemini's timestamps can drift. Always cross-reference the editorial direction's timestamps against the transcript before cutting. If Gemini says "cut 85-108 because it's dead air" but the transcript shows substantive speech at 92-105, trust the transcript.
4514
+ - **Pacing vs Cleaning**: If the Pacing Analysis recommends removing an entire range but Cleaning Recommendations only flags pieces, favor pacing \u2014 it reflects the broader viewing experience.
4515
+ - **Hook & Retention**: If this section recommends starting at a later point, that overrides granular cleaning cuts in the opening.
4516
+ - **Valuable content**: Never cut substantive content that the viewer needs to understand the video's message. Filler and dead air around valuable content should be trimmed, but the content itself must be preserved.
4517
+
4518
+ ## WHAT YOU'RE OPTIMIZING FOR
4519
+
4520
+ The video you produce will be further processed by a graphics agent that adds AI-generated image overlays, then captioned, then cut into shorts and medium clips. Your edit needs to:
4521
+ - Start with the strongest content \u2014 no dead air, no "I'm going to make a quick video" preambles
4522
+ - Flow naturally so captions and overlays land on clean, well-paced segments
4523
+ - Remove anything that isn't for the viewer (meta-commentary, editor instructions, false starts)
4524
+
4525
+ ## TOOLS
4526
+
4527
+ - **get_video_info** \u2014 video duration, dimensions, frame rate
4528
+ - **get_editorial_direction** \u2014 Gemini's full editorial report (cut points, pacing, hook advice, cleaning recommendations)
4529
+ - **get_transcript** \u2014 timestamped transcript (supports start/end filtering)
4530
+ - **add_cuts** \u2014 queue regions for removal (call as many times as needed, use decimal-second precision)
4531
+ - **finalize_cuts** \u2014 merge adjacent cuts and trigger the render (call once at the end)`;
4532
+ ADD_CUTS_SCHEMA = {
4351
4533
  type: "object",
4352
4534
  properties: {
4353
4535
  removals: {
4354
4536
  type: "array",
4355
- description: "Array of regions to remove from the video",
4537
+ description: "One or more regions to remove from the video",
4356
4538
  items: {
4357
4539
  type: "object",
4358
4540
  properties: {
4359
- start: { type: "number", description: "Start time in seconds" },
4360
- end: { type: "number", description: "End time in seconds" },
4541
+ start: { type: "number", description: "Start time in seconds (decimal precision, e.g. 14.3)" },
4542
+ end: { type: "number", description: "End time in seconds (decimal precision, e.g. 37.0)" },
4361
4543
  reason: { type: "string", description: "Why this region should be removed" }
4362
4544
  },
4363
4545
  required: ["start", "end", "reason"]
@@ -4370,6 +4552,8 @@ var init_ProducerAgent = __esm({
4370
4552
  video;
4371
4553
  videoDuration = 0;
4372
4554
  removals = [];
4555
+ renderPromise = null;
4556
+ outputPath = "";
4373
4557
  constructor(video, model) {
4374
4558
  super("ProducerAgent", SYSTEM_PROMPT4, void 0, model);
4375
4559
  this.video = video;
@@ -4401,10 +4585,16 @@ var init_ProducerAgent = __esm({
4401
4585
  handler: async () => this.handleToolCall("get_editorial_direction", {})
4402
4586
  },
4403
4587
  {
4404
- name: "plan_cuts",
4405
- description: "Submit your list of regions to remove from the video. Call this ONCE with ALL planned removals.",
4406
- parameters: PLAN_CUTS_SCHEMA,
4407
- handler: async (rawArgs) => this.handleToolCall("plan_cuts", rawArgs)
4588
+ name: "add_cuts",
4589
+ description: "Add one or more regions to remove from the video. You can call this multiple times to build your edit list incrementally as you analyze each section.",
4590
+ parameters: ADD_CUTS_SCHEMA,
4591
+ handler: async (rawArgs) => this.handleToolCall("add_cuts", rawArgs)
4592
+ },
4593
+ {
4594
+ name: "finalize_cuts",
4595
+ description: "Finalize your edit list and trigger video rendering. Call this ONCE after you have added all cuts with add_cuts. Adjacent/overlapping cuts will be merged automatically.",
4596
+ parameters: { type: "object", properties: {} },
4597
+ handler: async () => this.handleToolCall("finalize_cuts", {})
4408
4598
  }
4409
4599
  ];
4410
4600
  }
@@ -4456,11 +4646,33 @@ var init_ProducerAgent = __esm({
4456
4646
  editorialDirection: direction
4457
4647
  };
4458
4648
  }
4459
- case "plan_cuts": {
4649
+ case "add_cuts": {
4460
4650
  const { removals } = args;
4461
- logger_default.info(`[ProducerAgent] Received plan with ${removals.length} removals`);
4462
- this.removals = removals;
4463
- return `Plan received with ${removals.length} removals. Video will be rendered automatically.`;
4651
+ this.removals.push(...removals);
4652
+ logger_default.info(`[ProducerAgent] Added ${removals.length} cuts (total: ${this.removals.length})`);
4653
+ return `Added ${removals.length} cuts. Total queued: ${this.removals.length}. Call add_cuts again for more, or finalize_cuts when done.`;
4654
+ }
4655
+ case "finalize_cuts": {
4656
+ this.removals = mergeRemovals(this.removals);
4657
+ logger_default.info(`[ProducerAgent] Finalized ${this.removals.length} cuts (after merging), starting render`);
4658
+ const sortedRemovals = [...this.removals].sort((a, b) => a.start - b.start);
4659
+ const keepSegments = [];
4660
+ let cursor = 0;
4661
+ for (const removal of sortedRemovals) {
4662
+ if (removal.start > cursor) {
4663
+ keepSegments.push({ start: cursor, end: removal.start });
4664
+ }
4665
+ cursor = Math.max(cursor, removal.end);
4666
+ }
4667
+ if (cursor < this.videoDuration) {
4668
+ keepSegments.push({ start: cursor, end: this.videoDuration });
4669
+ }
4670
+ const totalRemoval = this.removals.reduce((sum, r) => sum + (r.end - r.start), 0);
4671
+ logger_default.info(
4672
+ `[ProducerAgent] ${this.removals.length} removals \u2192 ${keepSegments.length} keep segments, removing ${totalRemoval.toFixed(1)}s`
4673
+ );
4674
+ this.renderPromise = singlePassEdit(this.video.videoPath, keepSegments, this.outputPath);
4675
+ return `Rendering started with ${this.removals.length} cuts. The video is being processed in the background.`;
4464
4676
  }
4465
4677
  default:
4466
4678
  throw new Error(`Unknown tool: ${toolName}`);
@@ -4473,73 +4685,47 @@ var init_ProducerAgent = __esm({
4473
4685
  */
4474
4686
  async produce(outputPath) {
4475
4687
  this.removals = [];
4476
- const prompt = `Analyze this video and decide which segments should be removed for a cleaner edit.
4688
+ this.renderPromise = null;
4689
+ this.outputPath = outputPath;
4690
+ const prompt = `Clean this video by removing unwanted segments.
4477
4691
 
4478
4692
  **Video:** ${this.video.videoPath}
4479
4693
 
4480
- ## Instructions
4481
-
4482
- 1. Call get_video_info to know the video duration.
4483
- 2. Call get_editorial_direction to get AI-powered editorial guidance (cut points, pacing issues).
4484
- 3. Call get_transcript to understand what's being said and identify removable regions.
4485
- 4. Call **plan_cuts** with your list of regions to remove.
4486
-
4487
- Focus on removing dead air, filler words, bad takes, and redundant content. Be conservative \u2014 aim for 10-20% removal at most.`;
4694
+ Get the video info, editorial direction, and transcript. Analyze them together, then add your cuts and finalize.`;
4488
4695
  try {
4489
4696
  const response = await this.run(prompt);
4490
- logger_default.info(`[ProducerAgent] Agent planning complete for ${this.video.videoPath}`);
4491
- if (this.removals.length === 0) {
4492
- logger_default.info(`[ProducerAgent] No removals planned \u2014 video is clean`);
4697
+ logger_default.info(`[ProducerAgent] Agent conversation complete for ${this.video.videoPath}`);
4698
+ if (this.renderPromise) {
4699
+ await this.renderPromise;
4700
+ logger_default.info(`[ProducerAgent] Render complete: ${outputPath}`);
4701
+ const sortedRemovals = [...this.removals].sort((a, b) => a.start - b.start);
4702
+ const keepSegments = [];
4703
+ let cursor = 0;
4704
+ for (const removal of sortedRemovals) {
4705
+ if (removal.start > cursor) {
4706
+ keepSegments.push({ start: cursor, end: removal.start });
4707
+ }
4708
+ cursor = Math.max(cursor, removal.end);
4709
+ }
4710
+ if (cursor < this.videoDuration) {
4711
+ keepSegments.push({ start: cursor, end: this.videoDuration });
4712
+ }
4493
4713
  return {
4494
4714
  summary: response,
4715
+ outputPath,
4495
4716
  success: true,
4496
- editCount: 0,
4497
- removals: [],
4498
- keepSegments: [{ start: 0, end: this.videoDuration }]
4717
+ editCount: this.removals.length,
4718
+ removals: sortedRemovals.map((r) => ({ start: r.start, end: r.end })),
4719
+ keepSegments
4499
4720
  };
4500
4721
  }
4501
- const maxRemoval = this.videoDuration * 0.2;
4502
- let totalRemoval = 0;
4503
- const sortedByDuration = [...this.removals].sort(
4504
- (a, b) => b.end - b.start - (a.end - a.start)
4505
- );
4506
- const cappedRemovals = [];
4507
- for (const r of sortedByDuration) {
4508
- const dur = r.end - r.start;
4509
- if (totalRemoval + dur <= maxRemoval) {
4510
- cappedRemovals.push(r);
4511
- totalRemoval += dur;
4512
- }
4513
- }
4514
- if (cappedRemovals.length < this.removals.length) {
4515
- logger_default.warn(
4516
- `[ProducerAgent] Safety cap: reduced ${this.removals.length} removals to ${cappedRemovals.length} (max 20% of ${this.videoDuration}s = ${maxRemoval.toFixed(1)}s)`
4517
- );
4518
- }
4519
- const sortedRemovals = [...cappedRemovals].sort((a, b) => a.start - b.start);
4520
- const keepSegments = [];
4521
- let cursor = 0;
4522
- for (const removal of sortedRemovals) {
4523
- if (removal.start > cursor) {
4524
- keepSegments.push({ start: cursor, end: removal.start });
4525
- }
4526
- cursor = Math.max(cursor, removal.end);
4527
- }
4528
- if (cursor < this.videoDuration) {
4529
- keepSegments.push({ start: cursor, end: this.videoDuration });
4530
- }
4531
- logger_default.info(
4532
- `[ProducerAgent] ${cappedRemovals.length} removals \u2192 ${keepSegments.length} keep segments, removing ${totalRemoval.toFixed(1)}s`
4533
- );
4534
- await singlePassEdit(this.video.videoPath, keepSegments, outputPath);
4535
- logger_default.info(`[ProducerAgent] Render complete: ${outputPath}`);
4722
+ logger_default.info(`[ProducerAgent] No cuts finalized \u2014 video is clean`);
4536
4723
  return {
4537
4724
  summary: response,
4538
- outputPath,
4539
4725
  success: true,
4540
- editCount: cappedRemovals.length,
4541
- removals: sortedRemovals.map((r) => ({ start: r.start, end: r.end })),
4542
- keepSegments
4726
+ editCount: 0,
4727
+ removals: [],
4728
+ keepSegments: [{ start: 0, end: this.videoDuration }]
4543
4729
  };
4544
4730
  } catch (err) {
4545
4731
  const message = err instanceof Error ? err.message : String(err);
@@ -4563,12 +4749,14 @@ Focus on removing dead air, filler words, bad takes, and redundant content. Be c
4563
4749
  var geminiClient_exports = {};
4564
4750
  __export(geminiClient_exports, {
4565
4751
  analyzeVideoClipDirection: () => analyzeVideoClipDirection,
4566
- analyzeVideoEditorial: () => analyzeVideoEditorial
4752
+ analyzeVideoEditorial: () => analyzeVideoEditorial,
4753
+ analyzeVideoForEnhancements: () => analyzeVideoForEnhancements
4567
4754
  });
4568
4755
  import { GoogleGenAI, createUserContent, createPartFromUri } from "@google/genai";
4569
- async function analyzeVideoEditorial(videoPath, durationSeconds, model = "gemini-2.5-flash") {
4756
+ async function analyzeVideoEditorial(videoPath, durationSeconds, model) {
4570
4757
  const config2 = getConfig();
4571
4758
  const apiKey = config2.GEMINI_API_KEY;
4759
+ const resolvedModel = model ?? config2.GEMINI_MODEL;
4572
4760
  if (!apiKey) {
4573
4761
  throw new Error(
4574
4762
  "GEMINI_API_KEY is required for video editorial analysis. Get a key at https://aistudio.google.com/apikey"
@@ -4594,9 +4782,9 @@ async function analyzeVideoEditorial(videoPath, durationSeconds, model = "gemini
4594
4782
  if (fileState !== "ACTIVE") {
4595
4783
  throw new Error(`Gemini file processing failed \u2014 state: ${fileState}`);
4596
4784
  }
4597
- logger_default.info(`[Gemini] Video ready, requesting editorial analysis (model: ${model})`);
4785
+ logger_default.info(`[Gemini] Video ready, requesting editorial analysis (model: ${resolvedModel})`);
4598
4786
  const response = await ai.models.generateContent({
4599
- model,
4787
+ model: resolvedModel,
4600
4788
  contents: createUserContent([
4601
4789
  createPartFromUri(file.uri, file.mimeType),
4602
4790
  EDITORIAL_PROMPT
@@ -4609,7 +4797,7 @@ async function analyzeVideoEditorial(videoPath, durationSeconds, model = "gemini
4609
4797
  const estimatedInputTokens = Math.ceil(durationSeconds * VIDEO_TOKENS_PER_SECOND);
4610
4798
  const estimatedOutputTokens = Math.ceil(text.length / 4);
4611
4799
  costTracker.recordServiceUsage("gemini", 0, {
4612
- model,
4800
+ model: resolvedModel,
4613
4801
  durationSeconds,
4614
4802
  estimatedInputTokens,
4615
4803
  estimatedOutputTokens,
@@ -4618,9 +4806,10 @@ async function analyzeVideoEditorial(videoPath, durationSeconds, model = "gemini
4618
4806
  logger_default.info(`[Gemini] Editorial analysis complete (${text.length} chars)`);
4619
4807
  return text;
4620
4808
  }
4621
- async function analyzeVideoClipDirection(videoPath, durationSeconds, model = "gemini-2.5-flash") {
4809
+ async function analyzeVideoClipDirection(videoPath, durationSeconds, model) {
4622
4810
  const config2 = getConfig();
4623
4811
  const apiKey = config2.GEMINI_API_KEY;
4812
+ const resolvedModel = model ?? config2.GEMINI_MODEL;
4624
4813
  if (!apiKey) {
4625
4814
  throw new Error(
4626
4815
  "GEMINI_API_KEY is required for video clip direction analysis. Get a key at https://aistudio.google.com/apikey"
@@ -4646,9 +4835,9 @@ async function analyzeVideoClipDirection(videoPath, durationSeconds, model = "ge
4646
4835
  if (fileState !== "ACTIVE") {
4647
4836
  throw new Error(`Gemini file processing failed \u2014 state: ${fileState}`);
4648
4837
  }
4649
- logger_default.info(`[Gemini] Video ready, requesting clip direction analysis (model: ${model})`);
4838
+ logger_default.info(`[Gemini] Video ready, requesting clip direction analysis (model: ${resolvedModel})`);
4650
4839
  const response = await ai.models.generateContent({
4651
- model,
4840
+ model: resolvedModel,
4652
4841
  contents: createUserContent([
4653
4842
  createPartFromUri(file.uri, file.mimeType),
4654
4843
  CLIP_DIRECTION_PROMPT
@@ -4661,7 +4850,7 @@ async function analyzeVideoClipDirection(videoPath, durationSeconds, model = "ge
4661
4850
  const estimatedInputTokens = Math.ceil(durationSeconds * VIDEO_TOKENS_PER_SECOND);
4662
4851
  const estimatedOutputTokens = Math.ceil(text.length / 4);
4663
4852
  costTracker.recordServiceUsage("gemini", 0, {
4664
- model,
4853
+ model: resolvedModel,
4665
4854
  durationSeconds,
4666
4855
  estimatedInputTokens,
4667
4856
  estimatedOutputTokens,
@@ -4670,7 +4859,60 @@ async function analyzeVideoClipDirection(videoPath, durationSeconds, model = "ge
4670
4859
  logger_default.info(`[Gemini] Clip direction analysis complete (${text.length} chars)`);
4671
4860
  return text;
4672
4861
  }
4673
- var VIDEO_TOKENS_PER_SECOND, EDITORIAL_PROMPT, CLIP_DIRECTION_PROMPT;
4862
+ async function analyzeVideoForEnhancements(videoPath, durationSeconds, transcript, model) {
4863
+ const config2 = getConfig();
4864
+ const apiKey = config2.GEMINI_API_KEY;
4865
+ const resolvedModel = model ?? config2.GEMINI_MODEL;
4866
+ if (!apiKey) {
4867
+ throw new Error(
4868
+ "GEMINI_API_KEY is required for video enhancement analysis. Get a key at https://aistudio.google.com/apikey"
4869
+ );
4870
+ }
4871
+ const ai = new GoogleGenAI({ apiKey });
4872
+ logger_default.info(`[Gemini] Uploading video for enhancement analysis: ${videoPath}`);
4873
+ const file = await ai.files.upload({
4874
+ file: videoPath,
4875
+ config: { mimeType: "video/mp4" }
4876
+ });
4877
+ if (!file.uri || !file.mimeType || !file.name) {
4878
+ throw new Error("Gemini file upload failed \u2014 no URI returned");
4879
+ }
4880
+ logger_default.info(`[Gemini] Waiting for file processing to complete...`);
4881
+ let fileState = file.state;
4882
+ while (fileState === "PROCESSING") {
4883
+ await new Promise((resolve3) => setTimeout(resolve3, 2e3));
4884
+ const updated = await ai.files.get({ name: file.name });
4885
+ fileState = updated.state;
4886
+ logger_default.debug(`[Gemini] File state: ${fileState}`);
4887
+ }
4888
+ if (fileState !== "ACTIVE") {
4889
+ throw new Error(`Gemini file processing failed \u2014 state: ${fileState}`);
4890
+ }
4891
+ logger_default.info(`[Gemini] Video ready, requesting enhancement analysis (model: ${resolvedModel})`);
4892
+ const response = await ai.models.generateContent({
4893
+ model: resolvedModel,
4894
+ contents: createUserContent([
4895
+ createPartFromUri(file.uri, file.mimeType),
4896
+ ENHANCEMENT_ANALYSIS_PROMPT + transcript
4897
+ ])
4898
+ });
4899
+ const text = response.text ?? "";
4900
+ if (!text) {
4901
+ throw new Error("Gemini returned empty response");
4902
+ }
4903
+ const estimatedInputTokens = Math.ceil(durationSeconds * VIDEO_TOKENS_PER_SECOND);
4904
+ const estimatedOutputTokens = Math.ceil(text.length / 4);
4905
+ costTracker.recordServiceUsage("gemini", 0, {
4906
+ model: resolvedModel,
4907
+ durationSeconds,
4908
+ estimatedInputTokens,
4909
+ estimatedOutputTokens,
4910
+ videoFile: videoPath
4911
+ });
4912
+ logger_default.info(`[Gemini] Enhancement analysis complete (${text.length} chars)`);
4913
+ return text;
4914
+ }
4915
+ var VIDEO_TOKENS_PER_SECOND, EDITORIAL_PROMPT, CLIP_DIRECTION_PROMPT, ENHANCEMENT_ANALYSIS_PROMPT;
4674
4916
  var init_geminiClient = __esm({
4675
4917
  "src/tools/gemini/geminiClient.ts"() {
4676
4918
  "use strict";
@@ -4692,7 +4934,7 @@ Flag sections that are too slow, too fast, or have dead air. Give start/end time
4692
4934
  Identify moments where text overlays, graphics, zoom-ins, or visual emphasis would improve engagement.
4693
4935
 
4694
4936
  ## Hook & Retention
4695
- Rate the first 3 seconds (1-10) and suggest specific improvements for viewer retention.
4937
+ Rate the first 3 seconds (1-10) and suggest specific improvements for viewer retention. If the video has a weak opening (meta-commentary, dead air, false starts), recommend where the actual content begins so an editor can start the video there.
4696
4938
 
4697
4939
  ## Content Structure
4698
4940
  Break the video into intro/body sections/outro with timestamps and topic for each section.
@@ -4702,10 +4944,21 @@ Highlight the most engaging, surprising, or important moments that should be emp
4702
4944
 
4703
4945
  ## Cleaning Recommendations
4704
4946
  Identify sections that should be trimmed or removed entirely to produce a tighter edit. For each:
4705
- - Give start/end timestamps (MM:SS format)
4947
+ - Give start/end timestamps (MM:SS.s format with decimal precision, e.g. 00:14.3 - 00:37.0)
4706
4948
  - Explain why it should be removed (dead air, filler words, false starts, repeated explanations, off-topic tangents, excessive pauses)
4707
4949
  - Rate the confidence (high/medium/low) \u2014 high means definitely remove, low means optional
4708
4950
 
4951
+ After listing the recommendations in markdown, also provide a machine-readable JSON block summarizing all suggested cuts:
4952
+
4953
+ \`\`\`json:cuts
4954
+ [
4955
+ { "start": 0.0, "end": 15.2, "reason": "Opening too slow - dead air and filler", "confidence": "high" },
4956
+ { "start": 26.5, "end": 37.0, "reason": "Meta-commentary for editor", "confidence": "high" }
4957
+ ]
4958
+ \`\`\`
4959
+
4960
+ Times in the JSON block should be in seconds with decimal precision. Place cut boundaries at word boundaries.
4961
+
4709
4962
  ## Hook Snippets for Short Videos
4710
4963
  Identify the 3-5 best moments (3-8 seconds each) that could serve as attention-grabbing hooks for the beginning of short-form videos. For each:
4711
4964
  - Give start/end timestamps
@@ -4760,6 +5013,465 @@ For each recommended medium clip, provide:
4760
5013
  Identify 2-4 medium clips. Prioritize: complete explanations, tutorial segments, deep dives, and compelling narrative arcs.
4761
5014
 
4762
5015
  Be precise with timestamps. Be opinionated about what works and what doesn't. Think about what would make someone stop scrolling.`;
5016
+ ENHANCEMENT_ANALYSIS_PROMPT = `You are a visual content strategist reviewing raw video footage. Write an editorial report identifying moments where an AI-generated image overlay would genuinely enhance viewer comprehension.
5017
+
5018
+ Watch the video carefully and read the transcript below. Write a natural editorial report covering:
5019
+
5020
+ 1. **Video layout observations** \u2014 What is on screen? Is there a webcam overlay? Where is the main content area (code editor, terminal, browser)? What areas of the screen have less visual activity and could safely hold an overlay without hiding important content?
5021
+
5022
+ 2. **Enhancement opportunities** \u2014 For each moment you identify, describe:
5023
+ - The approximate timestamp range (in seconds) where the speaker is discussing the topic
5024
+ - What the speaker is explaining and what is currently visible on screen
5025
+ - The dominant background colors and brightness level at that moment (e.g., dark IDE, white browser, terminal with dark background). This helps the image designer choose contrasting colors so the overlay stands out
5026
+ - What kind of image would help (diagram, flowchart, illustration, infographic, etc.)
5027
+ - A detailed description of the image to generate
5028
+ - Why showing this image at this moment helps the viewer understand
5029
+ - Where on screen the image should go to avoid blocking important content
5030
+
5031
+ 3. **Timing guidance** \u2014 For each opportunity, note the natural start and end of the speaker's explanation. The image should appear when the topic begins and disappear when the speaker moves on. Typically 5-12 seconds is ideal \u2014 long enough to register, short enough to not overstay.
5032
+
5033
+ Important guidelines:
5034
+ - Do NOT force opportunities \u2014 if the video doesn't need visual aids, say so
5035
+ - Do NOT suggest images when the screen already shows relevant visuals (diagrams, UI demos, live coding that needs to be seen)
5036
+ - Do NOT suggest images for trivial topics that don't need visual explanation
5037
+ - Do NOT suggest images during live demonstrations where the viewer needs to see the screen clearly
5038
+ - Moments shorter than 5 seconds are too brief for an overlay to register
5039
+ - It's perfectly fine to identify 0 opportunities, 1, or several \u2014 quality over quantity
5040
+
5041
+ Write your report in natural language with clear section headers. This report will be read by a graphics agent that will make final decisions about what to generate.
5042
+
5043
+ TRANSCRIPT:
5044
+ `;
5045
+ }
5046
+ });
5047
+
5048
+ // src/tools/imageGeneration.ts
5049
+ import { writeFile } from "fs/promises";
5050
+ import { dirname as dirname3 } from "path";
5051
+ import sharp from "sharp";
5052
+ async function generateImage(prompt, outputPath, options) {
5053
+ const config2 = getConfig();
5054
+ if (!config2.OPENAI_API_KEY) {
5055
+ throw new Error("[ImageGen] OPENAI_API_KEY is required for image generation");
5056
+ }
5057
+ const size = options?.size ?? "auto";
5058
+ const quality = options?.quality ?? "high";
5059
+ const fullPrompt = (options?.style ? `${prompt}
5060
+
5061
+ Style: ${options.style}` : prompt) + IMAGE_BASE_PROMPT;
5062
+ logger_default.info(`[ImageGen] Generating image: ${prompt.substring(0, 100)}...`);
5063
+ logger_default.debug(`[ImageGen] Size: ${size}, Quality: ${quality}`);
5064
+ const response = await fetch("https://api.openai.com/v1/images/generations", {
5065
+ method: "POST",
5066
+ headers: {
5067
+ "Content-Type": "application/json",
5068
+ Authorization: `Bearer ${config2.OPENAI_API_KEY}`
5069
+ },
5070
+ body: JSON.stringify({
5071
+ model: "gpt-image-1.5",
5072
+ prompt: fullPrompt,
5073
+ n: 1,
5074
+ size,
5075
+ quality
5076
+ })
5077
+ });
5078
+ if (!response.ok) {
5079
+ const errorText = await response.text();
5080
+ logger_default.error(`[ImageGen] API error (${response.status}): ${errorText}`);
5081
+ throw new Error(`[ImageGen] OpenAI API returned ${response.status}: ${errorText}`);
5082
+ }
5083
+ const result = await response.json();
5084
+ const b64 = result.data?.[0]?.b64_json;
5085
+ if (!b64) {
5086
+ logger_default.error("[ImageGen] No b64_json in API response");
5087
+ throw new Error("[ImageGen] API response missing b64_json image data");
5088
+ }
5089
+ const rawBuffer = Buffer.from(b64, "base64");
5090
+ let validatedBuffer;
5091
+ try {
5092
+ validatedBuffer = await sharp(rawBuffer).png().toBuffer();
5093
+ } catch (error) {
5094
+ logger_default.error("[ImageGen] Failed to validate image data from API", { error });
5095
+ throw new Error("[ImageGen] Invalid image data received from API - not a valid image format");
5096
+ }
5097
+ await ensureDirectory(dirname3(outputPath));
5098
+ await writeFile(outputPath, validatedBuffer);
5099
+ const estimatedCost = COST_BY_QUALITY[quality];
5100
+ costTracker.recordServiceUsage("openai-image", estimatedCost, {
5101
+ model: "gpt-image-1.5",
5102
+ size,
5103
+ quality,
5104
+ prompt: prompt.substring(0, 200)
5105
+ });
5106
+ logger_default.info(`[ImageGen] Image saved to ${outputPath} (${validatedBuffer.length} bytes)`);
5107
+ return outputPath;
5108
+ }
5109
+ var COST_BY_QUALITY, IMAGE_BASE_PROMPT;
5110
+ var init_imageGeneration = __esm({
5111
+ "src/tools/imageGeneration.ts"() {
5112
+ "use strict";
5113
+ init_logger2();
5114
+ init_environment();
5115
+ init_costTracker();
5116
+ init_fileSystem();
5117
+ COST_BY_QUALITY = {
5118
+ low: 0.04,
5119
+ medium: 0.07,
5120
+ high: 0.07
5121
+ };
5122
+ IMAGE_BASE_PROMPT = `
5123
+
5124
+ Rendering requirements: The image MUST have a solid opaque background (not transparent). Include a thin border or subtle drop shadow around the entire image. Use a clean, flat design style suitable for overlaying on top of video content. The image should look like a polished infographic card that clearly separates from whatever is behind it.`;
5125
+ }
5126
+ });
5127
+
5128
+ // src/agents/GraphicsAgent.ts
5129
+ import sharp2 from "sharp";
5130
+ async function generateEnhancementImages(enhancementReport, enhancementsDir, videoDuration, model) {
5131
+ await ensureDirectory(enhancementsDir);
5132
+ const agent = new GraphicsAgent(model);
5133
+ agent.setContext(enhancementsDir);
5134
+ try {
5135
+ const userMessage = `Here is the editorial report from our video analyst. The video is ${videoDuration.toFixed(1)} seconds long.
5136
+
5137
+ Review each opportunity and make your editorial decision \u2014 generate an image or skip it.
5138
+
5139
+ ---
5140
+
5141
+ ${enhancementReport}`;
5142
+ await agent.run(userMessage);
5143
+ return agent.getOverlays();
5144
+ } finally {
5145
+ await agent.destroy();
5146
+ }
5147
+ }
5148
+ var SYSTEM_PROMPT5, GENERATE_ENHANCEMENT_SCHEMA, SKIP_OPPORTUNITY_SCHEMA, GraphicsAgent;
5149
+ var init_GraphicsAgent = __esm({
5150
+ "src/agents/GraphicsAgent.ts"() {
5151
+ "use strict";
5152
+ init_BaseAgent();
5153
+ init_imageGeneration();
5154
+ init_text();
5155
+ init_paths();
5156
+ init_fileSystem();
5157
+ init_logger2();
5158
+ SYSTEM_PROMPT5 = `You are a visual content designer and editorial director for educational video content. You are given an editorial report from a video analyst describing moments in a video where AI-generated image overlays could enhance viewer comprehension.
5159
+
5160
+ Your job is to make the FINAL editorial decision for each opportunity:
5161
+ 1. Decide whether to generate an image or skip the opportunity
5162
+ 2. Determine the exact timing \u2014 when the image should appear and disappear
5163
+ 3. Choose the optimal screen placement to avoid blocking important content
5164
+ 4. Write a refined, high-quality image generation prompt
5165
+
5166
+ Guidelines for editorial decisions:
5167
+ - Only generate images that genuinely add value \u2014 quality over quantity
5168
+ - Timing should match the speaker's explanation: appear when the topic starts, disappear when they move on
5169
+ - Keep display duration between 5-12 seconds \u2014 long enough to register, short enough to not overstay
5170
+ - Ensure at least 10 seconds gap between consecutive overlays to avoid visual clutter
5171
+ - Choose placement regions that avoid the webcam, main content area, and any important UI elements
5172
+ - Size should be 15-30% of video width \u2014 large enough to see, small enough to not dominate
5173
+
5174
+ Guidelines for image prompts:
5175
+ - Create clean, professional diagrams and illustrations
5176
+ - Use flat design / modern infographic style
5177
+ - Include labels and annotations when helpful
5178
+ - Avoid photorealistic imagery \u2014 prefer stylized educational graphics
5179
+ - Keep the image simple and immediately understandable at a glance
5180
+ - The image will be shown as a small overlay, so avoid tiny details
5181
+ - Use high contrast colors for visibility when overlaid on video
5182
+ - No text-heavy images \u2014 a few key labels at most
5183
+ - Let the image content dictate its natural aspect ratio \u2014 don't force square if the content is better as landscape or portrait
5184
+ - IMPORTANT: Every image MUST have a solid, opaque background (e.g., white, light gray, dark navy) \u2014 never transparent or borderless. The image will be overlaid on top of a video so it needs to stand out with clear visual separation. If the report mentions a dark video background, use a light image background (and vice versa). Add a subtle border or shadow effect in the prompt to ensure the image pops against the video content.
5185
+
5186
+ Process the report and call generate_enhancement for each image worth creating, or call skip_opportunity for those not worth generating.`;
5187
+ GENERATE_ENHANCEMENT_SCHEMA = {
5188
+ type: "object",
5189
+ properties: {
5190
+ prompt: {
5191
+ type: "string",
5192
+ description: "A refined, high-quality image generation prompt describing the visual to create"
5193
+ },
5194
+ timestampStart: {
5195
+ type: "number",
5196
+ description: "When to start showing the image (seconds from video start)"
5197
+ },
5198
+ timestampEnd: {
5199
+ type: "number",
5200
+ description: "When to stop showing the image (seconds from video start). Should be 5-12 seconds after timestampStart."
5201
+ },
5202
+ region: {
5203
+ type: "string",
5204
+ enum: ["top-left", "top-right", "bottom-left", "bottom-right", "center-right", "center-left"],
5205
+ description: "Screen region for placement, chosen to avoid blocking important content"
5206
+ },
5207
+ sizePercent: {
5208
+ type: "number",
5209
+ description: "Image width as percentage of video width (15-30)"
5210
+ },
5211
+ topic: {
5212
+ type: "string",
5213
+ description: "Brief label for what this image illustrates"
5214
+ },
5215
+ reason: {
5216
+ type: "string",
5217
+ description: "Why this visual enhancement helps the viewer"
5218
+ }
5219
+ },
5220
+ required: ["prompt", "timestampStart", "timestampEnd", "region", "sizePercent", "topic", "reason"]
5221
+ };
5222
+ SKIP_OPPORTUNITY_SCHEMA = {
5223
+ type: "object",
5224
+ properties: {
5225
+ topic: {
5226
+ type: "string",
5227
+ description: "The topic from the report that is being skipped"
5228
+ },
5229
+ reason: {
5230
+ type: "string",
5231
+ description: "Why this opportunity should be skipped"
5232
+ }
5233
+ },
5234
+ required: ["topic", "reason"]
5235
+ };
5236
+ GraphicsAgent = class extends BaseAgent {
5237
+ overlays = [];
5238
+ enhancementsDir = "";
5239
+ imageIndex = 0;
5240
+ constructor(model) {
5241
+ super("GraphicsAgent", SYSTEM_PROMPT5, void 0, model);
5242
+ }
5243
+ setContext(enhancementsDir) {
5244
+ this.enhancementsDir = enhancementsDir;
5245
+ }
5246
+ getTools() {
5247
+ return [
5248
+ {
5249
+ name: "generate_enhancement",
5250
+ description: "Generate an AI image overlay for a specific moment in the video. You decide the timing, placement, and prompt.",
5251
+ parameters: GENERATE_ENHANCEMENT_SCHEMA,
5252
+ handler: async (args) => this.handleToolCall("generate_enhancement", args)
5253
+ },
5254
+ {
5255
+ name: "skip_opportunity",
5256
+ description: "Skip an enhancement opportunity from the report that is not worth generating.",
5257
+ parameters: SKIP_OPPORTUNITY_SCHEMA,
5258
+ handler: async (args) => this.handleToolCall("skip_opportunity", args)
5259
+ }
5260
+ ];
5261
+ }
5262
+ async handleToolCall(toolName, args) {
5263
+ if (toolName === "generate_enhancement") {
5264
+ const prompt = args.prompt;
5265
+ const timestampStart = args.timestampStart;
5266
+ const timestampEnd = args.timestampEnd;
5267
+ const region = args.region;
5268
+ const sizePercent = Math.min(30, Math.max(15, args.sizePercent));
5269
+ const topic = args.topic;
5270
+ const reason = args.reason;
5271
+ const slug = slugify(topic, { lower: true, strict: true });
5272
+ const filename = `${this.imageIndex}-${slug}.png`;
5273
+ const outputPath = join(this.enhancementsDir, filename);
5274
+ try {
5275
+ await generateImage(prompt, outputPath, { size: "auto" });
5276
+ const metadata = await sharp2(outputPath).metadata();
5277
+ const width = metadata.width ?? 1024;
5278
+ const height = metadata.height ?? 1024;
5279
+ const opportunity = {
5280
+ timestampStart,
5281
+ timestampEnd,
5282
+ topic,
5283
+ imagePrompt: prompt,
5284
+ reason,
5285
+ placement: { region, avoidAreas: [], sizePercent },
5286
+ confidence: 1
5287
+ };
5288
+ const overlay = {
5289
+ opportunity,
5290
+ imagePath: outputPath,
5291
+ width,
5292
+ height
5293
+ };
5294
+ this.overlays.push(overlay);
5295
+ this.imageIndex++;
5296
+ logger_default.info(`Generated enhancement image: ${filename} (${width}x${height})`);
5297
+ return { success: true, imagePath: outputPath, dimensions: `${width}x${height}` };
5298
+ } catch (err) {
5299
+ const message = err instanceof Error ? err.message : String(err);
5300
+ logger_default.error(`Failed to generate image for "${topic}": ${message}`);
5301
+ return { error: message };
5302
+ }
5303
+ }
5304
+ if (toolName === "skip_opportunity") {
5305
+ const topic = args.topic;
5306
+ const reason = args.reason;
5307
+ logger_default.info(`Skipped enhancement opportunity "${topic}": ${reason}`);
5308
+ return { success: true, skipped: true };
5309
+ }
5310
+ throw new Error(`Unknown tool: ${toolName}`);
5311
+ }
5312
+ getOverlays() {
5313
+ return this.overlays;
5314
+ }
5315
+ };
5316
+ }
5317
+ });
5318
+
5319
+ // src/tools/ffmpeg/overlayCompositing.ts
5320
+ function getOverlayPosition(region, margin) {
5321
+ const m = String(margin);
5322
+ switch (region) {
5323
+ case "top-left":
5324
+ return { x: m, y: m };
5325
+ case "top-right":
5326
+ return { x: `(main_w-overlay_w-${m})`, y: m };
5327
+ case "bottom-left":
5328
+ return { x: m, y: `(main_h-overlay_h-${m})` };
5329
+ case "bottom-right":
5330
+ return { x: `(main_w-overlay_w-${m})`, y: `(main_h-overlay_h-${m})` };
5331
+ case "center-right":
5332
+ return { x: `(main_w-overlay_w-${m})`, y: `((main_h-overlay_h)/2)` };
5333
+ case "center-left":
5334
+ return { x: m, y: `((main_h-overlay_h)/2)` };
5335
+ }
5336
+ }
5337
+ function buildOverlayFilterComplex(overlays, videoWidth, videoHeight) {
5338
+ const margin = Math.round(videoWidth * 0.05);
5339
+ const filters = [];
5340
+ for (let i = 0; i < overlays.length; i++) {
5341
+ const overlay = overlays[i];
5342
+ const inputIdx = i + 1;
5343
+ const overlayWidth = Math.round(videoWidth * overlay.opportunity.placement.sizePercent / 100);
5344
+ const start = overlay.opportunity.timestampStart;
5345
+ const end = overlay.opportunity.timestampEnd;
5346
+ filters.push(`[${inputIdx}:v]scale=${overlayWidth}:-1,format=rgba[img_${i}]`);
5347
+ const prev = i === 0 ? "[0:v]" : `[out_${i - 1}]`;
5348
+ const isLast = i === overlays.length - 1;
5349
+ const out = isLast ? "[overlaid]" : `[out_${i}]`;
5350
+ const pos = getOverlayPosition(overlay.opportunity.placement.region, margin);
5351
+ filters.push(
5352
+ `${prev}[img_${i}]overlay=x=${pos.x}:y=${pos.y}:enable='between(t,${start},${end})':format=auto${out}`
5353
+ );
5354
+ }
5355
+ filters.push("[overlaid]format=yuv420p[outv]");
5356
+ return filters.join(";");
5357
+ }
5358
+ async function compositeOverlays(videoPath, overlays, outputPath, videoWidth, videoHeight) {
5359
+ if (overlays.length === 0) {
5360
+ throw new Error("[OverlayCompositing] No overlays provided");
5361
+ }
5362
+ const ffmpegPath6 = getFFmpegPath();
5363
+ const filterComplex = buildOverlayFilterComplex(overlays, videoWidth, videoHeight);
5364
+ const args = ["-y", "-i", videoPath];
5365
+ for (const overlay of overlays) {
5366
+ args.push("-loop", "1", "-i", overlay.imagePath);
5367
+ }
5368
+ args.push(
5369
+ "-filter_complex",
5370
+ filterComplex,
5371
+ "-map",
5372
+ "[outv]",
5373
+ "-map",
5374
+ "0:a",
5375
+ "-c:v",
5376
+ "libx264",
5377
+ "-preset",
5378
+ "ultrafast",
5379
+ "-crf",
5380
+ "23",
5381
+ "-threads",
5382
+ "4",
5383
+ "-c:a",
5384
+ "copy",
5385
+ "-shortest",
5386
+ outputPath
5387
+ );
5388
+ logger_default.info(`[OverlayCompositing] Compositing ${overlays.length} overlays \u2192 ${outputPath}`);
5389
+ return new Promise((resolve3, reject) => {
5390
+ execFileRaw(ffmpegPath6, args, { maxBuffer: 50 * 1024 * 1024 }, (error, _stdout, stderr) => {
5391
+ if (error) {
5392
+ logger_default.error(`[OverlayCompositing] FFmpeg failed: ${stderr}`);
5393
+ reject(new Error(`[OverlayCompositing] FFmpeg overlay compositing failed: ${error.message}`));
5394
+ return;
5395
+ }
5396
+ logger_default.info(`[OverlayCompositing] Complete: ${outputPath}`);
5397
+ resolve3(outputPath);
5398
+ });
5399
+ });
5400
+ }
5401
+ var init_overlayCompositing = __esm({
5402
+ "src/tools/ffmpeg/overlayCompositing.ts"() {
5403
+ "use strict";
5404
+ init_process();
5405
+ init_ffmpeg();
5406
+ init_logger2();
5407
+ }
5408
+ });
5409
+
5410
+ // src/stages/visualEnhancement.ts
5411
+ var visualEnhancement_exports = {};
5412
+ __export(visualEnhancement_exports, {
5413
+ enhanceVideo: () => enhanceVideo
5414
+ });
5415
+ async function enhanceVideo(videoPath, transcript, video) {
5416
+ const enhancementsDir = join(video.videoDir, "enhancements");
5417
+ await ensureDirectory(enhancementsDir);
5418
+ logger_default.info("[VisualEnhancement] Step 1: Analyzing video for enhancement opportunities...");
5419
+ const enhancementReport = await analyzeVideoForEnhancements(
5420
+ videoPath,
5421
+ video.duration,
5422
+ transcript.text
5423
+ );
5424
+ if (!enhancementReport || enhancementReport.trim().length === 0) {
5425
+ logger_default.info("[VisualEnhancement] No enhancement report generated \u2014 skipping");
5426
+ return void 0;
5427
+ }
5428
+ logger_default.info(`[VisualEnhancement] Received editorial report (${enhancementReport.length} chars)`);
5429
+ logger_default.info("[VisualEnhancement] Step 2: GraphicsAgent making editorial decisions and generating images...");
5430
+ const overlays = await generateEnhancementImages(
5431
+ enhancementReport,
5432
+ enhancementsDir,
5433
+ video.duration,
5434
+ getModelForAgent("GraphicsAgent")
5435
+ );
5436
+ if (overlays.length === 0) {
5437
+ logger_default.info("[VisualEnhancement] GraphicsAgent generated no images \u2014 skipping compositing");
5438
+ return void 0;
5439
+ }
5440
+ logger_default.info(`[VisualEnhancement] Generated ${overlays.length} enhancement images`);
5441
+ logger_default.info("[VisualEnhancement] Step 3: Compositing overlays onto video...");
5442
+ const outputPath = join(video.videoDir, `${video.slug}-enhanced.mp4`);
5443
+ const videoWidth = video.layout?.width ?? 1920;
5444
+ const videoHeight = video.layout?.height ?? 1080;
5445
+ const enhancedVideoPath = await compositeOverlays(
5446
+ videoPath,
5447
+ overlays,
5448
+ outputPath,
5449
+ videoWidth,
5450
+ videoHeight
5451
+ );
5452
+ logger_default.info(`[VisualEnhancement] Enhanced video created: ${enhancedVideoPath}`);
5453
+ let totalImageCost = 0;
5454
+ for (const overlay of overlays) {
5455
+ totalImageCost += 0.07;
5456
+ }
5457
+ return {
5458
+ enhancedVideoPath,
5459
+ overlays,
5460
+ analysisTokens: 0,
5461
+ // tracked by costTracker internally
5462
+ imageGenCost: totalImageCost
5463
+ };
5464
+ }
5465
+ var init_visualEnhancement = __esm({
5466
+ "src/stages/visualEnhancement.ts"() {
5467
+ "use strict";
5468
+ init_geminiClient();
5469
+ init_GraphicsAgent();
5470
+ init_overlayCompositing();
5471
+ init_modelConfig();
5472
+ init_fileSystem();
5473
+ init_paths();
5474
+ init_logger2();
4763
5475
  }
4764
5476
  });
4765
5477
 
@@ -4980,7 +5692,7 @@ async function generateSocialPosts(video, transcript, summary, outputDir, model)
4980
5692
  await agent.destroy();
4981
5693
  }
4982
5694
  }
4983
- var SYSTEM_PROMPT5, SocialMediaAgent;
5695
+ var SYSTEM_PROMPT6, SocialMediaAgent;
4984
5696
  var init_SocialMediaAgent = __esm({
4985
5697
  "src/agents/SocialMediaAgent.ts"() {
4986
5698
  "use strict";
@@ -4990,7 +5702,7 @@ var init_SocialMediaAgent = __esm({
4990
5702
  init_logger2();
4991
5703
  init_environment();
4992
5704
  init_types();
4993
- SYSTEM_PROMPT5 = `You are a viral social-media content strategist.
5705
+ SYSTEM_PROMPT6 = `You are a viral social-media content strategist.
4994
5706
  Given a video transcript and summary you MUST generate one post for each of the 5 platforms listed below.
4995
5707
  Each post must match the platform's tone, format, and constraints exactly.
4996
5708
 
@@ -5014,7 +5726,7 @@ Always call "create_posts" exactly once with all 5 platform posts.`;
5014
5726
  SocialMediaAgent = class extends BaseAgent {
5015
5727
  collectedPosts = [];
5016
5728
  constructor(model) {
5017
- super("SocialMediaAgent", SYSTEM_PROMPT5, void 0, model);
5729
+ super("SocialMediaAgent", SYSTEM_PROMPT6, void 0, model);
5018
5730
  }
5019
5731
  getMcpServers() {
5020
5732
  const config2 = getConfig();
@@ -5449,6 +6161,7 @@ var loadChapterAgent = async () => Promise.resolve().then(() => (init_ChapterAge
5449
6161
  var loadSummaryAgent = async () => Promise.resolve().then(() => (init_SummaryAgent(), SummaryAgent_exports));
5450
6162
  var loadProducerAgent = async () => Promise.resolve().then(() => (init_ProducerAgent(), ProducerAgent_exports));
5451
6163
  var loadGeminiClient = async () => Promise.resolve().then(() => (init_geminiClient(), geminiClient_exports));
6164
+ var loadVisualEnhancement = async () => Promise.resolve().then(() => (init_visualEnhancement(), visualEnhancement_exports));
5452
6165
 
5453
6166
  // src/assets/VideoAsset.ts
5454
6167
  var VideoAsset = class extends Asset {
@@ -5908,7 +6621,8 @@ var ShortVideoAsset = class extends VideoAsset {
5908
6621
  return this.videoPath;
5909
6622
  }
5910
6623
  await ensureDirectory(this.videoDir);
5911
- const parentVideo = await this.parent.getResult();
6624
+ const mainParent = this.parent;
6625
+ const parentVideo = await mainParent.getEditedVideo();
5912
6626
  await extractCompositeClip(parentVideo, this.clip.segments, this.videoPath);
5913
6627
  return this.videoPath;
5914
6628
  }
@@ -5951,6 +6665,7 @@ var ShortVideoAsset = class extends VideoAsset {
5951
6665
  init_paths();
5952
6666
  init_fileSystem();
5953
6667
  init_types();
6668
+ init_clipExtraction();
5954
6669
  var MediumClipAsset = class extends VideoAsset {
5955
6670
  /** Parent video this clip was extracted from */
5956
6671
  parent;
@@ -6012,18 +6727,20 @@ var MediumClipAsset = class extends VideoAsset {
6012
6727
  return fileExists(this.videoPath);
6013
6728
  }
6014
6729
  /**
6015
- * Get the rendered clip video path.
6730
+ * Get the rendered clip video path, extracting from parent if needed.
6731
+ * Extracts from the enhanced video so AI-generated overlays carry through.
6016
6732
  *
6017
- * @param opts - Asset options (force not used - clip must be pre-rendered)
6733
+ * @param opts - Asset options (force regeneration, etc.)
6018
6734
  * @returns Path to the rendered video file
6019
- * @throws Error if clip hasn't been rendered yet
6020
6735
  */
6021
6736
  async getResult(opts) {
6022
- if (!await this.exists()) {
6023
- throw new Error(
6024
- `Medium clip "${this.slug}" not found at ${this.videoPath}. Run the medium-clips stage first.`
6025
- );
6737
+ if (!opts?.force && await this.exists()) {
6738
+ return this.videoPath;
6026
6739
  }
6740
+ await ensureDirectory(this.videoDir);
6741
+ const mainParent = this.parent;
6742
+ const parentVideo = await mainParent.getEnhancedVideo();
6743
+ await extractCompositeClip(parentVideo, this.clip.segments, this.videoPath);
6027
6744
  return this.videoPath;
6028
6745
  }
6029
6746
  };
@@ -6222,6 +6939,10 @@ var MainVideoAsset = class _MainVideoAsset extends VideoAsset {
6222
6939
  get editedVideoPath() {
6223
6940
  return join(this.videoDir, `${this.slug}-edited.mp4`);
6224
6941
  }
6942
+ /** Path to the enhanced (visual overlays) video: videoDir/{slug}-enhanced.mp4 */
6943
+ get enhancedVideoPath() {
6944
+ return join(this.videoDir, `${this.slug}-enhanced.mp4`);
6945
+ }
6225
6946
  /** Path to the captioned video: videoDir/{slug}-captioned.mp4 */
6226
6947
  get captionedVideoPath() {
6227
6948
  return join(this.videoDir, `${this.slug}-captioned.mp4`);
@@ -6275,7 +6996,13 @@ var MainVideoAsset = class _MainVideoAsset extends VideoAsset {
6275
6996
  logger_default.info(`Ingesting video: ${sourcePath} \u2192 ${slug}`);
6276
6997
  if (await fileExists(videoDir)) {
6277
6998
  logger_default.warn(`Output folder already exists, cleaning previous artifacts: ${videoDir}`);
6278
- const subDirs = ["thumbnails", "shorts", "social-posts", "chapters", "medium-clips", "captions"];
6999
+ const subDirs = ["thumbnails", "shorts", "social-posts", "chapters", "medium-clips", "captions", "enhancements"];
7000
+ const allEntries = await listDirectory(videoDir);
7001
+ for (const entry of allEntries) {
7002
+ if (entry.endsWith("-enhance-test")) {
7003
+ await removeDirectory(join(videoDir, entry), { recursive: true, force: true });
7004
+ }
7005
+ }
6279
7006
  for (const sub of subDirs) {
6280
7007
  await removeDirectory(join(videoDir, sub), { recursive: true, force: true });
6281
7008
  }
@@ -6287,14 +7014,18 @@ var MainVideoAsset = class _MainVideoAsset extends VideoAsset {
6287
7014
  "captions.ass",
6288
7015
  "summary.md",
6289
7016
  "blog-post.md",
6290
- "README.md"
7017
+ "README.md",
7018
+ "clip-direction.md",
7019
+ "editorial-direction.md",
7020
+ "cost-report.md",
7021
+ "layout.json"
6291
7022
  ];
6292
7023
  for (const pattern of stalePatterns) {
6293
7024
  await removeFile(join(videoDir, pattern));
6294
7025
  }
6295
7026
  const files = await listDirectory(videoDir);
6296
7027
  for (const file of files) {
6297
- if (file.endsWith("-edited.mp4") || file.endsWith("-captioned.mp4") || file.endsWith("-produced.mp4")) {
7028
+ if (file.endsWith("-edited.mp4") || file.endsWith("-enhanced.mp4") || file.endsWith("-captioned.mp4") || file.endsWith("-produced.mp4")) {
6298
7029
  await removeFile(join(videoDir, file));
6299
7030
  }
6300
7031
  }
@@ -6416,9 +7147,37 @@ var MainVideoAsset = class _MainVideoAsset extends VideoAsset {
6416
7147
  logger_default.info("No silence removed, using original video");
6417
7148
  return this.videoPath;
6418
7149
  }
7150
+ /**
7151
+ * Get the enhanced (visual overlays) video.
7152
+ * If not already generated, runs the visual enhancement stage.
7153
+ * Falls back to the edited video if enhancement is skipped or finds no opportunities.
7154
+ *
7155
+ * @param opts - Options controlling generation
7156
+ * @returns Path to the enhanced or edited video
7157
+ */
7158
+ async getEnhancedVideo(opts) {
7159
+ if (!opts?.force && await fileExists(this.enhancedVideoPath)) {
7160
+ return this.enhancedVideoPath;
7161
+ }
7162
+ const config2 = getConfig();
7163
+ if (config2.SKIP_VISUAL_ENHANCEMENT) {
7164
+ return this.getEditedVideo(opts);
7165
+ }
7166
+ const editedPath = await this.getEditedVideo(opts);
7167
+ const transcript = await this.getTranscript();
7168
+ const videoFile = await this.toVideoFile();
7169
+ const { enhanceVideo: enhanceVideo2 } = await loadVisualEnhancement();
7170
+ const result = await enhanceVideo2(editedPath, transcript, videoFile);
7171
+ if (result) {
7172
+ logger_default.info(`Visual enhancement completed: ${result.overlays.length} overlays composited`);
7173
+ return result.enhancedVideoPath;
7174
+ }
7175
+ logger_default.info("No visual enhancements generated, using edited video");
7176
+ return editedPath;
7177
+ }
6419
7178
  /**
6420
7179
  * Get the captioned video.
6421
- * If not already generated, burns captions into the edited video.
7180
+ * If not already generated, burns captions into the enhanced video.
6422
7181
  *
6423
7182
  * @param opts - Options controlling generation
6424
7183
  * @returns Path to the captioned video
@@ -6427,10 +7186,10 @@ var MainVideoAsset = class _MainVideoAsset extends VideoAsset {
6427
7186
  if (!opts?.force && await fileExists(this.captionedVideoPath)) {
6428
7187
  return this.captionedVideoPath;
6429
7188
  }
6430
- const editedPath = await this.getEditedVideo(opts);
7189
+ const enhancedPath = await this.getEnhancedVideo(opts);
6431
7190
  const captions = await this.getCaptions();
6432
7191
  const { burnCaptions: burnCaptions2 } = await loadCaptionBurning();
6433
- await burnCaptions2(editedPath, captions.ass, this.captionedVideoPath);
7192
+ await burnCaptions2(enhancedPath, captions.ass, this.captionedVideoPath);
6434
7193
  logger_default.info(`Captions burned into video: ${this.captionedVideoPath}`);
6435
7194
  return this.captionedVideoPath;
6436
7195
  }
@@ -6717,6 +7476,7 @@ var CONTENT_MATRIX = {
6717
7476
  "medium-clip": { captions: true, variantKey: null }
6718
7477
  },
6719
7478
  ["linkedin" /* LinkedIn */]: {
7479
+ video: { captions: true, variantKey: null },
6720
7480
  "medium-clip": { captions: true, variantKey: null }
6721
7481
  },
6722
7482
  ["tiktok" /* TikTok */]: {
@@ -7196,9 +7956,107 @@ async function buildPublishQueue(video, shorts, mediumClips, socialPosts, captio
7196
7956
  init_ProducerAgent();
7197
7957
  init_captionBurning();
7198
7958
  init_singlePassEdit();
7959
+ init_visualEnhancement();
7199
7960
  init_modelConfig();
7200
7961
  init_costTracker();
7201
7962
  init_types();
7963
+
7964
+ // src/services/processingState.ts
7965
+ init_fileSystem();
7966
+ init_paths();
7967
+ init_environment();
7968
+ init_logger2();
7969
+ function getStatePath() {
7970
+ const config2 = getConfig();
7971
+ return join(config2.OUTPUT_DIR, "processing-state.json");
7972
+ }
7973
+ async function readState() {
7974
+ const statePath = getStatePath();
7975
+ if (!fileExistsSync(statePath)) {
7976
+ return { videos: {} };
7977
+ }
7978
+ return readJsonFile(statePath, { videos: {} });
7979
+ }
7980
+ async function writeState(state) {
7981
+ const statePath = getStatePath();
7982
+ await writeJsonFile(statePath, state);
7983
+ }
7984
+ async function getVideoStatus(slug) {
7985
+ const state = await readState();
7986
+ return state.videos[slug];
7987
+ }
7988
+ async function getUnprocessed() {
7989
+ const state = await readState();
7990
+ const result = {};
7991
+ for (const [slug, video] of Object.entries(state.videos)) {
7992
+ if (video.status === "pending" || video.status === "failed") {
7993
+ result[slug] = video;
7994
+ }
7995
+ }
7996
+ return result;
7997
+ }
7998
+ async function isCompleted(slug) {
7999
+ const status = await getVideoStatus(slug);
8000
+ return status?.status === "completed";
8001
+ }
8002
+ async function markPending(slug, sourcePath) {
8003
+ const state = await readState();
8004
+ state.videos[slug] = {
8005
+ status: "pending",
8006
+ sourcePath
8007
+ };
8008
+ await writeState(state);
8009
+ logger_default.info(`[ProcessingState] Marked pending: ${slug}`);
8010
+ }
8011
+ async function markProcessing(slug) {
8012
+ const state = await readState();
8013
+ const existing = state.videos[slug];
8014
+ if (!existing) {
8015
+ logger_default.warn(`[ProcessingState] Cannot mark processing \u2014 unknown slug: ${slug}`);
8016
+ return;
8017
+ }
8018
+ state.videos[slug] = {
8019
+ ...existing,
8020
+ status: "processing",
8021
+ startedAt: (/* @__PURE__ */ new Date()).toISOString()
8022
+ };
8023
+ await writeState(state);
8024
+ logger_default.info(`[ProcessingState] Marked processing: ${slug}`);
8025
+ }
8026
+ async function markCompleted(slug) {
8027
+ const state = await readState();
8028
+ const existing = state.videos[slug];
8029
+ if (!existing) {
8030
+ logger_default.warn(`[ProcessingState] Cannot mark completed \u2014 unknown slug: ${slug}`);
8031
+ return;
8032
+ }
8033
+ state.videos[slug] = {
8034
+ ...existing,
8035
+ status: "completed",
8036
+ completedAt: (/* @__PURE__ */ new Date()).toISOString(),
8037
+ error: void 0
8038
+ };
8039
+ await writeState(state);
8040
+ logger_default.info(`[ProcessingState] Marked completed: ${slug}`);
8041
+ }
8042
+ async function markFailed(slug, error) {
8043
+ const state = await readState();
8044
+ const existing = state.videos[slug];
8045
+ if (!existing) {
8046
+ logger_default.warn(`[ProcessingState] Cannot mark failed \u2014 unknown slug: ${slug}`);
8047
+ return;
8048
+ }
8049
+ state.videos[slug] = {
8050
+ ...existing,
8051
+ status: "failed",
8052
+ completedAt: (/* @__PURE__ */ new Date()).toISOString(),
8053
+ error
8054
+ };
8055
+ await writeState(state);
8056
+ logger_default.info(`[ProcessingState] Marked failed: ${slug} \u2014 ${error}`);
8057
+ }
8058
+
8059
+ // src/pipeline.ts
7202
8060
  async function runStage(stageName, fn, stageResults) {
7203
8061
  costTracker.setStage(stageName);
7204
8062
  const start = Date.now();
@@ -7307,6 +8165,22 @@ async function processVideo(videoPath) {
7307
8165
  }
7308
8166
  }
7309
8167
  const captionTranscript = adjustedTranscript ?? transcript;
8168
+ let enhancedVideoPath;
8169
+ if (!cfg.SKIP_VISUAL_ENHANCEMENT && captionTranscript) {
8170
+ const videoToEnhance = editedVideoPath ?? video.repoPath;
8171
+ const enhancementResult = await runStage(
8172
+ "visual-enhancement" /* VisualEnhancement */,
8173
+ async () => {
8174
+ const result = await enhanceVideo(videoToEnhance, captionTranscript, video);
8175
+ if (!result) return void 0;
8176
+ return result;
8177
+ },
8178
+ stageResults
8179
+ );
8180
+ if (enhancementResult) {
8181
+ enhancedVideoPath = enhancementResult.enhancedVideoPath;
8182
+ }
8183
+ }
7310
8184
  let captions;
7311
8185
  if (captionTranscript && !cfg.SKIP_CAPTIONS) {
7312
8186
  captions = await runStage("captions" /* Captions */, () => generateCaptions(video, captionTranscript), stageResults);
@@ -7314,7 +8188,7 @@ async function processVideo(videoPath) {
7314
8188
  let captionedVideoPath;
7315
8189
  if (captions && !cfg.SKIP_CAPTIONS) {
7316
8190
  const assFile = captions.find((p) => p.endsWith(".ass"));
7317
- if (assFile && cleaningKeepSegments) {
8191
+ if (assFile && cleaningKeepSegments && !enhancedVideoPath) {
7318
8192
  const captionedOutput = join(video.videoDir, `${video.slug}-captioned.mp4`);
7319
8193
  captionedVideoPath = await runStage(
7320
8194
  "caption-burn" /* CaptionBurn */,
@@ -7322,7 +8196,7 @@ async function processVideo(videoPath) {
7322
8196
  stageResults
7323
8197
  );
7324
8198
  } else if (assFile) {
7325
- const videoToBurn = editedVideoPath ?? video.repoPath;
8199
+ const videoToBurn = enhancedVideoPath ?? editedVideoPath ?? video.repoPath;
7326
8200
  const captionedOutput = join(video.videoDir, `${video.slug}-captioned.mp4`);
7327
8201
  captionedVideoPath = await runStage(
7328
8202
  "caption-burn" /* CaptionBurn */,
@@ -7343,13 +8217,23 @@ async function processVideo(videoPath) {
7343
8217
  }
7344
8218
  } catch {
7345
8219
  }
7346
- const result = await runStage("shorts" /* Shorts */, () => generateShorts(shortsVideo, shortsTranscript, getModelForAgent("ShortsAgent"), clipDirection), stageResults);
8220
+ let webcamRegion;
8221
+ try {
8222
+ const layoutPath = join(video.videoDir, "layout.json");
8223
+ if (await fileExists(layoutPath)) {
8224
+ const layout = await readJsonFile(layoutPath);
8225
+ webcamRegion = layout.webcam;
8226
+ }
8227
+ } catch {
8228
+ }
8229
+ const result = await runStage("shorts" /* Shorts */, () => generateShorts(shortsVideo, shortsTranscript, getModelForAgent("ShortsAgent"), clipDirection, webcamRegion), stageResults);
7347
8230
  if (result) shorts = result;
7348
8231
  }
7349
8232
  let mediumClips = [];
7350
8233
  if (transcript && !cfg.SKIP_MEDIUM_CLIPS) {
7351
8234
  const mediumTranscript = adjustedTranscript ?? transcript;
7352
- const mediumVideo = editedVideoPath ? { ...video, repoPath: editedVideoPath } : video;
8235
+ const mediumVideoPath = enhancedVideoPath ?? editedVideoPath;
8236
+ const mediumVideo = mediumVideoPath ? { ...video, repoPath: mediumVideoPath } : video;
7353
8237
  let mediumClipDirection;
7354
8238
  try {
7355
8239
  const clipDirPath = join(video.videoDir, "clip-direction.md");
@@ -7455,6 +8339,7 @@ async function processVideo(videoPath) {
7455
8339
  video,
7456
8340
  transcript,
7457
8341
  editedVideoPath,
8342
+ enhancedVideoPath,
7458
8343
  captions,
7459
8344
  captionedVideoPath,
7460
8345
  summary,
@@ -7512,11 +8397,18 @@ function generateCostMarkdown(report) {
7512
8397
  return md;
7513
8398
  }
7514
8399
/**
 * Run the full pipeline for a single video, recording progress in the
 * processing-state file and never letting errors escape to the caller.
 *
 * Fix: the state-tracking calls (markPending/markProcessing/markCompleted/
 * markFailed) were unguarded, so a state-file I/O error could escape this
 * "safe" wrapper — or, worse, discard the result of a *successful*
 * processVideo run. State bookkeeping is now best-effort: failures are
 * logged as warnings and never mask the pipeline outcome.
 *
 * @param videoPath - Path to the source video file.
 * @returns The pipeline result, or null when processing failed.
 */
async function processVideoSafe(videoPath) {
  const filename = basename(videoPath);
  // Slug = filename without its video extension; keys the state file.
  const slug = filename.replace(/\.(mp4|mov|webm|avi|mkv)$/i, "");
  // Best-effort state update: log-and-continue on state-file errors.
  const trackState = async (fn) => {
    try {
      await fn();
    } catch (stateErr) {
      const reason = stateErr instanceof Error ? stateErr.message : String(stateErr);
      logger_default.warn(`Processing-state update failed for ${slug}: ${reason}`);
    }
  };
  await trackState(() => markPending(slug, videoPath));
  await trackState(() => markProcessing(slug));
  try {
    const result = await processVideo(videoPath);
    await trackState(() => markCompleted(slug));
    return result;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    logger_default.error(`Pipeline failed with uncaught error: ${message}`);
    await trackState(() => markFailed(slug, message));
    return null;
  }
}
@@ -8918,7 +9810,7 @@ program.command("schedule").description("View the current posting schedule acros
8918
9810
  program.command("doctor").description("Check all prerequisites and dependencies").action(async () => {
8919
9811
  await runDoctor();
8920
9812
  });
8921
- var defaultCmd = program.command("process", { isDefault: true }).argument("[video-path]", "Path to a video file to process (implies --once)").option("--watch-dir <path>", "Folder to watch for new recordings (default: env WATCH_FOLDER)").option("--output-dir <path>", "Output directory for processed videos (default: ./recordings)").option("--openai-key <key>", "OpenAI API key (default: env OPENAI_API_KEY)").option("--exa-key <key>", "Exa AI API key for web search (default: env EXA_API_KEY)").option("--once", "Process a single video and exit (no watching)").option("--brand <path>", "Path to brand.json config (default: ./brand.json)").option("--no-git", "Skip git commit/push stage").option("--no-silence-removal", "Skip silence removal stage").option("--no-shorts", "Skip shorts generation").option("--no-medium-clips", "Skip medium clip generation").option("--no-social", "Skip social media post generation").option("--no-captions", "Skip caption generation/burning").option("--no-social-publish", "Skip social media publishing/queue-build stage").option("--late-api-key <key>", "Late API key (default: env LATE_API_KEY)").option("--late-profile-id <id>", "Late profile ID (default: env LATE_PROFILE_ID)").option("-v, --verbose", "Verbose logging").option("--doctor", "Check all prerequisites and exit").action(async (videoPath) => {
9813
+ var defaultCmd = program.command("process", { isDefault: true }).argument("[video-path]", "Path to a video file to process (implies --once)").option("--watch-dir <path>", "Folder to watch for new recordings (default: env WATCH_FOLDER)").option("--output-dir <path>", "Output directory for processed videos (default: ./recordings)").option("--openai-key <key>", "OpenAI API key (default: env OPENAI_API_KEY)").option("--exa-key <key>", "Exa AI API key for web search (default: env EXA_API_KEY)").option("--once", "Process a single video and exit (no watching)").option("--brand <path>", "Path to brand.json config (default: ./brand.json)").option("--no-git", "Skip git commit/push stage").option("--no-silence-removal", "Skip silence removal stage").option("--no-shorts", "Skip shorts generation").option("--no-medium-clips", "Skip medium clip generation").option("--no-social", "Skip social media post generation").option("--no-captions", "Skip caption generation/burning").option("--no-visual-enhancement", "Skip visual enhancement (AI image overlays)").option("--no-social-publish", "Skip social media publishing/queue-build stage").option("--late-api-key <key>", "Late API key (default: env LATE_API_KEY)").option("--late-profile-id <id>", "Late profile ID (default: env LATE_PROFILE_ID)").option("-v, --verbose", "Verbose logging").option("--doctor", "Check all prerequisites and exit").action(async (videoPath) => {
8922
9814
  const opts = defaultCmd.opts();
8923
9815
  if (opts.doctor) {
8924
9816
  await runDoctor();
@@ -8938,6 +9830,7 @@ var defaultCmd = program.command("process", { isDefault: true }).argument("[vide
8938
9830
  mediumClips: opts.mediumClips,
8939
9831
  social: opts.social,
8940
9832
  captions: opts.captions,
9833
+ visualEnhancement: opts.visualEnhancement,
8941
9834
  socialPublish: opts.socialPublish,
8942
9835
  lateApiKey: opts.lateApiKey,
8943
9836
  lateProfileId: opts.lateProfileId
@@ -8990,12 +9883,47 @@ var defaultCmd = program.command("process", { isDefault: true }).argument("[vide
8990
9883
  }
8991
9884
  process.on("SIGINT", () => shutdown());
8992
9885
  process.on("SIGTERM", () => shutdown());
8993
- watcher.on("new-video", (filePath) => {
9886
+ watcher.on("new-video", async (filePath) => {
9887
+ const filename = filePath.replace(/\\/g, "/").split("/").pop() ?? "";
9888
+ const slug = filename.replace(/\.(mp4|mov|webm|avi|mkv)$/i, "");
9889
+ if (slug && await isCompleted(slug)) {
9890
+ logger_default.info(`Skipping already-processed video: ${filePath}`);
9891
+ return;
9892
+ }
8994
9893
  queue.push(filePath);
8995
9894
  logger_default.info(`Queued video: ${filePath} (queue length: ${queue.length})`);
8996
9895
  processQueue().catch((err) => logger_default.error("Queue processing error:", err));
8997
9896
  });
8998
9897
  watcher.start();
9898
+ try {
9899
+ const watchFiles = listDirectorySync(config2.WATCH_FOLDER);
9900
+ for (const file of watchFiles) {
9901
+ const ext = extname(file).toLowerCase();
9902
+ if (![".mp4", ".mov", ".webm", ".avi", ".mkv"].includes(ext)) continue;
9903
+ const filePath = join(config2.WATCH_FOLDER, file);
9904
+ const slug = file.replace(/\.(mp4|mov|webm|avi|mkv)$/i, "");
9905
+ const status = await getVideoStatus(slug);
9906
+ if (!status || status.status === "failed" || status.status === "pending") {
9907
+ if (!queue.includes(filePath)) {
9908
+ queue.push(filePath);
9909
+ logger_default.info(`Startup scan: queued ${slug}${status ? ` (was ${status.status})` : " (new)"}`);
9910
+ }
9911
+ }
9912
+ }
9913
+ } catch (err) {
9914
+ logger_default.warn(`Could not scan watch folder on startup: ${err instanceof Error ? err.message : String(err)}`);
9915
+ }
9916
+ const unprocessed = await getUnprocessed();
9917
+ for (const [slug, state] of Object.entries(unprocessed)) {
9918
+ if (!queue.includes(state.sourcePath)) {
9919
+ queue.push(state.sourcePath);
9920
+ logger_default.info(`Re-queued from state: ${slug} (${state.status})`);
9921
+ }
9922
+ }
9923
+ if (queue.length > 0) {
9924
+ logger_default.info(`Startup: ${queue.length} video(s) queued for processing`);
9925
+ processQueue().catch((err) => logger_default.error("Queue processing error:", err));
9926
+ }
8999
9927
  if (onceMode) {
9000
9928
  logger_default.info("Running in --once mode. Will exit after processing the next video.");
9001
9929
  } else {