npm - @kolbo/kolbo-code-linux-arm64-musl - Versions diffs - 1.1.72 → 1.1.73 - Mend

@kolbo/kolbo-code-linux-arm64-musl 1.1.72 → 1.1.73

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/bin/kolbo +0 -0
package/package.json +1 -1
package/skills/color-grading/SKILL.md +152 -0
package/skills/ffmpeg-patterns/SKILL.md +240 -0
package/skills/image-prompting-guide/SKILL.md +143 -0
package/skills/kolbo/SKILL.md +29 -0
package/skills/music-prompting/SKILL.md +146 -0
package/skills/production-review/SKILL.md +152 -0
package/skills/short-form-video/SKILL.md +168 -0
package/skills/sound-design/SKILL.md +154 -0
package/skills/storytelling/SKILL.md +139 -0
package/skills/subtitle-production/SKILL.md +244 -0
package/skills/subtitle-production/reference/burn_to_video.py +222 -0
package/skills/subtitle-production/reference/export_srts.py +127 -0
package/skills/subtitle-production/reference/gen_srt.py +42 -0
package/skills/typography-video/SKILL.md +182 -0
package/skills/typography-video/reference/KineticTitleScene.tsx +345 -0
package/skills/video-editing/SKILL.md +128 -0
package/skills/video-production/SKILL.md +7 -8
package/skills/video-prompting-guide/SKILL.md +268 -0

package/skills/typography-video/reference/KineticTitleScene.tsx ADDED Viewed

@@ -0,0 +1,345 @@
+/**
+ * KineticTitleScene — Full-screen kinetic text card
+ *
+ * bgStyle="solid"         — always-on dark background (default)
+ * bgStyle="dynamic_panel" — bg wipes in before text, wipes out after text gone
+ * bgStyle="transparent"   — no background (composite over footage, render with alpha codec)
+ *
+ * Words land in quick succession (each entrance staggered by 4 frames).
+ * Alternating solid / outline / accent treatment.
+ * Based on Higgsfield / SOUL 2.0 reference style.
+ */
+import React from "react";
+import { useCurrentFrame, useVideoConfig, spring, interpolate, Easing } from "remotion";
+import { loadFont as loadPoppins } from "@remotion/google-fonts/Poppins";
+import { loadFont as loadHeebo } from "@remotion/google-fonts/Heebo";
+const { fontFamily: poppins } = loadPoppins(); // loaded once at module scope; font family used for non-Hebrew text
+const { fontFamily: heebo } = loadHeebo();     // loaded once at module scope; font family used when language is "he" or "iw"
+export interface KineticTitleProps {
+  words: string[];           // words to stack, each on its own line (intended for 2–4); component defaults to ["WENT", "VIRAL"]
+  subtext?: string;          // optional small line rendered below the word stack
+  accentColor?: string;      // accent color; component defaults to BRAND_BLUE
+  language?: string;         // "he" or "iw" selects the Hebrew/RTL layout; any other value is laid out LTR (component defaults to "en")
+  bgStyle?: "solid" | "dynamic_panel" | "transparent"; // background mode; component defaults to "solid"
+  durationInFrames: number;  // scene length in frames; drives the exit timing (component defaults to 150)
+  fps: number;               // frame rate passed to the word-entrance spring (component defaults to 30)
+}
+const BRAND_BLUE = "#60a5fa"; // default value of the accentColor prop
+const STAGGER = 4; // frames between each word entrance
+// How many frames the bg leads the text on entry, and trails it on exit ("dynamic_panel" mode only)
+const BG_LEAD = 10;   // text entrance is delayed by this many frames so the bg wipe starts first
+const BG_TRAIL = 12;  // text exit starts this many frames earlier, leaving room for the bg wipe-out
+/**
+ * Full-screen kinetic title card.
+ *
+ * Stacks `words` one per line (solid / outline / accent treatment cycling every
+ * three words), optionally followed by `subtext`, over one of three backgrounds:
+ *   - "solid"         — dark background that is always visible
+ *   - "dynamic_panel" — background panel wipes in (scaleX) before the text and
+ *                       wipes out after the text has faded
+ *   - "transparent"   — no background panel at all
+ *
+ * Timing is derived from `durationInFrames`: the text fades out over 12 frames,
+ * ending at the last frame for "solid"/"transparent" and BG_TRAIL frames
+ * earlier for "dynamic_panel" so the panel can finish wiping out.
+ *
+ * NOTE(review): alpha suffixes such as `${accent}44` are appended to
+ * `accentColor`, so it is assumed to be a 6-digit hex string — confirm callers.
+ */
+export const KineticTitleScene: React.FC<KineticTitleProps> = ({
+  words = ["WENT", "VIRAL"],
+  subtext,
+  accentColor = BRAND_BLUE,
+  language = "en",
+  bgStyle = "solid",
+  durationInFrames = 150,
+  fps = 30,
+}) => {
+  const frame = useCurrentFrame();
+  const { width, height } = useVideoConfig();
+  const isHebrew = language === "he" || language === "iw";
+  const fontFamily = isHebrew ? heebo : poppins;
+  const isVertical = height > width;
+  const accent = accentColor;
+  const isDynamic = bgStyle === "dynamic_panel";
+  const isTransparent = bgStyle === "transparent";
+  // ── Text timing ──────────────────────────────────────────────────────────
+  // Dynamic: text starts after bg has partially appeared
+  const textOffset = isDynamic ? BG_LEAD : 0;
+  const textExitStart = durationInFrames - (isDynamic ? BG_TRAIL + 12 : 12);
+  // Clamp on BOTH sides: without the left clamp the value extrapolates far above 1
+  // before the exit starts, which skews every product that uses it
+  // (subtext fade-in, transparent-mode viewfinder corners).
+  const exitOpacity = interpolate(
+    frame,
+    [textExitStart, textExitStart + 12],
+    [1, 0],
+    { extrapolateLeft: "clamp", extrapolateRight: "clamp" }
+  );
+  // Global entrance scale
+  const globalScale = interpolate(frame - textOffset, [0, 20], [1.04, 1.0], {
+    extrapolateLeft: "clamp",
+    extrapolateRight: "clamp",
+    easing: Easing.out(Easing.cubic),
+  });
+  // ── Background panel timing ───────────────────────────────────────────────
+  // BG enters: wipes left→right starting at frame 0
+  const bgEnterDuration = 18;
+  const bgEnterProgress = interpolate(frame, [0, bgEnterDuration], [0, 1], {
+    extrapolateLeft: "clamp",
+    extrapolateRight: "clamp",
+    easing: Easing.out(Easing.cubic),
+  });
+  // BG exits: wipes left→right starting after text is fully gone.
+  // The wipe must finish on the last rendered frame (durationInFrames - 1); a fixed
+  // 14-frame wipe does not fit into the BG_TRAIL window and would be cut off mid-way.
+  // Math.max keeps the input range strictly increasing when bgExitStart is already
+  // at/after the end of the scene (non-dynamic modes).
+  const bgExitStart = textExitStart + 12;
+  const bgExitEnd = Math.max(bgExitStart + 1, durationInFrames - 1);
+  const bgExitProgress = interpolate(
+    frame,
+    [bgExitStart, bgExitEnd],
+    [0, 1],
+    {
+      extrapolateLeft: "clamp",
+      extrapolateRight: "clamp",
+      easing: Easing.in(Easing.cubic),
+    }
+  );
+  // scaleX wipe — most reliable in Remotion headless Chrome
+  // Enter: scaleX grows 0→1 from LEFT origin  (reveals left to right)
+  // Exit:  scaleX shrinks 1→0 from RIGHT origin (removes left to right)
+  const isBgExiting = isDynamic && frame >= bgExitStart;
+  const bgScaleX = isDynamic
+    ? (isBgExiting ? 1 - bgExitProgress : bgEnterProgress)
+    : 1;
+  const bgTransformOrigin = isBgExiting ? "right center" : "left center";
+  const showBg = !isTransparent;
+  // ── Accent scan line across bg (cinematic HUD feel) ──────────────────────
+  const scanY = interpolate(frame, [0, durationInFrames], [0, height], {
+    extrapolateRight: "clamp",
+  });
+  const scanOpacity = isDynamic
+    ? bgEnterProgress * (1 - bgExitProgress) * 0.12
+    : 0.06;
+  // ── Font size ─────────────────────────────────────────────────────────────
+  const baseFontSize = isVertical
+    ? Math.round(width * 0.22)
+    : Math.round(height * 0.26);
+  // ── Word styles: solid → outline → accent ────────────────────────────────
+  const wordStyles = (i: number) => {
+    const mod = i % 3;
+    if (mod === 0) return { color: "#ffffff",   stroke: "none",  shadow: `0 0 40px ${accent}44` };
+    if (mod === 1) return { color: "transparent", stroke: accent, shadow: `0 0 30px ${accent}44` };
+    return                 { color: accent,     stroke: "none",  shadow: `0 0 50px ${accent}88` };
+  };
+  // ── Separator line ────────────────────────────────────────────────────────
+  // Plain render helper (not a nested component): a component declared inside render
+  // gets a new identity on every frame and would be remounted each time.
+  const renderSepLine = (delay: number) => {
+    // Left clamp keeps the width at 0% before `delay`; an unclamped value goes
+    // negative, which is not a valid CSS width.
+    const lineW = interpolate(frame - delay, [0, 16], [0, 1], {
+      extrapolateLeft: "clamp",
+      extrapolateRight: "clamp",
+      easing: Easing.out(Easing.cubic),
+    });
+    return (
+      <div style={{
+        width: `${lineW * 100}%`,
+        height: 1,
+        background: `linear-gradient(${isHebrew ? "270deg" : "90deg"}, transparent, ${accent}88, transparent)`,
+        marginBottom: 2,
+        marginTop: 2,
+      }} />
+    );
+  };
+  return (
+    <div style={{
+      width, height,
+      // dynamic_panel: outer is transparent — the animated inner panel owns the background
+      background: (isTransparent || isDynamic) ? "transparent" : "#07070f",
+      overflow: "hidden",
+      position: "relative",
+      fontFamily,
+      direction: isHebrew ? "rtl" : "ltr",
+    }}>
+      {/* ── Background panel (scaleX wipe in dynamic_panel mode) ── */}
+      {showBg && (
+        <div style={{
+          position: "absolute", inset: 0,
+          transform: isDynamic ? `scaleX(${bgScaleX})` : undefined,
+          transformOrigin: isDynamic ? bgTransformOrigin : undefined,
+        }}>
+          {/* Dark fill */}
+          <div style={{ position: "absolute", inset: 0, background: "#07070f" }} />
+          {/* Grid lines */}
+          <div style={{
+            position: "absolute", inset: 0,
+            backgroundImage: [
+              `linear-gradient(rgba(255,255,255,0.03) 1px, transparent 1px)`,
+              `linear-gradient(90deg, rgba(255,255,255,0.03) 1px, transparent 1px)`,
+            ].join(", "),
+            backgroundSize: "60px 60px",
+            pointerEvents: "none",
+          }} />
+          {/* Diagonal corner accent */}
+          <div style={{
+            position: "absolute",
+            top: -60,
+            left: isHebrew ? undefined : -60,
+            right: isHebrew ? -60 : undefined,
+            width: 300, height: 300,
+            background: `linear-gradient(${isHebrew ? "225deg" : "135deg"}, ${accent}18 0%, transparent 60%)`,
+            pointerEvents: "none",
+          }} />
+          {/* Vignette */}
+          <div style={{
+            position: "absolute", inset: 0,
+            background: "radial-gradient(ellipse at center, transparent 30%, rgba(0,0,0,0.65) 100%)",
+            pointerEvents: "none",
+          }} />
+          {/* Horizontal scan line (slow drift) */}
+          <div style={{
+            position: "absolute",
+            left: 0, right: 0,
+            top: scanY,
+            height: 1,
+            background: `linear-gradient(90deg, transparent, ${accent}, transparent)`,
+            opacity: scanOpacity,
+            pointerEvents: "none",
+          }} />
+          {/* Bright edge line at the wipe front: the panel's right edge while entering
+              (left origin), its left edge while exiting (right origin) */}
+          {isDynamic && bgScaleX > 0 && bgScaleX < 1 && (
+            <div style={{
+              position: "absolute",
+              top: 0, bottom: 0,
+              left: isBgExiting ? 0 : undefined,
+              right: isBgExiting ? undefined : 0,
+              width: Math.round(2 / Math.max(bgScaleX, 0.05)), // compensate for scaleX compression
+              background: `linear-gradient(180deg, transparent, ${accent}, ${accent}, transparent)`,
+              opacity: 0.9,
+              boxShadow: `0 0 16px ${accent}, 0 0 32px ${accent}88`,
+            }} />
+          )}
+        </div>
+      )}
+      {/* ── Word stack ── */}
+      <div style={{
+        position: "absolute",
+        inset: 0,
+        display: "flex",
+        flexDirection: "column",
+        justifyContent: "center",
+        paddingLeft:  isHebrew ? 0 : isVertical ? 32 : 64,
+        paddingRight: isHebrew ? (isVertical ? 32 : 64) : 0,
+        paddingTop: isVertical ? 40 : 30,
+        transform: `scale(${globalScale})`,
+        opacity: exitOpacity,
+      }}>
+        {words.map((word, i) => {
+          // Each word enters STAGGER frames after the previous one
+          const delay = textOffset + i * STAGGER;
+          const wordScale = spring({
+            frame: Math.max(0, frame - delay),
+            fps,
+            from: 0,
+            to: 1,
+            config: { damping: 9, stiffness: 350 },
+          });
+          const wordY = interpolate(
+            frame - delay, [0, 14], [40, 0],
+            { extrapolateLeft: "clamp", extrapolateRight: "clamp", easing: Easing.out(Easing.cubic) }
+          );
+          const ws = wordStyles(i);
+          // Only the first word gets the short accent-colored glitch offset
+          const glitchAmt = i === 0
+            ? interpolate(frame - textOffset, [0, 1, 3], [12, 4, 0], { extrapolateRight: "clamp" })
+            : 0;
+          return (
+            <React.Fragment key={i}>
+              {i > 0 && renderSepLine(textOffset + (i - 1) * STAGGER + 6)}
+              <div style={{
+                position: "relative",
+                transform: `translateY(${wordY}px) scaleY(${wordScale})`,
+                transformOrigin: isHebrew ? "top right" : "top left",
+                lineHeight: 0.85,
+                overflow: "visible",
+              }}>
+                {glitchAmt > 0 && (
+                  <div style={{
+                    position: "absolute",
+                    fontSize: baseFontSize,
+                    fontWeight: 900,
+                    color: accent,
+                    textTransform: isHebrew ? "none" : "uppercase",
+                    letterSpacing: isHebrew ? 0 : -2,
+                    transform: `translateX(${glitchAmt}px)`,
+                    opacity: 0.45,
+                    mixBlendMode: "screen",
+                    userSelect: "none",
+                  }}>{word}</div>
+                )}
+                <div style={{
+                  fontSize: baseFontSize,
+                  fontWeight: 900,
+                  color: ws.color,
+                  textTransform: isHebrew ? "none" : "uppercase",
+                  letterSpacing: isHebrew ? 0 : -2,
+                  WebkitTextStroke: ws.stroke !== "none" ? `4px ${ws.stroke}` : undefined,
+                  textShadow: ws.shadow !== "none" ? ws.shadow : undefined,
+                  whiteSpace: "nowrap",
+                  userSelect: "none",
+                }}>{word}</div>
+              </div>
+            </React.Fragment>
+          );
+        })}
+        {subtext && (() => {
+          // Subtext fades in 10 frames after the last word's entrance slot
+          const stDelay = textOffset + words.length * STAGGER + 10;
+          const stOpacity = interpolate(frame - stDelay, [0, 14], [0, 1], {
+            extrapolateLeft: "clamp", extrapolateRight: "clamp",
+          });
+          return (
+            <div style={{
+              fontSize: Math.round(baseFontSize * 0.17),
+              fontWeight: 600,
+              color: "rgba(255,255,255,0.45)",
+              textTransform: isHebrew ? "none" : "uppercase",
+              letterSpacing: isHebrew ? 0 : 5,
+              marginTop: 20,
+              opacity: stOpacity * exitOpacity,
+            }}>{subtext}</div>
+          );
+        })()}
+      </div>
+      {/* ── Viewfinder corners (blink: visible for 10 frames, hidden for 10) ── */}
+      {Math.floor(frame / 10) % 2 === 0 && [
+        { top: 24, left: 24,  borderTopWidth: 2, borderLeftWidth: 2  },
+        { top: 24, right: 24, borderTopWidth: 2, borderRightWidth: 2 },
+        { bottom: 24, left: 24,  borderBottomWidth: 2, borderLeftWidth: 2  },
+        { bottom: 24, right: 24, borderBottomWidth: 2, borderRightWidth: 2 },
+      ].map((pos, i) => (
+        <div key={i} style={{
+          position: "absolute", width: 20, height: 20,
+          borderColor: accent, borderStyle: "solid", borderWidth: 0,
+          opacity: isTransparent ? exitOpacity * 0.5 : 0.5,
+          ...pos,
+        }} />
+      ))}
+      {/* ── Frame counter ── */}
+      <div style={{
+        position: "absolute",
+        bottom: isVertical ? 32 : 24,
+        right: isHebrew ? undefined : 36,
+        left: isHebrew ? 36 : undefined,
+        fontSize: 11,
+        fontFamily: "monospace",
+        color: `${accent}66`,
+        letterSpacing: 3,
+        opacity: exitOpacity,
+      }}>
+        {String(frame).padStart(4, "0")}
+      </div>
+    </div>
+  );
+};

package/skills/video-editing/SKILL.md ADDED Viewed

@@ -0,0 +1,128 @@
+---
+name: video-editing
+description: >
+  Video editing decisions: what to cut vs keep, cut techniques (J-cut, L-cut, hard cut),
+  pacing by format, edit decision structure, silence/filler removal, talking head editing.
+  Use when making editorial decisions about video content.
+  Keywords: video editing, cut, trim, j-cut, l-cut, pacing, filler words, silence, talking head,
+  edit decision, transition, dead air, false start
+---
+# Video Editing — Editorial Decisions
+## What to Cut
+1. **Filler words:** "um", "uh", "like", "you know" — cut at word boundaries using word timestamps
+2. **False starts:** When the speaker restarts a sentence, keep only the final take
+3. **Dead air:** Silence longer than 1.5 seconds should be trimmed to ~0.5 seconds
+4. **Off-topic tangents:** If the speaker wanders, cut to the next relevant segment
+5. **Repeated points:** Keep the best delivery, remove redundant takes
+## What NOT to Cut
+- **Breath pauses:** Natural 0.3-0.8 second pauses between sentences. Keeping them makes the speech sound natural.
+- **Emphasis pauses:** Intentional pauses for dramatic effect
+- **Reactions and transitions:** Verbal bridges like "So..." or "Now..." that provide flow
+## Cut Techniques
+| Technique | Description | When to Use |
+|-----------|-------------|-------------|
+| **J-cut** | Audio from next segment starts ~0.5s before visual cut | Smooth transitions |
+| **L-cut** | Audio from current segment continues ~0.5s after visual cut | Maintaining continuity |
+| **Hard cut** | Instant transition | Major topic changes |
+| **Jump cut** | Cut within same shot (visible jump) | YouTube/social energy, pacing |
+| **Match cut** | Visual similarity bridges two different shots | Creative storytelling |
+## Pacing by Format
+| Format | Approach |
+|--------|----------|
+| **Short-form (< 60s)** | Aggressive cuts. Minimal dead air. High energy. Visual change every 1-3s |
+| **Medium-form (1-10 min)** | Balanced. Keep natural pauses for breathing room. Change every 3-5s |
+| **Long-form (> 10 min)** | Let scenes breathe. Only cut obvious problems. Change every 5-10s |
+## Edit Decision Structure
+When planning an edit, define:
+- **Cuts:** Ordered list of segments to keep (source, in/out points, speed)
+- **Overlays:** Timed overlay placements (images, diagrams, lower thirds)
+- **Subtitles:** Subtitle configuration (enabled, style, source file)
+- **Music:** Background music settings (asset, volume, ducking, fades)
+- **Transitions:** Transition type and timing between cuts
+## Silence Removal Workflow
+1. **Detect silence** with FFmpeg: `silencedetect=noise=-35dB:d=0.4`
+2. **Parse** silence_start/silence_end timestamps from stderr
+3. **Generate segments** of non-silent audio
+4. **Concatenate** segments with the concat demuxer
+5. Optional: apply `atempo=1.14` for subtle speedup that feels natural
+## Talking Head Editing Checklist
+- [ ] No visible jump cuts unless they are an intentional style choice
+- [ ] Audio doesn't pop or click at cut points
+- [ ] Pacing matches content energy and target platform
+- [ ] Speaker's face is never covered by overlays
+- [ ] All cuts are at word boundaries (not mid-word)
+- [ ] Filler words removed unless they serve the personality
+- [ ] B-roll covers any remaining jump cuts
+## Lip Sync (Dubbing / Localization)
+When replacing audio and matching lips:
+| Input | Tool | Output |
+|-------|------|--------|
+| Existing VIDEO + new audio | Lip sync | Video with synced lips |
+| Still PHOTO + audio | Talking head generator | New video from photo |
+**Decision rule:** If you have video footage of the person, use lip sync. If you only have a photo, use talking head generation.
+**Workflow for localization:**
+```
+transcribe(video) → translate → TTS(translated text) → lip_sync(original_video, new_audio)
+```
+Keep original video as source for each language — never chain lip sync outputs.
+**Face padding** for lip sync: `[0, 10, 0, 0]` (top, bottom, left, right) works for 90% of footage. Increase bottom if chin gets cropped.
+---
+## Kolbo MCP Integration
+| Task | Kolbo MCP Tool | Notes |
+|------|---------------|-------|
+| Transcribe for edit points | `transcribe_audio` | Word-level timestamps for precise cuts |
+| Lip sync dubbing | `generate_lipsync` | Source video + new audio |
+| Generate B-roll | `generate_video` or `generate_image` | Cover jump cuts |
+| Generate narration | `generate_speech` | Re-record with AI voice |
+| Visual analysis | `chat_send_message` + Gemini | "Analyze this video for edit points" |
+**Editing workflow with Kolbo:**
+1. `transcribe_audio` → get full transcript with word timestamps
+2. Identify filler words, dead air, false starts from transcript
+3. Generate FFmpeg trim commands for non-silent/non-filler segments
+4. `generate_image` or `generate_video` → B-roll for covering jump cuts
+5. Concatenate clips + burn-in subtitles + mix audio
+6. Review with `production-review` skill
+**Localization workflow:**
+1. `transcribe_audio` → source language transcript
+2. Translate the transcript (use `chat_send_message` for translation)
+3. `generate_speech` → TTS in target language
+4. `generate_lipsync` → sync new audio to original face
+5. Repeat for each language (always from original, never chain)
+---
+## Local / Free Options
+> **IMPORTANT:** Always use Kolbo MCP tools by default (`transcribe_audio`, `generate_lipsync`). FFmpeg silence removal is safe to use directly. For anything else, confirm with the user first.
+**FFmpeg (safe, standard):** Silence detection/removal, trimming, concatenation — all built-in.
+**Transcription:** If the user wants offline, `faster-whisper` runs on CPU (`pip install faster-whisper`). Confirm before installing.

package/skills/video-production/SKILL.md CHANGED Viewed

@@ -105,16 +105,15 @@ def detect_silence(video_path, noise_db=-35, duration=0.4):
 ## RTL (Hebrew/Arabic) Subtitles
-SRT with `subtitles` filter works for basic burn. For per-word karaoke highlighting with RTL:
+For comprehensive RTL subtitle handling, load the `subtitle-production` skill — it contains full patterns for:
+- Simple SRT burn-in with Heebo font + `Encoding=177`
+- ASS per-word positioning for karaoke (with PIL `~0.74` scale factor)
+- Remotion RTL captions with CSS `direction: rtl` and all the flip rules
+- RTL progress bar with FFmpeg `geq` filter
-- Each word gets its own ASS `Dialogue` line with explicit `\pos(x,y)`
-- Use PIL to measure word widths: apply `~0.74` scale factor (PIL→libass calibration)
-- Use `Alignment=7` (top-left anchor) so `\pos` sets exact top-left of each word
-- Set `Encoding=177` (Hebrew) in ASS style
-- Strip punctuation and render as separate positioned elements
-- Two ASS styles (e.g., White + Yellow) instead of inline `\c` color tags
+**CRITICAL**: Any inline ASS tag (`\c`, `\K`, `\1c`, etc.) between RTL words breaks Unicode bidi in libass — words render LTR. Use separate Dialogue lines per word instead.
-**CRITICAL**: Any inline ASS tag (`\c`, `\K`, `\1c`, etc.) between RTL words breaks Unicode bidirectional algorithm in libass — words render LTR. Use separate Dialogue lines instead.
+For Remotion RTL layout rules (padding flips, transform-origin, gradient direction), load the `typography-video` skill.
 ## Remotion Motion Graphics