npm - @thunderkiller/video-clipper - Versions diffs - 1.2.0 → 1.4.0 - Mend

@thunderkiller/video-clipper 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93)

package/CHANGELOG.md +19 -0
package/CONTRIBUTING.md +100 -0
package/LICENSE +15 -0
package/commitlint.config.js +25 -0
package/package.json +3 -1
package/.github/workflows/ci.yml +0 -42
package/.github/workflows/release.yml +0 -76
package/.husky/pre-commit +0 -3
package/.prettierignore +0 -6
package/.prettierrc +0 -7
package/.releaserc.json +0 -21
package/AGENTS.md +0 -122
package/docs/free-models.md +0 -78
package/docs/plan.md +0 -442
package/docs/refactorPhases.md +0 -105
package/docs/yt-downloader.md +0 -440
package/requirements.txt +0 -5
package/scripts/detect_events.py +0 -81
package/scripts/detect_events_whisper.py +0 -101
package/scripts/transcribe_whisper.py +0 -70
package/src/cli.ts +0 -186
package/src/config/env.ts +0 -18
package/src/config/index.ts +0 -2
package/src/index.ts +0 -46
package/src/pipeline/runner.ts +0 -147
package/src/pipeline/stages/audioProcessor.ts +0 -127
package/src/pipeline/stages/clipExporter.ts +0 -76
package/src/pipeline/stages/segmentAnalyzer.ts +0 -72
package/src/pipeline/stages/segmentSelector.ts +0 -39
package/src/pipeline/stages/videoResolver.ts +0 -44
package/src/services/audioAnalyzers/base.ts +0 -32
package/src/services/audioAnalyzers/factory.ts +0 -69
package/src/services/audioAnalyzers/gemini.ts +0 -136
package/src/services/audioAnalyzers/index.ts +0 -6
package/src/services/audioAnalyzers/whisper.ts +0 -80
package/src/services/audioAnalyzers/yamnet.ts +0 -54
package/src/services/audioDownloader/index.ts +0 -102
package/src/services/chunkBuilder/index.ts +0 -82
package/src/services/clipGenerator/index.ts +0 -210
package/src/services/clipRefiner/index.ts +0 -141
package/src/services/eventDetector/index.ts +0 -68
package/src/services/llmAnalyzer/LLMAnalyzer.ts +0 -98
package/src/services/llmAnalyzer/index.ts +0 -231
package/src/services/metadataExtractor/index.ts +0 -83
package/src/services/segmentRanker/index.ts +0 -88
package/src/services/signalMerger/index.ts +0 -53
package/src/services/transcriptAnalyzers/base.ts +0 -26
package/src/services/transcriptAnalyzers/factory.ts +0 -66
package/src/services/transcriptAnalyzers/gemini.ts +0 -24
package/src/services/transcriptAnalyzers/index.ts +0 -6
package/src/services/transcriptAnalyzers/whisper.ts +0 -68
package/src/services/transcriptAnalyzers/ytdlp.ts +0 -19
package/src/services/transcriptDetector/index.ts +0 -122
package/src/services/transcriptFetcher/index.ts +0 -147
package/src/services/urlParser/index.ts +0 -52
package/src/services/videoDownloader/index.ts +0 -268
package/src/types/analyzer.ts +0 -23
package/src/types/audio.ts +0 -19
package/src/types/cache.ts +0 -8
package/src/types/cli.ts +0 -22
package/src/types/config.ts +0 -151
package/src/types/downloader.ts +0 -15
package/src/types/factory.ts +0 -3
package/src/types/index.ts +0 -40
package/src/types/pipeline.ts +0 -60
package/src/types/segment.ts +0 -43
package/src/types/transcript.ts +0 -22
package/src/types/video.ts +0 -18
package/src/utils/cache.ts +0 -224
package/src/utils/chunker.ts +0 -60
package/src/utils/dumper.ts +0 -41
package/src/utils/format.ts +0 -10
package/src/utils/logger.ts +0 -17
package/src/utils/modelFactory.ts +0 -71
package/src/utils/redactConfig.ts +0 -23
package/src/utils/sliceAudio.ts +0 -35
package/test-trigger.txt +0 -1
package/tests/analyzerFactory.test.ts +0 -146
package/tests/audioEventDetector.test.ts +0 -69
package/tests/cache.test.ts +0 -203
package/tests/chunkBuilder.test.ts +0 -146
package/tests/chunker.test.ts +0 -95
package/tests/eventDetector.test.ts +0 -103
package/tests/llmAnalyzer.test.ts +0 -283
package/tests/segmentRanker.test.ts +0 -133
package/tests/setup.ts +0 -48
package/tests/signalMerger.test.ts +0 -197
package/tests/transcriptDetector.test.ts +0 -150
package/tests/transcriptFetcher.test.ts +0 -179
package/tests/urlParser.test.ts +0 -70
package/tsconfig.json +0 -16
package/tsconfig.test.json +0 -8
package/vitest.config.ts +0 -8

package/src/pipeline/stages/segmentSelector.ts DELETED

@@ -1,39 +0,0 @@
-import { mergeSignals } from '../../services/signalMerger/index.js';
-import { rankSegments } from '../../services/segmentRanker/index.js';
-import { log } from '../../utils/logger.js';
-import type {
-  ChunkEvaluation,
-  AudioEvent,
-  RankedSegment,
-  SegmentSelectorOpts,
-} from '../../types/index.js';
-export type { SegmentSelectorOpts };
-/**
- * Stage 5 — Segment Selector
- *
- * Merges transcript LLM evaluations with audio events (if any), then ranks
- * and deduplicates candidates to produce the final ordered list of segments.
- *
- * This stage sits between the two LLM passes: it runs after `analyzeSegments`
- * (pass 1) and its output feeds `refineRankedSegments` (pass 2).
- */
-export function selectSegments(
-  chunkEvals: ChunkEvaluation[],
-  audioEvents: AudioEvent[],
-  opts: SegmentSelectorOpts,
-): RankedSegment[] {
-  const merged = mergeSignals(chunkEvals, audioEvents);
-  const ranked = rankSegments(merged, opts.threshold, opts.topN);
-  if (ranked.length === 0) {
-    log.warn(`No segments scored above threshold ${opts.threshold}. Try lowering --threshold.`);
-  } else {
-    log.info(
-      `Analysis complete: ${ranked.length} segment${ranked.length !== 1 ? 's' : ''} above threshold ${opts.threshold}`,
-    );
-  }
-  return ranked;
-}

package/src/pipeline/stages/videoResolver.ts DELETED

@@ -1,44 +0,0 @@
-import { parseUrl } from '../../services/urlParser/index.js';
-import { extractMetadata } from '../../services/metadataExtractor/index.js';
-import { log } from '../../utils/logger.js';
-import { formatSeconds } from '../../utils/format.js';
-import type { VideoResolverResult } from '../../types/index.js';
-export type { VideoResolverResult };
-/**
- * Stage 1 — Video Resolver
- *
- * Parses a raw YouTube URL into a validated video ID, fetches metadata
- * (title + duration), and enforces the optional --max-duration guard.
- *
- * @throws {Error} on invalid URL, metadata fetch failure, or exceeded duration
- */
-export async function resolveVideo(
-  rawUrl: string,
-  maxDurationSec?: number,
-): Promise<VideoResolverResult> {
-  let videoId: string;
-  try {
-    videoId = parseUrl(rawUrl);
-  } catch {
-    throw new Error(`Invalid YouTube URL: ${rawUrl}`);
-  }
-  log.info(`Fetching metadata for ${videoId}...`);
-  const metadata = await extractMetadata(videoId);
-  log.info(
-    `Video: "${metadata.title}" (${metadata.duration > 0 ? formatSeconds(metadata.duration) : 'duration unknown'})`,
-  );
-  if (maxDurationSec !== undefined && metadata.duration > 0) {
-    if (metadata.duration > maxDurationSec) {
-      throw new Error(
-        `Video duration exceeds --max-duration limit. ` +
-          `(${formatSeconds(metadata.duration)} > ${formatSeconds(maxDurationSec)})`,
-      );
-    }
-  }
-  return { videoId, metadata };
-}

package/src/services/audioAnalyzers/base.ts DELETED

@@ -1,32 +0,0 @@
-import type { AudioEvent } from '../../types/index.js';
-/**
- * Contract every audio analyzer implementation must satisfy.
- *
- * Each concrete analyzer (Gemini, Whisper, YAMNet) extends this class and
- * implements `detect()`. The `source` property is used to tag the events they
- * return so downstream code knows which backend produced them.
- *
- * Usage:
- *   const analyzer = new GeminiAudioAnalyzer();
- *   const events   = await analyzer.detect(audioPath, gameProfile, offsetSec, durationSec);
- */
-export abstract class AudioAnalyzer {
-  abstract readonly source: AudioEvent['source'];
-  /**
-   * Detect audio events in the given WAV slice.
-   *
-   * @param audioPath        - Path to the WAV audio slice on disk
-   * @param gameProfile      - Profile key used to tune detection prompts/classes
-   * @param chunkOffsetSec   - Absolute start time of this slice within the full video (seconds)
-   * @param chunkDurationSec - Duration of this slice in seconds
-   * @returns Array of detected events with absolute timestamps
-   */
-  abstract detect(
-    audioPath: string,
-    gameProfile: string,
-    chunkOffsetSec: number,
-    chunkDurationSec: number,
-  ): Promise<AudioEvent[]>;
-}

package/src/services/audioAnalyzers/factory.ts DELETED

@@ -1,69 +0,0 @@
-import { log } from '../../utils/logger.js';
-import { AudioAnalyzer } from './base.js';
-import { GeminiAudioAnalyzer } from './gemini.js';
-import { WhisperAudioAnalyzer } from './whisper.js';
-import { YAMNetAudioAnalyzer } from './yamnet.js';
-import type { AudioProviderName } from '../../types/index.js';
-const KNOWN_PROVIDERS = new Set<AudioProviderName>(['gemini', 'whisper', 'yamnet']);
-/**
- * Parses the AUDIO_PROVIDER config string into an ordered list of provider names.
- *
- * Accepts a comma-separated list: "gemini,whisper" → ['gemini', 'whisper']
- * Single values still work:        "yamnet"         → ['yamnet']
- *
- * Backward-compat: "both" is mapped to ['gemini', 'whisper'] with a deprecation warning.
- */
-export function parseProviderChain(providerString: string): AudioProviderName[] {
-  if (providerString.trim() === 'both') {
-    log.warn(
-      '[audio] AUDIO_PROVIDER=both is deprecated. Use AUDIO_PROVIDER=gemini,whisper instead.',
-    );
-    return ['gemini', 'whisper'];
-  }
-  const names = providerString
-    .split(',')
-    .map((s) => s.trim().toLowerCase())
-    .filter(Boolean);
-  if (names.length === 0) {
-    throw new Error(`AUDIO_PROVIDER is empty. Provide at least one of: gemini, whisper, yamnet`);
-  }
-  for (const name of names) {
-    if (!KNOWN_PROVIDERS.has(name as AudioProviderName)) {
-      throw new Error(
-        `Unknown audio provider "${name}". Valid options: gemini, whisper, yamnet (comma-separated for chain)`,
-      );
-    }
-  }
-  return names as AudioProviderName[];
-}
-/**
- * Builds an ordered array of AudioAnalyzer instances from a provider chain string.
- *
- * The EventDetector will walk this array in order — if the first analyzer fails,
- * it falls back to the next, and so on.
- *
- * @example
- *   // AUDIO_PROVIDER=gemini,whisper  →  [GeminiAudioAnalyzer, WhisperAudioAnalyzer]
- *   const chain = createAnalyzerChain(config.AUDIO_PROVIDER);
- */
-export function createAnalyzerChain(providerString: string): AudioAnalyzer[] {
-  const names = parseProviderChain(providerString);
-  return names.map((name) => {
-    switch (name) {
-      case 'gemini':
-        return new GeminiAudioAnalyzer();
-      case 'whisper':
-        return new WhisperAudioAnalyzer();
-      case 'yamnet':
-        return new YAMNetAudioAnalyzer();
-    }
-  });
-}

package/src/services/audioAnalyzers/gemini.ts DELETED

@@ -1,136 +0,0 @@
-import { GoogleGenerativeAI } from '@google/generative-ai';
-import * as fs from 'fs';
-import { z } from 'zod';
-import { config } from '../../config/index.js';
-import { log } from '../../utils/logger.js';
-import type { AudioEvent } from '../../types/index.js';
-import { AudioAnalyzer } from './base.js';
-/**
- * Gemini returns timestamps inconsistently as either:
- * - MM.SS notation: 1.03 = 1 min 3 sec = 63s
- * - True decimal seconds: 53.403 = 53.403s
- * Use normalizeGeminiTime() to resolve correct value.
- */
-const GeminiEventSchema = z.array(
-  z.object({
-    time_sec: z.number(),
-    event: z.string(),
-    confidence: z.number().min(0).max(1),
-  }),
-);
-const GAME_PROFILE_PROMPTS: Record<string, string> = {
-  valorant:
-    'You are analyzing audio from a Valorant gaming video. Identify ALL significant game events: kills, deaths, explosions, ability uses, spike plants/defuses, ace moments, clutch situations, crowd reactions, hype moments.',
-  fps: 'You are analyzing audio from an FPS gaming video. Identify ALL significant game events: kills, deaths, explosions, weapon fire, headshot sounds, kill streaks, crowd reactions, battle cries.',
-  boss_fight:
-    'You are analyzing audio from a boss fight video. Identify ALL significant game events: boss phase transitions, big hits, explosions, boss death, crowd cheering, epic moments, victory sounds.',
-  general:
-    'You are analyzing audio from a gaming video. Identify ALL significant audio events: explosions, gunshots, crowd reactions, cheering, epic moments, dramatic sounds.',
-};
-/**
- * Converts a MM.SS-notation value to decimal seconds.
- * e.g. 1.03 → 63, 1.40 → 100
- */
-function mmssToSeconds(value: number): number {
-  const minutes = Math.floor(value);
-  const seconds = Math.round((value % 1) * 100);
-  return minutes * 60 + seconds;
-}
-/**
- * Resolves a Gemini `time_sec` value to true decimal seconds.
- *
- * Gemini inconsistently returns either MM.SS notation (e.g. 1.03 meaning 63s)
- * or true decimal seconds (e.g. 53.403). This function disambiguates using
- * the known chunk duration:
- *
- * 1. If the fractional part > 0.59, it cannot be a seconds component (seconds
- *    only go 0-59), so it must be true decimal seconds — use as-is.
- * 2. Otherwise, check if the MM.SS conversion produces a value within the
- *    valid chunk range [0, chunkDurationSec). If yes, treat as MM.SS.
- * 3. Fallback: use the value as true decimal seconds (the format we asked for).
- *
- * YAMNet always returns true decimal seconds and does NOT use this function.
- */
-export function normalizeGeminiTime(value: number, chunkDurationSec: number): number {
-  const frac = value % 1;
-  if (Math.round(frac * 100) > 59) {
-    return value;
-  }
-  const mmss = mmssToSeconds(value);
-  if (mmss < chunkDurationSec) {
-    return mmss;
-  }
-  return value;
-}
-/**
- * Uses Google Gemini's multimodal API to detect audio events in a WAV slice.
- * Understands game context semantically — best accuracy for gaming content.
- *
- * Requires GOOGLE_GENERATIVE_AI_API_KEY to be set.
- */
-export class GeminiAudioAnalyzer extends AudioAnalyzer {
-  readonly source = 'gemini' as const;
-  async detect(
-    audioPath: string,
-    gameProfile: string,
-    chunkOffsetSec: number,
-    chunkDurationSec: number,
-  ): Promise<AudioEvent[]> {
-    const genai = new GoogleGenerativeAI(config.GOOGLE_GENERATIVE_AI_API_KEY!);
-    const model = genai.getGenerativeModel({ model: config.AUDIO_GEMINI_MODEL });
-    const audioData = fs.readFileSync(audioPath);
-    const base64Audio = audioData.toString('base64');
-    const extraInstructions = config.AUDIO_EXTRA_INSTRUCTIONS
-      ? `\nAdditional instructions:\n${config.AUDIO_EXTRA_INSTRUCTIONS}\n`
-      : '';
-    const prompt = `${GAME_PROFILE_PROMPTS[gameProfile] ?? GAME_PROFILE_PROMPTS.general} ${extraInstructions}
-For each event, return a JSON object with:
-- time_sec: the time in seconds (be very precise with the timestamp, Gemini is good at this when the format is correct)
-- event: a short description of the event (e.g., "gunshot", "explosion", "clutch moment")
-- confidence: your confidence level (0.0 to 1.0)
-Return ONLY a JSON array, no explanation. Format:
-[
-  {"time_sec": 12.5, "event": "gunshot", "confidence": 0.8},
-  {"time_sec": 45.2, "event": "explosion", "confidence": 0.9}
-]`;
-    const result = await model.generateContent([
-      { inlineData: { mimeType: 'audio/wav', data: base64Audio } },
-      prompt,
-    ]);
-    const text = result.response.text();
-    log.info(`[audio:gemini] response: ${text}`);
-    const cleaned = text
-      .replace(/^```(?:json)?\s*/i, '')
-      .replace(/\s*```\s*$/i, '')
-      .trim();
-    const parsed = GeminiEventSchema.safeParse(JSON.parse(cleaned));
-    if (!parsed.success) {
-      throw new Error(`Gemini response failed validation: ${parsed.error.message}`);
-    }
-    return parsed.data.map((e) => ({
-      time: normalizeGeminiTime(e.time_sec, chunkDurationSec) + chunkOffsetSec,
-      event: e.event,
-      confidence: e.confidence,
-      source: this.source,
-    }));
-  }
-}

package/src/services/audioAnalyzers/index.ts DELETED

@@ -1,6 +0,0 @@
-export { AudioAnalyzer } from './base.js';
-export { GeminiAudioAnalyzer, normalizeGeminiTime } from './gemini.js';
-export { WhisperAudioAnalyzer, getPythonBin } from './whisper.js';
-export { YAMNetAudioAnalyzer } from './yamnet.js';
-export { createAnalyzerChain, parseProviderChain } from './factory.js';
-export type { AudioProviderName } from '../../types/index.js';

package/src/services/audioAnalyzers/whisper.ts DELETED

@@ -1,80 +0,0 @@
-import { execa } from 'execa';
-import { config } from '../../config/index.js';
-import { log } from '../../utils/logger.js';
-import type { AudioEvent } from '../../types/index.js';
-import { AudioAnalyzer } from './base.js';
-/**
- * Resolves the Python interpreter binary, caching the result after the first
- * successful lookup. Shared by both Python-based analyzers (Whisper, YAMNet).
- */
-let _pythonBin: string | null = null;
-export async function getPythonBin(): Promise<string> {
-  if (_pythonBin) return _pythonBin;
-  for (const bin of ['python3', 'python']) {
-    try {
-      await execa(bin, ['--version']);
-      _pythonBin = bin;
-      return bin;
-    } catch {
-      log.warn(`[audio] ${bin} not found, trying next binary...`);
-    }
-  }
-  throw new Error(
-    'No Python interpreter found (tried python3, python). Install Python 3 to use YAMNet or Whisper.',
-  );
-}
-/**
- * Uses OpenAI Whisper (local) to transcribe the audio chunk and scan the
- * resulting transcript for hype keywords per game profile.
- *
- * Requires: pip install openai-whisper
- */
-export class WhisperAudioAnalyzer extends AudioAnalyzer {
-  readonly source = 'whisper' as const;
-  async detect(
-    audioPath: string,
-    gameProfile: string,
-    chunkOffsetSec: number,
-    _chunkDurationSec: number,
-  ): Promise<AudioEvent[]> {
-    const python = await getPythonBin();
-    let stdout: string;
-    try {
-      const result = await execa(python, [
-        'scripts/detect_events_whisper.py',
-        audioPath,
-        String(config.AUDIO_CONFIDENCE_THRESHOLD),
-        gameProfile,
-        config.AUDIO_WHISPER_MODEL,
-      ]);
-      stdout = result.stdout;
-    } catch (err) {
-      const message = err instanceof Error ? err.message : String(err);
-      if (message.includes('ModuleNotFoundError') || message.includes('No module named')) {
-        throw new Error(
-          'openai-whisper not installed. Run: pip install openai-whisper\n' +
-            'Or set AUDIO_PROVIDER=gemini in .env and configure GOOGLE_GENERATIVE_AI_API_KEY.',
-        );
-      }
-      throw new Error(`Whisper detection failed: ${message}`);
-    }
-    const events = JSON.parse(stdout) as Array<{ time: number; event: string; confidence: number }>;
-    return events.map((e) => ({
-      time: e.time + chunkOffsetSec,
-      event: e.event,
-      confidence: e.confidence,
-      source: this.source,
-    }));
-  }
-}

package/src/services/audioAnalyzers/yamnet.ts DELETED

@@ -1,54 +0,0 @@
-import { execa } from 'execa';
-import { config } from '../../config/index.js';
-import type { AudioEvent } from '../../types/index.js';
-import { AudioAnalyzer } from './base.js';
-import { getPythonBin } from './whisper.js';
-/**
- * Uses YAMNet (TensorFlow Hub) via a Python script to classify audio frames
- * against a fixed set of game-relevant sound classes (gunshot, explosion, etc.).
- *
- * Requires: pip install tensorflow-hub soundfile numpy
- */
-export class YAMNetAudioAnalyzer extends AudioAnalyzer {
-  readonly source = 'yamnet' as const;
-  async detect(
-    audioPath: string,
-    _gameProfile: string,
-    chunkOffsetSec: number,
-    _chunkDurationSec: number,
-  ): Promise<AudioEvent[]> {
-    const python = await getPythonBin();
-    let stdout: string;
-    try {
-      const result = await execa(python, [
-        'scripts/detect_events.py',
-        audioPath,
-        String(config.AUDIO_CONFIDENCE_THRESHOLD),
-      ]);
-      stdout = result.stdout;
-    } catch (err) {
-      const message = err instanceof Error ? err.message : String(err);
-      if (message.includes('ModuleNotFoundError') || message.includes('No module named')) {
-        throw new Error(
-          'YAMNet dependencies missing. Run: pip3 install tensorflow-hub soundfile numpy\n' +
-            'Or set AUDIO_PROVIDER=gemini in .env and configure GOOGLE_GENERATIVE_AI_API_KEY.',
-        );
-      }
-      throw new Error(`YAMNet detection failed: ${message}`);
-    }
-    const events = JSON.parse(stdout) as Array<{ time: number; event: string; confidence: number }>;
-    return events.map((e) => ({
-      time: e.time + chunkOffsetSec,
-      event: e.event,
-      confidence: e.confidence,
-      source: this.source,
-    }));
-  }
-}

package/src/services/audioDownloader/index.ts DELETED

@@ -1,102 +0,0 @@
-import { execa } from 'execa';
-import * as fs from 'fs';
-import * as path from 'path';
-import { config } from '../../config/index.js';
-function displayProgress(stream: 'stdout' | 'stderr'): (data: Buffer | string) => void {
-  return (data: Buffer | string) => {
-    const text = String(data);
-    const lines = text.split('\n').filter((line) => line.trim());
-    for (const line of lines) {
-      const progressMatch = line.match(/\[download\]\s+(\d+\.?\d*%)/);
-      if (progressMatch) {
-        process.stdout.write(`\r${progressMatch[0]}`);
-      }
-    }
-  };
-}
-export async function downloadAudio(videoId: string, outputDir: string): Promise<string> {
-  if (!fs.existsSync(outputDir)) {
-    fs.mkdirSync(outputDir, { recursive: true });
-  }
-  const outputPath = path.join(outputDir, `${videoId}_audio.wav`);
-  if (fs.existsSync(outputPath)) {
-    console.log(`[audio] Using cached audio: ${outputPath}`);
-    return outputPath;
-  }
-  console.log(`[audio] Downloading audio for ${videoId}...`);
-  try {
-    const args = [
-      '-x',
-      '--audio-format',
-      'wav',
-      '--audio-quality',
-      '0',
-      '--postprocessor-args',
-      'ffmpeg:-ar 16000 -ac 1',
-      '-o',
-      outputPath,
-      '--newline',
-      `https://youtube.com/watch?v=${videoId}`,
-    ];
-    if (config.FFMPEG_PATH) {
-      args.unshift('--ffmpeg-location', config.FFMPEG_PATH);
-    }
-    if (config.YT_DLP_COOKIES_FROM_BROWSER) {
-      args.unshift('--cookies-from-browser', config.YT_DLP_COOKIES_FROM_BROWSER);
-    } else if (config.YT_DLP_COOKIES_FILE) {
-      args.unshift('--cookies', config.YT_DLP_COOKIES_FILE);
-    }
-    const subprocess = execa('yt-dlp', args);
-    subprocess.stdout?.on('data', displayProgress('stdout'));
-    subprocess.stderr?.on('data', displayProgress('stderr'));
-    await subprocess;
-    process.stdout.write('\n');
-  } catch (err) {
-    const message = err instanceof Error ? err.message : String(err);
-    if (message.includes('command not found') || message.includes('ENOENT')) {
-      throw new Error('yt-dlp is required. Install it: https://github.com/yt-dlp/yt-dlp');
-    }
-    if (message.includes('ffprobe and ffmpeg not found') || message.includes('ffmpeg not found')) {
-      throw new Error(
-        'ffmpeg is required for audio processing. Install it: `brew install ffmpeg`' +
-          (config.FFMPEG_PATH
-            ? ''
-            : '\nOr set FFMPEG_PATH in your .env to your ffmpeg binary location.'),
-      );
-    }
-    if (message.includes('Private video')) {
-      throw new Error(`Video "${videoId}" is private and cannot be downloaded.`);
-    }
-    if (message.includes('Sign in to confirm') || message.includes('confirm you')) {
-      throw new Error(
-        `yt-dlp was blocked by YouTube bot detection for "${videoId}". ` +
-          `Set YT_DLP_COOKIES_FROM_BROWSER=chrome (or firefox/safari) in your .env to authenticate.`,
-      );
-    }
-    if (message.includes('not available in your country') || message.includes('geo')) {
-      throw new Error(`Video "${videoId}" is geo-blocked in your region.`);
-    }
-    throw new Error(`Audio download failed: ${message}`);
-  }
-  console.log(`[audio] Audio saved to ${outputPath}`);
-  return outputPath;
-}

package/src/services/chunkBuilder/index.ts DELETED

@@ -1,82 +0,0 @@
-import type { TranscriptLine, MicroBlock, LLMChunk } from '../../types/index.js';
-/**
- * Groups raw transcript lines into micro-blocks of approximately `windowSec` seconds.
- *
- * A new block starts whenever the next line's `start` falls outside the current window.
- * Empty input returns an empty array.
- */
-export function buildMicroBlocks(lines: TranscriptLine[], windowSec: number): MicroBlock[] {
-  if (lines.length === 0) return [];
-  const blocks: MicroBlock[] = [];
-  let windowStart = lines[0].start;
-  let texts: string[] = [];
-  for (const line of lines) {
-    if (line.start >= windowStart + windowSec) {
-      blocks.push({
-        start: windowStart,
-        end: line.start,
-        text: texts.join(' '),
-      });
-      windowStart = line.start;
-      texts = [];
-    }
-    texts.push(line.text);
-  }
-  if (texts.length > 0) {
-    const lastLine = lines[lines.length - 1];
-    blocks.push({
-      start: windowStart,
-      end: lastLine.start + lastLine.duration,
-      text: texts.join(' '),
-    });
-  }
-  return blocks;
-}
-/**
- * Groups micro-blocks into larger LLM analysis chunks using a sliding window
- * of `chunkLen` seconds with `overlap` seconds of overlap between consecutive chunks.
- *
- * Each chunk spans from the first block whose `start >= chunkStart` to the last block
- * whose `start < chunkStart + chunkLen`. The next chunk starts at
- * `chunkStart + chunkLen - overlap`.
- *
- * Empty input returns an empty array.
- */
-export function buildLLMChunks(
-  blocks: MicroBlock[],
-  chunkLen: number,
-  overlap: number,
-): LLMChunk[] {
-  if (blocks.length === 0) return [];
-  const chunks: LLMChunk[] = [];
-  const totalEnd = blocks[blocks.length - 1].end;
-  let chunkStart = blocks[0].start;
-  while (chunkStart < totalEnd) {
-    const chunkEnd = chunkStart + chunkLen;
-    const window = blocks.filter((b) => b.start >= chunkStart && b.start < chunkEnd);
-    if (window.length > 0) {
-      chunks.push({
-        start: window[0].start,
-        end: window[window.length - 1].end,
-        text: window.map((b) => b.text).join(' '),
-      });
-    }
-    const step = chunkLen - overlap;
-    chunkStart += step;
-    if (step <= 0) break;
-  }
-  return chunks;
-}