npm - @thunderkiller/video-clipper - Versions diffs - 1.2.0 → 1.3.1 - Mend

@thunderkiller/video-clipper 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

package/CHANGELOG.md +13 -0
package/LICENSE +15 -0
package/package.json +1 -1
package/.github/workflows/ci.yml +0 -42
package/.github/workflows/release.yml +0 -76
package/.husky/pre-commit +0 -3
package/.prettierignore +0 -6
package/.prettierrc +0 -7
package/.releaserc.json +0 -21
package/AGENTS.md +0 -122
package/docs/free-models.md +0 -78
package/docs/plan.md +0 -442
package/docs/refactorPhases.md +0 -105
package/docs/yt-downloader.md +0 -440
package/requirements.txt +0 -5
package/scripts/detect_events.py +0 -81
package/scripts/detect_events_whisper.py +0 -101
package/scripts/transcribe_whisper.py +0 -70
package/src/cli.ts +0 -186
package/src/config/env.ts +0 -18
package/src/config/index.ts +0 -2
package/src/index.ts +0 -46
package/src/pipeline/runner.ts +0 -147
package/src/pipeline/stages/audioProcessor.ts +0 -127
package/src/pipeline/stages/clipExporter.ts +0 -76
package/src/pipeline/stages/segmentAnalyzer.ts +0 -72
package/src/pipeline/stages/segmentSelector.ts +0 -39
package/src/pipeline/stages/videoResolver.ts +0 -44
package/src/services/audioAnalyzers/base.ts +0 -32
package/src/services/audioAnalyzers/factory.ts +0 -69
package/src/services/audioAnalyzers/gemini.ts +0 -136
package/src/services/audioAnalyzers/index.ts +0 -6
package/src/services/audioAnalyzers/whisper.ts +0 -80
package/src/services/audioAnalyzers/yamnet.ts +0 -54
package/src/services/audioDownloader/index.ts +0 -102
package/src/services/chunkBuilder/index.ts +0 -82
package/src/services/clipGenerator/index.ts +0 -210
package/src/services/clipRefiner/index.ts +0 -141
package/src/services/eventDetector/index.ts +0 -68
package/src/services/llmAnalyzer/LLMAnalyzer.ts +0 -98
package/src/services/llmAnalyzer/index.ts +0 -231
package/src/services/metadataExtractor/index.ts +0 -83
package/src/services/segmentRanker/index.ts +0 -88
package/src/services/signalMerger/index.ts +0 -53
package/src/services/transcriptAnalyzers/base.ts +0 -26
package/src/services/transcriptAnalyzers/factory.ts +0 -66
package/src/services/transcriptAnalyzers/gemini.ts +0 -24
package/src/services/transcriptAnalyzers/index.ts +0 -6
package/src/services/transcriptAnalyzers/whisper.ts +0 -68
package/src/services/transcriptAnalyzers/ytdlp.ts +0 -19
package/src/services/transcriptDetector/index.ts +0 -122
package/src/services/transcriptFetcher/index.ts +0 -147
package/src/services/urlParser/index.ts +0 -52
package/src/services/videoDownloader/index.ts +0 -268
package/src/types/analyzer.ts +0 -23
package/src/types/audio.ts +0 -19
package/src/types/cache.ts +0 -8
package/src/types/cli.ts +0 -22
package/src/types/config.ts +0 -151
package/src/types/downloader.ts +0 -15
package/src/types/factory.ts +0 -3
package/src/types/index.ts +0 -40
package/src/types/pipeline.ts +0 -60
package/src/types/segment.ts +0 -43
package/src/types/transcript.ts +0 -22
package/src/types/video.ts +0 -18
package/src/utils/cache.ts +0 -224
package/src/utils/chunker.ts +0 -60
package/src/utils/dumper.ts +0 -41
package/src/utils/format.ts +0 -10
package/src/utils/logger.ts +0 -17
package/src/utils/modelFactory.ts +0 -71
package/src/utils/redactConfig.ts +0 -23
package/src/utils/sliceAudio.ts +0 -35
package/test-trigger.txt +0 -1
package/tests/analyzerFactory.test.ts +0 -146
package/tests/audioEventDetector.test.ts +0 -69
package/tests/cache.test.ts +0 -203
package/tests/chunkBuilder.test.ts +0 -146
package/tests/chunker.test.ts +0 -95
package/tests/eventDetector.test.ts +0 -103
package/tests/llmAnalyzer.test.ts +0 -283
package/tests/segmentRanker.test.ts +0 -133
package/tests/setup.ts +0 -48
package/tests/signalMerger.test.ts +0 -197
package/tests/transcriptDetector.test.ts +0 -150
package/tests/transcriptFetcher.test.ts +0 -179
package/tests/urlParser.test.ts +0 -70
package/tsconfig.json +0 -16
package/tsconfig.test.json +0 -8
package/vitest.config.ts +0 -8

package/src/services/clipGenerator/index.ts DELETED Viewed

@@ -1,210 +0,0 @@
-import ffmpeg from 'fluent-ffmpeg';
-import { promises as fs } from 'fs';
-import { join } from 'path';
-import pLimit from 'p-limit';
-import { config } from '../../config/index.js';
-import { log } from '../../utils/logger.js';
-import { formatSeconds } from '../../utils/format.js';
-import type { RankedSegment } from '../../types/index.js';
-if (config.FFMPEG_PATH) { // top-level statement: override the ffmpeg binary path only when one is configured
-  ffmpeg.setFfmpegPath(config.FFMPEG_PATH);
-}
-if (config.FFPROBE_PATH) { // likewise for ffprobe: override only when a path is configured
-  ffmpeg.setFfprobePath(config.FFPROBE_PATH);
-}
-/**
- * Cuts a single clip from a video file using fluent-ffmpeg.
- * Re-encodes with libx264 (video) and aac (audio) for perfect audio/video sync.
- *
- * Both timestamps are shifted by `config.TIMESTAMP_OFFSET_SECONDS` before cutting.
- * The shifted start is clamped to 0, and the shifted end is forced to be at least
- * one second after the shifted start, so the requested duration is never below 1s.
- *
- * @param videoPath - Input handed to `ffmpeg(...)`
- * @param start - Clip start in seconds, before the offset is applied
- * @param end - Clip end in seconds, before the offset is applied
- * @param outputPath - Destination handed to `.output(...)`
- * @returns Promise resolving to `outputPath` once ffmpeg emits `end`
- * @throws {Error} (as a rejection) with the error from ffmpeg's `error` event
- */
-function cutClip(
-  videoPath: string,
-  start: number,
-  end: number,
-  outputPath: string,
-): Promise<string> {
-  const adjustedStart = Math.max(0, start + config.TIMESTAMP_OFFSET_SECONDS); // apply offset, never seek before 0
-  const adjustedEnd = Math.max(adjustedStart + 1, end + config.TIMESTAMP_OFFSET_SECONDS); // apply offset, keep at least 1s of clip
-  const duration = adjustedEnd - adjustedStart;
-  log.info(
-    `Cutting clip: start=${adjustedStart.toFixed(2)}s, end=${adjustedEnd.toFixed(2)}s, duration=${duration.toFixed(2)}s`,
-  );
-  if (config.TIMESTAMP_OFFSET_SECONDS !== 0) { // mention the offset only when it actually changed the timestamps
-    log.info(`  Timestamp offset applied: ${config.TIMESTAMP_OFFSET_SECONDS}s`);
-  }
-  return new Promise((resolve, reject) => { // wraps fluent-ffmpeg's event callbacks in a promise
-    ffmpeg(videoPath)
-      .setStartTime(adjustedStart)
-      .setDuration(duration)
-      .outputOptions('-c:v', 'libx264')
-      .outputOptions('-preset', config.FFMPEG_PRESET)
-      .outputOptions('-c:a', 'aac')
-      .output(outputPath)
-      .on('end', () => resolve(outputPath)) // success: hand back the path that was written
-      .on('error', (err: Error) => reject(err)) // failure: propagate ffmpeg's error unchanged
-      .run();
-  });
-}
-/**
- * Copies a pre-downloaded segment file to the output directory.
- * Used when videos are downloaded via --download-sections segments mode.
- *
- * The destination directory is `customPath`, or `config.OUTPUT_DIR` when
- * `customPath` is falsy. Only the last `/`-separated component of `outputPath`
- * is kept as the file name; any directory part of `outputPath` is ignored.
- * The directory is not created here.
- * NOTE(review): splitting on `/` does not strip backslash-separated directories;
- * confirm whether Windows-style paths can reach this function.
- *
- * @param sourcePath - File to copy
- * @param outputPath - Path whose final component names the copied file
- * @param customPath - Optional output directory override
- * @returns The path the file was copied to
- * @throws {Error} if `fs.copyFile` rejects
- */
-async function copySegment(
-  sourcePath: string,
-  outputPath: string,
-  customPath?: string,
-): Promise<string> {
-  const outputDir = customPath || config.OUTPUT_DIR;
-  const finalOutputPath = join(outputDir, outputPath.split('/').pop() || ''); // keep only the file-name part of outputPath
-  await fs.copyFile(sourcePath, finalOutputPath);
-  return finalOutputPath;
-}
-/**
- * Generates video clips for each ranked segment using fluent-ffmpeg.
- *
- * - Auto-creates the output directory if it doesn't exist. This happens before
- *   the empty-`segments` check, so the directory is created even when there is
- *   nothing to cut.
- * - Runs clips with controlled concurrency via p-limit.
- * - Logs a warning per failed clip; never aborts the entire run.
- * - Re-encodes with libx264/aac for accurate audio/video sync.
- * - Names each file `<videoId>_<floor(start)>_<ceil(end)>.mp4`, while the cut
- *   itself uses the unrounded `segment.start` / `segment.end`.
- *
- * @param videoPath - Local path to the downloaded mp4
- * @param segments - Ranked segments to cut
- * @param videoId - Used to name output files
- * @param customPath - Custom output directory (optional, overrides OUTPUT_DIR)
- * @param concurrency - Maximum number of parallel clip operations (default: 1)
- * @returns Array of successfully written clip file paths
- * @throws {Error} if the output directory cannot be created. ffmpeg failures do
- *   not throw here: each one is collected by `Promise.allSettled` and logged.
- *   NOTE(review): no ffmpeg availability probe is run in this function.
- */
-export async function generateClips(
-  videoPath: string,
-  segments: RankedSegment[],
-  videoId: string,
-  customPath?: string,
-  concurrency: number = 1,
-): Promise<string[]> {
-  const outputDir = customPath || config.OUTPUT_DIR;
-  await fs.mkdir(outputDir, { recursive: true });
-  if (segments.length === 0) {
-    log.warn('No segments provided to generateClips — nothing to cut.');
-    return [];
-  }
-  const limit = pLimit(concurrency);
-  log.info(
-    `Generating ${segments.length} clip${segments.length !== 1 ? 's' : ''} from local video (max ${concurrency} parallel)...`,
-  );
-  const jobs = segments.map((segment, i) => // NOTE(review): index `i` is not used inside this callback
-    limit(async () => {
-      const startInt = Math.floor(segment.start); // rounded values are used for the file name and log line only
-      const endInt = Math.ceil(segment.end);
-      const outputPath = join(outputDir, `${videoId}_${startInt}_${endInt}.mp4`);
-      log.info(
-        `Cutting clip: ${outputPath} (${formatSeconds(startInt)} – ${formatSeconds(endInt)})`,
-      );
-      return cutClip(videoPath, segment.start, segment.end, outputPath);
-    }),
-  );
-  const results = await Promise.allSettled(jobs); // wait for every job, whether it succeeded or failed
-  const paths: string[] = [];
-  for (let i = 0; i < results.length; i++) {
-    const result = results[i];
-    const segment = segments[i]; // results are in the same order as jobs, which were mapped from segments
-    if (result.status === 'fulfilled') {
-      log.info(`Clip ready: ${result.value}`);
-      paths.push(result.value);
-    } else {
-      const reason = result.reason instanceof Error ? result.reason.message : String(result.reason);
-      log.warn(
-        `Failed to cut clip for segment [${formatSeconds(segment.start)} – ${formatSeconds(segment.end)}] (rank ${segment.rank}): ${reason}`,
-      );
-    }
-  }
-  return paths;
-}
-/**
- * Organizes pre-downloaded segment files from downloads/ to outputs/.
- * Used when videos are downloaded via --download-sections segments mode.
- *
- * Each source file is copied (not moved) into the output directory under its
- * own file name, taken as the last `/`-separated component of the source path.
- * Copies run with controlled concurrency via p-limit. A failed copy is logged
- * as a warning and left out of the result; it does not abort the other copies.
- * An empty `sourcePaths` returns early, before the output directory is created.
- *
- * @param sourcePaths - Paths to the pre-downloaded segment files in downloads/
- * @param videoId - Not read by this function as written.
- *   NOTE(review): previously documented as "used to verify file naming", but
- *   no such check is present; confirm whether one was intended.
- * @param customPath - Custom output directory (optional, overrides OUTPUT_DIR)
- * @param concurrency - Maximum number of parallel copy operations (default: 1)
- * @returns Array of organized clip file paths in outputs/
- * @throws {Error} if the output directory cannot be created
- */
-export async function organizeClips(
-  sourcePaths: string[],
-  videoId: string,
-  customPath?: string,
-  concurrency: number = 1,
-): Promise<string[]> {
-  if (sourcePaths.length === 0) {
-    log.warn('No pre-downloaded segments to organize.');
-    return [];
-  }
-  const outputDir = customPath || config.OUTPUT_DIR;
-  await fs.mkdir(outputDir, { recursive: true });
-  const limit = pLimit(concurrency);
-  log.info(
-    `Organizing ${sourcePaths.length} clip${sourcePaths.length !== 1 ? 's' : ''} (max ${concurrency} parallel)...`,
-  );
-  const jobs = sourcePaths.map((sourcePath) =>
-    limit(async () => {
-      const filename = sourcePath.split('/').pop() || ''; // file-name part of the source path
-      const outputPath = join(outputDir, filename);
-      log.info(`Organizing clip: ${outputPath}`);
-      return copySegment(sourcePath, outputPath, customPath); // copySegment re-derives the same directory from customPath
-    }),
-  );
-  const results = await Promise.allSettled(jobs); // wait for every copy, whether it succeeded or failed
-  const paths: string[] = [];
-  for (let i = 0; i < results.length; i++) {
-    const result = results[i];
-    if (result.status === 'fulfilled') {
-      log.info(`Clip ready: ${result.value}`);
-      paths.push(result.value);
-    } else {
-      const sourcePath = sourcePaths[i]; // results are in the same order as sourcePaths
-      const reason = result.reason instanceof Error ? result.reason.message : String(result.reason);
-      log.warn(`Failed to organize clip ${sourcePath}: ${reason}`);
-    }
-  }
-  return paths;
-}
-/**
- * Probes ffmpeg availability by asking fluent-ffmpeg for its available formats
- * (`ffmpeg.getAvailableFormats`).
- * Any error reported by that call is replaced with a fixed message; the
- * underlying error is not included in it.
- * NOTE(review): this function is not exported and is not called in the part of
- * the file shown here; confirm whether it is still needed.
- *
- * @throws {Error} with an actionable install message when the probe reports an error
- */
-async function verifyFfmpeg(): Promise<void> {
-  await new Promise<void>((resolve, reject) => {
-    ffmpeg.getAvailableFormats((err) => {
-      if (err) {
-        reject(new Error('ffmpeg is required for clip generation. Install it first.'));
-      } else {
-        resolve();
-      }
-    });
-  });
-}

package/src/services/clipRefiner/index.ts DELETED Viewed

@@ -1,141 +0,0 @@
-import { generateObject } from 'ai';
-import pLimit from 'p-limit';
-import { z } from 'zod';
-import { config } from '../../config/index.js';
-import { log } from '../../utils/logger.js';
-import { getModel } from '../../utils/modelFactory.js';
-import { Cache } from '../../utils/cache.js';
-import type { RankedSegment, MicroBlock } from '../../types/index.js';
-const CONTEXT_PADDING_SEC = 30; // seconds of transcript context added before and after a segment
-const RefinedBoundariesSchema = z.object({ // shape requested from the model in refineSegment
-  clip_start: z.number().describe('Refined clip start time in seconds'),
-  clip_end: z.number().describe('Refined clip end time in seconds'),
-});
-/**
- * Extracts the micro-block text that falls within a context window
- * around the segment, padded by CONTEXT_PADDING_SEC on each side.
- *
- * The window start is clamped to 0; the window end is not clamped to any
- * upper limit. A block is included when it overlaps the window at all
- * (strict comparisons, so a block that only touches a window edge is left out).
- * The selected block texts are joined with single spaces, in `allBlocks` order.
- *
- * @param segment - Segment whose `start` / `end` define the centre of the window
- * @param allBlocks - Candidate blocks; each is read for `start`, `end` and `text`
- * @returns The joined text together with the window bounds that were used
- */
-function buildContextText(
-  segment: RankedSegment,
-  allBlocks: MicroBlock[],
-): { text: string; windowStart: number; windowEnd: number } {
-  const windowStart = Math.max(0, segment.start - CONTEXT_PADDING_SEC); // never start before 0
-  const windowEnd = segment.end + CONTEXT_PADDING_SEC;
-  const contextBlocks = allBlocks.filter((b) => b.end > windowStart && b.start < windowEnd); // keep blocks overlapping the window
-  return {
-    text: contextBlocks.map((b) => b.text).join(' '),
-    windowStart,
-    windowEnd,
-  };
-}
-/**
- * Builds the refinement prompt for a single ranked segment.
- *
- * The prompt embeds the segment's current start, end and reason, the context
- * window bounds, the context transcript text, and a list of rules for the
- * model. The rules are instructions only; nothing in this function enforces
- * them on the model's answer.
- *
- * @param segment - Segment whose `start`, `end` and `reason` are quoted in the prompt
- * @param contextText - Transcript text covering the context window
- * @param windowStart - Lower bound quoted to the model for `clip_start`
- * @param windowEnd - Upper bound quoted to the model for `clip_end`
- * @returns The prompt string
- */
-function buildPrompt(
-  segment: RankedSegment,
-  contextText: string,
-  windowStart: number,
-  windowEnd: number,
-): string {
-  return `You are a video editor refining clip boundaries.
-Goal: tighten the clip so it starts just before the interesting moment begins
-and ends just after it concludes, giving it a natural entry and exit point.
-Avoid cutting in the middle of a sentence.
-Current clip:
-START: ${segment.start}s
-END: ${segment.end}s
-REASON: ${segment.reason}
-Broader transcript context (${windowStart}s – ${windowEnd}s):
-${contextText}
-Rules:
-- clip_start must be >= ${windowStart}
-- clip_end must be <= ${windowEnd}
-- clip_start must be less than clip_end
-- Only make small adjustments (seconds, not minutes)`;
-}
-/**
- * Refines the boundaries of a single ranked segment via a second LLM pass.
- * Returns the segment with updated start/end if successful.
- *
- * Flow: unless `noCache` is set, look up a cached refinement keyed by the
- * segment's start, end and reason, and return it on a hit. Otherwise build the
- * context window, ask the model for `clip_start` / `clip_end`, adjust the
- * answer, store it in the cache (again unless `noCache`), and return it.
- *
- * This function has no error handling of its own: a failed model call or
- * cache operation rejects the returned promise. The fallback to the original
- * boundaries is done by `refineSegments`, which keeps the input segment when
- * this promise rejects.
- *
- * @param segment - Segment to refine; all other fields are copied through unchanged
- * @param allBlocks - Micro-blocks used to build the transcript context
- * @param noCache - When true, the cache is neither read nor written
- * @returns A copy of `segment` with `start` / `end` replaced
- */
-async function refineSegment(
-  segment: RankedSegment,
-  allBlocks: MicroBlock[],
-  noCache: boolean,
-): Promise<RankedSegment> {
-  const cache = new Cache(config.CACHE_DIR); // a new Cache object is constructed on every call
-  if (!noCache) {
-    const cached = await cache.readSegmentRefinement(segment.start, segment.end, segment.reason);
-    if (cached) {
-      log.info(`[segment] cache hit (rank=${segment.rank})`);
-      return { ...segment, start: cached.refined_start, end: cached.refined_end };
-    }
-  }
-  const { text, windowStart, windowEnd } = buildContextText(segment, allBlocks);
-  const { object } = await generateObject({
-    model: getModel(),
-    schema: RefinedBoundariesSchema,
-    prompt: buildPrompt(segment, text, windowStart, windowEnd),
-    maxRetries: config.LLM_MAX_RETRIES,
-  });
-  /**
-   * Adjust the model's answer: the start is raised to at least windowStart and
-   * capped at clip_end - 1; the end is capped at windowEnd and raised to at
-   * least clip_start + 1.
-   * NOTE(review): the start has no upper bound of windowEnd and the end has no
-   * lower bound of windowStart, so an answer lying entirely outside the window
-   * can still yield start >= end. Confirm whether that needs handling.
-   */
-  const refinedStart = Math.max(windowStart, Math.min(object.clip_start, object.clip_end - 1));
-  const refinedEnd = Math.min(windowEnd, Math.max(object.clip_end, object.clip_start + 1));
-  if (!noCache) {
-    await cache.writeSegmentRefinement(segment.start, segment.end, segment.reason, { // keyed by the ORIGINAL boundaries and reason
-      refined_start: refinedStart,
-      refined_end: refinedEnd,
-    });
-  }
-  return { ...segment, start: refinedStart, end: refinedEnd };
-}
-/**
- * Runs a second LLM pass on all ranked segments in parallel to tighten clip boundaries.
- * Segments that fail refinement retain their original boundaries.
- *
- * Parallelism is capped by p-limit at `concurrency`. The output has one entry
- * per input segment, in the same order. A failed refinement is logged as a
- * warning and the corresponding input segment is returned as-is.
- *
- * @param segments - Ranked segments to refine
- * @param allBlocks - Micro-blocks passed to each refinement for transcript context
- * @param concurrency - Maximum number of refinements running at once
- * @param noCache - Forwarded to each refinement; defaults to false
- * @returns RankedSegment[] with refined (or original) start/end values
- */
-export async function refineSegments(
-  segments: RankedSegment[],
-  allBlocks: MicroBlock[],
-  concurrency: number,
-  noCache = false,
-): Promise<RankedSegment[]> {
-  log.info(
-    `Refining boundaries for ${segments.length} segment${segments.length !== 1 ? 's' : ''} (max ${concurrency} parallel)...`,
-  );
-  const limit = pLimit(concurrency);
-  const results = await Promise.allSettled( // settle every refinement so one failure cannot reject the whole batch
-    segments.map((segment) => limit(() => refineSegment(segment, allBlocks, noCache))),
-  );
-  const refined = results.map((result, i) => {
-    if (result.status === 'fulfilled') {
-      return result.value;
-    }
-    const reason = result.reason instanceof Error ? result.reason.message : String(result.reason);
-    log.warn(`[segment rank=${segments[i].rank}] refinement skipped: ${reason}`);
-    return segments[i]; // fall back to the unrefined segment at the same index
-  });
-  log.info(`Refinement complete`);
-  return refined;
-}

package/src/services/eventDetector/index.ts DELETED Viewed

@@ -1,68 +0,0 @@
-import { log } from '../../utils/logger.js';
-import type { AudioAnalyzer } from '../audioAnalyzers/index.js';
-import type { AudioEvent } from '../../types/index.js';
-/**
- * Top-level audio event detector.
- *
- * Holds an ordered chain of AudioAnalyzer instances and walks the chain on each
- * `detect()` call: the first analyzer that succeeds wins. If an analyzer throws,
- * the error is logged and the next analyzer in the chain is tried. If the entire
- * chain is exhausted without success the error from the last analyzer is re-thrown.
- *
- * The chain is built once at startup via `createAnalyzerChain(config.AUDIO_PROVIDER)`
- * and injected here, keeping provider-selection logic out of this class.
- *
- * The constructor throws an Error when given an empty chain.
- *
- * @example
- *   const chain    = createAnalyzerChain('gemini,whisper');
- *   const detector = new EventDetector(chain);
- *   const events   = await detector.detect(audioPath, 'valorant', 0, 120);
- */
-export class EventDetector {
-  constructor(private readonly chain: AudioAnalyzer[]) {
-    if (chain.length === 0) { // reject an empty chain at construction time
-      throw new Error('EventDetector requires at least one AudioAnalyzer in the chain.');
-    }
-  }
-  /**
-   * Detect audio events by walking the analyzer chain in order.
-   * Falls back to the next analyzer whenever one throws.
-   *
-   * All four arguments are forwarded unchanged to each analyzer's `detect`.
-   * A failure is logged as a warning when another analyzer follows, and as an
-   * error when it was the last one in the chain.
-   *
-   * @returns The events from the first analyzer that does not throw
-   * @throws The error raised by the last analyzer tried, when none succeeds
-   */
-  async detect(
-    audioPath: string,
-    gameProfile: string,
-    chunkOffsetSec: number,
-    chunkDurationSec: number,
-  ): Promise<AudioEvent[]> {
-    let lastError: unknown;
-    for (let i = 0; i < this.chain.length; i++) {
-      const analyzer = this.chain[i];
-      const isLast = i === this.chain.length - 1; // decides between the "falling back" and "no more fallbacks" log lines
-      try {
-        const events = await analyzer.detect(
-          audioPath,
-          gameProfile,
-          chunkOffsetSec,
-          chunkDurationSec,
-        );
-        log.info(`[audio:${analyzer.source}] detected ${events.length} events`);
-        return events; // first success wins; later analyzers are not tried
-      } catch (err) {
-        lastError = err; // remembered so the final failure can be re-thrown after the loop
-        const message = err instanceof Error ? err.message : String(err);
-        if (!isLast) {
-          const nextSource = this.chain[i + 1].source;
-          log.warn(`[audio:${analyzer.source}] failed, falling back to ${nextSource}: ${message}`);
-        } else {
-          log.error(`[audio:${analyzer.source}] failed (no more fallbacks): ${message}`);
-        }
-      }
-    }
-    throw lastError; // reached only when no analyzer returned successfully
-  }
-}

package/src/services/llmAnalyzer/LLMAnalyzer.ts DELETED Viewed

@@ -1,98 +0,0 @@
-import { analyzeChunks } from './index.js';
-import { refineSegments } from '../clipRefiner/index.js';
-import { log } from '../../utils/logger.js';
-import { config } from '../../config/index.js';
-import type { TranscriptDetector } from '../transcriptDetector/index.js';
-import type { Cache } from '../../utils/cache.js';
-import type {
-  TranscriptLine,
-  MicroBlock,
-  LLMChunk,
-  AudioEvent,
-  ChunkEvaluation,
-  RankedSegment,
-  LLMAnalyzerResult,
-  LLMAnalyzerOpts,
-} from '../../types/index.js';
-/**
- * LLMAnalyzer — orchestrates transcript fetching + LLM-based segment analysis.
- *
- * Owns a TranscriptDetector (which encapsulates the provider chain) and the
- * Cache. Audio events are provided externally — they are pre-computed by the
- * AudioProcessor stage so that the full per-chunk caching logic stays in
- * audioProcessor.ts and is not duplicated here.
- *
- * LLM pass 1 (`analyze`) — fetches transcript, runs chunk analysis.
- * LLM pass 2 (`refine`)  — tightens clip boundaries on ranked segments.
- *
- * The free functions in `llmAnalyzer/index.ts` and `clipRefiner/index.ts` are
- * NOT modified — this class wraps them.
- *
- * @example
- *   const analyzer  = new LLMAnalyzer(transcriptDetector, cache);
- *   const result    = await analyzer.analyze({ videoId, audioPath, audioEvents, ... });
- *   // ... ranking step ...
- *   const refined   = await analyzer.refine(rankedSegments, result.microBlocks, opts);
- */
-export class LLMAnalyzer {
-  constructor(
-    private readonly transcriptDetector: TranscriptDetector,
-    private readonly cache: Cache,
-  ) {}
-  /**
-   * LLM pass 1 — fetch transcript then run chunk analysis.
-   *
-   * Returns lines, microBlocks, chunks, and chunkEvals so the caller has
-   * everything needed for the ranking step.
-   *
-   * The number of chunks analyzed is limited by `opts.maxChunks`, falling back
-   * to `config.MAX_CHUNKS`; when neither is set, every chunk is analyzed. Only
-   * the leading chunks are kept when a limit applies. The returned `chunks`
-   * is always the full, unsliced list.
-   * NOTE(review): the limit log line names `--max-chunks` even when the limit
-   * came from `config.MAX_CHUNKS`.
-   *
-   * @param opts - Reads `videoId`, `audioPath`, `maxChunks`, `audioEvents`,
-   *   `maxParallel` and `noCache`
-   * @returns The transcript lines, micro-blocks, all chunks, and the chunk evaluations
-   * @throws {Error} when no chunk evaluation has status `'success'`
-   */
-  async analyze(opts: LLMAnalyzerOpts): Promise<LLMAnalyzerResult> {
-    const { lines, microBlocks, chunks } = await this.transcriptDetector.detect(
-      opts.videoId,
-      opts.audioPath,
-      this.cache,
-    );
-    const chunkLimit = opts.maxChunks ?? config.MAX_CHUNKS; // per-call option wins over the configured default
-    const chunksToAnalyze = chunkLimit !== undefined ? chunks.slice(0, chunkLimit) : chunks;
-    if (chunkLimit !== undefined) {
-      log.info(
-        `Limiting evaluation to ${chunksToAnalyze.length} of ${chunks.length} chunks (--max-chunks ${chunkLimit})`,
-      );
-    }
-    log.info(
-      `Analyzing chunks with ${config.LLM_MODEL} (${chunksToAnalyze.length} chunks, max ${opts.maxParallel} parallel)...`,
-    );
-    const chunkEvals = await analyzeChunks(
-      chunksToAnalyze,
-      lines,
-      opts.audioEvents,
-      opts.maxParallel,
-      opts.noCache,
-    );
-    const succeededCount = chunkEvals.filter((e) => e.status === 'success').length;
-    if (succeededCount === 0) { // nothing usable came back, so ranking would have no input
-      throw new Error('All chunks failed LLM analysis. Check your API key and model config.');
-    }
-    return { lines, microBlocks, chunks, chunkEvals };
-  }
-  /**
-   * LLM pass 2 — tighten clip boundaries on already-ranked segments.
-   * Must be called after ranking, since it takes `RankedSegment[]` as input.
-   *
-   * Thin wrapper: logs one line and delegates to `refineSegments`, passing
-   * `opts.maxParallel` as the concurrency and `opts.noCache` through.
-   *
-   * @returns Whatever `refineSegments` resolves to for these inputs
-   */
-  async refine(
-    rankedSegments: RankedSegment[],
-    microBlocks: MicroBlock[],
-    opts: Pick<LLMAnalyzerOpts, 'maxParallel' | 'noCache'>,
-  ): Promise<RankedSegment[]> {
-    log.info('Refining clip boundaries...');
-    return refineSegments(rankedSegments, microBlocks, opts.maxParallel, opts.noCache);
-  }
-}