npm - @thunderkiller/video-clipper - Versions diffs - 1.1.0 - Mend

@thunderkiller/video-clipper 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (140) hide show

package/.env.example +130 -0
package/.github/workflows/ci.yml +42 -0
package/.github/workflows/release.yml +72 -0
package/.husky/pre-commit +3 -0
package/.prettierignore +6 -0
package/.prettierrc +7 -0
package/.releaserc.json +21 -0
package/AGENTS.md +122 -0
package/CHANGELOG.md +45 -0
package/README.md +410 -0
package/dist/cli.js +187 -0
package/dist/config/env.js +14 -0
package/dist/config/index.js +1 -0
package/dist/index.js +35 -0
package/dist/pipeline/runner.js +132 -0
package/dist/pipeline/stages/audioProcessor.js +75 -0
package/dist/pipeline/stages/clipExporter.js +44 -0
package/dist/pipeline/stages/segmentAnalyzer.js +46 -0
package/dist/pipeline/stages/segmentSelector.js +23 -0
package/dist/pipeline/stages/videoResolver.js +34 -0
package/dist/services/audioAnalyzers/base.js +13 -0
package/dist/services/audioAnalyzers/factory.js +56 -0
package/dist/services/audioAnalyzers/gemini.js +109 -0
package/dist/services/audioAnalyzers/index.js +5 -0
package/dist/services/audioAnalyzers/whisper.js +62 -0
package/dist/services/audioAnalyzers/yamnet.js +40 -0
package/dist/services/audioDownloader/index.js +81 -0
package/dist/services/chunkBuilder/index.js +71 -0
package/dist/services/clipGenerator/index.js +156 -0
package/dist/services/clipRefiner/index.js +103 -0
package/dist/services/eventDetector/index.js +54 -0
package/dist/services/llmAnalyzer/LLMAnalyzer.js +63 -0
package/dist/services/llmAnalyzer/index.js +173 -0
package/dist/services/metadataExtractor/index.js +66 -0
package/dist/services/segmentRanker/index.js +40 -0
package/dist/services/signalMerger/index.js +36 -0
package/dist/services/transcriptAnalyzers/base.js +13 -0
package/dist/services/transcriptAnalyzers/factory.js +51 -0
package/dist/services/transcriptAnalyzers/gemini.js +19 -0
package/dist/services/transcriptAnalyzers/index.js +5 -0
package/dist/services/transcriptAnalyzers/whisper.js +55 -0
package/dist/services/transcriptAnalyzers/ytdlp.js +16 -0
package/dist/services/transcriptDetector/index.js +102 -0
package/dist/services/transcriptFetcher/index.js +124 -0
package/dist/services/urlParser/index.js +46 -0
package/dist/services/videoDownloader/index.js +212 -0
package/dist/types/audio.js +15 -0
package/dist/types/cli.js +1 -0
package/dist/types/config.js +150 -0
package/dist/types/index.js +5 -0
package/dist/types/pipeline.js +9 -0
package/dist/types/segment.js +36 -0
package/dist/types/transcript.js +16 -0
package/dist/types/video.js +14 -0
package/dist/utils/cache.js +143 -0
package/dist/utils/chunker.js +51 -0
package/dist/utils/dumper.js +36 -0
package/dist/utils/format.js +10 -0
package/dist/utils/logger.js +16 -0
package/dist/utils/modelFactory.js +60 -0
package/dist/utils/redactConfig.js +20 -0
package/dist/utils/sliceAudio.js +26 -0
package/docs/free-models.md +78 -0
package/docs/plan.md +442 -0
package/docs/refactorPhases.md +105 -0
package/docs/yt-downloader.md +440 -0
package/package.json +65 -0
package/requirements.txt +5 -0
package/scripts/detect_events.py +81 -0
package/scripts/detect_events_whisper.py +101 -0
package/scripts/transcribe_whisper.py +70 -0
package/src/cli.ts +186 -0
package/src/config/env.ts +18 -0
package/src/config/index.ts +2 -0
package/src/index.ts +46 -0
package/src/pipeline/runner.ts +155 -0
package/src/pipeline/stages/audioProcessor.ts +129 -0
package/src/pipeline/stages/clipExporter.ts +80 -0
package/src/pipeline/stages/segmentAnalyzer.ts +72 -0
package/src/pipeline/stages/segmentSelector.ts +39 -0
package/src/pipeline/stages/videoResolver.ts +47 -0
package/src/services/audioAnalyzers/base.ts +32 -0
package/src/services/audioAnalyzers/factory.ts +71 -0
package/src/services/audioAnalyzers/gemini.ts +137 -0
package/src/services/audioAnalyzers/index.ts +6 -0
package/src/services/audioAnalyzers/whisper.ts +80 -0
package/src/services/audioAnalyzers/yamnet.ts +54 -0
package/src/services/audioDownloader/index.ts +102 -0
package/src/services/chunkBuilder/index.ts +86 -0
package/src/services/clipGenerator/index.ts +210 -0
package/src/services/clipRefiner/index.ts +141 -0
package/src/services/eventDetector/index.ts +68 -0
package/src/services/llmAnalyzer/LLMAnalyzer.ts +114 -0
package/src/services/llmAnalyzer/index.ts +231 -0
package/src/services/metadataExtractor/index.ts +83 -0
package/src/services/segmentRanker/index.ts +88 -0
package/src/services/signalMerger/index.ts +53 -0
package/src/services/transcriptAnalyzers/base.ts +26 -0
package/src/services/transcriptAnalyzers/factory.ts +67 -0
package/src/services/transcriptAnalyzers/gemini.ts +24 -0
package/src/services/transcriptAnalyzers/index.ts +6 -0
package/src/services/transcriptAnalyzers/whisper.ts +68 -0
package/src/services/transcriptAnalyzers/ytdlp.ts +19 -0
package/src/services/transcriptDetector/index.ts +128 -0
package/src/services/transcriptFetcher/index.ts +151 -0
package/src/services/urlParser/index.ts +53 -0
package/src/services/videoDownloader/index.ts +282 -0
package/src/types/audio.ts +19 -0
package/src/types/cli.ts +22 -0
package/src/types/config.ts +174 -0
package/src/types/index.ts +26 -0
package/src/types/pipeline.ts +93 -0
package/src/types/segment.ts +43 -0
package/src/types/transcript.ts +22 -0
package/src/types/video.ts +18 -0
package/src/utils/cache.ts +223 -0
package/src/utils/chunker.ts +60 -0
package/src/utils/dumper.ts +41 -0
package/src/utils/format.ts +10 -0
package/src/utils/logger.ts +17 -0
package/src/utils/modelFactory.ts +71 -0
package/src/utils/redactConfig.ts +23 -0
package/src/utils/sliceAudio.ts +35 -0
package/test-trigger.txt +1 -0
package/tests/analyzerFactory.test.ts +146 -0
package/tests/audioEventDetector.test.ts +69 -0
package/tests/cache.test.ts +203 -0
package/tests/chunkBuilder.test.ts +146 -0
package/tests/chunker.test.ts +95 -0
package/tests/eventDetector.test.ts +103 -0
package/tests/llmAnalyzer.test.ts +283 -0
package/tests/segmentRanker.test.ts +133 -0
package/tests/setup.ts +48 -0
package/tests/signalMerger.test.ts +197 -0
package/tests/transcriptDetector.test.ts +150 -0
package/tests/transcriptFetcher.test.ts +179 -0
package/tests/urlParser.test.ts +70 -0
package/tsconfig.json +16 -0
package/tsconfig.test.json +8 -0
package/vitest.config.ts +8 -0

package/dist/pipeline/runner.js ADDED Viewed

@@ -0,0 +1,132 @@
+import { promises as fs } from 'fs';
+import { config } from '../config/index.js';
+import { Cache } from '../utils/cache.js';
+import { log } from '../utils/logger.js';
+import { dumpAnalysis, dumpTranscript } from '../utils/dumper.js';
+import { resolveVideo } from './stages/videoResolver.js';
+import { processAudio } from './stages/audioProcessor.js';
+import { analyzeSegments, refineRankedSegments } from './stages/segmentAnalyzer.js';
+import { selectSegments } from './stages/segmentSelector.js';
+import { exportClips } from './stages/clipExporter.js';
+import { downloadAudio } from '../services/audioDownloader/index.js';
+async function outputResult(result, outputJsonPath) {
+    const json = JSON.stringify(result, null, 2);
+    if (outputJsonPath) {
+        await fs.writeFile(outputJsonPath, json, 'utf-8');
+        log.info(`Output written to ${outputJsonPath}`);
+    }
+    else {
+        console.log('\n' + json);
+    }
+}
+/**
+ * Runs the full video-clipper pipeline for the given CLI arguments.
+ *
+ * Stage ordering:
+ *   1. resolveVideo         — parse URL, extract video ID + metadata
+ *   2. downloadAudio        — download WAV so Whisper/Gemini transcript providers can use it
+ *   3. processAudio         — detect audio events per window (reuses downloaded WAV)
+ *   4a. analyzeSegments     — fetch transcript + LLM pass 1 (informed by audio events)
+ *   5. selectSegments       — merge signals, rank, threshold filter
+ *   4b. refineRankedSegments — LLM pass 2 to tighten clip boundaries
+ *   6. exportClips          — download video + run ffmpeg (only if --clip)
+ *
+ * downloadAudio runs before analyzeSegments so that `audioPath` is available
+ * for Whisper/Gemini transcript providers. processAudio reuses the same WAV.
+ *
+ * Hard errors (invalid URL, transcript failure, all LLM chunks failed) are
+ * thrown so the caller can catch, log, and exit(1). Soft failures (audio
+ * detection, individual clip failures) are logged as warnings and the pipeline
+ * continues.
+ */
+export async function runPipeline(args) {
+    const threshold = args.threshold ?? config.SCORE_THRESHOLD;
+    const topN = args.topN ?? config.TOP_N_SEGMENTS;
+    const gameProfile = args.gameProfile ?? config.GAME_PROFILE;
+    const maxParallel = args.maxParallel ?? config.LLM_CONCURRENCY;
+    const cache = new Cache(config.CACHE_DIR, args.noCache);
+    // ── Stage 1: Resolve video ID + metadata ─────────────────────────────────
+    const { videoId, metadata } = await resolveVideo(args.url, args.maxDuration);
+    // ── Stage 2: Download audio ───────────────────────────────────────────────
+    // Downloaded before transcript so Whisper/Gemini transcript providers can
+    // use the WAV. Returns null when audio detection is disabled.
+    let audioPath = null;
+    const audioEnabled = config.AUDIO_DETECTION_ENABLED && !args.noAudio;
+    if (audioEnabled) {
+        try {
+            audioPath = await downloadAudio(videoId, `${config.OUTPUT_DIR}/audio`);
+        }
+        catch (err) {
+            const message = err instanceof Error ? err.message : String(err);
+            log.warn(`Audio download failed — continuing without audio: ${message}`);
+        }
+    }
+    // ── Stage 3: Audio event detection ───────────────────────────────────────
+    const audioEvents = await processAudio(videoId, metadata.duration, cache, {
+        noAudio: args.noAudio,
+        gameProfile,
+        maxParallel,
+        audioPath,
+    });
+    // ── Stage 4a: Fetch transcript + LLM analysis (informed by audio events) ──
+    const { lines, microBlocks, chunkEvals } = await analyzeSegments(videoId, audioPath, audioEvents, cache, {
+        maxChunks: args.maxChunks,
+        maxParallel,
+        noCache: args.noCache,
+    });
+    if (config.DUMP_OUTPUTS) {
+        await dumpTranscript(videoId, lines);
+    }
+    // ── Stage 5: Merge signals + rank ─────────────────────────────────────────
+    const rankedSegments = selectSegments(chunkEvals, audioEvents, { threshold, topN });
+    // Build partial result for early-exit path (no segments above threshold)
+    const partialResult = {
+        video_id: videoId,
+        title: metadata.title,
+        duration: metadata.duration,
+        chunk_evaluations: chunkEvals,
+        segments: rankedSegments,
+    };
+    if (rankedSegments.length === 0) {
+        await outputResult(partialResult, args.outputJson);
+        if (config.DUMP_OUTPUTS)
+            await dumpAnalysis(videoId, partialResult);
+        return;
+    }
+    // ── Stage 4b: Refine clip boundaries (LLM pass 2) ─────────────────────────
+    const refinedSegments = await refineRankedSegments(rankedSegments, microBlocks, cache, {
+        maxParallel,
+        noCache: args.noCache,
+    });
+    // ── Output result ─────────────────────────────────────────────────────────
+    const result = {
+        video_id: videoId,
+        title: metadata.title,
+        duration: metadata.duration,
+        chunk_evaluations: chunkEvals,
+        segments: refinedSegments,
+    };
+    await outputResult(result, args.outputJson);
+    if (config.DUMP_OUTPUTS)
+        await dumpAnalysis(videoId, result);
+    log.info('Done.');
+    // ── Stage 6: Download + generate clips (only with --clip) ─────────────────
+    if (!args.clip) {
+        log.info('Tip: run with --clip to download the video and generate mp4 clips.');
+        return;
+    }
+    const clipPaths = await exportClips(videoId, refinedSegments, {
+        localVideo: args.localVideo,
+        downloadSections: args.downloadSections,
+        videoPath: args.videoPath,
+    });
+    if (clipPaths.length === 0) {
+        log.warn('No clips were generated successfully.');
+    }
+    else {
+        log.info(`Done — ${clipPaths.length} clip${clipPaths.length !== 1 ? 's' : ''} saved:`);
+        for (const p of clipPaths) {
+            log.info(`  ${p}`);
+        }
+    }
+}

package/dist/pipeline/stages/audioProcessor.js ADDED Viewed

@@ -0,0 +1,75 @@
+import { promises as fs } from 'fs';
+import pLimit from 'p-limit';
+import { downloadAudio } from '../../services/audioDownloader/index.js';
+import { createAnalyzerChain } from '../../services/audioAnalyzers/index.js';
+import { EventDetector } from '../../services/eventDetector/index.js';
+import { sliceAudio } from '../../utils/sliceAudio.js';
+import { buildWindows } from '../../utils/chunker.js';
+import { log } from '../../utils/logger.js';
+import { config } from '../../config/index.js';
+/**
+ * Stage 3 — Audio Processor
+ *
+ * Downloads audio-only WAV, slices it into chunks using the generic
+ * `buildWindows` utility, runs event detection on each slice via an
+ * EventDetector (constructed from the ordered provider chain in config),
+ * and persists the results to cache.
+ *
+ * The provider chain is built once per run from `config.AUDIO_PROVIDER`
+ * (e.g. "gemini,whisper") via `createAnalyzerChain`. The EventDetector
+ * walks the chain in order, falling back to the next analyzer on failure.
+ *
+ * Returns an empty array immediately when audio detection is disabled via
+ * `--no-audio` or the `AUDIO_DETECTION_ENABLED` config flag.
+ */
+export async function processAudio(videoId, duration, cache, opts) {
+    const audioEnabled = config.AUDIO_DETECTION_ENABLED && !opts.noAudio;
+    if (!audioEnabled)
+        return [];
+    // Cache-first
+    const cached = await cache.readAudioEvents(videoId, opts.gameProfile, config.AUDIO_PROVIDER);
+    if (cached) {
+        log.info(`[cache hit] Audio events loaded from cache (${cached.length} events)`);
+        return cached;
+    }
+    try {
+        const audioPath = opts.audioPath ?? (await downloadAudio(videoId, `${config.OUTPUT_DIR}/audio`));
+        // Build the analyzer chain once per run from config
+        const chain = createAnalyzerChain(config.AUDIO_PROVIDER);
+        const detector = new EventDetector(chain);
+        const providerNames = chain.map((a) => a.source).join(' → ');
+        log.info(`Detecting audio events (chain: ${providerNames}, profile: ${opts.gameProfile}, max ${opts.maxParallel} parallel)...`);
+        const windows = buildWindows(duration, config.CHUNK_LENGTH_SEC, config.CHUNK_OVERLAP_SEC);
+        const limit = pLimit(opts.maxParallel);
+        const results = await Promise.allSettled(windows.map((window) => limit(async () => {
+            log.info(`  Processing audio chunk ${window.start}s - ${window.end}s...`);
+            const cachedChunk = await cache.readAudioChunk(videoId, opts.gameProfile, config.AUDIO_PROVIDER, window.start, window.end);
+            if (cachedChunk) {
+                log.info(`  [cache hit] Audio chunk ${window.start}s - ${window.end}s (${cachedChunk.length} events)`);
+                return cachedChunk;
+            }
+            const slicePath = await sliceAudio(audioPath, window.start, window.end - window.start, config.OUTPUT_DIR);
+            const events = await detector.detect(slicePath, opts.gameProfile, window.start, window.end - window.start);
+            await fs.unlink(slicePath);
+            await cache.writeAudioChunk(videoId, opts.gameProfile, config.AUDIO_PROVIDER, window.start, window.end, events);
+            return events;
+        })));
+        const audioEvents = results
+            .flatMap((r, i) => {
+            if (r.status === 'fulfilled')
+                return r.value;
+            const w = windows[i];
+            log.warn(`  Audio event detection failed for chunk ${w.start}s - ${w.end}s: ${String(r.reason)}`);
+            return [];
+        })
+            .sort((a, b) => a.time - b.time);
+        log.info(`Audio event detection complete: ${audioEvents.length} events found`);
+        await cache.writeAudioEvents(videoId, opts.gameProfile, config.AUDIO_PROVIDER, audioEvents);
+        return audioEvents;
+    }
+    catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        log.warn(`Audio event detection disabled due to error: ${message}`);
+        return [];
+    }
+}

package/dist/pipeline/stages/clipExporter.js ADDED Viewed

@@ -0,0 +1,44 @@
+import { downloadVideo } from '../../services/videoDownloader/index.js';
+import { generateClips, organizeClips } from '../../services/clipGenerator/index.js';
+import { log } from '../../utils/logger.js';
+import { config } from '../../config/index.js';
+/**
+ * Stage 6 — Clip Exporter
+ *
+ * Handles all three clip-generation modes:
+ *   1. Local video  — user supplied --local-video; run ffmpeg directly
+ *   2. Segments     -- --download-sections N; download top-N clips via yt-dlp
+ *                      --download-sections, then copy to outputs/
+ *   3. Full video   — download full video with yt-dlp, then cut clips with ffmpeg
+ *
+ * @returns Array of absolute paths to the generated clip files.
+ */
+export async function exportClips(videoId, segments, opts) {
+    // Mode 1: local video already on disk — cut with ffmpeg
+    if (opts.localVideo) {
+        log.info(`Using local video: ${opts.localVideo}`);
+        return generateClips(opts.localVideo, segments, videoId, opts.videoPath, config.CLIP_CONCURRENCY);
+    }
+    // Determine yt-dlp mode
+    const downloadSections = opts.downloadSections ?? config.DOWNLOAD_SECTIONS_MODE;
+    if (typeof downloadSections === 'number') {
+        // Mode 2: download only the top-N segments via --download-sections
+        const segmentsToDownload = segments.slice(0, downloadSections);
+        if (segmentsToDownload.length < downloadSections) {
+            log.warn(`Requested ${downloadSections} segments, but only ${segmentsToDownload.length} are available above threshold.`);
+        }
+        log.info(`Downloading ${segmentsToDownload.length} segments via yt-dlp --download-sections...`);
+        const downloadResult = await downloadVideo(videoId, 'segments', segmentsToDownload, opts.videoPath);
+        if (downloadResult.mode !== 'segments') {
+            throw new Error('Expected segments download result but got full-video result.');
+        }
+        return organizeClips(downloadResult.paths, videoId, opts.videoPath, config.CLIP_CONCURRENCY);
+    }
+    // Mode 3: full-video download → cut clips with ffmpeg
+    log.info('Downloading full video via yt-dlp...');
+    const downloadResult = await downloadVideo(videoId, 'all', [], opts.videoPath);
+    if (downloadResult.mode !== 'all') {
+        throw new Error('Expected full-video download result but got segments result.');
+    }
+    return generateClips(downloadResult.path, segments, videoId, opts.videoPath, config.CLIP_CONCURRENCY);
+}

package/dist/pipeline/stages/segmentAnalyzer.js ADDED Viewed

@@ -0,0 +1,46 @@
+import { LLMAnalyzer } from '../../services/llmAnalyzer/LLMAnalyzer.js';
+import { TranscriptDetector } from '../../services/transcriptDetector/index.js';
+import { createTranscriptChain } from '../../services/transcriptAnalyzers/index.js';
+import { refineSegments } from '../../services/clipRefiner/index.js';
+import { log } from '../../utils/logger.js';
+import { config } from '../../config/index.js';
+/**
+ * Stage 4a — Segment Analyzer (LLM pass 1)
+ *
+ * Builds a TranscriptDetector from config.TRANSCRIPT_PROVIDER and an
+ * LLMAnalyzer that owns it. Fetches the transcript (cache-first) and runs
+ * LLM chunk analysis informed by pre-computed audio events.
+ *
+ * Returns raw ChunkEvaluation results plus transcript data (lines, microBlocks,
+ * chunks) so the runner has everything it needs for ranking.
+ *
+ * NOTE: `processTranscript` no longer needs to run as a separate stage before
+ * this function — `LLMAnalyzer.analyze()` handles transcript fetching internally.
+ */
+export async function analyzeSegments(videoId, audioPath, audioEvents, cache, opts) {
+    log.info('Fetching transcript and analyzing segments...');
+    const chain = createTranscriptChain(config.TRANSCRIPT_PROVIDER);
+    const transcriptDetector = new TranscriptDetector(chain);
+    const analyzer = new LLMAnalyzer(transcriptDetector, cache);
+    const { lines, microBlocks, chunks, chunkEvals } = await analyzer.analyze({
+        videoId,
+        audioPath,
+        audioEvents,
+        maxChunks: opts.maxChunks,
+        maxParallel: opts.maxParallel,
+        noCache: opts.noCache,
+    });
+    return { lines, microBlocks, chunks, chunkEvals };
+}
+/**
+ * Stage 4b — Segment Refiner (LLM pass 2)
+ *
+ * Calls refineSegments() directly — no TranscriptDetector needed here since
+ * refinement only tightens clip boundaries and never touches the transcript.
+ * Separated from `analyzeSegments` because ranking (stage 5) must happen
+ * between the two passes.
+ */
+export async function refineRankedSegments(rankedSegments, microBlocks, _cache, opts) {
+    log.info('Refining clip boundaries...');
+    return refineSegments(rankedSegments, microBlocks, opts.maxParallel, opts.noCache);
+}

package/dist/pipeline/stages/segmentSelector.js ADDED Viewed

@@ -0,0 +1,23 @@
+import { mergeSignals } from '../../services/signalMerger/index.js';
+import { rankSegments } from '../../services/segmentRanker/index.js';
+import { log } from '../../utils/logger.js';
+/**
+ * Stage 5 — Segment Selector
+ *
+ * Merges transcript LLM evaluations with audio events (if any), then ranks
+ * and deduplicates candidates to produce the final ordered list of segments.
+ *
+ * This stage sits between the two LLM passes: it runs after `analyzeSegments`
+ * (pass 1) and its output feeds `refineRankedSegments` (pass 2).
+ */
+export function selectSegments(chunkEvals, audioEvents, opts) {
+    const merged = mergeSignals(chunkEvals, audioEvents);
+    const ranked = rankSegments(merged, opts.threshold, opts.topN);
+    if (ranked.length === 0) {
+        log.warn(`No segments scored above threshold ${opts.threshold}. Try lowering --threshold.`);
+    }
+    else {
+        log.info(`Analysis complete: ${ranked.length} segment${ranked.length !== 1 ? 's' : ''} above threshold ${opts.threshold}`);
+    }
+    return ranked;
+}

package/dist/pipeline/stages/videoResolver.js ADDED Viewed

@@ -0,0 +1,34 @@
+import { parseUrl } from '../../services/urlParser/index.js';
+import { extractMetadata } from '../../services/metadataExtractor/index.js';
+import { log } from '../../utils/logger.js';
+import { formatSeconds } from '../../utils/format.js';
+/**
+ * Stage 1 — Video Resolver
+ *
+ * Parses a raw YouTube URL into a validated video ID, fetches metadata
+ * (title + duration), and enforces the optional --max-duration guard.
+ *
+ * @throws {Error} on invalid URL, metadata fetch failure, or exceeded duration
+ */
+export async function resolveVideo(rawUrl, maxDurationSec) {
+    // Parse URL → video ID
+    let videoId;
+    try {
+        videoId = parseUrl(rawUrl);
+    }
+    catch {
+        throw new Error(`Invalid YouTube URL: ${rawUrl}`);
+    }
+    // Fetch metadata (yt-dlp → oEmbed fallback)
+    log.info(`Fetching metadata for ${videoId}...`);
+    const metadata = await extractMetadata(videoId);
+    log.info(`Video: "${metadata.title}" (${metadata.duration > 0 ? formatSeconds(metadata.duration) : 'duration unknown'})`);
+    // --max-duration guard
+    if (maxDurationSec !== undefined && metadata.duration > 0) {
+        if (metadata.duration > maxDurationSec) {
+            throw new Error(`Video duration exceeds --max-duration limit. ` +
+                `(${formatSeconds(metadata.duration)} > ${formatSeconds(maxDurationSec)})`);
+        }
+    }
+    return { videoId, metadata };
+}

package/dist/services/audioAnalyzers/base.js ADDED Viewed

@@ -0,0 +1,13 @@
+/**
+ * Contract every audio analyzer implementation must satisfy.
+ *
+ * Each concrete analyzer (Gemini, Whisper, YAMNet) extends this class and
+ * implements `detect()`. The `source` property is used to tag the events they
+ * return so downstream code knows which backend produced them.
+ *
+ * In this compiled output the class body is empty: it declares no members of
+ * its own, so `source` and `detect()` exist only on the subclasses.
+ * NOTE(review): presumably the members are abstract in the TypeScript source
+ * and erased at compile time — confirm against src/services/audioAnalyzers/base.ts.
+ *
+ * Usage:
+ *   const analyzer = new GeminiAudioAnalyzer();
+ *   const events   = await analyzer.detect(audioPath, gameProfile, offsetSec, durationSec);
+ */
+export class AudioAnalyzer {
+}

package/dist/services/audioAnalyzers/factory.js ADDED Viewed

@@ -0,0 +1,56 @@
+import { log } from '../../utils/logger.js';
+import { GeminiAudioAnalyzer } from './gemini.js';
+import { WhisperAudioAnalyzer } from './whisper.js';
+import { YAMNetAudioAnalyzer } from './yamnet.js';
+const KNOWN_PROVIDERS = new Set(['gemini', 'whisper', 'yamnet']);
+/**
+ * Parses the AUDIO_PROVIDER config string into an ordered list of provider names.
+ *
+ * Accepts a comma-separated list: "gemini,whisper" → ['gemini', 'whisper']
+ * Single values still work:        "yamnet"         → ['yamnet']
+ *
+ * Backward-compat: "both" is mapped to ['gemini', 'whisper'] with a deprecation warning.
+ */
+export function parseProviderChain(providerString) {
+    // Backward compatibility: map legacy 'both' to the new comma-separated form
+    if (providerString.trim() === 'both') {
+        log.warn('[audio] AUDIO_PROVIDER=both is deprecated. Use AUDIO_PROVIDER=gemini,whisper instead.');
+        return ['gemini', 'whisper'];
+    }
+    const names = providerString
+        .split(',')
+        .map((s) => s.trim().toLowerCase())
+        .filter(Boolean);
+    if (names.length === 0) {
+        throw new Error(`AUDIO_PROVIDER is empty. Provide at least one of: gemini, whisper, yamnet`);
+    }
+    for (const name of names) {
+        if (!KNOWN_PROVIDERS.has(name)) {
+            throw new Error(`Unknown audio provider "${name}". Valid options: gemini, whisper, yamnet (comma-separated for chain)`);
+        }
+    }
+    return names;
+}
+/**
+ * Builds an ordered array of AudioAnalyzer instances from a provider chain string.
+ *
+ * The EventDetector will walk this array in order — if the first analyzer fails,
+ * it falls back to the next, and so on.
+ *
+ * @example
+ *   // AUDIO_PROVIDER=gemini,whisper  →  [GeminiAudioAnalyzer, WhisperAudioAnalyzer]
+ *   const chain = createAnalyzerChain(config.AUDIO_PROVIDER);
+ */
+export function createAnalyzerChain(providerString) {
+    const names = parseProviderChain(providerString);
+    return names.map((name) => {
+        switch (name) {
+            case 'gemini':
+                return new GeminiAudioAnalyzer();
+            case 'whisper':
+                return new WhisperAudioAnalyzer();
+            case 'yamnet':
+                return new YAMNetAudioAnalyzer();
+        }
+    });
+}

package/dist/services/audioAnalyzers/gemini.js ADDED Viewed

@@ -0,0 +1,109 @@
+import { GoogleGenerativeAI } from '@google/generative-ai';
+import * as fs from 'fs';
+import { z } from 'zod';
+import { config } from '../../config/index.js';
+import { log } from '../../utils/logger.js';
+import { AudioAnalyzer } from './base.js';
+/**
+ * Validation schema for the JSON array Gemini is asked to return: each entry
+ * needs a numeric `time_sec`, a string `event`, and a `confidence` in [0, 1].
+ */
+const GeminiEventSchema = z.array(z.object({
+    // Gemini inconsistently returns timestamps in either:
+    //   - MM.SS notation: 1.03 = 1 min 3 sec = 63s
+    //   - True decimal seconds: 53.403 = 53.403s
+    // Use normalizeGeminiTime() to resolve the correct value.
+    time_sec: z.number(),
+    event: z.string(),
+    confidence: z.number().min(0).max(1),
+}));
+/**
+ * Opening sentence of the Gemini prompt, keyed by game profile. Profiles
+ * without an entry here fall back to `general` at the call site.
+ */
+const GAME_PROFILE_PROMPTS = {
+    valorant: 'You are analyzing audio from a Valorant gaming video. Identify ALL significant game events: kills, deaths, explosions, ability uses, spike plants/defuses, ace moments, clutch situations, crowd reactions, hype moments.',
+    fps: 'You are analyzing audio from an FPS gaming video. Identify ALL significant game events: kills, deaths, explosions, weapon fire, headshot sounds, kill streaks, crowd reactions, battle cries.',
+    boss_fight: 'You are analyzing audio from a boss fight video. Identify ALL significant game events: boss phase transitions, big hits, explosions, boss death, crowd cheering, epic moments, victory sounds.',
+    general: 'You are analyzing audio from a gaming video. Identify ALL significant audio events: explosions, gunshots, crowd reactions, cheering, epic moments, dramatic sounds.',
+};
+/**
+ * Converts a MM.SS-notation value to decimal seconds.
+ * e.g. 1.03 → 63, 1.40 → 100
+ */
+function mmssToSeconds(value) {
+    const minutes = Math.floor(value);
+    const seconds = Math.round((value % 1) * 100);
+    return minutes * 60 + seconds;
+}
+/**
+ * Resolves a Gemini `time_sec` value to true decimal seconds.
+ *
+ * Gemini inconsistently returns either MM.SS notation (e.g. 1.03 meaning 63s)
+ * or true decimal seconds (e.g. 53.403). This function disambiguates using
+ * the known chunk duration:
+ *
+ * 1. If the fractional part > 0.59, it cannot be a seconds component (seconds
+ *    only go 0-59), so it must be true decimal seconds — use as-is.
+ * 2. Otherwise, check if the MM.SS conversion produces a value within the
+ *    valid chunk range [0, chunkDurationSec). If yes, treat as MM.SS.
+ * 3. Fallback: use the value as true decimal seconds (the format we asked for).
+ *
+ * YAMNet always returns true decimal seconds and does NOT use this function.
+ */
+export function normalizeGeminiTime(value, chunkDurationSec) {
+    const frac = value % 1;
+    // Fractional part > 0.59 is impossible in MM.SS — must be decimal seconds
+    if (Math.round(frac * 100) > 59) {
+        return value;
+    }
+    // Fractional part ≤ 0.59: could be MM.SS — check if converted value fits in chunk
+    const mmss = mmssToSeconds(value);
+    if (mmss < chunkDurationSec) {
+        return mmss;
+    }
+    // MM.SS conversion overflows the chunk — must be true decimal seconds
+    return value;
+}
+/**
+ * Uses Google Gemini's multimodal API to detect audio events in a WAV slice.
+ * Understands game context semantically — best accuracy for gaming content.
+ *
+ * Requires GOOGLE_GENERATIVE_AI_API_KEY to be set.
+ */
+export class GeminiAudioAnalyzer extends AudioAnalyzer {
+    source = 'gemini';
+    async detect(audioPath, gameProfile, chunkOffsetSec, chunkDurationSec) {
+        const genai = new GoogleGenerativeAI(config.GOOGLE_GENERATIVE_AI_API_KEY);
+        const model = genai.getGenerativeModel({ model: config.AUDIO_GEMINI_MODEL });
+        const audioData = fs.readFileSync(audioPath);
+        const base64Audio = audioData.toString('base64');
+        const extraInstructions = config.AUDIO_EXTRA_INSTRUCTIONS
+            ? `\nAdditional instructions:\n${config.AUDIO_EXTRA_INSTRUCTIONS}\n`
+            : '';
+        const prompt = `${GAME_PROFILE_PROMPTS[gameProfile] ?? GAME_PROFILE_PROMPTS.general} ${extraInstructions}
+For each event, return a JSON object with:
+- time_sec: the time in seconds (be very precise with the timestamp, Gemini is good at this when the format is correct)
+- event: a short description of the event (e.g., "gunshot", "explosion", "clutch moment")
+- confidence: your confidence level (0.0 to 1.0)
+Return ONLY a JSON array, no explanation. Format:
+[
+  {"time_sec": 12.5, "event": "gunshot", "confidence": 0.8},
+  {"time_sec": 45.2, "event": "explosion", "confidence": 0.9}
+]`;
+        const result = await model.generateContent([
+            { inlineData: { mimeType: 'audio/wav', data: base64Audio } },
+            prompt,
+        ]);
+        const text = result.response.text();
+        log.info(`[audio:gemini] response: ${text}`);
+        const cleaned = text
+            .replace(/^```(?:json)?\s*/i, '')
+            .replace(/\s*```\s*$/i, '')
+            .trim();
+        const parsed = GeminiEventSchema.safeParse(JSON.parse(cleaned));
+        if (!parsed.success) {
+            throw new Error(`Gemini response failed validation: ${parsed.error.message}`);
+        }
+        return parsed.data.map((e) => ({
+            time: normalizeGeminiTime(e.time_sec, chunkDurationSec) + chunkOffsetSec,
+            event: e.event,
+            confidence: e.confidence,
+            source: this.source,
+        }));
+    }
+}

package/dist/services/audioAnalyzers/index.js ADDED Viewed

@@ -0,0 +1,5 @@
+export { AudioAnalyzer } from './base.js';
+export { GeminiAudioAnalyzer, normalizeGeminiTime } from './gemini.js';
+export { WhisperAudioAnalyzer, getPythonBin } from './whisper.js';
+export { YAMNetAudioAnalyzer } from './yamnet.js';
+export { createAnalyzerChain, parseProviderChain } from './factory.js';

package/dist/services/audioAnalyzers/whisper.js ADDED Viewed

@@ -0,0 +1,62 @@
+import { execa } from 'execa';
+import { config } from '../../config/index.js';
+import { log } from '../../utils/logger.js';
+import { AudioAnalyzer } from './base.js';
+/**
+ * Resolves the Python interpreter binary, caching the result after the first
+ * successful lookup. Shared by both Python-based analyzers (Whisper, YAMNet).
+ */
+let _pythonBin = null;
+export async function getPythonBin() {
+    if (_pythonBin)
+        return _pythonBin;
+    for (const bin of ['python3', 'python']) {
+        try {
+            await execa(bin, ['--version']);
+            _pythonBin = bin;
+            return bin;
+        }
+        catch {
+            log.warn(`[audio] ${bin} not found, trying next binary...`);
+        }
+    }
+    throw new Error('No Python interpreter found (tried python3, python). Install Python 3 to use YAMNet or Whisper.');
+}
+/**
+ * Uses OpenAI Whisper (local) to transcribe the audio chunk and scan the
+ * resulting transcript for hype keywords per game profile.
+ *
+ * Requires: pip install openai-whisper
+ */
+export class WhisperAudioAnalyzer extends AudioAnalyzer {
+    source = 'whisper';
+    async detect(audioPath, gameProfile, chunkOffsetSec, _chunkDurationSec) {
+        const python = await getPythonBin();
+        let stdout;
+        try {
+            const result = await execa(python, [
+                'scripts/detect_events_whisper.py',
+                audioPath,
+                String(config.AUDIO_CONFIDENCE_THRESHOLD),
+                gameProfile,
+                config.AUDIO_WHISPER_MODEL,
+            ]);
+            stdout = result.stdout;
+        }
+        catch (err) {
+            const message = err instanceof Error ? err.message : String(err);
+            if (message.includes('ModuleNotFoundError') || message.includes('No module named')) {
+                throw new Error('openai-whisper not installed. Run: pip install openai-whisper\n' +
+                    'Or set AUDIO_PROVIDER=gemini in .env and configure GOOGLE_GENERATIVE_AI_API_KEY.');
+            }
+            throw new Error(`Whisper detection failed: ${message}`);
+        }
+        const events = JSON.parse(stdout);
+        return events.map((e) => ({
+            time: e.time + chunkOffsetSec,
+            event: e.event,
+            confidence: e.confidence,
+            source: this.source,
+        }));
+    }
+}

package/dist/services/audioAnalyzers/yamnet.js ADDED Viewed

@@ -0,0 +1,40 @@
+import { execa } from 'execa';
+import { config } from '../../config/index.js';
+import { AudioAnalyzer } from './base.js';
+import { getPythonBin } from './whisper.js';
+/**
+ * Uses YAMNet (TensorFlow Hub) via a Python script to classify audio frames
+ * against a fixed set of game-relevant sound classes (gunshot, explosion, etc.).
+ *
+ * Requires: pip install tensorflow-hub soundfile numpy
+ */
+export class YAMNetAudioAnalyzer extends AudioAnalyzer {
+    source = 'yamnet';
+    async detect(audioPath, _gameProfile, chunkOffsetSec, _chunkDurationSec) {
+        const python = await getPythonBin();
+        let stdout;
+        try {
+            const result = await execa(python, [
+                'scripts/detect_events.py',
+                audioPath,
+                String(config.AUDIO_CONFIDENCE_THRESHOLD),
+            ]);
+            stdout = result.stdout;
+        }
+        catch (err) {
+            const message = err instanceof Error ? err.message : String(err);
+            if (message.includes('ModuleNotFoundError') || message.includes('No module named')) {
+                throw new Error('YAMNet dependencies missing. Run: pip3 install tensorflow-hub soundfile numpy\n' +
+                    'Or set AUDIO_PROVIDER=gemini in .env and configure GOOGLE_GENERATIVE_AI_API_KEY.');
+            }
+            throw new Error(`YAMNet detection failed: ${message}`);
+        }
+        const events = JSON.parse(stdout);
+        return events.map((e) => ({
+            time: e.time + chunkOffsetSec,
+            event: e.event,
+            confidence: e.confidence,
+            source: this.source,
+        }));
+    }
+}