@thunderkiller/video-clipper 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/.github/workflows/release.yml +5 -1
  2. package/CHANGELOG.md +8 -0
  3. package/dist/pipeline/runner.js +3 -11
  4. package/dist/pipeline/stages/audioProcessor.js +0 -2
  5. package/dist/pipeline/stages/clipExporter.js +0 -4
  6. package/dist/pipeline/stages/videoResolver.js +0 -3
  7. package/dist/services/audioAnalyzers/factory.js +0 -1
  8. package/dist/services/audioAnalyzers/gemini.js +6 -7
  9. package/dist/services/chunkBuilder/index.js +0 -4
  10. package/dist/services/clipRefiner/index.js +1 -1
  11. package/dist/services/llmAnalyzer/LLMAnalyzer.js +0 -2
  12. package/dist/services/transcriptDetector/index.js +0 -4
  13. package/dist/services/transcriptFetcher/index.js +2 -6
  14. package/dist/services/urlParser/index.js +0 -1
  15. package/dist/types/analyzer.js +1 -0
  16. package/dist/types/cache.js +5 -0
  17. package/dist/types/config.js +0 -23
  18. package/dist/types/downloader.js +1 -0
  19. package/dist/types/factory.js +1 -0
  20. package/dist/types/index.js +1 -0
  21. package/dist/types/pipeline.js +0 -8
  22. package/dist/types/segment.js +6 -6
  23. package/dist/types/transcript.js +6 -6
  24. package/dist/utils/cache.js +1 -8
  25. package/package.json +1 -1
  26. package/src/pipeline/runner.ts +3 -11
  27. package/src/pipeline/stages/audioProcessor.ts +0 -2
  28. package/src/pipeline/stages/clipExporter.ts +0 -4
  29. package/src/pipeline/stages/videoResolver.ts +0 -3
  30. package/src/services/audioAnalyzers/factory.ts +1 -3
  31. package/src/services/audioAnalyzers/gemini.ts +6 -7
  32. package/src/services/audioAnalyzers/index.ts +1 -1
  33. package/src/services/chunkBuilder/index.ts +0 -4
  34. package/src/services/clipRefiner/index.ts +1 -1
  35. package/src/services/llmAnalyzer/LLMAnalyzer.ts +2 -18
  36. package/src/services/transcriptAnalyzers/factory.ts +1 -2
  37. package/src/services/transcriptAnalyzers/index.ts +1 -1
  38. package/src/services/transcriptDetector/index.ts +6 -12
  39. package/src/services/transcriptFetcher/index.ts +2 -6
  40. package/src/services/urlParser/index.ts +0 -1
  41. package/src/services/videoDownloader/index.ts +1 -15
  42. package/src/types/analyzer.ts +23 -0
  43. package/src/types/cache.ts +8 -0
  44. package/src/types/config.ts +0 -23
  45. package/src/types/downloader.ts +15 -0
  46. package/src/types/factory.ts +3 -0
  47. package/src/types/index.ts +14 -0
  48. package/src/types/pipeline.ts +0 -33
  49. package/src/types/segment.ts +6 -6
  50. package/src/types/transcript.ts +6 -6
  51. package/src/utils/cache.ts +13 -12
@@ -50,6 +50,10 @@ jobs:
50
50
  env:
51
51
  GITHUB_TOKEN: ${{ secrets.PUSH_TOKEN }}
52
52
 
53
+ - name: Pull updated package.json after semantic-release
54
+ if: success()
55
+ run: git pull origin master
56
+
53
57
  - name: Verify GitHub Release
54
58
  if: success()
55
59
  run: |
@@ -69,4 +73,4 @@ jobs:
69
73
  if: success()
70
74
  run: npm publish
71
75
  env:
72
- NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
76
+ NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
package/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## [1.1.1](https://github.com/AmreetKumarkhuntia/video-clipper/compare/v1.1.0...v1.1.1) (2026-03-19)
2
+
3
+ ### Bug Fixes
4
+
5
+ - github workflows ([18a9536](https://github.com/AmreetKumarkhuntia/video-clipper/commit/18a953619ed17de71d3c9bd0a86e1b42a10aea37))
6
+ - **release:** pull updated package.json before npm publish to avoid race condition ([1839e4a](https://github.com/AmreetKumarkhuntia/video-clipper/commit/1839e4a57d516234ad629f0217e4fa5f4852e4e3))
7
+ - yaml correction ([33c7854](https://github.com/AmreetKumarkhuntia/video-clipper/commit/33c7854015e1266b3fc01b6206da4ec946f94307))
8
+
1
9
  # 1.0.0 (2026-03-18)
2
10
 
3
11
  ### Bug Fixes
@@ -45,11 +45,10 @@ export async function runPipeline(args) {
45
45
  const gameProfile = args.gameProfile ?? config.GAME_PROFILE;
46
46
  const maxParallel = args.maxParallel ?? config.LLM_CONCURRENCY;
47
47
  const cache = new Cache(config.CACHE_DIR, args.noCache);
48
- // ── Stage 1: Resolve video ID + metadata ─────────────────────────────────
49
48
  const { videoId, metadata } = await resolveVideo(args.url, args.maxDuration);
50
- // ── Stage 2: Download audio ───────────────────────────────────────────────
51
- // Downloaded before transcript so Whisper/Gemini transcript providers can
52
- // use the WAV. Returns null when audio detection is disabled.
49
+ /** Downloaded before transcript so Whisper/Gemini transcript providers can
50
+ * use the WAV. Returns null when audio detection is disabled.
51
+ */
53
52
  let audioPath = null;
54
53
  const audioEnabled = config.AUDIO_DETECTION_ENABLED && !args.noAudio;
55
54
  if (audioEnabled) {
@@ -61,14 +60,12 @@ export async function runPipeline(args) {
61
60
  log.warn(`Audio download failed — continuing without audio: ${message}`);
62
61
  }
63
62
  }
64
- // ── Stage 3: Audio event detection ───────────────────────────────────────
65
63
  const audioEvents = await processAudio(videoId, metadata.duration, cache, {
66
64
  noAudio: args.noAudio,
67
65
  gameProfile,
68
66
  maxParallel,
69
67
  audioPath,
70
68
  });
71
- // ── Stage 4a: Fetch transcript + LLM analysis (informed by audio events) ──
72
69
  const { lines, microBlocks, chunkEvals } = await analyzeSegments(videoId, audioPath, audioEvents, cache, {
73
70
  maxChunks: args.maxChunks,
74
71
  maxParallel,
@@ -77,9 +74,7 @@ export async function runPipeline(args) {
77
74
  if (config.DUMP_OUTPUTS) {
78
75
  await dumpTranscript(videoId, lines);
79
76
  }
80
- // ── Stage 5: Merge signals + rank ─────────────────────────────────────────
81
77
  const rankedSegments = selectSegments(chunkEvals, audioEvents, { threshold, topN });
82
- // Build partial result for early-exit path (no segments above threshold)
83
78
  const partialResult = {
84
79
  video_id: videoId,
85
80
  title: metadata.title,
@@ -93,12 +88,10 @@ export async function runPipeline(args) {
93
88
  await dumpAnalysis(videoId, partialResult);
94
89
  return;
95
90
  }
96
- // ── Stage 4b: Refine clip boundaries (LLM pass 2) ─────────────────────────
97
91
  const refinedSegments = await refineRankedSegments(rankedSegments, microBlocks, cache, {
98
92
  maxParallel,
99
93
  noCache: args.noCache,
100
94
  });
101
- // ── Output result ─────────────────────────────────────────────────────────
102
95
  const result = {
103
96
  video_id: videoId,
104
97
  title: metadata.title,
@@ -110,7 +103,6 @@ export async function runPipeline(args) {
110
103
  if (config.DUMP_OUTPUTS)
111
104
  await dumpAnalysis(videoId, result);
112
105
  log.info('Done.');
113
- // ── Stage 6: Download + generate clips (only with --clip) ─────────────────
114
106
  if (!args.clip) {
115
107
  log.info('Tip: run with --clip to download the video and generate mp4 clips.');
116
108
  return;
@@ -26,7 +26,6 @@ export async function processAudio(videoId, duration, cache, opts) {
26
26
  const audioEnabled = config.AUDIO_DETECTION_ENABLED && !opts.noAudio;
27
27
  if (!audioEnabled)
28
28
  return [];
29
- // Cache-first
30
29
  const cached = await cache.readAudioEvents(videoId, opts.gameProfile, config.AUDIO_PROVIDER);
31
30
  if (cached) {
32
31
  log.info(`[cache hit] Audio events loaded from cache (${cached.length} events)`);
@@ -34,7 +33,6 @@ export async function processAudio(videoId, duration, cache, opts) {
34
33
  }
35
34
  try {
36
35
  const audioPath = opts.audioPath ?? (await downloadAudio(videoId, `${config.OUTPUT_DIR}/audio`));
37
- // Build the analyzer chain once per run from config
38
36
  const chain = createAnalyzerChain(config.AUDIO_PROVIDER);
39
37
  const detector = new EventDetector(chain);
40
38
  const providerNames = chain.map((a) => a.source).join(' → ');
@@ -14,15 +14,12 @@ import { config } from '../../config/index.js';
14
14
  * @returns Array of absolute paths to the generated clip files.
15
15
  */
16
16
  export async function exportClips(videoId, segments, opts) {
17
- // Mode 1: local video already on disk — cut with ffmpeg
18
17
  if (opts.localVideo) {
19
18
  log.info(`Using local video: ${opts.localVideo}`);
20
19
  return generateClips(opts.localVideo, segments, videoId, opts.videoPath, config.CLIP_CONCURRENCY);
21
20
  }
22
- // Determine yt-dlp mode
23
21
  const downloadSections = opts.downloadSections ?? config.DOWNLOAD_SECTIONS_MODE;
24
22
  if (typeof downloadSections === 'number') {
25
- // Mode 2: download only the top-N segments via --download-sections
26
23
  const segmentsToDownload = segments.slice(0, downloadSections);
27
24
  if (segmentsToDownload.length < downloadSections) {
28
25
  log.warn(`Requested ${downloadSections} segments, but only ${segmentsToDownload.length} are available above threshold.`);
@@ -34,7 +31,6 @@ export async function exportClips(videoId, segments, opts) {
34
31
  }
35
32
  return organizeClips(downloadResult.paths, videoId, opts.videoPath, config.CLIP_CONCURRENCY);
36
33
  }
37
- // Mode 3: full-video download → cut clips with ffmpeg
38
34
  log.info('Downloading full video via yt-dlp...');
39
35
  const downloadResult = await downloadVideo(videoId, 'all', [], opts.videoPath);
40
36
  if (downloadResult.mode !== 'all') {
@@ -11,7 +11,6 @@ import { formatSeconds } from '../../utils/format.js';
11
11
  * @throws {Error} on invalid URL, metadata fetch failure, or exceeded duration
12
12
  */
13
13
  export async function resolveVideo(rawUrl, maxDurationSec) {
14
- // Parse URL → video ID
15
14
  let videoId;
16
15
  try {
17
16
  videoId = parseUrl(rawUrl);
@@ -19,11 +18,9 @@ export async function resolveVideo(rawUrl, maxDurationSec) {
19
18
  catch {
20
19
  throw new Error(`Invalid YouTube URL: ${rawUrl}`);
21
20
  }
22
- // Fetch metadata (yt-dlp → oEmbed fallback)
23
21
  log.info(`Fetching metadata for ${videoId}...`);
24
22
  const metadata = await extractMetadata(videoId);
25
23
  log.info(`Video: "${metadata.title}" (${metadata.duration > 0 ? formatSeconds(metadata.duration) : 'duration unknown'})`);
26
- // --max-duration guard
27
24
  if (maxDurationSec !== undefined && metadata.duration > 0) {
28
25
  if (metadata.duration > maxDurationSec) {
29
26
  throw new Error(`Video duration exceeds --max-duration limit. ` +
@@ -12,7 +12,6 @@ const KNOWN_PROVIDERS = new Set(['gemini', 'whisper', 'yamnet']);
12
12
  * Backward-compat: "both" is mapped to ['gemini', 'whisper'] with a deprecation warning.
13
13
  */
14
14
  export function parseProviderChain(providerString) {
15
- // Backward compatibility: map legacy 'both' to the new comma-separated form
16
15
  if (providerString.trim() === 'both') {
17
16
  log.warn('[audio] AUDIO_PROVIDER=both is deprecated. Use AUDIO_PROVIDER=gemini,whisper instead.');
18
17
  return ['gemini', 'whisper'];
@@ -4,11 +4,13 @@ import { z } from 'zod';
4
4
  import { config } from '../../config/index.js';
5
5
  import { log } from '../../utils/logger.js';
6
6
  import { AudioAnalyzer } from './base.js';
7
+ /**
8
+ * Gemini returns timestamps inconsistently as either:
9
+ * - MM.SS notation: 1.03 = 1 min 3 sec = 63s
10
+ * - True decimal seconds: 53.403 = 53.403s
11
+ * Use normalizeGeminiTime() to resolve correct value.
12
+ */
7
13
  const GeminiEventSchema = z.array(z.object({
8
- // Gemini inconsistently returns timestamps in either:
9
- // - MM.SS notation: 1.03 = 1 min 3 sec = 63s
10
- // - True decimal seconds: 53.403 = 53.403s
11
- // Use normalizeGeminiTime() to resolve the correct value.
12
14
  time_sec: z.number(),
13
15
  event: z.string(),
14
16
  confidence: z.number().min(0).max(1),
@@ -45,16 +47,13 @@ function mmssToSeconds(value) {
45
47
  */
46
48
  export function normalizeGeminiTime(value, chunkDurationSec) {
47
49
  const frac = value % 1;
48
- // Fractional part > 0.59 is impossible in MM.SS — must be decimal seconds
49
50
  if (Math.round(frac * 100) > 59) {
50
51
  return value;
51
52
  }
52
- // Fractional part ≤ 0.59: could be MM.SS — check if converted value fits in chunk
53
53
  const mmss = mmssToSeconds(value);
54
54
  if (mmss < chunkDurationSec) {
55
55
  return mmss;
56
56
  }
57
- // MM.SS conversion overflows the chunk — must be true decimal seconds
58
57
  return value;
59
58
  }
60
59
  /**
@@ -12,19 +12,16 @@ export function buildMicroBlocks(lines, windowSec) {
12
12
  let texts = [];
13
13
  for (const line of lines) {
14
14
  if (line.start >= windowStart + windowSec) {
15
- // Flush current block
16
15
  blocks.push({
17
16
  start: windowStart,
18
17
  end: line.start,
19
18
  text: texts.join(' '),
20
19
  });
21
- // Start a new window aligned to the current line
22
20
  windowStart = line.start;
23
21
  texts = [];
24
22
  }
25
23
  texts.push(line.text);
26
24
  }
27
- // Flush the final block
28
25
  if (texts.length > 0) {
29
26
  const lastLine = lines[lines.length - 1];
30
27
  blocks.push({
@@ -63,7 +60,6 @@ export function buildLLMChunks(blocks, chunkLen, overlap) {
63
60
  }
64
61
  const step = chunkLen - overlap;
65
62
  chunkStart += step;
66
- // Guard: if overlap >= chunkLen we'd loop forever
67
63
  if (step <= 0)
68
64
  break;
69
65
  }
@@ -69,7 +69,7 @@ async function refineSegment(segment, allBlocks, noCache) {
69
69
  prompt: buildPrompt(segment, text, windowStart, windowEnd),
70
70
  maxRetries: config.LLM_MAX_RETRIES,
71
71
  });
72
- // Clamp to the context window to ensure LLM doesn't hallucinate out-of-range values
72
+ /** Clamp to context window to prevent LLM from hallucinating out-of-range values */
73
73
  const refinedStart = Math.max(windowStart, Math.min(object.clip_start, object.clip_end - 1));
74
74
  const refinedEnd = Math.min(windowEnd, Math.max(object.clip_end, object.clip_start + 1));
75
75
  if (!noCache) {
@@ -36,9 +36,7 @@ export class LLMAnalyzer {
36
36
  * everything needed for the ranking step.
37
37
  */
38
38
  async analyze(opts) {
39
- // ── Transcript ────────────────────────────────────────────────────────────
40
39
  const { lines, microBlocks, chunks } = await this.transcriptDetector.detect(opts.videoId, opts.audioPath, this.cache);
41
- // ── LLM pass 1 ────────────────────────────────────────────────────────────
42
40
  const chunkLimit = opts.maxChunks ?? config.MAX_CHUNKS;
43
41
  const chunksToAnalyze = chunkLimit !== undefined ? chunks.slice(0, chunkLimit) : chunks;
44
42
  if (chunkLimit !== undefined) {
@@ -46,7 +46,6 @@ export class TranscriptDetector {
46
46
  */
47
47
  async detect(videoId, audioPath, cache) {
48
48
  let lines;
49
- // Cache-first: if we already have lines on disk, skip the provider chain entirely
50
49
  const cached = await cache.readTranscript(videoId);
51
50
  if (cached) {
52
51
  log.info(`[cache hit] Transcript loaded from cache (${cached.length} lines)`);
@@ -60,9 +59,6 @@ export class TranscriptDetector {
60
59
  const chunks = this.buildChunks(microBlocks);
61
60
  return { lines, microBlocks, chunks };
62
61
  }
63
- // -------------------------------------------------------------------------
64
- // Private helpers
65
- // -------------------------------------------------------------------------
66
62
  /**
67
63
  * Walks the analyzer chain in order.
68
64
  * Falls back to the next analyzer whenever one throws.
@@ -17,7 +17,7 @@ import { config } from '../../config/index.js';
17
17
  export function parseVtt(vttContent) {
18
18
  const lines = vttContent.split(/\r?\n/);
19
19
  const result = [];
20
- // Regex: HH:MM:SS.mmm --> HH:MM:SS.mmm (optional positioning metadata after)
20
+ /** Regex to match HH:MM:SS.mmm --> HH:MM:SS.mmm timestamp lines */
21
21
  const TIMESTAMP_RE = /^(\d{2}):(\d{2}):(\d{2})[.,](\d{3})\s+-->\s+(\d{2}):(\d{2}):(\d{2})[.,](\d{3})/;
22
22
  let i = 0;
23
23
  while (i < lines.length) {
@@ -32,7 +32,6 @@ export function parseVtt(vttContent) {
32
32
  parseInt(match[6], 10) * 60 +
33
33
  parseInt(match[7], 10) +
34
34
  parseInt(match[8], 10) / 1000;
35
- // Collect cue text lines until blank line or EOF
36
35
  i++;
37
36
  const textLines = [];
38
37
  while (i < lines.length && lines[i].trim() !== '') {
@@ -40,7 +39,6 @@ export function parseVtt(vttContent) {
40
39
  i++;
41
40
  }
42
41
  const rawText = textLines.join(' ');
43
- // Strip VTT inline tags: <00:00:00.000>, <c>, </c>, <b>, </b>, <i>, </i>, etc.
44
42
  const text = rawText
45
43
  .replace(/<[^>]+>/g, '')
46
44
  .replace(/&amp;/g, '&')
@@ -53,8 +51,7 @@ export function parseVtt(vttContent) {
53
51
  continue;
54
52
  }
55
53
  const duration = Math.max(0, endSec - startSec);
56
- // Deduplicate: skip if this cue text is identical to the previous one
57
- // (YouTube VTT often repeats the same line as text scrolls)
54
+ /** Skip duplicate cues - YouTube VTT often repeats same line as text scrolls */
58
55
  if (result.length > 0 && result[result.length - 1].text === text) {
59
56
  continue;
60
57
  }
@@ -104,7 +101,6 @@ export async function fetchTranscript(videoId) {
104
101
  const message = err instanceof Error ? err.message : String(err);
105
102
  throw new Error(`yt-dlp failed to fetch subtitles for "${videoId}": ${message}`);
106
103
  }
107
- // Find the downloaded .vtt file (yt-dlp names it <id>.<lang>.vtt)
108
104
  const files = await fs.readdir(tmpDir);
109
105
  const vttFile = files.find((f) => f.endsWith('.vtt'));
110
106
  if (!vttFile) {
@@ -37,7 +37,6 @@ export function parseUrl(url) {
37
37
  if (!videoId) {
38
38
  throw new Error(`Could not extract video ID from URL: "${url}"`);
39
39
  }
40
- // Strip any extra query params that may have been part of the path segment
41
40
  videoId = videoId.split('?')[0];
42
41
  if (videoId.length !== VIDEO_ID_LENGTH) {
43
42
  throw new Error(`Invalid video ID "${videoId}": expected ${VIDEO_ID_LENGTH} characters, got ${videoId.length}`);
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,5 @@
1
+ import { z } from 'zod';
2
+ export const SegmentRefinementSchema = z.object({
3
+ refined_start: z.number(),
4
+ refined_end: z.number(),
5
+ });
@@ -10,7 +10,6 @@ const LLM_PROVIDERS = [
10
10
  'openrouter',
11
11
  'custom',
12
12
  ];
13
- /** Map each provider to the env var name that holds its API key. */
14
13
  const PROVIDER_KEY_MAP = {
15
14
  openai: 'OPENAI_API_KEY',
16
15
  anthropic: 'ANTHROPIC_API_KEY',
@@ -24,9 +23,7 @@ const PROVIDER_KEY_MAP = {
24
23
  };
25
24
  export const ConfigSchema = z
26
25
  .object({
27
- // --- Provider selection ---
28
26
  LLM_PROVIDER: z.enum(LLM_PROVIDERS).default('openai'),
29
- // --- Per-provider API keys (all optional at schema level; enforced via superRefine) ---
30
27
  OPENAI_API_KEY: z.string().optional(),
31
28
  ANTHROPIC_API_KEY: z.string().optional(),
32
29
  GOOGLE_GENERATIVE_AI_API_KEY: z.string().optional(),
@@ -37,7 +34,6 @@ export const ConfigSchema = z
37
34
  OPENROUTER_API_KEY: z.string().optional(),
38
35
  CUSTOM_OPENAI_API_KEY: z.string().optional(),
39
36
  CUSTOM_OPENAI_BASE_URL: z.string().url().optional(),
40
- // --- Tunable parameters ---
41
37
  SCORE_THRESHOLD: z.coerce.number().min(1).max(10).default(7),
42
38
  TOP_N_SEGMENTS: z.coerce.number().min(1).default(10),
43
39
  CHUNK_LENGTH_SEC: z.coerce.number().min(10).default(120),
@@ -48,32 +44,20 @@ export const ConfigSchema = z
48
44
  DOWNLOAD_DIR: z.string().default('downloads/'),
49
45
  OUTPUT_DIR: z.string().default('outputs/'),
50
46
  CACHE_DIR: z.string().default('outputs/cache'),
51
- // --- Output dumping ---
52
47
  DUMP_OUTPUTS: z.coerce.boolean().default(true),
53
- // --- LLM evaluation limits ---
54
48
  MAX_CHUNKS: z.coerce.number().min(1).optional(),
55
49
  LLM_CONCURRENCY: z.coerce.number().min(1).default(3),
56
50
  CLIP_CONCURRENCY: z.coerce.number().min(1).default(1),
57
- // --- Custom system prompt (overrides the default if set) ---
58
51
  LLM_SYSTEM_PROMPT: z.string().optional(),
59
- // --- Gemini model used for audio event detection ---
60
52
  AUDIO_GEMINI_MODEL: z.string().default('gemini-2.5-flash'),
61
- // --- Extra instructions appended to the Gemini audio detection prompt ---
62
53
  AUDIO_EXTRA_INSTRUCTIONS: z.string().optional(),
63
- // --- Download mode for yt-dlp ---
64
54
  DOWNLOAD_SECTIONS_MODE: z.union([z.literal('all'), z.number().int().positive()]).default('all'),
65
- // --- FFmpeg paths (optional, for custom ffmpeg/ffprobe locations) ---
66
55
  FFMPEG_PATH: z.string().optional(),
67
56
  FFPROBE_PATH: z.string().optional(),
68
- // --- FFmpeg encoding preset for clip generation ---
69
57
  FFMPEG_PRESET: z
70
58
  .enum(['ultrafast', 'superfast', 'veryfast', 'fast', 'medium', 'slow', 'slower'])
71
59
  .default('fast'),
72
- // --- Timestamp offset for clips (adjusts if transcript is misaligned with video) ---
73
60
  TIMESTAMP_OFFSET_SECONDS: z.coerce.number().default(0),
74
- // --- Transcript provider ---
75
- // Comma-separated ordered fallback chain: "ytdlp" | "whisper" | "ytdlp,whisper" etc.
76
- // First provider that succeeds wins; subsequent providers are tried only on failure.
77
61
  TRANSCRIPT_PROVIDER: z
78
62
  .string()
79
63
  .default('ytdlp')
@@ -86,10 +70,7 @@ export const ConfigSchema = z
86
70
  }, {
87
71
  message: 'TRANSCRIPT_PROVIDER must be a comma-separated list of: ytdlp, whisper, gemini (e.g. "ytdlp")',
88
72
  }),
89
- // --- Audio event detection ---
90
73
  AUDIO_DETECTION_ENABLED: z.coerce.boolean().default(true),
91
- // Comma-separated ordered fallback chain: "gemini,whisper" | "yamnet" | "gemini" etc.
92
- // Legacy value "both" is accepted and mapped to "gemini,whisper" at runtime.
93
74
  AUDIO_PROVIDER: z
94
75
  .string()
95
76
  .default('gemini,whisper')
@@ -111,9 +92,7 @@ export const ConfigSchema = z
111
92
  AUDIO_CLIP_POST_ROLL: z.coerce.number().min(0).default(15),
112
93
  AUDIO_LLM_BOOST_WINDOW: z.coerce.number().min(0).default(10),
113
94
  AUDIO_LLM_SCORE_BOOST: z.coerce.number().min(0).default(2),
114
- // --- Game profile ---
115
95
  GAME_PROFILE: z.enum(['valorant', 'fps', 'boss_fight', 'general']).default('general'),
116
- // --- yt-dlp cookie support (for bot detection / auth) ---
117
96
  YT_DLP_COOKIES_FROM_BROWSER: z
118
97
  .enum(['chrome', 'firefox', 'safari', 'brave', 'edge', 'opera', 'chromium'])
119
98
  .optional(),
@@ -130,7 +109,6 @@ export const ConfigSchema = z
130
109
  message: `${keyName} is required when LLM_PROVIDER is "${provider}"`,
131
110
  });
132
111
  }
133
- // custom provider also requires a base URL
134
112
  if (provider === 'custom' &&
135
113
  (!data.CUSTOM_OPENAI_BASE_URL || data.CUSTOM_OPENAI_BASE_URL.trim() === '')) {
136
114
  ctx.addIssue({
@@ -139,7 +117,6 @@ export const ConfigSchema = z
139
117
  message: 'CUSTOM_OPENAI_BASE_URL is required when LLM_PROVIDER is "custom"',
140
118
  });
141
119
  }
142
- // Validate cookie config: only one method allowed at a time
143
120
  if (data.YT_DLP_COOKIES_FROM_BROWSER && data.YT_DLP_COOKIES_FILE) {
144
121
  ctx.addIssue({
145
122
  code: z.ZodIssueCode.custom,
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1 @@
1
+ export {};
@@ -3,3 +3,4 @@ export { TranscriptLineSchema, MicroBlockSchema, LLMChunkSchema } from './transc
3
3
  export { AnalyzedSegmentSchema, RankedSegmentSchema, ChunkEvaluationSchema } from './segment.js';
4
4
  export { AudioEventSchema, MergedCandidateSchema } from './audio.js';
5
5
  export { VideoMetadataSchema, PipelineResultSchema } from './video.js';
6
+ export { SegmentRefinementSchema } from './cache.js';
@@ -1,9 +1 @@
1
- /**
2
- * Pipeline-layer types: stage option bags, result shapes, and the generic
3
- * time-window interface from the chunker utility.
4
- *
5
- * All types here are owned by one pipeline stage but live centrally so the
6
- * runner (and any future consumers) can import them without reaching into
7
- * individual stage files.
8
- */
9
1
  export {};
@@ -3,13 +3,13 @@ export const AnalyzedSegmentSchema = z.object({
3
3
  interesting: z.boolean(),
4
4
  score: z.number().min(1).max(10),
5
5
  reason: z.string(),
6
- clip_start: z.number(), // seconds
7
- clip_end: z.number(), // seconds
6
+ clip_start: z.number(),
7
+ clip_end: z.number(),
8
8
  });
9
9
  export const RankedSegmentSchema = z.object({
10
10
  rank: z.number().int().min(1),
11
- start: z.number(), // seconds
12
- end: z.number(), // seconds
11
+ start: z.number(),
12
+ end: z.number(),
13
13
  score: z.number().min(1).max(10),
14
14
  reason: z.string(),
15
15
  source: z.enum(['transcript', 'audio', 'both']),
@@ -17,8 +17,8 @@ export const RankedSegmentSchema = z.object({
17
17
  });
18
18
  const ChunkEvaluationBaseSchema = z.object({
19
19
  chunk_index: z.number().int().min(0),
20
- chunk_start: z.number(), // seconds
21
- chunk_end: z.number(), // seconds
20
+ chunk_start: z.number(),
21
+ chunk_end: z.number(),
22
22
  });
23
23
  export const ChunkEvaluationSchema = z.discriminatedUnion('status', [
24
24
  ChunkEvaluationBaseSchema.extend({
@@ -1,16 +1,16 @@
1
1
  import { z } from 'zod';
2
2
  export const TranscriptLineSchema = z.object({
3
3
  text: z.string(),
4
- start: z.number(), // seconds (normalized from offset ms)
5
- duration: z.number(), // seconds (normalized from duration ms)
4
+ start: z.number(),
5
+ duration: z.number(),
6
6
  });
7
7
  export const MicroBlockSchema = z.object({
8
- start: z.number(), // seconds
9
- end: z.number(), // seconds
8
+ start: z.number(),
9
+ end: z.number(),
10
10
  text: z.string(),
11
11
  });
12
12
  export const LLMChunkSchema = z.object({
13
- start: z.number(), // seconds
14
- end: z.number(), // seconds
13
+ start: z.number(),
14
+ end: z.number(),
15
15
  text: z.string(),
16
16
  });
@@ -3,10 +3,7 @@ import { promises as fs } from 'fs';
3
3
  import path from 'path';
4
4
  import { z } from 'zod';
5
5
  import { log } from './logger.js';
6
- import { TranscriptLineSchema, ChunkEvaluationSchema, AudioEventSchema } from '../types/index.js';
7
- // ---------------------------------------------------------------------------
8
- // Internal cache-key helpers
9
- // ---------------------------------------------------------------------------
6
+ import { TranscriptLineSchema, ChunkEvaluationSchema, AudioEventSchema, SegmentRefinementSchema, } from '../types/index.js';
10
7
  /**
11
8
  * Serializes audio events into a stable string for cache keying.
12
9
  * Events are sorted by time so the key is order-independent.
@@ -44,10 +41,6 @@ async function writeCacheFile(filePath, data) {
44
41
  log.warn(`[cache] Failed to write ${filePath}: ${err instanceof Error ? err.message : String(err)}`);
45
42
  }
46
43
  }
47
- const SegmentRefinementSchema = z.object({
48
- refined_start: z.number(),
49
- refined_end: z.number(),
50
- });
51
44
  /**
52
45
  * Disk-backed cache for all pipeline stages.
53
46
  *
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@thunderkiller/video-clipper",
3
- "version": "1.1.0",
3
+ "version": "1.1.1",
4
4
  "description": "CLI that analyzes YouTube transcripts with an LLM to find interesting moments and cut clips",
5
5
  "license": "ISC",
6
6
  "author": "",
@@ -52,12 +52,11 @@ export async function runPipeline(args: CliArgs): Promise<void> {
52
52
 
53
53
  const cache = new Cache(config.CACHE_DIR, args.noCache);
54
54
 
55
- // ── Stage 1: Resolve video ID + metadata ─────────────────────────────────
56
55
  const { videoId, metadata } = await resolveVideo(args.url as string, args.maxDuration);
57
56
 
58
- // ── Stage 2: Download audio ───────────────────────────────────────────────
59
- // Downloaded before transcript so Whisper/Gemini transcript providers can
60
- // use the WAV. Returns null when audio detection is disabled.
57
+ /** Downloaded before transcript so Whisper/Gemini transcript providers can
58
+ * use the WAV. Returns null when audio detection is disabled.
59
+ */
61
60
  let audioPath: string | null = null;
62
61
  const audioEnabled = config.AUDIO_DETECTION_ENABLED && !args.noAudio;
63
62
  if (audioEnabled) {
@@ -69,7 +68,6 @@ export async function runPipeline(args: CliArgs): Promise<void> {
69
68
  }
70
69
  }
71
70
 
72
- // ── Stage 3: Audio event detection ───────────────────────────────────────
73
71
  const audioEvents = await processAudio(videoId, metadata.duration, cache, {
74
72
  noAudio: args.noAudio,
75
73
  gameProfile,
@@ -77,7 +75,6 @@ export async function runPipeline(args: CliArgs): Promise<void> {
77
75
  audioPath,
78
76
  });
79
77
 
80
- // ── Stage 4a: Fetch transcript + LLM analysis (informed by audio events) ──
81
78
  const { lines, microBlocks, chunkEvals } = await analyzeSegments(
82
79
  videoId,
83
80
  audioPath,
@@ -94,10 +91,8 @@ export async function runPipeline(args: CliArgs): Promise<void> {
94
91
  await dumpTranscript(videoId, lines);
95
92
  }
96
93
 
97
- // ── Stage 5: Merge signals + rank ─────────────────────────────────────────
98
94
  const rankedSegments = selectSegments(chunkEvals, audioEvents, { threshold, topN });
99
95
 
100
- // Build partial result for early-exit path (no segments above threshold)
101
96
  const partialResult: PipelineResult = {
102
97
  video_id: videoId,
103
98
  title: metadata.title,
@@ -112,13 +107,11 @@ export async function runPipeline(args: CliArgs): Promise<void> {
112
107
  return;
113
108
  }
114
109
 
115
- // ── Stage 4b: Refine clip boundaries (LLM pass 2) ─────────────────────────
116
110
  const refinedSegments = await refineRankedSegments(rankedSegments, microBlocks, cache, {
117
111
  maxParallel,
118
112
  noCache: args.noCache,
119
113
  });
120
114
 
121
- // ── Output result ─────────────────────────────────────────────────────────
122
115
  const result: PipelineResult = {
123
116
  video_id: videoId,
124
117
  title: metadata.title,
@@ -132,7 +125,6 @@ export async function runPipeline(args: CliArgs): Promise<void> {
132
125
 
133
126
  log.info('Done.');
134
127
 
135
- // ── Stage 6: Download + generate clips (only with --clip) ─────────────────
136
128
  if (!args.clip) {
137
129
  log.info('Tip: run with --clip to download the video and generate mp4 clips.');
138
130
  return;
@@ -36,7 +36,6 @@ export async function processAudio(
36
36
  const audioEnabled = config.AUDIO_DETECTION_ENABLED && !opts.noAudio;
37
37
  if (!audioEnabled) return [];
38
38
 
39
- // Cache-first
40
39
  const cached = await cache.readAudioEvents(videoId, opts.gameProfile, config.AUDIO_PROVIDER);
41
40
  if (cached) {
42
41
  log.info(`[cache hit] Audio events loaded from cache (${cached.length} events)`);
@@ -47,7 +46,6 @@ export async function processAudio(
47
46
  const audioPath =
48
47
  opts.audioPath ?? (await downloadAudio(videoId, `${config.OUTPUT_DIR}/audio`));
49
48
 
50
- // Build the analyzer chain once per run from config
51
49
  const chain = createAnalyzerChain(config.AUDIO_PROVIDER);
52
50
  const detector = new EventDetector(chain);
53
51
 
@@ -22,7 +22,6 @@ export async function exportClips(
22
22
  segments: RankedSegment[],
23
23
  opts: ClipExporterOpts,
24
24
  ): Promise<string[]> {
25
- // Mode 1: local video already on disk — cut with ffmpeg
26
25
  if (opts.localVideo) {
27
26
  log.info(`Using local video: ${opts.localVideo}`);
28
27
  return generateClips(
@@ -34,11 +33,9 @@ export async function exportClips(
34
33
  );
35
34
  }
36
35
 
37
- // Determine yt-dlp mode
38
36
  const downloadSections = opts.downloadSections ?? config.DOWNLOAD_SECTIONS_MODE;
39
37
 
40
38
  if (typeof downloadSections === 'number') {
41
- // Mode 2: download only the top-N segments via --download-sections
42
39
  const segmentsToDownload = segments.slice(0, downloadSections);
43
40
 
44
41
  if (segmentsToDownload.length < downloadSections) {
@@ -62,7 +59,6 @@ export async function exportClips(
62
59
  return organizeClips(downloadResult.paths, videoId, opts.videoPath, config.CLIP_CONCURRENCY);
63
60
  }
64
61
 
65
- // Mode 3: full-video download → cut clips with ffmpeg
66
62
  log.info('Downloading full video via yt-dlp...');
67
63
  const downloadResult = await downloadVideo(videoId, 'all', [], opts.videoPath);
68
64