@thunderkiller/video-clipper 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/.github/workflows/release.yml +5 -1
  2. package/CHANGELOG.md +8 -0
  3. package/dist/pipeline/runner.js +3 -11
  4. package/dist/pipeline/stages/audioProcessor.js +0 -2
  5. package/dist/pipeline/stages/clipExporter.js +0 -4
  6. package/dist/pipeline/stages/videoResolver.js +0 -3
  7. package/dist/services/audioAnalyzers/factory.js +0 -1
  8. package/dist/services/audioAnalyzers/gemini.js +6 -7
  9. package/dist/services/chunkBuilder/index.js +0 -4
  10. package/dist/services/clipRefiner/index.js +1 -1
  11. package/dist/services/llmAnalyzer/LLMAnalyzer.js +0 -2
  12. package/dist/services/transcriptDetector/index.js +0 -4
  13. package/dist/services/transcriptFetcher/index.js +2 -6
  14. package/dist/services/urlParser/index.js +0 -1
  15. package/dist/types/analyzer.js +1 -0
  16. package/dist/types/cache.js +5 -0
  17. package/dist/types/config.js +0 -23
  18. package/dist/types/downloader.js +1 -0
  19. package/dist/types/factory.js +1 -0
  20. package/dist/types/index.js +1 -0
  21. package/dist/types/pipeline.js +0 -8
  22. package/dist/types/segment.js +6 -6
  23. package/dist/types/transcript.js +6 -6
  24. package/dist/utils/cache.js +1 -8
  25. package/package.json +1 -1
  26. package/src/pipeline/runner.ts +3 -11
  27. package/src/pipeline/stages/audioProcessor.ts +0 -2
  28. package/src/pipeline/stages/clipExporter.ts +0 -4
  29. package/src/pipeline/stages/videoResolver.ts +0 -3
  30. package/src/services/audioAnalyzers/factory.ts +1 -3
  31. package/src/services/audioAnalyzers/gemini.ts +6 -7
  32. package/src/services/audioAnalyzers/index.ts +1 -1
  33. package/src/services/chunkBuilder/index.ts +0 -4
  34. package/src/services/clipRefiner/index.ts +1 -1
  35. package/src/services/llmAnalyzer/LLMAnalyzer.ts +2 -18
  36. package/src/services/transcriptAnalyzers/factory.ts +1 -2
  37. package/src/services/transcriptAnalyzers/index.ts +1 -1
  38. package/src/services/transcriptDetector/index.ts +6 -12
  39. package/src/services/transcriptFetcher/index.ts +2 -6
  40. package/src/services/urlParser/index.ts +0 -1
  41. package/src/services/videoDownloader/index.ts +1 -15
  42. package/src/types/analyzer.ts +23 -0
  43. package/src/types/cache.ts +8 -0
  44. package/src/types/config.ts +0 -23
  45. package/src/types/downloader.ts +15 -0
  46. package/src/types/factory.ts +3 -0
  47. package/src/types/index.ts +14 -0
  48. package/src/types/pipeline.ts +0 -33
  49. package/src/types/segment.ts +6 -6
  50. package/src/types/transcript.ts +6 -6
  51. package/src/utils/cache.ts +13 -12
@@ -18,7 +18,6 @@ export async function resolveVideo(
18
18
  rawUrl: string,
19
19
  maxDurationSec?: number,
20
20
  ): Promise<VideoResolverResult> {
21
- // Parse URL → video ID
22
21
  let videoId: string;
23
22
  try {
24
23
  videoId = parseUrl(rawUrl);
@@ -26,14 +25,12 @@ export async function resolveVideo(
26
25
  throw new Error(`Invalid YouTube URL: ${rawUrl}`);
27
26
  }
28
27
 
29
- // Fetch metadata (yt-dlp → oEmbed fallback)
30
28
  log.info(`Fetching metadata for ${videoId}...`);
31
29
  const metadata = await extractMetadata(videoId);
32
30
  log.info(
33
31
  `Video: "${metadata.title}" (${metadata.duration > 0 ? formatSeconds(metadata.duration) : 'duration unknown'})`,
34
32
  );
35
33
 
36
- // --max-duration guard
37
34
  if (maxDurationSec !== undefined && metadata.duration > 0) {
38
35
  if (metadata.duration > maxDurationSec) {
39
36
  throw new Error(
@@ -3,8 +3,7 @@ import { AudioAnalyzer } from './base.js';
3
3
  import { GeminiAudioAnalyzer } from './gemini.js';
4
4
  import { WhisperAudioAnalyzer } from './whisper.js';
5
5
  import { YAMNetAudioAnalyzer } from './yamnet.js';
6
-
7
- export type AudioProviderName = 'gemini' | 'whisper' | 'yamnet';
6
+ import type { AudioProviderName } from '../../types/index.js';
8
7
 
9
8
  const KNOWN_PROVIDERS = new Set<AudioProviderName>(['gemini', 'whisper', 'yamnet']);
10
9
 
@@ -17,7 +16,6 @@ const KNOWN_PROVIDERS = new Set<AudioProviderName>(['gemini', 'whisper', 'yamnet
17
16
  * Backward-compat: "both" is mapped to ['gemini', 'whisper'] with a deprecation warning.
18
17
  */
19
18
  export function parseProviderChain(providerString: string): AudioProviderName[] {
20
- // Backward compatibility: map legacy 'both' to the new comma-separated form
21
19
  if (providerString.trim() === 'both') {
22
20
  log.warn(
23
21
  '[audio] AUDIO_PROVIDER=both is deprecated. Use AUDIO_PROVIDER=gemini,whisper instead.',
@@ -6,12 +6,14 @@ import { log } from '../../utils/logger.js';
6
6
  import type { AudioEvent } from '../../types/index.js';
7
7
  import { AudioAnalyzer } from './base.js';
8
8
 
9
+ /**
10
+ * Gemini returns timestamps inconsistently as either:
11
+ * - MM.SS notation: 1.03 = 1 min 3 sec = 63s
12
+ * - True decimal seconds: 53.403 = 53.403s
13
+ * Use normalizeGeminiTime() to resolve correct value.
14
+ */
9
15
  const GeminiEventSchema = z.array(
10
16
  z.object({
11
- // Gemini inconsistently returns timestamps in either:
12
- // - MM.SS notation: 1.03 = 1 min 3 sec = 63s
13
- // - True decimal seconds: 53.403 = 53.403s
14
- // Use normalizeGeminiTime() to resolve the correct value.
15
17
  time_sec: z.number(),
16
18
  event: z.string(),
17
19
  confidence: z.number().min(0).max(1),
@@ -56,18 +58,15 @@ function mmssToSeconds(value: number): number {
56
58
  export function normalizeGeminiTime(value: number, chunkDurationSec: number): number {
57
59
  const frac = value % 1;
58
60
 
59
- // Fractional part > 0.59 is impossible in MM.SS — must be decimal seconds
60
61
  if (Math.round(frac * 100) > 59) {
61
62
  return value;
62
63
  }
63
64
 
64
- // Fractional part ≤ 0.59: could be MM.SS — check if converted value fits in chunk
65
65
  const mmss = mmssToSeconds(value);
66
66
  if (mmss < chunkDurationSec) {
67
67
  return mmss;
68
68
  }
69
69
 
70
- // MM.SS conversion overflows the chunk — must be true decimal seconds
71
70
  return value;
72
71
  }
73
72
 
@@ -3,4 +3,4 @@ export { GeminiAudioAnalyzer, normalizeGeminiTime } from './gemini.js';
3
3
  export { WhisperAudioAnalyzer, getPythonBin } from './whisper.js';
4
4
  export { YAMNetAudioAnalyzer } from './yamnet.js';
5
5
  export { createAnalyzerChain, parseProviderChain } from './factory.js';
6
- export type { AudioProviderName } from './factory.js';
6
+ export type { AudioProviderName } from '../../types/index.js';
@@ -15,20 +15,17 @@ export function buildMicroBlocks(lines: TranscriptLine[], windowSec: number): Mi
15
15
 
16
16
  for (const line of lines) {
17
17
  if (line.start >= windowStart + windowSec) {
18
- // Flush current block
19
18
  blocks.push({
20
19
  start: windowStart,
21
20
  end: line.start,
22
21
  text: texts.join(' '),
23
22
  });
24
- // Start a new window aligned to the current line
25
23
  windowStart = line.start;
26
24
  texts = [];
27
25
  }
28
26
  texts.push(line.text);
29
27
  }
30
28
 
31
- // Flush the final block
32
29
  if (texts.length > 0) {
33
30
  const lastLine = lines[lines.length - 1];
34
31
  blocks.push({
@@ -78,7 +75,6 @@ export function buildLLMChunks(
78
75
  const step = chunkLen - overlap;
79
76
  chunkStart += step;
80
77
 
81
- // Guard: if overlap >= chunkLen we'd loop forever
82
78
  if (step <= 0) break;
83
79
  }
84
80
 
@@ -92,7 +92,7 @@ async function refineSegment(
92
92
  maxRetries: config.LLM_MAX_RETRIES,
93
93
  });
94
94
 
95
- // Clamp to the context window to ensure LLM doesn't hallucinate out-of-range values
95
+ /** Clamp to context window to prevent LLM from hallucinating out-of-range values */
96
96
  const refinedStart = Math.max(windowStart, Math.min(object.clip_start, object.clip_end - 1));
97
97
  const refinedEnd = Math.min(windowEnd, Math.max(object.clip_end, object.clip_start + 1));
98
98
 
@@ -11,24 +11,10 @@ import type {
11
11
  AudioEvent,
12
12
  ChunkEvaluation,
13
13
  RankedSegment,
14
+ LLMAnalyzerResult,
15
+ LLMAnalyzerOpts,
14
16
  } from '../../types/index.js';
15
17
 
16
- export interface LLMAnalyzerResult {
17
- lines: TranscriptLine[];
18
- microBlocks: MicroBlock[];
19
- chunks: LLMChunk[];
20
- chunkEvals: ChunkEvaluation[];
21
- }
22
-
23
- export interface LLMAnalyzerOpts {
24
- videoId: string;
25
- audioPath: string | null;
26
- audioEvents: AudioEvent[];
27
- maxChunks?: number;
28
- maxParallel: number;
29
- noCache: boolean;
30
- }
31
-
32
18
  /**
33
19
  * LLMAnalyzer — orchestrates transcript fetching + LLM-based segment analysis.
34
20
  *
@@ -62,14 +48,12 @@ export class LLMAnalyzer {
62
48
  * everything needed for the ranking step.
63
49
  */
64
50
  async analyze(opts: LLMAnalyzerOpts): Promise<LLMAnalyzerResult> {
65
- // ── Transcript ────────────────────────────────────────────────────────────
66
51
  const { lines, microBlocks, chunks } = await this.transcriptDetector.detect(
67
52
  opts.videoId,
68
53
  opts.audioPath,
69
54
  this.cache,
70
55
  );
71
56
 
72
- // ── LLM pass 1 ────────────────────────────────────────────────────────────
73
57
  const chunkLimit = opts.maxChunks ?? config.MAX_CHUNKS;
74
58
  const chunksToAnalyze = chunkLimit !== undefined ? chunks.slice(0, chunkLimit) : chunks;
75
59
 
@@ -3,8 +3,7 @@ import { TranscriptAnalyzer } from './base.js';
3
3
  import { YtDlpTranscriptAnalyzer } from './ytdlp.js';
4
4
  import { WhisperTranscriptAnalyzer } from './whisper.js';
5
5
  import { GeminiTranscriptAnalyzer } from './gemini.js';
6
-
7
- export type TranscriptProviderName = 'ytdlp' | 'whisper' | 'gemini';
6
+ import type { TranscriptProviderName } from '../../types/index.js';
8
7
 
9
8
  const KNOWN_PROVIDERS = new Set<TranscriptProviderName>(['ytdlp', 'whisper', 'gemini']);
10
9
 
@@ -3,4 +3,4 @@ export { YtDlpTranscriptAnalyzer } from './ytdlp.js';
3
3
  export { WhisperTranscriptAnalyzer } from './whisper.js';
4
4
  export { GeminiTranscriptAnalyzer } from './gemini.js';
5
5
  export { createTranscriptChain, parseTranscriptProviderChain } from './factory.js';
6
- export type { TranscriptProviderName } from './factory.js';
6
+ export type { TranscriptProviderName } from '../../types/index.js';
@@ -3,13 +3,12 @@ import { log } from '../../utils/logger.js';
3
3
  import { config } from '../../config/index.js';
4
4
  import type { TranscriptAnalyzer } from '../transcriptAnalyzers/index.js';
5
5
  import type { Cache } from '../../utils/cache.js';
6
- import type { TranscriptLine, MicroBlock, LLMChunk } from '../../types/index.js';
7
-
8
- export interface TranscriptDetectorResult {
9
- lines: TranscriptLine[];
10
- microBlocks: MicroBlock[];
11
- chunks: LLMChunk[];
12
- }
6
+ import type {
7
+ TranscriptLine,
8
+ MicroBlock,
9
+ LLMChunk,
10
+ TranscriptDetectorResult,
11
+ } from '../../types/index.js';
13
12
 
14
13
  /**
15
14
  * Top-level transcript detector.
@@ -60,7 +59,6 @@ export class TranscriptDetector {
60
59
  ): Promise<TranscriptDetectorResult> {
61
60
  let lines: TranscriptLine[];
62
61
 
63
- // Cache-first: if we already have lines on disk, skip the provider chain entirely
64
62
  const cached = await cache.readTranscript(videoId);
65
63
  if (cached) {
66
64
  log.info(`[cache hit] Transcript loaded from cache (${cached.length} lines)`);
@@ -76,10 +74,6 @@ export class TranscriptDetector {
76
74
  return { lines, microBlocks, chunks };
77
75
  }
78
76
 
79
- // -------------------------------------------------------------------------
80
- // Private helpers
81
- // -------------------------------------------------------------------------
82
-
83
77
  /**
84
78
  * Walks the analyzer chain in order.
85
79
  * Falls back to the next analyzer whenever one throws.
@@ -20,7 +20,7 @@ export function parseVtt(vttContent: string): TranscriptLine[] {
20
20
  const lines = vttContent.split(/\r?\n/);
21
21
  const result: TranscriptLine[] = [];
22
22
 
23
- // Regex: HH:MM:SS.mmm --> HH:MM:SS.mmm (optional positioning metadata after)
23
+ /** Regex to match HH:MM:SS.mmm --> HH:MM:SS.mmm timestamp lines */
24
24
  const TIMESTAMP_RE =
25
25
  /^(\d{2}):(\d{2}):(\d{2})[.,](\d{3})\s+-->\s+(\d{2}):(\d{2}):(\d{2})[.,](\d{3})/;
26
26
 
@@ -42,7 +42,6 @@ export function parseVtt(vttContent: string): TranscriptLine[] {
42
42
  parseInt(match[7], 10) +
43
43
  parseInt(match[8], 10) / 1000;
44
44
 
45
- // Collect cue text lines until blank line or EOF
46
45
  i++;
47
46
  const textLines: string[] = [];
48
47
  while (i < lines.length && lines[i].trim() !== '') {
@@ -52,7 +51,6 @@ export function parseVtt(vttContent: string): TranscriptLine[] {
52
51
 
53
52
  const rawText = textLines.join(' ');
54
53
 
55
- // Strip VTT inline tags: <00:00:00.000>, <c>, </c>, <b>, </b>, <i>, </i>, etc.
56
54
  const text = rawText
57
55
  .replace(/<[^>]+>/g, '')
58
56
  .replace(/&amp;/g, '&')
@@ -68,8 +66,7 @@ export function parseVtt(vttContent: string): TranscriptLine[] {
68
66
 
69
67
  const duration = Math.max(0, endSec - startSec);
70
68
 
71
- // Deduplicate: skip if this cue text is identical to the previous one
72
- // (YouTube VTT often repeats the same line as text scrolls)
69
+ /** Skip duplicate cues - YouTube VTT often repeats same line as text scrolls */
73
70
  if (result.length > 0 && result[result.length - 1].text === text) {
74
71
  continue;
75
72
  }
@@ -125,7 +122,6 @@ export async function fetchTranscript(videoId: string): Promise<TranscriptLine[]
125
122
  throw new Error(`yt-dlp failed to fetch subtitles for "${videoId}": ${message}`);
126
123
  }
127
124
 
128
- // Find the downloaded .vtt file (yt-dlp names it <id>.<lang>.vtt)
129
125
  const files = await fs.readdir(tmpDir);
130
126
  const vttFile = files.find((f) => f.endsWith('.vtt'));
131
127
 
@@ -40,7 +40,6 @@ export function parseUrl(url: string): string {
40
40
  throw new Error(`Could not extract video ID from URL: "${url}"`);
41
41
  }
42
42
 
43
- // Strip any extra query params that may have been part of the path segment
44
43
  videoId = videoId.split('?')[0];
45
44
 
46
45
  if (videoId.length !== VIDEO_ID_LENGTH) {
@@ -4,21 +4,7 @@ import { join } from 'path';
4
4
  import pLimit from 'p-limit';
5
5
  import { config } from '../../config/index.js';
6
6
  import { log } from '../../utils/logger.js';
7
- import type { RankedSegment } from '../../types/index.js';
8
-
9
- export type DownloadMode = 'all' | 'segments';
10
-
11
- export interface DownloadResultAll {
12
- mode: 'all';
13
- path: string;
14
- }
15
-
16
- export interface DownloadResultSegments {
17
- mode: 'segments';
18
- paths: string[];
19
- }
20
-
21
- export type DownloadResult = DownloadResultAll | DownloadResultSegments;
7
+ import type { RankedSegment, DownloadMode, DownloadResult } from '../../types/index.js';
22
8
 
23
9
  /**
24
10
  * Formats a timestamp for yt-dlp --download-sections.
@@ -0,0 +1,23 @@
1
+ import type { TranscriptLine, MicroBlock, LLMChunk, ChunkEvaluation } from './index.js';
2
+
3
+ export interface LLMAnalyzerResult {
4
+ lines: TranscriptLine[];
5
+ microBlocks: MicroBlock[];
6
+ chunks: LLMChunk[];
7
+ chunkEvals: ChunkEvaluation[];
8
+ }
9
+
10
+ export interface LLMAnalyzerOpts {
11
+ videoId: string;
12
+ audioPath: string | null;
13
+ audioEvents: import('./audio.js').AudioEvent[];
14
+ maxChunks?: number;
15
+ maxParallel: number;
16
+ noCache: boolean;
17
+ }
18
+
19
+ export interface TranscriptDetectorResult {
20
+ lines: TranscriptLine[];
21
+ microBlocks: MicroBlock[];
22
+ chunks: LLMChunk[];
23
+ }
@@ -0,0 +1,8 @@
1
+ import { z } from 'zod';
2
+
3
+ export const SegmentRefinementSchema = z.object({
4
+ refined_start: z.number(),
5
+ refined_end: z.number(),
6
+ });
7
+
8
+ export type SegmentRefinement = z.infer<typeof SegmentRefinementSchema>;
@@ -14,7 +14,6 @@ const LLM_PROVIDERS = [
14
14
 
15
15
  export type LLMProvider = (typeof LLM_PROVIDERS)[number];
16
16
 
17
- /** Map each provider to the env var name that holds its API key. */
18
17
  const PROVIDER_KEY_MAP: Record<LLMProvider, string> = {
19
18
  openai: 'OPENAI_API_KEY',
20
19
  anthropic: 'ANTHROPIC_API_KEY',
@@ -29,10 +28,8 @@ const PROVIDER_KEY_MAP: Record<LLMProvider, string> = {
29
28
 
30
29
  export const ConfigSchema = z
31
30
  .object({
32
- // --- Provider selection ---
33
31
  LLM_PROVIDER: z.enum(LLM_PROVIDERS).default('openai'),
34
32
 
35
- // --- Per-provider API keys (all optional at schema level; enforced via superRefine) ---
36
33
  OPENAI_API_KEY: z.string().optional(),
37
34
  ANTHROPIC_API_KEY: z.string().optional(),
38
35
  GOOGLE_GENERATIVE_AI_API_KEY: z.string().optional(),
@@ -44,7 +41,6 @@ export const ConfigSchema = z
44
41
  CUSTOM_OPENAI_API_KEY: z.string().optional(),
45
42
  CUSTOM_OPENAI_BASE_URL: z.string().url().optional(),
46
43
 
47
- // --- Tunable parameters ---
48
44
  SCORE_THRESHOLD: z.coerce.number().min(1).max(10).default(7),
49
45
  TOP_N_SEGMENTS: z.coerce.number().min(1).default(10),
50
46
  CHUNK_LENGTH_SEC: z.coerce.number().min(10).default(120),
@@ -55,32 +51,20 @@ export const ConfigSchema = z
55
51
  DOWNLOAD_DIR: z.string().default('downloads/'),
56
52
  OUTPUT_DIR: z.string().default('outputs/'),
57
53
  CACHE_DIR: z.string().default('outputs/cache'),
58
- // --- Output dumping ---
59
54
  DUMP_OUTPUTS: z.coerce.boolean().default(true),
60
- // --- LLM evaluation limits ---
61
55
  MAX_CHUNKS: z.coerce.number().min(1).optional(),
62
56
  LLM_CONCURRENCY: z.coerce.number().min(1).default(3),
63
57
  CLIP_CONCURRENCY: z.coerce.number().min(1).default(1),
64
- // --- Custom system prompt (overrides the default if set) ---
65
58
  LLM_SYSTEM_PROMPT: z.string().optional(),
66
- // --- Gemini model used for audio event detection ---
67
59
  AUDIO_GEMINI_MODEL: z.string().default('gemini-2.5-flash'),
68
- // --- Extra instructions appended to the Gemini audio detection prompt ---
69
60
  AUDIO_EXTRA_INSTRUCTIONS: z.string().optional(),
70
- // --- Download mode for yt-dlp ---
71
61
  DOWNLOAD_SECTIONS_MODE: z.union([z.literal('all'), z.number().int().positive()]).default('all'),
72
- // --- FFmpeg paths (optional, for custom ffmpeg/ffprobe locations) ---
73
62
  FFMPEG_PATH: z.string().optional(),
74
63
  FFPROBE_PATH: z.string().optional(),
75
- // --- FFmpeg encoding preset for clip generation ---
76
64
  FFMPEG_PRESET: z
77
65
  .enum(['ultrafast', 'superfast', 'veryfast', 'fast', 'medium', 'slow', 'slower'])
78
66
  .default('fast'),
79
- // --- Timestamp offset for clips (adjusts if transcript is misaligned with video) ---
80
67
  TIMESTAMP_OFFSET_SECONDS: z.coerce.number().default(0),
81
- // --- Transcript provider ---
82
- // Comma-separated ordered fallback chain: "ytdlp" | "whisper" | "ytdlp,whisper" etc.
83
- // First provider that succeeds wins; subsequent providers are tried only on failure.
84
68
  TRANSCRIPT_PROVIDER: z
85
69
  .string()
86
70
  .default('ytdlp')
@@ -97,10 +81,7 @@ export const ConfigSchema = z
97
81
  'TRANSCRIPT_PROVIDER must be a comma-separated list of: ytdlp, whisper, gemini (e.g. "ytdlp")',
98
82
  },
99
83
  ),
100
- // --- Audio event detection ---
101
84
  AUDIO_DETECTION_ENABLED: z.coerce.boolean().default(true),
102
- // Comma-separated ordered fallback chain: "gemini,whisper" | "yamnet" | "gemini" etc.
103
- // Legacy value "both" is accepted and mapped to "gemini,whisper" at runtime.
104
85
  AUDIO_PROVIDER: z
105
86
  .string()
106
87
  .default('gemini,whisper')
@@ -127,9 +108,7 @@ export const ConfigSchema = z
127
108
  AUDIO_CLIP_POST_ROLL: z.coerce.number().min(0).default(15),
128
109
  AUDIO_LLM_BOOST_WINDOW: z.coerce.number().min(0).default(10),
129
110
  AUDIO_LLM_SCORE_BOOST: z.coerce.number().min(0).default(2),
130
- // --- Game profile ---
131
111
  GAME_PROFILE: z.enum(['valorant', 'fps', 'boss_fight', 'general']).default('general'),
132
- // --- yt-dlp cookie support (for bot detection / auth) ---
133
112
  YT_DLP_COOKIES_FROM_BROWSER: z
134
113
  .enum(['chrome', 'firefox', 'safari', 'brave', 'edge', 'opera', 'chromium'])
135
114
  .optional(),
@@ -148,7 +127,6 @@ export const ConfigSchema = z
148
127
  });
149
128
  }
150
129
 
151
- // custom provider also requires a base URL
152
130
  if (
153
131
  provider === 'custom' &&
154
132
  (!data.CUSTOM_OPENAI_BASE_URL || data.CUSTOM_OPENAI_BASE_URL.trim() === '')
@@ -160,7 +138,6 @@ export const ConfigSchema = z
160
138
  });
161
139
  }
162
140
 
163
- // Validate cookie config: only one method allowed at a time
164
141
  if (data.YT_DLP_COOKIES_FROM_BROWSER && data.YT_DLP_COOKIES_FILE) {
165
142
  ctx.addIssue({
166
143
  code: z.ZodIssueCode.custom,
@@ -0,0 +1,15 @@
1
+ import type { RankedSegment } from './index.js';
2
+
3
+ export type DownloadMode = 'all' | 'segments';
4
+
5
+ export interface DownloadResultAll {
6
+ mode: 'all';
7
+ path: string;
8
+ }
9
+
10
+ export interface DownloadResultSegments {
11
+ mode: 'segments';
12
+ paths: string[];
13
+ }
14
+
15
+ export type DownloadResult = DownloadResultAll | DownloadResultSegments;
@@ -0,0 +1,3 @@
1
+ export type TranscriptProviderName = 'ytdlp' | 'whisper' | 'gemini';
2
+
3
+ export type AudioProviderName = 'gemini' | 'whisper' | 'yamnet';
@@ -24,3 +24,17 @@ export type {
24
24
  SegmentSelectorOpts,
25
25
  ClipExporterOpts,
26
26
  } from './pipeline.js';
27
+
28
+ export type { LLMAnalyzerResult, LLMAnalyzerOpts, TranscriptDetectorResult } from './analyzer.js';
29
+
30
+ export type {
31
+ DownloadMode,
32
+ DownloadResultAll,
33
+ DownloadResultSegments,
34
+ DownloadResult,
35
+ } from './downloader.js';
36
+
37
+ export { SegmentRefinementSchema } from './cache.js';
38
+ export type { SegmentRefinement } from './cache.js';
39
+
40
+ export type { TranscriptProviderName, AudioProviderName } from './factory.js';
@@ -1,12 +1,3 @@
1
- /**
2
- * Pipeline-layer types: stage option bags, result shapes, and the generic
3
- * time-window interface from the chunker utility.
4
- *
5
- * All types here are owned by one pipeline stage but live centrally so the
6
- * runner (and any future consumers) can import them without reaching into
7
- * individual stage files.
8
- */
9
-
10
1
  import type {
11
2
  VideoMetadata,
12
3
  TranscriptLine,
@@ -15,10 +6,6 @@ import type {
15
6
  ChunkEvaluation,
16
7
  } from './index.js';
17
8
 
18
- // ---------------------------------------------------------------------------
19
- // Chunker utility
20
- // ---------------------------------------------------------------------------
21
-
22
9
  /** A half-open time window [start, end) in seconds. Returned by `buildWindows`. */
23
10
  export interface ChunkWindow {
24
11
  /** Start of the window in seconds (inclusive). */
@@ -27,19 +14,11 @@ export interface ChunkWindow {
27
14
  end: number;
28
15
  }
29
16
 
30
- // ---------------------------------------------------------------------------
31
- // Stage 1 — Video Resolver
32
- // ---------------------------------------------------------------------------
33
-
34
17
  export interface VideoResolverResult {
35
18
  videoId: string;
36
19
  metadata: VideoMetadata;
37
20
  }
38
21
 
39
- // ---------------------------------------------------------------------------
40
- // Stage 3 — Audio Processor
41
- // ---------------------------------------------------------------------------
42
-
43
22
  export interface AudioProcessorOpts {
44
23
  noAudio: boolean;
45
24
  gameProfile: string;
@@ -48,10 +27,6 @@ export interface AudioProcessorOpts {
48
27
  audioPath?: string | null;
49
28
  }
50
29
 
51
- // ---------------------------------------------------------------------------
52
- // Stage 4a + 4b — Segment Analyzer / Refiner
53
- // ---------------------------------------------------------------------------
54
-
55
30
  export interface SegmentAnalyzerOpts {
56
31
  maxChunks?: number;
57
32
  maxParallel: number;
@@ -65,19 +40,11 @@ export interface SegmentAnalyzerResult {
65
40
  chunkEvals: ChunkEvaluation[];
66
41
  }
67
42
 
68
- // ---------------------------------------------------------------------------
69
- // Stage 5 — Segment Selector
70
- // ---------------------------------------------------------------------------
71
-
72
43
  export interface SegmentSelectorOpts {
73
44
  threshold: number;
74
45
  topN: number;
75
46
  }
76
47
 
77
- // ---------------------------------------------------------------------------
78
- // Stage 6 — Clip Exporter
79
- // ---------------------------------------------------------------------------
80
-
81
48
  export interface ClipExporterOpts {
82
49
  /** Path to a pre-existing local video file. Skips yt-dlp download entirely. */
83
50
  localVideo?: string;
@@ -4,15 +4,15 @@ export const AnalyzedSegmentSchema = z.object({
4
4
  interesting: z.boolean(),
5
5
  score: z.number().min(1).max(10),
6
6
  reason: z.string(),
7
- clip_start: z.number(), // seconds
8
- clip_end: z.number(), // seconds
7
+ clip_start: z.number(),
8
+ clip_end: z.number(),
9
9
  });
10
10
  export type AnalyzedSegment = z.infer<typeof AnalyzedSegmentSchema>;
11
11
 
12
12
  export const RankedSegmentSchema = z.object({
13
13
  rank: z.number().int().min(1),
14
- start: z.number(), // seconds
15
- end: z.number(), // seconds
14
+ start: z.number(),
15
+ end: z.number(),
16
16
  score: z.number().min(1).max(10),
17
17
  reason: z.string(),
18
18
  source: z.enum(['transcript', 'audio', 'both']),
@@ -22,8 +22,8 @@ export type RankedSegment = z.infer<typeof RankedSegmentSchema>;
22
22
 
23
23
  const ChunkEvaluationBaseSchema = z.object({
24
24
  chunk_index: z.number().int().min(0),
25
- chunk_start: z.number(), // seconds
26
- chunk_end: z.number(), // seconds
25
+ chunk_start: z.number(),
26
+ chunk_end: z.number(),
27
27
  });
28
28
 
29
29
  export const ChunkEvaluationSchema = z.discriminatedUnion('status', [
@@ -2,21 +2,21 @@ import { z } from 'zod';
2
2
 
3
3
  export const TranscriptLineSchema = z.object({
4
4
  text: z.string(),
5
- start: z.number(), // seconds (normalized from offset ms)
6
- duration: z.number(), // seconds (normalized from duration ms)
5
+ start: z.number(),
6
+ duration: z.number(),
7
7
  });
8
8
  export type TranscriptLine = z.infer<typeof TranscriptLineSchema>;
9
9
 
10
10
  export const MicroBlockSchema = z.object({
11
- start: z.number(), // seconds
12
- end: z.number(), // seconds
11
+ start: z.number(),
12
+ end: z.number(),
13
13
  text: z.string(),
14
14
  });
15
15
  export type MicroBlock = z.infer<typeof MicroBlockSchema>;
16
16
 
17
17
  export const LLMChunkSchema = z.object({
18
- start: z.number(), // seconds
19
- end: z.number(), // seconds
18
+ start: z.number(),
19
+ end: z.number(),
20
20
  text: z.string(),
21
21
  });
22
22
  export type LLMChunk = z.infer<typeof LLMChunkSchema>;
@@ -3,12 +3,19 @@ import { promises as fs } from 'fs';
3
3
  import path from 'path';
4
4
  import { z } from 'zod';
5
5
  import { log } from './logger.js';
6
- import { TranscriptLineSchema, ChunkEvaluationSchema, AudioEventSchema } from '../types/index.js';
7
- import type { TranscriptLine, LLMChunk, ChunkEvaluation, AudioEvent } from '../types/index.js';
8
-
9
- // ---------------------------------------------------------------------------
10
- // Internal cache-key helpers
11
- // ---------------------------------------------------------------------------
6
+ import {
7
+ TranscriptLineSchema,
8
+ ChunkEvaluationSchema,
9
+ AudioEventSchema,
10
+ SegmentRefinementSchema,
11
+ } from '../types/index.js';
12
+ import type {
13
+ TranscriptLine,
14
+ LLMChunk,
15
+ ChunkEvaluation,
16
+ AudioEvent,
17
+ SegmentRefinement,
18
+ } from '../types/index.js';
12
19
 
13
20
  /**
14
21
  * Serializes audio events into a stable string for cache keying.
@@ -50,12 +57,6 @@ async function writeCacheFile(filePath: string, data: unknown): Promise<void> {
50
57
  }
51
58
  }
52
59
 
53
- const SegmentRefinementSchema = z.object({
54
- refined_start: z.number(),
55
- refined_end: z.number(),
56
- });
57
- type SegmentRefinement = z.infer<typeof SegmentRefinementSchema>;
58
-
59
60
  /**
60
61
  * Disk-backed cache for all pipeline stages.
61
62
  *