@thunderkiller/video-clipper 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/.env.example +130 -0
  2. package/.github/workflows/ci.yml +42 -0
  3. package/.github/workflows/release.yml +72 -0
  4. package/.husky/pre-commit +3 -0
  5. package/.prettierignore +6 -0
  6. package/.prettierrc +7 -0
  7. package/.releaserc.json +21 -0
  8. package/AGENTS.md +122 -0
  9. package/CHANGELOG.md +45 -0
  10. package/README.md +410 -0
  11. package/dist/cli.js +187 -0
  12. package/dist/config/env.js +14 -0
  13. package/dist/config/index.js +1 -0
  14. package/dist/index.js +35 -0
  15. package/dist/pipeline/runner.js +132 -0
  16. package/dist/pipeline/stages/audioProcessor.js +75 -0
  17. package/dist/pipeline/stages/clipExporter.js +44 -0
  18. package/dist/pipeline/stages/segmentAnalyzer.js +46 -0
  19. package/dist/pipeline/stages/segmentSelector.js +23 -0
  20. package/dist/pipeline/stages/videoResolver.js +34 -0
  21. package/dist/services/audioAnalyzers/base.js +13 -0
  22. package/dist/services/audioAnalyzers/factory.js +56 -0
  23. package/dist/services/audioAnalyzers/gemini.js +109 -0
  24. package/dist/services/audioAnalyzers/index.js +5 -0
  25. package/dist/services/audioAnalyzers/whisper.js +62 -0
  26. package/dist/services/audioAnalyzers/yamnet.js +40 -0
  27. package/dist/services/audioDownloader/index.js +81 -0
  28. package/dist/services/chunkBuilder/index.js +71 -0
  29. package/dist/services/clipGenerator/index.js +156 -0
  30. package/dist/services/clipRefiner/index.js +103 -0
  31. package/dist/services/eventDetector/index.js +54 -0
  32. package/dist/services/llmAnalyzer/LLMAnalyzer.js +63 -0
  33. package/dist/services/llmAnalyzer/index.js +173 -0
  34. package/dist/services/metadataExtractor/index.js +66 -0
  35. package/dist/services/segmentRanker/index.js +40 -0
  36. package/dist/services/signalMerger/index.js +36 -0
  37. package/dist/services/transcriptAnalyzers/base.js +13 -0
  38. package/dist/services/transcriptAnalyzers/factory.js +51 -0
  39. package/dist/services/transcriptAnalyzers/gemini.js +19 -0
  40. package/dist/services/transcriptAnalyzers/index.js +5 -0
  41. package/dist/services/transcriptAnalyzers/whisper.js +55 -0
  42. package/dist/services/transcriptAnalyzers/ytdlp.js +16 -0
  43. package/dist/services/transcriptDetector/index.js +102 -0
  44. package/dist/services/transcriptFetcher/index.js +124 -0
  45. package/dist/services/urlParser/index.js +46 -0
  46. package/dist/services/videoDownloader/index.js +212 -0
  47. package/dist/types/audio.js +15 -0
  48. package/dist/types/cli.js +1 -0
  49. package/dist/types/config.js +150 -0
  50. package/dist/types/index.js +5 -0
  51. package/dist/types/pipeline.js +9 -0
  52. package/dist/types/segment.js +36 -0
  53. package/dist/types/transcript.js +16 -0
  54. package/dist/types/video.js +14 -0
  55. package/dist/utils/cache.js +143 -0
  56. package/dist/utils/chunker.js +51 -0
  57. package/dist/utils/dumper.js +36 -0
  58. package/dist/utils/format.js +10 -0
  59. package/dist/utils/logger.js +16 -0
  60. package/dist/utils/modelFactory.js +60 -0
  61. package/dist/utils/redactConfig.js +20 -0
  62. package/dist/utils/sliceAudio.js +26 -0
  63. package/docs/free-models.md +78 -0
  64. package/docs/plan.md +442 -0
  65. package/docs/refactorPhases.md +105 -0
  66. package/docs/yt-downloader.md +440 -0
  67. package/package.json +65 -0
  68. package/requirements.txt +5 -0
  69. package/scripts/detect_events.py +81 -0
  70. package/scripts/detect_events_whisper.py +101 -0
  71. package/scripts/transcribe_whisper.py +70 -0
  72. package/src/cli.ts +186 -0
  73. package/src/config/env.ts +18 -0
  74. package/src/config/index.ts +2 -0
  75. package/src/index.ts +46 -0
  76. package/src/pipeline/runner.ts +155 -0
  77. package/src/pipeline/stages/audioProcessor.ts +129 -0
  78. package/src/pipeline/stages/clipExporter.ts +80 -0
  79. package/src/pipeline/stages/segmentAnalyzer.ts +72 -0
  80. package/src/pipeline/stages/segmentSelector.ts +39 -0
  81. package/src/pipeline/stages/videoResolver.ts +47 -0
  82. package/src/services/audioAnalyzers/base.ts +32 -0
  83. package/src/services/audioAnalyzers/factory.ts +71 -0
  84. package/src/services/audioAnalyzers/gemini.ts +137 -0
  85. package/src/services/audioAnalyzers/index.ts +6 -0
  86. package/src/services/audioAnalyzers/whisper.ts +80 -0
  87. package/src/services/audioAnalyzers/yamnet.ts +54 -0
  88. package/src/services/audioDownloader/index.ts +102 -0
  89. package/src/services/chunkBuilder/index.ts +86 -0
  90. package/src/services/clipGenerator/index.ts +210 -0
  91. package/src/services/clipRefiner/index.ts +141 -0
  92. package/src/services/eventDetector/index.ts +68 -0
  93. package/src/services/llmAnalyzer/LLMAnalyzer.ts +114 -0
  94. package/src/services/llmAnalyzer/index.ts +231 -0
  95. package/src/services/metadataExtractor/index.ts +83 -0
  96. package/src/services/segmentRanker/index.ts +88 -0
  97. package/src/services/signalMerger/index.ts +53 -0
  98. package/src/services/transcriptAnalyzers/base.ts +26 -0
  99. package/src/services/transcriptAnalyzers/factory.ts +67 -0
  100. package/src/services/transcriptAnalyzers/gemini.ts +24 -0
  101. package/src/services/transcriptAnalyzers/index.ts +6 -0
  102. package/src/services/transcriptAnalyzers/whisper.ts +68 -0
  103. package/src/services/transcriptAnalyzers/ytdlp.ts +19 -0
  104. package/src/services/transcriptDetector/index.ts +128 -0
  105. package/src/services/transcriptFetcher/index.ts +151 -0
  106. package/src/services/urlParser/index.ts +53 -0
  107. package/src/services/videoDownloader/index.ts +282 -0
  108. package/src/types/audio.ts +19 -0
  109. package/src/types/cli.ts +22 -0
  110. package/src/types/config.ts +174 -0
  111. package/src/types/index.ts +26 -0
  112. package/src/types/pipeline.ts +93 -0
  113. package/src/types/segment.ts +43 -0
  114. package/src/types/transcript.ts +22 -0
  115. package/src/types/video.ts +18 -0
  116. package/src/utils/cache.ts +223 -0
  117. package/src/utils/chunker.ts +60 -0
  118. package/src/utils/dumper.ts +41 -0
  119. package/src/utils/format.ts +10 -0
  120. package/src/utils/logger.ts +17 -0
  121. package/src/utils/modelFactory.ts +71 -0
  122. package/src/utils/redactConfig.ts +23 -0
  123. package/src/utils/sliceAudio.ts +35 -0
  124. package/test-trigger.txt +1 -0
  125. package/tests/analyzerFactory.test.ts +146 -0
  126. package/tests/audioEventDetector.test.ts +69 -0
  127. package/tests/cache.test.ts +203 -0
  128. package/tests/chunkBuilder.test.ts +146 -0
  129. package/tests/chunker.test.ts +95 -0
  130. package/tests/eventDetector.test.ts +103 -0
  131. package/tests/llmAnalyzer.test.ts +283 -0
  132. package/tests/segmentRanker.test.ts +133 -0
  133. package/tests/setup.ts +48 -0
  134. package/tests/signalMerger.test.ts +197 -0
  135. package/tests/transcriptDetector.test.ts +150 -0
  136. package/tests/transcriptFetcher.test.ts +179 -0
  137. package/tests/urlParser.test.ts +70 -0
  138. package/tsconfig.json +16 -0
  139. package/tsconfig.test.json +8 -0
  140. package/vitest.config.ts +8 -0
@@ -0,0 +1,70 @@
1
+ """
2
+ transcribe_whisper.py — Full Whisper transcription for transcript generation.
3
+
4
+ Runs OpenAI Whisper on the provided audio file and writes a JSON array of
5
+ transcript segments to stdout:
6
+
7
+ [{"text": "...", "start": 0.0, "duration": 3.5}, ...]
8
+
9
+ Usage:
10
+ python transcribe_whisper.py <audio_path> [model_size]
11
+
12
+ Arguments:
13
+ audio_path - Path to the audio WAV file
14
+ model_size - Whisper model to use (default: medium)
15
+ Options: tiny, base, small, medium, large-v3
16
+
17
+ Requires: pip install openai-whisper
18
+ """
19
+
20
+ import json
21
+ import sys
22
+
23
+
24
def transcribe(audio_path: str, model_size: str = 'medium') -> list[dict]:
    """Transcribe an audio file with Whisper and return its non-empty segments.

    Args:
        audio_path: Path passed straight to ``model.transcribe``.
        model_size: Model name passed to ``whisper.load_model``.

    Returns:
        One ``{'text', 'start', 'duration'}`` dict per Whisper segment whose
        stripped text is non-empty. ``start`` and ``duration`` are rounded to
        three decimals and ``duration`` is clamped so it is never negative.

    If the ``whisper`` package cannot be imported, a message is written to
    stderr and the process exits with status 2.
    """
    try:
        import whisper  # type: ignore
    except ImportError:
        print(
            'ModuleNotFoundError: openai-whisper not installed. Run: pip install openai-whisper',
            file=sys.stderr,
        )
        sys.exit(2)

    model = whisper.load_model(model_size)
    result = model.transcribe(audio_path, word_timestamps=False, fp16=False)

    def to_entry(raw) -> dict:
        # A segment without 'end' is treated as zero-length (end == start).
        spoken = raw.get('text', '').strip()
        begin = float(raw.get('start', 0))
        finish = float(raw.get('end', begin))
        return {
            'text': spoken,
            'start': round(begin, 3),
            'duration': max(0.0, round(finish - begin, 3)),
        }

    entries = [to_entry(raw) for raw in result.get('segments', [])]
    # Drop segments whose text is empty after stripping whitespace.
    return [entry for entry in entries if entry['text']]
52
+
53
+
54
if __name__ == '__main__':
    # Script entry point: argv[1] is the audio path, argv[2] (optional) the
    # Whisper model size. The transcript is printed to stdout as JSON; any
    # failure is reported on stderr with exit status 1.
    if len(sys.argv) <= 1:
        print(
            'Usage: python transcribe_whisper.py <audio_path> [model_size]',
            file=sys.stderr,
        )
        sys.exit(1)

    audio_path = sys.argv[1]
    model_size = 'medium' if len(sys.argv) <= 2 else sys.argv[2]

    try:
        output = transcribe(audio_path, model_size)
        print(json.dumps(output))
    except Exception as e:
        print(f'Error: {e}', file=sys.stderr)
        sys.exit(1)
package/src/cli.ts ADDED
@@ -0,0 +1,186 @@
1
+ import { config } from './config/index.js';
2
+ import { log } from './utils/logger.js';
3
+ import type { CliArgs } from './types/index.js';
4
+
5
+ export type { CliArgs };
6
+
7
+ // ---------------------------------------------------------------------------
8
+ // Argument parser
9
+ // ---------------------------------------------------------------------------
10
+
11
/** Logs `message` via `log.error` and terminates the process with exit code 1. */
function exitWithCliError(message: string): never {
  log.error(message);
  process.exit(1);
}

/** Returns `raw` when it is a non-empty string; otherwise fails with `message`. */
function requireFlagValue(raw: string | undefined, message: string): string {
  if (!raw) {
    return exitWithCliError(message);
  }
  return raw;
}

/** Parses `raw` as a finite number; fails with `message` when missing, blank, NaN or infinite. */
function parseFiniteNumber(raw: string | undefined, message: string): number {
  // Number('') and Number(' ') evaluate to 0, so blank values are rejected explicitly.
  const parsed = raw === undefined || raw.trim() === '' ? Number.NaN : Number(raw);
  if (!Number.isFinite(parsed)) {
    return exitWithCliError(message);
  }
  return parsed;
}

/** Parses `raw` as an integer >= 1; fails with `message` otherwise. */
function parsePositiveInteger(raw: string | undefined, message: string): number {
  const parsed = parseFiniteNumber(raw, message);
  if (!Number.isInteger(parsed) || parsed < 1) {
    return exitWithCliError(message);
  }
  return parsed;
}

/**
 * Parses a process argv array into a `CliArgs` object.
 *
 * The first two entries of `argv` are skipped. Any token that does not start
 * with `--` (other than `-h`) is taken as the video URL; if several are given
 * the last one wins. `--download-sections` and `--local-video` both switch
 * `clip` on.
 *
 * Invalid or missing flag values are reported through `log.error` and end the
 * process with exit code 1. An unknown `--flag` additionally prints the usage
 * text first.
 *
 * Numeric validation:
 *  - `--threshold`: any finite number
 *  - `--max-duration`: finite number greater than 0
 *  - `--top-n`, `--max-chunks`, `--max-parallel`, numeric `--download-sections`:
 *    integer >= 1
 *
 * @param argv Full argv array, e.g. `process.argv`.
 * @returns The parsed arguments; flags that were not supplied stay `undefined`/`false`.
 */
export function parseArgs(argv: string[]): CliArgs {
  const args = argv.slice(2);
  const result: CliArgs = {
    url: undefined,
    clip: false,
    downloadSections: undefined,
    videoPath: undefined,
    threshold: undefined,
    topN: undefined,
    maxDuration: undefined,
    maxChunks: undefined,
    maxParallel: undefined,
    outputJson: undefined,
    noCache: false,
    noAudio: false,
    gameProfile: undefined,
    help: false,
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) continue;

    switch (arg) {
      case '--help':
      case '-h':
        result.help = true;
        break;

      case '--clip':
        result.clip = true;
        break;

      case '--download-sections': {
        const val = requireFlagValue(
          args[++i],
          `--download-sections requires a value: 'all' or a number (1, 2, 3, ...)`,
        );

        if (val === 'all') {
          result.downloadSections = 'all';
        } else if (val === 'segments') {
          log.warn(
            `--download-sections segments is deprecated. Use a number like --download-sections 5 to download top 5 segments, or --download-sections all for full video.`,
          );
          result.downloadSections = 'all';
        } else {
          result.downloadSections = parsePositiveInteger(
            val,
            `--download-sections requires 'all' or a positive integer (1, 2, 3, ...)`,
          );
        }

        // Asking for a download mode only makes sense when clips are produced.
        result.clip = true;
        break;
      }

      case '--video-path':
        result.videoPath = requireFlagValue(args[++i], `--video-path requires a directory path`);
        break;

      case '--local-video':
        result.localVideo = requireFlagValue(args[++i], `--local-video requires a file path`);
        result.clip = true;
        break;

      case '--no-cache':
        result.noCache = true;
        break;

      case '--threshold':
        result.threshold = parseFiniteNumber(args[++i], `--threshold requires a numeric value`);
        break;

      case '--top-n':
        // A segment count must be a whole number >= 1, like --max-chunks.
        result.topN = parsePositiveInteger(args[++i], `--top-n requires a positive integer`);
        break;

      case '--max-duration': {
        const message = `--max-duration requires a positive number of seconds`;
        const seconds = parseFiniteNumber(args[++i], message);
        if (seconds <= 0) {
          exitWithCliError(message);
        }
        result.maxDuration = seconds;
        break;
      }

      case '--max-chunks':
        result.maxChunks = parsePositiveInteger(
          args[++i],
          `--max-chunks requires a positive integer`,
        );
        break;

      case '--max-parallel':
        result.maxParallel = parsePositiveInteger(
          args[++i],
          `--max-parallel requires a positive integer`,
        );
        break;

      case '--no-audio':
        result.noAudio = true;
        break;

      case '--game-profile':
        result.gameProfile = requireFlagValue(
          args[++i],
          `--game-profile requires a value (valorant, fps, boss_fight, general)`,
        );
        break;

      case '--output-json':
        result.outputJson = requireFlagValue(args[++i], `--output-json requires a file path`);
        break;

      default:
        if (!arg.startsWith('--')) {
          result.url = arg;
        } else {
          log.error(`Unknown flag: ${arg}`);
          printUsage();
          process.exit(1);
        }
    }
  }

  return result;
}
139
+
140
+ // ---------------------------------------------------------------------------
141
+ // Usage text
142
+ // ---------------------------------------------------------------------------
143
+
144
/**
 * Prints the CLI help text to stdout.
 *
 * Defaults shown in the text are read from `config` at call time. The
 * `--local-video` entry says it implies `--clip`, matching `parseArgs`, which
 * switches clip generation on whenever `--local-video` is supplied.
 */
export function printUsage(): void {
  console.log(
    `
Usage: npm run start -- <youtube-url> [options]
       npx tsx src/index.ts <youtube-url> [options]

Note: when invoking via npm run, use -- to pass flags to the script:
  npm run start -- <url> --max-chunks 3

Arguments:
  <youtube-url>               YouTube video URL (required)

Options:
  --clip                      Download video and generate mp4 clips for each segment
  --download-sections <mode>  yt-dlp download mode: 'all' (full video) or N (top N segments only, e.g. 1, 2, 3...) (default: ${config.DOWNLOAD_SECTIONS_MODE})
  --local-video <path>        Path to local video file (skips yt-dlp download, implies --clip)
  --video-path <path>         Custom output directory for downloaded videos and clips (overrides DOWNLOAD_DIR/OUTPUT_DIR)
  --threshold <n>             Minimum score to keep a segment (default: ${config.SCORE_THRESHOLD})
  --top-n <n>                 Maximum number of segments to return (default: ${config.TOP_N_SEGMENTS})
  --max-duration <s>          Abort if video is longer than <s> seconds
  --max-chunks <n>            Limit the number of transcript chunks sent to the LLM (useful for testing/cost control)
  --max-parallel <n>          Max number of LLM calls to run in parallel (default: LLM_CONCURRENCY env, or 3)
  --output-json <path>        Write output JSON to file instead of stdout
  --no-cache                  Bypass all caches and force a fresh run (transcript + chunk LLM results)
  --no-audio                  Disable audio event detection (transcript-only mode)
  --game-profile <type>       Game profile: valorant, fps, boss_fight, general (default: ${config.GAME_PROFILE})
  --help, -h                  Show this help message

Examples:
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ
  npm run start -- https://youtu.be/dQw4w9WgXcQ --clip
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --download-sections all
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --download-sections 3
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --download-sections 5 --video-path ./my-clips
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --local-video ./downloads/dQw4w9WgXcQ.mp4
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --local-video /path/to/video.mp4 --top-n 5
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --threshold 8 --top-n 5
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --output-json results.json
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --max-chunks 3
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --max-parallel 5
`.trim(),
  );
}
@@ -0,0 +1,18 @@
1
+ import 'dotenv/config';
2
+ import { ConfigSchema } from '../types/config.js';
3
+
4
/**
 * Validates `process.env` against `ConfigSchema` and returns the parsed data.
 *
 * When validation fails, every issue is printed to stderr as
 * `<dotted.path>: <message>` under an "Invalid configuration" header and the
 * process exits with code 1.
 */
function loadConfig() {
  const parsed = ConfigSchema.safeParse(process.env);

  if (parsed.success) {
    return parsed.data;
  }

  const lines: string[] = [];
  for (const issue of parsed.error.issues) {
    lines.push(` - ${issue.path.join('.')}: ${issue.message}`);
  }
  console.error(`[error] Invalid configuration:\n${lines.join('\n')}`);
  process.exit(1);
}
17
+
18
+ export const config = loadConfig();
@@ -0,0 +1,2 @@
1
+ export { config } from './env.js';
2
+ export type { Config } from '../types/config.js';
package/src/index.ts ADDED
@@ -0,0 +1,46 @@
1
+ import { log } from './utils/logger.js';
2
+ import { formatConfig } from './utils/redactConfig.js';
3
+ import { config } from './config/index.js';
4
+ import { parseArgs, printUsage } from './cli.js';
5
+ import { runPipeline } from './pipeline/runner.js';
6
+
7
// CLI entry point: parse argv, validate flag combinations, log a startup
// banner, then hand over to the pipeline.
const args = parseArgs(process.argv);

/** Logs `message` as an error, prints the usage text and exits with code 1. */
const abortWithUsage = (message: string): never => {
  log.error(message);
  printUsage();
  process.exit(1);
};

if (args.help) {
  printUsage();
  process.exit(0);
}

if (!args.url) {
  abortWithUsage('No YouTube URL provided.');
}

if (args.localVideo && !args.clip) {
  abortWithUsage('--local-video requires --clip flag');
}

if (args.localVideo && args.downloadSections) {
  log.warn(
    '--download-sections is ignored when using --local-video (clipping all segments from --top-n)',
  );
}

// Collect the optional banner suffixes in display order.
const bannerTags: string[] = [];
if (args.clip) {
  bannerTags.push(' [--clip enabled]');
}
if (args.localVideo) {
  bannerTags.push(` [--local-video: ${args.localVideo}]`);
}
if (args.downloadSections !== undefined && args.downloadSections !== 'all') {
  bannerTags.push(` [--download-sections: ${args.downloadSections}]`);
}
if (args.videoPath) {
  bannerTags.push(` [--video-path: ${args.videoPath}]`);
}

log.info(`Starting video-clipper (model: ${config.LLM_MODEL})` + bannerTags.join(''));
log.info(`Config: ${formatConfig(config)}`);

// Any error escaping the pipeline is logged as a single message and is fatal.
runPipeline(args).catch((err) => {
  log.error(err instanceof Error ? err.message : String(err));
  process.exit(1);
});
@@ -0,0 +1,155 @@
1
+ import { promises as fs } from 'fs';
2
+ import { config } from '../config/index.js';
3
+ import { Cache } from '../utils/cache.js';
4
+ import { log } from '../utils/logger.js';
5
+ import { dumpAnalysis, dumpTranscript } from '../utils/dumper.js';
6
+ import { resolveVideo } from './stages/videoResolver.js';
7
+ import { processAudio } from './stages/audioProcessor.js';
8
+ import { analyzeSegments, refineRankedSegments } from './stages/segmentAnalyzer.js';
9
+ import { selectSegments } from './stages/segmentSelector.js';
10
+ import { exportClips } from './stages/clipExporter.js';
11
+ import { downloadAudio } from '../services/audioDownloader/index.js';
12
+ import type { CliArgs, PipelineResult } from '../types/index.js';
13
+
14
/**
 * Emits the pipeline result as pretty-printed JSON (2-space indent).
 *
 * @param result Result object to serialize.
 * @param outputJsonPath When set, the JSON is written to this file (UTF-8) and
 *   the path is logged; otherwise the JSON is printed to stdout after a blank
 *   line.
 */
async function outputResult(
  result: PipelineResult,
  outputJsonPath: string | undefined,
): Promise<void> {
  const serialized = JSON.stringify(result, null, 2);

  if (!outputJsonPath) {
    console.log('\n' + serialized);
    return;
  }

  await fs.writeFile(outputJsonPath, serialized, 'utf-8');
  log.info(`Output written to ${outputJsonPath}`);
}
26
+
27
/**
 * Executes the video-clipper pipeline end to end for one set of CLI arguments.
 *
 * Order of work:
 *   1. resolveVideo          — video ID and metadata from the URL
 *   2. downloadAudio         — WAV download (skipped when audio detection is off)
 *   3. processAudio          — audio event detection, reusing the downloaded WAV
 *   4. analyzeSegments       — transcript + first LLM pass, given the audio events
 *   5. selectSegments        — merge signals, rank, apply threshold / top-N
 *   6. refineRankedSegments  — second LLM pass over the ranked segments
 *   7. exportClips           — only when `args.clip` is set
 *
 * The audio download happens before transcript analysis so `audioPath` can be
 * handed to `analyzeSegments`; a failed download is logged as a warning and
 * the run continues with `audioPath = null`.
 *
 * When no segment survives selection, the un-refined result is emitted and the
 * function returns early. Errors thrown by the stages are not caught here
 * (apart from the audio download) and propagate to the caller.
 */
export async function runPipeline(args: CliArgs): Promise<void> {
  // CLI flags take precedence over configured defaults.
  const threshold = args.threshold ?? config.SCORE_THRESHOLD;
  const topN = args.topN ?? config.TOP_N_SEGMENTS;
  const gameProfile = args.gameProfile ?? config.GAME_PROFILE;
  const maxParallel = args.maxParallel ?? config.LLM_CONCURRENCY;

  const cache = new Cache(config.CACHE_DIR, args.noCache);

  // ── Stage 1: Resolve video ID + metadata ─────────────────────────────────
  const { videoId, metadata } = await resolveVideo(args.url as string, args.maxDuration);

  /** Writes the result to stdout/file and, when enabled, to the dump output. */
  const publish = async (payload: PipelineResult): Promise<void> => {
    await outputResult(payload, args.outputJson);
    if (config.DUMP_OUTPUTS) await dumpAnalysis(videoId, payload);
  };

  // ── Stage 2: Download audio ───────────────────────────────────────────────
  // Stays null when audio detection is disabled or the download fails.
  let audioPath: string | null = null;
  if (config.AUDIO_DETECTION_ENABLED && !args.noAudio) {
    try {
      audioPath = await downloadAudio(videoId, `${config.OUTPUT_DIR}/audio`);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      log.warn(`Audio download failed — continuing without audio: ${message}`);
    }
  }

  // ── Stage 3: Audio event detection ───────────────────────────────────────
  const audioEvents = await processAudio(videoId, metadata.duration, cache, {
    noAudio: args.noAudio,
    gameProfile,
    maxParallel,
    audioPath,
  });

  // ── Stage 4a: Fetch transcript + LLM analysis (informed by audio events) ──
  const analysis = await analyzeSegments(videoId, audioPath, audioEvents, cache, {
    maxChunks: args.maxChunks,
    maxParallel,
    noCache: args.noCache,
  });
  const { lines, microBlocks, chunkEvals } = analysis;

  if (config.DUMP_OUTPUTS) {
    await dumpTranscript(videoId, lines);
  }

  // ── Stage 5: Merge signals + rank ─────────────────────────────────────────
  const rankedSegments = selectSegments(chunkEvals, audioEvents, { threshold, topN });

  // Fields shared by the early-exit result and the final result.
  const summary = {
    video_id: videoId,
    title: metadata.title,
    duration: metadata.duration,
    chunk_evaluations: chunkEvals,
  };

  // Early exit: nothing ranked, so there is nothing to refine or clip.
  if (rankedSegments.length === 0) {
    await publish({ ...summary, segments: rankedSegments });
    return;
  }

  // ── Stage 4b: Refine clip boundaries (LLM pass 2) ─────────────────────────
  const refinedSegments = await refineRankedSegments(rankedSegments, microBlocks, cache, {
    maxParallel,
    noCache: args.noCache,
  });

  // ── Output result ─────────────────────────────────────────────────────────
  await publish({ ...summary, segments: refinedSegments });

  log.info('Done.');

  // ── Stage 6: Download + generate clips (only with --clip) ─────────────────
  if (!args.clip) {
    log.info('Tip: run with --clip to download the video and generate mp4 clips.');
    return;
  }

  const clipPaths = await exportClips(videoId, refinedSegments, {
    localVideo: args.localVideo,
    downloadSections: args.downloadSections,
    videoPath: args.videoPath,
  });

  if (clipPaths.length === 0) {
    log.warn('No clips were generated successfully.');
    return;
  }

  const plural = clipPaths.length !== 1 ? 's' : '';
  log.info(`Done — ${clipPaths.length} clip${plural} saved:`);
  clipPaths.forEach((clipPath) => {
    log.info(` ${clipPath}`);
  });
}
@@ -0,0 +1,129 @@
1
+ import { promises as fs } from 'fs';
2
+ import pLimit from 'p-limit';
3
+ import { downloadAudio } from '../../services/audioDownloader/index.js';
4
+ import { createAnalyzerChain } from '../../services/audioAnalyzers/index.js';
5
+ import { EventDetector } from '../../services/eventDetector/index.js';
6
+ import { sliceAudio } from '../../utils/sliceAudio.js';
7
+ import { buildWindows } from '../../utils/chunker.js';
8
+ import { log } from '../../utils/logger.js';
9
+ import { config } from '../../config/index.js';
10
+ import type { Cache } from '../../utils/cache.js';
11
+ import type { AudioEvent, AudioProcessorOpts } from '../../types/index.js';
12
+
13
+ export type { AudioProcessorOpts };
14
+
15
/**
 * Stage 3 — Audio Processor
 *
 * Detects audio events for a video by slicing its WAV into windows (from
 * `buildWindows`) and running an `EventDetector` over each slice, with at most
 * `opts.maxParallel` slices in flight.
 *
 * Behaviour:
 *  - Returns `[]` immediately when audio detection is disabled, either by
 *    `config.AUDIO_DETECTION_ENABLED` or `opts.noAudio`.
 *  - Returns the cached aggregate result when one exists.
 *  - Uses `opts.audioPath` when provided; otherwise downloads the audio.
 *  - Each window is served from the per-chunk cache when possible; fresh
 *    results are written back to it.
 *  - The temporary slice file is always removed, even when detection throws.
 *    A failed removal is logged and does not discard the detected events.
 *  - A failed window is logged and contributes no events. The aggregate cache
 *    is written only when every window succeeded, so a partial result is
 *    never served as a cache hit on a later run.
 *  - Any other error (download, chain construction, ...) is logged as a
 *    warning and `[]` is returned.
 *
 * @param videoId  Video identifier used for download and cache keys.
 * @param duration Duration passed to `buildWindows`.
 * @param cache    Cache used for aggregate and per-chunk audio events.
 * @param opts     Audio options (`noAudio`, `gameProfile`, `maxParallel`, `audioPath`).
 * @returns Detected events sorted by ascending `time`.
 */
export async function processAudio(
  videoId: string,
  duration: number,
  cache: Cache,
  opts: AudioProcessorOpts,
): Promise<AudioEvent[]> {
  const audioEnabled = config.AUDIO_DETECTION_ENABLED && !opts.noAudio;
  if (!audioEnabled) return [];

  // Cache-first
  const cached = await cache.readAudioEvents(videoId, opts.gameProfile, config.AUDIO_PROVIDER);
  if (cached) {
    log.info(`[cache hit] Audio events loaded from cache (${cached.length} events)`);
    return cached;
  }

  try {
    const audioPath =
      opts.audioPath ?? (await downloadAudio(videoId, `${config.OUTPUT_DIR}/audio`));

    // Build the analyzer chain once per run from config
    const chain = createAnalyzerChain(config.AUDIO_PROVIDER);
    const detector = new EventDetector(chain);

    const providerNames = chain.map((a) => a.source).join(' → ');
    log.info(
      `Detecting audio events (chain: ${providerNames}, profile: ${opts.gameProfile}, max ${opts.maxParallel} parallel)...`,
    );

    const windows = buildWindows(duration, config.CHUNK_LENGTH_SEC, config.CHUNK_OVERLAP_SEC);
    const limit = pLimit(opts.maxParallel);

    const results = await Promise.allSettled(
      windows.map((window) =>
        limit(async () => {
          log.info(` Processing audio chunk ${window.start}s - ${window.end}s...`);

          const cachedChunk = await cache.readAudioChunk(
            videoId,
            opts.gameProfile,
            config.AUDIO_PROVIDER,
            window.start,
            window.end,
          );
          if (cachedChunk) {
            log.info(
              ` [cache hit] Audio chunk ${window.start}s - ${window.end}s (${cachedChunk.length} events)`,
            );
            return cachedChunk;
          }

          const windowLength = window.end - window.start;
          const slicePath = await sliceAudio(
            audioPath,
            window.start,
            windowLength,
            config.OUTPUT_DIR,
          );

          let events: AudioEvent[];
          try {
            events = await detector.detect(
              slicePath,
              opts.gameProfile,
              window.start,
              windowLength,
            );
          } finally {
            // Always remove the temporary slice, including when detection fails.
            try {
              await fs.unlink(slicePath);
            } catch (cleanupErr) {
              const reason =
                cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr);
              log.warn(` Could not remove temporary audio slice ${slicePath}: ${reason}`);
            }
          }

          await cache.writeAudioChunk(
            videoId,
            opts.gameProfile,
            config.AUDIO_PROVIDER,
            window.start,
            window.end,
            events,
          );

          return events;
        }),
      ),
    );

    let failedChunks = 0;
    const audioEvents: AudioEvent[] = results
      .flatMap((r, i) => {
        if (r.status === 'fulfilled') return r.value;
        failedChunks += 1;
        const w = windows[i]!;
        log.warn(
          ` Audio event detection failed for chunk ${w.start}s - ${w.end}s: ${String(r.reason)}`,
        );
        return [];
      })
      .sort((a, b) => a.time - b.time);

    log.info(`Audio event detection complete: ${audioEvents.length} events found`);

    if (failedChunks === 0) {
      await cache.writeAudioEvents(videoId, opts.gameProfile, config.AUDIO_PROVIDER, audioEvents);
    } else {
      // Successful chunks stay in the per-chunk cache; only the failed ones
      // need to be recomputed on the next run.
      log.warn(
        `Skipping aggregate audio event cache write: ${failedChunks} of ${windows.length} chunks failed`,
      );
    }
    return audioEvents;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    log.warn(`Audio event detection disabled due to error: ${message}`);
    return [];
  }
}
@@ -0,0 +1,80 @@
1
+ import { downloadVideo } from '../../services/videoDownloader/index.js';
2
+ import { generateClips, organizeClips } from '../../services/clipGenerator/index.js';
3
+ import { log } from '../../utils/logger.js';
4
+ import { config } from '../../config/index.js';
5
+ import type { RankedSegment, ClipExporterOpts } from '../../types/index.js';
6
+
7
+ export type { ClipExporterOpts };
8
+
9
/**
 * Stage 6 — Clip Exporter
 *
 * Produces clip files for the given segments using one of three paths:
 *   - `opts.localVideo` set: cut clips from that file with `generateClips`.
 *   - download mode is a number N (from `opts.downloadSections`, falling back
 *     to `config.DOWNLOAD_SECTIONS_MODE`): download only the first N segments
 *     via `downloadVideo(..., 'segments', ...)` and pass them to `organizeClips`.
 *   - otherwise: download the full video and cut clips with `generateClips`.
 *
 * @param videoId  Video identifier passed to the downloader and clip helpers.
 * @param segments Ranked segments to turn into clips.
 * @param opts     `localVideo`, `downloadSections` and `videoPath` overrides.
 * @returns The paths returned by `generateClips` / `organizeClips`.
 * @throws Error when the downloader reports a different mode than requested.
 */
export async function exportClips(
  videoId: string,
  segments: RankedSegment[],
  opts: ClipExporterOpts,
): Promise<string[]> {
  const { localVideo, videoPath } = opts;

  // Shared ffmpeg path for the local-video and full-download modes.
  const cutFrom = (sourcePath: string): Promise<string[]> =>
    generateClips(sourcePath, segments, videoId, videoPath, config.CLIP_CONCURRENCY);

  // Local file supplied by the user: no download needed.
  if (localVideo) {
    log.info(`Using local video: ${localVideo}`);
    return cutFrom(localVideo);
  }

  const sectionsMode = opts.downloadSections ?? config.DOWNLOAD_SECTIONS_MODE;

  // Anything other than a number means "download the whole video".
  if (typeof sectionsMode !== 'number') {
    log.info('Downloading full video via yt-dlp...');
    const fullDownload = await downloadVideo(videoId, 'all', [], videoPath);

    if (fullDownload.mode !== 'all') {
      throw new Error('Expected full-video download result but got segments result.');
    }

    return cutFrom(fullDownload.path);
  }

  // Numeric mode: fetch only the leading N segments.
  const wanted = segments.slice(0, sectionsMode);

  if (wanted.length < sectionsMode) {
    log.warn(
      `Requested ${sectionsMode} segments, but only ${wanted.length} are available above threshold.`,
    );
  }

  log.info(`Downloading ${wanted.length} segments via yt-dlp --download-sections...`);
  const sectionDownload = await downloadVideo(videoId, 'segments', wanted, videoPath);

  if (sectionDownload.mode !== 'segments') {
    throw new Error('Expected segments download result but got full-video result.');
  }

  return organizeClips(sectionDownload.paths, videoId, videoPath, config.CLIP_CONCURRENCY);
}