@thunderkiller/video-clipper 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/CHANGELOG.md +13 -0
  2. package/LICENSE +15 -0
  3. package/package.json +1 -1
  4. package/.github/workflows/ci.yml +0 -42
  5. package/.github/workflows/release.yml +0 -76
  6. package/.husky/pre-commit +0 -3
  7. package/.prettierignore +0 -6
  8. package/.prettierrc +0 -7
  9. package/.releaserc.json +0 -21
  10. package/AGENTS.md +0 -122
  11. package/docs/free-models.md +0 -78
  12. package/docs/plan.md +0 -442
  13. package/docs/refactorPhases.md +0 -105
  14. package/docs/yt-downloader.md +0 -440
  15. package/requirements.txt +0 -5
  16. package/scripts/detect_events.py +0 -81
  17. package/scripts/detect_events_whisper.py +0 -101
  18. package/scripts/transcribe_whisper.py +0 -70
  19. package/src/cli.ts +0 -186
  20. package/src/config/env.ts +0 -18
  21. package/src/config/index.ts +0 -2
  22. package/src/index.ts +0 -46
  23. package/src/pipeline/runner.ts +0 -147
  24. package/src/pipeline/stages/audioProcessor.ts +0 -127
  25. package/src/pipeline/stages/clipExporter.ts +0 -76
  26. package/src/pipeline/stages/segmentAnalyzer.ts +0 -72
  27. package/src/pipeline/stages/segmentSelector.ts +0 -39
  28. package/src/pipeline/stages/videoResolver.ts +0 -44
  29. package/src/services/audioAnalyzers/base.ts +0 -32
  30. package/src/services/audioAnalyzers/factory.ts +0 -69
  31. package/src/services/audioAnalyzers/gemini.ts +0 -136
  32. package/src/services/audioAnalyzers/index.ts +0 -6
  33. package/src/services/audioAnalyzers/whisper.ts +0 -80
  34. package/src/services/audioAnalyzers/yamnet.ts +0 -54
  35. package/src/services/audioDownloader/index.ts +0 -102
  36. package/src/services/chunkBuilder/index.ts +0 -82
  37. package/src/services/clipGenerator/index.ts +0 -210
  38. package/src/services/clipRefiner/index.ts +0 -141
  39. package/src/services/eventDetector/index.ts +0 -68
  40. package/src/services/llmAnalyzer/LLMAnalyzer.ts +0 -98
  41. package/src/services/llmAnalyzer/index.ts +0 -231
  42. package/src/services/metadataExtractor/index.ts +0 -83
  43. package/src/services/segmentRanker/index.ts +0 -88
  44. package/src/services/signalMerger/index.ts +0 -53
  45. package/src/services/transcriptAnalyzers/base.ts +0 -26
  46. package/src/services/transcriptAnalyzers/factory.ts +0 -66
  47. package/src/services/transcriptAnalyzers/gemini.ts +0 -24
  48. package/src/services/transcriptAnalyzers/index.ts +0 -6
  49. package/src/services/transcriptAnalyzers/whisper.ts +0 -68
  50. package/src/services/transcriptAnalyzers/ytdlp.ts +0 -19
  51. package/src/services/transcriptDetector/index.ts +0 -122
  52. package/src/services/transcriptFetcher/index.ts +0 -147
  53. package/src/services/urlParser/index.ts +0 -52
  54. package/src/services/videoDownloader/index.ts +0 -268
  55. package/src/types/analyzer.ts +0 -23
  56. package/src/types/audio.ts +0 -19
  57. package/src/types/cache.ts +0 -8
  58. package/src/types/cli.ts +0 -22
  59. package/src/types/config.ts +0 -151
  60. package/src/types/downloader.ts +0 -15
  61. package/src/types/factory.ts +0 -3
  62. package/src/types/index.ts +0 -40
  63. package/src/types/pipeline.ts +0 -60
  64. package/src/types/segment.ts +0 -43
  65. package/src/types/transcript.ts +0 -22
  66. package/src/types/video.ts +0 -18
  67. package/src/utils/cache.ts +0 -224
  68. package/src/utils/chunker.ts +0 -60
  69. package/src/utils/dumper.ts +0 -41
  70. package/src/utils/format.ts +0 -10
  71. package/src/utils/logger.ts +0 -17
  72. package/src/utils/modelFactory.ts +0 -71
  73. package/src/utils/redactConfig.ts +0 -23
  74. package/src/utils/sliceAudio.ts +0 -35
  75. package/test-trigger.txt +0 -1
  76. package/tests/analyzerFactory.test.ts +0 -146
  77. package/tests/audioEventDetector.test.ts +0 -69
  78. package/tests/cache.test.ts +0 -203
  79. package/tests/chunkBuilder.test.ts +0 -146
  80. package/tests/chunker.test.ts +0 -95
  81. package/tests/eventDetector.test.ts +0 -103
  82. package/tests/llmAnalyzer.test.ts +0 -283
  83. package/tests/segmentRanker.test.ts +0 -133
  84. package/tests/setup.ts +0 -48
  85. package/tests/signalMerger.test.ts +0 -197
  86. package/tests/transcriptDetector.test.ts +0 -150
  87. package/tests/transcriptFetcher.test.ts +0 -179
  88. package/tests/urlParser.test.ts +0 -70
  89. package/tsconfig.json +0 -16
  90. package/tsconfig.test.json +0 -8
  91. package/vitest.config.ts +0 -8
@@ -1,122 +0,0 @@
1
- import { buildMicroBlocks, buildLLMChunks } from '../chunkBuilder/index.js';
2
- import { log } from '../../utils/logger.js';
3
- import { config } from '../../config/index.js';
4
- import type { TranscriptAnalyzer } from '../transcriptAnalyzers/index.js';
5
- import type { Cache } from '../../utils/cache.js';
6
- import type {
7
- TranscriptLine,
8
- MicroBlock,
9
- LLMChunk,
10
- TranscriptDetectorResult,
11
- } from '../../types/index.js';
12
-
13
/**
 * Resolves the transcript for a video and prepares it for LLM analysis.
 *
 * The detector owns an ordered list of TranscriptAnalyzer instances. On a
 * cache miss the analyzers are tried in order: a throwing analyzer is logged
 * and skipped, and the first one that returns wins. If every analyzer throws,
 * the error raised by the last one is re-thrown.
 *
 * The resulting lines are grouped into micro-blocks and then into LLM chunks
 * using `MICRO_BLOCK_SEC`, `CHUNK_LENGTH_SEC` and `CHUNK_OVERLAP_SEC` from
 * the shared config.
 *
 * @example
 * const chain = createTranscriptChain('ytdlp,whisper');
 * const detector = new TranscriptDetector(chain);
 * const { lines, microBlocks, chunks } = await detector.detect(videoId, audioPath, cache);
 */
export class TranscriptDetector {
  /**
   * @param chain - Analyzers to try, highest priority first
   * @throws {Error} if `chain` is empty
   */
  constructor(private readonly chain: TranscriptAnalyzer[]) {
    if (chain.length === 0) {
      throw new Error('TranscriptDetector requires at least one TranscriptAnalyzer in the chain.');
    }
  }

  /**
   * Produces transcript lines, micro-blocks and LLM chunks for one video.
   *
   * @param videoId - Video ID used as the cache key and passed to each analyzer
   * @param audioPath - Audio file path handed to each analyzer, or null
   * @param cache - Cache consulted before, and written after, a chain fetch
   */
  async detect(
    videoId: string,
    audioPath: string | null,
    cache: Cache,
  ): Promise<TranscriptDetectorResult> {
    const lines = await this.loadLines(videoId, audioPath, cache);
    const microBlocks = this.buildMicroBlocks(lines);
    const chunks = this.buildChunks(microBlocks);

    return { lines, microBlocks, chunks };
  }

  /** Returns cached lines when present; otherwise fetches via the chain and caches the result. */
  private async loadLines(
    videoId: string,
    audioPath: string | null,
    cache: Cache,
  ): Promise<TranscriptLine[]> {
    const cached = await cache.readTranscript(videoId);
    if (cached) {
      log.info(`[cache hit] Transcript loaded from cache (${cached.length} lines)`);
      return cached;
    }

    const fetched = await this.fetchFromChain(videoId, audioPath);
    await cache.writeTranscript(videoId, fetched);
    return fetched;
  }

  /**
   * Tries each analyzer in order and returns the first successful result.
   * Re-throws the most recent failure once the chain is exhausted.
   */
  private async fetchFromChain(
    videoId: string,
    audioPath: string | null,
  ): Promise<TranscriptLine[]> {
    let lastError: unknown;

    for (const [position, analyzer] of this.chain.entries()) {
      try {
        const lines = await analyzer.detect(videoId, audioPath);
        log.info(`[transcript:${analyzer.source}] fetched ${lines.length} lines`);
        return lines;
      } catch (err) {
        lastError = err;
        const message = err instanceof Error ? err.message : String(err);
        const fallback = this.chain[position + 1];

        if (fallback) {
          log.warn(
            `[transcript:${analyzer.source}] failed, falling back to ${fallback.source}: ${message}`,
          );
        } else {
          log.error(`[transcript:${analyzer.source}] failed (no more fallbacks): ${message}`);
        }
      }
    }

    throw lastError;
  }

  /** Groups raw transcript lines into micro-blocks of `config.MICRO_BLOCK_SEC`. */
  private buildMicroBlocks(lines: TranscriptLine[]): MicroBlock[] {
    return buildMicroBlocks(lines, config.MICRO_BLOCK_SEC);
  }

  /** Builds LLM chunks from micro-blocks using the configured chunk length and overlap. */
  private buildChunks(microBlocks: MicroBlock[]): LLMChunk[] {
    return buildLLMChunks(microBlocks, config.CHUNK_LENGTH_SEC, config.CHUNK_OVERLAP_SEC);
  }
}
@@ -1,147 +0,0 @@
1
- import { execa } from 'execa';
2
- import fs from 'node:fs/promises';
3
- import os from 'node:os';
4
- import path from 'node:path';
5
- import { log } from '../../utils/logger.js';
6
- import { config } from '../../config/index.js';
7
- import type { TranscriptLine } from '../../types/index.js';
8
-
9
/**
 * Parses a WebVTT string into TranscriptLine[].
 *
 * Handles:
 *  - `HH:MM:SS.mmm --> HH:MM:SS.mmm` timing lines, including the short
 *    `MM:SS.mmm` form (hours omitted); trailing cue settings are ignored
 *  - inline cue tags such as `<00:01.000>` or `<c>…</c>` (stripped)
 *  - the `&amp;`, `&lt;`, `&gt;` and `&nbsp;` entities, decoded in a single
 *    pass so that escaped text like `&amp;lt;` becomes `&lt;`, not `<`
 *  - empty cues and cues whose text equals the previous cue (skipped)
 *
 * Exported for unit testing.
 *
 * @param vttContent - Raw contents of a `.vtt` file
 * @returns One entry per kept cue: text, start in seconds, and a non-negative duration in seconds
 */
export function parseVtt(vttContent: string): TranscriptLine[] {
  /** Cue timing line; the hour component is optional on either side. */
  const TIMESTAMP_RE =
    /^(?:(\d{2,}):)?(\d{2}):(\d{2})[.,](\d{3})\s+-->\s+(?:(\d{2,}):)?(\d{2}):(\d{2})[.,](\d{3})/;

  /** Entity name → replacement character. */
  const ENTITIES: Record<string, string> = { amp: '&', lt: '<', gt: '>', nbsp: ' ' };

  /** Converts captured timestamp components to seconds; a missing component counts as 0. */
  const toSeconds = (
    hours: string | undefined,
    minutes: string | undefined,
    seconds: string | undefined,
    millis: string | undefined,
  ): number =>
    parseInt(hours ?? '0', 10) * 3600 +
    parseInt(minutes ?? '0', 10) * 60 +
    parseInt(seconds ?? '0', 10) +
    parseInt(millis ?? '0', 10) / 1000;

  const lines = vttContent.split(/\r?\n/);
  const result: TranscriptLine[] = [];

  let i = 0;
  while (i < lines.length) {
    const match = TIMESTAMP_RE.exec(lines[i].trim());
    i++;

    if (!match) {
      continue;
    }

    const startSec = toSeconds(match[1], match[2], match[3], match[4]);
    const endSec = toSeconds(match[5], match[6], match[7], match[8]);

    // The cue payload is every following line up to the next blank line.
    const textLines: string[] = [];
    while (i < lines.length && lines[i].trim() !== '') {
      textLines.push(lines[i].trim());
      i++;
    }

    const text = textLines
      .join(' ')
      // Strip tags before decoding so an escaped "&lt;b&gt;" is not mistaken for a tag.
      .replace(/<[^>]+>/g, '')
      .replace(/&(amp|lt|gt|nbsp);/g, (_entity, name: string) => ENTITIES[name])
      .replace(/\s+/g, ' ')
      .trim();

    if (text.length === 0) {
      continue;
    }

    // YouTube VTT often repeats the same line in consecutive cues as text scrolls.
    if (result.length > 0 && result[result.length - 1].text === text) {
      continue;
    }

    result.push({ text, start: startSec, duration: Math.max(0, endSec - startSec) });
  }

  return result;
}
83
-
84
/**
 * Downloads the auto-generated English subtitles of a YouTube video with
 * yt-dlp and returns them as TranscriptLine[].
 *
 * yt-dlp writes the VTT file into a fresh temp directory, which is removed
 * again whether or not the fetch succeeds. When configured,
 * `YT_DLP_COOKIES_FROM_BROWSER` (preferred) or `YT_DLP_COOKIES_FILE` is
 * passed through to yt-dlp.
 *
 * @param videoId - YouTube video ID
 * @throws {Error} wrapping the yt-dlp failure message if the command fails
 * @throws {Error} if yt-dlp produced no `.vtt` file
 * @throws {Error} if the subtitle file contains no parseable cues
 */
export async function fetchTranscript(videoId: string): Promise<TranscriptLine[]> {
  const workDir = await fs.mkdtemp(path.join(os.tmpdir(), 'vc-vtt-'));

  try {
    // Cookie flags, when present, go in front of the other arguments.
    let cookieArgs: string[] = [];
    if (config.YT_DLP_COOKIES_FROM_BROWSER) {
      cookieArgs = ['--cookies-from-browser', config.YT_DLP_COOKIES_FROM_BROWSER];
    } else if (config.YT_DLP_COOKIES_FILE) {
      cookieArgs = ['--cookies', config.YT_DLP_COOKIES_FILE];
    }

    const ytDlpArgs = [
      ...cookieArgs,
      '--write-auto-sub',
      '--sub-format',
      'vtt',
      '--sub-lang',
      'en.*',
      '--skip-download',
      '--output',
      path.join(workDir, '%(id)s.%(ext)s'),
      `https://www.youtube.com/watch?v=${videoId}`,
    ];

    try {
      await execa('yt-dlp', ytDlpArgs);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      throw new Error(`yt-dlp failed to fetch subtitles for "${videoId}": ${message}`);
    }

    const produced = await fs.readdir(workDir);
    const vttFile = produced.find((name) => name.endsWith('.vtt'));

    if (!vttFile) {
      throw new Error(
        `No subtitles found for "${videoId}". The video may not have auto-generated captions.`,
      );
    }

    const cues = parseVtt(await fs.readFile(path.join(workDir, vttFile), 'utf8'));

    log.info(`Parsed ${cues.length} cues from subtitle file "${vttFile}".`);

    if (cues.length === 0) {
      throw new Error(`Subtitle file for "${videoId}" was empty or contained no parseable cues.`);
    }

    return cues;
  } finally {
    await fs.rm(workDir, { recursive: true, force: true });
  }
}
@@ -1,52 +0,0 @@
1
const VIDEO_ID_LENGTH = 11;

/** Characters a video ID may consist of (letters, digits, `-`, `_`). */
const VIDEO_ID_CHARS_RE = /^[A-Za-z0-9_-]+$/;

/** Accepted youtube.com hostnames, after a leading `www.` has been removed. */
const YOUTUBE_HOSTS = new Set(['youtube.com', 'm.youtube.com', 'music.youtube.com']);

/** Path prefixes that are immediately followed by the video ID. */
const ID_PATH_PREFIXES = ['/embed/', '/shorts/', '/live/'];

/**
 * Parses a YouTube URL and returns the 11-character video ID.
 * Supports (with or without `www.`, and on `m.` / `music.` subdomains):
 *  - https://www.youtube.com/watch?v=VIDEO_ID
 *  - https://youtu.be/VIDEO_ID
 *  - https://www.youtube.com/embed/VIDEO_ID
 *  - https://www.youtube.com/shorts/VIDEO_ID
 *  - https://www.youtube.com/live/VIDEO_ID
 *
 * The ID is validated for both length and character set because callers
 * interpolate it into a watch URL handed to yt-dlp.
 *
 * @param url - URL to inspect
 * @returns The extracted video ID
 * @throws {Error} if the string is not a URL, is not a recognised YouTube URL,
 *   or the video ID is not 11 characters from `[A-Za-z0-9_-]`
 */
export function parseUrl(url: string): string {
  let parsed: URL;

  try {
    parsed = new URL(url);
  } catch {
    throw new Error(`Invalid URL: "${url}"`);
  }

  const { hostname, pathname, searchParams } = parsed;
  const host = hostname.replace(/^www\./, '');

  let videoId: string | null = null;

  if (YOUTUBE_HOSTS.has(host)) {
    if (pathname === '/watch') {
      videoId = searchParams.get('v');
    } else {
      const prefix = ID_PATH_PREFIXES.find((candidate) => pathname.startsWith(candidate));
      if (prefix !== undefined) {
        videoId = pathname.slice(prefix.length).split('/')[0] ?? null;
      }
    }
  } else if (host === 'youtu.be') {
    videoId = pathname.slice(1).split('/')[0] ?? null;
  }

  if (!videoId) {
    throw new Error(`Could not extract video ID from URL: "${url}"`);
  }

  // A percent-encoded "?" inside the `v` parameter would survive URL parsing; cut it off.
  videoId = videoId.split('?')[0];

  if (videoId.length !== VIDEO_ID_LENGTH) {
    throw new Error(
      `Invalid video ID "${videoId}": expected ${VIDEO_ID_LENGTH} characters, got ${videoId.length}`,
    );
  }

  if (!VIDEO_ID_CHARS_RE.test(videoId)) {
    throw new Error(
      `Invalid video ID "${videoId}": only letters, digits, "-" and "_" are allowed`,
    );
  }

  return videoId;
}
@@ -1,268 +0,0 @@
1
- import { execa } from 'execa';
2
- import { promises as fs } from 'fs';
3
- import { join } from 'path';
4
- import pLimit from 'p-limit';
5
- import { config } from '../../config/index.js';
6
- import { log } from '../../utils/logger.js';
7
- import type { RankedSegment, DownloadMode, DownloadResult } from '../../types/index.js';
8
-
9
/**
 * Formats a timestamp for yt-dlp --download-sections.
 * Converts seconds to HH:MM:SS.mmm format with millisecond precision.
 *
 * The value is rounded to whole milliseconds *before* it is split into
 * components, so a fraction that rounds up (e.g. 59.9996) carries into the
 * seconds/minutes/hours fields instead of producing a four-digit ".1000".
 *
 * @param seconds - Offset in seconds
 * @returns The offset as a zero-padded `HH:MM:SS.mmm` string
 */
function formatTimestamp(seconds: number): string {
  const totalMs = Math.round(seconds * 1000);

  const h = Math.floor(totalMs / 3600000);
  const m = Math.floor((totalMs % 3600000) / 60000);
  const s = Math.floor((totalMs % 60000) / 1000);
  const ms = totalMs % 1000;

  const pad = (value: number, width: number): string => String(value).padStart(width, '0');

  return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)}.${pad(ms, 3)}`;
}
21
-
22
/**
 * Builds a `data` listener for a yt-dlp output stream.
 *
 * For every non-blank line of a received chunk that contains a
 * `[download] <percent>%` marker, the marker is written to `process.stdout`
 * prefixed with a carriage return.
 *
 * @param stream - Which yt-dlp stream the listener is attached to (not used by the listener itself)
 * @returns Listener accepting a Buffer or string chunk
 */
function displayProgress(stream: 'stdout' | 'stderr'): (data: Buffer | string) => void {
  return (data: Buffer | string) => {
    String(data)
      .split('\n')
      .filter((entry) => entry.trim())
      .forEach((entry) => {
        const hit = entry.match(/\[download\]\s+(\d+\.?\d*%)/);
        if (hit) {
          process.stdout.write(`\r${hit[0]}`);
        }
      });
  };
}
38
-
39
/**
 * Downloads a YouTube video using yt-dlp and returns the local file path.
 *
 * Strategy:
 *  - Auto-creates the download directory if it doesn't exist.
 *  - Skips the download if `<dir>/<videoId>.mp4` is already accessible.
 *  - Maps common failure modes (yt-dlp not installed, private video,
 *    geo-restriction) to clearer error messages.
 *
 * @param videoId - 11-character YouTube video ID
 * @param customPath - Custom output directory (optional, overrides DOWNLOAD_DIR)
 * @returns Path of the mp4 file inside the download directory
 * @throws {Error} if yt-dlp is not installed or the download fails
 */
export async function downloadFullVideo(videoId: string, customPath?: string): Promise<string> {
  const downloadDir = customPath || config.DOWNLOAD_DIR;
  await fs.mkdir(downloadDir, { recursive: true });

  const outputPath = join(downloadDir, `${videoId}.mp4`);

  try {
    await fs.access(outputPath);
    log.info(`Video already downloaded: ${outputPath}`);
    return outputPath;
  } catch {
    // Target file is not accessible yet: fall through and download it.
  }

  log.info(`Downloading full video ${videoId} via yt-dlp...`);

  try {
    const args = [
      '-f',
      'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
      '--merge-output-format',
      'mp4',
      '-o',
      outputPath,
      '--no-playlist',
      '--newline',
      `https://www.youtube.com/watch?v=${videoId}`,
    ];

    // Cookie flags, when configured, are placed in front of the other arguments.
    if (config.YT_DLP_COOKIES_FROM_BROWSER) {
      args.splice(0, 0, '--cookies-from-browser', config.YT_DLP_COOKIES_FROM_BROWSER);
    } else if (config.YT_DLP_COOKIES_FILE) {
      args.splice(0, 0, '--cookies', config.YT_DLP_COOKIES_FILE);
    }

    const subprocess = execa('yt-dlp', args);

    subprocess.stdout?.on('data', displayProgress('stdout'));
    subprocess.stderr?.on('data', displayProgress('stderr'));

    await subprocess;
    // Terminate the carriage-return progress line.
    process.stdout.write('\n');
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);

    if (message.includes('command not found') || message.includes('ENOENT')) {
      throw new Error('yt-dlp is required. Install it: https://github.com/yt-dlp/yt-dlp');
    }

    if (message.includes('Private video') || message.includes('Sign in')) {
      throw new Error(`Video "${videoId}" is private and cannot be downloaded.`);
    }

    // The message also carries the command line (output path, video ID), so a bare
    // "geo" substring test would fire on unrelated failures. Match the phrases only.
    const geoBlocked =
      message.includes('not available in your country') ||
      /geo[- ]?(?:restrict|block)/i.test(message);
    if (geoBlocked) {
      throw new Error(`Video "${videoId}" is geo-blocked in your region.`);
    }

    throw new Error(`Download failed: ${message}`);
  }

  log.info(`Download complete: ${outputPath}`);
  return outputPath;
}
114
-
115
/**
 * Downloads a single segment using yt-dlp --download-sections.
 *
 * The segment bounds are shifted by `config.TIMESTAMP_OFFSET_SECONDS`, the
 * start is clamped to 0 and the end is kept at least one second after the
 * start. The output file is named `<videoId>_<floor(start)>_<ceil(end)>.mp4`
 * and the download is skipped when that file is already accessible.
 *
 * @param videoId - YouTube video ID
 * @param segment - Segment to download; `start`, `end` and `reason` are read
 * @param index - Zero-based position of the segment, used for log output only
 * @param customPath - Custom output directory (optional, overrides DOWNLOAD_DIR)
 * @returns Path of the segment's mp4 file
 * @throws {Error} if yt-dlp is not installed or the download fails
 */
async function downloadSegment(
  videoId: string,
  segment: RankedSegment,
  index: number,
  customPath?: string,
): Promise<string> {
  const downloadDir = customPath || config.DOWNLOAD_DIR;
  await fs.mkdir(downloadDir, { recursive: true });

  const adjustedStart = Math.max(0, segment.start + config.TIMESTAMP_OFFSET_SECONDS);
  const adjustedEnd = Math.max(adjustedStart + 1, segment.end + config.TIMESTAMP_OFFSET_SECONDS);
  const startInt = Math.floor(adjustedStart);
  const endInt = Math.ceil(adjustedEnd);
  const outputPath = join(downloadDir, `${videoId}_${startInt}_${endInt}.mp4`);

  try {
    await fs.access(outputPath);
    // Only the position is known here, not the total, so no "N/M" is printed.
    log.info(`Segment ${index + 1} already downloaded: ${outputPath}`);
    return outputPath;
  } catch {
    // Target file is not accessible yet: fall through and download it.
  }

  const startTs = formatTimestamp(adjustedStart);
  const endTs = formatTimestamp(adjustedEnd);

  log.info(`Downloading segment ${index + 1}: ${startTs} - ${endTs} (${segment.reason})`);
  log.info(`  Requested: ${segment.start.toFixed(2)}s - ${segment.end.toFixed(2)}s`);
  if (config.TIMESTAMP_OFFSET_SECONDS !== 0) {
    log.info(
      `  Adjusted: ${adjustedStart.toFixed(2)}s - ${adjustedEnd.toFixed(2)}s (offset: ${config.TIMESTAMP_OFFSET_SECONDS}s)`,
    );
  }

  try {
    const args = [
      '-f',
      'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
      '--merge-output-format',
      'mp4',
      '--download-sections',
      `*${startTs}-${endTs}`,
      '-o',
      outputPath,
      '--no-playlist',
      '--newline',
      `https://www.youtube.com/watch?v=${videoId}`,
    ];

    // Cookie flags, when configured, are placed in front of the other arguments.
    if (config.YT_DLP_COOKIES_FROM_BROWSER) {
      args.splice(0, 0, '--cookies-from-browser', config.YT_DLP_COOKIES_FROM_BROWSER);
    } else if (config.YT_DLP_COOKIES_FILE) {
      args.splice(0, 0, '--cookies', config.YT_DLP_COOKIES_FILE);
    }

    const subprocess = execa('yt-dlp', args);

    subprocess.stdout?.on('data', displayProgress('stdout'));
    subprocess.stderr?.on('data', displayProgress('stderr'));

    await subprocess;
    // Terminate the carriage-return progress line.
    process.stdout.write('\n');
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);

    if (message.includes('command not found') || message.includes('ENOENT')) {
      throw new Error('yt-dlp is required. Install it: https://github.com/yt-dlp/yt-dlp');
    }

    if (message.includes('Private video') || message.includes('Sign in')) {
      throw new Error(`Video "${videoId}" is private and cannot be downloaded.`);
    }

    // The message also carries the command line (output path, video ID), so a bare
    // "geo" substring test would fire on unrelated failures. Match the phrases only.
    const geoBlocked =
      message.includes('not available in your country') ||
      /geo[- ]?(?:restrict|block)/i.test(message);
    if (geoBlocked) {
      throw new Error(`Video "${videoId}" is geo-blocked in your region.`);
    }

    throw new Error(`Segment download failed: ${message}`);
  }

  log.info(`Segment complete: ${outputPath}`);
  return outputPath;
}
199
-
200
/**
 * Downloads several segments concurrently and returns the paths that succeeded.
 *
 * Concurrency is capped at the smaller of `config.LLM_CONCURRENCY` and 3.
 * A failed segment is logged as a warning and left out of the result; it
 * does not abort the remaining downloads.
 *
 * @param videoId - YouTube video ID
 * @param segments - Segments to download
 * @param customPath - Custom output directory (optional)
 * @returns Paths of the successfully downloaded segments, in input order
 */
async function downloadSegments(
  videoId: string,
  segments: RankedSegment[],
  customPath?: string,
): Promise<string[]> {
  if (segments.length === 0) {
    return [];
  }

  const limit = pLimit(Math.min(config.LLM_CONCURRENCY, 3));
  const jobs = segments.map((segment, index) =>
    limit(() => downloadSegment(videoId, segment, index, customPath)),
  );
  const outcomes: Array<PromiseSettledResult<string>> = await Promise.allSettled(jobs);

  return outcomes.flatMap((outcome, position) => {
    if (outcome.status === 'fulfilled') {
      return [outcome.value];
    }

    const segment = segments[position];
    const reason = outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason);
    log.warn(
      `Failed to download segment [${formatTimestamp(segment.start)} – ${formatTimestamp(segment.end)}] (rank ${segment.rank}): ${reason}`,
    );
    return [];
  });
}
235
-
236
/**
 * Entry point for downloads: fetches either the whole video or a set of clips.
 *
 * @param videoId - 11-character YouTube video ID
 * @param mode - 'all' downloads the full video; 'segments' downloads one clip per segment
 * @param segments - Ranked segments, used only in 'segments' mode
 * @param customPath - Custom output directory (optional, overrides config defaults)
 * @returns `{ mode: 'all', path }` or `{ mode: 'segments', paths }`; `paths` is empty when no segments were given
 * @throws {Error} if `mode` is neither 'all' nor 'segments'
 */
export async function downloadVideo(
  videoId: string,
  mode: DownloadMode = 'all',
  segments: RankedSegment[] = [],
  customPath?: string,
): Promise<DownloadResult> {
  switch (mode) {
    case 'all': {
      const path = await downloadFullVideo(videoId, customPath);
      return { mode: 'all', path };
    }

    case 'segments': {
      if (segments.length === 0) {
        log.warn('No segments provided for download-segments mode. Skipping download.');
        return { mode: 'segments', paths: [] };
      }

      log.info(`Downloading ${segments.length} segments in parallel...`);
      const paths = await downloadSegments(videoId, segments, customPath);
      return { mode: 'segments', paths };
    }

    default:
      throw new Error(`Invalid download mode: ${mode}`);
  }
}
@@ -1,23 +0,0 @@
1
- import type { TranscriptLine, MicroBlock, LLMChunk, ChunkEvaluation } from './index.js';
2
-
3
/** Transcript lines together with the micro-blocks and LLM chunks built from them. */
export interface TranscriptDetectorResult {
  lines: TranscriptLine[];
  microBlocks: MicroBlock[];
  chunks: LLMChunk[];
}

/** The transcript detector result plus one evaluation list for the chunks. */
export interface LLMAnalyzerResult extends TranscriptDetectorResult {
  chunkEvals: ChunkEvaluation[];
}

/** Options accepted by the LLM analyzer. */
export interface LLMAnalyzerOpts {
  videoId: string;
  /** Audio file path, or null when none is available. */
  audioPath: string | null;
  audioEvents: import('./audio.js').AudioEvent[];
  /** May be omitted. */
  maxChunks?: number;
  maxParallel: number;
  noCache: boolean;
}
@@ -1,19 +0,0 @@
1
- import { z } from 'zod';
2
-
3
/** Sources an audio event may be attributed to. */
const AUDIO_EVENT_SOURCES = ['gemini', 'yamnet', 'whisper'] as const;

/**
 * Zod schema for a single audio event: a numeric `time`, an `event` label,
 * a `confidence` within [0, 1], and the `source` that reported it.
 */
export const AudioEventSchema = z.object({
  time: z.number(),
  event: z.string(),
  confidence: z.number().min(0).max(1),
  source: z.enum(AUDIO_EVENT_SOURCES),
});

/** Static type inferred from {@link AudioEventSchema}. */
export type AudioEvent = z.infer<typeof AudioEventSchema>;
10
-
11
/** Signal origins a merged candidate may carry. */
const CANDIDATE_SOURCES = ['transcript', 'audio', 'both'] as const;

/**
 * Zod schema for a merged candidate: numeric `start` / `end`, a `score`
 * within [1, 10], the `source` of the signal, a textual `reason`, and an
 * optional `audio_event` label.
 */
export const MergedCandidateSchema = z.object({
  start: z.number(),
  end: z.number(),
  score: z.number().min(1).max(10),
  source: z.enum(CANDIDATE_SOURCES),
  reason: z.string(),
  audio_event: z.string().optional(),
});

/** Static type inferred from {@link MergedCandidateSchema}. */
export type MergedCandidate = z.infer<typeof MergedCandidateSchema>;
@@ -1,8 +0,0 @@
1
- import { z } from 'zod';
2
-
3
/** Field validators of a segment refinement: two plain numbers. */
const segmentRefinementShape = {
  refined_start: z.number(),
  refined_end: z.number(),
};

/** Zod schema for a refined segment boundary pair (`refined_start`, `refined_end`). */
export const SegmentRefinementSchema = z.object(segmentRefinementShape);

/** Static type inferred from {@link SegmentRefinementSchema}. */
export type SegmentRefinement = z.infer<typeof SegmentRefinementSchema>;
package/src/types/cli.ts DELETED
@@ -1,22 +0,0 @@
1
/**
 * Shape of the parsed command-line arguments.
 *
 * Lives in the shared types so that the producer (`src/cli.ts`) and the
 * consumer (`src/pipeline/runner.ts`) rely on one definition.
 */
export interface CliArgs {
  // Boolean switches: always present.
  clip: boolean;
  noCache: boolean;
  noAudio: boolean;
  help: boolean;

  // Always present as a key, but the value may be undefined.
  url: string | undefined;
  videoPath: string | undefined;
  outputJson: string | undefined;
  downloadSections: 'all' | number | undefined;
  threshold: number | undefined;
  topN: number | undefined;
  maxDuration: number | undefined;
  maxChunks: number | undefined;
  maxParallel: number | undefined;

  // May be left out entirely.
  localVideo?: string;
  gameProfile?: string;
}