@thunderkiller/video-clipper 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +130 -0
- package/.github/workflows/ci.yml +42 -0
- package/.github/workflows/release.yml +72 -0
- package/.husky/pre-commit +3 -0
- package/.prettierignore +6 -0
- package/.prettierrc +7 -0
- package/.releaserc.json +21 -0
- package/AGENTS.md +122 -0
- package/CHANGELOG.md +45 -0
- package/README.md +410 -0
- package/dist/cli.js +187 -0
- package/dist/config/env.js +14 -0
- package/dist/config/index.js +1 -0
- package/dist/index.js +35 -0
- package/dist/pipeline/runner.js +132 -0
- package/dist/pipeline/stages/audioProcessor.js +75 -0
- package/dist/pipeline/stages/clipExporter.js +44 -0
- package/dist/pipeline/stages/segmentAnalyzer.js +46 -0
- package/dist/pipeline/stages/segmentSelector.js +23 -0
- package/dist/pipeline/stages/videoResolver.js +34 -0
- package/dist/services/audioAnalyzers/base.js +13 -0
- package/dist/services/audioAnalyzers/factory.js +56 -0
- package/dist/services/audioAnalyzers/gemini.js +109 -0
- package/dist/services/audioAnalyzers/index.js +5 -0
- package/dist/services/audioAnalyzers/whisper.js +62 -0
- package/dist/services/audioAnalyzers/yamnet.js +40 -0
- package/dist/services/audioDownloader/index.js +81 -0
- package/dist/services/chunkBuilder/index.js +71 -0
- package/dist/services/clipGenerator/index.js +156 -0
- package/dist/services/clipRefiner/index.js +103 -0
- package/dist/services/eventDetector/index.js +54 -0
- package/dist/services/llmAnalyzer/LLMAnalyzer.js +63 -0
- package/dist/services/llmAnalyzer/index.js +173 -0
- package/dist/services/metadataExtractor/index.js +66 -0
- package/dist/services/segmentRanker/index.js +40 -0
- package/dist/services/signalMerger/index.js +36 -0
- package/dist/services/transcriptAnalyzers/base.js +13 -0
- package/dist/services/transcriptAnalyzers/factory.js +51 -0
- package/dist/services/transcriptAnalyzers/gemini.js +19 -0
- package/dist/services/transcriptAnalyzers/index.js +5 -0
- package/dist/services/transcriptAnalyzers/whisper.js +55 -0
- package/dist/services/transcriptAnalyzers/ytdlp.js +16 -0
- package/dist/services/transcriptDetector/index.js +102 -0
- package/dist/services/transcriptFetcher/index.js +124 -0
- package/dist/services/urlParser/index.js +46 -0
- package/dist/services/videoDownloader/index.js +212 -0
- package/dist/types/audio.js +15 -0
- package/dist/types/cli.js +1 -0
- package/dist/types/config.js +150 -0
- package/dist/types/index.js +5 -0
- package/dist/types/pipeline.js +9 -0
- package/dist/types/segment.js +36 -0
- package/dist/types/transcript.js +16 -0
- package/dist/types/video.js +14 -0
- package/dist/utils/cache.js +143 -0
- package/dist/utils/chunker.js +51 -0
- package/dist/utils/dumper.js +36 -0
- package/dist/utils/format.js +10 -0
- package/dist/utils/logger.js +16 -0
- package/dist/utils/modelFactory.js +60 -0
- package/dist/utils/redactConfig.js +20 -0
- package/dist/utils/sliceAudio.js +26 -0
- package/docs/free-models.md +78 -0
- package/docs/plan.md +442 -0
- package/docs/refactorPhases.md +105 -0
- package/docs/yt-downloader.md +440 -0
- package/package.json +65 -0
- package/requirements.txt +5 -0
- package/scripts/detect_events.py +81 -0
- package/scripts/detect_events_whisper.py +101 -0
- package/scripts/transcribe_whisper.py +70 -0
- package/src/cli.ts +186 -0
- package/src/config/env.ts +18 -0
- package/src/config/index.ts +2 -0
- package/src/index.ts +46 -0
- package/src/pipeline/runner.ts +155 -0
- package/src/pipeline/stages/audioProcessor.ts +129 -0
- package/src/pipeline/stages/clipExporter.ts +80 -0
- package/src/pipeline/stages/segmentAnalyzer.ts +72 -0
- package/src/pipeline/stages/segmentSelector.ts +39 -0
- package/src/pipeline/stages/videoResolver.ts +47 -0
- package/src/services/audioAnalyzers/base.ts +32 -0
- package/src/services/audioAnalyzers/factory.ts +71 -0
- package/src/services/audioAnalyzers/gemini.ts +137 -0
- package/src/services/audioAnalyzers/index.ts +6 -0
- package/src/services/audioAnalyzers/whisper.ts +80 -0
- package/src/services/audioAnalyzers/yamnet.ts +54 -0
- package/src/services/audioDownloader/index.ts +102 -0
- package/src/services/chunkBuilder/index.ts +86 -0
- package/src/services/clipGenerator/index.ts +210 -0
- package/src/services/clipRefiner/index.ts +141 -0
- package/src/services/eventDetector/index.ts +68 -0
- package/src/services/llmAnalyzer/LLMAnalyzer.ts +114 -0
- package/src/services/llmAnalyzer/index.ts +231 -0
- package/src/services/metadataExtractor/index.ts +83 -0
- package/src/services/segmentRanker/index.ts +88 -0
- package/src/services/signalMerger/index.ts +53 -0
- package/src/services/transcriptAnalyzers/base.ts +26 -0
- package/src/services/transcriptAnalyzers/factory.ts +67 -0
- package/src/services/transcriptAnalyzers/gemini.ts +24 -0
- package/src/services/transcriptAnalyzers/index.ts +6 -0
- package/src/services/transcriptAnalyzers/whisper.ts +68 -0
- package/src/services/transcriptAnalyzers/ytdlp.ts +19 -0
- package/src/services/transcriptDetector/index.ts +128 -0
- package/src/services/transcriptFetcher/index.ts +151 -0
- package/src/services/urlParser/index.ts +53 -0
- package/src/services/videoDownloader/index.ts +282 -0
- package/src/types/audio.ts +19 -0
- package/src/types/cli.ts +22 -0
- package/src/types/config.ts +174 -0
- package/src/types/index.ts +26 -0
- package/src/types/pipeline.ts +93 -0
- package/src/types/segment.ts +43 -0
- package/src/types/transcript.ts +22 -0
- package/src/types/video.ts +18 -0
- package/src/utils/cache.ts +223 -0
- package/src/utils/chunker.ts +60 -0
- package/src/utils/dumper.ts +41 -0
- package/src/utils/format.ts +10 -0
- package/src/utils/logger.ts +17 -0
- package/src/utils/modelFactory.ts +71 -0
- package/src/utils/redactConfig.ts +23 -0
- package/src/utils/sliceAudio.ts +35 -0
- package/test-trigger.txt +1 -0
- package/tests/analyzerFactory.test.ts +146 -0
- package/tests/audioEventDetector.test.ts +69 -0
- package/tests/cache.test.ts +203 -0
- package/tests/chunkBuilder.test.ts +146 -0
- package/tests/chunker.test.ts +95 -0
- package/tests/eventDetector.test.ts +103 -0
- package/tests/llmAnalyzer.test.ts +283 -0
- package/tests/segmentRanker.test.ts +133 -0
- package/tests/setup.ts +48 -0
- package/tests/signalMerger.test.ts +197 -0
- package/tests/transcriptDetector.test.ts +150 -0
- package/tests/transcriptFetcher.test.ts +179 -0
- package/tests/urlParser.test.ts +70 -0
- package/tsconfig.json +16 -0
- package/tsconfig.test.json +8 -0
- package/vitest.config.ts +8 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { execa } from 'execa';
|
|
2
|
+
import fs from 'node:fs/promises';
|
|
3
|
+
import os from 'node:os';
|
|
4
|
+
import path from 'node:path';
|
|
5
|
+
import { log } from '../../utils/logger.js';
|
|
6
|
+
import { config } from '../../config/index.js';
|
|
7
|
+
/**
 * Parses WebVTT subtitle text into an array of transcript lines.
 *
 * Each returned entry is `{ text, start, duration }`, with `start` and
 * `duration` expressed in seconds.
 *
 * Handles:
 * - `HH:MM:SS.mmm --> HH:MM:SS.mmm` timing lines, as well as the hour-less
 *   `MM:SS.mmm --> MM:SS.mmm` form; anything after the end time on the same
 *   line (cue settings) is ignored
 * - inline cue tags such as timestamps, `<c>`, `<b>`, `<i>` (stripped)
 * - the character references for amp, lt, gt and nbsp (decoded in one pass,
 *   so an escaped ampersand followed by "lt;" is not unescaped twice)
 * - empty cues and cues whose text equals the previously kept cue (skipped)
 *
 * Exported for unit testing.
 *
 * @param {string} vttContent - Raw contents of a .vtt file.
 * @returns {Array<{text: string, start: number, duration: number}>} Parsed cues in file order.
 */
export function parseVtt(vttContent) {
    const lines = vttContent.split(/\r?\n/);
    const result = [];
    // Timing line: [HH:]MM:SS.mmm --> [HH:]MM:SS.mmm (optional cue settings may follow).
    const TIMESTAMP_RE = /^(?:(\d{2,}):)?(\d{2}):(\d{2})[.,](\d{3})\s+-->\s+(?:(\d{2,}):)?(\d{2}):(\d{2})[.,](\d{3})/;
    // Character references decoded in cue text, keyed by entity name.
    const ENTITY_TEXT = { amp: '&', lt: '<', gt: '>', nbsp: ' ' };
    const ENTITY_RE = /&(amp|lt|gt|nbsp);/g;
    // Converts captured timestamp components to seconds; hours may be absent.
    const toSeconds = (hours, minutes, seconds, millis) => Number.parseInt(hours ?? '0', 10) * 3600 +
        Number.parseInt(minutes, 10) * 60 +
        Number.parseInt(seconds, 10) +
        Number.parseInt(millis, 10) / 1000;
    let i = 0;
    while (i < lines.length) {
        const match = TIMESTAMP_RE.exec(lines[i].trim());
        if (!match) {
            // Header, cue identifier, NOTE block or blank line: nothing to collect.
            i++;
            continue;
        }
        const startSec = toSeconds(match[1], match[2], match[3], match[4]);
        const endSec = toSeconds(match[5], match[6], match[7], match[8]);
        // Collect cue text lines until a blank line or EOF.
        i++;
        const textLines = [];
        while (i < lines.length && lines[i].trim() !== '') {
            textLines.push(lines[i].trim());
            i++;
        }
        // Strip tags first so that decoded "<" / ">" characters are never mistaken for markup.
        const text = textLines
            .join(' ')
            .replace(/<[^>]+>/g, '')
            .replace(ENTITY_RE, (_whole, name) => ENTITY_TEXT[name])
            .replace(/\s+/g, ' ')
            .trim();
        if (text.length === 0) {
            continue;
        }
        // Deduplicate: skip if this cue text is identical to the previous one
        // (YouTube VTT often repeats the same line as text scrolls).
        if (result.length > 0 && result[result.length - 1].text === text) {
            continue;
        }
        result.push({ text, start: startSec, duration: Math.max(0, endSec - startSec) });
    }
    return result;
}
|
|
68
|
+
/**
 * Fetches the transcript for a given YouTube video ID using yt-dlp
 * auto-generated subtitles (VTT format).
 *
 * yt-dlp writes the subtitle file(s) into a fresh temp directory; one file is
 * parsed with parseVtt() and the directory is always removed afterwards, on
 * success and on failure. Cookie config (YT_DLP_COOKIES_FROM_BROWSER /
 * YT_DLP_COOKIES_FILE) is forwarded to yt-dlp automatically.
 *
 * @param videoId - YouTube video ID, interpolated into the watch URL
 * @returns Parsed transcript lines (never empty)
 * @throws {Error} wrapping the yt-dlp failure (original error kept as `cause`)
 * @throws {Error} if no subtitle file is produced
 * @throws {Error} if the subtitle file contains no parseable cues
 */
export async function fetchTranscript(videoId) {
    const tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'vc-vtt-'));
    try {
        const args = [
            '--write-auto-sub',
            '--sub-format',
            'vtt',
            '--sub-lang',
            'en.*',
            '--skip-download',
            '--output',
            path.join(tmpDir, '%(id)s.%(ext)s'),
            `https://www.youtube.com/watch?v=${videoId}`,
        ];
        if (config.YT_DLP_COOKIES_FROM_BROWSER) {
            args.unshift('--cookies-from-browser', config.YT_DLP_COOKIES_FROM_BROWSER);
        }
        else if (config.YT_DLP_COOKIES_FILE) {
            args.unshift('--cookies', config.YT_DLP_COOKIES_FILE);
        }
        try {
            await execa('yt-dlp', args);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            // Keep the original error reachable for callers that need exit code / stderr.
            throw new Error(`yt-dlp failed to fetch subtitles for "${videoId}": ${message}`, { cause: err });
        }
        // Find the downloaded .vtt file (yt-dlp names it <id>.<lang>.vtt).
        // The "en.*" language pattern can match several variants and readdir order is
        // unspecified, so pick deterministically: plain "en" first, else first by name.
        const vttFiles = (await fs.readdir(tmpDir)).filter((f) => f.endsWith('.vtt')).sort();
        const vttFile = vttFiles.find((f) => f.endsWith('.en.vtt')) ?? vttFiles[0];
        if (!vttFile) {
            throw new Error(`No subtitles found for "${videoId}". The video may not have auto-generated captions.`);
        }
        const content = await fs.readFile(path.join(tmpDir, vttFile), 'utf8');
        const lines = parseVtt(content);
        log.info(`Parsed ${lines.length} cues from subtitle file "${vttFile}".`);
        if (lines.length === 0) {
            throw new Error(`Subtitle file for "${videoId}" was empty or contained no parseable cues.`);
        }
        return lines;
    }
    finally {
        // Best-effort cleanup; `force` keeps this from throwing if the directory is already gone.
        await fs.rm(tmpDir, { recursive: true, force: true });
    }
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
const VIDEO_ID_LENGTH = 11;
// Characters allowed in a video ID (URL-safe base64 alphabet). Rejecting anything
// else keeps "/" and "." out of IDs that are later used to build file paths.
const VIDEO_ID_CHARS_RE = /^[A-Za-z0-9_-]+$/;
/**
 * Parses a YouTube URL and returns the 11-character video ID.
 * Supports (with or without the `www.` / `m.` host prefix):
 * - https://www.youtube.com/watch?v=VIDEO_ID
 * - https://youtu.be/VIDEO_ID
 * - https://www.youtube.com/embed/VIDEO_ID
 * - https://www.youtube.com/shorts/VIDEO_ID
 *
 * @param url - URL string to parse
 * @returns The extracted video ID
 * @throws {Error} if the URL is not a valid YouTube URL, or the video ID is
 *   not 11 characters long or contains characters outside `A-Z a-z 0-9 _ -`
 */
export function parseUrl(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        throw new Error(`Invalid URL: "${url}"`);
    }
    const { hostname, pathname, searchParams } = parsed;
    const host = hostname.replace(/^(?:www|m)\./, '');
    let videoId = null;
    if (host === 'youtube.com') {
        if (pathname === '/watch') {
            videoId = searchParams.get('v');
        }
        else {
            // "/embed/ID/..." or "/shorts/ID/..." -> ['', kind, id, ...]
            const [, kind, id] = pathname.split('/');
            if (kind === 'embed' || kind === 'shorts') {
                videoId = id ?? null;
            }
        }
    }
    else if (host === 'youtu.be') {
        videoId = pathname.slice(1).split('/')[0];
    }
    if (!videoId) {
        throw new Error(`Could not extract video ID from URL: "${url}"`);
    }
    // Strip any extra query params that may have been part of the path segment
    videoId = videoId.split('?')[0];
    if (videoId.length !== VIDEO_ID_LENGTH) {
        throw new Error(`Invalid video ID "${videoId}": expected ${VIDEO_ID_LENGTH} characters, got ${videoId.length}`);
    }
    if (!VIDEO_ID_CHARS_RE.test(videoId)) {
        throw new Error(`Invalid video ID "${videoId}": only letters, digits, "_" and "-" are allowed`);
    }
    return videoId;
}
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import { execa } from 'execa';
|
|
2
|
+
import { promises as fs } from 'fs';
|
|
3
|
+
import { join } from 'path';
|
|
4
|
+
import pLimit from 'p-limit';
|
|
5
|
+
import { config } from '../../config/index.js';
|
|
6
|
+
import { log } from '../../utils/logger.js';
|
|
7
|
+
/**
 * Formats a timestamp for yt-dlp --download-sections.
 * Converts seconds to HH:MM:SS.mmm format with millisecond precision.
 *
 * The value is rounded to whole milliseconds before it is split into fields,
 * so a fraction that rounds up (e.g. 59.9996) carries into the seconds and
 * minutes instead of producing a four-digit millisecond field.
 *
 * @param seconds - Non-negative time offset in seconds
 * @returns Timestamp string such as "01:02:03.456"
 */
function formatTimestamp(seconds) {
    const totalMs = Math.round(seconds * 1000);
    const h = Math.floor(totalMs / 3600000);
    const m = Math.floor((totalMs % 3600000) / 60000);
    const s = Math.floor((totalMs % 60000) / 1000);
    const ms = totalMs % 1000;
    const pad = (value, width) => String(value).padStart(width, '0');
    return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)}.${pad(ms, 3)}`;
}
|
|
19
|
+
/**
 * Builds a `data` listener that echoes yt-dlp download progress.
 *
 * Each chunk is split into lines; for every line containing a
 * `[download] <number>%` marker, that marker is written to stdout prefixed
 * with a carriage return so successive updates overwrite one another.
 *
 * @param stream - Label of the stream being watched; currently unused
 * @returns Listener accepting a chunk (string or Buffer)
 */
function displayProgress(stream) {
    const PROGRESS_RE = /\[download\]\s+(\d+\.?\d*%)/;
    return function onChunk(data) {
        String(data)
            .split('\n')
            .forEach((rawLine) => {
            if (rawLine.trim() === '') {
                return;
            }
            const hit = PROGRESS_RE.exec(rawLine);
            if (hit !== null) {
                process.stdout.write(`\r${hit[0]}`);
            }
        });
    };
}
|
|
34
|
+
/**
 * Downloads a complete YouTube video with yt-dlp and returns where it was saved.
 *
 * Steps:
 * - Creates the download directory when missing.
 * - Returns immediately if `<dir>/<videoId>.mp4` is already present.
 * - Otherwise runs yt-dlp (forwarding cookie settings from config) while
 *   echoing its progress, and maps well-known failure texts (yt-dlp missing,
 *   private video, geo-block) to clearer error messages.
 *
 * @param videoId - 11-character YouTube video ID
 * @param customPath - Custom output directory (optional, overrides DOWNLOAD_DIR)
 * @returns Path to the mp4 file, built by joining the directory and `<videoId>.mp4`
 * @throws {Error} if yt-dlp is not installed or the download fails
 */
export async function downloadFullVideo(videoId, customPath) {
    const targetDir = customPath || config.DOWNLOAD_DIR;
    await fs.mkdir(targetDir, { recursive: true });
    const outputPath = join(targetDir, `${videoId}.mp4`);
    // fs.access rejects when the file is missing; that simply means "download it".
    let alreadyDownloaded = true;
    try {
        await fs.access(outputPath);
    }
    catch {
        alreadyDownloaded = false;
    }
    if (alreadyDownloaded) {
        log.info(`Video already downloaded: ${outputPath}`);
        return outputPath;
    }
    log.info(`Downloading full video ${videoId} via yt-dlp...`);
    try {
        // Browser cookies win over a cookie file; neither is passed when unset.
        let cookieArgs = [];
        if (config.YT_DLP_COOKIES_FROM_BROWSER) {
            cookieArgs = ['--cookies-from-browser', config.YT_DLP_COOKIES_FROM_BROWSER];
        }
        else if (config.YT_DLP_COOKIES_FILE) {
            cookieArgs = ['--cookies', config.YT_DLP_COOKIES_FILE];
        }
        const ytDlp = execa('yt-dlp', [
            ...cookieArgs,
            '-f',
            'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
            '--merge-output-format',
            'mp4',
            '-o',
            outputPath,
            '--no-playlist',
            '--newline',
            `https://www.youtube.com/watch?v=${videoId}`,
        ]);
        ytDlp.stdout?.on('data', displayProgress('stdout'));
        ytDlp.stderr?.on('data', displayProgress('stderr'));
        await ytDlp;
        process.stdout.write('\n');
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        // Ordered table: the first entry with a matching substring decides the error text.
        const knownFailures = [
            [['command not found', 'ENOENT'], 'yt-dlp is required. Install it: https://github.com/yt-dlp/yt-dlp'],
            [['Private video', 'Sign in'], `Video "${videoId}" is private and cannot be downloaded.`],
            [['not available in your country', 'geo'], `Video "${videoId}" is geo-blocked in your region.`],
        ];
        const known = knownFailures.find(([needles]) => needles.some((needle) => message.includes(needle)));
        throw new Error(known ? known[1] : `Download failed: ${message}`);
    }
    log.info(`Download complete: ${outputPath}`);
    return outputPath;
}
|
|
99
|
+
/**
 * Downloads a single segment using yt-dlp --download-sections.
 *
 * The segment bounds are shifted by config.TIMESTAMP_OFFSET_SECONDS, clamped
 * so the start is never negative and the end is at least one second after the
 * start. The output file is named `<videoId>_<floor(start)>_<ceil(end)>.mp4`;
 * if that file already exists the download is skipped.
 *
 * @param videoId - YouTube video ID
 * @param segment - Object with numeric `start` / `end` (seconds) and a `reason` used in logs
 * @param index - Zero-based position of the segment, used only for log output
 * @param customPath - Custom output directory (optional, overrides DOWNLOAD_DIR)
 * @returns Path of the downloaded (or previously downloaded) clip
 * @throws {Error} if yt-dlp is not installed or the download fails
 */
async function downloadSegment(videoId, segment, index, customPath) {
    const downloadDir = customPath || config.DOWNLOAD_DIR;
    await fs.mkdir(downloadDir, { recursive: true });
    const adjustedStart = Math.max(0, segment.start + config.TIMESTAMP_OFFSET_SECONDS);
    const adjustedEnd = Math.max(adjustedStart + 1, segment.end + config.TIMESTAMP_OFFSET_SECONDS);
    // Whole-second bounds keep the file name stable across sub-second differences.
    const startInt = Math.floor(adjustedStart);
    const endInt = Math.ceil(adjustedEnd);
    const outputPath = join(downloadDir, `${videoId}_${startInt}_${endInt}.mp4`);
    try {
        await fs.access(outputPath);
        // The total segment count is not known here, so only the 1-based position is logged.
        log.info(`Segment ${index + 1} already downloaded: ${outputPath}`);
        return outputPath;
    }
    catch { }
    const startTs = formatTimestamp(adjustedStart);
    const endTs = formatTimestamp(adjustedEnd);
    log.info(`Downloading segment ${index + 1}: ${startTs} - ${endTs} (${segment.reason})`);
    log.info(`  Requested: ${segment.start.toFixed(2)}s - ${segment.end.toFixed(2)}s`);
    if (config.TIMESTAMP_OFFSET_SECONDS !== 0) {
        log.info(`  Adjusted:  ${adjustedStart.toFixed(2)}s - ${adjustedEnd.toFixed(2)}s (offset: ${config.TIMESTAMP_OFFSET_SECONDS}s)`);
    }
    try {
        const args = [
            '-f',
            'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
            '--merge-output-format',
            'mp4',
            '--download-sections',
            `*${startTs}-${endTs}`,
            '-o',
            outputPath,
            '--no-playlist',
            '--newline',
            `https://www.youtube.com/watch?v=${videoId}`,
        ];
        if (config.YT_DLP_COOKIES_FROM_BROWSER) {
            args.splice(0, 0, '--cookies-from-browser', config.YT_DLP_COOKIES_FROM_BROWSER);
        }
        else if (config.YT_DLP_COOKIES_FILE) {
            args.splice(0, 0, '--cookies', config.YT_DLP_COOKIES_FILE);
        }
        const subprocess = execa('yt-dlp', args);
        subprocess.stdout?.on('data', displayProgress('stdout'));
        subprocess.stderr?.on('data', displayProgress('stderr'));
        await subprocess;
        process.stdout.write('\n');
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        if (message.includes('command not found') || message.includes('ENOENT')) {
            throw new Error('yt-dlp is required. Install it: https://github.com/yt-dlp/yt-dlp');
        }
        if (message.includes('Private video') || message.includes('Sign in')) {
            throw new Error(`Video "${videoId}" is private and cannot be downloaded.`);
        }
        if (message.includes('not available in your country') || message.includes('geo')) {
            throw new Error(`Video "${videoId}" is geo-blocked in your region.`);
        }
        throw new Error(`Segment download failed: ${message}`);
    }
    log.info(`Segment complete: ${outputPath}`);
    return outputPath;
}
|
|
165
|
+
/**
 * Downloads several segments concurrently and collects the resulting paths.
 *
 * Concurrency is capped at the smaller of config.LLM_CONCURRENCY and 3.
 * A segment that fails to download is reported with a warning and omitted
 * from the result; the remaining segments are unaffected.
 *
 * @param videoId - YouTube video ID
 * @param segments - Segments to download (each with `start`, `end`, `rank`)
 * @param customPath - Custom output directory (optional)
 * @returns Paths of the successfully downloaded clips, in segment order
 */
async function downloadSegments(videoId, segments, customPath) {
    if (segments.length === 0) {
        return [];
    }
    const concurrency = Math.min(config.LLM_CONCURRENCY, 3);
    const limit = pLimit(concurrency);
    const jobs = segments.map((segment, index) => limit(() => downloadSegment(videoId, segment, index, customPath)));
    const outcomes = await Promise.allSettled(jobs);
    const paths = [];
    outcomes.forEach((outcome, position) => {
        if (outcome.status === 'fulfilled') {
            paths.push(outcome.value);
            return;
        }
        const failed = segments[position];
        const reason = outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason);
        log.warn(`Failed to download segment [${formatTimestamp(failed.start)} – ${formatTimestamp(failed.end)}] (rank ${failed.rank}): ${reason}`);
    });
    return paths;
}
|
|
188
|
+
/**
 * Entry point for downloads: dispatches on the requested mode.
 *
 * @param videoId - 11-character YouTube video ID
 * @param mode - 'all' downloads the full video; 'segments' downloads individual clips
 * @param segments - Ranked segments (only used when mode is 'segments')
 * @param customPath - Custom output directory (optional, overrides config defaults)
 * @returns `{ mode: 'all', path }` or `{ mode: 'segments', paths }`
 * @throws {Error} if `mode` is neither 'all' nor 'segments'
 */
export async function downloadVideo(videoId, mode = 'all', segments = [], customPath) {
    switch (mode) {
        case 'all': {
            const fullVideoPath = await downloadFullVideo(videoId, customPath);
            return { mode: 'all', path: fullVideoPath };
        }
        case 'segments': {
            // Nothing to fetch: warn and report an empty result rather than failing.
            if (segments.length === 0) {
                log.warn('No segments provided for download-segments mode. Skipping download.');
                return { mode: 'segments', paths: [] };
            }
            log.info(`Downloading ${segments.length} segments in parallel...`);
            const clipPaths = await downloadSegments(videoId, segments, customPath);
            return { mode: 'segments', paths: clipPaths };
        }
        default:
            throw new Error(`Invalid download mode: ${mode}`);
    }
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
/**
 * Validation schema for one detected audio event: when it happened, a label
 * for what was heard, how confident the detector was (0 to 1 inclusive) and
 * which detector reported it.
 */
export const AudioEventSchema = z.object({
    time: z.number(), // presumably seconds into the audio — confirm against the detectors
    event: z.string(),
    confidence: z.number().gte(0).lte(1),
    source: z.enum(['gemini', 'yamnet', 'whisper']),
});
|
|
8
|
+
/**
 * Validation schema for a clip candidate: a time range with a 1-10 score,
 * the signal(s) it came from, a free-text reason and an optional audio
 * event label.
 */
export const MergedCandidateSchema = z.object({
    start: z.number(),
    end: z.number(),
    score: z.number().gte(1).lte(10),
    source: z.enum(['transcript', 'audio', 'both']),
    reason: z.string(),
    audio_event: z.optional(z.string()),
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
/** Supported values for LLM_PROVIDER. */
const LLM_PROVIDERS = [
    'openai',
    'anthropic',
    'google',
    'xai',
    'mistral',
    'groq',
    'zai',
    'openrouter',
    'custom',
];
/** Map each provider to the env var name that holds its API key. */
const PROVIDER_KEY_MAP = {
    openai: 'OPENAI_API_KEY',
    anthropic: 'ANTHROPIC_API_KEY',
    google: 'GOOGLE_GENERATIVE_AI_API_KEY',
    xai: 'XAI_API_KEY',
    mistral: 'MISTRAL_API_KEY',
    groq: 'GROQ_API_KEY',
    zai: 'ZAI_API_KEY',
    openrouter: 'OPENROUTER_API_KEY',
    custom: 'CUSTOM_OPENAI_API_KEY',
};
/** Strings that mean "off" when a boolean setting is supplied as text. */
const FALSY_FLAG_STRINGS = ['false', '0', 'no', 'off'];
/**
 * Boolean setting that is safe to feed from text values.
 *
 * Plain `z.coerce.boolean()` applies `Boolean(value)`, so the string "false"
 * becomes `true`. This maps the usual "off" spellings to `false` first and
 * leaves every other input to the same coercion as before.
 */
const booleanFlag = (defaultValue) => z.preprocess((value) => {
    if (typeof value === 'string' && FALSY_FLAG_STRINGS.includes(value.trim().toLowerCase())) {
        return false;
    }
    return value;
}, z.coerce.boolean().default(defaultValue));
/** Splits a comma-separated provider chain into trimmed, non-empty names. */
const splitProviderChain = (value) => value
    .split(',')
    .map((s) => s.trim())
    .filter(Boolean);
/**
 * Schema for the application configuration.
 *
 * Field-level rules apply defaults and coercion; the trailing superRefine
 * enforces cross-field rules: the API key matching LLM_PROVIDER must be set,
 * the "custom" provider also needs CUSTOM_OPENAI_BASE_URL, and only one
 * yt-dlp cookie source may be configured.
 */
export const ConfigSchema = z
    .object({
    // --- Provider selection ---
    LLM_PROVIDER: z.enum(LLM_PROVIDERS).default('openai'),
    // --- Per-provider API keys (all optional at schema level; enforced via superRefine) ---
    OPENAI_API_KEY: z.string().optional(),
    ANTHROPIC_API_KEY: z.string().optional(),
    GOOGLE_GENERATIVE_AI_API_KEY: z.string().optional(),
    XAI_API_KEY: z.string().optional(),
    MISTRAL_API_KEY: z.string().optional(),
    GROQ_API_KEY: z.string().optional(),
    ZAI_API_KEY: z.string().optional(),
    OPENROUTER_API_KEY: z.string().optional(),
    CUSTOM_OPENAI_API_KEY: z.string().optional(),
    CUSTOM_OPENAI_BASE_URL: z.string().url().optional(),
    // --- Tunable parameters ---
    SCORE_THRESHOLD: z.coerce.number().min(1).max(10).default(7),
    TOP_N_SEGMENTS: z.coerce.number().min(1).default(10),
    CHUNK_LENGTH_SEC: z.coerce.number().min(10).default(120),
    CHUNK_OVERLAP_SEC: z.coerce.number().min(0).default(20),
    MICRO_BLOCK_SEC: z.coerce.number().min(5).default(15),
    LLM_MODEL: z.string().default('gpt-4o'),
    LLM_MAX_RETRIES: z.coerce.number().min(0).default(3),
    DOWNLOAD_DIR: z.string().default('downloads/'),
    OUTPUT_DIR: z.string().default('outputs/'),
    CACHE_DIR: z.string().default('outputs/cache'),
    // --- Output dumping ---
    DUMP_OUTPUTS: booleanFlag(true),
    // --- LLM evaluation limits ---
    MAX_CHUNKS: z.coerce.number().min(1).optional(),
    LLM_CONCURRENCY: z.coerce.number().min(1).default(3),
    CLIP_CONCURRENCY: z.coerce.number().min(1).default(1),
    // --- Custom system prompt (overrides the default if set) ---
    LLM_SYSTEM_PROMPT: z.string().optional(),
    // --- Gemini model used for audio event detection ---
    AUDIO_GEMINI_MODEL: z.string().default('gemini-2.5-flash'),
    // --- Extra instructions appended to the Gemini audio detection prompt ---
    AUDIO_EXTRA_INSTRUCTIONS: z.string().optional(),
    // --- Download mode for yt-dlp ---
    // 'all' or a positive integer; the number is coerced like every other numeric
    // setting so a textual value such as "3" is accepted.
    DOWNLOAD_SECTIONS_MODE: z
        .union([z.literal('all'), z.coerce.number().int().positive()])
        .default('all'),
    // --- FFmpeg paths (optional, for custom ffmpeg/ffprobe locations) ---
    FFMPEG_PATH: z.string().optional(),
    FFPROBE_PATH: z.string().optional(),
    // --- FFmpeg encoding preset for clip generation ---
    FFMPEG_PRESET: z
        .enum(['ultrafast', 'superfast', 'veryfast', 'fast', 'medium', 'slow', 'slower'])
        .default('fast'),
    // --- Timestamp offset for clips (adjusts if transcript is misaligned with video) ---
    TIMESTAMP_OFFSET_SECONDS: z.coerce.number().default(0),
    // --- Transcript provider ---
    // Comma-separated ordered fallback chain: "ytdlp" | "whisper" | "ytdlp,whisper" etc.
    // First provider that succeeds wins; subsequent providers are tried only on failure.
    TRANSCRIPT_PROVIDER: z
        .string()
        .default('ytdlp')
        .refine((v) => {
        const parts = splitProviderChain(v);
        return parts.length > 0 && parts.every((p) => ['ytdlp', 'whisper', 'gemini'].includes(p));
    }, {
        message: 'TRANSCRIPT_PROVIDER must be a comma-separated list of: ytdlp, whisper, gemini (e.g. "ytdlp")',
    }),
    // --- Audio event detection ---
    AUDIO_DETECTION_ENABLED: booleanFlag(true),
    // Comma-separated ordered fallback chain: "gemini,whisper" | "yamnet" | "gemini" etc.
    // Legacy value "both" is accepted and mapped to "gemini,whisper" at runtime.
    AUDIO_PROVIDER: z
        .string()
        .default('gemini,whisper')
        .refine((v) => {
        if (v.trim() === 'both') {
            return true;
        }
        const parts = splitProviderChain(v);
        return parts.length > 0 && parts.every((p) => ['gemini', 'whisper', 'yamnet'].includes(p));
    }, {
        message: 'AUDIO_PROVIDER must be a comma-separated list of: gemini, whisper, yamnet (e.g. "gemini,whisper")',
    }),
    AUDIO_WHISPER_MODEL: z.enum(['tiny', 'base', 'small', 'medium', 'large-v3']).default('medium'),
    AUDIO_CONFIDENCE_THRESHOLD: z.coerce.number().min(0).max(1).default(0.3),
    AUDIO_CLIP_PRE_ROLL: z.coerce.number().min(0).default(5),
    AUDIO_CLIP_POST_ROLL: z.coerce.number().min(0).default(15),
    AUDIO_LLM_BOOST_WINDOW: z.coerce.number().min(0).default(10),
    AUDIO_LLM_SCORE_BOOST: z.coerce.number().min(0).default(2),
    // --- Game profile ---
    GAME_PROFILE: z.enum(['valorant', 'fps', 'boss_fight', 'general']).default('general'),
    // --- yt-dlp cookie support (for bot detection / auth) ---
    YT_DLP_COOKIES_FROM_BROWSER: z
        .enum(['chrome', 'firefox', 'safari', 'brave', 'edge', 'opera', 'chromium'])
        .optional(),
    YT_DLP_COOKIES_FILE: z.string().optional(),
})
    .superRefine((data, ctx) => {
    // The selected provider's API key must be present and non-blank.
    const provider = data.LLM_PROVIDER;
    const keyName = PROVIDER_KEY_MAP[provider];
    const keyValue = data[keyName];
    if (!keyValue || keyValue.trim() === '') {
        ctx.addIssue({
            code: z.ZodIssueCode.custom,
            path: [keyName],
            message: `${keyName} is required when LLM_PROVIDER is "${provider}"`,
        });
    }
    // custom provider also requires a base URL
    if (provider === 'custom' &&
        (!data.CUSTOM_OPENAI_BASE_URL || data.CUSTOM_OPENAI_BASE_URL.trim() === '')) {
        ctx.addIssue({
            code: z.ZodIssueCode.custom,
            path: ['CUSTOM_OPENAI_BASE_URL'],
            message: 'CUSTOM_OPENAI_BASE_URL is required when LLM_PROVIDER is "custom"',
        });
    }
    // Validate cookie config: only one method allowed at a time
    if (data.YT_DLP_COOKIES_FROM_BROWSER && data.YT_DLP_COOKIES_FILE) {
        ctx.addIssue({
            code: z.ZodIssueCode.custom,
            path: ['YT_DLP_COOKIES_FROM_BROWSER'],
            message: 'Cannot set both YT_DLP_COOKIES_FROM_BROWSER and YT_DLP_COOKIES_FILE. Use only one.',
        });
    }
});
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { ConfigSchema } from './config.js';
|
|
2
|
+
export { TranscriptLineSchema, MicroBlockSchema, LLMChunkSchema } from './transcript.js';
|
|
3
|
+
export { AnalyzedSegmentSchema, RankedSegmentSchema, ChunkEvaluationSchema } from './segment.js';
|
|
4
|
+
export { AudioEventSchema, MergedCandidateSchema } from './audio.js';
|
|
5
|
+
export { VideoMetadataSchema, PipelineResultSchema } from './video.js';
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline-layer types: stage option bags, result shapes, and the generic
|
|
3
|
+
* time-window interface from the chunker utility.
|
|
4
|
+
*
|
|
5
|
+
* All types here are owned by one pipeline stage but live centrally so the
|
|
6
|
+
* runner (and any future consumers) can import them without reaching into
|
|
7
|
+
* individual stage files.
|
|
8
|
+
*/
|
|
9
|
+
export {};
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
/**
 * Validation schema for the evaluation of one analyzed segment: whether it
 * is interesting, a 1-10 score, the stated reason and the suggested clip
 * bounds in seconds.
 */
export const AnalyzedSegmentSchema = z.object({
    interesting: z.boolean(),
    score: z.number().gte(1).lte(10),
    reason: z.string(),
    // Clip bounds, in seconds.
    clip_start: z.number(),
    clip_end: z.number(),
});
|
|
9
|
+
/**
 * Validation schema for a ranked segment: 1-based integer rank, time range
 * in seconds, 1-10 score, reason, originating signal and an optional audio
 * event label.
 */
export const RankedSegmentSchema = z.object({
    rank: z.number().int().gte(1),
    // Segment bounds, in seconds.
    start: z.number(),
    end: z.number(),
    score: z.number().gte(1).lte(10),
    reason: z.string(),
    source: z.enum(['transcript', 'audio', 'both']),
    audio_event: z.optional(z.string()),
});
|
|
18
|
+
/** Fields shared by every chunk evaluation: zero-based index and chunk bounds in seconds. */
const ChunkEvaluationBaseSchema = z.object({
    chunk_index: z.number().int().gte(0),
    chunk_start: z.number(),
    chunk_end: z.number(),
});
/** Variant for a chunk that was evaluated: carries the evaluation fields. */
const SucceededChunkSchema = ChunkEvaluationBaseSchema.extend({
    status: z.literal('success'),
    interesting: z.boolean(),
    score: z.number().gte(1).lte(10),
    reason: z.string(),
    clip_start: z.number(),
    clip_end: z.number(),
});
/** Variant for a chunk whose evaluation failed: carries only the error text. */
const FailedChunkSchema = ChunkEvaluationBaseSchema.extend({
    status: z.literal('failed'),
    error: z.string(),
});
/**
 * Per-chunk evaluation record, discriminated on `status`
 * ('success' or 'failed').
 */
export const ChunkEvaluationSchema = z.discriminatedUnion('status', [
    SucceededChunkSchema,
    FailedChunkSchema,
]);
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
/**
 * Validation schema for one transcript line: its text plus start time and
 * duration, both in seconds.
 */
export const TranscriptLineSchema = z.object({
    text: z.string(),
    // Both values are in seconds (normalized from millisecond offset / duration).
    start: z.number(),
    duration: z.number(),
});
|
|
7
|
+
/**
 * Validation schema for a micro block: a time range in seconds together
 * with the text it covers.
 */
export const MicroBlockSchema = z.object({
    // Block bounds, in seconds.
    start: z.number(),
    end: z.number(),
    text: z.string(),
});
|
|
12
|
+
/**
 * Validation schema for an LLM chunk: a time range in seconds together with
 * the text it covers.
 */
export const LLMChunkSchema = z.object({
    // Chunk bounds, in seconds.
    start: z.number(),
    end: z.number(),
    text: z.string(),
});
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { RankedSegmentSchema, ChunkEvaluationSchema } from './segment.js';
|
|
3
|
+
/**
 * Validation schema for basic video metadata: an ID of exactly 11
 * characters, the title and the duration in seconds.
 */
export const VideoMetadataSchema = z.object({
    videoId: z.string().length(11),
    title: z.string(),
    // Duration, in seconds.
    duration: z.number(),
});
|
|
8
|
+
/**
 * Validation schema for the pipeline result: video identification
 * (11-character ID, title, duration in seconds), the per-chunk evaluations
 * and the ranked segments.
 */
export const PipelineResultSchema = z.object({
    video_id: z.string().length(11),
    title: z.string(),
    // Duration, in seconds.
    duration: z.number(),
    chunk_evaluations: ChunkEvaluationSchema.array(),
    segments: RankedSegmentSchema.array(),
});
|