@thunderkiller/video-clipper 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/release.yml +5 -1
- package/CHANGELOG.md +8 -0
- package/dist/pipeline/runner.js +3 -11
- package/dist/pipeline/stages/audioProcessor.js +0 -2
- package/dist/pipeline/stages/clipExporter.js +0 -4
- package/dist/pipeline/stages/videoResolver.js +0 -3
- package/dist/services/audioAnalyzers/factory.js +0 -1
- package/dist/services/audioAnalyzers/gemini.js +6 -7
- package/dist/services/chunkBuilder/index.js +0 -4
- package/dist/services/clipRefiner/index.js +1 -1
- package/dist/services/llmAnalyzer/LLMAnalyzer.js +0 -2
- package/dist/services/transcriptDetector/index.js +0 -4
- package/dist/services/transcriptFetcher/index.js +2 -6
- package/dist/services/urlParser/index.js +0 -1
- package/dist/types/analyzer.js +1 -0
- package/dist/types/cache.js +5 -0
- package/dist/types/config.js +0 -23
- package/dist/types/downloader.js +1 -0
- package/dist/types/factory.js +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/pipeline.js +0 -8
- package/dist/types/segment.js +6 -6
- package/dist/types/transcript.js +6 -6
- package/dist/utils/cache.js +1 -8
- package/package.json +1 -1
- package/src/pipeline/runner.ts +3 -11
- package/src/pipeline/stages/audioProcessor.ts +0 -2
- package/src/pipeline/stages/clipExporter.ts +0 -4
- package/src/pipeline/stages/videoResolver.ts +0 -3
- package/src/services/audioAnalyzers/factory.ts +1 -3
- package/src/services/audioAnalyzers/gemini.ts +6 -7
- package/src/services/audioAnalyzers/index.ts +1 -1
- package/src/services/chunkBuilder/index.ts +0 -4
- package/src/services/clipRefiner/index.ts +1 -1
- package/src/services/llmAnalyzer/LLMAnalyzer.ts +2 -18
- package/src/services/transcriptAnalyzers/factory.ts +1 -2
- package/src/services/transcriptAnalyzers/index.ts +1 -1
- package/src/services/transcriptDetector/index.ts +6 -12
- package/src/services/transcriptFetcher/index.ts +2 -6
- package/src/services/urlParser/index.ts +0 -1
- package/src/services/videoDownloader/index.ts +1 -15
- package/src/types/analyzer.ts +23 -0
- package/src/types/cache.ts +8 -0
- package/src/types/config.ts +0 -23
- package/src/types/downloader.ts +15 -0
- package/src/types/factory.ts +3 -0
- package/src/types/index.ts +14 -0
- package/src/types/pipeline.ts +0 -33
- package/src/types/segment.ts +6 -6
- package/src/types/transcript.ts +6 -6
- package/src/utils/cache.ts +13 -12
|
@@ -50,6 +50,10 @@ jobs:
|
|
|
50
50
|
env:
|
|
51
51
|
GITHUB_TOKEN: ${{ secrets.PUSH_TOKEN }}
|
|
52
52
|
|
|
53
|
+
- name: Pull updated package.json after semantic-release
|
|
54
|
+
if: success()
|
|
55
|
+
run: git pull origin master
|
|
56
|
+
|
|
53
57
|
- name: Verify GitHub Release
|
|
54
58
|
if: success()
|
|
55
59
|
run: |
|
|
@@ -69,4 +73,4 @@ jobs:
|
|
|
69
73
|
if: success()
|
|
70
74
|
run: npm publish
|
|
71
75
|
env:
|
|
72
|
-
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
|
76
|
+
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
## [1.1.1](https://github.com/AmreetKumarkhuntia/video-clipper/compare/v1.1.0...v1.1.1) (2026-03-19)
|
|
2
|
+
|
|
3
|
+
### Bug Fixes
|
|
4
|
+
|
|
5
|
+
- github workflows ([18a9536](https://github.com/AmreetKumarkhuntia/video-clipper/commit/18a953619ed17de71d3c9bd0a86e1b42a10aea37))
|
|
6
|
+
- **release:** pull updated package.json before npm publish to avoid race condition ([1839e4a](https://github.com/AmreetKumarkhuntia/video-clipper/commit/1839e4a57d516234ad629f0217e4fa5f4852e4e3))
|
|
7
|
+
- yaml correction ([33c7854](https://github.com/AmreetKumarkhuntia/video-clipper/commit/33c7854015e1266b3fc01b6206da4ec946f94307))
|
|
8
|
+
|
|
1
9
|
# 1.0.0 (2026-03-18)
|
|
2
10
|
|
|
3
11
|
### Bug Fixes
|
package/dist/pipeline/runner.js
CHANGED
|
@@ -45,11 +45,10 @@ export async function runPipeline(args) {
|
|
|
45
45
|
const gameProfile = args.gameProfile ?? config.GAME_PROFILE;
|
|
46
46
|
const maxParallel = args.maxParallel ?? config.LLM_CONCURRENCY;
|
|
47
47
|
const cache = new Cache(config.CACHE_DIR, args.noCache);
|
|
48
|
-
// ── Stage 1: Resolve video ID + metadata ─────────────────────────────────
|
|
49
48
|
const { videoId, metadata } = await resolveVideo(args.url, args.maxDuration);
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
49
|
+
/** Downloaded before transcript so Whisper/Gemini transcript providers can
|
|
50
|
+
* use the WAV. Returns null when audio detection is disabled.
|
|
51
|
+
*/
|
|
53
52
|
let audioPath = null;
|
|
54
53
|
const audioEnabled = config.AUDIO_DETECTION_ENABLED && !args.noAudio;
|
|
55
54
|
if (audioEnabled) {
|
|
@@ -61,14 +60,12 @@ export async function runPipeline(args) {
|
|
|
61
60
|
log.warn(`Audio download failed — continuing without audio: ${message}`);
|
|
62
61
|
}
|
|
63
62
|
}
|
|
64
|
-
// ── Stage 3: Audio event detection ───────────────────────────────────────
|
|
65
63
|
const audioEvents = await processAudio(videoId, metadata.duration, cache, {
|
|
66
64
|
noAudio: args.noAudio,
|
|
67
65
|
gameProfile,
|
|
68
66
|
maxParallel,
|
|
69
67
|
audioPath,
|
|
70
68
|
});
|
|
71
|
-
// ── Stage 4a: Fetch transcript + LLM analysis (informed by audio events) ──
|
|
72
69
|
const { lines, microBlocks, chunkEvals } = await analyzeSegments(videoId, audioPath, audioEvents, cache, {
|
|
73
70
|
maxChunks: args.maxChunks,
|
|
74
71
|
maxParallel,
|
|
@@ -77,9 +74,7 @@ export async function runPipeline(args) {
|
|
|
77
74
|
if (config.DUMP_OUTPUTS) {
|
|
78
75
|
await dumpTranscript(videoId, lines);
|
|
79
76
|
}
|
|
80
|
-
// ── Stage 5: Merge signals + rank ─────────────────────────────────────────
|
|
81
77
|
const rankedSegments = selectSegments(chunkEvals, audioEvents, { threshold, topN });
|
|
82
|
-
// Build partial result for early-exit path (no segments above threshold)
|
|
83
78
|
const partialResult = {
|
|
84
79
|
video_id: videoId,
|
|
85
80
|
title: metadata.title,
|
|
@@ -93,12 +88,10 @@ export async function runPipeline(args) {
|
|
|
93
88
|
await dumpAnalysis(videoId, partialResult);
|
|
94
89
|
return;
|
|
95
90
|
}
|
|
96
|
-
// ── Stage 4b: Refine clip boundaries (LLM pass 2) ─────────────────────────
|
|
97
91
|
const refinedSegments = await refineRankedSegments(rankedSegments, microBlocks, cache, {
|
|
98
92
|
maxParallel,
|
|
99
93
|
noCache: args.noCache,
|
|
100
94
|
});
|
|
101
|
-
// ── Output result ─────────────────────────────────────────────────────────
|
|
102
95
|
const result = {
|
|
103
96
|
video_id: videoId,
|
|
104
97
|
title: metadata.title,
|
|
@@ -110,7 +103,6 @@ export async function runPipeline(args) {
|
|
|
110
103
|
if (config.DUMP_OUTPUTS)
|
|
111
104
|
await dumpAnalysis(videoId, result);
|
|
112
105
|
log.info('Done.');
|
|
113
|
-
// ── Stage 6: Download + generate clips (only with --clip) ─────────────────
|
|
114
106
|
if (!args.clip) {
|
|
115
107
|
log.info('Tip: run with --clip to download the video and generate mp4 clips.');
|
|
116
108
|
return;
|
|
@@ -26,7 +26,6 @@ export async function processAudio(videoId, duration, cache, opts) {
|
|
|
26
26
|
const audioEnabled = config.AUDIO_DETECTION_ENABLED && !opts.noAudio;
|
|
27
27
|
if (!audioEnabled)
|
|
28
28
|
return [];
|
|
29
|
-
// Cache-first
|
|
30
29
|
const cached = await cache.readAudioEvents(videoId, opts.gameProfile, config.AUDIO_PROVIDER);
|
|
31
30
|
if (cached) {
|
|
32
31
|
log.info(`[cache hit] Audio events loaded from cache (${cached.length} events)`);
|
|
@@ -34,7 +33,6 @@ export async function processAudio(videoId, duration, cache, opts) {
|
|
|
34
33
|
}
|
|
35
34
|
try {
|
|
36
35
|
const audioPath = opts.audioPath ?? (await downloadAudio(videoId, `${config.OUTPUT_DIR}/audio`));
|
|
37
|
-
// Build the analyzer chain once per run from config
|
|
38
36
|
const chain = createAnalyzerChain(config.AUDIO_PROVIDER);
|
|
39
37
|
const detector = new EventDetector(chain);
|
|
40
38
|
const providerNames = chain.map((a) => a.source).join(' → ');
|
|
@@ -14,15 +14,12 @@ import { config } from '../../config/index.js';
|
|
|
14
14
|
* @returns Array of absolute paths to the generated clip files.
|
|
15
15
|
*/
|
|
16
16
|
export async function exportClips(videoId, segments, opts) {
|
|
17
|
-
// Mode 1: local video already on disk — cut with ffmpeg
|
|
18
17
|
if (opts.localVideo) {
|
|
19
18
|
log.info(`Using local video: ${opts.localVideo}`);
|
|
20
19
|
return generateClips(opts.localVideo, segments, videoId, opts.videoPath, config.CLIP_CONCURRENCY);
|
|
21
20
|
}
|
|
22
|
-
// Determine yt-dlp mode
|
|
23
21
|
const downloadSections = opts.downloadSections ?? config.DOWNLOAD_SECTIONS_MODE;
|
|
24
22
|
if (typeof downloadSections === 'number') {
|
|
25
|
-
// Mode 2: download only the top-N segments via --download-sections
|
|
26
23
|
const segmentsToDownload = segments.slice(0, downloadSections);
|
|
27
24
|
if (segmentsToDownload.length < downloadSections) {
|
|
28
25
|
log.warn(`Requested ${downloadSections} segments, but only ${segmentsToDownload.length} are available above threshold.`);
|
|
@@ -34,7 +31,6 @@ export async function exportClips(videoId, segments, opts) {
|
|
|
34
31
|
}
|
|
35
32
|
return organizeClips(downloadResult.paths, videoId, opts.videoPath, config.CLIP_CONCURRENCY);
|
|
36
33
|
}
|
|
37
|
-
// Mode 3: full-video download → cut clips with ffmpeg
|
|
38
34
|
log.info('Downloading full video via yt-dlp...');
|
|
39
35
|
const downloadResult = await downloadVideo(videoId, 'all', [], opts.videoPath);
|
|
40
36
|
if (downloadResult.mode !== 'all') {
|
|
@@ -11,7 +11,6 @@ import { formatSeconds } from '../../utils/format.js';
|
|
|
11
11
|
* @throws {Error} on invalid URL, metadata fetch failure, or exceeded duration
|
|
12
12
|
*/
|
|
13
13
|
export async function resolveVideo(rawUrl, maxDurationSec) {
|
|
14
|
-
// Parse URL → video ID
|
|
15
14
|
let videoId;
|
|
16
15
|
try {
|
|
17
16
|
videoId = parseUrl(rawUrl);
|
|
@@ -19,11 +18,9 @@ export async function resolveVideo(rawUrl, maxDurationSec) {
|
|
|
19
18
|
catch {
|
|
20
19
|
throw new Error(`Invalid YouTube URL: ${rawUrl}`);
|
|
21
20
|
}
|
|
22
|
-
// Fetch metadata (yt-dlp → oEmbed fallback)
|
|
23
21
|
log.info(`Fetching metadata for ${videoId}...`);
|
|
24
22
|
const metadata = await extractMetadata(videoId);
|
|
25
23
|
log.info(`Video: "${metadata.title}" (${metadata.duration > 0 ? formatSeconds(metadata.duration) : 'duration unknown'})`);
|
|
26
|
-
// --max-duration guard
|
|
27
24
|
if (maxDurationSec !== undefined && metadata.duration > 0) {
|
|
28
25
|
if (metadata.duration > maxDurationSec) {
|
|
29
26
|
throw new Error(`Video duration exceeds --max-duration limit. ` +
|
|
@@ -12,7 +12,6 @@ const KNOWN_PROVIDERS = new Set(['gemini', 'whisper', 'yamnet']);
|
|
|
12
12
|
* Backward-compat: "both" is mapped to ['gemini', 'whisper'] with a deprecation warning.
|
|
13
13
|
*/
|
|
14
14
|
export function parseProviderChain(providerString) {
|
|
15
|
-
// Backward compatibility: map legacy 'both' to the new comma-separated form
|
|
16
15
|
if (providerString.trim() === 'both') {
|
|
17
16
|
log.warn('[audio] AUDIO_PROVIDER=both is deprecated. Use AUDIO_PROVIDER=gemini,whisper instead.');
|
|
18
17
|
return ['gemini', 'whisper'];
|
|
@@ -4,11 +4,13 @@ import { z } from 'zod';
|
|
|
4
4
|
import { config } from '../../config/index.js';
|
|
5
5
|
import { log } from '../../utils/logger.js';
|
|
6
6
|
import { AudioAnalyzer } from './base.js';
|
|
7
|
+
/**
|
|
8
|
+
* Gemini returns timestamps inconsistently as either:
|
|
9
|
+
* - MM.SS notation: 1.03 = 1 min 3 sec = 63s
|
|
10
|
+
* - True decimal seconds: 53.403 = 53.403s
|
|
11
|
+
* Use normalizeGeminiTime() to resolve correct value.
|
|
12
|
+
*/
|
|
7
13
|
const GeminiEventSchema = z.array(z.object({
|
|
8
|
-
// Gemini inconsistently returns timestamps in either:
|
|
9
|
-
// - MM.SS notation: 1.03 = 1 min 3 sec = 63s
|
|
10
|
-
// - True decimal seconds: 53.403 = 53.403s
|
|
11
|
-
// Use normalizeGeminiTime() to resolve the correct value.
|
|
12
14
|
time_sec: z.number(),
|
|
13
15
|
event: z.string(),
|
|
14
16
|
confidence: z.number().min(0).max(1),
|
|
@@ -45,16 +47,13 @@ function mmssToSeconds(value) {
|
|
|
45
47
|
*/
|
|
46
48
|
export function normalizeGeminiTime(value, chunkDurationSec) {
|
|
47
49
|
const frac = value % 1;
|
|
48
|
-
// Fractional part > 0.59 is impossible in MM.SS — must be decimal seconds
|
|
49
50
|
if (Math.round(frac * 100) > 59) {
|
|
50
51
|
return value;
|
|
51
52
|
}
|
|
52
|
-
// Fractional part ≤ 0.59: could be MM.SS — check if converted value fits in chunk
|
|
53
53
|
const mmss = mmssToSeconds(value);
|
|
54
54
|
if (mmss < chunkDurationSec) {
|
|
55
55
|
return mmss;
|
|
56
56
|
}
|
|
57
|
-
// MM.SS conversion overflows the chunk — must be true decimal seconds
|
|
58
57
|
return value;
|
|
59
58
|
}
|
|
60
59
|
/**
|
|
@@ -12,19 +12,16 @@ export function buildMicroBlocks(lines, windowSec) {
|
|
|
12
12
|
let texts = [];
|
|
13
13
|
for (const line of lines) {
|
|
14
14
|
if (line.start >= windowStart + windowSec) {
|
|
15
|
-
// Flush current block
|
|
16
15
|
blocks.push({
|
|
17
16
|
start: windowStart,
|
|
18
17
|
end: line.start,
|
|
19
18
|
text: texts.join(' '),
|
|
20
19
|
});
|
|
21
|
-
// Start a new window aligned to the current line
|
|
22
20
|
windowStart = line.start;
|
|
23
21
|
texts = [];
|
|
24
22
|
}
|
|
25
23
|
texts.push(line.text);
|
|
26
24
|
}
|
|
27
|
-
// Flush the final block
|
|
28
25
|
if (texts.length > 0) {
|
|
29
26
|
const lastLine = lines[lines.length - 1];
|
|
30
27
|
blocks.push({
|
|
@@ -63,7 +60,6 @@ export function buildLLMChunks(blocks, chunkLen, overlap) {
|
|
|
63
60
|
}
|
|
64
61
|
const step = chunkLen - overlap;
|
|
65
62
|
chunkStart += step;
|
|
66
|
-
// Guard: if overlap >= chunkLen we'd loop forever
|
|
67
63
|
if (step <= 0)
|
|
68
64
|
break;
|
|
69
65
|
}
|
|
@@ -69,7 +69,7 @@ async function refineSegment(segment, allBlocks, noCache) {
|
|
|
69
69
|
prompt: buildPrompt(segment, text, windowStart, windowEnd),
|
|
70
70
|
maxRetries: config.LLM_MAX_RETRIES,
|
|
71
71
|
});
|
|
72
|
-
|
|
72
|
+
/** Clamp to context window to prevent LLM from hallucinating out-of-range values */
|
|
73
73
|
const refinedStart = Math.max(windowStart, Math.min(object.clip_start, object.clip_end - 1));
|
|
74
74
|
const refinedEnd = Math.min(windowEnd, Math.max(object.clip_end, object.clip_start + 1));
|
|
75
75
|
if (!noCache) {
|
|
@@ -36,9 +36,7 @@ export class LLMAnalyzer {
|
|
|
36
36
|
* everything needed for the ranking step.
|
|
37
37
|
*/
|
|
38
38
|
async analyze(opts) {
|
|
39
|
-
// ── Transcript ────────────────────────────────────────────────────────────
|
|
40
39
|
const { lines, microBlocks, chunks } = await this.transcriptDetector.detect(opts.videoId, opts.audioPath, this.cache);
|
|
41
|
-
// ── LLM pass 1 ────────────────────────────────────────────────────────────
|
|
42
40
|
const chunkLimit = opts.maxChunks ?? config.MAX_CHUNKS;
|
|
43
41
|
const chunksToAnalyze = chunkLimit !== undefined ? chunks.slice(0, chunkLimit) : chunks;
|
|
44
42
|
if (chunkLimit !== undefined) {
|
|
@@ -46,7 +46,6 @@ export class TranscriptDetector {
|
|
|
46
46
|
*/
|
|
47
47
|
async detect(videoId, audioPath, cache) {
|
|
48
48
|
let lines;
|
|
49
|
-
// Cache-first: if we already have lines on disk, skip the provider chain entirely
|
|
50
49
|
const cached = await cache.readTranscript(videoId);
|
|
51
50
|
if (cached) {
|
|
52
51
|
log.info(`[cache hit] Transcript loaded from cache (${cached.length} lines)`);
|
|
@@ -60,9 +59,6 @@ export class TranscriptDetector {
|
|
|
60
59
|
const chunks = this.buildChunks(microBlocks);
|
|
61
60
|
return { lines, microBlocks, chunks };
|
|
62
61
|
}
|
|
63
|
-
// -------------------------------------------------------------------------
|
|
64
|
-
// Private helpers
|
|
65
|
-
// -------------------------------------------------------------------------
|
|
66
62
|
/**
|
|
67
63
|
* Walks the analyzer chain in order.
|
|
68
64
|
* Falls back to the next analyzer whenever one throws.
|
|
@@ -17,7 +17,7 @@ import { config } from '../../config/index.js';
|
|
|
17
17
|
export function parseVtt(vttContent) {
|
|
18
18
|
const lines = vttContent.split(/\r?\n/);
|
|
19
19
|
const result = [];
|
|
20
|
-
|
|
20
|
+
/** Regex to match HH:MM:SS.mmm --> HH:MM:SS.mmm timestamp lines */
|
|
21
21
|
const TIMESTAMP_RE = /^(\d{2}):(\d{2}):(\d{2})[.,](\d{3})\s+-->\s+(\d{2}):(\d{2}):(\d{2})[.,](\d{3})/;
|
|
22
22
|
let i = 0;
|
|
23
23
|
while (i < lines.length) {
|
|
@@ -32,7 +32,6 @@ export function parseVtt(vttContent) {
|
|
|
32
32
|
parseInt(match[6], 10) * 60 +
|
|
33
33
|
parseInt(match[7], 10) +
|
|
34
34
|
parseInt(match[8], 10) / 1000;
|
|
35
|
-
// Collect cue text lines until blank line or EOF
|
|
36
35
|
i++;
|
|
37
36
|
const textLines = [];
|
|
38
37
|
while (i < lines.length && lines[i].trim() !== '') {
|
|
@@ -40,7 +39,6 @@ export function parseVtt(vttContent) {
|
|
|
40
39
|
i++;
|
|
41
40
|
}
|
|
42
41
|
const rawText = textLines.join(' ');
|
|
43
|
-
// Strip VTT inline tags: <00:00:00.000>, <c>, </c>, <b>, </b>, <i>, </i>, etc.
|
|
44
42
|
const text = rawText
|
|
45
43
|
.replace(/<[^>]+>/g, '')
|
|
46
44
|
.replace(/&amp;/g, '&')
|
|
@@ -53,8 +51,7 @@ export function parseVtt(vttContent) {
|
|
|
53
51
|
continue;
|
|
54
52
|
}
|
|
55
53
|
const duration = Math.max(0, endSec - startSec);
|
|
56
|
-
|
|
57
|
-
// (YouTube VTT often repeats the same line as text scrolls)
|
|
54
|
+
/** Skip duplicate cues - YouTube VTT often repeats same line as text scrolls */
|
|
58
55
|
if (result.length > 0 && result[result.length - 1].text === text) {
|
|
59
56
|
continue;
|
|
60
57
|
}
|
|
@@ -104,7 +101,6 @@ export async function fetchTranscript(videoId) {
|
|
|
104
101
|
const message = err instanceof Error ? err.message : String(err);
|
|
105
102
|
throw new Error(`yt-dlp failed to fetch subtitles for "${videoId}": ${message}`);
|
|
106
103
|
}
|
|
107
|
-
// Find the downloaded .vtt file (yt-dlp names it <id>.<lang>.vtt)
|
|
108
104
|
const files = await fs.readdir(tmpDir);
|
|
109
105
|
const vttFile = files.find((f) => f.endsWith('.vtt'));
|
|
110
106
|
if (!vttFile) {
|
|
@@ -37,7 +37,6 @@ export function parseUrl(url) {
|
|
|
37
37
|
if (!videoId) {
|
|
38
38
|
throw new Error(`Could not extract video ID from URL: "${url}"`);
|
|
39
39
|
}
|
|
40
|
-
// Strip any extra query params that may have been part of the path segment
|
|
41
40
|
videoId = videoId.split('?')[0];
|
|
42
41
|
if (videoId.length !== VIDEO_ID_LENGTH) {
|
|
43
42
|
throw new Error(`Invalid video ID "${videoId}": expected ${VIDEO_ID_LENGTH} characters, got ${videoId.length}`);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/types/config.js
CHANGED
|
@@ -10,7 +10,6 @@ const LLM_PROVIDERS = [
|
|
|
10
10
|
'openrouter',
|
|
11
11
|
'custom',
|
|
12
12
|
];
|
|
13
|
-
/** Map each provider to the env var name that holds its API key. */
|
|
14
13
|
const PROVIDER_KEY_MAP = {
|
|
15
14
|
openai: 'OPENAI_API_KEY',
|
|
16
15
|
anthropic: 'ANTHROPIC_API_KEY',
|
|
@@ -24,9 +23,7 @@ const PROVIDER_KEY_MAP = {
|
|
|
24
23
|
};
|
|
25
24
|
export const ConfigSchema = z
|
|
26
25
|
.object({
|
|
27
|
-
// --- Provider selection ---
|
|
28
26
|
LLM_PROVIDER: z.enum(LLM_PROVIDERS).default('openai'),
|
|
29
|
-
// --- Per-provider API keys (all optional at schema level; enforced via superRefine) ---
|
|
30
27
|
OPENAI_API_KEY: z.string().optional(),
|
|
31
28
|
ANTHROPIC_API_KEY: z.string().optional(),
|
|
32
29
|
GOOGLE_GENERATIVE_AI_API_KEY: z.string().optional(),
|
|
@@ -37,7 +34,6 @@ export const ConfigSchema = z
|
|
|
37
34
|
OPENROUTER_API_KEY: z.string().optional(),
|
|
38
35
|
CUSTOM_OPENAI_API_KEY: z.string().optional(),
|
|
39
36
|
CUSTOM_OPENAI_BASE_URL: z.string().url().optional(),
|
|
40
|
-
// --- Tunable parameters ---
|
|
41
37
|
SCORE_THRESHOLD: z.coerce.number().min(1).max(10).default(7),
|
|
42
38
|
TOP_N_SEGMENTS: z.coerce.number().min(1).default(10),
|
|
43
39
|
CHUNK_LENGTH_SEC: z.coerce.number().min(10).default(120),
|
|
@@ -48,32 +44,20 @@ export const ConfigSchema = z
|
|
|
48
44
|
DOWNLOAD_DIR: z.string().default('downloads/'),
|
|
49
45
|
OUTPUT_DIR: z.string().default('outputs/'),
|
|
50
46
|
CACHE_DIR: z.string().default('outputs/cache'),
|
|
51
|
-
// --- Output dumping ---
|
|
52
47
|
DUMP_OUTPUTS: z.coerce.boolean().default(true),
|
|
53
|
-
// --- LLM evaluation limits ---
|
|
54
48
|
MAX_CHUNKS: z.coerce.number().min(1).optional(),
|
|
55
49
|
LLM_CONCURRENCY: z.coerce.number().min(1).default(3),
|
|
56
50
|
CLIP_CONCURRENCY: z.coerce.number().min(1).default(1),
|
|
57
|
-
// --- Custom system prompt (overrides the default if set) ---
|
|
58
51
|
LLM_SYSTEM_PROMPT: z.string().optional(),
|
|
59
|
-
// --- Gemini model used for audio event detection ---
|
|
60
52
|
AUDIO_GEMINI_MODEL: z.string().default('gemini-2.5-flash'),
|
|
61
|
-
// --- Extra instructions appended to the Gemini audio detection prompt ---
|
|
62
53
|
AUDIO_EXTRA_INSTRUCTIONS: z.string().optional(),
|
|
63
|
-
// --- Download mode for yt-dlp ---
|
|
64
54
|
DOWNLOAD_SECTIONS_MODE: z.union([z.literal('all'), z.number().int().positive()]).default('all'),
|
|
65
|
-
// --- FFmpeg paths (optional, for custom ffmpeg/ffprobe locations) ---
|
|
66
55
|
FFMPEG_PATH: z.string().optional(),
|
|
67
56
|
FFPROBE_PATH: z.string().optional(),
|
|
68
|
-
// --- FFmpeg encoding preset for clip generation ---
|
|
69
57
|
FFMPEG_PRESET: z
|
|
70
58
|
.enum(['ultrafast', 'superfast', 'veryfast', 'fast', 'medium', 'slow', 'slower'])
|
|
71
59
|
.default('fast'),
|
|
72
|
-
// --- Timestamp offset for clips (adjusts if transcript is misaligned with video) ---
|
|
73
60
|
TIMESTAMP_OFFSET_SECONDS: z.coerce.number().default(0),
|
|
74
|
-
// --- Transcript provider ---
|
|
75
|
-
// Comma-separated ordered fallback chain: "ytdlp" | "whisper" | "ytdlp,whisper" etc.
|
|
76
|
-
// First provider that succeeds wins; subsequent providers are tried only on failure.
|
|
77
61
|
TRANSCRIPT_PROVIDER: z
|
|
78
62
|
.string()
|
|
79
63
|
.default('ytdlp')
|
|
@@ -86,10 +70,7 @@ export const ConfigSchema = z
|
|
|
86
70
|
}, {
|
|
87
71
|
message: 'TRANSCRIPT_PROVIDER must be a comma-separated list of: ytdlp, whisper, gemini (e.g. "ytdlp")',
|
|
88
72
|
}),
|
|
89
|
-
// --- Audio event detection ---
|
|
90
73
|
AUDIO_DETECTION_ENABLED: z.coerce.boolean().default(true),
|
|
91
|
-
// Comma-separated ordered fallback chain: "gemini,whisper" | "yamnet" | "gemini" etc.
|
|
92
|
-
// Legacy value "both" is accepted and mapped to "gemini,whisper" at runtime.
|
|
93
74
|
AUDIO_PROVIDER: z
|
|
94
75
|
.string()
|
|
95
76
|
.default('gemini,whisper')
|
|
@@ -111,9 +92,7 @@ export const ConfigSchema = z
|
|
|
111
92
|
AUDIO_CLIP_POST_ROLL: z.coerce.number().min(0).default(15),
|
|
112
93
|
AUDIO_LLM_BOOST_WINDOW: z.coerce.number().min(0).default(10),
|
|
113
94
|
AUDIO_LLM_SCORE_BOOST: z.coerce.number().min(0).default(2),
|
|
114
|
-
// --- Game profile ---
|
|
115
95
|
GAME_PROFILE: z.enum(['valorant', 'fps', 'boss_fight', 'general']).default('general'),
|
|
116
|
-
// --- yt-dlp cookie support (for bot detection / auth) ---
|
|
117
96
|
YT_DLP_COOKIES_FROM_BROWSER: z
|
|
118
97
|
.enum(['chrome', 'firefox', 'safari', 'brave', 'edge', 'opera', 'chromium'])
|
|
119
98
|
.optional(),
|
|
@@ -130,7 +109,6 @@ export const ConfigSchema = z
|
|
|
130
109
|
message: `${keyName} is required when LLM_PROVIDER is "${provider}"`,
|
|
131
110
|
});
|
|
132
111
|
}
|
|
133
|
-
// custom provider also requires a base URL
|
|
134
112
|
if (provider === 'custom' &&
|
|
135
113
|
(!data.CUSTOM_OPENAI_BASE_URL || data.CUSTOM_OPENAI_BASE_URL.trim() === '')) {
|
|
136
114
|
ctx.addIssue({
|
|
@@ -139,7 +117,6 @@ export const ConfigSchema = z
|
|
|
139
117
|
message: 'CUSTOM_OPENAI_BASE_URL is required when LLM_PROVIDER is "custom"',
|
|
140
118
|
});
|
|
141
119
|
}
|
|
142
|
-
// Validate cookie config: only one method allowed at a time
|
|
143
120
|
if (data.YT_DLP_COOKIES_FROM_BROWSER && data.YT_DLP_COOKIES_FILE) {
|
|
144
121
|
ctx.addIssue({
|
|
145
122
|
code: z.ZodIssueCode.custom,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/types/index.js
CHANGED
|
@@ -3,3 +3,4 @@ export { TranscriptLineSchema, MicroBlockSchema, LLMChunkSchema } from './transc
|
|
|
3
3
|
export { AnalyzedSegmentSchema, RankedSegmentSchema, ChunkEvaluationSchema } from './segment.js';
|
|
4
4
|
export { AudioEventSchema, MergedCandidateSchema } from './audio.js';
|
|
5
5
|
export { VideoMetadataSchema, PipelineResultSchema } from './video.js';
|
|
6
|
+
export { SegmentRefinementSchema } from './cache.js';
|
package/dist/types/pipeline.js
CHANGED
|
@@ -1,9 +1 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Pipeline-layer types: stage option bags, result shapes, and the generic
|
|
3
|
-
* time-window interface from the chunker utility.
|
|
4
|
-
*
|
|
5
|
-
* All types here are owned by one pipeline stage but live centrally so the
|
|
6
|
-
* runner (and any future consumers) can import them without reaching into
|
|
7
|
-
* individual stage files.
|
|
8
|
-
*/
|
|
9
1
|
export {};
|
package/dist/types/segment.js
CHANGED
|
@@ -3,13 +3,13 @@ export const AnalyzedSegmentSchema = z.object({
|
|
|
3
3
|
interesting: z.boolean(),
|
|
4
4
|
score: z.number().min(1).max(10),
|
|
5
5
|
reason: z.string(),
|
|
6
|
-
clip_start: z.number(),
|
|
7
|
-
clip_end: z.number(),
|
|
6
|
+
clip_start: z.number(),
|
|
7
|
+
clip_end: z.number(),
|
|
8
8
|
});
|
|
9
9
|
export const RankedSegmentSchema = z.object({
|
|
10
10
|
rank: z.number().int().min(1),
|
|
11
|
-
start: z.number(),
|
|
12
|
-
end: z.number(),
|
|
11
|
+
start: z.number(),
|
|
12
|
+
end: z.number(),
|
|
13
13
|
score: z.number().min(1).max(10),
|
|
14
14
|
reason: z.string(),
|
|
15
15
|
source: z.enum(['transcript', 'audio', 'both']),
|
|
@@ -17,8 +17,8 @@ export const RankedSegmentSchema = z.object({
|
|
|
17
17
|
});
|
|
18
18
|
const ChunkEvaluationBaseSchema = z.object({
|
|
19
19
|
chunk_index: z.number().int().min(0),
|
|
20
|
-
chunk_start: z.number(),
|
|
21
|
-
chunk_end: z.number(),
|
|
20
|
+
chunk_start: z.number(),
|
|
21
|
+
chunk_end: z.number(),
|
|
22
22
|
});
|
|
23
23
|
export const ChunkEvaluationSchema = z.discriminatedUnion('status', [
|
|
24
24
|
ChunkEvaluationBaseSchema.extend({
|
package/dist/types/transcript.js
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
export const TranscriptLineSchema = z.object({
|
|
3
3
|
text: z.string(),
|
|
4
|
-
start: z.number(),
|
|
5
|
-
duration: z.number(),
|
|
4
|
+
start: z.number(),
|
|
5
|
+
duration: z.number(),
|
|
6
6
|
});
|
|
7
7
|
export const MicroBlockSchema = z.object({
|
|
8
|
-
start: z.number(),
|
|
9
|
-
end: z.number(),
|
|
8
|
+
start: z.number(),
|
|
9
|
+
end: z.number(),
|
|
10
10
|
text: z.string(),
|
|
11
11
|
});
|
|
12
12
|
export const LLMChunkSchema = z.object({
|
|
13
|
-
start: z.number(),
|
|
14
|
-
end: z.number(),
|
|
13
|
+
start: z.number(),
|
|
14
|
+
end: z.number(),
|
|
15
15
|
text: z.string(),
|
|
16
16
|
});
|
package/dist/utils/cache.js
CHANGED
|
@@ -3,10 +3,7 @@ import { promises as fs } from 'fs';
|
|
|
3
3
|
import path from 'path';
|
|
4
4
|
import { z } from 'zod';
|
|
5
5
|
import { log } from './logger.js';
|
|
6
|
-
import { TranscriptLineSchema, ChunkEvaluationSchema, AudioEventSchema } from '../types/index.js';
|
|
7
|
-
// ---------------------------------------------------------------------------
|
|
8
|
-
// Internal cache-key helpers
|
|
9
|
-
// ---------------------------------------------------------------------------
|
|
6
|
+
import { TranscriptLineSchema, ChunkEvaluationSchema, AudioEventSchema, SegmentRefinementSchema, } from '../types/index.js';
|
|
10
7
|
/**
|
|
11
8
|
* Serializes audio events into a stable string for cache keying.
|
|
12
9
|
* Events are sorted by time so the key is order-independent.
|
|
@@ -44,10 +41,6 @@ async function writeCacheFile(filePath, data) {
|
|
|
44
41
|
log.warn(`[cache] Failed to write ${filePath}: ${err instanceof Error ? err.message : String(err)}`);
|
|
45
42
|
}
|
|
46
43
|
}
|
|
47
|
-
const SegmentRefinementSchema = z.object({
|
|
48
|
-
refined_start: z.number(),
|
|
49
|
-
refined_end: z.number(),
|
|
50
|
-
});
|
|
51
44
|
/**
|
|
52
45
|
* Disk-backed cache for all pipeline stages.
|
|
53
46
|
*
|
package/package.json
CHANGED
package/src/pipeline/runner.ts
CHANGED
|
@@ -52,12 +52,11 @@ export async function runPipeline(args: CliArgs): Promise<void> {
|
|
|
52
52
|
|
|
53
53
|
const cache = new Cache(config.CACHE_DIR, args.noCache);
|
|
54
54
|
|
|
55
|
-
// ── Stage 1: Resolve video ID + metadata ─────────────────────────────────
|
|
56
55
|
const { videoId, metadata } = await resolveVideo(args.url as string, args.maxDuration);
|
|
57
56
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
57
|
+
/** Downloaded before transcript so Whisper/Gemini transcript providers can
|
|
58
|
+
* use the WAV. Returns null when audio detection is disabled.
|
|
59
|
+
*/
|
|
61
60
|
let audioPath: string | null = null;
|
|
62
61
|
const audioEnabled = config.AUDIO_DETECTION_ENABLED && !args.noAudio;
|
|
63
62
|
if (audioEnabled) {
|
|
@@ -69,7 +68,6 @@ export async function runPipeline(args: CliArgs): Promise<void> {
|
|
|
69
68
|
}
|
|
70
69
|
}
|
|
71
70
|
|
|
72
|
-
// ── Stage 3: Audio event detection ───────────────────────────────────────
|
|
73
71
|
const audioEvents = await processAudio(videoId, metadata.duration, cache, {
|
|
74
72
|
noAudio: args.noAudio,
|
|
75
73
|
gameProfile,
|
|
@@ -77,7 +75,6 @@ export async function runPipeline(args: CliArgs): Promise<void> {
|
|
|
77
75
|
audioPath,
|
|
78
76
|
});
|
|
79
77
|
|
|
80
|
-
// ── Stage 4a: Fetch transcript + LLM analysis (informed by audio events) ──
|
|
81
78
|
const { lines, microBlocks, chunkEvals } = await analyzeSegments(
|
|
82
79
|
videoId,
|
|
83
80
|
audioPath,
|
|
@@ -94,10 +91,8 @@ export async function runPipeline(args: CliArgs): Promise<void> {
|
|
|
94
91
|
await dumpTranscript(videoId, lines);
|
|
95
92
|
}
|
|
96
93
|
|
|
97
|
-
// ── Stage 5: Merge signals + rank ─────────────────────────────────────────
|
|
98
94
|
const rankedSegments = selectSegments(chunkEvals, audioEvents, { threshold, topN });
|
|
99
95
|
|
|
100
|
-
// Build partial result for early-exit path (no segments above threshold)
|
|
101
96
|
const partialResult: PipelineResult = {
|
|
102
97
|
video_id: videoId,
|
|
103
98
|
title: metadata.title,
|
|
@@ -112,13 +107,11 @@ export async function runPipeline(args: CliArgs): Promise<void> {
|
|
|
112
107
|
return;
|
|
113
108
|
}
|
|
114
109
|
|
|
115
|
-
// ── Stage 4b: Refine clip boundaries (LLM pass 2) ─────────────────────────
|
|
116
110
|
const refinedSegments = await refineRankedSegments(rankedSegments, microBlocks, cache, {
|
|
117
111
|
maxParallel,
|
|
118
112
|
noCache: args.noCache,
|
|
119
113
|
});
|
|
120
114
|
|
|
121
|
-
// ── Output result ─────────────────────────────────────────────────────────
|
|
122
115
|
const result: PipelineResult = {
|
|
123
116
|
video_id: videoId,
|
|
124
117
|
title: metadata.title,
|
|
@@ -132,7 +125,6 @@ export async function runPipeline(args: CliArgs): Promise<void> {
|
|
|
132
125
|
|
|
133
126
|
log.info('Done.');
|
|
134
127
|
|
|
135
|
-
// ── Stage 6: Download + generate clips (only with --clip) ─────────────────
|
|
136
128
|
if (!args.clip) {
|
|
137
129
|
log.info('Tip: run with --clip to download the video and generate mp4 clips.');
|
|
138
130
|
return;
|
|
@@ -36,7 +36,6 @@ export async function processAudio(
|
|
|
36
36
|
const audioEnabled = config.AUDIO_DETECTION_ENABLED && !opts.noAudio;
|
|
37
37
|
if (!audioEnabled) return [];
|
|
38
38
|
|
|
39
|
-
// Cache-first
|
|
40
39
|
const cached = await cache.readAudioEvents(videoId, opts.gameProfile, config.AUDIO_PROVIDER);
|
|
41
40
|
if (cached) {
|
|
42
41
|
log.info(`[cache hit] Audio events loaded from cache (${cached.length} events)`);
|
|
@@ -47,7 +46,6 @@ export async function processAudio(
|
|
|
47
46
|
const audioPath =
|
|
48
47
|
opts.audioPath ?? (await downloadAudio(videoId, `${config.OUTPUT_DIR}/audio`));
|
|
49
48
|
|
|
50
|
-
// Build the analyzer chain once per run from config
|
|
51
49
|
const chain = createAnalyzerChain(config.AUDIO_PROVIDER);
|
|
52
50
|
const detector = new EventDetector(chain);
|
|
53
51
|
|
|
@@ -22,7 +22,6 @@ export async function exportClips(
|
|
|
22
22
|
segments: RankedSegment[],
|
|
23
23
|
opts: ClipExporterOpts,
|
|
24
24
|
): Promise<string[]> {
|
|
25
|
-
// Mode 1: local video already on disk — cut with ffmpeg
|
|
26
25
|
if (opts.localVideo) {
|
|
27
26
|
log.info(`Using local video: ${opts.localVideo}`);
|
|
28
27
|
return generateClips(
|
|
@@ -34,11 +33,9 @@ export async function exportClips(
|
|
|
34
33
|
);
|
|
35
34
|
}
|
|
36
35
|
|
|
37
|
-
// Determine yt-dlp mode
|
|
38
36
|
const downloadSections = opts.downloadSections ?? config.DOWNLOAD_SECTIONS_MODE;
|
|
39
37
|
|
|
40
38
|
if (typeof downloadSections === 'number') {
|
|
41
|
-
// Mode 2: download only the top-N segments via --download-sections
|
|
42
39
|
const segmentsToDownload = segments.slice(0, downloadSections);
|
|
43
40
|
|
|
44
41
|
if (segmentsToDownload.length < downloadSections) {
|
|
@@ -62,7 +59,6 @@ export async function exportClips(
|
|
|
62
59
|
return organizeClips(downloadResult.paths, videoId, opts.videoPath, config.CLIP_CONCURRENCY);
|
|
63
60
|
}
|
|
64
61
|
|
|
65
|
-
// Mode 3: full-video download → cut clips with ffmpeg
|
|
66
62
|
log.info('Downloading full video via yt-dlp...');
|
|
67
63
|
const downloadResult = await downloadVideo(videoId, 'all', [], opts.videoPath);
|
|
68
64
|
|