@thunderkiller/video-clipper 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/release.yml +5 -1
- package/CHANGELOG.md +8 -0
- package/dist/pipeline/runner.js +3 -11
- package/dist/pipeline/stages/audioProcessor.js +0 -2
- package/dist/pipeline/stages/clipExporter.js +0 -4
- package/dist/pipeline/stages/videoResolver.js +0 -3
- package/dist/services/audioAnalyzers/factory.js +0 -1
- package/dist/services/audioAnalyzers/gemini.js +6 -7
- package/dist/services/chunkBuilder/index.js +0 -4
- package/dist/services/clipRefiner/index.js +1 -1
- package/dist/services/llmAnalyzer/LLMAnalyzer.js +0 -2
- package/dist/services/transcriptDetector/index.js +0 -4
- package/dist/services/transcriptFetcher/index.js +2 -6
- package/dist/services/urlParser/index.js +0 -1
- package/dist/types/analyzer.js +1 -0
- package/dist/types/cache.js +5 -0
- package/dist/types/config.js +0 -23
- package/dist/types/downloader.js +1 -0
- package/dist/types/factory.js +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/pipeline.js +0 -8
- package/dist/types/segment.js +6 -6
- package/dist/types/transcript.js +6 -6
- package/dist/utils/cache.js +1 -8
- package/package.json +1 -1
- package/src/pipeline/runner.ts +3 -11
- package/src/pipeline/stages/audioProcessor.ts +0 -2
- package/src/pipeline/stages/clipExporter.ts +0 -4
- package/src/pipeline/stages/videoResolver.ts +0 -3
- package/src/services/audioAnalyzers/factory.ts +1 -3
- package/src/services/audioAnalyzers/gemini.ts +6 -7
- package/src/services/audioAnalyzers/index.ts +1 -1
- package/src/services/chunkBuilder/index.ts +0 -4
- package/src/services/clipRefiner/index.ts +1 -1
- package/src/services/llmAnalyzer/LLMAnalyzer.ts +2 -18
- package/src/services/transcriptAnalyzers/factory.ts +1 -2
- package/src/services/transcriptAnalyzers/index.ts +1 -1
- package/src/services/transcriptDetector/index.ts +6 -12
- package/src/services/transcriptFetcher/index.ts +2 -6
- package/src/services/urlParser/index.ts +0 -1
- package/src/services/videoDownloader/index.ts +1 -15
- package/src/types/analyzer.ts +23 -0
- package/src/types/cache.ts +8 -0
- package/src/types/config.ts +0 -23
- package/src/types/downloader.ts +15 -0
- package/src/types/factory.ts +3 -0
- package/src/types/index.ts +14 -0
- package/src/types/pipeline.ts +0 -33
- package/src/types/segment.ts +6 -6
- package/src/types/transcript.ts +6 -6
- package/src/utils/cache.ts +13 -12
|
@@ -18,7 +18,6 @@ export async function resolveVideo(
|
|
|
18
18
|
rawUrl: string,
|
|
19
19
|
maxDurationSec?: number,
|
|
20
20
|
): Promise<VideoResolverResult> {
|
|
21
|
-
// Parse URL → video ID
|
|
22
21
|
let videoId: string;
|
|
23
22
|
try {
|
|
24
23
|
videoId = parseUrl(rawUrl);
|
|
@@ -26,14 +25,12 @@ export async function resolveVideo(
|
|
|
26
25
|
throw new Error(`Invalid YouTube URL: ${rawUrl}`);
|
|
27
26
|
}
|
|
28
27
|
|
|
29
|
-
// Fetch metadata (yt-dlp → oEmbed fallback)
|
|
30
28
|
log.info(`Fetching metadata for ${videoId}...`);
|
|
31
29
|
const metadata = await extractMetadata(videoId);
|
|
32
30
|
log.info(
|
|
33
31
|
`Video: "${metadata.title}" (${metadata.duration > 0 ? formatSeconds(metadata.duration) : 'duration unknown'})`,
|
|
34
32
|
);
|
|
35
33
|
|
|
36
|
-
// --max-duration guard
|
|
37
34
|
if (maxDurationSec !== undefined && metadata.duration > 0) {
|
|
38
35
|
if (metadata.duration > maxDurationSec) {
|
|
39
36
|
throw new Error(
|
|
@@ -3,8 +3,7 @@ import { AudioAnalyzer } from './base.js';
|
|
|
3
3
|
import { GeminiAudioAnalyzer } from './gemini.js';
|
|
4
4
|
import { WhisperAudioAnalyzer } from './whisper.js';
|
|
5
5
|
import { YAMNetAudioAnalyzer } from './yamnet.js';
|
|
6
|
-
|
|
7
|
-
export type AudioProviderName = 'gemini' | 'whisper' | 'yamnet';
|
|
6
|
+
import type { AudioProviderName } from '../../types/index.js';
|
|
8
7
|
|
|
9
8
|
const KNOWN_PROVIDERS = new Set<AudioProviderName>(['gemini', 'whisper', 'yamnet']);
|
|
10
9
|
|
|
@@ -17,7 +16,6 @@ const KNOWN_PROVIDERS = new Set<AudioProviderName>(['gemini', 'whisper', 'yamnet
|
|
|
17
16
|
* Backward-compat: "both" is mapped to ['gemini', 'whisper'] with a deprecation warning.
|
|
18
17
|
*/
|
|
19
18
|
export function parseProviderChain(providerString: string): AudioProviderName[] {
|
|
20
|
-
// Backward compatibility: map legacy 'both' to the new comma-separated form
|
|
21
19
|
if (providerString.trim() === 'both') {
|
|
22
20
|
log.warn(
|
|
23
21
|
'[audio] AUDIO_PROVIDER=both is deprecated. Use AUDIO_PROVIDER=gemini,whisper instead.',
|
|
@@ -6,12 +6,14 @@ import { log } from '../../utils/logger.js';
|
|
|
6
6
|
import type { AudioEvent } from '../../types/index.js';
|
|
7
7
|
import { AudioAnalyzer } from './base.js';
|
|
8
8
|
|
|
9
|
+
/**
|
|
10
|
+
* Gemini returns timestamps inconsistently as either:
|
|
11
|
+
* - MM.SS notation: 1.03 = 1 min 3 sec = 63s
|
|
12
|
+
* - True decimal seconds: 53.403 = 53.403s
|
|
13
|
+
* Use normalizeGeminiTime() to resolve correct value.
|
|
14
|
+
*/
|
|
9
15
|
const GeminiEventSchema = z.array(
|
|
10
16
|
z.object({
|
|
11
|
-
// Gemini inconsistently returns timestamps in either:
|
|
12
|
-
// - MM.SS notation: 1.03 = 1 min 3 sec = 63s
|
|
13
|
-
// - True decimal seconds: 53.403 = 53.403s
|
|
14
|
-
// Use normalizeGeminiTime() to resolve the correct value.
|
|
15
17
|
time_sec: z.number(),
|
|
16
18
|
event: z.string(),
|
|
17
19
|
confidence: z.number().min(0).max(1),
|
|
@@ -56,18 +58,15 @@ function mmssToSeconds(value: number): number {
|
|
|
56
58
|
export function normalizeGeminiTime(value: number, chunkDurationSec: number): number {
|
|
57
59
|
const frac = value % 1;
|
|
58
60
|
|
|
59
|
-
// Fractional part > 0.59 is impossible in MM.SS — must be decimal seconds
|
|
60
61
|
if (Math.round(frac * 100) > 59) {
|
|
61
62
|
return value;
|
|
62
63
|
}
|
|
63
64
|
|
|
64
|
-
// Fractional part ≤ 0.59: could be MM.SS — check if converted value fits in chunk
|
|
65
65
|
const mmss = mmssToSeconds(value);
|
|
66
66
|
if (mmss < chunkDurationSec) {
|
|
67
67
|
return mmss;
|
|
68
68
|
}
|
|
69
69
|
|
|
70
|
-
// MM.SS conversion overflows the chunk — must be true decimal seconds
|
|
71
70
|
return value;
|
|
72
71
|
}
|
|
73
72
|
|
|
@@ -3,4 +3,4 @@ export { GeminiAudioAnalyzer, normalizeGeminiTime } from './gemini.js';
|
|
|
3
3
|
export { WhisperAudioAnalyzer, getPythonBin } from './whisper.js';
|
|
4
4
|
export { YAMNetAudioAnalyzer } from './yamnet.js';
|
|
5
5
|
export { createAnalyzerChain, parseProviderChain } from './factory.js';
|
|
6
|
-
export type { AudioProviderName } from './factory.js';
|
|
6
|
+
export type { AudioProviderName } from '../../types/index.js';
|
|
@@ -15,20 +15,17 @@ export function buildMicroBlocks(lines: TranscriptLine[], windowSec: number): Mi
|
|
|
15
15
|
|
|
16
16
|
for (const line of lines) {
|
|
17
17
|
if (line.start >= windowStart + windowSec) {
|
|
18
|
-
// Flush current block
|
|
19
18
|
blocks.push({
|
|
20
19
|
start: windowStart,
|
|
21
20
|
end: line.start,
|
|
22
21
|
text: texts.join(' '),
|
|
23
22
|
});
|
|
24
|
-
// Start a new window aligned to the current line
|
|
25
23
|
windowStart = line.start;
|
|
26
24
|
texts = [];
|
|
27
25
|
}
|
|
28
26
|
texts.push(line.text);
|
|
29
27
|
}
|
|
30
28
|
|
|
31
|
-
// Flush the final block
|
|
32
29
|
if (texts.length > 0) {
|
|
33
30
|
const lastLine = lines[lines.length - 1];
|
|
34
31
|
blocks.push({
|
|
@@ -78,7 +75,6 @@ export function buildLLMChunks(
|
|
|
78
75
|
const step = chunkLen - overlap;
|
|
79
76
|
chunkStart += step;
|
|
80
77
|
|
|
81
|
-
// Guard: if overlap >= chunkLen we'd loop forever
|
|
82
78
|
if (step <= 0) break;
|
|
83
79
|
}
|
|
84
80
|
|
|
@@ -92,7 +92,7 @@ async function refineSegment(
|
|
|
92
92
|
maxRetries: config.LLM_MAX_RETRIES,
|
|
93
93
|
});
|
|
94
94
|
|
|
95
|
-
|
|
95
|
+
/** Clamp to context window to prevent LLM from hallucinating out-of-range values */
|
|
96
96
|
const refinedStart = Math.max(windowStart, Math.min(object.clip_start, object.clip_end - 1));
|
|
97
97
|
const refinedEnd = Math.min(windowEnd, Math.max(object.clip_end, object.clip_start + 1));
|
|
98
98
|
|
|
@@ -11,24 +11,10 @@ import type {
|
|
|
11
11
|
AudioEvent,
|
|
12
12
|
ChunkEvaluation,
|
|
13
13
|
RankedSegment,
|
|
14
|
+
LLMAnalyzerResult,
|
|
15
|
+
LLMAnalyzerOpts,
|
|
14
16
|
} from '../../types/index.js';
|
|
15
17
|
|
|
16
|
-
export interface LLMAnalyzerResult {
|
|
17
|
-
lines: TranscriptLine[];
|
|
18
|
-
microBlocks: MicroBlock[];
|
|
19
|
-
chunks: LLMChunk[];
|
|
20
|
-
chunkEvals: ChunkEvaluation[];
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
export interface LLMAnalyzerOpts {
|
|
24
|
-
videoId: string;
|
|
25
|
-
audioPath: string | null;
|
|
26
|
-
audioEvents: AudioEvent[];
|
|
27
|
-
maxChunks?: number;
|
|
28
|
-
maxParallel: number;
|
|
29
|
-
noCache: boolean;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
18
|
/**
|
|
33
19
|
* LLMAnalyzer — orchestrates transcript fetching + LLM-based segment analysis.
|
|
34
20
|
*
|
|
@@ -62,14 +48,12 @@ export class LLMAnalyzer {
|
|
|
62
48
|
* everything needed for the ranking step.
|
|
63
49
|
*/
|
|
64
50
|
async analyze(opts: LLMAnalyzerOpts): Promise<LLMAnalyzerResult> {
|
|
65
|
-
// ── Transcript ────────────────────────────────────────────────────────────
|
|
66
51
|
const { lines, microBlocks, chunks } = await this.transcriptDetector.detect(
|
|
67
52
|
opts.videoId,
|
|
68
53
|
opts.audioPath,
|
|
69
54
|
this.cache,
|
|
70
55
|
);
|
|
71
56
|
|
|
72
|
-
// ── LLM pass 1 ────────────────────────────────────────────────────────────
|
|
73
57
|
const chunkLimit = opts.maxChunks ?? config.MAX_CHUNKS;
|
|
74
58
|
const chunksToAnalyze = chunkLimit !== undefined ? chunks.slice(0, chunkLimit) : chunks;
|
|
75
59
|
|
|
@@ -3,8 +3,7 @@ import { TranscriptAnalyzer } from './base.js';
|
|
|
3
3
|
import { YtDlpTranscriptAnalyzer } from './ytdlp.js';
|
|
4
4
|
import { WhisperTranscriptAnalyzer } from './whisper.js';
|
|
5
5
|
import { GeminiTranscriptAnalyzer } from './gemini.js';
|
|
6
|
-
|
|
7
|
-
export type TranscriptProviderName = 'ytdlp' | 'whisper' | 'gemini';
|
|
6
|
+
import type { TranscriptProviderName } from '../../types/index.js';
|
|
8
7
|
|
|
9
8
|
const KNOWN_PROVIDERS = new Set<TranscriptProviderName>(['ytdlp', 'whisper', 'gemini']);
|
|
10
9
|
|
|
@@ -3,4 +3,4 @@ export { YtDlpTranscriptAnalyzer } from './ytdlp.js';
|
|
|
3
3
|
export { WhisperTranscriptAnalyzer } from './whisper.js';
|
|
4
4
|
export { GeminiTranscriptAnalyzer } from './gemini.js';
|
|
5
5
|
export { createTranscriptChain, parseTranscriptProviderChain } from './factory.js';
|
|
6
|
-
export type { TranscriptProviderName } from './factory.js';
|
|
6
|
+
export type { TranscriptProviderName } from '../../types/index.js';
|
|
@@ -3,13 +3,12 @@ import { log } from '../../utils/logger.js';
|
|
|
3
3
|
import { config } from '../../config/index.js';
|
|
4
4
|
import type { TranscriptAnalyzer } from '../transcriptAnalyzers/index.js';
|
|
5
5
|
import type { Cache } from '../../utils/cache.js';
|
|
6
|
-
import type {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
}
|
|
6
|
+
import type {
|
|
7
|
+
TranscriptLine,
|
|
8
|
+
MicroBlock,
|
|
9
|
+
LLMChunk,
|
|
10
|
+
TranscriptDetectorResult,
|
|
11
|
+
} from '../../types/index.js';
|
|
13
12
|
|
|
14
13
|
/**
|
|
15
14
|
* Top-level transcript detector.
|
|
@@ -60,7 +59,6 @@ export class TranscriptDetector {
|
|
|
60
59
|
): Promise<TranscriptDetectorResult> {
|
|
61
60
|
let lines: TranscriptLine[];
|
|
62
61
|
|
|
63
|
-
// Cache-first: if we already have lines on disk, skip the provider chain entirely
|
|
64
62
|
const cached = await cache.readTranscript(videoId);
|
|
65
63
|
if (cached) {
|
|
66
64
|
log.info(`[cache hit] Transcript loaded from cache (${cached.length} lines)`);
|
|
@@ -76,10 +74,6 @@ export class TranscriptDetector {
|
|
|
76
74
|
return { lines, microBlocks, chunks };
|
|
77
75
|
}
|
|
78
76
|
|
|
79
|
-
// -------------------------------------------------------------------------
|
|
80
|
-
// Private helpers
|
|
81
|
-
// -------------------------------------------------------------------------
|
|
82
|
-
|
|
83
77
|
/**
|
|
84
78
|
* Walks the analyzer chain in order.
|
|
85
79
|
* Falls back to the next analyzer whenever one throws.
|
|
@@ -20,7 +20,7 @@ export function parseVtt(vttContent: string): TranscriptLine[] {
|
|
|
20
20
|
const lines = vttContent.split(/\r?\n/);
|
|
21
21
|
const result: TranscriptLine[] = [];
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
/** Regex to match HH:MM:SS.mmm --> HH:MM:SS.mmm timestamp lines */
|
|
24
24
|
const TIMESTAMP_RE =
|
|
25
25
|
/^(\d{2}):(\d{2}):(\d{2})[.,](\d{3})\s+-->\s+(\d{2}):(\d{2}):(\d{2})[.,](\d{3})/;
|
|
26
26
|
|
|
@@ -42,7 +42,6 @@ export function parseVtt(vttContent: string): TranscriptLine[] {
|
|
|
42
42
|
parseInt(match[7], 10) +
|
|
43
43
|
parseInt(match[8], 10) / 1000;
|
|
44
44
|
|
|
45
|
-
// Collect cue text lines until blank line or EOF
|
|
46
45
|
i++;
|
|
47
46
|
const textLines: string[] = [];
|
|
48
47
|
while (i < lines.length && lines[i].trim() !== '') {
|
|
@@ -52,7 +51,6 @@ export function parseVtt(vttContent: string): TranscriptLine[] {
|
|
|
52
51
|
|
|
53
52
|
const rawText = textLines.join(' ');
|
|
54
53
|
|
|
55
|
-
// Strip VTT inline tags: <00:00:00.000>, <c>, </c>, <b>, </b>, <i>, </i>, etc.
|
|
56
54
|
const text = rawText
|
|
57
55
|
.replace(/<[^>]+>/g, '')
|
|
58
56
|
.replace(/&amp;/g, '&')
|
|
@@ -68,8 +66,7 @@ export function parseVtt(vttContent: string): TranscriptLine[] {
|
|
|
68
66
|
|
|
69
67
|
const duration = Math.max(0, endSec - startSec);
|
|
70
68
|
|
|
71
|
-
|
|
72
|
-
// (YouTube VTT often repeats the same line as text scrolls)
|
|
69
|
+
/** Skip duplicate cues - YouTube VTT often repeats same line as text scrolls */
|
|
73
70
|
if (result.length > 0 && result[result.length - 1].text === text) {
|
|
74
71
|
continue;
|
|
75
72
|
}
|
|
@@ -125,7 +122,6 @@ export async function fetchTranscript(videoId: string): Promise<TranscriptLine[]
|
|
|
125
122
|
throw new Error(`yt-dlp failed to fetch subtitles for "${videoId}": ${message}`);
|
|
126
123
|
}
|
|
127
124
|
|
|
128
|
-
// Find the downloaded .vtt file (yt-dlp names it <id>.<lang>.vtt)
|
|
129
125
|
const files = await fs.readdir(tmpDir);
|
|
130
126
|
const vttFile = files.find((f) => f.endsWith('.vtt'));
|
|
131
127
|
|
|
@@ -40,7 +40,6 @@ export function parseUrl(url: string): string {
|
|
|
40
40
|
throw new Error(`Could not extract video ID from URL: "${url}"`);
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
-
// Strip any extra query params that may have been part of the path segment
|
|
44
43
|
videoId = videoId.split('?')[0];
|
|
45
44
|
|
|
46
45
|
if (videoId.length !== VIDEO_ID_LENGTH) {
|
|
@@ -4,21 +4,7 @@ import { join } from 'path';
|
|
|
4
4
|
import pLimit from 'p-limit';
|
|
5
5
|
import { config } from '../../config/index.js';
|
|
6
6
|
import { log } from '../../utils/logger.js';
|
|
7
|
-
import type { RankedSegment } from '../../types/index.js';
|
|
8
|
-
|
|
9
|
-
export type DownloadMode = 'all' | 'segments';
|
|
10
|
-
|
|
11
|
-
export interface DownloadResultAll {
|
|
12
|
-
mode: 'all';
|
|
13
|
-
path: string;
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
export interface DownloadResultSegments {
|
|
17
|
-
mode: 'segments';
|
|
18
|
-
paths: string[];
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
export type DownloadResult = DownloadResultAll | DownloadResultSegments;
|
|
7
|
+
import type { RankedSegment, DownloadMode, DownloadResult } from '../../types/index.js';
|
|
22
8
|
|
|
23
9
|
/**
|
|
24
10
|
* Formats a timestamp for yt-dlp --download-sections.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { TranscriptLine, MicroBlock, LLMChunk, ChunkEvaluation } from './index.js';
|
|
2
|
+
|
|
3
|
+
export interface LLMAnalyzerResult {
|
|
4
|
+
lines: TranscriptLine[];
|
|
5
|
+
microBlocks: MicroBlock[];
|
|
6
|
+
chunks: LLMChunk[];
|
|
7
|
+
chunkEvals: ChunkEvaluation[];
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export interface LLMAnalyzerOpts {
|
|
11
|
+
videoId: string;
|
|
12
|
+
audioPath: string | null;
|
|
13
|
+
audioEvents: import('./audio.js').AudioEvent[];
|
|
14
|
+
maxChunks?: number;
|
|
15
|
+
maxParallel: number;
|
|
16
|
+
noCache: boolean;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export interface TranscriptDetectorResult {
|
|
20
|
+
lines: TranscriptLine[];
|
|
21
|
+
microBlocks: MicroBlock[];
|
|
22
|
+
chunks: LLMChunk[];
|
|
23
|
+
}
|
package/src/types/config.ts
CHANGED
|
@@ -14,7 +14,6 @@ const LLM_PROVIDERS = [
|
|
|
14
14
|
|
|
15
15
|
export type LLMProvider = (typeof LLM_PROVIDERS)[number];
|
|
16
16
|
|
|
17
|
-
/** Map each provider to the env var name that holds its API key. */
|
|
18
17
|
const PROVIDER_KEY_MAP: Record<LLMProvider, string> = {
|
|
19
18
|
openai: 'OPENAI_API_KEY',
|
|
20
19
|
anthropic: 'ANTHROPIC_API_KEY',
|
|
@@ -29,10 +28,8 @@ const PROVIDER_KEY_MAP: Record<LLMProvider, string> = {
|
|
|
29
28
|
|
|
30
29
|
export const ConfigSchema = z
|
|
31
30
|
.object({
|
|
32
|
-
// --- Provider selection ---
|
|
33
31
|
LLM_PROVIDER: z.enum(LLM_PROVIDERS).default('openai'),
|
|
34
32
|
|
|
35
|
-
// --- Per-provider API keys (all optional at schema level; enforced via superRefine) ---
|
|
36
33
|
OPENAI_API_KEY: z.string().optional(),
|
|
37
34
|
ANTHROPIC_API_KEY: z.string().optional(),
|
|
38
35
|
GOOGLE_GENERATIVE_AI_API_KEY: z.string().optional(),
|
|
@@ -44,7 +41,6 @@ export const ConfigSchema = z
|
|
|
44
41
|
CUSTOM_OPENAI_API_KEY: z.string().optional(),
|
|
45
42
|
CUSTOM_OPENAI_BASE_URL: z.string().url().optional(),
|
|
46
43
|
|
|
47
|
-
// --- Tunable parameters ---
|
|
48
44
|
SCORE_THRESHOLD: z.coerce.number().min(1).max(10).default(7),
|
|
49
45
|
TOP_N_SEGMENTS: z.coerce.number().min(1).default(10),
|
|
50
46
|
CHUNK_LENGTH_SEC: z.coerce.number().min(10).default(120),
|
|
@@ -55,32 +51,20 @@ export const ConfigSchema = z
|
|
|
55
51
|
DOWNLOAD_DIR: z.string().default('downloads/'),
|
|
56
52
|
OUTPUT_DIR: z.string().default('outputs/'),
|
|
57
53
|
CACHE_DIR: z.string().default('outputs/cache'),
|
|
58
|
-
// --- Output dumping ---
|
|
59
54
|
DUMP_OUTPUTS: z.coerce.boolean().default(true),
|
|
60
|
-
// --- LLM evaluation limits ---
|
|
61
55
|
MAX_CHUNKS: z.coerce.number().min(1).optional(),
|
|
62
56
|
LLM_CONCURRENCY: z.coerce.number().min(1).default(3),
|
|
63
57
|
CLIP_CONCURRENCY: z.coerce.number().min(1).default(1),
|
|
64
|
-
// --- Custom system prompt (overrides the default if set) ---
|
|
65
58
|
LLM_SYSTEM_PROMPT: z.string().optional(),
|
|
66
|
-
// --- Gemini model used for audio event detection ---
|
|
67
59
|
AUDIO_GEMINI_MODEL: z.string().default('gemini-2.5-flash'),
|
|
68
|
-
// --- Extra instructions appended to the Gemini audio detection prompt ---
|
|
69
60
|
AUDIO_EXTRA_INSTRUCTIONS: z.string().optional(),
|
|
70
|
-
// --- Download mode for yt-dlp ---
|
|
71
61
|
DOWNLOAD_SECTIONS_MODE: z.union([z.literal('all'), z.number().int().positive()]).default('all'),
|
|
72
|
-
// --- FFmpeg paths (optional, for custom ffmpeg/ffprobe locations) ---
|
|
73
62
|
FFMPEG_PATH: z.string().optional(),
|
|
74
63
|
FFPROBE_PATH: z.string().optional(),
|
|
75
|
-
// --- FFmpeg encoding preset for clip generation ---
|
|
76
64
|
FFMPEG_PRESET: z
|
|
77
65
|
.enum(['ultrafast', 'superfast', 'veryfast', 'fast', 'medium', 'slow', 'slower'])
|
|
78
66
|
.default('fast'),
|
|
79
|
-
// --- Timestamp offset for clips (adjusts if transcript is misaligned with video) ---
|
|
80
67
|
TIMESTAMP_OFFSET_SECONDS: z.coerce.number().default(0),
|
|
81
|
-
// --- Transcript provider ---
|
|
82
|
-
// Comma-separated ordered fallback chain: "ytdlp" | "whisper" | "ytdlp,whisper" etc.
|
|
83
|
-
// First provider that succeeds wins; subsequent providers are tried only on failure.
|
|
84
68
|
TRANSCRIPT_PROVIDER: z
|
|
85
69
|
.string()
|
|
86
70
|
.default('ytdlp')
|
|
@@ -97,10 +81,7 @@ export const ConfigSchema = z
|
|
|
97
81
|
'TRANSCRIPT_PROVIDER must be a comma-separated list of: ytdlp, whisper, gemini (e.g. "ytdlp")',
|
|
98
82
|
},
|
|
99
83
|
),
|
|
100
|
-
// --- Audio event detection ---
|
|
101
84
|
AUDIO_DETECTION_ENABLED: z.coerce.boolean().default(true),
|
|
102
|
-
// Comma-separated ordered fallback chain: "gemini,whisper" | "yamnet" | "gemini" etc.
|
|
103
|
-
// Legacy value "both" is accepted and mapped to "gemini,whisper" at runtime.
|
|
104
85
|
AUDIO_PROVIDER: z
|
|
105
86
|
.string()
|
|
106
87
|
.default('gemini,whisper')
|
|
@@ -127,9 +108,7 @@ export const ConfigSchema = z
|
|
|
127
108
|
AUDIO_CLIP_POST_ROLL: z.coerce.number().min(0).default(15),
|
|
128
109
|
AUDIO_LLM_BOOST_WINDOW: z.coerce.number().min(0).default(10),
|
|
129
110
|
AUDIO_LLM_SCORE_BOOST: z.coerce.number().min(0).default(2),
|
|
130
|
-
// --- Game profile ---
|
|
131
111
|
GAME_PROFILE: z.enum(['valorant', 'fps', 'boss_fight', 'general']).default('general'),
|
|
132
|
-
// --- yt-dlp cookie support (for bot detection / auth) ---
|
|
133
112
|
YT_DLP_COOKIES_FROM_BROWSER: z
|
|
134
113
|
.enum(['chrome', 'firefox', 'safari', 'brave', 'edge', 'opera', 'chromium'])
|
|
135
114
|
.optional(),
|
|
@@ -148,7 +127,6 @@ export const ConfigSchema = z
|
|
|
148
127
|
});
|
|
149
128
|
}
|
|
150
129
|
|
|
151
|
-
// custom provider also requires a base URL
|
|
152
130
|
if (
|
|
153
131
|
provider === 'custom' &&
|
|
154
132
|
(!data.CUSTOM_OPENAI_BASE_URL || data.CUSTOM_OPENAI_BASE_URL.trim() === '')
|
|
@@ -160,7 +138,6 @@ export const ConfigSchema = z
|
|
|
160
138
|
});
|
|
161
139
|
}
|
|
162
140
|
|
|
163
|
-
// Validate cookie config: only one method allowed at a time
|
|
164
141
|
if (data.YT_DLP_COOKIES_FROM_BROWSER && data.YT_DLP_COOKIES_FILE) {
|
|
165
142
|
ctx.addIssue({
|
|
166
143
|
code: z.ZodIssueCode.custom,
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { RankedSegment } from './index.js';
|
|
2
|
+
|
|
3
|
+
export type DownloadMode = 'all' | 'segments';
|
|
4
|
+
|
|
5
|
+
export interface DownloadResultAll {
|
|
6
|
+
mode: 'all';
|
|
7
|
+
path: string;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
export interface DownloadResultSegments {
|
|
11
|
+
mode: 'segments';
|
|
12
|
+
paths: string[];
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export type DownloadResult = DownloadResultAll | DownloadResultSegments;
|
package/src/types/index.ts
CHANGED
|
@@ -24,3 +24,17 @@ export type {
|
|
|
24
24
|
SegmentSelectorOpts,
|
|
25
25
|
ClipExporterOpts,
|
|
26
26
|
} from './pipeline.js';
|
|
27
|
+
|
|
28
|
+
export type { LLMAnalyzerResult, LLMAnalyzerOpts, TranscriptDetectorResult } from './analyzer.js';
|
|
29
|
+
|
|
30
|
+
export type {
|
|
31
|
+
DownloadMode,
|
|
32
|
+
DownloadResultAll,
|
|
33
|
+
DownloadResultSegments,
|
|
34
|
+
DownloadResult,
|
|
35
|
+
} from './downloader.js';
|
|
36
|
+
|
|
37
|
+
export { SegmentRefinementSchema } from './cache.js';
|
|
38
|
+
export type { SegmentRefinement } from './cache.js';
|
|
39
|
+
|
|
40
|
+
export type { TranscriptProviderName, AudioProviderName } from './factory.js';
|
package/src/types/pipeline.ts
CHANGED
|
@@ -1,12 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Pipeline-layer types: stage option bags, result shapes, and the generic
|
|
3
|
-
* time-window interface from the chunker utility.
|
|
4
|
-
*
|
|
5
|
-
* All types here are owned by one pipeline stage but live centrally so the
|
|
6
|
-
* runner (and any future consumers) can import them without reaching into
|
|
7
|
-
* individual stage files.
|
|
8
|
-
*/
|
|
9
|
-
|
|
10
1
|
import type {
|
|
11
2
|
VideoMetadata,
|
|
12
3
|
TranscriptLine,
|
|
@@ -15,10 +6,6 @@ import type {
|
|
|
15
6
|
ChunkEvaluation,
|
|
16
7
|
} from './index.js';
|
|
17
8
|
|
|
18
|
-
// ---------------------------------------------------------------------------
|
|
19
|
-
// Chunker utility
|
|
20
|
-
// ---------------------------------------------------------------------------
|
|
21
|
-
|
|
22
9
|
/** A half-open time window [start, end) in seconds. Returned by `buildWindows`. */
|
|
23
10
|
export interface ChunkWindow {
|
|
24
11
|
/** Start of the window in seconds (inclusive). */
|
|
@@ -27,19 +14,11 @@ export interface ChunkWindow {
|
|
|
27
14
|
end: number;
|
|
28
15
|
}
|
|
29
16
|
|
|
30
|
-
// ---------------------------------------------------------------------------
|
|
31
|
-
// Stage 1 — Video Resolver
|
|
32
|
-
// ---------------------------------------------------------------------------
|
|
33
|
-
|
|
34
17
|
export interface VideoResolverResult {
|
|
35
18
|
videoId: string;
|
|
36
19
|
metadata: VideoMetadata;
|
|
37
20
|
}
|
|
38
21
|
|
|
39
|
-
// ---------------------------------------------------------------------------
|
|
40
|
-
// Stage 3 — Audio Processor
|
|
41
|
-
// ---------------------------------------------------------------------------
|
|
42
|
-
|
|
43
22
|
export interface AudioProcessorOpts {
|
|
44
23
|
noAudio: boolean;
|
|
45
24
|
gameProfile: string;
|
|
@@ -48,10 +27,6 @@ export interface AudioProcessorOpts {
|
|
|
48
27
|
audioPath?: string | null;
|
|
49
28
|
}
|
|
50
29
|
|
|
51
|
-
// ---------------------------------------------------------------------------
|
|
52
|
-
// Stage 4a + 4b — Segment Analyzer / Refiner
|
|
53
|
-
// ---------------------------------------------------------------------------
|
|
54
|
-
|
|
55
30
|
export interface SegmentAnalyzerOpts {
|
|
56
31
|
maxChunks?: number;
|
|
57
32
|
maxParallel: number;
|
|
@@ -65,19 +40,11 @@ export interface SegmentAnalyzerResult {
|
|
|
65
40
|
chunkEvals: ChunkEvaluation[];
|
|
66
41
|
}
|
|
67
42
|
|
|
68
|
-
// ---------------------------------------------------------------------------
|
|
69
|
-
// Stage 5 — Segment Selector
|
|
70
|
-
// ---------------------------------------------------------------------------
|
|
71
|
-
|
|
72
43
|
export interface SegmentSelectorOpts {
|
|
73
44
|
threshold: number;
|
|
74
45
|
topN: number;
|
|
75
46
|
}
|
|
76
47
|
|
|
77
|
-
// ---------------------------------------------------------------------------
|
|
78
|
-
// Stage 6 — Clip Exporter
|
|
79
|
-
// ---------------------------------------------------------------------------
|
|
80
|
-
|
|
81
48
|
export interface ClipExporterOpts {
|
|
82
49
|
/** Path to a pre-existing local video file. Skips yt-dlp download entirely. */
|
|
83
50
|
localVideo?: string;
|
package/src/types/segment.ts
CHANGED
|
@@ -4,15 +4,15 @@ export const AnalyzedSegmentSchema = z.object({
|
|
|
4
4
|
interesting: z.boolean(),
|
|
5
5
|
score: z.number().min(1).max(10),
|
|
6
6
|
reason: z.string(),
|
|
7
|
-
clip_start: z.number(),
|
|
8
|
-
clip_end: z.number(),
|
|
7
|
+
clip_start: z.number(),
|
|
8
|
+
clip_end: z.number(),
|
|
9
9
|
});
|
|
10
10
|
export type AnalyzedSegment = z.infer<typeof AnalyzedSegmentSchema>;
|
|
11
11
|
|
|
12
12
|
export const RankedSegmentSchema = z.object({
|
|
13
13
|
rank: z.number().int().min(1),
|
|
14
|
-
start: z.number(),
|
|
15
|
-
end: z.number(),
|
|
14
|
+
start: z.number(),
|
|
15
|
+
end: z.number(),
|
|
16
16
|
score: z.number().min(1).max(10),
|
|
17
17
|
reason: z.string(),
|
|
18
18
|
source: z.enum(['transcript', 'audio', 'both']),
|
|
@@ -22,8 +22,8 @@ export type RankedSegment = z.infer<typeof RankedSegmentSchema>;
|
|
|
22
22
|
|
|
23
23
|
const ChunkEvaluationBaseSchema = z.object({
|
|
24
24
|
chunk_index: z.number().int().min(0),
|
|
25
|
-
chunk_start: z.number(),
|
|
26
|
-
chunk_end: z.number(),
|
|
25
|
+
chunk_start: z.number(),
|
|
26
|
+
chunk_end: z.number(),
|
|
27
27
|
});
|
|
28
28
|
|
|
29
29
|
export const ChunkEvaluationSchema = z.discriminatedUnion('status', [
|
package/src/types/transcript.ts
CHANGED
|
@@ -2,21 +2,21 @@ import { z } from 'zod';
|
|
|
2
2
|
|
|
3
3
|
export const TranscriptLineSchema = z.object({
|
|
4
4
|
text: z.string(),
|
|
5
|
-
start: z.number(),
|
|
6
|
-
duration: z.number(),
|
|
5
|
+
start: z.number(),
|
|
6
|
+
duration: z.number(),
|
|
7
7
|
});
|
|
8
8
|
export type TranscriptLine = z.infer<typeof TranscriptLineSchema>;
|
|
9
9
|
|
|
10
10
|
export const MicroBlockSchema = z.object({
|
|
11
|
-
start: z.number(),
|
|
12
|
-
end: z.number(),
|
|
11
|
+
start: z.number(),
|
|
12
|
+
end: z.number(),
|
|
13
13
|
text: z.string(),
|
|
14
14
|
});
|
|
15
15
|
export type MicroBlock = z.infer<typeof MicroBlockSchema>;
|
|
16
16
|
|
|
17
17
|
export const LLMChunkSchema = z.object({
|
|
18
|
-
start: z.number(),
|
|
19
|
-
end: z.number(),
|
|
18
|
+
start: z.number(),
|
|
19
|
+
end: z.number(),
|
|
20
20
|
text: z.string(),
|
|
21
21
|
});
|
|
22
22
|
export type LLMChunk = z.infer<typeof LLMChunkSchema>;
|
package/src/utils/cache.ts
CHANGED
|
@@ -3,12 +3,19 @@ import { promises as fs } from 'fs';
|
|
|
3
3
|
import path from 'path';
|
|
4
4
|
import { z } from 'zod';
|
|
5
5
|
import { log } from './logger.js';
|
|
6
|
-
import {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
6
|
+
import {
|
|
7
|
+
TranscriptLineSchema,
|
|
8
|
+
ChunkEvaluationSchema,
|
|
9
|
+
AudioEventSchema,
|
|
10
|
+
SegmentRefinementSchema,
|
|
11
|
+
} from '../types/index.js';
|
|
12
|
+
import type {
|
|
13
|
+
TranscriptLine,
|
|
14
|
+
LLMChunk,
|
|
15
|
+
ChunkEvaluation,
|
|
16
|
+
AudioEvent,
|
|
17
|
+
SegmentRefinement,
|
|
18
|
+
} from '../types/index.js';
|
|
12
19
|
|
|
13
20
|
/**
|
|
14
21
|
* Serializes audio events into a stable string for cache keying.
|
|
@@ -50,12 +57,6 @@ async function writeCacheFile(filePath: string, data: unknown): Promise<void> {
|
|
|
50
57
|
}
|
|
51
58
|
}
|
|
52
59
|
|
|
53
|
-
const SegmentRefinementSchema = z.object({
|
|
54
|
-
refined_start: z.number(),
|
|
55
|
-
refined_end: z.number(),
|
|
56
|
-
});
|
|
57
|
-
type SegmentRefinement = z.infer<typeof SegmentRefinementSchema>;
|
|
58
|
-
|
|
59
60
|
/**
|
|
60
61
|
* Disk-backed cache for all pipeline stages.
|
|
61
62
|
*
|