@thunderkiller/video-clipper 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/.env.example +130 -0
  2. package/.github/workflows/ci.yml +42 -0
  3. package/.github/workflows/release.yml +72 -0
  4. package/.husky/pre-commit +3 -0
  5. package/.prettierignore +6 -0
  6. package/.prettierrc +7 -0
  7. package/.releaserc.json +21 -0
  8. package/AGENTS.md +122 -0
  9. package/CHANGELOG.md +45 -0
  10. package/README.md +410 -0
  11. package/dist/cli.js +187 -0
  12. package/dist/config/env.js +14 -0
  13. package/dist/config/index.js +1 -0
  14. package/dist/index.js +35 -0
  15. package/dist/pipeline/runner.js +132 -0
  16. package/dist/pipeline/stages/audioProcessor.js +75 -0
  17. package/dist/pipeline/stages/clipExporter.js +44 -0
  18. package/dist/pipeline/stages/segmentAnalyzer.js +46 -0
  19. package/dist/pipeline/stages/segmentSelector.js +23 -0
  20. package/dist/pipeline/stages/videoResolver.js +34 -0
  21. package/dist/services/audioAnalyzers/base.js +13 -0
  22. package/dist/services/audioAnalyzers/factory.js +56 -0
  23. package/dist/services/audioAnalyzers/gemini.js +109 -0
  24. package/dist/services/audioAnalyzers/index.js +5 -0
  25. package/dist/services/audioAnalyzers/whisper.js +62 -0
  26. package/dist/services/audioAnalyzers/yamnet.js +40 -0
  27. package/dist/services/audioDownloader/index.js +81 -0
  28. package/dist/services/chunkBuilder/index.js +71 -0
  29. package/dist/services/clipGenerator/index.js +156 -0
  30. package/dist/services/clipRefiner/index.js +103 -0
  31. package/dist/services/eventDetector/index.js +54 -0
  32. package/dist/services/llmAnalyzer/LLMAnalyzer.js +63 -0
  33. package/dist/services/llmAnalyzer/index.js +173 -0
  34. package/dist/services/metadataExtractor/index.js +66 -0
  35. package/dist/services/segmentRanker/index.js +40 -0
  36. package/dist/services/signalMerger/index.js +36 -0
  37. package/dist/services/transcriptAnalyzers/base.js +13 -0
  38. package/dist/services/transcriptAnalyzers/factory.js +51 -0
  39. package/dist/services/transcriptAnalyzers/gemini.js +19 -0
  40. package/dist/services/transcriptAnalyzers/index.js +5 -0
  41. package/dist/services/transcriptAnalyzers/whisper.js +55 -0
  42. package/dist/services/transcriptAnalyzers/ytdlp.js +16 -0
  43. package/dist/services/transcriptDetector/index.js +102 -0
  44. package/dist/services/transcriptFetcher/index.js +124 -0
  45. package/dist/services/urlParser/index.js +46 -0
  46. package/dist/services/videoDownloader/index.js +212 -0
  47. package/dist/types/audio.js +15 -0
  48. package/dist/types/cli.js +1 -0
  49. package/dist/types/config.js +150 -0
  50. package/dist/types/index.js +5 -0
  51. package/dist/types/pipeline.js +9 -0
  52. package/dist/types/segment.js +36 -0
  53. package/dist/types/transcript.js +16 -0
  54. package/dist/types/video.js +14 -0
  55. package/dist/utils/cache.js +143 -0
  56. package/dist/utils/chunker.js +51 -0
  57. package/dist/utils/dumper.js +36 -0
  58. package/dist/utils/format.js +10 -0
  59. package/dist/utils/logger.js +16 -0
  60. package/dist/utils/modelFactory.js +60 -0
  61. package/dist/utils/redactConfig.js +20 -0
  62. package/dist/utils/sliceAudio.js +26 -0
  63. package/docs/free-models.md +78 -0
  64. package/docs/plan.md +442 -0
  65. package/docs/refactorPhases.md +105 -0
  66. package/docs/yt-downloader.md +440 -0
  67. package/package.json +65 -0
  68. package/requirements.txt +5 -0
  69. package/scripts/detect_events.py +81 -0
  70. package/scripts/detect_events_whisper.py +101 -0
  71. package/scripts/transcribe_whisper.py +70 -0
  72. package/src/cli.ts +186 -0
  73. package/src/config/env.ts +18 -0
  74. package/src/config/index.ts +2 -0
  75. package/src/index.ts +46 -0
  76. package/src/pipeline/runner.ts +155 -0
  77. package/src/pipeline/stages/audioProcessor.ts +129 -0
  78. package/src/pipeline/stages/clipExporter.ts +80 -0
  79. package/src/pipeline/stages/segmentAnalyzer.ts +72 -0
  80. package/src/pipeline/stages/segmentSelector.ts +39 -0
  81. package/src/pipeline/stages/videoResolver.ts +47 -0
  82. package/src/services/audioAnalyzers/base.ts +32 -0
  83. package/src/services/audioAnalyzers/factory.ts +71 -0
  84. package/src/services/audioAnalyzers/gemini.ts +137 -0
  85. package/src/services/audioAnalyzers/index.ts +6 -0
  86. package/src/services/audioAnalyzers/whisper.ts +80 -0
  87. package/src/services/audioAnalyzers/yamnet.ts +54 -0
  88. package/src/services/audioDownloader/index.ts +102 -0
  89. package/src/services/chunkBuilder/index.ts +86 -0
  90. package/src/services/clipGenerator/index.ts +210 -0
  91. package/src/services/clipRefiner/index.ts +141 -0
  92. package/src/services/eventDetector/index.ts +68 -0
  93. package/src/services/llmAnalyzer/LLMAnalyzer.ts +114 -0
  94. package/src/services/llmAnalyzer/index.ts +231 -0
  95. package/src/services/metadataExtractor/index.ts +83 -0
  96. package/src/services/segmentRanker/index.ts +88 -0
  97. package/src/services/signalMerger/index.ts +53 -0
  98. package/src/services/transcriptAnalyzers/base.ts +26 -0
  99. package/src/services/transcriptAnalyzers/factory.ts +67 -0
  100. package/src/services/transcriptAnalyzers/gemini.ts +24 -0
  101. package/src/services/transcriptAnalyzers/index.ts +6 -0
  102. package/src/services/transcriptAnalyzers/whisper.ts +68 -0
  103. package/src/services/transcriptAnalyzers/ytdlp.ts +19 -0
  104. package/src/services/transcriptDetector/index.ts +128 -0
  105. package/src/services/transcriptFetcher/index.ts +151 -0
  106. package/src/services/urlParser/index.ts +53 -0
  107. package/src/services/videoDownloader/index.ts +282 -0
  108. package/src/types/audio.ts +19 -0
  109. package/src/types/cli.ts +22 -0
  110. package/src/types/config.ts +174 -0
  111. package/src/types/index.ts +26 -0
  112. package/src/types/pipeline.ts +93 -0
  113. package/src/types/segment.ts +43 -0
  114. package/src/types/transcript.ts +22 -0
  115. package/src/types/video.ts +18 -0
  116. package/src/utils/cache.ts +223 -0
  117. package/src/utils/chunker.ts +60 -0
  118. package/src/utils/dumper.ts +41 -0
  119. package/src/utils/format.ts +10 -0
  120. package/src/utils/logger.ts +17 -0
  121. package/src/utils/modelFactory.ts +71 -0
  122. package/src/utils/redactConfig.ts +23 -0
  123. package/src/utils/sliceAudio.ts +35 -0
  124. package/test-trigger.txt +1 -0
  125. package/tests/analyzerFactory.test.ts +146 -0
  126. package/tests/audioEventDetector.test.ts +69 -0
  127. package/tests/cache.test.ts +203 -0
  128. package/tests/chunkBuilder.test.ts +146 -0
  129. package/tests/chunker.test.ts +95 -0
  130. package/tests/eventDetector.test.ts +103 -0
  131. package/tests/llmAnalyzer.test.ts +283 -0
  132. package/tests/segmentRanker.test.ts +133 -0
  133. package/tests/setup.ts +48 -0
  134. package/tests/signalMerger.test.ts +197 -0
  135. package/tests/transcriptDetector.test.ts +150 -0
  136. package/tests/transcriptFetcher.test.ts +179 -0
  137. package/tests/urlParser.test.ts +70 -0
  138. package/tsconfig.json +16 -0
  139. package/tsconfig.test.json +8 -0
  140. package/vitest.config.ts +8 -0
@@ -0,0 +1,174 @@
1
+ import { z } from 'zod';
2
+
3
/**
 * Every LLM backend the configuration accepts for `LLM_PROVIDER`.
 * Declared `as const` so the union type below is derived from the values.
 */
const LLM_PROVIDERS = [
  'openai',
  'anthropic',
  'google',
  'xai',
  'mistral',
  'groq',
  'zai',
  'openrouter',
  'custom',
] as const;

/** Union of the provider identifiers listed in `LLM_PROVIDERS`. */
export type LLMProvider = (typeof LLM_PROVIDERS)[number];

/**
 * Lookup table: provider identifier → name of the environment variable that
 * carries that provider's API key.
 */
const PROVIDER_KEY_MAP: Record<LLMProvider, string> = {
  openai: 'OPENAI_API_KEY',
  anthropic: 'ANTHROPIC_API_KEY',
  google: 'GOOGLE_GENERATIVE_AI_API_KEY',
  xai: 'XAI_API_KEY',
  mistral: 'MISTRAL_API_KEY',
  groq: 'GROQ_API_KEY',
  zai: 'ZAI_API_KEY',
  openrouter: 'OPENROUTER_API_KEY',
  custom: 'CUSTOM_OPENAI_API_KEY',
};
29
+
30
/** Case-insensitive spellings a raw config string may use to mean "off". */
const FALSE_ENV_STRINGS = ['false', '0', 'no', 'off'];

/**
 * Converts a raw config value to a boolean.
 *
 * `z.coerce.boolean()` is `Boolean(value)`, which turns every non-empty
 * string — including "false" and "0" — into `true`. This helper maps the
 * common "off" spellings to `false` first and otherwise keeps the
 * `Boolean(value)` semantics, so previously accepted inputs still parse.
 */
function coerceEnvBoolean(value: unknown): boolean {
  if (typeof value === 'string' && FALSE_ENV_STRINGS.includes(value.trim().toLowerCase())) {
    return false;
  }
  return Boolean(value);
}

/** Boolean flag schema that understands string spellings; `fallback` applies when unset. */
const envBoolean = (fallback: boolean) =>
  z.preprocess(coerceEnvBoolean, z.boolean()).default(fallback);

/**
 * Returns true when `value` is a non-empty comma-separated list whose entries
 * (trimmed, blanks dropped) are all members of `allowed`.
 */
function isProviderChain(value: string, allowed: readonly string[]): boolean {
  const parts = value
    .split(',')
    .map((s) => s.trim())
    .filter(Boolean);
  return parts.length > 0 && parts.every((p) => allowed.includes(p));
}

/**
 * Schema for the application configuration.
 *
 * Field-level rules are declared inline; cross-field rules (the API key for
 * the selected provider, the base URL for the `custom` provider, and mutually
 * exclusive cookie options) are enforced in the trailing `superRefine`.
 */
export const ConfigSchema = z
  .object({
    // --- Provider selection ---
    LLM_PROVIDER: z.enum(LLM_PROVIDERS).default('openai'),

    // --- Per-provider API keys (all optional at schema level; enforced via superRefine) ---
    OPENAI_API_KEY: z.string().optional(),
    ANTHROPIC_API_KEY: z.string().optional(),
    GOOGLE_GENERATIVE_AI_API_KEY: z.string().optional(),
    XAI_API_KEY: z.string().optional(),
    MISTRAL_API_KEY: z.string().optional(),
    GROQ_API_KEY: z.string().optional(),
    ZAI_API_KEY: z.string().optional(),
    OPENROUTER_API_KEY: z.string().optional(),
    CUSTOM_OPENAI_API_KEY: z.string().optional(),
    CUSTOM_OPENAI_BASE_URL: z.string().url().optional(),

    // --- Tunable parameters ---
    SCORE_THRESHOLD: z.coerce.number().min(1).max(10).default(7),
    TOP_N_SEGMENTS: z.coerce.number().min(1).default(10),
    CHUNK_LENGTH_SEC: z.coerce.number().min(10).default(120),
    CHUNK_OVERLAP_SEC: z.coerce.number().min(0).default(20),
    MICRO_BLOCK_SEC: z.coerce.number().min(5).default(15),
    LLM_MODEL: z.string().default('gpt-4o'),
    LLM_MAX_RETRIES: z.coerce.number().min(0).default(3),
    DOWNLOAD_DIR: z.string().default('downloads/'),
    OUTPUT_DIR: z.string().default('outputs/'),
    CACHE_DIR: z.string().default('outputs/cache'),
    // --- Output dumping ---
    // Uses envBoolean so that "false" / "0" / "no" / "off" actually disable it.
    DUMP_OUTPUTS: envBoolean(true),
    // --- LLM evaluation limits ---
    MAX_CHUNKS: z.coerce.number().min(1).optional(),
    LLM_CONCURRENCY: z.coerce.number().min(1).default(3),
    CLIP_CONCURRENCY: z.coerce.number().min(1).default(1),
    // --- Custom system prompt (overrides the default if set) ---
    LLM_SYSTEM_PROMPT: z.string().optional(),
    // --- Gemini model used for audio event detection ---
    AUDIO_GEMINI_MODEL: z.string().default('gemini-2.5-flash'),
    // --- Extra instructions appended to the Gemini audio detection prompt ---
    AUDIO_EXTRA_INSTRUCTIONS: z.string().optional(),
    // --- Download mode for yt-dlp ---
    // The numeric branch is coerced so a string such as "3" is accepted,
    // matching how every other numeric setting in this schema is parsed.
    DOWNLOAD_SECTIONS_MODE: z
      .union([z.literal('all'), z.coerce.number().int().positive()])
      .default('all'),
    // --- FFmpeg paths (optional, for custom ffmpeg/ffprobe locations) ---
    FFMPEG_PATH: z.string().optional(),
    FFPROBE_PATH: z.string().optional(),
    // --- FFmpeg encoding preset for clip generation ---
    FFMPEG_PRESET: z
      .enum(['ultrafast', 'superfast', 'veryfast', 'fast', 'medium', 'slow', 'slower'])
      .default('fast'),
    // --- Timestamp offset for clips (adjusts if transcript is misaligned with video) ---
    TIMESTAMP_OFFSET_SECONDS: z.coerce.number().default(0),
    // --- Transcript provider ---
    // Comma-separated ordered fallback chain: "ytdlp" | "whisper" | "ytdlp,whisper" etc.
    // First provider that succeeds wins; subsequent providers are tried only on failure.
    TRANSCRIPT_PROVIDER: z
      .string()
      .default('ytdlp')
      .refine((v) => isProviderChain(v, ['ytdlp', 'whisper', 'gemini']), {
        message:
          'TRANSCRIPT_PROVIDER must be a comma-separated list of: ytdlp, whisper, gemini (e.g. "ytdlp")',
      }),
    // --- Audio event detection ---
    AUDIO_DETECTION_ENABLED: envBoolean(true),
    // Comma-separated ordered fallback chain: "gemini,whisper" | "yamnet" | "gemini" etc.
    // Legacy value "both" is accepted and mapped to "gemini,whisper" at runtime.
    AUDIO_PROVIDER: z
      .string()
      .default('gemini,whisper')
      .refine((v) => v.trim() === 'both' || isProviderChain(v, ['gemini', 'whisper', 'yamnet']), {
        message:
          'AUDIO_PROVIDER must be a comma-separated list of: gemini, whisper, yamnet (e.g. "gemini,whisper")',
      }),
    AUDIO_WHISPER_MODEL: z.enum(['tiny', 'base', 'small', 'medium', 'large-v3']).default('medium'),
    AUDIO_CONFIDENCE_THRESHOLD: z.coerce.number().min(0).max(1).default(0.3),
    AUDIO_CLIP_PRE_ROLL: z.coerce.number().min(0).default(5),
    AUDIO_CLIP_POST_ROLL: z.coerce.number().min(0).default(15),
    AUDIO_LLM_BOOST_WINDOW: z.coerce.number().min(0).default(10),
    AUDIO_LLM_SCORE_BOOST: z.coerce.number().min(0).default(2),
    // --- Game profile ---
    GAME_PROFILE: z.enum(['valorant', 'fps', 'boss_fight', 'general']).default('general'),
    // --- yt-dlp cookie support (for bot detection / auth) ---
    YT_DLP_COOKIES_FROM_BROWSER: z
      .enum(['chrome', 'firefox', 'safari', 'brave', 'edge', 'opera', 'chromium'])
      .optional(),
    YT_DLP_COOKIES_FILE: z.string().optional(),
  })
  .superRefine((data, ctx) => {
    // The API key that matters is the one belonging to the selected provider.
    const provider = data.LLM_PROVIDER;
    const keyName = PROVIDER_KEY_MAP[provider];
    const keyValue = data[keyName as keyof typeof data] as string | undefined;

    if (!keyValue || keyValue.trim() === '') {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        path: [keyName],
        message: `${keyName} is required when LLM_PROVIDER is "${provider}"`,
      });
    }

    // custom provider also requires a base URL
    if (
      provider === 'custom' &&
      (!data.CUSTOM_OPENAI_BASE_URL || data.CUSTOM_OPENAI_BASE_URL.trim() === '')
    ) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        path: ['CUSTOM_OPENAI_BASE_URL'],
        message: 'CUSTOM_OPENAI_BASE_URL is required when LLM_PROVIDER is "custom"',
      });
    }

    // Validate cookie config: only one method allowed at a time
    if (data.YT_DLP_COOKIES_FROM_BROWSER && data.YT_DLP_COOKIES_FILE) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        path: ['YT_DLP_COOKIES_FROM_BROWSER'],
        message:
          'Cannot set both YT_DLP_COOKIES_FROM_BROWSER and YT_DLP_COOKIES_FILE. Use only one.',
      });
    }
  });

/** Fully parsed configuration object (defaults applied) produced by `ConfigSchema`. */
export type Config = z.infer<typeof ConfigSchema>;
@@ -0,0 +1,26 @@
1
+ export { ConfigSchema } from './config.js';
2
+ export type { Config } from './config.js';
3
+
4
+ export { TranscriptLineSchema, MicroBlockSchema, LLMChunkSchema } from './transcript.js';
5
+ export type { TranscriptLine, MicroBlock, LLMChunk } from './transcript.js';
6
+
7
+ export { AnalyzedSegmentSchema, RankedSegmentSchema, ChunkEvaluationSchema } from './segment.js';
8
+ export type { AnalyzedSegment, RankedSegment, ChunkEvaluation } from './segment.js';
9
+
10
+ export { AudioEventSchema, MergedCandidateSchema } from './audio.js';
11
+ export type { AudioEvent, MergedCandidate } from './audio.js';
12
+
13
+ export { VideoMetadataSchema, PipelineResultSchema } from './video.js';
14
+ export type { VideoMetadata, PipelineResult } from './video.js';
15
+
16
+ export type { CliArgs } from './cli.js';
17
+
18
+ export type {
19
+ ChunkWindow,
20
+ VideoResolverResult,
21
+ AudioProcessorOpts,
22
+ SegmentAnalyzerOpts,
23
+ SegmentAnalyzerResult,
24
+ SegmentSelectorOpts,
25
+ ClipExporterOpts,
26
+ } from './pipeline.js';
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Pipeline-layer types: stage option bags, result shapes, and the generic
3
+ * time-window interface from the chunker utility.
4
+ *
5
+ * All types here are owned by one pipeline stage but live centrally so the
6
+ * runner (and any future consumers) can import them without reaching into
7
+ * individual stage files.
8
+ */
9
+
10
+ import type {
11
+ VideoMetadata,
12
+ TranscriptLine,
13
+ MicroBlock,
14
+ LLMChunk,
15
+ ChunkEvaluation,
16
+ } from './index.js';
17
+
18
+ // ---------------------------------------------------------------------------
19
+ // Chunker utility
20
+ // ---------------------------------------------------------------------------
21
+
22
/**
 * Time window expressed in seconds, half-open: `start` is included and `end`
 * is not. This is the element type returned by `buildWindows`.
 */
export interface ChunkWindow {
  /** Inclusive lower bound of the window, in seconds. */
  start: number;
  /** Exclusive upper bound of the window, in seconds. */
  end: number;
}
29
+
30
+ // ---------------------------------------------------------------------------
31
+ // Stage 1 — Video Resolver
32
+ // ---------------------------------------------------------------------------
33
+
34
/** Output of the video-resolver stage: the video's id together with its metadata. */
export interface VideoResolverResult {
  /** Identifier of the resolved video. */
  videoId: string;
  /** Metadata record for the resolved video. */
  metadata: VideoMetadata;
}
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // Stage 3 — Audio Processor
41
+ // ---------------------------------------------------------------------------
42
+
43
/** Option bag accepted by the audio-processor stage. */
export interface AudioProcessorOpts {
  /** Flag carried from the caller; presumably disables audio processing — confirm in the stage. */
  noAudio: boolean;
  /** Name of the game profile in use. */
  gameProfile: string;
  /** Parallelism limit; presumably the maximum number of concurrent tasks — confirm in the stage. */
  maxParallel: number;
  /** Path to an already-downloaded audio WAV. If supplied, the downloadAudio call is skipped. */
  audioPath?: string | null;
}
50
+
51
+ // ---------------------------------------------------------------------------
52
+ // Stage 4a + 4b — Segment Analyzer / Refiner
53
+ // ---------------------------------------------------------------------------
54
+
55
/** Option bag accepted by the segment-analyzer / refiner stage. */
export interface SegmentAnalyzerOpts {
  /** Optional cap on chunk count; presumably limits how many chunks are evaluated — confirm in the stage. */
  maxChunks?: number;
  /** Parallelism limit; presumably the maximum number of concurrent evaluations — confirm in the stage. */
  maxParallel: number;
  /** Flag carried from the caller; presumably bypasses the cache — confirm in the stage. */
  noCache: boolean;
}
60
+
61
/** Everything the segment-analyzer stage hands back, from raw lines to per-chunk evaluations. */
export interface SegmentAnalyzerResult {
  /** Transcript lines. */
  lines: TranscriptLine[];
  /** Micro-blocks (start/end/text records). */
  microBlocks: MicroBlock[];
  /** Chunks prepared for LLM evaluation. */
  chunks: LLMChunk[];
  /** One evaluation record per chunk, either successful or failed. */
  chunkEvals: ChunkEvaluation[];
}
67
+
68
+ // ---------------------------------------------------------------------------
69
+ // Stage 5 — Segment Selector
70
+ // ---------------------------------------------------------------------------
71
+
72
/** Option bag accepted by the segment-selector stage. */
export interface SegmentSelectorOpts {
  /** Score cut-off; presumably the minimum score a segment needs to be kept — confirm in the stage. */
  threshold: number;
  /** Count limit; presumably the maximum number of segments to keep — confirm in the stage. */
  topN: number;
}
76
+
77
+ // ---------------------------------------------------------------------------
78
+ // Stage 6 — Clip Exporter
79
+ // ---------------------------------------------------------------------------
80
+
81
/** Option bag accepted by the clip-exporter stage. */
export interface ClipExporterOpts {
  /** Existing local video file to use; when given, no yt-dlp download happens. */
  localVideo?: string;
  /**
   * How yt-dlp should fetch the video:
   * - `'all'`     — fetch the whole video and cut clips with ffmpeg afterwards
   * - a number    — fetch only the top-N segments using --download-sections
   * - `undefined` — treated the same as `'all'`
   */
  downloadSections: 'all' | number | undefined;
  /** Custom output/download directory; takes precedence over config.DOWNLOAD_DIR / config.OUTPUT_DIR. */
  videoPath?: string;
}
@@ -0,0 +1,43 @@
1
+ import { z } from 'zod';
2
+
3
/** Field validators for an analyzed segment; kept separate so the schema line stays short. */
const analyzedSegmentShape = {
  /** Whether the segment was judged interesting. */
  interesting: z.boolean(),
  /** Score between 1 and 10 inclusive. */
  score: z.number().min(1).max(10),
  /** Free-text justification. */
  reason: z.string(),
  /** Clip start, in seconds. */
  clip_start: z.number(),
  /** Clip end, in seconds. */
  clip_end: z.number(),
};

/** Schema for an analyzed segment: interest flag, 1–10 score, reason, and clip bounds in seconds. */
export const AnalyzedSegmentSchema = z.object(analyzedSegmentShape);

/** Parsed form of `AnalyzedSegmentSchema`. */
export type AnalyzedSegment = z.infer<typeof AnalyzedSegmentSchema>;
11
+
12
/** Field validators for a ranked segment. */
const rankedSegmentShape = {
  /** 1-based integer rank. */
  rank: z.number().int().min(1),
  /** Segment start, in seconds. */
  start: z.number(),
  /** Segment end, in seconds. */
  end: z.number(),
  /** Score between 1 and 10 inclusive. */
  score: z.number().min(1).max(10),
  /** Free-text justification. */
  reason: z.string(),
  /** Which signal produced the segment. */
  source: z.enum(['transcript', 'audio', 'both']),
  /** Optional audio event label attached to the segment. */
  audio_event: z.string().optional(),
};

/** Schema for a ranked segment: rank, bounds in seconds, score, reason, and signal source. */
export const RankedSegmentSchema = z.object(rankedSegmentShape);

/** Parsed form of `RankedSegmentSchema`. */
export type RankedSegment = z.infer<typeof RankedSegmentSchema>;
22
+
23
/** Fields shared by every chunk evaluation, whatever its outcome. */
const ChunkEvaluationBaseSchema = z.object({
  /** Zero-based integer index of the chunk. */
  chunk_index: z.number().int().min(0),
  /** Chunk start, in seconds. */
  chunk_start: z.number(),
  /** Chunk end, in seconds. */
  chunk_end: z.number(),
});

/** Variant recorded when evaluation succeeded: base fields plus the verdict. */
const successfulChunkEvaluation = ChunkEvaluationBaseSchema.extend({
  status: z.literal('success'),
  interesting: z.boolean(),
  score: z.number().min(1).max(10),
  reason: z.string(),
  clip_start: z.number(),
  clip_end: z.number(),
});

/** Variant recorded when evaluation failed: base fields plus an error message. */
const failedChunkEvaluation = ChunkEvaluationBaseSchema.extend({
  status: z.literal('failed'),
  error: z.string(),
});

/** Chunk evaluation, discriminated on `status` (`'success'` or `'failed'`). */
export const ChunkEvaluationSchema = z.discriminatedUnion('status', [
  successfulChunkEvaluation,
  failedChunkEvaluation,
]);

/** Parsed form of `ChunkEvaluationSchema`. */
export type ChunkEvaluation = z.infer<typeof ChunkEvaluationSchema>;
@@ -0,0 +1,22 @@
1
+ import { z } from 'zod';
2
+
3
/** Field validators for one transcript line. */
const transcriptLineShape = {
  /** Text of the line. */
  text: z.string(),
  /** Start time in seconds (normalized from offset ms). */
  start: z.number(),
  /** Length in seconds (normalized from duration ms). */
  duration: z.number(),
};

/** Schema for a single transcript line with its start time and duration in seconds. */
export const TranscriptLineSchema = z.object(transcriptLineShape);

/** Parsed form of `TranscriptLineSchema`. */
export type TranscriptLine = z.infer<typeof TranscriptLineSchema>;
9
+
10
/** Field validators for a micro-block. */
const microBlockShape = {
  /** Block start, in seconds. */
  start: z.number(),
  /** Block end, in seconds. */
  end: z.number(),
  /** Text covered by the block. */
  text: z.string(),
};

/** Schema for a micro-block: a span of text with start and end times in seconds. */
export const MicroBlockSchema = z.object(microBlockShape);

/** Parsed form of `MicroBlockSchema`. */
export type MicroBlock = z.infer<typeof MicroBlockSchema>;
16
+
17
/** Field validators for an LLM chunk. */
const llmChunkShape = {
  /** Chunk start, in seconds. */
  start: z.number(),
  /** Chunk end, in seconds. */
  end: z.number(),
  /** Text covered by the chunk. */
  text: z.string(),
};

/** Schema for an LLM chunk: a span of text with start and end times in seconds. */
export const LLMChunkSchema = z.object(llmChunkShape);

/** Parsed form of `LLMChunkSchema`. */
export type LLMChunk = z.infer<typeof LLMChunkSchema>;
@@ -0,0 +1,18 @@
1
+ import { z } from 'zod';
2
+ import { RankedSegmentSchema, ChunkEvaluationSchema } from './segment.js';
3
+
4
/** Field validators for video metadata. */
const videoMetadataShape = {
  /** Video id; must be exactly 11 characters long. */
  videoId: z.string().length(11),
  /** Video title. */
  title: z.string(),
  /** Video length, in seconds. */
  duration: z.number(),
};

/** Schema for video metadata: 11-character id, title, and duration in seconds. */
export const VideoMetadataSchema = z.object(videoMetadataShape);

/** Parsed form of `VideoMetadataSchema`. */
export type VideoMetadata = z.infer<typeof VideoMetadataSchema>;
10
+
11
/** Field validators for the pipeline result document. */
const pipelineResultShape = {
  /** Video id; must be exactly 11 characters long. */
  video_id: z.string().length(11),
  /** Video title. */
  title: z.string(),
  /** Video length, in seconds. */
  duration: z.number(),
  /** Per-chunk evaluation records. */
  chunk_evaluations: z.array(ChunkEvaluationSchema),
  /** Ranked segments. */
  segments: z.array(RankedSegmentSchema),
};

/** Schema for the pipeline result: video identity plus chunk evaluations and ranked segments. */
export const PipelineResultSchema = z.object(pipelineResultShape);

/** Parsed form of `PipelineResultSchema`. */
export type PipelineResult = z.infer<typeof PipelineResultSchema>;
@@ -0,0 +1,223 @@
1
+ import { createHash } from 'node:crypto';
2
+ import { promises as fs } from 'fs';
3
+ import path from 'path';
4
+ import { z } from 'zod';
5
+ import { log } from './logger.js';
6
+ import { TranscriptLineSchema, ChunkEvaluationSchema, AudioEventSchema } from '../types/index.js';
7
+ import type { TranscriptLine, LLMChunk, ChunkEvaluation, AudioEvent } from '../types/index.js';
8
+
9
+ // ---------------------------------------------------------------------------
10
+ // Internal cache-key helpers
11
+ // ---------------------------------------------------------------------------
12
+
13
/**
 * Serializes audio events into a stable string for cache keying.
 *
 * Events are ordered by `time`; events that share the same `time` are further
 * ordered by their JSON text, so the key does not depend on the order in which
 * the caller supplied them. For inputs without equal-time events the result is
 * the JSON of the time-sorted array.
 *
 * @param events - Audio events to encode. The array is not mutated.
 * @returns `''` for an empty list, otherwise the JSON of the sorted events.
 */
function audioEventsKey(events: AudioEvent[]): string {
  if (events.length === 0) return '';

  // Serialize each event once so the tie-break does not re-stringify per comparison.
  const keyed = events.map((event) => ({ event, json: JSON.stringify(event) }));

  keyed.sort((a, b) => {
    const byTime = a.event.time - b.event.time;
    if (byTime !== 0 && !Number.isNaN(byTime)) return byTime;
    // Same timestamp: fall back to a deterministic textual order.
    if (a.json < b.json) return -1;
    if (a.json > b.json) return 1;
    return 0;
  });

  return JSON.stringify(keyed.map((entry) => entry.event));
}
22
+
23
/**
 * Computes the SHA-256 digest of `input`.
 *
 * @param input - Text to hash.
 * @returns The digest as a hex string.
 */
function hashContent(input: string): string {
  const hasher = createHash('sha256');
  hasher.update(input);
  return hasher.digest('hex');
}
26
+
27
/**
 * Loads and validates one cache entry.
 *
 * A file that cannot be read is treated as an ordinary cache miss and is not
 * logged. A file that can be read but holds invalid JSON, or JSON that does
 * not satisfy `schema`, is reported as corrupt via `log.warn` and ignored.
 *
 * @param filePath - Location of the cache entry on disk.
 * @param schema - Zod schema the decoded JSON must satisfy.
 * @returns The validated value, or `null` on a miss or a corrupt entry.
 */
async function readCacheFile<T>(filePath: string, schema: z.ZodType<T>): Promise<T | null> {
  let raw: string;
  try {
    raw = await fs.readFile(filePath, 'utf-8');
  } catch {
    // File not found or unreadable — normal cache miss, stay silent
    return null;
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(raw);
  } catch {
    // The file exists but is not valid JSON (e.g. a truncated write): report it
    // the same way as a schema mismatch instead of hiding it as a plain miss.
    log.warn(`[cache] Corrupt entry at ${filePath} — ignoring`);
    return null;
  }

  const parsed = schema.safeParse(decoded);
  if (!parsed.success) {
    log.warn(`[cache] Corrupt entry at ${filePath} — ignoring`);
    return null;
  }
  return parsed.data;
}
41
+
42
/**
 * Persists `data` as pretty-printed JSON at `filePath`, creating parent
 * directories as needed. Failures are logged with `log.warn` and never thrown.
 *
 * @param filePath - Destination of the cache entry.
 * @param data - Value to serialize.
 */
async function writeCacheFile(filePath: string, data: unknown): Promise<void> {
  try {
    const parentDir = path.dirname(filePath);
    await fs.mkdir(parentDir, { recursive: true });

    const payload = JSON.stringify(data, null, 2);
    await fs.writeFile(filePath, payload, 'utf-8');
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    log.warn(`[cache] Failed to write ${filePath}: ${detail}`);
  }
}
52
+
53
/** Field validators for a cached segment refinement. */
const segmentRefinementShape = {
  /** Refined start value. */
  refined_start: z.number(),
  /** Refined end value. */
  refined_end: z.number(),
};

/** Schema for a cached segment refinement: the refined start and end values. */
const SegmentRefinementSchema = z.object(segmentRefinementShape);

/** Parsed form of `SegmentRefinementSchema`. */
type SegmentRefinement = z.infer<typeof SegmentRefinementSchema>;
58
+
59
/**
 * File-system cache shared by the pipeline stages.
 *
 * An instance is built once in runner.ts from the resolved cache directory and
 * handed to every stage that caches. With `disabled = true` every read returns
 * `null` and every write is a no-op (equivalent to --no-cache).
 *
 * Each entry lives at `<cacheDir>/<bucket>/<sha256 of key>.json`.
 */
export class Cache {
  constructor(
    private readonly cacheDir: string,
    private readonly disabled: boolean = false,
  ) {}

  /** Builds the on-disk location for an entry in `bucket` identified by `keyMaterial`. */
  private entryPath(bucket: string, keyMaterial: string): string {
    return path.join(this.cacheDir, bucket, `${hashContent(keyMaterial)}.json`);
  }

  // ---- Transcript ---------------------------------------------------------

  /** Transcript entries are keyed by the video id alone. */
  private transcriptPath(videoId: string): string {
    return this.entryPath('transcript', videoId);
  }

  /** Returns the cached transcript for `videoId`, or `null` when absent, invalid, or disabled. */
  async readTranscript(videoId: string): Promise<TranscriptLine[] | null> {
    return this.disabled
      ? null
      : readCacheFile(this.transcriptPath(videoId), z.array(TranscriptLineSchema));
  }

  /** Stores the transcript for `videoId` unless the cache is disabled. */
  async writeTranscript(videoId: string, lines: TranscriptLine[]): Promise<void> {
    if (!this.disabled) {
      await writeCacheFile(this.transcriptPath(videoId), lines);
    }
  }

  // ---- LLM chunk results --------------------------------------------------

  /** Chunk entries are keyed by the chunk's bounds, its text, and the serialized audio events. */
  private chunkPath(chunk: LLMChunk, chunkAudioEvents: AudioEvent[] = []): string {
    const audioKey = audioEventsKey(chunkAudioEvents);
    const keyMaterial = `${chunk.start}|${chunk.end}|${chunk.text}|${audioKey}`;
    return this.entryPath('chunks', keyMaterial);
  }

  /** Returns the cached evaluation for `chunk`, or `null` when absent, invalid, or disabled. */
  async readChunk(
    chunk: LLMChunk,
    chunkAudioEvents: AudioEvent[] = [],
  ): Promise<ChunkEvaluation | null> {
    return this.disabled
      ? null
      : readCacheFile(this.chunkPath(chunk, chunkAudioEvents), ChunkEvaluationSchema);
  }

  /** Stores a chunk evaluation. Only `'success'` evaluations are persisted. */
  async writeChunk(
    chunk: LLMChunk,
    evaluation: ChunkEvaluation,
    chunkAudioEvents: AudioEvent[] = [],
  ): Promise<void> {
    if (this.disabled || evaluation.status !== 'success') return;
    await writeCacheFile(this.chunkPath(chunk, chunkAudioEvents), evaluation);
  }

  // ---- Segment refinement -------------------------------------------------

  /** Refinement entries are keyed by the segment's bounds and its reason text. */
  private segmentRefinementPath(start: number, end: number, reason: string): string {
    return this.entryPath('segments', `${start}|${end}|${reason}`);
  }

  /** Returns the cached refinement for a segment, or `null` when absent, invalid, or disabled. */
  async readSegmentRefinement(
    start: number,
    end: number,
    reason: string,
  ): Promise<SegmentRefinement | null> {
    return this.disabled
      ? null
      : readCacheFile(this.segmentRefinementPath(start, end, reason), SegmentRefinementSchema);
  }

  /** Stores the refinement for a segment unless the cache is disabled. */
  async writeSegmentRefinement(
    start: number,
    end: number,
    reason: string,
    refined: SegmentRefinement,
  ): Promise<void> {
    if (!this.disabled) {
      await writeCacheFile(this.segmentRefinementPath(start, end, reason), refined);
    }
  }

  // ---- Audio events (whole-video) -----------------------------------------

  /** Whole-video audio entries are keyed by video id, game profile, and provider. */
  private audioEventPath(videoId: string, gameProfile: string, provider: string): string {
    return this.entryPath('audio', `${videoId}|${gameProfile}|${provider}`);
  }

  /** Returns the cached whole-video audio events, or `null` when absent, invalid, or disabled. */
  async readAudioEvents(
    videoId: string,
    gameProfile: string,
    provider: string,
  ): Promise<AudioEvent[] | null> {
    if (this.disabled) return null;
    const location = this.audioEventPath(videoId, gameProfile, provider);
    return readCacheFile(location, z.array(AudioEventSchema));
  }

  /** Stores whole-video audio events unless the cache is disabled. */
  async writeAudioEvents(
    videoId: string,
    gameProfile: string,
    provider: string,
    events: AudioEvent[],
  ): Promise<void> {
    if (this.disabled) return;
    const location = this.audioEventPath(videoId, gameProfile, provider);
    await writeCacheFile(location, events);
  }

  // ---- Audio events (per-chunk) -------------------------------------------

  /**
   * Per-window audio entries, following the same layout as the LLM `chunks/`
   * entries. The key combines videoId, gameProfile, provider, and the exact
   * window bounds, so every slice is stored on its own. These files share the
   * `audio` directory with the whole-video entries.
   */
  private audioChunkPath(
    videoId: string,
    gameProfile: string,
    provider: string,
    windowStart: number,
    windowEnd: number,
  ): string {
    return this.entryPath(
      'audio',
      `${videoId}|${gameProfile}|${provider}|${windowStart}|${windowEnd}`,
    );
  }

  /** Returns the cached audio events for one window, or `null` when absent, invalid, or disabled. */
  async readAudioChunk(
    videoId: string,
    gameProfile: string,
    provider: string,
    windowStart: number,
    windowEnd: number,
  ): Promise<AudioEvent[] | null> {
    if (this.disabled) return null;
    const location = this.audioChunkPath(videoId, gameProfile, provider, windowStart, windowEnd);
    return readCacheFile(location, z.array(AudioEventSchema));
  }

  /** Stores the audio events for one window unless the cache is disabled. */
  async writeAudioChunk(
    videoId: string,
    gameProfile: string,
    provider: string,
    windowStart: number,
    windowEnd: number,
    events: AudioEvent[],
  ): Promise<void> {
    if (this.disabled) return;
    const location = this.audioChunkPath(videoId, gameProfile, provider, windowStart, windowEnd);
    await writeCacheFile(location, events);
  }
}
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Generic windowed-chunker utility.
3
+ *
4
+ * Used by:
5
+ * - transcriptProcessor — builds LLM analysis windows over micro-blocks
6
+ * - audioProcessor — builds audio slice windows over the video duration
7
+ *
8
+ * The function returns non-overlapping or overlapping half-open intervals
9
+ * [start, end) that together cover the full range [0, totalDuration).
10
+ */
11
+
12
+ import type { ChunkWindow } from '../types/index.js';
13
+
14
+ export type { ChunkWindow };
15
+
16
/**
 * Builds a list of time windows covering `[0, totalDuration)`.
 *
 * Consecutive windows start `windowSec - overlapSec` seconds apart and the
 * last window is clipped to `totalDuration`. When `overlapSec > 0` the final
 * window can be a short tail lying entirely inside the previous one
 * (e.g. `buildWindows(50, 30, 10)` ends with `{start:40,end:50}`).
 *
 * @param totalDuration - Total duration of the content in seconds.
 * @param windowSec     - Width of each window in seconds. Must be > 0.
 * @param overlapSec    - How many seconds consecutive windows share. Must be
 *                        >= 0 and < windowSec; any other value (negative, NaN,
 *                        or >= windowSec) is treated as 0. Defaults to 0.
 * @returns Array of {start, end} windows. Empty when totalDuration is not a
 *          finite positive number or windowSec is NaN or <= 0.
 *
 * @example
 * // No overlap — three equal windows
 * buildWindows(60, 20)
 * // → [{start:0,end:20}, {start:20,end:40}, {start:40,end:60}]
 *
 * @example
 * // With overlap — each window starts 20s after the previous
 * buildWindows(60, 30, 10)
 * // → [{start:0,end:30}, {start:20,end:50}, {start:40,end:60}]
 *
 * @example
 * // Remainder — last window is shorter
 * buildWindows(70, 30)
 * // → [{start:0,end:30}, {start:30,end:60}, {start:60,end:70}]
 */
export function buildWindows(
  totalDuration: number,
  windowSec: number,
  overlapSec: number = 0,
): ChunkWindow[] {
  // An infinite duration would never terminate the loop; NaN cannot be covered.
  if (!Number.isFinite(totalDuration) || totalDuration <= 0) return [];
  // A NaN width would otherwise yield a single window whose end is NaN.
  if (Number.isNaN(windowSec) || windowSec <= 0) return [];

  // Invalid overlaps fall back to 0. overlap >= window would give a zero or
  // negative step (infinite loop); a NaN overlap would give a NaN step and
  // silently stop after the first window.
  const overlapIsUsable = overlapSec >= 0 && overlapSec < windowSec;
  const effectiveOverlap = overlapIsUsable ? overlapSec : 0;

  const step = windowSec - effectiveOverlap;
  const windows: ChunkWindow[] = [];

  for (let start = 0; start < totalDuration; start += step) {
    windows.push({
      start,
      end: Math.min(start + windowSec, totalDuration),
    });
  }

  return windows;
}