@thunderkiller/video-clipper 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/.env.example +130 -0
  2. package/.github/workflows/ci.yml +42 -0
  3. package/.github/workflows/release.yml +72 -0
  4. package/.husky/pre-commit +3 -0
  5. package/.prettierignore +6 -0
  6. package/.prettierrc +7 -0
  7. package/.releaserc.json +21 -0
  8. package/AGENTS.md +122 -0
  9. package/CHANGELOG.md +45 -0
  10. package/README.md +410 -0
  11. package/dist/cli.js +187 -0
  12. package/dist/config/env.js +14 -0
  13. package/dist/config/index.js +1 -0
  14. package/dist/index.js +35 -0
  15. package/dist/pipeline/runner.js +132 -0
  16. package/dist/pipeline/stages/audioProcessor.js +75 -0
  17. package/dist/pipeline/stages/clipExporter.js +44 -0
  18. package/dist/pipeline/stages/segmentAnalyzer.js +46 -0
  19. package/dist/pipeline/stages/segmentSelector.js +23 -0
  20. package/dist/pipeline/stages/videoResolver.js +34 -0
  21. package/dist/services/audioAnalyzers/base.js +13 -0
  22. package/dist/services/audioAnalyzers/factory.js +56 -0
  23. package/dist/services/audioAnalyzers/gemini.js +109 -0
  24. package/dist/services/audioAnalyzers/index.js +5 -0
  25. package/dist/services/audioAnalyzers/whisper.js +62 -0
  26. package/dist/services/audioAnalyzers/yamnet.js +40 -0
  27. package/dist/services/audioDownloader/index.js +81 -0
  28. package/dist/services/chunkBuilder/index.js +71 -0
  29. package/dist/services/clipGenerator/index.js +156 -0
  30. package/dist/services/clipRefiner/index.js +103 -0
  31. package/dist/services/eventDetector/index.js +54 -0
  32. package/dist/services/llmAnalyzer/LLMAnalyzer.js +63 -0
  33. package/dist/services/llmAnalyzer/index.js +173 -0
  34. package/dist/services/metadataExtractor/index.js +66 -0
  35. package/dist/services/segmentRanker/index.js +40 -0
  36. package/dist/services/signalMerger/index.js +36 -0
  37. package/dist/services/transcriptAnalyzers/base.js +13 -0
  38. package/dist/services/transcriptAnalyzers/factory.js +51 -0
  39. package/dist/services/transcriptAnalyzers/gemini.js +19 -0
  40. package/dist/services/transcriptAnalyzers/index.js +5 -0
  41. package/dist/services/transcriptAnalyzers/whisper.js +55 -0
  42. package/dist/services/transcriptAnalyzers/ytdlp.js +16 -0
  43. package/dist/services/transcriptDetector/index.js +102 -0
  44. package/dist/services/transcriptFetcher/index.js +124 -0
  45. package/dist/services/urlParser/index.js +46 -0
  46. package/dist/services/videoDownloader/index.js +212 -0
  47. package/dist/types/audio.js +15 -0
  48. package/dist/types/cli.js +1 -0
  49. package/dist/types/config.js +150 -0
  50. package/dist/types/index.js +5 -0
  51. package/dist/types/pipeline.js +9 -0
  52. package/dist/types/segment.js +36 -0
  53. package/dist/types/transcript.js +16 -0
  54. package/dist/types/video.js +14 -0
  55. package/dist/utils/cache.js +143 -0
  56. package/dist/utils/chunker.js +51 -0
  57. package/dist/utils/dumper.js +36 -0
  58. package/dist/utils/format.js +10 -0
  59. package/dist/utils/logger.js +16 -0
  60. package/dist/utils/modelFactory.js +60 -0
  61. package/dist/utils/redactConfig.js +20 -0
  62. package/dist/utils/sliceAudio.js +26 -0
  63. package/docs/free-models.md +78 -0
  64. package/docs/plan.md +442 -0
  65. package/docs/refactorPhases.md +105 -0
  66. package/docs/yt-downloader.md +440 -0
  67. package/package.json +65 -0
  68. package/requirements.txt +5 -0
  69. package/scripts/detect_events.py +81 -0
  70. package/scripts/detect_events_whisper.py +101 -0
  71. package/scripts/transcribe_whisper.py +70 -0
  72. package/src/cli.ts +186 -0
  73. package/src/config/env.ts +18 -0
  74. package/src/config/index.ts +2 -0
  75. package/src/index.ts +46 -0
  76. package/src/pipeline/runner.ts +155 -0
  77. package/src/pipeline/stages/audioProcessor.ts +129 -0
  78. package/src/pipeline/stages/clipExporter.ts +80 -0
  79. package/src/pipeline/stages/segmentAnalyzer.ts +72 -0
  80. package/src/pipeline/stages/segmentSelector.ts +39 -0
  81. package/src/pipeline/stages/videoResolver.ts +47 -0
  82. package/src/services/audioAnalyzers/base.ts +32 -0
  83. package/src/services/audioAnalyzers/factory.ts +71 -0
  84. package/src/services/audioAnalyzers/gemini.ts +137 -0
  85. package/src/services/audioAnalyzers/index.ts +6 -0
  86. package/src/services/audioAnalyzers/whisper.ts +80 -0
  87. package/src/services/audioAnalyzers/yamnet.ts +54 -0
  88. package/src/services/audioDownloader/index.ts +102 -0
  89. package/src/services/chunkBuilder/index.ts +86 -0
  90. package/src/services/clipGenerator/index.ts +210 -0
  91. package/src/services/clipRefiner/index.ts +141 -0
  92. package/src/services/eventDetector/index.ts +68 -0
  93. package/src/services/llmAnalyzer/LLMAnalyzer.ts +114 -0
  94. package/src/services/llmAnalyzer/index.ts +231 -0
  95. package/src/services/metadataExtractor/index.ts +83 -0
  96. package/src/services/segmentRanker/index.ts +88 -0
  97. package/src/services/signalMerger/index.ts +53 -0
  98. package/src/services/transcriptAnalyzers/base.ts +26 -0
  99. package/src/services/transcriptAnalyzers/factory.ts +67 -0
  100. package/src/services/transcriptAnalyzers/gemini.ts +24 -0
  101. package/src/services/transcriptAnalyzers/index.ts +6 -0
  102. package/src/services/transcriptAnalyzers/whisper.ts +68 -0
  103. package/src/services/transcriptAnalyzers/ytdlp.ts +19 -0
  104. package/src/services/transcriptDetector/index.ts +128 -0
  105. package/src/services/transcriptFetcher/index.ts +151 -0
  106. package/src/services/urlParser/index.ts +53 -0
  107. package/src/services/videoDownloader/index.ts +282 -0
  108. package/src/types/audio.ts +19 -0
  109. package/src/types/cli.ts +22 -0
  110. package/src/types/config.ts +174 -0
  111. package/src/types/index.ts +26 -0
  112. package/src/types/pipeline.ts +93 -0
  113. package/src/types/segment.ts +43 -0
  114. package/src/types/transcript.ts +22 -0
  115. package/src/types/video.ts +18 -0
  116. package/src/utils/cache.ts +223 -0
  117. package/src/utils/chunker.ts +60 -0
  118. package/src/utils/dumper.ts +41 -0
  119. package/src/utils/format.ts +10 -0
  120. package/src/utils/logger.ts +17 -0
  121. package/src/utils/modelFactory.ts +71 -0
  122. package/src/utils/redactConfig.ts +23 -0
  123. package/src/utils/sliceAudio.ts +35 -0
  124. package/test-trigger.txt +1 -0
  125. package/tests/analyzerFactory.test.ts +146 -0
  126. package/tests/audioEventDetector.test.ts +69 -0
  127. package/tests/cache.test.ts +203 -0
  128. package/tests/chunkBuilder.test.ts +146 -0
  129. package/tests/chunker.test.ts +95 -0
  130. package/tests/eventDetector.test.ts +103 -0
  131. package/tests/llmAnalyzer.test.ts +283 -0
  132. package/tests/segmentRanker.test.ts +133 -0
  133. package/tests/setup.ts +48 -0
  134. package/tests/signalMerger.test.ts +197 -0
  135. package/tests/transcriptDetector.test.ts +150 -0
  136. package/tests/transcriptFetcher.test.ts +179 -0
  137. package/tests/urlParser.test.ts +70 -0
  138. package/tsconfig.json +16 -0
  139. package/tsconfig.test.json +8 -0
  140. package/vitest.config.ts +8 -0
@@ -0,0 +1,173 @@
1
+ import { generateObject } from 'ai';
2
+ import pLimit from 'p-limit';
3
+ import { config } from '../../config/index.js';
4
+ import { log } from '../../utils/logger.js';
5
+ import { formatSeconds } from '../../utils/format.js';
6
+ import { getModel } from '../../utils/modelFactory.js';
7
+ import { Cache } from '../../utils/cache.js';
8
+ import { AnalyzedSegmentSchema } from '../../types/index.js';
9
+ const BACKOFF_BASE_MS = 1000;
10
+ const BACKOFF_JITTER_MS = 500;
11
+ const DEFAULT_SYSTEM_PROMPT = `You are an expert video editor analyzing a YouTube transcript segment.
12
+
13
+ Identify if this segment contains a potentially interesting moment worth clipping.
14
+
15
+ Interesting moments include:
16
+ - surprising insights or revelations
17
+ - strong or controversial opinions
18
+ - humor or entertaining storytelling
19
+ - emotional moments
20
+ - key explanations of important concepts
21
+ - "aha" moments or turning points
22
+
23
+ If audio events are listed in the segment, treat them as strong positive signals —
24
+ they indicate high-action or high-energy moments that are often clip-worthy.`;
25
/**
 * Reports whether a caught error looks like a provider rate-limit rejection.
 *
 * An HTTP-style status of 429 on the error object (`statusCode` or `status`)
 * is checked first, so the result does not depend on how the provider words
 * its message. The message heuristics ("rate limit", "429",
 * "too many requests", case-insensitive) are kept as a fallback.
 * NOTE(review): assumes the LLM client attaches `statusCode`/`status` to its
 * errors — confirm against the client library in use.
 *
 * @param {unknown} err - The value caught from the failed call.
 * @returns {boolean} True when the failure should be treated as a rate limit.
 */
function isRateLimitError(err) {
    if (err !== null && typeof err === 'object') {
        if (err.statusCode === 429 || err.status === 429) {
            return true;
        }
    }
    const message = (err instanceof Error ? err.message : String(err)).toLowerCase();
    return (message.includes('rate limit') ||
        message.includes('429') ||
        message.includes('too many requests'));
}
31
/**
 * Returns a promise that fulfils (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
34
/**
 * Renders the user prompt for one transcript chunk.
 *
 * The prompt lists the chunk window, the transcript text (timestamped lines
 * when any are supplied, otherwise `chunk.text`), an optional block of audio
 * events sorted by time, and the response rules. The two semantic scoring
 * rules are left out when a custom system prompt is in use.
 *
 * @param chunk - Chunk with `start`, `end` and `text`.
 * @param chunkLines - Transcript lines (`start`, `text`) inside the chunk.
 * @param chunkAudioEvents - Audio events (`time`, `event`, `confidence`) inside the chunk.
 * @param isCustomSystemPrompt - True when the caller overrides the system prompt.
 * @returns The prompt string.
 */
function buildPrompt(chunk, chunkLines, chunkAudioEvents, isCustomSystemPrompt) {
    let semanticRules = '';
    if (!isCustomSystemPrompt) {
        semanticRules =
            '- Set interesting=true only if the segment is genuinely compelling\n' +
                '- Score 1-10 (7+ means worth clipping, 9-10 means viral potential)\n';
    }
    let transcriptBody = chunk.text;
    if (chunkLines.length > 0) {
        transcriptBody = chunkLines
            .map((line) => `[${line.start.toFixed(1)}s] ${line.text}`)
            .join('\n');
    }
    let audioSection = '';
    if (chunkAudioEvents.length > 0) {
        // Sort a copy so the caller's array keeps its order.
        const ordered = [...chunkAudioEvents].sort((first, second) => first.time - second.time);
        const rendered = ordered.map((evt) => ` [${evt.time.toFixed(1)}s] ${evt.event} (confidence: ${evt.confidence.toFixed(2)})`);
        audioSection = `\nAudio Events Detected (within this segment):\n${rendered.join('\n')}\n`;
    }
    const header = `Transcript Segment:\nSTART: ${chunk.start}s\nEND: ${chunk.end}s\n`;
    const rules = `Rules for your response:\n${semanticRules}- clip_start and clip_end must be within [${chunk.start}, ${chunk.end}]\n- clip_start must be less than clip_end`;
    return `${header}\n${transcriptBody}\n${audioSection}\n${rules}`;
}
64
/**
 * Analyzes a single LLM chunk, retrying with exponential backoff + jitter
 * when the failure is classified as a rate limit by `isRateLimitError`.
 *
 * Flow:
 *  1. Unless `noCache` is set, return a cached evaluation whose status is
 *     'success'.
 *  2. Call `generateObject` up to `config.LLM_MAX_RETRIES + 1` times. Only
 *     rate-limit failures are retried; any other failure is logged and
 *     rethrown immediately.
 *  3. On success, write the evaluation to the cache. The write is
 *     best-effort: a cache failure is logged as a warning and the analysis
 *     result is still returned, rather than being reported as a failed
 *     analysis.
 *
 * @param chunk - Chunk with `start`, `end` and `text`.
 * @param chunkLines - Transcript lines that fall inside the chunk.
 * @param chunkAudioEvents - Audio events that fall inside the chunk.
 * @param noCache - When true, the chunk cache is neither read nor written.
 * @param chunkIndex - Index of the chunk, used in logs and the cache record.
 * @returns The object produced by `generateObject` (or the cached fields).
 * @throws The last LLM error once retries are exhausted or the error is not
 *   a rate limit.
 */
async function analyzeChunk(chunk, chunkLines, chunkAudioEvents, noCache, chunkIndex) {
    // One cache handle serves both the read and the later write.
    const cache = noCache ? null : new Cache(config.CACHE_DIR);
    if (cache) {
        const cached = await cache.readChunk(chunk, chunkAudioEvents);
        if (cached && cached.status === 'success') {
            log.info(`[chunk] cache hit (${formatSeconds(chunk.start)}–${formatSeconds(chunk.end)})`);
            return {
                interesting: cached.interesting,
                score: cached.score,
                reason: cached.reason,
                clip_start: cached.clip_start,
                clip_end: cached.clip_end,
            };
        }
    }
    let lastError;
    for (let attempt = 0; attempt <= config.LLM_MAX_RETRIES; attempt++) {
        let object;
        try {
            const prompt = buildPrompt(chunk, chunkLines, chunkAudioEvents, !!config.LLM_SYSTEM_PROMPT);
            ({ object } = await generateObject({
                model: getModel(),
                schema: AnalyzedSegmentSchema,
                system: config.LLM_SYSTEM_PROMPT ?? DEFAULT_SYSTEM_PROMPT,
                prompt: prompt,
                // Retries are handled by this loop, so the client's own are disabled.
                maxRetries: 0,
            }));
        }
        catch (err) {
            lastError = err;
            if (isRateLimitError(err) && attempt < config.LLM_MAX_RETRIES) {
                const delay = BACKOFF_BASE_MS * Math.pow(2, attempt) + Math.random() * BACKOFF_JITTER_MS;
                log.warn(`[chunk] Rate limit hit (attempt ${attempt + 1}/${config.LLM_MAX_RETRIES + 1}). ` +
                    `Retrying in ${Math.round(delay)}ms...`);
                await sleep(delay);
                continue;
            }
            log.error(`[chunk] Analysis failed: ${err instanceof Error ? err.message : String(err)}`);
            throw err;
        }
        log.info(`Chunk ${chunkIndex} analysis complete: interesting=${object.interesting}, score=${object.score}, reason=${object.reason}`);
        if (cache) {
            const evaluation = {
                status: 'success',
                chunk_index: chunkIndex,
                chunk_start: chunk.start,
                chunk_end: chunk.end,
                interesting: object.interesting,
                score: object.score,
                reason: object.reason,
                clip_start: object.clip_start,
                clip_end: object.clip_end,
            };
            try {
                await cache.writeChunk(chunk, evaluation, chunkAudioEvents);
            }
            catch (cacheErr) {
                // A cache problem must not throw away a valid LLM result.
                log.warn(`[chunk] Failed to write cache entry: ${cacheErr instanceof Error ? cacheErr.message : String(cacheErr)}`);
            }
        }
        return object;
    }
    // Only reachable when the loop body never ran (e.g. a negative retry count).
    throw lastError ?? new Error(`[chunk] Analysis was not attempted (LLM_MAX_RETRIES=${config.LLM_MAX_RETRIES})`);
}
127
/**
 * Runs `analyzeChunk` for every chunk, at most `concurrency` at a time, and
 * reports one evaluation record per chunk.
 *
 * All tasks are awaited with `Promise.allSettled`, so a rejected chunk
 * becomes a `status: 'failed'` record instead of aborting the others. Each
 * task receives only the transcript lines and audio events whose timestamp
 * lies in `[chunk.start, chunk.end)`.
 *
 * @param chunks - Chunks to analyze.
 * @param lines - All transcript lines (`start`, ...).
 * @param audioEvents - All audio events (`time`, ...).
 * @param concurrency - Maximum number of analyses in flight.
 * @param noCache - Pass true to bypass the on-disk chunk cache.
 * @returns Evaluation records in chunk order.
 */
export async function analyzeChunks(chunks, lines, audioEvents, concurrency, noCache = false) {
    const plural = chunks.length !== 1 ? 's' : '';
    log.info(`Analyzing ${chunks.length} chunk${plural} (max ${concurrency} parallel)...`);
    const limit = pLimit(concurrency);
    const inWindow = (time, chunk) => time >= chunk.start && time < chunk.end;
    const tasks = chunks.map((chunk, index) => {
        const chunkLines = lines.filter((line) => inWindow(line.start, chunk));
        const chunkAudioEvents = audioEvents.filter((evt) => inWindow(evt.time, chunk));
        return limit(() => analyzeChunk(chunk, chunkLines, chunkAudioEvents, noCache, index));
    });
    const outcomes = await Promise.allSettled(tasks);
    const evaluations = [];
    let succeeded = 0;
    for (const [index, outcome] of outcomes.entries()) {
        const chunk = chunks[index];
        if (outcome.status !== 'fulfilled') {
            const error = outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason);
            log.warn(`[chunk ${index}] LLM analysis skipped: ${error}`);
            evaluations.push({
                status: 'failed',
                chunk_index: index,
                chunk_start: chunk.start,
                chunk_end: chunk.end,
                error,
            });
            continue;
        }
        succeeded += 1;
        const segment = outcome.value;
        evaluations.push({
            status: 'success',
            chunk_index: index,
            chunk_start: chunk.start,
            chunk_end: chunk.end,
            interesting: segment.interesting,
            score: segment.score,
            reason: segment.reason,
            clip_start: segment.clip_start,
            clip_end: segment.clip_end,
        });
    }
    log.info(`Analysis complete: ${succeeded}/${chunks.length} chunks succeeded`);
    return evaluations;
}
@@ -0,0 +1,66 @@
1
+ import { execa } from 'execa';
2
+ import { config } from '../../config/index.js';
3
+ import { log } from '../../utils/logger.js';
4
+ const OEMBED_URL = 'https://www.youtube.com/oembed';
5
/**
 * Reads a video's title and duration by running `yt-dlp --dump-json`.
 *
 * Cookie options are placed before the other arguments:
 * `--cookies-from-browser` when `config.YT_DLP_COOKIES_FROM_BROWSER` is set,
 * otherwise `--cookies` when `config.YT_DLP_COOKIES_FILE` is set.
 *
 * @param videoId - YouTube video ID.
 * @returns `{ videoId, title, duration }`, or null when the command or the
 *   JSON parsing fails (the failure is logged as a warning).
 */
async function extractViaYtDlp(videoId) {
    try {
        let cookieArgs = [];
        if (config.YT_DLP_COOKIES_FROM_BROWSER) {
            cookieArgs = ['--cookies-from-browser', config.YT_DLP_COOKIES_FROM_BROWSER];
        }
        else if (config.YT_DLP_COOKIES_FILE) {
            cookieArgs = ['--cookies', config.YT_DLP_COOKIES_FILE];
        }
        const watchUrl = `https://www.youtube.com/watch?v=${videoId}`;
        const { stdout } = await execa('yt-dlp', [...cookieArgs, '--dump-json', '--no-playlist', watchUrl]);
        const { title, duration } = JSON.parse(stdout);
        return { videoId, title, duration };
    }
    catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        log.warn(`yt-dlp failed for "${videoId}": ${detail}`);
        return null;
    }
}
32
/**
 * Fetches the video title from the YouTube oEmbed endpoint (no API key
 * needed). The response is not read for a duration, so `duration` is
 * returned as 0 and a warning is logged.
 *
 * The request URL is assembled with `URL`/`searchParams` so that the nested
 * watch URL (which itself contains `?` and `=`) and the video ID are
 * percent-encoded instead of being spliced into the query string verbatim.
 *
 * @param videoId - YouTube video ID.
 * @returns `{ videoId, title, duration: 0 }`.
 * @throws {Error} When the oEmbed endpoint responds with a non-2xx status.
 */
async function extractViaOEmbed(videoId) {
    const watchUrl = new URL('https://www.youtube.com/watch');
    watchUrl.searchParams.set('v', videoId);
    const requestUrl = new URL(OEMBED_URL);
    requestUrl.searchParams.set('url', watchUrl.toString());
    requestUrl.searchParams.set('format', 'json');
    const res = await fetch(requestUrl.toString());
    if (!res.ok) {
        throw new Error(`oEmbed request failed for "${videoId}": HTTP ${res.status}`);
    }
    const data = (await res.json());
    log.warn(`yt-dlp unavailable — duration unknown for "${videoId}". Install yt-dlp for full metadata.`);
    return {
        videoId,
        title: data.title,
        duration: 0,
    };
}
50
/**
 * Resolves title and duration for a YouTube video ID.
 *
 * yt-dlp is tried first. When it yields nothing, a warning is logged and the
 * oEmbed lookup is used instead (title only, duration = 0).
 *
 * @param videoId - YouTube video ID.
 * @returns The metadata record from whichever strategy succeeded.
 * @throws {Error} When the oEmbed fallback fails as well.
 */
export async function extractMetadata(videoId) {
    const primary = await extractViaYtDlp(videoId);
    if (!primary) {
        log.warn(`yt-dlp failed for "${videoId}", falling back to oEmbed...`);
        return extractViaOEmbed(videoId);
    }
    return primary;
}
@@ -0,0 +1,40 @@
1
/**
 * Length of the intersection of intervals [aStart, aEnd] and [bStart, bEnd].
 *
 * @returns The shared length, or 0 when the intervals do not intersect.
 */
function overlapSeconds(aStart, aEnd, bStart, bEnd) {
    // Latest start to earliest end; a negative span means no intersection.
    return Math.max(0, Math.min(aEnd, bEnd) - Math.max(aStart, bStart));
}
6
/**
 * Decides whether two segments overlap enough to count as duplicates.
 *
 * - No shared time: never a duplicate.
 * - Either segment has source 'audio': duplicate when they share more than
 *   8 seconds.
 * - Otherwise: duplicate when the shared time exceeds half of either
 *   segment's duration.
 *
 * @returns True when the segments overlap significantly.
 */
function significantlyOverlaps(aStart, aEnd, bStart, bEnd, aSource, bSource) {
    const shared = Math.max(0, Math.min(aEnd, bEnd) - Math.max(aStart, bStart));
    if (shared === 0) {
        return false;
    }
    if (aSource === 'audio' || bSource === 'audio') {
        return shared > 8;
    }
    const durations = [aEnd - aStart, bEnd - bStart];
    return durations.some((duration) => shared / duration > 0.5);
}
18
/**
 * Drops every segment that significantly overlaps one already kept.
 *
 * Segments are visited in input order, so earlier entries win.
 *
 * @param segments - Candidates with `start`, `end` and `source`.
 * @returns The surviving segments, in their original relative order.
 */
function deduplicateSegments(segments) {
    return segments.reduce((kept, candidate) => {
        const clashes = kept.some((existing) => significantlyOverlaps(existing.start, existing.end, candidate.start, candidate.end, existing.source, candidate.source));
        if (!clashes) {
            kept.push(candidate);
        }
        return kept;
    }, []);
}
28
/**
 * Filters, orders, de-duplicates and ranks candidate segments.
 *
 * Segments scoring below `threshold` are discarded, the rest are ordered by
 * descending score, overlapping duplicates are removed, and the first `topN`
 * survivors are returned with a 1-based `rank`.
 *
 * @param segments - Candidate segments.
 * @param threshold - Minimum score (inclusive) to keep a segment.
 * @param topN - Maximum number of ranked segments to return.
 * @returns Ranked segment records.
 */
export function rankSegments(segments, threshold, topN) {
    const qualifying = segments.filter((segment) => segment.score >= threshold);
    // `filter` produced a fresh array, so sorting it leaves the input untouched.
    qualifying.sort((left, right) => right.score - left.score);
    const winners = deduplicateSegments(qualifying).slice(0, topN);
    return winners.map(({ start, end, score, reason, source, audio_event }, position) => ({
        rank: position + 1,
        start,
        end,
        score,
        reason,
        source,
        audio_event,
    }));
}
@@ -0,0 +1,36 @@
1
+ import { config } from '../../config/index.js';
2
/**
 * Combines LLM segment evaluations and audio events into clip candidates.
 *
 * - Every successful, interesting LLM segment becomes a candidate. When an
 *   audio event lies within `boostWindow` seconds of its `clip_start`, the
 *   score is raised by `scoreBoost` (capped at 10), the source becomes
 *   'both', and the first such event's label is attached.
 * - Every audio event with no successful LLM segment starting within the
 *   window becomes an audio-only candidate spanning
 *   [time - preRoll, time + postRoll] (start clamped at 0), scored as the
 *   rounded confidence times 10.
 *
 * Omitted tuning arguments fall back to the matching `config.AUDIO_*` value.
 *
 * @returns LLM-derived candidates first, then audio-only candidates.
 */
export function mergeSignals(llmSegments, audioEvents, boostWindow, scoreBoost, preRoll, postRoll) {
    const windowSec = boostWindow ?? config.AUDIO_LLM_BOOST_WINDOW;
    const boost = scoreBoost ?? config.AUDIO_LLM_SCORE_BOOST;
    const pre = preRoll ?? config.AUDIO_CLIP_PRE_ROLL;
    const post = postRoll ?? config.AUDIO_CLIP_POST_ROLL;
    const isNear = (timeA, timeB) => Math.abs(timeA - timeB) < windowSec;
    const successfulSegments = llmSegments.filter((segment) => segment.status === 'success');
    const transcriptCandidates = successfulSegments
        .filter((segment) => segment.interesting)
        .map((segment) => {
        const companion = audioEvents.find((evt) => isNear(evt.time, segment.clip_start));
        const boosted = companion !== undefined;
        return {
            start: segment.clip_start,
            end: segment.clip_end,
            score: Math.min(10, segment.score + (boosted ? boost : 0)),
            source: boosted ? 'both' : 'transcript',
            reason: segment.reason,
            audio_event: boosted ? companion.event : undefined,
        };
    });
    const audioCandidates = audioEvents
        .filter((evt) => !successfulSegments.some((segment) => isNear(segment.clip_start, evt.time)))
        .map((evt) => ({
        start: Math.max(0, evt.time - pre),
        end: evt.time + post,
        score: Math.round(evt.confidence * 10),
        source: 'audio',
        reason: `Audio event: ${evt.event} (${(evt.confidence * 100).toFixed(0)}% confidence)`,
        audio_event: evt.event,
    }));
    return [...transcriptCandidates, ...audioCandidates];
}
@@ -0,0 +1,13 @@
1
/**
 * Contract every transcript analyzer implementation must satisfy.
 *
 * Each concrete analyzer (YtDlp, Whisper, Gemini) extends this class,
 * overrides `detect()`, and declares a `source` property that tags the lines
 * it returns so downstream logging knows which backend produced them.
 *
 * The base `detect()` rejects with a descriptive error, so a subclass that
 * forgets to override it fails with a clear message instead of
 * "detect is not a function".
 *
 * Usage:
 *   const analyzer = new YtDlpTranscriptAnalyzer();
 *   const lines = await analyzer.detect(videoId, null);
 */
export class TranscriptAnalyzer {
    /**
     * Produces transcript lines for a video. Must be overridden.
     *
     * @param _videoId - YouTube video ID.
     * @param _audioPath - Path to the downloaded audio, or null.
     * @returns {Promise<never>} Always rejects in the base class.
     * @throws {Error} Naming the concrete class that lacks an implementation.
     */
    async detect(_videoId, _audioPath) {
        throw new Error(`${this.constructor.name} must implement detect(videoId, audioPath).`);
    }
}
@@ -0,0 +1,51 @@
1
+ import { log } from '../../utils/logger.js';
2
+ import { YtDlpTranscriptAnalyzer } from './ytdlp.js';
3
+ import { WhisperTranscriptAnalyzer } from './whisper.js';
4
+ import { GeminiTranscriptAnalyzer } from './gemini.js';
5
+ const KNOWN_PROVIDERS = new Set(['ytdlp', 'whisper', 'gemini']);
6
/**
 * Turns the TRANSCRIPT_PROVIDER config string into an ordered list of
 * provider names.
 *
 * Entries are comma-separated, trimmed and lower-cased; blank entries are
 * ignored. A single value is accepted as a one-element chain:
 *   "ytdlp,whisper" → ['ytdlp', 'whisper']
 *   "ytdlp"         → ['ytdlp']
 *
 * @param providerString - Raw config value.
 * @returns Provider names in the order given.
 * @throws {Error} When no names remain, or a name is not a known provider.
 */
export function parseTranscriptProviderChain(providerString) {
    const names = [];
    for (const piece of providerString.split(',')) {
        const name = piece.trim().toLowerCase();
        if (!name) {
            continue;
        }
        if (!KNOWN_PROVIDERS.has(name)) {
            throw new Error(`Unknown transcript provider "${name}". Valid options: ytdlp, whisper, gemini (comma-separated for chain)`);
        }
        names.push(name);
    }
    if (names.length === 0) {
        throw new Error(`TRANSCRIPT_PROVIDER is empty. Provide at least one of: ytdlp, whisper, gemini`);
    }
    return names;
}
27
/**
 * Instantiates one analyzer per provider named in `providerString`, in order.
 *
 * The string is validated by `parseTranscriptProviderChain` first. Choosing
 * 'gemini' logs a warning, because that analyzer throws when it is reached.
 *
 * @example
 * // TRANSCRIPT_PROVIDER=ytdlp,whisper → [YtDlpTranscriptAnalyzer, WhisperTranscriptAnalyzer]
 * const chain = createTranscriptChain(config.TRANSCRIPT_PROVIDER);
 *
 * @param providerString - Raw provider chain string.
 * @returns Analyzer instances in chain order.
 */
export function createTranscriptChain(providerString) {
    const factories = new Map([
        ['ytdlp', () => new YtDlpTranscriptAnalyzer()],
        ['whisper', () => new WhisperTranscriptAnalyzer()],
        ['gemini', () => {
                log.warn('[transcript] GeminiTranscriptAnalyzer is not yet implemented — ' +
                    'it will throw if reached in the chain.');
                return new GeminiTranscriptAnalyzer();
            }],
    ]);
    return parseTranscriptProviderChain(providerString).map((name) => {
        const build = factories.get(name);
        return build ? build() : undefined;
    });
}
@@ -0,0 +1,19 @@
1
+ import { TranscriptAnalyzer } from './base.js';
2
/**
 * Placeholder for a Gemini-based transcript provider.
 *
 * @stub Not implemented yet. The planned behaviour is to:
 *   1. Split the audio file into overlapping chunks
 *   2. Send each chunk to the Gemini audio API for transcription
 *   3. Stitch the returned text segments back into TranscriptLine[]
 *
 * Until then `detect()` always rejects; set TRANSCRIPT_PROVIDER=ytdlp or
 * TRANSCRIPT_PROVIDER=whisper to use a working provider.
 */
export class GeminiTranscriptAnalyzer extends TranscriptAnalyzer {
    source = 'gemini';
    /** Always rejects: this provider has no implementation yet. */
    async detect(_videoId, _audioPath) {
        const message = [
            'GeminiTranscriptAnalyzer is not yet implemented. ',
            'Use TRANSCRIPT_PROVIDER=ytdlp or TRANSCRIPT_PROVIDER=whisper instead.',
        ].join('');
        throw new Error(message);
    }
}
@@ -0,0 +1,5 @@
1
+ export { TranscriptAnalyzer } from './base.js';
2
+ export { YtDlpTranscriptAnalyzer } from './ytdlp.js';
3
+ export { WhisperTranscriptAnalyzer } from './whisper.js';
4
+ export { GeminiTranscriptAnalyzer } from './gemini.js';
5
+ export { createTranscriptChain, parseTranscriptProviderChain } from './factory.js';
@@ -0,0 +1,55 @@
1
+ import { execa } from 'execa';
2
+ import { z } from 'zod';
3
+ import { config } from '../../config/index.js';
4
+ import { TranscriptAnalyzer } from './base.js';
5
+ import { getPythonBin } from '../audioAnalyzers/whisper.js';
6
+ const WhisperSegmentSchema = z.object({
7
+ text: z.string(),
8
+ start: z.number(),
9
+ duration: z.number(),
10
+ });
11
/**
 * Generates a transcript by running OpenAI Whisper locally on the downloaded
 * audio file, then normalising the output to TranscriptLine[].
 *
 * Requires:
 *   - audioPath must not be null (audio must be downloaded before calling detect)
 *   - pip install openai-whisper
 *
 * The underlying script is `scripts/transcribe_whisper.py`, which writes a
 * JSON array of `{text, start, duration}` objects to stdout. The script is
 * located relative to this module (three directories up, at the package
 * root) rather than relative to the process working directory, so the
 * analyzer also works when the package is invoked from another directory.
 */
export class WhisperTranscriptAnalyzer extends TranscriptAnalyzer {
    source = 'whisper';
    /**
     * Transcribes `audioPath` with the configured Whisper model.
     *
     * @param videoId - YouTube video ID (used in error messages only).
     * @param audioPath - Path to the downloaded audio file; required.
     * @returns Lines of `{ text, start, duration }` with trimmed text.
     * @throws {Error} When no audio path is given, the Python process fails,
     *   or its output is not a valid JSON array of segments.
     */
    async detect(videoId, audioPath) {
        if (!audioPath) {
            throw new Error('WhisperTranscriptAnalyzer requires an audio file. ' +
                'Ensure downloadAudio() runs before processTranscript() in the pipeline.');
        }
        const python = await getPythonBin();
        // Loaded here because `fileURLToPath` is not among this module's imports.
        const { fileURLToPath } = await import('node:url');
        const scriptPath = fileURLToPath(new URL('../../../scripts/transcribe_whisper.py', import.meta.url));
        let stdout;
        try {
            const result = await execa(python, [
                scriptPath,
                audioPath,
                config.AUDIO_WHISPER_MODEL,
            ]);
            stdout = result.stdout;
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            if (message.includes('ModuleNotFoundError') || message.includes('No module named')) {
                throw new Error('openai-whisper not installed. Run: pip install openai-whisper\n' +
                    'Or set TRANSCRIPT_PROVIDER=ytdlp in .env to use yt-dlp subtitles instead.');
            }
            throw new Error(`Whisper transcription failed for "${videoId}": ${message}`);
        }
        let raw;
        try {
            raw = JSON.parse(stdout);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            throw new Error(`Whisper produced invalid JSON for "${videoId}": ${message}`, { cause: err });
        }
        // Validating the whole array rejects non-array output with a schema
        // error instead of a "raw.map is not a function" TypeError.
        const segments = z.array(WhisperSegmentSchema).parse(raw);
        return segments.map((seg) => ({
            text: seg.text.trim(),
            start: seg.start,
            duration: seg.duration,
        }));
    }
}
@@ -0,0 +1,16 @@
1
+ import { fetchTranscript } from '../transcriptFetcher/index.js';
2
+ import { TranscriptAnalyzer } from './base.js';
3
/**
 * Transcript provider backed by `fetchTranscript` (yt-dlp auto-generated
 * subtitles, VTT).
 *
 * This is the default transcript provider. It works from the video ID alone:
 * the `audioPath` argument is accepted to satisfy the analyzer contract but
 * is never read.
 *
 * Requires: yt-dlp installed and available on PATH.
 */
export class YtDlpTranscriptAnalyzer extends TranscriptAnalyzer {
    source = 'ytdlp';
    /** Delegates to `fetchTranscript(videoId)` and returns its lines. */
    async detect(videoId, _audioPath) {
        const lines = await fetchTranscript(videoId);
        return lines;
    }
}
@@ -0,0 +1,102 @@
1
+ import { buildMicroBlocks, buildLLMChunks } from '../chunkBuilder/index.js';
2
+ import { log } from '../../utils/logger.js';
3
+ import { config } from '../../config/index.js';
4
/**
 * Top-level transcript detector.
 *
 * Owns an ordered chain of transcript analyzers. `detect()` first consults
 * the supplied cache; on a miss it tries each analyzer in turn until one
 * returns lines, logging every failure along the way, and re-throws the last
 * error when all of them fail. The lines obtained are then grouped into
 * micro-blocks and overlapping LLM analysis chunks.
 *
 * The chain is built by the caller (e.g. with
 * `createTranscriptChain(config.TRANSCRIPT_PROVIDER)`) and injected, which
 * keeps provider selection out of this class.
 *
 * @example
 * const chain = createTranscriptChain('ytdlp,whisper');
 * const detector = new TranscriptDetector(chain);
 * const { lines, microBlocks, chunks } = await detector.detect(videoId, audioPath, cache);
 */
export class TranscriptDetector {
    chain;
    /**
     * @param chain - Analyzers to try, in priority order.
     * @throws {Error} When the chain is empty.
     */
    constructor(chain) {
        this.chain = chain;
        if (chain.length === 0) {
            throw new Error('TranscriptDetector requires at least one TranscriptAnalyzer in the chain.');
        }
    }
    /**
     * Fetches, groups, and chunks the transcript for the given video ID.
     *
     * A cached transcript short-circuits the analyzer chain. Freshly fetched
     * lines are written to the cache before they are grouped.
     *
     * @param videoId - YouTube video ID
     * @param audioPath - Path to the downloaded audio, or null if not available
     * @param cache - Object providing `readTranscript` / `writeTranscript`
     * @returns `{ lines, microBlocks, chunks }`
     */
    async detect(videoId, audioPath, cache) {
        let lines = await cache.readTranscript(videoId);
        if (lines) {
            log.info(`[cache hit] Transcript loaded from cache (${lines.length} lines)`);
        }
        else {
            lines = await this.fetchFromChain(videoId, audioPath);
            await cache.writeTranscript(videoId, lines);
        }
        const microBlocks = this.buildMicroBlocks(lines);
        return { lines, microBlocks, chunks: this.buildChunks(microBlocks) };
    }
    // -------------------------------------------------------------------------
    // Private helpers
    // -------------------------------------------------------------------------
    /**
     * Tries each analyzer in order and returns the first successful result.
     * A failure is logged as a warning while fallbacks remain and as an error
     * for the final analyzer, whose error is then re-thrown.
     */
    async fetchFromChain(videoId, audioPath) {
        const finalPosition = this.chain.length - 1;
        let failure;
        for (const [position, analyzer] of this.chain.entries()) {
            try {
                const fetched = await analyzer.detect(videoId, audioPath);
                log.info(`[transcript:${analyzer.source}] fetched ${fetched.length} lines`);
                return fetched;
            }
            catch (err) {
                failure = err;
                const detail = err instanceof Error ? err.message : String(err);
                if (position === finalPosition) {
                    log.error(`[transcript:${analyzer.source}] failed (no more fallbacks): ${detail}`);
                }
                else {
                    const fallbackSource = this.chain[position + 1].source;
                    log.warn(`[transcript:${analyzer.source}] failed, falling back to ${fallbackSource}: ${detail}`);
                }
            }
        }
        throw failure;
    }
    /** Groups raw transcript lines into micro-blocks of `config.MICRO_BLOCK_SEC`. */
    buildMicroBlocks(lines) {
        return buildMicroBlocks(lines, config.MICRO_BLOCK_SEC);
    }
    /** Builds overlapping LLM analysis chunks from micro-blocks. */
    buildChunks(microBlocks) {
        return buildLLMChunks(microBlocks, config.CHUNK_LENGTH_SEC, config.CHUNK_OVERLAP_SEC);
    }
}