@thunderkiller/video-clipper 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/CHANGELOG.md +19 -0
  2. package/CONTRIBUTING.md +100 -0
  3. package/LICENSE +15 -0
  4. package/commitlint.config.js +25 -0
  5. package/package.json +3 -1
  6. package/.github/workflows/ci.yml +0 -42
  7. package/.github/workflows/release.yml +0 -76
  8. package/.husky/pre-commit +0 -3
  9. package/.prettierignore +0 -6
  10. package/.prettierrc +0 -7
  11. package/.releaserc.json +0 -21
  12. package/AGENTS.md +0 -122
  13. package/docs/free-models.md +0 -78
  14. package/docs/plan.md +0 -442
  15. package/docs/refactorPhases.md +0 -105
  16. package/docs/yt-downloader.md +0 -440
  17. package/requirements.txt +0 -5
  18. package/scripts/detect_events.py +0 -81
  19. package/scripts/detect_events_whisper.py +0 -101
  20. package/scripts/transcribe_whisper.py +0 -70
  21. package/src/cli.ts +0 -186
  22. package/src/config/env.ts +0 -18
  23. package/src/config/index.ts +0 -2
  24. package/src/index.ts +0 -46
  25. package/src/pipeline/runner.ts +0 -147
  26. package/src/pipeline/stages/audioProcessor.ts +0 -127
  27. package/src/pipeline/stages/clipExporter.ts +0 -76
  28. package/src/pipeline/stages/segmentAnalyzer.ts +0 -72
  29. package/src/pipeline/stages/segmentSelector.ts +0 -39
  30. package/src/pipeline/stages/videoResolver.ts +0 -44
  31. package/src/services/audioAnalyzers/base.ts +0 -32
  32. package/src/services/audioAnalyzers/factory.ts +0 -69
  33. package/src/services/audioAnalyzers/gemini.ts +0 -136
  34. package/src/services/audioAnalyzers/index.ts +0 -6
  35. package/src/services/audioAnalyzers/whisper.ts +0 -80
  36. package/src/services/audioAnalyzers/yamnet.ts +0 -54
  37. package/src/services/audioDownloader/index.ts +0 -102
  38. package/src/services/chunkBuilder/index.ts +0 -82
  39. package/src/services/clipGenerator/index.ts +0 -210
  40. package/src/services/clipRefiner/index.ts +0 -141
  41. package/src/services/eventDetector/index.ts +0 -68
  42. package/src/services/llmAnalyzer/LLMAnalyzer.ts +0 -98
  43. package/src/services/llmAnalyzer/index.ts +0 -231
  44. package/src/services/metadataExtractor/index.ts +0 -83
  45. package/src/services/segmentRanker/index.ts +0 -88
  46. package/src/services/signalMerger/index.ts +0 -53
  47. package/src/services/transcriptAnalyzers/base.ts +0 -26
  48. package/src/services/transcriptAnalyzers/factory.ts +0 -66
  49. package/src/services/transcriptAnalyzers/gemini.ts +0 -24
  50. package/src/services/transcriptAnalyzers/index.ts +0 -6
  51. package/src/services/transcriptAnalyzers/whisper.ts +0 -68
  52. package/src/services/transcriptAnalyzers/ytdlp.ts +0 -19
  53. package/src/services/transcriptDetector/index.ts +0 -122
  54. package/src/services/transcriptFetcher/index.ts +0 -147
  55. package/src/services/urlParser/index.ts +0 -52
  56. package/src/services/videoDownloader/index.ts +0 -268
  57. package/src/types/analyzer.ts +0 -23
  58. package/src/types/audio.ts +0 -19
  59. package/src/types/cache.ts +0 -8
  60. package/src/types/cli.ts +0 -22
  61. package/src/types/config.ts +0 -151
  62. package/src/types/downloader.ts +0 -15
  63. package/src/types/factory.ts +0 -3
  64. package/src/types/index.ts +0 -40
  65. package/src/types/pipeline.ts +0 -60
  66. package/src/types/segment.ts +0 -43
  67. package/src/types/transcript.ts +0 -22
  68. package/src/types/video.ts +0 -18
  69. package/src/utils/cache.ts +0 -224
  70. package/src/utils/chunker.ts +0 -60
  71. package/src/utils/dumper.ts +0 -41
  72. package/src/utils/format.ts +0 -10
  73. package/src/utils/logger.ts +0 -17
  74. package/src/utils/modelFactory.ts +0 -71
  75. package/src/utils/redactConfig.ts +0 -23
  76. package/src/utils/sliceAudio.ts +0 -35
  77. package/test-trigger.txt +0 -1
  78. package/tests/analyzerFactory.test.ts +0 -146
  79. package/tests/audioEventDetector.test.ts +0 -69
  80. package/tests/cache.test.ts +0 -203
  81. package/tests/chunkBuilder.test.ts +0 -146
  82. package/tests/chunker.test.ts +0 -95
  83. package/tests/eventDetector.test.ts +0 -103
  84. package/tests/llmAnalyzer.test.ts +0 -283
  85. package/tests/segmentRanker.test.ts +0 -133
  86. package/tests/setup.ts +0 -48
  87. package/tests/signalMerger.test.ts +0 -197
  88. package/tests/transcriptDetector.test.ts +0 -150
  89. package/tests/transcriptFetcher.test.ts +0 -179
  90. package/tests/urlParser.test.ts +0 -70
  91. package/tsconfig.json +0 -16
  92. package/tsconfig.test.json +0 -8
  93. package/vitest.config.ts +0 -8
@@ -1,39 +0,0 @@
1
- import { mergeSignals } from '../../services/signalMerger/index.js';
2
- import { rankSegments } from '../../services/segmentRanker/index.js';
3
- import { log } from '../../utils/logger.js';
4
- import type {
5
- ChunkEvaluation,
6
- AudioEvent,
7
- RankedSegment,
8
- SegmentSelectorOpts,
9
- } from '../../types/index.js';
10
-
11
- export type { SegmentSelectorOpts };
12
-
13
/**
 * Stage 5 — Segment Selector
 *
 * Combines the transcript LLM evaluations with any audio events via
 * `mergeSignals`, then passes the merged candidates to `rankSegments` along
 * with the configured threshold and top-N limit to obtain the final ordered
 * list of segments.
 *
 * In the pipeline this stage runs after `analyzeSegments` (LLM pass 1) and its
 * result is the input of `refineRankedSegments` (LLM pass 2).
 *
 * Emits a warning when nothing survives ranking, otherwise an info line
 * reporting how many segments did.
 *
 * @param chunkEvals - LLM evaluations of the transcript chunks
 * @param audioEvents - Detected audio events to merge in (may be empty)
 * @param opts - Supplies `threshold` and `topN` for ranking
 * @returns The list returned by `rankSegments`, unchanged
 */
export function selectSegments(
  chunkEvals: ChunkEvaluation[],
  audioEvents: AudioEvent[],
  opts: SegmentSelectorOpts,
): RankedSegment[] {
  const candidates = mergeSignals(chunkEvals, audioEvents);
  const selected = rankSegments(candidates, opts.threshold, opts.topN);
  const count = selected.length;

  if (count > 0) {
    const plural = count !== 1 ? 's' : '';
    log.info(`Analysis complete: ${count} segment${plural} above threshold ${opts.threshold}`);
    return selected;
  }

  log.warn(`No segments scored above threshold ${opts.threshold}. Try lowering --threshold.`);
  return selected;
}
@@ -1,44 +0,0 @@
1
- import { parseUrl } from '../../services/urlParser/index.js';
2
- import { extractMetadata } from '../../services/metadataExtractor/index.js';
3
- import { log } from '../../utils/logger.js';
4
- import { formatSeconds } from '../../utils/format.js';
5
- import type { VideoResolverResult } from '../../types/index.js';
6
-
7
- export type { VideoResolverResult };
8
-
9
/**
 * Stage 1 — Video Resolver
 *
 * Turns a raw YouTube URL into a video ID via `parseUrl`, loads the video's
 * metadata (title and duration are logged), and applies the optional
 * --max-duration guard.
 *
 * The guard is skipped when no limit is given or when the reported duration is
 * not positive (logged as "duration unknown").
 *
 * @param rawUrl - URL exactly as supplied by the user
 * @param maxDurationSec - Optional upper bound on the video duration, in seconds
 * @returns The parsed video ID together with the fetched metadata
 * @throws {Error} on invalid URL, metadata fetch failure, or exceeded duration
 */
export async function resolveVideo(
  rawUrl: string,
  maxDurationSec?: number,
): Promise<VideoResolverResult> {
  let videoId: string;
  try {
    videoId = parseUrl(rawUrl);
  } catch {
    // Any parser failure is reported uniformly as an invalid URL.
    throw new Error(`Invalid YouTube URL: ${rawUrl}`);
  }

  log.info(`Fetching metadata for ${videoId}...`);
  const metadata = await extractMetadata(videoId);

  const durationKnown = metadata.duration > 0;
  const durationLabel = durationKnown ? formatSeconds(metadata.duration) : 'duration unknown';
  log.info(`Video: "${metadata.title}" (${durationLabel})`);

  // Nothing to enforce without a limit, without a usable duration, or when within the limit.
  if (
    maxDurationSec === undefined ||
    !durationKnown ||
    !(metadata.duration > maxDurationSec)
  ) {
    return { videoId, metadata };
  }

  throw new Error(
    `Video duration exceeds --max-duration limit. ` +
      `(${formatSeconds(metadata.duration)} > ${formatSeconds(maxDurationSec)})`,
  );
}
@@ -1,32 +0,0 @@
1
- import type { AudioEvent } from '../../types/index.js';
2
-
3
/**
 * Base class that every audio analyzer backend derives from.
 *
 * Concrete analyzers (Gemini, Whisper, YAMNet) subclass this, declare a
 * `source` tag and implement `detect()`. The tag is attached to each returned
 * event so later code can tell which backend produced it.
 *
 * @example
 * const analyzer = new GeminiAudioAnalyzer();
 * const events = await analyzer.detect(audioPath, gameProfile, offsetSec, durationSec);
 */
export abstract class AudioAnalyzer {
  /** Backend identifier stamped onto every event this analyzer returns. */
  abstract readonly source: AudioEvent['source'];

  /**
   * Find audio events inside one WAV slice.
   *
   * @param audioPath - Location of the WAV slice on disk
   * @param gameProfile - Profile key that tunes detection prompts/classes
   * @param chunkOffsetSec - Where this slice starts within the full video, in seconds
   * @param chunkDurationSec - Length of this slice, in seconds
   * @returns The detected events, timestamped relative to the full video
   */
  abstract detect(
    audioPath: string,
    gameProfile: string,
    chunkOffsetSec: number,
    chunkDurationSec: number,
  ): Promise<AudioEvent[]>;
}
@@ -1,69 +0,0 @@
1
- import { log } from '../../utils/logger.js';
2
- import { AudioAnalyzer } from './base.js';
3
- import { GeminiAudioAnalyzer } from './gemini.js';
4
- import { WhisperAudioAnalyzer } from './whisper.js';
5
- import { YAMNetAudioAnalyzer } from './yamnet.js';
6
- import type { AudioProviderName } from '../../types/index.js';
7
-
8
/** Provider names accepted in the AUDIO_PROVIDER setting. */
const KNOWN_PROVIDERS = new Set<AudioProviderName>(['gemini', 'whisper', 'yamnet']);

/**
 * Parses the AUDIO_PROVIDER config string into an ordered list of provider names.
 *
 * Accepts a comma-separated list: "gemini,whisper" → ['gemini', 'whisper']
 * Single values still work: "yamnet" → ['yamnet']
 *
 * Names are trimmed and lowercased before validation; empty entries are dropped.
 *
 * Backward-compat: "both" (matched with the same trimming and case-folding as
 * provider names) is mapped to ['gemini', 'whisper'] with a deprecation warning.
 *
 * @param providerString - Raw AUDIO_PROVIDER value
 * @returns Provider names in the order they were listed
 * @throws {Error} when no provider is listed or a name is not recognised
 */
export function parseProviderChain(providerString: string): AudioProviderName[] {
  // Fold case here too, so "Both"/"BOTH" is treated as the legacy alias rather
  // than being rejected as an unknown provider further down.
  if (providerString.trim().toLowerCase() === 'both') {
    log.warn(
      '[audio] AUDIO_PROVIDER=both is deprecated. Use AUDIO_PROVIDER=gemini,whisper instead.',
    );
    return ['gemini', 'whisper'];
  }

  const names = providerString
    .split(',')
    .map((s) => s.trim().toLowerCase())
    .filter(Boolean);

  if (names.length === 0) {
    throw new Error(`AUDIO_PROVIDER is empty. Provide at least one of: gemini, whisper, yamnet`);
  }

  for (const name of names) {
    if (!KNOWN_PROVIDERS.has(name as AudioProviderName)) {
      throw new Error(
        `Unknown audio provider "${name}". Valid options: gemini, whisper, yamnet (comma-separated for chain)`,
      );
    }
  }

  // Every entry was checked against KNOWN_PROVIDERS above, so the cast is sound.
  return names as AudioProviderName[];
}
45
-
46
/**
 * Turns a provider chain string into analyzer instances, one per listed
 * provider and in the same order.
 *
 * The EventDetector walks the returned array front to back, moving on to the
 * next analyzer when the current one fails.
 *
 * @example
 * // AUDIO_PROVIDER=gemini,whisper → [GeminiAudioAnalyzer, WhisperAudioAnalyzer]
 * const chain = createAnalyzerChain(config.AUDIO_PROVIDER);
 *
 * @param providerString - Raw AUDIO_PROVIDER value, validated by `parseProviderChain`
 * @returns Freshly constructed analyzers in chain order
 */
export function createAnalyzerChain(providerString: string): AudioAnalyzer[] {
  const chain: AudioAnalyzer[] = [];

  for (const provider of parseProviderChain(providerString)) {
    if (provider === 'gemini') {
      chain.push(new GeminiAudioAnalyzer());
    } else if (provider === 'whisper') {
      chain.push(new WhisperAudioAnalyzer());
    } else if (provider === 'yamnet') {
      chain.push(new YAMNetAudioAnalyzer());
    }
  }

  return chain;
}
@@ -1,136 +0,0 @@
1
- import { GoogleGenerativeAI } from '@google/generative-ai';
2
- import * as fs from 'fs';
3
- import { z } from 'zod';
4
- import { config } from '../../config/index.js';
5
- import { log } from '../../utils/logger.js';
6
- import type { AudioEvent } from '../../types/index.js';
7
- import { AudioAnalyzer } from './base.js';
8
-
9
/**
 * One event as reported by Gemini.
 *
 * `time_sec` is not reliable as-is. Gemini inconsistently answers with either:
 *   - MM.SS notation: 1.03 = 1 min 3 sec = 63s
 *   - True decimal seconds: 53.403 = 53.403s
 * Pass it through normalizeGeminiTime() to obtain the correct value.
 */
const GeminiEventItemSchema = z.object({
  time_sec: z.number(),
  event: z.string(),
  confidence: z.number().min(0).max(1),
});

/** Shape of the whole Gemini reply: a JSON array of events. */
const GeminiEventSchema = z.array(GeminiEventItemSchema);
22
-
23
/**
 * Opening prompt sentence per game profile key.
 *
 * Lookups for profile keys that are not listed here are expected to fall back
 * to the `general` entry at the call site.
 */
const GAME_PROFILE_PROMPTS: Record<string, string> = {
  ['valorant']:
    'You are analyzing audio from a Valorant gaming video. Identify ALL significant game events: kills, deaths, explosions, ability uses, spike plants/defuses, ace moments, clutch situations, crowd reactions, hype moments.',
  ['fps']:
    'You are analyzing audio from an FPS gaming video. Identify ALL significant game events: kills, deaths, explosions, weapon fire, headshot sounds, kill streaks, crowd reactions, battle cries.',
  ['boss_fight']:
    'You are analyzing audio from a boss fight video. Identify ALL significant game events: boss phase transitions, big hits, explosions, boss death, crowd cheering, epic moments, victory sounds.',
  ['general']:
    'You are analyzing audio from a gaming video. Identify ALL significant audio events: explosions, gunshots, crowd reactions, cheering, epic moments, dramatic sounds.',
};
32
-
33
/**
 * Converts a MM.SS-notation value to decimal seconds.
 * e.g. 1.03 → 63, 1.40 → 100
 *
 * The integer part is read as minutes and the first two fractional digits
 * (rounded) as seconds.
 */
function mmssToSeconds(value: number): number {
  const minutes = Math.floor(value);
  const seconds = Math.round((value % 1) * 100);
  return minutes * 60 + seconds;
}

/**
 * Resolves a Gemini `time_sec` value to true decimal seconds.
 *
 * Gemini inconsistently returns either MM.SS notation (e.g. 1.03 meaning 63s)
 * or true decimal seconds (e.g. 53.403). This function disambiguates as follows:
 *
 *   1. If the value has more than two fractional digits it cannot be MM.SS
 *      notation (which carries exactly a two-digit seconds part), so it is
 *      true decimal seconds — use as-is.
 *   2. If the two-digit fractional part is > 59, it cannot be a seconds
 *      component (seconds only go 0-59) — use as-is.
 *   3. Otherwise, if the MM.SS conversion lands before the end of the chunk
 *      (`< chunkDurationSec`), treat the value as MM.SS.
 *   4. Fallback: use the value as true decimal seconds (the format we asked for).
 *
 * YAMNet always returns true decimal seconds and does NOT use this function.
 *
 * @param value - Raw `time_sec` reported by Gemini, relative to the chunk start
 * @param chunkDurationSec - Duration of the analysed chunk in seconds
 * @returns The timestamp in decimal seconds, still relative to the chunk start
 */
export function normalizeGeminiTime(value: number, chunkDurationSec: number): number {
  // Step 1: a third (or later) fractional digit rules out MM.SS. The tolerance
  // absorbs binary floating-point noise such as 1.03 * 100 = 103.00000000000001.
  const hundredths = value * 100;
  if (Math.abs(hundredths - Math.round(hundredths)) > 1e-6) {
    return value;
  }

  // Step 2: ".60" to ".99" cannot be a seconds field.
  const frac = value % 1;
  if (Math.round(frac * 100) > 59) {
    return value;
  }

  // Step 3: accept the MM.SS reading only when it falls inside the chunk.
  const mmss = mmssToSeconds(value);
  if (mmss < chunkDurationSec) {
    return mmss;
  }

  // Step 4: keep the literal value.
  return value;
}
72
-
73
/**
 * Uses Google Gemini's multimodal API to detect audio events in a WAV slice.
 * Understands game context semantically — best accuracy for gaming content.
 *
 * Requires GOOGLE_GENERATIVE_AI_API_KEY to be set.
 */
export class GeminiAudioAnalyzer extends AudioAnalyzer {
  readonly source = 'gemini' as const;

  /**
   * Sends the WAV slice plus a profile-specific prompt to Gemini and converts
   * the JSON reply into events with absolute timestamps.
   *
   * @param audioPath - Path to the WAV audio slice on disk
   * @param gameProfile - Prompt profile key; unknown keys use the `general` prompt
   * @param chunkOffsetSec - Start of this slice within the full video, added to every event time
   * @param chunkDurationSec - Slice duration, used to disambiguate Gemini's timestamp format
   * @returns Detected events tagged with `source: 'gemini'`
   * @throws {Error} when the API key is missing, or the reply is not valid JSON
   *   or does not match the expected event schema
   */
  async detect(
    audioPath: string,
    gameProfile: string,
    chunkOffsetSec: number,
    chunkDurationSec: number,
  ): Promise<AudioEvent[]> {
    // Fail with an actionable message instead of asserting the key is present.
    const apiKey = config.GOOGLE_GENERATIVE_AI_API_KEY;
    if (!apiKey) {
      throw new Error(
        'GOOGLE_GENERATIVE_AI_API_KEY is not set. It is required for the gemini audio provider.',
      );
    }

    const genai = new GoogleGenerativeAI(apiKey);
    const model = genai.getGenerativeModel({ model: config.AUDIO_GEMINI_MODEL });

    // Read asynchronously so the event loop is not blocked while the slice loads.
    const audioData = await fs.promises.readFile(audioPath);
    const base64Audio = audioData.toString('base64');

    const extraInstructions = config.AUDIO_EXTRA_INSTRUCTIONS
      ? `\nAdditional instructions:\n${config.AUDIO_EXTRA_INSTRUCTIONS}\n`
      : '';

    const prompt = `${GAME_PROFILE_PROMPTS[gameProfile] ?? GAME_PROFILE_PROMPTS.general} ${extraInstructions}

For each event, return a JSON object with:
- time_sec: the time in seconds (be very precise with the timestamp, Gemini is good at this when the format is correct)
- event: a short description of the event (e.g., "gunshot", "explosion", "clutch moment")
- confidence: your confidence level (0.0 to 1.0)

Return ONLY a JSON array, no explanation. Format:
[
{"time_sec": 12.5, "event": "gunshot", "confidence": 0.8},
{"time_sec": 45.2, "event": "explosion", "confidence": 0.9}
]`;

    const result = await model.generateContent([
      { inlineData: { mimeType: 'audio/wav', data: base64Audio } },
      prompt,
    ]);

    const text = result.response.text();
    log.info(`[audio:gemini] response: ${text}`);

    // Strip a surrounding Markdown code fence (``` or ```json) if the model added one.
    const cleaned = text
      .replace(/^```(?:json)?\s*/i, '')
      .replace(/\s*```\s*$/i, '')
      .trim();

    // Report unparsable replies with context rather than a bare SyntaxError.
    let payload: unknown;
    try {
      payload = JSON.parse(cleaned);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(`Gemini response was not valid JSON: ${reason}`);
    }

    const parsed = GeminiEventSchema.safeParse(payload);
    if (!parsed.success) {
      throw new Error(`Gemini response failed validation: ${parsed.error.message}`);
    }

    return parsed.data.map((e) => ({
      // Normalise within the chunk first, then shift to the absolute timeline.
      time: normalizeGeminiTime(e.time_sec, chunkDurationSec) + chunkOffsetSec,
      event: e.event,
      confidence: e.confidence,
      source: this.source,
    }));
  }
}
@@ -1,6 +0,0 @@
1
- export { AudioAnalyzer } from './base.js';
2
- export { GeminiAudioAnalyzer, normalizeGeminiTime } from './gemini.js';
3
- export { WhisperAudioAnalyzer, getPythonBin } from './whisper.js';
4
- export { YAMNetAudioAnalyzer } from './yamnet.js';
5
- export { createAnalyzerChain, parseProviderChain } from './factory.js';
6
- export type { AudioProviderName } from '../../types/index.js';
@@ -1,80 +0,0 @@
1
- import { execa } from 'execa';
2
- import { config } from '../../config/index.js';
3
- import { log } from '../../utils/logger.js';
4
- import type { AudioEvent } from '../../types/index.js';
5
- import { AudioAnalyzer } from './base.js';
6
-
7
/**
 * Name of the Python binary found by a previous `getPythonBin()` call, or
 * `null` while no lookup has succeeded yet.
 */
let _pythonBin: string | null = null;

/**
 * Finds a usable Python interpreter, remembering the answer after the first
 * successful lookup. Shared by both Python-based analyzers (Whisper, YAMNet).
 *
 * Candidates are probed in order (`python3`, then `python`) by running
 * `<bin> --version`; each failed probe is logged as a warning.
 *
 * @returns The first candidate whose `--version` invocation succeeds
 * @throws {Error} when none of the candidates can be executed
 */
export async function getPythonBin(): Promise<string> {
  if (_pythonBin) {
    return _pythonBin;
  }

  const candidates = ['python3', 'python'];

  for (const candidate of candidates) {
    try {
      await execa(candidate, ['--version']);
    } catch {
      log.warn(`[audio] ${candidate} not found, trying next binary...`);
      continue;
    }

    _pythonBin = candidate;
    return candidate;
  }

  throw new Error(
    'No Python interpreter found (tried python3, python). Install Python 3 to use YAMNet or Whisper.',
  );
}
30
-
31
/**
 * Uses OpenAI Whisper (local) to transcribe the audio chunk and scan the
 * resulting transcript for hype keywords per game profile.
 *
 * Requires: pip install openai-whisper
 */
export class WhisperAudioAnalyzer extends AudioAnalyzer {
  readonly source = 'whisper' as const;

  /**
   * Runs `scripts/detect_events_whisper.py` on the slice and shifts the
   * reported event times onto the full video's timeline.
   *
   * @param audioPath - Path to the WAV audio slice on disk
   * @param gameProfile - Profile key forwarded to the script
   * @param chunkOffsetSec - Start of this slice within the full video, added to every event time
   * @param _chunkDurationSec - Unused by this analyzer
   * @returns Detected events tagged with `source: 'whisper'`
   * @throws {Error} when Whisper is not installed, the script fails, or its
   *   output is not a JSON array
   */
  async detect(
    audioPath: string,
    gameProfile: string,
    chunkOffsetSec: number,
    _chunkDurationSec: number,
  ): Promise<AudioEvent[]> {
    const python = await getPythonBin();

    let stdout: string;
    try {
      // NOTE(review): the script path is relative, so it is resolved against the
      // process working directory — confirm callers always run from the package root.
      const result = await execa(python, [
        'scripts/detect_events_whisper.py',
        audioPath,
        String(config.AUDIO_CONFIDENCE_THRESHOLD),
        gameProfile,
        config.AUDIO_WHISPER_MODEL,
      ]);
      stdout = result.stdout;
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);

      // A Python import failure means the whisper package itself is missing.
      if (message.includes('ModuleNotFoundError') || message.includes('No module named')) {
        throw new Error(
          'openai-whisper not installed. Run: pip install openai-whisper\n' +
            'Or set AUDIO_PROVIDER=gemini in .env and configure GOOGLE_GENERATIVE_AI_API_KEY.',
        );
      }

      throw new Error(`Whisper detection failed: ${message}`);
    }

    // Guard the script output so malformed stdout yields a descriptive error
    // instead of a bare SyntaxError/TypeError.
    let parsed: unknown;
    try {
      parsed = JSON.parse(stdout);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(`Whisper detection returned invalid JSON: ${reason}`);
    }
    if (!Array.isArray(parsed)) {
      throw new Error('Whisper detection returned unexpected output: expected a JSON array.');
    }

    const events = parsed as Array<{ time: number; event: string; confidence: number }>;

    return events.map((e) => ({
      time: e.time + chunkOffsetSec,
      event: e.event,
      confidence: e.confidence,
      source: this.source,
    }));
  }
}
@@ -1,54 +0,0 @@
1
- import { execa } from 'execa';
2
- import { config } from '../../config/index.js';
3
- import type { AudioEvent } from '../../types/index.js';
4
- import { AudioAnalyzer } from './base.js';
5
- import { getPythonBin } from './whisper.js';
6
-
7
/**
 * Uses YAMNet (TensorFlow Hub) via a Python script to classify audio frames
 * against a fixed set of game-relevant sound classes (gunshot, explosion, etc.).
 *
 * Requires: pip install tensorflow-hub soundfile numpy
 */
export class YAMNetAudioAnalyzer extends AudioAnalyzer {
  readonly source = 'yamnet' as const;

  /**
   * Runs `scripts/detect_events.py` on the slice and shifts the reported event
   * times onto the full video's timeline.
   *
   * @param audioPath - Path to the WAV audio slice on disk
   * @param _gameProfile - Unused by this analyzer
   * @param chunkOffsetSec - Start of this slice within the full video, added to every event time
   * @param _chunkDurationSec - Unused by this analyzer
   * @returns Detected events tagged with `source: 'yamnet'`
   * @throws {Error} when the Python dependencies are missing, the script fails,
   *   or its output is not a JSON array
   */
  async detect(
    audioPath: string,
    _gameProfile: string,
    chunkOffsetSec: number,
    _chunkDurationSec: number,
  ): Promise<AudioEvent[]> {
    const python = await getPythonBin();

    let stdout: string;
    try {
      // NOTE(review): the script path is relative, so it is resolved against the
      // process working directory — confirm callers always run from the package root.
      const result = await execa(python, [
        'scripts/detect_events.py',
        audioPath,
        String(config.AUDIO_CONFIDENCE_THRESHOLD),
      ]);
      stdout = result.stdout;
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);

      // A Python import failure means one of the script's packages is missing.
      if (message.includes('ModuleNotFoundError') || message.includes('No module named')) {
        throw new Error(
          'YAMNet dependencies missing. Run: pip3 install tensorflow-hub soundfile numpy\n' +
            'Or set AUDIO_PROVIDER=gemini in .env and configure GOOGLE_GENERATIVE_AI_API_KEY.',
        );
      }

      throw new Error(`YAMNet detection failed: ${message}`);
    }

    // Guard the script output so malformed stdout yields a descriptive error
    // instead of a bare SyntaxError/TypeError.
    let parsed: unknown;
    try {
      parsed = JSON.parse(stdout);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(`YAMNet detection returned invalid JSON: ${reason}`);
    }
    if (!Array.isArray(parsed)) {
      throw new Error('YAMNet detection returned unexpected output: expected a JSON array.');
    }

    const events = parsed as Array<{ time: number; event: string; confidence: number }>;

    return events.map((e) => ({
      time: e.time + chunkOffsetSec,
      event: e.event,
      confidence: e.confidence,
      source: this.source,
    }));
  }
}
@@ -1,102 +0,0 @@
1
- import { execa } from 'execa';
2
- import * as fs from 'fs';
3
- import * as path from 'path';
4
- import { config } from '../../config/index.js';
5
-
6
/**
 * Creates a listener for a yt-dlp output stream that echoes download progress.
 *
 * Each non-blank line of a received chunk is checked for a
 * `[download] <percent>%` marker; every match is written to stdout prefixed
 * with a carriage return so the percentage overwrites the previous one.
 *
 * @param stream - Which child stream the listener is attached to (not used by the listener itself)
 * @returns A `data` event handler accepting a Buffer or string chunk
 */
function displayProgress(stream: 'stdout' | 'stderr'): (data: Buffer | string) => void {
  const progressPattern = /\[download\]\s+(\d+\.?\d*%)/;

  return function reportProgress(data: Buffer | string): void {
    for (const rawLine of String(data).split('\n')) {
      if (!rawLine.trim()) {
        continue;
      }

      const hit = progressPattern.exec(rawLine);
      if (hit !== null) {
        process.stdout.write(`\r${hit[0]}`);
      }
    }
  };
}
19
-
20
/**
 * Downloads a YouTube video's audio track as a WAV file using yt-dlp.
 *
 * The file is written to `<outputDir>/<videoId>_audio.wav`. If that file
 * already exists it is reused and nothing is downloaded. yt-dlp is asked to
 * extract audio as WAV and to pass `-ar 16000 -ac 1` to ffmpeg.
 *
 * Optional config values add `--ffmpeg-location`, `--cookies-from-browser` or
 * `--cookies` to the yt-dlp invocation.
 *
 * @param videoId - YouTube video ID to download
 * @param outputDir - Directory for the WAV file; created if missing
 * @returns Path of the (cached or freshly downloaded) WAV file
 * @throws {Error} with a tailored message when yt-dlp or ffmpeg is missing, or
 *   the video is private, blocked by bot detection, or geo-restricted;
 *   otherwise a generic "Audio download failed" error
 */
export async function downloadAudio(videoId: string, outputDir: string): Promise<string> {
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
  }

  const outputPath = path.join(outputDir, `${videoId}_audio.wav`);

  if (fs.existsSync(outputPath)) {
    console.log(`[audio] Using cached audio: ${outputPath}`);
    return outputPath;
  }

  console.log(`[audio] Downloading audio for ${videoId}...`);

  try {
    const args = [
      '-x',
      '--audio-format',
      'wav',
      '--audio-quality',
      '0',
      '--postprocessor-args',
      'ffmpeg:-ar 16000 -ac 1',
      '-o',
      outputPath,
      '--newline',
      `https://youtube.com/watch?v=${videoId}`,
    ];

    if (config.FFMPEG_PATH) {
      args.unshift('--ffmpeg-location', config.FFMPEG_PATH);
    }

    // Browser cookies take precedence over a cookies file when both are configured.
    if (config.YT_DLP_COOKIES_FROM_BROWSER) {
      args.unshift('--cookies-from-browser', config.YT_DLP_COOKIES_FROM_BROWSER);
    } else if (config.YT_DLP_COOKIES_FILE) {
      args.unshift('--cookies', config.YT_DLP_COOKIES_FILE);
    }

    const subprocess = execa('yt-dlp', args);

    subprocess.stdout?.on('data', displayProgress('stdout'));
    subprocess.stderr?.on('data', displayProgress('stderr'));

    await subprocess;
    // Finish the carriage-return progress line.
    process.stdout.write('\n');
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);

    if (message.includes('command not found') || message.includes('ENOENT')) {
      throw new Error('yt-dlp is required. Install it: https://github.com/yt-dlp/yt-dlp');
    }

    if (message.includes('ffprobe and ffmpeg not found') || message.includes('ffmpeg not found')) {
      throw new Error(
        'ffmpeg is required for audio processing. Install it: `brew install ffmpeg`' +
          (config.FFMPEG_PATH
            ? ''
            : '\nOr set FFMPEG_PATH in your .env to your ffmpeg binary location.'),
      );
    }

    if (message.includes('Private video')) {
      throw new Error(`Video "${videoId}" is private and cannot be downloaded.`);
    }

    if (message.includes('Sign in to confirm') || message.includes('confirm you')) {
      throw new Error(
        `yt-dlp was blocked by YouTube bot detection for "${videoId}". ` +
          `Set YT_DLP_COOKIES_FROM_BROWSER=chrome (or firefox/safari) in your .env to authenticate.`,
      );
    }

    // Match geo-restriction wording only. A bare "geo" substring also occurs in
    // unrelated text that can appear in the failure message (e.g. a path or a
    // video ID), which would mislabel other failures as geo-blocking.
    if (
      message.includes('not available in your country') ||
      /geo[\s-]?(?:restrict|block)/i.test(message)
    ) {
      throw new Error(`Video "${videoId}" is geo-blocked in your region.`);
    }

    throw new Error(`Audio download failed: ${message}`);
  }

  console.log(`[audio] Audio saved to ${outputPath}`);
  return outputPath;
}
@@ -1,82 +0,0 @@
1
- import type { TranscriptLine, MicroBlock, LLMChunk } from '../../types/index.js';
2
-
3
/**
 * Buckets raw transcript lines into micro-blocks roughly `windowSec` seconds long.
 *
 * The first window opens at the first line's `start`. When a line begins at or
 * after the current window's start plus `windowSec`, the open block is closed
 * (its `end` is that line's `start`) and a new window opens at that line.
 * The final block ends where the last line ends (`start + duration`).
 *
 * @param lines - Transcript lines in playback order
 * @param windowSec - Target window length in seconds
 * @returns The micro-blocks in order; an empty array for empty input
 */
export function buildMicroBlocks(lines: TranscriptLine[], windowSec: number): MicroBlock[] {
  const result: MicroBlock[] = [];
  if (lines.length === 0) {
    return result;
  }

  let blockStart = lines[0].start;
  let pending: string[] = [];

  lines.forEach((entry) => {
    const windowClosed = entry.start >= blockStart + windowSec;
    if (windowClosed) {
      result.push({ start: blockStart, end: entry.start, text: pending.join(' ') });
      blockStart = entry.start;
      pending = [];
    }
    pending.push(entry.text);
  });

  // Flush whatever is still buffered as the trailing block.
  if (pending.length > 0) {
    const tail = lines[lines.length - 1];
    result.push({
      start: blockStart,
      end: tail.start + tail.duration,
      text: pending.join(' '),
    });
  }

  return result;
}
40
-
41
/**
 * Assembles micro-blocks into larger LLM analysis chunks with a sliding window.
 *
 * The window is `chunkLen` seconds wide and advances by `chunkLen - overlap`
 * seconds each step, beginning at the first block's `start` and stopping once
 * it reaches the last block's `end`. A chunk contains every block whose
 * `start` lies in `[windowStart, windowStart + chunkLen)`; windows containing
 * no block produce no chunk.
 *
 * If the step is not positive (`overlap >= chunkLen`) only the first window is
 * evaluated.
 *
 * @param blocks - Micro-blocks in playback order
 * @param chunkLen - Window length in seconds
 * @param overlap - Seconds shared between consecutive windows
 * @returns The chunks in order; an empty array for empty input
 */
export function buildLLMChunks(
  blocks: MicroBlock[],
  chunkLen: number,
  overlap: number,
): LLMChunk[] {
  const out: LLMChunk[] = [];
  if (blocks.length === 0) {
    return out;
  }

  const stride = chunkLen - overlap;
  const transcriptEnd = blocks[blocks.length - 1].end;

  for (let cursor = blocks[0].start; cursor < transcriptEnd; cursor += stride) {
    const limit = cursor + chunkLen;
    const members = blocks.filter((block) => block.start >= cursor && block.start < limit);

    if (members.length > 0) {
      const first = members[0];
      const last = members[members.length - 1];
      out.push({
        start: first.start,
        end: last.end,
        text: members.map((member) => member.text).join(' '),
      });
    }

    // A non-positive stride would never advance the window; stop after one pass.
    if (stride <= 0) {
      break;
    }
  }

  return out;
}