@thunderkiller/video-clipper 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/CONTRIBUTING.md +100 -0
- package/LICENSE +15 -0
- package/commitlint.config.js +25 -0
- package/package.json +3 -1
- package/.github/workflows/ci.yml +0 -42
- package/.github/workflows/release.yml +0 -76
- package/.husky/pre-commit +0 -3
- package/.prettierignore +0 -6
- package/.prettierrc +0 -7
- package/.releaserc.json +0 -21
- package/AGENTS.md +0 -122
- package/docs/free-models.md +0 -78
- package/docs/plan.md +0 -442
- package/docs/refactorPhases.md +0 -105
- package/docs/yt-downloader.md +0 -440
- package/requirements.txt +0 -5
- package/scripts/detect_events.py +0 -81
- package/scripts/detect_events_whisper.py +0 -101
- package/scripts/transcribe_whisper.py +0 -70
- package/src/cli.ts +0 -186
- package/src/config/env.ts +0 -18
- package/src/config/index.ts +0 -2
- package/src/index.ts +0 -46
- package/src/pipeline/runner.ts +0 -147
- package/src/pipeline/stages/audioProcessor.ts +0 -127
- package/src/pipeline/stages/clipExporter.ts +0 -76
- package/src/pipeline/stages/segmentAnalyzer.ts +0 -72
- package/src/pipeline/stages/segmentSelector.ts +0 -39
- package/src/pipeline/stages/videoResolver.ts +0 -44
- package/src/services/audioAnalyzers/base.ts +0 -32
- package/src/services/audioAnalyzers/factory.ts +0 -69
- package/src/services/audioAnalyzers/gemini.ts +0 -136
- package/src/services/audioAnalyzers/index.ts +0 -6
- package/src/services/audioAnalyzers/whisper.ts +0 -80
- package/src/services/audioAnalyzers/yamnet.ts +0 -54
- package/src/services/audioDownloader/index.ts +0 -102
- package/src/services/chunkBuilder/index.ts +0 -82
- package/src/services/clipGenerator/index.ts +0 -210
- package/src/services/clipRefiner/index.ts +0 -141
- package/src/services/eventDetector/index.ts +0 -68
- package/src/services/llmAnalyzer/LLMAnalyzer.ts +0 -98
- package/src/services/llmAnalyzer/index.ts +0 -231
- package/src/services/metadataExtractor/index.ts +0 -83
- package/src/services/segmentRanker/index.ts +0 -88
- package/src/services/signalMerger/index.ts +0 -53
- package/src/services/transcriptAnalyzers/base.ts +0 -26
- package/src/services/transcriptAnalyzers/factory.ts +0 -66
- package/src/services/transcriptAnalyzers/gemini.ts +0 -24
- package/src/services/transcriptAnalyzers/index.ts +0 -6
- package/src/services/transcriptAnalyzers/whisper.ts +0 -68
- package/src/services/transcriptAnalyzers/ytdlp.ts +0 -19
- package/src/services/transcriptDetector/index.ts +0 -122
- package/src/services/transcriptFetcher/index.ts +0 -147
- package/src/services/urlParser/index.ts +0 -52
- package/src/services/videoDownloader/index.ts +0 -268
- package/src/types/analyzer.ts +0 -23
- package/src/types/audio.ts +0 -19
- package/src/types/cache.ts +0 -8
- package/src/types/cli.ts +0 -22
- package/src/types/config.ts +0 -151
- package/src/types/downloader.ts +0 -15
- package/src/types/factory.ts +0 -3
- package/src/types/index.ts +0 -40
- package/src/types/pipeline.ts +0 -60
- package/src/types/segment.ts +0 -43
- package/src/types/transcript.ts +0 -22
- package/src/types/video.ts +0 -18
- package/src/utils/cache.ts +0 -224
- package/src/utils/chunker.ts +0 -60
- package/src/utils/dumper.ts +0 -41
- package/src/utils/format.ts +0 -10
- package/src/utils/logger.ts +0 -17
- package/src/utils/modelFactory.ts +0 -71
- package/src/utils/redactConfig.ts +0 -23
- package/src/utils/sliceAudio.ts +0 -35
- package/test-trigger.txt +0 -1
- package/tests/analyzerFactory.test.ts +0 -146
- package/tests/audioEventDetector.test.ts +0 -69
- package/tests/cache.test.ts +0 -203
- package/tests/chunkBuilder.test.ts +0 -146
- package/tests/chunker.test.ts +0 -95
- package/tests/eventDetector.test.ts +0 -103
- package/tests/llmAnalyzer.test.ts +0 -283
- package/tests/segmentRanker.test.ts +0 -133
- package/tests/setup.ts +0 -48
- package/tests/signalMerger.test.ts +0 -197
- package/tests/transcriptDetector.test.ts +0 -150
- package/tests/transcriptFetcher.test.ts +0 -179
- package/tests/urlParser.test.ts +0 -70
- package/tsconfig.json +0 -16
- package/tsconfig.test.json +0 -8
- package/vitest.config.ts +0 -8
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach } from 'vitest';
|
|
2
|
-
import { rankSegments } from '../src/services/segmentRanker/index.js';
|
|
3
|
-
import type { MergedCandidate } from '../src/types/index.js';
|
|
4
|
-
|
|
5
|
-
/**
 * Builds a `MergedCandidate` fixture for the ranking tests.
 *
 * @param start - Segment start, copied to `start`.
 * @param end - Segment end, copied to `end`.
 * @param score - Score assigned to the candidate.
 * @param source - Origin of the candidate; defaults to `'transcript'`.
 * @param audio_event - Optional audio event label; the key is always present
 *   on the result, with value `undefined` when omitted.
 * @returns A candidate whose `reason` is derived from the start/end pair.
 */
function seg(
  start: number,
  end: number,
  score: number,
  source: 'transcript' | 'audio' | 'both' = 'transcript',
  audio_event?: string,
): MergedCandidate {
  const reason = `reason for ${start}-${end}`;
  const candidate: MergedCandidate = { start, end, score, source, reason, audio_event };
  return candidate;
}
|
|
21
|
-
|
|
22
|
-
// ---------------------------------------------------------------------------
|
|
23
|
-
// Tests
|
|
24
|
-
// ---------------------------------------------------------------------------
|
|
25
|
-
|
|
26
|
-
/**
 * Unit tests for `rankSegments`.
 *
 * Every call has the shape `rankSegments(candidates, threshold, topN)`: the
 * second argument is the minimum score a segment needs to survive and the
 * third caps how many ranked segments are returned (both meanings are pinned
 * by the threshold and `topN` cases below).
 */
describe('rankSegments', () => {
  it('returns empty array when input is empty', () => {
    expect(rankSegments([], 7, 10)).toEqual([]);
  });

  it('filters out segments with score below threshold', () => {
    const segments = [seg(0, 30, 6), seg(30, 60, 8)];
    const result = rankSegments(segments, 7, 10);
    expect(result).toHaveLength(1);
    expect(result[0].score).toBe(8);
  });

  // Same scenario as above, but with the source spelled out instead of relying
  // on the fixture default. Titled distinctly so failures are unambiguous.
  it('filters out below-threshold segments when the source is passed explicitly', () => {
    const segments = [seg(0, 30, 6, 'transcript'), seg(30, 60, 8, 'transcript')];
    const result = rankSegments(segments, 7, 10);
    expect(result).toHaveLength(1);
    expect(result[0].score).toBe(8);
  });

  // The comparison is inclusive: a score equal to the threshold is kept.
  it('keeps segments exactly at threshold (score >= threshold)', () => {
    const segments = [seg(0, 30, 7), seg(30, 60, 6)];
    const result = rankSegments(segments, 7, 10);
    expect(result).toHaveLength(1);
    expect(result[0].score).toBe(7);
  });

  it('sorts segments by score descending', () => {
    const segments = [seg(0, 30, 7), seg(60, 90, 9), seg(120, 150, 8)];
    const result = rankSegments(segments, 7, 10);
    expect(result.map((r) => r.score)).toEqual([9, 8, 7]);
  });

  it('assigns sequential ranks starting at 1', () => {
    const segments = [seg(0, 30, 9), seg(60, 90, 8), seg(120, 150, 7)];
    const result = rankSegments(segments, 7, 10);
    expect(result.map((r) => r.rank)).toEqual([1, 2, 3]);
  });

  it('caps output at topN', () => {
    // Eight non-overlapping segments with scores 10, 9, ..., 3.
    const segments = Array.from({ length: 8 }, (_, i) => seg(i * 60, i * 60 + 30, 10 - i));
    const result = rankSegments(segments, 1, 3);
    expect(result).toHaveLength(3);
    expect(result[0].rank).toBe(1);
    expect(result[2].rank).toBe(3);
  });

  it('renames clip_start/clip_end to start/end', () => {
    const segments = [seg(42, 99, 8)];
    const result = rankSegments(segments, 7, 10);
    expect(result[0].start).toBe(42);
    expect(result[0].end).toBe(99);
    // RankedSegment type should not have clip_start / clip_end
    expect((result[0] as Record<string, unknown>)['clip_start']).toBeUndefined();
    expect((result[0] as Record<string, unknown>)['clip_end']).toBeUndefined();
  });

  describe('deduplication', () => {
    it('drops the lower-scored segment when overlap > 50% of either duration', () => {
      // Segment A: 0–100 (100s), Segment B: 60–120 (60s)
      // Overlap: 60–100 = 40s
      // 40/100 = 40% of A, 40/60 ≈ 67% of B → significant overlap → drop B (lower score)
      const segments = [seg(0, 100, 9), seg(60, 120, 8)];
      const result = rankSegments(segments, 7, 10);
      expect(result).toHaveLength(1);
      expect(result[0].score).toBe(9);
      expect(result[0].start).toBe(0);
    });

    it('keeps both segments when overlap is <= 50% of both durations', () => {
      // Segment A: 0–100 (100s), Segment B: 80–200 (120s)
      // Overlap: 80–100 = 20s
      // 20/100 = 20% of A, 20/120 ≈ 17% of B → not significant → keep both
      const segments = [seg(0, 100, 9), seg(80, 200, 8)];
      const result = rankSegments(segments, 7, 10);
      expect(result).toHaveLength(2);
    });

    it('keeps both non-overlapping segments', () => {
      const segments = [seg(0, 30, 9), seg(60, 90, 8)];
      const result = rankSegments(segments, 7, 10);
      expect(result).toHaveLength(2);
    });

    it('keeps the higher-scored segment when two nearly identical segments compete', () => {
      const segments = [seg(0, 60, 7), seg(10, 70, 9)];
      // Overlap: 10–60 = 50s; A duration=60, B duration=60
      // 50/60 ≈ 83% → significant → keep score=9
      const result = rankSegments(segments, 7, 10);
      expect(result).toHaveLength(1);
      expect(result[0].score).toBe(9);
    });

    it('correctly deduplicates multiple overlapping segments keeping highest scores', () => {
      // Three mutually overlapping segments plus one isolated segment
      const segments = [
        seg(0, 60, 9), // best, kept
        seg(10, 70, 8), // overlaps A > 50% → dropped
        seg(20, 80, 7), // overlaps A > 50% → dropped
        seg(200, 250, 8), // no overlap → kept
      ];
      const result = rankSegments(segments, 7, 10);
      expect(result).toHaveLength(2);
      expect(result[0].score).toBe(9);
      expect(result[1].score).toBe(8);
      expect(result[1].start).toBe(200);
    });
  });
});
|
package/tests/setup.ts
DELETED
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
import { vi } from 'vitest';
|
|
2
|
-
|
|
3
|
-
/**
 * Replaces the project's config module with a fixed, in-memory stub so tests
 * never depend on real environment variables or API keys.
 *
 * The stub is built inside the factory on purpose: vitest hoists `vi.mock`
 * calls, so the factory must not capture module-level variables.
 */
vi.mock('../src/config/index.js', () => {
  const config = {
    // LLM provider selection and credentials (only the OpenAI key is set).
    LLM_PROVIDER: 'openai',
    OPENAI_API_KEY: 'test-key',
    ANTHROPIC_API_KEY: undefined,
    GOOGLE_GENERATIVE_AI_API_KEY: undefined,
    XAI_API_KEY: undefined,
    MISTRAL_API_KEY: undefined,
    GROQ_API_KEY: undefined,
    ZAI_API_KEY: undefined,
    OPENROUTER_API_KEY: undefined,

    // Ranking and chunking parameters.
    SCORE_THRESHOLD: 7,
    TOP_N_SEGMENTS: 10,
    CHUNK_LENGTH_SEC: 120,
    CHUNK_OVERLAP_SEC: 20,
    MICRO_BLOCK_SEC: 15,

    // LLM model settings.
    LLM_MODEL: 'gpt-4o',
    LLM_MAX_RETRIES: 3,

    // Filesystem locations and output toggles.
    DOWNLOAD_DIR: 'downloads/',
    OUTPUT_DIR: 'outputs/',
    CACHE_DIR: 'outputs/cache',
    DUMP_OUTPUTS: false,

    // Concurrency and prompt overrides.
    MAX_CHUNKS: undefined,
    LLM_CONCURRENCY: 3,
    CLIP_CONCURRENCY: 1,
    LLM_SYSTEM_PROMPT: undefined,

    // Audio analysis model settings.
    AUDIO_GEMINI_MODEL: 'gemini-2.5-flash',
    AUDIO_EXTRA_INSTRUCTIONS: undefined,

    // Download / ffmpeg settings.
    DOWNLOAD_SECTIONS_MODE: 'all' as const,
    FFMPEG_PATH: undefined,
    FFPROBE_PATH: undefined,
    FFMPEG_PRESET: 'fast' as const,
    TIMESTAMP_OFFSET_SECONDS: 0,

    // Transcript and audio detection settings.
    TRANSCRIPT_PROVIDER: 'ytdlp',
    AUDIO_DETECTION_ENABLED: true,
    AUDIO_PROVIDER: 'gemini,whisper',
    AUDIO_CONFIDENCE_THRESHOLD: 0.3,
    AUDIO_CLIP_PRE_ROLL: 5,
    AUDIO_CLIP_POST_ROLL: 15,
    AUDIO_LLM_BOOST_WINDOW: 10,
    AUDIO_LLM_SCORE_BOOST: 2,

    // Miscellaneous.
    GAME_PROFILE: 'general' as const,
    YT_DLP_COOKIES_FROM_BROWSER: undefined,
    YT_DLP_COOKIES_FILE: undefined,
  };

  return { config };
});
|
|
@@ -1,197 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from 'vitest';
|
|
2
|
-
import { mergeSignals } from '../src/services/signalMerger/index.js';
|
|
3
|
-
import type { ChunkEvaluation, AudioEvent } from '../src/types/index.js';
|
|
4
|
-
|
|
5
|
-
/**
 * Builds a successful `ChunkEvaluation` fixture.
 *
 * The chunk metadata is fixed (`chunk_index` 0, spanning 0–100); only the clip
 * window, score and `interesting` flag vary between tests.
 *
 * @param clipStart - Value stored in `clip_start`.
 * @param clipEnd - Value stored in `clip_end`.
 * @param score - Score stored on the evaluation.
 * @param interesting - Value of the `interesting` flag; defaults to `true`.
 */
function createSuccessEvaluation(
  clipStart: number,
  clipEnd: number,
  score: number,
  interesting = true,
): ChunkEvaluation {
  const evaluation: ChunkEvaluation = {
    status: 'success',
    chunk_index: 0,
    chunk_start: 0,
    chunk_end: 100,
    interesting,
    score,
    reason: 'Test reason',
    clip_start: clipStart,
    clip_end: clipEnd,
  };
  return evaluation;
}
|
|
23
|
-
|
|
24
|
-
/**
 * Builds a failed `ChunkEvaluation` fixture: fixed chunk metadata
 * (`chunk_index` 0, spanning 0–100) plus a canned error message.
 */
function createFailedEvaluation(): ChunkEvaluation {
  const evaluation: ChunkEvaluation = {
    status: 'failed',
    chunk_index: 0,
    chunk_start: 0,
    chunk_end: 100,
    error: 'Test error',
  };
  return evaluation;
}
|
|
33
|
-
|
|
34
|
-
/**
 * Builds an `AudioEvent` fixture whose `source` is always `'gemini'`.
 *
 * @param time - Timestamp stored in `time`.
 * @param confidence - Detection confidence; defaults to `0.8`.
 * @param event - Event label; defaults to `'gunshot'`.
 */
function createAudioEvent(time: number, confidence = 0.8, event = 'gunshot'): AudioEvent {
  const audioEvent: AudioEvent = { time, event, confidence, source: 'gemini' };
  return audioEvent;
}
|
|
42
|
-
|
|
43
|
-
/**
 * Unit tests for `mergeSignals`, which combines LLM chunk evaluations with
 * detected audio events into a single candidate list.
 */
describe('mergeSignals', () => {
  // All but the last case pass the same four positional overrides.
  // NOTE(review): the values mirror the AUDIO_LLM_BOOST_WINDOW / AUDIO_LLM_SCORE_BOOST /
  // AUDIO_CLIP_PRE_ROLL / AUDIO_CLIP_POST_ROLL entries in the mocked config —
  // confirm the order against the mergeSignals signature.
  const merge = (llmSegments: ChunkEvaluation[], audioEvents: AudioEvent[]) =>
    mergeSignals(llmSegments, audioEvents, 10, 2, 5, 15);

  it('returns empty array when both inputs are empty', () => {
    expect(merge([], [])).toEqual([]);
  });

  it('processes LLM-only segments with no audio events', () => {
    const merged = merge([createSuccessEvaluation(10, 20, 7)], []);

    expect(merged).toHaveLength(1);
    expect(merged[0]).toMatchObject({
      start: 10,
      end: 20,
      score: 7,
      source: 'transcript',
      reason: 'Test reason',
      audio_event: undefined,
    });
  });

  it('processes audio-only events with no nearby LLM segments', () => {
    const merged = merge([], [createAudioEvent(30, 0.9, 'explosion')]);

    expect(merged).toHaveLength(1);
    // Window is event time 30 minus 5 / plus 15; score 9 pairs with confidence 0.9.
    expect(merged[0]).toMatchObject({
      start: 25,
      end: 45,
      score: 9,
      source: 'audio',
      reason: 'Audio event: explosion (90% confidence)',
      audio_event: 'explosion',
    });
  });

  it('boosts LLM segment score when nearby audio event exists', () => {
    const merged = merge(
      [createSuccessEvaluation(10, 20, 6)],
      [createAudioEvent(15, 0.8, 'gunshot')],
    );

    expect(merged).toHaveLength(1);
    // 6 + boost of 2 = 8, and the source flips to 'both'.
    expect(merged[0]).toMatchObject({
      start: 10,
      end: 20,
      score: 8,
      source: 'both',
      reason: 'Test reason',
      audio_event: 'gunshot',
    });
  });

  it('caps boosted score at 10', () => {
    const merged = merge(
      [createSuccessEvaluation(10, 20, 9)],
      [createAudioEvent(15, 0.8, 'gunshot')],
    );

    // 9 + 2 would be 11; the result must be clamped.
    expect(merged[0].score).toBe(10);
  });

  it('ignores audio event when LLM segment is outside boost window', () => {
    const merged = merge(
      [createSuccessEvaluation(10, 20, 7)],
      [createAudioEvent(50, 0.8, 'gunshot')],
    );

    expect(merged).toHaveLength(2);
    expect(merged[0]).toMatchObject({
      source: 'transcript',
      score: 7,
    });
    expect(merged[1]).toMatchObject({
      source: 'audio',
      start: 45,
      end: 65,
    });
  });

  it('ignores LLM segments with interesting=false', () => {
    const merged = merge(
      [createSuccessEvaluation(10, 20, 7, false)],
      [createAudioEvent(15, 0.8, 'gunshot')],
    );

    expect(merged).toHaveLength(0);
  });

  it('ignores failed LLM segments', () => {
    const merged = merge([createFailedEvaluation()], [createAudioEvent(15, 0.8, 'gunshot')]);

    expect(merged).toHaveLength(1);
    expect(merged[0]).toMatchObject({
      source: 'audio',
    });
  });

  it('handles multiple LLM segments and audio events correctly', () => {
    const evaluations = [createSuccessEvaluation(10, 20, 6), createSuccessEvaluation(50, 60, 7)];
    const events = [createAudioEvent(15, 0.8, 'gunshot'), createAudioEvent(100, 0.9, 'explosion')];

    const merged = merge(evaluations, events);

    expect(merged).toHaveLength(3);
    expect(merged[0]).toMatchObject({
      source: 'both',
      score: 8,
    });
    expect(merged[1]).toMatchObject({
      source: 'transcript',
      score: 7,
    });
    expect(merged[2]).toMatchObject({
      source: 'audio',
      start: 95,
      end: 115,
    });
  });

  it('handles audio event at start time with clamp to 0 for negative start', () => {
    const merged = merge([], [createAudioEvent(3, 0.8, 'gunshot')]);

    expect(merged).toHaveLength(1);
    // 3 - 5 would be negative, so the start is clamped to 0; end is 3 + 15.
    expect(merged[0]).toMatchObject({
      start: 0,
      end: 18,
    });
  });

  it('uses default config values when overrides not provided', () => {
    const evaluations = [createSuccessEvaluation(10, 20, 6)];
    const events = [createAudioEvent(15, 0.8, 'gunshot')];

    // Deliberately called without the positional overrides.
    const merged = mergeSignals(evaluations, events);

    expect(merged).toHaveLength(1);
    expect(merged[0]).toMatchObject({
      source: 'both',
    });
  });
});
|
|
@@ -1,150 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
2
|
-
import { TranscriptDetector } from '../src/services/transcriptDetector/index.js';
|
|
3
|
-
import type { TranscriptAnalyzer } from '../src/services/transcriptAnalyzers/index.js';
|
|
4
|
-
import type { TranscriptLine } from '../src/types/index.js';
|
|
5
|
-
import type { Cache } from '../src/utils/cache.js';
|
|
6
|
-
|
|
7
|
-
// ---------------------------------------------------------------------------
|
|
8
|
-
// Helpers
|
|
9
|
-
// ---------------------------------------------------------------------------
|
|
10
|
-
|
|
11
|
-
/** First sample transcript line: two seconds starting at 0. */
const LINE_A: TranscriptLine = {
  text: 'Hello world',
  start: 0,
  duration: 2,
};

/** Second sample transcript line: two seconds starting at 2. */
const LINE_B: TranscriptLine = {
  text: 'Second line',
  start: 2,
  duration: 2,
};
|
|
13
|
-
|
|
14
|
-
/**
 * Creates a stub `TranscriptAnalyzer` whose `detect` mock either resolves with
 * the given lines or rejects with the given error.
 *
 * @param source - Value exposed as the analyzer's `source`.
 * @param result - Lines to resolve with, or an `Error` to reject with.
 */
function makeAnalyzer(source: string, result: TranscriptLine[] | Error): TranscriptAnalyzer {
  const detect = vi.fn().mockImplementation(() => {
    if (result instanceof Error) {
      return Promise.reject(result);
    }
    return Promise.resolve(result);
  });

  // Only the members the tests touch are provided, hence the double cast.
  return { source, detect } as unknown as TranscriptAnalyzer;
}
|
|
24
|
-
|
|
25
|
-
/**
 * Creates a stub `Cache` exposing only `readTranscript` and `writeTranscript`.
 *
 * @param cachedLines - Value `readTranscript` resolves with; `null` (the
 *   default) is what the tests use to model a cache miss.
 */
function makeCache(cachedLines: TranscriptLine[] | null = null): Cache {
  const readTranscript = vi.fn().mockResolvedValue(cachedLines);
  const writeTranscript = vi.fn().mockResolvedValue(undefined);

  // Partial stub: only the two transcript methods are provided.
  return { readTranscript, writeTranscript } as unknown as Cache;
}
|
|
31
|
-
|
|
32
|
-
// ---------------------------------------------------------------------------
|
|
33
|
-
// Tests
|
|
34
|
-
// ---------------------------------------------------------------------------
|
|
35
|
-
|
|
36
|
-
/**
 * Unit tests for `TranscriptDetector`: constructor validation, cache-first
 * lookup, provider fallback chain, chunk building and `audioPath` forwarding.
 */
describe('TranscriptDetector', () => {
  const VIDEO_ID = 'abc123';

  describe('constructor', () => {
    it('throws when chain is empty', () => {
      const construct = () => new TranscriptDetector([]);
      expect(construct).toThrow('TranscriptDetector requires at least one TranscriptAnalyzer');
    });
  });

  describe('detect — cache-first behaviour', () => {
    it('returns cached lines without calling any analyzer', async () => {
      const cachedLines = [LINE_A, LINE_B];
      const ytdlp = makeAnalyzer('ytdlp', []);
      const warmCache = makeCache(cachedLines);
      const detector = new TranscriptDetector([ytdlp]);

      const outcome = await detector.detect(VIDEO_ID, null, warmCache);

      expect(outcome.lines).toEqual(cachedLines);
      expect(ytdlp.detect).not.toHaveBeenCalled();
      expect(warmCache.writeTranscript).not.toHaveBeenCalled();
    });
  });

  describe('detect — provider chain', () => {
    let cache: Cache;

    beforeEach(() => {
      // Empty cache, so every call falls through to the analyzer chain.
      cache = makeCache(null);
    });

    it('returns lines from the first analyzer on success', async () => {
      const ytdlp = makeAnalyzer('ytdlp', [LINE_A]);
      const detector = new TranscriptDetector([ytdlp]);

      const outcome = await detector.detect(VIDEO_ID, null, cache);

      expect(outcome.lines).toEqual([LINE_A]);
      expect(cache.writeTranscript).toHaveBeenCalledWith('abc123', [LINE_A]);
    });

    it('falls back to second analyzer when first throws', async () => {
      const failing = makeAnalyzer('ytdlp', new Error('no subtitles'));
      const fallback = makeAnalyzer('whisper', [LINE_B]);
      const detector = new TranscriptDetector([failing, fallback]);

      const outcome = await detector.detect(VIDEO_ID, null, cache);

      expect(outcome.lines).toEqual([LINE_B]);
      expect(cache.writeTranscript).toHaveBeenCalledWith('abc123', [LINE_B]);
    });

    it('re-throws the last error when the whole chain is exhausted', async () => {
      const ytdlpError = new Error('ytdlp failed');
      const whisperError = new Error('whisper crashed');
      const detector = new TranscriptDetector([
        makeAnalyzer('ytdlp', ytdlpError),
        makeAnalyzer('whisper', whisperError),
      ]);

      await expect(detector.detect(VIDEO_ID, null, cache)).rejects.toThrow('whisper crashed');
    });

    it('does not call the second analyzer when the first succeeds', async () => {
      const primary = makeAnalyzer('ytdlp', [LINE_A]);
      const secondary = makeAnalyzer('whisper', [LINE_B]);
      const detector = new TranscriptDetector([primary, secondary]);

      await detector.detect(VIDEO_ID, null, cache);
      expect(secondary.detect).not.toHaveBeenCalled();
    });
  });

  describe('detect — chunk/micro-block building', () => {
    it('returns non-empty microBlocks and chunks for multi-line transcripts', async () => {
      // Twenty contiguous 5-second lines: enough for at least one micro-block and one chunk.
      const transcript: TranscriptLine[] = Array.from({ length: 20 }, (_, i) => ({
        text: `line ${i}`,
        start: i * 5,
        duration: 5,
      }));
      const detector = new TranscriptDetector([makeAnalyzer('ytdlp', transcript)]);

      const outcome = await detector.detect(VIDEO_ID, null, makeCache(null));

      expect(outcome.lines).toHaveLength(20);
      expect(outcome.microBlocks.length).toBeGreaterThan(0);
      expect(outcome.chunks.length).toBeGreaterThan(0);
    });

    it('returns empty microBlocks and chunks for an empty transcript', async () => {
      const detector = new TranscriptDetector([makeAnalyzer('ytdlp', [])]);

      const outcome = await detector.detect(VIDEO_ID, null, makeCache(null));

      expect(outcome.lines).toHaveLength(0);
      expect(outcome.microBlocks).toHaveLength(0);
      expect(outcome.chunks).toHaveLength(0);
    });
  });

  describe('detect — audioPath forwarding', () => {
    it('passes audioPath through to the analyzer', async () => {
      const whisper = makeAnalyzer('whisper', [LINE_A]);
      const detector = new TranscriptDetector([whisper]);

      await detector.detect(VIDEO_ID, '/tmp/audio.wav', makeCache(null));
      expect(whisper.detect).toHaveBeenCalledWith('abc123', '/tmp/audio.wav');
    });
  });
});
|
|
@@ -1,179 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from 'vitest';
|
|
2
|
-
import { parseVtt } from '../src/services/transcriptFetcher/index.js';
|
|
3
|
-
|
|
4
|
-
/** Two plain cues preceded by the `Kind` / `Language` header lines. */
const BASIC_VTT = `WEBVTT
Kind: captions
Language: en

00:00:01.000 --> 00:00:03.500
Hello world

00:00:04.000 --> 00:00:06.000
This is a test

`;

/** Cues carrying inline timestamp tags and `<c>` tags that must be stripped. */
const INLINE_TAGS_VTT = `WEBVTT

00:00:01.000 --> 00:00:04.000
<00:00:01.000><c>Hello</c> <00:00:02.000><c>world</c>

00:00:05.000 --> 00:00:08.000
<c>Some</c> <c>tagged</c> <c>text</c>

`;

/** Two consecutive cues with identical text, followed by a distinct cue. */
const DUPLICATE_CUES_VTT = `WEBVTT

00:00:01.000 --> 00:00:03.000
Repeated line

00:00:02.000 --> 00:00:04.000
Repeated line

00:00:04.000 --> 00:00:06.000
New line

`;

/**
 * Cues whose text uses escaped HTML entities.
 *
 * The entities must stay escaped in this fixture: the tests expect the parser
 * to decode them to `&`, `<` and `>`. A raw `<tag>` here would look like
 * inline markup instead of text.
 */
const HTML_ENTITIES_VTT = `WEBVTT

00:00:01.000 --> 00:00:03.000
Hello &amp; world

00:00:04.000 --> 00:00:06.000
&lt;tag&gt; content

`;

/** A single cue whose text spans two lines. */
const MULTILINE_CUE_VTT = `WEBVTT

00:00:01.000 --> 00:00:04.000
First line
Second line

`;

/** One cue that is empty once tags are removed, then one with real text. */
const EMPTY_CUES_VTT = `WEBVTT

00:00:01.000 --> 00:00:03.000
<c></c>

00:00:04.000 --> 00:00:06.000
Real content

`;

/** A cue with a non-zero hours component in its timestamps. */
const HOURS_VTT = `WEBVTT

01:30:00.000 --> 01:30:05.500
Deep into the video

`;

/** A cue whose timestamps use `,` instead of `.` before the milliseconds. */
const COMMA_SEPARATOR_VTT = `WEBVTT

00:00:01,000 --> 00:00:03,500
Comma separated timestamps

`;
|
|
80
|
-
|
|
81
|
-
/**
 * Unit tests for `parseVtt`, which turns raw WebVTT text into transcript lines
 * with `text`, `start` and `duration` fields (times in seconds).
 */
describe('parseVtt', () => {
  describe('basic parsing', () => {
    it('parses a standard VTT with two cues', () => {
      const result = parseVtt(BASIC_VTT);
      expect(result).toHaveLength(2);
    });

    it('normalizes timestamps to seconds', () => {
      const result = parseVtt(BASIC_VTT);
      expect(result[0].start).toBeCloseTo(1.0);
      expect(result[0].duration).toBeCloseTo(2.5);
      expect(result[1].start).toBeCloseTo(4.0);
      expect(result[1].duration).toBeCloseTo(2.0);
    });

    it('preserves cue text', () => {
      const result = parseVtt(BASIC_VTT);
      expect(result[0].text).toBe('Hello world');
      expect(result[1].text).toBe('This is a test');
    });
  });

  describe('inline tag stripping', () => {
    it('strips VTT timestamp tags and <c> tags', () => {
      const result = parseVtt(INLINE_TAGS_VTT);
      expect(result[0].text).toBe('Hello world');
      expect(result[1].text).toBe('Some tagged text');
    });
  });

  describe('deduplication', () => {
    it('skips consecutive duplicate cue text', () => {
      const result = parseVtt(DUPLICATE_CUES_VTT);
      // "Repeated line" should appear only once
      expect(result.filter((l) => l.text === 'Repeated line')).toHaveLength(1);
    });

    it('keeps non-duplicate cues after duplicates', () => {
      const result = parseVtt(DUPLICATE_CUES_VTT);
      expect(result[result.length - 1].text).toBe('New line');
    });
  });

  describe('HTML entity decoding', () => {
    // Titles name the escaped entity; the assertions check the decoded character.
    it('decodes &amp;', () => {
      const result = parseVtt(HTML_ENTITIES_VTT);
      expect(result[0].text).toBe('Hello & world');
    });

    it('decodes &lt; and &gt;', () => {
      const result = parseVtt(HTML_ENTITIES_VTT);
      expect(result[1].text).toBe('<tag> content');
    });
  });

  describe('multiline cues', () => {
    it('joins multiple text lines within a cue with a space', () => {
      const result = parseVtt(MULTILINE_CUE_VTT);
      expect(result[0].text).toBe('First line Second line');
    });
  });

  describe('empty cues', () => {
    it('skips cues that are empty after tag stripping', () => {
      const result = parseVtt(EMPTY_CUES_VTT);
      expect(result).toHaveLength(1);
      expect(result[0].text).toBe('Real content');
    });
  });

  describe('timestamp formats', () => {
    it('handles HH:MM:SS.mmm with large hours', () => {
      const result = parseVtt(HOURS_VTT);
      // 01:30:00 expressed in seconds.
      expect(result[0].start).toBeCloseTo(1 * 3600 + 30 * 60);
      expect(result[0].duration).toBeCloseTo(5.5);
    });

    it('handles comma as decimal separator in timestamps', () => {
      const result = parseVtt(COMMA_SEPARATOR_VTT);
      expect(result[0].start).toBeCloseTo(1.0);
      expect(result[0].duration).toBeCloseTo(2.5);
    });
  });

  describe('edge cases', () => {
    it('returns empty array for empty string', () => {
      expect(parseVtt('')).toEqual([]);
    });

    it('returns empty array for WEBVTT header only', () => {
      expect(parseVtt('WEBVTT\n')).toEqual([]);
    });

    it('returns empty array when all cues are empty after stripping', () => {
      const vtt = 'WEBVTT\n\n00:00:01.000 --> 00:00:02.000\n<c></c>\n\n';
      expect(parseVtt(vtt)).toEqual([]);
    });
  });
});
|