@thunderkiller/video-clipper 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/LICENSE +15 -0
- package/package.json +1 -1
- package/.github/workflows/ci.yml +0 -42
- package/.github/workflows/release.yml +0 -76
- package/.husky/pre-commit +0 -3
- package/.prettierignore +0 -6
- package/.prettierrc +0 -7
- package/.releaserc.json +0 -21
- package/AGENTS.md +0 -122
- package/docs/free-models.md +0 -78
- package/docs/plan.md +0 -442
- package/docs/refactorPhases.md +0 -105
- package/docs/yt-downloader.md +0 -440
- package/requirements.txt +0 -5
- package/scripts/detect_events.py +0 -81
- package/scripts/detect_events_whisper.py +0 -101
- package/scripts/transcribe_whisper.py +0 -70
- package/src/cli.ts +0 -186
- package/src/config/env.ts +0 -18
- package/src/config/index.ts +0 -2
- package/src/index.ts +0 -46
- package/src/pipeline/runner.ts +0 -147
- package/src/pipeline/stages/audioProcessor.ts +0 -127
- package/src/pipeline/stages/clipExporter.ts +0 -76
- package/src/pipeline/stages/segmentAnalyzer.ts +0 -72
- package/src/pipeline/stages/segmentSelector.ts +0 -39
- package/src/pipeline/stages/videoResolver.ts +0 -44
- package/src/services/audioAnalyzers/base.ts +0 -32
- package/src/services/audioAnalyzers/factory.ts +0 -69
- package/src/services/audioAnalyzers/gemini.ts +0 -136
- package/src/services/audioAnalyzers/index.ts +0 -6
- package/src/services/audioAnalyzers/whisper.ts +0 -80
- package/src/services/audioAnalyzers/yamnet.ts +0 -54
- package/src/services/audioDownloader/index.ts +0 -102
- package/src/services/chunkBuilder/index.ts +0 -82
- package/src/services/clipGenerator/index.ts +0 -210
- package/src/services/clipRefiner/index.ts +0 -141
- package/src/services/eventDetector/index.ts +0 -68
- package/src/services/llmAnalyzer/LLMAnalyzer.ts +0 -98
- package/src/services/llmAnalyzer/index.ts +0 -231
- package/src/services/metadataExtractor/index.ts +0 -83
- package/src/services/segmentRanker/index.ts +0 -88
- package/src/services/signalMerger/index.ts +0 -53
- package/src/services/transcriptAnalyzers/base.ts +0 -26
- package/src/services/transcriptAnalyzers/factory.ts +0 -66
- package/src/services/transcriptAnalyzers/gemini.ts +0 -24
- package/src/services/transcriptAnalyzers/index.ts +0 -6
- package/src/services/transcriptAnalyzers/whisper.ts +0 -68
- package/src/services/transcriptAnalyzers/ytdlp.ts +0 -19
- package/src/services/transcriptDetector/index.ts +0 -122
- package/src/services/transcriptFetcher/index.ts +0 -147
- package/src/services/urlParser/index.ts +0 -52
- package/src/services/videoDownloader/index.ts +0 -268
- package/src/types/analyzer.ts +0 -23
- package/src/types/audio.ts +0 -19
- package/src/types/cache.ts +0 -8
- package/src/types/cli.ts +0 -22
- package/src/types/config.ts +0 -151
- package/src/types/downloader.ts +0 -15
- package/src/types/factory.ts +0 -3
- package/src/types/index.ts +0 -40
- package/src/types/pipeline.ts +0 -60
- package/src/types/segment.ts +0 -43
- package/src/types/transcript.ts +0 -22
- package/src/types/video.ts +0 -18
- package/src/utils/cache.ts +0 -224
- package/src/utils/chunker.ts +0 -60
- package/src/utils/dumper.ts +0 -41
- package/src/utils/format.ts +0 -10
- package/src/utils/logger.ts +0 -17
- package/src/utils/modelFactory.ts +0 -71
- package/src/utils/redactConfig.ts +0 -23
- package/src/utils/sliceAudio.ts +0 -35
- package/test-trigger.txt +0 -1
- package/tests/analyzerFactory.test.ts +0 -146
- package/tests/audioEventDetector.test.ts +0 -69
- package/tests/cache.test.ts +0 -203
- package/tests/chunkBuilder.test.ts +0 -146
- package/tests/chunker.test.ts +0 -95
- package/tests/eventDetector.test.ts +0 -103
- package/tests/llmAnalyzer.test.ts +0 -283
- package/tests/segmentRanker.test.ts +0 -133
- package/tests/setup.ts +0 -48
- package/tests/signalMerger.test.ts +0 -197
- package/tests/transcriptDetector.test.ts +0 -150
- package/tests/transcriptFetcher.test.ts +0 -179
- package/tests/urlParser.test.ts +0 -70
- package/tsconfig.json +0 -16
- package/tsconfig.test.json +0 -8
- package/vitest.config.ts +0 -8
package/docs/yt-downloader.md
DELETED
|
@@ -1,440 +0,0 @@
|
|
|
1
|
-
# yt-dlp Download Modes
|
|
2
|
-
|
|
3
|
-
This CLI supports two download strategies for generating video clips.
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## Mode 1: Full Video Download (Default)
|
|
8
|
-
|
|
9
|
-
Downloads entire video first, then uses `ffmpeg` to cut individual clips.
|
|
10
|
-
|
|
11
|
-
**When to use:**
|
|
12
|
-
|
|
13
|
-
- Generating many clips from one video
|
|
14
|
-
- Want flexibility to cut different clips later
|
|
15
|
-
- Internet connection is fast/stable
|
|
16
|
-
|
|
17
|
-
**Trade-offs:**
|
|
18
|
-
| Aspect | Full Download |
|
|
19
|
-
|--------|--------------|
|
|
20
|
-
| Speed | Slower initial download |
|
|
21
|
-
| Bandwidth | Higher |
|
|
22
|
-
| Disk Usage | Higher |
|
|
23
|
-
| Flexibility | Can cut different clips later |
|
|
24
|
-
|
|
25
|
-
**Command:**
|
|
26
|
-
|
|
27
|
-
```bash
|
|
28
|
-
# Default behavior (when using --clip)
|
|
29
|
-
npm run start -- <url> --clip
|
|
30
|
-
|
|
31
|
-
# Explicit flag
|
|
32
|
-
npm run start -- <url> --clip --download-sections all
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
---
|
|
36
|
-
|
|
37
|
-
## Mode 2: Segments Download
|
|
38
|
-
|
|
39
|
-
Downloads only top N segments using yt-dlp's `--download-sections` feature.
|
|
40
|
-
|
|
41
|
-
**When to use:**
|
|
42
|
-
|
|
43
|
-
- Generating only a few clips (1-5)
|
|
44
|
-
- Want to save bandwidth
|
|
45
|
-
- Video is very long but only need short clips
|
|
46
|
-
|
|
47
|
-
**Trade-offs:**
|
|
48
|
-
| Aspect | Segments Download |
|
|
49
|
-
|--------|-------------------|
|
|
50
|
-
| Speed | Faster for few clips |
|
|
51
|
-
| Bandwidth | Lower (only needed portions) |
|
|
52
|
-
| Disk Usage | Lower |
|
|
53
|
-
| Flexibility | Clips are final |
|
|
54
|
-
|
|
55
|
-
**Command:**
|
|
56
|
-
|
|
57
|
-
```bash
|
|
58
|
-
# Download top 3 segments
|
|
59
|
-
npm run start -- <url> --download-sections 3
|
|
60
|
-
|
|
61
|
-
# Download top 5 segments to custom directory
|
|
62
|
-
npm run start -- <url> --download-sections 5 --video-path ./my-clips
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
**Note:** Using `--download-sections N` implicitly enables `--clip` mode.
|
|
66
|
-
|
|
67
|
-
---
|
|
68
|
-
|
|
69
|
-
## Custom Output Path
|
|
70
|
-
|
|
71
|
-
Override default download/clip directories with `--video-path`:
|
|
72
|
-
|
|
73
|
-
```bash
|
|
74
|
-
# Full video to custom path
|
|
75
|
-
npm run start -- <url> --clip --video-path ./downloads
|
|
76
|
-
|
|
77
|
-
# Segments to custom path
|
|
78
|
-
npm run start -- <url> --download-sections 3 --video-path ./my-clips
|
|
79
|
-
```
|
|
80
|
-
|
|
81
|
-
This flag overrides:
|
|
82
|
-
|
|
83
|
-
- `DOWNLOAD_DIR` for full video downloads
|
|
84
|
-
- `OUTPUT_DIR` for segment downloads and clip organization
|
|
85
|
-
|
|
86
|
-
---
|
|
87
|
-
|
|
88
|
-
## Working with Pre-Downloaded Videos
|
|
89
|
-
|
|
90
|
-
If you already have a video downloaded (from yt-dlp, browser download, or other tool), you can skip the download step and work directly with that file.
|
|
91
|
-
|
|
92
|
-
**Workflow:**
|
|
93
|
-
|
|
94
|
-
```bash
|
|
95
|
-
# Step 1: Run analysis once to get segment timestamps
|
|
96
|
-
npm run start -- <url> --output-json analysis.json
|
|
97
|
-
|
|
98
|
-
# Step 2: (Optional) Edit timestamps in analysis.json if needed
|
|
99
|
-
# Edit the "start" and "end" values for each segment
|
|
100
|
-
|
|
101
|
-
# Step 3: Place your video in downloads/ directory
|
|
102
|
-
cp /path/to/your/video.mp4 downloads/<videoId>.mp4
|
|
103
|
-
|
|
104
|
-
# Step 4: Run again - will skip download and use your video
|
|
105
|
-
npm run start -- <url> --clip
|
|
106
|
-
```
|
|
107
|
-
|
|
108
|
-
**Use cases:**
|
|
109
|
-
|
|
110
|
-
- **Testing different settings** - Run different clip configurations without re-downloading
|
|
111
|
-
- **Manual timestamp adjustment** - Fine-tune segment boundaries based on visual inspection
|
|
112
|
-
- **Alternative video sources** - Work with videos downloaded from other tools or browsers
|
|
113
|
-
- **Large video files** - If you have a high-quality version, use that instead
|
|
114
|
-
|
|
115
|
-
**Notes:**
|
|
116
|
-
|
|
117
|
-
- The video file must be named exactly `{videoId}.mp4` in the `DOWNLOAD_DIR`
|
|
118
|
-
- You can apply `TIMESTAMP_OFFSET_SECONDS` globally instead of editing each timestamp
|
|
119
|
-
- Transcript cache is used, so re-running is fast (no API calls)
|
|
120
|
-
|
|
121
|
-
### Combining with Timestamp Offset
|
|
122
|
-
|
|
123
|
-
For pre-downloaded videos with known sync issues:
|
|
124
|
-
|
|
125
|
-
```bash
|
|
126
|
-
# Skip download, shift all clips 3 seconds earlier (offset of -3)
|
|
127
|
-
TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --clip
|
|
128
|
-
```
|
|
129
|
-
|
|
130
|
-
The CLI will find the existing video in `downloads/`, skip the download step, and apply the offset to all clip generation.
|
|
131
|
-
|
|
132
|
-
---
|
|
133
|
-
|
|
134
|
-
## How It Works
|
|
135
|
-
|
|
136
|
-
### Full Download Mode
|
|
137
|
-
|
|
138
|
-
1. Download entire video: `yt-dlp <url>`
|
|
139
|
-
2. Cut clips with ffmpeg: `ffmpeg -i video.mp4 -ss <start> -to <end> -c:v libx264 -preset fast -c:a aac clip.mp4`
|
|
140
|
-
3. Re-encodes with libx264 (video) and aac (audio) for perfect audio/video sync
|
|
141
|
-
|
|
142
|
-
### Segments Download Mode
|
|
143
|
-
|
|
144
|
-
1. For top N segments: `yt-dlp --download-sections "*{start}-{end}" <url>`
|
|
145
|
-
2. Downloads are parallel (concurrency controlled by `LLM_CONCURRENCY`)
|
|
146
|
-
3. No ffmpeg cutting needed — segments are pre-cut by yt-dlp
|
|
147
|
-
4. yt-dlp's `--download-sections` ensures proper audio/video sync
|
|
148
|
-
5. Only top N segments (by score) are downloaded
|
|
149
|
-
|
|
150
|
-
**Note:** The full download mode re-encodes clips to ensure audio/video synchronization, which is slower but produces accurate results. The segments download mode relies on yt-dlp's built-in cutting which also maintains proper sync.
|
|
151
|
-
|
|
152
|
-
### Millisecond Precision
|
|
153
|
-
|
|
154
|
-
The `--download-sections` mode now uses millisecond precision (HH:MM:SS.mmm format) instead of just HH:MM:SS. This ensures accurate segment downloads, especially important for short clips.
|
|
155
|
-
|
|
156
|
-
**Before:**
|
|
157
|
-
|
|
158
|
-
```
|
|
159
|
-
*00:02:00-00:02:30 # Lost decimal part (120.5s became 120s)
|
|
160
|
-
```
|
|
161
|
-
|
|
162
|
-
**After:**
|
|
163
|
-
|
|
164
|
-
```
|
|
165
|
-
*00:02:00.500-00:02:30.000 # Preserves exact timestamp (120.5s kept as 120.500s)
|
|
166
|
-
```
|
|
167
|
-
|
|
168
|
-
### Timestamp Offset
|
|
169
|
-
|
|
170
|
-
The `TIMESTAMP_OFFSET_SECONDS` config option applies a global adjustment to all timestamps in both modes:
|
|
171
|
-
|
|
172
|
-
- **Positive value** = Shift clips later in time
|
|
173
|
-
- **Negative value** = Shift clips earlier in time
|
|
174
|
-
- **Default** = 0 (no adjustment)
|
|
175
|
-
|
|
176
|
-
This is useful when transcript timestamps don't perfectly match the actual video timing.
|
|
177
|
-
|
|
178
|
-
---
|
|
179
|
-
|
|
180
|
-
## Troubleshooting: Timestamp Alignment
|
|
181
|
-
|
|
182
|
-
### Problem: Audio is delayed or starts early
|
|
183
|
-
|
|
184
|
-
**Symptoms:**
|
|
185
|
-
|
|
186
|
-
- Video starts at correct moment but audio plays 2-5 seconds later/earlier
|
|
187
|
-
- Lip movements don't match speech in the clip
|
|
188
|
-
- Content in clip doesn't match the transcript segment
|
|
189
|
-
|
|
190
|
-
**Root Causes:**
|
|
191
|
-
|
|
192
|
-
1. **Transcript misalignment** - Transcript timestamps don't perfectly match the video
|
|
193
|
-
- **Auto-generated captions**: Often have 1-3 second delays
|
|
194
|
-
- **Manual captions**: Usually more accurate but can have timing issues
|
|
195
|
-
- **Multiple caption tracks**: Transcripts from different video versions
|
|
196
|
-
|
|
197
|
-
2. **Millisecond precision loss** - Old implementation lost decimal seconds
|
|
198
|
-
- **Fixed**: Now using HH:MM:SS.mmm format for `--download-sections`
|
|
199
|
-
|
|
200
|
-
3. **Version differences** - The transcript might be from a slightly different version of the video
|
|
201
|
-
|
|
202
|
-
### Solution: Use `TIMESTAMP_OFFSET_SECONDS`
|
|
203
|
-
|
|
204
|
-
**What it does:**
|
|
205
|
-
Applies a global offset to all clip timestamps. Positive = shift later, negative = shift earlier.
|
|
206
|
-
|
|
207
|
-
**How to use:**
|
|
208
|
-
|
|
209
|
-
```bash
|
|
210
|
-
# Add to .env
|
|
211
|
-
TIMESTAMP_OFFSET_SECONDS=-3
|
|
212
|
-
|
|
213
|
-
# Or inline
|
|
214
|
-
TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --clip
|
|
215
|
-
```
|
|
216
|
-
|
|
217
|
-
### Finding the Correct Offset
|
|
218
|
-
|
|
219
|
-
**Step 1: Test with logging**
|
|
220
|
-
|
|
221
|
-
Run a single segment and observe the logs:
|
|
222
|
-
|
|
223
|
-
```bash
|
|
224
|
-
TIMESTAMP_OFFSET_SECONDS=0 npm run start -- <url> --download-sections 1
|
|
225
|
-
```
|
|
226
|
-
|
|
227
|
-
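Look for log lines like these (this sample was captured with an offset of `-3`; with an offset of `0` the "Adjusted" values equal the "Requested" values):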
|
|
228
|
-
|
|
229
|
-
```
|
|
230
|
-
[info] Downloading segment 1: 00:02:00.500-00:02:30.000 (strong opinion...)
|
|
231
|
-
[info] Requested: 120.50s - 150.00s
|
|
232
|
-
[info] Adjusted: 117.50s - 147.00s (offset: -3s)
|
|
233
|
-
[info] Cutting clip: start=117.50s, end=147.00s, duration=29.50s
|
|
234
|
-
```
|
|
235
|
-
|
|
236
|
-
**Step 2: Play and verify**
|
|
237
|
-
|
|
238
|
-
- Open the generated clip
|
|
239
|
-
- Check if the moment matches the transcript description
|
|
240
|
-
- Note if it's too early or too late
|
|
241
|
-
|
|
242
|
-
**Step 3: Adjust offset**
|
|
243
|
-
|
|
244
|
-
If clip **starts 3 seconds late**:
|
|
245
|
-
|
|
246
|
-
```bash
|
|
247
|
-
TIMESTAMP_OFFSET_SECONDS=-3 # Negative = shift earlier
|
|
248
|
-
```
|
|
249
|
-
|
|
250
|
-
If clip **starts 2 seconds early**:
|
|
251
|
-
|
|
252
|
-
```bash
|
|
253
|
-
TIMESTAMP_OFFSET_SECONDS=2 # Positive = shift later
|
|
254
|
-
```
|
|
255
|
-
|
|
256
|
-
**Step 4: Verify with multiple clips**
|
|
257
|
-
|
|
258
|
-
```bash
|
|
259
|
-
TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --download-sections 3
|
|
260
|
-
```
|
|
261
|
-
|
|
262
|
-
Check if the offset works consistently across different segments.
|
|
263
|
-
|
|
264
|
-
### Coarse-to-Fine Search for Optimal Offset
|
|
265
|
-
|
|
266
|
-
If you're unsure of the exact offset:
|
|
267
|
-
|
|
268
|
-
```bash
|
|
269
|
-
# Try 0, -3, -6, -9 to see which is closest
|
|
270
|
-
for offset in 0 -3 -6 -9; do
|
|
271
|
-
TIMESTAMP_OFFSET_SECONDS=$offset npm run start -- <url> --download-sections 1
|
|
272
|
-
echo "Tested offset: $offset"
|
|
273
|
-
# Play and check accuracy
|
|
274
|
-
done
|
|
275
|
-
```
|
|
276
|
-
|
|
277
|
-
Then narrow down: `-3` seems good, try `-2` and `-4`, etc.
|
|
278
|
-
|
|
279
|
-
### Common Scenarios
|
|
280
|
-
|
|
281
|
-
| Scenario | Likely Offset | Explanation |
|
|
282
|
-
| ----------------------- | ------------- | --------------------------------------------------- |
|
|
283
|
-
| Auto-generated captions | `-1` to `-3` | ASR timing often lags behind actual speech |
|
|
284
|
-
| Manual captions | `0` to `-1` | Usually more accurate, small sync issues |
|
|
285
|
-
| Multiple caption tracks | `-2` to `-5` | Different versions may have systematic offset |
|
|
286
|
-
| Regional variations | Varies | Different regions may have different caption timing |
|
|
287
|
-
|
|
288
|
-
### Verifying the Fix
|
|
289
|
-
|
|
290
|
-
After applying `TIMESTAMP_OFFSET_SECONDS`, verify:
|
|
291
|
-
|
|
292
|
-
1. **Watch the clip**: Audio and video should be synchronized
|
|
293
|
-
2. **Check multiple clips**: Offset should work consistently
|
|
294
|
-
3. **Compare with original**: Clip should match the described content
|
|
295
|
-
|
|
296
|
-
If offset varies between segments, the issue might be video-specific rather than a global transcript offset.
|
|
297
|
-
|
|
298
|
-
---
|
|
299
|
-
|
|
300
|
-
## Progress Display
|
|
301
|
-
|
|
302
|
-
Both modes show real-time yt-dlp progress:
|
|
303
|
-
|
|
304
|
-
```
|
|
305
|
-
[download] 45.2% of 125MiB at 2.5MiB/s ETA 00:32
|
|
306
|
-
```
|
|
307
|
-
|
|
308
|
-
Progress updates inline (same line) to keep logs clean.
|
|
309
|
-
|
|
310
|
-
---
|
|
311
|
-
|
|
312
|
-
## ⚠️ Requirements
|
|
313
|
-
|
|
314
|
-
Both modes require:
|
|
315
|
-
|
|
316
|
-
1. **yt-dlp** — Install from https://github.com/yt-dlp/yt-dlp
|
|
317
|
-
2. **ffmpeg** — Required for cutting clips in full download mode; yt-dlp's `--download-sections` also relies on ffmpeg to extract time ranges
|
|
318
|
-
|
|
319
|
-
```bash
|
|
320
|
-
# macOS
|
|
321
|
-
brew install yt-dlp ffmpeg
|
|
322
|
-
|
|
323
|
-
# Ubuntu/Debian
|
|
324
|
-
sudo apt-get install yt-dlp ffmpeg
|
|
325
|
-
|
|
326
|
-
# Windows
|
|
327
|
-
# Install from GitHub releases or use winget
|
|
328
|
-
```
|
|
329
|
-
|
|
330
|
-
---
|
|
331
|
-
|
|
332
|
-
## Examples
|
|
333
|
-
|
|
334
|
-
### Download full video and cut clips
|
|
335
|
-
|
|
336
|
-
```bash
|
|
337
|
-
npm run start -- https://youtube.com/watch?v=abc123 --clip
|
|
338
|
-
```
|
|
339
|
-
|
|
340
|
-
### Download top 3 segments only
|
|
341
|
-
|
|
342
|
-
```bash
|
|
343
|
-
npm run start -- https://youtube.com/watch?v=abc123 --download-sections 3
|
|
344
|
-
```
|
|
345
|
-
|
|
346
|
-
### Download top 5 segments to custom directory
|
|
347
|
-
|
|
348
|
-
```bash
|
|
349
|
-
npm run start -- https://youtube.com/watch?v=abc123 --download-sections 5 --video-path ./my-clips
|
|
350
|
-
```
|
|
351
|
-
|
|
352
|
-
### Download segments with timestamp offset
|
|
353
|
-
|
|
354
|
-
```bash
|
|
355
|
-
# Fix 3-second audio delay
|
|
356
|
-
TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --download-sections 3
|
|
357
|
-
|
|
358
|
-
# Custom quality preset with offset
|
|
359
|
-
FFMPEG_PRESET=medium TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --download-sections 5
|
|
360
|
-
```
|
|
361
|
-
|
|
362
|
-
### Full video to custom directory
|
|
363
|
-
|
|
364
|
-
```bash
|
|
365
|
-
npm run start -- https://youtube.com/watch?v=abc123 --clip --video-path ./downloads
|
|
366
|
-
```
|
|
367
|
-
|
|
368
|
-
### Set default mode via environment
|
|
369
|
-
|
|
370
|
-
```bash
|
|
371
|
-
# Add to .env
|
|
372
|
-
DOWNLOAD_SECTIONS_MODE=all
|
|
373
|
-
|
|
374
|
-
# Or inline
|
|
375
|
-
DOWNLOAD_SECTIONS_MODE=all npm run start -- <url> --clip
|
|
376
|
-
```
|
|
377
|
-
|
|
378
|
-
### Pre-downloaded video workflow
|
|
379
|
-
|
|
380
|
-
```bash
|
|
381
|
-
# Step 1: Download your video manually (any method)
|
|
382
|
-
cp /path/to/video.mp4 downloads/abc123.mp4
|
|
383
|
-
|
|
384
|
-
# Step 2: Run analysis (will skip download)
|
|
385
|
-
npm run start -- https://youtube.com/watch?v=abc123 --clip
|
|
386
|
-
|
|
387
|
-
# Add offset if needed
|
|
388
|
-
TIMESTAMP_OFFSET_SECONDS=-2 npm run start -- https://youtube.com/watch?v=abc123 --clip
|
|
389
|
-
```
|
|
390
|
-
|
|
391
|
-
---
|
|
392
|
-
|
|
393
|
-
## Backward Compatibility Note
|
|
394
|
-
|
|
395
|
-
The old `--download-sections segments` flag is deprecated but still works (shows a warning). It now behaves the same as `--download-sections all` (downloads full video).
|
|
396
|
-
|
|
397
|
-
```bash
|
|
398
|
-
# Old style (deprecated, still works)
|
|
399
|
-
npm run start -- <url> --clip --download-sections segments
|
|
400
|
-
|
|
401
|
-
# New recommended style
|
|
402
|
-
npm run start -- <url> --clip --download-sections all
|
|
403
|
-
```
|
|
404
|
-
|
|
405
|
-
---
|
|
406
|
-
|
|
407
|
-
## Configuration Options
|
|
408
|
-
|
|
409
|
-
### FFMPEG_PRESET
|
|
410
|
-
|
|
411
|
-
Controls encoding speed/quality trade-off for clip generation in full download mode:
|
|
412
|
-
|
|
413
|
-
| Preset | Speed | Quality | Use Case |
|
|
414
|
-
| ---------------- | --------- | ------- | ---------------------- |
|
|
415
|
-
| `ultrafast` | Very fast | Lowest | Quick testing |
|
|
416
|
-
| `fast` (default) | Fast | Good | Balanced performance |
|
|
417
|
-
| `medium` | Medium | Better | Higher quality clips |
|
|
418
|
-
| `slow` | Slow | High | Final production clips |
|
|
419
|
-
|
|
420
|
-
```bash
|
|
421
|
-
# Set in .env
|
|
422
|
-
FFMPEG_PRESET=medium
|
|
423
|
-
|
|
424
|
-
# Or inline
|
|
425
|
-
FFMPEG_PRESET=slow npm run start -- <url> --clip
|
|
426
|
-
```
|
|
427
|
-
|
|
428
|
-
### TIMESTAMP_OFFSET_SECONDS
|
|
429
|
-
|
|
430
|
-
Global timestamp adjustment in seconds. Negative = earlier, Positive = later.
|
|
431
|
-
|
|
432
|
-
```bash
|
|
433
|
-
# Set in .env
|
|
434
|
-
TIMESTAMP_OFFSET_SECONDS=-3
|
|
435
|
-
|
|
436
|
-
# Or inline
|
|
437
|
-
TIMESTAMP_OFFSET_SECONDS=2 npm run start -- <url> --clip
|
|
438
|
-
```
|
|
439
|
-
|
|
440
|
-
Use this to fix systematic audio/video desynchronization when transcript timestamps don't match the video.
|
package/requirements.txt
DELETED
package/scripts/detect_events.py
DELETED
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
import tensorflow_hub as hub
|
|
2
|
-
import soundfile as sf
|
|
3
|
-
import numpy as np
|
|
4
|
-
import json
|
|
5
|
-
import sys
|
|
6
|
-
|
|
7
|
-
# Maps a YAMNet class index to the event label emitted in the JSON output.
# detect_events uses each key to index into a per-frame score vector.
# NOTE(review): confirm every index against yamnet_class_map.csv — the
# labels here are this script's own names, not the model's display names.
GAME_EVENTS = {
    67: 'gunshot',
    366: 'explosion',
    389: 'crowd_cheering',
    63: 'gunfire_burst',
}
|
|
13
|
-
|
|
14
|
-
def _summarize_cluster(cluster):
    """Reduce a non-empty list of event dicts to one representative event."""
    # max() returns the first maximal element, so ties keep the earliest event.
    strongest = max(cluster, key=lambda e: e['confidence'])
    return {
        'time': cluster[0]['time'],
        # Take label and confidence from the same detection so the reported
        # confidence always belongs to the reported event type.
        'event': strongest['event'],
        'confidence': strongest['confidence'],
    }


def cluster_events(events, gap=1.5):
    """Merge detections that lie close together in time into single events.

    Events are sorted by ``'time'``; an event joins the current cluster when
    it is at most ``gap`` after the *previous* event (so a long chain of
    closely spaced detections forms one cluster).

    Args:
        events: Iterable of dicts with ``'time'``, ``'event'`` and
            ``'confidence'`` keys. The input is not modified.
        gap: Maximum spacing between consecutive events, in the same unit
            as ``'time'``, for them to share a cluster.

    Returns:
        A list with one dict per cluster, in time order: ``'time'`` is the
        cluster's first timestamp, ``'event'`` and ``'confidence'`` come from
        the highest-confidence member. Empty input yields an empty list.
    """
    if not events:
        return []

    ordered = sorted(events, key=lambda e: e['time'])
    clusters = []
    current = [ordered[0]]

    # Walk consecutive pairs instead of indexing with range(len(...)).
    for previous, event in zip(ordered, ordered[1:]):
        if event['time'] - previous['time'] <= gap:
            current.append(event)
        else:
            clusters.append(_summarize_cluster(current))
            current = [event]

    # The last cluster is never empty, so it is always flushed.
    clusters.append(_summarize_cluster(current))
    return clusters
|
|
45
|
-
|
|
46
|
-
def detect_events(audio_path, threshold=0.30):
    """Run YAMNet over an audio file and return clustered game-audio events.

    Args:
        audio_path: Path to an audio file readable by ``soundfile``.
        threshold: A class score must be strictly greater than this value
            for a frame to count as a detection.

    Returns:
        The output of ``cluster_events(..., gap=1.5)``: a list of dicts with
        ``'time'``, ``'event'`` and ``'confidence'`` keys.
    """
    model = hub.load('https://tfhub.dev/google/yamnet/1')
    wav, sr = sf.read(audio_path, dtype='float32')

    # soundfile returns a (frames, channels) array for multi-channel files,
    # while YAMNet takes a 1-D mono waveform; average the channels down.
    if wav.ndim > 1:
        wav = wav.mean(axis=1)

    if sr != 16000:
        import warnings
        # Only warn: no resampling is done here, so the timestamps below are
        # accurate only for 16 kHz input.
        warnings.warn(f'Audio sample rate is {sr} Hz, expected 16000 Hz for YAMNet')

    scores, _, _ = model(wav)
    events = []

    # Consecutive score frames are treated as 0.48 s apart.
    for i, frame in enumerate(scores.numpy()):
        for cid, label in GAME_EVENTS.items():
            if frame[cid] > threshold:
                events.append({
                    'time': round(i * 0.48, 2),
                    'event': label,
                    'confidence': float(frame[cid]),
                })

    return cluster_events(events, gap=1.5)
|
|
67
|
-
|
|
68
|
-
if __name__ == '__main__':
    # CLI: python detect_events.py <audio_path> [threshold]
    # Results and errors are both written to stdout as JSON; any failure
    # exits with status 1.
    if len(sys.argv) < 2:
        print(json.dumps({'error': 'Usage: python detect_events.py <audio_path> [threshold]'}))
        sys.exit(1)

    try:
        # Parse arguments inside the try so a malformed threshold is reported
        # as a JSON error instead of an uncaught traceback.
        audio_path = sys.argv[1]
        threshold = float(sys.argv[2]) if len(sys.argv) > 2 else 0.30
        result = detect_events(audio_path, threshold)
        print(json.dumps(result))
    except Exception as e:
        print(json.dumps({'error': str(e)}))
        sys.exit(1)
|
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import sys
|
|
3
|
-
|
|
4
|
-
# Keyword sets per game profile.
|
|
5
|
-
# Keys are lowercase; matches are case-insensitive.
|
|
6
|
-
# Per-profile keyword lists. Order matters: score_text returns the first
# keyword in the list that matches, so earlier entries take precedence.
PROFILE_KEYWORDS: dict[str, list[str]] = {
    'valorant': [
        'ace', 'clutch', 'defuse', 'spike',
        '1v1', '1v2', '1v3', '1v4', '1v5',
        "let's go", 'no way', 'insane', 'bro', 'what', 'oh my god', 'omg',
        'unbelievable', 'crazy', 'yooo', 'yo', 'filthy', 'clean',
        'wallbang', 'headshot',
    ],
    'fps': [
        'kill', 'headshot', 'streak', 'collateral', 'insane', 'no way',
        "let's go", 'yooo', 'yo', 'crazy', 'oh my god', 'omg', 'unbelievable',
        'nice', 'what', 'bro',
    ],
    'boss_fight': [
        'finally', "let's go", 'dead', 'down', 'phase', 'unbelievable',
        'insane', 'crazy', 'no way', 'oh my god', 'omg', 'yooo', 'yo',
        'what', 'bro',
    ],
    'general': [
        'insane', 'crazy', 'no way', "let's go", 'oh my god', 'omg',
        'what', 'wow', 'yooo', 'yo', 'unbelievable', 'bro',
    ],
}
|
|
27
|
-
|
|
28
|
-
# Phrases that get full confidence (exact multi-word match carries more signal).
|
|
29
|
-
# Keywords scored at confidence 1.0 by score_text; all others score 0.8.
HIGH_CONFIDENCE_PHRASES: set[str] = {
    'ace',
    'clutch',
    "let's go",
    'no way',
    'oh my god',
    'omg',
    'unbelievable',
    '1v1',
    '1v2',
    '1v3',
    '1v4',
    '1v5',
    'finally',
}
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def score_text(text: str, keywords: list[str]) -> tuple[str | None, float]:
|
|
36
|
-
"""
|
|
37
|
-
Return the first matching keyword and its confidence, or (None, 0).
|
|
38
|
-
Multi-word phrases and high-confidence phrases get confidence 1.0;
|
|
39
|
-
single-word partial matches get 0.8.
|
|
40
|
-
"""
|
|
41
|
-
lower = text.lower()
|
|
42
|
-
for kw in keywords:
|
|
43
|
-
if kw in lower:
|
|
44
|
-
conf = 1.0 if kw in HIGH_CONFIDENCE_PHRASES else 0.8
|
|
45
|
-
return kw, conf
|
|
46
|
-
return None, 0.0
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def detect_events_whisper(
    audio_path: str,
    model_size: str = 'medium',
    game_profile: str = 'general',
    threshold: float = 0.3,
) -> list[dict]:
    """Transcribe audio with Whisper and flag segments that contain keywords.

    Args:
        audio_path: Path of the audio file handed to Whisper.
        model_size: Name passed to ``whisper.load_model``.
        game_profile: Key into ``PROFILE_KEYWORDS``; unknown profiles fall
            back to the ``'general'`` list.
        threshold: Minimum keyword confidence for a segment to be reported.

    Returns:
        One dict per matching segment with ``'time'`` (segment start rounded
        to 2 decimals), ``'event'`` (the matched keyword) and ``'confidence'``.

    Exits the process with status 2, after writing a JSON error to stderr,
    when the ``whisper`` package cannot be imported.
    """
    try:
        import whisper  # type: ignore
    except ImportError:
        message = json.dumps({'error': 'openai-whisper not installed. Run: pip install openai-whisper'})
        print(message, file=sys.stderr)
        sys.exit(2)

    profile_terms = PROFILE_KEYWORDS.get(game_profile, PROFILE_KEYWORDS['general'])

    transcription = whisper.load_model(model_size).transcribe(
        audio_path,
        word_timestamps=False,
        fp16=False,
    )

    hits: list[dict] = []
    for segment in transcription.get('segments', []):
        spoken: str = segment.get('text', '')
        begins_at: float = float(segment.get('start', 0))
        keyword, score = score_text(spoken, profile_terms)
        if keyword is not None and score >= threshold:
            hits.append({'time': round(begins_at, 2), 'event': keyword, 'confidence': score})

    return hits
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
if __name__ == '__main__':
    # CLI: python detect_events_whisper.py <audio_path> [threshold] [game_profile] [model_size]
    # Results and runtime errors are written to stdout as JSON; failures
    # handled here exit with status 1.
    if len(sys.argv) < 2:
        print(
            json.dumps({'error': 'Usage: python detect_events_whisper.py <audio_path> [threshold] [game_profile] [model_size]'}),
        )
        sys.exit(1)

    try:
        # Parse arguments inside the try so a malformed threshold is reported
        # as a JSON error instead of an uncaught traceback.
        audio_path = sys.argv[1]
        threshold = float(sys.argv[2]) if len(sys.argv) > 2 else 0.3
        game_profile = sys.argv[3] if len(sys.argv) > 3 else 'general'
        model_size = sys.argv[4] if len(sys.argv) > 4 else 'medium'

        result = detect_events_whisper(audio_path, model_size, game_profile, threshold)
        print(json.dumps(result))
    except Exception as e:
        print(json.dumps({'error': str(e)}))
        sys.exit(1)
|
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
transcribe_whisper.py — Full Whisper transcription for transcript generation.
|
|
3
|
-
|
|
4
|
-
Runs OpenAI Whisper on the provided audio file and writes a JSON array of
|
|
5
|
-
transcript segments to stdout:
|
|
6
|
-
|
|
7
|
-
[{"text": "...", "start": 0.0, "duration": 3.5}, ...]
|
|
8
|
-
|
|
9
|
-
Usage:
|
|
10
|
-
python transcribe_whisper.py <audio_path> [model_size]
|
|
11
|
-
|
|
12
|
-
Arguments:
|
|
13
|
-
audio_path - Path to the audio WAV file
|
|
14
|
-
model_size - Whisper model to use (default: medium)
|
|
15
|
-
Options: tiny, base, small, medium, large-v3
|
|
16
|
-
|
|
17
|
-
Requires: pip install openai-whisper
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
import json
|
|
21
|
-
import sys
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def transcribe(audio_path: str, model_size: str = 'medium') -> list[dict]:
    """Transcribe an audio file with Whisper into timed text segments.

    Args:
        audio_path: Path of the audio file handed to Whisper.
        model_size: Name passed to ``whisper.load_model``.

    Returns:
        A list of ``{'text', 'start', 'duration'}`` dicts, one per segment
        whose stripped text is non-empty. ``start`` and ``duration`` are
        rounded to 3 decimals and ``duration`` is never negative.

    Exits the process with status 2, after writing a message to stderr, when
    the ``whisper`` package cannot be imported.
    """
    try:
        import whisper  # type: ignore
    except ImportError:
        print(
            'ModuleNotFoundError: openai-whisper not installed. Run: pip install openai-whisper',
            file=sys.stderr,
        )
        sys.exit(2)

    raw = whisper.load_model(model_size).transcribe(
        audio_path,
        word_timestamps=False,
        fp16=False,
    )

    collected: list[dict] = []
    for piece in raw.get('segments', []):
        spoken: str = piece.get('text', '').strip()
        begin: float = float(piece.get('start', 0))
        # A segment without an 'end' is treated as zero-length.
        finish: float = float(piece.get('end', begin))
        length = max(0.0, round(finish - begin, 3))

        if not spoken:
            continue
        collected.append({'text': spoken, 'start': round(begin, 3), 'duration': length})

    return collected
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
if __name__ == '__main__':
    # CLI: python transcribe_whisper.py <audio_path> [model_size]
    # The transcript goes to stdout as JSON; usage and error text go to
    # stderr, and failures handled here exit with status 1.
    if not sys.argv[1:]:
        print('Usage: python transcribe_whisper.py <audio_path> [model_size]', file=sys.stderr)
        sys.exit(1)

    audio_path = sys.argv[1]
    if len(sys.argv) > 2:
        model_size = sys.argv[2]
    else:
        model_size = 'medium'

    try:
        output = transcribe(audio_path, model_size)
        print(json.dumps(output))
    except Exception as exc:
        print(f'Error: {exc}', file=sys.stderr)
        sys.exit(1)
|