@thunderkiller/video-clipper 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/CHANGELOG.md +13 -0
  2. package/LICENSE +15 -0
  3. package/package.json +1 -1
  4. package/.github/workflows/ci.yml +0 -42
  5. package/.github/workflows/release.yml +0 -76
  6. package/.husky/pre-commit +0 -3
  7. package/.prettierignore +0 -6
  8. package/.prettierrc +0 -7
  9. package/.releaserc.json +0 -21
  10. package/AGENTS.md +0 -122
  11. package/docs/free-models.md +0 -78
  12. package/docs/plan.md +0 -442
  13. package/docs/refactorPhases.md +0 -105
  14. package/docs/yt-downloader.md +0 -440
  15. package/requirements.txt +0 -5
  16. package/scripts/detect_events.py +0 -81
  17. package/scripts/detect_events_whisper.py +0 -101
  18. package/scripts/transcribe_whisper.py +0 -70
  19. package/src/cli.ts +0 -186
  20. package/src/config/env.ts +0 -18
  21. package/src/config/index.ts +0 -2
  22. package/src/index.ts +0 -46
  23. package/src/pipeline/runner.ts +0 -147
  24. package/src/pipeline/stages/audioProcessor.ts +0 -127
  25. package/src/pipeline/stages/clipExporter.ts +0 -76
  26. package/src/pipeline/stages/segmentAnalyzer.ts +0 -72
  27. package/src/pipeline/stages/segmentSelector.ts +0 -39
  28. package/src/pipeline/stages/videoResolver.ts +0 -44
  29. package/src/services/audioAnalyzers/base.ts +0 -32
  30. package/src/services/audioAnalyzers/factory.ts +0 -69
  31. package/src/services/audioAnalyzers/gemini.ts +0 -136
  32. package/src/services/audioAnalyzers/index.ts +0 -6
  33. package/src/services/audioAnalyzers/whisper.ts +0 -80
  34. package/src/services/audioAnalyzers/yamnet.ts +0 -54
  35. package/src/services/audioDownloader/index.ts +0 -102
  36. package/src/services/chunkBuilder/index.ts +0 -82
  37. package/src/services/clipGenerator/index.ts +0 -210
  38. package/src/services/clipRefiner/index.ts +0 -141
  39. package/src/services/eventDetector/index.ts +0 -68
  40. package/src/services/llmAnalyzer/LLMAnalyzer.ts +0 -98
  41. package/src/services/llmAnalyzer/index.ts +0 -231
  42. package/src/services/metadataExtractor/index.ts +0 -83
  43. package/src/services/segmentRanker/index.ts +0 -88
  44. package/src/services/signalMerger/index.ts +0 -53
  45. package/src/services/transcriptAnalyzers/base.ts +0 -26
  46. package/src/services/transcriptAnalyzers/factory.ts +0 -66
  47. package/src/services/transcriptAnalyzers/gemini.ts +0 -24
  48. package/src/services/transcriptAnalyzers/index.ts +0 -6
  49. package/src/services/transcriptAnalyzers/whisper.ts +0 -68
  50. package/src/services/transcriptAnalyzers/ytdlp.ts +0 -19
  51. package/src/services/transcriptDetector/index.ts +0 -122
  52. package/src/services/transcriptFetcher/index.ts +0 -147
  53. package/src/services/urlParser/index.ts +0 -52
  54. package/src/services/videoDownloader/index.ts +0 -268
  55. package/src/types/analyzer.ts +0 -23
  56. package/src/types/audio.ts +0 -19
  57. package/src/types/cache.ts +0 -8
  58. package/src/types/cli.ts +0 -22
  59. package/src/types/config.ts +0 -151
  60. package/src/types/downloader.ts +0 -15
  61. package/src/types/factory.ts +0 -3
  62. package/src/types/index.ts +0 -40
  63. package/src/types/pipeline.ts +0 -60
  64. package/src/types/segment.ts +0 -43
  65. package/src/types/transcript.ts +0 -22
  66. package/src/types/video.ts +0 -18
  67. package/src/utils/cache.ts +0 -224
  68. package/src/utils/chunker.ts +0 -60
  69. package/src/utils/dumper.ts +0 -41
  70. package/src/utils/format.ts +0 -10
  71. package/src/utils/logger.ts +0 -17
  72. package/src/utils/modelFactory.ts +0 -71
  73. package/src/utils/redactConfig.ts +0 -23
  74. package/src/utils/sliceAudio.ts +0 -35
  75. package/test-trigger.txt +0 -1
  76. package/tests/analyzerFactory.test.ts +0 -146
  77. package/tests/audioEventDetector.test.ts +0 -69
  78. package/tests/cache.test.ts +0 -203
  79. package/tests/chunkBuilder.test.ts +0 -146
  80. package/tests/chunker.test.ts +0 -95
  81. package/tests/eventDetector.test.ts +0 -103
  82. package/tests/llmAnalyzer.test.ts +0 -283
  83. package/tests/segmentRanker.test.ts +0 -133
  84. package/tests/setup.ts +0 -48
  85. package/tests/signalMerger.test.ts +0 -197
  86. package/tests/transcriptDetector.test.ts +0 -150
  87. package/tests/transcriptFetcher.test.ts +0 -179
  88. package/tests/urlParser.test.ts +0 -70
  89. package/tsconfig.json +0 -16
  90. package/tsconfig.test.json +0 -8
  91. package/vitest.config.ts +0 -8
@@ -1,440 +0,0 @@
1
- # yt-dlp Download Modes
2
-
3
- This CLI supports two download strategies for generating video clips.
4
-
5
- ---
6
-
7
- ## Mode 1: Full Video Download (Default)
8
-
9
- Downloads entire video first, then uses `ffmpeg` to cut individual clips.
10
-
11
- **When to use:**
12
-
13
- - Generating many clips from one video
14
- - Want flexibility to cut different clips later
15
- - Internet connection is fast/stable
16
-
17
- **Trade-offs:**
18
- | Aspect | Full Download |
19
- |--------|--------------|
20
- | Speed | Slower initial download |
21
- | Bandwidth | Higher |
22
- | Disk Usage | Higher |
23
- | Flexibility | Can cut different clips later |
24
-
25
- **Command:**
26
-
27
- ```bash
28
- # Default behavior (when using --clip)
29
- npm run start -- <url> --clip
30
-
31
- # Explicit flag
32
- npm run start -- <url> --clip --download-sections all
33
- ```
34
-
35
- ---
36
-
37
- ## Mode 2: Segments Download
38
-
39
- Downloads only top N segments using yt-dlp's `--download-sections` feature.
40
-
41
- **When to use:**
42
-
43
- - Generating only a few clips (1-5)
44
- - Want to save bandwidth
45
- - Video is very long but only need short clips
46
-
47
- **Trade-offs:**
48
- | Aspect | Segments Download |
49
- |--------|-------------------|
50
- | Speed | Faster for few clips |
51
- | Bandwidth | Lower (only needed portions) |
52
- | Disk Usage | Lower |
53
- | Flexibility | Clips are final |
54
-
55
- **Command:**
56
-
57
- ```bash
58
- # Download top 3 segments
59
- npm run start -- <url> --download-sections 3
60
-
61
- # Download top 5 segments to custom directory
62
- npm run start -- <url> --download-sections 5 --video-path ./my-clips
63
- ```
64
-
65
- **Note:** Using `--download-sections N` implicitly enables `--clip` mode.
66
-
67
- ---
68
-
69
- ## Custom Output Path
70
-
71
- Override default download/clip directories with `--video-path`:
72
-
73
- ```bash
74
- # Full video to custom path
75
- npm run start -- <url> --clip --video-path ./downloads
76
-
77
- # Segments to custom path
78
- npm run start -- <url> --download-sections 3 --video-path ./my-clips
79
- ```
80
-
81
- This flag overrides:
82
-
83
- - `DOWNLOAD_DIR` for full video downloads
84
- - `OUTPUT_DIR` for segment downloads and clip organization
85
-
86
- ---
87
-
88
- ## Working with Pre-Downloaded Videos
89
-
90
- If you already have a video downloaded (from yt-dlp, browser download, or other tool), you can skip the download step and work directly with that file.
91
-
92
- **Workflow:**
93
-
94
- ```bash
95
- # Step 1: Run analysis once to get segment timestamps
96
- npm run start -- <url> --output-json analysis.json
97
-
98
- # Step 2: (Optional) Edit timestamps in analysis.json if needed
99
- # Edit the "start" and "end" values for each segment
100
-
101
- # Step 3: Place your video in downloads/ directory
102
- cp /path/to/your/video.mp4 downloads/<videoId>.mp4
103
-
104
- # Step 4: Run again - will skip download and use your video
105
- npm run start -- <url> --clip
106
- ```
107
-
108
- **Use cases:**
109
-
110
- - **Testing different settings** - Run different clip configurations without re-downloading
111
- - **Manual timestamp adjustment** - Fine-tune segment boundaries based on visual inspection
112
- - **Alternative video sources** - Work with videos downloaded from other tools or browsers
113
- - **Large video files** - If you have a high-quality version, use that instead
114
-
115
- **Notes:**
116
-
117
- - The video file must be named exactly `{videoId}.mp4` in the `DOWNLOAD_DIR`
118
- - You can apply `TIMESTAMP_OFFSET_SECONDS` globally instead of editing each timestamp
119
- - Transcript cache is used, so re-running is fast (no API calls)
120
-
121
- ### Combining with Timestamp Offset
122
-
123
- For pre-downloaded videos with known sync issues:
124
-
125
- ```bash
126
- # Skip download and shift every clip 3 seconds earlier (negative offset)
127
- TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --clip
128
- ```
129
-
130
- The CLI will find the existing video in `downloads/`, skip the download step, and apply the offset to all clip generation.
131
-
132
- ---
133
-
134
- ## How It Works
135
-
136
- ### Full Download Mode
137
-
138
- 1. Download entire video: `yt-dlp <url>`
139
- 2. Cut clips with ffmpeg: `ffmpeg -i video.mp4 -ss <start> -to <end> -c:v libx264 -preset fast -c:a aac clip.mp4`
140
- 3. Re-encodes with libx264 (video) and aac (audio) for perfect audio/video sync
141
-
142
- ### Segments Download Mode
143
-
144
- 1. For top N segments: `yt-dlp --download-sections "*{start}-{end}" <url>`
145
- 2. Downloads are parallel (concurrency controlled by `LLM_CONCURRENCY`)
146
- 3. No ffmpeg cutting needed — segments are pre-cut by yt-dlp
147
- 4. yt-dlp's `--download-sections` ensures proper audio/video sync
148
- 5. Only top N segments (by score) are downloaded
149
-
150
- **Note:** The full download mode re-encodes clips to ensure audio/video synchronization, which is slower but produces accurate results. The segments download mode relies on yt-dlp's built-in cutting which also maintains proper sync.
151
-
152
- ### Millisecond Precision
153
-
154
- The `--download-sections` mode now uses millisecond precision (HH:MM:SS.mmm format) instead of just HH:MM:SS. This ensures accurate segment downloads, especially important for short clips.
155
-
156
- **Before:**
157
-
158
- ```
159
- *00:02:00-00:02:30 # Lost decimal part (120.5s became 120s)
160
- ```
161
-
162
- **After:**
163
-
164
- ```
165
- *00:02:00.500-00:02:30.000 # Preserves exact timestamp (120.5s kept as 120.500s)
166
- ```
167
-
168
- ### Timestamp Offset
169
-
170
- The `TIMESTAMP_OFFSET_SECONDS` config option applies a global adjustment to all timestamps in both modes:
171
-
172
- - **Positive value** = Shift clips later in time
173
- - **Negative value** = Shift clips earlier in time
174
- - **Default** = 0 (no adjustment)
175
-
176
- This is useful when transcript timestamps don't perfectly match the actual video timing.
177
-
178
- ---
179
-
180
- ## Troubleshooting: Timestamp Alignment
181
-
182
- ### Problem: Audio is delayed or starts early
183
-
184
- **Symptoms:**
185
-
186
- - Video starts at correct moment but audio plays 2-5 seconds later/earlier
187
- - Lip movements don't match speech in the clip
188
- - Content in clip doesn't match the transcript segment
189
-
190
- **Root Causes:**
191
-
192
- 1. **Transcript misalignment** - Transcript timestamps don't perfectly match the video
193
- - **Auto-generated captions**: Often have 1-3 second delays
194
- - **Manual captions**: Usually more accurate but can have timing issues
195
- - **Multiple caption tracks**: Transcripts from different video versions
196
-
197
- 2. **Millisecond precision loss** - Old implementation lost decimal seconds
198
- - **Fixed**: Now using HH:MM:SS.mmm format for `--download-sections`
199
-
200
- 3. **Version differences** - The transcript might be from a slightly different version of the video
201
-
202
- ### Solution: Use `TIMESTAMP_OFFSET_SECONDS`
203
-
204
- **What it does:**
205
- Applies a global offset to all clip timestamps. Positive = shift later, negative = shift earlier.
206
-
207
- **How to use:**
208
-
209
- ```bash
210
- # Add to .env
211
- TIMESTAMP_OFFSET_SECONDS=-3
212
-
213
- # Or inline
214
- TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --clip
215
- ```
216
-
217
- ### Finding the Correct Offset
218
-
219
- **Step 1: Test with logging**
220
-
221
- Run a single segment and observe the logs:
222
-
223
- ```bash
224
- TIMESTAMP_OFFSET_SECONDS=0 npm run start -- <url> --download-sections 1
225
- ```
226
-
227
- Look for these log lines:
228
-
229
- ```
230
- [info] Downloading segment 1: 00:02:00.500-00:02:30.000 (strong opinion...)
231
- [info] Requested: 120.50s - 150.00s
232
- [info] Adjusted: 117.50s - 147.00s (offset: -3s)
233
- [info] Cutting clip: start=117.50s, end=147.00s, duration=29.50s
234
- ```
235
-
236
- **Step 2: Play and verify**
237
-
238
- - Open the generated clip
239
- - Check if the moment matches the transcript description
240
- - Note if it's too early or too late
241
-
242
- **Step 3: Adjust offset**
243
-
244
- If clip **starts 3 seconds late**:
245
-
246
- ```bash
247
- TIMESTAMP_OFFSET_SECONDS=-3 # Negative = shift earlier
248
- ```
249
-
250
- If clip **starts 2 seconds early**:
251
-
252
- ```bash
253
- TIMESTAMP_OFFSET_SECONDS=2 # Positive = shift later
254
- ```
255
-
256
- **Step 4: Verify with multiple clips**
257
-
258
- ```bash
259
- TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --download-sections 3
260
- ```
261
-
262
- Check if the offset works consistently across different segments.
263
-
264
- ### Binary Search for Optimal Offset
265
-
266
- If you're unsure of the exact offset:
267
-
268
- ```bash
269
- # Try 0, -3, -6, -9 to see which is closest
270
- for offset in 0 -3 -6 -9; do
271
- TIMESTAMP_OFFSET_SECONDS=$offset npm run start -- <url> --download-sections 1
272
- echo "Tested offset: $offset"
273
- # Play and check accuracy
274
- done
275
- ```
276
-
277
- Then narrow down: if `-3` looks closest, try `-2` and `-4` next, and so on.
278
-
279
- ### Common Scenarios
280
-
281
- | Scenario | Likely Offset | Explanation |
282
- | ----------------------- | ------------- | --------------------------------------------------- |
283
- | Auto-generated captions | `-1` to `-3` | ASR timing often lags behind actual speech |
284
- | Manual captions | `0` to `-1` | Usually more accurate, small sync issues |
285
- | Multiple caption tracks | `-2` to `-5` | Different versions may have systematic offset |
286
- | Regional variations | Varies | Different regions may have different caption timing |
287
-
288
- ### Verifying the Fix
289
-
290
- After applying `TIMESTAMP_OFFSET_SECONDS`, verify:
291
-
292
- 1. **Watch the clip**: Audio and video should be synchronized
293
- 2. **Check multiple clips**: Offset should work consistently
294
- 3. **Compare with original**: Clip should match the described content
295
-
296
- If offset varies between segments, the issue might be video-specific rather than a global transcript offset.
297
-
298
- ---
299
-
300
- ## Progress Display
301
-
302
- Both modes show real-time yt-dlp progress:
303
-
304
- ```
305
- [download] 45.2% of 125MiB at 2.5MiB/s ETA 00:32
306
- ```
307
-
308
- Progress updates inline (same line) to keep logs clean.
309
-
310
- ---
311
-
312
- ## ⚠️ Requirements
313
-
314
- Both modes require:
315
-
316
- 1. **yt-dlp** — Install from https://github.com/yt-dlp/yt-dlp
317
- 2. **ffmpeg** — Cuts and re-encodes clips in full download mode; yt-dlp's `--download-sections` also needs it in segments mode
318
-
319
- ```bash
320
- # macOS
321
- brew install yt-dlp ffmpeg
322
-
323
- # Ubuntu/Debian
324
- sudo apt-get install yt-dlp ffmpeg
325
-
326
- # Windows
327
- # Install from GitHub releases or use winget
328
- ```
329
-
330
- ---
331
-
332
- ## Examples
333
-
334
- ### Download full video and cut clips
335
-
336
- ```bash
337
- npm run start -- https://youtube.com/watch?v=abc123 --clip
338
- ```
339
-
340
- ### Download top 3 segments only
341
-
342
- ```bash
343
- npm run start -- https://youtube.com/watch?v=abc123 --download-sections 3
344
- ```
345
-
346
- ### Download top 5 segments to custom directory
347
-
348
- ```bash
349
- npm run start -- https://youtube.com/watch?v=abc123 --download-sections 5 --video-path ./my-clips
350
- ```
351
-
352
- ### Download segments with timestamp offset
353
-
354
- ```bash
355
- # Fix 3-second audio delay
356
- TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --download-sections 3
357
-
358
- # Custom quality preset with offset
359
- FFMPEG_PRESET=medium TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --download-sections 5
360
- ```
361
-
362
- ### Full video to custom directory
363
-
364
- ```bash
365
- npm run start -- https://youtube.com/watch?v=abc123 --clip --video-path ./downloads
366
- ```
367
-
368
- ### Set default mode via environment
369
-
370
- ```bash
371
- # Add to .env
372
- DOWNLOAD_SECTIONS_MODE=all
373
-
374
- # Or inline
375
- DOWNLOAD_SECTIONS_MODE=all npm run start -- <url> --clip
376
- ```
377
-
378
- ### Pre-downloaded video workflow
379
-
380
- ```bash
381
- # Step 1: Download your video manually (any method)
382
- cp /path/to/video.mp4 downloads/abc123.mp4
383
-
384
- # Step 2: Run analysis (will skip download)
385
- npm run start -- https://youtube.com/watch?v=abc123 --clip
386
-
387
- # Add offset if needed
388
- TIMESTAMP_OFFSET_SECONDS=-2 npm run start -- https://youtube.com/watch?v=abc123 --clip
389
- ```
390
-
391
- ---
392
-
393
- ## Backward Compatibility Note
394
-
395
- The old `--download-sections segments` flag is deprecated but still works (shows a warning). It now behaves the same as `--download-sections all` (downloads full video).
396
-
397
- ```bash
398
- # Old style (deprecated, still works)
399
- npm run start -- <url> --clip --download-sections segments
400
-
401
- # New recommended style
402
- npm run start -- <url> --clip --download-sections all
403
- ```
404
-
405
- ---
406
-
407
- ## Configuration Options
408
-
409
- ### FFMPEG_PRESET
410
-
411
- Controls encoding speed/quality trade-off for clip generation in full download mode:
412
-
413
- | Preset | Speed | Quality | Use Case |
414
- | ---------------- | --------- | ------- | ---------------------- |
415
- | `ultrafast` | Very fast | Lowest | Quick testing |
416
- | `fast` (default) | Fast | Good | Balanced performance |
417
- | `medium` | Medium | Better | Higher quality clips |
418
- | `slow` | Slow | High | Final production clips |
419
-
420
- ```bash
421
- # Set in .env
422
- FFMPEG_PRESET=medium
423
-
424
- # Or inline
425
- FFMPEG_PRESET=slow npm run start -- <url> --clip
426
- ```
427
-
428
- ### TIMESTAMP_OFFSET_SECONDS
429
-
430
- Global timestamp adjustment in seconds. Negative = earlier, Positive = later.
431
-
432
- ```bash
433
- # Set in .env
434
- TIMESTAMP_OFFSET_SECONDS=-3
435
-
436
- # Or inline
437
- TIMESTAMP_OFFSET_SECONDS=2 npm run start -- <url> --clip
438
- ```
439
-
440
- Use this to fix systematic audio/video desynchronization when transcript timestamps don't match the video.
package/requirements.txt DELETED
@@ -1,5 +0,0 @@
1
- tensorflow
2
- tensorflow-hub
3
- soundfile
4
- numpy
5
- openai-whisper
@@ -1,81 +0,0 @@
1
- import tensorflow_hub as hub
2
- import soundfile as sf
3
- import numpy as np
4
- import json
5
- import sys
6
-
7
# Score-vector class indices that detect_events() inspects in every frame,
# mapped to the event label reported when that class exceeds the threshold.
# Insertion order matters: hits are appended in this order, and
# cluster_events() labels a cluster with the label of its first event.
# NOTE(review): these indices were not checked here against the class map
# published with the model -- confirm each index really means its label.
GAME_EVENTS = {
    67: 'gunshot',
    366: 'explosion',
    389: 'crowd_cheering',
    63: 'gunfire_burst',
}
13
-
14
def cluster_events(events, gap=1.5):
    """Collapse detections that occur close together into single events.

    Events are ordered by ``time`` (stable sort) and chained: an event joins
    the current group when it is at most ``gap`` seconds after the previous
    event, otherwise it opens a new group.

    Args:
        events: Iterable of dicts with ``time``, ``event`` and ``confidence``.
        gap: Largest time difference, in seconds, between neighbouring events
            of the same group.

    Returns:
        One dict per group carrying the ``time`` and ``event`` of the group's
        first member and the highest ``confidence`` found in the group.
        An empty or missing input yields ``[]``.
    """
    if not events:
        return []

    ordered = sorted(events, key=lambda item: item['time'])

    # Pass 1: split the timeline into runs of neighbouring events.
    groups = []
    for entry in ordered:
        if groups and entry['time'] - groups[-1][-1]['time'] <= gap:
            groups[-1].append(entry)
        else:
            groups.append([entry])

    # Pass 2: summarise every run as a single event.
    return [
        {
            'time': members[0]['time'],
            'event': members[0]['event'],
            'confidence': max(member['confidence'] for member in members),
        }
        for members in groups
    ]
45
-
46
def detect_events(audio_path, threshold=0.30):
    """Score an audio file with YAMNet and return clustered game-sound events.

    Args:
        audio_path: Path of the audio file, read with ``soundfile``.
        threshold: A class listed in ``GAME_EVENTS`` is reported for a frame
            when its score is strictly greater than this value.

    Returns:
        The list produced by ``cluster_events(..., gap=1.5)``: dicts with
        ``time`` (seconds), ``event`` (label) and ``confidence``.

    Warns:
        UserWarning: when the file's sample rate is not 16000 Hz. The audio
            is still scored as-is; no resampling is performed here.
    """
    import warnings

    model = hub.load('https://tfhub.dev/google/yamnet/1')
    wav, sr = sf.read(audio_path, dtype='float32')

    # soundfile yields a (frames, channels) array for multi-channel files,
    # whereas the model is fed a single 1-D waveform: average the channels.
    if wav.ndim > 1:
        wav = wav.mean(axis=1)

    if sr != 16000:
        warnings.warn(f'Audio sample rate is {sr} Hz, expected 16000 Hz for YAMNet')

    scores, _, _ = model(wav)
    events = []

    for i, frame in enumerate(scores.numpy()):
        for cid, label in GAME_EVENTS.items():
            if frame[cid] > threshold:
                events.append({
                    # Frame index -> seconds; consecutive frames are taken
                    # to be 0.48 s apart.
                    'time': round(i * 0.48, 2),
                    'event': label,
                    'confidence': float(frame[cid]),
                })

    return cluster_events(events, gap=1.5)
67
-
68
if __name__ == '__main__':
    # CLI contract: a JSON document is always written to stdout -- the event
    # list on success, or {"error": ...} together with exit status 1.
    if len(sys.argv) < 2:
        print(json.dumps({'error': 'Usage: python detect_events.py <audio_path> [threshold]'}))
        sys.exit(1)

    audio_path = sys.argv[1]

    try:
        # Parsed inside the try so a non-numeric threshold is reported as a
        # JSON error like every other failure, not as a raw traceback.
        threshold = float(sys.argv[2]) if len(sys.argv) > 2 else 0.30
        result = detect_events(audio_path, threshold)
        print(json.dumps(result))
    except Exception as e:
        print(json.dumps({'error': str(e)}))
        sys.exit(1)
@@ -1,101 +0,0 @@
1
- import json
2
- import sys
3
-
4
- # Keyword sets per game profile.
5
- # Keys are lowercase; matches are case-insensitive.
6
- PROFILE_KEYWORDS: dict[str, list[str]] = {
7
- 'valorant': [
8
- 'ace', 'clutch', 'defuse', 'spike', '1v1', '1v2', '1v3', '1v4', '1v5',
9
- "let's go", 'no way', 'insane', 'bro', 'what', 'oh my god', 'omg',
10
- 'unbelievable', 'crazy', 'yooo', 'yo', 'filthy', 'clean',
11
- 'wallbang', 'headshot',
12
- ],
13
- 'fps': [
14
- 'kill', 'headshot', 'streak', 'collateral', 'insane', 'no way',
15
- "let's go", 'yooo', 'yo', 'crazy', 'oh my god', 'omg', 'unbelievable',
16
- 'nice', 'what', 'bro',
17
- ],
18
- 'boss_fight': [
19
- 'finally', "let's go", 'dead', 'down', 'phase', 'unbelievable', 'insane',
20
- 'crazy', 'no way', 'oh my god', 'omg', 'yooo', 'yo', 'what', 'bro',
21
- ],
22
- 'general': [
23
- 'insane', 'crazy', 'no way', "let's go", 'oh my god', 'omg',
24
- 'what', 'wow', 'yooo', 'yo', 'unbelievable', 'bro',
25
- ],
26
- }
27
-
28
- # Phrases that get full confidence (exact multi-word match carries more signal).
29
- HIGH_CONFIDENCE_PHRASES: set[str] = {
30
- 'ace', 'clutch', "let's go", 'no way', 'oh my god', 'omg', 'unbelievable',
31
- '1v1', '1v2', '1v3', '1v4', '1v5', 'finally',
32
- }
33
-
34
-
35
- def score_text(text: str, keywords: list[str]) -> tuple[str | None, float]:
36
- """
37
- Return the first matching keyword and its confidence, or (None, 0).
38
- Multi-word phrases and high-confidence phrases get confidence 1.0;
39
- single-word partial matches get 0.8.
40
- """
41
- lower = text.lower()
42
- for kw in keywords:
43
- if kw in lower:
44
- conf = 1.0 if kw in HIGH_CONFIDENCE_PHRASES else 0.8
45
- return kw, conf
46
- return None, 0.0
47
-
48
-
49
def detect_events_whisper(
    audio_path: str,
    model_size: str = 'medium',
    game_profile: str = 'general',
    threshold: float = 0.3,
) -> list[dict]:
    """Transcribe audio with Whisper and flag segments containing keywords.

    Args:
        audio_path: Path of the audio file handed to Whisper.
        model_size: Name passed to ``whisper.load_model``.
        game_profile: Key into ``PROFILE_KEYWORDS``; unknown profiles fall
            back to the 'general' keyword list.
        threshold: Minimum confidence a keyword match needs to be kept.

    Returns:
        One dict per matching segment with ``time`` (segment start, rounded
        to 2 decimals), ``event`` (the matched keyword) and ``confidence``.

    If ``whisper`` cannot be imported, a JSON error is written to stderr and
    the process exits with status 2.
    """
    try:
        import whisper  # type: ignore
    except ImportError:
        missing = json.dumps({'error': 'openai-whisper not installed. Run: pip install openai-whisper'})
        print(missing, file=sys.stderr)
        sys.exit(2)

    profile_keywords = PROFILE_KEYWORDS.get(game_profile, PROFILE_KEYWORDS['general'])

    transcription = whisper.load_model(model_size).transcribe(
        audio_path, word_timestamps=False, fp16=False,
    )

    events: list[dict] = []
    for segment in transcription.get('segments', []):
        segment_text: str = segment.get('text', '')
        segment_start: float = float(segment.get('start', 0))
        keyword, confidence = score_text(segment_text, profile_keywords)
        if keyword is None:
            continue
        if confidence >= threshold:
            events.append({
                'time': round(segment_start, 2),
                'event': keyword,
                'confidence': confidence,
            })

    return events
82
-
83
-
84
if __name__ == '__main__':
    # CLI contract: a JSON document is written to stdout -- the event list on
    # success, or {"error": ...} together with exit status 1.
    if len(sys.argv) < 2:
        print(
            json.dumps({'error': 'Usage: python detect_events_whisper.py <audio_path> [threshold] [game_profile] [model_size]'}),
        )
        sys.exit(1)

    audio_path = sys.argv[1]
    game_profile = sys.argv[3] if len(sys.argv) > 3 else 'general'
    model_size = sys.argv[4] if len(sys.argv) > 4 else 'medium'

    try:
        # Parsed inside the try so a non-numeric threshold is reported as a
        # JSON error like every other failure, not as a raw traceback.
        threshold = float(sys.argv[2]) if len(sys.argv) > 2 else 0.3
        result = detect_events_whisper(audio_path, model_size, game_profile, threshold)
        print(json.dumps(result))
    except Exception as e:
        print(json.dumps({'error': str(e)}))
        sys.exit(1)
@@ -1,70 +0,0 @@
1
- """
2
- transcribe_whisper.py — Full Whisper transcription for transcript generation.
3
-
4
- Runs OpenAI Whisper on the provided audio file and writes a JSON array of
5
- transcript segments to stdout:
6
-
7
- [{"text": "...", "start": 0.0, "duration": 3.5}, ...]
8
-
9
- Usage:
10
- python transcribe_whisper.py <audio_path> [model_size]
11
-
12
- Arguments:
13
- audio_path - Path to the audio WAV file
14
- model_size - Whisper model to use (default: medium)
15
- Options: tiny, base, small, medium, large-v3
16
-
17
- Requires: pip install openai-whisper
18
- """
19
-
20
- import json
21
- import sys
22
-
23
-
24
def transcribe(audio_path: str, model_size: str = 'medium') -> list[dict]:
    """Transcribe an audio file with Whisper into timed text segments.

    Args:
        audio_path: Path of the audio file handed to Whisper.
        model_size: Name passed to ``whisper.load_model``.

    Returns:
        One dict per non-empty segment with ``text`` (stripped), ``start``
        (seconds, 3 decimals) and ``duration`` (seconds, 3 decimals, never
        negative).

    If ``whisper`` cannot be imported, a message is written to stderr and
    the process exits with status 2.
    """
    try:
        import whisper  # type: ignore
    except ImportError:
        print(
            'ModuleNotFoundError: openai-whisper not installed. Run: pip install openai-whisper',
            file=sys.stderr,
        )
        sys.exit(2)

    recognizer = whisper.load_model(model_size)
    transcription = recognizer.transcribe(audio_path, word_timestamps=False, fp16=False)

    segments: list[dict] = []
    for entry in transcription.get('segments', []):
        spoken: str = entry.get('text', '').strip()
        begin: float = float(entry.get('start', 0))
        # A segment without an 'end' is treated as zero-length.
        finish: float = float(entry.get('end', begin))

        if not spoken:
            continue

        segments.append({
            'text': spoken,
            'start': round(begin, 3),
            'duration': max(0.0, round(finish - begin, 3)),
        })

    return segments
52
-
53
-
54
if __name__ == '__main__':
    # CLI: <audio_path> [model_size]. The transcript is printed to stdout as
    # JSON; usage and error messages go to stderr with exit status 1.
    cli_args = sys.argv[1:]
    if not cli_args:
        print('Usage: python transcribe_whisper.py <audio_path> [model_size]', file=sys.stderr)
        sys.exit(1)

    audio_path = cli_args[0]
    model_size = cli_args[1] if len(cli_args) > 1 else 'medium'

    try:
        output = transcribe(audio_path, model_size)
        print(json.dumps(output))
    except Exception as e:
        print(f'Error: {e}', file=sys.stderr)
        sys.exit(1)