@thunderkiller/video-clipper 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +130 -0
- package/.github/workflows/ci.yml +42 -0
- package/.github/workflows/release.yml +72 -0
- package/.husky/pre-commit +3 -0
- package/.prettierignore +6 -0
- package/.prettierrc +7 -0
- package/.releaserc.json +21 -0
- package/AGENTS.md +122 -0
- package/CHANGELOG.md +45 -0
- package/README.md +410 -0
- package/dist/cli.js +187 -0
- package/dist/config/env.js +14 -0
- package/dist/config/index.js +1 -0
- package/dist/index.js +35 -0
- package/dist/pipeline/runner.js +132 -0
- package/dist/pipeline/stages/audioProcessor.js +75 -0
- package/dist/pipeline/stages/clipExporter.js +44 -0
- package/dist/pipeline/stages/segmentAnalyzer.js +46 -0
- package/dist/pipeline/stages/segmentSelector.js +23 -0
- package/dist/pipeline/stages/videoResolver.js +34 -0
- package/dist/services/audioAnalyzers/base.js +13 -0
- package/dist/services/audioAnalyzers/factory.js +56 -0
- package/dist/services/audioAnalyzers/gemini.js +109 -0
- package/dist/services/audioAnalyzers/index.js +5 -0
- package/dist/services/audioAnalyzers/whisper.js +62 -0
- package/dist/services/audioAnalyzers/yamnet.js +40 -0
- package/dist/services/audioDownloader/index.js +81 -0
- package/dist/services/chunkBuilder/index.js +71 -0
- package/dist/services/clipGenerator/index.js +156 -0
- package/dist/services/clipRefiner/index.js +103 -0
- package/dist/services/eventDetector/index.js +54 -0
- package/dist/services/llmAnalyzer/LLMAnalyzer.js +63 -0
- package/dist/services/llmAnalyzer/index.js +173 -0
- package/dist/services/metadataExtractor/index.js +66 -0
- package/dist/services/segmentRanker/index.js +40 -0
- package/dist/services/signalMerger/index.js +36 -0
- package/dist/services/transcriptAnalyzers/base.js +13 -0
- package/dist/services/transcriptAnalyzers/factory.js +51 -0
- package/dist/services/transcriptAnalyzers/gemini.js +19 -0
- package/dist/services/transcriptAnalyzers/index.js +5 -0
- package/dist/services/transcriptAnalyzers/whisper.js +55 -0
- package/dist/services/transcriptAnalyzers/ytdlp.js +16 -0
- package/dist/services/transcriptDetector/index.js +102 -0
- package/dist/services/transcriptFetcher/index.js +124 -0
- package/dist/services/urlParser/index.js +46 -0
- package/dist/services/videoDownloader/index.js +212 -0
- package/dist/types/audio.js +15 -0
- package/dist/types/cli.js +1 -0
- package/dist/types/config.js +150 -0
- package/dist/types/index.js +5 -0
- package/dist/types/pipeline.js +9 -0
- package/dist/types/segment.js +36 -0
- package/dist/types/transcript.js +16 -0
- package/dist/types/video.js +14 -0
- package/dist/utils/cache.js +143 -0
- package/dist/utils/chunker.js +51 -0
- package/dist/utils/dumper.js +36 -0
- package/dist/utils/format.js +10 -0
- package/dist/utils/logger.js +16 -0
- package/dist/utils/modelFactory.js +60 -0
- package/dist/utils/redactConfig.js +20 -0
- package/dist/utils/sliceAudio.js +26 -0
- package/docs/free-models.md +78 -0
- package/docs/plan.md +442 -0
- package/docs/refactorPhases.md +105 -0
- package/docs/yt-downloader.md +440 -0
- package/package.json +65 -0
- package/requirements.txt +5 -0
- package/scripts/detect_events.py +81 -0
- package/scripts/detect_events_whisper.py +101 -0
- package/scripts/transcribe_whisper.py +70 -0
- package/src/cli.ts +186 -0
- package/src/config/env.ts +18 -0
- package/src/config/index.ts +2 -0
- package/src/index.ts +46 -0
- package/src/pipeline/runner.ts +155 -0
- package/src/pipeline/stages/audioProcessor.ts +129 -0
- package/src/pipeline/stages/clipExporter.ts +80 -0
- package/src/pipeline/stages/segmentAnalyzer.ts +72 -0
- package/src/pipeline/stages/segmentSelector.ts +39 -0
- package/src/pipeline/stages/videoResolver.ts +47 -0
- package/src/services/audioAnalyzers/base.ts +32 -0
- package/src/services/audioAnalyzers/factory.ts +71 -0
- package/src/services/audioAnalyzers/gemini.ts +137 -0
- package/src/services/audioAnalyzers/index.ts +6 -0
- package/src/services/audioAnalyzers/whisper.ts +80 -0
- package/src/services/audioAnalyzers/yamnet.ts +54 -0
- package/src/services/audioDownloader/index.ts +102 -0
- package/src/services/chunkBuilder/index.ts +86 -0
- package/src/services/clipGenerator/index.ts +210 -0
- package/src/services/clipRefiner/index.ts +141 -0
- package/src/services/eventDetector/index.ts +68 -0
- package/src/services/llmAnalyzer/LLMAnalyzer.ts +114 -0
- package/src/services/llmAnalyzer/index.ts +231 -0
- package/src/services/metadataExtractor/index.ts +83 -0
- package/src/services/segmentRanker/index.ts +88 -0
- package/src/services/signalMerger/index.ts +53 -0
- package/src/services/transcriptAnalyzers/base.ts +26 -0
- package/src/services/transcriptAnalyzers/factory.ts +67 -0
- package/src/services/transcriptAnalyzers/gemini.ts +24 -0
- package/src/services/transcriptAnalyzers/index.ts +6 -0
- package/src/services/transcriptAnalyzers/whisper.ts +68 -0
- package/src/services/transcriptAnalyzers/ytdlp.ts +19 -0
- package/src/services/transcriptDetector/index.ts +128 -0
- package/src/services/transcriptFetcher/index.ts +151 -0
- package/src/services/urlParser/index.ts +53 -0
- package/src/services/videoDownloader/index.ts +282 -0
- package/src/types/audio.ts +19 -0
- package/src/types/cli.ts +22 -0
- package/src/types/config.ts +174 -0
- package/src/types/index.ts +26 -0
- package/src/types/pipeline.ts +93 -0
- package/src/types/segment.ts +43 -0
- package/src/types/transcript.ts +22 -0
- package/src/types/video.ts +18 -0
- package/src/utils/cache.ts +223 -0
- package/src/utils/chunker.ts +60 -0
- package/src/utils/dumper.ts +41 -0
- package/src/utils/format.ts +10 -0
- package/src/utils/logger.ts +17 -0
- package/src/utils/modelFactory.ts +71 -0
- package/src/utils/redactConfig.ts +23 -0
- package/src/utils/sliceAudio.ts +35 -0
- package/test-trigger.txt +1 -0
- package/tests/analyzerFactory.test.ts +146 -0
- package/tests/audioEventDetector.test.ts +69 -0
- package/tests/cache.test.ts +203 -0
- package/tests/chunkBuilder.test.ts +146 -0
- package/tests/chunker.test.ts +95 -0
- package/tests/eventDetector.test.ts +103 -0
- package/tests/llmAnalyzer.test.ts +283 -0
- package/tests/segmentRanker.test.ts +133 -0
- package/tests/setup.ts +48 -0
- package/tests/signalMerger.test.ts +197 -0
- package/tests/transcriptDetector.test.ts +150 -0
- package/tests/transcriptFetcher.test.ts +179 -0
- package/tests/urlParser.test.ts +70 -0
- package/tsconfig.json +16 -0
- package/tsconfig.test.json +8 -0
- package/vitest.config.ts +8 -0
package/README.md
ADDED
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
# video-clipper
|
|
2
|
+
|
|
3
|
+
A TypeScript CLI tool that takes a YouTube URL, analyzes the transcript with an LLM, and returns the most interesting moments as ranked timestamp ranges. Optionally downloads the video and cuts clips automatically.
|
|
4
|
+
|
|
5
|
+
## How it works
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
YouTube URL
|
|
9
|
+
│
|
|
10
|
+
▼
|
|
11
|
+
Parse URL → fetch transcript → group into chunks
|
|
12
|
+
│
|
|
13
|
+
▼
|
|
14
|
+
Parallel LLM analysis (Vercel AI SDK + gpt-4o)
|
|
15
|
+
│
|
|
16
|
+
▼
|
|
17
|
+
Rank & deduplicate segments
|
|
18
|
+
│
|
|
19
|
+
▼
|
|
20
|
+
Refine clip boundaries (second LLM pass)
|
|
21
|
+
│
|
|
22
|
+
▼
|
|
23
|
+
(Optional) Download video + cut clips with ffmpeg
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Tech Stack
|
|
27
|
+
|
|
28
|
+
| Layer | Choice |
|
|
29
|
+
| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
|
30
|
+
| Language | TypeScript (Node.js 18+) |
|
|
31
|
+
| Transcript | `youtube-transcript` |
|
|
32
|
+
| LLM | Vercel AI SDK (`ai` + `@ai-sdk/openai`, `@ai-sdk/anthropic`, `@ai-sdk/google`, `@ai-sdk/xai`, `@ai-sdk/mistral`, `@ai-sdk/groq`, `@ai-sdk/openrouter`) |
|
|
33
|
+
| Structured output | `generateObject` + `zod` |
|
|
34
|
+
| Video download | `yt-dlp` via `execa` |
|
|
35
|
+
| Clip cutting | `fluent-ffmpeg` |
|
|
36
|
+
| Config validation | `zod` |
|
|
37
|
+
| Concurrency | `p-limit` |
|
|
38
|
+
|
|
39
|
+
## Requirements
|
|
40
|
+
|
|
41
|
+
- Node.js 18+
|
|
42
|
+
- `yt-dlp` (for video download)
|
|
43
|
+
- `ffmpeg` (for clip cutting)
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# macOS
|
|
47
|
+
brew install yt-dlp ffmpeg
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Audio/Video Sync
|
|
51
|
+
|
|
52
|
+
Clips are generated by re-encoding with `libx264` (video) and `aac` (audio) to ensure perfect audio/video synchronization. This is slower than stream copy mode but prevents the common issue where video and audio become desynchronized in the output clips.
|
|
53
|
+
|
|
54
|
+
**Performance vs Quality Trade-off:**
|
|
55
|
+
|
|
56
|
+
Use the `FFMPEG_PRESET` environment variable to adjust encoding speed:
|
|
57
|
+
|
|
58
|
+
| Preset | Speed | Quality | Use Case |
|
|
59
|
+
| ---------------- | --------- | ------- | ---------------------- |
|
|
60
|
+
| `ultrafast` | Very fast | Lowest | Quick testing |
|
|
61
|
+
| `fast` (default) | Fast | Good | Balanced performance |
|
|
62
|
+
| `medium` | Medium | Better | Higher quality clips |
|
|
63
|
+
| `slow` | Slow | High | Final production clips |
|
|
64
|
+
|
|
65
|
+
Example:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# Faster processing (lower quality)
|
|
69
|
+
FFMPEG_PRESET=ultrafast npm run start -- <url> --clip
|
|
70
|
+
|
|
71
|
+
# Higher quality (slower)
|
|
72
|
+
FFMPEG_PRESET=medium npm run start -- <url> --clip
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Setup
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
npm install
|
|
79
|
+
cp .env.example .env
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Edit `.env` and configure your LLM provider:
|
|
83
|
+
|
|
84
|
+
```env
|
|
85
|
+
# Choose your provider (openai, anthropic, google, xai, mistral, groq, zai, openrouter)
|
|
86
|
+
LLM_PROVIDER=openai
|
|
87
|
+
OPENAI_API_KEY=your_key_here
|
|
88
|
+
|
|
89
|
+
# Or use a free model via OpenRouter:
|
|
90
|
+
# LLM_PROVIDER=openrouter
|
|
91
|
+
# OPENROUTER_API_KEY=sk-or-...
|
|
92
|
+
# LLM_MODEL=meta-llama/llama-3.3-70b-instruct:free
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Configuration
|
|
96
|
+
|
|
97
|
+
All parameters are set via `.env`:
|
|
98
|
+
|
|
99
|
+
| Variable | Default | Description |
|
|
100
|
+
| ------------------------------ | ---------------- | ------------------------------------------------------------------------------------------------------ |
|
|
101
|
+
| **Provider selection** |
|
|
102
|
+
| `LLM_PROVIDER` | `openai` | LLM provider (openai, anthropic, google, xai, mistral, groq, zai, openrouter) |
|
|
103
|
+
| `OPENAI_API_KEY` | — | Your OpenAI API key (required if LLM_PROVIDER=openai) |
|
|
104
|
+
| `ANTHROPIC_API_KEY` | — | Your Anthropic API key (required if LLM_PROVIDER=anthropic) |
|
|
105
|
+
| `GOOGLE_GENERATIVE_AI_API_KEY` | — | Your Google API key (required if LLM_PROVIDER=google) |
|
|
106
|
+
| `XAI_API_KEY` | — | Your XAI API key (required if LLM_PROVIDER=xai) |
|
|
107
|
+
| `MISTRAL_API_KEY` | — | Your Mistral API key (required if LLM_PROVIDER=mistral) |
|
|
108
|
+
| `GROQ_API_KEY` | — | Your Groq API key (required if LLM_PROVIDER=groq) |
|
|
109
|
+
| `ZAI_API_KEY` | — | Your Zai API key (required if LLM_PROVIDER=zai) |
|
|
110
|
+
| `OPENROUTER_API_KEY` | — | Your OpenRouter API key (required if LLM_PROVIDER=openrouter) |
|
|
111
|
+
| **Model & LLM** |
|
|
112
|
+
| `LLM_MODEL` | `gpt-4o` | Model ID (depends on provider) |
|
|
113
|
+
| `LLM_MAX_RETRIES` | `3` | Max retries on rate-limit errors |
|
|
114
|
+
| `LLM_CONCURRENCY` | `3` | Max parallel LLM calls |
|
|
115
|
+
| `LLM_SYSTEM_PROMPT` | (default prompt) | Custom system prompt for LLM analysis |
|
|
116
|
+
| **Analysis parameters** |
|
|
117
|
+
| `SCORE_THRESHOLD` | `7` | Minimum score (1–10) to keep a segment |
|
|
118
|
+
| `TOP_N_SEGMENTS` | `10` | Max number of segments to return |
|
|
119
|
+
| `CHUNK_LENGTH_SEC` | `120` | LLM analysis window size in seconds |
|
|
120
|
+
| `CHUNK_OVERLAP_SEC` | `20` | Overlap between consecutive chunks |
|
|
121
|
+
| `MICRO_BLOCK_SEC` | `15` | Transcript grouping window in seconds |
|
|
122
|
+
| `MAX_CHUNKS` | — | Limit number of chunks sent to LLM (optional) |
|
|
123
|
+
| **Video download** |
|
|
124
|
+
| `DOWNLOAD_SECTIONS_MODE` | `all` | yt-dlp mode: all (full video) or N (top N segments only, e.g. 1, 2, 3...) |
|
|
125
|
+
| `FFMPEG_PRESET` | `fast` | ffmpeg encoding preset: ultrafast, superfast, veryfast, fast (default), medium, slow, slower |
|
|
126
|
+
| `TIMESTAMP_OFFSET_SECONDS` | `0` | Adjust all clip timestamps (positive = later, negative = earlier) to fix transcript-video misalignment |
|
|
127
|
+
| **Paths** |
|
|
128
|
+
| `DOWNLOAD_DIR` | `downloads/` | Where to store downloaded videos |
|
|
129
|
+
| `OUTPUT_DIR` | `outputs/` | Where to store generated clips and dumps |
|
|
130
|
+
| `CACHE_DIR` | `outputs/cache` | Where to store transcript and LLM result cache |
|
|
131
|
+
| **Output options** |
|
|
132
|
+
| `DUMP_OUTPUTS` | `true` | Write transcript/analysis JSON dumps |
|
|
133
|
+
|
|
134
|
+
## Output
|
|
135
|
+
|
|
136
|
+
```json
|
|
137
|
+
{
|
|
138
|
+
"video_id": "abc123",
|
|
139
|
+
"title": "Video Title",
|
|
140
|
+
"duration": 1823,
|
|
141
|
+
"segments": [
|
|
142
|
+
{
|
|
143
|
+
"rank": 1,
|
|
144
|
+
"start": 120,
|
|
145
|
+
"end": 150,
|
|
146
|
+
"score": 9,
|
|
147
|
+
"reason": "strong controversial opinion"
|
|
148
|
+
},
|
|
149
|
+
{
|
|
150
|
+
"rank": 2,
|
|
151
|
+
"start": 420,
|
|
152
|
+
"end": 455,
|
|
153
|
+
"score": 8,
|
|
154
|
+
"reason": "funny storytelling moment"
|
|
155
|
+
}
|
|
156
|
+
]
|
|
157
|
+
}
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## Caching
|
|
161
|
+
|
|
162
|
+
The CLI caches both transcript fetches and LLM chunk results to speed up subsequent runs:
|
|
163
|
+
|
|
164
|
+
- **Transcript cache**: Stored per video ID in `CACHE_DIR`
|
|
165
|
+
- **LLM chunk cache**: Stores successful chunk analyses to avoid re-analyzing the same content
|
|
166
|
+
|
|
167
|
+
Cache is automatically used on re-runs. Use `--no-cache` to bypass.
|
|
168
|
+
|
|
169
|
+
## Working with Pre-Downloaded Videos
|
|
170
|
+
|
|
171
|
+
If you already have a video downloaded (from yt-dlp, browser download, or other tool), you can skip the download step and work directly with that file.
|
|
172
|
+
|
|
173
|
+
**Workflow:**
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
# Step 1: Run analysis once to get segment timestamps
|
|
177
|
+
npm run start -- <url> --output-json analysis.json
|
|
178
|
+
|
|
179
|
+
# Step 2: (Optional) Edit timestamps in analysis.json if needed
|
|
180
|
+
# Edit the "start" and "end" values for each segment
|
|
181
|
+
|
|
182
|
+
# Step 3: Place your video in downloads/ directory
|
|
183
|
+
cp /path/to/your/video.mp4 downloads/<videoId>.mp4
|
|
184
|
+
|
|
185
|
+
# Step 4: Run again - will skip download and use your video
|
|
186
|
+
npm run start -- <url> --clip
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
**Use cases:**
|
|
190
|
+
|
|
191
|
+
- **Testing different settings** - Run different clip configurations without re-downloading
|
|
192
|
+
- **Manual timestamp adjustment** - Fine-tune segment boundaries based on visual inspection
|
|
193
|
+
- **Alternative video sources** - Work with videos downloaded from other tools or browsers
|
|
194
|
+
- **Large video files** - If you have a high-quality version, use that instead
|
|
195
|
+
|
|
196
|
+
**Notes:**
|
|
197
|
+
|
|
198
|
+
- The video file must be named exactly `{videoId}.mp4` in the `DOWNLOAD_DIR`
|
|
199
|
+
- You can apply `TIMESTAMP_OFFSET_SECONDS` globally instead of editing each timestamp
|
|
200
|
+
- Transcript cache is used, so re-running is fast (no API calls)
|
|
201
|
+
|
|
202
|
+
### Combining with Timestamp Offset
|
|
203
|
+
|
|
204
|
+
For pre-downloaded videos with known sync issues:
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
# Skip download, shift all clips 3 seconds earlier (offset of -3)
|
|
208
|
+
TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --clip
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
The CLI will find the existing video in `downloads/`, skip the download step, and apply the offset to all clip generation.
|
|
212
|
+
|
|
213
|
+
## Usage
|
|
214
|
+
|
|
215
|
+
### Basic analysis (no download)
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
npm run start -- https://youtube.com/watch?v=abc123
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### Download full video and generate clips
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
npm run start -- https://youtube.com/watch?v=abc123 --clip
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
### Download top N segments only
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
# Download top 3 segments
|
|
231
|
+
npm run start -- https://youtube.com/watch?v=abc123 --download-sections 3
|
|
232
|
+
|
|
233
|
+
# Download top 5 segments
|
|
234
|
+
npm run start -- https://youtube.com/watch?v=abc123 --download-sections 5
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
### Custom output directory
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
# Store clips in custom directory
|
|
241
|
+
npm run start -- https://youtube.com/watch?v=abc123 --clip --video-path ./my-clips
|
|
242
|
+
|
|
243
|
+
# Download segments to custom path
|
|
244
|
+
npm run start -- https://youtube.com/watch?v=abc123 --download-sections 3 --video-path ./downloads
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
### Custom thresholds
|
|
248
|
+
|
|
249
|
+
```bash
|
|
250
|
+
npm run start -- https://youtube.com/watch?v=abc123 --threshold 8 --top-n 5
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
### Testing with limited chunks
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
npm run start -- https://youtube.com/watch?v=abc123 --max-chunks 3
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### Timestamp offset and encoding preset
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
# Fix 3-second audio delay (shift earlier)
|
|
263
|
+
TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --clip
|
|
264
|
+
|
|
265
|
+
# Fix 2-second early start (shift later)
|
|
266
|
+
TIMESTAMP_OFFSET_SECONDS=2 npm run start -- <url> --clip
|
|
267
|
+
|
|
268
|
+
# High quality, slower processing, with offset
|
|
269
|
+
FFMPEG_PRESET=slow TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --clip
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
## Troubleshooting Audio Sync Issues
|
|
273
|
+
|
|
274
|
+
### Problem: Audio is delayed or starts early
|
|
275
|
+
|
|
276
|
+
**Symptoms:**
|
|
277
|
+
|
|
278
|
+
- Video starts at correct moment but audio plays 2-5 seconds later/earlier
|
|
279
|
+
- Lip movements don't match speech in the clip
|
|
280
|
+
- Content in clip doesn't match the transcript segment
|
|
281
|
+
|
|
282
|
+
**Root Causes:**
|
|
283
|
+
|
|
284
|
+
1. **Transcript misalignment** - Transcript timestamps don't perfectly match the video
|
|
285
|
+
- **Auto-generated captions**: Often have 1-3 second delays
|
|
286
|
+
- **Manual captions**: Usually more accurate but can have timing issues
|
|
287
|
+
- **Multiple caption tracks**: Transcripts from different video versions
|
|
288
|
+
|
|
289
|
+
2. **Millisecond precision loss** - Old implementation lost decimal seconds
|
|
290
|
+
- Now fixed: `--download-sections` uses HH:MM:SS.mmm format
|
|
291
|
+
|
|
292
|
+
3. **Version differences** - The transcript might be from a slightly different version of the video
|
|
293
|
+
|
|
294
|
+
### Solution: Use `TIMESTAMP_OFFSET_SECONDS`
|
|
295
|
+
|
|
296
|
+
**What it does:**
|
|
297
|
+
Applies a global offset to all clip timestamps. Positive = shift later, negative = shift earlier.
|
|
298
|
+
|
|
299
|
+
**How to use:**
|
|
300
|
+
|
|
301
|
+
```bash
|
|
302
|
+
# Add to .env
|
|
303
|
+
TIMESTAMP_OFFSET_SECONDS=-3
|
|
304
|
+
|
|
305
|
+
# Or inline
|
|
306
|
+
TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --clip
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
### Finding the Correct Offset
|
|
310
|
+
|
|
311
|
+
**Step 1: Test with logging**
|
|
312
|
+
|
|
313
|
+
Run a single segment and observe the logs:
|
|
314
|
+
|
|
315
|
+
```bash
|
|
316
|
+
TIMESTAMP_OFFSET_SECONDS=0 npm run start -- <url> --download-sections 1
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
Look for log lines like these (this sample shows a run with an offset of `-3`):
|
|
320
|
+
|
|
321
|
+
```
|
|
322
|
+
[info] Downloading segment 1: 00:02:00.500-00:02:30.000 (strong opinion...)
|
|
323
|
+
[info] Requested: 120.50s - 150.00s
|
|
324
|
+
[info] Adjusted: 117.50s - 147.00s (offset: -3s)
|
|
325
|
+
[info] Cutting clip: start=117.50s, end=147.00s, duration=29.50s
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
**Step 2: Play and verify**
|
|
329
|
+
|
|
330
|
+
- Open the generated clip
|
|
331
|
+
- Check if the moment matches the transcript description
|
|
332
|
+
- Note if it's too early or too late
|
|
333
|
+
|
|
334
|
+
**Step 3: Adjust offset**
|
|
335
|
+
|
|
336
|
+
If clip **starts 3 seconds late**:
|
|
337
|
+
|
|
338
|
+
```bash
|
|
339
|
+
TIMESTAMP_OFFSET_SECONDS=-3 # Negative = shift earlier
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
If clip **starts 2 seconds early**:
|
|
343
|
+
|
|
344
|
+
```bash
|
|
345
|
+
TIMESTAMP_OFFSET_SECONDS=2 # Positive = shift later
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
**Step 4: Verify with multiple clips**
|
|
349
|
+
|
|
350
|
+
```bash
|
|
351
|
+
TIMESTAMP_OFFSET_SECONDS=-3 npm run start -- <url> --download-sections 3
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
Check if the offset works consistently across different segments.
|
|
355
|
+
|
|
356
|
+
### Binary Search for Optimal Offset
|
|
357
|
+
|
|
358
|
+
If you're unsure of the exact offset:
|
|
359
|
+
|
|
360
|
+
```bash
|
|
361
|
+
# Try 0, -3, -6, -9 to see which is closest
|
|
362
|
+
for offset in 0 -3 -6 -9; do
|
|
363
|
+
TIMESTAMP_OFFSET_SECONDS=$offset npm run start -- <url> --download-sections 1
|
|
364
|
+
echo "Tested offset: $offset"
|
|
365
|
+
# Play and check accuracy
|
|
366
|
+
done
|
|
367
|
+
```
|
|
368
|
+
|
|
369
|
+
Then narrow down: if `-3` looks closest, try `-2` and `-4`, and so on.
|
|
370
|
+
|
|
371
|
+
### Common Scenarios
|
|
372
|
+
|
|
373
|
+
| Scenario | Likely Offset | Explanation |
|
|
374
|
+
| ----------------------- | ------------- | --------------------------------------------------- |
|
|
375
|
+
| Auto-generated captions | `-1` to `-3` | ASR timing often lags behind actual speech |
|
|
376
|
+
| Manual captions | `0` to `-1` | Usually more accurate, small sync issues |
|
|
377
|
+
| Multiple caption tracks | `-2` to `-5` | Different versions may have systematic offset |
|
|
378
|
+
| Regional variations | Varies | Different regions may have different caption timing |
|
|
379
|
+
|
|
380
|
+
### Verifying the Fix
|
|
381
|
+
|
|
382
|
+
After applying `TIMESTAMP_OFFSET_SECONDS`, verify:
|
|
383
|
+
|
|
384
|
+
1. **Watch the clip**: Audio and video should be synchronized
|
|
385
|
+
2. **Check multiple clips**: Offset should work consistently
|
|
386
|
+
3. **Compare with original**: Clip should match the described content
|
|
387
|
+
|
|
388
|
+
If offset varies between segments, the issue might be video-specific rather than a global transcript offset.
|
|
389
|
+
|
|
390
|
+
### CLI Flags
|
|
391
|
+
|
|
392
|
+
| Flag | Description |
|
|
393
|
+
| ---------------------------- | ----------------------------------------------------------------------------- |
|
|
394
|
+
| `--clip` | Download video and generate mp4 clips for each segment |
|
|
395
|
+
| `--download-sections <mode>` | yt-dlp mode: `all` (full video) or `N` (top N segments only, e.g. 1, 2, 3...) |
|
|
396
|
+
| `--video-path <path>` | Custom output directory for downloaded videos and clips |
|
|
397
|
+
| `--threshold <n>` | Minimum score (1–10) to keep a segment |
|
|
398
|
+
| `--top-n <n>` | Maximum number of segments to return |
|
|
399
|
+
| `--max-duration <s>` | Abort if video is longer than N seconds |
|
|
400
|
+
| `--max-chunks <n>` | Limit number of transcript chunks sent to LLM |
|
|
401
|
+
| `--max-parallel <n>` | Max number of LLM calls to run in parallel |
|
|
402
|
+
| `--output-json <path>` | Write output JSON to file instead of stdout |
|
|
403
|
+
| `--no-cache` | Bypass all caches and force a fresh run |
|
|
404
|
+
| `--help, -h` | Show help message |
|
|
405
|
+
|
|
406
|
+
## Docs
|
|
407
|
+
|
|
408
|
+
Full architecture and build plan: [docs/plan.md](docs/plan.md)
|
|
409
|
+
|
|
410
|
+
yt-dlp download modes: [docs/yt-downloader.md](docs/yt-downloader.md)
|
package/dist/cli.js
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import { config } from './config/index.js';
|
|
2
|
+
import { log } from './utils/logger.js';
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Argument parser
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
/**
 * Parse a process argument vector into a CLI options object.
 *
 * The first two entries of `argv` are skipped. Any token that does not start
 * with `--` (other than `-h`) is taken as the video URL; if several are
 * given, the last one wins. `--download-sections` and `--local-video` both
 * switch `clip` on.
 *
 * On invalid input the function logs an error via `log.error` and calls
 * `process.exit(1)`.
 *
 * @param {string[]} argv - Full argument vector, typically `process.argv`.
 * @returns {object} Parsed options with the keys `url`, `clip`,
 *   `downloadSections` (`'all'` or a positive integer), `videoPath`,
 *   `localVideo`, `threshold`, `topN`, `maxDuration`, `maxChunks`,
 *   `maxParallel`, `outputJson`, `noCache`, `noAudio`, `gameProfile`, `help`.
 *   Options that were not supplied are `undefined` (or `false` for switches).
 */
export function parseArgs(argv) {
  const args = argv.slice(2);
  const result = {
    url: undefined,
    clip: false,
    downloadSections: undefined,
    videoPath: undefined,
    // Declared up front so the result has the same shape whether or not
    // --local-video was passed.
    localVideo: undefined,
    threshold: undefined,
    topN: undefined,
    maxDuration: undefined,
    maxChunks: undefined,
    maxParallel: undefined,
    outputJson: undefined,
    noCache: false,
    noAudio: false,
    gameProfile: undefined,
    help: false,
  };

  let i = 0;

  // Report a usage error and terminate with a non-zero exit code.
  const fail = (message) => {
    log.error(message);
    process.exit(1);
  };

  // Return the value that follows the current flag, or `undefined` when it is
  // absent, blank, or is itself another `--flag` (which would otherwise be
  // swallowed silently as the value). The index only advances on success.
  const nextValue = () => {
    const candidate = args[i + 1];
    if (
      candidate === undefined ||
      candidate.trim() === '' ||
      candidate.startsWith('--')
    ) {
      return undefined;
    }
    i += 1;
    return candidate;
  };

  // Numeric variant: a missing value becomes NaN so callers can reject it.
  const nextNumber = () => Number(nextValue());

  for (; i < args.length; i++) {
    const arg = args[i];
    switch (arg) {
      case '--help':
      case '-h':
        result.help = true;
        break;

      case '--clip':
        result.clip = true;
        break;

      case '--download-sections': {
        const val = nextValue();
        if (!val) {
          fail(`--download-sections requires a value: 'all' or a number (1, 2, 3, ...)`);
        }
        if (val === 'all') {
          result.downloadSections = 'all';
        } else if (val === 'segments') {
          // Legacy mode name: still accepted, but mapped to a full download.
          log.warn(`--download-sections segments is deprecated. Use a number like --download-sections 5 to download top 5 segments, or --download-sections all for full video.`);
          result.downloadSections = 'all';
        } else {
          const num = Number(val);
          // Number.isInteger is false for NaN, so no separate NaN check is needed.
          if (!Number.isInteger(num) || num < 1) {
            fail(`--download-sections requires 'all' or a positive integer (1, 2, 3, ...)`);
          }
          result.downloadSections = num;
        }
        result.clip = true;
        break;
      }

      case '--video-path': {
        const val = nextValue();
        if (!val) {
          fail(`--video-path requires a directory path`);
        }
        result.videoPath = val;
        break;
      }

      case '--local-video': {
        const val = nextValue();
        if (!val) {
          fail(`--local-video requires a file path`);
        }
        result.localVideo = val;
        result.clip = true;
        break;
      }

      case '--no-cache':
        result.noCache = true;
        break;

      case '--threshold': {
        const val = nextNumber();
        if (Number.isNaN(val)) {
          fail(`--threshold requires a numeric value`);
        }
        result.threshold = val;
        break;
      }

      case '--top-n': {
        const val = nextNumber();
        if (Number.isNaN(val)) {
          fail(`--top-n requires a numeric value`);
        }
        result.topN = val;
        break;
      }

      case '--max-duration': {
        const val = nextNumber();
        if (Number.isNaN(val)) {
          fail(`--max-duration requires a numeric value`);
        }
        result.maxDuration = val;
        break;
      }

      case '--max-chunks': {
        const val = nextNumber();
        if (!Number.isInteger(val) || val < 1) {
          fail(`--max-chunks requires a positive integer`);
        }
        result.maxChunks = val;
        break;
      }

      case '--max-parallel': {
        const val = nextNumber();
        if (!Number.isInteger(val) || val < 1) {
          fail(`--max-parallel requires a positive integer`);
        }
        result.maxParallel = val;
        break;
      }

      case '--no-audio':
        result.noAudio = true;
        break;

      case '--game-profile': {
        const val = nextValue();
        if (!val) {
          fail(`--game-profile requires a value (valorant, fps, boss_fight, general)`);
        }
        result.gameProfile = val;
        break;
      }

      case '--output-json': {
        const val = nextValue();
        if (!val) {
          fail(`--output-json requires a file path`);
        }
        result.outputJson = val;
        break;
      }

      default:
        if (!arg.startsWith('--')) {
          // Positional argument: treated as the video URL.
          result.url = arg;
        } else {
          log.error(`Unknown flag: ${arg}`);
          printUsage();
          process.exit(1);
        }
    }
  }

  return result;
}
|
|
144
|
+
// ---------------------------------------------------------------------------
|
|
145
|
+
// Usage text
|
|
146
|
+
// ---------------------------------------------------------------------------
|
|
147
|
+
/**
 * Print the CLI help text to stdout.
 *
 * Defaults shown in the text are read from the loaded `config`
 * (`DOWNLOAD_SECTIONS_MODE`, `SCORE_THRESHOLD`, `TOP_N_SEGMENTS`,
 * `GAME_PROFILE`). The surrounding blank lines of the template are trimmed
 * before printing.
 *
 * @returns {void}
 */
export function printUsage() {
  // NOTE: --local-video switches --clip on by itself in parseArgs, so the help
  // text says "implies" rather than "requires".
  console.log(`
Usage: npm run start -- <youtube-url> [options]
       npx tsx src/index.ts <youtube-url> [options]

Note: when invoking via npm run, use -- to pass flags to the script:
  npm run start -- <url> --max-chunks 3

Arguments:
  <youtube-url>               YouTube video URL (required)

Options:
  --clip                      Download video and generate mp4 clips for each segment
  --download-sections <mode>  yt-dlp download mode: 'all' (full video) or N (top N segments only, e.g. 1, 2, 3...) (default: ${config.DOWNLOAD_SECTIONS_MODE})
  --local-video <path>        Path to local video file (skips yt-dlp download, implies --clip)
  --video-path <path>         Custom output directory for downloaded videos and clips (overrides DOWNLOAD_DIR/OUTPUT_DIR)
  --threshold <n>             Minimum score to keep a segment (default: ${config.SCORE_THRESHOLD})
  --top-n <n>                 Maximum number of segments to return (default: ${config.TOP_N_SEGMENTS})
  --max-duration <s>          Abort if video is longer than <s> seconds
  --max-chunks <n>            Limit the number of transcript chunks sent to the LLM (useful for testing/cost control)
  --max-parallel <n>          Max number of LLM calls to run in parallel (default: LLM_CONCURRENCY env, or 3)
  --output-json <path>        Write output JSON to file instead of stdout
  --no-cache                  Bypass all caches and force a fresh run (transcript + chunk LLM results)
  --no-audio                  Disable audio event detection (transcript-only mode)
  --game-profile <type>       Game profile: valorant, fps, boss_fight, general (default: ${config.GAME_PROFILE})
  --help, -h                  Show this help message

Examples:
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ
  npm run start -- https://youtu.be/dQw4w9WgXcQ --clip
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --download-sections all
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --download-sections 3
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --download-sections 5 --video-path ./my-clips
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --local-video ./downloads/dQw4w9WgXcQ.mp4
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --local-video /path/to/video.mp4 --top-n 5
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --threshold 8 --top-n 5
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --output-json results.json
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --max-chunks 3
  npm run start -- https://youtube.com/watch?v=dQw4w9WgXcQ --max-parallel 5
`.trim());
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import 'dotenv/config';
|
|
2
|
+
import { ConfigSchema } from '../types/config.js';
|
|
3
|
+
/**
 * Validate `process.env` against `ConfigSchema` and return the parsed result.
 *
 * When validation fails, one line per issue (dotted path plus message) is
 * written to stderr and the process exits with code 1.
 *
 * @returns {object} The `data` produced by `ConfigSchema.safeParse`.
 */
function loadConfig() {
  const parsed = ConfigSchema.safeParse(process.env);
  if (parsed.success) {
    return parsed.data;
  }

  const lines = [];
  for (const issue of parsed.error.issues) {
    lines.push(` - ${issue.path.join('.')}: ${issue.message}`);
  }
  console.error(`[error] Invalid configuration:\n${lines.join('\n')}`);
  process.exit(1);
  // Only reached if process.exit has been replaced (e.g. stubbed).
  return parsed.data;
}

// Evaluated once at module load; importers share this single config object.
export const config = loadConfig();
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { config } from './env.js';
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { log } from './utils/logger.js';
|
|
2
|
+
import { formatConfig } from './utils/redactConfig.js';
|
|
3
|
+
import { config } from './config/index.js';
|
|
4
|
+
import { parseArgs, printUsage } from './cli.js';
|
|
5
|
+
import { runPipeline } from './pipeline/runner.js';
|
|
6
|
+
// ---------------------------------------------------------------------------
// CLI entry point: parse flags, validate them, log a banner, run the pipeline
// ---------------------------------------------------------------------------
const cliArgs = parseArgs(process.argv);
const { help, url, clip, localVideo, downloadSections, videoPath } = cliArgs;

// --help short-circuits every other check.
if (help) {
  printUsage();
  process.exit(0);
}

// A URL is mandatory for every other mode.
if (!url) {
  log.error('No YouTube URL provided.');
  printUsage();
  process.exit(1);
}

if (localVideo && !clip) {
  log.error('--local-video requires --clip flag');
  printUsage();
  process.exit(1);
}

if (localVideo && downloadSections) {
  log.warn('--download-sections is ignored when using --local-video (clipping all segments from --top-n)');
}

// Startup banner: the model name followed by a tag for each active option.
const bannerParts = [`Starting video-clipper (model: ${config.LLM_MODEL})`];
if (clip) {
  bannerParts.push(' [--clip enabled]');
}
if (localVideo) {
  bannerParts.push(` [--local-video: ${localVideo}]`);
}
if (downloadSections !== undefined && downloadSections !== 'all') {
  bannerParts.push(` [--download-sections: ${downloadSections}]`);
}
if (videoPath) {
  bannerParts.push(` [--video-path: ${videoPath}]`);
}
log.info(bannerParts.join(''));
log.info(`Config: ${formatConfig(config)}`);

// Any pipeline failure is reported as a single error line and a non-zero exit.
runPipeline(cliArgs).catch((error) => {
  const message = error instanceof Error ? error.message : String(error);
  log.error(message);
  process.exit(1);
});
|