@nadimtuhin/ytranscript 1.0.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +210 -123
- package/dist/cli.d.ts +6 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +104 -51
- package/dist/index.d.ts +30 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +63 -25
- package/dist/lib/fetcher.d.ts +26 -0
- package/dist/lib/fetcher.d.ts.map +1 -0
- package/dist/lib/fs.d.ts +20 -0
- package/dist/lib/fs.d.ts.map +1 -0
- package/dist/lib/processor.d.ts +14 -0
- package/dist/lib/processor.d.ts.map +1 -0
- package/dist/loaders/history.d.ts +9 -0
- package/dist/loaders/history.d.ts.map +1 -0
- package/dist/loaders/index.d.ts +20 -0
- package/dist/loaders/index.d.ts.map +1 -0
- package/dist/loaders/watchLater.d.ts +9 -0
- package/dist/loaders/watchLater.d.ts.map +1 -0
- package/dist/mcp.d.ts +8 -0
- package/dist/mcp.d.ts.map +1 -0
- package/dist/mcp.js +24 -7
- package/dist/outputs/index.d.ts +30 -0
- package/dist/outputs/index.d.ts.map +1 -0
- package/dist/types.d.ts +93 -0
- package/dist/types.d.ts.map +1 -0
- package/package.json +6 -6
package/README.md
CHANGED
|
@@ -1,36 +1,98 @@
|
|
|
1
1
|
# ytranscript
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[npm version](https://www.npmjs.com/package/@nadimtuhin/ytranscript)
|
|
4
|
+
[npm downloads](https://www.npmjs.com/package/@nadimtuhin/ytranscript)
|
|
5
|
+
[CI status](https://github.com/nadimtuhin/ytranscript/actions/workflows/ci.yml)
|
|
6
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
4
7
|
|
|
5
|
-
|
|
8
|
+
Extract transcripts from your entire YouTube watch history in minutes. Build AI-powered video summaries, searchable archives, or feed transcripts directly to Claude, Cursor, and other AI assistants via the built-in MCP server.
|
|
6
9
|
|
|
7
|
-
|
|
10
|
+
**[Read the blog post: "Automating My Second Brain with YouTube Transcripts"](https://nadimtuhin.com/blog/ytranscript-mcp-youtube-transcripts)**
|
|
8
11
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
- **
|
|
12
|
-
- **
|
|
12
|
+
## Why ytranscript?
|
|
13
|
+
|
|
14
|
+
- **No API keys required** - Uses YouTube's public innertube API directly
|
|
15
|
+
- **Works with AI assistants** - Built-in MCP server for Claude, Cursor, and others
|
|
16
|
+
- **Bulk processing** - Process thousands of videos from Google Takeout exports
|
|
13
17
|
- **Resume-safe** - Automatically skips already-processed videos
|
|
14
|
-
- **Multiple
|
|
15
|
-
|
|
16
|
-
|
|
18
|
+
- **Multiple formats** - JSON, JSONL, CSV, SRT, VTT, plain text
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# Get a transcript in 10 seconds
|
|
24
|
+
npx @nadimtuhin/ytranscript get dQw4w9WgXcQ
|
|
25
|
+
|
|
26
|
+
# Output: "We're no strangers to love, you know the rules..."
|
|
27
|
+
```
|
|
17
28
|
|
|
18
29
|
## Installation
|
|
19
30
|
|
|
20
31
|
```bash
|
|
21
|
-
#
|
|
32
|
+
# Global install (recommended for CLI usage)
|
|
22
33
|
npm install -g @nadimtuhin/ytranscript
|
|
23
34
|
|
|
24
|
-
# Or use
|
|
35
|
+
# Or use with npx (no install)
|
|
36
|
+
npx @nadimtuhin/ytranscript get VIDEO_ID
|
|
37
|
+
|
|
38
|
+
# Add to a project (for library usage)
|
|
25
39
|
npm add @nadimtuhin/ytranscript
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
**Runtimes supported:** Node.js 18+ and Bun 1.0+
|
|
43
|
+
|
|
44
|
+
## MCP Server (AI Assistant Integration)
|
|
45
|
+
|
|
46
|
+
ytranscript includes an MCP (Model Context Protocol) server that lets Claude, Cursor, and other AI assistants fetch YouTube transcripts directly.
|
|
47
|
+
|
|
48
|
+
### Available Tools
|
|
49
|
+
|
|
50
|
+
| Tool | Description |
|
|
51
|
+
|------|-------------|
|
|
52
|
+
| `get_transcript` | Fetch transcript with format options (text, segments, srt, vtt) |
|
|
53
|
+
| `get_transcript_languages` | List available caption languages for a video |
|
|
54
|
+
| `extract_video_id` | Extract video ID from various YouTube URL formats |
|
|
55
|
+
| `get_transcripts_bulk` | Fetch transcripts for multiple videos at once |
|
|
26
56
|
|
|
27
|
-
|
|
28
|
-
|
|
57
|
+
### Setup with Claude Desktop
|
|
58
|
+
|
|
59
|
+
Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS):
|
|
60
|
+
|
|
61
|
+
```json
|
|
62
|
+
{
|
|
63
|
+
"mcpServers": {
|
|
64
|
+
"ytranscript": {
|
|
65
|
+
"command": "npx",
|
|
66
|
+
"args": ["-y", "@nadimtuhin/ytranscript", "mcp"]
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
}
|
|
29
70
|
```
|
|
30
71
|
|
|
72
|
+
Or if installed globally:
|
|
73
|
+
|
|
74
|
+
```json
|
|
75
|
+
{
|
|
76
|
+
"mcpServers": {
|
|
77
|
+
"ytranscript": {
|
|
78
|
+
"command": "ytranscript-mcp"
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Example Prompts for Claude
|
|
85
|
+
|
|
86
|
+
Once configured, you can ask Claude:
|
|
87
|
+
|
|
88
|
+
- "Get the transcript for this YouTube video: https://youtube.com/watch?v=dQw4w9WgXcQ"
|
|
89
|
+
- "Summarize the key points from this video"
|
|
90
|
+
- "What languages are available for this video's captions?"
|
|
91
|
+
- "Get transcripts for these 5 videos and compare their content"
|
|
92
|
+
|
|
31
93
|
## CLI Usage
|
|
32
94
|
|
|
33
|
-
###
|
|
95
|
+
### Single Video
|
|
34
96
|
|
|
35
97
|
```bash
|
|
36
98
|
# Basic usage (outputs plain text)
|
|
@@ -49,13 +111,17 @@ ytranscript get dQw4w9WgXcQ --format srt -o video.srt
|
|
|
49
111
|
ytranscript get dQw4w9WgXcQ --format json
|
|
50
112
|
```
|
|
51
113
|
|
|
52
|
-
### Check
|
|
114
|
+
### Check Available Languages
|
|
53
115
|
|
|
54
116
|
```bash
|
|
55
117
|
ytranscript info dQw4w9WgXcQ
|
|
118
|
+
# Output:
|
|
119
|
+
# en English (auto-generated)
|
|
120
|
+
# es Spanish
|
|
121
|
+
# fr French
|
|
56
122
|
```
|
|
57
123
|
|
|
58
|
-
### Bulk
|
|
124
|
+
### Bulk Processing
|
|
59
125
|
|
|
60
126
|
```bash
|
|
61
127
|
# From Google Takeout exports
|
|
@@ -71,36 +137,77 @@ ytranscript bulk --videos "dQw4w9WgXcQ,jNQXAC9IVRw,9bZkp7q19f0"
|
|
|
71
137
|
# From a file (one ID or URL per line)
|
|
72
138
|
ytranscript bulk --file videos.txt
|
|
73
139
|
|
|
74
|
-
# Resume a previous run
|
|
140
|
+
# Resume a previous run (skips already-processed videos)
|
|
75
141
|
ytranscript bulk --history watch-history.json --resume
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Rate Limiting
|
|
145
|
+
|
|
146
|
+
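YouTube may rate-limit requests. Use these flags to control pacing (the trailing `#` notes in the example are annotations only; remove them before running, because a comment placed after a line-continuation backslash breaks the command):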
|
|
76
147
|
|
|
77
|
-
|
|
148
|
+
```bash
|
|
78
149
|
ytranscript bulk \
|
|
79
150
|
--history watch-history.json \
|
|
80
|
-
--concurrency
|
|
81
|
-
--pause-after
|
|
82
|
-
--pause-ms
|
|
151
|
+
--concurrency 4 \ # Max concurrent requests (default: 4, safe: 1-8)
|
|
152
|
+
--pause-after 10 \ # Pause after N requests (default: 10)
|
|
153
|
+
--pause-ms 5000 # Pause duration in ms (default: 5000)
|
|
83
154
|
```
|
|
84
155
|
|
|
85
|
-
|
|
156
|
+
**Recommended for large batches:** `--concurrency 2 --pause-after 10 --pause-ms 5000`
|
|
157
|
+
|
|
158
|
+
### Proxy Support
|
|
159
|
+
|
|
160
|
+
Route requests through an HTTP proxy to avoid rate limiting or to reach YouTube from restricted networks:
|
|
86
161
|
|
|
87
|
-
|
|
162
|
+
```bash
|
|
163
|
+
# CLI with proxy
|
|
164
|
+
ytranscript get dQw4w9WgXcQ --proxy http://localhost:8080
|
|
165
|
+
|
|
166
|
+
# Bulk with proxy
|
|
167
|
+
ytranscript bulk --history watch-history.json --proxy http://user:pass@proxy.example.com:8080
|
|
168
|
+
|
|
169
|
+
# With authentication
|
|
170
|
+
ytranscript get dQw4w9WgXcQ --proxy http://username:password@proxy:8080
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
Programmatic usage:
|
|
88
174
|
|
|
89
175
|
```typescript
|
|
90
176
|
import { fetchTranscript } from '@nadimtuhin/ytranscript';
|
|
91
177
|
|
|
92
178
|
const transcript = await fetchTranscript('dQw4w9WgXcQ', {
|
|
93
|
-
|
|
94
|
-
|
|
179
|
+
proxy: {
|
|
180
|
+
url: 'http://localhost:8080',
|
|
181
|
+
},
|
|
95
182
|
});
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
> Proxy support inspired by [ytfetcher](https://github.com/kaya70875/ytfetcher)
|
|
186
|
+
|
|
187
|
+
## Programmatic API
|
|
188
|
+
|
|
189
|
+
### Fetch a Single Transcript
|
|
190
|
+
|
|
191
|
+
```typescript
|
|
192
|
+
import { fetchTranscript } from '@nadimtuhin/ytranscript';
|
|
96
193
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
194
|
+
try {
|
|
195
|
+
const transcript = await fetchTranscript('dQw4w9WgXcQ', {
|
|
196
|
+
languages: ['en', 'es'], // Preference order
|
|
197
|
+
includeAutoGenerated: true,
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
console.log(transcript.text); // Full transcript text
|
|
201
|
+
console.log(transcript.segments); // Array of { text, start, duration }
|
|
202
|
+
console.log(transcript.language); // 'en'
|
|
203
|
+
console.log(transcript.isAutoGenerated); // true/false
|
|
204
|
+
} catch (error) {
|
|
205
|
+
// See "Error Handling" section below
|
|
206
|
+
console.error(error.message);
|
|
207
|
+
}
|
|
101
208
|
```
|
|
102
209
|
|
|
103
|
-
### Bulk
|
|
210
|
+
### Bulk Processing
|
|
104
211
|
|
|
105
212
|
```typescript
|
|
106
213
|
import {
|
|
@@ -132,7 +239,7 @@ const results = await processVideos(videos, {
|
|
|
132
239
|
const transcripts = results.filter((r) => r.transcript);
|
|
133
240
|
```
|
|
134
241
|
|
|
135
|
-
### Streaming for
|
|
242
|
+
### Streaming for Large Datasets
|
|
136
243
|
|
|
137
244
|
```typescript
|
|
138
245
|
import { streamVideos, appendJsonl } from '@nadimtuhin/ytranscript';
|
|
@@ -143,20 +250,21 @@ for await (const result of streamVideos(videos, { concurrency: 4 })) {
|
|
|
143
250
|
}
|
|
144
251
|
```
|
|
145
252
|
|
|
146
|
-
### Output
|
|
253
|
+
### Output Formatting
|
|
147
254
|
|
|
148
255
|
```typescript
|
|
149
256
|
import { fetchTranscript, formatSrt, formatVtt, formatText } from '@nadimtuhin/ytranscript';
|
|
257
|
+
import { writeFile } from 'fs/promises';
|
|
150
258
|
|
|
151
259
|
const transcript = await fetchTranscript('dQw4w9WgXcQ');
|
|
152
260
|
|
|
153
261
|
// SRT subtitles
|
|
154
262
|
const srt = formatSrt(transcript);
|
|
155
|
-
await
|
|
263
|
+
await writeFile('video.srt', srt);
|
|
156
264
|
|
|
157
265
|
// VTT subtitles
|
|
158
266
|
const vtt = formatVtt(transcript);
|
|
159
|
-
await
|
|
267
|
+
await writeFile('video.vtt', vtt);
|
|
160
268
|
|
|
161
269
|
// Plain text with timestamps
|
|
162
270
|
const text = formatText(transcript, true);
|
|
@@ -164,6 +272,43 @@ const text = formatText(transcript, true);
|
|
|
164
272
|
// [0:05] Second line...
|
|
165
273
|
```
|
|
166
274
|
|
|
275
|
+
## Error Handling
|
|
276
|
+
|
|
277
|
+
The library throws errors for various failure cases:
|
|
278
|
+
|
|
279
|
+
| Error Message | Cause | Solution |
|
|
280
|
+
|---------------|-------|----------|
|
|
281
|
+
| `No captions available for this video` | Video has no captions/subtitles | Check with `ytranscript info` first |
|
|
282
|
+
| `No suitable caption track found` | Requested language not available | Use `includeAutoGenerated: true` or request a different language |
|
|
283
|
+
| `Caption track is empty` | Captions exist but have no content | Rare; try a different language |
|
|
284
|
+
| `HTTP 429` | Rate limited by YouTube | Reduce concurrency, add pauses |
|
|
285
|
+
| `HTTP 403` | Video is private or region-locked | Cannot access this video |
|
|
286
|
+
|
|
287
|
+
```typescript
|
|
288
|
+
try {
|
|
289
|
+
const transcript = await fetchTranscript(videoId);
|
|
290
|
+
} catch (error) {
|
|
291
|
+
if (error.message.includes('No captions available')) {
|
|
292
|
+
console.log('This video has no subtitles');
|
|
293
|
+
} else if (error.message.includes('429')) {
|
|
294
|
+
console.log('Rate limited - slow down requests');
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
## Limitations
|
|
300
|
+
|
|
301
|
+
| Scenario | Supported |
|
|
302
|
+
|----------|-----------|
|
|
303
|
+
| Public videos with captions | ✅ Yes |
|
|
304
|
+
| Auto-generated captions | ✅ Yes |
|
|
305
|
+
| Manual/community captions | ✅ Yes |
|
|
306
|
+
| Private videos | ❌ No |
|
|
307
|
+
| Age-restricted videos | ❌ No |
|
|
308
|
+
| Live streams (while live) | ❌ No |
|
|
309
|
+
| Premiere videos (before premiere) | ❌ No |
|
|
310
|
+
| Region-locked videos | ❌ No (unless you're in the allowed region) |
|
|
311
|
+
|
|
167
312
|
## Google Takeout
|
|
168
313
|
|
|
169
314
|
To export your YouTube data:
|
|
@@ -195,110 +340,52 @@ interface Transcript {
|
|
|
195
340
|
|
|
196
341
|
interface TranscriptSegment {
|
|
197
342
|
text: string;
|
|
198
|
-
start: number;
|
|
199
|
-
duration: number;
|
|
343
|
+
start: number; // seconds
|
|
344
|
+
duration: number; // seconds
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
interface WatchHistoryMeta {
|
|
348
|
+
videoId: string;
|
|
349
|
+
title?: string;
|
|
350
|
+
url?: string;
|
|
351
|
+
channel?: { name?: string; url?: string };
|
|
352
|
+
watchedAt?: string;
|
|
353
|
+
source: 'history' | 'watch_later' | 'manual';
|
|
200
354
|
}
|
|
201
355
|
|
|
202
356
|
interface TranscriptResult {
|
|
203
357
|
meta: WatchHistoryMeta;
|
|
204
358
|
transcript: Transcript | null;
|
|
205
|
-
error?: string;
|
|
359
|
+
error?: string; // Present when transcript is null
|
|
206
360
|
}
|
|
207
361
|
|
|
208
362
|
interface FetchOptions {
|
|
209
|
-
languages?: string[];
|
|
210
|
-
timeout?: number;
|
|
211
|
-
includeAutoGenerated?: boolean;
|
|
363
|
+
languages?: string[]; // Default: ['en']
|
|
364
|
+
timeout?: number; // Default: 30000 (ms)
|
|
365
|
+
includeAutoGenerated?: boolean; // Default: true
|
|
366
|
+
proxy?: ProxyConfig; // Optional proxy configuration
|
|
212
367
|
}
|
|
213
368
|
|
|
214
|
-
interface
|
|
215
|
-
|
|
216
|
-
pauseAfter?: number;
|
|
217
|
-
pauseDuration?: number;
|
|
218
|
-
skipIds?: Set<string>;
|
|
219
|
-
onProgress?: (completed: number, total: number, result: TranscriptResult) => void;
|
|
369
|
+
interface ProxyConfig {
|
|
370
|
+
url: string; // HTTP proxy URL (e.g., "http://user:pass@host:port")
|
|
220
371
|
}
|
|
221
|
-
```
|
|
222
|
-
|
|
223
|
-
## License
|
|
224
|
-
|
|
225
|
-
MIT
|
|
226
|
-
|
|
227
|
-
---
|
|
228
|
-
|
|
229
|
-
## MCP Server (Model Context Protocol)
|
|
230
372
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
| `get_transcript` | Fetch transcript for a YouTube video with format options (text, segments, srt, vtt) |
|
|
238
|
-
| `get_transcript_languages` | List available caption languages for a video |
|
|
239
|
-
| `extract_video_id` | Extract video ID from various YouTube URL formats |
|
|
240
|
-
| `get_transcripts_bulk` | Fetch transcripts for multiple videos at once |
|
|
241
|
-
|
|
242
|
-
### Setup with Claude Desktop
|
|
243
|
-
|
|
244
|
-
Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):
|
|
245
|
-
|
|
246
|
-
```json
|
|
247
|
-
{
|
|
248
|
-
"mcpServers": {
|
|
249
|
-
"ytranscript": {
|
|
250
|
-
"command": "npx",
|
|
251
|
-
"args": ["-y", "ytranscript-mcp"]
|
|
252
|
-
}
|
|
253
|
-
}
|
|
254
|
-
}
|
|
255
|
-
```
|
|
256
|
-
|
|
257
|
-
Or if installed globally:
|
|
258
|
-
|
|
259
|
-
```json
|
|
260
|
-
{
|
|
261
|
-
"mcpServers": {
|
|
262
|
-
"ytranscript": {
|
|
263
|
-
"command": "ytranscript-mcp"
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
```
|
|
268
|
-
|
|
269
|
-
### Setup with Cursor
|
|
270
|
-
|
|
271
|
-
Add to your Cursor MCP settings:
|
|
272
|
-
|
|
273
|
-
```json
|
|
274
|
-
{
|
|
275
|
-
"mcpServers": {
|
|
276
|
-
"ytranscript": {
|
|
277
|
-
"command": "npx",
|
|
278
|
-
"args": ["-y", "ytranscript-mcp"]
|
|
279
|
-
}
|
|
280
|
-
}
|
|
373
|
+
interface BulkOptions extends FetchOptions {
|
|
374
|
+
concurrency?: number; // Default: 4
|
|
375
|
+
pauseAfter?: number; // Default: 10
|
|
376
|
+
pauseDuration?: number; // Default: 5000 (ms)
|
|
377
|
+
skipIds?: Set<string>; // Videos to skip
|
|
378
|
+
onProgress?: (completed: number, total: number, result: TranscriptResult) => void;
|
|
281
379
|
}
|
|
282
380
|
```
|
|
283
381
|
|
|
284
|
-
|
|
382
|
+
## Contributing
|
|
285
383
|
|
|
286
|
-
|
|
384
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
287
385
|
|
|
288
|
-
-
|
|
289
|
-
-
|
|
290
|
-
- "Summarize the transcript of this video"
|
|
291
|
-
- "Get transcripts for these 5 videos and compare their content"
|
|
386
|
+
- Report bugs via [GitHub Issues](https://github.com/nadimtuhin/ytranscript/issues)
|
|
387
|
+
- Security issues: see [SECURITY.md](SECURITY.md)
|
|
292
388
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
```bash
|
|
296
|
-
# Via npx
|
|
297
|
-
npx ytranscript-mcp
|
|
298
|
-
|
|
299
|
-
# Or if installed globally
|
|
300
|
-
ytranscript-mcp
|
|
389
|
+
## License
|
|
301
390
|
|
|
302
|
-
|
|
303
|
-
bun run dev:mcp
|
|
304
|
-
```
|
|
391
|
+
MIT
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA;;GAEG"}
|