mcp-headless-youtube-transcript 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +15 -0
- package/README.md +94 -0
- package/build/index.d.ts +3 -0
- package/build/index.js +94 -0
- package/build/utils.d.ts +3 -0
- package/build/utils.js +26 -0
- package/package.json +49 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"WebFetch(domain:github.com)",
|
|
5
|
+
"Bash(mkdir:*)",
|
|
6
|
+
"Bash(npm install)",
|
|
7
|
+
"Bash(npm run build:*)",
|
|
8
|
+
"Bash(rm:*)",
|
|
9
|
+
"Bash(npm run test:run:*)",
|
|
10
|
+
"Bash(timeout 5s npm start)",
|
|
11
|
+
"Bash(npm view:*)"
|
|
12
|
+
],
|
|
13
|
+
"deny": []
|
|
14
|
+
}
|
|
15
|
+
}
|
package/README.md
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# MCP Headless YouTube Transcript
|
|
2
|
+
|
|
3
|
+
An MCP (Model Context Protocol) server that extracts YouTube video transcripts using the `headless-youtube-captions` library.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- Extract transcripts from YouTube videos using video ID or full URL
|
|
8
|
+
- Support for multiple languages
|
|
9
|
+
- Timestamped transcript output
|
|
10
|
+
- Built with TypeScript and the MCP SDK
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
npm install
|
|
16
|
+
npm run build
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
### As an MCP Server
|
|
22
|
+
|
|
23
|
+
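This server implements the Model Context Protocol over stdio. Any MCP client that can launch a local command can use it by running the `mcp-headless-youtube-transcript` executable (or `npm start` from a built checkout).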
|
|
24
|
+
|
|
25
|
+
### Tools Available
|
|
26
|
+
|
|
27
|
+
#### `get_youtube_transcript`
|
|
28
|
+
|
|
29
|
+
Extracts transcript/captions from a YouTube video.
|
|
30
|
+
|
|
31
|
+
**Parameters:**
|
|
32
|
+
- `videoId` (required): YouTube video ID or full URL
|
|
33
|
+
- `lang` (optional): Language code for captions (e.g., "en", "es", "ko"). Defaults to "en"
|
|
34
|
+
|
|
35
|
+
**Example:**
|
|
36
|
+
```json
|
|
37
|
+
{
|
|
38
|
+
"name": "get_youtube_transcript",
|
|
39
|
+
"arguments": {
|
|
40
|
+
"videoId": "dQw4w9WgXcQ",
|
|
41
|
+
"lang": "en"
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Supported URL Formats
|
|
47
|
+
|
|
48
|
+
- Video ID: `dQw4w9WgXcQ`
|
|
49
|
+
- YouTube URLs:
|
|
50
|
+
- `https://www.youtube.com/watch?v=dQw4w9WgXcQ`
|
|
51
|
+
- `https://youtu.be/dQw4w9WgXcQ`
|
|
52
|
+
- `https://www.youtube.com/embed/dQw4w9WgXcQ`
|
|
53
|
+
- `https://www.youtube.com/v/dQw4w9WgXcQ`
|
|
54
|
+
|
|
55
|
+
## Development
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
# Install dependencies
|
|
59
|
+
npm install
|
|
60
|
+
|
|
61
|
+
# Run in development mode
|
|
62
|
+
npm run dev
|
|
63
|
+
|
|
64
|
+
# Build for production
|
|
65
|
+
npm run build
|
|
66
|
+
|
|
67
|
+
# Start the server
|
|
68
|
+
npm start
|
|
69
|
+
|
|
70
|
+
# Run tests
|
|
71
|
+
npm test
|
|
72
|
+
|
|
73
|
+
# Run tests once (CI mode)
|
|
74
|
+
npm run test:run
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Testing
|
|
78
|
+
|
|
79
|
+
The project includes comprehensive tests:
|
|
80
|
+
|
|
81
|
+
- **Unit tests**: Test helper functions like URL parsing and time formatting
|
|
82
|
+
- **Integration tests**: Test the core transcript extraction logic with mocked APIs
|
|
83
|
+
- **Manual tests**: Optional tests that call real YouTube APIs (skipped by default)
|
|
84
|
+
|
|
85
|
+
All tests use Vitest. The unit and integration tests mock the `headless-youtube-captions` library, so they run reliably without external API dependencies; only the optional manual tests contact YouTube.
|
|
86
|
+
|
|
87
|
+
## Dependencies
|
|
88
|
+
|
|
89
|
+
- `@modelcontextprotocol/sdk`: MCP SDK for building servers
|
|
90
|
+
- `headless-youtube-captions`: Library for extracting YouTube captions
|
|
91
|
+
|
|
92
|
+
## License
|
|
93
|
+
|
|
94
|
+
MIT
|
package/build/index.d.ts
ADDED
package/build/index.js
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
3
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
4
|
+
import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
5
|
+
import { getSubtitles } from 'headless-youtube-captions';
|
|
6
|
+
import { extractVideoId, formatTime } from './utils.js';
|
|
7
|
+
// MCP server instance: identity first, declared capabilities second.
// The version matches the "version" field in package.json (0.1.0) so the
// server does not advertise a release number the package never had.
const server = new Server({
    name: 'mcp-headless-youtube-transcript',
    version: '0.1.0',
}, {
    capabilities: {
        // Only the tools capability is declared.
        tools: {},
    },
});
|
|
15
|
+
// Tool definitions
// Answers tool-listing requests with the single tool this server offers,
// `get_youtube_transcript`, and the JSON schema of its arguments.
server.setRequestHandler(ListToolsRequestSchema, async () => {
    // Required argument: a bare video ID or a full YouTube URL.
    const videoIdProperty = {
        type: 'string',
        description: 'YouTube video ID or full URL',
    };
    // Optional argument: caption language, defaulting to English.
    const langProperty = {
        type: 'string',
        description: 'Language code for captions (e.g., "en", "es", "ko"). Defaults to "en"',
        default: 'en',
    };
    const transcriptTool = {
        name: 'get_youtube_transcript',
        description: 'Extract transcript/captions from a YouTube video',
        inputSchema: {
            type: 'object',
            properties: {
                videoId: videoIdProperty,
                lang: langProperty,
            },
            required: ['videoId'],
        },
    };
    return { tools: [transcriptTool] };
});
|
|
41
|
+
// Tool execution
// Handles tool calls. For `get_youtube_transcript` it resolves the video ID,
// fetches the captions and returns them as one text block with a
// `[MM:SS] text` line per caption. Any failure while producing the transcript
// is returned as a tool result flagged `isError: true`; an unrecognised tool
// name throws instead.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const { name, arguments: args } = request.params;
    if (name !== 'get_youtube_transcript') {
        throw new Error(`Unknown tool: ${name}`);
    }
    try {
        // A client may omit `arguments` entirely; treat that as an empty object
        // so the request fails with the validation message below rather than
        // a destructuring TypeError.
        const { videoId, lang } = args ?? {};
        if (typeof videoId !== 'string' || videoId.trim() === '') {
            throw new Error('Invalid YouTube video ID or URL');
        }
        // Fall back to English when `lang` is missing, null, empty or not a string.
        const language = typeof lang === 'string' && lang.trim() !== '' ? lang.trim() : 'en';
        // Extract video ID from URL if a full URL is provided
        const extractedVideoId = extractVideoId(videoId.trim());
        if (!extractedVideoId) {
            throw new Error('Invalid YouTube video ID or URL');
        }
        // Get subtitles using headless-youtube-captions
        const subtitles = await getSubtitles({
            videoID: extractedVideoId,
            lang: language,
        });
        // Format the transcript: one "[MM:SS] text" line per caption entry
        const transcript = subtitles
            .map((subtitle) => `[${formatTime(subtitle.start)}] ${subtitle.text}`)
            .join('\n');
        return {
            content: [
                {
                    type: 'text',
                    text: `YouTube Transcript for video ID: ${extractedVideoId}\nLanguage: ${language}\n\n${transcript}`,
                },
            ],
        };
    }
    catch (error) {
        // Report the failure to the caller as an error result instead of throwing.
        const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
        return {
            content: [
                {
                    type: 'text',
                    text: `Error getting YouTube transcript: ${errorMessage}`,
                },
            ],
            isError: true,
        };
    }
});
|
|
85
|
+
// Entry point: attach the server to a stdio transport, then announce readiness
// via console.error.
async function main() {
    await server.connect(new StdioServerTransport());
    console.error('MCP Headless YouTube Transcript server running on stdio');
}

// If startup rejects, log the failure and exit with status 1.
main().catch((startupError) => {
    console.error('Server error:', startupError);
    process.exit(1);
});
|
|
94
|
+
//# sourceMappingURL=index.js.map
|
package/build/utils.d.ts
ADDED
package/build/utils.js
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
// Helper function to extract video ID from YouTube URL
//
// Accepts either a bare 11-character video ID or one of these URL shapes:
//   youtube.com/watch?v=ID (with `v` anywhere in the query string),
//   youtu.be/ID, youtube.com/embed/ID, youtube.com/v/ID
// Surrounding whitespace is ignored.
//
// Returns the 11-character ID, or null when the input is not a string or no
// ID can be found.
export function extractVideoId(input) {
    // Anything that is not a string cannot hold an ID; report "not found"
    // instead of throwing on `.match`.
    if (typeof input !== 'string') {
        return null;
    }
    const candidate = input.trim();
    // If it's already just a video ID (11 characters), return it
    if (/^[a-zA-Z0-9_-]{11}$/.test(candidate)) {
        return candidate;
    }
    // Extract from various YouTube URL formats.
    // `v=` must be its own query parameter (directly after `?`, `&` or `;`) so
    // that a parameter whose name merely ends in "v" is never mistaken for it.
    const patterns = [
        /(?:youtube\.com\/watch\?(?:.*[&;])?v=|youtu\.be\/|youtube\.com\/embed\/)([a-zA-Z0-9_-]{11})/,
        /youtube\.com\/v\/([a-zA-Z0-9_-]{11})/,
    ];
    for (const pattern of patterns) {
        const match = candidate.match(pattern);
        if (match) {
            return match[1];
        }
    }
    return null;
}
|
|
20
|
+
// Helper function to format time from seconds to MM:SS format
//
// `seconds` may be a number or a numeric string; fractions are truncated.
// Minutes are not wrapped at 60, so one hour and two minutes is "62:00".
// Missing, non-numeric, non-finite or negative input is formatted as "00:00"
// rather than leaking "NaN:NaN" or negative fields into the transcript.
export function formatTime(seconds) {
    const value = Number(seconds);
    const totalSeconds = Number.isFinite(value) && value > 0 ? Math.floor(value) : 0;
    const minutes = Math.floor(totalSeconds / 60);
    const remainingSeconds = totalSeconds % 60;
    return `${String(minutes).padStart(2, '0')}:${String(remainingSeconds).padStart(2, '0')}`;
}
|
|
26
|
+
//# sourceMappingURL=utils.js.map
|
package/package.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "mcp-headless-youtube-transcript",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "MCP server for extracting YouTube video transcripts using headless-youtube-captions",
|
|
5
|
+
"main": "build/index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"mcp-headless-youtube-transcript": "build/index.js"
|
|
8
|
+
},
|
|
9
|
+
"type": "module",
|
|
10
|
+
"scripts": {
|
|
11
|
+
"build": "tsc",
|
|
12
|
+
"dev": "tsx src/index.ts",
|
|
13
|
+
"start": "node build/index.js",
|
|
14
|
+
"test": "vitest",
|
|
15
|
+
"test:run": "vitest run",
|
|
16
|
+
"prepublishOnly": "npm run build && npm run test:run"
|
|
17
|
+
},
|
|
18
|
+
"keywords": [
|
|
19
|
+
"mcp",
|
|
20
|
+
"server",
|
|
21
|
+
"youtube",
|
|
22
|
+
"transcript",
|
|
23
|
+
"captions"
|
|
24
|
+
],
|
|
25
|
+
"author": "Andrew Lewin",
|
|
26
|
+
"repository": {
|
|
27
|
+
"type": "git",
|
|
28
|
+
"url": "git+https://github.com/andrewlwn77/mcp-headless-youtube-transcript.git"
|
|
29
|
+
},
|
|
30
|
+
"bugs": {
|
|
31
|
+
"url": "https://github.com/andrewlwn77/mcp-headless-youtube-transcript/issues"
|
|
32
|
+
},
|
|
33
|
+
"homepage": "https://github.com/andrewlwn77/mcp-headless-youtube-transcript#readme",
|
|
34
|
+
"license": "MIT",
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
37
|
+
"headless-youtube-captions": "^1.0.0"
|
|
38
|
+
},
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@types/node": "^22.0.0",
|
|
41
|
+
"tsx": "^4.0.0",
|
|
42
|
+
"typescript": "^5.0.0",
|
|
43
|
+
"vitest": "^2.0.0",
|
|
44
|
+
"@vitest/ui": "^2.0.0"
|
|
45
|
+
},
|
|
46
|
+
"engines": {
|
|
47
|
+
"node": ">=18"
|
|
48
|
+
}
|
|
49
|
+
}
|