mcp-headless-youtube-transcript 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "WebFetch(domain:github.com)",
5
+ "Bash(mkdir:*)",
6
+ "Bash(npm install)",
7
+ "Bash(npm run build:*)",
8
+ "Bash(rm:*)",
9
+ "Bash(npm run test:run:*)",
10
+ "Bash(timeout 5s npm start)",
11
+ "Bash(npm view:*)"
12
+ ],
13
+ "deny": []
14
+ }
15
+ }
package/README.md ADDED
@@ -0,0 +1,94 @@
1
+ # MCP Headless YouTube Transcript
2
+
3
+ An MCP (Model Context Protocol) server that extracts YouTube video transcripts using the `headless-youtube-captions` library.
4
+
5
+ ## Features
6
+
7
+ - Extract transcripts from YouTube videos using video ID or full URL
8
+ - Support for multiple languages
9
+ - Timestamped transcript output
10
+ - Built with TypeScript and the MCP SDK
11
+
12
+ ## Installation
13
+
14
+ ```bash
15
+ npm install
16
+ npm run build
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ ### As an MCP Server
22
+
23
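+ This server implements the Model Context Protocol over stdio, so any MCP client that can launch a local command can use it. After building, start it with `npm start` (or the `mcp-headless-youtube-transcript` binary).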
24
+
25
+ ### Tools Available
26
+
27
+ #### `get_youtube_transcript`
28
+
29
+ Extracts transcript/captions from a YouTube video.
30
+
31
+ **Parameters:**
32
+ - `videoId` (required): YouTube video ID or full URL
33
+ - `lang` (optional): Language code for captions (e.g., "en", "es", "ko"). Defaults to "en".
34
+
35
+ **Example:**
36
+ ```json
37
+ {
38
+ "name": "get_youtube_transcript",
39
+ "arguments": {
40
+ "videoId": "dQw4w9WgXcQ",
41
+ "lang": "en"
42
+ }
43
+ }
44
+ ```
45
+
46
+ ## Supported URL Formats
47
+
48
+ - Video ID: `dQw4w9WgXcQ`
49
+ - YouTube URLs:
50
+ - `https://www.youtube.com/watch?v=dQw4w9WgXcQ`
51
+ - `https://youtu.be/dQw4w9WgXcQ`
52
+ - `https://www.youtube.com/embed/dQw4w9WgXcQ`
53
+ - `https://www.youtube.com/v/dQw4w9WgXcQ`
54
+
55
+ ## Development
56
+
57
+ ```bash
58
+ # Install dependencies
59
+ npm install
60
+
61
+ # Run in development mode
62
+ npm run dev
63
+
64
+ # Build for production
65
+ npm run build
66
+
67
+ # Start the server
68
+ npm start
69
+
70
+ # Run tests
71
+ npm test
72
+
73
+ # Run tests once (CI mode)
74
+ npm run test:run
75
+ ```
76
+
77
+ ## Testing
78
+
79
+ The project includes comprehensive tests:
80
+
81
+ - **Unit tests**: Test helper functions like URL parsing and time formatting
82
+ - **Integration tests**: Test the core transcript extraction logic with mocked APIs
83
+ - **Manual tests**: Optional tests that call real YouTube APIs (skipped by default)
84
+
85
+ All tests use Vitest. The unit and integration tests mock the `headless-youtube-captions` library so they run reliably without external API dependencies; only the optional manual tests contact YouTube.
86
+
87
+ ## Dependencies
88
+
89
+ - `@modelcontextprotocol/sdk`: MCP SDK for building servers
90
+ - `headless-youtube-captions`: Library for extracting YouTube captions
91
+
92
+ ## License
93
+
94
+ MIT
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=index.d.ts.map
package/build/index.js ADDED
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/env node
2
+ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
3
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4
+ import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
5
+ import { getSubtitles } from 'headless-youtube-captions';
6
+ import { extractVideoId, formatTime } from './utils.js';
7
// MCP server instance shared by the request handlers registered in this file.
// The advertised version is kept in step with the "version" field of
// package.json (0.1.0) so clients see the release that is actually installed.
const server = new Server({
    name: 'mcp-headless-youtube-transcript',
    version: '0.1.0',
}, {
    capabilities: {
        // Only the tools capability is declared; the tool list itself is
        // served by the ListToolsRequestSchema handler.
        tools: {},
    },
});
15
// Tool definitions
// Answers tool-listing requests with the single tool this server exposes.
server.setRequestHandler(ListToolsRequestSchema, async () => {
    // JSON Schema describing the arguments accepted by get_youtube_transcript.
    const transcriptInputSchema = {
        type: 'object',
        properties: {
            videoId: {
                type: 'string',
                description: 'YouTube video ID or full URL',
            },
            lang: {
                type: 'string',
                description: 'Language code for captions (e.g., "en", "es", "ko"). Defaults to "en"',
                default: 'en',
            },
        },
        required: ['videoId'],
    };
    const transcriptTool = {
        name: 'get_youtube_transcript',
        description: 'Extract transcript/captions from a YouTube video',
        inputSchema: transcriptInputSchema,
    };
    return { tools: [transcriptTool] };
});
41
// Tool execution
/**
 * Handles tool-call requests.
 *
 * For `get_youtube_transcript` it resolves the video ID, fetches the captions
 * through `getSubtitles`, and returns them as one text item with a
 * `[MM:SS] text` line per caption. Any failure while doing so is reported as a
 * tool result with `isError: true` rather than thrown, so the client receives
 * the message. A request for any other tool name throws.
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const { name } = request.params;
    if (name !== 'get_youtube_transcript') {
        throw new Error(`Unknown tool: ${name}`);
    }
    try {
        // `arguments` may be absent on the request; fall back to an empty
        // object so the destructuring below cannot throw a TypeError.
        const { videoId, lang = 'en' } = request.params.arguments ?? {};
        // Reject a missing or non-string videoId up front with a clear
        // message instead of letting a TypeError surface from deeper code.
        if (typeof videoId !== 'string' || videoId.trim() === '') {
            throw new Error('Missing required argument "videoId" (YouTube video ID or URL)');
        }
        // Extract video ID from URL if a full URL is provided
        const extractedVideoId = extractVideoId(videoId.trim());
        if (!extractedVideoId) {
            throw new Error('Invalid YouTube video ID or URL');
        }
        // Get subtitles using headless-youtube-captions
        const subtitles = await getSubtitles({
            videoID: extractedVideoId,
            lang: lang,
        });
        // Format the transcript: one "[MM:SS] text" line per caption
        const transcript = subtitles
            .map((subtitle) => `[${formatTime(subtitle.start)}] ${subtitle.text}`)
            .join('\n');
        return {
            content: [
                {
                    type: 'text',
                    text: `YouTube Transcript for video ID: ${extractedVideoId}\nLanguage: ${lang}\n\n${transcript}`,
                },
            ],
        };
    }
    catch (error) {
        const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
        return {
            content: [
                {
                    type: 'text',
                    text: `Error getting YouTube transcript: ${errorMessage}`,
                },
            ],
            isError: true,
        };
    }
});
85
/**
 * Entry point: connects the server to a stdio transport and announces
 * readiness.
 */
async function main() {
    await server.connect(new StdioServerTransport());
    // Logged with console.error (stderr) — presumably to keep stdout free for
    // the stdio transport; confirm against the SDK before changing.
    console.error('MCP Headless YouTube Transcript server running on stdio');
}
// Any startup failure is logged and the process exits with a non-zero status.
main().catch((err) => {
    console.error('Server error:', err);
    process.exit(1);
});
94
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,3 @@
1
+ export declare function extractVideoId(input: string): string | null;
2
+ export declare function formatTime(seconds: number): string;
3
+ //# sourceMappingURL=utils.d.ts.map
package/build/utils.js ADDED
@@ -0,0 +1,26 @@
1
/**
 * Extracts the 11-character YouTube video ID from a bare ID or a YouTube URL.
 *
 * Accepted inputs (surrounding whitespace is ignored):
 *   - a bare ID, e.g. `dQw4w9WgXcQ`
 *   - `youtube.com/watch?...v=ID` (the `v` parameter may appear anywhere in
 *     the query string)
 *   - `youtu.be/ID`
 *   - `youtube.com/embed/ID`, `/v/ID`, `/shorts/ID`, `/live/ID`
 *     (also on `youtube-nocookie.com`)
 *
 * @param {string} input - Video ID or URL supplied by the caller.
 * @returns {string|null} The video ID, or `null` when none can be found,
 *   including when `input` is not a string.
 */
export function extractVideoId(input) {
    // Non-string input (undefined, null, numbers, ...) can never be a valid
    // ID or URL; report "not found" instead of throwing on `.match`.
    if (typeof input !== 'string') {
        return null;
    }
    const candidate = input.trim();
    // If it's already just a video ID (11 characters), return it
    if (/^[a-zA-Z0-9_-]{11}$/.test(candidate)) {
        return candidate;
    }
    // Extract from various YouTube URL formats
    const patterns = [
        // `v` must be a whole parameter name: directly after `?` or after `&`,
        // so a parameter such as `foov=` is not mistaken for the video ID.
        /youtube\.com\/watch\?(?:.*&)?v=([a-zA-Z0-9_-]{11})/,
        /youtu\.be\/([a-zA-Z0-9_-]{11})/,
        /youtube(?:-nocookie)?\.com\/(?:embed|v|shorts|live)\/([a-zA-Z0-9_-]{11})/,
    ];
    for (const pattern of patterns) {
        const match = candidate.match(pattern);
        if (match) {
            return match[1];
        }
    }
    return null;
}
20
/**
 * Formats a duration in seconds as a zero-padded `MM:SS` string.
 *
 * Fractional seconds are truncated. Minutes are not wrapped at 60, so an hour
 * and five seconds renders as `60:05`. Numeric strings are accepted and
 * converted with `Number`.
 *
 * @param {number} seconds - Offset in seconds.
 * @returns {string} The formatted time; `00:00` when `seconds` is negative,
 *   not finite, or not convertible to a number.
 */
export function formatTime(seconds) {
    const numeric = Number(seconds);
    // Clamp unusable values (NaN, +/-Infinity, negatives) to zero so the
    // output never contains "NaN" or a minus sign.
    const wholeSeconds = Number.isFinite(numeric) && numeric > 0 ? Math.floor(numeric) : 0;
    const minutes = Math.floor(wholeSeconds / 60);
    const remainingSeconds = wholeSeconds % 60;
    return `${String(minutes).padStart(2, '0')}:${String(remainingSeconds).padStart(2, '0')}`;
}
26
+ //# sourceMappingURL=utils.js.map
package/package.json ADDED
@@ -0,0 +1,49 @@
1
+ {
2
+ "name": "mcp-headless-youtube-transcript",
3
+ "version": "0.1.0",
4
+ "description": "MCP server for extracting YouTube video transcripts using headless-youtube-captions",
5
+ "main": "build/index.js",
6
+ "bin": {
7
+ "mcp-headless-youtube-transcript": "build/index.js"
8
+ },
9
+ "type": "module",
10
+ "scripts": {
11
+ "build": "tsc",
12
+ "dev": "tsx src/index.ts",
13
+ "start": "node build/index.js",
14
+ "test": "vitest",
15
+ "test:run": "vitest run",
16
+ "prepublishOnly": "npm run build && npm run test:run"
17
+ },
18
+ "keywords": [
19
+ "mcp",
20
+ "server",
21
+ "youtube",
22
+ "transcript",
23
+ "captions"
24
+ ],
25
+ "author": "Andrew Lewin",
26
+ "repository": {
27
+ "type": "git",
28
+ "url": "git+https://github.com/andrewlwn77/mcp-headless-youtube-transcript.git"
29
+ },
30
+ "bugs": {
31
+ "url": "https://github.com/andrewlwn77/mcp-headless-youtube-transcript/issues"
32
+ },
33
+ "homepage": "https://github.com/andrewlwn77/mcp-headless-youtube-transcript#readme",
34
+ "license": "MIT",
35
+ "dependencies": {
36
+ "@modelcontextprotocol/sdk": "^1.0.0",
37
+ "headless-youtube-captions": "^1.0.0"
38
+ },
39
+ "devDependencies": {
40
+ "@types/node": "^22.0.0",
41
+ "tsx": "^4.0.0",
42
+ "typescript": "^5.0.0",
43
+ "vitest": "^2.0.0",
44
+ "@vitest/ui": "^2.0.0"
45
+ },
46
+ "engines": {
47
+ "node": ">=18"
48
+ }
49
+ }