mcp-headless-youtube-transcript 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +164 -3
- package/build/index.js +223 -2
- package/build/utils.d.ts +3 -0
- package/build/utils.js +42 -0
- package/package.json +2 -2
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Andrew Lewin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,10 +1,13 @@
|
|
|
1
1
|
# MCP Headless YouTube Transcript
|
|
2
2
|
|
|
3
|
-
An MCP (Model Context Protocol) server that extracts YouTube video transcripts using the `headless-youtube-captions` library.
|
|
3
|
+
An MCP (Model Context Protocol) server that extracts YouTube video transcripts, channel videos, and comments using the `headless-youtube-captions` library.
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
7
|
- Extract transcripts from YouTube videos using video ID or full URL
|
|
8
|
+
- Get videos from YouTube channels with pagination support
|
|
9
|
+
- Search for videos within a specific channel
|
|
10
|
+
- Retrieve comments from YouTube videos
|
|
8
11
|
- Support for multiple languages
|
|
9
12
|
- Automatic pagination for large transcripts (98k character chunks)
|
|
10
13
|
- Clean text output optimized for LLM consumption
|
|
@@ -84,9 +87,93 @@ With pagination:
|
|
|
84
87
|
}
|
|
85
88
|
```
|
|
86
89
|
|
|
87
|
-
|
|
90
|
+
### `get_channel_videos`
|
|
88
91
|
|
|
89
|
-
|
|
92
|
+
Extract videos from a YouTube channel with pagination support.
|
|
93
|
+
|
|
94
|
+
**Parameters:**
|
|
95
|
+
- `channelUrl` (required): YouTube channel URL, @handle, or channel ID
|
|
96
|
+
- `maxVideos` (optional): Maximum number of videos to retrieve. Defaults to 50
|
|
97
|
+
|
|
98
|
+
**Examples:**
|
|
99
|
+
|
|
100
|
+
Using handle:
|
|
101
|
+
```json
|
|
102
|
+
{
|
|
103
|
+
"name": "get_channel_videos",
|
|
104
|
+
"arguments": {
|
|
105
|
+
"channelUrl": "@mkbhd"
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Using channel URL:
|
|
111
|
+
```json
|
|
112
|
+
{
|
|
113
|
+
"name": "get_channel_videos",
|
|
114
|
+
"arguments": {
|
|
115
|
+
"channelUrl": "https://www.youtube.com/channel/UCBJycsmduvYEL83R_U4JriQ",
|
|
116
|
+
"maxVideos": 100
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### `search_channel_videos`
|
|
122
|
+
|
|
123
|
+
Search for specific videos within a YouTube channel.
|
|
124
|
+
|
|
125
|
+
**Parameters:**
|
|
126
|
+
- `channelUrl` (required): YouTube channel URL, @handle, or channel ID
|
|
127
|
+
- `query` (required): Search query to find videos in the channel
|
|
128
|
+
|
|
129
|
+
**Example:**
|
|
130
|
+
```json
|
|
131
|
+
{
|
|
132
|
+
"name": "search_channel_videos",
|
|
133
|
+
"arguments": {
|
|
134
|
+
"channelUrl": "@mkbhd",
|
|
135
|
+
"query": "iPhone review"
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### `get_video_comments`
|
|
141
|
+
|
|
142
|
+
Retrieve comments from a YouTube video.
|
|
143
|
+
|
|
144
|
+
**Parameters:**
|
|
145
|
+
- `videoId` (required): YouTube video ID or full URL
|
|
146
|
+
- `sortBy` (optional): Sort comments by "top" or "newest". Defaults to "top"
|
|
147
|
+
- `maxComments` (optional): Maximum number of comments to retrieve. Defaults to 100
|
|
148
|
+
|
|
149
|
+
**Examples:**
|
|
150
|
+
|
|
151
|
+
Basic usage:
|
|
152
|
+
```json
|
|
153
|
+
{
|
|
154
|
+
"name": "get_video_comments",
|
|
155
|
+
"arguments": {
|
|
156
|
+
"videoId": "dQw4w9WgXcQ"
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
With sorting and limit:
|
|
162
|
+
```json
|
|
163
|
+
{
|
|
164
|
+
"name": "get_video_comments",
|
|
165
|
+
"arguments": {
|
|
166
|
+
"videoId": "dQw4w9WgXcQ",
|
|
167
|
+
"sortBy": "newest",
|
|
168
|
+
"maxComments": 50
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## Response Formats
|
|
174
|
+
|
|
175
|
+
### Transcript Response
|
|
176
|
+
For `get_youtube_transcript`, the tool returns the raw transcript text. For large transcripts, the response includes pagination information:
|
|
90
177
|
|
|
91
178
|
```
|
|
92
179
|
[Segment 1 of 3]
|
|
@@ -96,6 +183,50 @@ this is the actual transcript text content...
|
|
|
96
183
|
|
|
97
184
|
When multiple segments are available, you can retrieve subsequent segments by incrementing the `segment` parameter.
|
|
98
185
|
|
|
186
|
+
### Channel Videos Response
|
|
187
|
+
For `get_channel_videos` and `search_channel_videos`, the response is a JSON object containing channel information and video details:
|
|
188
|
+
|
|
189
|
+
```json
|
|
190
|
+
{
|
|
191
|
+
"channel": {
|
|
192
|
+
"name": "Channel Name",
|
|
193
|
+
"subscribers": "1.23M subscribers",
|
|
194
|
+
"videoCount": "500 videos"
|
|
195
|
+
},
|
|
196
|
+
"videos": [
|
|
197
|
+
{
|
|
198
|
+
"id": "videoId123",
|
|
199
|
+
"title": "Video Title",
|
|
200
|
+
"url": "https://www.youtube.com/watch?v=videoId123",
|
|
201
|
+
"views": "1.2M views",
|
|
202
|
+
"uploadTime": "2 weeks ago",
|
|
203
|
+
"duration": "10:34"
|
|
204
|
+
}
|
|
205
|
+
],
|
|
206
|
+
"totalVideosRetrieved": 50
|
|
207
|
+
}
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Comments Response
|
|
211
|
+
For `get_video_comments`, the response includes comment details:
|
|
212
|
+
|
|
213
|
+
```json
|
|
214
|
+
{
|
|
215
|
+
"videoId": "dQw4w9WgXcQ",
|
|
216
|
+
"sortBy": "top",
|
|
217
|
+
"comments": [
|
|
218
|
+
{
|
|
219
|
+
"author": "Username",
|
|
220
|
+
"text": "This is a comment",
|
|
221
|
+
"likes": "1.2K",
|
|
222
|
+
"replyCount": 23,
|
|
223
|
+
"timeAgo": "2 weeks ago"
|
|
224
|
+
}
|
|
225
|
+
],
|
|
226
|
+
"totalComments": 100
|
|
227
|
+
}
|
|
228
|
+
```
|
|
229
|
+
|
|
99
230
|
## Caching
|
|
100
231
|
|
|
101
232
|
The server includes built-in caching to improve performance for paginated requests. The cache behavior can be configured with an environment variable:
|
|
@@ -115,8 +246,29 @@ The server includes built-in caching to improve performance for paginated reques
|
|
|
115
246
|
TRANSCRIPT_CACHE_TTL=600 npx mcp-headless-youtube-transcript
|
|
116
247
|
```
|
|
117
248
|
|
|
249
|
+
## Environment Variables
|
|
250
|
+
|
|
251
|
+
### PUPPETEER_EXECUTABLE_PATH
|
|
252
|
+
|
|
253
|
+
If you need to specify a custom path for the Chromium/Chrome executable used by Puppeteer, you can set the `PUPPETEER_EXECUTABLE_PATH` environment variable:
|
|
254
|
+
|
|
255
|
+
```bash
|
|
256
|
+
# Example: Using system Chrome
|
|
257
|
+
PUPPETEER_EXECUTABLE_PATH="/usr/bin/google-chrome" npx mcp-headless-youtube-transcript
|
|
258
|
+
|
|
259
|
+
# Example: Using a specific Chromium installation
|
|
260
|
+
PUPPETEER_EXECUTABLE_PATH="/path/to/chromium" npx mcp-headless-youtube-transcript
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
This is useful when:
|
|
264
|
+
- Running in containerized environments
|
|
265
|
+
- Using a system-installed Chrome/Chromium instead of the bundled one
|
|
266
|
+
- Working in environments with specific security requirements
|
|
267
|
+
- Troubleshooting Puppeteer launch issues
|
|
268
|
+
|
|
118
269
|
## Supported URL Formats
|
|
119
270
|
|
|
271
|
+
### Video URLs
|
|
120
272
|
- Video ID: `dQw4w9WgXcQ`
|
|
121
273
|
- YouTube URLs:
|
|
122
274
|
- `https://www.youtube.com/watch?v=dQw4w9WgXcQ`
|
|
@@ -124,6 +276,15 @@ TRANSCRIPT_CACHE_TTL=600 npx mcp-headless-youtube-transcript
|
|
|
124
276
|
- `https://www.youtube.com/embed/dQw4w9WgXcQ`
|
|
125
277
|
- `https://www.youtube.com/v/dQw4w9WgXcQ`
|
|
126
278
|
|
|
279
|
+
### Channel URLs
|
|
280
|
+
- Handle: `@mkbhd`
|
|
281
|
+
- Channel ID: `UCBJycsmduvYEL83R_U4JriQ`
|
|
282
|
+
- Channel URLs:
|
|
283
|
+
- `https://www.youtube.com/channel/UCBJycsmduvYEL83R_U4JriQ`
|
|
284
|
+
- `https://www.youtube.com/c/mkbhd`
|
|
285
|
+
- `https://www.youtube.com/user/marquesbrownlee`
|
|
286
|
+
- `https://www.youtube.com/@mkbhd`
|
|
287
|
+
|
|
127
288
|
## Development
|
|
128
289
|
|
|
129
290
|
```bash
|
package/build/index.js
CHANGED
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
3
3
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
4
4
|
import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
5
|
-
|
|
6
|
-
import {
|
|
5
|
+
// @ts-ignore - Types are defined in global.d.ts
|
|
6
|
+
import { getSubtitles, getChannelVideos, searchChannelVideos, getVideoComments } from 'headless-youtube-captions';
|
|
7
|
+
import { extractVideoId, extractChannelIdentifier, formatChannelUrl, truncateText } from './utils.js';
|
|
7
8
|
// In-memory cache
|
|
8
9
|
const transcriptCache = new Map();
|
|
9
10
|
// Get cache TTL from environment variable (default 5 minutes)
|
|
@@ -75,6 +76,68 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
75
76
|
required: ['videoId'],
|
|
76
77
|
},
|
|
77
78
|
},
|
|
79
|
+
{
|
|
80
|
+
name: 'get_channel_videos',
|
|
81
|
+
description: 'Extract videos from a YouTube channel with pagination support',
|
|
82
|
+
inputSchema: {
|
|
83
|
+
type: 'object',
|
|
84
|
+
properties: {
|
|
85
|
+
channelUrl: {
|
|
86
|
+
type: 'string',
|
|
87
|
+
description: 'YouTube channel URL, @handle, or channel ID',
|
|
88
|
+
},
|
|
89
|
+
maxVideos: {
|
|
90
|
+
type: 'number',
|
|
91
|
+
description: 'Maximum number of videos to retrieve. Defaults to 50',
|
|
92
|
+
default: 50,
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
required: ['channelUrl'],
|
|
96
|
+
},
|
|
97
|
+
},
|
|
98
|
+
{
|
|
99
|
+
name: 'search_channel_videos',
|
|
100
|
+
description: 'Search for specific videos within a YouTube channel',
|
|
101
|
+
inputSchema: {
|
|
102
|
+
type: 'object',
|
|
103
|
+
properties: {
|
|
104
|
+
channelUrl: {
|
|
105
|
+
type: 'string',
|
|
106
|
+
description: 'YouTube channel URL, @handle, or channel ID',
|
|
107
|
+
},
|
|
108
|
+
query: {
|
|
109
|
+
type: 'string',
|
|
110
|
+
description: 'Search query to find videos in the channel',
|
|
111
|
+
},
|
|
112
|
+
},
|
|
113
|
+
required: ['channelUrl', 'query'],
|
|
114
|
+
},
|
|
115
|
+
},
|
|
116
|
+
{
|
|
117
|
+
name: 'get_video_comments',
|
|
118
|
+
description: 'Retrieve comments from a YouTube video',
|
|
119
|
+
inputSchema: {
|
|
120
|
+
type: 'object',
|
|
121
|
+
properties: {
|
|
122
|
+
videoId: {
|
|
123
|
+
type: 'string',
|
|
124
|
+
description: 'YouTube video ID or full URL',
|
|
125
|
+
},
|
|
126
|
+
sortBy: {
|
|
127
|
+
type: 'string',
|
|
128
|
+
description: 'Sort comments by "top" or "newest". Defaults to "top"',
|
|
129
|
+
enum: ['top', 'newest'],
|
|
130
|
+
default: 'top',
|
|
131
|
+
},
|
|
132
|
+
maxComments: {
|
|
133
|
+
type: 'number',
|
|
134
|
+
description: 'Maximum number of comments to retrieve. Defaults to 100',
|
|
135
|
+
default: 100,
|
|
136
|
+
},
|
|
137
|
+
},
|
|
138
|
+
required: ['videoId'],
|
|
139
|
+
},
|
|
140
|
+
},
|
|
78
141
|
],
|
|
79
142
|
};
|
|
80
143
|
});
|
|
@@ -152,6 +215,164 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
152
215
|
cleanupExpiredCache();
|
|
153
216
|
}
|
|
154
217
|
}
|
|
218
|
+
if (name === 'get_channel_videos') {
|
|
219
|
+
try {
|
|
220
|
+
const { channelUrl, maxVideos = 50 } = args;
|
|
221
|
+
// Extract and format channel URL
|
|
222
|
+
const channelIdentifier = extractChannelIdentifier(channelUrl);
|
|
223
|
+
const formattedUrl = formatChannelUrl(channelIdentifier);
|
|
224
|
+
// Get channel videos
|
|
225
|
+
const result = await getChannelVideos({
|
|
226
|
+
channelURL: formattedUrl,
|
|
227
|
+
limit: maxVideos
|
|
228
|
+
});
|
|
229
|
+
// Format the response
|
|
230
|
+
const response = {
|
|
231
|
+
channel: {
|
|
232
|
+
name: result.channel.name,
|
|
233
|
+
subscribers: result.channel.subscribers,
|
|
234
|
+
videoCount: result.channel.videoCount,
|
|
235
|
+
},
|
|
236
|
+
videos: result.videos.map((video) => ({
|
|
237
|
+
id: video.id,
|
|
238
|
+
title: video.title,
|
|
239
|
+
url: video.url,
|
|
240
|
+
views: video.views,
|
|
241
|
+
uploadTime: video.uploadTime,
|
|
242
|
+
duration: video.duration,
|
|
243
|
+
thumbnail: video.thumbnail,
|
|
244
|
+
})),
|
|
245
|
+
totalVideosRetrieved: result.totalLoaded,
|
|
246
|
+
hasMore: result.hasMore,
|
|
247
|
+
};
|
|
248
|
+
return {
|
|
249
|
+
content: [
|
|
250
|
+
{
|
|
251
|
+
type: 'text',
|
|
252
|
+
text: JSON.stringify(response, null, 2),
|
|
253
|
+
},
|
|
254
|
+
],
|
|
255
|
+
};
|
|
256
|
+
}
|
|
257
|
+
catch (error) {
|
|
258
|
+
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
259
|
+
return {
|
|
260
|
+
content: [
|
|
261
|
+
{
|
|
262
|
+
type: 'text',
|
|
263
|
+
text: `Error getting channel videos: ${errorMessage}`,
|
|
264
|
+
},
|
|
265
|
+
],
|
|
266
|
+
isError: true,
|
|
267
|
+
};
|
|
268
|
+
}
|
|
269
|
+
}
|
|
270
|
+
if (name === 'search_channel_videos') {
|
|
271
|
+
try {
|
|
272
|
+
const { channelUrl, query } = args;
|
|
273
|
+
// Extract and format channel URL
|
|
274
|
+
const channelIdentifier = extractChannelIdentifier(channelUrl);
|
|
275
|
+
const formattedUrl = formatChannelUrl(channelIdentifier);
|
|
276
|
+
// Search channel videos
|
|
277
|
+
const result = await searchChannelVideos({
|
|
278
|
+
channelURL: formattedUrl,
|
|
279
|
+
query: query
|
|
280
|
+
});
|
|
281
|
+
// Format the response
|
|
282
|
+
const response = {
|
|
283
|
+
query: result.query,
|
|
284
|
+
channelUrl: formattedUrl,
|
|
285
|
+
results: result.results.map((video) => ({
|
|
286
|
+
id: video.id,
|
|
287
|
+
title: video.title,
|
|
288
|
+
url: video.url,
|
|
289
|
+
views: video.views,
|
|
290
|
+
uploadTime: video.uploadTime,
|
|
291
|
+
duration: video.duration,
|
|
292
|
+
thumbnail: video.thumbnail,
|
|
293
|
+
})),
|
|
294
|
+
totalResults: result.totalFound,
|
|
295
|
+
};
|
|
296
|
+
return {
|
|
297
|
+
content: [
|
|
298
|
+
{
|
|
299
|
+
type: 'text',
|
|
300
|
+
text: JSON.stringify(response, null, 2),
|
|
301
|
+
},
|
|
302
|
+
],
|
|
303
|
+
};
|
|
304
|
+
}
|
|
305
|
+
catch (error) {
|
|
306
|
+
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
307
|
+
return {
|
|
308
|
+
content: [
|
|
309
|
+
{
|
|
310
|
+
type: 'text',
|
|
311
|
+
text: `Error searching channel videos: ${errorMessage}`,
|
|
312
|
+
},
|
|
313
|
+
],
|
|
314
|
+
isError: true,
|
|
315
|
+
};
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
if (name === 'get_video_comments') {
|
|
319
|
+
try {
|
|
320
|
+
const { videoId, sortBy = 'top', maxComments = 100 } = args;
|
|
321
|
+
// Extract video ID from URL if needed
|
|
322
|
+
const extractedVideoId = extractVideoId(videoId);
|
|
323
|
+
if (!extractedVideoId) {
|
|
324
|
+
throw new Error('Invalid YouTube video ID or URL');
|
|
325
|
+
}
|
|
326
|
+
// Get video comments
|
|
327
|
+
const result = await getVideoComments({
|
|
328
|
+
videoID: extractedVideoId,
|
|
329
|
+
sortBy: sortBy,
|
|
330
|
+
limit: maxComments
|
|
331
|
+
});
|
|
332
|
+
// Format the response (truncate if needed)
|
|
333
|
+
const response = {
|
|
334
|
+
video: {
|
|
335
|
+
id: result.video.id,
|
|
336
|
+
title: result.video.title,
|
|
337
|
+
channel: result.video.channel,
|
|
338
|
+
views: result.video.views,
|
|
339
|
+
},
|
|
340
|
+
sortBy: result.sortBy,
|
|
341
|
+
comments: result.comments.map((comment) => ({
|
|
342
|
+
author: comment.author,
|
|
343
|
+
text: comment.text,
|
|
344
|
+
likes: comment.likes,
|
|
345
|
+
replyCount: comment.replyCount,
|
|
346
|
+
time: comment.time,
|
|
347
|
+
})),
|
|
348
|
+
totalComments: result.totalComments,
|
|
349
|
+
totalLoaded: result.totalLoaded,
|
|
350
|
+
hasMore: result.hasMore,
|
|
351
|
+
};
|
|
352
|
+
const responseText = JSON.stringify(response, null, 2);
|
|
353
|
+
const truncatedResponse = truncateText(responseText);
|
|
354
|
+
return {
|
|
355
|
+
content: [
|
|
356
|
+
{
|
|
357
|
+
type: 'text',
|
|
358
|
+
text: truncatedResponse,
|
|
359
|
+
},
|
|
360
|
+
],
|
|
361
|
+
};
|
|
362
|
+
}
|
|
363
|
+
catch (error) {
|
|
364
|
+
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
365
|
+
return {
|
|
366
|
+
content: [
|
|
367
|
+
{
|
|
368
|
+
type: 'text',
|
|
369
|
+
text: `Error getting video comments: ${errorMessage}`,
|
|
370
|
+
},
|
|
371
|
+
],
|
|
372
|
+
isError: true,
|
|
373
|
+
};
|
|
374
|
+
}
|
|
375
|
+
}
|
|
155
376
|
throw new Error(`Unknown tool: ${name}`);
|
|
156
377
|
});
|
|
157
378
|
async function main() {
|
package/build/utils.d.ts
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
1
|
export declare function extractVideoId(input: string): string | null;
|
|
2
2
|
export declare function formatTime(seconds: number): string;
|
|
3
|
+
export declare function extractChannelIdentifier(input: string): string;
|
|
4
|
+
export declare function formatChannelUrl(identifier: string): string;
|
|
5
|
+
export declare function truncateText(text: string, maxLength?: number): string;
|
|
3
6
|
//# sourceMappingURL=utils.d.ts.map
|
package/build/utils.js
CHANGED
|
@@ -23,4 +23,46 @@ export function formatTime(seconds) {
|
|
|
23
23
|
const remainingSeconds = Math.floor(seconds % 60);
|
|
24
24
|
return `${minutes.toString().padStart(2, '0')}:${remainingSeconds.toString().padStart(2, '0')}`;
|
|
25
25
|
}
|
|
26
|
+
// Helper function to extract channel identifier from various YouTube channel URL formats
|
|
27
|
+
export function extractChannelIdentifier(input) {
|
|
28
|
+
// If it's already a channel ID (starts with UC) or a handle (starts with @)
|
|
29
|
+
if (/^UC[a-zA-Z0-9_-]{22}$/.test(input) || /^@[\w.-]+$/.test(input)) {
|
|
30
|
+
return input;
|
|
31
|
+
}
|
|
32
|
+
// Extract from various YouTube channel URL formats
|
|
33
|
+
const patterns = [
|
|
34
|
+
/youtube\.com\/channel\/(UC[a-zA-Z0-9_-]{22})/,
|
|
35
|
+
/youtube\.com\/c\/([^\/]+)/,
|
|
36
|
+
/youtube\.com\/user\/([^\/]+)/,
|
|
37
|
+
/youtube\.com\/(@[\w.-]+)/,
|
|
38
|
+
];
|
|
39
|
+
for (const pattern of patterns) {
|
|
40
|
+
const match = input.match(pattern);
|
|
41
|
+
if (match) {
|
|
42
|
+
return match[1];
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
// If no pattern matches, return the input as-is (might be a channel name)
|
|
46
|
+
return input;
|
|
47
|
+
}
|
|
48
|
+
// Helper function to format/normalize channel URLs
|
|
49
|
+
export function formatChannelUrl(identifier) {
|
|
50
|
+
// If it's a handle, construct the URL with the handle
|
|
51
|
+
if (identifier.startsWith('@')) {
|
|
52
|
+
return `https://www.youtube.com/${identifier}`;
|
|
53
|
+
}
|
|
54
|
+
// If it's a channel ID, use the channel URL format
|
|
55
|
+
if (/^UC[a-zA-Z0-9_-]{22}$/.test(identifier)) {
|
|
56
|
+
return `https://www.youtube.com/channel/${identifier}`;
|
|
57
|
+
}
|
|
58
|
+
// Otherwise, assume it's a custom URL/username
|
|
59
|
+
return `https://www.youtube.com/c/${identifier}`;
|
|
60
|
+
}
|
|
61
|
+
// Helper function to truncate text for large responses
|
|
62
|
+
export function truncateText(text, maxLength = 50000) {
|
|
63
|
+
if (text.length <= maxLength) {
|
|
64
|
+
return text;
|
|
65
|
+
}
|
|
66
|
+
return text.substring(0, maxLength) + '\n\n[Content truncated due to length...]';
|
|
67
|
+
}
|
|
26
68
|
//# sourceMappingURL=utils.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mcp-headless-youtube-transcript",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "MCP server for extracting YouTube video transcripts using headless-youtube-captions",
|
|
5
5
|
"main": "build/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
"license": "MIT",
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
37
|
-
"headless-youtube-captions": "^1.0
|
|
37
|
+
"headless-youtube-captions": "^1.2.0"
|
|
38
38
|
},
|
|
39
39
|
"devDependencies": {
|
|
40
40
|
"@types/node": "^22.0.0",
|