mcp-headless-youtube-transcript 0.7.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +88 -340
- package/build/utils.d.ts +0 -25
- package/build/utils.js +0 -34
- package/package.json +5 -5
package/build/index.js
CHANGED
|
@@ -4,14 +4,11 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
|
|
|
4
4
|
import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
5
5
|
// @ts-ignore - Types are defined in global.d.ts
|
|
6
6
|
import { getSubtitles, getChannelVideos, searchChannelVideos, getVideoComments, searchYouTubeGlobal, getVideoMetadata } from 'headless-youtube-captions';
|
|
7
|
-
import { extractVideoId, extractChannelIdentifier, formatChannelUrl, truncateText
|
|
7
|
+
import { extractVideoId, extractChannelIdentifier, formatChannelUrl, truncateText } from './utils.js';
|
|
8
8
|
// In-memory caches
|
|
9
9
|
const transcriptCache = new Map();
|
|
10
|
-
const searchCache = new Map();
|
|
11
10
|
// Get cache TTL from environment variables
|
|
12
11
|
const CACHE_TTL_SECONDS = parseInt(process.env.TRANSCRIPT_CACHE_TTL || '300'); // 5 minutes default
|
|
13
|
-
const SEARCH_CACHE_TTL_SECONDS = parseInt(process.env.SEARCH_CACHE_TTL || '3600'); // 1 hour default
|
|
14
|
-
const MAX_SEARCH_CACHE_SIZE = parseInt(process.env.MAX_SEARCH_CACHE_SIZE || '100');
|
|
15
12
|
// Cache helper functions
|
|
16
13
|
function getCacheKey(videoId, lang) {
|
|
17
14
|
return `${videoId}:${lang}`;
|
|
@@ -35,47 +32,13 @@ function setCachedTranscript(videoId, lang, transcript) {
|
|
|
35
32
|
const expiresAt = Date.now() + (CACHE_TTL_SECONDS * 1000);
|
|
36
33
|
transcriptCache.set(key, { transcript, expiresAt });
|
|
37
34
|
}
|
|
38
|
-
// Search cache helper functions
|
|
39
|
-
function getCachedSearchResults(query, resultTypes, maxResults) {
|
|
40
|
-
const key = getSearchCacheKey(query, resultTypes, maxResults);
|
|
41
|
-
const entry = searchCache.get(key);
|
|
42
|
-
if (!entry)
|
|
43
|
-
return null;
|
|
44
|
-
const now = Date.now();
|
|
45
|
-
if (now > entry.expiresAt) {
|
|
46
|
-
searchCache.delete(key);
|
|
47
|
-
return null;
|
|
48
|
-
}
|
|
49
|
-
// Update expiration time on read
|
|
50
|
-
entry.expiresAt = now + (SEARCH_CACHE_TTL_SECONDS * 1000);
|
|
51
|
-
return entry.results;
|
|
52
|
-
}
|
|
53
|
-
function setCachedSearchResults(query, resultTypes, maxResults, results) {
|
|
54
|
-
const key = getSearchCacheKey(query, resultTypes, maxResults);
|
|
55
|
-
const expiresAt = Date.now() + (SEARCH_CACHE_TTL_SECONDS * 1000);
|
|
56
|
-
// LRU eviction if cache is full
|
|
57
|
-
if (searchCache.size >= MAX_SEARCH_CACHE_SIZE) {
|
|
58
|
-
const firstKey = searchCache.keys().next().value;
|
|
59
|
-
if (firstKey) {
|
|
60
|
-
searchCache.delete(firstKey);
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
searchCache.set(key, { results, expiresAt });
|
|
64
|
-
}
|
|
65
35
|
function cleanupExpiredCache() {
|
|
66
36
|
const now = Date.now();
|
|
67
|
-
// Cleanup transcript cache
|
|
68
37
|
for (const [key, entry] of transcriptCache.entries()) {
|
|
69
38
|
if (now > entry.expiresAt) {
|
|
70
39
|
transcriptCache.delete(key);
|
|
71
40
|
}
|
|
72
41
|
}
|
|
73
|
-
// Cleanup search cache
|
|
74
|
-
for (const [key, entry] of searchCache.entries()) {
|
|
75
|
-
if (now > entry.expiresAt) {
|
|
76
|
-
searchCache.delete(key);
|
|
77
|
-
}
|
|
78
|
-
}
|
|
79
42
|
}
|
|
80
43
|
const server = new Server({
|
|
81
44
|
name: 'mcp-headless-youtube-transcript',
|
|
@@ -115,7 +78,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
115
78
|
},
|
|
116
79
|
{
|
|
117
80
|
name: 'get_channel_videos',
|
|
118
|
-
description: '
|
|
81
|
+
description: 'Get videos from a YouTube channel with pagination',
|
|
119
82
|
inputSchema: {
|
|
120
83
|
type: 'object',
|
|
121
84
|
properties: {
|
|
@@ -123,10 +86,15 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
123
86
|
type: 'string',
|
|
124
87
|
description: 'YouTube channel URL, @handle, or channel ID',
|
|
125
88
|
},
|
|
126
|
-
|
|
89
|
+
page: {
|
|
90
|
+
type: 'number',
|
|
91
|
+
description: 'Page number (1-based). Defaults to 1',
|
|
92
|
+
default: 1,
|
|
93
|
+
},
|
|
94
|
+
pageSize: {
|
|
127
95
|
type: 'number',
|
|
128
|
-
description: '
|
|
129
|
-
default:
|
|
96
|
+
description: 'Results per page (1-50). Defaults to 20',
|
|
97
|
+
default: 20,
|
|
130
98
|
},
|
|
131
99
|
},
|
|
132
100
|
required: ['channelUrl'],
|
|
@@ -146,6 +114,16 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
146
114
|
type: 'string',
|
|
147
115
|
description: 'Search query to find videos in the channel',
|
|
148
116
|
},
|
|
117
|
+
page: {
|
|
118
|
+
type: 'number',
|
|
119
|
+
description: 'Page number (1-based). Defaults to 1',
|
|
120
|
+
default: 1,
|
|
121
|
+
},
|
|
122
|
+
pageSize: {
|
|
123
|
+
type: 'number',
|
|
124
|
+
description: 'Results per page (1-50). Defaults to 20',
|
|
125
|
+
default: 20,
|
|
126
|
+
},
|
|
149
127
|
},
|
|
150
128
|
required: ['channelUrl', 'query'],
|
|
151
129
|
},
|
|
@@ -166,10 +144,15 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
166
144
|
enum: ['top', 'newest'],
|
|
167
145
|
default: 'top',
|
|
168
146
|
},
|
|
169
|
-
|
|
147
|
+
page: {
|
|
148
|
+
type: 'number',
|
|
149
|
+
description: 'Page number (1-based). Defaults to 1',
|
|
150
|
+
default: 1,
|
|
151
|
+
},
|
|
152
|
+
pageSize: {
|
|
170
153
|
type: 'number',
|
|
171
|
-
description: '
|
|
172
|
-
default:
|
|
154
|
+
description: 'Comments per page (1-50). Defaults to 20',
|
|
155
|
+
default: 20,
|
|
173
156
|
},
|
|
174
157
|
},
|
|
175
158
|
required: ['videoId'],
|
|
@@ -177,29 +160,23 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
177
160
|
},
|
|
178
161
|
{
|
|
179
162
|
name: 'search_youtube_global',
|
|
180
|
-
description: 'Search across all of YouTube
|
|
163
|
+
description: 'Search across all of YouTube for videos',
|
|
181
164
|
inputSchema: {
|
|
182
165
|
type: 'object',
|
|
183
166
|
properties: {
|
|
184
167
|
query: {
|
|
185
168
|
type: 'string',
|
|
186
|
-
description: 'Search term to find videos
|
|
169
|
+
description: 'Search term to find videos',
|
|
187
170
|
},
|
|
188
|
-
|
|
171
|
+
page: {
|
|
189
172
|
type: 'number',
|
|
190
|
-
description: '
|
|
191
|
-
default:
|
|
192
|
-
minimum: 1,
|
|
193
|
-
maximum: 20,
|
|
173
|
+
description: 'Page number (1-based). Defaults to 1',
|
|
174
|
+
default: 1,
|
|
194
175
|
},
|
|
195
|
-
|
|
196
|
-
type: '
|
|
197
|
-
description: '
|
|
198
|
-
|
|
199
|
-
type: 'string',
|
|
200
|
-
enum: ['videos', 'channels', 'all'],
|
|
201
|
-
},
|
|
202
|
-
default: ['all'],
|
|
176
|
+
pageSize: {
|
|
177
|
+
type: 'number',
|
|
178
|
+
description: 'Results per page (1-20). Defaults to 10',
|
|
179
|
+
default: 10,
|
|
203
180
|
},
|
|
204
181
|
},
|
|
205
182
|
required: ['query'],
|
|
@@ -207,7 +184,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
207
184
|
},
|
|
208
185
|
{
|
|
209
186
|
name: 'get_video_metadata',
|
|
210
|
-
description: 'Extract comprehensive video metadata including description, upload date, like count',
|
|
187
|
+
description: 'Extract comprehensive video metadata including description, upload date, like count, tags, and channel info',
|
|
211
188
|
inputSchema: {
|
|
212
189
|
type: 'object',
|
|
213
190
|
properties: {
|
|
@@ -215,34 +192,10 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
215
192
|
type: 'string',
|
|
216
193
|
description: 'YouTube video ID or full URL',
|
|
217
194
|
},
|
|
218
|
-
expandDescription: {
|
|
219
|
-
type: 'boolean',
|
|
220
|
-
description: 'Whether to expand truncated descriptions. Defaults to true',
|
|
221
|
-
default: true,
|
|
222
|
-
},
|
|
223
195
|
},
|
|
224
196
|
required: ['videoId'],
|
|
225
197
|
},
|
|
226
198
|
},
|
|
227
|
-
{
|
|
228
|
-
name: 'navigate_search_result',
|
|
229
|
-
description: 'Navigate to a video or channel page from search results',
|
|
230
|
-
inputSchema: {
|
|
231
|
-
type: 'object',
|
|
232
|
-
properties: {
|
|
233
|
-
resultUrl: {
|
|
234
|
-
type: 'string',
|
|
235
|
-
description: 'YouTube URL from search results to navigate to',
|
|
236
|
-
},
|
|
237
|
-
resultType: {
|
|
238
|
-
type: 'string',
|
|
239
|
-
description: 'Type of the result being navigated to',
|
|
240
|
-
enum: ['video', 'channel'],
|
|
241
|
-
},
|
|
242
|
-
},
|
|
243
|
-
required: ['resultUrl', 'resultType'],
|
|
244
|
-
},
|
|
245
|
-
},
|
|
246
199
|
],
|
|
247
200
|
};
|
|
248
201
|
});
|
|
@@ -252,7 +205,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
252
205
|
if (name === 'get_youtube_transcript') {
|
|
253
206
|
try {
|
|
254
207
|
const { videoId, lang = 'en', segment = 1 } = args;
|
|
255
|
-
// Extract video ID from URL if a full URL is provided
|
|
256
208
|
const extractedVideoId = extractVideoId(videoId);
|
|
257
209
|
if (!extractedVideoId) {
|
|
258
210
|
throw new Error('Invalid YouTube video ID or URL');
|
|
@@ -260,14 +212,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
260
212
|
// Check cache first
|
|
261
213
|
let fullTranscript = getCachedTranscript(extractedVideoId, lang);
|
|
262
214
|
if (!fullTranscript) {
|
|
263
|
-
// Get subtitles using headless-youtube-captions
|
|
264
215
|
const subtitles = await getSubtitles({
|
|
265
216
|
videoID: extractedVideoId,
|
|
266
217
|
lang: lang,
|
|
267
218
|
});
|
|
268
|
-
|
|
269
|
-
fullTranscript = subtitles.map(s => s.text).join(' ');
|
|
270
|
-
// Cache the full transcript
|
|
219
|
+
fullTranscript = subtitles.map((s) => s.text).join(' ');
|
|
271
220
|
setCachedTranscript(extractedVideoId, lang, fullTranscript);
|
|
272
221
|
}
|
|
273
222
|
// Split into 98k character chunks
|
|
@@ -276,383 +225,182 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
276
225
|
for (let i = 0; i < fullTranscript.length; i += chunkSize) {
|
|
277
226
|
chunks.push(fullTranscript.substring(i, i + chunkSize));
|
|
278
227
|
}
|
|
279
|
-
// Validate segment number
|
|
280
228
|
if (segment < 1 || segment > chunks.length) {
|
|
281
229
|
return {
|
|
282
|
-
content: [
|
|
283
|
-
{
|
|
230
|
+
content: [{
|
|
284
231
|
type: 'text',
|
|
285
232
|
text: `Error: Invalid segment ${segment}. Available segments: 1-${chunks.length}`,
|
|
286
|
-
},
|
|
287
|
-
],
|
|
233
|
+
}],
|
|
288
234
|
isError: true,
|
|
289
235
|
};
|
|
290
236
|
}
|
|
291
|
-
// Get the requested segment (convert to 0-based index)
|
|
292
237
|
const requestedChunk = chunks[segment - 1];
|
|
293
|
-
// Add metadata about segmentation
|
|
294
238
|
const segmentInfo = chunks.length > 1
|
|
295
239
|
? `[Segment ${segment} of ${chunks.length}]\n\n`
|
|
296
240
|
: '';
|
|
297
241
|
return {
|
|
298
|
-
content: [
|
|
299
|
-
{
|
|
242
|
+
content: [{
|
|
300
243
|
type: 'text',
|
|
301
244
|
text: segmentInfo + requestedChunk,
|
|
302
|
-
},
|
|
303
|
-
],
|
|
245
|
+
}],
|
|
304
246
|
};
|
|
305
247
|
}
|
|
306
248
|
catch (error) {
|
|
307
249
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
308
250
|
return {
|
|
309
|
-
content: [
|
|
310
|
-
{
|
|
251
|
+
content: [{
|
|
311
252
|
type: 'text',
|
|
312
253
|
text: `Error getting YouTube transcript: ${errorMessage}`,
|
|
313
|
-
},
|
|
314
|
-
],
|
|
254
|
+
}],
|
|
315
255
|
isError: true,
|
|
316
256
|
};
|
|
317
257
|
}
|
|
318
258
|
finally {
|
|
319
|
-
// Cleanup expired cache entries after each request
|
|
320
259
|
cleanupExpiredCache();
|
|
321
260
|
}
|
|
322
261
|
}
|
|
323
262
|
if (name === 'get_channel_videos') {
|
|
324
263
|
try {
|
|
325
|
-
const { channelUrl,
|
|
326
|
-
// Extract and format channel URL
|
|
264
|
+
const { channelUrl, page = 1, pageSize = 20 } = args;
|
|
327
265
|
const channelIdentifier = extractChannelIdentifier(channelUrl);
|
|
328
266
|
const formattedUrl = formatChannelUrl(channelIdentifier);
|
|
329
|
-
// Get channel videos
|
|
330
267
|
const result = await getChannelVideos({
|
|
331
268
|
channelURL: formattedUrl,
|
|
332
|
-
|
|
269
|
+
page,
|
|
270
|
+
pageSize: Math.min(pageSize, 50),
|
|
333
271
|
});
|
|
334
|
-
// Format the response
|
|
335
|
-
const response = {
|
|
336
|
-
channel: {
|
|
337
|
-
name: result.channel.name,
|
|
338
|
-
subscribers: result.channel.subscribers,
|
|
339
|
-
videoCount: result.channel.videoCount,
|
|
340
|
-
},
|
|
341
|
-
videos: result.videos.map((video) => ({
|
|
342
|
-
id: video.id,
|
|
343
|
-
title: video.title,
|
|
344
|
-
url: video.url,
|
|
345
|
-
views: video.views,
|
|
346
|
-
uploadTime: video.uploadTime,
|
|
347
|
-
duration: video.duration,
|
|
348
|
-
thumbnail: video.thumbnail,
|
|
349
|
-
})),
|
|
350
|
-
totalVideosRetrieved: result.totalLoaded,
|
|
351
|
-
hasMore: result.hasMore,
|
|
352
|
-
};
|
|
353
272
|
return {
|
|
354
|
-
content: [
|
|
355
|
-
{
|
|
273
|
+
content: [{
|
|
356
274
|
type: 'text',
|
|
357
|
-
text: JSON.stringify(
|
|
358
|
-
},
|
|
359
|
-
],
|
|
275
|
+
text: JSON.stringify(result, null, 2),
|
|
276
|
+
}],
|
|
360
277
|
};
|
|
361
278
|
}
|
|
362
279
|
catch (error) {
|
|
363
280
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
364
281
|
return {
|
|
365
|
-
content: [
|
|
366
|
-
{
|
|
282
|
+
content: [{
|
|
367
283
|
type: 'text',
|
|
368
284
|
text: `Error getting channel videos: ${errorMessage}`,
|
|
369
|
-
},
|
|
370
|
-
],
|
|
285
|
+
}],
|
|
371
286
|
isError: true,
|
|
372
287
|
};
|
|
373
288
|
}
|
|
374
289
|
}
|
|
375
290
|
if (name === 'search_channel_videos') {
|
|
376
291
|
try {
|
|
377
|
-
const { channelUrl, query } = args;
|
|
378
|
-
// Extract and format channel URL
|
|
292
|
+
const { channelUrl, query, page = 1, pageSize = 20 } = args;
|
|
379
293
|
const channelIdentifier = extractChannelIdentifier(channelUrl);
|
|
380
294
|
const formattedUrl = formatChannelUrl(channelIdentifier);
|
|
381
|
-
// Search channel videos
|
|
382
295
|
const result = await searchChannelVideos({
|
|
383
296
|
channelURL: formattedUrl,
|
|
384
|
-
query
|
|
297
|
+
query,
|
|
298
|
+
page,
|
|
299
|
+
pageSize: Math.min(pageSize, 50),
|
|
385
300
|
});
|
|
386
|
-
// Format the response
|
|
387
|
-
const response = {
|
|
388
|
-
query: result.query,
|
|
389
|
-
channelUrl: formattedUrl,
|
|
390
|
-
results: result.results.map((video) => ({
|
|
391
|
-
id: video.id,
|
|
392
|
-
title: video.title,
|
|
393
|
-
url: video.url,
|
|
394
|
-
views: video.views,
|
|
395
|
-
uploadTime: video.uploadTime,
|
|
396
|
-
duration: video.duration,
|
|
397
|
-
thumbnail: video.thumbnail,
|
|
398
|
-
})),
|
|
399
|
-
totalResults: result.totalFound,
|
|
400
|
-
};
|
|
401
301
|
return {
|
|
402
|
-
content: [
|
|
403
|
-
{
|
|
302
|
+
content: [{
|
|
404
303
|
type: 'text',
|
|
405
|
-
text: JSON.stringify(
|
|
406
|
-
},
|
|
407
|
-
],
|
|
304
|
+
text: JSON.stringify(result, null, 2),
|
|
305
|
+
}],
|
|
408
306
|
};
|
|
409
307
|
}
|
|
410
308
|
catch (error) {
|
|
411
309
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
412
310
|
return {
|
|
413
|
-
content: [
|
|
414
|
-
{
|
|
311
|
+
content: [{
|
|
415
312
|
type: 'text',
|
|
416
313
|
text: `Error searching channel videos: ${errorMessage}`,
|
|
417
|
-
},
|
|
418
|
-
],
|
|
314
|
+
}],
|
|
419
315
|
isError: true,
|
|
420
316
|
};
|
|
421
317
|
}
|
|
422
318
|
}
|
|
423
319
|
if (name === 'get_video_comments') {
|
|
424
320
|
try {
|
|
425
|
-
const { videoId, sortBy = 'top',
|
|
426
|
-
// Extract video ID from URL if needed
|
|
321
|
+
const { videoId, sortBy = 'top', page = 1, pageSize = 20 } = args;
|
|
427
322
|
const extractedVideoId = extractVideoId(videoId);
|
|
428
323
|
if (!extractedVideoId) {
|
|
429
324
|
throw new Error('Invalid YouTube video ID or URL');
|
|
430
325
|
}
|
|
431
|
-
// Get video comments
|
|
432
326
|
const result = await getVideoComments({
|
|
433
327
|
videoID: extractedVideoId,
|
|
434
|
-
sortBy
|
|
435
|
-
|
|
328
|
+
sortBy,
|
|
329
|
+
page,
|
|
330
|
+
pageSize: Math.min(pageSize, 50),
|
|
436
331
|
});
|
|
437
|
-
|
|
438
|
-
const response = {
|
|
439
|
-
video: {
|
|
440
|
-
id: result.video.id,
|
|
441
|
-
title: result.video.title,
|
|
442
|
-
channel: result.video.channel,
|
|
443
|
-
views: result.video.views,
|
|
444
|
-
},
|
|
445
|
-
sortBy: result.sortBy,
|
|
446
|
-
comments: result.comments.map((comment) => ({
|
|
447
|
-
author: comment.author,
|
|
448
|
-
text: comment.text,
|
|
449
|
-
likes: comment.likes,
|
|
450
|
-
replyCount: comment.replyCount,
|
|
451
|
-
time: comment.time,
|
|
452
|
-
})),
|
|
453
|
-
totalComments: result.totalComments,
|
|
454
|
-
totalLoaded: result.totalLoaded,
|
|
455
|
-
hasMore: result.hasMore,
|
|
456
|
-
};
|
|
457
|
-
const responseText = JSON.stringify(response, null, 2);
|
|
458
|
-
const truncatedResponse = truncateText(responseText);
|
|
332
|
+
const responseText = JSON.stringify(result, null, 2);
|
|
459
333
|
return {
|
|
460
|
-
content: [
|
|
461
|
-
{
|
|
334
|
+
content: [{
|
|
462
335
|
type: 'text',
|
|
463
|
-
text:
|
|
464
|
-
},
|
|
465
|
-
],
|
|
336
|
+
text: truncateText(responseText),
|
|
337
|
+
}],
|
|
466
338
|
};
|
|
467
339
|
}
|
|
468
340
|
catch (error) {
|
|
469
341
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
470
342
|
return {
|
|
471
|
-
content: [
|
|
472
|
-
{
|
|
343
|
+
content: [{
|
|
473
344
|
type: 'text',
|
|
474
345
|
text: `Error getting video comments: ${errorMessage}`,
|
|
475
|
-
},
|
|
476
|
-
],
|
|
346
|
+
}],
|
|
477
347
|
isError: true,
|
|
478
348
|
};
|
|
479
349
|
}
|
|
480
350
|
}
|
|
481
351
|
if (name === 'search_youtube_global') {
|
|
482
352
|
try {
|
|
483
|
-
const { query,
|
|
484
|
-
// Validate inputs
|
|
353
|
+
const { query, page = 1, pageSize = 10 } = args;
|
|
485
354
|
if (!query.trim()) {
|
|
486
355
|
throw new Error('Search query cannot be empty');
|
|
487
356
|
}
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
const cachedResults = getCachedSearchResults(query, resultTypes, maxResults);
|
|
494
|
-
if (cachedResults) {
|
|
495
|
-
console.error('Using cached search results');
|
|
496
|
-
results = cachedResults;
|
|
497
|
-
}
|
|
498
|
-
else {
|
|
499
|
-
// Use the real headless-youtube-captions search function
|
|
500
|
-
console.error('Performing new YouTube search...');
|
|
501
|
-
const searchResult = await searchYouTubeGlobal({
|
|
502
|
-
query: query,
|
|
503
|
-
maxResults: maxResults,
|
|
504
|
-
resultTypes: resultTypes
|
|
505
|
-
});
|
|
506
|
-
// Convert to our SearchResult format
|
|
507
|
-
results = searchResult.results.map((result) => ({
|
|
508
|
-
id: result.id,
|
|
509
|
-
type: result.type,
|
|
510
|
-
title: result.title,
|
|
511
|
-
url: result.url,
|
|
512
|
-
thumbnail: result.thumbnail || '',
|
|
513
|
-
channel: result.channel || '',
|
|
514
|
-
views: result.views || '',
|
|
515
|
-
duration: result.duration || '',
|
|
516
|
-
uploadTime: result.uploadTime || '',
|
|
517
|
-
subscribers: result.subscribers || '',
|
|
518
|
-
videoCount: result.videoCount || ''
|
|
519
|
-
}));
|
|
520
|
-
// Cache the results
|
|
521
|
-
setCachedSearchResults(query, resultTypes, maxResults, results);
|
|
522
|
-
}
|
|
523
|
-
// Filter by result types if not 'all'
|
|
524
|
-
if (!resultTypes.includes('all')) {
|
|
525
|
-
results = results.filter(result => (resultTypes.includes('videos') && result.type === 'video') ||
|
|
526
|
-
(resultTypes.includes('channels') && result.type === 'channel'));
|
|
527
|
-
}
|
|
528
|
-
// Limit results
|
|
529
|
-
const limitedResults = results.slice(0, maxResults);
|
|
530
|
-
const response = {
|
|
531
|
-
query: query,
|
|
532
|
-
resultTypes: resultTypes,
|
|
533
|
-
maxResults: maxResults,
|
|
534
|
-
totalFound: limitedResults.length,
|
|
535
|
-
results: limitedResults,
|
|
536
|
-
cached: results === getCachedSearchResults(query, resultTypes, maxResults)
|
|
537
|
-
};
|
|
357
|
+
const result = await searchYouTubeGlobal({
|
|
358
|
+
query,
|
|
359
|
+
page,
|
|
360
|
+
pageSize: Math.min(pageSize, 20),
|
|
361
|
+
});
|
|
538
362
|
return {
|
|
539
|
-
content: [
|
|
540
|
-
{
|
|
363
|
+
content: [{
|
|
541
364
|
type: 'text',
|
|
542
|
-
text: JSON.stringify(
|
|
543
|
-
},
|
|
544
|
-
],
|
|
365
|
+
text: JSON.stringify(result, null, 2),
|
|
366
|
+
}],
|
|
545
367
|
};
|
|
546
368
|
}
|
|
547
369
|
catch (error) {
|
|
548
370
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
549
371
|
return {
|
|
550
|
-
content: [
|
|
551
|
-
{
|
|
372
|
+
content: [{
|
|
552
373
|
type: 'text',
|
|
553
374
|
text: `Error searching YouTube: ${errorMessage}`,
|
|
554
|
-
},
|
|
555
|
-
],
|
|
375
|
+
}],
|
|
556
376
|
isError: true,
|
|
557
377
|
};
|
|
558
378
|
}
|
|
559
|
-
finally {
|
|
560
|
-
cleanupExpiredCache();
|
|
561
|
-
}
|
|
562
379
|
}
|
|
563
380
|
if (name === 'get_video_metadata') {
|
|
564
381
|
try {
|
|
565
|
-
const { videoId
|
|
566
|
-
// Extract video ID from URL if needed
|
|
382
|
+
const { videoId } = args;
|
|
567
383
|
const extractedVideoId = extractVideoId(videoId);
|
|
568
384
|
if (!extractedVideoId) {
|
|
569
385
|
throw new Error('Invalid YouTube video ID or URL');
|
|
570
386
|
}
|
|
571
|
-
// Check cache first - reuse the same cache key pattern
|
|
572
|
-
let cachedMetadata = getCachedTranscript(extractedVideoId, `metadata_${expandDescription}`);
|
|
573
|
-
if (cachedMetadata) {
|
|
574
|
-
try {
|
|
575
|
-
const parsedMetadata = JSON.parse(cachedMetadata);
|
|
576
|
-
return {
|
|
577
|
-
content: [
|
|
578
|
-
{
|
|
579
|
-
type: 'text',
|
|
580
|
-
text: JSON.stringify(parsedMetadata, null, 2),
|
|
581
|
-
},
|
|
582
|
-
],
|
|
583
|
-
};
|
|
584
|
-
}
|
|
585
|
-
catch (e) {
|
|
586
|
-
// Invalid cached data, proceed with fresh extraction
|
|
587
|
-
}
|
|
588
|
-
}
|
|
589
|
-
// Get video metadata using headless-youtube-captions
|
|
590
387
|
const metadata = await getVideoMetadata({
|
|
591
388
|
videoID: extractedVideoId,
|
|
592
|
-
expandDescription: expandDescription,
|
|
593
389
|
});
|
|
594
|
-
// Cache the result (using the transcript cache infrastructure)
|
|
595
|
-
setCachedTranscript(extractedVideoId, `metadata_${expandDescription}`, JSON.stringify(metadata));
|
|
596
390
|
return {
|
|
597
|
-
content: [
|
|
598
|
-
{
|
|
391
|
+
content: [{
|
|
599
392
|
type: 'text',
|
|
600
393
|
text: JSON.stringify(metadata, null, 2),
|
|
601
|
-
},
|
|
602
|
-
],
|
|
394
|
+
}],
|
|
603
395
|
};
|
|
604
396
|
}
|
|
605
397
|
catch (error) {
|
|
606
398
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
607
399
|
return {
|
|
608
|
-
content: [
|
|
609
|
-
{
|
|
400
|
+
content: [{
|
|
610
401
|
type: 'text',
|
|
611
402
|
text: `Error getting video metadata: ${errorMessage}`,
|
|
612
|
-
},
|
|
613
|
-
],
|
|
614
|
-
isError: true,
|
|
615
|
-
};
|
|
616
|
-
}
|
|
617
|
-
finally {
|
|
618
|
-
// Cleanup expired cache entries after each request
|
|
619
|
-
cleanupExpiredCache();
|
|
620
|
-
}
|
|
621
|
-
}
|
|
622
|
-
if (name === 'navigate_search_result') {
|
|
623
|
-
try {
|
|
624
|
-
const { resultUrl, resultType } = args;
|
|
625
|
-
// Validate URL
|
|
626
|
-
if (!isValidYouTubeUrl(resultUrl)) {
|
|
627
|
-
throw new Error('Invalid YouTube URL provided');
|
|
628
|
-
}
|
|
629
|
-
// For now, just return confirmation of navigation
|
|
630
|
-
// In full implementation, this would use Puppeteer to navigate
|
|
631
|
-
const response = {
|
|
632
|
-
success: true,
|
|
633
|
-
navigatedTo: resultUrl,
|
|
634
|
-
resultType: resultType,
|
|
635
|
-
message: `Successfully navigated to ${resultType}: ${resultUrl}`,
|
|
636
|
-
timestamp: new Date().toISOString()
|
|
637
|
-
};
|
|
638
|
-
return {
|
|
639
|
-
content: [
|
|
640
|
-
{
|
|
641
|
-
type: 'text',
|
|
642
|
-
text: JSON.stringify(response, null, 2),
|
|
643
|
-
},
|
|
644
|
-
],
|
|
645
|
-
};
|
|
646
|
-
}
|
|
647
|
-
catch (error) {
|
|
648
|
-
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
649
|
-
return {
|
|
650
|
-
content: [
|
|
651
|
-
{
|
|
652
|
-
type: 'text',
|
|
653
|
-
text: `Error navigating to search result: ${errorMessage}`,
|
|
654
|
-
},
|
|
655
|
-
],
|
|
403
|
+
}],
|
|
656
404
|
isError: true,
|
|
657
405
|
};
|
|
658
406
|
}
|
package/build/utils.d.ts
CHANGED
|
@@ -3,29 +3,4 @@ export declare function formatTime(seconds: number): string;
|
|
|
3
3
|
export declare function extractChannelIdentifier(input: string): string;
|
|
4
4
|
export declare function formatChannelUrl(identifier: string): string;
|
|
5
5
|
export declare function truncateText(text: string, maxLength?: number): string;
|
|
6
|
-
export interface SearchResult {
|
|
7
|
-
id: string;
|
|
8
|
-
type: 'video' | 'channel';
|
|
9
|
-
title: string;
|
|
10
|
-
url: string;
|
|
11
|
-
thumbnail?: string;
|
|
12
|
-
channel?: string;
|
|
13
|
-
views?: string;
|
|
14
|
-
duration?: string;
|
|
15
|
-
uploadTime?: string;
|
|
16
|
-
}
|
|
17
|
-
export declare const SEARCH_SELECTORS: {
|
|
18
|
-
readonly searchInput: "input[name=\"search_query\"]";
|
|
19
|
-
readonly searchButton: "button[aria-label=\"Search\"]";
|
|
20
|
-
readonly resultsContainer: "#contents";
|
|
21
|
-
readonly videoResult: "ytd-video-renderer";
|
|
22
|
-
readonly channelResult: "ytd-channel-renderer";
|
|
23
|
-
readonly videoTitle: "h3 a";
|
|
24
|
-
readonly channelName: "#text a[href*=\"/channel/\"], #text a[href*=\"/@\"]";
|
|
25
|
-
readonly thumbnail: "img";
|
|
26
|
-
readonly metadata: "#metadata-line";
|
|
27
|
-
};
|
|
28
|
-
export declare function parseSearchResults(resultsHtml: string): SearchResult[];
|
|
29
|
-
export declare function isValidYouTubeUrl(url: string): boolean;
|
|
30
|
-
export declare function getSearchCacheKey(query: string, resultTypes: string[], maxResults: number): string;
|
|
31
6
|
//# sourceMappingURL=utils.d.ts.map
|
package/build/utils.js
CHANGED
|
@@ -65,38 +65,4 @@ export function truncateText(text, maxLength = 50000) {
|
|
|
65
65
|
}
|
|
66
66
|
return text.substring(0, maxLength) + '\n\n[Content truncated due to length...]';
|
|
67
67
|
}
|
|
68
|
-
// Validated selectors from discovery work
|
|
69
|
-
export const SEARCH_SELECTORS = {
|
|
70
|
-
searchInput: 'input[name="search_query"]',
|
|
71
|
-
searchButton: 'button[aria-label="Search"]',
|
|
72
|
-
resultsContainer: '#contents',
|
|
73
|
-
videoResult: 'ytd-video-renderer',
|
|
74
|
-
channelResult: 'ytd-channel-renderer',
|
|
75
|
-
videoTitle: 'h3 a',
|
|
76
|
-
channelName: '#text a[href*="/channel/"], #text a[href*="/@"]',
|
|
77
|
-
thumbnail: 'img',
|
|
78
|
-
metadata: '#metadata-line'
|
|
79
|
-
};
|
|
80
|
-
// Helper function to parse search results from DOM
|
|
81
|
-
export function parseSearchResults(resultsHtml) {
|
|
82
|
-
// This would typically use a DOM parser, but for the MCP server
|
|
83
|
-
// we'll implement the extraction logic using the validated selectors
|
|
84
|
-
// This is a placeholder for the actual DOM parsing implementation
|
|
85
|
-
return [];
|
|
86
|
-
}
|
|
87
|
-
// Helper function to validate search result URL
|
|
88
|
-
export function isValidYouTubeUrl(url) {
|
|
89
|
-
const youtubePatterns = [
|
|
90
|
-
/^https:\/\/www\.youtube\.com\/watch\?v=[a-zA-Z0-9_-]{11}/,
|
|
91
|
-
/^https:\/\/www\.youtube\.com\/channel\//,
|
|
92
|
-
/^https:\/\/www\.youtube\.com\/@/
|
|
93
|
-
];
|
|
94
|
-
return youtubePatterns.some(pattern => pattern.test(url));
|
|
95
|
-
}
|
|
96
|
-
// Helper function to generate cache key for search results
|
|
97
|
-
export function getSearchCacheKey(query, resultTypes, maxResults) {
|
|
98
|
-
const normalizedQuery = query.toLowerCase().trim();
|
|
99
|
-
const sortedTypes = [...resultTypes].sort();
|
|
100
|
-
return `search:${normalizedQuery}:${sortedTypes.join(',')}:${maxResults}`;
|
|
101
|
-
}
|
|
102
68
|
//# sourceMappingURL=utils.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mcp-headless-youtube-transcript",
|
|
3
|
-
"version": "0.7.1",
|
|
3
|
+
"version": "0.9.0",
|
|
4
4
|
"description": "MCP server for extracting YouTube video transcripts, metadata, and comprehensive video information using headless-youtube-captions",
|
|
5
5
|
"main": "build/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -47,16 +47,16 @@
|
|
|
47
47
|
"license": "MIT",
|
|
48
48
|
"dependencies": {
|
|
49
49
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
50
|
-
"headless-youtube-captions": "
|
|
50
|
+
"headless-youtube-captions": "file:../headless-youtube-captions"
|
|
51
51
|
},
|
|
52
52
|
"devDependencies": {
|
|
53
53
|
"@types/node": "^22.0.0",
|
|
54
|
+
"@vitest/ui": "^2.0.0",
|
|
54
55
|
"tsx": "^4.0.0",
|
|
55
56
|
"typescript": "^5.0.0",
|
|
56
|
-
"vitest": "^2.0.0"
|
|
57
|
-
"@vitest/ui": "^2.0.0"
|
|
57
|
+
"vitest": "^2.0.0"
|
|
58
58
|
},
|
|
59
59
|
"engines": {
|
|
60
60
|
"node": ">=18"
|
|
61
61
|
}
|
|
62
|
-
}
|
|
62
|
+
}
|