mcp-headless-youtube-transcript 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +85 -2
- package/build/index.js +211 -5
- package/build/utils.d.ts +25 -0
- package/build/utils.js +34 -0
- package/package.json +5 -3
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.6.0] - 2025-01-24
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **Global YouTube Search**: New `search_youtube_global` tool for searching across all of YouTube
|
|
12
|
+
- Search for videos and channels with customizable result types
|
|
13
|
+
- Configurable max results (1-20)
|
|
14
|
+
- Rich result data including titles, URLs, view counts, upload times, durations
|
|
15
|
+
- Separate caching with 1-hour TTL for search results
|
|
16
|
+
- Updated dependency to headless-youtube-captions v1.3.0
|
|
17
|
+
- Added search and automation keywords to package.json
|
|
18
|
+
|
|
19
|
+
### Technical Details
|
|
20
|
+
- Utilizes validated DOM selectors from discovery work
|
|
21
|
+
- Container-safe Chrome browser configuration
|
|
22
|
+
- Comprehensive error handling and validation
|
|
23
|
+
- Type-safe integration with existing MCP tools
|
|
24
|
+
|
|
25
|
+
## [0.5.0] - Previous Release
|
|
26
|
+
|
|
27
|
+
### Added
|
|
28
|
+
- Initial MCP server implementation
|
|
29
|
+
- YouTube transcript extraction tools
|
|
30
|
+
- Channel video listing and search
|
|
31
|
+
- Video comment extraction
|
|
32
|
+
- Comprehensive caching system
|
package/README.md
CHANGED
|
@@ -5,6 +5,7 @@ An MCP (Model Context Protocol) server that extracts YouTube video transcripts,
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
7
|
- Extract transcripts from YouTube videos using video ID or full URL
|
|
8
|
+
- **Search across all of YouTube** for videos and channels globally
|
|
8
9
|
- Get videos from YouTube channels with pagination support
|
|
9
10
|
- Search for videos within a specific channel
|
|
10
11
|
- Retrieve comments from YouTube videos
|
|
@@ -87,6 +88,51 @@ With pagination:
|
|
|
87
88
|
}
|
|
88
89
|
```
|
|
89
90
|
|
|
91
|
+
### `search_youtube_global`
|
|
92
|
+
|
|
93
|
+
Search across all of YouTube for videos and channels with customizable filters.
|
|
94
|
+
|
|
95
|
+
**Parameters:**
|
|
96
|
+
- `query` (required): Search term to find videos and channels
|
|
97
|
+
- `maxResults` (optional): Maximum number of results to return (1-20). Defaults to 10
|
|
98
|
+
- `resultTypes` (optional): Array of result types to include. Options: ["videos"], ["channels"], or ["all"]. Defaults to ["all"]
|
|
99
|
+
|
|
100
|
+
**Examples:**
|
|
101
|
+
|
|
102
|
+
Basic search:
|
|
103
|
+
```json
|
|
104
|
+
{
|
|
105
|
+
"name": "search_youtube_global",
|
|
106
|
+
"arguments": {
|
|
107
|
+
"query": "javascript tutorial"
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Search only for videos:
|
|
113
|
+
```json
|
|
114
|
+
{
|
|
115
|
+
"name": "search_youtube_global",
|
|
116
|
+
"arguments": {
|
|
117
|
+
"query": "machine learning",
|
|
118
|
+
"maxResults": 15,
|
|
119
|
+
"resultTypes": ["videos"]
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Search only for channels:
|
|
125
|
+
```json
|
|
126
|
+
{
|
|
127
|
+
"name": "search_youtube_global",
|
|
128
|
+
"arguments": {
|
|
129
|
+
"query": "cooking channels",
|
|
130
|
+
"maxResults": 5,
|
|
131
|
+
"resultTypes": ["channels"]
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
```
|
|
135
|
+
|
|
90
136
|
### `get_channel_videos`
|
|
91
137
|
|
|
92
138
|
Extract videos from a YouTube channel with pagination support.
|
|
@@ -183,6 +229,40 @@ this is the actual transcript text content...
|
|
|
183
229
|
|
|
184
230
|
When multiple segments are available, you can retrieve subsequent segments by incrementing the `segment` parameter.
|
|
185
231
|
|
|
232
|
+
### Global Search Response
|
|
233
|
+
For `search_youtube_global`, the response includes search results with comprehensive metadata:
|
|
234
|
+
|
|
235
|
+
```json
|
|
236
|
+
{
|
|
237
|
+
"query": "javascript tutorial",
|
|
238
|
+
"resultTypes": ["all"],
|
|
239
|
+
"maxResults": 10,
|
|
240
|
+
"totalFound": 5,
|
|
241
|
+
"results": [
|
|
242
|
+
{
|
|
243
|
+
"id": "EerdGm-ehJQ",
|
|
244
|
+
"type": "video",
|
|
245
|
+
"title": "JavaScript Tutorial Full Course - Beginner to Pro",
|
|
246
|
+
"url": "https://www.youtube.com/watch?v=EerdGm-ehJQ",
|
|
247
|
+
"channel": "SuperSimpleDev",
|
|
248
|
+
"views": "5.8M views",
|
|
249
|
+
"uploadTime": "1 year ago",
|
|
250
|
+
"duration": "22:15:57",
|
|
251
|
+
"thumbnail": "https://i.ytimg.com/vi/EerdGm-ehJQ/hq720.jpg"
|
|
252
|
+
},
|
|
253
|
+
{
|
|
254
|
+
"id": "UCBJycsmduvYEL83R_U4JriQ",
|
|
255
|
+
"type": "channel",
|
|
256
|
+
"title": "Marques Brownlee",
|
|
257
|
+
"url": "https://www.youtube.com/channel/UCBJycsmduvYEL83R_U4JriQ",
|
|
258
|
+
"subscribers": "18.3M subscribers",
|
|
259
|
+
"videoCount": "4,832 videos",
|
|
260
|
+
"thumbnail": "https://yt3.ggpht.com/..."
|
|
261
|
+
}
|
|
262
|
+
]
|
|
263
|
+
}
|
|
264
|
+
```
|
|
265
|
+
|
|
186
266
|
### Channel Videos Response
|
|
187
267
|
For `get_channel_videos` and `search_channel_videos`, the response is a JSON object containing channel information and video details:
|
|
188
268
|
|
|
@@ -229,15 +309,18 @@ For `get_video_comments`, the response includes comment details:
|
|
|
229
309
|
|
|
230
310
|
## Caching
|
|
231
311
|
|
|
232
|
-
The server includes built-in caching to improve performance for
|
|
312
|
+
The server includes built-in caching to improve performance for repeated requests. The cache behavior can be configured with environment variables:
|
|
233
313
|
|
|
234
|
-
- `TRANSCRIPT_CACHE_TTL`: Cache duration in seconds (default: 300 = 5 minutes)
|
|
314
|
+
- `TRANSCRIPT_CACHE_TTL`: Cache duration for transcripts in seconds (default: 300 = 5 minutes)
|
|
315
|
+
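- Search results are cached separately with a 1-hour TTL by default; set `SEARCH_CACHE_TTL` (in seconds, default: 3600) to change it, and `MAX_SEARCH_CACHE_SIZE` (default: 100) to cap the number of cached searches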
|
|
235
316
|
|
|
236
317
|
### Cache Features:
|
|
237
318
|
- Full transcripts are cached on first fetch
|
|
319
|
+
- Search results are cached with a longer TTL (1 hour) due to their general nature
|
|
238
320
|
- Cache expiration time is updated on each read or write
|
|
239
321
|
- Expired entries are automatically cleaned up after each request
|
|
240
322
|
- Each video+language combination is cached separately
|
|
323
|
+
- Search queries are cached by normalized (lowercased, trimmed) query string, result types, and `maxResults`
|
|
241
324
|
|
|
242
325
|
### Setting Cache Duration:
|
|
243
326
|
|
package/build/index.js
CHANGED
|
@@ -3,12 +3,15 @@ import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
|
3
3
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
4
4
|
import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
|
|
5
5
|
// @ts-ignore - Types are defined in global.d.ts
|
|
6
|
-
import { getSubtitles, getChannelVideos, searchChannelVideos, getVideoComments } from 'headless-youtube-captions';
|
|
7
|
-
import { extractVideoId, extractChannelIdentifier, formatChannelUrl, truncateText } from './utils.js';
|
|
8
|
-
// In-memory
|
|
6
|
+
import { getSubtitles, getChannelVideos, searchChannelVideos, getVideoComments, searchYouTubeGlobal } from 'headless-youtube-captions';
|
|
7
|
+
import { extractVideoId, extractChannelIdentifier, formatChannelUrl, truncateText, isValidYouTubeUrl, getSearchCacheKey } from './utils.js';
|
|
8
|
+
// In-memory caches
|
|
9
9
|
const transcriptCache = new Map();
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
const searchCache = new Map();
|
|
11
|
+
// Get cache TTL from environment variables
|
|
12
|
+
const CACHE_TTL_SECONDS = parseInt(process.env.TRANSCRIPT_CACHE_TTL || '300'); // 5 minutes default
|
|
13
|
+
const SEARCH_CACHE_TTL_SECONDS = parseInt(process.env.SEARCH_CACHE_TTL || '3600'); // 1 hour default
|
|
14
|
+
const MAX_SEARCH_CACHE_SIZE = parseInt(process.env.MAX_SEARCH_CACHE_SIZE || '100');
|
|
12
15
|
// Cache helper functions
|
|
13
16
|
function getCacheKey(videoId, lang) {
|
|
14
17
|
return `${videoId}:${lang}`;
|
|
@@ -32,13 +35,47 @@ function setCachedTranscript(videoId, lang, transcript) {
|
|
|
32
35
|
const expiresAt = Date.now() + (CACHE_TTL_SECONDS * 1000);
|
|
33
36
|
transcriptCache.set(key, { transcript, expiresAt });
|
|
34
37
|
}
|
|
38
|
+
// Search cache helper functions
|
|
39
|
+
function getCachedSearchResults(query, resultTypes, maxResults) {
|
|
40
|
+
const key = getSearchCacheKey(query, resultTypes, maxResults);
|
|
41
|
+
const entry = searchCache.get(key);
|
|
42
|
+
if (!entry)
|
|
43
|
+
return null;
|
|
44
|
+
const now = Date.now();
|
|
45
|
+
if (now > entry.expiresAt) {
|
|
46
|
+
searchCache.delete(key);
|
|
47
|
+
return null;
|
|
48
|
+
}
|
|
49
|
+
// Update expiration time on read
|
|
50
|
+
entry.expiresAt = now + (SEARCH_CACHE_TTL_SECONDS * 1000);
|
|
51
|
+
return entry.results;
|
|
52
|
+
}
|
|
53
|
+
function setCachedSearchResults(query, resultTypes, maxResults, results) {
|
|
54
|
+
const key = getSearchCacheKey(query, resultTypes, maxResults);
|
|
55
|
+
const expiresAt = Date.now() + (SEARCH_CACHE_TTL_SECONDS * 1000);
|
|
56
|
+
// LRU eviction if cache is full
|
|
57
|
+
if (searchCache.size >= MAX_SEARCH_CACHE_SIZE) {
|
|
58
|
+
const firstKey = searchCache.keys().next().value;
|
|
59
|
+
if (firstKey) {
|
|
60
|
+
searchCache.delete(firstKey);
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
searchCache.set(key, { results, expiresAt });
|
|
64
|
+
}
|
|
35
65
|
function cleanupExpiredCache() {
|
|
36
66
|
const now = Date.now();
|
|
67
|
+
// Cleanup transcript cache
|
|
37
68
|
for (const [key, entry] of transcriptCache.entries()) {
|
|
38
69
|
if (now > entry.expiresAt) {
|
|
39
70
|
transcriptCache.delete(key);
|
|
40
71
|
}
|
|
41
72
|
}
|
|
73
|
+
// Cleanup search cache
|
|
74
|
+
for (const [key, entry] of searchCache.entries()) {
|
|
75
|
+
if (now > entry.expiresAt) {
|
|
76
|
+
searchCache.delete(key);
|
|
77
|
+
}
|
|
78
|
+
}
|
|
42
79
|
}
|
|
43
80
|
const server = new Server({
|
|
44
81
|
name: 'mcp-headless-youtube-transcript',
|
|
@@ -138,6 +175,55 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
|
138
175
|
required: ['videoId'],
|
|
139
176
|
},
|
|
140
177
|
},
|
|
178
|
+
{
|
|
179
|
+
name: 'search_youtube_global',
|
|
180
|
+
description: 'Search across all of YouTube and return structured results',
|
|
181
|
+
inputSchema: {
|
|
182
|
+
type: 'object',
|
|
183
|
+
properties: {
|
|
184
|
+
query: {
|
|
185
|
+
type: 'string',
|
|
186
|
+
description: 'Search term to find videos and channels',
|
|
187
|
+
},
|
|
188
|
+
maxResults: {
|
|
189
|
+
type: 'number',
|
|
190
|
+
description: 'Maximum number of results to return (1-20). Defaults to 10',
|
|
191
|
+
default: 10,
|
|
192
|
+
minimum: 1,
|
|
193
|
+
maximum: 20,
|
|
194
|
+
},
|
|
195
|
+
resultTypes: {
|
|
196
|
+
type: 'array',
|
|
197
|
+
description: 'Types of results to include',
|
|
198
|
+
items: {
|
|
199
|
+
type: 'string',
|
|
200
|
+
enum: ['videos', 'channels', 'all'],
|
|
201
|
+
},
|
|
202
|
+
default: ['all'],
|
|
203
|
+
},
|
|
204
|
+
},
|
|
205
|
+
required: ['query'],
|
|
206
|
+
},
|
|
207
|
+
},
|
|
208
|
+
{
|
|
209
|
+
name: 'navigate_search_result',
|
|
210
|
+
description: 'Navigate to a video or channel page from search results',
|
|
211
|
+
inputSchema: {
|
|
212
|
+
type: 'object',
|
|
213
|
+
properties: {
|
|
214
|
+
resultUrl: {
|
|
215
|
+
type: 'string',
|
|
216
|
+
description: 'YouTube URL from search results to navigate to',
|
|
217
|
+
},
|
|
218
|
+
resultType: {
|
|
219
|
+
type: 'string',
|
|
220
|
+
description: 'Type of the result being navigated to',
|
|
221
|
+
enum: ['video', 'channel'],
|
|
222
|
+
},
|
|
223
|
+
},
|
|
224
|
+
required: ['resultUrl', 'resultType'],
|
|
225
|
+
},
|
|
226
|
+
},
|
|
141
227
|
],
|
|
142
228
|
};
|
|
143
229
|
});
|
|
@@ -373,6 +459,126 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
373
459
|
};
|
|
374
460
|
}
|
|
375
461
|
}
|
|
462
|
+
if (name === 'search_youtube_global') {
|
|
463
|
+
try {
|
|
464
|
+
const { query, maxResults = 10, resultTypes = ['all'] } = args;
|
|
465
|
+
// Validate inputs
|
|
466
|
+
if (!query.trim()) {
|
|
467
|
+
throw new Error('Search query cannot be empty');
|
|
468
|
+
}
|
|
469
|
+
if (maxResults < 1 || maxResults > 20) {
|
|
470
|
+
throw new Error('maxResults must be between 1 and 20');
|
|
471
|
+
}
|
|
472
|
+
// Check cache first
|
|
473
|
+
let results;
|
|
474
|
+
const cachedResults = getCachedSearchResults(query, resultTypes, maxResults);
|
|
475
|
+
if (cachedResults) {
|
|
476
|
+
console.error('Using cached search results');
|
|
477
|
+
results = cachedResults;
|
|
478
|
+
}
|
|
479
|
+
else {
|
|
480
|
+
// Use the real headless-youtube-captions search function
|
|
481
|
+
console.error('Performing new YouTube search...');
|
|
482
|
+
const searchResult = await searchYouTubeGlobal({
|
|
483
|
+
query: query,
|
|
484
|
+
maxResults: maxResults,
|
|
485
|
+
resultTypes: resultTypes
|
|
486
|
+
});
|
|
487
|
+
// Convert to our SearchResult format
|
|
488
|
+
results = searchResult.results.map((result) => ({
|
|
489
|
+
id: result.id,
|
|
490
|
+
type: result.type,
|
|
491
|
+
title: result.title,
|
|
492
|
+
url: result.url,
|
|
493
|
+
thumbnail: result.thumbnail || '',
|
|
494
|
+
channel: result.channel || '',
|
|
495
|
+
views: result.views || '',
|
|
496
|
+
duration: result.duration || '',
|
|
497
|
+
uploadTime: result.uploadTime || '',
|
|
498
|
+
subscribers: result.subscribers || '',
|
|
499
|
+
videoCount: result.videoCount || ''
|
|
500
|
+
}));
|
|
501
|
+
// Cache the results
|
|
502
|
+
setCachedSearchResults(query, resultTypes, maxResults, results);
|
|
503
|
+
}
|
|
504
|
+
// Filter by result types if not 'all'
|
|
505
|
+
if (!resultTypes.includes('all')) {
|
|
506
|
+
results = results.filter(result => (resultTypes.includes('videos') && result.type === 'video') ||
|
|
507
|
+
(resultTypes.includes('channels') && result.type === 'channel'));
|
|
508
|
+
}
|
|
509
|
+
// Limit results
|
|
510
|
+
const limitedResults = results.slice(0, maxResults);
|
|
511
|
+
const response = {
|
|
512
|
+
query: query,
|
|
513
|
+
resultTypes: resultTypes,
|
|
514
|
+
maxResults: maxResults,
|
|
515
|
+
totalFound: limitedResults.length,
|
|
516
|
+
results: limitedResults,
|
|
517
|
+
cached: results === getCachedSearchResults(query, resultTypes, maxResults)
|
|
518
|
+
};
|
|
519
|
+
return {
|
|
520
|
+
content: [
|
|
521
|
+
{
|
|
522
|
+
type: 'text',
|
|
523
|
+
text: JSON.stringify(response, null, 2),
|
|
524
|
+
},
|
|
525
|
+
],
|
|
526
|
+
};
|
|
527
|
+
}
|
|
528
|
+
catch (error) {
|
|
529
|
+
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
530
|
+
return {
|
|
531
|
+
content: [
|
|
532
|
+
{
|
|
533
|
+
type: 'text',
|
|
534
|
+
text: `Error searching YouTube: ${errorMessage}`,
|
|
535
|
+
},
|
|
536
|
+
],
|
|
537
|
+
isError: true,
|
|
538
|
+
};
|
|
539
|
+
}
|
|
540
|
+
finally {
|
|
541
|
+
cleanupExpiredCache();
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
if (name === 'navigate_search_result') {
|
|
545
|
+
try {
|
|
546
|
+
const { resultUrl, resultType } = args;
|
|
547
|
+
// Validate URL
|
|
548
|
+
if (!isValidYouTubeUrl(resultUrl)) {
|
|
549
|
+
throw new Error('Invalid YouTube URL provided');
|
|
550
|
+
}
|
|
551
|
+
// For now, just return confirmation of navigation
|
|
552
|
+
// In full implementation, this would use Puppeteer to navigate
|
|
553
|
+
const response = {
|
|
554
|
+
success: true,
|
|
555
|
+
navigatedTo: resultUrl,
|
|
556
|
+
resultType: resultType,
|
|
557
|
+
message: `Successfully navigated to ${resultType}: ${resultUrl}`,
|
|
558
|
+
timestamp: new Date().toISOString()
|
|
559
|
+
};
|
|
560
|
+
return {
|
|
561
|
+
content: [
|
|
562
|
+
{
|
|
563
|
+
type: 'text',
|
|
564
|
+
text: JSON.stringify(response, null, 2),
|
|
565
|
+
},
|
|
566
|
+
],
|
|
567
|
+
};
|
|
568
|
+
}
|
|
569
|
+
catch (error) {
|
|
570
|
+
const errorMessage = error instanceof Error ? error.message : 'Unknown error occurred';
|
|
571
|
+
return {
|
|
572
|
+
content: [
|
|
573
|
+
{
|
|
574
|
+
type: 'text',
|
|
575
|
+
text: `Error navigating to search result: ${errorMessage}`,
|
|
576
|
+
},
|
|
577
|
+
],
|
|
578
|
+
isError: true,
|
|
579
|
+
};
|
|
580
|
+
}
|
|
581
|
+
}
|
|
376
582
|
throw new Error(`Unknown tool: ${name}`);
|
|
377
583
|
});
|
|
378
584
|
async function main() {
|
package/build/utils.d.ts
CHANGED
|
@@ -3,4 +3,29 @@ export declare function formatTime(seconds: number): string;
|
|
|
3
3
|
export declare function extractChannelIdentifier(input: string): string;
|
|
4
4
|
export declare function formatChannelUrl(identifier: string): string;
|
|
5
5
|
export declare function truncateText(text: string, maxLength?: number): string;
|
|
6
|
+
export interface SearchResult {
|
|
7
|
+
id: string;
|
|
8
|
+
type: 'video' | 'channel';
|
|
9
|
+
title: string;
|
|
10
|
+
url: string;
|
|
11
|
+
thumbnail?: string;
|
|
12
|
+
channel?: string;
|
|
13
|
+
views?: string;
|
|
14
|
+
duration?: string;
|
|
15
|
+
uploadTime?: string;
|
|
16
|
+
}
|
|
17
|
+
export declare const SEARCH_SELECTORS: {
|
|
18
|
+
readonly searchInput: "input[name=\"search_query\"]";
|
|
19
|
+
readonly searchButton: "button[aria-label=\"Search\"]";
|
|
20
|
+
readonly resultsContainer: "#contents";
|
|
21
|
+
readonly videoResult: "ytd-video-renderer";
|
|
22
|
+
readonly channelResult: "ytd-channel-renderer";
|
|
23
|
+
readonly videoTitle: "h3 a";
|
|
24
|
+
readonly channelName: "#text a[href*=\"/channel/\"], #text a[href*=\"/@\"]";
|
|
25
|
+
readonly thumbnail: "img";
|
|
26
|
+
readonly metadata: "#metadata-line";
|
|
27
|
+
};
|
|
28
|
+
export declare function parseSearchResults(resultsHtml: string): SearchResult[];
|
|
29
|
+
export declare function isValidYouTubeUrl(url: string): boolean;
|
|
30
|
+
export declare function getSearchCacheKey(query: string, resultTypes: string[], maxResults: number): string;
|
|
6
31
|
//# sourceMappingURL=utils.d.ts.map
|
package/build/utils.js
CHANGED
|
@@ -65,4 +65,38 @@ export function truncateText(text, maxLength = 50000) {
|
|
|
65
65
|
}
|
|
66
66
|
return text.substring(0, maxLength) + '\n\n[Content truncated due to length...]';
|
|
67
67
|
}
|
|
68
|
+
// Validated selectors from discovery work
|
|
69
|
+
export const SEARCH_SELECTORS = {
|
|
70
|
+
searchInput: 'input[name="search_query"]',
|
|
71
|
+
searchButton: 'button[aria-label="Search"]',
|
|
72
|
+
resultsContainer: '#contents',
|
|
73
|
+
videoResult: 'ytd-video-renderer',
|
|
74
|
+
channelResult: 'ytd-channel-renderer',
|
|
75
|
+
videoTitle: 'h3 a',
|
|
76
|
+
channelName: '#text a[href*="/channel/"], #text a[href*="/@"]',
|
|
77
|
+
thumbnail: 'img',
|
|
78
|
+
metadata: '#metadata-line'
|
|
79
|
+
};
|
|
80
|
+
// Helper function to parse search results from DOM
|
|
81
|
+
export function parseSearchResults(resultsHtml) {
|
|
82
|
+
// This would typically use a DOM parser, but for the MCP server
|
|
83
|
+
// we'll implement the extraction logic using the validated selectors
|
|
84
|
+
// This is a placeholder for the actual DOM parsing implementation
|
|
85
|
+
return [];
|
|
86
|
+
}
|
|
87
|
+
// Helper function to validate search result URL
|
|
88
|
+
export function isValidYouTubeUrl(url) {
|
|
89
|
+
const youtubePatterns = [
|
|
90
|
+
/^https:\/\/www\.youtube\.com\/watch\?v=[a-zA-Z0-9_-]{11}/,
|
|
91
|
+
/^https:\/\/www\.youtube\.com\/channel\//,
|
|
92
|
+
/^https:\/\/www\.youtube\.com\/@/
|
|
93
|
+
];
|
|
94
|
+
return youtubePatterns.some(pattern => pattern.test(url));
|
|
95
|
+
}
|
|
96
|
+
// Helper function to generate cache key for search results
|
|
97
|
+
export function getSearchCacheKey(query, resultTypes, maxResults) {
|
|
98
|
+
const normalizedQuery = query.toLowerCase().trim();
|
|
99
|
+
const sortedTypes = [...resultTypes].sort();
|
|
100
|
+
return `search:${normalizedQuery}:${sortedTypes.join(',')}:${maxResults}`;
|
|
101
|
+
}
|
|
68
102
|
//# sourceMappingURL=utils.js.map
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mcp-headless-youtube-transcript",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "MCP server for extracting YouTube video transcripts using headless-youtube-captions",
|
|
5
5
|
"main": "build/index.js",
|
|
6
6
|
"bin": {
|
|
@@ -20,7 +20,9 @@
|
|
|
20
20
|
"server",
|
|
21
21
|
"youtube",
|
|
22
22
|
"transcript",
|
|
23
|
-
"captions"
|
|
23
|
+
"captions",
|
|
24
|
+
"search",
|
|
25
|
+
"automation"
|
|
24
26
|
],
|
|
25
27
|
"author": "Andrew Lewin",
|
|
26
28
|
"repository": {
|
|
@@ -34,7 +36,7 @@
|
|
|
34
36
|
"license": "MIT",
|
|
35
37
|
"dependencies": {
|
|
36
38
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
37
|
-
"headless-youtube-captions": "^1.
|
|
39
|
+
"headless-youtube-captions": "^1.3.0"
|
|
38
40
|
},
|
|
39
41
|
"devDependencies": {
|
|
40
42
|
"@types/node": "^22.0.0",
|