yt-transcript-strapi-plugin 0.0.21 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,14 +4,10 @@ const types_js = require("@modelcontextprotocol/sdk/types.js");
4
4
  const zod = require("zod");
5
5
  const node_crypto = require("node:crypto");
6
6
  const streamableHttp_js = require("@modelcontextprotocol/sdk/server/streamableHttp.js");
7
- const textsplitters = require("@langchain/textsplitters");
8
- const prompts = require("@langchain/core/prompts");
9
- const openai = require("@langchain/openai");
10
7
  const youtubei_js = require("youtubei.js");
11
8
  const undici = require("undici");
12
9
// Input schema for the fetch_transcript tool: accepts either a bare
// 11-character video ID or a full YouTube URL (resolved later by
// extractYouTubeID).
const FetchTranscriptSchema = zod.z.object({
  videoId: zod.z.string().min(1, "Video ID or URL is required")
});
16
12
  const ListTranscriptsSchema = zod.z.object({
17
13
  page: zod.z.number().int().min(1).optional().default(1),
@@ -19,7 +15,18 @@ const ListTranscriptsSchema = zod.z.object({
19
15
  sort: zod.z.string().optional().default("createdAt:desc")
20
16
  });
21
17
// Input schema for the get_transcript tool. Besides the video ID, callers
// can opt into the full text / timecodes, or select a slice via a time
// range or chunk pagination (see handleGetTranscript for option priority).
const GetTranscriptSchema = zod.z.object({
  videoId: zod.z.string().min(1, "Video ID is required"),
  // Opt-in: full text may be very large for long videos.
  includeFullTranscript: zod.z.boolean().optional().default(false),
  // Opt-in: per-entry timecode array; significantly larger responses.
  includeTimecodes: zod.z.boolean().optional().default(false),
  // Time-window selection, in seconds.
  startTime: zod.z.number().min(0).optional(),
  endTime: zod.z.number().min(0).optional(),
  // Chunked pagination: 0-based index plus chunk length in seconds.
  chunkIndex: zod.z.number().int().min(0).optional(),
  chunkSize: zod.z.number().int().min(30).optional()
});
26
// Input schema for the search_transcript tool (BM25 keyword search over
// time-based transcript segments).
const SearchTranscriptSchema = zod.z.object({
  videoId: zod.z.string().min(1, "Video ID is required"),
  query: zod.z.string().min(1, "Search query is required"),
  // Result cap; the handler clamps this to 1..20 as well.
  maxResults: zod.z.number().int().min(1).max(20).optional().default(5)
});
24
31
  const FindTranscriptsSchema = zod.z.object({
25
32
  query: zod.z.string().optional(),
@@ -34,6 +41,7 @@ const ToolSchemas = {
34
41
  fetch_transcript: FetchTranscriptSchema,
35
42
  list_transcripts: ListTranscriptsSchema,
36
43
  get_transcript: GetTranscriptSchema,
44
+ search_transcript: SearchTranscriptSchema,
37
45
  find_transcripts: FindTranscriptsSchema
38
46
  };
39
47
  function validateToolInput(toolName, input) {
@@ -68,31 +76,64 @@ function extractYouTubeID(urlOrID) {
68
76
  }
69
77
// MCP tool descriptor for fetch_transcript. The handler saves the fetched
// transcript and returns only metadata + a preview (see
// buildMetadataResponse); full content is served by get_transcript.
const fetchTranscriptTool = {
  name: "fetch_transcript",
  description: "Fetch a transcript from YouTube for a given video ID or URL. The transcript is saved to the database. Returns metadata and preview only to avoid context overflow. Use get_transcript to retrieve content.",
  // JSON Schema advertised to MCP clients (mirrors FetchTranscriptSchema).
  inputSchema: {
    type: "object",
    properties: {
      videoId: {
        type: "string",
        description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
      }
    },
    required: ["videoId"]
  }
};
88
- async function handleFetchTranscript(strapi2, args) {
91
/**
 * Total video duration in milliseconds, derived from the last timecode
 * entry (its `end`, or `start + duration` when no `end` is present).
 *
 * @param {Array<{start: number, end?: number, duration?: number}>} timecodes
 * @returns {number} Duration in ms; 0 for a missing or empty array.
 */
function getVideoDurationMs$1(timecodes) {
  if (!timecodes || timecodes.length === 0) return 0;
  const lastEntry = timecodes[timecodes.length - 1];
  // `??` rather than `||`: an explicit `end` of 0 is a real timestamp, not
  // a missing value, so only fall back when `end` is null/undefined.
  return lastEntry.end ?? lastEntry.start + (lastEntry.duration ?? 0);
}
96
/**
 * Render a millisecond offset as a clock string: "M:SS", or "H:MM:SS"
 * once the offset reaches one hour.
 *
 * @param {number} ms - Offset in milliseconds.
 * @returns {string} Human-readable timestamp.
 */
function formatTime$2(ms) {
  const total = Math.floor(ms / 1e3);
  const hours = Math.floor(total / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  const seconds = total % 60;
  const pad = (n) => String(n).padStart(2, "0");
  return hours > 0
    ? `${hours}:${pad(minutes)}:${pad(seconds)}`
    : `${minutes}:${pad(seconds)}`;
}
106
/**
 * Build the compact tool response for fetch_transcript: metadata plus a
 * short text preview instead of the full transcript, keeping MCP responses
 * small. The caller serializes this object with JSON.stringify.
 *
 * @param {object} transcript2 - Saved transcript record (videoId, title,
 *   fullTranscript, transcriptWithTimeCodes).
 * @param {number} previewLength - Maximum preview length in characters.
 * @param {boolean} cached - Whether the record already existed in the DB.
 * @returns {object} Response payload with message, metadata, preview, usage.
 */
function buildMetadataResponse(transcript2, previewLength, cached) {
  const fullText = transcript2.fullTranscript || "";
  const timecodes = transcript2.transcriptWithTimeCodes || [];
  const durationMs = getVideoDurationMs$1(timecodes);
  // Fix: "".split(/\s+/) yields [""], so the previous count reported 1 word
  // for an empty transcript (and leading/trailing whitespace inflated it).
  // Trim first and treat an empty string as 0 words.
  const trimmedText = fullText.trim();
  const wordCount = trimmedText === "" ? 0 : trimmedText.split(/\s+/).length;
  const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
  return {
    message: cached ? "Transcript already exists in database" : "Transcript fetched and saved successfully",
    cached,
    videoId: transcript2.videoId,
    title: transcript2.title,
    metadata: {
      wordCount,
      characterCount: fullText.length,
      duration: formatTime$2(durationMs),
      durationSeconds: Math.floor(durationMs / 1e3)
    },
    preview,
    usage: "Use get_transcript with videoId to retrieve full content, specific time ranges, or paginated chunks."
  };
}
127
+ async function handleFetchTranscript(strapi, args) {
89
128
  const validatedArgs = validateToolInput("fetch_transcript", args);
90
- const { videoId: videoIdOrUrl, generateReadable } = validatedArgs;
129
+ const { videoId: videoIdOrUrl } = validatedArgs;
130
+ const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
131
+ const previewLength = pluginConfig?.previewLength || 500;
91
132
  const videoId = extractYouTubeID(videoIdOrUrl);
92
133
  if (!videoId) {
93
134
  throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
94
135
  }
95
- const service2 = strapi2.plugin("yt-transcript-strapi-plugin").service("service");
136
+ const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
96
137
  const existingTranscript = await service2.findTranscript(videoId);
97
138
  if (existingTranscript) {
98
139
  return {
@@ -100,11 +141,7 @@ async function handleFetchTranscript(strapi2, args) {
100
141
  {
101
142
  type: "text",
102
143
  text: JSON.stringify(
103
- {
104
- message: "Transcript already exists in database",
105
- data: existingTranscript,
106
- cached: true
107
- },
144
+ buildMetadataResponse(existingTranscript, previewLength, true),
108
145
  null,
109
146
  2
110
147
  )
@@ -122,25 +159,13 @@ async function handleFetchTranscript(strapi2, args) {
122
159
  fullTranscript: transcriptData.fullTranscript,
123
160
  transcriptWithTimeCodes: transcriptData.transcriptWithTimeCodes
124
161
  };
125
- if (generateReadable && transcriptData.fullTranscript) {
126
- try {
127
- const readableTranscript = await service2.generateHumanReadableTranscript(transcriptData.fullTranscript);
128
- payload.readableTranscript = readableTranscript;
129
- } catch (error) {
130
- strapi2.log.warn("[yt-transcript-mcp] Failed to generate readable transcript:", error);
131
- }
132
- }
133
162
  const savedTranscript = await service2.saveTranscript(payload);
134
163
  return {
135
164
  content: [
136
165
  {
137
166
  type: "text",
138
167
  text: JSON.stringify(
139
- {
140
- message: "Transcript fetched and saved successfully",
141
- data: savedTranscript,
142
- cached: false
143
- },
168
+ buildMetadataResponse(savedTranscript, previewLength, false),
144
169
  null,
145
170
  2
146
171
  )
@@ -173,17 +198,17 @@ const listTranscriptsTool = {
173
198
  required: []
174
199
  }
175
200
  };
176
- async function handleListTranscripts(strapi2, args) {
201
+ async function handleListTranscripts(strapi, args) {
177
202
  const validatedArgs = validateToolInput("list_transcripts", args);
178
203
  const { page, pageSize, sort } = validatedArgs;
179
204
  const start = (page - 1) * pageSize;
180
- const transcripts = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
205
+ const transcripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
181
206
  sort,
182
207
  limit: pageSize,
183
208
  start,
184
209
  fields: ["id", "documentId", "title", "videoId", "createdAt", "updatedAt"]
185
210
  });
186
- const allTranscripts = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({});
211
+ const allTranscripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({});
187
212
  const total = allTranscripts.length;
188
213
  return {
189
214
  content: [
@@ -208,26 +233,305 @@ async function handleListTranscripts(strapi2, args) {
208
233
  }
209
234
// MCP tool descriptor for get_transcript. Default response is metadata +
// preview; the parameters below let callers pull the full text, a time
// range, or paginated chunks (handled in handleGetTranscript).
const getTranscriptTool = {
  name: "get_transcript",
  description: "Get a saved transcript by YouTube video ID. Returns metadata and preview by default. Use parameters to get full content or specific time ranges to avoid context overflow.",
  // JSON Schema advertised to MCP clients (mirrors GetTranscriptSchema).
  inputSchema: {
    type: "object",
    properties: {
      videoId: {
        type: "string",
        description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
      },
      includeFullTranscript: {
        type: "boolean",
        description: "Include the complete transcript text. Warning: may cause context overflow for long videos. Default: false",
        default: false
      },
      includeTimecodes: {
        type: "boolean",
        description: "Include the transcript with timecodes array. Warning: significantly increases response size. Default: false",
        default: false
      },
      startTime: {
        type: "number",
        description: "Start time in seconds for fetching a specific portion of the transcript"
      },
      endTime: {
        type: "number",
        description: "End time in seconds for fetching a specific portion of the transcript"
      },
      chunkIndex: {
        type: "number",
        description: "Chunk index (0-based) when paginating through transcript. Use with chunkSize to paginate through long videos."
      },
      chunkSize: {
        type: "number",
        description: "Chunk size in seconds. Overrides config default. Use with chunkIndex for pagination."
      }
    },
    required: ["videoId"]
  }
};
223
- async function handleGetTranscript(strapi2, args) {
274
/**
 * Select the timecode entries whose start falls inside the half-open
 * window [startTimeMs, endTimeMs), and join their text.
 *
 * @param {Array<{start: number, text: string}>} timecodes
 * @param {number} startTimeMs - Window start (inclusive), in ms.
 * @param {number} endTimeMs - Window end (exclusive), in ms.
 * @returns {{text: string, entries: Array}} Joined text plus the matched entries.
 */
function getTranscriptForTimeRange(timecodes, startTimeMs, endTimeMs) {
  const entries = [];
  for (const entry of timecodes) {
    if (entry.start >= startTimeMs && entry.start < endTimeMs) {
      entries.push(entry);
    }
  }
  const text = entries.map((entry) => entry.text).join(" ");
  return { text, entries };
}
281
/**
 * Total video duration in milliseconds, derived from the last timecode
 * entry (its `end`, or `start + duration` when no `end` is present).
 * NOTE(review): duplicate of getVideoDurationMs$1 — bundler artifact.
 *
 * @param {Array<{start: number, end?: number, duration?: number}>} timecodes
 * @returns {number} Duration in ms; 0 for a missing or empty array.
 */
function getVideoDurationMs(timecodes) {
  if (!timecodes || timecodes.length === 0) return 0;
  const lastEntry = timecodes[timecodes.length - 1];
  // `??` rather than `||`: an explicit `end` of 0 is a real timestamp, not
  // a missing value, so only fall back when `end` is null/undefined.
  return lastEntry.end ?? lastEntry.start + (lastEntry.duration ?? 0);
}
286
/**
 * Render a millisecond offset as "M:SS", or "H:MM:SS" from one hour up.
 * NOTE(review): duplicate of formatTime$2/formatTime — bundler artifact.
 *
 * @param {number} ms - Offset in milliseconds.
 * @returns {string} Human-readable timestamp.
 */
function formatTime$1(ms) {
  const totalSeconds = Math.floor(ms / 1e3);
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor(totalSeconds / 60) % 60;
  const seconds = totalSeconds % 60;
  const two = (n) => n.toString().padStart(2, "0");
  if (hours > 0) {
    return [hours, two(minutes), two(seconds)].join(":");
  }
  return [minutes, two(seconds)].join(":");
}
296
/**
 * MCP handler for get_transcript. Returns a saved transcript in one of four
 * shapes, chosen by the arguments (checked in this priority order):
 *   1. startTime/endTime  -> entries within the time window
 *   2. chunkIndex         -> one fixed-size chunk, with pagination hints
 *   3. full text          -> when requested, or small enough to auto-load
 *   4. preview            -> metadata + preview + usage hints (large texts)
 *
 * @param strapi - Strapi instance (config and plugin service access).
 * @param {object} args - Raw tool arguments; validated against GetTranscriptSchema.
 * @returns {Promise<object>} MCP content array with one JSON text payload.
 * @throws {Error} When the videoId/URL cannot be parsed.
 */
async function handleGetTranscript(strapi, args) {
  const validatedArgs = validateToolInput("get_transcript", args);
  const {
    videoId: videoIdOrUrl,
    includeFullTranscript,
    includeTimecodes,
    startTime,
    endTime,
    chunkIndex,
    chunkSize: chunkSizeOverride
  } = validatedArgs;
  // Plugin-level tuning knobs; hard-coded fallbacks when config is absent.
  const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
  const defaultChunkSize = pluginConfig?.chunkSizeSeconds || 300;
  const previewLength = pluginConfig?.previewLength || 500;
  const maxFullTranscriptLength = pluginConfig?.maxFullTranscriptLength || 5e4;
  // Per-call chunkSize wins over the configured default.
  const chunkSizeSeconds = chunkSizeOverride || defaultChunkSize;
  const videoId = extractYouTubeID(videoIdOrUrl);
  if (!videoId) {
    throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
  }
  const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
  const transcript2 = await service2.findTranscript(videoId);
  // A missing transcript is reported as a normal payload (error: true)
  // rather than a thrown error, so the client gets actionable guidance.
  if (!transcript2) {
    return {
      content: [
        {
          type: "text",
          text: JSON.stringify(
            {
              error: true,
              message: `No transcript found for video ID: ${videoId}. Use fetch_transcript to fetch it from YouTube first.`,
              videoId
            },
            null,
            2
          )
        }
      ]
    };
  }
  const timecodes = transcript2.transcriptWithTimeCodes || [];
  const fullText = transcript2.fullTranscript || "";
  const durationMs = getVideoDurationMs(timecodes);
  const totalChunks = Math.ceil(durationMs / (chunkSizeSeconds * 1e3));
  // NOTE(review): "".split(/\s+/) yields [""], so an empty transcript is
  // reported as wordCount 1 — same quirk exists in buildMetadataResponse.
  const wordCount = fullText.split(/\s+/).length;
  // Metadata is always included, regardless of which branch runs below.
  const response = {
    videoId: transcript2.videoId,
    title: transcript2.title,
    metadata: {
      wordCount,
      characterCount: fullText.length,
      duration: formatTime$1(durationMs),
      durationSeconds: Math.floor(durationMs / 1e3),
      totalChunks,
      chunkSizeSeconds
    }
  };
  if (startTime !== void 0 || endTime !== void 0) {
    // Branch 1: explicit time window (seconds -> ms); missing endTime
    // means "to the end of the video".
    const startMs = (startTime || 0) * 1e3;
    const endMs = endTime !== void 0 ? endTime * 1e3 : durationMs;
    const { text, entries } = getTranscriptForTimeRange(timecodes, startMs, endMs);
    response.timeRange = {
      startTime: startTime || 0,
      // NOTE(review): `endTime || ...` echoes an explicit endTime of 0 as
      // the full duration here, though endMs above honors the 0 — confirm.
      endTime: endTime || Math.floor(durationMs / 1e3),
      startFormatted: formatTime$1(startMs),
      endFormatted: formatTime$1(endMs)
    };
    response.transcript = text;
    if (includeTimecodes) {
      response.transcriptWithTimeCodes = entries;
    }
  } else if (chunkIndex !== void 0) {
    // Branch 2: fixed-size chunk pagination over the video timeline.
    const chunkStartMs = chunkIndex * chunkSizeSeconds * 1e3;
    const chunkEndMs = Math.min((chunkIndex + 1) * chunkSizeSeconds * 1e3, durationMs);
    if (chunkStartMs >= durationMs) {
      // Out-of-range chunk: report the valid range instead of failing.
      response.error = `Chunk index ${chunkIndex} is out of range. Total chunks: ${totalChunks} (0-${totalChunks - 1})`;
    } else {
      const { text, entries } = getTranscriptForTimeRange(timecodes, chunkStartMs, chunkEndMs);
      response.chunk = {
        index: chunkIndex,
        totalChunks,
        startTime: Math.floor(chunkStartMs / 1e3),
        endTime: Math.floor(chunkEndMs / 1e3),
        startFormatted: formatTime$1(chunkStartMs),
        endFormatted: formatTime$1(chunkEndMs)
      };
      response.transcript = text;
      if (includeTimecodes) {
        response.transcriptWithTimeCodes = entries;
      }
      // Navigation hints for the client to walk adjacent chunks.
      if (chunkIndex < totalChunks - 1) {
        response.nextChunk = `Use chunkIndex: ${chunkIndex + 1} to get the next portion`;
      }
      if (chunkIndex > 0) {
        response.previousChunk = `Use chunkIndex: ${chunkIndex - 1} to get the previous portion`;
      }
    }
  } else if (includeFullTranscript || fullText.length <= maxFullTranscriptLength) {
    // Branch 3: full text — either explicitly requested, or auto-loaded
    // because the transcript fits under the configured size limit.
    response.transcript = fullText;
    if (includeTimecodes) {
      response.transcriptWithTimeCodes = timecodes;
    }
    if (includeFullTranscript && fullText.length > maxFullTranscriptLength) {
      response.warning = "Full transcript included. For long videos, consider using chunkIndex, startTime/endTime, or search_transcript to reduce response size.";
    } else if (fullText.length <= maxFullTranscriptLength) {
      response.note = "Full transcript auto-loaded (fits within context limit).";
    }
  } else {
    // Branch 4: transcript too large and not explicitly requested — return
    // a preview plus instructions for retrieving portions of it.
    const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
    response.preview = preview;
    response.isLargeTranscript = true;
    response.usage = {
      fullTranscript: "Set includeFullTranscript: true to get complete text (warning: may exceed context)",
      search: "Use search_transcript to find relevant portions by keyword (recommended for large transcripts)",
      timeRange: "Use startTime and endTime (in seconds) to get a specific portion",
      pagination: `Use chunkIndex (0-${totalChunks - 1}) to paginate through ${chunkSizeSeconds}s chunks`
    };
  }
  return {
    content: [
      {
        type: "text",
        text: JSON.stringify(response, null, 2)
      }
    ]
  };
}
423
// MCP tool descriptor for search_transcript: BM25-ranked keyword search
// over fixed-length transcript segments (scored in handleSearchTranscript).
const searchTranscriptTool = {
  name: "search_transcript",
  description: "Search within a saved transcript using BM25 scoring. Returns the most relevant segments matching your query with timestamps. Use this to find specific content in long videos without loading the entire transcript.",
  // JSON Schema advertised to MCP clients (mirrors SearchTranscriptSchema).
  inputSchema: {
    type: "object",
    properties: {
      videoId: {
        type: "string",
        description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
      },
      query: {
        type: "string",
        description: "Search query - keywords or phrases to find in the transcript"
      },
      maxResults: {
        type: "number",
        description: "Maximum number of results to return (default: 5, max: 20)",
        default: 5
      }
    },
    required: ["videoId", "query"]
  }
};
446
/**
 * Break text into lowercase word tokens for BM25 scoring: punctuation is
 * replaced by spaces, and tokens of a single character are dropped.
 *
 * @param {string} text - Raw transcript or query text.
 * @returns {string[]} Lowercase tokens of length >= 2.
 */
function tokenize(text) {
  const normalized = text.toLowerCase().replace(/[^\w\s]/g, " ");
  const tokens = [];
  for (const candidate of normalized.split(/\s+/)) {
    if (candidate.length > 1) {
      tokens.push(candidate);
    }
  }
  return tokens;
}
449
/**
 * Compute BM25 inverse-document-frequency for each query term over the
 * transcript segments (each segment is one "document").
 *
 * @param {Array<{text: string}>} segments - Searchable transcript segments.
 * @param {Iterable<string>} vocabulary - Distinct query terms.
 * @returns {Map<string, number>} term -> IDF weight.
 */
function calculateIDF(segments, vocabulary) {
  const idf = /* @__PURE__ */ new Map();
  const N = segments.length;
  // Tokenize each segment exactly once up front. The previous version
  // re-tokenized every segment for every vocabulary term (O(terms *
  // segments) tokenize calls plus O(tokens) array membership scans).
  const segmentTokenSets = segments.map((seg) => new Set(tokenize(seg.text)));
  for (const term of vocabulary) {
    let docsWithTerm = 0;
    for (const tokenSet of segmentTokenSets) {
      if (tokenSet.has(term)) docsWithTerm += 1;
    }
    // BM25 probabilistic IDF, with +1 inside the log to keep it positive.
    idf.set(term, Math.log((N - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1));
  }
  return idf;
}
460
/**
 * Okapi BM25 relevance score of one segment against the query tokens.
 *
 * @param {string[]} segmentTokens - Tokenized segment text.
 * @param {string[]} queryTokens - Tokenized query.
 * @param {Map<string, number>} idf - Precomputed term -> IDF weights.
 * @param {number} avgDocLength - Mean token count across all segments.
 * @param {number} [k1=1.5] - Term-frequency saturation parameter.
 * @param {number} [b=0.75] - Document-length normalization strength.
 * @returns {number} Non-negative relevance score (0 when nothing matches).
 */
function bm25Score(segmentTokens, queryTokens, idf, avgDocLength, k1 = 1.5, b = 0.75) {
  const docLength = segmentTokens.length;
  // Term-frequency table for this segment.
  const frequencies = /* @__PURE__ */ new Map();
  for (const token of segmentTokens) {
    frequencies.set(token, (frequencies.get(token) || 0) + 1);
  }
  // Length normalization is identical for every term; compute it once.
  const lengthNorm = 1 - b + b * (docLength / avgDocLength);
  let score = 0;
  for (const term of queryTokens) {
    const freq = frequencies.get(term) || 0;
    if (freq === 0) continue;
    const termIdf = idf.get(term) || 0;
    score += termIdf * ((freq * (k1 + 1)) / (freq + k1 * lengthNorm));
  }
  return score;
}
478
/**
 * Render a millisecond offset as "M:SS", or "H:MM:SS" from one hour up.
 * NOTE(review): duplicate of formatTime$1/formatTime$2 — bundler artifact.
 *
 * @param {number} ms - Offset in milliseconds.
 * @returns {string} Human-readable timestamp.
 */
function formatTime(ms) {
  let remaining = Math.floor(ms / 1e3);
  const seconds = remaining % 60;
  remaining = Math.floor(remaining / 60);
  const minutes = remaining % 60;
  const hours = Math.floor(remaining / 60);
  const pad = (n) => String(n).padStart(2, "0");
  if (hours > 0) {
    return `${hours}:${pad(minutes)}:${pad(seconds)}`;
  }
  return `${minutes}:${pad(seconds)}`;
}
488
/**
 * Group consecutive timecode entries into searchable segments of roughly
 * segmentDurationMs each. A new segment begins when an entry starts at or
 * past the current segment's window; each segment records its joined text
 * and start/end in both seconds and formatted form.
 *
 * @param {Array<{start: number, end?: number, duration?: number, text: string}>} timecodes
 * @param {number} segmentDurationMs - Target segment length in ms.
 * @returns {Array<{text: string, startTime: number, endTime: number, startFormatted: string, endFormatted: string}>}
 */
function createSegments(timecodes, segmentDurationMs) {
  if (!timecodes || timecodes.length === 0) return [];
  const segments = [];
  let bucket = [];
  let bucketStart = timecodes[0].start;
  // Push the accumulated bucket (if any) as one finished segment.
  const flush = () => {
    if (bucket.length === 0) return;
    const last = bucket[bucket.length - 1];
    const bucketEnd = last.end || last.start + (last.duration || 0);
    segments.push({
      text: bucket.map((entry) => entry.text).join(" "),
      startTime: Math.floor(bucketStart / 1e3),
      endTime: Math.floor(bucketEnd / 1e3),
      startFormatted: formatTime(bucketStart),
      endFormatted: formatTime(bucketEnd)
    });
  };
  for (const entry of timecodes) {
    if (entry.start < bucketStart + segmentDurationMs) {
      bucket.push(entry);
    } else {
      flush();
      bucketStart = entry.start;
      bucket = [entry];
    }
  }
  flush();
  return segments;
}
524
+ async function handleSearchTranscript(strapi, args) {
525
+ const validatedArgs = validateToolInput("search_transcript", args);
526
+ const { videoId: videoIdOrUrl, query, maxResults: maxResultsInput } = validatedArgs;
527
+ const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
528
+ const segmentSeconds = pluginConfig?.searchSegmentSeconds || 30;
529
+ const maxResults = Math.min(Math.max(maxResultsInput || 5, 1), 20);
226
530
  const videoId = extractYouTubeID(videoIdOrUrl);
227
531
  if (!videoId) {
228
532
  throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
229
533
  }
230
- const service2 = strapi2.plugin("yt-transcript-strapi-plugin").service("service");
534
+ const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
231
535
  const transcript2 = await service2.findTranscript(videoId);
232
536
  if (!transcript2) {
233
537
  return {
@@ -247,13 +551,90 @@ async function handleGetTranscript(strapi2, args) {
247
551
  ]
248
552
  };
249
553
  }
554
+ const timecodes = transcript2.transcriptWithTimeCodes || [];
555
+ if (timecodes.length === 0) {
556
+ return {
557
+ content: [
558
+ {
559
+ type: "text",
560
+ text: JSON.stringify(
561
+ {
562
+ error: true,
563
+ message: "Transcript has no timecode data for searching.",
564
+ videoId
565
+ },
566
+ null,
567
+ 2
568
+ )
569
+ }
570
+ ]
571
+ };
572
+ }
573
+ const segments = createSegments(timecodes, segmentSeconds * 1e3);
574
+ if (segments.length === 0) {
575
+ return {
576
+ content: [
577
+ {
578
+ type: "text",
579
+ text: JSON.stringify(
580
+ {
581
+ error: true,
582
+ message: "Could not create searchable segments from transcript.",
583
+ videoId
584
+ },
585
+ null,
586
+ 2
587
+ )
588
+ }
589
+ ]
590
+ };
591
+ }
592
+ const queryTokens = tokenize(query);
593
+ if (queryTokens.length === 0) {
594
+ return {
595
+ content: [
596
+ {
597
+ type: "text",
598
+ text: JSON.stringify(
599
+ {
600
+ error: true,
601
+ message: "Query is empty or contains only stop words.",
602
+ query
603
+ },
604
+ null,
605
+ 2
606
+ )
607
+ }
608
+ ]
609
+ };
610
+ }
611
+ const vocabulary = new Set(queryTokens);
612
+ const idf = calculateIDF(segments, vocabulary);
613
+ const avgDocLength = segments.reduce((sum, seg) => sum + tokenize(seg.text).length, 0) / segments.length;
614
+ const scoredSegments = segments.map((segment) => ({
615
+ ...segment,
616
+ score: bm25Score(tokenize(segment.text), queryTokens, idf, avgDocLength)
617
+ }));
618
+ const results = scoredSegments.filter((seg) => seg.score > 0).sort((a, b) => b.score - a.score).slice(0, maxResults);
250
619
  return {
251
620
  content: [
252
621
  {
253
622
  type: "text",
254
623
  text: JSON.stringify(
255
624
  {
256
- data: transcript2
625
+ videoId: transcript2.videoId,
626
+ title: transcript2.title,
627
+ query,
628
+ totalSegments: segments.length,
629
+ matchingResults: results.length,
630
+ results: results.map((r) => ({
631
+ text: r.text,
632
+ startTime: r.startTime,
633
+ endTime: r.endTime,
634
+ timeRange: `${r.startFormatted} - ${r.endFormatted}`,
635
+ score: Math.round(r.score * 100) / 100
636
+ })),
637
+ usage: results.length > 0 ? `Use get_transcript with startTime: ${results[0].startTime} and endTime: ${results[0].endTime} to get full context for the top result.` : "No matches found. Try different keywords."
257
638
  },
258
639
  null,
259
640
  2
@@ -312,11 +693,10 @@ function truncateText(text, maxLength) {
312
693
  function truncateTranscripts(transcripts) {
313
694
  return transcripts.map((transcript2) => ({
314
695
  ...transcript2,
315
- fullTranscript: truncateText(transcript2.fullTranscript, TRANSCRIPT_PREVIEW_LENGTH),
316
- readableTranscript: truncateText(transcript2.readableTranscript, TRANSCRIPT_PREVIEW_LENGTH)
696
+ fullTranscript: truncateText(transcript2.fullTranscript, TRANSCRIPT_PREVIEW_LENGTH)
317
697
  }));
318
698
  }
319
- async function handleFindTranscripts(strapi2, args) {
699
+ async function handleFindTranscripts(strapi, args) {
320
700
  const validatedArgs = validateToolInput("find_transcripts", args);
321
701
  const { query, videoId, title, includeFullContent, page, pageSize, sort } = validatedArgs;
322
702
  const start = (page - 1) * pageSize;
@@ -331,17 +711,16 @@ async function handleFindTranscripts(strapi2, args) {
331
711
  filters.$or = [
332
712
  { title: { $containsi: query } },
333
713
  { videoId: { $containsi: query } },
334
- { fullTranscript: { $containsi: query } },
335
- { readableTranscript: { $containsi: query } }
714
+ { fullTranscript: { $containsi: query } }
336
715
  ];
337
716
  }
338
- const transcripts = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
717
+ const transcripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
339
718
  filters,
340
719
  sort,
341
720
  limit: pageSize,
342
721
  start
343
722
  });
344
- const allMatching = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
723
+ const allMatching = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
345
724
  filters
346
725
  });
347
726
  const total = allMatching.length;
@@ -377,15 +756,17 @@ const tools = [
377
756
  fetchTranscriptTool,
378
757
  listTranscriptsTool,
379
758
  getTranscriptTool,
759
+ searchTranscriptTool,
380
760
  findTranscriptsTool
381
761
  ];
382
762
  const toolHandlers = {
383
763
  fetch_transcript: handleFetchTranscript,
384
764
  list_transcripts: handleListTranscripts,
385
765
  get_transcript: handleGetTranscript,
766
+ search_transcript: handleSearchTranscript,
386
767
  find_transcripts: handleFindTranscripts
387
768
  };
388
- async function handleToolCall(strapi2, request) {
769
+ async function handleToolCall(strapi, request) {
389
770
  const { name, arguments: args } = request.params;
390
771
  const handler = toolHandlers[name];
391
772
  if (!handler) {
@@ -393,13 +774,13 @@ async function handleToolCall(strapi2, request) {
393
774
  }
394
775
  const startTime = Date.now();
395
776
  try {
396
- const result = await handler(strapi2, args || {});
777
+ const result = await handler(strapi, args || {});
397
778
  const duration = Date.now() - startTime;
398
- strapi2.log.debug(`[yt-transcript-mcp] Tool ${name} executed successfully in ${duration}ms`);
779
+ strapi.log.debug(`[yt-transcript-mcp] Tool ${name} executed successfully in ${duration}ms`);
399
780
  return result;
400
781
  } catch (error) {
401
782
  const duration = Date.now() - startTime;
402
- strapi2.log.error(`[yt-transcript-mcp] Tool ${name} failed after ${duration}ms`, {
783
+ strapi.log.error(`[yt-transcript-mcp] Tool ${name} failed after ${duration}ms`, {
403
784
  error: error instanceof Error ? error.message : String(error)
404
785
  });
405
786
  return {
@@ -420,7 +801,7 @@ async function handleToolCall(strapi2, request) {
420
801
  };
421
802
  }
422
803
  }
423
- function createMcpServer(strapi2) {
804
+ function createMcpServer(strapi) {
424
805
  const server = new index_js.Server(
425
806
  {
426
807
  name: "yt-transcript-mcp",
@@ -433,53 +814,57 @@ function createMcpServer(strapi2) {
433
814
  }
434
815
  );
435
816
  server.setRequestHandler(types_js.ListToolsRequestSchema, async () => {
436
- strapi2.log.debug("[yt-transcript-mcp] Listing tools");
817
+ strapi.log.debug("[yt-transcript-mcp] Listing tools");
437
818
  return { tools };
438
819
  });
439
820
  server.setRequestHandler(types_js.CallToolRequestSchema, async (request) => {
440
- strapi2.log.debug(`[yt-transcript-mcp] Tool call: ${request.params.name}`);
441
- return handleToolCall(strapi2, request);
821
+ strapi.log.debug(`[yt-transcript-mcp] Tool call: ${request.params.name}`);
822
+ return handleToolCall(strapi, request);
442
823
  });
443
- strapi2.log.info("[yt-transcript-mcp] MCP server created with tools:", {
824
+ strapi.log.info("[yt-transcript-mcp] MCP server created with tools:", {
444
825
  tools: tools.map((t) => t.name)
445
826
  });
446
827
  return server;
447
828
  }
448
- const bootstrap = async ({ strapi: strapi2 }) => {
449
- const plugin = strapi2.plugin("yt-transcript-strapi-plugin");
450
- plugin.createMcpServer = () => createMcpServer(strapi2);
829
+ const bootstrap = async ({ strapi }) => {
830
+ const plugin = strapi.plugin("yt-transcript-strapi-plugin");
831
+ plugin.createMcpServer = () => createMcpServer(strapi);
451
832
  plugin.sessions = /* @__PURE__ */ new Map();
452
- strapi2.log.info("[yt-transcript-mcp] MCP plugin initialized");
453
- strapi2.log.info("[yt-transcript-mcp] MCP endpoint available at: /api/yt-transcript-strapi-plugin/mcp");
833
+ strapi.log.info("[yt-transcript-mcp] MCP plugin initialized");
834
+ strapi.log.info("[yt-transcript-mcp] MCP endpoint available at: /api/yt-transcript-strapi-plugin/mcp");
454
835
  };
455
- const destroy = ({ strapi: strapi2 }) => {
836
+ const destroy = ({ strapi }) => {
456
837
  };
457
- const register = ({ strapi: strapi2 }) => {
838
+ const register = ({ strapi }) => {
458
839
  };
459
840
  const config = {
460
841
  default: {
461
- openAIApiKey: "",
462
- model: "gpt-4o-mini",
463
- temp: 0.7,
464
- maxTokens: 4096,
465
- proxyUrl: ""
842
+ proxyUrl: "",
466
843
  // Optional: HTTP/HTTPS proxy for YouTube requests (e.g., 'http://user:pass@proxy.example.com:8080')
844
+ chunkSizeSeconds: 300,
845
+ // Default chunk size for transcript pagination (5 minutes)
846
+ previewLength: 500,
847
+ // Default preview length in characters
848
+ maxFullTranscriptLength: 5e4,
849
+ // Auto-load full transcript if under this character count (~12K tokens)
850
+ searchSegmentSeconds: 30
851
+ // Segment size for BM25 search scoring
467
852
  },
468
853
  validator(config2) {
469
- if (config2.openAIApiKey && typeof config2.openAIApiKey !== "string") {
470
- throw new Error("openAIApiKey must be a string");
854
+ if (config2.proxyUrl && typeof config2.proxyUrl !== "string") {
855
+ throw new Error("proxyUrl must be a string");
471
856
  }
472
- if (config2.model && typeof config2.model !== "string") {
473
- throw new Error("model must be a string");
857
+ if (config2.chunkSizeSeconds !== void 0 && (typeof config2.chunkSizeSeconds !== "number" || config2.chunkSizeSeconds < 30)) {
858
+ throw new Error("chunkSizeSeconds must be a number >= 30");
474
859
  }
475
- if (config2.temp !== void 0 && (typeof config2.temp !== "number" || config2.temp < 0 || config2.temp > 2)) {
476
- throw new Error("temp must be a number between 0 and 2");
860
+ if (config2.previewLength !== void 0 && (typeof config2.previewLength !== "number" || config2.previewLength < 100)) {
861
+ throw new Error("previewLength must be a number >= 100");
477
862
  }
478
- if (config2.maxTokens !== void 0 && (typeof config2.maxTokens !== "number" || config2.maxTokens < 1)) {
479
- throw new Error("maxTokens must be a positive number");
863
+ if (config2.maxFullTranscriptLength !== void 0 && (typeof config2.maxFullTranscriptLength !== "number" || config2.maxFullTranscriptLength < 1e3)) {
864
+ throw new Error("maxFullTranscriptLength must be a number >= 1000");
480
865
  }
481
- if (config2.proxyUrl && typeof config2.proxyUrl !== "string") {
482
- throw new Error("proxyUrl must be a string");
866
+ if (config2.searchSegmentSeconds !== void 0 && (typeof config2.searchSegmentSeconds !== "number" || config2.searchSegmentSeconds < 10)) {
867
+ throw new Error("searchSegmentSeconds must be a number >= 10");
483
868
  }
484
869
  }
485
870
  };
@@ -513,9 +898,6 @@ const attributes = {
513
898
  },
514
899
  transcriptWithTimeCodes: {
515
900
  type: "json"
516
- },
517
- readableTranscript: {
518
- type: "richtext"
519
901
  }
520
902
  };
521
903
  const schema = {
@@ -532,41 +914,34 @@ const transcript = {
532
914
  const contentTypes = {
533
915
  transcript
534
916
  };
535
- const controller = ({ strapi: strapi2 }) => ({
917
+ const controller = ({ strapi }) => ({
536
918
  async getTranscript(ctx) {
537
919
  const videoId = extractYouTubeID(ctx.params.videoId);
538
920
  if (!videoId) {
539
921
  return ctx.body = { error: "Invalid YouTube URL or ID", data: null };
540
922
  }
541
- const found = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").findTranscript(videoId);
923
+ const found = await strapi.plugin("yt-transcript-strapi-plugin").service("service").findTranscript(videoId);
542
924
  if (found) {
543
925
  return ctx.body = { data: found };
544
926
  }
545
- const transcriptData = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").getTranscript(videoId);
546
- let readableTranscript = null;
547
- try {
548
- readableTranscript = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").generateHumanReadableTranscript(transcriptData.fullTranscript);
549
- } catch (error) {
550
- strapi2.log.debug("[yt-transcript] Readable transcript generation skipped");
551
- }
927
+ const transcriptData = await strapi.plugin("yt-transcript-strapi-plugin").service("service").getTranscript(videoId);
552
928
  const payload = {
553
929
  videoId,
554
930
  title: transcriptData?.title || "No title found",
555
931
  fullTranscript: transcriptData?.fullTranscript,
556
- transcriptWithTimeCodes: transcriptData?.transcriptWithTimeCodes,
557
- readableTranscript
932
+ transcriptWithTimeCodes: transcriptData?.transcriptWithTimeCodes
558
933
  };
559
- const transcript2 = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").saveTranscript(payload);
934
+ const transcript2 = await strapi.plugin("yt-transcript-strapi-plugin").service("service").saveTranscript(payload);
560
935
  ctx.body = { data: transcript2 };
561
936
  }
562
937
  });
563
- const mcpController = ({ strapi: strapi2 }) => ({
938
+ const mcpController = ({ strapi }) => ({
564
939
  /**
565
940
  * Handle MCP requests (POST, GET, DELETE)
566
941
  * Creates a new server+transport per session for proper isolation
567
942
  */
568
943
  async handle(ctx) {
569
- const plugin = strapi2.plugin("yt-transcript-strapi-plugin");
944
+ const plugin = strapi.plugin("yt-transcript-strapi-plugin");
570
945
  if (!plugin.createMcpServer) {
571
946
  ctx.status = 503;
572
947
  ctx.body = {
@@ -586,12 +961,12 @@ const mcpController = ({ strapi: strapi2 }) => ({
586
961
  await server.connect(transport);
587
962
  session = { server, transport, createdAt: Date.now() };
588
963
  plugin.sessions.set(sessionId, session);
589
- strapi2.log.debug(`[yt-transcript-mcp] New session created: ${sessionId}`);
964
+ strapi.log.debug(`[yt-transcript-mcp] New session created: ${sessionId}`);
590
965
  }
591
966
  await session.transport.handleRequest(ctx.req, ctx.res, ctx.request.body);
592
967
  ctx.respond = false;
593
968
  } catch (error) {
594
- strapi2.log.error("[yt-transcript-mcp] Error handling MCP request", {
969
+ strapi.log.error("[yt-transcript-mcp] Error handling MCP request", {
595
970
  error: error instanceof Error ? error.message : String(error),
596
971
  method: ctx.method,
597
972
  path: ctx.path
@@ -671,18 +1046,6 @@ const routes = {
671
1046
  routes: [...admin]
672
1047
  }
673
1048
  };
674
- async function initializeModel({
675
- openAIApiKey,
676
- model,
677
- temp
678
- }) {
679
- return new openai.ChatOpenAI({
680
- temperature: temp,
681
- openAIApiKey,
682
- modelName: model,
683
- maxTokens: 1e3
684
- });
685
- }
686
1049
  function isRequestLike(input) {
687
1050
  return typeof input === "object" && input !== null && "url" in input && typeof input.url === "string" && "method" in input;
688
1051
  }
@@ -825,49 +1188,14 @@ const fetchTranscript = async (videoId, options2) => {
825
1188
  );
826
1189
  }
827
1190
  };
828
- async function processTextChunks(chunks, model) {
829
- const punctuationPrompt = prompts.PromptTemplate.fromTemplate(
830
- "Add proper punctuation and capitalization to the following text chunk:\n\n{chunk}"
831
- );
832
- const punctuationChain = punctuationPrompt.pipe(model);
833
- const processedChunks = await Promise.all(
834
- chunks.map(async (chunk) => {
835
- const result = await punctuationChain.invoke({ chunk });
836
- return result.content;
837
- })
838
- );
839
- return processedChunks.join(" ");
840
- }
841
- async function generateModifiedTranscript(rawTranscript) {
842
- const pluginSettings = await strapi.config.get(
843
- "plugin::yt-transcript-strapi-plugin"
844
- );
845
- if (!pluginSettings.openAIApiKey || !pluginSettings.model || !pluginSettings.temp || !pluginSettings.maxTokens) {
846
- throw new Error("Missing required configuration for YTTranscript");
847
- }
848
- const chatModel = await initializeModel({
849
- openAIApiKey: pluginSettings.openAIApiKey,
850
- model: pluginSettings.model,
851
- temp: pluginSettings.temp,
852
- maxTokens: pluginSettings.maxTokens
853
- });
854
- const splitter = new textsplitters.TokenTextSplitter({
855
- chunkSize: 1e3,
856
- chunkOverlap: 200
857
- });
858
- const transcriptChunks = await splitter.createDocuments([rawTranscript]);
859
- const chunkTexts = transcriptChunks.map((chunk) => chunk.pageContent);
860
- const modifiedTranscript = await processTextChunks(chunkTexts, chatModel);
861
- return modifiedTranscript;
862
- }
863
- const service = ({ strapi: strapi2 }) => ({
1191
+ const service = ({ strapi }) => ({
864
1192
  async getTranscript(identifier) {
865
1193
  const youtubeIdRegex = /^[a-zA-Z0-9_-]{11}$/;
866
1194
  const isValid = youtubeIdRegex.test(identifier);
867
1195
  if (!isValid) {
868
1196
  return { error: "Invalid video ID", data: null };
869
1197
  }
870
- const pluginSettings = await strapi2.config.get(
1198
+ const pluginSettings = await strapi.config.get(
871
1199
  "plugin::yt-transcript-strapi-plugin"
872
1200
  );
873
1201
  const transcriptData = await fetchTranscript(identifier, {
@@ -880,20 +1208,16 @@ const service = ({ strapi: strapi2 }) => ({
880
1208
  };
881
1209
  },
882
1210
  async saveTranscript(payload) {
883
- return await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").create({
1211
+ return await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").create({
884
1212
  data: payload
885
1213
  });
886
1214
  },
887
1215
  async findTranscript(videoId) {
888
- const transcriptData = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findFirst({
1216
+ const transcriptData = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findFirst({
889
1217
  filters: { videoId }
890
1218
  });
891
1219
  if (!transcriptData) return null;
892
1220
  return transcriptData;
893
- },
894
- async generateHumanReadableTranscript(transcript2) {
895
- const modifiedTranscript = await generateModifiedTranscript(transcript2);
896
- return modifiedTranscript;
897
1221
  }
898
1222
  });
899
1223
  const services = {