yt-transcript-strapi-plugin 0.0.21 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,14 +3,10 @@ import { ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprot
3
3
  import { z } from "zod";
4
4
  import { randomUUID } from "node:crypto";
5
5
  import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
6
- import { TokenTextSplitter } from "@langchain/textsplitters";
7
- import { PromptTemplate } from "@langchain/core/prompts";
8
- import { ChatOpenAI } from "@langchain/openai";
9
6
  import { Innertube } from "youtubei.js";
10
7
  import { ProxyAgent, fetch as fetch$1 } from "undici";
11
8
  const FetchTranscriptSchema = z.object({
12
- videoId: z.string().min(1, "Video ID or URL is required"),
13
- generateReadable: z.boolean().optional().default(false)
9
+ videoId: z.string().min(1, "Video ID or URL is required")
14
10
  });
15
11
  const ListTranscriptsSchema = z.object({
16
12
  page: z.number().int().min(1).optional().default(1),
@@ -18,7 +14,18 @@ const ListTranscriptsSchema = z.object({
18
14
  sort: z.string().optional().default("createdAt:desc")
19
15
  });
20
16
  const GetTranscriptSchema = z.object({
21
- videoId: z.string().min(1, "Video ID is required")
17
+ videoId: z.string().min(1, "Video ID is required"),
18
+ includeFullTranscript: z.boolean().optional().default(false),
19
+ includeTimecodes: z.boolean().optional().default(false),
20
+ startTime: z.number().min(0).optional(),
21
+ endTime: z.number().min(0).optional(),
22
+ chunkIndex: z.number().int().min(0).optional(),
23
+ chunkSize: z.number().int().min(30).optional()
24
+ });
25
+ const SearchTranscriptSchema = z.object({
26
+ videoId: z.string().min(1, "Video ID is required"),
27
+ query: z.string().min(1, "Search query is required"),
28
+ maxResults: z.number().int().min(1).max(20).optional().default(5)
22
29
  });
23
30
  const FindTranscriptsSchema = z.object({
24
31
  query: z.string().optional(),
@@ -33,6 +40,7 @@ const ToolSchemas = {
33
40
  fetch_transcript: FetchTranscriptSchema,
34
41
  list_transcripts: ListTranscriptsSchema,
35
42
  get_transcript: GetTranscriptSchema,
43
+ search_transcript: SearchTranscriptSchema,
36
44
  find_transcripts: FindTranscriptsSchema
37
45
  };
38
46
  function validateToolInput(toolName, input) {
@@ -67,31 +75,64 @@ function extractYouTubeID(urlOrID) {
67
75
  }
68
76
  const fetchTranscriptTool = {
69
77
  name: "fetch_transcript",
70
- description: "Fetch a transcript from YouTube for a given video ID or URL. Optionally generates a human-readable version using AI. The transcript is saved to the database for future retrieval.",
78
+ description: "Fetch a transcript from YouTube for a given video ID or URL. The transcript is saved to the database. Returns metadata and preview only to avoid context overflow. Use get_transcript to retrieve content.",
71
79
  inputSchema: {
72
80
  type: "object",
73
81
  properties: {
74
82
  videoId: {
75
83
  type: "string",
76
84
  description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
77
- },
78
- generateReadable: {
79
- type: "boolean",
80
- description: "If true, uses AI to add punctuation and formatting to make the transcript more readable. Requires OpenAI API key configuration.",
81
- default: false
82
85
  }
83
86
  },
84
87
  required: ["videoId"]
85
88
  }
86
89
  };
87
- async function handleFetchTranscript(strapi2, args) {
90
+ function getVideoDurationMs$1(timecodes) {
91
+ if (!timecodes || timecodes.length === 0) return 0;
92
+ const lastEntry = timecodes[timecodes.length - 1];
93
+ return lastEntry.end || lastEntry.start + (lastEntry.duration || 0);
94
+ }
95
+ function formatTime$2(ms) {
96
+ const totalSeconds = Math.floor(ms / 1e3);
97
+ const hours = Math.floor(totalSeconds / 3600);
98
+ const minutes = Math.floor(totalSeconds % 3600 / 60);
99
+ const seconds = totalSeconds % 60;
100
+ if (hours > 0) {
101
+ return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
102
+ }
103
+ return `${minutes}:${seconds.toString().padStart(2, "0")}`;
104
+ }
105
+ function buildMetadataResponse(transcript2, previewLength, cached) {
106
+ const fullText = transcript2.fullTranscript || "";
107
+ const timecodes = transcript2.transcriptWithTimeCodes || [];
108
+ const durationMs = getVideoDurationMs$1(timecodes);
109
+ const wordCount = fullText.split(/\s+/).length;
110
+ const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
111
+ return {
112
+ message: cached ? "Transcript already exists in database" : "Transcript fetched and saved successfully",
113
+ cached,
114
+ videoId: transcript2.videoId,
115
+ title: transcript2.title,
116
+ metadata: {
117
+ wordCount,
118
+ characterCount: fullText.length,
119
+ duration: formatTime$2(durationMs),
120
+ durationSeconds: Math.floor(durationMs / 1e3)
121
+ },
122
+ preview,
123
+ usage: "Use get_transcript with videoId to retrieve full content, specific time ranges, or paginated chunks."
124
+ };
125
+ }
126
+ async function handleFetchTranscript(strapi, args) {
88
127
  const validatedArgs = validateToolInput("fetch_transcript", args);
89
- const { videoId: videoIdOrUrl, generateReadable } = validatedArgs;
128
+ const { videoId: videoIdOrUrl } = validatedArgs;
129
+ const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
130
+ const previewLength = pluginConfig?.previewLength || 500;
90
131
  const videoId = extractYouTubeID(videoIdOrUrl);
91
132
  if (!videoId) {
92
133
  throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
93
134
  }
94
- const service2 = strapi2.plugin("yt-transcript-strapi-plugin").service("service");
135
+ const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
95
136
  const existingTranscript = await service2.findTranscript(videoId);
96
137
  if (existingTranscript) {
97
138
  return {
@@ -99,11 +140,7 @@ async function handleFetchTranscript(strapi2, args) {
99
140
  {
100
141
  type: "text",
101
142
  text: JSON.stringify(
102
- {
103
- message: "Transcript already exists in database",
104
- data: existingTranscript,
105
- cached: true
106
- },
143
+ buildMetadataResponse(existingTranscript, previewLength, true),
107
144
  null,
108
145
  2
109
146
  )
@@ -121,25 +158,13 @@ async function handleFetchTranscript(strapi2, args) {
121
158
  fullTranscript: transcriptData.fullTranscript,
122
159
  transcriptWithTimeCodes: transcriptData.transcriptWithTimeCodes
123
160
  };
124
- if (generateReadable && transcriptData.fullTranscript) {
125
- try {
126
- const readableTranscript = await service2.generateHumanReadableTranscript(transcriptData.fullTranscript);
127
- payload.readableTranscript = readableTranscript;
128
- } catch (error) {
129
- strapi2.log.warn("[yt-transcript-mcp] Failed to generate readable transcript:", error);
130
- }
131
- }
132
161
  const savedTranscript = await service2.saveTranscript(payload);
133
162
  return {
134
163
  content: [
135
164
  {
136
165
  type: "text",
137
166
  text: JSON.stringify(
138
- {
139
- message: "Transcript fetched and saved successfully",
140
- data: savedTranscript,
141
- cached: false
142
- },
167
+ buildMetadataResponse(savedTranscript, previewLength, false),
143
168
  null,
144
169
  2
145
170
  )
@@ -172,17 +197,17 @@ const listTranscriptsTool = {
172
197
  required: []
173
198
  }
174
199
  };
175
- async function handleListTranscripts(strapi2, args) {
200
+ async function handleListTranscripts(strapi, args) {
176
201
  const validatedArgs = validateToolInput("list_transcripts", args);
177
202
  const { page, pageSize, sort } = validatedArgs;
178
203
  const start = (page - 1) * pageSize;
179
- const transcripts = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
204
+ const transcripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
180
205
  sort,
181
206
  limit: pageSize,
182
207
  start,
183
208
  fields: ["id", "documentId", "title", "videoId", "createdAt", "updatedAt"]
184
209
  });
185
- const allTranscripts = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({});
210
+ const allTranscripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({});
186
211
  const total = allTranscripts.length;
187
212
  return {
188
213
  content: [
@@ -207,26 +232,305 @@ async function handleListTranscripts(strapi2, args) {
207
232
  }
208
233
  const getTranscriptTool = {
209
234
  name: "get_transcript",
210
- description: "Get a specific saved transcript by YouTube video ID. Returns the full transcript data including any readable version if available.",
235
+ description: "Get a saved transcript by YouTube video ID. Returns metadata and preview by default. Use parameters to get full content or specific time ranges to avoid context overflow.",
211
236
  inputSchema: {
212
237
  type: "object",
213
238
  properties: {
214
239
  videoId: {
215
240
  type: "string",
216
241
  description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
242
+ },
243
+ includeFullTranscript: {
244
+ type: "boolean",
245
+ description: "Include the complete transcript text. Warning: may cause context overflow for long videos. Default: false",
246
+ default: false
247
+ },
248
+ includeTimecodes: {
249
+ type: "boolean",
250
+ description: "Include the transcript with timecodes array. Warning: significantly increases response size. Default: false",
251
+ default: false
252
+ },
253
+ startTime: {
254
+ type: "number",
255
+ description: "Start time in seconds for fetching a specific portion of the transcript"
256
+ },
257
+ endTime: {
258
+ type: "number",
259
+ description: "End time in seconds for fetching a specific portion of the transcript"
260
+ },
261
+ chunkIndex: {
262
+ type: "number",
263
+ description: "Chunk index (0-based) when paginating through transcript. Use with chunkSize to paginate through long videos."
264
+ },
265
+ chunkSize: {
266
+ type: "number",
267
+ description: "Chunk size in seconds. Overrides config default. Use with chunkIndex for pagination."
217
268
  }
218
269
  },
219
270
  required: ["videoId"]
220
271
  }
221
272
  };
222
- async function handleGetTranscript(strapi2, args) {
273
+ function getTranscriptForTimeRange(timecodes, startTimeMs, endTimeMs) {
274
+ const entries = timecodes.filter(
275
+ (entry) => entry.start >= startTimeMs && entry.start < endTimeMs
276
+ );
277
+ const text = entries.map((e) => e.text).join(" ");
278
+ return { text, entries };
279
+ }
280
+ function getVideoDurationMs(timecodes) {
281
+ if (!timecodes || timecodes.length === 0) return 0;
282
+ const lastEntry = timecodes[timecodes.length - 1];
283
+ return lastEntry.end || lastEntry.start + (lastEntry.duration || 0);
284
+ }
285
+ function formatTime$1(ms) {
286
+ const totalSeconds = Math.floor(ms / 1e3);
287
+ const hours = Math.floor(totalSeconds / 3600);
288
+ const minutes = Math.floor(totalSeconds % 3600 / 60);
289
+ const seconds = totalSeconds % 60;
290
+ if (hours > 0) {
291
+ return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
292
+ }
293
+ return `${minutes}:${seconds.toString().padStart(2, "0")}`;
294
+ }
295
+ async function handleGetTranscript(strapi, args) {
223
296
  const validatedArgs = validateToolInput("get_transcript", args);
224
- const { videoId: videoIdOrUrl } = validatedArgs;
297
+ const {
298
+ videoId: videoIdOrUrl,
299
+ includeFullTranscript,
300
+ includeTimecodes,
301
+ startTime,
302
+ endTime,
303
+ chunkIndex,
304
+ chunkSize: chunkSizeOverride
305
+ } = validatedArgs;
306
+ const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
307
+ const defaultChunkSize = pluginConfig?.chunkSizeSeconds || 300;
308
+ const previewLength = pluginConfig?.previewLength || 500;
309
+ const maxFullTranscriptLength = pluginConfig?.maxFullTranscriptLength || 5e4;
310
+ const chunkSizeSeconds = chunkSizeOverride || defaultChunkSize;
311
+ const videoId = extractYouTubeID(videoIdOrUrl);
312
+ if (!videoId) {
313
+ throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
314
+ }
315
+ const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
316
+ const transcript2 = await service2.findTranscript(videoId);
317
+ if (!transcript2) {
318
+ return {
319
+ content: [
320
+ {
321
+ type: "text",
322
+ text: JSON.stringify(
323
+ {
324
+ error: true,
325
+ message: `No transcript found for video ID: ${videoId}. Use fetch_transcript to fetch it from YouTube first.`,
326
+ videoId
327
+ },
328
+ null,
329
+ 2
330
+ )
331
+ }
332
+ ]
333
+ };
334
+ }
335
+ const timecodes = transcript2.transcriptWithTimeCodes || [];
336
+ const fullText = transcript2.fullTranscript || "";
337
+ const durationMs = getVideoDurationMs(timecodes);
338
+ const totalChunks = Math.ceil(durationMs / (chunkSizeSeconds * 1e3));
339
+ const wordCount = fullText.split(/\s+/).length;
340
+ const response = {
341
+ videoId: transcript2.videoId,
342
+ title: transcript2.title,
343
+ metadata: {
344
+ wordCount,
345
+ characterCount: fullText.length,
346
+ duration: formatTime$1(durationMs),
347
+ durationSeconds: Math.floor(durationMs / 1e3),
348
+ totalChunks,
349
+ chunkSizeSeconds
350
+ }
351
+ };
352
+ if (startTime !== void 0 || endTime !== void 0) {
353
+ const startMs = (startTime || 0) * 1e3;
354
+ const endMs = endTime !== void 0 ? endTime * 1e3 : durationMs;
355
+ const { text, entries } = getTranscriptForTimeRange(timecodes, startMs, endMs);
356
+ response.timeRange = {
357
+ startTime: startTime || 0,
358
+ endTime: endTime || Math.floor(durationMs / 1e3),
359
+ startFormatted: formatTime$1(startMs),
360
+ endFormatted: formatTime$1(endMs)
361
+ };
362
+ response.transcript = text;
363
+ if (includeTimecodes) {
364
+ response.transcriptWithTimeCodes = entries;
365
+ }
366
+ } else if (chunkIndex !== void 0) {
367
+ const chunkStartMs = chunkIndex * chunkSizeSeconds * 1e3;
368
+ const chunkEndMs = Math.min((chunkIndex + 1) * chunkSizeSeconds * 1e3, durationMs);
369
+ if (chunkStartMs >= durationMs) {
370
+ response.error = `Chunk index ${chunkIndex} is out of range. Total chunks: ${totalChunks} (0-${totalChunks - 1})`;
371
+ } else {
372
+ const { text, entries } = getTranscriptForTimeRange(timecodes, chunkStartMs, chunkEndMs);
373
+ response.chunk = {
374
+ index: chunkIndex,
375
+ totalChunks,
376
+ startTime: Math.floor(chunkStartMs / 1e3),
377
+ endTime: Math.floor(chunkEndMs / 1e3),
378
+ startFormatted: formatTime$1(chunkStartMs),
379
+ endFormatted: formatTime$1(chunkEndMs)
380
+ };
381
+ response.transcript = text;
382
+ if (includeTimecodes) {
383
+ response.transcriptWithTimeCodes = entries;
384
+ }
385
+ if (chunkIndex < totalChunks - 1) {
386
+ response.nextChunk = `Use chunkIndex: ${chunkIndex + 1} to get the next portion`;
387
+ }
388
+ if (chunkIndex > 0) {
389
+ response.previousChunk = `Use chunkIndex: ${chunkIndex - 1} to get the previous portion`;
390
+ }
391
+ }
392
+ } else if (includeFullTranscript || fullText.length <= maxFullTranscriptLength) {
393
+ response.transcript = fullText;
394
+ if (includeTimecodes) {
395
+ response.transcriptWithTimeCodes = timecodes;
396
+ }
397
+ if (includeFullTranscript && fullText.length > maxFullTranscriptLength) {
398
+ response.warning = "Full transcript included. For long videos, consider using chunkIndex, startTime/endTime, or search_transcript to reduce response size.";
399
+ } else if (fullText.length <= maxFullTranscriptLength) {
400
+ response.note = "Full transcript auto-loaded (fits within context limit).";
401
+ }
402
+ } else {
403
+ const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
404
+ response.preview = preview;
405
+ response.isLargeTranscript = true;
406
+ response.usage = {
407
+ fullTranscript: "Set includeFullTranscript: true to get complete text (warning: may exceed context)",
408
+ search: "Use search_transcript to find relevant portions by keyword (recommended for large transcripts)",
409
+ timeRange: "Use startTime and endTime (in seconds) to get a specific portion",
410
+ pagination: `Use chunkIndex (0-${totalChunks - 1}) to paginate through ${chunkSizeSeconds}s chunks`
411
+ };
412
+ }
413
+ return {
414
+ content: [
415
+ {
416
+ type: "text",
417
+ text: JSON.stringify(response, null, 2)
418
+ }
419
+ ]
420
+ };
421
+ }
422
+ const searchTranscriptTool = {
423
+ name: "search_transcript",
424
+ description: "Search within a saved transcript using BM25 scoring. Returns the most relevant segments matching your query with timestamps. Use this to find specific content in long videos without loading the entire transcript.",
425
+ inputSchema: {
426
+ type: "object",
427
+ properties: {
428
+ videoId: {
429
+ type: "string",
430
+ description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
431
+ },
432
+ query: {
433
+ type: "string",
434
+ description: "Search query - keywords or phrases to find in the transcript"
435
+ },
436
+ maxResults: {
437
+ type: "number",
438
+ description: "Maximum number of results to return (default: 5, max: 20)",
439
+ default: 5
440
+ }
441
+ },
442
+ required: ["videoId", "query"]
443
+ }
444
+ };
445
+ function tokenize(text) {
446
+ return text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((word) => word.length > 1);
447
+ }
448
+ function calculateIDF(segments, vocabulary) {
449
+ const idf = /* @__PURE__ */ new Map();
450
+ const N = segments.length;
451
+ for (const term of vocabulary) {
452
+ const docsWithTerm = segments.filter(
453
+ (seg) => tokenize(seg.text).includes(term)
454
+ ).length;
455
+ idf.set(term, Math.log((N - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1));
456
+ }
457
+ return idf;
458
+ }
459
+ function bm25Score(segmentTokens, queryTokens, idf, avgDocLength, k1 = 1.5, b = 0.75) {
460
+ const docLength = segmentTokens.length;
461
+ let score = 0;
462
+ const tf = /* @__PURE__ */ new Map();
463
+ for (const token of segmentTokens) {
464
+ tf.set(token, (tf.get(token) || 0) + 1);
465
+ }
466
+ for (const term of queryTokens) {
467
+ const termFreq = tf.get(term) || 0;
468
+ const termIdf = idf.get(term) || 0;
469
+ if (termFreq > 0) {
470
+ const numerator = termFreq * (k1 + 1);
471
+ const denominator = termFreq + k1 * (1 - b + b * (docLength / avgDocLength));
472
+ score += termIdf * (numerator / denominator);
473
+ }
474
+ }
475
+ return score;
476
+ }
477
+ function formatTime(ms) {
478
+ const totalSeconds = Math.floor(ms / 1e3);
479
+ const hours = Math.floor(totalSeconds / 3600);
480
+ const minutes = Math.floor(totalSeconds % 3600 / 60);
481
+ const seconds = totalSeconds % 60;
482
+ if (hours > 0) {
483
+ return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
484
+ }
485
+ return `${minutes}:${seconds.toString().padStart(2, "0")}`;
486
+ }
487
+ function createSegments(timecodes, segmentDurationMs) {
488
+ if (!timecodes || timecodes.length === 0) return [];
489
+ const segments = [];
490
+ let currentSegment = [];
491
+ let segmentStartTime = timecodes[0].start;
492
+ for (const entry of timecodes) {
493
+ const segmentEndTime = segmentStartTime + segmentDurationMs;
494
+ if (entry.start < segmentEndTime) {
495
+ currentSegment.push(entry);
496
+ } else {
497
+ if (currentSegment.length > 0) {
498
+ const endTime = currentSegment[currentSegment.length - 1].end || currentSegment[currentSegment.length - 1].start + (currentSegment[currentSegment.length - 1].duration || 0);
499
+ segments.push({
500
+ text: currentSegment.map((e) => e.text).join(" "),
501
+ startTime: Math.floor(segmentStartTime / 1e3),
502
+ endTime: Math.floor(endTime / 1e3),
503
+ startFormatted: formatTime(segmentStartTime),
504
+ endFormatted: formatTime(endTime)
505
+ });
506
+ }
507
+ segmentStartTime = entry.start;
508
+ currentSegment = [entry];
509
+ }
510
+ }
511
+ if (currentSegment.length > 0) {
512
+ const endTime = currentSegment[currentSegment.length - 1].end || currentSegment[currentSegment.length - 1].start + (currentSegment[currentSegment.length - 1].duration || 0);
513
+ segments.push({
514
+ text: currentSegment.map((e) => e.text).join(" "),
515
+ startTime: Math.floor(segmentStartTime / 1e3),
516
+ endTime: Math.floor(endTime / 1e3),
517
+ startFormatted: formatTime(segmentStartTime),
518
+ endFormatted: formatTime(endTime)
519
+ });
520
+ }
521
+ return segments;
522
+ }
523
+ async function handleSearchTranscript(strapi, args) {
524
+ const validatedArgs = validateToolInput("search_transcript", args);
525
+ const { videoId: videoIdOrUrl, query, maxResults: maxResultsInput } = validatedArgs;
526
+ const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
527
+ const segmentSeconds = pluginConfig?.searchSegmentSeconds || 30;
528
+ const maxResults = Math.min(Math.max(maxResultsInput || 5, 1), 20);
225
529
  const videoId = extractYouTubeID(videoIdOrUrl);
226
530
  if (!videoId) {
227
531
  throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
228
532
  }
229
- const service2 = strapi2.plugin("yt-transcript-strapi-plugin").service("service");
533
+ const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
230
534
  const transcript2 = await service2.findTranscript(videoId);
231
535
  if (!transcript2) {
232
536
  return {
@@ -246,13 +550,90 @@ async function handleGetTranscript(strapi2, args) {
246
550
  ]
247
551
  };
248
552
  }
553
+ const timecodes = transcript2.transcriptWithTimeCodes || [];
554
+ if (timecodes.length === 0) {
555
+ return {
556
+ content: [
557
+ {
558
+ type: "text",
559
+ text: JSON.stringify(
560
+ {
561
+ error: true,
562
+ message: "Transcript has no timecode data for searching.",
563
+ videoId
564
+ },
565
+ null,
566
+ 2
567
+ )
568
+ }
569
+ ]
570
+ };
571
+ }
572
+ const segments = createSegments(timecodes, segmentSeconds * 1e3);
573
+ if (segments.length === 0) {
574
+ return {
575
+ content: [
576
+ {
577
+ type: "text",
578
+ text: JSON.stringify(
579
+ {
580
+ error: true,
581
+ message: "Could not create searchable segments from transcript.",
582
+ videoId
583
+ },
584
+ null,
585
+ 2
586
+ )
587
+ }
588
+ ]
589
+ };
590
+ }
591
+ const queryTokens = tokenize(query);
592
+ if (queryTokens.length === 0) {
593
+ return {
594
+ content: [
595
+ {
596
+ type: "text",
597
+ text: JSON.stringify(
598
+ {
599
+ error: true,
600
+ message: "Query is empty or contains only stop words.",
601
+ query
602
+ },
603
+ null,
604
+ 2
605
+ )
606
+ }
607
+ ]
608
+ };
609
+ }
610
+ const vocabulary = new Set(queryTokens);
611
+ const idf = calculateIDF(segments, vocabulary);
612
+ const avgDocLength = segments.reduce((sum, seg) => sum + tokenize(seg.text).length, 0) / segments.length;
613
+ const scoredSegments = segments.map((segment) => ({
614
+ ...segment,
615
+ score: bm25Score(tokenize(segment.text), queryTokens, idf, avgDocLength)
616
+ }));
617
+ const results = scoredSegments.filter((seg) => seg.score > 0).sort((a, b) => b.score - a.score).slice(0, maxResults);
249
618
  return {
250
619
  content: [
251
620
  {
252
621
  type: "text",
253
622
  text: JSON.stringify(
254
623
  {
255
- data: transcript2
624
+ videoId: transcript2.videoId,
625
+ title: transcript2.title,
626
+ query,
627
+ totalSegments: segments.length,
628
+ matchingResults: results.length,
629
+ results: results.map((r) => ({
630
+ text: r.text,
631
+ startTime: r.startTime,
632
+ endTime: r.endTime,
633
+ timeRange: `${r.startFormatted} - ${r.endFormatted}`,
634
+ score: Math.round(r.score * 100) / 100
635
+ })),
636
+ usage: results.length > 0 ? `Use get_transcript with startTime: ${results[0].startTime} and endTime: ${results[0].endTime} to get full context for the top result.` : "No matches found. Try different keywords."
256
637
  },
257
638
  null,
258
639
  2
@@ -311,11 +692,10 @@ function truncateText(text, maxLength) {
311
692
  function truncateTranscripts(transcripts) {
312
693
  return transcripts.map((transcript2) => ({
313
694
  ...transcript2,
314
- fullTranscript: truncateText(transcript2.fullTranscript, TRANSCRIPT_PREVIEW_LENGTH),
315
- readableTranscript: truncateText(transcript2.readableTranscript, TRANSCRIPT_PREVIEW_LENGTH)
695
+ fullTranscript: truncateText(transcript2.fullTranscript, TRANSCRIPT_PREVIEW_LENGTH)
316
696
  }));
317
697
  }
318
- async function handleFindTranscripts(strapi2, args) {
698
+ async function handleFindTranscripts(strapi, args) {
319
699
  const validatedArgs = validateToolInput("find_transcripts", args);
320
700
  const { query, videoId, title, includeFullContent, page, pageSize, sort } = validatedArgs;
321
701
  const start = (page - 1) * pageSize;
@@ -330,17 +710,16 @@ async function handleFindTranscripts(strapi2, args) {
330
710
  filters.$or = [
331
711
  { title: { $containsi: query } },
332
712
  { videoId: { $containsi: query } },
333
- { fullTranscript: { $containsi: query } },
334
- { readableTranscript: { $containsi: query } }
713
+ { fullTranscript: { $containsi: query } }
335
714
  ];
336
715
  }
337
- const transcripts = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
716
+ const transcripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
338
717
  filters,
339
718
  sort,
340
719
  limit: pageSize,
341
720
  start
342
721
  });
343
- const allMatching = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
722
+ const allMatching = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
344
723
  filters
345
724
  });
346
725
  const total = allMatching.length;
@@ -376,15 +755,17 @@ const tools = [
376
755
  fetchTranscriptTool,
377
756
  listTranscriptsTool,
378
757
  getTranscriptTool,
758
+ searchTranscriptTool,
379
759
  findTranscriptsTool
380
760
  ];
381
761
  const toolHandlers = {
382
762
  fetch_transcript: handleFetchTranscript,
383
763
  list_transcripts: handleListTranscripts,
384
764
  get_transcript: handleGetTranscript,
765
+ search_transcript: handleSearchTranscript,
385
766
  find_transcripts: handleFindTranscripts
386
767
  };
387
- async function handleToolCall(strapi2, request) {
768
+ async function handleToolCall(strapi, request) {
388
769
  const { name, arguments: args } = request.params;
389
770
  const handler = toolHandlers[name];
390
771
  if (!handler) {
@@ -392,13 +773,13 @@ async function handleToolCall(strapi2, request) {
392
773
  }
393
774
  const startTime = Date.now();
394
775
  try {
395
- const result = await handler(strapi2, args || {});
776
+ const result = await handler(strapi, args || {});
396
777
  const duration = Date.now() - startTime;
397
- strapi2.log.debug(`[yt-transcript-mcp] Tool ${name} executed successfully in ${duration}ms`);
778
+ strapi.log.debug(`[yt-transcript-mcp] Tool ${name} executed successfully in ${duration}ms`);
398
779
  return result;
399
780
  } catch (error) {
400
781
  const duration = Date.now() - startTime;
401
- strapi2.log.error(`[yt-transcript-mcp] Tool ${name} failed after ${duration}ms`, {
782
+ strapi.log.error(`[yt-transcript-mcp] Tool ${name} failed after ${duration}ms`, {
402
783
  error: error instanceof Error ? error.message : String(error)
403
784
  });
404
785
  return {
@@ -419,7 +800,7 @@ async function handleToolCall(strapi2, request) {
419
800
  };
420
801
  }
421
802
  }
422
- function createMcpServer(strapi2) {
803
+ function createMcpServer(strapi) {
423
804
  const server = new Server(
424
805
  {
425
806
  name: "yt-transcript-mcp",
@@ -432,53 +813,57 @@ function createMcpServer(strapi2) {
432
813
  }
433
814
  );
434
815
  server.setRequestHandler(ListToolsRequestSchema, async () => {
435
- strapi2.log.debug("[yt-transcript-mcp] Listing tools");
816
+ strapi.log.debug("[yt-transcript-mcp] Listing tools");
436
817
  return { tools };
437
818
  });
438
819
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
439
- strapi2.log.debug(`[yt-transcript-mcp] Tool call: ${request.params.name}`);
440
- return handleToolCall(strapi2, request);
820
+ strapi.log.debug(`[yt-transcript-mcp] Tool call: ${request.params.name}`);
821
+ return handleToolCall(strapi, request);
441
822
  });
442
- strapi2.log.info("[yt-transcript-mcp] MCP server created with tools:", {
823
+ strapi.log.info("[yt-transcript-mcp] MCP server created with tools:", {
443
824
  tools: tools.map((t) => t.name)
444
825
  });
445
826
  return server;
446
827
  }
447
- const bootstrap = async ({ strapi: strapi2 }) => {
448
- const plugin = strapi2.plugin("yt-transcript-strapi-plugin");
449
- plugin.createMcpServer = () => createMcpServer(strapi2);
828
+ const bootstrap = async ({ strapi }) => {
829
+ const plugin = strapi.plugin("yt-transcript-strapi-plugin");
830
+ plugin.createMcpServer = () => createMcpServer(strapi);
450
831
  plugin.sessions = /* @__PURE__ */ new Map();
451
- strapi2.log.info("[yt-transcript-mcp] MCP plugin initialized");
452
- strapi2.log.info("[yt-transcript-mcp] MCP endpoint available at: /api/yt-transcript-strapi-plugin/mcp");
832
+ strapi.log.info("[yt-transcript-mcp] MCP plugin initialized");
833
+ strapi.log.info("[yt-transcript-mcp] MCP endpoint available at: /api/yt-transcript-strapi-plugin/mcp");
453
834
  };
454
- const destroy = ({ strapi: strapi2 }) => {
835
+ const destroy = ({ strapi }) => {
455
836
  };
456
- const register = ({ strapi: strapi2 }) => {
837
+ const register = ({ strapi }) => {
457
838
  };
458
839
  const config = {
459
840
  default: {
460
- openAIApiKey: "",
461
- model: "gpt-4o-mini",
462
- temp: 0.7,
463
- maxTokens: 4096,
464
- proxyUrl: ""
841
+ proxyUrl: "",
465
842
  // Optional: HTTP/HTTPS proxy for YouTube requests (e.g., 'http://user:pass@proxy.example.com:8080')
843
+ chunkSizeSeconds: 300,
844
+ // Default chunk size for transcript pagination (5 minutes)
845
+ previewLength: 500,
846
+ // Default preview length in characters
847
+ maxFullTranscriptLength: 5e4,
848
+ // Auto-load full transcript if under this character count (~12K tokens)
849
+ searchSegmentSeconds: 30
850
+ // Segment size for BM25 search scoring
466
851
  },
467
852
  validator(config2) {
468
- if (config2.openAIApiKey && typeof config2.openAIApiKey !== "string") {
469
- throw new Error("openAIApiKey must be a string");
853
+ if (config2.proxyUrl && typeof config2.proxyUrl !== "string") {
854
+ throw new Error("proxyUrl must be a string");
470
855
  }
471
- if (config2.model && typeof config2.model !== "string") {
472
- throw new Error("model must be a string");
856
+ if (config2.chunkSizeSeconds !== void 0 && (typeof config2.chunkSizeSeconds !== "number" || config2.chunkSizeSeconds < 30)) {
857
+ throw new Error("chunkSizeSeconds must be a number >= 30");
473
858
  }
474
- if (config2.temp !== void 0 && (typeof config2.temp !== "number" || config2.temp < 0 || config2.temp > 2)) {
475
- throw new Error("temp must be a number between 0 and 2");
859
+ if (config2.previewLength !== void 0 && (typeof config2.previewLength !== "number" || config2.previewLength < 100)) {
860
+ throw new Error("previewLength must be a number >= 100");
476
861
  }
477
- if (config2.maxTokens !== void 0 && (typeof config2.maxTokens !== "number" || config2.maxTokens < 1)) {
478
- throw new Error("maxTokens must be a positive number");
862
+ if (config2.maxFullTranscriptLength !== void 0 && (typeof config2.maxFullTranscriptLength !== "number" || config2.maxFullTranscriptLength < 1e3)) {
863
+ throw new Error("maxFullTranscriptLength must be a number >= 1000");
479
864
  }
480
- if (config2.proxyUrl && typeof config2.proxyUrl !== "string") {
481
- throw new Error("proxyUrl must be a string");
865
+ if (config2.searchSegmentSeconds !== void 0 && (typeof config2.searchSegmentSeconds !== "number" || config2.searchSegmentSeconds < 10)) {
866
+ throw new Error("searchSegmentSeconds must be a number >= 10");
482
867
  }
483
868
  }
484
869
  };
@@ -512,9 +897,6 @@ const attributes = {
512
897
  },
513
898
  transcriptWithTimeCodes: {
514
899
  type: "json"
515
- },
516
- readableTranscript: {
517
- type: "richtext"
518
900
  }
519
901
  };
520
902
  const schema = {
@@ -531,41 +913,34 @@ const transcript = {
531
913
  const contentTypes = {
532
914
  transcript
533
915
  };
534
- const controller = ({ strapi: strapi2 }) => ({
916
+ const controller = ({ strapi }) => ({
535
917
  async getTranscript(ctx) {
536
918
  const videoId = extractYouTubeID(ctx.params.videoId);
537
919
  if (!videoId) {
538
920
  return ctx.body = { error: "Invalid YouTube URL or ID", data: null };
539
921
  }
540
- const found = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").findTranscript(videoId);
922
+ const found = await strapi.plugin("yt-transcript-strapi-plugin").service("service").findTranscript(videoId);
541
923
  if (found) {
542
924
  return ctx.body = { data: found };
543
925
  }
544
- const transcriptData = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").getTranscript(videoId);
545
- let readableTranscript = null;
546
- try {
547
- readableTranscript = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").generateHumanReadableTranscript(transcriptData.fullTranscript);
548
- } catch (error) {
549
- strapi2.log.debug("[yt-transcript] Readable transcript generation skipped");
550
- }
926
+ const transcriptData = await strapi.plugin("yt-transcript-strapi-plugin").service("service").getTranscript(videoId);
551
927
  const payload = {
552
928
  videoId,
553
929
  title: transcriptData?.title || "No title found",
554
930
  fullTranscript: transcriptData?.fullTranscript,
555
- transcriptWithTimeCodes: transcriptData?.transcriptWithTimeCodes,
556
- readableTranscript
931
+ transcriptWithTimeCodes: transcriptData?.transcriptWithTimeCodes
557
932
  };
558
- const transcript2 = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").saveTranscript(payload);
933
+ const transcript2 = await strapi.plugin("yt-transcript-strapi-plugin").service("service").saveTranscript(payload);
559
934
  ctx.body = { data: transcript2 };
560
935
  }
561
936
  });
562
- const mcpController = ({ strapi: strapi2 }) => ({
937
+ const mcpController = ({ strapi }) => ({
563
938
  /**
564
939
  * Handle MCP requests (POST, GET, DELETE)
565
940
  * Creates a new server+transport per session for proper isolation
566
941
  */
567
942
  async handle(ctx) {
568
- const plugin = strapi2.plugin("yt-transcript-strapi-plugin");
943
+ const plugin = strapi.plugin("yt-transcript-strapi-plugin");
569
944
  if (!plugin.createMcpServer) {
570
945
  ctx.status = 503;
571
946
  ctx.body = {
@@ -585,12 +960,12 @@ const mcpController = ({ strapi: strapi2 }) => ({
585
960
  await server.connect(transport);
586
961
  session = { server, transport, createdAt: Date.now() };
587
962
  plugin.sessions.set(sessionId, session);
588
- strapi2.log.debug(`[yt-transcript-mcp] New session created: ${sessionId}`);
963
+ strapi.log.debug(`[yt-transcript-mcp] New session created: ${sessionId}`);
589
964
  }
590
965
  await session.transport.handleRequest(ctx.req, ctx.res, ctx.request.body);
591
966
  ctx.respond = false;
592
967
  } catch (error) {
593
- strapi2.log.error("[yt-transcript-mcp] Error handling MCP request", {
968
+ strapi.log.error("[yt-transcript-mcp] Error handling MCP request", {
594
969
  error: error instanceof Error ? error.message : String(error),
595
970
  method: ctx.method,
596
971
  path: ctx.path
@@ -670,18 +1045,6 @@ const routes = {
670
1045
  routes: [...admin]
671
1046
  }
672
1047
  };
673
- async function initializeModel({
674
- openAIApiKey,
675
- model,
676
- temp
677
- }) {
678
- return new ChatOpenAI({
679
- temperature: temp,
680
- openAIApiKey,
681
- modelName: model,
682
- maxTokens: 1e3
683
- });
684
- }
685
1048
  function isRequestLike(input) {
686
1049
  return typeof input === "object" && input !== null && "url" in input && typeof input.url === "string" && "method" in input;
687
1050
  }
@@ -824,49 +1187,14 @@ const fetchTranscript = async (videoId, options2) => {
824
1187
  );
825
1188
  }
826
1189
  };
827
- async function processTextChunks(chunks, model) {
828
- const punctuationPrompt = PromptTemplate.fromTemplate(
829
- "Add proper punctuation and capitalization to the following text chunk:\n\n{chunk}"
830
- );
831
- const punctuationChain = punctuationPrompt.pipe(model);
832
- const processedChunks = await Promise.all(
833
- chunks.map(async (chunk) => {
834
- const result = await punctuationChain.invoke({ chunk });
835
- return result.content;
836
- })
837
- );
838
- return processedChunks.join(" ");
839
- }
840
- async function generateModifiedTranscript(rawTranscript) {
841
- const pluginSettings = await strapi.config.get(
842
- "plugin::yt-transcript-strapi-plugin"
843
- );
844
- if (!pluginSettings.openAIApiKey || !pluginSettings.model || !pluginSettings.temp || !pluginSettings.maxTokens) {
845
- throw new Error("Missing required configuration for YTTranscript");
846
- }
847
- const chatModel = await initializeModel({
848
- openAIApiKey: pluginSettings.openAIApiKey,
849
- model: pluginSettings.model,
850
- temp: pluginSettings.temp,
851
- maxTokens: pluginSettings.maxTokens
852
- });
853
- const splitter = new TokenTextSplitter({
854
- chunkSize: 1e3,
855
- chunkOverlap: 200
856
- });
857
- const transcriptChunks = await splitter.createDocuments([rawTranscript]);
858
- const chunkTexts = transcriptChunks.map((chunk) => chunk.pageContent);
859
- const modifiedTranscript = await processTextChunks(chunkTexts, chatModel);
860
- return modifiedTranscript;
861
- }
862
- const service = ({ strapi: strapi2 }) => ({
1190
+ const service = ({ strapi }) => ({
863
1191
  async getTranscript(identifier) {
864
1192
  const youtubeIdRegex = /^[a-zA-Z0-9_-]{11}$/;
865
1193
  const isValid = youtubeIdRegex.test(identifier);
866
1194
  if (!isValid) {
867
1195
  return { error: "Invalid video ID", data: null };
868
1196
  }
869
- const pluginSettings = await strapi2.config.get(
1197
+ const pluginSettings = await strapi.config.get(
870
1198
  "plugin::yt-transcript-strapi-plugin"
871
1199
  );
872
1200
  const transcriptData = await fetchTranscript(identifier, {
@@ -879,20 +1207,16 @@ const service = ({ strapi: strapi2 }) => ({
879
1207
  };
880
1208
  },
881
1209
  async saveTranscript(payload) {
882
- return await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").create({
1210
+ return await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").create({
883
1211
  data: payload
884
1212
  });
885
1213
  },
886
1214
  async findTranscript(videoId) {
887
- const transcriptData = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findFirst({
1215
+ const transcriptData = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findFirst({
888
1216
  filters: { videoId }
889
1217
  });
890
1218
  if (!transcriptData) return null;
891
1219
  return transcriptData;
892
- },
893
- async generateHumanReadableTranscript(transcript2) {
894
- const modifiedTranscript = await generateModifiedTranscript(transcript2);
895
- return modifiedTranscript;
896
1220
  }
897
1221
  });
898
1222
  const services = {