yt-transcript-strapi-plugin 0.0.22 → 0.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/dist/server/index.js +437 -36
  2. package/dist/server/index.mjs +437 -36
  3. package/dist/server/src/config/index.d.ts +8 -8
  4. package/dist/server/src/index.d.ts +10 -19
  5. package/dist/server/src/mcp/schemas/index.d.ts +63 -0
  6. package/dist/server/src/mcp/tools/get-transcript.d.ts +26 -0
  7. package/dist/server/src/mcp/tools/search-transcript.d.ts +30 -0
  8. package/dist/server/src/routes/content-api.d.ts +2 -11
  9. package/dist/server/src/routes/index.d.ts +2 -11
  10. package/node_modules/which/CHANGELOG.md +166 -0
  11. package/package.json +7 -2
  12. package/node_modules/express/node_modules/media-typer/HISTORY.md +0 -50
  13. package/node_modules/express/node_modules/media-typer/LICENSE +0 -22
  14. package/node_modules/express/node_modules/media-typer/README.md +0 -93
  15. package/node_modules/express/node_modules/media-typer/index.js +0 -143
  16. package/node_modules/express/node_modules/media-typer/package.json +0 -33
  17. package/node_modules/express/node_modules/type-is/HISTORY.md +0 -292
  18. package/node_modules/express/node_modules/type-is/LICENSE +0 -23
  19. package/node_modules/express/node_modules/type-is/README.md +0 -198
  20. package/node_modules/express/node_modules/type-is/index.js +0 -250
  21. package/node_modules/express/node_modules/type-is/package.json +0 -47
  22. /package/node_modules/{body-parser/node_modules/media-typer → media-typer}/HISTORY.md +0 -0
  23. /package/node_modules/{body-parser/node_modules/media-typer → media-typer}/LICENSE +0 -0
  24. /package/node_modules/{body-parser/node_modules/media-typer → media-typer}/README.md +0 -0
  25. /package/node_modules/{body-parser/node_modules/media-typer → media-typer}/index.js +0 -0
  26. /package/node_modules/{body-parser/node_modules/media-typer → media-typer}/package.json +0 -0
  27. /package/node_modules/{body-parser/node_modules/type-is → type-is}/HISTORY.md +0 -0
  28. /package/node_modules/{body-parser/node_modules/type-is → type-is}/LICENSE +0 -0
  29. /package/node_modules/{body-parser/node_modules/type-is → type-is}/README.md +0 -0
  30. /package/node_modules/{body-parser/node_modules/type-is → type-is}/index.js +0 -0
  31. /package/node_modules/{body-parser → type-is}/node_modules/mime-types/HISTORY.md +0 -0
  32. /package/node_modules/{body-parser → type-is}/node_modules/mime-types/LICENSE +0 -0
  33. /package/node_modules/{body-parser → type-is}/node_modules/mime-types/README.md +0 -0
  34. /package/node_modules/{body-parser → type-is}/node_modules/mime-types/index.js +0 -0
  35. /package/node_modules/{body-parser → type-is}/node_modules/mime-types/mimeScore.js +0 -0
  36. /package/node_modules/{body-parser → type-is}/node_modules/mime-types/package.json +0 -0
  37. /package/node_modules/{body-parser/node_modules/type-is → type-is}/package.json +0 -0
@@ -15,7 +15,18 @@ const ListTranscriptsSchema = zod.z.object({
  sort: zod.z.string().optional().default("createdAt:desc")
  });
  const GetTranscriptSchema = zod.z.object({
- videoId: zod.z.string().min(1, "Video ID is required")
+ videoId: zod.z.string().min(1, "Video ID is required"),
+ includeFullTranscript: zod.z.boolean().optional().default(false),
+ includeTimecodes: zod.z.boolean().optional().default(false),
+ startTime: zod.z.number().min(0).optional(),
+ endTime: zod.z.number().min(0).optional(),
+ chunkIndex: zod.z.number().int().min(0).optional(),
+ chunkSize: zod.z.number().int().min(30).optional()
+ });
+ const SearchTranscriptSchema = zod.z.object({
+ videoId: zod.z.string().min(1, "Video ID is required"),
+ query: zod.z.string().min(1, "Search query is required"),
+ maxResults: zod.z.number().int().min(1).max(20).optional().default(5)
  });
  const FindTranscriptsSchema = zod.z.object({
  query: zod.z.string().optional(),
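For orientation, the added schemas constrain the new tool arguments as shown in the sketch below. This is a minimal standalone example using zod directly; it mirrors the GetTranscriptSchema added above, and the sample call values are illustrative only.

// Minimal sketch: validating get_transcript arguments with the schema added above
const { z } = require("zod");

const GetTranscriptSchema = z.object({
  videoId: z.string().min(1, "Video ID is required"),
  includeFullTranscript: z.boolean().optional().default(false),
  includeTimecodes: z.boolean().optional().default(false),
  startTime: z.number().min(0).optional(),
  endTime: z.number().min(0).optional(),
  chunkIndex: z.number().int().min(0).optional(),
  chunkSize: z.number().int().min(30).optional()
});

// e.g. request the second five-minute chunk of a long video (illustrative videoId)
const args = GetTranscriptSchema.parse({
  videoId: "dQw4w9WgXcQ",
  chunkIndex: 1,
  chunkSize: 300
});
console.log(args.includeFullTranscript); // false (defaulted by the schema)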
@@ -30,6 +41,7 @@ const ToolSchemas = {
  fetch_transcript: FetchTranscriptSchema,
  list_transcripts: ListTranscriptsSchema,
  get_transcript: GetTranscriptSchema,
+ search_transcript: SearchTranscriptSchema,
  find_transcripts: FindTranscriptsSchema
  };
  function validateToolInput(toolName, input) {
@@ -64,7 +76,7 @@ function extractYouTubeID(urlOrID) {
  }
  const fetchTranscriptTool = {
  name: "fetch_transcript",
- description: "Fetch a transcript from YouTube for a given video ID or URL. The transcript is saved to the database for future retrieval.",
+ description: "Fetch a transcript from YouTube for a given video ID or URL. The transcript is saved to the database. Returns metadata and preview only to avoid context overflow. Use get_transcript to retrieve content.",
  inputSchema: {
  type: "object",
  properties: {
@@ -76,9 +88,47 @@ const fetchTranscriptTool = {
  required: ["videoId"]
  }
  };
+ function getVideoDurationMs$1(timecodes) {
+ if (!timecodes || timecodes.length === 0) return 0;
+ const lastEntry = timecodes[timecodes.length - 1];
+ return lastEntry.end || lastEntry.start + (lastEntry.duration || 0);
+ }
+ function formatTime$2(ms) {
+ const totalSeconds = Math.floor(ms / 1e3);
+ const hours = Math.floor(totalSeconds / 3600);
+ const minutes = Math.floor(totalSeconds % 3600 / 60);
+ const seconds = totalSeconds % 60;
+ if (hours > 0) {
+ return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+ }
+ return `${minutes}:${seconds.toString().padStart(2, "0")}`;
+ }
+ function buildMetadataResponse(transcript2, previewLength, cached) {
+ const fullText = transcript2.fullTranscript || "";
+ const timecodes = transcript2.transcriptWithTimeCodes || [];
+ const durationMs = getVideoDurationMs$1(timecodes);
+ const wordCount = fullText.split(/\s+/).length;
+ const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
+ return {
+ message: cached ? "Transcript already exists in database" : "Transcript fetched and saved successfully",
+ cached,
+ videoId: transcript2.videoId,
+ title: transcript2.title,
+ metadata: {
+ wordCount,
+ characterCount: fullText.length,
+ duration: formatTime$2(durationMs),
+ durationSeconds: Math.floor(durationMs / 1e3)
+ },
+ preview,
+ usage: "Use get_transcript with videoId to retrieve full content, specific time ranges, or paginated chunks."
+ };
+ }
  async function handleFetchTranscript(strapi, args) {
  const validatedArgs = validateToolInput("fetch_transcript", args);
  const { videoId: videoIdOrUrl } = validatedArgs;
+ const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
+ const previewLength = pluginConfig?.previewLength || 500;
  const videoId = extractYouTubeID(videoIdOrUrl);
  if (!videoId) {
  throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
@@ -91,11 +141,7 @@ async function handleFetchTranscript(strapi, args) {
  {
  type: "text",
  text: JSON.stringify(
- {
- message: "Transcript already exists in database",
- data: existingTranscript,
- cached: true
- },
+ buildMetadataResponse(existingTranscript, previewLength, true),
  null,
  2
  )
@@ -119,11 +165,7 @@ async function handleFetchTranscript(strapi, args) {
  {
  type: "text",
  text: JSON.stringify(
- {
- message: "Transcript fetched and saved successfully",
- data: savedTranscript,
- cached: false
- },
+ buildMetadataResponse(savedTranscript, previewLength, false),
  null,
  2
  )
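Both JSON.stringify calls above now serialize the compact shape produced by buildMetadataResponse rather than the full record. For reference, an illustrative result object follows; the field names come from the function added earlier, the values are made up.

// Illustrative fetch_transcript result (shape from buildMetadataResponse; values invented)
const exampleFetchResult = {
  message: "Transcript fetched and saved successfully",
  cached: false,
  videoId: "dQw4w9WgXcQ",
  title: "Example video title",
  metadata: {
    wordCount: 8421,        // fullText.split(/\s+/).length
    characterCount: 46210,  // fullText.length
    duration: "51:32",      // formatTime$2(durationMs)
    durationSeconds: 3092
  },
  preview: "First previewLength characters of the transcript...",
  usage: "Use get_transcript with videoId to retrieve full content, specific time ranges, or paginated chunks."
};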
@@ -191,21 +233,82 @@ async function handleListTranscripts(strapi, args) {
  }
  const getTranscriptTool = {
  name: "get_transcript",
- description: "Get a specific saved transcript by YouTube video ID. Returns the full transcript data including any readable version if available.",
+ description: "Get a saved transcript by YouTube video ID. Returns metadata and preview by default. Use parameters to get full content or specific time ranges to avoid context overflow.",
  inputSchema: {
  type: "object",
  properties: {
  videoId: {
  type: "string",
  description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
+ },
+ includeFullTranscript: {
+ type: "boolean",
+ description: "Include the complete transcript text. Warning: may cause context overflow for long videos. Default: false",
+ default: false
+ },
+ includeTimecodes: {
+ type: "boolean",
+ description: "Include the transcript with timecodes array. Warning: significantly increases response size. Default: false",
+ default: false
+ },
+ startTime: {
+ type: "number",
+ description: "Start time in seconds for fetching a specific portion of the transcript"
+ },
+ endTime: {
+ type: "number",
+ description: "End time in seconds for fetching a specific portion of the transcript"
+ },
+ chunkIndex: {
+ type: "number",
+ description: "Chunk index (0-based) when paginating through transcript. Use with chunkSize to paginate through long videos."
+ },
+ chunkSize: {
+ type: "number",
+ description: "Chunk size in seconds. Overrides config default. Use with chunkIndex for pagination."
  }
  },
  required: ["videoId"]
  }
  };
+ function getTranscriptForTimeRange(timecodes, startTimeMs, endTimeMs) {
+ const entries = timecodes.filter(
+ (entry) => entry.start >= startTimeMs && entry.start < endTimeMs
+ );
+ const text = entries.map((e) => e.text).join(" ");
+ return { text, entries };
+ }
+ function getVideoDurationMs(timecodes) {
+ if (!timecodes || timecodes.length === 0) return 0;
+ const lastEntry = timecodes[timecodes.length - 1];
+ return lastEntry.end || lastEntry.start + (lastEntry.duration || 0);
+ }
+ function formatTime$1(ms) {
+ const totalSeconds = Math.floor(ms / 1e3);
+ const hours = Math.floor(totalSeconds / 3600);
+ const minutes = Math.floor(totalSeconds % 3600 / 60);
+ const seconds = totalSeconds % 60;
+ if (hours > 0) {
+ return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+ }
+ return `${minutes}:${seconds.toString().padStart(2, "0")}`;
+ }
  async function handleGetTranscript(strapi, args) {
  const validatedArgs = validateToolInput("get_transcript", args);
- const { videoId: videoIdOrUrl } = validatedArgs;
+ const {
+ videoId: videoIdOrUrl,
+ includeFullTranscript,
+ includeTimecodes,
+ startTime,
+ endTime,
+ chunkIndex,
+ chunkSize: chunkSizeOverride
+ } = validatedArgs;
+ const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
+ const defaultChunkSize = pluginConfig?.chunkSizeSeconds || 300;
+ const previewLength = pluginConfig?.previewLength || 500;
+ const maxFullTranscriptLength = pluginConfig?.maxFullTranscriptLength || 5e4;
+ const chunkSizeSeconds = chunkSizeOverride || defaultChunkSize;
  const videoId = extractYouTubeID(videoIdOrUrl);
  if (!videoId) {
  throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
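The arguments destructured above select one of three retrieval modes (time range, chunk pagination, or full text), which the next hunk implements. The sketch below shows illustrative argument objects for each mode; the videoId and values are made up.

// Illustrative get_transcript argument objects, one retrieval mode per call
const byTimeRange = { videoId: "dQw4w9WgXcQ", startTime: 120, endTime: 180 };   // seconds into the video
const byChunk = { videoId: "dQw4w9WgXcQ", chunkIndex: 0, chunkSize: 300 };      // paginate in 5-minute chunks
const fullText = { videoId: "dQw4w9WgXcQ", includeFullTranscript: true };       // may be large for long videos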
@@ -230,13 +333,308 @@ async function handleGetTranscript(strapi, args) {
  ]
  };
  }
+ const timecodes = transcript2.transcriptWithTimeCodes || [];
+ const fullText = transcript2.fullTranscript || "";
+ const durationMs = getVideoDurationMs(timecodes);
+ const totalChunks = Math.ceil(durationMs / (chunkSizeSeconds * 1e3));
+ const wordCount = fullText.split(/\s+/).length;
+ const response = {
+ videoId: transcript2.videoId,
+ title: transcript2.title,
+ metadata: {
+ wordCount,
+ characterCount: fullText.length,
+ duration: formatTime$1(durationMs),
+ durationSeconds: Math.floor(durationMs / 1e3),
+ totalChunks,
+ chunkSizeSeconds
+ }
+ };
+ if (startTime !== void 0 || endTime !== void 0) {
+ const startMs = (startTime || 0) * 1e3;
+ const endMs = endTime !== void 0 ? endTime * 1e3 : durationMs;
+ const { text, entries } = getTranscriptForTimeRange(timecodes, startMs, endMs);
+ response.timeRange = {
+ startTime: startTime || 0,
+ endTime: endTime || Math.floor(durationMs / 1e3),
+ startFormatted: formatTime$1(startMs),
+ endFormatted: formatTime$1(endMs)
+ };
+ response.transcript = text;
+ if (includeTimecodes) {
+ response.transcriptWithTimeCodes = entries;
+ }
+ } else if (chunkIndex !== void 0) {
+ const chunkStartMs = chunkIndex * chunkSizeSeconds * 1e3;
+ const chunkEndMs = Math.min((chunkIndex + 1) * chunkSizeSeconds * 1e3, durationMs);
+ if (chunkStartMs >= durationMs) {
+ response.error = `Chunk index ${chunkIndex} is out of range. Total chunks: ${totalChunks} (0-${totalChunks - 1})`;
+ } else {
+ const { text, entries } = getTranscriptForTimeRange(timecodes, chunkStartMs, chunkEndMs);
+ response.chunk = {
+ index: chunkIndex,
+ totalChunks,
+ startTime: Math.floor(chunkStartMs / 1e3),
+ endTime: Math.floor(chunkEndMs / 1e3),
+ startFormatted: formatTime$1(chunkStartMs),
+ endFormatted: formatTime$1(chunkEndMs)
+ };
+ response.transcript = text;
+ if (includeTimecodes) {
+ response.transcriptWithTimeCodes = entries;
+ }
+ if (chunkIndex < totalChunks - 1) {
+ response.nextChunk = `Use chunkIndex: ${chunkIndex + 1} to get the next portion`;
+ }
+ if (chunkIndex > 0) {
+ response.previousChunk = `Use chunkIndex: ${chunkIndex - 1} to get the previous portion`;
+ }
+ }
+ } else if (includeFullTranscript || fullText.length <= maxFullTranscriptLength) {
+ response.transcript = fullText;
+ if (includeTimecodes) {
+ response.transcriptWithTimeCodes = timecodes;
+ }
+ if (includeFullTranscript && fullText.length > maxFullTranscriptLength) {
+ response.warning = "Full transcript included. For long videos, consider using chunkIndex, startTime/endTime, or search_transcript to reduce response size.";
+ } else if (fullText.length <= maxFullTranscriptLength) {
+ response.note = "Full transcript auto-loaded (fits within context limit).";
+ }
+ } else {
+ const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
+ response.preview = preview;
+ response.isLargeTranscript = true;
+ response.usage = {
+ fullTranscript: "Set includeFullTranscript: true to get complete text (warning: may exceed context)",
+ search: "Use search_transcript to find relevant portions by keyword (recommended for large transcripts)",
+ timeRange: "Use startTime and endTime (in seconds) to get a specific portion",
+ pagination: `Use chunkIndex (0-${totalChunks - 1}) to paginate through ${chunkSizeSeconds}s chunks`
+ };
+ }
+ return {
+ content: [
+ {
+ type: "text",
+ text: JSON.stringify(response, null, 2)
+ }
+ ]
+ };
+ }
+ const searchTranscriptTool = {
+ name: "search_transcript",
+ description: "Search within a saved transcript using BM25 scoring. Returns the most relevant segments matching your query with timestamps. Use this to find specific content in long videos without loading the entire transcript.",
+ inputSchema: {
+ type: "object",
+ properties: {
+ videoId: {
+ type: "string",
+ description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
+ },
+ query: {
+ type: "string",
+ description: "Search query - keywords or phrases to find in the transcript"
+ },
+ maxResults: {
+ type: "number",
+ description: "Maximum number of results to return (default: 5, max: 20)",
+ default: 5
+ }
+ },
+ required: ["videoId", "query"]
+ }
+ };
+ function tokenize(text) {
+ return text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((word) => word.length > 1);
+ }
+ function calculateIDF(segments, vocabulary) {
+ const idf = /* @__PURE__ */ new Map();
+ const N = segments.length;
+ for (const term of vocabulary) {
+ const docsWithTerm = segments.filter(
+ (seg) => tokenize(seg.text).includes(term)
+ ).length;
+ idf.set(term, Math.log((N - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1));
+ }
+ return idf;
+ }
+ function bm25Score(segmentTokens, queryTokens, idf, avgDocLength, k1 = 1.5, b = 0.75) {
+ const docLength = segmentTokens.length;
+ let score = 0;
+ const tf = /* @__PURE__ */ new Map();
+ for (const token of segmentTokens) {
+ tf.set(token, (tf.get(token) || 0) + 1);
+ }
+ for (const term of queryTokens) {
+ const termFreq = tf.get(term) || 0;
+ const termIdf = idf.get(term) || 0;
+ if (termFreq > 0) {
+ const numerator = termFreq * (k1 + 1);
+ const denominator = termFreq + k1 * (1 - b + b * (docLength / avgDocLength));
+ score += termIdf * (numerator / denominator);
+ }
+ }
+ return score;
+ }
+ function formatTime(ms) {
+ const totalSeconds = Math.floor(ms / 1e3);
+ const hours = Math.floor(totalSeconds / 3600);
+ const minutes = Math.floor(totalSeconds % 3600 / 60);
+ const seconds = totalSeconds % 60;
+ if (hours > 0) {
+ return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+ }
+ return `${minutes}:${seconds.toString().padStart(2, "0")}`;
+ }
+ function createSegments(timecodes, segmentDurationMs) {
+ if (!timecodes || timecodes.length === 0) return [];
+ const segments = [];
+ let currentSegment = [];
+ let segmentStartTime = timecodes[0].start;
+ for (const entry of timecodes) {
+ const segmentEndTime = segmentStartTime + segmentDurationMs;
+ if (entry.start < segmentEndTime) {
+ currentSegment.push(entry);
+ } else {
+ if (currentSegment.length > 0) {
+ const endTime = currentSegment[currentSegment.length - 1].end || currentSegment[currentSegment.length - 1].start + (currentSegment[currentSegment.length - 1].duration || 0);
+ segments.push({
+ text: currentSegment.map((e) => e.text).join(" "),
+ startTime: Math.floor(segmentStartTime / 1e3),
+ endTime: Math.floor(endTime / 1e3),
+ startFormatted: formatTime(segmentStartTime),
+ endFormatted: formatTime(endTime)
+ });
+ }
+ segmentStartTime = entry.start;
+ currentSegment = [entry];
+ }
+ }
+ if (currentSegment.length > 0) {
+ const endTime = currentSegment[currentSegment.length - 1].end || currentSegment[currentSegment.length - 1].start + (currentSegment[currentSegment.length - 1].duration || 0);
+ segments.push({
+ text: currentSegment.map((e) => e.text).join(" "),
+ startTime: Math.floor(segmentStartTime / 1e3),
+ endTime: Math.floor(endTime / 1e3),
+ startFormatted: formatTime(segmentStartTime),
+ endFormatted: formatTime(endTime)
+ });
+ }
+ return segments;
+ }
+ async function handleSearchTranscript(strapi, args) {
+ const validatedArgs = validateToolInput("search_transcript", args);
+ const { videoId: videoIdOrUrl, query, maxResults: maxResultsInput } = validatedArgs;
+ const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
+ const segmentSeconds = pluginConfig?.searchSegmentSeconds || 30;
+ const maxResults = Math.min(Math.max(maxResultsInput || 5, 1), 20);
+ const videoId = extractYouTubeID(videoIdOrUrl);
+ if (!videoId) {
+ throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
+ }
+ const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
+ const transcript2 = await service2.findTranscript(videoId);
+ if (!transcript2) {
+ return {
+ content: [
+ {
+ type: "text",
+ text: JSON.stringify(
+ {
+ error: true,
+ message: `No transcript found for video ID: ${videoId}. Use fetch_transcript to fetch it from YouTube first.`,
+ videoId
+ },
+ null,
+ 2
+ )
+ }
+ ]
+ };
+ }
+ const timecodes = transcript2.transcriptWithTimeCodes || [];
+ if (timecodes.length === 0) {
+ return {
+ content: [
+ {
+ type: "text",
+ text: JSON.stringify(
+ {
+ error: true,
+ message: "Transcript has no timecode data for searching.",
+ videoId
+ },
+ null,
+ 2
+ )
+ }
+ ]
+ };
+ }
+ const segments = createSegments(timecodes, segmentSeconds * 1e3);
+ if (segments.length === 0) {
+ return {
+ content: [
+ {
+ type: "text",
+ text: JSON.stringify(
+ {
+ error: true,
+ message: "Could not create searchable segments from transcript.",
+ videoId
+ },
+ null,
+ 2
+ )
+ }
+ ]
+ };
+ }
+ const queryTokens = tokenize(query);
+ if (queryTokens.length === 0) {
+ return {
+ content: [
+ {
+ type: "text",
+ text: JSON.stringify(
+ {
+ error: true,
+ message: "Query is empty or contains only stop words.",
+ query
+ },
+ null,
+ 2
+ )
+ }
+ ]
+ };
+ }
+ const vocabulary = new Set(queryTokens);
+ const idf = calculateIDF(segments, vocabulary);
+ const avgDocLength = segments.reduce((sum, seg) => sum + tokenize(seg.text).length, 0) / segments.length;
+ const scoredSegments = segments.map((segment) => ({
+ ...segment,
+ score: bm25Score(tokenize(segment.text), queryTokens, idf, avgDocLength)
+ }));
+ const results = scoredSegments.filter((seg) => seg.score > 0).sort((a, b) => b.score - a.score).slice(0, maxResults);
  return {
  content: [
  {
  type: "text",
  text: JSON.stringify(
  {
- data: transcript2
+ videoId: transcript2.videoId,
+ title: transcript2.title,
+ query,
+ totalSegments: segments.length,
+ matchingResults: results.length,
+ results: results.map((r) => ({
+ text: r.text,
+ startTime: r.startTime,
+ endTime: r.endTime,
+ timeRange: `${r.startFormatted} - ${r.endFormatted}`,
+ score: Math.round(r.score * 100) / 100
+ })),
+ usage: results.length > 0 ? `Use get_transcript with startTime: ${results[0].startTime} and endTime: ${results[0].endTime} to get full context for the top result.` : "No matches found. Try different keywords."
  },
  null,
@@ -358,12 +756,14 @@ const tools = [
  fetchTranscriptTool,
  listTranscriptsTool,
  getTranscriptTool,
+ searchTranscriptTool,
  findTranscriptsTool
  ];
  const toolHandlers = {
  fetch_transcript: handleFetchTranscript,
  list_transcripts: handleListTranscripts,
  get_transcript: handleGetTranscript,
+ search_transcript: handleSearchTranscript,
  find_transcripts: handleFindTranscripts
  };
  async function handleToolCall(strapi, request) {
@@ -439,28 +839,32 @@ const register = ({ strapi }) => {
  };
  const config = {
  default: {
- openAIApiKey: "",
- model: "gpt-4o-mini",
- temp: 0.7,
- maxTokens: 4096,
- proxyUrl: ""
+ proxyUrl: "",
  // Optional: HTTP/HTTPS proxy for YouTube requests (e.g., 'http://user:pass@proxy.example.com:8080')
+ chunkSizeSeconds: 300,
+ // Default chunk size for transcript pagination (5 minutes)
+ previewLength: 500,
+ // Default preview length in characters
+ maxFullTranscriptLength: 5e4,
+ // Auto-load full transcript if under this character count (~12K tokens)
+ searchSegmentSeconds: 30
+ // Segment size for BM25 search scoring
  },
  validator(config2) {
- if (config2.openAIApiKey && typeof config2.openAIApiKey !== "string") {
- throw new Error("openAIApiKey must be a string");
+ if (config2.proxyUrl && typeof config2.proxyUrl !== "string") {
+ throw new Error("proxyUrl must be a string");
  }
- if (config2.model && typeof config2.model !== "string") {
- throw new Error("model must be a string");
+ if (config2.chunkSizeSeconds !== void 0 && (typeof config2.chunkSizeSeconds !== "number" || config2.chunkSizeSeconds < 30)) {
+ throw new Error("chunkSizeSeconds must be a number >= 30");
  }
- if (config2.temp !== void 0 && (typeof config2.temp !== "number" || config2.temp < 0 || config2.temp > 2)) {
- throw new Error("temp must be a number between 0 and 2");
+ if (config2.previewLength !== void 0 && (typeof config2.previewLength !== "number" || config2.previewLength < 100)) {
+ throw new Error("previewLength must be a number >= 100");
  }
- if (config2.maxTokens !== void 0 && (typeof config2.maxTokens !== "number" || config2.maxTokens < 1)) {
- throw new Error("maxTokens must be a positive number");
+ if (config2.maxFullTranscriptLength !== void 0 && (typeof config2.maxFullTranscriptLength !== "number" || config2.maxFullTranscriptLength < 1e3)) {
+ throw new Error("maxFullTranscriptLength must be a number >= 1000");
  }
- if (config2.proxyUrl && typeof config2.proxyUrl !== "string") {
- throw new Error("proxyUrl must be a string");
+ if (config2.searchSegmentSeconds !== void 0 && (typeof config2.searchSegmentSeconds !== "number" || config2.searchSegmentSeconds < 10)) {
+ throw new Error("searchSegmentSeconds must be a number >= 10");
  }
  }
  };
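The new defaults and validator bounds translate into host-app configuration roughly as sketched below. This is a minimal example assuming the conventional Strapi config/plugins.js layout and the package name as the config key; all values are illustrative and every key is optional, with the defaults above applying when omitted.

// config/plugins.js in the Strapi app (illustrative; assumed layout)
module.exports = {
  "yt-transcript-strapi-plugin": {
    enabled: true,
    config: {
      proxyUrl: "",                   // optional HTTP/HTTPS proxy for YouTube requests
      chunkSizeSeconds: 300,          // pagination chunk size; validator requires >= 30
      previewLength: 500,             // preview length in characters; >= 100
      maxFullTranscriptLength: 50000, // auto-load threshold in characters; >= 1000
      searchSegmentSeconds: 30        // BM25 segment size in seconds; >= 10
    }
  }
};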
@@ -590,8 +994,7 @@ const contentApi = [
  path: "/mcp",
  handler: "mcp.handle",
  config: {
- policies: [],
- auth: false
+ policies: []
  }
  },
  {
@@ -599,8 +1002,7 @@ const contentApi = [
  path: "/mcp",
  handler: "mcp.handle",
  config: {
- policies: [],
- auth: false
+ policies: []
  }
  },
  {
@@ -608,8 +1010,7 @@ const contentApi = [
  path: "/mcp",
  handler: "mcp.handle",
  config: {
- policies: [],
- auth: false
+ policies: []
  }
  },
  // Other routes