npm - yt-transcript-strapi-plugin - Versions diffs - 0.0.21 → 0.0.25 - Mend

yt-transcript-strapi-plugin 0.0.21 → 0.0.25

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (15)

package/dist/server/index.js +477 -153
package/dist/server/index.mjs +477 -153
package/dist/server/src/config/index.d.ts +8 -8
package/dist/server/src/content-types/index.d.ts +0 -3
package/dist/server/src/content-types/transcript/index.d.ts +0 -3
package/dist/server/src/index.d.ts +8 -12
package/dist/server/src/mcp/schemas/index.d.ts +63 -6
package/dist/server/src/mcp/tools/fetch-transcript.d.ts +0 -5
package/dist/server/src/mcp/tools/get-transcript.d.ts +26 -0
package/dist/server/src/mcp/tools/index.d.ts +13 -13
package/dist/server/src/mcp/tools/search-transcript.d.ts +30 -0
package/dist/server/src/services/index.d.ts +0 -1
package/dist/server/src/services/service.d.ts +0 -2
package/package.json +7 -6
package/dist/server/src/utils/openai.d.ts +0 -9

package/dist/server/index.mjs (changed)

@@ -3,14 +3,10 @@ import { ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprot
 import { z } from "zod";
 import { randomUUID } from "node:crypto";
 import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
-import { TokenTextSplitter } from "@langchain/textsplitters";
-import { PromptTemplate } from "@langchain/core/prompts";
-import { ChatOpenAI } from "@langchain/openai";
 import { Innertube } from "youtubei.js";
 import { ProxyAgent, fetch as fetch$1 } from "undici";
 const FetchTranscriptSchema = z.object({
-  videoId: z.string().min(1, "Video ID or URL is required"),
-  generateReadable: z.boolean().optional().default(false)
+  videoId: z.string().min(1, "Video ID or URL is required")
 });
 const ListTranscriptsSchema = z.object({
   page: z.number().int().min(1).optional().default(1),
@@ -18,7 +14,18 @@ const ListTranscriptsSchema = z.object({
   sort: z.string().optional().default("createdAt:desc")
 });
 const GetTranscriptSchema = z.object({
-  videoId: z.string().min(1, "Video ID is required")
+  videoId: z.string().min(1, "Video ID is required"),
+  includeFullTranscript: z.boolean().optional().default(false),
+  includeTimecodes: z.boolean().optional().default(false),
+  startTime: z.number().min(0).optional(),
+  endTime: z.number().min(0).optional(),
+  chunkIndex: z.number().int().min(0).optional(),
+  chunkSize: z.number().int().min(30).optional()
+});
+const SearchTranscriptSchema = z.object({
+  videoId: z.string().min(1, "Video ID is required"),
+  query: z.string().min(1, "Search query is required"),
+  maxResults: z.number().int().min(1).max(20).optional().default(5)
 });
 const FindTranscriptsSchema = z.object({
   query: z.string().optional(),
@@ -33,6 +40,7 @@ const ToolSchemas = {
   fetch_transcript: FetchTranscriptSchema,
   list_transcripts: ListTranscriptsSchema,
   get_transcript: GetTranscriptSchema,
+  search_transcript: SearchTranscriptSchema,
   find_transcripts: FindTranscriptsSchema
 };
 function validateToolInput(toolName, input) {
@@ -67,31 +75,64 @@ function extractYouTubeID(urlOrID) {
 }
 const fetchTranscriptTool = {
   name: "fetch_transcript",
-  description: "Fetch a transcript from YouTube for a given video ID or URL. Optionally generates a human-readable version using AI. The transcript is saved to the database for future retrieval.",
+  description: "Fetch a transcript from YouTube for a given video ID or URL. The transcript is saved to the database. Returns metadata and preview only to avoid context overflow. Use get_transcript to retrieve content.",
   inputSchema: {
     type: "object",
     properties: {
       videoId: {
         type: "string",
         description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
-      },
-      generateReadable: {
-        type: "boolean",
-        description: "If true, uses AI to add punctuation and formatting to make the transcript more readable. Requires OpenAI API key configuration.",
-        default: false
       }
     },
     required: ["videoId"]
   }
 };
-async function handleFetchTranscript(strapi2, args) {
+function getVideoDurationMs$1(timecodes) {
+  if (!timecodes || timecodes.length === 0) return 0;
+  const lastEntry = timecodes[timecodes.length - 1];
+  return lastEntry.end || lastEntry.start + (lastEntry.duration || 0);
+}
+function formatTime$2(ms) {
+  const totalSeconds = Math.floor(ms / 1e3);
+  const hours = Math.floor(totalSeconds / 3600);
+  const minutes = Math.floor(totalSeconds % 3600 / 60);
+  const seconds = totalSeconds % 60;
+  if (hours > 0) {
+    return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+  }
+  return `${minutes}:${seconds.toString().padStart(2, "0")}`;
+}
+function buildMetadataResponse(transcript2, previewLength, cached) {
+  const fullText = transcript2.fullTranscript || "";
+  const timecodes = transcript2.transcriptWithTimeCodes || [];
+  const durationMs = getVideoDurationMs$1(timecodes);
+  const wordCount = fullText.split(/\s+/).length;
+  const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
+  return {
+    message: cached ? "Transcript already exists in database" : "Transcript fetched and saved successfully",
+    cached,
+    videoId: transcript2.videoId,
+    title: transcript2.title,
+    metadata: {
+      wordCount,
+      characterCount: fullText.length,
+      duration: formatTime$2(durationMs),
+      durationSeconds: Math.floor(durationMs / 1e3)
+    },
+    preview,
+    usage: "Use get_transcript with videoId to retrieve full content, specific time ranges, or paginated chunks."
+  };
+}
+async function handleFetchTranscript(strapi, args) {
   const validatedArgs = validateToolInput("fetch_transcript", args);
-  const { videoId: videoIdOrUrl, generateReadable } = validatedArgs;
+  const { videoId: videoIdOrUrl } = validatedArgs;
+  const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
+  const previewLength = pluginConfig?.previewLength || 500;
   const videoId = extractYouTubeID(videoIdOrUrl);
   if (!videoId) {
     throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
   }
-  const service2 = strapi2.plugin("yt-transcript-strapi-plugin").service("service");
+  const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
   const existingTranscript = await service2.findTranscript(videoId);
   if (existingTranscript) {
     return {
@@ -99,11 +140,7 @@ async function handleFetchTranscript(strapi2, args) {
         {
           type: "text",
           text: JSON.stringify(
-            {
-              message: "Transcript already exists in database",
-              data: existingTranscript,
-              cached: true
-            },
+            buildMetadataResponse(existingTranscript, previewLength, true),
             null,
             2
           )
@@ -121,25 +158,13 @@ async function handleFetchTranscript(strapi2, args) {
     fullTranscript: transcriptData.fullTranscript,
     transcriptWithTimeCodes: transcriptData.transcriptWithTimeCodes
   };
-  if (generateReadable && transcriptData.fullTranscript) {
-    try {
-      const readableTranscript = await service2.generateHumanReadableTranscript(transcriptData.fullTranscript);
-      payload.readableTranscript = readableTranscript;
-    } catch (error) {
-      strapi2.log.warn("[yt-transcript-mcp] Failed to generate readable transcript:", error);
-    }
-  }
   const savedTranscript = await service2.saveTranscript(payload);
   return {
     content: [
       {
         type: "text",
         text: JSON.stringify(
-          {
-            message: "Transcript fetched and saved successfully",
-            data: savedTranscript,
-            cached: false
-          },
+          buildMetadataResponse(savedTranscript, previewLength, false),
           null,
           2
         )
@@ -172,17 +197,17 @@ const listTranscriptsTool = {
     required: []
   }
 };
-async function handleListTranscripts(strapi2, args) {
+async function handleListTranscripts(strapi, args) {
   const validatedArgs = validateToolInput("list_transcripts", args);
   const { page, pageSize, sort } = validatedArgs;
   const start = (page - 1) * pageSize;
-  const transcripts = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
+  const transcripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
     sort,
     limit: pageSize,
     start,
     fields: ["id", "documentId", "title", "videoId", "createdAt", "updatedAt"]
   });
-  const allTranscripts = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({});
+  const allTranscripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({});
   const total = allTranscripts.length;
   return {
     content: [
@@ -207,26 +232,305 @@ async function handleListTranscripts(strapi2, args) {
 }
 const getTranscriptTool = {
   name: "get_transcript",
-  description: "Get a specific saved transcript by YouTube video ID. Returns the full transcript data including any readable version if available.",
+  description: "Get a saved transcript by YouTube video ID. Returns metadata and preview by default. Use parameters to get full content or specific time ranges to avoid context overflow.",
   inputSchema: {
     type: "object",
     properties: {
       videoId: {
         type: "string",
         description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
+      },
+      includeFullTranscript: {
+        type: "boolean",
+        description: "Include the complete transcript text. Warning: may cause context overflow for long videos. Default: false",
+        default: false
+      },
+      includeTimecodes: {
+        type: "boolean",
+        description: "Include the transcript with timecodes array. Warning: significantly increases response size. Default: false",
+        default: false
+      },
+      startTime: {
+        type: "number",
+        description: "Start time in seconds for fetching a specific portion of the transcript"
+      },
+      endTime: {
+        type: "number",
+        description: "End time in seconds for fetching a specific portion of the transcript"
+      },
+      chunkIndex: {
+        type: "number",
+        description: "Chunk index (0-based) when paginating through transcript. Use with chunkSize to paginate through long videos."
+      },
+      chunkSize: {
+        type: "number",
+        description: "Chunk size in seconds. Overrides config default. Use with chunkIndex for pagination."
       }
     },
     required: ["videoId"]
   }
 };
-async function handleGetTranscript(strapi2, args) {
+function getTranscriptForTimeRange(timecodes, startTimeMs, endTimeMs) {
+  const entries = timecodes.filter(
+    (entry) => entry.start >= startTimeMs && entry.start < endTimeMs
+  );
+  const text = entries.map((e) => e.text).join(" ");
+  return { text, entries };
+}
+function getVideoDurationMs(timecodes) {
+  if (!timecodes || timecodes.length === 0) return 0;
+  const lastEntry = timecodes[timecodes.length - 1];
+  return lastEntry.end || lastEntry.start + (lastEntry.duration || 0);
+}
+function formatTime$1(ms) {
+  const totalSeconds = Math.floor(ms / 1e3);
+  const hours = Math.floor(totalSeconds / 3600);
+  const minutes = Math.floor(totalSeconds % 3600 / 60);
+  const seconds = totalSeconds % 60;
+  if (hours > 0) {
+    return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+  }
+  return `${minutes}:${seconds.toString().padStart(2, "0")}`;
+}
+async function handleGetTranscript(strapi, args) {
   const validatedArgs = validateToolInput("get_transcript", args);
-  const { videoId: videoIdOrUrl } = validatedArgs;
+  const {
+    videoId: videoIdOrUrl,
+    includeFullTranscript,
+    includeTimecodes,
+    startTime,
+    endTime,
+    chunkIndex,
+    chunkSize: chunkSizeOverride
+  } = validatedArgs;
+  const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
+  const defaultChunkSize = pluginConfig?.chunkSizeSeconds || 300;
+  const previewLength = pluginConfig?.previewLength || 500;
+  const maxFullTranscriptLength = pluginConfig?.maxFullTranscriptLength || 5e4;
+  const chunkSizeSeconds = chunkSizeOverride || defaultChunkSize;
+  const videoId = extractYouTubeID(videoIdOrUrl);
+  if (!videoId) {
+    throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
+  }
+  const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
+  const transcript2 = await service2.findTranscript(videoId);
+  if (!transcript2) {
+    return {
+      content: [
+        {
+          type: "text",
+          text: JSON.stringify(
+            {
+              error: true,
+              message: `No transcript found for video ID: ${videoId}. Use fetch_transcript to fetch it from YouTube first.`,
+              videoId
+            },
+            null,
+            2
+          )
+        }
+      ]
+    };
+  }
+  const timecodes = transcript2.transcriptWithTimeCodes || [];
+  const fullText = transcript2.fullTranscript || "";
+  const durationMs = getVideoDurationMs(timecodes);
+  const totalChunks = Math.ceil(durationMs / (chunkSizeSeconds * 1e3));
+  const wordCount = fullText.split(/\s+/).length;
+  const response = {
+    videoId: transcript2.videoId,
+    title: transcript2.title,
+    metadata: {
+      wordCount,
+      characterCount: fullText.length,
+      duration: formatTime$1(durationMs),
+      durationSeconds: Math.floor(durationMs / 1e3),
+      totalChunks,
+      chunkSizeSeconds
+    }
+  };
+  if (startTime !== void 0 || endTime !== void 0) {
+    const startMs = (startTime || 0) * 1e3;
+    const endMs = endTime !== void 0 ? endTime * 1e3 : durationMs;
+    const { text, entries } = getTranscriptForTimeRange(timecodes, startMs, endMs);
+    response.timeRange = {
+      startTime: startTime || 0,
+      endTime: endTime || Math.floor(durationMs / 1e3),
+      startFormatted: formatTime$1(startMs),
+      endFormatted: formatTime$1(endMs)
+    };
+    response.transcript = text;
+    if (includeTimecodes) {
+      response.transcriptWithTimeCodes = entries;
+    }
+  } else if (chunkIndex !== void 0) {
+    const chunkStartMs = chunkIndex * chunkSizeSeconds * 1e3;
+    const chunkEndMs = Math.min((chunkIndex + 1) * chunkSizeSeconds * 1e3, durationMs);
+    if (chunkStartMs >= durationMs) {
+      response.error = `Chunk index ${chunkIndex} is out of range. Total chunks: ${totalChunks} (0-${totalChunks - 1})`;
+    } else {
+      const { text, entries } = getTranscriptForTimeRange(timecodes, chunkStartMs, chunkEndMs);
+      response.chunk = {
+        index: chunkIndex,
+        totalChunks,
+        startTime: Math.floor(chunkStartMs / 1e3),
+        endTime: Math.floor(chunkEndMs / 1e3),
+        startFormatted: formatTime$1(chunkStartMs),
+        endFormatted: formatTime$1(chunkEndMs)
+      };
+      response.transcript = text;
+      if (includeTimecodes) {
+        response.transcriptWithTimeCodes = entries;
+      }
+      if (chunkIndex < totalChunks - 1) {
+        response.nextChunk = `Use chunkIndex: ${chunkIndex + 1} to get the next portion`;
+      }
+      if (chunkIndex > 0) {
+        response.previousChunk = `Use chunkIndex: ${chunkIndex - 1} to get the previous portion`;
+      }
+    }
+  } else if (includeFullTranscript || fullText.length <= maxFullTranscriptLength) {
+    response.transcript = fullText;
+    if (includeTimecodes) {
+      response.transcriptWithTimeCodes = timecodes;
+    }
+    if (includeFullTranscript && fullText.length > maxFullTranscriptLength) {
+      response.warning = "Full transcript included. For long videos, consider using chunkIndex, startTime/endTime, or search_transcript to reduce response size.";
+    } else if (fullText.length <= maxFullTranscriptLength) {
+      response.note = "Full transcript auto-loaded (fits within context limit).";
+    }
+  } else {
+    const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
+    response.preview = preview;
+    response.isLargeTranscript = true;
+    response.usage = {
+      fullTranscript: "Set includeFullTranscript: true to get complete text (warning: may exceed context)",
+      search: "Use search_transcript to find relevant portions by keyword (recommended for large transcripts)",
+      timeRange: "Use startTime and endTime (in seconds) to get a specific portion",
+      pagination: `Use chunkIndex (0-${totalChunks - 1}) to paginate through ${chunkSizeSeconds}s chunks`
+    };
+  }
+  return {
+    content: [
+      {
+        type: "text",
+        text: JSON.stringify(response, null, 2)
+      }
+    ]
+  };
+}
+const searchTranscriptTool = {
+  name: "search_transcript",
+  description: "Search within a saved transcript using BM25 scoring. Returns the most relevant segments matching your query with timestamps. Use this to find specific content in long videos without loading the entire transcript.",
+  inputSchema: {
+    type: "object",
+    properties: {
+      videoId: {
+        type: "string",
+        description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
+      },
+      query: {
+        type: "string",
+        description: "Search query - keywords or phrases to find in the transcript"
+      },
+      maxResults: {
+        type: "number",
+        description: "Maximum number of results to return (default: 5, max: 20)",
+        default: 5
+      }
+    },
+    required: ["videoId", "query"]
+  }
+};
+function tokenize(text) {
+  return text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((word) => word.length > 1);
+}
+function calculateIDF(segments, vocabulary) {
+  const idf = /* @__PURE__ */ new Map();
+  const N = segments.length;
+  for (const term of vocabulary) {
+    const docsWithTerm = segments.filter(
+      (seg) => tokenize(seg.text).includes(term)
+    ).length;
+    idf.set(term, Math.log((N - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1));
+  }
+  return idf;
+}
+function bm25Score(segmentTokens, queryTokens, idf, avgDocLength, k1 = 1.5, b = 0.75) {
+  const docLength = segmentTokens.length;
+  let score = 0;
+  const tf = /* @__PURE__ */ new Map();
+  for (const token of segmentTokens) {
+    tf.set(token, (tf.get(token) || 0) + 1);
+  }
+  for (const term of queryTokens) {
+    const termFreq = tf.get(term) || 0;
+    const termIdf = idf.get(term) || 0;
+    if (termFreq > 0) {
+      const numerator = termFreq * (k1 + 1);
+      const denominator = termFreq + k1 * (1 - b + b * (docLength / avgDocLength));
+      score += termIdf * (numerator / denominator);
+    }
+  }
+  return score;
+}
+function formatTime(ms) {
+  const totalSeconds = Math.floor(ms / 1e3);
+  const hours = Math.floor(totalSeconds / 3600);
+  const minutes = Math.floor(totalSeconds % 3600 / 60);
+  const seconds = totalSeconds % 60;
+  if (hours > 0) {
+    return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+  }
+  return `${minutes}:${seconds.toString().padStart(2, "0")}`;
+}
+function createSegments(timecodes, segmentDurationMs) {
+  if (!timecodes || timecodes.length === 0) return [];
+  const segments = [];
+  let currentSegment = [];
+  let segmentStartTime = timecodes[0].start;
+  for (const entry of timecodes) {
+    const segmentEndTime = segmentStartTime + segmentDurationMs;
+    if (entry.start < segmentEndTime) {
+      currentSegment.push(entry);
+    } else {
+      if (currentSegment.length > 0) {
+        const endTime = currentSegment[currentSegment.length - 1].end || currentSegment[currentSegment.length - 1].start + (currentSegment[currentSegment.length - 1].duration || 0);
+        segments.push({
+          text: currentSegment.map((e) => e.text).join(" "),
+          startTime: Math.floor(segmentStartTime / 1e3),
+          endTime: Math.floor(endTime / 1e3),
+          startFormatted: formatTime(segmentStartTime),
+          endFormatted: formatTime(endTime)
+        });
+      }
+      segmentStartTime = entry.start;
+      currentSegment = [entry];
+    }
+  }
+  if (currentSegment.length > 0) {
+    const endTime = currentSegment[currentSegment.length - 1].end || currentSegment[currentSegment.length - 1].start + (currentSegment[currentSegment.length - 1].duration || 0);
+    segments.push({
+      text: currentSegment.map((e) => e.text).join(" "),
+      startTime: Math.floor(segmentStartTime / 1e3),
+      endTime: Math.floor(endTime / 1e3),
+      startFormatted: formatTime(segmentStartTime),
+      endFormatted: formatTime(endTime)
+    });
+  }
+  return segments;
+}
+async function handleSearchTranscript(strapi, args) {
+  const validatedArgs = validateToolInput("search_transcript", args);
+  const { videoId: videoIdOrUrl, query, maxResults: maxResultsInput } = validatedArgs;
+  const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
+  const segmentSeconds = pluginConfig?.searchSegmentSeconds || 30;
+  const maxResults = Math.min(Math.max(maxResultsInput || 5, 1), 20);
   const videoId = extractYouTubeID(videoIdOrUrl);
   if (!videoId) {
     throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
   }
-  const service2 = strapi2.plugin("yt-transcript-strapi-plugin").service("service");
+  const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
   const transcript2 = await service2.findTranscript(videoId);
   if (!transcript2) {
     return {
@@ -246,13 +550,90 @@ async function handleGetTranscript(strapi2, args) {
       ]
     };
   }
+  const timecodes = transcript2.transcriptWithTimeCodes || [];
+  if (timecodes.length === 0) {
+    return {
+      content: [
+        {
+          type: "text",
+          text: JSON.stringify(
+            {
+              error: true,
+              message: "Transcript has no timecode data for searching.",
+              videoId
+            },
+            null,
+            2
+          )
+        }
+      ]
+    };
+  }
+  const segments = createSegments(timecodes, segmentSeconds * 1e3);
+  if (segments.length === 0) {
+    return {
+      content: [
+        {
+          type: "text",
+          text: JSON.stringify(
+            {
+              error: true,
+              message: "Could not create searchable segments from transcript.",
+              videoId
+            },
+            null,
+            2
+          )
+        }
+      ]
+    };
+  }
+  const queryTokens = tokenize(query);
+  if (queryTokens.length === 0) {
+    return {
+      content: [
+        {
+          type: "text",
+          text: JSON.stringify(
+            {
+              error: true,
+              message: "Query is empty or contains only stop words.",
+              query
+            },
+            null,
+            2
+          )
+        }
+      ]
+    };
+  }
+  const vocabulary = new Set(queryTokens);
+  const idf = calculateIDF(segments, vocabulary);
+  const avgDocLength = segments.reduce((sum, seg) => sum + tokenize(seg.text).length, 0) / segments.length;
+  const scoredSegments = segments.map((segment) => ({
+    ...segment,
+    score: bm25Score(tokenize(segment.text), queryTokens, idf, avgDocLength)
+  }));
+  const results = scoredSegments.filter((seg) => seg.score > 0).sort((a, b) => b.score - a.score).slice(0, maxResults);
   return {
     content: [
       {
         type: "text",
         text: JSON.stringify(
           {
-            data: transcript2
+            videoId: transcript2.videoId,
+            title: transcript2.title,
+            query,
+            totalSegments: segments.length,
+            matchingResults: results.length,
+            results: results.map((r) => ({
+              text: r.text,
+              startTime: r.startTime,
+              endTime: r.endTime,
+              timeRange: `${r.startFormatted} - ${r.endFormatted}`,
+              score: Math.round(r.score * 100) / 100
+            })),
+            usage: results.length > 0 ? `Use get_transcript with startTime: ${results[0].startTime} and endTime: ${results[0].endTime} to get full context for the top result.` : "No matches found. Try different keywords."
           },
           null,
           2
@@ -311,11 +692,10 @@ function truncateText(text, maxLength) {
 function truncateTranscripts(transcripts) {
   return transcripts.map((transcript2) => ({
     ...transcript2,
-    fullTranscript: truncateText(transcript2.fullTranscript, TRANSCRIPT_PREVIEW_LENGTH),
-    readableTranscript: truncateText(transcript2.readableTranscript, TRANSCRIPT_PREVIEW_LENGTH)
+    fullTranscript: truncateText(transcript2.fullTranscript, TRANSCRIPT_PREVIEW_LENGTH)
   }));
 }
-async function handleFindTranscripts(strapi2, args) {
+async function handleFindTranscripts(strapi, args) {
   const validatedArgs = validateToolInput("find_transcripts", args);
   const { query, videoId, title, includeFullContent, page, pageSize, sort } = validatedArgs;
   const start = (page - 1) * pageSize;
@@ -330,17 +710,16 @@ async function handleFindTranscripts(strapi2, args) {
     filters.$or = [
       { title: { $containsi: query } },
       { videoId: { $containsi: query } },
-      { fullTranscript: { $containsi: query } },
-      { readableTranscript: { $containsi: query } }
+      { fullTranscript: { $containsi: query } }
     ];
   }
-  const transcripts = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
+  const transcripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
     filters,
     sort,
     limit: pageSize,
     start
   });
-  const allMatching = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
+  const allMatching = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
     filters
   });
   const total = allMatching.length;
@@ -376,15 +755,17 @@ const tools = [
   fetchTranscriptTool,
   listTranscriptsTool,
   getTranscriptTool,
+  searchTranscriptTool,
   findTranscriptsTool
 ];
 const toolHandlers = {
   fetch_transcript: handleFetchTranscript,
   list_transcripts: handleListTranscripts,
   get_transcript: handleGetTranscript,
+  search_transcript: handleSearchTranscript,
   find_transcripts: handleFindTranscripts
 };
-async function handleToolCall(strapi2, request) {
+async function handleToolCall(strapi, request) {
   const { name, arguments: args } = request.params;
   const handler = toolHandlers[name];
   if (!handler) {
@@ -392,13 +773,13 @@ async function handleToolCall(strapi2, request) {
   }
   const startTime = Date.now();
   try {
-    const result = await handler(strapi2, args || {});
+    const result = await handler(strapi, args || {});
     const duration = Date.now() - startTime;
-    strapi2.log.debug(`[yt-transcript-mcp] Tool ${name} executed successfully in ${duration}ms`);
+    strapi.log.debug(`[yt-transcript-mcp] Tool ${name} executed successfully in ${duration}ms`);
     return result;
   } catch (error) {
     const duration = Date.now() - startTime;
-    strapi2.log.error(`[yt-transcript-mcp] Tool ${name} failed after ${duration}ms`, {
+    strapi.log.error(`[yt-transcript-mcp] Tool ${name} failed after ${duration}ms`, {
       error: error instanceof Error ? error.message : String(error)
     });
     return {
@@ -419,7 +800,7 @@ async function handleToolCall(strapi2, request) {
     };
   }
 }
-function createMcpServer(strapi2) {
+function createMcpServer(strapi) {
   const server = new Server(
     {
       name: "yt-transcript-mcp",
@@ -432,53 +813,57 @@ function createMcpServer(strapi2) {
     }
   );
   server.setRequestHandler(ListToolsRequestSchema, async () => {
-    strapi2.log.debug("[yt-transcript-mcp] Listing tools");
+    strapi.log.debug("[yt-transcript-mcp] Listing tools");
     return { tools };
   });
   server.setRequestHandler(CallToolRequestSchema, async (request) => {
-    strapi2.log.debug(`[yt-transcript-mcp] Tool call: ${request.params.name}`);
-    return handleToolCall(strapi2, request);
+    strapi.log.debug(`[yt-transcript-mcp] Tool call: ${request.params.name}`);
+    return handleToolCall(strapi, request);
   });
-  strapi2.log.info("[yt-transcript-mcp] MCP server created with tools:", {
+  strapi.log.info("[yt-transcript-mcp] MCP server created with tools:", {
     tools: tools.map((t) => t.name)
   });
   return server;
 }
-const bootstrap = async ({ strapi: strapi2 }) => {
-  const plugin = strapi2.plugin("yt-transcript-strapi-plugin");
-  plugin.createMcpServer = () => createMcpServer(strapi2);
+const bootstrap = async ({ strapi }) => {
+  const plugin = strapi.plugin("yt-transcript-strapi-plugin");
+  plugin.createMcpServer = () => createMcpServer(strapi);
   plugin.sessions = /* @__PURE__ */ new Map();
-  strapi2.log.info("[yt-transcript-mcp] MCP plugin initialized");
-  strapi2.log.info("[yt-transcript-mcp] MCP endpoint available at: /api/yt-transcript-strapi-plugin/mcp");
+  strapi.log.info("[yt-transcript-mcp] MCP plugin initialized");
+  strapi.log.info("[yt-transcript-mcp] MCP endpoint available at: /api/yt-transcript-strapi-plugin/mcp");
 };
-const destroy = ({ strapi: strapi2 }) => {
+const destroy = ({ strapi }) => {
 };
-const register = ({ strapi: strapi2 }) => {
+const register = ({ strapi }) => {
 };
 const config = {
   default: {
-    openAIApiKey: "",
-    model: "gpt-4o-mini",
-    temp: 0.7,
-    maxTokens: 4096,
-    proxyUrl: ""
+    proxyUrl: "",
     // Optional: HTTP/HTTPS proxy for YouTube requests (e.g., 'http://user:pass@proxy.example.com:8080')
+    chunkSizeSeconds: 300,
+    // Default chunk size for transcript pagination (5 minutes)
+    previewLength: 500,
+    // Default preview length in characters
+    maxFullTranscriptLength: 5e4,
+    // Auto-load full transcript if under this character count (~12K tokens)
+    searchSegmentSeconds: 30
+    // Segment size for BM25 search scoring
   },
   validator(config2) {
-    if (config2.openAIApiKey && typeof config2.openAIApiKey !== "string") {
-      throw new Error("openAIApiKey must be a string");
+    if (config2.proxyUrl && typeof config2.proxyUrl !== "string") {
+      throw new Error("proxyUrl must be a string");
     }
-    if (config2.model && typeof config2.model !== "string") {
-      throw new Error("model must be a string");
+    if (config2.chunkSizeSeconds !== void 0 && (typeof config2.chunkSizeSeconds !== "number" || config2.chunkSizeSeconds < 30)) {
+      throw new Error("chunkSizeSeconds must be a number >= 30");
     }
-    if (config2.temp !== void 0 && (typeof config2.temp !== "number" || config2.temp < 0 || config2.temp > 2)) {
-      throw new Error("temp must be a number between 0 and 2");
+    if (config2.previewLength !== void 0 && (typeof config2.previewLength !== "number" || config2.previewLength < 100)) {
+      throw new Error("previewLength must be a number >= 100");
     }
-    if (config2.maxTokens !== void 0 && (typeof config2.maxTokens !== "number" || config2.maxTokens < 1)) {
-      throw new Error("maxTokens must be a positive number");
+    if (config2.maxFullTranscriptLength !== void 0 && (typeof config2.maxFullTranscriptLength !== "number" || config2.maxFullTranscriptLength < 1e3)) {
+      throw new Error("maxFullTranscriptLength must be a number >= 1000");
     }
-    if (config2.proxyUrl && typeof config2.proxyUrl !== "string") {
-      throw new Error("proxyUrl must be a string");
+    if (config2.searchSegmentSeconds !== void 0 && (typeof config2.searchSegmentSeconds !== "number" || config2.searchSegmentSeconds < 10)) {
+      throw new Error("searchSegmentSeconds must be a number >= 10");
     }
   }
 };
@@ -512,9 +897,6 @@ const attributes = {
   },
   transcriptWithTimeCodes: {
     type: "json"
-  },
-  readableTranscript: {
-    type: "richtext"
   }
 };
 const schema = {
@@ -531,41 +913,34 @@ const transcript = {
 const contentTypes = {
   transcript
 };
-const controller = ({ strapi: strapi2 }) => ({
+const controller = ({ strapi }) => ({
   async getTranscript(ctx) {
     const videoId = extractYouTubeID(ctx.params.videoId);
     if (!videoId) {
       return ctx.body = { error: "Invalid YouTube URL or ID", data: null };
     }
-    const found = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").findTranscript(videoId);
+    const found = await strapi.plugin("yt-transcript-strapi-plugin").service("service").findTranscript(videoId);
     if (found) {
       return ctx.body = { data: found };
     }
-    const transcriptData = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").getTranscript(videoId);
-    let readableTranscript = null;
-    try {
-      readableTranscript = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").generateHumanReadableTranscript(transcriptData.fullTranscript);
-    } catch (error) {
-      strapi2.log.debug("[yt-transcript] Readable transcript generation skipped");
-    }
+    const transcriptData = await strapi.plugin("yt-transcript-strapi-plugin").service("service").getTranscript(videoId);
     const payload = {
       videoId,
       title: transcriptData?.title || "No title found",
       fullTranscript: transcriptData?.fullTranscript,
-      transcriptWithTimeCodes: transcriptData?.transcriptWithTimeCodes,
-      readableTranscript
+      transcriptWithTimeCodes: transcriptData?.transcriptWithTimeCodes
     };
-    const transcript2 = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").saveTranscript(payload);
+    const transcript2 = await strapi.plugin("yt-transcript-strapi-plugin").service("service").saveTranscript(payload);
     ctx.body = { data: transcript2 };
   }
 });
-const mcpController = ({ strapi: strapi2 }) => ({
+const mcpController = ({ strapi }) => ({
   /**
    * Handle MCP requests (POST, GET, DELETE)
    * Creates a new server+transport per session for proper isolation
    */
   async handle(ctx) {
-    const plugin = strapi2.plugin("yt-transcript-strapi-plugin");
+    const plugin = strapi.plugin("yt-transcript-strapi-plugin");
     if (!plugin.createMcpServer) {
       ctx.status = 503;
       ctx.body = {
@@ -585,12 +960,12 @@ const mcpController = ({ strapi: strapi2 }) => ({
         await server.connect(transport);
         session = { server, transport, createdAt: Date.now() };
         plugin.sessions.set(sessionId, session);
-        strapi2.log.debug(`[yt-transcript-mcp] New session created: ${sessionId}`);
+        strapi.log.debug(`[yt-transcript-mcp] New session created: ${sessionId}`);
       }
       await session.transport.handleRequest(ctx.req, ctx.res, ctx.request.body);
       ctx.respond = false;
     } catch (error) {
-      strapi2.log.error("[yt-transcript-mcp] Error handling MCP request", {
+      strapi.log.error("[yt-transcript-mcp] Error handling MCP request", {
         error: error instanceof Error ? error.message : String(error),
         method: ctx.method,
         path: ctx.path
@@ -670,18 +1045,6 @@ const routes = {
     routes: [...admin]
   }
 };
-async function initializeModel({
-  openAIApiKey,
-  model,
-  temp
-}) {
-  return new ChatOpenAI({
-    temperature: temp,
-    openAIApiKey,
-    modelName: model,
-    maxTokens: 1e3
-  });
-}
 function isRequestLike(input) {
   return typeof input === "object" && input !== null && "url" in input && typeof input.url === "string" && "method" in input;
 }
@@ -824,49 +1187,14 @@ const fetchTranscript = async (videoId, options2) => {
     );
   }
 };
-async function processTextChunks(chunks, model) {
-  const punctuationPrompt = PromptTemplate.fromTemplate(
-    "Add proper punctuation and capitalization to the following text chunk:\n\n{chunk}"
-  );
-  const punctuationChain = punctuationPrompt.pipe(model);
-  const processedChunks = await Promise.all(
-    chunks.map(async (chunk) => {
-      const result = await punctuationChain.invoke({ chunk });
-      return result.content;
-    })
-  );
-  return processedChunks.join(" ");
-}
-async function generateModifiedTranscript(rawTranscript) {
-  const pluginSettings = await strapi.config.get(
-    "plugin::yt-transcript-strapi-plugin"
-  );
-  if (!pluginSettings.openAIApiKey || !pluginSettings.model || !pluginSettings.temp || !pluginSettings.maxTokens) {
-    throw new Error("Missing required configuration for YTTranscript");
-  }
-  const chatModel = await initializeModel({
-    openAIApiKey: pluginSettings.openAIApiKey,
-    model: pluginSettings.model,
-    temp: pluginSettings.temp,
-    maxTokens: pluginSettings.maxTokens
-  });
-  const splitter = new TokenTextSplitter({
-    chunkSize: 1e3,
-    chunkOverlap: 200
-  });
-  const transcriptChunks = await splitter.createDocuments([rawTranscript]);
-  const chunkTexts = transcriptChunks.map((chunk) => chunk.pageContent);
-  const modifiedTranscript = await processTextChunks(chunkTexts, chatModel);
-  return modifiedTranscript;
-}
-const service = ({ strapi: strapi2 }) => ({
+const service = ({ strapi }) => ({
   async getTranscript(identifier) {
     const youtubeIdRegex = /^[a-zA-Z0-9_-]{11}$/;
     const isValid = youtubeIdRegex.test(identifier);
     if (!isValid) {
       return { error: "Invalid video ID", data: null };
     }
-    const pluginSettings = await strapi2.config.get(
+    const pluginSettings = await strapi.config.get(
       "plugin::yt-transcript-strapi-plugin"
     );
     const transcriptData = await fetchTranscript(identifier, {
@@ -879,20 +1207,16 @@ const service = ({ strapi: strapi2 }) => ({
     };
   },
   async saveTranscript(payload) {
-    return await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").create({
+    return await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").create({
       data: payload
     });
   },
   async findTranscript(videoId) {
-    const transcriptData = await strapi2.documents("plugin::yt-transcript-strapi-plugin.transcript").findFirst({
+    const transcriptData = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findFirst({
       filters: { videoId }
     });
     if (!transcriptData) return null;
     return transcriptData;
-  },
-  async generateHumanReadableTranscript(transcript2) {
-    const modifiedTranscript = await generateModifiedTranscript(transcript2);
-    return modifiedTranscript;
   }
 });
 const services = {