yt-transcript-strapi-plugin 0.0.21 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/index.js +477 -153
- package/dist/server/index.mjs +477 -153
- package/dist/server/src/config/index.d.ts +8 -8
- package/dist/server/src/content-types/index.d.ts +0 -3
- package/dist/server/src/content-types/transcript/index.d.ts +0 -3
- package/dist/server/src/index.d.ts +8 -12
- package/dist/server/src/mcp/schemas/index.d.ts +63 -6
- package/dist/server/src/mcp/tools/fetch-transcript.d.ts +0 -5
- package/dist/server/src/mcp/tools/get-transcript.d.ts +26 -0
- package/dist/server/src/mcp/tools/index.d.ts +13 -13
- package/dist/server/src/mcp/tools/search-transcript.d.ts +30 -0
- package/dist/server/src/services/index.d.ts +0 -1
- package/dist/server/src/services/service.d.ts +0 -2
- package/package.json +7 -6
- package/dist/server/src/utils/openai.d.ts +0 -9
package/dist/server/index.mjs
CHANGED
|
@@ -3,14 +3,10 @@ import { ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprot
|
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { randomUUID } from "node:crypto";
|
|
5
5
|
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
6
|
-
import { TokenTextSplitter } from "@langchain/textsplitters";
|
|
7
|
-
import { PromptTemplate } from "@langchain/core/prompts";
|
|
8
|
-
import { ChatOpenAI } from "@langchain/openai";
|
|
9
6
|
import { Innertube } from "youtubei.js";
|
|
10
7
|
import { ProxyAgent, fetch as fetch$1 } from "undici";
|
|
11
8
|
const FetchTranscriptSchema = z.object({
|
|
12
|
-
videoId: z.string().min(1, "Video ID or URL is required")
|
|
13
|
-
generateReadable: z.boolean().optional().default(false)
|
|
9
|
+
videoId: z.string().min(1, "Video ID or URL is required")
|
|
14
10
|
});
|
|
15
11
|
const ListTranscriptsSchema = z.object({
|
|
16
12
|
page: z.number().int().min(1).optional().default(1),
|
|
@@ -18,7 +14,18 @@ const ListTranscriptsSchema = z.object({
|
|
|
18
14
|
sort: z.string().optional().default("createdAt:desc")
|
|
19
15
|
});
|
|
20
16
|
const GetTranscriptSchema = z.object({
|
|
21
|
-
videoId: z.string().min(1, "Video ID is required")
|
|
17
|
+
videoId: z.string().min(1, "Video ID is required"),
|
|
18
|
+
includeFullTranscript: z.boolean().optional().default(false),
|
|
19
|
+
includeTimecodes: z.boolean().optional().default(false),
|
|
20
|
+
startTime: z.number().min(0).optional(),
|
|
21
|
+
endTime: z.number().min(0).optional(),
|
|
22
|
+
chunkIndex: z.number().int().min(0).optional(),
|
|
23
|
+
chunkSize: z.number().int().min(30).optional()
|
|
24
|
+
});
|
|
25
|
+
const SearchTranscriptSchema = z.object({
|
|
26
|
+
videoId: z.string().min(1, "Video ID is required"),
|
|
27
|
+
query: z.string().min(1, "Search query is required"),
|
|
28
|
+
maxResults: z.number().int().min(1).max(20).optional().default(5)
|
|
22
29
|
});
|
|
23
30
|
const FindTranscriptsSchema = z.object({
|
|
24
31
|
query: z.string().optional(),
|
|
@@ -33,6 +40,7 @@ const ToolSchemas = {
|
|
|
33
40
|
fetch_transcript: FetchTranscriptSchema,
|
|
34
41
|
list_transcripts: ListTranscriptsSchema,
|
|
35
42
|
get_transcript: GetTranscriptSchema,
|
|
43
|
+
search_transcript: SearchTranscriptSchema,
|
|
36
44
|
find_transcripts: FindTranscriptsSchema
|
|
37
45
|
};
|
|
38
46
|
function validateToolInput(toolName, input) {
|
|
@@ -67,31 +75,64 @@ function extractYouTubeID(urlOrID) {
|
|
|
67
75
|
}
|
|
68
76
|
const fetchTranscriptTool = {
|
|
69
77
|
name: "fetch_transcript",
|
|
70
|
-
description: "Fetch a transcript from YouTube for a given video ID or URL.
|
|
78
|
+
description: "Fetch a transcript from YouTube for a given video ID or URL. The transcript is saved to the database. Returns metadata and preview only to avoid context overflow. Use get_transcript to retrieve content.",
|
|
71
79
|
inputSchema: {
|
|
72
80
|
type: "object",
|
|
73
81
|
properties: {
|
|
74
82
|
videoId: {
|
|
75
83
|
type: "string",
|
|
76
84
|
description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
|
|
77
|
-
},
|
|
78
|
-
generateReadable: {
|
|
79
|
-
type: "boolean",
|
|
80
|
-
description: "If true, uses AI to add punctuation and formatting to make the transcript more readable. Requires OpenAI API key configuration.",
|
|
81
|
-
default: false
|
|
82
85
|
}
|
|
83
86
|
},
|
|
84
87
|
required: ["videoId"]
|
|
85
88
|
}
|
|
86
89
|
};
|
|
87
|
-
|
|
90
|
+
function getVideoDurationMs$1(timecodes) {
|
|
91
|
+
if (!timecodes || timecodes.length === 0) return 0;
|
|
92
|
+
const lastEntry = timecodes[timecodes.length - 1];
|
|
93
|
+
return lastEntry.end || lastEntry.start + (lastEntry.duration || 0);
|
|
94
|
+
}
|
|
95
|
+
function formatTime$2(ms) {
|
|
96
|
+
const totalSeconds = Math.floor(ms / 1e3);
|
|
97
|
+
const hours = Math.floor(totalSeconds / 3600);
|
|
98
|
+
const minutes = Math.floor(totalSeconds % 3600 / 60);
|
|
99
|
+
const seconds = totalSeconds % 60;
|
|
100
|
+
if (hours > 0) {
|
|
101
|
+
return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
|
|
102
|
+
}
|
|
103
|
+
return `${minutes}:${seconds.toString().padStart(2, "0")}`;
|
|
104
|
+
}
|
|
105
|
+
function buildMetadataResponse(transcript2, previewLength, cached) {
|
|
106
|
+
const fullText = transcript2.fullTranscript || "";
|
|
107
|
+
const timecodes = transcript2.transcriptWithTimeCodes || [];
|
|
108
|
+
const durationMs = getVideoDurationMs$1(timecodes);
|
|
109
|
+
const wordCount = fullText.split(/\s+/).length;
|
|
110
|
+
const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
|
|
111
|
+
return {
|
|
112
|
+
message: cached ? "Transcript already exists in database" : "Transcript fetched and saved successfully",
|
|
113
|
+
cached,
|
|
114
|
+
videoId: transcript2.videoId,
|
|
115
|
+
title: transcript2.title,
|
|
116
|
+
metadata: {
|
|
117
|
+
wordCount,
|
|
118
|
+
characterCount: fullText.length,
|
|
119
|
+
duration: formatTime$2(durationMs),
|
|
120
|
+
durationSeconds: Math.floor(durationMs / 1e3)
|
|
121
|
+
},
|
|
122
|
+
preview,
|
|
123
|
+
usage: "Use get_transcript with videoId to retrieve full content, specific time ranges, or paginated chunks."
|
|
124
|
+
};
|
|
125
|
+
}
|
|
126
|
+
async function handleFetchTranscript(strapi, args) {
|
|
88
127
|
const validatedArgs = validateToolInput("fetch_transcript", args);
|
|
89
|
-
const { videoId: videoIdOrUrl
|
|
128
|
+
const { videoId: videoIdOrUrl } = validatedArgs;
|
|
129
|
+
const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
|
|
130
|
+
const previewLength = pluginConfig?.previewLength || 500;
|
|
90
131
|
const videoId = extractYouTubeID(videoIdOrUrl);
|
|
91
132
|
if (!videoId) {
|
|
92
133
|
throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
|
|
93
134
|
}
|
|
94
|
-
const service2 =
|
|
135
|
+
const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
|
|
95
136
|
const existingTranscript = await service2.findTranscript(videoId);
|
|
96
137
|
if (existingTranscript) {
|
|
97
138
|
return {
|
|
@@ -99,11 +140,7 @@ async function handleFetchTranscript(strapi2, args) {
|
|
|
99
140
|
{
|
|
100
141
|
type: "text",
|
|
101
142
|
text: JSON.stringify(
|
|
102
|
-
|
|
103
|
-
message: "Transcript already exists in database",
|
|
104
|
-
data: existingTranscript,
|
|
105
|
-
cached: true
|
|
106
|
-
},
|
|
143
|
+
buildMetadataResponse(existingTranscript, previewLength, true),
|
|
107
144
|
null,
|
|
108
145
|
2
|
|
109
146
|
)
|
|
@@ -121,25 +158,13 @@ async function handleFetchTranscript(strapi2, args) {
|
|
|
121
158
|
fullTranscript: transcriptData.fullTranscript,
|
|
122
159
|
transcriptWithTimeCodes: transcriptData.transcriptWithTimeCodes
|
|
123
160
|
};
|
|
124
|
-
if (generateReadable && transcriptData.fullTranscript) {
|
|
125
|
-
try {
|
|
126
|
-
const readableTranscript = await service2.generateHumanReadableTranscript(transcriptData.fullTranscript);
|
|
127
|
-
payload.readableTranscript = readableTranscript;
|
|
128
|
-
} catch (error) {
|
|
129
|
-
strapi2.log.warn("[yt-transcript-mcp] Failed to generate readable transcript:", error);
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
161
|
const savedTranscript = await service2.saveTranscript(payload);
|
|
133
162
|
return {
|
|
134
163
|
content: [
|
|
135
164
|
{
|
|
136
165
|
type: "text",
|
|
137
166
|
text: JSON.stringify(
|
|
138
|
-
|
|
139
|
-
message: "Transcript fetched and saved successfully",
|
|
140
|
-
data: savedTranscript,
|
|
141
|
-
cached: false
|
|
142
|
-
},
|
|
167
|
+
buildMetadataResponse(savedTranscript, previewLength, false),
|
|
143
168
|
null,
|
|
144
169
|
2
|
|
145
170
|
)
|
|
@@ -172,17 +197,17 @@ const listTranscriptsTool = {
|
|
|
172
197
|
required: []
|
|
173
198
|
}
|
|
174
199
|
};
|
|
175
|
-
async function handleListTranscripts(
|
|
200
|
+
async function handleListTranscripts(strapi, args) {
|
|
176
201
|
const validatedArgs = validateToolInput("list_transcripts", args);
|
|
177
202
|
const { page, pageSize, sort } = validatedArgs;
|
|
178
203
|
const start = (page - 1) * pageSize;
|
|
179
|
-
const transcripts = await
|
|
204
|
+
const transcripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
|
|
180
205
|
sort,
|
|
181
206
|
limit: pageSize,
|
|
182
207
|
start,
|
|
183
208
|
fields: ["id", "documentId", "title", "videoId", "createdAt", "updatedAt"]
|
|
184
209
|
});
|
|
185
|
-
const allTranscripts = await
|
|
210
|
+
const allTranscripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({});
|
|
186
211
|
const total = allTranscripts.length;
|
|
187
212
|
return {
|
|
188
213
|
content: [
|
|
@@ -207,26 +232,305 @@ async function handleListTranscripts(strapi2, args) {
|
|
|
207
232
|
}
|
|
208
233
|
const getTranscriptTool = {
|
|
209
234
|
name: "get_transcript",
|
|
210
|
-
description: "Get a
|
|
235
|
+
description: "Get a saved transcript by YouTube video ID. Returns metadata and preview by default. Use parameters to get full content or specific time ranges to avoid context overflow.",
|
|
211
236
|
inputSchema: {
|
|
212
237
|
type: "object",
|
|
213
238
|
properties: {
|
|
214
239
|
videoId: {
|
|
215
240
|
type: "string",
|
|
216
241
|
description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
|
|
242
|
+
},
|
|
243
|
+
includeFullTranscript: {
|
|
244
|
+
type: "boolean",
|
|
245
|
+
description: "Include the complete transcript text. Warning: may cause context overflow for long videos. Default: false",
|
|
246
|
+
default: false
|
|
247
|
+
},
|
|
248
|
+
includeTimecodes: {
|
|
249
|
+
type: "boolean",
|
|
250
|
+
description: "Include the transcript with timecodes array. Warning: significantly increases response size. Default: false",
|
|
251
|
+
default: false
|
|
252
|
+
},
|
|
253
|
+
startTime: {
|
|
254
|
+
type: "number",
|
|
255
|
+
description: "Start time in seconds for fetching a specific portion of the transcript"
|
|
256
|
+
},
|
|
257
|
+
endTime: {
|
|
258
|
+
type: "number",
|
|
259
|
+
description: "End time in seconds for fetching a specific portion of the transcript"
|
|
260
|
+
},
|
|
261
|
+
chunkIndex: {
|
|
262
|
+
type: "number",
|
|
263
|
+
description: "Chunk index (0-based) when paginating through transcript. Use with chunkSize to paginate through long videos."
|
|
264
|
+
},
|
|
265
|
+
chunkSize: {
|
|
266
|
+
type: "number",
|
|
267
|
+
description: "Chunk size in seconds. Overrides config default. Use with chunkIndex for pagination."
|
|
217
268
|
}
|
|
218
269
|
},
|
|
219
270
|
required: ["videoId"]
|
|
220
271
|
}
|
|
221
272
|
};
|
|
222
|
-
|
|
273
|
+
function getTranscriptForTimeRange(timecodes, startTimeMs, endTimeMs) {
|
|
274
|
+
const entries = timecodes.filter(
|
|
275
|
+
(entry) => entry.start >= startTimeMs && entry.start < endTimeMs
|
|
276
|
+
);
|
|
277
|
+
const text = entries.map((e) => e.text).join(" ");
|
|
278
|
+
return { text, entries };
|
|
279
|
+
}
|
|
280
|
+
function getVideoDurationMs(timecodes) {
|
|
281
|
+
if (!timecodes || timecodes.length === 0) return 0;
|
|
282
|
+
const lastEntry = timecodes[timecodes.length - 1];
|
|
283
|
+
return lastEntry.end || lastEntry.start + (lastEntry.duration || 0);
|
|
284
|
+
}
|
|
285
|
+
function formatTime$1(ms) {
|
|
286
|
+
const totalSeconds = Math.floor(ms / 1e3);
|
|
287
|
+
const hours = Math.floor(totalSeconds / 3600);
|
|
288
|
+
const minutes = Math.floor(totalSeconds % 3600 / 60);
|
|
289
|
+
const seconds = totalSeconds % 60;
|
|
290
|
+
if (hours > 0) {
|
|
291
|
+
return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
|
|
292
|
+
}
|
|
293
|
+
return `${minutes}:${seconds.toString().padStart(2, "0")}`;
|
|
294
|
+
}
|
|
295
|
+
async function handleGetTranscript(strapi, args) {
|
|
223
296
|
const validatedArgs = validateToolInput("get_transcript", args);
|
|
224
|
-
const {
|
|
297
|
+
const {
|
|
298
|
+
videoId: videoIdOrUrl,
|
|
299
|
+
includeFullTranscript,
|
|
300
|
+
includeTimecodes,
|
|
301
|
+
startTime,
|
|
302
|
+
endTime,
|
|
303
|
+
chunkIndex,
|
|
304
|
+
chunkSize: chunkSizeOverride
|
|
305
|
+
} = validatedArgs;
|
|
306
|
+
const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
|
|
307
|
+
const defaultChunkSize = pluginConfig?.chunkSizeSeconds || 300;
|
|
308
|
+
const previewLength = pluginConfig?.previewLength || 500;
|
|
309
|
+
const maxFullTranscriptLength = pluginConfig?.maxFullTranscriptLength || 5e4;
|
|
310
|
+
const chunkSizeSeconds = chunkSizeOverride || defaultChunkSize;
|
|
311
|
+
const videoId = extractYouTubeID(videoIdOrUrl);
|
|
312
|
+
if (!videoId) {
|
|
313
|
+
throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
|
|
314
|
+
}
|
|
315
|
+
const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
|
|
316
|
+
const transcript2 = await service2.findTranscript(videoId);
|
|
317
|
+
if (!transcript2) {
|
|
318
|
+
return {
|
|
319
|
+
content: [
|
|
320
|
+
{
|
|
321
|
+
type: "text",
|
|
322
|
+
text: JSON.stringify(
|
|
323
|
+
{
|
|
324
|
+
error: true,
|
|
325
|
+
message: `No transcript found for video ID: ${videoId}. Use fetch_transcript to fetch it from YouTube first.`,
|
|
326
|
+
videoId
|
|
327
|
+
},
|
|
328
|
+
null,
|
|
329
|
+
2
|
|
330
|
+
)
|
|
331
|
+
}
|
|
332
|
+
]
|
|
333
|
+
};
|
|
334
|
+
}
|
|
335
|
+
const timecodes = transcript2.transcriptWithTimeCodes || [];
|
|
336
|
+
const fullText = transcript2.fullTranscript || "";
|
|
337
|
+
const durationMs = getVideoDurationMs(timecodes);
|
|
338
|
+
const totalChunks = Math.ceil(durationMs / (chunkSizeSeconds * 1e3));
|
|
339
|
+
const wordCount = fullText.split(/\s+/).length;
|
|
340
|
+
const response = {
|
|
341
|
+
videoId: transcript2.videoId,
|
|
342
|
+
title: transcript2.title,
|
|
343
|
+
metadata: {
|
|
344
|
+
wordCount,
|
|
345
|
+
characterCount: fullText.length,
|
|
346
|
+
duration: formatTime$1(durationMs),
|
|
347
|
+
durationSeconds: Math.floor(durationMs / 1e3),
|
|
348
|
+
totalChunks,
|
|
349
|
+
chunkSizeSeconds
|
|
350
|
+
}
|
|
351
|
+
};
|
|
352
|
+
if (startTime !== void 0 || endTime !== void 0) {
|
|
353
|
+
const startMs = (startTime || 0) * 1e3;
|
|
354
|
+
const endMs = endTime !== void 0 ? endTime * 1e3 : durationMs;
|
|
355
|
+
const { text, entries } = getTranscriptForTimeRange(timecodes, startMs, endMs);
|
|
356
|
+
response.timeRange = {
|
|
357
|
+
startTime: startTime || 0,
|
|
358
|
+
endTime: endTime || Math.floor(durationMs / 1e3),
|
|
359
|
+
startFormatted: formatTime$1(startMs),
|
|
360
|
+
endFormatted: formatTime$1(endMs)
|
|
361
|
+
};
|
|
362
|
+
response.transcript = text;
|
|
363
|
+
if (includeTimecodes) {
|
|
364
|
+
response.transcriptWithTimeCodes = entries;
|
|
365
|
+
}
|
|
366
|
+
} else if (chunkIndex !== void 0) {
|
|
367
|
+
const chunkStartMs = chunkIndex * chunkSizeSeconds * 1e3;
|
|
368
|
+
const chunkEndMs = Math.min((chunkIndex + 1) * chunkSizeSeconds * 1e3, durationMs);
|
|
369
|
+
if (chunkStartMs >= durationMs) {
|
|
370
|
+
response.error = `Chunk index ${chunkIndex} is out of range. Total chunks: ${totalChunks} (0-${totalChunks - 1})`;
|
|
371
|
+
} else {
|
|
372
|
+
const { text, entries } = getTranscriptForTimeRange(timecodes, chunkStartMs, chunkEndMs);
|
|
373
|
+
response.chunk = {
|
|
374
|
+
index: chunkIndex,
|
|
375
|
+
totalChunks,
|
|
376
|
+
startTime: Math.floor(chunkStartMs / 1e3),
|
|
377
|
+
endTime: Math.floor(chunkEndMs / 1e3),
|
|
378
|
+
startFormatted: formatTime$1(chunkStartMs),
|
|
379
|
+
endFormatted: formatTime$1(chunkEndMs)
|
|
380
|
+
};
|
|
381
|
+
response.transcript = text;
|
|
382
|
+
if (includeTimecodes) {
|
|
383
|
+
response.transcriptWithTimeCodes = entries;
|
|
384
|
+
}
|
|
385
|
+
if (chunkIndex < totalChunks - 1) {
|
|
386
|
+
response.nextChunk = `Use chunkIndex: ${chunkIndex + 1} to get the next portion`;
|
|
387
|
+
}
|
|
388
|
+
if (chunkIndex > 0) {
|
|
389
|
+
response.previousChunk = `Use chunkIndex: ${chunkIndex - 1} to get the previous portion`;
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
} else if (includeFullTranscript || fullText.length <= maxFullTranscriptLength) {
|
|
393
|
+
response.transcript = fullText;
|
|
394
|
+
if (includeTimecodes) {
|
|
395
|
+
response.transcriptWithTimeCodes = timecodes;
|
|
396
|
+
}
|
|
397
|
+
if (includeFullTranscript && fullText.length > maxFullTranscriptLength) {
|
|
398
|
+
response.warning = "Full transcript included. For long videos, consider using chunkIndex, startTime/endTime, or search_transcript to reduce response size.";
|
|
399
|
+
} else if (fullText.length <= maxFullTranscriptLength) {
|
|
400
|
+
response.note = "Full transcript auto-loaded (fits within context limit).";
|
|
401
|
+
}
|
|
402
|
+
} else {
|
|
403
|
+
const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
|
|
404
|
+
response.preview = preview;
|
|
405
|
+
response.isLargeTranscript = true;
|
|
406
|
+
response.usage = {
|
|
407
|
+
fullTranscript: "Set includeFullTranscript: true to get complete text (warning: may exceed context)",
|
|
408
|
+
search: "Use search_transcript to find relevant portions by keyword (recommended for large transcripts)",
|
|
409
|
+
timeRange: "Use startTime and endTime (in seconds) to get a specific portion",
|
|
410
|
+
pagination: `Use chunkIndex (0-${totalChunks - 1}) to paginate through ${chunkSizeSeconds}s chunks`
|
|
411
|
+
};
|
|
412
|
+
}
|
|
413
|
+
return {
|
|
414
|
+
content: [
|
|
415
|
+
{
|
|
416
|
+
type: "text",
|
|
417
|
+
text: JSON.stringify(response, null, 2)
|
|
418
|
+
}
|
|
419
|
+
]
|
|
420
|
+
};
|
|
421
|
+
}
|
|
422
|
+
const searchTranscriptTool = {
|
|
423
|
+
name: "search_transcript",
|
|
424
|
+
description: "Search within a saved transcript using BM25 scoring. Returns the most relevant segments matching your query with timestamps. Use this to find specific content in long videos without loading the entire transcript.",
|
|
425
|
+
inputSchema: {
|
|
426
|
+
type: "object",
|
|
427
|
+
properties: {
|
|
428
|
+
videoId: {
|
|
429
|
+
type: "string",
|
|
430
|
+
description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
|
|
431
|
+
},
|
|
432
|
+
query: {
|
|
433
|
+
type: "string",
|
|
434
|
+
description: "Search query - keywords or phrases to find in the transcript"
|
|
435
|
+
},
|
|
436
|
+
maxResults: {
|
|
437
|
+
type: "number",
|
|
438
|
+
description: "Maximum number of results to return (default: 5, max: 20)",
|
|
439
|
+
default: 5
|
|
440
|
+
}
|
|
441
|
+
},
|
|
442
|
+
required: ["videoId", "query"]
|
|
443
|
+
}
|
|
444
|
+
};
|
|
445
|
+
function tokenize(text) {
|
|
446
|
+
return text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((word) => word.length > 1);
|
|
447
|
+
}
|
|
448
|
+
function calculateIDF(segments, vocabulary) {
|
|
449
|
+
const idf = /* @__PURE__ */ new Map();
|
|
450
|
+
const N = segments.length;
|
|
451
|
+
for (const term of vocabulary) {
|
|
452
|
+
const docsWithTerm = segments.filter(
|
|
453
|
+
(seg) => tokenize(seg.text).includes(term)
|
|
454
|
+
).length;
|
|
455
|
+
idf.set(term, Math.log((N - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1));
|
|
456
|
+
}
|
|
457
|
+
return idf;
|
|
458
|
+
}
|
|
459
|
+
function bm25Score(segmentTokens, queryTokens, idf, avgDocLength, k1 = 1.5, b = 0.75) {
|
|
460
|
+
const docLength = segmentTokens.length;
|
|
461
|
+
let score = 0;
|
|
462
|
+
const tf = /* @__PURE__ */ new Map();
|
|
463
|
+
for (const token of segmentTokens) {
|
|
464
|
+
tf.set(token, (tf.get(token) || 0) + 1);
|
|
465
|
+
}
|
|
466
|
+
for (const term of queryTokens) {
|
|
467
|
+
const termFreq = tf.get(term) || 0;
|
|
468
|
+
const termIdf = idf.get(term) || 0;
|
|
469
|
+
if (termFreq > 0) {
|
|
470
|
+
const numerator = termFreq * (k1 + 1);
|
|
471
|
+
const denominator = termFreq + k1 * (1 - b + b * (docLength / avgDocLength));
|
|
472
|
+
score += termIdf * (numerator / denominator);
|
|
473
|
+
}
|
|
474
|
+
}
|
|
475
|
+
return score;
|
|
476
|
+
}
|
|
477
|
+
function formatTime(ms) {
|
|
478
|
+
const totalSeconds = Math.floor(ms / 1e3);
|
|
479
|
+
const hours = Math.floor(totalSeconds / 3600);
|
|
480
|
+
const minutes = Math.floor(totalSeconds % 3600 / 60);
|
|
481
|
+
const seconds = totalSeconds % 60;
|
|
482
|
+
if (hours > 0) {
|
|
483
|
+
return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
|
|
484
|
+
}
|
|
485
|
+
return `${minutes}:${seconds.toString().padStart(2, "0")}`;
|
|
486
|
+
}
|
|
487
|
+
function createSegments(timecodes, segmentDurationMs) {
|
|
488
|
+
if (!timecodes || timecodes.length === 0) return [];
|
|
489
|
+
const segments = [];
|
|
490
|
+
let currentSegment = [];
|
|
491
|
+
let segmentStartTime = timecodes[0].start;
|
|
492
|
+
for (const entry of timecodes) {
|
|
493
|
+
const segmentEndTime = segmentStartTime + segmentDurationMs;
|
|
494
|
+
if (entry.start < segmentEndTime) {
|
|
495
|
+
currentSegment.push(entry);
|
|
496
|
+
} else {
|
|
497
|
+
if (currentSegment.length > 0) {
|
|
498
|
+
const endTime = currentSegment[currentSegment.length - 1].end || currentSegment[currentSegment.length - 1].start + (currentSegment[currentSegment.length - 1].duration || 0);
|
|
499
|
+
segments.push({
|
|
500
|
+
text: currentSegment.map((e) => e.text).join(" "),
|
|
501
|
+
startTime: Math.floor(segmentStartTime / 1e3),
|
|
502
|
+
endTime: Math.floor(endTime / 1e3),
|
|
503
|
+
startFormatted: formatTime(segmentStartTime),
|
|
504
|
+
endFormatted: formatTime(endTime)
|
|
505
|
+
});
|
|
506
|
+
}
|
|
507
|
+
segmentStartTime = entry.start;
|
|
508
|
+
currentSegment = [entry];
|
|
509
|
+
}
|
|
510
|
+
}
|
|
511
|
+
if (currentSegment.length > 0) {
|
|
512
|
+
const endTime = currentSegment[currentSegment.length - 1].end || currentSegment[currentSegment.length - 1].start + (currentSegment[currentSegment.length - 1].duration || 0);
|
|
513
|
+
segments.push({
|
|
514
|
+
text: currentSegment.map((e) => e.text).join(" "),
|
|
515
|
+
startTime: Math.floor(segmentStartTime / 1e3),
|
|
516
|
+
endTime: Math.floor(endTime / 1e3),
|
|
517
|
+
startFormatted: formatTime(segmentStartTime),
|
|
518
|
+
endFormatted: formatTime(endTime)
|
|
519
|
+
});
|
|
520
|
+
}
|
|
521
|
+
return segments;
|
|
522
|
+
}
|
|
523
|
+
async function handleSearchTranscript(strapi, args) {
|
|
524
|
+
const validatedArgs = validateToolInput("search_transcript", args);
|
|
525
|
+
const { videoId: videoIdOrUrl, query, maxResults: maxResultsInput } = validatedArgs;
|
|
526
|
+
const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
|
|
527
|
+
const segmentSeconds = pluginConfig?.searchSegmentSeconds || 30;
|
|
528
|
+
const maxResults = Math.min(Math.max(maxResultsInput || 5, 1), 20);
|
|
225
529
|
const videoId = extractYouTubeID(videoIdOrUrl);
|
|
226
530
|
if (!videoId) {
|
|
227
531
|
throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
|
|
228
532
|
}
|
|
229
|
-
const service2 =
|
|
533
|
+
const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
|
|
230
534
|
const transcript2 = await service2.findTranscript(videoId);
|
|
231
535
|
if (!transcript2) {
|
|
232
536
|
return {
|
|
@@ -246,13 +550,90 @@ async function handleGetTranscript(strapi2, args) {
|
|
|
246
550
|
]
|
|
247
551
|
};
|
|
248
552
|
}
|
|
553
|
+
const timecodes = transcript2.transcriptWithTimeCodes || [];
|
|
554
|
+
if (timecodes.length === 0) {
|
|
555
|
+
return {
|
|
556
|
+
content: [
|
|
557
|
+
{
|
|
558
|
+
type: "text",
|
|
559
|
+
text: JSON.stringify(
|
|
560
|
+
{
|
|
561
|
+
error: true,
|
|
562
|
+
message: "Transcript has no timecode data for searching.",
|
|
563
|
+
videoId
|
|
564
|
+
},
|
|
565
|
+
null,
|
|
566
|
+
2
|
|
567
|
+
)
|
|
568
|
+
}
|
|
569
|
+
]
|
|
570
|
+
};
|
|
571
|
+
}
|
|
572
|
+
const segments = createSegments(timecodes, segmentSeconds * 1e3);
|
|
573
|
+
if (segments.length === 0) {
|
|
574
|
+
return {
|
|
575
|
+
content: [
|
|
576
|
+
{
|
|
577
|
+
type: "text",
|
|
578
|
+
text: JSON.stringify(
|
|
579
|
+
{
|
|
580
|
+
error: true,
|
|
581
|
+
message: "Could not create searchable segments from transcript.",
|
|
582
|
+
videoId
|
|
583
|
+
},
|
|
584
|
+
null,
|
|
585
|
+
2
|
|
586
|
+
)
|
|
587
|
+
}
|
|
588
|
+
]
|
|
589
|
+
};
|
|
590
|
+
}
|
|
591
|
+
const queryTokens = tokenize(query);
|
|
592
|
+
if (queryTokens.length === 0) {
|
|
593
|
+
return {
|
|
594
|
+
content: [
|
|
595
|
+
{
|
|
596
|
+
type: "text",
|
|
597
|
+
text: JSON.stringify(
|
|
598
|
+
{
|
|
599
|
+
error: true,
|
|
600
|
+
message: "Query is empty or contains only stop words.",
|
|
601
|
+
query
|
|
602
|
+
},
|
|
603
|
+
null,
|
|
604
|
+
2
|
|
605
|
+
)
|
|
606
|
+
}
|
|
607
|
+
]
|
|
608
|
+
};
|
|
609
|
+
}
|
|
610
|
+
const vocabulary = new Set(queryTokens);
|
|
611
|
+
const idf = calculateIDF(segments, vocabulary);
|
|
612
|
+
const avgDocLength = segments.reduce((sum, seg) => sum + tokenize(seg.text).length, 0) / segments.length;
|
|
613
|
+
const scoredSegments = segments.map((segment) => ({
|
|
614
|
+
...segment,
|
|
615
|
+
score: bm25Score(tokenize(segment.text), queryTokens, idf, avgDocLength)
|
|
616
|
+
}));
|
|
617
|
+
const results = scoredSegments.filter((seg) => seg.score > 0).sort((a, b) => b.score - a.score).slice(0, maxResults);
|
|
249
618
|
return {
|
|
250
619
|
content: [
|
|
251
620
|
{
|
|
252
621
|
type: "text",
|
|
253
622
|
text: JSON.stringify(
|
|
254
623
|
{
|
|
255
|
-
|
|
624
|
+
videoId: transcript2.videoId,
|
|
625
|
+
title: transcript2.title,
|
|
626
|
+
query,
|
|
627
|
+
totalSegments: segments.length,
|
|
628
|
+
matchingResults: results.length,
|
|
629
|
+
results: results.map((r) => ({
|
|
630
|
+
text: r.text,
|
|
631
|
+
startTime: r.startTime,
|
|
632
|
+
endTime: r.endTime,
|
|
633
|
+
timeRange: `${r.startFormatted} - ${r.endFormatted}`,
|
|
634
|
+
score: Math.round(r.score * 100) / 100
|
|
635
|
+
})),
|
|
636
|
+
usage: results.length > 0 ? `Use get_transcript with startTime: ${results[0].startTime} and endTime: ${results[0].endTime} to get full context for the top result.` : "No matches found. Try different keywords."
|
|
256
637
|
},
|
|
257
638
|
null,
|
|
258
639
|
2
|
|
@@ -311,11 +692,10 @@ function truncateText(text, maxLength) {
|
|
|
311
692
|
function truncateTranscripts(transcripts) {
|
|
312
693
|
return transcripts.map((transcript2) => ({
|
|
313
694
|
...transcript2,
|
|
314
|
-
fullTranscript: truncateText(transcript2.fullTranscript, TRANSCRIPT_PREVIEW_LENGTH)
|
|
315
|
-
readableTranscript: truncateText(transcript2.readableTranscript, TRANSCRIPT_PREVIEW_LENGTH)
|
|
695
|
+
fullTranscript: truncateText(transcript2.fullTranscript, TRANSCRIPT_PREVIEW_LENGTH)
|
|
316
696
|
}));
|
|
317
697
|
}
|
|
318
|
-
async function handleFindTranscripts(
|
|
698
|
+
async function handleFindTranscripts(strapi, args) {
|
|
319
699
|
const validatedArgs = validateToolInput("find_transcripts", args);
|
|
320
700
|
const { query, videoId, title, includeFullContent, page, pageSize, sort } = validatedArgs;
|
|
321
701
|
const start = (page - 1) * pageSize;
|
|
@@ -330,17 +710,16 @@ async function handleFindTranscripts(strapi2, args) {
|
|
|
330
710
|
filters.$or = [
|
|
331
711
|
{ title: { $containsi: query } },
|
|
332
712
|
{ videoId: { $containsi: query } },
|
|
333
|
-
{ fullTranscript: { $containsi: query } }
|
|
334
|
-
{ readableTranscript: { $containsi: query } }
|
|
713
|
+
{ fullTranscript: { $containsi: query } }
|
|
335
714
|
];
|
|
336
715
|
}
|
|
337
|
-
const transcripts = await
|
|
716
|
+
const transcripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
|
|
338
717
|
filters,
|
|
339
718
|
sort,
|
|
340
719
|
limit: pageSize,
|
|
341
720
|
start
|
|
342
721
|
});
|
|
343
|
-
const allMatching = await
|
|
722
|
+
const allMatching = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
|
|
344
723
|
filters
|
|
345
724
|
});
|
|
346
725
|
const total = allMatching.length;
|
|
@@ -376,15 +755,17 @@ const tools = [
|
|
|
376
755
|
fetchTranscriptTool,
|
|
377
756
|
listTranscriptsTool,
|
|
378
757
|
getTranscriptTool,
|
|
758
|
+
searchTranscriptTool,
|
|
379
759
|
findTranscriptsTool
|
|
380
760
|
];
|
|
381
761
|
const toolHandlers = {
|
|
382
762
|
fetch_transcript: handleFetchTranscript,
|
|
383
763
|
list_transcripts: handleListTranscripts,
|
|
384
764
|
get_transcript: handleGetTranscript,
|
|
765
|
+
search_transcript: handleSearchTranscript,
|
|
385
766
|
find_transcripts: handleFindTranscripts
|
|
386
767
|
};
|
|
387
|
-
async function handleToolCall(
|
|
768
|
+
async function handleToolCall(strapi, request) {
|
|
388
769
|
const { name, arguments: args } = request.params;
|
|
389
770
|
const handler = toolHandlers[name];
|
|
390
771
|
if (!handler) {
|
|
@@ -392,13 +773,13 @@ async function handleToolCall(strapi2, request) {
|
|
|
392
773
|
}
|
|
393
774
|
const startTime = Date.now();
|
|
394
775
|
try {
|
|
395
|
-
const result = await handler(
|
|
776
|
+
const result = await handler(strapi, args || {});
|
|
396
777
|
const duration = Date.now() - startTime;
|
|
397
|
-
|
|
778
|
+
strapi.log.debug(`[yt-transcript-mcp] Tool ${name} executed successfully in ${duration}ms`);
|
|
398
779
|
return result;
|
|
399
780
|
} catch (error) {
|
|
400
781
|
const duration = Date.now() - startTime;
|
|
401
|
-
|
|
782
|
+
strapi.log.error(`[yt-transcript-mcp] Tool ${name} failed after ${duration}ms`, {
|
|
402
783
|
error: error instanceof Error ? error.message : String(error)
|
|
403
784
|
});
|
|
404
785
|
return {
|
|
@@ -419,7 +800,7 @@ async function handleToolCall(strapi2, request) {
|
|
|
419
800
|
};
|
|
420
801
|
}
|
|
421
802
|
}
|
|
422
|
-
function createMcpServer(
|
|
803
|
+
function createMcpServer(strapi) {
|
|
423
804
|
const server = new Server(
|
|
424
805
|
{
|
|
425
806
|
name: "yt-transcript-mcp",
|
|
@@ -432,53 +813,57 @@ function createMcpServer(strapi2) {
|
|
|
432
813
|
}
|
|
433
814
|
);
|
|
434
815
|
server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
435
|
-
|
|
816
|
+
strapi.log.debug("[yt-transcript-mcp] Listing tools");
|
|
436
817
|
return { tools };
|
|
437
818
|
});
|
|
438
819
|
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
439
|
-
|
|
440
|
-
return handleToolCall(
|
|
820
|
+
strapi.log.debug(`[yt-transcript-mcp] Tool call: ${request.params.name}`);
|
|
821
|
+
return handleToolCall(strapi, request);
|
|
441
822
|
});
|
|
442
|
-
|
|
823
|
+
strapi.log.info("[yt-transcript-mcp] MCP server created with tools:", {
|
|
443
824
|
tools: tools.map((t) => t.name)
|
|
444
825
|
});
|
|
445
826
|
return server;
|
|
446
827
|
}
|
|
447
|
-
const bootstrap = async ({ strapi
|
|
448
|
-
const plugin =
|
|
449
|
-
plugin.createMcpServer = () => createMcpServer(
|
|
828
|
+
const bootstrap = async ({ strapi }) => {
|
|
829
|
+
const plugin = strapi.plugin("yt-transcript-strapi-plugin");
|
|
830
|
+
plugin.createMcpServer = () => createMcpServer(strapi);
|
|
450
831
|
plugin.sessions = /* @__PURE__ */ new Map();
|
|
451
|
-
|
|
452
|
-
|
|
832
|
+
strapi.log.info("[yt-transcript-mcp] MCP plugin initialized");
|
|
833
|
+
strapi.log.info("[yt-transcript-mcp] MCP endpoint available at: /api/yt-transcript-strapi-plugin/mcp");
|
|
453
834
|
};
|
|
454
|
-
const destroy = ({ strapi
|
|
835
|
+
const destroy = ({ strapi }) => {
|
|
455
836
|
};
|
|
456
|
-
const register = ({ strapi
|
|
837
|
+
const register = ({ strapi }) => {
|
|
457
838
|
};
|
|
458
839
|
const config = {
|
|
459
840
|
default: {
|
|
460
|
-
|
|
461
|
-
model: "gpt-4o-mini",
|
|
462
|
-
temp: 0.7,
|
|
463
|
-
maxTokens: 4096,
|
|
464
|
-
proxyUrl: ""
|
|
841
|
+
proxyUrl: "",
|
|
465
842
|
// Optional: HTTP/HTTPS proxy for YouTube requests (e.g., 'http://user:pass@proxy.example.com:8080')
|
|
843
|
+
chunkSizeSeconds: 300,
|
|
844
|
+
// Default chunk size for transcript pagination (5 minutes)
|
|
845
|
+
previewLength: 500,
|
|
846
|
+
// Default preview length in characters
|
|
847
|
+
maxFullTranscriptLength: 5e4,
|
|
848
|
+
// Auto-load full transcript if under this character count (~12K tokens)
|
|
849
|
+
searchSegmentSeconds: 30
|
|
850
|
+
// Segment size for BM25 search scoring
|
|
466
851
|
},
|
|
467
852
|
validator(config2) {
|
|
468
|
-
if (config2.
|
|
469
|
-
throw new Error("
|
|
853
|
+
if (config2.proxyUrl && typeof config2.proxyUrl !== "string") {
|
|
854
|
+
throw new Error("proxyUrl must be a string");
|
|
470
855
|
}
|
|
471
|
-
if (config2.
|
|
472
|
-
throw new Error("
|
|
856
|
+
if (config2.chunkSizeSeconds !== void 0 && (typeof config2.chunkSizeSeconds !== "number" || config2.chunkSizeSeconds < 30)) {
|
|
857
|
+
throw new Error("chunkSizeSeconds must be a number >= 30");
|
|
473
858
|
}
|
|
474
|
-
if (config2.
|
|
475
|
-
throw new Error("
|
|
859
|
+
if (config2.previewLength !== void 0 && (typeof config2.previewLength !== "number" || config2.previewLength < 100)) {
|
|
860
|
+
throw new Error("previewLength must be a number >= 100");
|
|
476
861
|
}
|
|
477
|
-
if (config2.
|
|
478
|
-
throw new Error("
|
|
862
|
+
if (config2.maxFullTranscriptLength !== void 0 && (typeof config2.maxFullTranscriptLength !== "number" || config2.maxFullTranscriptLength < 1e3)) {
|
|
863
|
+
throw new Error("maxFullTranscriptLength must be a number >= 1000");
|
|
479
864
|
}
|
|
480
|
-
if (config2.
|
|
481
|
-
throw new Error("
|
|
865
|
+
if (config2.searchSegmentSeconds !== void 0 && (typeof config2.searchSegmentSeconds !== "number" || config2.searchSegmentSeconds < 10)) {
|
|
866
|
+
throw new Error("searchSegmentSeconds must be a number >= 10");
|
|
482
867
|
}
|
|
483
868
|
}
|
|
484
869
|
};
|
|
@@ -512,9 +897,6 @@ const attributes = {
|
|
|
512
897
|
},
|
|
513
898
|
transcriptWithTimeCodes: {
|
|
514
899
|
type: "json"
|
|
515
|
-
},
|
|
516
|
-
readableTranscript: {
|
|
517
|
-
type: "richtext"
|
|
518
900
|
}
|
|
519
901
|
};
|
|
520
902
|
const schema = {
|
|
@@ -531,41 +913,34 @@ const transcript = {
|
|
|
531
913
|
const contentTypes = {
|
|
532
914
|
transcript
|
|
533
915
|
};
|
|
534
|
-
const controller = ({ strapi
|
|
916
|
+
const controller = ({ strapi }) => ({
|
|
535
917
|
async getTranscript(ctx) {
|
|
536
918
|
const videoId = extractYouTubeID(ctx.params.videoId);
|
|
537
919
|
if (!videoId) {
|
|
538
920
|
return ctx.body = { error: "Invalid YouTube URL or ID", data: null };
|
|
539
921
|
}
|
|
540
|
-
const found = await
|
|
922
|
+
const found = await strapi.plugin("yt-transcript-strapi-plugin").service("service").findTranscript(videoId);
|
|
541
923
|
if (found) {
|
|
542
924
|
return ctx.body = { data: found };
|
|
543
925
|
}
|
|
544
|
-
const transcriptData = await
|
|
545
|
-
let readableTranscript = null;
|
|
546
|
-
try {
|
|
547
|
-
readableTranscript = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").generateHumanReadableTranscript(transcriptData.fullTranscript);
|
|
548
|
-
} catch (error) {
|
|
549
|
-
strapi2.log.debug("[yt-transcript] Readable transcript generation skipped");
|
|
550
|
-
}
|
|
926
|
+
const transcriptData = await strapi.plugin("yt-transcript-strapi-plugin").service("service").getTranscript(videoId);
|
|
551
927
|
const payload = {
|
|
552
928
|
videoId,
|
|
553
929
|
title: transcriptData?.title || "No title found",
|
|
554
930
|
fullTranscript: transcriptData?.fullTranscript,
|
|
555
|
-
transcriptWithTimeCodes: transcriptData?.transcriptWithTimeCodes
|
|
556
|
-
readableTranscript
|
|
931
|
+
transcriptWithTimeCodes: transcriptData?.transcriptWithTimeCodes
|
|
557
932
|
};
|
|
558
|
-
const transcript2 = await
|
|
933
|
+
const transcript2 = await strapi.plugin("yt-transcript-strapi-plugin").service("service").saveTranscript(payload);
|
|
559
934
|
ctx.body = { data: transcript2 };
|
|
560
935
|
}
|
|
561
936
|
});
|
|
562
|
-
const mcpController = ({ strapi
|
|
937
|
+
const mcpController = ({ strapi }) => ({
|
|
563
938
|
/**
|
|
564
939
|
* Handle MCP requests (POST, GET, DELETE)
|
|
565
940
|
* Creates a new server+transport per session for proper isolation
|
|
566
941
|
*/
|
|
567
942
|
async handle(ctx) {
|
|
568
|
-
const plugin =
|
|
943
|
+
const plugin = strapi.plugin("yt-transcript-strapi-plugin");
|
|
569
944
|
if (!plugin.createMcpServer) {
|
|
570
945
|
ctx.status = 503;
|
|
571
946
|
ctx.body = {
|
|
@@ -585,12 +960,12 @@ const mcpController = ({ strapi: strapi2 }) => ({
|
|
|
585
960
|
await server.connect(transport);
|
|
586
961
|
session = { server, transport, createdAt: Date.now() };
|
|
587
962
|
plugin.sessions.set(sessionId, session);
|
|
588
|
-
|
|
963
|
+
strapi.log.debug(`[yt-transcript-mcp] New session created: ${sessionId}`);
|
|
589
964
|
}
|
|
590
965
|
await session.transport.handleRequest(ctx.req, ctx.res, ctx.request.body);
|
|
591
966
|
ctx.respond = false;
|
|
592
967
|
} catch (error) {
|
|
593
|
-
|
|
968
|
+
strapi.log.error("[yt-transcript-mcp] Error handling MCP request", {
|
|
594
969
|
error: error instanceof Error ? error.message : String(error),
|
|
595
970
|
method: ctx.method,
|
|
596
971
|
path: ctx.path
|
|
@@ -670,18 +1045,6 @@ const routes = {
|
|
|
670
1045
|
routes: [...admin]
|
|
671
1046
|
}
|
|
672
1047
|
};
|
|
673
|
-
async function initializeModel({
|
|
674
|
-
openAIApiKey,
|
|
675
|
-
model,
|
|
676
|
-
temp
|
|
677
|
-
}) {
|
|
678
|
-
return new ChatOpenAI({
|
|
679
|
-
temperature: temp,
|
|
680
|
-
openAIApiKey,
|
|
681
|
-
modelName: model,
|
|
682
|
-
maxTokens: 1e3
|
|
683
|
-
});
|
|
684
|
-
}
|
|
685
1048
|
function isRequestLike(input) {
|
|
686
1049
|
return typeof input === "object" && input !== null && "url" in input && typeof input.url === "string" && "method" in input;
|
|
687
1050
|
}
|
|
@@ -824,49 +1187,14 @@ const fetchTranscript = async (videoId, options2) => {
|
|
|
824
1187
|
);
|
|
825
1188
|
}
|
|
826
1189
|
};
|
|
827
|
-
|
|
828
|
-
const punctuationPrompt = PromptTemplate.fromTemplate(
|
|
829
|
-
"Add proper punctuation and capitalization to the following text chunk:\n\n{chunk}"
|
|
830
|
-
);
|
|
831
|
-
const punctuationChain = punctuationPrompt.pipe(model);
|
|
832
|
-
const processedChunks = await Promise.all(
|
|
833
|
-
chunks.map(async (chunk) => {
|
|
834
|
-
const result = await punctuationChain.invoke({ chunk });
|
|
835
|
-
return result.content;
|
|
836
|
-
})
|
|
837
|
-
);
|
|
838
|
-
return processedChunks.join(" ");
|
|
839
|
-
}
|
|
840
|
-
async function generateModifiedTranscript(rawTranscript) {
|
|
841
|
-
const pluginSettings = await strapi.config.get(
|
|
842
|
-
"plugin::yt-transcript-strapi-plugin"
|
|
843
|
-
);
|
|
844
|
-
if (!pluginSettings.openAIApiKey || !pluginSettings.model || !pluginSettings.temp || !pluginSettings.maxTokens) {
|
|
845
|
-
throw new Error("Missing required configuration for YTTranscript");
|
|
846
|
-
}
|
|
847
|
-
const chatModel = await initializeModel({
|
|
848
|
-
openAIApiKey: pluginSettings.openAIApiKey,
|
|
849
|
-
model: pluginSettings.model,
|
|
850
|
-
temp: pluginSettings.temp,
|
|
851
|
-
maxTokens: pluginSettings.maxTokens
|
|
852
|
-
});
|
|
853
|
-
const splitter = new TokenTextSplitter({
|
|
854
|
-
chunkSize: 1e3,
|
|
855
|
-
chunkOverlap: 200
|
|
856
|
-
});
|
|
857
|
-
const transcriptChunks = await splitter.createDocuments([rawTranscript]);
|
|
858
|
-
const chunkTexts = transcriptChunks.map((chunk) => chunk.pageContent);
|
|
859
|
-
const modifiedTranscript = await processTextChunks(chunkTexts, chatModel);
|
|
860
|
-
return modifiedTranscript;
|
|
861
|
-
}
|
|
862
|
-
const service = ({ strapi: strapi2 }) => ({
|
|
1190
|
+
const service = ({ strapi }) => ({
|
|
863
1191
|
async getTranscript(identifier) {
|
|
864
1192
|
const youtubeIdRegex = /^[a-zA-Z0-9_-]{11}$/;
|
|
865
1193
|
const isValid = youtubeIdRegex.test(identifier);
|
|
866
1194
|
if (!isValid) {
|
|
867
1195
|
return { error: "Invalid video ID", data: null };
|
|
868
1196
|
}
|
|
869
|
-
const pluginSettings = await
|
|
1197
|
+
const pluginSettings = await strapi.config.get(
|
|
870
1198
|
"plugin::yt-transcript-strapi-plugin"
|
|
871
1199
|
);
|
|
872
1200
|
const transcriptData = await fetchTranscript(identifier, {
|
|
@@ -879,20 +1207,16 @@ const service = ({ strapi: strapi2 }) => ({
|
|
|
879
1207
|
};
|
|
880
1208
|
},
|
|
881
1209
|
async saveTranscript(payload) {
|
|
882
|
-
return await
|
|
1210
|
+
return await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").create({
|
|
883
1211
|
data: payload
|
|
884
1212
|
});
|
|
885
1213
|
},
|
|
886
1214
|
async findTranscript(videoId) {
|
|
887
|
-
const transcriptData = await
|
|
1215
|
+
const transcriptData = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findFirst({
|
|
888
1216
|
filters: { videoId }
|
|
889
1217
|
});
|
|
890
1218
|
if (!transcriptData) return null;
|
|
891
1219
|
return transcriptData;
|
|
892
|
-
},
|
|
893
|
-
async generateHumanReadableTranscript(transcript2) {
|
|
894
|
-
const modifiedTranscript = await generateModifiedTranscript(transcript2);
|
|
895
|
-
return modifiedTranscript;
|
|
896
1220
|
}
|
|
897
1221
|
});
|
|
898
1222
|
const services = {
|