yt-transcript-strapi-plugin 0.0.21 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/index.js +477 -153
- package/dist/server/index.mjs +477 -153
- package/dist/server/src/config/index.d.ts +8 -8
- package/dist/server/src/content-types/index.d.ts +0 -3
- package/dist/server/src/content-types/transcript/index.d.ts +0 -3
- package/dist/server/src/index.d.ts +8 -12
- package/dist/server/src/mcp/schemas/index.d.ts +63 -6
- package/dist/server/src/mcp/tools/fetch-transcript.d.ts +0 -5
- package/dist/server/src/mcp/tools/get-transcript.d.ts +26 -0
- package/dist/server/src/mcp/tools/index.d.ts +13 -13
- package/dist/server/src/mcp/tools/search-transcript.d.ts +30 -0
- package/dist/server/src/services/index.d.ts +0 -1
- package/dist/server/src/services/service.d.ts +0 -2
- package/package.json +7 -6
- package/dist/server/src/utils/openai.d.ts +0 -9
package/dist/server/index.js
CHANGED
@@ -4,14 +4,10 @@ const types_js = require("@modelcontextprotocol/sdk/types.js");
 const zod = require("zod");
 const node_crypto = require("node:crypto");
 const streamableHttp_js = require("@modelcontextprotocol/sdk/server/streamableHttp.js");
-const textsplitters = require("@langchain/textsplitters");
-const prompts = require("@langchain/core/prompts");
-const openai = require("@langchain/openai");
 const youtubei_js = require("youtubei.js");
 const undici = require("undici");
 const FetchTranscriptSchema = zod.z.object({
-  videoId: zod.z.string().min(1, "Video ID or URL is required")
-  generateReadable: zod.z.boolean().optional().default(false)
+  videoId: zod.z.string().min(1, "Video ID or URL is required")
 });
 const ListTranscriptsSchema = zod.z.object({
   page: zod.z.number().int().min(1).optional().default(1),
@@ -19,7 +15,18 @@ const ListTranscriptsSchema = zod.z.object({
   sort: zod.z.string().optional().default("createdAt:desc")
 });
 const GetTranscriptSchema = zod.z.object({
-  videoId: zod.z.string().min(1, "Video ID is required")
+  videoId: zod.z.string().min(1, "Video ID is required"),
+  includeFullTranscript: zod.z.boolean().optional().default(false),
+  includeTimecodes: zod.z.boolean().optional().default(false),
+  startTime: zod.z.number().min(0).optional(),
+  endTime: zod.z.number().min(0).optional(),
+  chunkIndex: zod.z.number().int().min(0).optional(),
+  chunkSize: zod.z.number().int().min(30).optional()
+});
+const SearchTranscriptSchema = zod.z.object({
+  videoId: zod.z.string().min(1, "Video ID is required"),
+  query: zod.z.string().min(1, "Search query is required"),
+  maxResults: zod.z.number().int().min(1).max(20).optional().default(5)
 });
 const FindTranscriptsSchema = zod.z.object({
   query: zod.z.string().optional(),
@@ -34,6 +41,7 @@ const ToolSchemas = {
   fetch_transcript: FetchTranscriptSchema,
   list_transcripts: ListTranscriptsSchema,
   get_transcript: GetTranscriptSchema,
+  search_transcript: SearchTranscriptSchema,
   find_transcripts: FindTranscriptsSchema
 };
 function validateToolInput(toolName, input) {
@@ -68,31 +76,64 @@ function extractYouTubeID(urlOrID) {
 }
 const fetchTranscriptTool = {
   name: "fetch_transcript",
-  description: "Fetch a transcript from YouTube for a given video ID or URL.
+  description: "Fetch a transcript from YouTube for a given video ID or URL. The transcript is saved to the database. Returns metadata and preview only to avoid context overflow. Use get_transcript to retrieve content.",
   inputSchema: {
     type: "object",
     properties: {
       videoId: {
         type: "string",
         description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
-      },
-      generateReadable: {
-        type: "boolean",
-        description: "If true, uses AI to add punctuation and formatting to make the transcript more readable. Requires OpenAI API key configuration.",
-        default: false
       }
     },
     required: ["videoId"]
   }
 };
-
+function getVideoDurationMs$1(timecodes) {
+  if (!timecodes || timecodes.length === 0) return 0;
+  const lastEntry = timecodes[timecodes.length - 1];
+  return lastEntry.end || lastEntry.start + (lastEntry.duration || 0);
+}
+function formatTime$2(ms) {
+  const totalSeconds = Math.floor(ms / 1e3);
+  const hours = Math.floor(totalSeconds / 3600);
+  const minutes = Math.floor(totalSeconds % 3600 / 60);
+  const seconds = totalSeconds % 60;
+  if (hours > 0) {
+    return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+  }
+  return `${minutes}:${seconds.toString().padStart(2, "0")}`;
+}
+function buildMetadataResponse(transcript2, previewLength, cached) {
+  const fullText = transcript2.fullTranscript || "";
+  const timecodes = transcript2.transcriptWithTimeCodes || [];
+  const durationMs = getVideoDurationMs$1(timecodes);
+  const wordCount = fullText.split(/\s+/).length;
+  const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
+  return {
+    message: cached ? "Transcript already exists in database" : "Transcript fetched and saved successfully",
+    cached,
+    videoId: transcript2.videoId,
+    title: transcript2.title,
+    metadata: {
+      wordCount,
+      characterCount: fullText.length,
+      duration: formatTime$2(durationMs),
+      durationSeconds: Math.floor(durationMs / 1e3)
+    },
+    preview,
+    usage: "Use get_transcript with videoId to retrieve full content, specific time ranges, or paginated chunks."
+  };
+}
+async function handleFetchTranscript(strapi, args) {
   const validatedArgs = validateToolInput("fetch_transcript", args);
-  const { videoId: videoIdOrUrl
+  const { videoId: videoIdOrUrl } = validatedArgs;
+  const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
+  const previewLength = pluginConfig?.previewLength || 500;
   const videoId = extractYouTubeID(videoIdOrUrl);
   if (!videoId) {
     throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
   }
-  const service2 =
+  const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
   const existingTranscript = await service2.findTranscript(videoId);
   if (existingTranscript) {
     return {
@@ -100,11 +141,7 @@ async function handleFetchTranscript(strapi2, args) {
         {
           type: "text",
           text: JSON.stringify(
-
-              message: "Transcript already exists in database",
-              data: existingTranscript,
-              cached: true
-            },
+            buildMetadataResponse(existingTranscript, previewLength, true),
             null,
             2
           )
@@ -122,25 +159,13 @@ async function handleFetchTranscript(strapi2, args) {
     fullTranscript: transcriptData.fullTranscript,
     transcriptWithTimeCodes: transcriptData.transcriptWithTimeCodes
   };
-  if (generateReadable && transcriptData.fullTranscript) {
-    try {
-      const readableTranscript = await service2.generateHumanReadableTranscript(transcriptData.fullTranscript);
-      payload.readableTranscript = readableTranscript;
-    } catch (error) {
-      strapi2.log.warn("[yt-transcript-mcp] Failed to generate readable transcript:", error);
-    }
-  }
   const savedTranscript = await service2.saveTranscript(payload);
   return {
     content: [
       {
         type: "text",
         text: JSON.stringify(
-
-            message: "Transcript fetched and saved successfully",
-            data: savedTranscript,
-            cached: false
-          },
+          buildMetadataResponse(savedTranscript, previewLength, false),
           null,
           2
         )
@@ -173,17 +198,17 @@ const listTranscriptsTool = {
     required: []
   }
 };
-async function handleListTranscripts(
+async function handleListTranscripts(strapi, args) {
   const validatedArgs = validateToolInput("list_transcripts", args);
   const { page, pageSize, sort } = validatedArgs;
   const start = (page - 1) * pageSize;
-  const transcripts = await
+  const transcripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
     sort,
     limit: pageSize,
     start,
     fields: ["id", "documentId", "title", "videoId", "createdAt", "updatedAt"]
   });
-  const allTranscripts = await
+  const allTranscripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({});
   const total = allTranscripts.length;
   return {
     content: [
@@ -208,26 +233,305 @@ async function handleListTranscripts(strapi2, args) {
 }
 const getTranscriptTool = {
   name: "get_transcript",
-  description: "Get a
+  description: "Get a saved transcript by YouTube video ID. Returns metadata and preview by default. Use parameters to get full content or specific time ranges to avoid context overflow.",
   inputSchema: {
     type: "object",
     properties: {
       videoId: {
         type: "string",
         description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
+      },
+      includeFullTranscript: {
+        type: "boolean",
+        description: "Include the complete transcript text. Warning: may cause context overflow for long videos. Default: false",
+        default: false
+      },
+      includeTimecodes: {
+        type: "boolean",
+        description: "Include the transcript with timecodes array. Warning: significantly increases response size. Default: false",
+        default: false
+      },
+      startTime: {
+        type: "number",
+        description: "Start time in seconds for fetching a specific portion of the transcript"
+      },
+      endTime: {
+        type: "number",
+        description: "End time in seconds for fetching a specific portion of the transcript"
+      },
+      chunkIndex: {
+        type: "number",
+        description: "Chunk index (0-based) when paginating through transcript. Use with chunkSize to paginate through long videos."
+      },
+      chunkSize: {
+        type: "number",
+        description: "Chunk size in seconds. Overrides config default. Use with chunkIndex for pagination."
       }
     },
     required: ["videoId"]
   }
 };
-
+function getTranscriptForTimeRange(timecodes, startTimeMs, endTimeMs) {
+  const entries = timecodes.filter(
+    (entry) => entry.start >= startTimeMs && entry.start < endTimeMs
+  );
+  const text = entries.map((e) => e.text).join(" ");
+  return { text, entries };
+}
+function getVideoDurationMs(timecodes) {
+  if (!timecodes || timecodes.length === 0) return 0;
+  const lastEntry = timecodes[timecodes.length - 1];
+  return lastEntry.end || lastEntry.start + (lastEntry.duration || 0);
+}
+function formatTime$1(ms) {
+  const totalSeconds = Math.floor(ms / 1e3);
+  const hours = Math.floor(totalSeconds / 3600);
+  const minutes = Math.floor(totalSeconds % 3600 / 60);
+  const seconds = totalSeconds % 60;
+  if (hours > 0) {
+    return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+  }
+  return `${minutes}:${seconds.toString().padStart(2, "0")}`;
+}
+async function handleGetTranscript(strapi, args) {
   const validatedArgs = validateToolInput("get_transcript", args);
-  const {
+  const {
+    videoId: videoIdOrUrl,
+    includeFullTranscript,
+    includeTimecodes,
+    startTime,
+    endTime,
+    chunkIndex,
+    chunkSize: chunkSizeOverride
+  } = validatedArgs;
+  const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
+  const defaultChunkSize = pluginConfig?.chunkSizeSeconds || 300;
+  const previewLength = pluginConfig?.previewLength || 500;
+  const maxFullTranscriptLength = pluginConfig?.maxFullTranscriptLength || 5e4;
+  const chunkSizeSeconds = chunkSizeOverride || defaultChunkSize;
+  const videoId = extractYouTubeID(videoIdOrUrl);
+  if (!videoId) {
+    throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
+  }
+  const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
+  const transcript2 = await service2.findTranscript(videoId);
+  if (!transcript2) {
+    return {
+      content: [
+        {
+          type: "text",
+          text: JSON.stringify(
+            {
+              error: true,
+              message: `No transcript found for video ID: ${videoId}. Use fetch_transcript to fetch it from YouTube first.`,
+              videoId
+            },
+            null,
+            2
+          )
+        }
+      ]
+    };
+  }
+  const timecodes = transcript2.transcriptWithTimeCodes || [];
+  const fullText = transcript2.fullTranscript || "";
+  const durationMs = getVideoDurationMs(timecodes);
+  const totalChunks = Math.ceil(durationMs / (chunkSizeSeconds * 1e3));
+  const wordCount = fullText.split(/\s+/).length;
+  const response = {
+    videoId: transcript2.videoId,
+    title: transcript2.title,
+    metadata: {
+      wordCount,
+      characterCount: fullText.length,
+      duration: formatTime$1(durationMs),
+      durationSeconds: Math.floor(durationMs / 1e3),
+      totalChunks,
+      chunkSizeSeconds
+    }
+  };
+  if (startTime !== void 0 || endTime !== void 0) {
+    const startMs = (startTime || 0) * 1e3;
+    const endMs = endTime !== void 0 ? endTime * 1e3 : durationMs;
+    const { text, entries } = getTranscriptForTimeRange(timecodes, startMs, endMs);
+    response.timeRange = {
+      startTime: startTime || 0,
+      endTime: endTime || Math.floor(durationMs / 1e3),
+      startFormatted: formatTime$1(startMs),
+      endFormatted: formatTime$1(endMs)
+    };
+    response.transcript = text;
+    if (includeTimecodes) {
+      response.transcriptWithTimeCodes = entries;
+    }
+  } else if (chunkIndex !== void 0) {
+    const chunkStartMs = chunkIndex * chunkSizeSeconds * 1e3;
+    const chunkEndMs = Math.min((chunkIndex + 1) * chunkSizeSeconds * 1e3, durationMs);
+    if (chunkStartMs >= durationMs) {
+      response.error = `Chunk index ${chunkIndex} is out of range. Total chunks: ${totalChunks} (0-${totalChunks - 1})`;
+    } else {
+      const { text, entries } = getTranscriptForTimeRange(timecodes, chunkStartMs, chunkEndMs);
+      response.chunk = {
+        index: chunkIndex,
+        totalChunks,
+        startTime: Math.floor(chunkStartMs / 1e3),
+        endTime: Math.floor(chunkEndMs / 1e3),
+        startFormatted: formatTime$1(chunkStartMs),
+        endFormatted: formatTime$1(chunkEndMs)
+      };
+      response.transcript = text;
+      if (includeTimecodes) {
+        response.transcriptWithTimeCodes = entries;
+      }
+      if (chunkIndex < totalChunks - 1) {
+        response.nextChunk = `Use chunkIndex: ${chunkIndex + 1} to get the next portion`;
+      }
+      if (chunkIndex > 0) {
+        response.previousChunk = `Use chunkIndex: ${chunkIndex - 1} to get the previous portion`;
+      }
+    }
+  } else if (includeFullTranscript || fullText.length <= maxFullTranscriptLength) {
+    response.transcript = fullText;
+    if (includeTimecodes) {
+      response.transcriptWithTimeCodes = timecodes;
+    }
+    if (includeFullTranscript && fullText.length > maxFullTranscriptLength) {
+      response.warning = "Full transcript included. For long videos, consider using chunkIndex, startTime/endTime, or search_transcript to reduce response size.";
+    } else if (fullText.length <= maxFullTranscriptLength) {
+      response.note = "Full transcript auto-loaded (fits within context limit).";
+    }
+  } else {
+    const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
+    response.preview = preview;
+    response.isLargeTranscript = true;
+    response.usage = {
+      fullTranscript: "Set includeFullTranscript: true to get complete text (warning: may exceed context)",
+      search: "Use search_transcript to find relevant portions by keyword (recommended for large transcripts)",
+      timeRange: "Use startTime and endTime (in seconds) to get a specific portion",
+      pagination: `Use chunkIndex (0-${totalChunks - 1}) to paginate through ${chunkSizeSeconds}s chunks`
+    };
+  }
+  return {
+    content: [
+      {
+        type: "text",
+        text: JSON.stringify(response, null, 2)
+      }
+    ]
+  };
+}
+const searchTranscriptTool = {
+  name: "search_transcript",
+  description: "Search within a saved transcript using BM25 scoring. Returns the most relevant segments matching your query with timestamps. Use this to find specific content in long videos without loading the entire transcript.",
+  inputSchema: {
+    type: "object",
+    properties: {
+      videoId: {
+        type: "string",
+        description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
+      },
+      query: {
+        type: "string",
+        description: "Search query - keywords or phrases to find in the transcript"
+      },
+      maxResults: {
+        type: "number",
+        description: "Maximum number of results to return (default: 5, max: 20)",
+        default: 5
+      }
+    },
+    required: ["videoId", "query"]
+  }
+};
+function tokenize(text) {
+  return text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((word) => word.length > 1);
+}
+function calculateIDF(segments, vocabulary) {
+  const idf = /* @__PURE__ */ new Map();
+  const N = segments.length;
+  for (const term of vocabulary) {
+    const docsWithTerm = segments.filter(
+      (seg) => tokenize(seg.text).includes(term)
+    ).length;
+    idf.set(term, Math.log((N - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1));
+  }
+  return idf;
+}
+function bm25Score(segmentTokens, queryTokens, idf, avgDocLength, k1 = 1.5, b = 0.75) {
+  const docLength = segmentTokens.length;
+  let score = 0;
+  const tf = /* @__PURE__ */ new Map();
+  for (const token of segmentTokens) {
+    tf.set(token, (tf.get(token) || 0) + 1);
+  }
+  for (const term of queryTokens) {
+    const termFreq = tf.get(term) || 0;
+    const termIdf = idf.get(term) || 0;
+    if (termFreq > 0) {
+      const numerator = termFreq * (k1 + 1);
+      const denominator = termFreq + k1 * (1 - b + b * (docLength / avgDocLength));
+      score += termIdf * (numerator / denominator);
+    }
+  }
+  return score;
+}
+function formatTime(ms) {
+  const totalSeconds = Math.floor(ms / 1e3);
+  const hours = Math.floor(totalSeconds / 3600);
+  const minutes = Math.floor(totalSeconds % 3600 / 60);
+  const seconds = totalSeconds % 60;
+  if (hours > 0) {
+    return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+  }
+  return `${minutes}:${seconds.toString().padStart(2, "0")}`;
+}
+function createSegments(timecodes, segmentDurationMs) {
+  if (!timecodes || timecodes.length === 0) return [];
+  const segments = [];
+  let currentSegment = [];
+  let segmentStartTime = timecodes[0].start;
+  for (const entry of timecodes) {
+    const segmentEndTime = segmentStartTime + segmentDurationMs;
+    if (entry.start < segmentEndTime) {
+      currentSegment.push(entry);
+    } else {
+      if (currentSegment.length > 0) {
+        const endTime = currentSegment[currentSegment.length - 1].end || currentSegment[currentSegment.length - 1].start + (currentSegment[currentSegment.length - 1].duration || 0);
+        segments.push({
+          text: currentSegment.map((e) => e.text).join(" "),
+          startTime: Math.floor(segmentStartTime / 1e3),
+          endTime: Math.floor(endTime / 1e3),
+          startFormatted: formatTime(segmentStartTime),
+          endFormatted: formatTime(endTime)
+        });
+      }
+      segmentStartTime = entry.start;
+      currentSegment = [entry];
+    }
+  }
+  if (currentSegment.length > 0) {
+    const endTime = currentSegment[currentSegment.length - 1].end || currentSegment[currentSegment.length - 1].start + (currentSegment[currentSegment.length - 1].duration || 0);
+    segments.push({
+      text: currentSegment.map((e) => e.text).join(" "),
+      startTime: Math.floor(segmentStartTime / 1e3),
+      endTime: Math.floor(endTime / 1e3),
+      startFormatted: formatTime(segmentStartTime),
+      endFormatted: formatTime(endTime)
+    });
+  }
+  return segments;
+}
+async function handleSearchTranscript(strapi, args) {
+  const validatedArgs = validateToolInput("search_transcript", args);
+  const { videoId: videoIdOrUrl, query, maxResults: maxResultsInput } = validatedArgs;
+  const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
+  const segmentSeconds = pluginConfig?.searchSegmentSeconds || 30;
+  const maxResults = Math.min(Math.max(maxResultsInput || 5, 1), 20);
   const videoId = extractYouTubeID(videoIdOrUrl);
   if (!videoId) {
     throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
   }
-  const service2 =
+  const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
   const transcript2 = await service2.findTranscript(videoId);
   if (!transcript2) {
     return {
@@ -247,13 +551,90 @@ async function handleGetTranscript(strapi2, args) {
       ]
     };
   }
+  const timecodes = transcript2.transcriptWithTimeCodes || [];
+  if (timecodes.length === 0) {
+    return {
+      content: [
+        {
+          type: "text",
+          text: JSON.stringify(
+            {
+              error: true,
+              message: "Transcript has no timecode data for searching.",
+              videoId
+            },
+            null,
+            2
+          )
+        }
+      ]
+    };
+  }
+  const segments = createSegments(timecodes, segmentSeconds * 1e3);
+  if (segments.length === 0) {
+    return {
+      content: [
+        {
+          type: "text",
+          text: JSON.stringify(
+            {
+              error: true,
+              message: "Could not create searchable segments from transcript.",
+              videoId
+            },
+            null,
+            2
+          )
+        }
+      ]
+    };
+  }
+  const queryTokens = tokenize(query);
+  if (queryTokens.length === 0) {
+    return {
+      content: [
+        {
+          type: "text",
+          text: JSON.stringify(
+            {
+              error: true,
+              message: "Query is empty or contains only stop words.",
+              query
+            },
+            null,
+            2
+          )
+        }
+      ]
+    };
+  }
+  const vocabulary = new Set(queryTokens);
+  const idf = calculateIDF(segments, vocabulary);
+  const avgDocLength = segments.reduce((sum, seg) => sum + tokenize(seg.text).length, 0) / segments.length;
+  const scoredSegments = segments.map((segment) => ({
+    ...segment,
+    score: bm25Score(tokenize(segment.text), queryTokens, idf, avgDocLength)
+  }));
+  const results = scoredSegments.filter((seg) => seg.score > 0).sort((a, b) => b.score - a.score).slice(0, maxResults);
   return {
     content: [
       {
         type: "text",
         text: JSON.stringify(
           {
-
+            videoId: transcript2.videoId,
+            title: transcript2.title,
+            query,
+            totalSegments: segments.length,
+            matchingResults: results.length,
+            results: results.map((r) => ({
+              text: r.text,
+              startTime: r.startTime,
+              endTime: r.endTime,
+              timeRange: `${r.startFormatted} - ${r.endFormatted}`,
+              score: Math.round(r.score * 100) / 100
+            })),
+            usage: results.length > 0 ? `Use get_transcript with startTime: ${results[0].startTime} and endTime: ${results[0].endTime} to get full context for the top result.` : "No matches found. Try different keywords."
           },
           null,
           2
@@ -312,11 +693,10 @@ function truncateText(text, maxLength) {
 function truncateTranscripts(transcripts) {
   return transcripts.map((transcript2) => ({
     ...transcript2,
-    fullTranscript: truncateText(transcript2.fullTranscript, TRANSCRIPT_PREVIEW_LENGTH)
-    readableTranscript: truncateText(transcript2.readableTranscript, TRANSCRIPT_PREVIEW_LENGTH)
+    fullTranscript: truncateText(transcript2.fullTranscript, TRANSCRIPT_PREVIEW_LENGTH)
   }));
 }
-async function handleFindTranscripts(
+async function handleFindTranscripts(strapi, args) {
   const validatedArgs = validateToolInput("find_transcripts", args);
   const { query, videoId, title, includeFullContent, page, pageSize, sort } = validatedArgs;
   const start = (page - 1) * pageSize;
@@ -331,17 +711,16 @@ async function handleFindTranscripts(strapi2, args) {
     filters.$or = [
       { title: { $containsi: query } },
       { videoId: { $containsi: query } },
-      { fullTranscript: { $containsi: query } }
-      { readableTranscript: { $containsi: query } }
+      { fullTranscript: { $containsi: query } }
     ];
   }
-  const transcripts = await
+  const transcripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
     filters,
     sort,
     limit: pageSize,
     start
   });
-  const allMatching = await
+  const allMatching = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({
     filters
   });
   const total = allMatching.length;
@@ -377,15 +756,17 @@ const tools = [
   fetchTranscriptTool,
   listTranscriptsTool,
   getTranscriptTool,
+  searchTranscriptTool,
   findTranscriptsTool
 ];
 const toolHandlers = {
   fetch_transcript: handleFetchTranscript,
   list_transcripts: handleListTranscripts,
   get_transcript: handleGetTranscript,
+  search_transcript: handleSearchTranscript,
   find_transcripts: handleFindTranscripts
 };
-async function handleToolCall(
+async function handleToolCall(strapi, request) {
   const { name, arguments: args } = request.params;
   const handler = toolHandlers[name];
   if (!handler) {
@@ -393,13 +774,13 @@ async function handleToolCall(strapi2, request) {
   }
   const startTime = Date.now();
   try {
-    const result = await handler(
+    const result = await handler(strapi, args || {});
     const duration = Date.now() - startTime;
-
+    strapi.log.debug(`[yt-transcript-mcp] Tool ${name} executed successfully in ${duration}ms`);
     return result;
   } catch (error) {
     const duration = Date.now() - startTime;
-
+    strapi.log.error(`[yt-transcript-mcp] Tool ${name} failed after ${duration}ms`, {
       error: error instanceof Error ? error.message : String(error)
     });
     return {
@@ -420,7 +801,7 @@ async function handleToolCall(strapi2, request) {
     };
   }
 }
-function createMcpServer(
+function createMcpServer(strapi) {
   const server = new index_js.Server(
     {
       name: "yt-transcript-mcp",
@@ -433,53 +814,57 @@ function createMcpServer(strapi2) {
     }
   );
   server.setRequestHandler(types_js.ListToolsRequestSchema, async () => {
-
+    strapi.log.debug("[yt-transcript-mcp] Listing tools");
     return { tools };
   });
   server.setRequestHandler(types_js.CallToolRequestSchema, async (request) => {
-
-    return handleToolCall(
+    strapi.log.debug(`[yt-transcript-mcp] Tool call: ${request.params.name}`);
+    return handleToolCall(strapi, request);
   });
-
+  strapi.log.info("[yt-transcript-mcp] MCP server created with tools:", {
     tools: tools.map((t) => t.name)
   });
   return server;
 }
-const bootstrap = async ({ strapi
-  const plugin =
-  plugin.createMcpServer = () => createMcpServer(
+const bootstrap = async ({ strapi }) => {
+  const plugin = strapi.plugin("yt-transcript-strapi-plugin");
+  plugin.createMcpServer = () => createMcpServer(strapi);
   plugin.sessions = /* @__PURE__ */ new Map();
-
-
+  strapi.log.info("[yt-transcript-mcp] MCP plugin initialized");
+  strapi.log.info("[yt-transcript-mcp] MCP endpoint available at: /api/yt-transcript-strapi-plugin/mcp");
 };
-const destroy = ({ strapi
+const destroy = ({ strapi }) => {
 };
-const register = ({ strapi
+const register = ({ strapi }) => {
 };
 const config = {
   default: {
-
-    model: "gpt-4o-mini",
-    temp: 0.7,
-    maxTokens: 4096,
-    proxyUrl: ""
+    proxyUrl: "",
     // Optional: HTTP/HTTPS proxy for YouTube requests (e.g., 'http://user:pass@proxy.example.com:8080')
+    chunkSizeSeconds: 300,
+    // Default chunk size for transcript pagination (5 minutes)
+    previewLength: 500,
+    // Default preview length in characters
+    maxFullTranscriptLength: 5e4,
+    // Auto-load full transcript if under this character count (~12K tokens)
+    searchSegmentSeconds: 30
+    // Segment size for BM25 search scoring
   },
   validator(config2) {
-    if (config2.
-      throw new Error("
+    if (config2.proxyUrl && typeof config2.proxyUrl !== "string") {
+      throw new Error("proxyUrl must be a string");
     }
-    if (config2.
-      throw new Error("
+    if (config2.chunkSizeSeconds !== void 0 && (typeof config2.chunkSizeSeconds !== "number" || config2.chunkSizeSeconds < 30)) {
+      throw new Error("chunkSizeSeconds must be a number >= 30");
     }
-    if (config2.
-      throw new Error("
+    if (config2.previewLength !== void 0 && (typeof config2.previewLength !== "number" || config2.previewLength < 100)) {
+      throw new Error("previewLength must be a number >= 100");
    }
-    if (config2.
-      throw new Error("
+    if (config2.maxFullTranscriptLength !== void 0 && (typeof config2.maxFullTranscriptLength !== "number" || config2.maxFullTranscriptLength < 1e3)) {
+      throw new Error("maxFullTranscriptLength must be a number >= 1000");
     }
-    if (config2.
-      throw new Error("
+    if (config2.searchSegmentSeconds !== void 0 && (typeof config2.searchSegmentSeconds !== "number" || config2.searchSegmentSeconds < 10)) {
+      throw new Error("searchSegmentSeconds must be a number >= 10");
     }
   }
 };
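For reference, the reworked defaults above can be overridden from the host project's plugin configuration; a minimal sketch assuming Strapi's standard config/plugins.js file (values are illustrative, mirroring the defaults and validator rules shown in the hunk above):

```js
// config/plugins.js (illustrative; key names mirror the plugin defaults above)
module.exports = {
  "yt-transcript-strapi-plugin": {
    enabled: true,
    config: {
      proxyUrl: "",                   // optional HTTP/HTTPS proxy for YouTube requests
      chunkSizeSeconds: 300,          // pagination chunk size for get_transcript (>= 30)
      previewLength: 500,             // preview length in characters (>= 100)
      maxFullTranscriptLength: 50000, // auto-load full text below this size (>= 1000)
      searchSegmentSeconds: 30        // segment size for BM25 search scoring (>= 10)
    }
  }
};
```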
@@ -513,9 +898,6 @@ const attributes = {
   },
   transcriptWithTimeCodes: {
     type: "json"
-  },
-  readableTranscript: {
-    type: "richtext"
   }
 };
 const schema = {
@@ -532,41 +914,34 @@ const transcript = {
 const contentTypes = {
   transcript
 };
-const controller = ({ strapi
+const controller = ({ strapi }) => ({
   async getTranscript(ctx) {
     const videoId = extractYouTubeID(ctx.params.videoId);
     if (!videoId) {
       return ctx.body = { error: "Invalid YouTube URL or ID", data: null };
     }
-    const found = await
+    const found = await strapi.plugin("yt-transcript-strapi-plugin").service("service").findTranscript(videoId);
     if (found) {
       return ctx.body = { data: found };
     }
-    const transcriptData = await
-    let readableTranscript = null;
-    try {
-      readableTranscript = await strapi2.plugin("yt-transcript-strapi-plugin").service("service").generateHumanReadableTranscript(transcriptData.fullTranscript);
-    } catch (error) {
-      strapi2.log.debug("[yt-transcript] Readable transcript generation skipped");
-    }
+    const transcriptData = await strapi.plugin("yt-transcript-strapi-plugin").service("service").getTranscript(videoId);
     const payload = {
       videoId,
       title: transcriptData?.title || "No title found",
       fullTranscript: transcriptData?.fullTranscript,
-      transcriptWithTimeCodes: transcriptData?.transcriptWithTimeCodes
-      readableTranscript
+      transcriptWithTimeCodes: transcriptData?.transcriptWithTimeCodes
     };
-    const transcript2 = await
+    const transcript2 = await strapi.plugin("yt-transcript-strapi-plugin").service("service").saveTranscript(payload);
     ctx.body = { data: transcript2 };
   }
 });
-const mcpController = ({ strapi
+const mcpController = ({ strapi }) => ({
   /**
    * Handle MCP requests (POST, GET, DELETE)
    * Creates a new server+transport per session for proper isolation
    */
   async handle(ctx) {
-    const plugin =
+    const plugin = strapi.plugin("yt-transcript-strapi-plugin");
     if (!plugin.createMcpServer) {
       ctx.status = 503;
       ctx.body = {
@@ -586,12 +961,12 @@ const mcpController = ({ strapi: strapi2 }) => ({
         await server.connect(transport);
         session = { server, transport, createdAt: Date.now() };
         plugin.sessions.set(sessionId, session);
-
+        strapi.log.debug(`[yt-transcript-mcp] New session created: ${sessionId}`);
       }
       await session.transport.handleRequest(ctx.req, ctx.res, ctx.request.body);
       ctx.respond = false;
     } catch (error) {
-
+      strapi.log.error("[yt-transcript-mcp] Error handling MCP request", {
         error: error instanceof Error ? error.message : String(error),
         method: ctx.method,
         path: ctx.path
@@ -671,18 +1046,6 @@ const routes = {
     routes: [...admin]
   }
 };
-async function initializeModel({
-  openAIApiKey,
-  model,
-  temp
-}) {
-  return new openai.ChatOpenAI({
-    temperature: temp,
-    openAIApiKey,
-    modelName: model,
-    maxTokens: 1e3
-  });
-}
 function isRequestLike(input) {
   return typeof input === "object" && input !== null && "url" in input && typeof input.url === "string" && "method" in input;
 }
@@ -825,49 +1188,14 @@ const fetchTranscript = async (videoId, options2) => {
     );
   }
 };
-
-  const punctuationPrompt = prompts.PromptTemplate.fromTemplate(
-    "Add proper punctuation and capitalization to the following text chunk:\n\n{chunk}"
-  );
-  const punctuationChain = punctuationPrompt.pipe(model);
-  const processedChunks = await Promise.all(
-    chunks.map(async (chunk) => {
-      const result = await punctuationChain.invoke({ chunk });
-      return result.content;
-    })
-  );
-  return processedChunks.join(" ");
-}
-async function generateModifiedTranscript(rawTranscript) {
-  const pluginSettings = await strapi.config.get(
-    "plugin::yt-transcript-strapi-plugin"
-  );
-  if (!pluginSettings.openAIApiKey || !pluginSettings.model || !pluginSettings.temp || !pluginSettings.maxTokens) {
-    throw new Error("Missing required configuration for YTTranscript");
-  }
-  const chatModel = await initializeModel({
-    openAIApiKey: pluginSettings.openAIApiKey,
-    model: pluginSettings.model,
-    temp: pluginSettings.temp,
-    maxTokens: pluginSettings.maxTokens
-  });
-  const splitter = new textsplitters.TokenTextSplitter({
-    chunkSize: 1e3,
-    chunkOverlap: 200
-  });
-  const transcriptChunks = await splitter.createDocuments([rawTranscript]);
-  const chunkTexts = transcriptChunks.map((chunk) => chunk.pageContent);
-  const modifiedTranscript = await processTextChunks(chunkTexts, chatModel);
-  return modifiedTranscript;
-}
-const service = ({ strapi: strapi2 }) => ({
+const service = ({ strapi }) => ({
   async getTranscript(identifier) {
     const youtubeIdRegex = /^[a-zA-Z0-9_-]{11}$/;
     const isValid = youtubeIdRegex.test(identifier);
     if (!isValid) {
       return { error: "Invalid video ID", data: null };
     }
-    const pluginSettings = await
+    const pluginSettings = await strapi.config.get(
       "plugin::yt-transcript-strapi-plugin"
     );
     const transcriptData = await fetchTranscript(identifier, {
@@ -880,20 +1208,16 @@ const service = ({ strapi: strapi2 }) => ({
     };
   },
   async saveTranscript(payload) {
-    return await
+    return await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").create({
       data: payload
     });
   },
   async findTranscript(videoId) {
-    const transcriptData = await
+    const transcriptData = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findFirst({
      filters: { videoId }
    });
    if (!transcriptData) return null;
    return transcriptData;
-  },
-  async generateHumanReadableTranscript(transcript2) {
-    const modifiedTranscript = await generateModifiedTranscript(transcript2);
-    return modifiedTranscript;
   }
 });
 const services = {