yt-transcript-strapi-plugin 0.0.22 → 0.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/server/index.js +437 -36
- package/dist/server/index.mjs +437 -36
- package/dist/server/src/config/index.d.ts +8 -8
- package/dist/server/src/index.d.ts +10 -19
- package/dist/server/src/mcp/schemas/index.d.ts +63 -0
- package/dist/server/src/mcp/tools/get-transcript.d.ts +26 -0
- package/dist/server/src/mcp/tools/search-transcript.d.ts +30 -0
- package/dist/server/src/routes/content-api.d.ts +2 -11
- package/dist/server/src/routes/index.d.ts +2 -11
- package/node_modules/which/CHANGELOG.md +166 -0
- package/package.json +7 -2
- package/node_modules/express/node_modules/media-typer/HISTORY.md +0 -50
- package/node_modules/express/node_modules/media-typer/LICENSE +0 -22
- package/node_modules/express/node_modules/media-typer/README.md +0 -93
- package/node_modules/express/node_modules/media-typer/index.js +0 -143
- package/node_modules/express/node_modules/media-typer/package.json +0 -33
- package/node_modules/express/node_modules/type-is/HISTORY.md +0 -292
- package/node_modules/express/node_modules/type-is/LICENSE +0 -23
- package/node_modules/express/node_modules/type-is/README.md +0 -198
- package/node_modules/express/node_modules/type-is/index.js +0 -250
- package/node_modules/express/node_modules/type-is/package.json +0 -47
- /package/node_modules/{body-parser/node_modules/media-typer → media-typer}/HISTORY.md +0 -0
- /package/node_modules/{body-parser/node_modules/media-typer → media-typer}/LICENSE +0 -0
- /package/node_modules/{body-parser/node_modules/media-typer → media-typer}/README.md +0 -0
- /package/node_modules/{body-parser/node_modules/media-typer → media-typer}/index.js +0 -0
- /package/node_modules/{body-parser/node_modules/media-typer → media-typer}/package.json +0 -0
- /package/node_modules/{body-parser/node_modules/type-is → type-is}/HISTORY.md +0 -0
- /package/node_modules/{body-parser/node_modules/type-is → type-is}/LICENSE +0 -0
- /package/node_modules/{body-parser/node_modules/type-is → type-is}/README.md +0 -0
- /package/node_modules/{body-parser/node_modules/type-is → type-is}/index.js +0 -0
- /package/node_modules/{body-parser → type-is}/node_modules/mime-types/HISTORY.md +0 -0
- /package/node_modules/{body-parser → type-is}/node_modules/mime-types/LICENSE +0 -0
- /package/node_modules/{body-parser → type-is}/node_modules/mime-types/README.md +0 -0
- /package/node_modules/{body-parser → type-is}/node_modules/mime-types/index.js +0 -0
- /package/node_modules/{body-parser → type-is}/node_modules/mime-types/mimeScore.js +0 -0
- /package/node_modules/{body-parser → type-is}/node_modules/mime-types/package.json +0 -0
- /package/node_modules/{body-parser/node_modules/type-is → type-is}/package.json +0 -0
package/dist/server/index.js
CHANGED
|
@@ -15,7 +15,18 @@ const ListTranscriptsSchema = zod.z.object({
|
|
|
15
15
|
sort: zod.z.string().optional().default("createdAt:desc")
|
|
16
16
|
});
|
|
17
17
|
const GetTranscriptSchema = zod.z.object({
|
|
18
|
-
videoId: zod.z.string().min(1, "Video ID is required")
|
|
18
|
+
videoId: zod.z.string().min(1, "Video ID is required"),
|
|
19
|
+
includeFullTranscript: zod.z.boolean().optional().default(false),
|
|
20
|
+
includeTimecodes: zod.z.boolean().optional().default(false),
|
|
21
|
+
startTime: zod.z.number().min(0).optional(),
|
|
22
|
+
endTime: zod.z.number().min(0).optional(),
|
|
23
|
+
chunkIndex: zod.z.number().int().min(0).optional(),
|
|
24
|
+
chunkSize: zod.z.number().int().min(30).optional()
|
|
25
|
+
});
|
|
26
|
+
const SearchTranscriptSchema = zod.z.object({
|
|
27
|
+
videoId: zod.z.string().min(1, "Video ID is required"),
|
|
28
|
+
query: zod.z.string().min(1, "Search query is required"),
|
|
29
|
+
maxResults: zod.z.number().int().min(1).max(20).optional().default(5)
|
|
19
30
|
});
|
|
20
31
|
const FindTranscriptsSchema = zod.z.object({
|
|
21
32
|
query: zod.z.string().optional(),
|
|
@@ -30,6 +41,7 @@ const ToolSchemas = {
|
|
|
30
41
|
fetch_transcript: FetchTranscriptSchema,
|
|
31
42
|
list_transcripts: ListTranscriptsSchema,
|
|
32
43
|
get_transcript: GetTranscriptSchema,
|
|
44
|
+
search_transcript: SearchTranscriptSchema,
|
|
33
45
|
find_transcripts: FindTranscriptsSchema
|
|
34
46
|
};
|
|
35
47
|
function validateToolInput(toolName, input) {
|
|
@@ -64,7 +76,7 @@ function extractYouTubeID(urlOrID) {
|
|
|
64
76
|
}
|
|
65
77
|
const fetchTranscriptTool = {
|
|
66
78
|
name: "fetch_transcript",
|
|
67
|
-
description: "Fetch a transcript from YouTube for a given video ID or URL. The transcript is saved to the database
|
|
79
|
+
description: "Fetch a transcript from YouTube for a given video ID or URL. The transcript is saved to the database. Returns metadata and preview only to avoid context overflow. Use get_transcript to retrieve content.",
|
|
68
80
|
inputSchema: {
|
|
69
81
|
type: "object",
|
|
70
82
|
properties: {
|
|
@@ -76,9 +88,47 @@ const fetchTranscriptTool = {
|
|
|
76
88
|
required: ["videoId"]
|
|
77
89
|
}
|
|
78
90
|
};
|
|
91
|
+
function getVideoDurationMs$1(timecodes) {
|
|
92
|
+
if (!timecodes || timecodes.length === 0) return 0;
|
|
93
|
+
const lastEntry = timecodes[timecodes.length - 1];
|
|
94
|
+
return lastEntry.end || lastEntry.start + (lastEntry.duration || 0);
|
|
95
|
+
}
|
|
96
|
+
function formatTime$2(ms) {
|
|
97
|
+
const totalSeconds = Math.floor(ms / 1e3);
|
|
98
|
+
const hours = Math.floor(totalSeconds / 3600);
|
|
99
|
+
const minutes = Math.floor(totalSeconds % 3600 / 60);
|
|
100
|
+
const seconds = totalSeconds % 60;
|
|
101
|
+
if (hours > 0) {
|
|
102
|
+
return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
|
|
103
|
+
}
|
|
104
|
+
return `${minutes}:${seconds.toString().padStart(2, "0")}`;
|
|
105
|
+
}
|
|
106
|
+
function buildMetadataResponse(transcript2, previewLength, cached) {
|
|
107
|
+
const fullText = transcript2.fullTranscript || "";
|
|
108
|
+
const timecodes = transcript2.transcriptWithTimeCodes || [];
|
|
109
|
+
const durationMs = getVideoDurationMs$1(timecodes);
|
|
110
|
+
const wordCount = fullText.split(/\s+/).length;
|
|
111
|
+
const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
|
|
112
|
+
return {
|
|
113
|
+
message: cached ? "Transcript already exists in database" : "Transcript fetched and saved successfully",
|
|
114
|
+
cached,
|
|
115
|
+
videoId: transcript2.videoId,
|
|
116
|
+
title: transcript2.title,
|
|
117
|
+
metadata: {
|
|
118
|
+
wordCount,
|
|
119
|
+
characterCount: fullText.length,
|
|
120
|
+
duration: formatTime$2(durationMs),
|
|
121
|
+
durationSeconds: Math.floor(durationMs / 1e3)
|
|
122
|
+
},
|
|
123
|
+
preview,
|
|
124
|
+
usage: "Use get_transcript with videoId to retrieve full content, specific time ranges, or paginated chunks."
|
|
125
|
+
};
|
|
126
|
+
}
|
|
79
127
|
async function handleFetchTranscript(strapi, args) {
|
|
80
128
|
const validatedArgs = validateToolInput("fetch_transcript", args);
|
|
81
129
|
const { videoId: videoIdOrUrl } = validatedArgs;
|
|
130
|
+
const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
|
|
131
|
+
const previewLength = pluginConfig?.previewLength || 500;
|
|
82
132
|
const videoId = extractYouTubeID(videoIdOrUrl);
|
|
83
133
|
if (!videoId) {
|
|
84
134
|
throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
|
|
@@ -91,11 +141,7 @@ async function handleFetchTranscript(strapi, args) {
|
|
|
91
141
|
{
|
|
92
142
|
type: "text",
|
|
93
143
|
text: JSON.stringify(
|
|
94
|
-
|
|
95
|
-
message: "Transcript already exists in database",
|
|
96
|
-
data: existingTranscript,
|
|
97
|
-
cached: true
|
|
98
|
-
},
|
|
144
|
+
buildMetadataResponse(existingTranscript, previewLength, true),
|
|
99
145
|
null,
|
|
100
146
|
2
|
|
101
147
|
)
|
|
@@ -119,11 +165,7 @@ async function handleFetchTranscript(strapi, args) {
|
|
|
119
165
|
{
|
|
120
166
|
type: "text",
|
|
121
167
|
text: JSON.stringify(
|
|
122
|
-
|
|
123
|
-
message: "Transcript fetched and saved successfully",
|
|
124
|
-
data: savedTranscript,
|
|
125
|
-
cached: false
|
|
126
|
-
},
|
|
168
|
+
buildMetadataResponse(savedTranscript, previewLength, false),
|
|
127
169
|
null,
|
|
128
170
|
2
|
|
129
171
|
)
|
|
@@ -191,21 +233,82 @@ async function handleListTranscripts(strapi, args) {
|
|
|
191
233
|
}
|
|
192
234
|
const getTranscriptTool = {
|
|
193
235
|
name: "get_transcript",
|
|
194
|
-
description: "Get a
|
|
236
|
+
description: "Get a saved transcript by YouTube video ID. Returns metadata and preview by default. Use parameters to get full content or specific time ranges to avoid context overflow.",
|
|
195
237
|
inputSchema: {
|
|
196
238
|
type: "object",
|
|
197
239
|
properties: {
|
|
198
240
|
videoId: {
|
|
199
241
|
type: "string",
|
|
200
242
|
description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
|
|
243
|
+
},
|
|
244
|
+
includeFullTranscript: {
|
|
245
|
+
type: "boolean",
|
|
246
|
+
description: "Include the complete transcript text. Warning: may cause context overflow for long videos. Default: false",
|
|
247
|
+
default: false
|
|
248
|
+
},
|
|
249
|
+
includeTimecodes: {
|
|
250
|
+
type: "boolean",
|
|
251
|
+
description: "Include the transcript with timecodes array. Warning: significantly increases response size. Default: false",
|
|
252
|
+
default: false
|
|
253
|
+
},
|
|
254
|
+
startTime: {
|
|
255
|
+
type: "number",
|
|
256
|
+
description: "Start time in seconds for fetching a specific portion of the transcript"
|
|
257
|
+
},
|
|
258
|
+
endTime: {
|
|
259
|
+
type: "number",
|
|
260
|
+
description: "End time in seconds for fetching a specific portion of the transcript"
|
|
261
|
+
},
|
|
262
|
+
chunkIndex: {
|
|
263
|
+
type: "number",
|
|
264
|
+
description: "Chunk index (0-based) when paginating through transcript. Use with chunkSize to paginate through long videos."
|
|
265
|
+
},
|
|
266
|
+
chunkSize: {
|
|
267
|
+
type: "number",
|
|
268
|
+
description: "Chunk size in seconds. Overrides config default. Use with chunkIndex for pagination."
|
|
201
269
|
}
|
|
202
270
|
},
|
|
203
271
|
required: ["videoId"]
|
|
204
272
|
}
|
|
205
273
|
};
|
|
274
|
+
function getTranscriptForTimeRange(timecodes, startTimeMs, endTimeMs) {
|
|
275
|
+
const entries = timecodes.filter(
|
|
276
|
+
(entry) => entry.start >= startTimeMs && entry.start < endTimeMs
|
|
277
|
+
);
|
|
278
|
+
const text = entries.map((e) => e.text).join(" ");
|
|
279
|
+
return { text, entries };
|
|
280
|
+
}
|
|
281
|
+
function getVideoDurationMs(timecodes) {
|
|
282
|
+
if (!timecodes || timecodes.length === 0) return 0;
|
|
283
|
+
const lastEntry = timecodes[timecodes.length - 1];
|
|
284
|
+
return lastEntry.end || lastEntry.start + (lastEntry.duration || 0);
|
|
285
|
+
}
|
|
286
|
+
function formatTime$1(ms) {
|
|
287
|
+
const totalSeconds = Math.floor(ms / 1e3);
|
|
288
|
+
const hours = Math.floor(totalSeconds / 3600);
|
|
289
|
+
const minutes = Math.floor(totalSeconds % 3600 / 60);
|
|
290
|
+
const seconds = totalSeconds % 60;
|
|
291
|
+
if (hours > 0) {
|
|
292
|
+
return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
|
|
293
|
+
}
|
|
294
|
+
return `${minutes}:${seconds.toString().padStart(2, "0")}`;
|
|
295
|
+
}
|
|
206
296
|
async function handleGetTranscript(strapi, args) {
|
|
207
297
|
const validatedArgs = validateToolInput("get_transcript", args);
|
|
208
|
-
const {
|
|
298
|
+
const {
|
|
299
|
+
videoId: videoIdOrUrl,
|
|
300
|
+
includeFullTranscript,
|
|
301
|
+
includeTimecodes,
|
|
302
|
+
startTime,
|
|
303
|
+
endTime,
|
|
304
|
+
chunkIndex,
|
|
305
|
+
chunkSize: chunkSizeOverride
|
|
306
|
+
} = validatedArgs;
|
|
307
|
+
const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
|
|
308
|
+
const defaultChunkSize = pluginConfig?.chunkSizeSeconds || 300;
|
|
309
|
+
const previewLength = pluginConfig?.previewLength || 500;
|
|
310
|
+
const maxFullTranscriptLength = pluginConfig?.maxFullTranscriptLength || 5e4;
|
|
311
|
+
const chunkSizeSeconds = chunkSizeOverride || defaultChunkSize;
|
|
209
312
|
const videoId = extractYouTubeID(videoIdOrUrl);
|
|
210
313
|
if (!videoId) {
|
|
211
314
|
throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
|
|
@@ -230,13 +333,308 @@ async function handleGetTranscript(strapi, args) {
|
|
|
230
333
|
]
|
|
231
334
|
};
|
|
232
335
|
}
|
|
336
|
+
const timecodes = transcript2.transcriptWithTimeCodes || [];
|
|
337
|
+
const fullText = transcript2.fullTranscript || "";
|
|
338
|
+
const durationMs = getVideoDurationMs(timecodes);
|
|
339
|
+
const totalChunks = Math.ceil(durationMs / (chunkSizeSeconds * 1e3));
|
|
340
|
+
const wordCount = fullText.split(/\s+/).length;
|
|
341
|
+
const response = {
|
|
342
|
+
videoId: transcript2.videoId,
|
|
343
|
+
title: transcript2.title,
|
|
344
|
+
metadata: {
|
|
345
|
+
wordCount,
|
|
346
|
+
characterCount: fullText.length,
|
|
347
|
+
duration: formatTime$1(durationMs),
|
|
348
|
+
durationSeconds: Math.floor(durationMs / 1e3),
|
|
349
|
+
totalChunks,
|
|
350
|
+
chunkSizeSeconds
|
|
351
|
+
}
|
|
352
|
+
};
|
|
353
|
+
if (startTime !== void 0 || endTime !== void 0) {
|
|
354
|
+
const startMs = (startTime || 0) * 1e3;
|
|
355
|
+
const endMs = endTime !== void 0 ? endTime * 1e3 : durationMs;
|
|
356
|
+
const { text, entries } = getTranscriptForTimeRange(timecodes, startMs, endMs);
|
|
357
|
+
response.timeRange = {
|
|
358
|
+
startTime: startTime || 0,
|
|
359
|
+
endTime: endTime || Math.floor(durationMs / 1e3),
|
|
360
|
+
startFormatted: formatTime$1(startMs),
|
|
361
|
+
endFormatted: formatTime$1(endMs)
|
|
362
|
+
};
|
|
363
|
+
response.transcript = text;
|
|
364
|
+
if (includeTimecodes) {
|
|
365
|
+
response.transcriptWithTimeCodes = entries;
|
|
366
|
+
}
|
|
367
|
+
} else if (chunkIndex !== void 0) {
|
|
368
|
+
const chunkStartMs = chunkIndex * chunkSizeSeconds * 1e3;
|
|
369
|
+
const chunkEndMs = Math.min((chunkIndex + 1) * chunkSizeSeconds * 1e3, durationMs);
|
|
370
|
+
if (chunkStartMs >= durationMs) {
|
|
371
|
+
response.error = `Chunk index ${chunkIndex} is out of range. Total chunks: ${totalChunks} (0-${totalChunks - 1})`;
|
|
372
|
+
} else {
|
|
373
|
+
const { text, entries } = getTranscriptForTimeRange(timecodes, chunkStartMs, chunkEndMs);
|
|
374
|
+
response.chunk = {
|
|
375
|
+
index: chunkIndex,
|
|
376
|
+
totalChunks,
|
|
377
|
+
startTime: Math.floor(chunkStartMs / 1e3),
|
|
378
|
+
endTime: Math.floor(chunkEndMs / 1e3),
|
|
379
|
+
startFormatted: formatTime$1(chunkStartMs),
|
|
380
|
+
endFormatted: formatTime$1(chunkEndMs)
|
|
381
|
+
};
|
|
382
|
+
response.transcript = text;
|
|
383
|
+
if (includeTimecodes) {
|
|
384
|
+
response.transcriptWithTimeCodes = entries;
|
|
385
|
+
}
|
|
386
|
+
if (chunkIndex < totalChunks - 1) {
|
|
387
|
+
response.nextChunk = `Use chunkIndex: ${chunkIndex + 1} to get the next portion`;
|
|
388
|
+
}
|
|
389
|
+
if (chunkIndex > 0) {
|
|
390
|
+
response.previousChunk = `Use chunkIndex: ${chunkIndex - 1} to get the previous portion`;
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
} else if (includeFullTranscript || fullText.length <= maxFullTranscriptLength) {
|
|
394
|
+
response.transcript = fullText;
|
|
395
|
+
if (includeTimecodes) {
|
|
396
|
+
response.transcriptWithTimeCodes = timecodes;
|
|
397
|
+
}
|
|
398
|
+
if (includeFullTranscript && fullText.length > maxFullTranscriptLength) {
|
|
399
|
+
response.warning = "Full transcript included. For long videos, consider using chunkIndex, startTime/endTime, or search_transcript to reduce response size.";
|
|
400
|
+
} else if (fullText.length <= maxFullTranscriptLength) {
|
|
401
|
+
response.note = "Full transcript auto-loaded (fits within context limit).";
|
|
402
|
+
}
|
|
403
|
+
} else {
|
|
404
|
+
const preview = fullText.length > previewLength ? fullText.substring(0, previewLength) + "..." : fullText;
|
|
405
|
+
response.preview = preview;
|
|
406
|
+
response.isLargeTranscript = true;
|
|
407
|
+
response.usage = {
|
|
408
|
+
fullTranscript: "Set includeFullTranscript: true to get complete text (warning: may exceed context)",
|
|
409
|
+
search: "Use search_transcript to find relevant portions by keyword (recommended for large transcripts)",
|
|
410
|
+
timeRange: "Use startTime and endTime (in seconds) to get a specific portion",
|
|
411
|
+
pagination: `Use chunkIndex (0-${totalChunks - 1}) to paginate through ${chunkSizeSeconds}s chunks`
|
|
412
|
+
};
|
|
413
|
+
}
|
|
414
|
+
return {
|
|
415
|
+
content: [
|
|
416
|
+
{
|
|
417
|
+
type: "text",
|
|
418
|
+
text: JSON.stringify(response, null, 2)
|
|
419
|
+
}
|
|
420
|
+
]
|
|
421
|
+
};
|
|
422
|
+
}
|
|
423
|
+
const searchTranscriptTool = {
|
|
424
|
+
name: "search_transcript",
|
|
425
|
+
description: "Search within a saved transcript using BM25 scoring. Returns the most relevant segments matching your query with timestamps. Use this to find specific content in long videos without loading the entire transcript.",
|
|
426
|
+
inputSchema: {
|
|
427
|
+
type: "object",
|
|
428
|
+
properties: {
|
|
429
|
+
videoId: {
|
|
430
|
+
type: "string",
|
|
431
|
+
description: 'YouTube video ID (e.g., "dQw4w9WgXcQ") or full YouTube URL'
|
|
432
|
+
},
|
|
433
|
+
query: {
|
|
434
|
+
type: "string",
|
|
435
|
+
description: "Search query - keywords or phrases to find in the transcript"
|
|
436
|
+
},
|
|
437
|
+
maxResults: {
|
|
438
|
+
type: "number",
|
|
439
|
+
description: "Maximum number of results to return (default: 5, max: 20)",
|
|
440
|
+
default: 5
|
|
441
|
+
}
|
|
442
|
+
},
|
|
443
|
+
required: ["videoId", "query"]
|
|
444
|
+
}
|
|
445
|
+
};
|
|
446
|
+
function tokenize(text) {
|
|
447
|
+
return text.toLowerCase().replace(/[^\w\s]/g, " ").split(/\s+/).filter((word) => word.length > 1);
|
|
448
|
+
}
|
|
449
|
+
function calculateIDF(segments, vocabulary) {
|
|
450
|
+
const idf = /* @__PURE__ */ new Map();
|
|
451
|
+
const N = segments.length;
|
|
452
|
+
for (const term of vocabulary) {
|
|
453
|
+
const docsWithTerm = segments.filter(
|
|
454
|
+
(seg) => tokenize(seg.text).includes(term)
|
|
455
|
+
).length;
|
|
456
|
+
idf.set(term, Math.log((N - docsWithTerm + 0.5) / (docsWithTerm + 0.5) + 1));
|
|
457
|
+
}
|
|
458
|
+
return idf;
|
|
459
|
+
}
|
|
460
|
+
function bm25Score(segmentTokens, queryTokens, idf, avgDocLength, k1 = 1.5, b = 0.75) {
|
|
461
|
+
const docLength = segmentTokens.length;
|
|
462
|
+
let score = 0;
|
|
463
|
+
const tf = /* @__PURE__ */ new Map();
|
|
464
|
+
for (const token of segmentTokens) {
|
|
465
|
+
tf.set(token, (tf.get(token) || 0) + 1);
|
|
466
|
+
}
|
|
467
|
+
for (const term of queryTokens) {
|
|
468
|
+
const termFreq = tf.get(term) || 0;
|
|
469
|
+
const termIdf = idf.get(term) || 0;
|
|
470
|
+
if (termFreq > 0) {
|
|
471
|
+
const numerator = termFreq * (k1 + 1);
|
|
472
|
+
const denominator = termFreq + k1 * (1 - b + b * (docLength / avgDocLength));
|
|
473
|
+
score += termIdf * (numerator / denominator);
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
return score;
|
|
477
|
+
}
|
|
478
|
+
function formatTime(ms) {
|
|
479
|
+
const totalSeconds = Math.floor(ms / 1e3);
|
|
480
|
+
const hours = Math.floor(totalSeconds / 3600);
|
|
481
|
+
const minutes = Math.floor(totalSeconds % 3600 / 60);
|
|
482
|
+
const seconds = totalSeconds % 60;
|
|
483
|
+
if (hours > 0) {
|
|
484
|
+
return `${hours}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
|
|
485
|
+
}
|
|
486
|
+
return `${minutes}:${seconds.toString().padStart(2, "0")}`;
|
|
487
|
+
}
|
|
488
|
+
function createSegments(timecodes, segmentDurationMs) {
|
|
489
|
+
if (!timecodes || timecodes.length === 0) return [];
|
|
490
|
+
const segments = [];
|
|
491
|
+
let currentSegment = [];
|
|
492
|
+
let segmentStartTime = timecodes[0].start;
|
|
493
|
+
for (const entry of timecodes) {
|
|
494
|
+
const segmentEndTime = segmentStartTime + segmentDurationMs;
|
|
495
|
+
if (entry.start < segmentEndTime) {
|
|
496
|
+
currentSegment.push(entry);
|
|
497
|
+
} else {
|
|
498
|
+
if (currentSegment.length > 0) {
|
|
499
|
+
const endTime = currentSegment[currentSegment.length - 1].end || currentSegment[currentSegment.length - 1].start + (currentSegment[currentSegment.length - 1].duration || 0);
|
|
500
|
+
segments.push({
|
|
501
|
+
text: currentSegment.map((e) => e.text).join(" "),
|
|
502
|
+
startTime: Math.floor(segmentStartTime / 1e3),
|
|
503
|
+
endTime: Math.floor(endTime / 1e3),
|
|
504
|
+
startFormatted: formatTime(segmentStartTime),
|
|
505
|
+
endFormatted: formatTime(endTime)
|
|
506
|
+
});
|
|
507
|
+
}
|
|
508
|
+
segmentStartTime = entry.start;
|
|
509
|
+
currentSegment = [entry];
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
if (currentSegment.length > 0) {
|
|
513
|
+
const endTime = currentSegment[currentSegment.length - 1].end || currentSegment[currentSegment.length - 1].start + (currentSegment[currentSegment.length - 1].duration || 0);
|
|
514
|
+
segments.push({
|
|
515
|
+
text: currentSegment.map((e) => e.text).join(" "),
|
|
516
|
+
startTime: Math.floor(segmentStartTime / 1e3),
|
|
517
|
+
endTime: Math.floor(endTime / 1e3),
|
|
518
|
+
startFormatted: formatTime(segmentStartTime),
|
|
519
|
+
endFormatted: formatTime(endTime)
|
|
520
|
+
});
|
|
521
|
+
}
|
|
522
|
+
return segments;
|
|
523
|
+
}
|
|
524
|
+
async function handleSearchTranscript(strapi, args) {
|
|
525
|
+
const validatedArgs = validateToolInput("search_transcript", args);
|
|
526
|
+
const { videoId: videoIdOrUrl, query, maxResults: maxResultsInput } = validatedArgs;
|
|
527
|
+
const pluginConfig = await strapi.config.get("plugin::yt-transcript-strapi-plugin");
|
|
528
|
+
const segmentSeconds = pluginConfig?.searchSegmentSeconds || 30;
|
|
529
|
+
const maxResults = Math.min(Math.max(maxResultsInput || 5, 1), 20);
|
|
530
|
+
const videoId = extractYouTubeID(videoIdOrUrl);
|
|
531
|
+
if (!videoId) {
|
|
532
|
+
throw new Error(`Invalid YouTube video ID or URL: "${videoIdOrUrl}". Please provide a valid 11-character video ID or YouTube URL.`);
|
|
533
|
+
}
|
|
534
|
+
const service2 = strapi.plugin("yt-transcript-strapi-plugin").service("service");
|
|
535
|
+
const transcript2 = await service2.findTranscript(videoId);
|
|
536
|
+
if (!transcript2) {
|
|
537
|
+
return {
|
|
538
|
+
content: [
|
|
539
|
+
{
|
|
540
|
+
type: "text",
|
|
541
|
+
text: JSON.stringify(
|
|
542
|
+
{
|
|
543
|
+
error: true,
|
|
544
|
+
message: `No transcript found for video ID: ${videoId}. Use fetch_transcript to fetch it from YouTube first.`,
|
|
545
|
+
videoId
|
|
546
|
+
},
|
|
547
|
+
null,
|
|
548
|
+
2
|
|
549
|
+
)
|
|
550
|
+
}
|
|
551
|
+
]
|
|
552
|
+
};
|
|
553
|
+
}
|
|
554
|
+
const timecodes = transcript2.transcriptWithTimeCodes || [];
|
|
555
|
+
if (timecodes.length === 0) {
|
|
556
|
+
return {
|
|
557
|
+
content: [
|
|
558
|
+
{
|
|
559
|
+
type: "text",
|
|
560
|
+
text: JSON.stringify(
|
|
561
|
+
{
|
|
562
|
+
error: true,
|
|
563
|
+
message: "Transcript has no timecode data for searching.",
|
|
564
|
+
videoId
|
|
565
|
+
},
|
|
566
|
+
null,
|
|
567
|
+
2
|
|
568
|
+
)
|
|
569
|
+
}
|
|
570
|
+
]
|
|
571
|
+
};
|
|
572
|
+
}
|
|
573
|
+
const segments = createSegments(timecodes, segmentSeconds * 1e3);
|
|
574
|
+
if (segments.length === 0) {
|
|
575
|
+
return {
|
|
576
|
+
content: [
|
|
577
|
+
{
|
|
578
|
+
type: "text",
|
|
579
|
+
text: JSON.stringify(
|
|
580
|
+
{
|
|
581
|
+
error: true,
|
|
582
|
+
message: "Could not create searchable segments from transcript.",
|
|
583
|
+
videoId
|
|
584
|
+
},
|
|
585
|
+
null,
|
|
586
|
+
2
|
|
587
|
+
)
|
|
588
|
+
}
|
|
589
|
+
]
|
|
590
|
+
};
|
|
591
|
+
}
|
|
592
|
+
const queryTokens = tokenize(query);
|
|
593
|
+
if (queryTokens.length === 0) {
|
|
594
|
+
return {
|
|
595
|
+
content: [
|
|
596
|
+
{
|
|
597
|
+
type: "text",
|
|
598
|
+
text: JSON.stringify(
|
|
599
|
+
{
|
|
600
|
+
error: true,
|
|
601
|
+
message: "Query is empty or contains only stop words.",
|
|
602
|
+
query
|
|
603
|
+
},
|
|
604
|
+
null,
|
|
605
|
+
2
|
|
606
|
+
)
|
|
607
|
+
}
|
|
608
|
+
]
|
|
609
|
+
};
|
|
610
|
+
}
|
|
611
|
+
const vocabulary = new Set(queryTokens);
|
|
612
|
+
const idf = calculateIDF(segments, vocabulary);
|
|
613
|
+
const avgDocLength = segments.reduce((sum, seg) => sum + tokenize(seg.text).length, 0) / segments.length;
|
|
614
|
+
const scoredSegments = segments.map((segment) => ({
|
|
615
|
+
...segment,
|
|
616
|
+
score: bm25Score(tokenize(segment.text), queryTokens, idf, avgDocLength)
|
|
617
|
+
}));
|
|
618
|
+
const results = scoredSegments.filter((seg) => seg.score > 0).sort((a, b) => b.score - a.score).slice(0, maxResults);
|
|
233
619
|
return {
|
|
234
620
|
content: [
|
|
235
621
|
{
|
|
236
622
|
type: "text",
|
|
237
623
|
text: JSON.stringify(
|
|
238
624
|
{
|
|
239
|
-
|
|
625
|
+
videoId: transcript2.videoId,
|
|
626
|
+
title: transcript2.title,
|
|
627
|
+
query,
|
|
628
|
+
totalSegments: segments.length,
|
|
629
|
+
matchingResults: results.length,
|
|
630
|
+
results: results.map((r) => ({
|
|
631
|
+
text: r.text,
|
|
632
|
+
startTime: r.startTime,
|
|
633
|
+
endTime: r.endTime,
|
|
634
|
+
timeRange: `${r.startFormatted} - ${r.endFormatted}`,
|
|
635
|
+
score: Math.round(r.score * 100) / 100
|
|
636
|
+
})),
|
|
637
|
+
usage: results.length > 0 ? `Use get_transcript with startTime: ${results[0].startTime} and endTime: ${results[0].endTime} to get full context for the top result.` : "No matches found. Try different keywords."
|
|
240
638
|
},
|
|
241
639
|
null,
|
|
242
640
|
2
|
|
@@ -358,12 +756,14 @@ const tools = [
|
|
|
358
756
|
fetchTranscriptTool,
|
|
359
757
|
listTranscriptsTool,
|
|
360
758
|
getTranscriptTool,
|
|
759
|
+
searchTranscriptTool,
|
|
361
760
|
findTranscriptsTool
|
|
362
761
|
];
|
|
363
762
|
const toolHandlers = {
|
|
364
763
|
fetch_transcript: handleFetchTranscript,
|
|
365
764
|
list_transcripts: handleListTranscripts,
|
|
366
765
|
get_transcript: handleGetTranscript,
|
|
766
|
+
search_transcript: handleSearchTranscript,
|
|
367
767
|
find_transcripts: handleFindTranscripts
|
|
368
768
|
};
|
|
369
769
|
async function handleToolCall(strapi, request) {
|
|
@@ -439,28 +839,32 @@ const register = ({ strapi }) => {
|
|
|
439
839
|
};
|
|
440
840
|
const config = {
|
|
441
841
|
default: {
|
|
442
|
-
|
|
443
|
-
model: "gpt-4o-mini",
|
|
444
|
-
temp: 0.7,
|
|
445
|
-
maxTokens: 4096,
|
|
446
|
-
proxyUrl: ""
|
|
842
|
+
proxyUrl: "",
|
|
447
843
|
// Optional: HTTP/HTTPS proxy for YouTube requests (e.g., 'http://user:pass@proxy.example.com:8080')
|
|
844
|
+
chunkSizeSeconds: 300,
|
|
845
|
+
// Default chunk size for transcript pagination (5 minutes)
|
|
846
|
+
previewLength: 500,
|
|
847
|
+
// Default preview length in characters
|
|
848
|
+
maxFullTranscriptLength: 5e4,
|
|
849
|
+
// Auto-load full transcript if under this character count (~12K tokens)
|
|
850
|
+
searchSegmentSeconds: 30
|
|
851
|
+
// Segment size for BM25 search scoring
|
|
448
852
|
},
|
|
449
853
|
validator(config2) {
|
|
450
|
-
if (config2.
|
|
451
|
-
throw new Error("
|
|
854
|
+
if (config2.proxyUrl && typeof config2.proxyUrl !== "string") {
|
|
855
|
+
throw new Error("proxyUrl must be a string");
|
|
452
856
|
}
|
|
453
|
-
if (config2.
|
|
454
|
-
throw new Error("
|
|
857
|
+
if (config2.chunkSizeSeconds !== void 0 && (typeof config2.chunkSizeSeconds !== "number" || config2.chunkSizeSeconds < 30)) {
|
|
858
|
+
throw new Error("chunkSizeSeconds must be a number >= 30");
|
|
455
859
|
}
|
|
456
|
-
if (config2.
|
|
457
|
-
throw new Error("
|
|
860
|
+
if (config2.previewLength !== void 0 && (typeof config2.previewLength !== "number" || config2.previewLength < 100)) {
|
|
861
|
+
throw new Error("previewLength must be a number >= 100");
|
|
458
862
|
}
|
|
459
|
-
if (config2.
|
|
460
|
-
throw new Error("
|
|
863
|
+
if (config2.maxFullTranscriptLength !== void 0 && (typeof config2.maxFullTranscriptLength !== "number" || config2.maxFullTranscriptLength < 1e3)) {
|
|
864
|
+
throw new Error("maxFullTranscriptLength must be a number >= 1000");
|
|
461
865
|
}
|
|
462
|
-
if (config2.
|
|
463
|
-
throw new Error("
|
|
866
|
+
if (config2.searchSegmentSeconds !== void 0 && (typeof config2.searchSegmentSeconds !== "number" || config2.searchSegmentSeconds < 10)) {
|
|
867
|
+
throw new Error("searchSegmentSeconds must be a number >= 10");
|
|
464
868
|
}
|
|
465
869
|
}
|
|
466
870
|
};
|
|
@@ -590,8 +994,7 @@ const contentApi = [
|
|
|
590
994
|
path: "/mcp",
|
|
591
995
|
handler: "mcp.handle",
|
|
592
996
|
config: {
|
|
593
|
-
policies: []
|
|
594
|
-
auth: false
|
|
997
|
+
policies: []
|
|
595
998
|
}
|
|
596
999
|
},
|
|
597
1000
|
{
|
|
@@ -599,8 +1002,7 @@ const contentApi = [
|
|
|
599
1002
|
path: "/mcp",
|
|
600
1003
|
handler: "mcp.handle",
|
|
601
1004
|
config: {
|
|
602
|
-
policies: []
|
|
603
|
-
auth: false
|
|
1005
|
+
policies: []
|
|
604
1006
|
}
|
|
605
1007
|
},
|
|
606
1008
|
{
|
|
@@ -608,8 +1010,7 @@ const contentApi = [
|
|
|
608
1010
|
path: "/mcp",
|
|
609
1011
|
handler: "mcp.handle",
|
|
610
1012
|
config: {
|
|
611
|
-
policies: []
|
|
612
|
-
auth: false
|
|
1013
|
+
policies: []
|
|
613
1014
|
}
|
|
614
1015
|
},
|
|
615
1016
|
// Other routes
|