yt-embeddings-strapi-plugin 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +531 -0
- package/dist/_chunks/App-Cv1cdLAr.js +587 -0
- package/dist/_chunks/App-bN58O1bN.mjs +583 -0
- package/dist/_chunks/en-B4KWt_jN.js +4 -0
- package/dist/_chunks/en-Byx4XI2L.mjs +4 -0
- package/dist/_chunks/index-BAfBs5PQ.js +172 -0
- package/dist/_chunks/index-K6X5FM2O.mjs +173 -0
- package/dist/admin/index.js +4 -0
- package/dist/admin/index.mjs +5 -0
- package/dist/admin/src/components/Initializer.d.ts +5 -0
- package/dist/admin/src/components/PluginIcon.d.ts +2 -0
- package/dist/admin/src/components/custom/BackLink.d.ts +5 -0
- package/dist/admin/src/components/custom/ChatModal.d.ts +1 -0
- package/dist/admin/src/components/custom/EmbeddingsModal.d.ts +1 -0
- package/dist/admin/src/components/custom/EmbeddingsWidget.d.ts +1 -0
- package/dist/admin/src/components/custom/Illo.d.ts +1 -0
- package/dist/admin/src/components/custom/Markdown.d.ts +5 -0
- package/dist/admin/src/components/custom/RobotIcon.d.ts +6 -0
- package/dist/admin/src/index.d.ts +12 -0
- package/dist/admin/src/pages/App.d.ts +2 -0
- package/dist/admin/src/pages/EmbeddingDetails.d.ts +1 -0
- package/dist/admin/src/pages/HomePage.d.ts +1 -0
- package/dist/admin/src/pluginId.d.ts +1 -0
- package/dist/admin/src/utils/api.d.ts +81 -0
- package/dist/admin/src/utils/getTranslation.d.ts +2 -0
- package/dist/server/index.js +2220 -0
- package/dist/server/index.mjs +2203 -0
- package/dist/server/src/bootstrap.d.ts +5 -0
- package/dist/server/src/config/index.d.ts +38 -0
- package/dist/server/src/content-types/index.d.ts +2 -0
- package/dist/server/src/controllers/controller.d.ts +13 -0
- package/dist/server/src/controllers/index.d.ts +30 -0
- package/dist/server/src/controllers/mcp.d.ts +18 -0
- package/dist/server/src/controllers/yt-controller.d.ts +13 -0
- package/dist/server/src/destroy.d.ts +5 -0
- package/dist/server/src/index.d.ts +280 -0
- package/dist/server/src/mcp/index.d.ts +6 -0
- package/dist/server/src/mcp/schemas/index.d.ts +55 -0
- package/dist/server/src/mcp/server.d.ts +8 -0
- package/dist/server/src/mcp/tools/get-video-transcript-range.d.ts +33 -0
- package/dist/server/src/mcp/tools/get-yt-video-summary.d.ts +23 -0
- package/dist/server/src/mcp/tools/index.d.ts +38 -0
- package/dist/server/src/mcp/tools/list-yt-videos.d.ts +28 -0
- package/dist/server/src/mcp/tools/search-yt-knowledge.d.ts +51 -0
- package/dist/server/src/middlewares/index.d.ts +2 -0
- package/dist/server/src/migrations/002-yt-tables.d.ts +2 -0
- package/dist/server/src/plugin-manager.d.ts +81 -0
- package/dist/server/src/policies/index.d.ts +2 -0
- package/dist/server/src/register.d.ts +5 -0
- package/dist/server/src/routes/admin.d.ts +14 -0
- package/dist/server/src/routes/content-api.d.ts +20 -0
- package/dist/server/src/routes/index.d.ts +41 -0
- package/dist/server/src/services/ai-tools.d.ts +127 -0
- package/dist/server/src/services/index.d.ts +185 -0
- package/dist/server/src/services/yt-embeddings.d.ts +68 -0
- package/dist/server/src/services/yt-metadata.d.ts +12 -0
- package/dist/server/src/tools/get-video-transcript-range.d.ts +32 -0
- package/dist/server/src/tools/get-yt-video-summary.d.ts +36 -0
- package/dist/server/src/tools/index.d.ts +126 -0
- package/dist/server/src/tools/list-yt-videos.d.ts +25 -0
- package/dist/server/src/tools/search-yt-knowledge.d.ts +35 -0
- package/dist/server/src/utils/chunking.d.ts +44 -0
- package/dist/server/src/utils/preprocessing.d.ts +26 -0
- package/dist/server/src/utils/yt-chunker.d.ts +16 -0
- package/package.json +106 -0
|
@@ -0,0 +1,2203 @@
|
|
|
1
|
+
import { OpenAIEmbeddings, ChatOpenAI } from "@langchain/openai";
|
|
2
|
+
import { PGVectorStore } from "@langchain/community/vectorstores/pgvector";
|
|
3
|
+
import { StringOutputParser } from "@langchain/core/output_parsers";
|
|
4
|
+
import { ChatPromptTemplate } from "@langchain/core/prompts";
|
|
5
|
+
import { RunnableSequence, RunnablePassthrough } from "@langchain/core/runnables";
|
|
6
|
+
import { Pool } from "pg";
|
|
7
|
+
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
8
|
+
import { ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprotocol/sdk/types.js";
|
|
9
|
+
import { z } from "zod";
|
|
10
|
+
import { randomUUID } from "node:crypto";
|
|
11
|
+
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
|
|
12
|
+
import * as crypto from "crypto";
|
|
13
|
+
/**
 * Embedding models the plugin accepts, keyed by OpenAI model name.
 * `dimensions` is the vector length used for the pgvector column.
 */
const EMBEDDING_MODELS = {
  "text-embedding-3-small": { dimensions: 1536 },
  "text-embedding-3-large": { dimensions: 3072 },
  "text-embedding-ada-002": { dimensions: 1536 }
};

/**
 * Plugin configuration: default values plus a validator that only warns.
 * The validator never throws and never mutates the supplied config.
 */
const config = {
  default: {
    openAIApiKey: "",
    neonConnectionString: "",
    embeddingModel: "text-embedding-3-small",
    chunkSize: 4e3,
    chunkOverlap: 200,
    autoChunk: false,
    preprocessContent: true
  },
  /**
   * Emit a console warning for every setting that is missing or out of range.
   *
   * @param {object} config2 - Resolved plugin configuration.
   * @returns {void}
   */
  validator(config2) {
    const { openAIApiKey, neonConnectionString, embeddingModel, chunkSize } = config2;
    if (!openAIApiKey) {
      console.warn(
        "yt-embeddings-strapi-plugin: openAIApiKey is not configured. Plugin features will be disabled."
      );
    }
    if (!neonConnectionString) {
      console.warn(
        "yt-embeddings-strapi-plugin: neonConnectionString is not configured. Plugin features will be disabled."
      );
    }
    // Own-property check: a plain lookup would accept inherited keys such as
    // "constructor" or "toString" as if they were valid model names.
    if (embeddingModel && !Object.prototype.hasOwnProperty.call(EMBEDDING_MODELS, embeddingModel)) {
      console.warn(
        `yt-embeddings-strapi-plugin: Invalid embeddingModel "${embeddingModel}". Valid options: ${Object.keys(EMBEDDING_MODELS).join(", ")}. Defaulting to "text-embedding-3-small".`
      );
    }
    // Only skip the range check when chunkSize is absent. A truthiness test
    // would silently accept 0, which is outside the recommended range.
    const hasChunkSize = chunkSize !== void 0 && chunkSize !== null;
    if (hasChunkSize && !(chunkSize >= 100 && chunkSize <= 8e3)) {
      console.warn(
        `yt-embeddings-strapi-plugin: chunkSize ${chunkSize} is outside recommended range (100-8000). Using default value of 4000.`
      );
    }
  }
};
|
|
51
|
+
/**
 * Owns the shared resources of the plugin: the Postgres connection pool,
 * the OpenAI embeddings client, the chat model, and the configuration handed
 * to `PGVectorStore`. Everything is created lazily by `initialize()` and
 * released by `destroy()`.
 */
class PluginManager {
  constructor() {
    this.embeddings = null;
    this.chat = null;
    this.pool = null;
    this.embeddingModel = "text-embedding-3-small";
    this.dimensions = 1536;
    this.vectorStoreConfig = null;
  }

  /**
   * Create the connection pool (once), verify it with `SELECT 1`, and make
   * sure the vector table exists.
   *
   * @param {string} connectionString - Postgres connection string.
   * @returns {Promise<object>} The ready pool.
   * @throws {Error} When the pool cannot be created or the probe query fails.
   */
  async initializePool(connectionString) {
    console.log("Initializing Neon DB Pool");
    if (this.pool) return this.pool;
    try {
      this.pool = new Pool({
        connectionString,
        // NOTE(review): certificate verification is disabled here; confirm
        // this is intentional for the target database.
        ssl: { rejectUnauthorized: false },
        max: 10
      });
      const client = await this.pool.connect();
      try {
        await client.query("SELECT 1");
      } finally {
        // Always hand the client back, even when the probe query throws.
        client.release();
      }
      await this.initializeVectorTable();
      console.log("Neon DB Pool initialized successfully");
      return this.pool;
    } catch (error) {
      // Do not keep a broken pool cached: a later call must be able to retry
      // instead of receiving the failed instance from the early return above.
      const failedPool = this.pool;
      this.pool = null;
      if (failedPool) {
        // Shutdown errors are secondary to the failure being reported.
        await failedPool.end().catch(() => {});
      }
      console.error(`Failed to initialize Neon DB Pool: ${error}`);
      throw new Error(`Failed to initialize Neon DB Pool: ${error}`, { cause: error });
    }
  }

  /**
   * Best-effort creation of the pgvector extension, the `embeddings_documents`
   * table and its indexes. Each step runs independently so that one failing
   * step does not prevent the following ones; failures are logged with their
   * real cause and never thrown.
   *
   * @returns {Promise<void>}
   * @throws {Error} Only when the pool has not been created yet.
   */
  async initializeVectorTable() {
    if (!this.pool) throw new Error("Pool not initialized");
    const steps = [
      ["create pgvector extension", "CREATE EXTENSION IF NOT EXISTS vector"],
      [
        "create embeddings_documents table",
        `
        CREATE TABLE IF NOT EXISTS embeddings_documents (
          id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
          content TEXT,
          metadata JSONB,
          embedding vector(${this.dimensions})
        )
      `
      ],
      [
        "drop legacy embedding index",
        `
        DROP INDEX IF EXISTS embeddings_documents_embedding_idx
      `
      ],
      // NOTE(review): pgvector documents an upper bound on the number of
      // dimensions an indexed `vector` column may have; with the 3072-dimension
      // model this step may be rejected. That is why it is isolated.
      [
        "create HNSW embedding index",
        `
        CREATE INDEX IF NOT EXISTS embeddings_documents_embedding_hnsw_idx
        ON embeddings_documents
        USING hnsw (embedding vector_cosine_ops)
      `
      ],
      [
        "create metadata GIN index",
        `
        CREATE INDEX IF NOT EXISTS embeddings_documents_metadata_idx
        ON embeddings_documents
        USING gin (metadata)
      `
      ]
    ];
    const client = await this.pool.connect();
    try {
      let failedSteps = 0;
      for (const [label, sql] of steps) {
        try {
          await client.query(sql);
        } catch (error) {
          failedSteps += 1;
          console.warn(`Vector table setup: failed to ${label}: ${error?.message || error}`);
        }
      }
      if (failedSteps === 0) {
        console.log(`Vector table initialized (dimensions: ${this.dimensions})`);
      }
    } finally {
      client.release();
    }
  }

  /**
   * Create (once) the OpenAI embeddings client for the selected model.
   *
   * @param {string} openAIApiKey - OpenAI API key.
   * @returns {Promise<object>} The embeddings client.
   * @throws {Error} When the client cannot be constructed.
   */
  async initializeEmbeddings(openAIApiKey) {
    console.log(`Initializing OpenAI Embeddings (model: ${this.embeddingModel})`);
    if (this.embeddings) return this.embeddings;
    try {
      const options = {
        openAIApiKey,
        modelName: this.embeddingModel
      };
      // The `dimensions` request parameter is only accepted by the
      // text-embedding-3 family; sending it with ada-002 makes requests fail.
      if (this.embeddingModel.startsWith("text-embedding-3")) {
        options.dimensions = this.dimensions;
      }
      this.embeddings = new OpenAIEmbeddings(options);
      return this.embeddings;
    } catch (error) {
      console.error(`Failed to initialize Embeddings: ${error}`);
      throw new Error(`Failed to initialize Embeddings: ${error}`, { cause: error });
    }
  }

  /**
   * Create (once) the chat model used to answer RAG questions.
   *
   * @param {string} openAIApiKey - OpenAI API key.
   * @returns {Promise<object>} The chat model.
   * @throws {Error} When the model cannot be constructed.
   */
  async initializeChat(openAIApiKey) {
    console.log("Initializing Chat Model");
    if (this.chat) return this.chat;
    try {
      this.chat = new ChatOpenAI({
        modelName: "gpt-4o-mini",
        temperature: 0.7,
        openAIApiKey
      });
      return this.chat;
    } catch (error) {
      console.error(`Failed to initialize Chat: ${error}`);
      throw new Error(`Failed to initialize Chat: ${error}`, { cause: error });
    }
  }

  /**
   * Resolve the embedding model, then bring up the pool, the embeddings
   * client, the chat model and the vector store configuration.
   *
   * @param {object} config2 - Plugin configuration (`embeddingModel`,
   *   `neonConnectionString`, `openAIApiKey`).
   * @returns {Promise<void>}
   */
  async initialize(config2) {
    const requestedModel = config2.embeddingModel || "text-embedding-3-small";
    // Own-property check so inherited keys ("constructor", ...) are rejected.
    const isKnownModel = Object.prototype.hasOwnProperty.call(EMBEDDING_MODELS, requestedModel);
    if (!isKnownModel) {
      console.warn(`Invalid embedding model "${requestedModel}", using default`);
    }
    this.embeddingModel = isKnownModel ? requestedModel : "text-embedding-3-small";
    this.dimensions = EMBEDDING_MODELS[this.embeddingModel].dimensions;
    console.log(`Using embedding model: ${this.embeddingModel} (${this.dimensions} dimensions)`);
    await this.initializePool(config2.neonConnectionString);
    await this.initializeEmbeddings(config2.openAIApiKey);
    await this.initializeChat(config2.openAIApiKey);
    if (this.pool) {
      this.vectorStoreConfig = {
        pool: this.pool,
        tableName: "embeddings_documents",
        columns: {
          idColumnName: "id",
          vectorColumnName: "embedding",
          contentColumnName: "content",
          metadataColumnName: "metadata"
        },
        distanceStrategy: "cosine"
      };
    }
    console.log("Plugin Manager Initialization Complete");
  }

  /**
   * Embed `docData.content` and insert it into `embeddings_documents`.
   * Makes up to three attempts; rate-limit errors wait `2000ms * attempt`
   * before the next try, other errors are retried immediately.
   *
   * @param {object} docData - `{ id, title, content, collectionType?, fieldName? }`.
   * @returns {Promise<{embeddingId: string, embedding: number[]}>}
   * @throws {Error} When not initialized or when every attempt fails.
   */
  async createEmbedding(docData) {
    if (!this.embeddings || !this.vectorStoreConfig || !this.pool) {
      throw new Error("Plugin manager not initialized");
    }
    const maxRetries = 3;
    const retryDelay = 2e3;
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        const embeddingVector = await this.embeddings.embedQuery(docData.content);
        const metadata = {
          id: docData.id,
          title: docData.title,
          collectionType: docData.collectionType || "standalone",
          fieldName: docData.fieldName || "content"
        };
        // pgvector accepts the textual form "[v1,v2,...]" cast to `vector`.
        const vectorString = `[${embeddingVector.join(",")}]`;
        const result = await this.pool.query(
          `INSERT INTO embeddings_documents (content, metadata, embedding)
           VALUES ($1, $2::jsonb, $3::vector)
           RETURNING id`,
          [docData.content, JSON.stringify(metadata), vectorString]
        );
        return {
          embeddingId: result.rows[0]?.id || "",
          embedding: embeddingVector
        };
      } catch (error) {
        const message = String(error?.message || error);
        // Heuristic match on the error text; "rate" is intentionally broad.
        const isRateLimit = message.includes("429") || message.includes("rate");
        const isLastAttempt = attempt === maxRetries;
        if (isRateLimit && !isLastAttempt) {
          // Log the delay that is really applied (it grows with the attempt).
          const waitMs = retryDelay * attempt;
          console.log(`[createEmbedding] Rate limited, waiting ${waitMs}ms before retry ${attempt + 1}/${maxRetries}...`);
          await new Promise((resolve) => setTimeout(resolve, waitMs));
          continue;
        }
        console.error(`[createEmbedding] Failed (attempt ${attempt}/${maxRetries}):`, message);
        if (isLastAttempt) {
          throw new Error(`Failed to create embedding after ${maxRetries} attempts: ${message}`, { cause: error });
        }
      }
    }
    throw new Error("Failed to create embedding: unexpected error");
  }

  /**
   * Delete every stored row whose metadata `id` equals the given Strapi id.
   *
   * @param {string} strapiId - Identifier stored under `metadata.id`.
   * @returns {Promise<void>}
   * @throws {Error} When not initialized or the query fails.
   */
  async deleteEmbedding(strapiId) {
    if (!this.pool) {
      throw new Error("Plugin manager not initialized");
    }
    try {
      await this.pool.query(
        `DELETE FROM embeddings_documents WHERE metadata->>'id' = $1`,
        [strapiId]
      );
    } catch (error) {
      console.error(`Failed to delete embedding: ${error}`);
      throw new Error(`Failed to delete embedding: ${error}`, { cause: error });
    }
  }

  /**
   * Answer a question with retrieval-augmented generation: fetch the six
   * nearest documents, keep those scoring below the threshold, feed the top
   * three to the chat model and return the answer with the best match.
   *
   * @param {string} query - User question.
   * @returns {Promise<{text: string, sourceDocuments: object[]}>}
   * @throws {Error} When not initialized or any step fails.
   */
  async queryEmbedding(query) {
    if (!this.embeddings || !this.chat || !this.vectorStoreConfig) {
      throw new Error("Plugin manager not initialized");
    }
    try {
      const vectorStore = await PGVectorStore.initialize(
        this.embeddings,
        this.vectorStoreConfig
      );
      const resultsWithScores = await vectorStore.similaritySearchWithScore(query, 6);
      console.log(`[queryEmbedding] Query: "${query}"`);
      console.log(`[queryEmbedding] Found ${resultsWithScores.length} results:`);
      resultsWithScores.forEach(([doc, score], i) => {
        console.log(` ${i + 1}. Score: ${score.toFixed(4)}, Title: ${doc.metadata?.title || "N/A"}`);
      });
      // Results are kept when their score is strictly below this value.
      const SIMILARITY_THRESHOLD = 1;
      const relevantResults = resultsWithScores.filter(([, score]) => score < SIMILARITY_THRESHOLD);
      console.log(`[queryEmbedding] ${relevantResults.length} results passed threshold (< ${SIMILARITY_THRESHOLD})`);
      const topResults = relevantResults.slice(0, 3);
      const sourceDocuments = topResults.map(([doc]) => doc);
      // Only the best match is returned to the caller for display.
      const bestMatchForDisplay = topResults.length > 0 ? [topResults[0][0]] : [];
      const formatDocs = (docs) => docs.map((doc) => {
        const title = doc.metadata?.title ? `Title: ${doc.metadata.title}\n` : "";
        return `${title}${doc.pageContent}`;
      }).join("\n\n");
      const systemPrompt = [
        "You are a helpful assistant that answers questions based on the provided context.",
        "If you cannot find the answer in the context, say so. Be concise and accurate.",
        "",
        "Context:",
        "{context}"
      ].join("\n");
      const ragPrompt = ChatPromptTemplate.fromMessages([
        ["system", systemPrompt],
        ["human", "{question}"]
      ]);
      const ragChain = RunnableSequence.from([
        {
          context: async () => formatDocs(sourceDocuments),
          question: new RunnablePassthrough()
        },
        ragPrompt,
        this.chat,
        new StringOutputParser()
      ]);
      const text = await ragChain.invoke(query);
      return {
        text,
        sourceDocuments: bestMatchForDisplay
      };
    } catch (error) {
      console.error(`Failed to query embeddings: ${error}`);
      throw new Error(`Failed to query embeddings: ${error}`, { cause: error });
    }
  }

  /**
   * Return the `k` documents nearest to `query`.
   *
   * @param {string} query - Search text.
   * @param {number} [k=4] - Number of documents to return.
   * @returns {Promise<object[]>}
   * @throws {Error} When not initialized or the search fails.
   */
  async similaritySearch(query, k = 4) {
    if (!this.embeddings || !this.vectorStoreConfig) {
      throw new Error("Plugin manager not initialized");
    }
    try {
      const vectorStore = await PGVectorStore.initialize(
        this.embeddings,
        this.vectorStoreConfig
      );
      return await vectorStore.similaritySearch(query, k);
    } catch (error) {
      console.error(`Failed to perform similarity search: ${error}`);
      throw new Error(`Failed to perform similarity search: ${error}`, { cause: error });
    }
  }

  /**
   * @returns {boolean} True once embeddings, chat model and pool all exist.
   */
  isInitialized() {
    return !!(this.embeddings && this.chat && this.pool);
  }

  /**
   * Get all embeddings from Neon DB.
   * Returns the metadata (including the Strapi id) for each embedding.
   *
   * @returns {Promise<object[]>}
   * @throws {Error} When not initialized or the query fails.
   */
  async getAllNeonEmbeddings() {
    if (!this.pool) {
      throw new Error("Plugin manager not initialized");
    }
    try {
      const result = await this.pool.query(`
        SELECT
          id,
          content,
          metadata->>'id' as strapi_id,
          metadata->>'title' as title,
          metadata->>'collectionType' as collection_type,
          metadata->>'fieldName' as field_name
        FROM embeddings_documents
        ORDER BY id
      `);
      return result.rows.map((row) => ({
        id: row.id,
        strapiId: row.strapi_id,
        title: row.title || "",
        content: row.content || "",
        collectionType: row.collection_type || "standalone",
        fieldName: row.field_name || "content"
      }));
    } catch (error) {
      console.error(`Failed to get Neon embeddings: ${error}`);
      throw new Error(`Failed to get Neon embeddings: ${error}`, { cause: error });
    }
  }

  /**
   * Delete an embedding from Neon by its Neon UUID (not Strapi ID).
   *
   * @param {string} neonId - Primary key of the row.
   * @returns {Promise<void>}
   * @throws {Error} When not initialized or the query fails.
   */
  async deleteNeonEmbeddingById(neonId) {
    if (!this.pool) {
      throw new Error("Plugin manager not initialized");
    }
    try {
      await this.pool.query(
        `DELETE FROM embeddings_documents WHERE id = $1`,
        [neonId]
      );
    } catch (error) {
      console.error(`Failed to delete Neon embedding: ${error}`);
      throw new Error(`Failed to delete Neon embedding: ${error}`, { cause: error });
    }
  }

  getPool() {
    return this.pool;
  }

  getEmbeddings() {
    return this.embeddings;
  }

  getEmbeddingModel() {
    return this.embeddingModel;
  }

  getChat() {
    return this.chat;
  }

  /**
   * Release every resource. State is cleared before the pool is closed so the
   * manager never keeps a reference to a pool whose shutdown failed.
   *
   * @returns {Promise<void>}
   */
  async destroy() {
    const pool = this.pool;
    this.pool = null;
    this.embeddings = null;
    this.chat = null;
    this.vectorStoreConfig = null;
    if (pool) {
      await pool.end();
    }
  }

  /**
   * Clear all embeddings from Neon DB.
   *
   * @returns {Promise<number>} The number of deleted rows.
   * @throws {Error} When not initialized or the query fails.
   */
  async clearAllNeonEmbeddings() {
    if (!this.pool) {
      throw new Error("Plugin manager not initialized");
    }
    try {
      // rowCount already reports the number of deleted rows, so there is no
      // need to ship every deleted id back with RETURNING.
      const result = await this.pool.query("DELETE FROM embeddings_documents");
      console.log(`[clearAllNeonEmbeddings] Deleted ${result.rowCount} embeddings from Neon`);
      return result.rowCount || 0;
    } catch (error) {
      console.error(`Failed to clear Neon embeddings: ${error}`);
      throw new Error(`Failed to clear Neon embeddings: ${error}`, { cause: error });
    }
  }

  /**
   * Debug method to inspect raw data in Neon DB (first 20 rows).
   * Content is truncated to 200 characters.
   *
   * @returns {Promise<object[]>}
   * @throws {Error} When not initialized or the query fails.
   */
  async debugNeonEmbeddings() {
    if (!this.pool) {
      throw new Error("Plugin manager not initialized");
    }
    try {
      // vector_dims() is pgvector's own dimension function; it avoids casting
      // the vector to an array type just to measure it.
      const result = await this.pool.query(`
        SELECT
          id,
          content,
          metadata,
          pg_typeof(metadata) as metadata_type,
          embedding IS NOT NULL as has_embedding,
          COALESCE(vector_dims(embedding), 0) as embedding_length
        FROM embeddings_documents
        ORDER BY id
        LIMIT 20
      `);
      return result.rows.map((row) => {
        // A NULL content column must render as "" rather than "undefined".
        const content = row.content ?? "";
        return {
          id: row.id,
          content: content.length > 200 ? `${content.substring(0, 200)}...` : content,
          metadata: row.metadata,
          metadataType: row.metadata_type,
          hasEmbedding: row.has_embedding,
          embeddingLength: row.embedding_length || 0
        };
      });
    } catch (error) {
      console.error(`Failed to debug Neon embeddings: ${error}`);
      throw new Error(`Failed to debug Neon embeddings: ${error}`, { cause: error });
    }
  }
}

/** Shared manager instance used by the rest of the module. */
const pluginManager = new PluginManager();
|
|
432
|
+
// Zod input schemas for the MCP tools. Optional fields carry defaults so that
// parsed values are always fully populated.

/** Input for `search_yt_knowledge`. */
const SearchYtKnowledgeSchema = z.object({
  query: z
    .string()
    .min(1, "Query is required"),
  limit: z
    .number()
    .min(1)
    .max(20)
    .optional()
    .default(5),
  videoId: z
    .string()
    .optional(),
  topics: z
    .array(z.string())
    .optional(),
  contextWindowSeconds: z
    .number()
    .min(0)
    .optional()
    .default(30),
  minSimilarity: z
    .number()
    .min(0)
    .max(1)
    .optional()
    .default(0.65)
});

/** Input for `get_video_transcript_range`. */
const GetVideoTranscriptRangeSchema = z.object({
  videoId: z
    .string()
    .min(1, "Video ID is required"),
  startSeconds: z
    .number()
    .min(0, "Start seconds must be >= 0"),
  endSeconds: z
    .number()
    .min(0, "End seconds must be >= 0")
});

/** Input for `list_yt_videos`. */
const ListYtVideosSchema = z.object({
  page: z
    .number()
    .min(1)
    .optional()
    .default(1),
  pageSize: z
    .number()
    .min(1)
    .max(50)
    .optional()
    .default(25)
});

/** Input for `get_yt_video_summary`. */
const GetYtVideoSummarySchema = z.object({
  videoId: z
    .string()
    .min(1, "Video ID is required")
});

/** Registry mapping each MCP tool name to its input schema. */
const ToolSchemas = {
  search_yt_knowledge: SearchYtKnowledgeSchema,
  get_video_transcript_range: GetVideoTranscriptRangeSchema,
  list_yt_videos: ListYtVideosSchema,
  get_yt_video_summary: GetYtVideoSummarySchema
};
|
|
458
|
+
/**
 * Validate the raw arguments of an MCP tool call against its registered schema.
 *
 * @param {string} toolName - Key of the tool in `ToolSchemas`.
 * @param {unknown} input - Raw arguments supplied by the caller.
 * @returns {object} The parsed arguments, with schema defaults applied.
 * @throws {Error} When no schema is registered for `toolName`, or when the
 *   input does not satisfy the schema.
 */
function validateToolInput(toolName, input) {
  // Own-property lookup: tool names come from the request, and inherited keys
  // such as "constructor" must not be mistaken for a schema.
  const isRegistered = Object.prototype.hasOwnProperty.call(ToolSchemas, toolName);
  const schema = isRegistered ? ToolSchemas[toolName] : void 0;
  if (!schema) {
    throw new Error(`No schema defined for tool: ${toolName}`);
  }
  const result = schema.safeParse(input);
  if (!result.success) {
    // `issues` is the canonical list on ZodError; `errors` is only an alias
    // that is not available in every Zod major version.
    const issues = result.error.issues ?? result.error.errors ?? [];
    const errors = issues.map((issue) => `${issue.path.join(".")}: ${issue.message}`).join(", ");
    throw new Error(`Validation failed for ${toolName}: ${errors}`);
  }
  return result.data;
}
|
|
470
|
+
/**
 * Render a duration in seconds as `m:ss` (minutes are not capped at 59).
 *
 * @param {number} seconds - Duration in seconds; fractions are floored.
 * @returns {string} e.g. `65` -> `"1:05"`.
 */
function formatTime$5(seconds) {
  const wholeMinutes = Math.floor(seconds / 60);
  const remainder = Math.floor(seconds % 60);
  const paddedSeconds = String(remainder).padStart(2, "0");
  return `${wholeMinutes}:${paddedSeconds}`;
}
|
|
475
|
+
/** MCP tool descriptor (name, description, JSON input schema) for transcript search. */
const searchYtKnowledgeMcpTool = {
  name: "search_yt_knowledge",
  description: "Semantically search YouTube video transcripts. Returns relevant passages with timestamps, deep links, video topics, and summary.",
  inputSchema: {
    type: "object",
    properties: {
      query: { type: "string", description: "What to search for" },
      limit: { type: "number", description: "Number of results (default: 5)" },
      videoId: { type: "string", description: "Optional: limit search to one video" },
      topics: {
        type: "array",
        items: { type: "string" },
        description: 'Optional: filter to videos covering these topics (e.g. ["RAG", "MCP"])'
      },
      contextWindowSeconds: { type: "number", description: "Seconds of context around match (default: 30)" },
      minSimilarity: { type: "number", description: "Minimum similarity 0-1 (default: 0.65)" }
    },
    // Only the search text is mandatory; everything else has a default.
    required: ["query"]
  }
};
|
|
510
|
+
/**
 * MCP handler for `search_yt_knowledge`: runs the plugin's transcript search
 * and renders every hit as a block of plain text.
 *
 * @param {object} strapi - Strapi instance used to resolve the `ytEmbeddings` service.
 * @param {object} args - Tool arguments (`query` plus optional filters).
 * @returns {Promise<{content: Array<{type: string, text: string}>}>}
 */
async function handleSearchYtKnowledge(strapi, args) {
  const searchOptions = {
    limit: args.limit ?? 5,
    minSimilarity: args.minSimilarity ?? 0.65,
    videoId: args.videoId,
    topics: args.topics,
    contextWindowSeconds: args.contextWindowSeconds ?? 30
  };
  const results = await strapi
    .plugin("yt-embeddings-strapi-plugin")
    .service("ytEmbeddings")
    .search(args.query, searchOptions);

  if (!results.length) {
    return {
      content: [{ type: "text", text: "No relevant content found." }]
    };
  }

  const renderHit = (hit, index) => {
    // Optional lines carry their own trailing newline so that they vanish
    // completely when the hit has no topics or summary.
    const topicLine = hit.topics?.length ? `Topics: ${hit.topics.join(", ")}\n` : "";
    const summaryLine = hit.videoSummary ? `Summary: ${hit.videoSummary}\n` : "";
    const lines = [
      `--- Result ${index + 1} (similarity: ${hit.similarity}) ---`,
      `Video: "${hit.title}"`,
      `${topicLine}${summaryLine}Timestamp: ${formatTime$5(hit.startSeconds)} – ${formatTime$5(hit.endSeconds)}`,
      `Watch: ${hit.deepLink}`,
      "",
      `${hit.contextText}`
    ];
    return lines.join("\n").trim();
  };

  const formatted = results.map((hit, index) => renderHit(hit, index)).join("\n\n");
  return {
    content: [{ type: "text", text: formatted }]
  };
}
|
|
541
|
+
/**
 * Format a number of seconds as `m:ss`.
 *
 * @param {number} seconds - Duration in seconds; fractional parts are dropped.
 * @returns {string} Minutes, a colon, and two-digit seconds.
 */
function formatTime$4(seconds) {
  const minutePart = Math.floor(seconds / 60);
  const secondPart = Math.floor(seconds % 60).toString();
  return [minutePart, secondPart.padStart(2, "0")].join(":");
}
|
|
546
|
+
/** MCP tool descriptor for reading the transcript of one video within a time range. */
const getVideoTranscriptRangeMcpTool = {
  name: "get_video_transcript_range",
  description: 'Get the raw transcript text for a specific time range in a YouTube video. Useful for "what was said around 5:30" type queries.',
  inputSchema: {
    type: "object",
    properties: {
      videoId: { type: "string", description: "YouTube video ID" },
      startSeconds: { type: "number", description: "Start of range in seconds" },
      endSeconds: { type: "number", description: "End of range in seconds" }
    },
    // All three arguments are mandatory.
    required: ["videoId", "startSeconds", "endSeconds"]
  }
};
|
|
568
|
+
/**
 * MCP handler for `get_video_transcript_range`: returns the transcript text
 * spoken between two timestamps of a video, plus a deep link to the start.
 *
 * @param {object} strapi - Strapi instance used to resolve the `ytEmbeddings` service.
 * @param {object} args - `{ videoId, startSeconds, endSeconds }`.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>}
 */
async function handleGetVideoTranscriptRange(strapi, args) {
  const rows = await strapi
    .plugin("yt-embeddings-strapi-plugin")
    .service("ytEmbeddings")
    .getTranscriptRange(args.videoId, args.startSeconds, args.endSeconds);

  if (!rows.length) {
    const notFound = {
      type: "text",
      text: `No transcript found for video ${args.videoId} in that time range.`
    };
    return { content: [notFound] };
  }

  // Segment boundaries are divided by 1000 before being compared with the
  // requested range, which is expressed in seconds.
  const overlapsRange = (segment) => segment.end / 1e3 > args.startSeconds && segment.start / 1e3 < args.endSeconds;
  const allSegments = rows.flatMap((row) => row.segments);
  const spokenText = allSegments.filter(overlapsRange).map((segment) => segment.text).join(" ");
  const text = spokenText.replace(/\s+/g, " ").trim();
  const deepLink = `https://www.youtube.com/watch?v=${args.videoId}&t=${Math.floor(args.startSeconds)}`;

  const body = [
    `Transcript ${formatTime$4(args.startSeconds)}–${formatTime$4(args.endSeconds)}:`,
    "",
    text,
    "",
    `Watch: ${deepLink}`
  ].join("\n");
  return {
    content: [{ type: "text", text: body }]
  };
}
|
|
591
|
+
/**
 * Render a duration as `h:mm:ss`, or as `m:ss` when it is shorter than an hour.
 *
 * @param {number} seconds - Duration in seconds; fractions are floored.
 * @returns {string} e.g. `3725` -> `"1:02:05"`, `125` -> `"2:05"`.
 */
function formatDuration$1(seconds) {
  const twoDigits = (value) => value.toString().padStart(2, "0");
  const hours = Math.floor(seconds / 3600);
  const minutes = Math.floor(seconds % 3600 / 60);
  const secs = Math.floor(seconds % 60);
  if (hours > 0) {
    return `${hours}:${twoDigits(minutes)}:${twoDigits(secs)}`;
  }
  // Below one hour the minutes are left unpadded.
  return `${minutes}:${twoDigits(secs)}`;
}
|
|
597
|
+
/** MCP tool descriptor for paging through the ingested videos. */
const listYtVideosMcpTool = {
  name: "list_yt_videos",
  description: "List all ingested YouTube videos with their topics, chunk count, and duration. Useful for discovering available content before searching.",
  inputSchema: {
    type: "object",
    properties: {
      page: { type: "number", description: "Page number (default: 1)" },
      pageSize: { type: "number", description: "Results per page (default: 25)" }
    },
    // Both paging arguments are optional.
    required: []
  }
};
|
|
615
|
+
/**
 * MCP handler for `list_yt_videos`: fetches one page of ingested videos and
 * renders it as a numbered plain-text list.
 *
 * @param {object} strapi - Strapi instance used to resolve the `ytEmbeddings` service.
 * @param {object} args - Optional `page` and `pageSize`.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>}
 */
async function handleListYtVideos(strapi, args) {
  const paging = {
    page: args.page ?? 1,
    pageSize: args.pageSize ?? 25
  };
  const result = await strapi
    .plugin("yt-embeddings-strapi-plugin")
    .service("ytEmbeddings")
    .listVideos(paging);

  if (!result.data.length) {
    return {
      content: [{ type: "text", text: "No videos have been ingested yet." }]
    };
  }

  const describeVideo = (video, index) => {
    const topics = video.topics?.length ? `Topics: ${video.topics.join(", ")}` : "";
    const duration = video.duration_seconds ? `Duration: ${formatDuration$1(video.duration_seconds)}` : "";
    // Empty fragments are dropped so the separator never appears twice.
    const details = [topics, duration, `Chunks: ${video.chunk_count}`, `Status: ${video.embedding_status}`]
      .filter(Boolean)
      .join(" | ");
    return [`${index + 1}. "${video.title}"`, `Video ID: ${video.video_id}`, details].join("\n");
  };

  const formatted = result.data.map((video, index) => describeVideo(video, index)).join("\n\n");
  const pageCount = Math.ceil(result.total / result.pageSize);
  const header = `${result.total} videos (page ${result.page}/${pageCount}):`;
  return {
    content: [{
      type: "text",
      text: `${header}\n\n${formatted}`
    }]
  };
}
|
|
641
|
+
/**
 * Turn a second count into a `m:ss` label.
 *
 * @param {number} seconds - Offset in seconds; fractions are floored.
 * @returns {string} Minutes followed by zero-padded seconds.
 */
function formatTime$3(seconds) {
  const mins = Math.floor(seconds / 60);
  let secs = Math.floor(seconds % 60).toString();
  secs = secs.padStart(2, "0");
  return `${mins}:${secs}`;
}
|
|
646
|
+
/** MCP tool descriptor for fetching one video's summary, topics and key moments. */
const getYtVideoSummaryMcpTool = {
  name: "get_yt_video_summary",
  description: "Get a YouTube video's summary, topics, and key moments by video ID. Useful for understanding what a video covers without searching.",
  inputSchema: {
    type: "object",
    properties: {
      videoId: { type: "string", description: "YouTube video ID" }
    },
    required: ["videoId"]
  }
};
|
|
660
|
+
/**
 * MCP handler for `get_yt_video_summary`: looks a video up by id and renders
 * its title, topics, status, watch link, summary and key moments as text.
 *
 * @param {object} strapi - Strapi instance used to resolve the `ytEmbeddings` service.
 * @param {object} args - `{ videoId }`.
 * @returns {Promise<{content: Array<{type: string, text: string}>}>}
 */
async function handleGetYtVideoSummary(strapi, args) {
  const video = await strapi
    .plugin("yt-embeddings-strapi-plugin")
    .service("ytEmbeddings")
    .getVideo(args.videoId);

  if (!video) {
    return {
      content: [{ type: "text", text: `Video ${args.videoId} not found.` }]
    };
  }

  const topics = video.topics?.length ? `Topics: ${video.topics.join(", ")}` : "";
  const summary = video.summary || "No summary available.";

  // The key-moments section is appended only when the video has some.
  let keyMoments = "";
  if (video.key_moments?.length) {
    const momentLines = video.key_moments.map((km) => ` ${formatTime$3(km.timestampSeconds)} — ${km.label}`);
    keyMoments = "\n\nKey Moments:\n" + momentLines.join("\n");
  }

  const watchLink = `https://www.youtube.com/watch?v=${video.video_id}`;
  const text = [
    `"${video.title}"`,
    topics,
    `Chunks: ${video.chunk_count} | Status: ${video.embedding_status}`,
    `Watch: ${watchLink}`,
    "",
    "Summary:",
    `${summary}${keyMoments}`
  ].join("\n");
  return {
    content: [{ type: "text", text }]
  };
}
|
|
687
|
+
// Tool descriptors returned by the MCP ListTools request handler.
const tools$1 = [searchYtKnowledgeMcpTool, getVideoTranscriptRangeMcpTool, listYtVideosMcpTool, getYtVideoSummaryMcpTool];
|
|
693
|
+
// Dispatch table: MCP tool name -> handler invoked as handler(strapi, validatedArgs).
const toolHandlers = {
  "search_yt_knowledge": handleSearchYtKnowledge,
  "get_video_transcript_range": handleGetVideoTranscriptRange,
  "list_yt_videos": handleListYtVideos,
  "get_yt_video_summary": handleGetYtVideoSummary
};
|
|
699
|
+
/**
 * Dispatch an MCP `tools/call` request to the matching handler.
 *
 * The arguments are validated with `validateToolInput` before the handler
 * runs. Failures are never thrown to the caller: an unknown tool, a
 * validation error or a handler error is returned as a tool result whose text
 * is a JSON error payload and which is flagged with `isError: true`.
 *
 * @param {object} strapi - Strapi instance passed through to the handler.
 * @param {{ params: { name: string, arguments?: object } }} request - MCP request.
 * @returns {Promise<object>} The handler's result, or an error tool result.
 */
async function handleToolCall(strapi, request) {
  const { name, arguments: args } = request.params;

  // Own-property lookup only: a plain `toolHandlers[name]` would also resolve
  // inherited members such as "constructor" or "toString".
  const handler = Object.prototype.hasOwnProperty.call(toolHandlers, name) ? toolHandlers[name] : void 0;
  if (!handler) {
    return {
      isError: true,
      content: [
        {
          type: "text",
          text: JSON.stringify({
            error: true,
            message: `Unknown tool: ${name}`,
            availableTools: Object.keys(toolHandlers)
          })
        }
      ]
    };
  }

  try {
    const validatedArgs = validateToolInput(name, args || {});
    return await handler(strapi, validatedArgs);
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    strapi.log.error(`[yt-embeddings-strapi-plugin] Tool ${name} error:`, { error: errorMessage });
    return {
      isError: true,
      content: [
        {
          type: "text",
          text: JSON.stringify({
            error: true,
            tool: name,
            message: errorMessage
          }, null, 2)
        }
      ]
    };
  }
}
|
|
737
|
+
/**
 * Build an MCP `Server` that advertises the tool capability, lists
 * `tools$1` and routes tool calls through `handleToolCall`.
 *
 * @param {object} strapi - Strapi instance handed to every tool call.
 * @returns {Server} The configured, not-yet-connected server.
 */
function createMcpServer(strapi) {
  const serverInfo = { name: "yt-embeddings-strapi-plugin-mcp", version: "1.0.0" };
  const serverOptions = { capabilities: { tools: {} } };
  const server = new Server(serverInfo, serverOptions);

  server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: tools$1 }));
  server.setRequestHandler(CallToolRequestSchema, async (request) => handleToolCall(strapi, request));

  return server;
}
|
|
757
|
+
// Idempotent DDL for the two YouTube tables and their non-vector indexes.
// The UNIQUE columns on yt_videos already get an implicit unique index from
// PostgreSQL, so no additional plain index is declared for them.
const YT_TABLES_SQL = `
-- ─── Videos table (one row per YouTube video) ────────────────────────────────
CREATE TABLE IF NOT EXISTS yt_videos (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  strapi_document_id TEXT NOT NULL UNIQUE,
  video_id TEXT NOT NULL UNIQUE,
  url TEXT NOT NULL,
  title TEXT NOT NULL,
  channel_name TEXT,
  channel_id TEXT,
  duration_seconds INTEGER,
  published_at TIMESTAMPTZ,
  thumbnail_url TEXT,
  description TEXT,
  language TEXT NOT NULL DEFAULT 'en',

  -- Processing state
  content_hash TEXT NOT NULL,
  embedding_status TEXT NOT NULL DEFAULT 'pending'
    CHECK (embedding_status IN ('pending', 'processing', 'complete', 'failed')),
  chunk_count INTEGER NOT NULL DEFAULT 0,
  error_message TEXT,
  embedding_model TEXT NOT NULL DEFAULT 'text-embedding-3-small',
  embedded_at TIMESTAMPTZ,

  -- LLM-extracted metadata
  topics TEXT[] NOT NULL DEFAULT '{}',
  summary TEXT NOT NULL DEFAULT '',
  key_moments JSONB NOT NULL DEFAULT '[]',

  created_at TIMESTAMPTZ DEFAULT NOW(),
  updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- ─── Video chunks table (one row per embeddable unit) ────────────────────────
CREATE TABLE IF NOT EXISTS yt_video_chunks (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  video_id TEXT NOT NULL REFERENCES yt_videos(video_id) ON DELETE CASCADE,
  strapi_document_id TEXT NOT NULL,

  text TEXT NOT NULL,
  embedding vector(1536),

  start_seconds REAL NOT NULL,
  end_seconds REAL NOT NULL,
  chunk_index INTEGER NOT NULL,

  segments JSONB NOT NULL,

  prev_chunk_id UUID,
  next_chunk_id UUID,

  tokens INTEGER,

  created_at TIMESTAMPTZ DEFAULT NOW()
);

-- ─── Indexes ─────────────────────────────────────────────────────────────────

-- Video lookups (the UNIQUE columns are already indexed by their constraints)
CREATE INDEX IF NOT EXISTS idx_yt_videos_content_hash
  ON yt_videos (content_hash);

CREATE INDEX IF NOT EXISTS idx_yt_videos_status
  ON yt_videos (embedding_status);

-- Topic filtering
CREATE INDEX IF NOT EXISTS idx_yt_videos_topics
  ON yt_videos USING gin (topics);

-- Chunk lookups
CREATE INDEX IF NOT EXISTS idx_yt_video_chunks_video_time
  ON yt_video_chunks (video_id, start_seconds, end_seconds);

CREATE INDEX IF NOT EXISTS idx_yt_video_chunks_video_index
  ON yt_video_chunks (video_id, chunk_index);
`;
|
|
840
|
+
// HNSW cosine index on the chunk embeddings; kept apart from the table DDL
// because runYtMigration executes it in its own try/catch.
const YT_HNSW_INDEX_SQL = [
  "",
  "CREATE INDEX IF NOT EXISTS idx_yt_video_chunks_embedding",
  "ON yt_video_chunks USING hnsw (embedding vector_cosine_ops);",
  ""
].join("\n");
|
|
844
|
+
// Drops the yt_chunks and yt_embedding_jobs tables (CASCADE) if they exist;
// runYtMigration runs this before creating the current tables.
const DROP_OLD_TABLES_SQL = [
  "",
  "DROP TABLE IF EXISTS yt_chunks CASCADE;",
  "DROP TABLE IF EXISTS yt_embedding_jobs CASCADE;",
  ""
].join("\n");
|
|
848
|
+
/**
 * Run the YouTube schema migration on a pooled connection.
 *
 * Executes, in order: the drop of the old tables, the table/index DDL, and
 * the HNSW vector index. A failure of the HNSW step is tolerated and logged
 * together with its reason; a failure of the earlier steps propagates to the
 * caller. The client is always released.
 *
 * @param {{ connect: () => Promise<object> }} pool - Connection pool whose
 *   clients expose `query(sql)` and `release()`.
 * @returns {Promise<void>}
 */
async function runYtMigration(pool) {
  const client = await pool.connect();
  try {
    await client.query(DROP_OLD_TABLES_SQL);
    await client.query(YT_TABLES_SQL);
    console.log("[yt-migration] yt_videos and yt_video_chunks tables ready");
    try {
      await client.query(YT_HNSW_INDEX_SQL);
      console.log("[yt-migration] HNSW vector index ready");
    } catch (indexError) {
      // Best-effort step: keep booting, but surface why the index was skipped
      // instead of discarding the error.
      const reason = indexError instanceof Error ? indexError.message : String(indexError);
      console.log(`[yt-migration] HNSW index creation deferred (may need data first): ${reason}`);
    }
  } finally {
    client.release();
  }
}
|
|
864
|
+
// Plugin identifier used below for the config key, log prefixes, service lookups and the MCP route path.
const PLUGIN_ID$3 = "yt-embeddings-strapi-plugin";
|
|
865
|
+
// Name of the optional OAuth manager plugin; bootstrap installs the fallback auth middleware only when it is absent.
const OAUTH_PLUGIN_ID = "strapi-oauth-mcp-manager";
|
|
866
|
+
/**
 * Build the Koa middleware that guards the plugin's MCP endpoint when the
 * OAuth manager plugin is not installed.
 *
 * Requests outside `/api/<plugin>/mcp` pass through untouched. MCP requests
 * must carry a non-empty `Authorization: Bearer <token>` header. When a
 * Strapi instance is supplied, the token is additionally checked against
 * Strapi's admin API-token service and rejected when unknown or expired;
 * without an instance only the header's presence is checked (the behaviour
 * of callers that pass no argument).
 *
 * On success the token and the auth method are stored on `ctx.state`.
 *
 * @param {object} [strapi] - Strapi instance used to validate the token.
 * @returns {(ctx: object, next: Function) => Promise<unknown>} Koa middleware.
 */
function createFallbackAuthMiddleware(strapi) {
  const mcpPath = `/api/${PLUGIN_ID$3}/mcp`;

  const rejectUnauthorized = (ctx, message) => {
    ctx.status = 401;
    ctx.body = { error: "Unauthorized", message };
  };

  // Resolves to true only for a known, non-expired API token. Any lookup
  // failure is logged and treated as "not valid" (fail closed).
  const isKnownApiToken = async (token) => {
    try {
      const apiTokenService = strapi.service("admin::api-token");
      const apiToken = await apiTokenService.getBy({ accessKey: apiTokenService.hash(token) });
      if (!apiToken) {
        return false;
      }
      const isExpired = apiToken.expiresAt != null && new Date(apiToken.expiresAt) < new Date();
      return !isExpired;
    } catch (error) {
      strapi.log.error(`[${PLUGIN_ID$3}] API token validation failed:`, error);
      return false;
    }
  };

  return async (ctx, next) => {
    if (!ctx.path.startsWith(mcpPath)) {
      return next();
    }

    const authHeader = ctx.request.headers.authorization;
    // "Bearer " is 7 characters; an empty remainder is not a token.
    const token = authHeader?.startsWith("Bearer ") ? authHeader.slice(7).trim() : "";
    if (!token) {
      rejectUnauthorized(ctx, "Bearer token required. Provide a Strapi API token.");
      return;
    }

    if (strapi && !(await isKnownApiToken(token))) {
      rejectUnauthorized(ctx, "Invalid or expired Strapi API token.");
      return;
    }

    ctx.state.strapiToken = token;
    ctx.state.authMethod = "api-token";
    return next();
  };
}
|
|
887
|
+
/**
 * Strapi bootstrap hook for the plugin.
 *
 * 1. Registers the plugin's admin permission actions.
 * 2. When both `openAIApiKey` and `neonConnectionString` are configured,
 *    initializes `pluginManager` and runs the YouTube table migration;
 *    failures are logged and do not abort bootstrap.
 * 3. Exposes `createMcpServer` and an empty session map on the plugin object.
 * 4. Installs the fallback bearer-token middleware unless the OAuth manager
 *    plugin is present.
 * 5. Subscribes to `afterCreate` on the transcript content type and starts
 *    the embedding pipeline for each new transcript without awaiting it.
 *
 * @param {{ strapi: object }} context - Strapi lifecycle context.
 * @returns {Promise<void>}
 */
const bootstrap = async ({ strapi }) => {
  const actions = [
    ["Read", "read"],
    ["Update", "update"],
    ["Create", "create"],
    ["Delete", "delete"],
    ["Chat", "chat"]
  ].map(([displayName, uid]) => ({
    section: "plugins",
    displayName,
    uid,
    pluginName: PLUGIN_ID$3
  }));
  await strapi.admin.services.permission.actionProvider.registerMany(actions);

  const pluginConfig = strapi.config.get(`plugin::${PLUGIN_ID$3}`);
  if (pluginConfig?.openAIApiKey && pluginConfig?.neonConnectionString) {
    try {
      await pluginManager.initialize({
        openAIApiKey: pluginConfig.openAIApiKey,
        neonConnectionString: pluginConfig.neonConnectionString,
        embeddingModel: pluginConfig.embeddingModel
      });
      strapi.contentEmbeddingsManager = pluginManager;
      const pool = pluginManager.getPool();
      if (pool) {
        // A failed migration is logged but must not fail initialization.
        try {
          await runYtMigration(pool);
          strapi.log.info(`[${PLUGIN_ID$3}] YouTube vector tables ready`);
        } catch (migrationErr) {
          strapi.log.error(`[${PLUGIN_ID$3}] YT migration failed:`, migrationErr);
        }
      }
      strapi.log.info(`[${PLUGIN_ID$3}] Plugin initialized successfully`);
    } catch (error) {
      strapi.log.error(`[${PLUGIN_ID$3}] Failed to initialize:`, error);
    }
  } else {
    strapi.log.warn(
      `[${PLUGIN_ID$3}] Missing configuration. Set openAIApiKey and neonConnectionString in plugin config.`
    );
  }

  const plugin = strapi.plugin(PLUGIN_ID$3);
  plugin.createMcpServer = () => createMcpServer(strapi);
  plugin.sessions = /* @__PURE__ */ new Map();

  const oauthPlugin = strapi.plugin(OAUTH_PLUGIN_ID);
  if (oauthPlugin) {
    strapi.log.info(`[${PLUGIN_ID$3}] OAuth manager detected - OAuth + API token auth enabled`);
  } else {
    // Pass the Strapi instance, as the factory's signature declares.
    const fallbackMiddleware = createFallbackAuthMiddleware(strapi);
    strapi.server.use(fallbackMiddleware);
    strapi.log.info(`[${PLUGIN_ID$3}] Using API token authentication (OAuth manager not installed)`);
  }
  strapi.log.info(`[${PLUGIN_ID$3}] MCP endpoint available at: /api/${PLUGIN_ID$3}/mcp`);

  try {
    strapi.db.lifecycles.subscribe({
      models: ["plugin::yt-transcript-strapi-plugin.transcript"],
      async afterCreate({ result }) {
        // Fire-and-forget: the pipeline is not awaited, so the create call
        // does not wait for embedding; failures are only logged.
        strapi.plugin(PLUGIN_ID$3).service("ytEmbeddings").embedTranscript({
          documentId: result.documentId,
          id: result.id,
          videoId: result.videoId,
          title: result.title,
          fullTranscript: result.fullTranscript,
          transcriptWithTimeCodes: result.transcriptWithTimeCodes
        }).catch((err) => strapi.log.error("[yt-embed] Pipeline failed:", err));
      }
    });
    strapi.log.info(`[${PLUGIN_ID$3}] YouTube transcript lifecycle hook registered`);
  } catch (err) {
    strapi.log.warn(`[${PLUGIN_ID$3}] yt-transcript plugin not found, skipping YT lifecycle hook`);
  }
};
|
|
979
|
+
/**
 * Strapi destroy hook: shuts down `pluginManager` and logs completion through
 * the Strapi logger with this plugin's id prefix, matching the log lines
 * written during bootstrap.
 *
 * @param {{ strapi: object }} context - Strapi lifecycle context.
 * @returns {Promise<void>}
 */
const destroy = async ({ strapi }) => {
  await pluginManager.destroy();
  strapi.log.info(`[${PLUGIN_ID$3}] Plugin destroyed`);
};
|
|
983
|
+
// Register hook: intentionally does nothing.
const register = (_args) => {};
|
|
985
|
+
// Empty content-type map: no content types are declared here.
const contentTypes = {};
|
|
986
|
+
// Plugin identifier used by the admin controller below to resolve the plugin's services.
const PLUGIN_ID$2 = "yt-embeddings-strapi-plugin";
|
|
987
|
+
// UID of the transcript content type that the admin controller reads transcripts from.
const YT_TRANSCRIPT_UID = "plugin::yt-transcript-strapi-plugin.transcript";
|
|
988
|
+
/**
 * Admin controller factory.
 *
 * Every action delegates to the plugin's `ytEmbeddings` service. Errors that
 * already carry an HTTP `status` (including those raised by `ctx.throw`) are
 * rethrown unchanged; anything else becomes a 500 with the error message.
 *
 * @param {{ strapi: object }} context - Strapi instance wrapper.
 * @returns {object} Controller actions keyed by name.
 */
const controller = ({ strapi }) => {
  const ytService = () => strapi.plugin(PLUGIN_ID$2).service("ytEmbeddings");

  // Parse a positive integer query value; fall back when missing or invalid
  // so NaN is never handed to the service.
  const toPositiveInt = (value, fallback) => {
    const parsed = Number.parseInt(value, 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  };

  // Parse an optional numeric query value; undefined when missing or invalid.
  const toOptionalNumber = (value) => {
    const parsed = Number.parseFloat(value);
    return Number.isFinite(parsed) ? parsed : void 0;
  };

  // Shape search hits the way the chat UI expects its source documents.
  const toSourceDocuments = (results) => results.map((r) => ({
    pageContent: r.chunkText,
    metadata: { id: r.videoId, title: r.title, deepLink: r.deepLink }
  }));

  return {
    /** Embed one transcript identified by `documentId` in the request body. */
    async ytEmbed(ctx) {
      try {
        // A missing body is reported as a 400 rather than a destructuring error.
        const { documentId } = ctx.request.body ?? {};
        if (!documentId) {
          ctx.throw(400, "documentId is required");
        }
        const transcript = await strapi.documents(YT_TRANSCRIPT_UID).findOne({
          documentId,
          fields: ["documentId", "videoId", "title", "fullTranscript", "transcriptWithTimeCodes"]
        });
        if (!transcript) {
          ctx.throw(404, `Transcript ${documentId} not found`);
        }
        ctx.body = await ytService().embedTranscript({
          documentId: transcript.documentId,
          id: transcript.id,
          videoId: transcript.videoId,
          title: transcript.title,
          fullTranscript: transcript.fullTranscript,
          transcriptWithTimeCodes: transcript.transcriptWithTimeCodes
        });
      } catch (error) {
        if (error.status) throw error;
        console.error("[ytEmbed] Error:", error.message);
        ctx.throw(500, error.message || "Failed to embed transcript");
      }
    },

    /** List videos; `page` defaults to 1 and `pageSize` to 100. */
    async ytListVideos(ctx) {
      try {
        const { page, pageSize, status } = ctx.query;
        ctx.body = await ytService().listVideos({
          page: toPositiveInt(page, 1),
          pageSize: toPositiveInt(pageSize, 100),
          status
        });
      } catch (error) {
        if (error.status) throw error;
        console.error("[ytListVideos] Error:", error.message);
        ctx.throw(500, error.message || "Failed to list videos");
      }
    },

    /** Return one video wrapped as `{ data }`, or 404 when unknown. */
    async ytGetVideo(ctx) {
      try {
        const { videoId } = ctx.params;
        const result = await ytService().getVideo(videoId);
        if (!result) {
          ctx.throw(404, "Video not found");
        }
        ctx.body = { data: result };
      } catch (error) {
        if (error.status) throw error;
        ctx.throw(500, error.message || "Failed to get video");
      }
    },

    /** Return a video's chunks wrapped as `{ data }`, optionally bounded by `start`/`end`. */
    async ytGetVideoChunks(ctx) {
      try {
        const { videoId } = ctx.params;
        const { start, end } = ctx.query;
        const result = await ytService().getVideoChunks(videoId, {
          start: toOptionalNumber(start),
          end: toOptionalNumber(end)
        });
        ctx.body = { data: result };
      } catch (error) {
        if (error.status) throw error;
        ctx.throw(500, error.message || "Failed to get video chunks");
      }
    },

    /** Re-run embedding for everything via the service's `recomputeAll`. */
    async ytRecompute(ctx) {
      try {
        ctx.body = await ytService().recomputeAll();
      } catch (error) {
        if (error.status) throw error;
        console.error("[ytRecompute] Error:", error.message);
        ctx.throw(500, error.message || "Failed to recompute embeddings");
      }
    },

    /** Report the embedding status for a transcript `documentId`. */
    async ytStatus(ctx) {
      try {
        const { documentId } = ctx.params;
        if (!documentId) {
          ctx.throw(400, "documentId is required");
        }
        ctx.body = await ytService().getStatusByDocumentId(documentId);
      } catch (error) {
        if (error.status) throw error;
        console.error("[ytStatus] Error:", error.message);
        ctx.throw(500, error.message || "Failed to get status");
      }
    },

    /**
     * Answer a question from transcript content: search for matching chunks,
     * then either return the raw hits (no chat model available) or ask the
     * chat model with the hits as context.
     */
    async queryEmbeddings(ctx) {
      try {
        // A repeated `?query=` parameter arrives as an array; use the first value.
        const rawQuery = Array.isArray(ctx.query.query) ? ctx.query.query[0] : ctx.query.query;
        if (typeof rawQuery !== "string" || !rawQuery.trim()) {
          ctx.body = { error: "Please provide a query" };
          return;
        }
        const ytResults = await ytService().search(rawQuery, { limit: 3, minSimilarity: 0.2, contextWindowSeconds: 30 });
        if (!ytResults.length) {
          ctx.body = { text: "No relevant transcript content found for your question.", sourceDocuments: [] };
          return;
        }

        const chat = pluginManager.getChat();
        if (!chat) {
          ctx.body = {
            text: ytResults.map((r) => `**${r.title}** (${r.deepLink})\n${r.chunkText}`).join("\n\n"),
            sourceDocuments: toSourceDocuments(ytResults)
          };
          return;
        }

        const context = ytResults.map(
          (r) => `Video: "${r.title}" (${r.deepLink})\nTopics: ${(r.topics || []).join(", ")}\n\n${r.contextText || r.chunkText}`
        ).join("\n\n---\n\n");
        const systemMessage = [
          "You are a helpful assistant that answers questions based on YouTube transcript content.",
          "Include timestamps and video links when relevant. Be concise and accurate.",
          "If you cannot find the answer in the context, say so.",
          "",
          "Context:",
          "{context}"
        ].join("\n");
        const prompt = ChatPromptTemplate.fromMessages([
          ["system", systemMessage],
          ["human", "{question}"]
        ]);
        const response = await prompt.pipe(chat).invoke({ context, question: rawQuery });
        ctx.body = {
          text: response.content,
          sourceDocuments: toSourceDocuments(ytResults)
        };
      } catch (error) {
        if (error.status) throw error;
        console.error("[queryEmbeddings] Error:", error.message);
        ctx.throw(500, error.message || "Failed to query embeddings");
      }
    }
  };
};
|
|
1136
|
+
// Plugin identifier used by the MCP controller below for plugin lookup and log prefixes.
const PLUGIN_ID$1 = "yt-embeddings-strapi-plugin";
|
|
1137
|
+
// Maximum MCP session age in milliseconds (4 hours), measured from the session's createdAt.
const SESSION_TIMEOUT_MS = 4 * 60 * 60 * 1e3;
|
|
1138
|
+
/**
 * Tell whether a session has outlived `SESSION_TIMEOUT_MS`.
 *
 * @param {{ createdAt: number }} session - Session with its creation time in epoch ms.
 * @returns {boolean} True when the session's age exceeds the timeout.
 */
function isSessionExpired(session) {
  const ageMs = Date.now() - session.createdAt;
  return ageMs > SESSION_TIMEOUT_MS;
}
|
|
1141
|
+
/**
 * Close and remove every expired MCP session from `plugin.sessions`.
 *
 * Closing is best effort: neither a synchronous throw nor a rejected promise
 * from `server.close()` stops the sweep or surfaces as an unhandled rejection.
 *
 * @param {{ sessions: Map<string, object> }} plugin - Plugin object holding the session map.
 * @param {object} strapi - Strapi instance, used for debug logging.
 * @returns {void}
 */
function cleanupExpiredSessions(plugin, strapi) {
  let cleaned = 0;
  for (const [sessionId, session] of plugin.sessions.entries()) {
    if (!isSessionExpired(session)) {
      continue;
    }
    try {
      // close() may return a promise; a plain try/catch would miss its rejection.
      Promise.resolve(session.server.close()).catch(() => {
      });
    } catch {
    }
    plugin.sessions.delete(sessionId);
    cleaned++;
  }
  if (cleaned > 0) {
    strapi.log.debug(`[${PLUGIN_ID$1}] Cleaned up ${cleaned} expired MCP sessions`);
  }
}
|
|
1157
|
+
/**
 * MCP controller factory.
 *
 * @param {{ strapi: object }} context - Strapi instance wrapper.
 * @returns {{ handle: (ctx: object) => Promise<void> }} Controller with the single `handle` action.
 */
const mcpController = ({ strapi }) => {
  // Best-effort close: ignore a synchronous throw as well as a rejected
  // promise, in case close() is asynchronous.
  const closeQuietly = (session) => {
    try {
      Promise.resolve(session.server.close()).catch(() => {
      });
    } catch {
    }
  };

  return {
    /**
     * Handle MCP requests (POST, GET, DELETE).
     *
     * Resolves the session named by the `mcp-session-id` header, or creates a
     * new server/transport pair when no header is sent, then hands the raw
     * request to the transport. An unknown or expired session id yields a
     * JSON-RPC 400; a transport failure tears the session down.
     */
    async handle(ctx) {
      const plugin = strapi.plugin(PLUGIN_ID$1);
      if (!plugin.createMcpServer) {
        ctx.status = 503;
        ctx.body = {
          error: "MCP not initialized",
          message: "The MCP server is not available. Check plugin configuration."
        };
        return;
      }

      // Sweep expired sessions on roughly 1% of requests.
      if (Math.random() < 0.01) {
        cleanupExpiredSessions(plugin, strapi);
      }

      try {
        const requestedSessionId = ctx.request.headers["mcp-session-id"];
        // Id of the session actually serving this request. It differs from
        // the requested id when a new session is created below.
        let activeSessionId = requestedSessionId;
        let session = requestedSessionId ? plugin.sessions.get(requestedSessionId) : null;

        if (session && isSessionExpired(session)) {
          strapi.log.debug(`[${PLUGIN_ID$1}] Session expired, removing: ${requestedSessionId}`);
          closeQuietly(session);
          plugin.sessions.delete(requestedSessionId);
          session = null;
        }

        if (requestedSessionId && !session) {
          ctx.status = 400;
          ctx.body = {
            jsonrpc: "2.0",
            error: {
              code: -32e3,
              message: "Session expired or invalid. Please reinitialize the connection."
            },
            id: null
          };
          return;
        }

        if (!session) {
          const sessionId = randomUUID();
          const server = plugin.createMcpServer();
          const transport = new StreamableHTTPServerTransport({
            sessionIdGenerator: () => sessionId
          });
          await server.connect(transport);
          session = {
            server,
            transport,
            createdAt: Date.now(),
            strapiToken: ctx.state.strapiToken
          };
          plugin.sessions.set(sessionId, session);
          activeSessionId = sessionId;
          strapi.log.debug(
            `[${PLUGIN_ID$1}] New MCP session created: ${sessionId} (auth: ${ctx.state.authMethod || "unknown"})`
          );
        }

        try {
          await session.transport.handleRequest(ctx.req, ctx.res, ctx.request.body);
        } catch (transportError) {
          strapi.log.warn(`[${PLUGIN_ID$1}] Transport error, cleaning up session: ${activeSessionId}`, {
            error: transportError instanceof Error ? transportError.message : String(transportError)
          });
          closeQuietly(session);
          // Delete by the active id: for a session created in this request
          // the requested id is undefined and the entry would otherwise stay
          // in the map.
          plugin.sessions.delete(activeSessionId);
          if (!ctx.res.headersSent) {
            ctx.status = 400;
            ctx.body = {
              jsonrpc: "2.0",
              error: {
                code: -32e3,
                message: "Session transport error. Please reinitialize the connection."
              },
              id: null
            };
          }
          return;
        }

        // The transport wrote to ctx.res directly; tell Koa not to respond.
        ctx.respond = false;
      } catch (error) {
        strapi.log.error(`[${PLUGIN_ID$1}] Error handling MCP request`, {
          error: error instanceof Error ? error.message : String(error),
          method: ctx.method,
          path: ctx.path
        });
        if (!ctx.res.headersSent) {
          ctx.status = 500;
          ctx.body = {
            error: "MCP request failed",
            message: error instanceof Error ? error.message : "Unknown error"
          };
        }
      }
    }
  };
};
|
|
1257
|
+
// Plugin identifier used by the content-API controller below to resolve the plugin's services.
const PLUGIN_ID = "yt-embeddings-strapi-plugin";
|
|
1258
|
+
/**
 * Content-API controller factory for the YouTube knowledge base routes.
 *
 * Every action delegates to the plugin's `ytEmbeddings` service and returns
 * the service result as the response body. Errors that already carry an HTTP
 * `status` are rethrown unchanged; anything else becomes a 500.
 *
 * @param {{ strapi: object }} context - Strapi instance wrapper.
 * @returns {object} Controller actions keyed by name.
 */
const ytController = ({ strapi }) => {
  const ytService = () => strapi.plugin(PLUGIN_ID).service("ytEmbeddings");

  // A repeated query parameter arrives as an array; use its first value.
  const firstValue = (value) => Array.isArray(value) ? value[0] : value;

  // Parse an integer >= min; fall back when missing or invalid so NaN never
  // reaches the service.
  const toInt = (value, fallback, min) => {
    const parsed = Number.parseInt(firstValue(value), 10);
    return Number.isInteger(parsed) && parsed >= min ? parsed : fallback;
  };

  // Parse a float; fall back when missing or invalid.
  const toNumber = (value, fallback) => {
    const parsed = Number.parseFloat(firstValue(value));
    return Number.isFinite(parsed) ? parsed : fallback;
  };

  // Accept "a,b" as well as repeated ?topics=a&topics=b.
  const toTopicList = (value) => {
    if (!value) {
      return void 0;
    }
    const parts = Array.isArray(value) ? value : [value];
    return parts.flatMap((part) => String(part).split(","));
  };

  return {
    // POST /api/yt-embeddings-strapi-plugin/yt/ingest
    async ingest(ctx) {
      try {
        // A missing body is reported as a 400 rather than a destructuring error.
        const { documentId } = ctx.request.body ?? {};
        if (!documentId) {
          ctx.throw(400, "documentId is required");
        }
        const transcript = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findOne({
          documentId,
          fields: ["documentId", "videoId", "title", "fullTranscript", "transcriptWithTimeCodes"]
        });
        if (!transcript) {
          ctx.throw(404, `Transcript ${documentId} not found`);
        }
        ctx.body = await ytService().embedTranscript({
          documentId: transcript.documentId,
          id: transcript.id,
          videoId: transcript.videoId,
          title: transcript.title,
          fullTranscript: transcript.fullTranscript,
          transcriptWithTimeCodes: transcript.transcriptWithTimeCodes
        });
      } catch (error) {
        if (error.status) throw error;
        ctx.throw(500, error.message || "Failed to ingest transcript");
      }
    },

    // GET /api/yt-embeddings-strapi-plugin/yt/videos
    async listVideos(ctx) {
      try {
        const { page, pageSize, status } = ctx.query;
        ctx.body = await ytService().listVideos({
          page: toInt(page, 1, 1),
          pageSize: toInt(pageSize, 25, 1),
          status
        });
      } catch (error) {
        if (error.status) throw error;
        ctx.throw(500, error.message || "Failed to list videos");
      }
    },

    // GET /api/yt-embeddings-strapi-plugin/yt/videos/:videoId
    async getVideo(ctx) {
      try {
        const { videoId } = ctx.params;
        const result = await ytService().getVideo(videoId);
        if (!result) {
          ctx.throw(404, `Video ${videoId} not found`);
        }
        ctx.body = result;
      } catch (error) {
        if (error.status) throw error;
        ctx.throw(500, error.message || "Failed to get video");
      }
    },

    // DELETE /api/yt-embeddings-strapi-plugin/yt/videos/:videoId
    async deleteVideo(ctx) {
      try {
        const { videoId } = ctx.params;
        const result = await ytService().deleteVideo(videoId);
        if (!result) {
          ctx.throw(404, `Video ${videoId} not found`);
        }
        ctx.body = result;
      } catch (error) {
        if (error.status) throw error;
        ctx.throw(500, error.message || "Failed to delete video");
      }
    },

    // GET /api/yt-embeddings-strapi-plugin/yt/search?q=...&limit=5&videoId=...&topics=RAG,MCP
    async search(ctx) {
      try {
        const { limit, videoId, topics, minSimilarity, contextWindowSeconds } = ctx.query;
        const q = firstValue(ctx.query.q);
        if (!q) {
          ctx.throw(400, 'Query parameter "q" is required');
        }
        ctx.body = await ytService().search(q, {
          limit: toInt(limit, 5, 1),
          minSimilarity: toNumber(minSimilarity, 0.65),
          videoId,
          topics: toTopicList(topics),
          // Zero is allowed here, as it was before.
          contextWindowSeconds: toInt(contextWindowSeconds, 30, 0)
        });
      } catch (error) {
        if (error.status) throw error;
        ctx.throw(500, error.message || "Failed to search");
      }
    },

    // GET /api/yt-embeddings-strapi-plugin/yt/videos/:videoId/chunks?start=60&end=120
    async getVideoChunks(ctx) {
      try {
        const { videoId } = ctx.params;
        const { start, end } = ctx.query;
        ctx.body = await ytService().getVideoChunks(videoId, {
          start: toNumber(start, void 0),
          end: toNumber(end, void 0)
        });
      } catch (error) {
        if (error.status) throw error;
        ctx.throw(500, error.message || "Failed to get video chunks");
      }
    },

    // POST /api/yt-embeddings-strapi-plugin/yt/recompute
    async recompute(ctx) {
      try {
        ctx.body = await ytService().recomputeAll();
      } catch (error) {
        if (error.status) throw error;
        ctx.throw(500, error.message || "Failed to recompute");
      }
    }
  };
};
|
|
1371
|
+
// Controller registry; the keys are the prefixes used by route `handler` strings.
const controllers = { controller, mcp: mcpController, ytController };
|
|
1376
|
+
// Empty middleware map: no middlewares are declared here.
const middlewares = {};
|
|
1377
|
+
// Empty policy map: no policies are declared here.
const policies = {};
|
|
1378
|
+
/**
 * Content-API routes.
 *
 * The three `/mcp` routes set `auth: false` (auth handled by middleware).
 * The `/yt/*` routes only carry a description in their config.
 */
const contentApi = [
  // MCP routes - auth handled by middleware
  ...["POST", "GET", "DELETE"].map((method) => ({
    method,
    path: "/mcp",
    handler: "mcp.handle",
    config: {
      auth: false,
      policies: []
    }
  })),
  // YouTube Knowledge Base API routes
  ...[
    ["POST", "/yt/ingest", "ytController.ingest", "Ingest a transcript by documentId"],
    ["GET", "/yt/videos", "ytController.listVideos", "List all ingested videos with metadata"],
    ["GET", "/yt/videos/:videoId", "ytController.getVideo", "Get a single video with metadata"],
    ["DELETE", "/yt/videos/:videoId", "ytController.deleteVideo", "Delete a video and all its chunks"],
    ["GET", "/yt/search", "ytController.search", "Semantic search across transcripts"],
    ["GET", "/yt/videos/:videoId/chunks", "ytController.getVideoChunks", "Get chunks for a video by time range"],
    ["POST", "/yt/recompute", "ytController.recompute", "Re-embed all transcripts"]
  ].map(([method, path, handler, description]) => ({
    method,
    path,
    handler,
    config: { description }
  }))
];
|
|
1465
|
+
// Admin-panel routes. Each row is [method, path, handler, required action];
// every route is guarded by the `admin::hasPermissions` policy with exactly
// one plugin action.
const admin = [
  ["GET", "/yt/videos", "controller.ytListVideos", "plugin::yt-embeddings-strapi-plugin.read"],
  ["GET", "/yt/videos/:videoId", "controller.ytGetVideo", "plugin::yt-embeddings-strapi-plugin.read"],
  ["GET", "/yt/videos/:videoId/chunks", "controller.ytGetVideoChunks", "plugin::yt-embeddings-strapi-plugin.read"],
  ["GET", "/yt/status/:documentId", "controller.ytStatus", "plugin::yt-embeddings-strapi-plugin.read"],
  ["POST", "/yt/embed", "controller.ytEmbed", "plugin::yt-embeddings-strapi-plugin.create"],
  ["POST", "/yt/recompute", "controller.ytRecompute", "plugin::yt-embeddings-strapi-plugin.update"],
  ["GET", "/embeddings/embeddings-query", "controller.queryEmbeddings", "plugin::yt-embeddings-strapi-plugin.chat"]
].map(([method, path, handler, action]) => ({
  method,
  path,
  handler,
  config: {
    policies: [
      {
        name: "admin::hasPermissions",
        config: { actions: [action] }
      }
    ]
  }
}));
|
|
1558
|
+
// Route groups exported by the plugin: one "content-api" router and one
// "admin" router. Each group receives its own shallow copy of the route list.
const routes = {
  "content-api": { type: "content-api", routes: Array.from(contentApi) },
  admin: { type: "admin", routes: Array.from(admin) }
};
|
|
1568
|
+
/**
 * Formats a number of seconds as `m:ss`.
 * Minutes are not wrapped at 60, so 3725 seconds renders as "62:05".
 *
 * @param {number} seconds - Offset in seconds; fractions are floored.
 * @returns {string} The formatted timestamp.
 */
function formatTime$2(seconds) {
  const minutes = Math.floor(seconds / 60);
  const paddedSeconds = String(Math.floor(seconds % 60)).padStart(2, "0");
  return `${minutes}:${paddedSeconds}`;
}
|
|
1573
|
+
// AI tool: semantic search over ingested transcripts. Delegates to the
// plugin's `ytEmbeddings.search` service and reshapes each hit into a ranked,
// display-oriented record.
const searchYtKnowledgeTool = {
  name: "searchYtKnowledge",
  description: "Semantically search YouTube video transcripts. Returns relevant passages with timestamps, deep links, video topics, and summary.",
  schema: SearchYtKnowledgeSchema,
  /**
   * @param {object} args - Raw tool arguments; validated with SearchYtKnowledgeSchema.
   * @param {object} strapi - Strapi instance used to resolve the plugin service.
   * @returns {Promise<object>} `{ results }`, plus a `message` when nothing matched.
   */
  async execute(args, strapi) {
    const { query, limit, minSimilarity, videoId, topics, contextWindowSeconds } = SearchYtKnowledgeSchema.parse(args);
    const service = strapi.plugin("yt-embeddings-strapi-plugin").service("ytEmbeddings");
    const hits = await service.search(query, {
      limit,
      minSimilarity,
      videoId,
      topics,
      contextWindowSeconds
    });
    if (!hits.length) {
      return { results: [], message: "No relevant content found." };
    }
    const results = hits.map((hit, position) => {
      const from = formatTime$2(hit.startSeconds);
      const to = formatTime$2(hit.endSeconds);
      return {
        rank: position + 1,
        similarity: hit.similarity,
        title: hit.title,
        topics: hit.topics,
        videoSummary: hit.videoSummary,
        timestamp: `${from} – ${to}`,
        deepLink: hit.deepLink,
        contextText: hit.contextText
      };
    });
    return { results };
  },
  publicSafe: true
};
|
|
1604
|
+
/**
 * Renders a second offset as `m:ss` (minutes may exceed 59).
 *
 * @param {number} seconds - Offset in seconds; fractional parts are dropped.
 * @returns {string} Timestamp such as "5:30".
 */
function formatTime$1(seconds) {
  const wholeMinutes = Math.floor(seconds / 60);
  const wholeSeconds = Math.floor(seconds % 60);
  return [wholeMinutes, wholeSeconds.toString().padStart(2, "0")].join(":");
}
|
|
1609
|
+
// AI tool: returns the raw transcript text spoken inside a time window of one
// video. Chunks come from `ytEmbeddings.getTranscriptRange`; their segments
// (timed in milliseconds) are then trimmed to the requested window.
const getVideoTranscriptRangeTool = {
  name: "getVideoTranscriptRange",
  description: 'Get the raw transcript text for a specific time range in a YouTube video. Useful for "what was said around 5:30" type queries.',
  schema: GetVideoTranscriptRangeSchema,
  /**
   * @param {object} args - Raw tool arguments; validated with GetVideoTranscriptRangeSchema.
   * @param {object} strapi - Strapi instance used to resolve the plugin service.
   * @returns {Promise<object>} The transcript for the range, or `{ transcript: null, message }`
   *   when no chunk overlaps it.
   */
  async execute(args, strapi) {
    const validated = GetVideoTranscriptRangeSchema.parse(args);
    const service = strapi.plugin("yt-embeddings-strapi-plugin").service("ytEmbeddings");
    const rows = await service.getTranscriptRange(validated.videoId, validated.startSeconds, validated.endSeconds);
    if (!rows.length) {
      return {
        transcript: null,
        message: `No transcript found for video ${validated.videoId} in that time range.`
      };
    }
    // Keep only segments that overlap the requested window.
    const pieces = [];
    for (const row of rows) {
      for (const segment of row.segments) {
        const overlapsWindow = segment.end / 1e3 > validated.startSeconds && segment.start / 1e3 < validated.endSeconds;
        if (overlapsWindow) {
          pieces.push(segment.text);
        }
      }
    }
    const transcript = pieces.join(" ").replace(/\s+/g, " ").trim();
    const rangeStart = formatTime$1(validated.startSeconds);
    const rangeEnd = formatTime$1(validated.endSeconds);
    return {
      videoId: validated.videoId,
      range: `${rangeStart}–${rangeEnd}`,
      transcript,
      deepLink: `https://www.youtube.com/watch?v=${validated.videoId}&t=${Math.floor(validated.startSeconds)}`
    };
  },
  publicSafe: true
};
|
|
1632
|
+
/**
 * Formats a duration in seconds as `h:mm:ss`, or as `m:ss` when it is
 * shorter than one hour.
 *
 * @param {number} seconds - Duration in seconds; fractions are floored.
 * @returns {string} The formatted duration.
 */
function formatDuration(seconds) {
  const hours = Math.floor(seconds / 3600);
  const minutes = Math.floor(seconds % 3600 / 60);
  const secs = Math.floor(seconds % 60);
  const twoDigits = (value) => value.toString().padStart(2, "0");
  if (hours > 0) {
    return `${hours}:${twoDigits(minutes)}:${twoDigits(secs)}`;
  }
  return `${minutes}:${twoDigits(secs)}`;
}
|
|
1638
|
+
// AI tool: paginated listing of ingested videos. Rows from
// `ytEmbeddings.listVideos` are mapped from snake_case columns to a
// camelCase summary, and a page count is derived from the total.
const listYtVideosTool = {
  name: "listYtVideos",
  description: "List all ingested YouTube videos with their topics, chunk count, and duration. Useful for discovering available content before searching.",
  schema: ListYtVideosSchema,
  /**
   * @param {object} args - Raw tool arguments; validated with ListYtVideosSchema.
   * @param {object} strapi - Strapi instance used to resolve the plugin service.
   * @returns {Promise<object>} `{ videos, pagination }`.
   */
  async execute(args, strapi) {
    const { page, pageSize } = ListYtVideosSchema.parse(args);
    const service = strapi.plugin("yt-embeddings-strapi-plugin").service("ytEmbeddings");
    const listing = await service.listVideos({ page, pageSize });
    const videos = listing.data.map((row) => {
      // A missing or zero duration is reported as null rather than "0:00".
      const duration = row.duration_seconds ? formatDuration(row.duration_seconds) : null;
      return {
        videoId: row.video_id,
        title: row.title,
        topics: row.topics,
        duration,
        chunkCount: row.chunk_count,
        embeddingStatus: row.embedding_status
      };
    });
    const pagination = {
      page: listing.page,
      pageSize: listing.pageSize,
      total: listing.total,
      pageCount: Math.ceil(listing.total / listing.pageSize)
    };
    return { videos, pagination };
  },
  publicSafe: true
};
|
|
1667
|
+
/**
 * Converts a second offset to an `m:ss` label (minutes are not capped at 59).
 *
 * @param {number} seconds - Offset in seconds; floored to whole seconds.
 * @returns {string} Label such as "12:07".
 */
function formatTime(seconds) {
  const mins = Math.floor(seconds / 60);
  const rest = Math.floor(seconds % 60);
  const restLabel = rest.toString().padStart(2, "0");
  return `${mins}:${restLabel}`;
}
|
|
1672
|
+
// AI tool: returns the stored summary, topics and key moments of one video,
// looked up through `ytEmbeddings.getVideo`.
const getYtVideoSummaryTool = {
  name: "getYtVideoSummary",
  description: "Get a YouTube video's summary, topics, and key moments by video ID. Useful for understanding what a video covers without searching.",
  schema: GetYtVideoSummarySchema,
  /**
   * @param {object} args - Raw tool arguments; validated with GetYtVideoSummarySchema.
   * @param {object} strapi - Strapi instance used to resolve the plugin service.
   * @returns {Promise<object>} The video summary, or `{ error: true, message }`
   *   when the video is unknown.
   */
  execute: async (args, strapi) => {
    const validated = GetYtVideoSummarySchema.parse(args);
    const video = await strapi.plugin("yt-embeddings-strapi-plugin").service("ytEmbeddings").getVideo(validated.videoId);
    if (!video) {
      return { error: true, message: `Video ${validated.videoId} not found.` };
    }
    // Key moments are persisted with the shape produced by the metadata
    // extractor in this file ({ label, startSeconds, summary }). The
    // `timestampSeconds` fallback keeps any rows written with that key readable.
    // Anything that is not an array is treated as "no key moments".
    const storedMoments = Array.isArray(video.key_moments) ? video.key_moments : [];
    return {
      videoId: video.video_id,
      title: video.title,
      topics: video.topics,
      summary: video.summary || null,
      chunkCount: video.chunk_count,
      embeddingStatus: video.embedding_status,
      watchLink: `https://www.youtube.com/watch?v=${video.video_id}`,
      keyMoments: storedMoments.map((km) => ({
        timestamp: formatTime(km.startSeconds ?? km.timestampSeconds),
        label: km.label
      }))
    };
  },
  publicSafe: true
};
|
|
1698
|
+
// Every AI tool this plugin exposes, in registration order.
const tools = [searchYtKnowledgeTool, getVideoTranscriptRangeTool, listYtVideosTool, getYtVideoSummaryTool];
|
|
1704
|
+
/**
 * "ai-tools" service factory.
 *
 * @returns {{ getTools: () => Array<object> }} Service whose `getTools()`
 *   returns the module-level `tools` array (the same array instance each call).
 */
const aiTools = ({ strapi }) => {
  const getTools = () => tools;
  return { getTools };
};
|
|
1709
|
+
// Chunk sizing, in milliseconds of transcript time.
// TARGET_MS: once a buffer spans this long it is flushed at the next pause or sentence end.
const TARGET_MS = 6e4;
// MAX_MS: a buffer spanning this long is flushed unconditionally.
const MAX_MS = 9e4;
// MIN_MS: a non-final buffer shorter than this is never flushed.
const MIN_MS = 15e3;
/**
 * Groups timed transcript segments into consecutive chunks.
 *
 * Segments are appended to a buffer. The buffer is flushed into a chunk when
 * it is the last segment, when the buffer reaches MAX_MS, or when it reaches
 * TARGET_MS and the current segment is followed by a pause longer than one
 * second or ends with sentence punctuation.
 *
 * The input is normalized in place by `validateSegments` before chunking, so
 * the caller's segment objects may be modified.
 *
 * @param {Array<{start: number, end: number, text: string}>} segments - Segments
 *   with `start`/`end` in milliseconds. `null`, `undefined` and `[]` yield `[]`.
 * @returns {Array<object>} Chunks with `text`, `startSeconds`, `endSeconds`,
 *   `durationSeconds`, `chunkIndex`, `segments` (shallow copies) and an
 *   estimated `tokens` count (whitespace-separated words / 0.75, rounded up).
 * @throws {Error} Propagated from `validateSegments` / `validateChunkBoundaries`.
 */
function chunkTranscript(segments) {
  if (!segments?.length) return [];
  validateSegments(segments);
  const chunks = [];
  let buffer = [];
  for (let i = 0; i < segments.length; i++) {
    const seg = segments[i];
    const next = segments[i + 1];
    buffer.push(seg);
    const bufferDurationMs = buffer[buffer.length - 1].end - buffer[0].start;
    const isLast = !next;
    const atHardCap = bufferDurationMs >= MAX_MS;
    const atTarget = bufferDurationMs >= TARGET_MS;
    const pauseAfterMs = next ? next.start - seg.end : Infinity;
    const isNaturalPause = pauseAfterMs > 1e3;
    const endsSentence = /[.!?]\s*$/.test(seg.text.trim());
    const shouldFlush = isLast || atHardCap || atTarget && (isNaturalPause || endsSentence);
    if (shouldFlush && (bufferDurationMs >= MIN_MS || isLast)) {
      const text = buffer.map((s) => s.text).join(" ").replace(/\s+/g, " ").trim();
      chunks.push({
        text,
        startSeconds: buffer[0].start / 1e3,
        endSeconds: buffer[buffer.length - 1].end / 1e3,
        durationSeconds: bufferDurationMs / 1e3,
        chunkIndex: chunks.length,
        segments: buffer.map((s) => ({ ...s })),
        tokens: Math.ceil(text.split(/\s+/).length / 0.75)
      });
      buffer = [];
    }
  }
  // No trailing flush is needed: the final segment always satisfies the flush
  // condition above (`isLast`), so the buffer is empty once the loop ends.
  validateChunkBoundaries(chunks);
  return chunks;
}
|
|
1760
|
+
/**
 * Validates transcript segments and removes overlaps between neighbours.
 *
 * The array's objects are modified IN PLACE. When a segment starts before the
 * previous one ends, its `start` is moved to the previous `end` and its
 * `duration` is recomputed. If that leaves the segment ending before it starts
 * (it was entirely covered by its predecessor), `end` is raised to `start` so
 * the timeline stays monotonic and no negative duration is produced.
 *
 * @param {Array<{start: number, end: number, duration?: number}>} segments -
 *   Segments in playback order.
 * @returns {void}
 * @throws {Error} If an input segment has `end <= start`.
 */
function validateSegments(segments) {
  for (let i = 0; i < segments.length; i++) {
    const seg = segments[i];
    if (seg.end <= seg.start) {
      throw new Error(`[yt-chunker] Segment ${i}: end(${seg.end}) <= start(${seg.start})`);
    }
    if (i > 0 && seg.start < segments[i - 1].end) {
      seg.start = segments[i - 1].end;
      if (seg.end < seg.start) {
        // Fully covered by the previous segment: collapse to zero length
        // instead of leaving end < start.
        seg.end = seg.start;
      }
      seg.duration = seg.end - seg.start;
    }
  }
}
|
|
1772
|
+
/**
 * Asserts that consecutive chunks do not overlap in time.
 * A chunk may start up to 0.1 s before the previous chunk ends; anything
 * earlier than that is treated as an overlap.
 *
 * @param {Array<{startSeconds: number, endSeconds: number}>} chunks - Chunks in order.
 * @returns {void}
 * @throws {Error} On the first pair of chunks that overlap.
 */
function validateChunkBoundaries(chunks) {
  for (let idx = 0; idx + 1 < chunks.length; idx++) {
    const earlier = chunks[idx];
    const later = chunks[idx + 1];
    const earliestAllowedStart = earlier.endSeconds - 0.1;
    if (later.startSeconds >= earliestAllowedStart) {
      continue;
    }
    throw new Error(
      `[yt-chunker] Chunk ${idx + 1} overlaps chunk ${idx}: prev ends ${earlier.endSeconds}s, curr starts ${later.startSeconds}s`
    );
  }
}
|
|
1783
|
+
// Structured-output schema for the metadata requested from the LLM in
// `extractVideoMetadata`.
const metadataSchema = z.object({
  // At most 8 topic strings.
  topics: z.array(z.string()).max(8),
  // Whole-video summary, at most 400 characters.
  summary: z.string().max(400),
  // At most 10 key moments, each with a label, a start offset in seconds and
  // a summary of at most 150 characters.
  keyMoments: z
    .array(
      z.object({
        label: z.string(),
        startSeconds: z.number(),
        summary: z.string().max(150)
      })
    )
    .max(10),
  // Language code of the transcript; defaults to "en" when omitted.
  language: z.string().default("en")
});
|
|
1793
|
+
/**
 * Asks an OpenAI chat model ("gpt-4o-mini", temperature 0) for topics, a
 * summary, key moments and the language of a video transcript, using
 * `metadataSchema` as the structured-output schema.
 *
 * Transcripts longer than 4000 whitespace-separated words are sampled: the
 * first 2000 words, a literal "...", then the last 2000 words.
 *
 * @param {string} title - Video title, interpolated into the prompt.
 * @param {string} fullTranscript - Complete transcript text.
 * @param {number} durationSeconds - Video length; reported to the model in whole minutes.
 * @param {string} openAIApiKey - API key passed to the chat model client.
 * @returns {Promise<{topics: Array, summary: string, keyMoments: Array, language: string}>}
 *   The model's answer, with `[]`, `""`, `[]` and `"en"` substituted for
 *   fields that come back null or undefined.
 */
async function extractVideoMetadata(title, fullTranscript, durationSeconds, openAIApiKey) {
  const model = new ChatOpenAI({
    modelName: "gpt-4o-mini",
    temperature: 0,
    openAIApiKey
  });
  const extractor = model.withStructuredOutput(metadataSchema);
  const tokens = fullTranscript.split(/\s+/);
  let sample = fullTranscript;
  if (tokens.length > 4e3) {
    // Keep the head and tail of very long transcripts.
    sample = tokens.slice(0, 2e3).concat("...", tokens.slice(-2e3)).join(" ");
  }
  const prompt = `
Video title: "${title}"
Duration: ${Math.floor(durationSeconds / 60)} minutes

Transcript (may be truncated for long videos):
"""
${sample}
"""

Extract:
- topics: key subjects covered (use specific terms, not generic like "technology")
- summary: 2-3 sentences describing what the video teaches or argues
- keyMoments: the 5-8 most important moments, with approximate start time in seconds
- language: ISO 639-1 language code of the transcript
`.trim();
  const { topics, summary, keyMoments, language } = await extractor.invoke(prompt);
  return {
    topics: topics ?? [],
    summary: summary ?? "",
    keyMoments: keyMoments ?? [],
    language: language ?? "en"
  };
}
|
|
1824
|
+
/**
 * Computes a short fingerprint of a transcript: the first 16 hex characters
 * of its SHA-256 digest.
 *
 * @param {string} content - Text to fingerprint.
 * @returns {string} 16-character lowercase hex string.
 */
function computeContentHash(content) {
  const hasher = crypto.createHash("sha256");
  hasher.update(content);
  const fullDigest = hasher.digest("hex");
  return fullDigest.slice(0, 16);
}
|
|
1827
|
+
/**
 * `ytEmbeddings` service factory: ingestion, search and maintenance of the
 * `yt_videos` / `yt_video_chunks` tables.
 *
 * Every method obtains its database pool (and, where needed, the embeddings
 * client) from `pluginManager` and throws
 * "[yt-embed] Plugin manager not initialized" when they are unavailable.
 */
const ytEmbeddings = ({ strapi }) => ({
  // ── Ingest a single transcript ──────────────────────────────────────────────
  /**
   * Chunks, embeds and stores one transcript.
   *
   * A video that is already stored as "complete" with the same content hash
   * is skipped. Otherwise any existing row for the video is deleted and the
   * transcript is ingested again. On failure the video row is marked
   * "failed" and the original error is rethrown.
   *
   * @param {object} transcript - Reads `documentId`, `videoId`, `title`,
   *   `fullTranscript` and `transcriptWithTimeCodes` (segments timed in ms;
   *   null/undefined is treated as an empty list).
   * @returns {Promise<{videoId: string, chunkCount: number, skipped: boolean}>}
   * @throws {Error} If the plugin manager is not initialized, or ingestion fails.
   */
  async embedTranscript(transcript) {
    const pool = pluginManager.getPool();
    const embeddings = pluginManager.getEmbeddings();
    const embeddingModel = pluginManager.getEmbeddingModel();
    if (!pool || !embeddings) {
      throw new Error("[yt-embed] Plugin manager not initialized");
    }
    // Resolve the segment list before touching the database, so a transcript
    // without time codes is stored as an empty video instead of crashing
    // after the previous row has already been deleted.
    const segs = transcript.transcriptWithTimeCodes ?? [];
    const contentHash = computeContentHash(transcript.fullTranscript);
    const existing = await pool.query(
      "SELECT id, embedding_status, content_hash FROM yt_videos WHERE video_id = $1",
      [transcript.videoId]
    );
    if (existing.rows.length > 0) {
      const row = existing.rows[0];
      if (row.embedding_status === "complete" && row.content_hash === contentHash) {
        strapi.log.info(`[yt-embed] ${transcript.videoId} already ingested, skipping`);
        return { videoId: transcript.videoId, chunkCount: 0, skipped: true };
      }
      strapi.log.info(`[yt-embed] Re-ingesting ${transcript.videoId} (status: ${row.embedding_status}, hash changed: ${row.content_hash !== contentHash})`);
      await pool.query("DELETE FROM yt_videos WHERE video_id = $1", [transcript.videoId]);
    }
    // Duration is taken from the end of the last segment (ms → whole seconds).
    const durationSeconds = segs.length > 0 ? Math.floor(segs[segs.length - 1].end / 1e3) : 0;
    await pool.query(
      `INSERT INTO yt_videos
(strapi_document_id, video_id, url, title, duration_seconds,
content_hash, embedding_status, embedding_model)
VALUES ($1, $2, $3, $4, $5, $6, 'processing', $7)`,
      [
        transcript.documentId,
        transcript.videoId,
        `https://www.youtube.com/watch?v=${transcript.videoId}`,
        transcript.title,
        durationSeconds,
        contentHash,
        embeddingModel
      ]
    );
    try {
      const config2 = strapi.config.get("plugin::yt-embeddings-strapi-plugin");
      let topics = [];
      let summary = "";
      let keyMoments = [];
      let language = "en";
      // Metadata is best-effort: a failure here is logged and ingestion
      // continues with the defaults above.
      try {
        const meta = await extractVideoMetadata(
          transcript.title,
          transcript.fullTranscript,
          durationSeconds,
          config2.openAIApiKey
        );
        topics = meta.topics;
        summary = meta.summary;
        keyMoments = meta.keyMoments;
        language = meta.language;
        strapi.log.info(`[yt-embed] Metadata extracted: ${topics.length} topics, ${keyMoments.length} key moments`);
      } catch (err) {
        strapi.log.warn(`[yt-embed] Metadata extraction failed, continuing without it:`, err);
      }
      const chunks = chunkTranscript(segs);
      if (chunks.length === 0) {
        await pool.query(
          `UPDATE yt_videos
SET embedding_status = 'complete', chunk_count = 0, embedded_at = NOW(),
topics = $1, summary = $2, key_moments = $3, language = $4, updated_at = NOW()
WHERE video_id = $5`,
          [topics, summary, JSON.stringify(keyMoments), language, transcript.videoId]
        );
        strapi.log.info(`[yt-embed] ${transcript.title} — no chunks (empty transcript)`);
        return { videoId: transcript.videoId, chunkCount: 0, skipped: false };
      }
      const embeddingVectors = await embeddings.embedDocuments(chunks.map((c) => c.text));
      const insertedIds = [];
      for (let i = 0; i < chunks.length; i++) {
        const chunk = chunks[i];
        // Vectors are sent as a "[v1,v2,...]" literal and cast with ::vector.
        const vectorStr = `[${embeddingVectors[i].join(",")}]`;
        const result = await pool.query(
          `INSERT INTO yt_video_chunks
(video_id, strapi_document_id, text, embedding,
start_seconds, end_seconds, chunk_index, segments, tokens)
VALUES ($1, $2, $3, $4::vector, $5, $6, $7, $8, $9)
RETURNING id`,
          [
            transcript.videoId,
            transcript.documentId,
            chunk.text,
            vectorStr,
            chunk.startSeconds,
            chunk.endSeconds,
            i,
            JSON.stringify(chunk.segments),
            chunk.tokens
          ]
        );
        insertedIds.push(result.rows[0].id);
      }
      // Link each chunk to its neighbours now that all ids are known.
      for (let i = 0; i < insertedIds.length; i++) {
        await pool.query(
          "UPDATE yt_video_chunks SET prev_chunk_id = $1, next_chunk_id = $2 WHERE id = $3",
          [insertedIds[i - 1] ?? null, insertedIds[i + 1] ?? null, insertedIds[i]]
        );
      }
      await pool.query(
        `UPDATE yt_videos
SET embedding_status = 'complete', chunk_count = $1, embedded_at = NOW(),
topics = $2, summary = $3, key_moments = $4, language = $5, updated_at = NOW()
WHERE video_id = $6`,
        [chunks.length, topics, summary, JSON.stringify(keyMoments), language, transcript.videoId]
      );
      strapi.log.info(`[yt-embed] ${transcript.title} — ${chunks.length} chunks embedded`);
      return { videoId: transcript.videoId, chunkCount: chunks.length, skipped: false };
    } catch (err) {
      // Record the failure, but never let a failing status update hide the
      // error that actually aborted ingestion.
      try {
        await pool.query(
          `UPDATE yt_videos SET embedding_status = 'failed', error_message = $1, updated_at = NOW() WHERE video_id = $2`,
          [String(err), transcript.videoId]
        );
      } catch (statusErr) {
        strapi.log.error(`[yt-embed] Could not record failure for ${transcript.videoId}:`, statusErr);
      }
      throw err;
    }
  },
  // ── Semantic search with context expansion ──────────────────────────────────
  /**
   * Embeds `query`, finds the most similar chunks of fully ingested videos,
   * drops hits adjacent to an already accepted hit, and attaches surrounding
   * transcript context to each result.
   *
   * @param {string} query - Search text.
   * @param {object} [options] - `limit` (default 5), `minSimilarity`
   *   (default 0.2), `contextWindowSeconds` (default 30), optional `videoId`
   *   and `topics` filters.
   * @returns {Promise<Array<object>>} Results in similarity order.
   * @throws {Error} If the plugin manager is not initialized.
   */
  async search(query, options = {}) {
    const pool = pluginManager.getPool();
    const embeddingsClient = pluginManager.getEmbeddings();
    if (!pool || !embeddingsClient) {
      throw new Error("[yt-embed] Plugin manager not initialized");
    }
    const { limit = 5, minSimilarity = 0.2, contextWindowSeconds = 30 } = options;
    const queryVector = await embeddingsClient.embedQuery(query);
    const vectorStr = `[${queryVector.join(",")}]`;
    // Fetch twice the requested amount so de-duplication can still fill `limit`.
    const params = [vectorStr, minSimilarity, limit * 2];
    const filters = [];
    if (options.videoId) {
      params.push(options.videoId);
      filters.push(`vc.video_id = $${params.length}`);
    }
    if (options.topics?.length) {
      params.push(options.topics);
      filters.push(`v.topics && $${params.length}::text[]`);
    }
    const whereExtra = filters.length > 0 ? "AND " + filters.join(" AND ") : "";
    const rows = await pool.query(`
SELECT
vc.id, vc.video_id, vc.text, vc.start_seconds, vc.end_seconds,
vc.chunk_index, vc.segments, vc.strapi_document_id,
vc.prev_chunk_id, vc.next_chunk_id,
v.title, v.channel_name, v.summary AS video_summary,
v.topics, v.duration_seconds,
1 - (vc.embedding <=> $1::vector) AS similarity
FROM yt_video_chunks vc
JOIN yt_videos v ON v.video_id = vc.video_id
WHERE 1 - (vc.embedding <=> $1::vector) >= $2
AND v.embedding_status = 'complete'
${whereExtra}
ORDER BY vc.embedding <=> $1::vector
LIMIT $3
`, params);
    if (!rows.rows.length) return [];
    // Skip a chunk when one of its direct neighbours was already accepted.
    const seen = /* @__PURE__ */ new Set();
    const deduped = rows.rows.filter((row) => {
      if (seen.has(row.prev_chunk_id) || seen.has(row.next_chunk_id)) return false;
      seen.add(row.id);
      return true;
    }).slice(0, limit);
    return Promise.all(deduped.map(async (row) => {
      // Extend the hit by half the context window on each side, bounded by
      // 0 and (when known) the video duration.
      const half = contextWindowSeconds / 2;
      const ctxStart = Math.max(0, row.start_seconds - half);
      const ctxEnd = Math.min(
        row.duration_seconds ?? row.end_seconds + half,
        row.end_seconds + half
      );
      const ctxRows = await pool.query(`
SELECT segments, start_seconds, end_seconds
FROM yt_video_chunks
WHERE video_id = $1
AND start_seconds < $2
AND end_seconds > $3
ORDER BY start_seconds
`, [row.video_id, ctxEnd, ctxStart]);
      const contextText = buildContextText(
        ctxRows.rows,
        ctxStart,
        ctxEnd,
        row.start_seconds,
        row.end_seconds
      );
      return {
        chunkText: row.text,
        startSeconds: row.start_seconds,
        endSeconds: row.end_seconds,
        similarity: Math.round(row.similarity * 1e3) / 1e3,
        contextText,
        videoId: row.video_id,
        title: row.title,
        channelName: row.channel_name,
        videoSummary: row.video_summary,
        topics: row.topics,
        strapiDocumentId: row.strapi_document_id,
        deepLink: `https://www.youtube.com/watch?v=${row.video_id}&t=${Math.floor(row.start_seconds)}`,
        contextLink: `https://www.youtube.com/watch?v=${row.video_id}&t=${Math.floor(ctxStart)}`
      };
    }));
  },
  // ── Get transcript range for a specific video ──────────────────────────────
  /**
   * Returns the chunks of a video that overlap [startSeconds, endSeconds],
   * ordered by start time.
   *
   * @param {string} videoId
   * @param {number} startSeconds
   * @param {number} endSeconds
   * @returns {Promise<Array<object>>} Rows with `text`, `start_seconds`,
   *   `end_seconds` and `segments`.
   */
  async getTranscriptRange(videoId, startSeconds, endSeconds) {
    const pool = pluginManager.getPool();
    if (!pool) throw new Error("[yt-embed] Plugin manager not initialized");
    const rows = await pool.query(`
SELECT text, start_seconds, end_seconds, segments
FROM yt_video_chunks
WHERE video_id = $1
AND start_seconds < $3
AND end_seconds > $2
ORDER BY start_seconds
`, [videoId, startSeconds, endSeconds]);
    return rows.rows;
  },
  // ── List all ingested videos ────────────────────────────────────────────────
  /**
   * Lists stored videos, newest first, with a total count.
   *
   * @param {object} [options] - `page` (default 1), `pageSize` (default 25)
   *   and an optional `status` filter on `embedding_status`.
   * @returns {Promise<{data: Array<object>, total: number, page: number, pageSize: number}>}
   */
  async listVideos(options = {}) {
    const pool = pluginManager.getPool();
    if (!pool) throw new Error("[yt-embed] Plugin manager not initialized");
    const { page = 1, pageSize = 25, status } = options;
    const offset = (page - 1) * pageSize;
    // The page query and the count query take different parameter lists, so
    // the status placeholder must be numbered separately for each of them
    // ($3 in the page query, $1 in the count query).
    const dataParams = [pageSize, offset];
    const countParams = [];
    let dataFilter = "";
    let countFilter = "";
    if (status) {
      dataParams.push(status);
      dataFilter = `WHERE embedding_status = $${dataParams.length}`;
      countParams.push(status);
      countFilter = `WHERE embedding_status = $${countParams.length}`;
    }
    const [dataResult, countResult] = await Promise.all([
      pool.query(`
SELECT id, strapi_document_id, video_id, url, title, channel_name,
duration_seconds, language, topics, summary, key_moments,
embedding_status, chunk_count, embedded_at, created_at
FROM yt_videos
${dataFilter}
ORDER BY created_at DESC
LIMIT $1 OFFSET $2
`, dataParams),
      pool.query(
        `SELECT count(*)::int FROM yt_videos ${countFilter}`,
        countParams
      )
    ]);
    return {
      data: dataResult.rows,
      total: countResult.rows[0].count,
      page,
      pageSize
    };
  },
  // ── Get single video with key moments ──────────────────────────────────────
  /**
   * @param {string} videoId
   * @returns {Promise<object|null>} The `yt_videos` row, or null when absent.
   */
  async getVideo(videoId) {
    const pool = pluginManager.getPool();
    if (!pool) throw new Error("[yt-embed] Plugin manager not initialized");
    const result = await pool.query(
      "SELECT * FROM yt_videos WHERE video_id = $1",
      [videoId]
    );
    return result.rows[0] ?? null;
  },
  // ── Delete a video and all its chunks ──────────────────────────────────────
  /**
   * Deletes the `yt_videos` row for a video.
   * NOTE(review): chunk rows are presumably removed by a cascading foreign
   * key, since only `yt_videos` is touched here — confirm against the schema.
   *
   * @param {string} videoId
   * @returns {Promise<object|null>} `{ id, title, chunk_count }` of the
   *   deleted row, or null when nothing matched.
   */
  async deleteVideo(videoId) {
    const pool = pluginManager.getPool();
    if (!pool) throw new Error("[yt-embed] Plugin manager not initialized");
    const result = await pool.query(
      "DELETE FROM yt_videos WHERE video_id = $1 RETURNING id, title, chunk_count",
      [videoId]
    );
    return result.rows[0] ?? null;
  },
  // ── Get chunks for a video by time range ───────────────────────────────────
  /**
   * Returns a video's chunks ordered by `chunk_index`. The time filter is
   * applied only when BOTH `options.start` and `options.end` are given.
   *
   * @param {string} videoId
   * @param {{start?: number, end?: number}} [options]
   * @returns {Promise<Array<object>>}
   */
  async getVideoChunks(videoId, options = {}) {
    const pool = pluginManager.getPool();
    if (!pool) throw new Error("[yt-embed] Plugin manager not initialized");
    const params = [videoId];
    let timeFilter = "";
    if (options.start !== void 0 && options.end !== void 0) {
      // $2 = end, $3 = start: keep chunks that overlap the window.
      params.push(options.end, options.start);
      timeFilter = `AND start_seconds < $${params.length - 1} AND end_seconds > $${params.length}`;
    }
    const result = await pool.query(`
SELECT id, text, start_seconds, end_seconds, chunk_index, segments, tokens
FROM yt_video_chunks
WHERE video_id = $1 ${timeFilter}
ORDER BY chunk_index
`, params);
    return result.rows;
  },
  // ── Check embedding status by Strapi document ID ─────────────────────────
  /**
   * @param {string} documentId - Strapi document id of the transcript.
   * @returns {Promise<object>} `{ embedded: false }` when no row exists,
   *   otherwise `{ embedded, videoId, chunkCount, embeddedAt }` where
   *   `embedded` is true only for status "complete".
   */
  async getStatusByDocumentId(documentId) {
    const pool = pluginManager.getPool();
    if (!pool) throw new Error("[yt-embed] Plugin manager not initialized");
    const result = await pool.query(
      "SELECT video_id, chunk_count, embedded_at, embedding_status FROM yt_videos WHERE strapi_document_id = $1",
      [documentId]
    );
    if (!result.rows.length) return { embedded: false };
    const row = result.rows[0];
    return {
      embedded: row.embedding_status === "complete",
      videoId: row.video_id,
      chunkCount: row.chunk_count,
      embeddedAt: row.embedded_at
    };
  },
  // ── Re-embed all transcripts ───────────────────────────────────────────────
  /**
   * Deletes all stored videos and chunks, then re-ingests every transcript
   * document. Individual failures are logged and do not stop the run.
   *
   * @returns {Promise<{total: number, processed: number}>}
   */
  async recomputeAll() {
    const pool = pluginManager.getPool();
    if (!pool) throw new Error("[yt-embed] Plugin manager not initialized");
    // Load the transcripts BEFORE wiping the tables, so a failing fetch does
    // not leave the store empty with nothing to rebuild it from.
    const transcripts = await strapi.documents("plugin::yt-transcript-strapi-plugin.transcript").findMany({ fields: ["documentId", "videoId", "title", "fullTranscript", "transcriptWithTimeCodes"] });
    strapi.log.info("[yt-embed] Recompute: dropping all yt data...");
    await pool.query("DELETE FROM yt_video_chunks");
    await pool.query("DELETE FROM yt_videos");
    let processed = 0;
    for (const t of transcripts) {
      try {
        await this.embedTranscript({
          documentId: t.documentId,
          id: t.id,
          videoId: t.videoId,
          title: t.title,
          fullTranscript: t.fullTranscript,
          transcriptWithTimeCodes: t.transcriptWithTimeCodes
        });
        processed++;
      } catch (err) {
        strapi.log.error(`[yt-embed] Failed to embed ${t.videoId}:`, err);
      }
    }
    strapi.log.info(`[yt-embed] Recompute complete. ${processed}/${transcripts.length} videos embedded.`);
    return { total: transcripts.length, processed };
  }
});
|
|
2162
|
+
/**
 * Builds the context passage for a search hit: the text of every segment
 * that overlaps the context window, with the segments that start inside the
 * matched chunk's time range wrapped in `[RELEVANT]` … `[/RELEVANT]`.
 *
 * The marker pair is emitted at most once. Segments after the match are
 * appended as plain text. The opening marker is only placed on a segment
 * that starts inside [matchStart, matchEnd), so a segment beyond the match
 * can never re-open it.
 *
 * @param {Array<{segments: Array<{start: number, end: number, text: string}>}>} rows -
 *   Chunk rows in playback order; segment times are in milliseconds. A row
 *   whose `segments` is null/undefined is skipped.
 * @param {number} ctxStart - Context window start, in seconds.
 * @param {number} ctxEnd - Context window end, in seconds.
 * @param {number} matchStart - Start of the matched chunk, in seconds.
 * @param {number} matchEnd - End of the matched chunk, in seconds.
 * @returns {string} Whitespace-normalized context text.
 */
function buildContextText(rows, ctxStart, ctxEnd, matchStart, matchEnd) {
  const parts = [];
  let inMatch = false;
  for (const row of rows) {
    for (const seg of row.segments ?? []) {
      const s = seg.start / 1e3;
      const e = seg.end / 1e3;
      // Skip segments entirely outside the context window.
      if (e <= ctxStart || s >= ctxEnd) continue;
      if (inMatch && s >= matchEnd) {
        parts.push("[/RELEVANT]");
        inMatch = false;
      }
      if (!inMatch && s >= matchStart && s < matchEnd) {
        parts.push("[RELEVANT]");
        inMatch = true;
      }
      parts.push(seg.text);
    }
  }
  // The match ran up to the end of the context window.
  if (inMatch) parts.push("[/RELEVANT]");
  return parts.join(" ").replace(/\s+/g, " ").trim();
}
|
|
2185
|
+
// Service factories registered by the plugin, keyed by service name.
const services = {
  "ai-tools": aiTools,
  ytEmbeddings: ytEmbeddings
};
|
|
2189
|
+
// Server-side plugin definition: lifecycle hooks first, then the registries.
// This object is the module's default export.
const index = {
  register: register,
  bootstrap: bootstrap,
  destroy: destroy,
  config: config,
  controllers: controllers,
  routes: routes,
  services: services,
  contentTypes: contentTypes,
  policies: policies,
  middlewares: middlewares
};
export { index as default };
|