yt-embeddings-strapi-plugin 0.0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/{App-Cv1cdLAr.js → App-57XBOCeK.js} +10 -2
- package/dist/_chunks/{App-bN58O1bN.mjs → App-D7Dy3o44.mjs} +10 -2
- package/dist/_chunks/{index-BAfBs5PQ.js → index-CZeQ6zu2.js} +1 -1
- package/dist/_chunks/{index-K6X5FM2O.mjs → index-CuxGkc_5.mjs} +1 -1
- package/dist/admin/index.js +1 -1
- package/dist/admin/index.mjs +1 -1
- package/dist/server/index.js +124 -145
- package/dist/server/index.mjs +124 -145
- package/dist/server/src/plugin-manager.d.ts +16 -13
- package/package.json +3 -8
package/dist/_chunks/App-57XBOCeK.js
CHANGED

@@ -7,7 +7,7 @@ const react = require("react");
 const styled = require("styled-components");
 const designSystem = require("@strapi/design-system");
 const icons = require("@strapi/icons");
-const index = require("./index-BAfBs5PQ.js");
+const index = require("./index-CZeQ6zu2.js");
 const qs = require("qs");
 const ReactMarkdown = require("react-markdown");
 const _interopDefault = (e) => e && e.__esModule ? e : { default: e };
@@ -488,7 +488,15 @@ function VideoDetails() {
       get(`/${index.PLUGIN_ID}/yt/videos/${videoId}`),
       get(`/${index.PLUGIN_ID}/yt/videos/${videoId}/chunks`)
     ]).then(([videoRes, chunksRes]) => {
-      …
+      const v = videoRes?.data?.data || videoRes?.data || null;
+      if (v && typeof v.key_moments === "string") {
+        try {
+          v.key_moments = JSON.parse(v.key_moments);
+        } catch {
+          v.key_moments = [];
+        }
+      }
+      setVideo(v);
       setChunks(chunksRes?.data?.data || chunksRes?.data || []);
     }).catch((err) => console.error("Failed to load video:", err)).finally(() => setIsLoading(false));
   }, [videoId, get]);
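The only functional change in the admin App chunks is this normalization: the content API can return `key_moments` as a JSON string rather than an array, and the detail view now parses it defensively before rendering. A standalone sketch of the same pattern (the helper name is ours, not the plugin's):

```js
// Hypothetical helper mirroring the normalization the new chunk performs inline.
// A key_moments value stored as JSON text becomes an array; malformed JSON becomes [].
function normalizeKeyMoments(video) {
  if (video && typeof video.key_moments === "string") {
    try {
      video.key_moments = JSON.parse(video.key_moments);
    } catch {
      video.key_moments = []; // fall back to an empty list instead of breaking the view
    }
  }
  return video;
}
```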
package/dist/_chunks/App-D7Dy3o44.mjs
CHANGED

@@ -5,7 +5,7 @@ import { useRef, useState, useEffect, useCallback } from "react";
 import styled from "styled-components";
 import { Button, Modal, Box, TextInput, Link, Accordion, Typography, Tr, Main, Flex, Loader, Table, Thead, Th, Tbody, Td, Badge } from "@strapi/design-system";
 import { ArrowClockwise, ArrowLeft } from "@strapi/icons";
-import { R as RobotIcon, P as PLUGIN_ID } from "./index-K6X5FM2O.mjs";
+import { R as RobotIcon, P as PLUGIN_ID } from "./index-CuxGkc_5.mjs";
 import qs from "qs";
 import ReactMarkdown from "react-markdown";
 const MarkdownWrapper = styled.div`
@@ -482,7 +482,15 @@ function VideoDetails() {
       get(`/${PLUGIN_ID}/yt/videos/${videoId}`),
       get(`/${PLUGIN_ID}/yt/videos/${videoId}/chunks`)
     ]).then(([videoRes, chunksRes]) => {
-      …
+      const v = videoRes?.data?.data || videoRes?.data || null;
+      if (v && typeof v.key_moments === "string") {
+        try {
+          v.key_moments = JSON.parse(v.key_moments);
+        } catch {
+          v.key_moments = [];
+        }
+      }
+      setVideo(v);
       setChunks(chunksRes?.data?.data || chunksRes?.data || []);
     }).catch((err) => console.error("Failed to load video:", err)).finally(() => setIsLoading(false));
   }, [videoId, get]);
package/dist/_chunks/index-CZeQ6zu2.js
CHANGED

@@ -130,7 +130,7 @@ const index = {
       defaultMessage: PLUGIN_ID
     },
     Component: async () => {
-      const { App } = await Promise.resolve().then(() => require("./App-Cv1cdLAr.js"));
+      const { App } = await Promise.resolve().then(() => require("./App-57XBOCeK.js"));
       return App;
     }
   });
package/dist/admin/index.js
CHANGED
package/dist/admin/index.mjs
CHANGED
package/dist/server/index.js
CHANGED
@@ -1,9 +1,6 @@
 "use strict";
-const openai = require("@langchain/openai");
-const …
-const output_parsers = require("@langchain/core/output_parsers");
-const prompts = require("@langchain/core/prompts");
-const runnables = require("@langchain/core/runnables");
+const ai = require("ai");
+const openai = require("@ai-sdk/openai");
 const pg = require("pg");
 const index_js = require("@modelcontextprotocol/sdk/server/index.js");
 const types_js = require("@modelcontextprotocol/sdk/types.js");
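The server bundle swaps LangChain out for the Vercel AI SDK: `ai` provides `embed`, `embedMany`, `generateText`, and `generateObject`, while `@ai-sdk/openai` provides the provider. A minimal sketch of the new setup, assuming an `OPENAI_API_KEY` environment variable (the plugin itself reads the key from its Strapi config):

```js
const { embed } = require("ai");
const { createOpenAI } = require("@ai-sdk/openai");

// Provider instance bound to an API key.
const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY });

// Embedding-model handle; dimensions must match the pgvector column width.
const model = openai.embedding("text-embedding-3-small", { dimensions: 1536 });

async function main() {
  const { embedding } = await embed({ model, value: "hello world" });
  console.log(embedding.length); // 1536
}

main();
```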
@@ -69,12 +66,11 @@ const config = {
 };
 class PluginManager {
   constructor() {
-    this.embeddings = null;
-    this.chat = null;
+    this.embeddingModel_ = null;
+    this.chatModel = null;
     this.pool = null;
-    this.embeddingModel = "text-embedding-3-small";
+    this.embeddingModelName = "text-embedding-3-small";
     this.dimensions = 1536;
-    this.vectorStoreConfig = null;
   }
   async initializePool(connectionString) {
     console.log("Initializing Neon DB Pool");
@@ -130,74 +126,47 @@ class PluginManager {
       client.release();
     }
   }
-  initializeEmbeddings(…) {
-    console.log(`Initializing OpenAI Embeddings (model: ${this.embeddingModel})`);
-    if (this.embeddings) …
-    try {
-      this.embeddings = new openai.OpenAIEmbeddings({
-        …
-        modelName: this.embeddingModel,
-        dimensions: this.dimensions
-      });
-      return this.embeddings;
-    } catch (error) {
-      console.error(`Failed to initialize Embeddings: ${error}`);
-      throw new Error(`Failed to initialize Embeddings: ${error}`);
-    }
+  initializeEmbeddings(openai2) {
+    console.log(`Initializing OpenAI Embeddings (model: ${this.embeddingModelName})`);
+    if (this.embeddingModel_) return;
+    this.embeddingModel_ = openai2.embedding(this.embeddingModelName, {
+      dimensions: this.dimensions
+    });
   }
-  initializeChat(…) {
+  initializeChat(openai2) {
     console.log("Initializing Chat Model");
-    if (this.chat) …
-    try {
-      this.chat = new openai.ChatOpenAI({
-        modelName: "gpt-4o-mini",
-        temperature: 0.7,
-        openAIApiKey
-      });
-      return this.chat;
-    } catch (error) {
-      console.error(`Failed to initialize Chat: ${error}`);
-      throw new Error(`Failed to initialize Chat: ${error}`);
-    }
+    if (this.chatModel) return;
+    this.chatModel = openai2("gpt-4o-mini");
   }
   async initialize(config2) {
     const model = config2.embeddingModel || "text-embedding-3-small";
     if (EMBEDDING_MODELS[model]) {
-      this.embeddingModel = model;
+      this.embeddingModelName = model;
       this.dimensions = EMBEDDING_MODELS[model].dimensions;
     } else {
       console.warn(`Invalid embedding model "${model}", using default`);
-      this.embeddingModel = "text-embedding-3-small";
+      this.embeddingModelName = "text-embedding-3-small";
       this.dimensions = EMBEDDING_MODELS["text-embedding-3-small"].dimensions;
     }
-    console.log(`Using embedding model: ${this.embeddingModel} (${this.dimensions} dimensions)`);
+    console.log(`Using embedding model: ${this.embeddingModelName} (${this.dimensions} dimensions)`);
     await this.initializePool(config2.neonConnectionString);
-    …
-    this.vectorStoreConfig = {
-      pool: this.pool,
-      tableName: "embeddings_documents",
-      columns: {
-        idColumnName: "id",
-        vectorColumnName: "embedding",
-        contentColumnName: "content",
-        metadataColumnName: "metadata"
-      },
-      distanceStrategy: "cosine"
-    };
-    }
+    const openai$1 = openai.createOpenAI({ apiKey: config2.openAIApiKey });
+    this.initializeEmbeddings(openai$1);
+    this.initializeChat(openai$1);
     console.log("Plugin Manager Initialization Complete");
   }
   async createEmbedding(docData) {
-    if (!this.embeddings || !this.pool) {
+    if (!this.embeddingModel_ || !this.pool) {
       throw new Error("Plugin manager not initialized");
     }
     const maxRetries = 3;
     const retryDelay = 2e3;
     for (let attempt = 1; attempt <= maxRetries; attempt++) {
       try {
-        const embeddingVector = await …
+        const { embedding: embeddingVector } = await ai.embed({
+          model: this.embeddingModel_,
+          value: docData.content
+        });
         const metadata = {
           id: docData.id,
           title: docData.title,
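`createEmbedding` keeps its retry loop (`maxRetries = 3`, `retryDelay = 2e3`) but now obtains the vector from `ai.embed` instead of a LangChain embeddings object. A compact sketch of that retry-around-embed shape; the backoff wait here is our simplification, since the delay handling falls outside the shown hunk:

```js
const { embed } = require("ai");

// Sketch: retry an embedding call a few times before giving up.
async function embedWithRetry(model, content, maxRetries = 3, retryDelay = 2000) {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      const { embedding } = await embed({ model, value: content });
      return embedding;
    } catch (err) {
      if (attempt === maxRetries) throw err;
      // wait before the next attempt (assumed backoff behavior)
      await new Promise((resolve) => setTimeout(resolve, retryDelay));
    }
  }
}
```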
@@ -246,58 +215,56 @@ class PluginManager {
     }
   }
   async queryEmbedding(query) {
-    if (!this.embeddings || !this.chat || !this.pool) {
+    if (!this.embeddingModel_ || !this.chatModel || !this.pool) {
       throw new Error("Plugin manager not initialized");
     }
     try {
-      const …
-      this.…
-      …
-      );
-      const …
+      const { embedding: queryVector } = await ai.embed({
+        model: this.embeddingModel_,
+        value: query
+      });
+      const vectorStr = `[${queryVector.join(",")}]`;
+      const results = await this.pool.query(`
+        SELECT
+          content,
+          metadata,
+          1 - (embedding <=> $1::vector) AS similarity
+        FROM embeddings_documents
+        WHERE 1 - (embedding <=> $1::vector) > 0
+        ORDER BY embedding <=> $1::vector
+        LIMIT 6
+      `, [vectorStr]);
       console.log(`[queryEmbedding] Query: "${query}"`);
-      console.log(`[queryEmbedding] Found ${…
-      …
-      console.log(`  ${i + 1}. Score: ${…
+      console.log(`[queryEmbedding] Found ${results.rows.length} results:`);
+      results.rows.forEach((row, i) => {
+        console.log(`  ${i + 1}. Score: ${row.similarity.toFixed(4)}, Title: ${row.metadata?.title || "N/A"}`);
       });
       const SIMILARITY_THRESHOLD = 1;
-      const relevantResults = …
+      const relevantResults = results.rows.filter((row) => row.similarity < SIMILARITY_THRESHOLD);
       console.log(`[queryEmbedding] ${relevantResults.length} results passed threshold (< ${SIMILARITY_THRESHOLD})`);
       const topResults = relevantResults.slice(0, 3);
-      const sourceDocuments = topResults.map((…
-      …
+      const sourceDocuments = topResults.map((row) => ({
+        pageContent: row.content,
+        metadata: row.metadata
+      }));
+      const bestMatchForDisplay = topResults.length > 0 ? [{ pageContent: topResults[0].content, metadata: topResults[0].metadata }] : [];
+      const context = sourceDocuments.map((doc) => {
+        const title = doc.metadata?.title ? `Title: ${doc.metadata.title}
 ` : "";
-      …
-      }
-      …
-        "system",
-        `You are a helpful assistant that answers questions based on the provided context.
+        return `${title}${doc.pageContent}`;
+      }).join("\n\n");
+      const { text } = await ai.generateText({
+        model: this.chatModel,
+        system: `You are a helpful assistant that answers questions based on the provided context.
 If you cannot find the answer in the context, say so. Be concise and accurate.
 
 Context:
-{context}
-…
-      ]);
-      const ragChain = runnables.RunnableSequence.from([
-        {
-          context: async () => formatDocs(sourceDocuments),
-          question: new runnables.RunnablePassthrough()
-        },
-        ragPrompt,
-        this.chat,
-        new output_parsers.StringOutputParser()
-      ]);
-      const text = await ragChain.invoke(query);
+${context}`,
+        prompt: query
+      });
       return {
         text,
         sourceDocuments: bestMatchForDisplay
-        // Only return best match to display
       };
     } catch (error) {
       console.error(`Failed to query embeddings: ${error}`);
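With the LangChain vector store gone, `queryEmbedding` issues raw SQL against pgvector. The `<=>` operator is cosine distance, so `1 - (embedding <=> $1::vector)` is cosine similarity, and ordering by distance ascending returns nearest neighbors first. A runnable sketch against the same `embeddings_documents` table (the connection string is a placeholder):

```js
const { Pool } = require("pg");

// Sketch: k-nearest-neighbor lookup over a pgvector column.
async function nearest(queryVector, k = 6) {
  const pool = new Pool({ connectionString: process.env.DATABASE_URL }); // placeholder
  // pgvector accepts a bracketed number list cast to ::vector
  const vectorStr = `[${queryVector.join(",")}]`;
  const { rows } = await pool.query(
    `SELECT content, metadata,
            1 - (embedding <=> $1::vector) AS similarity
       FROM embeddings_documents
      ORDER BY embedding <=> $1::vector
      LIMIT $2`,
    [vectorStr, k]
  );
  await pool.end();
  return rows;
}
```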
@@ -305,22 +272,32 @@ Context:
     }
   }
   async similaritySearch(query, k = 4) {
-    if (!this.embeddings || !this.pool) {
+    if (!this.embeddingModel_ || !this.pool) {
       throw new Error("Plugin manager not initialized");
     }
     try {
-      const …
-      this.…
-      …
-      );
-      …
+      const { embedding: queryVector } = await ai.embed({
+        model: this.embeddingModel_,
+        value: query
+      });
+      const vectorStr = `[${queryVector.join(",")}]`;
+      const results = await this.pool.query(`
+        SELECT content, metadata
+        FROM embeddings_documents
+        ORDER BY embedding <=> $1::vector
+        LIMIT $2
+      `, [vectorStr, k]);
+      return results.rows.map((row) => ({
+        pageContent: row.content,
+        metadata: row.metadata
+      }));
     } catch (error) {
       console.error(`Failed to perform similarity search: ${error}`);
       throw new Error(`Failed to perform similarity search: ${error}`);
     }
   }
   isInitialized() {
-    return !!(this.embeddings && this.chat && this.pool);
+    return !!(this.embeddingModel_ && this.chatModel && this.pool);
   }
   /**
    * Get all embeddings from Neon DB
@@ -375,23 +352,19 @@ Context:
   getPool() {
     return this.pool;
   }
-  …
-    return this.embeddings;
+  getEmbeddingModel_() {
+    return this.embeddingModel_;
   }
-  …
-    return this.embeddingModel;
-  }
-  getChat() {
-    return this.chat;
+  getEmbeddingModelName() {
+    return this.embeddingModelName;
   }
   async destroy() {
     if (this.pool) {
       await this.pool.end();
       this.pool = null;
     }
-    this.embeddings = null;
-    this.chat = null;
-    this.vectorStoreConfig = null;
+    this.embeddingModel_ = null;
+    this.chatModel = null;
   }
   /**
    * Clear all embeddings from Neon DB
@@ -454,7 +427,7 @@ const SearchYtKnowledgeSchema = zod.z.object({
   videoId: zod.z.string().optional(),
   topics: zod.z.array(zod.z.string()).optional(),
   contextWindowSeconds: zod.z.number().min(0).optional().default(30),
-  minSimilarity: zod.z.number().min(0).max(1).optional().default(0.…)
+  minSimilarity: zod.z.number().min(0).max(1).optional().default(0.3)
 });
 const GetVideoTranscriptRangeSchema = zod.z.object({
   videoId: zod.z.string().min(1, "Video ID is required"),
@@ -493,7 +466,7 @@ function formatTime$5(seconds) {
 }
 const searchYtKnowledgeMcpTool = {
   name: "search_yt_knowledge",
-  description: "Semantically search YouTube video transcripts. Returns relevant passages with timestamps, deep links, video topics, and summary.",
+  description: "Semantically search YouTube video transcripts. Returns relevant passages with timestamps, deep links, video topics, and summary. IMPORTANT: After receiving results, use the contextText to directly answer the user's question. Cite the video title, timestamp, and deep link. Do not just list results — synthesize an answer from the transcript content.",
   inputSchema: {
     type: "object",
     properties: {
@@ -529,7 +502,7 @@ const searchYtKnowledgeMcpTool = {
 async function handleSearchYtKnowledge(strapi, args) {
   const results = await strapi.plugin("yt-embeddings-strapi-plugin").service("ytEmbeddings").search(args.query, {
     limit: args.limit ?? 5,
-    minSimilarity: args.minSimilarity ?? 0.…,
+    minSimilarity: args.minSimilarity ?? 0.3,
     videoId: args.videoId,
     topics: args.topics,
     contextWindowSeconds: args.contextWindowSeconds ?? 30
@@ -1116,8 +1089,8 @@ Topics: ${(r.topics || []).join(", ")}
 
 ${r.contextText || r.chunkText}`
 ).join("\n\n---\n\n");
-    const …
-    if (!…
+    const config2 = strapi.config.get("plugin::yt-embeddings-strapi-plugin");
+    if (!config2?.openAIApiKey) {
       ctx.body = {
         text: ytResults.map((r) => `**${r.title}** (${r.deepLink})
 ${r.chunkText}`).join("\n\n"),
@@ -1128,19 +1101,19 @@ ${r.chunkText}`).join("\n\n"),
       };
       return;
     }
-    const …
-    …
+    const openai$1 = openai.createOpenAI({ apiKey: config2.openAIApiKey });
+    const { text } = await ai.generateText({
+      model: openai$1("gpt-4o-mini"),
+      system: `You are a helpful assistant that answers questions based on YouTube transcript content.
 Include timestamps and video links when relevant. Be concise and accurate.
 If you cannot find the answer in the context, say so.
 
 Context:
-{context}
-…
-    const chain = prompt.pipe(chat);
-    const response = await chain.invoke({ context, question: query });
+${context}`,
+      prompt: query
+    });
     ctx.body = {
-      text: …
+      text,
       sourceDocuments: ytResults.map((r) => ({
         pageContent: r.chunkText,
         metadata: { id: r.videoId, title: r.title, deepLink: r.deepLink }
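The controller's RAG answer is now a single `generateText` call: the retrieved transcript passages are interpolated straight into the system prompt instead of flowing through a prompt template and a LangChain chain. A sketch of the shape, with `context` standing in for the joined passages:

```js
const { generateText } = require("ai");
const { createOpenAI } = require("@ai-sdk/openai");

// Sketch: answer a question grounded in pre-retrieved transcript context.
async function answer(query, context, apiKey) {
  const openai = createOpenAI({ apiKey });
  const { text } = await generateText({
    model: openai("gpt-4o-mini"),
    system: `You are a helpful assistant that answers questions based on YouTube transcript content.
If you cannot find the answer in the context, say so.

Context:
${context}`,
    prompt: query
  });
  return text;
}
```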
@@ -1352,7 +1325,7 @@ const ytController = ({ strapi }) => ({
     }
     const result = await strapi.plugin(PLUGIN_ID).service("ytEmbeddings").search(q, {
       limit: limit ? parseInt(limit, 10) : 5,
-      minSimilarity: minSimilarity ? parseFloat(minSimilarity) : 0.…,
+      minSimilarity: minSimilarity ? parseFloat(minSimilarity) : 0.3,
       videoId,
       topics: topics ? topics.split(",") : void 0,
       contextWindowSeconds: contextWindowSeconds ? parseInt(contextWindowSeconds, 10) : 30
@@ -1591,7 +1564,7 @@ function formatTime$2(seconds) {
 }
 const searchYtKnowledgeTool = {
   name: "searchYtKnowledge",
-  description: "Semantically search YouTube video transcripts. Returns relevant passages with timestamps, deep links, video topics, and summary.",
+  description: "Semantically search YouTube video transcripts. Returns relevant passages with timestamps, deep links, video topics, and summary. IMPORTANT: After receiving results, use the contextText to directly answer the user's question. Cite the video title, timestamp, and deep link. Do not just list results — synthesize an answer from the transcript content.",
   schema: SearchYtKnowledgeSchema,
   execute: async (args, strapi) => {
     const validated = SearchYtKnowledgeSchema.parse(args);
@@ -1810,15 +1783,14 @@ const metadataSchema = zod.z.object({
   language: zod.z.string().default("en")
 });
 async function extractVideoMetadata(title, fullTranscript, durationSeconds, openAIApiKey) {
-  const llm = new openai.ChatOpenAI({
-    modelName: "gpt-4o-mini",
-    temperature: 0,
-    openAIApiKey
-  });
-  const structured = llm.withStructuredOutput(metadataSchema);
+  const openai$1 = openai.createOpenAI({ apiKey: openAIApiKey });
   const words = fullTranscript.split(/\s+/);
   const sample = words.length > 4e3 ? [...words.slice(0, 2e3), "...", ...words.slice(-2e3)].join(" ") : fullTranscript;
-  const …
+  const { object } = await ai.generateObject({
+    model: openai$1("gpt-4o-mini"),
+    schema: metadataSchema,
+    temperature: 0,
+    prompt: `
 Video title: "${title}"
 Duration: ${Math.floor(durationSeconds / 60)} minutes
@@ -1832,12 +1804,13 @@ Extract:
 - summary: 2-3 sentences describing what the video teaches or argues
 - keyMoments: the 5-8 most important moments, with approximate start time in seconds
 - language: ISO 639-1 language code of the transcript
-…
+`.trim()
+  });
   return {
-    topics: …,
-    summary: …,
-    keyMoments: …,
-    language: …
+    topics: object.topics ?? [],
+    summary: object.summary ?? "",
+    keyMoments: object.keyMoments ?? [],
+    language: object.language ?? "en"
   };
 }
 function computeContentHash(content) {
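Structured metadata extraction moves from LangChain's `withStructuredOutput` to `generateObject`, which validates the model's JSON against the existing zod schema and returns a typed `object`. A self-contained sketch with a trimmed-down schema (the plugin's real `metadataSchema` also includes `keyMoments`):

```js
const { generateObject } = require("ai");
const { createOpenAI } = require("@ai-sdk/openai");
const { z } = require("zod");

// Reduced version of the plugin's metadataSchema, for illustration only.
const schema = z.object({
  topics: z.array(z.string()),
  summary: z.string(),
  language: z.string().default("en")
});

async function extract(sample, apiKey) {
  const openai = createOpenAI({ apiKey });
  const { object } = await generateObject({
    model: openai("gpt-4o-mini"),
    schema,
    temperature: 0,
    prompt: `Extract topics, a 2-3 sentence summary, and the ISO 639-1 language code from:\n${sample}`
  });
  return object; // already validated against the schema
}
```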
@@ -1847,9 +1820,9 @@ const ytEmbeddings = ({ strapi }) => ({
   // ── Ingest a single transcript ──────────────────────────────────────────────
   async embedTranscript(transcript) {
     const pool = pluginManager.getPool();
-    const …
-    const …
-    if (!pool || !…) {
+    const embeddingModel = pluginManager.getEmbeddingModel_();
+    const embeddingModelName = pluginManager.getEmbeddingModelName();
+    if (!pool || !embeddingModel) {
       throw new Error("[yt-embed] Plugin manager not initialized");
     }
     const contentHash = computeContentHash(transcript.fullTranscript);
@@ -1880,7 +1853,7 @@ const ytEmbeddings = ({ strapi }) => ({
         transcript.title,
         durationSeconds,
         contentHash,
-        …
+        embeddingModelName
       ]
     );
     try {
@@ -1916,7 +1889,7 @@ const ytEmbeddings = ({ strapi }) => ({
       strapi.log.info(`[yt-embed] ${transcript.title} — no chunks (empty transcript)`);
       return { videoId: transcript.videoId, chunkCount: 0, skipped: false };
     }
-    const embeddingVectors = await …
+    const { embeddings: embeddingVectors } = await ai.embedMany({ model: embeddingModel, values: chunks.map((c) => c.text) });
     const insertedIds = [];
     for (let i = 0; i < chunks.length; i++) {
       const chunk = chunks[i];
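Chunk ingestion now batches every chunk text through one `embedMany` call instead of embedding chunks one at a time; the returned `embeddings` array is index-aligned with `values`. A sketch:

```js
const { embedMany } = require("ai");

// Sketch: embed all chunk texts in a single batched request.
async function embedChunks(model, chunks) {
  const { embeddings } = await embedMany({
    model,
    values: chunks.map((c) => c.text)
  });
  // embeddings[i] corresponds to chunks[i]
  return chunks.map((chunk, i) => ({ ...chunk, embedding: embeddings[i] }));
}
```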
@@ -1967,12 +1940,14 @@ const ytEmbeddings = ({ strapi }) => ({
   // ── Semantic search with context expansion ──────────────────────────────────
   async search(query, options = {}) {
     const pool = pluginManager.getPool();
-    const …
-    if (!pool || !…) {
+    const embeddingModel = pluginManager.getEmbeddingModel_();
+    if (!pool || !embeddingModel) {
       throw new Error("[yt-embed] Plugin manager not initialized");
     }
     const { limit = 5, minSimilarity = 0.2, contextWindowSeconds = 30 } = options;
-    …
+    console.log(`[yt-embed search] Embedding query: "${query}" with model`);
+    const { embedding: queryVector } = await ai.embed({ model: embeddingModel, value: query });
+    console.log(`[yt-embed search] Got embedding vector, length: ${queryVector.length}`);
     const vectorStr = `[${queryVector.join(",")}]`;
     const params = [vectorStr, minSimilarity, limit * 2];
     const filters = [];
@@ -2001,6 +1976,10 @@ const ytEmbeddings = ({ strapi }) => ({
       ORDER BY vc.embedding <=> $1::vector
       LIMIT $3
     `, params);
+    console.log(`[yt-embed search] Query returned ${rows.rows.length} rows (minSimilarity: ${minSimilarity})`);
+    if (rows.rows.length > 0) {
+      console.log(`[yt-embed search] Top similarity: ${rows.rows[0].similarity}`);
+    }
     if (!rows.rows.length) return [];
     const seen = /* @__PURE__ */ new Set();
     const deduped = rows.rows.filter((row) => {

package/dist/server/index.mjs
CHANGED
@@ -1,8 +1,5 @@
-import { OpenAIEmbeddings, ChatOpenAI } from "@langchain/openai";
-import { …
-import { StringOutputParser } from "@langchain/core/output_parsers";
-import { ChatPromptTemplate } from "@langchain/core/prompts";
-import { RunnableSequence, RunnablePassthrough } from "@langchain/core/runnables";
+import { embed, generateText, generateObject, embedMany } from "ai";
+import { createOpenAI } from "@ai-sdk/openai";
 import { Pool } from "pg";
 import { Server } from "@modelcontextprotocol/sdk/server/index.js";
 import { ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprotocol/sdk/types.js";
@@ -50,12 +47,11 @@ const config = {
 };
 class PluginManager {
   constructor() {
-    this.embeddings = null;
-    this.chat = null;
+    this.embeddingModel_ = null;
+    this.chatModel = null;
     this.pool = null;
-    this.embeddingModel = "text-embedding-3-small";
+    this.embeddingModelName = "text-embedding-3-small";
     this.dimensions = 1536;
-    this.vectorStoreConfig = null;
   }
   async initializePool(connectionString) {
     console.log("Initializing Neon DB Pool");
@@ -111,74 +107,47 @@ class PluginManager {
       client.release();
     }
   }
-  initializeEmbeddings(…) {
-    console.log(`Initializing OpenAI Embeddings (model: ${this.embeddingModel})`);
-    if (this.embeddings) …
-    try {
-      this.embeddings = new OpenAIEmbeddings({
-        …
-        modelName: this.embeddingModel,
-        dimensions: this.dimensions
-      });
-      return this.embeddings;
-    } catch (error) {
-      console.error(`Failed to initialize Embeddings: ${error}`);
-      throw new Error(`Failed to initialize Embeddings: ${error}`);
-    }
+  initializeEmbeddings(openai) {
+    console.log(`Initializing OpenAI Embeddings (model: ${this.embeddingModelName})`);
+    if (this.embeddingModel_) return;
+    this.embeddingModel_ = openai.embedding(this.embeddingModelName, {
+      dimensions: this.dimensions
+    });
   }
-  initializeChat(…) {
+  initializeChat(openai) {
     console.log("Initializing Chat Model");
-    if (this.chat) …
-    try {
-      this.chat = new ChatOpenAI({
-        modelName: "gpt-4o-mini",
-        temperature: 0.7,
-        openAIApiKey
-      });
-      return this.chat;
-    } catch (error) {
-      console.error(`Failed to initialize Chat: ${error}`);
-      throw new Error(`Failed to initialize Chat: ${error}`);
-    }
+    if (this.chatModel) return;
+    this.chatModel = openai("gpt-4o-mini");
   }
   async initialize(config2) {
     const model = config2.embeddingModel || "text-embedding-3-small";
     if (EMBEDDING_MODELS[model]) {
-      this.embeddingModel = model;
+      this.embeddingModelName = model;
       this.dimensions = EMBEDDING_MODELS[model].dimensions;
     } else {
       console.warn(`Invalid embedding model "${model}", using default`);
-      this.embeddingModel = "text-embedding-3-small";
+      this.embeddingModelName = "text-embedding-3-small";
       this.dimensions = EMBEDDING_MODELS["text-embedding-3-small"].dimensions;
     }
-    console.log(`Using embedding model: ${this.embeddingModel} (${this.dimensions} dimensions)`);
+    console.log(`Using embedding model: ${this.embeddingModelName} (${this.dimensions} dimensions)`);
     await this.initializePool(config2.neonConnectionString);
-    …
-    this.vectorStoreConfig = {
-      pool: this.pool,
-      tableName: "embeddings_documents",
-      columns: {
-        idColumnName: "id",
-        vectorColumnName: "embedding",
-        contentColumnName: "content",
-        metadataColumnName: "metadata"
-      },
-      distanceStrategy: "cosine"
-    };
-    }
+    const openai = createOpenAI({ apiKey: config2.openAIApiKey });
+    this.initializeEmbeddings(openai);
+    this.initializeChat(openai);
     console.log("Plugin Manager Initialization Complete");
   }
   async createEmbedding(docData) {
-    if (!this.embeddings || !this.pool) {
+    if (!this.embeddingModel_ || !this.pool) {
       throw new Error("Plugin manager not initialized");
     }
     const maxRetries = 3;
     const retryDelay = 2e3;
     for (let attempt = 1; attempt <= maxRetries; attempt++) {
       try {
-        const embeddingVector = await …
+        const { embedding: embeddingVector } = await embed({
+          model: this.embeddingModel_,
+          value: docData.content
+        });
         const metadata = {
           id: docData.id,
           title: docData.title,
@@ -227,58 +196,56 @@ class PluginManager {
     }
   }
   async queryEmbedding(query) {
-    if (!this.embeddings || !this.chat || !this.pool) {
+    if (!this.embeddingModel_ || !this.chatModel || !this.pool) {
       throw new Error("Plugin manager not initialized");
     }
     try {
-      const …
-      this.…
-      …
-      );
-      const …
+      const { embedding: queryVector } = await embed({
+        model: this.embeddingModel_,
+        value: query
+      });
+      const vectorStr = `[${queryVector.join(",")}]`;
+      const results = await this.pool.query(`
+        SELECT
+          content,
+          metadata,
+          1 - (embedding <=> $1::vector) AS similarity
+        FROM embeddings_documents
+        WHERE 1 - (embedding <=> $1::vector) > 0
+        ORDER BY embedding <=> $1::vector
+        LIMIT 6
+      `, [vectorStr]);
       console.log(`[queryEmbedding] Query: "${query}"`);
-      console.log(`[queryEmbedding] Found ${…
-      …
-      console.log(`  ${i + 1}. Score: ${…
+      console.log(`[queryEmbedding] Found ${results.rows.length} results:`);
+      results.rows.forEach((row, i) => {
+        console.log(`  ${i + 1}. Score: ${row.similarity.toFixed(4)}, Title: ${row.metadata?.title || "N/A"}`);
       });
       const SIMILARITY_THRESHOLD = 1;
-      const relevantResults = …
+      const relevantResults = results.rows.filter((row) => row.similarity < SIMILARITY_THRESHOLD);
       console.log(`[queryEmbedding] ${relevantResults.length} results passed threshold (< ${SIMILARITY_THRESHOLD})`);
       const topResults = relevantResults.slice(0, 3);
-      const sourceDocuments = topResults.map((…
-      …
+      const sourceDocuments = topResults.map((row) => ({
+        pageContent: row.content,
+        metadata: row.metadata
+      }));
+      const bestMatchForDisplay = topResults.length > 0 ? [{ pageContent: topResults[0].content, metadata: topResults[0].metadata }] : [];
+      const context = sourceDocuments.map((doc) => {
+        const title = doc.metadata?.title ? `Title: ${doc.metadata.title}
 ` : "";
-      …
-      }
-      …
-        "system",
-        `You are a helpful assistant that answers questions based on the provided context.
+        return `${title}${doc.pageContent}`;
+      }).join("\n\n");
+      const { text } = await generateText({
+        model: this.chatModel,
+        system: `You are a helpful assistant that answers questions based on the provided context.
 If you cannot find the answer in the context, say so. Be concise and accurate.
 
 Context:
-{context}
-…
-      ]);
-      const ragChain = RunnableSequence.from([
-        {
-          context: async () => formatDocs(sourceDocuments),
-          question: new RunnablePassthrough()
-        },
-        ragPrompt,
-        this.chat,
-        new StringOutputParser()
-      ]);
-      const text = await ragChain.invoke(query);
+${context}`,
+        prompt: query
+      });
       return {
         text,
         sourceDocuments: bestMatchForDisplay
-        // Only return best match to display
       };
     } catch (error) {
      console.error(`Failed to query embeddings: ${error}`);
@@ -286,22 +253,32 @@ Context:
     }
   }
   async similaritySearch(query, k = 4) {
-    if (!this.embeddings || !this.pool) {
+    if (!this.embeddingModel_ || !this.pool) {
       throw new Error("Plugin manager not initialized");
     }
     try {
-      const …
-      this.…
-      …
-      );
-      …
+      const { embedding: queryVector } = await embed({
+        model: this.embeddingModel_,
+        value: query
+      });
+      const vectorStr = `[${queryVector.join(",")}]`;
+      const results = await this.pool.query(`
+        SELECT content, metadata
+        FROM embeddings_documents
+        ORDER BY embedding <=> $1::vector
+        LIMIT $2
+      `, [vectorStr, k]);
+      return results.rows.map((row) => ({
+        pageContent: row.content,
+        metadata: row.metadata
+      }));
     } catch (error) {
       console.error(`Failed to perform similarity search: ${error}`);
       throw new Error(`Failed to perform similarity search: ${error}`);
     }
   }
   isInitialized() {
-    return !!(this.embeddings && this.chat && this.pool);
+    return !!(this.embeddingModel_ && this.chatModel && this.pool);
   }
   /**
    * Get all embeddings from Neon DB
@@ -356,23 +333,19 @@ Context:
   getPool() {
     return this.pool;
   }
-  …
-    return this.embeddings;
+  getEmbeddingModel_() {
+    return this.embeddingModel_;
   }
-  …
-    return this.embeddingModel;
-  }
-  getChat() {
-    return this.chat;
+  getEmbeddingModelName() {
+    return this.embeddingModelName;
   }
   async destroy() {
     if (this.pool) {
       await this.pool.end();
       this.pool = null;
     }
-    this.embeddings = null;
-    this.chat = null;
-    this.vectorStoreConfig = null;
+    this.embeddingModel_ = null;
+    this.chatModel = null;
   }
   /**
    * Clear all embeddings from Neon DB
@@ -435,7 +408,7 @@ const SearchYtKnowledgeSchema = z.object({
   videoId: z.string().optional(),
   topics: z.array(z.string()).optional(),
   contextWindowSeconds: z.number().min(0).optional().default(30),
-  minSimilarity: z.number().min(0).max(1).optional().default(0.…)
+  minSimilarity: z.number().min(0).max(1).optional().default(0.3)
 });
 const GetVideoTranscriptRangeSchema = z.object({
   videoId: z.string().min(1, "Video ID is required"),
@@ -474,7 +447,7 @@ function formatTime$5(seconds) {
 }
 const searchYtKnowledgeMcpTool = {
   name: "search_yt_knowledge",
-  description: "Semantically search YouTube video transcripts. Returns relevant passages with timestamps, deep links, video topics, and summary.",
+  description: "Semantically search YouTube video transcripts. Returns relevant passages with timestamps, deep links, video topics, and summary. IMPORTANT: After receiving results, use the contextText to directly answer the user's question. Cite the video title, timestamp, and deep link. Do not just list results — synthesize an answer from the transcript content.",
   inputSchema: {
     type: "object",
     properties: {
@@ -510,7 +483,7 @@ const searchYtKnowledgeMcpTool = {
 async function handleSearchYtKnowledge(strapi, args) {
   const results = await strapi.plugin("yt-embeddings-strapi-plugin").service("ytEmbeddings").search(args.query, {
     limit: args.limit ?? 5,
-    minSimilarity: args.minSimilarity ?? 0.…,
+    minSimilarity: args.minSimilarity ?? 0.3,
     videoId: args.videoId,
     topics: args.topics,
     contextWindowSeconds: args.contextWindowSeconds ?? 30
@@ -1097,8 +1070,8 @@ Topics: ${(r.topics || []).join(", ")}
 
 ${r.contextText || r.chunkText}`
 ).join("\n\n---\n\n");
-    const …
-    if (!…
+    const config2 = strapi.config.get("plugin::yt-embeddings-strapi-plugin");
+    if (!config2?.openAIApiKey) {
       ctx.body = {
         text: ytResults.map((r) => `**${r.title}** (${r.deepLink})
 ${r.chunkText}`).join("\n\n"),
@@ -1109,19 +1082,19 @@ ${r.chunkText}`).join("\n\n"),
       };
       return;
     }
-    const …
-    …
+    const openai = createOpenAI({ apiKey: config2.openAIApiKey });
+    const { text } = await generateText({
+      model: openai("gpt-4o-mini"),
+      system: `You are a helpful assistant that answers questions based on YouTube transcript content.
 Include timestamps and video links when relevant. Be concise and accurate.
 If you cannot find the answer in the context, say so.
 
 Context:
-{context}
-…
-    const chain = prompt.pipe(chat);
-    const response = await chain.invoke({ context, question: query });
+${context}`,
+      prompt: query
+    });
     ctx.body = {
-      text: …
+      text,
       sourceDocuments: ytResults.map((r) => ({
         pageContent: r.chunkText,
         metadata: { id: r.videoId, title: r.title, deepLink: r.deepLink }
@@ -1333,7 +1306,7 @@ const ytController = ({ strapi }) => ({
     }
     const result = await strapi.plugin(PLUGIN_ID).service("ytEmbeddings").search(q, {
       limit: limit ? parseInt(limit, 10) : 5,
-      minSimilarity: minSimilarity ? parseFloat(minSimilarity) : 0.…,
+      minSimilarity: minSimilarity ? parseFloat(minSimilarity) : 0.3,
       videoId,
       topics: topics ? topics.split(",") : void 0,
       contextWindowSeconds: contextWindowSeconds ? parseInt(contextWindowSeconds, 10) : 30
@@ -1572,7 +1545,7 @@ function formatTime$2(seconds) {
 }
 const searchYtKnowledgeTool = {
   name: "searchYtKnowledge",
-  description: "Semantically search YouTube video transcripts. Returns relevant passages with timestamps, deep links, video topics, and summary.",
+  description: "Semantically search YouTube video transcripts. Returns relevant passages with timestamps, deep links, video topics, and summary. IMPORTANT: After receiving results, use the contextText to directly answer the user's question. Cite the video title, timestamp, and deep link. Do not just list results — synthesize an answer from the transcript content.",
   schema: SearchYtKnowledgeSchema,
   execute: async (args, strapi) => {
     const validated = SearchYtKnowledgeSchema.parse(args);
@@ -1791,15 +1764,14 @@ const metadataSchema = z.object({
   language: z.string().default("en")
 });
 async function extractVideoMetadata(title, fullTranscript, durationSeconds, openAIApiKey) {
-  const llm = new ChatOpenAI({
-    modelName: "gpt-4o-mini",
-    temperature: 0,
-    openAIApiKey
-  });
-  const structured = llm.withStructuredOutput(metadataSchema);
+  const openai = createOpenAI({ apiKey: openAIApiKey });
   const words = fullTranscript.split(/\s+/);
   const sample = words.length > 4e3 ? [...words.slice(0, 2e3), "...", ...words.slice(-2e3)].join(" ") : fullTranscript;
-  const …
+  const { object } = await generateObject({
+    model: openai("gpt-4o-mini"),
+    schema: metadataSchema,
+    temperature: 0,
+    prompt: `
 Video title: "${title}"
 Duration: ${Math.floor(durationSeconds / 60)} minutes
@@ -1813,12 +1785,13 @@ Extract:
 - summary: 2-3 sentences describing what the video teaches or argues
 - keyMoments: the 5-8 most important moments, with approximate start time in seconds
 - language: ISO 639-1 language code of the transcript
-…
+`.trim()
+  });
   return {
-    topics: …,
-    summary: …,
-    keyMoments: …,
-    language: …
+    topics: object.topics ?? [],
+    summary: object.summary ?? "",
+    keyMoments: object.keyMoments ?? [],
+    language: object.language ?? "en"
   };
 }
 function computeContentHash(content) {
@@ -1828,9 +1801,9 @@ const ytEmbeddings = ({ strapi }) => ({
   // ── Ingest a single transcript ──────────────────────────────────────────────
   async embedTranscript(transcript) {
     const pool = pluginManager.getPool();
-    const …
-    const …
-    if (!pool || !…) {
+    const embeddingModel = pluginManager.getEmbeddingModel_();
+    const embeddingModelName = pluginManager.getEmbeddingModelName();
+    if (!pool || !embeddingModel) {
       throw new Error("[yt-embed] Plugin manager not initialized");
     }
     const contentHash = computeContentHash(transcript.fullTranscript);
@@ -1861,7 +1834,7 @@ const ytEmbeddings = ({ strapi }) => ({
         transcript.title,
         durationSeconds,
         contentHash,
-        …
+        embeddingModelName
       ]
     );
     try {
@@ -1897,7 +1870,7 @@ const ytEmbeddings = ({ strapi }) => ({
       strapi.log.info(`[yt-embed] ${transcript.title} — no chunks (empty transcript)`);
       return { videoId: transcript.videoId, chunkCount: 0, skipped: false };
     }
-    const embeddingVectors = await …
+    const { embeddings: embeddingVectors } = await embedMany({ model: embeddingModel, values: chunks.map((c) => c.text) });
     const insertedIds = [];
     for (let i = 0; i < chunks.length; i++) {
       const chunk = chunks[i];
@@ -1948,12 +1921,14 @@ const ytEmbeddings = ({ strapi }) => ({
   // ── Semantic search with context expansion ──────────────────────────────────
   async search(query, options = {}) {
     const pool = pluginManager.getPool();
-    const …
-    if (!pool || !…) {
+    const embeddingModel = pluginManager.getEmbeddingModel_();
+    if (!pool || !embeddingModel) {
       throw new Error("[yt-embed] Plugin manager not initialized");
     }
     const { limit = 5, minSimilarity = 0.2, contextWindowSeconds = 30 } = options;
-    …
+    console.log(`[yt-embed search] Embedding query: "${query}" with model`);
+    const { embedding: queryVector } = await embed({ model: embeddingModel, value: query });
+    console.log(`[yt-embed search] Got embedding vector, length: ${queryVector.length}`);
     const vectorStr = `[${queryVector.join(",")}]`;
     const params = [vectorStr, minSimilarity, limit * 2];
     const filters = [];
@@ -1982,6 +1957,10 @@ const ytEmbeddings = ({ strapi }) => ({
       ORDER BY vc.embedding <=> $1::vector
       LIMIT $3
     `, params);
+    console.log(`[yt-embed search] Query returned ${rows.rows.length} rows (minSimilarity: ${minSimilarity})`);
+    if (rows.rows.length > 0) {
+      console.log(`[yt-embed search] Top similarity: ${rows.rows[0].similarity}`);
+    }
     if (!rows.rows.length) return [];
     const seen = /* @__PURE__ */ new Set();
     const deduped = rows.rows.filter((row) => {
package/dist/server/src/plugin-manager.d.ts
CHANGED

@@ -1,5 +1,4 @@
-import { … } from "@langchain/openai";
-import { Document } from "@langchain/core/documents";
+import type { EmbeddingModel } from "ai";
 import { Pool } from "pg";
 import { type EmbeddingModelName } from "./config";
 interface PluginConfig {
@@ -20,24 +19,29 @@ interface CreateEmbeddingResult {
 }
 interface QueryResponse {
   text: string;
-  sourceDocuments: Document[];
+  sourceDocuments: Array<{
+    pageContent: string;
+    metadata: any;
+  }>;
 }
 declare class PluginManager {
-  private embeddings;
-  private chat;
+  private embeddingModel_;
+  private chatModel;
   private pool;
-  private embeddingModel;
+  private embeddingModelName;
   private dimensions;
-  private vectorStoreConfig;
   initializePool(connectionString: string): Promise<Pool>;
   private initializeVectorTable;
-  initializeEmbeddings(…);
-  initializeChat(…);
+  private initializeEmbeddings;
+  private initializeChat;
   initialize(config: PluginConfig): Promise<void>;
   createEmbedding(docData: EmbeddingDocument): Promise<CreateEmbeddingResult>;
   deleteEmbedding(strapiId: string): Promise<void>;
   queryEmbedding(query: string): Promise<QueryResponse>;
-  similaritySearch(query: string, k?: number): Promise<Document[]>;
+  similaritySearch(query: string, k?: number): Promise<Array<{
+    pageContent: string;
+    metadata: any;
+  }>>;
   isInitialized(): boolean;
   /**
    * Get all embeddings from Neon DB
@@ -56,9 +60,8 @@ declare class PluginManager {
   */
   deleteNeonEmbeddingById(neonId: string): Promise<void>;
   getPool(): Pool | null;
-  …
-  …
-  getChat(): ChatOpenAI | null;
+  getEmbeddingModel_(): EmbeddingModel<string> | null;
+  getEmbeddingModelName(): EmbeddingModelName;
   destroy(): Promise<void>;
   /**
    * Clear all embeddings from Neon DB

package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "yt-embeddings-strapi-plugin",
-  "version": "0.0.1",
+  "version": "0.1.1",
   "description": "Strapi v5 plugin for vector embeddings with OpenAI and Neon PostgreSQL. Enables semantic search, RAG chat, and MCP (Model Context Protocol) integration.",
   "keywords": [
     "strapi",
@@ -25,9 +25,6 @@
     "url": "https://github.com/PaulBratslavsky/strapi-content-embeddings/issues"
   },
   "type": "commonjs",
-  "overrides": {
-    "@langchain/core": "^1.1.31"
-  },
   "exports": {
     "./package.json": "./package.json",
     "./strapi-admin": {
@@ -58,15 +55,13 @@
     "watch:link": "strapi-plugin watch:link"
   },
   "dependencies": {
-    "@…
-    "@langchain/core": "^1.1.31",
-    "@langchain/openai": "^1.2.1",
+    "@ai-sdk/openai": "^1.x",
     "@mdxeditor/editor": "^3.52.3",
     "@modelcontextprotocol/sdk": "^1.12.0",
     "@strapi/design-system": "^2.0.0-rc.12",
     "@strapi/icons": "^2.0.0-rc.12",
+    "ai": "^4.x",
     "html-to-text": "^9.0.5",
-    "langchain": "^1.2.4",
     "pg": "^8.13.1",
     "remove-markdown": "^0.5.5",
     "qs": "^6.13.1",