yt-embeddings-strapi-plugin 0.0.1 → 0.1.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,6 @@
1
1
  "use strict";
2
- const openai = require("@langchain/openai");
3
- const pgvector = require("@langchain/community/vectorstores/pgvector");
4
- const output_parsers = require("@langchain/core/output_parsers");
5
- const prompts = require("@langchain/core/prompts");
6
- const runnables = require("@langchain/core/runnables");
2
+ const ai = require("ai");
3
+ const openai = require("@ai-sdk/openai");
7
4
  const pg = require("pg");
8
5
  const index_js = require("@modelcontextprotocol/sdk/server/index.js");
9
6
  const types_js = require("@modelcontextprotocol/sdk/types.js");
@@ -69,12 +66,11 @@ const config = {
69
66
  };
70
67
  class PluginManager {
71
68
  constructor() {
72
- this.embeddings = null;
73
- this.chat = null;
69
+ this.embeddingModel_ = null;
70
+ this.chatModel = null;
74
71
  this.pool = null;
75
- this.embeddingModel = "text-embedding-3-small";
72
+ this.embeddingModelName = "text-embedding-3-small";
76
73
  this.dimensions = 1536;
77
- this.vectorStoreConfig = null;
78
74
  }
79
75
  async initializePool(connectionString) {
80
76
  console.log("Initializing Neon DB Pool");
@@ -130,74 +126,47 @@ class PluginManager {
130
126
  client.release();
131
127
  }
132
128
  }
133
- async initializeEmbeddings(openAIApiKey) {
134
- console.log(`Initializing OpenAI Embeddings (model: ${this.embeddingModel})`);
135
- if (this.embeddings) return this.embeddings;
136
- try {
137
- this.embeddings = new openai.OpenAIEmbeddings({
138
- openAIApiKey,
139
- modelName: this.embeddingModel,
140
- dimensions: this.dimensions
141
- });
142
- return this.embeddings;
143
- } catch (error) {
144
- console.error(`Failed to initialize Embeddings: ${error}`);
145
- throw new Error(`Failed to initialize Embeddings: ${error}`);
146
- }
129
+ initializeEmbeddings(openai2) {
130
+ console.log(`Initializing OpenAI Embeddings (model: ${this.embeddingModelName})`);
131
+ if (this.embeddingModel_) return;
132
+ this.embeddingModel_ = openai2.embedding(this.embeddingModelName, {
133
+ dimensions: this.dimensions
134
+ });
147
135
  }
148
- async initializeChat(openAIApiKey) {
136
+ initializeChat(openai2) {
149
137
  console.log("Initializing Chat Model");
150
- if (this.chat) return this.chat;
151
- try {
152
- this.chat = new openai.ChatOpenAI({
153
- modelName: "gpt-4o-mini",
154
- temperature: 0.7,
155
- openAIApiKey
156
- });
157
- return this.chat;
158
- } catch (error) {
159
- console.error(`Failed to initialize Chat: ${error}`);
160
- throw new Error(`Failed to initialize Chat: ${error}`);
161
- }
138
+ if (this.chatModel) return;
139
+ this.chatModel = openai2("gpt-4o-mini");
162
140
  }
163
141
  async initialize(config2) {
164
142
  const model = config2.embeddingModel || "text-embedding-3-small";
165
143
  if (EMBEDDING_MODELS[model]) {
166
- this.embeddingModel = model;
144
+ this.embeddingModelName = model;
167
145
  this.dimensions = EMBEDDING_MODELS[model].dimensions;
168
146
  } else {
169
147
  console.warn(`Invalid embedding model "${model}", using default`);
170
- this.embeddingModel = "text-embedding-3-small";
148
+ this.embeddingModelName = "text-embedding-3-small";
171
149
  this.dimensions = EMBEDDING_MODELS["text-embedding-3-small"].dimensions;
172
150
  }
173
- console.log(`Using embedding model: ${this.embeddingModel} (${this.dimensions} dimensions)`);
151
+ console.log(`Using embedding model: ${this.embeddingModelName} (${this.dimensions} dimensions)`);
174
152
  await this.initializePool(config2.neonConnectionString);
175
- await this.initializeEmbeddings(config2.openAIApiKey);
176
- await this.initializeChat(config2.openAIApiKey);
177
- if (this.pool) {
178
- this.vectorStoreConfig = {
179
- pool: this.pool,
180
- tableName: "embeddings_documents",
181
- columns: {
182
- idColumnName: "id",
183
- vectorColumnName: "embedding",
184
- contentColumnName: "content",
185
- metadataColumnName: "metadata"
186
- },
187
- distanceStrategy: "cosine"
188
- };
189
- }
153
+ const openai$1 = openai.createOpenAI({ apiKey: config2.openAIApiKey });
154
+ this.initializeEmbeddings(openai$1);
155
+ this.initializeChat(openai$1);
190
156
  console.log("Plugin Manager Initialization Complete");
191
157
  }
192
158
  async createEmbedding(docData) {
193
- if (!this.embeddings || !this.vectorStoreConfig || !this.pool) {
159
+ if (!this.embeddingModel_ || !this.pool) {
194
160
  throw new Error("Plugin manager not initialized");
195
161
  }
196
162
  const maxRetries = 3;
197
163
  const retryDelay = 2e3;
198
164
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
199
165
  try {
200
- const embeddingVector = await this.embeddings.embedQuery(docData.content);
166
+ const { embedding: embeddingVector } = await ai.embed({
167
+ model: this.embeddingModel_,
168
+ value: docData.content
169
+ });
201
170
  const metadata = {
202
171
  id: docData.id,
203
172
  title: docData.title,
@@ -246,58 +215,56 @@ class PluginManager {
246
215
  }
247
216
  }
248
217
  async queryEmbedding(query) {
249
- if (!this.embeddings || !this.chat || !this.vectorStoreConfig) {
218
+ if (!this.embeddingModel_ || !this.chatModel || !this.pool) {
250
219
  throw new Error("Plugin manager not initialized");
251
220
  }
252
221
  try {
253
- const vectorStore = await pgvector.PGVectorStore.initialize(
254
- this.embeddings,
255
- this.vectorStoreConfig
256
- );
257
- const resultsWithScores = await vectorStore.similaritySearchWithScore(query, 6);
222
+ const { embedding: queryVector } = await ai.embed({
223
+ model: this.embeddingModel_,
224
+ value: query
225
+ });
226
+ const vectorStr = `[${queryVector.join(",")}]`;
227
+ const results = await this.pool.query(`
228
+ SELECT
229
+ content,
230
+ metadata,
231
+ 1 - (embedding <=> $1::vector) AS similarity
232
+ FROM embeddings_documents
233
+ WHERE 1 - (embedding <=> $1::vector) > 0
234
+ ORDER BY embedding <=> $1::vector
235
+ LIMIT 6
236
+ `, [vectorStr]);
258
237
  console.log(`[queryEmbedding] Query: "${query}"`);
259
- console.log(`[queryEmbedding] Found ${resultsWithScores.length} results:`);
260
- resultsWithScores.forEach(([doc, score], i) => {
261
- console.log(` ${i + 1}. Score: ${score.toFixed(4)}, Title: ${doc.metadata?.title || "N/A"}`);
238
+ console.log(`[queryEmbedding] Found ${results.rows.length} results:`);
239
+ results.rows.forEach((row, i) => {
240
+ console.log(` ${i + 1}. Score: ${row.similarity.toFixed(4)}, Title: ${row.metadata?.title || "N/A"}`);
262
241
  });
263
242
  const SIMILARITY_THRESHOLD = 1;
264
- const relevantResults = resultsWithScores.filter(([_, score]) => score < SIMILARITY_THRESHOLD);
243
+ const relevantResults = results.rows.filter((row) => row.similarity < SIMILARITY_THRESHOLD);
265
244
  console.log(`[queryEmbedding] ${relevantResults.length} results passed threshold (< ${SIMILARITY_THRESHOLD})`);
266
245
  const topResults = relevantResults.slice(0, 3);
267
- const sourceDocuments = topResults.map(([doc]) => doc);
268
- const bestMatchForDisplay = topResults.length > 0 ? [topResults[0][0]] : [];
269
- const formatDocs = (docs) => {
270
- return docs.map((doc) => {
271
- const title = doc.metadata?.title ? `Title: ${doc.metadata.title}
246
+ const sourceDocuments = topResults.map((row) => ({
247
+ pageContent: row.content,
248
+ metadata: row.metadata
249
+ }));
250
+ const bestMatchForDisplay = topResults.length > 0 ? [{ pageContent: topResults[0].content, metadata: topResults[0].metadata }] : [];
251
+ const context = sourceDocuments.map((doc) => {
252
+ const title = doc.metadata?.title ? `Title: ${doc.metadata.title}
272
253
  ` : "";
273
- return `${title}${doc.pageContent}`;
274
- }).join("\n\n");
275
- };
276
- const ragPrompt = prompts.ChatPromptTemplate.fromMessages([
277
- [
278
- "system",
279
- `You are a helpful assistant that answers questions based on the provided context.
254
+ return `${title}${doc.pageContent}`;
255
+ }).join("\n\n");
256
+ const { text } = await ai.generateText({
257
+ model: this.chatModel,
258
+ system: `You are a helpful assistant that answers questions based on the provided context.
280
259
  If you cannot find the answer in the context, say so. Be concise and accurate.
281
260
 
282
261
  Context:
283
- {context}`
284
- ],
285
- ["human", "{question}"]
286
- ]);
287
- const ragChain = runnables.RunnableSequence.from([
288
- {
289
- context: async () => formatDocs(sourceDocuments),
290
- question: new runnables.RunnablePassthrough()
291
- },
292
- ragPrompt,
293
- this.chat,
294
- new output_parsers.StringOutputParser()
295
- ]);
296
- const text = await ragChain.invoke(query);
262
+ ${context}`,
263
+ prompt: query
264
+ });
297
265
  return {
298
266
  text,
299
267
  sourceDocuments: bestMatchForDisplay
300
- // Only return best match to display
301
268
  };
302
269
  } catch (error) {
303
270
  console.error(`Failed to query embeddings: ${error}`);
@@ -305,22 +272,32 @@ Context:
305
272
  }
306
273
  }
307
274
  async similaritySearch(query, k = 4) {
308
- if (!this.embeddings || !this.vectorStoreConfig) {
275
+ if (!this.embeddingModel_ || !this.pool) {
309
276
  throw new Error("Plugin manager not initialized");
310
277
  }
311
278
  try {
312
- const vectorStore = await pgvector.PGVectorStore.initialize(
313
- this.embeddings,
314
- this.vectorStoreConfig
315
- );
316
- return await vectorStore.similaritySearch(query, k);
279
+ const { embedding: queryVector } = await ai.embed({
280
+ model: this.embeddingModel_,
281
+ value: query
282
+ });
283
+ const vectorStr = `[${queryVector.join(",")}]`;
284
+ const results = await this.pool.query(`
285
+ SELECT content, metadata
286
+ FROM embeddings_documents
287
+ ORDER BY embedding <=> $1::vector
288
+ LIMIT $2
289
+ `, [vectorStr, k]);
290
+ return results.rows.map((row) => ({
291
+ pageContent: row.content,
292
+ metadata: row.metadata
293
+ }));
317
294
  } catch (error) {
318
295
  console.error(`Failed to perform similarity search: ${error}`);
319
296
  throw new Error(`Failed to perform similarity search: ${error}`);
320
297
  }
321
298
  }
322
299
  isInitialized() {
323
- return !!(this.embeddings && this.chat && this.pool);
300
+ return !!(this.embeddingModel_ && this.chatModel && this.pool);
324
301
  }
325
302
  /**
326
303
  * Get all embeddings from Neon DB
@@ -375,23 +352,19 @@ Context:
375
352
  getPool() {
376
353
  return this.pool;
377
354
  }
378
- getEmbeddings() {
379
- return this.embeddings;
355
+ getEmbeddingModel_() {
356
+ return this.embeddingModel_;
380
357
  }
381
- getEmbeddingModel() {
382
- return this.embeddingModel;
383
- }
384
- getChat() {
385
- return this.chat;
358
+ getEmbeddingModelName() {
359
+ return this.embeddingModelName;
386
360
  }
387
361
  async destroy() {
388
362
  if (this.pool) {
389
363
  await this.pool.end();
390
364
  this.pool = null;
391
365
  }
392
- this.embeddings = null;
393
- this.chat = null;
394
- this.vectorStoreConfig = null;
366
+ this.embeddingModel_ = null;
367
+ this.chatModel = null;
395
368
  }
396
369
  /**
397
370
  * Clear all embeddings from Neon DB
@@ -1116,8 +1089,8 @@ Topics: ${(r.topics || []).join(", ")}
1116
1089
 
1117
1090
  ${r.contextText || r.chunkText}`
1118
1091
  ).join("\n\n---\n\n");
1119
- const chat = pluginManager.getChat();
1120
- if (!chat) {
1092
+ const config2 = strapi.config.get("plugin::yt-embeddings-strapi-plugin");
1093
+ if (!config2?.openAIApiKey) {
1121
1094
  ctx.body = {
1122
1095
  text: ytResults.map((r) => `**${r.title}** (${r.deepLink})
1123
1096
  ${r.chunkText}`).join("\n\n"),
@@ -1128,19 +1101,19 @@ ${r.chunkText}`).join("\n\n"),
1128
1101
  };
1129
1102
  return;
1130
1103
  }
1131
- const prompt = prompts.ChatPromptTemplate.fromMessages([
1132
- ["system", `You are a helpful assistant that answers questions based on YouTube transcript content.
1104
+ const openai$1 = openai.createOpenAI({ apiKey: config2.openAIApiKey });
1105
+ const { text } = await ai.generateText({
1106
+ model: openai$1("gpt-4o-mini"),
1107
+ system: `You are a helpful assistant that answers questions based on YouTube transcript content.
1133
1108
  Include timestamps and video links when relevant. Be concise and accurate.
1134
1109
  If you cannot find the answer in the context, say so.
1135
1110
 
1136
1111
  Context:
1137
- {context}`],
1138
- ["human", "{question}"]
1139
- ]);
1140
- const chain = prompt.pipe(chat);
1141
- const response = await chain.invoke({ context, question: query });
1112
+ ${context}`,
1113
+ prompt: query
1114
+ });
1142
1115
  ctx.body = {
1143
- text: response.content,
1116
+ text,
1144
1117
  sourceDocuments: ytResults.map((r) => ({
1145
1118
  pageContent: r.chunkText,
1146
1119
  metadata: { id: r.videoId, title: r.title, deepLink: r.deepLink }
@@ -1810,15 +1783,14 @@ const metadataSchema = zod.z.object({
1810
1783
  language: zod.z.string().default("en")
1811
1784
  });
1812
1785
  async function extractVideoMetadata(title, fullTranscript, durationSeconds, openAIApiKey) {
1813
- const llm = new openai.ChatOpenAI({
1814
- modelName: "gpt-4o-mini",
1815
- temperature: 0,
1816
- openAIApiKey
1817
- });
1818
- const structured = llm.withStructuredOutput(metadataSchema);
1786
+ const openai$1 = openai.createOpenAI({ apiKey: openAIApiKey });
1819
1787
  const words = fullTranscript.split(/\s+/);
1820
1788
  const sample = words.length > 4e3 ? [...words.slice(0, 2e3), "...", ...words.slice(-2e3)].join(" ") : fullTranscript;
1821
- const result = await structured.invoke(`
1789
+ const { object } = await ai.generateObject({
1790
+ model: openai$1("gpt-4o-mini"),
1791
+ schema: metadataSchema,
1792
+ temperature: 0,
1793
+ prompt: `
1822
1794
  Video title: "${title}"
1823
1795
  Duration: ${Math.floor(durationSeconds / 60)} minutes
1824
1796
 
@@ -1832,12 +1804,13 @@ Extract:
1832
1804
  - summary: 2-3 sentences describing what the video teaches or argues
1833
1805
  - keyMoments: the 5-8 most important moments, with approximate start time in seconds
1834
1806
  - language: ISO 639-1 language code of the transcript
1835
- `.trim());
1807
+ `.trim()
1808
+ });
1836
1809
  return {
1837
- topics: result.topics ?? [],
1838
- summary: result.summary ?? "",
1839
- keyMoments: result.keyMoments ?? [],
1840
- language: result.language ?? "en"
1810
+ topics: object.topics ?? [],
1811
+ summary: object.summary ?? "",
1812
+ keyMoments: object.keyMoments ?? [],
1813
+ language: object.language ?? "en"
1841
1814
  };
1842
1815
  }
1843
1816
  function computeContentHash(content) {
@@ -1847,9 +1820,9 @@ const ytEmbeddings = ({ strapi }) => ({
1847
1820
  // ── Ingest a single transcript ──────────────────────────────────────────────
1848
1821
  async embedTranscript(transcript) {
1849
1822
  const pool = pluginManager.getPool();
1850
- const embeddings = pluginManager.getEmbeddings();
1851
- const embeddingModel = pluginManager.getEmbeddingModel();
1852
- if (!pool || !embeddings) {
1823
+ const embeddingModel = pluginManager.getEmbeddingModel_();
1824
+ const embeddingModelName = pluginManager.getEmbeddingModelName();
1825
+ if (!pool || !embeddingModel) {
1853
1826
  throw new Error("[yt-embed] Plugin manager not initialized");
1854
1827
  }
1855
1828
  const contentHash = computeContentHash(transcript.fullTranscript);
@@ -1880,7 +1853,7 @@ const ytEmbeddings = ({ strapi }) => ({
1880
1853
  transcript.title,
1881
1854
  durationSeconds,
1882
1855
  contentHash,
1883
- embeddingModel
1856
+ embeddingModelName
1884
1857
  ]
1885
1858
  );
1886
1859
  try {
@@ -1916,7 +1889,7 @@ const ytEmbeddings = ({ strapi }) => ({
1916
1889
  strapi.log.info(`[yt-embed] ${transcript.title} — no chunks (empty transcript)`);
1917
1890
  return { videoId: transcript.videoId, chunkCount: 0, skipped: false };
1918
1891
  }
1919
- const embeddingVectors = await embeddings.embedDocuments(chunks.map((c) => c.text));
1892
+ const { embeddings: embeddingVectors } = await ai.embedMany({ model: embeddingModel, values: chunks.map((c) => c.text) });
1920
1893
  const insertedIds = [];
1921
1894
  for (let i = 0; i < chunks.length; i++) {
1922
1895
  const chunk = chunks[i];
@@ -1967,12 +1940,12 @@ const ytEmbeddings = ({ strapi }) => ({
1967
1940
  // ── Semantic search with context expansion ──────────────────────────────────
1968
1941
  async search(query, options = {}) {
1969
1942
  const pool = pluginManager.getPool();
1970
- const embeddingsClient = pluginManager.getEmbeddings();
1971
- if (!pool || !embeddingsClient) {
1943
+ const embeddingModel = pluginManager.getEmbeddingModel_();
1944
+ if (!pool || !embeddingModel) {
1972
1945
  throw new Error("[yt-embed] Plugin manager not initialized");
1973
1946
  }
1974
1947
  const { limit = 5, minSimilarity = 0.2, contextWindowSeconds = 30 } = options;
1975
- const queryVector = await embeddingsClient.embedQuery(query);
1948
+ const { embedding: queryVector } = await ai.embed({ model: embeddingModel, value: query });
1976
1949
  const vectorStr = `[${queryVector.join(",")}]`;
1977
1950
  const params = [vectorStr, minSimilarity, limit * 2];
1978
1951
  const filters = [];
@@ -1,8 +1,5 @@
1
- import { OpenAIEmbeddings, ChatOpenAI } from "@langchain/openai";
2
- import { PGVectorStore } from "@langchain/community/vectorstores/pgvector";
3
- import { StringOutputParser } from "@langchain/core/output_parsers";
4
- import { ChatPromptTemplate } from "@langchain/core/prompts";
5
- import { RunnableSequence, RunnablePassthrough } from "@langchain/core/runnables";
1
+ import { embed, generateText, generateObject, embedMany } from "ai";
2
+ import { createOpenAI } from "@ai-sdk/openai";
6
3
  import { Pool } from "pg";
7
4
  import { Server } from "@modelcontextprotocol/sdk/server/index.js";
8
5
  import { ListToolsRequestSchema, CallToolRequestSchema } from "@modelcontextprotocol/sdk/types.js";
@@ -50,12 +47,11 @@ const config = {
50
47
  };
51
48
  class PluginManager {
52
49
  constructor() {
53
- this.embeddings = null;
54
- this.chat = null;
50
+ this.embeddingModel_ = null;
51
+ this.chatModel = null;
55
52
  this.pool = null;
56
- this.embeddingModel = "text-embedding-3-small";
53
+ this.embeddingModelName = "text-embedding-3-small";
57
54
  this.dimensions = 1536;
58
- this.vectorStoreConfig = null;
59
55
  }
60
56
  async initializePool(connectionString) {
61
57
  console.log("Initializing Neon DB Pool");
@@ -111,74 +107,47 @@ class PluginManager {
111
107
  client.release();
112
108
  }
113
109
  }
114
- async initializeEmbeddings(openAIApiKey) {
115
- console.log(`Initializing OpenAI Embeddings (model: ${this.embeddingModel})`);
116
- if (this.embeddings) return this.embeddings;
117
- try {
118
- this.embeddings = new OpenAIEmbeddings({
119
- openAIApiKey,
120
- modelName: this.embeddingModel,
121
- dimensions: this.dimensions
122
- });
123
- return this.embeddings;
124
- } catch (error) {
125
- console.error(`Failed to initialize Embeddings: ${error}`);
126
- throw new Error(`Failed to initialize Embeddings: ${error}`);
127
- }
110
+ initializeEmbeddings(openai) {
111
+ console.log(`Initializing OpenAI Embeddings (model: ${this.embeddingModelName})`);
112
+ if (this.embeddingModel_) return;
113
+ this.embeddingModel_ = openai.embedding(this.embeddingModelName, {
114
+ dimensions: this.dimensions
115
+ });
128
116
  }
129
- async initializeChat(openAIApiKey) {
117
+ initializeChat(openai) {
130
118
  console.log("Initializing Chat Model");
131
- if (this.chat) return this.chat;
132
- try {
133
- this.chat = new ChatOpenAI({
134
- modelName: "gpt-4o-mini",
135
- temperature: 0.7,
136
- openAIApiKey
137
- });
138
- return this.chat;
139
- } catch (error) {
140
- console.error(`Failed to initialize Chat: ${error}`);
141
- throw new Error(`Failed to initialize Chat: ${error}`);
142
- }
119
+ if (this.chatModel) return;
120
+ this.chatModel = openai("gpt-4o-mini");
143
121
  }
144
122
  async initialize(config2) {
145
123
  const model = config2.embeddingModel || "text-embedding-3-small";
146
124
  if (EMBEDDING_MODELS[model]) {
147
- this.embeddingModel = model;
125
+ this.embeddingModelName = model;
148
126
  this.dimensions = EMBEDDING_MODELS[model].dimensions;
149
127
  } else {
150
128
  console.warn(`Invalid embedding model "${model}", using default`);
151
- this.embeddingModel = "text-embedding-3-small";
129
+ this.embeddingModelName = "text-embedding-3-small";
152
130
  this.dimensions = EMBEDDING_MODELS["text-embedding-3-small"].dimensions;
153
131
  }
154
- console.log(`Using embedding model: ${this.embeddingModel} (${this.dimensions} dimensions)`);
132
+ console.log(`Using embedding model: ${this.embeddingModelName} (${this.dimensions} dimensions)`);
155
133
  await this.initializePool(config2.neonConnectionString);
156
- await this.initializeEmbeddings(config2.openAIApiKey);
157
- await this.initializeChat(config2.openAIApiKey);
158
- if (this.pool) {
159
- this.vectorStoreConfig = {
160
- pool: this.pool,
161
- tableName: "embeddings_documents",
162
- columns: {
163
- idColumnName: "id",
164
- vectorColumnName: "embedding",
165
- contentColumnName: "content",
166
- metadataColumnName: "metadata"
167
- },
168
- distanceStrategy: "cosine"
169
- };
170
- }
134
+ const openai = createOpenAI({ apiKey: config2.openAIApiKey });
135
+ this.initializeEmbeddings(openai);
136
+ this.initializeChat(openai);
171
137
  console.log("Plugin Manager Initialization Complete");
172
138
  }
173
139
  async createEmbedding(docData) {
174
- if (!this.embeddings || !this.vectorStoreConfig || !this.pool) {
140
+ if (!this.embeddingModel_ || !this.pool) {
175
141
  throw new Error("Plugin manager not initialized");
176
142
  }
177
143
  const maxRetries = 3;
178
144
  const retryDelay = 2e3;
179
145
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
180
146
  try {
181
- const embeddingVector = await this.embeddings.embedQuery(docData.content);
147
+ const { embedding: embeddingVector } = await embed({
148
+ model: this.embeddingModel_,
149
+ value: docData.content
150
+ });
182
151
  const metadata = {
183
152
  id: docData.id,
184
153
  title: docData.title,
@@ -227,58 +196,56 @@ class PluginManager {
227
196
  }
228
197
  }
229
198
  async queryEmbedding(query) {
230
- if (!this.embeddings || !this.chat || !this.vectorStoreConfig) {
199
+ if (!this.embeddingModel_ || !this.chatModel || !this.pool) {
231
200
  throw new Error("Plugin manager not initialized");
232
201
  }
233
202
  try {
234
- const vectorStore = await PGVectorStore.initialize(
235
- this.embeddings,
236
- this.vectorStoreConfig
237
- );
238
- const resultsWithScores = await vectorStore.similaritySearchWithScore(query, 6);
203
+ const { embedding: queryVector } = await embed({
204
+ model: this.embeddingModel_,
205
+ value: query
206
+ });
207
+ const vectorStr = `[${queryVector.join(",")}]`;
208
+ const results = await this.pool.query(`
209
+ SELECT
210
+ content,
211
+ metadata,
212
+ 1 - (embedding <=> $1::vector) AS similarity
213
+ FROM embeddings_documents
214
+ WHERE 1 - (embedding <=> $1::vector) > 0
215
+ ORDER BY embedding <=> $1::vector
216
+ LIMIT 6
217
+ `, [vectorStr]);
239
218
  console.log(`[queryEmbedding] Query: "${query}"`);
240
- console.log(`[queryEmbedding] Found ${resultsWithScores.length} results:`);
241
- resultsWithScores.forEach(([doc, score], i) => {
242
- console.log(` ${i + 1}. Score: ${score.toFixed(4)}, Title: ${doc.metadata?.title || "N/A"}`);
219
+ console.log(`[queryEmbedding] Found ${results.rows.length} results:`);
220
+ results.rows.forEach((row, i) => {
221
+ console.log(` ${i + 1}. Score: ${row.similarity.toFixed(4)}, Title: ${row.metadata?.title || "N/A"}`);
243
222
  });
244
223
  const SIMILARITY_THRESHOLD = 1;
245
- const relevantResults = resultsWithScores.filter(([_, score]) => score < SIMILARITY_THRESHOLD);
224
+ const relevantResults = results.rows.filter((row) => row.similarity < SIMILARITY_THRESHOLD);
246
225
  console.log(`[queryEmbedding] ${relevantResults.length} results passed threshold (< ${SIMILARITY_THRESHOLD})`);
247
226
  const topResults = relevantResults.slice(0, 3);
248
- const sourceDocuments = topResults.map(([doc]) => doc);
249
- const bestMatchForDisplay = topResults.length > 0 ? [topResults[0][0]] : [];
250
- const formatDocs = (docs) => {
251
- return docs.map((doc) => {
252
- const title = doc.metadata?.title ? `Title: ${doc.metadata.title}
227
+ const sourceDocuments = topResults.map((row) => ({
228
+ pageContent: row.content,
229
+ metadata: row.metadata
230
+ }));
231
+ const bestMatchForDisplay = topResults.length > 0 ? [{ pageContent: topResults[0].content, metadata: topResults[0].metadata }] : [];
232
+ const context = sourceDocuments.map((doc) => {
233
+ const title = doc.metadata?.title ? `Title: ${doc.metadata.title}
253
234
  ` : "";
254
- return `${title}${doc.pageContent}`;
255
- }).join("\n\n");
256
- };
257
- const ragPrompt = ChatPromptTemplate.fromMessages([
258
- [
259
- "system",
260
- `You are a helpful assistant that answers questions based on the provided context.
235
+ return `${title}${doc.pageContent}`;
236
+ }).join("\n\n");
237
+ const { text } = await generateText({
238
+ model: this.chatModel,
239
+ system: `You are a helpful assistant that answers questions based on the provided context.
261
240
  If you cannot find the answer in the context, say so. Be concise and accurate.
262
241
 
263
242
  Context:
264
- {context}`
265
- ],
266
- ["human", "{question}"]
267
- ]);
268
- const ragChain = RunnableSequence.from([
269
- {
270
- context: async () => formatDocs(sourceDocuments),
271
- question: new RunnablePassthrough()
272
- },
273
- ragPrompt,
274
- this.chat,
275
- new StringOutputParser()
276
- ]);
277
- const text = await ragChain.invoke(query);
243
+ ${context}`,
244
+ prompt: query
245
+ });
278
246
  return {
279
247
  text,
280
248
  sourceDocuments: bestMatchForDisplay
281
- // Only return best match to display
282
249
  };
283
250
  } catch (error) {
284
251
  console.error(`Failed to query embeddings: ${error}`);
@@ -286,22 +253,32 @@ Context:
286
253
  }
287
254
  }
288
255
  async similaritySearch(query, k = 4) {
289
- if (!this.embeddings || !this.vectorStoreConfig) {
256
+ if (!this.embeddingModel_ || !this.pool) {
290
257
  throw new Error("Plugin manager not initialized");
291
258
  }
292
259
  try {
293
- const vectorStore = await PGVectorStore.initialize(
294
- this.embeddings,
295
- this.vectorStoreConfig
296
- );
297
- return await vectorStore.similaritySearch(query, k);
260
+ const { embedding: queryVector } = await embed({
261
+ model: this.embeddingModel_,
262
+ value: query
263
+ });
264
+ const vectorStr = `[${queryVector.join(",")}]`;
265
+ const results = await this.pool.query(`
266
+ SELECT content, metadata
267
+ FROM embeddings_documents
268
+ ORDER BY embedding <=> $1::vector
269
+ LIMIT $2
270
+ `, [vectorStr, k]);
271
+ return results.rows.map((row) => ({
272
+ pageContent: row.content,
273
+ metadata: row.metadata
274
+ }));
298
275
  } catch (error) {
299
276
  console.error(`Failed to perform similarity search: ${error}`);
300
277
  throw new Error(`Failed to perform similarity search: ${error}`);
301
278
  }
302
279
  }
303
280
  isInitialized() {
304
- return !!(this.embeddings && this.chat && this.pool);
281
+ return !!(this.embeddingModel_ && this.chatModel && this.pool);
305
282
  }
306
283
  /**
307
284
  * Get all embeddings from Neon DB
@@ -356,23 +333,19 @@ Context:
356
333
  getPool() {
357
334
  return this.pool;
358
335
  }
359
- getEmbeddings() {
360
- return this.embeddings;
336
+ getEmbeddingModel_() {
337
+ return this.embeddingModel_;
361
338
  }
362
- getEmbeddingModel() {
363
- return this.embeddingModel;
364
- }
365
- getChat() {
366
- return this.chat;
339
+ getEmbeddingModelName() {
340
+ return this.embeddingModelName;
367
341
  }
368
342
  async destroy() {
369
343
  if (this.pool) {
370
344
  await this.pool.end();
371
345
  this.pool = null;
372
346
  }
373
- this.embeddings = null;
374
- this.chat = null;
375
- this.vectorStoreConfig = null;
347
+ this.embeddingModel_ = null;
348
+ this.chatModel = null;
376
349
  }
377
350
  /**
378
351
  * Clear all embeddings from Neon DB
@@ -1097,8 +1070,8 @@ Topics: ${(r.topics || []).join(", ")}
1097
1070
 
1098
1071
  ${r.contextText || r.chunkText}`
1099
1072
  ).join("\n\n---\n\n");
1100
- const chat = pluginManager.getChat();
1101
- if (!chat) {
1073
+ const config2 = strapi.config.get("plugin::yt-embeddings-strapi-plugin");
1074
+ if (!config2?.openAIApiKey) {
1102
1075
  ctx.body = {
1103
1076
  text: ytResults.map((r) => `**${r.title}** (${r.deepLink})
1104
1077
  ${r.chunkText}`).join("\n\n"),
@@ -1109,19 +1082,19 @@ ${r.chunkText}`).join("\n\n"),
1109
1082
  };
1110
1083
  return;
1111
1084
  }
1112
- const prompt = ChatPromptTemplate.fromMessages([
1113
- ["system", `You are a helpful assistant that answers questions based on YouTube transcript content.
1085
+ const openai = createOpenAI({ apiKey: config2.openAIApiKey });
1086
+ const { text } = await generateText({
1087
+ model: openai("gpt-4o-mini"),
1088
+ system: `You are a helpful assistant that answers questions based on YouTube transcript content.
1114
1089
  Include timestamps and video links when relevant. Be concise and accurate.
1115
1090
  If you cannot find the answer in the context, say so.
1116
1091
 
1117
1092
  Context:
1118
- {context}`],
1119
- ["human", "{question}"]
1120
- ]);
1121
- const chain = prompt.pipe(chat);
1122
- const response = await chain.invoke({ context, question: query });
1093
+ ${context}`,
1094
+ prompt: query
1095
+ });
1123
1096
  ctx.body = {
1124
- text: response.content,
1097
+ text,
1125
1098
  sourceDocuments: ytResults.map((r) => ({
1126
1099
  pageContent: r.chunkText,
1127
1100
  metadata: { id: r.videoId, title: r.title, deepLink: r.deepLink }
@@ -1791,15 +1764,14 @@ const metadataSchema = z.object({
1791
1764
  language: z.string().default("en")
1792
1765
  });
1793
1766
  async function extractVideoMetadata(title, fullTranscript, durationSeconds, openAIApiKey) {
1794
- const llm = new ChatOpenAI({
1795
- modelName: "gpt-4o-mini",
1796
- temperature: 0,
1797
- openAIApiKey
1798
- });
1799
- const structured = llm.withStructuredOutput(metadataSchema);
1767
+ const openai = createOpenAI({ apiKey: openAIApiKey });
1800
1768
  const words = fullTranscript.split(/\s+/);
1801
1769
  const sample = words.length > 4e3 ? [...words.slice(0, 2e3), "...", ...words.slice(-2e3)].join(" ") : fullTranscript;
1802
- const result = await structured.invoke(`
1770
+ const { object } = await generateObject({
1771
+ model: openai("gpt-4o-mini"),
1772
+ schema: metadataSchema,
1773
+ temperature: 0,
1774
+ prompt: `
1803
1775
  Video title: "${title}"
1804
1776
  Duration: ${Math.floor(durationSeconds / 60)} minutes
1805
1777
 
@@ -1813,12 +1785,13 @@ Extract:
1813
1785
  - summary: 2-3 sentences describing what the video teaches or argues
1814
1786
  - keyMoments: the 5-8 most important moments, with approximate start time in seconds
1815
1787
  - language: ISO 639-1 language code of the transcript
1816
- `.trim());
1788
+ `.trim()
1789
+ });
1817
1790
  return {
1818
- topics: result.topics ?? [],
1819
- summary: result.summary ?? "",
1820
- keyMoments: result.keyMoments ?? [],
1821
- language: result.language ?? "en"
1791
+ topics: object.topics ?? [],
1792
+ summary: object.summary ?? "",
1793
+ keyMoments: object.keyMoments ?? [],
1794
+ language: object.language ?? "en"
1822
1795
  };
1823
1796
  }
1824
1797
  function computeContentHash(content) {
@@ -1828,9 +1801,9 @@ const ytEmbeddings = ({ strapi }) => ({
1828
1801
  // ── Ingest a single transcript ──────────────────────────────────────────────
1829
1802
  async embedTranscript(transcript) {
1830
1803
  const pool = pluginManager.getPool();
1831
- const embeddings = pluginManager.getEmbeddings();
1832
- const embeddingModel = pluginManager.getEmbeddingModel();
1833
- if (!pool || !embeddings) {
1804
+ const embeddingModel = pluginManager.getEmbeddingModel_();
1805
+ const embeddingModelName = pluginManager.getEmbeddingModelName();
1806
+ if (!pool || !embeddingModel) {
1834
1807
  throw new Error("[yt-embed] Plugin manager not initialized");
1835
1808
  }
1836
1809
  const contentHash = computeContentHash(transcript.fullTranscript);
@@ -1861,7 +1834,7 @@ const ytEmbeddings = ({ strapi }) => ({
1861
1834
  transcript.title,
1862
1835
  durationSeconds,
1863
1836
  contentHash,
1864
- embeddingModel
1837
+ embeddingModelName
1865
1838
  ]
1866
1839
  );
1867
1840
  try {
@@ -1897,7 +1870,7 @@ const ytEmbeddings = ({ strapi }) => ({
1897
1870
  strapi.log.info(`[yt-embed] ${transcript.title} — no chunks (empty transcript)`);
1898
1871
  return { videoId: transcript.videoId, chunkCount: 0, skipped: false };
1899
1872
  }
1900
- const embeddingVectors = await embeddings.embedDocuments(chunks.map((c) => c.text));
1873
+ const { embeddings: embeddingVectors } = await embedMany({ model: embeddingModel, values: chunks.map((c) => c.text) });
1901
1874
  const insertedIds = [];
1902
1875
  for (let i = 0; i < chunks.length; i++) {
1903
1876
  const chunk = chunks[i];
@@ -1948,12 +1921,12 @@ const ytEmbeddings = ({ strapi }) => ({
1948
1921
  // ── Semantic search with context expansion ──────────────────────────────────
1949
1922
  async search(query, options = {}) {
1950
1923
  const pool = pluginManager.getPool();
1951
- const embeddingsClient = pluginManager.getEmbeddings();
1952
- if (!pool || !embeddingsClient) {
1924
+ const embeddingModel = pluginManager.getEmbeddingModel_();
1925
+ if (!pool || !embeddingModel) {
1953
1926
  throw new Error("[yt-embed] Plugin manager not initialized");
1954
1927
  }
1955
1928
  const { limit = 5, minSimilarity = 0.2, contextWindowSeconds = 30 } = options;
1956
- const queryVector = await embeddingsClient.embedQuery(query);
1929
+ const { embedding: queryVector } = await embed({ model: embeddingModel, value: query });
1957
1930
  const vectorStr = `[${queryVector.join(",")}]`;
1958
1931
  const params = [vectorStr, minSimilarity, limit * 2];
1959
1932
  const filters = [];
@@ -1,5 +1,4 @@
1
- import { OpenAIEmbeddings, ChatOpenAI } from "@langchain/openai";
2
- import { Document } from "@langchain/core/documents";
1
+ import type { EmbeddingModel } from "ai";
3
2
  import { Pool } from "pg";
4
3
  import { type EmbeddingModelName } from "./config";
5
4
  interface PluginConfig {
@@ -20,24 +19,29 @@ interface CreateEmbeddingResult {
20
19
  }
21
20
  interface QueryResponse {
22
21
  text: string;
23
- sourceDocuments: Document[];
22
+ sourceDocuments: Array<{
23
+ pageContent: string;
24
+ metadata: any;
25
+ }>;
24
26
  }
25
27
  declare class PluginManager {
26
- private embeddings;
27
- private chat;
28
+ private embeddingModel_;
29
+ private chatModel;
28
30
  private pool;
29
- private embeddingModel;
31
+ private embeddingModelName;
30
32
  private dimensions;
31
- private vectorStoreConfig;
32
33
  initializePool(connectionString: string): Promise<Pool>;
33
34
  private initializeVectorTable;
34
- initializeEmbeddings(openAIApiKey: string): Promise<OpenAIEmbeddings>;
35
- initializeChat(openAIApiKey: string): Promise<ChatOpenAI>;
35
+ private initializeEmbeddings;
36
+ private initializeChat;
36
37
  initialize(config: PluginConfig): Promise<void>;
37
38
  createEmbedding(docData: EmbeddingDocument): Promise<CreateEmbeddingResult>;
38
39
  deleteEmbedding(strapiId: string): Promise<void>;
39
40
  queryEmbedding(query: string): Promise<QueryResponse>;
40
- similaritySearch(query: string, k?: number): Promise<Document[]>;
41
+ similaritySearch(query: string, k?: number): Promise<Array<{
42
+ pageContent: string;
43
+ metadata: any;
44
+ }>>;
41
45
  isInitialized(): boolean;
42
46
  /**
43
47
  * Get all embeddings from Neon DB
@@ -56,9 +60,8 @@ declare class PluginManager {
56
60
  */
57
61
  deleteNeonEmbeddingById(neonId: string): Promise<void>;
58
62
  getPool(): Pool | null;
59
- getEmbeddings(): OpenAIEmbeddings | null;
60
- getEmbeddingModel(): EmbeddingModelName;
61
- getChat(): ChatOpenAI | null;
63
+ getEmbeddingModel_(): EmbeddingModel<string> | null;
64
+ getEmbeddingModelName(): EmbeddingModelName;
62
65
  destroy(): Promise<void>;
63
66
  /**
64
67
  * Clear all embeddings from Neon DB
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "yt-embeddings-strapi-plugin",
3
- "version": "0.0.1",
3
+ "version": "0.1.0",
4
4
  "description": "Strapi v5 plugin for vector embeddings with OpenAI and Neon PostgreSQL. Enables semantic search, RAG chat, and MCP (Model Context Protocol) integration.",
5
5
  "keywords": [
6
6
  "strapi",
@@ -25,9 +25,6 @@
25
25
  "url": "https://github.com/PaulBratslavsky/strapi-content-embeddings/issues"
26
26
  },
27
27
  "type": "commonjs",
28
- "overrides": {
29
- "@langchain/core": "^1.1.31"
30
- },
31
28
  "exports": {
32
29
  "./package.json": "./package.json",
33
30
  "./strapi-admin": {
@@ -58,15 +55,13 @@
58
55
  "watch:link": "strapi-plugin watch:link"
59
56
  },
60
57
  "dependencies": {
61
- "@langchain/community": "^1.1.2",
62
- "@langchain/core": "^1.1.31",
63
- "@langchain/openai": "^1.2.1",
58
+ "@ai-sdk/openai": "^1.x",
64
59
  "@mdxeditor/editor": "^3.52.3",
65
60
  "@modelcontextprotocol/sdk": "^1.12.0",
66
61
  "@strapi/design-system": "^2.0.0-rc.12",
67
62
  "@strapi/icons": "^2.0.0-rc.12",
63
+ "ai": "^4.x",
68
64
  "html-to-text": "^9.0.5",
69
- "langchain": "^1.2.4",
70
65
  "pg": "^8.13.1",
71
66
  "remove-markdown": "^0.5.5",
72
67
  "qs": "^6.13.1",