strapi-content-embeddings 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +187 -0
- package/dist/_chunks/{App-4UwemHRe.mjs → App-C5NFY1UT.mjs} +287 -103
- package/dist/_chunks/{App-CnXhqiao.js → App-CA5bQnKQ.js} +286 -102
- package/dist/_chunks/{index-BWSiu_nE.mjs → index-CIpGvEcJ.mjs} +122 -104
- package/dist/_chunks/{index-BaPVw3mi.js → index-CVCA8dDp.js} +119 -101
- package/dist/admin/index.js +1 -1
- package/dist/admin/index.mjs +1 -1
- package/dist/admin/src/components/custom/EmbeddingsTable.d.ts +1 -1
- package/dist/admin/src/components/custom/MarkdownEditor.d.ts +1 -1
- package/dist/server/index.js +1137 -84
- package/dist/server/index.mjs +1137 -84
- package/dist/server/src/config/index.d.ts +9 -0
- package/dist/server/src/controllers/controller.d.ts +32 -0
- package/dist/server/src/controllers/index.d.ts +5 -0
- package/dist/server/src/index.d.ts +42 -2
- package/dist/server/src/mcp/tools/create-embedding.d.ts +6 -0
- package/dist/server/src/mcp/tools/index.d.ts +4 -0
- package/dist/server/src/plugin-manager.d.ts +32 -0
- package/dist/server/src/routes/content-api.d.ts +10 -0
- package/dist/server/src/routes/index.d.ts +10 -0
- package/dist/server/src/services/embeddings.d.ts +43 -2
- package/dist/server/src/services/index.d.ts +24 -2
- package/dist/server/src/services/sync.d.ts +71 -0
- package/dist/server/src/utils/chunking.d.ts +44 -0
- package/package.json +1 -1
package/dist/server/index.mjs
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { OpenAIEmbeddings, ChatOpenAI } from "@langchain/openai";
|
|
2
2
|
import { PGVectorStore } from "@langchain/community/vectorstores/pgvector";
|
|
3
|
-
import { Document } from "@langchain/core/documents";
|
|
4
3
|
import { StringOutputParser } from "@langchain/core/output_parsers";
|
|
5
4
|
import { ChatPromptTemplate } from "@langchain/core/prompts";
|
|
6
5
|
import { RunnableSequence, RunnablePassthrough } from "@langchain/core/runnables";
|
|
@@ -19,7 +18,10 @@ const config = {
|
|
|
19
18
|
default: {
|
|
20
19
|
openAIApiKey: "",
|
|
21
20
|
neonConnectionString: "",
|
|
22
|
-
embeddingModel: "text-embedding-3-small"
|
|
21
|
+
embeddingModel: "text-embedding-3-small",
|
|
22
|
+
chunkSize: 4e3,
|
|
23
|
+
chunkOverlap: 200,
|
|
24
|
+
autoChunk: false
|
|
23
25
|
},
|
|
24
26
|
validator(config2) {
|
|
25
27
|
if (!config2.openAIApiKey) {
|
|
@@ -37,6 +39,11 @@ const config = {
|
|
|
37
39
|
`strapi-content-embeddings: Invalid embeddingModel "${config2.embeddingModel}". Valid options: ${Object.keys(EMBEDDING_MODELS).join(", ")}. Defaulting to "text-embedding-3-small".`
|
|
38
40
|
);
|
|
39
41
|
}
|
|
42
|
+
if (config2.chunkSize && (config2.chunkSize < 100 || config2.chunkSize > 8e3)) {
|
|
43
|
+
console.warn(
|
|
44
|
+
`strapi-content-embeddings: chunkSize ${config2.chunkSize} is outside recommended range (100-8000). Using default value of 4000.`
|
|
45
|
+
);
|
|
46
|
+
}
|
|
40
47
|
}
|
|
41
48
|
};
|
|
42
49
|
class PluginManager {
|
|
@@ -162,39 +169,46 @@ class PluginManager {
|
|
|
162
169
|
console.log("Plugin Manager Initialization Complete");
|
|
163
170
|
}
|
|
164
171
|
async createEmbedding(docData) {
|
|
165
|
-
if (!this.embeddings || !this.vectorStoreConfig) {
|
|
172
|
+
if (!this.embeddings || !this.vectorStoreConfig || !this.pool) {
|
|
166
173
|
throw new Error("Plugin manager not initialized");
|
|
167
174
|
}
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
175
|
+
const maxRetries = 3;
|
|
176
|
+
const retryDelay = 2e3;
|
|
177
|
+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
178
|
+
try {
|
|
179
|
+
const embeddingVector = await this.embeddings.embedQuery(docData.content);
|
|
180
|
+
const metadata = {
|
|
173
181
|
id: docData.id,
|
|
174
182
|
title: docData.title,
|
|
175
183
|
collectionType: docData.collectionType || "standalone",
|
|
176
184
|
fieldName: docData.fieldName || "content"
|
|
185
|
+
};
|
|
186
|
+
const vectorString = `[${embeddingVector.join(",")}]`;
|
|
187
|
+
const result = await this.pool.query(
|
|
188
|
+
`INSERT INTO embeddings_documents (content, metadata, embedding)
|
|
189
|
+
VALUES ($1, $2::jsonb, $3::vector)
|
|
190
|
+
RETURNING id`,
|
|
191
|
+
[docData.content, JSON.stringify(metadata), vectorString]
|
|
192
|
+
);
|
|
193
|
+
return {
|
|
194
|
+
embeddingId: result.rows[0]?.id || "",
|
|
195
|
+
embedding: embeddingVector
|
|
196
|
+
};
|
|
197
|
+
} catch (error) {
|
|
198
|
+
const isRateLimit = error.message?.includes("429") || error.message?.includes("rate");
|
|
199
|
+
const isLastAttempt = attempt === maxRetries;
|
|
200
|
+
if (isRateLimit && !isLastAttempt) {
|
|
201
|
+
console.log(`[createEmbedding] Rate limited, waiting ${retryDelay}ms before retry ${attempt + 1}/${maxRetries}...`);
|
|
202
|
+
await new Promise((resolve) => setTimeout(resolve, retryDelay * attempt));
|
|
203
|
+
continue;
|
|
177
204
|
}
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
);
|
|
184
|
-
const result = await this.pool.query(
|
|
185
|
-
`SELECT id FROM embeddings_documents
|
|
186
|
-
WHERE metadata->>'id' = $1
|
|
187
|
-
ORDER BY id DESC LIMIT 1`,
|
|
188
|
-
[docData.id]
|
|
189
|
-
);
|
|
190
|
-
return {
|
|
191
|
-
embeddingId: result.rows[0]?.id || "",
|
|
192
|
-
embedding: embeddingVector
|
|
193
|
-
};
|
|
194
|
-
} catch (error) {
|
|
195
|
-
console.error(`Failed to create embedding: ${error}`);
|
|
196
|
-
throw new Error(`Failed to create embedding: ${error}`);
|
|
205
|
+
console.error(`[createEmbedding] Failed (attempt ${attempt}/${maxRetries}):`, error.message || error);
|
|
206
|
+
if (isLastAttempt) {
|
|
207
|
+
throw new Error(`Failed to create embedding after ${maxRetries} attempts: ${error.message || error}`);
|
|
208
|
+
}
|
|
209
|
+
}
|
|
197
210
|
}
|
|
211
|
+
throw new Error("Failed to create embedding: unexpected error");
|
|
198
212
|
}
|
|
199
213
|
async deleteEmbedding(strapiId) {
|
|
200
214
|
if (!this.pool) {
|
|
@@ -220,8 +234,14 @@ class PluginManager {
|
|
|
220
234
|
this.vectorStoreConfig
|
|
221
235
|
);
|
|
222
236
|
const resultsWithScores = await vectorStore.similaritySearchWithScore(query, 6);
|
|
223
|
-
|
|
237
|
+
console.log(`[queryEmbedding] Query: "${query}"`);
|
|
238
|
+
console.log(`[queryEmbedding] Found ${resultsWithScores.length} results:`);
|
|
239
|
+
resultsWithScores.forEach(([doc, score], i) => {
|
|
240
|
+
console.log(` ${i + 1}. Score: ${score.toFixed(4)}, Title: ${doc.metadata?.title || "N/A"}`);
|
|
241
|
+
});
|
|
242
|
+
const SIMILARITY_THRESHOLD = 1;
|
|
224
243
|
const relevantResults = resultsWithScores.filter(([_, score]) => score < SIMILARITY_THRESHOLD);
|
|
244
|
+
console.log(`[queryEmbedding] ${relevantResults.length} results passed threshold (< ${SIMILARITY_THRESHOLD})`);
|
|
225
245
|
const topResults = relevantResults.slice(0, 3);
|
|
226
246
|
const sourceDocuments = topResults.map(([doc]) => doc);
|
|
227
247
|
const bestMatchForDisplay = topResults.length > 0 ? [topResults[0][0]] : [];
|
|
@@ -281,6 +301,56 @@ Context:
|
|
|
281
301
|
isInitialized() {
|
|
282
302
|
return !!(this.embeddings && this.chat && this.pool);
|
|
283
303
|
}
|
|
304
|
+
/**
|
|
305
|
+
* Get all embeddings from Neon DB
|
|
306
|
+
* Returns the metadata (including Strapi documentId) for each embedding
|
|
307
|
+
*/
|
|
308
|
+
async getAllNeonEmbeddings() {
|
|
309
|
+
if (!this.pool) {
|
|
310
|
+
throw new Error("Plugin manager not initialized");
|
|
311
|
+
}
|
|
312
|
+
try {
|
|
313
|
+
const result = await this.pool.query(`
|
|
314
|
+
SELECT
|
|
315
|
+
id,
|
|
316
|
+
content,
|
|
317
|
+
metadata->>'id' as strapi_id,
|
|
318
|
+
metadata->>'title' as title,
|
|
319
|
+
metadata->>'collectionType' as collection_type,
|
|
320
|
+
metadata->>'fieldName' as field_name
|
|
321
|
+
FROM embeddings_documents
|
|
322
|
+
ORDER BY id
|
|
323
|
+
`);
|
|
324
|
+
return result.rows.map((row) => ({
|
|
325
|
+
id: row.id,
|
|
326
|
+
strapiId: row.strapi_id,
|
|
327
|
+
title: row.title || "",
|
|
328
|
+
content: row.content || "",
|
|
329
|
+
collectionType: row.collection_type || "standalone",
|
|
330
|
+
fieldName: row.field_name || "content"
|
|
331
|
+
}));
|
|
332
|
+
} catch (error) {
|
|
333
|
+
console.error(`Failed to get Neon embeddings: ${error}`);
|
|
334
|
+
throw new Error(`Failed to get Neon embeddings: ${error}`);
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
/**
|
|
338
|
+
* Delete an embedding from Neon by its Neon UUID (not Strapi ID)
|
|
339
|
+
*/
|
|
340
|
+
async deleteNeonEmbeddingById(neonId) {
|
|
341
|
+
if (!this.pool) {
|
|
342
|
+
throw new Error("Plugin manager not initialized");
|
|
343
|
+
}
|
|
344
|
+
try {
|
|
345
|
+
await this.pool.query(
|
|
346
|
+
`DELETE FROM embeddings_documents WHERE id = $1`,
|
|
347
|
+
[neonId]
|
|
348
|
+
);
|
|
349
|
+
} catch (error) {
|
|
350
|
+
console.error(`Failed to delete Neon embedding: ${error}`);
|
|
351
|
+
throw new Error(`Failed to delete Neon embedding: ${error}`);
|
|
352
|
+
}
|
|
353
|
+
}
|
|
284
354
|
async destroy() {
|
|
285
355
|
if (this.pool) {
|
|
286
356
|
await this.pool.end();
|
|
@@ -290,6 +360,59 @@ Context:
|
|
|
290
360
|
this.chat = null;
|
|
291
361
|
this.vectorStoreConfig = null;
|
|
292
362
|
}
|
|
363
|
+
/**
|
|
364
|
+
* Clear all embeddings from Neon DB
|
|
365
|
+
* Returns the number of deleted rows
|
|
366
|
+
*/
|
|
367
|
+
async clearAllNeonEmbeddings() {
|
|
368
|
+
if (!this.pool) {
|
|
369
|
+
throw new Error("Plugin manager not initialized");
|
|
370
|
+
}
|
|
371
|
+
try {
|
|
372
|
+
const result = await this.pool.query(`
|
|
373
|
+
DELETE FROM embeddings_documents
|
|
374
|
+
RETURNING id
|
|
375
|
+
`);
|
|
376
|
+
console.log(`[clearAllNeonEmbeddings] Deleted ${result.rowCount} embeddings from Neon`);
|
|
377
|
+
return result.rowCount || 0;
|
|
378
|
+
} catch (error) {
|
|
379
|
+
console.error(`Failed to clear Neon embeddings: ${error}`);
|
|
380
|
+
throw new Error(`Failed to clear Neon embeddings: ${error}`);
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
/**
|
|
384
|
+
* Debug method to inspect raw data in Neon DB
|
|
385
|
+
*/
|
|
386
|
+
async debugNeonEmbeddings() {
|
|
387
|
+
if (!this.pool) {
|
|
388
|
+
throw new Error("Plugin manager not initialized");
|
|
389
|
+
}
|
|
390
|
+
try {
|
|
391
|
+
const result = await this.pool.query(`
|
|
392
|
+
SELECT
|
|
393
|
+
id,
|
|
394
|
+
content,
|
|
395
|
+
metadata,
|
|
396
|
+
pg_typeof(metadata) as metadata_type,
|
|
397
|
+
embedding IS NOT NULL as has_embedding,
|
|
398
|
+
CASE WHEN embedding IS NOT NULL THEN array_length(embedding::float[], 1) ELSE 0 END as embedding_length
|
|
399
|
+
FROM embeddings_documents
|
|
400
|
+
ORDER BY id
|
|
401
|
+
LIMIT 20
|
|
402
|
+
`);
|
|
403
|
+
return result.rows.map((row) => ({
|
|
404
|
+
id: row.id,
|
|
405
|
+
content: row.content?.substring(0, 200) + (row.content?.length > 200 ? "..." : ""),
|
|
406
|
+
metadata: row.metadata,
|
|
407
|
+
metadataType: row.metadata_type,
|
|
408
|
+
hasEmbedding: row.has_embedding,
|
|
409
|
+
embeddingLength: row.embedding_length || 0
|
|
410
|
+
}));
|
|
411
|
+
} catch (error) {
|
|
412
|
+
console.error(`Failed to debug Neon embeddings: ${error}`);
|
|
413
|
+
throw new Error(`Failed to debug Neon embeddings: ${error}`);
|
|
414
|
+
}
|
|
415
|
+
}
|
|
293
416
|
}
|
|
294
417
|
const pluginManager = new PluginManager();
|
|
295
418
|
const SemanticSearchSchema = z.object({
|
|
@@ -580,7 +703,7 @@ async function handleGetEmbedding(strapi, args) {
|
|
|
580
703
|
}
|
|
581
704
|
const createEmbeddingTool = {
|
|
582
705
|
name: "create_embedding",
|
|
583
|
-
description: "Create a new embedding from text content. The content will be vectorized and stored for semantic search.",
|
|
706
|
+
description: "Create a new embedding from text content. The content will be vectorized and stored for semantic search. For large content (over 4000 characters), enable autoChunk to automatically split into multiple embeddings.",
|
|
584
707
|
inputSchema: {
|
|
585
708
|
type: "object",
|
|
586
709
|
properties: {
|
|
@@ -595,21 +718,68 @@ const createEmbeddingTool = {
|
|
|
595
718
|
metadata: {
|
|
596
719
|
type: "object",
|
|
597
720
|
description: "Optional metadata to associate with the embedding (tags, source, etc.)"
|
|
721
|
+
},
|
|
722
|
+
autoChunk: {
|
|
723
|
+
type: "boolean",
|
|
724
|
+
description: "Automatically split large content into chunks (default: false). When enabled, content over 4000 characters will be split into multiple embeddings with overlap for context preservation."
|
|
598
725
|
}
|
|
599
726
|
},
|
|
600
727
|
required: ["title", "content"]
|
|
601
728
|
}
|
|
602
729
|
};
|
|
603
730
|
async function handleCreateEmbedding(strapi, args) {
|
|
604
|
-
const { title, content, metadata } = args;
|
|
731
|
+
const { title, content, metadata, autoChunk } = args;
|
|
605
732
|
try {
|
|
606
733
|
const embeddingsService = strapi.plugin("strapi-content-embeddings").service("embeddings");
|
|
734
|
+
if (autoChunk) {
|
|
735
|
+
const result = await embeddingsService.createChunkedEmbedding({
|
|
736
|
+
data: {
|
|
737
|
+
title,
|
|
738
|
+
content,
|
|
739
|
+
metadata: metadata || {},
|
|
740
|
+
collectionType: "standalone",
|
|
741
|
+
fieldName: "content"
|
|
742
|
+
}
|
|
743
|
+
});
|
|
744
|
+
return {
|
|
745
|
+
content: [
|
|
746
|
+
{
|
|
747
|
+
type: "text",
|
|
748
|
+
text: JSON.stringify(
|
|
749
|
+
{
|
|
750
|
+
success: true,
|
|
751
|
+
message: result.wasChunked ? `Content chunked into ${result.totalChunks} embeddings` : "Embedding created successfully (no chunking needed)",
|
|
752
|
+
wasChunked: result.wasChunked,
|
|
753
|
+
totalChunks: result.totalChunks,
|
|
754
|
+
primaryEmbedding: {
|
|
755
|
+
id: result.entity.id,
|
|
756
|
+
documentId: result.entity.documentId,
|
|
757
|
+
title: result.entity.title,
|
|
758
|
+
embeddingId: result.entity.embeddingId
|
|
759
|
+
},
|
|
760
|
+
chunks: result.chunks.map((chunk) => ({
|
|
761
|
+
documentId: chunk.documentId,
|
|
762
|
+
title: chunk.title,
|
|
763
|
+
contentLength: chunk.content?.length || 0
|
|
764
|
+
})),
|
|
765
|
+
contentLength: content.length,
|
|
766
|
+
estimatedTokens: Math.ceil(content.length / 4)
|
|
767
|
+
},
|
|
768
|
+
null,
|
|
769
|
+
2
|
|
770
|
+
)
|
|
771
|
+
}
|
|
772
|
+
]
|
|
773
|
+
};
|
|
774
|
+
}
|
|
607
775
|
const embedding2 = await embeddingsService.createEmbedding({
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
776
|
+
data: {
|
|
777
|
+
title,
|
|
778
|
+
content,
|
|
779
|
+
metadata: metadata || {},
|
|
780
|
+
collectionType: "standalone",
|
|
781
|
+
fieldName: "content"
|
|
782
|
+
}
|
|
613
783
|
});
|
|
614
784
|
return {
|
|
615
785
|
content: [
|
|
@@ -627,7 +797,8 @@ async function handleCreateEmbedding(strapi, args) {
|
|
|
627
797
|
contentLength: content.length,
|
|
628
798
|
metadata: embedding2.metadata,
|
|
629
799
|
createdAt: embedding2.createdAt
|
|
630
|
-
}
|
|
800
|
+
},
|
|
801
|
+
hint: content.length > 4e3 ? "Content is large. Consider using autoChunk: true for better search results." : void 0
|
|
631
802
|
},
|
|
632
803
|
null,
|
|
633
804
|
2
|
|
@@ -713,10 +884,10 @@ function createMcpServer(strapi) {
|
|
|
713
884
|
});
|
|
714
885
|
return server;
|
|
715
886
|
}
|
|
716
|
-
const PLUGIN_ID$
|
|
887
|
+
const PLUGIN_ID$5 = "strapi-content-embeddings";
|
|
717
888
|
const OAUTH_PLUGIN_ID = "strapi-oauth-mcp-manager";
|
|
718
889
|
function createFallbackAuthMiddleware(strapi) {
|
|
719
|
-
const mcpPath = `/api/${PLUGIN_ID$
|
|
890
|
+
const mcpPath = `/api/${PLUGIN_ID$5}/mcp`;
|
|
720
891
|
return async (ctx, next) => {
|
|
721
892
|
if (!ctx.path.startsWith(mcpPath)) {
|
|
722
893
|
return next();
|
|
@@ -742,35 +913,35 @@ const bootstrap = async ({ strapi }) => {
|
|
|
742
913
|
section: "plugins",
|
|
743
914
|
displayName: "Read",
|
|
744
915
|
uid: "read",
|
|
745
|
-
pluginName: PLUGIN_ID$
|
|
916
|
+
pluginName: PLUGIN_ID$5
|
|
746
917
|
},
|
|
747
918
|
{
|
|
748
919
|
section: "plugins",
|
|
749
920
|
displayName: "Update",
|
|
750
921
|
uid: "update",
|
|
751
|
-
pluginName: PLUGIN_ID$
|
|
922
|
+
pluginName: PLUGIN_ID$5
|
|
752
923
|
},
|
|
753
924
|
{
|
|
754
925
|
section: "plugins",
|
|
755
926
|
displayName: "Create",
|
|
756
927
|
uid: "create",
|
|
757
|
-
pluginName: PLUGIN_ID$
|
|
928
|
+
pluginName: PLUGIN_ID$5
|
|
758
929
|
},
|
|
759
930
|
{
|
|
760
931
|
section: "plugins",
|
|
761
932
|
displayName: "Delete",
|
|
762
933
|
uid: "delete",
|
|
763
|
-
pluginName: PLUGIN_ID$
|
|
934
|
+
pluginName: PLUGIN_ID$5
|
|
764
935
|
},
|
|
765
936
|
{
|
|
766
937
|
section: "plugins",
|
|
767
938
|
displayName: "Chat",
|
|
768
939
|
uid: "chat",
|
|
769
|
-
pluginName: PLUGIN_ID$
|
|
940
|
+
pluginName: PLUGIN_ID$5
|
|
770
941
|
}
|
|
771
942
|
];
|
|
772
943
|
await strapi.admin.services.permission.actionProvider.registerMany(actions);
|
|
773
|
-
const pluginConfig = strapi.config.get(`plugin::${PLUGIN_ID$
|
|
944
|
+
const pluginConfig = strapi.config.get(`plugin::${PLUGIN_ID$5}`);
|
|
774
945
|
if (pluginConfig?.openAIApiKey && pluginConfig?.neonConnectionString) {
|
|
775
946
|
try {
|
|
776
947
|
await pluginManager.initialize({
|
|
@@ -779,42 +950,42 @@ const bootstrap = async ({ strapi }) => {
|
|
|
779
950
|
embeddingModel: pluginConfig.embeddingModel
|
|
780
951
|
});
|
|
781
952
|
strapi.contentEmbeddingsManager = pluginManager;
|
|
782
|
-
strapi.log.info(`[${PLUGIN_ID$
|
|
953
|
+
strapi.log.info(`[${PLUGIN_ID$5}] Plugin initialized successfully`);
|
|
783
954
|
} catch (error) {
|
|
784
|
-
strapi.log.error(`[${PLUGIN_ID$
|
|
955
|
+
strapi.log.error(`[${PLUGIN_ID$5}] Failed to initialize:`, error);
|
|
785
956
|
}
|
|
786
957
|
} else {
|
|
787
958
|
strapi.log.warn(
|
|
788
|
-
`[${PLUGIN_ID$
|
|
959
|
+
`[${PLUGIN_ID$5}] Missing configuration. Set openAIApiKey and neonConnectionString in plugin config.`
|
|
789
960
|
);
|
|
790
961
|
}
|
|
791
|
-
const plugin = strapi.plugin(PLUGIN_ID$
|
|
962
|
+
const plugin = strapi.plugin(PLUGIN_ID$5);
|
|
792
963
|
plugin.createMcpServer = () => createMcpServer(strapi);
|
|
793
964
|
plugin.sessions = /* @__PURE__ */ new Map();
|
|
794
965
|
const oauthPlugin = strapi.plugin(OAUTH_PLUGIN_ID);
|
|
795
966
|
if (oauthPlugin) {
|
|
796
|
-
strapi.log.info(`[${PLUGIN_ID$
|
|
967
|
+
strapi.log.info(`[${PLUGIN_ID$5}] OAuth manager detected - OAuth + API token auth enabled`);
|
|
797
968
|
} else {
|
|
798
969
|
const fallbackMiddleware = createFallbackAuthMiddleware();
|
|
799
970
|
strapi.server.use(fallbackMiddleware);
|
|
800
|
-
strapi.log.info(`[${PLUGIN_ID$
|
|
971
|
+
strapi.log.info(`[${PLUGIN_ID$5}] Using API token authentication (OAuth manager not installed)`);
|
|
801
972
|
}
|
|
802
|
-
strapi.log.info(`[${PLUGIN_ID$
|
|
973
|
+
strapi.log.info(`[${PLUGIN_ID$5}] MCP endpoint available at: /api/${PLUGIN_ID$5}/mcp`);
|
|
803
974
|
};
|
|
804
975
|
const destroy = async ({ strapi }) => {
|
|
805
976
|
await pluginManager.destroy();
|
|
806
977
|
console.log("Content Embeddings plugin destroyed");
|
|
807
978
|
};
|
|
808
|
-
const PLUGIN_ID$
|
|
979
|
+
const PLUGIN_ID$4 = "strapi-content-embeddings";
|
|
809
980
|
const register = ({ strapi }) => {
|
|
810
981
|
Object.values(strapi.contentTypes).forEach((contentType) => {
|
|
811
|
-
if (contentType.uid.startsWith("admin::") || contentType.uid.startsWith("strapi::") || contentType.uid === `plugin::${PLUGIN_ID$
|
|
982
|
+
if (contentType.uid.startsWith("admin::") || contentType.uid.startsWith("strapi::") || contentType.uid === `plugin::${PLUGIN_ID$4}.embedding`) {
|
|
812
983
|
return;
|
|
813
984
|
}
|
|
814
985
|
contentType.attributes.embedding = {
|
|
815
986
|
type: "relation",
|
|
816
987
|
relation: "morphOne",
|
|
817
|
-
target: `plugin::${PLUGIN_ID$
|
|
988
|
+
target: `plugin::${PLUGIN_ID$4}.embedding`,
|
|
818
989
|
morphBy: "related",
|
|
819
990
|
private: false,
|
|
820
991
|
configurable: false
|
|
@@ -884,20 +1055,23 @@ const embedding = {
|
|
|
884
1055
|
const contentTypes = {
|
|
885
1056
|
embedding
|
|
886
1057
|
};
|
|
887
|
-
const PLUGIN_ID$
|
|
1058
|
+
const PLUGIN_ID$3 = "strapi-content-embeddings";
|
|
888
1059
|
const controller = ({ strapi }) => ({
|
|
889
1060
|
async createEmbedding(ctx) {
|
|
890
1061
|
try {
|
|
891
|
-
|
|
1062
|
+
console.log("[createEmbedding] Starting, autoChunk:", ctx.request.body?.data?.autoChunk);
|
|
1063
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").createEmbedding(ctx.request.body);
|
|
1064
|
+
console.log("[createEmbedding] Completed, documentId:", result?.documentId);
|
|
892
1065
|
ctx.body = result;
|
|
893
1066
|
} catch (error) {
|
|
1067
|
+
console.error("[createEmbedding] Error:", error.message);
|
|
894
1068
|
ctx.throw(500, error.message || "Failed to create embedding");
|
|
895
1069
|
}
|
|
896
1070
|
},
|
|
897
1071
|
async deleteEmbedding(ctx) {
|
|
898
1072
|
try {
|
|
899
1073
|
const { id } = ctx.params;
|
|
900
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1074
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").deleteEmbedding(id);
|
|
901
1075
|
ctx.body = result;
|
|
902
1076
|
} catch (error) {
|
|
903
1077
|
ctx.throw(500, error.message || "Failed to delete embedding");
|
|
@@ -906,7 +1080,7 @@ const controller = ({ strapi }) => ({
|
|
|
906
1080
|
async updateEmbedding(ctx) {
|
|
907
1081
|
try {
|
|
908
1082
|
const { id } = ctx.params;
|
|
909
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1083
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").updateEmbedding(id, ctx.request.body);
|
|
910
1084
|
ctx.body = result;
|
|
911
1085
|
} catch (error) {
|
|
912
1086
|
ctx.throw(500, error.message || "Failed to update embedding");
|
|
@@ -915,7 +1089,7 @@ const controller = ({ strapi }) => ({
|
|
|
915
1089
|
async getEmbeddings(ctx) {
|
|
916
1090
|
try {
|
|
917
1091
|
const { page, pageSize, filters } = ctx.query;
|
|
918
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1092
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").getEmbeddings({
|
|
919
1093
|
page: page ? parseInt(page, 10) : 1,
|
|
920
1094
|
pageSize: pageSize ? parseInt(pageSize, 10) : 10,
|
|
921
1095
|
filters
|
|
@@ -928,7 +1102,7 @@ const controller = ({ strapi }) => ({
|
|
|
928
1102
|
async getEmbedding(ctx) {
|
|
929
1103
|
try {
|
|
930
1104
|
const { id } = ctx.params;
|
|
931
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1105
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").getEmbedding(id);
|
|
932
1106
|
if (!result) {
|
|
933
1107
|
ctx.throw(404, "Embedding not found");
|
|
934
1108
|
}
|
|
@@ -943,14 +1117,96 @@ const controller = ({ strapi }) => ({
|
|
|
943
1117
|
async queryEmbeddings(ctx) {
|
|
944
1118
|
try {
|
|
945
1119
|
const { query } = ctx.query;
|
|
946
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1120
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").queryEmbeddings(query);
|
|
947
1121
|
ctx.body = result;
|
|
948
1122
|
} catch (error) {
|
|
949
1123
|
ctx.throw(500, error.message || "Failed to query embeddings");
|
|
950
1124
|
}
|
|
1125
|
+
},
|
|
1126
|
+
/**
|
|
1127
|
+
* Get all chunks related to a document
|
|
1128
|
+
* GET /api/strapi-content-embeddings/embeddings/related-chunks/:id
|
|
1129
|
+
*/
|
|
1130
|
+
async getRelatedChunks(ctx) {
|
|
1131
|
+
try {
|
|
1132
|
+
const { id } = ctx.params;
|
|
1133
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").findRelatedChunks(id);
|
|
1134
|
+
console.log(`[getRelatedChunks] Found ${result.length} chunks for document ${id}`);
|
|
1135
|
+
ctx.body = {
|
|
1136
|
+
data: result,
|
|
1137
|
+
count: result.length
|
|
1138
|
+
};
|
|
1139
|
+
} catch (error) {
|
|
1140
|
+
ctx.throw(500, error.message || "Failed to get related chunks");
|
|
1141
|
+
}
|
|
1142
|
+
},
|
|
1143
|
+
/**
|
|
1144
|
+
* Sync embeddings from Neon DB to Strapi DB
|
|
1145
|
+
* GET /api/strapi-content-embeddings/sync
|
|
1146
|
+
*
|
|
1147
|
+
* Query params:
|
|
1148
|
+
* - removeOrphans: boolean (default: false) - Remove Strapi entries that don't exist in Neon
|
|
1149
|
+
* - dryRun: boolean (default: false) - Preview changes without applying them
|
|
1150
|
+
*/
|
|
1151
|
+
async syncFromNeon(ctx) {
|
|
1152
|
+
try {
|
|
1153
|
+
const { removeOrphans, dryRun } = ctx.query;
|
|
1154
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("sync").syncFromNeon({
|
|
1155
|
+
removeOrphans: removeOrphans === "true",
|
|
1156
|
+
dryRun: dryRun === "true"
|
|
1157
|
+
});
|
|
1158
|
+
ctx.body = result;
|
|
1159
|
+
} catch (error) {
|
|
1160
|
+
ctx.throw(500, error.message || "Failed to sync embeddings");
|
|
1161
|
+
}
|
|
1162
|
+
},
|
|
1163
|
+
/**
|
|
1164
|
+
* Get sync status - compare Neon and Strapi without making changes
|
|
1165
|
+
* GET /api/strapi-content-embeddings/sync/status
|
|
1166
|
+
*/
|
|
1167
|
+
async getSyncStatus(ctx) {
|
|
1168
|
+
try {
|
|
1169
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("sync").getSyncStatus();
|
|
1170
|
+
ctx.body = result;
|
|
1171
|
+
} catch (error) {
|
|
1172
|
+
ctx.throw(500, error.message || "Failed to get sync status");
|
|
1173
|
+
}
|
|
1174
|
+
},
|
|
1175
|
+
/**
|
|
1176
|
+
* Debug endpoint to inspect Neon DB contents
|
|
1177
|
+
* GET /api/strapi-content-embeddings/debug/neon
|
|
1178
|
+
*/
|
|
1179
|
+
async debugNeon(ctx) {
|
|
1180
|
+
try {
|
|
1181
|
+
const { pluginManager: pluginManager2 } = require("../plugin-manager");
|
|
1182
|
+
const result = await pluginManager2.debugNeonEmbeddings();
|
|
1183
|
+
ctx.body = {
|
|
1184
|
+
count: result.length,
|
|
1185
|
+
embeddings: result
|
|
1186
|
+
};
|
|
1187
|
+
} catch (error) {
|
|
1188
|
+
ctx.throw(500, error.message || "Failed to debug Neon");
|
|
1189
|
+
}
|
|
1190
|
+
},
|
|
1191
|
+
/**
|
|
1192
|
+
* Recreate all embeddings in Neon from Strapi data
|
|
1193
|
+
* POST /api/strapi-content-embeddings/recreate
|
|
1194
|
+
*
|
|
1195
|
+
* Use this when embeddings were created with incorrect metadata format
|
|
1196
|
+
* WARNING: This will delete ALL existing Neon embeddings and recreate them
|
|
1197
|
+
*/
|
|
1198
|
+
async recreateEmbeddings(ctx) {
|
|
1199
|
+
try {
|
|
1200
|
+
console.log("[recreateEmbeddings] Starting recreation of all embeddings...");
|
|
1201
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("sync").recreateAllEmbeddings();
|
|
1202
|
+
ctx.body = result;
|
|
1203
|
+
} catch (error) {
|
|
1204
|
+
console.error("[recreateEmbeddings] Error:", error.message);
|
|
1205
|
+
ctx.throw(500, error.message || "Failed to recreate embeddings");
|
|
1206
|
+
}
|
|
951
1207
|
}
|
|
952
1208
|
});
|
|
953
|
-
const PLUGIN_ID$
|
|
1209
|
+
const PLUGIN_ID$2 = "strapi-content-embeddings";
|
|
954
1210
|
const SESSION_TIMEOUT_MS = 4 * 60 * 60 * 1e3;
|
|
955
1211
|
function isSessionExpired(session) {
|
|
956
1212
|
return Date.now() - session.createdAt > SESSION_TIMEOUT_MS;
|
|
@@ -968,7 +1224,7 @@ function cleanupExpiredSessions(plugin, strapi) {
|
|
|
968
1224
|
}
|
|
969
1225
|
}
|
|
970
1226
|
if (cleaned > 0) {
|
|
971
|
-
strapi.log.debug(`[${PLUGIN_ID$
|
|
1227
|
+
strapi.log.debug(`[${PLUGIN_ID$2}] Cleaned up ${cleaned} expired MCP sessions`);
|
|
972
1228
|
}
|
|
973
1229
|
}
|
|
974
1230
|
const mcpController = ({ strapi }) => ({
|
|
@@ -976,7 +1232,7 @@ const mcpController = ({ strapi }) => ({
|
|
|
976
1232
|
* Handle MCP requests (POST, GET, DELETE)
|
|
977
1233
|
*/
|
|
978
1234
|
async handle(ctx) {
|
|
979
|
-
const plugin = strapi.plugin(PLUGIN_ID$
|
|
1235
|
+
const plugin = strapi.plugin(PLUGIN_ID$2);
|
|
980
1236
|
if (!plugin.createMcpServer) {
|
|
981
1237
|
ctx.status = 503;
|
|
982
1238
|
ctx.body = {
|
|
@@ -992,7 +1248,7 @@ const mcpController = ({ strapi }) => ({
|
|
|
992
1248
|
const requestedSessionId = ctx.request.headers["mcp-session-id"];
|
|
993
1249
|
let session = requestedSessionId ? plugin.sessions.get(requestedSessionId) : null;
|
|
994
1250
|
if (session && isSessionExpired(session)) {
|
|
995
|
-
strapi.log.debug(`[${PLUGIN_ID$
|
|
1251
|
+
strapi.log.debug(`[${PLUGIN_ID$2}] Session expired, removing: ${requestedSessionId}`);
|
|
996
1252
|
try {
|
|
997
1253
|
session.server.close();
|
|
998
1254
|
} catch {
|
|
@@ -1027,13 +1283,13 @@ const mcpController = ({ strapi }) => ({
|
|
|
1027
1283
|
};
|
|
1028
1284
|
plugin.sessions.set(sessionId, session);
|
|
1029
1285
|
strapi.log.debug(
|
|
1030
|
-
`[${PLUGIN_ID$
|
|
1286
|
+
`[${PLUGIN_ID$2}] New MCP session created: ${sessionId} (auth: ${ctx.state.authMethod || "unknown"})`
|
|
1031
1287
|
);
|
|
1032
1288
|
}
|
|
1033
1289
|
try {
|
|
1034
1290
|
await session.transport.handleRequest(ctx.req, ctx.res, ctx.request.body);
|
|
1035
1291
|
} catch (transportError) {
|
|
1036
|
-
strapi.log.warn(`[${PLUGIN_ID$
|
|
1292
|
+
strapi.log.warn(`[${PLUGIN_ID$2}] Transport error, cleaning up session: ${requestedSessionId}`, {
|
|
1037
1293
|
error: transportError instanceof Error ? transportError.message : String(transportError)
|
|
1038
1294
|
});
|
|
1039
1295
|
try {
|
|
@@ -1056,7 +1312,7 @@ const mcpController = ({ strapi }) => ({
|
|
|
1056
1312
|
}
|
|
1057
1313
|
ctx.respond = false;
|
|
1058
1314
|
} catch (error) {
|
|
1059
|
-
strapi.log.error(`[${PLUGIN_ID$
|
|
1315
|
+
strapi.log.error(`[${PLUGIN_ID$2}] Error handling MCP request`, {
|
|
1060
1316
|
error: error instanceof Error ? error.message : String(error),
|
|
1061
1317
|
method: ctx.method,
|
|
1062
1318
|
path: ctx.path
|
|
@@ -1083,6 +1339,32 @@ const contentApi = [
|
|
|
1083
1339
|
path: "/embeddings-query",
|
|
1084
1340
|
handler: "controller.queryEmbeddings"
|
|
1085
1341
|
},
|
|
1342
|
+
// Sync routes - for cron jobs or manual triggering
|
|
1343
|
+
// Use API token for authentication
|
|
1344
|
+
{
|
|
1345
|
+
method: "GET",
|
|
1346
|
+
path: "/sync",
|
|
1347
|
+
handler: "controller.syncFromNeon",
|
|
1348
|
+
config: {
|
|
1349
|
+
description: "Sync embeddings from Neon DB to Strapi. Query params: removeOrphans=true, dryRun=true"
|
|
1350
|
+
}
|
|
1351
|
+
},
|
|
1352
|
+
{
|
|
1353
|
+
method: "POST",
|
|
1354
|
+
path: "/sync",
|
|
1355
|
+
handler: "controller.syncFromNeon",
|
|
1356
|
+
config: {
|
|
1357
|
+
description: "Sync embeddings from Neon DB to Strapi. Query params: removeOrphans=true, dryRun=true"
|
|
1358
|
+
}
|
|
1359
|
+
},
|
|
1360
|
+
{
|
|
1361
|
+
method: "GET",
|
|
1362
|
+
path: "/sync/status",
|
|
1363
|
+
handler: "controller.getSyncStatus",
|
|
1364
|
+
config: {
|
|
1365
|
+
description: "Get sync status between Neon and Strapi without making changes"
|
|
1366
|
+
}
|
|
1367
|
+
},
|
|
1086
1368
|
// MCP routes - auth handled by middleware
|
|
1087
1369
|
{
|
|
1088
1370
|
method: "POST",
|
|
@@ -1190,6 +1472,45 @@ const admin = [
|
|
|
1190
1472
|
}
|
|
1191
1473
|
]
|
|
1192
1474
|
}
|
|
1475
|
+
},
|
|
1476
|
+
{
|
|
1477
|
+
method: "GET",
|
|
1478
|
+
path: "/embeddings/related-chunks/:id",
|
|
1479
|
+
handler: "controller.getRelatedChunks",
|
|
1480
|
+
config: {
|
|
1481
|
+
policies: [
|
|
1482
|
+
{
|
|
1483
|
+
name: "admin::hasPermissions",
|
|
1484
|
+
config: { actions: ["plugin::strapi-content-embeddings.read"] }
|
|
1485
|
+
}
|
|
1486
|
+
]
|
|
1487
|
+
}
|
|
1488
|
+
},
|
|
1489
|
+
{
|
|
1490
|
+
method: "GET",
|
|
1491
|
+
path: "/debug/neon",
|
|
1492
|
+
handler: "controller.debugNeon",
|
|
1493
|
+
config: {
|
|
1494
|
+
policies: [
|
|
1495
|
+
{
|
|
1496
|
+
name: "admin::hasPermissions",
|
|
1497
|
+
config: { actions: ["plugin::strapi-content-embeddings.read"] }
|
|
1498
|
+
}
|
|
1499
|
+
]
|
|
1500
|
+
}
|
|
1501
|
+
},
|
|
1502
|
+
{
|
|
1503
|
+
method: "POST",
|
|
1504
|
+
path: "/recreate",
|
|
1505
|
+
handler: "controller.recreateEmbeddings",
|
|
1506
|
+
config: {
|
|
1507
|
+
policies: [
|
|
1508
|
+
{
|
|
1509
|
+
name: "admin::hasPermissions",
|
|
1510
|
+
config: { actions: ["plugin::strapi-content-embeddings.update"] }
|
|
1511
|
+
}
|
|
1512
|
+
]
|
|
1513
|
+
}
|
|
1193
1514
|
}
|
|
1194
1515
|
];
|
|
1195
1516
|
const routes = {
|
|
@@ -1202,11 +1523,180 @@ const routes = {
|
|
|
1202
1523
|
routes: [...admin]
|
|
1203
1524
|
}
|
|
1204
1525
|
};
|
|
1205
|
-
const
|
|
1206
|
-
|
|
1526
|
+
/**
 * Separators offered to the splitter, ordered from the coarsest boundary
 * to the finest. The empty string splits between individual characters
 * and is the last resort.
 */
const DEFAULT_SEPARATORS = [
  "\n\n", // paragraphs
  "\n", // lines
  ". ", // sentences
  "! ", // exclamations
  "? ", // questions
  "; ", // semicolons
  ", ", // commas
  " ", // words
  "", // characters (last resort)
];
|
|
1546
|
+
/**
 * Roughly estimate how many tokens a piece of text occupies, using a
 * fixed ratio of four characters per token.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Character length divided by four, rounded up.
 */
function estimateTokens(text) {
  const CHARS_PER_TOKEN = 4;
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN);
}
|
|
1549
|
+
/**
 * Tell whether content is longer than the allowed number of characters.
 *
 * @param {string} content - Content to check.
 * @param {number} [maxChars=4000] - Maximum length that still fits in one piece.
 * @returns {boolean} True when the content length exceeds `maxChars`.
 */
function needsChunking(content, maxChars = 4e3) {
  const { length } = content;
  return length > maxChars;
}
|
|
1552
|
+
/**
 * Split text on a separator while keeping the separator attached to the
 * end of each piece, so that joining the pieces reproduces the input.
 *
 * @param {string} text - Text to split.
 * @param {string} separator - Separator to split on; the empty string
 *   splits the text into individual characters.
 * @returns {string[]} The pieces in order. A trailing empty piece is dropped.
 */
function splitWithSeparator(text, separator) {
  if (separator === "") {
    // Iterate by code point rather than UTF-16 code unit so that a
    // surrogate pair (emoji, astral characters) is never torn in half.
    return Array.from(text);
  }
  const parts = text.split(separator);
  const lastIndex = parts.length - 1;
  const result = [];
  parts.forEach((part, index) => {
    if (index < lastIndex) {
      // Every piece except the last was followed by the separator.
      result.push(part + separator);
    } else if (part) {
      result.push(part);
    }
  });
  return result;
}
|
|
1567
|
+
/**
 * Recursively split text into pieces no longer than `chunkSize`.
 *
 * The first separator in `separators` that occurs in the text is used
 * (falling back to the last one). Adjacent pieces are merged while they
 * still fit. A piece that is too long on its own is split again with the
 * remaining separators, or cut at fixed positions when none are left.
 *
 * @param {string} text - Text to split.
 * @param {number} chunkSize - Maximum length of a returned piece.
 * @param {string[]} separators - Separators to try, in order of preference.
 * @returns {string[]} Pieces in their original order.
 * @throws {RangeError} When the text needs splitting but `chunkSize` is
 *   not positive (the fixed-position cut could never advance).
 */
function splitText(text, chunkSize, separators) {
  if (text.length <= chunkSize) {
    return [text];
  }
  if (chunkSize <= 0) {
    throw new RangeError(`chunkSize must be greater than 0, received ${chunkSize}`);
  }
  // `??` (not `||`) so that a matching empty-string separator is kept.
  const bestSeparator = separators.find((sep) => text.includes(sep)) ?? separators[separators.length - 1];
  // Separators finer than the chosen one, used for oversized pieces.
  const remainingSeparators = separators.slice(separators.indexOf(bestSeparator) + 1);
  const splits = splitWithSeparator(text, bestSeparator);
  const chunks = [];
  let currentChunk = "";
  for (const split of splits) {
    if ((currentChunk + split).length <= chunkSize) {
      currentChunk += split;
      continue;
    }
    if (currentChunk) {
      chunks.push(currentChunk);
    }
    if (split.length <= chunkSize) {
      // The piece fits on its own: it starts the next chunk.
      currentChunk = split;
      continue;
    }
    if (remainingSeparators.length > 0) {
      chunks.push(...splitText(split, chunkSize, remainingSeparators));
    } else {
      // No separator left: cut at fixed positions.
      for (let i = 0; i < split.length; i += chunkSize) {
        chunks.push(split.slice(i, i + chunkSize));
      }
    }
    currentChunk = "";
  }
  if (currentChunk) {
    chunks.push(currentChunk);
  }
  return chunks;
}
|
|
1608
|
+
/**
 * Prefix every chunk after the first with the tail of the chunk before it.
 *
 * @param {string[]} chunks - Chunks without overlap.
 * @param {number} overlap - Number of trailing characters to copy forward.
 * @returns {string[]} The same array when there is nothing to do
 *   (non-positive overlap or fewer than two chunks), otherwise a new array.
 */
function addOverlap(chunks, overlap) {
  const nothingToDo = overlap <= 0 || chunks.length <= 1;
  if (nothingToDo) {
    return chunks;
  }
  return chunks.map((current, index) => {
    if (index === 0) {
      return current;
    }
    const tail = chunks[index - 1].slice(-overlap);
    return tail + current;
  });
}
|
|
1624
|
+
/**
 * Split content into overlapping chunks ready for embedding.
 *
 * The content is trimmed first. Content that fits into `chunkSize` is
 * returned as a single chunk. Otherwise it is split into pieces of at most
 * `chunkSize - chunkOverlap` characters and each piece after the first is
 * prefixed with the last `chunkOverlap` characters of the previous piece.
 *
 * @param {string} content - Text to chunk.
 * @param {object} [options2] - Chunking options.
 * @param {number} [options2.chunkSize=4000] - Maximum chunk length.
 * @param {number} [options2.chunkOverlap=200] - Characters shared between
 *   neighbouring chunks.
 * @param {string[]} [options2.separators] - Separators to split on, in
 *   order of preference.
 * @returns {Array<{text: string, chunkIndex: number, totalChunks: number,
 *   startOffset: number, endOffset: number}>} Chunks in order; empty when
 *   the content is blank. Offsets are accumulated from the lengths of the
 *   pieces before the overlap is added.
 * @throws {RangeError} When the content must be split and `chunkOverlap`
 *   is not smaller than `chunkSize`.
 */
function chunkContent(content, options2 = {}) {
  const {
    chunkSize = 4e3,
    chunkOverlap = 200,
    separators = DEFAULT_SEPARATORS
  } = options2;
  const cleanContent = content.trim();
  if (!cleanContent) {
    return [];
  }
  if (cleanContent.length <= chunkSize) {
    return [{
      text: cleanContent,
      chunkIndex: 0,
      totalChunks: 1,
      startOffset: 0,
      endOffset: cleanContent.length
    }];
  }
  // A non-positive piece size would make the splitter loop forever.
  const rawChunkSize = chunkSize - chunkOverlap;
  if (rawChunkSize <= 0) {
    throw new RangeError(
      `chunkOverlap (${chunkOverlap}) must be smaller than chunkSize (${chunkSize})`
    );
  }
  const rawChunks = splitText(cleanContent, rawChunkSize, separators);
  const chunksWithOverlap = addOverlap(rawChunks, chunkOverlap);
  const result = [];
  let currentOffset = 0;
  chunksWithOverlap.forEach((chunkText, i) => {
    const rawLength = rawChunks[i].length;
    const text = chunkText.trim();
    // Whitespace-only chunks are dropped, but still advance the offset.
    if (text) {
      result.push({
        text,
        chunkIndex: result.length,
        totalChunks: 0,
        startOffset: currentOffset,
        endOffset: currentOffset + rawLength
      });
    }
    currentOffset += rawLength;
  });
  // The total is only known once blank chunks have been filtered out.
  for (const chunk of result) {
    chunk.totalChunks = result.length;
  }
  return result;
}
|
|
1667
|
+
/**
 * Build the title for one chunk of a document.
 *
 * @param {string} baseTitle - Title of the whole document.
 * @param {number} chunkIndex - Zero-based position of the chunk.
 * @param {number} totalChunks - Number of chunks the document was split into.
 * @returns {string} The base title unchanged for a single chunk, otherwise
 *   the base title followed by a one-based " [Part i/n]" suffix.
 */
function formatChunkTitle(baseTitle, chunkIndex, totalChunks) {
  const isSinglePart = totalChunks === 1;
  return isSinglePart ? baseTitle : `${baseTitle} [Part ${chunkIndex + 1}/${totalChunks}]`;
}
|
|
1673
|
+
// Plugin identifier and the UID of the plugin's "embedding" content type.
const PLUGIN_ID$1 = "strapi-content-embeddings";
const CONTENT_TYPE_UID$1 = ["plugin", `${PLUGIN_ID$1}.embedding`].join("::");
|
|
1207
1675
|
const embeddings = ({ strapi }) => ({
|
|
1676
|
+
/**
|
|
1677
|
+
* Get plugin config with defaults
|
|
1678
|
+
*/
|
|
1679
|
+
getConfig() {
|
|
1680
|
+
const config2 = strapi.config.get("plugin::strapi-content-embeddings") || {};
|
|
1681
|
+
return {
|
|
1682
|
+
chunkSize: config2.chunkSize || 4e3,
|
|
1683
|
+
chunkOverlap: config2.chunkOverlap || 200,
|
|
1684
|
+
autoChunk: config2.autoChunk || false,
|
|
1685
|
+
...config2
|
|
1686
|
+
};
|
|
1687
|
+
},
|
|
1688
|
+
/**
|
|
1689
|
+
* Create a single embedding (no chunking)
|
|
1690
|
+
*/
|
|
1208
1691
|
async createEmbedding(data) {
|
|
1209
|
-
const { title, content, collectionType, fieldName, metadata, related } = data.data;
|
|
1692
|
+
const { title, content, collectionType, fieldName, metadata, related, autoChunk } = data.data;
|
|
1693
|
+
const config2 = this.getConfig();
|
|
1694
|
+
const shouldChunk = autoChunk ?? config2.autoChunk;
|
|
1695
|
+
const chunkSize = config2.chunkSize || 4e3;
|
|
1696
|
+
if (shouldChunk && needsChunking(content, chunkSize)) {
|
|
1697
|
+
const result = await this.createChunkedEmbedding(data);
|
|
1698
|
+
return result.entity;
|
|
1699
|
+
}
|
|
1210
1700
|
const entityData = {
|
|
1211
1701
|
title,
|
|
1212
1702
|
content,
|
|
@@ -1217,7 +1707,7 @@ const embeddings = ({ strapi }) => ({
|
|
|
1217
1707
|
if (related && related.__type && related.id) {
|
|
1218
1708
|
entityData.related = related;
|
|
1219
1709
|
}
|
|
1220
|
-
const entity = await strapi.documents(CONTENT_TYPE_UID).create({
|
|
1710
|
+
const entity = await strapi.documents(CONTENT_TYPE_UID$1).create({
|
|
1221
1711
|
data: entityData
|
|
1222
1712
|
});
|
|
1223
1713
|
if (!pluginManager.isInitialized()) {
|
|
@@ -1232,7 +1722,7 @@ const embeddings = ({ strapi }) => ({
|
|
|
1232
1722
|
collectionType: collectionType || "standalone",
|
|
1233
1723
|
fieldName: fieldName || "content"
|
|
1234
1724
|
});
|
|
1235
|
-
const updatedEntity = await strapi.documents(CONTENT_TYPE_UID).update({
|
|
1725
|
+
const updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1236
1726
|
documentId: entity.documentId,
|
|
1237
1727
|
data: {
|
|
1238
1728
|
embeddingId: result.embeddingId,
|
|
@@ -1245,8 +1735,120 @@ const embeddings = ({ strapi }) => ({
|
|
|
1245
1735
|
return entity;
|
|
1246
1736
|
}
|
|
1247
1737
|
},
|
|
1738
|
+
/**
|
|
1739
|
+
* Create embeddings with automatic chunking for large content
|
|
1740
|
+
* Creates multiple embedding entities, one per chunk
|
|
1741
|
+
*/
|
|
1742
|
+
async createChunkedEmbedding(data) {
|
|
1743
|
+
const { title, content, collectionType, fieldName, metadata, related } = data.data;
|
|
1744
|
+
const config2 = this.getConfig();
|
|
1745
|
+
const chunkSize = config2.chunkSize || 4e3;
|
|
1746
|
+
const chunkOverlap = config2.chunkOverlap || 200;
|
|
1747
|
+
const chunks = chunkContent(content, { chunkSize, chunkOverlap });
|
|
1748
|
+
if (chunks.length === 0) {
|
|
1749
|
+
throw new Error("Content is empty or could not be chunked");
|
|
1750
|
+
}
|
|
1751
|
+
if (chunks.length === 1) {
|
|
1752
|
+
const entity = await this.createEmbedding({
|
|
1753
|
+
data: {
|
|
1754
|
+
...data.data,
|
|
1755
|
+
autoChunk: false
|
|
1756
|
+
// Prevent recursive chunking
|
|
1757
|
+
}
|
|
1758
|
+
});
|
|
1759
|
+
return {
|
|
1760
|
+
entity,
|
|
1761
|
+
chunks: [entity],
|
|
1762
|
+
totalChunks: 1,
|
|
1763
|
+
wasChunked: false
|
|
1764
|
+
};
|
|
1765
|
+
}
|
|
1766
|
+
console.log(`[createChunkedEmbedding] Chunking content into ${chunks.length} parts (chunkSize: ${chunkSize}, overlap: ${chunkOverlap})`);
|
|
1767
|
+
const createdChunks = [];
|
|
1768
|
+
let parentDocumentId = null;
|
|
1769
|
+
for (const chunk of chunks) {
|
|
1770
|
+
console.log(`[createChunkedEmbedding] Processing chunk ${chunk.chunkIndex + 1}/${chunks.length}`);
|
|
1771
|
+
const chunkTitle = formatChunkTitle(title, chunk.chunkIndex, chunk.totalChunks);
|
|
1772
|
+
const chunkMetadata = {
|
|
1773
|
+
...metadata,
|
|
1774
|
+
isChunk: true,
|
|
1775
|
+
chunkIndex: chunk.chunkIndex,
|
|
1776
|
+
totalChunks: chunk.totalChunks,
|
|
1777
|
+
startOffset: chunk.startOffset,
|
|
1778
|
+
endOffset: chunk.endOffset,
|
|
1779
|
+
originalTitle: title,
|
|
1780
|
+
parentDocumentId,
|
|
1781
|
+
estimatedTokens: estimateTokens(chunk.text)
|
|
1782
|
+
};
|
|
1783
|
+
const entityData = {
|
|
1784
|
+
title: chunkTitle,
|
|
1785
|
+
content: chunk.text,
|
|
1786
|
+
collectionType: collectionType || "standalone",
|
|
1787
|
+
fieldName: fieldName || "content",
|
|
1788
|
+
metadata: chunkMetadata
|
|
1789
|
+
};
|
|
1790
|
+
if (chunk.chunkIndex === 0 && related && related.__type && related.id) {
|
|
1791
|
+
entityData.related = related;
|
|
1792
|
+
}
|
|
1793
|
+
console.log(`[chunk ${chunk.chunkIndex + 1}] Creating entity in DB...`);
|
|
1794
|
+
const entity = await strapi.documents(CONTENT_TYPE_UID$1).create({
|
|
1795
|
+
data: entityData
|
|
1796
|
+
});
|
|
1797
|
+
console.log(`[chunk ${chunk.chunkIndex + 1}] Entity created: ${entity.documentId}`);
|
|
1798
|
+
if (chunk.chunkIndex === 0) {
|
|
1799
|
+
parentDocumentId = entity.documentId;
|
|
1800
|
+
} else {
|
|
1801
|
+
console.log(`[chunk ${chunk.chunkIndex + 1}] Updating metadata with parent ref...`);
|
|
1802
|
+
await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1803
|
+
documentId: entity.documentId,
|
|
1804
|
+
data: {
|
|
1805
|
+
metadata: {
|
|
1806
|
+
...chunkMetadata,
|
|
1807
|
+
parentDocumentId
|
|
1808
|
+
}
|
|
1809
|
+
}
|
|
1810
|
+
});
|
|
1811
|
+
console.log(`[chunk ${chunk.chunkIndex + 1}] Metadata updated`);
|
|
1812
|
+
}
|
|
1813
|
+
if (pluginManager.isInitialized()) {
|
|
1814
|
+
try {
|
|
1815
|
+
console.log(`[chunk ${chunk.chunkIndex + 1}] Creating OpenAI embedding...`);
|
|
1816
|
+
const result = await pluginManager.createEmbedding({
|
|
1817
|
+
id: entity.documentId,
|
|
1818
|
+
title: chunkTitle,
|
|
1819
|
+
content: chunk.text,
|
|
1820
|
+
collectionType: collectionType || "standalone",
|
|
1821
|
+
fieldName: fieldName || "content"
|
|
1822
|
+
});
|
|
1823
|
+
console.log(`[chunk ${chunk.chunkIndex + 1}] OpenAI embedding created`);
|
|
1824
|
+
console.log(`[chunk ${chunk.chunkIndex + 1}] Saving embedding to DB...`);
|
|
1825
|
+
const updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1826
|
+
documentId: entity.documentId,
|
|
1827
|
+
data: {
|
|
1828
|
+
embeddingId: result.embeddingId,
|
|
1829
|
+
embedding: result.embedding
|
|
1830
|
+
}
|
|
1831
|
+
});
|
|
1832
|
+
console.log(`[chunk ${chunk.chunkIndex + 1}] Chunk complete`);
|
|
1833
|
+
createdChunks.push(updatedEntity);
|
|
1834
|
+
} catch (error) {
|
|
1835
|
+
console.error(`[chunk ${chunk.chunkIndex + 1}] FAILED:`, error.message || error);
|
|
1836
|
+
createdChunks.push(entity);
|
|
1837
|
+
}
|
|
1838
|
+
} else {
|
|
1839
|
+
createdChunks.push(entity);
|
|
1840
|
+
}
|
|
1841
|
+
}
|
|
1842
|
+
console.log(`[createChunkedEmbedding] Completed, created ${createdChunks.length} chunks, first documentId: ${createdChunks[0]?.documentId}`);
|
|
1843
|
+
return {
|
|
1844
|
+
entity: createdChunks[0],
|
|
1845
|
+
chunks: createdChunks,
|
|
1846
|
+
totalChunks: createdChunks.length,
|
|
1847
|
+
wasChunked: true
|
|
1848
|
+
};
|
|
1849
|
+
},
|
|
1248
1850
|
async deleteEmbedding(id) {
|
|
1249
|
-
const currentEntry = await strapi.documents(CONTENT_TYPE_UID).findOne({
|
|
1851
|
+
const currentEntry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1250
1852
|
documentId: String(id)
|
|
1251
1853
|
});
|
|
1252
1854
|
if (!currentEntry) {
|
|
@@ -1259,25 +1861,180 @@ const embeddings = ({ strapi }) => ({
|
|
|
1259
1861
|
console.error("Failed to delete from vector store:", error);
|
|
1260
1862
|
}
|
|
1261
1863
|
}
|
|
1262
|
-
const deletedEntry = await strapi.documents(CONTENT_TYPE_UID).delete({
|
|
1864
|
+
const deletedEntry = await strapi.documents(CONTENT_TYPE_UID$1).delete({
|
|
1263
1865
|
documentId: String(id)
|
|
1264
1866
|
});
|
|
1265
1867
|
return deletedEntry;
|
|
1266
1868
|
},
|
|
1869
|
+
/**
|
|
1870
|
+
* Find all chunks related to a parent document
|
|
1871
|
+
* Returns chunks including the parent itself
|
|
1872
|
+
*/
|
|
1873
|
+
async findRelatedChunks(documentId) {
|
|
1874
|
+
const entry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1875
|
+
documentId
|
|
1876
|
+
});
|
|
1877
|
+
if (!entry) {
|
|
1878
|
+
return [];
|
|
1879
|
+
}
|
|
1880
|
+
const metadata = entry.metadata;
|
|
1881
|
+
const parentId = metadata?.parentDocumentId || documentId;
|
|
1882
|
+
const isChunked = metadata?.isChunk === true;
|
|
1883
|
+
if (!isChunked && !metadata?.parentDocumentId) {
|
|
1884
|
+
const children = await strapi.documents(CONTENT_TYPE_UID$1).findMany({
|
|
1885
|
+
filters: {
|
|
1886
|
+
metadata: {
|
|
1887
|
+
$containsi: `"parentDocumentId":"${documentId}"`
|
|
1888
|
+
}
|
|
1889
|
+
},
|
|
1890
|
+
limit: -1
|
|
1891
|
+
// No limit - get all
|
|
1892
|
+
});
|
|
1893
|
+
console.log(`[findRelatedChunks] Found ${children.length} children for parent ${documentId}`);
|
|
1894
|
+
if (children.length === 0) {
|
|
1895
|
+
return [entry];
|
|
1896
|
+
}
|
|
1897
|
+
return [entry, ...children];
|
|
1898
|
+
}
|
|
1899
|
+
const allChunks = await strapi.documents(CONTENT_TYPE_UID$1).findMany({
|
|
1900
|
+
filters: {
|
|
1901
|
+
$or: [
|
|
1902
|
+
{ documentId: parentId },
|
|
1903
|
+
{
|
|
1904
|
+
metadata: {
|
|
1905
|
+
$containsi: `"parentDocumentId":"${parentId}"`
|
|
1906
|
+
}
|
|
1907
|
+
}
|
|
1908
|
+
]
|
|
1909
|
+
},
|
|
1910
|
+
limit: -1
|
|
1911
|
+
// No limit - get all
|
|
1912
|
+
});
|
|
1913
|
+
console.log(`[findRelatedChunks] Found ${allChunks.length} total chunks for parent ${parentId}`);
|
|
1914
|
+
return allChunks.sort((a, b) => {
|
|
1915
|
+
const aIndex = a.metadata?.chunkIndex ?? 0;
|
|
1916
|
+
const bIndex = b.metadata?.chunkIndex ?? 0;
|
|
1917
|
+
return aIndex - bIndex;
|
|
1918
|
+
});
|
|
1919
|
+
},
|
|
1920
|
+
/**
|
|
1921
|
+
* Delete all chunks related to a parent document
|
|
1922
|
+
*/
|
|
1923
|
+
async deleteRelatedChunks(documentId) {
|
|
1924
|
+
const chunks = await this.findRelatedChunks(documentId);
|
|
1925
|
+
for (const chunk of chunks) {
|
|
1926
|
+
if (pluginManager.isInitialized()) {
|
|
1927
|
+
try {
|
|
1928
|
+
await pluginManager.deleteEmbedding(chunk.documentId);
|
|
1929
|
+
} catch (error) {
|
|
1930
|
+
console.error(`Failed to delete chunk ${chunk.documentId} from vector store:`, error);
|
|
1931
|
+
}
|
|
1932
|
+
}
|
|
1933
|
+
await strapi.documents(CONTENT_TYPE_UID$1).delete({
|
|
1934
|
+
documentId: chunk.documentId
|
|
1935
|
+
});
|
|
1936
|
+
}
|
|
1937
|
+
return chunks.length;
|
|
1938
|
+
},
|
|
1939
|
+
/**
|
|
1940
|
+
* Update embeddings with automatic chunking support
|
|
1941
|
+
* Handles re-chunking when content changes and exceeds chunk size
|
|
1942
|
+
*/
|
|
1943
|
+
async updateChunkedEmbedding(id, data) {
|
|
1944
|
+
const { title, content, metadata, autoChunk } = data.data;
|
|
1945
|
+
const config2 = this.getConfig();
|
|
1946
|
+
const currentEntry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1947
|
+
documentId: id
|
|
1948
|
+
});
|
|
1949
|
+
if (!currentEntry) {
|
|
1950
|
+
throw new Error(`Embedding with id ${id} not found`);
|
|
1951
|
+
}
|
|
1952
|
+
const currentMetadata = currentEntry.metadata;
|
|
1953
|
+
const parentDocumentId = currentMetadata?.parentDocumentId || id;
|
|
1954
|
+
const newContent = content ?? currentEntry.content;
|
|
1955
|
+
const newTitle = title ?? currentMetadata?.originalTitle ?? currentEntry.title;
|
|
1956
|
+
const shouldChunk = autoChunk ?? config2.autoChunk;
|
|
1957
|
+
const chunkSize = config2.chunkSize || 4e3;
|
|
1958
|
+
const contentNeedsChunking = shouldChunk && needsChunking(newContent, chunkSize);
|
|
1959
|
+
const existingChunks = await this.findRelatedChunks(id);
|
|
1960
|
+
let originalRelated;
|
|
1961
|
+
const firstChunk = existingChunks.find(
|
|
1962
|
+
(c) => c.metadata?.chunkIndex === 0 || c.documentId === parentDocumentId
|
|
1963
|
+
);
|
|
1964
|
+
if (firstChunk?.related) {
|
|
1965
|
+
originalRelated = firstChunk.related;
|
|
1966
|
+
}
|
|
1967
|
+
const deletedCount = await this.deleteRelatedChunks(id);
|
|
1968
|
+
console.log(`Deleted ${deletedCount} existing chunk(s) for update`);
|
|
1969
|
+
const preservedMetadata = { ...metadata };
|
|
1970
|
+
delete preservedMetadata?.isChunk;
|
|
1971
|
+
delete preservedMetadata?.chunkIndex;
|
|
1972
|
+
delete preservedMetadata?.totalChunks;
|
|
1973
|
+
delete preservedMetadata?.startOffset;
|
|
1974
|
+
delete preservedMetadata?.endOffset;
|
|
1975
|
+
delete preservedMetadata?.originalTitle;
|
|
1976
|
+
delete preservedMetadata?.parentDocumentId;
|
|
1977
|
+
delete preservedMetadata?.estimatedTokens;
|
|
1978
|
+
if (contentNeedsChunking) {
|
|
1979
|
+
return await this.createChunkedEmbedding({
|
|
1980
|
+
data: {
|
|
1981
|
+
title: newTitle.replace(/\s*\[Part \d+\/\d+\]$/, ""),
|
|
1982
|
+
// Remove old part suffix
|
|
1983
|
+
content: newContent,
|
|
1984
|
+
collectionType: currentEntry.collectionType || "standalone",
|
|
1985
|
+
fieldName: currentEntry.fieldName || "content",
|
|
1986
|
+
metadata: preservedMetadata,
|
|
1987
|
+
related: originalRelated,
|
|
1988
|
+
autoChunk: true
|
|
1989
|
+
}
|
|
1990
|
+
});
|
|
1991
|
+
} else {
|
|
1992
|
+
const entity = await this.createEmbedding({
|
|
1993
|
+
data: {
|
|
1994
|
+
title: newTitle.replace(/\s*\[Part \d+\/\d+\]$/, ""),
|
|
1995
|
+
// Remove old part suffix
|
|
1996
|
+
content: newContent,
|
|
1997
|
+
collectionType: currentEntry.collectionType || "standalone",
|
|
1998
|
+
fieldName: currentEntry.fieldName || "content",
|
|
1999
|
+
metadata: preservedMetadata,
|
|
2000
|
+
related: originalRelated,
|
|
2001
|
+
autoChunk: false
|
|
2002
|
+
}
|
|
2003
|
+
});
|
|
2004
|
+
return {
|
|
2005
|
+
entity,
|
|
2006
|
+
chunks: [entity],
|
|
2007
|
+
totalChunks: 1,
|
|
2008
|
+
wasChunked: false
|
|
2009
|
+
};
|
|
2010
|
+
}
|
|
2011
|
+
},
|
|
1267
2012
|
async updateEmbedding(id, data) {
|
|
1268
|
-
const { title, content, metadata } = data.data;
|
|
1269
|
-
const
|
|
2013
|
+
const { title, content, metadata, autoChunk } = data.data;
|
|
2014
|
+
const config2 = this.getConfig();
|
|
2015
|
+
const currentEntry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1270
2016
|
documentId: id
|
|
1271
2017
|
});
|
|
1272
2018
|
if (!currentEntry) {
|
|
1273
2019
|
throw new Error(`Embedding with id ${id} not found`);
|
|
1274
2020
|
}
|
|
2021
|
+
const currentMetadata = currentEntry.metadata;
|
|
2022
|
+
const isCurrentlyChunked = currentMetadata?.isChunk === true;
|
|
2023
|
+
const hasRelatedChunks = currentMetadata?.parentDocumentId || isCurrentlyChunked;
|
|
2024
|
+
const shouldChunk = autoChunk ?? config2.autoChunk;
|
|
2025
|
+
const chunkSize = config2.chunkSize || 4e3;
|
|
2026
|
+
const newContent = content ?? currentEntry.content;
|
|
2027
|
+
const contentNeedsChunking = shouldChunk && needsChunking(newContent, chunkSize);
|
|
2028
|
+
const contentChanged = content !== void 0 && content !== currentEntry.content;
|
|
2029
|
+
if (hasRelatedChunks || contentNeedsChunking) {
|
|
2030
|
+
const result = await this.updateChunkedEmbedding(id, data);
|
|
2031
|
+
return result.entity;
|
|
2032
|
+
}
|
|
1275
2033
|
const updateData = {};
|
|
1276
2034
|
if (title !== void 0) updateData.title = title;
|
|
1277
2035
|
if (content !== void 0) updateData.content = content;
|
|
1278
2036
|
if (metadata !== void 0) updateData.metadata = metadata;
|
|
1279
|
-
|
|
1280
|
-
let updatedEntity = await strapi.documents(CONTENT_TYPE_UID).update({
|
|
2037
|
+
let updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1281
2038
|
documentId: id,
|
|
1282
2039
|
data: updateData
|
|
1283
2040
|
});
|
|
@@ -1291,7 +2048,7 @@ const embeddings = ({ strapi }) => ({
|
|
|
1291
2048
|
collectionType: currentEntry.collectionType || "standalone",
|
|
1292
2049
|
fieldName: currentEntry.fieldName || "content"
|
|
1293
2050
|
});
|
|
1294
|
-
updatedEntity = await strapi.documents(CONTENT_TYPE_UID).update({
|
|
2051
|
+
updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1295
2052
|
documentId: id,
|
|
1296
2053
|
data: {
|
|
1297
2054
|
embeddingId: result.embeddingId,
|
|
@@ -1320,7 +2077,7 @@ const embeddings = ({ strapi }) => ({
|
|
|
1320
2077
|
}
|
|
1321
2078
|
},
|
|
1322
2079
|
async getEmbedding(id) {
|
|
1323
|
-
return await strapi.documents(CONTENT_TYPE_UID).findOne({
|
|
2080
|
+
return await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1324
2081
|
documentId: String(id)
|
|
1325
2082
|
});
|
|
1326
2083
|
},
|
|
@@ -1329,12 +2086,12 @@ const embeddings = ({ strapi }) => ({
|
|
|
1329
2086
|
const pageSize = params?.pageSize || 10;
|
|
1330
2087
|
const start = (page - 1) * pageSize;
|
|
1331
2088
|
const [data, totalCount] = await Promise.all([
|
|
1332
|
-
strapi.documents(CONTENT_TYPE_UID).findMany({
|
|
2089
|
+
strapi.documents(CONTENT_TYPE_UID$1).findMany({
|
|
1333
2090
|
limit: pageSize,
|
|
1334
2091
|
start,
|
|
1335
2092
|
filters: params?.filters
|
|
1336
2093
|
}),
|
|
1337
|
-
strapi.documents(CONTENT_TYPE_UID).count({
|
|
2094
|
+
strapi.documents(CONTENT_TYPE_UID$1).count({
|
|
1338
2095
|
filters: params?.filters
|
|
1339
2096
|
})
|
|
1340
2097
|
]);
|
|
@@ -1345,8 +2102,304 @@ const embeddings = ({ strapi }) => ({
|
|
|
1345
2102
|
};
|
|
1346
2103
|
}
|
|
1347
2104
|
});
|
|
2105
|
+
// Plugin identifier and the UID of the plugin's "embedding" content type.
const PLUGIN_ID = "strapi-content-embeddings";
const CONTENT_TYPE_UID = ["plugin", `${PLUGIN_ID}.embedding`].join("::");
|
|
2107
|
+
const sync = ({ strapi }) => ({
|
|
2108
|
+
/**
|
|
2109
|
+
* Sync embeddings from Neon DB to Strapi DB
|
|
2110
|
+
*
|
|
2111
|
+
* This performs the following operations:
|
|
2112
|
+
* 1. Fetches all embeddings from Neon DB (source of truth)
|
|
2113
|
+
* 2. Fetches all embeddings from Strapi DB
|
|
2114
|
+
* 3. Creates missing entries in Strapi that exist in Neon
|
|
2115
|
+
* 4. Updates Strapi entries where content differs from Neon
|
|
2116
|
+
* 5. Optionally removes orphaned Strapi entries (no matching Neon record)
|
|
2117
|
+
*/
|
|
2118
|
+
async syncFromNeon(options2) {
|
|
2119
|
+
const { removeOrphans = false, dryRun = false } = options2 || {};
|
|
2120
|
+
const result = {
|
|
2121
|
+
success: false,
|
|
2122
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2123
|
+
neonCount: 0,
|
|
2124
|
+
strapiCount: 0,
|
|
2125
|
+
actions: {
|
|
2126
|
+
created: 0,
|
|
2127
|
+
updated: 0,
|
|
2128
|
+
orphansRemoved: 0
|
|
2129
|
+
},
|
|
2130
|
+
details: {
|
|
2131
|
+
created: [],
|
|
2132
|
+
updated: [],
|
|
2133
|
+
orphansRemoved: []
|
|
2134
|
+
},
|
|
2135
|
+
errors: []
|
|
2136
|
+
};
|
|
2137
|
+
if (!pluginManager.isInitialized()) {
|
|
2138
|
+
result.errors.push(
|
|
2139
|
+
"Plugin manager not initialized. Check your Neon and OpenAI configuration."
|
|
2140
|
+
);
|
|
2141
|
+
return result;
|
|
2142
|
+
}
|
|
2143
|
+
try {
|
|
2144
|
+
const neonEmbeddings = await pluginManager.getAllNeonEmbeddings();
|
|
2145
|
+
result.neonCount = neonEmbeddings.length;
|
|
2146
|
+
const strapiEmbeddings = await strapi.documents(CONTENT_TYPE_UID).findMany({
|
|
2147
|
+
limit: 1e4
|
|
2148
|
+
// High limit to get all
|
|
2149
|
+
});
|
|
2150
|
+
result.strapiCount = strapiEmbeddings.length;
|
|
2151
|
+
const neonBystrapiId = /* @__PURE__ */ new Map();
|
|
2152
|
+
for (const neon of neonEmbeddings) {
|
|
2153
|
+
if (neon.strapiId) {
|
|
2154
|
+
neonBystrapiId.set(neon.strapiId, neon);
|
|
2155
|
+
}
|
|
2156
|
+
}
|
|
2157
|
+
const strapiByDocumentId = /* @__PURE__ */ new Map();
|
|
2158
|
+
for (const strapi2 of strapiEmbeddings) {
|
|
2159
|
+
strapiByDocumentId.set(strapi2.documentId, strapi2);
|
|
2160
|
+
}
|
|
2161
|
+
for (const neon of neonEmbeddings) {
|
|
2162
|
+
if (!neon.strapiId) {
|
|
2163
|
+
result.errors.push(
|
|
2164
|
+
`Neon embedding ${neon.id} has no strapiId in metadata`
|
|
2165
|
+
);
|
|
2166
|
+
continue;
|
|
2167
|
+
}
|
|
2168
|
+
const existingStrapi = strapiByDocumentId.get(neon.strapiId);
|
|
2169
|
+
if (!existingStrapi) {
|
|
2170
|
+
if (!dryRun) {
|
|
2171
|
+
try {
|
|
2172
|
+
await strapi.documents(CONTENT_TYPE_UID).create({
|
|
2173
|
+
data: {
|
|
2174
|
+
documentId: neon.strapiId,
|
|
2175
|
+
title: neon.title,
|
|
2176
|
+
content: neon.content,
|
|
2177
|
+
embeddingId: neon.id,
|
|
2178
|
+
collectionType: neon.collectionType,
|
|
2179
|
+
fieldName: neon.fieldName
|
|
2180
|
+
}
|
|
2181
|
+
});
|
|
2182
|
+
result.actions.created++;
|
|
2183
|
+
result.details.created.push(
|
|
2184
|
+
`${neon.strapiId} (${neon.title || "untitled"})`
|
|
2185
|
+
);
|
|
2186
|
+
} catch (error) {
|
|
2187
|
+
result.errors.push(
|
|
2188
|
+
`Failed to create Strapi entry for ${neon.strapiId}: ${error}`
|
|
2189
|
+
);
|
|
2190
|
+
}
|
|
2191
|
+
} else {
|
|
2192
|
+
result.actions.created++;
|
|
2193
|
+
result.details.created.push(
|
|
2194
|
+
`[DRY RUN] ${neon.strapiId} (${neon.title || "untitled"})`
|
|
2195
|
+
);
|
|
2196
|
+
}
|
|
2197
|
+
} else {
|
|
2198
|
+
const contentChanged = existingStrapi.content !== neon.content;
|
|
2199
|
+
const titleChanged = existingStrapi.title !== neon.title;
|
|
2200
|
+
const embeddingIdMissing = !existingStrapi.embeddingId;
|
|
2201
|
+
if (contentChanged || titleChanged || embeddingIdMissing) {
|
|
2202
|
+
if (!dryRun) {
|
|
2203
|
+
try {
|
|
2204
|
+
await strapi.documents(CONTENT_TYPE_UID).update({
|
|
2205
|
+
documentId: neon.strapiId,
|
|
2206
|
+
data: {
|
|
2207
|
+
title: neon.title,
|
|
2208
|
+
content: neon.content,
|
|
2209
|
+
embeddingId: neon.id
|
|
2210
|
+
}
|
|
2211
|
+
});
|
|
2212
|
+
result.actions.updated++;
|
|
2213
|
+
result.details.updated.push(
|
|
2214
|
+
`${neon.strapiId} (${neon.title || "untitled"})`
|
|
2215
|
+
);
|
|
2216
|
+
} catch (error) {
|
|
2217
|
+
result.errors.push(
|
|
2218
|
+
`Failed to update Strapi entry ${neon.strapiId}: ${error}`
|
|
2219
|
+
);
|
|
2220
|
+
}
|
|
2221
|
+
} else {
|
|
2222
|
+
result.actions.updated++;
|
|
2223
|
+
result.details.updated.push(
|
|
2224
|
+
`[DRY RUN] ${neon.strapiId} (${neon.title || "untitled"})`
|
|
2225
|
+
);
|
|
2226
|
+
}
|
|
2227
|
+
}
|
|
2228
|
+
}
|
|
2229
|
+
}
|
|
2230
|
+
if (removeOrphans) {
|
|
2231
|
+
for (const strapiEmbed of strapiEmbeddings) {
|
|
2232
|
+
const hasNeonRecord = neonBystrapiId.has(strapiEmbed.documentId);
|
|
2233
|
+
if (!hasNeonRecord) {
|
|
2234
|
+
if (!dryRun) {
|
|
2235
|
+
try {
|
|
2236
|
+
await strapi.documents(CONTENT_TYPE_UID).delete({
|
|
2237
|
+
documentId: strapiEmbed.documentId
|
|
2238
|
+
});
|
|
2239
|
+
result.actions.orphansRemoved++;
|
|
2240
|
+
result.details.orphansRemoved.push(
|
|
2241
|
+
`${strapiEmbed.documentId} (${strapiEmbed.title || "untitled"})`
|
|
2242
|
+
);
|
|
2243
|
+
} catch (error) {
|
|
2244
|
+
result.errors.push(
|
|
2245
|
+
`Failed to remove orphan ${strapiEmbed.documentId}: ${error}`
|
|
2246
|
+
);
|
|
2247
|
+
}
|
|
2248
|
+
} else {
|
|
2249
|
+
result.actions.orphansRemoved++;
|
|
2250
|
+
result.details.orphansRemoved.push(
|
|
2251
|
+
`[DRY RUN] ${strapiEmbed.documentId} (${strapiEmbed.title || "untitled"})`
|
|
2252
|
+
);
|
|
2253
|
+
}
|
|
2254
|
+
}
|
|
2255
|
+
}
|
|
2256
|
+
}
|
|
2257
|
+
result.success = result.errors.length === 0;
|
|
2258
|
+
return result;
|
|
2259
|
+
} catch (error) {
|
|
2260
|
+
result.errors.push(`Sync failed: ${error}`);
|
|
2261
|
+
return result;
|
|
2262
|
+
}
|
|
2263
|
+
},
|
|
2264
|
+
/**
|
|
2265
|
+
* Get sync status - compare Neon and Strapi without making changes
|
|
2266
|
+
*/
|
|
2267
|
+
async getSyncStatus() {
|
|
2268
|
+
if (!pluginManager.isInitialized()) {
|
|
2269
|
+
throw new Error("Plugin manager not initialized");
|
|
2270
|
+
}
|
|
2271
|
+
const neonEmbeddings = await pluginManager.getAllNeonEmbeddings();
|
|
2272
|
+
const strapiEmbeddings = await strapi.documents(CONTENT_TYPE_UID).findMany({
|
|
2273
|
+
limit: 1e4
|
|
2274
|
+
});
|
|
2275
|
+
const neonBystrapiId = /* @__PURE__ */ new Map();
|
|
2276
|
+
for (const neon of neonEmbeddings) {
|
|
2277
|
+
if (neon.strapiId) {
|
|
2278
|
+
neonBystrapiId.set(neon.strapiId, neon);
|
|
2279
|
+
}
|
|
2280
|
+
}
|
|
2281
|
+
const strapiByDocumentId = /* @__PURE__ */ new Map();
|
|
2282
|
+
for (const s of strapiEmbeddings) {
|
|
2283
|
+
strapiByDocumentId.set(s.documentId, s);
|
|
2284
|
+
}
|
|
2285
|
+
let missingInStrapi = 0;
|
|
2286
|
+
let contentDifferences = 0;
|
|
2287
|
+
for (const neon of neonEmbeddings) {
|
|
2288
|
+
if (!neon.strapiId) continue;
|
|
2289
|
+
const strapiRecord = strapiByDocumentId.get(neon.strapiId);
|
|
2290
|
+
if (!strapiRecord) {
|
|
2291
|
+
missingInStrapi++;
|
|
2292
|
+
} else if (strapiRecord.content !== neon.content) {
|
|
2293
|
+
contentDifferences++;
|
|
2294
|
+
}
|
|
2295
|
+
}
|
|
2296
|
+
let missingInNeon = 0;
|
|
2297
|
+
for (const s of strapiEmbeddings) {
|
|
2298
|
+
if (!neonBystrapiId.has(s.documentId)) {
|
|
2299
|
+
missingInNeon++;
|
|
2300
|
+
}
|
|
2301
|
+
}
|
|
2302
|
+
return {
|
|
2303
|
+
neonCount: neonEmbeddings.length,
|
|
2304
|
+
strapiCount: strapiEmbeddings.length,
|
|
2305
|
+
inSync: missingInStrapi === 0 && missingInNeon === 0 && contentDifferences === 0,
|
|
2306
|
+
missingInStrapi,
|
|
2307
|
+
missingInNeon,
|
|
2308
|
+
contentDifferences
|
|
2309
|
+
};
|
|
2310
|
+
},
|
|
2311
|
+
/**
|
|
2312
|
+
* Recreate all embeddings in Neon DB from Strapi data
|
|
2313
|
+
*
|
|
2314
|
+
* This will:
|
|
2315
|
+
* 1. Delete ALL embeddings from Neon DB
|
|
2316
|
+
* 2. Re-create embeddings for each Strapi embedding entry
|
|
2317
|
+
* 3. Update Strapi entries with new embedding IDs
|
|
2318
|
+
*
|
|
2319
|
+
* Use this when embeddings were created with incorrect metadata format
|
|
2320
|
+
*/
|
|
2321
|
+
async recreateAllEmbeddings() {
|
|
2322
|
+
const result = {
|
|
2323
|
+
success: false,
|
|
2324
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
2325
|
+
deletedFromNeon: 0,
|
|
2326
|
+
processedFromStrapi: 0,
|
|
2327
|
+
recreatedInNeon: 0,
|
|
2328
|
+
errors: [],
|
|
2329
|
+
details: {
|
|
2330
|
+
recreated: [],
|
|
2331
|
+
failed: []
|
|
2332
|
+
}
|
|
2333
|
+
};
|
|
2334
|
+
if (!pluginManager.isInitialized()) {
|
|
2335
|
+
result.errors.push(
|
|
2336
|
+
"Plugin manager not initialized. Check your Neon and OpenAI configuration."
|
|
2337
|
+
);
|
|
2338
|
+
return result;
|
|
2339
|
+
}
|
|
2340
|
+
try {
|
|
2341
|
+
console.log("[recreateAllEmbeddings] Step 1: Clearing Neon DB...");
|
|
2342
|
+
result.deletedFromNeon = await pluginManager.clearAllNeonEmbeddings();
|
|
2343
|
+
console.log(`[recreateAllEmbeddings] Deleted ${result.deletedFromNeon} embeddings from Neon`);
|
|
2344
|
+
console.log("[recreateAllEmbeddings] Step 2: Fetching Strapi embeddings...");
|
|
2345
|
+
const strapiEmbeddings = await strapi.documents(CONTENT_TYPE_UID).findMany({
|
|
2346
|
+
limit: -1
|
|
2347
|
+
// Get all
|
|
2348
|
+
});
|
|
2349
|
+
result.processedFromStrapi = strapiEmbeddings.length;
|
|
2350
|
+
console.log(`[recreateAllEmbeddings] Found ${strapiEmbeddings.length} embeddings in Strapi`);
|
|
2351
|
+
if (strapiEmbeddings.length === 0) {
|
|
2352
|
+
result.success = true;
|
|
2353
|
+
return result;
|
|
2354
|
+
}
|
|
2355
|
+
console.log("[recreateAllEmbeddings] Step 3: Recreating embeddings in Neon...");
|
|
2356
|
+
for (let i = 0; i < strapiEmbeddings.length; i++) {
|
|
2357
|
+
const entry = strapiEmbeddings[i];
|
|
2358
|
+
const progress = `[${i + 1}/${strapiEmbeddings.length}]`;
|
|
2359
|
+
if (!entry.content) {
|
|
2360
|
+
console.log(`${progress} Skipping ${entry.documentId} - no content`);
|
|
2361
|
+
result.details.failed.push(`${entry.documentId}: no content`);
|
|
2362
|
+
continue;
|
|
2363
|
+
}
|
|
2364
|
+
try {
|
|
2365
|
+
console.log(`${progress} Creating embedding for: ${entry.title || entry.documentId}`);
|
|
2366
|
+
const embeddingResult = await pluginManager.createEmbedding({
|
|
2367
|
+
id: entry.documentId,
|
|
2368
|
+
title: entry.title || "",
|
|
2369
|
+
content: entry.content,
|
|
2370
|
+
collectionType: entry.collectionType || "standalone",
|
|
2371
|
+
fieldName: entry.fieldName || "content"
|
|
2372
|
+
});
|
|
2373
|
+
await strapi.documents(CONTENT_TYPE_UID).update({
|
|
2374
|
+
documentId: entry.documentId,
|
|
2375
|
+
data: {
|
|
2376
|
+
embeddingId: embeddingResult.embeddingId,
|
|
2377
|
+
embedding: embeddingResult.embedding
|
|
2378
|
+
}
|
|
2379
|
+
});
|
|
2380
|
+
result.recreatedInNeon++;
|
|
2381
|
+
result.details.recreated.push(`${entry.documentId} (${entry.title || "untitled"})`);
|
|
2382
|
+
if (i < strapiEmbeddings.length - 1) {
|
|
2383
|
+
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
2384
|
+
}
|
|
2385
|
+
} catch (error) {
|
|
2386
|
+
console.error(`${progress} Failed:`, error.message || error);
|
|
2387
|
+
result.errors.push(`${entry.documentId}: ${error.message || error}`);
|
|
2388
|
+
result.details.failed.push(`${entry.documentId}: ${error.message || error}`);
|
|
2389
|
+
}
|
|
2390
|
+
}
|
|
2391
|
+
result.success = result.errors.length === 0;
|
|
2392
|
+
console.log(`[recreateAllEmbeddings] Complete. Recreated: ${result.recreatedInNeon}, Failed: ${result.details.failed.length}`);
|
|
2393
|
+
return result;
|
|
2394
|
+
} catch (error) {
|
|
2395
|
+
result.errors.push(`Recreate failed: ${error.message || error}`);
|
|
2396
|
+
return result;
|
|
2397
|
+
}
|
|
2398
|
+
}
|
|
2399
|
+
});
|
|
1348
2400
|
// Collects the plugin's service factories. This change adds `sync` alongside the
// existing `embeddings` entry (the "-" line below is the old version of the object,
// the "+" lines are the new one).
// NOTE(review): presumably consumed by the plugin `index` object declared next — confirm.
const services = {
|
|
1349
|
-
embeddings
|
|
2401
|
+
embeddings,
|
|
2402
|
+
sync
|
|
1350
2403
|
};
|
|
1351
2404
|
const index = {
|
|
1352
2405
|
register,
|