strapi-content-embeddings 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +187 -0
- package/dist/_chunks/{App-Swmo_WMf.js → App-Rq72tIgS.js} +37 -55
- package/dist/_chunks/App-Rq72tIgS.js.map +1 -0
- package/dist/_chunks/{App-BlCKKuQN.mjs → App-j180lztd.mjs} +37 -55
- package/dist/_chunks/App-j180lztd.mjs.map +1 -0
- package/dist/_chunks/en-B4KWt_jN.js +1 -0
- package/dist/_chunks/en-B4KWt_jN.js.map +1 -0
- package/dist/_chunks/en-Byx4XI2L.mjs +1 -0
- package/dist/_chunks/en-Byx4XI2L.mjs.map +1 -0
- package/dist/_chunks/{index-CXVoFiJp.mjs → index-B3j0IFUi.mjs} +70 -27
- package/dist/_chunks/index-B3j0IFUi.mjs.map +1 -0
- package/dist/_chunks/{index-BpKkUIJY.js → index-jf6vikTZ.js} +70 -27
- package/dist/_chunks/index-jf6vikTZ.js.map +1 -0
- package/dist/admin/index.js +2 -1
- package/dist/admin/index.js.map +1 -0
- package/dist/admin/index.mjs +2 -1
- package/dist/admin/index.mjs.map +1 -0
- package/dist/admin/src/components/custom/MarkdownEditor.d.ts +1 -1
- package/dist/server/index.js +850 -57
- package/dist/server/index.js.map +1 -0
- package/dist/server/index.mjs +850 -57
- package/dist/server/index.mjs.map +1 -0
- package/dist/server/src/config/index.d.ts +9 -0
- package/dist/server/src/controllers/controller.d.ts +14 -0
- package/dist/server/src/controllers/index.d.ts +2 -0
- package/dist/server/src/index.d.ts +38 -2
- package/dist/server/src/mcp/tools/create-embedding.d.ts +6 -0
- package/dist/server/src/mcp/tools/index.d.ts +4 -0
- package/dist/server/src/plugin-manager.d.ts +16 -0
- package/dist/server/src/routes/content-api.d.ts +10 -0
- package/dist/server/src/routes/index.d.ts +10 -0
- package/dist/server/src/services/embeddings.d.ts +43 -2
- package/dist/server/src/services/index.d.ts +23 -2
- package/dist/server/src/services/sync.d.ts +48 -0
- package/dist/server/src/utils/chunking.d.ts +44 -0
- package/package.json +1 -1
package/dist/server/index.js
CHANGED
|
@@ -20,7 +20,10 @@ const config = {
|
|
|
20
20
|
default: {
|
|
21
21
|
openAIApiKey: "",
|
|
22
22
|
neonConnectionString: "",
|
|
23
|
-
embeddingModel: "text-embedding-3-small"
|
|
23
|
+
embeddingModel: "text-embedding-3-small",
|
|
24
|
+
chunkSize: 4e3,
|
|
25
|
+
chunkOverlap: 200,
|
|
26
|
+
autoChunk: false
|
|
24
27
|
},
|
|
25
28
|
validator(config2) {
|
|
26
29
|
if (!config2.openAIApiKey) {
|
|
@@ -38,6 +41,11 @@ const config = {
|
|
|
38
41
|
`strapi-content-embeddings: Invalid embeddingModel "${config2.embeddingModel}". Valid options: ${Object.keys(EMBEDDING_MODELS).join(", ")}. Defaulting to "text-embedding-3-small".`
|
|
39
42
|
);
|
|
40
43
|
}
|
|
44
|
+
if (config2.chunkSize && (config2.chunkSize < 100 || config2.chunkSize > 8e3)) {
|
|
45
|
+
console.warn(
|
|
46
|
+
`strapi-content-embeddings: chunkSize ${config2.chunkSize} is outside recommended range (100-8000). Using default value of 4000.`
|
|
47
|
+
);
|
|
48
|
+
}
|
|
41
49
|
}
|
|
42
50
|
};
|
|
43
51
|
class PluginManager {
|
|
@@ -282,6 +290,56 @@ Context:
|
|
|
282
290
|
isInitialized() {
|
|
283
291
|
return !!(this.embeddings && this.chat && this.pool);
|
|
284
292
|
}
|
|
293
|
+
/**
|
|
294
|
+
* Get all embeddings from Neon DB
|
|
295
|
+
* Returns the metadata (including Strapi documentId) for each embedding
|
|
296
|
+
*/
|
|
297
|
+
async getAllNeonEmbeddings() {
|
|
298
|
+
if (!this.pool) {
|
|
299
|
+
throw new Error("Plugin manager not initialized");
|
|
300
|
+
}
|
|
301
|
+
try {
|
|
302
|
+
const result = await this.pool.query(`
|
|
303
|
+
SELECT
|
|
304
|
+
id,
|
|
305
|
+
content,
|
|
306
|
+
metadata->>'id' as strapi_id,
|
|
307
|
+
metadata->>'title' as title,
|
|
308
|
+
metadata->>'collectionType' as collection_type,
|
|
309
|
+
metadata->>'fieldName' as field_name
|
|
310
|
+
FROM embeddings_documents
|
|
311
|
+
ORDER BY id
|
|
312
|
+
`);
|
|
313
|
+
return result.rows.map((row) => ({
|
|
314
|
+
id: row.id,
|
|
315
|
+
strapiId: row.strapi_id,
|
|
316
|
+
title: row.title || "",
|
|
317
|
+
content: row.content || "",
|
|
318
|
+
collectionType: row.collection_type || "standalone",
|
|
319
|
+
fieldName: row.field_name || "content"
|
|
320
|
+
}));
|
|
321
|
+
} catch (error) {
|
|
322
|
+
console.error(`Failed to get Neon embeddings: ${error}`);
|
|
323
|
+
throw new Error(`Failed to get Neon embeddings: ${error}`);
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
/**
|
|
327
|
+
* Delete an embedding from Neon by its Neon UUID (not Strapi ID)
|
|
328
|
+
*/
|
|
329
|
+
async deleteNeonEmbeddingById(neonId) {
|
|
330
|
+
if (!this.pool) {
|
|
331
|
+
throw new Error("Plugin manager not initialized");
|
|
332
|
+
}
|
|
333
|
+
try {
|
|
334
|
+
await this.pool.query(
|
|
335
|
+
`DELETE FROM embeddings_documents WHERE id = $1`,
|
|
336
|
+
[neonId]
|
|
337
|
+
);
|
|
338
|
+
} catch (error) {
|
|
339
|
+
console.error(`Failed to delete Neon embedding: ${error}`);
|
|
340
|
+
throw new Error(`Failed to delete Neon embedding: ${error}`);
|
|
341
|
+
}
|
|
342
|
+
}
|
|
285
343
|
async destroy() {
|
|
286
344
|
if (this.pool) {
|
|
287
345
|
await this.pool.end();
|
|
@@ -581,7 +639,7 @@ async function handleGetEmbedding(strapi, args) {
|
|
|
581
639
|
}
|
|
582
640
|
const createEmbeddingTool = {
|
|
583
641
|
name: "create_embedding",
|
|
584
|
-
description: "Create a new embedding from text content. The content will be vectorized and stored for semantic search.",
|
|
642
|
+
description: "Create a new embedding from text content. The content will be vectorized and stored for semantic search. For large content (over 4000 characters), enable autoChunk to automatically split into multiple embeddings.",
|
|
585
643
|
inputSchema: {
|
|
586
644
|
type: "object",
|
|
587
645
|
properties: {
|
|
@@ -596,21 +654,68 @@ const createEmbeddingTool = {
|
|
|
596
654
|
metadata: {
|
|
597
655
|
type: "object",
|
|
598
656
|
description: "Optional metadata to associate with the embedding (tags, source, etc.)"
|
|
657
|
+
},
|
|
658
|
+
autoChunk: {
|
|
659
|
+
type: "boolean",
|
|
660
|
+
description: "Automatically split large content into chunks (default: false). When enabled, content over 4000 characters will be split into multiple embeddings with overlap for context preservation."
|
|
599
661
|
}
|
|
600
662
|
},
|
|
601
663
|
required: ["title", "content"]
|
|
602
664
|
}
|
|
603
665
|
};
|
|
604
666
|
async function handleCreateEmbedding(strapi, args) {
|
|
605
|
-
const { title, content, metadata } = args;
|
|
667
|
+
const { title, content, metadata, autoChunk } = args;
|
|
606
668
|
try {
|
|
607
669
|
const embeddingsService = strapi.plugin("strapi-content-embeddings").service("embeddings");
|
|
670
|
+
if (autoChunk) {
|
|
671
|
+
const result = await embeddingsService.createChunkedEmbedding({
|
|
672
|
+
data: {
|
|
673
|
+
title,
|
|
674
|
+
content,
|
|
675
|
+
metadata: metadata || {},
|
|
676
|
+
collectionType: "standalone",
|
|
677
|
+
fieldName: "content"
|
|
678
|
+
}
|
|
679
|
+
});
|
|
680
|
+
return {
|
|
681
|
+
content: [
|
|
682
|
+
{
|
|
683
|
+
type: "text",
|
|
684
|
+
text: JSON.stringify(
|
|
685
|
+
{
|
|
686
|
+
success: true,
|
|
687
|
+
message: result.wasChunked ? `Content chunked into ${result.totalChunks} embeddings` : "Embedding created successfully (no chunking needed)",
|
|
688
|
+
wasChunked: result.wasChunked,
|
|
689
|
+
totalChunks: result.totalChunks,
|
|
690
|
+
primaryEmbedding: {
|
|
691
|
+
id: result.entity.id,
|
|
692
|
+
documentId: result.entity.documentId,
|
|
693
|
+
title: result.entity.title,
|
|
694
|
+
embeddingId: result.entity.embeddingId
|
|
695
|
+
},
|
|
696
|
+
chunks: result.chunks.map((chunk) => ({
|
|
697
|
+
documentId: chunk.documentId,
|
|
698
|
+
title: chunk.title,
|
|
699
|
+
contentLength: chunk.content?.length || 0
|
|
700
|
+
})),
|
|
701
|
+
contentLength: content.length,
|
|
702
|
+
estimatedTokens: Math.ceil(content.length / 4)
|
|
703
|
+
},
|
|
704
|
+
null,
|
|
705
|
+
2
|
|
706
|
+
)
|
|
707
|
+
}
|
|
708
|
+
]
|
|
709
|
+
};
|
|
710
|
+
}
|
|
608
711
|
const embedding2 = await embeddingsService.createEmbedding({
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
712
|
+
data: {
|
|
713
|
+
title,
|
|
714
|
+
content,
|
|
715
|
+
metadata: metadata || {},
|
|
716
|
+
collectionType: "standalone",
|
|
717
|
+
fieldName: "content"
|
|
718
|
+
}
|
|
614
719
|
});
|
|
615
720
|
return {
|
|
616
721
|
content: [
|
|
@@ -628,7 +733,8 @@ async function handleCreateEmbedding(strapi, args) {
|
|
|
628
733
|
contentLength: content.length,
|
|
629
734
|
metadata: embedding2.metadata,
|
|
630
735
|
createdAt: embedding2.createdAt
|
|
631
|
-
}
|
|
736
|
+
},
|
|
737
|
+
hint: content.length > 4e3 ? "Content is large. Consider using autoChunk: true for better search results." : void 0
|
|
632
738
|
},
|
|
633
739
|
null,
|
|
634
740
|
2
|
|
@@ -714,10 +820,10 @@ function createMcpServer(strapi) {
|
|
|
714
820
|
});
|
|
715
821
|
return server;
|
|
716
822
|
}
|
|
717
|
-
const PLUGIN_ID$
|
|
823
|
+
const PLUGIN_ID$5 = "strapi-content-embeddings";
|
|
718
824
|
const OAUTH_PLUGIN_ID = "strapi-oauth-mcp-manager";
|
|
719
825
|
function createFallbackAuthMiddleware(strapi) {
|
|
720
|
-
const mcpPath = `/api/${PLUGIN_ID$
|
|
826
|
+
const mcpPath = `/api/${PLUGIN_ID$5}/mcp`;
|
|
721
827
|
return async (ctx, next) => {
|
|
722
828
|
if (!ctx.path.startsWith(mcpPath)) {
|
|
723
829
|
return next();
|
|
@@ -743,35 +849,35 @@ const bootstrap = async ({ strapi }) => {
|
|
|
743
849
|
section: "plugins",
|
|
744
850
|
displayName: "Read",
|
|
745
851
|
uid: "read",
|
|
746
|
-
pluginName: PLUGIN_ID$
|
|
852
|
+
pluginName: PLUGIN_ID$5
|
|
747
853
|
},
|
|
748
854
|
{
|
|
749
855
|
section: "plugins",
|
|
750
856
|
displayName: "Update",
|
|
751
857
|
uid: "update",
|
|
752
|
-
pluginName: PLUGIN_ID$
|
|
858
|
+
pluginName: PLUGIN_ID$5
|
|
753
859
|
},
|
|
754
860
|
{
|
|
755
861
|
section: "plugins",
|
|
756
862
|
displayName: "Create",
|
|
757
863
|
uid: "create",
|
|
758
|
-
pluginName: PLUGIN_ID$
|
|
864
|
+
pluginName: PLUGIN_ID$5
|
|
759
865
|
},
|
|
760
866
|
{
|
|
761
867
|
section: "plugins",
|
|
762
868
|
displayName: "Delete",
|
|
763
869
|
uid: "delete",
|
|
764
|
-
pluginName: PLUGIN_ID$
|
|
870
|
+
pluginName: PLUGIN_ID$5
|
|
765
871
|
},
|
|
766
872
|
{
|
|
767
873
|
section: "plugins",
|
|
768
874
|
displayName: "Chat",
|
|
769
875
|
uid: "chat",
|
|
770
|
-
pluginName: PLUGIN_ID$
|
|
876
|
+
pluginName: PLUGIN_ID$5
|
|
771
877
|
}
|
|
772
878
|
];
|
|
773
879
|
await strapi.admin.services.permission.actionProvider.registerMany(actions);
|
|
774
|
-
const pluginConfig = strapi.config.get(`plugin::${PLUGIN_ID$
|
|
880
|
+
const pluginConfig = strapi.config.get(`plugin::${PLUGIN_ID$5}`);
|
|
775
881
|
if (pluginConfig?.openAIApiKey && pluginConfig?.neonConnectionString) {
|
|
776
882
|
try {
|
|
777
883
|
await pluginManager.initialize({
|
|
@@ -780,42 +886,42 @@ const bootstrap = async ({ strapi }) => {
|
|
|
780
886
|
embeddingModel: pluginConfig.embeddingModel
|
|
781
887
|
});
|
|
782
888
|
strapi.contentEmbeddingsManager = pluginManager;
|
|
783
|
-
strapi.log.info(`[${PLUGIN_ID$
|
|
889
|
+
strapi.log.info(`[${PLUGIN_ID$5}] Plugin initialized successfully`);
|
|
784
890
|
} catch (error) {
|
|
785
|
-
strapi.log.error(`[${PLUGIN_ID$
|
|
891
|
+
strapi.log.error(`[${PLUGIN_ID$5}] Failed to initialize:`, error);
|
|
786
892
|
}
|
|
787
893
|
} else {
|
|
788
894
|
strapi.log.warn(
|
|
789
|
-
`[${PLUGIN_ID$
|
|
895
|
+
`[${PLUGIN_ID$5}] Missing configuration. Set openAIApiKey and neonConnectionString in plugin config.`
|
|
790
896
|
);
|
|
791
897
|
}
|
|
792
|
-
const plugin = strapi.plugin(PLUGIN_ID$
|
|
898
|
+
const plugin = strapi.plugin(PLUGIN_ID$5);
|
|
793
899
|
plugin.createMcpServer = () => createMcpServer(strapi);
|
|
794
900
|
plugin.sessions = /* @__PURE__ */ new Map();
|
|
795
901
|
const oauthPlugin = strapi.plugin(OAUTH_PLUGIN_ID);
|
|
796
902
|
if (oauthPlugin) {
|
|
797
|
-
strapi.log.info(`[${PLUGIN_ID$
|
|
903
|
+
strapi.log.info(`[${PLUGIN_ID$5}] OAuth manager detected - OAuth + API token auth enabled`);
|
|
798
904
|
} else {
|
|
799
905
|
const fallbackMiddleware = createFallbackAuthMiddleware();
|
|
800
906
|
strapi.server.use(fallbackMiddleware);
|
|
801
|
-
strapi.log.info(`[${PLUGIN_ID$
|
|
907
|
+
strapi.log.info(`[${PLUGIN_ID$5}] Using API token authentication (OAuth manager not installed)`);
|
|
802
908
|
}
|
|
803
|
-
strapi.log.info(`[${PLUGIN_ID$
|
|
909
|
+
strapi.log.info(`[${PLUGIN_ID$5}] MCP endpoint available at: /api/${PLUGIN_ID$5}/mcp`);
|
|
804
910
|
};
|
|
805
911
|
const destroy = async ({ strapi }) => {
|
|
806
912
|
await pluginManager.destroy();
|
|
807
913
|
console.log("Content Embeddings plugin destroyed");
|
|
808
914
|
};
|
|
809
|
-
const PLUGIN_ID$
|
|
915
|
+
const PLUGIN_ID$4 = "strapi-content-embeddings";
|
|
810
916
|
const register = ({ strapi }) => {
|
|
811
917
|
Object.values(strapi.contentTypes).forEach((contentType) => {
|
|
812
|
-
if (contentType.uid.startsWith("admin::") || contentType.uid.startsWith("strapi::") || contentType.uid === `plugin::${PLUGIN_ID$
|
|
918
|
+
if (contentType.uid.startsWith("admin::") || contentType.uid.startsWith("strapi::") || contentType.uid === `plugin::${PLUGIN_ID$4}.embedding`) {
|
|
813
919
|
return;
|
|
814
920
|
}
|
|
815
921
|
contentType.attributes.embedding = {
|
|
816
922
|
type: "relation",
|
|
817
923
|
relation: "morphOne",
|
|
818
|
-
target: `plugin::${PLUGIN_ID$
|
|
924
|
+
target: `plugin::${PLUGIN_ID$4}.embedding`,
|
|
819
925
|
morphBy: "related",
|
|
820
926
|
private: false,
|
|
821
927
|
configurable: false
|
|
@@ -885,11 +991,11 @@ const embedding = {
|
|
|
885
991
|
const contentTypes = {
|
|
886
992
|
embedding
|
|
887
993
|
};
|
|
888
|
-
const PLUGIN_ID$
|
|
994
|
+
const PLUGIN_ID$3 = "strapi-content-embeddings";
|
|
889
995
|
const controller = ({ strapi }) => ({
|
|
890
996
|
async createEmbedding(ctx) {
|
|
891
997
|
try {
|
|
892
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
998
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").createEmbedding(ctx.request.body);
|
|
893
999
|
ctx.body = result;
|
|
894
1000
|
} catch (error) {
|
|
895
1001
|
ctx.throw(500, error.message || "Failed to create embedding");
|
|
@@ -898,7 +1004,7 @@ const controller = ({ strapi }) => ({
|
|
|
898
1004
|
async deleteEmbedding(ctx) {
|
|
899
1005
|
try {
|
|
900
1006
|
const { id } = ctx.params;
|
|
901
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1007
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").deleteEmbedding(id);
|
|
902
1008
|
ctx.body = result;
|
|
903
1009
|
} catch (error) {
|
|
904
1010
|
ctx.throw(500, error.message || "Failed to delete embedding");
|
|
@@ -907,7 +1013,7 @@ const controller = ({ strapi }) => ({
|
|
|
907
1013
|
async updateEmbedding(ctx) {
|
|
908
1014
|
try {
|
|
909
1015
|
const { id } = ctx.params;
|
|
910
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1016
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").updateEmbedding(id, ctx.request.body);
|
|
911
1017
|
ctx.body = result;
|
|
912
1018
|
} catch (error) {
|
|
913
1019
|
ctx.throw(500, error.message || "Failed to update embedding");
|
|
@@ -916,7 +1022,7 @@ const controller = ({ strapi }) => ({
|
|
|
916
1022
|
async getEmbeddings(ctx) {
|
|
917
1023
|
try {
|
|
918
1024
|
const { page, pageSize, filters } = ctx.query;
|
|
919
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1025
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").getEmbeddings({
|
|
920
1026
|
page: page ? parseInt(page, 10) : 1,
|
|
921
1027
|
pageSize: pageSize ? parseInt(pageSize, 10) : 10,
|
|
922
1028
|
filters
|
|
@@ -929,7 +1035,7 @@ const controller = ({ strapi }) => ({
|
|
|
929
1035
|
async getEmbedding(ctx) {
|
|
930
1036
|
try {
|
|
931
1037
|
const { id } = ctx.params;
|
|
932
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1038
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").getEmbedding(id);
|
|
933
1039
|
if (!result) {
|
|
934
1040
|
ctx.throw(404, "Embedding not found");
|
|
935
1041
|
}
|
|
@@ -944,14 +1050,46 @@ const controller = ({ strapi }) => ({
|
|
|
944
1050
|
async queryEmbeddings(ctx) {
|
|
945
1051
|
try {
|
|
946
1052
|
const { query } = ctx.query;
|
|
947
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1053
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").queryEmbeddings(query);
|
|
948
1054
|
ctx.body = result;
|
|
949
1055
|
} catch (error) {
|
|
950
1056
|
ctx.throw(500, error.message || "Failed to query embeddings");
|
|
951
1057
|
}
|
|
1058
|
+
},
|
|
1059
|
+
/**
|
|
1060
|
+
* Sync embeddings from Neon DB to Strapi DB
|
|
1061
|
+
* GET /api/strapi-content-embeddings/sync
|
|
1062
|
+
*
|
|
1063
|
+
* Query params:
|
|
1064
|
+
* - removeOrphans: boolean (default: false) - Remove Strapi entries that don't exist in Neon
|
|
1065
|
+
* - dryRun: boolean (default: false) - Preview changes without applying them
|
|
1066
|
+
*/
|
|
1067
|
+
async syncFromNeon(ctx) {
|
|
1068
|
+
try {
|
|
1069
|
+
const { removeOrphans, dryRun } = ctx.query;
|
|
1070
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("sync").syncFromNeon({
|
|
1071
|
+
removeOrphans: removeOrphans === "true",
|
|
1072
|
+
dryRun: dryRun === "true"
|
|
1073
|
+
});
|
|
1074
|
+
ctx.body = result;
|
|
1075
|
+
} catch (error) {
|
|
1076
|
+
ctx.throw(500, error.message || "Failed to sync embeddings");
|
|
1077
|
+
}
|
|
1078
|
+
},
|
|
1079
|
+
/**
|
|
1080
|
+
* Get sync status - compare Neon and Strapi without making changes
|
|
1081
|
+
* GET /api/strapi-content-embeddings/sync/status
|
|
1082
|
+
*/
|
|
1083
|
+
async getSyncStatus(ctx) {
|
|
1084
|
+
try {
|
|
1085
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("sync").getSyncStatus();
|
|
1086
|
+
ctx.body = result;
|
|
1087
|
+
} catch (error) {
|
|
1088
|
+
ctx.throw(500, error.message || "Failed to get sync status");
|
|
1089
|
+
}
|
|
952
1090
|
}
|
|
953
1091
|
});
|
|
954
|
-
const PLUGIN_ID$
|
|
1092
|
+
const PLUGIN_ID$2 = "strapi-content-embeddings";
|
|
955
1093
|
const SESSION_TIMEOUT_MS = 4 * 60 * 60 * 1e3;
|
|
956
1094
|
function isSessionExpired(session) {
|
|
957
1095
|
return Date.now() - session.createdAt > SESSION_TIMEOUT_MS;
|
|
@@ -969,7 +1107,7 @@ function cleanupExpiredSessions(plugin, strapi) {
|
|
|
969
1107
|
}
|
|
970
1108
|
}
|
|
971
1109
|
if (cleaned > 0) {
|
|
972
|
-
strapi.log.debug(`[${PLUGIN_ID$
|
|
1110
|
+
strapi.log.debug(`[${PLUGIN_ID$2}] Cleaned up ${cleaned} expired MCP sessions`);
|
|
973
1111
|
}
|
|
974
1112
|
}
|
|
975
1113
|
const mcpController = ({ strapi }) => ({
|
|
@@ -977,7 +1115,7 @@ const mcpController = ({ strapi }) => ({
|
|
|
977
1115
|
* Handle MCP requests (POST, GET, DELETE)
|
|
978
1116
|
*/
|
|
979
1117
|
async handle(ctx) {
|
|
980
|
-
const plugin = strapi.plugin(PLUGIN_ID$
|
|
1118
|
+
const plugin = strapi.plugin(PLUGIN_ID$2);
|
|
981
1119
|
if (!plugin.createMcpServer) {
|
|
982
1120
|
ctx.status = 503;
|
|
983
1121
|
ctx.body = {
|
|
@@ -993,7 +1131,7 @@ const mcpController = ({ strapi }) => ({
|
|
|
993
1131
|
const requestedSessionId = ctx.request.headers["mcp-session-id"];
|
|
994
1132
|
let session = requestedSessionId ? plugin.sessions.get(requestedSessionId) : null;
|
|
995
1133
|
if (session && isSessionExpired(session)) {
|
|
996
|
-
strapi.log.debug(`[${PLUGIN_ID$
|
|
1134
|
+
strapi.log.debug(`[${PLUGIN_ID$2}] Session expired, removing: ${requestedSessionId}`);
|
|
997
1135
|
try {
|
|
998
1136
|
session.server.close();
|
|
999
1137
|
} catch {
|
|
@@ -1028,13 +1166,13 @@ const mcpController = ({ strapi }) => ({
|
|
|
1028
1166
|
};
|
|
1029
1167
|
plugin.sessions.set(sessionId, session);
|
|
1030
1168
|
strapi.log.debug(
|
|
1031
|
-
`[${PLUGIN_ID$
|
|
1169
|
+
`[${PLUGIN_ID$2}] New MCP session created: ${sessionId} (auth: ${ctx.state.authMethod || "unknown"})`
|
|
1032
1170
|
);
|
|
1033
1171
|
}
|
|
1034
1172
|
try {
|
|
1035
1173
|
await session.transport.handleRequest(ctx.req, ctx.res, ctx.request.body);
|
|
1036
1174
|
} catch (transportError) {
|
|
1037
|
-
strapi.log.warn(`[${PLUGIN_ID$
|
|
1175
|
+
strapi.log.warn(`[${PLUGIN_ID$2}] Transport error, cleaning up session: ${requestedSessionId}`, {
|
|
1038
1176
|
error: transportError instanceof Error ? transportError.message : String(transportError)
|
|
1039
1177
|
});
|
|
1040
1178
|
try {
|
|
@@ -1057,7 +1195,7 @@ const mcpController = ({ strapi }) => ({
|
|
|
1057
1195
|
}
|
|
1058
1196
|
ctx.respond = false;
|
|
1059
1197
|
} catch (error) {
|
|
1060
|
-
strapi.log.error(`[${PLUGIN_ID$
|
|
1198
|
+
strapi.log.error(`[${PLUGIN_ID$2}] Error handling MCP request`, {
|
|
1061
1199
|
error: error instanceof Error ? error.message : String(error),
|
|
1062
1200
|
method: ctx.method,
|
|
1063
1201
|
path: ctx.path
|
|
@@ -1084,6 +1222,32 @@ const contentApi = [
|
|
|
1084
1222
|
path: "/embeddings-query",
|
|
1085
1223
|
handler: "controller.queryEmbeddings"
|
|
1086
1224
|
},
|
|
1225
|
+
// Sync routes - for cron jobs or manual triggering
|
|
1226
|
+
// Use API token for authentication
|
|
1227
|
+
{
|
|
1228
|
+
method: "GET",
|
|
1229
|
+
path: "/sync",
|
|
1230
|
+
handler: "controller.syncFromNeon",
|
|
1231
|
+
config: {
|
|
1232
|
+
description: "Sync embeddings from Neon DB to Strapi. Query params: removeOrphans=true, dryRun=true"
|
|
1233
|
+
}
|
|
1234
|
+
},
|
|
1235
|
+
{
|
|
1236
|
+
method: "POST",
|
|
1237
|
+
path: "/sync",
|
|
1238
|
+
handler: "controller.syncFromNeon",
|
|
1239
|
+
config: {
|
|
1240
|
+
description: "Sync embeddings from Neon DB to Strapi. Query params: removeOrphans=true, dryRun=true"
|
|
1241
|
+
}
|
|
1242
|
+
},
|
|
1243
|
+
{
|
|
1244
|
+
method: "GET",
|
|
1245
|
+
path: "/sync/status",
|
|
1246
|
+
handler: "controller.getSyncStatus",
|
|
1247
|
+
config: {
|
|
1248
|
+
description: "Get sync status between Neon and Strapi without making changes"
|
|
1249
|
+
}
|
|
1250
|
+
},
|
|
1087
1251
|
// MCP routes - auth handled by middleware
|
|
1088
1252
|
{
|
|
1089
1253
|
method: "POST",
|
|
@@ -1203,11 +1367,180 @@ const routes = {
|
|
|
1203
1367
|
routes: [...admin]
|
|
1204
1368
|
}
|
|
1205
1369
|
};
|
|
1206
|
-
const
|
|
1207
|
-
|
|
1370
|
+
/**
 * Separators tried in order, from the coarsest boundary to the finest.
 * The trailing empty string is the last resort: it splits into characters.
 */
const DEFAULT_SEPARATORS = [
  "\n\n", // Paragraphs
  "\n", // Lines
  ". ", // Sentences
  "! ", // Exclamations
  "? ", // Questions
  "; ", // Semicolons
  ", ", // Commas
  " ", // Words
  "" // Characters (last resort)
];

/**
 * Rough token estimate: one token per four characters, rounded up.
 *
 * @param {string} text
 * @returns {number}
 */
function estimateTokens(text) {
  return Math.ceil(text.length / 4);
}

/**
 * Tell whether `content` is longer than `maxChars` characters.
 *
 * @param {string} content
 * @param {number} [maxChars=4000]
 * @returns {boolean}
 */
function needsChunking(content, maxChars = 4e3) {
  return content.length > maxChars;
}

/**
 * Split `text` on `separator`, keeping the separator attached to the end of
 * every piece that preceded one, so that joining the pieces restores `text`.
 *
 * @param {string} text
 * @param {string} separator - "" splits into individual characters.
 * @returns {string[]} Non-empty trailing piece included; empty one dropped.
 */
function splitWithSeparator(text, separator) {
  if (separator === "") {
    // Iterate by code point so a surrogate pair is never torn in two here.
    return Array.from(text);
  }
  const parts = text.split(separator);
  const tail = parts.pop();
  const pieces = parts.map((part) => part + separator);
  if (tail) {
    pieces.push(tail);
  }
  return pieces;
}

/**
 * Recursively split `text` into pieces of at most `chunkSize` characters,
 * preferring the first separator in `separators` that occurs in the text and
 * falling back to finer separators (and finally fixed-width slices) for any
 * piece that is still too long. Concatenating the result restores `text`.
 *
 * @param {string} text
 * @param {number} chunkSize - Maximum piece length; fractional values are floored.
 * @param {string[]} separators - Ordered from coarsest to finest.
 * @returns {string[]}
 * @throws {RangeError} If `chunkSize` is not a number >= 1. Without this
 *   guard the fixed-width slicing loop below could never advance.
 */
function splitText(text, chunkSize, separators) {
  const limit = Math.floor(chunkSize);
  if (!(limit >= 1)) {
    throw new RangeError(`chunkSize must be a number >= 1 (received ${chunkSize})`);
  }
  if (text.length <= limit) {
    return [text];
  }
  // First separator present in the text; otherwise the last (finest) one.
  const bestSeparator = separators.find((sep) => text.includes(sep)) ?? separators[separators.length - 1];
  const chunks = [];
  let currentChunk = "";
  for (const piece of splitWithSeparator(text, bestSeparator)) {
    if (currentChunk.length + piece.length <= limit) {
      currentChunk += piece;
      continue;
    }
    if (currentChunk) {
      chunks.push(currentChunk);
    }
    if (piece.length <= limit) {
      currentChunk = piece;
      continue;
    }
    // The piece alone is too long: retry with the finer separators only.
    const remainingSeparators = separators.slice(separators.indexOf(bestSeparator) + 1);
    if (remainingSeparators.length > 0) {
      chunks.push(...splitText(piece, limit, remainingSeparators));
    } else {
      for (let i = 0; i < piece.length; i += limit) {
        chunks.push(piece.slice(i, i + limit));
      }
    }
    currentChunk = "";
  }
  if (currentChunk) {
    chunks.push(currentChunk);
  }
  return chunks;
}

/**
 * Prefix every chunk except the first with the last `overlap` characters of
 * the chunk before it.
 *
 * @param {string[]} chunks
 * @param {number} overlap - Floored; values below 1 (or NaN) disable overlap.
 * @returns {string[]} The same array when no overlap is applied, else a new one.
 */
function addOverlap(chunks, overlap) {
  const width = Math.floor(overlap);
  // slice(-0) would return the WHOLE previous chunk, so anything < 1 is a no-op.
  if (!(width >= 1) || chunks.length <= 1) {
    return chunks;
  }
  return chunks.map((chunk, i) => (i === 0 ? chunk : chunks[i - 1].slice(-width) + chunk));
}

/**
 * Split `content` into overlapping chunks suitable for embedding.
 *
 * The trimmed content is first cut into pieces of at most
 * `chunkSize - chunkOverlap` characters, then each piece after the first is
 * prefixed with the tail of its predecessor, so no chunk exceeds `chunkSize`.
 *
 * `startOffset` / `endOffset` locate the chunk's own (non-overlap) piece
 * inside the trimmed content; `text` additionally carries the overlap prefix
 * and is trimmed, so its length may differ from `endOffset - startOffset`.
 *
 * @param {string|null|undefined} content - Nullish content yields no chunks.
 * @param {object} [options2]
 * @param {number} [options2.chunkSize=4000] - Maximum chunk length (floored).
 * @param {number} [options2.chunkOverlap=200] - Characters repeated from the
 *   previous chunk. Negative/NaN values mean 0; a value >= chunkSize is
 *   reduced to half of chunkSize so that splitting can always make progress.
 * @param {string[]} [options2.separators=DEFAULT_SEPARATORS]
 * @returns {Array<{text: string, chunkIndex: number, totalChunks: number, startOffset: number, endOffset: number}>}
 * @throws {RangeError} If `chunkSize` is not a number >= 1.
 */
function chunkContent(content, options2 = {}) {
  const {
    chunkSize = 4e3,
    chunkOverlap = 200,
    separators = DEFAULT_SEPARATORS
  } = options2 ?? {};
  const size = Math.floor(chunkSize);
  if (!(size >= 1)) {
    throw new RangeError(`chunkSize must be a number >= 1 (received ${chunkSize})`);
  }
  if (content == null) {
    return [];
  }
  const cleanContent = content.trim();
  if (!cleanContent) {
    return [];
  }
  if (cleanContent.length <= size) {
    return [{
      text: cleanContent,
      chunkIndex: 0,
      totalChunks: 1,
      startOffset: 0,
      endOffset: cleanContent.length
    }];
  }
  const requestedOverlap = Math.floor(chunkOverlap);
  let overlap = requestedOverlap > 0 ? requestedOverlap : 0;
  if (overlap >= size) {
    // An overlap as large as the chunk leaves no room for new text.
    overlap = Math.floor(size / 2);
  }
  const rawChunks = splitText(cleanContent, size - overlap, separators);
  const chunksWithOverlap = addOverlap(rawChunks, overlap);
  const result = [];
  let currentOffset = 0;
  chunksWithOverlap.forEach((piece, i) => {
    const text = piece.trim();
    const rawLength = rawChunks[i].length;
    if (text) {
      result.push({
        text,
        chunkIndex: 0,
        totalChunks: 0,
        startOffset: currentOffset,
        endOffset: currentOffset + rawLength
      });
    }
    // Whitespace-only pieces are dropped but still advance the offset.
    currentOffset += rawLength;
  });
  // Number the surviving chunks consecutively.
  result.forEach((chunk, idx) => {
    chunk.chunkIndex = idx;
    chunk.totalChunks = result.length;
  });
  return result;
}

/**
 * Build the title for one chunk: the base title alone when there is a single
 * chunk, otherwise the base title followed by " [Part i/n]" (1-based).
 *
 * @param {string} baseTitle
 * @param {number} chunkIndex - Zero-based index of the chunk.
 * @param {number} totalChunks
 * @returns {string}
 */
function formatChunkTitle(baseTitle, chunkIndex, totalChunks) {
  if (totalChunks === 1) {
    return baseTitle;
  }
  return `${baseTitle} [Part ${chunkIndex + 1}/${totalChunks}]`;
}
|
|
1517
|
+
const PLUGIN_ID$1 = "strapi-content-embeddings";
|
|
1518
|
+
const CONTENT_TYPE_UID$1 = `plugin::${PLUGIN_ID$1}.embedding`;
|
|
1208
1519
|
const embeddings = ({ strapi }) => ({
|
|
1520
|
+
/**
|
|
1521
|
+
* Get plugin config with defaults
|
|
1522
|
+
*/
|
|
1523
|
+
getConfig() {
|
|
1524
|
+
const config2 = strapi.config.get("plugin::strapi-content-embeddings") || {};
|
|
1525
|
+
return {
|
|
1526
|
+
chunkSize: config2.chunkSize || 4e3,
|
|
1527
|
+
chunkOverlap: config2.chunkOverlap || 200,
|
|
1528
|
+
autoChunk: config2.autoChunk || false,
|
|
1529
|
+
...config2
|
|
1530
|
+
};
|
|
1531
|
+
},
|
|
1532
|
+
/**
|
|
1533
|
+
* Create a single embedding (no chunking)
|
|
1534
|
+
*/
|
|
1209
1535
|
async createEmbedding(data) {
|
|
1210
|
-
const { title, content, collectionType, fieldName, metadata, related } = data.data;
|
|
1536
|
+
const { title, content, collectionType, fieldName, metadata, related, autoChunk } = data.data;
|
|
1537
|
+
const config2 = this.getConfig();
|
|
1538
|
+
const shouldChunk = autoChunk ?? config2.autoChunk;
|
|
1539
|
+
const chunkSize = config2.chunkSize || 4e3;
|
|
1540
|
+
if (shouldChunk && needsChunking(content, chunkSize)) {
|
|
1541
|
+
const result = await this.createChunkedEmbedding(data);
|
|
1542
|
+
return result.entity;
|
|
1543
|
+
}
|
|
1211
1544
|
const entityData = {
|
|
1212
1545
|
title,
|
|
1213
1546
|
content,
|
|
@@ -1218,7 +1551,7 @@ const embeddings = ({ strapi }) => ({
|
|
|
1218
1551
|
if (related && related.__type && related.id) {
|
|
1219
1552
|
entityData.related = related;
|
|
1220
1553
|
}
|
|
1221
|
-
const entity = await strapi.documents(CONTENT_TYPE_UID).create({
|
|
1554
|
+
const entity = await strapi.documents(CONTENT_TYPE_UID$1).create({
|
|
1222
1555
|
data: entityData
|
|
1223
1556
|
});
|
|
1224
1557
|
if (!pluginManager.isInitialized()) {
|
|
@@ -1233,7 +1566,7 @@ const embeddings = ({ strapi }) => ({
|
|
|
1233
1566
|
collectionType: collectionType || "standalone",
|
|
1234
1567
|
fieldName: fieldName || "content"
|
|
1235
1568
|
});
|
|
1236
|
-
const updatedEntity = await strapi.documents(CONTENT_TYPE_UID).update({
|
|
1569
|
+
const updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1237
1570
|
documentId: entity.documentId,
|
|
1238
1571
|
data: {
|
|
1239
1572
|
embeddingId: result.embeddingId,
|
|
@@ -1246,8 +1579,110 @@ const embeddings = ({ strapi }) => ({
|
|
|
1246
1579
|
return entity;
|
|
1247
1580
|
}
|
|
1248
1581
|
},
|
|
1582
|
+
/**
|
|
1583
|
+
* Create embeddings with automatic chunking for large content
|
|
1584
|
+
* Creates multiple embedding entities, one per chunk
|
|
1585
|
+
*/
|
|
1586
|
+
async createChunkedEmbedding(data) {
|
|
1587
|
+
const { title, content, collectionType, fieldName, metadata, related } = data.data;
|
|
1588
|
+
const config2 = this.getConfig();
|
|
1589
|
+
const chunkSize = config2.chunkSize || 4e3;
|
|
1590
|
+
const chunkOverlap = config2.chunkOverlap || 200;
|
|
1591
|
+
const chunks = chunkContent(content, { chunkSize, chunkOverlap });
|
|
1592
|
+
if (chunks.length === 0) {
|
|
1593
|
+
throw new Error("Content is empty or could not be chunked");
|
|
1594
|
+
}
|
|
1595
|
+
if (chunks.length === 1) {
|
|
1596
|
+
const entity = await this.createEmbedding({
|
|
1597
|
+
data: {
|
|
1598
|
+
...data.data,
|
|
1599
|
+
autoChunk: false
|
|
1600
|
+
// Prevent recursive chunking
|
|
1601
|
+
}
|
|
1602
|
+
});
|
|
1603
|
+
return {
|
|
1604
|
+
entity,
|
|
1605
|
+
chunks: [entity],
|
|
1606
|
+
totalChunks: 1,
|
|
1607
|
+
wasChunked: false
|
|
1608
|
+
};
|
|
1609
|
+
}
|
|
1610
|
+
console.log(`Chunking content into ${chunks.length} parts (chunkSize: ${chunkSize}, overlap: ${chunkOverlap})`);
|
|
1611
|
+
const createdChunks = [];
|
|
1612
|
+
let parentDocumentId = null;
|
|
1613
|
+
for (const chunk of chunks) {
|
|
1614
|
+
const chunkTitle = formatChunkTitle(title, chunk.chunkIndex, chunk.totalChunks);
|
|
1615
|
+
const chunkMetadata = {
|
|
1616
|
+
...metadata,
|
|
1617
|
+
isChunk: true,
|
|
1618
|
+
chunkIndex: chunk.chunkIndex,
|
|
1619
|
+
totalChunks: chunk.totalChunks,
|
|
1620
|
+
startOffset: chunk.startOffset,
|
|
1621
|
+
endOffset: chunk.endOffset,
|
|
1622
|
+
originalTitle: title,
|
|
1623
|
+
parentDocumentId,
|
|
1624
|
+
estimatedTokens: estimateTokens(chunk.text)
|
|
1625
|
+
};
|
|
1626
|
+
const entityData = {
|
|
1627
|
+
title: chunkTitle,
|
|
1628
|
+
content: chunk.text,
|
|
1629
|
+
collectionType: collectionType || "standalone",
|
|
1630
|
+
fieldName: fieldName || "content",
|
|
1631
|
+
metadata: chunkMetadata
|
|
1632
|
+
};
|
|
1633
|
+
if (chunk.chunkIndex === 0 && related && related.__type && related.id) {
|
|
1634
|
+
entityData.related = related;
|
|
1635
|
+
}
|
|
1636
|
+
const entity = await strapi.documents(CONTENT_TYPE_UID$1).create({
|
|
1637
|
+
data: entityData
|
|
1638
|
+
});
|
|
1639
|
+
if (chunk.chunkIndex === 0) {
|
|
1640
|
+
parentDocumentId = entity.documentId;
|
|
1641
|
+
} else {
|
|
1642
|
+
await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1643
|
+
documentId: entity.documentId,
|
|
1644
|
+
data: {
|
|
1645
|
+
metadata: {
|
|
1646
|
+
...chunkMetadata,
|
|
1647
|
+
parentDocumentId
|
|
1648
|
+
}
|
|
1649
|
+
}
|
|
1650
|
+
});
|
|
1651
|
+
}
|
|
1652
|
+
if (pluginManager.isInitialized()) {
|
|
1653
|
+
try {
|
|
1654
|
+
const result = await pluginManager.createEmbedding({
|
|
1655
|
+
id: entity.documentId,
|
|
1656
|
+
title: chunkTitle,
|
|
1657
|
+
content: chunk.text,
|
|
1658
|
+
collectionType: collectionType || "standalone",
|
|
1659
|
+
fieldName: fieldName || "content"
|
|
1660
|
+
});
|
|
1661
|
+
const updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1662
|
+
documentId: entity.documentId,
|
|
1663
|
+
data: {
|
|
1664
|
+
embeddingId: result.embeddingId,
|
|
1665
|
+
embedding: result.embedding
|
|
1666
|
+
}
|
|
1667
|
+
});
|
|
1668
|
+
createdChunks.push(updatedEntity);
|
|
1669
|
+
} catch (error) {
|
|
1670
|
+
console.error(`Failed to create embedding for chunk ${chunk.chunkIndex}:`, error);
|
|
1671
|
+
createdChunks.push(entity);
|
|
1672
|
+
}
|
|
1673
|
+
} else {
|
|
1674
|
+
createdChunks.push(entity);
|
|
1675
|
+
}
|
|
1676
|
+
}
|
|
1677
|
+
return {
|
|
1678
|
+
entity: createdChunks[0],
|
|
1679
|
+
chunks: createdChunks,
|
|
1680
|
+
totalChunks: createdChunks.length,
|
|
1681
|
+
wasChunked: true
|
|
1682
|
+
};
|
|
1683
|
+
},
|
|
1249
1684
|
async deleteEmbedding(id) {
|
|
1250
|
-
const currentEntry = await strapi.documents(CONTENT_TYPE_UID).findOne({
|
|
1685
|
+
const currentEntry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1251
1686
|
documentId: String(id)
|
|
1252
1687
|
});
|
|
1253
1688
|
if (!currentEntry) {
|
|
@@ -1260,25 +1695,174 @@ const embeddings = ({ strapi }) => ({
|
|
|
1260
1695
|
console.error("Failed to delete from vector store:", error);
|
|
1261
1696
|
}
|
|
1262
1697
|
}
|
|
1263
|
-
const deletedEntry = await strapi.documents(CONTENT_TYPE_UID).delete({
|
|
1698
|
+
const deletedEntry = await strapi.documents(CONTENT_TYPE_UID$1).delete({
|
|
1264
1699
|
documentId: String(id)
|
|
1265
1700
|
});
|
|
1266
1701
|
return deletedEntry;
|
|
1267
1702
|
},
|
|
1703
|
+
/**
|
|
1704
|
+
* Find all chunks related to a parent document
|
|
1705
|
+
* Returns chunks including the parent itself
|
|
1706
|
+
*/
|
|
1707
|
+
async findRelatedChunks(documentId) {
|
|
1708
|
+
const entry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1709
|
+
documentId
|
|
1710
|
+
});
|
|
1711
|
+
if (!entry) {
|
|
1712
|
+
return [];
|
|
1713
|
+
}
|
|
1714
|
+
const metadata = entry.metadata;
|
|
1715
|
+
const parentId = metadata?.parentDocumentId || documentId;
|
|
1716
|
+
const isChunked = metadata?.isChunk === true;
|
|
1717
|
+
if (!isChunked && !metadata?.parentDocumentId) {
|
|
1718
|
+
const children = await strapi.documents(CONTENT_TYPE_UID$1).findMany({
|
|
1719
|
+
filters: {
|
|
1720
|
+
metadata: {
|
|
1721
|
+
$containsi: `"parentDocumentId":"${documentId}"`
|
|
1722
|
+
}
|
|
1723
|
+
}
|
|
1724
|
+
});
|
|
1725
|
+
if (children.length === 0) {
|
|
1726
|
+
return [entry];
|
|
1727
|
+
}
|
|
1728
|
+
return [entry, ...children];
|
|
1729
|
+
}
|
|
1730
|
+
const allChunks = await strapi.documents(CONTENT_TYPE_UID$1).findMany({
|
|
1731
|
+
filters: {
|
|
1732
|
+
$or: [
|
|
1733
|
+
{ documentId: parentId },
|
|
1734
|
+
{
|
|
1735
|
+
metadata: {
|
|
1736
|
+
$containsi: `"parentDocumentId":"${parentId}"`
|
|
1737
|
+
}
|
|
1738
|
+
}
|
|
1739
|
+
]
|
|
1740
|
+
}
|
|
1741
|
+
});
|
|
1742
|
+
return allChunks.sort((a, b) => {
|
|
1743
|
+
const aIndex = a.metadata?.chunkIndex ?? 0;
|
|
1744
|
+
const bIndex = b.metadata?.chunkIndex ?? 0;
|
|
1745
|
+
return aIndex - bIndex;
|
|
1746
|
+
});
|
|
1747
|
+
},
|
|
1748
|
+
/**
|
|
1749
|
+
* Delete all chunks related to a parent document
|
|
1750
|
+
*/
|
|
1751
|
+
async deleteRelatedChunks(documentId) {
|
|
1752
|
+
const chunks = await this.findRelatedChunks(documentId);
|
|
1753
|
+
for (const chunk of chunks) {
|
|
1754
|
+
if (pluginManager.isInitialized()) {
|
|
1755
|
+
try {
|
|
1756
|
+
await pluginManager.deleteEmbedding(chunk.documentId);
|
|
1757
|
+
} catch (error) {
|
|
1758
|
+
console.error(`Failed to delete chunk ${chunk.documentId} from vector store:`, error);
|
|
1759
|
+
}
|
|
1760
|
+
}
|
|
1761
|
+
await strapi.documents(CONTENT_TYPE_UID$1).delete({
|
|
1762
|
+
documentId: chunk.documentId
|
|
1763
|
+
});
|
|
1764
|
+
}
|
|
1765
|
+
return chunks.length;
|
|
1766
|
+
},
|
|
1767
|
+
/**
|
|
1768
|
+
* Update embeddings with automatic chunking support
|
|
1769
|
+
* Handles re-chunking when content changes and exceeds chunk size
|
|
1770
|
+
*/
|
|
1771
|
+
async updateChunkedEmbedding(id, data) {
|
|
1772
|
+
const { title, content, metadata, autoChunk } = data.data;
|
|
1773
|
+
const config2 = this.getConfig();
|
|
1774
|
+
const currentEntry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1775
|
+
documentId: id
|
|
1776
|
+
});
|
|
1777
|
+
if (!currentEntry) {
|
|
1778
|
+
throw new Error(`Embedding with id ${id} not found`);
|
|
1779
|
+
}
|
|
1780
|
+
const currentMetadata = currentEntry.metadata;
|
|
1781
|
+
const parentDocumentId = currentMetadata?.parentDocumentId || id;
|
|
1782
|
+
const newContent = content ?? currentEntry.content;
|
|
1783
|
+
const newTitle = title ?? currentMetadata?.originalTitle ?? currentEntry.title;
|
|
1784
|
+
const shouldChunk = autoChunk ?? config2.autoChunk;
|
|
1785
|
+
const chunkSize = config2.chunkSize || 4e3;
|
|
1786
|
+
const contentNeedsChunking = shouldChunk && needsChunking(newContent, chunkSize);
|
|
1787
|
+
const existingChunks = await this.findRelatedChunks(id);
|
|
1788
|
+
let originalRelated;
|
|
1789
|
+
const firstChunk = existingChunks.find(
|
|
1790
|
+
(c) => c.metadata?.chunkIndex === 0 || c.documentId === parentDocumentId
|
|
1791
|
+
);
|
|
1792
|
+
if (firstChunk?.related) {
|
|
1793
|
+
originalRelated = firstChunk.related;
|
|
1794
|
+
}
|
|
1795
|
+
const deletedCount = await this.deleteRelatedChunks(id);
|
|
1796
|
+
console.log(`Deleted ${deletedCount} existing chunk(s) for update`);
|
|
1797
|
+
const preservedMetadata = { ...metadata };
|
|
1798
|
+
delete preservedMetadata?.isChunk;
|
|
1799
|
+
delete preservedMetadata?.chunkIndex;
|
|
1800
|
+
delete preservedMetadata?.totalChunks;
|
|
1801
|
+
delete preservedMetadata?.startOffset;
|
|
1802
|
+
delete preservedMetadata?.endOffset;
|
|
1803
|
+
delete preservedMetadata?.originalTitle;
|
|
1804
|
+
delete preservedMetadata?.parentDocumentId;
|
|
1805
|
+
delete preservedMetadata?.estimatedTokens;
|
|
1806
|
+
if (contentNeedsChunking) {
|
|
1807
|
+
return await this.createChunkedEmbedding({
|
|
1808
|
+
data: {
|
|
1809
|
+
title: newTitle.replace(/\s*\[Part \d+\/\d+\]$/, ""),
|
|
1810
|
+
// Remove old part suffix
|
|
1811
|
+
content: newContent,
|
|
1812
|
+
collectionType: currentEntry.collectionType || "standalone",
|
|
1813
|
+
fieldName: currentEntry.fieldName || "content",
|
|
1814
|
+
metadata: preservedMetadata,
|
|
1815
|
+
related: originalRelated,
|
|
1816
|
+
autoChunk: true
|
|
1817
|
+
}
|
|
1818
|
+
});
|
|
1819
|
+
} else {
|
|
1820
|
+
const entity = await this.createEmbedding({
|
|
1821
|
+
data: {
|
|
1822
|
+
title: newTitle.replace(/\s*\[Part \d+\/\d+\]$/, ""),
|
|
1823
|
+
// Remove old part suffix
|
|
1824
|
+
content: newContent,
|
|
1825
|
+
collectionType: currentEntry.collectionType || "standalone",
|
|
1826
|
+
fieldName: currentEntry.fieldName || "content",
|
|
1827
|
+
metadata: preservedMetadata,
|
|
1828
|
+
related: originalRelated,
|
|
1829
|
+
autoChunk: false
|
|
1830
|
+
}
|
|
1831
|
+
});
|
|
1832
|
+
return {
|
|
1833
|
+
entity,
|
|
1834
|
+
chunks: [entity],
|
|
1835
|
+
totalChunks: 1,
|
|
1836
|
+
wasChunked: false
|
|
1837
|
+
};
|
|
1838
|
+
}
|
|
1839
|
+
},
|
|
1268
1840
|
async updateEmbedding(id, data) {
|
|
1269
|
-
const { title, content, metadata } = data.data;
|
|
1270
|
-
const
|
|
1841
|
+
const { title, content, metadata, autoChunk } = data.data;
|
|
1842
|
+
const config2 = this.getConfig();
|
|
1843
|
+
const currentEntry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1271
1844
|
documentId: id
|
|
1272
1845
|
});
|
|
1273
1846
|
if (!currentEntry) {
|
|
1274
1847
|
throw new Error(`Embedding with id ${id} not found`);
|
|
1275
1848
|
}
|
|
1849
|
+
const currentMetadata = currentEntry.metadata;
|
|
1850
|
+
const isCurrentlyChunked = currentMetadata?.isChunk === true;
|
|
1851
|
+
const hasRelatedChunks = currentMetadata?.parentDocumentId || isCurrentlyChunked;
|
|
1852
|
+
const shouldChunk = autoChunk ?? config2.autoChunk;
|
|
1853
|
+
const chunkSize = config2.chunkSize || 4e3;
|
|
1854
|
+
const newContent = content ?? currentEntry.content;
|
|
1855
|
+
const contentNeedsChunking = shouldChunk && needsChunking(newContent, chunkSize);
|
|
1856
|
+
const contentChanged = content !== void 0 && content !== currentEntry.content;
|
|
1857
|
+
if (hasRelatedChunks || contentNeedsChunking) {
|
|
1858
|
+
const result = await this.updateChunkedEmbedding(id, data);
|
|
1859
|
+
return result.entity;
|
|
1860
|
+
}
|
|
1276
1861
|
const updateData = {};
|
|
1277
1862
|
if (title !== void 0) updateData.title = title;
|
|
1278
1863
|
if (content !== void 0) updateData.content = content;
|
|
1279
1864
|
if (metadata !== void 0) updateData.metadata = metadata;
|
|
1280
|
-
|
|
1281
|
-
let updatedEntity = await strapi.documents(CONTENT_TYPE_UID).update({
|
|
1865
|
+
let updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1282
1866
|
documentId: id,
|
|
1283
1867
|
data: updateData
|
|
1284
1868
|
});
|
|
@@ -1292,7 +1876,7 @@ const embeddings = ({ strapi }) => ({
|
|
|
1292
1876
|
collectionType: currentEntry.collectionType || "standalone",
|
|
1293
1877
|
fieldName: currentEntry.fieldName || "content"
|
|
1294
1878
|
});
|
|
1295
|
-
updatedEntity = await strapi.documents(CONTENT_TYPE_UID).update({
|
|
1879
|
+
updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1296
1880
|
documentId: id,
|
|
1297
1881
|
data: {
|
|
1298
1882
|
embeddingId: result.embeddingId,
|
|
@@ -1321,7 +1905,7 @@ const embeddings = ({ strapi }) => ({
|
|
|
1321
1905
|
}
|
|
1322
1906
|
},
|
|
1323
1907
|
async getEmbedding(id) {
|
|
1324
|
-
return await strapi.documents(CONTENT_TYPE_UID).findOne({
|
|
1908
|
+
return await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1325
1909
|
documentId: String(id)
|
|
1326
1910
|
});
|
|
1327
1911
|
},
|
|
@@ -1330,12 +1914,12 @@ const embeddings = ({ strapi }) => ({
|
|
|
1330
1914
|
const pageSize = params?.pageSize || 10;
|
|
1331
1915
|
const start = (page - 1) * pageSize;
|
|
1332
1916
|
const [data, totalCount] = await Promise.all([
|
|
1333
|
-
strapi.documents(CONTENT_TYPE_UID).findMany({
|
|
1917
|
+
strapi.documents(CONTENT_TYPE_UID$1).findMany({
|
|
1334
1918
|
limit: pageSize,
|
|
1335
1919
|
start,
|
|
1336
1920
|
filters: params?.filters
|
|
1337
1921
|
}),
|
|
1338
|
-
strapi.documents(CONTENT_TYPE_UID).count({
|
|
1922
|
+
strapi.documents(CONTENT_TYPE_UID$1).count({
|
|
1339
1923
|
filters: params?.filters
|
|
1340
1924
|
})
|
|
1341
1925
|
]);
|
|
@@ -1346,8 +1930,216 @@ const embeddings = ({ strapi }) => ({
|
|
|
1346
1930
|
};
|
|
1347
1931
|
}
|
|
1348
1932
|
});
|
|
1933
|
+
const PLUGIN_ID = "strapi-content-embeddings";
const CONTENT_TYPE_UID = `plugin::${PLUGIN_ID}.embedding`;
/**
 * Service factory for reconciling the Strapi embedding entries with the
 * records held in Neon (treated as the source of truth).
 *
 * @param {{ strapi: object }} context - Object carrying the Strapi instance.
 * @returns {{ syncFromNeon: Function, getSyncStatus: Function }}
 */
const sync = ({ strapi }) => {
  // Rows fetched per request while reading all Strapi entries.
  const STRAPI_PAGE_SIZE = 1e3;

  // Reads every Strapi embedding entry page by page, so there is no fixed row
  // cap that would silently truncate the comparison. A fixed sort keeps the
  // pages stable between requests.
  const fetchAllStrapiEmbeddings = async () => {
    const all = [];
    for (let start = 0; ; start += STRAPI_PAGE_SIZE) {
      const page = await strapi.documents(CONTENT_TYPE_UID).findMany({
        sort: "documentId:asc",
        start,
        limit: STRAPI_PAGE_SIZE
      });
      all.push(...page);
      if (page.length < STRAPI_PAGE_SIZE) {
        return all;
      }
    }
  };

  // Maps strapiId -> Neon record; records without a strapiId are left out.
  const indexNeonByStrapiId = (neonEmbeddings) => {
    const byStrapiId = /* @__PURE__ */ new Map();
    for (const neon of neonEmbeddings) {
      if (neon.strapiId) {
        byStrapiId.set(neon.strapiId, neon);
      }
    }
    return byStrapiId;
  };

  // Maps documentId -> Strapi entry.
  const indexStrapiByDocumentId = (strapiEmbeddings) => {
    const byDocumentId = /* @__PURE__ */ new Map();
    for (const entry of strapiEmbeddings) {
      byDocumentId.set(entry.documentId, entry);
    }
    return byDocumentId;
  };

  const describe = (id, title) => `${id} (${title || "untitled"})`;

  return {
    /**
     * Sync embeddings from Neon DB to Strapi DB
     *
     * This performs the following operations:
     * 1. Fetches all embeddings from Neon DB (source of truth)
     * 2. Fetches all embeddings from Strapi DB
     * 3. Creates missing entries in Strapi that exist in Neon
     * 4. Updates Strapi entries where content or title differs from Neon,
     *    or where the Strapi entry has no embeddingId
     * 5. Optionally removes orphaned Strapi entries (no matching Neon record)
     *
     * @param {{ removeOrphans?: boolean, dryRun?: boolean }} [options2] - With
     *   `dryRun`, nothing is written; planned actions are counted and listed
     *   with a "[DRY RUN] " prefix.
     * @returns {Promise<object>} Report with counts, per-action details and
     *   errors; `success` is true only when no error was recorded. Failures
     *   are reported in `errors` rather than thrown.
     */
    async syncFromNeon(options2) {
      const { removeOrphans = false, dryRun = false } = options2 || {};
      const result = {
        success: false,
        timestamp: (/* @__PURE__ */ new Date()).toISOString(),
        neonCount: 0,
        strapiCount: 0,
        actions: {
          created: 0,
          updated: 0,
          orphansRemoved: 0
        },
        details: {
          created: [],
          updated: [],
          orphansRemoved: []
        },
        errors: []
      };
      if (!pluginManager.isInitialized()) {
        result.errors.push(
          "Plugin manager not initialized. Check your Neon and OpenAI configuration."
        );
        return result;
      }
      const prefix = dryRun ? "[DRY RUN] " : "";
      try {
        const neonEmbeddings = await pluginManager.getAllNeonEmbeddings();
        result.neonCount = neonEmbeddings.length;
        const strapiEmbeddings = await fetchAllStrapiEmbeddings();
        result.strapiCount = strapiEmbeddings.length;
        const neonBystrapiId = indexNeonByStrapiId(neonEmbeddings);
        const strapiByDocumentId = indexStrapiByDocumentId(strapiEmbeddings);
        for (const neon of neonEmbeddings) {
          if (!neon.strapiId) {
            result.errors.push(
              `Neon embedding ${neon.id} has no strapiId in metadata`
            );
            continue;
          }
          const label = describe(neon.strapiId, neon.title);
          const existingStrapi = strapiByDocumentId.get(neon.strapiId);
          if (!existingStrapi) {
            if (!dryRun) {
              try {
                await strapi.documents(CONTENT_TYPE_UID).create({
                  data: {
                    documentId: neon.strapiId,
                    title: neon.title,
                    content: neon.content,
                    embeddingId: neon.id,
                    collectionType: neon.collectionType,
                    fieldName: neon.fieldName
                  }
                });
              } catch (error) {
                result.errors.push(
                  `Failed to create Strapi entry for ${neon.strapiId}: ${error}`
                );
                continue;
              }
            }
            result.actions.created++;
            result.details.created.push(`${prefix}${label}`);
            continue;
          }
          const contentChanged = existingStrapi.content !== neon.content;
          const titleChanged = existingStrapi.title !== neon.title;
          const embeddingIdMissing = !existingStrapi.embeddingId;
          if (!contentChanged && !titleChanged && !embeddingIdMissing) {
            continue;
          }
          if (!dryRun) {
            try {
              await strapi.documents(CONTENT_TYPE_UID).update({
                documentId: neon.strapiId,
                data: {
                  title: neon.title,
                  content: neon.content,
                  embeddingId: neon.id
                }
              });
            } catch (error) {
              result.errors.push(
                `Failed to update Strapi entry ${neon.strapiId}: ${error}`
              );
              continue;
            }
          }
          result.actions.updated++;
          result.details.updated.push(`${prefix}${label}`);
        }
        if (removeOrphans) {
          for (const strapiEmbed of strapiEmbeddings) {
            if (neonBystrapiId.has(strapiEmbed.documentId)) {
              continue;
            }
            if (!dryRun) {
              try {
                await strapi.documents(CONTENT_TYPE_UID).delete({
                  documentId: strapiEmbed.documentId
                });
              } catch (error) {
                result.errors.push(
                  `Failed to remove orphan ${strapiEmbed.documentId}: ${error}`
                );
                continue;
              }
            }
            result.actions.orphansRemoved++;
            result.details.orphansRemoved.push(
              `${prefix}${describe(strapiEmbed.documentId, strapiEmbed.title)}`
            );
          }
        }
        result.success = result.errors.length === 0;
        return result;
      } catch (error) {
        result.errors.push(`Sync failed: ${error}`);
        return result;
      }
    },
    /**
     * Get sync status - compare Neon and Strapi without making changes
     *
     * @returns {Promise<{ neonCount: number, strapiCount: number, inSync: boolean, missingInStrapi: number, missingInNeon: number, contentDifferences: number }>}
     * @throws {Error} When the plugin manager is not initialized.
     */
    async getSyncStatus() {
      if (!pluginManager.isInitialized()) {
        throw new Error("Plugin manager not initialized");
      }
      const neonEmbeddings = await pluginManager.getAllNeonEmbeddings();
      const strapiEmbeddings = await fetchAllStrapiEmbeddings();
      const neonBystrapiId = indexNeonByStrapiId(neonEmbeddings);
      const strapiByDocumentId = indexStrapiByDocumentId(strapiEmbeddings);
      let missingInStrapi = 0;
      let contentDifferences = 0;
      for (const neon of neonEmbeddings) {
        if (!neon.strapiId) continue;
        const strapiRecord = strapiByDocumentId.get(neon.strapiId);
        if (!strapiRecord) {
          missingInStrapi++;
        } else if (strapiRecord.content !== neon.content) {
          contentDifferences++;
        }
      }
      let missingInNeon = 0;
      for (const entry of strapiEmbeddings) {
        if (!neonBystrapiId.has(entry.documentId)) {
          missingInNeon++;
        }
      }
      return {
        neonCount: neonEmbeddings.length,
        strapiCount: strapiEmbeddings.length,
        inSync: missingInStrapi === 0 && missingInNeon === 0 && contentDifferences === 0,
        missingInStrapi,
        missingInNeon,
        contentDifferences
      };
    }
  };
};
|
|
1349
2140
|
// Bundles the service factories defined in this file (embeddings and sync) into one object.
const services = {
|
|
1350
|
-
embeddings
|
|
2141
|
+
embeddings,
|
|
2142
|
+
sync
|
|
1351
2143
|
};
|
|
1352
2144
|
const index = {
|
|
1353
2145
|
register,
|
|
@@ -1362,3 +2154,4 @@ const index = {
|
|
|
1362
2154
|
middlewares
|
|
1363
2155
|
};
|
|
1364
2156
|
module.exports = index;
|
|
2157
|
+
//# sourceMappingURL=index.js.map
|