strapi-content-embeddings 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +187 -0
- package/dist/_chunks/{App-Swmo_WMf.js → App-Rq72tIgS.js} +37 -55
- package/dist/_chunks/App-Rq72tIgS.js.map +1 -0
- package/dist/_chunks/{App-BlCKKuQN.mjs → App-j180lztd.mjs} +37 -55
- package/dist/_chunks/App-j180lztd.mjs.map +1 -0
- package/dist/_chunks/en-B4KWt_jN.js +1 -0
- package/dist/_chunks/en-B4KWt_jN.js.map +1 -0
- package/dist/_chunks/en-Byx4XI2L.mjs +1 -0
- package/dist/_chunks/en-Byx4XI2L.mjs.map +1 -0
- package/dist/_chunks/{index-CXVoFiJp.mjs → index-B3j0IFUi.mjs} +70 -27
- package/dist/_chunks/index-B3j0IFUi.mjs.map +1 -0
- package/dist/_chunks/{index-BpKkUIJY.js → index-jf6vikTZ.js} +70 -27
- package/dist/_chunks/index-jf6vikTZ.js.map +1 -0
- package/dist/admin/index.js +2 -1
- package/dist/admin/index.js.map +1 -0
- package/dist/admin/index.mjs +2 -1
- package/dist/admin/index.mjs.map +1 -0
- package/dist/admin/src/components/custom/MarkdownEditor.d.ts +1 -1
- package/dist/server/index.js +850 -57
- package/dist/server/index.js.map +1 -0
- package/dist/server/index.mjs +850 -57
- package/dist/server/index.mjs.map +1 -0
- package/dist/server/src/config/index.d.ts +9 -0
- package/dist/server/src/controllers/controller.d.ts +14 -0
- package/dist/server/src/controllers/index.d.ts +2 -0
- package/dist/server/src/index.d.ts +38 -2
- package/dist/server/src/mcp/tools/create-embedding.d.ts +6 -0
- package/dist/server/src/mcp/tools/index.d.ts +4 -0
- package/dist/server/src/plugin-manager.d.ts +16 -0
- package/dist/server/src/routes/content-api.d.ts +10 -0
- package/dist/server/src/routes/index.d.ts +10 -0
- package/dist/server/src/services/embeddings.d.ts +43 -2
- package/dist/server/src/services/index.d.ts +23 -2
- package/dist/server/src/services/sync.d.ts +48 -0
- package/dist/server/src/utils/chunking.d.ts +44 -0
- package/package.json +1 -1
package/dist/server/index.mjs
CHANGED
|
@@ -19,7 +19,10 @@ const config = {
|
|
|
19
19
|
default: {
|
|
20
20
|
openAIApiKey: "",
|
|
21
21
|
neonConnectionString: "",
|
|
22
|
-
embeddingModel: "text-embedding-3-small"
|
|
22
|
+
embeddingModel: "text-embedding-3-small",
|
|
23
|
+
chunkSize: 4e3,
|
|
24
|
+
chunkOverlap: 200,
|
|
25
|
+
autoChunk: false
|
|
23
26
|
},
|
|
24
27
|
validator(config2) {
|
|
25
28
|
if (!config2.openAIApiKey) {
|
|
@@ -37,6 +40,11 @@ const config = {
|
|
|
37
40
|
`strapi-content-embeddings: Invalid embeddingModel "${config2.embeddingModel}". Valid options: ${Object.keys(EMBEDDING_MODELS).join(", ")}. Defaulting to "text-embedding-3-small".`
|
|
38
41
|
);
|
|
39
42
|
}
|
|
43
|
+
if (config2.chunkSize && (config2.chunkSize < 100 || config2.chunkSize > 8e3)) {
|
|
44
|
+
console.warn(
|
|
45
|
+
`strapi-content-embeddings: chunkSize ${config2.chunkSize} is outside recommended range (100-8000). Using default value of 4000.`
|
|
46
|
+
);
|
|
47
|
+
}
|
|
40
48
|
}
|
|
41
49
|
};
|
|
42
50
|
class PluginManager {
|
|
@@ -281,6 +289,56 @@ Context:
|
|
|
281
289
|
isInitialized() {
|
|
282
290
|
return !!(this.embeddings && this.chat && this.pool);
|
|
283
291
|
}
|
|
292
|
+
/**
|
|
293
|
+
* Get all embeddings from Neon DB
|
|
294
|
+
* Returns the metadata (including Strapi documentId) for each embedding
|
|
295
|
+
*/
|
|
296
|
+
async getAllNeonEmbeddings() {
|
|
297
|
+
if (!this.pool) {
|
|
298
|
+
throw new Error("Plugin manager not initialized");
|
|
299
|
+
}
|
|
300
|
+
try {
|
|
301
|
+
const result = await this.pool.query(`
|
|
302
|
+
SELECT
|
|
303
|
+
id,
|
|
304
|
+
content,
|
|
305
|
+
metadata->>'id' as strapi_id,
|
|
306
|
+
metadata->>'title' as title,
|
|
307
|
+
metadata->>'collectionType' as collection_type,
|
|
308
|
+
metadata->>'fieldName' as field_name
|
|
309
|
+
FROM embeddings_documents
|
|
310
|
+
ORDER BY id
|
|
311
|
+
`);
|
|
312
|
+
return result.rows.map((row) => ({
|
|
313
|
+
id: row.id,
|
|
314
|
+
strapiId: row.strapi_id,
|
|
315
|
+
title: row.title || "",
|
|
316
|
+
content: row.content || "",
|
|
317
|
+
collectionType: row.collection_type || "standalone",
|
|
318
|
+
fieldName: row.field_name || "content"
|
|
319
|
+
}));
|
|
320
|
+
} catch (error) {
|
|
321
|
+
console.error(`Failed to get Neon embeddings: ${error}`);
|
|
322
|
+
throw new Error(`Failed to get Neon embeddings: ${error}`);
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
/**
|
|
326
|
+
* Delete an embedding from Neon by its Neon UUID (not Strapi ID)
|
|
327
|
+
*/
|
|
328
|
+
async deleteNeonEmbeddingById(neonId) {
|
|
329
|
+
if (!this.pool) {
|
|
330
|
+
throw new Error("Plugin manager not initialized");
|
|
331
|
+
}
|
|
332
|
+
try {
|
|
333
|
+
await this.pool.query(
|
|
334
|
+
`DELETE FROM embeddings_documents WHERE id = $1`,
|
|
335
|
+
[neonId]
|
|
336
|
+
);
|
|
337
|
+
} catch (error) {
|
|
338
|
+
console.error(`Failed to delete Neon embedding: ${error}`);
|
|
339
|
+
throw new Error(`Failed to delete Neon embedding: ${error}`);
|
|
340
|
+
}
|
|
341
|
+
}
|
|
284
342
|
async destroy() {
|
|
285
343
|
if (this.pool) {
|
|
286
344
|
await this.pool.end();
|
|
@@ -580,7 +638,7 @@ async function handleGetEmbedding(strapi, args) {
|
|
|
580
638
|
}
|
|
581
639
|
const createEmbeddingTool = {
|
|
582
640
|
name: "create_embedding",
|
|
583
|
-
description: "Create a new embedding from text content. The content will be vectorized and stored for semantic search.",
|
|
641
|
+
description: "Create a new embedding from text content. The content will be vectorized and stored for semantic search. For large content (over 4000 characters), enable autoChunk to automatically split into multiple embeddings.",
|
|
584
642
|
inputSchema: {
|
|
585
643
|
type: "object",
|
|
586
644
|
properties: {
|
|
@@ -595,21 +653,68 @@ const createEmbeddingTool = {
|
|
|
595
653
|
metadata: {
|
|
596
654
|
type: "object",
|
|
597
655
|
description: "Optional metadata to associate with the embedding (tags, source, etc.)"
|
|
656
|
+
},
|
|
657
|
+
autoChunk: {
|
|
658
|
+
type: "boolean",
|
|
659
|
+
description: "Automatically split large content into chunks (default: false). When enabled, content over 4000 characters will be split into multiple embeddings with overlap for context preservation."
|
|
598
660
|
}
|
|
599
661
|
},
|
|
600
662
|
required: ["title", "content"]
|
|
601
663
|
}
|
|
602
664
|
};
|
|
603
665
|
async function handleCreateEmbedding(strapi, args) {
|
|
604
|
-
const { title, content, metadata } = args;
|
|
666
|
+
const { title, content, metadata, autoChunk } = args;
|
|
605
667
|
try {
|
|
606
668
|
const embeddingsService = strapi.plugin("strapi-content-embeddings").service("embeddings");
|
|
669
|
+
if (autoChunk) {
|
|
670
|
+
const result = await embeddingsService.createChunkedEmbedding({
|
|
671
|
+
data: {
|
|
672
|
+
title,
|
|
673
|
+
content,
|
|
674
|
+
metadata: metadata || {},
|
|
675
|
+
collectionType: "standalone",
|
|
676
|
+
fieldName: "content"
|
|
677
|
+
}
|
|
678
|
+
});
|
|
679
|
+
return {
|
|
680
|
+
content: [
|
|
681
|
+
{
|
|
682
|
+
type: "text",
|
|
683
|
+
text: JSON.stringify(
|
|
684
|
+
{
|
|
685
|
+
success: true,
|
|
686
|
+
message: result.wasChunked ? `Content chunked into ${result.totalChunks} embeddings` : "Embedding created successfully (no chunking needed)",
|
|
687
|
+
wasChunked: result.wasChunked,
|
|
688
|
+
totalChunks: result.totalChunks,
|
|
689
|
+
primaryEmbedding: {
|
|
690
|
+
id: result.entity.id,
|
|
691
|
+
documentId: result.entity.documentId,
|
|
692
|
+
title: result.entity.title,
|
|
693
|
+
embeddingId: result.entity.embeddingId
|
|
694
|
+
},
|
|
695
|
+
chunks: result.chunks.map((chunk) => ({
|
|
696
|
+
documentId: chunk.documentId,
|
|
697
|
+
title: chunk.title,
|
|
698
|
+
contentLength: chunk.content?.length || 0
|
|
699
|
+
})),
|
|
700
|
+
contentLength: content.length,
|
|
701
|
+
estimatedTokens: Math.ceil(content.length / 4)
|
|
702
|
+
},
|
|
703
|
+
null,
|
|
704
|
+
2
|
|
705
|
+
)
|
|
706
|
+
}
|
|
707
|
+
]
|
|
708
|
+
};
|
|
709
|
+
}
|
|
607
710
|
const embedding2 = await embeddingsService.createEmbedding({
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
711
|
+
data: {
|
|
712
|
+
title,
|
|
713
|
+
content,
|
|
714
|
+
metadata: metadata || {},
|
|
715
|
+
collectionType: "standalone",
|
|
716
|
+
fieldName: "content"
|
|
717
|
+
}
|
|
613
718
|
});
|
|
614
719
|
return {
|
|
615
720
|
content: [
|
|
@@ -627,7 +732,8 @@ async function handleCreateEmbedding(strapi, args) {
|
|
|
627
732
|
contentLength: content.length,
|
|
628
733
|
metadata: embedding2.metadata,
|
|
629
734
|
createdAt: embedding2.createdAt
|
|
630
|
-
}
|
|
735
|
+
},
|
|
736
|
+
hint: content.length > 4e3 ? "Content is large. Consider using autoChunk: true for better search results." : void 0
|
|
631
737
|
},
|
|
632
738
|
null,
|
|
633
739
|
2
|
|
@@ -713,10 +819,10 @@ function createMcpServer(strapi) {
|
|
|
713
819
|
});
|
|
714
820
|
return server;
|
|
715
821
|
}
|
|
716
|
-
const PLUGIN_ID$
|
|
822
|
+
const PLUGIN_ID$5 = "strapi-content-embeddings";
|
|
717
823
|
const OAUTH_PLUGIN_ID = "strapi-oauth-mcp-manager";
|
|
718
824
|
function createFallbackAuthMiddleware(strapi) {
|
|
719
|
-
const mcpPath = `/api/${PLUGIN_ID$
|
|
825
|
+
const mcpPath = `/api/${PLUGIN_ID$5}/mcp`;
|
|
720
826
|
return async (ctx, next) => {
|
|
721
827
|
if (!ctx.path.startsWith(mcpPath)) {
|
|
722
828
|
return next();
|
|
@@ -742,35 +848,35 @@ const bootstrap = async ({ strapi }) => {
|
|
|
742
848
|
section: "plugins",
|
|
743
849
|
displayName: "Read",
|
|
744
850
|
uid: "read",
|
|
745
|
-
pluginName: PLUGIN_ID$
|
|
851
|
+
pluginName: PLUGIN_ID$5
|
|
746
852
|
},
|
|
747
853
|
{
|
|
748
854
|
section: "plugins",
|
|
749
855
|
displayName: "Update",
|
|
750
856
|
uid: "update",
|
|
751
|
-
pluginName: PLUGIN_ID$
|
|
857
|
+
pluginName: PLUGIN_ID$5
|
|
752
858
|
},
|
|
753
859
|
{
|
|
754
860
|
section: "plugins",
|
|
755
861
|
displayName: "Create",
|
|
756
862
|
uid: "create",
|
|
757
|
-
pluginName: PLUGIN_ID$
|
|
863
|
+
pluginName: PLUGIN_ID$5
|
|
758
864
|
},
|
|
759
865
|
{
|
|
760
866
|
section: "plugins",
|
|
761
867
|
displayName: "Delete",
|
|
762
868
|
uid: "delete",
|
|
763
|
-
pluginName: PLUGIN_ID$
|
|
869
|
+
pluginName: PLUGIN_ID$5
|
|
764
870
|
},
|
|
765
871
|
{
|
|
766
872
|
section: "plugins",
|
|
767
873
|
displayName: "Chat",
|
|
768
874
|
uid: "chat",
|
|
769
|
-
pluginName: PLUGIN_ID$
|
|
875
|
+
pluginName: PLUGIN_ID$5
|
|
770
876
|
}
|
|
771
877
|
];
|
|
772
878
|
await strapi.admin.services.permission.actionProvider.registerMany(actions);
|
|
773
|
-
const pluginConfig = strapi.config.get(`plugin::${PLUGIN_ID$
|
|
879
|
+
const pluginConfig = strapi.config.get(`plugin::${PLUGIN_ID$5}`);
|
|
774
880
|
if (pluginConfig?.openAIApiKey && pluginConfig?.neonConnectionString) {
|
|
775
881
|
try {
|
|
776
882
|
await pluginManager.initialize({
|
|
@@ -779,42 +885,42 @@ const bootstrap = async ({ strapi }) => {
|
|
|
779
885
|
embeddingModel: pluginConfig.embeddingModel
|
|
780
886
|
});
|
|
781
887
|
strapi.contentEmbeddingsManager = pluginManager;
|
|
782
|
-
strapi.log.info(`[${PLUGIN_ID$
|
|
888
|
+
strapi.log.info(`[${PLUGIN_ID$5}] Plugin initialized successfully`);
|
|
783
889
|
} catch (error) {
|
|
784
|
-
strapi.log.error(`[${PLUGIN_ID$
|
|
890
|
+
strapi.log.error(`[${PLUGIN_ID$5}] Failed to initialize:`, error);
|
|
785
891
|
}
|
|
786
892
|
} else {
|
|
787
893
|
strapi.log.warn(
|
|
788
|
-
`[${PLUGIN_ID$
|
|
894
|
+
`[${PLUGIN_ID$5}] Missing configuration. Set openAIApiKey and neonConnectionString in plugin config.`
|
|
789
895
|
);
|
|
790
896
|
}
|
|
791
|
-
const plugin = strapi.plugin(PLUGIN_ID$
|
|
897
|
+
const plugin = strapi.plugin(PLUGIN_ID$5);
|
|
792
898
|
plugin.createMcpServer = () => createMcpServer(strapi);
|
|
793
899
|
plugin.sessions = /* @__PURE__ */ new Map();
|
|
794
900
|
const oauthPlugin = strapi.plugin(OAUTH_PLUGIN_ID);
|
|
795
901
|
if (oauthPlugin) {
|
|
796
|
-
strapi.log.info(`[${PLUGIN_ID$
|
|
902
|
+
strapi.log.info(`[${PLUGIN_ID$5}] OAuth manager detected - OAuth + API token auth enabled`);
|
|
797
903
|
} else {
|
|
798
904
|
const fallbackMiddleware = createFallbackAuthMiddleware();
|
|
799
905
|
strapi.server.use(fallbackMiddleware);
|
|
800
|
-
strapi.log.info(`[${PLUGIN_ID$
|
|
906
|
+
strapi.log.info(`[${PLUGIN_ID$5}] Using API token authentication (OAuth manager not installed)`);
|
|
801
907
|
}
|
|
802
|
-
strapi.log.info(`[${PLUGIN_ID$
|
|
908
|
+
strapi.log.info(`[${PLUGIN_ID$5}] MCP endpoint available at: /api/${PLUGIN_ID$5}/mcp`);
|
|
803
909
|
};
|
|
804
910
|
const destroy = async ({ strapi }) => {
|
|
805
911
|
await pluginManager.destroy();
|
|
806
912
|
console.log("Content Embeddings plugin destroyed");
|
|
807
913
|
};
|
|
808
|
-
const PLUGIN_ID$
|
|
914
|
+
const PLUGIN_ID$4 = "strapi-content-embeddings";
|
|
809
915
|
const register = ({ strapi }) => {
|
|
810
916
|
Object.values(strapi.contentTypes).forEach((contentType) => {
|
|
811
|
-
if (contentType.uid.startsWith("admin::") || contentType.uid.startsWith("strapi::") || contentType.uid === `plugin::${PLUGIN_ID$
|
|
917
|
+
if (contentType.uid.startsWith("admin::") || contentType.uid.startsWith("strapi::") || contentType.uid === `plugin::${PLUGIN_ID$4}.embedding`) {
|
|
812
918
|
return;
|
|
813
919
|
}
|
|
814
920
|
contentType.attributes.embedding = {
|
|
815
921
|
type: "relation",
|
|
816
922
|
relation: "morphOne",
|
|
817
|
-
target: `plugin::${PLUGIN_ID$
|
|
923
|
+
target: `plugin::${PLUGIN_ID$4}.embedding`,
|
|
818
924
|
morphBy: "related",
|
|
819
925
|
private: false,
|
|
820
926
|
configurable: false
|
|
@@ -884,11 +990,11 @@ const embedding = {
|
|
|
884
990
|
const contentTypes = {
|
|
885
991
|
embedding
|
|
886
992
|
};
|
|
887
|
-
const PLUGIN_ID$
|
|
993
|
+
const PLUGIN_ID$3 = "strapi-content-embeddings";
|
|
888
994
|
const controller = ({ strapi }) => ({
|
|
889
995
|
async createEmbedding(ctx) {
|
|
890
996
|
try {
|
|
891
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
997
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").createEmbedding(ctx.request.body);
|
|
892
998
|
ctx.body = result;
|
|
893
999
|
} catch (error) {
|
|
894
1000
|
ctx.throw(500, error.message || "Failed to create embedding");
|
|
@@ -897,7 +1003,7 @@ const controller = ({ strapi }) => ({
|
|
|
897
1003
|
async deleteEmbedding(ctx) {
|
|
898
1004
|
try {
|
|
899
1005
|
const { id } = ctx.params;
|
|
900
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1006
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").deleteEmbedding(id);
|
|
901
1007
|
ctx.body = result;
|
|
902
1008
|
} catch (error) {
|
|
903
1009
|
ctx.throw(500, error.message || "Failed to delete embedding");
|
|
@@ -906,7 +1012,7 @@ const controller = ({ strapi }) => ({
|
|
|
906
1012
|
async updateEmbedding(ctx) {
|
|
907
1013
|
try {
|
|
908
1014
|
const { id } = ctx.params;
|
|
909
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1015
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").updateEmbedding(id, ctx.request.body);
|
|
910
1016
|
ctx.body = result;
|
|
911
1017
|
} catch (error) {
|
|
912
1018
|
ctx.throw(500, error.message || "Failed to update embedding");
|
|
@@ -915,7 +1021,7 @@ const controller = ({ strapi }) => ({
|
|
|
915
1021
|
async getEmbeddings(ctx) {
|
|
916
1022
|
try {
|
|
917
1023
|
const { page, pageSize, filters } = ctx.query;
|
|
918
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1024
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").getEmbeddings({
|
|
919
1025
|
page: page ? parseInt(page, 10) : 1,
|
|
920
1026
|
pageSize: pageSize ? parseInt(pageSize, 10) : 10,
|
|
921
1027
|
filters
|
|
@@ -928,7 +1034,7 @@ const controller = ({ strapi }) => ({
|
|
|
928
1034
|
async getEmbedding(ctx) {
|
|
929
1035
|
try {
|
|
930
1036
|
const { id } = ctx.params;
|
|
931
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1037
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").getEmbedding(id);
|
|
932
1038
|
if (!result) {
|
|
933
1039
|
ctx.throw(404, "Embedding not found");
|
|
934
1040
|
}
|
|
@@ -943,14 +1049,46 @@ const controller = ({ strapi }) => ({
|
|
|
943
1049
|
async queryEmbeddings(ctx) {
|
|
944
1050
|
try {
|
|
945
1051
|
const { query } = ctx.query;
|
|
946
|
-
const result = await strapi.plugin(PLUGIN_ID$
|
|
1052
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").queryEmbeddings(query);
|
|
947
1053
|
ctx.body = result;
|
|
948
1054
|
} catch (error) {
|
|
949
1055
|
ctx.throw(500, error.message || "Failed to query embeddings");
|
|
950
1056
|
}
|
|
1057
|
+
},
|
|
1058
|
+
/**
|
|
1059
|
+
* Sync embeddings from Neon DB to Strapi DB
|
|
1060
|
+
* GET /api/strapi-content-embeddings/sync
|
|
1061
|
+
*
|
|
1062
|
+
* Query params:
|
|
1063
|
+
* - removeOrphans: boolean (default: false) - Remove Strapi entries that don't exist in Neon
|
|
1064
|
+
* - dryRun: boolean (default: false) - Preview changes without applying them
|
|
1065
|
+
*/
|
|
1066
|
+
async syncFromNeon(ctx) {
|
|
1067
|
+
try {
|
|
1068
|
+
const { removeOrphans, dryRun } = ctx.query;
|
|
1069
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("sync").syncFromNeon({
|
|
1070
|
+
removeOrphans: removeOrphans === "true",
|
|
1071
|
+
dryRun: dryRun === "true"
|
|
1072
|
+
});
|
|
1073
|
+
ctx.body = result;
|
|
1074
|
+
} catch (error) {
|
|
1075
|
+
ctx.throw(500, error.message || "Failed to sync embeddings");
|
|
1076
|
+
}
|
|
1077
|
+
},
|
|
1078
|
+
/**
|
|
1079
|
+
* Get sync status - compare Neon and Strapi without making changes
|
|
1080
|
+
* GET /api/strapi-content-embeddings/sync/status
|
|
1081
|
+
*/
|
|
1082
|
+
async getSyncStatus(ctx) {
|
|
1083
|
+
try {
|
|
1084
|
+
const result = await strapi.plugin(PLUGIN_ID$3).service("sync").getSyncStatus();
|
|
1085
|
+
ctx.body = result;
|
|
1086
|
+
} catch (error) {
|
|
1087
|
+
ctx.throw(500, error.message || "Failed to get sync status");
|
|
1088
|
+
}
|
|
951
1089
|
}
|
|
952
1090
|
});
|
|
953
|
-
const PLUGIN_ID$
|
|
1091
|
+
const PLUGIN_ID$2 = "strapi-content-embeddings";
|
|
954
1092
|
const SESSION_TIMEOUT_MS = 4 * 60 * 60 * 1e3;
|
|
955
1093
|
function isSessionExpired(session) {
|
|
956
1094
|
return Date.now() - session.createdAt > SESSION_TIMEOUT_MS;
|
|
@@ -968,7 +1106,7 @@ function cleanupExpiredSessions(plugin, strapi) {
|
|
|
968
1106
|
}
|
|
969
1107
|
}
|
|
970
1108
|
if (cleaned > 0) {
|
|
971
|
-
strapi.log.debug(`[${PLUGIN_ID$
|
|
1109
|
+
strapi.log.debug(`[${PLUGIN_ID$2}] Cleaned up ${cleaned} expired MCP sessions`);
|
|
972
1110
|
}
|
|
973
1111
|
}
|
|
974
1112
|
const mcpController = ({ strapi }) => ({
|
|
@@ -976,7 +1114,7 @@ const mcpController = ({ strapi }) => ({
|
|
|
976
1114
|
* Handle MCP requests (POST, GET, DELETE)
|
|
977
1115
|
*/
|
|
978
1116
|
async handle(ctx) {
|
|
979
|
-
const plugin = strapi.plugin(PLUGIN_ID$
|
|
1117
|
+
const plugin = strapi.plugin(PLUGIN_ID$2);
|
|
980
1118
|
if (!plugin.createMcpServer) {
|
|
981
1119
|
ctx.status = 503;
|
|
982
1120
|
ctx.body = {
|
|
@@ -992,7 +1130,7 @@ const mcpController = ({ strapi }) => ({
|
|
|
992
1130
|
const requestedSessionId = ctx.request.headers["mcp-session-id"];
|
|
993
1131
|
let session = requestedSessionId ? plugin.sessions.get(requestedSessionId) : null;
|
|
994
1132
|
if (session && isSessionExpired(session)) {
|
|
995
|
-
strapi.log.debug(`[${PLUGIN_ID$
|
|
1133
|
+
strapi.log.debug(`[${PLUGIN_ID$2}] Session expired, removing: ${requestedSessionId}`);
|
|
996
1134
|
try {
|
|
997
1135
|
session.server.close();
|
|
998
1136
|
} catch {
|
|
@@ -1027,13 +1165,13 @@ const mcpController = ({ strapi }) => ({
|
|
|
1027
1165
|
};
|
|
1028
1166
|
plugin.sessions.set(sessionId, session);
|
|
1029
1167
|
strapi.log.debug(
|
|
1030
|
-
`[${PLUGIN_ID$
|
|
1168
|
+
`[${PLUGIN_ID$2}] New MCP session created: ${sessionId} (auth: ${ctx.state.authMethod || "unknown"})`
|
|
1031
1169
|
);
|
|
1032
1170
|
}
|
|
1033
1171
|
try {
|
|
1034
1172
|
await session.transport.handleRequest(ctx.req, ctx.res, ctx.request.body);
|
|
1035
1173
|
} catch (transportError) {
|
|
1036
|
-
strapi.log.warn(`[${PLUGIN_ID$
|
|
1174
|
+
strapi.log.warn(`[${PLUGIN_ID$2}] Transport error, cleaning up session: ${requestedSessionId}`, {
|
|
1037
1175
|
error: transportError instanceof Error ? transportError.message : String(transportError)
|
|
1038
1176
|
});
|
|
1039
1177
|
try {
|
|
@@ -1056,7 +1194,7 @@ const mcpController = ({ strapi }) => ({
|
|
|
1056
1194
|
}
|
|
1057
1195
|
ctx.respond = false;
|
|
1058
1196
|
} catch (error) {
|
|
1059
|
-
strapi.log.error(`[${PLUGIN_ID$
|
|
1197
|
+
strapi.log.error(`[${PLUGIN_ID$2}] Error handling MCP request`, {
|
|
1060
1198
|
error: error instanceof Error ? error.message : String(error),
|
|
1061
1199
|
method: ctx.method,
|
|
1062
1200
|
path: ctx.path
|
|
@@ -1083,6 +1221,32 @@ const contentApi = [
|
|
|
1083
1221
|
path: "/embeddings-query",
|
|
1084
1222
|
handler: "controller.queryEmbeddings"
|
|
1085
1223
|
},
|
|
1224
|
+
// Sync routes - for cron jobs or manual triggering
|
|
1225
|
+
// Use API token for authentication
|
|
1226
|
+
{
|
|
1227
|
+
method: "GET",
|
|
1228
|
+
path: "/sync",
|
|
1229
|
+
handler: "controller.syncFromNeon",
|
|
1230
|
+
config: {
|
|
1231
|
+
description: "Sync embeddings from Neon DB to Strapi. Query params: removeOrphans=true, dryRun=true"
|
|
1232
|
+
}
|
|
1233
|
+
},
|
|
1234
|
+
{
|
|
1235
|
+
method: "POST",
|
|
1236
|
+
path: "/sync",
|
|
1237
|
+
handler: "controller.syncFromNeon",
|
|
1238
|
+
config: {
|
|
1239
|
+
description: "Sync embeddings from Neon DB to Strapi. Query params: removeOrphans=true, dryRun=true"
|
|
1240
|
+
}
|
|
1241
|
+
},
|
|
1242
|
+
{
|
|
1243
|
+
method: "GET",
|
|
1244
|
+
path: "/sync/status",
|
|
1245
|
+
handler: "controller.getSyncStatus",
|
|
1246
|
+
config: {
|
|
1247
|
+
description: "Get sync status between Neon and Strapi without making changes"
|
|
1248
|
+
}
|
|
1249
|
+
},
|
|
1086
1250
|
// MCP routes - auth handled by middleware
|
|
1087
1251
|
{
|
|
1088
1252
|
method: "POST",
|
|
@@ -1202,11 +1366,180 @@ const routes = {
|
|
|
1202
1366
|
routes: [...admin]
|
|
1203
1367
|
}
|
|
1204
1368
|
};
|
|
1205
|
-
const
|
|
1206
|
-
|
|
1369
|
+
/**
 * Separators tried by the text splitter, from coarsest to finest.
 * The first one that occurs in the text is used; the empty string is the
 * last resort and splits between individual characters.
 *
 * Frozen because this array is handed out by reference as the default
 * `separators` option - an accidental mutation by one caller would otherwise
 * change chunking for every later call.
 */
const DEFAULT_SEPARATORS = Object.freeze([
  "\n\n", // Paragraphs
  "\n", // Lines
  ". ", // Sentences
  "! ", // Exclamations
  "? ", // Questions
  "; ", // Semicolons
  ", ", // Commas
  " ", // Words
  "" // Characters (last resort)
]);
|
|
1389
|
+
/**
 * Rough token estimate for a piece of text, using the heuristic of
 * one token per four characters (rounded up).
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count; 0 for empty, null or undefined input.
 */
function estimateTokens(text) {
  // Missing text counts as zero tokens instead of throwing on `.length`.
  const length = text?.length ?? 0;
  return Math.ceil(length / 4);
}
|
|
1392
|
+
/**
 * Tell whether content is longer than the maximum size of a single chunk.
 *
 * @param {string} content - Text to check.
 * @param {number} [maxChars=4000] - Maximum length (in characters) that still fits one chunk.
 * @returns {boolean} true when `content` is a string longer than `maxChars`;
 *   false otherwise, including for null/undefined content.
 */
function needsChunking(content, maxChars = 4e3) {
  // Guard first so missing content is reported as "no chunking" rather than a TypeError.
  return typeof content === "string" && content.length > maxChars;
}
|
|
1395
|
+
/**
 * Split text on a separator while keeping the separator attached to the end
 * of each piece, so that joining the pieces reproduces the input exactly.
 *
 * @param {string} text - Text to split.
 * @param {string} separator - Separator to split on; "" splits into characters.
 * @returns {string[]} Pieces in order. A trailing empty piece (text that ends
 *   with the separator, or empty text) is omitted.
 */
function splitWithSeparator(text, separator) {
  if (separator === "") {
    // Split by code point, not UTF-16 code unit, so emoji and other
    // astral characters are never torn into lone surrogates.
    return Array.from(text);
  }
  const parts = text.split(separator);
  // The final part has no separator after it; every earlier part did.
  const lastPart = parts.pop();
  const pieces = parts.map((part) => part + separator);
  if (lastPart) {
    pieces.push(lastPart);
  }
  return pieces;
}
|
|
1410
|
+
/**
 * Recursively split text into pieces of at most `chunkSize` characters,
 * preferring the coarsest separator that occurs in the text.
 *
 * Consecutive pieces are packed together while they fit. A single piece that
 * is still too large is split again with the remaining (finer) separators,
 * and hard-sliced at `chunkSize` once no separators are left. No characters
 * are dropped: concatenating the result reproduces `text`.
 *
 * @param {string} text - Text to split.
 * @param {number} chunkSize - Maximum length of each returned piece; must be > 0.
 * @param {string[]} separators - Separators ordered from coarsest to finest.
 * @returns {string[]} The pieces, in order.
 * @throws {RangeError} When `chunkSize` is not a positive number (previously
 *   this made the hard-slice loop run forever).
 */
function splitText(text, chunkSize, separators) {
  if (!(chunkSize > 0)) {
    throw new RangeError(`chunkSize must be a positive number, got ${chunkSize}`);
  }
  if (text.length <= chunkSize) {
    return [text];
  }
  // First separator present in the text; fall back to the last (finest) one.
  const foundIndex = separators.findIndex((sep) => text.includes(sep));
  const separatorIndex = foundIndex === -1 ? separators.length - 1 : foundIndex;
  const bestSeparator = separators[separatorIndex];
  const remainingSeparators = separators.slice(separatorIndex + 1);

  const chunks = [];
  let currentChunk = "";
  for (const split of splitWithSeparator(text, bestSeparator)) {
    if ((currentChunk + split).length <= chunkSize) {
      currentChunk += split;
      continue;
    }
    // The piece does not fit: flush what has been packed so far.
    if (currentChunk) {
      chunks.push(currentChunk);
    }
    if (split.length <= chunkSize) {
      currentChunk = split;
      continue;
    }
    // The piece alone is too large: refine it, or hard-slice as a last resort.
    if (remainingSeparators.length > 0) {
      chunks.push(...splitText(split, chunkSize, remainingSeparators));
    } else {
      for (let i = 0; i < split.length; i += chunkSize) {
        chunks.push(split.slice(i, i + chunkSize));
      }
    }
    currentChunk = "";
  }
  if (currentChunk) {
    chunks.push(currentChunk);
  }
  return chunks;
}
|
|
1451
|
+
/**
 * Prefix every chunk except the first with the tail of the previous chunk,
 * so neighbouring chunks share some context.
 *
 * @param {string[]} chunks - Chunks in document order (not modified).
 * @param {number} overlap - Number of trailing characters of the previous
 *   chunk to prepend.
 * @returns {string[]} A new array with overlap applied, or the input array
 *   itself when `overlap` is not a positive number or there are fewer than
 *   two chunks.
 */
function addOverlap(chunks, overlap) {
  // `!(overlap > 0)` also rejects NaN, which would otherwise make
  // slice(-NaN) copy the entire previous chunk.
  if (!(overlap > 0) || chunks.length <= 1) {
    return chunks;
  }
  return chunks.map((chunk, i) => {
    if (i === 0) {
      return chunk;
    }
    let overlapText = chunks[i - 1].slice(-overlap);
    // If the cut landed inside a surrogate pair, drop the orphaned low
    // surrogate rather than emitting an invalid character.
    const firstUnit = overlapText.charCodeAt(0);
    if (firstUnit >= 0xdc00 && firstUnit <= 0xdfff) {
      overlapText = overlapText.slice(1);
    }
    return overlapText + chunk;
  });
}
|
|
1467
|
+
/**
 * Split content into overlapping chunks suitable for embedding.
 *
 * The content is trimmed, split into raw pieces of at most
 * `chunkSize - chunkOverlap` characters, and each piece after the first is
 * prefixed with the tail of its predecessor. Chunks that are empty after
 * trimming are dropped and the remaining ones are numbered consecutively.
 *
 * `startOffset`/`endOffset` describe the position of the chunk's raw
 * (pre-overlap) piece within the trimmed content; they do not include the
 * overlap prefix.
 *
 * @param {string} content - Text to chunk.
 * @param {object} [options2]
 * @param {number} [options2.chunkSize=4000] - Target maximum chunk length;
 *   a value that is not a positive number falls back to 4000.
 * @param {number} [options2.chunkOverlap=200] - Characters shared between
 *   neighbouring chunks; non-positive values mean no overlap, and a value
 *   >= chunkSize is reduced to half the chunk size.
 * @param {string[]} [options2.separators=DEFAULT_SEPARATORS] - Separators
 *   ordered from coarsest to finest.
 * @returns {Array<{text: string, chunkIndex: number, totalChunks: number, startOffset: number, endOffset: number}>}
 *   The chunks in order; empty when the content is blank or not a string.
 */
function chunkContent(content, options2 = {}) {
  const {
    chunkSize = 4e3,
    chunkOverlap = 200,
    separators = DEFAULT_SEPARATORS
  } = options2;
  if (typeof content !== "string") {
    return [];
  }
  const cleanContent = content.trim();
  if (!cleanContent) {
    return [];
  }
  // Normalise the sizes: a non-positive raw piece size (overlap >= size)
  // would make the splitter loop forever.
  const size = chunkSize > 0 ? chunkSize : 4e3;
  let overlap = chunkOverlap > 0 ? chunkOverlap : 0;
  if (overlap >= size) {
    overlap = Math.floor(size / 2);
  }
  if (cleanContent.length <= size) {
    return [{
      text: cleanContent,
      chunkIndex: 0,
      totalChunks: 1,
      startOffset: 0,
      endOffset: cleanContent.length
    }];
  }
  const rawChunks = splitText(cleanContent, size - overlap, separators);
  const chunksWithOverlap = addOverlap(rawChunks, overlap);
  const result = [];
  let currentOffset = 0;
  rawChunks.forEach((rawChunk, i) => {
    const text = chunksWithOverlap[i].trim();
    if (text) {
      result.push({
        text,
        chunkIndex: result.length,
        totalChunks: 0, // filled in below once the final count is known
        startOffset: currentOffset,
        endOffset: currentOffset + rawChunk.length
      });
    }
    // Offsets advance even for dropped (blank) pieces to stay aligned with the content.
    currentOffset += rawChunk.length;
  });
  for (const chunk of result) {
    chunk.totalChunks = result.length;
  }
  return result;
}
|
|
1510
|
+
/**
 * Build the title for one chunk of a document.
 *
 * @param {string} baseTitle - Title of the whole document.
 * @param {number} chunkIndex - Zero-based index of the chunk.
 * @param {number} totalChunks - Number of chunks the document was split into.
 * @returns {string} `baseTitle` unchanged when the document is not split into
 *   more than one chunk, otherwise `"<baseTitle> [Part <n>/<total>]"` with a
 *   one-based part number.
 */
function formatChunkTitle(baseTitle, chunkIndex, totalChunks) {
  // Anything that is not a real multi-part split (1, 0, undefined, NaN)
  // keeps the plain title instead of producing e.g. "[Part 1/0]".
  if (!(totalChunks > 1)) {
    return baseTitle;
  }
  return `${baseTitle} [Part ${chunkIndex + 1}/${totalChunks}]`;
}
|
|
1516
|
+
const PLUGIN_ID$1 = "strapi-content-embeddings";
|
|
1517
|
+
const CONTENT_TYPE_UID$1 = `plugin::${PLUGIN_ID$1}.embedding`;
|
|
1207
1518
|
const embeddings = ({ strapi }) => ({
|
|
1519
|
+
/**
|
|
1520
|
+
* Get plugin config with defaults
|
|
1521
|
+
*/
|
|
1522
|
+
getConfig() {
|
|
1523
|
+
const config2 = strapi.config.get("plugin::strapi-content-embeddings") || {};
|
|
1524
|
+
return {
|
|
1525
|
+
chunkSize: config2.chunkSize || 4e3,
|
|
1526
|
+
chunkOverlap: config2.chunkOverlap || 200,
|
|
1527
|
+
autoChunk: config2.autoChunk || false,
|
|
1528
|
+
...config2
|
|
1529
|
+
};
|
|
1530
|
+
},
|
|
1531
|
+
/**
|
|
1532
|
+
* Create a single embedding (no chunking)
|
|
1533
|
+
*/
|
|
1208
1534
|
async createEmbedding(data) {
|
|
1209
|
-
const { title, content, collectionType, fieldName, metadata, related } = data.data;
|
|
1535
|
+
const { title, content, collectionType, fieldName, metadata, related, autoChunk } = data.data;
|
|
1536
|
+
const config2 = this.getConfig();
|
|
1537
|
+
const shouldChunk = autoChunk ?? config2.autoChunk;
|
|
1538
|
+
const chunkSize = config2.chunkSize || 4e3;
|
|
1539
|
+
if (shouldChunk && needsChunking(content, chunkSize)) {
|
|
1540
|
+
const result = await this.createChunkedEmbedding(data);
|
|
1541
|
+
return result.entity;
|
|
1542
|
+
}
|
|
1210
1543
|
const entityData = {
|
|
1211
1544
|
title,
|
|
1212
1545
|
content,
|
|
@@ -1217,7 +1550,7 @@ const embeddings = ({ strapi }) => ({
|
|
|
1217
1550
|
if (related && related.__type && related.id) {
|
|
1218
1551
|
entityData.related = related;
|
|
1219
1552
|
}
|
|
1220
|
-
const entity = await strapi.documents(CONTENT_TYPE_UID).create({
|
|
1553
|
+
const entity = await strapi.documents(CONTENT_TYPE_UID$1).create({
|
|
1221
1554
|
data: entityData
|
|
1222
1555
|
});
|
|
1223
1556
|
if (!pluginManager.isInitialized()) {
|
|
@@ -1232,7 +1565,7 @@ const embeddings = ({ strapi }) => ({
|
|
|
1232
1565
|
collectionType: collectionType || "standalone",
|
|
1233
1566
|
fieldName: fieldName || "content"
|
|
1234
1567
|
});
|
|
1235
|
-
const updatedEntity = await strapi.documents(CONTENT_TYPE_UID).update({
|
|
1568
|
+
const updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1236
1569
|
documentId: entity.documentId,
|
|
1237
1570
|
data: {
|
|
1238
1571
|
embeddingId: result.embeddingId,
|
|
@@ -1245,8 +1578,110 @@ const embeddings = ({ strapi }) => ({
|
|
|
1245
1578
|
return entity;
|
|
1246
1579
|
}
|
|
1247
1580
|
},
|
|
1581
|
+
/**
|
|
1582
|
+
* Create embeddings with automatic chunking for large content
|
|
1583
|
+
* Creates multiple embedding entities, one per chunk
|
|
1584
|
+
*/
|
|
1585
|
+
async createChunkedEmbedding(data) {
|
|
1586
|
+
const { title, content, collectionType, fieldName, metadata, related } = data.data;
|
|
1587
|
+
const config2 = this.getConfig();
|
|
1588
|
+
const chunkSize = config2.chunkSize || 4e3;
|
|
1589
|
+
const chunkOverlap = config2.chunkOverlap || 200;
|
|
1590
|
+
const chunks = chunkContent(content, { chunkSize, chunkOverlap });
|
|
1591
|
+
if (chunks.length === 0) {
|
|
1592
|
+
throw new Error("Content is empty or could not be chunked");
|
|
1593
|
+
}
|
|
1594
|
+
if (chunks.length === 1) {
|
|
1595
|
+
const entity = await this.createEmbedding({
|
|
1596
|
+
data: {
|
|
1597
|
+
...data.data,
|
|
1598
|
+
autoChunk: false
|
|
1599
|
+
// Prevent recursive chunking
|
|
1600
|
+
}
|
|
1601
|
+
});
|
|
1602
|
+
return {
|
|
1603
|
+
entity,
|
|
1604
|
+
chunks: [entity],
|
|
1605
|
+
totalChunks: 1,
|
|
1606
|
+
wasChunked: false
|
|
1607
|
+
};
|
|
1608
|
+
}
|
|
1609
|
+
console.log(`Chunking content into ${chunks.length} parts (chunkSize: ${chunkSize}, overlap: ${chunkOverlap})`);
|
|
1610
|
+
const createdChunks = [];
|
|
1611
|
+
let parentDocumentId = null;
|
|
1612
|
+
for (const chunk of chunks) {
|
|
1613
|
+
const chunkTitle = formatChunkTitle(title, chunk.chunkIndex, chunk.totalChunks);
|
|
1614
|
+
const chunkMetadata = {
|
|
1615
|
+
...metadata,
|
|
1616
|
+
isChunk: true,
|
|
1617
|
+
chunkIndex: chunk.chunkIndex,
|
|
1618
|
+
totalChunks: chunk.totalChunks,
|
|
1619
|
+
startOffset: chunk.startOffset,
|
|
1620
|
+
endOffset: chunk.endOffset,
|
|
1621
|
+
originalTitle: title,
|
|
1622
|
+
parentDocumentId,
|
|
1623
|
+
estimatedTokens: estimateTokens(chunk.text)
|
|
1624
|
+
};
|
|
1625
|
+
const entityData = {
|
|
1626
|
+
title: chunkTitle,
|
|
1627
|
+
content: chunk.text,
|
|
1628
|
+
collectionType: collectionType || "standalone",
|
|
1629
|
+
fieldName: fieldName || "content",
|
|
1630
|
+
metadata: chunkMetadata
|
|
1631
|
+
};
|
|
1632
|
+
if (chunk.chunkIndex === 0 && related && related.__type && related.id) {
|
|
1633
|
+
entityData.related = related;
|
|
1634
|
+
}
|
|
1635
|
+
const entity = await strapi.documents(CONTENT_TYPE_UID$1).create({
|
|
1636
|
+
data: entityData
|
|
1637
|
+
});
|
|
1638
|
+
if (chunk.chunkIndex === 0) {
|
|
1639
|
+
parentDocumentId = entity.documentId;
|
|
1640
|
+
} else {
|
|
1641
|
+
await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1642
|
+
documentId: entity.documentId,
|
|
1643
|
+
data: {
|
|
1644
|
+
metadata: {
|
|
1645
|
+
...chunkMetadata,
|
|
1646
|
+
parentDocumentId
|
|
1647
|
+
}
|
|
1648
|
+
}
|
|
1649
|
+
});
|
|
1650
|
+
}
|
|
1651
|
+
if (pluginManager.isInitialized()) {
|
|
1652
|
+
try {
|
|
1653
|
+
const result = await pluginManager.createEmbedding({
|
|
1654
|
+
id: entity.documentId,
|
|
1655
|
+
title: chunkTitle,
|
|
1656
|
+
content: chunk.text,
|
|
1657
|
+
collectionType: collectionType || "standalone",
|
|
1658
|
+
fieldName: fieldName || "content"
|
|
1659
|
+
});
|
|
1660
|
+
const updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1661
|
+
documentId: entity.documentId,
|
|
1662
|
+
data: {
|
|
1663
|
+
embeddingId: result.embeddingId,
|
|
1664
|
+
embedding: result.embedding
|
|
1665
|
+
}
|
|
1666
|
+
});
|
|
1667
|
+
createdChunks.push(updatedEntity);
|
|
1668
|
+
} catch (error) {
|
|
1669
|
+
console.error(`Failed to create embedding for chunk ${chunk.chunkIndex}:`, error);
|
|
1670
|
+
createdChunks.push(entity);
|
|
1671
|
+
}
|
|
1672
|
+
} else {
|
|
1673
|
+
createdChunks.push(entity);
|
|
1674
|
+
}
|
|
1675
|
+
}
|
|
1676
|
+
return {
|
|
1677
|
+
entity: createdChunks[0],
|
|
1678
|
+
chunks: createdChunks,
|
|
1679
|
+
totalChunks: createdChunks.length,
|
|
1680
|
+
wasChunked: true
|
|
1681
|
+
};
|
|
1682
|
+
},
|
|
1248
1683
|
async deleteEmbedding(id) {
|
|
1249
|
-
const currentEntry = await strapi.documents(CONTENT_TYPE_UID).findOne({
|
|
1684
|
+
const currentEntry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1250
1685
|
documentId: String(id)
|
|
1251
1686
|
});
|
|
1252
1687
|
if (!currentEntry) {
|
|
@@ -1259,25 +1694,174 @@ const embeddings = ({ strapi }) => ({
|
|
|
1259
1694
|
console.error("Failed to delete from vector store:", error);
|
|
1260
1695
|
}
|
|
1261
1696
|
}
|
|
1262
|
-
const deletedEntry = await strapi.documents(CONTENT_TYPE_UID).delete({
|
|
1697
|
+
const deletedEntry = await strapi.documents(CONTENT_TYPE_UID$1).delete({
|
|
1263
1698
|
documentId: String(id)
|
|
1264
1699
|
});
|
|
1265
1700
|
return deletedEntry;
|
|
1266
1701
|
},
|
|
1702
|
+
/**
|
|
1703
|
+
* Find all chunks related to a parent document
|
|
1704
|
+
* Returns chunks including the parent itself
|
|
1705
|
+
*/
|
|
1706
|
+
async findRelatedChunks(documentId) {
|
|
1707
|
+
const entry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1708
|
+
documentId
|
|
1709
|
+
});
|
|
1710
|
+
if (!entry) {
|
|
1711
|
+
return [];
|
|
1712
|
+
}
|
|
1713
|
+
const metadata = entry.metadata;
|
|
1714
|
+
const parentId = metadata?.parentDocumentId || documentId;
|
|
1715
|
+
const isChunked = metadata?.isChunk === true;
|
|
1716
|
+
if (!isChunked && !metadata?.parentDocumentId) {
|
|
1717
|
+
const children = await strapi.documents(CONTENT_TYPE_UID$1).findMany({
|
|
1718
|
+
filters: {
|
|
1719
|
+
metadata: {
|
|
1720
|
+
$containsi: `"parentDocumentId":"${documentId}"`
|
|
1721
|
+
}
|
|
1722
|
+
}
|
|
1723
|
+
});
|
|
1724
|
+
if (children.length === 0) {
|
|
1725
|
+
return [entry];
|
|
1726
|
+
}
|
|
1727
|
+
return [entry, ...children];
|
|
1728
|
+
}
|
|
1729
|
+
const allChunks = await strapi.documents(CONTENT_TYPE_UID$1).findMany({
|
|
1730
|
+
filters: {
|
|
1731
|
+
$or: [
|
|
1732
|
+
{ documentId: parentId },
|
|
1733
|
+
{
|
|
1734
|
+
metadata: {
|
|
1735
|
+
$containsi: `"parentDocumentId":"${parentId}"`
|
|
1736
|
+
}
|
|
1737
|
+
}
|
|
1738
|
+
]
|
|
1739
|
+
}
|
|
1740
|
+
});
|
|
1741
|
+
return allChunks.sort((a, b) => {
|
|
1742
|
+
const aIndex = a.metadata?.chunkIndex ?? 0;
|
|
1743
|
+
const bIndex = b.metadata?.chunkIndex ?? 0;
|
|
1744
|
+
return aIndex - bIndex;
|
|
1745
|
+
});
|
|
1746
|
+
},
|
|
1747
|
+
/**
|
|
1748
|
+
* Delete all chunks related to a parent document
|
|
1749
|
+
*/
|
|
1750
|
+
async deleteRelatedChunks(documentId) {
|
|
1751
|
+
const chunks = await this.findRelatedChunks(documentId);
|
|
1752
|
+
for (const chunk of chunks) {
|
|
1753
|
+
if (pluginManager.isInitialized()) {
|
|
1754
|
+
try {
|
|
1755
|
+
await pluginManager.deleteEmbedding(chunk.documentId);
|
|
1756
|
+
} catch (error) {
|
|
1757
|
+
console.error(`Failed to delete chunk ${chunk.documentId} from vector store:`, error);
|
|
1758
|
+
}
|
|
1759
|
+
}
|
|
1760
|
+
await strapi.documents(CONTENT_TYPE_UID$1).delete({
|
|
1761
|
+
documentId: chunk.documentId
|
|
1762
|
+
});
|
|
1763
|
+
}
|
|
1764
|
+
return chunks.length;
|
|
1765
|
+
},
|
|
1766
|
+
/**
|
|
1767
|
+
* Update embeddings with automatic chunking support
|
|
1768
|
+
* Handles re-chunking when content changes and exceeds chunk size
|
|
1769
|
+
*/
|
|
1770
|
+
async updateChunkedEmbedding(id, data) {
|
|
1771
|
+
const { title, content, metadata, autoChunk } = data.data;
|
|
1772
|
+
const config2 = this.getConfig();
|
|
1773
|
+
const currentEntry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1774
|
+
documentId: id
|
|
1775
|
+
});
|
|
1776
|
+
if (!currentEntry) {
|
|
1777
|
+
throw new Error(`Embedding with id ${id} not found`);
|
|
1778
|
+
}
|
|
1779
|
+
const currentMetadata = currentEntry.metadata;
|
|
1780
|
+
const parentDocumentId = currentMetadata?.parentDocumentId || id;
|
|
1781
|
+
const newContent = content ?? currentEntry.content;
|
|
1782
|
+
const newTitle = title ?? currentMetadata?.originalTitle ?? currentEntry.title;
|
|
1783
|
+
const shouldChunk = autoChunk ?? config2.autoChunk;
|
|
1784
|
+
const chunkSize = config2.chunkSize || 4e3;
|
|
1785
|
+
const contentNeedsChunking = shouldChunk && needsChunking(newContent, chunkSize);
|
|
1786
|
+
const existingChunks = await this.findRelatedChunks(id);
|
|
1787
|
+
let originalRelated;
|
|
1788
|
+
const firstChunk = existingChunks.find(
|
|
1789
|
+
(c) => c.metadata?.chunkIndex === 0 || c.documentId === parentDocumentId
|
|
1790
|
+
);
|
|
1791
|
+
if (firstChunk?.related) {
|
|
1792
|
+
originalRelated = firstChunk.related;
|
|
1793
|
+
}
|
|
1794
|
+
const deletedCount = await this.deleteRelatedChunks(id);
|
|
1795
|
+
console.log(`Deleted ${deletedCount} existing chunk(s) for update`);
|
|
1796
|
+
const preservedMetadata = { ...metadata };
|
|
1797
|
+
delete preservedMetadata?.isChunk;
|
|
1798
|
+
delete preservedMetadata?.chunkIndex;
|
|
1799
|
+
delete preservedMetadata?.totalChunks;
|
|
1800
|
+
delete preservedMetadata?.startOffset;
|
|
1801
|
+
delete preservedMetadata?.endOffset;
|
|
1802
|
+
delete preservedMetadata?.originalTitle;
|
|
1803
|
+
delete preservedMetadata?.parentDocumentId;
|
|
1804
|
+
delete preservedMetadata?.estimatedTokens;
|
|
1805
|
+
if (contentNeedsChunking) {
|
|
1806
|
+
return await this.createChunkedEmbedding({
|
|
1807
|
+
data: {
|
|
1808
|
+
title: newTitle.replace(/\s*\[Part \d+\/\d+\]$/, ""),
|
|
1809
|
+
// Remove old part suffix
|
|
1810
|
+
content: newContent,
|
|
1811
|
+
collectionType: currentEntry.collectionType || "standalone",
|
|
1812
|
+
fieldName: currentEntry.fieldName || "content",
|
|
1813
|
+
metadata: preservedMetadata,
|
|
1814
|
+
related: originalRelated,
|
|
1815
|
+
autoChunk: true
|
|
1816
|
+
}
|
|
1817
|
+
});
|
|
1818
|
+
} else {
|
|
1819
|
+
const entity = await this.createEmbedding({
|
|
1820
|
+
data: {
|
|
1821
|
+
title: newTitle.replace(/\s*\[Part \d+\/\d+\]$/, ""),
|
|
1822
|
+
// Remove old part suffix
|
|
1823
|
+
content: newContent,
|
|
1824
|
+
collectionType: currentEntry.collectionType || "standalone",
|
|
1825
|
+
fieldName: currentEntry.fieldName || "content",
|
|
1826
|
+
metadata: preservedMetadata,
|
|
1827
|
+
related: originalRelated,
|
|
1828
|
+
autoChunk: false
|
|
1829
|
+
}
|
|
1830
|
+
});
|
|
1831
|
+
return {
|
|
1832
|
+
entity,
|
|
1833
|
+
chunks: [entity],
|
|
1834
|
+
totalChunks: 1,
|
|
1835
|
+
wasChunked: false
|
|
1836
|
+
};
|
|
1837
|
+
}
|
|
1838
|
+
},
|
|
1267
1839
|
async updateEmbedding(id, data) {
|
|
1268
|
-
const { title, content, metadata } = data.data;
|
|
1269
|
-
const
|
|
1840
|
+
const { title, content, metadata, autoChunk } = data.data;
|
|
1841
|
+
const config2 = this.getConfig();
|
|
1842
|
+
const currentEntry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1270
1843
|
documentId: id
|
|
1271
1844
|
});
|
|
1272
1845
|
if (!currentEntry) {
|
|
1273
1846
|
throw new Error(`Embedding with id ${id} not found`);
|
|
1274
1847
|
}
|
|
1848
|
+
const currentMetadata = currentEntry.metadata;
|
|
1849
|
+
const isCurrentlyChunked = currentMetadata?.isChunk === true;
|
|
1850
|
+
const hasRelatedChunks = currentMetadata?.parentDocumentId || isCurrentlyChunked;
|
|
1851
|
+
const shouldChunk = autoChunk ?? config2.autoChunk;
|
|
1852
|
+
const chunkSize = config2.chunkSize || 4e3;
|
|
1853
|
+
const newContent = content ?? currentEntry.content;
|
|
1854
|
+
const contentNeedsChunking = shouldChunk && needsChunking(newContent, chunkSize);
|
|
1855
|
+
const contentChanged = content !== void 0 && content !== currentEntry.content;
|
|
1856
|
+
if (hasRelatedChunks || contentNeedsChunking) {
|
|
1857
|
+
const result = await this.updateChunkedEmbedding(id, data);
|
|
1858
|
+
return result.entity;
|
|
1859
|
+
}
|
|
1275
1860
|
const updateData = {};
|
|
1276
1861
|
if (title !== void 0) updateData.title = title;
|
|
1277
1862
|
if (content !== void 0) updateData.content = content;
|
|
1278
1863
|
if (metadata !== void 0) updateData.metadata = metadata;
|
|
1279
|
-
|
|
1280
|
-
let updatedEntity = await strapi.documents(CONTENT_TYPE_UID).update({
|
|
1864
|
+
let updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1281
1865
|
documentId: id,
|
|
1282
1866
|
data: updateData
|
|
1283
1867
|
});
|
|
@@ -1291,7 +1875,7 @@ const embeddings = ({ strapi }) => ({
|
|
|
1291
1875
|
collectionType: currentEntry.collectionType || "standalone",
|
|
1292
1876
|
fieldName: currentEntry.fieldName || "content"
|
|
1293
1877
|
});
|
|
1294
|
-
updatedEntity = await strapi.documents(CONTENT_TYPE_UID).update({
|
|
1878
|
+
updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
|
|
1295
1879
|
documentId: id,
|
|
1296
1880
|
data: {
|
|
1297
1881
|
embeddingId: result.embeddingId,
|
|
@@ -1320,7 +1904,7 @@ const embeddings = ({ strapi }) => ({
|
|
|
1320
1904
|
}
|
|
1321
1905
|
},
|
|
1322
1906
|
async getEmbedding(id) {
|
|
1323
|
-
return await strapi.documents(CONTENT_TYPE_UID).findOne({
|
|
1907
|
+
return await strapi.documents(CONTENT_TYPE_UID$1).findOne({
|
|
1324
1908
|
documentId: String(id)
|
|
1325
1909
|
});
|
|
1326
1910
|
},
|
|
@@ -1329,12 +1913,12 @@ const embeddings = ({ strapi }) => ({
|
|
|
1329
1913
|
const pageSize = params?.pageSize || 10;
|
|
1330
1914
|
const start = (page - 1) * pageSize;
|
|
1331
1915
|
const [data, totalCount] = await Promise.all([
|
|
1332
|
-
strapi.documents(CONTENT_TYPE_UID).findMany({
|
|
1916
|
+
strapi.documents(CONTENT_TYPE_UID$1).findMany({
|
|
1333
1917
|
limit: pageSize,
|
|
1334
1918
|
start,
|
|
1335
1919
|
filters: params?.filters
|
|
1336
1920
|
}),
|
|
1337
|
-
strapi.documents(CONTENT_TYPE_UID).count({
|
|
1921
|
+
strapi.documents(CONTENT_TYPE_UID$1).count({
|
|
1338
1922
|
filters: params?.filters
|
|
1339
1923
|
})
|
|
1340
1924
|
]);
|
|
@@ -1345,8 +1929,216 @@ const embeddings = ({ strapi }) => ({
|
|
|
1345
1929
|
};
|
|
1346
1930
|
}
|
|
1347
1931
|
});
|
|
1932
|
+
const PLUGIN_ID = "strapi-content-embeddings";
const CONTENT_TYPE_UID = `plugin::${PLUGIN_ID}.embedding`;
/**
 * Sync service factory: reconciles the Strapi embedding documents with the
 * records returned by `pluginManager.getAllNeonEmbeddings()`. Neon records
 * are matched to Strapi documents through `neon.strapiId === documentId`.
 *
 * @param {{strapi: object}} context - Strapi instance used for document access.
 * @returns {{syncFromNeon: Function, getSyncStatus: Function}} The service.
 */
const sync = ({ strapi }) => {
  // Number of documents requested per query when reading all Strapi entries.
  const STRAPI_PAGE_SIZE = 1e4;

  /**
   * Read every Strapi embedding document. A single query with a fixed limit
   * would silently drop everything past that limit, making those documents
   * look "missing in Strapi"; keep paging until a short page is returned.
   */
  const fetchAllStrapiEmbeddings = async () => {
    const collected = [];
    for (;;) {
      const page = await strapi.documents(CONTENT_TYPE_UID).findMany({
        limit: STRAPI_PAGE_SIZE,
        start: collected.length
      });
      for (const doc of page) {
        collected.push(doc);
      }
      if (page.length < STRAPI_PAGE_SIZE) {
        return collected;
      }
    }
  };

  /** Index Neon records by their `strapiId`, skipping records without one. */
  const indexNeonByStrapiId = (neonEmbeddings) => {
    const byStrapiId = /* @__PURE__ */ new Map();
    for (const neon of neonEmbeddings) {
      if (neon.strapiId) {
        byStrapiId.set(neon.strapiId, neon);
      }
    }
    return byStrapiId;
  };

  /** Index Strapi documents by `documentId`. */
  const indexStrapiByDocumentId = (strapiEmbeddings) => {
    const byDocumentId = /* @__PURE__ */ new Map();
    for (const doc of strapiEmbeddings) {
      byDocumentId.set(doc.documentId, doc);
    }
    return byDocumentId;
  };

  return {
    /**
     * Sync embeddings from Neon DB to Strapi DB
     *
     * This performs the following operations:
     * 1. Fetches all embeddings from Neon DB (source of truth)
     * 2. Fetches all embeddings from Strapi DB
     * 3. Creates missing entries in Strapi that exist in Neon
     * 4. Updates Strapi entries whose content or title differs from Neon,
     *    or that have no embeddingId
     * 5. Optionally removes orphaned Strapi entries (no matching Neon record)
     *
     * Failures of individual operations are collected in `errors` instead of
     * aborting the run; `success` is true only when `errors` stays empty.
     *
     * @param {{removeOrphans?: boolean, dryRun?: boolean}} [options2]
     *   `dryRun` counts and lists the actions (prefixed "[DRY RUN]") without
     *   writing anything.
     * @returns {Promise<object>} Report with counts, per-action details and errors.
     */
    async syncFromNeon(options2) {
      const { removeOrphans = false, dryRun = false } = options2 || {};
      const result = {
        success: false,
        timestamp: (/* @__PURE__ */ new Date()).toISOString(),
        neonCount: 0,
        strapiCount: 0,
        actions: {
          created: 0,
          updated: 0,
          orphansRemoved: 0
        },
        details: {
          created: [],
          updated: [],
          orphansRemoved: []
        },
        errors: []
      };
      if (!pluginManager.isInitialized()) {
        result.errors.push(
          "Plugin manager not initialized. Check your Neon and OpenAI configuration."
        );
        return result;
      }
      // Runs `action` unless this is a dry run, then records it under `kind`;
      // a failing action is reported in `errors` and not counted.
      const perform = async (kind, label, failurePrefix, action) => {
        try {
          if (!dryRun) {
            await action();
          }
          result.actions[kind]++;
          result.details[kind].push(dryRun ? `[DRY RUN] ${label}` : label);
        } catch (error) {
          result.errors.push(`${failurePrefix}: ${error}`);
        }
      };
      try {
        const neonEmbeddings = await pluginManager.getAllNeonEmbeddings();
        result.neonCount = neonEmbeddings.length;
        const strapiEmbeddings = await fetchAllStrapiEmbeddings();
        result.strapiCount = strapiEmbeddings.length;
        const neonByStrapiId = indexNeonByStrapiId(neonEmbeddings);
        const strapiByDocumentId = indexStrapiByDocumentId(strapiEmbeddings);
        for (const neon of neonEmbeddings) {
          if (!neon.strapiId) {
            result.errors.push(
              `Neon embedding ${neon.id} has no strapiId in metadata`
            );
            continue;
          }
          const label = `${neon.strapiId} (${neon.title || "untitled"})`;
          const existingStrapi = strapiByDocumentId.get(neon.strapiId);
          if (!existingStrapi) {
            await perform(
              "created",
              label,
              `Failed to create Strapi entry for ${neon.strapiId}`,
              () => strapi.documents(CONTENT_TYPE_UID).create({
                data: {
                  documentId: neon.strapiId,
                  title: neon.title,
                  content: neon.content,
                  embeddingId: neon.id,
                  collectionType: neon.collectionType,
                  fieldName: neon.fieldName
                }
              })
            );
            continue;
          }
          const contentChanged = existingStrapi.content !== neon.content;
          const titleChanged = existingStrapi.title !== neon.title;
          const embeddingIdMissing = !existingStrapi.embeddingId;
          if (contentChanged || titleChanged || embeddingIdMissing) {
            await perform(
              "updated",
              label,
              `Failed to update Strapi entry ${neon.strapiId}`,
              () => strapi.documents(CONTENT_TYPE_UID).update({
                documentId: neon.strapiId,
                data: {
                  title: neon.title,
                  content: neon.content,
                  embeddingId: neon.id
                }
              })
            );
          }
        }
        if (removeOrphans) {
          for (const strapiEmbed of strapiEmbeddings) {
            if (neonByStrapiId.has(strapiEmbed.documentId)) {
              continue;
            }
            await perform(
              "orphansRemoved",
              `${strapiEmbed.documentId} (${strapiEmbed.title || "untitled"})`,
              `Failed to remove orphan ${strapiEmbed.documentId}`,
              () => strapi.documents(CONTENT_TYPE_UID).delete({
                documentId: strapiEmbed.documentId
              })
            );
          }
        }
        result.success = result.errors.length === 0;
        return result;
      } catch (error) {
        result.errors.push(`Sync failed: ${error}`);
        return result;
      }
    },
    /**
     * Get sync status - compare Neon and Strapi without making changes.
     *
     * Only content differences are counted here; title differences and
     * missing embedding ids (which `syncFromNeon` also repairs) are not.
     *
     * @returns {Promise<{neonCount: number, strapiCount: number, inSync: boolean, missingInStrapi: number, missingInNeon: number, contentDifferences: number}>}
     * @throws {Error} When the plugin manager is not initialized.
     */
    async getSyncStatus() {
      if (!pluginManager.isInitialized()) {
        throw new Error("Plugin manager not initialized");
      }
      const neonEmbeddings = await pluginManager.getAllNeonEmbeddings();
      const strapiEmbeddings = await fetchAllStrapiEmbeddings();
      const neonByStrapiId = indexNeonByStrapiId(neonEmbeddings);
      const strapiByDocumentId = indexStrapiByDocumentId(strapiEmbeddings);
      let missingInStrapi = 0;
      let contentDifferences = 0;
      for (const neon of neonEmbeddings) {
        if (!neon.strapiId) continue;
        const strapiRecord = strapiByDocumentId.get(neon.strapiId);
        if (!strapiRecord) {
          missingInStrapi++;
        } else if (strapiRecord.content !== neon.content) {
          contentDifferences++;
        }
      }
      let missingInNeon = 0;
      for (const doc of strapiEmbeddings) {
        if (!neonByStrapiId.has(doc.documentId)) {
          missingInNeon++;
        }
      }
      return {
        neonCount: neonEmbeddings.length,
        strapiCount: strapiEmbeddings.length,
        inSync: missingInStrapi === 0 && missingInNeon === 0 && contentDifferences === 0,
        missingInStrapi,
        missingInNeon,
        contentDifferences
      };
    }
  };
};
|
|
1348
2139
|
// Service factories exposed by the plugin, keyed by service name.
const services = { embeddings, sync };
|
|
1351
2143
|
const index = {
|
|
1352
2144
|
register,
|
|
@@ -1363,3 +2155,4 @@ const index = {
|
|
|
1363
2155
|
export {
|
|
1364
2156
|
index as default
|
|
1365
2157
|
};
|
|
2158
|
+
//# sourceMappingURL=index.mjs.map
|