strapi-content-embeddings 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,5 @@
1
1
  import { OpenAIEmbeddings, ChatOpenAI } from "@langchain/openai";
2
2
  import { PGVectorStore } from "@langchain/community/vectorstores/pgvector";
3
- import { Document } from "@langchain/core/documents";
4
3
  import { StringOutputParser } from "@langchain/core/output_parsers";
5
4
  import { ChatPromptTemplate } from "@langchain/core/prompts";
6
5
  import { RunnableSequence, RunnablePassthrough } from "@langchain/core/runnables";
@@ -19,7 +18,10 @@ const config = {
19
18
  default: {
20
19
  openAIApiKey: "",
21
20
  neonConnectionString: "",
22
- embeddingModel: "text-embedding-3-small"
21
+ embeddingModel: "text-embedding-3-small",
22
+ chunkSize: 4e3,
23
+ chunkOverlap: 200,
24
+ autoChunk: false
23
25
  },
24
26
  validator(config2) {
25
27
  if (!config2.openAIApiKey) {
@@ -37,6 +39,11 @@ const config = {
37
39
  `strapi-content-embeddings: Invalid embeddingModel "${config2.embeddingModel}". Valid options: ${Object.keys(EMBEDDING_MODELS).join(", ")}. Defaulting to "text-embedding-3-small".`
38
40
  );
39
41
  }
42
+ if (config2.chunkSize && (config2.chunkSize < 100 || config2.chunkSize > 8e3)) {
43
+ console.warn(
44
+ `strapi-content-embeddings: chunkSize ${config2.chunkSize} is outside recommended range (100-8000). Using default value of 4000.`
45
+ );
46
+ }
40
47
  }
41
48
  };
42
49
  class PluginManager {
@@ -162,39 +169,46 @@ class PluginManager {
162
169
  console.log("Plugin Manager Initialization Complete");
163
170
  }
164
171
  async createEmbedding(docData) {
165
- if (!this.embeddings || !this.vectorStoreConfig) {
172
+ if (!this.embeddings || !this.vectorStoreConfig || !this.pool) {
166
173
  throw new Error("Plugin manager not initialized");
167
174
  }
168
- try {
169
- const embeddingVector = await this.embeddings.embedQuery(docData.content);
170
- const doc = new Document({
171
- pageContent: docData.content,
172
- metadata: {
175
+ const maxRetries = 3;
176
+ const retryDelay = 2e3;
177
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
178
+ try {
179
+ const embeddingVector = await this.embeddings.embedQuery(docData.content);
180
+ const metadata = {
173
181
  id: docData.id,
174
182
  title: docData.title,
175
183
  collectionType: docData.collectionType || "standalone",
176
184
  fieldName: docData.fieldName || "content"
185
+ };
186
+ const vectorString = `[${embeddingVector.join(",")}]`;
187
+ const result = await this.pool.query(
188
+ `INSERT INTO embeddings_documents (content, metadata, embedding)
189
+ VALUES ($1, $2::jsonb, $3::vector)
190
+ RETURNING id`,
191
+ [docData.content, JSON.stringify(metadata), vectorString]
192
+ );
193
+ return {
194
+ embeddingId: result.rows[0]?.id || "",
195
+ embedding: embeddingVector
196
+ };
197
+ } catch (error) {
198
+ const isRateLimit = error.message?.includes("429") || error.message?.includes("rate");
199
+ const isLastAttempt = attempt === maxRetries;
200
+ if (isRateLimit && !isLastAttempt) {
201
+ console.log(`[createEmbedding] Rate limited, waiting ${retryDelay}ms before retry ${attempt + 1}/${maxRetries}...`);
202
+ await new Promise((resolve) => setTimeout(resolve, retryDelay * attempt));
203
+ continue;
177
204
  }
178
- });
179
- await PGVectorStore.fromDocuments(
180
- [doc],
181
- this.embeddings,
182
- this.vectorStoreConfig
183
- );
184
- const result = await this.pool.query(
185
- `SELECT id FROM embeddings_documents
186
- WHERE metadata->>'id' = $1
187
- ORDER BY id DESC LIMIT 1`,
188
- [docData.id]
189
- );
190
- return {
191
- embeddingId: result.rows[0]?.id || "",
192
- embedding: embeddingVector
193
- };
194
- } catch (error) {
195
- console.error(`Failed to create embedding: ${error}`);
196
- throw new Error(`Failed to create embedding: ${error}`);
205
+ console.error(`[createEmbedding] Failed (attempt ${attempt}/${maxRetries}):`, error.message || error);
206
+ if (isLastAttempt) {
207
+ throw new Error(`Failed to create embedding after ${maxRetries} attempts: ${error.message || error}`);
208
+ }
209
+ }
197
210
  }
211
+ throw new Error("Failed to create embedding: unexpected error");
198
212
  }
199
213
  async deleteEmbedding(strapiId) {
200
214
  if (!this.pool) {
@@ -220,8 +234,14 @@ class PluginManager {
220
234
  this.vectorStoreConfig
221
235
  );
222
236
  const resultsWithScores = await vectorStore.similaritySearchWithScore(query, 6);
223
- const SIMILARITY_THRESHOLD = 0.5;
237
+ console.log(`[queryEmbedding] Query: "${query}"`);
238
+ console.log(`[queryEmbedding] Found ${resultsWithScores.length} results:`);
239
+ resultsWithScores.forEach(([doc, score], i) => {
240
+ console.log(` ${i + 1}. Score: ${score.toFixed(4)}, Title: ${doc.metadata?.title || "N/A"}`);
241
+ });
242
+ const SIMILARITY_THRESHOLD = 1;
224
243
  const relevantResults = resultsWithScores.filter(([_, score]) => score < SIMILARITY_THRESHOLD);
244
+ console.log(`[queryEmbedding] ${relevantResults.length} results passed threshold (< ${SIMILARITY_THRESHOLD})`);
225
245
  const topResults = relevantResults.slice(0, 3);
226
246
  const sourceDocuments = topResults.map(([doc]) => doc);
227
247
  const bestMatchForDisplay = topResults.length > 0 ? [topResults[0][0]] : [];
@@ -281,6 +301,56 @@ Context:
281
301
  isInitialized() {
282
302
  return !!(this.embeddings && this.chat && this.pool);
283
303
  }
304
+ /**
305
+ * Get all embeddings from Neon DB
306
+ * Returns the metadata (including Strapi documentId) for each embedding
307
+ */
308
+ async getAllNeonEmbeddings() {
309
+ if (!this.pool) {
310
+ throw new Error("Plugin manager not initialized");
311
+ }
312
+ try {
313
+ const result = await this.pool.query(`
314
+ SELECT
315
+ id,
316
+ content,
317
+ metadata->>'id' as strapi_id,
318
+ metadata->>'title' as title,
319
+ metadata->>'collectionType' as collection_type,
320
+ metadata->>'fieldName' as field_name
321
+ FROM embeddings_documents
322
+ ORDER BY id
323
+ `);
324
+ return result.rows.map((row) => ({
325
+ id: row.id,
326
+ strapiId: row.strapi_id,
327
+ title: row.title || "",
328
+ content: row.content || "",
329
+ collectionType: row.collection_type || "standalone",
330
+ fieldName: row.field_name || "content"
331
+ }));
332
+ } catch (error) {
333
+ console.error(`Failed to get Neon embeddings: ${error}`);
334
+ throw new Error(`Failed to get Neon embeddings: ${error}`);
335
+ }
336
+ }
337
+ /**
338
+ * Delete an embedding from Neon by its Neon UUID (not Strapi ID)
339
+ */
340
+ async deleteNeonEmbeddingById(neonId) {
341
+ if (!this.pool) {
342
+ throw new Error("Plugin manager not initialized");
343
+ }
344
+ try {
345
+ await this.pool.query(
346
+ `DELETE FROM embeddings_documents WHERE id = $1`,
347
+ [neonId]
348
+ );
349
+ } catch (error) {
350
+ console.error(`Failed to delete Neon embedding: ${error}`);
351
+ throw new Error(`Failed to delete Neon embedding: ${error}`);
352
+ }
353
+ }
284
354
  async destroy() {
285
355
  if (this.pool) {
286
356
  await this.pool.end();
@@ -290,6 +360,59 @@ Context:
290
360
  this.chat = null;
291
361
  this.vectorStoreConfig = null;
292
362
  }
363
+ /**
364
+ * Clear all embeddings from Neon DB
365
+ * Returns the number of deleted rows
366
+ */
367
+ async clearAllNeonEmbeddings() {
368
+ if (!this.pool) {
369
+ throw new Error("Plugin manager not initialized");
370
+ }
371
+ try {
372
+ const result = await this.pool.query(`
373
+ DELETE FROM embeddings_documents
374
+ RETURNING id
375
+ `);
376
+ console.log(`[clearAllNeonEmbeddings] Deleted ${result.rowCount} embeddings from Neon`);
377
+ return result.rowCount || 0;
378
+ } catch (error) {
379
+ console.error(`Failed to clear Neon embeddings: ${error}`);
380
+ throw new Error(`Failed to clear Neon embeddings: ${error}`);
381
+ }
382
+ }
383
+ /**
384
+ * Debug method to inspect raw data in Neon DB
385
+ */
386
+ async debugNeonEmbeddings() {
387
+ if (!this.pool) {
388
+ throw new Error("Plugin manager not initialized");
389
+ }
390
+ try {
391
+ const result = await this.pool.query(`
392
+ SELECT
393
+ id,
394
+ content,
395
+ metadata,
396
+ pg_typeof(metadata) as metadata_type,
397
+ embedding IS NOT NULL as has_embedding,
398
+ CASE WHEN embedding IS NOT NULL THEN array_length(embedding::float[], 1) ELSE 0 END as embedding_length
399
+ FROM embeddings_documents
400
+ ORDER BY id
401
+ LIMIT 20
402
+ `);
403
+ return result.rows.map((row) => ({
404
+ id: row.id,
405
+ content: row.content?.substring(0, 200) + (row.content?.length > 200 ? "..." : ""),
406
+ metadata: row.metadata,
407
+ metadataType: row.metadata_type,
408
+ hasEmbedding: row.has_embedding,
409
+ embeddingLength: row.embedding_length || 0
410
+ }));
411
+ } catch (error) {
412
+ console.error(`Failed to debug Neon embeddings: ${error}`);
413
+ throw new Error(`Failed to debug Neon embeddings: ${error}`);
414
+ }
415
+ }
293
416
  }
294
417
  const pluginManager = new PluginManager();
295
418
  const SemanticSearchSchema = z.object({
@@ -580,7 +703,7 @@ async function handleGetEmbedding(strapi, args) {
580
703
  }
581
704
  const createEmbeddingTool = {
582
705
  name: "create_embedding",
583
- description: "Create a new embedding from text content. The content will be vectorized and stored for semantic search.",
706
+ description: "Create a new embedding from text content. The content will be vectorized and stored for semantic search. For large content (over 4000 characters), enable autoChunk to automatically split into multiple embeddings.",
584
707
  inputSchema: {
585
708
  type: "object",
586
709
  properties: {
@@ -595,21 +718,68 @@ const createEmbeddingTool = {
595
718
  metadata: {
596
719
  type: "object",
597
720
  description: "Optional metadata to associate with the embedding (tags, source, etc.)"
721
+ },
722
+ autoChunk: {
723
+ type: "boolean",
724
+ description: "Automatically split large content into chunks (default: false). When enabled, content over 4000 characters will be split into multiple embeddings with overlap for context preservation."
598
725
  }
599
726
  },
600
727
  required: ["title", "content"]
601
728
  }
602
729
  };
603
730
  async function handleCreateEmbedding(strapi, args) {
604
- const { title, content, metadata } = args;
731
+ const { title, content, metadata, autoChunk } = args;
605
732
  try {
606
733
  const embeddingsService = strapi.plugin("strapi-content-embeddings").service("embeddings");
734
+ if (autoChunk) {
735
+ const result = await embeddingsService.createChunkedEmbedding({
736
+ data: {
737
+ title,
738
+ content,
739
+ metadata: metadata || {},
740
+ collectionType: "standalone",
741
+ fieldName: "content"
742
+ }
743
+ });
744
+ return {
745
+ content: [
746
+ {
747
+ type: "text",
748
+ text: JSON.stringify(
749
+ {
750
+ success: true,
751
+ message: result.wasChunked ? `Content chunked into ${result.totalChunks} embeddings` : "Embedding created successfully (no chunking needed)",
752
+ wasChunked: result.wasChunked,
753
+ totalChunks: result.totalChunks,
754
+ primaryEmbedding: {
755
+ id: result.entity.id,
756
+ documentId: result.entity.documentId,
757
+ title: result.entity.title,
758
+ embeddingId: result.entity.embeddingId
759
+ },
760
+ chunks: result.chunks.map((chunk) => ({
761
+ documentId: chunk.documentId,
762
+ title: chunk.title,
763
+ contentLength: chunk.content?.length || 0
764
+ })),
765
+ contentLength: content.length,
766
+ estimatedTokens: Math.ceil(content.length / 4)
767
+ },
768
+ null,
769
+ 2
770
+ )
771
+ }
772
+ ]
773
+ };
774
+ }
607
775
  const embedding2 = await embeddingsService.createEmbedding({
608
- title,
609
- content,
610
- metadata: metadata || {},
611
- collectionType: "standalone",
612
- fieldName: "content"
776
+ data: {
777
+ title,
778
+ content,
779
+ metadata: metadata || {},
780
+ collectionType: "standalone",
781
+ fieldName: "content"
782
+ }
613
783
  });
614
784
  return {
615
785
  content: [
@@ -627,7 +797,8 @@ async function handleCreateEmbedding(strapi, args) {
627
797
  contentLength: content.length,
628
798
  metadata: embedding2.metadata,
629
799
  createdAt: embedding2.createdAt
630
- }
800
+ },
801
+ hint: content.length > 4e3 ? "Content is large. Consider using autoChunk: true for better search results." : void 0
631
802
  },
632
803
  null,
633
804
  2
@@ -713,10 +884,10 @@ function createMcpServer(strapi) {
713
884
  });
714
885
  return server;
715
886
  }
716
- const PLUGIN_ID$4 = "strapi-content-embeddings";
887
+ const PLUGIN_ID$5 = "strapi-content-embeddings";
717
888
  const OAUTH_PLUGIN_ID = "strapi-oauth-mcp-manager";
718
889
  function createFallbackAuthMiddleware(strapi) {
719
- const mcpPath = `/api/${PLUGIN_ID$4}/mcp`;
890
+ const mcpPath = `/api/${PLUGIN_ID$5}/mcp`;
720
891
  return async (ctx, next) => {
721
892
  if (!ctx.path.startsWith(mcpPath)) {
722
893
  return next();
@@ -742,35 +913,35 @@ const bootstrap = async ({ strapi }) => {
742
913
  section: "plugins",
743
914
  displayName: "Read",
744
915
  uid: "read",
745
- pluginName: PLUGIN_ID$4
916
+ pluginName: PLUGIN_ID$5
746
917
  },
747
918
  {
748
919
  section: "plugins",
749
920
  displayName: "Update",
750
921
  uid: "update",
751
- pluginName: PLUGIN_ID$4
922
+ pluginName: PLUGIN_ID$5
752
923
  },
753
924
  {
754
925
  section: "plugins",
755
926
  displayName: "Create",
756
927
  uid: "create",
757
- pluginName: PLUGIN_ID$4
928
+ pluginName: PLUGIN_ID$5
758
929
  },
759
930
  {
760
931
  section: "plugins",
761
932
  displayName: "Delete",
762
933
  uid: "delete",
763
- pluginName: PLUGIN_ID$4
934
+ pluginName: PLUGIN_ID$5
764
935
  },
765
936
  {
766
937
  section: "plugins",
767
938
  displayName: "Chat",
768
939
  uid: "chat",
769
- pluginName: PLUGIN_ID$4
940
+ pluginName: PLUGIN_ID$5
770
941
  }
771
942
  ];
772
943
  await strapi.admin.services.permission.actionProvider.registerMany(actions);
773
- const pluginConfig = strapi.config.get(`plugin::${PLUGIN_ID$4}`);
944
+ const pluginConfig = strapi.config.get(`plugin::${PLUGIN_ID$5}`);
774
945
  if (pluginConfig?.openAIApiKey && pluginConfig?.neonConnectionString) {
775
946
  try {
776
947
  await pluginManager.initialize({
@@ -779,42 +950,42 @@ const bootstrap = async ({ strapi }) => {
779
950
  embeddingModel: pluginConfig.embeddingModel
780
951
  });
781
952
  strapi.contentEmbeddingsManager = pluginManager;
782
- strapi.log.info(`[${PLUGIN_ID$4}] Plugin initialized successfully`);
953
+ strapi.log.info(`[${PLUGIN_ID$5}] Plugin initialized successfully`);
783
954
  } catch (error) {
784
- strapi.log.error(`[${PLUGIN_ID$4}] Failed to initialize:`, error);
955
+ strapi.log.error(`[${PLUGIN_ID$5}] Failed to initialize:`, error);
785
956
  }
786
957
  } else {
787
958
  strapi.log.warn(
788
- `[${PLUGIN_ID$4}] Missing configuration. Set openAIApiKey and neonConnectionString in plugin config.`
959
+ `[${PLUGIN_ID$5}] Missing configuration. Set openAIApiKey and neonConnectionString in plugin config.`
789
960
  );
790
961
  }
791
- const plugin = strapi.plugin(PLUGIN_ID$4);
962
+ const plugin = strapi.plugin(PLUGIN_ID$5);
792
963
  plugin.createMcpServer = () => createMcpServer(strapi);
793
964
  plugin.sessions = /* @__PURE__ */ new Map();
794
965
  const oauthPlugin = strapi.plugin(OAUTH_PLUGIN_ID);
795
966
  if (oauthPlugin) {
796
- strapi.log.info(`[${PLUGIN_ID$4}] OAuth manager detected - OAuth + API token auth enabled`);
967
+ strapi.log.info(`[${PLUGIN_ID$5}] OAuth manager detected - OAuth + API token auth enabled`);
797
968
  } else {
798
969
  const fallbackMiddleware = createFallbackAuthMiddleware();
799
970
  strapi.server.use(fallbackMiddleware);
800
- strapi.log.info(`[${PLUGIN_ID$4}] Using API token authentication (OAuth manager not installed)`);
971
+ strapi.log.info(`[${PLUGIN_ID$5}] Using API token authentication (OAuth manager not installed)`);
801
972
  }
802
- strapi.log.info(`[${PLUGIN_ID$4}] MCP endpoint available at: /api/${PLUGIN_ID$4}/mcp`);
973
+ strapi.log.info(`[${PLUGIN_ID$5}] MCP endpoint available at: /api/${PLUGIN_ID$5}/mcp`);
803
974
  };
804
975
  const destroy = async ({ strapi }) => {
805
976
  await pluginManager.destroy();
806
977
  console.log("Content Embeddings plugin destroyed");
807
978
  };
808
- const PLUGIN_ID$3 = "strapi-content-embeddings";
979
+ const PLUGIN_ID$4 = "strapi-content-embeddings";
809
980
  const register = ({ strapi }) => {
810
981
  Object.values(strapi.contentTypes).forEach((contentType) => {
811
- if (contentType.uid.startsWith("admin::") || contentType.uid.startsWith("strapi::") || contentType.uid === `plugin::${PLUGIN_ID$3}.embedding`) {
982
+ if (contentType.uid.startsWith("admin::") || contentType.uid.startsWith("strapi::") || contentType.uid === `plugin::${PLUGIN_ID$4}.embedding`) {
812
983
  return;
813
984
  }
814
985
  contentType.attributes.embedding = {
815
986
  type: "relation",
816
987
  relation: "morphOne",
817
- target: `plugin::${PLUGIN_ID$3}.embedding`,
988
+ target: `plugin::${PLUGIN_ID$4}.embedding`,
818
989
  morphBy: "related",
819
990
  private: false,
820
991
  configurable: false
@@ -884,20 +1055,23 @@ const embedding = {
884
1055
  const contentTypes = {
885
1056
  embedding
886
1057
  };
887
- const PLUGIN_ID$2 = "strapi-content-embeddings";
1058
+ const PLUGIN_ID$3 = "strapi-content-embeddings";
888
1059
  const controller = ({ strapi }) => ({
889
1060
  async createEmbedding(ctx) {
890
1061
  try {
891
- const result = await strapi.plugin(PLUGIN_ID$2).service("embeddings").createEmbedding(ctx.request.body);
1062
+ console.log("[createEmbedding] Starting, autoChunk:", ctx.request.body?.data?.autoChunk);
1063
+ const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").createEmbedding(ctx.request.body);
1064
+ console.log("[createEmbedding] Completed, documentId:", result?.documentId);
892
1065
  ctx.body = result;
893
1066
  } catch (error) {
1067
+ console.error("[createEmbedding] Error:", error.message);
894
1068
  ctx.throw(500, error.message || "Failed to create embedding");
895
1069
  }
896
1070
  },
897
1071
  async deleteEmbedding(ctx) {
898
1072
  try {
899
1073
  const { id } = ctx.params;
900
- const result = await strapi.plugin(PLUGIN_ID$2).service("embeddings").deleteEmbedding(id);
1074
+ const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").deleteEmbedding(id);
901
1075
  ctx.body = result;
902
1076
  } catch (error) {
903
1077
  ctx.throw(500, error.message || "Failed to delete embedding");
@@ -906,7 +1080,7 @@ const controller = ({ strapi }) => ({
906
1080
  async updateEmbedding(ctx) {
907
1081
  try {
908
1082
  const { id } = ctx.params;
909
- const result = await strapi.plugin(PLUGIN_ID$2).service("embeddings").updateEmbedding(id, ctx.request.body);
1083
+ const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").updateEmbedding(id, ctx.request.body);
910
1084
  ctx.body = result;
911
1085
  } catch (error) {
912
1086
  ctx.throw(500, error.message || "Failed to update embedding");
@@ -915,7 +1089,7 @@ const controller = ({ strapi }) => ({
915
1089
  async getEmbeddings(ctx) {
916
1090
  try {
917
1091
  const { page, pageSize, filters } = ctx.query;
918
- const result = await strapi.plugin(PLUGIN_ID$2).service("embeddings").getEmbeddings({
1092
+ const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").getEmbeddings({
919
1093
  page: page ? parseInt(page, 10) : 1,
920
1094
  pageSize: pageSize ? parseInt(pageSize, 10) : 10,
921
1095
  filters
@@ -928,7 +1102,7 @@ const controller = ({ strapi }) => ({
928
1102
  async getEmbedding(ctx) {
929
1103
  try {
930
1104
  const { id } = ctx.params;
931
- const result = await strapi.plugin(PLUGIN_ID$2).service("embeddings").getEmbedding(id);
1105
+ const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").getEmbedding(id);
932
1106
  if (!result) {
933
1107
  ctx.throw(404, "Embedding not found");
934
1108
  }
@@ -943,14 +1117,96 @@ const controller = ({ strapi }) => ({
943
1117
  async queryEmbeddings(ctx) {
944
1118
  try {
945
1119
  const { query } = ctx.query;
946
- const result = await strapi.plugin(PLUGIN_ID$2).service("embeddings").queryEmbeddings(query);
1120
+ const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").queryEmbeddings(query);
947
1121
  ctx.body = result;
948
1122
  } catch (error) {
949
1123
  ctx.throw(500, error.message || "Failed to query embeddings");
950
1124
  }
1125
+ },
1126
+ /**
1127
+ * Get all chunks related to a document
1128
+ * GET /api/strapi-content-embeddings/embeddings/related-chunks/:id
1129
+ */
1130
+ async getRelatedChunks(ctx) {
1131
+ try {
1132
+ const { id } = ctx.params;
1133
+ const result = await strapi.plugin(PLUGIN_ID$3).service("embeddings").findRelatedChunks(id);
1134
+ console.log(`[getRelatedChunks] Found ${result.length} chunks for document ${id}`);
1135
+ ctx.body = {
1136
+ data: result,
1137
+ count: result.length
1138
+ };
1139
+ } catch (error) {
1140
+ ctx.throw(500, error.message || "Failed to get related chunks");
1141
+ }
1142
+ },
1143
+ /**
1144
+ * Sync embeddings from Neon DB to Strapi DB
1145
+ * GET /api/strapi-content-embeddings/sync
1146
+ *
1147
+ * Query params:
1148
+ * - removeOrphans: boolean (default: false) - Remove Strapi entries that don't exist in Neon
1149
+ * - dryRun: boolean (default: false) - Preview changes without applying them
1150
+ */
1151
+ async syncFromNeon(ctx) {
1152
+ try {
1153
+ const { removeOrphans, dryRun } = ctx.query;
1154
+ const result = await strapi.plugin(PLUGIN_ID$3).service("sync").syncFromNeon({
1155
+ removeOrphans: removeOrphans === "true",
1156
+ dryRun: dryRun === "true"
1157
+ });
1158
+ ctx.body = result;
1159
+ } catch (error) {
1160
+ ctx.throw(500, error.message || "Failed to sync embeddings");
1161
+ }
1162
+ },
1163
+ /**
1164
+ * Get sync status - compare Neon and Strapi without making changes
1165
+ * GET /api/strapi-content-embeddings/sync/status
1166
+ */
1167
+ async getSyncStatus(ctx) {
1168
+ try {
1169
+ const result = await strapi.plugin(PLUGIN_ID$3).service("sync").getSyncStatus();
1170
+ ctx.body = result;
1171
+ } catch (error) {
1172
+ ctx.throw(500, error.message || "Failed to get sync status");
1173
+ }
1174
+ },
1175
+ /**
1176
+ * Debug endpoint to inspect Neon DB contents
1177
+ * GET /api/strapi-content-embeddings/debug/neon
1178
+ */
1179
+ async debugNeon(ctx) {
1180
+ try {
1181
+ const { pluginManager: pluginManager2 } = require("../plugin-manager");
1182
+ const result = await pluginManager2.debugNeonEmbeddings();
1183
+ ctx.body = {
1184
+ count: result.length,
1185
+ embeddings: result
1186
+ };
1187
+ } catch (error) {
1188
+ ctx.throw(500, error.message || "Failed to debug Neon");
1189
+ }
1190
+ },
1191
+ /**
1192
+ * Recreate all embeddings in Neon from Strapi data
1193
+ * POST /api/strapi-content-embeddings/recreate
1194
+ *
1195
+ * Use this when embeddings were created with incorrect metadata format
1196
+ * WARNING: This will delete ALL existing Neon embeddings and recreate them
1197
+ */
1198
+ async recreateEmbeddings(ctx) {
1199
+ try {
1200
+ console.log("[recreateEmbeddings] Starting recreation of all embeddings...");
1201
+ const result = await strapi.plugin(PLUGIN_ID$3).service("sync").recreateAllEmbeddings();
1202
+ ctx.body = result;
1203
+ } catch (error) {
1204
+ console.error("[recreateEmbeddings] Error:", error.message);
1205
+ ctx.throw(500, error.message || "Failed to recreate embeddings");
1206
+ }
951
1207
  }
952
1208
  });
953
- const PLUGIN_ID$1 = "strapi-content-embeddings";
1209
+ const PLUGIN_ID$2 = "strapi-content-embeddings";
954
1210
  const SESSION_TIMEOUT_MS = 4 * 60 * 60 * 1e3;
955
1211
  function isSessionExpired(session) {
956
1212
  return Date.now() - session.createdAt > SESSION_TIMEOUT_MS;
@@ -968,7 +1224,7 @@ function cleanupExpiredSessions(plugin, strapi) {
968
1224
  }
969
1225
  }
970
1226
  if (cleaned > 0) {
971
- strapi.log.debug(`[${PLUGIN_ID$1}] Cleaned up ${cleaned} expired MCP sessions`);
1227
+ strapi.log.debug(`[${PLUGIN_ID$2}] Cleaned up ${cleaned} expired MCP sessions`);
972
1228
  }
973
1229
  }
974
1230
  const mcpController = ({ strapi }) => ({
@@ -976,7 +1232,7 @@ const mcpController = ({ strapi }) => ({
976
1232
  * Handle MCP requests (POST, GET, DELETE)
977
1233
  */
978
1234
  async handle(ctx) {
979
- const plugin = strapi.plugin(PLUGIN_ID$1);
1235
+ const plugin = strapi.plugin(PLUGIN_ID$2);
980
1236
  if (!plugin.createMcpServer) {
981
1237
  ctx.status = 503;
982
1238
  ctx.body = {
@@ -992,7 +1248,7 @@ const mcpController = ({ strapi }) => ({
992
1248
  const requestedSessionId = ctx.request.headers["mcp-session-id"];
993
1249
  let session = requestedSessionId ? plugin.sessions.get(requestedSessionId) : null;
994
1250
  if (session && isSessionExpired(session)) {
995
- strapi.log.debug(`[${PLUGIN_ID$1}] Session expired, removing: ${requestedSessionId}`);
1251
+ strapi.log.debug(`[${PLUGIN_ID$2}] Session expired, removing: ${requestedSessionId}`);
996
1252
  try {
997
1253
  session.server.close();
998
1254
  } catch {
@@ -1027,13 +1283,13 @@ const mcpController = ({ strapi }) => ({
1027
1283
  };
1028
1284
  plugin.sessions.set(sessionId, session);
1029
1285
  strapi.log.debug(
1030
- `[${PLUGIN_ID$1}] New MCP session created: ${sessionId} (auth: ${ctx.state.authMethod || "unknown"})`
1286
+ `[${PLUGIN_ID$2}] New MCP session created: ${sessionId} (auth: ${ctx.state.authMethod || "unknown"})`
1031
1287
  );
1032
1288
  }
1033
1289
  try {
1034
1290
  await session.transport.handleRequest(ctx.req, ctx.res, ctx.request.body);
1035
1291
  } catch (transportError) {
1036
- strapi.log.warn(`[${PLUGIN_ID$1}] Transport error, cleaning up session: ${requestedSessionId}`, {
1292
+ strapi.log.warn(`[${PLUGIN_ID$2}] Transport error, cleaning up session: ${requestedSessionId}`, {
1037
1293
  error: transportError instanceof Error ? transportError.message : String(transportError)
1038
1294
  });
1039
1295
  try {
@@ -1056,7 +1312,7 @@ const mcpController = ({ strapi }) => ({
1056
1312
  }
1057
1313
  ctx.respond = false;
1058
1314
  } catch (error) {
1059
- strapi.log.error(`[${PLUGIN_ID$1}] Error handling MCP request`, {
1315
+ strapi.log.error(`[${PLUGIN_ID$2}] Error handling MCP request`, {
1060
1316
  error: error instanceof Error ? error.message : String(error),
1061
1317
  method: ctx.method,
1062
1318
  path: ctx.path
@@ -1083,6 +1339,32 @@ const contentApi = [
1083
1339
  path: "/embeddings-query",
1084
1340
  handler: "controller.queryEmbeddings"
1085
1341
  },
1342
+ // Sync routes - for cron jobs or manual triggering
1343
+ // Use API token for authentication
1344
+ {
1345
+ method: "GET",
1346
+ path: "/sync",
1347
+ handler: "controller.syncFromNeon",
1348
+ config: {
1349
+ description: "Sync embeddings from Neon DB to Strapi. Query params: removeOrphans=true, dryRun=true"
1350
+ }
1351
+ },
1352
+ {
1353
+ method: "POST",
1354
+ path: "/sync",
1355
+ handler: "controller.syncFromNeon",
1356
+ config: {
1357
+ description: "Sync embeddings from Neon DB to Strapi. Query params: removeOrphans=true, dryRun=true"
1358
+ }
1359
+ },
1360
+ {
1361
+ method: "GET",
1362
+ path: "/sync/status",
1363
+ handler: "controller.getSyncStatus",
1364
+ config: {
1365
+ description: "Get sync status between Neon and Strapi without making changes"
1366
+ }
1367
+ },
1086
1368
  // MCP routes - auth handled by middleware
1087
1369
  {
1088
1370
  method: "POST",
@@ -1190,6 +1472,45 @@ const admin = [
1190
1472
  }
1191
1473
  ]
1192
1474
  }
1475
+ },
1476
+ {
1477
+ method: "GET",
1478
+ path: "/embeddings/related-chunks/:id",
1479
+ handler: "controller.getRelatedChunks",
1480
+ config: {
1481
+ policies: [
1482
+ {
1483
+ name: "admin::hasPermissions",
1484
+ config: { actions: ["plugin::strapi-content-embeddings.read"] }
1485
+ }
1486
+ ]
1487
+ }
1488
+ },
1489
+ {
1490
+ method: "GET",
1491
+ path: "/debug/neon",
1492
+ handler: "controller.debugNeon",
1493
+ config: {
1494
+ policies: [
1495
+ {
1496
+ name: "admin::hasPermissions",
1497
+ config: { actions: ["plugin::strapi-content-embeddings.read"] }
1498
+ }
1499
+ ]
1500
+ }
1501
+ },
1502
+ {
1503
+ method: "POST",
1504
+ path: "/recreate",
1505
+ handler: "controller.recreateEmbeddings",
1506
+ config: {
1507
+ policies: [
1508
+ {
1509
+ name: "admin::hasPermissions",
1510
+ config: { actions: ["plugin::strapi-content-embeddings.update"] }
1511
+ }
1512
+ ]
1513
+ }
1193
1514
  }
1194
1515
  ];
1195
1516
  const routes = {
@@ -1202,11 +1523,180 @@ const routes = {
1202
1523
  routes: [...admin]
1203
1524
  }
1204
1525
  };
1205
- const PLUGIN_ID = "strapi-content-embeddings";
1206
- const CONTENT_TYPE_UID = `plugin::${PLUGIN_ID}.embedding`;
1526
/**
 * Separators tried in order when splitting text, from the coarsest boundary
 * to the finest. The empty string splits between individual characters and
 * always matches, so it acts as the last resort.
 */
const DEFAULT_SEPARATORS = [
  "\n\n", // paragraphs
  "\n", // lines
  ". ", // sentences
  "! ", // exclamations
  "? ", // questions
  "; ", // semicolons
  ", ", // commas
  " ", // words
  "" // characters (last resort)
];
1546
/**
 * Rough token estimate: one token per four characters, rounded up.
 * @param {string} text - Text to measure (its `length` is used).
 * @returns {number} `ceil(text.length / 4)`.
 */
function estimateTokens(text) {
  const CHARS_PER_TOKEN = 4;
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN);
}
1549
/**
 * Tell whether content is longer than the allowed number of characters.
 * @param {string} content - Content to check (its `length` is used).
 * @param {number} [maxChars=4000] - Largest length that still fits in one piece.
 * @returns {boolean} true when `content.length` is strictly greater than `maxChars`.
 */
function needsChunking(content, maxChars = 4e3) {
  const exceedsLimit = content.length > maxChars;
  return exceedsLimit;
}
1552
/**
 * Split text on a separator while keeping the separator attached to the end
 * of each piece, so that joining the pieces reproduces the input exactly.
 *
 * @param {string} text - Text to split.
 * @param {string} separator - Separator to split on. The empty string splits
 *   into individual characters.
 * @returns {string[]} The pieces. A trailing empty piece is dropped; every
 *   other piece ends with `separator`.
 */
function splitWithSeparator(text, separator) {
  if (separator === "") {
    // Iterate by code point: `split("")` works on UTF-16 code units and would
    // tear surrogate pairs (emoji, astral characters) into invalid halves.
    return Array.from(text);
  }
  const parts = text.split(separator);
  const lastIndex = parts.length - 1;
  const pieces = [];
  parts.forEach((part, index) => {
    if (index < lastIndex) {
      pieces.push(part + separator);
    } else if (part) {
      pieces.push(part);
    }
  });
  return pieces;
}
1567
/**
 * Recursively split text into chunks of at most `chunkSize` characters.
 *
 * The first separator in `separators` that occurs in the text is used to cut
 * it into pieces (via `splitWithSeparator`), and consecutive pieces are packed
 * together while they fit. A piece that is still too long is split again with
 * the separators that come after the one just used, or sliced at fixed
 * `chunkSize` intervals when none are left.
 *
 * @param {string} text - Text to split.
 * @param {number} chunkSize - Maximum chunk length; must be positive whenever
 *   the text does not already fit.
 * @param {string[]} separators - Separators in order of preference.
 * @returns {string[]} The chunks, in order.
 * @throws {RangeError} When the text needs splitting and `chunkSize` is zero
 *   or negative (the fixed-interval slicing could never advance).
 */
function splitText(text, chunkSize, separators) {
  if (text.length <= chunkSize) {
    return [text];
  }
  if (chunkSize <= 0) {
    throw new RangeError(`chunkSize must be a positive number, got ${chunkSize}`);
  }
  // Fall back to the last separator when none of them occurs in the text.
  let bestSeparator = separators[separators.length - 1];
  for (const sep of separators) {
    if (text.includes(sep)) {
      bestSeparator = sep;
      break;
    }
  }
  // Separators finer than the one chosen here; the same for every oversized piece.
  const remainingSeparators = separators.slice(separators.indexOf(bestSeparator) + 1);
  const splits = splitWithSeparator(text, bestSeparator);
  const chunks = [];
  let currentChunk = "";
  for (const split of splits) {
    if ((currentChunk + split).length <= chunkSize) {
      currentChunk += split;
      continue;
    }
    if (currentChunk) {
      chunks.push(currentChunk);
    }
    if (split.length <= chunkSize) {
      currentChunk = split;
      continue;
    }
    if (remainingSeparators.length > 0) {
      chunks.push(...splitText(split, chunkSize, remainingSeparators));
    } else {
      for (let i = 0; i < split.length; i += chunkSize) {
        chunks.push(split.slice(i, i + chunkSize));
      }
    }
    currentChunk = "";
  }
  if (currentChunk) {
    chunks.push(currentChunk);
  }
  return chunks;
}
1608
/**
 * Prefix every chunk except the first with the tail of the chunk before it.
 *
 * @param {string[]} chunks - Chunks in document order.
 * @param {number} overlap - Number of trailing characters of the previous
 *   chunk to prepend.
 * @returns {string[]} The input array itself when `overlap <= 0` or there is
 *   at most one chunk; otherwise a new array of the same length.
 */
function addOverlap(chunks, overlap) {
  const nothingToDo = overlap <= 0 || chunks.length <= 1;
  if (nothingToDo) {
    return chunks;
  }
  return chunks.map((chunk, index) => {
    if (index === 0) {
      return chunk;
    }
    // The tail is taken from the previous chunk as given, not from its overlapped form.
    const tail = chunks[index - 1].slice(-overlap);
    return tail + chunk;
  });
}
1624
/**
 * Split content into overlapping chunks.
 *
 * The content is trimmed first. Content that fits in `chunkSize` comes back as
 * a single chunk. Otherwise it is split into raw pieces of at most
 * `chunkSize - chunkOverlap` characters, and every piece but the first is
 * prefixed with the last `chunkOverlap` characters of the piece before it.
 *
 * @param {string} content - Text to chunk.
 * @param {object} [options2]
 * @param {number} [options2.chunkSize=4000] - Maximum chunk length in characters.
 * @param {number} [options2.chunkOverlap=200] - Characters repeated from the
 *   previous piece; negative values are treated as 0.
 * @param {string[]} [options2.separators=DEFAULT_SEPARATORS] - Separators in
 *   order of preference.
 * @returns {Array<{text: string, chunkIndex: number, totalChunks: number, startOffset: number, endOffset: number}>}
 *   Non-empty chunks in order. `startOffset`/`endOffset` locate the chunk's raw
 *   (non-overlapped) piece within the trimmed content; `text` additionally
 *   carries the overlap prefix and is trimmed.
 * @throws {RangeError} When the content needs splitting and `chunkOverlap` is
 *   not smaller than `chunkSize`: the raw piece size would be zero or negative
 *   and splitting could never make progress.
 */
function chunkContent(content, options2 = {}) {
  const {
    chunkSize = 4e3,
    chunkOverlap = 200,
    separators = DEFAULT_SEPARATORS
  } = options2;
  const cleanContent = content.trim();
  if (!cleanContent) {
    return [];
  }
  if (cleanContent.length <= chunkSize) {
    return [{
      text: cleanContent,
      chunkIndex: 0,
      totalChunks: 1,
      startOffset: 0,
      endOffset: cleanContent.length
    }];
  }
  const overlap = Math.max(0, chunkOverlap);
  if (overlap >= chunkSize) {
    throw new RangeError(`chunkOverlap (${chunkOverlap}) must be smaller than chunkSize (${chunkSize})`);
  }
  const rawChunks = splitText(cleanContent, chunkSize - overlap, separators);
  const chunksWithOverlap = addOverlap(rawChunks, overlap);
  const kept = [];
  let currentOffset = 0;
  for (let i = 0; i < chunksWithOverlap.length; i++) {
    const text = chunksWithOverlap[i].trim();
    const rawLength = rawChunks[i].length;
    if (text) {
      kept.push({ text, startOffset: currentOffset, endOffset: currentOffset + rawLength });
    }
    // Offsets advance by the raw piece length even when the chunk is dropped.
    currentOffset += rawLength;
  }
  // Number the chunks only after the empty ones are gone, so indices are contiguous.
  return kept.map(({ text, startOffset, endOffset }, chunkIndex) => ({
    text,
    chunkIndex,
    totalChunks: kept.length,
    startOffset,
    endOffset
  }));
}
1667
/**
 * Build the title for one chunk of a document.
 * @param {string} baseTitle - Title of the whole document.
 * @param {number} chunkIndex - Zero-based position of the chunk.
 * @param {number} totalChunks - Number of chunks the document was split into.
 * @returns {string} `baseTitle` unchanged when there is exactly one chunk,
 *   otherwise `baseTitle` followed by " [Part <index + 1>/<total>]".
 */
function formatChunkTitle(baseTitle, chunkIndex, totalChunks) {
  const partNumber = chunkIndex + 1;
  return totalChunks === 1 ? baseTitle : `${baseTitle} [Part ${partNumber}/${totalChunks}]`;
}
1673
+ const PLUGIN_ID$1 = "strapi-content-embeddings";
1674
+ const CONTENT_TYPE_UID$1 = `plugin::${PLUGIN_ID$1}.embedding`;
1207
1675
  const embeddings = ({ strapi }) => ({
1676
+ /**
1677
+ * Get plugin config with defaults
1678
+ */
1679
+ getConfig() {
1680
+ const config2 = strapi.config.get("plugin::strapi-content-embeddings") || {};
1681
+ return {
1682
+ chunkSize: config2.chunkSize || 4e3,
1683
+ chunkOverlap: config2.chunkOverlap || 200,
1684
+ autoChunk: config2.autoChunk || false,
1685
+ ...config2
1686
+ };
1687
+ },
1688
+ /**
1689
+ * Create a single embedding (no chunking)
1690
+ */
1208
1691
  async createEmbedding(data) {
1209
- const { title, content, collectionType, fieldName, metadata, related } = data.data;
1692
+ const { title, content, collectionType, fieldName, metadata, related, autoChunk } = data.data;
1693
+ const config2 = this.getConfig();
1694
+ const shouldChunk = autoChunk ?? config2.autoChunk;
1695
+ const chunkSize = config2.chunkSize || 4e3;
1696
+ if (shouldChunk && needsChunking(content, chunkSize)) {
1697
+ const result = await this.createChunkedEmbedding(data);
1698
+ return result.entity;
1699
+ }
1210
1700
  const entityData = {
1211
1701
  title,
1212
1702
  content,
@@ -1217,7 +1707,7 @@ const embeddings = ({ strapi }) => ({
1217
1707
  if (related && related.__type && related.id) {
1218
1708
  entityData.related = related;
1219
1709
  }
1220
- const entity = await strapi.documents(CONTENT_TYPE_UID).create({
1710
+ const entity = await strapi.documents(CONTENT_TYPE_UID$1).create({
1221
1711
  data: entityData
1222
1712
  });
1223
1713
  if (!pluginManager.isInitialized()) {
@@ -1232,7 +1722,7 @@ const embeddings = ({ strapi }) => ({
1232
1722
  collectionType: collectionType || "standalone",
1233
1723
  fieldName: fieldName || "content"
1234
1724
  });
1235
- const updatedEntity = await strapi.documents(CONTENT_TYPE_UID).update({
1725
+ const updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
1236
1726
  documentId: entity.documentId,
1237
1727
  data: {
1238
1728
  embeddingId: result.embeddingId,
@@ -1245,8 +1735,120 @@ const embeddings = ({ strapi }) => ({
1245
1735
  return entity;
1246
1736
  }
1247
1737
  },
1738
+ /**
1739
+ * Create embeddings with automatic chunking for large content
1740
+ * Creates multiple embedding entities, one per chunk
1741
+ */
1742
+ async createChunkedEmbedding(data) {
1743
+ const { title, content, collectionType, fieldName, metadata, related } = data.data;
1744
+ const config2 = this.getConfig();
1745
+ const chunkSize = config2.chunkSize || 4e3;
1746
+ const chunkOverlap = config2.chunkOverlap || 200;
1747
+ const chunks = chunkContent(content, { chunkSize, chunkOverlap });
1748
+ if (chunks.length === 0) {
1749
+ throw new Error("Content is empty or could not be chunked");
1750
+ }
1751
+ if (chunks.length === 1) {
1752
+ const entity = await this.createEmbedding({
1753
+ data: {
1754
+ ...data.data,
1755
+ autoChunk: false
1756
+ // Prevent recursive chunking
1757
+ }
1758
+ });
1759
+ return {
1760
+ entity,
1761
+ chunks: [entity],
1762
+ totalChunks: 1,
1763
+ wasChunked: false
1764
+ };
1765
+ }
1766
+ console.log(`[createChunkedEmbedding] Chunking content into ${chunks.length} parts (chunkSize: ${chunkSize}, overlap: ${chunkOverlap})`);
1767
+ const createdChunks = [];
1768
+ let parentDocumentId = null;
1769
+ for (const chunk of chunks) {
1770
+ console.log(`[createChunkedEmbedding] Processing chunk ${chunk.chunkIndex + 1}/${chunks.length}`);
1771
+ const chunkTitle = formatChunkTitle(title, chunk.chunkIndex, chunk.totalChunks);
1772
+ const chunkMetadata = {
1773
+ ...metadata,
1774
+ isChunk: true,
1775
+ chunkIndex: chunk.chunkIndex,
1776
+ totalChunks: chunk.totalChunks,
1777
+ startOffset: chunk.startOffset,
1778
+ endOffset: chunk.endOffset,
1779
+ originalTitle: title,
1780
+ parentDocumentId,
1781
+ estimatedTokens: estimateTokens(chunk.text)
1782
+ };
1783
+ const entityData = {
1784
+ title: chunkTitle,
1785
+ content: chunk.text,
1786
+ collectionType: collectionType || "standalone",
1787
+ fieldName: fieldName || "content",
1788
+ metadata: chunkMetadata
1789
+ };
1790
+ if (chunk.chunkIndex === 0 && related && related.__type && related.id) {
1791
+ entityData.related = related;
1792
+ }
1793
+ console.log(`[chunk ${chunk.chunkIndex + 1}] Creating entity in DB...`);
1794
+ const entity = await strapi.documents(CONTENT_TYPE_UID$1).create({
1795
+ data: entityData
1796
+ });
1797
+ console.log(`[chunk ${chunk.chunkIndex + 1}] Entity created: ${entity.documentId}`);
1798
+ if (chunk.chunkIndex === 0) {
1799
+ parentDocumentId = entity.documentId;
1800
+ } else {
1801
+ console.log(`[chunk ${chunk.chunkIndex + 1}] Updating metadata with parent ref...`);
1802
+ await strapi.documents(CONTENT_TYPE_UID$1).update({
1803
+ documentId: entity.documentId,
1804
+ data: {
1805
+ metadata: {
1806
+ ...chunkMetadata,
1807
+ parentDocumentId
1808
+ }
1809
+ }
1810
+ });
1811
+ console.log(`[chunk ${chunk.chunkIndex + 1}] Metadata updated`);
1812
+ }
1813
+ if (pluginManager.isInitialized()) {
1814
+ try {
1815
+ console.log(`[chunk ${chunk.chunkIndex + 1}] Creating OpenAI embedding...`);
1816
+ const result = await pluginManager.createEmbedding({
1817
+ id: entity.documentId,
1818
+ title: chunkTitle,
1819
+ content: chunk.text,
1820
+ collectionType: collectionType || "standalone",
1821
+ fieldName: fieldName || "content"
1822
+ });
1823
+ console.log(`[chunk ${chunk.chunkIndex + 1}] OpenAI embedding created`);
1824
+ console.log(`[chunk ${chunk.chunkIndex + 1}] Saving embedding to DB...`);
1825
+ const updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
1826
+ documentId: entity.documentId,
1827
+ data: {
1828
+ embeddingId: result.embeddingId,
1829
+ embedding: result.embedding
1830
+ }
1831
+ });
1832
+ console.log(`[chunk ${chunk.chunkIndex + 1}] Chunk complete`);
1833
+ createdChunks.push(updatedEntity);
1834
+ } catch (error) {
1835
+ console.error(`[chunk ${chunk.chunkIndex + 1}] FAILED:`, error.message || error);
1836
+ createdChunks.push(entity);
1837
+ }
1838
+ } else {
1839
+ createdChunks.push(entity);
1840
+ }
1841
+ }
1842
+ console.log(`[createChunkedEmbedding] Completed, created ${createdChunks.length} chunks, first documentId: ${createdChunks[0]?.documentId}`);
1843
+ return {
1844
+ entity: createdChunks[0],
1845
+ chunks: createdChunks,
1846
+ totalChunks: createdChunks.length,
1847
+ wasChunked: true
1848
+ };
1849
+ },
1248
1850
  async deleteEmbedding(id) {
1249
- const currentEntry = await strapi.documents(CONTENT_TYPE_UID).findOne({
1851
+ const currentEntry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
1250
1852
  documentId: String(id)
1251
1853
  });
1252
1854
  if (!currentEntry) {
@@ -1259,25 +1861,180 @@ const embeddings = ({ strapi }) => ({
1259
1861
  console.error("Failed to delete from vector store:", error);
1260
1862
  }
1261
1863
  }
1262
- const deletedEntry = await strapi.documents(CONTENT_TYPE_UID).delete({
1864
+ const deletedEntry = await strapi.documents(CONTENT_TYPE_UID$1).delete({
1263
1865
  documentId: String(id)
1264
1866
  });
1265
1867
  return deletedEntry;
1266
1868
  },
1869
+ /**
1870
+ * Find all chunks related to a parent document
1871
+ * Returns chunks including the parent itself
1872
+ */
1873
+ async findRelatedChunks(documentId) {
1874
+ const entry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
1875
+ documentId
1876
+ });
1877
+ if (!entry) {
1878
+ return [];
1879
+ }
1880
+ const metadata = entry.metadata;
1881
+ const parentId = metadata?.parentDocumentId || documentId;
1882
+ const isChunked = metadata?.isChunk === true;
1883
+ if (!isChunked && !metadata?.parentDocumentId) {
1884
+ const children = await strapi.documents(CONTENT_TYPE_UID$1).findMany({
1885
+ filters: {
1886
+ metadata: {
1887
+ $containsi: `"parentDocumentId":"${documentId}"`
1888
+ }
1889
+ },
1890
+ limit: -1
1891
+ // No limit - get all
1892
+ });
1893
+ console.log(`[findRelatedChunks] Found ${children.length} children for parent ${documentId}`);
1894
+ if (children.length === 0) {
1895
+ return [entry];
1896
+ }
1897
+ return [entry, ...children];
1898
+ }
1899
+ const allChunks = await strapi.documents(CONTENT_TYPE_UID$1).findMany({
1900
+ filters: {
1901
+ $or: [
1902
+ { documentId: parentId },
1903
+ {
1904
+ metadata: {
1905
+ $containsi: `"parentDocumentId":"${parentId}"`
1906
+ }
1907
+ }
1908
+ ]
1909
+ },
1910
+ limit: -1
1911
+ // No limit - get all
1912
+ });
1913
+ console.log(`[findRelatedChunks] Found ${allChunks.length} total chunks for parent ${parentId}`);
1914
+ return allChunks.sort((a, b) => {
1915
+ const aIndex = a.metadata?.chunkIndex ?? 0;
1916
+ const bIndex = b.metadata?.chunkIndex ?? 0;
1917
+ return aIndex - bIndex;
1918
+ });
1919
+ },
1920
+ /**
1921
+ * Delete all chunks related to a parent document
1922
+ */
1923
+ async deleteRelatedChunks(documentId) {
1924
+ const chunks = await this.findRelatedChunks(documentId);
1925
+ for (const chunk of chunks) {
1926
+ if (pluginManager.isInitialized()) {
1927
+ try {
1928
+ await pluginManager.deleteEmbedding(chunk.documentId);
1929
+ } catch (error) {
1930
+ console.error(`Failed to delete chunk ${chunk.documentId} from vector store:`, error);
1931
+ }
1932
+ }
1933
+ await strapi.documents(CONTENT_TYPE_UID$1).delete({
1934
+ documentId: chunk.documentId
1935
+ });
1936
+ }
1937
+ return chunks.length;
1938
+ },
1939
+ /**
1940
+ * Update embeddings with automatic chunking support
1941
+ * Handles re-chunking when content changes and exceeds chunk size
1942
+ */
1943
+ async updateChunkedEmbedding(id, data) {
1944
+ const { title, content, metadata, autoChunk } = data.data;
1945
+ const config2 = this.getConfig();
1946
+ const currentEntry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
1947
+ documentId: id
1948
+ });
1949
+ if (!currentEntry) {
1950
+ throw new Error(`Embedding with id ${id} not found`);
1951
+ }
1952
+ const currentMetadata = currentEntry.metadata;
1953
+ const parentDocumentId = currentMetadata?.parentDocumentId || id;
1954
+ const newContent = content ?? currentEntry.content;
1955
+ const newTitle = title ?? currentMetadata?.originalTitle ?? currentEntry.title;
1956
+ const shouldChunk = autoChunk ?? config2.autoChunk;
1957
+ const chunkSize = config2.chunkSize || 4e3;
1958
+ const contentNeedsChunking = shouldChunk && needsChunking(newContent, chunkSize);
1959
+ const existingChunks = await this.findRelatedChunks(id);
1960
+ let originalRelated;
1961
+ const firstChunk = existingChunks.find(
1962
+ (c) => c.metadata?.chunkIndex === 0 || c.documentId === parentDocumentId
1963
+ );
1964
+ if (firstChunk?.related) {
1965
+ originalRelated = firstChunk.related;
1966
+ }
1967
+ const deletedCount = await this.deleteRelatedChunks(id);
1968
+ console.log(`Deleted ${deletedCount} existing chunk(s) for update`);
1969
+ const preservedMetadata = { ...metadata };
1970
+ delete preservedMetadata?.isChunk;
1971
+ delete preservedMetadata?.chunkIndex;
1972
+ delete preservedMetadata?.totalChunks;
1973
+ delete preservedMetadata?.startOffset;
1974
+ delete preservedMetadata?.endOffset;
1975
+ delete preservedMetadata?.originalTitle;
1976
+ delete preservedMetadata?.parentDocumentId;
1977
+ delete preservedMetadata?.estimatedTokens;
1978
+ if (contentNeedsChunking) {
1979
+ return await this.createChunkedEmbedding({
1980
+ data: {
1981
+ title: newTitle.replace(/\s*\[Part \d+\/\d+\]$/, ""),
1982
+ // Remove old part suffix
1983
+ content: newContent,
1984
+ collectionType: currentEntry.collectionType || "standalone",
1985
+ fieldName: currentEntry.fieldName || "content",
1986
+ metadata: preservedMetadata,
1987
+ related: originalRelated,
1988
+ autoChunk: true
1989
+ }
1990
+ });
1991
+ } else {
1992
+ const entity = await this.createEmbedding({
1993
+ data: {
1994
+ title: newTitle.replace(/\s*\[Part \d+\/\d+\]$/, ""),
1995
+ // Remove old part suffix
1996
+ content: newContent,
1997
+ collectionType: currentEntry.collectionType || "standalone",
1998
+ fieldName: currentEntry.fieldName || "content",
1999
+ metadata: preservedMetadata,
2000
+ related: originalRelated,
2001
+ autoChunk: false
2002
+ }
2003
+ });
2004
+ return {
2005
+ entity,
2006
+ chunks: [entity],
2007
+ totalChunks: 1,
2008
+ wasChunked: false
2009
+ };
2010
+ }
2011
+ },
1267
2012
  async updateEmbedding(id, data) {
1268
- const { title, content, metadata } = data.data;
1269
- const currentEntry = await strapi.documents(CONTENT_TYPE_UID).findOne({
2013
+ const { title, content, metadata, autoChunk } = data.data;
2014
+ const config2 = this.getConfig();
2015
+ const currentEntry = await strapi.documents(CONTENT_TYPE_UID$1).findOne({
1270
2016
  documentId: id
1271
2017
  });
1272
2018
  if (!currentEntry) {
1273
2019
  throw new Error(`Embedding with id ${id} not found`);
1274
2020
  }
2021
+ const currentMetadata = currentEntry.metadata;
2022
+ const isCurrentlyChunked = currentMetadata?.isChunk === true;
2023
+ const hasRelatedChunks = currentMetadata?.parentDocumentId || isCurrentlyChunked;
2024
+ const shouldChunk = autoChunk ?? config2.autoChunk;
2025
+ const chunkSize = config2.chunkSize || 4e3;
2026
+ const newContent = content ?? currentEntry.content;
2027
+ const contentNeedsChunking = shouldChunk && needsChunking(newContent, chunkSize);
2028
+ const contentChanged = content !== void 0 && content !== currentEntry.content;
2029
+ if (hasRelatedChunks || contentNeedsChunking) {
2030
+ const result = await this.updateChunkedEmbedding(id, data);
2031
+ return result.entity;
2032
+ }
1275
2033
  const updateData = {};
1276
2034
  if (title !== void 0) updateData.title = title;
1277
2035
  if (content !== void 0) updateData.content = content;
1278
2036
  if (metadata !== void 0) updateData.metadata = metadata;
1279
- const contentChanged = content !== void 0 && content !== currentEntry.content;
1280
- let updatedEntity = await strapi.documents(CONTENT_TYPE_UID).update({
2037
+ let updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
1281
2038
  documentId: id,
1282
2039
  data: updateData
1283
2040
  });
@@ -1291,7 +2048,7 @@ const embeddings = ({ strapi }) => ({
1291
2048
  collectionType: currentEntry.collectionType || "standalone",
1292
2049
  fieldName: currentEntry.fieldName || "content"
1293
2050
  });
1294
- updatedEntity = await strapi.documents(CONTENT_TYPE_UID).update({
2051
+ updatedEntity = await strapi.documents(CONTENT_TYPE_UID$1).update({
1295
2052
  documentId: id,
1296
2053
  data: {
1297
2054
  embeddingId: result.embeddingId,
@@ -1320,7 +2077,7 @@ const embeddings = ({ strapi }) => ({
1320
2077
  }
1321
2078
  },
1322
2079
  async getEmbedding(id) {
1323
- return await strapi.documents(CONTENT_TYPE_UID).findOne({
2080
+ return await strapi.documents(CONTENT_TYPE_UID$1).findOne({
1324
2081
  documentId: String(id)
1325
2082
  });
1326
2083
  },
@@ -1329,12 +2086,12 @@ const embeddings = ({ strapi }) => ({
1329
2086
  const pageSize = params?.pageSize || 10;
1330
2087
  const start = (page - 1) * pageSize;
1331
2088
  const [data, totalCount] = await Promise.all([
1332
- strapi.documents(CONTENT_TYPE_UID).findMany({
2089
+ strapi.documents(CONTENT_TYPE_UID$1).findMany({
1333
2090
  limit: pageSize,
1334
2091
  start,
1335
2092
  filters: params?.filters
1336
2093
  }),
1337
- strapi.documents(CONTENT_TYPE_UID).count({
2094
+ strapi.documents(CONTENT_TYPE_UID$1).count({
1338
2095
  filters: params?.filters
1339
2096
  })
1340
2097
  ]);
@@ -1345,8 +2102,304 @@ const embeddings = ({ strapi }) => ({
1345
2102
  };
1346
2103
  }
1347
2104
  });
2105
// Plugin identifier; interpolated into the content-type UID below.
+ const PLUGIN_ID = "strapi-content-embeddings";
2106
// UID the sync service passes to strapi.documents(...) whenever it lists,
// creates, updates or deletes embedding entries.
+ const CONTENT_TYPE_UID = `plugin::${PLUGIN_ID}.embedding`;
2107
+ const sync = ({ strapi }) => ({
2108
+ /**
2109
+ * Sync embeddings from Neon DB to Strapi DB
2110
+ *
2111
+ * This performs the following operations:
2112
+ * 1. Fetches all embeddings from Neon DB (source of truth)
2113
+ * 2. Fetches all embeddings from Strapi DB
2114
+ * 3. Creates missing entries in Strapi that exist in Neon
2115
+ * 4. Updates Strapi entries where content differs from Neon
2116
+ * 5. Optionally removes orphaned Strapi entries (no matching Neon record)
2117
+ */
2118
+ async syncFromNeon(options2) {
2119
+ const { removeOrphans = false, dryRun = false } = options2 || {};
2120
+ const result = {
2121
+ success: false,
2122
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2123
+ neonCount: 0,
2124
+ strapiCount: 0,
2125
+ actions: {
2126
+ created: 0,
2127
+ updated: 0,
2128
+ orphansRemoved: 0
2129
+ },
2130
+ details: {
2131
+ created: [],
2132
+ updated: [],
2133
+ orphansRemoved: []
2134
+ },
2135
+ errors: []
2136
+ };
2137
+ if (!pluginManager.isInitialized()) {
2138
+ result.errors.push(
2139
+ "Plugin manager not initialized. Check your Neon and OpenAI configuration."
2140
+ );
2141
+ return result;
2142
+ }
2143
+ try {
2144
+ const neonEmbeddings = await pluginManager.getAllNeonEmbeddings();
2145
+ result.neonCount = neonEmbeddings.length;
2146
+ const strapiEmbeddings = await strapi.documents(CONTENT_TYPE_UID).findMany({
2147
+ limit: 1e4
2148
+ // High limit to get all
2149
+ });
2150
+ result.strapiCount = strapiEmbeddings.length;
2151
+ const neonBystrapiId = /* @__PURE__ */ new Map();
2152
+ for (const neon of neonEmbeddings) {
2153
+ if (neon.strapiId) {
2154
+ neonBystrapiId.set(neon.strapiId, neon);
2155
+ }
2156
+ }
2157
+ const strapiByDocumentId = /* @__PURE__ */ new Map();
2158
+ for (const strapi2 of strapiEmbeddings) {
2159
+ strapiByDocumentId.set(strapi2.documentId, strapi2);
2160
+ }
2161
+ for (const neon of neonEmbeddings) {
2162
+ if (!neon.strapiId) {
2163
+ result.errors.push(
2164
+ `Neon embedding ${neon.id} has no strapiId in metadata`
2165
+ );
2166
+ continue;
2167
+ }
2168
+ const existingStrapi = strapiByDocumentId.get(neon.strapiId);
2169
+ if (!existingStrapi) {
2170
+ if (!dryRun) {
2171
+ try {
2172
+ await strapi.documents(CONTENT_TYPE_UID).create({
2173
+ data: {
2174
+ documentId: neon.strapiId,
2175
+ title: neon.title,
2176
+ content: neon.content,
2177
+ embeddingId: neon.id,
2178
+ collectionType: neon.collectionType,
2179
+ fieldName: neon.fieldName
2180
+ }
2181
+ });
2182
+ result.actions.created++;
2183
+ result.details.created.push(
2184
+ `${neon.strapiId} (${neon.title || "untitled"})`
2185
+ );
2186
+ } catch (error) {
2187
+ result.errors.push(
2188
+ `Failed to create Strapi entry for ${neon.strapiId}: ${error}`
2189
+ );
2190
+ }
2191
+ } else {
2192
+ result.actions.created++;
2193
+ result.details.created.push(
2194
+ `[DRY RUN] ${neon.strapiId} (${neon.title || "untitled"})`
2195
+ );
2196
+ }
2197
+ } else {
2198
+ const contentChanged = existingStrapi.content !== neon.content;
2199
+ const titleChanged = existingStrapi.title !== neon.title;
2200
+ const embeddingIdMissing = !existingStrapi.embeddingId;
2201
+ if (contentChanged || titleChanged || embeddingIdMissing) {
2202
+ if (!dryRun) {
2203
+ try {
2204
+ await strapi.documents(CONTENT_TYPE_UID).update({
2205
+ documentId: neon.strapiId,
2206
+ data: {
2207
+ title: neon.title,
2208
+ content: neon.content,
2209
+ embeddingId: neon.id
2210
+ }
2211
+ });
2212
+ result.actions.updated++;
2213
+ result.details.updated.push(
2214
+ `${neon.strapiId} (${neon.title || "untitled"})`
2215
+ );
2216
+ } catch (error) {
2217
+ result.errors.push(
2218
+ `Failed to update Strapi entry ${neon.strapiId}: ${error}`
2219
+ );
2220
+ }
2221
+ } else {
2222
+ result.actions.updated++;
2223
+ result.details.updated.push(
2224
+ `[DRY RUN] ${neon.strapiId} (${neon.title || "untitled"})`
2225
+ );
2226
+ }
2227
+ }
2228
+ }
2229
+ }
2230
+ if (removeOrphans) {
2231
+ for (const strapiEmbed of strapiEmbeddings) {
2232
+ const hasNeonRecord = neonBystrapiId.has(strapiEmbed.documentId);
2233
+ if (!hasNeonRecord) {
2234
+ if (!dryRun) {
2235
+ try {
2236
+ await strapi.documents(CONTENT_TYPE_UID).delete({
2237
+ documentId: strapiEmbed.documentId
2238
+ });
2239
+ result.actions.orphansRemoved++;
2240
+ result.details.orphansRemoved.push(
2241
+ `${strapiEmbed.documentId} (${strapiEmbed.title || "untitled"})`
2242
+ );
2243
+ } catch (error) {
2244
+ result.errors.push(
2245
+ `Failed to remove orphan ${strapiEmbed.documentId}: ${error}`
2246
+ );
2247
+ }
2248
+ } else {
2249
+ result.actions.orphansRemoved++;
2250
+ result.details.orphansRemoved.push(
2251
+ `[DRY RUN] ${strapiEmbed.documentId} (${strapiEmbed.title || "untitled"})`
2252
+ );
2253
+ }
2254
+ }
2255
+ }
2256
+ }
2257
+ result.success = result.errors.length === 0;
2258
+ return result;
2259
+ } catch (error) {
2260
+ result.errors.push(`Sync failed: ${error}`);
2261
+ return result;
2262
+ }
2263
+ },
2264
+ /**
2265
+ * Get sync status - compare Neon and Strapi without making changes
2266
+ */
2267
+ async getSyncStatus() {
2268
+ if (!pluginManager.isInitialized()) {
2269
+ throw new Error("Plugin manager not initialized");
2270
+ }
2271
+ const neonEmbeddings = await pluginManager.getAllNeonEmbeddings();
2272
+ const strapiEmbeddings = await strapi.documents(CONTENT_TYPE_UID).findMany({
2273
+ limit: 1e4
2274
+ });
2275
+ const neonBystrapiId = /* @__PURE__ */ new Map();
2276
+ for (const neon of neonEmbeddings) {
2277
+ if (neon.strapiId) {
2278
+ neonBystrapiId.set(neon.strapiId, neon);
2279
+ }
2280
+ }
2281
+ const strapiByDocumentId = /* @__PURE__ */ new Map();
2282
+ for (const s of strapiEmbeddings) {
2283
+ strapiByDocumentId.set(s.documentId, s);
2284
+ }
2285
+ let missingInStrapi = 0;
2286
+ let contentDifferences = 0;
2287
+ for (const neon of neonEmbeddings) {
2288
+ if (!neon.strapiId) continue;
2289
+ const strapiRecord = strapiByDocumentId.get(neon.strapiId);
2290
+ if (!strapiRecord) {
2291
+ missingInStrapi++;
2292
+ } else if (strapiRecord.content !== neon.content) {
2293
+ contentDifferences++;
2294
+ }
2295
+ }
2296
+ let missingInNeon = 0;
2297
+ for (const s of strapiEmbeddings) {
2298
+ if (!neonBystrapiId.has(s.documentId)) {
2299
+ missingInNeon++;
2300
+ }
2301
+ }
2302
+ return {
2303
+ neonCount: neonEmbeddings.length,
2304
+ strapiCount: strapiEmbeddings.length,
2305
+ inSync: missingInStrapi === 0 && missingInNeon === 0 && contentDifferences === 0,
2306
+ missingInStrapi,
2307
+ missingInNeon,
2308
+ contentDifferences
2309
+ };
2310
+ },
2311
+ /**
2312
+ * Recreate all embeddings in Neon DB from Strapi data
2313
+ *
2314
+ * This will:
2315
+ * 1. Delete ALL embeddings from Neon DB
2316
+ * 2. Re-create embeddings for each Strapi embedding entry
2317
+ * 3. Update Strapi entries with new embedding IDs
2318
+ *
2319
+ * Use this when embeddings were created with incorrect metadata format
2320
+ */
2321
+ async recreateAllEmbeddings() {
2322
+ const result = {
2323
+ success: false,
2324
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
2325
+ deletedFromNeon: 0,
2326
+ processedFromStrapi: 0,
2327
+ recreatedInNeon: 0,
2328
+ errors: [],
2329
+ details: {
2330
+ recreated: [],
2331
+ failed: []
2332
+ }
2333
+ };
2334
+ if (!pluginManager.isInitialized()) {
2335
+ result.errors.push(
2336
+ "Plugin manager not initialized. Check your Neon and OpenAI configuration."
2337
+ );
2338
+ return result;
2339
+ }
2340
+ try {
2341
+ console.log("[recreateAllEmbeddings] Step 1: Clearing Neon DB...");
2342
+ result.deletedFromNeon = await pluginManager.clearAllNeonEmbeddings();
2343
+ console.log(`[recreateAllEmbeddings] Deleted ${result.deletedFromNeon} embeddings from Neon`);
2344
+ console.log("[recreateAllEmbeddings] Step 2: Fetching Strapi embeddings...");
2345
+ const strapiEmbeddings = await strapi.documents(CONTENT_TYPE_UID).findMany({
2346
+ limit: -1
2347
+ // Get all
2348
+ });
2349
+ result.processedFromStrapi = strapiEmbeddings.length;
2350
+ console.log(`[recreateAllEmbeddings] Found ${strapiEmbeddings.length} embeddings in Strapi`);
2351
+ if (strapiEmbeddings.length === 0) {
2352
+ result.success = true;
2353
+ return result;
2354
+ }
2355
+ console.log("[recreateAllEmbeddings] Step 3: Recreating embeddings in Neon...");
2356
+ for (let i = 0; i < strapiEmbeddings.length; i++) {
2357
+ const entry = strapiEmbeddings[i];
2358
+ const progress = `[${i + 1}/${strapiEmbeddings.length}]`;
2359
+ if (!entry.content) {
2360
+ console.log(`${progress} Skipping ${entry.documentId} - no content`);
2361
+ result.details.failed.push(`${entry.documentId}: no content`);
2362
+ continue;
2363
+ }
2364
+ try {
2365
+ console.log(`${progress} Creating embedding for: ${entry.title || entry.documentId}`);
2366
+ const embeddingResult = await pluginManager.createEmbedding({
2367
+ id: entry.documentId,
2368
+ title: entry.title || "",
2369
+ content: entry.content,
2370
+ collectionType: entry.collectionType || "standalone",
2371
+ fieldName: entry.fieldName || "content"
2372
+ });
2373
+ await strapi.documents(CONTENT_TYPE_UID).update({
2374
+ documentId: entry.documentId,
2375
+ data: {
2376
+ embeddingId: embeddingResult.embeddingId,
2377
+ embedding: embeddingResult.embedding
2378
+ }
2379
+ });
2380
+ result.recreatedInNeon++;
2381
+ result.details.recreated.push(`${entry.documentId} (${entry.title || "untitled"})`);
2382
+ if (i < strapiEmbeddings.length - 1) {
2383
+ await new Promise((resolve) => setTimeout(resolve, 500));
2384
+ }
2385
+ } catch (error) {
2386
+ console.error(`${progress} Failed:`, error.message || error);
2387
+ result.errors.push(`${entry.documentId}: ${error.message || error}`);
2388
+ result.details.failed.push(`${entry.documentId}: ${error.message || error}`);
2389
+ }
2390
+ }
2391
+ result.success = result.errors.length === 0;
2392
+ console.log(`[recreateAllEmbeddings] Complete. Recreated: ${result.recreatedInNeon}, Failed: ${result.details.failed.length}`);
2393
+ return result;
2394
+ } catch (error) {
2395
+ result.errors.push(`Recreate failed: ${error.message || error}`);
2396
+ return result;
2397
+ }
2398
+ }
2399
+ });
1348
2400
  // Collects the service factories defined above under their service names:
  // `embeddings` (embedding CRUD and chunking) and `sync` (Neon <-> Strapi sync).
  const services = {
1349
- embeddings
2401
+ embeddings,
2402
+ sync
1350
2403
  };
1351
2404
  const index = {
1352
2405
  register,