kongbrain 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/surreal.ts CHANGED
@@ -125,12 +125,29 @@ export class SurrealStore {
125
125
  private reconnecting: Promise<void> | null = null;
126
126
  private shutdownFlag = false;
127
127
  private initialized = false;
128
+ /**
129
+ * The embedding provider tag used to stamp writes and filter searches.
130
+ * Set once at startup via setActiveProvider() after the EmbeddingService
131
+ * is constructed. Falls back to "local-bge-m3" so existing single-provider
132
+ * deployments keep working if the wire-up step is ever skipped.
133
+ */
134
+ private activeProvider: string = "local-bge-m3";
128
135
 
129
136
  constructor(config: SurrealConfig) {
130
137
  this.config = config;
131
138
  this.db = new Surreal();
132
139
  }
133
140
 
141
+ /** Set the embedding provider id used to stamp writes and filter searches. */
142
+ setActiveProvider(providerId: string): void {
143
+ this.activeProvider = providerId;
144
+ }
145
+
146
+ /** Get the active provider id (for callers writing records via direct CREATE). */
147
+ getActiveProvider(): string {
148
+ return this.activeProvider;
149
+ }
150
+
134
151
  /** Connect and run schema. Returns true if a new connection was made, false if already initialized. */
135
152
  async initialize(): Promise<boolean> {
136
153
  // Only connect once — subsequent calls are no-ops.
@@ -361,46 +378,55 @@ export class SurrealStore {
361
378
  const crossTurnLim = lim.turn - sessionTurnLim;
362
379
  const emb = withEmbeddings ? ", embedding" : "";
363
380
 
364
- // Batch all 7 vector searches into a single round-trip (limits inlined — per-table)
381
+ // Batch all 7 vector searches into a single round-trip (limits inlined — per-table).
382
+ // Each query filters by embedding_provider so vectors from different
383
+ // models never mix in the same result set (different vector spaces).
365
384
  const stmts = [
366
385
  `SELECT id, text, role, timestamp, 0 AS accessCount, 'turn' AS table,
367
386
  vector::similarity::cosine(embedding, $vec) AS score${emb}
368
387
  FROM turn WHERE embedding != NONE AND array::len(embedding) > 0
388
+ AND embedding_provider = $provider
369
389
  AND session_id = $sid ORDER BY score DESC LIMIT ${sessionTurnLim}`,
370
390
  `SELECT id, text, role, timestamp, 0 AS accessCount, 'turn' AS table,
371
391
  vector::similarity::cosine(embedding, $vec) AS score${emb}
372
392
  FROM turn WHERE embedding != NONE AND array::len(embedding) > 0
393
+ AND embedding_provider = $provider
373
394
  AND session_id != $sid ORDER BY score DESC LIMIT ${crossTurnLim}`,
374
395
  `SELECT id, content AS text, stability AS importance, access_count AS accessCount,
375
396
  created_at AS timestamp, 'concept' AS table,
376
397
  vector::similarity::cosine(embedding, $vec) AS score${emb}
377
398
  FROM concept WHERE embedding != NONE AND array::len(embedding) > 0
399
+ AND embedding_provider = $provider
378
400
  ORDER BY score DESC LIMIT ${lim.concept}`,
379
401
  `SELECT id, text, importance, access_count AS accessCount,
380
402
  created_at AS timestamp, session_id AS sessionId, 'memory' AS table,
381
403
  vector::similarity::cosine(embedding, $vec) AS score${emb}
382
404
  FROM memory WHERE embedding != NONE AND array::len(embedding) > 0
405
+ AND embedding_provider = $provider
383
406
  AND (status = 'active' OR status IS NONE) ORDER BY score DESC LIMIT ${lim.memory}`,
384
407
  `SELECT id, description AS text, 0 AS accessCount,
385
408
  created_at AS timestamp, 'artifact' AS table,
386
409
  vector::similarity::cosine(embedding, $vec) AS score${emb}
387
410
  FROM artifact WHERE embedding != NONE AND array::len(embedding) > 0
411
+ AND embedding_provider = $provider
388
412
  ORDER BY score DESC LIMIT ${lim.artifact}`,
389
413
  `SELECT id, content AS text, category AS source, 0.5 AS importance, 0 AS accessCount,
390
414
  timestamp, 'monologue' AS table,
391
415
  vector::similarity::cosine(embedding, $vec) AS score${emb}
392
416
  FROM monologue WHERE embedding != NONE AND array::len(embedding) > 0
417
+ AND embedding_provider = $provider
393
418
  ORDER BY score DESC LIMIT ${lim.monologue}`,
394
419
  `SELECT id, text, importance, 0 AS accessCount,
395
420
  'identity_chunk' AS table,
396
421
  vector::similarity::cosine(embedding, $vec) AS score${emb}
397
422
  FROM identity_chunk WHERE embedding != NONE AND array::len(embedding) > 0
423
+ AND embedding_provider = $provider
398
424
  ORDER BY score DESC LIMIT ${lim.identity}`,
399
425
  ];
400
426
 
401
427
  let batchResults: any[][];
402
428
  try {
403
- batchResults = await this.queryBatch<any>(stmts, { vec, sid: sessionId });
429
+ batchResults = await this.queryBatch<any>(stmts, { vec, sid: sessionId, provider: this.activeProvider });
404
430
  } catch (e) {
405
431
  swallow.warn("surreal:vectorSearch:batch", e);
406
432
  return [];
@@ -422,7 +448,9 @@ export class SurrealStore {
422
448
 
423
449
  async upsertTurn(turn: TurnRecord): Promise<string> {
424
450
  const { embedding, ...rest } = turn;
425
- const record = embedding?.length ? { ...rest, embedding } : rest;
451
+ const record = embedding?.length
452
+ ? { ...rest, embedding, embedding_provider: this.activeProvider }
453
+ : rest;
426
454
  const rows = await this.queryFirst<{ id: string }>(
427
455
  `CREATE turn CONTENT $turn RETURN id`,
428
456
  { turn: record },
@@ -620,10 +648,11 @@ export class SurrealStore {
620
648
  vector::similarity::cosine(embedding, $vec) AS score
621
649
  FROM concept
622
650
  WHERE embedding != NONE AND array::len(embedding) > 0
651
+ AND embedding_provider = $provider
623
652
  AND (${tagConditions})
624
653
  ORDER BY score DESC
625
654
  LIMIT $limit`,
626
- { vec: queryVec, limit },
655
+ { vec: queryVec, limit, provider: this.activeProvider },
627
656
  );
628
657
  return rows as VectorSearchResult[];
629
658
  } catch (e) {
@@ -659,9 +688,13 @@ export class SurrealStore {
659
688
  "produced", "derived_from", "performed", "owns",
660
689
  ];
661
690
 
691
+ // Graph traversal returns nodes regardless of provider (the edges
692
+ // are still meaningful for structure), but the cosine score only
693
+ // applies to vectors in the active provider's space — others get
694
+ // score 0 so they sort below current-space matches.
662
695
  const scoreExpr =
663
- ", IF embedding != NONE AND array::len(embedding) > 0 THEN vector::similarity::cosine(embedding, $vec) ELSE 0 END AS score";
664
- const bindings = { vec: queryVec };
696
+ ", IF embedding != NONE AND array::len(embedding) > 0 AND embedding_provider = $provider THEN vector::similarity::cosine(embedding, $vec) ELSE 0 END AS score";
697
+ const bindings = { vec: queryVec, provider: this.activeProvider };
665
698
  const selectFields = `SELECT id, text, content, description, importance, stability,
666
699
  access_count AS accessCount, created_at AS timestamp,
667
700
  meta::tb(id) AS table${scoreExpr}`;
@@ -752,11 +785,15 @@ export class SurrealStore {
752
785
  );
753
786
  if (rows.length > 0) {
754
787
  const id = String(rows[0].id);
755
- // Backfill embedding if the existing concept is missing one
788
+ // Backfill embedding if the existing concept is missing one. The
789
+ // backfilled embedding is tagged with the active provider so it can be
790
+ // searched alongside other current-provider rows.
756
791
  if (embedding?.length) {
757
792
  await this.queryExec(
758
- `UPDATE ${id} SET access_count += 1, last_accessed = time::now(), embedding = IF embedding IS NONE OR array::len(embedding) = 0 THEN $emb ELSE embedding END`,
759
- { emb: embedding },
793
+ `UPDATE ${id} SET access_count += 1, last_accessed = time::now(),
794
+ embedding = IF embedding IS NONE OR array::len(embedding) = 0 THEN $emb ELSE embedding END,
795
+ embedding_provider = IF embedding IS NONE OR array::len(embedding) = 0 THEN $provider ELSE embedding_provider END`,
796
+ { emb: embedding, provider: this.activeProvider },
760
797
  );
761
798
  } else {
762
799
  await this.queryExec(
@@ -767,7 +804,10 @@ export class SurrealStore {
767
804
  }
768
805
  const emb = embedding?.length ? embedding : undefined;
769
806
  const record: Record<string, unknown> = { content, source: source ?? undefined };
770
- if (emb) record.embedding = emb;
807
+ if (emb) {
808
+ record.embedding = emb;
809
+ record.embedding_provider = this.activeProvider;
810
+ }
771
811
  const created = await this.queryFirst<{ id: string }>(
772
812
  `CREATE concept CONTENT $record RETURN id`,
773
813
  { record },
@@ -782,7 +822,10 @@ export class SurrealStore {
782
822
  embedding: number[] | null,
783
823
  ): Promise<string> {
784
824
  const record: Record<string, unknown> = { path, type, description };
785
- if (embedding?.length) record.embedding = embedding;
825
+ if (embedding?.length) {
826
+ record.embedding = embedding;
827
+ record.embedding_provider = this.activeProvider;
828
+ }
786
829
  const rows = await this.queryFirst<{ id: string }>(
787
830
  `CREATE artifact CONTENT $record RETURN id`,
788
831
  { record },
@@ -800,6 +843,8 @@ export class SurrealStore {
800
843
  const source = category ?? "general";
801
844
 
802
845
  if (embedding?.length) {
846
+ // Dedup search must filter by provider — same vector value in a different
847
+ // space is meaningless and would produce false-positive merges.
803
848
  const dupes = await this.queryFirst<{
804
849
  id: string;
805
850
  importance: number;
@@ -809,10 +854,11 @@ export class SurrealStore {
809
854
  vector::similarity::cosine(embedding, $vec) AS score
810
855
  FROM memory
811
856
  WHERE embedding != NONE AND array::len(embedding) > 0
857
+ AND embedding_provider = $provider
812
858
  AND category = $cat
813
859
  ORDER BY score DESC
814
860
  LIMIT 1`,
815
- { vec: embedding, cat: source },
861
+ { vec: embedding, cat: source, provider: this.activeProvider },
816
862
  );
817
863
  if (dupes.length > 0 && dupes[0].score > 0.92) {
818
864
  const existing = dupes[0];
@@ -826,7 +872,10 @@ export class SurrealStore {
826
872
  }
827
873
 
828
874
  const record: Record<string, unknown> = { text, importance, category: source, source };
829
- if (embedding?.length) record.embedding = embedding;
875
+ if (embedding?.length) {
876
+ record.embedding = embedding;
877
+ record.embedding_provider = this.activeProvider;
878
+ }
830
879
  if (sessionId) record.session_id = sessionId;
831
880
  const rows = await this.queryFirst<{ id: string }>(
832
881
  `CREATE memory CONTENT $record RETURN id`,
@@ -842,7 +891,10 @@ export class SurrealStore {
842
891
  embedding: number[] | null,
843
892
  ): Promise<string> {
844
893
  const record: Record<string, unknown> = { session_id: sessionId, category, content };
845
- if (embedding?.length) record.embedding = embedding;
894
+ if (embedding?.length) {
895
+ record.embedding = embedding;
896
+ record.embedding_provider = this.activeProvider;
897
+ }
846
898
  const rows = await this.queryFirst<{ id: string }>(
847
899
  `CREATE monologue CONTENT $record RETURN id`,
848
900
  { record },
@@ -1224,7 +1276,8 @@ export class SurrealStore {
1224
1276
  let merged = 0;
1225
1277
  const seen = new Set<string>();
1226
1278
 
1227
- // Pass 1: Vector similarity dedup
1279
+ // Pass 1: Vector similarity dedup. Restrict to current provider so we
1280
+ // don't compare vectors across spaces.
1228
1281
  const embMemories = await this.queryFirst<{
1229
1282
  id: string;
1230
1283
  text: string;
@@ -1236,8 +1289,10 @@ export class SurrealStore {
1236
1289
  `SELECT id, text, importance, category, access_count, embedding, created_at
1237
1290
  FROM memory
1238
1291
  WHERE embedding != NONE AND array::len(embedding) > 0
1292
+ AND embedding_provider = $provider
1239
1293
  ORDER BY created_at ASC
1240
1294
  LIMIT 50`,
1295
+ { provider: this.activeProvider },
1241
1296
  );
1242
1297
 
1243
1298
  for (const mem of embMemories) {
@@ -1255,9 +1310,10 @@ export class SurrealStore {
1255
1310
  WHERE id != $mid
1256
1311
  AND category = $cat
1257
1312
  AND embedding != NONE AND array::len(embedding) > 0
1313
+ AND embedding_provider = $provider
1258
1314
  ORDER BY score DESC
1259
1315
  LIMIT 3`,
1260
- { vec: mem.embedding, mid: mem.id, cat: mem.category },
1316
+ { vec: mem.embedding, mid: mem.id, cat: mem.category, provider: this.activeProvider },
1261
1317
  );
1262
1318
 
1263
1319
  for (const dupe of dupes) {
@@ -1300,9 +1356,10 @@ export class SurrealStore {
1300
1356
  try {
1301
1357
  const emb = await embedFn(mem.text);
1302
1358
  if (!emb) continue;
1359
+ // Backfilled embedding is in the active provider's space, so tag it.
1303
1360
  await this.queryExec(
1304
- `UPDATE ${String(mem.id)} SET embedding = $emb`,
1305
- { emb },
1361
+ `UPDATE ${String(mem.id)} SET embedding = $emb, embedding_provider = $provider`,
1362
+ { emb, provider: this.activeProvider },
1306
1363
  );
1307
1364
 
1308
1365
  const dupes = await this.queryFirst<{
@@ -1317,9 +1374,10 @@ export class SurrealStore {
1317
1374
  WHERE id != $mid
1318
1375
  AND category = $cat
1319
1376
  AND embedding != NONE AND array::len(embedding) > 0
1377
+ AND embedding_provider = $provider
1320
1378
  ORDER BY score DESC
1321
1379
  LIMIT 3`,
1322
- { vec: emb, mid: mem.id, cat: mem.category },
1380
+ { vec: emb, mid: mem.id, cat: mem.category, provider: this.activeProvider },
1323
1381
  );
1324
1382
  for (const dupe of dupes) {
1325
1383
  if (dupe.score < 0.88) break;
@@ -204,6 +204,7 @@ export async function migrateWorkspace(
204
204
  content: file.content,
205
205
  content_hash: simpleHash(file.content),
206
206
  embedding,
207
+ embedding_provider: embedding ? embeddings.providerId : undefined,
207
208
  tags: ["workspace-migration", fileType],
208
209
  migrated_from: "openclaw-default",
209
210
  },
@@ -459,6 +460,7 @@ async function ingestSkill(
459
460
  name: skillName,
460
461
  description,
461
462
  embedding,
463
+ embedding_provider: embedding ? embeddings.providerId : undefined,
462
464
  preconditions: preconditions.length > 0 ? preconditions.join("; ") : null,
463
465
  steps: steps.length > 0 ? steps : null,
464
466
  postconditions: null,
@@ -484,6 +486,7 @@ async function ingestSkill(
484
486
  content: file.content,
485
487
  content_hash: simpleHash(file.content),
486
488
  embedding,
489
+ embedding_provider: embedding ? embeddings.providerId : undefined,
487
490
  tags: ["workspace-migration", "skill", skillName],
488
491
  migrated_from: "openclaw-default",
489
492
  },