kongbrain 0.4.4 → 0.5.1

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/src/supersedes.ts CHANGED
@@ -61,11 +61,12 @@ export async function linkSupersedesEdges(
61
61
  `SELECT id, vector::similarity::cosine(embedding, $vec) AS score, stability
62
62
  FROM concept
63
63
  WHERE embedding != NONE AND array::len(embedding) > 0
64
+ AND embedding_provider = $provider
64
65
  AND superseded_at IS NONE
65
66
  AND stability > $floor
66
67
  ORDER BY score DESC
67
68
  LIMIT 5`,
68
- { vec: originalVec, floor: STABILITY_FLOOR },
69
+ { vec: originalVec, floor: STABILITY_FLOOR, provider: embeddings.providerId },
69
70
  );
70
71
 
71
72
  for (const candidate of candidates) {
package/src/surreal.ts CHANGED
@@ -47,6 +47,10 @@ export interface UtilityCacheEntry {
47
47
  retrieval_count: number;
48
48
  }
49
49
 
50
+ export interface SurrealStoreOptions {
51
+ embeddingDimensions?: number;
52
+ }
53
+
50
54
  const RECORD_ID_RE = /^[a-zA-Z_][a-zA-Z0-9_]*:[a-zA-Z0-9_]+$/;
51
55
 
52
56
  function assertRecordId(id: string): void {
@@ -125,12 +129,31 @@ export class SurrealStore {
125
129
  private reconnecting: Promise<void> | null = null;
126
130
  private shutdownFlag = false;
127
131
  private initialized = false;
132
+ /**
133
+ * The embedding provider tag used to stamp writes and filter searches.
134
+ * Set once at startup via setActiveProvider() after the EmbeddingService
135
+ * is constructed. Falls back to "local-bge-m3" so existing single-provider
136
+ * deployments keep working if the wire-up step is ever skipped.
137
+ */
138
+ private activeProvider: string = "local-bge-m3";
139
+ private schemaOptions: SurrealStoreOptions;
128
140
 
129
- constructor(config: SurrealConfig) {
141
+ constructor(config: SurrealConfig, options: SurrealStoreOptions = {}) {
130
142
  this.config = config;
143
+ this.schemaOptions = options;
131
144
  this.db = new Surreal();
132
145
  }
133
146
 
147
+ /** Set the embedding provider id used to stamp writes and filter searches. */
148
+ setActiveProvider(providerId: string): void {
149
+ this.activeProvider = providerId;
150
+ }
151
+
152
+ /** Get the active provider id (for callers writing records via direct CREATE). */
153
+ getActiveProvider(): string {
154
+ return this.activeProvider;
155
+ }
156
+
134
157
  /** Connect and run schema. Returns true if a new connection was made, false if already initialized. */
135
158
  async initialize(): Promise<boolean> {
136
159
  // Only connect once — subsequent calls are no-ops.
@@ -199,7 +222,9 @@ export class SurrealStore {
199
222
  }
200
223
 
201
224
  private async runSchema(): Promise<void> {
202
- const schema = loadSchema();
225
+ const schema = loadSchema({
226
+ embeddingDimensions: this.schemaOptions.embeddingDimensions,
227
+ });
203
228
  await this.db.query(schema);
204
229
  }
205
230
 
@@ -361,46 +386,55 @@ export class SurrealStore {
361
386
  const crossTurnLim = lim.turn - sessionTurnLim;
362
387
  const emb = withEmbeddings ? ", embedding" : "";
363
388
 
364
- // Batch all 7 vector searches into a single round-trip (limits inlined — per-table)
389
+ // Batch all 7 vector searches into a single round-trip (limits inlined — per-table).
390
+ // Each query filters by embedding_provider so vectors from different
391
+ // models never mix in the same result set (different vector spaces).
365
392
  const stmts = [
366
393
  `SELECT id, text, role, timestamp, 0 AS accessCount, 'turn' AS table,
367
394
  vector::similarity::cosine(embedding, $vec) AS score${emb}
368
395
  FROM turn WHERE embedding != NONE AND array::len(embedding) > 0
396
+ AND embedding_provider = $provider
369
397
  AND session_id = $sid ORDER BY score DESC LIMIT ${sessionTurnLim}`,
370
398
  `SELECT id, text, role, timestamp, 0 AS accessCount, 'turn' AS table,
371
399
  vector::similarity::cosine(embedding, $vec) AS score${emb}
372
400
  FROM turn WHERE embedding != NONE AND array::len(embedding) > 0
401
+ AND embedding_provider = $provider
373
402
  AND session_id != $sid ORDER BY score DESC LIMIT ${crossTurnLim}`,
374
403
  `SELECT id, content AS text, stability AS importance, access_count AS accessCount,
375
404
  created_at AS timestamp, 'concept' AS table,
376
405
  vector::similarity::cosine(embedding, $vec) AS score${emb}
377
406
  FROM concept WHERE embedding != NONE AND array::len(embedding) > 0
407
+ AND embedding_provider = $provider
378
408
  ORDER BY score DESC LIMIT ${lim.concept}`,
379
409
  `SELECT id, text, importance, access_count AS accessCount,
380
410
  created_at AS timestamp, session_id AS sessionId, 'memory' AS table,
381
411
  vector::similarity::cosine(embedding, $vec) AS score${emb}
382
412
  FROM memory WHERE embedding != NONE AND array::len(embedding) > 0
413
+ AND embedding_provider = $provider
383
414
  AND (status = 'active' OR status IS NONE) ORDER BY score DESC LIMIT ${lim.memory}`,
384
415
  `SELECT id, description AS text, 0 AS accessCount,
385
416
  created_at AS timestamp, 'artifact' AS table,
386
417
  vector::similarity::cosine(embedding, $vec) AS score${emb}
387
418
  FROM artifact WHERE embedding != NONE AND array::len(embedding) > 0
419
+ AND embedding_provider = $provider
388
420
  ORDER BY score DESC LIMIT ${lim.artifact}`,
389
421
  `SELECT id, content AS text, category AS source, 0.5 AS importance, 0 AS accessCount,
390
422
  timestamp, 'monologue' AS table,
391
423
  vector::similarity::cosine(embedding, $vec) AS score${emb}
392
424
  FROM monologue WHERE embedding != NONE AND array::len(embedding) > 0
425
+ AND embedding_provider = $provider
393
426
  ORDER BY score DESC LIMIT ${lim.monologue}`,
394
427
  `SELECT id, text, importance, 0 AS accessCount,
395
428
  'identity_chunk' AS table,
396
429
  vector::similarity::cosine(embedding, $vec) AS score${emb}
397
430
  FROM identity_chunk WHERE embedding != NONE AND array::len(embedding) > 0
431
+ AND embedding_provider = $provider
398
432
  ORDER BY score DESC LIMIT ${lim.identity}`,
399
433
  ];
400
434
 
401
435
  let batchResults: any[][];
402
436
  try {
403
- batchResults = await this.queryBatch<any>(stmts, { vec, sid: sessionId });
437
+ batchResults = await this.queryBatch<any>(stmts, { vec, sid: sessionId, provider: this.activeProvider });
404
438
  } catch (e) {
405
439
  swallow.warn("surreal:vectorSearch:batch", e);
406
440
  return [];
@@ -422,7 +456,9 @@ export class SurrealStore {
422
456
 
423
457
  async upsertTurn(turn: TurnRecord): Promise<string> {
424
458
  const { embedding, ...rest } = turn;
425
- const record = embedding?.length ? { ...rest, embedding } : rest;
459
+ const record = embedding?.length
460
+ ? { ...rest, embedding, embedding_provider: this.activeProvider }
461
+ : rest;
426
462
  const rows = await this.queryFirst<{ id: string }>(
427
463
  `CREATE turn CONTENT $turn RETURN id`,
428
464
  { turn: record },
@@ -620,10 +656,11 @@ export class SurrealStore {
620
656
  vector::similarity::cosine(embedding, $vec) AS score
621
657
  FROM concept
622
658
  WHERE embedding != NONE AND array::len(embedding) > 0
659
+ AND embedding_provider = $provider
623
660
  AND (${tagConditions})
624
661
  ORDER BY score DESC
625
662
  LIMIT $limit`,
626
- { vec: queryVec, limit },
663
+ { vec: queryVec, limit, provider: this.activeProvider },
627
664
  );
628
665
  return rows as VectorSearchResult[];
629
666
  } catch (e) {
@@ -659,9 +696,13 @@ export class SurrealStore {
659
696
  "produced", "derived_from", "performed", "owns",
660
697
  ];
661
698
 
699
+ // Graph traversal returns nodes regardless of provider (the edges
700
+ // are still meaningful for structure), but the cosine score only
701
+ // applies to vectors in the active provider's space — others get
702
+ // score 0 so they sort below current-space matches.
662
703
  const scoreExpr =
663
- ", IF embedding != NONE AND array::len(embedding) > 0 THEN vector::similarity::cosine(embedding, $vec) ELSE 0 END AS score";
664
- const bindings = { vec: queryVec };
704
+ ", IF embedding != NONE AND array::len(embedding) > 0 AND embedding_provider = $provider THEN vector::similarity::cosine(embedding, $vec) ELSE 0 END AS score";
705
+ const bindings = { vec: queryVec, provider: this.activeProvider };
665
706
  const selectFields = `SELECT id, text, content, description, importance, stability,
666
707
  access_count AS accessCount, created_at AS timestamp,
667
708
  meta::tb(id) AS table${scoreExpr}`;
@@ -752,11 +793,15 @@ export class SurrealStore {
752
793
  );
753
794
  if (rows.length > 0) {
754
795
  const id = String(rows[0].id);
755
- // Backfill embedding if the existing concept is missing one
796
+ // Backfill embedding if the existing concept is missing one. The
797
+ // backfilled embedding is tagged with the active provider so it can be
798
+ // searched alongside other current-provider rows.
756
799
  if (embedding?.length) {
757
800
  await this.queryExec(
758
- `UPDATE ${id} SET access_count += 1, last_accessed = time::now(), embedding = IF embedding IS NONE OR array::len(embedding) = 0 THEN $emb ELSE embedding END`,
759
- { emb: embedding },
801
+ `UPDATE ${id} SET access_count += 1, last_accessed = time::now(),
802
+ embedding = IF embedding IS NONE OR array::len(embedding) = 0 THEN $emb ELSE embedding END,
803
+ embedding_provider = IF embedding IS NONE OR array::len(embedding) = 0 THEN $provider ELSE embedding_provider END`,
804
+ { emb: embedding, provider: this.activeProvider },
760
805
  );
761
806
  } else {
762
807
  await this.queryExec(
@@ -767,7 +812,10 @@ export class SurrealStore {
767
812
  }
768
813
  const emb = embedding?.length ? embedding : undefined;
769
814
  const record: Record<string, unknown> = { content, source: source ?? undefined };
770
- if (emb) record.embedding = emb;
815
+ if (emb) {
816
+ record.embedding = emb;
817
+ record.embedding_provider = this.activeProvider;
818
+ }
771
819
  const created = await this.queryFirst<{ id: string }>(
772
820
  `CREATE concept CONTENT $record RETURN id`,
773
821
  { record },
@@ -782,7 +830,10 @@ export class SurrealStore {
782
830
  embedding: number[] | null,
783
831
  ): Promise<string> {
784
832
  const record: Record<string, unknown> = { path, type, description };
785
- if (embedding?.length) record.embedding = embedding;
833
+ if (embedding?.length) {
834
+ record.embedding = embedding;
835
+ record.embedding_provider = this.activeProvider;
836
+ }
786
837
  const rows = await this.queryFirst<{ id: string }>(
787
838
  `CREATE artifact CONTENT $record RETURN id`,
788
839
  { record },
@@ -800,6 +851,8 @@ export class SurrealStore {
800
851
  const source = category ?? "general";
801
852
 
802
853
  if (embedding?.length) {
854
+ // Dedup search must filter by provider — same vector value in a different
855
+ // space is meaningless and would produce false-positive merges.
803
856
  const dupes = await this.queryFirst<{
804
857
  id: string;
805
858
  importance: number;
@@ -809,10 +862,11 @@ export class SurrealStore {
809
862
  vector::similarity::cosine(embedding, $vec) AS score
810
863
  FROM memory
811
864
  WHERE embedding != NONE AND array::len(embedding) > 0
865
+ AND embedding_provider = $provider
812
866
  AND category = $cat
813
867
  ORDER BY score DESC
814
868
  LIMIT 1`,
815
- { vec: embedding, cat: source },
869
+ { vec: embedding, cat: source, provider: this.activeProvider },
816
870
  );
817
871
  if (dupes.length > 0 && dupes[0].score > 0.92) {
818
872
  const existing = dupes[0];
@@ -826,7 +880,10 @@ export class SurrealStore {
826
880
  }
827
881
 
828
882
  const record: Record<string, unknown> = { text, importance, category: source, source };
829
- if (embedding?.length) record.embedding = embedding;
883
+ if (embedding?.length) {
884
+ record.embedding = embedding;
885
+ record.embedding_provider = this.activeProvider;
886
+ }
830
887
  if (sessionId) record.session_id = sessionId;
831
888
  const rows = await this.queryFirst<{ id: string }>(
832
889
  `CREATE memory CONTENT $record RETURN id`,
@@ -842,7 +899,10 @@ export class SurrealStore {
842
899
  embedding: number[] | null,
843
900
  ): Promise<string> {
844
901
  const record: Record<string, unknown> = { session_id: sessionId, category, content };
845
- if (embedding?.length) record.embedding = embedding;
902
+ if (embedding?.length) {
903
+ record.embedding = embedding;
904
+ record.embedding_provider = this.activeProvider;
905
+ }
846
906
  const rows = await this.queryFirst<{ id: string }>(
847
907
  `CREATE monologue CONTENT $record RETURN id`,
848
908
  { record },
@@ -1224,7 +1284,8 @@ export class SurrealStore {
1224
1284
  let merged = 0;
1225
1285
  const seen = new Set<string>();
1226
1286
 
1227
- // Pass 1: Vector similarity dedup
1287
+ // Pass 1: Vector similarity dedup. Restrict to current provider so we
1288
+ // don't compare vectors across spaces.
1228
1289
  const embMemories = await this.queryFirst<{
1229
1290
  id: string;
1230
1291
  text: string;
@@ -1236,8 +1297,10 @@ export class SurrealStore {
1236
1297
  `SELECT id, text, importance, category, access_count, embedding, created_at
1237
1298
  FROM memory
1238
1299
  WHERE embedding != NONE AND array::len(embedding) > 0
1300
+ AND embedding_provider = $provider
1239
1301
  ORDER BY created_at ASC
1240
1302
  LIMIT 50`,
1303
+ { provider: this.activeProvider },
1241
1304
  );
1242
1305
 
1243
1306
  for (const mem of embMemories) {
@@ -1255,9 +1318,10 @@ export class SurrealStore {
1255
1318
  WHERE id != $mid
1256
1319
  AND category = $cat
1257
1320
  AND embedding != NONE AND array::len(embedding) > 0
1321
+ AND embedding_provider = $provider
1258
1322
  ORDER BY score DESC
1259
1323
  LIMIT 3`,
1260
- { vec: mem.embedding, mid: mem.id, cat: mem.category },
1324
+ { vec: mem.embedding, mid: mem.id, cat: mem.category, provider: this.activeProvider },
1261
1325
  );
1262
1326
 
1263
1327
  for (const dupe of dupes) {
@@ -1300,9 +1364,10 @@ export class SurrealStore {
1300
1364
  try {
1301
1365
  const emb = await embedFn(mem.text);
1302
1366
  if (!emb) continue;
1367
+ // Backfilled embedding is in the active provider's space, so tag it.
1303
1368
  await this.queryExec(
1304
- `UPDATE ${String(mem.id)} SET embedding = $emb`,
1305
- { emb },
1369
+ `UPDATE ${String(mem.id)} SET embedding = $emb, embedding_provider = $provider`,
1370
+ { emb, provider: this.activeProvider },
1306
1371
  );
1307
1372
 
1308
1373
  const dupes = await this.queryFirst<{
@@ -1317,9 +1382,10 @@ export class SurrealStore {
1317
1382
  WHERE id != $mid
1318
1383
  AND category = $cat
1319
1384
  AND embedding != NONE AND array::len(embedding) > 0
1385
+ AND embedding_provider = $provider
1320
1386
  ORDER BY score DESC
1321
1387
  LIMIT 3`,
1322
- { vec: emb, mid: mem.id, cat: mem.category },
1388
+ { vec: emb, mid: mem.id, cat: mem.category, provider: this.activeProvider },
1323
1389
  );
1324
1390
  for (const dupe of dupes) {
1325
1391
  if (dupe.score < 0.88) break;
@@ -204,6 +204,7 @@ export async function migrateWorkspace(
204
204
  content: file.content,
205
205
  content_hash: simpleHash(file.content),
206
206
  embedding,
207
+ embedding_provider: embedding ? embeddings.providerId : undefined,
207
208
  tags: ["workspace-migration", fileType],
208
209
  migrated_from: "openclaw-default",
209
210
  },
@@ -459,6 +460,7 @@ async function ingestSkill(
459
460
  name: skillName,
460
461
  description,
461
462
  embedding,
463
+ embedding_provider: embedding ? embeddings.providerId : undefined,
462
464
  preconditions: preconditions.length > 0 ? preconditions.join("; ") : null,
463
465
  steps: steps.length > 0 ? steps : null,
464
466
  postconditions: null,
@@ -484,6 +486,7 @@ async function ingestSkill(
484
486
  content: file.content,
485
487
  content_hash: simpleHash(file.content),
486
488
  embedding,
489
+ embedding_provider: embedding ? embeddings.providerId : undefined,
487
490
  tags: ["workspace-migration", "skill", skillName],
488
491
  migrated_from: "openclaw-default",
489
492
  },