kongbrain 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +25 -18
- package/.github/workflows/pr-check.yml +4 -4
- package/CHANGELOG.md +47 -0
- package/README.github.md +53 -3
- package/README.md +29 -3
- package/README.npm.md +29 -3
- package/SKILL.md +1 -1
- package/bin/kongbrain-reembed.ts +143 -0
- package/openclaw.plugin.json +37 -7
- package/package.json +4 -1
- package/src/causal.ts +4 -1
- package/src/cognitive-bootstrap.ts +1 -0
- package/src/concept-extract.ts +4 -2
- package/src/config.ts +56 -10
- package/src/embeddings-openai.ts +232 -0
- package/src/embeddings.ts +48 -6
- package/src/identity.ts +2 -0
- package/src/index.ts +54 -5
- package/src/memory-daemon.ts +1 -1
- package/src/migrate-reembed.ts +305 -0
- package/src/reflection.ts +10 -4
- package/src/schema.surql +29 -0
- package/src/skills.ts +14 -5
- package/src/supersedes.ts +2 -1
- package/src/surreal.ts +77 -19
- package/src/workspace-migrate.ts +3 -0
package/src/surreal.ts
CHANGED
|
@@ -125,12 +125,29 @@ export class SurrealStore {
|
|
|
125
125
|
private reconnecting: Promise<void> | null = null;
|
|
126
126
|
private shutdownFlag = false;
|
|
127
127
|
private initialized = false;
|
|
128
|
+
/**
|
|
129
|
+
* The embedding provider tag used to stamp writes and filter searches.
|
|
130
|
+
* Set once at startup via setActiveProvider() after the EmbeddingService
|
|
131
|
+
* is constructed. Falls back to "local-bge-m3" so existing single-provider
|
|
132
|
+
* deployments keep working if the wire-up step is ever skipped.
|
|
133
|
+
*/
|
|
134
|
+
private activeProvider: string = "local-bge-m3";
|
|
128
135
|
|
|
129
136
|
constructor(config: SurrealConfig) {
|
|
130
137
|
this.config = config;
|
|
131
138
|
this.db = new Surreal();
|
|
132
139
|
}
|
|
133
140
|
|
|
141
|
+
/** Set the embedding provider id used to stamp writes and filter searches. */
|
|
142
|
+
setActiveProvider(providerId: string): void {
|
|
143
|
+
this.activeProvider = providerId;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/** Get the active provider id (for callers writing records via direct CREATE). */
|
|
147
|
+
getActiveProvider(): string {
|
|
148
|
+
return this.activeProvider;
|
|
149
|
+
}
|
|
150
|
+
|
|
134
151
|
/** Connect and run schema. Returns true if a new connection was made, false if already initialized. */
|
|
135
152
|
async initialize(): Promise<boolean> {
|
|
136
153
|
// Only connect once — subsequent calls are no-ops.
|
|
@@ -361,46 +378,55 @@ export class SurrealStore {
|
|
|
361
378
|
const crossTurnLim = lim.turn - sessionTurnLim;
|
|
362
379
|
const emb = withEmbeddings ? ", embedding" : "";
|
|
363
380
|
|
|
364
|
-
// Batch all 7 vector searches into a single round-trip (limits inlined — per-table)
|
|
381
|
+
// Batch all 7 vector searches into a single round-trip (limits inlined — per-table).
|
|
382
|
+
// Each query filters by embedding_provider so vectors from different
|
|
383
|
+
// models never mix in the same result set (different vector spaces).
|
|
365
384
|
const stmts = [
|
|
366
385
|
`SELECT id, text, role, timestamp, 0 AS accessCount, 'turn' AS table,
|
|
367
386
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
368
387
|
FROM turn WHERE embedding != NONE AND array::len(embedding) > 0
|
|
388
|
+
AND embedding_provider = $provider
|
|
369
389
|
AND session_id = $sid ORDER BY score DESC LIMIT ${sessionTurnLim}`,
|
|
370
390
|
`SELECT id, text, role, timestamp, 0 AS accessCount, 'turn' AS table,
|
|
371
391
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
372
392
|
FROM turn WHERE embedding != NONE AND array::len(embedding) > 0
|
|
393
|
+
AND embedding_provider = $provider
|
|
373
394
|
AND session_id != $sid ORDER BY score DESC LIMIT ${crossTurnLim}`,
|
|
374
395
|
`SELECT id, content AS text, stability AS importance, access_count AS accessCount,
|
|
375
396
|
created_at AS timestamp, 'concept' AS table,
|
|
376
397
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
377
398
|
FROM concept WHERE embedding != NONE AND array::len(embedding) > 0
|
|
399
|
+
AND embedding_provider = $provider
|
|
378
400
|
ORDER BY score DESC LIMIT ${lim.concept}`,
|
|
379
401
|
`SELECT id, text, importance, access_count AS accessCount,
|
|
380
402
|
created_at AS timestamp, session_id AS sessionId, 'memory' AS table,
|
|
381
403
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
382
404
|
FROM memory WHERE embedding != NONE AND array::len(embedding) > 0
|
|
405
|
+
AND embedding_provider = $provider
|
|
383
406
|
AND (status = 'active' OR status IS NONE) ORDER BY score DESC LIMIT ${lim.memory}`,
|
|
384
407
|
`SELECT id, description AS text, 0 AS accessCount,
|
|
385
408
|
created_at AS timestamp, 'artifact' AS table,
|
|
386
409
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
387
410
|
FROM artifact WHERE embedding != NONE AND array::len(embedding) > 0
|
|
411
|
+
AND embedding_provider = $provider
|
|
388
412
|
ORDER BY score DESC LIMIT ${lim.artifact}`,
|
|
389
413
|
`SELECT id, content AS text, category AS source, 0.5 AS importance, 0 AS accessCount,
|
|
390
414
|
timestamp, 'monologue' AS table,
|
|
391
415
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
392
416
|
FROM monologue WHERE embedding != NONE AND array::len(embedding) > 0
|
|
417
|
+
AND embedding_provider = $provider
|
|
393
418
|
ORDER BY score DESC LIMIT ${lim.monologue}`,
|
|
394
419
|
`SELECT id, text, importance, 0 AS accessCount,
|
|
395
420
|
'identity_chunk' AS table,
|
|
396
421
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
397
422
|
FROM identity_chunk WHERE embedding != NONE AND array::len(embedding) > 0
|
|
423
|
+
AND embedding_provider = $provider
|
|
398
424
|
ORDER BY score DESC LIMIT ${lim.identity}`,
|
|
399
425
|
];
|
|
400
426
|
|
|
401
427
|
let batchResults: any[][];
|
|
402
428
|
try {
|
|
403
|
-
batchResults = await this.queryBatch<any>(stmts, { vec, sid: sessionId });
|
|
429
|
+
batchResults = await this.queryBatch<any>(stmts, { vec, sid: sessionId, provider: this.activeProvider });
|
|
404
430
|
} catch (e) {
|
|
405
431
|
swallow.warn("surreal:vectorSearch:batch", e);
|
|
406
432
|
return [];
|
|
@@ -422,7 +448,9 @@ export class SurrealStore {
|
|
|
422
448
|
|
|
423
449
|
async upsertTurn(turn: TurnRecord): Promise<string> {
|
|
424
450
|
const { embedding, ...rest } = turn;
|
|
425
|
-
const record = embedding?.length
|
|
451
|
+
const record = embedding?.length
|
|
452
|
+
? { ...rest, embedding, embedding_provider: this.activeProvider }
|
|
453
|
+
: rest;
|
|
426
454
|
const rows = await this.queryFirst<{ id: string }>(
|
|
427
455
|
`CREATE turn CONTENT $turn RETURN id`,
|
|
428
456
|
{ turn: record },
|
|
@@ -620,10 +648,11 @@ export class SurrealStore {
|
|
|
620
648
|
vector::similarity::cosine(embedding, $vec) AS score
|
|
621
649
|
FROM concept
|
|
622
650
|
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
651
|
+
AND embedding_provider = $provider
|
|
623
652
|
AND (${tagConditions})
|
|
624
653
|
ORDER BY score DESC
|
|
625
654
|
LIMIT $limit`,
|
|
626
|
-
{ vec: queryVec, limit },
|
|
655
|
+
{ vec: queryVec, limit, provider: this.activeProvider },
|
|
627
656
|
);
|
|
628
657
|
return rows as VectorSearchResult[];
|
|
629
658
|
} catch (e) {
|
|
@@ -659,9 +688,13 @@ export class SurrealStore {
|
|
|
659
688
|
"produced", "derived_from", "performed", "owns",
|
|
660
689
|
];
|
|
661
690
|
|
|
691
|
+
// Graph traversal returns nodes regardless of provider (the edges
|
|
692
|
+
// are still meaningful for structure), but the cosine score only
|
|
693
|
+
// applies to vectors in the active provider's space — others get
|
|
694
|
+
// score 0 so they sort below current-space matches.
|
|
662
695
|
const scoreExpr =
|
|
663
|
-
", IF embedding != NONE AND array::len(embedding) > 0 THEN vector::similarity::cosine(embedding, $vec) ELSE 0 END AS score";
|
|
664
|
-
const bindings = { vec: queryVec };
|
|
696
|
+
", IF embedding != NONE AND array::len(embedding) > 0 AND embedding_provider = $provider THEN vector::similarity::cosine(embedding, $vec) ELSE 0 END AS score";
|
|
697
|
+
const bindings = { vec: queryVec, provider: this.activeProvider };
|
|
665
698
|
const selectFields = `SELECT id, text, content, description, importance, stability,
|
|
666
699
|
access_count AS accessCount, created_at AS timestamp,
|
|
667
700
|
meta::tb(id) AS table${scoreExpr}`;
|
|
@@ -752,11 +785,15 @@ export class SurrealStore {
|
|
|
752
785
|
);
|
|
753
786
|
if (rows.length > 0) {
|
|
754
787
|
const id = String(rows[0].id);
|
|
755
|
-
// Backfill embedding if the existing concept is missing one
|
|
788
|
+
// Backfill embedding if the existing concept is missing one. The
|
|
789
|
+
// backfilled embedding is tagged with the active provider so it can be
|
|
790
|
+
// searched alongside other current-provider rows.
|
|
756
791
|
if (embedding?.length) {
|
|
757
792
|
await this.queryExec(
|
|
758
|
-
`UPDATE ${id} SET access_count += 1, last_accessed = time::now(),
|
|
759
|
-
|
|
793
|
+
`UPDATE ${id} SET access_count += 1, last_accessed = time::now(),
|
|
794
|
+
embedding = IF embedding IS NONE OR array::len(embedding) = 0 THEN $emb ELSE embedding END,
|
|
795
|
+
embedding_provider = IF embedding IS NONE OR array::len(embedding) = 0 THEN $provider ELSE embedding_provider END`,
|
|
796
|
+
{ emb: embedding, provider: this.activeProvider },
|
|
760
797
|
);
|
|
761
798
|
} else {
|
|
762
799
|
await this.queryExec(
|
|
@@ -767,7 +804,10 @@ export class SurrealStore {
|
|
|
767
804
|
}
|
|
768
805
|
const emb = embedding?.length ? embedding : undefined;
|
|
769
806
|
const record: Record<string, unknown> = { content, source: source ?? undefined };
|
|
770
|
-
if (emb)
|
|
807
|
+
if (emb) {
|
|
808
|
+
record.embedding = emb;
|
|
809
|
+
record.embedding_provider = this.activeProvider;
|
|
810
|
+
}
|
|
771
811
|
const created = await this.queryFirst<{ id: string }>(
|
|
772
812
|
`CREATE concept CONTENT $record RETURN id`,
|
|
773
813
|
{ record },
|
|
@@ -782,7 +822,10 @@ export class SurrealStore {
|
|
|
782
822
|
embedding: number[] | null,
|
|
783
823
|
): Promise<string> {
|
|
784
824
|
const record: Record<string, unknown> = { path, type, description };
|
|
785
|
-
if (embedding?.length)
|
|
825
|
+
if (embedding?.length) {
|
|
826
|
+
record.embedding = embedding;
|
|
827
|
+
record.embedding_provider = this.activeProvider;
|
|
828
|
+
}
|
|
786
829
|
const rows = await this.queryFirst<{ id: string }>(
|
|
787
830
|
`CREATE artifact CONTENT $record RETURN id`,
|
|
788
831
|
{ record },
|
|
@@ -800,6 +843,8 @@ export class SurrealStore {
|
|
|
800
843
|
const source = category ?? "general";
|
|
801
844
|
|
|
802
845
|
if (embedding?.length) {
|
|
846
|
+
// Dedup search must filter by provider — same vector value in a different
|
|
847
|
+
// space is meaningless and would produce false-positive merges.
|
|
803
848
|
const dupes = await this.queryFirst<{
|
|
804
849
|
id: string;
|
|
805
850
|
importance: number;
|
|
@@ -809,10 +854,11 @@ export class SurrealStore {
|
|
|
809
854
|
vector::similarity::cosine(embedding, $vec) AS score
|
|
810
855
|
FROM memory
|
|
811
856
|
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
857
|
+
AND embedding_provider = $provider
|
|
812
858
|
AND category = $cat
|
|
813
859
|
ORDER BY score DESC
|
|
814
860
|
LIMIT 1`,
|
|
815
|
-
{ vec: embedding, cat: source },
|
|
861
|
+
{ vec: embedding, cat: source, provider: this.activeProvider },
|
|
816
862
|
);
|
|
817
863
|
if (dupes.length > 0 && dupes[0].score > 0.92) {
|
|
818
864
|
const existing = dupes[0];
|
|
@@ -826,7 +872,10 @@ export class SurrealStore {
|
|
|
826
872
|
}
|
|
827
873
|
|
|
828
874
|
const record: Record<string, unknown> = { text, importance, category: source, source };
|
|
829
|
-
if (embedding?.length)
|
|
875
|
+
if (embedding?.length) {
|
|
876
|
+
record.embedding = embedding;
|
|
877
|
+
record.embedding_provider = this.activeProvider;
|
|
878
|
+
}
|
|
830
879
|
if (sessionId) record.session_id = sessionId;
|
|
831
880
|
const rows = await this.queryFirst<{ id: string }>(
|
|
832
881
|
`CREATE memory CONTENT $record RETURN id`,
|
|
@@ -842,7 +891,10 @@ export class SurrealStore {
|
|
|
842
891
|
embedding: number[] | null,
|
|
843
892
|
): Promise<string> {
|
|
844
893
|
const record: Record<string, unknown> = { session_id: sessionId, category, content };
|
|
845
|
-
if (embedding?.length)
|
|
894
|
+
if (embedding?.length) {
|
|
895
|
+
record.embedding = embedding;
|
|
896
|
+
record.embedding_provider = this.activeProvider;
|
|
897
|
+
}
|
|
846
898
|
const rows = await this.queryFirst<{ id: string }>(
|
|
847
899
|
`CREATE monologue CONTENT $record RETURN id`,
|
|
848
900
|
{ record },
|
|
@@ -1224,7 +1276,8 @@ export class SurrealStore {
|
|
|
1224
1276
|
let merged = 0;
|
|
1225
1277
|
const seen = new Set<string>();
|
|
1226
1278
|
|
|
1227
|
-
// Pass 1: Vector similarity dedup
|
|
1279
|
+
// Pass 1: Vector similarity dedup. Restrict to current provider so we
|
|
1280
|
+
// don't compare vectors across spaces.
|
|
1228
1281
|
const embMemories = await this.queryFirst<{
|
|
1229
1282
|
id: string;
|
|
1230
1283
|
text: string;
|
|
@@ -1236,8 +1289,10 @@ export class SurrealStore {
|
|
|
1236
1289
|
`SELECT id, text, importance, category, access_count, embedding, created_at
|
|
1237
1290
|
FROM memory
|
|
1238
1291
|
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
1292
|
+
AND embedding_provider = $provider
|
|
1239
1293
|
ORDER BY created_at ASC
|
|
1240
1294
|
LIMIT 50`,
|
|
1295
|
+
{ provider: this.activeProvider },
|
|
1241
1296
|
);
|
|
1242
1297
|
|
|
1243
1298
|
for (const mem of embMemories) {
|
|
@@ -1255,9 +1310,10 @@ export class SurrealStore {
|
|
|
1255
1310
|
WHERE id != $mid
|
|
1256
1311
|
AND category = $cat
|
|
1257
1312
|
AND embedding != NONE AND array::len(embedding) > 0
|
|
1313
|
+
AND embedding_provider = $provider
|
|
1258
1314
|
ORDER BY score DESC
|
|
1259
1315
|
LIMIT 3`,
|
|
1260
|
-
{ vec: mem.embedding, mid: mem.id, cat: mem.category },
|
|
1316
|
+
{ vec: mem.embedding, mid: mem.id, cat: mem.category, provider: this.activeProvider },
|
|
1261
1317
|
);
|
|
1262
1318
|
|
|
1263
1319
|
for (const dupe of dupes) {
|
|
@@ -1300,9 +1356,10 @@ export class SurrealStore {
|
|
|
1300
1356
|
try {
|
|
1301
1357
|
const emb = await embedFn(mem.text);
|
|
1302
1358
|
if (!emb) continue;
|
|
1359
|
+
// Backfilled embedding is in the active provider's space, so tag it.
|
|
1303
1360
|
await this.queryExec(
|
|
1304
|
-
`UPDATE ${String(mem.id)} SET embedding = $emb`,
|
|
1305
|
-
{ emb },
|
|
1361
|
+
`UPDATE ${String(mem.id)} SET embedding = $emb, embedding_provider = $provider`,
|
|
1362
|
+
{ emb, provider: this.activeProvider },
|
|
1306
1363
|
);
|
|
1307
1364
|
|
|
1308
1365
|
const dupes = await this.queryFirst<{
|
|
@@ -1317,9 +1374,10 @@ export class SurrealStore {
|
|
|
1317
1374
|
WHERE id != $mid
|
|
1318
1375
|
AND category = $cat
|
|
1319
1376
|
AND embedding != NONE AND array::len(embedding) > 0
|
|
1377
|
+
AND embedding_provider = $provider
|
|
1320
1378
|
ORDER BY score DESC
|
|
1321
1379
|
LIMIT 3`,
|
|
1322
|
-
{ vec: emb, mid: mem.id, cat: mem.category },
|
|
1380
|
+
{ vec: emb, mid: mem.id, cat: mem.category, provider: this.activeProvider },
|
|
1323
1381
|
);
|
|
1324
1382
|
for (const dupe of dupes) {
|
|
1325
1383
|
if (dupe.score < 0.88) break;
|
package/src/workspace-migrate.ts
CHANGED
|
@@ -204,6 +204,7 @@ export async function migrateWorkspace(
|
|
|
204
204
|
content: file.content,
|
|
205
205
|
content_hash: simpleHash(file.content),
|
|
206
206
|
embedding,
|
|
207
|
+
embedding_provider: embedding ? embeddings.providerId : undefined,
|
|
207
208
|
tags: ["workspace-migration", fileType],
|
|
208
209
|
migrated_from: "openclaw-default",
|
|
209
210
|
},
|
|
@@ -459,6 +460,7 @@ async function ingestSkill(
|
|
|
459
460
|
name: skillName,
|
|
460
461
|
description,
|
|
461
462
|
embedding,
|
|
463
|
+
embedding_provider: embedding ? embeddings.providerId : undefined,
|
|
462
464
|
preconditions: preconditions.length > 0 ? preconditions.join("; ") : null,
|
|
463
465
|
steps: steps.length > 0 ? steps : null,
|
|
464
466
|
postconditions: null,
|
|
@@ -484,6 +486,7 @@ async function ingestSkill(
|
|
|
484
486
|
content: file.content,
|
|
485
487
|
content_hash: simpleHash(file.content),
|
|
486
488
|
embedding,
|
|
489
|
+
embedding_provider: embedding ? embeddings.providerId : undefined,
|
|
487
490
|
tags: ["workspace-migration", "skill", skillName],
|
|
488
491
|
migrated_from: "openclaw-default",
|
|
489
492
|
},
|