kongbrain 0.4.4 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.kongcode-handoff.json +8 -0
- package/CHANGELOG.md +47 -0
- package/README.github.md +56 -4
- package/README.md +29 -3
- package/README.npm.md +29 -3
- package/SKILL.md +1 -1
- package/bin/kongbrain-reembed.ts +143 -0
- package/openclaw.plugin.json +37 -7
- package/package.json +4 -1
- package/src/causal.ts +4 -1
- package/src/cognitive-bootstrap.ts +1 -0
- package/src/concept-extract.ts +4 -2
- package/src/config.ts +64 -10
- package/src/context-engine.ts +4 -2
- package/src/embeddings-openai.ts +232 -0
- package/src/embeddings.ts +48 -6
- package/src/identity.ts +2 -0
- package/src/index.ts +106 -25
- package/src/memory-daemon.ts +1 -1
- package/src/migrate-reembed.ts +305 -0
- package/src/model-resolution.ts +98 -0
- package/src/reflection.ts +10 -4
- package/src/schema-loader.ts +21 -3
- package/src/schema.surql +37 -8
- package/src/skills.ts +14 -5
- package/src/supersedes.ts +2 -1
- package/src/surreal.ts +87 -21
- package/src/workspace-migrate.ts +3 -0
package/src/supersedes.ts
CHANGED
|
@@ -61,11 +61,12 @@ export async function linkSupersedesEdges(
|
|
|
61
61
|
`SELECT id, vector::similarity::cosine(embedding, $vec) AS score, stability
|
|
62
62
|
FROM concept
|
|
63
63
|
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
64
|
+
AND embedding_provider = $provider
|
|
64
65
|
AND superseded_at IS NONE
|
|
65
66
|
AND stability > $floor
|
|
66
67
|
ORDER BY score DESC
|
|
67
68
|
LIMIT 5`,
|
|
68
|
-
{ vec: originalVec, floor: STABILITY_FLOOR },
|
|
69
|
+
{ vec: originalVec, floor: STABILITY_FLOOR, provider: embeddings.providerId },
|
|
69
70
|
);
|
|
70
71
|
|
|
71
72
|
for (const candidate of candidates) {
|
package/src/surreal.ts
CHANGED
|
@@ -47,6 +47,10 @@ export interface UtilityCacheEntry {
|
|
|
47
47
|
retrieval_count: number;
|
|
48
48
|
}
|
|
49
49
|
|
|
50
|
+
export interface SurrealStoreOptions {
|
|
51
|
+
embeddingDimensions?: number;
|
|
52
|
+
}
|
|
53
|
+
|
|
50
54
|
const RECORD_ID_RE = /^[a-zA-Z_][a-zA-Z0-9_]*:[a-zA-Z0-9_]+$/;
|
|
51
55
|
|
|
52
56
|
function assertRecordId(id: string): void {
|
|
@@ -125,12 +129,31 @@ export class SurrealStore {
|
|
|
125
129
|
private reconnecting: Promise<void> | null = null;
|
|
126
130
|
private shutdownFlag = false;
|
|
127
131
|
private initialized = false;
|
|
132
|
+
/**
|
|
133
|
+
* The embedding provider tag used to stamp writes and filter searches.
|
|
134
|
+
* Set once at startup via setActiveProvider() after the EmbeddingService
|
|
135
|
+
* is constructed. Falls back to "local-bge-m3" so existing single-provider
|
|
136
|
+
* deployments keep working if the wire-up step is ever skipped.
|
|
137
|
+
*/
|
|
138
|
+
private activeProvider: string = "local-bge-m3";
|
|
139
|
+
private schemaOptions: SurrealStoreOptions;
|
|
128
140
|
|
|
129
|
-
constructor(config: SurrealConfig) {
|
|
141
|
+
constructor(config: SurrealConfig, options: SurrealStoreOptions = {}) {
|
|
130
142
|
this.config = config;
|
|
143
|
+
this.schemaOptions = options;
|
|
131
144
|
this.db = new Surreal();
|
|
132
145
|
}
|
|
133
146
|
|
|
147
|
+
/** Set the embedding provider id used to stamp writes and filter searches. */
|
|
148
|
+
setActiveProvider(providerId: string): void {
|
|
149
|
+
this.activeProvider = providerId;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
/** Get the active provider id (for callers writing records via direct CREATE). */
|
|
153
|
+
getActiveProvider(): string {
|
|
154
|
+
return this.activeProvider;
|
|
155
|
+
}
|
|
156
|
+
|
|
134
157
|
/** Connect and run schema. Returns true if a new connection was made, false if already initialized. */
|
|
135
158
|
async initialize(): Promise<boolean> {
|
|
136
159
|
// Only connect once — subsequent calls are no-ops.
|
|
@@ -199,7 +222,9 @@ export class SurrealStore {
|
|
|
199
222
|
}
|
|
200
223
|
|
|
201
224
|
private async runSchema(): Promise<void> {
|
|
202
|
-
const schema = loadSchema(
|
|
225
|
+
const schema = loadSchema({
|
|
226
|
+
embeddingDimensions: this.schemaOptions.embeddingDimensions,
|
|
227
|
+
});
|
|
203
228
|
await this.db.query(schema);
|
|
204
229
|
}
|
|
205
230
|
|
|
@@ -361,46 +386,55 @@ export class SurrealStore {
|
|
|
361
386
|
const crossTurnLim = lim.turn - sessionTurnLim;
|
|
362
387
|
const emb = withEmbeddings ? ", embedding" : "";
|
|
363
388
|
|
|
364
|
-
// Batch all 7 vector searches into a single round-trip (limits inlined — per-table)
|
|
389
|
+
// Batch all 7 vector searches into a single round-trip (limits inlined — per-table).
|
|
390
|
+
// Each query filters by embedding_provider so vectors from different
|
|
391
|
+
// models never mix in the same result set (different vector spaces).
|
|
365
392
|
const stmts = [
|
|
366
393
|
`SELECT id, text, role, timestamp, 0 AS accessCount, 'turn' AS table,
|
|
367
394
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
368
395
|
FROM turn WHERE embedding != NONE AND array::len(embedding) > 0
|
|
396
|
+
AND embedding_provider = $provider
|
|
369
397
|
AND session_id = $sid ORDER BY score DESC LIMIT ${sessionTurnLim}`,
|
|
370
398
|
`SELECT id, text, role, timestamp, 0 AS accessCount, 'turn' AS table,
|
|
371
399
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
372
400
|
FROM turn WHERE embedding != NONE AND array::len(embedding) > 0
|
|
401
|
+
AND embedding_provider = $provider
|
|
373
402
|
AND session_id != $sid ORDER BY score DESC LIMIT ${crossTurnLim}`,
|
|
374
403
|
`SELECT id, content AS text, stability AS importance, access_count AS accessCount,
|
|
375
404
|
created_at AS timestamp, 'concept' AS table,
|
|
376
405
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
377
406
|
FROM concept WHERE embedding != NONE AND array::len(embedding) > 0
|
|
407
|
+
AND embedding_provider = $provider
|
|
378
408
|
ORDER BY score DESC LIMIT ${lim.concept}`,
|
|
379
409
|
`SELECT id, text, importance, access_count AS accessCount,
|
|
380
410
|
created_at AS timestamp, session_id AS sessionId, 'memory' AS table,
|
|
381
411
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
382
412
|
FROM memory WHERE embedding != NONE AND array::len(embedding) > 0
|
|
413
|
+
AND embedding_provider = $provider
|
|
383
414
|
AND (status = 'active' OR status IS NONE) ORDER BY score DESC LIMIT ${lim.memory}`,
|
|
384
415
|
`SELECT id, description AS text, 0 AS accessCount,
|
|
385
416
|
created_at AS timestamp, 'artifact' AS table,
|
|
386
417
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
387
418
|
FROM artifact WHERE embedding != NONE AND array::len(embedding) > 0
|
|
419
|
+
AND embedding_provider = $provider
|
|
388
420
|
ORDER BY score DESC LIMIT ${lim.artifact}`,
|
|
389
421
|
`SELECT id, content AS text, category AS source, 0.5 AS importance, 0 AS accessCount,
|
|
390
422
|
timestamp, 'monologue' AS table,
|
|
391
423
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
392
424
|
FROM monologue WHERE embedding != NONE AND array::len(embedding) > 0
|
|
425
|
+
AND embedding_provider = $provider
|
|
393
426
|
ORDER BY score DESC LIMIT ${lim.monologue}`,
|
|
394
427
|
`SELECT id, text, importance, 0 AS accessCount,
|
|
395
428
|
'identity_chunk' AS table,
|
|
396
429
|
vector::similarity::cosine(embedding, $vec) AS score${emb}
|
|
397
430
|
FROM identity_chunk WHERE embedding != NONE AND array::len(embedding) > 0
|
|
431
|
+
AND embedding_provider = $provider
|
|
398
432
|
ORDER BY score DESC LIMIT ${lim.identity}`,
|
|
399
433
|
];
|
|
400
434
|
|
|
401
435
|
let batchResults: any[][];
|
|
402
436
|
try {
|
|
403
|
-
batchResults = await this.queryBatch<any>(stmts, { vec, sid: sessionId });
|
|
437
|
+
batchResults = await this.queryBatch<any>(stmts, { vec, sid: sessionId, provider: this.activeProvider });
|
|
404
438
|
} catch (e) {
|
|
405
439
|
swallow.warn("surreal:vectorSearch:batch", e);
|
|
406
440
|
return [];
|
|
@@ -422,7 +456,9 @@ export class SurrealStore {
|
|
|
422
456
|
|
|
423
457
|
async upsertTurn(turn: TurnRecord): Promise<string> {
|
|
424
458
|
const { embedding, ...rest } = turn;
|
|
425
|
-
const record = embedding?.length
|
|
459
|
+
const record = embedding?.length
|
|
460
|
+
? { ...rest, embedding, embedding_provider: this.activeProvider }
|
|
461
|
+
: rest;
|
|
426
462
|
const rows = await this.queryFirst<{ id: string }>(
|
|
427
463
|
`CREATE turn CONTENT $turn RETURN id`,
|
|
428
464
|
{ turn: record },
|
|
@@ -620,10 +656,11 @@ export class SurrealStore {
|
|
|
620
656
|
vector::similarity::cosine(embedding, $vec) AS score
|
|
621
657
|
FROM concept
|
|
622
658
|
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
659
|
+
AND embedding_provider = $provider
|
|
623
660
|
AND (${tagConditions})
|
|
624
661
|
ORDER BY score DESC
|
|
625
662
|
LIMIT $limit`,
|
|
626
|
-
{ vec: queryVec, limit },
|
|
663
|
+
{ vec: queryVec, limit, provider: this.activeProvider },
|
|
627
664
|
);
|
|
628
665
|
return rows as VectorSearchResult[];
|
|
629
666
|
} catch (e) {
|
|
@@ -659,9 +696,13 @@ export class SurrealStore {
|
|
|
659
696
|
"produced", "derived_from", "performed", "owns",
|
|
660
697
|
];
|
|
661
698
|
|
|
699
|
+
// Graph traversal returns nodes regardless of provider (the edges
|
|
700
|
+
// are still meaningful for structure), but the cosine score only
|
|
701
|
+
// applies to vectors in the active provider's space — others get
|
|
702
|
+
// score 0 so they sort below current-space matches.
|
|
662
703
|
const scoreExpr =
|
|
663
|
-
", IF embedding != NONE AND array::len(embedding) > 0 THEN vector::similarity::cosine(embedding, $vec) ELSE 0 END AS score";
|
|
664
|
-
const bindings = { vec: queryVec };
|
|
704
|
+
", IF embedding != NONE AND array::len(embedding) > 0 AND embedding_provider = $provider THEN vector::similarity::cosine(embedding, $vec) ELSE 0 END AS score";
|
|
705
|
+
const bindings = { vec: queryVec, provider: this.activeProvider };
|
|
665
706
|
const selectFields = `SELECT id, text, content, description, importance, stability,
|
|
666
707
|
access_count AS accessCount, created_at AS timestamp,
|
|
667
708
|
meta::tb(id) AS table${scoreExpr}`;
|
|
@@ -752,11 +793,15 @@ export class SurrealStore {
|
|
|
752
793
|
);
|
|
753
794
|
if (rows.length > 0) {
|
|
754
795
|
const id = String(rows[0].id);
|
|
755
|
-
// Backfill embedding if the existing concept is missing one
|
|
796
|
+
// Backfill embedding if the existing concept is missing one. The
|
|
797
|
+
// backfilled embedding is tagged with the active provider so it can be
|
|
798
|
+
// searched alongside other current-provider rows.
|
|
756
799
|
if (embedding?.length) {
|
|
757
800
|
await this.queryExec(
|
|
758
|
-
`UPDATE ${id} SET access_count += 1, last_accessed = time::now(),
|
|
759
|
-
|
|
801
|
+
`UPDATE ${id} SET access_count += 1, last_accessed = time::now(),
|
|
802
|
+
embedding = IF embedding IS NONE OR array::len(embedding) = 0 THEN $emb ELSE embedding END,
|
|
803
|
+
embedding_provider = IF embedding IS NONE OR array::len(embedding) = 0 THEN $provider ELSE embedding_provider END`,
|
|
804
|
+
{ emb: embedding, provider: this.activeProvider },
|
|
760
805
|
);
|
|
761
806
|
} else {
|
|
762
807
|
await this.queryExec(
|
|
@@ -767,7 +812,10 @@ export class SurrealStore {
|
|
|
767
812
|
}
|
|
768
813
|
const emb = embedding?.length ? embedding : undefined;
|
|
769
814
|
const record: Record<string, unknown> = { content, source: source ?? undefined };
|
|
770
|
-
if (emb)
|
|
815
|
+
if (emb) {
|
|
816
|
+
record.embedding = emb;
|
|
817
|
+
record.embedding_provider = this.activeProvider;
|
|
818
|
+
}
|
|
771
819
|
const created = await this.queryFirst<{ id: string }>(
|
|
772
820
|
`CREATE concept CONTENT $record RETURN id`,
|
|
773
821
|
{ record },
|
|
@@ -782,7 +830,10 @@ export class SurrealStore {
|
|
|
782
830
|
embedding: number[] | null,
|
|
783
831
|
): Promise<string> {
|
|
784
832
|
const record: Record<string, unknown> = { path, type, description };
|
|
785
|
-
if (embedding?.length)
|
|
833
|
+
if (embedding?.length) {
|
|
834
|
+
record.embedding = embedding;
|
|
835
|
+
record.embedding_provider = this.activeProvider;
|
|
836
|
+
}
|
|
786
837
|
const rows = await this.queryFirst<{ id: string }>(
|
|
787
838
|
`CREATE artifact CONTENT $record RETURN id`,
|
|
788
839
|
{ record },
|
|
@@ -800,6 +851,8 @@ export class SurrealStore {
|
|
|
800
851
|
const source = category ?? "general";
|
|
801
852
|
|
|
802
853
|
if (embedding?.length) {
|
|
854
|
+
// Dedup search must filter by provider — same vector value in a different
|
|
855
|
+
// space is meaningless and would produce false-positive merges.
|
|
803
856
|
const dupes = await this.queryFirst<{
|
|
804
857
|
id: string;
|
|
805
858
|
importance: number;
|
|
@@ -809,10 +862,11 @@ export class SurrealStore {
|
|
|
809
862
|
vector::similarity::cosine(embedding, $vec) AS score
|
|
810
863
|
FROM memory
|
|
811
864
|
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
865
|
+
AND embedding_provider = $provider
|
|
812
866
|
AND category = $cat
|
|
813
867
|
ORDER BY score DESC
|
|
814
868
|
LIMIT 1`,
|
|
815
|
-
{ vec: embedding, cat: source },
|
|
869
|
+
{ vec: embedding, cat: source, provider: this.activeProvider },
|
|
816
870
|
);
|
|
817
871
|
if (dupes.length > 0 && dupes[0].score > 0.92) {
|
|
818
872
|
const existing = dupes[0];
|
|
@@ -826,7 +880,10 @@ export class SurrealStore {
|
|
|
826
880
|
}
|
|
827
881
|
|
|
828
882
|
const record: Record<string, unknown> = { text, importance, category: source, source };
|
|
829
|
-
if (embedding?.length)
|
|
883
|
+
if (embedding?.length) {
|
|
884
|
+
record.embedding = embedding;
|
|
885
|
+
record.embedding_provider = this.activeProvider;
|
|
886
|
+
}
|
|
830
887
|
if (sessionId) record.session_id = sessionId;
|
|
831
888
|
const rows = await this.queryFirst<{ id: string }>(
|
|
832
889
|
`CREATE memory CONTENT $record RETURN id`,
|
|
@@ -842,7 +899,10 @@ export class SurrealStore {
|
|
|
842
899
|
embedding: number[] | null,
|
|
843
900
|
): Promise<string> {
|
|
844
901
|
const record: Record<string, unknown> = { session_id: sessionId, category, content };
|
|
845
|
-
if (embedding?.length)
|
|
902
|
+
if (embedding?.length) {
|
|
903
|
+
record.embedding = embedding;
|
|
904
|
+
record.embedding_provider = this.activeProvider;
|
|
905
|
+
}
|
|
846
906
|
const rows = await this.queryFirst<{ id: string }>(
|
|
847
907
|
`CREATE monologue CONTENT $record RETURN id`,
|
|
848
908
|
{ record },
|
|
@@ -1224,7 +1284,8 @@ export class SurrealStore {
|
|
|
1224
1284
|
let merged = 0;
|
|
1225
1285
|
const seen = new Set<string>();
|
|
1226
1286
|
|
|
1227
|
-
// Pass 1: Vector similarity dedup
|
|
1287
|
+
// Pass 1: Vector similarity dedup. Restrict to current provider so we
|
|
1288
|
+
// don't compare vectors across spaces.
|
|
1228
1289
|
const embMemories = await this.queryFirst<{
|
|
1229
1290
|
id: string;
|
|
1230
1291
|
text: string;
|
|
@@ -1236,8 +1297,10 @@ export class SurrealStore {
|
|
|
1236
1297
|
`SELECT id, text, importance, category, access_count, embedding, created_at
|
|
1237
1298
|
FROM memory
|
|
1238
1299
|
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
1300
|
+
AND embedding_provider = $provider
|
|
1239
1301
|
ORDER BY created_at ASC
|
|
1240
1302
|
LIMIT 50`,
|
|
1303
|
+
{ provider: this.activeProvider },
|
|
1241
1304
|
);
|
|
1242
1305
|
|
|
1243
1306
|
for (const mem of embMemories) {
|
|
@@ -1255,9 +1318,10 @@ export class SurrealStore {
|
|
|
1255
1318
|
WHERE id != $mid
|
|
1256
1319
|
AND category = $cat
|
|
1257
1320
|
AND embedding != NONE AND array::len(embedding) > 0
|
|
1321
|
+
AND embedding_provider = $provider
|
|
1258
1322
|
ORDER BY score DESC
|
|
1259
1323
|
LIMIT 3`,
|
|
1260
|
-
{ vec: mem.embedding, mid: mem.id, cat: mem.category },
|
|
1324
|
+
{ vec: mem.embedding, mid: mem.id, cat: mem.category, provider: this.activeProvider },
|
|
1261
1325
|
);
|
|
1262
1326
|
|
|
1263
1327
|
for (const dupe of dupes) {
|
|
@@ -1300,9 +1364,10 @@ export class SurrealStore {
|
|
|
1300
1364
|
try {
|
|
1301
1365
|
const emb = await embedFn(mem.text);
|
|
1302
1366
|
if (!emb) continue;
|
|
1367
|
+
// Backfilled embedding is in the active provider's space, so tag it.
|
|
1303
1368
|
await this.queryExec(
|
|
1304
|
-
`UPDATE ${String(mem.id)} SET embedding = $emb`,
|
|
1305
|
-
{ emb },
|
|
1369
|
+
`UPDATE ${String(mem.id)} SET embedding = $emb, embedding_provider = $provider`,
|
|
1370
|
+
{ emb, provider: this.activeProvider },
|
|
1306
1371
|
);
|
|
1307
1372
|
|
|
1308
1373
|
const dupes = await this.queryFirst<{
|
|
@@ -1317,9 +1382,10 @@ export class SurrealStore {
|
|
|
1317
1382
|
WHERE id != $mid
|
|
1318
1383
|
AND category = $cat
|
|
1319
1384
|
AND embedding != NONE AND array::len(embedding) > 0
|
|
1385
|
+
AND embedding_provider = $provider
|
|
1320
1386
|
ORDER BY score DESC
|
|
1321
1387
|
LIMIT 3`,
|
|
1322
|
-
{ vec: emb, mid: mem.id, cat: mem.category },
|
|
1388
|
+
{ vec: emb, mid: mem.id, cat: mem.category, provider: this.activeProvider },
|
|
1323
1389
|
);
|
|
1324
1390
|
for (const dupe of dupes) {
|
|
1325
1391
|
if (dupe.score < 0.88) break;
|
package/src/workspace-migrate.ts
CHANGED
|
@@ -204,6 +204,7 @@ export async function migrateWorkspace(
|
|
|
204
204
|
content: file.content,
|
|
205
205
|
content_hash: simpleHash(file.content),
|
|
206
206
|
embedding,
|
|
207
|
+
embedding_provider: embedding ? embeddings.providerId : undefined,
|
|
207
208
|
tags: ["workspace-migration", fileType],
|
|
208
209
|
migrated_from: "openclaw-default",
|
|
209
210
|
},
|
|
@@ -459,6 +460,7 @@ async function ingestSkill(
|
|
|
459
460
|
name: skillName,
|
|
460
461
|
description,
|
|
461
462
|
embedding,
|
|
463
|
+
embedding_provider: embedding ? embeddings.providerId : undefined,
|
|
462
464
|
preconditions: preconditions.length > 0 ? preconditions.join("; ") : null,
|
|
463
465
|
steps: steps.length > 0 ? steps : null,
|
|
464
466
|
postconditions: null,
|
|
@@ -484,6 +486,7 @@ async function ingestSkill(
|
|
|
484
486
|
content: file.content,
|
|
485
487
|
content_hash: simpleHash(file.content),
|
|
486
488
|
embedding,
|
|
489
|
+
embedding_provider: embedding ? embeddings.providerId : undefined,
|
|
487
490
|
tags: ["workspace-migration", "skill", skillName],
|
|
488
491
|
migrated_from: "openclaw-default",
|
|
489
492
|
},
|