kongbrain 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +25 -18
- package/.github/workflows/pr-check.yml +4 -4
- package/SKILL.md +1 -1
- package/package.json +1 -1
- package/src/context-engine.ts +24 -2
- package/src/daemon-manager.ts +32 -15
- package/src/deferred-cleanup.ts +1 -1
- package/src/graph-context.ts +14 -7
- package/src/index.ts +23 -7
- package/src/memory-daemon.ts +1 -0
- package/src/schema.surql +3 -0
- package/src/supersedes.ts +99 -0
- package/src/surreal.ts +40 -0
package/.github/workflows/ci.yml
CHANGED
|
@@ -10,33 +10,40 @@ jobs:
|
|
|
10
10
|
test:
|
|
11
11
|
runs-on: ubuntu-latest
|
|
12
12
|
|
|
13
|
-
services:
|
|
14
|
-
surrealdb:
|
|
15
|
-
image: surrealdb/surrealdb:latest
|
|
16
|
-
ports:
|
|
17
|
-
- 8000:8000
|
|
18
|
-
options: >-
|
|
19
|
-
--health-cmd "curl -sf http://localhost:8000/health || exit 1"
|
|
20
|
-
--health-interval 5s
|
|
21
|
-
--health-timeout 5s
|
|
22
|
-
--health-retries 10
|
|
23
|
-
env:
|
|
24
|
-
SURREAL_USER: root
|
|
25
|
-
SURREAL_PASS: root
|
|
26
|
-
|
|
27
13
|
steps:
|
|
28
|
-
- uses: actions/checkout@
|
|
14
|
+
- uses: actions/checkout@v5
|
|
29
15
|
|
|
30
|
-
- uses: actions/setup-node@
|
|
16
|
+
- uses: actions/setup-node@v5
|
|
31
17
|
with:
|
|
32
|
-
node-version:
|
|
18
|
+
node-version: 22
|
|
33
19
|
cache: npm
|
|
34
20
|
|
|
35
|
-
-
|
|
21
|
+
- name: Start SurrealDB
|
|
22
|
+
run: |
|
|
23
|
+
docker run -d --name surrealdb -p 8000:8000 \
|
|
24
|
+
surrealdb/surrealdb:latest start \
|
|
25
|
+
--user root --pass root --bind 0.0.0.0:8000 memory
|
|
26
|
+
# Wait for it to be ready (up to 60s)
|
|
27
|
+
for i in $(seq 1 60); do
|
|
28
|
+
if curl -sf http://localhost:8000/health > /dev/null 2>&1; then
|
|
29
|
+
echo "SurrealDB ready after ${i}s"
|
|
30
|
+
break
|
|
31
|
+
fi
|
|
32
|
+
sleep 1
|
|
33
|
+
done
|
|
34
|
+
# Verify it's actually responding
|
|
35
|
+
curl -sf http://localhost:8000/health || (echo "SurrealDB failed to start" && exit 1)
|
|
36
|
+
|
|
37
|
+
- run: npm install --ignore-scripts --legacy-peer-deps
|
|
36
38
|
|
|
37
39
|
- name: Run unit tests
|
|
38
40
|
run: npx vitest run --exclude test/integration.test.ts
|
|
39
41
|
|
|
42
|
+
- name: Verify SurrealDB is running
|
|
43
|
+
run: |
|
|
44
|
+
docker logs surrealdb 2>&1 | tail -20
|
|
45
|
+
curl -sf http://localhost:8000/health && echo "Health OK" || echo "Health FAILED"
|
|
46
|
+
|
|
40
47
|
- name: Run integration tests
|
|
41
48
|
run: npx vitest run test/integration.test.ts
|
|
42
49
|
env:
|
|
@@ -6,11 +6,11 @@ jobs:
|
|
|
6
6
|
lint:
|
|
7
7
|
runs-on: ubuntu-latest
|
|
8
8
|
steps:
|
|
9
|
-
- uses: actions/checkout@
|
|
10
|
-
- uses: actions/setup-node@
|
|
9
|
+
- uses: actions/checkout@v5
|
|
10
|
+
- uses: actions/setup-node@v5
|
|
11
11
|
with:
|
|
12
|
-
node-version:
|
|
12
|
+
node-version: 22
|
|
13
13
|
cache: npm
|
|
14
|
-
- run: npm
|
|
14
|
+
- run: npm install --ignore-scripts --legacy-peer-deps
|
|
15
15
|
- run: npx tsc --noEmit || true # Type check (peer deps may be missing)
|
|
16
16
|
- run: npx vitest run --exclude test/integration.test.ts
|
package/SKILL.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: kongbrain
|
|
3
3
|
description: Graph-backed persistent memory engine for OpenClaw. Replaces the default context window with SurrealDB + vector embeddings that learn across sessions.
|
|
4
|
-
version: 0.4.2
|
|
4
|
+
version: 0.4.4
|
|
5
5
|
homepage: https://github.com/42U/kongbrain
|
|
6
6
|
metadata:
|
|
7
7
|
openclaw:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "kongbrain",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.4",
|
|
4
4
|
"description": "Graph-backed persistent memory engine for OpenClaw. Replaces the default context window with SurrealDB + vector embeddings that learn across sessions.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
package/src/context-engine.ts
CHANGED
|
@@ -50,6 +50,7 @@ import { generateReflection } from "./reflection.js";
|
|
|
50
50
|
import { graduateCausalToSkills } from "./skills.js";
|
|
51
51
|
import { attemptGraduation, evolveSoul, checkStageTransition } from "./soul.js";
|
|
52
52
|
import { swallow } from "./errors.js";
|
|
53
|
+
import { log } from "./log.js";
|
|
53
54
|
|
|
54
55
|
/** OpenClaw ContextEngine backed by SurrealDB graph retrieval and BGE-M3 embeddings. */
|
|
55
56
|
export class KongBrainContextEngine implements ContextEngine {
|
|
@@ -449,11 +450,31 @@ export class KongBrainContextEngine implements ContextEngine {
|
|
|
449
450
|
prePromptMessageCount: number;
|
|
450
451
|
}): Promise<void> {
|
|
451
452
|
const sessionKey = params.sessionKey ?? params.sessionId;
|
|
452
|
-
|
|
453
|
-
|
|
453
|
+
log.debug(`afterTurn: session=${sessionKey} messages=${params.messages.length}`);
|
|
454
|
+
// Use getOrCreateSession so resumed sessions (where session_start
|
|
455
|
+
// didn't fire after a gateway restart) still get a session object.
|
|
456
|
+
const session = this.state.getOrCreateSession(sessionKey, params.sessionId);
|
|
454
457
|
|
|
455
458
|
const { store, embeddings } = this.state;
|
|
456
459
|
|
|
460
|
+
// Lazy daemon start: if session was resumed after gateway restart,
|
|
461
|
+
// session_start won't re-fire, so the daemon never started.
|
|
462
|
+
if (!session.daemon && typeof this.state.complete === "function") {
|
|
463
|
+
try {
|
|
464
|
+
session.daemon = startMemoryDaemon(
|
|
465
|
+
store,
|
|
466
|
+
embeddings,
|
|
467
|
+
session.sessionId,
|
|
468
|
+
this.state.complete,
|
|
469
|
+
this.state.config.thresholds.extractionTimeoutMs,
|
|
470
|
+
session.taskId,
|
|
471
|
+
session.projectId,
|
|
472
|
+
);
|
|
473
|
+
} catch (e) {
|
|
474
|
+
swallow.warn("afterTurn:lazyDaemonStart", e);
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
|
|
457
478
|
// Deferred cleanup: run once on first turn when complete() is available
|
|
458
479
|
if (session.userTurnCount <= 1 && typeof this.state.complete === "function") {
|
|
459
480
|
runDeferredCleanup(store, embeddings, this.state.complete)
|
|
@@ -503,6 +524,7 @@ export class KongBrainContextEngine implements ContextEngine {
|
|
|
503
524
|
// Flush to daemon when token threshold OR turn count threshold is reached
|
|
504
525
|
const tokenReady = session.newContentTokens >= session.daemonTokenThreshold;
|
|
505
526
|
const turnReady = session.userTurnCount >= session.lastDaemonFlushTurnCount + 3;
|
|
527
|
+
log.debug(`flush check: daemon=${!!session.daemon} tokenReady=${tokenReady} turnReady=${turnReady} turns=${session.userTurnCount}`);
|
|
506
528
|
if (session.daemon && (tokenReady || turnReady)) {
|
|
507
529
|
try {
|
|
508
530
|
const recentTurns = allSessionTurns.slice(-20);
|
package/src/daemon-manager.ts
CHANGED
|
@@ -36,7 +36,7 @@ export function startMemoryDaemon(
|
|
|
36
36
|
sharedEmbeddings: EmbeddingService,
|
|
37
37
|
sessionId: string,
|
|
38
38
|
complete: CompleteFn,
|
|
39
|
-
extractionTimeoutMs =
|
|
39
|
+
extractionTimeoutMs = 120_000,
|
|
40
40
|
taskId?: string,
|
|
41
41
|
projectId?: string,
|
|
42
42
|
): MemoryDaemon {
|
|
@@ -115,15 +115,25 @@ export function startMemoryDaemon(
|
|
|
115
115
|
outputFormat: { type: "json_schema", schema: extractionSchema },
|
|
116
116
|
});
|
|
117
117
|
|
|
118
|
-
|
|
118
|
+
let responseText = response.text;
|
|
119
|
+
|
|
120
|
+
// Sanitize: strip BOM, markdown fences, and trim
|
|
121
|
+
responseText = responseText.replace(/^\uFEFF/, "").trim();
|
|
122
|
+
const fenceMatch = responseText.match(/^```(?:json)?\s*\n([\s\S]*?)\n```\s*$/);
|
|
123
|
+
if (fenceMatch) responseText = fenceMatch[1].trim();
|
|
119
124
|
|
|
120
125
|
// With structured output the response should be valid JSON directly.
|
|
121
126
|
// Fall back to regex extraction if the provider doesn't support outputFormat.
|
|
122
127
|
let result: Record<string, any>;
|
|
123
128
|
try {
|
|
124
129
|
result = JSON.parse(responseText);
|
|
125
|
-
} catch {
|
|
126
|
-
|
|
130
|
+
} catch (parseErr) {
|
|
131
|
+
swallow.warn("daemon:parseDebug", new Error(
|
|
132
|
+
`JSON.parse failed: ${(parseErr as Error).message}; ` +
|
|
133
|
+
`len=${responseText.length}; first100=${JSON.stringify(responseText.slice(0, 100))}; ` +
|
|
134
|
+
`last100=${JSON.stringify(responseText.slice(-100))}`
|
|
135
|
+
));
|
|
136
|
+
const jsonMatch = responseText.match(/\{[\s\S]*\}/);
|
|
127
137
|
if (!jsonMatch) {
|
|
128
138
|
swallow.warn("daemon:noJson", new Error(`LLM response contained no JSON (${responseText.length} chars)`));
|
|
129
139
|
return;
|
|
@@ -131,21 +141,28 @@ export function startMemoryDaemon(
|
|
|
131
141
|
try {
|
|
132
142
|
result = JSON.parse(jsonMatch[0]);
|
|
133
143
|
} catch {
|
|
144
|
+
// Try fixing trailing commas
|
|
134
145
|
try {
|
|
135
146
|
result = JSON.parse(jsonMatch[0].replace(/,\s*([}\]])/g, "$1"));
|
|
136
147
|
} catch {
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
148
|
+
// Try stripping control characters
|
|
149
|
+
try {
|
|
150
|
+
const cleaned = jsonMatch[0].replace(/[\x00-\x08\x0b\x0c\x0e-\x1f]/g, "");
|
|
151
|
+
result = JSON.parse(cleaned);
|
|
152
|
+
} catch {
|
|
153
|
+
result = {};
|
|
154
|
+
const fields = ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"];
|
|
155
|
+
for (const field of fields) {
|
|
156
|
+
const fieldMatch = jsonMatch[0].match(new RegExp(`"${field}"\\s*:\\s*(\\[[\\s\\S]*?\\])(?=\\s*[,}]\\s*"[a-z]|\\s*\\}$)`, "m"));
|
|
157
|
+
if (fieldMatch) {
|
|
158
|
+
try { result[field] = JSON.parse(fieldMatch[1]); } catch { /* skip */ }
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
const PRIMARY_FIELDS = ["causal", "monologue", "artifacts"];
|
|
162
|
+
if (!PRIMARY_FIELDS.some(f => f in result)) {
|
|
163
|
+
swallow.warn("daemon:fallbackFailed", new Error(`Regex fallback extracted no primary fields from: ${jsonMatch[0].slice(0, 100)}`));
|
|
164
|
+
return;
|
|
143
165
|
}
|
|
144
|
-
}
|
|
145
|
-
const PRIMARY_FIELDS = ["causal", "monologue", "artifacts"];
|
|
146
|
-
if (!PRIMARY_FIELDS.some(f => f in result)) {
|
|
147
|
-
swallow.warn("daemon:fallbackFailed", new Error(`Regex fallback extracted no primary fields from: ${jsonMatch[0].slice(0, 100)}`));
|
|
148
|
-
return;
|
|
149
166
|
}
|
|
150
167
|
}
|
|
151
168
|
}
|
package/src/deferred-cleanup.ts
CHANGED
|
@@ -104,7 +104,7 @@ async function processOrphanedSession(
|
|
|
104
104
|
|
|
105
105
|
try {
|
|
106
106
|
log.info(`[deferred] extracting session ${surrealSessionId} (${turns.length} turns, transcript ${transcript.length} chars)`);
|
|
107
|
-
const LLM_CALL_TIMEOUT_MS =
|
|
107
|
+
const LLM_CALL_TIMEOUT_MS = 120_000;
|
|
108
108
|
const response = await Promise.race([
|
|
109
109
|
complete({
|
|
110
110
|
system: systemPrompt,
|
package/src/graph-context.ts
CHANGED
|
@@ -90,8 +90,8 @@ const CORE_MEMORY_SHARE = 0.155; // ~10k for core memory/directives
|
|
|
90
90
|
const TOOL_HISTORY_SHARE = 0.23; // ~15k for recent tool results
|
|
91
91
|
const CORE_MEMORY_TTL = 300_000;
|
|
92
92
|
const MAX_ITEM_CHARS = 1200; // ~350 tokens per item (matches claw-code MAX_INSTRUCTION_FILE_CHARS)
|
|
93
|
-
const MIN_RELEVANCE_SCORE = 0.
|
|
94
|
-
const MIN_COSINE = 0.
|
|
93
|
+
const MIN_RELEVANCE_SCORE = 0.40; // Floor for graph-scored results after WMR/ACAN (tuned: cosine-heavy weights produce lower absolute scores)
|
|
94
|
+
const MIN_COSINE = 0.35; // Minimum cosine similarity to consider a result (raised from 0.25)
|
|
95
95
|
|
|
96
96
|
// Deduplication thresholds
|
|
97
97
|
const DEDUP_COSINE_THRESHOLD = 0.88;
|
|
@@ -417,8 +417,8 @@ async function scoreResults(
|
|
|
417
417
|
const reflectionBoost = r.sessionId ? (reflectedSessions.has(r.sessionId) ? 1.0 : 0) : 0;
|
|
418
418
|
|
|
419
419
|
const finalScore =
|
|
420
|
-
0.
|
|
421
|
-
0.
|
|
420
|
+
0.35 * cosine + 0.18 * recency + 0.07 * importance +
|
|
421
|
+
0.02 * access + 0.10 * neighborBonus + 0.18 * provenUtility +
|
|
422
422
|
0.10 * reflectionBoost - utilityPenalty;
|
|
423
423
|
|
|
424
424
|
return { ...r, finalScore, fromNeighbor: neighborIds.has(r.id) };
|
|
@@ -1104,7 +1104,7 @@ async function graphTransformInner(
|
|
|
1104
1104
|
|
|
1105
1105
|
const currentIntent = config?.intent ?? "unknown";
|
|
1106
1106
|
const baseLimits = config?.vectorSearchLimits ?? {
|
|
1107
|
-
turn: 25, identity: 10, concept:
|
|
1107
|
+
turn: 25, identity: 10, concept: 35, memory: 20, artifact: 10,
|
|
1108
1108
|
};
|
|
1109
1109
|
// Scale search limits with context window — larger windows can use more results
|
|
1110
1110
|
const cwScale = Math.max(0.5, Math.min(2.0, contextWindow / 200_000));
|
|
@@ -1151,9 +1151,16 @@ async function graphTransformInner(
|
|
|
1151
1151
|
}
|
|
1152
1152
|
}
|
|
1153
1153
|
|
|
1154
|
-
// Vector search (cache miss path)
|
|
1154
|
+
// Vector search + tag-boosted retrieval (cache miss path, run in parallel)
|
|
1155
1155
|
recordPrefetchMiss();
|
|
1156
|
-
const
|
|
1156
|
+
const [vectorResults, tagResults] = await Promise.all([
|
|
1157
|
+
store.vectorSearch(queryVec, session.sessionId, vectorSearchLimits, isACANActive()),
|
|
1158
|
+
store.tagBoostedConcepts(queryText, queryVec, 10).catch(e => { swallow.warn("graph-context:tagBoost", e); return [] as VectorSearchResult[]; }),
|
|
1159
|
+
]);
|
|
1160
|
+
// Merge: dedupe tag results against vector results, then combine
|
|
1161
|
+
const vectorIds = new Set(vectorResults.map(r => r.id));
|
|
1162
|
+
const uniqueTagResults = tagResults.filter(r => !vectorIds.has(r.id));
|
|
1163
|
+
const results = [...vectorResults, ...uniqueTagResults];
|
|
1157
1164
|
|
|
1158
1165
|
// Graph neighbor expansion
|
|
1159
1166
|
const topIds = results
|
package/src/index.ts
CHANGED
|
@@ -337,10 +337,8 @@ export default definePluginEntry({
|
|
|
337
337
|
}
|
|
338
338
|
|
|
339
339
|
const complete: CompleteFn = async (params) => {
|
|
340
|
-
//
|
|
341
|
-
|
|
342
|
-
return apiRef.runtime.complete(params);
|
|
343
|
-
}
|
|
340
|
+
// NOTE: runtime.complete exists in 2026.4.2 but fails for plugin-initiated
|
|
341
|
+
// calls with "Profile anthropic:default timed out" — use pi-ai directly instead.
|
|
344
342
|
if (!piAi) {
|
|
345
343
|
if (!piAiPath) {
|
|
346
344
|
throw new Error("LLM completion not available: @mariozechner/pi-ai not found and runtime.complete missing");
|
|
@@ -349,8 +347,20 @@ export default definePluginEntry({
|
|
|
349
347
|
}
|
|
350
348
|
// Fall back to calling pi-ai directly (runtime.complete not in OpenClaw 2026.3.24)
|
|
351
349
|
const provider = params.provider ?? apiRef.runtime.agent.defaults.provider;
|
|
352
|
-
const
|
|
353
|
-
|
|
350
|
+
const rawModel = params.model ?? apiRef.runtime.agent.defaults.model;
|
|
351
|
+
// defaults.model may be an object {primary: '...', fallbacks: []} — unwrap it
|
|
352
|
+
const modelIdRaw = typeof rawModel === 'object' && rawModel !== null
|
|
353
|
+
? (rawModel as any).primary ?? (rawModel as any).id ?? String(rawModel)
|
|
354
|
+
: rawModel;
|
|
355
|
+
// modelId may be "provider/model" format — split if provider not set
|
|
356
|
+
let resolvedProvider = provider;
|
|
357
|
+
let modelId = modelIdRaw;
|
|
358
|
+
if (typeof modelId === 'string' && modelId.includes('/') && !resolvedProvider) {
|
|
359
|
+
const idx = modelId.indexOf('/');
|
|
360
|
+
resolvedProvider = modelId.slice(0, idx);
|
|
361
|
+
modelId = modelId.slice(idx + 1);
|
|
362
|
+
}
|
|
363
|
+
const model = piAi!.getModel(resolvedProvider, modelId);
|
|
354
364
|
if (!model) {
|
|
355
365
|
throw new Error(`Model "${modelId}" not found for provider "${provider}"`);
|
|
356
366
|
}
|
|
@@ -369,10 +379,16 @@ export default definePluginEntry({
|
|
|
369
379
|
);
|
|
370
380
|
const context = { systemPrompt: params.system, messages };
|
|
371
381
|
// Pass apiKey directly in options so the provider can use it
|
|
382
|
+
log.info(`complete(): provider=${resolvedProvider} model=${modelId} msgs=${params.messages.length}`);
|
|
383
|
+
// NOTE: outputFormat (structured output) is intentionally NOT passed to pi-ai.
|
|
384
|
+
// pi-ai's SimpleStreamOptions doesn't support it, and injecting it via onPayload
|
|
385
|
+
// causes the Anthropic API to return empty responses. The daemon's JSON parsing
|
|
386
|
+
// cascade (direct parse → greedy regex → trailing comma fix → field-by-field)
|
|
387
|
+
// handles free-text JSON extraction reliably without structured output.
|
|
372
388
|
const response = await piAi!.completeSimple(model, context, {
|
|
373
389
|
apiKey: auth.apiKey,
|
|
374
|
-
...(params.outputFormat && { outputFormat: params.outputFormat }),
|
|
375
390
|
});
|
|
391
|
+
log.info(`complete(): blocks=${response.content?.length} stop=${response.stopReason}`);
|
|
376
392
|
let text = "";
|
|
377
393
|
let thinking: string | undefined;
|
|
378
394
|
for (const block of response.content) {
|
package/src/memory-daemon.ts
CHANGED
|
@@ -14,6 +14,7 @@ import type { EmbeddingService } from "./embeddings.js";
|
|
|
14
14
|
import { swallow } from "./errors.js";
|
|
15
15
|
import { assertRecordId } from "./surreal.js";
|
|
16
16
|
import { linkConceptHierarchy, linkToRelevantConcepts } from "./concept-extract.js";
|
|
17
|
+
import { linkSupersedesEdges } from "./supersedes.js";
|
|
17
18
|
|
|
18
19
|
// --- Build the extraction prompt ---
|
|
19
20
|
|
package/src/schema.surql
CHANGED
|
@@ -153,6 +153,9 @@ DEFINE TABLE IF NOT EXISTS supports TYPE RELATION IN memory OUT memory;
|
|
|
153
153
|
DEFINE TABLE IF NOT EXISTS contradicts TYPE RELATION IN memory OUT memory;
|
|
154
154
|
DEFINE TABLE IF NOT EXISTS describes TYPE RELATION IN memory OUT memory;
|
|
155
155
|
|
|
156
|
+
-- Concept evolution
|
|
157
|
+
DEFINE TABLE IF NOT EXISTS supersedes TYPE RELATION IN memory OUT concept;
|
|
158
|
+
|
|
156
159
|
-- Cross-pillar links
|
|
157
160
|
DEFINE TABLE IF NOT EXISTS about_concept TYPE RELATION IN memory OUT concept;
|
|
158
161
|
DEFINE TABLE IF NOT EXISTS artifact_mentions TYPE RELATION IN artifact OUT concept;
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Supersedes — concept evolution tracking.
|
|
3
|
+
*
|
|
4
|
+
* When the daemon extracts a correction (user correcting the assistant),
|
|
5
|
+
* this module finds the concept(s) that contained the stale knowledge
|
|
6
|
+
* and creates `supersedes` edges from the correction memory to those
|
|
7
|
+
* concepts, decaying their stability so they lose priority in recall.
|
|
8
|
+
*
|
|
9
|
+
* Edge direction: correction_memory -> supersedes -> stale_concept
|
|
10
|
+
*
|
|
11
|
+
* This ensures that:
|
|
12
|
+
* 1. Stale knowledge doesn't win over corrections in retrieval
|
|
13
|
+
* 2. The graph records *why* a concept was deprecated
|
|
14
|
+
* 3. Stability decay is proportional to correction confidence
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import type { SurrealStore } from "./surreal.js";
|
|
18
|
+
import type { EmbeddingService } from "./embeddings.js";
|
|
19
|
+
import { swallow } from "./errors.js";
|
|
20
|
+
|
|
21
|
+
/** Minimum cosine similarity to consider a concept as the target of a correction. */
|
|
22
|
+
const SUPERSEDE_THRESHOLD = 0.70;
|
|
23
|
+
|
|
24
|
+
/** How much to decay stability of superseded concepts (multiplicative). */
|
|
25
|
+
const STABILITY_DECAY_FACTOR = 0.4;
|
|
26
|
+
|
|
27
|
+
/** Floor — don't decay below this so the concept remains discoverable. */
|
|
28
|
+
const STABILITY_FLOOR = 0.15;
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Find concepts that match the "original" (wrong) statement in a correction,
|
|
32
|
+
* create supersedes edges, and decay their stability.
|
|
33
|
+
*
|
|
34
|
+
* @param correctionMemId - The memory:xxx record ID of the correction
|
|
35
|
+
* @param originalText - The "original" (incorrect) text from the correction
|
|
36
|
+
* @param correctionText - The "corrected" (right) text from the correction
|
|
37
|
+
* @param store - SurrealDB store
|
|
38
|
+
* @param embeddings - Embedding service
|
|
39
|
+
* @param precomputedVec - Optional pre-computed embedding of the full correction text
|
|
40
|
+
*/
|
|
41
|
+
export async function linkSupersedesEdges(
|
|
42
|
+
correctionMemId: string,
|
|
43
|
+
originalText: string,
|
|
44
|
+
correctionText: string,
|
|
45
|
+
store: SurrealStore,
|
|
46
|
+
embeddings: EmbeddingService,
|
|
47
|
+
precomputedVec?: number[] | null,
|
|
48
|
+
): Promise<number> {
|
|
49
|
+
if (!embeddings.isAvailable() || !originalText) return 0;
|
|
50
|
+
|
|
51
|
+
let supersededCount = 0;
|
|
52
|
+
|
|
53
|
+
try {
|
|
54
|
+
// Embed the *original* (wrong) text — that's what we're looking for in the graph
|
|
55
|
+
const originalVec = await embeddings.embed(originalText);
|
|
56
|
+
if (!originalVec?.length) return 0;
|
|
57
|
+
|
|
58
|
+
// Find concepts whose content is semantically similar to the wrong statement
|
|
59
|
+
// Pre-filter: skip already-superseded or floored concepts to avoid redundant work
|
|
60
|
+
const candidates = await store.queryFirst<{ id: string; score: number; stability: number }>(
|
|
61
|
+
`SELECT id, vector::similarity::cosine(embedding, $vec) AS score, stability
|
|
62
|
+
FROM concept
|
|
63
|
+
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
64
|
+
AND superseded_at IS NONE
|
|
65
|
+
AND stability > $floor
|
|
66
|
+
ORDER BY score DESC
|
|
67
|
+
LIMIT 5`,
|
|
68
|
+
{ vec: originalVec, floor: STABILITY_FLOOR },
|
|
69
|
+
);
|
|
70
|
+
|
|
71
|
+
for (const candidate of candidates) {
|
|
72
|
+
if (candidate.score < SUPERSEDE_THRESHOLD) break;
|
|
73
|
+
|
|
74
|
+
const conceptId = String(candidate.id);
|
|
75
|
+
|
|
76
|
+
// Create supersedes edge: correction -> supersedes -> stale concept
|
|
77
|
+
await store.relate(correctionMemId, "supersedes", conceptId)
|
|
78
|
+
.catch(e => swallow("supersedes:relate", e));
|
|
79
|
+
|
|
80
|
+
// Decay stability of the stale concept
|
|
81
|
+
const currentStability = candidate.stability ?? 1.0;
|
|
82
|
+
const newStability = Math.max(
|
|
83
|
+
STABILITY_FLOOR,
|
|
84
|
+
currentStability * STABILITY_DECAY_FACTOR,
|
|
85
|
+
);
|
|
86
|
+
|
|
87
|
+
await store.queryExec(
|
|
88
|
+
`UPDATE $conceptId SET stability = $newStability, superseded_at = time::now(), superseded_by = $correctionId`,
|
|
89
|
+
{ conceptId, newStability, correctionId: correctionMemId },
|
|
90
|
+
).catch(e => swallow("supersedes:decay", e));
|
|
91
|
+
|
|
92
|
+
supersededCount++;
|
|
93
|
+
}
|
|
94
|
+
} catch (e) {
|
|
95
|
+
swallow("supersedes:link", e);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
return supersededCount;
|
|
99
|
+
}
|
package/src/surreal.ts
CHANGED
|
@@ -67,6 +67,8 @@ const VALID_EDGES = new Set([
|
|
|
67
67
|
"produced", "derived_from", "relevant_to", "used_in", "artifact_mentions",
|
|
68
68
|
// Causal edges
|
|
69
69
|
"caused_by", "supports", "contradicts", "describes",
|
|
70
|
+
// Evolution edges
|
|
71
|
+
"supersedes",
|
|
70
72
|
// Session edges
|
|
71
73
|
"part_of",
|
|
72
74
|
]);
|
|
@@ -592,6 +594,44 @@ export class SurrealStore {
|
|
|
592
594
|
* BFS expansion from seed nodes along typed edges, with batched per-hop queries.
|
|
593
595
|
* Each edge query is LIMIT 3 (EDGE_NEIGHBOR_LIMIT) to bound fan-out per node.
|
|
594
596
|
*/
|
|
597
|
+
/**
|
|
598
|
+
* Tag-boosted concept retrieval: extract keywords from query text,
|
|
599
|
+
* find concepts tagged with matching terms, score by cosine similarity.
|
|
600
|
+
* Returns concepts that pure vector search might miss due to embedding mismatch.
|
|
601
|
+
*/
|
|
602
|
+
async tagBoostedConcepts(
|
|
603
|
+
queryText: string,
|
|
604
|
+
queryVec: number[],
|
|
605
|
+
limit = 10,
|
|
606
|
+
): Promise<VectorSearchResult[]> {
|
|
607
|
+
// Extract candidate tags from query — lowercase, deduplicate
|
|
608
|
+
const stopwords = new Set(["the","a","an","is","are","was","were","be","been","being","have","has","had","do","does","did","will","would","could","should","may","might","can","shall","to","of","in","for","on","with","at","by","from","as","into","about","between","through","during","it","its","this","that","these","those","i","you","we","they","my","your","our","their","what","which","who","how","when","where","why","not","no","and","or","but","if","so","any","all","some","more","just","also","than","very","too","much","many"]);
|
|
609
|
+
const words = queryText.toLowerCase().replace(/[^a-z0-9\s-]/g, "").split(/\s+/)
|
|
610
|
+
.filter(w => w.length > 2 && !stopwords.has(w));
|
|
611
|
+
if (words.length === 0) return [];
|
|
612
|
+
|
|
613
|
+
// Build tag match condition — match any tag that contains a query word
|
|
614
|
+
const tagConditions = words.slice(0, 8).map(w => `tags CONTAINS '${w.replace(/'/g, "")}'`).join(" OR ");
|
|
615
|
+
|
|
616
|
+
try {
|
|
617
|
+
const rows = await this.queryFirst<any>(
|
|
618
|
+
`SELECT id, content AS text, stability AS importance, access_count AS accessCount,
|
|
619
|
+
created_at AS timestamp, 'concept' AS table,
|
|
620
|
+
vector::similarity::cosine(embedding, $vec) AS score
|
|
621
|
+
FROM concept
|
|
622
|
+
WHERE embedding != NONE AND array::len(embedding) > 0
|
|
623
|
+
AND (${tagConditions})
|
|
624
|
+
ORDER BY score DESC
|
|
625
|
+
LIMIT $limit`,
|
|
626
|
+
{ vec: queryVec, limit },
|
|
627
|
+
);
|
|
628
|
+
return rows as VectorSearchResult[];
|
|
629
|
+
} catch (e) {
|
|
630
|
+
swallow.warn("surreal:tagBoostedConcepts", e);
|
|
631
|
+
return [];
|
|
632
|
+
}
|
|
633
|
+
}
|
|
634
|
+
|
|
595
635
|
async graphExpand(
|
|
596
636
|
nodeIds: string[],
|
|
597
637
|
queryVec: number[],
|