kongbrain 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: kongbrain
3
3
  description: Graph-backed persistent memory engine for OpenClaw. Replaces the default context window with SurrealDB + vector embeddings that learn across sessions.
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  homepage: https://github.com/42U/kongbrain
6
6
  metadata:
7
7
  openclaw:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kongbrain",
3
- "version": "0.4.2",
3
+ "version": "0.4.3",
4
4
  "description": "Graph-backed persistent memory engine for OpenClaw. Replaces the default context window with SurrealDB + vector embeddings that learn across sessions.",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -50,6 +50,7 @@ import { generateReflection } from "./reflection.js";
50
50
  import { graduateCausalToSkills } from "./skills.js";
51
51
  import { attemptGraduation, evolveSoul, checkStageTransition } from "./soul.js";
52
52
  import { swallow } from "./errors.js";
53
+ import { log } from "./log.js";
53
54
 
54
55
  /** OpenClaw ContextEngine backed by SurrealDB graph retrieval and BGE-M3 embeddings. */
55
56
  export class KongBrainContextEngine implements ContextEngine {
@@ -449,11 +450,31 @@ export class KongBrainContextEngine implements ContextEngine {
449
450
  prePromptMessageCount: number;
450
451
  }): Promise<void> {
451
452
  const sessionKey = params.sessionKey ?? params.sessionId;
452
- const session = this.state.getSession(sessionKey);
453
- if (!session) return;
453
+ log.debug(`afterTurn: session=${sessionKey} messages=${params.messages.length}`);
454
+ // Use getOrCreateSession so resumed sessions (where session_start
455
+ // didn't fire after a gateway restart) still get a session object.
456
+ const session = this.state.getOrCreateSession(sessionKey, params.sessionId);
454
457
 
455
458
  const { store, embeddings } = this.state;
456
459
 
460
+ // Lazy daemon start: if session was resumed after gateway restart,
461
+ // session_start won't re-fire, so the daemon never started.
462
+ if (!session.daemon && typeof this.state.complete === "function") {
463
+ try {
464
+ session.daemon = startMemoryDaemon(
465
+ store,
466
+ embeddings,
467
+ session.sessionId,
468
+ this.state.complete,
469
+ this.state.config.thresholds.extractionTimeoutMs,
470
+ session.taskId,
471
+ session.projectId,
472
+ );
473
+ } catch (e) {
474
+ swallow.warn("afterTurn:lazyDaemonStart", e);
475
+ }
476
+ }
477
+
457
478
  // Deferred cleanup: run once on first turn when complete() is available
458
479
  if (session.userTurnCount <= 1 && typeof this.state.complete === "function") {
459
480
  runDeferredCleanup(store, embeddings, this.state.complete)
@@ -503,6 +524,7 @@ export class KongBrainContextEngine implements ContextEngine {
503
524
  // Flush to daemon when token threshold OR turn count threshold is reached
504
525
  const tokenReady = session.newContentTokens >= session.daemonTokenThreshold;
505
526
  const turnReady = session.userTurnCount >= session.lastDaemonFlushTurnCount + 3;
527
+ log.debug(`flush check: daemon=${!!session.daemon} tokenReady=${tokenReady} turnReady=${turnReady} turns=${session.userTurnCount}`);
506
528
  if (session.daemon && (tokenReady || turnReady)) {
507
529
  try {
508
530
  const recentTurns = allSessionTurns.slice(-20);
@@ -36,7 +36,7 @@ export function startMemoryDaemon(
36
36
  sharedEmbeddings: EmbeddingService,
37
37
  sessionId: string,
38
38
  complete: CompleteFn,
39
- extractionTimeoutMs = 60_000,
39
+ extractionTimeoutMs = 120_000,
40
40
  taskId?: string,
41
41
  projectId?: string,
42
42
  ): MemoryDaemon {
@@ -115,15 +115,25 @@ export function startMemoryDaemon(
115
115
  outputFormat: { type: "json_schema", schema: extractionSchema },
116
116
  });
117
117
 
118
- const responseText = response.text;
118
+ let responseText = response.text;
119
+
120
+ // Sanitize: strip BOM, markdown fences, and trim
121
+ responseText = responseText.replace(/^\uFEFF/, "").trim();
122
+ const fenceMatch = responseText.match(/^```(?:json)?\s*\n([\s\S]*?)\n```\s*$/);
123
+ if (fenceMatch) responseText = fenceMatch[1].trim();
119
124
 
120
125
  // With structured output the response should be valid JSON directly.
121
126
  // Fall back to regex extraction if the provider doesn't support outputFormat.
122
127
  let result: Record<string, any>;
123
128
  try {
124
129
  result = JSON.parse(responseText);
125
- } catch {
126
- const jsonMatch = responseText.match(/\{[\s\S]*?\}/);
130
+ } catch (parseErr) {
131
+ swallow.warn("daemon:parseDebug", new Error(
132
+ `JSON.parse failed: ${(parseErr as Error).message}; ` +
133
+ `len=${responseText.length}; first100=${JSON.stringify(responseText.slice(0, 100))}; ` +
134
+ `last100=${JSON.stringify(responseText.slice(-100))}`
135
+ ));
136
+ const jsonMatch = responseText.match(/\{[\s\S]*\}/);
127
137
  if (!jsonMatch) {
128
138
  swallow.warn("daemon:noJson", new Error(`LLM response contained no JSON (${responseText.length} chars)`));
129
139
  return;
@@ -131,21 +141,28 @@ export function startMemoryDaemon(
131
141
  try {
132
142
  result = JSON.parse(jsonMatch[0]);
133
143
  } catch {
144
+ // Try fixing trailing commas
134
145
  try {
135
146
  result = JSON.parse(jsonMatch[0].replace(/,\s*([}\]])/g, "$1"));
136
147
  } catch {
137
- result = {};
138
- const fields = ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"];
139
- for (const field of fields) {
140
- const fieldMatch = jsonMatch[0].match(new RegExp(`"${field}"\\s*:\\s*(\\[[\\s\\S]*?\\])(?=\\s*[,}]\\s*"[a-z]|\\s*\\}$)`, "m"));
141
- if (fieldMatch) {
142
- try { result[field] = JSON.parse(fieldMatch[1]); } catch { /* skip */ }
148
+ // Try stripping control characters
149
+ try {
150
+ const cleaned = jsonMatch[0].replace(/[\x00-\x08\x0b\x0c\x0e-\x1f]/g, "");
151
+ result = JSON.parse(cleaned);
152
+ } catch {
153
+ result = {};
154
+ const fields = ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"];
155
+ for (const field of fields) {
156
+ const fieldMatch = jsonMatch[0].match(new RegExp(`"${field}"\\s*:\\s*(\\[[\\s\\S]*?\\])(?=\\s*[,}]\\s*"[a-z]|\\s*\\}$)`, "m"));
157
+ if (fieldMatch) {
158
+ try { result[field] = JSON.parse(fieldMatch[1]); } catch { /* skip */ }
159
+ }
160
+ }
161
+ const PRIMARY_FIELDS = ["causal", "monologue", "artifacts"];
162
+ if (!PRIMARY_FIELDS.some(f => f in result)) {
163
+ swallow.warn("daemon:fallbackFailed", new Error(`Regex fallback extracted no primary fields from: ${jsonMatch[0].slice(0, 100)}`));
164
+ return;
143
165
  }
144
- }
145
- const PRIMARY_FIELDS = ["causal", "monologue", "artifacts"];
146
- if (!PRIMARY_FIELDS.some(f => f in result)) {
147
- swallow.warn("daemon:fallbackFailed", new Error(`Regex fallback extracted no primary fields from: ${jsonMatch[0].slice(0, 100)}`));
148
- return;
149
166
  }
150
167
  }
151
168
  }
@@ -104,7 +104,7 @@ async function processOrphanedSession(
104
104
 
105
105
  try {
106
106
  log.info(`[deferred] extracting session ${surrealSessionId} (${turns.length} turns, transcript ${transcript.length} chars)`);
107
- const LLM_CALL_TIMEOUT_MS = 30_000;
107
+ const LLM_CALL_TIMEOUT_MS = 120_000;
108
108
  const response = await Promise.race([
109
109
  complete({
110
110
  system: systemPrompt,
@@ -90,8 +90,8 @@ const CORE_MEMORY_SHARE = 0.155; // ~10k for core memory/directives
90
90
  const TOOL_HISTORY_SHARE = 0.23; // ~15k for recent tool results
91
91
  const CORE_MEMORY_TTL = 300_000;
92
92
  const MAX_ITEM_CHARS = 1200; // ~350 tokens per item (matches claw-code MAX_INSTRUCTION_FILE_CHARS)
93
- const MIN_RELEVANCE_SCORE = 0.35; // Floor for graph-scored results after WMR/ACAN
94
- const MIN_COSINE = 0.25; // Minimum cosine similarity to consider a result
93
+ const MIN_RELEVANCE_SCORE = 0.40; // Floor for graph-scored results after WMR/ACAN (tuned: cosine-heavy weights produce lower absolute scores)
94
+ const MIN_COSINE = 0.35; // Minimum cosine similarity to consider a result (raised from 0.25)
95
95
 
96
96
  // Deduplication thresholds
97
97
  const DEDUP_COSINE_THRESHOLD = 0.88;
@@ -417,8 +417,8 @@ async function scoreResults(
417
417
  const reflectionBoost = r.sessionId ? (reflectedSessions.has(r.sessionId) ? 1.0 : 0) : 0;
418
418
 
419
419
  const finalScore =
420
- 0.27 * cosine + 0.28 * recency + 0.05 * importance +
421
- 0.05 * access + 0.10 * neighborBonus + 0.15 * provenUtility +
420
+ 0.35 * cosine + 0.18 * recency + 0.07 * importance +
421
+ 0.02 * access + 0.10 * neighborBonus + 0.18 * provenUtility +
422
422
  0.10 * reflectionBoost - utilityPenalty;
423
423
 
424
424
  return { ...r, finalScore, fromNeighbor: neighborIds.has(r.id) };
@@ -1104,7 +1104,7 @@ async function graphTransformInner(
1104
1104
 
1105
1105
  const currentIntent = config?.intent ?? "unknown";
1106
1106
  const baseLimits = config?.vectorSearchLimits ?? {
1107
- turn: 25, identity: 10, concept: 20, memory: 20, artifact: 10,
1107
+ turn: 25, identity: 10, concept: 35, memory: 20, artifact: 10,
1108
1108
  };
1109
1109
  // Scale search limits with context window — larger windows can use more results
1110
1110
  const cwScale = Math.max(0.5, Math.min(2.0, contextWindow / 200_000));
@@ -1151,9 +1151,16 @@ async function graphTransformInner(
1151
1151
  }
1152
1152
  }
1153
1153
 
1154
- // Vector search (cache miss path)
1154
+ // Vector search + tag-boosted retrieval (cache miss path, run in parallel)
1155
1155
  recordPrefetchMiss();
1156
- const results = await store.vectorSearch(queryVec, session.sessionId, vectorSearchLimits, isACANActive());
1156
+ const [vectorResults, tagResults] = await Promise.all([
1157
+ store.vectorSearch(queryVec, session.sessionId, vectorSearchLimits, isACANActive()),
1158
+ store.tagBoostedConcepts(queryText, queryVec, 10).catch(e => { swallow.warn("graph-context:tagBoost", e); return [] as VectorSearchResult[]; }),
1159
+ ]);
1160
+ // Merge: dedupe tag results against vector results, then combine
1161
+ const vectorIds = new Set(vectorResults.map(r => r.id));
1162
+ const uniqueTagResults = tagResults.filter(r => !vectorIds.has(r.id));
1163
+ const results = [...vectorResults, ...uniqueTagResults];
1157
1164
 
1158
1165
  // Graph neighbor expansion
1159
1166
  const topIds = results
package/src/index.ts CHANGED
@@ -337,10 +337,8 @@ export default definePluginEntry({
337
337
  }
338
338
 
339
339
  const complete: CompleteFn = async (params) => {
340
- // Try runtime.complete first (future-proof for when it ships)
341
- if (typeof apiRef.runtime?.complete === "function") {
342
- return apiRef.runtime.complete(params);
343
- }
340
+ // NOTE: runtime.complete exists in 2026.4.2 but fails for plugin-initiated
341
+ // calls with "Profile anthropic:default timed out" — use pi-ai directly instead.
344
342
  if (!piAi) {
345
343
  if (!piAiPath) {
346
344
  throw new Error("LLM completion not available: @mariozechner/pi-ai not found and runtime.complete missing");
@@ -349,8 +347,20 @@ export default definePluginEntry({
349
347
  }
350
348
  // Call pi-ai directly. runtime.complete is deliberately bypassed (see the NOTE at the top of this function).
351
349
  const provider = params.provider ?? apiRef.runtime.agent.defaults.provider;
352
- const modelId = params.model ?? apiRef.runtime.agent.defaults.model;
353
- const model = piAi!.getModel(provider, modelId);
350
+ const rawModel = params.model ?? apiRef.runtime.agent.defaults.model;
351
+ // defaults.model may be an object {primary: '...', fallbacks: []} — unwrap it
352
+ const modelIdRaw = typeof rawModel === 'object' && rawModel !== null
353
+ ? (rawModel as any).primary ?? (rawModel as any).id ?? String(rawModel)
354
+ : rawModel;
355
+ // modelId may be "provider/model" format — split if provider not set
356
+ let resolvedProvider = provider;
357
+ let modelId = modelIdRaw;
358
+ if (typeof modelId === 'string' && modelId.includes('/') && !resolvedProvider) {
359
+ const idx = modelId.indexOf('/');
360
+ resolvedProvider = modelId.slice(0, idx);
361
+ modelId = modelId.slice(idx + 1);
362
+ }
363
+ const model = piAi!.getModel(resolvedProvider, modelId);
354
364
  if (!model) {
355
365
  throw new Error(`Model "${modelId}" not found for provider "${provider}"`);
356
366
  }
@@ -369,10 +379,16 @@ export default definePluginEntry({
369
379
  );
370
380
  const context = { systemPrompt: params.system, messages };
371
381
  // Pass apiKey directly in options so the provider can use it
382
+ log.info(`complete(): provider=${resolvedProvider} model=${modelId} msgs=${params.messages.length}`);
383
+ // NOTE: outputFormat (structured output) is intentionally NOT passed to pi-ai.
384
+ // pi-ai's SimpleStreamOptions doesn't support it, and injecting it via onPayload
385
+ // causes the Anthropic API to return empty responses. The daemon's JSON parsing
386
+ // cascade (direct parse → greedy regex → trailing comma fix → field-by-field)
387
+ // handles free-text JSON extraction reliably without structured output.
372
388
  const response = await piAi!.completeSimple(model, context, {
373
389
  apiKey: auth.apiKey,
374
- ...(params.outputFormat && { outputFormat: params.outputFormat }),
375
390
  });
391
+ log.info(`complete(): blocks=${response.content?.length} stop=${response.stopReason}`);
376
392
  let text = "";
377
393
  let thinking: string | undefined;
378
394
  for (const block of response.content) {
@@ -14,6 +14,7 @@ import type { EmbeddingService } from "./embeddings.js";
14
14
  import { swallow } from "./errors.js";
15
15
  import { assertRecordId } from "./surreal.js";
16
16
  import { linkConceptHierarchy, linkToRelevantConcepts } from "./concept-extract.js";
17
+ import { linkSupersedesEdges } from "./supersedes.js";
17
18
 
18
19
  // --- Build the extraction prompt ---
19
20
 
package/src/schema.surql CHANGED
@@ -153,6 +153,9 @@ DEFINE TABLE IF NOT EXISTS supports TYPE RELATION IN memory OUT memory;
153
153
  DEFINE TABLE IF NOT EXISTS contradicts TYPE RELATION IN memory OUT memory;
154
154
  DEFINE TABLE IF NOT EXISTS describes TYPE RELATION IN memory OUT memory;
155
155
 
156
+ -- Concept evolution
157
+ DEFINE TABLE IF NOT EXISTS supersedes TYPE RELATION IN memory OUT concept;
158
+
156
159
  -- Cross-pillar links
157
160
  DEFINE TABLE IF NOT EXISTS about_concept TYPE RELATION IN memory OUT concept;
158
161
  DEFINE TABLE IF NOT EXISTS artifact_mentions TYPE RELATION IN artifact OUT concept;
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Supersedes — concept evolution tracking.
3
+ *
4
+ * When the daemon extracts a correction (user correcting the assistant),
5
+ * this module finds the concept(s) that contained the stale knowledge
6
+ * and creates `supersedes` edges from the correction memory to those
7
+ * concepts, decaying their stability so they lose priority in recall.
8
+ *
9
+ * Edge direction: correction_memory -> supersedes -> stale_concept
10
+ *
11
+ * This ensures that:
12
+ * 1. Stale knowledge doesn't win over corrections in retrieval
13
+ * 2. The graph records *why* a concept was deprecated
14
+ * 3. Stability decays by a fixed multiplicative factor, clamped to a floor so the concept stays discoverable
15
+ */
16
+
17
+ import type { SurrealStore } from "./surreal.js";
18
+ import type { EmbeddingService } from "./embeddings.js";
19
+ import { swallow } from "./errors.js";
20
+
21
/** Minimum cosine similarity to consider a concept as the target of a correction. */
const SUPERSEDE_THRESHOLD = 0.70;

/** How much to decay stability of superseded concepts (multiplicative). */
const STABILITY_DECAY_FACTOR = 0.4;

/** Floor — don't decay below this so the concept remains discoverable. */
const STABILITY_FLOOR = 0.15;

/**
 * Find concepts that match the "original" (wrong) statement in a correction,
 * create `supersedes` edges from the correction memory to them, and decay
 * their stability.
 *
 * Only the five most similar concepts are fetched, and of those only the ones
 * scoring at least SUPERSEDE_THRESHOLD are touched. The query itself excludes
 * concepts that already carry `superseded_at` or whose stability is not above
 * STABILITY_FLOOR.
 *
 * Best-effort: failures while embedding, querying, relating or updating are
 * passed to `swallow` and do not propagate. A concept is counted only when
 * BOTH its edge and its stability update succeeded. If the edge cannot be
 * created the concept is left untouched, so a later correction can retry it
 * (marking it `superseded_at` without an edge would hide it from this query
 * for good).
 *
 * @param correctionMemId - Record ID of the correction memory (source of the edge)
 * @param originalText - The "original" (incorrect) text; this is what gets embedded and searched for
 * @param correctionText - The "corrected" text. Currently unused; kept for interface compatibility
 * @param store - SurrealDB store
 * @param embeddings - Embedding service
 * @param precomputedVec - Optional pre-computed embedding of the full correction text.
 *   Currently unused: the lookup needs the embedding of `originalText`, not of the correction
 * @returns Number of concepts that were linked and decayed
 */
export async function linkSupersedesEdges(
  correctionMemId: string,
  originalText: string,
  correctionText: string,
  store: SurrealStore,
  embeddings: EmbeddingService,
  precomputedVec?: number[] | null,
): Promise<number> {
  if (!embeddings.isAvailable() || !originalText) return 0;

  let supersededCount = 0;

  try {
    // Embed the *original* (wrong) text — that's what we're looking for in the graph
    const originalVec = await embeddings.embed(originalText);
    if (!originalVec?.length) return 0;

    // Find concepts whose content is semantically similar to the wrong statement.
    // Pre-filter: skip already-superseded or floored concepts to avoid redundant work.
    const candidates = await store.queryFirst<{ id: string; score: number; stability: number }>(
      `SELECT id, vector::similarity::cosine(embedding, $vec) AS score, stability
       FROM concept
       WHERE embedding != NONE AND array::len(embedding) > 0
         AND superseded_at IS NONE
         AND stability > $floor
       ORDER BY score DESC
       LIMIT 5`,
      { vec: originalVec, floor: STABILITY_FLOOR },
    );

    for (const candidate of candidates) {
      // Results are ordered by score, so the first miss ends the scan.
      if (candidate.score < SUPERSEDE_THRESHOLD) break;

      const conceptId = String(candidate.id);

      // Create supersedes edge: correction -> supersedes -> stale concept.
      // Without the edge we must not decay, otherwise the concept is filtered
      // out of future lookups and the provenance edge can never be added.
      try {
        await store.relate(correctionMemId, "supersedes", conceptId);
      } catch (e) {
        swallow("supersedes:relate", e);
        continue;
      }

      // Decay stability of the stale concept.
      // NOTE(review): the `?? 1.0` fallback looks unreachable — the query's
      // `stability > $floor` filter presumably drops rows with no stability. Confirm.
      const currentStability = candidate.stability ?? 1.0;
      const newStability = Math.max(
        STABILITY_FLOOR,
        currentStability * STABILITY_DECAY_FACTOR,
      );

      // NOTE(review): $conceptId is bound as a string here; verify that
      // queryExec / SurrealDB resolves it to a record rather than a plain string.
      try {
        await store.queryExec(
          `UPDATE $conceptId SET stability = $newStability, superseded_at = time::now(), superseded_by = $correctionId`,
          { conceptId, newStability, correctionId: correctionMemId },
        );
        supersededCount++;
      } catch (e) {
        swallow("supersedes:decay", e);
      }
    }
  } catch (e) {
    swallow("supersedes:link", e);
  }

  return supersededCount;
}
package/src/surreal.ts CHANGED
@@ -67,6 +67,8 @@ const VALID_EDGES = new Set([
67
67
  "produced", "derived_from", "relevant_to", "used_in", "artifact_mentions",
68
68
  // Causal edges
69
69
  "caused_by", "supports", "contradicts", "describes",
70
+ // Evolution edges
71
+ "supersedes",
70
72
  // Session edges
71
73
  "part_of",
72
74
  ]);
@@ -592,6 +594,44 @@ export class SurrealStore {
592
594
  * BFS expansion from seed nodes along typed edges, with batched per-hop queries.
593
595
  * Each edge query is LIMIT 3 (EDGE_NEIGHBOR_LIMIT) to bound fan-out per node.
594
596
  */
597
/**
 * Tag-boosted concept retrieval: extract keywords from query text,
 * find concepts tagged with matching terms, score by cosine similarity.
 * Returns concepts that pure vector search might miss due to embedding mismatch.
 *
 * Keyword extraction lowercases the query, strips every character outside
 * `[a-z0-9\s-]` (so non-ASCII letters are dropped), removes stopwords and
 * words of two characters or fewer, deduplicates, and keeps at most the
 * first eight distinct words.
 *
 * @param queryText - Raw query text to mine for candidate tags
 * @param queryVec - Query embedding used to score matching concepts
 * @param limit - Maximum number of rows to return (default 10)
 * @returns Matching concepts ordered by cosine score, or `[]` when no usable
 *   keyword remains or the query fails (failures are logged via `swallow.warn`)
 */
async tagBoostedConcepts(
  queryText: string,
  queryVec: number[],
  limit = 10,
): Promise<VectorSearchResult[]> {
  // Extract candidate tags from query — lowercase, deduplicate
  const stopwords = new Set(["the","a","an","is","are","was","were","be","been","being","have","has","had","do","does","did","will","would","could","should","may","might","can","shall","to","of","in","for","on","with","at","by","from","as","into","about","between","through","during","it","its","this","that","these","those","i","you","we","they","my","your","our","their","what","which","who","how","when","where","why","not","no","and","or","but","if","so","any","all","some","more","just","also","than","very","too","much","many"]);
  const candidateWords = queryText.toLowerCase().replace(/[^a-z0-9\s-]/g, "").split(/\s+/)
    .filter(w => w.length > 2 && !stopwords.has(w));
  // Deduplicate BEFORE capping so repeated words don't use up the 8-term budget.
  const words = [...new Set(candidateWords)].slice(0, 8);
  if (words.length === 0) return [];

  // Build tag match condition with bound parameters ($tag0, $tag1, ...) instead
  // of splicing the words into the query text.
  const tagParams: Record<string, string> = {};
  const tagConditions = words
    .map((w, i) => {
      tagParams[`tag${i}`] = w;
      return `tags CONTAINS $tag${i}`;
    })
    .join(" OR ");

  try {
    return await this.queryFirst<VectorSearchResult>(
      `SELECT id, content AS text, stability AS importance, access_count AS accessCount,
              created_at AS timestamp, 'concept' AS table,
              vector::similarity::cosine(embedding, $vec) AS score
       FROM concept
       WHERE embedding != NONE AND array::len(embedding) > 0
         AND (${tagConditions})
       ORDER BY score DESC
       LIMIT $limit`,
      { vec: queryVec, limit, ...tagParams },
    );
  } catch (e) {
    swallow.warn("surreal:tagBoostedConcepts", e);
    return [];
  }
}
634
+
595
635
  async graphExpand(
596
636
  nodeIds: string[],
597
637
  queryVec: number[],