npm - kongbrain - Versions diffs - 0.4.2 → 0.4.3 - Mend

kongbrain 0.4.2 → 0.4.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/SKILL.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 name: kongbrain
 description: Graph-backed persistent memory engine for OpenClaw. Replaces the default context window with SurrealDB + vector embeddings that learn across sessions.
-version: 0.4.2
+version: 0.4.3
 homepage: https://github.com/42U/kongbrain
 metadata:
   openclaw:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "kongbrain",
-  "version": "0.4.2",
+  "version": "0.4.3",
   "description": "Graph-backed persistent memory engine for OpenClaw. Replaces the default context window with SurrealDB + vector embeddings that learn across sessions.",
   "type": "module",
   "license": "MIT",

package/src/context-engine.ts CHANGED Viewed

@@ -50,6 +50,7 @@ import { generateReflection } from "./reflection.js";
 import { graduateCausalToSkills } from "./skills.js";
 import { attemptGraduation, evolveSoul, checkStageTransition } from "./soul.js";
 import { swallow } from "./errors.js";
+import { log } from "./log.js";
 /** OpenClaw ContextEngine backed by SurrealDB graph retrieval and BGE-M3 embeddings. */
 export class KongBrainContextEngine implements ContextEngine {
@@ -449,11 +450,31 @@ export class KongBrainContextEngine implements ContextEngine {
     prePromptMessageCount: number;
   }): Promise<void> {
     const sessionKey = params.sessionKey ?? params.sessionId;
-    const session = this.state.getSession(sessionKey);
-    if (!session) return;
+    log.debug(`afterTurn: session=${sessionKey} messages=${params.messages.length}`);
+    // Use getOrCreateSession so resumed sessions (where session_start
+    // didn't fire after a gateway restart) still get a session object.
+    const session = this.state.getOrCreateSession(sessionKey, params.sessionId);
     const { store, embeddings } = this.state;
+    // Lazy daemon start: if session was resumed after gateway restart,
+    // session_start won't re-fire, so the daemon never started.
+    if (!session.daemon && typeof this.state.complete === "function") {
+      try {
+        session.daemon = startMemoryDaemon(
+          store,
+          embeddings,
+          session.sessionId,
+          this.state.complete,
+          this.state.config.thresholds.extractionTimeoutMs,
+          session.taskId,
+          session.projectId,
+        );
+      } catch (e) {
+        swallow.warn("afterTurn:lazyDaemonStart", e);
+      }
+    }
     // Deferred cleanup: run once on first turn when complete() is available
     if (session.userTurnCount <= 1 && typeof this.state.complete === "function") {
       runDeferredCleanup(store, embeddings, this.state.complete)
@@ -503,6 +524,7 @@ export class KongBrainContextEngine implements ContextEngine {
     // Flush to daemon when token threshold OR turn count threshold is reached
     const tokenReady = session.newContentTokens >= session.daemonTokenThreshold;
     const turnReady = session.userTurnCount >= session.lastDaemonFlushTurnCount + 3;
+    log.debug(`flush check: daemon=${!!session.daemon} tokenReady=${tokenReady} turnReady=${turnReady} turns=${session.userTurnCount}`);
     if (session.daemon && (tokenReady || turnReady)) {
       try {
         const recentTurns = allSessionTurns.slice(-20);

package/src/daemon-manager.ts CHANGED Viewed

@@ -36,7 +36,7 @@ export function startMemoryDaemon(
   sharedEmbeddings: EmbeddingService,
   sessionId: string,
   complete: CompleteFn,
-  extractionTimeoutMs = 60_000,
+  extractionTimeoutMs = 120_000,
   taskId?: string,
   projectId?: string,
 ): MemoryDaemon {
@@ -115,15 +115,25 @@ export function startMemoryDaemon(
       outputFormat: { type: "json_schema", schema: extractionSchema },
     });
-    const responseText = response.text;
+    let responseText = response.text;
+    // Sanitize: strip BOM, markdown fences, and trim
+    responseText = responseText.replace(/^\uFEFF/, "").trim();
+    const fenceMatch = responseText.match(/^```(?:json)?\s*\n([\s\S]*?)\n```\s*$/);
+    if (fenceMatch) responseText = fenceMatch[1].trim();
     // With structured output the response should be valid JSON directly.
     // Fall back to regex extraction if the provider doesn't support outputFormat.
     let result: Record<string, any>;
     try {
       result = JSON.parse(responseText);
-    } catch {
-      const jsonMatch = responseText.match(/\{[\s\S]*?\}/);
+    } catch (parseErr) {
+      swallow.warn("daemon:parseDebug", new Error(
+        `JSON.parse failed: ${(parseErr as Error).message}; ` +
+        `len=${responseText.length}; first100=${JSON.stringify(responseText.slice(0, 100))}; ` +
+        `last100=${JSON.stringify(responseText.slice(-100))}`
+      ));
+      const jsonMatch = responseText.match(/\{[\s\S]*\}/);
       if (!jsonMatch) {
         swallow.warn("daemon:noJson", new Error(`LLM response contained no JSON (${responseText.length} chars)`));
         return;
@@ -131,21 +141,28 @@ export function startMemoryDaemon(
       try {
         result = JSON.parse(jsonMatch[0]);
       } catch {
+        // Try fixing trailing commas
         try {
           result = JSON.parse(jsonMatch[0].replace(/,\s*([}\]])/g, "$1"));
         } catch {
-          result = {};
-          const fields = ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"];
-          for (const field of fields) {
-            const fieldMatch = jsonMatch[0].match(new RegExp(`"${field}"\\s*:\\s*(\\[[\\s\\S]*?\\])(?=\\s*[,}]\\s*"[a-z]|\\s*\\}$)`, "m"));
-            if (fieldMatch) {
-              try { result[field] = JSON.parse(fieldMatch[1]); } catch { /* skip */ }
+          // Try stripping control characters
+          try {
+            const cleaned = jsonMatch[0].replace(/[\x00-\x08\x0b\x0c\x0e-\x1f]/g, "");
+            result = JSON.parse(cleaned);
+          } catch {
+            result = {};
+            const fields = ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"];
+            for (const field of fields) {
+              const fieldMatch = jsonMatch[0].match(new RegExp(`"${field}"\\s*:\\s*(\\[[\\s\\S]*?\\])(?=\\s*[,}]\\s*"[a-z]|\\s*\\}$)`, "m"));
+              if (fieldMatch) {
+                try { result[field] = JSON.parse(fieldMatch[1]); } catch { /* skip */ }
+              }
+            }
+            const PRIMARY_FIELDS = ["causal", "monologue", "artifacts"];
+            if (!PRIMARY_FIELDS.some(f => f in result)) {
+              swallow.warn("daemon:fallbackFailed", new Error(`Regex fallback extracted no primary fields from: ${jsonMatch[0].slice(0, 100)}`));
+              return;
             }
-          }
-          const PRIMARY_FIELDS = ["causal", "monologue", "artifacts"];
-          if (!PRIMARY_FIELDS.some(f => f in result)) {
-            swallow.warn("daemon:fallbackFailed", new Error(`Regex fallback extracted no primary fields from: ${jsonMatch[0].slice(0, 100)}`));
-            return;
           }
         }
       }

package/src/deferred-cleanup.ts CHANGED Viewed

@@ -104,7 +104,7 @@ async function processOrphanedSession(
   try {
     log.info(`[deferred] extracting session ${surrealSessionId} (${turns.length} turns, transcript ${transcript.length} chars)`);
-    const LLM_CALL_TIMEOUT_MS = 30_000;
+    const LLM_CALL_TIMEOUT_MS = 120_000;
     const response = await Promise.race([
       complete({
         system: systemPrompt,

package/src/graph-context.ts CHANGED Viewed

@@ -90,8 +90,8 @@ const CORE_MEMORY_SHARE = 0.155;     // ~10k for core memory/directives
 const TOOL_HISTORY_SHARE = 0.23;     // ~15k for recent tool results
 const CORE_MEMORY_TTL = 300_000;
 const MAX_ITEM_CHARS = 1200; // ~350 tokens per item (matches claw-code MAX_INSTRUCTION_FILE_CHARS)
-const MIN_RELEVANCE_SCORE = 0.35; // Floor for graph-scored results after WMR/ACAN
-const MIN_COSINE = 0.25; // Minimum cosine similarity to consider a result
+const MIN_RELEVANCE_SCORE = 0.40; // Floor for graph-scored results after WMR/ACAN (tuned: cosine-heavy weights produce lower absolute scores)
+const MIN_COSINE = 0.35; // Minimum cosine similarity to consider a result (raised from 0.25)
 // Deduplication thresholds
 const DEDUP_COSINE_THRESHOLD = 0.88;
@@ -417,8 +417,8 @@ async function scoreResults(
       const reflectionBoost = r.sessionId ? (reflectedSessions.has(r.sessionId) ? 1.0 : 0) : 0;
       const finalScore =
-        0.27 * cosine + 0.28 * recency + 0.05 * importance +
-        0.05 * access + 0.10 * neighborBonus + 0.15 * provenUtility +
+        0.35 * cosine + 0.18 * recency + 0.07 * importance +
+        0.02 * access + 0.10 * neighborBonus + 0.18 * provenUtility +
         0.10 * reflectionBoost - utilityPenalty;
       return { ...r, finalScore, fromNeighbor: neighborIds.has(r.id) };
@@ -1104,7 +1104,7 @@ async function graphTransformInner(
   const currentIntent = config?.intent ?? "unknown";
   const baseLimits = config?.vectorSearchLimits ?? {
-    turn: 25, identity: 10, concept: 20, memory: 20, artifact: 10,
+    turn: 25, identity: 10, concept: 35, memory: 20, artifact: 10,
   };
   // Scale search limits with context window — larger windows can use more results
   const cwScale = Math.max(0.5, Math.min(2.0, contextWindow / 200_000));
@@ -1151,9 +1151,16 @@ async function graphTransformInner(
       }
     }
-    // Vector search (cache miss path)
+    // Vector search + tag-boosted retrieval (cache miss path, run in parallel)
     recordPrefetchMiss();
-    const results = await store.vectorSearch(queryVec, session.sessionId, vectorSearchLimits, isACANActive());
+    const [vectorResults, tagResults] = await Promise.all([
+      store.vectorSearch(queryVec, session.sessionId, vectorSearchLimits, isACANActive()),
+      store.tagBoostedConcepts(queryText, queryVec, 10).catch(e => { swallow.warn("graph-context:tagBoost", e); return [] as VectorSearchResult[]; }),
+    ]);
+    // Merge: dedupe tag results against vector results, then combine
+    const vectorIds = new Set(vectorResults.map(r => r.id));
+    const uniqueTagResults = tagResults.filter(r => !vectorIds.has(r.id));
+    const results = [...vectorResults, ...uniqueTagResults];
     // Graph neighbor expansion
     const topIds = results

package/src/index.ts CHANGED Viewed

@@ -337,10 +337,8 @@ export default definePluginEntry({
       }
       const complete: CompleteFn = async (params) => {
-        // Try runtime.complete first (future-proof for when it ships)
-        if (typeof apiRef.runtime?.complete === "function") {
-          return apiRef.runtime.complete(params);
-        }
+        // NOTE: runtime.complete exists in 2026.4.2 but fails for plugin-initiated
+        // calls with "Profile anthropic:default timed out" — use pi-ai directly instead.
         if (!piAi) {
           if (!piAiPath) {
             throw new Error("LLM completion not available: @mariozechner/pi-ai not found and runtime.complete missing");
@@ -349,8 +347,20 @@ export default definePluginEntry({
         }
         // Fall back to calling pi-ai directly (runtime.complete not in OpenClaw 2026.3.24)
         const provider = params.provider ?? apiRef.runtime.agent.defaults.provider;
-        const modelId = params.model ?? apiRef.runtime.agent.defaults.model;
-        const model = piAi!.getModel(provider, modelId);
+        const rawModel = params.model ?? apiRef.runtime.agent.defaults.model;
+        // defaults.model may be an object {primary: '...', fallbacks: []} — unwrap it
+        const modelIdRaw = typeof rawModel === 'object' && rawModel !== null
+          ? (rawModel as any).primary ?? (rawModel as any).id ?? String(rawModel)
+          : rawModel;
+        // modelId may be "provider/model" format — split if provider not set
+        let resolvedProvider = provider;
+        let modelId = modelIdRaw;
+        if (typeof modelId === 'string' && modelId.includes('/') && !resolvedProvider) {
+          const idx = modelId.indexOf('/');
+          resolvedProvider = modelId.slice(0, idx);
+          modelId = modelId.slice(idx + 1);
+        }
+        const model = piAi!.getModel(resolvedProvider, modelId);
         if (!model) {
           throw new Error(`Model "${modelId}" not found for provider "${provider}"`);
         }
@@ -369,10 +379,16 @@ export default definePluginEntry({
         );
         const context = { systemPrompt: params.system, messages };
         // Pass apiKey directly in options so the provider can use it
+        log.info(`complete(): provider=${resolvedProvider} model=${modelId} msgs=${params.messages.length}`);
+        // NOTE: outputFormat (structured output) is intentionally NOT passed to pi-ai.
+        // pi-ai's SimpleStreamOptions doesn't support it, and injecting it via onPayload
+        // causes the Anthropic API to return empty responses. The daemon's JSON parsing
+        // cascade (direct parse → greedy regex → trailing comma fix → field-by-field)
+        // handles free-text JSON extraction reliably without structured output.
         const response = await piAi!.completeSimple(model, context, {
           apiKey: auth.apiKey,
-          ...(params.outputFormat && { outputFormat: params.outputFormat }),
         });
+        log.info(`complete(): blocks=${response.content?.length} stop=${response.stopReason}`);
         let text = "";
         let thinking: string | undefined;
         for (const block of response.content) {

package/src/memory-daemon.ts CHANGED Viewed

@@ -14,6 +14,7 @@ import type { EmbeddingService } from "./embeddings.js";
 import { swallow } from "./errors.js";
 import { assertRecordId } from "./surreal.js";
 import { linkConceptHierarchy, linkToRelevantConcepts } from "./concept-extract.js";
+import { linkSupersedesEdges } from "./supersedes.js";
 // --- Build the extraction prompt ---

package/src/schema.surql CHANGED Viewed

@@ -153,6 +153,9 @@ DEFINE TABLE IF NOT EXISTS supports TYPE RELATION IN memory OUT memory;
 DEFINE TABLE IF NOT EXISTS contradicts TYPE RELATION IN memory OUT memory;
 DEFINE TABLE IF NOT EXISTS describes TYPE RELATION IN memory OUT memory;
+-- Concept evolution
+DEFINE TABLE IF NOT EXISTS supersedes TYPE RELATION IN memory OUT concept;
 -- Cross-pillar links
 DEFINE TABLE IF NOT EXISTS about_concept TYPE RELATION IN memory OUT concept;
 DEFINE TABLE IF NOT EXISTS artifact_mentions TYPE RELATION IN artifact OUT concept;

package/src/supersedes.ts ADDED Viewed

@@ -0,0 +1,99 @@
+/**
+ * Supersedes — concept evolution tracking.
+ *
+ * When the daemon extracts a correction (user correcting the assistant),
+ * this module finds the concept(s) that contained the stale knowledge
+ * and creates `supersedes` edges from the correction memory to those
+ * concepts, decaying their stability so they lose priority in recall.
+ *
+ * Edge direction: correction_memory -> supersedes -> stale_concept
+ *
+ * This ensures that:
+ * 1. Stale knowledge doesn't win over corrections in retrieval
+ * 2. The graph records *why* a concept was deprecated
+ * 3. Stability decay is proportional to correction confidence
+ */
+import type { SurrealStore } from "./surreal.js";
+import type { EmbeddingService } from "./embeddings.js";
+import { swallow } from "./errors.js";
+/** Minimum cosine similarity to consider a concept as the target of a correction. */
+const SUPERSEDE_THRESHOLD = 0.70;
+/** How much to decay stability of superseded concepts (multiplicative). */
+const STABILITY_DECAY_FACTOR = 0.4;
+/** Floor — don't decay below this so the concept remains discoverable. */
+const STABILITY_FLOOR = 0.15;
+/**
+ * Find concepts that match the "original" (wrong) statement in a correction,
+ * create supersedes edges, and decay their stability.
+ *
+ * At most five candidate concepts are fetched, ordered by cosine similarity
+ * to the embedding of `originalText`. Candidates scoring at or above
+ * SUPERSEDE_THRESHOLD get a `supersedes` edge from the correction memory and
+ * have their stability multiplied by STABILITY_DECAY_FACTOR, clamped so it
+ * never drops below STABILITY_FLOOR. Errors raised inside the lookup/link
+ * section are handed to `swallow(...)` instead of being rethrown here.
+ *
+ * @param correctionMemId - The memory:xxx record ID of the correction
+ * @param originalText    - The "original" (incorrect) text from the correction
+ * @param correctionText  - The "corrected" (right) text from the correction (accepted but not read by this function)
+ * @param store           - SurrealDB store
+ * @param embeddings      - Embedding service
+ * @param precomputedVec  - Optional pre-computed embedding of the full correction text (accepted but not read by this function)
+ * @returns The number of candidates processed at or above the threshold. The
+ *          count is incremented even when the relate or update call rejected
+ *          and was handed to `swallow`, so it is an upper bound on the edges
+ *          actually written. Returns 0 when embeddings are unavailable,
+ *          `originalText` is empty, or the embedding comes back empty.
+ */
+export async function linkSupersedesEdges(
+  correctionMemId: string,
+  originalText: string,
+  correctionText: string,
+  store: SurrealStore,
+  embeddings: EmbeddingService,
+  precomputedVec?: number[] | null,
+): Promise<number> {
+  if (!embeddings.isAvailable() || !originalText) return 0; // nothing to match without embeddings or an original statement
+  let supersededCount = 0;
+  try {
+    // Embed the *original* (wrong) text — that's what we're looking for in the graph
+    const originalVec = await embeddings.embed(originalText);
+    if (!originalVec?.length) return 0;
+    // Find concepts whose content is semantically similar to the wrong statement
+    // Pre-filter: skip already-superseded concepts and those at or below the stability floor to avoid redundant work
+    const candidates = await store.queryFirst<{ id: string; score: number; stability: number }>(
+      `SELECT id, vector::similarity::cosine(embedding, $vec) AS score, stability
+       FROM concept
+       WHERE embedding != NONE AND array::len(embedding) > 0
+         AND superseded_at IS NONE
+         AND stability > $floor
+       ORDER BY score DESC
+       LIMIT 5`,
+      { vec: originalVec, floor: STABILITY_FLOOR },
+    );
+    for (const candidate of candidates) {
+      if (candidate.score < SUPERSEDE_THRESHOLD) break; // rows are ordered by score DESC, so every later row is below the threshold too
+      const conceptId = String(candidate.id);
+      // Create supersedes edge: correction -> supersedes -> stale concept (a failure is swallowed and the loop continues)
+      await store.relate(correctionMemId, "supersedes", conceptId)
+        .catch(e => swallow("supersedes:relate", e));
+      // Decay stability of the stale concept, never going below STABILITY_FLOOR
+      const currentStability = candidate.stability ?? 1.0; // a missing stability is treated as 1.0
+      const newStability = Math.max(
+        STABILITY_FLOOR,
+        currentStability * STABILITY_DECAY_FACTOR,
+      );
+      await store.queryExec(
+        `UPDATE $conceptId SET stability = $newStability, superseded_at = time::now(), superseded_by = $correctionId`,
+        { conceptId, newStability, correctionId: correctionMemId }, // NOTE(review): conceptId is bound as a plain string — confirm the store converts it to a record ID for UPDATE
+      ).catch(e => swallow("supersedes:decay", e));
+      supersededCount++; // counted regardless of whether the two writes above succeeded
+    }
+  } catch (e) {
+    swallow("supersedes:link", e);
+  }
+  return supersededCount;
+}

package/src/surreal.ts CHANGED Viewed

@@ -67,6 +67,8 @@ const VALID_EDGES = new Set([
   "produced", "derived_from", "relevant_to", "used_in", "artifact_mentions",
   // Causal edges
   "caused_by", "supports", "contradicts", "describes",
+  // Evolution edges
+  "supersedes",
   // Session edges
   "part_of",
 ]);
@@ -592,6 +594,44 @@ export class SurrealStore {
    * BFS expansion from seed nodes along typed edges, with batched per-hop queries.
    * Each edge query is LIMIT 3 (EDGE_NEIGHBOR_LIMIT) to bound fan-out per node.
    */
+  /**
+   * Tag-boosted concept retrieval: extract keywords from query text,
+   * find concepts tagged with matching terms, score by cosine similarity.
+   * Returns concepts that pure vector search might miss due to embedding mismatch.
+   *
+   * Only the first 8 extracted keywords are used to build the tag filter.
+   *
+   * @param queryText - Raw query text that keywords are extracted from
+   * @param queryVec  - Query embedding used for the cosine similarity score
+   * @param limit     - Maximum number of rows requested (default 10)
+   * @returns Matching concept rows ordered by score descending; an empty
+   *          array when no usable keywords remain or when the query throws
+   *          (the error is reported through `swallow.warn`).
+   */
+  async tagBoostedConcepts(
+    queryText: string,
+    queryVec: number[],
+    limit = 10,
+  ): Promise<VectorSearchResult[]> {
+    // Extract candidate tags from query — lowercase, strip everything except a-z, 0-9, whitespace and hyphens, then drop words of 2 chars or fewer and stopwords (NOTE: duplicates are not removed here)
+    const stopwords = new Set(["the","a","an","is","are","was","were","be","been","being","have","has","had","do","does","did","will","would","could","should","may","might","can","shall","to","of","in","for","on","with","at","by","from","as","into","about","between","through","during","it","its","this","that","these","those","i","you","we","they","my","your","our","their","what","which","who","how","when","where","why","not","no","and","or","but","if","so","any","all","some","more","just","also","than","very","too","much","many"]);
+    const words = queryText.toLowerCase().replace(/[^a-z0-9\s-]/g, "").split(/\s+/)
+      .filter(w => w.length > 2 && !stopwords.has(w));
+    if (words.length === 0) return [];
+    // Build tag match condition — one `tags CONTAINS '<word>'` clause per word (first 8 only), OR-ed together; words are interpolated into the query text and were already reduced to [a-z0-9-] above
+    const tagConditions = words.slice(0, 8).map(w => `tags CONTAINS '${w.replace(/'/g, "")}'`).join(" OR ");
+    try {
+      const rows = await this.queryFirst<any>(
+        `SELECT id, content AS text, stability AS importance, access_count AS accessCount,
+                created_at AS timestamp, 'concept' AS table,
+                vector::similarity::cosine(embedding, $vec) AS score
+         FROM concept
+         WHERE embedding != NONE AND array::len(embedding) > 0
+           AND (${tagConditions})
+         ORDER BY score DESC
+         LIMIT $limit`,
+        { vec: queryVec, limit },
+      );
+      return rows as VectorSearchResult[]; // rows are cast without validation — presumably the aliased columns line up with VectorSearchResult; verify against its declaration
+    } catch (e) {
+      swallow.warn("surreal:tagBoostedConcepts", e);
+      return [];
+    }
+  }
   async graphExpand(
     nodeIds: string[],
     queryVec: number[],