npm - kongbrain - Versions diffs - 0.4.0 → 0.4.2 - Mend

kongbrain 0.4.0 → 0.4.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (37)

package/.github/workflows/ci.yml +45 -0
package/.github/workflows/pr-check.yml +16 -0
package/CHANGELOG.md +64 -0
package/README.github.md +40 -1
package/SKILL.md +1 -1
package/TOKEN_FLOW.md +184 -0
package/package.json +1 -1
package/src/acan.ts +32 -6
package/src/causal.ts +18 -25
package/src/cognitive-bootstrap.ts +6 -6
package/src/cognitive-check.ts +19 -21
package/src/concept-extract.ts +1 -1
package/src/config.ts +1 -1
package/src/context-engine.ts +81 -48
package/src/daemon-manager.ts +65 -25
package/src/deferred-cleanup.ts +14 -16
package/src/embeddings.ts +6 -7
package/src/errors.ts +5 -3
package/src/graph-context.ts +269 -173
package/src/handoff-file.ts +12 -5
package/src/hooks/after-tool-call.ts +3 -2
package/src/hooks/before-tool-call.ts +15 -11
package/src/hooks/llm-output.ts +18 -10
package/src/index.ts +25 -14
package/src/intent.ts +9 -8
package/src/log.ts +11 -0
package/src/orchestrator.ts +12 -5
package/src/prefetch.ts +2 -2
package/src/reflection.ts +10 -2
package/src/schema.surql +4 -0
package/src/skills.ts +32 -10
package/src/soul.ts +18 -2
package/src/state.ts +31 -0
package/src/surreal.ts +138 -110
package/src/tools/introspect.ts +1 -1
package/src/tools/recall.ts +1 -1
package/src/wakeup.ts +0 -142

package/src/context-engine.ts CHANGED Viewed

@@ -51,11 +51,12 @@ import { graduateCausalToSkills } from "./skills.js";
 import { attemptGraduation, evolveSoul, checkStageTransition } from "./soul.js";
 import { swallow } from "./errors.js";
+/** OpenClaw ContextEngine backed by SurrealDB graph retrieval and BGE-M3 embeddings. */
 export class KongBrainContextEngine implements ContextEngine {
   readonly info: ContextEngineInfo = {
     id: "kongbrain",
     name: "KongBrain",
-    version: "0.1.2",
+    version: "0.4.2",
     ownsCompaction: true,
   };
@@ -63,6 +64,7 @@ export class KongBrainContextEngine implements ContextEngine {
   // ── Bootstrap ──────────────────────────────────────────────────────────
+  /** Initialize schema, create 5-pillar graph nodes, and start the memory daemon. */
   async bootstrap(params: {
     sessionId: string;
     sessionKey?: string;
@@ -139,6 +141,7 @@ export class KongBrainContextEngine implements ContextEngine {
   // ── Assemble ───────────────────────────────────────────────────────────
+  /** Build the context window: graph retrieval + system prompt additions + budget trimming. */
   async assemble(params: {
     sessionId: string;
     sessionKey?: string;
@@ -173,26 +176,22 @@ export class KongBrainContextEngine implements ContextEngine {
     if (systemPromptSection) additions.push(systemPromptSection);
     // Compaction summary (claw-code: compact.rs structured signals — inject once after compaction)
-    const compactionSummary = (session as any)._compactionSummary as string | undefined;
+    const compactionSummary = session._compactionSummary;
     if (compactionSummary) {
       additions.push("[POST-COMPACTION CONTEXT]\n" + compactionSummary);
-      delete (session as any)._compactionSummary;
+      session._compactionSummary = undefined;
     }
     // Wakeup briefing (synthesized at session start, may still be in-flight)
-    const wakeupPromise = (session as any)._wakeupPromise as Promise<string | null> | undefined;
+    const wakeupPromise = session._wakeupPromise;
     if (wakeupPromise) {
       const wakeupBriefing = await wakeupPromise;
-      delete (session as any)._wakeupPromise; // Only inject once
+      session._wakeupPromise = undefined; // Only inject once
       if (wakeupBriefing) additions.push(wakeupBriefing);
     }
     // Graduation celebration — tell the agent it just graduated so it can share with the user
-    const graduation = (session as any)._graduationCelebration as {
-      qualityScore: number;
-      volumeScore: number;
-      soulSummary: string;
-    } | undefined;
+    const graduation = session._graduationCelebration;
     if (graduation) {
       let graduationBlock =
         "[SOUL GRADUATION — CELEBRATE WITH THE USER]\n" +
@@ -211,11 +210,11 @@ export class KongBrainContextEngine implements ContextEngine {
         "identity emerging from YOUR experience. Don't be robotic about it. This only happens once.";
       additions.push(graduationBlock);
-      delete (session as any)._graduationCelebration; // Only inject once
+      session._graduationCelebration = undefined; // Only inject once
     }
     // Migration nudge — tell the agent there are workspace files to offer migrating
-    if ((session as any)._hasMigratableFiles) {
+    if (session._hasMigratableFiles) {
       additions.push(
         "[MIGRATION AVAILABLE] This workspace has files from the default context engine " +
         "(IDENTITY.md, MEMORY.md, skills/, etc.). You can offer to migrate them into the graph " +
@@ -226,15 +225,31 @@ export class KongBrainContextEngine implements ContextEngine {
       );
     }
+    // Apply SPA priority budget — drop lowest-priority sections if over budget
+    // (dropped sections aren't lost — they're in the graph, retrievable on demand)
+    const BYTES_PER_TOKEN = 4; // claw-code: roughTokenCountEstimation default
+    const SPA_BUDGET_CHARS = Math.round(contextWindow * 0.08 * BYTES_PER_TOKEN);
+    let spaTotalChars = 0;
+    const keptAdditions: string[] = [];
+    for (const section of additions) { // additions are already in priority order
+      if (spaTotalChars + section.length > SPA_BUDGET_CHARS && keptAdditions.length > 0) break;
+      keptAdditions.push(section);
+      spaTotalChars += section.length;
+    }
+    const spaText = keptAdditions.length > 0 ? keptAdditions.join("\n\n") : undefined;
+    const spaTokens = spaText ? Math.ceil(spaText.length / BYTES_PER_TOKEN) : 0;
     return {
       messages,
-      estimatedTokens: stats.sentTokens,
-      systemPromptAddition: additions.length > 0 ? additions.join("\n\n") : undefined,
+      estimatedTokens: stats.sentTokens + spaTokens,
+      systemPromptAddition: spaText,
     };
   }
   // ── Ingest ─────────────────────────────────────────────────────────────
+  /** Embed and store a single user or assistant message as a turn node. */
   async ingest(params: {
     sessionId: string;
     sessionKey?: string;
@@ -247,7 +262,7 @@ export class KongBrainContextEngine implements ContextEngine {
     const msg = params.message;
     try {
-      const role = (msg as any).role as string;
+      const role = "role" in msg ? (msg as { role: string }).role : "";
       if (role === "user" || role === "assistant") {
         const text = extractMessageText(msg);
         if (!text) return { ingested: false };
@@ -256,11 +271,16 @@ export class KongBrainContextEngine implements ContextEngine {
         let embedding: number[] | null = null;
         if (worthEmbedding && embeddings.isAvailable()) {
           try {
-            const embedLimit = Math.round(8192 * 3.4 * 0.8);
-            embedding = await embeddings.embed(text.slice(0, embedLimit));
+            const INGEST_EMBED_CHAR_LIMIT = 22_282; // ~6,554 tokens at 3.4 chars/token (BGE-M3 8192-token window * 0.8 safety margin)
+            embedding = await embeddings.embed(text.slice(0, INGEST_EMBED_CHAR_LIMIT));
           } catch (e) { swallow("ingest:embed", e); }
         }
+        // Stash user embedding for reuse in buildContextualQueryVec (avoids re-embedding)
+        if (role === "user" && embedding) {
+          session.lastUserEmbedding = embedding;
+        }
         const turnId = await store.upsertTurn({
           session_id: session.sessionId,
           role,
@@ -327,6 +347,7 @@ export class KongBrainContextEngine implements ContextEngine {
   // ── Compact ────────────────────────────────────────────────────────────
+  /** Extract structured signals (pending work, key files, errors) for post-compaction injection. */
   async compact(params: {
     sessionId: string;
     sessionKey?: string;
@@ -346,8 +367,9 @@ export class KongBrainContextEngine implements ContextEngine {
     // Extract structured compaction signals from stored turns
     let summary: string | undefined;
+    const { store } = this.state;
+    const contextWindow = params.tokenBudget ?? 200_000;
     try {
-      const { store } = this.state;
       if (store.isAvailable()) {
         const turns = await store.getSessionTurnsRich(params.sessionId, 30);
         if (turns.length > 0) {
@@ -370,6 +392,12 @@ export class KongBrainContextEngine implements ContextEngine {
             turns.filter(t => t.tool_name).map(t => t.tool_name!)
           )];
+          // Recent errors — preserve tool failure context across compaction
+          const errorRe = /\b(error|failed|exception|crash|panic|TypeError|ReferenceError)\b[^.\n]{0,120}/gi;
+          const recentErrors = [...fullText.matchAll(errorRe)]
+            .map(m => m[0].trim().slice(0, 160))
+            .slice(-3); // last 3 errors only
           // Current work inference (claw-code: compact.rs:272-279)
           const lastText = turns.filter(t => t.text.length > 10).at(-1)?.text.slice(0, 200) ?? "";
@@ -377,6 +405,7 @@ export class KongBrainContextEngine implements ContextEngine {
           if (pendingMatches.length > 0) parts.push(`PENDING: ${pendingMatches.join("; ")}`);
           if (filePaths.length > 0) parts.push(`FILES: ${filePaths.join(", ")}`);
           if (toolNames.length > 0) parts.push(`TOOLS USED: ${toolNames.join(", ")}`);
+          if (recentErrors.length > 0) parts.push(`RECENT ERRORS: ${recentErrors.join("; ")}`);
           if (lastText) parts.push(`LAST: ${lastText}`);
           parts.push("Resume directly — do not recap what was happening.");
@@ -384,25 +413,34 @@ export class KongBrainContextEngine implements ContextEngine {
             summary = parts.join("\n");
             // Stash for next assemble() to inject
             if (session) {
-              (session as any)._compactionSummary = summary;
+              session._compactionSummary = summary;
             }
           }
         }
       }
     } catch { /* non-critical */ }
+    // Compaction checkpoint — diagnostic trail for debugging
+    if (store.isAvailable() && session) {
+      store.createCompactionCheckpoint(params.sessionId, 0, session.userTurnCount)
+        .catch(e => swallow.warn("compact:checkpoint", e));
+    }
     return {
       ok: true,
-      compacted: !!summary,
-      reason: summary
-        ? "Extracted structured signals for continuation."
-        : "Graph retrieval handles context selection; no LLM-based compaction needed.",
-      result: summary ? { summary, tokensBefore: 0 } : undefined,
+      compacted: true,
+      reason: "Graph-curated context window: assemble() selects relevant context each turn.",
+      result: summary ? {
+        summary,
+        tokensBefore: Math.round(summary.length / 4), // 4 bytes/token (claw-code ratio)
+        tokensAfter: Math.round(contextWindow * 0.325),
+      } : undefined,
     };
   }
   // ── After turn ─────────────────────────────────────────────────────────
+  /** Post-turn: ingest messages, evaluate retrieval quality, flush daemon, and run periodic maintenance. */
   async afterTurn?(params: {
     sessionId: string;
     sessionKey?: string;
@@ -442,11 +480,12 @@ export class KongBrainContextEngine implements ContextEngine {
         .catch(e => swallow.warn("afterTurn:evaluateRetrieval", e));
     }
+    // Single fetch for all downstream consumers (cognitive check, daemon flush, handoff)
+    const allSessionTurns = await store.getSessionTurns(session.sessionId, 50)
+      .catch(() => [] as { role: string; text: string }[]);
     // Cognitive check: periodic reasoning over retrieved context
     if (shouldRunCheck(session.userTurnCount, session) && stagedSnapshot.length > 0) {
-      const recentTurns = await store.getSessionTurns(session.sessionId, 6)
-        .catch(() => [] as { role: string; text: string }[]);
       runCognitiveCheck({
         sessionId: session.sessionId,
         userQuery: session.lastUserText,
@@ -457,7 +496,7 @@ export class KongBrainContextEngine implements ContextEngine {
           score: n.finalScore ?? 0,
           table: n.table,
         })),
-        recentTurns,
+        recentTurns: allSessionTurns.slice(-6),
       }, session, store, this.state.complete).catch(e => swallow.warn("afterTurn:cognitiveCheck", e));
     }
@@ -466,11 +505,11 @@ export class KongBrainContextEngine implements ContextEngine {
     const turnReady = session.userTurnCount >= session.lastDaemonFlushTurnCount + 3;
     if (session.daemon && (tokenReady || turnReady)) {
       try {
-        const recentTurns = await store.getSessionTurns(session.sessionId, 20);
+        const recentTurns = allSessionTurns.slice(-20);
         const turnData = recentTurns.map(t => ({
           role: t.role as "user" | "assistant",
           text: t.text,
-          turnId: String((t as any).id ?? ""),
+          turnId: String((t as { id?: string }).id ?? ""),
         }));
         // Gather retrieved memory IDs for dedup
@@ -503,20 +542,14 @@ export class KongBrainContextEngine implements ContextEngine {
       // Fire-and-forget: these are non-critical background operations
       const cleanupOps: Promise<unknown>[] = [];
-      // Final daemon flush with full transcript before cleanup
+      // Final daemon flush with full transcript before cleanup (reuse allSessionTurns)
       if (session.daemon) {
-        cleanupOps.push(
-          store.getSessionTurns(session.sessionId, 50)
-            .then(recentTurns => {
-              const turnData = recentTurns.map(t => ({
-                role: t.role as "user" | "assistant",
-                text: t.text,
-                turnId: String((t as any).id ?? ""),
-              }));
-              session.daemon!.sendTurnBatch(turnData, [...session.pendingThinking], []);
-            })
-            .catch(e => swallow.warn("midCleanup:daemonFlush", e)),
-        );
+        const turnData = allSessionTurns.map(t => ({
+          role: t.role as "user" | "assistant",
+          text: t.text,
+          turnId: String((t as { id?: string }).id ?? ""),
+        }));
+        session.daemon.sendTurnBatch(turnData, [...session.pendingThinking], []);
       }
       if (session.taskId) {
@@ -542,10 +575,10 @@ export class KongBrainContextEngine implements ContextEngine {
           .catch(e => swallow("midCleanup:acan", e)),
       );
-      // Handoff note — snapshot for wakeup even if session continues
+      // Handoff note — snapshot for wakeup even if session continues (reuse allSessionTurns)
       cleanupOps.push(
         (async () => {
-          const recentTurns = await store.getSessionTurns(session.sessionId, 15);
+          const recentTurns = allSessionTurns.slice(-15);
           if (recentTurns.length < 2) return;
           const turnSummary = recentTurns
             .map(t => `[${t.role}] ${t.text.slice(0, 200)}`)
@@ -635,12 +668,12 @@ export class KongBrainContextEngine implements ContextEngine {
 // ── Helpers ────────────────────────────────────────────────────────────────────
 function extractMessageText(msg: AgentMessage): string {
-  const m = msg as any;
+  const m = msg as { content?: string | { type: string; text?: string }[] };
   if (typeof m.content === "string") return m.content;
   if (Array.isArray(m.content)) {
     return m.content
-      .filter((c: any) => c.type === "text")
-      .map((c: any) => c.text ?? "")
+      .filter((c) => c.type === "text")
+      .map((c) => c.text ?? "")
       .join("\n");
   }
   return "";

package/src/daemon-manager.ts CHANGED Viewed

@@ -79,10 +79,10 @@ export function startMemoryDaemon(
     const { buildSystemPrompt, buildTranscript, writeExtractionResults } = await import("./memory-daemon.js");
     const transcript = buildTranscript(turns);
-    const sections: string[] = [`[TRANSCRIPT]\n${transcript.slice(0, 60000)}`];
+    const sections: string[] = [`[TRANSCRIPT]\n${transcript.slice(0, 30000)}`];
     if (thinking.length > 0) {
-      sections.push(`[THINKING]\n${thinking.slice(-8).join("\n---\n").slice(0, 4000)}`);
+      sections.push(`[THINKING]\n${thinking.slice(-3).join("\n---\n").slice(0, 2000)}`);
     }
     if (retrievedMemories.length > 0) {
@@ -92,37 +92,71 @@ export function startMemoryDaemon(
     const systemPrompt = buildSystemPrompt(thinking.length > 0, retrievedMemories.length > 0, priorState);
+    // Structured output schema — forces API to return valid JSON (no markdown, no preamble)
+    const extractionSchema = {
+      type: "object" as const,
+      properties: {
+        causal: { type: "array", items: { type: "object" } },
+        monologue: { type: "array", items: { type: "object" } },
+        resolved: { type: "array", items: { type: "string" } },
+        concepts: { type: "array", items: { type: "object" } },
+        corrections: { type: "array", items: { type: "object" } },
+        preferences: { type: "array", items: { type: "object" } },
+        artifacts: { type: "array", items: { type: "object" } },
+        decisions: { type: "array", items: { type: "object" } },
+        skills: { type: "array", items: { type: "object" } },
+      },
+      required: ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"],
+    };
     const response = await complete({
       system: systemPrompt,
       messages: [{ role: "user", content: sections.join("\n\n") }],
+      outputFormat: { type: "json_schema", schema: extractionSchema },
     });
     const responseText = response.text;
-    const jsonMatch = responseText.match(/\{[\s\S]*?\}/);
-    if (!jsonMatch) return;
+    // With structured output the response should be valid JSON directly.
+    // Fall back to regex extraction if the provider doesn't support outputFormat.
     let result: Record<string, any>;
     try {
-      result = JSON.parse(jsonMatch[0]);
+      result = JSON.parse(responseText);
     } catch {
+      const jsonMatch = responseText.match(/\{[\s\S]*?\}/);
+      if (!jsonMatch) {
+        swallow.warn("daemon:noJson", new Error(`LLM response contained no JSON (${responseText.length} chars)`));
+        return;
+      }
       try {
-        result = JSON.parse(jsonMatch[0].replace(/,\s*([}\]])/g, "$1"));
+        result = JSON.parse(jsonMatch[0]);
       } catch {
-        result = {};
-        const fields = ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"];
-        for (const field of fields) {
-          const fieldMatch = jsonMatch[0].match(new RegExp(`"${field}"\\s*:\\s*(\\[[\\s\\S]*?\\])(?=\\s*[,}]\\s*"[a-z]|\\s*\\}$)`, "m"));
-          if (fieldMatch) {
-            try { result[field] = JSON.parse(fieldMatch[1]); } catch { /* skip */ }
+        try {
+          result = JSON.parse(jsonMatch[0].replace(/,\s*([}\]])/g, "$1"));
+        } catch {
+          result = {};
+          const fields = ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"];
+          for (const field of fields) {
+            const fieldMatch = jsonMatch[0].match(new RegExp(`"${field}"\\s*:\\s*(\\[[\\s\\S]*?\\])(?=\\s*[,}]\\s*"[a-z]|\\s*\\}$)`, "m"));
+            if (fieldMatch) {
+              try { result[field] = JSON.parse(fieldMatch[1]); } catch { /* skip */ }
+            }
+          }
+          const PRIMARY_FIELDS = ["causal", "monologue", "artifacts"];
+          if (!PRIMARY_FIELDS.some(f => f in result)) {
+            swallow.warn("daemon:fallbackFailed", new Error(`Regex fallback extracted no primary fields from: ${jsonMatch[0].slice(0, 100)}`));
+            return;
           }
         }
-        if (Object.keys(result).length === 0) return;
       }
     }
-    const counts = await writeExtractionResults(result, sessionId, store, embeddings, priorState, taskId, projectId, turns);
-    extractedTurnCount = turns.length;
+    try {
+      const counts = await writeExtractionResults(result, sessionId, store, embeddings, priorState, taskId, projectId, turns);
+      extractedTurnCount = turns.length;
+    } catch (e) {
+      swallow.warn("daemon:writeExtractionResults", e);
+    }
   }
   // Pending batch (only keep latest — newer batch supersedes older)
@@ -158,7 +192,15 @@ export function startMemoryDaemon(
   return {
     sendTurnBatch(turns, thinking, retrievedMemories, priorExtractions) {
       if (shuttingDown) return;
-      pendingBatch = { turns, thinking, retrievedMemories, priorExtractions };
+      if (pendingBatch) {
+        // Merge into pending batch instead of discarding — prevents turn data loss
+        pendingBatch.turns = [...pendingBatch.turns, ...turns];
+        pendingBatch.thinking = [...pendingBatch.thinking, ...thinking];
+        pendingBatch.retrievedMemories = [...pendingBatch.retrievedMemories, ...retrievedMemories];
+        pendingBatch.priorExtractions = priorExtractions ?? pendingBatch.priorExtractions;
+      } else {
+        pendingBatch = { turns, thinking, retrievedMemories, priorExtractions };
+      }
       // Fire-and-forget
       processPending().catch(e => swallow.warn("daemon:sendBatch", e));
     },
@@ -176,14 +218,12 @@ export function startMemoryDaemon(
       shuttingDown = true;
       // Wait for current extraction to finish
       if (processing) {
-        await Promise.race([
-          new Promise<void>(resolve => {
-            const check = setInterval(() => {
-              if (!processing) { clearInterval(check); resolve(); }
-            }, 100);
-          }),
-          new Promise<void>(resolve => setTimeout(resolve, timeoutMs)),
-        ]);
+        await new Promise<void>(resolve => {
+          const check = setInterval(() => {
+            if (!processing) { clearInterval(check); clearTimeout(timeout); resolve(); }
+          }, 100);
+          const timeout = setTimeout(() => { clearInterval(check); resolve(); }, timeoutMs);
+        });
       }
       // Shared store/embeddings — don't dispose (owned by global state)
     },

package/src/deferred-cleanup.ts CHANGED Viewed

@@ -14,10 +14,12 @@ import type { CompleteFn } from "./state.js";
 import { buildSystemPrompt, buildTranscript, writeExtractionResults } from "./memory-daemon.js";
 import type { PriorExtractions } from "./daemon-types.js";
 import { swallow } from "./errors.js";
+import { log } from "./log.js";
 // Process-global flag — deferred cleanup runs AT MOST ONCE per process.
 // Using Symbol.for so it survives Jiti re-importing this module.
 const RAN_KEY = Symbol.for("kongbrain.deferredCleanup.ran");
+const _g = globalThis as Record<symbol, unknown>;
 /**
  * Find and process orphaned sessions. Runs with a 30s total timeout.
@@ -30,8 +32,8 @@ export async function runDeferredCleanup(
   complete: CompleteFn,
 ): Promise<number> {
   // Once per process — never re-run even if first run times out
-  if ((globalThis as any)[RAN_KEY]) return 0;
-  (globalThis as any)[RAN_KEY] = true;
+  if (_g[RAN_KEY]) return 0;
+  _g[RAN_KEY] = true;
   try {
     return await runDeferredCleanupInner(store, embeddings, complete);
@@ -51,18 +53,14 @@ async function runDeferredCleanupInner(
   const orphaned = await store.getOrphanedSessions(10).catch(() => []);
   if (orphaned.length === 0) return 0;
-  // Immediately claim all orphaned sessions so no concurrent run can pick them up
-  await Promise.all(
-    orphaned.map(s =>
-      store.markSessionEnded(s.id).catch(e => swallow("deferred:claim", e))
-    )
-  );
   let processed = 0;
   const cleanup = async () => {
     for (const session of orphaned) {
       try {
+        // Claim each session just before processing so unclaimed ones remain
+        // available to the next run if we time out partway through
+        await store.markSessionEnded(session.id).catch(e => swallow("deferred:claim", e));
         await processOrphanedSession(session.id, store, embeddings, complete);
         processed++;
       } catch (e) {
@@ -105,7 +103,7 @@ async function processOrphanedSession(
   const systemPrompt = buildSystemPrompt(false, false, priorState);
   try {
-    console.warn(`[deferred] extracting session ${surrealSessionId} (${turns.length} turns, transcript ${transcript.length} chars)`);
+    log.info(`[deferred] extracting session ${surrealSessionId} (${turns.length} turns, transcript ${transcript.length} chars)`);
     const LLM_CALL_TIMEOUT_MS = 30_000;
     const response = await Promise.race([
       complete({
@@ -118,7 +116,7 @@ async function processOrphanedSession(
     ]);
     const responseText = response.text;
-    console.warn(`[deferred] extraction response: ${responseText.length} chars`);
+    log.info(`[deferred] extraction response: ${responseText.length} chars`);
     const jsonMatch = responseText.match(/\{[\s\S]*\}/);
     if (jsonMatch) {
       let result: Record<string, any>;
@@ -132,17 +130,17 @@ async function processOrphanedSession(
       // Strip prototype pollution keys from LLM-generated JSON
       const BANNED_KEYS = new Set(["__proto__", "constructor", "prototype"]);
       for (const key of Object.keys(result)) {
-        if (BANNED_KEYS.has(key)) delete (result as any)[key];
+        if (BANNED_KEYS.has(key)) delete result[key];
       }
       const keys = Object.keys(result);
-      console.warn(`[deferred] parsed ${keys.length} keys: ${keys.join(", ")}`);
+      log.info(`[deferred] parsed ${keys.length} keys: ${keys.join(", ")}`);
       if (keys.length > 0) {
         await writeExtractionResults(result, surrealSessionId, store, embeddings, priorState, undefined, undefined, turnData);
-        console.warn(`[deferred] wrote extraction results for ${surrealSessionId}`);
+        log.info(`[deferred] wrote extraction results for ${surrealSessionId}`);
       }
     } else {
-      console.warn(`[deferred] no JSON found in response`);
+      log.warn(`[deferred] no JSON found in response`);
     }
   } catch (e) {
     swallow.warn("deferredCleanup:extraction", e);
@@ -166,7 +164,7 @@ async function processOrphanedSession(
     ]);
     const handoffText = handoffResponse.text.trim();
-    console.warn(`[deferred] handoff response: ${handoffText.length} chars`);
+    log.info(`[deferred] handoff response: ${handoffText.length} chars`);
     if (handoffText.length > 20) {
       let emb: number[] | null = null;
       if (embeddings.isAvailable()) {

package/src/embeddings.ts CHANGED Viewed

@@ -1,12 +1,14 @@
 import { existsSync } from "node:fs";
 import type { EmbeddingConfig } from "./config.js";
 import { swallow } from "./errors.js";
+import { log } from "./log.js";
 // Lazy-import node-llama-cpp to avoid top-level await issues with jiti.
 // The actual import happens inside initialize() at runtime.
 type LlamaEmbeddingContext = import("node-llama-cpp").LlamaEmbeddingContext;
 type LlamaModel = import("node-llama-cpp").LlamaModel;
+/** BGE-M3 embedding service (1024-dim via GGUF) with an LRU cache of up to 512 entries. */
 export class EmbeddingService {
   private model: LlamaModel | null = null;
   private ctx: LlamaEmbeddingContext | null = null;
@@ -30,8 +32,8 @@ export class EmbeddingService {
       logLevel: LlamaLogLevel.error,
       logger: (level, message) => {
         if (message.includes("missing newline token")) return;
-        if (level === LlamaLogLevel.error) console.error(`[llama] ${message}`);
-        else if (level === LlamaLogLevel.warn) console.warn(`[llama] ${message}`);
+        if (level === LlamaLogLevel.error) log.error(`[llama] ${message}`);
+        else if (level === LlamaLogLevel.warn) log.warn(`[llama] ${message}`);
       },
     });
     this.model = await llama.loadModel({ modelPath: this.config.modelPath });
@@ -40,6 +42,7 @@ export class EmbeddingService {
     return true;
   }
+  /** Return the embedding vector for text, serving from LRU cache on repeat calls. */
   async embed(text: string): Promise<number[]> {
     if (!this.ready || !this.ctx) throw new Error("Embeddings not initialized");
     const cached = this.cache.get(text);
@@ -61,11 +64,7 @@ export class EmbeddingService {
   async embedBatch(texts: string[]): Promise<number[][]> {
     if (texts.length === 0) return [];
-    const results: number[][] = [];
-    for (const text of texts) {
-      results.push(await this.embed(text));
-    }
-    return results;
+    return Promise.all(texts.map(text => this.embed(text)));
   }
   isAvailable(): boolean {

package/src/errors.ts CHANGED Viewed

@@ -9,6 +9,8 @@
  *                           Always logged to stderr with stack trace.
  */
+import { log } from "./log.js";
 const DEBUG = process.env.KONGBRAIN_DEBUG === "1";
 /**
@@ -18,7 +20,7 @@ const DEBUG = process.env.KONGBRAIN_DEBUG === "1";
 function swallow(context: string, err?: unknown): void {
   if (!DEBUG) return;
   const msg = err instanceof Error ? err.message : String(err ?? "unknown");
-  console.debug(`[swallow] ${context}: ${msg}`);
+  log.debug(`[swallow] ${context}: ${msg}`);
 }
 /**
@@ -27,7 +29,7 @@ function swallow(context: string, err?: unknown): void {
  */
 swallow.warn = function swallowWarn(context: string, err?: unknown): void {
   const msg = err instanceof Error ? err.message : String(err ?? "unknown");
-  console.warn(`[warn] ${context}: ${msg}`);
+  log.warn(`${context}: ${msg}`);
 };
 /**
@@ -37,7 +39,7 @@ swallow.warn = function swallowWarn(context: string, err?: unknown): void {
 swallow.error = function swallowError(context: string, err?: unknown): void {
   const msg = err instanceof Error ? err.message : String(err ?? "unknown");
   const stack = err instanceof Error ? `\n${err.stack}` : "";
-  console.error(`[ERROR] ${context}: ${msg}${stack}`);
+  log.error(`${context}: ${msg}${stack}`);
 };
 export { swallow };