kongbrain 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -50,12 +50,14 @@ import { generateReflection } from "./reflection.js";
  import { graduateCausalToSkills } from "./skills.js";
  import { attemptGraduation, evolveSoul, checkStageTransition } from "./soul.js";
  import { swallow } from "./errors.js";
+ import { log } from "./log.js";

+ /** OpenClaw ContextEngine backed by SurrealDB graph retrieval and BGE-M3 embeddings. */
  export class KongBrainContextEngine implements ContextEngine {
  readonly info: ContextEngineInfo = {
  id: "kongbrain",
  name: "KongBrain",
- version: "0.1.2",
+ version: "0.4.2",
  ownsCompaction: true,
  };

@@ -63,6 +65,7 @@ export class KongBrainContextEngine implements ContextEngine {

  // ── Bootstrap ──────────────────────────────────────────────────────────

+ /** Initialize schema, create 5-pillar graph nodes, and start the memory daemon. */
  async bootstrap(params: {
  sessionId: string;
  sessionKey?: string;
@@ -139,6 +142,7 @@ export class KongBrainContextEngine implements ContextEngine {

  // ── Assemble ───────────────────────────────────────────────────────────

+ /** Build the context window: graph retrieval + system prompt additions + budget trimming. */
  async assemble(params: {
  sessionId: string;
  sessionKey?: string;
@@ -173,26 +177,22 @@ export class KongBrainContextEngine implements ContextEngine {
  if (systemPromptSection) additions.push(systemPromptSection);

  // Compaction summary (claw-code: compact.rs structured signals — inject once after compaction)
- const compactionSummary = (session as any)._compactionSummary as string | undefined;
+ const compactionSummary = session._compactionSummary;
  if (compactionSummary) {
  additions.push("[POST-COMPACTION CONTEXT]\n" + compactionSummary);
- delete (session as any)._compactionSummary;
+ session._compactionSummary = undefined;
  }

  // Wakeup briefing (synthesized at session start, may still be in-flight)
- const wakeupPromise = (session as any)._wakeupPromise as Promise<string | null> | undefined;
+ const wakeupPromise = session._wakeupPromise;
  if (wakeupPromise) {
  const wakeupBriefing = await wakeupPromise;
- delete (session as any)._wakeupPromise; // Only inject once
+ session._wakeupPromise = undefined; // Only inject once
  if (wakeupBriefing) additions.push(wakeupBriefing);
  }

  // Graduation celebration — tell the agent it just graduated so it can share with the user
- const graduation = (session as any)._graduationCelebration as {
- qualityScore: number;
- volumeScore: number;
- soulSummary: string;
- } | undefined;
+ const graduation = session._graduationCelebration;
  if (graduation) {
  let graduationBlock =
  "[SOUL GRADUATION — CELEBRATE WITH THE USER]\n" +
@@ -211,11 +211,11 @@ export class KongBrainContextEngine implements ContextEngine {
  "identity emerging from YOUR experience. Don't be robotic about it. This only happens once.";

  additions.push(graduationBlock);
- delete (session as any)._graduationCelebration; // Only inject once
+ session._graduationCelebration = undefined; // Only inject once
  }

  // Migration nudge — tell the agent there are workspace files to offer migrating
- if ((session as any)._hasMigratableFiles) {
+ if (session._hasMigratableFiles) {
  additions.push(
  "[MIGRATION AVAILABLE] This workspace has files from the default context engine " +
  "(IDENTITY.md, MEMORY.md, skills/, etc.). You can offer to migrate them into the graph " +
@@ -226,15 +226,31 @@ export class KongBrainContextEngine implements ContextEngine {
  );
  }

+ // Apply SPA priority budget — drop lowest-priority sections if over budget
+ // (dropped sections aren't lost — they're in the graph, retrievable on demand)
+ const BYTES_PER_TOKEN = 4; // claw-code: roughTokenCountEstimation default
+ const SPA_BUDGET_CHARS = Math.round(contextWindow * 0.08 * BYTES_PER_TOKEN);
+ let spaTotalChars = 0;
+ const keptAdditions: string[] = [];
+ for (const section of additions) { // additions are already in priority order
+ if (spaTotalChars + section.length > SPA_BUDGET_CHARS && keptAdditions.length > 0) break;
+ keptAdditions.push(section);
+ spaTotalChars += section.length;
+ }
+
+ const spaText = keptAdditions.length > 0 ? keptAdditions.join("\n\n") : undefined;
+ const spaTokens = spaText ? Math.ceil(spaText.length / BYTES_PER_TOKEN) : 0;
+
  return {
  messages,
- estimatedTokens: stats.sentTokens,
- systemPromptAddition: additions.length > 0 ? additions.join("\n\n") : undefined,
+ estimatedTokens: stats.sentTokens + spaTokens,
+ systemPromptAddition: spaText,
  };
  }

  // ── Ingest ─────────────────────────────────────────────────────────────

+ /** Embed and store a single user or assistant message as a turn node. */
  async ingest(params: {
  sessionId: string;
  sessionKey?: string;
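
A quick sanity check on the new SPA budget, stated as a standalone sketch rather than package code: with the default 200,000-token window the cap is Math.round(200_000 * 0.08 * 4) = 64,000 characters (roughly 16,000 tokens), and the loop always keeps at least the first, highest-priority section even if it alone exceeds the cap. The helper name below is illustrative.

// Sketch only: mirrors the priority-budget loop added to assemble() above.
const CHARS_PER_TOKEN = 4;

function trimToSpaBudget(sections: string[], contextWindowTokens: number): string[] {
  const budgetChars = Math.round(contextWindowTokens * 0.08 * CHARS_PER_TOKEN);
  const kept: string[] = [];
  let total = 0;
  for (const section of sections) { // assumed to be sorted by priority already
    if (total + section.length > budgetChars && kept.length > 0) break; // never drop the first section
    kept.push(section);
    total += section.length;
  }
  return kept;
}

// 200k-token window -> 64,000-char cap; an oversized first section is still kept.
console.log(trimToSpaBudget(["x".repeat(70_000), "y".repeat(10)], 200_000).length); // 1
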
@@ -247,7 +263,7 @@ export class KongBrainContextEngine implements ContextEngine {
  const msg = params.message;

  try {
- const role = (msg as any).role as string;
+ const role = "role" in msg ? (msg as { role: string }).role : "";
  if (role === "user" || role === "assistant") {
  const text = extractMessageText(msg);
  if (!text) return { ingested: false };
@@ -256,11 +272,16 @@ export class KongBrainContextEngine implements ContextEngine {
  let embedding: number[] | null = null;
  if (worthEmbedding && embeddings.isAvailable()) {
  try {
- const embedLimit = Math.round(8192 * 3.4 * 0.8);
- embedding = await embeddings.embed(text.slice(0, embedLimit));
+ const INGEST_EMBED_CHAR_LIMIT = 22_282; // ~6,554 tokens at 3.4 chars/token (BGE-M3 8192-token window * 0.8 safety margin)
+ embedding = await embeddings.embed(text.slice(0, INGEST_EMBED_CHAR_LIMIT));
  } catch (e) { swallow("ingest:embed", e); }
  }

+ // Stash user embedding for reuse in buildContextualQueryVec (avoids re-embedding)
+ if (role === "user" && embedding) {
+ session.lastUserEmbedding = embedding;
+ }
+
  const turnId = await store.upsertTurn({
  session_id: session.sessionId,
  role,
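
The new INGEST_EMBED_CHAR_LIMIT constant hard-codes the value the removed inline expression produced; a one-line check of that arithmetic (a sketch, not package code):

// 8192-token BGE-M3 window * 3.4 chars/token * 0.8 safety margin
console.log(Math.round(8192 * 3.4 * 0.8)); // 22282
console.log(Math.round(22_282 / 3.4));     // 6554 tokens, i.e. roughly 8192 * 0.8
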
@@ -327,6 +348,7 @@ export class KongBrainContextEngine implements ContextEngine {

  // ── Compact ────────────────────────────────────────────────────────────

+ /** Extract structured signals (pending work, key files, errors) for post-compaction injection. */
  async compact(params: {
  sessionId: string;
  sessionKey?: string;
@@ -346,8 +368,9 @@ export class KongBrainContextEngine implements ContextEngine {

  // Extract structured compaction signals from stored turns
  let summary: string | undefined;
+ const { store } = this.state;
+ const contextWindow = params.tokenBudget ?? 200_000;
  try {
- const { store } = this.state;
  if (store.isAvailable()) {
  const turns = await store.getSessionTurnsRich(params.sessionId, 30);
  if (turns.length > 0) {
@@ -370,6 +393,12 @@ export class KongBrainContextEngine implements ContextEngine {
  turns.filter(t => t.tool_name).map(t => t.tool_name!)
  )];

+ // Recent errors — preserve tool failure context across compaction
+ const errorRe = /\b(error|failed|exception|crash|panic|TypeError|ReferenceError)\b[^.\n]{0,120}/gi;
+ const recentErrors = [...fullText.matchAll(errorRe)]
+ .map(m => m[0].trim().slice(0, 160))
+ .slice(-3); // last 3 errors only
+
  // Current work inference (claw-code: compact.rs:272-279)
  const lastText = turns.filter(t => t.text.length > 10).at(-1)?.text.slice(0, 200) ?? "";

@@ -377,6 +406,7 @@ export class KongBrainContextEngine implements ContextEngine {
  if (pendingMatches.length > 0) parts.push(`PENDING: ${pendingMatches.join("; ")}`);
  if (filePaths.length > 0) parts.push(`FILES: ${filePaths.join(", ")}`);
  if (toolNames.length > 0) parts.push(`TOOLS USED: ${toolNames.join(", ")}`);
+ if (recentErrors.length > 0) parts.push(`RECENT ERRORS: ${recentErrors.join("; ")}`);
  if (lastText) parts.push(`LAST: ${lastText}`);
  parts.push("Resume directly — do not recap what was happening.");

@@ -384,25 +414,34 @@ export class KongBrainContextEngine implements ContextEngine {
  summary = parts.join("\n");
  // Stash for next assemble() to inject
  if (session) {
- (session as any)._compactionSummary = summary;
+ session._compactionSummary = summary;
  }
  }
  }
  }
  } catch { /* non-critical */ }

+ // Compaction checkpoint — diagnostic trail for debugging
+ if (store.isAvailable() && session) {
+ store.createCompactionCheckpoint(params.sessionId, 0, session.userTurnCount)
+ .catch(e => swallow.warn("compact:checkpoint", e));
+ }
+
  return {
  ok: true,
- compacted: !!summary,
- reason: summary
- ? "Extracted structured signals for continuation."
- : "Graph retrieval handles context selection; no LLM-based compaction needed.",
- result: summary ? { summary, tokensBefore: 0 } : undefined,
+ compacted: true,
+ reason: "Graph-curated context window: assemble() selects relevant context each turn.",
+ result: summary ? {
+ summary,
+ tokensBefore: Math.round(summary.length / 4), // 4 bytes/token (claw-code ratio)
+ tokensAfter: Math.round(contextWindow * 0.325),
+ } : undefined,
  };
  }

  // ── After turn ─────────────────────────────────────────────────────────

+ /** Post-turn: ingest messages, evaluate retrieval quality, flush daemon, and run periodic maintenance. */
  async afterTurn?(params: {
  sessionId: string;
  sessionKey?: string;
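
As a standalone illustration (not package code) of the new RECENT ERRORS signal above: the regex keeps up to 120 characters after each error keyword, stopping at a period or newline, and the pipeline then trims, caps each match at 160 characters, and keeps only the last three. A run on made-up transcript text, under those assumptions:

// Sketch: same regex and post-processing as the compact() hunk above, on sample text.
const errorRe = /\b(error|failed|exception|crash|panic|TypeError|ReferenceError)\b[^.\n]{0,120}/gi;
const fullText = [
  "npm test failed with 2 failing specs",
  "TypeError: cannot read properties of undefined (reading 'id')",
  "retry succeeded after the panic in the worker pool",
  "build error in the engine module on line 42",
].join("\n");

const recentErrors = [...fullText.matchAll(errorRe)]
  .map(m => m[0].trim().slice(0, 160))
  .slice(-3); // keep only the last 3 matches

console.log(recentErrors);
// [
//   "TypeError: cannot read properties of undefined (reading 'id')",
//   "panic in the worker pool",
//   "error in the engine module on line 42"
// ]
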
@@ -411,11 +450,31 @@ export class KongBrainContextEngine implements ContextEngine {
  prePromptMessageCount: number;
  }): Promise<void> {
  const sessionKey = params.sessionKey ?? params.sessionId;
- const session = this.state.getSession(sessionKey);
- if (!session) return;
+ log.debug(`afterTurn: session=${sessionKey} messages=${params.messages.length}`);
+ // Use getOrCreateSession so resumed sessions (where session_start
+ // didn't fire after a gateway restart) still get a session object.
+ const session = this.state.getOrCreateSession(sessionKey, params.sessionId);

  const { store, embeddings } = this.state;

+ // Lazy daemon start: if session was resumed after gateway restart,
+ // session_start won't re-fire, so the daemon never started.
+ if (!session.daemon && typeof this.state.complete === "function") {
+ try {
+ session.daemon = startMemoryDaemon(
+ store,
+ embeddings,
+ session.sessionId,
+ this.state.complete,
+ this.state.config.thresholds.extractionTimeoutMs,
+ session.taskId,
+ session.projectId,
+ );
+ } catch (e) {
+ swallow.warn("afterTurn:lazyDaemonStart", e);
+ }
+ }
+
  // Deferred cleanup: run once on first turn when complete() is available
  if (session.userTurnCount <= 1 && typeof this.state.complete === "function") {
  runDeferredCleanup(store, embeddings, this.state.complete)
@@ -442,11 +501,12 @@ export class KongBrainContextEngine implements ContextEngine {
  .catch(e => swallow.warn("afterTurn:evaluateRetrieval", e));
  }

+ // Single fetch for all downstream consumers (cognitive check, daemon flush, handoff)
+ const allSessionTurns = await store.getSessionTurns(session.sessionId, 50)
+ .catch(() => [] as { role: string; text: string }[]);
+
  // Cognitive check: periodic reasoning over retrieved context
  if (shouldRunCheck(session.userTurnCount, session) && stagedSnapshot.length > 0) {
- const recentTurns = await store.getSessionTurns(session.sessionId, 6)
- .catch(() => [] as { role: string; text: string }[]);
-
  runCognitiveCheck({
  sessionId: session.sessionId,
  userQuery: session.lastUserText,
@@ -457,20 +517,21 @@ export class KongBrainContextEngine implements ContextEngine {
  score: n.finalScore ?? 0,
  table: n.table,
  })),
- recentTurns,
+ recentTurns: allSessionTurns.slice(-6),
  }, session, store, this.state.complete).catch(e => swallow.warn("afterTurn:cognitiveCheck", e));
  }

  // Flush to daemon when token threshold OR turn count threshold is reached
  const tokenReady = session.newContentTokens >= session.daemonTokenThreshold;
  const turnReady = session.userTurnCount >= session.lastDaemonFlushTurnCount + 3;
+ log.debug(`flush check: daemon=${!!session.daemon} tokenReady=${tokenReady} turnReady=${turnReady} turns=${session.userTurnCount}`);
  if (session.daemon && (tokenReady || turnReady)) {
  try {
- const recentTurns = await store.getSessionTurns(session.sessionId, 20);
+ const recentTurns = allSessionTurns.slice(-20);
  const turnData = recentTurns.map(t => ({
  role: t.role as "user" | "assistant",
  text: t.text,
- turnId: String((t as any).id ?? ""),
+ turnId: String((t as { id?: string }).id ?? ""),
  }));

  // Gather retrieved memory IDs for dedup
@@ -503,20 +564,14 @@ export class KongBrainContextEngine implements ContextEngine {
  // Fire-and-forget: these are non-critical background operations
  const cleanupOps: Promise<unknown>[] = [];

- // Final daemon flush with full transcript before cleanup
+ // Final daemon flush with full transcript before cleanup (reuse allSessionTurns)
  if (session.daemon) {
- cleanupOps.push(
- store.getSessionTurns(session.sessionId, 50)
- .then(recentTurns => {
- const turnData = recentTurns.map(t => ({
- role: t.role as "user" | "assistant",
- text: t.text,
- turnId: String((t as any).id ?? ""),
- }));
- session.daemon!.sendTurnBatch(turnData, [...session.pendingThinking], []);
- })
- .catch(e => swallow.warn("midCleanup:daemonFlush", e)),
- );
+ const turnData = allSessionTurns.map(t => ({
+ role: t.role as "user" | "assistant",
+ text: t.text,
+ turnId: String((t as { id?: string }).id ?? ""),
+ }));
+ session.daemon.sendTurnBatch(turnData, [...session.pendingThinking], []);
  }

  if (session.taskId) {
@@ -542,10 +597,10 @@ export class KongBrainContextEngine implements ContextEngine {
  .catch(e => swallow("midCleanup:acan", e)),
  );

- // Handoff note — snapshot for wakeup even if session continues
+ // Handoff note — snapshot for wakeup even if session continues (reuse allSessionTurns)
  cleanupOps.push(
  (async () => {
- const recentTurns = await store.getSessionTurns(session.sessionId, 15);
+ const recentTurns = allSessionTurns.slice(-15);
  if (recentTurns.length < 2) return;
  const turnSummary = recentTurns
  .map(t => `[${t.role}] ${t.text.slice(0, 200)}`)
@@ -635,12 +690,12 @@ export class KongBrainContextEngine implements ContextEngine {
  // ── Helpers ────────────────────────────────────────────────────────────────────

  function extractMessageText(msg: AgentMessage): string {
- const m = msg as any;
+ const m = msg as { content?: string | { type: string; text?: string }[] };
  if (typeof m.content === "string") return m.content;
  if (Array.isArray(m.content)) {
  return m.content
- .filter((c: any) => c.type === "text")
- .map((c: any) => c.text ?? "")
+ .filter((c) => c.type === "text")
+ .map((c) => c.text ?? "")
  .join("\n");
  }
  return "";
@@ -36,7 +36,7 @@ export function startMemoryDaemon(
  sharedEmbeddings: EmbeddingService,
  sessionId: string,
  complete: CompleteFn,
- extractionTimeoutMs = 60_000,
+ extractionTimeoutMs = 120_000,
  taskId?: string,
  projectId?: string,
  ): MemoryDaemon {
@@ -79,10 +79,10 @@ export function startMemoryDaemon(
  const { buildSystemPrompt, buildTranscript, writeExtractionResults } = await import("./memory-daemon.js");

  const transcript = buildTranscript(turns);
- const sections: string[] = [`[TRANSCRIPT]\n${transcript.slice(0, 60000)}`];
+ const sections: string[] = [`[TRANSCRIPT]\n${transcript.slice(0, 30000)}`];

  if (thinking.length > 0) {
- sections.push(`[THINKING]\n${thinking.slice(-8).join("\n---\n").slice(0, 4000)}`);
+ sections.push(`[THINKING]\n${thinking.slice(-3).join("\n---\n").slice(0, 2000)}`);
  }

  if (retrievedMemories.length > 0) {
@@ -92,33 +92,79 @@ export function startMemoryDaemon(

  const systemPrompt = buildSystemPrompt(thinking.length > 0, retrievedMemories.length > 0, priorState);

+ // Structured output schema — forces API to return valid JSON (no markdown, no preamble)
+ const extractionSchema = {
+ type: "object" as const,
+ properties: {
+ causal: { type: "array", items: { type: "object" } },
+ monologue: { type: "array", items: { type: "object" } },
+ resolved: { type: "array", items: { type: "string" } },
+ concepts: { type: "array", items: { type: "object" } },
+ corrections: { type: "array", items: { type: "object" } },
+ preferences: { type: "array", items: { type: "object" } },
+ artifacts: { type: "array", items: { type: "object" } },
+ decisions: { type: "array", items: { type: "object" } },
+ skills: { type: "array", items: { type: "object" } },
+ },
+ required: ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"],
+ };
+
  const response = await complete({
  system: systemPrompt,
  messages: [{ role: "user", content: sections.join("\n\n") }],
+ outputFormat: { type: "json_schema", schema: extractionSchema },
  });

- const responseText = response.text;
+ let responseText = response.text;

- const jsonMatch = responseText.match(/\{[\s\S]*?\}/);
- if (!jsonMatch) return;
+ // Sanitize: strip BOM, markdown fences, and trim
+ responseText = responseText.replace(/^\uFEFF/, "").trim();
+ const fenceMatch = responseText.match(/^```(?:json)?\s*\n([\s\S]*?)\n```\s*$/);
+ if (fenceMatch) responseText = fenceMatch[1].trim();

+ // With structured output the response should be valid JSON directly.
+ // Fall back to regex extraction if the provider doesn't support outputFormat.
  let result: Record<string, any>;
  try {
- result = JSON.parse(jsonMatch[0]);
- } catch {
+ result = JSON.parse(responseText);
+ } catch (parseErr) {
+ swallow.warn("daemon:parseDebug", new Error(
+ `JSON.parse failed: ${(parseErr as Error).message}; ` +
+ `len=${responseText.length}; first100=${JSON.stringify(responseText.slice(0, 100))}; ` +
+ `last100=${JSON.stringify(responseText.slice(-100))}`
+ ));
+ const jsonMatch = responseText.match(/\{[\s\S]*\}/);
+ if (!jsonMatch) {
+ swallow.warn("daemon:noJson", new Error(`LLM response contained no JSON (${responseText.length} chars)`));
+ return;
+ }
  try {
- result = JSON.parse(jsonMatch[0].replace(/,\s*([}\]])/g, "$1"));
+ result = JSON.parse(jsonMatch[0]);
  } catch {
- result = {};
- const fields = ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"];
- for (const field of fields) {
- const fieldMatch = jsonMatch[0].match(new RegExp(`"${field}"\\s*:\\s*(\\[[\\s\\S]*?\\])(?=\\s*[,}]\\s*"[a-z]|\\s*\\}$)`, "m"));
- if (fieldMatch) {
- try { result[field] = JSON.parse(fieldMatch[1]); } catch { /* skip */ }
+ // Try fixing trailing commas
+ try {
+ result = JSON.parse(jsonMatch[0].replace(/,\s*([}\]])/g, "$1"));
+ } catch {
+ // Try stripping control characters
+ try {
+ const cleaned = jsonMatch[0].replace(/[\x00-\x08\x0b\x0c\x0e-\x1f]/g, "");
+ result = JSON.parse(cleaned);
+ } catch {
+ result = {};
+ const fields = ["causal", "monologue", "resolved", "concepts", "corrections", "preferences", "artifacts", "decisions", "skills"];
+ for (const field of fields) {
+ const fieldMatch = jsonMatch[0].match(new RegExp(`"${field}"\\s*:\\s*(\\[[\\s\\S]*?\\])(?=\\s*[,}]\\s*"[a-z]|\\s*\\}$)`, "m"));
+ if (fieldMatch) {
+ try { result[field] = JSON.parse(fieldMatch[1]); } catch { /* skip */ }
+ }
+ }
+ const PRIMARY_FIELDS = ["causal", "monologue", "artifacts"];
+ if (!PRIMARY_FIELDS.some(f => f in result)) {
+ swallow.warn("daemon:fallbackFailed", new Error(`Regex fallback extracted no primary fields from: ${jsonMatch[0].slice(0, 100)}`));
+ return;
+ }
  }
  }
- const PRIMARY_FIELDS = ["causal", "monologue", "artifacts"];
- if (!PRIMARY_FIELDS.some(f => f in result)) return;
  }
  }

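The parse path above now forms a ladder: direct JSON.parse, then fence/BOM stripping, then brace extraction, then a trailing-comma repair, then a control-character strip, and finally per-field regex recovery. A compact standalone sketch of the same repair order (function name and inputs are illustrative, not package code):

// Sketch only: the repair order the daemon's fallback chain applies.
function parseLoose(raw: string): Record<string, unknown> | null {
  let text = raw.replace(/^\uFEFF/, "").trim();
  const fence = text.match(/^```(?:json)?\s*\n([\s\S]*?)\n```\s*$/);
  if (fence) text = fence[1].trim();
  try { return JSON.parse(text); } catch { /* fall through to repairs */ }
  const match = text.match(/\{[\s\S]*\}/);
  if (!match) return null;
  const candidates = [
    match[0],
    match[0].replace(/,\s*([}\]])/g, "$1"),                 // trailing commas
    match[0].replace(/[\x00-\x08\x0b\x0c\x0e-\x1f]/g, ""),  // stray control chars
  ];
  for (const candidate of candidates) {
    try { return JSON.parse(candidate); } catch { /* try next repair */ }
  }
  return null;
}

console.log(parseLoose('```json\n{"causal": []}\n```'));  // { causal: [] }
console.log(parseLoose('Sure! {"skills": [],}'));         // { skills: [] }
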
@@ -164,9 +210,14 @@ export function startMemoryDaemon(
  sendTurnBatch(turns, thinking, retrievedMemories, priorExtractions) {
  if (shuttingDown) return;
  if (pendingBatch) {
- swallow.warn("daemon:batchOverwrite", new Error(`Overwriting pending batch (${pendingBatch.turns.length} turns) with new batch (${turns.length} turns)`));
+ // Merge into the pending batch instead of discarding it, so no turn data is lost
+ pendingBatch.turns = [...pendingBatch.turns, ...turns];
+ pendingBatch.thinking = [...pendingBatch.thinking, ...thinking];
+ pendingBatch.retrievedMemories = [...pendingBatch.retrievedMemories, ...retrievedMemories];
+ pendingBatch.priorExtractions = priorExtractions ?? pendingBatch.priorExtractions;
+ } else {
+ pendingBatch = { turns, thinking, retrievedMemories, priorExtractions };
  }
- pendingBatch = { turns, thinking, retrievedMemories, priorExtractions };
  // Fire-and-forget
  processPending().catch(e => swallow.warn("daemon:sendBatch", e));
  },
@@ -14,10 +14,12 @@ import type { CompleteFn } from "./state.js";
  import { buildSystemPrompt, buildTranscript, writeExtractionResults } from "./memory-daemon.js";
  import type { PriorExtractions } from "./daemon-types.js";
  import { swallow } from "./errors.js";
+ import { log } from "./log.js";

  // Process-global flag — deferred cleanup runs AT MOST ONCE per process.
  // Using Symbol.for so it survives Jiti re-importing this module.
  const RAN_KEY = Symbol.for("kongbrain.deferredCleanup.ran");
+ const _g = globalThis as Record<symbol, unknown>;

  /**
  * Find and process orphaned sessions. Runs with a 30s total timeout.
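
The RAN_KEY guard depends on Symbol.for returning the same registry-backed symbol even when jiti re-evaluates the module; a minimal sketch of why the once-per-process flag survives a re-import (illustrative, not package code):

// Symbol.for() looks up a global registry, so two "module evaluations" see the same key,
// while plain Symbol() would mint a fresh, unequal symbol each time.
const keyA = Symbol.for("kongbrain.deferredCleanup.ran"); // first import
const keyB = Symbol.for("kongbrain.deferredCleanup.ran"); // re-import after reload
console.log(keyA === keyB);               // true
console.log(Symbol("x") === Symbol("x")); // false: plain symbols are always unique

const g = globalThis as Record<symbol, unknown>;
g[keyA] = true;
console.log(g[keyB]);                     // true: the guard set under keyA is visible under keyB
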
@@ -30,8 +32,8 @@ export async function runDeferredCleanup(
  complete: CompleteFn,
  ): Promise<number> {
  // Once per process — never re-run even if first run times out
- if ((globalThis as any)[RAN_KEY]) return 0;
- (globalThis as any)[RAN_KEY] = true;
+ if (_g[RAN_KEY]) return 0;
+ _g[RAN_KEY] = true;

  try {
  return await runDeferredCleanupInner(store, embeddings, complete);
@@ -101,8 +103,8 @@ async function processOrphanedSession(
  const systemPrompt = buildSystemPrompt(false, false, priorState);

  try {
- console.warn(`[deferred] extracting session ${surrealSessionId} (${turns.length} turns, transcript ${transcript.length} chars)`);
- const LLM_CALL_TIMEOUT_MS = 30_000;
+ log.info(`[deferred] extracting session ${surrealSessionId} (${turns.length} turns, transcript ${transcript.length} chars)`);
+ const LLM_CALL_TIMEOUT_MS = 120_000;
  const response = await Promise.race([
  complete({
  system: systemPrompt,
@@ -114,7 +116,7 @@ async function processOrphanedSession(
  ]);

  const responseText = response.text;
- console.warn(`[deferred] extraction response: ${responseText.length} chars`);
+ log.info(`[deferred] extraction response: ${responseText.length} chars`);
  const jsonMatch = responseText.match(/\{[\s\S]*\}/);
  if (jsonMatch) {
  let result: Record<string, any>;
@@ -128,17 +130,17 @@ async function processOrphanedSession(
  // Strip prototype pollution keys from LLM-generated JSON
  const BANNED_KEYS = new Set(["__proto__", "constructor", "prototype"]);
  for (const key of Object.keys(result)) {
- if (BANNED_KEYS.has(key)) delete (result as any)[key];
+ if (BANNED_KEYS.has(key)) delete result[key];
  }

  const keys = Object.keys(result);
- console.warn(`[deferred] parsed ${keys.length} keys: ${keys.join(", ")}`);
+ log.info(`[deferred] parsed ${keys.length} keys: ${keys.join(", ")}`);
  if (keys.length > 0) {
  await writeExtractionResults(result, surrealSessionId, store, embeddings, priorState, undefined, undefined, turnData);
- console.warn(`[deferred] wrote extraction results for ${surrealSessionId}`);
+ log.info(`[deferred] wrote extraction results for ${surrealSessionId}`);
  }
  } else {
- console.warn(`[deferred] no JSON found in response`);
+ log.warn(`[deferred] no JSON found in response`);
  }
  } catch (e) {
  swallow.warn("deferredCleanup:extraction", e);
@@ -162,7 +164,7 @@ async function processOrphanedSession(
  ]);

  const handoffText = handoffResponse.text.trim();
- console.warn(`[deferred] handoff response: ${handoffText.length} chars`);
+ log.info(`[deferred] handoff response: ${handoffText.length} chars`);
  if (handoffText.length > 20) {
  let emb: number[] | null = null;
  if (embeddings.isAvailable()) {
package/src/embeddings.ts CHANGED
@@ -1,12 +1,14 @@
  import { existsSync } from "node:fs";
  import type { EmbeddingConfig } from "./config.js";
  import { swallow } from "./errors.js";
+ import { log } from "./log.js";

  // Lazy-import node-llama-cpp to avoid top-level await issues with jiti.
  // The actual import happens inside initialize() at runtime.
  type LlamaEmbeddingContext = import("node-llama-cpp").LlamaEmbeddingContext;
  type LlamaModel = import("node-llama-cpp").LlamaModel;

+ /** BGE-M3 embedding service (1024-dim via GGUF) with an LRU cache of up to 512 entries. */
  export class EmbeddingService {
  private model: LlamaModel | null = null;
  private ctx: LlamaEmbeddingContext | null = null;
@@ -30,8 +32,8 @@ export class EmbeddingService {
  logLevel: LlamaLogLevel.error,
  logger: (level, message) => {
  if (message.includes("missing newline token")) return;
- if (level === LlamaLogLevel.error) console.error(`[llama] ${message}`);
- else if (level === LlamaLogLevel.warn) console.warn(`[llama] ${message}`);
+ if (level === LlamaLogLevel.error) log.error(`[llama] ${message}`);
+ else if (level === LlamaLogLevel.warn) log.warn(`[llama] ${message}`);
  },
  });
  this.model = await llama.loadModel({ modelPath: this.config.modelPath });
@@ -40,6 +42,7 @@ export class EmbeddingService {
  return true;
  }

+ /** Return the embedding vector for text, serving from LRU cache on repeat calls. */
  async embed(text: string): Promise<number[]> {
  if (!this.ready || !this.ctx) throw new Error("Embeddings not initialized");
  const cached = this.cache.get(text);
@@ -61,11 +64,7 @@ export class EmbeddingService {

  async embedBatch(texts: string[]): Promise<number[][]> {
  if (texts.length === 0) return [];
- const results: number[][] = [];
- for (const text of texts) {
- results.push(await this.embed(text));
- }
- return results;
+ return Promise.all(texts.map(text => this.embed(text)));
  }

  isAvailable(): boolean {
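
The rewritten embedBatch delegates to Promise.all, which keeps results aligned with the input order and surfaces the first rejection, the same observable contract as the old sequential loop. A tiny sketch of the order-preservation point with a stand-in embed function (names and timings are illustrative, not package code):

// Sketch: Promise.all returns results in input order even if they resolve out of order.
const fakeEmbed = (text: string): Promise<number[]> =>
  new Promise<number[]>(resolve => setTimeout(() => resolve([text.length]), Math.random() * 10));

async function embedBatchSketch(texts: string[]): Promise<number[][]> {
  if (texts.length === 0) return [];
  return Promise.all(texts.map(text => fakeEmbed(text)));
}

embedBatchSketch(["a", "bbb", "cc"]).then(v => console.log(v)); // [[1], [3], [2]]
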
package/src/errors.ts CHANGED
@@ -9,6 +9,8 @@
  * Always logged to stderr with stack trace.
  */

+ import { log } from "./log.js";
+
  const DEBUG = process.env.KONGBRAIN_DEBUG === "1";

  /**
@@ -18,7 +20,7 @@ const DEBUG = process.env.KONGBRAIN_DEBUG === "1";
  function swallow(context: string, err?: unknown): void {
  if (!DEBUG) return;
  const msg = err instanceof Error ? err.message : String(err ?? "unknown");
- console.debug(`[swallow] ${context}: ${msg}`);
+ log.debug(`[swallow] ${context}: ${msg}`);
  }

  /**
@@ -27,7 +29,7 @@ function swallow(context: string, err?: unknown): void {
  */
  swallow.warn = function swallowWarn(context: string, err?: unknown): void {
  const msg = err instanceof Error ? err.message : String(err ?? "unknown");
- console.warn(`[warn] ${context}: ${msg}`);
+ log.warn(`${context}: ${msg}`);
  };

  /**
@@ -37,7 +39,7 @@ swallow.warn = function swallowWarn(context: string, err?: unknown): void {
  swallow.error = function swallowError(context: string, err?: unknown): void {
  const msg = err instanceof Error ? err.message : String(err ?? "unknown");
  const stack = err instanceof Error ? `\n${err.stack}` : "";
- console.error(`[ERROR] ${context}: ${msg}${stack}`);
+ log.error(`${context}: ${msg}${stack}`);
  };

  export { swallow };