kongbrain 0.4.1 → 0.4.3
- package/.github/workflows/ci.yml +45 -0
- package/.github/workflows/pr-check.yml +16 -0
- package/CHANGELOG.md +64 -0
- package/README.github.md +40 -1
- package/SKILL.md +1 -1
- package/TOKEN_FLOW.md +184 -0
- package/package.json +1 -1
- package/src/acan.ts +28 -5
- package/src/causal.ts +18 -25
- package/src/cognitive-bootstrap.ts +6 -6
- package/src/cognitive-check.ts +17 -19
- package/src/config.ts +1 -1
- package/src/context-engine.ts +105 -50
- package/src/daemon-manager.ts +70 -19
- package/src/deferred-cleanup.ts +12 -10
- package/src/embeddings.ts +6 -7
- package/src/errors.ts +5 -3
- package/src/graph-context.ts +281 -178
- package/src/hooks/after-tool-call.ts +2 -1
- package/src/hooks/before-tool-call.ts +15 -11
- package/src/hooks/llm-output.ts +18 -10
- package/src/index.ts +39 -18
- package/src/intent.ts +9 -8
- package/src/log.ts +11 -0
- package/src/memory-daemon.ts +1 -0
- package/src/orchestrator.ts +11 -4
- package/src/prefetch.ts +2 -2
- package/src/reflection.ts +9 -2
- package/src/schema.surql +7 -0
- package/src/skills.ts +32 -10
- package/src/soul.ts +17 -1
- package/src/state.ts +31 -0
- package/src/supersedes.ts +99 -0
- package/src/surreal.ts +174 -110
- package/src/tools/introspect.ts +1 -1
- package/src/wakeup.ts +0 -142
package/.github/workflows/ci.yml
ADDED

@@ -0,0 +1,45 @@

```yaml
name: CI

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

jobs:
  test:
    runs-on: ubuntu-latest

    services:
      surrealdb:
        image: surrealdb/surrealdb:latest
        ports:
          - 8000:8000
        options: >-
          --health-cmd "curl -sf http://localhost:8000/health || exit 1"
          --health-interval 5s
          --health-timeout 5s
          --health-retries 10
        env:
          SURREAL_USER: root
          SURREAL_PASS: root

    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm

      - run: npm ci

      - name: Run unit tests
        run: npx vitest run --exclude test/integration.test.ts

      - name: Run integration tests
        run: npx vitest run test/integration.test.ts
        env:
          SURREAL_URL: ws://localhost:8000/rpc
          SURREAL_USER: root
          SURREAL_PASS: root
```
package/.github/workflows/pr-check.yml
ADDED

@@ -0,0 +1,16 @@

```yaml
name: PR Check

on: pull_request

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
      - run: npm ci
      - run: npx tsc --noEmit || true # Type check (peer deps may be missing)
      - run: npx vitest run --exclude test/integration.test.ts
```
package/CHANGELOG.md
ADDED

@@ -0,0 +1,64 @@

```markdown
# Changelog

All notable changes to KongBrain are documented here.

## [0.4.2] - 2026-04-03

### Performance
- **DB query batching**: `queryBatch()` sends N SQL statements in 1 round-trip. `graphExpand` (208 queries → 1-2/hop), `queryCausalContext` (120 → 1-2), `vectorSearch` (7 → 1).
- **Embedding reuse**: User embeddings from ingest stashed in session state and reused in retrieval, eliminating 1-4 redundant BGE-M3 calls per turn.
- **Token estimation**: Aligned with Claude Code — 4 bytes/token (was 3.4), JSON at 2 bytes/token, images at 2000 tokens, 33% safety margin.
- **Content stripping**: Old thinking blocks, images, tool results, and assistant filler text surgically replaced with compact stubs. Saves 20-80k tokens/session.
- **Prompt compression**: Rules suffix (~400 → ~80 tokens), planning gate (~250 → ~60), IKONG description (~120 → ~20), cognitive check (~300 → ~120).
- **Structured output**: All internal LLM calls use `json_schema` output format when supported. Eliminates markdown fencing and preamble.
- **Budget model**: 4-way split (conversation 23%, retrieval 38.5%, core 15.5%, tools 23%) with SPA cap at 8% of context window.
- **Parallel DB calls**: `scoreResults` parallelizes utility cache + reflection session lookups. Single `getSessionTurns` fetch in `afterTurn` reused by all consumers.
- **Tier 0 dedup**: Core memory fetched once per `assemble()`, passed to inner transform (was fetched twice).
- **Cognitive check frequency**: Every 5 turns (was 3), skipped when `skipRetrieval=true`.

### Security
- **Edge name validation**: `VALID_EDGES` whitelist + `assertValidEdge()` prevents SQL injection via edge interpolation in `graphExpand` and `queryCausalContext`.

### Bug Fixes
- Tool limit enforcement: `>` → `>=` (was allowing 1 extra call past limit).
- Daemon batch merge instead of overwrite (prevents turn data loss when batches arrive faster than extraction).
- Reflection dedup: `typeof` check on score (prevents undefined bypass creating duplicates).
- Extraction fallback: Warns on no-JSON and regex fallback failure (was silent).
- Shutdown errors logged instead of swallowed.
- Config comment: `midSessionCleanupThreshold` documented as 100k, actual default 25k — fixed.
- Cognitive bootstrap importance: Float scale (0.85) → integer scale (9) matching rest of codebase.

### Documentation
- JSDoc on all critical exported functions.
- Named constants replacing magic numbers (`DEDUP_COSINE_THRESHOLD`, `MAX_FRONTIER_SEEDS`, `EDGE_NEIGHBOR_LIMIT`, etc.).
- README: Added Performance section with batching, estimation, stripping, and structured output details.

### Tests
- 88 → 415 tests (21 test files). Full coverage: ACAN scorer, hooks, memory daemon extraction, skills, soul system, wakeup, concept extraction, session persistence, tools, subagent lifecycle, and SurrealDB integration tests against live database.

## [0.4.1] - 2026-04-02

### Performance
- Inline intent classification: Parse LOOKUP/EDIT/REFACTOR from assistant text to set tool limits without extra LLM call.
- Default tool budget reduced to 9.
- LRU embedding cache (512 entries).
- System prompt caching split (static vs dynamic sections).
- Token delta computation (prevent quadratic overcounting).
- Concept backfilling with embedding similarity.

### Bug Fixes
- 17 bugs resolved from deep codebase review.

## [0.4.0] - 2026-04-01

### Features
- Spawned subagent edge wiring.
- Soul graduation in mid-session cleanup.
- Fibonacci memory resurfacing.
- ACAN (Attentive Cross-Attention Network) for learned retrieval scoring.
- Cognitive checks with directive injection.
- Handoff file emergency persistence.

## [0.3.x] - 2026-03

Initial release series. Graph-backed persistent memory engine with SurrealDB, BGE-M3 embeddings, 9-type knowledge extraction, soul graduation system, and adaptive intent classification.
```
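For readers unfamiliar with the batching pattern the 0.4.2 notes describe: SurrealDB executes semicolon-separated statements in one request and returns one result set per statement, which is what lets `queryBatch()` collapse hundreds of round-trips into one. A minimal sketch, assuming the `surrealdb.js` client's `query(sql, bindings)` API; kongbrain's real implementation lives in `package/src/surreal.ts`, which this diff does not display in full:

```ts
import Surreal from "surrealdb.js";

/**
 * Illustrative sketch of a queryBatch helper: join N statements with
 * semicolons, send them in one round-trip, and return one row array
 * per statement, in order. The per-statement result envelope differs
 * across driver versions, so treat this as a shape, not the source.
 */
async function queryBatch<T>(
  db: Surreal,
  stmts: string[],
  bindings: Record<string, unknown> = {},
): Promise<T[][]> {
  if (stmts.length === 0) return [];
  const results = await db.query(stmts.join(";\n"), bindings);
  // Each entry corresponds positionally to one input statement.
  return (results as unknown[]).map((r) => (Array.isArray(r) ? (r as T[]) : []));
}
```

Used this way, a 26-edge-type traversal becomes a single array of `SELECT` statements per hop, matching the 208-to-1-or-2 reduction claimed above.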
package/README.github.md
CHANGED

````diff
@@ -11,7 +11,7 @@
 [](https://nodejs.org)
 [](https://surrealdb.com)
 [](https://github.com/openclaw/openclaw)
-[](https://vitest.dev)
 
 **A graph-backed cognitive engine for [OpenClaw](https://github.com/openclaw/openclaw).**
 
@@ -378,6 +378,45 @@ Three tools are registered for the LLM:
 
 ---
 
+## Performance
+
+KongBrain is aggressively optimized for token efficiency and latency, informed by analysis of the Claude Code source.
+
+### DB Query Batching
+
+All graph operations use batched multi-statement queries (`queryBatch`). A single `assemble()` call sends ~5 round-trips to SurrealDB instead of ~337 individual queries:
+
+| Operation | Before | After |
+|-----------|--------|-------|
+| vectorSearch (7 tables) | 7 queries | 1 batched |
+| graphExpand (26 edge types x N nodes) | 130-208 queries | 1-2 batched (per hop) |
+| queryCausalContext (8 edge types x N nodes) | 80-120 queries | 1-2 batched (per hop) |
+
+### Token Estimation
+
+Token counting is aligned with the Anthropic API's actual tokenizer characteristics:
+- **4 bytes/token** for prose/code (not the common 3.2-3.5 underestimate)
+- **2 bytes/token** for JSON content (denser single-char tokens)
+- **33% safety margin** on aggregate estimates
+- **2000 tokens** for images/documents (matching API billing)
+
+### Context Window Efficiency
+
+Every turn, old messages are surgically stripped to save tokens while preserving recent context:
+- **Thinking blocks** replaced with `[thinking]` marker (saves 1-5k tokens each)
+- **Old tool results** content-cleared to stubs (saves 20-80k tokens/session)
+- **Old assistant filler** collapsed to first line (saves 5-15k/session)
+- **Images** in old messages replaced with `[image]` marker (saves 2k tokens each)
+- **System prompt additions** capped at 8% of context window with priority trimming
+
+### Structured Output
+
+All internal LLM calls (memory extraction, cognitive checks, soul generation, skill extraction) use `json_schema` structured output when the provider supports it. This eliminates markdown fencing, preamble text, and parsing failures.
+
+### Embedding Reuse
+
+User message embeddings computed at ingest time are stashed in session state and reused during context retrieval, eliminating 1-4 redundant BGE-M3 inference calls per turn.
+
 ## Development
 
 ```bash
````
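The estimation rules in the new Performance section map directly onto a small helper. A minimal sketch under the byte-ratio assumptions the README states; the names below are illustrative, not kongbrain's actual exports:

```ts
// Byte-per-token ratios from the README's Token Estimation section.
const BYTES_PER_TOKEN_TEXT = 4;  // prose/code
const BYTES_PER_TOKEN_JSON = 2;  // JSON is denser: many single-char tokens
const IMAGE_TOKENS = 2000;       // flat per-image cost, matching API billing
const SAFETY_MARGIN = 1.33;      // 33% headroom on the aggregate

type ContentBlock =
  | { kind: "text"; text: string }
  | { kind: "json"; value: unknown }
  | { kind: "image" };

function estimateTokens(blocks: ContentBlock[]): number {
  let tokens = 0;
  for (const block of blocks) {
    if (block.kind === "text") {
      tokens += Buffer.byteLength(block.text, "utf8") / BYTES_PER_TOKEN_TEXT;
    } else if (block.kind === "json") {
      tokens += Buffer.byteLength(JSON.stringify(block.value), "utf8") / BYTES_PER_TOKEN_JSON;
    } else {
      tokens += IMAGE_TOKENS;
    }
  }
  return Math.ceil(tokens * SAFETY_MARGIN);
}
```

Note the margin is applied once at the end rather than per block, so mixed content does not compound the padding.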
package/SKILL.md
CHANGED

```diff
@@ -1,7 +1,7 @@
 ---
 name: kongbrain
 description: Graph-backed persistent memory engine for OpenClaw. Replaces the default context window with SurrealDB + vector embeddings that learn across sessions.
-version: 0.4.1
+version: 0.4.3
 homepage: https://github.com/42U/kongbrain
 metadata:
   openclaw:
```
package/TOKEN_FLOW.md
ADDED

@@ -0,0 +1,184 @@

````markdown
# Token Flow Trace

This document maps the complete lifecycle of tokens through the Claw CLI,
from user input to API consumption. It identifies every injection point,
the growth characteristics of each category, and the key findings that
inform optimization work.

## Complete Token Lifecycle

```
User input
  |
  v
[1] ConversationRuntime::run_turn(user_input)
    @ rust/crates/runtime/src/conversation.rs:290-294
    - ConversationMessage::user_text() pushed to session.messages
    - TOKEN IN: user text stored verbatim (no truncation)
  |
  v
[2] API Request Assembly (inner loop start)
    @ conversation.rs:312-315
    - ApiRequest {
        system_prompt: self.system_prompt.clone(), // Vec<String> cloned every call
        messages: self.session.messages.clone(),   // FULL deep clone of all messages
      }
    - TOKEN IN: entire system prompt + full message history
    - PERF NOTE: O(n) deep clone on every API call
  |
  v
[3] Bridge to MessageRequest
    @ main.rs (AnthropicRuntimeClient::stream)
    - system: request.system_prompt.join("\n\n")    // sections → single string
    - messages: convert_messages(&request.messages) // ConversationMessage → InputMessage
    - tools: filter_tool_specs()                    // ToolDefinition array (JSON schemas)
    - TOKEN IN: tool definitions (~5-15K chars of JSON schema)
  |
  v
[4] System Prompt (assembled once per session, sent every call)
    @ rust/crates/runtime/src/prompt.rs:134-156

    STATIC sections (before SYSTEM_PROMPT_DYNAMIC_BOUNDARY):
    ┌──────────────────────────────────┬────────────┐
    │ Section                          │ ~Chars     │
    ├──────────────────────────────────┼────────────┤
    │ Intro                            │ ~400       │
    │ System guidelines                │ ~600       │
    │ Doing tasks guidelines           │ ~600       │
    │ Actions section                  │ ~300       │
    │ DYNAMIC_BOUNDARY marker          │ 37         │
    └──────────────────────────────────┴────────────┘

    DYNAMIC sections (after DYNAMIC_BOUNDARY):
    ┌──────────────────────────────────┬────────────┐
    │ Section                          │ ~Chars     │
    ├──────────────────────────────────┼────────────┤
    │ Environment context              │ ~150       │
    │ Project context (date, cwd)      │ ~100       │
    │ Git status snapshot              │ variable   │
    │ Git diff snapshot                │ UNBOUNDED  │
    │ Instruction files                │ ≤12,000    │
    │ Runtime config                   │ variable   │
    └──────────────────────────────────┴────────────┘
  |
  v
[5] HTTP POST to Anthropic
    @ rust/crates/api/src/providers/anthropic.rs:336-354
    - Full system prompt + full message history + full tool definitions sent
    - Anthropic prompt caching may cache the prefix (5-min TTL)
    - API returns Usage { input_tokens, output_tokens,
      cache_creation_input_tokens, cache_read_input_tokens }
  |
  v
[6] Streaming Response Processing
    @ conversation.rs:316-330
    - build_assistant_message() collects:
      TextDelta, ToolUse, Usage, PromptCache events
    - AssistantEvent::Usage carries the actual API-reported token counts
  |
  v
[7] Usage Recording
    @ conversation.rs:331-333
    - usage_tracker.record(usage) → cumulative counters updated
    - TOKEN OBSERVATION POINT: where we learn actual consumption
  |
  v
[8] Assistant Message Storage
    @ conversation.rs:351-354
    - ConversationMessage with text blocks + tool_use blocks stored in session
    - TOKEN IN: assistant text + tool_use blocks (id, name, input JSON)
  |
  v
[9] Tool Execution (if tool_uses present)
    @ conversation.rs:360-458
    For each pending tool use:
      a. Pre-tool hook (may modify input)
      b. Permission check (may deny)
      c. tool_executor.execute(name, input) → output String
      d. Post-tool hook (may append feedback)
      e. ConversationMessage::tool_result(id, name, output, is_error)
    - TOKEN IN: tool output stored VERBATIM — NO TRUNCATION
    - This is the #1 source of context bloat
  |
  v
[10] Loop back to step [2] if tool_uses were present
    - Each iteration re-sends ALL accumulated messages
    - Context grows monotonically within a single turn
  |
  v
[11] Auto-Compaction Check
    @ conversation.rs:462, 507-530
    - Triggers when cumulative input_tokens >= threshold (default 100K)
    - BUG: uses cumulative (lifetime) tokens, not current context size
    - compact_session() preserves last 4 messages, summarizes the rest
    - TOKEN REDUCTION: the only meaningful reduction mechanism in the system
  |
  v
[12] TurnSummary returned to caller
    - Contains: assistant_messages, tool_results, prompt_cache_events,
      iterations, cumulative usage, auto_compaction event (if any)
```

## Token Contribution Breakdown

| Category | Per-Call Cost | Growth Pattern | Bounded? |
|---|---|---|---|
| System prompt (static sections) | ~2,000 chars / ~500 tokens | Constant per session | Yes |
| System prompt (environment) | ~150 chars | Constant per session | Yes |
| Git diff in system prompt | 0 – 50K+ chars | Changes rarely within session | **No** |
| Instruction files (CLAUDE.md etc) | 0 – 12,000 chars | Constant per session | Yes (budgeted) |
| Runtime config in system prompt | ~200 chars | Constant per session | Yes |
| Tool definitions | ~5,000 – 15,000 chars | Constant per session | Yes (fixed schema) |
| User messages | Variable | Linear with turns | No (until compaction) |
| Assistant text responses | Variable | Linear with turns | No (until compaction) |
| Tool use blocks (id+name+input) | ~100 – 500 chars each | Linear with tool calls | No (until compaction) |
| **Tool result outputs** | **1K – 100K+ chars each** | **Linear with tool calls** | **No (verbatim, unbounded)** |

## Static vs Dynamic vs Growing Context

### Static (per-session, never changes)
- System prompt static sections (intro, system, tasks, actions)
- Tool definitions
- Model, max_tokens configuration

### Dynamic (per-session, changes rarely)
- Environment context (cwd, date, platform)
- Git status/diff snapshot (captured once at session start)
- Instruction files (loaded once at session start)

### Growing (per-turn, monotonically increasing)
- User messages
- Assistant responses (text + tool_use blocks)
- Tool result outputs ← **dominant growth factor**

## Key Findings

### 1. Tool results are the #1 source of context bloat
Tool outputs are stored verbatim in `ContentBlock::ToolResult { output: String }`
(session.rs:36-40). A single `read_file` call can inject 50K+ characters.
A `grep_search` with many matches can inject thousands of lines. These persist
in the session and are re-sent on every subsequent API call until compaction.

### 2. Messages are deep-cloned on every API call
`self.session.messages.clone()` at conversation.rs:314 creates a full deep copy
of all message content on every API call. For sessions with large tool results,
this is significant memory allocation churn (O(n*m) where n=messages, m=avg size).

### 3. The len/4+1 token estimation heuristic systematically undercounts
The heuristic in compact.rs:392-404 uses `text.len() / 4 + 1` per content block.
This ignores:
- JSON serialization overhead (role markers, type tags, key names)
- Tool metadata fields (tool_use_id, is_error flag)
- System prompt tokens (not counted by estimate_session_tokens at all)
- The conversion overhead from ConversationMessage → InputMessage → JSON

### 4. Auto-compaction trigger uses cumulative instead of current tokens
`maybe_auto_compact()` at conversation.rs:508 checks
`self.usage_tracker.cumulative_usage().input_tokens` against the threshold.
Cumulative never decreases — after compaction, every subsequent turn
immediately exceeds the threshold, causing unnecessary re-compaction.

### 5. Git diff in the system prompt is unbounded
`read_git_diff()` in prompt.rs:245-263 captures the full staged + unstaged diff
with no size limit. A developer with many uncommitted changes can have 50K+
characters injected into the system prompt, sent on every single API call.
````
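Findings 3 and 4 point at the same fix: measure what the next request would actually cost instead of summing lifetime usage. The traced code is Rust, but the shape is language-agnostic; here is a hedged TypeScript sketch (names are illustrative, not taken from the Claw codebase):

```ts
interface ConversationMessage {
  role: "user" | "assistant";
  content: unknown[];
}

// Estimate the size of the request we are ABOUT to send by serializing
// exactly what goes over the wire. This counts the JSON envelope (role
// markers, type tags, tool metadata) that the len/4+1 heuristic misses,
// and unlike cumulative input_tokens it shrinks after compaction.
function estimateRequestTokens(
  systemPrompt: string,
  messages: ConversationMessage[],
): number {
  const payload = JSON.stringify({ system: systemPrompt, messages });
  // ~4 bytes/token plus a 33% safety margin, per the estimation notes above.
  return Math.ceil((Buffer.byteLength(payload, "utf8") / 4) * 1.33);
}

function shouldCompact(
  systemPrompt: string,
  messages: ConversationMessage[],
  threshold = 100_000,
): boolean {
  return estimateRequestTokens(systemPrompt, messages) >= threshold;
}
```

Because the estimate is recomputed from the current session state, a successful compaction immediately drops it back below the threshold, avoiding the re-compaction loop described in finding 4.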
package/package.json
CHANGED

```diff
@@ -1,6 +1,6 @@
 {
   "name": "kongbrain",
-  "version": "0.4.1",
+  "version": "0.4.3",
   "description": "Graph-backed persistent memory engine for OpenClaw. Replaces the default context window with SurrealDB + vector embeddings that learn across sessions.",
   "type": "module",
   "license": "MIT",
```
package/src/acan.ts
CHANGED

```diff
@@ -101,6 +101,15 @@ function loadWeights(path: string): ACANWeights | null {
       if (!Array.isArray(raw.W_q[i]) || raw.W_q[i].length !== ATTN_DIM) return null;
       if (!Array.isArray(raw.W_k[i]) || raw.W_k[i].length !== ATTN_DIM) return null;
     }
+    // Validate numeric values — NaN/Infinity from a bad training run would corrupt scoring.
+    // JSON.stringify(NaN) produces null, so we must also reject null/non-number values.
+    if (typeof raw.bias !== "number" || !isFinite(raw.bias)) return null;
+    if (!raw.W_final.every((v: unknown) => typeof v === "number" && isFinite(v as number))) return null;
+    // Spot-check W_q/W_k (full scan too expensive for 1024x64 matrices)
+    for (const i of checkIndices) {
+      if (!raw.W_q[i].every((v: unknown) => typeof v === "number" && isFinite(v as number))) return null;
+      if (!raw.W_k[i].every((v: unknown) => typeof v === "number" && isFinite(v as number))) return null;
+    }
     return raw as ACANWeights;
   } catch (e) {
     swallow("acan:loadWeights", e);
@@ -196,16 +205,30 @@ async function fetchTrainingData(store: SurrealStore): Promise<TrainingSample[]>
 
   const uniqueMemIds = [...new Set(outcomes.map((r: any) => String(r.memory_id)))];
   const embeddingMap = new Map<string, number[]>();
+
+  // Group IDs by table for batched fetches instead of one query per ID
+  const byTable = new Map<string, string[]>();
   for (const mid of uniqueMemIds) {
     try {
       assertRecordId(mid);
+      const table = mid.split(":")[0];
+      if (!byTable.has(table)) byTable.set(table, []);
+      byTable.get(table)!.push(mid);
+    } catch { /* skip invalid */ }
+  }
+  await Promise.all([...byTable.entries()].map(async ([table, ids]) => {
+    try {
+      // Direct interpolation — SurrealDB treats string-array bindings as
+      // literal strings, not record references, causing silent empty results.
+      const idList = ids.join(", ");
+      const rows = await store.queryFirst<{ id: string; embedding: number[] }>(
+        `SELECT id, embedding FROM ${table} WHERE id IN [${idList}] AND embedding != NONE`,
       );
+      for (const row of rows) {
+        if (row.embedding) embeddingMap.set(String(row.id), row.embedding);
+      }
     } catch (e) { swallow("acan:fetchEmb", e); }
-  }
+  }));
 
   const samples: TrainingSample[] = [];
   for (const row of outcomes) {
```
package/src/causal.ts
CHANGED

```diff
@@ -134,33 +134,26 @@ export async function queryCausalContext(
       ELSE 0 END AS score`;
 
   for (let hop = 0; hop < hops && frontier.length > 0; hop++) {
+    // Batch all edge traversals for this hop in a single round-trip
+    const selectFields = `SELECT id, text, importance, access_count AS accessCount,
+      created_at AS timestamp, category, meta::tb(id) AS table${scoreExpr}`;
+    const stmts: string[] = [];
+    for (const id of frontier) {
       assertRecordId(id);
-          bindings,
-        ).catch(e => { swallow.warn("causal:edge-query", e); return [] as any[]; }),
-      );
-    });
-
-    const reverseQueries = frontier.flatMap((id) => {
-      assertRecordId(id);
-      // Direct interpolation safe: assertRecordId validates format above
-      return causalEdges.map((edge) =>
-        store.queryFirst<any>(
-          `SELECT id, text, importance, access_count AS accessCount,
-            created_at AS timestamp, category, meta::tb(id) AS table${scoreExpr}
-            FROM ${id}<-${edge}<-? LIMIT 3`,
-          bindings,
-        ).catch(e => { swallow.warn("causal:edge-query", e); return [] as any[]; }),
-      );
-    });
+      for (const edge of causalEdges) {
+        if (!/^[a-z_]+$/.test(edge)) continue; // safety check
+        stmts.push(`${selectFields} FROM ${id}->${edge}->? LIMIT 3`);
+        stmts.push(`${selectFields} FROM ${id}<-${edge}<-? LIMIT 3`);
+      }
+    }
 
+    let allQueryResults: any[][];
+    try {
+      allQueryResults = await store.queryBatch<any>(stmts, bindings);
+    } catch (e) {
+      swallow.warn("causal:batch", e);
+      break;
+    }
     const nextFrontier: string[] = [];
 
     for (const rows of allQueryResults) {
```
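The inline `/^[a-z_]+$/` test above is the guard the changelog calls `VALID_EDGES` + `assertValidEdge()`, and `assertRecordId` applies the same idea to record IDs (cognitive-check.ts defines the matching `VALID_RECORD_ID` pattern). A minimal sketch of the pair; the whitelist contents here are a partial, illustrative subset of the 25+ edge types, not kongbrain's actual list:

```ts
// Only values from a fixed whitelist may be interpolated as edge names.
// Partial subset for illustration; the real VALID_EDGES covers all edge types.
const VALID_EDGES = new Set([
  "caused_by", "supports", "contradicts", "mentions",
  "about_concept", "reflects_on", "part_of",
]);

export function assertValidEdge(edge: string): void {
  if (!/^[a-z_]+$/.test(edge) || !VALID_EDGES.has(edge)) {
    throw new Error(`Invalid edge name: ${edge}`);
  }
}

// Record IDs must look like table:id before they are interpolated.
// Same pattern as VALID_RECORD_ID in cognitive-check.ts.
const VALID_RECORD_ID = /^[a-z_]+:[a-zA-Z0-9_]+$/;

export function assertRecordId(id: string): void {
  if (!VALID_RECORD_ID.test(id)) {
    throw new Error(`Invalid record id: ${id}`);
  }
}
```

With both asserts in place, direct string interpolation into the batched statements stays injection-safe even though the IDs and edge names are not passed as bindings.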
package/src/cognitive-bootstrap.ts
CHANGED

```diff
@@ -51,27 +51,27 @@ const CORE_ENTRIES: { text: string; category: string; priority: number }[] = [
 const IDENTITY_CHUNKS: { text: string; importance: number }[] = [
   {
     text: `KongBrain's memory daemon runs in the background and extracts 9 knowledge types from your conversations every ~4K tokens or 3 turns: causal chains (cause->effect from debugging), monologue traces (doubts, insights, tradeoffs, realizations — episodic reasoning moments), resolved memories (daemon marks issues done when mentioned as fixed), concepts (technical facts worth remembering), corrections (user correcting you — highest signal), preferences (user workflow/style signals), artifacts (files created/modified/read), decisions (choices with rationale), and skills (multi-step procedures that worked). Extraction is quality-gated — weak confidence extractions are skipped, so the same conversation may yield different extractions depending on signal strength.`,
-    importance:
+    importance: 9,
   },
   {
     text: `Effective recall queries use specific terms that match how knowledge was stored. Search by: file paths ("/src/auth/login.ts"), error messages ("ECONNREFUSED"), concept names ("rate limiting"), decision descriptions ("chose PostgreSQL over MongoDB"), or skill names ("deploy to staging"). The recall tool does vector similarity search plus graph neighbor expansion — top results pull in related nodes via 25 edge types. Scope options: "all" (default), "memories" (decisions, corrections, preferences), "concepts" (extracted technical knowledge), "turns" (past conversation), "artifacts" (files), "skills" (learned procedures). Retrieval scoring improves automatically over time as the ACAN (learned scoring model) trains on retrieval outcomes — early sessions use heuristic scoring, later sessions benefit from learned weights.`,
-    importance:
+    importance: 9,
   },
   {
     text: `KongBrain's memory lifecycle: During a session, the daemon extracts knowledge incrementally. At session end (or mid-session every ~25K tokens): a handoff note is written summarizing progress, skills are extracted from successful tasks, metacognitive reflections are generated (linked to the session via reflects_on edges), and causal chains may graduate to skills. At next session start: the wakeup system synthesizes a first-person briefing from the handoff + identity + monologues + depth signals. Context is also predictively prefetched each turn based on likely follow-up queries — relevant memories may appear in your context without you requesting them.`,
-    importance:
+    importance: 8,
   },
   {
     text: `Graph connectivity determines recall quality. 25 edge types link nodes across the graph (26th, spawned, is deferred). Key edges: mentions (turn->concept), about_concept (memory->concept), artifact_mentions (artifact->concept), caused_by/supports/contradicts (memory<->memory), narrower/broader/related_to (concept<->concept), reflects_on (reflection->session), tool_result_of (turn->turn), part_of (turn->session), skill_from_task (skill->task). To maximize connectivity: mention specific artifact paths, reference existing concept names, describe cause-effect relationships explicitly, and note task context. Reuse existing concept names — use introspect or recall to discover what names exist.`,
-    importance:
+    importance: 8,
   },
   {
     text: `Three persistence mechanisms serve different purposes. Core memory (Tier 0): you control directly via the core_memory tool. Always loaded every turn. Use for: permanent operational rules, learned patterns, identity refinements. Budget-constrained (~10% of context). Core memory (Tier 1): pinned for the current session only. Use for: session-specific context like "working on auth refactor" or "user prefers verbose logging". Identity chunks: self-knowledge seeded at bootstrap, vector-searchable but not always loaded — surfaces in wakeup briefings. Daemon extraction: automatic, runs on conversation content, writes to memory/concept/skill/artifact tables. You don't control extraction directly, but the quality of your conversation affects what gets extracted.`,
-    importance:
+    importance: 8,
   },
   {
     text: `Soul graduation: KongBrain tracks your maturity across 5 stages — nascent (0-3/7 thresholds), developing (4/7), emerging (5/7), maturing (6/7), ready (7/7). The 7 thresholds are: sessions, reflections, causal chains, concepts, monologues, span days, and total memories. Reaching 7/7 is necessary but not sufficient — you must also pass a quality gate (score >= 0.6) based on retrieval utilization, skill success rate, critical reflection rate, and tool failure rate. On graduation, you author a Soul document — a self-assessment grounded in your actual experience, not aspirational claims. Use introspect with action "status" to check your current stage and progress. The Soul document becomes part of your identity once written.`,
-    importance:
+    importance: 8,
   },
 ];
 
```
package/src/cognitive-check.ts
CHANGED

```diff
@@ -85,12 +85,14 @@
 const VALID_RECORD_ID = /^[a-z_]+:[a-zA-Z0-9_]+$/;
 
 // --- Public API ---
 
-/** Returns true on turn 2, then every
+/** Returns true on turn 2, then every 5 turns (2, 7, 12, 17...). False if in-flight or retrieval skipped. */
 export function shouldRunCheck(turnCount: number, session: SessionState): boolean {
   const state = getState(session);
   if (state.checkInFlight) return false;
   if (turnCount < 2) return false;
+  // Skip when retrieval is disabled — no context to evaluate
+  if (session.currentConfig?.skipRetrieval) return false;
+  return turnCount === 2 || (turnCount - 2) % 5 === 0;
 }
 
 export function getPendingDirectives(session: SessionState): CognitiveDirective[] {
@@ -140,28 +142,24 @@ export async function runCognitiveCheck(
     sections.push(`[TRAJECTORY]\n${trajectory}`);
   }
 
-  const
-  "grades": [{id, relevant, reason, score, learned, resolved}] — one per retrieved node. Score 0.0-1.0. "learned": true ONLY if the node is a [CORRECTION] memory AND the assistant's response already follows the correction without being prompted. "resolved": true if this memory's topic has been fully addressed/completed in the current conversation. Both default false.
-  "sessionContinuity": "repeat" | "continuation" | "new_topic" | "tangent"
-  "preferences": [{observation, confidence: "high"|"medium"}] — max 2. User communication style, values, or working preferences inferred from the conversation. Only include if clearly observable. Empty [] if nothing notable.
+  const cogCheckSchema = {
+    type: "object" as const,
+    properties: {
+      directives: { type: "array", items: { type: "object" } },
+      grades: { type: "array", items: { type: "object" } },
+      sessionContinuity: { type: "string", enum: ["repeat", "continuation", "new_topic", "tangent"] },
+      preferences: { type: "array", items: { type: "object" } },
+    },
+    required: ["directives", "grades", "sessionContinuity", "preferences"],
+  };
 
+  const response = await complete({
+    system: `Grade retrieved context. directives: max 3, types: repeat|continuation|contradiction|noise|insight. grades: one per node, learned=true only if CORRECTION already followed unprompted. preferences: max 2, [] if none.`,
     messages: [{
       role: "user",
       content: sections.join("\n\n"),
     }],
+    outputFormat: { type: "json_schema", schema: cogCheckSchema },
   });
 
   const responseText = response.text;
```
package/src/config.ts
CHANGED

```diff
@@ -18,7 +18,7 @@ export interface EmbeddingConfig {
 export interface ThresholdConfig {
   /** Tokens accumulated before daemon flushes extraction (default: 4000) */
   daemonTokenThreshold: number;
-  /** Cumulative tokens before mid-session cleanup fires (default: 100000) */
+  /** Cumulative tokens before mid-session cleanup fires (default: 25000) */
   midSessionCleanupThreshold: number;
   /** Per-extraction timeout in ms (default: 60000) */
   extractionTimeoutMs: number;
```