@jonathangu/openclawbrain 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +412 -0
- package/bin/openclawbrain.js +15 -0
- package/docs/END_STATE.md +244 -0
- package/docs/EVIDENCE.md +128 -0
- package/docs/RELEASE_CONTRACT.md +91 -0
- package/docs/agent-tools.md +106 -0
- package/docs/architecture.md +224 -0
- package/docs/configuration.md +178 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/status.json +87 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/summary.md +16 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/trace.json +273 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/validation-report.json +652 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/channels-status.txt +31 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/config-snapshot.json +66 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/doctor.json +14 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/gateway-probe.txt +34 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/gateway-status.txt +41 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/logs.txt +428 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/status-all.txt +60 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/status.json +223 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/summary.md +13 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/trace.json +4 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/validation-report.json +334 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/channels-status.txt +25 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/config-snapshot.json +91 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/doctor.json +14 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/gateway-probe.txt +36 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/gateway-status.txt +44 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/logs.txt +428 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-doctor.json +10 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-sdk-probe.json +11 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-setup-only.json +12 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/summary.md +30 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/validation-report.json +72 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/status-all.txt +63 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/status.json +200 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/summary.md +13 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/trace.json +4 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/validation-report.json +311 -0
- package/docs/evidence/README.md +16 -0
- package/docs/fts5.md +161 -0
- package/docs/tui.md +506 -0
- package/index.ts +1372 -0
- package/openclaw.plugin.json +136 -0
- package/package.json +66 -0
- package/src/assembler.ts +804 -0
- package/src/brain-cli.ts +316 -0
- package/src/brain-core/decay.ts +35 -0
- package/src/brain-core/episode.ts +82 -0
- package/src/brain-core/graph.ts +321 -0
- package/src/brain-core/health.ts +116 -0
- package/src/brain-core/mutator.ts +281 -0
- package/src/brain-core/pack.ts +117 -0
- package/src/brain-core/policy.ts +153 -0
- package/src/brain-core/replay.ts +1 -0
- package/src/brain-core/teacher.ts +105 -0
- package/src/brain-core/trace.ts +40 -0
- package/src/brain-core/traverse.ts +230 -0
- package/src/brain-core/types.ts +405 -0
- package/src/brain-core/update.ts +123 -0
- package/src/brain-harvest/human.ts +46 -0
- package/src/brain-harvest/scanner.ts +98 -0
- package/src/brain-harvest/self.ts +147 -0
- package/src/brain-runtime/assembler-extension.ts +230 -0
- package/src/brain-runtime/evidence-detectors.ts +68 -0
- package/src/brain-runtime/graph-io.ts +72 -0
- package/src/brain-runtime/harvester-extension.ts +98 -0
- package/src/brain-runtime/service.ts +659 -0
- package/src/brain-runtime/tools.ts +109 -0
- package/src/brain-runtime/worker-state.ts +106 -0
- package/src/brain-runtime/worker-supervisor.ts +169 -0
- package/src/brain-store/embedding.ts +179 -0
- package/src/brain-store/init.ts +347 -0
- package/src/brain-store/migrations.ts +188 -0
- package/src/brain-store/store.ts +816 -0
- package/src/brain-worker/child-runner.ts +321 -0
- package/src/brain-worker/jobs.ts +12 -0
- package/src/brain-worker/mutation-job.ts +5 -0
- package/src/brain-worker/promotion-job.ts +5 -0
- package/src/brain-worker/protocol.ts +79 -0
- package/src/brain-worker/teacher-job.ts +5 -0
- package/src/brain-worker/update-job.ts +5 -0
- package/src/brain-worker/worker.ts +422 -0
- package/src/compaction.ts +1332 -0
- package/src/db/config.ts +265 -0
- package/src/db/connection.ts +72 -0
- package/src/db/features.ts +42 -0
- package/src/db/migration.ts +561 -0
- package/src/engine.ts +1995 -0
- package/src/expansion-auth.ts +351 -0
- package/src/expansion-policy.ts +303 -0
- package/src/expansion.ts +383 -0
- package/src/integrity.ts +600 -0
- package/src/large-files.ts +527 -0
- package/src/openclaw-bridge.ts +22 -0
- package/src/retrieval.ts +357 -0
- package/src/store/conversation-store.ts +748 -0
- package/src/store/fts5-sanitize.ts +29 -0
- package/src/store/full-text-fallback.ts +74 -0
- package/src/store/index.ts +29 -0
- package/src/store/summary-store.ts +918 -0
- package/src/summarize.ts +847 -0
- package/src/tools/common.ts +53 -0
- package/src/tools/lcm-conversation-scope.ts +76 -0
- package/src/tools/lcm-describe-tool.ts +234 -0
- package/src/tools/lcm-expand-query-tool.ts +594 -0
- package/src/tools/lcm-expand-tool.delegation.ts +556 -0
- package/src/tools/lcm-expand-tool.ts +448 -0
- package/src/tools/lcm-expansion-recursion-guard.ts +286 -0
- package/src/tools/lcm-grep-tool.ts +200 -0
- package/src/transcript-repair.ts +301 -0
- package/src/types.ts +149 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
This document describes how lossless-claw works internally — the data model, compaction lifecycle, context assembly, and expansion system.
|
|
4
|
+
|
|
5
|
+
## Data model
|
|
6
|
+
|
|
7
|
+
### Conversations and messages
|
|
8
|
+
|
|
9
|
+
Every OpenClaw session maps to a **conversation**. The first time a session ingests a message, LCM creates a conversation record keyed by the runtime session ID.
|
|
10
|
+
|
|
11
|
+
Messages are stored with:
|
|
12
|
+
- **seq** — Monotonically increasing sequence number within the conversation
|
|
13
|
+
- **role** — `user`, `assistant`, `system`, or `tool`
|
|
14
|
+
- **content** — Plain text extraction of the message
|
|
15
|
+
- **tokenCount** — Estimated token count (~4 chars/token)
|
|
16
|
+
- **createdAt** — Insertion timestamp
|
|
17
|
+
|
|
18
|
+
Each message also has **message_parts** — structured content blocks that preserve the original shape (text blocks, tool calls, tool results, reasoning, file content, etc.). This allows the assembler to reconstruct rich content when building model context, not just flat text.
|
|
19
|
+
|
|
20
|
+
### The summary DAG
|
|
21
|
+
|
|
22
|
+
Summaries form a directed acyclic graph with two node types:
|
|
23
|
+
|
|
24
|
+
**Leaf summaries** (depth 0, kind `"leaf"`):
|
|
25
|
+
- Created from a chunk of raw messages
|
|
26
|
+
- Linked to source messages via `summary_messages`
|
|
27
|
+
- Contain a narrative summary with timestamps
|
|
28
|
+
- Typically 800–1200 tokens
|
|
29
|
+
|
|
30
|
+
**Condensed summaries** (depth 1+, kind `"condensed"`):
|
|
31
|
+
- Created from a chunk of summaries at the same depth
|
|
32
|
+
- Linked to parent summaries via `summary_parents`
|
|
33
|
+
- Each depth tier uses a progressively more abstract prompt
|
|
34
|
+
- Typically 1500–2000 tokens
|
|
35
|
+
|
|
36
|
+
Every summary carries:
|
|
37
|
+
- **summaryId** — `sum_` + 16 hex chars (SHA-256 of content + timestamp)
|
|
38
|
+
- **conversationId** — Which conversation it belongs to
|
|
39
|
+
- **depth** — Position in the hierarchy (0 = leaf)
|
|
40
|
+
- **earliestAt / latestAt** — Time range of source material
|
|
41
|
+
- **descendantCount** — Total number of summaries this node was transitively condensed from (0 for leaves; e.g., a depth-1 summary condensed from 8 leaves has descendantCount 8)
|
|
42
|
+
- **fileIds** — References to large files mentioned in the source
|
|
43
|
+
- **tokenCount** — Estimated tokens
|
|
44
|
+
|
|
45
|
+
### Context items
|
|
46
|
+
|
|
47
|
+
The **context_items** table maintains the ordered list of what the model sees for each conversation. Each entry is either a message reference or a summary reference, identified by ordinal.
|
|
48
|
+
|
|
49
|
+
When compaction creates a summary from a range of messages (or summaries), the source items are replaced by a single summary item. This keeps the context list compact while preserving ordering.
|
|
50
|
+
|
|
51
|
+
## Compaction lifecycle
|
|
52
|
+
|
|
53
|
+
### Ingestion
|
|
54
|
+
|
|
55
|
+
When OpenClaw processes a turn, it calls the context engine's lifecycle hooks:
|
|
56
|
+
|
|
57
|
+
1. **bootstrap** — On session start, reconciles the JSONL session file with the LCM database. Imports any messages that exist in the file but not in LCM (crash recovery).
|
|
58
|
+
2. **ingest** / **ingestBatch** — Persists new messages to the database and appends them to context_items.
|
|
59
|
+
3. **afterTurn** — After the model responds, ingests new messages, then evaluates whether compaction should run.
|
|
60
|
+
|
|
61
|
+
### Leaf compaction
|
|
62
|
+
|
|
63
|
+
The **leaf pass** converts raw messages into leaf summaries:
|
|
64
|
+
|
|
65
|
+
1. Identify the oldest contiguous chunk of raw messages outside the **fresh tail** (protected recent messages).
|
|
66
|
+
2. Cap the chunk at `leafChunkTokens` (default 20k tokens).
|
|
67
|
+
3. Concatenate message content with timestamps.
|
|
68
|
+
4. Resolve the most recent prior summary for continuity (passed as `previous_context` so the LLM avoids repeating known information).
|
|
69
|
+
5. Send to the LLM with the leaf prompt.
|
|
70
|
+
6. Normalize provider response blocks (Anthropic/OpenAI text, output_text, and nested content/summary shapes) into plain text.
|
|
71
|
+
7. If normalization is empty, log provider/model/block-type diagnostics and fall back to deterministic truncation.
|
|
72
|
+
8. If the summary is larger than the input (LLM failure), retry with the aggressive prompt. If still too large, fall back to deterministic truncation.
|
|
73
|
+
9. Persist the summary, link to source messages, and replace the message range in context_items.
|
|
74
|
+
|
|
75
|
+
### Condensation
|
|
76
|
+
|
|
77
|
+
The **condensed pass** merges summaries at the same depth into a higher-level summary:
|
|
78
|
+
|
|
79
|
+
1. Find the shallowest depth with enough contiguous same-depth summaries (≥ `leafMinFanout` for d0, ≥ `condensedMinFanout` for d1+).
|
|
80
|
+
2. Concatenate their content with time range headers.
|
|
81
|
+
3. Send to the LLM with the depth-appropriate prompt (d1, d2, or d3+).
|
|
82
|
+
4. Apply the same escalation strategy (normal → aggressive → truncation fallback).
|
|
83
|
+
5. Persist with depth = targetDepth + 1, link to parent summaries, replace the range in context_items.
|
|
84
|
+
|
|
85
|
+
### Compaction modes
|
|
86
|
+
|
|
87
|
+
**Incremental (after each turn):**
|
|
88
|
+
- Checks if raw tokens outside the fresh tail exceed `leafChunkTokens`
|
|
89
|
+
- If so, runs one leaf pass
|
|
90
|
+
- If `incrementalMaxDepth != 0`, follows with condensation passes up to that depth (`-1` for unlimited)
|
|
91
|
+
- Best-effort: failures don't break the conversation
|
|
92
|
+
|
|
93
|
+
**Full sweep (manual `/compact` or overflow):**
|
|
94
|
+
- Phase 1: Repeatedly runs leaf passes until no more eligible chunks
|
|
95
|
+
- Phase 2: Repeatedly runs condensation passes starting from the shallowest eligible depth
|
|
96
|
+
- Each pass checks for progress; stops if no tokens were saved
|
|
97
|
+
|
|
98
|
+
**Budget-targeted (`compactUntilUnder`):**
|
|
99
|
+
- Runs up to `maxRounds` (default 10) of full sweeps
|
|
100
|
+
- Stops when context is under the target token count
|
|
101
|
+
- Used by the overflow recovery path
|
|
102
|
+
|
|
103
|
+
### Three-level escalation
|
|
104
|
+
|
|
105
|
+
Every summarization attempt follows this escalation:
|
|
106
|
+
|
|
107
|
+
1. **Normal** — Standard prompt, temperature 0.2
|
|
108
|
+
2. **Aggressive** — Tighter prompt requesting only durable facts, temperature 0.1, lower target tokens
|
|
109
|
+
3. **Fallback** — Deterministic truncation to ~512 tokens with `[Truncated for context management]` marker
|
|
110
|
+
|
|
111
|
+
This ensures compaction always makes progress, even if the LLM produces poor output.
|
|
112
|
+
|
|
113
|
+
## Context assembly
|
|
114
|
+
|
|
115
|
+
The assembler runs before each model turn and builds the message array:
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
[summary₁, summary₂, ..., summaryₙ, message₁, message₂, ..., messageₘ]
|
|
119
|
+
├── budget-constrained ──┤ ├──── fresh tail (always included) ────┤
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Steps
|
|
123
|
+
|
|
124
|
+
1. Fetch all context_items ordered by ordinal.
|
|
125
|
+
2. Resolve each item — summaries become user messages with XML wrappers; messages are reconstructed from parts.
|
|
126
|
+
3. Split into evictable prefix and protected fresh tail (last `freshTailCount` raw messages).
|
|
127
|
+
4. Compute fresh tail token cost (always included, even if over budget).
|
|
128
|
+
5. Fill remaining budget from the evictable set, keeping newest items and dropping oldest.
|
|
129
|
+
6. Normalize assistant content to array blocks (Anthropic API compatibility).
|
|
130
|
+
7. Sanitize tool-use/result pairing (ensures every tool_result has a matching tool_use).
|
|
131
|
+
|
|
132
|
+
### XML summary format
|
|
133
|
+
|
|
134
|
+
Summaries are presented to the model as user messages wrapped in XML:
|
|
135
|
+
|
|
136
|
+
```xml
|
|
137
|
+
<summary id="sum_abc123" kind="leaf" depth="0" descendant_count="0"
|
|
138
|
+
earliest_at="2026-02-17T07:37:00" latest_at="2026-02-17T08:23:00">
|
|
139
|
+
<content>
|
|
140
|
+
...summary text with timestamps...
|
|
141
|
+
|
|
142
|
+
Expand for details about: exact error messages, full config diff, intermediate debugging steps
|
|
143
|
+
</content>
|
|
144
|
+
</summary>
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Condensed summaries also include parent references:
|
|
148
|
+
|
|
149
|
+
```xml
|
|
150
|
+
<summary id="sum_def456" kind="condensed" depth="1" descendant_count="8" ...>
|
|
151
|
+
<parents>
|
|
152
|
+
<summary_ref id="sum_aaa111" />
|
|
153
|
+
<summary_ref id="sum_bbb222" />
|
|
154
|
+
</parents>
|
|
155
|
+
<content>...</content>
|
|
156
|
+
</summary>
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
The XML attributes give the model enough metadata to reason about summary age, scope, and how to drill deeper. The `<parents>` section enables targeted expansion of specific source summaries.
|
|
160
|
+
|
|
161
|
+
## Expansion system
|
|
162
|
+
|
|
163
|
+
When summaries are too compressed for a task, agents use `lcm_expand_query` to recover detail.
|
|
164
|
+
|
|
165
|
+
### How it works
|
|
166
|
+
|
|
167
|
+
1. Agent calls `lcm_expand_query` with a `prompt` and either `summaryIds` or a `query`.
|
|
168
|
+
2. If `query` is provided, `lcm_grep` finds matching summaries first.
|
|
169
|
+
3. A **delegation grant** is created, scoping the sub-agent to the relevant conversation(s) with a token cap.
|
|
170
|
+
4. A sub-agent session is spawned with the expansion task.
|
|
171
|
+
5. The sub-agent walks the DAG: it can read summary content, follow parent links, access source messages, and inspect stored files.
|
|
172
|
+
6. The sub-agent returns a focused answer (default ≤ 2000 tokens) with cited summary IDs.
|
|
173
|
+
7. The grant is revoked and the sub-agent session is cleaned up.
|
|
174
|
+
|
|
175
|
+
### Security model
|
|
176
|
+
|
|
177
|
+
Expansion uses a delegation grant system:
|
|
178
|
+
|
|
179
|
+
- **Grants** are created at spawn time, scoped to specific conversation IDs
|
|
180
|
+
- **Token caps** limit how much content the sub-agent can access
|
|
181
|
+
- **TTL** ensures grants expire even if cleanup fails
|
|
182
|
+
- **Revocation** happens on completion, cancellation, or sweep
|
|
183
|
+
|
|
184
|
+
The sub-agent only gets `lcm_expand` (the low-level tool), not `lcm_expand_query` — preventing recursive sub-agent spawning.
|
|
185
|
+
|
|
186
|
+
## Large file handling
|
|
187
|
+
|
|
188
|
+
Files embedded in user messages (typically via `<file>` blocks from tool output) are checked at ingestion:
|
|
189
|
+
|
|
190
|
+
1. Parse file blocks from message content.
|
|
191
|
+
2. For each block exceeding `largeFileTokenThreshold` (default 25k tokens):
|
|
192
|
+
- Generate a unique file ID (`file_` prefix)
|
|
193
|
+
- Store the content to `~/.openclaw/lcm-files/<conversation_id>/<file_id>.<ext>`
|
|
194
|
+
- Generate a ~200 token exploration summary (structural analysis, key sections, etc.)
|
|
195
|
+
- Insert a `large_files` record with metadata
|
|
196
|
+
- Replace the file block in the message with a compact reference
|
|
197
|
+
3. The `lcm_describe` tool can retrieve full file content by ID.
|
|
198
|
+
|
|
199
|
+
This prevents a single large file paste from consuming the entire context window while keeping the content accessible.
|
|
200
|
+
|
|
201
|
+
## Session reconciliation
|
|
202
|
+
|
|
203
|
+
LCM handles crash recovery through **bootstrap reconciliation**:
|
|
204
|
+
|
|
205
|
+
1. On session start, read the JSONL session file (OpenClaw's ground truth).
|
|
206
|
+
2. Compare against the LCM database.
|
|
207
|
+
3. Find the most recent message that exists in both (the "anchor").
|
|
208
|
+
4. Import any messages after the anchor that are in JSONL but not in LCM.
|
|
209
|
+
|
|
210
|
+
This handles the case where OpenClaw wrote messages to the session file but crashed before LCM could persist them.
|
|
211
|
+
|
|
212
|
+
## Operation serialization
|
|
213
|
+
|
|
214
|
+
All mutating operations (ingest, compact) are serialized per-session using a promise queue. This prevents races between concurrent afterTurn/compact calls for the same conversation without blocking operations on different conversations.
|
|
215
|
+
|
|
216
|
+
## Authentication
|
|
217
|
+
|
|
218
|
+
LCM needs to call an LLM for summarization. It resolves credentials through a three-tier cascade:
|
|
219
|
+
|
|
220
|
+
1. **Auth profiles** — OpenClaw's OAuth/token/API-key profile system (`auth-profiles.json`), checked in priority order
|
|
221
|
+
2. **Environment variables** — Standard provider env vars (`ANTHROPIC_API_KEY`, etc.)
|
|
222
|
+
3. **Custom provider key** — From models config (e.g., `models.json`)
|
|
223
|
+
|
|
224
|
+
For OAuth providers (e.g., Anthropic via Claude Max), LCM handles token refresh and credential persistence automatically.
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# Configuration guide
|
|
2
|
+
|
|
3
|
+
This guide covers the practical operator setup for **OpenClawBrain v2**.
|
|
4
|
+
|
|
5
|
+
For repo truth, read:
|
|
6
|
+
- `README.md`
|
|
7
|
+
- `docs/RELEASE_CONTRACT.md`
|
|
8
|
+
- `docs/END_STATE.md`
|
|
9
|
+
|
|
10
|
+
## Quick start
|
|
11
|
+
|
|
12
|
+
Install the plugin with OpenClaw's plugin installer:
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
openclaw plugins install @jonathangu/openclawbrain
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
If you're running from a local OpenClaw checkout:
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pnpm openclaw plugins install @jonathangu/openclawbrain
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
For local development, link your working copy:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
openclaw plugins install --link /path/to/openclawbrain
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
`openclaw plugins install` handles plugin registration, enabling, and compatible slot selection automatically.
|
|
31
|
+
|
|
32
|
+
## Context engine slot
|
|
33
|
+
|
|
34
|
+
If you must set it manually, point the context engine slot at `openclawbrain`:
|
|
35
|
+
|
|
36
|
+
```json
|
|
37
|
+
{
|
|
38
|
+
"plugins": {
|
|
39
|
+
"slots": {
|
|
40
|
+
"contextEngine": "openclawbrain"
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Recommended starting configuration
|
|
47
|
+
|
|
48
|
+
```json
|
|
49
|
+
{
|
|
50
|
+
"plugins": {
|
|
51
|
+
"entries": {
|
|
52
|
+
"openclawbrain": {
|
|
53
|
+
"enabled": true,
|
|
54
|
+
"config": {
|
|
55
|
+
"freshTailCount": 32,
|
|
56
|
+
"contextThreshold": 0.75,
|
|
57
|
+
"incrementalMaxDepth": -1,
|
|
58
|
+
"brainRoot": "~/.openclaw/openclawbrain",
|
|
59
|
+
"brainEmbeddingProvider": "ollama",
|
|
60
|
+
"brainEmbeddingModel": "bge-large:latest",
|
|
61
|
+
"brainWorkerMode": "child"
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Why these defaults:
|
|
70
|
+
- `freshTailCount=32` keeps recent turns raw for continuity
|
|
71
|
+
- `contextThreshold=0.75` leaves response headroom
|
|
72
|
+
- `incrementalMaxDepth=-1` lets compaction keep cascading when needed
|
|
73
|
+
- `brainWorkerMode=child` is the practical serving boundary
|
|
74
|
+
|
|
75
|
+
## Initialization
|
|
76
|
+
|
|
77
|
+
The transcript layer works immediately after install. Learned retrieval needs an explicit init:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
openclawbrain init /path/to/workspace
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
That creates the initial graph, writes `state.db`, snapshots pack `v000001`, and promotes it.
|
|
84
|
+
|
|
85
|
+
## Embeddings
|
|
86
|
+
|
|
87
|
+
OpenClawBrain currently supports OpenAI-compatible embeddings APIs; the configurations below are the ones that have been tested.
|
|
88
|
+
|
|
89
|
+
### Local Ollama
|
|
90
|
+
|
|
91
|
+
```json
|
|
92
|
+
{
|
|
93
|
+
"plugins": {
|
|
94
|
+
"entries": {
|
|
95
|
+
"openclawbrain": {
|
|
96
|
+
"config": {
|
|
97
|
+
"brainEmbeddingProvider": "ollama",
|
|
98
|
+
"brainEmbeddingModel": "bge-large:latest"
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
This defaults to `http://127.0.0.1:11434/v1`.
|
|
107
|
+
|
|
108
|
+
### Remote OpenAI-compatible endpoint
|
|
109
|
+
|
|
110
|
+
```json
|
|
111
|
+
{
|
|
112
|
+
"plugins": {
|
|
113
|
+
"entries": {
|
|
114
|
+
"openclawbrain": {
|
|
115
|
+
"config": {
|
|
116
|
+
"brainEmbeddingProvider": "openai",
|
|
117
|
+
"brainEmbeddingModel": "text-embedding-3-large",
|
|
118
|
+
"brainEmbeddingBaseUrl": "https://your-endpoint.example/v1"
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
If the endpoint requires auth, provide `OPENCLAWBRAIN_EMBEDDING_API_KEY`.
|
|
127
|
+
|
|
128
|
+
## Important environment variables
|
|
129
|
+
|
|
130
|
+
| Variable | Description |
|
|
131
|
+
|---|---|
|
|
132
|
+
| `LCM_DATABASE_PATH` | SQLite path for transcript / summary storage |
|
|
133
|
+
| `LCM_CONTEXT_THRESHOLD` | Fraction of context window that triggers compaction |
|
|
134
|
+
| `LCM_FRESH_TAIL_COUNT` | Recent raw messages protected from compaction |
|
|
135
|
+
| `LCM_INCREMENTAL_MAX_DEPTH` | Automatic condensation depth (`-1` = unlimited) |
|
|
136
|
+
| `OPENCLAWBRAIN_ROOT` | Root for `state.db` and immutable packs |
|
|
137
|
+
| `OPENCLAWBRAIN_EMBEDDING_PROVIDER` | Embedding provider (`openai`, `openai-resp`, `ollama`) |
|
|
138
|
+
| `OPENCLAWBRAIN_EMBEDDING_MODEL` | Embedding model used for init/retrieval/teach |
|
|
139
|
+
| `OPENCLAWBRAIN_EMBEDDING_BASE_URL` | Optional embeddings API base URL |
|
|
140
|
+
| `OPENCLAWBRAIN_EMBEDDING_API_KEY` | Optional explicit auth for remote embedding endpoints |
|
|
141
|
+
| `OPENCLAWBRAIN_SHADOW_MODE` | Record routing without injecting learned context |
|
|
142
|
+
|
|
143
|
+
## Operator commands
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
openclawbrain init [workspace]
|
|
147
|
+
openclawbrain status
|
|
148
|
+
openclawbrain trace [traceId]
|
|
149
|
+
openclawbrain replay
|
|
150
|
+
openclawbrain promote
|
|
151
|
+
openclawbrain rollback [version]
|
|
152
|
+
openclawbrain disable
|
|
153
|
+
openclawbrain enable
|
|
154
|
+
openclawbrain doctor
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Validation commands
|
|
158
|
+
|
|
159
|
+
Deterministic runtime proof harness:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
node scripts/validate-brain-runtime-behavior.ts
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Disposable host-surface harness:
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
node scripts/validate-openclaw-install.mjs --setup-only
|
|
169
|
+
|
|
170
|
+
OPENCLAWBRAIN_VALIDATION_EMBEDDING_PROVIDER=ollama \
|
|
171
|
+
OPENCLAWBRAIN_VALIDATION_EMBEDDING_MODEL=bge-large:latest \
|
|
172
|
+
OPENCLAWBRAIN_VALIDATION_MODEL=ollama/qwen2.5:7b-instruct \
|
|
173
|
+
node scripts/validate-openclaw-install.mjs
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
## Session reset note
|
|
177
|
+
|
|
178
|
+
OpenClawBrain preserves history through compaction, but it does not override OpenClaw's core session reset policy. If sessions reset sooner than you want, increase OpenClaw's `session.reset.idleMinutes`.
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
{
|
|
2
|
+
"initialized": true,
|
|
3
|
+
"enabled": true,
|
|
4
|
+
"embeddingConfigured": true,
|
|
5
|
+
"embeddingProvider": "openai",
|
|
6
|
+
"embeddingModel": "text-embedding-3-small",
|
|
7
|
+
"embeddingBaseUrl": "https://example.invalid/v1",
|
|
8
|
+
"embeddingAuthMode": "api_key",
|
|
9
|
+
"embeddingConfigError": null,
|
|
10
|
+
"currentPackVersion": 2,
|
|
11
|
+
"currentPackPromotedAt": 1773634784489,
|
|
12
|
+
"shadowMode": false,
|
|
13
|
+
"teacherEnabled": false,
|
|
14
|
+
"teacherConfigured": false,
|
|
15
|
+
"teacherProvider": "",
|
|
16
|
+
"teacherModel": "",
|
|
17
|
+
"teacherConfigError": null,
|
|
18
|
+
"workerMode": "in_process",
|
|
19
|
+
"workerPid": null,
|
|
20
|
+
"workerStatus": "running",
|
|
21
|
+
"workerLastHeartbeatAt": null,
|
|
22
|
+
"workerLastReadyAt": null,
|
|
23
|
+
"workerHealthy": true,
|
|
24
|
+
"workerLastExit": null,
|
|
25
|
+
"pendingEvidence": 1,
|
|
26
|
+
"pendingEvidenceBySource": {
|
|
27
|
+
"human": 1,
|
|
28
|
+
"self": 0,
|
|
29
|
+
"scanner": 0,
|
|
30
|
+
"teacher": 0
|
|
31
|
+
},
|
|
32
|
+
"pendingLabels": 1,
|
|
33
|
+
"pendingLabelsBySource": {
|
|
34
|
+
"human": 1,
|
|
35
|
+
"self": 0,
|
|
36
|
+
"scanner": 0,
|
|
37
|
+
"teacher": 0
|
|
38
|
+
},
|
|
39
|
+
"mutationBacklog": {
|
|
40
|
+
"pending": 0,
|
|
41
|
+
"validated": 0,
|
|
42
|
+
"promoted": 0,
|
|
43
|
+
"rejected": 0
|
|
44
|
+
},
|
|
45
|
+
"seedLearningEnabled": false,
|
|
46
|
+
"recentTraceCount": 2,
|
|
47
|
+
"lastTraceFooter": "Brain · 4 seeds · start bn_630e2b18-631 · 4 hops · 3 fired · 1 veto · 263 chars",
|
|
48
|
+
"lastAssemblyDecision": {
|
|
49
|
+
"mode": "use_brain",
|
|
50
|
+
"conversationId": 1,
|
|
51
|
+
"episodeId": "be_f513c088-527",
|
|
52
|
+
"traceId": "bt_1aa18542",
|
|
53
|
+
"footer": "Brain · 4 seeds · start bn_630e2b18-631 · 4 hops · 3 fired · 1 veto · 263 chars"
|
|
54
|
+
},
|
|
55
|
+
"lastPromotionReason": null,
|
|
56
|
+
"lastReplayFailureReason": null,
|
|
57
|
+
"brainRoot": "/Users/cormorantai/.openclaw-ocbphase1/brain-teach-session-bound/run-20/openclawbrain",
|
|
58
|
+
"nodeCount": 5,
|
|
59
|
+
"edgeCount": 15,
|
|
60
|
+
"nodesByKind": {
|
|
61
|
+
"chunk": 4,
|
|
62
|
+
"workflow": 0,
|
|
63
|
+
"correction": 1,
|
|
64
|
+
"toolcard": 0,
|
|
65
|
+
"episode_anchor": 0,
|
|
66
|
+
"summary_bridge": 0
|
|
67
|
+
},
|
|
68
|
+
"edgesByKind": {
|
|
69
|
+
"sibling": 2,
|
|
70
|
+
"semantic": 6,
|
|
71
|
+
"learned": 6,
|
|
72
|
+
"seed": 0,
|
|
73
|
+
"inhibitory": 1,
|
|
74
|
+
"bridge": 0
|
|
75
|
+
},
|
|
76
|
+
"firedPerQuery": 3,
|
|
77
|
+
"dormantPercent": 0.2,
|
|
78
|
+
"inhibitoryPercent": 0.06666666666666667,
|
|
79
|
+
"orphanCount": 0,
|
|
80
|
+
"avgPathLength": 4.5,
|
|
81
|
+
"avgReward": 0,
|
|
82
|
+
"crossFileEdgePercent": 0.8666666666666667,
|
|
83
|
+
"churn": 0,
|
|
84
|
+
"packVersion": 2,
|
|
85
|
+
"lastUpdateAt": 1773634784490,
|
|
86
|
+
"totalEpisodes": 2
|
|
87
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Session-bound brain_teach validation summary
|
|
2
|
+
|
|
3
|
+
- commit: `3188b50c4ed30f07dea111e35ce52aabefaced63`
|
|
4
|
+
- workspace: `/Users/cormorantai/.openclaw/workspace-ocbphase1`
|
|
5
|
+
- validation state dir: `/Users/cormorantai/.openclaw-ocbphase1`
|
|
6
|
+
- repetitions requested: 20
|
|
7
|
+
- repetitions completed: 20
|
|
8
|
+
- identical pass fingerprints: 1
|
|
9
|
+
- acceptance: PASS
|
|
10
|
+
|
|
11
|
+
## Required proof
|
|
12
|
+
|
|
13
|
+
- session-bound `brain_teach` tool resolves `ctx.sessionKey` to the correct conversation
|
|
14
|
+
- teach action records `brain_teach` evidence against the warmup episode
|
|
15
|
+
- follow-up runtime assembly uses brain retrieval and surfaces the taught correction
|
|
16
|
+
- repeated runs are semantically identical at the asserted boundary
|