@jonathangu/openclawbrain 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +412 -0
- package/bin/openclawbrain.js +15 -0
- package/docs/END_STATE.md +244 -0
- package/docs/EVIDENCE.md +128 -0
- package/docs/RELEASE_CONTRACT.md +91 -0
- package/docs/agent-tools.md +106 -0
- package/docs/architecture.md +224 -0
- package/docs/configuration.md +178 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/status.json +87 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/summary.md +16 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/trace.json +273 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/validation-report.json +652 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/channels-status.txt +31 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/config-snapshot.json +66 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/doctor.json +14 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/gateway-probe.txt +34 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/gateway-status.txt +41 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/logs.txt +428 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/status-all.txt +60 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/status.json +223 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/summary.md +13 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/trace.json +4 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/validation-report.json +334 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/channels-status.txt +25 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/config-snapshot.json +91 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/doctor.json +14 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/gateway-probe.txt +36 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/gateway-status.txt +44 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/logs.txt +428 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-doctor.json +10 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-sdk-probe.json +11 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-setup-only.json +12 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/summary.md +30 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/validation-report.json +72 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/status-all.txt +63 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/status.json +200 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/summary.md +13 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/trace.json +4 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/validation-report.json +311 -0
- package/docs/evidence/README.md +16 -0
- package/docs/fts5.md +161 -0
- package/docs/tui.md +506 -0
- package/index.ts +1372 -0
- package/openclaw.plugin.json +136 -0
- package/package.json +66 -0
- package/src/assembler.ts +804 -0
- package/src/brain-cli.ts +316 -0
- package/src/brain-core/decay.ts +35 -0
- package/src/brain-core/episode.ts +82 -0
- package/src/brain-core/graph.ts +321 -0
- package/src/brain-core/health.ts +116 -0
- package/src/brain-core/mutator.ts +281 -0
- package/src/brain-core/pack.ts +117 -0
- package/src/brain-core/policy.ts +153 -0
- package/src/brain-core/replay.ts +1 -0
- package/src/brain-core/teacher.ts +105 -0
- package/src/brain-core/trace.ts +40 -0
- package/src/brain-core/traverse.ts +230 -0
- package/src/brain-core/types.ts +405 -0
- package/src/brain-core/update.ts +123 -0
- package/src/brain-harvest/human.ts +46 -0
- package/src/brain-harvest/scanner.ts +98 -0
- package/src/brain-harvest/self.ts +147 -0
- package/src/brain-runtime/assembler-extension.ts +230 -0
- package/src/brain-runtime/evidence-detectors.ts +68 -0
- package/src/brain-runtime/graph-io.ts +72 -0
- package/src/brain-runtime/harvester-extension.ts +98 -0
- package/src/brain-runtime/service.ts +659 -0
- package/src/brain-runtime/tools.ts +109 -0
- package/src/brain-runtime/worker-state.ts +106 -0
- package/src/brain-runtime/worker-supervisor.ts +169 -0
- package/src/brain-store/embedding.ts +179 -0
- package/src/brain-store/init.ts +347 -0
- package/src/brain-store/migrations.ts +188 -0
- package/src/brain-store/store.ts +816 -0
- package/src/brain-worker/child-runner.ts +321 -0
- package/src/brain-worker/jobs.ts +12 -0
- package/src/brain-worker/mutation-job.ts +5 -0
- package/src/brain-worker/promotion-job.ts +5 -0
- package/src/brain-worker/protocol.ts +79 -0
- package/src/brain-worker/teacher-job.ts +5 -0
- package/src/brain-worker/update-job.ts +5 -0
- package/src/brain-worker/worker.ts +422 -0
- package/src/compaction.ts +1332 -0
- package/src/db/config.ts +265 -0
- package/src/db/connection.ts +72 -0
- package/src/db/features.ts +42 -0
- package/src/db/migration.ts +561 -0
- package/src/engine.ts +1995 -0
- package/src/expansion-auth.ts +351 -0
- package/src/expansion-policy.ts +303 -0
- package/src/expansion.ts +383 -0
- package/src/integrity.ts +600 -0
- package/src/large-files.ts +527 -0
- package/src/openclaw-bridge.ts +22 -0
- package/src/retrieval.ts +357 -0
- package/src/store/conversation-store.ts +748 -0
- package/src/store/fts5-sanitize.ts +29 -0
- package/src/store/full-text-fallback.ts +74 -0
- package/src/store/index.ts +29 -0
- package/src/store/summary-store.ts +918 -0
- package/src/summarize.ts +847 -0
- package/src/tools/common.ts +53 -0
- package/src/tools/lcm-conversation-scope.ts +76 -0
- package/src/tools/lcm-describe-tool.ts +234 -0
- package/src/tools/lcm-expand-query-tool.ts +594 -0
- package/src/tools/lcm-expand-tool.delegation.ts +556 -0
- package/src/tools/lcm-expand-tool.ts +448 -0
- package/src/tools/lcm-expansion-recursion-guard.ts +286 -0
- package/src/tools/lcm-grep-tool.ts +200 -0
- package/src/transcript-repair.ts +301 -0
- package/src/types.ts +149 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Source discovery, chunking, and seed graph building for brain init.
|
|
3
|
+
*
|
|
4
|
+
* Brain init is a first-class product moment:
|
|
5
|
+
* 1. Discover sources (markdown, code, sessions, LCM summaries, OpenClaw memory)
|
|
6
|
+
* 2. Structure-aware chunking (heading boundaries, code blocks intact)
|
|
7
|
+
* 3. Create nodes with embeddings
|
|
8
|
+
* 4. Create cold-start edges (sibling + semantic)
|
|
9
|
+
* 5. Build pack v0
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { randomUUID } from "node:crypto";
|
|
13
|
+
import { readdirSync, readFileSync, statSync, existsSync } from "node:fs";
|
|
14
|
+
import { join, extname, relative } from "node:path";
|
|
15
|
+
import type { BrainNode, BrainEdge, NodeKind } from "../brain-core/types.js";
|
|
16
|
+
import { cosineSimilarity } from "../brain-core/graph.js";
|
|
17
|
+
|
|
18
|
+
// ─── Source Discovery ───
|
|
19
|
+
|
|
20
|
+
/** A workspace file picked up by discovery, loaded fully into memory. */
export interface Source {
  /** Path of the file relative to the workspace root. */
  uri: string;

  /** Text of the file, decoded as UTF-8. */
  content: string;

  /** Category assigned during discovery; chunking dispatches on it. */
  type: "markdown" | "code" | "config" | "memory";
}
|
|
25
|
+
|
|
26
|
+
/**
 * Collect the files under `workspaceRoot` that brain init should ingest.
 *
 * Three passes run in order:
 *   1. A recursive walk (at most 4 levels below the root) that skips dot
 *      entries, `node_modules`, `dist`, and files larger than 100KB, and
 *      classifies the rest by extension / file name.
 *   2. `*.md` files directly inside `.claude/memory` and `memory`.
 *   3. A top-level `MEMORY.md`.
 *
 * A file reachable by more than one pass (for example `MEMORY.md`, which the
 * walk already sees as markdown) is returned once, typed as "memory".
 * Unreadable files and directories are skipped instead of aborting the scan.
 *
 * @param workspaceRoot Directory to scan.
 * @returns One entry per discovered file, with workspace-relative URIs.
 */
export function discoverSources(workspaceRoot: string): Source[] {
  const maxFileSize = 100_000; // 100KB
  const maxDepth = 4;
  const codeExts = new Set([".ts", ".js", ".py", ".go", ".rs"]);
  const configExts = new Set([".toml", ".yaml", ".yml"]);
  const configNames = new Set(["package.json", "tsconfig.json", "Dockerfile"]);

  // Keyed by URI so a later pass replaces (rather than duplicates) an entry
  // for the same file. Map keeps the position of the first insertion.
  const byUri = new Map<string, Source>();

  // Read one file and register it; a failed read only drops that file.
  function add(fullPath: string, type: Source["type"]): void {
    let content: string;
    try {
      content = readFileSync(fullPath, "utf-8");
    } catch {
      return;
    }
    const uri = relative(workspaceRoot, fullPath);
    byUri.set(uri, { uri, content, type });
  }

  function walk(dir: string, depth = 0): void {
    if (depth > maxDepth) return; // Don't recurse too deep
    let entries: string[];
    try {
      entries = readdirSync(dir);
    } catch {
      return;
    }

    for (const entry of entries) {
      if (entry.startsWith(".") || entry === "node_modules" || entry === "dist") continue;
      const fullPath = join(dir, entry);
      let stat;
      try {
        stat = statSync(fullPath);
      } catch {
        continue;
      }

      if (stat.isDirectory()) {
        walk(fullPath, depth + 1);
        continue;
      }

      if (stat.size > maxFileSize) continue;

      const ext = extname(entry).toLowerCase();
      if (ext === ".md") {
        add(fullPath, "markdown");
      } else if (codeExts.has(ext)) {
        add(fullPath, "code");
      } else if (configNames.has(entry) || configExts.has(ext)) {
        add(fullPath, "config");
      }
    }
  }

  walk(workspaceRoot);

  // Also check for OpenClaw memory files
  const memoryDirs = [
    join(workspaceRoot, ".claude", "memory"),
    join(workspaceRoot, "memory"),
  ];
  for (const memDir of memoryDirs) {
    if (!existsSync(memDir)) continue;
    let entries: string[];
    try {
      entries = readdirSync(memDir);
    } catch {
      continue;
    }
    for (const entry of entries) {
      if (entry.endsWith(".md")) {
        add(join(memDir, entry), "memory");
      }
    }
  }

  // Check for MEMORY.md
  const memoryMd = join(workspaceRoot, "MEMORY.md");
  if (existsSync(memoryMd)) {
    add(memoryMd, "memory");
  }

  return [...byUri.values()];
}
|
|
97
|
+
|
|
98
|
+
// ─── Chunking ───
|
|
99
|
+
|
|
100
|
+
/** One slice of a discovered source, produced by the chunkers in this file. */
export interface Chunk {
  /** Trimmed text of the slice. */
  content: string;

  /** URI of the source the slice was cut from. */
  sourceUri: string;

  /** Node kind assigned to the slice ("chunk" or "workflow" in this file). */
  kind: NodeKind;

  /** Zero-based position of the slice among the chunks kept for its source. */
  ordinal: number;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Turn discovered sources into chunks, picking a strategy per source type.
 * - "markdown" and "memory": delegated to chunkMarkdown
 * - "code": delegated to chunkCode
 * - anything else (config): the whole trimmed file as a single chunk with
 *   ordinal 0; files that are empty after trimming yield nothing
 *
 * Output order follows the order of `sources`.
 */
export function chunkSources(sources: Source[]): Chunk[] {
  return sources.flatMap((source): Chunk[] => {
    switch (source.type) {
      case "markdown":
      case "memory":
        return chunkMarkdown(source.content, source.uri);
      case "code":
        return chunkCode(source.content, source.uri);
      default: {
        // Config: one chunk per file
        const body = source.content.trim();
        if (!body) return [];
        return [{ content: body, sourceUri: source.uri, kind: "chunk", ordinal: 0 }];
      }
    }
  });
}
|
|
131
|
+
|
|
132
|
+
/**
 * Split markdown into chunks at level 1-3 headings (`#`, `##`, `###`).
 *
 * Fenced code blocks (``` or ~~~) are kept intact: a line that looks like a
 * heading inside a fence (for example a shell or Python `# comment`) does not
 * start a new chunk. An unclosed fence runs to the end of the document.
 *
 * Chunks of 20 characters or fewer after trimming are dropped. A chunk that
 * contains a numbered-list line ("1. ...") is tagged "workflow"; everything
 * else is tagged "chunk". Ordinals count kept chunks only, starting at 0.
 *
 * @param content Raw markdown text.
 * @param uri     Source URI copied onto every chunk.
 */
function chunkMarkdown(content: string, uri: string): Chunk[] {
  const chunks: Chunk[] = [];
  const lines = content.split("\n");
  let current: string[] = [];
  let ordinal = 0;
  // Marker that opened the fence we are currently inside, or null when outside.
  let openFence: string | null = null;

  function flush(): void {
    const text = current.join("\n").trim();
    if (text.length > 20) {
      // Detect workflows (numbered steps)
      const kind: NodeKind = /^\s*\d+\.\s/m.test(text) ? "workflow" : "chunk";
      chunks.push({ content: text, sourceUri: uri, kind, ordinal: ordinal++ });
    }
    current = [];
  }

  for (const line of lines) {
    const fence = /^ {0,3}(`{3,}|~{3,})(.*)$/.exec(line);
    if (fence) {
      const marker = fence[1];
      const rest = fence[2];
      if (openFence === null) {
        openFence = marker;
      } else if (
        marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        rest.trim() === ""
      ) {
        // A closing fence uses the same character, is at least as long as the
        // opener, and carries no info string.
        openFence = null;
      }
    } else if (openFence === null && /^#{1,3}\s/.test(line) && current.length > 0) {
      flush();
    }
    current.push(line);
  }
  flush();

  return chunks;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Split source code into chunks at top-level definition boundaries.
 *
 * A boundary is an unindented line that opens a definition in one of the
 * languages discovery collects:
 *   - JS/TS: `function`, `class`, `const name = (` / `const name = async (`,
 *     optionally preceded by `export`, `default`, `async`, `abstract`
 *   - Python: `def` / `async def` (`class` is covered by the JS/TS pattern)
 *   - Go: `func`
 *   - Rust: `fn`, optionally preceded by `pub`, `pub(...)`, `const`, `async`,
 *     `unsafe`
 *
 * A boundary only starts a new chunk when more than 3 lines have accumulated.
 * Chunks of 30 characters or fewer after trimming are dropped. Ordinals count
 * kept chunks only, starting at 0.
 *
 * @param content Raw file text.
 * @param uri     Source URI copied onto every chunk.
 */
function chunkCode(content: string, uri: string): Chunk[] {
  // Keyword patterns end in \b or \s so identifiers that merely start with a
  // keyword (e.g. `classify(...)`, `functionTable = ...`) are not boundaries.
  const boundaries: RegExp[] = [
    /^(export\s+)?(default\s+)?(async\s+)?function\b/,
    /^(export\s+)?(default\s+)?(abstract\s+)?class\b/,
    /^(export\s+)?const\s+\w+\s*=\s*(async\s*)?\(/,
    /^(async\s+)?def\s/,
    /^func\s/,
    /^(pub(\([^)]*\))?\s+)?(const\s+)?(async\s+)?(unsafe\s+)?fn\s/,
  ];
  const isBoundary = (line: string): boolean => boundaries.some((re) => re.test(line));

  const chunks: Chunk[] = [];
  const lines = content.split("\n");
  let current: string[] = [];
  let ordinal = 0;

  function flush(): void {
    const text = current.join("\n").trim();
    if (text.length > 30) {
      chunks.push({ content: text, sourceUri: uri, kind: "chunk", ordinal: ordinal++ });
    }
    current = [];
  }

  for (const line of lines) {
    if (current.length > 3 && isBoundary(line)) {
      flush();
    }
    current.push(line);
  }
  flush();

  return chunks;
}
|
|
183
|
+
|
|
184
|
+
// ─── Node Creation ───
|
|
185
|
+
|
|
186
|
+
/**
 * Build one graph node per chunk, in input order.
 *
 * Each node gets a fresh `bn_`-prefixed id (first 12 characters of a random
 * UUID), a null embedding to be filled in by the caller, "scanner" trust, no
 * tags, a token count approximated as ceil(characters / 4), and the chunk
 * ordinal stored in `metadata`. All nodes of one call share a single
 * timestamp for `createdAt` and `updatedAt`.
 */
export function createNodesFromChunks(chunks: Chunk[]): BrainNode[] {
  const timestamp = Date.now();
  const nodes: BrainNode[] = [];

  for (const { content, kind, sourceUri, ordinal } of chunks) {
    const id = `bn_${randomUUID().slice(0, 12)}`;
    const tokenCount = Math.ceil(content.length / 4);
    nodes.push({
      id,
      kind,
      content,
      embedding: null, // Computed separately
      sourceUri,
      trust: "scanner" as const,
      tags: [],
      tokenCount,
      metadata: { ordinal },
      createdAt: timestamp,
      updatedAt: timestamp,
    });
  }

  return nodes;
}
|
|
202
|
+
|
|
203
|
+
// ─── Edge Creation ───
|
|
204
|
+
|
|
205
|
+
/**
 * Sibling edges: link each chunk to its neighbour within the same document.
 *
 * Nodes are grouped by `sourceUri` (nodes without one are ignored), ordered
 * by `metadata.ordinal` (missing ordinals count as 0), and every adjacent
 * pair is connected in both directions with weight 0.8 and prior 1.0.
 */
export function createSiblingEdges(nodes: BrainNode[]): BrainEdge[] {
  const timestamp = Date.now();
  const ordinalOf = (n: BrainNode): number => (n.metadata as any).ordinal ?? 0;

  // Bucket nodes per source document, preserving first-seen document order.
  const perDocument = new Map<string, BrainNode[]>();
  for (const node of nodes) {
    const uri = node.sourceUri;
    if (!uri) continue;
    const bucket = perDocument.get(uri);
    if (bucket) {
      bucket.push(node);
    } else {
      perDocument.set(uri, [node]);
    }
  }

  const link = (from: BrainNode, to: BrainNode): BrainEdge => ({
    source: from.id,
    target: to.id,
    kind: "sibling",
    weight: 0.8,
    prior: 1.0,
    metadata: {},
    decayedAt: timestamp,
    createdAt: timestamp,
  });

  const edges: BrainEdge[] = [];
  for (const bucket of perDocument.values()) {
    const ordered = [...bucket].sort((a, b) => ordinalOf(a) - ordinalOf(b));
    ordered.slice(1).forEach((next, i) => {
      const prev = ordered[i];
      // Forward edge first, then its reverse, so the link is bidirectional.
      edges.push(link(prev, next), link(next, prev));
    });
  }

  return edges;
}
|
|
251
|
+
|
|
252
|
+
/**
 * Semantic edges: connect nodes whose embeddings are similar enough.
 *
 * Only nodes that have an embedding take part. For each such node, every
 * other embedded node is scored with cosine similarity; scores at or above
 * `threshold` are kept and the 3 highest become outgoing edges whose weight
 * and prior both equal the score. Edges are directed: a node may point at a
 * neighbour that does not point back.
 */
export function createSemanticEdges(nodes: BrainNode[], threshold: number): BrainEdge[] {
  const timestamp = Date.now();
  const embedded = nodes.filter((n) => n.embedding);

  return embedded.flatMap((node) =>
    embedded
      .filter((other) => other.id !== node.id)
      .map((other) => ({
        targetId: other.id,
        score: cosineSimilarity(node.embedding!, other.embedding!),
      }))
      .filter(({ score }) => score >= threshold)
      .sort((a, b) => b.score - a.score)
      .slice(0, 3)
      .map(
        ({ targetId, score }): BrainEdge => ({
          source: node.id,
          target: targetId,
          kind: "semantic",
          weight: score,
          prior: score,
          metadata: {},
          decayedAt: timestamp,
          createdAt: timestamp,
        }),
      ),
  );
}
|
|
288
|
+
|
|
289
|
+
// ─── Full Init Pipeline ───
|
|
290
|
+
|
|
291
|
+
/**
 * Aggregate figures describing a brain init run.
 *
 * NOTE(review): initBrain in this file returns `{ nodes, edges, summary }`
 * rather than this shape; confirm where this interface is consumed.
 */
export interface InitResult {
  /** Number of nodes. */
  nodeCount: number;

  /** Number of edges. */
  edgeCount: number;

  /** Node count keyed by a string label; presumably the node kind (verify against callers). */
  nodesByKind: Record<string, number>;

  /** Human-readable summary text. */
  summary: string;
}
|
|
297
|
+
|
|
298
|
+
/**
 * Run the full init pipeline for a workspace: discover sources, chunk them,
 * create nodes, embed each node, build sibling and semantic edges, and log a
 * one-line summary.
 *
 * Embeddings are requested one node at a time, in node order, from the first
 * 512 characters of the node content. A failed embedding is logged as a
 * warning and leaves that node without an embedding; it does not abort init.
 *
 * @param params.workspaceRoot     Directory to scan for sources.
 * @param params.embedFn           Produces the embedding for a piece of text.
 * @param params.semanticThreshold Minimum cosine similarity for a semantic edge.
 * @param params.log               Sink for progress (`info`) and failure (`warn`) messages.
 * @returns The created nodes, all edges (sibling first, then semantic), and
 *          the summary line that was logged.
 */
export async function initBrain(params: {
  workspaceRoot: string;
  embedFn: (text: string) => Promise<Float32Array>;
  semanticThreshold: number;
  log: { info: (msg: string) => void; warn: (msg: string) => void };
}): Promise<{ nodes: BrainNode[]; edges: BrainEdge[]; summary: string }> {
  const log = params.log;

  // Step 1: Discover
  log.info("[brain] Discovering sources...");
  const sources = discoverSources(params.workspaceRoot);
  log.info(`[brain] Found ${sources.length} sources`);

  // Step 2: Chunk
  const chunks = chunkSources(sources);
  log.info(`[brain] Created ${chunks.length} chunks`);

  // Step 3: Create nodes
  const nodes = createNodesFromChunks(chunks);

  // Step 4: Compute embeddings (sequentially; one failure only skips that node)
  log.info("[brain] Computing embeddings...");
  for (const node of nodes) {
    const excerpt = node.content.slice(0, 512);
    try {
      node.embedding = await params.embedFn(excerpt);
    } catch (err) {
      log.warn(`[brain] Embedding failed for ${node.id}: ${(err as Error).message}`);
    }
  }

  const embeddedCount = nodes.reduce((count, n) => (n.embedding ? count + 1 : count), 0);
  log.info(`[brain] Embedded ${embeddedCount}/${nodes.length} nodes`);

  // Step 5: Create edges
  const siblingEdges = createSiblingEdges(nodes);
  const semanticEdges = createSemanticEdges(nodes, params.semanticThreshold);
  const edges = siblingEdges.concat(semanticEdges);
  log.info(`[brain] Created ${edges.length} edges (${siblingEdges.length} sibling, ${semanticEdges.length} semantic)`);

  // Step 6: Summary
  const tally: Record<string, number> = {};
  nodes.forEach(({ kind }) => {
    tally[kind] = (tally[kind] ?? 0) + 1;
  });
  const breakdown = Object.entries(tally)
    .map(([kind, count]) => `${count} ${kind}`)
    .join(", ");

  const summary = `Brain initialized: ${nodes.length} nodes (${breakdown}), ${edges.length} edges`;
  log.info(`[brain] ${summary}`);

  return { nodes, edges, summary };
}
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SQLite schema for the brain's learned retrieval graph.
|
|
3
|
+
*
|
|
4
|
+
* These tables live alongside LCM's existing tables in the same database.
|
|
5
|
+
* All brain tables are prefixed with "brain_" to avoid collisions.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { DatabaseSync } from "node:sqlite";
|
|
9
|
+
|
|
10
|
+
/**
 * Create the brain tables and indexes if they do not exist yet.
 *
 * Every statement uses IF NOT EXISTS, so calling this repeatedly is safe.
 * The whole script runs inside a SAVEPOINT: if any statement fails, nothing
 * from this call is left behind and the original error is rethrown. A
 * savepoint (rather than BEGIN) is used so this also works when the caller
 * already has a transaction open on `db`.
 *
 * @param db Open SQLite connection to migrate.
 * @throws Whatever `db.exec` throws when a statement fails.
 */
export function runBrainMigrations(db: DatabaseSync): void {
  const schema = `
    -- ═══════════════════════════════════════════
    -- Brain Knowledge Graph
    -- ═══════════════════════════════════════════

    CREATE TABLE IF NOT EXISTS brain_nodes (
      id TEXT PRIMARY KEY,
      kind TEXT NOT NULL,
      content TEXT NOT NULL,
      embedding BLOB,
      source_uri TEXT,
      trust TEXT NOT NULL DEFAULT 'scanner',
      tags TEXT NOT NULL DEFAULT '[]',
      token_count INTEGER NOT NULL DEFAULT 0,
      metadata TEXT NOT NULL DEFAULT '{}',
      created_at INTEGER NOT NULL,
      updated_at INTEGER NOT NULL
    );

    CREATE TABLE IF NOT EXISTS brain_edges (
      source TEXT NOT NULL,
      target TEXT NOT NULL,
      kind TEXT NOT NULL,
      weight REAL NOT NULL DEFAULT 0.5,
      prior REAL NOT NULL DEFAULT 0.5,
      metadata TEXT NOT NULL DEFAULT '{}',
      decayed_at INTEGER NOT NULL,
      created_at INTEGER NOT NULL,
      PRIMARY KEY (source, target, kind)
    );

    CREATE INDEX IF NOT EXISTS brain_edges_source_idx ON brain_edges(source);
    CREATE INDEX IF NOT EXISTS brain_edges_target_idx ON brain_edges(target);
    CREATE INDEX IF NOT EXISTS brain_nodes_kind_idx ON brain_nodes(kind);

    CREATE TABLE IF NOT EXISTS brain_seed_weights (
      node_id TEXT PRIMARY KEY,
      weight REAL NOT NULL DEFAULT 0.0,
      updated_at INTEGER NOT NULL
    );

    -- ═══════════════════════════════════════════
    -- Episodes (full traversal records)
    -- ═══════════════════════════════════════════

    CREATE TABLE IF NOT EXISTS brain_episodes (
      id TEXT PRIMARY KEY,
      conversation_id INTEGER,
      query_text TEXT,
      query_embedding BLOB,
      trajectory TEXT NOT NULL,
      fired_nodes TEXT NOT NULL,
      vetoed_nodes TEXT NOT NULL DEFAULT '[]',
      context_chars INTEGER NOT NULL DEFAULT 0,
      reward REAL,
      reward_source TEXT,
      pack_version INTEGER,
      updated INTEGER NOT NULL DEFAULT 0,
      created_at INTEGER NOT NULL
    );

    CREATE INDEX IF NOT EXISTS brain_episodes_created_idx ON brain_episodes(created_at);

    -- ═══════════════════════════════════════════
    -- Labels (pending reward signals)
    -- ═══════════════════════════════════════════

    CREATE TABLE IF NOT EXISTS brain_labels (
      id TEXT PRIMARY KEY,
      episode_id TEXT NOT NULL,
      source TEXT NOT NULL,
      value REAL NOT NULL,
      confidence REAL NOT NULL DEFAULT 1.0,
      reason TEXT,
      applied INTEGER NOT NULL DEFAULT 0,
      created_at INTEGER NOT NULL
    );

    CREATE INDEX IF NOT EXISTS brain_labels_episode_idx ON brain_labels(episode_id);
    CREATE INDEX IF NOT EXISTS brain_labels_applied_idx ON brain_labels(applied);

    -- ═══════════════════════════════════════════
    -- Raw Evidence + Resolved Label Decisions
    -- ═══════════════════════════════════════════

    CREATE TABLE IF NOT EXISTS brain_evidence (
      id TEXT PRIMARY KEY,
      episode_id TEXT NOT NULL,
      conversation_id INTEGER,
      source TEXT NOT NULL,
      kind TEXT NOT NULL,
      value REAL NOT NULL,
      confidence REAL NOT NULL DEFAULT 1.0,
      reason TEXT,
      content_snippet TEXT,
      metadata TEXT NOT NULL DEFAULT '{}',
      resolved INTEGER NOT NULL DEFAULT 0,
      created_at INTEGER NOT NULL
    );

    CREATE INDEX IF NOT EXISTS brain_evidence_episode_idx ON brain_evidence(episode_id);
    CREATE INDEX IF NOT EXISTS brain_evidence_resolved_idx ON brain_evidence(resolved, created_at);

    CREATE TABLE IF NOT EXISTS brain_resolved_labels (
      id TEXT PRIMARY KEY,
      evidence_id TEXT NOT NULL,
      episode_id TEXT NOT NULL,
      source TEXT NOT NULL,
      value REAL NOT NULL,
      confidence REAL NOT NULL DEFAULT 1.0,
      resolution TEXT NOT NULL,
      label_id TEXT,
      note TEXT,
      created_at INTEGER NOT NULL
    );

    CREATE INDEX IF NOT EXISTS brain_resolved_labels_episode_idx ON brain_resolved_labels(episode_id, created_at);
    CREATE INDEX IF NOT EXISTS brain_resolved_labels_evidence_idx ON brain_resolved_labels(evidence_id);

    -- ═══════════════════════════════════════════
    -- Packs (immutable serving snapshots)
    -- ═══════════════════════════════════════════

    CREATE TABLE IF NOT EXISTS brain_packs (
      version INTEGER PRIMARY KEY AUTOINCREMENT,
      node_count INTEGER NOT NULL,
      edge_count INTEGER NOT NULL,
      health_json TEXT NOT NULL,
      promoted_at INTEGER,
      rolled_back INTEGER NOT NULL DEFAULT 0,
      created_at INTEGER NOT NULL
    );

    -- ═══════════════════════════════════════════
    -- Mutation Proposals
    -- ═══════════════════════════════════════════

    CREATE TABLE IF NOT EXISTS brain_mutations (
      id TEXT PRIMARY KEY,
      kind TEXT NOT NULL,
      proposal TEXT NOT NULL,
      evidence TEXT,
      expected_gain REAL,
      status TEXT NOT NULL DEFAULT 'pending',
      created_at INTEGER NOT NULL,
      resolved_at INTEGER
    );

    -- ═══════════════════════════════════════════
    -- Decision Traces
    -- ═══════════════════════════════════════════

    CREATE TABLE IF NOT EXISTS brain_traces (
      id TEXT PRIMARY KEY,
      episode_id TEXT,
      pack_version INTEGER,
      query_text TEXT,
      seed_scores TEXT NOT NULL,
      trajectory TEXT NOT NULL,
      fired_nodes TEXT NOT NULL,
      vetoed_nodes TEXT NOT NULL DEFAULT '[]',
      context_chars INTEGER NOT NULL,
      footer TEXT NOT NULL,
      created_at INTEGER NOT NULL
    );

    CREATE INDEX IF NOT EXISTS brain_traces_created_idx ON brain_traces(created_at DESC);

    -- ═══════════════════════════════════════════
    -- Training State (key-value)
    -- ═══════════════════════════════════════════

    CREATE TABLE IF NOT EXISTS brain_training_state (
      key TEXT PRIMARY KEY,
      value TEXT NOT NULL
    );
  `;

  db.exec("SAVEPOINT brain_migrations");
  try {
    db.exec(schema);
    db.exec("RELEASE SAVEPOINT brain_migrations");
  } catch (err) {
    try {
      // Undo everything this call created, then pop the savepoint.
      db.exec("ROLLBACK TO SAVEPOINT brain_migrations");
      db.exec("RELEASE SAVEPOINT brain_migrations");
    } catch {
      // The savepoint is already gone (the engine rolled the transaction
      // back itself); the original error below is the one worth reporting.
    }
    throw err;
  }
}
|