@jonathangu/openclawbrain 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +412 -0
- package/bin/openclawbrain.js +15 -0
- package/docs/END_STATE.md +244 -0
- package/docs/EVIDENCE.md +128 -0
- package/docs/RELEASE_CONTRACT.md +91 -0
- package/docs/agent-tools.md +106 -0
- package/docs/architecture.md +224 -0
- package/docs/configuration.md +178 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/status.json +87 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/summary.md +16 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/trace.json +273 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/validation-report.json +652 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/channels-status.txt +31 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/config-snapshot.json +66 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/doctor.json +14 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/gateway-probe.txt +34 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/gateway-status.txt +41 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/logs.txt +428 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/status-all.txt +60 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/status.json +223 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/summary.md +13 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/trace.json +4 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/validation-report.json +334 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/channels-status.txt +25 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/config-snapshot.json +91 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/doctor.json +14 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/gateway-probe.txt +36 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/gateway-status.txt +44 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/logs.txt +428 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-doctor.json +10 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-sdk-probe.json +11 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-setup-only.json +12 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/summary.md +30 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/validation-report.json +72 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/status-all.txt +63 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/status.json +200 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/summary.md +13 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/trace.json +4 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/validation-report.json +311 -0
- package/docs/evidence/README.md +16 -0
- package/docs/fts5.md +161 -0
- package/docs/tui.md +506 -0
- package/index.ts +1372 -0
- package/openclaw.plugin.json +136 -0
- package/package.json +66 -0
- package/src/assembler.ts +804 -0
- package/src/brain-cli.ts +316 -0
- package/src/brain-core/decay.ts +35 -0
- package/src/brain-core/episode.ts +82 -0
- package/src/brain-core/graph.ts +321 -0
- package/src/brain-core/health.ts +116 -0
- package/src/brain-core/mutator.ts +281 -0
- package/src/brain-core/pack.ts +117 -0
- package/src/brain-core/policy.ts +153 -0
- package/src/brain-core/replay.ts +1 -0
- package/src/brain-core/teacher.ts +105 -0
- package/src/brain-core/trace.ts +40 -0
- package/src/brain-core/traverse.ts +230 -0
- package/src/brain-core/types.ts +405 -0
- package/src/brain-core/update.ts +123 -0
- package/src/brain-harvest/human.ts +46 -0
- package/src/brain-harvest/scanner.ts +98 -0
- package/src/brain-harvest/self.ts +147 -0
- package/src/brain-runtime/assembler-extension.ts +230 -0
- package/src/brain-runtime/evidence-detectors.ts +68 -0
- package/src/brain-runtime/graph-io.ts +72 -0
- package/src/brain-runtime/harvester-extension.ts +98 -0
- package/src/brain-runtime/service.ts +659 -0
- package/src/brain-runtime/tools.ts +109 -0
- package/src/brain-runtime/worker-state.ts +106 -0
- package/src/brain-runtime/worker-supervisor.ts +169 -0
- package/src/brain-store/embedding.ts +179 -0
- package/src/brain-store/init.ts +347 -0
- package/src/brain-store/migrations.ts +188 -0
- package/src/brain-store/store.ts +816 -0
- package/src/brain-worker/child-runner.ts +321 -0
- package/src/brain-worker/jobs.ts +12 -0
- package/src/brain-worker/mutation-job.ts +5 -0
- package/src/brain-worker/promotion-job.ts +5 -0
- package/src/brain-worker/protocol.ts +79 -0
- package/src/brain-worker/teacher-job.ts +5 -0
- package/src/brain-worker/update-job.ts +5 -0
- package/src/brain-worker/worker.ts +422 -0
- package/src/compaction.ts +1332 -0
- package/src/db/config.ts +265 -0
- package/src/db/connection.ts +72 -0
- package/src/db/features.ts +42 -0
- package/src/db/migration.ts +561 -0
- package/src/engine.ts +1995 -0
- package/src/expansion-auth.ts +351 -0
- package/src/expansion-policy.ts +303 -0
- package/src/expansion.ts +383 -0
- package/src/integrity.ts +600 -0
- package/src/large-files.ts +527 -0
- package/src/openclaw-bridge.ts +22 -0
- package/src/retrieval.ts +357 -0
- package/src/store/conversation-store.ts +748 -0
- package/src/store/fts5-sanitize.ts +29 -0
- package/src/store/full-text-fallback.ts +74 -0
- package/src/store/index.ts +29 -0
- package/src/store/summary-store.ts +918 -0
- package/src/summarize.ts +847 -0
- package/src/tools/common.ts +53 -0
- package/src/tools/lcm-conversation-scope.ts +76 -0
- package/src/tools/lcm-describe-tool.ts +234 -0
- package/src/tools/lcm-expand-query-tool.ts +594 -0
- package/src/tools/lcm-expand-tool.delegation.ts +556 -0
- package/src/tools/lcm-expand-tool.ts +448 -0
- package/src/tools/lcm-expansion-recursion-guard.ts +286 -0
- package/src/tools/lcm-grep-tool.ts +200 -0
- package/src/transcript-repair.ts +301 -0
- package/src/types.ts +149 -0
|
@@ -0,0 +1,405 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core type definitions for the OpenClawBrain learning layer.
|
|
3
|
+
*
|
|
4
|
+
* These types define the learned retrieval graph, traversal MDP,
|
|
5
|
+
* episodes, labels, packs, mutations, and traces.
|
|
6
|
+
*
|
|
7
|
+
* Paper reference: Gu (2016), "Reinforcement Learning"
|
|
8
|
+
* Lemma 6.1: ∂/∂ρ v_ρ(s_t) = E[z · Σ_{l=t}^{T} ∂logP_ρ(a_l|s_l)/∂ρ]
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
// ═══════════════════════════════════════════
// Node & Edge Types
// ═══════════════════════════════════════════

/** What a node in the learned retrieval graph represents. */
export type NodeKind =
  | "chunk" // Document/code fragment
  | "workflow" // Multi-step procedure
  | "correction" // Human-authored fix ("use X not Y")
  | "toolcard" // When/how to use a specific tool
  | "episode_anchor" // Pointer to a prior successful episode
  | "summary_bridge"; // Bridge to LCM summary DAG

/** How two nodes are connected; each kind carries its own structural prior. */
export type EdgeKind =
  | "sibling" // Same-document adjacency (prior = 1.0)
  | "semantic" // Embedding cosine similarity (prior = cosine)
  | "learned" // Created by learning (prior = 0.5)
  | "seed" // Learned seed-head parameter from __START__
  | "inhibitory" // Suppresses traversal (weight < 0)
  | "bridge"; // Links brain node to LCM summary

// Virtual source node for the seed phase; `TraversalState.currentNodeId`
// is null while traversal is still at this phase.
export const START_NODE_ID = "__START__";
|
|
32
|
+
|
|
33
|
+
/** Additive adjustment to the learned seed weight of a node (edge from __START__). */
export interface SeedWeightUpdate {
  kind: "seed";
  nodeId: string;
  delta: number; // Signed additive change
}

/** Additive adjustment to the learned weight of an edge between two nodes. */
export interface EdgeWeightUpdate {
  kind: "edge";
  source: string;
  target: string;
  delta: number; // Signed additive change
}

/** Discriminated union of policy-parameter updates (tag: `kind`). */
export type PolicyWeightUpdate = SeedWeightUpdate | EdgeWeightUpdate;

/** Provenance tiers for content and rewards; see `trustRank` for ordering. */
export type TrustLevel = "human" | "scanner" | "teacher" | "self";

/** Where a reward value came from (same tiers as TrustLevel). */
export type RewardSource = TrustLevel;
|
|
51
|
+
|
|
52
|
+
/** A node in the learned retrieval graph. */
export interface BrainNode {
  id: string;
  kind: NodeKind;
  content: string; // Text delivered to the assembler when the node fires
  embedding: Float32Array | null; // null when no embedding has been computed
  sourceUri: string | null; // Origin location, when harvested from a source
  trust: TrustLevel; // Provenance tier of the content
  tags: string[];
  tokenCount: number; // Token count of `content`
  metadata: Record<string, unknown>;
  createdAt: number;
  updatedAt: number;
}

/** A directed edge in the learned retrieval graph. */
export interface BrainEdge {
  source: string; // Source node id
  target: string; // Target node id
  kind: EdgeKind;
  weight: number; // Learned parameter ρ (signed; negative = suppress)
  prior: number; // Immutable structural baseline
  metadata: Record<string, unknown>;
  decayedAt: number; // NOTE(review): presumably when decay last touched this edge — confirm against decay.ts
  createdAt: number;
}
|
|
76
|
+
|
|
77
|
+
// ═══════════════════════════════════════════
// Traversal MDP (paper-faithful)
// ═══════════════════════════════════════════

/**
 * State s_t in the MDP.
 * Paper: S = {s_0, s_1, s_2, ...}
 */
export interface TraversalState {
  currentNodeId: string | null; // null at seed phase (t=0)
  queryEmbedding: Float32Array;
  visited: Set<string>; // Node ids already visited this traversal
  fired: string[]; // Node ids selected ("fired") so far
  budgetRemaining: number;
  hopCount: number;
  maxHops: number;
}

/**
 * Action a_t in the MDP.
 * Paper: A(s) ⊂ {a_0, a_1, a_2, ...}
 * Our action set: A(s) = { traverse(neighbor) } ∪ { STOP }
 */
export type TraversalAction =
  | { type: "traverse"; targetNodeId: string; seedScore?: number }
  | { type: "stop" };

/** Per-candidate scoring breakdown for the seed (t=0) phase. */
export interface SeedScore {
  nodeId: string;
  priorScore: number; // Structural/prior component
  learnedSeedWeight: number; // Learned seed-head parameter
  policyScore: number; // Combined score fed to the policy
  probability: number; // Softmax probability over seed candidates
  chosen: boolean; // Whether this seed was selected
}

/** Persisted learned seed-head weight for a node. */
export interface SeedWeight {
  nodeId: string;
  weight: number;
  updatedAt: number;
}

/**
 * One step of a recorded trajectory.
 * Captures the full candidate distribution for REINFORCE gradient computation.
 */
export interface TrajectoryStep {
  stateSnapshot: {
    currentNodeId: string | null;
    hopCount: number;
    budgetRemaining: number;
    visitedCount: number;
    firedCount: number;
  };
  candidates: Array<{
    action: TraversalAction;
    score: number;
    probability: number;
    priorScore?: number; // Present for seed-phase candidates
    learnedSeedWeight?: number; // Present for seed-phase candidates
  }>;
  chosenAction: TraversalAction;
  chosenActionProbability: number; // P(chosen action | state) — used by the gradient
  stopProbability: number;
}
|
|
142
|
+
|
|
143
|
+
// ═══════════════════════════════════════════
// Episodes & Labels
// ═══════════════════════════════════════════

/**
 * A complete episode: one traversal from seed to terminal state.
 * Paper: A game that ends in finite time with terminal reward z.
 */
export interface Episode {
  id: string;
  conversationId: number | null;
  queryText: string;
  queryEmbedding: Float32Array | null;
  trajectory: TrajectoryStep[]; // Recorded steps for REINFORCE
  firedNodes: string[]; // Nodes delivered to the assembler
  vetoedNodes: string[]; // Nodes suppressed during traversal
  contextChars: number;
  reward: number | null; // Terminal reward z ∈ [-1, +1]; null until labeled
  rewardSource: RewardSource | null;
  packVersion: number | null; // Pack under which the episode ran
  createdAt: number;
}

/**
 * A reward label from one of four sources.
 * Human > self > scanner > teacher by trust ranking.
 */
export interface Label {
  id: string;
  episodeId: string;
  source: RewardSource;
  value: number; // z ∈ [-1, +1]
  confidence: number; // [0, 1]
  reason: string | null;
  applied: boolean; // Whether the label has been consumed by learning
  createdAt: number;
}

/** The kind of signal that produced a piece of evidence. */
export type BrainEvidenceKind =
  | "human_feedback"
  | "self_result"
  | "scanner_signal"
  | "teacher_review"
  | "teach_correction";

/** Raw evidence harvested before it is resolved into a label. */
export interface BrainEvidence {
  id: string;
  episodeId: string;
  conversationId: number | null;
  source: RewardSource;
  kind: BrainEvidenceKind;
  value: number; // Proposed reward value
  confidence: number;
  reason: string | null;
  contentSnippet: string | null; // Excerpt that triggered the detection
  metadata: Record<string, unknown>;
  resolved: boolean; // Whether resolution has processed this evidence
  createdAt: number;
}

/** Outcome of resolving a piece of evidence. */
export type BrainEvidenceResolution =
  | "promoted_to_label"
  | "discarded_missing_episode"
  | "discarded_lower_trust"
  | "discarded_duplicate";

/** Audit record of how one piece of evidence was resolved. */
export interface ResolvedLabel {
  id: string;
  evidenceId: string;
  episodeId: string;
  source: RewardSource;
  value: number;
  confidence: number;
  resolution: BrainEvidenceResolution;
  labelId: string | null; // Set when resolution === "promoted_to_label"
  note: string | null;
  createdAt: number;
}
|
|
221
|
+
|
|
222
|
+
// ═══════════════════════════════════════════
// Packs & Mutations
// ═══════════════════════════════════════════

/** A versioned snapshot of the graph that can be promoted or rolled back. */
export interface Pack {
  version: number;
  nodeCount: number;
  edgeCount: number;
  healthJson: string; // Serialized HealthMetrics captured at pack time
  promotedAt: number | null; // null while the pack has not been promoted
  rolledBack: boolean;
  createdAt: number;
}

/** Structural graph mutation categories. */
export type MutationKind = "split" | "merge" | "prune" | "connect" | "inject";
/** Lifecycle of a mutation proposal. */
export type MutationStatus = "pending" | "validated" | "promoted" | "rejected";

/** A proposed structural change to the graph, tracked through validation. */
export interface MutationProposal {
  id: string;
  kind: MutationKind;
  proposal: unknown; // Kind-specific payload
  evidence: unknown | null; // Supporting data, if any
  expectedGain: number | null;
  status: MutationStatus;
  createdAt: number;
  resolvedAt: number | null; // Set once the proposal leaves "pending"
}
|
|
249
|
+
|
|
250
|
+
// ═══════════════════════════════════════════
// Health Metrics
// ═══════════════════════════════════════════

/** Aggregate health snapshot of the graph (serialized into `Pack.healthJson`). */
export interface HealthMetrics {
  nodeCount: number;
  edgeCount: number;
  nodesByKind: Record<NodeKind, number>;
  edgesByKind: Record<EdgeKind, number>;
  firedPerQuery: number; // Average fired nodes per query
  dormantPercent: number;
  inhibitoryPercent: number;
  orphanCount: number;
  avgPathLength: number;
  avgReward: number;
  crossFileEdgePercent: number;
  churn: number;
  packVersion: number;
  lastUpdateAt: number;
  totalEpisodes: number;
}
|
|
271
|
+
|
|
272
|
+
// ═══════════════════════════════════════════
// Decision Traces
// ═══════════════════════════════════════════

/** Full record of one routing decision, for inspection and debugging. */
export interface DecisionTrace {
  id: string;
  episodeId: string | null;
  packVersion: number | null;
  queryText: string;
  seedScores: SeedScore[]; // Seed-phase candidate breakdown
  trajectory: TrajectoryStep[]; // Hop-by-hop decisions
  firedNodes: string[];
  vetoedNodes: string[];
  contextChars: number;
  footer: string;
  createdAt: number;
}
|
|
289
|
+
|
|
290
|
+
// ═══════════════════════════════════════════
// Configuration
// ═══════════════════════════════════════════

/**
 * Runtime configuration for the brain layer.
 * See DEFAULT_BRAIN_CONFIG below for the default values.
 */
export interface BrainConfig {
  enabled: boolean;
  root: string;
  maxHops: number; // Hard traversal hop limit
  servingTemperature: number; // Softmax temperature when serving
  learningTemperature: number; // Softmax temperature when learning
  budgetFraction: number;
  maxSeeds: number;
  semanticThreshold: number;
  learningRate: number;
  baselineAlpha: number; // EMA factor for the REINFORCE baseline
  decayRate: number;
  trainerIntervalMs: number;
  workerMode: "child" | "in_process";
  workerHeartbeatTimeoutMs: number;
  workerRestartDelayMs: number;
  teacherEnabled: boolean;
  mutationsEnabled: boolean;
  replayEpisodeCount: number;
  minFiredPerQuery: number;
  maxDormantPercent: number;
  maxOrphanCount: number;
  shadowMode: boolean;
  embeddingProvider: string;
  embeddingModel: string;
  embeddingBaseUrl: string;
  teacherProvider: string;
  teacherModel: string;
}

/** Default configuration; empty strings mean "not configured". */
export const DEFAULT_BRAIN_CONFIG: BrainConfig = {
  enabled: true,
  root: "",
  maxHops: 8,
  servingTemperature: 0.1,
  learningTemperature: 1.0,
  budgetFraction: 0.3,
  maxSeeds: 10,
  semanticThreshold: 0.7,
  learningRate: 0.01,
  baselineAlpha: 0.1,
  decayRate: 0.995,
  trainerIntervalMs: 30_000,
  workerMode: "child",
  workerHeartbeatTimeoutMs: 90_000,
  workerRestartDelayMs: 5_000,
  teacherEnabled: true,
  mutationsEnabled: true,
  replayEpisodeCount: 100,
  minFiredPerQuery: 1.0,
  maxDormantPercent: 0.3,
  maxOrphanCount: 10,
  shadowMode: false,
  embeddingProvider: "openai",
  embeddingModel: "",
  embeddingBaseUrl: "",
  teacherProvider: "",
  teacherModel: "",
};
|
|
353
|
+
|
|
354
|
+
// ═══════════════════════════════════════════
// Traversal Result (returned to assembler)
// ═══════════════════════════════════════════

/** Output of one traversal, handed to the context assembler. */
export interface TraversalResult {
  fired: Array<{ nodeId: string; kind: NodeKind; content: string; tokenCount: number }>;
  vetoed: Array<{ nodeId: string; reason: string }>;
  episode: Episode; // Recorded episode (reward still unlabeled)
  trace: DecisionTrace; // Full decision record for debugging
}

/**
 * The route_fn interface: query + candidate IDs → selected subset.
 */
export type RouteFn = (query: string, candidateIds: string[]) => Promise<string[]>;
|
|
369
|
+
|
|
370
|
+
// ═══════════════════════════════════════════
// Policy Parameters
// ═══════════════════════════════════════════

/** Tunable parameters of the traversal policy. */
export interface PolicyParams {
  temperature: number; // Softmax temperature
  stopBias: number; // Base score of the STOP action
  budgetPressure: number; // How strongly low budget pushes toward STOP
  hopPressure: number; // How strongly hop count pushes toward STOP
  edgeKindBias: Record<EdgeKind, number>; // Additive score bias per edge kind
}

/** Default policy parameters; inhibitory edges are effectively never taken. */
export const DEFAULT_POLICY_PARAMS: PolicyParams = {
  temperature: 1.0,
  stopBias: -2.0,
  budgetPressure: 3.0,
  hopPressure: 2.0,
  edgeKindBias: {
    sibling: 0.0,
    semantic: 0.1,
    learned: 0.2,
    seed: 0.15,
    inhibitory: -10.0,
    bridge: 0.0,
  },
};
|
|
396
|
+
|
|
397
|
+
// Trust rank ordering: human > self > scanner > teacher
|
|
398
|
+
export function trustRank(source: RewardSource): number {
|
|
399
|
+
switch (source) {
|
|
400
|
+
case "human": return 4;
|
|
401
|
+
case "self": return 3;
|
|
402
|
+
case "scanner": return 2;
|
|
403
|
+
case "teacher": return 1;
|
|
404
|
+
}
|
|
405
|
+
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* REINFORCE update rule implementing Lemma 6.1 from the paper.
|
|
3
|
+
*
|
|
4
|
+
* Paper's correct update direction:
|
|
5
|
+
* ∂/∂ρ v_ρ(s_t) = E[z · Σ_{l=t}^{T} ∂logP_ρ(a_l|s_l)/∂ρ]
|
|
6
|
+
*
|
|
7
|
+
* Key insight: The sum goes over the ENTIRE trajectory from t to T.
|
|
8
|
+
* This assigns credit to every routing decision that led to the outcome,
|
|
9
|
+
* not just the last step. Williams (1992) equation (1) is one-step only.
|
|
10
|
+
*
|
|
11
|
+
* For the softmax policy over edge weights:
|
|
12
|
+
* ∂logP(a_j|s)/∂w_j = 1 - P(a_j|s) (for the chosen action's weight)
|
|
13
|
+
* ∂logP(a_j|s)/∂w_k = -P(a_k|s) (for other actions' weights)
|
|
14
|
+
*
|
|
15
|
+
* We update only the chosen edge's weight at each step, using:
|
|
16
|
+
* Δw_j = learningRate × (z - baseline) × (1 - P(a_j|s))
|
|
17
|
+
*
|
|
18
|
+
* The full-trajectory sum is achieved by accumulating updates across all steps.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import type { Episode, PolicyWeightUpdate } from "./types.js";
|
|
22
|
+
import { START_NODE_ID } from "./types.js";
|
|
23
|
+
import type { BrainGraph } from "./graph.js";
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Compute REINFORCE weight updates from a completed episode.
|
|
27
|
+
*
|
|
28
|
+
* Implements Lemma 6.1: full-trajectory credit assignment.
|
|
29
|
+
* Every step l from 0 to T contributes to the gradient.
|
|
30
|
+
*/
|
|
31
|
+
export function computeReinforceUpdates(
|
|
32
|
+
episode: Episode,
|
|
33
|
+
learningRate: number,
|
|
34
|
+
baseline: number,
|
|
35
|
+
): PolicyWeightUpdate[] {
|
|
36
|
+
if (episode.reward === null) return [];
|
|
37
|
+
|
|
38
|
+
const advantage = episode.reward - baseline;
|
|
39
|
+
if (Math.abs(advantage) < 1e-8) return [];
|
|
40
|
+
|
|
41
|
+
const updates: Map<string, PolicyWeightUpdate> = new Map();
|
|
42
|
+
|
|
43
|
+
// Sum over all trajectory steps l = 0 to T (full-trajectory, not one-step)
|
|
44
|
+
for (const step of episode.trajectory) {
|
|
45
|
+
if (step.chosenAction.type !== "traverse") continue;
|
|
46
|
+
|
|
47
|
+
const sourceId = step.stateSnapshot.currentNodeId ?? START_NODE_ID;
|
|
48
|
+
const targetId = step.chosenAction.targetNodeId;
|
|
49
|
+
|
|
50
|
+
// ∂logP(a_l|s_l)/∂ρ for the softmax = (1 - P(a_l|s_l))
|
|
51
|
+
const gradLogP = 1 - step.chosenActionProbability;
|
|
52
|
+
|
|
53
|
+
// Δρ ∝ (z - baseline) × ∂logP/∂ρ
|
|
54
|
+
const delta = learningRate * advantage * gradLogP;
|
|
55
|
+
|
|
56
|
+
if (sourceId === START_NODE_ID) {
|
|
57
|
+
const key = `seed→${targetId}`;
|
|
58
|
+
const existing = updates.get(key);
|
|
59
|
+
if (existing && existing.kind === "seed") {
|
|
60
|
+
existing.delta += delta;
|
|
61
|
+
} else {
|
|
62
|
+
updates.set(key, { kind: "seed", nodeId: targetId, delta });
|
|
63
|
+
}
|
|
64
|
+
continue;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Accumulate: the full-trajectory sum means each step adds to the gradient
|
|
68
|
+
const key = `${sourceId}→${targetId}`;
|
|
69
|
+
|
|
70
|
+
const existing = updates.get(key);
|
|
71
|
+
if (existing && existing.kind === "edge") {
|
|
72
|
+
existing.delta += delta;
|
|
73
|
+
} else {
|
|
74
|
+
updates.set(key, { kind: "edge", source: sourceId, target: targetId, delta });
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
return [...updates.values()];
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Update running baseline via exponential moving average.
|
|
83
|
+
*
|
|
84
|
+
* baseline_{n+1} = α × z_n + (1 - α) × baseline_n
|
|
85
|
+
*
|
|
86
|
+
* The baseline reduces variance in the REINFORCE estimate
|
|
87
|
+
* without introducing bias (standard variance reduction technique).
|
|
88
|
+
*/
|
|
89
|
+
export function updateBaseline(
|
|
90
|
+
currentBaseline: number,
|
|
91
|
+
newReward: number,
|
|
92
|
+
alpha: number,
|
|
93
|
+
): number {
|
|
94
|
+
return alpha * newReward + (1 - alpha) * currentBaseline;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Apply computed weight updates to graph edges.
|
|
99
|
+
*
|
|
100
|
+
* After applying, edge weights may become negative (inhibitory).
|
|
101
|
+
* This is intentional — the paper allows signed outcomes.
|
|
102
|
+
*/
|
|
103
|
+
export function applyWeightUpdates(
|
|
104
|
+
graph: BrainGraph,
|
|
105
|
+
updates: PolicyWeightUpdate[],
|
|
106
|
+
): void {
|
|
107
|
+
for (const update of updates) {
|
|
108
|
+
if (update.kind === "seed") {
|
|
109
|
+
const nextWeight = Math.max(-10, Math.min(10, graph.getSeedWeight(update.nodeId) + update.delta));
|
|
110
|
+
graph.setSeedWeight(update.nodeId, nextWeight);
|
|
111
|
+
continue;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
const edge = graph.getEdge(update.source, update.target);
|
|
115
|
+
if (!edge) continue;
|
|
116
|
+
|
|
117
|
+
// Update weight: w_new = w_old + Δw
|
|
118
|
+
edge.weight += update.delta;
|
|
119
|
+
|
|
120
|
+
// Clamp to prevent numerical explosion
|
|
121
|
+
edge.weight = Math.max(-10, Math.min(10, edge.weight));
|
|
122
|
+
}
|
|
123
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import type { HarvestResult } from "../brain-runtime/evidence-detectors.js";
|
|
2
|
+
|
|
3
|
+
const NEGATIVE_HUMAN_PATTERNS = [
|
|
4
|
+
/\bno[,.]?\s+(that'?s?\s+)?(not|wrong|incorrect)/i,
|
|
5
|
+
/\bdon'?t\s+(use|do|try)/i,
|
|
6
|
+
/\binstead\s+(use|do|try)/i,
|
|
7
|
+
/\bactually[,]?\s+(it'?s|the|you\s+should)/i,
|
|
8
|
+
/\bthat'?s\s+not\s+(right|correct|what)/i,
|
|
9
|
+
/\bwrong\s+(file|path|approach|answer|tool)/i,
|
|
10
|
+
/\bnot\s+what\s+i\s+(asked|wanted|meant)/i,
|
|
11
|
+
];
|
|
12
|
+
|
|
13
|
+
const POSITIVE_HUMAN_PATTERNS = [
|
|
14
|
+
/\b(perfect|exactly|correct)\b/i,
|
|
15
|
+
/\bthat('?s|\s+is)\s+(exactly\s+)?(right|correct|what\s+i)/i,
|
|
16
|
+
/\bgreat[,!]\s+(that|this)\s+(work|help)/i,
|
|
17
|
+
/\byes[,!.]\s+(that'?s?|exactly)/i,
|
|
18
|
+
];
|
|
19
|
+
|
|
20
|
+
export function detectHumanEvidence(content: string): HarvestResult | null {
|
|
21
|
+
for (const pattern of NEGATIVE_HUMAN_PATTERNS) {
|
|
22
|
+
if (pattern.test(content)) {
|
|
23
|
+
return {
|
|
24
|
+
value: -0.8,
|
|
25
|
+
source: "human",
|
|
26
|
+
reason: `negative pattern: ${pattern.source}`,
|
|
27
|
+
confidence: 0.9,
|
|
28
|
+
kind: "human_feedback",
|
|
29
|
+
extractor: "human_pattern",
|
|
30
|
+
};
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
for (const pattern of POSITIVE_HUMAN_PATTERNS) {
|
|
34
|
+
if (pattern.test(content)) {
|
|
35
|
+
return {
|
|
36
|
+
value: 0.8,
|
|
37
|
+
source: "human",
|
|
38
|
+
reason: `positive pattern: ${pattern.source}`,
|
|
39
|
+
confidence: 0.9,
|
|
40
|
+
kind: "human_feedback",
|
|
41
|
+
extractor: "human_pattern",
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
return null;
|
|
46
|
+
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import type { HarvestResult } from "../brain-runtime/evidence-detectors.js";
|
|
2
|
+
|
|
3
|
+
const EXPLICIT_SCANNER_PATTERNS = [
|
|
4
|
+
/\bexpand for details about\b/i,
|
|
5
|
+
/\bsummary bridge\b/i,
|
|
6
|
+
];
|
|
7
|
+
|
|
8
|
+
const DOC_MARKER_PATTERNS = [
|
|
9
|
+
/\b(runbook|workflow|playbook|checklist|troubleshooting|procedure)\b/i,
|
|
10
|
+
/\b(deploy|deployment|incident|recovery|rollback|pull request|release)\b/i,
|
|
11
|
+
];
|
|
12
|
+
|
|
13
|
+
const COMMAND_LINE_PATTERN = /^\s*(?:[-*]\s+|\d+\.\s+)?`?(gh|git|pnpm|npm|node|openclaw|ollama|curl|python3?|bash)\b.*$/gim;
|
|
14
|
+
const NUMBERED_STEP_PATTERN = /^\s*\d+\.\s+\S.+$/gm;
|
|
15
|
+
const BULLET_PATTERN = /^\s*[-*]\s+\S.+$/gm;
|
|
16
|
+
const HEADING_PATTERN = /^\s{0,3}#{1,6}\s+\S.+$/m;
|
|
17
|
+
const FILE_REF_PATTERN = /(?:^|[\s(])(?:\.?\/)?[\w./-]+\.(?:md|txt|ts|tsx|js|jsx|json|yaml|yml|sh|mjs)(?=$|[\s):,])/gim;
|
|
18
|
+
const IMPERATIVE_STEP_PATTERN = /^\s*(?:[-*]\s+|\d+\.\s+)?(?:inspect|check|retry|run|use|open|read|edit|verify|restart|re-?run|apply|deploy|create|install|record|compare|promote|rollback)\b/gim;
|
|
19
|
+
|
|
20
|
+
function countMatches(pattern: RegExp, content: string): number {
|
|
21
|
+
const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
|
|
22
|
+
const matcher = new RegExp(pattern.source, flags);
|
|
23
|
+
return Array.from(content.matchAll(matcher)).length;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
export function detectScannerEvidence(content: string): HarvestResult | null {
|
|
27
|
+
for (const pattern of EXPLICIT_SCANNER_PATTERNS) {
|
|
28
|
+
if (pattern.test(content)) {
|
|
29
|
+
return {
|
|
30
|
+
value: 0.25,
|
|
31
|
+
source: "scanner",
|
|
32
|
+
reason: `scanner marker: ${pattern.source}`,
|
|
33
|
+
confidence: 0.7,
|
|
34
|
+
kind: "scanner_signal",
|
|
35
|
+
extractor: "scanner_marker",
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
const signals: string[] = [];
|
|
41
|
+
let score = 0;
|
|
42
|
+
|
|
43
|
+
for (const pattern of DOC_MARKER_PATTERNS) {
|
|
44
|
+
if (pattern.test(content)) {
|
|
45
|
+
signals.push(`doc:${pattern.source}`);
|
|
46
|
+
score += 1.0;
|
|
47
|
+
break;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
const numberedSteps = countMatches(NUMBERED_STEP_PATTERN, content);
|
|
52
|
+
if (numberedSteps >= 2) {
|
|
53
|
+
signals.push(`numbered_steps:${numberedSteps}`);
|
|
54
|
+
score += 1.0;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
const bulletLines = countMatches(BULLET_PATTERN, content);
|
|
58
|
+
if (bulletLines >= 3) {
|
|
59
|
+
signals.push(`bullets:${bulletLines}`);
|
|
60
|
+
score += 0.5;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
const commandLines = countMatches(COMMAND_LINE_PATTERN, content);
|
|
64
|
+
if (commandLines >= 1) {
|
|
65
|
+
signals.push(`commands:${commandLines}`);
|
|
66
|
+
score += commandLines >= 2 ? 1.0 : 0.6;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
const imperativeLines = countMatches(IMPERATIVE_STEP_PATTERN, content);
|
|
70
|
+
if (imperativeLines >= 2) {
|
|
71
|
+
signals.push(`imperatives:${imperativeLines}`);
|
|
72
|
+
score += 0.8;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
if (HEADING_PATTERN.test(content) && (numberedSteps >= 1 || bulletLines >= 2)) {
|
|
76
|
+
signals.push("heading");
|
|
77
|
+
score += 0.4;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const fileRefs = countMatches(FILE_REF_PATTERN, content);
|
|
81
|
+
if (fileRefs >= 1) {
|
|
82
|
+
signals.push(`file_refs:${fileRefs}`);
|
|
83
|
+
score += 0.3;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if (score < 1.8) {
|
|
87
|
+
return null;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
return {
|
|
91
|
+
value: 0.25,
|
|
92
|
+
source: "scanner",
|
|
93
|
+
reason: `scanner heuristic: ${signals.join(", ")}`,
|
|
94
|
+
confidence: Math.min(0.8, 0.5 + signals.length * 0.05),
|
|
95
|
+
kind: "scanner_signal",
|
|
96
|
+
extractor: "scanner_heuristic",
|
|
97
|
+
};
|
|
98
|
+
}
|