@jonathangu/openclawbrain 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +412 -0
  3. package/bin/openclawbrain.js +15 -0
  4. package/docs/END_STATE.md +244 -0
  5. package/docs/EVIDENCE.md +128 -0
  6. package/docs/RELEASE_CONTRACT.md +91 -0
  7. package/docs/agent-tools.md +106 -0
  8. package/docs/architecture.md +224 -0
  9. package/docs/configuration.md +178 -0
  10. package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/status.json +87 -0
  11. package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/summary.md +16 -0
  12. package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/trace.json +273 -0
  13. package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/validation-report.json +652 -0
  14. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/channels-status.txt +31 -0
  15. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/config-snapshot.json +66 -0
  16. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/doctor.json +14 -0
  17. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/gateway-probe.txt +34 -0
  18. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/gateway-status.txt +41 -0
  19. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/logs.txt +428 -0
  20. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/status-all.txt +60 -0
  21. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/status.json +223 -0
  22. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/summary.md +13 -0
  23. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/trace.json +4 -0
  24. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/validation-report.json +334 -0
  25. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/channels-status.txt +25 -0
  26. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/config-snapshot.json +91 -0
  27. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/doctor.json +14 -0
  28. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/gateway-probe.txt +36 -0
  29. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/gateway-status.txt +44 -0
  30. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/logs.txt +428 -0
  31. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-doctor.json +10 -0
  32. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-sdk-probe.json +11 -0
  33. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-setup-only.json +12 -0
  34. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/summary.md +30 -0
  35. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/validation-report.json +72 -0
  36. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/status-all.txt +63 -0
  37. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/status.json +200 -0
  38. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/summary.md +13 -0
  39. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/trace.json +4 -0
  40. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/validation-report.json +311 -0
  41. package/docs/evidence/README.md +16 -0
  42. package/docs/fts5.md +161 -0
  43. package/docs/tui.md +506 -0
  44. package/index.ts +1372 -0
  45. package/openclaw.plugin.json +136 -0
  46. package/package.json +66 -0
  47. package/src/assembler.ts +804 -0
  48. package/src/brain-cli.ts +316 -0
  49. package/src/brain-core/decay.ts +35 -0
  50. package/src/brain-core/episode.ts +82 -0
  51. package/src/brain-core/graph.ts +321 -0
  52. package/src/brain-core/health.ts +116 -0
  53. package/src/brain-core/mutator.ts +281 -0
  54. package/src/brain-core/pack.ts +117 -0
  55. package/src/brain-core/policy.ts +153 -0
  56. package/src/brain-core/replay.ts +1 -0
  57. package/src/brain-core/teacher.ts +105 -0
  58. package/src/brain-core/trace.ts +40 -0
  59. package/src/brain-core/traverse.ts +230 -0
  60. package/src/brain-core/types.ts +405 -0
  61. package/src/brain-core/update.ts +123 -0
  62. package/src/brain-harvest/human.ts +46 -0
  63. package/src/brain-harvest/scanner.ts +98 -0
  64. package/src/brain-harvest/self.ts +147 -0
  65. package/src/brain-runtime/assembler-extension.ts +230 -0
  66. package/src/brain-runtime/evidence-detectors.ts +68 -0
  67. package/src/brain-runtime/graph-io.ts +72 -0
  68. package/src/brain-runtime/harvester-extension.ts +98 -0
  69. package/src/brain-runtime/service.ts +659 -0
  70. package/src/brain-runtime/tools.ts +109 -0
  71. package/src/brain-runtime/worker-state.ts +106 -0
  72. package/src/brain-runtime/worker-supervisor.ts +169 -0
  73. package/src/brain-store/embedding.ts +179 -0
  74. package/src/brain-store/init.ts +347 -0
  75. package/src/brain-store/migrations.ts +188 -0
  76. package/src/brain-store/store.ts +816 -0
  77. package/src/brain-worker/child-runner.ts +321 -0
  78. package/src/brain-worker/jobs.ts +12 -0
  79. package/src/brain-worker/mutation-job.ts +5 -0
  80. package/src/brain-worker/promotion-job.ts +5 -0
  81. package/src/brain-worker/protocol.ts +79 -0
  82. package/src/brain-worker/teacher-job.ts +5 -0
  83. package/src/brain-worker/update-job.ts +5 -0
  84. package/src/brain-worker/worker.ts +422 -0
  85. package/src/compaction.ts +1332 -0
  86. package/src/db/config.ts +265 -0
  87. package/src/db/connection.ts +72 -0
  88. package/src/db/features.ts +42 -0
  89. package/src/db/migration.ts +561 -0
  90. package/src/engine.ts +1995 -0
  91. package/src/expansion-auth.ts +351 -0
  92. package/src/expansion-policy.ts +303 -0
  93. package/src/expansion.ts +383 -0
  94. package/src/integrity.ts +600 -0
  95. package/src/large-files.ts +527 -0
  96. package/src/openclaw-bridge.ts +22 -0
  97. package/src/retrieval.ts +357 -0
  98. package/src/store/conversation-store.ts +748 -0
  99. package/src/store/fts5-sanitize.ts +29 -0
  100. package/src/store/full-text-fallback.ts +74 -0
  101. package/src/store/index.ts +29 -0
  102. package/src/store/summary-store.ts +918 -0
  103. package/src/summarize.ts +847 -0
  104. package/src/tools/common.ts +53 -0
  105. package/src/tools/lcm-conversation-scope.ts +76 -0
  106. package/src/tools/lcm-describe-tool.ts +234 -0
  107. package/src/tools/lcm-expand-query-tool.ts +594 -0
  108. package/src/tools/lcm-expand-tool.delegation.ts +556 -0
  109. package/src/tools/lcm-expand-tool.ts +448 -0
  110. package/src/tools/lcm-expansion-recursion-guard.ts +286 -0
  111. package/src/tools/lcm-grep-tool.ts +200 -0
  112. package/src/transcript-repair.ts +301 -0
  113. package/src/types.ts +149 -0
@@ -0,0 +1,405 @@
1
+ /**
2
+ * Core type definitions for the OpenClawBrain learning layer.
3
+ *
4
+ * These types define the learned retrieval graph, traversal MDP,
5
+ * episodes, labels, packs, mutations, and traces.
6
+ *
7
+ * Paper reference: Gu (2016), "Reinforcement Learning"
8
+ * Lemma 6.1: ∂/∂ρ v_ρ(s_t) = E[z · Σ_{l=t}^{T} ∂logP_ρ(a_l|s_l)/∂ρ]
9
+ */
10
+
11
+ // ═══════════════════════════════════════════
12
+ // Node & Edge Types
13
+ // ═══════════════════════════════════════════
14
+
15
+ export type NodeKind =
16
+ | "chunk" // Document/code fragment
17
+ | "workflow" // Multi-step procedure
18
+ | "correction" // Human-authored fix ("use X not Y")
19
+ | "toolcard" // When/how to use a specific tool
20
+ | "episode_anchor" // Pointer to a prior successful episode
21
+ | "summary_bridge"; // Bridge to LCM summary DAG
22
+
23
+ export type EdgeKind =
24
+ | "sibling" // Same-document adjacency (prior = 1.0)
25
+ | "semantic" // Embedding cosine similarity (prior = cosine)
26
+ | "learned" // Created by learning (prior = 0.5)
27
+ | "seed" // Learned seed-head parameter from __START__
28
+ | "inhibitory" // Suppresses traversal (weight < 0)
29
+ | "bridge"; // Links brain node to LCM summary
30
+
31
+ export const START_NODE_ID = "__START__";
32
+
33
+ export interface SeedWeightUpdate {
34
+ kind: "seed";
35
+ nodeId: string;
36
+ delta: number;
37
+ }
38
+
39
+ export interface EdgeWeightUpdate {
40
+ kind: "edge";
41
+ source: string;
42
+ target: string;
43
+ delta: number;
44
+ }
45
+
46
+ export type PolicyWeightUpdate = SeedWeightUpdate | EdgeWeightUpdate;
47
/**
 * Provenance tier for nodes and reward labels.
 * Trust ordering is human > self > scanner > teacher (see trustRank below).
 */
export type TrustLevel = "human" | "scanner" | "teacher" | "self";

/** Origin of a terminal reward z — same tiers as TrustLevel. */
export type RewardSource = TrustLevel;

/** One node of the learned retrieval graph. */
export interface BrainNode {
  id: string;
  kind: NodeKind;
  content: string;
  embedding: Float32Array | null; // null when no embedding is available
  sourceUri: string | null;
  trust: TrustLevel;
  tags: string[];
  tokenCount: number;
  metadata: Record<string, unknown>;
  createdAt: number; // NOTE(review): presumably epoch timestamp — confirm units against the store
  updatedAt: number;
}

/** Directed edge between brain nodes; `weight` is learned, `prior` is fixed. */
export interface BrainEdge {
  source: string;
  target: string;
  kind: EdgeKind;
  weight: number; // Learned parameter ρ (signed; negative = suppress)
  prior: number; // Immutable structural baseline
  metadata: Record<string, unknown>;
  decayedAt: number;
  createdAt: number;
}
// ═══════════════════════════════════════════
// Traversal MDP (paper-faithful)
// ═══════════════════════════════════════════

/**
 * State s_t in the MDP.
 * Paper: S = {s_0, s_1, s_2, ...}
 */
export interface TraversalState {
  currentNodeId: string | null; // null at seed phase (t=0)
  queryEmbedding: Float32Array;
  visited: Set<string>; // ids seen during this traversal
  fired: string[]; // ids emitted into the result so far
  budgetRemaining: number;
  hopCount: number;
  maxHops: number;
}

/**
 * Action a_t in the MDP.
 * Paper: A(s) ⊂ {a_0, a_1, a_2, ...}
 * Our action set: A(s) = { traverse(neighbor) } ∪ { STOP }
 */
export type TraversalAction =
  | { type: "traverse"; targetNodeId: string; seedScore?: number }
  | { type: "stop" };

/** Scoring breakdown for one candidate node during seed selection. */
export interface SeedScore {
  nodeId: string;
  priorScore: number;
  learnedSeedWeight: number;
  policyScore: number;
  probability: number;
  chosen: boolean; // true for the seed actually selected
}

/** Persisted learned seed-head weight for a node. */
export interface SeedWeight {
  nodeId: string;
  weight: number;
  updatedAt: number;
}

/**
 * One step of a recorded trajectory.
 * Captures the full candidate distribution for REINFORCE gradient computation.
 */
export interface TrajectoryStep {
  stateSnapshot: {
    currentNodeId: string | null; // null when the step was taken from the seed phase
    hopCount: number;
    budgetRemaining: number;
    visitedCount: number;
    firedCount: number;
  };
  candidates: Array<{
    action: TraversalAction;
    score: number;
    probability: number;
    priorScore?: number; // present for seed-phase candidates
    learnedSeedWeight?: number; // present for seed-phase candidates
  }>;
  chosenAction: TraversalAction;
  chosenActionProbability: number; // P(a_l|s_l) — consumed by the REINFORCE update
  stopProbability: number;
}
// ═══════════════════════════════════════════
// Episodes & Labels
// ═══════════════════════════════════════════

/**
 * A complete episode: one traversal from seed to terminal state.
 * Paper: A game that ends in finite time with terminal reward z.
 */
export interface Episode {
  id: string;
  conversationId: number | null;
  queryText: string;
  queryEmbedding: Float32Array | null;
  trajectory: TrajectoryStep[];
  firedNodes: string[];
  vetoedNodes: string[];
  contextChars: number;
  reward: number | null; // Terminal reward z ∈ [-1, +1]; null until labeled
  rewardSource: RewardSource | null;
  packVersion: number | null;
  createdAt: number;
}

/**
 * A reward label from one of four sources.
 * Human > self > scanner > teacher by trust ranking.
 */
export interface Label {
  id: string;
  episodeId: string;
  source: RewardSource;
  value: number; // z ∈ [-1, +1]
  confidence: number; // [0, 1]
  reason: string | null;
  applied: boolean; // whether the label has been consumed by learning
  createdAt: number;
}

/** What kind of observation produced a piece of evidence. */
export type BrainEvidenceKind =
  | "human_feedback"
  | "self_result"
  | "scanner_signal"
  | "teacher_review"
  | "teach_correction";

/** Raw harvested evidence, awaiting resolution (possibly into a Label). */
export interface BrainEvidence {
  id: string;
  episodeId: string;
  conversationId: number | null;
  source: RewardSource;
  kind: BrainEvidenceKind;
  value: number;
  confidence: number;
  reason: string | null;
  contentSnippet: string | null;
  metadata: Record<string, unknown>;
  resolved: boolean;
  createdAt: number;
}

/** Outcome of resolving one piece of evidence. */
export type BrainEvidenceResolution =
  | "promoted_to_label"
  | "discarded_missing_episode"
  | "discarded_lower_trust"
  | "discarded_duplicate";

/** Record of an evidence-resolution decision (see BrainEvidenceResolution). */
export interface ResolvedLabel {
  id: string;
  evidenceId: string;
  episodeId: string;
  source: RewardSource;
  value: number;
  confidence: number;
  resolution: BrainEvidenceResolution;
  labelId: string | null; // set when resolution === "promoted_to_label"
  note: string | null;
  createdAt: number;
}
// ═══════════════════════════════════════════
// Packs & Mutations
// ═══════════════════════════════════════════

/** Versioned snapshot of the graph (node/edge counts + serialized health). */
export interface Pack {
  version: number;
  nodeCount: number;
  edgeCount: number;
  healthJson: string; // serialized HealthMetrics — TODO confirm producer
  promotedAt: number | null; // null until the pack is promoted
  rolledBack: boolean;
  createdAt: number;
}

/** Structural mutation categories proposed against the graph. */
export type MutationKind = "split" | "merge" | "prune" | "connect" | "inject";

/** Lifecycle of a mutation proposal. */
export type MutationStatus = "pending" | "validated" | "promoted" | "rejected";

/** A proposed structural graph mutation and its review state. */
export interface MutationProposal {
  id: string;
  kind: MutationKind;
  proposal: unknown; // shape depends on `kind` — narrow before use
  evidence: unknown | null;
  expectedGain: number | null;
  status: MutationStatus;
  createdAt: number;
  resolvedAt: number | null; // set once status leaves "pending"
}
// ═══════════════════════════════════════════
// Health Metrics
// ═══════════════════════════════════════════

/** Aggregate graph-health statistics reported per pack. */
export interface HealthMetrics {
  nodeCount: number;
  edgeCount: number;
  nodesByKind: Record<NodeKind, number>;
  edgesByKind: Record<EdgeKind, number>;
  firedPerQuery: number; // average nodes fired per query — TODO confirm averaging window
  dormantPercent: number;
  inhibitoryPercent: number;
  orphanCount: number;
  avgPathLength: number;
  avgReward: number;
  crossFileEdgePercent: number;
  churn: number;
  packVersion: number;
  lastUpdateAt: number;
  totalEpisodes: number;
}
// ═══════════════════════════════════════════
// Decision Traces
// ═══════════════════════════════════════════

/** Full record of one traversal's decisions (seed scoring + trajectory). */
export interface DecisionTrace {
  id: string;
  episodeId: string | null;
  packVersion: number | null;
  queryText: string;
  seedScores: SeedScore[];
  trajectory: TrajectoryStep[];
  firedNodes: string[];
  vetoedNodes: string[];
  contextChars: number;
  footer: string; // NOTE(review): purpose not visible here — verify against trace consumer
  createdAt: number;
}
289
+
290
+ // ═══════════════════════════════════════════
291
+ // Configuration
292
+ // ═══════════════════════════════════════════
293
+
294
+ export interface BrainConfig {
295
+ enabled: boolean;
296
+ root: string;
297
+ maxHops: number;
298
+ servingTemperature: number;
299
+ learningTemperature: number;
300
+ budgetFraction: number;
301
+ maxSeeds: number;
302
+ semanticThreshold: number;
303
+ learningRate: number;
304
+ baselineAlpha: number;
305
+ decayRate: number;
306
+ trainerIntervalMs: number;
307
+ workerMode: "child" | "in_process";
308
+ workerHeartbeatTimeoutMs: number;
309
+ workerRestartDelayMs: number;
310
+ teacherEnabled: boolean;
311
+ mutationsEnabled: boolean;
312
+ replayEpisodeCount: number;
313
+ minFiredPerQuery: number;
314
+ maxDormantPercent: number;
315
+ maxOrphanCount: number;
316
+ shadowMode: boolean;
317
+ embeddingProvider: string;
318
+ embeddingModel: string;
319
+ embeddingBaseUrl: string;
320
+ teacherProvider: string;
321
+ teacherModel: string;
322
+ }
323
+
324
+ export const DEFAULT_BRAIN_CONFIG: BrainConfig = {
325
+ enabled: true,
326
+ root: "",
327
+ maxHops: 8,
328
+ servingTemperature: 0.1,
329
+ learningTemperature: 1.0,
330
+ budgetFraction: 0.3,
331
+ maxSeeds: 10,
332
+ semanticThreshold: 0.7,
333
+ learningRate: 0.01,
334
+ baselineAlpha: 0.1,
335
+ decayRate: 0.995,
336
+ trainerIntervalMs: 30_000,
337
+ workerMode: "child",
338
+ workerHeartbeatTimeoutMs: 90_000,
339
+ workerRestartDelayMs: 5_000,
340
+ teacherEnabled: true,
341
+ mutationsEnabled: true,
342
+ replayEpisodeCount: 100,
343
+ minFiredPerQuery: 1.0,
344
+ maxDormantPercent: 0.3,
345
+ maxOrphanCount: 10,
346
+ shadowMode: false,
347
+ embeddingProvider: "openai",
348
+ embeddingModel: "",
349
+ embeddingBaseUrl: "",
350
+ teacherProvider: "",
351
+ teacherModel: "",
352
+ };
// ═══════════════════════════════════════════
// Traversal Result (returned to assembler)
// ═══════════════════════════════════════════

/** Outcome of one traversal, handed back to the context assembler. */
export interface TraversalResult {
  fired: Array<{ nodeId: string; kind: NodeKind; content: string; tokenCount: number }>;
  vetoed: Array<{ nodeId: string; reason: string }>;
  episode: Episode; // the recorded episode for later labeling/learning
  trace: DecisionTrace; // full decision record for debugging/audit
}

/**
 * The route_fn interface: query + candidate IDs → selected subset.
 */
export type RouteFn = (query: string, candidateIds: string[]) => Promise<string[]>;
369
+
370
+ // ═══════════════════════════════════════════
371
+ // Policy Parameters
372
+ // ═══════════════════════════════════════════
373
+
374
+ export interface PolicyParams {
375
+ temperature: number;
376
+ stopBias: number;
377
+ budgetPressure: number;
378
+ hopPressure: number;
379
+ edgeKindBias: Record<EdgeKind, number>;
380
+ }
381
+
382
+ export const DEFAULT_POLICY_PARAMS: PolicyParams = {
383
+ temperature: 1.0,
384
+ stopBias: -2.0,
385
+ budgetPressure: 3.0,
386
+ hopPressure: 2.0,
387
+ edgeKindBias: {
388
+ sibling: 0.0,
389
+ semantic: 0.1,
390
+ learned: 0.2,
391
+ seed: 0.15,
392
+ inhibitory: -10.0,
393
+ bridge: 0.0,
394
+ },
395
+ };
396
+
397
+ // Trust rank ordering: human > self > scanner > teacher
398
+ export function trustRank(source: RewardSource): number {
399
+ switch (source) {
400
+ case "human": return 4;
401
+ case "self": return 3;
402
+ case "scanner": return 2;
403
+ case "teacher": return 1;
404
+ }
405
+ }
@@ -0,0 +1,123 @@
1
+ /**
2
+ * REINFORCE update rule implementing Lemma 6.1 from the paper.
3
+ *
4
+ * Paper's correct update direction:
5
+ * ∂/∂ρ v_ρ(s_t) = E[z · Σ_{l=t}^{T} ∂logP_ρ(a_l|s_l)/∂ρ]
6
+ *
7
+ * Key insight: The sum goes over the ENTIRE trajectory from t to T.
8
+ * This assigns credit to every routing decision that led to the outcome,
9
+ * not just the last step. Williams (1992) equation (1) is one-step only.
10
+ *
11
+ * For the softmax policy over edge weights:
12
+ * ∂logP(a_j|s)/∂w_j = 1 - P(a_j|s) (for the chosen action's weight)
13
+ * ∂logP(a_j|s)/∂w_k = -P(a_k|s) (for other actions' weights)
14
+ *
15
+ * We update only the chosen edge's weight at each step, using:
16
+ * Δw_j = learningRate × (z - baseline) × (1 - P(a_j|s))
17
+ *
18
+ * The full-trajectory sum is achieved by accumulating updates across all steps.
19
+ */
20
+
21
+ import type { Episode, PolicyWeightUpdate } from "./types.js";
22
+ import { START_NODE_ID } from "./types.js";
23
+ import type { BrainGraph } from "./graph.js";
24
+
25
+ /**
26
+ * Compute REINFORCE weight updates from a completed episode.
27
+ *
28
+ * Implements Lemma 6.1: full-trajectory credit assignment.
29
+ * Every step l from 0 to T contributes to the gradient.
30
+ */
31
+ export function computeReinforceUpdates(
32
+ episode: Episode,
33
+ learningRate: number,
34
+ baseline: number,
35
+ ): PolicyWeightUpdate[] {
36
+ if (episode.reward === null) return [];
37
+
38
+ const advantage = episode.reward - baseline;
39
+ if (Math.abs(advantage) < 1e-8) return [];
40
+
41
+ const updates: Map<string, PolicyWeightUpdate> = new Map();
42
+
43
+ // Sum over all trajectory steps l = 0 to T (full-trajectory, not one-step)
44
+ for (const step of episode.trajectory) {
45
+ if (step.chosenAction.type !== "traverse") continue;
46
+
47
+ const sourceId = step.stateSnapshot.currentNodeId ?? START_NODE_ID;
48
+ const targetId = step.chosenAction.targetNodeId;
49
+
50
+ // ∂logP(a_l|s_l)/∂ρ for the softmax = (1 - P(a_l|s_l))
51
+ const gradLogP = 1 - step.chosenActionProbability;
52
+
53
+ // Δρ ∝ (z - baseline) × ∂logP/∂ρ
54
+ const delta = learningRate * advantage * gradLogP;
55
+
56
+ if (sourceId === START_NODE_ID) {
57
+ const key = `seed→${targetId}`;
58
+ const existing = updates.get(key);
59
+ if (existing && existing.kind === "seed") {
60
+ existing.delta += delta;
61
+ } else {
62
+ updates.set(key, { kind: "seed", nodeId: targetId, delta });
63
+ }
64
+ continue;
65
+ }
66
+
67
+ // Accumulate: the full-trajectory sum means each step adds to the gradient
68
+ const key = `${sourceId}→${targetId}`;
69
+
70
+ const existing = updates.get(key);
71
+ if (existing && existing.kind === "edge") {
72
+ existing.delta += delta;
73
+ } else {
74
+ updates.set(key, { kind: "edge", source: sourceId, target: targetId, delta });
75
+ }
76
+ }
77
+
78
+ return [...updates.values()];
79
+ }
80
+
81
+ /**
82
+ * Update running baseline via exponential moving average.
83
+ *
84
+ * baseline_{n+1} = α × z_n + (1 - α) × baseline_n
85
+ *
86
+ * The baseline reduces variance in the REINFORCE estimate
87
+ * without introducing bias (standard variance reduction technique).
88
+ */
89
+ export function updateBaseline(
90
+ currentBaseline: number,
91
+ newReward: number,
92
+ alpha: number,
93
+ ): number {
94
+ return alpha * newReward + (1 - alpha) * currentBaseline;
95
+ }
96
+
97
+ /**
98
+ * Apply computed weight updates to graph edges.
99
+ *
100
+ * After applying, edge weights may become negative (inhibitory).
101
+ * This is intentional — the paper allows signed outcomes.
102
+ */
103
+ export function applyWeightUpdates(
104
+ graph: BrainGraph,
105
+ updates: PolicyWeightUpdate[],
106
+ ): void {
107
+ for (const update of updates) {
108
+ if (update.kind === "seed") {
109
+ const nextWeight = Math.max(-10, Math.min(10, graph.getSeedWeight(update.nodeId) + update.delta));
110
+ graph.setSeedWeight(update.nodeId, nextWeight);
111
+ continue;
112
+ }
113
+
114
+ const edge = graph.getEdge(update.source, update.target);
115
+ if (!edge) continue;
116
+
117
+ // Update weight: w_new = w_old + Δw
118
+ edge.weight += update.delta;
119
+
120
+ // Clamp to prevent numerical explosion
121
+ edge.weight = Math.max(-10, Math.min(10, edge.weight));
122
+ }
123
+ }
@@ -0,0 +1,46 @@
1
+ import type { HarvestResult } from "../brain-runtime/evidence-detectors.js";
2
+
3
+ const NEGATIVE_HUMAN_PATTERNS = [
4
+ /\bno[,.]?\s+(that'?s?\s+)?(not|wrong|incorrect)/i,
5
+ /\bdon'?t\s+(use|do|try)/i,
6
+ /\binstead\s+(use|do|try)/i,
7
+ /\bactually[,]?\s+(it'?s|the|you\s+should)/i,
8
+ /\bthat'?s\s+not\s+(right|correct|what)/i,
9
+ /\bwrong\s+(file|path|approach|answer|tool)/i,
10
+ /\bnot\s+what\s+i\s+(asked|wanted|meant)/i,
11
+ ];
12
+
13
+ const POSITIVE_HUMAN_PATTERNS = [
14
+ /\b(perfect|exactly|correct)\b/i,
15
+ /\bthat('?s|\s+is)\s+(exactly\s+)?(right|correct|what\s+i)/i,
16
+ /\bgreat[,!]\s+(that|this)\s+(work|help)/i,
17
+ /\byes[,!.]\s+(that'?s?|exactly)/i,
18
+ ];
19
+
20
+ export function detectHumanEvidence(content: string): HarvestResult | null {
21
+ for (const pattern of NEGATIVE_HUMAN_PATTERNS) {
22
+ if (pattern.test(content)) {
23
+ return {
24
+ value: -0.8,
25
+ source: "human",
26
+ reason: `negative pattern: ${pattern.source}`,
27
+ confidence: 0.9,
28
+ kind: "human_feedback",
29
+ extractor: "human_pattern",
30
+ };
31
+ }
32
+ }
33
+ for (const pattern of POSITIVE_HUMAN_PATTERNS) {
34
+ if (pattern.test(content)) {
35
+ return {
36
+ value: 0.8,
37
+ source: "human",
38
+ reason: `positive pattern: ${pattern.source}`,
39
+ confidence: 0.9,
40
+ kind: "human_feedback",
41
+ extractor: "human_pattern",
42
+ };
43
+ }
44
+ }
45
+ return null;
46
+ }
@@ -0,0 +1,98 @@
1
+ import type { HarvestResult } from "../brain-runtime/evidence-detectors.js";
2
+
// Markers that directly identify scanner-worthy content; these
// short-circuit the heuristic scoring below.
const EXPLICIT_SCANNER_PATTERNS = [
  /\bexpand for details about\b/i,
  /\bsummary bridge\b/i,
];

// Vocabulary typical of operational documentation.
const DOC_MARKER_PATTERNS = [
  /\b(runbook|workflow|playbook|checklist|troubleshooting|procedure)\b/i,
  /\b(deploy|deployment|incident|recovery|rollback|pull request|release)\b/i,
];

// Lines that invoke a known CLI tool (optionally as a list item, optionally backticked).
const COMMAND_LINE_PATTERN = /^\s*(?:[-*]\s+|\d+\.\s+)?`?(gh|git|pnpm|npm|node|openclaw|ollama|curl|python3?|bash)\b.*$/gim;
// Lines like "1. do the thing".
const NUMBERED_STEP_PATTERN = /^\s*\d+\.\s+\S.+$/gm;
// Lines like "- item" or "* item".
const BULLET_PATTERN = /^\s*[-*]\s+\S.+$/gm;
// A markdown heading anywhere in the content (no g flag — used only with .test()).
const HEADING_PATTERN = /^\s{0,3}#{1,6}\s+\S.+$/m;
// References to files by common extension (md, ts, json, sh, ...).
const FILE_REF_PATTERN = /(?:^|[\s(])(?:\.?\/)?[\w./-]+\.(?:md|txt|ts|tsx|js|jsx|json|yaml|yml|sh|mjs)(?=$|[\s):,])/gim;
// Lines beginning with an imperative operational verb (optionally as a list item).
const IMPERATIVE_STEP_PATTERN = /^\s*(?:[-*]\s+|\d+\.\s+)?(?:inspect|check|retry|run|use|open|read|edit|verify|restart|re-?run|apply|deploy|create|install|record|compare|promote|rollback)\b/gim;
19
+
20
+ function countMatches(pattern: RegExp, content: string): number {
21
+ const flags = pattern.flags.includes("g") ? pattern.flags : `${pattern.flags}g`;
22
+ const matcher = new RegExp(pattern.source, flags);
23
+ return Array.from(content.matchAll(matcher)).length;
24
+ }
25
+
26
+ export function detectScannerEvidence(content: string): HarvestResult | null {
27
+ for (const pattern of EXPLICIT_SCANNER_PATTERNS) {
28
+ if (pattern.test(content)) {
29
+ return {
30
+ value: 0.25,
31
+ source: "scanner",
32
+ reason: `scanner marker: ${pattern.source}`,
33
+ confidence: 0.7,
34
+ kind: "scanner_signal",
35
+ extractor: "scanner_marker",
36
+ };
37
+ }
38
+ }
39
+
40
+ const signals: string[] = [];
41
+ let score = 0;
42
+
43
+ for (const pattern of DOC_MARKER_PATTERNS) {
44
+ if (pattern.test(content)) {
45
+ signals.push(`doc:${pattern.source}`);
46
+ score += 1.0;
47
+ break;
48
+ }
49
+ }
50
+
51
+ const numberedSteps = countMatches(NUMBERED_STEP_PATTERN, content);
52
+ if (numberedSteps >= 2) {
53
+ signals.push(`numbered_steps:${numberedSteps}`);
54
+ score += 1.0;
55
+ }
56
+
57
+ const bulletLines = countMatches(BULLET_PATTERN, content);
58
+ if (bulletLines >= 3) {
59
+ signals.push(`bullets:${bulletLines}`);
60
+ score += 0.5;
61
+ }
62
+
63
+ const commandLines = countMatches(COMMAND_LINE_PATTERN, content);
64
+ if (commandLines >= 1) {
65
+ signals.push(`commands:${commandLines}`);
66
+ score += commandLines >= 2 ? 1.0 : 0.6;
67
+ }
68
+
69
+ const imperativeLines = countMatches(IMPERATIVE_STEP_PATTERN, content);
70
+ if (imperativeLines >= 2) {
71
+ signals.push(`imperatives:${imperativeLines}`);
72
+ score += 0.8;
73
+ }
74
+
75
+ if (HEADING_PATTERN.test(content) && (numberedSteps >= 1 || bulletLines >= 2)) {
76
+ signals.push("heading");
77
+ score += 0.4;
78
+ }
79
+
80
+ const fileRefs = countMatches(FILE_REF_PATTERN, content);
81
+ if (fileRefs >= 1) {
82
+ signals.push(`file_refs:${fileRefs}`);
83
+ score += 0.3;
84
+ }
85
+
86
+ if (score < 1.8) {
87
+ return null;
88
+ }
89
+
90
+ return {
91
+ value: 0.25,
92
+ source: "scanner",
93
+ reason: `scanner heuristic: ${signals.join(", ")}`,
94
+ confidence: Math.min(0.8, 0.5 + signals.length * 0.05),
95
+ kind: "scanner_signal",
96
+ extractor: "scanner_heuristic",
97
+ };
98
+ }