@jonathangu/openclawbrain 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +412 -0
  3. package/bin/openclawbrain.js +15 -0
  4. package/docs/END_STATE.md +244 -0
  5. package/docs/EVIDENCE.md +128 -0
  6. package/docs/RELEASE_CONTRACT.md +91 -0
  7. package/docs/agent-tools.md +106 -0
  8. package/docs/architecture.md +224 -0
  9. package/docs/configuration.md +178 -0
  10. package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/status.json +87 -0
  11. package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/summary.md +16 -0
  12. package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/trace.json +273 -0
  13. package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/validation-report.json +652 -0
  14. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/channels-status.txt +31 -0
  15. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/config-snapshot.json +66 -0
  16. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/doctor.json +14 -0
  17. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/gateway-probe.txt +34 -0
  18. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/gateway-status.txt +41 -0
  19. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/logs.txt +428 -0
  20. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/status-all.txt +60 -0
  21. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/status.json +223 -0
  22. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/summary.md +13 -0
  23. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/trace.json +4 -0
  24. package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/validation-report.json +334 -0
  25. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/channels-status.txt +25 -0
  26. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/config-snapshot.json +91 -0
  27. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/doctor.json +14 -0
  28. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/gateway-probe.txt +36 -0
  29. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/gateway-status.txt +44 -0
  30. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/logs.txt +428 -0
  31. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-doctor.json +10 -0
  32. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-sdk-probe.json +11 -0
  33. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-setup-only.json +12 -0
  34. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/summary.md +30 -0
  35. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/validation-report.json +72 -0
  36. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/status-all.txt +63 -0
  37. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/status.json +200 -0
  38. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/summary.md +13 -0
  39. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/trace.json +4 -0
  40. package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/validation-report.json +311 -0
  41. package/docs/evidence/README.md +16 -0
  42. package/docs/fts5.md +161 -0
  43. package/docs/tui.md +506 -0
  44. package/index.ts +1372 -0
  45. package/openclaw.plugin.json +136 -0
  46. package/package.json +66 -0
  47. package/src/assembler.ts +804 -0
  48. package/src/brain-cli.ts +316 -0
  49. package/src/brain-core/decay.ts +35 -0
  50. package/src/brain-core/episode.ts +82 -0
  51. package/src/brain-core/graph.ts +321 -0
  52. package/src/brain-core/health.ts +116 -0
  53. package/src/brain-core/mutator.ts +281 -0
  54. package/src/brain-core/pack.ts +117 -0
  55. package/src/brain-core/policy.ts +153 -0
  56. package/src/brain-core/replay.ts +1 -0
  57. package/src/brain-core/teacher.ts +105 -0
  58. package/src/brain-core/trace.ts +40 -0
  59. package/src/brain-core/traverse.ts +230 -0
  60. package/src/brain-core/types.ts +405 -0
  61. package/src/brain-core/update.ts +123 -0
  62. package/src/brain-harvest/human.ts +46 -0
  63. package/src/brain-harvest/scanner.ts +98 -0
  64. package/src/brain-harvest/self.ts +147 -0
  65. package/src/brain-runtime/assembler-extension.ts +230 -0
  66. package/src/brain-runtime/evidence-detectors.ts +68 -0
  67. package/src/brain-runtime/graph-io.ts +72 -0
  68. package/src/brain-runtime/harvester-extension.ts +98 -0
  69. package/src/brain-runtime/service.ts +659 -0
  70. package/src/brain-runtime/tools.ts +109 -0
  71. package/src/brain-runtime/worker-state.ts +106 -0
  72. package/src/brain-runtime/worker-supervisor.ts +169 -0
  73. package/src/brain-store/embedding.ts +179 -0
  74. package/src/brain-store/init.ts +347 -0
  75. package/src/brain-store/migrations.ts +188 -0
  76. package/src/brain-store/store.ts +816 -0
  77. package/src/brain-worker/child-runner.ts +321 -0
  78. package/src/brain-worker/jobs.ts +12 -0
  79. package/src/brain-worker/mutation-job.ts +5 -0
  80. package/src/brain-worker/promotion-job.ts +5 -0
  81. package/src/brain-worker/protocol.ts +79 -0
  82. package/src/brain-worker/teacher-job.ts +5 -0
  83. package/src/brain-worker/update-job.ts +5 -0
  84. package/src/brain-worker/worker.ts +422 -0
  85. package/src/compaction.ts +1332 -0
  86. package/src/db/config.ts +265 -0
  87. package/src/db/connection.ts +72 -0
  88. package/src/db/features.ts +42 -0
  89. package/src/db/migration.ts +561 -0
  90. package/src/engine.ts +1995 -0
  91. package/src/expansion-auth.ts +351 -0
  92. package/src/expansion-policy.ts +303 -0
  93. package/src/expansion.ts +383 -0
  94. package/src/integrity.ts +600 -0
  95. package/src/large-files.ts +527 -0
  96. package/src/openclaw-bridge.ts +22 -0
  97. package/src/retrieval.ts +357 -0
  98. package/src/store/conversation-store.ts +748 -0
  99. package/src/store/fts5-sanitize.ts +29 -0
  100. package/src/store/full-text-fallback.ts +74 -0
  101. package/src/store/index.ts +29 -0
  102. package/src/store/summary-store.ts +918 -0
  103. package/src/summarize.ts +847 -0
  104. package/src/tools/common.ts +53 -0
  105. package/src/tools/lcm-conversation-scope.ts +76 -0
  106. package/src/tools/lcm-describe-tool.ts +234 -0
  107. package/src/tools/lcm-expand-query-tool.ts +594 -0
  108. package/src/tools/lcm-expand-tool.delegation.ts +556 -0
  109. package/src/tools/lcm-expand-tool.ts +448 -0
  110. package/src/tools/lcm-expansion-recursion-guard.ts +286 -0
  111. package/src/tools/lcm-grep-tool.ts +200 -0
  112. package/src/transcript-repair.ts +301 -0
  113. package/src/types.ts +149 -0
@@ -0,0 +1,117 @@
1
+ /**
2
+ * Immutable pack management.
3
+ *
4
+ * Gateway reads only promoted pack. Daemon writes only mutable state.
5
+ * Promotion requires passing replay gate and health bounds.
6
+ */
7
+
8
+ import type { Pack, HealthMetrics, Episode } from "./types.js";
9
+ import type { BrainGraph } from "./graph.js";
10
+ import { computeHealth } from "./health.js";
11
+ import { replayEpisode } from "./episode.js";
12
+
13
+ export interface BrainPackPersistence { // Storage operations that PackManager delegates to.
14
+ insertPack(params: { nodeCount: number; edgeCount: number; healthJson: string }): Pack; // Called by PackManager.buildCandidate; healthJson is JSON.stringify of the HealthMetrics it was given.
15
+ promotePack(version: number): void; // Called by PackManager.promote with the pack version to promote.
16
+ rollbackPack(version: number): void; // Called by PackManager.rollback with the pack version to roll back.
17
+ }
18
+
19
/**
 * Coordinates the pack lifecycle: recording a candidate pack, checking a graph
 * against the replay gate, and delegating promotion/rollback to persistence.
 */
export class PackManager {
  private persistence: BrainPackPersistence;
  private graph: BrainGraph;
  private log: { info: (msg: string) => void; warn: (msg: string) => void };

  /**
   * @param persistence Store used to insert, promote and roll back packs.
   * @param graph Graph evaluated by `replayGate` when no candidate graph is passed.
   * @param log Logger; promotions are reported via `info`, rollbacks via `warn`.
   */
  constructor(
    persistence: BrainPackPersistence,
    graph: BrainGraph,
    log: { info: (msg: string) => void; warn: (msg: string) => void },
  ) {
    this.persistence = persistence;
    this.graph = graph;
    this.log = log;
  }

  /**
   * Record a candidate pack described by the given health metrics.
   *
   * @param health Metrics whose node/edge counts and JSON form are persisted.
   * @returns The pack returned by the persistence layer.
   */
  buildCandidate(health: HealthMetrics): Pack {
    const { nodeCount, edgeCount } = health;
    const healthJson = JSON.stringify(health);
    return this.persistence.insertPack({ nodeCount, edgeCount, healthJson });
  }

  /**
   * Replay gate: decide whether a candidate graph may be promoted.
   *
   * Checks run in this order and the first failure wins:
   *   1. firedPerQuery must not be below `config.minFiredPerQuery`
   *   2. dormantPercent must not exceed `config.maxDormantPercent`
   *   3. orphanCount must not exceed `config.maxOrphanCount`
   *   4. no human-rewarded episode with positive reward may change on replay
   *   5. every self-rewarded episode with negative reward must change on replay
   *
   * With no episodes the gate passes without applying any of the checks.
   *
   * @param recentEpisodes Episodes to compute health over and to replay.
   * @param config Health thresholds.
   * @param candidateGraph Graph under test; defaults to the manager's own graph.
   * @returns Verdict, a human-readable reason, and the computed health metrics.
   */
  replayGate(
    recentEpisodes: Episode[],
    config: { minFiredPerQuery: number; maxDormantPercent: number; maxOrphanCount: number },
    candidateGraph: BrainGraph = this.graph,
  ): { passed: boolean; reason: string; health: HealthMetrics } {
    const nothingToReplay = recentEpisodes.length === 0;
    const health = computeHealth(candidateGraph, recentEpisodes, 0);

    if (nothingToReplay) {
      return { passed: true, reason: "no episodes to replay", health };
    }

    const reject = (reason: string) => ({ passed: false, reason, health });
    const asPercent = (fraction: number): string => `${(fraction * 100).toFixed(1)}%`;

    // Health bounds.
    if (health.firedPerQuery < config.minFiredPerQuery) {
      return reject(`firedPerQuery ${health.firedPerQuery.toFixed(2)} < ${config.minFiredPerQuery}`);
    }
    if (health.dormantPercent > config.maxDormantPercent) {
      return reject(
        `dormantPercent ${asPercent(health.dormantPercent)} > ${asPercent(config.maxDormantPercent)}`,
      );
    }
    if (health.orphanCount > config.maxOrphanCount) {
      return reject(`orphanCount ${health.orphanCount} > ${config.maxOrphanCount}`);
    }

    // Human-labeled episodes: a positively rewarded one must keep its routing.
    const regressedHuman = recentEpisodes
      .filter((ep) => ep.rewardSource === "human" && ep.reward !== null)
      .find((ep) => {
        const { wouldChange } = replayEpisode(ep, candidateGraph);
        return wouldChange && ep.reward! > 0;
      });
    if (regressedHuman !== undefined) {
      return reject(`human-positive episode ${regressedHuman.id} would change routing`);
    }

    // Self-labeled negative episodes: each one must route differently now.
    const unchangedSelfNegative = recentEpisodes
      .filter((ep) => ep.rewardSource === "self" && ep.reward !== null && ep.reward < 0)
      .find((ep) => !replayEpisode(ep, candidateGraph).wouldChange);
    if (unchangedSelfNegative !== undefined) {
      return reject(`self-negative episode ${unchangedSelfNegative.id} did not change routing`);
    }

    return { passed: true, reason: "all gates passed", health };
  }

  /** Promote the given pack version via persistence and log it at info level. */
  promote(version: number): void {
    this.persistence.promotePack(version);
    this.log.info(`[brain] Pack v${version} promoted`);
  }

  /** Roll back the given pack version via persistence and log it at warn level. */
  rollback(version: number): void {
    this.persistence.rollbackPack(version);
    this.log.warn(`[brain] Pack v${version} rolled back`);
  }
}
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Softmax routing policy over action sets.
3
+ *
4
+ * Implements P_ρ(a|s) from the paper:
5
+ * P_ρ(a_j | s_t) = exp(score(a_j) / τ) / Σ_k exp(score(a_k) / τ)
6
+ *
7
+ * Policy is ALWAYS stochastic (samples from softmax, never argmax).
8
+ * Temperature τ controls exploration vs exploitation:
9
+ * - Learning: τ = 1.0 (explore)
10
+ * - Serving: τ = 0.1 (exploit, nearly deterministic)
11
+ */
12
+
13
+ import type {
14
+ TraversalAction,
15
+ TraversalState,
16
+ PolicyParams,
17
+ } from "./types.js";
18
+ import { DEFAULT_POLICY_PARAMS } from "./types.js";
19
+ import { BrainGraph, cosineSimilarity } from "./graph.js";
20
+
21
/**
 * Compute the unnormalized policy score of one candidate action.
 *
 * - STOP: `stopBias + budgetPressure * budgetUsedFraction + hopPressure * hopFraction`,
 *   where the total budget is reconstructed as the remaining budget plus the
 *   `tokenCount` of every fired node still present in the graph.
 * - Traverse whose target node is missing from the graph: `-Infinity`.
 * - Traverse from the seed state (`currentNodeId === null`):
 *   `seedScore` (0 when absent) plus the graph's learned seed weight.
 * - Any other traverse: `edge.weight * edge.prior + cosine(query, target) + edgeKindBias`.
 *   A missing edge contributes 0 to both edge terms; a missing target embedding
 *   or an empty query embedding contributes 0 relevance.
 *
 * @param action Candidate action to score.
 * @param state Current traversal state.
 * @param graph Graph used for node, edge and seed-weight lookups.
 * @param params Policy parameters; defaults to `DEFAULT_POLICY_PARAMS`.
 * @returns The raw score (higher is more likely after softmax).
 */
export function scoreAction(
  action: TraversalAction,
  state: TraversalState,
  graph: BrainGraph,
  params: PolicyParams = DEFAULT_POLICY_PARAMS,
): number {
  if (action.type === "stop") {
    // Tokens already spent = tokenCount of every fired node the graph still knows.
    let spentTokens = 0;
    for (const firedId of state.fired) {
      spentTokens += graph.getNode(firedId)?.tokenCount ?? 0;
    }
    const totalBudget = state.budgetRemaining + spentTokens;

    let budgetUsedFraction = 0;
    if (totalBudget > 0) {
      budgetUsedFraction = 1 - state.budgetRemaining / totalBudget;
    }

    let hopFraction = 0;
    if (state.maxHops > 0) {
      hopFraction = state.hopCount / state.maxHops;
    }

    return params.stopBias
      + params.budgetPressure * budgetUsedFraction
      + params.hopPressure * hopFraction;
  }

  const { targetNodeId } = action;
  const target = graph.getNode(targetNodeId);
  if (!target) {
    return -Infinity;
  }

  const originId = state.currentNodeId;
  if (originId === null) {
    // Seed step: no edge exists yet, so combine the retrieval prior with the learned weight.
    return (action.seedScore ?? 0) + graph.getSeedWeight(targetNodeId);
  }

  const edge = graph.getEdge(originId, targetNodeId);

  let edgeScore = 0;
  if (edge) {
    edgeScore = edge.weight * edge.prior;
  }

  let relevance = 0;
  if (target.embedding && state.queryEmbedding.length > 0) {
    relevance = cosineSimilarity(state.queryEmbedding, target.embedding);
  }

  let kindBias = 0;
  if (edge) {
    kindBias = params.edgeKindBias[edge.kind] ?? 0;
  }

  return edgeScore + relevance + kindBias;
}
72
+
73
/**
 * Compute the softmax distribution over the full action set.
 *
 * Entries are returned in the same order as `actions`, each with its raw score
 * and its probability. The computation is numerically stable (the maximum
 * score is subtracted before exponentiation) and defensive about bad inputs:
 *
 * - A temperature that is zero, negative or NaN is replaced by a tiny positive
 *   value, which yields the near-greedy limit instead of NaN probabilities.
 * - Scores of `NaN` or `-Infinity` receive probability 0 and do not affect the
 *   other entries.
 * - If no action has a usable score, the distribution is uniform.
 *
 * @param actions Candidate actions; an empty list yields an empty result.
 * @param state Current traversal state, forwarded to `scoreAction`.
 * @param graph Graph, forwarded to `scoreAction`.
 * @param params Policy parameters; defaults to `DEFAULT_POLICY_PARAMS`.
 * @returns One `{ action, score, probability }` entry per input action.
 */
export function softmaxPolicy(
  actions: TraversalAction[],
  state: TraversalState,
  graph: BrainGraph,
  params: PolicyParams = DEFAULT_POLICY_PARAMS,
): Array<{ action: TraversalAction; score: number; probability: number }> {
  if (actions.length === 0) return [];

  // Smallest temperature used when the configured one is not a positive number.
  const MIN_TEMPERATURE = 1e-6;

  const scored = actions.map((action) => ({
    action,
    score: scoreAction(action, state, graph, params),
  }));

  const carriesMass = (score: number): boolean => !Number.isNaN(score) && score !== -Infinity;

  // Plain loop instead of Math.max(...spread): no argument-count limit, and a
  // NaN score cannot poison the maximum (NaN > x is always false).
  let maxScore = -Infinity;
  for (const { score } of scored) {
    if (score > maxScore) maxScore = score;
  }

  if (maxScore === -Infinity) {
    // Nothing is scoreable: fall back to a uniform distribution.
    const uniform = 1 / scored.length;
    return scored.map(({ action, score }) => ({ action, score, probability: uniform }));
  }

  const tau = params.temperature > 0 ? params.temperature : MIN_TEMPERATURE;

  const weights = scored.map(({ score }) => {
    // exp(0) === 1; short-circuiting also avoids Infinity - Infinity = NaN.
    if (score === maxScore) return 1;
    return carriesMass(score) ? Math.exp((score - maxScore) / tau) : 0;
  });

  // At least one weight is exactly 1, so the total is never zero.
  const total = weights.reduce((sum, weight) => sum + weight, 0);

  return scored.map(({ action, score }, index) => ({
    action,
    score,
    probability: weights[index] / total,
  }));
}
109
+
110
/**
 * Sample an action from a probability distribution (inverse-CDF sampling).
 *
 * Stochastic — NEVER argmax. Even at low temperature this samples from the
 * distribution, which the REINFORCE update relies on.
 *
 * Entries whose probability is not a positive number (0, negative, NaN) are
 * never selected and do not contribute to the cumulative sum, so a chosen
 * action always has positive probability whenever any entry does.
 *
 * @param distribution Candidate actions with their probabilities.
 * @param random Source of uniform draws in [0, 1); defaults to `Math.random`.
 *   Injectable so callers and tests can make sampling reproducible.
 * @returns The chosen action, its probability, and its index in `distribution`.
 *   An empty distribution yields `{ type: "stop" }` with probability 1 and index 0.
 */
export function sampleAction(
  distribution: Array<{ action: TraversalAction; probability: number }>,
  random: () => number = Math.random,
): { action: TraversalAction; probability: number; index: number } {
  if (distribution.length === 0) {
    return { action: { type: "stop" }, probability: 1.0, index: 0 };
  }

  const draw = random();
  let cumulative = 0;
  let lastSupported = -1;

  for (let i = 0; i < distribution.length; i++) {
    const { action, probability } = distribution[i];
    // Written as !(p > 0) so that NaN is skipped along with zero and negatives.
    if (!(probability > 0)) continue;

    cumulative += probability;
    lastSupported = i;

    // Strict comparison: the draw lies in [0, 1), so entry i owns
    // [previous cumulative, cumulative).
    if (draw < cumulative) {
      return { action, probability, index: i };
    }
  }

  // The draw landed past the cumulative sum (rounding, or an under-normalised
  // distribution). Prefer the last entry that actually had mass; if none did,
  // keep the historical behavior of returning the final entry.
  const fallback = lastSupported >= 0 ? lastSupported : distribution.length - 1;
  return {
    action: distribution[fallback].action,
    probability: distribution[fallback].probability,
    index: fallback,
  };
}
146
+
147
/**
 * Natural log of a chosen action's probability, as used in the REINFORCE
 * gradient term ∂logP_ρ(a|s)/∂ρ.
 *
 * The probability is floored at 1e-10 before taking the log so that a zero
 * (or negative) input yields a large finite negative number, not -Infinity/NaN.
 *
 * @param probability Probability of the chosen action.
 * @returns `ln(max(probability, 1e-10))`.
 */
export function logProbability(probability: number): number {
  const FLOOR = 1e-10;
  const clamped = probability < FLOOR ? FLOOR : probability;
  return Math.log(clamped);
}
@@ -0,0 +1 @@
1
+ export { replayEpisode } from "./episode.js";
@@ -0,0 +1,105 @@
1
+ /**
2
+ * Off-path async teacher for episode evaluation.
3
+ *
4
+ * CRITICAL RULE: Teacher sees ONLY what the router saw.
5
+ * It evaluates the routing decision, not the overall task outcome.
6
+ * No cheating with extra context.
7
+ */
8
+
9
+ import type { Episode } from "./types.js";
10
+ import type { BrainGraph } from "./graph.js";
11
+
12
+ export type BrainTeacherCompletion = (params: { // Signature of the LLM completion callback awaited by BrainTeacher.evaluate.
13
+ provider?: string; // evaluate passes the provider returned by the model resolver
14
+ model: string; // evaluate passes the model returned by the model resolver
15
+ apiKey?: string; // evaluate passes whatever the API-key callback resolved to (may be undefined)
16
+ messages: Array<{ role: string; content: unknown }>; // evaluate sends a single "user" message containing the routing prompt
17
+ system?: string; // evaluate passes TEACHER_SYSTEM_PROMPT
18
+ maxTokens: number; // evaluate passes 200
19
+ temperature?: number; // evaluate passes 0.1
20
+ }) => Promise<{ content?: Array<{ text?: string }> }>; // evaluate concatenates the text of every returned content block
21
+
22
+ export type BrainTeacherResolveModel = () => { provider: string; model: string }; // Called once per evaluation to pick the provider/model pair.
23
+ export type BrainTeacherGetApiKey = (provider: string, model: string) => Promise<string | undefined>; // Awaited with the resolved provider/model; the result is forwarded as apiKey.
24
+
25
/** System prompt asking the teacher model to answer with a single JSON verdict object. */
const TEACHER_SYSTEM_PROMPT =
  "You are evaluating a context routing decision. Score the quality of the selected context for the given query. Return ONLY a JSON object: {\"score\": <number from -1.0 to 1.0>, \"reason\": \"<brief explanation>\"}";

/**
 * Asks an LLM ("teacher") to grade a routing episode.
 *
 * The prompt contains only the query text, the candidate nodes recorded in the
 * episode trajectory, and the nodes that were fired. Failures never throw:
 * every error path resolves to a neutral score of 0.
 */
export class BrainTeacher {
  private complete: BrainTeacherCompletion;
  private resolveModel: BrainTeacherResolveModel;
  private getApiKey: BrainTeacherGetApiKey;
  private graph: BrainGraph;
  private log: { info: (msg: string) => void; error: (msg: string) => void };

  /**
   * @param complete LLM completion callback.
   * @param resolveModel Returns the provider/model pair to use for each evaluation.
   * @param getApiKey Resolves the API key for that provider/model pair.
   * @param graph Graph used to look up the content of candidate and fired nodes.
   * @param log Logger; successful scores go to `info`, failures to `error`.
   */
  constructor(
    complete: BrainTeacherCompletion,
    resolveModel: BrainTeacherResolveModel,
    getApiKey: BrainTeacherGetApiKey,
    graph: BrainGraph,
    log: { info: (msg: string) => void; error: (msg: string) => void },
  ) {
    this.complete = complete;
    this.resolveModel = resolveModel;
    this.getApiKey = getApiKey;
    this.graph = graph;
    this.log = log;
  }

  /**
   * Score one episode.
   *
   * @param episode Episode whose trajectory candidates and fired nodes are shown to the teacher.
   * @returns `score` clamped to [-1, 1] and a `reason`. The score is 0 with
   *   reason "failed to parse teacher response" when the reply contains no JSON
   *   verdict, and 0 with reason "teacher evaluation failed" when model
   *   resolution, key lookup or the completion call throws.
   */
  async evaluate(episode: Episode): Promise<{ score: number; reason: string }> {
    // Candidates: every traverse option of every step whose node still exists in the graph.
    const candidateDescriptions: string[] = [];
    for (const step of episode.trajectory) {
      for (const candidate of step.candidates) {
        if (candidate.action.type !== "traverse") continue;
        const node = this.graph.getNode(candidate.action.targetNodeId);
        if (!node) continue;
        candidateDescriptions.push(
          `- ${BrainTeacher.describeNode(node)} (prob: ${candidate.probability.toFixed(3)})`,
        );
      }
    }

    const firedDescriptions = episode.firedNodes.map((id) => {
      const node = this.graph.getNode(id);
      return node ? `- ${BrainTeacher.describeNode(node)}` : `- ${id} (not found)`;
    });

    const prompt = [
      `Query: "${episode.queryText}"`,
      "",
      "Candidate nodes the router could have chosen:",
      candidateDescriptions.join("\n") || "(none)",
      "",
      "Nodes actually selected (fired):",
      firedDescriptions.join("\n") || "(none)",
      "",
      "Was this the right context for the query? Consider relevance, completeness, and conciseness.",
    ].join("\n");

    try {
      const { provider, model } = this.resolveModel();
      const apiKey = await this.getApiKey(provider, model);

      const result = await this.complete({
        provider,
        model,
        apiKey,
        system: TEACHER_SYSTEM_PROMPT,
        messages: [{ role: "user", content: prompt }],
        maxTokens: 200,
        temperature: 0.1,
      });

      const text = result.content
        ?.map((block: { text?: string }) => block.text ?? "")
        .join("") ?? "";

      const verdict = BrainTeacher.extractVerdict(text);
      if (!verdict) {
        this.log.error(`[brain] Teacher returned non-JSON: ${text.slice(0, 100)}`);
        return { score: 0, reason: "failed to parse teacher response" };
      }

      // A non-numeric score collapses to 0; anything else is clamped to [-1, 1].
      const score = Math.max(-1, Math.min(1, Number(verdict.score) || 0));
      const reason = String(verdict.reason || "teacher evaluation");

      this.log.info(`[brain] Teacher scored episode ${episode.id}: ${score.toFixed(2)} (${reason})`);
      return { score, reason };
    } catch (err) {
      // Anything can be thrown, not only Error instances.
      const message = err instanceof Error ? err.message : String(err);
      this.log.error(`[brain] Teacher evaluation failed: ${message}`);
      return { score: 0, reason: "teacher evaluation failed" };
    }
  }

  /** Render a node as `[kind] content`, truncating the content to 200 characters. */
  private static describeNode(node: { kind: string; content: string }): string {
    const ellipsis = node.content.length > 200 ? "..." : "";
    return `[${node.kind}] ${node.content.slice(0, 200)}${ellipsis}`;
  }

  /**
   * Find the first JSON object in `text` that has a `score` key.
   *
   * A single greedy "first `{` to last `}`" match breaks as soon as the model
   * adds chatter containing a brace, so every `{` is tried as a start and the
   * span is widened to each following `}` until it parses. Replies are capped
   * at 200 tokens, so the quadratic scan stays cheap.
   */
  private static extractVerdict(text: string): { score?: unknown; reason?: unknown } | null {
    if (!text.includes("\"score\"")) return null;

    for (let start = text.indexOf("{"); start !== -1; start = text.indexOf("{", start + 1)) {
      for (let end = text.indexOf("}", start); end !== -1; end = text.indexOf("}", end + 1)) {
        let parsed: unknown;
        try {
          parsed = JSON.parse(text.slice(start, end + 1));
        } catch {
          continue; // Not a complete object yet: widen to the next closing brace.
        }
        if (parsed !== null && typeof parsed === "object" && "score" in parsed) {
          return parsed as { score?: unknown; reason?: unknown };
        }
        break; // A complete object without a score: try the next opening brace.
      }
    }
    return null;
  }
}
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Decision trace recording and footer generation.
3
+ */
4
+
5
+ import { randomUUID } from "node:crypto";
6
+ import type { DecisionTrace } from "./types.js";
7
+ import type { TraverseResult } from "./traverse.js";
8
+
9
/**
 * Build a decision-trace record from a finished traversal.
 *
 * The trace id is `bt_` followed by the first 8 characters of a random UUID.
 * Fired and vetoed nodes are reduced to their node ids; seed scores,
 * trajectory, context size and footer are copied from the traversal result;
 * `createdAt` is the current epoch time in milliseconds.
 *
 * @param params.traversalResult Result returned by the traversal.
 * @param params.queryText Query text the traversal was run for.
 * @param params.episodeId Associated episode id, or null.
 * @param params.packVersion Pack version in effect, or null.
 * @returns The assembled trace.
 */
export function recordTrace(params: {
  traversalResult: TraverseResult;
  queryText: string;
  episodeId: string | null;
  packVersion: number | null;
}): DecisionTrace {
  const { traversalResult, queryText, episodeId, packVersion } = params;
  const shortId = randomUUID().slice(0, 8);
  const firedNodes = traversalResult.firedNodes.map(({ nodeId }) => nodeId);
  const vetoedNodes = traversalResult.vetoedNodes.map(({ nodeId }) => nodeId);

  return {
    id: `bt_${shortId}`,
    episodeId,
    packVersion,
    queryText,
    seedScores: traversalResult.seedScores,
    trajectory: traversalResult.trajectory,
    firedNodes,
    vetoedNodes,
    contextChars: traversalResult.contextChars,
    footer: traversalResult.footer,
    createdAt: Date.now(),
  };
}
29
+
30
/**
 * Render the one-line, human-readable summary of a traversal.
 *
 * Format: `Brain v<pack> · <n> seeds · <n> hops · <n> fired · <n> veto · <n> chars · trace <id>`.
 *
 * @param params Pack version, counters, context size and trace id to print.
 * @returns The footer string.
 */
export function generateFooter(params: {
  packVersion: number;
  seedCount: number;
  hopCount: number;
  firedCount: number;
  vetoCount: number;
  contextChars: number;
  traceId: string;
}): string {
  const { packVersion, seedCount, hopCount, firedCount, vetoCount, contextChars, traceId } = params;
  const segments = [
    `Brain v${packVersion}`,
    `${seedCount} seeds`,
    `${hopCount} hops`,
    `${firedCount} fired`,
    `${vetoCount} veto`,
    `${contextChars} chars`,
    `trace ${traceId}`,
  ];
  return segments.join(" · ");
}
@@ -0,0 +1,230 @@
1
+ /**
2
+ * Full traversal loop implementing the paper's finite-time game.
3
+ *
4
+ * Algorithm:
5
+ * 1. Seed phase: select start nodes by embedding similarity
6
+ * 2. Loop: expand candidates → compute softmax policy → sample → fire/veto
7
+ * 3. Terminal: STOP chosen, budget exhausted, max hops, or dead end
8
+ *
9
+ * Paper assumptions honored:
10
+ * - Assumption 1: game ends in finite time (maxHops bound)
11
+ * - Assumption 2: reward only at terminal state (not intermediate)
12
+ * - Stochastic policy P_ρ(a|s) via softmax sampling
13
+ */
14
+
15
+ import type {
16
+ TraversalState,
17
+ TraversalAction,
18
+ TrajectoryStep,
19
+ PolicyParams,
20
+ NodeKind,
21
+ SeedScore,
22
+ } from "./types.js";
23
+ import { DEFAULT_POLICY_PARAMS } from "./types.js";
24
+ import type { BrainGraph } from "./graph.js";
25
+ import { softmaxPolicy, sampleAction } from "./policy.js";
26
+
27
+ export interface TraverseOptions { // Inputs accepted by traverse().
28
+ graph: BrainGraph; // graph used for seeding, action sets, veto checks and node lookups
29
+ queryEmbedding: Float32Array; // passed to graph.seedByEmbedding and stored on the traversal state for scoring
30
+ queryText: string; // not read inside traverse() itself
31
+ maxHops: number; // upper bound on traversal loop iterations; also stored on the traversal state
32
+ budgetChars: number; // initial budgetRemaining; each fired node subtracts its tokenCount — NOTE(review): name says chars but the decrement is tokenCount, confirm units
33
+ temperature: number; // softmax temperature; a temperature inside policyParams overrides it (spread order)
34
+ maxSeeds: number; // forwarded to graph.seedByEmbedding
35
+ semanticThreshold: number; // forwarded to graph.seedByEmbedding
36
+ policyParams?: Partial<PolicyParams>; // overrides merged on top of DEFAULT_POLICY_PARAMS and temperature
37
+ }
38
+
39
+ export interface TraverseResult { // Value returned by traverse().
40
+ firedNodes: Array<{ nodeId: string; kind: NodeKind; content: string; tokenCount: number }>; // nodes added to the context, in firing order
41
+ vetoedNodes: Array<{ nodeId: string; reason: string }>; // sampled nodes that were suppressed by a veto, with the veto reason ("inhibitory" when the graph gives none)
42
+ trajectory: TrajectoryStep[]; // one entry per recorded policy decision, including a terminal STOP-only step
43
+ seedScores: SeedScore[]; // per-seed scores captured on the seed step; empty when no seeds were found
44
+ contextChars: number; // sum of content.length over firedNodes
45
+ footer: string; // human-readable one-line summary of the traversal
46
+ }
47
+
48
/**
 * Run one full graph traversal for a query.
 *
 * 1. Seeds are selected with `graph.seedByEmbedding`; with no seeds the result
 *    is empty and the footer reads "Brain · 0 seeds · no traversal".
 * 2. While fewer than `maxHops` hops have been taken: build the action set,
 *    compute the softmax distribution, sample one action, record the step, and
 *    then either stop, move onto a vetoed node without firing it, or fire the
 *    node and charge its `tokenCount` against the budget.
 * 3. The loop ends on STOP, an empty action set, an exhausted budget, or the
 *    hop limit.
 *
 * NOTE(review): the budget is initialised from `budgetChars` but decremented
 * by `tokenCount` — confirm that both are expressed in the same unit.
 *
 * @param options Graph, query embedding, limits and policy overrides.
 * @returns Fired nodes, vetoed nodes, the full trajectory (for REINFORCE),
 *   seed scores, the total fired content length, and a human-readable footer.
 */
export function traverse(options: TraverseOptions): TraverseResult {
  const { graph, queryEmbedding, maxHops, budgetChars, maxSeeds, semanticThreshold } = options;

  // Explicit policyParams win over the temperature option, which wins over defaults.
  const params: PolicyParams = {
    ...DEFAULT_POLICY_PARAMS,
    temperature: options.temperature,
    ...options.policyParams,
  };

  const seedCandidates = graph.seedByEmbedding(queryEmbedding, maxSeeds, semanticThreshold);
  if (seedCandidates.length === 0) {
    return {
      firedNodes: [],
      vetoedNodes: [],
      trajectory: [],
      seedScores: [],
      contextChars: 0,
      footer: "Brain · 0 seeds · no traversal",
    };
  }

  const state: TraversalState = {
    currentNodeId: null,
    queryEmbedding,
    visited: new Set(),
    fired: [],
    budgetRemaining: budgetChars,
    hopCount: 0,
    maxHops,
  };

  const trajectory: TrajectoryStep[] = [];
  const firedNodes: Array<{ nodeId: string; kind: NodeKind; content: string; tokenCount: number }> = [];
  const vetoedNodes: Array<{ nodeId: string; reason: string }> = [];
  let seedScores: SeedScore[] = [];

  // Immutable copy of the state fields that are recorded with every step.
  const snapshotState = () => ({
    currentNodeId: state.currentNodeId,
    hopCount: state.hopCount,
    budgetRemaining: state.budgetRemaining,
    visitedCount: state.visited.size,
    firedCount: state.fired.length,
  });

  // Every iteration that does not terminate takes exactly one hop, so the hop
  // counter doubles as the loop counter.
  while (state.hopCount < maxHops) {
    const atSeedStep = state.currentNodeId === null;

    const actions = graph.getActionSet(
      state.currentNodeId,
      state.visited,
      atSeedStep ? seedCandidates : undefined,
    );

    // Dead end: nothing to choose from.
    if (actions.length === 0) break;

    // Only STOP is available: record it as a certain choice and terminate.
    if (actions.length === 1 && actions[0].type === "stop") {
      trajectory.push({
        stateSnapshot: snapshotState(),
        candidates: [{ action: { type: "stop" }, score: 0, probability: 1.0 }],
        chosenAction: { type: "stop" },
        chosenActionProbability: 1.0,
        stopProbability: 1.0,
      });
      break;
    }

    const distribution = softmaxPolicy(actions, state, graph, params);
    const sampled = sampleAction(distribution);
    const chosen = sampled.action;

    if (atSeedStep) {
      // Capture how each seed was scored and which one was picked.
      seedScores = seedCandidates.map((seed) => {
        const entry = distribution.find(
          (d) => d.action.type === "traverse" && d.action.targetNodeId === seed.nodeId,
        );
        return {
          nodeId: seed.nodeId,
          priorScore: seed.score,
          learnedSeedWeight: graph.getSeedWeight(seed.nodeId),
          policyScore: entry?.score ?? seed.score,
          probability: entry?.probability ?? 0,
          chosen: chosen.type === "traverse" && chosen.targetNodeId === seed.nodeId,
        };
      });
    }

    const stopProbability = distribution.find((d) => d.action.type === "stop")?.probability ?? 0;

    trajectory.push({
      stateSnapshot: snapshotState(),
      candidates: distribution.map((d) => {
        // Seed-specific fields are only filled in for traverse candidates of the seed step.
        if (d.action.type === "traverse" && atSeedStep) {
          return {
            action: d.action,
            score: d.score,
            probability: d.probability,
            priorScore: d.action.seedScore,
            learnedSeedWeight: graph.getSeedWeight(d.action.targetNodeId),
          };
        }
        return {
          action: d.action,
          score: d.score,
          probability: d.probability,
          priorScore: undefined,
          learnedSeedWeight: undefined,
        };
      }),
      chosenAction: chosen,
      chosenActionProbability: sampled.probability,
      stopProbability,
    });

    // Terminal: STOP chosen.
    if (chosen.type === "stop") break;

    const targetNodeId = chosen.targetNodeId;
    state.visited.add(targetNodeId);
    state.hopCount++;

    // Inhibitory veto: move onto the node but do not add it to the context.
    const originId = state.currentNodeId;
    if (originId && graph.isVetoed(originId, targetNodeId)) {
      const reason = graph.getVetoReason(originId, targetNodeId) ?? "inhibitory";
      vetoedNodes.push({ nodeId: targetNodeId, reason });
      state.currentNodeId = targetNodeId;
      continue;
    }

    // Fire the node: add it to the context and charge the budget.
    const node = graph.getNode(targetNodeId);
    if (node) {
      state.fired.push(targetNodeId);
      state.budgetRemaining -= node.tokenCount;
      firedNodes.push({
        nodeId: node.id,
        kind: node.kind,
        content: node.content,
        tokenCount: node.tokenCount,
      });
    }

    state.currentNodeId = targetNodeId;

    // Terminal: budget exhausted.
    if (state.budgetRemaining <= 0) break;
  }

  let contextChars = 0;
  for (const fired of firedNodes) {
    contextChars += fired.content.length;
  }

  const chosenSeed = seedScores.find((seed) => seed.chosen)?.nodeId ?? "none";
  const footer = [
    "Brain",
    `${seedScores.length} seeds`,
    `start ${chosenSeed}`,
    `${state.hopCount} hops`,
    `${firedNodes.length} fired`,
    `${vetoedNodes.length} veto`,
    `${contextChars} chars`,
  ].join(" · ");

  return { firedNodes, vetoedNodes, trajectory, seedScores, contextChars, footer };
}