@jonathangu/openclawbrain 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +412 -0
- package/bin/openclawbrain.js +15 -0
- package/docs/END_STATE.md +244 -0
- package/docs/EVIDENCE.md +128 -0
- package/docs/RELEASE_CONTRACT.md +91 -0
- package/docs/agent-tools.md +106 -0
- package/docs/architecture.md +224 -0
- package/docs/configuration.md +178 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/status.json +87 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/summary.md +16 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/trace.json +273 -0
- package/docs/evidence/2026-03-16/3188b50c4ed30f07dea111e35ce52aabefaced63/brain-teach-session-bound/validation-report.json +652 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/channels-status.txt +31 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/config-snapshot.json +66 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/doctor.json +14 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/gateway-probe.txt +34 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/gateway-status.txt +41 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/logs.txt +428 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/status-all.txt +60 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/status.json +223 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/summary.md +13 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/trace.json +4 -0
- package/docs/evidence/2026-03-16/4941429588810da5d6f7ef1509f229f83fa08031/validation-report.json +334 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/channels-status.txt +25 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/config-snapshot.json +91 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/doctor.json +14 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/gateway-probe.txt +36 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/gateway-status.txt +44 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/logs.txt +428 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-doctor.json +10 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-sdk-probe.json +11 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/preflight-setup-only.json +12 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/summary.md +30 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/short-static-classification/validation-report.json +72 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/status-all.txt +63 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/status.json +200 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/summary.md +13 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/trace.json +4 -0
- package/docs/evidence/2026-03-16/7f8dbcb27e741abdeefd5656c210639d0acdd440/validation-report.json +311 -0
- package/docs/evidence/README.md +16 -0
- package/docs/fts5.md +161 -0
- package/docs/tui.md +506 -0
- package/index.ts +1372 -0
- package/openclaw.plugin.json +136 -0
- package/package.json +66 -0
- package/src/assembler.ts +804 -0
- package/src/brain-cli.ts +316 -0
- package/src/brain-core/decay.ts +35 -0
- package/src/brain-core/episode.ts +82 -0
- package/src/brain-core/graph.ts +321 -0
- package/src/brain-core/health.ts +116 -0
- package/src/brain-core/mutator.ts +281 -0
- package/src/brain-core/pack.ts +117 -0
- package/src/brain-core/policy.ts +153 -0
- package/src/brain-core/replay.ts +1 -0
- package/src/brain-core/teacher.ts +105 -0
- package/src/brain-core/trace.ts +40 -0
- package/src/brain-core/traverse.ts +230 -0
- package/src/brain-core/types.ts +405 -0
- package/src/brain-core/update.ts +123 -0
- package/src/brain-harvest/human.ts +46 -0
- package/src/brain-harvest/scanner.ts +98 -0
- package/src/brain-harvest/self.ts +147 -0
- package/src/brain-runtime/assembler-extension.ts +230 -0
- package/src/brain-runtime/evidence-detectors.ts +68 -0
- package/src/brain-runtime/graph-io.ts +72 -0
- package/src/brain-runtime/harvester-extension.ts +98 -0
- package/src/brain-runtime/service.ts +659 -0
- package/src/brain-runtime/tools.ts +109 -0
- package/src/brain-runtime/worker-state.ts +106 -0
- package/src/brain-runtime/worker-supervisor.ts +169 -0
- package/src/brain-store/embedding.ts +179 -0
- package/src/brain-store/init.ts +347 -0
- package/src/brain-store/migrations.ts +188 -0
- package/src/brain-store/store.ts +816 -0
- package/src/brain-worker/child-runner.ts +321 -0
- package/src/brain-worker/jobs.ts +12 -0
- package/src/brain-worker/mutation-job.ts +5 -0
- package/src/brain-worker/promotion-job.ts +5 -0
- package/src/brain-worker/protocol.ts +79 -0
- package/src/brain-worker/teacher-job.ts +5 -0
- package/src/brain-worker/update-job.ts +5 -0
- package/src/brain-worker/worker.ts +422 -0
- package/src/compaction.ts +1332 -0
- package/src/db/config.ts +265 -0
- package/src/db/connection.ts +72 -0
- package/src/db/features.ts +42 -0
- package/src/db/migration.ts +561 -0
- package/src/engine.ts +1995 -0
- package/src/expansion-auth.ts +351 -0
- package/src/expansion-policy.ts +303 -0
- package/src/expansion.ts +383 -0
- package/src/integrity.ts +600 -0
- package/src/large-files.ts +527 -0
- package/src/openclaw-bridge.ts +22 -0
- package/src/retrieval.ts +357 -0
- package/src/store/conversation-store.ts +748 -0
- package/src/store/fts5-sanitize.ts +29 -0
- package/src/store/full-text-fallback.ts +74 -0
- package/src/store/index.ts +29 -0
- package/src/store/summary-store.ts +918 -0
- package/src/summarize.ts +847 -0
- package/src/tools/common.ts +53 -0
- package/src/tools/lcm-conversation-scope.ts +76 -0
- package/src/tools/lcm-describe-tool.ts +234 -0
- package/src/tools/lcm-expand-query-tool.ts +594 -0
- package/src/tools/lcm-expand-tool.delegation.ts +556 -0
- package/src/tools/lcm-expand-tool.ts +448 -0
- package/src/tools/lcm-expansion-recursion-guard.ts +286 -0
- package/src/tools/lcm-grep-tool.ts +200 -0
- package/src/transcript-repair.ts +301 -0
- package/src/types.ts +149 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Immutable pack management.
|
|
3
|
+
*
|
|
4
|
+
* Gateway reads only promoted pack. Daemon writes only mutable state.
|
|
5
|
+
* Promotion requires passing replay gate and health bounds.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { Pack, HealthMetrics, Episode } from "./types.js";
|
|
9
|
+
import type { BrainGraph } from "./graph.js";
|
|
10
|
+
import { computeHealth } from "./health.js";
|
|
11
|
+
import { replayEpisode } from "./episode.js";
|
|
12
|
+
|
|
13
|
+
/**
 * Storage operations that PackManager delegates to.
 *
 * PackManager only forwards calls to these methods; what each one does to the
 * underlying store is defined by the implementation, which is not part of
 * this file.
 */
export interface BrainPackPersistence {
  /**
   * Record a new pack from node/edge counts plus the serialized health
   * metrics, and return the resulting Pack.
   */
  insertPack(params: { nodeCount: number; edgeCount: number; healthJson: string }): Pack;
  /** Promote the pack identified by `version` (called by PackManager.promote). */
  promotePack(version: number): void;
  /** Roll back the pack identified by `version` (called by PackManager.rollback). */
  rollbackPack(version: number): void;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Builds candidate packs, decides whether a candidate graph may be promoted
 * (the replay gate), and forwards promote/rollback requests to persistence.
 */
export class PackManager {
  constructor(
    private persistence: BrainPackPersistence,
    private graph: BrainGraph,
    private log: { info: (msg: string) => void; warn: (msg: string) => void },
  ) {}

  /**
   * Persist a candidate pack described by the supplied health metrics.
   *
   * @param health Metrics whose node/edge counts and JSON form are stored.
   * @returns The pack returned by the persistence layer.
   */
  buildCandidate(health: HealthMetrics): Pack {
    const { nodeCount, edgeCount } = health;
    const healthJson = JSON.stringify(health);
    return this.persistence.insertPack({ nodeCount, edgeCount, healthJson });
  }

  /**
   * Replay gate: decide whether a candidate graph is fit for promotion.
   *
   * Checks, in order: health bounds (firedPerQuery, dormantPercent,
   * orphanCount), then that no positively rewarded human episode would route
   * differently, then that every negatively rewarded self episode *would*
   * route differently. With no episodes the gate passes trivially.
   *
   * @param recentEpisodes Episodes to replay against the candidate.
   * @param config         Health thresholds.
   * @param candidateGraph Graph under test; defaults to the manager's graph.
   * @returns Verdict, human-readable reason, and the computed health metrics.
   */
  replayGate(
    recentEpisodes: Episode[],
    config: { minFiredPerQuery: number; maxDormantPercent: number; maxOrphanCount: number },
    candidateGraph: BrainGraph = this.graph,
  ): { passed: boolean; reason: string; health: HealthMetrics } {
    // Health is needed on every exit path, so compute it exactly once up front.
    const health = computeHealth(candidateGraph, recentEpisodes, 0);
    const verdict = (passed: boolean, reason: string) => ({ passed, reason, health });

    if (recentEpisodes.length === 0) {
      return verdict(true, "no episodes to replay");
    }

    if (health.firedPerQuery < config.minFiredPerQuery) {
      return verdict(
        false,
        `firedPerQuery ${health.firedPerQuery.toFixed(2)} < ${config.minFiredPerQuery}`,
      );
    }
    if (health.dormantPercent > config.maxDormantPercent) {
      const actualPct = (health.dormantPercent * 100).toFixed(1);
      const limitPct = (config.maxDormantPercent * 100).toFixed(1);
      return verdict(false, `dormantPercent ${actualPct}% > ${limitPct}%`);
    }
    if (health.orphanCount > config.maxOrphanCount) {
      return verdict(false, `orphanCount ${health.orphanCount} > ${config.maxOrphanCount}`);
    }

    // Human-approved routing must be left untouched by the candidate.
    const regressed = recentEpisodes
      .filter((ep) => ep.rewardSource === "human" && ep.reward !== null)
      .find((ep) => replayEpisode(ep, candidateGraph).wouldChange && ep.reward! > 0);
    if (regressed !== undefined) {
      return verdict(false, `human-positive episode ${regressed.id} would change routing`);
    }

    // Self-flagged failures must actually be routed differently by the candidate.
    const unrepaired = recentEpisodes
      .filter((ep) => ep.rewardSource === "self" && ep.reward !== null && ep.reward < 0)
      .find((ep) => !replayEpisode(ep, candidateGraph).wouldChange);
    if (unrepaired !== undefined) {
      return verdict(false, `self-negative episode ${unrepaired.id} did not change routing`);
    }

    return verdict(true, "all gates passed");
  }

  /** Promote pack `version` via persistence and log the event at info level. */
  promote(version: number): void {
    this.persistence.promotePack(version);
    this.log.info(`[brain] Pack v${version} promoted`);
  }

  /** Roll back pack `version` via persistence and log the event at warn level. */
  rollback(version: number): void {
    this.persistence.rollbackPack(version);
    this.log.warn(`[brain] Pack v${version} rolled back`);
  }
}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Softmax routing policy over action sets.
|
|
3
|
+
*
|
|
4
|
+
* Implements P_ρ(a|s) from the paper:
|
|
5
|
+
* P_ρ(a_j | s_t) = exp(score(a_j) / τ) / Σ_k exp(score(a_k) / τ)
|
|
6
|
+
*
|
|
7
|
+
* Policy is ALWAYS stochastic (samples from softmax, never argmax).
|
|
8
|
+
* Temperature τ controls exploration vs exploitation:
|
|
9
|
+
* - Learning: τ = 1.0 (explore)
|
|
10
|
+
* - Serving: τ = 0.1 (exploit, nearly deterministic)
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type {
|
|
14
|
+
TraversalAction,
|
|
15
|
+
TraversalState,
|
|
16
|
+
PolicyParams,
|
|
17
|
+
} from "./types.js";
|
|
18
|
+
import { DEFAULT_POLICY_PARAMS } from "./types.js";
|
|
19
|
+
import { BrainGraph, cosineSimilarity } from "./graph.js";
|
|
20
|
+
|
|
21
|
+
/**
 * Compute the raw (pre-softmax) score of one action.
 *
 * - STOP: stopBias + budgetPressure * (fraction of budget used)
 *         + hopPressure * (fraction of hops used).
 * - Traverse from the seed phase (no current node): seed prior + learned seed weight.
 * - Traverse along the graph: edge.weight * edge.prior + cosine(query, target)
 *   + per-edge-kind bias. A missing edge contributes 0 to both edge terms.
 *
 * @returns The score, or -Infinity when the traverse target is not in the graph.
 */
export function scoreAction(
  action: TraversalAction,
  state: TraversalState,
  graph: BrainGraph,
  params: PolicyParams = DEFAULT_POLICY_PARAMS,
): number {
  if (action.type === "stop") {
    // Total budget is reconstructed as "what is left" + "what fired nodes cost".
    let spent = 0;
    for (const firedId of state.fired) {
      spent += graph.getNode(firedId)?.tokenCount ?? 0;
    }
    const totalBudget = state.budgetRemaining + spent;

    let budgetUsedFraction = 0;
    if (totalBudget > 0) {
      budgetUsedFraction = 1 - state.budgetRemaining / totalBudget;
    }

    let hopFraction = 0;
    if (state.maxHops > 0) {
      hopFraction = state.hopCount / state.maxHops;
    }

    return params.stopBias
      + params.budgetPressure * budgetUsedFraction
      + params.hopPressure * hopFraction;
  }

  const { targetNodeId } = action;
  const target = graph.getNode(targetNodeId);
  if (!target) {
    return -Infinity;
  }

  const origin = state.currentNodeId;
  if (origin === null) {
    // Seed phase: there is no edge yet, only the retrieval prior and its learned weight.
    return (action.seedScore ?? 0) + graph.getSeedWeight(targetNodeId);
  }

  const edge = graph.getEdge(origin, targetNodeId);
  let edgeScore = 0;
  let kindBias = 0;
  if (edge) {
    edgeScore = edge.weight * edge.prior;
    kindBias = params.edgeKindBias[edge.kind] ?? 0;
  }

  // Relevance only counts when both the target and the query carry an embedding.
  const relevance = target.embedding && state.queryEmbedding.length > 0
    ? cosineSimilarity(state.queryEmbedding, target.embedding)
    : 0;

  return edgeScore + relevance + kindBias;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Compute the softmax distribution over the full action set.
 *
 * Each action is scored with scoreAction and converted to a probability
 * exp((score - max) / τ) / Σ exp((score_k - max) / τ); subtracting the max
 * keeps the exponentials numerically stable.
 *
 * Entries are returned in the same order as `actions` (they are NOT sorted).
 *
 * Edge cases:
 * - Empty action set → empty array.
 * - Non-positive or NaN temperature → clamped to a tiny positive value, which
 *   yields the τ→0⁺ limit (all mass on the best-scoring action(s)) instead of
 *   a 0/0 = NaN that would silently degrade to a uniform distribution.
 * - If the normalizer is not a positive number (e.g. every score is
 *   -Infinity), the distribution falls back to uniform.
 *
 * @param actions Candidate actions.
 * @param state   Current traversal state, forwarded to scoreAction.
 * @param graph   Graph, forwarded to scoreAction.
 * @param params  Policy parameters; `temperature` is τ.
 * @returns One `{ action, score, probability }` entry per input action.
 */
export function softmaxPolicy(
  actions: TraversalAction[],
  state: TraversalState,
  graph: BrainGraph,
  params: PolicyParams = DEFAULT_POLICY_PARAMS,
): Array<{ action: TraversalAction; score: number; probability: number }> {
  if (actions.length === 0) return [];

  const scored = actions.map((action) => ({
    action,
    score: scoreAction(action, state, graph, params),
  }));

  // Fold instead of Math.max(...spread): spreading a very large array can
  // exceed the engine's argument-count limit.
  const maxScore = scored.reduce((best, s) => Math.max(best, s.score), -Infinity);

  // Guard the divisor: τ <= 0 (or NaN) has no meaningful softmax.
  const MIN_TEMPERATURE = 1e-6;
  const tau = params.temperature > 0 ? params.temperature : MIN_TEMPERATURE;

  const weights = scored.map((s) => Math.exp((s.score - maxScore) / tau));
  const sumExp = weights.reduce((sum, w) => sum + w, 0);
  const uniform = 1 / actions.length;

  return scored.map((s, i) => ({
    action: s.action,
    score: s.score,
    // `sumExp > 0` is false for both 0 and NaN, so both degrade to uniform.
    probability: sumExp > 0 ? weights[i] / sumExp : uniform,
  }));
}
|
|
109
|
+
|
|
110
|
+
/**
 * Sample an action from a probability distribution (inverse-CDF sampling).
 *
 * Stochastic — NEVER argmax. Even at low temperature this samples from the
 * distribution, which the REINFORCE update relies on.
 *
 * Entries whose probability is not a positive number (0, negative, NaN) carry
 * no mass and can never be selected: the comparison is strict (`r < cumulative`)
 * so a draw of exactly 0 cannot land on a zero-probability leading entry, and
 * the rounding fallback picks the last entry that has positive mass.
 *
 * @param distribution Entries with an action and its probability.
 * @returns The sampled action, its probability, and its index in `distribution`.
 *          An empty distribution yields a STOP action with probability 1 and index 0.
 */
export function sampleAction(
  distribution: Array<{ action: TraversalAction; probability: number }>,
): { action: TraversalAction; probability: number; index: number } {
  if (distribution.length === 0) {
    return { action: { type: "stop" }, probability: 1.0, index: 0 };
  }

  const r = Math.random(); // uniform in [0, 1)
  let cumulative = 0;
  let lastWithMass = -1;

  for (let i = 0; i < distribution.length; i++) {
    const { action, probability } = distribution[i];
    if (!(probability > 0)) continue; // no mass: skip 0, negatives and NaN
    lastWithMass = i;
    cumulative += probability;
    if (r < cumulative) {
      return { action, probability, index: i };
    }
  }

  // Fallback: probabilities summed to slightly less than r due to rounding
  // (or nothing had mass at all, in which case keep the legacy "last entry").
  const fallback = lastWithMass >= 0 ? lastWithMass : distribution.length - 1;
  return {
    action: distribution[fallback].action,
    probability: distribution[fallback].probability,
    index: fallback,
  };
}
|
|
146
|
+
|
|
147
|
+
/**
 * Natural log of an action probability, as used in the REINFORCE gradient
 * term ∂logP_ρ(a|s)/∂ρ.
 *
 * The input is floored at 1e-10 before taking the log so that a probability
 * of zero (or below) yields a large negative finite value instead of -Infinity.
 *
 * @param probability Probability of the chosen action.
 * @returns ln(max(probability, 1e-10)).
 */
export function logProbability(probability: number): number {
  const floor = 1e-10;
  const clamped = probability < floor ? floor : probability;
  return Math.log(clamped);
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { replayEpisode } from "./episode.js";
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Off-path async teacher for episode evaluation.
|
|
3
|
+
*
|
|
4
|
+
* CRITICAL RULE: Teacher sees ONLY what the router saw.
|
|
5
|
+
* It evaluates the routing decision, not the overall task outcome.
|
|
6
|
+
* No cheating with extra context.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { Episode } from "./types.js";
|
|
10
|
+
import type { BrainGraph } from "./graph.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Signature of the LLM completion function injected into BrainTeacher.
 * The teacher concatenates the `text` of every returned content block.
 */
export type BrainTeacherCompletion = (params: {
  provider?: string;
  model: string;
  apiKey?: string;
  messages: Array<{ role: string; content: unknown }>;
  system?: string;
  maxTokens: number;
  temperature?: number;
}) => Promise<{ content?: Array<{ text?: string }> }>;

/** Returns the provider/model pair the teacher should call. */
export type BrainTeacherResolveModel = () => { provider: string; model: string };
/** Looks up the API key for a provider/model pair; may resolve to undefined. */
export type BrainTeacherGetApiKey = (provider: string, model: string) => Promise<string | undefined>;

// System prompt sent with every teacher request. It asks for a bare JSON
// object with a numeric "score" and a "reason", which BrainTeacher.evaluate parses.
const TEACHER_SYSTEM_PROMPT =
  "You are evaluating a context routing decision. Score the quality of the selected context for the given query. Return ONLY a JSON object: {\"score\": <number from -1.0 to 1.0>, \"reason\": \"<brief explanation>\"}";
|
|
27
|
+
|
|
28
|
+
/**
 * Asks an LLM to grade a routing episode.
 *
 * The prompt contains only the query, the candidate nodes that appeared in
 * the trajectory, and the nodes that fired — i.e. what the router itself saw.
 */
export class BrainTeacher {
  constructor(
    private complete: BrainTeacherCompletion,
    private resolveModel: BrainTeacherResolveModel,
    private getApiKey: BrainTeacherGetApiKey,
    private graph: BrainGraph,
    private log: { info: (msg: string) => void; error: (msg: string) => void },
  ) {}

  /**
   * Score one episode.
   *
   * Never rejects: any failure (model resolution, API call, unparsable reply)
   * is logged and reported as a neutral score of 0.
   *
   * @param episode Episode whose trajectory and fired nodes are evaluated.
   * @returns Score clamped to [-1, 1] and a short reason.
   */
  async evaluate(episode: Episode): Promise<{ score: number; reason: string }> {
    // Build prompt showing only what the router saw
    const candidateDescriptions: string[] = [];
    for (const step of episode.trajectory) {
      for (const candidate of step.candidates) {
        if (candidate.action.type !== "traverse") continue;
        const node = this.graph.getNode(candidate.action.targetNodeId);
        if (!node) continue;
        candidateDescriptions.push(
          `- [${node.kind}] ${BrainTeacher.preview(node.content)} (prob: ${candidate.probability.toFixed(3)})`,
        );
      }
    }

    const firedDescriptions = episode.firedNodes.map((id) => {
      const node = this.graph.getNode(id);
      return node
        ? `- [${node.kind}] ${BrainTeacher.preview(node.content)}`
        : `- ${id} (not found)`;
    });

    const prompt = [
      `Query: "${episode.queryText}"`,
      "",
      "Candidate nodes the router could have chosen:",
      candidateDescriptions.join("\n") || "(none)",
      "",
      "Nodes actually selected (fired):",
      firedDescriptions.join("\n") || "(none)",
      "",
      "Was this the right context for the query? Consider relevance, completeness, and conciseness.",
    ].join("\n");

    try {
      const { provider, model } = this.resolveModel();
      const apiKey = await this.getApiKey(provider, model);

      const result = await this.complete({
        provider,
        model,
        apiKey,
        system: TEACHER_SYSTEM_PROMPT,
        messages: [{ role: "user", content: prompt }],
        maxTokens: 200,
        temperature: 0.1,
      });

      const text = result.content
        ?.map((b: { text?: string }) => b.text ?? "")
        .join("") ?? "";

      const parsed = BrainTeacher.extractVerdict(text);
      if (!parsed) {
        this.log.error(`[brain] Teacher returned non-JSON: ${text.slice(0, 100)}`);
        return { score: 0, reason: "failed to parse teacher response" };
      }

      // Non-numeric scores collapse to 0; everything is clamped to [-1, 1].
      const score = Math.max(-1, Math.min(1, Number(parsed.score) || 0));
      const reason = String(parsed.reason || "teacher evaluation");

      this.log.info(`[brain] Teacher scored episode ${episode.id}: ${score.toFixed(2)} (${reason})`);
      return { score, reason };
    } catch (err) {
      // A thrown value is not guaranteed to be an Error instance.
      const detail = err instanceof Error ? err.message : String(err);
      this.log.error(`[brain] Teacher evaluation failed: ${detail}`);
      return { score: 0, reason: "teacher evaluation failed" };
    }
  }

  /** First 200 characters of `content`, with "..." appended when truncated. */
  private static preview(content: string): string {
    return `${content.slice(0, 200)}${content.length > 200 ? "..." : ""}`;
  }

  /**
   * Pull the `{"score": ...}` object out of a model reply.
   *
   * First tries the widest brace-delimited span (handles nested objects).
   * If that span is not valid JSON — typically because the model appended
   * text containing a stray `}` — retries with progressively longer spans
   * that start at the `{` nearest before `"score"`.
   *
   * @returns The parsed object, or null when the reply contains no candidate span.
   * @throws  The original JSON.parse error when no span parses.
   */
  private static extractVerdict(text: string): { score?: unknown; reason?: unknown } | null {
    const widest = text.match(/\{[\s\S]*"score"[\s\S]*\}/);
    if (!widest) return null;

    try {
      return JSON.parse(widest[0]);
    } catch (firstError) {
      const keyAt = text.indexOf('"score"', widest.index ?? 0);
      const start = text.lastIndexOf("{", keyAt);
      for (let end = text.indexOf("}", keyAt); end !== -1; end = text.indexOf("}", end + 1)) {
        try {
          return JSON.parse(text.slice(start, end + 1));
        } catch {
          // Not a complete object yet — extend to the next closing brace.
        }
      }
      throw firstError;
    }
  }
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Decision trace recording and footer generation.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { randomUUID } from "node:crypto";
|
|
6
|
+
import type { DecisionTrace } from "./types.js";
|
|
7
|
+
import type { TraverseResult } from "./traverse.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Build a DecisionTrace record from a finished traversal.
 *
 * The trace id is "bt_" followed by the first 8 characters of a random UUID;
 * fired and vetoed nodes are reduced to their node ids; `createdAt` is the
 * current epoch time in milliseconds.
 */
export function recordTrace(params: {
  traversalResult: TraverseResult;
  queryText: string;
  episodeId: string | null;
  packVersion: number | null;
}): DecisionTrace {
  const { traversalResult, queryText, episodeId, packVersion } = params;
  const id = `bt_${randomUUID().slice(0, 8)}`;
  const { seedScores, trajectory, contextChars, footer } = traversalResult;
  const firedNodes = traversalResult.firedNodes.map(({ nodeId }) => nodeId);
  const vetoedNodes = traversalResult.vetoedNodes.map(({ nodeId }) => nodeId);

  return {
    id,
    episodeId,
    packVersion,
    queryText,
    seedScores,
    trajectory,
    firedNodes,
    vetoedNodes,
    contextChars,
    footer,
    createdAt: Date.now(),
  };
}
|
|
29
|
+
|
|
30
|
+
/**
 * Render the one-line, human-readable summary of a traversal:
 * pack version, seed/hop/fired/veto counts, context size and trace id,
 * separated by " · ".
 */
export function generateFooter(params: {
  packVersion: number;
  seedCount: number;
  hopCount: number;
  firedCount: number;
  vetoCount: number;
  contextChars: number;
  traceId: string;
}): string {
  const { packVersion, seedCount, hopCount, firedCount, vetoCount, contextChars, traceId } = params;
  const segments = [
    `Brain v${packVersion}`,
    `${seedCount} seeds`,
    `${hopCount} hops`,
    `${firedCount} fired`,
    `${vetoCount} veto`,
    `${contextChars} chars`,
    `trace ${traceId}`,
  ];
  return segments.join(" · ");
}
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Full traversal loop implementing the paper's finite-time game.
|
|
3
|
+
*
|
|
4
|
+
* Algorithm:
|
|
5
|
+
* 1. Seed phase: select start nodes by embedding similarity
|
|
6
|
+
* 2. Loop: expand candidates → compute softmax policy → sample → fire/veto
|
|
7
|
+
* 3. Terminal: STOP chosen, budget exhausted, max hops, or dead end
|
|
8
|
+
*
|
|
9
|
+
* Paper assumptions honored:
|
|
10
|
+
* - Assumption 1: game ends in finite time (maxHops bound)
|
|
11
|
+
* - Assumption 2: reward only at terminal state (not intermediate)
|
|
12
|
+
* - Stochastic policy P_ρ(a|s) via softmax sampling
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import type {
|
|
16
|
+
TraversalState,
|
|
17
|
+
TraversalAction,
|
|
18
|
+
TrajectoryStep,
|
|
19
|
+
PolicyParams,
|
|
20
|
+
NodeKind,
|
|
21
|
+
SeedScore,
|
|
22
|
+
} from "./types.js";
|
|
23
|
+
import { DEFAULT_POLICY_PARAMS } from "./types.js";
|
|
24
|
+
import type { BrainGraph } from "./graph.js";
|
|
25
|
+
import { softmaxPolicy, sampleAction } from "./policy.js";
|
|
26
|
+
|
|
27
|
+
/** Inputs to traverse(). */
export interface TraverseOptions {
  /** Graph to walk; also supplies seed selection, action sets and veto checks. */
  graph: BrainGraph;
  /** Query embedding used for seed selection and stored on the traversal state. */
  queryEmbedding: Float32Array;
  /** Query text. Not read by traverse() itself. */
  queryText: string;
  /** Upper bound on traversal loop iterations. */
  maxHops: number;
  /**
   * Initial budget. NOTE(review): each fired node's `tokenCount` is subtracted
   * from this value, so the effective unit appears to be tokens despite the
   * "Chars" name — confirm against callers.
   */
  budgetChars: number;
  /** Softmax temperature; overridden by `policyParams.temperature` when that is set. */
  temperature: number;
  /** Passed to graph.seedByEmbedding as the seed limit. */
  maxSeeds: number;
  /** Passed to graph.seedByEmbedding as the similarity threshold. */
  semanticThreshold: number;
  /** Overrides merged on top of DEFAULT_POLICY_PARAMS and `temperature`. */
  policyParams?: Partial<PolicyParams>;
}
|
|
38
|
+
|
|
39
|
+
/** Output of traverse(). */
export interface TraverseResult {
  /** Nodes added to the context, in firing order. */
  firedNodes: Array<{ nodeId: string; kind: NodeKind; content: string; tokenCount: number }>;
  /** Nodes that were moved to but not fired because of a veto, with the veto reason. */
  vetoedNodes: Array<{ nodeId: string; reason: string }>;
  /** One step per policy decision, including candidates and the chosen action. */
  trajectory: TrajectoryStep[];
  /** Per-seed scores recorded during the seed-phase decision. */
  seedScores: SeedScore[];
  /** Sum of `content.length` over all fired nodes. */
  contextChars: number;
  /** Human-readable one-line summary of the traversal. */
  footer: string;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Run one complete graph traversal for a query.
 *
 * Seeds are chosen by embedding similarity; then, for at most `maxHops`
 * iterations, the action set is scored with the softmax policy, one action
 * is sampled, and the chosen node is either vetoed (moved to but not fired)
 * or fired (added to the context and charged against the budget).
 *
 * The walk ends when STOP is sampled or is the only action, when no action
 * is available, when the budget reaches zero or below, or after `maxHops`
 * iterations.
 *
 * @returns Fired nodes, vetoed nodes, the full trajectory (for REINFORCE),
 *          seed scores, total context characters, and a summary footer.
 */
export function traverse(options: TraverseOptions): TraverseResult {
  const { graph, queryEmbedding, maxHops, budgetChars, maxSeeds, semanticThreshold } = options;

  // Precedence (lowest → highest): defaults, explicit temperature, policyParams.
  const params: PolicyParams = Object.assign(
    {},
    DEFAULT_POLICY_PARAMS,
    { temperature: options.temperature },
    options.policyParams,
  );

  // Step 1: seed selection
  const seedCandidates = graph.seedByEmbedding(queryEmbedding, maxSeeds, semanticThreshold);
  if (seedCandidates.length === 0) {
    return {
      firedNodes: [],
      vetoedNodes: [],
      trajectory: [],
      seedScores: [],
      contextChars: 0,
      footer: "Brain · 0 seeds · no traversal",
    };
  }

  const state: TraversalState = {
    currentNodeId: null,
    queryEmbedding,
    visited: new Set(),
    fired: [],
    budgetRemaining: budgetChars,
    hopCount: 0,
    maxHops,
  };

  const trajectory: TrajectoryStep[] = [];
  const firedNodes: TraverseResult["firedNodes"] = [];
  const vetoedNodes: TraverseResult["vetoedNodes"] = [];
  let seedScores: SeedScore[] = [];

  // Capture the pre-decision state for a trajectory step.
  const snapshot = (): TrajectoryStep["stateSnapshot"] => ({
    currentNodeId: state.currentNodeId,
    hopCount: state.hopCount,
    budgetRemaining: state.budgetRemaining,
    visitedCount: state.visited.size,
    firedCount: state.fired.length,
  });

  // Step 2: traversal loop
  for (let hop = 0; hop < maxHops; hop += 1) {
    // Still choosing a starting node? Fixed for the whole iteration because
    // currentNodeId is only reassigned after the step has been recorded.
    const atSeedPhase = state.currentNodeId === null;

    const actions = graph.getActionSet(
      state.currentNodeId,
      state.visited,
      atSeedPhase ? seedCandidates : undefined,
    );

    if (actions.length === 0) {
      break; // Dead end
    }

    if (actions.length === 1 && actions[0].type === "stop") {
      // Nothing to sample: record a forced STOP and terminate.
      trajectory.push({
        stateSnapshot: snapshot(),
        candidates: [{ action: { type: "stop" }, score: 0, probability: 1.0 }],
        chosenAction: { type: "stop" },
        chosenActionProbability: 1.0,
        stopProbability: 1.0,
      });
      break;
    }

    const distribution = softmaxPolicy(actions, state, graph, params);
    const sampled = sampleAction(distribution);
    const chosen = sampled.action;

    if (atSeedPhase) {
      seedScores = seedCandidates.map((seed) => {
        const entry = distribution.find(
          (d) => d.action.type === "traverse" && d.action.targetNodeId === seed.nodeId,
        );
        return {
          nodeId: seed.nodeId,
          priorScore: seed.score,
          learnedSeedWeight: graph.getSeedWeight(seed.nodeId),
          policyScore: entry?.score ?? seed.score,
          probability: entry?.probability ?? 0,
          chosen: chosen.type === "traverse" && chosen.targetNodeId === seed.nodeId,
        };
      });
    }

    const stopProbability = distribution.find((d) => d.action.type === "stop")?.probability ?? 0;

    trajectory.push({
      stateSnapshot: snapshot(),
      candidates: distribution.map(({ action, score, probability }) => ({
        action,
        score,
        probability,
        // Seed-only diagnostics; undefined for STOP and for non-seed hops.
        priorScore: action.type === "traverse" && atSeedPhase ? action.seedScore : undefined,
        learnedSeedWeight: action.type === "traverse" && atSeedPhase
          ? graph.getSeedWeight(action.targetNodeId)
          : undefined,
      })),
      chosenAction: chosen,
      chosenActionProbability: sampled.probability,
      stopProbability,
    });

    if (chosen.type === "stop") {
      break; // Terminal: STOP chosen
    }

    const targetNodeId = chosen.targetNodeId;
    state.visited.add(targetNodeId);
    state.hopCount += 1;

    // Inhibitory veto: the walk still moves onto the node, but it is not fired.
    const origin = state.currentNodeId;
    if (origin && graph.isVetoed(origin, targetNodeId)) {
      vetoedNodes.push({
        nodeId: targetNodeId,
        reason: graph.getVetoReason(origin, targetNodeId) ?? "inhibitory",
      });
      state.currentNodeId = targetNodeId;
      continue;
    }

    // Fire node: add to context and charge the budget.
    const targetNode = graph.getNode(targetNodeId);
    if (targetNode) {
      state.fired.push(targetNodeId);
      state.budgetRemaining -= targetNode.tokenCount;
      firedNodes.push({
        nodeId: targetNode.id,
        kind: targetNode.kind,
        content: targetNode.content,
        tokenCount: targetNode.tokenCount,
      });
    }

    state.currentNodeId = targetNodeId;

    if (state.budgetRemaining <= 0) {
      break; // Terminal: budget exhausted
    }
  }

  let contextChars = 0;
  for (const fired of firedNodes) {
    contextChars += fired.content.length;
  }

  const chosenSeed = seedScores.find((seed) => seed.chosen)?.nodeId ?? "none";
  const footer = `Brain · ${seedScores.length} seeds · start ${chosenSeed} · ${state.hopCount} hops · ${firedNodes.length} fired · ${vetoedNodes.length} veto · ${contextChars} chars`;

  return { firedNodes, vetoedNodes, trajectory, seedScores, contextChars, footer };
}
|