synergyspec-selfevolving 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -19
- package/dist/commands/learn.d.ts +12 -1
- package/dist/commands/learn.js +373 -31
- package/dist/commands/self-evolution-episode.d.ts +177 -0
- package/dist/commands/self-evolution-episode.js +423 -0
- package/dist/commands/self-evolution.d.ts +12 -190
- package/dist/commands/self-evolution.js +179 -786
- package/dist/commands/workflow/status.js +3 -1
- package/dist/core/archive.d.ts +0 -1
- package/dist/core/archive.js +0 -58
- package/dist/core/artifact-graph/instruction-loader.d.ts +2 -4
- package/dist/core/artifact-graph/instruction-loader.js +3 -31
- package/dist/core/config-prompts.js +4 -0
- package/dist/core/fitness/health/health-metrics.d.ts +26 -56
- package/dist/core/fitness/health/health-metrics.js +19 -58
- package/dist/core/fitness/health/index.d.ts +15 -2
- package/dist/core/fitness/health/index.js +25 -1
- package/dist/core/fitness/health/local-source.d.ts +43 -4
- package/dist/core/fitness/health/local-source.js +181 -25
- package/dist/core/fitness/health/metric-source.d.ts +48 -19
- package/dist/core/fitness/health/metric-source.js +8 -18
- package/dist/core/fitness/health/resolve-source.js +4 -1
- package/dist/core/fitness/loss.d.ts +7 -7
- package/dist/core/fitness/loss.js +6 -6
- package/dist/core/fitness/sample.d.ts +10 -0
- package/dist/core/fitness/test-failures.d.ts +30 -0
- package/dist/core/fitness/test-failures.js +123 -0
- package/dist/core/learn/credit-path.d.ts +36 -0
- package/dist/core/learn/credit-path.js +198 -0
- package/dist/core/learn/trajectory-discovery.d.ts +39 -0
- package/dist/core/learn/trajectory-discovery.js +140 -0
- package/dist/core/learn.d.ts +39 -5
- package/dist/core/learn.js +131 -14
- package/dist/core/project-config.d.ts +4 -0
- package/dist/core/project-config.js +52 -1
- package/dist/core/self-evolution/candidate-fitness.d.ts +23 -1
- package/dist/core/self-evolution/candidate-fitness.js +31 -5
- package/dist/core/self-evolution/candidates.d.ts +0 -9
- package/dist/core/self-evolution/canonical-targets.d.ts +8 -4
- package/dist/core/self-evolution/canonical-targets.js +8 -4
- package/dist/core/self-evolution/critic-agent.d.ts +150 -0
- package/dist/core/self-evolution/critic-agent.js +487 -0
- package/dist/core/self-evolution/edits-contract.d.ts +53 -0
- package/dist/core/self-evolution/edits-contract.js +89 -0
- package/dist/core/self-evolution/episode-orchestrator.d.ts +197 -0
- package/dist/core/self-evolution/episode-orchestrator.js +534 -0
- package/dist/core/self-evolution/episode-store.d.ts +266 -0
- package/dist/core/self-evolution/episode-store.js +573 -0
- package/dist/core/self-evolution/evolution-switches.d.ts +1 -1
- package/dist/core/self-evolution/evolution-switches.js +5 -10
- package/dist/core/self-evolution/evolving-agent.d.ts +162 -0
- package/dist/core/self-evolution/evolving-agent.js +449 -0
- package/dist/core/self-evolution/health-baseline.d.ts +25 -6
- package/dist/core/self-evolution/health-baseline.js +30 -6
- package/dist/core/self-evolution/host-harness.d.ts +1 -2
- package/dist/core/self-evolution/host-harness.js +1 -2
- package/dist/core/self-evolution/index.d.ts +10 -6
- package/dist/core/self-evolution/index.js +19 -6
- package/dist/core/self-evolution/learn-hints.d.ts +31 -0
- package/dist/core/self-evolution/learn-hints.js +16 -0
- package/dist/core/self-evolution/learn-observation-adapter.d.ts +35 -0
- package/dist/core/self-evolution/learn-observation-adapter.js +285 -10
- package/dist/core/self-evolution/line-diff.d.ts +60 -0
- package/dist/core/self-evolution/line-diff.js +130 -0
- package/dist/core/self-evolution/policy/fs-safe.d.ts +19 -0
- package/dist/core/self-evolution/policy/fs-safe.js +89 -0
- package/dist/core/self-evolution/policy/index.d.ts +13 -0
- package/dist/core/self-evolution/policy/index.js +13 -0
- package/dist/core/self-evolution/policy/policy-store.d.ts +217 -0
- package/dist/core/self-evolution/policy/policy-store.js +774 -0
- package/dist/core/self-evolution/policy/reject-buffer.d.ts +48 -0
- package/dist/core/self-evolution/policy/reject-buffer.js +168 -0
- package/dist/core/self-evolution/promote.d.ts +1 -1
- package/dist/core/self-evolution/promote.js +6 -33
- package/dist/core/self-evolution/promotion.js +1 -2
- package/dist/core/self-evolution/proposer-agent.d.ts +41 -0
- package/dist/core/self-evolution/proposer-agent.js +94 -13
- package/dist/core/self-evolution/proposer-slice.d.ts +26 -0
- package/dist/core/self-evolution/proposer-slice.js +54 -0
- package/dist/core/self-evolution/reward-agent.d.ts +234 -0
- package/dist/core/self-evolution/reward-agent.js +564 -0
- package/dist/core/self-evolution/scope-gate.d.ts +66 -0
- package/dist/core/self-evolution/scope-gate.js +107 -0
- package/dist/core/self-evolution/success-channel.d.ts +79 -0
- package/dist/core/self-evolution/success-channel.js +361 -0
- package/dist/core/self-evolution/target-evolution.d.ts +11 -0
- package/dist/core/self-evolution/target-evolution.js +2 -0
- package/dist/core/self-evolution/tool-evolution.js +2 -13
- package/dist/core/self-evolution/verdict.d.ts +8 -5
- package/dist/core/self-evolution/verdict.js +4 -7
- package/dist/core/templates/skill-templates.d.ts +1 -0
- package/dist/core/templates/skill-templates.js +1 -0
- package/dist/core/templates/workflow-manifest.js +2 -0
- package/dist/core/templates/workflows/learn.d.ts +4 -2
- package/dist/core/templates/workflows/learn.js +25 -166
- package/dist/core/templates/workflows/self-evolving.d.ts +13 -0
- package/dist/core/templates/workflows/self-evolving.js +127 -0
- package/dist/core/trajectory/facts.d.ts +16 -0
- package/dist/core/trajectory/facts.js +12 -4
- package/dist/core/trajectory/skeleton.d.ts +43 -0
- package/dist/core/trajectory/skeleton.js +239 -0
- package/dist/dashboard/data.d.ts +25 -51
- package/dist/dashboard/data.js +68 -180
- package/dist/dashboard/react-client.js +458 -503
- package/dist/dashboard/react-styles.js +3 -3
- package/dist/dashboard/server.js +23 -17
- package/dist/ui/ascii-patterns.d.ts +7 -15
- package/dist/ui/ascii-patterns.js +123 -54
- package/dist/ui/welcome-screen.d.ts +0 -14
- package/dist/ui/welcome-screen.js +16 -35
- package/package.json +3 -1
- package/scripts/code-health.py +1066 -638
- package/scripts/slop_rules.yaml +2151 -0
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Action-skeleton projection: a BOUNDED, ordered play-by-play of what the agent
|
|
3
|
+
* actually did in the observed run — `wrote design.md → ran tests: 3 failed →
|
|
4
|
+
* edited normalize.py (x4) → ran tests: 0 failed`.
|
|
5
|
+
*
|
|
6
|
+
* This is the trajectory the critic reads. It is deliberately a PROJECTION of
|
|
7
|
+
* the same window-scoped {@link NormalizedTrajectory} the observed-verified
|
|
8
|
+
* gate grades (one shared implementation over the normalized shape — never
|
|
9
|
+
* per-adapter, so the three harnesses cannot drift), and deliberately lossy:
|
|
10
|
+
* only file edits, test runs, and other shell commands survive; reads, prose,
|
|
11
|
+
* and reasoning are dropped. Ordering is the stitched turn order (main session
|
|
12
|
+
* first, then subagents) — the exact order `collectRunnerResults` grades in.
|
|
13
|
+
*
|
|
14
|
+
* Pure + no throw. `null` in → `null` out.
|
|
15
|
+
*/
|
|
16
|
+
import { parseTestMetrics } from '../fitness/test-metrics.js';
|
|
17
|
+
import { commandText, inputLooksLikeRunner, isExecTool } from './facts.js';
|
|
18
|
+
/** Maximum number of events a projected skeleton keeps (head half + tail half). */
const MAX_SKELETON_EVENTS = 40;

/** Maximum length, in characters, of the command text stored on an event. */
const MAX_COMMAND_CHARS = 120;

/**
 * Recognizes the NAME of a file-mutating tool, whichever harness emitted it:
 *   - Claude:   `Write` / `Edit` / `MultiEdit` / `NotebookEdit`
 *   - opencode: `write` / `edit` / `patch`
 *   - Codex:    `apply_patch`
 *   - generic:  `str_replace` / `create_file` style names
 *
 * The keyword must be delimited by the start/end of the name or by one of
 * `.`, `_`, `-`, and matching is case-insensitive. `multiedit` and
 * `notebookedit` have no internal delimiter, so they are spelled out as their
 * own alternatives. Anything that does not match is treated as "not an edit
 * tool" (deny by default), so an unknown tool is skipped rather than reported
 * as a phantom edit.
 */
const EDIT_TOOL_RE = /(?:^|[._-])(?:write|edit|multiedit|notebookedit|patch|apply_patch|str_replace|replace|create|update)(?:[._-]|$)/i;

/** Names of the input fields that may hold the edited file path; earlier entries win. */
const FILE_FIELDS = [
    'file_path',
    'filePath',
    'path',
    'notebook_path',
    'notebookPath',
];

/**
 * Matches one `*** Add File: <path>`, `*** Update File: <path>` or
 * `*** Delete File: <path>` line of an apply_patch payload; capture group 1 is
 * the path. Global + multiline so every marker line of a payload is found.
 */
const PATCH_FILE_RE = /^\*{3}\s+(?:Add|Update|Delete)\s+File:\s+(.+)$/gm;
|
|
34
|
+
/**
 * Normalize a path string: every backslash becomes a forward slash and
 * leading/trailing whitespace is removed.
 */
function toPosix(p) {
    const forwardSlashed = p.split('\\').join('/');
    return forwardSlashed.trim();
}
|
|
37
|
+
/**
 * Whether `tool` names a file-mutating tool according to EDIT_TOOL_RE.
 * An undefined tool name is never an edit tool.
 */
function isEditTool(tool) {
    if (tool === undefined)
        return false;
    return EDIT_TOOL_RE.test(tool);
}
|
|
40
|
+
/**
 * Collect the paths named by `*** (Add|Update|Delete) File: <path>` marker
 * lines inside an apply_patch-style payload. Only the markers are read; no
 * other field of `input` is interpreted as a path.
 *
 * Every own value of `input` is inspected. A string value is scanned directly.
 * An array value is scanned element by element, so a patch body that travels
 * as one element of an argv-style command array (e.g.
 * `['apply_patch', '<patch body>']`) is found as well. Non-string values and
 * non-string array elements are ignored.
 *
 * @param input Tool-call input object (may be null/undefined).
 * @returns The marker paths in encounter order, each normalized by toPosix;
 *          an empty array when `input` is falsy or carries no markers.
 */
function patchFilesFromPayload(input) {
    if (!input)
        return [];
    const files = [];
    for (const value of Object.values(input)) {
        // Treat a scalar as a one-element list so both shapes share one scan.
        const candidates = Array.isArray(value) ? value : [value];
        for (const text of candidates) {
            if (typeof text !== 'string')
                continue;
            // matchAll works on a clone of the /g regex, so PATCH_FILE_RE's
            // lastIndex is never left in a stale state between calls.
            for (const m of text.matchAll(PATCH_FILE_RE))
                files.push(toPosix(m[1]));
        }
    }
    return files;
}
|
|
53
|
+
/**
 * Resolve which file(s) a FILE-tool call touched.
 *
 * The first FILE_FIELDS entry whose value is a non-blank string wins and is
 * returned as a single normalized path. When no such field exists, the input
 * is scanned for apply_patch marker lines instead. This helper is meant for
 * file tools only: exec-tool inputs can legitimately carry `path`-like fields
 * (such as a cwd) that do not denote an edit.
 *
 * @param input Tool-call input object (may be null/undefined).
 * @returns Array of edited paths; empty when nothing can be determined.
 */
function editedFiles(input) {
    if (!input)
        return [];
    const pathField = FILE_FIELDS.find((name) => {
        const candidate = input[name];
        return typeof candidate === 'string' && candidate.trim().length > 0;
    });
    if (pathField === undefined)
        return patchFilesFromPayload(input);
    return [toPosix(input[pathField])];
}
|
|
68
|
+
/**
 * Produce the bounded, single-line form of a shell command for an event.
 *
 * The command is trimmed and every whitespace run is collapsed to one space.
 * If the result is longer than `maxChars`, it is cut so that the kept text
 * plus a trailing `…` is at most `maxChars` characters. The cut never leaves
 * half of a UTF-16 surrogate pair behind: a dangling high surrogate at the cut
 * point is dropped so the output is always well-formed text.
 *
 * @param command  Raw command text; falsy input yields `undefined`.
 * @param maxChars Length cap (defaults to MAX_COMMAND_CHARS); expected >= 1.
 * @returns The normalized (possibly truncated) command, or `undefined`.
 */
function capCommand(command, maxChars = MAX_COMMAND_CHARS) {
    if (!command)
        return undefined;
    const c = command.trim().replace(/\s+/g, ' ');
    if (c.length <= maxChars)
        return c;
    // Reserve one character for the ellipsis.
    let kept = c.slice(0, Math.max(0, maxChars - 1));
    // slice() counts UTF-16 code units; if the cut landed between the two
    // halves of a surrogate pair, drop the orphaned high surrogate.
    const lastUnit = kept.charCodeAt(kept.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
        kept = kept.slice(0, -1);
    return `${kept}…`;
}
|
|
74
|
+
/**
 * Project the bounded action skeleton from a normalized trajectory.
 *
 * A single walk over `trajectory.turns[*].parts[*]`:
 *   - every `tool_call` part is counted in `totalToolCalls`;
 *   - an exec-tool call whose payload carries apply_patch file markers becomes
 *     one `file-edit` event per marked file;
 *   - any other exec-tool call becomes a `test-run` event (when the input looks
 *     like a test runner) or a `command` event, and is remembered so that its
 *     `tool_result` can be attached later;
 *   - an edit-tool call becomes one `file-edit` event per edited file;
 *   - other tool calls produce no event;
 *   - a `tool_result` part is paired with its call by `callId`, or — only when
 *     the result has no `callId` — with the immediately preceding exec call.
 *     The pairing supplies `exitCode` and, for test runs, `passRate` and
 *     `failedCount` parsed from the output.
 *
 * Consecutive edits of the same file within the same session collapse into one
 * event with an incremented `editCount`. Ordinals are stamped on the rolled-up
 * sequence; if it exceeds MAX_SKELETON_EVENTS, the middle is dropped and
 * `truncated` is set.
 *
 * A missing `turns` list, a missing `parts` list, or a nullish turn/part is
 * treated as empty rather than raising, in line with this module's "no throw"
 * contract.
 *
 * @param trajectory Normalized trajectory, or null/undefined.
 * @returns `{ harness, events, totalToolCalls, truncated }`, or `null` when
 *          `trajectory` is falsy.
 */
export function toActionSkeleton(trajectory) {
    if (!trajectory)
        return null;
    const pendingByCallId = new Map();
    let lastPending = null;
    let totalToolCalls = 0;
    const events = [];
    const append = (event) => {
        // Per-file rollup: consecutive edits to the same file collapse.
        const prev = events[events.length - 1];
        if (event.kind === 'file-edit' &&
            prev?.kind === 'file-edit' &&
            prev.file === event.file &&
            prev.sessionId === event.sessionId) {
            prev.editCount = (prev.editCount ?? 1) + 1;
            return prev;
        }
        events.push(event);
        return event;
    };
    // `sessionId` is only present on an event when the turn carries one.
    const sessionOf = (turn) => (turn.sessionId ? { sessionId: turn.sessionId } : {});
    // Shared by the shell-driven apply_patch path and the edit-tool path.
    const appendFileEdits = (files, part, turn) => {
        for (const file of files) {
            append({
                kind: 'file-edit',
                ordinal: 0,
                tool: part.tool,
                file,
                editCount: 1,
                ...sessionOf(turn),
            });
        }
    };
    for (const turn of trajectory.turns ?? []) {
        for (const part of turn?.parts ?? []) {
            if (part?.kind === 'tool_call') {
                totalToolCalls++;
                if (isExecTool(part.tool)) {
                    const command = capCommand(commandText(part.input));
                    // A shell-driven apply_patch carries `*** Update File: <path>`
                    // lines in its payload — that is a file edit, not a command.
                    // Markers only: an exec input's `path`-like fields (cwd)
                    // must never read as edits.
                    const patchedFiles = patchFilesFromPayload(part.input);
                    if (patchedFiles.length > 0) {
                        appendFileEdits(patchedFiles, part, turn);
                        lastPending = null;
                        continue;
                    }
                    const event = append({
                        kind: inputLooksLikeRunner(part.input) ? 'test-run' : 'command',
                        ordinal: 0,
                        tool: part.tool,
                        ...(command ? { command } : {}),
                        ...sessionOf(turn),
                    });
                    const pending = { event };
                    lastPending = pending;
                    if (part.callId)
                        pendingByCallId.set(part.callId, pending);
                    continue;
                }
                if (isEditTool(part.tool))
                    appendFileEdits(editedFiles(part.input), part, turn);
                // Any non-exec call breaks the positional call→result pairing.
                lastPending = null;
            }
            else if (part?.kind === 'tool_result') {
                let pending;
                if (part.callId && pendingByCallId.has(part.callId))
                    pending = pendingByCallId.get(part.callId);
                else if (!part.callId && lastPending)
                    pending = lastPending; // positional fallback for id-less results
                if (pending) {
                    const e = pending.event;
                    if (typeof part.exitCode === 'number')
                        e.exitCode = part.exitCode;
                    if (e.kind === 'test-run' && typeof part.output === 'string') {
                        const metrics = parseTestMetrics(part.output);
                        if (metrics) {
                            e.passRate = metrics.passRate;
                            e.failedCount = metrics.failed;
                        }
                    }
                }
                lastPending = null;
            }
        }
    }
    // Stamp ordinals on the full (rolled-up) sequence, then middle-out truncate,
    // so surviving events keep their position in the untruncated sequence.
    events.forEach((e, i) => {
        e.ordinal = i;
    });
    let bounded = events;
    let truncated = false;
    if (events.length > MAX_SKELETON_EVENTS) {
        const head = Math.ceil(MAX_SKELETON_EVENTS / 2);
        const tail = MAX_SKELETON_EVENTS - head;
        bounded = [...events.slice(0, head), ...events.slice(events.length - tail)];
        truncated = true;
    }
    return {
        harness: trajectory.harness,
        events: bounded,
        totalToolCalls,
        truncated,
    };
}
|
|
193
|
+
/**
 * Last `/`-separated segment of `p`. When that segment is empty (for example
 * the path ends with `/`), the whole input is returned instead.
 */
function basename(p) {
    const lastSlash = p.lastIndexOf('/');
    const leaf = lastSlash === -1 ? p : p.slice(lastSlash + 1);
    return leaf || p;
}
|
|
197
|
+
/**
 * Render one skeleton event as a short phrase.
 *
 *   - `file-edit`: `wrote <name>` when the tool name contains a delimited
 *     `write`/`create` keyword, otherwise `edited <name>`; `<name>` is the
 *     basename of the file (or `?` when the event has no file), followed by
 *     ` (xN)` when the edit count is above one.
 *   - `test-run`: prefers the failed-test count, then the exit code, then a
 *     bare `ran tests`.
 *   - anything else: the command in backticks, or the tool name when the event
 *     carries no command.
 */
function renderEvent(e) {
    switch (e.kind) {
        case 'file-edit': {
            const isCreation = /(?:^|[._-])(?:write|create)(?:[._-]|$)/i.test(e.tool);
            const action = isCreation ? 'wrote' : 'edited';
            const repeat = (e.editCount ?? 1) > 1 ? ` (x${e.editCount})` : '';
            return `${action} ${basename(e.file ?? '?')}${repeat}`;
        }
        case 'test-run':
            if (typeof e.failedCount === 'number')
                return `ran tests: ${e.failedCount} failed`;
            if (typeof e.exitCode === 'number')
                return `ran tests (exit ${e.exitCode})`;
            return 'ran tests';
        default:
            return e.command ? `ran \`${e.command}\`` : `ran ${e.tool}`;
    }
}
|
|
214
|
+
/**
 * Render a skeleton as a deterministic, arrow-separated play-by-play that fits
 * within `maxChars`.
 *
 * Each event is rendered with renderEvent and the phrases are joined with
 * ` → `. If the skeleton was already truncated at projection time, a single
 * `…` marker is placed between the first half (rounded up) and the rest. While
 * the text is still too long and more than two phrases remain, the middle
 * phrase is removed and the text is rebuilt with that same marker. If it still
 * does not fit, the text is hard-cut to `maxChars - 1` characters plus `…`.
 *
 * @param skeleton Result of the skeleton projection (`events`, `truncated`).
 * @param maxChars Maximum length of the returned string (default 1600).
 * @returns The rendered line, or a fixed placeholder when there are no events.
 */
export function renderActionSkeleton(skeleton, maxChars = 1600) {
    const { events } = skeleton;
    if (events.length === 0)
        return '(no file edits or shell commands observed)';
    const ARROW = ' → ';
    // Join phrases; with `showGap`, put the elision marker at the midpoint.
    const compose = (labels, showGap) => {
        if (!showGap)
            return labels.join(ARROW);
        const split = Math.ceil(labels.length / 2);
        const before = labels.slice(0, split).join(ARROW);
        const after = labels.slice(split).join(ARROW);
        return `${before} → … → ${after}`;
    };
    let labels = events.map(renderEvent);
    // Projection-time truncation already left a gap in the sequence.
    let hasGap = Boolean(skeleton.truncated);
    let text = compose(labels, hasGap);
    while (text.length > maxChars && labels.length > 2) {
        // Remove the middle phrase; from here on the gap marker is always shown.
        const middle = Math.floor(labels.length / 2);
        labels = labels.filter((_, index) => index !== middle);
        hasGap = true;
        text = compose(labels, hasGap);
    }
    if (text.length > maxChars)
        return `${text.slice(0, maxChars - 1)}…`;
    return text;
}
|
|
239
|
+
//# sourceMappingURL=skeleton.js.map
|
package/dist/dashboard/data.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { type EpisodeRecord, type PolicyLedgerEntry, type RejectBufferEntry } from '../core/self-evolution/index.js';
|
|
1
2
|
export interface ProjectInfo {
|
|
2
3
|
name: string;
|
|
3
4
|
version: string;
|
|
@@ -32,39 +33,6 @@ export interface CliHistoryEvent {
|
|
|
32
33
|
durationMs?: number;
|
|
33
34
|
metadata?: Record<string, unknown>;
|
|
34
35
|
}
|
|
35
|
-
export type EvolveRunStatus = 'completed' | 'errored' | 'pending' | 'empty';
|
|
36
|
-
export interface EvolveRunSummary {
|
|
37
|
-
schemaVersion?: number;
|
|
38
|
-
runId: string;
|
|
39
|
-
benchmarkId?: string;
|
|
40
|
-
harnessVariant?: string;
|
|
41
|
-
startedAt?: string;
|
|
42
|
-
finishedAt?: string;
|
|
43
|
-
taskCount?: number;
|
|
44
|
-
verdictCounts?: Record<string, number>;
|
|
45
|
-
passRate?: number;
|
|
46
|
-
totalCostUsd?: number;
|
|
47
|
-
totalWallTimeMs?: number;
|
|
48
|
-
interrupted?: boolean;
|
|
49
|
-
isolationMode?: string;
|
|
50
|
-
budget?: Record<string, unknown>;
|
|
51
|
-
status?: EvolveRunStatus;
|
|
52
|
-
failureReasonSummary?: string;
|
|
53
|
-
}
|
|
54
|
-
export interface EvolveArchive {
|
|
55
|
-
schemaVersion?: number;
|
|
56
|
-
createdAt?: string;
|
|
57
|
-
entries: Array<{
|
|
58
|
-
id: string;
|
|
59
|
-
parentId: string | null;
|
|
60
|
-
generation: number;
|
|
61
|
-
createdAt?: string;
|
|
62
|
-
snapshotPath?: string;
|
|
63
|
-
runs?: unknown[];
|
|
64
|
-
childCount?: number;
|
|
65
|
-
}>;
|
|
66
|
-
generations?: unknown[];
|
|
67
|
-
}
|
|
68
36
|
export interface ProjectOverview {
|
|
69
37
|
project: ProjectInfo;
|
|
70
38
|
changes: {
|
|
@@ -73,15 +41,25 @@ export interface ProjectOverview {
|
|
|
73
41
|
inProgress: number;
|
|
74
42
|
};
|
|
75
43
|
evolve: {
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
44
|
+
episodes: number;
|
|
45
|
+
lastEpisodeAt: string | null;
|
|
46
|
+
lastStage: string | null;
|
|
47
|
+
headVersion: number | null;
|
|
79
48
|
};
|
|
80
49
|
cli: {
|
|
81
50
|
totalEvents: number;
|
|
82
51
|
recentFailures: number;
|
|
83
52
|
};
|
|
84
53
|
}
|
|
54
|
+
export interface PolicyLineage {
|
|
55
|
+
targetId: string;
|
|
56
|
+
headVersion: number | null;
|
|
57
|
+
entries: PolicyLedgerEntry[];
|
|
58
|
+
evolveCount: number;
|
|
59
|
+
rollbackCount: number;
|
|
60
|
+
refusedCount: number;
|
|
61
|
+
lastAt: string | null;
|
|
62
|
+
}
|
|
85
63
|
export interface AgentInterfacePlan {
|
|
86
64
|
schemaVersion: 1;
|
|
87
65
|
generatedAt: string;
|
|
@@ -193,21 +171,17 @@ export declare function readProjectInfo(root: string): Promise<ProjectInfo>;
|
|
|
193
171
|
export declare function readChange(root: string, id: string): Promise<ChangeSummary | null>;
|
|
194
172
|
export declare function listChanges(root: string): Promise<ChangeSummary[]>;
|
|
195
173
|
export declare function readCliHistory(root: string, limit?: number): Promise<CliHistoryEvent[]>;
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
}
|
|
208
|
-
export declare function listEvolveRuns(root: string): Promise<EvolveRunSummary[]>;
|
|
209
|
-
export declare function readEvolveRun(root: string, runId: string): Promise<EvolveRunDetail | null>;
|
|
210
|
-
export declare function readEvolveArchive(root: string): Promise<EvolveArchive | null>;
|
|
174
|
+
/**
|
|
175
|
+
* Read the loop-v2 self-evolution surface: per-episode two-arm forward records,
|
|
176
|
+
* the policy version ledger grouped into per-target lineages, and the
|
|
177
|
+
* reject-buffer of rolled-back episodes. Each reader is independently guarded so
|
|
178
|
+
* a single missing/unreadable store yields an empty slice rather than throwing.
|
|
179
|
+
*/
|
|
180
|
+
export declare function readSelfEvolution(root: string): Promise<{
|
|
181
|
+
episodes: EpisodeRecord[];
|
|
182
|
+
policyLineages: PolicyLineage[];
|
|
183
|
+
rejectBuffer: RejectBufferEntry[];
|
|
184
|
+
}>;
|
|
211
185
|
export declare function readAgentInterfacePlan(root: string): Promise<AgentInterfacePlan>;
|
|
212
186
|
export declare function readOverview(root: string): Promise<ProjectOverview>;
|
|
213
187
|
/**
|
package/dist/dashboard/data.js
CHANGED
|
@@ -2,6 +2,7 @@ import { promises as fs } from 'fs';
|
|
|
2
2
|
import { join, resolve, dirname } from 'path';
|
|
3
3
|
import { readAllJsonLines } from './tail.js';
|
|
4
4
|
import { readAgentCognitiveEvents, summarizeAgentCognitiveTrace, } from '../history/cognitive.js';
|
|
5
|
+
import { listEpisodes, readPolicyLedgerAll, readRejectBufferAll, } from '../core/self-evolution/index.js';
|
|
5
6
|
async function tryReadJson(path) {
|
|
6
7
|
try {
|
|
7
8
|
const raw = await fs.readFile(path, 'utf8');
|
|
@@ -137,156 +138,42 @@ export async function readCliHistory(root, limit = 200) {
|
|
|
137
138
|
const events = await readAllJsonLines(path);
|
|
138
139
|
return events.slice(-limit).reverse();
|
|
139
140
|
}
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
if (stat.size > 0)
|
|
174
|
-
hasWrapperStderr = true;
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
}
|
|
178
|
-
catch {
|
|
179
|
-
return 'empty';
|
|
180
|
-
}
|
|
181
|
-
if (hasWrapperStderr)
|
|
182
|
-
return 'errored';
|
|
183
|
-
if (hasAnyFiles)
|
|
184
|
-
return 'pending';
|
|
185
|
-
return 'empty';
|
|
186
|
-
}
|
|
187
|
-
function summarizeFailureReason(reason) {
|
|
188
|
-
if (!reason)
|
|
189
|
-
return undefined;
|
|
190
|
-
const lines = reason
|
|
191
|
-
.split(/\r?\n/)
|
|
192
|
-
.map((line) => line.trim())
|
|
193
|
-
.filter(Boolean);
|
|
194
|
-
if (!lines.length)
|
|
195
|
-
return undefined;
|
|
196
|
-
const mismatchCount = lines.filter((line) => /sha256 mismatch/i.test(line)).length;
|
|
197
|
-
if (mismatchCount > 1)
|
|
198
|
-
return `${mismatchCount} genome file hash mismatches`;
|
|
199
|
-
return lines[0].replace(/\s+/g, ' ').slice(0, 180);
|
|
200
|
-
}
|
|
201
|
-
async function readRunFailureReasonSummary(runDir) {
|
|
202
|
-
const tasksDir = join(runDir, 'tasks');
|
|
203
|
-
let taskNames = [];
|
|
204
|
-
try {
|
|
205
|
-
const items = await fs.readdir(tasksDir, { withFileTypes: true });
|
|
206
|
-
taskNames = items.filter((d) => d.isDirectory()).map((d) => d.name);
|
|
207
|
-
}
|
|
208
|
-
catch {
|
|
209
|
-
return undefined;
|
|
210
|
-
}
|
|
211
|
-
for (const taskName of taskNames) {
|
|
212
|
-
const result = await tryReadJson(join(tasksDir, taskName, 'result.json'));
|
|
213
|
-
const summary = summarizeFailureReason(result?.reason);
|
|
214
|
-
if (summary)
|
|
215
|
-
return summary;
|
|
216
|
-
}
|
|
217
|
-
return undefined;
|
|
218
|
-
}
|
|
219
|
-
export async function listEvolveRuns(root) {
|
|
220
|
-
const dir = join(root, 'evolve', 'runs');
|
|
221
|
-
let entries = [];
|
|
222
|
-
try {
|
|
223
|
-
const items = await fs.readdir(dir, { withFileTypes: true });
|
|
224
|
-
entries = items.filter((d) => d.isDirectory()).map((d) => d.name);
|
|
225
|
-
}
|
|
226
|
-
catch {
|
|
227
|
-
return [];
|
|
228
|
-
}
|
|
229
|
-
const summaries = await Promise.all(entries.map(async (id) => {
|
|
230
|
-
const runDir = join(dir, id);
|
|
231
|
-
const summary = await tryReadJson(join(runDir, 'summary.json'));
|
|
232
|
-
const status = await classifyRunStatus(runDir, summary);
|
|
233
|
-
const failureReasonSummary = await readRunFailureReasonSummary(runDir);
|
|
234
|
-
if (!summary)
|
|
235
|
-
return { runId: id, status, failureReasonSummary };
|
|
236
|
-
return { ...summary, runId: summary.runId ?? id, status, failureReasonSummary };
|
|
237
|
-
}));
|
|
238
|
-
return summaries.sort((a, b) => (b.startedAt ?? '').localeCompare(a.startedAt ?? ''));
|
|
239
|
-
}
|
|
240
|
-
export async function readEvolveRun(root, runId) {
|
|
241
|
-
const dir = join(root, 'evolve', 'runs', runId);
|
|
242
|
-
const dirStat = await tryStat(dir);
|
|
243
|
-
if (!dirStat)
|
|
244
|
-
return null;
|
|
245
|
-
const summary = await tryReadJson(join(dir, 'summary.json'));
|
|
246
|
-
const status = await classifyRunStatus(dir, summary);
|
|
247
|
-
const tasksDir = join(dir, 'tasks');
|
|
248
|
-
let taskNames = [];
|
|
249
|
-
try {
|
|
250
|
-
const items = await fs.readdir(tasksDir, { withFileTypes: true });
|
|
251
|
-
taskNames = items.filter((d) => d.isDirectory()).map((d) => d.name);
|
|
252
|
-
}
|
|
253
|
-
catch {
|
|
254
|
-
// No tasks dir.
|
|
255
|
-
}
|
|
256
|
-
const tasks = await Promise.all(taskNames.map(async (taskId) => {
|
|
257
|
-
const result = await tryReadJson(join(tasksDir, taskId, 'result.json'));
|
|
258
|
-
return {
|
|
259
|
-
taskId,
|
|
260
|
-
verdict: result?.verdict,
|
|
261
|
-
wallTimeMs: result?.telemetry?.wallTimeMs,
|
|
262
|
-
totalCostUsd: result?.telemetry?.totalCostUsd,
|
|
263
|
-
reason: result?.reason,
|
|
264
|
-
};
|
|
265
|
-
}));
|
|
266
|
-
const needsDiagnostics = !summary || status !== 'completed';
|
|
267
|
-
const wrapperStderrTail = needsDiagnostics
|
|
268
|
-
? (await tryReadFileTail(join(dir, 'wrapper.stderr.log'))) ?? undefined
|
|
269
|
-
: undefined;
|
|
270
|
-
const wrapperStdoutTail = needsDiagnostics
|
|
271
|
-
? (await tryReadFileTail(join(dir, 'wrapper.stdout.log'))) ?? undefined
|
|
272
|
-
: undefined;
|
|
273
|
-
let fileListing;
|
|
274
|
-
if (needsDiagnostics) {
|
|
275
|
-
try {
|
|
276
|
-
const items = await fs.readdir(dir, { withFileTypes: true });
|
|
277
|
-
fileListing = items.map((d) => d.name + (d.isDirectory() ? '/' : ''));
|
|
278
|
-
}
|
|
279
|
-
catch {
|
|
280
|
-
fileListing = undefined;
|
|
281
|
-
}
|
|
141
|
+
/**
|
|
142
|
+
* Read the loop-v2 self-evolution surface: per-episode two-arm forward records,
|
|
143
|
+
* the policy version ledger grouped into per-target lineages, and the
|
|
144
|
+
* reject-buffer of rolled-back episodes. Each reader is independently guarded so
|
|
145
|
+
* a single missing/unreadable store yields an empty slice rather than throwing.
|
|
146
|
+
*/
|
|
147
|
+
export async function readSelfEvolution(root) {
|
|
148
|
+
const [episodes, ledger, rejectBuffer] = await Promise.all([
|
|
149
|
+
listEpisodes(root).catch(() => []),
|
|
150
|
+
readPolicyLedgerAll(root).catch(() => []),
|
|
151
|
+
readRejectBufferAll(root).catch(() => []),
|
|
152
|
+
]);
|
|
153
|
+
// Group ledger entries by targetId, preserving append order within each group.
|
|
154
|
+
const byTarget = new Map();
|
|
155
|
+
for (const entry of ledger) {
|
|
156
|
+
const list = byTarget.get(entry.targetId);
|
|
157
|
+
if (list)
|
|
158
|
+
list.push(entry);
|
|
159
|
+
else
|
|
160
|
+
byTarget.set(entry.targetId, [entry]);
|
|
161
|
+
}
|
|
162
|
+
const policyLineages = [];
|
|
163
|
+
for (const [targetId, entries] of byTarget) {
|
|
164
|
+
const last = entries[entries.length - 1];
|
|
165
|
+
policyLineages.push({
|
|
166
|
+
targetId,
|
|
167
|
+
headVersion: last ? last.version : null,
|
|
168
|
+
entries,
|
|
169
|
+
evolveCount: entries.filter((e) => e.action === 'evolve').length,
|
|
170
|
+
rollbackCount: entries.filter((e) => e.action === 'rollback').length,
|
|
171
|
+
refusedCount: entries.filter((e) => e.action === 'refused').length,
|
|
172
|
+
lastAt: last ? last.at : null,
|
|
173
|
+
});
|
|
282
174
|
}
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
: { runId, status };
|
|
286
|
-
return { ...base, tasks, wrapperStderrTail, wrapperStdoutTail, fileListing };
|
|
287
|
-
}
|
|
288
|
-
export async function readEvolveArchive(root) {
|
|
289
|
-
return tryReadJson(join(root, 'evolve', 'archive', 'archive.json'));
|
|
175
|
+
policyLineages.sort((a, b) => (b.lastAt ?? '').localeCompare(a.lastAt ?? ''));
|
|
176
|
+
return { episodes, policyLineages, rejectBuffer };
|
|
290
177
|
}
|
|
291
178
|
export async function readAgentInterfacePlan(root) {
|
|
292
179
|
const { events, skippedRecords } = await readAgentCognitiveEvents({
|
|
@@ -366,10 +253,10 @@ export async function readAgentInterfacePlan(root) {
|
|
|
366
253
|
boundary: 'Same workflow, tool-native syntax.',
|
|
367
254
|
},
|
|
368
255
|
{
|
|
369
|
-
id: '
|
|
256
|
+
id: 'self-evolution',
|
|
370
257
|
label: 'Self-evolution',
|
|
371
|
-
command: 'synergyspec-selfevolving self-evolution
|
|
372
|
-
purpose: '
|
|
258
|
+
command: 'synergyspec-selfevolving self-evolution episode',
|
|
259
|
+
purpose: 'Run one self-evolution episode (main ∥ baseline arms → reward → bounded policy edit).',
|
|
373
260
|
boundary: 'Maintainer/research surface, not the normal user workflow.',
|
|
374
261
|
},
|
|
375
262
|
],
|
|
@@ -384,15 +271,14 @@ export async function readAgentInterfacePlan(root) {
|
|
|
384
271
|
};
|
|
385
272
|
}
|
|
386
273
|
export async function readOverview(root) {
|
|
387
|
-
const [project, changes,
|
|
274
|
+
const [project, changes, se, cli] = await Promise.all([
|
|
388
275
|
readProjectInfo(root),
|
|
389
276
|
listChanges(root),
|
|
390
|
-
|
|
277
|
+
readSelfEvolution(root),
|
|
391
278
|
readCliHistory(root, 500),
|
|
392
279
|
]);
|
|
393
280
|
const completedChanges = changes.filter((c) => c.status === 'completed').length;
|
|
394
281
|
const inProgressChanges = changes.filter((c) => c.status === 'in-progress').length;
|
|
395
|
-
const lastRun = runs[0];
|
|
396
282
|
const recentFailures = cli.filter((e) => e.outcome === 'failure').length;
|
|
397
283
|
return {
|
|
398
284
|
project,
|
|
@@ -402,14 +288,10 @@ export async function readOverview(root) {
|
|
|
402
288
|
inProgress: inProgressChanges,
|
|
403
289
|
},
|
|
404
290
|
evolve: {
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
.filter(([, n]) => n > 0)
|
|
410
|
-
.map(([k]) => k)
|
|
411
|
-
.join(', ') || null
|
|
412
|
-
: null,
|
|
291
|
+
episodes: se.episodes.length,
|
|
292
|
+
lastEpisodeAt: se.episodes[0]?.updatedAt ?? null,
|
|
293
|
+
lastStage: se.episodes[0]?.stage ?? null,
|
|
294
|
+
headVersion: se.policyLineages[0]?.headVersion ?? null,
|
|
413
295
|
},
|
|
414
296
|
cli: { totalEvents: cli.length, recentFailures },
|
|
415
297
|
};
|
|
@@ -604,8 +486,8 @@ export async function readArchitecture(root) {
|
|
|
604
486
|
const domain = [
|
|
605
487
|
{ id: 'change', label: 'Change', role: 'A unit of planned work with its artifacts' },
|
|
606
488
|
{ id: 'memory', label: 'Memory', role: 'Durable attributed lessons across runs' },
|
|
607
|
-
{ id: '
|
|
608
|
-
{ id: 'archive', label: 'Archive', role: 'Finalized changes
|
|
489
|
+
{ id: 'episode', label: 'Episode', role: 'One two-arm forward self-evolution episode (main ∥ baseline arms -> graded advantage)' },
|
|
490
|
+
{ id: 'archive', label: 'Archive', role: 'Finalized changes' },
|
|
609
491
|
];
|
|
610
492
|
logical = { entities: [...artifacts, ...domain] };
|
|
611
493
|
}
|
|
@@ -624,20 +506,16 @@ export async function readArchitecture(root) {
|
|
|
624
506
|
};
|
|
625
507
|
try {
|
|
626
508
|
const plan = await readAgentInterfacePlan(root);
|
|
627
|
-
const
|
|
509
|
+
const se = await readSelfEvolution(root);
|
|
628
510
|
const cli = await readCliHistory(root, 500);
|
|
629
|
-
const
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
.join(', ') || null
|
|
635
|
-
: null;
|
|
636
|
-
const passRates = runs
|
|
637
|
-
.map((r) => r.passRate)
|
|
511
|
+
const lastVerdict = se.episodes[0]?.stage ?? null;
|
|
512
|
+
// passRate = fraction of GRADED episodes (advantage measured) that were
|
|
513
|
+
// positive (advantage > 0), over all graded episodes; null when none graded.
|
|
514
|
+
const gradedAdvantages = se.episodes
|
|
515
|
+
.map((e) => e.advantage)
|
|
638
516
|
.filter((v) => typeof v === 'number' && Number.isFinite(v));
|
|
639
|
-
const passRate =
|
|
640
|
-
?
|
|
517
|
+
const passRate = gradedAdvantages.length > 0
|
|
518
|
+
? gradedAdvantages.filter((v) => v > 0).length / gradedAdvantages.length
|
|
641
519
|
: null;
|
|
642
520
|
const toolCounts = new Map();
|
|
643
521
|
for (const event of cli) {
|
|
@@ -653,7 +531,7 @@ export async function readArchitecture(root) {
|
|
|
653
531
|
toolDistribution,
|
|
654
532
|
traceEvents: plan.summary.traces,
|
|
655
533
|
decisions: plan.summary.decisions,
|
|
656
|
-
runs:
|
|
534
|
+
runs: se.episodes.length,
|
|
657
535
|
lastVerdict,
|
|
658
536
|
passRate,
|
|
659
537
|
};
|
|
@@ -701,8 +579,18 @@ export async function readArchitecture(root) {
|
|
|
701
579
|
rel: '.synergyspec-selfevolving/history/events.ndjson',
|
|
702
580
|
detail: 'Append-only CLI history event log',
|
|
703
581
|
},
|
|
704
|
-
{
|
|
705
|
-
|
|
582
|
+
{
|
|
583
|
+
rel: '.synergyspec-selfevolving/self-evolution/episodes',
|
|
584
|
+
detail: 'Per-episode two-arm forward records',
|
|
585
|
+
},
|
|
586
|
+
{
|
|
587
|
+
rel: '.synergyspec-selfevolving/self-evolution/policy/ledger.ndjson',
|
|
588
|
+
detail: 'Append-only policy version ledger',
|
|
589
|
+
},
|
|
590
|
+
{
|
|
591
|
+
rel: '.synergyspec-selfevolving/self-evolution/policy/reject-buffer.ndjson',
|
|
592
|
+
detail: 'Rolled-back episodes (reject-buffer)',
|
|
593
|
+
},
|
|
706
594
|
];
|
|
707
595
|
const storeSurfaces = [];
|
|
708
596
|
for (const store of storePaths) {
|