@kweaver-ai/kweaver-sdk 0.8.2 → 0.8.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -52
- package/README.zh.md +27 -46
- package/dist/api/agent-chat.d.ts +10 -2
- package/dist/api/agent-chat.js +19 -5
- package/dist/api/datasources.d.ts +14 -0
- package/dist/api/datasources.js +14 -0
- package/dist/api/resources.d.ts +94 -0
- package/dist/api/resources.js +166 -0
- package/dist/cli.js +103 -23
- package/dist/client.d.ts +10 -4
- package/dist/client.js +12 -6
- package/dist/commands/agent-members.js +27 -11
- package/dist/commands/agent.js +383 -272
- package/dist/commands/auth.js +184 -71
- package/dist/commands/bkn-metric.js +37 -16
- package/dist/commands/bkn-ops.d.ts +1 -1
- package/dist/commands/bkn-ops.js +192 -93
- package/dist/commands/bkn-query.js +99 -31
- package/dist/commands/bkn-schema.d.ts +3 -3
- package/dist/commands/bkn-schema.js +127 -86
- package/dist/commands/bkn.js +158 -116
- package/dist/commands/call.js +23 -13
- package/dist/commands/config.js +22 -12
- package/dist/commands/context-loader.js +98 -92
- package/dist/commands/dataflow.js +14 -6
- package/dist/commands/ds.d.ts +0 -31
- package/dist/commands/ds.js +18 -426
- package/dist/commands/explore-bkn.d.ts +7 -1
- package/dist/commands/explore-bkn.js +32 -3
- package/dist/commands/explore.js +18 -15
- package/dist/commands/model.js +53 -42
- package/dist/commands/resource.d.ts +1 -0
- package/dist/commands/{dataview.js → resource.js} +62 -84
- package/dist/commands/skill.js +201 -65
- package/dist/commands/token.js +11 -0
- package/dist/commands/tool.js +46 -29
- package/dist/commands/toolbox.js +31 -15
- package/dist/commands/vega.js +466 -250
- package/dist/help/format.d.ts +65 -0
- package/dist/help/format.js +141 -0
- package/dist/index.d.ts +3 -3
- package/dist/index.js +2 -2
- package/dist/resources/datasources.d.ts +7 -0
- package/dist/resources/datasources.js +7 -0
- package/dist/resources/{dataviews.d.ts → resources.d.ts} +10 -11
- package/dist/resources/{dataviews.js → resources.js} +12 -13
- package/dist/templates/explorer/bkn.js +860 -9
- package/dist/templates/explorer/index.html +1 -0
- package/dist/templates/explorer/style.css +225 -0
- package/dist/templates/explorer/vendor/g6.min.js +68 -0
- package/dist/trace-ai/eval-set/schemas.d.ts +1 -0
- package/dist/trace-ai/eval-set/schemas.js +4 -0
- package/dist/trace-ai/eval-set/types.d.ts +2 -0
- package/dist/trace-ai/exp/capture-fingerprint.d.ts +10 -0
- package/dist/trace-ai/exp/capture-fingerprint.js +12 -0
- package/dist/trace-ai/exp/context/context-assembler.d.ts +18 -0
- package/dist/trace-ai/exp/context/context-assembler.js +42 -0
- package/dist/trace-ai/exp/context/failure-analyzer.d.ts +22 -0
- package/dist/trace-ai/exp/context/failure-analyzer.js +59 -0
- package/dist/trace-ai/exp/context/kn-data-prober.d.ts +13 -0
- package/dist/trace-ai/exp/context/kn-data-prober.js +38 -0
- package/dist/trace-ai/exp/context/kn-schema-client.d.ts +14 -0
- package/dist/trace-ai/exp/context/kn-schema-client.js +41 -0
- package/dist/trace-ai/exp/context/retrieval-health.d.ts +32 -0
- package/dist/trace-ai/exp/context/retrieval-health.js +138 -0
- package/dist/trace-ai/exp/context/vega-catalog-client.d.ts +14 -0
- package/dist/trace-ai/exp/context/vega-catalog-client.js +15 -0
- package/dist/trace-ai/exp/coordinator.d.ts +34 -21
- package/dist/trace-ai/exp/coordinator.js +246 -24
- package/dist/trace-ai/exp/eval-runner.js +4 -2
- package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +1 -0
- package/dist/trace-ai/exp/exp-store/events-jsonl.js +18 -0
- package/dist/trace-ai/exp/exp-store/expected-fingerprint.d.ts +3 -0
- package/dist/trace-ai/exp/exp-store/expected-fingerprint.js +31 -0
- package/dist/trace-ai/exp/exp-store/index.d.ts +63 -2
- package/dist/trace-ai/exp/exp-store/index.js +2 -1
- package/dist/trace-ai/exp/exp-store/rollback-yaml.d.ts +12 -0
- package/dist/trace-ai/exp/exp-store/rollback-yaml.js +29 -0
- package/dist/trace-ai/exp/index.d.ts +2 -0
- package/dist/trace-ai/exp/index.js +68 -3
- package/dist/trace-ai/exp/info.js +1 -1
- package/dist/trace-ai/exp/patch/index.d.ts +13 -2
- package/dist/trace-ai/exp/patch/index.js +65 -10
- package/dist/trace-ai/exp/patch/kn-api-client.d.ts +40 -0
- package/dist/trace-ai/exp/patch/kn-api-client.js +14 -0
- package/dist/trace-ai/exp/patch/kn.d.ts +8 -0
- package/dist/trace-ai/exp/patch/kn.js +36 -0
- package/dist/trace-ai/exp/patch/skill-api-client.d.ts +17 -0
- package/dist/trace-ai/exp/patch/skill-api-client.js +14 -0
- package/dist/trace-ai/exp/patch/skill-content.d.ts +9 -0
- package/dist/trace-ai/exp/patch/skill-content.js +12 -0
- package/dist/trace-ai/exp/preflight.d.ts +77 -0
- package/dist/trace-ai/exp/preflight.js +148 -0
- package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +3 -14
- package/dist/trace-ai/exp/providers/synthesizer-client.js +53 -35
- package/dist/trace-ai/exp/providers/triage-client.d.ts +15 -2
- package/dist/trace-ai/exp/providers/triage-client.js +143 -28
- package/dist/trace-ai/exp/run-preflight.d.ts +19 -0
- package/dist/trace-ai/exp/run-preflight.js +56 -0
- package/dist/trace-ai/exp/schemas.d.ts +402 -44
- package/dist/trace-ai/exp/schemas.js +131 -18
- package/dist/utils/deprecation.d.ts +1 -0
- package/dist/utils/deprecation.js +18 -0
- package/package.json +2 -1
- package/dist/api/dataviews.d.ts +0 -117
- package/dist/api/dataviews.js +0 -265
- package/dist/commands/dataview.d.ts +0 -8
|
@@ -3,9 +3,12 @@ import path from "node:path";
|
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
4
|
import yaml from "js-yaml";
|
|
5
5
|
import { ExpStore } from "./exp-store/index.js";
|
|
6
|
-
import {
|
|
6
|
+
import { PatchApplier } from "./patch/index.js";
|
|
7
7
|
import { computeScores } from "./scoring.js";
|
|
8
8
|
import { writeBundles } from "./bundle-writer.js";
|
|
9
|
+
import { analyzeFailures, roundRetrievedAnyData } from "./context/failure-analyzer.js";
|
|
10
|
+
import { diagnoseMechanism } from "./context/retrieval-health.js";
|
|
11
|
+
import { runPreflight } from "./run-preflight.js";
|
|
9
12
|
export class ExperimentCoordinator {
|
|
10
13
|
opts;
|
|
11
14
|
store;
|
|
@@ -16,8 +19,9 @@ export class ExperimentCoordinator {
|
|
|
16
19
|
}
|
|
17
20
|
async run() {
|
|
18
21
|
const replayed = await this.store.replayState();
|
|
19
|
-
if (replayed.isTerminal
|
|
20
|
-
throw new Error(`Experiment is in terminal state ${replayed.currentState}.
|
|
22
|
+
if (replayed.isTerminal) {
|
|
23
|
+
throw new Error(`Experiment is in terminal state ${replayed.currentState}. ` +
|
|
24
|
+
`Use --new-run to start a fresh experiment in this directory.`);
|
|
21
25
|
}
|
|
22
26
|
const mission = await this.store.readMission();
|
|
23
27
|
const expId = this.opts.experimentId ?? `exp_${Date.now()}`;
|
|
@@ -26,14 +30,34 @@ export class ExperimentCoordinator {
|
|
|
26
30
|
}
|
|
27
31
|
await this.store.acquireLock();
|
|
28
32
|
this.heartbeatTimer = setInterval(() => { void this.store.updateHeartbeat(); }, 10_000);
|
|
29
|
-
|
|
30
|
-
|
|
33
|
+
const uninstallSignals = this.installSignalHandlers();
|
|
34
|
+
// Layer 2 auto-recovery: prior holder died mid-round without writing step_failed
|
|
35
|
+
// (typically SIGKILL/OOM/power — signal handlers can't help with these). FSM is
|
|
36
|
+
// stuck in an executing-side phase with no lastFailure. Synthesize step_failed so
|
|
37
|
+
// the startRound calc below rewinds to redo the round.
|
|
38
|
+
const stuckMidRound = !replayed.lastFailure
|
|
39
|
+
&& replayed.currentRound > 0
|
|
40
|
+
&& replayed.currentState !== "Init"
|
|
41
|
+
&& replayed.currentState !== "Deciding"
|
|
42
|
+
&& !replayed.isTerminal;
|
|
43
|
+
if (stuckMidRound) {
|
|
44
|
+
await this.store.appendEvent({
|
|
45
|
+
type: "step_failed",
|
|
46
|
+
state: replayed.currentState,
|
|
47
|
+
error: `auto-recovered: prior holder died mid-${replayed.currentState} at round ${replayed.currentRound}`,
|
|
48
|
+
retryable: true,
|
|
49
|
+
});
|
|
50
|
+
process.stderr.write(`Recovered stale ${replayed.currentState} at round ${replayed.currentRound}; redoing round.\n`);
|
|
51
|
+
}
|
|
52
|
+
// If previous run failed mid-round (real failure OR auto-recovered above), retry that round.
|
|
53
|
+
const startRound = (replayed.lastFailure || stuckMidRound) && replayed.currentRound > 0
|
|
31
54
|
? replayed.currentRound - 1
|
|
32
55
|
: replayed.currentRound;
|
|
33
56
|
try {
|
|
34
57
|
await this.runLoop(mission, startRound, expId);
|
|
35
58
|
}
|
|
36
59
|
finally {
|
|
60
|
+
uninstallSignals();
|
|
37
61
|
clearInterval(this.heartbeatTimer);
|
|
38
62
|
await this.store.releaseLock();
|
|
39
63
|
}
|
|
@@ -45,16 +69,88 @@ export class ExperimentCoordinator {
|
|
|
45
69
|
}
|
|
46
70
|
await this.store.acquireLock();
|
|
47
71
|
this.heartbeatTimer = setInterval(() => { void this.store.updateHeartbeat(); }, 10_000);
|
|
72
|
+
const uninstallSignals = this.installSignalHandlers();
|
|
48
73
|
try {
|
|
49
74
|
const mission = await this.store.readMission();
|
|
50
75
|
const expId = `exp_${replayed.currentRound}`;
|
|
51
76
|
await this.runLoop(mission, replayed.currentRound, expId);
|
|
52
77
|
}
|
|
53
78
|
finally {
|
|
79
|
+
uninstallSignals();
|
|
54
80
|
clearInterval(this.heartbeatTimer);
|
|
55
81
|
await this.store.releaseLock();
|
|
56
82
|
}
|
|
57
83
|
}
|
|
84
|
+
/**
|
|
85
|
+
* Install SIGINT/SIGHUP/SIGTERM handlers that flush a final event and release
|
|
86
|
+
* the lock before exit. Returns an uninstaller that MUST be called in the
|
|
87
|
+
* caller's finally block (otherwise normal exit would still fire the handler).
|
|
88
|
+
*
|
|
89
|
+
* Semantics:
|
|
90
|
+
* SIGINT → user-intent abort → emit `aborted` event (terminal)
|
|
91
|
+
* SIGHUP → terminal closed → emit `step_failed` retryable
|
|
92
|
+
* SIGTERM → external kill (ambig.) → emit `step_failed` retryable
|
|
93
|
+
*
|
|
94
|
+
* SIGKILL / OOM / power loss can't be caught here — Layer 2 auto-recovery in
|
|
95
|
+
* run() handles that case on the next start.
|
|
96
|
+
*/
|
|
97
|
+
installSignalHandlers() {
|
|
98
|
+
let firing = false;
|
|
99
|
+
const handler = (signal) => {
|
|
100
|
+
if (firing)
|
|
101
|
+
return;
|
|
102
|
+
firing = true;
|
|
103
|
+
// Process exits in the IIFE — Node won't await the handler itself, so we
|
|
104
|
+
// must drive the async flow and then `process.exit` ourselves.
|
|
105
|
+
void (async () => {
|
|
106
|
+
try {
|
|
107
|
+
const replayed = await this.store.replayState();
|
|
108
|
+
if (signal === "SIGINT") {
|
|
109
|
+
await this.store.appendEvent({
|
|
110
|
+
type: "aborted",
|
|
111
|
+
round: replayed.currentRound,
|
|
112
|
+
reason: `interrupted by ${signal}`,
|
|
113
|
+
});
|
|
114
|
+
}
|
|
115
|
+
else {
|
|
116
|
+
// For non-terminal states only — if FSM was already Deciding/Init/terminal,
|
|
117
|
+
// a step_failed would be a lie. Skip the event but still release the lock.
|
|
118
|
+
const recoverable = !replayed.isTerminal
|
|
119
|
+
&& replayed.currentState !== "Init"
|
|
120
|
+
&& replayed.currentState !== "Deciding";
|
|
121
|
+
if (recoverable) {
|
|
122
|
+
await this.store.appendEvent({
|
|
123
|
+
type: "step_failed",
|
|
124
|
+
state: replayed.currentState,
|
|
125
|
+
error: `process interrupted by ${signal}`,
|
|
126
|
+
retryable: true,
|
|
127
|
+
});
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
catch (err) {
|
|
132
|
+
process.stderr.write(`signal handler: failed to append event: ${String(err)}\n`);
|
|
133
|
+
}
|
|
134
|
+
if (this.heartbeatTimer)
|
|
135
|
+
clearInterval(this.heartbeatTimer);
|
|
136
|
+
try {
|
|
137
|
+
await this.store.releaseLock();
|
|
138
|
+
}
|
|
139
|
+
catch { /* best-effort */ }
|
|
140
|
+
// Exit codes per shell convention (128 + signal number).
|
|
141
|
+
const code = signal === "SIGINT" ? 130 : signal === "SIGHUP" ? 129 : 143;
|
|
142
|
+
process.exit(code);
|
|
143
|
+
})();
|
|
144
|
+
};
|
|
145
|
+
process.on("SIGINT", handler);
|
|
146
|
+
process.on("SIGHUP", handler);
|
|
147
|
+
process.on("SIGTERM", handler);
|
|
148
|
+
return () => {
|
|
149
|
+
process.off("SIGINT", handler);
|
|
150
|
+
process.off("SIGHUP", handler);
|
|
151
|
+
process.off("SIGTERM", handler);
|
|
152
|
+
};
|
|
153
|
+
}
|
|
58
154
|
async runLoop(mission, startRound, expId) {
|
|
59
155
|
const round = startRound + 1;
|
|
60
156
|
const maxRounds = mission.max_rounds ?? Infinity;
|
|
@@ -64,12 +160,35 @@ export class ExperimentCoordinator {
|
|
|
64
160
|
await this.store.appendEvent({ type: "state_transition", from: "Deciding", to: "Generating", round });
|
|
65
161
|
const nextChange = mission.next_change;
|
|
66
162
|
if (!nextChange)
|
|
67
|
-
throw new Error("mission.md has no next_change — add one or let
|
|
163
|
+
throw new Error("mission.md has no next_change — add one or let the planner suggest");
|
|
164
|
+
if (!mission.enabled_targets.includes(nextChange.target)) {
|
|
165
|
+
throw new Error(`next_change.target=${nextChange.target} is not in mission.enabled_targets=[${mission.enabled_targets.join(", ")}]. ` +
|
|
166
|
+
`Either add the target to enabled_targets in mission.md, or change next_change to use an enabled target.`);
|
|
167
|
+
}
|
|
68
168
|
const prevRounds = await this.store.readAllRounds();
|
|
69
|
-
// Load current candidate and apply patch
|
|
169
|
+
// Load current candidate and apply patch via PatchApplier (handles agent.*/kn.*/skill.content).
|
|
170
|
+
// PatchApplier may call external APIs (KN/skill) that throw on stubs or transient failures;
|
|
171
|
+
// surface those as Generating-phase step_failed so the FSM stays observable and lineage
|
|
172
|
+
// doesn't get stuck in "running". No auto-retry: patch side-effects (KN writes, skill version
|
|
173
|
+
// publish) are not safe to blindly retry — user inspects step_failed and resumes manually.
|
|
70
174
|
const currentCandidatePath = path.join(this.opts.expDir, mission.current_candidate.path);
|
|
71
175
|
const currentCandidate = yaml.load(await fs.readFile(currentCandidatePath, "utf8"));
|
|
72
|
-
const
|
|
176
|
+
const patchApplier = new PatchApplier(this.opts.expDir, this.opts.knClient, this.opts.skillClient);
|
|
177
|
+
let patched;
|
|
178
|
+
try {
|
|
179
|
+
const result = await patchApplier.apply(currentCandidate, nextChange);
|
|
180
|
+
patched = result.candidate;
|
|
181
|
+
}
|
|
182
|
+
catch (err) {
|
|
183
|
+
await this.store.appendEvent({
|
|
184
|
+
type: "step_failed",
|
|
185
|
+
state: "Generating",
|
|
186
|
+
error: String(err),
|
|
187
|
+
retryable: false,
|
|
188
|
+
});
|
|
189
|
+
process.stderr.write(`\nGenerating failed for target ${nextChange.target}: ${String(err)}\n`);
|
|
190
|
+
return;
|
|
191
|
+
}
|
|
73
192
|
patched["candidate_version"] = `v${round}`;
|
|
74
193
|
const newCandidatePath = path.join(this.opts.expDir, "candidates", `candidate-v${round}.yaml`);
|
|
75
194
|
await fs.writeFile(newCandidatePath, yaml.dump(patched, { lineWidth: -1 }));
|
|
@@ -84,6 +203,43 @@ export class ExperimentCoordinator {
|
|
|
84
203
|
// === Executing ===
|
|
85
204
|
await this.store.appendEvent({ type: "state_transition", from: "Generating", to: "Executing", round });
|
|
86
205
|
const evalSetPaths = mission.eval_sets.map(e => path.join(this.opts.expDir, e.path));
|
|
206
|
+
// Preflight: reconcile the live agent against expectation before any eval
|
|
207
|
+
// chat is sent. A mismatch (wrong KN binding) fails the round fast —
|
|
208
|
+
// non-retryable, since it will not fix itself.
|
|
209
|
+
if (this.opts.fetchAgentConfig) {
|
|
210
|
+
const agentId = patched["agent_id"];
|
|
211
|
+
if (typeof agentId !== "string" || !agentId) {
|
|
212
|
+
// fetchAgentConfig is wired but the agent can't be identified — fail
|
|
213
|
+
// loudly rather than silently skipping the guard.
|
|
214
|
+
await this.store.appendEvent({
|
|
215
|
+
type: "step_failed",
|
|
216
|
+
state: "Executing",
|
|
217
|
+
error: "preflight: candidate has no agent_id — cannot verify the agent under test",
|
|
218
|
+
retryable: false,
|
|
219
|
+
});
|
|
220
|
+
process.stderr.write("\nPreflight check failed — candidate has no agent_id.\n");
|
|
221
|
+
return;
|
|
222
|
+
}
|
|
223
|
+
try {
|
|
224
|
+
await runPreflight({
|
|
225
|
+
expDir: this.opts.expDir,
|
|
226
|
+
agentId,
|
|
227
|
+
fetchConfig: this.opts.fetchAgentConfig,
|
|
228
|
+
evalSetPaths,
|
|
229
|
+
});
|
|
230
|
+
}
|
|
231
|
+
catch (err) {
|
|
232
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
233
|
+
await this.store.appendEvent({
|
|
234
|
+
type: "step_failed",
|
|
235
|
+
state: "Executing",
|
|
236
|
+
error: `preflight: ${message}`,
|
|
237
|
+
retryable: false,
|
|
238
|
+
});
|
|
239
|
+
process.stderr.write(`\nPreflight check failed — eval not run:\n${message}\n`);
|
|
240
|
+
return;
|
|
241
|
+
}
|
|
242
|
+
}
|
|
87
243
|
let queryResults;
|
|
88
244
|
try {
|
|
89
245
|
const result = await this.withRetry(() => this.opts.runEval({ evalSetPaths, candidatePath: newCandidatePath, expDir: this.opts.expDir, round }), "Executing");
|
|
@@ -109,10 +265,58 @@ export class ExperimentCoordinator {
|
|
|
109
265
|
await this.store.writeRound(round, { round, trial_version: round, scores, per_query_results: queryResults });
|
|
110
266
|
if (await this.checkAbort(round))
|
|
111
267
|
return;
|
|
112
|
-
// === Triaging ===
|
|
268
|
+
// === Triaging (merged: diagnose + propose in one LLM call) ===
|
|
113
269
|
await this.store.appendEvent({ type: "state_transition", from: "Scoring", to: "Triaging", round });
|
|
114
270
|
const currentRoundData = (await this.store.readAllRounds()).find(r => r.round === round) ?? { round, trial_version: round };
|
|
115
271
|
const prevMemory = prevRounds.at(-1)?.triage_conclusion?.cross_round_memory_ref;
|
|
272
|
+
const failureAnalysis = await analyzeFailures(currentRoundData.per_query_results ?? [], this.opts.fetchTrace);
|
|
273
|
+
// Fail-fast on a mechanism failure: if the trace shows the agent retrieved
|
|
274
|
+
// no KN data across the failing queries, this round measured a wiring
|
|
275
|
+
// failure, not the prompt. Running triage would only burn an LLM call
|
|
276
|
+
// proposing prompt patches that cannot fix it — stop and report the root
|
|
277
|
+
// cause instead.
|
|
278
|
+
//
|
|
279
|
+
// diagnoseMechanism only sees failing queries, so confirm against the whole
|
|
280
|
+
// round: if any query (passing or failing) did retrieve KN data, the no-data
|
|
281
|
+
// failures are localized, not a global break — let triage handle them.
|
|
282
|
+
let mechanism = diagnoseMechanism(failureAnalysis);
|
|
283
|
+
if (mechanism.broken
|
|
284
|
+
&& await roundRetrievedAnyData(currentRoundData.per_query_results ?? [], this.opts.fetchTrace)) {
|
|
285
|
+
mechanism = { broken: false, reason: "" };
|
|
286
|
+
}
|
|
287
|
+
if (mechanism.broken) {
|
|
288
|
+
await this.store.appendEvent({
|
|
289
|
+
type: "step_failed",
|
|
290
|
+
state: "Triaging",
|
|
291
|
+
error: `mechanism: ${mechanism.reason}`,
|
|
292
|
+
retryable: false,
|
|
293
|
+
});
|
|
294
|
+
process.stderr.write(`\nMechanism failure — triage skipped, round halted:\n${mechanism.reason}\n`);
|
|
295
|
+
return;
|
|
296
|
+
}
|
|
297
|
+
// Pre-fetch all available context so the merged planner LLM can see KN schema
|
|
298
|
+
// + bound skill content + data probes before deciding both verdict AND next_change.
|
|
299
|
+
// We call assemble() twice (kn target + skill target) since each path fetches
|
|
300
|
+
// distinct artifacts; total cost dominated by the LLM call that follows.
|
|
301
|
+
const knId = patched["kn"]?.["id"];
|
|
302
|
+
const boundSkills = patched["agent"]?.["skills"] ?? [];
|
|
303
|
+
let kn_context;
|
|
304
|
+
let skill_context;
|
|
305
|
+
if (this.opts.contextAssembler) {
|
|
306
|
+
const assembler = this.opts.contextAssembler;
|
|
307
|
+
const empty = {};
|
|
308
|
+
const [knRes, skillRes] = await Promise.all([
|
|
309
|
+
knId
|
|
310
|
+
? assembler.assemble("kn.object_type", knId, boundSkills, failureAnalysis)
|
|
311
|
+
: Promise.resolve(empty),
|
|
312
|
+
boundSkills.length > 0
|
|
313
|
+
? assembler.assemble("skill.content", knId, boundSkills, failureAnalysis)
|
|
314
|
+
: Promise.resolve(empty),
|
|
315
|
+
]);
|
|
316
|
+
kn_context = knRes.kn_context;
|
|
317
|
+
skill_context = skillRes.skill_context;
|
|
318
|
+
}
|
|
319
|
+
const updatedMission = await this.store.readMission();
|
|
116
320
|
let triageResult;
|
|
117
321
|
try {
|
|
118
322
|
triageResult = await this.withRetry(() => this.opts.triage.triage({
|
|
@@ -120,36 +324,54 @@ export class ExperimentCoordinator {
|
|
|
120
324
|
prevRounds,
|
|
121
325
|
candidateConfig: patched,
|
|
122
326
|
crossRoundMemoryRef: prevMemory,
|
|
327
|
+
failureAnalysis,
|
|
328
|
+
mission: updatedMission,
|
|
329
|
+
kn_context,
|
|
330
|
+
skill_context,
|
|
123
331
|
}), "Triaging");
|
|
124
332
|
}
|
|
125
333
|
catch {
|
|
126
334
|
return;
|
|
127
335
|
}
|
|
336
|
+
// Persist triage_conclusion. Only "continue"/"publish" are valid in the typed schema;
|
|
337
|
+
// "abort" is a runtime verdict mapped to "publish" for storage but routed to Aborted below.
|
|
338
|
+
const storedVerdict = triageResult.verdict === "continue" ? "continue" : "publish";
|
|
128
339
|
await this.store.writeRound(round, {
|
|
129
340
|
triage_conclusion: {
|
|
130
341
|
diagnoses: triageResult.diagnoses,
|
|
131
342
|
hints: triageResult.hints,
|
|
132
|
-
verdict:
|
|
343
|
+
verdict: storedVerdict,
|
|
133
344
|
cross_round_memory_ref: triageResult.new_memory_token,
|
|
134
345
|
},
|
|
135
346
|
});
|
|
136
|
-
await this.store.appendEvent({ type: "round_completed", round, verdict:
|
|
137
|
-
//
|
|
347
|
+
await this.store.appendEvent({ type: "round_completed", round, verdict: storedVerdict });
|
|
348
|
+
// Persist failure_attribution in a TriageComplete event for downstream consumers (exp show, etc.)
|
|
349
|
+
await this.store.appendEvent({
|
|
350
|
+
type: "TriageComplete",
|
|
351
|
+
round,
|
|
352
|
+
verdict: triageResult.verdict,
|
|
353
|
+
summary: triageResult.summary,
|
|
354
|
+
failure_attribution: triageResult.failure_attribution,
|
|
355
|
+
});
|
|
356
|
+
// Abort: terminal state — no suggestion, no Deciding pause.
|
|
357
|
+
if (triageResult.verdict === "abort") {
|
|
358
|
+
await this.store.appendEvent({ type: "aborted", round, reason: `triage_abort: ${triageResult.summary ?? "no summary"}` });
|
|
359
|
+
process.stdout.write(`\nExperiment aborted by triage: ${triageResult.summary ?? "(no reason)"}\n`);
|
|
360
|
+
return;
|
|
361
|
+
}
|
|
362
|
+
// Continue: triageResult.next_change was produced in the same LLM call; write it as suggestion.
|
|
138
363
|
if (triageResult.verdict === "continue" && round < maxRounds) {
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
}), "Triaging");
|
|
148
|
-
await this.store.writeSuggestedChange(suggestion);
|
|
149
|
-
}
|
|
150
|
-
catch {
|
|
364
|
+
if (!triageResult.next_change) {
|
|
365
|
+
// Parser already enforces presence when verdict=continue, but guard belt-and-braces.
|
|
366
|
+
await this.store.appendEvent({
|
|
367
|
+
type: "step_failed",
|
|
368
|
+
state: "Triaging",
|
|
369
|
+
error: "verdict=continue but next_change missing in triage output",
|
|
370
|
+
retryable: true,
|
|
371
|
+
});
|
|
151
372
|
return;
|
|
152
373
|
}
|
|
374
|
+
await this.store.writeSuggestedChange(triageResult.next_change);
|
|
153
375
|
}
|
|
154
376
|
// === Deciding ===
|
|
155
377
|
await this.store.appendEvent({ type: "state_transition", from: "Triaging", to: "Deciding", round });
|
|
@@ -6,7 +6,9 @@ import { run as evalSetRun } from "../eval-set/test-runner.js";
|
|
|
6
6
|
export async function runEval(opts) {
|
|
7
7
|
const candidateRaw = yaml.load(await fs.readFile(opts.candidatePath, "utf8"));
|
|
8
8
|
const agentId = candidateRaw["agent_id"] ?? "candidate";
|
|
9
|
-
|
|
9
|
+
// candidate_version ("v1", "v2", …) is the experiment's own round numbering,
|
|
10
|
+
// NOT a platform agent version. The eval always measures the current live
|
|
11
|
+
// agent, which fetchAgent resolves at "latest" — never conflate the two.
|
|
10
12
|
const roundEvalBase = path.join(opts.expDir, ".trace-state", "rounds", `round-${opts.round}-eval`);
|
|
11
13
|
// Run eval for each eval-set (sequentially for MVP-C single-path)
|
|
12
14
|
const allResults = [];
|
|
@@ -17,7 +19,6 @@ export async function runEval(opts) {
|
|
|
17
19
|
await evalSetRun({
|
|
18
20
|
evalSetDir,
|
|
19
21
|
candidateAgentId: agentId,
|
|
20
|
-
candidateAgentVersion: agentVersion,
|
|
21
22
|
outDir,
|
|
22
23
|
maxParallel: opts.maxParallel ?? 4,
|
|
23
24
|
deps: opts.deps,
|
|
@@ -40,6 +41,7 @@ export async function runEval(opts) {
|
|
|
40
41
|
error_codes: [],
|
|
41
42
|
},
|
|
42
43
|
raw_trace_id: c.trace_id ?? undefined,
|
|
44
|
+
conversation_id: c.conversation_id ?? undefined,
|
|
43
45
|
});
|
|
44
46
|
}
|
|
45
47
|
}
|
|
@@ -2,6 +2,7 @@ import type { ExpEvent, ExpFsmState } from "../schemas.js";
|
|
|
2
2
|
export type EventInput = ExpEvent extends infer T ? T extends {
|
|
3
3
|
ts: string;
|
|
4
4
|
} ? Omit<T, "ts"> : never : never;
|
|
5
|
+
export declare function readAllEvents(expDir: string): Promise<Record<string, unknown>[]>;
|
|
5
6
|
export declare function appendEvent(expDir: string, event: EventInput): Promise<void>;
|
|
6
7
|
export interface ReplayedState {
|
|
7
8
|
currentState: ExpFsmState;
|
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
import fs from "node:fs/promises";
|
|
2
2
|
import path from "node:path";
|
|
3
|
+
/**
 * Read every event stored in `<expDir>/.trace-state/events.jsonl`.
 *
 * The file is treated as JSON Lines: it is split on "\n" and each non-empty
 * line is parsed on its own. Lines that are not valid JSON, or that parse to
 * anything other than a plain object (null, numbers, strings, arrays), are
 * skipped so the result always matches the declared `Record<string, unknown>[]`.
 *
 * @param expDir Experiment directory that contains `.trace-state/`.
 * @returns The parsed events in file order; `[]` when the log does not exist.
 * @throws Any read error other than ENOENT (e.g. EACCES, EISDIR), so an
 *         unreadable log is not mistaken for an empty one.
 */
export async function readAllEvents(expDir) {
    const filePath = path.join(expDir, ".trace-state", "events.jsonl");
    let raw;
    try {
        raw = await fs.readFile(filePath, "utf8");
    }
    catch (err) {
        // A missing log just means nothing has been recorded yet.
        if (err && err.code === "ENOENT")
            return [];
        throw err;
    }
    const events = [];
    for (const line of raw.split("\n")) {
        if (!line)
            continue;
        let parsed;
        try {
            parsed = JSON.parse(line);
        }
        catch {
            // Best-effort: a corrupt or partially written line must not hide the rest.
            continue;
        }
        if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
            events.push(parsed);
        }
    }
    return events;
}
|
|
3
21
|
export async function appendEvent(expDir, event) {
|
|
4
22
|
const filePath = path.join(expDir, ".trace-state", "events.jsonl");
|
|
5
23
|
const line = JSON.stringify({ ts: new Date().toISOString(), ...event }) + "\n";
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { AgentFingerprint } from "../preflight.js";
|
|
2
|
+
export declare function writeExpectedFingerprint(expDir: string, fingerprint: AgentFingerprint): Promise<void>;
|
|
3
|
+
export declare function readExpectedFingerprint(expDir: string): Promise<AgentFingerprint | undefined>;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
// src/trace-ai/exp/exp-store/expected-fingerprint.ts
|
|
2
|
+
//
|
|
3
|
+
// The loop-owned record of the agent configuration last seen under test.
|
|
4
|
+
// Re-captured by the preflight step every round (never hand-edited) — it is the
|
|
5
|
+
// per-round provenance fingerprint of exactly what config the round measured.
|
|
6
|
+
import fs from "node:fs/promises";
|
|
7
|
+
import path from "node:path";
|
|
8
|
+
import yaml from "js-yaml";
|
|
9
|
+
/**
 * Location of the expected-fingerprint file inside an experiment directory:
 * `<expDir>/.trace-state/expected-fingerprint.yaml`.
 */
function fingerprintPath(expDir) {
    const segments = [expDir, ".trace-state", "expected-fingerprint.yaml"];
    return path.join(...segments);
}
|
|
12
|
+
/**
 * Store `fingerprint` as YAML at the experiment's expected-fingerprint path,
 * creating the containing directory first when it does not exist yet.
 *
 * @param expDir Experiment directory.
 * @param fingerprint Value to serialize with `yaml.dump`.
 */
export async function writeExpectedFingerprint(expDir, fingerprint) {
    const target = fingerprintPath(expDir);
    const parentDir = path.dirname(target);
    await fs.mkdir(parentDir, { recursive: true });
    // Serialize only after the directory exists, same order as before.
    const serialized = yaml.dump(fingerprint, { lineWidth: -1 });
    await fs.writeFile(target, serialized, "utf8");
}
|
|
17
|
+
/**
 * Load the expected fingerprint stored for this experiment.
 *
 * Resolves to `undefined` when the file cannot be read or parsed, or when its
 * top-level YAML value is not a mapping (empty document, `null`, a scalar or a
 * sequence), so callers never receive `null` or a non-object.
 *
 * Fingerprints written before `non_fixed_kn_bindings` existed lack the field;
 * it is normalized to `[]` so consumers can rely on it always being an array.
 *
 * @param expDir Experiment directory.
 * @returns The parsed fingerprint object, or `undefined`.
 */
export async function readExpectedFingerprint(expDir) {
    try {
        const raw = await fs.readFile(fingerprintPath(expDir), "utf8");
        const parsed = yaml.load(raw);
        // Only a YAML mapping can be a fingerprint; reject everything else
        // explicitly instead of relying on a swallowed TypeError.
        if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
            return undefined;
        }
        if (!Array.isArray(parsed.non_fixed_kn_bindings)) {
            parsed.non_fixed_kn_bindings = [];
        }
        return parsed;
    }
    catch {
        // Best-effort read: a missing or unparsable file means "no expectation recorded".
        return undefined;
    }
}
|
|
@@ -16,12 +16,72 @@ export declare class ExpStore {
|
|
|
16
16
|
current_candidate: {
|
|
17
17
|
path: string;
|
|
18
18
|
};
|
|
19
|
+
enabled_targets: ("agent.system_prompt" | "agent.skills" | "kn.object_type" | "kn.relation_type" | "skill.content")[];
|
|
19
20
|
max_rounds?: number | undefined;
|
|
20
21
|
provider?: string | undefined;
|
|
21
22
|
next_change?: {
|
|
22
|
-
target:
|
|
23
|
+
target: "agent.system_prompt";
|
|
24
|
+
patch: string | Record<string, unknown>;
|
|
25
|
+
hypothesis?: string | undefined;
|
|
26
|
+
} | {
|
|
27
|
+
target: "agent.skills";
|
|
28
|
+
patch: {
|
|
29
|
+
unbind: string[];
|
|
30
|
+
bind: {
|
|
31
|
+
id: string;
|
|
32
|
+
version: string;
|
|
33
|
+
}[];
|
|
34
|
+
};
|
|
35
|
+
hypothesis?: string | undefined;
|
|
36
|
+
} | {
|
|
37
|
+
target: "kn.object_type";
|
|
23
38
|
hypothesis: string;
|
|
24
|
-
patch:
|
|
39
|
+
patch: {
|
|
40
|
+
kn_id: string;
|
|
41
|
+
add_object_types: {
|
|
42
|
+
concept_name: string;
|
|
43
|
+
dataview_id: string;
|
|
44
|
+
primary_keys: string[];
|
|
45
|
+
data_properties: {
|
|
46
|
+
name: string;
|
|
47
|
+
type: string;
|
|
48
|
+
}[];
|
|
49
|
+
}[];
|
|
50
|
+
add_relation_types: {
|
|
51
|
+
concept_name: string;
|
|
52
|
+
source_object_type: string;
|
|
53
|
+
target_object_type: string;
|
|
54
|
+
join_key: string;
|
|
55
|
+
}[];
|
|
56
|
+
};
|
|
57
|
+
} | {
|
|
58
|
+
target: "kn.relation_type";
|
|
59
|
+
hypothesis: string;
|
|
60
|
+
patch: {
|
|
61
|
+
kn_id: string;
|
|
62
|
+
add_object_types: {
|
|
63
|
+
concept_name: string;
|
|
64
|
+
dataview_id: string;
|
|
65
|
+
primary_keys: string[];
|
|
66
|
+
data_properties: {
|
|
67
|
+
name: string;
|
|
68
|
+
type: string;
|
|
69
|
+
}[];
|
|
70
|
+
}[];
|
|
71
|
+
add_relation_types: {
|
|
72
|
+
concept_name: string;
|
|
73
|
+
source_object_type: string;
|
|
74
|
+
target_object_type: string;
|
|
75
|
+
join_key: string;
|
|
76
|
+
}[];
|
|
77
|
+
};
|
|
78
|
+
} | {
|
|
79
|
+
target: "skill.content";
|
|
80
|
+
hypothesis: string;
|
|
81
|
+
patch: {
|
|
82
|
+
skill_id: string;
|
|
83
|
+
append_section: string;
|
|
84
|
+
};
|
|
25
85
|
} | undefined;
|
|
26
86
|
guardrails?: {
|
|
27
87
|
name: string;
|
|
@@ -31,6 +91,7 @@ export declare class ExpStore {
|
|
|
31
91
|
}>;
|
|
32
92
|
writeSuggestedChange: (c: NextChange) => Promise<void>;
|
|
33
93
|
appendEvent: (e: EventInput) => Promise<void>;
|
|
94
|
+
readAllEvents: () => Promise<Record<string, unknown>[]>;
|
|
34
95
|
replayState: () => Promise<ReplayedState>;
|
|
35
96
|
acquireLock: () => Promise<void>;
|
|
36
97
|
releaseLock: () => Promise<void>;
|
|
@@ -3,7 +3,7 @@ import fs from "node:fs/promises";
|
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import crypto from "node:crypto";
|
|
5
5
|
import { readMission, writeSuggestedChange } from "./mission-md.js";
|
|
6
|
-
import { appendEvent, replayState } from "./events-jsonl.js";
|
|
6
|
+
import { appendEvent, replayState, readAllEvents } from "./events-jsonl.js";
|
|
7
7
|
import { acquireLock, releaseLock, updateHeartbeat } from "./lock.js";
|
|
8
8
|
import { isAborted, writeAbortSignal, clearAbortSignal } from "./abort-signal.js";
|
|
9
9
|
import { writeRound, readAllRounds } from "./round-yaml.js";
|
|
@@ -44,6 +44,7 @@ export class ExpStore {
|
|
|
44
44
|
readMission = () => readMission(this.expDir);
|
|
45
45
|
writeSuggestedChange = (c) => writeSuggestedChange(this.expDir, c);
|
|
46
46
|
appendEvent = (e) => appendEvent(this.expDir, e);
|
|
47
|
+
readAllEvents = () => readAllEvents(this.expDir);
|
|
47
48
|
replayState = () => replayState(this.expDir);
|
|
48
49
|
acquireLock = () => acquireLock(this.expDir);
|
|
49
50
|
releaseLock = () => releaseLock(this.expDir);
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export interface RollbackStep {
|
|
2
|
+
op: "remove_object_type" | "remove_relation_type";
|
|
3
|
+
kn_id: string;
|
|
4
|
+
concept_name: string;
|
|
5
|
+
applied_at: string;
|
|
6
|
+
}
|
|
7
|
+
export declare class RollbackYaml {
|
|
8
|
+
private filePath;
|
|
9
|
+
constructor(workDir: string);
|
|
10
|
+
appendStep(step: Omit<RollbackStep, "applied_at">): Promise<void>;
|
|
11
|
+
readSteps(): Promise<RollbackStep[]>;
|
|
12
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import yaml from "js-yaml";
|
|
4
|
+
/**
 * Ledger of rollback steps kept in `<workDir>/rollback.yaml`.
 *
 * The file holds a document of the form
 * `{ schema_version: "exp-rollback/v1", steps: [...] }`; each append rewrites
 * the whole file with the previous steps plus the new one.
 */
export class RollbackYaml {
    filePath;
    /**
     * @param workDir Directory in which `rollback.yaml` lives.
     */
    constructor(workDir) {
        this.filePath = path.join(workDir, "rollback.yaml");
    }
    /**
     * Append one step, stamping it with the current time as `applied_at`
     * (ISO-8601 string from `Date#toISOString`).
     *
     * @param step Step fields without `applied_at`.
     * @throws If the existing file is unreadable or malformed (see readSteps),
     *         in which case nothing is written and the ledger is left untouched.
     */
    async appendStep(step) {
        const existing = await this.readSteps();
        const doc = {
            schema_version: "exp-rollback/v1",
            steps: [...existing, { ...step, applied_at: new Date().toISOString() }],
        };
        await fs.writeFile(this.filePath, yaml.dump(doc), "utf-8");
    }
    /**
     * Read all recorded steps.
     *
     * @returns The `steps` list; `[]` when the file does not exist or the
     *          document has no `steps` entry.
     * @throws Read errors other than ENOENT, YAML parse errors, and an Error
     *         when `steps` is present but is not a list. Returning such a value
     *         would let appendStep spread it and overwrite the ledger with garbage.
     */
    async readSteps() {
        let content;
        try {
            content = await fs.readFile(this.filePath, "utf-8");
        }
        catch (err) {
            if (err?.code === "ENOENT")
                return [];
            throw err;
        }
        const parsed = yaml.load(content);
        const steps = parsed?.steps ?? [];
        if (!Array.isArray(steps)) {
            throw new Error(`${this.filePath}: "steps" must be a list`);
        }
        return steps;
    }
}
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import type { FailureAttribution } from "./schemas.js";
|
|
2
|
+
export declare function formatFailureAttribution(attribution: FailureAttribution[]): string;
|
|
1
3
|
export interface ParsedExpArgs {
|
|
2
4
|
subcommand: "run" | "resume" | "show" | "status" | "abort" | "doctor" | "list" | "info";
|
|
3
5
|
expDir: string;
|