@kweaver-ai/kweaver-sdk 0.7.4 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. package/README.md +39 -5
  2. package/README.zh.md +37 -5
  3. package/dist/agent-providers/index.d.ts +7 -0
  4. package/dist/agent-providers/index.js +5 -0
  5. package/dist/agent-providers/prompt-template.d.ts +62 -0
  6. package/dist/agent-providers/prompt-template.js +105 -0
  7. package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
  8. package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
  9. package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
  10. package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
  11. package/dist/agent-providers/providers/stub.d.ts +47 -0
  12. package/dist/agent-providers/providers/stub.js +77 -0
  13. package/dist/agent-providers/registry.d.ts +45 -0
  14. package/dist/agent-providers/registry.js +77 -0
  15. package/dist/agent-providers/types.d.ts +91 -0
  16. package/dist/agent-providers/types.js +25 -0
  17. package/dist/api/agent-chat.js +8 -6
  18. package/dist/api/agent-observability.d.ts +51 -0
  19. package/dist/api/agent-observability.js +108 -0
  20. package/dist/api/context-loader.d.ts +1 -0
  21. package/dist/api/conversations.d.ts +4 -8
  22. package/dist/api/conversations.js +16 -58
  23. package/dist/api/datasources.d.ts +2 -20
  24. package/dist/api/datasources.js +7 -123
  25. package/dist/api/semantic-search.d.ts +5 -0
  26. package/dist/api/semantic-search.js +5 -0
  27. package/dist/api/skills.d.ts +75 -2
  28. package/dist/api/skills.js +108 -12
  29. package/dist/api/trace.d.ts +49 -0
  30. package/dist/api/trace.js +85 -0
  31. package/dist/api/vega.d.ts +53 -0
  32. package/dist/api/vega.js +144 -0
  33. package/dist/cli.js +12 -5
  34. package/dist/commands/agent/mode.d.ts +6 -0
  35. package/dist/commands/agent/mode.js +75 -0
  36. package/dist/commands/agent.js +101 -29
  37. package/dist/commands/bkn-ops.js +12 -6
  38. package/dist/commands/bkn-utils.d.ts +9 -0
  39. package/dist/commands/bkn-utils.js +17 -0
  40. package/dist/commands/context-loader.js +608 -38
  41. package/dist/commands/ds.js +7 -2
  42. package/dist/commands/skill.d.ts +21 -1
  43. package/dist/commands/skill.js +389 -1
  44. package/dist/commands/trace.d.ts +39 -0
  45. package/dist/commands/trace.js +668 -0
  46. package/dist/index.d.ts +2 -2
  47. package/dist/index.js +1 -1
  48. package/dist/resources/bkn.d.ts +5 -0
  49. package/dist/resources/bkn.js +5 -0
  50. package/dist/resources/datasources.js +2 -1
  51. package/dist/resources/skills.d.ts +17 -1
  52. package/dist/resources/skills.js +32 -1
  53. package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
  54. package/dist/trace-ai/diagnose/agent-binding.js +257 -0
  55. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +2 -0
  56. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +15 -0
  57. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +16 -0
  58. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +2 -0
  59. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.js +44 -0
  60. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +15 -0
  61. package/dist/trace-ai/diagnose/builtin-rules/register.d.ts +1 -0
  62. package/dist/trace-ai/diagnose/builtin-rules/register.js +11 -0
  63. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +2 -0
  64. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.js +29 -0
  65. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +15 -0
  66. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.d.ts +2 -0
  67. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.js +45 -0
  68. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.yaml +15 -0
  69. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +2 -0
  70. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.js +38 -0
  71. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.yaml +16 -0
  72. package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
  73. package/dist/trace-ai/diagnose/index.d.ts +32 -0
  74. package/dist/trace-ai/diagnose/index.js +246 -0
  75. package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
  76. package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
  77. package/dist/trace-ai/diagnose/predicate-registry.d.ts +7 -0
  78. package/dist/trace-ai/diagnose/predicate-registry.js +30 -0
  79. package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
  80. package/dist/trace-ai/diagnose/query-extractor.js +45 -0
  81. package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
  82. package/dist/trace-ai/diagnose/report-assembler.js +100 -0
  83. package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
  84. package/dist/trace-ai/diagnose/report-markdown.js +192 -0
  85. package/dist/trace-ai/diagnose/rule-loader.d.ts +11 -0
  86. package/dist/trace-ai/diagnose/rule-loader.js +120 -0
  87. package/dist/trace-ai/diagnose/schemas.d.ts +184 -0
  88. package/dist/trace-ai/diagnose/schemas.js +154 -0
  89. package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
  90. package/dist/trace-ai/diagnose/signal-probe.js +39 -0
  91. package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
  92. package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
  93. package/dist/trace-ai/diagnose/synthesizer-template.d.ts +2 -0
  94. package/dist/trace-ai/diagnose/synthesizer-template.js +49 -0
  95. package/dist/trace-ai/diagnose/trace-shaper.d.ts +3 -0
  96. package/dist/trace-ai/diagnose/trace-shaper.js +73 -0
  97. package/dist/trace-ai/diagnose/types.d.ts +173 -0
  98. package/dist/trace-ai/diagnose/types.js +1 -0
  99. package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
  100. package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
  101. package/dist/trace-ai/eval-set/builder.d.ts +36 -0
  102. package/dist/trace-ai/eval-set/builder.js +126 -0
  103. package/dist/trace-ai/eval-set/index.d.ts +15 -0
  104. package/dist/trace-ai/eval-set/index.js +10 -0
  105. package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
  106. package/dist/trace-ai/eval-set/output-writer.js +126 -0
  107. package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
  108. package/dist/trace-ai/eval-set/query-picker.js +147 -0
  109. package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
  110. package/dist/trace-ai/eval-set/redactor.js +133 -0
  111. package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
  112. package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
  113. package/dist/trace-ai/eval-set/schemas.js +130 -0
  114. package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
  115. package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
  116. package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
  117. package/dist/trace-ai/eval-set/test-runner.js +153 -0
  118. package/dist/trace-ai/eval-set/types.d.ts +46 -0
  119. package/dist/trace-ai/eval-set/types.js +8 -0
  120. package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
  121. package/dist/trace-ai/exp/bundle-writer.js +54 -0
  122. package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
  123. package/dist/trace-ai/exp/claude-binary.js +30 -0
  124. package/dist/trace-ai/exp/coordinator.d.ts +45 -0
  125. package/dist/trace-ai/exp/coordinator.js +203 -0
  126. package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
  127. package/dist/trace-ai/exp/eval-runner.js +47 -0
  128. package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
  129. package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
  130. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
  131. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
  132. package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
  133. package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
  134. package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
  135. package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
  136. package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
  137. package/dist/trace-ai/exp/exp-store/index.js +59 -0
  138. package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
  139. package/dist/trace-ai/exp/exp-store/lock.js +73 -0
  140. package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
  141. package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
  142. package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
  143. package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
  144. package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
  145. package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
  146. package/dist/trace-ai/exp/index.d.ts +8 -0
  147. package/dist/trace-ai/exp/index.js +238 -0
  148. package/dist/trace-ai/exp/info.d.ts +35 -0
  149. package/dist/trace-ai/exp/info.js +120 -0
  150. package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
  151. package/dist/trace-ai/exp/patch/agent-config.js +26 -0
  152. package/dist/trace-ai/exp/patch/index.d.ts +2 -0
  153. package/dist/trace-ai/exp/patch/index.js +13 -0
  154. package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
  155. package/dist/trace-ai/exp/patch/skill.js +24 -0
  156. package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
  157. package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
  158. package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
  159. package/dist/trace-ai/exp/providers/triage-client.js +51 -0
  160. package/dist/trace-ai/exp/schemas.d.ts +147 -0
  161. package/dist/trace-ai/exp/schemas.js +50 -0
  162. package/dist/trace-ai/exp/scoring.d.ts +2 -0
  163. package/dist/trace-ai/exp/scoring.js +46 -0
  164. package/dist/trace-ai/scan/aggregator.d.ts +20 -0
  165. package/dist/trace-ai/scan/aggregator.js +26 -0
  166. package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
  167. package/dist/trace-ai/scan/artifacts/paths.js +18 -0
  168. package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
  169. package/dist/trace-ai/scan/artifacts/writer.js +96 -0
  170. package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
  171. package/dist/trace-ai/scan/batched-rubric.js +159 -0
  172. package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
  173. package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
  174. package/dist/trace-ai/scan/index.d.ts +31 -0
  175. package/dist/trace-ai/scan/index.js +390 -0
  176. package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
  177. package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
  178. package/dist/trace-ai/scan/runner.d.ts +25 -0
  179. package/dist/trace-ai/scan/runner.js +42 -0
  180. package/dist/trace-ai/scan/sampler.d.ts +18 -0
  181. package/dist/trace-ai/scan/sampler.js +81 -0
  182. package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
  183. package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
  184. package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
  185. package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
  186. package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
  187. package/dist/trace-ai/scan/single-agent-validator.js +42 -0
  188. package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
  189. package/dist/trace-ai/scan/traces-list-parser.js +46 -0
  190. package/package.json +14 -4
@@ -0,0 +1,203 @@
1
+ // src/trace-ai/exp/coordinator.ts
2
+ import path from "node:path";
3
+ import fs from "node:fs/promises";
4
+ import yaml from "js-yaml";
5
+ import { ExpStore } from "./exp-store/index.js";
6
+ import { applyPatch } from "./patch/index.js";
7
+ import { computeScores } from "./scoring.js";
8
+ import { writeBundles } from "./bundle-writer.js";
9
/**
 * Drives a single experiment round through the on-disk state machine kept by
 * ExpStore: Generating -> Executing -> Scoring -> Triaging -> Deciding, and
 * then either Publishing/Published or a pause at Deciding.
 *
 * `opts` is expected to provide `expDir`, `runEval`, `triage`, `synthesizer`
 * and optionally `experimentId` (these are the only fields read here).
 */
export class ExperimentCoordinator {
    opts;
    store;
    heartbeatTimer;
    constructor(opts) {
        this.opts = opts;
        this.store = new ExpStore(opts.expDir);
    }
    /**
     * Starts (or restarts) the experiment from the state replayed from the event log.
     *
     * @throws Error when the experiment is in a terminal state other than "Aborted".
     */
    async run() {
        const replayed = await this.store.replayState();
        if (replayed.isTerminal && !replayed.currentState.includes("Aborted")) {
            throw new Error(`Experiment is in terminal state ${replayed.currentState}. Use --new-run to start fresh.`);
        }
        const mission = await this.store.readMission();
        const expId = this.opts.experimentId ?? `exp_${Date.now()}`;
        if (replayed.currentRound === 0) {
            await this.store.initDir(mission);
        }
        await this.store.acquireLock();
        this.startHeartbeat();
        // If previous run failed mid-round, retry that round (startRound = currentRound - 1)
        const startRound = replayed.lastFailure && replayed.currentRound > 0
            ? replayed.currentRound - 1
            : replayed.currentRound;
        try {
            await this.runLoop(mission, startRound, expId);
        }
        finally {
            this.stopHeartbeat();
            await this.store.releaseLock();
        }
    }
    /**
     * Continues with the next round after a pause.
     *
     * @throws Error when the replayed state is anything other than "Deciding".
     */
    async resume() {
        const replayed = await this.store.replayState();
        if (replayed.currentState !== "Deciding") {
            throw new Error(`Cannot resume: experiment is in state ${replayed.currentState}, not Deciding. Only Deciding state supports resume.`);
        }
        await this.store.acquireLock();
        this.startHeartbeat();
        try {
            const mission = await this.store.readMission();
            const expId = `exp_${replayed.currentRound}`;
            await this.runLoop(mission, replayed.currentRound, expId);
        }
        finally {
            this.stopHeartbeat();
            await this.store.releaseLock();
        }
    }
    /**
     * Executes exactly one round (number `startRound + 1`); despite the name
     * there is no loop here. Returns early, without throwing, when an abort
     * signal is seen or when a retried step ultimately fails.
     *
     * @param mission    parsed mission; `next_change` is required.
     * @param startRound number of the last finished round.
     * @param expId      experiment id forwarded to the bundle writer.
     * @throws Error when the mission has no `next_change`.
     */
    async runLoop(mission, startRound, expId) {
        const round = startRound + 1;
        const maxRounds = mission.max_rounds ?? Infinity;
        if (await this.checkAbort(round))
            return;
        // === Generating (Apply Phase) ===
        await this.store.appendEvent({ type: "state_transition", from: "Deciding", to: "Generating", round });
        const nextChange = mission.next_change;
        if (!nextChange)
            throw new Error("mission.md has no next_change — add one or let Synthesizer suggest");
        const prevRounds = await this.store.readAllRounds();
        // Load current candidate and apply patch
        const currentCandidatePath = path.join(this.opts.expDir, mission.current_candidate.path);
        const currentCandidate = yaml.load(await fs.readFile(currentCandidatePath, "utf8"));
        const patched = applyPatch(currentCandidate, nextChange);
        patched["candidate_version"] = `v${round}`;
        const newCandidatePath = path.join(this.opts.expDir, "candidates", `candidate-v${round}.yaml`);
        await fs.writeFile(newCandidatePath, yaml.dump(patched, { lineWidth: -1 }));
        await this.store.appendLineage({
            version: round,
            candidate_path: `candidates/candidate-v${round}.yaml`,
            next_change: nextChange,
            status: "running",
        });
        if (await this.checkAbort(round))
            return;
        // === Executing ===
        await this.store.appendEvent({ type: "state_transition", from: "Generating", to: "Executing", round });
        const evalSetPaths = mission.eval_sets.map(e => path.join(this.opts.expDir, e.path));
        let queryResults;
        try {
            const result = await this.withRetry(() => this.opts.runEval({ evalSetPaths, candidatePath: newCandidatePath, expDir: this.opts.expDir, round }), "Executing");
            queryResults = result.queryResults;
        }
        catch {
            return; // step_failed already written by withRetry
        }
        if (await this.checkAbort(round))
            return;
        // === Scoring ===
        await this.store.appendEvent({ type: "state_transition", from: "Executing", to: "Scoring", round });
        const guardrails = mission.guardrails ?? [];
        const scores = computeScores(queryResults, guardrails);
        if (scores.guardrail_hard_fail) {
            await this.store.updateLineage(round, { status: "guardrail_failed" });
            await this.store.writeRound(round, { round, trial_version: round, guardrail_failed: true, scores });
            await this.store.appendEvent({ type: "state_transition", from: "Scoring", to: "Deciding", round });
            process.stdout.write(`\nRound ${round}: Guardrail hard gate violated. Fix the candidate and run exp resume.\n`);
            return;
        }
        await this.store.updateLineage(round, { status: "scored" });
        await this.store.writeRound(round, { round, trial_version: round, scores, per_query_results: queryResults });
        if (await this.checkAbort(round))
            return;
        // === Triaging ===
        await this.store.appendEvent({ type: "state_transition", from: "Scoring", to: "Triaging", round });
        const currentRoundData = (await this.store.readAllRounds()).find(r => r.round === round) ?? { round, trial_version: round };
        const prevMemory = prevRounds.at(-1)?.triage_conclusion?.cross_round_memory_ref;
        let triageResult;
        try {
            triageResult = await this.withRetry(() => this.opts.triage.triage({
                currentRound: currentRoundData,
                prevRounds,
                candidateConfig: patched,
                crossRoundMemoryRef: prevMemory,
            }), "Triaging");
        }
        catch {
            return;
        }
        await this.store.writeRound(round, {
            triage_conclusion: {
                diagnoses: triageResult.diagnoses,
                hints: triageResult.hints,
                verdict: triageResult.verdict,
                cross_round_memory_ref: triageResult.new_memory_token,
            },
        });
        await this.store.appendEvent({ type: "round_completed", round, verdict: triageResult.verdict });
        // Generate next suggestion if continuing
        if (triageResult.verdict === "continue" && round < maxRounds) {
            const updatedMission = await this.store.readMission();
            try {
                // Synthesizer failures are recorded under the "Triaging" state label.
                const suggestion = await this.withRetry(() => this.opts.synthesizer.generate({
                    mission: updatedMission,
                    candidateConfig: patched,
                    prevRound: currentRoundData,
                    prevRounds,
                    crossRoundMemoryRef: triageResult.new_memory_token,
                }), "Triaging");
                await this.store.writeSuggestedChange(suggestion);
            }
            catch {
                return;
            }
        }
        // === Deciding ===
        await this.store.appendEvent({ type: "state_transition", from: "Triaging", to: "Deciding", round });
        if (triageResult.verdict === "publish" || round >= maxRounds) {
            // Publish immediately
            await this.store.appendEvent({ type: "state_transition", from: "Deciding", to: "Publishing", round });
            const allRounds = await this.store.readAllRounds();
            const allLineage = await this.store.readLineage();
            await writeBundles({ expDir: this.opts.expDir, experimentId: expId, lineage: allLineage, rounds: allRounds, createdBy: process.env["USER"] ?? "unknown" });
            await this.store.appendEvent({ type: "state_transition", from: "Publishing", to: "Published", round });
            process.stdout.write(`\nExperiment complete. Outputs written to ${path.join(this.opts.expDir, "outputs")}\n`);
        }
        else {
            // Pause at Deciding — lock released by run()/resume() finally block
            process.stdout.write(`\nRound ${round} complete.\n`);
            process.stdout.write(`Scores: outcome=${scores.outcome.toFixed(2)}, trajectory=${scores.trajectory.toFixed(2)}\n`);
            process.stdout.write(`Triage: ${triageResult.diagnoses.join("; ")}\n`);
            process.stdout.write(`Next suggestion written to mission.md. Review and run exp resume to continue.\n`);
        }
    }
    /**
     * Checks for the abort signal. When present, stops the heartbeat, records
     * an "aborted" event and releases the lock.
     *
     * @returns true when the round must stop.
     */
    async checkAbort(round) {
        if (await this.store.isAborted()) {
            this.stopHeartbeat();
            await this.store.appendEvent({ type: "aborted", round, reason: "user_abort" });
            // NOTE(review): run()/resume() release the lock again in their finally
            // block, so releaseLock is called twice on abort — confirm it is idempotent.
            await this.store.releaseLock();
            return true;
        }
        return false;
    }
    /**
     * Runs `fn` up to three times, waiting 1s and then 2s between attempts.
     * After the third failure a "step_failed" event tagged with `state` is
     * appended and the last error is rethrown.
     */
    async withRetry(fn, state) {
        let lastErr;
        for (let attempt = 0; attempt < 3; attempt++) {
            try {
                return await fn();
            }
            catch (err) {
                lastErr = err;
                if (attempt < 2) {
                    await new Promise(r => setTimeout(r, 1000 * 2 ** attempt));
                }
            }
        }
        await this.store.appendEvent({
            type: "step_failed",
            state: state,
            error: String(lastErr),
            retryable: true,
        });
        throw lastErr;
    }
    /**
     * Starts the 10-second lock heartbeat. A failed heartbeat write is reported
     * on stderr instead of surfacing as an unhandled promise rejection, which
     * could otherwise terminate the process before the lock is released.
     */
    startHeartbeat() {
        const HEARTBEAT_INTERVAL_MS = 10_000;
        this.heartbeatTimer = setInterval(() => {
            this.store.updateHeartbeat().catch((err) => {
                const detail = err instanceof Error ? err.message : String(err);
                process.stderr.write(`[warn] exp lock heartbeat update failed: ${detail}\n`);
            });
        }, HEARTBEAT_INTERVAL_MS);
    }
    /** Stops the heartbeat timer; safe to call when no timer is running. */
    stopHeartbeat() {
        clearInterval(this.heartbeatTimer);
    }
}
@@ -0,0 +1,14 @@
1
+ import type { QueryResult } from "./schemas.js";
2
+ import type { RunnerDeps } from "../eval-set/test-runner.js";
3
/** Options accepted by runEval. */
+ export interface EvalRunnerOpts {
4
/** Eval-set directories; each one is run in turn. */
+ evalSetPaths: string[];
5
/** Path of the candidate YAML file; `agent_id` and `candidate_version` are read from it. */
+ candidatePath: string;
6
/** Experiment directory; per-round eval output goes under `.trace-state/rounds/`. */
+ expDir: string;
7
/** Round number, used to name the round's eval output directory. */
+ round: number;
8
/** Dependencies handed through unchanged to the eval-set test runner. */
+ deps: RunnerDeps;
9
/** Parallelism passed to the eval-set test runner; 4 when omitted. */
+ maxParallel?: number;
10
+ }
11
/** Result of runEval: one entry per case found in the eval-set reports. */
+ export interface EvalRunResult {
12
+ queryResults: QueryResult[];
13
+ }
14
/** Runs every eval set against the candidate and collects the per-case results. */
+ export declare function runEval(opts: EvalRunnerOpts): Promise<EvalRunResult>;
@@ -0,0 +1,47 @@
1
+ // src/trace-ai/exp/eval-runner.ts
2
+ import path from "node:path";
3
+ import yaml from "js-yaml";
4
+ import fs from "node:fs/promises";
5
+ import { run as evalSetRun } from "../eval-set/test-runner.js";
6
/**
 * Runs every eval set against the candidate and flattens the per-case reports
 * into a single list of query results.
 *
 * @param opts see EvalRunnerOpts: evalSetPaths, candidatePath, expDir, round,
 *             deps and optional maxParallel (default 4).
 * @returns `{ queryResults }` with one entry per reported case, in eval-set order.
 * @throws Error when the candidate file is not a YAML mapping or a report has
 *         no `cases` list; file-system and runner errors propagate unchanged.
 */
export async function runEval(opts) {
    const candidateRaw = yaml.load(await fs.readFile(opts.candidatePath, "utf8"));
    if (candidateRaw == null || typeof candidateRaw !== "object") {
        // An empty or scalar YAML file would otherwise fail with an opaque TypeError
        // or silently run as the default "candidate" agent.
        throw new Error(`Candidate file ${opts.candidatePath} does not contain a YAML mapping`);
    }
    const agentId = candidateRaw["agent_id"] ?? "candidate";
    const agentVersion = candidateRaw["candidate_version"];
    const roundEvalBase = path.join(opts.expDir, ".trace-state", "rounds", `round-${opts.round}-eval`);
    // Run eval for each eval-set (sequentially for MVP-C single-path)
    const allResults = [];
    const usedOutNames = new Set();
    for (const evalSetDir of opts.evalSetPaths) {
        // Each eval-set gets its own subdir so outputs from multiple sets don't overwrite each other.
        // Two sets may share a basename (e.g. a/seed and b/seed), so repeats get a numeric suffix.
        const baseName = path.basename(evalSetDir);
        let outName = baseName;
        for (let suffix = 2; usedOutNames.has(outName); suffix++) {
            outName = `${baseName}-${suffix}`;
        }
        usedOutNames.add(outName);
        const outDir = path.join(roundEvalBase, outName);
        await fs.mkdir(outDir, { recursive: true });
        await evalSetRun({
            evalSetDir,
            candidateAgentId: agentId,
            candidateAgentVersion: agentVersion,
            outDir,
            maxParallel: opts.maxParallel ?? 4,
            deps: opts.deps,
        });
        // Read report and convert to QueryResult[]
        const reportPath = path.join(outDir, "report.yaml");
        const report = yaml.load(await fs.readFile(reportPath, "utf8"));
        if (!Array.isArray(report?.cases)) {
            throw new Error(`Eval report ${reportPath} has no "cases" list`);
        }
        for (const c of report.cases) {
            allResults.push({
                query_id: c.query_id,
                // A case without assertion results is treated as having none.
                assertion_results: (c.assertion_results ?? []).map(ar => ({
                    type: ar.assertion?.type,
                    verdict: ar.verdict,
                    reason: typeof ar.actual === "string" ? ar.actual : undefined,
                })),
                trajectory_summary: {
                    tool_call_sequence: [], // populated from trace if available
                    retry_count: 0,
                    latency_ms: c.duration_ms ?? 0,
                    error_codes: [],
                },
                raw_trace_id: c.trace_id ?? undefined,
            });
        }
    }
    return { queryResults: allResults };
}
@@ -0,0 +1,3 @@
1
/** Resolves to true when `<expDir>/.trace-state/abort.signal` is accessible, false otherwise. */
+ export declare function isAborted(expDir: string): Promise<boolean>;
2
/** Writes the abort marker file; its content is the current ISO-8601 timestamp. */
+ export declare function writeAbortSignal(expDir: string): Promise<void>;
3
/** Deletes the abort marker file; a marker that does not exist is not an error. */
+ export declare function clearAbortSignal(expDir: string): Promise<void>;
@@ -0,0 +1,27 @@
1
+ // src/trace-ai/exp/exp-store/abort-signal.ts
2
+ import fs from "node:fs/promises";
3
+ import path from "node:path";
4
/** Location of the abort marker file: `<expDir>/.trace-state/abort.signal`. */
function signalPath(expDir) {
    const stateDir = path.join(expDir, ".trace-state");
    return path.join(stateDir, "abort.signal");
}
7
/**
 * Reports whether the abort marker file can be accessed.
 *
 * @param expDir experiment directory.
 * @returns true when `<expDir>/.trace-state/abort.signal` is accessible; any
 *          failure while probing it yields false.
 */
export async function isAborted(expDir) {
    let markerPresent = true;
    try {
        await fs.access(path.join(expDir, ".trace-state", "abort.signal"));
    }
    catch {
        markerPresent = false;
    }
    return markerPresent;
}
16
/**
 * Creates (or overwrites) the abort marker file with the current ISO-8601
 * timestamp. The `.trace-state` directory is not created here, so the write
 * rejects when it is missing.
 *
 * @param expDir experiment directory.
 */
export async function writeAbortSignal(expDir) {
    const markerFile = path.join(expDir, ".trace-state", "abort.signal");
    const stamp = new Date().toISOString();
    await fs.writeFile(markerFile, stamp, "utf8");
}
19
/**
 * Removes the abort marker file.
 *
 * @param expDir experiment directory.
 * @throws any unlink error whose `code` is not "ENOENT"; a marker that is
 *         already absent is silently accepted.
 */
export async function clearAbortSignal(expDir) {
    try {
        const markerFile = path.join(expDir, ".trace-state", "abort.signal");
        await fs.unlink(markerFile);
    }
    catch (err) {
        if (err.code === "ENOENT") {
            return;
        }
        throw err;
    }
}
@@ -0,0 +1,4 @@
1
+ import type { LineageEntry } from "../schemas.js";
2
/** Appends `entry`, stamped with an `appended_at` ISO timestamp, to `<expDir>/.trace-state/candidate-lineage.yaml`. */
+ export declare function appendLineage(expDir: string, entry: Omit<LineageEntry, "appended_at">): Promise<void>;
3
/** Merges `patch` into the stored entry whose `version` matches; entries are left as they are when none matches. */
+ export declare function updateLineage(expDir: string, version: number, patch: Partial<LineageEntry>): Promise<void>;
4
/** Returns the stored lineage entries, or an empty list when the file cannot be read. */
+ export declare function readLineage(expDir: string): Promise<LineageEntry[]>;
@@ -0,0 +1,37 @@
1
+ // src/trace-ai/exp/exp-store/candidate-lineage-yaml.ts
2
+ import fs from "node:fs/promises";
3
+ import path from "node:path";
4
+ import yaml from "js-yaml";
5
/** Location of the lineage file: `<expDir>/.trace-state/candidate-lineage.yaml`. */
function lineagePath(expDir) {
    return path.join(expDir, ".trace-state", "candidate-lineage.yaml");
}
/**
 * Loads the lineage list for the write paths.
 *
 * Only a missing file counts as "no entries yet". Every other failure
 * (unreadable file, invalid YAML, non-list document) is thrown so that callers
 * never overwrite an existing lineage they could not read.
 */
async function loadLineageEntries(expDir) {
    const p = lineagePath(expDir);
    let raw;
    try {
        raw = await fs.readFile(p, "utf8");
    }
    catch (err) {
        if (err?.code === "ENOENT")
            return [];
        throw err;
    }
    const parsed = yaml.load(raw) ?? [];
    if (!Array.isArray(parsed)) {
        throw new Error(`Lineage file ${p} does not contain a YAML list`);
    }
    return parsed;
}
/** Serializes `entries` to the lineage file without line wrapping. */
async function saveLineageEntries(expDir, entries) {
    await fs.writeFile(lineagePath(expDir), yaml.dump(entries, { lineWidth: -1 }), "utf8");
}
/**
 * Appends one entry, stamped with the current time as `appended_at`.
 *
 * @throws when the existing lineage file is unreadable or malformed; the file
 *         is left untouched in that case.
 */
export async function appendLineage(expDir, entry) {
    const entries = await loadLineageEntries(expDir);
    entries.push({ ...entry, appended_at: new Date().toISOString() });
    await saveLineageEntries(expDir, entries);
}
/**
 * Merges `patch` into the first entry whose `version` matches. The file is
 * rewritten even when no entry matches.
 *
 * @throws when the existing lineage file is unreadable or malformed; the file
 *         is left untouched in that case.
 */
export async function updateLineage(expDir, version, patch) {
    const entries = await loadLineageEntries(expDir);
    const idx = entries.findIndex(e => e.version === version);
    if (idx >= 0)
        Object.assign(entries[idx], patch);
    await saveLineageEntries(expDir, entries);
}
/**
 * Reads all lineage entries. Stays best-effort: any failure yields an empty
 * list, but failures other than a missing file are reported on stderr.
 */
export async function readLineage(expDir) {
    try {
        return await loadLineageEntries(expDir);
    }
    catch (err) {
        const detail = err instanceof Error ? err.message : String(err);
        process.stderr.write(`[warn] candidate-lineage.yaml: could not be read: ${detail}\n`);
        return [];
    }
}
@@ -0,0 +1,17 @@
1
+ import type { ExpEvent, ExpFsmState } from "../schemas.js";
2
/** Each member of the ExpEvent union that has a string `ts`, with that `ts` field removed. */
+ export type EventInput = ExpEvent extends infer T ? T extends {
3
+ ts: string;
4
+ } ? Omit<T, "ts"> : never : never;
5
/** Appends `event`, with a `ts` timestamp added, as one JSON line to `<expDir>/.trace-state/events.jsonl`. */
+ export declare function appendEvent(expDir: string, event: EventInput): Promise<void>;
6
/** State reconstructed by replaying the event log from the start. */
+ export interface ReplayedState {
7
/** State after the last applied event; "Init" when there are no events. */
+ currentState: ExpFsmState;
8
/** Round carried by the latest state_transition or round_completed event; 0 when none. */
+ currentRound: number;
9
/** Last event that could be parsed, or null. */
+ lastEvent: ExpEvent | null;
10
/** Details of the latest step_failed event; reset to null by any later state_transition. */
+ lastFailure: {
11
+ state: ExpFsmState;
12
+ error: string;
13
+ retryable: boolean;
14
+ } | null;
15
/** True when currentState is "Published" or "Aborted". */
+ isTerminal: boolean;
16
+ }
17
/** Replays `<expDir>/.trace-state/events.jsonl`; an unreadable log yields the initial state. */
+ export declare function replayState(expDir: string): Promise<ReplayedState>;
@@ -0,0 +1,60 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
/**
 * Appends one event to `<expDir>/.trace-state/events.jsonl` as a single JSON
 * line. A `ts` field holding the current ISO-8601 time is placed first; the
 * fields of `event` follow.
 *
 * @param expDir experiment directory.
 * @param event  event payload without `ts`.
 */
export async function appendEvent(expDir, event) {
    const logFile = path.join(expDir, ".trace-state", "events.jsonl");
    const record = { ts: new Date().toISOString(), ...event };
    await fs.appendFile(logFile, `${JSON.stringify(record)}\n`, "utf8");
}
8
/** States after which the experiment is considered finished. */
const TERMINAL = new Set(["Published", "Aborted"]);
/**
 * Rebuilds the experiment state by replaying `<expDir>/.trace-state/events.jsonl`
 * from the first line.
 *
 * - `state_transition` sets the state and round and clears the last failure.
 * - `step_failed` sets the state to the failed step and records the failure.
 * - `aborted` sets the state to "Aborted".
 * - `round_completed` updates the round only.
 *
 * Lines that are not a JSON object (truncated writes, or stray values such as
 * `null`) are skipped with a warning on stderr rather than aborting the replay.
 *
 * @param expDir experiment directory.
 * @returns the replayed state; the initial "Init"/round 0 state when the log
 *          cannot be read or holds no usable events.
 */
export async function replayState(expDir) {
    const filePath = path.join(expDir, ".trace-state", "events.jsonl");
    let currentState = "Init";
    let currentRound = 0;
    let lastEvent = null;
    let lastFailure = null;
    let raw;
    try {
        raw = await fs.readFile(filePath, "utf8");
    }
    catch {
        return { currentState, currentRound, lastEvent, lastFailure, isTerminal: false };
    }
    for (const line of raw.split(/\r?\n/)) {
        if (!line)
            continue;
        let ev;
        try {
            ev = JSON.parse(line);
        }
        catch {
            ev = undefined;
        }
        if (ev === null || typeof ev !== "object" || Array.isArray(ev)) {
            // Skip malformed lines (e.g. from a crash mid-write) rather than bricking replay.
            // Valid JSON that is not an object would otherwise throw on `ev.type` below.
            process.stderr.write(`[warn] events.jsonl: skipping malformed line: ${line.slice(0, 120)}\n`);
            continue;
        }
        lastEvent = ev;
        if (ev.type === "state_transition") {
            currentState = ev.to;
            currentRound = ev.round;
            lastFailure = null;
        }
        else if (ev.type === "step_failed") {
            currentState = ev.state;
            lastFailure = { state: ev.state, error: ev.error, retryable: ev.retryable };
        }
        else if (ev.type === "aborted") {
            currentState = "Aborted";
        }
        else if (ev.type === "round_completed") {
            currentRound = ev.round;
        }
    }
    return {
        currentState,
        currentRound,
        lastEvent,
        lastFailure,
        isTerminal: TERMINAL.has(currentState),
    };
}
@@ -0,0 +1,6 @@
1
/** One experiment known to the registry file `exp-registry.json`. */
+ export interface RegistryEntry {
2
/** Path identifying the experiment; entries are matched on this value. */
+ path: string;
3
/** Timestamp string of the last recorded activity; listings sort on it. */
+ last_active_ts: string;
4
+ }
5
/** Adds the path or refreshes its timestamp. Failures are reported on stderr and never thrown. */
+ export declare function upsertRegistry(absPath: string, ts: string): Promise<void>;
6
/** Returns a copy of the entries ordered by `last_active_ts`, latest first (string comparison). */
+ export declare function listRegistry(): Promise<RegistryEntry[]>;
@@ -0,0 +1,41 @@
1
+ // src/trace-ai/exp/exp-store/exp-registry.ts
2
+ import fs from "node:fs/promises";
3
+ import path from "node:path";
4
+ import { getConfigDir } from "../../../config/store.js";
5
/** Path of the registry file, `exp-registry.json`, inside the directory returned by getConfigDir(). */
function registryFilePath() {
    const configDir = getConfigDir();
    return path.join(configDir, "exp-registry.json");
}
8
/**
 * Loads the registry document.
 *
 * @returns the parsed file when it has an `entries` array; otherwise (missing
 *          file, invalid JSON, unexpected shape) a fresh empty v1 registry.
 */
async function readRegistry() {
    const emptyRegistry = () => ({ schema_version: "exp-registry/v1", entries: [] });
    try {
        const text = await fs.readFile(registryFilePath(), "utf8");
        const doc = JSON.parse(text);
        return Array.isArray(doc.entries) ? doc : emptyRegistry();
    }
    catch {
        return emptyRegistry();
    }
}
20
/**
 * Records activity for an experiment path: refreshes `last_active_ts` on the
 * matching entry, or adds a new entry when the path is not registered yet.
 * The registry's parent directory is created when needed.
 *
 * Best-effort: any failure is written to stderr as a warning and swallowed.
 *
 * @param absPath path used as the entry key.
 * @param ts      timestamp string to store.
 */
export async function upsertRegistry(absPath, ts) {
    try {
        const registry = await readRegistry();
        const known = registry.entries.find((entry) => entry.path === absPath);
        if (known === undefined) {
            registry.entries.push({ path: absPath, last_active_ts: ts });
        }
        else {
            known.last_active_ts = ts;
        }
        const target = registryFilePath();
        await fs.mkdir(path.dirname(target), { recursive: true });
        await fs.writeFile(target, `${JSON.stringify(registry, null, 2)}\n`, "utf8");
    }
    catch (e) {
        const detail = e instanceof Error ? e.message : String(e);
        process.stderr.write(`warn: exp-registry write failed: ${detail}\n`);
    }
}
38
/**
 * Lists registered experiments, most recently active first.
 *
 * @returns a new array sorted by descending `last_active_ts` (compared as
 *          strings); the loaded registry itself is not reordered.
 */
export async function listRegistry() {
    const { entries } = await readRegistry();
    const newestFirst = Array.from(entries);
    newestFirst.sort((left, right) => right.last_active_ts.localeCompare(left.last_active_ts));
    return newestFirst;
}
@@ -0,0 +1,46 @@
1
+ import type { LineageEntry, Mission, NextChange, RoundData } from "../schemas.js";
2
+ import { type ReplayedState, type EventInput } from "./events-jsonl.js";
3
+ export { type ReplayedState };
4
/**
 * File-backed store for one experiment directory.
 * NOTE(review): the function-typed members below presumably forward to the
 * exp-store helper modules with `expDir` pre-applied — confirm against the implementation.
 */
+ export declare class ExpStore {
5
/** Experiment directory this store operates on. */
+ readonly expDir: string;
6
+ constructor(expDir: string);
7
/** Creates the experiment's directory layout and, if absent, its README; resolves to a generated experiment id. */
+ initDir(mission: Mission): Promise<string>;
8
/** Renames `.trace-state` to a timestamped archive directory and starts a fresh one. */
+ archiveState(): Promise<void>;
9
/** Reads the mission; the resolved shape is spelled out inline below. */
+ readMission: () => Promise<{
10
+ schema_version: "trace-mission/v1";
11
+ goal: string;
12
+ eval_sets: {
13
+ path: string;
14
+ role: "seed" | "regression" | "holdout";
15
+ }[];
16
+ current_candidate: {
17
+ path: string;
18
+ };
19
+ max_rounds?: number | undefined;
20
+ provider?: string | undefined;
21
+ next_change?: {
22
+ target: string;
23
+ hypothesis: string;
24
+ patch: string;
25
+ } | undefined;
26
+ guardrails?: {
27
+ name: string;
28
+ kind: "hard" | "soft";
29
+ rule: string;
30
+ }[] | undefined;
31
+ }>;
32
+ writeSuggestedChange: (c: NextChange) => Promise<void>;
33
// Event log
+ appendEvent: (e: EventInput) => Promise<void>;
34
+ replayState: () => Promise<ReplayedState>;
35
// Lock and heartbeat
+ acquireLock: () => Promise<void>;
36
+ releaseLock: () => Promise<void>;
37
+ updateHeartbeat: () => Promise<void>;
38
// Abort signal
+ isAborted: () => Promise<boolean>;
39
+ writeAbortSignal: () => Promise<void>;
40
+ clearAbortSignal: () => Promise<void>;
41
// Per-round data
+ writeRound: (n: number, data: Partial<RoundData>) => Promise<void>;
42
+ readAllRounds: () => Promise<RoundData[]>;
43
// Candidate lineage
+ appendLineage: (e: Omit<LineageEntry, "appended_at">) => Promise<void>;
44
+ updateLineage: (v: number, p: Partial<LineageEntry>) => Promise<void>;
45
+ readLineage: () => Promise<LineageEntry[]>;
46
+ }
@@ -0,0 +1,59 @@
1
+ // src/trace-ai/exp/exp-store/index.ts
2
+ import fs from "node:fs/promises";
3
+ import path from "node:path";
4
+ import crypto from "node:crypto";
5
+ import { readMission, writeSuggestedChange } from "./mission-md.js";
6
+ import { appendEvent, replayState } from "./events-jsonl.js";
7
+ import { acquireLock, releaseLock, updateHeartbeat } from "./lock.js";
8
+ import { isAborted, writeAbortSignal, clearAbortSignal } from "./abort-signal.js";
9
+ import { writeRound, readAllRounds } from "./round-yaml.js";
10
+ import { appendLineage, updateLineage, readLineage } from "./candidate-lineage-yaml.js";
11
+ import { renderReadme } from "./readme-template.js";
12
/**
 * File-backed store for a single experiment directory.
 *
 * `initDir` and `archiveState` manage the on-disk layout; every other member
 * is an arrow-function property that forwards to a helper module, passing
 * `this.expDir` as the first argument.
 */
export class ExpStore {
    expDir;
    /**
     * @param expDir Root directory of the experiment.
     */
    constructor(expDir) {
        this.expDir = expDir;
    }
    /**
     * Creates the experiment's directory skeleton and seed files.
     *
     * Existing directories, an existing `events.jsonl` and an existing
     * `README.md` are left untouched. A new id is generated on every call,
     * including when the README already exists.
     *
     * @param mission Mission object; only `mission.goal` is read here.
     * @returns An id of the form `exp_<8 hex chars>`.
     */
    async initDir(mission) {
        const experimentId = `exp_${crypto.randomBytes(4).toString("hex")}`;
        const subdirs = [[".trace-state", "rounds"], ["candidates"], ["eval-sets"], ["outputs"]];
        for (const parts of subdirs) {
            await fs.mkdir(path.join(this.expDir, ...parts), { recursive: true });
        }
        const eventsPath = path.join(this.expDir, ".trace-state", "events.jsonl");
        // Exclusive create so an existing event log is never truncated. Only
        // "already exists" is tolerated; any other failure is surfaced.
        await fs.writeFile(eventsPath, "", { flag: "wx" }).catch((err) => {
            if (err.code !== "EEXIST")
                throw err;
        });
        const readmePath = path.join(this.expDir, "README.md");
        try {
            await fs.access(readmePath);
        }
        catch {
            // README is not accessible: render a fresh one.
            await fs.writeFile(readmePath, renderReadme({
                experimentId,
                timestamp: new Date().toISOString(),
                goal: mission.goal,
            }));
        }
        return experimentId;
    }
    /**
     * Moves the current `.trace-state` directory aside as
     * `.trace-state-archived-<epoch ms>` and recreates an empty one with a
     * `rounds` directory and an empty `events.jsonl`.
     */
    async archiveState() {
        const src = path.join(this.expDir, ".trace-state");
        const dst = path.join(this.expDir, `.trace-state-archived-${Date.now()}`);
        await fs.rename(src, dst);
        await fs.mkdir(path.join(src, "rounds"), { recursive: true });
        await fs.writeFile(path.join(src, "events.jsonl"), "");
    }
    readMission = () => readMission(this.expDir);
    writeSuggestedChange = (c) => writeSuggestedChange(this.expDir, c);
    appendEvent = (e) => appendEvent(this.expDir, e);
    replayState = () => replayState(this.expDir);
    acquireLock = () => acquireLock(this.expDir);
    releaseLock = () => releaseLock(this.expDir);
    updateHeartbeat = () => updateHeartbeat(this.expDir);
    isAborted = () => isAborted(this.expDir);
    writeAbortSignal = () => writeAbortSignal(this.expDir);
    clearAbortSignal = () => clearAbortSignal(this.expDir);
    writeRound = (n, data) => writeRound(this.expDir, n, data);
    readAllRounds = () => readAllRounds(this.expDir);
    appendLineage = (e) => appendLineage(this.expDir, e);
    updateLineage = (v, p) => updateLineage(this.expDir, v, p);
    readLineage = () => readLineage(this.expDir);
}
@@ -0,0 +1,3 @@
1
/**
 * Creates `.trace-state/lock.json` under `expDir` with an exclusive create.
 * If a lock file already exists and its `last_heartbeat_ts` is less than
 * 30 seconds old, the promise rejects; an older or unreadable lock is
 * removed and replaced.
 */
+ export declare function acquireLock(expDir: string): Promise<void>;
2
/**
 * Removes `.trace-state/lock.json` under `expDir`. A missing lock file is
 * not an error.
 */
+ export declare function releaseLock(expDir: string): Promise<void>;
3
/**
 * Sets `last_heartbeat_ts` in `.trace-state/lock.json` under `expDir` to the
 * current time. Does nothing when the lock file does not exist.
 */
+ export declare function updateHeartbeat(expDir: string): Promise<void>;
@@ -0,0 +1,73 @@
1
+ import fs from "node:fs/promises";
2
+ import os from "node:os";
3
+ import path from "node:path";
4
/** A lock whose last heartbeat is at least this many milliseconds old may be taken over. */
const STALE_THRESHOLD_MS = 30_000;
/** Path of the lock file inside the experiment's `.trace-state` directory. */
function lockPath(expDir) {
    return path.join(expDir, ".trace-state", "lock.json");
}
/** Exclusively creates the lock file; resolves `false` when it already exists. */
async function tryCreateLock(p, data) {
    try {
        // "wx" fails with EEXIST instead of overwriting an existing file.
        await fs.writeFile(p, data, { encoding: "utf8", flag: "wx" });
        return true;
    }
    catch (err) {
        if (err.code !== "EEXIST")
            throw err;
        return false;
    }
}
/** Reads the current lock; anything unreadable or not an object counts as maximally stale. */
async function readExistingLock(p) {
    const stale = { hostname: "", pid: 0, started_at: "", last_heartbeat_ts: new Date(0).toISOString() };
    try {
        const parsed = JSON.parse(await fs.readFile(p, "utf8"));
        // Valid JSON that is not an object (e.g. `null`) has no heartbeat to inspect.
        return parsed !== null && typeof parsed === "object" ? parsed : stale;
    }
    catch {
        return stale;
    }
}
/**
 * Acquires the experiment lock by exclusively creating the lock file.
 *
 * When a lock file already exists, its `last_heartbeat_ts` decides the
 * outcome: younger than `STALE_THRESHOLD_MS` rejects, anything else
 * (including an unreadable or malformed file) is removed and replaced.
 *
 * @param expDir Experiment root directory; `.trace-state` must already exist.
 * @throws Error when the lock is held with a fresh heartbeat, or when another
 *   process creates the lock during stale recovery.
 */
export async function acquireLock(expDir) {
    const p = lockPath(expDir);
    const now = new Date().toISOString();
    const data = JSON.stringify({
        hostname: os.hostname(),
        pid: process.pid,
        started_at: now,
        last_heartbeat_ts: now,
    }, null, 2);
    if (await tryCreateLock(p, data))
        return;
    // Lock file exists — check freshness.
    const existing = await readExistingLock(p);
    const age = Date.now() - new Date(existing.last_heartbeat_ts).getTime();
    // An unparsable timestamp yields NaN, which fails this comparison and is
    // therefore handled as stale.
    if (age < STALE_THRESHOLD_MS) {
        throw new Error(`Experiment is locked by pid ${existing.pid} on ${existing.hostname} (heartbeat ${Math.floor(age / 1000)}s ago). Use exp resume or wait.`);
    }
    // Stale — remove, then retry the exclusive create. Only "already gone" is
    // ignored, so a permissions failure is reported as such.
    // NOTE: remove + create is not atomic. If two processes recover the same
    // stale lock at once, the slower one's remove can delete the lock the
    // faster one just created.
    await releaseLock(expDir);
    if (!(await tryCreateLock(p, data))) {
        throw new Error("Lock acquired by another process during stale recovery. Try again.");
    }
}
/**
 * Releases the experiment lock by deleting the lock file.
 * A missing lock file is not an error.
 *
 * @param expDir Experiment root directory.
 */
export async function releaseLock(expDir) {
    try {
        await fs.unlink(lockPath(expDir));
    }
    catch (err) {
        if (err.code !== "ENOENT")
            throw err;
    }
}
/**
 * Refreshes `last_heartbeat_ts` in the lock file to the current time.
 * Does nothing when the lock file does not exist.
 *
 * The new content is written to a temporary sibling file and renamed over
 * the lock, so a concurrent `acquireLock` never reads a truncated file and
 * mistakes a live lock for a stale one.
 *
 * @param expDir Experiment root directory.
 */
export async function updateHeartbeat(expDir) {
    const p = lockPath(expDir);
    const tmp = `${p}.${process.pid}.tmp`;
    try {
        const lock = JSON.parse(await fs.readFile(p, "utf8"));
        lock.last_heartbeat_ts = new Date().toISOString();
        await fs.writeFile(tmp, JSON.stringify(lock, null, 2), "utf8");
        await fs.rename(tmp, p);
    }
    catch (err) {
        // Never leave the temporary file behind on failure.
        await fs.unlink(tmp).catch(() => { });
        if (err.code !== "ENOENT")
            throw err;
    }
}
@@ -0,0 +1,3 @@
1
+ import { type Mission, type NextChange } from "../schemas.js";
2
/**
 * Resolves with the `Mission` for the experiment rooted at `expDir`.
 * NOTE(review): the implementation is not visible here; presumably it parses
 * a mission markdown file inside `expDir` — confirm against mission-md.js.
 */
+ export declare function readMission(expDir: string): Promise<Mission>;
3
/**
 * Records `change` for the experiment rooted at `expDir`.
 * NOTE(review): where and how the change is persisted is not visible here —
 * confirm against mission-md.js.
 */
+ export declare function writeSuggestedChange(expDir: string, change: NextChange): Promise<void>;