@kweaver-ai/kweaver-sdk 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/README.md +40 -52
  2. package/README.zh.md +41 -46
  3. package/dist/agent-providers/index.d.ts +7 -0
  4. package/dist/agent-providers/index.js +5 -0
  5. package/dist/agent-providers/prompt-template.d.ts +62 -0
  6. package/dist/agent-providers/prompt-template.js +105 -0
  7. package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
  8. package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
  9. package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
  10. package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
  11. package/dist/agent-providers/providers/stub.d.ts +47 -0
  12. package/dist/agent-providers/providers/stub.js +77 -0
  13. package/dist/agent-providers/registry.d.ts +45 -0
  14. package/dist/agent-providers/registry.js +77 -0
  15. package/dist/agent-providers/types.d.ts +91 -0
  16. package/dist/agent-providers/types.js +25 -0
  17. package/dist/api/agent-chat.js +8 -6
  18. package/dist/api/context-loader.d.ts +1 -0
  19. package/dist/api/resources.d.ts +94 -0
  20. package/dist/api/resources.js +166 -0
  21. package/dist/api/semantic-search.d.ts +5 -0
  22. package/dist/api/semantic-search.js +5 -0
  23. package/dist/api/skills.d.ts +75 -2
  24. package/dist/api/skills.js +108 -12
  25. package/dist/api/trace.d.ts +5 -0
  26. package/dist/api/trace.js +4 -0
  27. package/dist/cli.js +109 -15
  28. package/dist/client.d.ts +3 -3
  29. package/dist/client.js +5 -5
  30. package/dist/commands/agent/mode.d.ts +6 -0
  31. package/dist/commands/agent/mode.js +75 -0
  32. package/dist/commands/agent-members.js +27 -11
  33. package/dist/commands/agent.js +469 -286
  34. package/dist/commands/auth.js +184 -71
  35. package/dist/commands/bkn-metric.js +37 -16
  36. package/dist/commands/bkn-ops.js +164 -86
  37. package/dist/commands/bkn-query.js +99 -31
  38. package/dist/commands/bkn-schema.d.ts +3 -3
  39. package/dist/commands/bkn-schema.js +127 -86
  40. package/dist/commands/bkn.js +153 -114
  41. package/dist/commands/call.js +23 -13
  42. package/dist/commands/config.js +22 -12
  43. package/dist/commands/context-loader.js +625 -49
  44. package/dist/commands/dataflow.js +14 -6
  45. package/dist/commands/ds.js +52 -30
  46. package/dist/commands/explore.js +18 -15
  47. package/dist/commands/model.js +53 -42
  48. package/dist/commands/resource.d.ts +1 -0
  49. package/dist/commands/{dataview.js → resource.js} +62 -84
  50. package/dist/commands/skill.d.ts +21 -1
  51. package/dist/commands/skill.js +567 -43
  52. package/dist/commands/token.js +11 -0
  53. package/dist/commands/tool.js +46 -29
  54. package/dist/commands/toolbox.js +31 -15
  55. package/dist/commands/trace.d.ts +26 -1
  56. package/dist/commands/trace.js +515 -15
  57. package/dist/commands/vega.js +466 -250
  58. package/dist/help/format.d.ts +65 -0
  59. package/dist/help/format.js +141 -0
  60. package/dist/index.d.ts +5 -5
  61. package/dist/index.js +3 -3
  62. package/dist/resources/bkn.d.ts +5 -0
  63. package/dist/resources/bkn.js +5 -0
  64. package/dist/resources/{dataviews.d.ts → resources.d.ts} +10 -11
  65. package/dist/resources/{dataviews.js → resources.js} +12 -13
  66. package/dist/resources/skills.d.ts +17 -1
  67. package/dist/resources/skills.js +32 -1
  68. package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
  69. package/dist/trace-ai/diagnose/agent-binding.js +257 -0
  70. package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
  71. package/dist/trace-ai/diagnose/index.d.ts +32 -0
  72. package/dist/trace-ai/diagnose/index.js +246 -0
  73. package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
  74. package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
  75. package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
  76. package/dist/trace-ai/diagnose/query-extractor.js +45 -0
  77. package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
  78. package/dist/{trace-core → trace-ai}/diagnose/report-assembler.js +19 -9
  79. package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
  80. package/dist/trace-ai/diagnose/report-markdown.js +192 -0
  81. package/dist/{trace-core → trace-ai}/diagnose/rule-loader.js +42 -8
  82. package/dist/{trace-core → trace-ai}/diagnose/schemas.d.ts +77 -2
  83. package/dist/trace-ai/diagnose/schemas.js +154 -0
  84. package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
  85. package/dist/trace-ai/diagnose/signal-probe.js +39 -0
  86. package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
  87. package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
  88. package/dist/{trace-core → trace-ai}/diagnose/trace-shaper.js +1 -0
  89. package/dist/{trace-core → trace-ai}/diagnose/types.d.ts +55 -6
  90. package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
  91. package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
  92. package/dist/trace-ai/eval-set/builder.d.ts +36 -0
  93. package/dist/trace-ai/eval-set/builder.js +126 -0
  94. package/dist/trace-ai/eval-set/index.d.ts +15 -0
  95. package/dist/trace-ai/eval-set/index.js +10 -0
  96. package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
  97. package/dist/trace-ai/eval-set/output-writer.js +126 -0
  98. package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
  99. package/dist/trace-ai/eval-set/query-picker.js +147 -0
  100. package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
  101. package/dist/trace-ai/eval-set/redactor.js +133 -0
  102. package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
  103. package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
  104. package/dist/trace-ai/eval-set/schemas.js +130 -0
  105. package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
  106. package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
  107. package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
  108. package/dist/trace-ai/eval-set/test-runner.js +153 -0
  109. package/dist/trace-ai/eval-set/types.d.ts +46 -0
  110. package/dist/trace-ai/eval-set/types.js +8 -0
  111. package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
  112. package/dist/trace-ai/exp/bundle-writer.js +54 -0
  113. package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
  114. package/dist/trace-ai/exp/claude-binary.js +30 -0
  115. package/dist/trace-ai/exp/coordinator.d.ts +45 -0
  116. package/dist/trace-ai/exp/coordinator.js +203 -0
  117. package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
  118. package/dist/trace-ai/exp/eval-runner.js +47 -0
  119. package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
  120. package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
  121. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
  122. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
  123. package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
  124. package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
  125. package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
  126. package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
  127. package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
  128. package/dist/trace-ai/exp/exp-store/index.js +59 -0
  129. package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
  130. package/dist/trace-ai/exp/exp-store/lock.js +73 -0
  131. package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
  132. package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
  133. package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
  134. package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
  135. package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
  136. package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
  137. package/dist/trace-ai/exp/index.d.ts +8 -0
  138. package/dist/trace-ai/exp/index.js +238 -0
  139. package/dist/trace-ai/exp/info.d.ts +35 -0
  140. package/dist/trace-ai/exp/info.js +120 -0
  141. package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
  142. package/dist/trace-ai/exp/patch/agent-config.js +26 -0
  143. package/dist/trace-ai/exp/patch/index.d.ts +2 -0
  144. package/dist/trace-ai/exp/patch/index.js +13 -0
  145. package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
  146. package/dist/trace-ai/exp/patch/skill.js +24 -0
  147. package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
  148. package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
  149. package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
  150. package/dist/trace-ai/exp/providers/triage-client.js +51 -0
  151. package/dist/trace-ai/exp/schemas.d.ts +147 -0
  152. package/dist/trace-ai/exp/schemas.js +50 -0
  153. package/dist/trace-ai/exp/scoring.d.ts +2 -0
  154. package/dist/trace-ai/exp/scoring.js +46 -0
  155. package/dist/trace-ai/scan/aggregator.d.ts +20 -0
  156. package/dist/trace-ai/scan/aggregator.js +26 -0
  157. package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
  158. package/dist/trace-ai/scan/artifacts/paths.js +18 -0
  159. package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
  160. package/dist/trace-ai/scan/artifacts/writer.js +96 -0
  161. package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
  162. package/dist/trace-ai/scan/batched-rubric.js +159 -0
  163. package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
  164. package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
  165. package/dist/trace-ai/scan/index.d.ts +31 -0
  166. package/dist/trace-ai/scan/index.js +390 -0
  167. package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
  168. package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
  169. package/dist/trace-ai/scan/runner.d.ts +25 -0
  170. package/dist/trace-ai/scan/runner.js +42 -0
  171. package/dist/trace-ai/scan/sampler.d.ts +18 -0
  172. package/dist/trace-ai/scan/sampler.js +81 -0
  173. package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
  174. package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
  175. package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
  176. package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
  177. package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
  178. package/dist/trace-ai/scan/single-agent-validator.js +42 -0
  179. package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
  180. package/dist/trace-ai/scan/traces-list-parser.js +46 -0
  181. package/package.json +2 -2
  182. package/dist/api/dataviews.d.ts +0 -117
  183. package/dist/api/dataviews.js +0 -265
  184. package/dist/commands/dataview.d.ts +0 -8
  185. package/dist/trace-core/diagnose/index.d.ts +0 -9
  186. package/dist/trace-core/diagnose/index.js +0 -104
  187. package/dist/trace-core/diagnose/report-assembler.d.ts +0 -12
  188. package/dist/trace-core/diagnose/schemas.js +0 -94
  189. package/dist/trace-core/diagnose/signal-probe.d.ts +0 -5
  190. package/dist/trace-core/diagnose/signal-probe.js +0 -21
  191. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +0 -0
  192. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +0 -0
  193. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +0 -0
  194. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +0 -0
  195. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.js +0 -0
  196. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +0 -0
  197. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/register.d.ts +0 -0
  198. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/register.js +0 -0
  199. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +0 -0
  200. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.js +0 -0
  201. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +0 -0
  202. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.d.ts +0 -0
  203. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.js +0 -0
  204. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.yaml +0 -0
  205. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +0 -0
  206. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.js +0 -0
  207. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.yaml +0 -0
  208. /package/dist/{trace-core → trace-ai}/diagnose/predicate-registry.d.ts +0 -0
  209. /package/dist/{trace-core → trace-ai}/diagnose/predicate-registry.js +0 -0
  210. /package/dist/{trace-core → trace-ai}/diagnose/rule-loader.d.ts +0 -0
  211. /package/dist/{trace-core → trace-ai}/diagnose/synthesizer-template.d.ts +0 -0
  212. /package/dist/{trace-core → trace-ai}/diagnose/synthesizer-template.js +0 -0
  213. /package/dist/{trace-core → trace-ai}/diagnose/trace-shaper.d.ts +0 -0
  214. /package/dist/{trace-core → trace-ai}/diagnose/types.js +0 -0
@@ -0,0 +1,238 @@
1
+ // src/trace-ai/exp/index.ts
2
+ import path from "node:path";
3
+ import fs from "node:fs/promises";
4
+ import { fileURLToPath } from "node:url";
5
+ import { ExpStore } from "./exp-store/index.js";
6
+ import { ExperimentCoordinator } from "./coordinator.js";
7
+ import { ClaudeCodeSynthesizer } from "./providers/synthesizer-client.js";
8
+ import { ClaudeCodeTriageClient } from "./providers/triage-client.js";
9
+ import { runEval } from "./eval-runner.js";
10
+ import { defaultRegistry } from "../../agent-providers/registry.js";
11
+ import { ClaudeCodeSubprocessProvider } from "../../agent-providers/providers/claude-code-subprocess.js";
12
+ import { PromptTemplateRegistry } from "../../agent-providers/prompt-template.js";
13
+ import { createBuiltinSemanticMatchProvider } from "../eval-set/semantic-match-provider.js";
14
+ import { ensureValidToken } from "../../auth/oauth.js";
15
+ import { fetchAgentInfo, sendChatRequest } from "../../api/agent-chat.js";
16
+ import { getTracesByConversation } from "../../api/conversations.js";
17
+ import { upsertRegistry, listRegistry } from "./exp-store/exp-registry.js";
18
+ import { runInfo, runList, getHealthChecks } from "./info.js";
19
+ import { resolveClaudeBinary } from "./claude-binary.js";
20
// Directory that contains this compiled module.
const __expIndexDir = path.dirname(fileURLToPath(import.meta.url));
// Sibling eval-set package's rubric prompt templates, addressed relative to this module.
const EVAL_SET_RUBRIC_DIR = path.join(
    __expIndexDir,
    "..",
    "eval-set",
    "rubric-templates",
);
22
/**
 * Registers a ClaudeCodeSubprocessProvider as the registry default, unless the
 * shared registry already reports a "claude-code" provider.
 */
function ensureProvider() {
    if (defaultRegistry.has("claude-code")) {
        return;
    }
    const claudeProvider = new ClaudeCodeSubprocessProvider({
        binary: resolveClaudeBinary(),
        defaultTimeoutMs: 120_000,
    });
    defaultRegistry.register(claudeProvider, { setAsDefault: true });
}
30
/**
 * Parses the argument tail of `exp <subcommand> [dir] [--flags]`.
 *
 * Flags (tokens starting with `--`) may appear anywhere after the subcommand;
 * the first non-flag token is taken as the experiment directory. This keeps
 * `exp list --json` or `exp run --new-run` from treating the flag as a path.
 *
 * @param argv Tokens following `exp`; `argv[0]` is the subcommand.
 * @returns `{ subcommand, expDir, newRun, json }`. For `list` / `info`,
 *   `expDir` is "" when no directory was given; for every other subcommand it
 *   defaults to the resolved current directory.
 * @throws Error when the subcommand is not one of the supported names.
 */
export function parseExpArgs(argv) {
    const [sub, ...rest] = argv;
    const validSubs = ["run", "resume", "show", "status", "abort", "doctor", "list", "info"];
    if (!validSubs.includes(sub)) {
        throw new Error(`Unknown exp subcommand: ${sub}. Use: ${validSubs.join(", ")}`);
    }
    const isFlag = (token) => typeof token === "string" && token.startsWith("--");
    const flags = rest.filter(isFlag);
    // First positional token (if any) is the experiment directory.
    const dir = rest.find((token) => !isFlag(token));
    const isDiscoveryCmd = sub === "list" || sub === "info";
    const expDir = isDiscoveryCmd
        ? (dir ? path.resolve(dir) : "")
        : path.resolve(dir ?? ".");
    return {
        subcommand: sub,
        expDir,
        newRun: flags.includes("--new-run"),
        json: flags.includes("--json"),
    };
}
47
/**
 * Entry point for the `exp` command family.
 *
 * Parses `argv` with parseExpArgs and dispatches on the subcommand. Returns a
 * numeric exit code: 0 on success, 1 when `info` finds an empty registry,
 * 2 when `run` / `resume` is refused because of the replayed state. `doctor`
 * returns whatever runDoctor reports.
 *
 * @param argv Tokens following `exp`.
 */
export async function runExpCommand(argv) {
    const parsed = parseExpArgs(argv);
    const toStderr = (text) => process.stderr.write(text);
    const toStdout = (text) => process.stdout.write(text);
    switch (parsed.subcommand) {
        case "list": {
            // An explicit directory is listed on its own, stamped with "now";
            // otherwise every registry entry is listed.
            const rows = parsed.expDir
                ? [{ path: parsed.expDir, last_active_ts: new Date().toISOString() }]
                : await listRegistry();
            await runList(rows);
            return 0;
        }
        case "info": {
            let targetDir = parsed.expDir;
            if (!targetDir) {
                const known = await listRegistry();
                if (known.length === 0) {
                    toStderr("Error: no experiments in registry. Run 'trace exp run <dir>' first, or provide a path: trace exp info <dir>\n");
                    return 1;
                }
                // NOTE(review): relies on listRegistry() returning the most recent entry first — confirm.
                targetDir = known[0].path;
                toStderr(`Using most recent: ${targetDir}\n`);
            }
            await runInfo(targetDir, { json: parsed.json });
            return 0;
        }
        case "run": {
            ensureProvider();
            const store = new ExpStore(parsed.expDir);
            const replayed = await store.replayState();
            if (replayed.isTerminal) {
                if (!parsed.newRun) {
                    toStderr(`Error: experiment already in terminal state ${replayed.currentState}. Use --new-run to start fresh.\n`);
                    return 2;
                }
                await store.archiveState();
            }
            else if (replayed.currentRound > 0 && !replayed.lastFailure) {
                toStderr(`Error: experiment in progress (state: ${replayed.currentState}). Use exp resume.\n`);
                return 2;
            }
            await upsertRegistry(parsed.expDir, new Date().toISOString());
            const coordinator = await makeCoordinator(parsed.expDir);
            await coordinator.run();
            return 0;
        }
        case "resume": {
            ensureProvider();
            const store = new ExpStore(parsed.expDir);
            const replayed = await store.replayState();
            if (replayed.currentState !== "Deciding") {
                toStderr(`Error: cannot resume — experiment is in state ${replayed.currentState}. Only Deciding state supports resume.\n`);
                return 2;
            }
            await upsertRegistry(parsed.expDir, new Date().toISOString());
            const coordinator = await makeCoordinator(parsed.expDir);
            await coordinator.resume();
            return 0;
        }
        case "show": {
            const store = new ExpStore(parsed.expDir);
            const replayed = await store.replayState();
            const rounds = await store.readAllRounds();
            const lineage = await store.readLineage();
            // An unreadable mission is tolerated here; the suggestion block is simply omitted.
            const mission = await store.readMission().catch(() => null);
            toStdout(`State: ${replayed.currentState} Round: ${replayed.currentRound}\n`);
            if (mission?.next_change) {
                toStdout(`Suggested next change:\n target: ${mission.next_change.target}\n hypothesis: ${mission.next_change.hypothesis}\n`);
            }
            if (rounds.length > 0) {
                const latest = rounds[rounds.length - 1];
                const outcomeText = latest.scores?.outcome.toFixed(2) ?? "?";
                const trajectoryText = latest.scores?.trajectory.toFixed(2) ?? "?";
                toStdout(`Last round scores: outcome=${outcomeText}, trajectory=${trajectoryText}\n`);
                if (latest.triage_conclusion) {
                    toStdout(`Triage: ${latest.triage_conclusion.diagnoses.join("; ")}\n`);
                }
            }
            toStdout(`Lineage: ${lineage.length} versions\n`);
            return 0;
        }
        case "status": {
            const replayed = await new ExpStore(parsed.expDir).replayState();
            toStdout(`${parsed.expDir}: ${replayed.currentState} (round ${replayed.currentRound})\n`);
            return 0;
        }
        case "abort": {
            await new ExpStore(parsed.expDir).writeAbortSignal();
            toStdout(`Abort signal written. Running process will stop at next checkpoint.\n`);
            return 0;
        }
        case "doctor": {
            return runDoctor(parsed.expDir, new ExpStore(parsed.expDir));
        }
    }
}
145
/**
 * Runs the `exp doctor` checks and prints one ✓/✗ line per check to stdout.
 *
 * Checks, in order: mission readable, each eval-set path accessible, current
 * candidate accessible, then the provider / step_failed flags reported by
 * getHealthChecks.
 *
 * @param expDir Experiment directory; mission paths are joined onto it.
 * @param store ExpStore for that directory (used to read the mission).
 * @returns 0 when every check passed, 1 otherwise.
 */
async function runDoctor(expDir, store) {
    let failedChecks = 0;
    const report = (label, pass, msg) => {
        const mark = pass ? "✓" : "✗";
        const detail = pass ? "" : `: ${msg}`;
        process.stdout.write(`${mark} ${label}${detail}\n`);
        if (!pass) {
            failedChecks += 1;
        }
    };
    // Reports `label` as passing when `target` is accessible, failing otherwise.
    const reportAccess = async (label, target) => {
        try {
            await fs.access(target);
            report(label, true, "");
        }
        catch {
            report(label, false, `not found: ${target}`);
        }
    };
    try {
        const mission = await store.readMission();
        report("mission.md valid", true, "");
        for (const evalSet of mission.eval_sets) {
            await reportAccess(`eval_set ${evalSet.path}`, path.join(expDir, evalSet.path));
        }
        await reportAccess("current_candidate readable", path.join(expDir, mission.current_candidate.path));
    }
    catch (err) {
        report("mission.md valid", false, String(err));
    }
    const health = await getHealthChecks(expDir);
    report("claude-code provider available", health.provider_available, "run: npx @anthropic-ai/claude-code --version");
    report("no step_failed in events", health.no_step_failed, "step_failed found in events.jsonl");
    return failedChecks === 0 ? 0 : 1;
}
182
/**
 * Builds the ExperimentCoordinator for an experiment directory.
 *
 * Connection settings come from KWEAVER_BASE_URL / KWEAVER_TOKEN /
 * KWEAVER_BUSINESS_DOMAIN (default "bd_public"); whichever of base URL or
 * token is missing is filled in from ensureValidToken().
 *
 * A semantic-match provider is created only when the registry resolves a
 * provider with the "structured_output" capability and that provider reports
 * itself available. In every other case a warning is written to stderr and
 * the coordinator runs without one.
 *
 * @param expDir Experiment directory handed to the coordinator and to runEval.
 * @returns A configured ExperimentCoordinator.
 */
async function makeCoordinator(expDir) {
    let baseUrl = process.env["KWEAVER_BASE_URL"] ?? "";
    let token = process.env["KWEAVER_TOKEN"] ?? "";
    const bd = process.env["KWEAVER_BUSINESS_DOMAIN"] ?? "bd_public";
    if (!baseUrl || !token) {
        const t = await ensureValidToken();
        if (!baseUrl)
            baseUrl = t.baseUrl;
        if (!token)
            token = t.accessToken;
    }
    let semanticMatchProvider;
    try {
        const provider = defaultRegistry.resolve({ requiredCapabilities: ["structured_output"] });
        if (provider && (await provider.isAvailable())) {
            const promptRegistry = new PromptTemplateRegistry();
            await promptRegistry.loadBuiltinDir(EVAL_SET_RUBRIC_DIR);
            semanticMatchProvider = createBuiltinSemanticMatchProvider({ provider, promptRegistry, lang: "zh" });
        }
        else {
            // Previously this path was silent: the provider stayed undefined with no hint to the user.
            process.stderr.write("warn: no available structured_output provider — semantic_match assertions will be skipped\n");
        }
    }
    catch {
        process.stderr.write("warn: could not create semantic-match provider — semantic_match assertions will be skipped\n");
    }
    return new ExperimentCoordinator({
        expDir,
        synthesizer: new ClaudeCodeSynthesizer(),
        triage: new ClaudeCodeTriageClient(),
        runEval: ({ evalSetPaths, candidatePath, round }) => runEval({
            evalSetPaths,
            candidatePath,
            expDir,
            round,
            maxParallel: 2,
            deps: {
                fetchAgent: async (agentId) => fetchAgentInfo({ baseUrl, accessToken: token, agentId, version: "latest", businessDomain: bd }),
                sendChat: async ({ agentInfo, query }) => {
                    const result = await sendChatRequest({
                        baseUrl,
                        accessToken: token,
                        agentId: agentInfo.id,
                        agentKey: agentInfo.key,
                        agentVersion: agentInfo.version,
                        query,
                        stream: true,
                        businessDomain: bd,
                    });
                    // Only the reply text and conversation id are passed on to the eval runner.
                    return { text: result.text, conversationId: result.conversationId };
                },
                fetchTrace: async (conversationId) => {
                    const r = await getTracesByConversation({ baseUrl, accessToken: token, conversationId, businessDomain: bd });
                    return { spans: r.spans };
                },
                semanticMatchProvider,
            },
        }),
    });
}
@@ -0,0 +1,35 @@
1
+ import type { ThreeAxisScores } from "./schemas.js";
2
/** Boolean health flags for one experiment directory, as computed by getHealthChecks. */
export interface HealthChecks {
    /** The mission file could be read by the experiment store. */
    mission_valid: boolean;
    /** Every eval-set path named by the mission is accessible under the experiment directory. */
    eval_set_valid: boolean;
    /** The mission's current candidate path is accessible under the experiment directory. */
    candidate_readable: boolean;
    /** The provider registry resolved a provider when asked for "claude-code". */
    provider_available: boolean;
    /** The replayed experiment state records no last failure. */
    no_step_failed: boolean;
}
9
/** Point-in-time summary of an experiment, as produced by buildExpSnapshot. */
export interface ExpSnapshot {
    /** Experiment directory the snapshot was built from. */
    workspace: string;
    /** Current state name from the replayed event log. */
    state: string;
    /** Current round number from the replayed event log. */
    round: number;
    /** Scores of the last recorded round, or null when there is none. */
    scores: ThreeAxisScores | null;
    /** Last round's triage diagnoses joined with "; ", or null when absent. */
    triage_summary: string | null;
    /** The mission's pending next change, or null when the mission has none or is unreadable. */
    suggested_next: { target: string; hypothesis: string } | null;
    /** Number of entries in the candidate lineage. */
    lineage_versions: number;
    /** Health flags for the same directory. */
    health: HealthChecks;
}
22
/** Computes the health flags for the experiment at `expDir`. */
export declare function getHealthChecks(expDir: string): Promise<HealthChecks>;
/** Builds a snapshot for `expDir`; rejects when the directory is not accessible. */
export declare function buildExpSnapshot(expDir: string): Promise<ExpSnapshot>;
/** Serializes a snapshot as YAML. */
export declare function formatSnapshotYaml(snap: ExpSnapshot): string;
/**
 * Renders one fixed-width table row for a registry entry.
 * A null `snap` renders the path followed by "(missing)".
 */
export declare function formatSnapshotTableRow(
    entry: { path: string; last_active_ts: string },
    snap: ExpSnapshot | null,
): string;
/** Writes the snapshot of `expDir` to stdout, as JSON when `opts.json` is set, otherwise as YAML. */
export declare function runInfo(expDir: string, opts?: { json?: boolean }): Promise<void>;
/** Writes a table header followed by one row per registry entry to stdout. */
export declare function runList(
    registryEntries: { path: string; last_active_ts: string }[],
): Promise<void>;
@@ -0,0 +1,120 @@
1
+ // src/trace-ai/exp/info.ts
2
+ import path from "node:path";
3
+ import fs from "node:fs/promises";
4
+ import yaml from "js-yaml";
5
+ import { ExpStore } from "./exp-store/index.js";
6
+ import { defaultRegistry } from "../../agent-providers/registry.js";
7
/**
 * Collects the health flags for the experiment at `expDir`.
 *
 * Mission-derived flags stay false when the mission cannot be read. The
 * provider flag is true when the registry resolves a non-null provider for
 * "claude-code" and false when it returns null or throws. `no_step_failed`
 * reflects whether the replayed state has a null `lastFailure`.
 *
 * @param expDir Experiment directory.
 * @returns The five boolean health flags.
 */
export async function getHealthChecks(expDir) {
    const store = new ExpStore(expDir);
    // The relative path is supplied lazily so that any error while reading it
    // is treated exactly like an inaccessible file.
    const isReachable = async (relPathOf) => {
        try {
            await fs.access(path.join(expDir, relPathOf()));
            return true;
        }
        catch {
            return false;
        }
    };
    let mission_valid = false;
    let eval_set_valid = false;
    let candidate_readable = false;
    try {
        const mission = await store.readMission();
        mission_valid = true;
        let everyEvalSetFound = true;
        for (const evalSet of mission.eval_sets) {
            if (!(await isReachable(() => evalSet.path))) {
                everyEvalSetFound = false;
            }
        }
        eval_set_valid = everyEvalSetFound;
        candidate_readable = await isReachable(() => mission.current_candidate.path);
    }
    catch {
        // Mission unreadable (or malformed): mission-derived flags keep their current values.
    }
    let provider_available;
    try {
        const resolved = defaultRegistry.resolve({ preferred: "claude-code" });
        provider_available = resolved !== null;
    }
    catch {
        provider_available = false;
    }
    const { lastFailure } = await store.replayState();
    const no_step_failed = lastFailure === null;
    return { mission_valid, eval_set_valid, candidate_readable, provider_available, no_step_failed };
}
45
/**
 * Builds a summary snapshot of the experiment at `expDir`.
 *
 * Rejects up front when the directory is not accessible, so callers such as
 * runList can show a "(missing)" row rather than a snapshot of a path that
 * does not exist.
 *
 * @param expDir Experiment directory.
 * @returns Workspace path, replayed state and round, last-round scores and
 *   triage summary (null when absent), the mission's suggested next change
 *   (null when absent or the mission is unreadable), lineage length, and
 *   health flags.
 */
export async function buildExpSnapshot(expDir) {
    await fs.access(expDir);
    const store = new ExpStore(expDir);
    const replayed = await store.replayState();
    const rounds = await store.readAllRounds();
    const lineage = await store.readLineage();
    const mission = await store.readMission().catch(() => null);
    const health = await getHealthChecks(expDir);
    const latestRound = rounds.length === 0 ? null : rounds[rounds.length - 1];
    const triage_summary = latestRound?.triage_conclusion?.diagnoses.join("; ") ?? null;
    let suggested_next = null;
    if (mission?.next_change) {
        suggested_next = {
            target: mission.next_change.target,
            hypothesis: mission.next_change.hypothesis,
        };
    }
    return {
        workspace: expDir,
        state: replayed.currentState,
        round: replayed.currentRound,
        scores: latestRound?.scores ?? null,
        triage_summary,
        suggested_next,
        lineage_versions: lineage.length,
        health,
    };
}
73
/**
 * Serializes a snapshot as YAML.
 *
 * @param snap Snapshot to serialize.
 * @returns YAML text produced by js-yaml with `lineWidth: -1`.
 */
export function formatSnapshotYaml(snap) {
    const dumpOptions = { lineWidth: -1 };
    return yaml.dump(snap, dumpOptions);
}
76
/**
 * Renders one fixed-width row of the experiment list table.
 *
 * @param entry Registry entry (`path`, ISO `last_active_ts`).
 * @param snap Snapshot for that entry, or null/undefined when it could not be built.
 * @returns Path followed by "(missing)" when there is no snapshot; otherwise
 *   the path, state, round, outcome, trajectory and last-active columns
 *   joined by a single space.
 */
export function formatSnapshotTableRow(entry, snap) {
    if (snap == null) {
        return [entry.path.padEnd(50), "(missing)"].join(" ");
    }
    // A score axis that is absent or not a number renders as "-" instead of
    // throwing on `.toFixed` (e.g. a round whose scores are only partly filled in).
    const formatScore = (value) => (typeof value === "number" ? value.toFixed(2) : "-");
    const outcome = formatScore(snap.scores?.outcome);
    const trajectory = formatScore(snap.scores?.trajectory);
    // "2024-05-01T12:34:56.789Z" -> "2024-05-01 12:34:56"
    const lastActive = String(entry.last_active_ts ?? "").replace("T", " ").slice(0, 19);
    return [
        entry.path.padEnd(50),
        snap.state.padEnd(12),
        String(snap.round).padEnd(6),
        outcome.padEnd(8),
        trajectory.padEnd(10),
        lastActive,
    ].join(" ");
}
92
/**
 * Prints the snapshot of one experiment to stdout.
 *
 * @param expDir Experiment directory.
 * @param opts `json: true` prints indented JSON followed by a newline;
 *   otherwise the YAML rendering is printed.
 */
export async function runInfo(expDir, opts = {}) {
    const snapshot = await buildExpSnapshot(expDir);
    const rendered = opts.json
        ? JSON.stringify(snapshot, null, 2) + "\n"
        : formatSnapshotYaml(snapshot);
    process.stdout.write(rendered);
}
101
/**
 * Prints the experiment table: a header, a dashed rule of the same width, and
 * one row per registry entry. An entry whose snapshot cannot be built is
 * rendered as a "(missing)" row.
 *
 * @param registryEntries Entries with `path` and `last_active_ts`.
 */
export async function runList(registryEntries) {
    const paddedColumns = [
        ["PATH", 50],
        ["STATE", 12],
        ["ROUND", 6],
        ["OUTCOME", 8],
        ["TRAJECTORY", 10],
    ];
    const header = [
        ...paddedColumns.map(([title, width]) => title.padEnd(width)),
        "LAST_ACTIVE",
    ].join(" ");
    process.stdout.write(header + "\n");
    process.stdout.write("-".repeat(header.length) + "\n");
    for (const entry of registryEntries) {
        let snapshot = null;
        try {
            snapshot = await buildExpSnapshot(entry.path);
        }
        catch {
            // Snapshot could not be built (e.g. missing path): fall through to the "(missing)" row.
        }
        process.stdout.write(formatSnapshotTableRow(entry, snapshot) + "\n");
    }
}
@@ -0,0 +1 @@
1
/**
 * Returns a deep copy of `candidate` whose `agent` sub-tree has been merged
 * with the `agent` object found in the JSON text `patchJson`.
 * Throws when the parsed patch has no `agent` entry.
 */
export declare function applyAgentConfigPatch(
    candidate: Record<string, unknown>,
    patchJson: string,
): Record<string, unknown>;
@@ -0,0 +1,26 @@
1
+ // src/trace-ai/exp/patch/agent-config.ts
2
/**
 * Applies an `agent.*` patch to a candidate config without mutating the input.
 *
 * Only the `agent` sub-tree is patched; other top-level keys in the patch are
 * intentionally ignored. The merge follows JSON Merge Patch rules: `null`
 * deletes a key, nested objects merge recursively, everything else (including
 * arrays) replaces the existing value.
 *
 * @param candidate Candidate config; deep-cloned before patching.
 * @param patchJson JSON text of the form `{ "agent": { ... } }`.
 * @returns The patched deep copy.
 * @throws SyntaxError when `patchJson` is not valid JSON.
 * @throws Error when the patch has no `agent` entry or `agent` is not a JSON object.
 */
export function applyAgentConfigPatch(candidate, patchJson) {
    const patch = JSON.parse(patchJson);
    // `JSON.parse("null")` and friends must yield the documented error, not a raw TypeError.
    if (!isPlainObject(patch) || !patch.agent) {
        throw new Error("agent.* patch must have an 'agent' key");
    }
    if (!isPlainObject(patch.agent)) {
        throw new Error("agent.* patch 'agent' value must be a JSON object");
    }
    const result = structuredClone(candidate);
    result["agent"] = mergePatch(result["agent"], patch["agent"]);
    return result;
}
/** True for non-null, non-array objects. */
function isPlainObject(value) {
    return typeof value === "object" && value !== null && !Array.isArray(value);
}
/**
 * Recursively merges `patch` into a shallow copy of `target`.
 * A target that is not a plain object (string, array, number, missing) is
 * treated as empty, so its characters or indices never leak into the result.
 */
function mergePatch(target, patch) {
    const result = isPlainObject(target) ? { ...target } : {};
    for (const [k, v] of Object.entries(patch)) {
        // Assigning to "__proto__" would swap the result's prototype instead of setting a key.
        if (k === "__proto__") {
            continue;
        }
        if (v === null) {
            delete result[k];
        }
        else if (isPlainObject(v)) {
            result[k] = mergePatch(result[k], v);
        }
        else {
            result[k] = v;
        }
    }
    return result;
}
@@ -0,0 +1,2 @@
1
+ import type { NextChange } from "../schemas.js";
2
/**
 * Applies `change.patch` to `candidate`, choosing the patcher from the prefix
 * of `change.target` (the text before the first "."). Only the "agent" and
 * "skill" prefixes are handled; any other prefix throws.
 */
export declare function applyPatch(
    candidate: Record<string, unknown>,
    change: NextChange,
): Record<string, unknown>;
@@ -0,0 +1,13 @@
1
+ import { applyAgentConfigPatch } from "./agent-config.js";
2
+ import { applySkillPatch } from "./skill.js";
3
/**
 * Routes a proposed change to the patcher that matches its target prefix.
 *
 * @param candidate Candidate config to patch.
 * @param change Proposed change; `target` looks like "agent.x" or "skill.x",
 *   `patch` is the JSON text handed to the patcher.
 * @returns Whatever the selected patcher returns.
 * @throws Error when the prefix is neither "agent" nor "skill".
 */
export function applyPatch(candidate, change) {
    const [prefix] = change.target.split(".");
    if (prefix === "agent") {
        return applyAgentConfigPatch(candidate, change.patch);
    }
    if (prefix === "skill") {
        return applySkillPatch(candidate, change.patch);
    }
    throw new Error(`Unsupported target prefix "${prefix}" — only agent.* and skill.* are supported in MVP-C`);
}
@@ -0,0 +1 @@
1
/**
 * Returns a deep copy of `candidate` whose `skills` list has been edited by
 * the `skills` operations (`remove`, then `add`, then `swap`) found in the
 * JSON text `patchJson`. Throws when the parsed patch has no `skills` entry
 * or when `swap.from` names a skill that is not in the list.
 */
export declare function applySkillPatch(
    candidate: Record<string, unknown>,
    patchJson: string,
): Record<string, unknown>;
@@ -0,0 +1,24 @@
1
/**
 * Applies a `skill.*` patch to a candidate config without mutating the input.
 *
 * The patch has the shape `{ "skills": { remove?, add?, swap? } }` and the
 * operations run in a fixed order: remove → add → swap.
 *  - `remove`: a skill name or a list of names to drop.
 *  - `add`: a skill entry or a list of entries to append.
 *  - `swap`: `{ from, to }` — every entry named `from` is replaced by `to`.
 *
 * @param candidate Candidate config; deep-cloned before patching.
 * @param patchJson JSON text of the patch.
 * @returns The patched deep copy.
 * @throws SyntaxError when `patchJson` is not valid JSON.
 * @throws Error when the patch has no `skills` entry, the candidate's
 *   `skills` is not a list, `swap.to` is missing, or `swap.from` is not found.
 */
export function applySkillPatch(candidate, patchJson) {
    const patch = JSON.parse(patchJson);
    // Guard against `null` / scalar patches so callers get the documented error.
    const ops = patch !== null && typeof patch === "object" ? patch.skills : undefined;
    if (!ops)
        throw new Error("skill.* patch must have a 'skills' key");
    const result = structuredClone(candidate);
    let skills = result["skills"] ?? [];
    if (!Array.isArray(skills))
        throw new Error("skill.* patch requires candidate 'skills' to be an array");
    // A lone value is treated as a one-element list; spreading a bare string
    // would otherwise split it into characters.
    const asList = (value) => (Array.isArray(value) ? value : [value]);
    // Tolerates null / non-object entries in the skills list.
    const nameOf = (skill) => skill?.name;
    // Order is significant: remove → add → swap
    if (ops.remove) {
        const toRemove = new Set(asList(ops.remove));
        skills = skills.filter((s) => !toRemove.has(nameOf(s)));
    }
    if (ops.add) {
        skills = [...skills, ...asList(ops.add)];
    }
    if (ops.swap) {
        const { from, to } = ops.swap;
        if (to === undefined)
            throw new Error("skill.swap: 'to' is required");
        const matched = skills.some((s) => nameOf(s) === from);
        if (!matched)
            throw new Error(`skill.swap: skill "${from}" not found in candidate`);
        skills = skills.map((s) => (nameOf(s) === from ? to : s));
    }
    result["skills"] = skills;
    return result;
}
@@ -0,0 +1,14 @@
1
+ import type { Mission, NextChange, RoundData } from "../schemas.js";
2
/** Everything a synthesizer is given when asked to propose the next change. */
export interface SynthesizerInput {
    /** Experiment mission; its goal is quoted in the prompt. */
    mission: Mission;
    /** Current candidate config, shown to the model as YAML. */
    candidateConfig: Record<string, unknown>;
    /** Most recent round, if any; its triage hints are quoted in the prompt. */
    prevRound?: RoundData;
    /** All earlier rounds, summarized one line per round in the prompt. */
    prevRounds: RoundData[];
    /** Optional cross-round context string, included verbatim in the prompt. */
    crossRoundMemoryRef?: string;
}
9
/** Anything able to turn experiment context into a proposed next change. */
export interface SynthesizerClient {
    /** Produces the next change to try for the given experiment context. */
    generate(input: SynthesizerInput): Promise<NextChange>;
}
12
/** SynthesizerClient that asks the registry's "claude-code" provider for the next change. */
export declare class ClaudeCodeSynthesizer implements SynthesizerClient {
    /** Builds the prompt from `input` and returns the provider's structured output. */
    generate(input: SynthesizerInput): Promise<NextChange>;
}
@@ -0,0 +1,39 @@
1
+ // src/trace-ai/exp/providers/synthesizer-client.ts
2
+ import yaml from "js-yaml";
3
+ import { defaultRegistry } from "../../../agent-providers/registry.js";
4
+ import { NextChangeSchema } from "../schemas.js";
5
/**
 * Synthesizer that asks the "claude-code" agent provider to propose the next
 * change (target, hypothesis, patch) for an experiment.
 */
export class ClaudeCodeSynthesizer {
    /**
     * Builds a prompt from the mission goal, the candidate config (as YAML),
     * a one-line summary per previous round, the last round's triage hints and
     * any cross-round context, then invokes the provider with `NextChangeSchema`
     * as the output schema.
     *
     * @param input - Mission, candidate config and round history.
     * @returns The provider's structured output.
     * @throws {Error} When the "claude-code" provider cannot be resolved.
     */
    async generate(input) {
        const provider = defaultRegistry.resolve({ preferred: "claude-code" });
        if (!provider)
            throw new Error("claude-code provider not available");
        const prevSummary = input.prevRounds
            .map(r => `Round ${r.round}: outcome=${r.scores?.outcome.toFixed(2) ?? "?"}, hints=${r.triage_conclusion?.hints.join("; ") ?? "none"}`)
            .join("\n");
        // Build the example with JSON.stringify so the nested patch string is
        // correctly escaped. Writing \" inside a template literal drops the
        // backslash, which made the example shown to the model invalid JSON.
        const exampleChange = JSON.stringify({
            target: "agent.system_prompt",
            hypothesis: "Add explicit stop condition",
            patch: JSON.stringify({ agent: { system_prompt: "New prompt here" } }),
        });
        // skill.* patches are not merge patches: the skill patcher requires a
        // top-level "skills" object with remove/add/swap operations.
        const skillPatchShape = '{"skills":{"remove":["<skill name>"],"add":[<skill entry>],"swap":{"from":"<skill name>","to":<skill entry>}}}';
        const prompt = [
            "You are an agent optimization assistant. Given an experiment goal and round results, suggest the next change to try.",
            "",
            `GOAL: ${input.mission.goal}`,
            "",
            "CURRENT CANDIDATE CONFIG:",
            // yaml.dump output already ends with a newline, hence the extra blank line in the prompt.
            yaml.dump(input.candidateConfig, { lineWidth: 80 }),
            "",
            "PREVIOUS ROUNDS:",
            prevSummary || "None (first round)",
            "",
            input.prevRound?.triage_conclusion
                ? `TRIAGE HINTS FROM LAST ROUND:\n${input.prevRound.triage_conclusion.hints.join("\n")}`
                : "",
            "",
            input.crossRoundMemoryRef ? `CROSS-ROUND CONTEXT: ${input.crossRoundMemoryRef}` : "",
            "",
            "Respond with a JSON object with exactly these fields:",
            '- "target": one of "agent.system_prompt", "agent.temperature", "agent.model", "skill.add", "skill.remove", "skill.swap"',
            '- "hypothesis": brief explanation of why this change might help',
            '- "patch": a JSON Merge Patch string to apply to the candidate config',
            `  For skill.* targets, "patch" must instead be a JSON string shaped like ${skillPatchShape}, containing only the operation(s) you need; skill entries use the same shape as in the candidate config.`,
            "",
            "Example for changing system_prompt:",
            exampleChange,
        ].join("\n");
        const response = await provider.invoke({
            prompt,
            outputSchema: NextChangeSchema,
            correlationId: `synthesizer-${Date.now()}`,
        });
        return response.output;
    }
}
@@ -0,0 +1,19 @@
1
+ import type { RoundData } from "../schemas.js";
2
/** Everything a triage client receives when asked to assess a round. */
export interface TriageInput {
    /** The round being triaged. */
    currentRound: RoundData;

    /** Rounds completed before the current one. */
    prevRounds: RoundData[];

    /** Candidate configuration that produced the current round. */
    candidateConfig: Record<string, unknown>;

    /** Optional context carried over from an earlier triage. */
    crossRoundMemoryRef?: string;
}
8
/** Structured outcome of triaging one round. */
export interface TriageResult {
    /** Root-cause observations. */
    diagnoses: string[];

    /** Specific suggestions for the next change. */
    hints: string[];

    /** "continue" when more rounds are needed, "publish" when the candidate is good enough. */
    verdict: "continue" | "publish";

    /** Brief summary of key findings to carry forward. */
    new_memory_token: string;
}
14
/** Contract for components that analyse a round and recommend next steps. */
export interface TriageClient {
    /**
     * Triages the given round.
     *
     * @param input - Current round, earlier rounds and candidate config.
     * @returns A promise resolving to diagnoses, hints, a verdict and a memory token.
     */
    triage(input: TriageInput): Promise<TriageResult>;
}
17
/** {@link TriageClient} implementation backed by the "claude-code" agent provider. */
export declare class ClaudeCodeTriageClient implements TriageClient {
    /**
     * Asks the provider to triage the given round.
     *
     * @param input - Current round, earlier rounds and candidate config.
     * @returns A promise resolving to the triage result.
     */
    triage(input: TriageInput): Promise<TriageResult>;
}
@@ -0,0 +1,51 @@
1
+ // src/trace-ai/exp/providers/triage-client.ts
2
+ import { z } from "zod";
3
+ import { defaultRegistry } from "../../../agent-providers/registry.js";
4
// Shape the provider's triage response is validated against:
// two string lists, a two-valued verdict and a free-text memory token.
const TriageOutputSchema = z.object({
    diagnoses: z.string().array(),
    hints: z.string().array(),
    verdict: z.enum(["continue", "publish"]),
    new_memory_token: z.string(),
});
10
/**
 * Triage client that asks the "claude-code" agent provider to analyse a round
 * and recommend whether to continue or publish.
 */
export class ClaudeCodeTriageClient {
    /**
     * Summarises the current round (scores, queries with failed assertions,
     * queries with more than one retry) and the earlier rounds into a prompt,
     * then invokes the provider with `TriageOutputSchema` as the output schema.
     *
     * @param input - Current round, earlier rounds and optional carried-over context.
     * @returns The provider's structured output.
     * @throws {Error} When the "claude-code" provider cannot be resolved.
     */
    async triage(input) {
        const provider = defaultRegistry.resolve({ preferred: "claude-code" });
        if (!provider) {
            throw new Error("claude-code provider not available");
        }
        const round = input.currentRound;

        let scoreLine = "no scores";
        if (round.scores) {
            const { outcome, trajectory, guardrail } = round.scores;
            scoreLine = `outcome=${outcome.toFixed(2)}, trajectory=${trajectory.toFixed(2)}, guardrail=${guardrail.toFixed(2)}`;
        }

        // One line per query that has at least one failed assertion, listing the failed assertion types.
        const failedLines = (round.per_query_results ?? []).flatMap(query => {
            const failedTypes = query.assertion_results
                .filter(assertion => assertion.verdict === "fail")
                .map(assertion => assertion.type);
            return failedTypes.length > 0 ? [`${query.query_id}: ${failedTypes.join(", ")}`] : [];
        });

        // One line per query that needed more than one retry.
        const retryLines = (round.per_query_results ?? [])
            .filter(query => query.trajectory_summary.retry_count > 1)
            .map(query => `${query.query_id}: ${query.trajectory_summary.retry_count} retries`);

        const historyLines = input.prevRounds.map(earlier => `Round ${earlier.round}: outcome=${earlier.scores?.outcome.toFixed(2) ?? "?"}, verdict=${earlier.triage_conclusion?.verdict ?? "?"}`);

        const memoryLine = input.crossRoundMemoryRef
            ? `CONTEXT FROM PREVIOUS TRIAGE: ${input.crossRoundMemoryRef}`
            : "";

        // candidateConfig is available for future prompt enrichment; omitted here to keep the prompt focused on scores.
        const prompt = [
            "You are an agent evaluation triager. Analyze the current round results and recommend next steps.",
            "",
            `ROUND ${round.round} SCORES: ${scoreLine}`,
            "",
            "FAILED QUERIES:",
            failedLines.join("\n") || "None",
            "",
            "TRAJECTORY ISSUES:",
            retryLines.join("\n") || "None",
            "",
            "PREVIOUS ROUND HISTORY:",
            historyLines.join("\n") || "None",
            "",
            memoryLine,
            "",
            "Respond with JSON:",
            '- "diagnoses": list of root cause observations',
            '- "hints": list of specific suggestions for next change',
            '- "verdict": "continue" if more rounds needed, "publish" if this candidate is good enough',
            '- "new_memory_token": brief summary of key findings to carry forward (1-2 sentences)',
        ].join("\n");

        const { output } = await provider.invoke({
            prompt,
            outputSchema: TriageOutputSchema,
            correlationId: `triage-${Date.now()}`,
        });
        return output;
    }
}