@kweaver-ai/kweaver-sdk 0.7.4 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. package/README.md +39 -5
  2. package/README.zh.md +37 -5
  3. package/dist/agent-providers/index.d.ts +7 -0
  4. package/dist/agent-providers/index.js +5 -0
  5. package/dist/agent-providers/prompt-template.d.ts +62 -0
  6. package/dist/agent-providers/prompt-template.js +105 -0
  7. package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
  8. package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
  9. package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
  10. package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
  11. package/dist/agent-providers/providers/stub.d.ts +47 -0
  12. package/dist/agent-providers/providers/stub.js +77 -0
  13. package/dist/agent-providers/registry.d.ts +45 -0
  14. package/dist/agent-providers/registry.js +77 -0
  15. package/dist/agent-providers/types.d.ts +91 -0
  16. package/dist/agent-providers/types.js +25 -0
  17. package/dist/api/agent-chat.js +8 -6
  18. package/dist/api/agent-observability.d.ts +51 -0
  19. package/dist/api/agent-observability.js +108 -0
  20. package/dist/api/context-loader.d.ts +1 -0
  21. package/dist/api/conversations.d.ts +4 -8
  22. package/dist/api/conversations.js +16 -58
  23. package/dist/api/datasources.d.ts +2 -20
  24. package/dist/api/datasources.js +7 -123
  25. package/dist/api/semantic-search.d.ts +5 -0
  26. package/dist/api/semantic-search.js +5 -0
  27. package/dist/api/skills.d.ts +75 -2
  28. package/dist/api/skills.js +108 -12
  29. package/dist/api/trace.d.ts +49 -0
  30. package/dist/api/trace.js +85 -0
  31. package/dist/api/vega.d.ts +53 -0
  32. package/dist/api/vega.js +144 -0
  33. package/dist/cli.js +12 -5
  34. package/dist/commands/agent/mode.d.ts +6 -0
  35. package/dist/commands/agent/mode.js +75 -0
  36. package/dist/commands/agent.js +101 -29
  37. package/dist/commands/bkn-ops.js +12 -6
  38. package/dist/commands/bkn-utils.d.ts +9 -0
  39. package/dist/commands/bkn-utils.js +17 -0
  40. package/dist/commands/context-loader.js +608 -38
  41. package/dist/commands/ds.js +7 -2
  42. package/dist/commands/skill.d.ts +21 -1
  43. package/dist/commands/skill.js +389 -1
  44. package/dist/commands/trace.d.ts +39 -0
  45. package/dist/commands/trace.js +668 -0
  46. package/dist/index.d.ts +2 -2
  47. package/dist/index.js +1 -1
  48. package/dist/resources/bkn.d.ts +5 -0
  49. package/dist/resources/bkn.js +5 -0
  50. package/dist/resources/datasources.js +2 -1
  51. package/dist/resources/skills.d.ts +17 -1
  52. package/dist/resources/skills.js +32 -1
  53. package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
  54. package/dist/trace-ai/diagnose/agent-binding.js +257 -0
  55. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +2 -0
  56. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +15 -0
  57. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +16 -0
  58. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +2 -0
  59. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.js +44 -0
  60. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +15 -0
  61. package/dist/trace-ai/diagnose/builtin-rules/register.d.ts +1 -0
  62. package/dist/trace-ai/diagnose/builtin-rules/register.js +11 -0
  63. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +2 -0
  64. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.js +29 -0
  65. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +15 -0
  66. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.d.ts +2 -0
  67. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.js +45 -0
  68. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.yaml +15 -0
  69. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +2 -0
  70. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.js +38 -0
  71. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.yaml +16 -0
  72. package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
  73. package/dist/trace-ai/diagnose/index.d.ts +32 -0
  74. package/dist/trace-ai/diagnose/index.js +246 -0
  75. package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
  76. package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
  77. package/dist/trace-ai/diagnose/predicate-registry.d.ts +7 -0
  78. package/dist/trace-ai/diagnose/predicate-registry.js +30 -0
  79. package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
  80. package/dist/trace-ai/diagnose/query-extractor.js +45 -0
  81. package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
  82. package/dist/trace-ai/diagnose/report-assembler.js +100 -0
  83. package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
  84. package/dist/trace-ai/diagnose/report-markdown.js +192 -0
  85. package/dist/trace-ai/diagnose/rule-loader.d.ts +11 -0
  86. package/dist/trace-ai/diagnose/rule-loader.js +120 -0
  87. package/dist/trace-ai/diagnose/schemas.d.ts +184 -0
  88. package/dist/trace-ai/diagnose/schemas.js +154 -0
  89. package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
  90. package/dist/trace-ai/diagnose/signal-probe.js +39 -0
  91. package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
  92. package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
  93. package/dist/trace-ai/diagnose/synthesizer-template.d.ts +2 -0
  94. package/dist/trace-ai/diagnose/synthesizer-template.js +49 -0
  95. package/dist/trace-ai/diagnose/trace-shaper.d.ts +3 -0
  96. package/dist/trace-ai/diagnose/trace-shaper.js +73 -0
  97. package/dist/trace-ai/diagnose/types.d.ts +173 -0
  98. package/dist/trace-ai/diagnose/types.js +1 -0
  99. package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
  100. package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
  101. package/dist/trace-ai/eval-set/builder.d.ts +36 -0
  102. package/dist/trace-ai/eval-set/builder.js +126 -0
  103. package/dist/trace-ai/eval-set/index.d.ts +15 -0
  104. package/dist/trace-ai/eval-set/index.js +10 -0
  105. package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
  106. package/dist/trace-ai/eval-set/output-writer.js +126 -0
  107. package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
  108. package/dist/trace-ai/eval-set/query-picker.js +147 -0
  109. package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
  110. package/dist/trace-ai/eval-set/redactor.js +133 -0
  111. package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
  112. package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
  113. package/dist/trace-ai/eval-set/schemas.js +130 -0
  114. package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
  115. package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
  116. package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
  117. package/dist/trace-ai/eval-set/test-runner.js +153 -0
  118. package/dist/trace-ai/eval-set/types.d.ts +46 -0
  119. package/dist/trace-ai/eval-set/types.js +8 -0
  120. package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
  121. package/dist/trace-ai/exp/bundle-writer.js +54 -0
  122. package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
  123. package/dist/trace-ai/exp/claude-binary.js +30 -0
  124. package/dist/trace-ai/exp/coordinator.d.ts +45 -0
  125. package/dist/trace-ai/exp/coordinator.js +203 -0
  126. package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
  127. package/dist/trace-ai/exp/eval-runner.js +47 -0
  128. package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
  129. package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
  130. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
  131. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
  132. package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
  133. package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
  134. package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
  135. package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
  136. package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
  137. package/dist/trace-ai/exp/exp-store/index.js +59 -0
  138. package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
  139. package/dist/trace-ai/exp/exp-store/lock.js +73 -0
  140. package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
  141. package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
  142. package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
  143. package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
  144. package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
  145. package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
  146. package/dist/trace-ai/exp/index.d.ts +8 -0
  147. package/dist/trace-ai/exp/index.js +238 -0
  148. package/dist/trace-ai/exp/info.d.ts +35 -0
  149. package/dist/trace-ai/exp/info.js +120 -0
  150. package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
  151. package/dist/trace-ai/exp/patch/agent-config.js +26 -0
  152. package/dist/trace-ai/exp/patch/index.d.ts +2 -0
  153. package/dist/trace-ai/exp/patch/index.js +13 -0
  154. package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
  155. package/dist/trace-ai/exp/patch/skill.js +24 -0
  156. package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
  157. package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
  158. package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
  159. package/dist/trace-ai/exp/providers/triage-client.js +51 -0
  160. package/dist/trace-ai/exp/schemas.d.ts +147 -0
  161. package/dist/trace-ai/exp/schemas.js +50 -0
  162. package/dist/trace-ai/exp/scoring.d.ts +2 -0
  163. package/dist/trace-ai/exp/scoring.js +46 -0
  164. package/dist/trace-ai/scan/aggregator.d.ts +20 -0
  165. package/dist/trace-ai/scan/aggregator.js +26 -0
  166. package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
  167. package/dist/trace-ai/scan/artifacts/paths.js +18 -0
  168. package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
  169. package/dist/trace-ai/scan/artifacts/writer.js +96 -0
  170. package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
  171. package/dist/trace-ai/scan/batched-rubric.js +159 -0
  172. package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
  173. package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
  174. package/dist/trace-ai/scan/index.d.ts +31 -0
  175. package/dist/trace-ai/scan/index.js +390 -0
  176. package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
  177. package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
  178. package/dist/trace-ai/scan/runner.d.ts +25 -0
  179. package/dist/trace-ai/scan/runner.js +42 -0
  180. package/dist/trace-ai/scan/sampler.d.ts +18 -0
  181. package/dist/trace-ai/scan/sampler.js +81 -0
  182. package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
  183. package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
  184. package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
  185. package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
  186. package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
  187. package/dist/trace-ai/scan/single-agent-validator.js +42 -0
  188. package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
  189. package/dist/trace-ai/scan/traces-list-parser.js +46 -0
  190. package/package.json +14 -4
@@ -0,0 +1,37 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import yaml from "js-yaml";
4
+ import { MissionSchema } from "../schemas.js";
5
/**
 * Load and validate the YAML frontmatter of `<expDir>/mission.md`.
 *
 * The frontmatter is the text between the leading `---` line and the next
 * `---` line. It is parsed with js-yaml and validated against `MissionSchema`.
 *
 * @param expDir Experiment directory that contains `mission.md`.
 * @returns The schema-validated mission (`result.data`).
 * @throws Error when the file cannot be read, when no frontmatter is found,
 *   or when schema validation fails (the message lists every failing path).
 */
export async function readMission(expDir) {
    const filePath = path.join(expDir, "mission.md");
    let raw;
    try {
        raw = await fs.readFile(filePath, "utf8");
    }
    catch {
        // Every read failure is reported with the same "not found" message.
        throw new Error(`mission.md not found in ${expDir}`);
    }
    // mission.md is edited by hand, so tolerate a UTF-8 BOM and CRLF line
    // endings; previously either one made a valid file look like it had no
    // frontmatter at all.
    const match = raw.replace(/^\uFEFF/, "").match(/^---\r?\n([\s\S]*?)\r?\n---/);
    if (!match)
        throw new Error(`mission.md in ${expDir} has no YAML frontmatter`);
    const parsed = yaml.load(match[1]);
    const result = MissionSchema.safeParse(parsed);
    if (!result.success) {
        // Flatten issues into "path: message" pairs for a one-line error.
        const issues = result.error.issues.map(i => `${i.path.join(".")}: ${i.message}`).join("; ");
        throw new Error(`mission.md schema invalid: ${issues}`);
    }
    return result.data;
}
26
/**
 * Store `change` under the `next_change` key of the frontmatter in
 * `<expDir>/mission.md`, leaving the Markdown body after the closing `---`
 * untouched.
 *
 * @param expDir Experiment directory that contains `mission.md`.
 * @param change Value written to `next_change`.
 * @throws Error when the file has no frontmatter or the frontmatter is not a
 *   YAML mapping; read/write failures from `fs` propagate unchanged.
 */
export async function writeSuggestedChange(expDir, change) {
    const filePath = path.join(expDir, "mission.md");
    const raw = await fs.readFile(filePath, "utf8");
    // Accept a UTF-8 BOM and CRLF line endings (the file is hand-edited).
    const match = raw
        .replace(/^\uFEFF/, "")
        .match(/^---\r?\n([\s\S]*?)\r?\n---(\r?\n[\s\S]*)?$/);
    if (!match)
        throw new Error(`mission.md in ${expDir} has no YAML frontmatter`);
    const frontmatter = yaml.load(match[1]);
    // An empty or scalar frontmatter used to fail with a bare TypeError, and a
    // sequence silently dropped the new key on dump; reject those explicitly.
    if (frontmatter === null || typeof frontmatter !== "object" || Array.isArray(frontmatter)) {
        throw new Error(`mission.md in ${expDir} frontmatter is not a YAML mapping`);
    }
    frontmatter["next_change"] = change;
    const body = match[2] ?? "";
    // yaml.dump output ends with a newline, so the closing fence follows directly.
    const newContent = `---\n${yaml.dump(frontmatter, { lineWidth: -1 })}---${body}`;
    await fs.writeFile(filePath, newContent, "utf8");
}
@@ -0,0 +1,5 @@
1
/**
 * Render the README text for an experiment directory.
 *
 * `experimentId`, `timestamp` and `goal` are interpolated into the
 * "Experiment", "Created" and "Goal" lines of the returned text.
 */
+ export declare function renderReadme(opts: {
2
+ experimentId: string;
3
+ timestamp: string;
4
+ goal: string;
5
+ }): string;
@@ -0,0 +1,25 @@
1
+ // src/trace-ai/exp/exp-store/readme-template.ts
2
/**
 * Build the README text for an experiment directory: a title with the
 * experiment id, the creation timestamp and goal, a directory guide and a
 * fenced list of common `kweaver trace exp` commands.
 *
 * @param opts `experimentId`, `timestamp` and `goal` to interpolate.
 * @returns The README content, terminated by a newline.
 */
export function renderReadme(opts) {
    const fence = "```";
    const lines = [
        `# Experiment: ${opts.experimentId}`,
        "",
        `Created: ${opts.timestamp}`,
        `Goal: ${opts.goal}`,
        "",
        "## 目录说明",
        "- mission.md — 实验意图(你来编辑)",
        "- eval-sets/ — 评测集(来自 MVP-B 或手动预置)",
        "- candidates/ — Agent 候选快照",
        "- outputs/ — 最终产物(bundle / manifest / provenance)",
        "- .trace-state/ — 运行态,勿手动编辑",
        "",
        "## 常用命令",
        fence,
        "kweaver trace exp run . — 启动 / 新开一轮",
        "kweaver trace exp resume . — 从 Deciding 状态继续",
        "kweaver trace exp show . — 查看当前状态和建议",
        "kweaver trace exp status . — 一行摘要(适合脚本)",
        "kweaver trace exp abort . — 优雅中止",
        "kweaver trace exp doctor . — 环境自检",
        fence,
        // Trailing empty entry yields the final newline after the closing fence.
        "",
    ];
    return lines.join("\n");
}
@@ -0,0 +1,3 @@
1
+ import type { RoundData } from "../schemas.js";
2
/**
 * Merge `data` into `.trace-state/rounds/round-<n>.yaml` under `expDir`,
 * creating the directory and file as needed. The stored `round` field is set
 * to `n` unless `data` itself carries a `round` value.
 */
+ export declare function writeRound(expDir: string, n: number, data: Partial<RoundData>): Promise<void>;
3
/**
 * Load every `.yaml` file found in `.trace-state/rounds` under `expDir`.
 * Resolves to an empty array when the directory or a file cannot be read.
 */
+ export declare function readAllRounds(expDir: string): Promise<RoundData[]>;
@@ -0,0 +1,33 @@
1
+ // src/trace-ai/exp/exp-store/round-yaml.ts
2
+ import fs from "node:fs/promises";
3
+ import path from "node:path";
4
+ import yaml from "js-yaml";
5
/** Location of the YAML file that stores round `n` of the experiment in `expDir`. */
function roundPath(expDir, n) {
    const fileName = `round-${n}.yaml`;
    return path.join(expDir, ".trace-state", "rounds", fileName);
}
8
/**
 * Persist (part of) the data for round `n`.
 *
 * Whatever is already stored in the round file is kept, `round` is set to
 * `n`, and the fields of `data` are layered on top before the file is
 * rewritten as YAML.
 *
 * @param expDir Experiment directory.
 * @param n Round number; selects the `round-<n>.yaml` file.
 * @param data Fields to merge into the stored round.
 */
export async function writeRound(expDir, n, data) {
    const target = roundPath(expDir, n);
    const parentDir = path.dirname(target);
    await fs.mkdir(parentDir, { recursive: true });
    let previous = {};
    try {
        const text = await fs.readFile(target, "utf8");
        previous = yaml.load(text);
    }
    catch {
        // Any read or parse failure is treated the same as "no file yet":
        // the merge simply starts from an empty object.
    }
    const merged = { ...previous, round: n, ...data };
    const serialized = yaml.dump(merged, { lineWidth: -1 });
    await fs.writeFile(target, serialized, "utf8");
}
19
/**
 * Load every round file of an experiment in ascending round order.
 *
 * Files are ordered by the number in `round-<n>.yaml`; a plain lexicographic
 * sort would put `round-10.yaml` before `round-2.yaml`, which made callers
 * that take the last element pick the wrong "latest" round from round 10 on.
 *
 * @param expDir Experiment directory.
 * @returns Parsed YAML documents, one per `.yaml` file in
 *   `.trace-state/rounds`; an empty array when the directory or any file
 *   cannot be read or parsed.
 */
export async function readAllRounds(expDir) {
    const roundsDir = path.join(expDir, ".trace-state", "rounds");
    try {
        const files = await fs.readdir(roundsDir);
        const yamlFiles = files.filter(f => f.endsWith(".yaml")).sort(compareRoundFiles);
        const rounds = [];
        for (const f of yamlFiles) {
            const raw = await fs.readFile(path.join(roundsDir, f), "utf8");
            rounds.push(yaml.load(raw));
        }
        return rounds;
    }
    catch {
        // Best effort: a missing directory or unreadable file yields no rounds.
        return [];
    }
}
/** Round number encoded in a `round-<n>.yaml` file name, or +Infinity if the name has another shape. */
function roundIndexOf(fileName) {
    const m = /^round-(\d+)\.yaml$/.exec(fileName);
    return m ? Number(m[1]) : Number.POSITIVE_INFINITY;
}
/** Comparator: numeric round order first, then plain string order for ties and unrecognized names. */
function compareRoundFiles(a, b) {
    const ia = roundIndexOf(a);
    const ib = roundIndexOf(b);
    if (ia !== ib)
        return ia < ib ? -1 : 1;
    if (a === b)
        return 0;
    return a < b ? -1 : 1;
}
@@ -0,0 +1,8 @@
1
/**
 * Result of parsing the arguments of `trace exp`.
 *
 * - `subcommand`: the validated subcommand name.
 * - `expDir`: experiment directory as an absolute path; the empty string for
 *   `list` / `info` when no directory was given.
 * - `newRun`: whether `--new-run` was passed.
 * - `json`: whether `--json` was passed.
 */
+ export interface ParsedExpArgs {
2
+ subcommand: "run" | "resume" | "show" | "status" | "abort" | "doctor" | "list" | "info";
3
+ expDir: string;
4
+ newRun?: boolean;
5
+ json?: boolean;
6
+ }
7
/**
 * Parse `[subcommand, dir?, ...flags]`. Throws an Error naming the valid
 * subcommands when the first element is not one of them.
 */
+ export declare function parseExpArgs(argv: string[]): ParsedExpArgs;
8
/**
 * Parse `argv` and execute the selected `trace exp` subcommand. Resolves to
 * the exit code: 0 on success, 1 or 2 when the subcommand reports a failure
 * or refuses to run.
 */
+ export declare function runExpCommand(argv: string[]): Promise<number>;
@@ -0,0 +1,238 @@
1
+ // src/trace-ai/exp/index.ts
2
+ import path from "node:path";
3
+ import fs from "node:fs/promises";
4
+ import { fileURLToPath } from "node:url";
5
+ import { ExpStore } from "./exp-store/index.js";
6
+ import { ExperimentCoordinator } from "./coordinator.js";
7
+ import { ClaudeCodeSynthesizer } from "./providers/synthesizer-client.js";
8
+ import { ClaudeCodeTriageClient } from "./providers/triage-client.js";
9
+ import { runEval } from "./eval-runner.js";
10
+ import { defaultRegistry } from "../../agent-providers/registry.js";
11
+ import { ClaudeCodeSubprocessProvider } from "../../agent-providers/providers/claude-code-subprocess.js";
12
+ import { PromptTemplateRegistry } from "../../agent-providers/prompt-template.js";
13
+ import { createBuiltinSemanticMatchProvider } from "../eval-set/semantic-match-provider.js";
14
+ import { ensureValidToken } from "../../auth/oauth.js";
15
+ import { fetchAgentInfo, sendChatRequest } from "../../api/agent-chat.js";
16
+ import { getTracesByConversation } from "../../api/conversations.js";
17
+ import { upsertRegistry, listRegistry } from "./exp-store/exp-registry.js";
18
+ import { runInfo, runList, getHealthChecks } from "./info.js";
19
+ import { resolveClaudeBinary } from "./claude-binary.js";
20
+ const __expIndexDir = path.dirname(fileURLToPath(import.meta.url));
21
+ const EVAL_SET_RUBRIC_DIR = path.join(__expIndexDir, "..", "eval-set", "rubric-templates");
22
/**
 * Register the Claude Code subprocess provider under "claude-code" as the
 * default provider, unless the registry already has an entry with that name.
 */
function ensureProvider() {
    if (defaultRegistry.has("claude-code")) {
        return;
    }
    const provider = new ClaudeCodeSubprocessProvider({
        binary: resolveClaudeBinary(),
        defaultTimeoutMs: 120_000,
    });
    defaultRegistry.register(provider, { setAsDefault: true });
}
30
/**
 * Parse the arguments that follow `trace exp`.
 *
 * The first element is the subcommand. Of the remaining elements, those that
 * start with `-` are flags and the first one that does not is the experiment
 * directory. Previously the second element was always taken as the directory,
 * so `exp run --new-run` or `exp info --json` resolved the flag as a path and
 * ignored the flag itself.
 *
 * @param argv Arguments after `exp`, e.g. `["run", "./my-exp", "--new-run"]`.
 * @returns The subcommand, the absolute experiment directory (empty string
 *   for `list` / `info` without a directory, the current directory for other
 *   subcommands without one) and the `--new-run` / `--json` switches.
 * @throws Error when the subcommand is not recognized.
 */
export function parseExpArgs(argv) {
    const [sub, ...rest] = argv;
    const validSubs = ["run", "resume", "show", "status", "abort", "doctor", "list", "info"];
    if (!validSubs.includes(sub)) {
        throw new Error(`Unknown exp subcommand: ${sub}. Use: ${validSubs.join(", ")}`);
    }
    const isFlag = (arg) => typeof arg === "string" && arg.startsWith("-");
    const flags = rest.filter(isFlag);
    const dir = rest.find((arg) => !isFlag(arg));
    // list / info may run without a directory (they fall back to the registry),
    // so an omitted directory stays empty instead of defaulting to cwd.
    const isDiscoveryCmd = sub === "list" || sub === "info";
    const expDir = isDiscoveryCmd
        ? (dir ? path.resolve(dir) : "")
        : path.resolve(dir ?? ".");
    return {
        subcommand: sub,
        expDir,
        newRun: flags.includes("--new-run"),
        json: flags.includes("--json"),
    };
}
47
/**
 * Entry point of `trace exp`: parse `argv` and run the matching subcommand.
 *
 * @param argv Arguments after `exp` (subcommand, optional directory, flags).
 * @returns Exit code of the subcommand: 0 on success, 1 when `info` finds no
 *   registered experiment or `doctor` reports a failed check, 2 when `run` /
 *   `resume` refuse to act in the current experiment state.
 */
export async function runExpCommand(argv) {
    const args = parseExpArgs(argv);
    const handlers = new Map([
        ["list", execList],
        ["info", execInfo],
        ["run", execRun],
        ["resume", execResume],
        ["show", execShow],
        ["status", execStatus],
        ["abort", execAbort],
        ["doctor", execDoctor],
    ]);
    const handler = handlers.get(args.subcommand);
    return handler ? handler(args) : undefined;
}
/** `exp list`: tabulate the given directory, or every registry entry when none was given. */
async function execList(args) {
    if (args.expDir) {
        // An explicit directory is listed as a one-off entry stamped "now".
        await runList([{ path: args.expDir, last_active_ts: new Date().toISOString() }]);
    }
    else {
        const entries = await listRegistry();
        await runList(entries);
    }
    return 0;
}
/** `exp info`: print a snapshot of the given directory, or of the first registry entry. */
async function execInfo(args) {
    let expDir = args.expDir;
    if (!expDir) {
        const entries = await listRegistry();
        if (entries.length === 0) {
            process.stderr.write("Error: no experiments in registry. Run 'trace exp run <dir>' first, or provide a path: trace exp info <dir>\n");
            return 1;
        }
        expDir = entries[0].path;
        process.stderr.write(`Using most recent: ${expDir}\n`);
    }
    await runInfo(expDir, { json: args.json });
    return 0;
}
/** `exp run`: start the experiment unless it is mid-flight, or finished without `--new-run`. */
async function execRun(args) {
    ensureProvider();
    const store = new ExpStore(args.expDir);
    const replayed = await store.replayState();
    const midFlight = !replayed.isTerminal && replayed.currentRound > 0 && !replayed.lastFailure;
    if (midFlight) {
        process.stderr.write(`Error: experiment in progress (state: ${replayed.currentState}). Use exp resume.\n`);
        return 2;
    }
    if (replayed.isTerminal) {
        if (!args.newRun) {
            process.stderr.write(`Error: experiment already in terminal state ${replayed.currentState}. Use --new-run to start fresh.\n`);
            return 2;
        }
        // --new-run on a finished experiment: archive the old state first.
        await store.archiveState();
    }
    await upsertRegistry(args.expDir, new Date().toISOString());
    const coord = await makeCoordinator(args.expDir);
    await coord.run();
    return 0;
}
/** `exp resume`: continue an experiment, allowed only from the Deciding state. */
async function execResume(args) {
    ensureProvider();
    const store = new ExpStore(args.expDir);
    const replayed = await store.replayState();
    if (replayed.currentState !== "Deciding") {
        process.stderr.write(`Error: cannot resume — experiment is in state ${replayed.currentState}. Only Deciding state supports resume.\n`);
        return 2;
    }
    await upsertRegistry(args.expDir, new Date().toISOString());
    const coord = await makeCoordinator(args.expDir);
    await coord.resume();
    return 0;
}
/** `exp show`: print state, suggested next change, last-round scores/triage and lineage size. */
async function execShow(args) {
    const store = new ExpStore(args.expDir);
    const replayed = await store.replayState();
    const rounds = await store.readAllRounds();
    const lineage = await store.readLineage();
    // A missing or invalid mission only suppresses the suggestion section.
    const mission = await store.readMission().catch(() => null);
    process.stdout.write(`State: ${replayed.currentState} Round: ${replayed.currentRound}\n`);
    if (mission?.next_change) {
        process.stdout.write(`Suggested next change:\n target: ${mission.next_change.target}\n hypothesis: ${mission.next_change.hypothesis}\n`);
    }
    if (rounds.length > 0) {
        const last = rounds[rounds.length - 1];
        const axis = (name) => last.scores?.[name].toFixed(2) ?? "?";
        process.stdout.write(`Last round scores: outcome=${axis("outcome")}, trajectory=${axis("trajectory")}\n`);
        if (last.triage_conclusion) {
            process.stdout.write(`Triage: ${last.triage_conclusion.diagnoses.join("; ")}\n`);
        }
    }
    process.stdout.write(`Lineage: ${lineage.length} versions\n`);
    return 0;
}
/** `exp status`: one-line summary of directory, state and round. */
async function execStatus(args) {
    const store = new ExpStore(args.expDir);
    const replayed = await store.replayState();
    process.stdout.write(`${args.expDir}: ${replayed.currentState} (round ${replayed.currentRound})\n`);
    return 0;
}
/** `exp abort`: write the abort signal for the experiment. */
async function execAbort(args) {
    const store = new ExpStore(args.expDir);
    await store.writeAbortSignal();
    process.stdout.write(`Abort signal written. Running process will stop at next checkpoint.\n`);
    return 0;
}
/** `exp doctor`: run the self-checks and return their exit code. */
async function execDoctor(args) {
    const store = new ExpStore(args.expDir);
    return runDoctor(args.expDir, store);
}
145
/**
 * Run the `exp doctor` self-checks and print one "✓"/"✗" line per check.
 *
 * Checks: mission.md loads, each listed eval set path exists, the current
 * candidate path exists, the claude-code provider is available and no step
 * failure is recorded.
 *
 * @param expDir Experiment directory; mission paths are resolved against it.
 * @param store Store used to read the mission.
 * @returns 0 when every check passed, 1 otherwise.
 */
async function runDoctor(expDir, store) {
    let failures = 0;
    const report = (label, pass, msg) => {
        const mark = pass ? "✓" : "✗";
        const detail = pass ? "" : `: ${msg}`;
        process.stdout.write(`${mark} ${label}${detail}\n`);
        if (!pass) {
            failures += 1;
        }
    };
    const exists = (p) => fs.access(p).then(() => true, () => false);
    try {
        const mission = await store.readMission();
        report("mission.md valid", true, "");
        for (const es of mission.eval_sets) {
            const esPath = path.join(expDir, es.path);
            report(`eval_set ${es.path}`, await exists(esPath), `not found: ${esPath}`);
        }
        const candPath = path.join(expDir, mission.current_candidate.path);
        report("current_candidate readable", await exists(candPath), `not found: ${candPath}`);
    }
    catch (e) {
        // Anything thrown while loading or walking the mission is reported
        // against the mission check.
        report("mission.md valid", false, String(e));
    }
    const health = await getHealthChecks(expDir);
    report("claude-code provider available", health.provider_available, "run: npx @anthropic-ai/claude-code --version");
    report("no step_failed in events", health.no_step_failed, "step_failed found in events.jsonl");
    return failures === 0 ? 0 : 1;
}
182
/**
 * Assemble an ExperimentCoordinator for `expDir`.
 *
 * Connection settings come from KWEAVER_BASE_URL / KWEAVER_TOKEN /
 * KWEAVER_BUSINESS_DOMAIN; a missing base URL or token is filled in from
 * `ensureValidToken()`. A semantic-match provider is created when a provider
 * with structured output is available; if creating it throws, a warning is
 * written and the coordinator runs without one.
 *
 * @param expDir Experiment directory handed to the coordinator and eval runner.
 * @returns The configured coordinator.
 */
async function makeCoordinator(expDir) {
    const businessDomain = process.env["KWEAVER_BUSINESS_DOMAIN"] ?? "bd_public";
    const conn = {
        baseUrl: process.env["KWEAVER_BASE_URL"] ?? "",
        accessToken: process.env["KWEAVER_TOKEN"] ?? "",
    };
    if (!conn.baseUrl || !conn.accessToken) {
        // Only the values missing from the environment are taken from the session.
        const session = await ensureValidToken();
        if (!conn.baseUrl) {
            conn.baseUrl = session.baseUrl;
        }
        if (!conn.accessToken) {
            conn.accessToken = session.accessToken;
        }
    }
    let semanticMatchProvider;
    try {
        const provider = defaultRegistry.resolve({ requiredCapabilities: ["structured_output"] });
        const usable = provider && (await provider.isAvailable());
        if (usable) {
            const promptRegistry = new PromptTemplateRegistry();
            await promptRegistry.loadBuiltinDir(EVAL_SET_RUBRIC_DIR);
            semanticMatchProvider = createBuiltinSemanticMatchProvider({ provider, promptRegistry, lang: "zh" });
        }
    }
    catch {
        process.stderr.write("warn: could not create semantic-match provider — semantic_match assertions will be skipped\n");
    }
    // Callbacks the eval runner uses to talk to the platform.
    const deps = {
        fetchAgent: async (agentId) => fetchAgentInfo({ ...conn, agentId, version: "latest", businessDomain }),
        sendChat: async ({ agentInfo, query }) => {
            const reply = await sendChatRequest({
                ...conn,
                agentId: agentInfo.id,
                agentKey: agentInfo.key,
                agentVersion: agentInfo.version,
                query,
                stream: true,
                businessDomain,
            });
            return { text: reply.text, conversationId: reply.conversationId };
        },
        fetchTrace: async (conversationId) => {
            const traces = await getTracesByConversation({ ...conn, conversationId, businessDomain });
            return { spans: traces.spans };
        },
        semanticMatchProvider,
    };
    return new ExperimentCoordinator({
        expDir,
        synthesizer: new ClaudeCodeSynthesizer(),
        triage: new ClaudeCodeTriageClient(),
        runEval: ({ evalSetPaths, candidatePath, round }) => runEval({
            evalSetPaths,
            candidatePath,
            expDir,
            round,
            maxParallel: 2,
            deps,
        }),
    });
}
@@ -0,0 +1,35 @@
1
+ import type { ThreeAxisScores } from "./schemas.js";
2
/**
 * Outcome of the experiment self-checks, one flag per check:
 * mission loads, all eval set paths exist, the current candidate path exists,
 * the claude-code provider resolves, and no failure is recorded in the
 * replayed state.
 */
+ export interface HealthChecks {
3
+ mission_valid: boolean;
4
+ eval_set_valid: boolean;
5
+ candidate_readable: boolean;
6
+ provider_available: boolean;
7
+ no_step_failed: boolean;
8
+ }
9
/**
 * Point-in-time summary of one experiment directory.
 *
 * `scores` and `triage_summary` come from the last stored round (null when
 * absent), `suggested_next` from the mission's `next_change` (null when
 * absent), and `lineage_versions` is the number of lineage entries.
 */
+ export interface ExpSnapshot {
10
+ workspace: string;
11
+ state: string;
12
+ round: number;
13
+ scores: ThreeAxisScores | null;
14
+ triage_summary: string | null;
15
+ suggested_next: {
16
+ target: string;
17
+ hypothesis: string;
18
+ } | null;
19
+ lineage_versions: number;
20
+ health: HealthChecks;
21
+ }
22
/** Run the self-checks for the experiment in `expDir`. */
+ export declare function getHealthChecks(expDir: string): Promise<HealthChecks>;
23
/** Build a snapshot of `expDir`; rejects when the directory is not accessible. */
+ export declare function buildExpSnapshot(expDir: string): Promise<ExpSnapshot>;
24
/** Serialize a snapshot as YAML. */
+ export declare function formatSnapshotYaml(snap: ExpSnapshot): string;
25
/**
 * Format one row of the experiment table. A `null` snapshot produces a row
 * with the path followed by "(missing)".
 */
+ export declare function formatSnapshotTableRow(entry: {
26
+ path: string;
27
+ last_active_ts: string;
28
+ }, snap: ExpSnapshot | null): string;
/** Print the snapshot of `expDir` to stdout, as JSON when `opts.json` is set and as YAML otherwise. */
+ export declare function runInfo(expDir: string, opts?: {
30
+ json?: boolean;
31
+ }): Promise<void>;
/** Print a header and one table row per registry entry to stdout. */
+ export declare function runList(registryEntries: Array<{
33
+ path: string;
34
+ last_active_ts: string;
35
+ }>): Promise<void>;
@@ -0,0 +1,120 @@
1
+ // src/trace-ai/exp/info.ts
2
+ import path from "node:path";
3
+ import fs from "node:fs/promises";
4
+ import yaml from "js-yaml";
5
+ import { ExpStore } from "./exp-store/index.js";
6
+ import { defaultRegistry } from "../../agent-providers/registry.js";
7
/**
 * Run the self-checks for the experiment in `expDir`.
 *
 * - `mission_valid`: mission.md could be loaded.
 * - `eval_set_valid`: every eval set path listed in the mission is accessible.
 * - `candidate_readable`: the current candidate path is accessible.
 * - `provider_available`: resolving the "claude-code" provider did not throw
 *   and did not return null.
 * - `no_step_failed`: the replayed state carries no `lastFailure`.
 *
 * @param expDir Experiment directory; mission paths are resolved against it.
 */
export async function getHealthChecks(expDir) {
    const store = new ExpStore(expDir);
    // Takes a thunk so that failures while building the path count as "not accessible" too.
    const accessible = async (buildPath) => {
        try {
            await fs.access(buildPath());
            return true;
        }
        catch {
            return false;
        }
    };
    let mission_valid = false;
    let eval_set_valid = false;
    let candidate_readable = false;
    try {
        const mission = await store.readMission();
        mission_valid = true;
        let missing = 0;
        // Every eval set is probed; there is no early exit on the first miss.
        for (const es of mission.eval_sets) {
            if (!(await accessible(() => path.join(expDir, es.path)))) {
                missing += 1;
            }
        }
        eval_set_valid = missing === 0;
        candidate_readable = await accessible(() => path.join(expDir, mission.current_candidate.path));
    }
    catch {
        // Flags not reached above keep their `false` default.
    }
    let provider_available = false;
    try {
        const resolved = defaultRegistry.resolve({ preferred: "claude-code" });
        provider_available = resolved !== null;
    }
    catch {
        provider_available = false;
    }
    const replayed = await store.replayState();
    const no_step_failed = replayed.lastFailure === null;
    return { mission_valid, eval_set_valid, candidate_readable, provider_available, no_step_failed };
}
45
/**
 * Collect state, last-round results, mission suggestion, lineage size and
 * health flags of an experiment into one snapshot object.
 *
 * @param expDir Experiment directory.
 * @returns The snapshot.
 * @throws When `expDir` is not accessible, so callers such as `runList` can
 *   render a "(missing)" row instead of a phantom snapshot.
 */
export async function buildExpSnapshot(expDir) {
    // Fail fast for a non-existent directory.
    await fs.access(expDir);
    const store = new ExpStore(expDir);
    const replayed = await store.replayState();
    const rounds = await store.readAllRounds();
    const lineage = await store.readLineage();
    // An unreadable or invalid mission just means "no suggestion".
    const mission = await store.readMission().catch(() => null);
    const health = await getHealthChecks(expDir);
    const lastRound = rounds.length > 0 ? rounds[rounds.length - 1] : null;
    let suggested_next = null;
    if (mission?.next_change) {
        suggested_next = {
            target: mission.next_change.target,
            hypothesis: mission.next_change.hypothesis,
        };
    }
    return {
        workspace: expDir,
        state: replayed.currentState,
        round: replayed.currentRound,
        scores: lastRound?.scores ?? null,
        triage_summary: lastRound?.triage_conclusion?.diagnoses.join("; ") ?? null,
        suggested_next,
        lineage_versions: lineage.length,
        health,
    };
}
73
/** Serialize a snapshot as YAML, passing `lineWidth: -1` to the dumper. */
export function formatSnapshotYaml(snap) {
    const dumpOptions = { lineWidth: -1 };
    return yaml.dump(snap, dumpOptions);
}
76
/**
 * Format one row of the experiment table printed by `runList`.
 *
 * Columns: path (50), state (12), round (6), outcome (8), trajectory (10) and
 * the last-active timestamp cut to `YYYY-MM-DD HH:MM:SS`, joined by a space.
 * A score axis that is missing or not a number is shown as "-" instead of
 * throwing, because round files are loaded without validation and may hold
 * partial scores.
 *
 * @param entry Registry entry with `path` and ISO `last_active_ts`.
 * @param snap Snapshot of the experiment, or `null` when it could not be built.
 * @returns The formatted row; for a `null` snapshot, the path followed by "(missing)".
 */
export function formatSnapshotTableRow(entry, snap) {
    const pathCol = entry.path.padEnd(50);
    if (snap === null) {
        return [pathCol, "(missing)"].join(" ");
    }
    const formatScore = (value) => (typeof value === "number" ? value.toFixed(2) : "-");
    const outcome = formatScore(snap.scores?.outcome);
    const trajectory = formatScore(snap.scores?.trajectory);
    // "2024-01-02T03:04:05.678Z" -> "2024-01-02 03:04:05"
    const lastActive = entry.last_active_ts.replace("T", " ").slice(0, 19);
    return [
        pathCol,
        snap.state.padEnd(12),
        String(snap.round).padEnd(6),
        outcome.padEnd(8),
        trajectory.padEnd(10),
        lastActive,
    ].join(" ");
}
92
/**
 * Print the snapshot of `expDir` to stdout: pretty-printed JSON followed by a
 * newline when `opts.json` is set, YAML otherwise.
 */
export async function runInfo(expDir, opts = {}) {
    const snap = await buildExpSnapshot(expDir);
    const rendered = opts.json
        ? JSON.stringify(snap, null, 2) + "\n"
        : formatSnapshotYaml(snap);
    process.stdout.write(rendered);
}
101
/**
 * Print the experiment table to stdout: a header, a dashed rule of the same
 * length, then one row per registry entry. Entries whose snapshot cannot be
 * built are rendered from a `null` snapshot.
 *
 * @param registryEntries Entries with `path` and `last_active_ts`.
 */
export async function runList(registryEntries) {
    const columns = [
        ["PATH", 50],
        ["STATE", 12],
        ["ROUND", 6],
        ["OUTCOME", 8],
        ["TRAJECTORY", 10],
    ];
    const padded = columns.map(([title, width]) => title.padEnd(width));
    const header = [...padded, "LAST_ACTIVE"].join(" ");
    process.stdout.write(`${header}\n`);
    process.stdout.write(`${"-".repeat(header.length)}\n`);
    for (const entry of registryEntries) {
        let snap = null;
        try {
            snap = await buildExpSnapshot(entry.path);
        }
        catch {
            // Snapshot failed (e.g. the path no longer exists): keep null.
        }
        const row = formatSnapshotTableRow(entry, snap);
        process.stdout.write(`${row}\n`);
    }
}
@@ -0,0 +1 @@
1
/**
 * Apply the `agent` sub-tree of a JSON patch to a copy of `candidate`.
 * `patchJson` must parse to an object with an `agent` key; other top-level
 * keys of the patch are ignored. `candidate` itself is not modified.
 */
+ export declare function applyAgentConfigPatch(candidate: Record<string, unknown>, patchJson: string): Record<string, unknown>;
@@ -0,0 +1,26 @@
1
+ // src/trace-ai/exp/patch/agent-config.ts
2
/**
 * Apply an `agent.*` patch to a deep copy of `candidate`.
 *
 * Only the `agent` sub-tree is patched; extra top-level keys in the patch are
 * intentionally ignored, so callers should scope the patch JSON to
 * `{ agent: { ... } }`. `candidate` itself is left untouched.
 *
 * @param candidate Current candidate configuration.
 * @param patchJson JSON text of the patch.
 * @returns The patched copy.
 * @throws SyntaxError when `patchJson` is not valid JSON; Error when the
 *   patch has no `agent` key or `agent` is not a JSON object.
 */
export function applyAgentConfigPatch(candidate, patchJson) {
    const patch = JSON.parse(patchJson);
    // `null` and non-object JSON values get the descriptive error instead of a
    // bare TypeError on property access.
    if (!isPlainObject(patch) || !patch.agent)
        throw new Error("agent.* patch must have an 'agent' key");
    // A string or array here would be merged character by character / index by
    // index into the agent config, so reject it up front.
    if (!isPlainObject(patch.agent))
        throw new Error("agent.* patch: 'agent' must be a JSON object");
    const result = structuredClone(candidate);
    result["agent"] = mergePatch(result["agent"], patch["agent"]);
    return result;
}
/** True for non-null, non-array objects. */
function isPlainObject(value) {
    return value !== null && typeof value === "object" && !Array.isArray(value);
}
/**
 * Merge `patch` into a shallow copy of `target`: `null` deletes a key, a
 * nested object merges recursively, anything else (arrays included) replaces
 * the value. A target that is not a plain object is treated as empty, so an
 * object patch replaces an array or scalar instead of inheriting its index keys.
 */
function mergePatch(target, patch) {
    const result = isPlainObject(target) ? { ...target } : {};
    for (const [k, v] of Object.entries(patch)) {
        // Assigning "__proto__" would swap the prototype rather than set a key.
        if (k === "__proto__") {
            continue;
        }
        if (v === null) {
            delete result[k];
        }
        else if (isPlainObject(v)) {
            result[k] = mergePatch(result[k], v);
        }
        else {
            result[k] = v;
        }
    }
    return result;
}
@@ -0,0 +1,2 @@
1
+ import type { NextChange } from "../schemas.js";
2
/**
 * Apply a proposed change to a candidate config.
 *
 * The handler is selected by the part of `change.target` before the first
 * `.`: `agent` is routed to the agent-config patcher and `skill` to the skill
 * patcher, each receiving `change.patch`.
 *
 * @param candidate Candidate config to patch.
 * @param change Proposed change; `target` and `patch` are read.
 * @returns The patched config produced by the selected patcher.
 * @throws Error when the target prefix is neither `agent` nor `skill`.
 */
+ export declare function applyPatch(candidate: Record<string, unknown>, change: NextChange): Record<string, unknown>;
@@ -0,0 +1,13 @@
1
+ import { applyAgentConfigPatch } from "./agent-config.js";
2
+ import { applySkillPatch } from "./skill.js";
3
/**
 * Route a proposed change to the patcher that owns its target namespace.
 *
 * The namespace is the portion of `change.target` before the first `.`.
 *
 * @param candidate Candidate config handed to the selected patcher.
 * @param change Proposed change; `change.patch` is forwarded unchanged.
 * @returns Whatever the selected patcher returns.
 * @throws Error when the namespace is neither `agent` nor `skill`.
 */
export function applyPatch(candidate, change) {
    const [prefix] = change.target.split(".");
    if (prefix === "agent") {
        return applyAgentConfigPatch(candidate, change.patch);
    }
    if (prefix === "skill") {
        return applySkillPatch(candidate, change.patch);
    }
    throw new Error(`Unsupported target prefix "${prefix}" — only agent.* and skill.* are supported in MVP-C`);
}
@@ -0,0 +1 @@
1
/**
 * Apply a `skill.*` change to a candidate config and return the patched copy.
 *
 * `patchJson` is parsed as JSON and must contain a `skills` key, which may
 * carry `remove`, `add` and `swap` operations. They are applied to a deep
 * clone of `candidate` in that fixed order: remove, then add, then swap.
 * Skills are matched by their `name` property.
 *
 * @param candidate Candidate config to patch; a missing `skills` list is
 *                  treated as empty.
 * @param patchJson JSON text of the form `{ "skills": { ... } }`.
 * @returns A new config object with the updated `skills` list.
 * @throws Error when the patch has no `skills` key, or when `swap.from` does
 *         not match any skill in the list.
 */
+ export declare function applySkillPatch(candidate: Record<string, unknown>, patchJson: string): Record<string, unknown>;
@@ -0,0 +1,24 @@
1
/**
 * Apply a `skill.*` change to a candidate config.
 *
 * The patch text is parsed as JSON and must contain a `skills` object with any
 * of these operations, applied in this fixed order:
 *   1. `remove`: array of skill names to drop;
 *   2. `add`:    array of skill entries to append;
 *   3. `swap`:   `{ from, to }`, replacing every skill named `from` with `to`.
 *
 * @param candidate Candidate config; it is deep-cloned and never mutated. A
 *                  missing `skills` list is treated as empty.
 * @param patchJson JSON text of the form `{ "skills": { ... } }`.
 * @returns A new config object with the updated `skills` list.
 * @throws SyntaxError when `patchJson` is not valid JSON.
 * @throws Error when the `skills` key is missing, an operation has the wrong
 *         shape, the candidate `skills` value is not an array, or `swap.from`
 *         matches no skill.
 */
export function applySkillPatch(candidate, patchJson) {
    const patch = JSON.parse(patchJson);
    // `?.` covers a payload that parses to JSON null.
    const ops = patch?.skills;
    if (!ops)
        throw new Error("skill.* patch must have a 'skills' key");
    const result = structuredClone(candidate);
    let skills = result["skills"] ?? [];
    if (!Array.isArray(skills))
        throw new Error("skill.* patch requires candidate 'skills' to be an array");
    // Order is significant: remove → add → swap
    if (ops.remove) {
        // A bare string would be split into single characters by `new Set`.
        if (!Array.isArray(ops.remove))
            throw new Error("skill.remove must be an array of skill names");
        const toRemove = new Set(ops.remove);
        skills = skills.filter(s => !toRemove.has(s?.name));
    }
    if (ops.add) {
        // Spreading a non-array would explode strings or throw on objects.
        if (!Array.isArray(ops.add))
            throw new Error("skill.add must be an array of skill entries");
        skills = [...skills, ...ops.add];
    }
    if (ops.swap) {
        const { from, to } = ops.swap;
        // Without a replacement the list would end up with `undefined` holes.
        if (to === undefined)
            throw new Error("skill.swap must provide a 'to' entry");
        // Null-safe match so a stray null entry cannot crash the lookup.
        const isMatch = s => s != null && s.name === from;
        if (!skills.some(isMatch))
            throw new Error(`skill.swap: skill "${from}" not found in candidate`);
        skills = skills.map(s => (isMatch(s) ? to : s));
    }
    result["skills"] = skills;
    return result;
}
@@ -0,0 +1,14 @@
1
+ import type { Mission, NextChange, RoundData } from "../schemas.js";
2
/**
 * Input handed to a {@link SynthesizerClient} when asking for the next change.
 */
+ export interface SynthesizerInput {
3
/** Experiment mission; `ClaudeCodeSynthesizer` embeds its `goal` in the prompt. */
+ mission: Mission;
4
/** Current candidate config; `ClaudeCodeSynthesizer` renders it as YAML in the prompt. */
+ candidateConfig: Record<string, unknown>;
5
/** Most recent round, if any; its triage hints are quoted in the prompt when present. */
+ prevRound?: RoundData;
6
/** Earlier rounds; each is summarized on one line (round number, outcome score, hints). */
+ prevRounds: RoundData[];
7
/** Optional cross-round context string, inserted verbatim into the prompt when set. */
+ crossRoundMemoryRef?: string;
8
+ }
9
/**
 * Contract for components that propose the next change to try in an experiment.
 */
+ export interface SynthesizerClient {
10
/** Produce the next proposed change for the given experiment state. */
+ generate(input: SynthesizerInput): Promise<NextChange>;
11
+ }
12
/**
 * {@link SynthesizerClient} backed by the agent provider resolved from the
 * default registry with `claude-code` as the preferred provider.
 */
+ export declare class ClaudeCodeSynthesizer implements SynthesizerClient {
13
/**
 * Build a prompt from `input`, invoke the provider, and return its output.
 *
 * @throws Error when no `claude-code` provider can be resolved.
 */
+ generate(input: SynthesizerInput): Promise<NextChange>;
14
+ }
@@ -0,0 +1,39 @@
1
+ // src/trace-ai/exp/providers/synthesizer-client.ts
2
+ import yaml from "js-yaml";
3
+ import { defaultRegistry } from "../../../agent-providers/registry.js";
4
+ import { NextChangeSchema } from "../schemas.js";
5
/**
 * Synthesizer that asks the `claude-code` agent provider for the next change
 * to try, based on the experiment goal, the current candidate config and the
 * history of previous rounds.
 */
export class ClaudeCodeSynthesizer {
    /**
     * Build the synthesis prompt, invoke the provider, and return its output.
     *
     * @param input Mission, candidate config, previous rounds and optional
     *              cross-round context (see `SynthesizerInput`).
     * @returns The provider's `output`, requested against `NextChangeSchema`.
     * @throws Error when no `claude-code` provider can be resolved.
     */
    async generate(input) {
        const provider = defaultRegistry.resolve({ preferred: "claude-code" });
        if (!provider)
            throw new Error("claude-code provider not available");
        // Optional chaining reaches through `outcome` and `hints` as well, so a
        // partially populated round degrades to "?" / "none" instead of throwing.
        const prevSummary = input.prevRounds
            .map(r => `Round ${r.round}: outcome=${r.scores?.outcome?.toFixed(2) ?? "?"}, hints=${r.triage_conclusion?.hints?.join("; ") ?? "none"}`)
            .join("\n");
        const lastTriage = input.prevRound?.triage_conclusion;
        const triageSection = lastTriage
            ? `TRIAGE HINTS FROM LAST ROUND:\n${(lastTriage.hints ?? []).join("\n")}`
            : "";
        const memorySection = input.crossRoundMemoryRef
            ? `CROSS-ROUND CONTEXT: ${input.crossRoundMemoryRef}`
            : "";
        // The template body is flush-left on purpose: leading whitespace would
        // become part of the prompt text.
        // In the example line `\\"` emits a literal backslash-quote, so the
        // "patch" value is shown as a correctly escaped JSON string; a bare `\"`
        // inside a template literal collapses to `"` and yields invalid JSON.
        const prompt = `You are an agent optimization assistant. Given an experiment goal and round results, suggest the next change to try.

GOAL: ${input.mission.goal}

CURRENT CANDIDATE CONFIG:
${yaml.dump(input.candidateConfig, { lineWidth: 80 })}

PREVIOUS ROUNDS:
${prevSummary || "None (first round)"}

${triageSection}

${memorySection}

Respond with a JSON object with exactly these fields:
- "target": one of "agent.system_prompt", "agent.temperature", "agent.model", "skill.add", "skill.remove", "skill.swap"
- "hypothesis": brief explanation of why this change might help
- "patch": a JSON Merge Patch string to apply to the candidate config
  For agent.* targets the patch must be {"agent": {...}}; for skill.* targets it must be {"skills": {...}} with any of "remove" (array of skill names), "add" (array of skill entries), "swap" ({"from": <skill name>, "to": <skill entry>}).

Example for changing system_prompt:
{"target": "agent.system_prompt", "hypothesis": "Add explicit stop condition", "patch": "{\\"agent\\":{\\"system_prompt\\":\\"New prompt here\\"}}"}`;
        const response = await provider.invoke({
            prompt,
            outputSchema: NextChangeSchema,
            correlationId: `synthesizer-${Date.now()}`,
        });
        return response.output;
    }
}
@@ -0,0 +1,19 @@
1
+ import type { RoundData } from "../schemas.js";
2
/**
 * Input handed to a {@link TriageClient} when analysing a round.
 */
+ export interface TriageInput {
3
/** The round being triaged. */
+ currentRound: RoundData;
4
/** Rounds that preceded `currentRound` (presumably oldest first; TODO confirm ordering). */
+ prevRounds: RoundData[];
5
/** Candidate config associated with the round under triage. */
+ candidateConfig: Record<string, unknown>;
6
/** Optional cross-round context reference (presumably from an earlier triage; TODO confirm). */
+ crossRoundMemoryRef?: string;
7
+ }
8
/**
 * Outcome of triaging a round.
 */
+ export interface TriageResult {
9
/** Free-text diagnoses produced by the triage step. */
+ diagnoses: string[];
10
/** Free-text hints (presumably the hints later quoted to the synthesizer; TODO confirm). */
+ hints: string[];
11
/** Either `"continue"` or `"publish"` (presumably keep iterating vs. stop and publish; TODO confirm). */
+ verdict: "continue" | "publish";
12
/** Memory token returned by triage (presumably reused as `crossRoundMemoryRef`; TODO confirm). */
+ new_memory_token: string;
13
+ }
14
/**
 * Contract for components that triage a finished round.
 */
+ export interface TriageClient {
15
/** Analyse the given round and resolve with the triage result. */
+ triage(input: TriageInput): Promise<TriageResult>;
16
+ }
17
/**
 * {@link TriageClient} implementation.
 * NOTE(review): by its name this presumably delegates to the `claude-code`
 * agent provider; the implementation is not visible here, so confirm.
 */
+ export declare class ClaudeCodeTriageClient implements TriageClient {
18
/** Triage the given round and resolve with the result. */
+ triage(input: TriageInput): Promise<TriageResult>;
19
+ }