@kweaver-ai/kweaver-sdk 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. package/README.md +40 -52
  2. package/README.zh.md +41 -46
  3. package/dist/agent-providers/index.d.ts +7 -0
  4. package/dist/agent-providers/index.js +5 -0
  5. package/dist/agent-providers/prompt-template.d.ts +62 -0
  6. package/dist/agent-providers/prompt-template.js +105 -0
  7. package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
  8. package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
  9. package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
  10. package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
  11. package/dist/agent-providers/providers/stub.d.ts +47 -0
  12. package/dist/agent-providers/providers/stub.js +77 -0
  13. package/dist/agent-providers/registry.d.ts +45 -0
  14. package/dist/agent-providers/registry.js +77 -0
  15. package/dist/agent-providers/types.d.ts +91 -0
  16. package/dist/agent-providers/types.js +25 -0
  17. package/dist/api/agent-chat.js +8 -6
  18. package/dist/api/context-loader.d.ts +1 -0
  19. package/dist/api/resources.d.ts +94 -0
  20. package/dist/api/resources.js +166 -0
  21. package/dist/api/semantic-search.d.ts +5 -0
  22. package/dist/api/semantic-search.js +5 -0
  23. package/dist/api/skills.d.ts +75 -2
  24. package/dist/api/skills.js +108 -12
  25. package/dist/api/trace.d.ts +5 -0
  26. package/dist/api/trace.js +4 -0
  27. package/dist/cli.js +109 -15
  28. package/dist/client.d.ts +3 -3
  29. package/dist/client.js +5 -5
  30. package/dist/commands/agent/mode.d.ts +6 -0
  31. package/dist/commands/agent/mode.js +75 -0
  32. package/dist/commands/agent-members.js +27 -11
  33. package/dist/commands/agent.js +469 -286
  34. package/dist/commands/auth.js +184 -71
  35. package/dist/commands/bkn-metric.js +37 -16
  36. package/dist/commands/bkn-ops.js +164 -86
  37. package/dist/commands/bkn-query.js +99 -31
  38. package/dist/commands/bkn-schema.d.ts +3 -3
  39. package/dist/commands/bkn-schema.js +127 -86
  40. package/dist/commands/bkn.js +153 -114
  41. package/dist/commands/call.js +23 -13
  42. package/dist/commands/config.js +22 -12
  43. package/dist/commands/context-loader.js +625 -49
  44. package/dist/commands/dataflow.js +14 -6
  45. package/dist/commands/ds.js +52 -30
  46. package/dist/commands/explore.js +18 -15
  47. package/dist/commands/model.js +53 -42
  48. package/dist/commands/resource.d.ts +1 -0
  49. package/dist/commands/{dataview.js → resource.js} +62 -84
  50. package/dist/commands/skill.d.ts +21 -1
  51. package/dist/commands/skill.js +567 -43
  52. package/dist/commands/token.js +11 -0
  53. package/dist/commands/tool.js +46 -29
  54. package/dist/commands/toolbox.js +31 -15
  55. package/dist/commands/trace.d.ts +26 -1
  56. package/dist/commands/trace.js +515 -15
  57. package/dist/commands/vega.js +466 -250
  58. package/dist/help/format.d.ts +65 -0
  59. package/dist/help/format.js +141 -0
  60. package/dist/index.d.ts +5 -5
  61. package/dist/index.js +3 -3
  62. package/dist/resources/bkn.d.ts +5 -0
  63. package/dist/resources/bkn.js +5 -0
  64. package/dist/resources/{dataviews.d.ts → resources.d.ts} +10 -11
  65. package/dist/resources/{dataviews.js → resources.js} +12 -13
  66. package/dist/resources/skills.d.ts +17 -1
  67. package/dist/resources/skills.js +32 -1
  68. package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
  69. package/dist/trace-ai/diagnose/agent-binding.js +257 -0
  70. package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
  71. package/dist/trace-ai/diagnose/index.d.ts +32 -0
  72. package/dist/trace-ai/diagnose/index.js +246 -0
  73. package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
  74. package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
  75. package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
  76. package/dist/trace-ai/diagnose/query-extractor.js +45 -0
  77. package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
  78. package/dist/{trace-core → trace-ai}/diagnose/report-assembler.js +19 -9
  79. package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
  80. package/dist/trace-ai/diagnose/report-markdown.js +192 -0
  81. package/dist/{trace-core → trace-ai}/diagnose/rule-loader.js +42 -8
  82. package/dist/{trace-core → trace-ai}/diagnose/schemas.d.ts +77 -2
  83. package/dist/trace-ai/diagnose/schemas.js +154 -0
  84. package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
  85. package/dist/trace-ai/diagnose/signal-probe.js +39 -0
  86. package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
  87. package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
  88. package/dist/{trace-core → trace-ai}/diagnose/trace-shaper.js +1 -0
  89. package/dist/{trace-core → trace-ai}/diagnose/types.d.ts +55 -6
  90. package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
  91. package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
  92. package/dist/trace-ai/eval-set/builder.d.ts +36 -0
  93. package/dist/trace-ai/eval-set/builder.js +126 -0
  94. package/dist/trace-ai/eval-set/index.d.ts +15 -0
  95. package/dist/trace-ai/eval-set/index.js +10 -0
  96. package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
  97. package/dist/trace-ai/eval-set/output-writer.js +126 -0
  98. package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
  99. package/dist/trace-ai/eval-set/query-picker.js +147 -0
  100. package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
  101. package/dist/trace-ai/eval-set/redactor.js +133 -0
  102. package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
  103. package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
  104. package/dist/trace-ai/eval-set/schemas.js +130 -0
  105. package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
  106. package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
  107. package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
  108. package/dist/trace-ai/eval-set/test-runner.js +153 -0
  109. package/dist/trace-ai/eval-set/types.d.ts +46 -0
  110. package/dist/trace-ai/eval-set/types.js +8 -0
  111. package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
  112. package/dist/trace-ai/exp/bundle-writer.js +54 -0
  113. package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
  114. package/dist/trace-ai/exp/claude-binary.js +30 -0
  115. package/dist/trace-ai/exp/coordinator.d.ts +45 -0
  116. package/dist/trace-ai/exp/coordinator.js +203 -0
  117. package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
  118. package/dist/trace-ai/exp/eval-runner.js +47 -0
  119. package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
  120. package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
  121. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
  122. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
  123. package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
  124. package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
  125. package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
  126. package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
  127. package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
  128. package/dist/trace-ai/exp/exp-store/index.js +59 -0
  129. package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
  130. package/dist/trace-ai/exp/exp-store/lock.js +73 -0
  131. package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
  132. package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
  133. package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
  134. package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
  135. package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
  136. package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
  137. package/dist/trace-ai/exp/index.d.ts +8 -0
  138. package/dist/trace-ai/exp/index.js +238 -0
  139. package/dist/trace-ai/exp/info.d.ts +35 -0
  140. package/dist/trace-ai/exp/info.js +120 -0
  141. package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
  142. package/dist/trace-ai/exp/patch/agent-config.js +26 -0
  143. package/dist/trace-ai/exp/patch/index.d.ts +2 -0
  144. package/dist/trace-ai/exp/patch/index.js +13 -0
  145. package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
  146. package/dist/trace-ai/exp/patch/skill.js +24 -0
  147. package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
  148. package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
  149. package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
  150. package/dist/trace-ai/exp/providers/triage-client.js +51 -0
  151. package/dist/trace-ai/exp/schemas.d.ts +147 -0
  152. package/dist/trace-ai/exp/schemas.js +50 -0
  153. package/dist/trace-ai/exp/scoring.d.ts +2 -0
  154. package/dist/trace-ai/exp/scoring.js +46 -0
  155. package/dist/trace-ai/scan/aggregator.d.ts +20 -0
  156. package/dist/trace-ai/scan/aggregator.js +26 -0
  157. package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
  158. package/dist/trace-ai/scan/artifacts/paths.js +18 -0
  159. package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
  160. package/dist/trace-ai/scan/artifacts/writer.js +96 -0
  161. package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
  162. package/dist/trace-ai/scan/batched-rubric.js +159 -0
  163. package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
  164. package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
  165. package/dist/trace-ai/scan/index.d.ts +31 -0
  166. package/dist/trace-ai/scan/index.js +390 -0
  167. package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
  168. package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
  169. package/dist/trace-ai/scan/runner.d.ts +25 -0
  170. package/dist/trace-ai/scan/runner.js +42 -0
  171. package/dist/trace-ai/scan/sampler.d.ts +18 -0
  172. package/dist/trace-ai/scan/sampler.js +81 -0
  173. package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
  174. package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
  175. package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
  176. package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
  177. package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
  178. package/dist/trace-ai/scan/single-agent-validator.js +42 -0
  179. package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
  180. package/dist/trace-ai/scan/traces-list-parser.js +46 -0
  181. package/package.json +2 -2
  182. package/dist/api/dataviews.d.ts +0 -117
  183. package/dist/api/dataviews.js +0 -265
  184. package/dist/commands/dataview.d.ts +0 -8
  185. package/dist/trace-core/diagnose/index.d.ts +0 -9
  186. package/dist/trace-core/diagnose/index.js +0 -104
  187. package/dist/trace-core/diagnose/report-assembler.d.ts +0 -12
  188. package/dist/trace-core/diagnose/schemas.js +0 -94
  189. package/dist/trace-core/diagnose/signal-probe.d.ts +0 -5
  190. package/dist/trace-core/diagnose/signal-probe.js +0 -21
  191. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +0 -0
  192. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +0 -0
  193. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +0 -0
  194. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +0 -0
  195. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.js +0 -0
  196. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +0 -0
  197. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/register.d.ts +0 -0
  198. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/register.js +0 -0
  199. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +0 -0
  200. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.js +0 -0
  201. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +0 -0
  202. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.d.ts +0 -0
  203. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.js +0 -0
  204. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-error-swallowed.yaml +0 -0
  205. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +0 -0
  206. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.js +0 -0
  207. /package/dist/{trace-core → trace-ai}/diagnose/builtin-rules/tool-loop-no-state-change.yaml +0 -0
  208. /package/dist/{trace-core → trace-ai}/diagnose/predicate-registry.d.ts +0 -0
  209. /package/dist/{trace-core → trace-ai}/diagnose/predicate-registry.js +0 -0
  210. /package/dist/{trace-core → trace-ai}/diagnose/rule-loader.d.ts +0 -0
  211. /package/dist/{trace-core → trace-ai}/diagnose/synthesizer-template.d.ts +0 -0
  212. /package/dist/{trace-core → trace-ai}/diagnose/synthesizer-template.js +0 -0
  213. /package/dist/{trace-core → trace-ai}/diagnose/trace-shaper.d.ts +0 -0
  214. /package/dist/{trace-core → trace-ai}/diagnose/types.js +0 -0
@@ -1,40 +1,142 @@
1
+ import path from "node:path";
2
+ import { readFile } from "node:fs/promises";
3
+ import { fileURLToPath } from "node:url";
1
4
  import yargs from "yargs";
2
- import { diagnose, TraceNotFoundError } from "../trace-core/diagnose/index.js";
3
- import { RuleLoadError } from "../trace-core/diagnose/rule-loader.js";
4
- import { RuleProbeError } from "../trace-core/diagnose/signal-probe.js";
5
- import { RuleSchema } from "../trace-core/diagnose/schemas.js";
5
+ import { derivePaths, diagnose, TraceNotFoundError } from "../trace-ai/diagnose/index.js";
6
+ import { RuleLoadError } from "../trace-ai/diagnose/rule-loader.js";
7
+ import { RuleProbeError } from "../trace-ai/diagnose/signal-probe.js";
8
+ import { RuleSchema } from "../trace-ai/diagnose/schemas.js";
6
9
  import { ensureValidToken } from "../auth/oauth.js";
10
+ import { defaultRegistry } from "../agent-providers/registry.js";
11
+ import { PromptTemplateRegistry } from "../agent-providers/prompt-template.js";
12
+ import { ClaudeCodeSubprocessProvider } from "../agent-providers/providers/claude-code-subprocess.js";
13
+ import { runBatch } from "../trace-ai/scan/index.js";
14
+ import { parseTracesList, TracesListError } from "../trace-ai/scan/traces-list-parser.js";
15
+ import { SingleAgentValidationError } from "../trace-ai/scan/single-agent-validator.js";
16
+ import { build, BuilderError } from "../trace-ai/eval-set/index.js";
17
+ import { run as runEvalSetTest } from "../trace-ai/eval-set/test-runner.js";
18
+ import { createBuiltinSemanticMatchProvider } from "../trace-ai/eval-set/semantic-match-provider.js";
19
+ import { fetchAgentInfo, sendChatRequest } from "../api/agent-chat.js";
20
+ import { getTracesByConversation } from "../api/conversations.js";
21
+ import { EvalSetIndexSchema, EvalSetShardSchema, EvalSetInputSchema, TestReportSchema, } from "../trace-ai/eval-set/schemas.js";
7
22
  import yaml from "js-yaml";
8
23
  import fs from "node:fs/promises";
24
+ import { runExpCommand } from "../trace-ai/exp/index.js";
25
+ import { resolveClaudeBinary } from "../trace-ai/exp/claude-binary.js";
26
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
27
+ const EVAL_SET_RUBRIC_DIR = path.join(__dirname, "..", "trace-ai", "eval-set", "rubric-templates");
28
/**
 * Make sure the "claude-code" agent provider exists in the default registry.
 *
 * Safe to call repeatedly: if the registry already reports a "claude-code"
 * entry, nothing happens. Otherwise a ClaudeCodeSubprocessProvider is built
 * (binary taken from resolveClaudeBinary(), default timeout 120 000 ms) and
 * registered with `setAsDefault: true`.
 */
function ensureDefaultProviderRegistered() {
    const alreadyRegistered = defaultRegistry.has("claude-code");
    if (!alreadyRegistered) {
        const provider = new ClaudeCodeSubprocessProvider({
            binary: resolveClaudeBinary(),
            defaultTimeoutMs: 120_000,
        });
        defaultRegistry.register(provider, { setAsDefault: true });
    }
}
9
37
  export function parseTraceArgs(argv) {
10
38
  if (argv.length === 0) {
11
39
  return defaults("help");
12
40
  }
13
41
  const head = argv[0];
14
- if (head !== "diagnose") {
42
+ if (head !== "diagnose" && head !== "eval-set" && head !== "schema") {
15
43
  return defaults("help");
16
44
  }
17
45
  if (argv[1] === "rules" && argv[2] === "validate") {
18
46
  return { ...defaults("rules-validate"), rulePath: argv[3] };
19
47
  }
20
- // diagnose <traceId> [flags...]
48
+ // M5 PR-A: eval-set build
49
+ if (head === "eval-set" && argv[1] === "build") {
50
+ const parsed = yargs(argv.slice(2))
51
+ .option("queries", { type: "string", default: undefined })
52
+ .option("diagnosis", { type: "string", default: undefined })
53
+ .option("out", { type: "string", default: undefined })
54
+ .option("on-conflict", {
55
+ type: "string",
56
+ choices: ["fail", "skip", "overwrite"],
57
+ default: "fail",
58
+ })
59
+ .option("redaction-rules", { type: "string", default: undefined })
60
+ .option("eval-set-id", { type: "string", default: undefined })
61
+ .help(false)
62
+ .parseSync();
63
+ return {
64
+ ...defaults("eval-set-build"),
65
+ queriesPath: parsed.queries,
66
+ diagnosisPath: parsed.diagnosis,
67
+ out: parsed.out ?? null,
68
+ onConflict: parsed["on-conflict"],
69
+ redactionRules: parsed["redaction-rules"],
70
+ evalSetId: parsed["eval-set-id"],
71
+ };
72
+ }
73
+ // M5 PR-B: eval-set test
74
+ if (head === "eval-set" && argv[1] === "test") {
75
+ const parsed = yargs(argv.slice(2))
76
+ .option("candidate", { type: "string", default: undefined })
77
+ .option("out", { type: "string", default: undefined })
78
+ .option("max-parallel", { type: "number", default: 4 })
79
+ .option("lang", { type: "string", default: undefined })
80
+ .help(false)
81
+ .parseSync();
82
+ const candidateRaw = parsed.candidate ?? "";
83
+ const atIdx = candidateRaw.indexOf("@");
84
+ const candidateAgentId = atIdx >= 0 ? candidateRaw.slice(0, atIdx) : candidateRaw;
85
+ const candidateAgentVersion = atIdx >= 0 ? candidateRaw.slice(atIdx + 1) : undefined;
86
+ return {
87
+ ...defaults("eval-set-test"),
88
+ evalSetPath: String(parsed._[0] ?? ""),
89
+ candidateAgentId,
90
+ candidateAgentVersion,
91
+ out: parsed.out ?? null,
92
+ maxParallel: parsed["max-parallel"],
93
+ lang: parsed.lang ?? null,
94
+ };
95
+ }
96
+ // M5 PR-A: schema validate
97
+ if (head === "schema" && argv[1] === "validate") {
98
+ const parsed = yargs(argv.slice(2))
99
+ .option("kind", { type: "string", default: undefined })
100
+ .help(false)
101
+ .parseSync();
102
+ return {
103
+ ...defaults("schema-validate"),
104
+ schemaValidatePath: String(parsed._[0] ?? ""),
105
+ schemaKind: parsed.kind,
106
+ };
107
+ }
108
+ // diagnose [<conversation_id>] [flags...]
21
109
  const parsed = yargs(argv.slice(1))
22
110
  .option("out", { type: "string", default: undefined })
23
111
  .option("rules", { type: "string", default: undefined })
24
112
  .option("builtin", { type: "boolean", default: true }) // --no-builtin sets this to false
25
- .option("llm", { type: "boolean", default: false }) // PR-A: forced false (--no-llm)
113
+ .option("llm", { type: "boolean", default: true }) // --no-llm sets this to false (PR-B reversal)
114
+ .option("artifacts", { type: "boolean", default: true }) // --no-artifacts sets this to false
115
+ .option("traces", { type: "string", default: undefined })
116
+ .option("max-parallel", { type: "number", default: 4 })
117
+ .option("format", { type: "string", choices: ["yaml", "markdown", "both"], default: undefined })
118
+ .option("lang", { type: "string", choices: ["en", "zh"], default: undefined })
26
119
  .option("token", { type: "string" })
27
120
  .option("base-url", { type: "string" })
28
121
  .option("business-domain", { alias: "bd", type: "string" })
29
122
  .help(false)
30
123
  .parseSync();
124
+ const positional = String(parsed._[0] ?? "");
125
+ const tracesArg = parsed.traces;
126
+ const mode = tracesArg !== undefined ? "batch" : (positional ? "single" : undefined);
31
127
  return {
32
128
  subcommand: "diagnose",
33
- conversationId: String(parsed._[0] ?? ""),
129
+ mode,
130
+ conversationId: mode === "single" ? positional : undefined,
131
+ traces: tracesArg,
34
132
  out: parsed.out ?? null,
35
133
  rulesDir: parsed.rules ?? null,
36
134
  noBuiltin: !parsed.builtin,
37
135
  noLlm: !parsed.llm,
136
+ noArtifacts: !parsed.artifacts,
137
+ maxParallel: parsed["max-parallel"],
138
+ format: parsed.format ?? null,
139
+ lang: parsed.lang ?? null,
38
140
  baseUrl: parsed.baseUrl ?? null,
39
141
  token: parsed.token ?? null,
40
142
  businessDomain: parsed.businessDomain ?? null,
@@ -46,7 +148,11 @@ function defaults(sub) {
46
148
  out: null,
47
149
  rulesDir: null,
48
150
  noBuiltin: false,
49
- noLlm: true,
151
+ noLlm: false,
152
+ noArtifacts: false,
153
+ maxParallel: 4,
154
+ format: null,
155
+ lang: null,
50
156
  baseUrl: null,
51
157
  token: null,
52
158
  businessDomain: null,
@@ -61,15 +167,83 @@ Subcommands:
61
167
  'agent sessions'; spans are fetched from agent-observability)
62
168
  --out <file> Write report to file (default: stdout)
63
169
  --rules <dir> Override <cwd>/diagnosis-rules/
64
- --no-builtin Disable the 5 builtin baseline rules
65
- --no-llm PR-A: always on; PR-B will allow disabling
170
+ --no-builtin Disable the 5+1 builtin baseline rules
171
+ --no-llm Disable LLM-judged rubric rules and the agent synthesizer.
172
+ Rubric findings are skipped (recorded in rules_skipped);
173
+ the within-trace summary falls back to template mode.
174
+ --no-artifacts Disable per-stage artifact persistence (default: artifacts ARE
175
+ written next to <out> as <stem>.artifacts/)
176
+ --format <yaml|markdown|both> Output format. yaml is the machine-readable source of truth;
177
+ markdown is the human-readable view (paste into tickets / PRs).
178
+ When --out is a file path, both = write <stem>.yaml AND
179
+ <stem>.md side by side (default for --out).
180
+ When piping to stdout (no --out), default is yaml; pass
181
+ --format=markdown to emit markdown instead.
182
+ --lang <en|zh> Output locale for agent-judged natural-language fields:
183
+ rubric reasoning, synthesizer headline / fix_priority reason.
184
+ Default: en. JSON keys, enum values, and span IDs always
185
+ remain English regardless of --lang — only prose is localized.
186
+
187
+ trace diagnose --traces=<list> --out=<dir> Batch mode: diagnose N traces for the same agent
188
+ --traces=conv1,conv2,... Comma-separated conversation_ids
189
+ --traces=@/path/to/ids.txt Or @file with one id per line (# comments and blanks ignored)
190
+ --out=<dir> Required; fail-fast if missing
191
+ --no-artifacts Disable artifact persistence
192
+ --max-parallel <n> Concurrency limit (default 4; Sonnet rate-limit friendly)
193
+ --rules <dir> Override <cwd>/diagnosis-rules/
194
+ --no-builtin Disable the 5+1 builtin baseline rules
195
+ --format <yaml|markdown|both> Default 'both'
196
+ --lang <en|zh> Default 'en'
66
197
 
67
198
  trace diagnose rules validate <rule.yaml> Validate a rule yaml file (exit 0 ok, 6 fail)
68
199
 
200
+ trace eval-set build [--diagnosis=<dir> | --queries=<file>] --out=<dir>
201
+ Build a git-trackable eval-set yaml directory from
202
+ either M4 diagnosis reports or a simplified
203
+ queries+golden-truth input file.
204
+ --diagnosis=<dir> Lift suggested_eval_case from M4 report findings
205
+ (mutually exclusive with --queries=)
206
+ --queries=<file> Lift from simplified trace-eval-set-input/v1 yaml
207
+ (mutually exclusive with --diagnosis=)
208
+ --out=<dir> Required output directory; index.yaml + cases.yaml
209
+ --on-conflict=fail|skip|overwrite query_id conflict strategy (default: fail; exit 6 on conflict)
210
+ --redaction-rules=<path> Override <repo>/redaction-rules/ source for PII redaction
211
+ --eval-set-id=<id> Override default eval_set_id (basename of --out)
212
+
213
+ trace eval-set test <eval-set-dir> --candidate=<agent_id>[@<version>] --out=<dir>
214
+ Run each case in the eval-set against a candidate agent
215
+ and write a trace-test-report/v1 yaml to --out/report.yaml.
216
+ --candidate=<id>[@<version>] Agent ID to test; optional @version suffix (default: published)
217
+ --out=<dir> Required output directory; report.yaml is written here
218
+ --max-parallel=<n> Concurrency limit (default 4)
219
+ --lang=en|zh Language for semantic_match reasoning text (default en)
220
+
221
+ trace schema validate <file> [--kind=<kind>]
222
+ Validate a yaml file against its M5/M4 zod schema
223
+ (eval-set / eval-set-index / eval-set-input / test-report).
224
+ --kind auto-inferred from file path; pass explicitly
225
+ if inference fails (exit 2 = kind required).
226
+
69
227
  Auth flags (any subcommand): --token, --base-url, --business-domain (-bd).
228
+
229
+ Batch mode constraints:
230
+ - All --traces conv_ids must resolve to the same agent_id; mismatch → exit 2
231
+ - --no-llm not supported in batch mode → exit 2 (use single-trace for offline)
232
+ - Per-trace yaml on disk is the resume ground truth; rerunning a scan with
233
+ the same --out reuses existing per-trace reports (atomic .partial → rename)
234
+
235
+ Rubric rules and the agent synthesizer use the local 'claude' CLI by default
236
+ (installed via Claude Code). If 'claude' isn't on PATH, rubric rules are
237
+ skipped with reason='provider-not-available:claude-code' and the synthesizer
238
+ falls back to deterministic template mode — the rest of the report is still
239
+ produced.
70
240
  `);
71
241
  }
72
242
  export async function runTraceCommand(rest) {
243
+ // exp subcommand — dispatch before other checks (no platform auth needed)
244
+ if (rest[0] === "exp") {
245
+ return runExpCommand(rest.slice(1));
246
+ }
73
247
  const args = parseTraceArgs(rest);
74
248
  if (args.subcommand === "help") {
75
249
  printHelp();
@@ -78,11 +252,49 @@ export async function runTraceCommand(rest) {
78
252
  if (args.subcommand === "rules-validate") {
79
253
  return await runRulesValidate(args.rulePath ?? "");
80
254
  }
81
- // diagnose
82
- if (!args.conversationId) {
255
+ if (args.subcommand === "eval-set-build") {
256
+ return await runEvalSetBuild(args);
257
+ }
258
+ if (args.subcommand === "eval-set-test") {
259
+ return await runEvalSetTestCmd(args);
260
+ }
261
+ if (args.subcommand === "schema-validate") {
262
+ try {
263
+ return await runSchemaValidate({
264
+ filePath: args.schemaValidatePath ?? "",
265
+ kind: args.schemaKind,
266
+ });
267
+ }
268
+ catch (e) {
269
+ if (e instanceof SchemaKindRequiredError) {
270
+ process.stderr.write(`error: ${e.message}\n`);
271
+ return 2;
272
+ }
273
+ throw e;
274
+ }
275
+ }
276
+ // diagnose — batch or single
277
+ if (args.mode !== "batch" && !args.conversationId) {
83
278
  process.stderr.write("error: missing <conversation_id>\n");
84
279
  return 2;
85
280
  }
281
+ // Validate batch-mode args BEFORE platform/token resolution so arg-validation
282
+ // failures surface as exit 2 (bad usage) regardless of whether the user has
283
+ // an active platform configured — required for environments like CI.
284
+ if (args.mode === "batch") {
285
+ if (args.noLlm) {
286
+ process.stderr.write("error: --traces (batch mode) does not support --no-llm; the cross-trace synthesizer requires LLM. Use --traces with a fresh run or fall back to single-trace `diagnose <conv_id>` for offline cases.\n");
287
+ return 2;
288
+ }
289
+ if (args.out === null) {
290
+ process.stderr.write("error: --traces requires --out=<dir> to avoid writing N yaml files into the current working directory\n");
291
+ return 2;
292
+ }
293
+ if (!Number.isInteger(args.maxParallel) || args.maxParallel < 1 || args.maxParallel > 64) {
294
+ process.stderr.write(`error: --max-parallel must be a positive integer between 1 and 64; got ${args.maxParallel}\n`);
295
+ return 2;
296
+ }
297
+ }
86
298
  let baseUrl = args.baseUrl ?? process.env.KWEAVER_BASE_URL ?? "";
87
299
  let token = args.token ?? process.env.KWEAVER_TOKEN ?? "";
88
300
  const bd = args.businessDomain ?? process.env.KWEAVER_BUSINESS_DOMAIN ?? "bd_public";
@@ -106,18 +318,80 @@ export async function runTraceCommand(rest) {
106
318
  process.stderr.write("error: missing --base-url / --token (or KWEAVER_BASE_URL / KWEAVER_TOKEN env)\n");
107
319
  return 5;
108
320
  }
321
+ // ── Batch mode dispatch ──────────────────────────────────────────────────
322
+ if (args.mode === "batch") {
323
+ // Narrowed by the early-validation block above (args.out !== null)
324
+ const outDir = args.out;
325
+ let convIds;
326
+ try {
327
+ convIds = await parseTracesList(args.traces);
328
+ }
329
+ catch (e) {
330
+ if (e instanceof TracesListError) {
331
+ process.stderr.write(`error: ${e.message}\n`);
332
+ return 2;
333
+ }
334
+ throw e;
335
+ }
336
+ ensureDefaultProviderRegistered();
337
+ try {
338
+ const result = await runBatch({
339
+ traces: convIds,
340
+ out: outDir,
341
+ rulesDir: args.rulesDir,
342
+ noBuiltin: args.noBuiltin,
343
+ noArtifacts: args.noArtifacts,
344
+ format: args.format ?? undefined, // ← plumb --format through
345
+ lang: args.lang ?? undefined,
346
+ timeoutMs: 60000,
347
+ maxParallel: args.maxParallel,
348
+ baseUrl,
349
+ token,
350
+ businessDomain: bd,
351
+ });
352
+ process.stderr.write(`wrote ${result.perTraceReportPaths.length} per-trace reports + ${result.scanSummaryPath} (${result.tracesReused} reused)\n`);
353
+ return 0;
354
+ }
355
+ catch (e) {
356
+ if (e instanceof SingleAgentValidationError) {
357
+ process.stderr.write(`error: ${e.message}\n`);
358
+ return 2;
359
+ }
360
+ process.stderr.write(`error: ${e.message}\n`);
361
+ return 1;
362
+ }
363
+ }
364
+ // ── Single-trace dispatch ────────────────────────────────────────────────
365
+ if (!args.noLlm)
366
+ ensureDefaultProviderRegistered();
109
367
  try {
110
- await diagnose(args.conversationId, {
368
+ const report = await diagnose(args.conversationId, {
111
369
  out: args.out,
112
370
  rulesDir: args.rulesDir,
113
371
  noBuiltin: args.noBuiltin,
114
- noLlm: true,
372
+ noLlm: args.noLlm,
373
+ format: args.format ?? undefined,
374
+ lang: args.lang ?? undefined,
115
375
  agentProvider: null,
116
376
  timeoutMs: 60000,
117
377
  baseUrl,
118
378
  token,
119
379
  businessDomain: bd,
120
380
  });
381
+ // Tell the user which file(s) we wrote, so they know whether to look for
382
+ // .yaml, .md, or both.
383
+ if (args.out !== null) {
384
+ const fmt = args.format ?? "both";
385
+ const { yamlPath, mdPath } = derivePaths(args.out, fmt);
386
+ const written = [];
387
+ if (yamlPath !== null)
388
+ written.push(yamlPath);
389
+ if (mdPath !== null)
390
+ written.push(mdPath);
391
+ if (written.length > 0) {
392
+ process.stderr.write(`wrote ${written.join(" + ")} (${report.findings.length} findings)\n`);
393
+ }
394
+ }
121
395
  return 0;
122
396
  }
123
397
  catch (e) {
@@ -166,3 +440,229 @@ async function runRulesValidate(rulePath) {
166
440
  process.stdout.write(`ok: ${rulePath} validates against diagnosis-rule/v1\n`);
167
441
  return 0;
168
442
  }
443
/**
 * Error raised when no schema kind was supplied for a file and none could be
 * inferred from its path. The message tells the user which `--kind` values
 * can be passed explicitly.
 */
export class SchemaKindRequiredError extends Error {
    /**
     * @param {string} filePath - Path of the file whose kind could not be inferred.
     */
    constructor(filePath) {
        const message = `cannot infer schema kind for ${filePath}; pass --kind=<eval-set|eval-set-index|eval-set-input|test-report>`;
        super(message);
        // Give the error a stable, recognisable name instead of the generic "Error".
        this.name = "SchemaKindRequiredError";
    }
}
449
/**
 * Guess the schema kind of a YAML artifact from its path alone.
 *
 * Backslashes are treated as path separators, so Windows-style paths are
 * classified the same way as POSIX ones. Rules are tried in order and the
 * first match wins:
 *   1. `eval-sets/<name>/index.yaml`                      -> "eval-set-index"
 *   2. `*-test-report.yaml` / `test-report.yaml` anywhere,
 *      or `report.yaml` somewhere below a `test-runs/` dir -> "test-report"
 *   3. `*-eval-set-input.yaml`, or a file name containing
 *      `queries-input`                                     -> "eval-set-input"
 *   4. any other `eval-sets/<name>/<file>.yaml`            -> "eval-set"
 *
 * @param {string} filePath - Absolute or relative path to classify.
 * @returns {"eval-set"|"eval-set-index"|"eval-set-input"|"test-report"|null}
 *   The inferred kind, or `null` when no rule matches.
 */
export function inferKind(filePath) {
    const normalized = filePath.replace(/\\/g, "/");
    const fileName = normalized.slice(normalized.lastIndexOf("/") + 1);
    const isIndexFile = fileName === "index.yaml";

    // Rule 1: index.yaml in an eval-set dir (absolute or relative path).
    if (isIndexFile && /(^|\/)eval-sets\/[^/]+\/index\.yaml$/.test(normalized)) {
        return "eval-set-index";
    }

    // Rule 2: report files. A bare "report.yaml" only counts below test-runs/.
    const hasReportName = fileName.endsWith("-test-report.yaml")
        || fileName === "test-report.yaml"
        || fileName === "report.yaml";
    if (hasReportName) {
        const belowTestRuns = /(^|\/)test-runs\//.test(normalized);
        if (belowTestRuns || fileName.includes("test-report")) {
            return "test-report";
        }
    }

    // Rule 3: builder input files.
    const isBuilderInput = fileName.endsWith("-eval-set-input.yaml")
        || fileName.includes("queries-input");
    if (isBuilderInput) {
        return "eval-set-input";
    }

    // Rule 4: shard inside an eval-set dir (anything that is not index.yaml).
    const isDirectEvalSetChild = /(^|\/)eval-sets\/[^/]+\/[^/]+\.yaml$/.test(normalized);
    return isDirectEvalSetChild && !isIndexFile ? "eval-set" : null;
}
469
// Lookup table from a schema kind name to the schema object used to validate
// files of that kind.
//
// The table is built on a null prototype so that a lookup with an arbitrary,
// user-supplied kind (for example "constructor", "toString" or "__proto__")
// yields `undefined` instead of an inherited Object.prototype member, and it
// is frozen because nothing should add or replace kinds at runtime.
const SCHEMA_BY_KIND = Object.freeze(Object.assign(Object.create(null), {
    "eval-set": EvalSetShardSchema,
    "eval-set-index": EvalSetIndexSchema,
    "eval-set-input": EvalSetInputSchema,
    "test-report": TestReportSchema,
}));
475
/**
 * Validate a YAML file against one of the schemas registered in
 * `SCHEMA_BY_KIND` and print the outcome.
 *
 * On success a "✓" line is written to stdout; every failure is written to
 * stderr. Only the first schema issue is reported.
 *
 * @param {{ filePath?: string, kind?: string | null }} opts
 *   `filePath` is the file to validate; `kind` optionally names the schema.
 *   When `kind` is null/undefined it is inferred from the path via `inferKind`.
 * @returns {Promise<number>} Exit code: 0 when the file is valid; 1 when the
 *   file cannot be read, is not parseable YAML, or fails validation; 2 for
 *   usage errors (missing file path, unknown kind).
 * @throws {SchemaKindRequiredError} When no kind was given and none could be
 *   inferred from the path.
 */
export async function runSchemaValidate(opts) {
    if (!opts.filePath) {
        process.stderr.write("error: schema validate requires a file path argument\n");
        return 2;
    }
    const kind = opts.kind ?? inferKind(opts.filePath);
    if (!kind) {
        throw new SchemaKindRequiredError(opts.filePath);
    }
    // Own-property lookup only: a plain `SCHEMA_BY_KIND[kind]` would also
    // resolve inherited names such as "constructor" or "toString", which are
    // truthy but have no `safeParse`, turning a usage error into a crash.
    const schema = Object.prototype.hasOwnProperty.call(SCHEMA_BY_KIND, kind)
        ? SCHEMA_BY_KIND[kind]
        : undefined;
    if (!schema) {
        process.stderr.write(`error: unknown --kind=${kind}; valid: ${Object.keys(SCHEMA_BY_KIND).join(", ")}\n`);
        return 2;
    }
    let raw;
    try {
        raw = await readFile(opts.filePath, "utf8");
    }
    catch (e) {
        process.stderr.write(`error: cannot read ${opts.filePath}: ${e.message}\n`);
        return 1;
    }
    // Loaded lazily so the YAML parser is only pulled in when a file is
    // actually going to be parsed.
    const yamlMod = await import("js-yaml");
    let parsed;
    try {
        parsed = yamlMod.default.load(raw);
    }
    catch (e) {
        process.stderr.write(`error: yaml parse failed: ${e.message}\n`);
        return 1;
    }
    const result = schema.safeParse(parsed);
    if (result.success) {
        process.stdout.write(`✓ ${opts.filePath} valid against ${kind}\n`);
        return 0;
    }
    // Report the first issue only, with its location as a dotted path.
    const issue = result.error.issues[0];
    const where = issue.path.map(String).join(".");
    process.stderr.write(`✗ ${opts.filePath} invalid at '${where}': ${issue.message}\n`);
    return 1;
}
516
/**
 * CLI handler that builds an eval set from exactly one source: a queries file
 * (`args.queriesPath`) or a diagnosis directory (`args.diagnosisPath`).
 *
 * @param {object} args - Parsed CLI arguments. Reads `queriesPath`,
 *   `diagnosisPath`, `out`, `evalSetId`, `onConflict` and `redactionRules`.
 * @returns {Promise<number>} Exit code: 0 on success; 2 for usage errors
 *   (zero or both sources given, or `out` missing); 6 when the builder fails
 *   with a `BuilderError` whose message reports a query_id conflict; 1 for any
 *   other failure.
 */
async function runEvalSetBuild(args) {
    // Argument check: the two sources are mutually exclusive, and one is required.
    const hasQueries = Boolean(args.queriesPath);
    const hasDiagnosis = Boolean(args.diagnosisPath);
    if (hasQueries === hasDiagnosis) {
        process.stderr.write("error: must pass exactly one of --queries=<file> | --diagnosis=<dir>\n");
        return 2;
    }
    if (!args.out) {
        process.stderr.write("error: --out=<dir> is required\n");
        return 2;
    }
    // eval_set_id defaults to the name of the output directory. The path is
    // resolved first so that inputs such as ".", "foo/." or ".." produce the
    // real directory name rather than "." / ".."; trailing separators are
    // dropped by the resolution as well.
    const evalSetId = args.evalSetId ?? path.basename(path.resolve(args.out));
    const repoDir = path.join(process.cwd(), "redaction-rules");
    try {
        const result = await build({
            source: hasQueries
                ? { kind: "queries", path: args.queriesPath }
                : { kind: "diagnosis", path: args.diagnosisPath },
            outDir: args.out,
            evalSetId,
            onConflict: args.onConflict ?? "fail",
            redactionRulesCliFlag: args.redactionRules,
            repoDir,
        });
        process.stdout.write(`✓ wrote ${result.cases_written} cases (${result.cases_skipped} skipped), ${result.shard_paths.length} shard(s)\n`);
        process.stdout.write(`  redaction_rules: ${result.redaction_rules_source}\n`);
        if (result.conflicts.length > 0) {
            process.stdout.write(`  conflicts: ${result.conflicts.join(", ")}\n`);
        }
        return 0;
    }
    catch (e) {
        // Tolerate thrown values that are not Error instances (null, strings).
        const message = e?.message ?? String(e);
        process.stderr.write(`error: ${message}\n`);
        // query_id conflict -> exit 6 (spec doc §5.4)
        if (e instanceof BuilderError && message.includes("query_id conflict")) {
            return 6;
        }
        return 1;
    }
}
561
/**
 * Resolve the platform base URL and access token for an eval-set test run.
 *
 * Explicit CLI values win, then the KWEAVER_BASE_URL / KWEAVER_TOKEN
 * environment variables; anything still missing is taken from
 * `ensureValidToken()`. An error line is written to stderr when resolution
 * fails.
 *
 * @param {object} args - Parsed CLI arguments (`baseUrl`, `token`).
 * @returns {Promise<{ baseUrl: string, token: string } | null>} The resolved
 *   pair, or `null` when either value could not be determined.
 */
async function resolveEvalSetTestCredentials(args) {
    let baseUrl = args.baseUrl ?? process.env.KWEAVER_BASE_URL ?? "";
    let token = args.token ?? process.env.KWEAVER_TOKEN ?? "";
    if (!baseUrl || !token) {
        try {
            const t = await ensureValidToken();
            if (!baseUrl)
                baseUrl = t.baseUrl;
            if (!token)
                token = t.accessToken;
        }
        catch (e) {
            process.stderr.write(`error: missing --base-url / --token, and no active platform in ~/.kweaver/ — ${e?.message ?? String(e)}\n`);
            return null;
        }
    }
    if (!baseUrl || !token) {
        process.stderr.write("error: missing --base-url / --token (or KWEAVER_BASE_URL / KWEAVER_TOKEN env)\n");
        return null;
    }
    return { baseUrl, token };
}

/**
 * Best-effort construction of the judge used for `semantic_match` assertions.
 *
 * Registers the default agent provider, loads the builtin rubric templates
 * and wires the judge only if a provider with the "structured_output"
 * capability resolves and reports itself available. Every failure on this
 * path is reported as a warning on stderr and yields `undefined`, so that
 * `semantic_match` assertions are skipped instead of the whole run failing.
 *
 * @param {string | undefined | null} lang - Requested language; "zh" selects
 *   Chinese, anything else falls back to "en".
 * @returns {Promise<object | undefined>} The semantic-match provider, or
 *   `undefined` when it could not be set up.
 */
async function resolveEvalSetSemanticMatchProvider(lang) {
    ensureDefaultProviderRegistered();
    const promptRegistry = new PromptTemplateRegistry();
    try {
        await promptRegistry.loadBuiltinDir(EVAL_SET_RUBRIC_DIR);
    }
    catch (e) {
        // A missing or unreadable rubric template only disables the judge.
        process.stderr.write(`warn: could not load rubric prompt templates — ${e?.message ?? String(e)}; \`semantic_match\` assertions will be skipped\n`);
        return undefined;
    }
    try {
        const provider = defaultRegistry.resolve({
            requiredCapabilities: ["structured_output"],
        });
        if (provider && (await provider.isAvailable())) {
            return createBuiltinSemanticMatchProvider({
                provider,
                promptRegistry,
                lang: lang === "zh" ? "zh" : "en",
            });
        }
        process.stderr.write("warn: agent provider unavailable — `semantic_match` assertions will be skipped (install `claude` CLI or wire a stub provider)\n");
    }
    catch (e) {
        process.stderr.write(`warn: could not resolve agent provider — ${e?.message ?? String(e)}\n`);
    }
    return undefined;
}

/**
 * CLI handler that runs an eval set against a candidate agent.
 *
 * @param {object} args - Parsed CLI arguments. Reads `evalSetPath`,
 *   `candidateAgentId`, `candidateAgentVersion`, `out`, `maxParallel`,
 *   `baseUrl`, `token`, `businessDomain` and `lang`.
 * @returns {Promise<number>} Exit code: 0 on success; 2 when a required
 *   argument is missing; 5 when base URL / token cannot be resolved; 1 when
 *   the test run itself fails.
 */
async function runEvalSetTestCmd(args) {
    if (!args.evalSetPath) {
        process.stderr.write("error: eval-set directory is required\n");
        return 2;
    }
    if (!args.candidateAgentId) {
        process.stderr.write("error: --candidate=<agent_id> is required\n");
        return 2;
    }
    if (!args.out) {
        process.stderr.write("error: --out=<dir> is required\n");
        return 2;
    }
    const credentials = await resolveEvalSetTestCredentials(args);
    if (credentials === null) {
        return 5;
    }
    const { baseUrl, token } = credentials;
    const bd = args.businessDomain ?? process.env.KWEAVER_BUSINESS_DOMAIN ?? "bd_public";
    // Resolve a SemanticMatchProvider for `semantic_match` assertions (D5).
    // When it cannot be set up the assertions are skipped with a warning.
    const semanticMatchProvider = await resolveEvalSetSemanticMatchProvider(args.lang);
    try {
        await runEvalSetTest({
            evalSetDir: args.evalSetPath,
            candidateAgentId: args.candidateAgentId,
            candidateAgentVersion: args.candidateAgentVersion,
            outDir: args.out,
            maxParallel: args.maxParallel,
            deps: {
                fetchAgent: async (agentId, version) => fetchAgentInfo({
                    baseUrl,
                    accessToken: token,
                    agentId,
                    version: version ?? "latest",
                    businessDomain: bd,
                }),
                sendChat: async ({ agentInfo, query }) => {
                    const result = await sendChatRequest({
                        baseUrl,
                        accessToken: token,
                        agentId: agentInfo.id,
                        agentKey: agentInfo.key,
                        agentVersion: agentInfo.version,
                        query,
                        stream: false,
                        businessDomain: bd,
                    });
                    return { text: result.text, conversationId: result.conversationId };
                },
                fetchTrace: async (conversationId) => {
                    const r = await getTracesByConversation({
                        baseUrl,
                        accessToken: token,
                        conversationId,
                        businessDomain: bd,
                    });
                    return { spans: r.spans };
                },
                semanticMatchProvider,
            },
        });
        process.stdout.write(`✓ wrote ${args.out}/report.yaml\n`);
        return 0;
    }
    catch (e) {
        process.stderr.write(`error: ${e?.message ?? String(e)}\n`);
        return 1;
    }
}