@kweaver-ai/kweaver-sdk 0.7.4 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190) hide show
  1. package/README.md +39 -5
  2. package/README.zh.md +37 -5
  3. package/dist/agent-providers/index.d.ts +7 -0
  4. package/dist/agent-providers/index.js +5 -0
  5. package/dist/agent-providers/prompt-template.d.ts +62 -0
  6. package/dist/agent-providers/prompt-template.js +105 -0
  7. package/dist/agent-providers/prompts/rubric-judge-v1.prompt.md +51 -0
  8. package/dist/agent-providers/prompts/within-trace-synthesizer-v1.prompt.md +60 -0
  9. package/dist/agent-providers/providers/claude-code-subprocess.d.ts +74 -0
  10. package/dist/agent-providers/providers/claude-code-subprocess.js +259 -0
  11. package/dist/agent-providers/providers/stub.d.ts +47 -0
  12. package/dist/agent-providers/providers/stub.js +77 -0
  13. package/dist/agent-providers/registry.d.ts +45 -0
  14. package/dist/agent-providers/registry.js +77 -0
  15. package/dist/agent-providers/types.d.ts +91 -0
  16. package/dist/agent-providers/types.js +25 -0
  17. package/dist/api/agent-chat.js +8 -6
  18. package/dist/api/agent-observability.d.ts +51 -0
  19. package/dist/api/agent-observability.js +108 -0
  20. package/dist/api/context-loader.d.ts +1 -0
  21. package/dist/api/conversations.d.ts +4 -8
  22. package/dist/api/conversations.js +16 -58
  23. package/dist/api/datasources.d.ts +2 -20
  24. package/dist/api/datasources.js +7 -123
  25. package/dist/api/semantic-search.d.ts +5 -0
  26. package/dist/api/semantic-search.js +5 -0
  27. package/dist/api/skills.d.ts +75 -2
  28. package/dist/api/skills.js +108 -12
  29. package/dist/api/trace.d.ts +49 -0
  30. package/dist/api/trace.js +85 -0
  31. package/dist/api/vega.d.ts +53 -0
  32. package/dist/api/vega.js +144 -0
  33. package/dist/cli.js +12 -5
  34. package/dist/commands/agent/mode.d.ts +6 -0
  35. package/dist/commands/agent/mode.js +75 -0
  36. package/dist/commands/agent.js +101 -29
  37. package/dist/commands/bkn-ops.js +12 -6
  38. package/dist/commands/bkn-utils.d.ts +9 -0
  39. package/dist/commands/bkn-utils.js +17 -0
  40. package/dist/commands/context-loader.js +608 -38
  41. package/dist/commands/ds.js +7 -2
  42. package/dist/commands/skill.d.ts +21 -1
  43. package/dist/commands/skill.js +389 -1
  44. package/dist/commands/trace.d.ts +39 -0
  45. package/dist/commands/trace.js +668 -0
  46. package/dist/index.d.ts +2 -2
  47. package/dist/index.js +1 -1
  48. package/dist/resources/bkn.d.ts +5 -0
  49. package/dist/resources/bkn.js +5 -0
  50. package/dist/resources/datasources.js +2 -1
  51. package/dist/resources/skills.d.ts +17 -1
  52. package/dist/resources/skills.js +32 -1
  53. package/dist/trace-ai/diagnose/agent-binding.d.ts +67 -0
  54. package/dist/trace-ai/diagnose/agent-binding.js +257 -0
  55. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.d.ts +2 -0
  56. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.js +15 -0
  57. package/dist/trace-ai/diagnose/builtin-rules/excessive-tool-calls-per-turn.yaml +16 -0
  58. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.d.ts +2 -0
  59. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.js +44 -0
  60. package/dist/trace-ai/diagnose/builtin-rules/llm-response-truncated-no-continue.yaml +15 -0
  61. package/dist/trace-ai/diagnose/builtin-rules/register.d.ts +1 -0
  62. package/dist/trace-ai/diagnose/builtin-rules/register.js +11 -0
  63. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.d.ts +2 -0
  64. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.js +29 -0
  65. package/dist/trace-ai/diagnose/builtin-rules/retrieval-empty-no-fallback.yaml +15 -0
  66. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.d.ts +2 -0
  67. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.js +45 -0
  68. package/dist/trace-ai/diagnose/builtin-rules/tool-error-swallowed.yaml +15 -0
  69. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.d.ts +2 -0
  70. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.js +38 -0
  71. package/dist/trace-ai/diagnose/builtin-rules/tool-loop-no-state-change.yaml +16 -0
  72. package/dist/trace-ai/diagnose/builtin-rules/tool-retry-intent-mismatch.yaml +68 -0
  73. package/dist/trace-ai/diagnose/index.d.ts +32 -0
  74. package/dist/trace-ai/diagnose/index.js +246 -0
  75. package/dist/trace-ai/diagnose/output-schema-converter.d.ts +24 -0
  76. package/dist/trace-ai/diagnose/output-schema-converter.js +81 -0
  77. package/dist/trace-ai/diagnose/predicate-registry.d.ts +7 -0
  78. package/dist/trace-ai/diagnose/predicate-registry.js +30 -0
  79. package/dist/trace-ai/diagnose/query-extractor.d.ts +14 -0
  80. package/dist/trace-ai/diagnose/query-extractor.js +45 -0
  81. package/dist/trace-ai/diagnose/report-assembler.d.ts +31 -0
  82. package/dist/trace-ai/diagnose/report-assembler.js +100 -0
  83. package/dist/trace-ai/diagnose/report-markdown.d.ts +18 -0
  84. package/dist/trace-ai/diagnose/report-markdown.js +192 -0
  85. package/dist/trace-ai/diagnose/rule-loader.d.ts +11 -0
  86. package/dist/trace-ai/diagnose/rule-loader.js +120 -0
  87. package/dist/trace-ai/diagnose/schemas.d.ts +184 -0
  88. package/dist/trace-ai/diagnose/schemas.js +154 -0
  89. package/dist/trace-ai/diagnose/signal-probe.d.ts +17 -0
  90. package/dist/trace-ai/diagnose/signal-probe.js +39 -0
  91. package/dist/trace-ai/diagnose/synthesizer-agent.d.ts +40 -0
  92. package/dist/trace-ai/diagnose/synthesizer-agent.js +158 -0
  93. package/dist/trace-ai/diagnose/synthesizer-template.d.ts +2 -0
  94. package/dist/trace-ai/diagnose/synthesizer-template.js +49 -0
  95. package/dist/trace-ai/diagnose/trace-shaper.d.ts +3 -0
  96. package/dist/trace-ai/diagnose/trace-shaper.js +73 -0
  97. package/dist/trace-ai/diagnose/types.d.ts +173 -0
  98. package/dist/trace-ai/diagnose/types.js +1 -0
  99. package/dist/trace-ai/eval-set/assertion-evaluator.d.ts +29 -0
  100. package/dist/trace-ai/eval-set/assertion-evaluator.js +100 -0
  101. package/dist/trace-ai/eval-set/builder.d.ts +36 -0
  102. package/dist/trace-ai/eval-set/builder.js +126 -0
  103. package/dist/trace-ai/eval-set/index.d.ts +15 -0
  104. package/dist/trace-ai/eval-set/index.js +10 -0
  105. package/dist/trace-ai/eval-set/output-writer.d.ts +27 -0
  106. package/dist/trace-ai/eval-set/output-writer.js +126 -0
  107. package/dist/trace-ai/eval-set/query-picker.d.ts +37 -0
  108. package/dist/trace-ai/eval-set/query-picker.js +147 -0
  109. package/dist/trace-ai/eval-set/redactor.d.ts +42 -0
  110. package/dist/trace-ai/eval-set/redactor.js +133 -0
  111. package/dist/trace-ai/eval-set/rubric-templates/answer-match-reference.prompt.md +19 -0
  112. package/dist/trace-ai/eval-set/schemas.d.ts +136 -0
  113. package/dist/trace-ai/eval-set/schemas.js +130 -0
  114. package/dist/trace-ai/eval-set/semantic-match-provider.d.ts +33 -0
  115. package/dist/trace-ai/eval-set/semantic-match-provider.js +51 -0
  116. package/dist/trace-ai/eval-set/test-runner.d.ts +34 -0
  117. package/dist/trace-ai/eval-set/test-runner.js +153 -0
  118. package/dist/trace-ai/eval-set/types.d.ts +46 -0
  119. package/dist/trace-ai/eval-set/types.js +8 -0
  120. package/dist/trace-ai/exp/bundle-writer.d.ts +10 -0
  121. package/dist/trace-ai/exp/bundle-writer.js +54 -0
  122. package/dist/trace-ai/exp/claude-binary.d.ts +5 -0
  123. package/dist/trace-ai/exp/claude-binary.js +30 -0
  124. package/dist/trace-ai/exp/coordinator.d.ts +45 -0
  125. package/dist/trace-ai/exp/coordinator.js +203 -0
  126. package/dist/trace-ai/exp/eval-runner.d.ts +14 -0
  127. package/dist/trace-ai/exp/eval-runner.js +47 -0
  128. package/dist/trace-ai/exp/exp-store/abort-signal.d.ts +3 -0
  129. package/dist/trace-ai/exp/exp-store/abort-signal.js +27 -0
  130. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.d.ts +4 -0
  131. package/dist/trace-ai/exp/exp-store/candidate-lineage-yaml.js +37 -0
  132. package/dist/trace-ai/exp/exp-store/events-jsonl.d.ts +17 -0
  133. package/dist/trace-ai/exp/exp-store/events-jsonl.js +60 -0
  134. package/dist/trace-ai/exp/exp-store/exp-registry.d.ts +6 -0
  135. package/dist/trace-ai/exp/exp-store/exp-registry.js +41 -0
  136. package/dist/trace-ai/exp/exp-store/index.d.ts +46 -0
  137. package/dist/trace-ai/exp/exp-store/index.js +59 -0
  138. package/dist/trace-ai/exp/exp-store/lock.d.ts +3 -0
  139. package/dist/trace-ai/exp/exp-store/lock.js +73 -0
  140. package/dist/trace-ai/exp/exp-store/mission-md.d.ts +3 -0
  141. package/dist/trace-ai/exp/exp-store/mission-md.js +37 -0
  142. package/dist/trace-ai/exp/exp-store/readme-template.d.ts +5 -0
  143. package/dist/trace-ai/exp/exp-store/readme-template.js +25 -0
  144. package/dist/trace-ai/exp/exp-store/round-yaml.d.ts +3 -0
  145. package/dist/trace-ai/exp/exp-store/round-yaml.js +33 -0
  146. package/dist/trace-ai/exp/index.d.ts +8 -0
  147. package/dist/trace-ai/exp/index.js +238 -0
  148. package/dist/trace-ai/exp/info.d.ts +35 -0
  149. package/dist/trace-ai/exp/info.js +120 -0
  150. package/dist/trace-ai/exp/patch/agent-config.d.ts +1 -0
  151. package/dist/trace-ai/exp/patch/agent-config.js +26 -0
  152. package/dist/trace-ai/exp/patch/index.d.ts +2 -0
  153. package/dist/trace-ai/exp/patch/index.js +13 -0
  154. package/dist/trace-ai/exp/patch/skill.d.ts +1 -0
  155. package/dist/trace-ai/exp/patch/skill.js +24 -0
  156. package/dist/trace-ai/exp/providers/synthesizer-client.d.ts +14 -0
  157. package/dist/trace-ai/exp/providers/synthesizer-client.js +39 -0
  158. package/dist/trace-ai/exp/providers/triage-client.d.ts +19 -0
  159. package/dist/trace-ai/exp/providers/triage-client.js +51 -0
  160. package/dist/trace-ai/exp/schemas.d.ts +147 -0
  161. package/dist/trace-ai/exp/schemas.js +50 -0
  162. package/dist/trace-ai/exp/scoring.d.ts +2 -0
  163. package/dist/trace-ai/exp/scoring.js +46 -0
  164. package/dist/trace-ai/scan/aggregator.d.ts +20 -0
  165. package/dist/trace-ai/scan/aggregator.js +26 -0
  166. package/dist/trace-ai/scan/artifacts/paths.d.ts +12 -0
  167. package/dist/trace-ai/scan/artifacts/paths.js +18 -0
  168. package/dist/trace-ai/scan/artifacts/writer.d.ts +67 -0
  169. package/dist/trace-ai/scan/artifacts/writer.js +96 -0
  170. package/dist/trace-ai/scan/batched-rubric.d.ts +55 -0
  171. package/dist/trace-ai/scan/batched-rubric.js +159 -0
  172. package/dist/trace-ai/scan/cross-trace-synthesizer.d.ts +24 -0
  173. package/dist/trace-ai/scan/cross-trace-synthesizer.js +93 -0
  174. package/dist/trace-ai/scan/index.d.ts +31 -0
  175. package/dist/trace-ai/scan/index.js +390 -0
  176. package/dist/trace-ai/scan/prompts/builtin/cross-trace-synthesizer-v1.prompt.md +44 -0
  177. package/dist/trace-ai/scan/prompts/builtin/rubric-judge-batch-v1.prompt.md +44 -0
  178. package/dist/trace-ai/scan/runner.d.ts +25 -0
  179. package/dist/trace-ai/scan/runner.js +42 -0
  180. package/dist/trace-ai/scan/sampler.d.ts +18 -0
  181. package/dist/trace-ai/scan/sampler.js +81 -0
  182. package/dist/trace-ai/scan/scan-summary-markdown.d.ts +2 -0
  183. package/dist/trace-ai/scan/scan-summary-markdown.js +71 -0
  184. package/dist/trace-ai/scan/scan-summary-schema.d.ts +73 -0
  185. package/dist/trace-ai/scan/scan-summary-schema.js +61 -0
  186. package/dist/trace-ai/scan/single-agent-validator.d.ts +23 -0
  187. package/dist/trace-ai/scan/single-agent-validator.js +42 -0
  188. package/dist/trace-ai/scan/traces-list-parser.d.ts +15 -0
  189. package/dist/trace-ai/scan/traces-list-parser.js +46 -0
  190. package/package.json +14 -4
@@ -0,0 +1,390 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import yaml from "js-yaml";
4
+ import { fileURLToPath } from "node:url";
5
+ import { getSpansByConversationId } from "../../api/trace.js";
6
+ import { assembleTraceTree } from "../diagnose/trace-shaper.js";
7
+ import { loadRules } from "../diagnose/rule-loader.js";
8
+ import "../diagnose/builtin-rules/register.js"; // side effect: registers all builtin predicates
9
+ import { runRules, rubricRules } from "../diagnose/signal-probe.js";
10
+ import { templateSynthesize } from "../diagnose/synthesizer-template.js";
11
+ import { assembleReport, reportToYamlObject, symbolicHitsToFindings } from "../diagnose/report-assembler.js";
12
+ import { renderReportMarkdown } from "../diagnose/report-markdown.js";
13
+ import { ReportSchema } from "../diagnose/schemas.js";
14
+ import { defaultRegistry } from "../../agent-providers/registry.js";
15
+ import { defaultPromptRegistry } from "../../agent-providers/prompt-template.js";
16
+ import { resolveRubricInput, renderChangeTemplate } from "../diagnose/agent-binding.js";
17
+ import { validateSingleAgent } from "./single-agent-validator.js";
18
+ import { runPerTracePipeline } from "./runner.js";
19
+ import { runBatchedRubric } from "./batched-rubric.js";
20
+ import { aggregate } from "./aggregator.js";
21
+ import { sample } from "./sampler.js";
22
+ import { runCrossTraceSynthesizer } from "./cross-trace-synthesizer.js";
23
+ import { renderScanSummaryMarkdown } from "./scan-summary-markdown.js";
24
+ import { ArtifactWriter } from "./artifacts/writer.js";
25
+ import { resolveArtifactsBase } from "./artifacts/paths.js";
// ESM modules have no implicit __dirname, so derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Sibling `diagnose/builtin-rules` directory; handed to loadRules() as `builtinDir`.
const BUILTIN_RULES_DIR = path.join(__dirname, "../diagnose/builtin-rules");
// Prompt templates that live under `agent-providers/prompts`, two levels up.
const SHARED_PROMPT_DIR = path.join(__dirname, "../../agent-providers/prompts");
// Prompt templates that live next to this module under `prompts/builtin`.
const SCAN_PROMPT_DIR = path.join(__dirname, "prompts/builtin");
/**
 * Best-effort load of the prompt template directories into `reg`.
 *
 * The shared directory is loaded first, then the scan-specific one. A rejected
 * load is deliberately ignored so that one unreadable directory does not stop
 * the other from being loaded.
 *
 * @param reg - prompt registry exposing an async `loadBuiltinDir(dir)`.
 */
async function ensurePromptsLoaded(reg) {
    const ignoreFailure = () => undefined;
    for (const promptDir of [SHARED_PROMPT_DIR, SCAN_PROMPT_DIR]) {
        await reg.loadBuiltinDir(promptDir).catch(ignoreFailure);
    }
}
/**
 * Load a per-trace report yaml from disk, validate it with `ReportSchema`, and
 * convert its snake_case on-disk shape to the camelCase in-memory shape used by
 * the rest of this module.
 *
 * @param yamlPath - path of the report yaml file.
 * @returns the in-memory report object.
 * @throws if the file cannot be read, is not valid yaml, or fails `ReportSchema.parse`.
 */
async function readReportFromDisk(yamlPath) {
    const raw = yaml.load(await fs.readFile(yamlPath, "utf8"));
    const { trace, run, summary, findings } = ReportSchema.parse(raw);
    const rootCause = summary.primary_root_cause;

    // Per-item converters, one for each list-valued section of the report.
    const toSkippedRule = (skip) => ({ ruleId: skip.rule_id, reason: skip.reason });
    const toFixPriority = (entry) => ({ findingId: entry.finding_id, reason: entry.reason });
    const toFindingLink = (link) => ({ findingIds: link.finding_ids, relation: link.relation });
    const toFinding = (finding) => {
        const evalCase = finding.verify_with.suggested_eval_case;
        return {
            ruleId: finding.rule_id,
            judgmentKind: finding.judgment_kind,
            severity: finding.severity,
            symptom: finding.symptom,
            likelyCause: finding.likely_cause,
            evidence: { spans: finding.evidence.spans, excerpt: finding.evidence.excerpt },
            suggestedFix: { target: finding.suggested_fix.target, change: finding.suggested_fix.change },
            confidence: finding.confidence,
            verifyWith: {
                suggestedEvalCase: {
                    queryId: evalCase.query_id,
                    query: evalCase.query,
                    assertions: evalCase.assertions,
                },
            },
        };
    };

    let primaryRootCause = null;
    if (rootCause !== null) {
        primaryRootCause = {
            findingIds: rootCause.finding_ids,
            description: rootCause.description,
            targetForFix: rootCause.target_for_fix,
        };
    }

    return {
        schemaVersion: "trace-diagnose-report/v1",
        trace: {
            traceId: trace.trace_id,
            agentId: trace.agent_id,
            tenant: trace.tenant,
        },
        run: {
            diagnosedAt: run.diagnosed_at,
            cliVersion: run.cli_version,
            mode: run.mode,
            rulesApplied: run.rules_applied,
            rulesSkipped: run.rules_skipped.map(toSkippedRule),
            synthesizerMode: run.synthesizer_mode,
        },
        summary: {
            headline: summary.headline,
            primaryRootCause,
            fixPriority: summary.fix_priority.map(toFixPriority),
            crossFindingLinks: summary.cross_finding_links.map(toFindingLink),
        },
        findings: findings.map(toFinding),
    };
}
/**
 * Orchestrator: walks N conv_ids through the batch pipeline.
 * Single-agent enforced; --no-llm rejected by CLI (not here).
 *
 * Steps, in order:
 *  1. validate that all conversations belong to one agent (spans are cached);
 *  2. stage 1: per-trace symbolic diagnosis, `opts.maxParallel` traces at a time,
 *     writing `<out>/traces/<convId>.yaml` (and `.md` unless `opts.format` is "yaml");
 *  3. stage 2: batched rubric judging per rule, folding verdicts back into the
 *     per-trace reports and re-writing them;
 *  4. aggregate + sample, then stage 4: cross-trace synthesis (skipped when no
 *     provider resolves);
 *  5. write `scan-summary.yaml` / `scan-summary.md` and the run-metadata artifact.
 *
 * @param opts - batch options. Fields read here: `traces`, `out`, `format`,
 *   `baseUrl`, `token`, `businessDomain`, `noArtifacts`, `noBuiltin`, `rulesDir`,
 *   `maxParallel`, `lang`, `timeoutMs`. A missing, non-finite or < 1
 *   `maxParallel` falls back to 1 (sequential).
 * @returns paths of the written scan summary and per-trace reports plus the
 *   diagnosed / reused trace counts.
 * @throws propagates failures from span fetching, validation, rule loading,
 *   diagnosis, LLM stages and file writes.
 */
export async function runBatch(opts) {
    const t_start = Date.now();
    // NOTE(review): hard-coded version stamp written into every report and the scan
    // summary; it looks stale relative to the package version — confirm and source
    // it from package metadata if so. Value intentionally left unchanged here.
    const CLI_VERSION = "0.7.4";
    // Number of traces sent to the rubric judge per LLM call.
    const RUBRIC_CHUNK_SIZE = 10;
    const registry = defaultRegistry;
    const promptRegistry = defaultPromptRegistry;
    await ensurePromptsLoaded(promptRegistry);
    const writeFormats = opts.format ?? "both";
    const tracesDir = path.join(opts.out, "traces");

    /** Fetch the raw spans of one conversation from the trace API. */
    const fetchSpans = async (conversationId) => (await getSpansByConversationId({
        baseUrl: opts.baseUrl,
        token: opts.token,
        businessDomain: opts.businessDomain,
        conversationId,
    })).spans;

    /** Write one per-trace report to its final yaml (and md, when enabled) location. */
    const persistReport = async (entry) => {
        await fs.writeFile(path.join(tracesDir, `${entry.convId}.yaml`), yaml.dump(reportToYamlObject(entry.report)), "utf8");
        if (writeFormats !== "yaml") {
            await fs.writeFile(path.join(tracesDir, `${entry.convId}.md`), renderReportMarkdown(entry.report, { conversationId: entry.convId, businessDomain: opts.businessDomain }), "utf8");
        }
    };

    // 1. Single-agent validation (also caches the spans it fetches so stage 1
    //    does not have to fetch them again)
    const cachedSpans = new Map();
    const validation = await validateSingleAgent(opts.traces, async (convId) => {
        const spans = await fetchSpans(convId);
        cachedSpans.set(convId, spans);
        return {
            spans: spans.map((s) => ({ attributes: (s.attributes ?? {}) })),
            conversation_id: convId,
        };
    });
    const agentId = validation.agentId;

    // 2. Artifacts writer
    const artifactsBase = resolveArtifactsBase({ mode: "batch", out: opts.out });
    const artifacts = new ArtifactWriter({ base: artifactsBase, enabled: !opts.noArtifacts });

    // 3. Load rules (gates_on metadata available after this)
    const rules = await loadRules({
        builtinDir: opts.noBuiltin ? null : BUILTIN_RULES_DIR,
        cwdRulesDir: opts.rulesDir,
        extraRulesDir: null,
        noBuiltin: opts.noBuiltin,
    });

    // 4. Stage-1: per-trace symbolic diagnosis, in chunks of `parallelism`.
    //    Guard the step size: a step of 0 would never advance the cursor, and an
    //    undefined/NaN step would end the loop without diagnosing anything.
    const requestedParallel = Number(opts.maxParallel);
    const parallelism = Number.isFinite(requestedParallel) && requestedParallel >= 1
        ? Math.floor(requestedParallel)
        : 1;
    const allRubricWork = [];
    const t_stage1 = Date.now();
    const perTrace = [];
    for (let cursor = 0; cursor < opts.traces.length; cursor += parallelism) {
        const chunk = opts.traces.slice(cursor, cursor + parallelism);
        const results = await Promise.all(chunk.map(async (convId) => {
            const r = await runPerTracePipeline({
                convId,
                outDir: tracesDir,
                runDiagnose: async (id, partial) => {
                    const rawSpans = cachedSpans.get(id) ?? (await fetchSpans(id));
                    const firstTraceId = rawSpans.find((s) => s.traceId)?.traceId ?? id;
                    const tree = assembleTraceTree(firstTraceId, rawSpans);
                    const hitsMap = await runRules(rules, tree);
                    const symbolicFindings = symbolicHitsToFindings(rules, hitsMap);
                    // Determine which symbolic rules fired (for paired-gate rubric filtering)
                    const firedRuleIds = new Set(symbolicFindings.map((f) => f.ruleId));
                    for (const rule of rubricRules(rules)) {
                        const gates = rule.rubric?.gatesOn;
                        // A gated rubric rule is queued only when at least one of its gates fired.
                        if (gates && gates.length > 0 && !gates.some((g) => firedRuleIds.has(g)))
                            continue;
                        const resolvedInputs = {};
                        for (const inp of rule.rubric.inputs) {
                            resolvedInputs[inp.kind] = resolveRubricInput(inp, tree);
                        }
                        allRubricWork.push({
                            rule,
                            trace: {
                                traceId: tree.traceId,
                                spans: tree.spans.map((s) => s.spanId),
                                inputs: resolvedInputs,
                            },
                        });
                    }
                    const summary = templateSynthesize(symbolicFindings);
                    const report = assembleReport({
                        traceId: tree.traceId,
                        agentId,
                        tenant: null,
                        cliVersion: CLI_VERSION,
                        rules,
                        hits: hitsMap,
                        extraFindings: [],
                        summary,
                        mode: "hybrid",
                        synthesizerMode: "template",
                    });
                    // The yaml goes to the .partial path (the runner renames it);
                    // the markdown is written straight to its final name.
                    await fs.writeFile(partial, yaml.dump(reportToYamlObject(report)), "utf8");
                    if (writeFormats !== "yaml") {
                        await fs.writeFile(path.join(path.dirname(partial), `${id}.md`), renderReportMarkdown(report, { conversationId: id, businessDomain: opts.businessDomain }), "utf8");
                    }
                    return { traceId: tree.traceId, agentId };
                },
            });
            // Re-read the (possibly just-written, possibly reused) report from disk
            const report = await readReportFromDisk(path.join(tracesDir, `${convId}.yaml`));
            return { convId, report, reused: r.reused };
        }));
        perTrace.push(...results);
    }
    const t_stage1_end = Date.now();

    // 5. Stage-2 batched rubric (per rule, chunks of RUBRIC_CHUNK_SIZE)
    const t_stage2_start = Date.now();
    let stage2Chunks = 0;
    // Index reports by trace id once; the first entry wins, matching a linear find().
    const entryByTraceId = new Map();
    for (const entry of perTrace) {
        const traceId = entry.report.trace.traceId;
        if (!entryByTraceId.has(traceId))
            entryByTraceId.set(traceId, entry);
    }
    // Group rubric work by rule_id
    const workByRule = new Map();
    for (const w of allRubricWork) {
        const arr = workByRule.get(w.rule.id) ?? [];
        arr.push(w);
        workByRule.set(w.rule.id, arr);
    }
    for (const [ruleId, items] of workByRule.entries()) {
        const rule = items[0].rule;
        const traces = items.map((i) => i.trace);
        let provider;
        try {
            provider = registry.resolve({ preferred: rule.rubric.agentBinding.provider });
        }
        catch {
            // Provider not registered — skip this rule
            continue;
        }
        if (!provider)
            continue;
        // Count chunks only for rules that are actually sent to a provider, so
        // llm_calls / cost_estimate_usd are not inflated by skipped rules.
        stage2Chunks += Math.ceil(traces.length / RUBRIC_CHUNK_SIZE);
        const batchedRule = {
            ruleId,
            judgeQuestion: rule.rubric.judgeQuestion,
            outputSchema: rule.rubric.outputZodSchema,
            outputSchemaRaw: rule.rubric.outputSchemaRaw,
            promptTemplateRef: "builtin:rubric-judge-batch-v1",
        };
        const result = await runBatchedRubric({
            rule: batchedRule,
            traces,
            agentId,
            provider,
            promptRegistry,
            chunkSize: RUBRIC_CHUNK_SIZE,
            lang: opts.lang,
            artifacts,
            timeoutMs: opts.timeoutMs,
        });
        // Reports modified by this rule; each is re-written once below.
        const touched = new Set();
        // Fold verdicts back into per-trace report objects
        for (const v of result.verdicts) {
            const pt = entryByTraceId.get(v.traceId);
            if (!pt)
                continue;
            // Build bindings for change_template / assertion_templates rendering.
            // Bindings shape matches what PR-B single-trace agent-binding.ts passes:
            // the rubric verdict's `out` object (category, severity, reasoning, first_violating_step_id, evidence_span_ids).
            const bindings = {
                category: v.category,
                reasoning: v.reasoning,
                severity: v.severity,
                first_violating_step_id: v.firstViolatingStepId,
                evidence_span_ids: v.evidenceSpanIds,
            };
            pt.report.findings.push({
                ruleId,
                judgmentKind: "rubric",
                severity: v.severity,
                symptom: rule.symptom,
                likelyCause: v.category,
                evidence: { spans: v.evidenceSpanIds, excerpt: v.reasoning },
                suggestedFix: {
                    target: rule.suggestedFix.target,
                    change: renderChangeTemplate(rule.suggestedFix.changeTemplate, bindings),
                },
                confidence: "medium",
                verifyWith: {
                    suggestedEvalCase: {
                        queryId: null,
                        query: null,
                        assertions: rule.verifyWith.assertionTemplates.map((t) => renderChangeTemplate(t, bindings)),
                    },
                },
            });
            touched.add(pt);
        }
        for (const s of result.skipped) {
            const pt = entryByTraceId.get(s.traceId);
            if (!pt)
                continue;
            pt.report.run.rulesSkipped.push({ ruleId, reason: s.reason });
            touched.add(pt);
        }
        // Re-write yaml + md with updated findings / skipped rules, so both
        // formats always reflect the same report state.
        for (const pt of touched) {
            await persistReport(pt);
        }
    }
    const t_stage2_end = Date.now();

    // 6. Aggregator + sampler
    const allReports = perTrace.map((p) => p.report);
    const aggregates = aggregate(allReports);
    const samplerOutput = sample(allReports);

    // 7. Stage-4: cross-trace synth
    const t_stage4_start = Date.now();
    let synthProvider;
    try {
        synthProvider = registry.resolve({});
    }
    catch {
        synthProvider = null;
    }
    let synthSummary = null;
    if (synthProvider) {
        const result = await runCrossTraceSynthesizer({
            agentId,
            aggregates,
            samples: samplerOutput,
            nTotal: allReports.length,
            provider: synthProvider,
            promptRegistry,
            lang: opts.lang,
            artifacts,
            timeoutMs: opts.timeoutMs,
        });
        synthSummary = result.summary;
    }
    const t_stage4_end = Date.now();

    // 8. Assemble + write scan-summary
    const tracesReused = perTrace.filter((p) => p.reused).length;
    const scanSummary = {
        schema_version: "scan-summary/v1",
        scan: {
            agent_id: agentId,
            trace_count: allReports.length,
            traces_with_findings: allReports.filter((r) => r.findings.length > 0).length,
            traces_reused: tracesReused,
            traces_freshly_diagnosed: allReports.length - tracesReused,
            resumed_from_partial: tracesReused > 0,
            diagnosed_at: new Date().toISOString(),
            cli_version: CLI_VERSION,
            // NOTE(review): reported as "agent" even when no synthesizer provider
            // resolved and `summary` is null — confirm this is intended.
            synthesizer_mode: "agent",
        },
        summary: synthSummary,
        aggregates,
        per_trace_index: perTrace.map((p) => ({
            trace_id: p.report.trace.traceId,
            conversation_id: p.convId,
            report_path: `traces/${p.convId}.yaml`,
            finding_count: p.report.findings.length,
        })),
    };
    const scanSummaryYamlPath = path.join(opts.out, "scan-summary.yaml");
    const scanSummaryMdPath = path.join(opts.out, "scan-summary.md");
    await fs.writeFile(scanSummaryYamlPath, yaml.dump(scanSummary), "utf8");
    if (writeFormats !== "yaml") {
        await fs.writeFile(scanSummaryMdPath, renderScanSummaryMarkdown(scanSummary), "utf8");
    }

    // 9. Run metadata artifact
    const t_total = Date.now() - t_start;
    const stage4Calls = synthSummary ? 1 : 0;
    await artifacts.writeRunMetadata({
        cli_args: { traces: opts.traces, out: opts.out, lang: opts.lang ?? "en" },
        agent_id: agentId,
        rule_load_summary: {
            rules_applied: rules.map((r) => r.id),
            rules_skipped_at_load: [],
            rules_dir: opts.rulesDir ?? "builtin",
        },
        single_agent_validation: {
            checked_conv_ids: validation.checkedConvIds,
            agent_id_resolved: agentId,
        },
        timing: {
            stage_1_ms: t_stage1_end - t_stage1,
            stage_2_ms: t_stage2_end - t_stage2_start,
            stage_3_ms: 0,
            stage_4_ms: t_stage4_end - t_stage4_start,
            total_ms: t_total,
        },
        llm_calls: {
            stage_2_chunks: stage2Chunks,
            stage_3: 0,
            stage_4: stage4Calls,
            total: stage2Chunks + stage4Calls,
        },
        cost_estimate_usd: {
            stage_2: stage2Chunks * 0.005,
            stage_4: stage4Calls * 0.05,
            total: stage2Chunks * 0.005 + stage4Calls * 0.05,
            model_price_table_version: "2026-05",
        },
    });
    return {
        scanSummaryPath: scanSummaryYamlPath,
        perTraceReportPaths: perTrace.map((p) => path.join(tracesDir, `${p.convId}.yaml`)),
        tracesDiagnosed: allReports.length,
        tracesReused,
    };
}
@@ -0,0 +1,44 @@
1
+ # Cross-Trace Synthesizer
2
+
3
+ You are summarizing a batch of {{n_total}} agent trace diagnoses for agent
4
+ {{agent_id}}. All traces belong to this single agent. Aggregate statistics
5
+ have been computed deterministically. You see {{sample_count}} representative
6
+ trace summaries selected as samples ({{sample_ratio}} of total). Your job:
7
+ compose a short narrative explaining the dominant failure patterns,
8
+ prioritized rule-level fixes, and cross-rule relationships **specific to
9
+ this agent's program**.
10
+
11
+ ## Aggregated Stats (deterministic)
12
+
13
+ ```yaml
14
+ {{aggregates}}
15
+ ```
16
+
17
+ ## Representative Samples ({{sample_count}} of {{n_total}})
18
+
19
+ {{samples_yaml}}
20
+
21
+ ## Output Schema
22
+ Reply with a single JSON object satisfying this schema. No prose outside the
23
+ JSON.
24
+
25
+ ```json
26
+ {{output_schema}}
27
+ ```
28
+
29
+ {{language_instruction}}
30
+
31
+ ## Composition Rules
32
+ 1. `headline` ≤ 160 chars; lead with the dominant rule pattern named in
33
+ aggregates.rule_frequency. Frame as "this agent does X" since all traces
34
+ share the same agent.
35
+ 2. `primary_root_cause.rule_ids` lists rules that, if fixed in THIS agent's
36
+ program, would resolve the most traces. Cite aggregate counts; do not
37
+ invent rule_ids not in aggregates.
38
+ 3. `fix_priority` MUST order ALL rules in aggregates.rule_frequency from
39
+ highest to lowest impact. `affected_trace_count` must match aggregates.
40
+ 4. `cross_rule_links` only when ≥ 2 rules fire on the same trace (sampler
41
+ shows co-fire cases; aggregator surfaces counts indirectly).
42
+ 5. Aggregate-grounded only: every claim in `primary_root_cause.description`
43
+ and `fix_priority[].reason` must be backed by aggregates or samples; the
44
+ LLM does not invent new rule_ids or trace counts.
@@ -0,0 +1,44 @@
1
+ # Trace-Diagnose Rubric Judge (Batched)
2
+
3
+ You are evaluating one rubric rule across multiple agent traces from the
4
+ same agent (agent_id: {{agent_id}}). Read the rule's judge question, the
5
+ supplied traces, and reply with a single JSON object containing one verdict
6
+ per trace.
7
+
8
+ ## Rule
9
+ - **rule_id**: `{{rule_id}}`
10
+ - **batch_size**: {{batch_size}}
11
+
12
+ ## Judge Question
13
+ {{judge_question}}
14
+
15
+ ## Traces
16
+ Each trace below is identified by `trace_id`. Each trace's inputs follow the
17
+ rule's `inputs` schema (resolved from the trace's spans).
18
+
19
+ {{traces_yaml}}
20
+
21
+ ## Output Schema
22
+ Reply with a single JSON object. Each entry in `trace_results` corresponds to
23
+ one trace in the supplied batch, in any order. The `trace_id` field MUST echo
24
+ back the trace_id from the input.
25
+
26
+ ```json
27
+ {{output_schema}}
28
+ ```
29
+
30
+ {{language_instruction}}
31
+
32
+ ## Output Rules
33
+ 1. ONE entry per input trace_id, no duplicates, no extra entries.
34
+ 2. `first_violating_step_id` MUST be a real span id from THAT trace's spans —
35
+ the diagnose pipeline cross-checks; mis-attributed IDs cause the entry to
36
+ be discarded with `agent-error:schema_violation`.
37
+ 3. `reasoning` should cite span ids in the affected trace. When multiple traces
38
+ share a pattern, you may cite that in one trace's reasoning ("same retry
39
+ pattern as trace tr_xxx").
40
+ 4. Pick the closest category even if imperfect; do not fall through to `other`
41
+ unless evidence actively rules out every named category.
42
+ 5. If you cannot evaluate a trace (missing spans, malformed input), emit an
43
+ entry with `category: other`, `reasoning` explaining the gap, `severity: low`,
44
+ `first_violating_step_id` = any real span_id from that trace.
@@ -0,0 +1,25 @@
/**
 * Callback that performs the diagnosis of a single conversation on behalf of
 * `runPerTracePipeline`.
 */
export interface DiagnoseInvocation {
    /**
     * Invoked by `runPerTracePipeline`. The implementation MUST write the
     * per-trace yaml to `partialPath`; the runner renames that file to
     * `<conv_id>.yaml` once the call resolves.
     *
     * @param convId - conversation id being diagnosed.
     * @param partialPath - temporary path the yaml report must be written to.
     * @returns the trace id and (possibly null) agent id of the diagnosed trace.
     */
    (convId: string, partialPath: string): Promise<{
        traceId: string;
        agentId: string | null;
    }>;
}
/** Options accepted by `runPerTracePipeline`. */
export interface RunPerTracePipelineOpts {
    /** Conversation id to process; also the base name of the report file. */
    convId: string;
    /** Directory that holds the `<convId>.yaml` report. */
    outDir: string;
    /** Diagnosis callback, invoked only when no valid report already exists. */
    runDiagnose: DiagnoseInvocation;
}
/** Outcome of processing one conversation with `runPerTracePipeline`. */
export interface RunPerTracePipelineResult {
    /** True when an existing, schema-valid report was kept and diagnosis was skipped. */
    reused: boolean;
    /** Trace id reported by the diagnosis callback; absent when the report was reused. */
    traceId?: string;
    /** Agent id reported by the diagnosis callback; absent when the report was reused. */
    agentId?: string | null;
}
/**
 * Handle a single conv_id. When its per-trace yaml is already present and
 * parses, the file is kept and nothing else happens. Otherwise `runDiagnose`
 * is called (it writes to a `.partial` path) and, on success, that file is
 * atomically renamed to the final path. An existing yaml that is corrupt is
 * logged and overwritten.
 *
 * @param opts - conversation id, output directory and diagnosis callback.
 * @returns whether the existing report was reused, plus the diagnosed ids otherwise.
 */
export declare function runPerTracePipeline(opts: RunPerTracePipelineOpts): Promise<RunPerTracePipelineResult>;
@@ -0,0 +1,42 @@
1
+ import fs from "node:fs/promises";
2
+ import path from "node:path";
3
+ import yaml from "js-yaml";
4
+ import { ReportSchema } from "../diagnose/schemas.js";
5
/**
 * Read `filePath` as UTF-8 text and parse it with `yaml.load`.
 *
 * Any failure, whether reading the file or parsing it, is swallowed and
 * reported to the caller as `null`.
 */
async function safeReadYaml(filePath) {
    let parsed = null;
    try {
        const contents = await fs.readFile(filePath, "utf8");
        parsed = yaml.load(contents);
    }
    catch {
        // Best-effort read: unreadable or unparsable files count as "no document".
        parsed = null;
    }
    return parsed;
}
14
/**
 * Tell whether the yaml file at `filePath` can be read and passes
 * `ReportSchema.safeParse`. A `null` document (including a failed read)
 * is never valid.
 */
async function isValidExistingReport(filePath) {
    const doc = await safeReadYaml(filePath);
    return doc !== null && ReportSchema.safeParse(doc).success;
}
20
/**
 * Process one conv_id: skip if the per-trace yaml already exists and parses;
 * otherwise invoke runDiagnose (which writes to a .partial path), then
 * atomic-rename to the final path on success. Corrupt existing yaml is
 * logged + overwritten.
 *
 * If runDiagnose or the rename fails, the `.partial` file is removed on a
 * best-effort basis and the original error is rethrown, so a failed run does
 * not leave a stale partial artifact in `outDir`.
 *
 * @param opts `convId`, `outDir` and the `runDiagnose` callback.
 * @returns `{ reused: true }` when a valid report already existed; otherwise
 *   `{ reused: false, traceId, agentId }` taken from the diagnose result.
 */
export async function runPerTracePipeline(opts) {
    const finalPath = path.join(opts.outDir, `${opts.convId}.yaml`);
    const partialPath = `${finalPath}.partial`;
    let existed = true;
    try {
        await fs.stat(finalPath);
    }
    catch {
        existed = false;
    }
    if (existed) {
        if (await isValidExistingReport(finalPath)) {
            return { reused: true };
        }
        process.stderr.write(`warning: existing ${finalPath} is corrupt or schema-incompatible; re-diagnosing\n`);
        await fs.rm(finalPath, { force: true });
    }
    await fs.mkdir(opts.outDir, { recursive: true });
    let result;
    try {
        result = await opts.runDiagnose(opts.convId, partialPath);
        // Atomic rename .partial → final
        await fs.rename(partialPath, finalPath);
    }
    catch (err) {
        // Best-effort cleanup; never let a cleanup failure mask the real error.
        try {
            await fs.rm(partialPath, { force: true });
        }
        catch {
            // ignore
        }
        throw err;
    }
    return { reused: false, traceId: result.traceId, agentId: result.agentId };
}
@@ -0,0 +1,18 @@
1
+ import type { Report } from "../diagnose/types.js";
2
/** One trace chosen by the sampler, with the reason it was chosen. */
export interface Sample {
    /** Id of the sampled trace. */
    trace_id: string;
    /** Conversation id, or `null` when not available. */
    conversation_id: string | null;
    /** Headline taken from the report summary. */
    headline: string;
    /** Rule ids of the findings in the sampled report. */
    rule_ids: string[];
    /** Human-readable explanation of why this trace was selected. */
    selected_as: string;
}
9
/** Result envelope returned by `sample`. */
export interface SamplerOutput {
    /** The selected samples. */
    samples: Sample[];
}
12
/**
 * Deterministic K=5 sampler: top-1 by severity per dominant rule (count ≥
 * max(3, 5% of N)) + up to one outlier (rubric self-labeled FP, e.g.
 * likely_cause='other' or severity='low').
 *
 * Samples are returned in pick order: dominant rules from most to least
 * frequent (ties broken by rule id), followed by the outlier when one is
 * selected. No further sorting is applied to the result.
 */
export declare function sample(reports: Report[]): SamplerOutput;
@@ -0,0 +1,81 @@
1
/** Numeric ordering of finding severities; a larger value is more severe. */
const SEVERITY_RANK = {
    high: 3,
    medium: 2,
    low: 1,
};
/** Upper bound on the number of samples the sampler returns. */
const K_MAX = 5;
3
/**
 * Minimum occurrence count for a rule to be treated as dominant:
 * 5% of `N` rounded up, but never less than 3.
 */
function dominantThreshold(N) {
    const fivePercentOfN = Math.ceil(N * 0.05);
    return Math.max(3, fivePercentOfN);
}
6
/**
 * Find the report holding the most severe finding for `ruleId`.
 *
 * Severity is ranked through `SEVERITY_RANK` (unknown severities rank 0).
 * When two candidates share the same rank, the one whose trace id compares
 * lower wins. Returns `null` when no report has a finding for the rule.
 */
function pickTopBySeverityForRule(reports, ruleId) {
    let winner = null;
    for (const report of reports) {
        const matching = report.findings.filter((finding) => finding.ruleId === ruleId);
        for (const finding of matching) {
            const rank = SEVERITY_RANK[finding.severity] ?? 0;
            const beatsWinner = winner === null ||
                rank > winner.rank ||
                (rank === winner.rank && report.trace.traceId < winner.report.trace.traceId);
            if (beatsWinner) {
                winner = { report, rank };
            }
        }
    }
    return winner === null ? null : winner.report ?? null;
}
20
/**
 * A finding is an outlier when it is a rubric judgment whose likely cause is
 * "other" or whose severity is "low".
 */
function isOutlierFinding(f) {
    if (f.judgmentKind !== "rubric") {
        return false;
    }
    return f.likelyCause === "other" || f.severity === "low";
}
23
/**
 * Build the sample record for report `r`.
 *
 * `rule_ids` holds the report's distinct finding rule ids in sorted order;
 * `conversation_id` is always emitted as `null`; `selectedAs` is copied
 * verbatim into `selected_as`.
 */
function toSample(r, selectedAs) {
    const distinctRuleIds = new Set();
    for (const finding of r.findings) {
        distinctRuleIds.add(finding.ruleId);
    }
    const rule_ids = Array.from(distinctRuleIds).sort();
    const traceId = r.trace.traceId;
    const headline = r.summary.headline;
    return {
        trace_id: traceId,
        conversation_id: null,
        headline: headline,
        rule_ids,
        selected_as: selectedAs,
    };
}
33
/**
 * Deterministic sampler returning at most K_MAX (5) samples.
 *
 * 1. Findings are counted per rule id across all reports; a rule is dominant
 *    when its count is ≥ max(3, 5% of N), N being the number of reports.
 * 2. For each dominant rule, most frequent first, the top-1 report by
 *    severity is picked. Ties between equally frequent rules are broken by a
 *    plain code-unit comparison of the rule id, so the order does not depend
 *    on the runtime's locale. A report already picked for an earlier rule is
 *    not picked a second time.
 * 3. If there is still room, up to one outlier is appended: the first
 *    not-yet-picked report, in input order, that has a rubric self-labeled FP
 *    finding (e.g. likelyCause 'other' or severity 'low').
 *
 * Samples are returned in pick order; no further sorting is applied.
 *
 * @param reports Reports to sample from; an empty array yields no samples.
 * @returns `{ samples }` with at most K_MAX entries.
 */
export function sample(reports) {
    const N = reports.length;
    if (N === 0)
        return { samples: [] };
    // Count rule frequency, identify dominant.
    const counts = new Map();
    for (const r of reports) {
        for (const f of r.findings) {
            counts.set(f.ruleId, (counts.get(f.ruleId) ?? 0) + 1);
        }
    }
    const threshold = dominantThreshold(N);
    const dominantRules = [...counts.entries()]
        .filter(([, c]) => c >= threshold)
        // Code-unit comparison keeps the tie-break identical on every machine.
        .sort((a, b) => b[1] - a[1] || (a[0] < b[0] ? -1 : a[0] > b[0] ? 1 : 0))
        .map(([id]) => id);
    const picked = [];
    const usedTraceIds = new Set();
    // Top-1 by severity per dominant rule.
    for (const ruleId of dominantRules) {
        if (picked.length >= K_MAX)
            break;
        const r = pickTopBySeverityForRule(reports, ruleId);
        if (r && !usedTraceIds.has(r.trace.traceId)) {
            picked.push(toSample(r, `top-1 high-severity for ${ruleId}`));
            usedTraceIds.add(r.trace.traceId);
        }
    }
    // One outlier (rubric self-labeled FP) if there's slack.
    if (picked.length < K_MAX) {
        for (const r of reports) {
            if (usedTraceIds.has(r.trace.traceId))
                continue;
            const fpFinding = r.findings.find(isOutlierFinding);
            if (fpFinding) {
                picked.push(toSample(r, `outlier (rubric self-labeled FP for ${fpFinding.ruleId})`));
                usedTraceIds.add(r.trace.traceId);
                break;
            }
        }
    }
    // Both loops above stop adding once K_MAX is reached, so no trimming is needed.
    return { samples: picked };
}
@@ -0,0 +1,2 @@
1
+ import type { ScanSummary } from "./scan-summary-schema.js";
2
/**
 * Render a `ScanSummary` to a string.
 *
 * NOTE(review): presumably the output is Markdown, going by the function
 * name; the implementation is not visible from this declaration — confirm.
 */
export declare function renderScanSummaryMarkdown(s: ScanSummary): string;